From 48668b05251a69110944df51800e88d01f6e2942 Mon Sep 17 00:00:00 2001 From: Andinus Date: Mon, 23 Nov 2020 20:38:44 +0530 Subject: Release v0.2.0, load comments hidden under "load more comments" This version makes the script lot more complex. This version will load all those comments hidden behind "load more comments". But not those hidden behind "continue this thread". Starting this release we have a News section. --- draco.pl | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 111 insertions(+), 13 deletions(-) (limited to 'draco.pl') diff --git a/draco.pl b/draco.pl index 08779a1..b33fb62 100755 --- a/draco.pl +++ b/draco.pl @@ -15,39 +15,64 @@ my $lb = Unicode::LineBreak->new(ColMax => 76); # Default is 76. # Printing UTF-8 to STDOUT. binmode(STDOUT, "encoding(UTF-8)"); -die "usage: draco [-dv] \n" unless scalar @ARGV; +die "usage: draco [-dhv] \n" unless scalar @ARGV; my $DEBUG; -my $VERSION = "v0.1.3"; +my $VERSION = "v0.2.0"; # Dispatch table to be parsed before url. my %dispatch = ( '-v' => sub { print "Draco $VERSION\n"; exit; }, '-d' => sub { $DEBUG = 1; print STDERR "draco: debug on.\n"; }, + '-h' => sub { print qq{Draco $VERSION + +Options: + -d + Turn on debug messages. Debug messages will be printed to + STDERR. + -h + Print this help. + -v + Print version. + +Environment Variables: + FETCH_ALL + Fetch all comments. This will make multiple HTTP calls to + reddit. This doesn't fetch *all* the comments. +}; + exit; + }, ); if (exists $dispatch{$ARGV[0]}) { # shift @ARGV to get $url in next shift. $dispatch{shift @ARGV}->(); } -# $url contains the reddit post. +# $url contains the reddit post. Raise the limit to 500 comments which +# is the maximum reddit allows. my $url = shift @ARGV; -my $json_url = "${url}.json"; +my $json_url = "${url}.json?limit=500&sort=top"; my $http = HTTP::Tiny->new( verify_SSL => 1 ); # Fetch the post. -print STDERR "draco: fetching `$json_url'.\n" if $DEBUG; -my $response = $http->get($json_url); -die "Unexpected response - $response->{status}: $response->{reason}" - unless $response->{success}; +print STDERR "fetching `$json_url'.\n" if $DEBUG; +my $response = get_response($json_url); # Decode json. -print STDERR "draco: decoding json response.\n" if $DEBUG; +print STDERR "decoding json response.\n" if $DEBUG; my $json_data = decode_json($response->{content}); # $post contains post data my $post = $json_data->[0]->{data}->{children}->[0]->{data}; +# $comments contains comment data. We are interested in: replies, +# author, body, created_utc & permalink. +my $comments = $json_data->[1]->{data}->{children}; + +# Print total top-level comments. +print STDERR "total top-level comments: ", + scalar($comments->@*), "\n" if $DEBUG; + # Start the Org document. print "#+", "STARTUP:content\n"; @@ -66,8 +91,9 @@ foreach my $detail (qw( subreddit created_utc author permalink print ":${detail}: =$post->{$detail}=\n" if scalar $post->{$detail}; } -# Include the archive date in properties. +# Include the archive date & total top-level comments in properties. print ":ARCHIVE_DATE: $date\n"; +print ":TOTAL_TOP_LEVEL_COMMENTS: ", scalar($comments->@*), "\n"; print ":END:\n"; # Add selftext if present. @@ -77,20 +103,87 @@ print "\n#+BEGIN_SRC markdown\n", "#+END_SRC\n" if scalar $post->{selftext}; -# $comments contains comment data. We are interested in: replies, -# author, body, created_utc & permalink. -my $comments = $json_data->[1]->{data}->{children}; +my (@http_calls, @shell_comments, %counter); +$counter{skipped_due_to_more} = 0; +$counter{print_comment_chain_call} = 0; + # Iterate over top-level comments. foreach my $comment ($comments->@*) { + if ($comment->{kind} eq "more" + and $comment->{data}->{id} eq "_") { + $counter{skipped_due_to_more}++; + next; + } print_comment_chain($comment->{data}, 0); } +print STDERR "total http calls: ", + scalar @http_calls, "\n" if $DEBUG; +print STDERR "total shell comments: ", + scalar @shell_comments, "\n" if $DEBUG and scalar @shell_comments; +print STDERR "total print_comment_chain calls: ", + $counter{print_comment_chain_call}, "\n" if $DEBUG; + +# This is equivalent to "continue this thread ->" we see on +# old.reddit.com threads. +print STDERR "total comments skipped due to more: ", + $counter{skipped_due_to_more}, "\n" if $DEBUG; + +sub get_response { + my $url = shift @_; + my $response = $http->get($url); + push @http_calls, $url; + die "Unexpected response - $response->{status}: $response->{reason} : $url" + unless $response->{success}; + return $response; +} + # print_comment_chain will print the whole chain of comment while # accounting for level. sub print_comment_chain { my $comment = shift @_; my $level = shift @_; + $counter{print_comment_chain_call}++; + + # $comment->{author} & $comment->{body} not being present means + # that it's a shell comment. We can get it by making another HTTP + # call. + unless ($comment->{author}) { + push @shell_comments, $comment->{id}; + return unless $ENV{FETCH_ALL}; + unless ( eval { + my $json_url = "${url}/$comment->{id}.json?limit=500&sort=top"; + + # Fetch the comment. + my $response = get_response($json_url); + + # Decode json. + my $json_data = decode_json($response->{content}); + + # $comments contains comment data. We are interested in: replies, + # author, body, created_utc & permalink. + my $comments = $json_data->[1]->{data}->{children}; + + foreach my $comment ($comments->@*) { + if ($comment->{kind} eq "more" + and $comment->{data}->{id} eq "_") { + $counter{skipped_due_to_more}++; + next; + } + print_comment_chain($comment->{data}, $level); + } + + return 1; + } ) { + print STDERR "parsing shell comment: $comment->{id} : failed\n"; + } + + # This comment thread has been parsed, move on to the text + # one. + return; + } + print "*" x ($level + 2), " ", "$comment->{author}"; print " [S]" if $comment->{is_submitter}; print "\n"; @@ -114,6 +207,11 @@ sub print_comment_chain { # If the comment has replies then iterate over those too. if (scalar $comment->{replies}) { foreach my $reply ($comment->{replies}->{data}->{children}->@*) { + if ($reply->{kind} eq "more" + and $reply->{data}->{id} eq "_") { + $counter{skipped_due_to_more}++; + next; + } print_comment_chain($reply->{data}, $level + 1); } } -- cgit 1.4.1-2-gfad0