summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-xdraco.pl208
1 files changed, 128 insertions, 80 deletions
diff --git a/draco.pl b/draco.pl
index 7d3c75c..4a85384 100755
--- a/draco.pl
+++ b/draco.pl
@@ -2,12 +2,11 @@
 
 use strict;
 use warnings;
+use Time::Piece;
 
 use HTTP::Tiny;
 use JSON::MaybeXS;
 
-use POSIX qw(strftime);
-
 # For wrapping comment blocks.
 use Unicode::LineBreak;
 my $lb = Unicode::LineBreak->new(ColMax => 76); # Default is 76.
@@ -47,6 +46,10 @@ if (exists $dispatch{$ARGV[0]}) {
     $dispatch{shift @ARGV}->();
 }
 
+# This is the start time.
+my $start_time = time;
+my $last_log = $start_time;
+
 # $url contains the reddit post. Raise the limit to 500 comments which
 # is the maximum reddit allows.
 my $url = shift @ARGV;
@@ -55,6 +58,7 @@ my $json_url = "${url}.json?limit=500&sort=top";
 my $http = HTTP::Tiny->new( verify_SSL => 1 );
 
 # Fetch the post.
+print_time() if $DEBUG;
 print STDERR "fetching `$json_url'.\n" if $DEBUG;
 my $response = get_response($json_url);
 
@@ -69,16 +73,12 @@ my $post = $json_data->[0]->{data}->{children}->[0]->{data};
 # author, body, created_utc & permalink.
 my $comments = $json_data->[1]->{data}->{children};
 
-# Print total top-level comments.
-print STDERR "total top-level comments: ",
-    scalar($comments->@*), "\n" if $DEBUG;
-
 # Start the Org document.
 print "#+", "STARTUP:content\n";
 
 # Print the date.
-my $date = strftime '%+', localtime();
-print "#+", "DATE: $date\n";
+my $current_date = Time::Piece->new->strftime('%+');
+print "#+", "DATE: $current_date\n";
 print "\n";
 
 # Print the post title & it's link.
@@ -86,14 +86,19 @@ print "* ", "[[$post->{url}][$post->{title}]]\n";
 
 # Add various details to :PROPERTIES:.
 print ":PROPERTIES:\n";
+# Include the created date, archive date & total top-level comments in
+# properties.
+print ":CREATED_UTC: ",
+    Time::Piece->strptime($post->{created_utc}, '%s')
+    ->strftime('%+'), "\n";
+
+print ":ARCHIVE_DATE: $current_date\n";
+print ":TOTAL_TOP_LEVEL_COMMENTS: ", scalar($comments->@*), "\n";
 foreach my $detail (qw( subreddit created_utc author permalink
                         upvote_ratio ups downs score )) {
     print ":${detail}: =$post->{$detail}=\n"
         if scalar $post->{$detail};
 }
-# Include the archive date & total top-level comments in properties.
-print ":ARCHIVE_DATE: $date\n";
-print ":TOTAL_TOP_LEVEL_COMMENTS: ", scalar($comments->@*), "\n";
 print ":END:\n";
 
 # Add selftext if present.
@@ -106,29 +111,40 @@ print "\n#+BEGIN_SRC markdown\n",
 my (@http_calls, @shell_comments, %counter);
 $counter{skipped_due_to_more} = 0;
 $counter{print_comment_chain_call} = 0;
+$counter{iterate_over_comments_call} = 0;
 
-# Iterate over top-level comments.
-foreach my $comment ($comments->@*) {
-    if ($comment->{kind} eq "more"
-        and $comment->{data}->{id} eq "_") {
-        $counter{skipped_due_to_more}++;
-        next;
-    }
-    print_comment_chain($comment->{data}, 0);
-}
+print_time() if $DEBUG;
+print STDERR "iterating over top-level comments.\n" if $DEBUG;
+# Iterate over top-level comments. The second argument is level
+# (depth), it should be 0 for top-level comments.
+iterate_over_comments($comments, 0);
+print_time() if $DEBUG;
 
+# Print important stats.
+print STDERR "\n" if $DEBUG;
 print STDERR "total http calls: ",
     scalar @http_calls, "\n" if $DEBUG;
+print STDERR "total top-level comments: ",
+    scalar($comments->@*), "\n" if $DEBUG;
 print STDERR "total shell comments: ",
     scalar @shell_comments, "\n" if $DEBUG and scalar @shell_comments;
 print STDERR "total print_comment_chain calls: ",
     $counter{print_comment_chain_call}, "\n" if $DEBUG;
+print STDERR "total iterate_over_comments calls: ",
+    $counter{iterate_over_comments_call}, "\n" if $DEBUG;
 
 # This is equivalent to "continue this thread ->" we see on
 # old.reddit.com threads.
 print STDERR "total comments skipped due to more: ",
     $counter{skipped_due_to_more}, "\n" if $DEBUG;
 
+sub print_time {
+    print STDERR "    ";
+    print STDERR "time since [start, last log]: [", time - $start_time,
+        ", ", time - $last_log, "] seconds\n";
+    $last_log = time;
+}
+
 sub get_response {
     my $url = shift @_;
     my $response = $http->get($url);
@@ -138,14 +154,93 @@ sub get_response {
     return $response;
 }
 
-# There are 3 kind of comments.
-#
-# 1. normal comments (includes top-level comments).
-# 2. comments hidden under "load more comments".
-# 3. comments hidden under "continue this thread".
+# First argument requires $comments & second is the level (depth).
+sub iterate_over_comments {
+    my $comments = shift @_;
+    my $level = shift @_;
+
+    $counter{iterate_over_comments_call}++;
+
+    foreach my $comment ($comments->@*) {
+        my $comment_data = $comment->{data};
+
+        # There are 3 kind of comments.
+        #
+        # 1. normal comments (includes top-level comments & replies).
+        # 2. comments hidden under "load more comments".
+        # 3. comments hidden under "continue this thread".
+
+        # We will be dealing with them in reverse order, i.e. 3rd ->
+        # 2nd -> 1st.
+
+        # This comment we are skipping is the third kind of comment,
+        # i.e. comment hidden under "continue this thread". We can't
+        # parse it yet.
+        if ($comment->{kind} eq "more"
+            and $comment_data->{id} eq "_") {
+            $counter{skipped_due_to_more}++;
+            next;
+        }
+
+        # These are second kind of comments, i.e. comments hidden
+        # under "load more comments". We can get it by making another
+        # HTTP call. This is skipped by default & user has to pass
+        # `FETCH_ALL' to enable it.
+        unless ($comment_data->{author}) {
+            push @shell_comments, $comment_data->{id};
+
+            # Don't proceed unless user has set `FETCH_ALL'.
+            next unless $ENV{FETCH_ALL};
+
+            unless ( eval {
+                # Reddit doesn't like this kind of url:
+                #     http://<reddit>/<post_id>//<comment_id>.json
+                #
+                # It wants this kind of url:
+                #     http://<reddit>/<post_id>/<comment_id>.json
+                #
+                # Notice the extra '/' in first url.
+                my $json_url = $url;
+                $json_url .= "/" unless substr $url, -1 eq "/";
+                $json_url .= "$comment_data->{id}.json?limit=500&sort=top";
+
+                # Fetch the comment.
+                my $response = get_response($json_url);
+
+                # Decode json.
+                my $json_data = decode_json($response->{content});
+
+                # $comments contains comment data. We are interested in: replies,
+                # author, body, created_utc & permalink.
+                my $comments = $json_data->[1]->{data}->{children};
+
+                # Now this is like a normal comment chain, i.e. first
+                # kind of comment. We just have to iterate over it &
+                # pass to print_comment_chain, iterate_over_comments
+                # will handle it.
+                iterate_over_comments($comments, $level);
+                return 1;
+            } ) {
+                my $err = $@;
+                print STDERR "parsing `$comment_data->{id}' failed: $err\n";
+            }
+
+            # This comment thread has been parsed, move on to the text
+            # one.
+            next;
+        }
+
+        # This is first kind of comment, we can pass it directly to
+        # print_comment_chain.
+        print_comment_chain($comment_data, $level);
+    }
+}
+
 
 # print_comment_chain will print the whole chain of comment while
-# accounting for level.
+# accounting for level. It can only parse the first kind of comment,
+# i.e. top-level comments & their replies. To learn about kinds of
+# comments, check iterate_over_comments() subroutine.
 sub print_comment_chain {
     # This was earlier called $comment & was changed to $comment_data
     # to prevent confusion because it is $comment->{data}.
@@ -154,59 +249,18 @@ sub print_comment_chain {
 
     $counter{print_comment_chain_call}++;
 
-    # $comment_data->{author} not being present means that it's a
-    # comment hidden under "load more comments". We can get it by
-    # making another HTTP call.
-    unless ($comment_data->{author}) {
-        push @shell_comments, $comment_data->{id};
-        return unless $ENV{FETCH_ALL};
-        unless ( eval {
-            # It'll fail if we fetch "${url}/$comment_data->{id}.json"
-            # & ${url} already has "/" at the end. So, we check if "/"
-            # is present, if not then we add it.
-            my $json_url = $url;
-            $json_url .= "/" unless substr $url, -1 eq "/";
-            $json_url .= "$comment_data->{id}.json?limit=500&sort=top";
-
-            # Fetch the comment.
-            my $response = get_response($json_url);
-
-            # Decode json.
-            my $json_data = decode_json($response->{content});
-
-            # $comments contains comment data. We are interested in: replies,
-            # author, body, created_utc & permalink.
-            my $comments = $json_data->[1]->{data}->{children};
-
-            foreach my $comment ($comments->@*) {
-                if ($comment->{kind} eq "more"
-                    and $comment->{data}->{id} eq "_") {
-                    $counter{skipped_due_to_more}++;
-                    next;
-                }
-                print_comment_chain($comment->{data}, $level);
-            }
-
-            return 1;
-        } ) {
-            my $err = $@;
-            print STDERR "parsing `$comment_data->{id}' failed: $err\n";
-        }
-
-        # This comment thread has been parsed, move on to the text
-        # one.
-        return;
-    }
-
     print "*" x ($level + 2), " ", "$comment_data->{author}";
     print " [S]" if $comment_data->{is_submitter};
     print "\n";
 
     # Print comment details.
     print ":PROPERTIES:\n";
+    print ":CREATED_UTC: ",
+        Time::Piece->strptime($comment_data->{created_utc}, '%s')
+          ->strftime('%+'), "\n";
     foreach my $detail (qw( created_utc author permalink upvote_ratio
-                            ups downs score edited is_submitter
-                            stickied controversiality )) {
+                            ups downs score edited stickied
+                            controversiality )) {
         print ":${detail}: =$comment_data->{$detail}=\n"
             if scalar $comment_data->{$detail};
     }
@@ -220,13 +274,7 @@ sub print_comment_chain {
 
     # If the comment has replies then iterate over those too.
     if (scalar $comment_data->{replies}) {
-        foreach my $reply ($comment_data->{replies}->{data}->{children}->@*) {
-            if ($reply->{kind} eq "more"
-                and $reply->{data}->{id} eq "_") {
-                $counter{skipped_due_to_more}++;
-                next;
-            }
-            print_comment_chain($reply->{data}, $level + 1);
-        }
+        iterate_over_comments($comment_data->{replies}->{data}->{children},
+                              $level + 1);
     }
 }