about summary refs log tree commit diff stats
path: root/browse-slack
diff options
context:
space:
mode:
author Kartik K. Agaram <vc@akkartik.com> 2021-08-14 19:56:09 -0700
committer Kartik K. Agaram <vc@akkartik.com> 2021-08-14 19:56:09 -0700
commit 7d3f2722ff3d46997fe413be761613d7378cce50 (patch)
tree 5fe320bffcec3e744b4a1c26ddf7accd7bb86d12 /browse-slack
parent ef3881ab5a5b710d017625c95d0589fea1a1f284 (diff)
download mu-7d3f2722ff3d46997fe413be761613d7378cce50.tar.gz
no, we can't just sort lines in the slack archive
Comments contain indices back to the parent. Reordering items completely
messes up the indices.
Diffstat (limited to 'browse-slack')
-rw-r--r-- browse-slack/convert_slack.py 21
1 files changed, 12 insertions, 9 deletions
diff --git a/browse-slack/convert_slack.py b/browse-slack/convert_slack.py
index 93be54d2..e6ee6a56 100644
--- a/browse-slack/convert_slack.py
+++ b/browse-slack/convert_slack.py
@@ -20,7 +20,6 @@
 #   cd ..  # go back to the top-level archive directory
 #   dd if=/dev/zero of=data.img count=201600  # 100MB
 #   python path/to/convert_slack.py > data.out 2> data.err
-#   (optionally sort items by timestamp; I currently do this in Vim by piping the latter half of data.out through `sort`)
 #   dd if=data.out of=data.img conv=notrunc
 # Currently this process yields errors for ~300 items (~70 posts and their comments)
 # on the Future of Software group (https://futureofcoding.org/community). We fail to load those.
@@ -60,15 +59,19 @@ def parent(item):
     else:
         return -1
 
-idx = 0
+items = []
 for channel in json.load(open('channels.json')):
     for filename in sorted(listdir(channel['name'])):
         with open(join(channel['name'], filename)) as f:
             for item in json.load(f):
-                try:
-#?                     stderr.write(repr(item)+'\n')
-                    print(f"({json.dumps(item['ts'])} {parent(item)} {json.dumps(channel['name'])} {by(item)} {json.dumps(item['text'])})")
-                    item_idx[item['ts']] = idx
-                    idx += 1  # only increment when actually used and no exception raised
-                except KeyError:
-                    stderr.write(repr(item)+'\n')
+                item['channel_name'] = channel['name']
+                items.append(item)
+
+idx = 0
+for item in sorted(items, key=lambda item: item['ts']):
+    try:
+        print(f"({json.dumps(item['ts'])} {parent(item)} {json.dumps(item['channel_name'])} {by(item)} {json.dumps(item['text'])})")
+        item_idx[item['ts']] = idx
+        idx += 1  # only increment when actually used and no exception raised
+    except KeyError:
+        stderr.write(repr(item)+'\n')