diff options
author | Kartik K. Agaram <vc@akkartik.com> | 2021-08-14 19:56:09 -0700 |
---|---|---|
committer | Kartik K. Agaram <vc@akkartik.com> | 2021-08-14 19:56:09 -0700 |
commit | 7d3f2722ff3d46997fe413be761613d7378cce50 (patch) | |
tree | 5fe320bffcec3e744b4a1c26ddf7accd7bb86d12 | |
parent | ef3881ab5a5b710d017625c95d0589fea1a1f284 (diff) | |
download | mu-7d3f2722ff3d46997fe413be761613d7378cce50.tar.gz |
no, we can't just sort lines in the slack archive
Comments contain indices back to the parent. Reordering items completely messes up the indices.
-rw-r--r-- | browse-slack/convert_slack.py | 21 |
1 file changed, 12 insertions, 9 deletions
```diff
diff --git a/browse-slack/convert_slack.py b/browse-slack/convert_slack.py
index 93be54d2..e6ee6a56 100644
--- a/browse-slack/convert_slack.py
+++ b/browse-slack/convert_slack.py
@@ -20,7 +20,6 @@
 # cd .. # go back to the top-level archive directory
 # dd if=/dev/zero of=data.img count=201600 # 100MB
 # python path/to/convert_slack.py > data.out 2> data.err
-# (optionally sort items by timestamp; I currently do this in Vim by piping the latter half of data.out through `sort`)
 # dd if=data.out of=data.img conv=notrunc
 # Currently this process yields errors for ~300 items (~70 posts and their comments)
 # on the Future of Software group (https://futureofcoding.org/community). We fail to load those.
@@ -60,15 +59,19 @@ def parent(item):
     else:
         return -1
 
-idx = 0
+items = []
 for channel in json.load(open('channels.json')):
     for filename in sorted(listdir(channel['name'])):
         with open(join(channel['name'], filename)) as f:
             for item in json.load(f):
-                try:
-#? stderr.write(repr(item)+'\n')
-                    print(f"({json.dumps(item['ts'])} {parent(item)} {json.dumps(channel['name'])} {by(item)} {json.dumps(item['text'])})")
-                    item_idx[item['ts']] = idx
-                    idx += 1 # only increment when actually used and no exception raised
-                except KeyError:
-                    stderr.write(repr(item)+'\n')
+                item['channel_name'] = channel['name']
+                items.append(item)
+
+idx = 0
+for item in sorted(items, key=lambda item: item['ts']):
+    try:
+        print(f"({json.dumps(item['ts'])} {parent(item)} {json.dumps(item['channel_name'])} {by(item)} {json.dumps(item['text'])})")
+        item_idx[item['ts']] = idx
+        idx += 1 # only increment when actually used and no exception raised
+    except KeyError:
+        stderr.write(repr(item)+'\n')
```