From 7784855c18d55026dd841454dd89db03370680e1 Mon Sep 17 00:00:00 2001 From: "Kartik K. Agaram" Date: Sun, 8 Aug 2021 11:15:22 -0700 Subject: . --- browse_slack/README.md | 16 ++++++ browse_slack/convert_slack.py | 113 ++++++++++++++++++++++++++++++++++++++++++ browse_slack/main.mu | 4 +- convert_slack.py | 108 ---------------------------------------- 4 files changed, 132 insertions(+), 109 deletions(-) create mode 100644 browse_slack/README.md create mode 100644 browse_slack/convert_slack.py delete mode 100644 convert_slack.py diff --git a/browse_slack/README.md b/browse_slack/README.md new file mode 100644 index 00000000..ba0df896 --- /dev/null +++ b/browse_slack/README.md @@ -0,0 +1,16 @@ +To try it out: + +1. Import a Slack archive into the data disk by following the instructions at + the top of `browse_slack/convert_slack.py`. + +2. Build the code disk. + +``` +./translate browse_slack/*.mu +``` + +3. Run the code and data disks: + +``` +qemu-system-i386 -m 2G -hda code.img -hdb path/to/data.img +``` diff --git a/browse_slack/convert_slack.py b/browse_slack/convert_slack.py new file mode 100644 index 00000000..1b7b39cc --- /dev/null +++ b/browse_slack/convert_slack.py @@ -0,0 +1,113 @@ +# Import JSON from a Slack admin export into a disk image Qemu can load. +# +# Dependencies: python, netpbm +# +# Step 1: download a Slack archive +# +# Step 2: download user avatars to subdirectory images/ and convert them to PPM in subdirectory images/ppm/ +# mkdir images +# cd images +# grep image_72 . -r |grep -v users.json |awk '{print $3}' |sort |uniq |sed 's/?.*//' |sed 's,\\,,g' |sed 's/"//' |sed 's/",$//' > images.list +# wget -i images.list --wait=0.1 +# # fix some lying images +# for f in $(file *.jpg |grep PNG |sed 's/:.*//'); do mv -i $f $(echo $f |sed 's/\.jpg$/.png/'); done +# # +# mkdir ppm +# for f in *.jpg; do jpegtopnm $f |pnmtopnm -plain > ppm/$(echo $f |sed 's/\.jpg$//').ppm; done +# for f in *.png; do png2pnm -n $f > ppm/$(echo $f |sed 's/\.png$//').ppm; done +# +# Step 3: construct a disk image out of the archives and avatars +# cd ../.. # go back to parent of images/ +# dd if=/dev/zero of=data.img count=201600 # 100MB +# python path/to/convert_slack.py |dd of=data.img conv=notrunc +# Currently this process yields errors for ~70 items on the Future of Software +# group. We fail to load those. +# +# Notes on input format: +# Redundant 'type' field that's always 'message'. Probably an "enterprise" feature. + +from sys import argv, stderr +import json +from os import listdir +from os.path import isfile, join, basename, splitext +from urllib.parse import urlparse + +channels = {} + +user_id = {} # name -> index +users = [] + +items = [] + +def contents(filename): + with open(filename) as f: + for item in json.load(f): + try: + if 'thread_ts' in item: + # comment + yield { + 'name': f"/{item['thread_ts']}/{item['ts']}", + 'contents': item['text'], + 'by': user_id[item['user']], +#? 'by': users[user_id[item['user']]]['avatar'][0:100], + } + else: + # top-level post + yield { + 'name': f"/{item['ts']}", + 'contents': item['text'], + 'by': user_id[item['user']], +#? 'by': users[user_id[item['user']]]['avatar'][0:100], + } + except KeyError: + stderr.write(repr(item)+'\n') + +def filenames(dir): + for filename in sorted(listdir(dir)): + result = join(dir, filename) + if isfile(result): + yield result + +def look_up_ppm_image(url): + file_root = splitext(basename(urlparse(url).path))[0] + filename = f"images/ppm/{file_root}.ppm" + if isfile(filename): + with open(filename) as f: + return f.read() + +def load_users(): + stderr.write('loading users..\n') + length = 0 + with open('users.json') as f: + for user in json.load(f): +#? if user['deleted']: +#? continue + if user['id'] not in user_id: + if 'real_name' not in user: + user['real_name'] = '' + print(f"({json.dumps(user['id'])} \"@{user['name']}\" {json.dumps(user['real_name'])} [{look_up_ppm_image(user['profile']['image_72']) or ''}])") +#? users.append({ +#? 'id': user['id'], +#? 'username': user['name'], +#? 'name': user['real_name'], +#? 'avatar': look_up_ppm_image(user['profile']['image_72']), +#? }) + user_id[user['id']] = length + length += 1 + +def load_channels(): + stderr.write('loading channels..\n') + with open('channels.json') as f: + for channel in json.load(f): + channels[channel['id']] = channel['name'] + +load_channels() +load_users() +for dir in channels.values(): + try: + for filename in filenames(dir): + print(filename) + for item in contents(filename): + print(f"({json.dumps(item['name'])} {json.dumps(dir)} {item['by']} {json.dumps(item['contents'])})") + except NotADirectoryError: + pass diff --git a/browse_slack/main.mu b/browse_slack/main.mu index e3c5affd..6948665f 100644 --- a/browse_slack/main.mu +++ b/browse_slack/main.mu @@ -79,5 +79,7 @@ fn main screen: (addr screen), keyboard: (addr keyboard), data-disk: (addr disk) } } -fn parse s: (addr stream byte), users: (addr array user), channels: (addr array channel), items: (addr array item) { +fn parse in: (addr stream byte), users: (addr array user), channels: (addr array channel), items: (addr array item) { + var line-storage: (stream byte 0x18000) + var line/ecx: (addr stream byte) <- address line-storage } diff --git a/convert_slack.py b/convert_slack.py deleted file mode 100644 index 882cc71b..00000000 --- a/convert_slack.py +++ /dev/null @@ -1,108 +0,0 @@ -# Import JSON from a Slack admin export into a disk image Qemu can load. -# -# Dependencies: python, netpbm -# -# Images downloaded as follows: -# grep image_72 . -r |grep -v users.json |awk '{print $3}' |sort |uniq |sed 's/?.*//' |sed 's,\\,,g' |sed 's/"//' |sed 's/",$//' > images.list -# wget -i images.list --wait=0.1 -# # fix some lying images -# for f in $(file *.jpg |grep PNG |sed 's/:.*//'); do mv -i $f $(echo $f |sed 's/\.jpg$/.png/'); done -# # -# mkdir ppm -# for f in *.jpg; do jpegtopnm $f |pnmtopnm -plain > ppm/$(echo $f |sed 's/\.jpg$//').ppm; done -# for f in *.png; do png2pnm -n $f > ppm/$(echo $f |sed 's/\.png$//').ppm; done -# -# To construct the disk image: -# dd if=/dev/zero of=data.img count=201600 # 100MB -# python convert_slack.py |dd of=data.img conv=notrunc -# Currently this process yields errors for ~70 items on the Future of Software -# group. We fail to load those. -# -# Notes on input format: -# Redundant 'type' field that's always 'message'. Probably an "enterprise" feature. - -from sys import argv, stderr -import json -from os import listdir -from os.path import isfile, join, basename, splitext -from urllib.parse import urlparse - -channels = {} - -user_id = {} # name -> index -users = [] - -items = [] - -def contents(filename): - with open(filename) as f: - for item in json.load(f): - try: - if 'thread_ts' in item: - # comment - yield { - 'name': f"/{item['thread_ts']}/{item['ts']}", - 'contents': item['text'], - 'by': user_id[item['user']], -#? 'by': users[user_id[item['user']]]['avatar'][0:100], - } - else: - # top-level post - yield { - 'name': f"/{item['ts']}", - 'contents': item['text'], - 'by': user_id[item['user']], -#? 'by': users[user_id[item['user']]]['avatar'][0:100], - } - except KeyError: - stderr.write(repr(item)+'\n') - -def filenames(dir): - for filename in sorted(listdir(dir)): - result = join(dir, filename) - if isfile(result): - yield result - -def look_up_ppm_image(url): - file_root = splitext(basename(urlparse(url).path))[0] - filename = f"images2/ppm/{file_root}.ppm" - if isfile(filename): - with open(filename) as f: - return f.read() - -def load_users(): - stderr.write('loading users..\n') - length = 0 - with open('users.json') as f: - for user in json.load(f): -#? if user['deleted']: -#? continue - if user['id'] not in user_id: - if 'real_name' not in user: - user['real_name'] = '' - print(f"({json.dumps(user['id'])} \"@{user['name']}\" {json.dumps(user['real_name'])} [{look_up_ppm_image(user['profile']['image_72']) or ''}])") -#? users.append({ -#? 'id': user['id'], -#? 'username': user['name'], -#? 'name': user['real_name'], -#? 'avatar': look_up_ppm_image(user['profile']['image_72']), -#? }) - user_id[user['id']] = length - length += 1 - -def load_channels(): - stderr.write('loading channels..\n') - with open('channels.json') as f: - for channel in json.load(f): - channels[channel['id']] = channel['name'] - -load_channels() -load_users() -for dir in channels.values(): - try: - for filename in filenames(dir): - print(filename) - for item in contents(filename): - print(f"({json.dumps(item['name'])} {json.dumps(dir)} {item['by']} {json.dumps(item['contents'])})") - except NotADirectoryError: - pass -- cgit 1.4.1-2-gfad0