about summary refs log tree commit diff stats
path: root/js/bird-words/beak.js
blob: 9968d5fb2d7e9d9c058c91b8b2b82b823e6305e9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
const fs = require('fs');

/**
 * Greedy word-wrap: breaks `str` on single spaces and packs the pieces into
 * lines joined by '\n'.
 *
 * A piece is appended to the current line (with one separating space) unless
 * doing so would make the line longer than `width`; in that case it starts a
 * new line. A single piece longer than `width` is never split.
 *
 * @param {string} str - Text to wrap.
 * @param {number} width - Maximum line length in characters.
 * @returns {string} The wrapped text.
 */
const wrap = (str, width) => {
    const lines = [];
    for (const token of str.split(' ')) {
        const last = lines.length - 1;
        // Kept as a negated ">" (rather than "<=") so a non-numeric width
        // compares exactly the way it always has.
        const fits = last >= 0 && !(lines[last].length + token.length + 1 > width);
        if (fits) {
            lines[last] = `${lines[last]} ${token}`;
        } else {
            lines.push(token);
        }
    }
    return lines.join('\n');
};

/**
 * Upper-cases the first character of `string` and appends an exclamation mark.
 *
 * @param {string} string - Text to decorate.
 * @returns {string} The decorated text.
 */
const prettyItUp = string => {
    const initial = string.charAt(0).toUpperCase();
    const remainder = string.slice(1);
    return `${initial}${remainder}!`;
};

// Load the training corpus and normalise it into a flat list of lowercase
// words. Punctuation is stripped first, then the text is split on any run of
// whitespace (spaces, tabs, LF/CRLF line breaks).
// Empty tokens are discarded: leading/trailing whitespace (e.g. the file's
// final newline) would otherwise yield "" entries that end up in the Markov
// chain as phantom words.
let corpus = fs.readFileSync('lib/corpus.txt', 'utf8');
let words = corpus
    .toLowerCase()
    .replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, "")
    .split(/\s+/)
    .filter(word => word.length > 0);

// Second-order chain: each key is two consecutive corpus words joined by a
// single space, and its value is the list of every word seen directly after
// that pair (duplicates are kept).
// CONSIDER: is there a way to persist this data structure so we don't have to rebuild it every time? What does that change?
const markovChain = new Map();

// Slide a three-word window over the corpus; stop once no third word remains.
for (let start = 0; start + 2 < words.length; start += 1) {
    const key = `${words[start]} ${words[start + 1]}`;
    const follower = words[start + 2];
    const followers = markovChain.get(key);
    if (followers === undefined) {
        markovChain.set(key, [follower]);
    } else {
        followers.push(follower);
    }
}

// Generate a free-standing story: start from a randomly chosen pair and
// repeatedly append a random follower of the current pair, for at most
// `storyLength` appended words.
const storyLength = 100;
const pairs = [...markovChain.keys()];
let randomPair = pairs[Math.floor(Math.random() * pairs.length)];
let story = randomPair;

let remaining = storyLength;
while (remaining > 0) {
    const candidates = markovChain.get(randomPair);
    if (!candidates) {
        // Dead end: nothing in the corpus follows this pair.
        break;
    }
    const picked = candidates[Math.floor(Math.random() * candidates.length)];
    // Slide the window: the pair's second word becomes the first.
    const [, second] = randomPair.split(' ');
    randomPair = `${second} ${picked}`;
    story = `${story} ${picked}`;
    remaining -= 1;
}

// CONSIDER: this *could* be generalized to be more clever, maybe pull in an arbitrary filename based on the template tag passed...but that feels sloppy and unsafe, so keeping with the naive approach for now

// Reads a newline-delimited word list. Accepts both LF and CRLF line endings
// and drops blank lines, so a file's terminating newline does not become an
// empty entry that a template tag could randomly expand to.
const readWordList = path =>
    fs.readFileSync(path, 'utf8')
        .split(/\r?\n/)
        .filter(entry => entry.length > 0);

let places = readWordList('lib/places.txt');
let colors = readWordList('lib/colors.txt');
let nouns = readWordList('lib/nouns.txt');
let verbs = readWordList('lib/verbs.txt');
let adjectives = readWordList('lib/adjectives.txt');
let adverbs = readWordList('lib/adverbs.txt');

/**
 * Expands the template tags in `script`.
 *
 * The script is split on '@'. Every resulting segment that starts with a tag
 * keyword is replaced in full; all other segments pass through unchanged.
 *
 *   NOUN, ADJECTIVE              -> random entry from the list plus a trailing space
 *   PLACE, VERB, ADVERB, COLOR   -> random entry from the list
 *   MARKOV <count> <w1> <w2>     -> <count> generated words, each preceded by a
 *                                   space, seeded with the pair "<w1> <w2>"
 *                                   (the seed words themselves are not emitted)
 *
 * An empty word list expands to an empty string instead of "undefined".
 * MARKOV generation restarts from a random pair whenever the current pair has
 * no known follower, and stops early if the chain has nothing to offer.
 *
 * @param {string} script - Template text containing '@'-delimited tags.
 * @param {string[]} nouns
 * @param {string[]} verbs
 * @param {string[]} adverbs
 * @param {string[]} adjectives
 * @param {string[]} places
 * @param {string[]} colors
 * @param {Map<string, string[]>} markovChain - "w1 w2" -> words seen after that pair.
 * @param {string[]} pairs - Keys of `markovChain`, used for random restarts.
 * @returns {string} The script with every recognised tag expanded.
 */
const fillGaps = (script, nouns, verbs, adverbs, adjectives, places, colors, markovChain, pairs) => {
    // Uniform random pick; '' for an empty list so nothing bogus is printed.
    const pickFrom = list => (list.length === 0 ? '' : list[Math.floor(Math.random() * list.length)]);

    // [tag prefix, source list, text appended after the picked word]
    const simpleTags = [
        ['NOUN', nouns, ' '],
        ['ADJECTIVE', adjectives, ' '],
        ['PLACE', places, ''],
        ['VERB', verbs, ''],
        ['ADVERB', adverbs, ''],
        ['COLOR', colors, ''],
    ];

    const expandMarkov = part => {
        // Tolerate repeated / trailing whitespace between the tag's arguments.
        const [, rawLength, ...seed] = part.trim().split(/\s+/);
        const length = parseInt(rawLength, 10); // NaN => the loop below never runs
        let pair = seed.join(' ');
        let chain = '';
        for (let j = 0; j < length; j++) {
            let nextWords = markovChain.get(pair);
            if (!nextWords) {
                // Unknown seed or dead end: restart from a random known pair.
                if (pairs.length === 0) {
                    break;
                }
                pair = pickFrom(pairs);
                nextWords = markovChain.get(pair);
            }
            if (!nextWords || nextWords.length === 0) {
                break; // nothing to generate from; return what we have so far
            }
            const nextWord = pickFrom(nextWords);
            chain += ' ' + nextWord;
            pair = pair.split(' ')[1] + ' ' + nextWord;
        }
        return chain;
    };

    return script.split('@').map(part => {
        if (part.startsWith('MARKOV')) {
            return expandMarkov(part);
        }
        const tag = simpleTags.find(([prefix]) => part.startsWith(prefix));
        return tag ? pickFrom(tag[1]) + tag[2] : part;
    }).join('');
};

// Entry point. With no command-line argument, print the generated story;
// otherwise treat the first argument as the path of a template script, expand
// its tags and print the result. Either way the text is capitalised, given a
// trailing "!" and wrapped at 40 columns.
const scriptFile = process.argv[2];

const rawOutput = scriptFile
    ? fillGaps(
        fs.readFileSync(scriptFile, 'utf8'),
        nouns, verbs, adverbs, adjectives, places, colors,
        markovChain, pairs
    )
    : story;

console.log(wrap(prettyItUp(rawOutput), 40));