path: root/js/toy-llm/app.js
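// A toy "language model" that walks through the basic pipeline of an LLM:
// tokenize -> embed -> predict the next token -> detokenize. The prediction
// step is purely random; the class only illustrates the shape of the interface.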
class ToyLanguageModel {
    constructor(vocabulary) {
        this.vocabulary = vocabulary;
    }

    tokenize(text) {
        // Tokenization: split by spaces
        return text.split(' ');
    }

    detokenize(tokens) {
        // Detokenization: join with spaces
        return tokens.join(' ');
    }

    embed(token) {
        // Embedding: map each token to its index in the vocabulary (-1 if unknown)
        return this.vocabulary.indexOf(token);
    }

    predictNextToken(context) {
        // Prediction: ignore the context and return a uniformly random token
        const randomIndex = Math.floor(Math.random() * this.vocabulary.length);
        return this.vocabulary[randomIndex];
    }

    generateText(initialText, numTokensToGenerate) {
        // Autoregressive loop: repeatedly predict the next token and append it.
        // Returns only the generated continuation, not the initial text.
        const tokens = this.tokenize(initialText);
        const generatedTokens = Array.from({ length: numTokensToGenerate }, () => {
            const context = tokens.slice(-5); // Last 5 tokens as context (unused by the toy predictor)
            const nextToken = this.predictNextToken(context);
            tokens.push(nextToken);
            return nextToken;
        });
        return this.detokenize(generatedTokens);
    }
}

// Define vocabulary
const vocabulary = ['the', 'quick', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dog'];

const model = new ToyLanguageModel(vocabulary);
const initialText = 'the quick brown';
const numTokensToGenerate = 5;
const generatedText = model.generateText(initialText, numTokensToGenerate);
console.log(generatedText);
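
// A minimal usage sketch of the embed() step, which generateText() does not
// call: each known token maps to its vocabulary index, unknown tokens to -1.
// ('cat' is just an example out-of-vocabulary token.)
console.log(model.embed('fox')); // 3
console.log(model.embed('cat')); // -1 (out of vocabulary)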