diff options
author | elioat <hi@eli.li> | 2024-03-06 13:17:05 -0500 |
---|---|---|
committer | elioat <hi@eli.li> | 2024-03-06 13:17:05 -0500 |
commit | 2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9 (patch) | |
tree | 5384ae6cd6e66a60e2367bd543946467a94e4a79 /js/toy-llm/app.js | |
parent | 82441bcfd21efcd2f2b1ce7964b0401996fc0551 (diff) | |
download | tour-2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9.tar.gz |
*
Diffstat (limited to 'js/toy-llm/app.js')
-rw-r--r-- | js/toy-llm/app.js | 46 |
1 files changed, 46 insertions, 0 deletions
// js/toy-llm/app.js
// A toy "language model": it tokenizes on whitespace and "predicts" the next
// token by picking a vocabulary word at random.

// Number of trailing tokens handed to predictNextToken as context.
const CONTEXT_WINDOW_SIZE = 5;

/**
 * Minimal stand-in for a language model. It walks through the usual pipeline
 * (tokenize -> embed -> predict -> detokenize), but the prediction step is
 * purely random and ignores the context it is given.
 */
class ToyLanguageModel {
  /**
   * @param {string[]} vocabulary - Words the model is allowed to emit.
   * @param {() => number} [random=Math.random] - Returns a number in [0, 1).
   *   Inject a deterministic function to get reproducible output.
   */
  constructor(vocabulary, random = Math.random) {
    this.vocabulary = vocabulary;
    this.random = random;
  }

  /**
   * Splits text into tokens on runs of whitespace.
   *
   * Leading, trailing and repeated whitespace never produce empty tokens,
   * and an empty (or all-whitespace) string yields an empty array.
   *
   * @param {string} text
   * @returns {string[]}
   */
  tokenize(text) {
    const trimmed = text.trim();
    return trimmed === '' ? [] : trimmed.split(/\s+/);
  }

  /**
   * Joins tokens back into a single space-separated string.
   *
   * @param {string[]} tokens
   * @returns {string}
   */
  detokenize(tokens) {
    return tokens.join(' ');
  }

  /**
   * Maps a token to its position in the vocabulary.
   *
   * @param {string} token
   * @returns {number} The token's index, or -1 when it is not in the vocabulary.
   */
  embed(token) {
    return this.vocabulary.indexOf(token);
  }

  /**
   * Picks the next token uniformly at random from the vocabulary.
   *
   * @param {string[]} context - Preceding tokens. Accepted for API shape only;
   *   this toy model does not look at it.
   * @returns {string}
   * @throws {Error} If the vocabulary is empty, since there is nothing to pick.
   */
  predictNextToken(context) {
    if (this.vocabulary.length === 0) {
      throw new Error('Cannot predict a token from an empty vocabulary');
    }
    const randomIndex = Math.floor(this.random() * this.vocabulary.length);
    return this.vocabulary[randomIndex];
  }

  /**
   * Generates a continuation for the given text.
   *
   * @param {string} initialText - Prompt used to seed the context window.
   * @param {number} numTokensToGenerate - How many tokens to produce.
   *   Fractions are truncated; negative or non-numeric values produce nothing.
   * @returns {string} Only the newly generated tokens, space-separated
   *   (the prompt itself is not included).
   */
  generateText(initialText, numTokensToGenerate) {
    const tokens = this.tokenize(initialText);

    // Truncate toward zero and clamp at 0, the same way Array.from({ length })
    // would interpret the value.
    const requested = Math.trunc(Number(numTokensToGenerate));
    const count = Number.isNaN(requested) ? 0 : Math.max(0, requested);

    const generatedTokens = [];
    for (let i = 0; i < count; i += 1) {
      const context = tokens.slice(-CONTEXT_WINDOW_SIZE);
      const nextToken = this.predictNextToken(context);
      // Feed the prediction back in so it becomes context for the next step.
      tokens.push(nextToken);
      generatedTokens.push(nextToken);
    }
    return this.detokenize(generatedTokens);
  }
}

// Define vocabulary
const vocabulary = ['the', 'quick', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dog'];

// Demo: print five randomly generated tokens following a short prompt.
const model = new ToyLanguageModel(vocabulary);
const initialText = 'the quick brown';
const numTokensToGenerate = 5;
const generatedText = model.generateText(initialText, numTokensToGenerate);
console.log(generatedText);