diff options
author | elioat <hi@eli.li> | 2024-03-06 13:17:05 -0500 |
---|---|---|
committer | elioat <hi@eli.li> | 2024-03-06 13:17:05 -0500 |
commit | 2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9 (patch) | |
tree | 5384ae6cd6e66a60e2367bd543946467a94e4a79 | |
parent | 82441bcfd21efcd2f2b1ce7964b0401996fc0551 (diff) | |
download | tour-2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9.tar.gz |
*
-rw-r--r-- | js/MAP.md | 1 | ||||
-rw-r--r-- | js/toy-llm/app.js | 46 | ||||
-rw-r--r-- | js/toy-llm/index.html | 12 |
3 files changed, 59 insertions, 0 deletions
```diff
diff --git a/js/MAP.md b/js/MAP.md
index 52b3573..c6e438b 100644
--- a/js/MAP.md
+++ b/js/MAP.md
@@ -25,3 +25,4 @@
 - `story-interpreter`, poking at making a text adventure
 - `text`, poking at making a text adventure...maybe with a different finger?
 - `toadmode`, a visual coding toy about dropping shapes near each other on a canvas
+- `toy-llm`, exactly what it says on the box, the most basic of toy LLM implementations
\ No newline at end of file
diff --git a/js/toy-llm/app.js b/js/toy-llm/app.js
new file mode 100644
index 0000000..2e31259
--- /dev/null
+++ b/js/toy-llm/app.js
@@ -0,0 +1,46 @@
+class ToyLanguageModel {
+    constructor(vocabulary) {
+        this.vocabulary = vocabulary;
+    }
+
+    tokenize(text) {
+        // Tokenization: split by spaces
+        return text.split(' ');
+    }
+
+    detokenize(tokens) {
+        // Detokenization: join with spaces
+        return tokens.join(' ');
+    }
+
+    embed(token) {
+        // Embedding: map each token to a unique integer
+        return this.vocabulary.indexOf(token);
+    }
+
+    predictNextToken(context) {
+        // Prediction: return a random token
+        const randomIndex = Math.floor(Math.random() * this.vocabulary.length);
+        return this.vocabulary[randomIndex];
+    }
+
+    generateText(initialText, numTokensToGenerate) {
+        const tokens = this.tokenize(initialText);
+        const generatedTokens = Array.from({ length: numTokensToGenerate }, (_, i) => {
+            const context = tokens.slice(-5); // Use last 5 tokens as context
+            const nextToken = this.predictNextToken(context);
+            tokens.push(nextToken);
+            return nextToken;
+        });
+        return this.detokenize(generatedTokens);
+    }
+}
+
+// Define vocabulary
+const vocabulary = ['the', 'quick', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dog'];
+
+const model = new ToyLanguageModel(vocabulary);
+const initialText = 'the quick brown';
+const numTokensToGenerate = 5;
+const generatedText = model.generateText(initialText, numTokensToGenerate);
+console.log(generatedText);
\ No newline at end of file
diff --git a/js/toy-llm/index.html b/js/toy-llm/index.html
new file mode 100644
index 0000000..beffcf6
--- /dev/null
+++ b/js/toy-llm/index.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>toy llm</title>
+    <script src="app.js"></script>
+</head>
+<body>
+    <h1>toy llm</h1>
+</body>
+</html>
\ No newline at end of file
```