*

author: elioat <hi@eli.li> 2024-03-06 13:17:05 -0500
committer: elioat <hi@eli.li> 2024-03-06 13:17:05 -0500
commit: 2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9 (patch)
tree: 5384ae6cd6e66a60e2367bd543946467a94e4a79
parent: 82441bcfd21efcd2f2b1ce7964b0401996fc0551 (diff)
download: tour-2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9.tar.gz
3 files changed, 59 insertions, 0 deletions
diff --git a/js/MAP.md b/js/MAP.md
index 52b3573..c6e438b 100644
--- a/js/MAP.md
+++ b/js/MAP.md
@@ -25,3 +25,4 @@
 - `story-interpreter`, poking at making a text adventure
 - `text`, poking at making a text adventure...maybe with a different finger?
 - `toadmode`, a visual coding toy about dropping shapes near each other on a canvas
+- `toy-llm`, exactly what it says on the box: the most basic of toy LLM implementations
\ No newline at end of file
diff --git a/js/toy-llm/app.js b/js/toy-llm/app.js
new file mode 100644
index 0000000..2e31259
--- /dev/null
+++ b/js/toy-llm/app.js
@@ -0,0 +1,46 @@
+class ToyLanguageModel {
+    constructor(vocabulary) {
+        this.vocabulary = vocabulary; // kept by reference (not copied); read by embed() and predictNextToken()
+    }
+
+    tokenize(text) {
+        // Tokenization: split on single space characters; consecutive spaces yield empty-string tokens
+        return text.split(' ');
+    }
+
+    detokenize(tokens) {
+        // Detokenization: join the tokens back into one string, separated by single spaces
+        return tokens.join(' ');
+    }
+
+    embed(token) {
+        // Embedding: map a token to its first index in the vocabulary; tokens not in the vocabulary map to -1
+        return this.vocabulary.indexOf(token);
+    }
+
+    predictNextToken(context) {
+        // Prediction: pick a vocabulary entry at random via Math.random(); the context argument is not used
+        const randomIndex = Math.floor(Math.random() * this.vocabulary.length);
+        return this.vocabulary[randomIndex];
+    }
+
+    generateText(initialText, numTokensToGenerate) {
+        const tokens = this.tokenize(initialText);
+        const generatedTokens = Array.from({ length: numTokensToGenerate }, (_, i) => {
+            const context = tokens.slice(-5); // Context window: the last 5 tokens (prompt plus tokens generated so far)
+            const nextToken = this.predictNextToken(context);
+            tokens.push(nextToken); // append the prediction so it is part of later context windows
+            return nextToken;
+        });
+        return this.detokenize(generatedTokens); // only the newly generated tokens are returned, not the prompt
+    }
+}
+
+// Define vocabulary: the fixed word list the model draws its random tokens from
+const vocabulary = ['the', 'quick', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dog'];
+
+const model = new ToyLanguageModel(vocabulary);
+const initialText = 'the quick brown'; // prompt; split into tokens by generateText()
+const numTokensToGenerate = 5;
+const generatedText = model.generateText(initialText, numTokensToGenerate);
+console.log(generatedText); // logs the 5 generated tokens joined by spaces; the prompt is not included
\ No newline at end of file
diff --git a/js/toy-llm/index.html b/js/toy-llm/index.html
new file mode 100644
index 0000000..beffcf6
--- /dev/null
+++ b/js/toy-llm/index.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>toy llm</title>
+    <script src="app.js"></script>
+</head>
+<body>
+    <h1>toy llm</h1>
+</body>
+</html>
\ No newline at end of file
author	elioat <hi@eli.li>	2024-03-06 13:17:05 -0500
committer	elioat <hi@eli.li>	2024-03-06 13:17:05 -0500
commit	2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9 (patch)
tree	5384ae6cd6e66a60e2367bd543946467a94e4a79
parent	82441bcfd21efcd2f2b1ce7964b0401996fc0551 (diff)
download	tour-2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9.tar.gz