*

author: elioat <hi@eli.li> 2024-03-06 13:17:05 -0500
committer: elioat <hi@eli.li> 2024-03-06 13:17:05 -0500
commit: 2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9 (patch)
tree: 5384ae6cd6e66a60e2367bd543946467a94e4a79 /js/toy-llm/app.js
parent: 82441bcfd21efcd2f2b1ce7964b0401996fc0551 (diff)
download: tour-2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9.tar.gz
1 files changed, 46 insertions, 0 deletions
diff --git a/js/toy-llm/app.js b/js/toy-llm/app.js
new file mode 100644
index 0000000..2e31259
--- /dev/null
+++ b/js/toy-llm/app.js
@@ -0,0 +1,46 @@
+class ToyLanguageModel {
+    constructor(vocabulary) {
+        this.vocabulary = vocabulary;
+    }
+
+    tokenize(text) {
+        // Tokenization: split by spaces
+        return text.split(' ');
+    }
+
+    detokenize(tokens) {
+        // Detokenization: join with spaces
+        return tokens.join(' ');
+    }
+
+    embed(token) {
+        // Embedding: map each token to a unique integer
+        return this.vocabulary.indexOf(token);
+    }
+
+    predictNextToken(context) {
+        // Prediction: return a random token
+        const randomIndex = Math.floor(Math.random() * this.vocabulary.length);
+        return this.vocabulary[randomIndex];
+    }
+
+    generateText(initialText, numTokensToGenerate) {
+        const tokens = this.tokenize(initialText);
+        const generatedTokens = Array.from({ length: numTokensToGenerate }, (_, i) => {
+            const context = tokens.slice(-5); // Use last 5 tokens as context
+            const nextToken = this.predictNextToken(context);
+            tokens.push(nextToken);
+            return nextToken;
+        });
+        return this.detokenize(generatedTokens);
+    }
+}
+
+// Define vocabulary
+const vocabulary = ['the', 'quick', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dog'];
+
+const model = new ToyLanguageModel(vocabulary);
+const initialText = 'the quick brown';
+const numTokensToGenerate = 5;
+const generatedText = model.generateText(initialText, numTokensToGenerate);
+console.log(generatedText);
\ No newline at end of file
author	elioat <hi@eli.li>	2024-03-06 13:17:05 -0500
committer	elioat <hi@eli.li>	2024-03-06 13:17:05 -0500
commit	2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9 (patch)
tree	5384ae6cd6e66a60e2367bd543946467a94e4a79 /js/toy-llm/app.js
parent	82441bcfd21efcd2f2b1ce7964b0401996fc0551 (diff)
download	tour-2cf613dd027eccbfd5cb9cf0daabd6b6e2fa67f9.tar.gz