blob: 2e31259f2c01cdd44074bd8b0d51924626cb91de (
plain) (
tree)
|
|
/**
 * A toy "language model" that tokenizes on spaces and predicts the next
 * token by picking a uniformly random entry from a fixed vocabulary.
 */
class ToyLanguageModel {
  /**
   * @param {string[]} vocabulary - Tokens the model can emit.
   */
  constructor(vocabulary) {
    this.vocabulary = vocabulary;
  }

  /**
   * Split text into tokens on space characters.
   *
   * Empty pieces (from an empty string, or from leading, trailing or
   * repeated spaces) are dropped so they never show up as tokens.
   *
   * @param {string} text - Text to tokenize.
   * @returns {string[]} The non-empty, space-separated tokens.
   */
  tokenize(text) {
    return text.split(' ').filter((token) => token.length > 0);
  }

  /**
   * Join tokens back into a single space-separated string.
   *
   * @param {string[]} tokens - Tokens to join.
   * @returns {string} The joined text.
   */
  detokenize(tokens) {
    return tokens.join(' ');
  }

  /**
   * Map a token to its position in the vocabulary.
   *
   * @param {string} token - Token to look up.
   * @returns {number} Index of the token in the vocabulary, or -1 if absent.
   */
  embed(token) {
    return this.vocabulary.indexOf(token);
  }

  /**
   * Pick the next token uniformly at random from the vocabulary.
   *
   * @param {string[]} context - Preceding tokens; not used by this toy
   *   implementation.
   * @returns {string|undefined} A vocabulary entry, or `undefined` when the
   *   vocabulary is empty.
   */
  predictNextToken(context) {
    const randomIndex = Math.floor(Math.random() * this.vocabulary.length);
    return this.vocabulary[randomIndex];
  }

  /**
   * Generate new tokens following `initialText`.
   *
   * Each prediction is given the most recent `contextSize` tokens (initial
   * tokens plus tokens generated so far). Only the newly generated tokens
   * are returned; the initial text is not included in the result.
   *
   * @param {string} initialText - Seed text that provides the initial context.
   * @param {number} numTokensToGenerate - How many tokens to produce;
   *   fractional values are truncated and negative/NaN values produce none.
   * @param {number} [contextSize=5] - Maximum number of trailing tokens passed
   *   to `predictNextToken`; values <= 0 pass an empty context.
   * @returns {string} The generated tokens joined with spaces.
   */
  generateText(initialText, numTokensToGenerate, contextSize = 5) {
    const tokens = this.tokenize(initialText);
    const generatedTokens = [];
    const count = Math.max(0, Math.trunc(numTokensToGenerate) || 0);

    for (let i = 0; i < count; i += 1) {
      // slice(-0) would return the whole array, so guard non-positive sizes.
      const context = contextSize > 0 ? tokens.slice(-contextSize) : [];
      const nextToken = this.predictNextToken(context);
      tokens.push(nextToken);
      generatedTokens.push(nextToken);
    }

    return this.detokenize(generatedTokens);
  }
}
// Demo: build a model over a small fixed vocabulary, then print a handful of
// randomly generated tokens that follow a seed phrase.
const vocabulary = [
  'the',
  'quick',
  'brown',
  'fox',
  'jumps',
  'over',
  'lazy',
  'dog',
];
const model = new ToyLanguageModel(vocabulary);

// Seed phrase and the number of tokens to generate after it.
const initialText = 'the quick brown';
const numTokensToGenerate = 5;

// Output varies between runs because predictions are random.
const generatedText = model.generateText(initialText, numTokensToGenerate);
console.log(generatedText);
|