diff options
Diffstat (limited to 'js/fsa-tokenizer.js')
-rw-r--r-- | js/fsa-tokenizer.js | 13 |
1 files changed, 11 insertions, 2 deletions
diff --git a/js/fsa-tokenizer.js b/js/fsa-tokenizer.js index 6ff4ef7..8884894 100644 --- a/js/fsa-tokenizer.js +++ b/js/fsa-tokenizer.js @@ -42,8 +42,11 @@ const TokenizerFSA = (() => { return { state: states.START, token: '' }; default: + addToken(token, tokens); return { state: states.START, token: '' }; } + // Safety net - this shouldn't be reached unless something is wrong with one of the cases + return { state: states.START, token: '' }; // TODO: I've got a sneaking suspicion this is being reached }; // Tokenize the entire input text @@ -68,6 +71,12 @@ const TokenizerFSA = (() => { })(); // example usage -const text = "Hello, world! 123"; +const text = "Oh my goodness! What an enormous banana, there must be 11 of them on that tr33!"; const tokens = TokenizerFSA.tokenize(text); -console.log(tokens); // Ought to output: ['Hello', ',', 'world', '!', '123'] +console.log(tokens); + +// Output ought to be: +// [ +// "Oh", "my", "goodness", "What", "an", "enormous", "banana", "there", "must", "be", "11", "of", "them", "on", +// "that", "tr", "33", "!" +// ] |