diff options
author | elioat <elioat@tilde.institute> | 2024-06-23 15:52:38 -0400 |
---|---|---|
committer | elioat <elioat@tilde.institute> | 2024-06-23 15:52:38 -0400 |
commit | 526e126d16e7fe2490d21519df3d6c41cf6024f7 (patch) | |
tree | 1c903de1cd111d1b8f833f3d25a21417ff21f7bf | |
parent | 2528d0624b22a956c4da40c77ce5ba1c65f6501e (diff) | |
download | tour-526e126d16e7fe2490d21519df3d6c41cf6024f7.tar.gz |
*
-rw-r--r-- | js/fsa-tokenizer.js | 13 |
1 file changed, 11 insertions, 2 deletions
diff --git a/js/fsa-tokenizer.js b/js/fsa-tokenizer.js index 6ff4ef7..8884894 100644 --- a/js/fsa-tokenizer.js +++ b/js/fsa-tokenizer.js @@ -42,8 +42,11 @@ const TokenizerFSA = (() => { return { state: states.START, token: '' }; default: + addToken(token, tokens); return { state: states.START, token: '' }; } + // Safety net - this shouldn't be reached unless something is wrong with one of the cases + return { state: states.START, token: '' }; // TODO: I've got a sneaking suspicion this is being reached }; // Tokenize the entire input text @@ -68,6 +71,12 @@ const TokenizerFSA = (() => { })(); // example usage -const text = "Hello, world! 123"; +const text = "Oh my goodness! What an enormous banana, there must be 11 of them on that tr33!"; const tokens = TokenizerFSA.tokenize(text); -console.log(tokens); // Ought to output: ['Hello', ',', 'world', '!', '123'] +console.log(tokens); + +// Output ought to be: +// [ +// "Oh", "my", "goodness", "What", "an", "enormous", "banana", "there", "must", "be", "11", "of", "them", "on", +// "that", "tr", "33", "!" +// ] |