about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author elioat <elioat@tilde.institute> 2024-06-23 15:52:38 -0400
committer elioat <elioat@tilde.institute> 2024-06-23 15:52:38 -0400
commit 526e126d16e7fe2490d21519df3d6c41cf6024f7 (patch)
tree 1c903de1cd111d1b8f833f3d25a21417ff21f7bf
parent 2528d0624b22a956c4da40c77ce5ba1c65f6501e (diff)
download tour-526e126d16e7fe2490d21519df3d6c41cf6024f7.tar.gz
*
-rw-r--r-- js/fsa-tokenizer.js 13
1 files changed, 11 insertions, 2 deletions
diff --git a/js/fsa-tokenizer.js b/js/fsa-tokenizer.js
index 6ff4ef7..8884894 100644
--- a/js/fsa-tokenizer.js
+++ b/js/fsa-tokenizer.js
@@ -42,8 +42,11 @@ const TokenizerFSA = (() => {
         return { state: states.START, token: '' };
 
       default:
+        addToken(token, tokens);
         return { state: states.START, token: '' };
     }
+    // Safety net - this shouldn't be reached unless something is wrong with one of the cases
+    return { state: states.START, token: '' }; // TODO: I've got a sneaking suspicion this is being reached
   };
 
   // Tokenize the entire input text
@@ -68,6 +71,12 @@ const TokenizerFSA = (() => {
 })();
 
 // example usage
-const text = "Hello, world! 123";
+const text = "Oh my goodness! What an enormous banana, there must be 11 of them on that tr33!";
 const tokens = TokenizerFSA.tokenize(text);
-console.log(tokens);  // Ought to output: ['Hello', ',', 'world', '!', '123']
+console.log(tokens);  
+
+// Output ought to be:
+// [
+//   "Oh", "my", "goodness", "What", "an", "enormous", "banana", "there", "must", "be", "11", "of", "them", "on",
+//   "that", "tr", "33", "!"
+// ]