about summary refs log tree commit diff stats
path: root/py/fsa-tokenizer.py
diff options
context:
space:
mode:
author elioat <elioat@tilde.institute> 2024-06-23 19:51:51 -0400
committer elioat <elioat@tilde.institute> 2024-06-23 19:51:51 -0400
commit 565796ff19a25fef7111d2be5ab3fbb86c84213b (patch)
tree 2c0eb28be3782c7823eba26ce3ce28189e095d16 /py/fsa-tokenizer.py
parent fe64d484a8278ce12f352809f71ba0c659e0e9db (diff)
download tour-565796ff19a25fef7111d2be5ab3fbb86c84213b.tar.gz
*
Diffstat (limited to 'py/fsa-tokenizer.py')
-rw-r--r-- py/fsa-tokenizer.py 10
1 files changed, 7 insertions, 3 deletions
diff --git a/py/fsa-tokenizer.py b/py/fsa-tokenizer.py
index 11e792f..2d73693 100644
--- a/py/fsa-tokenizer.py
+++ b/py/fsa-tokenizer.py
@@ -64,6 +64,10 @@ class TokenizerFSA:
 
 # Example usage
 tokenizer = TokenizerFSA()
-text = "Hello, world! 123"
-tokens = tokenizer.tokenize(text)
-print(tokens)  # Output: ['Hello', ',', 'world', '!', '123']
+# text = "Hello, world! 123"
+# tokens = tokenizer.tokenize(text)
+# print(tokens)  # Output: ['Hello', ',', 'world', '!', '123']
+
+t = "this is a test l33t, banana, banana! Bang? Wh00T2? We hope;; 12396 233,973,000"
+tt = tokenizer.tokenize(t)
+print(tt)
\ No newline at end of file