about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- py/fsa-tokenizer.py 10
1 files changed, 7 insertions, 3 deletions
diff --git a/py/fsa-tokenizer.py b/py/fsa-tokenizer.py
index 11e792f..2d73693 100644
--- a/py/fsa-tokenizer.py
+++ b/py/fsa-tokenizer.py
@@ -64,6 +64,10 @@ class TokenizerFSA:
 
 # Example usage
 tokenizer = TokenizerFSA()
-text = "Hello, world! 123"
-tokens = tokenizer.tokenize(text)
-print(tokens)  # Output: ['Hello', ',', 'world', '!', '123']
+# text = "Hello, world! 123"
+# tokens = tokenizer.tokenize(text)
+# print(tokens)  # Output: ['Hello', ',', 'world', '!', '123']
+
+t = "this is a test l33t, banana, banana! Bang? Wh00T2? We hope;; 12396 233,973,000"
+tt = tokenizer.tokenize(t)
+print(tt)
\ No newline at end of file