diff options
-rw-r--r-- | py/fsa-tokenizer.py | 10 |
1 file changed, 7 insertions, 3 deletions
diff --git a/py/fsa-tokenizer.py b/py/fsa-tokenizer.py
index 11e792f..2d73693 100644
--- a/py/fsa-tokenizer.py
+++ b/py/fsa-tokenizer.py
@@ -64,6 +64,10 @@ class TokenizerFSA:
 
 # Example usage
 tokenizer = TokenizerFSA()
-text = "Hello, world! 123"
-tokens = tokenizer.tokenize(text)
-print(tokens) # Output: ['Hello', ',', 'world', '!', '123']
+# text = "Hello, world! 123"
+# tokens = tokenizer.tokenize(text)
+# print(tokens) # Output: ['Hello', ',', 'world', '!', '123']
+
+t = "this is a test l33t, banana, banana! Bang? Wh00T2? We hope;; 12396 233,973,000"
+tt = tokenizer.tokenize(t)
+print(tt)
\ No newline at end of file