diff options
author | elioat <elioat@tilde.institute> | 2024-06-23 19:51:51 -0400 |
---|---|---|
committer | elioat <elioat@tilde.institute> | 2024-06-23 19:51:51 -0400 |
commit | 565796ff19a25fef7111d2be5ab3fbb86c84213b (patch) | |
tree | 2c0eb28be3782c7823eba26ce3ce28189e095d16 /py/fsa-tokenizer.py | |
parent | fe64d484a8278ce12f352809f71ba0c659e0e9db (diff) | |
download | tour-565796ff19a25fef7111d2be5ab3fbb86c84213b.tar.gz |
*
Diffstat (limited to 'py/fsa-tokenizer.py')
-rw-r--r-- | py/fsa-tokenizer.py | 10 |
1 files changed, 7 insertions, 3 deletions
```diff
diff --git a/py/fsa-tokenizer.py b/py/fsa-tokenizer.py
index 11e792f..2d73693 100644
--- a/py/fsa-tokenizer.py
+++ b/py/fsa-tokenizer.py
@@ -64,6 +64,10 @@ class TokenizerFSA:
 
 # Example usage
 tokenizer = TokenizerFSA()
-text = "Hello, world! 123"
-tokens = tokenizer.tokenize(text)
-print(tokens)  # Output: ['Hello', ',', 'world', '!', '123']
+# text = "Hello, world! 123"
+# tokens = tokenizer.tokenize(text)
+# print(tokens)  # Output: ['Hello', ',', 'world', '!', '123']
+
+t = "this is a test l33t, banana, banana! Bang? Wh00T2? We hope;; 12396 233,973,000"
+tt = tokenizer.tokenize(t)
+print(tt)
\ No newline at end of file
```