about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--py/fsa-tokenizer.py10
1 files changed, 7 insertions, 3 deletions
diff --git a/py/fsa-tokenizer.py b/py/fsa-tokenizer.py
index 11e792f..2d73693 100644
--- a/py/fsa-tokenizer.py
+++ b/py/fsa-tokenizer.py
@@ -64,6 +64,10 @@ class TokenizerFSA:
 
 # Example usage
 tokenizer = TokenizerFSA()
-text = "Hello, world! 123"
-tokens = tokenizer.tokenize(text)
-print(tokens)  # Output: ['Hello', ',', 'world', '!', '123']
+# text = "Hello, world! 123"
+# tokens = tokenizer.tokenize(text)
+# print(tokens)  # Output: ['Hello', ',', 'world', '!', '123']
+
+t = "this is a test l33t, banana, banana! Bang? Wh00T2? We hope;; 12396 233,973,000"
+tt = tokenizer.tokenize(t)
+print(tt)
\ No newline at end of file
<garbeam@wmii.de> 2006-07-13 10:04:50 +0200 committer Anselm R. Garbe <garbeam@wmii.de> 2006-07-13 10:04:50 +0200 updated man page' href='/acidbong/suckless/dwm/commit/dwm.1?h=4.3&id=4ae1105577e4d58271c08243755b55330ed7ea42'>4ae1105 ^
e6cbe9c ^




































1076f2b


e6cbe9c ^









6458d72 ^
e6cbe9c ^


72655f0 ^
































1549faf ^
































0e5c819 ^














e6cbe9c ^



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148