author     bptato <nincsnevem662@gmail.com>  2023-06-23 14:44:50 +0200
committer  bptato <nincsnevem662@gmail.com>  2023-06-23 14:45:40 +0200
commit     966ca6213c2902778ad28a673c272713ebd0a132 (patch)
tree       e8d7b5743aae25e4aafe74b7e2c2ac47a4109a0b /src/html
parent     5d895934d4a89bb2efafc0de8e2009d880c7cfe0 (diff)
radixtree: optimize searching
Diffstat (limited to 'src/html')
 src/html/htmltokenizer.nim | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/src/html/htmltokenizer.nim b/src/html/htmltokenizer.nim
index 5a2c16bc..a1776820 100644
--- a/src/html/htmltokenizer.nim
+++ b/src/html/htmltokenizer.nim
@@ -8,6 +8,7 @@ import unicode
 import html/entity
 import html/tags
 import encoding/decoderstream
+import utils/opt
 import utils/radixtree
 import utils/twtstr
 
@@ -1429,25 +1430,20 @@ iterator tokenize*(tokenizer: var Tokenizer): Token =
       when nimvm:
         eprint "Cannot evaluate character references at compile time"
       else:
-        var buf = ""
-        var node = entityMap
-        var value = none(string) # last value
-        #TODO interfacing with RadixNode is suffering
-        # plus this doesn't look very efficient either
-        while not tokenizer.atEof:
-          let r = tokenizer.consume()
-          buf &= r
-          if not node.hasPrefix(buf):
-            tokenizer.reconsume()
-            break
-          let prevnode = node
-          node = node{buf}
-          if node != prevnode:
-            buf = ""
-            if node.value.issome:
-              value = node.value
-          tokenizer.tmp &= r
-        if value.issome:
+        var tokenizerp = addr tokenizer
+        var lasti = 0
+        let value = entityMap.find(proc(s: var string): bool =
+          if tokenizerp[].atEof:
+            return false
+          let rs = $tokenizerp[].consume()
+          lasti = tokenizerp[].tmp.len
+          tokenizerp[].tmp &= rs
+          s &= rs
+          return true
+        )
+        tokenizer.reconsume()
+        tokenizer.tmp.setLen(lasti)
+        if value.isOk:
           if consumed_as_an_attribute and tokenizer.tmp[^1] != ';' and peek_char in {'='} + AsciiAlpha:
             flush_code_points_consumed_as_a_character_reference
             switch_state tokenizer.rstate
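
The rewrite drops the per-rune RadixNode bookkeeping (hasPrefix checks, node{buf} lookups, and manual longest-match tracking) in favour of a single entityMap.find call that pulls input through a callback. Below is a minimal sketch of what such a callback-driven find could look like, assuming a simplified node type with string-keyed children; the real utils/radixtree implementation differs, and it returns an Opt from utils/opt (hence the isOk check in the diff) where this sketch substitutes std/options:

# Illustrative sketch only; names mirror the diff, bodies are assumptions,
# not the actual utils/radixtree API.
import std/options
import std/strutils
import std/tables

type
  RadixNode = ref object
    children: Table[string, RadixNode] # edge label -> child node
    value: Option[string]              # set on nodes that complete an entity

proc hasPrefix(node: RadixNode; buf: string): bool =
  ## True while `buf` can still grow into one of `node`'s edges.
  for edge in node.children.keys:
    if edge.startsWith(buf):
      return true
  false

proc find(root: RadixNode;
          reader: proc (s: var string): bool): Option[string] =
  ## `reader` appends the next rune to `s` and returns false at EOF.
  ## Descend an edge whenever the buffer matches it exactly, and keep
  ## the value of the deepest valued node as the longest match.
  var node = root
  var buf = ""
  result = none(string)
  while reader(buf):
    if not node.hasPrefix(buf):
      break                     # buffer can no longer match; stop reading
    if buf in node.children:
      node = node.children[buf] # full edge consumed, descend
      buf = ""
      if node.value.isSome:
        result = node.value     # best match so far

In the diff, the callback additionally records lasti, the length of tokenizer.tmp before the last rune was appended, so that once find returns, the terminating rune that failed to extend a match is reconsumed and trimmed from tmp.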