diff options
author | bptato <nincsnevem662@gmail.com> | 2022-12-26 18:50:20 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2022-12-26 18:50:20 +0100 |
commit | 636ddbb788a4a4dcd4dc4bc247159b6bb191ef8a (patch) | |
tree | 51a262f8455922718b0142e17ab76dee08c042d3 /src/utils | |
parent | e694c62b4630a8373ff324b4d3480c067220e480 (diff) | |
download | chawan-636ddbb788a4a4dcd4dc4bc247159b6bb191ef8a.tar.gz |
DOM: implement many new methods
Diffstat (limited to 'src/utils')
-rw-r--r-- | src/utils/twtstr.nim | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim
index 48ea54f9..988f3ad5 100644
--- a/src/utils/twtstr.nim
+++ b/src/utils/twtstr.nim
@@ -670,6 +670,62 @@ func isValidNonZeroInt*(str: string): bool =
       return false
   true
 
+# https://www.w3.org/TR/xml/#NT-Name
+# Inclusive (first, last) code point ranges, ascending; ASCII is listed below.
+const NameStartCharRanges = [
+  (0xC0, 0xD6),
+  (0xD8, 0xF6),
+  (0xF8, 0x2FF),
+  (0x370, 0x37D),
+  (0x37F, 0x1FFF),
+  (0x200C, 0x200D),
+  (0x2070, 0x218F),
+  (0x2C00, 0x2FEF),
+  (0x3001, 0xD7FF),
+  (0xF900, 0xFDCF),
+  (0xFDF0, 0xFFFD),
+  (0x10000, 0xEFFFF)
+]
+const NameCharRanges = [ # + NameStartCharRanges
+  (0xB7, 0xB7),
+  (0x0300, 0x036F),
+  (0x203F, 0x2040)
+]
+const NameStartCharAscii = {':', '_'} + AsciiAlpha
+const NameCharAscii = NameStartCharAscii + {'-', '.'} + AsciiDigit
+func matchNameProduction*(str: string): bool =
+  ## Returns true iff `str` matches the XML `Name` production: one
+  ## NameStartChar followed by zero or more NameChars (never empty).
+  func cmpRange(x: (int, int), y: int): int =
+    # 0 when y lies inside x, so whole ranges match, not only their starts
+    if x[1] < y: -1
+    elif x[0] > y: 1
+    else: 0
+  if str.len == 0:
+    return false # Name requires at least one NameStartChar
+  var i = 0
+  var r: Rune
+  while i < str.len:
+    let first = i == 0 # only the first character must be a NameStartChar
+    if str[i] in Ascii:
+      if str[i] notin (if first: NameStartCharAscii else: NameCharAscii):
+        return false
+      inc i
+    else:
+      fastRuneAt(str, i, r)
+      if binarySearch(NameStartCharRanges, int(r), cmpRange) == -1 and
+          (first or binarySearch(NameCharRanges, int(r), cmpRange) == -1):
+        return false
+  return true
+
+func utf16Len*(s: string): int =
+  ## Returns the number of UTF-16 code units needed to encode `s`.
+  for r in s.runes:
+    if cast[uint32](r) < 0x10000: # ucs-2
+      result += 1
+    else: # surrogate
+      result += 2
+
 proc expandPath*(path: string): string =
   if path.len == 0:
     return path