diff options
author | bptato <nincsnevem662@gmail.com> | 2024-02-09 17:52:41 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-02-09 17:54:32 +0100 |
commit | 7badcc1f952c085c6a73e2dbe3aa5a460d7be8b0 (patch) | |
tree | 0b6cdc9c1895329946df9f21508ee1539c2cfc57 | |
parent | 431fd101705146f2d4baaa1a4a139adf356188ab (diff) | |
download | chawan-7badcc1f952c085c6a73e2dbe3aa5a460d7be8b0.tar.gz |
twtstr: misc refactorings
* move out half width <-> full width converters
* snake_case -> camelCase
* improve toScreamingSnakeCase slicing
-rw-r--r-- | adapter/protocol/dirlist.nim | 2 | ||||
-rw-r--r-- | res/widthconv.json | 100 | ||||
-rw-r--r-- | res/widthconvmap.tab | 93 | ||||
-rw-r--r-- | src/css/values.nim | 2 | ||||
-rw-r--r-- | src/layout/engine.nim | 1 | ||||
-rw-r--r-- | src/local/container.nim | 2 | ||||
-rw-r--r-- | src/utils/twtstr.nim | 165 | ||||
-rw-r--r-- | src/utils/widthconv.nim | 126 |
8 files changed, 247 insertions, 244 deletions
diff --git a/adapter/protocol/dirlist.nim b/adapter/protocol/dirlist.nim index 33dcc6ab..d0933fed 100644 --- a/adapter/protocol/dirlist.nim +++ b/adapter/protocol/dirlist.nim @@ -54,7 +54,7 @@ func makeDirlist*(items: seq[DirlistItem]): string = line &= ' ' line &= htmlEscape(item.modified) if item.t == ITEM_FILE: - line &= ' ' & convert_size(item.nsize) + line &= ' ' & convertSize(item.nsize) elif item.t == ITEM_LINK: line &= " -> " & htmlEscape(item.linkto) outs &= line & '\n' diff --git a/res/widthconv.json b/res/widthconv.json deleted file mode 100644 index 297c55db..00000000 --- a/res/widthconv.json +++ /dev/null @@ -1,100 +0,0 @@ -{ - "!": "!", - "\"": """, - "#": "#", - "$": "$", - "%": "%", - "&": "&", - "'": "'", - "(": "(", - ")": ")", - "*": "*", - "+": "+", - ",": ",", - "-": "-", - ".": ".", - "/": "/", - - "0": "0", - "1": "1", - "2": "2", - "3": "3", - "4": "4", - "5": "5", - "6": "6", - "7": "7", - "8": "8", - "9": "9", - ":": ":", - ";": ";", - "<": "<", - "=": "=", - ">": ">", - "?": "?", - - "⦆": "⦆", - "。": "。", - "「": "「", - "」": "」", - "、": "、", - "・": "・", - "ヲ": ["ヲ", "を"], - "ァ": ["ァ", "ぁ"], - "ィ": ["ィ", "ぃ"], - "ゥ": ["ゥ", "ぅ"], - "ェ": ["ェ", "ぇ"], - "ォ": ["ォ", "ぉ"], - "ャ": ["ャ", "ゃ"], - "ュ": ["ュ", "ゅ"], - "ョ": ["ョ", "ょ"], - "ッ": ["ッ", "っ"], - - "ー": "ー", - "ア": ["ア", "あ"], - "イ": ["イ", "い"], - "ウ": ["ウ", "う"], - "エ": ["エ", "え"], - "オ": ["オ", "お"], - "カ": ["カ", "か"], - "キ": ["キ", "き"], - "ク": ["ク", "く"], - "ケ": ["ケ", "け"], - "コ": ["コ", "こ"], - "サ": ["サ", "さ"], - "シ": ["シ", "し"], - "ス": ["ス", "す"], - "セ": ["セ", "せ"], - "ソ": ["ソ", "そ"], - - "タ": ["タ", "た"], - "チ": ["チ", "ち"], - "ツ": ["ツ", "つ"], - "テ": ["テ", "て"], - "ト": ["ト", "と"], - "ナ": ["ナ", "な"], - "ニ": ["ニ", "に"], - "ヌ": ["ヌ", "ぬ"], - "ネ": ["ネ", "ね"], - "ノ": ["ノ", "の"], - "ハ": ["ハ", "は"], - "ヒ": ["ヒ", "ひ"], - "フ": ["フ", "ふ"], - "ヘ": ["ヘ", "へ"], - "ホ": ["ホ", "ほ"], - "マ": ["マ", "ま"], - - "ミ": ["ミ", "み"], - "ム": ["ム", "む"], - "メ": ["メ", "め"], - "モ": ["モ", "も"], - "ヤ": ["ヤ", "や"], - "ユ": 
["ユ", "ゆ"], - "ヨ": ["ヨ", "よ"], - "ラ": ["ラ", "ら"], - "リ": ["リ", "り"], - "ル": ["ル", "る"], - "レ": ["レ", "れ"], - "ロ": ["ロ", "ろ"], - "ワ": ["ワ", "わ"], - "ン": ["ン", "ん"] -} diff --git a/res/widthconvmap.tab b/res/widthconvmap.tab new file mode 100644 index 00000000..34183e25 --- /dev/null +++ b/res/widthconvmap.tab @@ -0,0 +1,93 @@ +! ! +\ " +# # +$ $ +% % +& & +' ' +( ( +) ) +* * ++ + +, , +- - +. . +/ / +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +: : +; ; +< < += = +> > +? ? +⦆ ⦆ +。 。 +「 「 +」 」 +、 、 +・ ・ +ヲ ヲ を +ァ ァ ぁ +ィ ィ ぃ +ゥ ゥ ぅ +ェ ェ ぇ +ォ ォ ぉ +ャ ャ ゃ +ュ ュ ゅ +ョ ョ ょ +ッ ッ っ +ー ー +ア ア あ +イ イ い +ウ ウ う +エ エ え +オ オ お +カ カ か +キ キ き +ク ク く +ケ ケ け +コ コ こ +サ サ さ +シ シ し +ス ス す +セ セ せ +ソ ソ そ +タ タ た +チ チ ち +ツ ツ つ +テ テ て +ト ト と +ナ ナ な +ニ ニ に +ヌ ヌ ぬ +ネ ネ ね +ノ ノ の +ハ ハ は +ヒ ヒ ひ +フ フ ふ +ヘ ヘ へ +ホ ホ ほ +マ マ ま +ミ ミ み +ム ム む +メ メ め +モ モ も +ヤ ヤ や +ユ ユ ゆ +ヨ ヨ よ +ラ ラ ら +リ リ り +ル ル る +レ レ れ +ロ ロ ろ +ワ ワ わ +ン ン ん diff --git a/src/css/values.nim b/src/css/values.nim index ddde8953..0bf57675 100644 --- a/src/css/values.nim +++ b/src/css/values.nim @@ -502,7 +502,7 @@ func listMarker*(t: CSSListStyleType, i: int): string = of LIST_STYLE_TYPE_DISCLOSURE_CLOSED: return "▼ " # U+25BC of LIST_STYLE_TYPE_DECIMAL: return $i & ". " of LIST_STYLE_TYPE_UPPER_ROMAN: return romanNumber(i) & ". " - of LIST_STYLE_TYPE_LOWER_ROMAN: return romanNumber_lower(i) & ". " + of LIST_STYLE_TYPE_LOWER_ROMAN: return romanNumberLower(i) & ". " of LIST_STYLE_TYPE_UPPER_ALPHA: return numToBase(i, UpperAlphaMap) & ". " of LIST_STYLE_TYPE_LOWER_ALPHA: return numToBase(i, LowerAlphaMap) & ". " of LIST_STYLE_TYPE_LOWER_GREEK: return numToBase(i, LowerGreekMap) & ". 
" diff --git a/src/layout/engine.nim b/src/layout/engine.nim index ee2ab619..b4d61864 100644 --- a/src/layout/engine.nim +++ b/src/layout/engine.nim @@ -11,6 +11,7 @@ import layout/layoutunit import utils/luwrap import utils/strwidth import utils/twtstr +import utils/widthconv type LayoutState = ref object diff --git a/src/local/container.nim b/src/local/container.nim index edb1f992..2bf53756 100644 --- a/src/local/container.nim +++ b/src/local/container.nim @@ -1298,7 +1298,7 @@ proc onload*(container: Container, res: LoadResult) = if res.bytes == -1 or res.atend: container.setLoadInfo("") elif not res.atend: - container.setLoadInfo(convert_size(res.bytes) & " loaded") + container.setLoadInfo(convertSize(res.bytes) & " loaded") if res.lines > container.numLines: container.setNumLines(res.lines) container.triggerEvent(STATUS) diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index 1584f3f5..80b6be65 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -1,10 +1,8 @@ import std/algorithm -import std/json import std/math import std/options import std/os import std/strutils -import std/tables import std/unicode when defined(posix): @@ -65,7 +63,7 @@ func toHeaderCase*(str: string): string = func toScreamingSnakeCase*(str: string): string = # input is camel case if str.len >= 1: result &= str[0].toUpperAscii() - for c in str[1..^1]: + for c in str.toOpenArray(1, str.high): if c in AsciiUpperAlpha: result &= '_' result &= c @@ -245,14 +243,15 @@ func beforeLast*(s: string, c: set[char], n = 1): string = func beforeLast*(s: string, c: char, n = 1): string = s.beforeLast({c}, n) -proc c_sprintf(buf, fm: cstring): cint {.header: "<stdio.h>", importc: "sprintf", varargs} +proc c_sprintf(buf, fm: cstring): cint + {.header: "<stdio.h>", importc: "sprintf", varargs} # From w3m const SizeUnit = [ cstring"b", cstring"kb", cstring"Mb", cstring"Gb", cstring"Tb", cstring"Pb", cstring"Eb", cstring"Zb", cstring"Bb", cstring"Yb" ] -func convert_size*(size: int): 
string = +func convertSize*(size: int): string = var sizepos = 0 var csize = float32(size) while csize >= 999.495 and sizepos < SizeUnit.len: @@ -263,10 +262,10 @@ func convert_size*(size: int): string = discard c_sprintf(cstring(result), cstring("%.3g%s"), f, SizeUnit[sizepos]) result.setLen(cstring(result).len) -func number_additive*(i: int, range: HSlice[int, int], symbols: openarray[(int, string)]): string = +func numberAdditive*(i: int, range: HSlice[int, int], + symbols: openArray[(int, string)]): string = if i notin range: return $i - var n = i var at = 0 while n > 0: @@ -275,25 +274,24 @@ func number_additive*(i: int, range: HSlice[int, int], symbols: openarray[(int, result &= symbols[at][1] continue inc at - return result const romanNumbers = [ - (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"), (100, "C"), (90, "XC"), (50, "L"), - (40, "XL"), (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I") + (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"), (100, "C"), (90, "XC"), + (50, "L"), (40, "XL"), (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I") ] -const romanNumbers_lower = block: +const romanNumbersLower = block: var res: seq[(int, string)] for (n, s) in romanNumbers: res.add((n, s.toLowerAscii())) res func romanNumber*(i: int): string = - return number_additive(i, 1..3999, romanNumbers) + return numberAdditive(i, 1..3999, romanNumbers) -func romanNumber_lower*(i: int): string = - return number_additive(i, 1..3999, romanNumbers_lower) +func romanNumberLower*(i: int): string = + return numberAdditive(i, 1..3999, romanNumbersLower) func japaneseNumber*(i: int): string = if i == 0: @@ -445,7 +443,7 @@ func parseUInt32*(s: string): Option[uint32] = #TODO not sure where this algorithm is from... 
func parseFloat64*(s: string): float64 = - var sign = 1 + var sign = 1f64 var t = 1 var d = 0 var integer: float64 = 0 @@ -454,7 +452,7 @@ func parseFloat64*(s: string): float64 = var i = 0 if i < s.len and s[i] == '-': - sign = -1 + sign = -1f64 inc i elif i < s.len and s[i] == '+': inc i @@ -485,17 +483,7 @@ func parseFloat64*(s: string): float64 = e += float64(decValue(s[i])) inc i - return float64(sign) * (integer + f * pow(10, float64(-d))) * pow(10, (float64(t) * e)) - -func isSurrogate*(r: Rune): bool = int32(r) in 0xD800..0xDFFF -func isNonCharacter*(r: Rune): bool = - let n = int32(r) - n in 0xFDD0..0xFDEF or - n in [0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, - 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, - 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, - 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, - 0x10FFFE, 0x10FFFF] + return sign * (integer + f * pow(10, float64(-d))) * pow(10, (float64(t) * e)) const ControlPercentEncodeSet* = Controls + NonAscii const FragmentPercentEncodeSet* = ControlPercentEncodeSet + @@ -517,7 +505,8 @@ else: const LocalPathPercentEncodeSet* = Ascii - AsciiAlpha - AsciiDigit - {'.', '/'} -proc percentEncode*(append: var string, c: char, set: set[char], spaceAsPlus = false) {.inline.} = +proc percentEncode*(append: var string, c: char, set: set[char], + spaceAsPlus = false) {.inline.} = if spaceAsPlus and c == ' ': append &= '+' elif c notin set: @@ -526,11 +515,13 @@ proc percentEncode*(append: var string, c: char, set: set[char], spaceAsPlus = f append &= '%' append.pushHex(c) -proc percentEncode*(append: var string, s: string, set: set[char], spaceAsPlus = false) {.inline.} = +proc percentEncode*(append: var string, s: string, set: set[char], + spaceAsPlus = false) {.inline.} = for c in s: append.percentEncode(c, set, spaceAsPlus) -func percentEncode*(c: char, set: set[char], spaceAsPlus = false): string {.inline.} = +func 
percentEncode*(c: char, set: set[char], spaceAsPlus = false): string + {.inline.} = result.percentEncode(c, set, spaceAsPlus) func percentEncode*(s: string, set: set[char], spaceAsPlus = false): string = @@ -583,8 +574,7 @@ func join*(ss: openarray[string], sep: char): string = result &= sep result &= ss[i] -proc passRealloc*(opaque: pointer, p: pointer, size: csize_t): pointer - {.cdecl.} = +proc passRealloc*(opaque, p: pointer, size: csize_t): pointer {.cdecl.} = return realloc(p, size) # https://www.w3.org/TR/xml/#NT-Name @@ -652,8 +642,9 @@ func matchQNameProduction*(s: string): bool = return s.matchNameProduction() func utf16Len*(s: string): int = + result = 0 for r in s.runes: - if cast[uint32](r) < 0x10000: # ucs-2 + if uint32(r) < 0x10000: # ucs-2 result += 1 else: # surrogate result += 2 @@ -722,111 +713,3 @@ proc makeCRLF*(s: string): string = result &= '\n' else: result &= s[i] - -const CanHaveDakuten = ("かきくけこさしすせそたちつてとはひふへほカキクケコ" & - "サシスセソタチツテトハヒフヘホ").toRunes() - -const CanHaveHandakuten = "はひふへほハヒフヘホ".toRunes() - -const HasDakuten = ("がぎぐげござじずぜぞだぢづでどばびぶべぼガギグゲゴ" & - "ザジゼゾダヂヅデドバビブベボ").toRunes() - -const HasHanDakuten = "ぱぴぷぺぽパピプペポ".toRunes() - -# in unicode, char + 1 is dakuten and char + 2 handakuten - -const HalfDakuten = Rune(0xFF9E) # half-width dakuten -const HalfHanDakuten = Rune(0xFF9F) - -func dakuten(r: Rune): Rune = - if r in CanHaveDakuten: - return Rune(int32(r) + 1) - return r - -func handakuten(r: Rune): Rune = - if r in CanHaveHandakuten: - return Rune(int32(r) + 2) - return r - -func nodakuten(r: Rune): Rune = - return Rune(int32(r) - 1) - -func nohandakuten(r: Rune): Rune = - return Rune(int32(r) - 2) - -# Halfwidth to fullwidth & vice versa -const widthconv = staticRead"res/widthconv.json" -proc genHalfWidthTable(): Table[Rune, Rune] = - let widthconvjson = parseJson(widthconv) - for k, v in widthconvjson: - if v.kind == JString: - result[v.getStr().toRunes()[0]] = k.toRunes()[0] - else: - for s in v: - result[s.getStr().toRunes()[0]] 
= k.toRunes()[0] - -proc genFullWidthTable(): Table[Rune, Rune] = - let widthconvjson = parseJson(widthconv) - for k, v in widthconvjson: - if v.kind == JString: - result[k.toRunes()[0]] = v.getStr().toRunes()[0] - else: - result[k.toRunes()[0]] = v[0].getStr().toRunes()[0] - -const halfwidthtable = genHalfWidthTable() -const fullwidthtable = genFullWidthTable() - -func halfwidth(r: Rune): Rune = - return halfwidthtable.getOrDefault(r, r) - -func halfwidth*(s: string): string = - for r in s.runes: - case r - of HasDakuten: - result.add(halfwidth(r.nodakuten())) - result.add(HalfDakuten) - of HasHanDakuten: - result.add(halfwidth(r.nohandakuten())) - result.add(HalfHanDakuten) - else: - result.add(halfwidth(r)) - -func fullwidth(r: Rune): Rune = - return fullwidthtable.getOrDefault(r, r) - -proc fullwidth(s: seq[Rune]): seq[Rune] = - for r in s: - if r == HalfDakuten: #dakuten - if result.len > 0: - result[^1] = result[^1].dakuten() - else: - result.add(r) - elif r == HalfHanDakuten: #handakuten - if result.len > 0: - result[^1] = result[^1].handakuten() - else: - result.add(r) - else: - result.add(fullwidth(r)) - -proc fullwidth*(s: string): string = - return $fullwidth(s.toRunes()) - -const kanamap = staticRead"res/kanamap.tab" -func genFullSizeMap(): seq[(uint32, uint32)] = - result = @[] - for line in kanamap.split('\n'): - if line.len == 0: break - let rs = line.toRunes() - assert rs[1] == Rune('\t') - result.add((uint32(rs[0]), uint32(rs[2]))) -const fullSizeMap = genFullSizeMap() - -proc fullsize*(s: string): string = - result = "" - for r in s.runes(): - let i = searchInMap(fullSizeMap, uint32(r)) - if i == -1: - result &= r - else: - result &= $Rune(fullSizeMap[i][1]) diff --git a/src/utils/widthconv.nim b/src/utils/widthconv.nim new file mode 100644 index 00000000..8cf71b4c --- /dev/null +++ b/src/utils/widthconv.nim @@ -0,0 +1,126 @@ +import std/strutils +import std/unicode +import utils/map + +const CanHaveDakuten = ("かきくけこさしすせそたちつてとはひふへほカキクケコ" & + 
"サシスセソタチツテトハヒフヘホ").toRunes() + +const CanHaveHanDakuten = "はひふへほハヒフヘホ".toRunes() + +const HasDakuten = ("がぎぐげござじずぜぞだぢづでどばびぶべぼガギグゲゴ" & + "ザジズゼゾダヂヅデドバビブベボ").toRunes() + +const HasHanDakuten = "ぱぴぷぺぽパピプペポ".toRunes() + +# in unicode, char + 1 is dakuten and char + 2 handakuten + +const HalfDakuten = Rune(0xFF9E) # half-width dakuten +const HalfHanDakuten = Rune(0xFF9F) + +func dakuten(r: Rune): Rune = + assert r in CanHaveDakuten + return Rune(int32(r) + 1) + +func handakuten(r: Rune): Rune = + assert r in CanHaveHanDakuten + return Rune(int32(r) + 2) + +func nodakuten(r: Rune): Rune = + return Rune(int32(r) - 1) + +func nohandakuten(r: Rune): Rune = + return Rune(int32(r) - 2) + +# Halfwidth to fullwidth & vice versa +const halfFullMap = (func(): seq[tuple[half, full1, full2: Rune]] = + result = @[] + const map = staticRead"res/widthconvmap.tab" + for line in map.split('\n'): + if line == "": + break + var i = 0 + var half: Rune + fastRuneAt(line, i, half) + assert line[i] == '\t' + inc i + var full1: Rune + fastRuneAt(line, i, full1) + var full2 = Rune(0) + if i < line.len: + assert line[i] == '\t' + inc i + fastRuneAt(line, i, full2) + result.add((half, full1, full2)) +)() + +func halfwidth(r: Rune): Rune = + if r != Rune(0): # special case to avoid comparison with f2 + for (h, f1, f2) in halfFullMap: + if f1 == r or f2 == r: + return h + return r + +func halfwidth*(s: string): string = + result = "" + for r in s.runes: + case r + of HasDakuten: + result &= halfwidth(r.nodakuten()) + result &= HalfDakuten + of HasHanDakuten: + result &= halfwidth(r.nohandakuten()) + result &= HalfHanDakuten + else: + result &= halfwidth(r) + +func fullwidth(r: Rune): Rune = + if r != Rune(0): # special case to avoid comparison with f2 + for (h, f1, f2) in halfFullMap: + if h == r: + return f1 + return r + +func fullwidth*(s: string): string = + result = "" + var lastr = Rune(0) + for r in s.runes: + if lastr != Rune(0): + if r == HalfDakuten: + # flush with dakuten + result &= 
lastr.dakuten() + lastr = Rune(0) + continue + elif r == HalfHanDakuten: + # flush with handakuten + result &= lastr.handakuten() + lastr = Rune(0) + continue + result &= lastr + lastr = Rune(0) + let r = fullwidth(r) + if r in CanHaveDakuten: + lastr = r + else: + result &= r + if lastr != Rune(0): + # flush + result &= lastr + +const kanamap = staticRead"res/kanamap.tab" +func genFullSizeMap(): seq[(uint32, uint32)] = + result = @[] + for line in kanamap.split('\n'): + if line.len == 0: break + let rs = line.toRunes() + assert rs[1] == Rune('\t') + result.add((uint32(rs[0]), uint32(rs[2]))) +const fullSizeMap = genFullSizeMap() + +proc fullsize*(s: string): string = + result = "" + for r in s.runes(): + let i = searchInMap(fullSizeMap, uint32(r)) + if i == -1: + result &= r + else: + result &= $Rune(fullSizeMap[i][1]) |