From 10c3ab626941d9c1ec69a2921696f4b7d0c2a6ac Mon Sep 17 00:00:00 2001 From: Andreas Rumpf Date: Mon, 16 Oct 2023 00:01:33 +0200 Subject: NIR: store sizes, alignments and offsets in the type graph; beginning… (#22822) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …s of a patent-pending new VM --- compiler/nir/stringcases.nim | 200 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 compiler/nir/stringcases.nim (limited to 'compiler/nir/stringcases.nim') diff --git a/compiler/nir/stringcases.nim b/compiler/nir/stringcases.nim new file mode 100644 index 000000000..9417d613d --- /dev/null +++ b/compiler/nir/stringcases.nim @@ -0,0 +1,200 @@ +# +# +# The Nim Compiler +# (c) Copyright 2023 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## included from ast2ir.nim + +#[ + +case s +of "abc", "abbd": + echo 1 +of "hah": + echo 2 +of "gah": + echo 3 + +# we produce code like this: + +if s[0] <= 'a': + if s == "abc: goto L1 + elif s == "abbd": goto L1 +else: + if s[2] <= 'h': + if s == "hah": goto L2 + elif s == "gah": goto L3 +goto afterCase + +L1: + echo 1 + goto afterCase +L2: + echo 2 + goto afterCase +L3: + echo 3 + goto afterCase + +afterCase: ... + +]# + +# We split the set of strings into 2 sets of roughly the same size. +# The condition used for splitting is a (position, char) tuple. +# Every string of length > position for which s[position] <= char is in one +# set else it is in the other set. + +from sequtils import addUnique + +type + Key = (LitId, LabelId) + +proc splitValue(strings: BiTable[string]; a: openArray[Key]; position: int): (char, float) = + var cand: seq[char] = @[] + for t in items a: + let s = strings[t[0]] + if s.len > position: cand.addUnique s[position] + + result = ('\0', -1.0) + for disc in items cand: + var hits = 0 + for t in items a: + let s = strings[t[0]] + if s.len > position and s[position] <= disc: + inc hits + # the split is the better, the more `hits` is close to `a.len / 2`: + let grade = 100000.0 - abs(hits.float - a.len.float / 2.0) + if grade > result[1]: + result = (disc, grade) + +proc tryAllPositions(strings: BiTable[string]; a: openArray[Key]): (char, int) = + var m = 0 + for t in items a: + m = max(m, strings[t[0]].len) + + result = ('\0', -1) + var best = -1.0 + for i in 0 ..< m: + let current = splitValue(strings, a, i) + if current[1] > best: + best = current[1] + result = (current[0], i) + +type + SearchKind = enum + LinearSearch, SplitSearch + SearchResult* = object + case kind: SearchKind + of LinearSearch: + a: seq[Key] + of SplitSearch: + span: int + best: (char, int) + +proc emitLinearSearch(strings: BiTable[string]; a: openArray[Key]; dest: var seq[SearchResult]) = + var d = SearchResult(kind: LinearSearch, a: @[]) + for x in a: d.a.add x + dest.add d + +proc split(strings: BiTable[string]; a: openArray[Key]; dest: var seq[SearchResult]) = + if a.len <= 4: + emitLinearSearch strings, a, dest + else: + let best = tryAllPositions(strings, a) + var groupA: seq[Key] = @[] + var groupB: seq[Key] = @[] + for t in items a: + let s = strings[t[0]] + if s.len > best[1] and s[best[1]] <= best[0]: + groupA.add t + else: + groupB.add t + if groupA.len == 0 or groupB.len == 0: + emitLinearSearch strings, a, dest + else: + let toPatch = dest.len + dest.add SearchResult(kind: SplitSearch, span: 1, best: best) + split strings, groupA, dest + split strings, groupB, dest + let dist = dest.len - toPatch + assert dist > 0 + dest[toPatch].span = dist + +proc toProblemDescription(c: var ProcCon; n: PNode): (seq[Key], LabelId) = + result = (@[], newLabels(c.labelGen, n.len)) + assert n.kind == nkCaseStmt + for i in 1..