diff options
author | Araq <rumpf_a@web.de> | 2018-04-30 19:25:04 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2018-04-30 19:25:04 +0200 |
commit | f74f485bf7064c625b391733985be00b484a801a (patch) | |
tree | 83304d401a98d2477bc313c75ca4e3535f764d72 | |
parent | e53b49d024c01c9740a9cbedd763dc22b6c3e2c2 (diff) | |
parent | ed79201d0b9710efb9ad89ab6ecbe7eff1742516 (diff) | |
download | Nim-f74f485bf7064c625b391733985be00b484a801a.tar.gz |
Merge branch 'araq-strings-v1' into devel
30 files changed, 156 insertions, 1928 deletions
diff --git a/changelog.md b/changelog.md index 17c169d22..251aca4ab 100644 --- a/changelog.md +++ b/changelog.md @@ -83,6 +83,9 @@ - ``nil`` for strings/seqs is finally gone. Instead the default value for these is ``"" / @[]``. +- Accessing the binary zero terminator in Nim's native strings + is now invalid. Internally a Nim string still has the trailing zero for + zero-copy interoperability with ``cstring``. ### Tool changes diff --git a/doc/manual.rst b/doc/manual.rst index 636bf796b..fabcf058a 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -1471,7 +1471,7 @@ mysterious crashes. **Note**: The example only works because the memory is initialized to zero (``alloc0`` instead of ``alloc`` does this): ``d.s`` is thus initialized to -``nil`` which the string assignment can handle. One needs to know low level +binary zero which the string assignment can handle. One needs to know low level details like this when mixing garbage collected data with unmanaged memory. .. XXX finalizers for traced objects @@ -2512,8 +2512,8 @@ char '\\0' bool false ref or pointer type nil procedural type nil -sequence nil (*not* ``@[]``) -string nil (*not* "") +sequence ``@[]`` +string ``""`` tuple[x: A, y: B, ...] (default(A), default(B), ...) (analogous for objects) array[0..., T] [default(T), ...] @@ -4270,7 +4270,7 @@ therefore very useful for type specialization within generic code: Table[Key, Value] = object keys: seq[Key] values: seq[Value] - when not (Key is string): # nil value for strings used for optimization + when not (Key is string): # empty value for strings used for optimization deletedKeys: seq[bool] @@ -7434,8 +7434,8 @@ code generation directly, but their presence can be detected by macros. Custom pragmas are defined using templates annotated with pragma ``pragma``: .. code-block:: nim - template dbTable(name: string, table_space: string = nil) {.pragma.} - template dbKey(name: string = nil, primary_key: bool = false) {.pragma.} + template dbTable(name: string, table_space: string = "") {.pragma.} + template dbKey(name: string = "", primary_key: bool = false) {.pragma.} template dbForeignKey(t: typedesc) {.pragma.} template dbIgnore {.pragma.} diff --git a/doc/tut1.rst b/doc/tut1.rst index cbfef183e..2dc502fab 100644 --- a/doc/tut1.rst +++ b/doc/tut1.rst @@ -960,11 +960,7 @@ enforced. For example, when reading strings from binary files, they are merely a sequence of bytes. The index operation ``s[i]`` means the i-th *char* of ``s``, not the i-th *unichar*. -String variables are initialized with a special value, called ``nil``. However, -most string operations cannot deal with ``nil`` (leading to an exception being -raised) for performance reasons. It is best to use empty strings ``""`` -rather than ``nil`` as the *empty* value. But ``""`` often creates a string -object on the heap, so there is a trade-off to be made here. +String variables are initialized with the empty strings ``""``. Integers @@ -1309,11 +1305,7 @@ Example: x: seq[int] # a reference to a sequence of integers x = @[1, 2, 3, 4, 5, 6] # the @ turns the array into a sequence allocated on the heap -Sequence variables are initialized with ``nil``. However, most sequence -operations cannot deal with ``nil`` (leading to an exception being -raised) for performance reasons. Thus one should use empty sequences ``@[]`` -rather than ``nil`` as the *empty* value. But ``@[]`` creates a sequence -object on the heap, so there is a trade-off to be made here. +Sequence variables are initialized with ``@[]``. The ``for`` statement can be used with one or two variables when used with a sequence. When you use the one variable form, the variable will hold the value @@ -1355,11 +1347,9 @@ type does not matter. .. code-block:: nim :test: "nim c $1" var - fruits: seq[string] # reference to a sequence of strings that is initialized with 'nil' + fruits: seq[string] # reference to a sequence of strings that is initialized with '@[]' capitals: array[3, string] # array of strings with a fixed size - fruits = @[] # creates an empty sequence on the heap that will be referenced by 'fruits' - capitals = ["New York", "London", "Berlin"] # array 'capitals' allows assignment of only three elements fruits.add("Banana") # sequence 'fruits' is dynamically expandable during runtime fruits.add("Mango") @@ -1691,7 +1681,7 @@ rules apply: write(stdout, x(3)) # no error: A.x is called write(stdout, x("")) # no error: B.x is called - proc x*(a: int): string = nil + proc x*(a: int): string = discard write(stdout, x(3)) # ambiguous: which `x` is to call? diff --git a/examples/httpserver2.nim b/examples/httpserver2.nim index 050684d3e..1843ff967 100644 --- a/examples/httpserver2.nim +++ b/examples/httpserver2.nim @@ -107,7 +107,7 @@ proc executeCgi(server: var TServer, client: Socket, path, query: string, dataAvail = recvLine(client, buf) if buf.len == 0: break - var L = toLower(buf) + var L = toLowerAscii(buf) if L.startsWith("content-length:"): var i = len("content-length:") while L[i] in Whitespace: inc(i) @@ -205,7 +205,7 @@ proc acceptRequest(server: var TServer, client: Socket) = client.close() else: when defined(Windows): - var ext = splitFile(path).ext.toLower + var ext = splitFile(path).ext.toLowerAscii if ext == ".exe" or ext == ".cgi": # XXX: extract interpreter information here? cgi = true diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim index 582f5ab3b..6058128dd 100644 --- a/lib/impure/nre.nim +++ b/lib/impure/nre.nim @@ -10,7 +10,7 @@ from pcre import nil import nre.private.util import tables -from strutils import toLower, `%` +from strutils import `%` from math import ceil import options from unicode import runeLenAt @@ -326,15 +326,15 @@ proc `$`*(pattern: RegexMatch): string = proc `==`*(a, b: Regex): bool = if not a.isNil and not b.isNil: - return a.pattern == b.pattern and - a.pcreObj == b.pcreObj and + return a.pattern == b.pattern and + a.pcreObj == b.pcreObj and a.pcreExtra == b.pcreExtra else: return system.`==`(a, b) proc `==`*(a, b: RegexMatch): bool = return a.pattern == b.pattern and - a.str == b.str + a.str == b.str # }}} # Creation & Destruction {{{ diff --git a/lib/pure/asyncnet.nim b/lib/pure/asyncnet.nim index a75f9daac..7c354151b 100644 --- a/lib/pure/asyncnet.nim +++ b/lib/pure/asyncnet.nim @@ -201,7 +201,7 @@ when defineSsl: flags: set[SocketFlag]) {.async.} = let len = bioCtrlPending(socket.bioOut) if len > 0: - var data = newStringOfCap(len) + var data = newString(len) let read = bioRead(socket.bioOut, addr data[0], len) assert read != 0 if read < 0: diff --git a/lib/pure/base64.nim b/lib/pure/base64.nim index 4b0d08292..bfb8a1666 100644 --- a/lib/pure/base64.nim +++ b/lib/pure/base64.nim @@ -52,7 +52,7 @@ template encodeInternal(s: typed, lineLen: int, newLine: string): untyped = if numLines > 0: inc(total, (numLines - 1) * newLine.len) result = newString(total) - var + var i = 0 r = 0 currLine = 0 @@ -76,7 +76,7 @@ template encodeInternal(s: typed, lineLen: int, newLine: string): untyped = currLine = 0 if i < s.len-1: - let + let a = ord(s[i]) b = ord(s[i+1]) result[r] = cb64[a shr 2] @@ -130,11 +130,11 @@ proc decode*(s: string): string = # total is an upper bound, as we will skip arbitrary whitespace: result = newString(total) - var + var i = 0 r = 0 while true: - while s[i] in Whitespace: inc(i) + while i < s.len and s[i] in Whitespace: inc(i) if i < s.len-3: let a = s[i].decodeByte diff --git a/lib/pure/collections/critbits.nim b/lib/pure/collections/critbits.nim index 34f5c5470..95fffdf88 100644 --- a/lib/pure/collections/critbits.nim +++ b/lib/pure/collections/critbits.nim @@ -74,18 +74,19 @@ proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] = var newByte = 0 block blockX: while newbyte < key.len: - if it.key[newbyte] != key[newbyte]: - newotherbits = it.key[newbyte].ord xor key[newbyte].ord + let ch = if newbyte < it.key.len: it.key[newbyte] else: '\0' + if ch != key[newbyte]: + newotherbits = ch.ord xor key[newbyte].ord break blockX inc newbyte - if it.key[newbyte] != '\0': + if newbyte < it.key.len: newotherbits = it.key[newbyte].ord else: return it while (newOtherBits and (newOtherBits-1)) != 0: newOtherBits = newOtherBits and (newOtherBits-1) newOtherBits = newOtherBits xor 255 - let ch = it.key[newByte] + let ch = if newByte < it.key.len: it.key[newByte] else: '\0' let dir = (1 + (ord(ch) or newOtherBits)) shr 8 var inner: Node[T] diff --git a/lib/pure/cookies.nim b/lib/pure/cookies.nim index aca0ac2b4..eaff86ae6 100644 --- a/lib/pure/cookies.nim +++ b/lib/pure/cookies.nim @@ -25,16 +25,16 @@ proc parseCookies*(s: string): StringTableRef = result = newStringTable(modeCaseInsensitive) var i = 0 while true: - while s[i] == ' ' or s[i] == '\t': inc(i) + while i < s.len and (s[i] == ' ' or s[i] == '\t'): inc(i) var keystart = i - while s[i] != '=' and s[i] != '\0': inc(i) + while i < s.len and s[i] != '=': inc(i) var keyend = i-1 - if s[i] == '\0': break + if i >= s.len: break inc(i) # skip '=' var valstart = i - while s[i] != ';' and s[i] != '\0': inc(i) + while i < s.len and s[i] != ';': inc(i) result[substr(s, keystart, keyend)] = substr(s, valstart, i-1) - if s[i] == '\0': break + if i >= s.len: break inc(i) # skip ';' proc setCookie*(key, value: string, domain = "", path = "", diff --git a/lib/pure/encodings.nim b/lib/pure/encodings.nim index 5840d443d..731fbbc29 100644 --- a/lib/pure/encodings.nim +++ b/lib/pure/encodings.nim @@ -36,7 +36,7 @@ when defined(windows): while i < a.len and j < b.len: if a[i] in {'-', '_'}: inc i if b[j] in {'-', '_'}: inc j - if a[i].toLower != b[j].toLower: return false + if i < a.len and j < b.len and a[i].toLowerAscii != b[j].toLowerAscii: return false inc i inc j result = i == a.len and j == b.len diff --git a/lib/pure/httpclient.nim b/lib/pure/httpclient.nim index db8acf0eb..b7617b0b5 100644 --- a/lib/pure/httpclient.nim +++ b/lib/pure/httpclient.nim @@ -74,7 +74,7 @@ ## ## .. code-block:: Nim ## import asyncdispatch, httpclient -## +## ## proc onProgressChanged(total, progress, speed: BiggestInt) {.async.} = ## echo("Downloaded ", progress, " of ", total) ## echo("Current rate: ", speed div 1000, "kb/s") @@ -247,7 +247,7 @@ proc parseChunks(s: Socket, timeout: int): string = var i = 0 if chunkSizeStr == "": httpError("Server terminated connection prematurely") - while true: + while i < chunkSizeStr.len: case chunkSizeStr[i] of '0'..'9': chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('0')) @@ -255,8 +255,6 @@ proc parseChunks(s: Socket, timeout: int): string = chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('a') + 10) of 'A'..'F': chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('A') + 10) - of '\0': - break of ';': # http://tools.ietf.org/html/rfc2616#section-3.6.1 # We don't care about chunk-extensions. @@ -938,7 +936,7 @@ proc parseChunks(client: HttpClient | AsyncHttpClient): Future[void] var i = 0 if chunkSizeStr == "": httpError("Server terminated connection prematurely") - while true: + while i < chunkSizeStr.len: case chunkSizeStr[i] of '0'..'9': chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('0')) @@ -946,8 +944,6 @@ proc parseChunks(client: HttpClient | AsyncHttpClient): Future[void] chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('a') + 10) of 'A'..'F': chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('A') + 10) - of '\0': - break of ';': # http://tools.ietf.org/html/rfc2616#section-3.6.1 # We don't care about chunk-extensions. diff --git a/lib/pure/httpcore.nim b/lib/pure/httpcore.nim index f150fa1c1..5df26fdd5 100644 --- a/lib/pure/httpcore.nim +++ b/lib/pure/httpcore.nim @@ -190,11 +190,11 @@ proc len*(headers: HttpHeaders): int = return headers.table.len proc parseList(line: string, list: var seq[string], start: int): int = var i = 0 var current = "" - while line[start + i] notin {'\c', '\l', '\0'}: + while start+i < line.len and line[start + i] notin {'\c', '\l'}: i += line.skipWhitespace(start + i) i += line.parseUntil(current, {'\c', '\l', ','}, start + i) list.add(current) - if line[start + i] == ',': + if start+i < line.len and line[start + i] == ',': i.inc # Skip , current.setLen(0) diff --git a/lib/pure/httpserver.nim b/lib/pure/httpserver.nim index 632eb198a..9f54ed9e8 100644 --- a/lib/pure/httpserver.nim +++ b/lib/pure/httpserver.nim @@ -126,7 +126,7 @@ when false: var dataAvail = false while dataAvail: dataAvail = recvLine(client, buf) # TODO: This is incorrect. - var L = toLower(buf.string) + var L = toLowerAscii(buf.string) if L.startsWith("content-length:"): var i = len("content-length:") while L[i] in Whitespace: inc(i) @@ -199,7 +199,7 @@ when false: notFound(client) else: when defined(Windows): - var ext = splitFile(path).ext.toLower + var ext = splitFile(path).ext.toLowerAscii if ext == ".exe" or ext == ".cgi": # XXX: extract interpreter information here? cgi = true @@ -303,7 +303,7 @@ proc next*(s: var Server) = if s.reqMethod == "POST": # Check for Expect header if s.headers.hasKey("Expect"): - if s.headers["Expect"].toLower == "100-continue": + if s.headers["Expect"].toLowerAscii == "100-continue": s.client.sendStatus("100 Continue") else: s.client.sendStatus("417 Expectation Failed") @@ -427,7 +427,7 @@ proc nextAsync(s: PAsyncHTTPServer) = if s.reqMethod == "POST": # Check for Expect header if s.headers.hasKey("Expect"): - if s.headers["Expect"].toLower == "100-continue": + if s.headers["Expect"].toLowerAscii == "100-continue": s.client.sendStatus("100 Continue") else: s.client.sendStatus("417 Expectation Failed") diff --git a/lib/pure/logging.nim b/lib/pure/logging.nim index 751fc0e8d..43b5ab3ab 100644 --- a/lib/pure/logging.nim +++ b/lib/pure/logging.nim @@ -126,7 +126,7 @@ proc substituteLog*(frmt: string, level: Level, args: varargs[string, `$`]): str var v = "" let app = when defined(js): "" else: getAppFilename() while frmt[i] in IdentChars: - v.add(toLower(frmt[i])) + v.add(toLowerAscii(frmt[i])) inc(i) case v of "date": result.add(getDateStr()) diff --git a/lib/pure/matchers.nim b/lib/pure/matchers.nim index 685c3b07a..97223ed01 100644 --- a/lib/pure/matchers.nim +++ b/lib/pure/matchers.nim @@ -29,21 +29,21 @@ proc validEmailAddress*(s: string): bool {.noSideEffect, chars = Letters + Digits + {'!','#','$','%','&', '\'','*','+','/','=','?','^','_','`','{','}','|','~','-','.'} var i = 0 - if s[i] notin chars or s[i] == '.': return false - while s[i] in chars: - if s[i] == '.' and s[i+1] == '.': return false + if i >= s.len or s[i] notin chars or s[i] == '.': return false + while i < s.len and s[i] in chars: + if i+1 < s.len and s[i] == '.' and s[i+1] == '.': return false inc(i) - if s[i] != '@': return false + if i >= s.len or s[i] != '@': return false var j = len(s)-1 - if s[j] notin Letters: return false + if j >= 0 and s[j] notin Letters: return false while j >= i and s[j] in Letters: dec(j) inc(i) # skip '@' - while s[i] in {'0'..'9', 'a'..'z', '-', '.'}: inc(i) - if s[i] != '\0': return false + while i < s.len and s[i] in {'0'..'9', 'a'..'z', '-', '.'}: inc(i) + if i != s.len: return false var x = substr(s, j+1) if len(x) == 2 and x[0] in Letters and x[1] in Letters: return true - case toLower(x) + case toLowerAscii(x) of "com", "org", "net", "gov", "mil", "biz", "info", "mobi", "name", "aero", "jobs", "museum": return true else: return false diff --git a/lib/pure/os.nim b/lib/pure/os.nim index 11be8f0c1..644afee32 100644 --- a/lib/pure/os.nim +++ b/lib/pure/os.nim @@ -1060,18 +1060,17 @@ proc parseCmdLine*(c: string): seq[string] {. while true: setLen(a, 0) # eat all delimiting whitespace - while c[i] == ' ' or c[i] == '\t' or c[i] == '\l' or c[i] == '\r' : inc(i) + while i < c.len and c[i] in {' ', '\t', '\l', '\r'}: inc(i) + if i >= c.len: break when defined(windows): # parse a single argument according to the above rules: - if c[i] == '\0': break var inQuote = false - while true: + while i < c.len: case c[i] - of '\0': break of '\\': var j = i - while c[j] == '\\': inc(j) - if c[j] == '"': + while j < c.len and c[j] == '\\': inc(j) + if j < c.len and c[j] == '"': for k in 1..(j-i) div 2: a.add('\\') if (j-i) mod 2 == 0: i = j @@ -1084,7 +1083,7 @@ proc parseCmdLine*(c: string): seq[string] {. of '"': inc(i) if not inQuote: inQuote = true - elif c[i] == '"': + elif i < c.len and c[i] == '"': a.add(c[i]) inc(i) else: @@ -1102,13 +1101,12 @@ proc parseCmdLine*(c: string): seq[string] {. of '\'', '\"': var delim = c[i] inc(i) # skip ' or " - while c[i] != '\0' and c[i] != delim: + while i < c.len and c[i] != delim: add a, c[i] inc(i) - if c[i] != '\0': inc(i) - of '\0': break + if i < c.len: inc(i) else: - while c[i] > ' ': + while i < c.len and c[i] > ' ': add(a, c[i]) inc(i) add(result, a) diff --git a/lib/pure/ospaths.nim b/lib/pure/ospaths.nim index 0d638abb9..ee2b715d3 100644 --- a/lib/pure/ospaths.nim +++ b/lib/pure/ospaths.nim @@ -196,7 +196,7 @@ proc joinPath*(head, tail: string): string {. else: result = head & tail else: - if tail[0] in {DirSep, AltSep}: + if tail.len > 0 and tail[0] in {DirSep, AltSep}: result = head & tail else: result = head & DirSep & tail @@ -477,7 +477,7 @@ proc unixToNativePath*(path: string, drive=""): string {. var i = start while i < len(path): # ../../../ --> :::: - if path[i] == '.' and path[i+1] == '.' and path[i+2] == '/': + if i+2 < path.len and path[i] == '.' and path[i+1] == '.' and path[i+2] == '/': # parent directory when defined(macos): if result[high(result)] == ':': diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim index 5ae2d9182..6d415efd0 100644 --- a/lib/pure/pegs.nim +++ b/lib/pure/pegs.nim @@ -534,15 +534,15 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {. case p.kind of pkEmpty: result = 0 # match of length 0 of pkAny: - if s[start] != '\0': result = 1 + if start < s.len: result = 1 else: result = -1 of pkAnyRune: - if s[start] != '\0': + if start < s.len: result = runeLenAt(s, start) else: result = -1 of pkLetter: - if s[start] != '\0': + if start < s.len: var a: Rune result = start fastRuneAt(s, result, a) @@ -551,7 +551,7 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {. else: result = -1 of pkLower: - if s[start] != '\0': + if start < s.len: var a: Rune result = start fastRuneAt(s, result, a) @@ -560,7 +560,7 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {. else: result = -1 of pkUpper: - if s[start] != '\0': + if start < s.len: var a: Rune result = start fastRuneAt(s, result, a) @@ -569,7 +569,7 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {. else: result = -1 of pkTitle: - if s[start] != '\0': + if start < s.len: var a: Rune result = start fastRuneAt(s, result, a) @@ -578,7 +578,7 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {. else: result = -1 of pkWhitespace: - if s[start] != '\0': + if start < s.len: var a: Rune result = start fastRuneAt(s, result, a) @@ -589,15 +589,15 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {. of pkGreedyAny: result = len(s) - start of pkNewLine: - if s[start] == '\L': result = 1 - elif s[start] == '\C': - if s[start+1] == '\L': result = 2 + if start < s.len and s[start] == '\L': result = 1 + elif start < s.len and s[start] == '\C': + if start+1 < s.len and s[start+1] == '\L': result = 2 else: result = 1 else: result = -1 of pkTerminal: result = len(p.term) for i in 0..result-1: - if p.term[i] != s[start+i]: + if start+i >= s.len or p.term[i] != s[start+i]: result = -1 break of pkTerminalIgnoreCase: @@ -606,6 +606,9 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {. a, b: Rune result = start while i < len(p.term): + if result >= s.len: + result = -1 + break fastRuneAt(p.term, i, a) fastRuneAt(s, result, b) if toLower(a) != toLower(b): @@ -621,18 +624,23 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {. while true: fastRuneAt(p.term, i, a) if a != Rune('_'): break - while true: + while result < s.len: fastRuneAt(s, result, b) if b != Rune('_'): break - if toLower(a) != toLower(b): + if result >= s.len: + if i >= p.term.len: break + else: + result = -1 + break + elif toLower(a) != toLower(b): result = -1 break dec(result, start) of pkChar: - if p.ch == s[start]: result = 1 + if start < s.len and p.ch == s[start]: result = 1 else: result = -1 of pkCharChoice: - if contains(p.charChoice[], s[start]): result = 1 + if start < s.len and contains(p.charChoice[], s[start]): result = 1 else: result = -1 of pkNonTerminal: var oldMl = c.ml @@ -695,10 +703,10 @@ proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {. of pkGreedyRepChar: result = 0 var ch = p.ch - while ch == s[start+result]: inc(result) + while start+result < s.len and ch == s[start+result]: inc(result) of pkGreedyRepSet: result = 0 - while contains(p.charChoice[], s[start+result]): inc(result) + while start+result < s.len and contains(p.charChoice[], s[start+result]): inc(result) of pkOption: result = max(0, rawMatch(s, p.sons[0], start, c)) of pkAndPredicate: diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 4500a163f..3f01f0285 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -1126,7 +1126,7 @@ proc unindent*(s: string, count: Natural, padding: string = " "): string var indentCount = 0 for j in 0..<count.int: indentCount.inc - if line[j .. j + padding.len-1] != padding: + if j + padding.len-1 >= line.len or line[j .. j + padding.len-1] != padding: indentCount = j break result.add(line[indentCount*padding.len .. ^1]) diff --git a/lib/pure/times.nim b/lib/pure/times.nim index 7d101beab..bc8a50fd7 100644 --- a/lib/pure/times.nim +++ b/lib/pure/times.nim @@ -1318,24 +1318,23 @@ proc format*(dt: DateTime, f: string): string {.tags: [].}= result = "" var i = 0 var currentF = "" - while true: + while i < f.len: case f[i] - of ' ', '-', '/', ':', '\'', '\0', '(', ')', '[', ']', ',': + of ' ', '-', '/', ':', '\'', '(', ')', '[', ']', ',': formatToken(dt, currentF, result) currentF = "" - if f[i] == '\0': break if f[i] == '\'': inc(i) # Skip ' - while f[i] != '\'' and f.len-1 > i: + while i < f.len-1 and f[i] != '\'': result.add(f[i]) inc(i) else: result.add(f[i]) else: # Check if the letter being added matches previous accumulated buffer. - if currentF.len < 1 or currentF[high(currentF)] == f[i]: + if currentF.len == 0 or currentF[high(currentF)] == f[i]: currentF.add(f[i]) else: formatToken(dt, currentF, result) @@ -1343,6 +1342,7 @@ proc format*(dt: DateTime, f: string): string {.tags: [].}= currentF = "" inc(i) + formatToken(dt, currentF, result) proc `$`*(dt: DateTime): string {.tags: [], raises: [], benign.} = ## Converts a `DateTime` object to a string representation. @@ -1439,58 +1439,58 @@ proc parseToken(dt: var DateTime; token, value: string; j: var int) = dt.month = month.Month of "MMM": case value[j..j+2].toLowerAscii(): - of "jan": dt.month = mJan - of "feb": dt.month = mFeb - of "mar": dt.month = mMar - of "apr": dt.month = mApr - of "may": dt.month = mMay - of "jun": dt.month = mJun - of "jul": dt.month = mJul - of "aug": dt.month = mAug - of "sep": dt.month = mSep - of "oct": dt.month = mOct - of "nov": dt.month = mNov - of "dec": dt.month = mDec + of "jan": dt.month = mJan + of "feb": dt.month = mFeb + of "mar": dt.month = mMar + of "apr": dt.month = mApr + of "may": dt.month = mMay + of "jun": dt.month = mJun + of "jul": dt.month = mJul + of "aug": dt.month = mAug + of "sep": dt.month = mSep + of "oct": dt.month = mOct + of "nov": dt.month = mNov + of "dec": dt.month = mDec else: raise newException(ValueError, "Couldn't parse month (MMM), got: " & value) j += 3 of "MMMM": if value.len >= j+7 and value[j..j+6].cmpIgnoreCase("january") == 0: - dt.month = mJan + dt.month = mJan j += 7 elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("february") == 0: - dt.month = mFeb + dt.month = mFeb j += 8 elif value.len >= j+5 and value[j..j+4].cmpIgnoreCase("march") == 0: - dt.month = mMar + dt.month = mMar j += 5 elif value.len >= j+5 and value[j..j+4].cmpIgnoreCase("april") == 0: - dt.month = mApr + dt.month = mApr j += 5 elif value.len >= j+3 and value[j..j+2].cmpIgnoreCase("may") == 0: - dt.month = mMay + dt.month = mMay j += 3 elif value.len >= j+4 and value[j..j+3].cmpIgnoreCase("june") == 0: - dt.month = mJun + dt.month = mJun j += 4 elif value.len >= j+4 and value[j..j+3].cmpIgnoreCase("july") == 0: - dt.month = mJul + dt.month = mJul j += 4 elif value.len >= j+6 and value[j..j+5].cmpIgnoreCase("august") == 0: - dt.month = mAug + dt.month = mAug j += 6 elif value.len >= j+9 and value[j..j+8].cmpIgnoreCase("september") == 0: - dt.month = mSep + dt.month = mSep j += 9 elif value.len >= j+7 and value[j..j+6].cmpIgnoreCase("october") == 0: - dt.month = mOct + dt.month = mOct j += 7 elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("november") == 0: - dt.month = mNov + dt.month = mNov j += 8 elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("december") == 0: - dt.month = mDec + dt.month = mDec j += 8 else: raise newException(ValueError, @@ -1521,44 +1521,47 @@ proc parseToken(dt: var DateTime; token, value: string; j: var int) = j += 4 of "z": dt.isDst = false - if value[j] == '+': + let ch = if j < value.len: value[j] else: '\0' + if ch == '+': dt.utcOffset = 0 - parseInt($value[j+1]) * secondsInHour - elif value[j] == '-': + elif ch == '-': dt.utcOffset = parseInt($value[j+1]) * secondsInHour - elif value[j] == 'Z': + elif ch == 'Z': dt.utcOffset = 0 j += 1 return else: raise newException(ValueError, - "Couldn't parse timezone offset (z), got: " & value[j]) + "Couldn't parse timezone offset (z), got: " & ch) j += 2 of "zz": dt.isDst = false - if value[j] == '+': + let ch = if j < value.len: value[j] else: '\0' + if ch == '+': dt.utcOffset = 0 - value[j+1..j+2].parseInt() * secondsInHour - elif value[j] == '-': + elif ch == '-': dt.utcOffset = value[j+1..j+2].parseInt() * secondsInHour - elif value[j] == 'Z': + elif ch == 'Z': dt.utcOffset = 0 j += 1 return else: raise newException(ValueError, - "Couldn't parse timezone offset (zz), got: " & value[j]) + "Couldn't parse timezone offset (zz), got: " & ch) j += 3 of "zzz": dt.isDst = false var factor = 0 - if value[j] == '+': factor = -1 - elif value[j] == '-': factor = 1 - elif value[j] == 'Z': + let ch = if j < value.len: value[j] else: '\0' + if ch == '+': factor = -1 + elif ch == '-': factor = 1 + elif ch == 'Z': dt.utcOffset = 0 j += 1 return else: raise newException(ValueError, - "Couldn't parse timezone offset (zzz), got: " & value[j]) + "Couldn't parse timezone offset (zzz), got: " & ch) dt.utcOffset = factor * value[j+1..j+2].parseInt() * secondsInHour j += 4 dt.utcOffset += factor * value[j..j+1].parseInt() * 60 @@ -1620,20 +1623,18 @@ proc parse*(value, layout: string, zone: Timezone = local()): DateTime = dt.nanosecond = 0 dt.isDst = true # using this is flag for checking whether a timezone has \ # been read (because DST is always false when a tz is parsed) - while true: + while i < layout.len: case layout[i] - of ' ', '-', '/', ':', '\'', '\0', '(', ')', '[', ']', ',': + of ' ', '-', '/', ':', '\'', '(', ')', '[', ']', ',': if token.len > 0: parseToken(dt, token, value, j) # Reset token token = "" - # Break if at end of line - if layout[i] == '\0': break # Skip separator and everything between single quotes # These are literals in both the layout and the value string if layout[i] == '\'': inc(i) - while layout[i] != '\'' and layout.len-1 > i: + while i < layout.len-1 and layout[i] != '\'': inc(i) inc(j) inc(i) @@ -1642,13 +1643,15 @@ proc parse*(value, layout: string, zone: Timezone = local()): DateTime = inc(j) else: # Check if the letter being added matches previous accumulated buffer. - if token.len < 1 or token[high(token)] == layout[i]: + if token.len == 0 or token[high(token)] == layout[i]: token.add(layout[i]) inc(i) else: parseToken(dt, token, value, j) token = "" + if i >= layout.len and token.len > 0: + parseToken(dt, token, value, j) if dt.isDst: # No timezone parsed - assume timezone is `zone` result = initDateTime(zone.zoneInfoFromTz(dt.toAdjTime), zone) diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim index 3c891c81b..c38d36026 100644 --- a/lib/pure/xmldom.nim +++ b/lib/pure/xmldom.nim @@ -232,10 +232,10 @@ proc createAttributeNS*(doc: PDocument, namespaceURI: string, qualifiedName: str raise newException(EInvalidCharacterErr, "Invalid character") # Exceptions if qualifiedName.contains(':'): - let qfnamespaces = qualifiedName.toLower().split(':') + let qfnamespaces = qualifiedName.toLowerAscii().split(':') if isNil(namespaceURI): raise newException(ENamespaceErr, "When qualifiedName contains a prefix namespaceURI cannot be nil") - elif qfnamespaces[0] == "xml" and + elif qfnamespaces[0] == "xml" and namespaceURI != "http://www.w3.org/XML/1998/namespace" and qfnamespaces[1] notin stdattrnames: raise newException(ENamespaceErr, @@ -311,10 +311,10 @@ proc createElement*(doc: PDocument, tagName: string): PElement = proc createElementNS*(doc: PDocument, namespaceURI: string, qualifiedName: string): PElement = ## Creates an element of the given qualified name and namespace URI. if qualifiedName.contains(':'): - let qfnamespaces = qualifiedName.toLower().split(':') + let qfnamespaces = qualifiedName.toLowerAscii().split(':') if isNil(namespaceURI): raise newException(ENamespaceErr, "When qualifiedName contains a prefix namespaceURI cannot be nil") - elif qfnamespaces[0] == "xml" and + elif qfnamespaces[0] == "xml" and namespaceURI != "http://www.w3.org/XML/1998/namespace" and qfnamespaces[1] notin stdattrnames: raise newException(ENamespaceErr, @@ -533,13 +533,13 @@ proc `prefix=`*(n: PNode, value: string) = if isNil(n.fNamespaceURI): raise newException(ENamespaceErr, "namespaceURI cannot be nil") - elif value.toLower() == "xml" and n.fNamespaceURI != "http://www.w3.org/XML/1998/namespace": + elif value.toLowerAscii() == "xml" and n.fNamespaceURI != "http://www.w3.org/XML/1998/namespace": raise newException(ENamespaceErr, "When the namespace prefix is \"xml\" namespaceURI has to be \"http://www.w3.org/XML/1998/namespace\"") - elif value.toLower() == "xmlns" and n.fNamespaceURI != "http://www.w3.org/2000/xmlns/": + elif value.toLowerAscii() == "xmlns" and n.fNamespaceURI != "http://www.w3.org/2000/xmlns/": raise newException(ENamespaceErr, "When the namespace prefix is \"xmlns\" namespaceURI has to be \"http://www.w3.org/2000/xmlns/\"") - elif value.toLower() == "xmlns" and n.fNodeType == AttributeNode: + elif value.toLowerAscii() == "xmlns" and n.fNodeType == AttributeNode: raise newException(ENamespaceErr, "An AttributeNode cannot have a prefix of \"xmlns\"") n.fNodeName = value & ":" & n.fLocalName diff --git a/tests/async/tlambda.nim b/tests/async/tlambda.nim index d187c0d50..8f570689b 100644 --- a/tests/async/tlambda.nim +++ b/tests/async/tlambda.nim @@ -1,7 +1,7 @@ # bug 2007 -import asyncdispatch, asyncnet, logging, json, uri, strutils, future +import asyncdispatch, asyncnet, logging, json, uri, strutils, sugar type Builder = ref object @@ -27,7 +27,7 @@ proc newBuild*(onProgress: ProgressCB): Build = result.onProgress = onProgress proc start(build: Build, repo, hash: string) {.async.} = - let path = repo.parseUri().path.toLower() + let path = repo.parseUri().path.toLowerAscii() proc onProgress(builder: Builder, message: string) {.async.} = debug($message) diff --git a/tests/concepts/tstackconcept.nim b/tests/concepts/tstackconcept.nim index 2238dacb6..cb8db566d 100644 --- a/tests/concepts/tstackconcept.nim +++ b/tests/concepts/tstackconcept.nim @@ -31,7 +31,7 @@ type s.pop() is T type ValueType = T - const ValueTypeName = T.name.toUpper + const ValueTypeName = T.name.toUpperAscii proc genericAlgorithm[T](s: var Stack[T], y: T) = static: diff --git a/tests/manyloc/keineschweine/dependencies/nake/nakefile.nim b/tests/manyloc/keineschweine/dependencies/nake/nakefile.nim index bdf2139c9..5490211de 100644 --- a/tests/manyloc/keineschweine/dependencies/nake/nakefile.nim +++ b/tests/manyloc/keineschweine/dependencies/nake/nakefile.nim @@ -7,7 +7,7 @@ task "install", "compile and install nake binary": for index, dir in pairs(path): echo " ", index, ". ", dir echo "Where to install nake binary? (quit with ^C or quit or exit)" - let ans = stdin.readLine().toLower + let ans = stdin.readLine().toLowerAscii var index = 0 case ans of "q", "quit", "x", "exit": diff --git a/tests/manyloc/keineschweine/lib/sg_assets.nim b/tests/manyloc/keineschweine/lib/sg_assets.nim index fbc3c9ab8..c6540a428 100644 --- a/tests/manyloc/keineschweine/lib/sg_assets.nim +++ b/tests/manyloc/keineschweine/lib/sg_assets.nim @@ -562,7 +562,7 @@ proc importItem(data: PJsonNode; errors: var seq[string]): PItemRecord = result.useSound = importSound(data[2], errors, "useSound") - case data[1].str.toLower + case data[1].str.toLowerAscii of "projectile": result.kind = Projectile if data[2]["bullet"].kind == JString: diff --git a/tests/manyloc/nake/nakefile.nim b/tests/manyloc/nake/nakefile.nim index 97af79a84..3e8609169 100644 --- a/tests/manyloc/nake/nakefile.nim +++ b/tests/manyloc/nake/nakefile.nim @@ -88,7 +88,7 @@ task "download", "download game assets": if existsFile(path): echo "The file already exists\n", "[R]emove [M]ove [Q]uit [S]kip Source: ", GameAssets - case stdin.readLine.toLower + case stdin.readLine.toLowerAscii of "r": removeFile path of "m": @@ -120,7 +120,7 @@ task "download", "download game assets": echo "Download binary libs? Only libs for linux are available currently, enjoy the irony.\n", "[Y]es [N]o Source: ", BinLibs - case stdin.readline.toLower + case stdin.readline.toLowerAscii of "y", "yes": discard ## o_O else: diff --git a/tests/method/tsimmeth.nim b/tests/method/tsimmeth.nim index 11ff2674f..a057c35b7 100644 --- a/tests/method/tsimmeth.nim +++ b/tests/method/tsimmeth.nim @@ -6,7 +6,7 @@ discard """ import strutils -var x = "hello world!".toLower.toUpper +var x = "hello world!".toLowerAscii.toUpperAscii x.echo() #OUT HELLO WORLD! diff --git a/tests/misc/tmemoization.nim b/tests/misc/tmemoization.nim index 180acd89b..840eb3b0d 100644 --- a/tests/misc/tmemoization.nim +++ b/tests/misc/tmemoization.nim @@ -8,7 +8,7 @@ import strutils proc foo(s: static[string]): string = static: echo s - const R = s.toUpper + const R = s.toUpperAscii return R echo foo("test 1") diff --git a/tests/stdlib/tpegs.nim b/tests/stdlib/tpegs.nim deleted file mode 100644 index e2a5a1715..000000000 --- a/tests/stdlib/tpegs.nim +++ /dev/null @@ -1,1770 +0,0 @@ -discard """ - output: '''this -is -an -example -d -e -f -('keyvalue' 'key'*)''' -""" -# PEGs module turned out to be a good test to detect memory management bugs. - -include "system/inclrtl" - -const - useUnicode = true ## change this to deactivate proper UTF-8 support - -import - strutils - -when useUnicode: - import unicode - -const - InlineThreshold = 5 ## number of leaves; -1 to disable inlining - MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that - ## can be captured. More subpatterns cannot be captured! - -type - TPegKind = enum - pkEmpty, - pkAny, ## any character (.) - pkAnyRune, ## any Unicode character (_) - pkNewLine, ## CR-LF, LF, CR - pkLetter, ## Unicode letter - pkLower, ## Unicode lower case letter - pkUpper, ## Unicode upper case letter - pkTitle, ## Unicode title character - pkWhitespace, ## Unicode whitespace character - pkTerminal, - pkTerminalIgnoreCase, - pkTerminalIgnoreStyle, - pkChar, ## single character to match - pkCharChoice, - pkNonTerminal, - pkSequence, ## a b c ... --> Internal DSL: peg(a, b, c) - pkOrderedChoice, ## a / b / ... --> Internal DSL: a / b or /[a, b, c] - pkGreedyRep, ## a* --> Internal DSL: *a - ## a+ --> (a a*) - pkGreedyRepChar, ## x* where x is a single character (superop) - pkGreedyRepSet, ## [set]* (superop) - pkGreedyAny, ## .* or _* (superop) - pkOption, ## a? --> Internal DSL: ?a - pkAndPredicate, ## &a --> Internal DSL: &a - pkNotPredicate, ## !a --> Internal DSL: !a - pkCapture, ## {a} --> Internal DSL: capture(a) - pkBackRef, ## $i --> Internal DSL: backref(i) - pkBackRefIgnoreCase, - pkBackRefIgnoreStyle, - pkSearch, ## @a --> Internal DSL: !*a - pkCapturedSearch, ## {@} a --> Internal DSL: !*\a - pkRule, ## a <- b - pkList, ## a, b - pkStartAnchor ## ^ --> Internal DSL: startAnchor() - TNonTerminalFlag = enum - ntDeclared, ntUsed - TNonTerminal {.final.} = object ## represents a non terminal symbol - name: string ## the name of the symbol - line: int ## line the symbol has been declared/used in - col: int ## column the symbol has been declared/used in - flags: set[TNonTerminalFlag] ## the nonterminal's flags - rule: TNode ## the rule that the symbol refers to - TNode {.final, shallow.} = object - case kind: TPegKind - of pkEmpty..pkWhitespace: nil - of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle: term: string - of pkChar, pkGreedyRepChar: ch: char - of pkCharChoice, pkGreedyRepSet: charChoice: ref set[char] - of pkNonTerminal: nt: PNonTerminal - of pkBackRef..pkBackRefIgnoreStyle: index: range[0..MaxSubpatterns] - else: sons: seq[TNode] - PNonTerminal* = ref TNonTerminal - - TPeg* = TNode ## type that represents a PEG - -proc term*(t: string): TPeg {.rtl, extern: "npegs$1Str".} = - ## constructs a PEG from a terminal string - if t.len != 1: - result.kind = pkTerminal - result.term = t - else: - result.kind = pkChar - result.ch = t[0] - -proc termIgnoreCase*(t: string): TPeg {. - rtl, extern: "npegs$1".} = - ## constructs a PEG from a terminal string; ignore case for matching - result.kind = pkTerminalIgnoreCase - result.term = t - -proc termIgnoreStyle*(t: string): TPeg {. - rtl, extern: "npegs$1".} = - ## constructs a PEG from a terminal string; ignore style for matching - result.kind = pkTerminalIgnoreStyle - result.term = t - -proc term*(t: char): TPeg {.rtl, extern: "npegs$1Char".} = - ## constructs a PEG from a terminal char - assert t != '\0' - result.kind = pkChar - result.ch = t - -proc charSet*(s: set[char]): TPeg {.rtl, extern: "npegs$1".} = - ## constructs a PEG from a character set `s` - assert '\0' notin s - result.kind = pkCharChoice - new(result.charChoice) - result.charChoice[] = s - -proc len(a: TPeg): int {.inline.} = return a.sons.len -proc add(d: var TPeg, s: TPeg) {.inline.} = add(d.sons, s) - -proc copyPeg(a: TPeg): TPeg = - result.kind = a.kind - case a.kind - of pkEmpty..pkWhitespace: discard - of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle: - result.term = a.term - of pkChar, pkGreedyRepChar: - result.ch = a.ch - of pkCharChoice, pkGreedyRepSet: - new(result.charChoice) - result.charChoice[] = a.charChoice[] - of pkNonTerminal: result.nt = a.nt - of pkBackRef..pkBackRefIgnoreStyle: - result.index = a.index - else: - result.sons = a.sons - -proc addChoice(dest: var TPeg, elem: TPeg) = - var L = dest.len-1 - if L >= 0 and dest.sons[L].kind == pkCharChoice: - # caution! Do not introduce false aliasing here! - case elem.kind - of pkCharChoice: - dest.sons[L] = charSet(dest.sons[L].charChoice[] + elem.charChoice[]) - of pkChar: - dest.sons[L] = charSet(dest.sons[L].charChoice[] + {elem.ch}) - else: add(dest, elem) - else: add(dest, elem) - -template multipleOp(k: TPegKind, localOpt) = - result.kind = k - result.sons = @[] - for x in items(a): - if x.kind == k: - for y in items(x.sons): - localOpt(result, y) - else: - localOpt(result, x) - if result.len == 1: - result = result.sons[0] - -proc `/`*(a: varargs[TPeg]): TPeg {. - rtl, extern: "npegsOrderedChoice".} = - ## constructs an ordered choice with the PEGs in `a` - multipleOp(pkOrderedChoice, addChoice) - -proc addSequence(dest: var TPeg, elem: TPeg) = - var L = dest.len-1 - if L >= 0 and dest.sons[L].kind == pkTerminal: - # caution! Do not introduce false aliasing here! - case elem.kind - of pkTerminal: - dest.sons[L] = term(dest.sons[L].term & elem.term) - of pkChar: - dest.sons[L] = term(dest.sons[L].term & elem.ch) - else: add(dest, elem) - else: add(dest, elem) - -proc sequence*(a: varargs[TPeg]): TPeg {. - rtl, extern: "npegs$1".} = - ## constructs a sequence with all the PEGs from `a` - multipleOp(pkSequence, addSequence) - -proc `?`*(a: TPeg): TPeg {.rtl, extern: "npegsOptional".} = - ## constructs an optional for the PEG `a` - if a.kind in {pkOption, pkGreedyRep, pkGreedyAny, pkGreedyRepChar, - pkGreedyRepSet}: - # a* ? --> a* - # a? ? --> a? - result = a - else: - result.kind = pkOption - result.sons = @[a] - -proc `*`*(a: TPeg): TPeg {.rtl, extern: "npegsGreedyRep".} = - ## constructs a "greedy repetition" for the PEG `a` - case a.kind - of pkGreedyRep, pkGreedyRepChar, pkGreedyRepSet, pkGreedyAny, pkOption: - assert false - # produces endless loop! - of pkChar: - result.kind = pkGreedyRepChar - result.ch = a.ch - of pkCharChoice: - result.kind = pkGreedyRepSet - result.charChoice = a.charChoice # copying a reference suffices! - of pkAny, pkAnyRune: - result.kind = pkGreedyAny - else: - result.kind = pkGreedyRep - result.sons = @[a] - -proc `!*`*(a: TPeg): TPeg {.rtl, extern: "npegsSearch".} = - ## constructs a "search" for the PEG `a` - result.kind = pkSearch - result.sons = @[a] - -proc `!*\`*(a: TPeg): TPeg {.rtl, - extern: "npgegsCapturedSearch".} = - ## constructs a "captured search" for the PEG `a` - result.kind = pkCapturedSearch - result.sons = @[a] - -when false: - proc contains(a: TPeg, k: TPegKind): bool = - if a.kind == k: return true - case a.kind - of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine, pkTerminal, - pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, pkGreedyRepChar, - pkCharChoice, pkGreedyRepSet: discard - of pkNonTerminal: return true - else: - for i in 0..a.sons.len-1: - if contains(a.sons[i], k): return true - -proc `+`*(a: TPeg): TPeg {.rtl, extern: "npegsGreedyPosRep".} = - ## constructs a "greedy positive repetition" with the PEG `a` - return sequence(a, *a) - -proc `&`*(a: TPeg): TPeg {.rtl, extern: "npegsAndPredicate".} = - ## constructs an "and predicate" with the PEG `a` - result.kind = pkAndPredicate - result.sons = @[a] - -proc `!`*(a: TPeg): TPeg {.rtl, extern: "npegsNotPredicate".} = - ## constructs a "not predicate" with the PEG `a` - result.kind = pkNotPredicate - result.sons = @[a] - -proc any*: TPeg {.inline.} = - ## constructs the PEG `any character`:idx: (``.``) - result.kind = pkAny - -proc anyRune*: TPeg {.inline.} = - ## constructs the PEG `any rune`:idx: (``_``) - result.kind = pkAnyRune - -proc newLine*: TPeg {.inline.} = - ## constructs the PEG `newline`:idx: (``\n``) - result.kind = pkNewline - -proc UnicodeLetter*: TPeg {.inline.} = - ## constructs the PEG ``\letter`` which matches any Unicode letter. - result.kind = pkLetter - -proc UnicodeLower*: TPeg {.inline.} = - ## constructs the PEG ``\lower`` which matches any Unicode lowercase letter. - result.kind = pkLower - -proc UnicodeUpper*: TPeg {.inline.} = - ## constructs the PEG ``\upper`` which matches any Unicode lowercase letter. - result.kind = pkUpper - -proc UnicodeTitle*: TPeg {.inline.} = - ## constructs the PEG ``\title`` which matches any Unicode title letter. - result.kind = pkTitle - -proc UnicodeWhitespace*: TPeg {.inline.} = - ## constructs the PEG ``\white`` which matches any Unicode - ## whitespace character. - result.kind = pkWhitespace - -proc startAnchor*: TPeg {.inline.} = - ## constructs the PEG ``^`` which matches the start of the input. - result.kind = pkStartAnchor - -proc endAnchor*: TPeg {.inline.} = - ## constructs the PEG ``$`` which matches the end of the input. - result = !any() - -proc capture*(a: TPeg): TPeg {.rtl, extern: "npegsCapture".} = - ## constructs a capture with the PEG `a` - result.kind = pkCapture - result.sons = @[a] - -proc backref*(index: range[1..MaxSubPatterns]): TPeg {. - rtl, extern: "npegs$1".} = - ## constructs a back reference of the given `index`. `index` starts counting - ## from 1. - result.kind = pkBackRef - result.index = index-1 - -proc backrefIgnoreCase*(index: range[1..MaxSubPatterns]): TPeg {. - rtl, extern: "npegs$1".} = - ## constructs a back reference of the given `index`. `index` starts counting - ## from 1. Ignores case for matching. - result.kind = pkBackRefIgnoreCase - result.index = index-1 - -proc backrefIgnoreStyle*(index: range[1..MaxSubPatterns]): TPeg {. - rtl, extern: "npegs$1".}= - ## constructs a back reference of the given `index`. `index` starts counting - ## from 1. Ignores style for matching. - result.kind = pkBackRefIgnoreStyle - result.index = index-1 - -proc spaceCost(n: TPeg): int = - case n.kind - of pkEmpty: discard - of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, - pkGreedyRepChar, pkCharChoice, pkGreedyRepSet, - pkAny..pkWhitespace, pkGreedyAny: - result = 1 - of pkNonTerminal: - # we cannot inline a rule with a non-terminal - result = InlineThreshold+1 - else: - for i in 0..n.len-1: - inc(result, spaceCost(n.sons[i])) - if result >= InlineThreshold: break - -proc nonterminal*(n: PNonTerminal): TPeg {. - rtl, extern: "npegs$1".} = - ## constructs a PEG that consists of the nonterminal symbol - assert n != nil - if ntDeclared in n.flags and spaceCost(n.rule) < InlineThreshold: - when false: echo "inlining symbol: ", n.name - result = n.rule # inlining of rule enables better optimizations - else: - result.kind = pkNonTerminal - result.nt = n - -proc newNonTerminal*(name: string, line, column: int): PNonTerminal {. - rtl, extern: "npegs$1".} = - ## constructs a nonterminal symbol - new(result) - result.name = name - result.line = line - result.col = column - -template letters*: TPeg = - ## expands to ``charset({'A'..'Z', 'a'..'z'})`` - charset({'A'..'Z', 'a'..'z'}) - -template digits*: TPeg = - ## expands to ``charset({'0'..'9'})`` - charset({'0'..'9'}) - -template whitespace*: TPeg = - ## expands to ``charset({' ', '\9'..'\13'})`` - charset({' ', '\9'..'\13'}) - -template identChars*: TPeg = - ## expands to ``charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})`` - charset({'a'..'z', 'A'..'Z', '0'..'9', '_'}) - -template identStartChars*: TPeg = - ## expands to ``charset({'A'..'Z', 'a'..'z', '_'})`` - charset({'a'..'z', 'A'..'Z', '_'}) - -template ident*: TPeg = - ## same as ``[a-zA-Z_][a-zA-z_0-9]*``; standard identifier - sequence(charset({'a'..'z', 'A'..'Z', '_'}), - *charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})) - -template natural*: TPeg = - ## same as ``\d+`` - +digits - -# ------------------------- debugging ----------------------------------------- - -proc esc(c: char, reserved = {'\0'..'\255'}): string = - case c - of '\b': result = "\\b" - of '\t': result = "\\t" - of '\c': result = "\\c" - of '\L': result = "\\l" - of '\v': result = "\\v" - of '\f': result = "\\f" - of '\e': result = "\\e" - of '\a': result = "\\a" - of '\\': result = "\\\\" - of 'a'..'z', 'A'..'Z', '0'..'9', '_': result = $c - elif c < ' ' or c >= '\128': result = '\\' & $ord(c) - elif c in reserved: result = '\\' & c - else: result = $c - -proc singleQuoteEsc(c: char): string = return "'" & esc(c, {'\''}) & "'" - -proc singleQuoteEsc(str: string): string = - result = "'" - for c in items(str): add result, esc(c, {'\''}) - add result, '\'' - -proc charSetEscAux(cc: set[char]): string = - const reserved = {'^', '-', ']'} - result = "" - var c1 = 0 - while c1 <= 0xff: - if chr(c1) in cc: - var c2 = c1 - while c2 < 0xff and chr(succ(c2)) in cc: inc(c2) - if c1 == c2: - add result, esc(chr(c1), reserved) - elif c2 == succ(c1): - add result, esc(chr(c1), reserved) & esc(chr(c2), reserved) - else: - add result, esc(chr(c1), reserved) & '-' & esc(chr(c2), reserved) - c1 = c2 - inc(c1) - -proc charSetEsc(cc: set[char]): string = - if card(cc) >= 128+64: - result = "[^" & charSetEscAux({'\1'..'\xFF'} - cc) & ']' - else: - result = '[' & charSetEscAux(cc) & ']' - -proc toStrAux(r: TPeg, res: var string) = - case r.kind - of pkEmpty: add(res, "()") - of pkAny: add(res, '.') - of pkAnyRune: add(res, '_') - of pkLetter: add(res, "\\letter") - of pkLower: add(res, "\\lower") - of pkUpper: add(res, "\\upper") - of pkTitle: add(res, "\\title") - of pkWhitespace: add(res, "\\white") - - of pkNewline: add(res, "\\n") - of pkTerminal: add(res, singleQuoteEsc(r.term)) - of pkTerminalIgnoreCase: - add(res, 'i') - add(res, singleQuoteEsc(r.term)) - of pkTerminalIgnoreStyle: - add(res, 'y') - add(res, singleQuoteEsc(r.term)) - of pkChar: add(res, singleQuoteEsc(r.ch)) - of pkCharChoice: add(res, charSetEsc(r.charChoice[])) - of pkNonTerminal: add(res, r.nt.name) - of pkSequence: - add(res, '(') - toStrAux(r.sons[0], res) - for i in 1 .. high(r.sons): - add(res, ' ') - toStrAux(r.sons[i], res) - add(res, ')') - of pkOrderedChoice: - add(res, '(') - toStrAux(r.sons[0], res) - for i in 1 .. high(r.sons): - add(res, " / ") - toStrAux(r.sons[i], res) - add(res, ')') - of pkGreedyRep: - toStrAux(r.sons[0], res) - add(res, '*') - of pkGreedyRepChar: - add(res, singleQuoteEsc(r.ch)) - add(res, '*') - of pkGreedyRepSet: - add(res, charSetEsc(r.charChoice[])) - add(res, '*') - of pkGreedyAny: - add(res, ".*") - of pkOption: - toStrAux(r.sons[0], res) - add(res, '?') - of pkAndPredicate: - add(res, '&') - toStrAux(r.sons[0], res) - of pkNotPredicate: - add(res, '!') - toStrAux(r.sons[0], res) - of pkSearch: - add(res, '@') - toStrAux(r.sons[0], res) - of pkCapturedSearch: - add(res, "{@}") - toStrAux(r.sons[0], res) - of pkCapture: - add(res, '{') - toStrAux(r.sons[0], res) - add(res, '}') - of pkBackRef: - add(res, '$') - add(res, $r.index) - of pkBackRefIgnoreCase: - add(res, "i$") - add(res, $r.index) - of pkBackRefIgnoreStyle: - add(res, "y$") - add(res, $r.index) - of pkRule: - toStrAux(r.sons[0], res) - add(res, " <- ") - toStrAux(r.sons[1], res) - of pkList: - for i in 0 .. high(r.sons): - toStrAux(r.sons[i], res) - add(res, "\n") - of pkStartAnchor: - add(res, '^') - -proc `$` *(r: TPeg): string {.rtl, extern: "npegsToString".} = - ## converts a PEG to its string representation - result = "" - toStrAux(r, result) - -# --------------------- core engine ------------------------------------------- - -type - TCaptures* {.final.} = object ## contains the captured substrings. - matches: array[0..MaxSubpatterns-1, tuple[first, last: int]] - ml: int - origStart: int - -proc bounds*(c: TCaptures, - i: range[0..MaxSubpatterns-1]): tuple[first, last: int] = - ## returns the bounds ``[first..last]`` of the `i`'th capture. - result = c.matches[i] - -when not useUnicode: - type - Rune = char - template fastRuneAt(s, i, ch) = - ch = s[i] - inc(i) - template runeLenAt(s, i): untyped = 1 - - proc isAlpha(a: char): bool {.inline.} = return a in {'a'..'z','A'..'Z'} - proc isUpper(a: char): bool {.inline.} = return a in {'A'..'Z'} - proc isLower(a: char): bool {.inline.} = return a in {'a'..'z'} - proc isTitle(a: char): bool {.inline.} = return false - proc isWhiteSpace(a: char): bool {.inline.} = return a in {' ', '\9'..'\13'} - -proc rawMatch*(s: string, p: TPeg, start: int, c: var TCaptures): int {. - rtl, extern: "npegs$1".} = - ## low-level matching proc that implements the PEG interpreter. Use this - ## for maximum efficiency (every other PEG operation ends up calling this - ## proc). - ## Returns -1 if it does not match, else the length of the match - case p.kind - of pkEmpty: result = 0 # match of length 0 - of pkAny: - if s[start] != '\0': result = 1 - else: result = -1 - of pkAnyRune: - if s[start] != '\0': - result = runeLenAt(s, start) - else: - result = -1 - of pkLetter: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isAlpha(a): dec(result, start) - else: result = -1 - else: - result = -1 - of pkLower: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isLower(a): dec(result, start) - else: result = -1 - else: - result = -1 - of pkUpper: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isUpper(a): dec(result, start) - else: result = -1 - else: - result = -1 - of pkTitle: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isTitle(a): dec(result, start) - else: result = -1 - else: - result = -1 - of pkWhitespace: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isWhitespace(a): dec(result, start) - else: result = -1 - else: - result = -1 - of pkGreedyAny: - result = len(s) - start - of pkNewLine: - if s[start] == '\L': result = 1 - elif s[start] == '\C': - if s[start+1] == '\L': result = 2 - else: result = 1 - else: result = -1 - of pkTerminal: - result = len(p.term) - for i in 0..result-1: - if p.term[i] != s[start+i]: - result = -1 - break - of pkTerminalIgnoreCase: - var - i = 0 - a, b: Rune - result = start - while i < len(p.term): - fastRuneAt(p.term, i, a) - fastRuneAt(s, result, b) - if toLower(a) != toLower(b): - result = -1 - break - dec(result, start) - of pkTerminalIgnoreStyle: - var - i = 0 - a, b: Rune - result = start - while i < len(p.term): - while true: - fastRuneAt(p.term, i, a) - if a != Rune('_'): break - while true: - fastRuneAt(s, result, b) - if b != Rune('_'): break - if toLower(a) != toLower(b): - result = -1 - break - dec(result, start) - of pkChar: - if p.ch == s[start]: result = 1 - else: result = -1 - of pkCharChoice: - if contains(p.charChoice[], s[start]): result = 1 - else: result = -1 - of pkNonTerminal: - var oldMl = c.ml - when false: echo "enter: ", p.nt.name - result = rawMatch(s, p.nt.rule, start, c) - when false: echo "leave: ", p.nt.name - if result < 0: c.ml = oldMl - of pkSequence: - var oldMl = c.ml - result = 0 - assert(not isNil(p.sons)) - for i in 0..high(p.sons): - var x = rawMatch(s, p.sons[i], start+result, c) - if x < 0: - c.ml = oldMl - result = -1 - break - else: inc(result, x) - of pkOrderedChoice: - var oldMl = c.ml - for i in 0..high(p.sons): - result = rawMatch(s, p.sons[i], start, c) - if result >= 0: break - c.ml = oldMl - of pkSearch: - var oldMl = c.ml - result = 0 - while start+result < s.len: - var x = rawMatch(s, p.sons[0], start+result, c) - if x >= 0: - inc(result, x) - return - inc(result) - result = -1 - c.ml = oldMl - of pkCapturedSearch: - var idx = c.ml # reserve a slot for the subpattern - inc(c.ml) - result = 0 - while start+result < s.len: - var x = rawMatch(s, p.sons[0], start+result, c) - if x >= 0: - if idx < MaxSubpatterns: - c.matches[idx] = (start, start+result-1) - #else: silently ignore the capture - inc(result, x) - return - inc(result) - result = -1 - c.ml = idx - of pkGreedyRep: - result = 0 - while true: - var x = rawMatch(s, p.sons[0], start+result, c) - # if x == 0, we have an endless loop; so the correct behaviour would be - # not to break. But endless loops can be easily introduced: - # ``(comment / \w*)*`` is such an example. Breaking for x == 0 does the - # expected thing in this case. - if x <= 0: break - inc(result, x) - of pkGreedyRepChar: - result = 0 - var ch = p.ch - while ch == s[start+result]: inc(result) - of pkGreedyRepSet: - result = 0 - while contains(p.charChoice[], s[start+result]): inc(result) - of pkOption: - result = max(0, rawMatch(s, p.sons[0], start, c)) - of pkAndPredicate: - var oldMl = c.ml - result = rawMatch(s, p.sons[0], start, c) - if result >= 0: result = 0 # do not consume anything - else: c.ml = oldMl - of pkNotPredicate: - var oldMl = c.ml - result = rawMatch(s, p.sons[0], start, c) - if result < 0: result = 0 - else: - c.ml = oldMl - result = -1 - of pkCapture: - var idx = c.ml # reserve a slot for the subpattern - inc(c.ml) - result = rawMatch(s, p.sons[0], start, c) - if result >= 0: - if idx < MaxSubpatterns: - c.matches[idx] = (start, start+result-1) - #else: silently ignore the capture - else: - c.ml = idx - of pkBackRef..pkBackRefIgnoreStyle: - if p.index >= c.ml: return -1 - var (a, b) = c.matches[p.index] - var n: TPeg - n.kind = succ(pkTerminal, ord(p.kind)-ord(pkBackRef)) - n.term = s.substr(a, b) - result = rawMatch(s, n, start, c) - of pkStartAnchor: - if c.origStart == start: result = 0 - else: result = -1 - of pkRule, pkList: assert false - -proc match*(s: string, pattern: TPeg, matches: var openarray[string], - start = 0): bool {.rtl, extern: "npegs$1Capture".} = - ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and - ## the captured substrings in the array ``matches``. If it does not - ## match, nothing is written into ``matches`` and ``false`` is - ## returned. - var c: TCaptures - c.origStart = start - result = rawMatch(s, pattern, start, c) == len(s)-start - if result: - for i in 0..c.ml-1: - matches[i] = substr(s, c.matches[i][0], c.matches[i][1]) - -proc match*(s: string, pattern: TPeg, - start = 0): bool {.rtl, extern: "npegs$1".} = - ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``. - var c: TCaptures - c.origStart = start - result = rawMatch(s, pattern, start, c) == len(s)-start - -proc matchLen*(s: string, pattern: TPeg, matches: var openarray[string], - start = 0): int {.rtl, extern: "npegs$1Capture".} = - ## the same as ``match``, but it returns the length of the match, - ## if there is no match, -1 is returned. Note that a match length - ## of zero can happen. It's possible that a suffix of `s` remains - ## that does not belong to the match. - var c: TCaptures - c.origStart = start - result = rawMatch(s, pattern, start, c) - if result >= 0: - for i in 0..c.ml-1: - matches[i] = substr(s, c.matches[i][0], c.matches[i][1]) - -proc matchLen*(s: string, pattern: TPeg, - start = 0): int {.rtl, extern: "npegs$1".} = - ## the same as ``match``, but it returns the length of the match, - ## if there is no match, -1 is returned. Note that a match length - ## of zero can happen. It's possible that a suffix of `s` remains - ## that does not belong to the match. - var c: TCaptures - c.origStart = start - result = rawMatch(s, pattern, start, c) - -proc find*(s: string, pattern: TPeg, matches: var openarray[string], - start = 0): int {.rtl, extern: "npegs$1Capture".} = - ## returns the starting position of ``pattern`` in ``s`` and the captured - ## substrings in the array ``matches``. If it does not match, nothing - ## is written into ``matches`` and -1 is returned. - for i in start .. s.len-1: - if matchLen(s, pattern, matches, i) >= 0: return i - return -1 - # could also use the pattern here: (!P .)* P - -proc findBounds*(s: string, pattern: TPeg, matches: var openarray[string], - start = 0): tuple[first, last: int] {. - rtl, extern: "npegs$1Capture".} = - ## returns the starting position and end position of ``pattern`` in ``s`` - ## and the captured - ## substrings in the array ``matches``. If it does not match, nothing - ## is written into ``matches`` and (-1,0) is returned. - for i in start .. s.len-1: - var L = matchLen(s, pattern, matches, i) - if L >= 0: return (i, i+L-1) - return (-1, 0) - -proc find*(s: string, pattern: TPeg, - start = 0): int {.rtl, extern: "npegs$1".} = - ## returns the starting position of ``pattern`` in ``s``. If it does not - ## match, -1 is returned. - for i in start .. s.len-1: - if matchLen(s, pattern, i) >= 0: return i - return -1 - -iterator findAll*(s: string, pattern: TPeg, start = 0): string = - ## yields all matching captures of pattern in `s`. - var matches: array[0..MaxSubpatterns-1, string] - var i = start - while i < s.len: - var L = matchLen(s, pattern, matches, i) - if L < 0: break - for k in 0..MaxSubPatterns-1: - if isNil(matches[k]): break - yield matches[k] - inc(i, L) - -proc findAll*(s: string, pattern: TPeg, start = 0): seq[string] {. - rtl, extern: "npegs$1".} = - ## returns all matching captures of pattern in `s`. - ## If it does not match, @[] is returned. - accumulateResult(findAll(s, pattern, start)) - -template `=~`*(s: string, pattern: TPeg): untyped = - ## This calls ``match`` with an implicit declared ``matches`` array that - ## can be used in the scope of the ``=~`` call: - ## - ## .. code-block:: nim - ## - ## if line =~ peg"\s* {\w+} \s* '=' \s* {\w+}": - ## # matches a key=value pair: - ## echo("Key: ", matches[0]) - ## echo("Value: ", matches[1]) - ## elif line =~ peg"\s*{'#'.*}": - ## # matches a comment - ## # note that the implicit ``matches`` array is different from the - ## # ``matches`` array of the first branch - ## echo("comment: ", matches[0]) - ## else: - ## echo("syntax error") - ## - when not declaredInScope(matches): - var matches {.inject.}: array[0..MaxSubpatterns-1, string] - match(s, pattern, matches) - -# ------------------------- more string handling ------------------------------ - -proc contains*(s: string, pattern: TPeg, start = 0): bool {. - rtl, extern: "npegs$1".} = - ## same as ``find(s, pattern, start) >= 0`` - return find(s, pattern, start) >= 0 - -proc contains*(s: string, pattern: TPeg, matches: var openArray[string], - start = 0): bool {.rtl, extern: "npegs$1Capture".} = - ## same as ``find(s, pattern, matches, start) >= 0`` - return find(s, pattern, matches, start) >= 0 - -proc startsWith*(s: string, prefix: TPeg, start = 0): bool {. - rtl, extern: "npegs$1".} = - ## returns true if `s` starts with the pattern `prefix` - result = matchLen(s, prefix, start) >= 0 - -proc endsWith*(s: string, suffix: TPeg, start = 0): bool {. - rtl, extern: "npegs$1".} = - ## returns true if `s` ends with the pattern `prefix` - for i in start .. s.len-1: - if matchLen(s, suffix, i) == s.len - i: return true - -proc replacef*(s: string, sub: TPeg, by: string): string {. - rtl, extern: "npegs$1".} = - ## Replaces `sub` in `s` by the string `by`. Captures can be accessed in `by` - ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples: - ## - ## .. code-block:: nim - ## "var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") - ## - ## Results in: - ## - ## .. code-block:: nim - ## - ## "var1<-keykey; val2<-key2key2" - result = "" - var i = 0 - var caps: array[0..MaxSubpatterns-1, string] - while i < s.len: - var x = matchLen(s, sub, caps, i) - if x <= 0: - add(result, s[i]) - inc(i) - else: - addf(result, by, caps) - inc(i, x) - add(result, substr(s, i)) - -proc replace*(s: string, sub: TPeg, by = ""): string {. - rtl, extern: "npegs$1".} = - ## Replaces `sub` in `s` by the string `by`. Captures cannot be accessed - ## in `by`. - result = "" - var i = 0 - var caps: array[0..MaxSubpatterns-1, string] - while i < s.len: - var x = matchLen(s, sub, caps, i) - if x <= 0: - add(result, s[i]) - inc(i) - else: - addf(result, by, caps) - inc(i, x) - add(result, substr(s, i)) - -proc parallelReplace*(s: string, subs: varargs[ - tuple[pattern: TPeg, repl: string]]): string {. - rtl, extern: "npegs$1".} = - ## Returns a modified copy of `s` with the substitutions in `subs` - ## applied in parallel. - result = "" - var i = 0 - var caps: array[0..MaxSubpatterns-1, string] - while i < s.len: - block searchSubs: - for j in 0..high(subs): - var x = matchLen(s, subs[j][0], caps, i) - if x > 0: - addf(result, subs[j][1], caps) - inc(i, x) - break searchSubs - add(result, s[i]) - inc(i) - # copy the rest: - add(result, substr(s, i)) - -proc transformFile*(infile, outfile: string, - subs: varargs[tuple[pattern: TPeg, repl: string]]) {. - rtl, extern: "npegs$1".} = - ## reads in the file `infile`, performs a parallel replacement (calls - ## `parallelReplace`) and writes back to `outfile`. Calls ``quit`` if an - ## error occurs. This is supposed to be used for quick scripting. - var x = readFile(infile) - if not isNil(x): - var f: File - if open(f, outfile, fmWrite): - write(f, x.parallelReplace(subs)) - close(f) - else: - quit("cannot open for writing: " & outfile) - else: - quit("cannot open for reading: " & infile) - -iterator split*(s: string, sep: TPeg): string = - ## Splits the string `s` into substrings. - ## - ## Substrings are separated by the PEG `sep`. - ## Examples: - ## - ## .. code-block:: nim - ## for word in split("00232this02939is39an22example111", peg"\d+"): - ## writeLine(stdout, word) - ## - ## Results in: - ## - ## .. code-block:: nim - ## "this" - ## "is" - ## "an" - ## "example" - ## - var - first = 0 - last = 0 - while last < len(s): - var x = matchLen(s, sep, last) - if x > 0: inc(last, x) - first = last - while last < len(s): - inc(last) - x = matchLen(s, sep, last) - if x > 0: break - if first < last: - yield substr(s, first, last-1) - -proc split*(s: string, sep: TPeg): seq[string] {. - rtl, extern: "npegs$1".} = - ## Splits the string `s` into substrings. - accumulateResult(split(s, sep)) - -# ------------------- scanner ------------------------------------------------- - -type - TModifier = enum - modNone, - modVerbatim, - modIgnoreCase, - modIgnoreStyle - TTokKind = enum ## enumeration of all tokens - tkInvalid, ## invalid token - tkEof, ## end of file reached - tkAny, ## . - tkAnyRune, ## _ - tkIdentifier, ## abc - tkStringLit, ## "abc" or 'abc' - tkCharSet, ## [^A-Z] - tkParLe, ## '(' - tkParRi, ## ')' - tkCurlyLe, ## '{' - tkCurlyRi, ## '}' - tkCurlyAt, ## '{@}' - tkArrow, ## '<-' - tkBar, ## '/' - tkStar, ## '*' - tkPlus, ## '+' - tkAmp, ## '&' - tkNot, ## '!' - tkOption, ## '?' - tkAt, ## '@' - tkBuiltin, ## \identifier - tkEscaped, ## \\ - tkBackref, ## '$' - tkDollar, ## '$' - tkHat ## '^' - - TToken {.final.} = object ## a token - kind: TTokKind ## the type of the token - modifier: TModifier - literal: string ## the parsed (string) literal - charset: set[char] ## if kind == tkCharSet - index: int ## if kind == tkBackref - - TPegLexer {.inheritable.} = object ## the lexer object. - bufpos: int ## the current position within the buffer - buf: cstring ## the buffer itself - lineNumber: int ## the current line number - lineStart: int ## index of last line start in buffer - colOffset: int ## column to add - filename: string - -const - tokKindToStr: array[TTokKind, string] = [ - "invalid", "[EOF]", ".", "_", "identifier", "string literal", - "character set", "(", ")", "{", "}", "{@}", - "<-", "/", "*", "+", "&", "!", "?", - "@", "built-in", "escaped", "$", "$", "^" - ] - -proc HandleCR(L: var TPegLexer, pos: int): int = - assert(L.buf[pos] == '\c') - inc(L.linenumber) - result = pos+1 - if L.buf[result] == '\L': inc(result) - L.lineStart = result - -proc HandleLF(L: var TPegLexer, pos: int): int = - assert(L.buf[pos] == '\L') - inc(L.linenumber) - result = pos+1 - L.lineStart = result - -proc init(L: var TPegLexer, input, filename: string, line = 1, col = 0) = - L.buf = input - L.bufpos = 0 - L.lineNumber = line - L.colOffset = col - L.lineStart = 0 - L.filename = filename - -proc getColumn(L: TPegLexer): int {.inline.} = - result = abs(L.bufpos - L.lineStart) + L.colOffset - -proc getLine(L: TPegLexer): int {.inline.} = - result = L.linenumber - -proc errorStr(L: TPegLexer, msg: string, line = -1, col = -1): string = - var line = if line < 0: getLine(L) else: line - var col = if col < 0: getColumn(L) else: col - result = "$1($2, $3) Error: $4" % [L.filename, $line, $col, msg] - -proc handleHexChar(c: var TPegLexer, xi: var int) = - case c.buf[c.bufpos] - of '0'..'9': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - of 'a'..'f': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) - inc(c.bufpos) - of 'A'..'F': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) - inc(c.bufpos) - else: discard - -proc getEscapedChar(c: var TPegLexer, tok: var TToken) = - inc(c.bufpos) - case c.buf[c.bufpos] - of 'r', 'R', 'c', 'C': - add(tok.literal, '\c') - inc(c.bufpos) - of 'l', 'L': - add(tok.literal, '\L') - inc(c.bufpos) - of 'f', 'F': - add(tok.literal, '\f') - inc(c.bufpos) - of 'e', 'E': - add(tok.literal, '\e') - inc(c.bufpos) - of 'a', 'A': - add(tok.literal, '\a') - inc(c.bufpos) - of 'b', 'B': - add(tok.literal, '\b') - inc(c.bufpos) - of 'v', 'V': - add(tok.literal, '\v') - inc(c.bufpos) - of 't', 'T': - add(tok.literal, '\t') - inc(c.bufpos) - of 'x', 'X': - inc(c.bufpos) - var xi = 0 - handleHexChar(c, xi) - handleHexChar(c, xi) - if xi == 0: tok.kind = tkInvalid - else: add(tok.literal, chr(xi)) - of '0'..'9': - var val = ord(c.buf[c.bufpos]) - ord('0') - inc(c.bufpos) - var i = 1 - while (i <= 3) and (c.buf[c.bufpos] in {'0'..'9'}): - val = val * 10 + ord(c.buf[c.bufpos]) - ord('0') - inc(c.bufpos) - inc(i) - if val > 0 and val <= 255: add(tok.literal, chr(val)) - else: tok.kind = tkInvalid - of '\0'..'\31': - tok.kind = tkInvalid - elif c.buf[c.bufpos] in strutils.Letters: - tok.kind = tkInvalid - else: - add(tok.literal, c.buf[c.bufpos]) - inc(c.bufpos) - -proc skip(c: var TPegLexer) = - var pos = c.bufpos - var buf = c.buf - while true: - case buf[pos] - of ' ', '\t': - inc(pos) - of '#': - while not (buf[pos] in {'\c', '\L', '\0'}): inc(pos) - of '\c': - pos = HandleCR(c, pos) - buf = c.buf - of '\L': - pos = HandleLF(c, pos) - buf = c.buf - else: - break # EndOfFile also leaves the loop - c.bufpos = pos - -proc getString(c: var TPegLexer, tok: var TToken) = - tok.kind = tkStringLit - var pos = c.bufPos + 1 - var buf = c.buf - var quote = buf[pos-1] - while true: - case buf[pos] - of '\\': - c.bufpos = pos - getEscapedChar(c, tok) - pos = c.bufpos - of '\c', '\L', '\0': - tok.kind = tkInvalid - break - elif buf[pos] == quote: - inc(pos) - break - else: - add(tok.literal, buf[pos]) - inc(pos) - c.bufpos = pos - -proc getDollar(c: var TPegLexer, tok: var TToken) = - var pos = c.bufPos + 1 - var buf = c.buf - if buf[pos] in {'0'..'9'}: - tok.kind = tkBackref - tok.index = 0 - while buf[pos] in {'0'..'9'}: - tok.index = tok.index * 10 + ord(buf[pos]) - ord('0') - inc(pos) - else: - tok.kind = tkDollar - c.bufpos = pos - -proc getCharSet(c: var TPegLexer, tok: var TToken) = - tok.kind = tkCharSet - tok.charset = {} - var pos = c.bufPos + 1 - var buf = c.buf - var caret = false - if buf[pos] == '^': - inc(pos) - caret = true - while true: - var ch: char - case buf[pos] - of ']': - inc(pos) - break - of '\\': - c.bufpos = pos - getEscapedChar(c, tok) - pos = c.bufpos - ch = tok.literal[tok.literal.len-1] - of '\C', '\L', '\0': - tok.kind = tkInvalid - break - else: - ch = buf[pos] - inc(pos) - incl(tok.charset, ch) - if buf[pos] == '-': - if buf[pos+1] == ']': - incl(tok.charset, '-') - inc(pos) - else: - inc(pos) - var ch2: char - case buf[pos] - of '\\': - c.bufpos = pos - getEscapedChar(c, tok) - pos = c.bufpos - ch2 = tok.literal[tok.literal.len-1] - of '\C', '\L', '\0': - tok.kind = tkInvalid - break - else: - ch2 = buf[pos] - inc(pos) - for i in ord(ch)+1 .. ord(ch2): - incl(tok.charset, chr(i)) - c.bufpos = pos - if caret: tok.charset = {'\1'..'\xFF'} - tok.charset - -proc getSymbol(c: var TPegLexer, tok: var TToken) = - var pos = c.bufpos - var buf = c.buf - while true: - add(tok.literal, buf[pos]) - inc(pos) - if buf[pos] notin strutils.IdentChars: break - c.bufpos = pos - tok.kind = tkIdentifier - -proc getBuiltin(c: var TPegLexer, tok: var TToken) = - if c.buf[c.bufpos+1] in strutils.Letters: - inc(c.bufpos) - getSymbol(c, tok) - tok.kind = tkBuiltin - else: - tok.kind = tkEscaped - getEscapedChar(c, tok) # may set tok.kind to tkInvalid - -proc getTok(c: var TPegLexer, tok: var TToken) = - tok.kind = tkInvalid - tok.modifier = modNone - setlen(tok.literal, 0) - skip(c) - case c.buf[c.bufpos] - of '{': - inc(c.bufpos) - if c.buf[c.bufpos] == '@' and c.buf[c.bufpos+1] == '}': - tok.kind = tkCurlyAt - inc(c.bufpos, 2) - add(tok.literal, "{@}") - else: - tok.kind = tkCurlyLe - add(tok.literal, '{') - of '}': - tok.kind = tkCurlyRi - inc(c.bufpos) - add(tok.literal, '}') - of '[': - getCharset(c, tok) - of '(': - tok.kind = tkParLe - inc(c.bufpos) - add(tok.literal, '(') - of ')': - tok.kind = tkParRi - inc(c.bufpos) - add(tok.literal, ')') - of '.': - tok.kind = tkAny - inc(c.bufpos) - add(tok.literal, '.') - of '_': - tok.kind = tkAnyRune - inc(c.bufpos) - add(tok.literal, '_') - of '\\': - getBuiltin(c, tok) - of '\'', '"': getString(c, tok) - of '$': getDollar(c, tok) - of '\0': - tok.kind = tkEof - tok.literal = "[EOF]" - of 'a'..'z', 'A'..'Z', '\128'..'\255': - getSymbol(c, tok) - if c.buf[c.bufpos] in {'\'', '"'} or - c.buf[c.bufpos] == '$' and c.buf[c.bufpos+1] in {'0'..'9'}: - case tok.literal - of "i": tok.modifier = modIgnoreCase - of "y": tok.modifier = modIgnoreStyle - of "v": tok.modifier = modVerbatim - else: discard - setLen(tok.literal, 0) - if c.buf[c.bufpos] == '$': - getDollar(c, tok) - else: - getString(c, tok) - if tok.modifier == modNone: tok.kind = tkInvalid - of '+': - tok.kind = tkPlus - inc(c.bufpos) - add(tok.literal, '+') - of '*': - tok.kind = tkStar - inc(c.bufpos) - add(tok.literal, '+') - of '<': - if c.buf[c.bufpos+1] == '-': - inc(c.bufpos, 2) - tok.kind = tkArrow - add(tok.literal, "<-") - else: - add(tok.literal, '<') - of '/': - tok.kind = tkBar - inc(c.bufpos) - add(tok.literal, '/') - of '?': - tok.kind = tkOption - inc(c.bufpos) - add(tok.literal, '?') - of '!': - tok.kind = tkNot - inc(c.bufpos) - add(tok.literal, '!') - of '&': - tok.kind = tkAmp - inc(c.bufpos) - add(tok.literal, '!') - of '@': - tok.kind = tkAt - inc(c.bufpos) - add(tok.literal, '@') - if c.buf[c.bufpos] == '@': - tok.kind = tkCurlyAt - inc(c.bufpos) - add(tok.literal, '@') - of '^': - tok.kind = tkHat - inc(c.bufpos) - add(tok.literal, '^') - else: - add(tok.literal, c.buf[c.bufpos]) - inc(c.bufpos) - -proc arrowIsNextTok(c: TPegLexer): bool = - # the only look ahead we need - var pos = c.bufpos - while c.buf[pos] in {'\t', ' '}: inc(pos) - result = c.buf[pos] == '<' and c.buf[pos+1] == '-' - -# ----------------------------- parser ---------------------------------------- - -type - EInvalidPeg* = object of ValueError ## raised if an invalid - ## PEG has been detected - TPegParser = object of TPegLexer ## the PEG parser object - tok: TToken - nonterms: seq[PNonTerminal] - modifier: TModifier - captures: int - identIsVerbatim: bool - skip: TPeg - -proc pegError(p: TPegParser, msg: string, line = -1, col = -1) = - var e: ref EInvalidPeg - new(e) - e.msg = errorStr(p, msg, line, col) - raise e - -proc getTok(p: var TPegParser) = - getTok(p, p.tok) - if p.tok.kind == tkInvalid: pegError(p, "invalid token") - -proc eat(p: var TPegParser, kind: TTokKind) = - if p.tok.kind == kind: getTok(p) - else: pegError(p, tokKindToStr[kind] & " expected") - -proc parseExpr(p: var TPegParser): TPeg - -proc getNonTerminal(p: var TPegParser, name: string): PNonTerminal = - for i in 0..high(p.nonterms): - result = p.nonterms[i] - if cmpIgnoreStyle(result.name, name) == 0: return - # forward reference: - result = newNonTerminal(name, getLine(p), getColumn(p)) - add(p.nonterms, result) - -proc modifiedTerm(s: string, m: TModifier): TPeg = - case m - of modNone, modVerbatim: result = term(s) - of modIgnoreCase: result = termIgnoreCase(s) - of modIgnoreStyle: result = termIgnoreStyle(s) - -proc modifiedBackref(s: int, m: TModifier): TPeg = - case m - of modNone, modVerbatim: result = backRef(s) - of modIgnoreCase: result = backRefIgnoreCase(s) - of modIgnoreStyle: result = backRefIgnoreStyle(s) - -proc builtin(p: var TPegParser): TPeg = - # do not use "y", "skip" or "i" as these would be ambiguous - case p.tok.literal - of "n": result = newLine() - of "d": result = charset({'0'..'9'}) - of "D": result = charset({'\1'..'\xff'} - {'0'..'9'}) - of "s": result = charset({' ', '\9'..'\13'}) - of "S": result = charset({'\1'..'\xff'} - {' ', '\9'..'\13'}) - of "w": result = charset({'a'..'z', 'A'..'Z', '_', '0'..'9'}) - of "W": result = charset({'\1'..'\xff'} - {'a'..'z','A'..'Z','_','0'..'9'}) - of "a": result = charset({'a'..'z', 'A'..'Z'}) - of "A": result = charset({'\1'..'\xff'} - {'a'..'z', 'A'..'Z'}) - of "ident": result = tpegs.ident - of "letter": result = UnicodeLetter() - of "upper": result = UnicodeUpper() - of "lower": result = UnicodeLower() - of "title": result = UnicodeTitle() - of "white": result = UnicodeWhitespace() - else: pegError(p, "unknown built-in: " & p.tok.literal) - -proc token(terminal: TPeg, p: TPegParser): TPeg = - if p.skip.kind == pkEmpty: result = terminal - else: result = sequence(p.skip, terminal) - -proc primary(p: var TPegParser): TPeg = - case p.tok.kind - of tkAmp: - getTok(p) - return &primary(p) - of tkNot: - getTok(p) - return !primary(p) - of tkAt: - getTok(p) - return !*primary(p) - of tkCurlyAt: - getTok(p) - return !*\primary(p).token(p) - else: discard - case p.tok.kind - of tkIdentifier: - if p.identIsVerbatim: - var m = p.tok.modifier - if m == modNone: m = p.modifier - result = modifiedTerm(p.tok.literal, m).token(p) - getTok(p) - elif not arrowIsNextTok(p): - var nt = getNonTerminal(p, p.tok.literal) - incl(nt.flags, ntUsed) - result = nonTerminal(nt).token(p) - getTok(p) - else: - pegError(p, "expression expected, but found: " & p.tok.literal) - of tkStringLit: - var m = p.tok.modifier - if m == modNone: m = p.modifier - result = modifiedTerm(p.tok.literal, m).token(p) - getTok(p) - of tkCharSet: - if '\0' in p.tok.charset: - pegError(p, "binary zero ('\\0') not allowed in character class") - result = charset(p.tok.charset).token(p) - getTok(p) - of tkParLe: - getTok(p) - result = parseExpr(p) - eat(p, tkParRi) - of tkCurlyLe: - getTok(p) - result = capture(parseExpr(p)).token(p) - eat(p, tkCurlyRi) - inc(p.captures) - of tkAny: - result = any().token(p) - getTok(p) - of tkAnyRune: - result = anyRune().token(p) - getTok(p) - of tkBuiltin: - result = builtin(p).token(p) - getTok(p) - of tkEscaped: - result = term(p.tok.literal[0]).token(p) - getTok(p) - of tkDollar: - result = endAnchor() - getTok(p) - of tkHat: - result = startAnchor() - getTok(p) - of tkBackref: - var m = p.tok.modifier - if m == modNone: m = p.modifier - result = modifiedBackRef(p.tok.index, m).token(p) - if p.tok.index < 0 or p.tok.index > p.captures: - pegError(p, "invalid back reference index: " & $p.tok.index) - getTok(p) - else: - pegError(p, "expression expected, but found: " & p.tok.literal) - getTok(p) # we must consume a token here to prevent endless loops! - while true: - case p.tok.kind - of tkOption: - result = ?result - getTok(p) - of tkStar: - result = *result - getTok(p) - of tkPlus: - result = +result - getTok(p) - else: break - -proc seqExpr(p: var TPegParser): TPeg = - result = primary(p) - while true: - case p.tok.kind - of tkAmp, tkNot, tkAt, tkStringLit, tkCharset, tkParLe, tkCurlyLe, - tkAny, tkAnyRune, tkBuiltin, tkEscaped, tkDollar, tkBackref, - tkHat, tkCurlyAt: - result = sequence(result, primary(p)) - of tkIdentifier: - if not arrowIsNextTok(p): - result = sequence(result, primary(p)) - else: break - else: break - -proc parseExpr(p: var TPegParser): TPeg = - result = seqExpr(p) - while p.tok.kind == tkBar: - getTok(p) - result = result / seqExpr(p) - -proc parseRule(p: var TPegParser): PNonTerminal = - if p.tok.kind == tkIdentifier and arrowIsNextTok(p): - result = getNonTerminal(p, p.tok.literal) - if ntDeclared in result.flags: - pegError(p, "attempt to redefine: " & result.name) - result.line = getLine(p) - result.col = getColumn(p) - getTok(p) - eat(p, tkArrow) - result.rule = parseExpr(p) - incl(result.flags, ntDeclared) # NOW inlining may be attempted - else: - pegError(p, "rule expected, but found: " & p.tok.literal) - -proc rawParse(p: var TPegParser): TPeg = - ## parses a rule or a PEG expression - while p.tok.kind == tkBuiltin: - case p.tok.literal - of "i": - p.modifier = modIgnoreCase - getTok(p) - of "y": - p.modifier = modIgnoreStyle - getTok(p) - of "skip": - getTok(p) - p.skip = ?primary(p) - else: break - if p.tok.kind == tkIdentifier and arrowIsNextTok(p): - result = parseRule(p).rule - while p.tok.kind != tkEof: - discard parseRule(p) - else: - p.identIsVerbatim = true - result = parseExpr(p) - if p.tok.kind != tkEof: - pegError(p, "EOF expected, but found: " & p.tok.literal) - for i in 0..high(p.nonterms): - var nt = p.nonterms[i] - if ntDeclared notin nt.flags: - pegError(p, "undeclared identifier: " & nt.name, nt.line, nt.col) - elif ntUsed notin nt.flags and i > 0: - pegError(p, "unused rule: " & nt.name, nt.line, nt.col) - -proc parsePeg*(pattern: string, filename = "pattern", line = 1, col = 0): TPeg = - ## constructs a TPeg object from `pattern`. `filename`, `line`, `col` are - ## used for error messages, but they only provide start offsets. `parsePeg` - ## keeps track of line and column numbers within `pattern`. - var p: TPegParser - init(TPegLexer(p), pattern, filename, line, col) - p.tok.kind = tkInvalid - p.tok.modifier = modNone - p.tok.literal = "" - p.tok.charset = {} - p.nonterms = @[] - p.identIsVerbatim = false - getTok(p) - result = rawParse(p) - -proc peg*(pattern: string): TPeg = - ## constructs a TPeg object from the `pattern`. The short name has been - ## chosen to encourage its use as a raw string modifier:: - ## - ## peg"{\ident} \s* '=' \s* {.*}" - result = parsePeg(pattern, "pattern") - -proc escapePeg*(s: string): string = - ## escapes `s` so that it is matched verbatim when used as a peg. - result = "" - var inQuote = false - for c in items(s): - case c - of '\0'..'\31', '\'', '"', '\\': - if inQuote: - result.add('\'') - inQuote = false - result.add("\\x") - result.add(toHex(ord(c), 2)) - else: - if not inQuote: - result.add('\'') - inQuote = true - result.add(c) - if inQuote: result.add('\'') - -when isMainModule: - doAssert escapePeg("abc''def'") == r"'abc'\x27\x27'def'\x27" - #doAssert match("(a b c)", peg"'(' @ ')'") - doAssert match("W_HI_Le", peg"\y 'while'") - doAssert(not match("W_HI_L", peg"\y 'while'")) - doAssert(not match("W_HI_Le", peg"\y v'while'")) - doAssert match("W_HI_Le", peg"y'while'") - - doAssert($ +digits == $peg"\d+") - doAssert "0158787".match(peg"\d+") - doAssert "ABC 0232".match(peg"\w+\s+\d+") - doAssert "ABC".match(peg"\d+ / \w+") - - for word in split("00232this02939is39an22example111", peg"\d+"): - writeLine(stdout, word) - - doAssert matchLen("key", ident) == 3 - - var pattern = sequence(ident, *whitespace, term('='), *whitespace, ident) - doAssert matchLen("key1= cal9", pattern) == 11 - - var ws = newNonTerminal("ws", 1, 1) - ws.rule = *whitespace - - var expr = newNonTerminal("expr", 1, 1) - expr.rule = sequence(capture(ident), *sequence( - nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr))) - - var c: TCaptures - var s = "a+b + c +d+e+f" - doAssert rawMatch(s, expr.rule, 0, c) == len(s) - var a = "" - for i in 0..c.ml-1: - a.add(substr(s, c.matches[i][0], c.matches[i][1])) - doAssert a == "abcdef" - #echo expr.rule - - #const filename = "lib/devel/peg/grammar.txt" - #var grammar = parsePeg(newFileStream(filename, fmRead), filename) - #echo "a <- [abc]*?".match(grammar) - doAssert find("_____abc_______", term("abc"), 2) == 5 - doAssert match("_______ana", peg"A <- 'ana' / . A") - doAssert match("abcs%%%", peg"A <- ..A / .A / '%'") - - if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}": - doAssert matches[0] == "abc" - else: - doAssert false - - var g2 = peg"""S <- A B / C D - A <- 'a'+ - B <- 'b'+ - C <- 'c'+ - D <- 'd'+ - """ - doAssert($g2 == "((A B) / (C D))") - doAssert match("cccccdddddd", g2) - doAssert("var1=key; var2=key2".replacef(peg"{\ident}'='{\ident}", "$1<-$2$2") == - "var1<-keykey; var2<-key2key2") - doAssert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") - - if "aaaaaa" =~ peg"'aa' !. / ({'a'})+": - doAssert matches[0] == "a" - else: - doAssert false - - block: - var matches: array[0..2, string] - if match("abcdefg", peg"c {d} ef {g}", matches, 2): - doAssert matches[0] == "d" - doAssert matches[1] == "g" - else: - doAssert false - - for x in findAll("abcdef", peg"{.}", 3): - echo x - - if "f(a, b)" =~ peg"{[0-9]+} / ({\ident} '(' {@} ')')": - doAssert matches[0] == "f" - doAssert matches[1] == "a, b" - else: - doAssert false - - doAssert match("eine übersicht und außerdem", peg"(\letter \white*)+") - # ß is not a lower cased letter?! - doAssert match("eine übersicht und auerdem", peg"(\lower \white*)+") - doAssert match("EINE ÜBERSICHT UND AUSSERDEM", peg"(\upper \white*)+") - doAssert(not match("456678", peg"(\letter)+")) - - doAssert("var1 = key; var2 = key2".replacef( - peg"\skip(\s*) {\ident}'='{\ident}", "$1<-$2$2") == - "var1<-keykey;var2<-key2key2") - - doAssert match("prefix/start", peg"^start$", 7) - - # tricky test to check for false aliasing: - block: - var a = term"key" - echo($sequence(sequence(a, term"value"), *a)) - diff --git a/tests/stdlib/twchartoutf8.nim b/tests/stdlib/twchartoutf8.nim index b2f68ee32..a6602e3e3 100644 --- a/tests/stdlib/twchartoutf8.nim +++ b/tests/stdlib/twchartoutf8.nim @@ -30,7 +30,6 @@ else: result = newString(size) let res = WideCharToMultiByte(CP_UTF8, 0'i32, cast[LPWCSTR](addr(wc[0])), wclen, cstring(result), size, cstring(nil), LPBOOL(nil)) - result[size] = chr(0) doAssert size == res proc testCP(wc: WideCString, lo, hi: int) = |