diff options
author | Arne Döring <arne.doering@gmx.net> | 2019-02-28 22:57:57 +0100 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2019-02-28 22:57:57 +0100 |
commit | 1102b8ac6e643c8f8428dd7db0994d26b0c65ea6 (patch) | |
tree | b08388f89e7867f03e5d59be00db70a6535752dc /lib | |
parent | 728ff1004a60835c18c44b64830ea08dc805485e (diff) | |
download | Nim-1102b8ac6e643c8f8428dd7db0994d26b0c65ea6.tar.gz |
StringStream and parseJson, parseCfg, parseSql et al for the vm (#10746)
Diffstat (limited to 'lib')
-rw-r--r-- | lib/packages/docutils/rst.nim | 17 | ||||
-rw-r--r-- | lib/pure/lexbase.nim | 51 | ||||
-rw-r--r-- | lib/pure/parsecfg.nim | 24 | ||||
-rw-r--r-- | lib/pure/parsecsv.nim | 15 | ||||
-rw-r--r-- | lib/pure/parsejson.nim | 76 | ||||
-rw-r--r-- | lib/pure/parsesql.nim | 101 | ||||
-rw-r--r-- | lib/pure/parsexml.nim | 129 | ||||
-rw-r--r-- | lib/pure/pegs.nim | 43 | ||||
-rw-r--r-- | lib/pure/streams.nim | 33 |
9 files changed, 216 insertions, 273 deletions
diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index 615119135..0b077b1f1 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -155,18 +155,17 @@ proc getAdornment(L: var Lexer, tok: var Token) = proc getIndentAux(L: var Lexer, start: int): int = var pos = start - var buf = L.buf # skip the newline (but include it in the token!) - if buf[pos] == '\x0D': - if buf[pos + 1] == '\x0A': inc(pos, 2) + if L.buf[pos] == '\x0D': + if L.buf[pos + 1] == '\x0A': inc(pos, 2) else: inc(pos) - elif buf[pos] == '\x0A': + elif L.buf[pos] == '\x0A': inc(pos) if L.skipPounds: - if buf[pos] == '#': inc(pos) - if buf[pos] == '#': inc(pos) + if L.buf[pos] == '#': inc(pos) + if L.buf[pos] == '#': inc(pos) while true: - case buf[pos] + case L.buf[pos] of ' ', '\x0B', '\x0C': inc(pos) inc(result) @@ -175,9 +174,9 @@ proc getIndentAux(L: var Lexer, start: int): int = result = result - (result mod 8) + 8 else: break # EndOfFile also leaves the loop - if buf[pos] == '\0': + if L.buf[pos] == '\0': result = 0 - elif (buf[pos] == '\x0A') or (buf[pos] == '\x0D'): + elif (L.buf[pos] == '\x0A') or (L.buf[pos] == '\x0D'): # look at the next line for proper indentation: result = getIndentAux(L, pos) L.bufpos = pos # no need to set back buf diff --git a/lib/pure/lexbase.nim b/lib/pure/lexbase.nim index e38acd5ef..11ec45a37 100644 --- a/lib/pure/lexbase.nim +++ b/lib/pure/lexbase.nim @@ -28,11 +28,7 @@ type BaseLexer* = object of RootObj ## the base lexer. Inherit your lexer from ## this object. bufpos*: int ## the current position within the buffer - when defined(js): ## the buffer itself - buf*: string - else: - buf*: cstring - bufLen*: int ## length of buffer in characters + buf*: string ## the buffer itself input: Stream ## the input stream lineNumber*: int ## the current line number sentinel: int @@ -40,13 +36,8 @@ type offsetBase*: int # use ``offsetBase + bufpos`` to get the offset refillChars: set[char] -const - chrSize = sizeof(char) - proc close*(L: var BaseLexer) = ## closes the base lexer. This closes `L`'s associated stream too. - when not defined(js): - dealloc(L.buf) close(L.input) proc fillBuffer(L: var BaseLexer) = @@ -57,17 +48,21 @@ proc fillBuffer(L: var BaseLexer) = oldBufLen: int # we know here that pos == L.sentinel, but not if this proc # is called the first time by initBaseLexer() - assert(L.sentinel < L.bufLen) - toCopy = L.bufLen - L.sentinel - 1 + assert(L.sentinel + 1 <= L.buf.len) + toCopy = L.buf.len - (L.sentinel + 1) assert(toCopy >= 0) if toCopy > 0: when defined(js): - for i in 0 ..< toCopy: L.buf[i] = L.buf[L.sentinel + 1 + i] + for i in 0 ..< toCopy: + L.buf[i] = L.buf[L.sentinel + 1 + i] else: - # "moveMem" handles overlapping regions - moveMem(L.buf, addr L.buf[L.sentinel + 1], toCopy * chrSize) - charsRead = readData(L.input, addr(L.buf[toCopy]), - (L.sentinel + 1) * chrSize) div chrSize + when nimvm: + for i in 0 ..< toCopy: + L.buf[i] = L.buf[L.sentinel + 1 + i] + else: + # "moveMem" handles overlapping regions + moveMem(addr L.buf[0], addr L.buf[L.sentinel + 1], toCopy) + charsRead = L.input.readDataStr(L.buf, toCopy ..< toCopy + L.sentinel + 1) s = toCopy + charsRead if charsRead < L.sentinel + 1: L.buf[s] = EndOfFile # set end marker @@ -76,7 +71,7 @@ proc fillBuffer(L: var BaseLexer) = # compute sentinel: dec(s) # BUGFIX (valgrind) while true: - assert(s < L.bufLen) + assert(s < L.buf.len) while s >= 0 and L.buf[s] notin L.refillChars: dec(s) if s >= 0: # we found an appropriate character for a sentinel: @@ -85,20 +80,14 @@ proc fillBuffer(L: var BaseLexer) = else: # rather than to give up here because the line is too long, # double the buffer's size and try again: - oldBufLen = L.bufLen - L.bufLen = L.bufLen * 2 - when defined(js): - L.buf.setLen(L.bufLen) - else: - L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize)) - assert(L.bufLen - oldBufLen == oldBufLen) - charsRead = readData(L.input, addr(L.buf[oldBufLen]), - oldBufLen * chrSize) div chrSize + oldBufLen = L.buf.len + L.buf.setLen(L.buf.len * 2) + charsRead = readDataStr(L.input, L.buf, oldBufLen ..< L.buf.len) if charsRead < oldBufLen: L.buf[oldBufLen + charsRead] = EndOfFile L.sentinel = oldBufLen + charsRead break - s = L.bufLen - 1 + s = L.buf.len - 1 proc fillBaseLexer(L: var BaseLexer, pos: int): int = assert(pos <= L.sentinel) @@ -148,12 +137,8 @@ proc open*(L: var BaseLexer, input: Stream, bufLen: int = 8192; L.input = input L.bufpos = 0 L.offsetBase = 0 - L.bufLen = bufLen L.refillChars = refillChars - when defined(js): - L.buf = newString(bufLen) - else: - L.buf = cast[cstring](alloc(bufLen * chrSize)) + L.buf = newString(bufLen) L.sentinel = bufLen - 1 L.lineStart = 0 L.lineNumber = 1 # lines start at 1 diff --git a/lib/pure/parsecfg.nim b/lib/pure/parsecfg.nim index 106d59017..d043cd321 100644 --- a/lib/pure/parsecfg.nim +++ b/lib/pure/parsecfg.nim @@ -261,35 +261,32 @@ proc handleCRLF(c: var CfgParser, pos: int): int = proc getString(c: var CfgParser, tok: var Token, rawMode: bool) = var pos = c.bufpos + 1 # skip " - var buf = c.buf # put `buf` in a register tok.kind = tkSymbol - if (buf[pos] == '"') and (buf[pos + 1] == '"'): + if (c.buf[pos] == '"') and (c.buf[pos + 1] == '"'): # long string literal: inc(pos, 2) # skip "" # skip leading newline: pos = handleCRLF(c, pos) - buf = c.buf while true: - case buf[pos] + case c.buf[pos] of '"': - if (buf[pos + 1] == '"') and (buf[pos + 2] == '"'): break + if (c.buf[pos + 1] == '"') and (c.buf[pos + 2] == '"'): break add(tok.literal, '"') inc(pos) of '\c', '\L': pos = handleCRLF(c, pos) - buf = c.buf add(tok.literal, "\n") of lexbase.EndOfFile: tok.kind = tkInvalid break else: - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) c.bufpos = pos + 3 # skip the three """ else: # ordinary string literal while true: - var ch = buf[pos] + var ch = c.buf[pos] if ch == '"': inc(pos) # skip '"' break @@ -307,26 +304,23 @@ proc getString(c: var CfgParser, tok: var Token, rawMode: bool) = proc getSymbol(c: var CfgParser, tok: var Token) = var pos = c.bufpos - var buf = c.buf while true: - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) - if not (buf[pos] in SymChars): break + if not (c.buf[pos] in SymChars): break c.bufpos = pos tok.kind = tkSymbol proc skip(c: var CfgParser) = var pos = c.bufpos - var buf = c.buf while true: - case buf[pos] + case c.buf[pos] of ' ', '\t': inc(pos) of '#', ';': - while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) + while not (c.buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) of '\c', '\L': pos = handleCRLF(c, pos) - buf = c.buf else: break # EndOfFile also leaves the loop c.bufpos = pos diff --git a/lib/pure/parsecsv.nim b/lib/pure/parsecsv.nim index e0c4f38a4..402e3ad31 100644 --- a/lib/pure/parsecsv.nim +++ b/lib/pure/parsecsv.nim @@ -156,44 +156,41 @@ proc open*(my: var CsvParser, filename: string, proc parseField(my: var CsvParser, a: var string) = var pos = my.bufpos - var buf = my.buf if my.skipWhite: - while buf[pos] in {' ', '\t'}: inc(pos) + while my.buf[pos] in {' ', '\t'}: inc(pos) setLen(a, 0) # reuse memory - if buf[pos] == my.quote and my.quote != '\0': + if my.buf[pos] == my.quote and my.quote != '\0': inc(pos) while true: - let c = buf[pos] + let c = my.buf[pos] if c == '\0': my.bufpos = pos # can continue after exception? error(my, pos, my.quote & " expected") break elif c == my.quote: - if my.esc == '\0' and buf[pos+1] == my.quote: + if my.esc == '\0' and my.buf[pos+1] == my.quote: add(a, my.quote) inc(pos, 2) else: inc(pos) break elif c == my.esc: - add(a, buf[pos+1]) + add(a, my.buf[pos+1]) inc(pos, 2) else: case c of '\c': pos = handleCR(my, pos) - buf = my.buf add(a, "\n") of '\l': pos = handleLF(my, pos) - buf = my.buf add(a, "\n") else: add(a, c) inc(pos) else: while true: - let c = buf[pos] + let c = my.buf[pos] if c == my.sep: break if c in {'\c', '\l', '\0'}: break add(a, c) diff --git a/lib/pure/parsejson.nim b/lib/pure/parsejson.nim index 9c53af6a6..abf2854dd 100644 --- a/lib/pure/parsejson.nim +++ b/lib/pure/parsejson.nim @@ -182,11 +182,10 @@ proc parseEscapedUTF16*(buf: cstring, pos: var int): int = proc parseString(my: var JsonParser): TokKind = result = tkString var pos = my.bufpos + 1 - var buf = my.buf if my.rawStringLiterals: add(my.a, '"') while true: - case buf[pos] + case my.buf[pos] of '\0': my.err = errQuoteExpected result = tkError @@ -199,9 +198,9 @@ proc parseString(my: var JsonParser): TokKind = of '\\': if my.rawStringLiterals: add(my.a, '\\') - case buf[pos+1] + case my.buf[pos+1] of '\\', '"', '\'', '/': - add(my.a, buf[pos+1]) + add(my.a, my.buf[pos+1]) inc(pos, 2) of 'b': add(my.a, '\b') @@ -223,17 +222,17 @@ proc parseString(my: var JsonParser): TokKind = add(my.a, 'u') inc(pos, 2) var pos2 = pos - var r = parseEscapedUTF16(buf, pos) + var r = parseEscapedUTF16(my.buf, pos) if r < 0: my.err = errInvalidToken break # Deal with surrogates if (r and 0xfc00) == 0xd800: - if buf[pos] != '\\' or buf[pos+1] != 'u': + if my.buf[pos] != '\\' or my.buf[pos+1] != 'u': my.err = errInvalidToken break inc(pos, 2) - var s = parseEscapedUTF16(buf, pos) + var s = parseEscapedUTF16(my.buf, pos) if (s and 0xfc00) == 0xdc00 and s > 0: r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00)) else: @@ -242,8 +241,8 @@ proc parseString(my: var JsonParser): TokKind = if my.rawStringLiterals: let length = pos - pos2 for i in 1 .. length: - if buf[pos2] in {'0'..'9', 'A'..'F', 'a'..'f'}: - add(my.a, buf[pos2]) + if my.buf[pos2] in {'0'..'9', 'A'..'F', 'a'..'f'}: + add(my.a, my.buf[pos2]) inc pos2 else: break @@ -251,61 +250,54 @@ proc parseString(my: var JsonParser): TokKind = add(my.a, toUTF8(Rune(r))) else: # don't bother with the error - add(my.a, buf[pos]) + add(my.a, my.buf[pos]) inc(pos) of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf add(my.a, '\c') of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf add(my.a, '\L') else: - add(my.a, buf[pos]) + add(my.a, my.buf[pos]) inc(pos) my.bufpos = pos # store back proc skip(my: var JsonParser) = var pos = my.bufpos - var buf = my.buf while true: - case buf[pos] + case my.buf[pos] of '/': - if buf[pos+1] == '/': + if my.buf[pos+1] == '/': # skip line comment: inc(pos, 2) while true: - case buf[pos] + case my.buf[pos] of '\0': break of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf break of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf break else: inc(pos) - elif buf[pos+1] == '*': + elif my.buf[pos+1] == '*': # skip long comment: inc(pos, 2) while true: - case buf[pos] + case my.buf[pos] of '\0': my.err = errEOC_Expected break of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf of '*': inc(pos) - if buf[pos] == '/': + if my.buf[pos] == '/': inc(pos) break else: @@ -316,51 +308,47 @@ proc skip(my: var JsonParser) = inc(pos) of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf else: break my.bufpos = pos proc parseNumber(my: var JsonParser) = var pos = my.bufpos - var buf = my.buf - if buf[pos] == '-': + if my.buf[pos] == '-': add(my.a, '-') inc(pos) - if buf[pos] == '.': + if my.buf[pos] == '.': add(my.a, "0.") inc(pos) else: - while buf[pos] in Digits: - add(my.a, buf[pos]) + while my.buf[pos] in Digits: + add(my.a, my.buf[pos]) inc(pos) - if buf[pos] == '.': + if my.buf[pos] == '.': add(my.a, '.') inc(pos) # digits after the dot: - while buf[pos] in Digits: - add(my.a, buf[pos]) + while my.buf[pos] in Digits: + add(my.a, my.buf[pos]) inc(pos) - if buf[pos] in {'E', 'e'}: - add(my.a, buf[pos]) + if my.buf[pos] in {'E', 'e'}: + add(my.a, my.buf[pos]) inc(pos) - if buf[pos] in {'+', '-'}: - add(my.a, buf[pos]) + if my.buf[pos] in {'+', '-'}: + add(my.a, my.buf[pos]) inc(pos) - while buf[pos] in Digits: - add(my.a, buf[pos]) + while my.buf[pos] in Digits: + add(my.a, my.buf[pos]) inc(pos) my.bufpos = pos proc parseName(my: var JsonParser) = var pos = my.bufpos - var buf = my.buf - if buf[pos] in IdentStartChars: - while buf[pos] in IdentChars: - add(my.a, buf[pos]) + if my.buf[pos] in IdentStartChars: + while my.buf[pos] in IdentChars: + add(my.a, my.buf[pos]) inc(pos) my.bufpos = pos diff --git a/lib/pure/parsesql.nim b/lib/pure/parsesql.nim index f0961829b..abe712e7f 100644 --- a/lib/pure/parsesql.nim +++ b/lib/pure/parsesql.nim @@ -148,35 +148,33 @@ proc handleCRLF(c: var SqlLexer, pos: int): int = proc skip(c: var SqlLexer) = var pos = c.bufpos - var buf = c.buf var nested = 0 while true: - case buf[pos] + case c.buf[pos] of ' ', '\t': inc(pos) of '-': - if buf[pos+1] == '-': - while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) + if c.buf[pos+1] == '-': + while not (c.buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) else: break of '/': - if buf[pos+1] == '*': + if c.buf[pos+1] == '*': inc(pos,2) while true: - case buf[pos] + case c.buf[pos] of '\0': break of '\c', '\L': pos = handleCRLF(c, pos) - buf = c.buf of '*': - if buf[pos+1] == '/': + if c.buf[pos+1] == '/': inc(pos, 2) if nested <= 0: break dec(nested) else: inc(pos) of '/': - if buf[pos+1] == '*': + if c.buf[pos+1] == '*': inc(pos, 2) inc(nested) else: @@ -185,21 +183,19 @@ proc skip(c: var SqlLexer) = else: break of '\c', '\L': pos = handleCRLF(c, pos) - buf = c.buf else: break # EndOfFile also leaves the loop c.bufpos = pos proc getString(c: var SqlLexer, tok: var Token, kind: TokKind) = var pos = c.bufpos + 1 - var buf = c.buf tok.kind = kind block parseLoop: while true: while true: - var ch = buf[pos] + var ch = c.buf[pos] if ch == '\'': - if buf[pos+1] == '\'': + if c.buf[pos+1] == '\'': inc(pos, 2) add(tok.literal, '\'') else: @@ -221,30 +217,27 @@ proc getString(c: var SqlLexer, tok: var Token, kind: TokKind) = if c.lineNumber > line: # a new line whitespace has been parsed, so we check if the string # continues after the whitespace: - buf = c.buf # may have been reallocated pos = c.bufpos - if buf[pos] == '\'': inc(pos) + if c.buf[pos] == '\'': inc(pos) else: break parseLoop else: break parseLoop c.bufpos = pos proc getDollarString(c: var SqlLexer, tok: var Token) = var pos = c.bufpos + 1 - var buf = c.buf tok.kind = tkDollarQuotedConstant var tag = "$" - while buf[pos] in IdentChars: - add(tag, buf[pos]) + while c.buf[pos] in IdentChars: + add(tag, c.buf[pos]) inc(pos) - if buf[pos] == '$': inc(pos) + if c.buf[pos] == '$': inc(pos) else: tok.kind = tkInvalid return while true: - case buf[pos] + case c.buf[pos] of '\c', '\L': pos = handleCRLF(c, pos) - buf = c.buf add(tok.literal, "\L") of '\0': tok.kind = tkInvalid @@ -252,37 +245,35 @@ proc getDollarString(c: var SqlLexer, tok: var Token) = of '$': inc(pos) var tag2 = "$" - while buf[pos] in IdentChars: - add(tag2, buf[pos]) + while c.buf[pos] in IdentChars: + add(tag2, c.buf[pos]) inc(pos) - if buf[pos] == '$': inc(pos) + if c.buf[pos] == '$': inc(pos) if tag2 == tag: break add(tok.literal, tag2) add(tok.literal, '$') else: - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) c.bufpos = pos proc getSymbol(c: var SqlLexer, tok: var Token) = var pos = c.bufpos - var buf = c.buf while true: - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) - if buf[pos] notin {'a'..'z','A'..'Z','0'..'9','_','$', '\128'..'\255'}: + if c.buf[pos] notin {'a'..'z','A'..'Z','0'..'9','_','$', '\128'..'\255'}: break c.bufpos = pos tok.kind = tkIdentifier proc getQuotedIdentifier(c: var SqlLexer, tok: var Token, quote='\"') = var pos = c.bufpos + 1 - var buf = c.buf tok.kind = tkQuotedIdentifier while true: - var ch = buf[pos] + var ch = c.buf[pos] if ch == quote: - if buf[pos+1] == quote: + if c.buf[pos+1] == quote: inc(pos, 2) add(tok.literal, quote) else: @@ -298,11 +289,10 @@ proc getQuotedIdentifier(c: var SqlLexer, tok: var Token, quote='\"') = proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) = var pos = c.bufpos + 1 - var buf = c.buf block parseLoop: while true: while true: - var ch = buf[pos] + var ch = c.buf[pos] if ch in validChars: add(tok.literal, ch) inc(pos) @@ -318,9 +308,8 @@ proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) = if c.lineNumber > line: # a new line whitespace has been parsed, so we check if the string # continues after the whitespace: - buf = c.buf # may have been reallocated pos = c.bufpos - if buf[pos] == '\'': inc(pos) + if c.buf[pos] == '\'': inc(pos) else: break parseLoop else: break parseLoop c.bufpos = pos @@ -328,29 +317,28 @@ proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) = proc getNumeric(c: var SqlLexer, tok: var Token) = tok.kind = tkInteger var pos = c.bufpos - var buf = c.buf - while buf[pos] in Digits: - add(tok.literal, buf[pos]) + while c.buf[pos] in Digits: + add(tok.literal, c.buf[pos]) inc(pos) - if buf[pos] == '.': + if c.buf[pos] == '.': tok.kind = tkNumeric - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) - while buf[pos] in Digits: - add(tok.literal, buf[pos]) + while c.buf[pos] in Digits: + add(tok.literal, c.buf[pos]) inc(pos) - if buf[pos] in {'E', 'e'}: + if c.buf[pos] in {'E', 'e'}: tok.kind = tkNumeric - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) - if buf[pos] == '+': + if c.buf[pos] == '+': inc(pos) - elif buf[pos] == '-': - add(tok.literal, buf[pos]) + elif c.buf[pos] == '-': + add(tok.literal, c.buf[pos]) inc(pos) - if buf[pos] in Digits: - while buf[pos] in Digits: - add(tok.literal, buf[pos]) + if c.buf[pos] in Digits: + while c.buf[pos] in Digits: + add(tok.literal, c.buf[pos]) inc(pos) else: tok.kind = tkInvalid @@ -361,24 +349,23 @@ proc getOperator(c: var SqlLexer, tok: var Token) = '^', '&', '|', '`', '?'} tok.kind = tkOperator var pos = c.bufpos - var buf = c.buf var trailingPlusMinus = false while true: - case buf[pos] + case c.buf[pos] of '-': - if buf[pos] == '-': break - if not trailingPlusMinus and buf[pos+1] notin operators and + if c.buf[pos] == '-': break + if not trailingPlusMinus and c.buf[pos+1] notin operators and tok.literal.len > 0: break of '/': - if buf[pos] == '*': break + if c.buf[pos] == '*': break of '~', '!', '@', '#', '%', '^', '&', '|', '`', '?': trailingPlusMinus = true of '+': - if not trailingPlusMinus and buf[pos+1] notin operators and + if not trailingPlusMinus and c.buf[pos+1] notin operators and tok.literal.len > 0: break of '*', '<', '>', '=': discard else: break - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) c.bufpos = pos diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim index 953c5cdde..3b77f9c62 100644 --- a/lib/pure/parsexml.nim +++ b/lib/pure/parsexml.nim @@ -345,11 +345,10 @@ proc markError(my: var XmlParser, kind: XmlErrorKind) {.inline.} = proc parseCDATA(my: var XmlParser) = var pos = my.bufpos + len("<![CDATA[") - var buf = my.buf while true: - case buf[pos] + case my.buf[pos] of ']': - if buf[pos+1] == ']' and buf[pos+2] == '>': + if my.buf[pos+1] == ']' and my.buf[pos+2] == '>': inc(pos, 3) break add(my.a, ']') @@ -359,29 +358,25 @@ proc parseCDATA(my: var XmlParser) = break of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf add(my.a, '\L') of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf add(my.a, '\L') of '/': pos = lexbase.handleRefillChar(my, pos) - buf = my.buf add(my.a, '/') else: - add(my.a, buf[pos]) + add(my.a, my.buf[pos]) inc(pos) my.bufpos = pos # store back my.kind = xmlCData proc parseComment(my: var XmlParser) = var pos = my.bufpos + len("<!--") - var buf = my.buf while true: - case buf[pos] + case my.buf[pos] of '-': - if buf[pos+1] == '-' and buf[pos+2] == '>': + if my.buf[pos+1] == '-' and my.buf[pos+2] == '>': inc(pos, 3) break if my.options.contains(reportComments): add(my.a, '-') @@ -391,38 +386,32 @@ proc parseComment(my: var XmlParser) = break of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf if my.options.contains(reportComments): add(my.a, '\L') of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf if my.options.contains(reportComments): add(my.a, '\L') of '/': pos = lexbase.handleRefillChar(my, pos) - buf = my.buf if my.options.contains(reportComments): add(my.a, '/') else: - if my.options.contains(reportComments): add(my.a, buf[pos]) + if my.options.contains(reportComments): add(my.a, my.buf[pos]) inc(pos) my.bufpos = pos my.kind = xmlComment proc parseWhitespace(my: var XmlParser, skip=false) = var pos = my.bufpos - var buf = my.buf while true: - case buf[pos] + case my.buf[pos] of ' ', '\t': - if not skip: add(my.a, buf[pos]) + if not skip: add(my.a, my.buf[pos]) inc(pos) of '\c': # the specification says that CR-LF, CR are to be transformed to LF pos = lexbase.handleCR(my, pos) - buf = my.buf if not skip: add(my.a, '\L') of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf if not skip: add(my.a, '\L') else: break @@ -434,53 +423,51 @@ const proc parseName(my: var XmlParser, dest: var string) = var pos = my.bufpos - var buf = my.buf - if buf[pos] in NameStartChar: + if my.buf[pos] in NameStartChar: while true: - add(dest, buf[pos]) + add(dest, my.buf[pos]) inc(pos) - if buf[pos] notin NameChar: break + if my.buf[pos] notin NameChar: break my.bufpos = pos else: markError(my, errNameExpected) proc parseEntity(my: var XmlParser, dest: var string) = var pos = my.bufpos+1 - var buf = my.buf my.kind = xmlCharData - if buf[pos] == '#': + if my.buf[pos] == '#': var r: int inc(pos) - if buf[pos] == 'x': + if my.buf[pos] == 'x': inc(pos) while true: - case buf[pos] - of '0'..'9': r = (r shl 4) or (ord(buf[pos]) - ord('0')) - of 'a'..'f': r = (r shl 4) or (ord(buf[pos]) - ord('a') + 10) - of 'A'..'F': r = (r shl 4) or (ord(buf[pos]) - ord('A') + 10) + case my.buf[pos] + of '0'..'9': r = (r shl 4) or (ord(my.buf[pos]) - ord('0')) + of 'a'..'f': r = (r shl 4) or (ord(my.buf[pos]) - ord('a') + 10) + of 'A'..'F': r = (r shl 4) or (ord(my.buf[pos]) - ord('A') + 10) else: break inc(pos) else: - while buf[pos] in {'0'..'9'}: - r = r * 10 + (ord(buf[pos]) - ord('0')) + while my.buf[pos] in {'0'..'9'}: + r = r * 10 + (ord(my.buf[pos]) - ord('0')) inc(pos) add(dest, toUTF8(Rune(r))) - elif buf[pos] == 'l' and buf[pos+1] == 't' and buf[pos+2] == ';': + elif my.buf[pos] == 'l' and my.buf[pos+1] == 't' and my.buf[pos+2] == ';': add(dest, '<') inc(pos, 2) - elif buf[pos] == 'g' and buf[pos+1] == 't' and buf[pos+2] == ';': + elif my.buf[pos] == 'g' and my.buf[pos+1] == 't' and my.buf[pos+2] == ';': add(dest, '>') inc(pos, 2) - elif buf[pos] == 'a' and buf[pos+1] == 'm' and buf[pos+2] == 'p' and - buf[pos+3] == ';': + elif my.buf[pos] == 'a' and my.buf[pos+1] == 'm' and my.buf[pos+2] == 'p' and + my.buf[pos+3] == ';': add(dest, '&') inc(pos, 3) - elif buf[pos] == 'a' and buf[pos+1] == 'p' and buf[pos+2] == 'o' and - buf[pos+3] == 's' and buf[pos+4] == ';': + elif my.buf[pos] == 'a' and my.buf[pos+1] == 'p' and my.buf[pos+2] == 'o' and + my.buf[pos+3] == 's' and my.buf[pos+4] == ';': add(dest, '\'') inc(pos, 4) - elif buf[pos] == 'q' and buf[pos+1] == 'u' and buf[pos+2] == 'o' and - buf[pos+3] == 't' and buf[pos+4] == ';': + elif my.buf[pos] == 'q' and my.buf[pos+1] == 'u' and my.buf[pos+2] == 'o' and + my.buf[pos+3] == 't' and my.buf[pos+4] == ';': add(dest, '"') inc(pos, 4) else: @@ -491,7 +478,7 @@ proc parseEntity(my: var XmlParser, dest: var string) = my.kind = xmlEntity else: add(dest, '&') - if buf[pos] == ';': + if my.buf[pos] == ';': inc(pos) else: markError(my, errSemicolonExpected) @@ -501,15 +488,14 @@ proc parsePI(my: var XmlParser) = inc(my.bufpos, "<?".len) parseName(my, my.a) var pos = my.bufpos - var buf = my.buf setLen(my.b, 0) while true: - case buf[pos] + case my.buf[pos] of '\0': markError(my, errQmGtExpected) break of '?': - if buf[pos+1] == '>': + if my.buf[pos+1] == '>': inc(pos, 2) break add(my.b, '?') @@ -517,18 +503,15 @@ proc parsePI(my: var XmlParser) = of '\c': # the specification says that CR-LF, CR are to be transformed to LF pos = lexbase.handleCR(my, pos) - buf = my.buf add(my.b, '\L') of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf add(my.b, '\L') of '/': pos = lexbase.handleRefillChar(my, pos) - buf = my.buf add(my.b, '/') else: - add(my.b, buf[pos]) + add(my.b, my.buf[pos]) inc(pos) my.bufpos = pos my.kind = xmlPI @@ -536,10 +519,9 @@ proc parsePI(my: var XmlParser) = proc parseSpecial(my: var XmlParser) = # things that start with <! var pos = my.bufpos + 2 - var buf = my.buf var opentags = 0 while true: - case buf[pos] + case my.buf[pos] of '\0': markError(my, errGtExpected) break @@ -556,18 +538,15 @@ proc parseSpecial(my: var XmlParser) = add(my.a, '>') of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf add(my.a, '\L') of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf add(my.a, '\L') of '/': pos = lexbase.handleRefillChar(my, pos) - buf = my.buf add(my.b, '/') else: - add(my.a, buf[pos]) + add(my.a, my.buf[pos]) inc(pos) my.bufpos = pos my.kind = xmlSpecial @@ -635,13 +614,12 @@ proc parseAttribute(my: var XmlParser) = parseWhitespace(my, skip=true) var pos = my.bufpos - var buf = my.buf - if buf[pos] in {'\'', '"'}: - var quote = buf[pos] + if my.buf[pos] in {'\'', '"'}: + var quote = my.buf[pos] var pendingSpace = false inc(pos) while true: - case buf[pos] + case my.buf[pos] of '\0': markError(my, errQuoteExpected) break @@ -658,31 +636,28 @@ proc parseAttribute(my: var XmlParser) = inc(pos) of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf pendingSpace = true of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf pendingSpace = true of '/': pos = lexbase.handleRefillChar(my, pos) - buf = my.buf add(my.b, '/') else: - if buf[pos] == quote: + if my.buf[pos] == quote: inc(pos) break else: if pendingSpace: add(my.b, ' ') pendingSpace = false - add(my.b, buf[pos]) + add(my.b, my.buf[pos]) inc(pos) elif allowUnquotedAttribs in my.options: const disallowedChars = {'"', '\'', '`', '=', '<', '>', ' ', '\0', '\t', '\L', '\F', '\f'} let startPos = pos - while (let c = buf[pos]; c notin disallowedChars): + while (let c = my.buf[pos]; c notin disallowedChars): if c == '&': my.bufpos = pos parseEntity(my, my.b) @@ -696,33 +671,29 @@ proc parseAttribute(my: var XmlParser) = else: markError(my, errQuoteExpected) # error corrections: guess what was meant - while buf[pos] != '>' and buf[pos] > ' ': - add(my.b, buf[pos]) + while my.buf[pos] != '>' and my.buf[pos] > ' ': + add(my.b, my.buf[pos]) inc pos my.bufpos = pos parseWhitespace(my, skip=true) proc parseCharData(my: var XmlParser) = var pos = my.bufpos - var buf = my.buf while true: - case buf[pos] + case my.buf[pos] of '\0', '<', '&': break of '\c': # the specification says that CR-LF, CR are to be transformed to LF pos = lexbase.handleCR(my, pos) - buf = my.buf add(my.a, '\L') of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf add(my.a, '\L') of '/': pos = lexbase.handleRefillChar(my, pos) - buf = my.buf add(my.a, '/') else: - add(my.a, buf[pos]) + add(my.a, my.buf[pos]) inc(pos) my.bufpos = pos my.kind = xmlCharData @@ -731,18 +702,17 @@ proc rawGetTok(my: var XmlParser) = my.kind = xmlError setLen(my.a, 0) var pos = my.bufpos - var buf = my.buf - case buf[pos] + case my.buf[pos] of '<': - case buf[pos+1] + case my.buf[pos+1] of '/': parseEndTag(my) of '!': - if buf[pos+2] == '[' and buf[pos+3] == 'C' and buf[pos+4] == 'D' and - buf[pos+5] == 'A' and buf[pos+6] == 'T' and buf[pos+7] == 'A' and - buf[pos+8] == '[': + if my.buf[pos+2] == '[' and my.buf[pos+3] == 'C' and my.buf[pos+4] == 'D' and + my.buf[pos+5] == 'A' and my.buf[pos+6] == 'T' and my.buf[pos+7] == 'A' and + my.buf[pos+8] == '[': parseCDATA(my) - elif buf[pos+2] == '-' and buf[pos+3] == '-': + elif my.buf[pos+2] == '-' and my.buf[pos+3] == '-': parseComment(my) else: parseSpecial(my) @@ -841,4 +811,3 @@ when not defined(testing) and isMainModule: of xmlSpecial: echo("SPECIAL: " & x.charData) close(x) - diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim index 957091918..644de6007 100644 --- a/lib/pure/pegs.nim +++ b/lib/pure/pegs.nim @@ -1545,20 +1545,17 @@ proc getEscapedChar(c: var PegLexer, tok: var Token) = proc skip(c: var PegLexer) = var pos = c.bufpos - var buf = c.buf while pos < c.buf.len: - case buf[pos] + case c.buf[pos] of ' ', '\t': inc(pos) of '#': while (pos < c.buf.len) and - not (buf[pos] in {'\c', '\L', '\0'}): inc(pos) + not (c.buf[pos] in {'\c', '\L', '\0'}): inc(pos) of '\c': pos = handleCR(c, pos) - buf = c.buf of '\L': pos = handleLF(c, pos) - buf = c.buf else: break # EndOfFile also leaves the loop c.bufpos = pos @@ -1566,10 +1563,9 @@ proc skip(c: var PegLexer) = proc getString(c: var PegLexer, tok: var Token) = tok.kind = tkStringLit var pos = c.bufpos + 1 - var buf = c.buf - var quote = buf[pos-1] + var quote = c.buf[pos-1] while pos < c.buf.len: - case buf[pos] + case c.buf[pos] of '\\': c.bufpos = pos getEscapedChar(c, tok) @@ -1577,22 +1573,21 @@ proc getString(c: var PegLexer, tok: var Token) = of '\c', '\L', '\0': tok.kind = tkInvalid break - elif buf[pos] == quote: + elif c.buf[pos] == quote: inc(pos) break else: - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) c.bufpos = pos proc getDollar(c: var PegLexer, tok: var Token) = var pos = c.bufpos + 1 - var buf = c.buf - if buf[pos] in {'0'..'9'}: + if c.buf[pos] in {'0'..'9'}: tok.kind = tkBackref tok.index = 0 - while pos < c.buf.len and buf[pos] in {'0'..'9'}: - tok.index = tok.index * 10 + ord(buf[pos]) - ord('0') + while pos < c.buf.len and c.buf[pos] in {'0'..'9'}: + tok.index = tok.index * 10 + ord(c.buf[pos]) - ord('0') inc(pos) else: tok.kind = tkDollar @@ -1602,14 +1597,13 @@ proc getCharSet(c: var PegLexer, tok: var Token) = tok.kind = tkCharSet tok.charset = {} var pos = c.bufpos + 1 - var buf = c.buf var caret = false - if buf[pos] == '^': + if c.buf[pos] == '^': inc(pos) caret = true while pos < c.buf.len: var ch: char - case buf[pos] + case c.buf[pos] of ']': if pos < c.buf.len: inc(pos) break @@ -1622,11 +1616,11 @@ proc getCharSet(c: var PegLexer, tok: var Token) = tok.kind = tkInvalid break else: - ch = buf[pos] + ch = c.buf[pos] inc(pos) incl(tok.charset, ch) - if buf[pos] == '-': - if pos+1 < c.buf.len and buf[pos+1] == ']': + if c.buf[pos] == '-': + if pos+1 < c.buf.len and c.buf[pos+1] == ']': incl(tok.charset, '-') inc(pos) else: @@ -1635,7 +1629,7 @@ proc getCharSet(c: var PegLexer, tok: var Token) = else: break var ch2: char - case buf[pos] + case c.buf[pos] of '\\': c.bufpos = pos getEscapedChar(c, tok) @@ -1646,7 +1640,7 @@ proc getCharSet(c: var PegLexer, tok: var Token) = break else: if pos+1 < c.buf.len: - ch2 = buf[pos] + ch2 = c.buf[pos] inc(pos) else: break @@ -1657,11 +1651,10 @@ proc getCharSet(c: var PegLexer, tok: var Token) = proc getSymbol(c: var PegLexer, tok: var Token) = var pos = c.bufpos - var buf = c.buf while pos < c.buf.len: - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) - if pos < buf.len and buf[pos] notin strutils.IdentChars: break + if pos < c.buf.len and c.buf[pos] notin strutils.IdentChars: break c.bufpos = pos tok.kind = tkIdentifier diff --git a/lib/pure/streams.nim b/lib/pure/streams.nim index 0f65d6c0e..6c69a9bb6 100644 --- a/lib/pure/streams.nim +++ b/lib/pure/streams.nim @@ -53,12 +53,17 @@ type {.nimcall, raises: [Defect, IOError, OSError], tags: [], gcsafe.} getPositionImpl*: proc (s: Stream): int {.nimcall, raises: [Defect, IOError, OSError], tags: [], gcsafe.} + + readDataStrImpl*: proc (s: Stream, buffer: var string, slice: Slice[int]): int + {.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.} + readDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int): int {.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.} peekDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int): int {.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.} writeDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int) - {.nimcall, raises: [Defect, IOError, OSError], tags: [WriteIOEffect], gcsafe.} + {.nimcall, raises: [Defect, IOError, OSError], tags: [WriteIOEffect], gcsafe.} + flushImpl*: proc (s: Stream) {.nimcall, raises: [Defect, IOError, OSError], tags: [WriteIOEffect], gcsafe.} @@ -87,6 +92,14 @@ proc readData*(s: Stream, buffer: pointer, bufLen: int): int = ## low level proc that reads data into an untyped `buffer` of `bufLen` size. result = s.readDataImpl(s, buffer, bufLen) +proc readDataStr*(s: Stream, buffer: var string, slice: Slice[int]): int = + ## low level proc that reads data into a string ``buffer`` at ``slice``. + if s.readDataStrImpl != nil: + result = s.readDataStrImpl(s, buffer, slice) + else: + # fallback + result = s.readData(addr buffer[0], buffer.len) + when not defined(js): proc readAll*(s: Stream): string = ## Reads all available data. @@ -344,6 +357,19 @@ when not defined(js): var s = StringStream(s) return s.pos + proc ssReadDataStr(s: Stream, buffer: var string, slice: Slice[int]): int = + var s = StringStream(s) + result = min(slice.b + 1 - slice.a, s.data.len - s.pos) + if result > 0: + when nimvm: + for i in 0 ..< result: # sorry, but no fast string splicing on the vm. + buffer[slice.a + i] = s.data[s.pos + i] + else: + copyMem(unsafeAddr buffer[slice.a], addr s.data[s.pos], result) + inc(s.pos, result) + else: + result = 0 + proc ssReadData(s: Stream, buffer: pointer, bufLen: int): int = var s = StringStream(s) result = min(bufLen, s.data.len - s.pos) @@ -389,6 +415,7 @@ when not defined(js): result.readDataImpl = ssReadData result.peekDataImpl = ssPeekData result.writeDataImpl = ssWriteData + result.readDataStrImpl = ssReadDataStr type FileStream* = ref FileStreamObj ## a stream that encapsulates a `File` @@ -407,6 +434,9 @@ when not defined(js): proc fsReadData(s: Stream, buffer: pointer, bufLen: int): int = result = readBuffer(FileStream(s).f, buffer, bufLen) + proc fsReadDataStr(s: Stream, buffer: var string, slice: Slice[int]): int = + result = readBuffer(FileStream(s).f, addr buffer[slice.a], slice.b + 1 - slice.a) + proc fsPeekData(s: Stream, buffer: pointer, bufLen: int): int = let pos = fsGetPosition(s) defer: fsSetPosition(s, pos) @@ -424,6 +454,7 @@ when not defined(js): result.atEndImpl = fsAtEnd result.setPositionImpl = fsSetPosition result.getPositionImpl = fsGetPosition + result.readDataStrImpl = fsReadDataStr result.readDataImpl = fsReadData result.peekDataImpl = fsPeekData result.writeDataImpl = fsWriteData |