diff options
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/devel/httpclient.nim | 78 | ||||
-rw-r--r-- | lib/devel/parseutils.nim | 63 | ||||
-rw-r--r-- | lib/pure/parseutils.nim | 225 | ||||
-rwxr-xr-x | lib/pure/parsexml.nim | 11 | ||||
-rwxr-xr-x | lib/pure/strutils.nim | 123 | ||||
-rwxr-xr-x | lib/system.nim | 7 |
6 files changed, 287 insertions, 220 deletions
diff --git a/lib/devel/httpclient.nim b/lib/devel/httpclient.nim index fb9359630..d600fcb59 100755 --- a/lib/devel/httpclient.nim +++ b/lib/devel/httpclient.nim @@ -11,7 +11,7 @@ ## webpages/other data. # neuer Code: -import sockets, strutils, parseurl, pegs, os, parseutils +import sockets, strutils, parseurl, pegs, parseutils type TResponse* = tuple[ @@ -19,9 +19,9 @@ type body: string] THeader* = tuple[htype: string, hvalue: string] - EInvalidHttp* = object of EBase ## exception that is raised when server does - ## not conform to the implemented HTTP - ## protocol + EInvalidProtocol* = object of EBase ## exception that is raised when server + ## does not conform to the implemented + ## protocol EHttpRequestErr* = object of EBase ## Thrown in the ``getContent`` proc, ## when the server returns an error @@ -35,7 +35,7 @@ template newException(exceptn, message: expr): expr = e proc httpError(msg: string) = - var e: ref EInvalidHttp + var e: ref EInvalidProtocol new(e) e.msg = msg raise e @@ -54,42 +54,44 @@ proc getHeaderValue*(headers: seq[THeader], name: string): string = return headers[i].hvalue return "" +proc parseChunks(data: var string, start: int, s: TSocket): string = + # get chunks: + var i = start + result = "" + while true: + var chunkSize = 0 + var j = parseHex(data, chunkSize, i) + if j <= 0: break + inc(i, j) + while data[i] notin {'\C', '\L', '\0'}: inc(i) + if data[i] == '\C': inc(i) + if data[i] == '\L': inc(i) + if chunkSize <= 0: break + var x = copy(data, i, i+chunkSize-1) + var size = x.len + result.add(x) + + if size < chunkSize: + # read in the rest: + var missing = chunkSize - size + var L = result.len + setLen(result, L + missing) + while missing > 0: + var bytesRead = s.recv(addr(result[L]), missing) + inc(L, bytesRead) + dec(missing, bytesRead) + + # next chunk: + data = s.recv() + i = 0 + # skip trailing CR-LF: + while data[i] in {'\C', '\L'}: inc(i) + if data[i] == '\0': data.add(s.recv()) + proc parseBody(data: var string, start: int, s: TSocket, headers: seq[THeader]): string = if getHeaderValue(headers, "Transfer-Encoding") == "chunked": - # get chunks: - var i = start - result = "" - while true: - var chunkSize = 0 - var j = parseHex(data, chunkSize, i) - if j <= 0: break - inc(i, j) - while data[i] notin {'\C', '\L', '\0'}: inc(i) - if data[i] == '\C': inc(i) - if data[i] == '\L': inc(i) - echo "ChunkSize: ", chunkSize - if chunkSize <= 0: break - - var x = copy(data, i, i+chunkSize-1) - var size = x.len - result.add(x) - - if size < chunkSize: - # read in the rest: - var missing = chunkSize - size - var L = result.len - setLen(result, L + missing) - discard s.recv(addr(result[L]), missing) - - # next chunk: - data = s.recv() - echo data - i = 0 - - # skip trailing CR-LF: - while data[i] in {'\C', '\L'}: inc(i) - + result = parseChunks(data, start, s) else: result = copy(data, start) # -REGION- Content-Length diff --git a/lib/devel/parseutils.nim b/lib/devel/parseutils.nim deleted file mode 100644 index 4c5152167..000000000 --- a/lib/devel/parseutils.nim +++ /dev/null @@ -1,63 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2010 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Helpers for parsing. - -import strutils - -proc parseHex*(s: string, number: var int, start = 0): int = - ## parses a hexadecimal number and stores its value in ``number``. Returns - ## the number of the parsed characters or 0 in case of an error. - var i = start - var foundDigit = false - if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2) - elif s[i] == '#': inc(i) - while true: - case s[i] - of '_': nil - of '0'..'9': - number = number shl 4 or (ord(s[i]) - ord('0')) - foundDigit = true - of 'a'..'f': - number = number shl 4 or (ord(s[i]) - ord('a') + 10) - foundDigit = true - of 'A'..'F': - number = number shl 4 or (ord(s[i]) - ord('A') + 10) - foundDigit = true - else: break - inc(i) - if foundDigit: result = i-start - -proc parseIdent*(s: string, ident: var string, start = 0): int = - ## parses an identifier and stores it in ``ident``. Returns - ## the number of the parsed characters or 0 in case of an error. - var i = start - if s[i] in IdentStartChars: - inc(i) - while s[i] in IdentChars: inc(i) - ident = copy(s, start, i-1) - result = i-start - -proc skipWhitespace*(s: string, start = 0): int {.inline.} = - while s[start+result] in Whitespace: inc(result) - -proc skip*(s, token: string, start = 0): int = - while result < token.len and s[result+start] == token[result]: inc(result) - if result != token.len: result = 0 - -proc skipIgnoreCase*(s, token: string, start = 0): int = - while result < token.len and - toLower(s[result+start]) == toLower(token[result]): inc(result) - if result != token.len: result = 0 - -proc parseBiggestInt*(s: string, number: var biggestInt, start = 0): int = - assert(false) # to implement - -proc parseBiggestFloat*(s: string, number: var biggestFloat, start = 0): int = - assert(false) # to implement diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim new file mode 100644 index 000000000..0878f87eb --- /dev/null +++ b/lib/pure/parseutils.nim @@ -0,0 +1,225 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2010 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Helpers for parsing. + +{.deadCodeElim: on.} + +{.push debugger:off .} # the user does not want to trace a part + # of the standard library! + +# copied from excpt.nim, because I don't want to make this template public +template newException(exceptn, message: expr): expr = + block: # open a new scope + var + e: ref exceptn + new(e) + e.msg = message + e + +const + Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'} + Letters = {'A'..'Z', 'a'..'z'} + Digits = {'0'..'9'} + IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'} + IdentStartChars = {'a'..'z', 'A'..'Z', '_'} + ## copied from strutils + +proc toLower(c: char): char {.inline.} = + result = if c in {'A'..'Z'}: chr(ord(c)-ord('A')+ord('a')) else: c + +proc parseHex*(s: string, number: var int, start = 0): int = + ## parses a hexadecimal number and stores its value in ``number``. Returns + ## the number of the parsed characters or 0 in case of an error. + var i = start + var foundDigit = false + if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2) + elif s[i] == '#': inc(i) + while true: + case s[i] + of '_': nil + of '0'..'9': + number = number shl 4 or (ord(s[i]) - ord('0')) + foundDigit = true + of 'a'..'f': + number = number shl 4 or (ord(s[i]) - ord('a') + 10) + foundDigit = true + of 'A'..'F': + number = number shl 4 or (ord(s[i]) - ord('A') + 10) + foundDigit = true + else: break + inc(i) + if foundDigit: result = i-start + +proc parseOct*(s: string, number: var int, start = 0): int = + ## parses an octal number and stores its value in ``number``. Returns + ## the number of the parsed characters or 0 in case of an error. + var i = start + var foundDigit = false + if s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2) + while true: + case s[i] + of '_': nil + of '0'..'7': + number = number shl 3 or (ord(s[i]) - ord('0')) + foundDigit = true + else: break + inc(i) + if foundDigit: result = i-start + +proc parseIdent*(s: string, ident: var string, start = 0): int = + ## parses an identifier and stores it in ``ident``. Returns + ## the number of the parsed characters or 0 in case of an error. + var i = start + if s[i] in IdentStartChars: + inc(i) + while s[i] in IdentChars: inc(i) + ident = copy(s, start, i-1) + result = i-start + +proc parseToken*(s: string, token: var string, validChars: set[char], + start = 0): int = + ## parses a token and stores it in ``token``. Returns + ## the number of the parsed characters or 0 in case of an error. A token + ## consists of the characters in `validChars`. + var i = start + while s[i] in validChars: inc(i) + result = i-start + token = copy(s, start, i-1) + +proc skipWhitespace*(s: string, start = 0): int {.inline.} = + ## skips the whitespace starting at ``s[start]``. Returns the number of + ## skipped characters. + while s[start+result] in Whitespace: inc(result) + +proc skip*(s, token: string, start = 0): int = + while result < token.len and s[result+start] == token[result]: inc(result) + if result != token.len: result = 0 + +proc skipIgnoreCase*(s, token: string, start = 0): int = + while result < token.len and + toLower(s[result+start]) == toLower(token[result]): inc(result) + if result != token.len: result = 0 + +{.push overflowChecks: on.} +# this must be compiled with overflow checking turned on: +proc rawParseInt(s: string, b: var biggestInt, start = 0): int = + var + sign: BiggestInt = -1 + i = start + if s[i] == '+': inc(i) + elif s[i] == '-': + inc(i) + sign = 1 + if s[i] in {'0'..'9'}: + b = 0 + while s[i] in {'0'..'9'}: + b = b * 10 - (ord(s[i]) - ord('0')) + inc(i) + while s[i] == '_': inc(i) # underscores are allowed and ignored + b = b * sign + result = i - start +{.pop.} # overflowChecks + +proc parseBiggestInt*(s: string, number: var biggestInt, start = 0): int = + ## parses an integer starting at `start` and stores the value into `number`. + ## Result is the number of processed chars or 0 if there is no integer. + ## `EOverflow` is raised if an overflow occurs. + result = rawParseInt(s, number, start) + +proc parseInt*(s: string, number: var int, start = 0): int = + ## parses an integer starting at `start` and stores the value into `number`. + ## Result is the number of processed chars or 0 if there is no integer. + ## `EOverflow` is raised if an overflow occurs. + var res: biggestInt + result = parseBiggestInt(s, res, start) + if (sizeof(int) <= 4) and + ((res < low(int)) or (res > high(int))): + raise newException(EOverflow, "overflow") + else: + number = int(res) + +proc parseBiggestFloat*(s: string, number: var biggestFloat, start = 0): int = + ## parses a float starting at `start` and stores the value into `number`. + ## Result is the number of processed chars or 0 if there occured a parsing + ## error. + var + esign = 1.0 + sign = 1.0 + i = start + exponent: int + flags: int + number = 0.0 + if s[i] == '+': inc(i) + elif s[i] == '-': + sign = -1.0 + inc(i) + if s[i] == 'N' or s[i] == 'n': + if s[i+1] == 'A' or s[i+1] == 'a': + if s[i+2] == 'N' or s[i+2] == 'n': + if s[i+3] notin IdentChars: + number = NaN + return i+3 - start + return 0 + if s[i] == 'I' or s[i] == 'i': + if s[i+1] == 'N' or s[i+1] == 'n': + if s[i+2] == 'F' or s[i+2] == 'f': + if s[i+3] notin IdentChars: + number = Inf*sign + return i+3 - start + return 0 + while s[i] in {'0'..'9'}: + # Read integer part + flags = flags or 1 + number = number * 10.0 + toFloat(ord(s[i]) - ord('0')) + inc(i) + while s[i] == '_': inc(i) + # Decimal? + if s[i] == '.': + var hd = 1.0 + inc(i) + while s[i] in {'0'..'9'}: + # Read fractional part + flags = flags or 2 + number = number * 10.0 + toFloat(ord(s[i]) - ord('0')) + hd = hd * 10.0 + inc(i) + while s[i] == '_': inc(i) + number = number / hd # this complicated way preserves precision + # Again, read integer and fractional part + if flags == 0: return 0 + # Exponent? + if s[i] in {'e', 'E'}: + inc(i) + if s[i] == '+': + inc(i) + elif s[i] == '-': + esign = -1.0 + inc(i) + if s[i] notin {'0'..'9'}: + return 0 + while s[i] in {'0'..'9'}: + exponent = exponent * 10 + ord(s[i]) - ord('0') + inc(i) + while s[i] == '_': inc(i) + # Calculate Exponent + var hd = 1.0 + for j in 1..exponent: hd = hd * 10.0 + if esign > 0.0: number = number * hd + else: number = number / hd + # evaluate sign + number = number * sign + result = i - start + +proc parseFloat*(s: string, number: var float, start = 0): int = + var bf: biggestFloat + result = parseBiggestFloat(s, bf, start) + number = bf + +{.pop.} diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim index 343fabd8c..0728d07b1 100755 --- a/lib/pure/parsexml.nim +++ b/lib/pure/parsexml.nim @@ -315,21 +315,22 @@ proc parseEntity(my: var TXmlParser, dest: var string) = r = r * 10 + (ord(buf[pos]) - ord('0')) inc(pos) add(dest, toUTF8(TRune(r))) - elif buf[pos] == 'l' and buf[pos+1] == 't': + elif buf[pos] == 'l' and buf[pos+1] == 't' and buf[pos+2] == ';': add(dest, '<') inc(pos, 2) - elif buf[pos] == 'g' and buf[pos+1] == 't': + elif buf[pos] == 'g' and buf[pos+1] == 't' and buf[pos+2] == ';': add(dest, '>') inc(pos, 2) - elif buf[pos] == 'a' and buf[pos+1] == 'm' and buf[pos+2] == 'p': + elif buf[pos] == 'a' and buf[pos+1] == 'm' and buf[pos+2] == 'p' + and buf[pos+3] == ';': add(dest, '&') inc(pos, 3) elif buf[pos] == 'a' and buf[pos+1] == 'p' and buf[pos+2] == 'o' and - buf[pos+3] == 's': + buf[pos+3] == 's' and buf[pos+4] == ';': add(dest, '\'') inc(pos, 4) elif buf[pos] == 'q' and buf[pos+1] == 'u' and buf[pos+2] == 'o' and - buf[pos+3] == 't': + buf[pos+3] == 't' and buf[pos+4] == ';': add(dest, '"') inc(pos, 4) else: diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 292810538..fe70130e5 100755 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -10,6 +10,8 @@ ## This module contains various string utility routines. ## See the module `regexprs` for regular expression support. +import parseutils + {.deadCodeElim: on.} {.push debugger:off .} # the user does not want to trace a part @@ -440,7 +442,7 @@ proc findNormalized(x: string, inArray: openarray[string]): int = while i < high(inArray): if cmpIgnoreStyle(x, inArray[i]) == 0: return i inc(i, 2) # incrementing by 1 would probably result in a - # security whole ... + # security hole ... return -1 proc addf(s: var string, formatstr: string, a: openarray[string]) = @@ -686,56 +688,13 @@ proc toHex(x: BiggestInt, len: int): string = result[j] = HexChars[toU32(x shr shift) and 0xF'i32] shift = shift + 4 -{.push overflowChecks: on.} -# this must be compiled with overflow checking turned on: -proc rawParseInt(s: string, index: var int): BiggestInt = - # index contains the start position at proc entry; end position will be - # an index before the proc returns; index = -1 on error (no number at all) - # the problem here is that integers have an asymmetrical range: there is - # one more valid negative than prositive integer. Thus we perform the - # computation as a negative number and then change the sign at the end. - var - i = index # a local i is more efficient than accessing a var parameter - sign: BiggestInt = -1 - if s[i] == '+': - inc(i) - elif s[i] == '-': - inc(i) - sign = 1 - if s[i] in {'0'..'9'}: - result = 0 - while s[i] in {'0'..'9'}: - result = result * 10 - (ord(s[i]) - ord('0')) - inc(i) - while s[i] == '_': - inc(i) # underscores are allowed and ignored - result = result * sign - if s[i] == '\0': - index = i # store index back - else: - index = -1 # BUGFIX: error! - else: - index = -1 - -{.pop.} # overflowChecks - proc parseInt(s: string): int = - var - index = 0 - res = rawParseInt(s, index) - if index == -1: - raise newException(EInvalidValue, "invalid integer: " & s) - elif (sizeof(int) <= 4) and - ((res < low(int)) or (res > high(int))): - raise newException(EOverflow, "overflow") - else: - result = int(res) # convert to smaller integer type + var L = parseutils.parseInt(s, result, 0) + if L != s.len: raise newException(EInvalidValue, "invalid integer: " & s) proc ParseBiggestInt(s: string): biggestInt = - var index = 0 - result = rawParseInt(s, index) - if index == -1: - raise newException(EInvalidValue, "invalid integer: " & s) + var L = parseutils.parseBiggestInt(s, result, 0) + if L != s.len: raise newException(EInvalidValue, "invalid integer: " & s) proc ParseOctInt*(s: string): int = var i = 0 @@ -769,72 +728,8 @@ proc ParseHexInt(s: string): int = else: raise newException(EInvalidValue, "invalid integer: " & s) proc ParseFloat(s: string): float = - var - esign = 1.0 - sign = 1.0 - i = 0 - exponent: int - flags: int - result = 0.0 - if s[i] == '+': inc(i) - elif s[i] == '-': - sign = -1.0 - inc(i) - if s[i] == 'N' or s[i] == 'n': - if s[i+1] == 'A' or s[i+1] == 'a': - if s[i+2] == 'N' or s[i+2] == 'n': - if s[i+3] == '\0': return NaN - raise newException(EInvalidValue, "invalid float: " & s) - if s[i] == 'I' or s[i] == 'i': - if s[i+1] == 'N' or s[i+1] == 'n': - if s[i+2] == 'F' or s[i+2] == 'f': - if s[i+3] == '\0': return Inf*sign - raise newException(EInvalidValue, "invalid float: " & s) - while s[i] in {'0'..'9'}: - # Read integer part - flags = flags or 1 - result = result * 10.0 + toFloat(ord(s[i]) - ord('0')) - inc(i) - while s[i] == '_': inc(i) - # Decimal? - if s[i] == '.': - var hd = 1.0 - inc(i) - while s[i] in {'0'..'9'}: - # Read fractional part - flags = flags or 2 - result = result * 10.0 + toFloat(ord(s[i]) - ord('0')) - hd = hd * 10.0 - inc(i) - while s[i] == '_': inc(i) - result = result / hd # this complicated way preserves precision - # Again, read integer and fractional part - if flags == 0: - raise newException(EInvalidValue, "invalid float: " & s) - # Exponent? - if s[i] in {'e', 'E'}: - inc(i) - if s[i] == '+': - inc(i) - elif s[i] == '-': - esign = -1.0 - inc(i) - if s[i] notin {'0'..'9'}: - raise newException(EInvalidValue, "invalid float: " & s) - while s[i] in {'0'..'9'}: - exponent = exponent * 10 + ord(s[i]) - ord('0') - inc(i) - while s[i] == '_': inc(i) - # Calculate Exponent - var hd = 1.0 - for j in 1..exponent: - hd = hd * 10.0 - if esign > 0.0: result = result * hd - else: result = result / hd - # Not all characters are read? - if s[i] != '\0': raise newException(EInvalidValue, "invalid float: " & s) - # evaluate sign - result = result * sign + var L = parseutils.parseFloat(s, result, 0) + if L != s.len: raise newException(EInvalidValue, "invalid float: " & s) proc toOct*(x: BiggestInt, len: int): string = ## converts `x` into its octal representation. The resulting string is diff --git a/lib/system.nim b/lib/system.nim index f287d7bda..7788e40e5 100755 --- a/lib/system.nim +++ b/lib/system.nim @@ -1460,6 +1460,13 @@ when not defined(EcmaScript) and not defined(NimrodVM): yield res Close(f) + iterator lines*(f: TFile): string = + ## Iterate over any line in the file `f`. + var res = "" + while not endOfFile(f): + rawReadLine(f, res) + yield res + proc fileHandle*(f: TFile): TFileHandle {.importc: "fileno", header: "<stdio.h>"} ## returns the OS file handle of the file ``f``. This is only useful for |