summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rwxr-xr-x lib/devel/httpclient.nim 78
-rw-r--r-- lib/devel/parseutils.nim 63
-rw-r--r-- lib/pure/parseutils.nim 225
-rwxr-xr-x lib/pure/parsexml.nim 11
-rwxr-xr-x lib/pure/strutils.nim 123
-rwxr-xr-x lib/system.nim 7
6 files changed, 287 insertions, 220 deletions
diff --git a/lib/devel/httpclient.nim b/lib/devel/httpclient.nim
index fb9359630..d600fcb59 100755
--- a/lib/devel/httpclient.nim
+++ b/lib/devel/httpclient.nim
@@ -11,7 +11,7 @@
 ## webpages/other data.
 
 # neuer Code:
-import sockets, strutils, parseurl, pegs, os, parseutils
+import sockets, strutils, parseurl, pegs, parseutils
 
 type
   TResponse* = tuple[
@@ -19,9 +19,9 @@ type
     body: string]
   THeader* = tuple[htype: string, hvalue: string]
 
-  EInvalidHttp* = object of EBase ## exception that is raised when server does
-                                  ## not conform to the implemented HTTP
-                                  ## protocol
+  EInvalidProtocol* = object of EBase ## exception that is raised when server
+                                      ## does not conform to the implemented
+                                      ## protocol
 
   EHttpRequestErr* = object of EBase ## Thrown in the ``getContent`` proc,
                                      ## when the server returns an error
@@ -35,7 +35,7 @@ template newException(exceptn, message: expr): expr =
     e
 
 proc httpError(msg: string) =
-  var e: ref EInvalidHttp
+  # allocate an EInvalidProtocol, attach `msg` as its message and raise it
+  var e: ref EInvalidProtocol
   new(e)
   e.msg = msg
   raise e
@@ -54,42 +54,44 @@ proc getHeaderValue*(headers: seq[THeader], name: string): string =
       return headers[i].hvalue
   return ""
 
+proc parseChunks(data: var string, start: int, s: TSocket): string =
+  ## Decodes a chunked body. ``data`` holds what has been received so far and
+  ## the first chunk-size line is expected at ``data[start]``; more data is
+  ## read from ``s`` as needed and ``data`` is overwritten with it. Returns the
+  ## concatenated chunk payloads. Decoding stops at a chunk of size 0 or when
+  ## no hexadecimal chunk size can be parsed. ``httpError`` is called (raising
+  ## ``EInvalidProtocol``) if ``s`` delivers no more bytes while a chunk is
+  ## still incomplete.
+  var i = start
+  result = ""
+  while true:
+    var chunkSize = 0
+    var j = parseHex(data, chunkSize, i)
+    if j <= 0: break
+    inc(i, j)
+    # skip whatever follows the size on the same line and the line ending:
+    while data[i] notin {'\C', '\L', '\0'}: inc(i)
+    if data[i] == '\C': inc(i)
+    if data[i] == '\L': inc(i)
+    if chunkSize <= 0: break
+    # take the part of the chunk that is already buffered in `data`:
+    var x = copy(data, i, i+chunkSize-1)
+    var size = x.len
+    result.add(x)
+    
+    if size < chunkSize:
+      # read in the rest:
+      var missing = chunkSize - size
+      var L = result.len
+      setLen(result, L + missing)
+      while missing > 0:
+        var bytesRead = s.recv(addr(result[L]), missing)
+        # A non-positive count means nothing was delivered (presumably closed
+        # connection or error -- confirm against sockets.recv). Without this
+        # check the loop would spin forever on 0 and move `L` backwards on a
+        # negative value.
+        if bytesRead <= 0:
+          httpError("connection closed before chunk was complete")
+        inc(L, bytesRead)
+        dec(missing, bytesRead)
+    
+    # next chunk:
+    data = s.recv()
+    i = 0
+    # skip trailing CR-LF:
+    while data[i] in {'\C', '\L'}: inc(i)
+    if data[i] == '\0': data.add(s.recv())
+  
 proc parseBody(data: var string, start: int, s: TSocket,
                headers: seq[THeader]): string =
   if getHeaderValue(headers, "Transfer-Encoding") == "chunked":
-    # get chunks:
-    var i = start
-    result = ""
-    while true:
-      var chunkSize = 0
-      var j = parseHex(data, chunkSize, i)
-      if j <= 0: break
-      inc(i, j)
-      while data[i] notin {'\C', '\L', '\0'}: inc(i)
-      if data[i] == '\C': inc(i)
-      if data[i] == '\L': inc(i)
-      echo "ChunkSize: ", chunkSize
-      if chunkSize <= 0: break
-      
-      var x = copy(data, i, i+chunkSize-1)
-      var size = x.len
-      result.add(x)
-      
-      if size < chunkSize:
-        # read in the rest:
-        var missing = chunkSize - size
-        var L = result.len
-        setLen(result, L + missing)
-        discard s.recv(addr(result[L]), missing)
-      
-      # next chunk:
-      data = s.recv()
-      echo data
-      i = 0
-      
-      # skip trailing CR-LF:
-      while data[i] in {'\C', '\L'}: inc(i)
-            
+    result = parseChunks(data, start, s)
   else:
     result = copy(data, start)
     # -REGION- Content-Length
diff --git a/lib/devel/parseutils.nim b/lib/devel/parseutils.nim
deleted file mode 100644
index 4c5152167..000000000
--- a/lib/devel/parseutils.nim
+++ /dev/null
@@ -1,63 +0,0 @@
-#
-#
-#            Nimrod's Runtime Library
-#        (c) Copyright 2010 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## Helpers for parsing.
-
-import strutils
-
-proc parseHex*(s: string, number: var int, start = 0): int = 
-  ## parses a hexadecimal number and stores its value in ``number``. Returns
-  ## the number of the parsed characters or 0 in case of an error.
-  var i = start
-  var foundDigit = false
-  if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2)
-  elif s[i] == '#': inc(i)
-  while true: 
-    case s[i]
-    of '_': nil
-    of '0'..'9':
-      number = number shl 4 or (ord(s[i]) - ord('0'))
-      foundDigit = true
-    of 'a'..'f':
-      number = number shl 4 or (ord(s[i]) - ord('a') + 10)
-      foundDigit = true
-    of 'A'..'F':
-      number = number shl 4 or (ord(s[i]) - ord('A') + 10)
-      foundDigit = true
-    else: break
-    inc(i)
-  if foundDigit: result = i-start
-
-proc parseIdent*(s: string, ident: var string, start = 0): int =
-  ## parses an identifier and stores it in ``ident``. Returns
-  ## the number of the parsed characters or 0 in case of an error.
-  var i = start
-  if s[i] in IdentStartChars:
-    inc(i)
-    while s[i] in IdentChars: inc(i)
-    ident = copy(s, start, i-1)
-    result = i-start
-
-proc skipWhitespace*(s: string, start = 0): int {.inline.} =
-  while s[start+result] in Whitespace: inc(result)
-
-proc skip*(s, token: string, start = 0): int =
-  while result < token.len and s[result+start] == token[result]: inc(result)
-  if result != token.len: result = 0
-  
-proc skipIgnoreCase*(s, token: string, start = 0): int =
-  while result < token.len and
-      toLower(s[result+start]) == toLower(token[result]): inc(result)
-  if result != token.len: result = 0  
-
-proc parseBiggestInt*(s: string, number: var biggestInt, start = 0): int =
-  assert(false) # to implement
-
-proc parseBiggestFloat*(s: string, number: var biggestFloat, start = 0): int = 
-  assert(false) # to implement
diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim
new file mode 100644
index 000000000..0878f87eb
--- /dev/null
+++ b/lib/pure/parseutils.nim
@@ -0,0 +1,225 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2010 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Helpers for parsing.
+
+{.deadCodeElim: on.}
+
+{.push debugger:off .} # the user does not want to trace a part
+                       # of the standard library!
+
+# copied from excpt.nim, because I don't want to make this template public
+template newException(exceptn, message: expr): expr =
+  # allocates a new ``ref exceptn``, sets its ``msg`` field to ``message`` and
+  # evaluates to that reference; raising it is left to the caller
+  block: # open a new scope
+    var
+      e: ref exceptn
+    new(e)
+    e.msg = message
+    e
+
+# The character sets below are private (not exported) copies of the ones in
+# strutils. NOTE(review): presumably duplicated because strutils imports this
+# module, so importing strutils here would be circular -- confirm.
+const
+  Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'}
+  Letters = {'A'..'Z', 'a'..'z'}
+  Digits = {'0'..'9'}
+  IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
+  IdentStartChars = {'a'..'z', 'A'..'Z', '_'}
+    ## copied from strutils
+
+proc toLower(c: char): char {.inline.} =
+  ## maps 'A'..'Z' to 'a'..'z'; every other character is returned unchanged.
+  if c in {'A'..'Z'}: result = chr(ord(c) + (ord('a') - ord('A')))
+  else: result = c
+
+proc parseHex*(s: string, number: var int, start = 0): int = 
+  ## parses a hexadecimal number starting at ``s[start]`` and stores its value
+  ## in ``number``. A leading ``0x``, ``0X`` or ``#`` is skipped and
+  ## underscores are ignored. Returns the number of the parsed characters or
+  ## 0 in case of an error; ``number`` is left unchanged in that case.
+  var i = start
+  var value = 0 # accumulate locally so that the old `number` cannot leak in
+  var foundDigit = false
+  if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2)
+  elif s[i] == '#': inc(i)
+  while true: 
+    case s[i]
+    of '_': nil
+    of '0'..'9':
+      value = value shl 4 or (ord(s[i]) - ord('0'))
+      foundDigit = true
+    of 'a'..'f':
+      value = value shl 4 or (ord(s[i]) - ord('a') + 10)
+      foundDigit = true
+    of 'A'..'F':
+      value = value shl 4 or (ord(s[i]) - ord('A') + 10)
+      foundDigit = true
+    else: break
+    inc(i)
+  if foundDigit:
+    number = value
+    result = i-start
+
+proc parseOct*(s: string, number: var int, start = 0): int = 
+  ## parses an octal number starting at ``s[start]`` and stores its value in
+  ## ``number``. A leading ``0o`` or ``0O`` is skipped and underscores are
+  ## ignored. Returns the number of the parsed characters or 0 in case of an
+  ## error; ``number`` is left unchanged in that case.
+  var i = start
+  var value = 0 # accumulate locally so that the old `number` cannot leak in
+  var foundDigit = false
+  if s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2)
+  while true: 
+    case s[i]
+    of '_': nil
+    of '0'..'7':
+      value = value shl 3 or (ord(s[i]) - ord('0'))
+      foundDigit = true
+    else: break
+    inc(i)
+  if foundDigit:
+    number = value
+    result = i-start
+
+proc parseIdent*(s: string, ident: var string, start = 0): int =
+  ## parses an identifier at ``s[start]`` and stores it in ``ident``. Returns
+  ## the number of the parsed characters or 0 (leaving ``ident`` untouched) if
+  ## ``s[start]`` cannot start an identifier.
+  if s[start] notin IdentStartChars: return 0
+  var last = start # index of the last identifier character seen so far
+  while s[last+1] in IdentChars: inc(last)
+  ident = copy(s, start, last)
+  result = last - start + 1
+
+proc parseToken*(s: string, token: var string, validChars: set[char],
+                 start = 0): int =
+  ## parses the run of characters from ``validChars`` that begins at
+  ## ``s[start]`` and stores it in ``token``. Returns the length of that run;
+  ## 0 means that ``s[start]`` is not in ``validChars`` (``token`` is still
+  ## assigned in that case).
+  result = 0
+  while s[start+result] in validChars: inc(result)
+  token = copy(s, start, start+result-1)
+
+proc skipWhitespace*(s: string, start = 0): int {.inline.} =
+  ## counts the whitespace characters that begin at ``s[start]`` and returns
+  ## that count (0 if ``s[start]`` is not whitespace).
+  var pos = start
+  while s[pos] in Whitespace: inc(pos)
+  result = pos - start
+
+proc skip*(s, token: string, start = 0): int =
+  ## returns ``token.len`` if ``token`` occurs in ``s`` at position ``start``
+  ## and 0 otherwise.
+  var matched = 0
+  while matched < token.len and s[start+matched] == token[matched]:
+    inc(matched)
+  if matched == token.len: result = matched
+  
+proc skipIgnoreCase*(s, token: string, start = 0): int =
+  ## returns ``token.len`` if ``token`` occurs in ``s`` at position ``start``
+  ## when both are compared through ``toLower``, and 0 otherwise.
+  var matched = 0
+  while matched < token.len:
+    if toLower(s[start+matched]) != toLower(token[matched]): break
+    inc(matched)
+  if matched == token.len: result = matched
+
+{.push overflowChecks: on.}
+# this must be compiled with overflow checking turned on:
+proc rawParseInt(s: string, b: var biggestInt, start = 0): int =
+  # Parses an optionally signed decimal integer at `s[start]` into `b` and
+  # returns the number of characters consumed. If no digit follows the
+  # optional sign, 0 is returned and `b` is not modified.
+  # The problem here is that integers have an asymmetrical range: there is
+  # one more valid negative than positive integer. Thus the value is
+  # accumulated as a negative number and the sign is changed at the end.
+  var
+    sign: BiggestInt = -1
+    i = start
+  if s[i] == '+': inc(i)
+  elif s[i] == '-':
+    inc(i)
+    sign = 1
+  if s[i] in {'0'..'9'}:
+    b = 0
+    while s[i] in {'0'..'9'}:
+      b = b * 10 - (ord(s[i]) - ord('0'))
+      inc(i)
+      while s[i] == '_': inc(i) # underscores are allowed and ignored
+    b = b * sign
+    result = i - start
+{.pop.} # overflowChecks
+
+proc parseBiggestInt*(s: string, number: var biggestInt, start = 0): int =
+  ## parses an integer that begins at ``s[start]`` and writes it to `number`.
+  ## Returns how many characters were processed; 0 means that no integer was
+  ## found. Raises `EOverflow` if an overflow occurs.
+  return rawParseInt(s, number, start)
+
+proc parseInt*(s: string, number: var int, start = 0): int =
+  ## parses an integer starting at `start` and stores the value into `number`.
+  ## Result is the number of processed chars or 0 if there is no integer; in
+  ## that case `number` is left unchanged.
+  ## `EOverflow` is raised if an overflow occurs.
+  var res: biggestInt
+  result = parseBiggestInt(s, res, start)
+  if result != 0:
+    # the range check only matters where `int` is narrower than `biggestInt`:
+    if (sizeof(int) <= 4) and
+        ((res < low(int)) or (res > high(int))):
+      raise newException(EOverflow, "overflow")
+    # store only on success, as `parseBiggestInt` does:
+    number = int(res)
+
+proc parseBiggestFloat*(s: string, number: var biggestFloat, start = 0): int =
+  ## parses a float starting at `start` and stores the value into `number`.
+  ## Result is the number of processed chars or 0 if a parsing error
+  ## occurred. Accepted forms: an optional sign followed by either ``nan`` or
+  ## ``inf`` (any letter case, not followed by an identifier character), or
+  ## by decimal digits with an optional fraction and an optional exponent;
+  ## underscores after a digit are skipped. `number` is reset to 0.0 before
+  ## parsing starts, so it is modified even if 0 is returned.
+  var
+    esign = 1.0
+    sign = 1.0
+    i = start
+    exponent: int
+    flags: int
+  number = 0.0
+  if s[i] == '+': inc(i)
+  elif s[i] == '-':
+    sign = -1.0
+    inc(i)
+  if s[i] == 'N' or s[i] == 'n':
+    if s[i+1] == 'A' or s[i+1] == 'a':
+      if s[i+2] == 'N' or s[i+2] == 'n':
+        if s[i+3] notin IdentChars:
+          # the sign is not applied to NaN
+          number = NaN
+          return i+3 - start
+    return 0
+  if s[i] == 'I' or s[i] == 'i':
+    if s[i+1] == 'N' or s[i+1] == 'n':
+      if s[i+2] == 'F' or s[i+2] == 'f':
+        if s[i+3] notin IdentChars: 
+          number = Inf*sign
+          return i+3 - start
+    return 0
+  while s[i] in {'0'..'9'}:
+    # Read integer part
+    flags = flags or 1
+    number = number * 10.0 + toFloat(ord(s[i]) - ord('0'))
+    inc(i)
+    while s[i] == '_': inc(i)
+  # Decimal?
+  if s[i] == '.':
+    var hd = 1.0
+    inc(i)
+    while s[i] in {'0'..'9'}:
+      # Read fractional part
+      flags = flags or 2
+      number = number * 10.0 + toFloat(ord(s[i]) - ord('0'))
+      hd = hd * 10.0
+      inc(i)
+      while s[i] == '_': inc(i)
+    number = number / hd # this complicated way preserves precision
+  # neither an integer digit nor a fractional digit was read: not a number
+  if flags == 0: return 0
+  # Exponent?
+  if s[i] in {'e', 'E'}:
+    inc(i)
+    if s[i] == '+':
+      inc(i)
+    elif s[i] == '-':
+      esign = -1.0
+      inc(i)
+    # an exponent marker must be followed by at least one digit
+    if s[i] notin {'0'..'9'}:
+      return 0
+    while s[i] in {'0'..'9'}:
+      exponent = exponent * 10 + ord(s[i]) - ord('0')
+      inc(i)
+      while s[i] == '_': inc(i)
+  # Calculate Exponent
+  var hd = 1.0
+  for j in 1..exponent: hd = hd * 10.0
+  if esign > 0.0: number = number * hd
+  else:           number = number / hd
+  # evaluate sign
+  number = number * sign
+  result = i - start
+
+proc parseFloat*(s: string, number: var float, start = 0): int =
+  ## parses a float via `parseBiggestFloat` and stores the value, converted
+  ## to ``float``, into `number`. Returns what `parseBiggestFloat` returns.
+  var parsed: biggestFloat
+  result = parseBiggestFloat(s, parsed, start)
+  number = parsed
+  
+{.pop.}
diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim
index 343fabd8c..0728d07b1 100755
--- a/lib/pure/parsexml.nim
+++ b/lib/pure/parsexml.nim
@@ -315,21 +315,22 @@ proc parseEntity(my: var TXmlParser, dest: var string) =
         r = r * 10 + (ord(buf[pos]) - ord('0'))
         inc(pos)
     add(dest, toUTF8(TRune(r)))
-  elif buf[pos] == 'l' and buf[pos+1] == 't':
+  elif buf[pos] == 'l' and buf[pos+1] == 't' and buf[pos+2] == ';':
     add(dest, '<')
     inc(pos, 2)
-  elif buf[pos] == 'g' and buf[pos+1] == 't':
+  elif buf[pos] == 'g' and buf[pos+1] == 't' and buf[pos+2] == ';':
     add(dest, '>')
     inc(pos, 2)
-  elif buf[pos] == 'a' and buf[pos+1] == 'm' and buf[pos+2] == 'p':
+  elif buf[pos] == 'a' and buf[pos+1] == 'm' and buf[pos+2] == 'p'
+       and buf[pos+3] == ';':
     add(dest, '&')
     inc(pos, 3)
   elif buf[pos] == 'a' and buf[pos+1] == 'p' and buf[pos+2] == 'o' and 
-      buf[pos+3] == 's':
+      buf[pos+3] == 's' and buf[pos+4] == ';':
     add(dest, '\'')
     inc(pos, 4)
   elif buf[pos] == 'q' and buf[pos+1] == 'u' and buf[pos+2] == 'o' and 
-      buf[pos+3] == 't':
+      buf[pos+3] == 't' and buf[pos+4] == ';':
     add(dest, '"')
     inc(pos, 4)
   else:
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index 292810538..fe70130e5 100755
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -10,6 +10,8 @@
 ## This module contains various string utility routines.
 ## See the module `regexprs` for regular expression support.
 
+import parseutils
+
 {.deadCodeElim: on.}
 
 {.push debugger:off .} # the user does not want to trace a part
@@ -440,7 +442,7 @@ proc findNormalized(x: string, inArray: openarray[string]): int =
   while i < high(inArray):
     if cmpIgnoreStyle(x, inArray[i]) == 0: return i
     inc(i, 2) # incrementing by 1 would probably result in a
-              # security whole ...
+              # security hole ...
   return -1
 
 proc addf(s: var string, formatstr: string, a: openarray[string]) =
@@ -686,56 +688,13 @@ proc toHex(x: BiggestInt, len: int): string =
     result[j] = HexChars[toU32(x shr shift) and 0xF'i32]
     shift = shift + 4
 
-{.push overflowChecks: on.}
-# this must be compiled with overflow checking turned on:
-proc rawParseInt(s: string, index: var int): BiggestInt =
-  # index contains the start position at proc entry; end position will be
-  # an index before the proc returns; index = -1 on error (no number at all)
-  # the problem here is that integers have an asymmetrical range: there is
-  # one more valid negative than prositive integer. Thus we perform the
-  # computation as a negative number and then change the sign at the end.
-  var
-    i = index # a local i is more efficient than accessing a var parameter
-    sign: BiggestInt = -1
-  if s[i] == '+':
-    inc(i)
-  elif s[i] == '-':
-    inc(i)
-    sign = 1
-  if s[i] in {'0'..'9'}:
-    result = 0
-    while s[i] in {'0'..'9'}:
-      result = result * 10 - (ord(s[i]) - ord('0'))
-      inc(i)
-      while s[i] == '_':
-        inc(i)               # underscores are allowed and ignored
-    result = result * sign
-    if s[i] == '\0':
-      index = i              # store index back
-    else:
-      index = -1 # BUGFIX: error!
-  else:
-    index = -1
-
-{.pop.} # overflowChecks
-
 proc parseInt(s: string): int =
-  var
-    index = 0
-    res = rawParseInt(s, index)
-  if index == -1:
-    raise newException(EInvalidValue, "invalid integer: " & s)
-  elif (sizeof(int) <= 4) and
-      ((res < low(int)) or (res > high(int))):
-    raise newException(EOverflow, "overflow")
-  else:
-    result = int(res) # convert to smaller integer type
+  var L = parseutils.parseInt(s, result, 0)
+  # L == 0 means that no integer was found at all; without this test the
+  # empty string would be accepted because 0 == s.len holds for it
+  if L != s.len or L == 0:
+    raise newException(EInvalidValue, "invalid integer: " & s)
 
 proc ParseBiggestInt(s: string): biggestInt =
-  var index = 0
-  result = rawParseInt(s, index)
-  if index == -1:
-    raise newException(EInvalidValue, "invalid integer: " & s)
+  var L = parseutils.parseBiggestInt(s, result, 0)
+  # L == 0 means that no integer was found at all; without this test the
+  # empty string would be accepted because 0 == s.len holds for it
+  if L != s.len or L == 0:
+    raise newException(EInvalidValue, "invalid integer: " & s)
 
 proc ParseOctInt*(s: string): int =
   var i = 0
@@ -769,72 +728,8 @@ proc ParseHexInt(s: string): int =
     else: raise newException(EInvalidValue, "invalid integer: " & s)
 
 proc ParseFloat(s: string): float =
-  var
-    esign = 1.0
-    sign = 1.0
-    i = 0
-    exponent: int
-    flags: int
-  result = 0.0
-  if s[i] == '+': inc(i)
-  elif s[i] == '-':
-    sign = -1.0
-    inc(i)
-  if s[i] == 'N' or s[i] == 'n':
-    if s[i+1] == 'A' or s[i+1] == 'a':
-      if s[i+2] == 'N' or s[i+2] == 'n':
-        if s[i+3] == '\0': return NaN
-    raise newException(EInvalidValue, "invalid float: " & s)
-  if s[i] == 'I' or s[i] == 'i':
-    if s[i+1] == 'N' or s[i+1] == 'n':
-      if s[i+2] == 'F' or s[i+2] == 'f':
-        if s[i+3] == '\0': return Inf*sign
-    raise newException(EInvalidValue, "invalid float: " & s)
-  while s[i] in {'0'..'9'}:
-    # Read integer part
-    flags = flags or 1
-    result = result * 10.0 + toFloat(ord(s[i]) - ord('0'))
-    inc(i)
-    while s[i] == '_': inc(i)
-  # Decimal?
-  if s[i] == '.':
-    var hd = 1.0
-    inc(i)
-    while s[i] in {'0'..'9'}:
-      # Read fractional part
-      flags = flags or 2
-      result = result * 10.0 + toFloat(ord(s[i]) - ord('0'))
-      hd = hd * 10.0
-      inc(i)
-      while s[i] == '_': inc(i)
-    result = result / hd # this complicated way preserves precision
-  # Again, read integer and fractional part
-  if flags == 0:
-    raise newException(EInvalidValue, "invalid float: " & s)
-  # Exponent?
-  if s[i] in {'e', 'E'}:
-    inc(i)
-    if s[i] == '+':
-      inc(i)
-    elif s[i] == '-':
-      esign = -1.0
-      inc(i)
-    if s[i] notin {'0'..'9'}:
-      raise newException(EInvalidValue, "invalid float: " & s)
-    while s[i] in {'0'..'9'}:
-      exponent = exponent * 10 + ord(s[i]) - ord('0')
-      inc(i)
-      while s[i] == '_': inc(i)
-  # Calculate Exponent
-  var hd = 1.0
-  for j in 1..exponent:
-    hd = hd * 10.0
-  if esign > 0.0: result = result * hd
-  else:           result = result / hd
-  # Not all characters are read?
-  if s[i] != '\0': raise newException(EInvalidValue, "invalid float: " & s)
-  # evaluate sign
-  result = result * sign
+  var L = parseutils.parseFloat(s, result, 0)
+  # L == 0 means that no float was found at all; without this test the
+  # empty string would be accepted because 0 == s.len holds for it
+  if L != s.len or L == 0:
+    raise newException(EInvalidValue, "invalid float: " & s)
 
 proc toOct*(x: BiggestInt, len: int): string =
   ## converts `x` into its octal representation. The resulting string is
diff --git a/lib/system.nim b/lib/system.nim
index f287d7bda..7788e40e5 100755
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -1460,6 +1460,13 @@ when not defined(EcmaScript) and not defined(NimrodVM):
       yield res
     Close(f)
 
+  iterator lines*(f: TFile): string =
+    ## Iterates over every line of the file `f`: as long as `endOfFile(f)` is
+    ## false, one line is read with `rawReadLine` and yielded. `f` is not
+    ## closed by this iterator.
+    var res = ""
+    while not endOfFile(f):
+      rawReadLine(f, res)
+      yield res
+
   proc fileHandle*(f: TFile): TFileHandle {.importc: "fileno",
                                             header: "<stdio.h>"}
     ## returns the OS file handle of the file ``f``. This is only useful for