newStringOfCap implemented and used to optimize some procs

author: Araq <rumpf_a@web.de> 2011-05-08 17:38:34 +0200
committer: Araq <rumpf_a@web.de> 2011-05-08 17:38:34 +0200
commit: d2e2d71d05b9a8381bf7fef7bb23da029e576c2a (patch)
tree: 137e4494aa119f8477204f8f974e4391f9ef1b2c
parent: 1893f4101a59497d9c5713068ad396efdddb8574 (diff)
download: Nim-d2e2d71d05b9a8381bf7fef7bb23da029e576c2a.tar.gz
10 files changed, 53 insertions, 21 deletions
diff --git a/compiler/ast.nim b/compiler/ast.nim
index fa7880c30..d1c10168a 100755
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -337,7 +337,9 @@ type
     mFields, mFieldPairs,
     mAppendStrCh, mAppendStrStr, mAppendSeqElem, 
     mInRange, mInSet, mRepr, mExit, mSetLengthStr, mSetLengthSeq, mAssert, 
-    mSwap, mIsNil, mArrToSeq, mCopyStr, mCopyStrLast, mNewString, mReset, 
+    mSwap, mIsNil, mArrToSeq, mCopyStr, mCopyStrLast, 
+    mNewString, mNewStringOfCap,
+    mReset,
     mArray, mOpenArray, mRange, mSet, mSeq, 
     mOrdinal, mInt, mInt8, mInt16, mInt32, 
     mInt64, mFloat, mFloat32, mFloat64, mBool, mChar, mString, mCstring, 
diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim
index 49afc3088..69c06fbb5 100755
--- a/compiler/ccgexprs.nim
+++ b/compiler/ccgexprs.nim
@@ -1457,7 +1457,8 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) =
   of mIncl, mExcl, mCard, mLtSet, mLeSet, mEqSet, mMulSet, mPlusSet, mMinusSet,
      mInSet:
     genSetOp(p, e, d, op)
-  of mNewString, mCopyStr, mCopyStrLast, mExit: genCall(p, e, d)
+  of mNewString, mNewStringOfCap, mCopyStr, mCopyStrLast, mExit: 
+    genCall(p, e, d)
   of mReset: genReset(p, e)
   of mEcho: genEcho(p, e)
   of mArrToSeq: genArrToSeq(p, e, d)
diff --git a/compiler/evals.nim b/compiler/evals.nim
index 20d2a68be..6fcbd911a 100755
--- a/compiler/evals.nim
+++ b/compiler/evals.nim
@@ -990,7 +990,13 @@ proc evalMagicOrCall(c: PEvalContext, n: PNode): PNode =
     var a = result
     result = newNodeIT(nkStrLit, n.info, n.typ)
     result.strVal = newString(int(getOrdValue(a)))
-  else: 
+  of mNewStringOfCap:
+    result = evalAux(c, n.sons[1], {})
+    if isSpecial(result): return 
+    var a = result
+    result = newNodeIT(nkStrLit, n.info, n.typ)
+    result.strVal = newString(0)
+  else:
     result = evalAux(c, n.sons[1], {})
     if isSpecial(result): return 
     var a = result
diff --git a/compiler/semfold.nim b/compiler/semfold.nim
index 878c0a1a6..5b7f5603d 100755
--- a/compiler/semfold.nim
+++ b/compiler/semfold.nim
@@ -1,7 +1,7 @@
 #
 #
 #           The Nimrod Compiler
-#        (c) Copyright 2010 Andreas Rumpf
+#        (c) Copyright 2011 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -205,7 +205,8 @@ proc evalOp(m: TMagic, n, a, b, c: PNode): PNode =
   of mCompileOptionArg:
     result = newIntNodeT(Ord(
       testCompileOptionArg(getStr(a), getStr(b), n.info)), n)
-  of mNewString, mExit, mInc, ast.mDec, mEcho, mAssert, mSwap, mAppendStrCh, 
+  of mNewString, mNewStringOfCap, 
+     mExit, mInc, ast.mDec, mEcho, mAssert, mSwap, mAppendStrCh, 
      mAppendStrStr, mAppendSeqElem, mSetLengthStr, mSetLengthSeq, 
      mNLen..mNError, mEqRef: 
     nil
diff --git a/doc/apis.txt b/doc/apis.txt
index 2932c05a9..f0d4db045 100755
--- a/doc/apis.txt
+++ b/doc/apis.txt
@@ -32,6 +32,7 @@ get                     get, ``[]``    consider overloading ``[]`` for get;
                                        prefix: ``len`` instead of ``getLen``
 length                  len            also used for *number of elements*
 size                    size, len      size should refer to a byte size
+capacity                cap
 memory                  mem            implies a low-level operation
 items                   items          default iterator over a collection
 pairs                   pairs          iterator over (key, value) pairs
diff --git a/lib/pure/cgi.nim b/lib/pure/cgi.nim
index af222caba..ae05d5734 100755
--- a/lib/pure/cgi.nim
+++ b/lib/pure/cgi.nim
@@ -36,7 +36,7 @@ proc URLencode*(s: string): string =
   ## ``{'A'..'Z', 'a'..'z', '0'..'9', '_'}`` are carried over to the result,
   ## a space is converted to ``'+'`` and every other character is encoded as
   ## ``'%xx'`` where ``xx`` denotes its hexadecimal value. 
-  result = ""
+  result = newStringOfCap(s.len + s.len shr 2) # assume 25% non-alnum-chars
   for i in 0..s.len-1:
     case s[i]
     of 'a'..'z', 'A'..'Z', '0'..'9', '_': add(result, s[i])
@@ -57,8 +57,9 @@ proc URLdecode*(s: string): string =
   ## is converted to a space, ``'%xx'`` (where ``xx`` denotes a hexadecimal
   ## value) is converted to the character with ordinal number ``xx``,
   ## and every other character is carried over. 
-  result = ""
+  result = newString(s.len)
   var i = 0
+  var j = 0
   while i < s.len:
     case s[i]
     of '%': 
@@ -66,10 +67,12 @@ proc URLdecode*(s: string): string =
       handleHexChar(s[i+1], x)
       handleHexChar(s[i+2], x)
       inc(i, 2)
-      add(result, chr(x))
-    of '+': add(result, ' ')
-    else: add(result, s[i])
+      result[j] = chr(x)
+    of '+': result[j] = ' '
+    else: result[j] = s[i]
     inc(i)
+    inc(j)
+  setLen(result, j)
 
 proc addXmlChar(dest: var string, c: Char) {.inline.} = 
   case c
@@ -86,7 +89,7 @@ proc XMLencode*(s: string): string =
   ## * ``>`` is replaced by ``&gt;``
   ## * ``&`` is replaced by ``&amp;``
   ## * every other character is carried over.
-  result = ""
+  result = newStringOfCap(s.len + s.len shr 2)
   for i in 0..len(s)-1: addXmlChar(result, s[i])
 
 type
@@ -367,4 +370,8 @@ proc existsCookie*(name: string): bool =
   if gcookies == nil: gcookies = parseCookies(getHttpCookie())
   result = hasKey(gcookies, name)
 
+when isMainModule:
+  const test1 = "abc\L+def xyz"
+  assert UrlEncode(test1) == "abc%0A%2Bdef+xyz"
+  assert UrlDecode(UrlEncode(test1)) == test1
 
diff --git a/lib/pure/json.nim b/lib/pure/json.nim
index 75958a55f..efadf030c 100755
--- a/lib/pure/json.nim
+++ b/lib/pure/json.nim
@@ -620,7 +620,8 @@ proc nl(s: var string, ml: bool) =
 
 proc escapeJson*(s: string): string = 
   ## Converts a string `s` to its JSON representation.
-  result = "\""
+  result = newStringOfCap(s.len + s.len shr 3)
+  result.add("\"")
   for x in runes(s):
     var r = int(x)
     if r >= 32 and r <= 127:
diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index 382eece7b..f13910dbf 100755
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -89,12 +89,16 @@ proc normalize*(s: string): string {.noSideEffect, procvar,
   rtl, extern: "nsuNormalize".} =
   ## Normalizes the string `s`. That means to convert it to lower case and
   ## remove any '_'. This is needed for Nimrod identifiers for example.
-  result = ""
+  result = newString(s.len)
+  var j = 0
   for i in 0..len(s) - 1:
     if s[i] in {'A'..'Z'}:
-      add result, Chr(Ord(s[i]) + (Ord('a') - Ord('A')))
+      result[j] = Chr(Ord(s[i]) + (Ord('a') - Ord('A')))
+      inc j
     elif s[i] != '_':
-      add result, s[i]
+      result[j] = s[i]
+      inc j
+  if j != s.len: setLen(result, j)
 
 proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
   rtl, extern: "nsuCmpIgnoreCase", procvar.} =
@@ -226,13 +230,14 @@ proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect,
   ##
   ## The variables are compared with `cmpIgnoreStyle`. `EInvalidValue` is
   ## raised if an ill-formed format string has been passed to the `%` operator.
-  result = ""
+  result = newStringOfCap(formatstr.len + a.len shl 4)
   addf(result, formatstr, a)
 
 proc `%` *(formatstr, a: string): string {.noSideEffect, 
   rtl, extern: "nsuFormatSingleElem".} =
   ## This is the same as ``formatstr % [a]``.
-  return formatstr % [a]
+  result = newStringOfCap(formatstr.len + a.len)
+  addf(result, formatstr, [a])
 
 proc strip*(s: string, leading = true, trailing = true): string {.noSideEffect,
   rtl, extern: "nsuStrip".} =
@@ -510,7 +515,7 @@ proc wordWrap*(s: string, maxLineWidth = 80,
                newLine = "\n"): string {.
                noSideEffect, rtl, extern: "nsuWordWrap".} = 
   ## word wraps `s`.
-  result = ""
+  result = newStringOfCap(s.len + s.len shr 6)
   var SpaceLeft = maxLineWidth
   for word, isSep in tokenize(s, seps):
     if len(word) > SpaceLeft:
@@ -804,7 +809,8 @@ proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
   ## The procedure has been designed so that its output is usable for many
   ## different common syntaxes. The resulting string is prefixed with
   ## `prefix` and suffixed with `suffix`. Both may be empty strings.
-  result = prefix
+  result = newStringOfCap(s.len + s.len shr 2)
+  result.add(prefix)
   for c in items(s):
     case c
     of '\0'..'\31', '\128'..'\255':
diff --git a/lib/system.nim b/lib/system.nim
index bacb4325a..93895605c 100755
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -685,7 +685,13 @@ proc newString*(len: int): string {.
   ## content. One needs to fill the string character after character
   ## with the index operator ``s[i]``. This procedure exists only for
   ## optimization purposes; the same effect can be achieved with the
-  ## ``&`` operator.
+  ## ``&`` operator or with ``add``.
+
+proc newStringOfCap*(cap: int): string {.
+  magic: "NewStringOfCap", importc: "rawNewString", noSideEffect.}
+  ## returns a new string of length ``0`` but with capacity `cap`. This
+  ## procedure exists only for optimization purposes; the same effect can 
+  ## be achieved with the ``&`` operator or with ``add``.
 
 proc `&` * (x: string, y: char): string {.
   magic: "ConStrStr", noSideEffect, merge.}
diff --git a/web/news.txt b/web/news.txt
index f8bcef3db..62851a9a7 100755
--- a/web/news.txt
+++ b/web/news.txt
@@ -74,10 +74,11 @@ Additions
 - Added the ``linearScanEnd``, ``unroll``, ``shallow`` pragmas.
 - Added ``system.reset`` and a version of ``system.open`` that 
   returns a ``TFile`` and raises an exception in case of an error.
-- The compiler now might use a hashing for string case statements depending
+- The compiler now might use hashing for string case statements depending
   on the number of string literals in the case statement.
 - Added a wrapper for ``redis``.
 - The compiler now supports array, sequence and string slicing.
+- Added ``system.newStringOfCap``.
 
 
 2010-10-20 Version 0.8.10 released
author	Araq <rumpf_a@web.de>	2011-05-08 17:38:34 +0200
committer	Araq <rumpf_a@web.de>	2011-05-08 17:38:34 +0200
commit	d2e2d71d05b9a8381bf7fef7bb23da029e576c2a (patch)
tree	137e4494aa119f8477204f8f974e4391f9ef1b2c
parent	1893f4101a59497d9c5713068ad396efdddb8574 (diff)
download	Nim-d2e2d71d05b9a8381bf7fef7bb23da029e576c2a.tar.gz