diff options
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/impure/re.nim | 10 | ||||
-rw-r--r-- | lib/pure/algorithm.nim | 101 | ||||
-rwxr-xr-x | lib/pure/strutils.nim | 9 | ||||
-rwxr-xr-x | lib/pure/unicode.nim | 18 | ||||
-rwxr-xr-x | lib/system.nim | 16 |
5 files changed, 131 insertions, 23 deletions
diff --git a/lib/impure/re.nim b/lib/impure/re.nim index 9198a5bfe..b74116395 100755 --- a/lib/impure/re.nim +++ b/lib/impure/re.nim @@ -62,7 +62,7 @@ proc finalizeRegEx(x: TRegEx) = proc re*(s: string, flags = {reExtended}): TRegEx = ## Constructor of regular expressions. Note that Nimrod's - ## extended raw string literals supports this syntax ``re"[abc]"`` as + ## extended raw string literals support this syntax ``re"[abc]"`` as ## a short form for ``re(r"[abc]")``. new(result, finalizeRegEx) result.h = rawCompile(s, cast[cint](flags)) @@ -152,8 +152,12 @@ proc find*(s: string, pattern: TRegEx, matches: var openarray[string], proc find*(s: string, pattern: TRegEx, start = 0): int = ## returns the starting position of ``pattern`` in ``s``. If it does not ## match, -1 is returned. - var matches: array[0..maxSubpatterns-1, string] - result = find(s, pattern, matches, start) + var + rawMatches: array[0..3 - 1, cint] + res = pcre.Exec(pattern.h, nil, s, len(s), start, 0'i32, + cast[ptr cint](addr(rawMatches)), 3) + if res < 0'i32: return res + return rawMatches[0] iterator findAll*(s: string, pattern: TRegEx, start = 0): string = ## yields all matching captures of pattern in `s`. diff --git a/lib/pure/algorithm.nim b/lib/pure/algorithm.nim new file mode 100644 index 000000000..c9e5b0e14 --- /dev/null +++ b/lib/pure/algorithm.nim @@ -0,0 +1,101 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2010 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements some common generic algorithms. + +type + TSortOrder* = enum ## sort order + Descending, Ascending + +proc `*`*(x: int, order: TSortOrder): int {.inline.} = + ## flips `x` if ``order == Descending``; + ## if ``order == Ascending`` then `x` is returned. + ## `x` is supposed to be the result of a comparator. + var y = order.ord - 1 + result = (x xor y) - y + +proc reverse*[T](a: var openArray[T], first, last: int) = + ## reverses the array ``a[first..last]``. + var x = first + var y = last + while x < y: + swap(a[x], a[y]) + dec(y) + inc(x) + +proc reverse*[T](a: var openArray[T]) = + ## reverses the array `a`. + reverse(a, 0, a.high) + +const + onlySafeCode = false + +proc merge[T](a, b: var openArray[T], lo, m, hi: int, + cmp: proc (x, y: T): int, order: TSortOrder) = + template `<-` (a, b: expr) = + when onlySafeCode: + a = b + else: + copyMem(addr(a), addr(b), sizeof(T)) + # optimization: If max(left) <= min(right) there is nothing to do! + # 1 2 3 4 ## 5 6 7 8 + # -> O(n) for sorted arrays. + # On random data this safes up to 40% of merge calls + if cmp(a[m], a[m+1]) * order <= 0: return + var j = lo + # copy a[j..m] into b: + assert j <= m + when onlySafeCode: + var bb = 0 + while j <= m: + b[bb] = a[j] + inc(bb) + inc(j) + else: + CopyMem(addr(b[0]), addr(a[j]), sizeof(T)*(m-j+1)) + j = m+1 + var i = 0 + var k = lo + # copy proper element back: + while k < j and j <= hi: + if cmp(b[i], a[j]) * order <= 0: + a[k] <- b[i] + inc(i) + else: + a[k] <- a[j] + inc(j) + inc(k) + # copy rest of b: + when onlySafeCode: + while k < j: + a[k] = b[i] + inc(k) + inc(i) + else: + if k < j: copyMem(addr(a[k]), addr(b[i]), sizeof(T)*(j-k)) + +proc sort*[T](a: var openArray[T], + cmp: proc (x, y: T): int = cmp, + order = TSortOrder.Ascending) = + ## Default Nimrod sort. The sorting is guaranteed to be stable and + ## the worst case is guaranteed to be O(n log n). + ## The current implementation uses an iterative + ## mergesort to achieve this. It uses a temporary sequence of + ## length ``a.len div 2``. + var n = a.len + var b: seq[T] + newSeq(b, n div 2) + var s = 1 + while s < n: + var m = n-1-s + while m >= 0: + merge(a, b, max(m-s+1, 0), m, m+s, cmp, order) + dec(m, s*2) + s = s*2 + diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 8d5966670..0673a9588 100755 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -97,14 +97,15 @@ proc normalize*(s: string): string {.noSideEffect, procvar, add result, s[i] proc cmpIgnoreCase*(a, b: string): int {.noSideEffect, - rtl, extern: "nsuCmpIgnoreCase".} = + rtl, extern: "nsuCmpIgnoreCase", procvar.} = ## Compares two strings in a case insensitive manner. Returns: ## ## | 0 iff a == b ## | < 0 iff a < b ## | > 0 iff a > b - var i = 0 - while i < a.len and i < b.len: + var i = 0 + var m = min(a.len, b.len) + while i < m: result = ord(toLower(a[i])) - ord(toLower(b[i])) if result != 0: return inc(i) @@ -114,7 +115,7 @@ proc cmpIgnoreCase*(a, b: string): int {.noSideEffect, # thus we compile without checks here proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect, - rtl, extern: "nsuCmpIgnoreStyle".} = + rtl, extern: "nsuCmpIgnoreStyle", procvar.} = ## Compares two strings normalized (i.e. case and ## underscores do not matter). Returns: ## diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim index 0939a3066..f0b906b1f 100755 --- a/lib/pure/unicode.nim +++ b/lib/pure/unicode.nim @@ -1075,7 +1075,7 @@ proc binarySearch(c: irune, tab: openArray[iRune], len, stride: int): int = return t return -1 -proc toLower*(c: TRune): TRune {.rtl, extern: "nuc$1".} = +proc toLower*(c: TRune): TRune {.rtl, extern: "nuc$1", procvar.} = ## Converts `c` into lower case. This works for any Unicode character. ## If possible, prefer `toLower` over `toUpper`. var c = irune(c) @@ -1087,7 +1087,7 @@ proc toLower*(c: TRune): TRune {.rtl, extern: "nuc$1".} = return TRune(c + toLowerSinglets[p+1] - 500) return TRune(c) -proc toUpper*(c: TRune): TRune {.rtl, extern: "nuc$1".} = +proc toUpper*(c: TRune): TRune {.rtl, extern: "nuc$1", procvar.} = ## Converts `c` into upper case. This works for any Unicode character. ## If possible, prefer `toLower` over `toUpper`. var c = irune(c) @@ -1099,14 +1099,14 @@ proc toUpper*(c: TRune): TRune {.rtl, extern: "nuc$1".} = return TRune(c + toUpperSinglets[p+1] - 500) return TRune(c) -proc toTitle*(c: TRune): TRune {.rtl, extern: "nuc$1".} = +proc toTitle*(c: TRune): TRune {.rtl, extern: "nuc$1", procvar.} = var c = irune(c) var p = binarySearch(c, toTitleSinglets, len(toTitleSinglets) div 2, 2) if p >= 0 and c == toTitleSinglets[p]: return TRune(c + toTitleSinglets[p+1] - 500) return TRune(c) -proc isLower*(c: TRune): bool {.rtl, extern: "nuc$1".} = +proc isLower*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} = ## returns true iff `c` is a lower case Unicode character ## If possible, prefer `isLower` over `isUpper`. var c = irune(c) @@ -1118,7 +1118,7 @@ proc isLower*(c: TRune): bool {.rtl, extern: "nuc$1".} = if p >= 0 and c == toUpperSinglets[p]: return true -proc isUpper*(c: TRune): bool {.rtl, extern: "nuc$1".} = +proc isUpper*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} = ## returns true iff `c` is a upper case Unicode character ## If possible, prefer `isLower` over `isUpper`. var c = irune(c) @@ -1130,7 +1130,7 @@ proc isUpper*(c: TRune): bool {.rtl, extern: "nuc$1".} = if p >= 0 and c == toLowerSinglets[p]: return true -proc isAlpha*(c: TRune): bool {.rtl, extern: "nuc$1".} = +proc isAlpha*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} = ## returns true iff `c` is an *alpha* Unicode character (i.e. a letter) if isUpper(c) or isLower(c): return true @@ -1142,10 +1142,10 @@ proc isAlpha*(c: TRune): bool {.rtl, extern: "nuc$1".} = if p >= 0 and c == alphaSinglets[p]: return true -proc isTitle*(c: TRune): bool {.rtl, extern: "nuc$1".} = +proc isTitle*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} = return isUpper(c) and isLower(c) -proc isWhiteSpace*(c: TRune): bool {.rtl, extern: "nuc$1".} = +proc isWhiteSpace*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} = ## returns true iff `c` is a Unicode whitespace character var c = irune(c) var p = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2) @@ -1161,7 +1161,7 @@ iterator runes*(s: string): TRune = fastRuneAt(s, i, result, true) yield result -proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1".} = +proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1", procvar.} = ## compares two UTF8 strings and ignores the case. Returns: ## ## | 0 iff a == b diff --git a/lib/system.nim b/lib/system.nim index cf92594d9..aeca9b683 100755 --- a/lib/system.nim +++ b/lib/system.nim @@ -623,7 +623,7 @@ template `not_in` * (x, y: expr): expr = not contains(y, x) proc `is` *[T, S](x: T, y: S): bool {.magic: "Is", noSideEffect.} template `is_not` *(x, y: expr): expr = not (x is y) -proc cmp*[T, S: typeDesc](x: T, y: S): int = +proc cmp*[T, S: typeDesc](x: T, y: S): int {.procvar.} = ## Generic compare proc. Returns a value < 0 iff x < y, a value > 0 iff x > y ## and 0 iff x == y. This is useful for writing generic algorithms without ## performance loss. This generic implementation uses the `==` and `<` @@ -632,7 +632,7 @@ proc cmp*[T, S: typeDesc](x: T, y: S): int = if x < y: return -1 return 1 -proc cmp*(x, y: string): int {.noSideEffect.} +proc cmp*(x, y: string): int {.noSideEffect, procvar.} ## Compare proc for strings. More efficient than the generic version. proc `@` * [IDX, T](a: array[IDX, T]): seq[T] {. @@ -714,7 +714,8 @@ const cpuEndian* {.magic: "CpuEndian"}: TEndian = littleEndian ## is the endianness of the target CPU. This is a valuable piece of - ## information for low-level code only. This works thanks to compiler magic. + ## information for low-level code only. This works thanks to compiler + ## magic. hostOS* {.magic: "HostOS"}: string = "" ## a string that describes the host operating system. Possible values: @@ -1307,7 +1308,7 @@ proc echo*[Ty](x: openarray[Ty]) {.magic: "Echo".} ## available for the ECMAScript target too! template newException*(exceptn, message: expr): expr = - ## creates an exception object of type "exceptn" and sets its ``msg`` field + ## creates an exception object of type ``exceptn`` and sets its ``msg`` field ## to `message`. Returns the new exception object. block: # open a new scope var @@ -1365,7 +1366,7 @@ when not defined(EcmaScript) and not defined(NimrodVM): include "system/ansi_c" proc cmp(x, y: string): int = - return int(c_strcmp(x, y)) + result = int(c_strcmp(x, y)) const pccHack = if defined(pcc): "_" else: "" # Hack for PCC when defined(windows): @@ -1419,8 +1420,9 @@ when not defined(EcmaScript) and not defined(NimrodVM): ## stream* is a good idea, but since it is named ``stderr`` there are few ## programs out there that distinguish properly between ``stdout`` and ## ``stderr``. So, that's what you get if you don't name your variables - ## appropriately. It also annoys people if redirection via ``>output.txt`` - ## does not work because the program writes to ``stderr``. + ## appropriately. It also annoys people if redirection + ## via ``>output.txt`` does not work because the program writes + ## to ``stderr``. proc Open*(f: var TFile, filename: string, mode: TFileMode = fmRead, bufSize: int = -1): Bool |