diff options
Diffstat (limited to 'nimlib/pure')
28 files changed, 11926 insertions, 0 deletions
diff --git a/nimlib/pure/cgi.nim b/nimlib/pure/cgi.nim new file mode 100755 index 000000000..baae244e7 --- /dev/null +++ b/nimlib/pure/cgi.nim @@ -0,0 +1,375 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements helper procs for CGI applications. Example:
##
## .. code-block:: Nimrod
##
##    import strtabs, cgi
##
##    # Fill the values when debugging:
##    when debug:
##      setTestData("name", "Klaus", "password", "123456")
##    # read the data into `myData`
##    var myData = readData()
##    # check that the data's variable names are "name" or "password"
##    validateData(myData, "name", "password")
##    # start generating content:
##    writeContentType()
##    # generate content:
##    write(stdout, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n")
##    write(stdout, "<html><head><title>Test</title></head><body>\n")
##    writeln(stdout, "your name: " & myData["name"])
##    writeln(stdout, "your password: " & myData["password"])
##    writeln(stdout, "</body></html>")

import strutils, os, strtabs

proc URLencode*(s: string): string =
  ## Encodes a value to be HTTP safe: This means that characters in the set
  ## ``{'A'..'Z', 'a'..'z', '0'..'9', '_'}`` are carried over to the result,
  ## a space is converted to ``'+'`` and every other character is encoded as
  ## ``'%xx'`` where ``xx`` denotes its hexadecimal value.
  result = ""
  for i in 0..s.len-1:
    case s[i]
    of 'a'..'z', 'A'..'Z', '0'..'9', '_': add(result, s[i])
    of ' ': add(result, '+')
    else:
      add(result, '%')
      add(result, toHex(ord(s[i]), 2))

proc handleHexChar(c: char, x: var int) {.inline.} =
  ## Shifts `x` left by one hex digit and ors in the value of the hex digit
  ## `c`. Any character that is not a hex digit triggers ``assert(false)``.
  case c
  of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
  of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
  of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
  else: assert(false)

proc URLdecode*(s: string): string =
  ## Decodes a value from its HTTP representation: This means that a ``'+'``
  ## is converted to a space, ``'%xx'`` (where ``xx`` denotes a hexadecimal
  ## value) is converted to the character with ordinal number ``xx``,
  ## and every other character is carried over.
  result = ""
  var i = 0
  while i < s.len:
    case s[i]
    of '%':
      var x = 0
      handleHexChar(s[i+1], x)
      handleHexChar(s[i+2], x)
      inc(i, 2) # skip the two hex digits; the '%' itself is skipped below
      add(result, chr(x))
    of '+': add(result, ' ')
    else: add(result, s[i])
    inc(i)

proc addXmlChar(dest: var string, c: Char) {.inline.} =
  ## Appends `c` to `dest`, replacing the four characters that are special
  ## in XML by their predefined entity references.
  case c
  of '&': add(dest, "&amp;")
  of '<': add(dest, "&lt;")
  of '>': add(dest, "&gt;")
  of '\"': add(dest, "&quot;")
  else: add(dest, c)

proc XMLencode*(s: string): string =
  ## Encodes a value to be XML safe:
  ## * ``"`` is replaced by ``&quot;``
  ## * ``<`` is replaced by ``&lt;``
  ## * ``>`` is replaced by ``&gt;``
  ## * ``&`` is replaced by ``&amp;``
  ## * every other character is carried over.
  result = ""
  for i in 0..len(s)-1: addXmlChar(result, s[i])

type
  ECgi* = object of EIO  ## the exception that is raised, if a CGI error occurs
  TRequestMethod* = enum ## the used request method
    methodNone,          ## no REQUEST_METHOD environment variable
    methodPost,          ## query uses the POST method
    methodGet            ## query uses the GET method

proc cgiError*(msg: string) {.noreturn.} =
  ## raises an ECgi exception with message `msg`.
  var e: ref ECgi
  new(e)
  e.msg = msg
  raise e

proc getEncodedData(allowedMethods: set[TRequestMethod]): string =
  ## Returns the still URL-encoded request data: for ``POST`` it reads
  ## ``CONTENT_LENGTH`` bytes from `stdin`, for ``GET`` it returns the
  ## ``QUERY_STRING`` environment variable. For any other request method the
  ## result is left unset. Raises `ECgi` if the method in use is not listed
  ## in `allowedMethods`.
  case getenv("REQUEST_METHOD")
  of "POST":
    if methodPost notin allowedMethods:
      cgiError("'REQUEST_METHOD' 'POST' is not supported")
    var L = parseInt(getenv("CONTENT_LENGTH"))
    result = newString(L)
    if readBuffer(stdin, addr(result[0]), L) != L:
      cgiError("cannot read from stdin")
  of "GET":
    if methodGet notin allowedMethods:
      cgiError("'REQUEST_METHOD' 'GET' is not supported")
    result = getenv("QUERY_STRING")
  else:
    if methodNone notin allowedMethods:
      cgiError("'REQUEST_METHOD' must be 'POST' or 'GET'")

iterator decodeData*(allowedMethods: set[TRequestMethod] =
       {methodNone, methodPost, methodGet}): tuple[key, value: string] =
  ## Reads and decodes CGI data and yields the (name, value) pairs the
  ## data consists of. If the client does not use a method listed in the
  ## `allowedMethods` set, an `ECgi` exception is raised.
  var enc = getEncodedData(allowedMethods)
  if not isNil(enc):
    # decode everything in one pass:
    var i = 0
    var name = ""
    var value = ""
    while enc[i] != '\0':
      setLen(name, 0) # reuse memory
      while true:
        case enc[i]
        of '\0': break
        of '%':
          var x = 0
          handleHexChar(enc[i+1], x)
          handleHexChar(enc[i+2], x)
          inc(i, 2)
          add(name, chr(x))
        of '+': add(name, ' ')
        of '=', '&': break
        else: add(name, enc[i])
        inc(i)
      if enc[i] != '=': cgiError("'=' expected")
      inc(i) # skip '='
      setLen(value, 0) # reuse memory
      while true:
        case enc[i]
        of '%':
          var x = 0
          handleHexChar(enc[i+1], x)
          handleHexChar(enc[i+2], x)
          inc(i, 2)
          add(value, chr(x))
        of '+': add(value, ' ')
        of '&', '\0': break
        else: add(value, enc[i])
        inc(i)
      yield (name, value)
      if enc[i] == '&': inc(i)
      elif enc[i] == '\0': break
      else: cgiError("'&' expected")

proc readData*(allowedMethods: set[TRequestMethod] =
               {methodNone, methodPost, methodGet}): PStringTable =
  ## Read CGI data. If the client does not use a method listed in the
  ## `allowedMethods` set, an `ECgi` exception is raised.
  result = newStringTable()
  for name, value in decodeData(allowedMethods):
    result[name] = value

proc validateData*(data: PStringTable, validKeys: openarray[string]) =
  ## validates data; raises `ECgi` if this fails. This checks that each variable
  ## name of the CGI `data` occurs in the `validKeys` array.
  for key, val in pairs(data):
    if find(validKeys, key) < 0:
      cgiError("unknown variable name: " & key)

proc getContentLength*(): string =
  ## returns contents of the ``CONTENT_LENGTH`` environment variable
  return getenv("CONTENT_LENGTH")

proc getContentType*(): string =
  ## returns contents of the ``CONTENT_TYPE`` environment variable
  # the name must be spelled in upper case: environment variable names are
  # case sensitive on POSIX systems
  return getenv("CONTENT_TYPE")

proc getDocumentRoot*(): string =
  ## returns contents of the ``DOCUMENT_ROOT`` environment variable
  return getenv("DOCUMENT_ROOT")

proc getGatewayInterface*(): string =
  ## returns contents of the ``GATEWAY_INTERFACE`` environment variable
  return getenv("GATEWAY_INTERFACE")

proc getHttpAccept*(): string =
  ## returns contents of the ``HTTP_ACCEPT`` environment variable
  return getenv("HTTP_ACCEPT")

proc getHttpAcceptCharset*(): string =
  ## returns contents of the ``HTTP_ACCEPT_CHARSET`` environment variable
  return getenv("HTTP_ACCEPT_CHARSET")

proc getHttpAcceptEncoding*(): string =
  ## returns contents of the ``HTTP_ACCEPT_ENCODING`` environment variable
  return getenv("HTTP_ACCEPT_ENCODING")

proc getHttpAcceptLanguage*(): string =
  ## returns contents of the ``HTTP_ACCEPT_LANGUAGE`` environment variable
  return getenv("HTTP_ACCEPT_LANGUAGE")

proc getHttpConnection*(): string =
  ## returns contents of the ``HTTP_CONNECTION`` environment variable
  return getenv("HTTP_CONNECTION")

proc getHttpCookie*(): string =
  ## returns contents of the ``HTTP_COOKIE`` environment variable
  return getenv("HTTP_COOKIE")

proc getHttpHost*(): string =
  ## returns contents of the ``HTTP_HOST`` environment variable
  return getenv("HTTP_HOST")

proc getHttpReferer*(): string =
  ## returns contents of the ``HTTP_REFERER`` environment variable
  return getenv("HTTP_REFERER")

proc getHttpUserAgent*(): string =
  ## returns contents of the ``HTTP_USER_AGENT`` environment variable
  return getenv("HTTP_USER_AGENT")

proc getPathInfo*(): string =
  ## returns contents of the ``PATH_INFO`` environment variable
  return getenv("PATH_INFO")

proc getPathTranslated*(): string =
  ## returns contents of the ``PATH_TRANSLATED`` environment variable
  return getenv("PATH_TRANSLATED")

proc getQueryString*(): string =
  ## returns contents of the ``QUERY_STRING`` environment variable
  return getenv("QUERY_STRING")

proc getRemoteAddr*(): string =
  ## returns contents of the ``REMOTE_ADDR`` environment variable
  return getenv("REMOTE_ADDR")

proc getRemoteHost*(): string =
  ## returns contents of the ``REMOTE_HOST`` environment variable
  return getenv("REMOTE_HOST")

proc getRemoteIdent*(): string =
  ## returns contents of the ``REMOTE_IDENT`` environment variable
  return getenv("REMOTE_IDENT")

proc getRemotePort*(): string =
  ## returns contents of the ``REMOTE_PORT`` environment variable
  return getenv("REMOTE_PORT")

proc getRemoteUser*(): string =
  ## returns contents of the ``REMOTE_USER`` environment variable
  return getenv("REMOTE_USER")

proc getRequestMethod*(): string =
  ## returns contents of the ``REQUEST_METHOD`` environment variable
  return getenv("REQUEST_METHOD")

proc getRequestURI*(): string =
  ## returns contents of the ``REQUEST_URI`` environment variable
  return getenv("REQUEST_URI")

proc getScriptFilename*(): string =
  ## returns contents of the ``SCRIPT_FILENAME`` environment variable
  return getenv("SCRIPT_FILENAME")

proc getScriptName*(): string =
  ## returns contents of the ``SCRIPT_NAME`` environment variable
  return getenv("SCRIPT_NAME")

proc getServerAddr*(): string =
  ## returns contents of the ``SERVER_ADDR`` environment variable
  return getenv("SERVER_ADDR")

proc getServerAdmin*(): string =
  ## returns contents of the ``SERVER_ADMIN`` environment variable
  return getenv("SERVER_ADMIN")

proc getServerName*(): string =
  ## returns contents of the ``SERVER_NAME`` environment variable
  return getenv("SERVER_NAME")

proc getServerPort*(): string =
  ## returns contents of the ``SERVER_PORT`` environment variable
  return getenv("SERVER_PORT")

proc getServerProtocol*(): string =
  ## returns contents of the ``SERVER_PROTOCOL`` environment variable
  return getenv("SERVER_PROTOCOL")

proc getServerSignature*(): string =
  ## returns contents of the ``SERVER_SIGNATURE`` environment variable
  return getenv("SERVER_SIGNATURE")

proc getServerSoftware*(): string =
  ## returns contents of the ``SERVER_SOFTWARE`` environment variable
  return getenv("SERVER_SOFTWARE")

proc setTestData*(keysvalues: openarray[string]) =
  ## fills the appropriate environment variables to test your CGI application.
  ## This can only simulate the 'GET' request method. `keysvalues` should
  ## provide embedded (name, value)-pairs. Example:
  ##
  ## .. code-block:: Nimrod
  ##    setTestData("name", "Hanz", "password", "12345")
  putenv("REQUEST_METHOD", "GET")
  var i = 0
  var query = ""
  while i < keysvalues.len:
    add(query, URLencode(keysvalues[i]))
    add(query, '=')
    add(query, URLencode(keysvalues[i+1]))
    add(query, '&')
    inc(i, 2)
  putenv("QUERY_STRING", query)

proc writeContentType*() =
  ## call this before starting to send your HTML data to `stdout`. This
  ## implements this part of the CGI protocol:
  ##
  ## .. code-block:: Nimrod
  ##     write(stdout, "Content-type: text/html\n\n")
  ##
  ## It also modifies the debug stack traces so that they contain
  ## ``<br />`` and are easily readable in a browser.
  write(stdout, "Content-type: text/html\n\n")
  system.stackTraceNewLine = "<br />\n"

proc setCookie*(name, value: string) =
  ## Sets a cookie.
  write(stdout, "Set-Cookie: ", name, "=", value, "\n")

var
  cookies: PStringTable = nil # filled on first use by getCookie/existsCookie

proc parseCookies(s: string): PStringTable =
  ## Splits a ``key=value; key=value`` cookie string into a case insensitive
  ## string table. Blanks and tabs in front of a key are skipped.
  result = newStringTable(modeCaseInsensitive)
  var i = 0
  while true:
    while s[i] == ' ' or s[i] == '\t': inc(i)
    var keystart = i
    while s[i] != '=' and s[i] != '\0': inc(i)
    var keyend = i-1
    if s[i] == '\0': break
    inc(i) # skip '='
    var valstart = i
    while s[i] != ';' and s[i] != '\0': inc(i)
    result[copy(s, keystart, keyend)] = copy(s, valstart, i-1)
    if s[i] == '\0': break
    inc(i) # skip ';'

proc getCookie*(name: string): string =
  ## Gets a cookie. If no cookie of `name` exists, "" is returned.
  if cookies == nil: cookies = parseCookies(getHttpCookie())
  result = cookies[name]

proc existsCookie*(name: string): bool =
  ## Checks if a cookie of `name` exists.
  if cookies == nil: cookies = parseCookies(getHttpCookie())
  result = hasKey(cookies, name)


diff --git a/nimlib/pure/complex.nim b/nimlib/pure/complex.nim new file mode 100755 index 000000000..f50ff4bd0 --- /dev/null +++ b/nimlib/pure/complex.nim @@ -0,0 +1,106 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2006 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#



## This module implements complex numbers.

{.push checks:off, line_dir:off, stack_trace:off, debugger:off.}
# the user does not want to trace a part
# of the standard library!

import
  math

type
  TComplex* = tuple[re, im: float]
    ## a complex number, consisting of a real and an imaginary part

proc `==` *(x, y: TComplex): bool =
  ## Compare two complex numbers `x` and `y` for equality.
  result = x.re == y.re and x.im == y.im

proc `+` *(x, y: TComplex): TComplex =
  ## Add two complex numbers.
  result.re = x.re + y.re
  result.im = x.im + y.im

proc `-` *(x, y: TComplex): TComplex =
  ## Subtract two complex numbers.
  result.re = x.re - y.re
  result.im = x.im - y.im

proc `-` *(z: TComplex): TComplex =
  ## Unary minus for complex numbers.
  result.re = -z.re
  result.im = -z.im

proc `/` *(x, y: TComplex): TComplex =
  ## Divide `x` by `y`.
  # the quotient is scaled by the larger component of `y` so that no
  # intermediate product of both components is formed
  var
    r, den: float
  if abs(y.re) < abs(y.im):
    r = y.re / y.im
    den = y.im + r * y.re
    result.re = (x.re * r + x.im) / den
    result.im = (x.im * r - x.re) / den
  else:
    r = y.im / y.re
    den = y.re + r * y.im
    result.re = (x.re + r * x.im) / den
    result.im = (x.im - r * x.re) / den

proc `*` *(x, y: TComplex): TComplex =
  ## Multiply `x` with `y`.
  result.re = x.re * y.re - x.im * y.im
  result.im = x.im * y.re + x.re * y.im

proc abs*(z: TComplex): float =
  ## Return the distance from (0,0) to `z`.

  # optimized by checking special cases (sqrt is expensive)
  var x, y, temp: float

  x = abs(z.re)
  y = abs(z.im)
  if x == 0.0:
    result = y
  elif y == 0.0:
    result = x
  elif x > y:
    temp = y / x
    result = x * sqrt(1.0 + temp * temp)
  else:
    temp = x / y
    result = y * sqrt(1.0 + temp * temp)

proc sqrt*(z: TComplex): TComplex =
  ## Square root for a complex number `z`.
  var x, y, w, r: float

  if z.re == 0.0 and z.im == 0.0:
    result = z
  else:
    x = abs(z.re)
    y = abs(z.im)
    if x >= y:
      r = y / x
      w = sqrt(x) * sqrt(0.5 * (1.0 + sqrt(1.0 + r * r)))
    else:
      r = x / y
      w = sqrt(y) * sqrt(0.5 * (r + sqrt(1.0 + r * r)))
    if z.re >= 0.0:
      result.re = w
      result.im = z.im / (w * 2)
    else:
      if z.im >= 0.0: result.im = w
      else: result.im = -w
      # the real part is derived from the imaginary part that was just
      # stored in `result`
      result.re = z.im / (result.im + result.im)

{.pop.}
diff --git a/nimlib/pure/dynlib.nim b/nimlib/pure/dynlib.nim new file mode 100755 index 000000000..592073e3d --- /dev/null +++ b/nimlib/pure/dynlib.nim @@ -0,0 +1,84 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements the ability to access symbols from shared
## libraries. On POSIX this uses the ``dlsym`` mechanism, on
## Windows ``LoadLibrary``.

type
  TLibHandle* = pointer ## a handle to a dynamically loaded library

proc LoadLib*(path: string): TLibHandle
  ## loads a library from `path`. Returns nil if the library could not
  ## be loaded.

proc UnloadLib*(lib: TLibHandle)
  ## unloads the library `lib`

proc symAddr*(lib: TLibHandle, name: string): pointer
  ## retrieves the address of a procedure/variable from `lib`. Returns nil
  ## if the symbol could not be found.

proc checkedSymAddr*(lib: TLibHandle, name: string): pointer =
  ## retrieves the address of a procedure/variable from `lib`. Raises
  ## `EInvalidLibrary` if the symbol could not be found.
  result = symAddr(lib, name)
  if result == nil:
    var e: ref EInvalidLibrary
    new(e)
    e.msg = "could not find symbol: " & name
    raise e

when defined(posix):
  #
  # =========================================================================
  # This is an implementation based on the dlfcn interface.
  # The dlfcn interface is available in Linux, SunOS, Solaris, IRIX, FreeBSD,
  # NetBSD, AIX 4.2, HPUX 11, and probably most other Unix flavors, at least
  # as an emulation layer on top of native functions.
  # =========================================================================
  #
  var
    RTLD_NOW {.importc: "RTLD_NOW", header: "<dlfcn.h>".}: int

  proc dlclose(lib: TLibHandle) {.importc, header: "<dlfcn.h>".}
  proc dlopen(path: CString, mode: int): TLibHandle {.
      importc, header: "<dlfcn.h>".}
  proc dlsym(lib: TLibHandle, name: cstring): pointer {.
      importc, header: "<dlfcn.h>".}

  proc LoadLib(path: string): TLibHandle = return dlopen(path, RTLD_NOW)
  proc UnloadLib(lib: TLibHandle) = dlclose(lib)
  proc symAddr(lib: TLibHandle, name: string): pointer =
    return dlsym(lib, name)

elif defined(windows) or defined(dos):
  #
  # =======================================================================
  # Native Windows Implementation
  # =======================================================================
  #
  type
    THINSTANCE {.importc: "HINSTANCE".} = pointer

  proc FreeLibrary(lib: THINSTANCE) {.importc, header: "<windows.h>", stdcall.}
  proc winLoadLibrary(path: cstring): THINSTANCE {.
      importc: "LoadLibraryA", header: "<windows.h>", stdcall.}
  proc GetProcAddress(lib: THINSTANCE, name: cstring): pointer {.
      importc: "GetProcAddress", header: "<windows.h>", stdcall.}

  proc LoadLib(path: string): TLibHandle =
    result = cast[TLibHandle](winLoadLibrary(path))
  proc UnloadLib(lib: TLibHandle) = FreeLibrary(cast[THINSTANCE](lib))

  proc symAddr(lib: TLibHandle, name: string): pointer =
    result = GetProcAddress(cast[THINSTANCE](lib), name)

else:
  {.error: "no implementation for dynlib".}
diff --git a/nimlib/pure/hashes.nim b/nimlib/pure/hashes.nim new file mode 100755 index 000000000..1593119bd --- /dev/null +++ b/nimlib/pure/hashes.nim @@ -0,0 +1,97 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2008 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements efficient computations of hash values for diverse
## Nimrod types.

import
  strutils

type
  THash* = int ## a hash value; hash tables using these values should
               ## always have a size of a power of two and can use the ``and``
               ## operator instead of ``mod`` for truncation of the hash value.

proc concHash(h: THash, val: int): THash {.inline.} =
  ## mixes `val` into the running hash `h` and returns the new running hash.
  result = h +% val
  result = result +% result shl 10
  result = result xor (result shr 6)

proc finishHash(h: THash): THash {.inline.} =
  ## applies the final mixing steps to the running hash `h`.
  result = h +% h shl 3
  result = result xor (result shr 11)
  result = result +% result shl 15

proc hashData*(Data: Pointer, Size: int): THash =
  ## hashes an array of bytes of size `size`
  var
    h: THash
    p: cstring
    i, s: int
  h = 0
  p = cast[cstring](Data)
  i = 0
  s = Size
  while s > 0:
    h = concHash(h, ord(p[i]))
    Inc(i)
    Dec(s)
  result = finishHash(h)

proc hash*(x: Pointer): THash {.inline.} =
  ## efficient hashing of pointers
  result = (cast[THash](x)) shr 3 # skip the alignment

proc hash*(x: int): THash {.inline.} =
  ## efficient hashing of integers
  result = x

proc hash*(x: int64): THash {.inline.} =
  ## efficient hashing of integers
  result = toU32(x)

proc hash*(x: char): THash {.inline.} =
  ## efficient hashing of characters
  result = ord(x)

proc hash*(x: string): THash =
  ## efficient hashing of strings
  var h: THash
  h = 0
  for i in 0..x.len-1:
    h = concHash(h, ord(x[i]))
  result = finishHash(h)

proc hashIgnoreStyle*(x: string): THash =
  ## efficient hashing of strings; style is ignored
  var
    h: THash
    c: Char
  h = 0
  for i in 0..x.len-1:
    c = x[i]
    if c == '_':
      continue                  # skip _
    if c in {'A'..'Z'}:
      c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
    h = concHash(h, ord(c))
  result = finishHash(h)

proc hashIgnoreCase*(x: string): THash =
  ## efficient hashing of strings; case is ignored
  var
    h: THash
    c: Char
  h = 0
  for i in 0..x.len-1:
    c = x[i]
    if c in {'A'..'Z'}:
      c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
    h = concHash(h, ord(c))
  result = finishHash(h)
diff --git a/nimlib/pure/hashtabs.nim b/nimlib/pure/hashtabs.nim new file mode 100755 index 000000000..68d19d63b --- /dev/null +++ b/nimlib/pure/hashtabs.nim @@ -0,0 +1,163 @@
#
#
#            Nimrod's Runtime Library
#
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## The ``hashtabs`` module implements an efficient generic hash
## table/dictionary data type.

import
  hashes

const
  growthFactor = 2
  startSize = 8
  sham = sizeof(THash)*8-2 # shift amount
  mask = 0b11 shl sham     # the two topmost bits of a stored hash hold the
                           # slot state
  usedSlot = 0b10 shl sham
  delSlot = 0b01 shl sham
  emptySlot = 0

# NOTE(review): most routines below use `TKey`/`TValue` without declaring
# them as generic parameters (only `rawInsert` does) -- confirm that the
# targeted compiler version accepts this.

type
  TTable*[TKey, TValue] = object
    counter: int
    data: seq[tuple[key: TKey, val: TValue, h: THash]]

proc init*(t: var TTable, size = startSize) =
  ## resets `t` to an empty table with `size` slots.
  t.counter = 0
  newSeq(t.data, size)

proc markUsed(h: THash): THash {.inline.} =
  ## replaces the slot state bits of `h` by the "used" marker.
  return h and not mask or usedSlot

proc len*(t: TTable): int {.inline.} =
  ## returns the number of keys in `t`.
  result = t.counter

proc mustRehash(length, counter: int): bool =
  ## returns true if a table with `length` slots of which `counter` are
  ## occupied is more than 2/3 full or has fewer than 4 free slots.
  assert(length > counter)
  result = (length * 2 < counter * 3) or (length - counter < 4)

proc nextTry(h, maxHash: THash): THash {.inline.} =
  ## returns the slot to probe after slot `h`.
  result = ((5 * h) + 1) and maxHash

template eq(a, b: expr): expr = a == b

proc rawGet(t: TTable, key: TKey, fullhash: THash): int =
  ## returns the index of the slot that holds `key` or -1 if there is none.
  ## `fullhash` must be ``hash(key)``.
  var h = fullhash and high(t.data)
  while (t.data[h].h and mask) != 0:
    # If it is a deleted entry, the comparison with ``markUsed(fullhash)``
    # fails, so there is no need to check for this explicitly.
    if t.data[h].h == markUsed(fullhash) and eq(t.data[h].key, key): return h
    h = nextTry(h, high(t.data))
  result = - 1

proc `[]`*(t: TTable, key: TKey): TValue =
  ## retrieves the value at ``t[key]``. If `key` is not in `t`,
  ## `EInvalidValue` is raised.
  var index = rawGet(t, key, hash(key))
  if index >= 0: result = t.data[index].val
  else:
    var e: ref EInvalidValue
    new(e)
    e.msg = "invalid key: " & $key
    raise e

proc hasKey*(t: TTable, key: TKey): bool =
  ## returns true iff `key` is in the table `t`.
  result = rawGet(t, key, hash(key)) >= 0

proc rawInsert[TKey, TValue](
               data: var seq[tuple[key: TKey, val: TValue, h: THash]],
               tup: tuple[key: TKey, val: TValue, h: THash]) =
  ## stores `tup` in the first slot of its probe sequence that is not in use.
  var h = tup.h and high(data)
  while (data[h].h and mask) == usedSlot: h = nextTry(h, high(data))
  data[h] = tup

proc enlarge(t: var TTable) =
  ## grows the slot array by `growthFactor` and re-inserts all used entries.
  var n: seq[tuple[key: TKey, val: TValue, h: THash]]
  newSeq(n, len(t.data) * growthFactor)
  for i in 0..high(t.data):
    if (t.data[i].h and mask) == usedSlot: rawInsert(n, t.data[i])
  swap(t.data, n)

proc `[]=`*(t: var TTable, key: TKey, val: TValue) =
  ## puts a (key, value)-pair into `t`.
  var fullhash = hash(key)
  var index = rawGet(t, key, fullhash)
  if index >= 0:
    t.data[index].val = val
  else:
    if mustRehash(len(t.data), t.counter): enlarge(t)
    rawInsert(t.data, (key, val, markUsed(fullhash)))
    inc(t.counter)

proc add*(t: var TTable, key: TKey, val: TValue) =
  ## puts a (key, value)-pair into `t`, but does not check if key already
  ## exists.
  if mustRehash(len(t.data), t.counter): enlarge(t)
  rawInsert(t.data, (key, val, markUsed(hash(key))))
  inc(t.counter)

proc del*(t: var TTable, key: TKey) =
  ## deletes a (key, val)-pair in `t`.
  # NOTE(review): `t.counter` is not decremented here, so `len` keeps
  # counting deleted entries -- confirm whether this is intended.
  var index = rawGet(t, key, hash(key))
  if index >= 0:
    t.data[index].h = delSlot

proc delAll*(t: var TTable, key: TKey) =
  ## deletes all (key, val)-pairs with the key `key` in `t`.
  var fullhash = hash(key)
  while true:
    var index = rawGet(t, key, fullhash)
    if index < 0: break
    t.data[index].h = delSlot

iterator pairs*(t: TTable): tuple[key: TKey, value: TValue] =
  ## iterates over any (key, value) pair in the table `t`.
  for h in 0..high(t.data):
    if (t.data[h].h and mask) == usedSlot:
      yield (t.data[h].key, t.data[h].val)

iterator keys*(t: TTable): TKey =
  ## iterate over any key in the table `t`. If key occurs multiple times, it
  ## is yielded multiple times.
  for h in 0..high(t.data):
    if (t.data[h].h and mask) == usedSlot:
      yield t.data[h].key

iterator values*(t: TTable): TValue =
  ## iterate over any value in the table `t`.
  for h in 0..high(t.data):
    if (t.data[h].h and mask) == usedSlot:
      yield t.data[h].val

iterator values*(t: TTable, key: TKey): TValue =
  ## iterate over any value associated with `key` in `t`.
  var fullhash = hash(key)
  var h = fullhash and high(t.data)
  while (t.data[h].h and mask) != 0:
    # If it is a deleted entry, the comparison with ``markUsed(fullhash)``
    # fails, so there is no need to check for this explicitly.
    if t.data[h].h == markUsed(fullhash) and eq(t.data[h].key, key):
      yield t.data[h].val
    h = nextTry(h, high(t.data))

proc `$`*[KeyToStr=`$`, ValueToStr=`$`](t: TTable): string =
  ## turns the table into its string representation. `$` must be available
  ## for TKey and TValue for this to work.
  if t.len == 0:
    result = "{:}"
  else:
    result = "{"
    var i = 0
    for k, v in pairs(t):
      if i > 0: add(result, ", ")
      add(result, KeyToStr(k))
      add(result, ": ")
      add(result, ValueToStr(v))
      inc(i)
    add(result, "}")
diff --git a/nimlib/pure/lexbase.nim b/nimlib/pure/lexbase.nim new file mode 100755 index 000000000..bb207e92a --- /dev/null +++ b/nimlib/pure/lexbase.nim @@ -0,0 +1,166 @@
#
#
#           The Nimrod Compiler
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements a base object of a lexer with efficient buffer
## handling. Only at line endings checks are necessary if the buffer
## needs refilling.

import
  strutils, streams

const
  EndOfFile* = '\0'           ## end of file marker
  NewLines* = {'\c', '\L'}

# Buffer handling:
#  buf:
#  "Example Text\n ha!"   bufLen = 17
#   ^pos = 0     ^ sentinel = 12
#

type
  TBaseLexer* = object of TObject ## the base lexer. Inherit your lexer from
                                  ## this object.
    bufpos*: int              ## the current position within the buffer
    buf*: cstring             ## the buffer itself
    bufLen*: int              ## length of buffer in characters
    input: PStream            ## the input stream
    LineNumber*: int          ## the current line number
    sentinel: int
    lineStart: int            # index of last line start in buffer
    fileOpened: bool

proc open*(L: var TBaseLexer, input: PStream, bufLen: int = 8192)
  ## inits the TBaseLexer with a stream to read from

proc close*(L: var TBaseLexer)
  ## closes the base lexer. This closes `L`'s associated stream too.

proc getCurrentLine*(L: TBaseLexer, marker: bool = true): string
  ## retrieves the current line.

proc getColNumber*(L: TBaseLexer, pos: int): int
  ## retrieves the current column.

proc HandleCR*(L: var TBaseLexer, pos: int): int
  ## Call this if you scanned over '\c' in the buffer; it returns the
  ## position to continue the scanning from. `pos` must be the position
  ## of the '\c'.
proc HandleLF*(L: var TBaseLexer, pos: int): int
  ## Call this if you scanned over '\L' in the buffer; it returns the
  ## position to continue the scanning from. `pos` must be the position
  ## of the '\L'.


# implementation

const
  chrSize = sizeof(char)

proc close(L: var TBaseLexer) =
  # frees the buffer allocated in `open` and closes the input stream
  dealloc(L.buf)
  L.input.close(L.input)

proc FillBuffer(L: var TBaseLexer) =
  # Moves the characters after the sentinel to the start of the buffer,
  # reads new data from the input stream behind them and computes the new
  # sentinel: either the end-of-file marker or the last newline character
  # in the buffer. If the buffer contains no newline at all, the buffer is
  # doubled until one is found or the input is exhausted.
  var
    charsRead, toCopy, s: int # all are in characters,
                              # not bytes (in case this
                              # is not the same)
    oldBufLen: int
  # we know here that pos == L.sentinel, but not if this proc
  # is called the first time by initBaseLexer()
  assert(L.sentinel < L.bufLen)
  toCopy = L.BufLen - L.sentinel - 1
  assert(toCopy >= 0)
  if toCopy > 0:
    MoveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize) # "moveMem" handles overlapping regions
  charsRead = L.input.readData(L.input, addr(L.buf[toCopy]),
                               (L.sentinel + 1) * chrSize) div chrSize
  s = toCopy + charsRead
  if charsRead < L.sentinel + 1:
    # fewer characters than requested were read
    L.buf[s] = EndOfFile # set end marker
    L.sentinel = s
  else:
    # compute sentinel:
    dec(s) # BUGFIX (valgrind)
    while true:
      assert(s < L.bufLen)
      # search backwards for the last newline character:
      while (s >= 0) and not (L.buf[s] in NewLines): Dec(s)
      if s >= 0:
        # we found an appropriate character for a sentinel:
        L.sentinel = s
        break
      else:
        # rather than to give up here because the line is too long,
        # double the buffer's size and try again:
        oldBufLen = L.BufLen
        L.bufLen = L.BufLen * 2
        L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize))
        assert(L.bufLen - oldBuflen == oldBufLen)
        charsRead = L.input.ReadData(L.input, addr(L.buf[oldBufLen]),
                                     oldBufLen * chrSize) div chrSize
        if charsRead < oldBufLen:
          L.buf[oldBufLen + charsRead] = EndOfFile
          L.sentinel = oldBufLen + charsRead
          break
        s = L.bufLen - 1

proc fillBaseLexer(L: var TBaseLexer, pos: int): int =
  # Returns the position that follows `pos`. If `pos` is the sentinel, the
  # buffer is refilled first and scanning continues at index 0. The result
  # is also recorded as the start of the current line.
  assert(pos <= L.sentinel)
  if pos < L.sentinel:
    result = pos + 1 # nothing to do
  else:
    fillBuffer(L)
    L.bufpos = 0 # XXX: is this really correct?
    result = 0
  L.lineStart = result

proc HandleCR(L: var TBaseLexer, pos: int): int =
  assert(L.buf[pos] == '\c')
  inc(L.linenumber)
  result = fillBaseLexer(L, pos)
  # a '\L' that directly follows the '\c' belongs to the same line ending:
  if L.buf[result] == '\L':
    result = fillBaseLexer(L, result)

proc HandleLF(L: var TBaseLexer, pos: int): int =
  assert(L.buf[pos] == '\L')
  inc(L.linenumber)
  result = fillBaseLexer(L, pos) #L.lastNL := result-1; // BUGFIX: was: result;

proc skip_UTF_8_BOM(L: var TBaseLexer) =
  # skips the three bytes EF BB BF if the buffer starts with them
  if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'):
    inc(L.bufpos, 3)
    inc(L.lineStart, 3)

proc open(L: var TBaseLexer, input: PStream, bufLen: int = 8192) =
  assert(bufLen > 0)
  assert(input != nil)
  L.input = input
  L.bufpos = 0
  L.bufLen = bufLen
  L.buf = cast[cstring](alloc(bufLen * chrSize))
  # with the sentinel at the last index, the first `fillBuffer` call copies
  # nothing and requests `bufLen` characters from the stream:
  L.sentinel = bufLen - 1
  L.lineStart = 0
  L.linenumber = 1            # lines start at 1
  fillBuffer(L)
  skip_UTF_8_BOM(L)

proc getColNumber(L: TBaseLexer, pos: int): int =
  # distance between `pos` and the start of the current line
  result = abs(pos - L.lineStart)

proc getCurrentLine(L: TBaseLexer, marker: bool = true): string =
  # Copies the buffer contents from the start of the current line up to the
  # next line ending or end-of-file marker. If `marker` is true, a second
  # line with a '^' under the column of `L.bufpos` is appended.
  var i: int
  result = ""
  i = L.lineStart
  while not (L.buf[i] in {'\c', '\L', EndOfFile}):
    add(result, L.buf[i])
    inc(i)
  add(result, "\n")
  if marker:
    add(result, RepeatChar(getColNumber(L, L.bufpos)) & "^\n")

diff --git a/nimlib/pure/logging.nim b/nimlib/pure/logging.nim new file mode 100755 index 000000000..6df39f50b --- /dev/null +++ b/nimlib/pure/logging.nim @@ -0,0 +1,146 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements a simple logger. It is based on the following design:
## * Runtime log formatting is a bug: Sooner or later every log file is parsed.
## * Keep it simple: If this library does not fulfill your needs, write your
##   own. Trying to support every logging feature just leads to bloat.
##
## Format is::
##
##   DEBUG|INFO|...
##   (2009-11-02 00:00:00)? (Component: )? Message
##
##

# `os` provides getApplicationFilename/splitFile/changeFileExt, `times`
# provides getDateStr/getClockStr and `strutils` provides IdentChars/toLower/%.
import strutils, os, times

type
  TLevel* = enum  ## logging level
    lvlAll,       ## all levels active
    lvlDebug,     ## debug level (and any above) active
    lvlInfo,      ## info level (and any above) active
    lvlWarn,      ## warn level (and any above) active
    lvlError,     ## error level (and any above) active
    lvlFatal,     ## fatal level (and any above) active
    lvlNone       ## no level active: higher than every real level, so a
                  ## threshold of `lvlNone` suppresses all messages

const
  LevelNames*: array [TLevel, string] = [
    "DEBUG", "DEBUG", "INFO", "WARN", "ERROR", "FATAL", "NONE"
  ]

type
  TLogger* = object of TObject ## abstract logger; the base type of all loggers
    levelThreshold*: TLevel    ## only messages of level >= levelThreshold
                               ## should be processed
  TConsoleLogger* = object of TLogger ## logger that writes the messages to the
                                      ## console

  TFileLogger* = object of TLogger ## logger that writes the messages to a file
    f: TFile

  TRollingFileLogger* = object of TFileLogger ## logger that writes the
                                              ## message to a file
    maxlines: int # maximum number of lines
    lines: seq[string]

method log*(L: ref TLogger, level: TLevel,
            frmt: string, args: openArray[string]) =
  ## override this method in custom loggers. Default implementation does
  ## nothing.
  nil

method log*(L: ref TConsoleLogger, level: TLevel,
            frmt: string, args: openArray[string]) =
  ## writes the level name, a space and ``frmt % args`` as one line to stdout.
  Writeln(stdout, LevelNames[level], " ", frmt % args)

method log*(L: ref TFileLogger, level: TLevel,
            frmt: string, args: openArray[string]) =
  ## writes the level name, a space and ``frmt % args`` as one line to the
  ## logger's file.
  Writeln(L.f, LevelNames[level], " ", frmt % args)

proc defaultFilename*(): string =
  ## returns the default filename for a logger: the application's filename
  ## with ``_<current date>`` appended and the extension replaced by ``log``.
  var (path, name, ext) = splitFile(getApplicationFilename())
  result = changeFileExt(path / name & "_" & getDateStr(), "log")

proc substituteLog*(frmt: string): string =
  ## replaces the following variables in `frmt` (names are matched
  ## case-insensitively):
  ##
  ## * ``$date`` by the current date
  ## * ``$time`` by the current time
  ## * ``$app`` by ``getApplicationFilename()``
  ## * ``$appdir`` by the directory part of the application's filename
  ## * ``$appname`` by the name part of the application's filename
  ##
  ## A ``$`` followed by any other name is removed from the result.
  result = ""
  var app = getApplicationFilename()
  var i = 0
  while i < frmt.len:
    if frmt[i] != '$':
      result.add(frmt[i])
      inc(i)
    else:
      inc(i)
      var v = ""
      # the explicit bounds check avoids relying on a terminating zero:
      while i < frmt.len and frmt[i] in IdentChars:
        v.add(toLower(frmt[i]))
        inc(i)
      case v
      of "date": result.add(getDateStr())
      of "time": result.add(getClockStr())
      of "app": result.add(app)
      of "appdir": result.add(app.splitFile.dir)
      of "appname": result.add(app.splitFile.name)
      else: nil # a string ``case`` needs an ``else``; unknown names are dropped


proc newFileLogger*(filename = defaultFilename(),
                    mode: TFileMode = fmAppend,
                    levelThreshold = lvlNone): ref TFileLogger =
  ## creates a logger that writes to the file `filename`, opened with `mode`.
  ## Raises `EIO` if the file cannot be opened. Note that the default
  ## threshold `lvlNone` makes the logger ignore every message.
  new(result)
  result.levelThreshold = levelThreshold
  if not open(result.f, filename, mode):
    raiseException(EIO, "cannot open for writing: " & filename)

proc newRollingFileLogger*(filename = defaultFilename(),
                           mode: TFileMode = fmAppend,
                           levelThreshold = lvlNone,
                           maxLines = 1000): ref TRollingFileLogger =
  ## creates a rolling file logger that writes to `filename` and remembers
  ## `maxLines` as its line limit. Raises `EIO` if the file cannot be opened.
  # The result type has to be the rolling logger: `maxLines` is not a field
  # of `TFileLogger`.
  new(result)
  result.levelThreshold = levelThreshold
  result.maxLines = maxLines
  if not open(result.f, filename, mode):
    raiseException(EIO, "cannot open for writing: " & filename)

var
  level* = lvlNone  ## global threshold checked by the `log` template before
                    ## any handler is consulted
  handlers*: seq[ref TLogger] = @[] ## the loggers that receive the messages

proc logLoop(level: TLevel, msg: string) =
  ## passes `msg` to every handler whose threshold admits `level`.
  for logger in items(handlers):
    if level >= logger.levelThreshold:
      # "$1" with `msg` as its only argument emits the message verbatim, so
      # a '$' inside `msg` is never interpreted as a format directive.
      log(logger, level, "$1", [msg])

template log*(level: TLevel, msg: string) =
  ## logs a message of the given level
  if level >= logging.Level:
    (bind logLoop)(level, msg)

template debug*(msg: string) =
  ## logs a debug message
  log(lvlDebug, msg)

template info*(msg: string) =
  ## logs an info message
  log(lvlInfo, msg)

template warn*(msg: string) =
  ## logs a warning message
  log(lvlWarn, msg)

template error*(msg: string) =
  ## logs an error message
  log(lvlError, msg)

template fatal*(msg: string) =
  ## logs a fatal error message. The program is *not* terminated; call
  ## ``quit`` explicitly if that is desired.
  log(lvlFatal, msg)

# diff --git a/nimlib/pure/macros.nim b/nimlib/pure/macros.nim
# new file mode 100755
# index 000000000..677469ed2
# --- /dev/null
# +++ b/nimlib/pure/macros.nim
# @@ -0,0 +1,249 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#


## This module contains the interface to the compiler's abstract syntax
## tree (`AST`:idx:). Macros operate on this tree.

## .. 
## .. include:: ../doc/astspec.txt

#[[[cog
#def toEnum(name, elems):
#  body = ""
#  counter = 0
#  for e in elems:
#    if counter % 4 == 0: p = "\n "
#    else: p = ""
#    body = body + p + 'n' + e + ', '
#    counter = counter + 1
#
#  return (" TNimrod%s* = enum%s\n TNim%ss* = set[TNimrod%s]\n" %
#            (name, body[:-2], name, name))
#
#enums = eval(open("data/ast.yml").read())
#cog.out("type\n")
#for key, val in enums.items():
#  if key[-4:] == "Flag": continue
#  cog.out(toEnum(key, val))
#]]]
type
  TNimrodNodeKind* = enum
    nnkNone, nnkEmpty, nnkIdent, nnkSym,
    nnkType, nnkCharLit, nnkIntLit, nnkInt8Lit,
    nnkInt16Lit, nnkInt32Lit, nnkInt64Lit, nnkFloatLit,
    nnkFloat32Lit, nnkFloat64Lit, nnkStrLit, nnkRStrLit,
    nnkTripleStrLit, nnkMetaNode, nnkNilLit, nnkDotCall,
    nnkCommand, nnkCall, nnkCallStrLit, nnkExprEqExpr,
    nnkExprColonExpr, nnkIdentDefs, nnkVarTuple, nnkInfix,
    nnkPrefix, nnkPostfix, nnkPar, nnkCurly,
    nnkBracket, nnkBracketExpr, nnkPragmaExpr, nnkRange,
    nnkDotExpr, nnkCheckedFieldExpr, nnkDerefExpr, nnkIfExpr,
    nnkElifExpr, nnkElseExpr, nnkLambda, nnkAccQuoted,
    nnkTableConstr, nnkBind, nnkSymChoice, nnkHiddenStdConv,
    nnkHiddenSubConv, nnkHiddenCallConv, nnkConv, nnkCast,
    nnkAddr, nnkHiddenAddr, nnkHiddenDeref, nnkObjDownConv,
    nnkObjUpConv, nnkChckRangeF, nnkChckRange64, nnkChckRange,
    nnkStringToCString, nnkCStringToString, nnkPassAsOpenArray, nnkAsgn,
    nnkFastAsgn, nnkGenericParams, nnkFormalParams, nnkOfInherit,
    nnkModule, nnkProcDef, nnkMethodDef, nnkConverterDef,
    nnkMacroDef, nnkTemplateDef, nnkIteratorDef, nnkOfBranch,
    nnkElifBranch, nnkExceptBranch, nnkElse, nnkMacroStmt,
    nnkAsmStmt, nnkPragma, nnkIfStmt, nnkWhenStmt,
    nnkForStmt, nnkWhileStmt, nnkCaseStmt, nnkVarSection,
    nnkConstSection, nnkConstDef, nnkTypeSection, nnkTypeDef,
    nnkYieldStmt, nnkTryStmt, nnkFinally, nnkRaiseStmt,
    nnkReturnStmt, nnkBreakStmt, nnkContinueStmt, nnkBlockStmt,
    nnkDiscardStmt, nnkStmtList, nnkImportStmt, nnkFromStmt,
    nnkIncludeStmt, nnkCommentStmt, nnkStmtListExpr, nnkBlockExpr,
    nnkStmtListType, nnkBlockType, nnkTypeOfExpr, nnkObjectTy,
    nnkTupleTy, nnkRecList, nnkRecCase, nnkRecWhen,
    nnkRefTy, nnkPtrTy, nnkVarTy, nnkDistinctTy,
    nnkProcTy, nnkEnumTy, nnkEnumFieldDef, nnkReturnToken
  TNimNodeKinds* = set[TNimrodNodeKind]
  TNimrodTypeKind* = enum
    ntyNone, ntyBool, ntyChar, ntyEmpty,
    ntyArrayConstr, ntyNil, ntyExpr, ntyStmt,
    ntyTypeDesc, ntyGenericInvokation, ntyGenericBody, ntyGenericInst,
    ntyGenericParam, ntyDistinct, ntyEnum, ntyOrdinal,
    ntyArray, ntyObject, ntyTuple, ntySet,
    ntyRange, ntyPtr, ntyRef, ntyVar,
    ntySequence, ntyProc, ntyPointer, ntyOpenArray,
    ntyString, ntyCString, ntyForward, ntyInt,
    ntyInt8, ntyInt16, ntyInt32, ntyInt64,
    ntyFloat, ntyFloat32, ntyFloat64, ntyFloat128
  TNimTypeKinds* = set[TNimrodTypeKind]
  TNimrodSymKind* = enum
    nskUnknown, nskConditional, nskDynLib, nskParam,
    nskGenericParam, nskTemp, nskType, nskConst,
    nskVar, nskProc, nskMethod, nskIterator,
    nskConverter, nskMacro, nskTemplate, nskField,
    nskEnumField, nskForVar, nskModule, nskLabel,
    nskStub
  TNimSymKinds* = set[TNimrodSymKind]
#[[[end]]]

type
  TNimrodIdent* = object of TObject
    ## represents a Nimrod identifier in the AST

  TNimrodSymbol {.final.} = object # hidden
  TNimrodType {.final.} = object   # hidden

  PNimrodType* {.compilerproc.} = ref TNimrodType
    ## represents a Nimrod type in the compiler; currently this is not very
    ## useful as there is no API to deal with Nimrod types.

  PNimrodSymbol* {.compilerproc.} = ref TNimrodSymbol
    ## represents a Nimrod *symbol* in the compiler; a *symbol* is a looked-up
    ## *ident*.

  PNimrodNode* = expr
    ## represents a Nimrod AST node. Macros operate on this type.

# Nodes should be reference counted to make the `copy` operation very fast!
# However, this is difficult to achieve: modify(n[0][1]) should propagate to
# its father. How to do this without back references?

proc `[]`* (n: PNimrodNode, i: int): PNimrodNode {.magic: "NChild".}
  ## get `n`'s `i`'th child.

proc `[]=`* (n: PNimrodNode, i: int, child: PNimrodNode) {.magic: "NSetChild".}
  ## set `n`'s `i`'th child to `child`.

proc `!` *(s: string): TNimrodIdent {.magic: "StrToIdent".}
  ## constructs an identifier from the string `s`

proc `$`*(i: TNimrodIdent): string {.magic: "IdentToStr".}
  ## converts a Nimrod identifier to a string

proc `==`* (a, b: TNimrodIdent): bool {.magic: "EqIdent", noSideEffect.}
  ## compares two Nimrod identifiers

proc `==`* (a, b: PNimrodNode): bool {.magic: "EqNimrodNode", noSideEffect.}
  ## compares two Nimrod nodes

proc len*(n: PNimrodNode): int {.magic: "NLen".}
  ## returns the number of children of `n`.

proc add*(father, child: PNimrodNode) {.magic: "NAdd".}
  ## adds the `child` to the `father` node

proc add*(father: PNimrodNode, children: openArray[PNimrodNode]) {.
  magic: "NAddMultiple".}
  ## adds each child of `children` to the `father` node

proc del*(father: PNimrodNode, idx = 0, n = 1) {.magic: "NDel".}
  ## deletes `n` children of `father` starting at index `idx`.

proc kind*(n: PNimrodNode): TNimrodNodeKind {.magic: "NKind".}
  ## returns the `kind` of the node `n`.

# The following getters and setters are implemented as compiler magics:
proc intVal*(n: PNimrodNode): biggestInt {.magic: "NIntVal".}
proc floatVal*(n: PNimrodNode): biggestFloat {.magic: "NFloatVal".}
proc symbol*(n: PNimrodNode): PNimrodSymbol {.magic: "NSymbol".}
proc ident*(n: PNimrodNode): TNimrodIdent {.magic: "NIdent".}
proc typ*(n: PNimrodNode): PNimrodType {.magic: "NGetType".}
proc strVal*(n: PNimrodNode): string {.magic: "NStrVal".}

proc `intVal=`*(n: PNimrodNode, val: biggestInt) {.magic: "NSetIntVal".}
proc `floatVal=`*(n: PNimrodNode, val: biggestFloat) {.magic: "NSetFloatVal".}
proc `symbol=`*(n: PNimrodNode, val: PNimrodSymbol) {.magic: "NSetSymbol".}
proc `ident=`*(n: PNimrodNode, val: TNimrodIdent) {.magic: "NSetIdent".}
proc `typ=`*(n: PNimrodNode, typ: PNimrodType) {.magic: "NSetType".}
proc `strVal=`*(n: PNimrodNode, val: string) {.magic: "NSetStrVal".}

proc newNimNode*(kind: TNimrodNodeKind,
                 n: PNimrodNode=nil): PNimrodNode {.magic: "NNewNimNode".}

proc copyNimNode*(n: PNimrodNode): PNimrodNode {.magic: "NCopyNimNode".}
proc copyNimTree*(n: PNimrodNode): PNimrodNode {.magic: "NCopyNimTree".}

proc error*(msg: string) {.magic: "NError".}
  ## writes an error message at compile time

proc warning*(msg: string) {.magic: "NWarning".}
  ## writes a warning message at compile time

proc hint*(msg: string) {.magic: "NHint".}
  ## writes a hint message at compile time

proc newStrLitNode*(s: string): PNimrodNode {.compileTime.} =
  ## creates a string literal node from `s`
  result = newNimNode(nnkStrLit)
  result.strVal = s

proc newIntLitNode*(i: biggestInt): PNimrodNode {.compileTime.} =
  ## creates a int literal node from `i`
  result = newNimNode(nnkIntLit)
  result.intVal = i

proc newFloatLitNode*(f: biggestFloat): PNimrodNode {.compileTime.} =
  ## creates a float literal node from `f`
  result = newNimNode(nnkFloatLit)
  result.floatVal = f

proc newIdentNode*(i: TNimrodIdent): PNimrodNode {.compileTime.} =
  ## creates an identifier node from `i`
  result = newNimNode(nnkIdent)
  result.ident = i

proc newIdentNode*(i: string): PNimrodNode {.compileTime.} =
  ## creates an identifier node from `i`
  result = newNimNode(nnkIdent)
  result.ident = !i

proc toStrLit*(n: PNimrodNode): PNimrodNode {.compileTime.} =
  ## converts the AST `n` to the concrete Nimrod code and wraps that
  ## in a string literal node
  return newStrLitNode(repr(n))

proc expectKind*(n: PNimrodNode, k: TNimrodNodeKind) {.compileTime.} =
  ## checks that `n` is of kind `k`. If this is not the case,
  ## compilation aborts with an error message. This is useful for writing
  ## macros that check the AST that is passed to them.
  if n.kind != k: error("macro expects a node of kind: " & repr(k))

proc expectMinLen*(n: PNimrodNode, min: int) {.compileTime.} =
  ## checks that `n` has at least `min` children. If this is not the case,
  ## compilation aborts with an error message. This is useful for writing
  ## macros that check its number of arguments.
  if n.len < min: error("macro expects a node with " & $min & " children")

proc expectLen*(n: PNimrodNode, len: int) {.compileTime.} =
  ## checks that `n` has exactly `len` children. If this is not the case,
  ## compilation aborts with an error message. This is useful for writing
  ## macros that check its number of arguments.
  if n.len != len: error("macro expects a node with " & $len & " children")

proc newCall*(theProc: TNimrodIdent,
              args: openArray[PNimrodNode]): PNimrodNode {.compileTime.} =
  ## produces a new call node. `theProc` is the proc that is called with
  ## the arguments ``args[0..]``.
  result = newNimNode(nnkCall)
  result.add(newIdentNode(theProc))
  result.add(args)

proc newCall*(theProc: string,
              args: openArray[PNimrodNode]): PNimrodNode {.compileTime.} =
  ## produces a new call node. `theProc` is the proc that is called with
  ## the arguments ``args[0..]``.
  result = newNimNode(nnkCall)
  result.add(newIdentNode(theProc))
  result.add(args)

proc nestList*(theProc: TNimrodIdent,
               x: PNimrodNode): PNimrodNode {.compileTime.} =
  ## nests the list `x` into a tree of call expressions:
  ## ``[a, b, c]`` is transformed into ``theProc(a, theProc(b, c))``.
  ## `x` needs at least two children.
  var L = x.len
  result = newCall(theProc, x[L-2], x[L-1])
  # Wrap from the inside out. The new outer call has to be stored in
  # `result`; otherwise only the innermost call would be returned.
  for i in countdown(L-3, 0):
    result = newCall(theProc, x[i], copyNimTree(result))

# diff --git a/nimlib/pure/math.nim b/nimlib/pure/math.nim
# new file mode 100755
# index 000000000..bca45894c
# --- /dev/null
# +++ b/nimlib/pure/math.nim
# @@ -0,0 +1,249 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## Basic math routines for Nimrod.
## This module is available for the ECMAScript target.

{.push debugger:off .} # the user does not want to trace a part
                       # of the standard library!

{.push checks:off, line_dir:off, stack_trace:off.}

when defined(Posix):
  {.passl: "-lm".}

const
  PI* = 3.1415926535897932384626433 ## the circle constant PI (Ludolph's number)
  E* = 2.71828182845904523536028747 ## Euler's number

type
  TFloatClass* = enum ## describes the class a floating point value belongs to.
                      ## This is the type that is returned by `classify`.
    fcNormal,    ## value is an ordinary nonzero floating point value
    fcSubnormal, ## value is a subnormal (a very small) floating point value
    fcZero,      ## value is zero
    fcNegZero,   ## value is the negative zero
    fcNan,       ## value is Not-A-Number (NAN)
    fcInf,       ## value is positive infinity
    fcNegInf     ## value is negative infinity

proc classify*(x: float): TFloatClass =
  ## classifies a floating point value. Returns `x`'s class as specified by
  ## `TFloatClass`.

  # ECMAScript and most C compilers have no classify:
  if x == 0.0:
    if 1.0/x == Inf:
      return fcZero
    else:
      return fcNegZero
  if x*0.5 == x:
    if x > 0.0: return fcInf
    else: return fcNegInf
  if x != x: return fcNan
  return fcNormal
  # XXX: fcSubnormal is not detected!


proc binom*(n, k: int): int {.noSideEffect.} =
  ## computes the binomial coefficient
  if k <= 0: return 1
  if 2*k > n: return binom(n, n-k)
  result = n
  for i in countup(2, k):
    result = (result * (n + 1 - i)) div i

proc fac*(n: int): int {.noSideEffect.} =
  ## computes the faculty function
  result = 1
  for i in countup(2, n):
    result = result * i

proc isPowerOfTwo*(x: int): bool {.noSideEffect.} =
  ## returns true, if x is a power of two, false otherwise.
  ## Zero and negative numbers are not a power of two.
  # ``x and (x-1)`` clears the lowest set bit; the explicit ``x > 0`` rejects
  # zero and ``low(int)``, which a bare bit test would accept.
  return (x > 0) and ((x and (x - 1)) == 0)

proc nextPowerOfTwo*(x: int): int =
  ## returns the smallest power of two that is greater than or equal to `x`,
  ## that is ``result >= x > result div 2``. `x` is expected to be positive.
  result = x - 1
  when defined(cpu64):
    result = result or (result shr 32)
  result = result or (result shr 16)
  result = result or (result shr 8)
  result = result or (result shr 4)
  result = result or (result shr 2)
  result = result or (result shr 1)
  Inc(result)

proc countBits32*(n: int32): int {.noSideEffect.} =
  ## counts the set bits in `n`.
  var v = n
  v = v -% ((v shr 1'i32) and 0x55555555'i32)
  v = (v and 0x33333333'i32) +% ((v shr 2'i32) and 0x33333333'i32)
  result = ((v +% (v shr 4'i32) and 0xF0F0F0F'i32) *% 0x1010101'i32) shr 24'i32

proc sum*[T](x: openarray[T]): T {.noSideEffect.} =
  ## computes the sum of the elements in `x`.
  ## If `x` is empty, 0 is returned.
  for i in items(x): result = result + i

proc mean*(x: openarray[float]): float {.noSideEffect.} =
  ## computes the mean of the elements in `x`.
  ## If `x` is empty, NaN is returned.
  result = sum(x) / toFloat(len(x))

proc variance*(x: openarray[float]): float {.noSideEffect.} =
  ## computes the variance of the elements in `x` (the mean of the squared
  ## deviations from the mean).
  ## If `x` is empty, NaN is returned.
  result = 0.0
  var m = mean(x)
  for i in 0 .. high(x):
    var diff = x[i] - m
    result = result + diff*diff
  result = result / toFloat(len(x))

when not defined(ECMAScript):
  proc random*(max: int): int
    ## returns a random number in the range 0..max-1. The sequence of
    ## random number is always the same, unless `randomize` is called
    ## which initializes the random number generator with a "random"
    ## number, i.e. a tickcount.
  proc randomize*()
    ## initializes the random number generator with a "random"
    ## number, i.e. a tickcount. Note: Does nothing for the ECMAScript target,
    ## as ECMAScript does not support this.

  proc sqrt*(x: float): float {.importc: "sqrt", header: "<math.h>".}
    ## computes the square root of `x`.

  proc ln*(x: float): float {.importc: "log", header: "<math.h>".}
    ## computes ln(x).
  proc log10*(x: float): float {.importc: "log10", header: "<math.h>".}
  proc log2*(x: float): float = return ln(x) / ln(2.0)
  proc exp*(x: float): float {.importc: "exp", header: "<math.h>".}
    ## computes e**x.

  proc frexp*(x: float, exponent: var int): float {.
    importc: "frexp", header: "<math.h>".}
    ## Split a number into mantissa and exponent.
    ## `frexp` calculates the mantissa m (a float greater than or equal to 0.5
    ## and less than 1) and the integer value n such that `x` (the original
    ## float value) equals m * 2**n. frexp stores n in `exponent` and returns
    ## m.

  proc round*(x: float): int {.importc: "lrint", nodecl.}
    ## converts a float to an int by rounding.

  proc arccos*(x: float): float {.importc: "acos", header: "<math.h>".}
  proc arcsin*(x: float): float {.importc: "asin", header: "<math.h>".}
  proc arctan*(x: float): float {.importc: "atan", header: "<math.h>".}
  proc arctan2*(y, x: float): float {.importc: "atan2", header: "<math.h>".}
    ## Calculate the arc tangent of `y` / `x`.
    ## `atan2` returns the arc tangent of `y` / `x`; it produces correct
    ## results even when the resulting angle is near pi/2 or -pi/2
    ## (`x` near 0).

  proc cos*(x: float): float {.importc: "cos", header: "<math.h>".}
  proc cosh*(x: float): float {.importc: "cosh", header: "<math.h>".}
  proc hypot*(x, y: float): float {.importc: "hypot", header: "<math.h>".}
    ## same as ``sqrt(x*x + y*y)``.

  proc sinh*(x: float): float {.importc: "sinh", header: "<math.h>".}
  proc tan*(x: float): float {.importc: "tan", header: "<math.h>".}
  proc tanh*(x: float): float {.importc: "tanh", header: "<math.h>".}
  proc pow*(x, y: float): float {.importc: "pow", header: "<math.h>".}
    ## computes x to power raised of y.

  # C procs:
  proc gettime(dummy: ptr cint): cint {.importc: "time", header: "<time.h>".}
  proc srand(seed: cint) {.importc: "srand", nodecl.}
  proc rand(): cint {.importc: "rand", nodecl.}

  proc randomize() = srand(gettime(nil))
  proc random(max: int): int = return int(rand()) mod max

else:
  proc mathrandom(): float {.importc: "Math.random", nodecl.}
  proc mathfloor(x: float): float {.importc: "Math.floor", nodecl.}
  # NOTE(review): `random` mixes float and int operands and returns the float
  # from `mathfloor` as int -- confirm this is accepted for the ECMAScript
  # target.
  proc random*(max: int): int = return mathfloor(mathrandom() * max)
  proc randomize*() = nil

  proc sqrt*(x: float): float {.importc: "Math.sqrt", nodecl.}
  proc ln*(x: float): float {.importc: "Math.log", nodecl.}
  proc log10*(x: float): float = return ln(x) / ln(10.0)
  proc log2*(x: float): float = return ln(x) / ln(2.0)

  proc exp*(x: float): float {.importc: "Math.exp", nodecl.}
  proc round*(x: float): int {.importc: "Math.round", nodecl.}
  proc pow*(x, y: float): float {.importc: "Math.pow", nodecl.}

  proc frexp*(x: float, exponent: var int): float =
    ## Split a number into mantissa and exponent: returns the mantissa m with
    ## ``0.5 <= abs(m) < 1`` and stores n in `exponent` such that `x` equals
    ## m * 2**n. For ``x == 0.0`` both the result and `exponent` are zero.
    if x == 0.0:
      exponent = 0
      result = 0.0
    elif x < 0.0:
      result = -frexp(-x, exponent)
    else:
      # floor(log2(x)) would give a mantissa in [1, 2); one more halving puts
      # it into [0.5, 1) as documented:
      var ex = mathfloor(log2(x)) + 1.0
      exponent = round(ex)
      result = x / pow(2.0, ex)
      # log2 is computed as a quotient of logarithms and may be off by one
      # near exact powers of two; renormalize in that case:
      if result >= 1.0:
        result = result * 0.5
        inc(exponent)
      elif result < 0.5:
        result = result * 2.0
        dec(exponent)

  proc arccos*(x: float): float {.importc: "Math.acos", nodecl.}
  proc arcsin*(x: float): float {.importc: "Math.asin", nodecl.}
  proc arctan*(x: float): float {.importc: "Math.atan", nodecl.}
  proc arctan2*(y, x: float): float {.importc: "Math.atan2", nodecl.}

  proc cos*(x: float): float {.importc: "Math.cos", nodecl.}
  proc cosh*(x: float): float = return (exp(x)+exp(-x))*0.5
  proc hypot*(x, y: float): float = return sqrt(x*x + y*y)
  proc sinh*(x: float): float = return (exp(x)-exp(-x))*0.5
  proc tan*(x: float): float {.importc: "Math.tan", nodecl.}
  proc tanh*(x: float): float =
    var y = exp(2.0*x)
    return (y-1.0)/(y+1.0)


type
  TRunningStat* = object  ## an accumulator for statistical data
    n*: int               ## number of pushed data
    sum*, min*, max*, mean*: float ## self-explaining
    oldM, oldS, newS: float

proc push*(s: var TRunningStat, x: float) =
  ## pushes a value `x` for processing
  inc(s.n)
  # See Knuth TAOCP vol 2, 3rd edition, page 232
  if s.n == 1:
    s.oldM = x
    s.mean = x
    s.oldS = 0.0
    # the first value is both minimum and maximum; comparing against the
    # zero-initialized fields would pin them to 0.0:
    s.min = x
    s.max = x
  else:
    s.mean = s.oldM + (x - s.oldM)/toFloat(s.n)
    s.newS = s.oldS + (x - s.oldM)*(x - s.mean)

    # set up for next iteration:
    s.oldM = s.mean
    s.oldS = s.newS

    if s.min > x: s.min = x
    if s.max < x: s.max = x

  s.sum = s.sum + x

proc variance*(s: TRunningStat): float =
  ## computes the current variance of `s`. With fewer than two pushed values
  ## the result is 0.0.
  if s.n > 1: result = s.newS / (toFloat(s.n - 1))

proc standardDeviation*(s: TRunningStat): float =
  ## computes the current standard deviation of `s`
  result = sqrt(variance(s))

{.pop.}
{.pop.}
# diff --git a/nimlib/pure/md5.nim b/nimlib/pure/md5.nim
# new file mode 100755
# index 000000000..d9bb92949
# --- /dev/null
# +++ b/nimlib/pure/md5.nim
# @@ -0,0 +1,245 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## Module for computing MD5 checksums.
+ +type + MD5State = array[0..3, int32] + MD5Block = array[0..15, int32] + MD5CBits = array[0..7, int8] + MD5Digest* = array[0..15, int8] + MD5Buffer = array[0..63, int8] + MD5Context* {.final.} = object + State: MD5State + Count: array[0..1, int32] + Buffer: MD5Buffer + +const + padding: cstring = "\x80\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0\0\0\0\0" & + "\0\0\0\0" + +proc F(x, y, z: int32): int32 {.inline.} = + Result = (x and y) or ((not x) and z) + +proc G(x, y, z: int32): int32 {.inline.} = + Result = (x and z) or (y and (not z)) + +proc H(x, y, z: int32): int32 {.inline.} = + Result = x xor y xor z + +proc I(x, y, z: int32): int32 {.inline.} = + Result = y xor (x or (not z)) + +proc rot(x: var int32, n: int8) {.inline.} = + x = toU32(x shl ze(n)) or (x shr toU32(32 -% ze(n))) + +proc FF(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = + a = a +% F(b, c, d) +% x +% ac + rot(a, s) + a = a +% b + +proc GG(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = + a = a +% G(b, c, d) +% x +% ac + rot(a, s) + a = a +% b + +proc HH(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = + a = a +% H(b, c, d) +% x +% ac + rot(a, s) + a = a +% b + +proc II(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = + a = a +% I(b, c, d) +% x +% ac + rot(a, s) + a = a +% b + +proc encode(dest: var MD5Block, src: cstring) = + var j = 0 + for i in 0..high(dest): + dest[i] = toU32(ord(src[j]) or + ord(src[j+1]) shl 8 or + ord(src[j+2]) shl 16 or + ord(src[j+3]) shl 24) + inc(j, 4) + +proc decode(dest: var openarray[int8], src: openarray[int32]) = + var i = 0 + for j in 0..high(src): + dest[i] = toU8(src[j] and 0xff'i32) + dest[i+1] = toU8(src[j] shr 8'i32 and 0xff'i32) + dest[i+2] = toU8(src[j] shr 16'i32 and 0xff'i32) + dest[i+3] = toU8(src[j] shr 24'i32 and 0xff'i32) + inc(i, 4) + +proc transform(Buffer: pointer, State: var MD5State) = + var + myBlock: 
MD5Block + encode(myBlock, cast[cstring](buffer)) + var a = State[0] + var b = State[1] + var c = State[2] + var d = State[3] + FF(a, b, c, d, myBlock[0], 7'i8, 0xD76AA478'i32) + FF(d, a, b, c, myBlock[1], 12'i8, 0xE8C7B756'i32) + FF(c, d, a, b, myBlock[2], 17'i8, 0x242070DB'i32) + FF(b, c, d, a, myBlock[3], 22'i8, 0xC1BDCEEE'i32) + FF(a, b, c, d, myBlock[4], 7'i8, 0xF57C0FAF'i32) + FF(d, a, b, c, myBlock[5], 12'i8, 0x4787C62A'i32) + FF(c, d, a, b, myBlock[6], 17'i8, 0xA8304613'i32) + FF(b, c, d, a, myBlock[7], 22'i8, 0xFD469501'i32) + FF(a, b, c, d, myBlock[8], 7'i8, 0x698098D8'i32) + FF(d, a, b, c, myBlock[9], 12'i8, 0x8B44F7AF'i32) + FF(c, d, a, b, myBlock[10], 17'i8, 0xFFFF5BB1'i32) + FF(b, c, d, a, myBlock[11], 22'i8, 0x895CD7BE'i32) + FF(a, b, c, d, myBlock[12], 7'i8, 0x6B901122'i32) + FF(d, a, b, c, myBlock[13], 12'i8, 0xFD987193'i32) + FF(c, d, a, b, myBlock[14], 17'i8, 0xA679438E'i32) + FF(b, c, d, a, myBlock[15], 22'i8, 0x49B40821'i32) + GG(a, b, c, d, myBlock[1], 5'i8, 0xF61E2562'i32) + GG(d, a, b, c, myBlock[6], 9'i8, 0xC040B340'i32) + GG(c, d, a, b, myBlock[11], 14'i8, 0x265E5A51'i32) + GG(b, c, d, a, myBlock[0], 20'i8, 0xE9B6C7AA'i32) + GG(a, b, c, d, myBlock[5], 5'i8, 0xD62F105D'i32) + GG(d, a, b, c, myBlock[10], 9'i8, 0x02441453'i32) + GG(c, d, a, b, myBlock[15], 14'i8, 0xD8A1E681'i32) + GG(b, c, d, a, myBlock[4], 20'i8, 0xE7D3FBC8'i32) + GG(a, b, c, d, myBlock[9], 5'i8, 0x21E1CDE6'i32) + GG(d, a, b, c, myBlock[14], 9'i8, 0xC33707D6'i32) + GG(c, d, a, b, myBlock[3], 14'i8, 0xF4D50D87'i32) + GG(b, c, d, a, myBlock[8], 20'i8, 0x455A14ED'i32) + GG(a, b, c, d, myBlock[13], 5'i8, 0xA9E3E905'i32) + GG(d, a, b, c, myBlock[2], 9'i8, 0xFCEFA3F8'i32) + GG(c, d, a, b, myBlock[7], 14'i8, 0x676F02D9'i32) + GG(b, c, d, a, myBlock[12], 20'i8, 0x8D2A4C8A'i32) + HH(a, b, c, d, myBlock[5], 4'i8, 0xFFFA3942'i32) + HH(d, a, b, c, myBlock[8], 11'i8, 0x8771F681'i32) + HH(c, d, a, b, myBlock[11], 16'i8, 0x6D9D6122'i32) + HH(b, c, d, a, myBlock[14], 23'i8, 0xFDE5380C'i32) 
+ HH(a, b, c, d, myBlock[1], 4'i8, 0xA4BEEA44'i32) + HH(d, a, b, c, myBlock[4], 11'i8, 0x4BDECFA9'i32) + HH(c, d, a, b, myBlock[7], 16'i8, 0xF6BB4B60'i32) + HH(b, c, d, a, myBlock[10], 23'i8, 0xBEBFBC70'i32) + HH(a, b, c, d, myBlock[13], 4'i8, 0x289B7EC6'i32) + HH(d, a, b, c, myBlock[0], 11'i8, 0xEAA127FA'i32) + HH(c, d, a, b, myBlock[3], 16'i8, 0xD4EF3085'i32) + HH(b, c, d, a, myBlock[6], 23'i8, 0x04881D05'i32) + HH(a, b, c, d, myBlock[9], 4'i8, 0xD9D4D039'i32) + HH(d, a, b, c, myBlock[12], 11'i8, 0xE6DB99E5'i32) + HH(c, d, a, b, myBlock[15], 16'i8, 0x1FA27CF8'i32) + HH(b, c, d, a, myBlock[2], 23'i8, 0xC4AC5665'i32) + II(a, b, c, d, myBlock[0], 6'i8, 0xF4292244'i32) + II(d, a, b, c, myBlock[7], 10'i8, 0x432AFF97'i32) + II(c, d, a, b, myBlock[14], 15'i8, 0xAB9423A7'i32) + II(b, c, d, a, myBlock[5], 21'i8, 0xFC93A039'i32) + II(a, b, c, d, myBlock[12], 6'i8, 0x655B59C3'i32) + II(d, a, b, c, myBlock[3], 10'i8, 0x8F0CCC92'i32) + II(c, d, a, b, myBlock[10], 15'i8, 0xFFEFF47D'i32) + II(b, c, d, a, myBlock[1], 21'i8, 0x85845DD1'i32) + II(a, b, c, d, myBlock[8], 6'i8, 0x6FA87E4F'i32) + II(d, a, b, c, myBlock[15], 10'i8, 0xFE2CE6E0'i32) + II(c, d, a, b, myBlock[6], 15'i8, 0xA3014314'i32) + II(b, c, d, a, myBlock[13], 21'i8, 0x4E0811A1'i32) + II(a, b, c, d, myBlock[4], 6'i8, 0xF7537E82'i32) + II(d, a, b, c, myBlock[11], 10'i8, 0xBD3AF235'i32) + II(c, d, a, b, myBlock[2], 15'i8, 0x2AD7D2BB'i32) + II(b, c, d, a, myBlock[9], 21'i8, 0xEB86D391'i32) + State[0] = State[0] +% a + State[1] = State[1] +% b + State[2] = State[2] +% c + State[3] = State[3] +% d + +proc MD5Init*(c: var MD5Context) = + ## initializes a MD5Context + c.State[0] = 0x67452301'i32 + c.State[1] = 0xEFCDAB89'i32 + c.State[2] = 0x98BADCFE'i32 + c.State[3] = 0x10325476'i32 + c.Count[0] = 0'i32 + c.Count[1] = 0'i32 + ZeroMem(addr(c.Buffer), SizeOf(MD5Buffer)) + +proc MD5Update*(c: var MD5Context, input: cstring, len: int) = + ## updates the MD5Context with the `input` data of length `len` + var input = input + var 
Index = (c.Count[0] shr 3) and 0x3F + c.Count[0] = c.count[0] +% toU32(len shl 3) + if c.Count[0] < (len shl 3): c.Count[1] = c.count[1] +% 1'i32 + c.Count[1] = c.count[1] +% toU32(len shr 29) + var PartLen = 64 - Index + if len >= PartLen: + CopyMem(addr(c.Buffer[Index]), Input, PartLen) + transform(addr(c.Buffer), c.State) + var i = PartLen + while i + 63 < len: + Transform(addr(Input[I]), c.State) + inc(i, 64) + CopyMem(addr(c.Buffer[0]), addr(Input[i]), len-i) + else: + CopyMem(addr(c.Buffer[Index]), addr(Input[0]), len) + +proc MD5Final*(c: var MD5Context, digest: var MD5Digest) = + ## finishes the MD5Context and stores the result in `digest` + var + Bits: MD5CBits + PadLen: int + decode(bits, c.Count) + var Index = (c.Count[0] shr 3) and 0x3F + if Index < 56: PadLen = 56 - Index + else: PadLen = 120 - Index + MD5Update(c, padding, PadLen) + MD5Update(c, cast[cstring](addr(Bits)), 8) + decode(digest, c.State) + ZeroMem(addr(c), SizeOf(MD5Context)) + +proc toMD5*(s: string): MD5Digest = + ## computes the MD5Digest value for a string `s` + var c: MD5Context + MD5Init(c) + MD5Update(c, cstring(s), len(s)) + MD5Final(c, result) + +proc `$`*(D: MD5Digest): string = + ## converts a MD5Digest value into its string representation + const digits = "0123456789abcdef" + result = "" + for i in 0..15: + add(result, Digits[(D[I] shr 4) and 0xF]) + add(result, Digits[D[I] and 0xF]) + +proc getMD5*(s: string): string = + ## computes an MD5 value of `s` and returns its string representation + var + c: MD5Context + d: MD5Digest + MD5Init(c) + MD5Update(c, cstring(s), len(s)) + MD5Final(c, d) + result = $d + +proc `==`*(D1, D2: MD5Digest): bool = + ## checks if two MD5Digest values are identical + for i in 0..15: + if D1[i] != D2[i]: return false + return true + +when isMainModule: + assert(getMD5("Franz jagt im komplett verwahrlosten Taxi quer durch Bayern") == + "a3cca2b2aa1e3b5b3b5aad99a8529074") + assert(getMD5("Frank jagt im komplett verwahrlosten Taxi quer durch Bayern") 
== + "7e716d0e702df0505fc72e2b89467910") + assert($toMD5("") == "d41d8cd98f00b204e9800998ecf8427e") + + diff --git a/nimlib/pure/os.nim b/nimlib/pure/os.nim new file mode 100755 index 000000000..afa145e9f --- /dev/null +++ b/nimlib/pure/os.nim @@ -0,0 +1,1147 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module contains basic operating system facilities like +## retrieving environment variables, reading command line arguments, +## working with directories, running shell commands, etc. +{.deadCodeElim: on.} + +{.push debugger: off.} + +import + strutils, times + +when defined(windows): + import winlean +elif defined(posix): + import posix +else: + {.error: "OS module not ported to your operating system!".} + +include "system/ansi_c" + +# copied from excpt.nim, because I don't want to make this template public +template newException(exceptn, message: expr): expr = + block: # open a new scope + var + e: ref exceptn + new(e) + e.msg = message + e + +const + doslike = defined(windows) or defined(OS2) or defined(DOS) + # DOS-like filesystem + +when defined(Nimdoc): # only for proper documentation: + const + CurDir* = '.' + ## The constant string used by the operating system to refer to the + ## current directory. + ## + ## For example: '.' for POSIX or ':' for the classic Macintosh. + + ParDir* = ".." + ## The constant string used by the operating system to refer to the parent + ## directory. + ## + ## For example: ".." for POSIX or "::" for the classic Macintosh. + + DirSep* = '/' + ## The character used by the operating system to separate pathname + ## components, for example, '/' for POSIX or ':' for the classic + ## Macintosh. + + AltSep* = '/' + ## An alternative character used by the operating system to separate + ## pathname components, or the same as `DirSep` if only one separator + ## character exists. 
This is set to '/' on Windows systems where `DirSep` + ## is a backslash. + + PathSep* = ':' + ## The character conventionally used by the operating system to separate + ## search patch components (as in PATH), such as ':' for POSIX or ';' for + ## Windows. + + FileSystemCaseSensitive* = True + ## True if the file system is case sensitive, false otherwise. Used by + ## `cmpPaths` to compare filenames properly. + + ExeExt* = "" + ## The file extension of native executables. For example: + ## "" for POSIX, "exe" on Windows. + + ScriptExt* = "" + ## The file extension of a script file. For example: "" for POSIX, + ## "bat" on Windows. + +elif defined(macos): + const + curdir* = ':' + pardir* = "::" + dirsep* = ':' + altsep* = dirsep + pathsep* = ',' + FileSystemCaseSensitive* = false + ExeExt* = "" + ScriptExt* = "" + + # MacOS paths + # =========== + # MacOS directory separator is a colon ":" which is the only character not + # allowed in filenames. + # + # A path containing no colon or which begins with a colon is a partial path. + # E.g. ":kalle:petter" ":kalle" "kalle" + # + # All other paths are full (absolute) paths. E.g. "HD:kalle:" "HD:" + # When generating paths, one is safe if one ensures that all partial paths + # begin with a colon, and all full paths end with a colon. + # In full paths the first name (e g HD above) is the name of a mounted + # volume. + # These names are not unique, because, for instance, two diskettes with the + # same names could be inserted. This means that paths on MacOS are not + # waterproof. In case of equal names the first volume found will do. + # Two colons "::" are the relative path to the parent. Three is to the + # grandparent etc. +elif doslike: + const + curdir* = '.' + pardir* = ".." 
+ dirsep* = '\\' # seperator within paths + altsep* = '/' + pathSep* = ';' # seperator between paths + FileSystemCaseSensitive* = false + ExeExt* = "exe" + ScriptExt* = "bat" +elif defined(PalmOS) or defined(MorphOS): + const + dirsep* = '/' + altsep* = dirsep + PathSep* = ';' + pardir* = ".." + FileSystemCaseSensitive* = false + ExeExt* = "" + ScriptExt* = "" +elif defined(RISCOS): + const + dirsep* = '.' + altsep* = '.' + pardir* = ".." # is this correct? + pathSep* = ',' + FileSystemCaseSensitive* = true + ExeExt* = "" + ScriptExt* = "" +else: # UNIX-like operating system + const + curdir* = '.' + pardir* = ".." + dirsep* = '/' + altsep* = dirsep + pathSep* = ':' + FileSystemCaseSensitive* = true + ExeExt* = "" + ScriptExt* = "" + +const + ExtSep* = '.' + ## The character which separates the base filename from the extension; + ## for example, the '.' in ``os.nim``. + +# procs dealing with command line arguments: +proc paramCount*(): int + ## Returns the number of command line arguments given to the + ## application. + +proc paramStr*(i: int): string + ## Returns the `i`-th command line arguments given to the + ## application. + ## + ## `i` should be in the range `1..paramCount()`, else + ## the `EOutOfIndex` exception is raised. + +proc OSError*(msg: string = "") {.noinline.} = + ## raises an EOS exception with the given message ``msg``. + ## If ``msg == ""``, the operating system's error flag + ## (``errno``) is converted to a readable error message. On Windows + ## ``GetLastError`` is checked before ``errno``. + ## If no error flag is set, the message ``unknown OS error`` is used. + if len(msg) == 0: + when defined(Windows): + var err = GetLastError() + if err != 0'i32: + # sigh, why is this is so difficult? 
proc existsFile*(filename: string): bool =
  ## Tests whether `filename` names an existing file.
  ##
  ## On Windows every path whose attributes can be queried and that is not
  ## a directory counts as a file; elsewhere the path must ``stat`` to a
  ## regular file. Any failure to query the path yields false.
  when defined(windows):
    var attrs = GetFileAttributesA(filename)
    # -1 means the attributes could not be retrieved
    if attrs == -1'i32: return false
    return (attrs and FILE_ATTRIBUTE_DIRECTORY) == 0'i32
  else:
    var info: TStat
    if stat(filename, info) < 0'i32: return false
    result = S_ISREG(info.st_mode)
proc getLastModificationTime*(file: string): TTime =
  ## Yields the time at which `file` was last written to.
  ##
  ## If the file cannot be queried, ``OSError`` is called, which raises
  ## `EOS`.
  when not defined(posix):
    var findData: TWIN32_Find_Data
    var handle = findfirstFileA(file, findData)
    if handle == -1'i32: OSError()
    # convert the Windows file time into the Unix based `TTime`
    result = winTimeToUnixTime(rdFileTime(findData.ftLastWriteTime))
    findclose(handle)
  else:
    var info: TStat
    if stat(file, info) < 0'i32: OSError()
    result = info.st_mtime
proc JoinPath*(head, tail: string): string {.noSideEffect.} =
  ## Joins two directory names to one.
  ##
  ## For example on Unix:
  ##
  ## .. code-block:: nimrod
  ##   JoinPath("usr", "lib")
  ##
  ## results in:
  ##
  ## .. code-block:: nimrod
  ##   "usr/lib"
  ##
  ## Exactly one separator ends up between the two parts: a separator at the
  ## end of `head` or at the start of `tail` is reused, a doubled one is
  ## collapsed, and `DirSep` is inserted if neither side provides one.
  ##
  ## If `head` is the empty string, `tail` is returned unchanged.
  ## If `tail` is the empty string, `head` is returned with a trailing
  ## separator (``DirSep`` is appended unless `head` already ends in one).
  if len(head) == 0:
    result = tail
  else:
    var headHasSep = head[len(head)-1] in {DirSep, AltSep}
    # test the length first so that an empty `tail` is never indexed
    var tailHasSep = len(tail) > 0 and tail[0] in {DirSep, AltSep}
    if headHasSep and tailHasSep:
      # drop the duplicate separator
      result = head & copy(tail, 1)
    elif headHasSep or tailHasSep:
      result = head & tail
    else:
      result = head & DirSep & tail
proc SplitPath*(path: string): tuple[head, tail: string] {.noSideEffect.} =
  ## Cuts `path` at its last directory separator and returns the part before
  ## it as ``head`` and the part after it as ``tail``. The separator itself
  ## belongs to neither part. Without any separator ``head`` is empty and
  ## ``tail`` is the whole `path`.
  ##
  ## Examples:
  ##
  ## .. code-block:: nimrod
  ##   SplitPath("usr/local/bin") -> ("usr/local", "bin")
  ##   SplitPath("usr/local/bin/") -> ("usr/local/bin", "")
  ##   SplitPath("bin") -> ("", "bin")
  ##   SplitPath("/bin") -> ("", "bin")
  ##   SplitPath("") -> ("", "")
  var cut = len(path) - 1
  # walk backwards until a separator is hit (or the string is exhausted)
  while cut >= 0 and not (path[cut] in {dirsep, altsep}): dec(cut)
  if cut < 0:
    result.head = ""
    result.tail = path
  else:
    result.head = copy(path, 0, cut-1)
    result.tail = copy(path, cut+1)
proc searchExtPos(s: string): int =
  ## Returns the index of the dot that starts the extension of the last
  ## path component of `s`, or -1 if that component has no extension.
  # BUGFIX: do not search until 0! .DS_Store is no file extension!
  result = -1
  for i in countdown(len(s)-1, 1):
    if s[i] == extsep:
      # a dot directly behind a separator starts the name of a dot-file
      # ("dir/.DS_Store"); it does not introduce an extension
      if not (s[i-1] in {dirsep, altsep}): result = i
      break
    elif s[i] in {dirsep, altsep}:
      break # do not skip over path

proc splitFile*(path: string): tuple[dir, name, ext: string] {.noSideEffect.} =
  ## Splits a filename into (dir, filename, extension).
  ## `dir` does not include the separator in front of the file name.
  ## `extension` includes the leading dot.
  ##
  ## Example:
  ##
  ## .. code-block:: nimrod
  ##   var (dir, name, ext) = splitFile("usr/local/nimrodc.html")
  ##   assert dir == "usr/local"
  ##   assert name == "nimrodc"
  ##   assert ext == ".html"
  ##
  ## If `path` has no extension, `ext` is the empty string. A dot that is the
  ## first character of the file name (``".bashrc"``, ``"dir/.bashrc"``) is
  ## part of the name, not the start of an extension.
  ## If `path` has no directory component, `dir` is the empty string.
  ## If `path` is empty or ends in a separator it has no filename component:
  ## `name` and `ext` are empty strings and `dir` is `path` unchanged.
  if path.len == 0 or path[path.len-1] in {dirSep, altSep}:
    result = (path, "", "")
  else:
    var sepPos = -1
    var dotPos = path.len
    for i in countdown(len(path)-1, 0):
      if path[i] == ExtSep:
        # only the last dot counts, and only if it does not start the name
        if dotPos == path.len and i > 0 and
            not (path[i-1] in {dirsep, altsep}):
          dotPos = i
      elif path[i] in {dirsep, altsep}:
        sepPos = i
        break
    result.dir = copy(path, 0, sepPos-1)
    result.name = copy(path, sepPos+1, dotPos-1)
    result.ext = copy(path, dotPos)
proc extractFilename*(path: string): string {.noSideEffect.} =
  ## Returns the last component of `path`, i.e. the ``tail`` result of
  ## ``splitPath``. For an empty `path` or one that ends in a directory
  ## separator the result is the empty string.
  if path.len > 0 and not (path[path.len-1] in {dirSep, altSep}):
    result = splitPath(path).tail
  else:
    result = ""
proc ChangeFileExt*(filename, ext: string): string {.noSideEffect.} =
  ## Replaces the file extension of `filename` by `ext`.
  ##
  ## If `filename` has no extension, `ext` is simply appended.
  ## If `ext` == "" any existing extension is removed.
  ## `ext` is normalized by ``normExt``, so the extension separator is put
  ## in front of it when it is missing.
  var dot = searchExtPos(filename)
  # keep everything in front of the old extension (if there is one)
  var stem = filename
  if dot >= 0: stem = copy(filename, 0, dot-1)
  result = stem & normExt(ext)
proc copyFile*(dest, source: string) =
  ## Copies a file from `source` to `dest`. If this fails,
  ## `EOS` is raised.
  ##
  ## On Windows the work is delegated to ``CopyFileA``; everywhere else the
  ## content is copied through an 8K buffer.
  when defined(Windows):
    if CopyFileA(source, dest, 0'i32) == 0'i32: OSError()
  else:
    # generic version of copyFile which works for any platform:
    const
      bufSize = 8192 # 8K buffer
    var
      d, s: TFile
    if not open(s, source): OSError()
    if not open(d, dest, fmWrite):
      close(s)
      OSError()
    var buf: Pointer = alloc(bufsize)
    try:
      while True:
        var bytesread = readBuffer(s, buf, bufsize)
        # verify every chunk -- including the final, short one -- before
        # deciding whether the end of the file has been reached
        if bytesread > 0 and writeBuffer(d, buf, bytesread) != bytesread:
          OSError()
        if bytesread != bufSize: break # short read: end of file
    finally:
      # release the buffer and both files on success *and* on error
      dealloc(buf)
      close(s)
      close(d)
proc getEnv*(key: string): string =
  ## Looks up the environment variable `key` and returns its value.
  ##
  ## The module's own copy of the environment is consulted first; if `key`
  ## is not found there the C runtime is asked. A variable that does not
  ## exist yields "". Use `existsEnv(key)` to tell a missing variable from
  ## one whose value is "".
  var idx = findEnvVar(key)
  if idx < 0:
    # not in our copy of the environment; fall back to the C runtime
    var raw = cgetenv(key)
    if raw != nil: result = $raw
    else: result = ""
  else:
    var entry = environment[idx]
    # the value is everything behind the first '='
    result = copy(entry, find(entry, '=')+1)
iterator envPairs*(): tuple[key, value: string] =
  ## Walks over all environment variables. Each yielded tuple holds the
  ## variable's name in its first component and its value in the second.
  getEnvVarsC()
  # the upper bound is fixed before the first `yield`
  var last = high(environment)
  var i = 0
  while i <= last:
    var eq = find(environment[i], '=')
    var name = copy(environment[i], 0, eq-1)
    var value = copy(environment[i], eq+1)
    yield (name, value)
    inc(i)
iterator walkDir*(dir: string): tuple[kind: TPathComponent, path: string] =
  ## walks over the directory `dir` and yields for each directory or file in
  ## `dir`. The component type and full path for each item is returned.
  ## Walking is not recursive. The pseudo entries ``.`` and ``..`` are never
  ## yielded; other names starting with a dot are.
  ##
  ## On POSIX systems symbolic links are reported as ``pcLinkToFile`` or
  ## ``pcLinkToDir`` depending on what they point to; a link whose target
  ## cannot be examined is reported as ``pcLinkToFile``. An entry that cannot
  ## be examined at all is skipped.
  ##
  ## Example: This directory structure::
  ##   dirA / dirB / fileB1.txt
  ##        / dirC
  ##        / fileA1.txt
  ##        / fileA2.txt
  ##
  ## and this code:
  ##
  ## .. code-block:: Nimrod
  ##     for kind, path in walkDir("dirA"):
  ##       echo(path)
  ##
  ## produces this output (though not necessarily in this order!)::
  ##   dirA/dirB
  ##   dirA/dirC
  ##   dirA/fileA1.txt
  ##   dirA/fileA2.txt
  when defined(windows):
    var f: TWIN32_Find_Data
    var h = findfirstFileA(dir / "*", f)
    if h != -1:
      while true:
        var name = extractFilename($f.cFilename)
        # skip only the "." and ".." pseudo entries, not every dot-file
        if name != "." and name != "..":
          var k = pcFile
          if (f.dwFileAttributes and FILE_ATTRIBUTE_DIRECTORY) != 0'i32:
            k = pcDir
          yield (k, dir / name)
        if findnextFileA(h, f) == 0'i32: break
      findclose(h)
  else:
    var d = openDir(dir)
    if d != nil:
      while true:
        var x = readDir(d)
        if x == nil: break
        var y = $x.d_name
        if y != "." and y != "..":
          var s: TStat
          y = dir / y
          # lstat examines the entry itself, so symbolic links are visible
          # and a dangling link does not end the walk
          if lstat(y, s) >= 0'i32:
            var k = pcFile
            if S_ISLNK(s.st_mode):
              # follow the link once to learn what it points to
              var target: TStat
              k = pcLinkToFile
              if stat(y, target) >= 0'i32 and S_ISDIR(target.st_mode):
                k = pcLinkToDir
            elif S_ISDIR(s.st_mode):
              k = pcDir
            yield (k, y)
      discard closeDir(d)
proc parseCmdLine*(c: string): seq[string] =
  ## Splits a command line into several components; components are separated
  ## by whitespace unless the whitespace occurs within ``"`` or ``'`` quotes.
  ##
  ## A quote only opens a quoted component when it is the first character of
  ## that component; the quotes themselves are not part of the result. A
  ## missing closing quote extends the component to the end of `c`. Parsing
  ## stops at the end of `c` or at an embedded ``'\0'`` that starts a
  ## component.
  ##
  ## This proc is only occasionally useful, better use the `parseopt` module.
  result = @[]
  var i = 0
  var a = ""
  while true:
    setLen(a, 0)
    # skip whitespace (and other control characters) between components;
    # every access is guarded by `i < c.len`, no terminating zero is needed
    while i < c.len and c[i] >= '\1' and c[i] <= ' ': inc(i)
    if i >= c.len or c[i] == '\0': break
    if c[i] == '\'' or c[i] == '\"':
      var delim = c[i]
      inc(i) # skip ' or "
      while i < c.len and c[i] != '\0' and c[i] != delim:
        add a, c[i]
        inc(i)
      # skip the closing quote if there is one
      if i < c.len and c[i] != '\0': inc(i)
    else:
      while i < c.len and c[i] > ' ':
        add(a, c[i])
        inc(i)
    add(result, a)
proc setFilePermissions*(filename: string, permissions: set[TFilePermission]) =
  ## sets the file permissions for `filename`. `OSError` is raised in case of
  ## an error. On Windows, only the ``readonly`` flag is changed, depending on
  ## ``fpUserWrite``.
  when defined(posix):
    # translate the portable permission set into the POSIX mode bits
    var p = 0'i32
    if fpUserRead in permissions: p = p or S_IRUSR
    if fpUserWrite in permissions: p = p or S_IWUSR
    if fpUserExec in permissions: p = p or S_IXUSR

    if fpGroupRead in permissions: p = p or S_IRGRP
    if fpGroupWrite in permissions: p = p or S_IWGRP
    if fpGroupExec in permissions: p = p or S_IXGRP

    if fpOthersRead in permissions: p = p or S_IROTH
    if fpOthersWrite in permissions: p = p or S_IWOTH
    if fpOthersExec in permissions: p = p or S_IXOTH

    if chmod(filename, p) != 0: OSError()
  else:
    var res = GetFileAttributesA(filename)
    if res == -1'i32: OSError()
    if fpUserWrite in permissions:
      res = res and not FILE_ATTRIBUTE_READONLY
    else:
      res = res or FILE_ATTRIBUTE_READONLY
    # SetFileAttributesA reports failure by returning zero
    if SetFileAttributesA(filename, res) == 0'i32:
      OSError()
when defined(linux) or defined(solaris) or defined(bsd) or defined(aix):
  proc getApplAux(procPath: string): string =
    ## Returns the target of the symbolic link `procPath` as reported by
    ## ``readlink``. The empty string is returned if the link cannot be
    ## read.
    var size = 256
    while true:
      result = newString(size)
      var n = readlink(procPath, result, size)
      if n < 0: return "" # readlink failed
      if n < size:
        # the target fitted with room to spare, so it is complete
        setlen(result, n)
        return
      # the buffer was filled completely and the target may have been
      # truncated; try again with a larger buffer
      size = size * 2
proc sleep*(milsecs: int) =
  ## sleeps `milsecs` milliseconds.
  when defined(windows):
    winlean.sleep(int32(milsecs))
  else:
    var a, b: Ttimespec
    # split into whole seconds and the remainder expressed in nanoseconds
    a.tv_sec = TTime(milsecs div 1000)
    a.tv_nsec = (milsecs mod 1000) * 1_000_000 # 1 ms == 1_000_000 ns
    discard posix.nanosleep(a, b)
+# + +## This module implements an advanced facility for executing OS processes +## and process communication. + +import + strutils, os, strtabs, streams + +when defined(windows): + import winlean +else: + import posix + +type + TProcess = object of TObject + when defined(windows): + FProcessHandle: Thandle + inputHandle, outputHandle, errorHandle: TFileHandle + else: + inputHandle, outputHandle, errorHandle: TFileHandle + id: cint + exitCode: cint + + PProcess* = ref TProcess ## represents an operating system process + + TProcessOption* = enum ## options that can be passed to `startProcess` + poEchoCmd, ## echo the command before execution + poUseShell, ## use the shell to execute the command; NOTE: This + ## often creates a security hole! + poStdErrToStdOut, ## merge stdout and stderr to the stdout stream + poParentStreams ## use the parent's streams + +proc execProcess*(command: string, + options: set[TProcessOption] = {poStdErrToStdOut, + poUseShell}): string + ## A convenience procedure that executes ``command`` with ``startProcess`` + ## and returns its output as a string. + +proc executeProcess*(command: string, + options: set[TProcessOption] = {poStdErrToStdOut, + poUseShell}): string {. + deprecated.} = + ## **Deprecated since version 0.8.2**: Use `execProcess` instead. + result = execProcess(command, options) + +proc execCmd*(command: string): int + ## Executes ``command`` and returns its error code. Standard input, output, + ## error streams are inherited from the calling process. + +proc executeCommand*(command: string): int {.deprecated.} = + ## **Deprecated since version 0.8.2**: Use `execCmd` instead. + result = execCmd(command) + + +proc startProcess*(command: string, + workingDir: string = "", + args: openarray[string] = [], + env: PStringTable = nil, + options: set[TProcessOption] = {poStdErrToStdOut}): PProcess + ## Starts a process. `Command` is the executable file, `workingDir` is the + ## process's working directory.
If ``workingDir == ""`` the current directory + ## is used. `args` are the command line arguments that are passed to the + ## process. On many operating systems, the first command line argument is the + ## name of the executable. `args` should not contain this argument! + ## `env` is the environment that will be passed to the process. + ## If ``env == nil`` the environment is inherited from + ## the parent process. `options` are additional flags that may be passed + ## to `startProcess`. See the documentation of ``TProcessOption`` for the + ## meaning of these flags. + ## + ## Return value: The newly created process object. Nil is never returned, + ## but ``EOS`` is raised in case of an error. + +proc suspend*(p: PProcess) + ## Suspends the process `p`. + +proc resume*(p: PProcess) + ## Resumes the process `p`. + +proc terminate*(p: PProcess) + ## Terminates the process `p`. + +proc running*(p: PProcess): bool + ## Returns true iff the process `p` is still running. Returns immediately. + +proc processID*(p: PProcess): int = + ## returns `p`'s process ID. + return p.id + +proc waitForExit*(p: PProcess): int + ## waits for the process to finish and returns `p`'s error code. + +proc inputStream*(p: PProcess): PStream + ## returns ``p``'s input stream for writing to + +proc outputStream*(p: PProcess): PStream + ## returns ``p``'s output stream for reading from + +proc errorStream*(p: PProcess): PStream + ## returns ``p``'s error stream for reading from + +when defined(macosx) or defined(bsd): + const + CTL_HW = 6 + HW_AVAILCPU = 25 + HW_NCPU = 3 + proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, + a: var int, b: pointer, c: int): cint {. + importc: "sysctl", header: "<sys/sysctl.h>".} + +proc countProcessors*(): int = + ## returns the number of the processors/cores the machine has. + ## Returns 0 if it cannot be determined.
+ when defined(windows): + var x = getenv("NUMBER_OF_PROCESSORS") + if x.len > 0: result = parseInt(x) + elif defined(macosx) or defined(bsd): + var + mib: array[0..3, cint] + len, numCPU: int + mib[0] = CTL_HW + mib[1] = HW_AVAILCPU + len = sizeof(numCPU) + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + if numCPU < 1: + mib[1] = HW_NCPU + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + result = numCPU + elif defined(hpux): + result = mpctl(MPC_GETNUMSPUS, nil, nil) + elif defined(irix): + var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint + result = sysconf(SC_NPROC_ONLN) + else: + result = sysconf(SC_NPROCESSORS_ONLN) + if result <= 0: result = 1 + +proc startProcessAux(cmd: string, options: set[TProcessOption]): PProcess = + var c = parseCmdLine(cmd) + var a: seq[string] = @[] # slicing is not yet implemented :-( + for i in 1 .. c.len-1: add(a, c[i]) + result = startProcess(command=c[0], args=a, options=options) + +proc execProcesses*(cmds: openArray[string], + options = {poStdErrToStdOut, poParentStreams}, + n = countProcessors()): int = + ## executes the commands `cmds` in parallel. Creates `n` processes + ## that execute in parallel. The highest return value of all processes + ## is returned. 
+ assert n > 0 + if n > 1: + var q: seq[PProcess] + newSeq(q, n) + var m = min(n, cmds.len) + for i in 0..m-1: + q[i] = startProcessAux(cmds[i], options=options) + when defined(noBusyWaiting): + var r = 0 + for i in m..high(cmds): + when defined(debugExecProcesses): + var err = "" + var outp = outputStream(q[r]) + while running(q[r]) or not outp.atEnd(outp): + err.add(outp.readLine()) + err.add("\n") + echo(err) + result = max(waitForExit(q[r]), result) + q[r] = startProcessAux(cmds[i], options=options) + r = (r + 1) mod n + else: + var i = m + while i <= high(cmds): + sleep(50) + for r in 0..n-1: + if not running(q[r]): + #echo(outputStream(q[r]).readLine()) + result = max(waitForExit(q[r]), result) + q[r] = startProcessAux(cmds[i], options=options) + inc(i) + if i > high(cmds): break + for i in 0..m-1: + result = max(waitForExit(q[i]), result) + else: + for i in 0..high(cmds): + var p = startProcessAux(cmds[i], options=options) + result = max(waitForExit(p), result) + +when true: + nil +else: + proc startGUIProcess*(command: string, + workingDir: string = "", + args: openarray[string] = [], + env: PStringTable = nil, + x = -1, + y = -1, + width = -1, + height = -1): PProcess + +proc execProcess(command: string, + options: set[TProcessOption] = {poStdErrToStdOut, + poUseShell}): string = + var p = startProcessAux(command, options=options) + var outp = outputStream(p) + result = "" + while running(p) or not outp.atEnd(outp): + result.add(outp.readLine()) + result.add("\n") + +when false: + proc deallocCStringArray(a: cstringArray) = + var i = 0 + while a[i] != nil: + dealloc(a[i]) + inc(i) + dealloc(a) + +when defined(Windows): + # We need to implement a handle stream for Windows: + type + PFileHandleStream = ref TFileHandleStream + TFileHandleStream = object of TStream + handle: THandle + atTheEnd: bool + + proc hsClose(s: PFileHandleStream) = nil # nothing to do here + proc hsAtEnd(s: PFileHandleStream): bool = return s.atTheEnd + + proc hsReadData(s: 
PFileHandleStream, buffer: pointer, bufLen: int): int = + if s.atTheEnd: return 0 + var br: int32 + var a = winlean.ReadFile(s.handle, buffer, bufLen, br, nil) + # TRUE and zero bytes returned (EOF). + # TRUE and n (>0) bytes returned (good data). + # FALSE and bytes returned undefined (system error). + if a == 0 and br != 0: OSError() + s.atTheEnd = br < bufLen + result = br + + proc hsWriteData(s: PFileHandleStream, buffer: pointer, bufLen: int) = + var bytesWritten: int32 + var a = winlean.writeFile(s.handle, buffer, bufLen, bytesWritten, nil) + if a == 0: OSError() + + proc newFileHandleStream(handle: THandle): PFileHandleStream = + new(result) + result.handle = handle + result.close = hsClose + result.atEnd = hsAtEnd + result.readData = hsReadData + result.writeData = hsWriteData + + proc buildCommandLine(a: string, args: openarray[string]): cstring = + var res = quoteIfContainsWhite(a) + for i in 0..high(args): + res.add(' ') + res.add(quoteIfContainsWhite(args[i])) + result = cast[cstring](alloc0(res.len+1)) + copyMem(result, cstring(res), res.len) + + proc buildEnv(env: PStringTable): cstring = + var L = 0 + for key, val in pairs(env): inc(L, key.len + val.len + 2) + result = cast[cstring](alloc0(L+2)) + L = 0 + for key, val in pairs(env): + var x = key & "=" & val + copyMem(addr(result[L]), cstring(x), x.len+1) # copy \0 + inc(L, x.len+1) + + #proc open_osfhandle(osh: THandle, mode: int): int {. 
+ # importc: "_open_osfhandle", header: "<fcntl.h>".} + + #var + # O_WRONLY {.importc: "_O_WRONLY", header: "<fcntl.h>".}: int + # O_RDONLY {.importc: "_O_RDONLY", header: "<fcntl.h>".}: int + + proc CreatePipeHandles(Rdhandle, WrHandle: var THandle) = + var piInheritablePipe: TSecurityAttributes + piInheritablePipe.nlength = SizeOF(TSecurityAttributes) + piInheritablePipe.lpSecurityDescriptor = nil + piInheritablePipe.Binherithandle = 1 + if CreatePipe(Rdhandle, Wrhandle, piInheritablePipe, 1024) == 0'i32: + OSError() + + proc fileClose(h: THandle) {.inline.} = + if h > 4: discard CloseHandle(h) + + proc startProcess(command: string, + workingDir: string = "", + args: openarray[string] = [], + env: PStringTable = nil, + options: set[TProcessOption] = {poStdErrToStdOut}): PProcess = + var + SI: TStartupInfo + ProcInfo: TProcessInformation + success: int + hi, ho, he: THandle + new(result) + SI.cb = SizeOf(SI) + if poParentStreams notin options: + SI.dwFlags = STARTF_USESTDHANDLES # STARTF_USESHOWWINDOW or + CreatePipeHandles(SI.hStdInput, HI) + CreatePipeHandles(HO, Si.hStdOutput) + if poStdErrToStdOut in options: + SI.hStdError = SI.hStdOutput + HE = HO + else: + CreatePipeHandles(HE, Si.hStdError) + result.inputHandle = hi + result.outputHandle = ho + result.errorHandle = he + else: + SI.hStdError = GetStdHandle(STD_ERROR_HANDLE) + SI.hStdInput = GetStdHandle(STD_INPUT_HANDLE) + SI.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE) + result.inputHandle = si.hStdInput + result.outputHandle = si.hStdOutput + result.errorHandle = si.hStdError + + var cmdl: cstring + if false: # poUseShell in options: + cmdl = buildCommandLine(getEnv("COMSPEC"), @["/c", command] & args) + else: + cmdl = buildCommandLine(command, args) + var wd: cstring = nil + var e: cstring = nil + if len(workingDir) > 0: wd = workingDir + if env != nil: e = buildEnv(env) + if poEchoCmd in options: echo($cmdl) + success = winlean.CreateProcess(nil, + cmdl, nil, nil, 1, NORMAL_PRIORITY_CLASS, e, wd, SI, 
ProcInfo) + + if poParentStreams notin options: + FileClose(si.hStdInput) + FileClose(si.hStdOutput) + if poStdErrToStdOut notin options: + FileClose(si.hStdError) + + if e != nil: dealloc(e) + dealloc(cmdl) + if success == 0: OSError() + # Close the handle now so anyone waiting is woken: + discard closeHandle(procInfo.hThread) + result.FProcessHandle = procInfo.hProcess + result.id = procInfo.dwProcessID + + proc suspend(p: PProcess) = + discard SuspendThread(p.FProcessHandle) + + proc resume(p: PProcess) = + discard ResumeThread(p.FProcessHandle) + + proc running(p: PProcess): bool = + var x = waitForSingleObject(p.FProcessHandle, 50) + return x == WAIT_TIMEOUT + + proc terminate(p: PProcess) = + if running(p): + discard TerminateProcess(p.FProcessHandle, 0) + + proc waitForExit(p: PProcess): int = + discard WaitForSingleObject(p.FProcessHandle, Infinite) + var res: int32 + discard GetExitCodeProcess(p.FProcessHandle, res) + result = res + discard CloseHandle(p.FProcessHandle) + + proc inputStream(p: PProcess): PStream = + result = newFileHandleStream(p.inputHandle) + + proc outputStream(p: PProcess): PStream = + result = newFileHandleStream(p.outputHandle) + + proc errorStream(p: PProcess): PStream = + result = newFileHandleStream(p.errorHandle) + + proc execCmd(command: string): int = + var + SI: TStartupInfo + ProcInfo: TProcessInformation + process: THandle + L: int32 + SI.cb = SizeOf(SI) + SI.hStdError = GetStdHandle(STD_ERROR_HANDLE) + SI.hStdInput = GetStdHandle(STD_INPUT_HANDLE) + SI.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE) + if winlean.CreateProcess(nil, command, nil, nil, 0, + NORMAL_PRIORITY_CLASS, nil, nil, SI, ProcInfo) == 0: + OSError() + else: + Process = ProcInfo.hProcess + discard CloseHandle(ProcInfo.hThread) + if WaitForSingleObject(Process, INFINITE) != -1: + discard GetExitCodeProcess(Process, L) + result = int(L) + else: + result = -1 + discard CloseHandle(Process) + +else: + const + readIdx = 0 + writeIdx = 1 + + proc 
addCmdArgs(command: string, args: openarray[string]): string = + result = quoteIfContainsWhite(command) + for i in 0 .. high(args): + add(result, " ") + add(result, quoteIfContainsWhite(args[i])) + + proc toCStringArray(b, a: openarray[string]): cstringArray = + result = cast[cstringArray](alloc0((a.len + b.len + 1) * sizeof(cstring))) + for i in 0..high(b): + result[i] = cast[cstring](alloc(b[i].len+1)) + copyMem(result[i], cstring(b[i]), b[i].len+1) + for i in 0..high(a): + result[i+b.len] = cast[cstring](alloc(a[i].len+1)) + copyMem(result[i+b.len], cstring(a[i]), a[i].len+1) + + proc ToCStringArray(t: PStringTable): cstringArray = + result = cast[cstringArray](alloc0((t.len + 1) * sizeof(cstring))) + var i = 0 + for key, val in pairs(t): + var x = key & "=" & val + result[i] = cast[cstring](alloc(x.len+1)) + copyMem(result[i], addr(x[0]), x.len+1) + inc(i) + + proc startProcess(command: string, + workingDir: string = "", + args: openarray[string] = [], + env: PStringTable = nil, + options: set[TProcessOption] = {poStdErrToStdOut}): PProcess = + # POSIX implementation: creates the pipes, forks, wires the pipe ends to + # the child's descriptors 0/1/2 and replaces the child image via exec. + var + p_stdin, p_stdout, p_stderr: array [0..1, cint] + new(result) + result.exitCode = 3 # for ``waitForExit`` + if pipe(p_stdin) != 0'i32 or pipe(p_stdout) != 0'i32: + OSError("failed to create a pipe") + # The stderr pipe has to exist before ``fork``: a pipe that is opened in + # the child is unknown to the parent, which reads ``p_stderr`` later on. + if poStdErrToStdOut notin options and pipe(p_stderr) != 0'i32: + OSError("failed to create a pipe") + var Pid = fork() + if Pid < 0: + OSError("failed to fork process") + + if pid == 0: + ## child process: + discard close(p_stdin[writeIdx]) + if dup2(p_stdin[readIdx], readIdx) < 0: OSError() + discard close(p_stdout[readIdx]) + if dup2(p_stdout[writeIdx], writeIdx) < 0: OSError() + if poStdErrToStdOut in options: + if dup2(p_stdout[writeIdx], 2) < 0: OSError() + else: + discard close(p_stderr[readIdx]) + if dup2(p_stderr[writeIdx], 2) < 0: OSError() + + if workingDir.len > 0: + os.setCurrentDir(workingDir) + if poUseShell notin options: + var a = toCStringArray([extractFilename(command)], args) + if env == nil: + discard execv(command, a) + else: +
discard execve(command, a, ToCStringArray(env)) + else: + var x = addCmdArgs(command, args) + var a = toCStringArray(["sh", "-c"], [x]) + if env == nil: + discard execv("/bin/sh", a) + else: + discard execve("/bin/sh", a, ToCStringArray(env)) + # too risky to raise an exception here: + quit("execve call failed: " & $strerror(errno)) + # Parent process. Copy process information. + if poEchoCmd in options: + echo(command & " " & join(args, " ")) + result.id = pid + + result.inputHandle = p_stdin[writeIdx] + result.outputHandle = p_stdout[readIdx] + if poStdErrToStdOut in options: + result.errorHandle = result.outputHandle + else: + result.errorHandle = p_stderr[readIdx] + discard close(p_stderr[writeIdx]) + discard close(p_stdin[readIdx]) + discard close(p_stdout[writeIdx]) + + proc suspend(p: PProcess) = + discard kill(p.id, SIGSTOP) + + proc resume(p: PProcess) = + discard kill(p.id, SIGCONT) + + proc running(p: PProcess): bool = + # ``waitPid`` with ``WNOHANG`` never blocks: it yields 0 while the child + # has not changed state, the child's pid once it has exited (the status is + # then stored in ``p.exitCode``) and -1 on error. Only 0 means "running". + result = waitPid(p.id, p.exitCode, WNOHANG) == 0 + + proc terminate(p: PProcess) = + if kill(p.id, SIGTERM) == 0'i32: + if running(p): discard kill(p.id, SIGKILL) + + proc waitForExit(p: PProcess): int = + #if waitPid(p.id, p.exitCode, 0) == int(p.id): + # ``waitPid`` fails if the process is not running anymore. But then + # ``running`` probably set ``p.exitCode`` for us. Since ``p.exitCode`` is + # initialized with 3, wrong success exit codes are prevented.
+ var oldExitCode = p.exitCode + if waitPid(p.id, p.exitCode, 0) < 0: + # failed, so restore old exitCode + p.exitCode = oldExitCode + result = int(p.exitCode) + + proc inputStream(p: PProcess): PStream = + var f: TFile + if not open(f, p.inputHandle, fmWrite): OSError() + result = newFileStream(f) + + proc outputStream(p: PProcess): PStream = + var f: TFile + if not open(f, p.outputHandle, fmRead): OSError() + result = newFileStream(f) + + proc errorStream(p: PProcess): PStream = + var f: TFile + if not open(f, p.errorHandle, fmRead): OSError() + result = newFileStream(f) + + proc csystem(cmd: cstring): cint {.nodecl, importc: "system".} + + proc execCmd(command: string): int = + result = csystem(command) + +when isMainModule: + var x = execProcess("gcc -v") + echo "ECHO ", x diff --git a/nimlib/pure/parsecfg.nim b/nimlib/pure/parsecfg.nim new file mode 100755 index 000000000..c26dab099 --- /dev/null +++ b/nimlib/pure/parsecfg.nim @@ -0,0 +1,352 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2008 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## The ``parsecfg`` module implements a high performance configuration file +## parser. The configuration file's syntax is similar to the Windows ``.ini`` +## format, but much more powerful, as it is not a line based parser. String +## literals, raw string literals and triple quoted string literals are supported +## as in the Nimrod programming language. + +## This is an example of how a configuration file may look like: +## +## .. include:: doc/mytest.cfg +## :literal: +## The file ``tests/tparscfg.nim`` demonstrates how to use the +## configuration file parser: +## +## .. 
code-block:: nimrod +## :file: tests/tparscfg.nim + + +import + hashes, strutils, lexbase, streams + +type + TCfgEventKind* = enum ## enumeration of all events that may occur when parsing + cfgEof, ## end of file reached + cfgSectionStart, ## a ``[section]`` has been parsed + cfgKeyValuePair, ## a ``key=value`` pair has been detected + cfgOption, ## a ``--key=value`` command line option + cfgError ## an error occurred during parsing + + TCfgEvent* = object of TObject ## describes a parsing event + case kind*: TCfgEventKind ## the kind of the event + of cfgEof: nil + of cfgSectionStart: + section*: string ## `section` contains the name of the + ## parsed section start (syntax: ``[section]``) + of cfgKeyValuePair, cfgOption: + key*, value*: string ## contains the (key, value) pair if an option + ## of the form ``--key: value`` or an ordinary + ## ``key= value`` pair has been parsed. + ## ``value==""`` if it was not specified in the + ## configuration file. + of cfgError: ## the parser encountered an error: `msg` + msg*: string ## contains the error message. No exceptions + ## are thrown if a parse error occurs. + + TTokKind = enum + tkInvalid, tkEof, + tkSymbol, tkEquals, tkColon, tkBracketLe, tkBracketRi, tkDashDash + TToken {.final.} = object # a token + kind: TTokKind # the type of the token + literal: string # the parsed (string) literal + + TParserState = enum + startState # , commaState # not yet used + TCfgParser* = object of TBaseLexer ## the parser object. + tok: TToken + state: TParserState + filename: string + +proc open*(c: var TCfgParser, input: PStream, filename: string) + ## initializes the parser with an input stream. `Filename` is only used + ## for nice error messages. + +proc close*(c: var TCfgParser) + ## closes the parser `c` and its associated input stream. + +proc next*(c: var TCfgParser): TCfgEvent + ## retrieves the first/next event. This controls the parser.
+ +proc getColumn*(c: TCfgParser): int + ## get the current column the parser has arrived at. + +proc getLine*(c: TCfgParser): int + ## get the current line the parser has arrived at. + +proc getFilename*(c: TCfgParser): string + ## get the filename of the file that the parser processes. + +proc errorStr*(c: TCfgParser, msg: string): string + ## returns a properly formated error message containing current line and + ## column information. + + +# implementation + +const + SymChars: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} + +proc rawGetTok(c: var TCfgParser, tok: var TToken) +proc open(c: var TCfgParser, input: PStream, filename: string) = + lexbase.open(c, input) + c.filename = filename + c.state = startState + c.tok.kind = tkInvalid + c.tok.literal = "" + rawGetTok(c, c.tok) + +proc close(c: var TCfgParser) = + lexbase.close(c) + +proc getColumn(c: TCfgParser): int = + result = getColNumber(c, c.bufPos) + +proc getLine(c: TCfgParser): int = + result = c.linenumber + +proc getFilename(c: TCfgParser): string = + result = c.filename + +proc handleHexChar(c: var TCfgParser, xi: var int) = + case c.buf[c.bufpos] + of '0'..'9': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) + inc(c.bufpos) + of 'a'..'f': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) + inc(c.bufpos) + of 'A'..'F': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) + inc(c.bufpos) + else: + nil + +proc handleDecChars(c: var TCfgParser, xi: var int) = + while c.buf[c.bufpos] in {'0'..'9'}: + xi = (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0')) + inc(c.bufpos) + +proc getEscapedChar(c: var TCfgParser, tok: var TToken) = + inc(c.bufpos) # skip '\' + case c.buf[c.bufpos] + of 'n', 'N': + add(tok.literal, "\n") + Inc(c.bufpos) + of 'r', 'R', 'c', 'C': + add(tok.literal, '\c') + Inc(c.bufpos) + of 'l', 'L': + add(tok.literal, '\L') + Inc(c.bufpos) + of 'f', 'F': + add(tok.literal, '\f') + inc(c.bufpos) + of 'e', 'E': + add(tok.literal, '\e') + Inc(c.bufpos) + 
of 'a', 'A': + add(tok.literal, '\a') + Inc(c.bufpos) + of 'b', 'B': + add(tok.literal, '\b') + Inc(c.bufpos) + of 'v', 'V': + add(tok.literal, '\v') + Inc(c.bufpos) + of 't', 'T': + add(tok.literal, '\t') + Inc(c.bufpos) + of '\'', '\"': + add(tok.literal, c.buf[c.bufpos]) + Inc(c.bufpos) + of '\\': + add(tok.literal, '\\') + Inc(c.bufpos) + of 'x', 'X': + inc(c.bufpos) + var xi = 0 + handleHexChar(c, xi) + handleHexChar(c, xi) + add(tok.literal, Chr(xi)) + of '0'..'9': + var xi = 0 + handleDecChars(c, xi) + if (xi <= 255): add(tok.literal, Chr(xi)) + else: tok.kind = tkInvalid + else: tok.kind = tkInvalid + +proc HandleCRLF(c: var TCfgParser, pos: int): int = + case c.buf[pos] + of '\c': result = lexbase.HandleCR(c, pos) + of '\L': result = lexbase.HandleLF(c, pos) + else: result = pos + +proc getString(c: var TCfgParser, tok: var TToken, rawMode: bool) = + var pos = c.bufPos + 1 # skip " + var buf = c.buf # put `buf` in a register + tok.kind = tkSymbol + if (buf[pos] == '\"') and (buf[pos + 1] == '\"'): + # long string literal: + inc(pos, 2) # skip "" + # skip leading newline: + pos = HandleCRLF(c, pos) + buf = c.buf + while true: + case buf[pos] + of '\"': + if (buf[pos + 1] == '\"') and (buf[pos + 2] == '\"'): break + add(tok.literal, '\"') + Inc(pos) + of '\c', '\L': + pos = HandleCRLF(c, pos) + buf = c.buf + add(tok.literal, "\n") + of lexbase.EndOfFile: + tok.kind = tkInvalid + break + else: + add(tok.literal, buf[pos]) + Inc(pos) + c.bufpos = pos + 3 # skip the three """ + else: + # ordinary string literal + while true: + var ch = buf[pos] + if ch == '\"': + inc(pos) # skip '"' + break + if ch in {'\c', '\L', lexbase.EndOfFile}: + tok.kind = tkInvalid + break + if (ch == '\\') and not rawMode: + c.bufPos = pos + getEscapedChar(c, tok) + pos = c.bufPos + else: + add(tok.literal, ch) + Inc(pos) + c.bufpos = pos + +proc getSymbol(c: var TCfgParser, tok: var TToken) = + var pos = c.bufpos + var buf = c.buf + while true: + add(tok.literal, buf[pos]) + Inc(pos) 
+ if not (buf[pos] in SymChars): break + c.bufpos = pos + tok.kind = tkSymbol + +proc skip(c: var TCfgParser) = + var pos = c.bufpos + var buf = c.buf + while true: + case buf[pos] + of ' ', '\t': + Inc(pos) + of '#', ';': + while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) + of '\c', '\L': + pos = HandleCRLF(c, pos) + buf = c.buf + else: + break # EndOfFile also leaves the loop + c.bufpos = pos + +proc rawGetTok(c: var TCfgParser, tok: var TToken) = + # Scans the next token into `tok`: resets it, skips whitespace and + # comments, then dispatches on the current character. `tok.literal` holds + # the token text and ends up in the parser's error messages. + tok.kind = tkInvalid + setlen(tok.literal, 0) + skip(c) + case c.buf[c.bufpos] + of '=': + tok.kind = tkEquals + inc(c.bufpos) + tok.literal = "=" + of '-': + inc(c.bufPos) + if c.buf[c.bufPos] == '-': inc(c.bufPos) + tok.kind = tkDashDash + tok.literal = "--" + of ':': + tok.kind = tkColon + inc(c.bufpos) + tok.literal = ":" + of 'r', 'R': + if c.buf[c.bufPos + 1] == '\"': + Inc(c.bufPos) + getString(c, tok, true) + else: + getSymbol(c, tok) + of '[': + tok.kind = tkBracketLe + inc(c.bufpos) + tok.literal = "[" + of ']': + tok.kind = tkBracketRi + Inc(c.bufpos) + tok.literal = "]" + of '\"': + getString(c, tok, false) + of lexbase.EndOfFile: + tok.kind = tkEof + tok.literal = "[EOF]" + else: getSymbol(c, tok) + +proc errorStr(c: TCfgParser, msg: string): string = + result = `%`("$1($2, $3) Error: $4", + [c.filename, $getLine(c), $getColumn(c), msg]) + +proc getKeyValPair(c: var TCfgParser, kind: TCfgEventKind): TCfgEvent = + if c.tok.kind == tkSymbol: + result.kind = kind + result.key = c.tok.literal + result.value = "" + rawGetTok(c, c.tok) + if c.tok.kind in {tkEquals, tkColon}: + rawGetTok(c, c.tok) + if c.tok.kind == tkSymbol: + result.value = c.tok.literal + else: + result.kind = cfgError + result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) + rawGetTok(c, c.tok) + else: + result.kind = cfgError + result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) + rawGetTok(c, c.tok) + +proc next(c: var TCfgParser): TCfgEvent = + case c.tok.kind + of tkEof: +
result.kind = cfgEof + of tkDashDash: + rawGetTok(c, c.tok) + result = getKeyValPair(c, cfgOption) + of tkSymbol: + result = getKeyValPair(c, cfgKeyValuePair) + of tkBracketLe: + rawGetTok(c, c.tok) + if c.tok.kind == tkSymbol: + result.kind = cfgSectionStart + result.section = c.tok.literal + else: + result.kind = cfgError + result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) + rawGetTok(c, c.tok) + if c.tok.kind == tkBracketRi: + rawGetTok(c, c.tok) + else: + result.kind = cfgError + result.msg = errorStr(c, "\']\' expected, but found: " & c.tok.literal) + of tkInvalid, tkEquals, tkColon, tkBracketRi: + result.kind = cfgError + result.msg = errorStr(c, "invalid token: " & c.tok.literal) + rawGetTok(c, c.tok) diff --git a/nimlib/pure/parsecsv.nim b/nimlib/pure/parsecsv.nim new file mode 100755 index 000000000..5970f2090 --- /dev/null +++ b/nimlib/pure/parsecsv.nim @@ -0,0 +1,178 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a simple high performance `CSV`:idx: +## (`comma separated value`:idx:) parser. +## +## Example: How to use the parser +## ============================== +## +## .. code-block:: nimrod +## import os, parsecsv, streams +## var s = newFileStream(ParamStr(1), fmRead) +## if s == nil: quit("cannot open the file" & ParamStr(1)) +## var x: TCsvParser +## open(x, s, ParamStr(1)) +## while readRow(x): +## Echo "new row: " +## for val in items(x.row): +## Echo "##", val, "##" +## close(x) +## + +import + lexbase, streams + +type + TCsvRow* = seq[string] ## a row in a CSV file + TCsvParser* = object of TBaseLexer ## the parser object. 
+ row*: TCsvRow ## the current row + filename: string + sep, quote, esc: char + skipWhite: bool + currRow: int + + EInvalidCsv* = object of EIO ## exception that is raised if + ## a parsing error occurs + +proc raiseEInvalidCsv(filename: string, line, col: int, + msg: string) {.noreturn.} = + var e: ref EInvalidCsv + new(e) + e.msg = filename & "(" & $line & ", " & $col & ") Error: " & msg + raise e + +proc error(my: TCsvParser, pos: int, msg: string) = + raiseEInvalidCsv(my.filename, my.LineNumber, getColNumber(my, pos), msg) + +proc open*(my: var TCsvParser, input: PStream, filename: string, + separator = ',', quote = '"', escape = '\0', + skipInitialSpace = false) = + ## initializes the parser with an input stream. `Filename` is only used + ## for nice error messages. The parser's behaviour can be controlled by + ## the diverse optional parameters: + ## - `separator`: character used to separate fields + ## - `quote`: Used to quote fields containing special characters like + ## `separator`, `quote` or new-line characters. '\0' disables the parsing + ## of quotes. + ## - `escape`: removes any special meaning from the following character; + ## '\0' disables escaping; if escaping is disabled and `quote` is not '\0', + ## two `quote` characters are parsed one literal `quote` character. + ## - `skipInitialSpace`: If true, whitespace immediately following the + ## `separator` is ignored. + lexbase.open(my, input) + my.filename = filename + my.sep = separator + my.quote = quote + my.esc = escape + my.skipWhite = skipInitialSpace + my.row = @[] + my.currRow = 0 + +proc parseField(my: var TCsvParser, a: var string) = + var pos = my.bufpos + var buf = my.buf + if my.skipWhite: + while buf[pos] in {' ', '\t'}: inc(pos) + setLen(a, 0) # reuse memory + if buf[pos] == my.quote and my.quote != '\0': + inc(pos) + while true: + var c = buf[pos] + if c == '\0': + my.bufpos = pos # can continue after exception? 
+ error(my, pos, my.quote & " expected") + break + elif c == my.quote: + if my.esc == '\0' and buf[pos+1] == my.quote: + add(a, my.quote) + inc(pos, 2) + else: + inc(pos) + break + elif c == my.esc: + add(a, buf[pos+1]) + inc(pos, 2) + else: + case c + of '\c': + pos = handleCR(my, pos) + buf = my.buf + add(a, "\n") + of '\l': + pos = handleLF(my, pos) + buf = my.buf + add(a, "\n") + else: + add(a, c) + inc(pos) + else: + while true: + var c = buf[pos] + if c == my.sep: break + if c in {'\c', '\l', '\0'}: break + add(a, c) + inc(pos) + my.bufpos = pos + +proc processedRows*(my: var TCsvParser): int = + ## returns number of the processed rows + return my.currRow + +proc readRow*(my: var TCsvParser, columns = 0): bool = + ## reads the next row; if `columns` > 0, it expects the row to have + ## exactly this many columns. Returns false if the end of the file + ## has been encountered else true. + var col = 0 # current column + var oldpos = my.bufpos + while my.buf[my.bufpos] != '\0': + var oldlen = my.row.len + if oldlen < col+1: + setLen(my.row, col+1) + my.row[col] = "" + parseField(my, my.row[col]) + inc(col) + if my.buf[my.bufpos] == my.sep: + inc(my.bufpos) + else: + case my.buf[my.bufpos] + of '\c', '\l': + # skip empty lines: + while true: + case my.buf[my.bufpos] + of '\c': my.bufpos = handleCR(my, my.bufpos) + of '\l': my.bufpos = handleLF(my, my.bufpos) + else: break + of '\0': nil + else: error(my, my.bufpos, my.sep & " expected") + break + + setlen(my.row, col) + result = col > 0 + if result and col != columns and columns > 0: + error(my, oldpos+1, $columns & " columns expected, but found " & + $col & " columns") + inc(my.currRow) + +proc close*(my: var TCsvParser) {.inline.} = + ## closes the parser `my` and its associated input stream. 
+ lexbase.close(my) + +when isMainModule: + import os + var s = newFileStream(ParamStr(1), fmRead) + if s == nil: quit("cannot open the file" & ParamStr(1)) + var x: TCsvParser + open(x, s, ParamStr(1)) + while readRow(x): + Echo "new row: " + for val in items(x.row): + Echo "##", val, "##" + close(x) + diff --git a/nimlib/pure/parseopt.nim b/nimlib/pure/parseopt.nim new file mode 100755 index 000000000..8f4be98f4 --- /dev/null +++ b/nimlib/pure/parseopt.nim @@ -0,0 +1,152 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module provides the standard Nimrod command line parser. +## It supports one convenience iterator over all command line options and some +## lower-level features. + +{.push debugger: off.} + +import + os, strutils + +type + TCmdLineKind* = enum ## the detected command line token + cmdEnd, ## end of command line reached + cmdArgument, ## argument detected + cmdLongoption, ## a long option ``--option`` detected + cmdShortOption ## a short option ``-c`` detected + TOptParser* = + object of TObject ## this object implements the command line parser + cmd: string + pos: int + inShortState: bool + kind*: TCmdLineKind ## the dected command line token + key*, val*: string ## key and value pair; ``key`` is the option + ## or the argument, ``value`` is not "" if + ## the option was given a value + +proc initOptParser*(cmdline = ""): TOptParser = + ## inits the option parser. If ``cmdline == ""``, the real command line + ## (as provided by the ``OS`` module) is taken. 
+ result.pos = 0 + result.inShortState = false + if cmdline != "": + result.cmd = cmdline + else: + result.cmd = "" + for i in countup(1, ParamCount()): + result.cmd = result.cmd & quoteIfContainsWhite(paramStr(i)) & ' ' + result.kind = cmdEnd + result.key = "" + result.val = "" + +proc init*(cmdline: string = ""): TOptParser {.deprecated.} = + ## **Deprecated since version 0.8.2**: Use `initOptParser` instead. + result = initOptParser(cmdline) + +proc parseWord(s: string, i: int, w: var string, + delim: TCharSet = {'\x09', ' ', '\0'}): int = + result = i + if s[result] == '\"': + inc(result) + while not (s[result] in {'\0', '\"'}): + add(w, s[result]) + inc(result) + if s[result] == '\"': inc(result) + else: + while not (s[result] in delim): + add(w, s[result]) + inc(result) + +proc handleShortOption(p: var TOptParser) = + var i = p.pos + p.kind = cmdShortOption + add(p.key, p.cmd[i]) + inc(i) + p.inShortState = true + while p.cmd[i] in {'\x09', ' '}: + inc(i) + p.inShortState = false + if p.cmd[i] in {':', '='}: + inc(i) + p.inShortState = false + while p.cmd[i] in {'\x09', ' '}: inc(i) + i = parseWord(p.cmd, i, p.val) + if p.cmd[i] == '\0': p.inShortState = false + p.pos = i + +proc next*(p: var TOptParser) = + ## parses the first or next option; ``p.kind`` describes what token has been + ## parsed. ``p.key`` and ``p.val`` are set accordingly. 
+ var i = p.pos + while p.cmd[i] in {'\x09', ' '}: inc(i) + p.pos = i + setlen(p.key, 0) + setlen(p.val, 0) + if p.inShortState: + handleShortOption(p) + return + case p.cmd[i] + of '\0': + p.kind = cmdEnd + of '-': + inc(i) + if p.cmd[i] == '-': + p.kind = cmdLongOption + inc(i) + i = parseWord(p.cmd, i, p.key, {'\0', ' ', '\x09', ':', '='}) + while p.cmd[i] in {'\x09', ' '}: inc(i) + if p.cmd[i] in {':', '='}: + inc(i) + while p.cmd[i] in {'\x09', ' '}: inc(i) + p.pos = parseWord(p.cmd, i, p.val) + else: + p.pos = i + else: + p.pos = i + handleShortOption(p) + else: + p.kind = cmdArgument + p.pos = parseWord(p.cmd, i, p.key) + +proc cmdLineRest*(p: TOptParser): string = + ## retrieves the rest of the command line that has not been parsed yet. + result = strip(copy(p.cmd, p.pos, len(p.cmd) - 1)) + +proc getRestOfCommandLine*(p: TOptParser): string {.deprecated.} = + ## **Deprecated since version 0.8.2**: Use `cmdLineRest` instead. + result = cmdLineRest(p) + +iterator getopt*(): tuple[kind: TCmdLineKind, key, val: string] = + ## This is a convenience iterator for iterating over the command line. + ## This uses the TOptParser object. Example: + ## + ## ..
code-block:: nimrod + ## var + ## filename = "" + ## for kind, key, val in getopt(): + ## case kind + ## of cmdArgument: + ## filename = key + ## of cmdLongOption, cmdShortOption: + ## case key + ## of "help", "h": writeHelp() + ## of "version", "v": writeVersion() + ## of cmdEnd: assert(false) # cannot happen + ## if filename == "": + ## # no filename has been given, so we show the help: + ## writeHelp() + var p = initOptParser() + while true: + next(p) + if p.kind == cmdEnd: break + yield (p.kind, p.key, p.val) + +{.pop.} diff --git a/nimlib/pure/parsesql.nim b/nimlib/pure/parsesql.nim new file mode 100755 index 000000000..2109c273a --- /dev/null +++ b/nimlib/pure/parsesql.nim @@ -0,0 +1,1345 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## The ``parsesql`` module implements a high performance SQL file +## parser. It parses PostgreSQL syntax and the SQL ANSI standard. + +import + hashes, strutils, lexbase, streams + +# ------------------- scanner ------------------------------------------------- + +type + TTokKind = enum ## enumeration of all SQL tokens + tkInvalid, ## invalid token + tkEof, ## end of file reached + tkIdentifier, ## abc + tkQuotedIdentifier, ## "abc" + tkStringConstant, ## 'abc' + tkEscapeConstant, ## e'abc' + tkDollarQuotedConstant, ## $tag$abc$tag$ + tkBitStringConstant, ## B'00011' + tkHexStringConstant, ## x'00011' + tkInteger, + tkNumeric, + tkOperator, ## + - * / < > = ~ ! @ # % ^ & | ` ? + tkSemicolon, ## ';' + tkColon, ## ':' + tkComma, ## ',' + tkParLe, ## '(' + tkParRi, ## ')' + tkBracketLe, ## '[' + tkBracketRi, ## ']' + tkDot ## '.' + + TToken {.final.} = object # a token + kind: TTokKind # the type of the token + literal: string # the parsed (string) literal + + TSqlLexer* = object of TBaseLexer ## the parser object. 
+ filename: string + +const + tokKindToStr: array[TTokKind, string] = [ + "invalid", "[EOF]", "identifier", "quoted identifier", "string constant", + "escape string constant", "dollar quoted constant", "bit string constant", + "hex string constant", "integer constant", "numeric constant", "operator", + ";", ":", ",", "(", ")", "[", "]", "." + ] + +proc open(L: var TSqlLexer, input: PStream, filename: string) = + lexbase.open(L, input) + L.filename = filename + +proc close(L: var TSqlLexer) = + lexbase.close(L) + +proc getColumn(L: TSqlLexer): int = + ## get the current column the parser has arrived at. + result = getColNumber(L, L.bufPos) + +proc getLine(L: TSqlLexer): int = + result = L.linenumber + +proc handleHexChar(c: var TSqlLexer, xi: var int) = + case c.buf[c.bufpos] + of '0'..'9': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) + inc(c.bufpos) + of 'a'..'f': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) + inc(c.bufpos) + of 'A'..'F': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) + inc(c.bufpos) + else: + nil + +proc handleOctChar(c: var TSqlLexer, xi: var int) = + if c.buf[c.bufpos] in {'0'..'7'}: + xi = (xi shl 3) or (ord(c.buf[c.bufpos]) - ord('0')) + inc(c.bufpos) + +proc getEscapedChar(c: var TSqlLexer, tok: var TToken) = + inc(c.bufpos) + case c.buf[c.bufpos] + of 'n', 'N': + add(tok.literal, '\L') + Inc(c.bufpos) + of 'r', 'R', 'c', 'C': + add(tok.literal, '\c') + Inc(c.bufpos) + of 'l', 'L': + add(tok.literal, '\L') + Inc(c.bufpos) + of 'f', 'F': + add(tok.literal, '\f') + inc(c.bufpos) + of 'e', 'E': + add(tok.literal, '\e') + Inc(c.bufpos) + of 'a', 'A': + add(tok.literal, '\a') + Inc(c.bufpos) + of 'b', 'B': + add(tok.literal, '\b') + Inc(c.bufpos) + of 'v', 'V': + add(tok.literal, '\v') + Inc(c.bufpos) + of 't', 'T': + add(tok.literal, '\t') + Inc(c.bufpos) + of '\'', '\"': + add(tok.literal, c.buf[c.bufpos]) + Inc(c.bufpos) + of '\\': + add(tok.literal, '\\') + Inc(c.bufpos) + of 'x', 'X': + inc(c.bufpos) + 
var xi = 0 + handleHexChar(c, xi) + handleHexChar(c, xi) + add(tok.literal, Chr(xi)) + of '0'..'7': + var xi = 0 + handleOctChar(c, xi) + handleOctChar(c, xi) + handleOctChar(c, xi) + if (xi <= 255): add(tok.literal, Chr(xi)) + else: tok.kind = tkInvalid + else: tok.kind = tkInvalid + +proc HandleCRLF(c: var TSqlLexer, pos: int): int = + case c.buf[pos] + of '\c': result = lexbase.HandleCR(c, pos) + of '\L': result = lexbase.HandleLF(c, pos) + else: result = pos + +proc skip(c: var TSqlLexer) = + var pos = c.bufpos + var buf = c.buf + var nested = 0 + while true: + case buf[pos] + of ' ', '\t': + Inc(pos) + of '-': + if buf[pos+1] == '-': + while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) + else: + break + of '/': + if buf[pos+1] == '*': + inc(pos,2) + while true: + case buf[pos] + of '\0': break + of '\c', '\L': + pos = HandleCRLF(c, pos) + buf = c.buf + of '*': + if buf[pos+1] == '/': + inc(pos, 2) + if nested <= 0: break + dec(nested) + else: + inc(pos) + of '/': + if buf[pos+1] == '*': + inc(pos, 2) + inc(nested) + else: + inc(pos) + else: inc(pos) + else: break + of '\c', '\L': + pos = HandleCRLF(c, pos) + buf = c.buf + else: + break # EndOfFile also leaves the loop + c.bufpos = pos + +proc getString(c: var TSqlLexer, tok: var TToken, kind: TTokKind) = + var pos = c.bufPos + 1 + var buf = c.buf + tok.kind = kind + block parseLoop: + while true: + while true: + var ch = buf[pos] + if ch == '\'': + if buf[pos+1] == '\'': + inc(pos, 2) + add(tok.literal, '\'') + else: + inc(pos) + break + elif ch in {'\c', '\L', lexbase.EndOfFile}: + tok.kind = tkInvalid + break parseLoop + elif (ch == '\\') and kind == tkEscapeConstant: + c.bufPos = pos + getEscapedChar(c, tok) + pos = c.bufPos + else: + add(tok.literal, ch) + Inc(pos) + c.bufpos = pos + var line = c.linenumber + skip(c) + if c.linenumber > line: + # a new line whitespace has been parsed, so we check if the string + # continues after the whitespace: + buf = c.buf # may have been reallocated + 
pos = c.bufpos + if buf[pos] == '\'': inc(pos) + else: break parseLoop + else: break parseLoop + c.bufpos = pos + +proc getDollarString(c: var TSqlLexer, tok: var TToken) = + var pos = c.bufPos + 1 + var buf = c.buf + tok.kind = tkDollarQuotedConstant + var tag = "$" + while buf[pos] in IdentChars: + add(tag, buf[pos]) + inc(pos) + if buf[pos] == '$': inc(pos) + else: + tok.kind = tkInvalid + return + while true: + case buf[pos] + of '\c', '\L': + pos = HandleCRLF(c, pos) + buf = c.buf + add(tok.literal, "\L") + of '\0': + tok.kind = tkInvalid + break + of '$': + inc(pos) + var tag2 = "$" + while buf[pos] in IdentChars: + add(tag2, buf[pos]) + inc(pos) + if buf[pos] == '$': inc(pos) + if tag2 == tag: break + add(tok.literal, tag2) + add(tok.literal, '$') + else: + add(tok.literal, buf[pos]) + inc(pos) + c.bufpos = pos + +proc getSymbol(c: var TSqlLexer, tok: var TToken) = + var pos = c.bufpos + var buf = c.buf + while true: + add(tok.literal, buf[pos]) + Inc(pos) + if not (buf[pos] in {'a'..'z','A'..'Z','0'..'9','_','$', '\128'..'\255'}): + break + c.bufpos = pos + tok.kind = tkIdentifier + +proc getQuotedIdentifier(c: var TSqlLexer, tok: var TToken) = + var pos = c.bufPos + 1 + var buf = c.buf + tok.kind = tkQuotedIdentifier + while true: + var ch = buf[pos] + if ch == '\"': + if buf[pos+1] == '\"': + inc(pos, 2) + add(tok.literal, '\"') + else: + inc(pos) + break + elif ch in {'\c', '\L', lexbase.EndOfFile}: + tok.kind = tkInvalid + break + else: + add(tok.literal, ch) + Inc(pos) + c.bufpos = pos + +proc getBitHexString(c: var TSqlLexer, tok: var TToken, validChars: TCharSet) = + var pos = c.bufPos + 1 + var buf = c.buf + block parseLoop: + while true: + while true: + var ch = buf[pos] + if ch in validChars: + add(tok.literal, ch) + Inc(pos) + elif ch == '\'': + inc(pos) + break + else: + tok.kind = tkInvalid + break parseLoop + c.bufpos = pos + var line = c.linenumber + skip(c) + if c.linenumber > line: + # a new line whitespace has been parsed, so we check if 
the string + # continues after the whitespace: + buf = c.buf # may have been reallocated + pos = c.bufpos + if buf[pos] == '\'': inc(pos) + else: break parseLoop + else: break parseLoop + c.bufpos = pos + +proc getNumeric(c: var TSqlLexer, tok: var TToken) = + tok.kind = tkInteger + var pos = c.bufPos + var buf = c.buf + while buf[pos] in Digits: + add(tok.literal, buf[pos]) + inc(pos) + if buf[pos] == '.': + tok.kind = tkNumeric + add(tok.literal, buf[pos]) + inc(pos) + while buf[pos] in Digits: + add(tok.literal, buf[pos]) + inc(pos) + if buf[pos] in {'E', 'e'}: + tok.kind = tkNumeric + add(tok.literal, buf[pos]) + inc(pos) + if buf[pos] == '+': + inc(pos) + elif buf[pos] == '-': + add(tok.literal, buf[pos]) + inc(pos) + if buf[pos] in Digits: + while buf[pos] in Digits: + add(tok.literal, buf[pos]) + inc(pos) + else: + tok.kind = tkInvalid + c.bufpos = pos + +proc getOperator(c: var TSqlLexer, tok: var TToken) = + ## Scans an operator token at the current buffer position into `tok`. + ## Scanning stops before a ``--`` or ``/*`` comment start so that the + ## comment is left for `skip`. + const operators = {'+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%', + '^', '&', '|', '`', '?'} + tok.kind = tkOperator + var pos = c.bufPos + var buf = c.buf + var trailingPlusMinus = false + while true: + case buf[pos] + of '-': + # look ahead: the current char is always a minus in this branch + if buf[pos+1] == '-': break + if not trailingPlusMinus and buf[pos+1] notin operators and + tok.literal.len > 0: break + of '/': + # look ahead for the start of a block comment + if buf[pos+1] == '*': break + of '~', '!', '@', '#', '%', '^', '&', '|', '`', '?': + trailingPlusMinus = true + of '+': + if not trailingPlusMinus and buf[pos+1] notin operators and + tok.literal.len > 0: break + of '*', '<', '>', '=': nil + else: break + add(tok.literal, buf[pos]) + inc(pos) + c.bufpos = pos + +proc getTok(c: var TSqlLexer, tok: var TToken) = + tok.kind = tkInvalid + setlen(tok.literal, 0) + skip(c) + case c.buf[c.bufpos] + of ';': + tok.kind = tkSemiColon + inc(c.bufPos) + add(tok.literal, ';') + of ',': + tok.kind = tkComma + inc(c.bufpos) + add(tok.literal, ',') + of ':': + tok.kind = tkColon + inc(c.bufpos) + add(tok.literal, ':') + of 'e', 'E': + if c.buf[c.bufPos +
1] == '\'': + Inc(c.bufPos) + getString(c, tok, tkEscapeConstant) + else: + getSymbol(c, tok) + of 'b', 'B': + if c.buf[c.bufPos + 1] == '\'': + tok.kind = tkBitStringConstant + getBitHexString(c, tok, {'0'..'1'}) + else: + getSymbol(c, tok) + of 'x', 'X': + if c.buf[c.bufPos + 1] == '\'': + tok.kind = tkHexStringConstant + getBitHexString(c, tok, {'a'..'f','A'..'F','0'..'9'}) + else: + getSymbol(c, tok) + of '$': getDollarString(c, tok) + of '[': + tok.kind = tkBracketLe + inc(c.bufpos) + add(tok.literal, '[') + of ']': + tok.kind = tkBracketRi + Inc(c.bufpos) + add(tok.literal, ']') + of '(': + tok.kind = tkParLe + Inc(c.bufpos) + add(tok.literal, '(') + of ')': + tok.kind = tkParRi + Inc(c.bufpos) + add(tok.literal, ')') + of '.': + if c.buf[c.bufPos + 1] in Digits: + getNumeric(c, tok) + else: + tok.kind = tkDot + inc(c.bufpos) + add(tok.literal, '.') + of '0'..'9': getNumeric(c, tok) + of '\'': getString(c, tok, tkStringConstant) + of '"': getQuotedIdentifier(c, tok) + of lexbase.EndOfFile: + tok.kind = tkEof + tok.literal = "[EOF]" + of 'a', 'c', 'd', 'f'..'w', 'y', 'z', 'A', 'C', 'D', 'F'..'W', 'Y', 'Z', '_', + '\128'..'\255': + getSymbol(c, tok) + of '+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%', + '^', '&', '|', '`', '?': + getOperator(c, tok) + else: + add(tok.literal, c.buf[c.bufpos]) + inc(c.bufpos) + +proc errorStr(L: TSqlLexer, msg: string): string = + result = "$1($2, $3) Error: $4" % [L.filename, $getLine(L), $getColumn(L), msg] + + +# ----------------------------- parser ---------------------------------------- + +# Operator/Element Associativity Description +# . 
left table/column name separator +# :: left PostgreSQL-style typecast +# [ ] left array element selection +# - right unary minus +# ^ left exponentiation +# * / % left multiplication, division, modulo +# + - left addition, subtraction +# IS IS TRUE, IS FALSE, IS UNKNOWN, IS NULL +# ISNULL test for null +# NOTNULL test for not null +# (any other) left all other native and user-defined oprs +# IN set membership +# BETWEEN range containment +# OVERLAPS time interval overlap +# LIKE ILIKE SIMILAR string pattern matching +# < > less than, greater than +# = right equality, assignment +# NOT right logical negation +# AND left logical conjunction +# OR left logical disjunction + +type + TSqlNodeKind* = enum ## kind of SQL abstract syntax tree + nkNone, + nkIdent, + nkStringLit, + nkBitStringLit, + nkHexStringLit, + nkIntegerLit, + nkNumericLit, + nkPrimaryKey, + nkForeignKey, + nkNotNull, + + nkStmtList, + nkDot, + nkDotDot, + nkPrefix, + nkInfix, + nkCall, + nkColumnReference, + nkReferences, + nkDefault, + nkCheck, + nkConstraint, + nkUnique, + nkIdentity, + nkColumnDef, ## name, datatype, constraints + nkInsert, + nkUpdate, + nkDelete, + nkSelect, + nkSelectDistinct, + nkSelectColumns, + nkAsgn, + nkFrom, + nkGroup, + nkHaving, + nkOrder, + nkDesc, + nkUnion, + nkIntersect, + nkExcept, + nkColumnList, + nkValueList, + nkWhere, + nkCreateTable, + nkCreateTableIfNotExists, + nkCreateType, + nkCreateTypeIfNotExists, + nkCreateIndex, + nkCreateIndexIfNotExists, + nkEnumDef + +type + EInvalidSql* = object of EBase ## Invalid SQL encountered + PSqlNode* = ref TSqlNode ## an SQL abstract syntax tree node + TSqlNode* = object ## an SQL abstract syntax tree node + case kind*: TSqlNodeKind ## kind of syntax tree + of nkIdent, nkStringLit, nkBitStringLit, nkHexStringLit, + nkIntegerLit, nkNumericLit: + strVal*: string ## AST leaf: the identifier, numeric literal + ## string literal, etc. 
+ else: + sons*: seq[PSqlNode] ## the node's children + + TSqlParser* = object of TSqlLexer ## SQL parser object + tok: TToken + +proc newNode(k: TSqlNodeKind): PSqlNode = + new(result) + result.kind = k + +proc newNode(k: TSqlNodeKind, s: string): PSqlNode = + new(result) + result.kind = k + result.strVal = s + +proc len*(n: PSqlNode): int = + if isNil(n.sons): result = 0 + else: result = n.sons.len + +proc add*(father, n: PSqlNode) = + if isNil(father.sons): father.sons = @[] + add(father.sons, n) + +proc getTok(p: var TSqlParser) = + getTok(p, p.tok) + +proc sqlError(p: TSqlParser, msg: string) = + var e: ref EInvalidSql + new(e) + e.msg = errorStr(p, msg) + raise e + +proc isKeyw(p: TSqlParser, keyw: string): bool = + result = p.tok.kind == tkIdentifier and + cmpIgnoreCase(p.tok.literal, keyw) == 0 + +proc isOpr(p: TSqlParser, opr: string): bool = + result = p.tok.kind == tkOperator and + cmpIgnoreCase(p.tok.literal, opr) == 0 + +proc optKeyw(p: var TSqlParser, keyw: string) = + if p.tok.kind == tkIdentifier and cmpIgnoreCase(p.tok.literal, keyw) == 0: + getTok(p) + +proc expectIdent(p: TSqlParser) = + if p.tok.kind != tkIdentifier and p.tok.kind != tkQuotedIdentifier: + sqlError(p, "identifier expected") + +proc expect(p: TSqlParser, kind: TTokKind) = + if p.tok.kind != kind: + sqlError(p, tokKindToStr[kind] & " expected") + +proc eat(p: var TSqlParser, kind: TTokKind) = + if p.tok.kind == kind: + getTok(p) + else: + sqlError(p, tokKindToStr[kind] & " expected") + +proc eat(p: var TSqlParser, keyw: string) = + if isKeyw(p, keyw): + getTok(p) + else: + sqlError(p, keyw.toUpper() & " expected") + +proc parseDataType(p: var TSqlParser): PSqlNode = + if isKeyw(p, "enum"): + result = newNode(nkEnumDef) + getTok(p) + if p.tok.kind == tkParLe: + getTok(p) + result.add(newNode(nkStringLit, p.tok.literal)) + getTok(p) + while p.tok.kind == tkComma: + getTok(p) + result.add(newNode(nkStringLit, p.tok.literal)) + getTok(p) + eat(p, tkParRi) + else: + expectIdent(p) + 
result = newNode(nkIdent, p.tok.literal) + getTok(p) + # ignore (12, 13) part: + if p.tok.kind == tkParLe: + getTok(p) + expect(p, tkInteger) + getTok(p) + while p.tok.kind == tkComma: + getTok(p) + expect(p, tkInteger) + getTok(p) + eat(p, tkParRi) + +proc getPrecedence(p: TSqlParser): int = + if isOpr(p, "*") or isOpr(p, "/") or isOpr(p, "%"): + result = 6 + elif isOpr(p, "+") or isOpr(p, "-"): + result = 5 + elif isOpr(p, "=") or isOpr(p, "<") or isOpr(p, ">") or isOpr(p, ">=") or + isOpr(p, "<=") or isOpr(p, "<>") or isOpr(p, "!=") or isKeyw(p, "is") or + isKeyw(p, "like"): + result = 3 + elif isKeyw(p, "and"): + result = 2 + elif isKeyw(p, "or"): + result = 1 + elif p.tok.kind == tkOperator: + # user-defined operator: + result = 0 + else: + result = - 1 + +proc parseExpr(p: var TSqlParser): PSqlNode + +proc identOrLiteral(p: var TSqlParser): PSqlNode = + case p.tok.kind + of tkIdentifier, tkQuotedIdentifier: + result = newNode(nkIdent, p.tok.literal) + getTok(p) + of tkStringConstant, tkEscapeConstant, tkDollarQuotedConstant: + result = newNode(nkStringLit, p.tok.literal) + getTok(p) + of tkBitStringConstant: + result = newNode(nkBitStringLit, p.tok.literal) + getTok(p) + of tkHexStringConstant: + result = newNode(nkHexStringLit, p.tok.literal) + getTok(p) + of tkInteger: + result = newNode(nkIntegerLit, p.tok.literal) + getTok(p) + of tkNumeric: + result = newNode(nkNumericLit, p.tok.literal) + getTok(p) + of tkParLe: + getTok(p) + result = parseExpr(p) + eat(p, tkParRi) + else: + sqlError(p, "expression expected") + getTok(p) # we must consume a token here to prevent endless loops!
+ +proc primary(p: var TSqlParser): PSqlNode = + if p.tok.kind == tkOperator or isKeyw(p, "not"): + result = newNode(nkPrefix) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + result.add(primary(p)) + return + result = identOrLiteral(p) + while true: + case p.tok.kind + of tkParLe: + var a = result + result = newNode(nkCall) + result.add(a) + getTok(p) + while true: + result.add(parseExpr(p)) + if p.tok.kind == tkComma: getTok(p) + else: break + eat(p, tkParRi) + of tkDot: + getTok(p) + var a = result + if p.tok.kind == tkDot: + getTok(p) + result = newNode(nkDotDot) + else: + result = newNode(nkDot) + result.add(a) + if isOpr(p, "*"): + result.add(newNode(nkIdent, "*")) + elif p.tok.kind in {tkIdentifier, tkQuotedIdentifier}: + result.add(newNode(nkIdent, p.tok.literal)) + else: + sqlError(p, "identifier expected") + getTok(p) + else: break + +proc lowestExprAux(p: var TSqlParser, v: var PSqlNode, limit: int): int = + var + v2, node, opNode: PSqlNode + v = primary(p) # expand while operators have priorities higher than 'limit' + var opPred = getPrecedence(p) + result = opPred + while opPred > limit: + node = newNode(nkInfix) + opNode = newNode(nkIdent, p.tok.literal) + getTok(p) + result = lowestExprAux(p, v2, opPred) + node.add(opNode) + node.add(v) + node.add(v2) + v = node + opPred = getPrecedence(p) + +proc parseExpr(p: var TSqlParser): PSqlNode = + discard lowestExprAux(p, result, - 1) + +proc parseTableName(p: var TSqlParser): PSqlNode = + expectIdent(p) + result = primary(p) + +proc parseColumnReference(p: var TSqlParser): PSqlNode = + result = parseTableName(p) + if p.tok.kind == tkParLe: + getTok(p) + var a = result + result = newNode(nkColumnReference) + result.add(a) + result.add(parseTableName(p)) + while p.tok.kind == tkComma: + getTok(p) + result.add(parseTableName(p)) + eat(p, tkParRi) + +proc parseCheck(p: var TSqlParser): PSqlNode = + getTok(p) + result = newNode(nkCheck) + result.add(parseExpr(p)) + +proc parseConstraint(p: var 
TSqlParser): PSqlNode = + getTok(p) + result = newNode(nkConstraint) + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + eat(p, "check") + result.add(parseExpr(p)) + +proc parseColumnConstraints(p: var TSqlParser, result: PSqlNode) = + ## Parses zero or more column constraints and appends one node per + ## constraint to `result`; stops at the first token that does not start + ## a known constraint. + while true: + if isKeyw(p, "default"): + getTok(p) + var n = newNode(nkDefault) + n.add(parseExpr(p)) + result.add(n) + elif isKeyw(p, "references"): + getTok(p) + var n = newNode(nkReferences) + n.add(parseColumnReference(p)) + result.add(n) + elif isKeyw(p, "not"): + getTok(p) + eat(p, "null") + result.add(newNode(nkNotNull)) + elif isKeyw(p, "identity"): + getTok(p) + result.add(newNode(nkIdentity)) + elif isKeyw(p, "primary"): + getTok(p) + eat(p, "key") + result.add(newNode(nkPrimaryKey)) + elif isKeyw(p, "check"): + result.add(parseCheck(p)) + elif isKeyw(p, "constraint"): + result.add(parseConstraint(p)) + elif isKeyw(p, "unique"): + getTok(p) # consume UNIQUE, otherwise this loop never advances + result.add(newNode(nkUnique)) + else: + break + +proc parseColumnDef(p: var TSqlParser): PSqlNode = + expectIdent(p) + result = newNode(nkColumnDef) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + result.add(parseDataType(p)) + parseColumnConstraints(p, result) + +proc parseIfNotExists(p: var TSqlParser, k: TSqlNodeKind): PSqlNode = + getTok(p) + if isKeyw(p, "if"): + getTok(p) + eat(p, "not") + eat(p, "exists") + result = newNode(succ(k)) + else: + result = newNode(k) + +proc parseParIdentList(p: var TSqlParser, father: PSqlNode) = + eat(p, tkParLe) + while true: + expectIdent(p) + father.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + if p.tok.kind != tkComma: break + getTok(p) + eat(p, tkParRi) + +proc parseTableConstraint(p: var TSqlParser): PSqlNode = + if isKeyw(p, "primary"): + getTok(p) + eat(p, "key") + result = newNode(nkPrimaryKey) + parseParIdentList(p, result) + elif isKeyw(p, "foreign"): + getTok(p) + eat(p, "key") + result = newNode(nkForeignKey) + parseParIdentList(p, result) + eat(p, "references") + var m = newNode(nkReferences) +
m.add(parseColumnReference(p)) + result.add(m) + elif isKeyw(p, "unique"): + getTok(p) + eat(p, "key") + result = newNode(nkUnique) + parseParIdentList(p, result) + elif isKeyw(p, "check"): + result = parseCheck(p) + elif isKeyw(p, "constraint"): + result = parseConstraint(p) + else: + sqlError(p, "column definition expected") + +proc parseTableDef(p: var TSqlParser): PSqlNode = + result = parseIfNotExists(p, nkCreateTable) + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + if p.tok.kind == tkParLe: + while true: + getTok(p) + if p.tok.kind == tkIdentifier or p.tok.kind == tkQuotedIdentifier: + result.add(parseColumnDef(p)) + else: + result.add(parseTableConstraint(p)) + if p.tok.kind != tkComma: break + eat(p, tkParRi) + +proc parseTypeDef(p: var TSqlParser): PSqlNode = + result = parseIfNotExists(p, nkCreateType) + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + eat(p, "as") + result.add(parseDataType(p)) + +proc parseWhere(p: var TSqlParser): PSqlNode = + getTok(p) + result = newNode(nkWhere) + result.add(parseExpr(p)) + +proc parseIndexDef(p: var TSqlParser): PSqlNode = + result = parseIfNotExists(p, nkCreateIndex) + if isKeyw(p, "primary"): + getTok(p) + eat(p, "key") + result.add(newNode(nkPrimaryKey)) + else: + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + eat(p, "on") + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + eat(p, tkParLe) + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + while p.tok.kind == tkComma: + getTok(p) + expectIdent(p) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + eat(p, tkParRi) + +proc parseInsert(p: var TSqlParser): PSqlNode = + ## Parses an INSERT statement into an `nkInsert` node with three sons: + ## the table name, the column list (nil if none was given) and either + ## an `nkDefault` or an `nkValueList` node. + getTok(p) + eat(p, "into") + expectIdent(p) + result = newNode(nkInsert) + result.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + if p.tok.kind == tkParLe: + var n = newNode(nkColumnList) + parseParIdentList(p, n) + result.add(n) # attach the column list; the renderer expects 3 sons + else: + result.add(nil) + if isKeyw(p, +
"default"): + getTok(p) + eat(p, "values") + result.add(newNode(nkDefault)) + else: + eat(p, "values") + eat(p, tkParLe) + var n = newNode(nkValueList) + while true: + n.add(parseExpr(p)) + if p.tok.kind != tkComma: break + getTok(p) + result.add(n) + eat(p, tkParRi) + +proc parseUpdate(p: var TSqlParser): PSqlNode = + getTok(p) + result = newNode(nkUpdate) + result.add(primary(p)) + eat(p, "set") + while true: + var a = newNode(nkAsgn) + expectIdent(p) + a.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + if isOpr(p, "="): getTok(p) + else: sqlError(p, "= expected") + a.add(parseExpr(p)) + result.add(a) + if p.tok.kind != tkComma: break + getTok(p) + if isKeyw(p, "where"): + result.add(parseWhere(p)) + else: + result.add(nil) + +proc parseDelete(p: var TSqlParser): PSqlNode = + getTok(p) + result = newNode(nkDelete) + eat(p, "from") + result.add(primary(p)) + if isKeyw(p, "where"): + result.add(parseWhere(p)) + else: + result.add(nil) + +proc parseSelect(p: var TSqlParser): PSqlNode = + getTok(p) + if isKeyw(p, "distinct"): + getTok(p) + result = newNode(nkSelectDistinct) + elif isKeyw(p, "all"): + getTok(p) + result = newNode(nkSelect) + var a = newNode(nkSelectColumns) + while true: + if isOpr(p, "*"): + a.add(newNode(nkIdent, "*")) + getTok(p) + else: + a.add(parseExpr(p)) + if p.tok.kind != tkComma: break + getTok(p) + result.add(a) + if isKeyw(p, "from"): + var f = newNode(nkFrom) + while true: + getTok(p) + f.add(parseExpr(p)) + if p.tok.kind != tkComma: break + result.add(f) + if isKeyw(p, "where"): + result.add(parseWhere(p)) + if isKeyw(p, "group"): + getTok(p) + eat(p, "by") + var g = newNode(nkGroup) + while true: + g.add(parseExpr(p)) + if p.tok.kind != tkComma: break + getTok(p) + result.add(g) + if isKeyw(p, "having"): + var h = newNode(nkHaving) + while true: + getTok(p) + h.add(parseExpr(p)) + if p.tok.kind != tkComma: break + result.add(h) + if isKeyw(p, "union"): + result.add(newNode(nkUnion)) + getTok(p) + elif isKeyw(p, "intersect"): + 
result.add(newNode(nkIntersect)) + getTok(p) + elif isKeyw(p, "except"): + result.add(newNode(nkExcept)) + getTok(p) + if isKeyw(p, "order"): + getTok(p) + eat(p, "by") + var n = newNode(nkOrder) + while true: + var e = parseExpr(p) + if isKeyw(p, "asc"): getTok(p) # is default + elif isKeyw(p, "desc"): + getTok(p) + var x = newNode(nkDesc) + x.add(e) + e = x + n.add(e) + if p.tok.kind != tkComma: break + getTok(p) + result.add(n) + +proc parseStmt(p: var TSqlParser): PSqlNode = + if isKeyw(p, "create"): + getTok(p) + optKeyw(p, "cached") + optKeyw(p, "memory") + optKeyw(p, "temp") + optKeyw(p, "global") + optKeyw(p, "local") + optKeyw(p, "temporary") + optKeyw(p, "unique") + optKeyw(p, "hash") + if isKeyw(p, "table"): + result = parseTableDef(p) + elif isKeyw(p, "type"): + result = parseTypeDef(p) + elif isKeyw(p, "index"): + result = parseIndexDef(p) + else: + sqlError(p, "TABLE expected") + elif isKeyw(p, "insert"): + result = parseInsert(p) + elif isKeyw(p, "update"): + result = parseUpdate(p) + elif isKeyw(p, "delete"): + result = parseDelete(p) + elif isKeyw(p, "select"): + result = parseSelect(p) + else: + sqlError(p, "CREATE expected") + +proc open(p: var TSqlParser, input: PStream, filename: string) = + ## opens the parser `p` and assigns the input stream `input` to it. + ## `filename` is only used for error messages. + open(TSqlLexer(p), input, filename) + p.tok.kind = tkInvalid + p.tok.literal = "" + getTok(p) + +proc parse(p: var TSqlParser): PSqlNode = + ## parses the content of `p`'s input stream and returns the SQL AST. + ## Syntax errors raise an `EInvalidSql` exception. + result = newNode(nkStmtList) + while p.tok.kind != tkEof: + var s = parseStmt(p) + eat(p, tkSemiColon) + result.add(s) + if result.len == 1: + result = result.sons[0] + +proc close(p: var TSqlParser) = + ## closes the parser `p`. The associated input stream is closed too. 
+ close(TSqlLexer(p))

proc parseSQL*(input: PStream, filename: string): PSqlNode =
  ## parses the SQL from `input` into an AST and returns the AST.
  ## `filename` is only used for error messages.
  ## Syntax errors raise an `EInvalidSql` exception.
  var p: TSqlParser
  open(p, input, filename)
  try:
    result = parse(p)
  finally:
    # always release the parser, even when `parse` raises
    close(p)

# forward declaration: `rs` and `ra` are mutually recursive
proc ra(n: PSqlNode, s: var string, indent: int)

proc rs(n: PSqlNode, s: var string, indent: int,
        prefix = "(", suffix = ")",
        sep = ", ") =
  ## renders all sons of `n` into `s`, separated by `sep` and wrapped in
  ## `prefix` / `suffix`. Nothing at all (not even the prefix and suffix)
  ## is emitted if `n` has no sons.
  if n.len > 0:
    s.add(prefix)
    for i in 0 .. n.len-1:
      if i > 0: s.add(sep)
      ra(n.sons[i], s, indent)
    s.add(suffix)

proc ra(n: PSqlNode, s: var string, indent: int) =
  ## appends the SQL text for the node `n` (and, recursively, its sons)
  ## to `s`. A ``nil`` node renders as nothing. `indent` is currently only
  ## passed along to the recursive calls.
  if n == nil: return
  case n.kind
  of nkNone: nil
  of nkIdent:
    # identifiers made only of printable, non-space ASCII are emitted bare;
    # everything else is double-quoted with embedded quotes doubled
    if allCharsInSet(n.strVal, {'\33'..'\127'}):
      s.add(n.strVal)
    else:
      s.add("\"" & replace(n.strVal, "\"", "\"\"") & "\"")
  of nkStringLit:
    s.add(escape(n.strVal, "e'", "'"))
  of nkBitStringLit:
    s.add("b'" & n.strVal & "'")
  of nkHexStringLit:
    s.add("x'" & n.strVal & "'")
  of nkIntegerLit, nkNumericLit:
    s.add(n.strVal)
  of nkPrimaryKey:
    s.add(" primary key")
    rs(n, s, indent)
  of nkForeignKey:
    s.add(" foreign key")
    rs(n, s, indent)
  of nkNotNull:
    s.add(" not null")
  of nkDot:
    ra(n.sons[0], s, indent)
    s.add(".")
    ra(n.sons[1], s, indent)
  of nkDotDot:
    ra(n.sons[0], s, indent)
    s.add(". .")
    ra(n.sons[1], s, indent)
  of nkPrefix:
    s.add('(')
    ra(n.sons[0], s, indent)
    s.add(' ')
    ra(n.sons[1], s, indent)
    s.add(')')
  of nkInfix:
    # son 0 is the operator; sons 1 and 2 are rendered around it
    s.add('(')
    ra(n.sons[1], s, indent)
    s.add(' ')
    ra(n.sons[0], s, indent)
    s.add(' ')
    ra(n.sons[2], s, indent)
    s.add(')')
  of nkCall, nkColumnReference:
    ra(n.sons[0], s, indent)
    s.add('(')
    for i in 1..n.len-1:
      if i > 1: s.add(", ")
      ra(n.sons[i], s, indent)
    s.add(')')
  of nkReferences:
    s.add(" references ")
    ra(n.sons[0], s, indent)
  of nkDefault:
    s.add(" default ")
    ra(n.sons[0], s, indent)
  of nkCheck:
    s.add(" check ")
    ra(n.sons[0], s, indent)
  of nkConstraint:
    s.add(" constraint ")
    ra(n.sons[0], s, indent)
    s.add(" check ")
    ra(n.sons[1], s, indent)
  of nkUnique:
    s.add(" unique")
    rs(n, s, indent)
  of nkIdentity:
    s.add(" identity")
  of nkColumnDef:
    s.add("\n ")
    rs(n, s, indent, "", "", " ")
  of nkStmtList:
    for i in 0..n.len-1:
      ra(n.sons[i], s, indent)
      s.add("\n")
  of nkInsert:
    assert n.len == 3
    s.add("insert into ")
    ra(n.sons[0], s, indent)
    ra(n.sons[1], s, indent)
    if n.sons[2].kind == nkDefault:
      # leading space keeps the keyword from fusing with the table name or
      # the column list rendered just before it
      s.add(" default values")
    else:
      # NOTE(review): the nkValueList branch below also emits "values "; if
      # the parser stores an nkValueList here the keyword is rendered
      # twice -- confirm against the insert parser.
      s.add("\nvalues ")
      ra(n.sons[2], s, indent)
    s.add(';')
  of nkUpdate:
    s.add("update ")
    ra(n.sons[0], s, indent)
    s.add(" set ")
    var L = n.len
    # sons 1 .. L-2 are the assignments; the last son is rendered after them
    for i in 1 .. L-2:
      if i > 1: s.add(", ")
      var it = n.sons[i]
      assert it.kind == nkAsgn
      ra(it, s, indent)
    ra(n.sons[L-1], s, indent)
    s.add(';')
  of nkDelete:
    s.add("delete from ")
    ra(n.sons[0], s, indent)
    ra(n.sons[1], s, indent)
    s.add(';')
  of nkSelect, nkSelectDistinct:
    s.add("select ")
    if n.kind == nkSelectDistinct:
      s.add("distinct ")
    rs(n.sons[0], s, indent, "", "", ", ")
    for i in 1 .. n.len-1: ra(n.sons[i], s, indent)
    s.add(';')
  of nkSelectColumns:
    assert(false)
  of nkAsgn:
    ra(n.sons[0], s, indent)
    s.add(" = ")
    ra(n.sons[1], s, indent)
  of nkFrom:
    s.add("\nfrom ")
    rs(n, s, indent, "", "", ", ")
  of nkGroup:
    # trailing space: `rs` is called with an empty prefix, so without it the
    # first grouping expression would fuse with the keyword ("group byx")
    s.add("\ngroup by ")
    rs(n, s, indent, "", "", ", ")
  of nkHaving:
    # trailing space for the same reason as in the nkGroup branch
    s.add("\nhaving ")
    rs(n, s, indent, "", "", ", ")
  of nkOrder:
    s.add("\norder by ")
    rs(n, s, indent, "", "", ", ")
  of nkDesc:
    ra(n.sons[0], s, indent)
    s.add(" desc")
  of nkUnion:
    s.add(" union")
  of nkIntersect:
    s.add(" intersect")
  of nkExcept:
    s.add(" except")
  of nkColumnList:
    rs(n, s, indent)
  of nkValueList:
    s.add("values ")
    rs(n, s, indent)
  of nkWhere:
    s.add("\nwhere ")
    ra(n.sons[0], s, indent)
  of nkCreateTable, nkCreateTableIfNotExists:
    s.add("create table ")
    if n.kind == nkCreateTableIfNotExists:
      s.add("if not exists ")
    ra(n.sons[0], s, indent)
    s.add('(')
    for i in 1..n.len-1:
      if i > 1: s.add(", ")
      ra(n.sons[i], s, indent)
    s.add(");")
  of nkCreateType, nkCreateTypeIfNotExists:
    s.add("create type ")
    if n.kind == nkCreateTypeIfNotExists:
      s.add("if not exists ")
    ra(n.sons[0], s, indent)
    s.add(" as ")
    ra(n.sons[1], s, indent)
    s.add(';')
  of nkCreateIndex, nkCreateIndexIfNotExists:
    s.add("create index ")
    if n.kind == nkCreateIndexIfNotExists:
      s.add("if not exists ")
    ra(n.sons[0], s, indent)
    s.add(" on ")
    ra(n.sons[1], s, indent)
    s.add('(')
    for i in 2..n.len-1:
      if i > 2: s.add(", ")
      ra(n.sons[i], s, indent)
    s.add(");")
  of nkEnumDef:
    s.add("enum ")
    rs(n, s, indent)

+# What I want: +# +#select(columns = [T1.all, T2.name], +# fromm = [T1, T2], +# where = T1.name ==.
T2.name, +# orderby = [name]): +# +#for row in dbQuery(db, """select x, y, z +# from a, b +# where a.name = b.name"""): +# + +#select x, y, z: +# fromm: Table1, Table2 +# where: x.name == y.name +#db.select(fromm = [t1, t2], where = t1.name == t2.name): +#for x, y, z in db.select(fromm = a, b where = a.name == b.name): +# writeln x, y, z + +proc renderSQL*(n: PSqlNode): string = + ## Converts an SQL abstract syntax tree to its string representation. + result = "" + ra(n, result, 0) + +when isMainModule: + echo(renderSQL(parseSQL(newStringStream(""" + CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic'); + CREATE TABLE holidays ( + num_weeks int, + happiness happiness + ); + CREATE INDEX table1_attr1 ON table1(attr1); + + SELECT * FROM myTab WHERE col1 = 'happy'; + """), "stdin"))) + +# CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic'); +# CREATE TABLE holidays ( +# num_weeks int, +# happiness happiness +# ); +# CREATE INDEX table1_attr1 ON table1(attr1) diff --git a/nimlib/pure/parsexml.nim b/nimlib/pure/parsexml.nim new file mode 100755 index 000000000..54f62a9a4 --- /dev/null +++ b/nimlib/pure/parsexml.nim @@ -0,0 +1,635 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a simple high performance `XML`:idx: / `HTML`:idx: +## parser. +## The only encoding that is supported is UTF-8. The parser has been designed +## to be somewhat error correcting, so that even most "wild HTML" found on the +## web can be parsed with it. **Note:** This parser does not check that each +## ``<tag>`` has a corresponding ``</tag>``! These checks have do be +## implemented by the client code for various reasons: +## +## * Old HTML contains tags that have no end tag: ``<br>`` for example. +## * HTML tags are case insensitive, XML tags are case sensitive. 
Since this +## library can parse both, only the client knows which comparison is to be +## used. +## * Thus the checks would have been very difficult to implement properly with +## little benefit, especially since they are simple to implement in the +## client. The client should use the `errorMsgExpected` proc to generate +## a nice error message that fits the other error messages this library +## creates. +## +## +## Example 1: Retrieve HTML title +## ============================== +## +## The file ``examples/htmltitle.nim`` demonstrates how to use the +## XML parser to accomplish a simple task: To determine the title of an HTML +## document. +## +## .. code-block:: nimrod +## :file: examples/htmltitle.nim +## +## +## Example 2: Retrieve all HTML links +## ================================== +## +## The file ``examples/htmlrefs.nim`` demonstrates how to use the +## XML parser to accomplish another simple task: To determine all the links +## an HTML document contains. +## +## .. code-block:: nimrod +## :file: examples/htmlrefs.nim +## + +import + hashes, strutils, lexbase, streams, unicode + +# the parser treats ``<br />`` as ``<br></br>`` + +type + TXmlEventKind* = enum ## enumation of all events that may occur when parsing + xmlError, ## an error ocurred during parsing + xmlEof, ## end of file reached + xmlCharData, ## character data + xmlWhitespace, ## whitespace has been parsed + xmlComment, ## a comment has been parsed + xmlPI, ## processing instruction (``<?name something ?>``) + xmlElementStart, ## ``<elem>`` + xmlElementEnd, ## ``</elem>`` + xmlElementOpen, ## ``<elem + xmlAttribute, ## ``key = "value"`` pair + xmlElementClose, ## ``>`` + xmlCData, ## ``<![CDATA[`` ... data ... ``]]>`` + xmlEntity, ## &entity; + xmlSpecial ## ``<! ... data ... 
>`` + + TXmlError* = enum ## enumeration that lists all errors that can occur + errNone, ## no error + errEndOfCDataExpected, ## ``]]>`` expected + errNameExpected, ## name expected + errSemicolonExpected, ## ``;`` expected + errQmGtExpected, ## ``?>`` expected + errGtExpected, ## ``>`` expected + errEqExpected, ## ``=`` expected + errQuoteExpected, ## ``"`` or ``'`` expected + errEndOfCommentExpected ## ``-->`` expected + + TParserState = enum + stateStart, stateNormal, stateAttr, stateEmptyElementTag, stateError + + TXmlParseOption* = enum ## options for the XML parser + reportWhitespace, ## report whitespace + reportComments ## report comments + + TXmlParser* = object of TBaseLexer ## the parser object. + a, b: string + kind: TXmlEventKind + err: TXmlError + state: TParserState + filename: string + options: set[TXmlParseOption] + +const + errorMessages: array [TXmlError, string] = [ + "no error", + "']]>' expected", + "name expected", + "';' expected", + "'?>' expected", + "'>' expected", + "'=' expected", + "'\"' or \"'\" expected", + "'-->' expected" + ] + +proc open*(my: var TXmlParser, input: PStream, filename: string, + options: set[TXmlParseOption] = {}) = + ## initializes the parser with an input stream. `Filename` is only used + ## for nice error messages. The parser's behaviour can be controlled by + ## the `options` parameter: If `options` contains ``reportWhitespace`` + ## a whitespace token is reported as an ``xmlWhitespace`` event. + ## If `options` contains ``reportComments`` a comment token is reported as an + ## ``xmlComment`` event. + lexbase.open(my, input) + my.filename = filename + my.state = stateStart + my.kind = xmlError + my.a = "" + my.b = "" + my.options = options + +proc close*(my: var TXmlParser) {.inline.} = + ## closes the parser `my` and its associated input stream. 
+ lexbase.close(my) + +proc charData*(my: TXmlParser): string {.inline.} = + ## returns the character data for the events: ``xmlCharData``, + ## ``xmlWhitespace``, ``xmlComment``, ``xmlCData``, ``xmlSpecial`` + assert(my.kind in {xmlCharData, xmlWhitespace, xmlComment, xmlCData, + xmlSpecial}) + return my.a + +proc kind*(my: TXmlParser): TXmlEventKind {.inline.} = + ## returns the current event type for the XML parser + return my.kind + +proc elementName*(my: TXmlParser): string {.inline.} = + ## returns the element name for the events: ``xmlElementStart``, + ## ``xmlElementEnd``, ``xmlElementOpen`` + assert(my.kind in {xmlElementStart, xmlElementEnd, xmlElementOpen}) + return my.a + +proc entityName*(my: TXmlParser): string {.inline.} = + ## returns the entity name for the event: ``xmlEntity`` + assert(my.kind == xmlEntity) + return my.a + +proc attrKey*(my: TXmlParser): string {.inline.} = + ## returns the attribute key for the event ``xmlAttribute`` + assert(my.kind == xmlAttribute) + return my.a + +proc attrValue*(my: TXmlParser): string {.inline.} = + ## returns the attribute value for the event ``xmlAttribute`` + assert(my.kind == xmlAttribute) + return my.b + +proc PIName*(my: TXmlParser): string {.inline.} = + ## returns the processing instruction name for the event ``xmlPI`` + assert(my.kind == xmlPI) + return my.a + +proc PIRest*(my: TXmlParser): string {.inline.} = + ## returns the rest of the processing instruction for the event ``xmlPI`` + assert(my.kind == xmlPI) + return my.b + +proc getColumn*(my: TXmlParser): int {.inline.} = + ## get the current column the parser has arrived at. + result = getColNumber(my, my.bufPos) + +proc getLine*(my: TXmlParser): int {.inline.} = + ## get the current line the parser has arrived at. + result = my.linenumber + +proc getFilename*(my: TXmlParser): string {.inline.} = + ## get the filename of the file that the parser processes. 
+ result = my.filename + +proc errorMsg*(my: TXmlParser): string = + ## returns a helpful error message for the event ``xmlError`` + assert(my.kind == xmlError) + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]] + +proc errorMsgExpected*(my: TXmlParser, tag: string): string = + ## returns an error message "<tag> expected" in the same format as the + ## other error messages + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), "<$1> expected" % tag] + +proc markError(my: var TXmlParser, kind: TXmlError) {.inline.} = + my.err = kind + my.state = stateError + +proc parseCDATA(my: var TXMLParser) = + var pos = my.bufpos + len("<![CDATA[") + var buf = my.buf + while true: + case buf[pos] + of ']': + if buf[pos+1] == ']' and buf[pos+2] == '>': + inc(pos, 3) + break + add(my.a, ']') + inc(pos) + of '\0': + markError(my, errEndOfCDataExpected) + break + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + add(my.a, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + add(my.a, '\L') + else: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos # store back + my.kind = xmlCDATA + +proc parseComment(my: var TXMLParser) = + var pos = my.bufpos + len("<!--") + var buf = my.buf + while true: + case buf[pos] + of '-': + if buf[pos+1] == '-' and buf[pos+2] == '>': + inc(pos, 3) + break + if my.options.contains(reportComments): add(my.a, '-') + inc(pos) + of '\0': + markError(my, errEndOfCommentExpected) + break + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + if my.options.contains(reportComments): add(my.a, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + if my.options.contains(reportComments): add(my.a, '\L') + else: + if my.options.contains(reportComments): add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + my.kind = xmlComment + +proc parseWhitespace(my: var TXmlParser, skip=False) = + var pos = my.bufpos + var buf = my.buf + while true: + 
case buf[pos] + of ' ', '\t': + if not skip: add(my.a, buf[pos]) + Inc(pos) + of '\c': + # the specification says that CR-LF, CR are to be transformed to LF + pos = lexbase.HandleCR(my, pos) + buf = my.buf + if not skip: add(my.a, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + if not skip: add(my.a, '\L') + else: + break + my.bufpos = pos + +const + NameStartChar = {'A'..'Z', 'a'..'z', '_', ':', '\128'..'\255'} + NameChar = {'A'..'Z', 'a'..'z', '0'..'9', '.', '-', '_', ':', '\128'..'\255'} + +proc parseName(my: var TXmlParser, dest: var string) = + var pos = my.bufpos + var buf = my.buf + if buf[pos] in nameStartChar: + while true: + add(dest, buf[pos]) + inc(pos) + if buf[pos] notin NameChar: break + my.bufpos = pos + else: + markError(my, errNameExpected) + +proc parseEntity(my: var TXmlParser, dest: var string) = + var pos = my.bufpos+1 + var buf = my.buf + my.kind = xmlCharData + if buf[pos] == '#': + var r: int + inc(pos) + if buf[pos] == 'x': + inc(pos) + while true: + case buf[pos] + of '0'..'9': r = (r shl 4) or (ord(buf[pos]) - ord('0')) + of 'a'..'f': r = (r shl 4) or (ord(buf[pos]) - ord('a') + 10) + of 'A'..'F': r = (r shl 4) or (ord(buf[pos]) - ord('A') + 10) + else: break + inc(pos) + else: + while buf[pos] in {'0'..'9'}: + r = r * 10 + (ord(buf[pos]) - ord('0')) + inc(pos) + add(dest, toUTF8(TRune(r))) + elif buf[pos] == 'l' and buf[pos+1] == 't': + add(dest, '<') + inc(pos, 2) + elif buf[pos] == 'g' and buf[pos+1] == 't': + add(dest, '>') + inc(pos, 2) + elif buf[pos] == 'a' and buf[pos+1] == 'm' and buf[pos+2] == 'p': + add(dest, '&') + inc(pos, 3) + elif buf[pos] == 'a' and buf[pos+1] == 'p' and buf[pos+2] == 'o' and + buf[pos+3] == 's': + add(dest, '\'') + inc(pos, 4) + elif buf[pos] == 'q' and buf[pos+1] == 'u' and buf[pos+2] == 'o' and + buf[pos+3] == 't': + add(dest, '"') + inc(pos, 4) + else: + my.bufpos = pos + parseName(my, dest) + pos = my.bufpos + if my.err != errNameExpected: + my.kind = xmlEntity + else: + add(dest, 
'&') + if buf[pos] == ';': + inc(pos) + else: + markError(my, errSemiColonExpected) + my.bufpos = pos + +proc parsePI(my: var TXmlParser) = + inc(my.bufpos, "<?".len) + parseName(my, my.a) + var pos = my.bufpos + var buf = my.buf + setLen(my.b, 0) + while true: + case buf[pos] + of '\0': + markError(my, errQmGtExpected) + break + of '?': + if buf[pos+1] == '>': + inc(pos, 2) + break + add(my.b, '?') + inc(pos) + of '\c': + # the specification says that CR-LF, CR are to be transformed to LF + pos = lexbase.HandleCR(my, pos) + buf = my.buf + add(my.b, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + add(my.b, '\L') + else: + add(my.b, buf[pos]) + inc(pos) + my.bufpos = pos + my.kind = xmlPI + +proc parseSpecial(my: var TXmlParser) = + # things that start with <! + var pos = my.bufpos + 2 + var buf = my.buf + var opentags = 0 + while true: + case buf[pos] + of '\0': + markError(my, errGtExpected) + break + of '<': + inc(opentags) + inc(pos) + add(my.a, '<') + of '>': + if opentags <= 0: + inc(pos) + break + dec(opentags) + inc(pos) + add(my.a, '>') + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + add(my.a, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + add(my.a, '\L') + else: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + my.kind = xmlSpecial + +proc parseTag(my: var TXmlParser) = + inc(my.bufpos) + parseName(my, my.a) + # if we have no name, do not interpret the '<': + if my.a.len == 0: + my.kind = xmlCharData + add(my.a, '<') + return + parseWhitespace(my, skip=True) + if my.buf[my.bufpos] in NameStartChar: + # an attribute follows: + my.kind = xmlElementOpen + my.state = stateAttr + else: + my.kind = xmlElementStart + if my.buf[my.bufpos] == '/' and my.buf[my.bufpos+1] == '>': + inc(my.bufpos, 2) + my.state = stateEmptyElementTag + elif my.buf[my.bufpos] == '>': + inc(my.bufpos) + else: + markError(my, errGtExpected) + +proc parseEndTag(my: var TXmlParser) = + inc(my.bufpos, 2) + parseName(my, my.a) + 
parseWhitespace(my, skip=True) + if my.buf[my.bufpos] == '>': + inc(my.bufpos) + else: + markError(my, errGtExpected) + my.kind = xmlElementEnd + +proc parseAttribute(my: var TXmlParser) = + my.kind = xmlAttribute + setLen(my.a, 0) + setLen(my.b, 0) + parseName(my, my.a) + # if we have no name, we have '<tag attr= key %&$$%': + if my.a.len == 0: + markError(my, errGtExpected) + return + parseWhitespace(my, skip=True) + if my.buf[my.bufpos] != '=': + markError(my, errEqExpected) + return + inc(my.bufpos) + parseWhitespace(my, skip=True) + + var pos = my.bufpos + var buf = my.buf + if buf[pos] in {'\'', '"'}: + var quote = buf[pos] + var pendingSpace = false + inc(pos) + while true: + case buf[pos] + of '\0': + markError(my, errQuoteExpected) + break + of '&': + if pendingSpace: + add(my.b, ' ') + pendingSpace = false + my.bufpos = pos + parseEntity(my, my.b) + my.kind = xmlAttribute # parseEntity overwrites my.kind! + pos = my.bufpos + of ' ', '\t': + pendingSpace = true + inc(pos) + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + pendingSpace = true + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + pendingSpace = true + else: + if buf[pos] == quote: + inc(pos) + break + else: + if pendingSpace: + add(my.b, ' ') + pendingSpace = false + add(my.b, buf[pos]) + inc(pos) + else: + markError(my, errQuoteExpected) + my.bufpos = pos + parseWhitespace(my, skip=True) + +proc parseCharData(my: var TXmlParser) = + var pos = my.bufpos + var buf = my.buf + while true: + case buf[pos] + of '\0', '<', '&': break + of '\c': + # the specification says that CR-LF, CR are to be transformed to LF + pos = lexbase.HandleCR(my, pos) + buf = my.buf + add(my.a, '\L') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + add(my.a, '\L') + else: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + my.kind = xmlCharData + +proc rawGetTok(my: var TXmlParser) = + my.kind = xmlError + setLen(my.a, 0) + var pos = my.bufpos + var buf = my.buf + case buf[pos] + of 
'<': + case buf[pos+1] + of '/': + parseEndTag(my) + of '!': + if buf[pos+2] == '[' and buf[pos+3] == 'C' and buf[pos+4] == 'D' and + buf[pos+5] == 'A' and buf[pos+6] == 'T' and buf[pos+7] == 'A' and + buf[pos+8] == '[': + parseCDATA(my) + elif buf[pos+2] == '-' and buf[pos+3] == '-': + parseComment(my) + else: + parseSpecial(my) + of '?': + parsePI(my) + else: + parseTag(my) + of ' ', '\t', '\c', '\l': + parseWhiteSpace(my) + my.kind = xmlWhitespace + of '\0': + my.kind = xmlEof + of '&': + parseEntity(my, my.a) + else: + parseCharData(my) + assert my.kind != xmlError + +proc getTok(my: var TXmlParser) = + while true: + rawGetTok(my) + case my.kind + of xmlComment: + if my.options.contains(reportComments): break + of xmlWhitespace: + if my.options.contains(reportWhitespace): break + else: break + +proc next*(my: var TXmlParser) = + ## retrieves the first/next event. This controls the parser. + case my.state + of stateNormal: + getTok(my) + of stateStart: + getTok(my) + if my.kind == xmlPI and my.a == "xml": + # just skip the first ``<?xml >`` processing instruction + getTok(my) + my.state = stateNormal + of stateAttr: + # parse an attribute key-value pair: + if my.buf[my.bufpos] == '>': + my.kind = xmlElementClose + inc(my.bufpos) + my.state = stateNormal + elif my.buf[my.bufpos] == '/' and my.buf[my.bufpos+1] == '>': + my.kind = xmlElementClose + inc(my.bufpos, 2) + my.state = stateEmptyElementTag + else: + parseAttribute(my) + # state remains the same + of stateEmptyElementTag: + my.state = stateNormal + my.kind = xmlElementEnd + of stateError: + my.kind = xmlError + my.state = stateNormal + +when isMainModule: + import os + var s = newFileStream(ParamStr(1), fmRead) + if s == nil: quit("cannot open the file" & ParamStr(1)) + var x: TXmlParser + open(x, s, ParamStr(1)) + while true: + next(x) + case x.kind + of xmlError: Echo(x.errorMsg()) + of xmlEof: break + of xmlCharData: echo(x.charData) + of xmlWhitespace: echo("|$1|" % x.charData) + of xmlComment: 
echo("<!-- $1 -->" % x.charData) + of xmlPI: echo("<? $1 ## $2 ?>" % [x.PIName, x.PIRest]) + of xmlElementStart: echo("<$1>" % x.elementName) + of xmlElementEnd: echo("</$1>" % x.elementName) + + of xmlElementOpen: echo("<$1" % x.elementName) + of xmlAttribute: + echo("Key: " & x.attrKey) + echo("Value: " & x.attrValue) + + of xmlElementClose: echo(">") + of xmlCData: + echo("<![CDATA[$1]]>" % x.charData) + of xmlEntity: + echo("&$1;" % x.entityName) + of xmlSpecial: + echo("SPECIAL: " & x.charData) + close(x) + diff --git a/nimlib/pure/pegs.nim b/nimlib/pure/pegs.nim new file mode 100755 index 000000000..488e42c7d --- /dev/null +++ b/nimlib/pure/pegs.nim @@ -0,0 +1,1365 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Simple PEG (Parsing expression grammar) matching. Uses no memorization, but +## uses superoperators and symbol inlining to improve performance. Note: +## Matching performance is hopefully competitive with optimized regular +## expression engines. +## +## .. include:: ../doc/pegdocs.txt +## + +const + useUnicode = true ## change this to deactivate proper UTF-8 support + +import + strutils + +when useUnicode: + import unicode + +const + InlineThreshold = 5 ## number of leaves; -1 to disable inlining + +type + TPegKind = enum + pkEmpty, + pkAny, ## any character (.) + pkAnyRune, ## any Unicode character (_) + pkNewLine, ## CR-LF, LF, CR + pkTerminal, + pkTerminalIgnoreCase, + pkTerminalIgnoreStyle, + pkChar, ## single character to match + pkCharChoice, + pkNonTerminal, + pkSequence, ## a b c ... --> Internal DSL: peg(a, b, c) + pkOrderedChoice, ## a / b / ... --> Internal DSL: a / b or /[a, b, c] + pkGreedyRep, ## a* --> Internal DSL: *a + ## a+ --> (a a*) + pkGreedyRepChar, ## x* where x is a single character (superop) + pkGreedyRepSet, ## [set]* (superop) + pkGreedyAny, ## .* or _* (superop) + pkOption, ## a? 
--> Internal DSL: ?a + pkAndPredicate, ## &a --> Internal DSL: &a + pkNotPredicate, ## !a --> Internal DSL: !a + pkCapture, ## {a} --> Internal DSL: capture(a) + pkSearch, ## @a --> Internal DSL: @a + pkRule, ## a <- b + pkList ## a, b + TNonTerminalFlag = enum + ntDeclared, ntUsed + TNonTerminal {.final.} = object ## represents a non terminal symbol + name: string ## the name of the symbol + line: int ## the line the symbol has been declared/used in + col: int ## the column the symbol has been declared/used in + flags: set[TNonTerminalFlag] ## the nonterminal's flags + rule: TNode ## the rule that the symbol refers to + TNode {.final.} = object + case kind: TPegKind + of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine: nil + of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle: term: string + of pkChar, pkGreedyRepChar: ch: char + of pkCharChoice, pkGreedyRepSet: charChoice: ref set[char] + of pkNonTerminal: nt: PNonTerminal + else: sons: seq[TNode] + PNonTerminal* = ref TNonTerminal + + TPeg* = TNode ## type that represents a PEG + +proc term*(t: string): TPeg = + ## constructs a PEG from a terminal string + if t.len != 1: + result.kind = pkTerminal + result.term = t + else: + result.kind = pkChar + result.ch = t[0] + +proc termIgnoreCase*(t: string): TPeg = + ## constructs a PEG from a terminal string; ignore case for matching + result.kind = pkTerminalIgnoreCase + result.term = t + +proc termIgnoreStyle*(t: string): TPeg = + ## constructs a PEG from a terminal string; ignore style for matching + result.kind = pkTerminalIgnoreStyle + result.term = t + +proc term*(t: char): TPeg = + ## constructs a PEG from a terminal char + assert t != '\0' + result.kind = pkChar + result.ch = t + +proc charSet*(s: set[char]): TPeg = + ## constructs a PEG from a character set `s` + assert '\0' notin s + result.kind = pkCharChoice + new(result.charChoice) + result.charChoice^ = s + +proc len(a: TPeg): int {.inline.} = return a.sons.len +proc add(d: var TPeg, s: TPeg) 
{.inline.} = add(d.sons, s) + +proc addChoice(dest: var TPeg, elem: TPeg) = + var L = dest.len-1 + if L >= 0 and dest.sons[L].kind == pkCharChoice: + case elem.kind + of pkCharChoice: + dest.sons[L].charChoice^ = dest.sons[L].charChoice^ + elem.charChoice^ + of pkChar: incl(dest.sons[L].charChoice^, elem.ch) + else: add(dest, elem) + else: add(dest, elem) + +template multipleOp(k: TPegKind, localOpt: expr) = + result.kind = k + result.sons = @[] + for x in items(a): + if x.kind == k: + for y in items(x.sons): + localOpt(result, y) + else: + localOpt(result, x) + if result.len == 1: + result = result.sons[0] + +proc `/`*(a: openArray[TPeg]): TPeg = + ## constructs an ordered choice with the PEGs in `a` + multipleOp(pkOrderedChoice, addChoice) + +proc addSequence(dest: var TPeg, elem: TPeg) = + var L = dest.len-1 + if L >= 0 and dest.sons[L].kind == pkTerminal: + case elem.kind + of pkTerminal: add(dest.sons[L].term, elem.term) + of pkChar: add(dest.sons[L].term, elem.ch) + else: add(dest, elem) + else: add(dest, elem) + +proc sequence*(a: openArray[TPeg]): TPeg = + ## constructs a sequence with all the PEGs from `a` + multipleOp(pkSequence, addSequence) + +proc `?`*(a: TPeg): TPeg = + ## constructs an optional for the PEG `a` + if a.kind in {pkOption, pkGreedyRep, pkGreedyAny, pkGreedyRepChar, + pkGreedyRepSet}: + # a* ? --> a* + # a? ? --> a? + result = a + else: + result.kind = pkOption + result.sons = @[a] + +proc `*`*(a: TPeg): TPeg = + ## constructs a "greedy repetition" for the PEG `a` + case a.kind + of pkGreedyRep, pkGreedyRepChar, pkGreedyRepSet, pkGreedyAny, pkOption: + assert false + # produces endless loop! + of pkChar: + result.kind = pkGreedyRepChar + result.ch = a.ch + of pkCharChoice: + result.kind = pkGreedyRepSet + result.charChoice = a.charChoice # copying a reference suffices! 
+ of pkAny, pkAnyRune: + result.kind = pkGreedyAny + else: + result.kind = pkGreedyRep + result.sons = @[a] + +proc `@`*(a: TPeg): TPeg = + ## constructs a "search" for the PEG `a` + result.kind = pkSearch + result.sons = @[a] + +when false: + proc contains(a: TPeg, k: TPegKind): bool = + if a.kind == k: return true + case a.kind + of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine, pkTerminal, + pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, pkGreedyRepChar, + pkCharChoice, pkGreedyRepSet: nil + of pkNonTerminal: return true + else: + for i in 0..a.sons.len-1: + if contains(a.sons[i], k): return true + +proc `+`*(a: TPeg): TPeg = + ## constructs a "greedy positive repetition" with the PEG `a` + return sequence(a, *a) + +proc `&`*(a: TPeg): TPeg = + ## constructs an "and predicate" with the PEG `a` + result.kind = pkAndPredicate + result.sons = @[a] + +proc `!`*(a: TPeg): TPeg = + ## constructs a "not predicate" with the PEG `a` + result.kind = pkNotPredicate + result.sons = @[a] + +proc any*: TPeg {.inline.} = + ## constructs the PEG `any character`:idx: (``.``) + result.kind = pkAny + +proc anyRune*: TPeg {.inline.} = + ## constructs the PEG `any rune`:idx: (``_``) + result.kind = pkAnyRune + +proc newLine*: TPeg {.inline.} = + ## constructs the PEG `newline`:idx: (``\n``) + result.kind = pkNewline + +proc capture*(a: TPeg): TPeg = + ## constructs a capture with the PEG `a` + result.kind = pkCapture + result.sons = @[a] + +proc spaceCost(n: TPeg): int = + case n.kind + of pkEmpty: nil + of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, + pkGreedyRepChar, pkCharChoice, pkGreedyRepSet, pkAny, pkAnyRune, + pkNewLine, pkGreedyAny: + result = 1 + of pkNonTerminal: + # we cannot inline a rule with a non-terminal + result = InlineThreshold+1 + else: + for i in 0..n.len-1: + inc(result, spaceCost(n.sons[i])) + if result >= InlineThreshold: break + +proc nonterminal*(n: PNonTerminal): TPeg = + ## constructs a PEG that consists of the nonterminal 
symbol + assert n != nil + if ntDeclared in n.flags and spaceCost(n.rule) < InlineThreshold: + when false: echo "inlining symbol: ", n.name + result = n.rule # inlining of rule enables better optimizations + else: + result.kind = pkNonTerminal + result.nt = n + +proc newNonTerminal*(name: string, line, column: int): PNonTerminal = + ## constructs a nonterminal symbol + new(result) + result.name = name + result.line = line + result.col = column + +template letters*: expr = + ## expands to ``charset({'A'..'Z', 'a'..'z'})`` + charset({'A'..'Z', 'a'..'z'}) + +template digits*: expr = + ## expands to ``charset({'0'..'9'})`` + charset({'0'..'9'}) + +template whitespace*: expr = + ## expands to ``charset({' ', '\9'..'\13'})`` + charset({' ', '\9'..'\13'}) + +template identChars*: expr = + ## expands to ``charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})`` + charset({'a'..'z', 'A'..'Z', '0'..'9', '_'}) + +template identStartChars*: expr = + ## expands to ``charset({'A'..'Z', 'a'..'z', '_'})`` + charset({'a'..'z', 'A'..'Z', '_'}) + +template ident*: expr = + ## same as ``[a-zA-Z_][a-zA-z_0-9]*``; standard identifier + sequence(charset({'a'..'z', 'A'..'Z', '_'}), + *charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})) + +template natural*: expr = + ## same as ``\d+`` + +digits + +const + MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that + ## can be captured. More subpatterns cannot be captured! 
+ +# ------------------------- debugging ----------------------------------------- + +proc esc(c: char, reserved = {'\0'..'\255'}): string = + case c + of '\b': result = "\\b" + of '\t': result = "\\t" + of '\c': result = "\\c" + of '\L': result = "\\l" + of '\v': result = "\\v" + of '\f': result = "\\f" + of '\e': result = "\\e" + of '\a': result = "\\a" + of '\\': result = "\\\\" + of 'a'..'z', 'A'..'Z', '0'..'9', '_': result = $c + elif c < ' ' or c >= '\128': result = '\\' & $ord(c) + elif c in reserved: result = '\\' & c + else: result = $c + +proc singleQuoteEsc(c: Char): string = return "'" & esc(c, {'\''}) & "'" + +proc singleQuoteEsc(str: string): string = + result = "'" + for c in items(str): add result, esc(c, {'\''}) + add result, '\'' + +proc charSetEscAux(cc: set[char]): string = + const reserved = {'^', '-', ']'} + result = "" + var c1 = 0 + while c1 <= 0xff: + if chr(c1) in cc: + var c2 = c1 + while c2 < 0xff and chr(succ(c2)) in cc: inc(c2) + if c1 == c2: + add result, esc(chr(c1), reserved) + elif c2 == succ(c1): + add result, esc(chr(c1), reserved) & esc(chr(c2), reserved) + else: + add result, esc(chr(c1), reserved) & '-' & esc(chr(c2), reserved) + c1 = c2 + inc(c1) + +proc CharSetEsc(cc: set[char]): string = + if card(cc) >= 128+64: + result = "[^" & CharSetEscAux({'\1'..'\xFF'} - cc) & ']' + else: + result = '[' & CharSetEscAux(cc) & ']' + +proc toStrAux(r: TPeg, res: var string) = + case r.kind + of pkEmpty: add(res, "()") + of pkAny: add(res, '.') + of pkAnyRune: add(res, '_') + of pkNewline: add(res, "\\n") + of pkTerminal: add(res, singleQuoteEsc(r.term)) + of pkTerminalIgnoreCase: + add(res, 'i') + add(res, singleQuoteEsc(r.term)) + of pkTerminalIgnoreStyle: + add(res, 'y') + add(res, singleQuoteEsc(r.term)) + of pkChar: add(res, singleQuoteEsc(r.ch)) + of pkCharChoice: add(res, charSetEsc(r.charChoice^)) + of pkNonTerminal: add(res, r.nt.name) + of pkSequence: + add(res, '(') + toStrAux(r.sons[0], res) + for i in 1 .. 
high(r.sons): + add(res, ' ') + toStrAux(r.sons[i], res) + add(res, ')') + of pkOrderedChoice: + add(res, '(') + toStrAux(r.sons[0], res) + for i in 1 .. high(r.sons): + add(res, " / ") + toStrAux(r.sons[i], res) + add(res, ')') + of pkGreedyRep: + toStrAux(r.sons[0], res) + add(res, '*') + of pkGreedyRepChar: + add(res, singleQuoteEsc(r.ch)) + add(res, '*') + of pkGreedyRepSet: + add(res, charSetEsc(r.charChoice^)) + add(res, '*') + of pkGreedyAny: + add(res, ".*") + of pkOption: + toStrAux(r.sons[0], res) + add(res, '?') + of pkAndPredicate: + add(res, '&') + toStrAux(r.sons[0], res) + of pkNotPredicate: + add(res, '!') + toStrAux(r.sons[0], res) + of pkSearch: + add(res, '@') + toStrAux(r.sons[0], res) + of pkCapture: + add(res, '{') + toStrAux(r.sons[0], res) + add(res, '}') + of pkRule: + toStrAux(r.sons[0], res) + add(res, " <- ") + toStrAux(r.sons[1], res) + of pkList: + for i in 0 .. high(r.sons): + toStrAux(r.sons[i], res) + add(res, "\n") + +proc `$` *(r: TPeg): string = + ## converts a PEG to its string representation + result = "" + toStrAux(r, result) + +# --------------------- core engine ------------------------------------------- + +type + TMatchClosure {.final.} = object + matches: array[0..maxSubpatterns-1, tuple[first, last: int]] + ml: int + +when not useUnicode: + type + TRune = char + template fastRuneAt(s, i, ch: expr) = + ch = s[i] + inc(i) + template runeLenAt(s, i: expr): expr = 1 + +proc m(s: string, p: TPeg, start: int, c: var TMatchClosure): int = + ## this implements a simple PEG interpreter. Thanks to superoperators it + ## has competitive performance nevertheless. 
+ ## Returns -1 if it does not match, else the length of the match + case p.kind + of pkEmpty: result = 0 # match of length 0 + of pkAny: + if s[start] != '\0': result = 1 + else: result = -1 + of pkAnyRune: + if s[start] != '\0': + result = runeLenAt(s, start) + else: + result = -1 + of pkGreedyAny: + result = len(s) - start + of pkNewLine: + if s[start] == '\L': result = 1 + elif s[start] == '\C': + if s[start+1] == '\L': result = 2 + else: result = 1 + else: result = -1 + of pkTerminal: + result = len(p.term) + for i in 0..result-1: + if p.term[i] != s[start+i]: + result = -1 + break + of pkTerminalIgnoreCase: + var + i = 0 + a, b: TRune + result = start + while i < len(p.term): + fastRuneAt(p.term, i, a) + fastRuneAt(s, result, b) + if toLower(a) != toLower(b): + result = -1 + break + dec(result, start) + of pkTerminalIgnoreStyle: + var + i = 0 + a, b: TRune + result = start + while i < len(p.term): + while true: + fastRuneAt(p.term, i, a) + if a != TRune('_'): break + while true: + fastRuneAt(s, result, b) + if b != TRune('_'): break + if toLower(a) != toLower(b): + result = -1 + break + dec(result, start) + of pkChar: + if p.ch == s[start]: result = 1 + else: result = -1 + of pkCharChoice: + if contains(p.charChoice^, s[start]): result = 1 + else: result = -1 + of pkNonTerminal: + var oldMl = c.ml + when false: echo "enter: ", p.nt.name + result = m(s, p.nt.rule, start, c) + when false: echo "leave: ", p.nt.name + if result < 0: c.ml = oldMl + of pkSequence: + var oldMl = c.ml + result = 0 + for i in 0..high(p.sons): + var x = m(s, p.sons[i], start+result, c) + if x < 0: + c.ml = oldMl + result = -1 + break + else: inc(result, x) + of pkOrderedChoice: + var oldMl = c.ml + for i in 0..high(p.sons): + result = m(s, p.sons[i], start, c) + if result >= 0: break + c.ml = oldMl + of pkSearch: + var oldMl = c.ml + result = 0 + while start+result < s.len: + var x = m(s, p.sons[0], start+result, c) + if x >= 0: + inc(result, x) + return + inc(result) + result = -1 + 
proc match*(s: string, pattern: TPeg, matches: var openarray[string],
            start = 0): bool =
  ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and
  ## the captured substrings in the array ``matches``. If it does not
  ## match, nothing is written into ``matches`` and ``false`` is
  ## returned.
  var c: TMatchClosure
  # ``m`` returns the *length* of the match, so a complete match of
  # ``s[start..]`` has length ``len(s) - start`` (not ``len(s)``).
  result = m(s, pattern, start, c) == len(s) - start
  if result:
    # ``c.ml`` counts every capture that was entered, including the ones
    # beyond ``maxSubpatterns`` that ``m`` silently drops; never index past
    # the closure's array or the caller's array.
    for i in 0..min(min(c.ml, maxSubpatterns), matches.len)-1:
      matches[i] = copy(s, c.matches[i][0], c.matches[i][1])

proc match*(s: string, pattern: TPeg, start = 0): bool =
  ## returns ``true`` if ``s[start..]`` matches the ``pattern`` completely.
  var c: TMatchClosure
  # compare against the length of the remaining input, see above
  result = m(s, pattern, start, c) == len(s) - start

proc matchLen*(s: string, pattern: TPeg, matches: var openarray[string],
               start = 0): int =
  ## the same as ``match``, but it returns the length of the match,
  ## if there is no match, -1 is returned. Note that a match length
  ## of zero can happen. It's possible that a suffix of `s` remains
  ## that does not belong to the match.
  var c: TMatchClosure
  result = m(s, pattern, start, c)
  if result >= 0:
    # same clamping as in ``match``
    for i in 0..min(min(c.ml, maxSubpatterns), matches.len)-1:
      matches[i] = copy(s, c.matches[i][0], c.matches[i][1])
  else:
    # the ignore-case/ignore-style branches of ``m`` subtract ``start``
    # from their -1 failure value; normalise to the documented -1
    result = -1

proc matchLen*(s: string, pattern: TPeg, start = 0): int =
  ## the same as ``match``, but it returns the length of the match,
  ## if there is no match, -1 is returned. Note that a match length
  ## of zero can happen. It's possible that a suffix of `s` remains
  ## that does not belong to the match.
  var c: TMatchClosure
  result = m(s, pattern, start, c)
  # normalise every failure value of ``m`` to the documented -1
  if result < 0: result = -1

proc find*(s: string, pattern: TPeg, matches: var openarray[string],
           start = 0): int =
  ## returns the starting position of ``pattern`` in ``s`` and the captured
  ## substrings in the array ``matches``. The search begins at ``start``.
  ## If it does not match, nothing is written into ``matches`` and -1 is
  ## returned.
  for i in start .. s.len-1:   # honour ``start`` instead of always using 0
    if matchLen(s, pattern, matches, i) >= 0: return i
  return -1
  # could also use the pattern here: (!P .)* P

proc find*(s: string, pattern: TPeg, start = 0): int =
  ## returns the starting position of ``pattern`` in ``s``; the search
  ## begins at ``start``. If it does not match, -1 is returned.
  for i in start .. s.len-1:   # honour ``start`` instead of always using 0
    if matchLen(s, pattern, i) >= 0: return i
  return -1
proc endsWith*(s: string, suffix: TPeg): bool =
  ## returns true if `s` ends with the pattern `suffix`
  result = false
  # Position ``s.len`` is tried as well: a suffix pattern that can match
  # the empty string trivially matches at the very end, and this also makes
  # the empty input string work (the old ``0 .. s.len-1`` loop never ran
  # for it).
  for i in 0 .. s.len:
    if matchLen(s, suffix, i) == s.len - i: return true
proc transformFile*(infile, outfile: string,
                    subs: openArray[tuple[pattern: TPeg, repl: string]]) =
  ## reads in the file `infile`, performs a parallel replacement (calls
  ## `parallelReplace`) and writes back to `outfile`. Calls ``quit`` if an
  ## error occurs. This is supposed to be used for quick scripting.
  var x = readFile(infile)
  if isNil(x):
    quit("cannot open for reading: " & infile)
  # Compute the replacement before touching `outfile`, so that a failure
  # during matching cannot leave a truncated output file behind.
  var transformed = x.parallelReplace(subs)
  var f: TFile
  if not open(f, outfile, fmWrite):
    quit("cannot open for writing: " & outfile)
  try:
    write(f, transformed)
  finally:
    # release the handle even if writing raises
    close(f)
code-block:: nimrod + ## "this" + ## "is" + ## "an" + ## "example" + ## + var + first = 0 + last = 0 + while last < len(s): + var x = matchLen(s, sep, last) + if x > 0: inc(last, x) + first = last + while last < len(s): + inc(last) + x = matchLen(s, sep, last) + if x > 0: break + if first < last: + yield copy(s, first, last-1) + +proc split*(s: string, sep: TPeg): seq[string] {.noSideEffect.} = + ## Splits the string `s` into substrings. + accumulateResult(split(s, sep)) + +# ------------------- scanner ------------------------------------------------- + +type + TModifier = enum + modNone, + modVerbatim, + modIgnoreCase, + modIgnoreStyle + TTokKind = enum ## enumeration of all tokens + tkInvalid, ## invalid token + tkEof, ## end of file reached + tkAny, ## . + tkAnyRune, ## _ + tkIdentifier, ## abc + tkStringLit, ## "abc" or 'abc' + tkCharSet, ## [^A-Z] + tkParLe, ## '(' + tkParRi, ## ')' + tkCurlyLe, ## '{' + tkCurlyRi, ## '}' + tkArrow, ## '<-' + tkBar, ## '/' + tkStar, ## '*' + tkPlus, ## '+' + tkAmp, ## '&' + tkNot, ## '!' + tkOption, ## '?' + tkAt, ## '@' + tkBuiltin, ## \identifier + tkEscaped ## \\ + + TToken {.final.} = object ## a token + kind: TTokKind ## the type of the token + modifier: TModifier + literal: string ## the parsed (string) literal + charset: set[char] ## if kind == tkCharSet + + TPegLexer = object ## the lexer object. 
proc handleHexChar(c: var TPegLexer, xi: var int) =
  ## If the character at the lexer's current position is a hexadecimal
  ## digit, shifts `xi` left by four bits, ors the digit's value in and
  ## advances the lexer by one character. Any other character leaves both
  ## `xi` and the position untouched.
  var ch = c.buf[c.bufpos]
  var digit = -1               # -1 means "not a hex digit"
  case ch
  of '0'..'9': digit = ord(ch) - ord('0')
  of 'a'..'f': digit = ord(ch) - ord('a') + 10
  of 'A'..'F': digit = ord(ch) - ord('A') + 10
  else: nil
  if digit >= 0:
    xi = (xi shl 4) or digit
    inc(c.bufpos)
proc skip(c: var TPegLexer) =
  ## Advances the lexer past blanks, tabs, ``#`` comments (up to but not
  ## including the line end) and line breaks. Line breaks are routed through
  ## ``HandleCR``/``HandleLF`` so that the line bookkeeping stays current.
  ## Stops at the first other character, which includes the terminating
  ## ``'\0'``.
  var i = c.bufpos
  var scanning = true
  while scanning:
    var ch = c.buf[i]
    if ch == ' ' or ch == '\t':
      inc(i)
    elif ch == '#':
      # a comment runs to the end of the line (or of the input)
      while c.buf[i] notin {'\c', '\L', '\0'}: inc(i)
    elif ch == '\c':
      i = HandleCR(c, i)
    elif ch == '\L':
      i = HandleLF(c, i)
    else:
      scanning = false         # end of input also ends the scan
  c.bufpos = i
proc getTok(c: var TPegLexer, tok: var TToken) =
  ## Scans the next token from the lexer `c` into `tok`.
  ##
  ## `tok` is reset first (kind ``tkInvalid``, no modifier, empty literal),
  ## then whitespace and comments are skipped. Afterwards ``tok.kind`` is the
  ## recognised token kind, or stays ``tkInvalid`` for input that is not a
  ## valid token; ``tok.literal`` holds the token text, which the parser
  ## uses in its error messages.
  tok.kind = tkInvalid
  tok.modifier = modNone
  setlen(tok.literal, 0)
  skip(c)
  case c.buf[c.bufpos]
  of '{':
    tok.kind = tkCurlyLe
    inc(c.bufpos)
    add(tok.literal, '{')
  of '}':
    tok.kind = tkCurlyRi
    inc(c.bufpos)
    add(tok.literal, '}')
  of '[':
    getCharset(c, tok)
  of '(':
    tok.kind = tkParLe
    inc(c.bufpos)
    add(tok.literal, '(')
  of ')':
    tok.kind = tkParRi
    inc(c.bufpos)
    add(tok.literal, ')')
  of '.':
    tok.kind = tkAny
    inc(c.bufpos)
    add(tok.literal, '.')
  of '_':
    tok.kind = tkAnyRune
    inc(c.bufpos)
    add(tok.literal, '_')
  of '\\':
    getBuiltin(c, tok)
  of '\'', '"': getString(c, tok)
  of '\0':
    tok.kind = tkEof
    tok.literal = "[EOF]"
  of 'a'..'z', 'A'..'Z', '\128'..'\255':
    getSymbol(c, tok)
    if c.buf[c.bufpos] in {'\'', '"'}:
      # an identifier directly followed by a quote is a string modifier
      case tok.literal
      of "i": tok.modifier = modIgnoreCase
      of "y": tok.modifier = modIgnoreStyle
      of "v": tok.modifier = modVerbatim
      else: nil
      setLen(tok.literal, 0)
      getString(c, tok)
      # any other identifier in front of a string literal is an error
      if tok.modifier == modNone: tok.kind = tkInvalid
  of '+':
    tok.kind = tkPlus
    inc(c.bufpos)
    add(tok.literal, '+')
  of '*':
    tok.kind = tkStar
    inc(c.bufpos)
    add(tok.literal, '*')      # was '+': wrong text in error messages
  of '<':
    if c.buf[c.bufpos+1] == '-':
      inc(c.bufpos, 2)
      tok.kind = tkArrow
      add(tok.literal, "<-")
    else:
      # A lone '<' is invalid. Consume it like the catch-all branch does,
      # so that the lexer always makes progress.
      add(tok.literal, '<')
      inc(c.bufpos)
  of '/':
    tok.kind = tkBar
    inc(c.bufpos)
    add(tok.literal, '/')
  of '?':
    tok.kind = tkOption
    inc(c.bufpos)
    add(tok.literal, '?')
  of '!':
    tok.kind = tkNot
    inc(c.bufpos)
    add(tok.literal, '!')
  of '&':
    tok.kind = tkAmp
    inc(c.bufpos)
    add(tok.literal, '&')      # was '!': wrong text in error messages
  of '@':
    tok.kind = tkAt
    inc(c.bufpos)
    add(tok.literal, '@')
  else:
    # unknown character: keep tkInvalid but consume it
    add(tok.literal, c.buf[c.bufpos])
    inc(c.bufpos)
proc modifiedTerm(s: string, m: TModifier): TPeg =
  ## Builds the terminal PEG for the literal `s` under the modifier `m`:
  ## ``termIgnoreCase`` for ``modIgnoreCase``, ``termIgnoreStyle`` for
  ## ``modIgnoreStyle`` and the plain ``term`` for the remaining modifiers.
  if m == modIgnoreCase: return termIgnoreCase(s)
  if m == modIgnoreStyle: return termIgnoreStyle(s)
  # modNone and modVerbatim
  return term(s)
proc parseRule(p: var TPegParser): PNonTerminal =
  ## Parses one rule of the form ``name <- expression`` and returns its
  ## nonterminal. Reports an error via ``pegError`` if the current token
  ## does not start a rule or if the nonterminal was already declared.
  if p.tok.kind != tkIdentifier or not arrowIsNextTok(p):
    pegError(p, "rule expected, but found: " & p.tok.literal)
    return
  var nt = getNonTerminal(p, p.tok.literal)
  if ntDeclared in nt.flags:
    pegError(p, "attempt to redefine: " & nt.name)
  # record where the rule is defined
  nt.line = getLine(p)
  nt.col = getColumn(p)
  getTok(p)
  eat(p, tkArrow)
  nt.rule = parseExpr(p)
  # the flag is set only after the body has been parsed
  incl(nt.flags, ntDeclared)
  result = nt
TPeg = + var p: TPegParser + init(TPegLexer(p), input, filename, line, col) + p.tok.kind = tkInvalid + p.tok.modifier = modNone + p.tok.literal = "" + p.tok.charset = {} + p.nonterms = @[] + getTok(p) + result = rawParse(p) + +proc peg*(pattern: string): TPeg = + ## constructs a TPeg object from the `pattern`. The short name has been + ## chosen to encourage its use as a raw string modifier:: + ## + ## peg"{\ident} \s* '=' \s* {.*}" + result = parsePeg(pattern, "pattern") + +when isMainModule: + assert match("(a b c)", peg"'(' @ ')'") + assert match("W_HI_Le", peg"\y 'while'") + assert(not match("W_HI_L", peg"\y 'while'")) + assert(not match("W_HI_Le", peg"\y v'while'")) + assert match("W_HI_Le", peg"y'while'") + + assert($ +digits == $peg"\d+") + assert "0158787".match(peg"\d+") + assert "ABC 0232".match(peg"\w+\s+\d+") + assert "ABC".match(peg"\d+ / \w+") + + for word in split("00232this02939is39an22example111", peg"\d+"): + writeln(stdout, word) + + assert matchLen("key", ident) == 3 + + var pattern = sequence(ident, *whitespace, term('='), *whitespace, ident) + assert matchLen("key1= cal9", pattern) == 11 + + var ws = newNonTerminal("ws", 1, 1) + ws.rule = *whitespace + + var expr = newNonTerminal("expr", 1, 1) + expr.rule = sequence(capture(ident), *sequence( + nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr))) + + var c: TMatchClosure + var s = "a+b + c +d+e+f" + assert m(s, expr.rule, 0, c) == len(s) + var a = "" + for i in 0..c.ml-1: + a.add(copy(s, c.matches[i][0], c.matches[i][1])) + assert a == "abcdef" + #echo expr.rule + + #const filename = "lib/devel/peg/grammar.txt" + #var grammar = parsePeg(newFileStream(filename, fmRead), filename) + #echo "a <- [abc]*?".match(grammar) + assert find("_____abc_______", term("abc")) == 5 + assert match("_______ana", peg"A <- 'ana' / . 
A") + assert match("abcs%%%", peg"A <- ..A / .A / '%'") + + if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}": + assert matches[0] == "abc" + else: + assert false + + var g2 = peg"""S <- A B / C D + A <- 'a'+ + B <- 'b'+ + C <- 'c'+ + D <- 'd'+ + """ + assert($g2 == "((A B) / (C D))") + assert match("cccccdddddd", g2) + assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") == + "var1<-keykey; var2<-key2key2") + assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") + + if "aaaaaa" =~ peg"'aa' !. / ({'a'})+": + assert matches[0] == "a" + else: + assert false diff --git a/nimlib/pure/re.nim b/nimlib/pure/re.nim new file mode 100755 index 000000000..1328f5f1f --- /dev/null +++ b/nimlib/pure/re.nim @@ -0,0 +1,354 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Regular expression support for Nimrod. Consider using the pegs module +## instead. + +{.compile: "tre/tre_all.c".} + +from strutils import addf + +type + TRegExDesc {.pure, final.} = object + re_nsub: int # Number of parenthesized subexpressions. + value: pointer # For internal use only. + + TRegEx* = ref TRegExDesc ## a compiled regular expression + EInvalidRegEx* = object of EInvalidValue + ## is raised if the pattern is no valid regular expression. + + TRegMatch {.pure.} = object + so, eo: cint + +const + MaxSubpatterns* = 10 + ## defines the maximum number of subpatterns that can be captured. + ## More subpatterns cannot be captured! 
proc re*(s: string): TRegEx =
  ## Compiles the pattern `s` into a regular expression object. Nimrod's
  ## extended raw string literals allow ``re"[abc]"`` as a short form for
  ## ``re(r"[abc]")``. Raises ``EInvalidRegEx`` with the matching entry of
  ## ``ErrorMessages`` if compilation fails.
  new(result, finalizeRegEx)
  var flags = cint(REG_EXTENDED or REG_NEWLINE)
  var status = int(regncomp(addr(result^), s, s.len, flags))
  if status == 0: return
  var failure: ref EInvalidRegEx
  new(failure)
  failure.msg = ErrorMessages[status]
  raise failure
proc rawmatch(s: string, pattern: TRegEx, matches: var openarray[string],
              start: int): tuple[first, last: int] =
  ## Runs `pattern` on ``s[start..]``. On success the raw match entries are
  ## copied into `matches` and the bounds of entry 0 are returned as
  ## ``(first, last)``, *relative to* `start`, with `last` inclusive. If
  ## there is no match, ``(-1, -1)`` is returned and `matches` is left alone.
  var
    rawMatches: array [0..maxSubpatterns-1, TRegMatch]
    cs = cstring(s)
    res = int(regnexec(addr(pattern^), cast[cstring](addr(cs[start])),
              s.len-start, maxSubpatterns, addr(rawMatches), cint(0)))
  if res == 0:
    # Never read past ``rawMatches``: a pattern may declare more
    # subexpressions than ``maxSubpatterns``.
    # NOTE(review): entry 0 of ``rawMatches`` is the one whose bounds are
    # returned below, so ``matches[0]`` receives that same span and the loop
    # bound ``re_nsub`` stops one entry short of the last group -- confirm
    # the intended numbering against the ``=~`` documentation.
    var n = min(min(matches.len, int(pattern.re_nsub)), maxSubpatterns)
    for i in 0..n-1:
      var a = int(rawMatches[i].so)
      var b = int(rawMatches[i].eo)
      if a >= 0 and b >= 0:
        matches[i] = copy(s, a+start, b - 1 + start)
      else:
        matches[i] = ""
    return (int(rawMatches[0].so), int(rawMatches[0].eo)-1)
  return (-1, -1)

proc match*(s: string, pattern: TRegEx, matches: var openarray[string],
            start = 0): bool =
  ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and
  ## the captured substrings in the array ``matches``. If it does not
  ## match, nothing is written into ``matches`` and ``false`` is
  ## returned.
  result = rawmatch(s, pattern, matches, start).first == 0

proc match*(s: string, pattern: TRegEx, start: int = 0): bool =
  ## returns ``true`` if ``s`` matches the ``pattern`` beginning
  ## from ``start``.
  var matches: array [0..0, string]
  result = rawmatch(s, pattern, matches, start).first == 0

proc matchLen*(s: string, pattern: TRegEx, matches: var openarray[string],
               start = 0): int =
  ## the same as ``match``, but it returns the length of the match,
  ## if there is no match, -1 is returned. Note that a match length
  ## of zero can happen.
  var (a, b) = rawmatch(s, pattern, matches, start)
  # Like ``match``, only a match that begins exactly at ``start`` counts
  # (``a`` is relative to ``start``). The length of the inclusive span
  # ``a..b`` is ``b - a + 1``; the former ``a - b + 1`` was inverted and
  # yielded 1 for "no match".
  if a != 0: result = -1
  else: result = b - a + 1

proc matchLen*(s: string, pattern: TRegEx, start = 0): int =
  ## the same as ``match``, but it returns the length of the match,
  ## if there is no match, -1 is returned. Note that a match length
  ## of zero can happen.
  var matches: array [0..0, string]
  var (a, b) = rawmatch(s, pattern, matches, start)
  # see the overload above for the reasoning
  if a != 0: result = -1
  else: result = b - a + 1

proc find*(s: string, pattern: TRegEx, matches: var openarray[string],
           start = 0): int =
  ## returns the starting position of the first occurrence of ``pattern``
  ## in ``s[start..]`` (as an index into ``s``) and the captured substrings
  ## in the array ``matches``. If it does not match, nothing is written
  ## into ``matches`` and -1 is returned.
  result = rawmatch(s, pattern, matches, start).first
  # ``first`` is relative to ``start``; convert it to an index into ``s``
  if result >= 0: inc(result, start)

proc find*(s: string, pattern: TRegEx, start = 0): int =
  ## returns the starting position of the first occurrence of ``pattern``
  ## in ``s[start..]`` (as an index into ``s``), or -1 if there is none.
  var matches: array [0..0, string]
  result = rawmatch(s, pattern, matches, start).first
  if result >= 0: inc(result, start)
proc endsWith*(s: string, suffix: TRegEx): bool =
  ## returns true if `s` ends with the pattern `suffix`
  result = false
  # Position ``s.len`` is tried as well: a suffix pattern that can match
  # the empty string trivially matches at the very end, and this also makes
  # the empty input string work (the old ``0 .. s.len-1`` loop never ran
  # for it).
  for i in 0 .. s.len:
    if matchLen(s, suffix, i) == s.len - i: return true
proc parallelReplace*(s: string, subs: openArray[
                      tuple[pattern: TRegEx, repl: string]]): string =
  ## Returns a modified copy of `s` with the substitutions in `subs`
  ## applied in parallel: at every position the patterns are tried in order
  ## and the first one that matches a non-empty span is replaced by its
  ## `repl` string (formatted with the captures); if none matches, the
  ## character is copied unchanged.
  result = ""
  var pos = 0
  var groups: array[0..maxSubpatterns-1, string]
  while pos < s.len:
    var consumed = 0
    for k in 0..high(subs):
      consumed = matchLen(s, subs[k].pattern, groups, pos)
      if consumed > 0:
        addf(result, subs[k].repl, groups)
        break
    if consumed > 0:
      inc(pos, consumed)
    else:
      # no substitution applies here: keep the character
      add(result, s[pos])
      inc(pos)
  # append whatever is left after the scan position
  add(result, copy(s, pos))
code-block:: nimrod + ## "this" + ## "is" + ## "an" + ## "example" + ## + var + first = 0 + last = 0 + while last < len(s): + var x = matchLen(s, sep, last) + if x > 0: inc(last, x) + first = last + while last < len(s): + inc(last) + x = matchLen(s, sep, last) + if x > 0: break + if first < last: + yield copy(s, first, last-1) + +proc split*(s: string, sep: TRegEx): seq[string] = + ## Splits the string `s` into substrings. + accumulateResult(split(s, sep)) + +const ## common regular expressions + reIdentifier* = r"\b[a-zA-Z_]+[a-zA-Z_0-9]*\b" ## describes an identifier + reNatural* = r"\b\d+\b" ## describes a natural number + reInteger* = r"\b[-+]?\d+\b" ## describes an integer + reHex* = r"\b0[xX][0-9a-fA-F]+\b" ## describes a hexadecimal number + reBinary* = r"\b0[bB][01]+\b" ## describes a binary number (example: 0b11101) + reOctal* = r"\b0[oO][0-7]+\b" ## describes an octal number (example: 0o777) + reFloat* = r"\b[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\b" + ## describes a floating point number + reEmail* = r"\b[a-zA-Z0-9!#$%&'*+/=?^_`{|}~\-]+(?:\. 
&" & + r"[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)" & + r"*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+" & + r"(?:[a-zA-Z]{2}|com|org|" & + r"net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\b" + ## describes a common email address + reURL* = r"\b(http(s)?|ftp|gopher|telnet|file|notes|ms\-help):" & + r"((//)|(\\\\))+[\w\d:#@%/;$()~_?\+\-\=\\\.\&]*\b" + ## describes an URL + +when isMainModule: + echo matchLen("key", re"[a-zA-Z_][a-zA-Z_0-9]*") + + var pattern = re"[a-zA-Z_][a-zA-Z_0-9]*\s*=\s*[a-zA-Z_][a-zA-Z_0-9]*" + echo matchLen("key1= cal9", pattern, 2) + + echo find("_____abc_______", re("abc"), 3) + #echo "var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") + #echo "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") + + if "abc" =~ re"(a)bc xyz|([a-z]+)": + echo matches[0] + else: + echo "BUG" + +# for word in split("00232this02939is39an22example111", peg"\d+"): +# writeln(stdout, word) diff --git a/nimlib/pure/regexprs.nim b/nimlib/pure/regexprs.nim new file mode 100755 index 000000000..cff3152cf --- /dev/null +++ b/nimlib/pure/regexprs.nim @@ -0,0 +1,177 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Regular expression support for Nimrod. +## Currently this module is implemented by providing a wrapper around the +## `PRCE (Perl-Compatible Regular Expressions) <http://www.pcre.org>`_ +## C library. This means that your application will depend on the PRCE +## library's licence when using this module, which should not be a problem +## though. +## PRCE's licence follows: +## +## .. include:: ../doc/regexprs.txt +## + +# This is not just a convenient wrapper for the pcre library; the +# API will stay the same if the implementation should change. + +import + pcre, strutils + +type + EInvalidRegEx* = object of EInvalidValue + ## is raised if the pattern is no valid regular expression. 
+ +const + MaxSubpatterns* = 10 + ## defines the maximum number of subpatterns that can be captured. + ## More subpatterns cannot be captured! + +proc match*(s, pattern: string, matches: var openarray[string], + start: int = 0): bool + ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and + ## the captured substrings in the array ``matches``. If it does not + ## match, nothing is written into ``matches`` and ``false`` is + ## returned. + +proc match*(s, pattern: string, start: int = 0): bool + ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``. + +proc matchLen*(s, pattern: string, matches: var openarray[string], + start: int = 0): int + ## the same as ``match``, but it returns the length of the match, + ## if there is no match, -1 is returned. Note that a match length + ## of zero can happen. + +proc find*(s, pattern: string, matches: var openarray[string], + start: int = 0): bool + ## returns ``true`` if ``pattern`` occurs in ``s`` and the captured + ## substrings in the array ``matches``. If it does not match, nothing + ## is written into ``matches``. + +proc find*(s, pattern: string, start: int = 0): bool + ## returns ``true`` if ``pattern`` occurs in ``s``. 
+ +proc rawCompile(pattern: string, flags: cint): PPcre = + var + msg: CString + offset: int + com = pcreCompile(pattern, flags, addr(msg), addr(offset), nil) + if com == nil: + var e: ref EInvalidRegEx + new(e) + e.msg = $msg & "\n" & pattern & "\n" & repeatChar(offset) & "^\n" + raise e + return com + +proc matchOrFind(s: string, pattern: PPcre, matches: var openarray[string], + start: cint): cint = + var + rawMatches: array [0..maxSubpatterns * 3 - 1, cint] + res = int(pcreExec(pattern, nil, s, len(s), start, 0, + cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)) + dealloc(pattern) + if res < 0: return res + for i in 0..res-1: + var + a = rawMatches[i * 2] + b = rawMatches[i * 2 + 1] + if a >= 0'i32: matches[i] = copy(s, a, int(b)-1) + else: matches[i] = "" + return res + +proc matchOrFind(s: string, pattern: PPcre, start: cint): cint = + var + rawMatches: array [0..maxSubpatterns * 3 - 1, cint] + res = pcreExec(pattern, nil, s, len(s), start, 0, + cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) + dealloc(pattern) + return res + +proc match(s, pattern: string, matches: var openarray[string], + start: int = 0): bool = + return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED), + matches, start) >= 0'i32 + +proc matchLen(s, pattern: string, matches: var openarray[string], + start: int = 0): int = + return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED), matches, start) + +proc find(s, pattern: string, matches: var openarray[string], + start: int = 0): bool = + return matchOrFind(s, rawCompile(pattern, PCRE_MULTILINE), + matches, start) >= 0'i32 + +proc match(s, pattern: string, start: int = 0): bool = + return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED), start) >= 0'i32 + +proc find(s, pattern: string, start: int = 0): bool = + return matchOrFind(s, rawCompile(pattern, PCRE_MULTILINE), start) >= 0'i32 + +template `=~` *(s, pattern: expr): expr = + ## This calls ``match`` with an implicit declared ``matches`` array that + ## can be used in the 
scope of the ``=~`` call: + ## + ## .. code-block:: nimrod + ## + ## if line =~ r"\s*(\w+)\s*\=\s*(\w+)": + ## # matches a key=value pair: + ## echo("Key: ", matches[1]) + ## echo("Value: ", matches[2]) + ## elif line =~ r"\s*(\#.*)": + ## # matches a comment + ## # note that the implicit ``matches`` array is different from the + ## # ``matches`` array of the first branch + ## echo("comment: ", matches[1]) + ## else: + ## echo("syntax error") + ## + when not definedInScope(matches): + var matches: array[0..maxSubPatterns-1, string] + match(s, pattern, matches) + + +const ## common regular expressions + reIdentifier* = r"\b[a-zA-Z_][a-zA-Z_0-9]*\b" ## describes an identifier + reNatural* = r"\b\d+\b" ## describes a natural number + reInteger* = r"\b[-+]?\d+\b" ## describes an integer + reHex* = r"\b0[xX][0-9a-fA-F]+\b" ## describes a hexadecimal number + reBinary* = r"\b0[bB][01]+\b" ## describes a binary number (example: 0b11101) + reOctal* = r"\b0[oO][0-7]+\b" ## describes an octal number (example: 0o777) + reFloat* = r"\b[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\b" + ## describes a floating point number + reEmail* = r"\b[a-zA-Z0-9!#$%&'*+/=?^_`{|}~\-]+(?:\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)" & + r"*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+(?:[a-zA-Z]{2}|com|org|" & + r"net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\b" + ## describes a common email address + reURL* = r"\b(http(s)?|ftp|gopher|telnet|file|notes|ms\-help):" & + r"((//)|(\\\\))+[\w\d:#@%/;$()~_?\+\-\=\\\.\&]*\b" + ## describes an URL + +proc verbose*(pattern: string): string {.noSideEffect.} = + ## deletes whitespace from a pattern that is not escaped or in a character + ## class. This is modelled after Perl's ``/x`` modifier. 
+ result = "" + var i = 0 + while i < pattern.len: + case pattern[i] + of ' ', '\t': + inc i + of '\\': + add result, '\\' + add result, pattern[i+1] + inc i, 2 + of '[': + while pattern[i] != ']' and pattern[i] != '\0': + add result, pattern[i] + inc i + else: + add result, pattern[i] + inc i + diff --git a/nimlib/pure/streams.nim b/nimlib/pure/streams.nim new file mode 100755 index 000000000..f4d2911fc --- /dev/null +++ b/nimlib/pure/streams.nim @@ -0,0 +1,245 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module provides a stream interface and two implementations thereof: +## the `PFileStream` and the `PStringStream` which implement the stream +## interface for Nimrod file objects (`TFile`) and strings. Other modules +## may provide other implementations for this standard stream interface. + +proc newEIO(msg: string): ref EIO = + new(result) + result.msg = msg + +type + PStream* = ref TStream + TStream* = object of TObject ## Stream interface that supports + ## writing or reading. + close*: proc (s: PStream) + atEnd*: proc (s: PStream): bool + setPosition*: proc (s: PStream, pos: int) + getPosition*: proc (s: PStream): int + readData*: proc (s: PStream, buffer: pointer, bufLen: int): int + writeData*: proc (s: PStream, buffer: pointer, bufLen: int) + +proc write*[T](s: PStream, x: T) = + ## generic write procedure. Writes `x` to the stream `s`. Implementation: + ## + ## .. code-block:: Nimrod + ## + ## s.writeData(s, addr(x), sizeof(x)) + var x = x + s.writeData(s, addr(x), sizeof(x)) + +proc write*(s: PStream, x: string) = + ## writes the string `x` to the the stream `s`. No length field or + ## terminating zero is written. + s.writeData(s, cstring(x), x.len) + +proc read[T](s: PStream, result: var T) = + ## generic read procedure. Reads `result` from the stream `s`. 
+ if s.readData(s, addr(result), sizeof(T)) != sizeof(T): + raise newEIO("cannot read from stream") + +proc readChar*(s: PStream): char = + ## reads a char from the stream `s`. Raises `EIO` if an error occured. + ## Returns '\0' as an EOF marker. + discard s.readData(s, addr(result), sizeof(result)) + +proc readBool*(s: PStream): bool = + ## reads a bool from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readInt8*(s: PStream): int8 = + ## reads an int8 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readInt16*(s: PStream): int16 = + ## reads an int16 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readInt32*(s: PStream): int32 = + ## reads an int32 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readInt64*(s: PStream): int64 = + ## reads an int64 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readFloat32*(s: PStream): float32 = + ## reads a float32 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readFloat64*(s: PStream): float64 = + ## reads a float64 from the stream `s`. Raises `EIO` if an error occured. + read(s, result) + +proc readStr*(s: PStream, length: int): string = + ## reads a string of length `length` from the stream `s`. Raises `EIO` if + ## an error occured. + result = newString(length) + var L = s.readData(s, addr(result[0]), length) + if L != length: setLen(result, L) + +proc readLine*(s: PStream): string = + ## Reads a line from a stream `s`. Note: This is not very efficient. Raises + ## `EIO` if an error occured. 
+ result = "" + while not s.atEnd(s): + var c = readChar(s) + if c == '\c': + c = readChar(s) + break + elif c == '\L' or c == '\0': break + result.add(c) + +type + PStringStream* = ref TStringStream ## a stream that encapsulates a string + TStringStream* = object of TStream + data*: string + pos: int + +proc ssAtEnd(s: PStringStream): bool = + return s.pos >= s.data.len + +proc ssSetPosition(s: PStringStream, pos: int) = + s.pos = min(pos, s.data.len-1) + +proc ssGetPosition(s: PStringStream): int = + return s.pos + +proc ssReadData(s: PStringStream, buffer: pointer, bufLen: int): int = + result = min(bufLen, s.data.len - s.pos) + if result > 0: + copyMem(buffer, addr(s.data[s.pos]), result) + inc(s.pos, result) + +proc ssWriteData(s: PStringStream, buffer: pointer, bufLen: int) = + if bufLen > 0: + setLen(s.data, s.data.len + bufLen) + copyMem(addr(s.data[s.pos]), buffer, bufLen) + inc(s.pos, bufLen) + +proc ssClose(s: PStringStream) = + s.data = nil + +proc newStringStream*(s: string = ""): PStringStream = + ## creates a new stream from the string `s`. 
+ new(result) + result.data = s + result.pos = 0 + result.close = ssClose + result.atEnd = ssAtEnd + result.setPosition = ssSetPosition + result.getPosition = ssGetPosition + result.readData = ssReadData + result.writeData = ssWriteData + +type + PFileStream* = ref TFileStream ## a stream that encapsulates a `TFile` + TFileStream* = object of TStream + f: TFile + +proc fsClose(s: PFileStream) = close(s.f) +proc fsAtEnd(s: PFileStream): bool = return EndOfFile(s.f) +proc fsSetPosition(s: PFileStream, pos: int) = setFilePos(s.f, pos) +proc fsGetPosition(s: PFileStream): int = return int(getFilePos(s.f)) + +proc fsReadData(s: PFileStream, buffer: pointer, bufLen: int): int = + result = readBuffer(s.f, buffer, bufLen) + +proc fsWriteData(s: PFileStream, buffer: pointer, bufLen: int) = + if writeBuffer(s.f, buffer, bufLen) != bufLen: + raise newEIO("cannot write to stream") + +proc newFileStream*(f: TFile): PFileStream = + ## creates a new stream from the file `f`. + new(result) + result.f = f + result.close = fsClose + result.atEnd = fsAtEnd + result.setPosition = fsSetPosition + result.getPosition = fsGetPosition + result.readData = fsReadData + result.writeData = fsWriteData + +proc newFileStream*(filename: string, mode: TFileMode): PFileStream = + ## creates a new stream from the file named `filename` with the mode `mode`. + ## If the file cannot be opened, nil is returned. 
+ var f: TFile + if Open(f, filename, mode): result = newFileStream(f) + + +when true: + nil +else: + type + TFileHandle* = cint ## Operating system file handle + PFileHandleStream* = ref TFileHandleStream + TFileHandleStream* = object of TStream + handle*: TFileHandle + pos: int + + proc newEOS(msg: string): ref EOS = + new(result) + result.msg = msg + + proc hsGetPosition(s: PFileHandleStream): int = + return s.pos + + when defined(windows): + # do not import windows as this increases compile times: + nil + else: + import posix + + proc hsSetPosition(s: PFileHandleStream, pos: int) = + discard lseek(s.handle, pos, SEEK_SET) + + proc hsClose(s: PFileHandleStream) = discard close(s.handle) + proc hsAtEnd(s: PFileHandleStream): bool = + var pos = hsGetPosition(s) + var theEnd = lseek(s.handle, 0, SEEK_END) + result = pos >= theEnd + hsSetPosition(s, pos) # set position back + + proc hsReadData(s: PFileHandleStream, buffer: pointer, bufLen: int): int = + result = posix.read(s.handle, buffer, bufLen) + inc(s.pos, result) + + proc hsWriteData(s: PFileHandleStream, buffer: pointer, bufLen: int) = + if posix.write(s.handle, buffer, bufLen) != bufLen: + raise newEIO("cannot write to stream") + inc(s.pos, bufLen) + + proc newFileHandleStream*(handle: TFileHandle): PFileHandleStream = + new(result) + result.handle = handle + result.pos = 0 + result.close = hsClose + result.atEnd = hsAtEnd + result.setPosition = hsSetPosition + result.getPosition = hsGetPosition + result.readData = hsReadData + result.writeData = hsWriteData + + proc newFileHandleStream*(filename: string, + mode: TFileMode): PFileHandleStream = + when defined(windows): + nil + else: + var flags: cint + case mode + of fmRead: flags = posix.O_RDONLY + of fmWrite: flags = O_WRONLY or int(O_CREAT) + of fmReadWrite: flags = O_RDWR or int(O_CREAT) + of fmReadWriteExisting: flags = O_RDWR + of fmAppend: flags = O_WRONLY or int(O_CREAT) or O_APPEND + var handle = open(filename, flags) + if handle < 0: raise 
newEOS("posix.open() call failed") + result = newFileHandleStream(handle) diff --git a/nimlib/pure/strtabs.nim b/nimlib/pure/strtabs.nim new file mode 100755 index 000000000..10cd0b933 --- /dev/null +++ b/nimlib/pure/strtabs.nim @@ -0,0 +1,198 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2008 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## The ``strtabs`` module implements an efficient hash table that is a mapping +## from strings to strings. Supports a case-sensitive, case-insensitive and +## style-insensitive mode. An efficient string substitution operator ``%`` +## for the string table is also provided. + +import + os, hashes, strutils + +type + TStringTableMode* = enum ## describes the tables operation mode + modeCaseSensitive, ## the table is case sensitive + modeCaseInsensitive, ## the table is case insensitive + modeStyleInsensitive ## the table is style insensitive + TKeyValuePair = tuple[key, val: string] + TKeyValuePairSeq = seq[TKeyValuePair] + TStringTable* = object of TObject + counter: int + data: TKeyValuePairSeq + mode: TStringTableMode + + PStringTable* = ref TStringTable ## use this type to declare string tables + +proc newStringTable*(keyValuePairs: openarray[string], + mode: TStringTableMode = modeCaseSensitive): PStringTable + ## creates a new string table with given key value pairs. + ## Example:: + ## var mytab = newStringTable("key1", "val1", "key2", "val2", + ## modeCaseInsensitive) + +proc newStringTable*(mode: TStringTableMode = modeCaseSensitive): PStringTable + ## creates a new string table that is empty. + +proc `[]=`*(t: PStringTable, key, val: string) + ## puts a (key, value)-pair into `t`. + +proc `[]`*(t: PStringTable, key: string): string + ## retrieves the value at ``t[key]``. If `key` is not in `t`, "" is returned + ## and no exception is raised. One can check with ``hasKey`` whether the key + ## exists. 
+ +proc hasKey*(t: PStringTable, key: string): bool + ## returns true iff `key` is in the table `t`. + +proc len*(t: PStringTable): int = + ## returns the number of keys in `t`. + result = t.counter + +iterator pairs*(t: PStringTable): tuple[key, value: string] = + ## iterates over any (key, value) pair in the table `t`. + for h in 0..high(t.data): + if not isNil(t.data[h].key): + yield (t.data[h].key, t.data[h].val) + +type + TFormatFlag* = enum ## flags for the `%` operator + useEnvironment, ## use environment variable if the ``$key`` + ## is not found in the table + useEmpty, ## use the empty string as a default, thus it + ## won't throw an exception if ``$key`` is not + ## in the table + useKey ## do not replace ``$key`` if it is not found + ## in the table (or in the environment) + +proc `%`*(f: string, t: PStringTable, flags: set[TFormatFlag] = {}): string + ## The `%` operator for string tables. + +# implementation + +const + growthFactor = 2 + startSize = 64 + +proc newStringTable(mode: TStringTableMode = modeCaseSensitive): PStringTable = + new(result) + result.mode = mode + result.counter = 0 + newSeq(result.data, startSize) + +proc newStringTable(keyValuePairs: openarray[string], + mode: TStringTableMode = modeCaseSensitive): PStringTable = + result = newStringTable(mode) + var i = 0 + while i < high(keyValuePairs): + result[keyValuePairs[i]] = keyValuePairs[i + 1] + inc(i, 2) + +proc myhash(t: PStringTable, key: string): THash = + case t.mode + of modeCaseSensitive: result = hashes.hash(key) + of modeCaseInsensitive: result = hashes.hashIgnoreCase(key) + of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key) + +proc myCmp(t: PStringTable, a, b: string): bool = + case t.mode + of modeCaseSensitive: result = cmp(a, b) == 0 + of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0 + of modeStyleInsensitive: result = cmpIgnoreStyle(a, b) == 0 + +proc mustRehash(length, counter: int): bool = + assert(length > counter) + result = (length * 2 < 
counter * 3) or (length - counter < 4) + +proc nextTry(h, maxHash: THash): THash = + result = ((5 * h) + 1) and maxHash + +proc RawGet(t: PStringTable, key: string): int = + var h: THash + h = myhash(t, key) and high(t.data) # start with real hash value + while not isNil(t.data[h].key): + if mycmp(t, t.data[h].key, key): + return h + h = nextTry(h, high(t.data)) + result = - 1 + +proc `[]`(t: PStringTable, key: string): string = + var index: int + index = RawGet(t, key) + if index >= 0: result = t.data[index].val + else: result = "" + +proc hasKey(t: PStringTable, key: string): bool = + result = rawGet(t, key) >= 0 + +proc RawInsert(t: PStringTable, data: var TKeyValuePairSeq, key, val: string) = + var h: THash + h = myhash(t, key) and high(data) + while not isNil(data[h].key): + h = nextTry(h, high(data)) + data[h].key = key + data[h].val = val + +proc Enlarge(t: PStringTable) = + var n: TKeyValuePairSeq + newSeq(n, len(t.data) * growthFactor) + for i in countup(0, high(t.data)): + if not isNil(t.data[i].key): RawInsert(t, n, t.data[i].key, t.data[i].val) + swap(t.data, n) + +proc `[]=`(t: PStringTable, key, val: string) = + var index = RawGet(t, key) + if index >= 0: + t.data[index].val = val + else: + if mustRehash(len(t.data), t.counter): Enlarge(t) + RawInsert(t, t.data, key, val) + inc(t.counter) + +proc RaiseFormatException(s: string) = + var e: ref EInvalidValue + new(e) + e.msg = "format string: key not found: " & s + raise e + +proc getValue(t: PStringTable, flags: set[TFormatFlag], key: string): string = + if hasKey(t, key): return t[key] + if useEnvironment in flags: result = os.getEnv(key) + else: result = "" + if result.len == 0: + if useKey in flags: result = '$' & key + elif not (useEmpty in flags): raiseFormatException(key) + +proc `%`(f: string, t: PStringTable, flags: set[TFormatFlag] = {}): string = + const + PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} + result = "" + var i = 0 + while i < len(f): + if f[i] == '$': + case 
f[i+1] + of '$': + add(result, '$') + inc(i, 2) + of '{': + var j = i + 1 + while j < f.len and f[j] != '}': inc(j) + add(result, getValue(t, flags, copy(f, i+2, j-1))) + i = j + 1 + of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '_': + var j = i + 1 + while j < f.len and f[j] in PatternChars: inc(j) + add(result, getValue(t, flags, copy(f, i+1, j-1))) + i = j + else: + add(result, f[i]) + inc(i) + else: + add(result, f[i]) + inc(i) + diff --git a/nimlib/pure/strutils.nim b/nimlib/pure/strutils.nim new file mode 100755 index 000000000..d7fd69f61 --- /dev/null +++ b/nimlib/pure/strutils.nim @@ -0,0 +1,973 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module contains various string utility routines. +## See the module `regexprs` for regular expression support. + +{.deadCodeElim: on.} + +{.push debugger:off .} # the user does not want to trace a part + # of the standard library! + +# copied from excpt.nim, because I don't want to make this template public +template newException(exceptn, message: expr): expr = + block: # open a new scope + var + e: ref exceptn + new(e) + e.msg = message + e + + +type + TCharSet* = set[char] # for compatibility with Nim + +const + Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'} + ## All the characters that count as whitespace. + + Letters* = {'A'..'Z', 'a'..'z'} + ## the set of letters + + Digits* = {'0'..'9'} + ## the set of digits + + IdentChars* = {'a'..'z', 'A'..'Z', '0'..'9', '_'} + ## the set of characters an identifier can consist of + + IdentStartChars* = {'a'..'z', 'A'..'Z', '_'} + ## the set of characters an identifier can start with + +proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect.} + ## The `substitution`:idx: operator performs string substitutions in + ## `formatstr` and returns a modified `formatstr`. This is often called + ## `string interpolation`:idx:. 
+ ## + ## This is best explained by an example: + ## + ## .. code-block:: nimrod + ## "$1 eats $2." % ["The cat", "fish"] + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "The cat eats fish." + ## + ## The substitution variables (the thing after the ``$``) are enumerated + ## from 1 to ``a.len``. + ## The notation ``$#`` can be used to refer to the next substitution variable: + ## + ## .. code-block:: nimrod + ## "$# eats $#." % ["The cat", "fish"] + ## + ## Substitution variables can also be words (that is + ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even + ## indices are keys and with odd indices are the corresponding values. + ## An example: + ## + ## .. code-block:: nimrod + ## "$animal eats $food." % ["animal", "The cat", "food", "fish"] + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "The cat eats fish." + ## + ## The variables are compared with `cmpIgnoreStyle`. `EInvalidValue` is + ## raised if an ill-formed format string has been passed to the `%` operator. + +proc `%` *(formatstr, a: string): string {.noSideEffect.} + ## This is the same as ``formatstr % [a]``. + +proc addf*(s: var string, formatstr: string, a: openarray[string]) + ## The same as ``add(s, formatstr % a)``, but more efficient. + +proc strip*(s: string, leading = true, trailing = true): string {.noSideEffect.} + ## Strips whitespace from `s` and returns the resulting string. + ## If `leading` is true, leading whitespace is stripped. + ## If `trailing` is true, trailing whitespace is stripped. + +proc toLower*(s: string): string {.noSideEffect, procvar.} + ## Converts `s` into lower case. This works only for the letters A-Z. + ## See `unicode.toLower` for a version that works for any Unicode character. + +proc toLower*(c: Char): Char {.noSideEffect, procvar.} + ## Converts `c` into lower case. This works only for the letters A-Z. + ## See `unicode.toLower` for a version that works for any Unicode character. 
+ +proc toUpper*(s: string): string {.noSideEffect, procvar.} + ## Converts `s` into upper case. This works only for the letters a-z. + ## See `unicode.toUpper` for a version that works for any Unicode character. + +proc toUpper*(c: Char): Char {.noSideEffect, procvar.} + ## Converts `c` into upper case. This works only for the letters a-z. + ## See `unicode.toUpper` for a version that works for any Unicode character. + +proc capitalize*(s: string): string {.noSideEffect, procvar.} + ## Converts the first character of `s` into upper case. + ## This works only for the letters a-z. + +proc normalize*(s: string): string {.noSideEffect, procvar.} + ## Normalizes the string `s`. That means to convert it to lower case and + ## remove any '_'. This is needed for Nimrod identifiers for example. + +proc find*(s, sub: string, start: int = 0): int {.noSideEffect.} + ## Searches for `sub` in `s` starting at position `start`. Searching is + ## case-sensitive. If `sub` is not in `s`, -1 is returned. + +proc find*(s: string, sub: char, start: int = 0): int {.noSideEffect.} + ## Searches for `sub` in `s` starting at position `start`. Searching is + ## case-sensitive. If `sub` is not in `s`, -1 is returned. + +proc find*(s: string, chars: set[char], start: int = 0): int {.noSideEffect.} + ## Searches for `chars` in `s` starting at position `start`. If `s` contains + ## none of the characters in `chars`, -1 is returned. + +proc replaceStr*(s, sub, by: string): string {.noSideEffect, deprecated.} + ## Replaces `sub` in `s` by the string `by`. + ## **Deprecated since version 0.8.0**: Use `replace` instead. + +proc replaceStr*(s: string, sub, by: char): string {.noSideEffect, deprecated.} + ## optimized version for characters. + ## **Deprecated since version 0.8.0**: Use `replace` instead. + +proc deleteStr*(s: var string, first, last: int) {.deprecated.} + ## Deletes in `s` the characters at position `first`..`last`. This modifies + ## `s` itself, it does not return a copy. 
+ ## **Deprecated since version 0.8.0**: Use `delete` instead. + +proc toOctal*(c: char): string + ## Converts a character `c` to its octal representation. The resulting + ## string may not have a leading zero. Its length is always exactly 3. + +iterator split*(s: string, seps: set[char] = Whitespace): string = + ## Splits the string `s` into substrings. + ## + ## Substrings are separated by a substring containing only `seps`. + ## Examples: + ## + ## .. code-block:: nimrod + ## for word in split(" this is an example "): + ## writeln(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "this" + ## "is" + ## "an" + ## "example" + ## + ## for word in split(";;this;is;an;;example;;;", {';'}): + ## writeln(stdout, word) + ## + ## produces the same output. + var last = 0 + assert(not ('\0' in seps)) + while last < len(s): + while s[last] in seps: inc(last) + var first = last + while last < len(s) and s[last] not_in seps: inc(last) # BUGFIX! + if first <= last-1: + yield copy(s, first, last-1) + +iterator split*(s: string, sep: char): string = + ## Splits the string `s` into substrings. + ## + ## Substrings are separated by the character `sep`. + ## Example: + ## + ## .. code-block:: nimrod + ## for word in split(";;this;is;an;;example;;;", ';'): + ## writeln(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "" + ## "" + ## "this" + ## "is" + ## "an" + ## "" + ## "example" + ## "" + ## "" + ## "" + ## + var last = 0 + assert('\0' != sep) + if len(s) > 0: + # `<=` is correct here for the edge cases! + while last <= len(s): + var first = last + while last < len(s) and s[last] != sep: inc(last) + yield copy(s, first, last-1) + inc(last) + +iterator splitLines*(s: string): string = + ## Splits the string `s` into its containing lines. Every newline + ## combination (CR, LF, CR-LF) is supported. The result strings contain + ## no trailing ``\n``. + ## + ## Example: + ## + ## .. 
code-block:: nimrod + ## for line in lines("\nthis\nis\nan\n\nexample\n"): + ## writeln(stdout, line) + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "" + ## "this" + ## "is" + ## "an" + ## "" + ## "example" + ## "" + var first = 0 + var last = 0 + while true: + while s[last] notin {'\0', '\c', '\l'}: inc(last) + yield copy(s, first, last-1) + # skip newlines: + if s[last] == '\l': inc(last) + elif s[last] == '\c': + inc(last) + if s[last] == '\l': inc(last) + else: break # was '\0' + first = last + +proc splitLinesSeq*(s: string): seq[string] {.noSideEffect, deprecated.} = + ## The same as `splitLines`, but is a proc that returns a sequence + ## of substrings. + ## **Deprecated since version 0.8.0**: Use `splitLines` instead. + accumulateResult(splitLines(s)) + +proc splitSeq*(s: string, seps: set[char] = Whitespace): seq[string] {. + noSideEffect, deprecated.} = + ## The same as `split`, but is a proc that returns a sequence of substrings. + ## **Deprecated since version 0.8.0**: Use `split` instead. + accumulateResult(split(s, seps)) + +proc splitSeq*(s: string, sep: char): seq[string] {.noSideEffect, + deprecated.} = + ## The same as `split`, but is a proc that returns a sequence of substrings. + ## **Deprecated since version 0.8.0**: Use `split` instead. + accumulateResult(split(s, sep)) + +proc splitLines*(s: string): seq[string] {.noSideEffect.} = + ## The same as the `splitLines` iterator, but is a proc that returns a + ## sequence of substrings. + accumulateResult(splitLines(s)) + +proc split*(s: string, seps: set[char] = Whitespace): seq[string] {. + noSideEffect.} = + ## The same as the `split` iterator, but is a proc that returns a + ## sequence of substrings. + accumulateResult(split(s, seps)) + +proc split*(s: string, sep: char): seq[string] {.noSideEffect.} = + ## The same as the `split` iterator, but is a proc that returns a sequence + ## of substrings. 
+ accumulateResult(split(s, sep)) + +proc cmpIgnoreCase*(a, b: string): int {.noSideEffect.} + ## Compares two strings in a case insensitive manner. Returns: + ## + ## | 0 iff a == b + ## | < 0 iff a < b + ## | > 0 iff a > b + +proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect.} + ## Compares two strings normalized (i.e. case and + ## underscores do not matter). Returns: + ## + ## | 0 iff a == b + ## | < 0 iff a < b + ## | > 0 iff a > b + +proc contains*(s: string, c: char): bool {.noSideEffect.} + ## Same as ``find(s, c) >= 0``. + +proc contains*(s, sub: string): bool {.noSideEffect.} + ## Same as ``find(s, sub) >= 0``. + +proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} + ## Same as ``find(s, chars) >= 0``. + +proc toHex*(x: BiggestInt, len: int): string {.noSideEffect.} + ## Converts `x` to its hexadecimal representation. The resulting string + ## will be exactly `len` characters long. No prefix like ``0x`` + ## is generated. `x` is treated as an unsigned value. + +proc intToStr*(x: int, minchars: int = 1): string + ## Converts `x` to its decimal representation. The resulting string + ## will be minimally `minchars` characters long. This is achieved by + ## adding leading zeros. + +proc ParseInt*(s: string): int {.noSideEffect, procvar.} + ## Parses a decimal integer value contained in `s`. If `s` is not + ## a valid integer, `EInvalidValue` is raised. + +proc ParseBiggestInt*(s: string): biggestInt {.noSideEffect, procvar.} + ## Parses a decimal integer value contained in `s`. If `s` is not + ## a valid integer, `EInvalidValue` is raised. + +proc ParseFloat*(s: string): float {.noSideEffect, procvar.} + ## Parses a decimal floating point value contained in `s`. If `s` is not + ## a valid floating point number, `EInvalidValue` is raised. ``NAN``, + ## ``INF``, ``-INF`` are also supported (case insensitive comparison). 
# the stringify and format operators:
proc toString*[Ty](x: Ty): string {.deprecated.}
  ## This generic proc is the same as the stringify operator `$`.
  ##
  ## **Deprecated since version 0.8.2:** Use `$` instead.

proc repeatChar*(count: int, c: Char = ' '): string
  ## Returns a string of length `count` consisting only of
  ## the character `c`.

proc startsWith*(s, prefix: string): bool {.noSideEffect.}
  ## Returns true iff ``s`` starts with ``prefix``.
  ## If ``prefix == ""`` true is returned.

proc endsWith*(s, suffix: string): bool {.noSideEffect.}
  ## Returns true iff ``s`` ends with ``suffix``.
  ## If ``suffix == ""`` true is returned.

proc addSep*(dest: var string, sep = ", ", startLen = 0) {.noSideEffect,
                                                           inline.} =
  ## A shorthand for:
  ##
  ## .. code-block:: nimrod
  ##   if dest.len > startLen: add(dest, sep)
  ##
  ## This is often useful for generating some code where the items need to
  ## be *separated* by `sep`. `sep` is only added if `dest` is longer than
  ## `startLen`. The following example creates a string describing
  ## an array of integers:
  ##
  ## .. code-block:: nimrod
  ##   var arr = "["
  ##   for x in items([2, 3, 5, 7, 11]):
  ##     addSep(arr, startLen=len("["))
  ##     add(arr, $x)
  ##   add(arr, "]")
  if dest.len > startLen: add(dest, sep)

proc allCharsInSet*(s: string, theSet: TCharSet): bool =
  ## returns true iff each character of `s` is in the set `theSet`.
  ## An empty `s` yields true.
  for c in items(s):
    if c notin theSet: return false
  return true

proc quoteIfContainsWhite*(s: string): string =
  ## returns ``'"' & s & '"'`` if `s` contains a space or a tab and does not
  ## start with a quote, else returns `s`
  if find(s, {' ', '\t'}) >= 0 and s[0] != '"':
    result = '"' & s & '"'
  else:
    result = s

proc startsWith(s, prefix: string): bool =
  # Relies on the terminating '\0': reaching the end of `prefix` means every
  # character matched; reaching the end of `s` first yields a mismatch
  # against its terminator.
  var i = 0
  while true:
    if prefix[i] == '\0': return true
    if s[i] != prefix[i]: return false
    inc(i)

proc endsWith(s, suffix: string): bool =
  var
    i = 0
    j = len(s) - len(suffix)
  # BUGFIX: a suffix that is longer than `s` can never match; without this
  # guard `s[i+j]` below was indexed with a negative offset.
  if j < 0: return false
  while true:
    if suffix[i] == '\0': return true
    if s[i+j] != suffix[i]: return false
    inc(i)

when false:
  proc abbrev(s: string, possibilities: openarray[string]): int =
    ## returns the index of the first item in `possibilities` if not
    ## ambiguous; -1 if no item has been found; -2 if multiple items
    ## match.
    result = -1 # none found
    for i in 0..possibilities.len-1:
      if possibilities[i].startsWith(s):
        if result >= 0: return -2 # ambiguous
        result = i

proc repeatChar(count: int, c: Char = ' '): string =
  result = newString(count)
  for i in 0..count-1:
    result[i] = c

proc intToStr(x: int, minchars: int = 1): string =
  # The sign is not counted towards `minchars`: the digits are padded first
  # and the '-' is prepended afterwards.
  result = $abs(x)
  for i in 1 .. minchars - len(result):
    result = '0' & result
  if x < 0:
    result = '-' & result

proc toString[Ty](x: Ty): string = return $x

proc toOctal(c: char): string =
  # Always produces exactly three octal digits (000..377).
  result = newString(3)
  var val = ord(c)
  for i in countdown(2, 0):
    result[i] = Chr(val mod 8 + ord('0'))
    val = val div 8

proc `%`(formatstr: string, a: string): string =
  return formatstr % [a]

proc findNormalized(x: string, inArray: openarray[string]): int =
  # `inArray` is a flat list of (key, value) pairs; only the keys at the even
  # positions are compared. Returns the index of the matching key or -1.
  var i = 0
  while i < high(inArray):
    if cmpIgnoreStyle(x, inArray[i]) == 0: return i
    inc(i, 2) # incrementing by 1 would probably result in a
              # security hole (values could then be matched as keys) ...
  return -1

proc addf(s: var string, formatstr: string, a: openarray[string]) =
  # Appends `formatstr` to `s`, substituting:
  #   $#      the next argument (auto-incrementing counter)
  #   $$      a literal '$'
  #   $1..$N  the N-th argument (1-based); also repositions the `$#` counter
  #   ${key}  the value following `key` in `a` (key/value pairs)
  #   $key    same as ${key}
  # Raises EInvalidValue for a malformed format string or an unknown key.
  const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
  var i = 0
  var num = 0
  while i < len(formatstr):
    if formatstr[i] == '$':
      case formatstr[i+1] # again we use the fact that strings
                          # are zero-terminated here
      of '#':
        add s, a[num]
        inc i, 2
        inc num
      of '$':
        add s, '$'
        inc(i, 2)
      of '1'..'9':
        var j = 0
        inc(i) # skip $
        while formatstr[i] in {'0'..'9'}:
          j = j * 10 + ord(formatstr[i]) - ord('0')
          inc(i)
        num = j
        add s, a[j - 1]
      of '{':
        var j = i+1
        while formatstr[j] notin {'\0', '}'}: inc(j)
        var x = findNormalized(copy(formatstr, i+2, j-1), a)
        if x >= 0 and x < high(a): add s, a[x+1]
        else: raise newException(EInvalidValue, "invalid format string")
        i = j+1
      of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
        var j = i+1
        while formatstr[j] in PatternChars: inc(j)
        var x = findNormalized(copy(formatstr, i+1, j-1), a)
        if x >= 0 and x < high(a): add s, a[x+1]
        else: raise newException(EInvalidValue, "invalid format string")
        i = j
      else: raise newException(EInvalidValue, "invalid format string")
    else:
      add s, formatstr[i]
      inc(i)

proc `%`(formatstr: string, a: openarray[string]): string =
  result = ""
  addf(result, formatstr, a)

proc cmpIgnoreCase(a, b: string): int =
  # BUGFIX: the previous version only iterated over `len(a)` characters and
  # therefore returned 0 ("equal") whenever `a` was a proper prefix of `b`.
  # Compare the common part first and let the lengths decide afterwards.
  var i = 0
  while i < len(a) and i < len(b):
    result = ord(toLower(a[i])) - ord(toLower(b[i]))
    if result != 0: return
    inc(i)
  result = len(a) - len(b)

{.push checks: off, line_trace: off .} # this is a hot-spot in the compiler!
# thus we compile without checks here

proc cmpIgnoreStyle(a, b: string): int =
  # Compares `a` and `b` ignoring case and underscores. Relies on the
  # terminating '\0' of both strings to end the loop.
  var i = 0
  var j = 0
  while True:
    while a[i] == '_': inc(i)
    while b[j] == '_': inc(j) # BUGFIX: typo
    var aa = toLower(a[i])
    var bb = toLower(b[j])
    result = ord(aa) - ord(bb)
    if result != 0 or aa == '\0': break
    inc(i)
    inc(j)

{.pop.}

# ---------------------------------------------------------------------------

proc join*(a: openArray[string], sep: string): string =
  ## concatenates all strings in `a` separating them with `sep`.
  ## Returns "" for an empty `a`.
  if len(a) > 0:
    # compute the final length up front so the buffer is allocated only once:
    var L = sep.len * (a.len-1)
    for i in 0..high(a): inc(L, a[i].len)
    result = newString(L)
    setLen(result, 0) # keep the capacity, but start appending at position 0
    add(result, a[0])
    for i in 1..high(a):
      add(result, sep)
      add(result, a[i])
  else:
    result = ""

proc join*(a: openArray[string]): string =
  ## concatenates all strings in `a`.
  ## Returns "" for an empty `a`.
  if len(a) > 0:
    var L = 0
    for i in 0..high(a): inc(L, a[i].len)
    result = newString(L)
    setLen(result, 0) # keep the capacity, but start appending at position 0
    for i in 0..high(a): add(result, a[i])
  else:
    result = ""

proc strip(s: string, leading = true, trailing = true): string =
  const
    chars: set[Char] = Whitespace
  var
    first = 0
    last = len(s)-1
  if leading:
    # stops at the latest at the terminating '\0'
    while s[first] in chars: inc(first)
  if trailing:
    while last >= 0 and s[last] in chars: dec(last)
  result = copy(s, first, last)

proc toLower(c: Char): Char =
  if c in {'A'..'Z'}:
    result = chr(ord(c) + (ord('a') - ord('A')))
  else:
    result = c

proc toLower(s: string): string =
  result = newString(len(s))
  for i in 0..len(s) - 1:
    result[i] = toLower(s[i])

proc toUpper(c: Char): Char =
  if c in {'a'..'z'}:
    result = Chr(Ord(c) - (Ord('a') - Ord('A')))
  else:
    result = c

proc toUpper(s: string): string =
  result = newString(len(s))
  for i in 0..len(s) - 1:
    result[i] = toUpper(s[i])

proc capitalize(s: string): string =
  # BUGFIX: for an empty `s`, `s[0]` is the terminating '\0' and the old code
  # returned a string of length 1 containing that zero character.
  if len(s) == 0: return ""
  result = toUpper(s[0]) & copy(s, 1)

proc normalize(s: string): string =
  # lower-cases ASCII letters and drops every underscore
  result = ""
  for i in 0..len(s) - 1:
    if s[i] in {'A'..'Z'}:
      add result, Chr(Ord(s[i]) + (Ord('a') - Ord('A')))
    elif s[i] != '_':
      add result, s[i]

type
  TSkipTable = array[Char, int]

proc preprocessSub(sub: string, a: var TSkipTable) =
  # Builds the shift table of the "quick search" algorithm: for every
  # character the distance the search window may be moved when that character
  # directly follows the window.
  var m = len(sub)
  for i in 0..0xff: a[chr(i)] = m+1
  for i in 0..m-1: a[sub[i]] = m-i

proc findAux(s, sub: string, start: int, a: TSkipTable): int =
  # fast "quick search" algorithm:
  var
    m = len(sub)
    n = len(s)
  # search:
  var j = start
  while j <= n - m:
    block match:
      for k in 0..m-1:
        if sub[k] != s[k+j]: break match
      return j
    # `s[j+m]` is at worst the terminating '\0' (when j == n - m)
    inc(j, a[s[j+m]])
  return -1

proc find(s, sub: string, start: int = 0): int =
  var a: TSkipTable
  preprocessSub(sub, a)
  result = findAux(s, sub, start, a)

proc find(s: string, sub: char, start: int = 0): int =
  for i in start..len(s)-1:
    if sub == s[i]: return i
  return -1

proc find(s: string, chars: set[char], start: int = 0): int =
  for i in start..s.len-1:
    if s[i] in chars: return i
  return -1

proc contains(s: string, chars: set[char]): bool =
  return find(s, chars) >= 0

proc contains(s: string, c: char): bool =
  return find(s, c) >= 0

proc contains(s, sub: string): bool =
  return find(s, sub) >= 0

proc replace*(s, sub, by: string): string =
  ## Replaces `sub` in `s` by the string `by`.
  ## If `sub` is empty, `s` is returned unchanged.
  # BUGFIX: an empty `sub` matches at every position without consuming any
  # input, which made the loop below run forever.
  if len(sub) == 0: return s
  var a: TSkipTable
  result = ""
  preprocessSub(sub, a)
  var i = 0
  while true:
    var j = findAux(s, sub, i, a)
    if j < 0: break
    add result, copy(s, i, j - 1)
    add result, by
    i = j + len(sub)
  # copy the rest:
  add result, copy(s, i)

proc replace*(s: string, sub, by: char): string =
  ## optimized version for characters.
  result = newString(s.len)
  var i = 0
  while i < s.len:
    if s[i] == sub: result[i] = by
    else: result[i] = s[i]
    inc(i)

proc delete*(s: var string, first, last: int) =
  ## Deletes in `s` the characters at position `first`..`last`. This modifies
  ## `s` itself, it does not return a copy.
  # example: "abc___uvwxyz\0"  (___ is to be deleted)
  # --> first == 3, last == 5
  # s[first..] = s[last+1..]
  # BUGFIX: the source index used to be computed as `last+i+1`, which is only
  # correct for `first == 0`; the tail has to be read starting at `last+1`.
  var
    i = first
    j = last+1
    newLen = len(s) - j + i
  while i < newLen:
    s[i] = s[j]
    inc(i)
    inc(j)
  setlen(s, newLen)

proc replaceStr(s, sub, by: string): string = return replace(s, sub, by)
proc replaceStr(s: string, sub, by: char): string = return replace(s, sub, by)
proc deleteStr*(s: var string, first, last: int) = delete(s, first, last)

# parsing numbers:

proc toHex(x: BiggestInt, len: int): string =
  const
    HexChars = "0123456789ABCDEF"
  var
    shift: BiggestInt # implicitly initialized to 0
  result = newString(len)
  # fill from the least significant nibble (rightmost char) to the left:
  for j in countdown(len-1, 0):
    result[j] = HexChars[toU32(x shr shift) and 0xF'i32]
    shift = shift + 4

{.push overflowChecks: on.}
# this must be compiled with overflow checking turned on:
proc rawParseInt(s: string, index: var int): BiggestInt =
  # index contains the start position at proc entry; end position will be
  # an index before the proc returns; index = -1 on error (no number at all)
  # the problem here is that integers have an asymmetrical range: there is
  # one more valid negative than positive integer. Thus we perform the
  # computation as a negative number and then change the sign at the end.
  var
    i = index # a local i is more efficient than accessing a var parameter
    sign: BiggestInt = -1
  if s[i] == '+':
    inc(i)
  elif s[i] == '-':
    inc(i)
    sign = 1
  if s[i] in {'0'..'9'}:
    result = 0
    while s[i] in {'0'..'9'}:
      result = result * 10 - (ord(s[i]) - ord('0'))
      inc(i)
      while s[i] == '_':
        inc(i) # underscores are allowed and ignored
    result = result * sign
    if s[i] == '\0':
      index = i # store index back
    else:
      index = -1 # BUGFIX: error!
  else:
    index = -1

{.pop.} # overflowChecks

proc parseInt(s: string): int =
  var
    index = 0
    res = rawParseInt(s, index)
  if index == -1:
    raise newException(EInvalidValue, "invalid integer: " & s)
  elif (sizeof(int) <= 4) and
      ((res < low(int)) or (res > high(int))):
    raise newException(EOverflow, "overflow")
  else:
    result = int(res) # convert to smaller integer type

proc ParseBiggestInt(s: string): biggestInt =
  var index = 0
  result = rawParseInt(s, index)
  if index == -1:
    raise newException(EInvalidValue, "invalid integer: " & s)

proc ParseFloat(s: string): float =
  var
    esign = 1.0
    sign = 1.0
    i = 0
    exponent: int
    flags: int # bit 0: integer digits seen; bit 1: fractional digits seen
  result = 0.0
  if s[i] == '+': inc(i)
  elif s[i] == '-':
    sign = -1.0
    inc(i)
  if s[i] == 'N' or s[i] == 'n':
    if s[i+1] == 'A' or s[i+1] == 'a':
      if s[i+2] == 'N' or s[i+2] == 'n':
        if s[i+3] == '\0': return NaN
    raise newException(EInvalidValue, "invalid float: " & s)
  if s[i] == 'I' or s[i] == 'i':
    if s[i+1] == 'N' or s[i+1] == 'n':
      if s[i+2] == 'F' or s[i+2] == 'f':
        if s[i+3] == '\0': return Inf*sign
    raise newException(EInvalidValue, "invalid float: " & s)
  while s[i] in {'0'..'9'}:
    # Read integer part
    flags = flags or 1
    result = result * 10.0 + toFloat(ord(s[i]) - ord('0'))
    inc(i)
    while s[i] == '_': inc(i)
  # Decimal?
  if s[i] == '.':
    var hd = 1.0
    inc(i)
    while s[i] in {'0'..'9'}:
      # Read fractional part
      flags = flags or 2
      result = result * 10.0 + toFloat(ord(s[i]) - ord('0'))
      hd = hd * 10.0
      inc(i)
      while s[i] == '_': inc(i)
    result = result / hd # this complicated way preserves precision
  # Again, read integer and fractional part
  if flags == 0:
    raise newException(EInvalidValue, "invalid float: " & s)
  # Exponent?
  if s[i] in {'e', 'E'}:
    inc(i)
    if s[i] == '+':
      inc(i)
    elif s[i] == '-':
      esign = -1.0
      inc(i)
    if s[i] notin {'0'..'9'}:
      raise newException(EInvalidValue, "invalid float: " & s)
    while s[i] in {'0'..'9'}:
      exponent = exponent * 10 + ord(s[i]) - ord('0')
      inc(i)
      while s[i] == '_': inc(i)
  # Calculate Exponent
  var hd = 1.0
  for j in 1..exponent:
    hd = hd * 10.0
  if esign > 0.0: result = result * hd
  else: result = result / hd
  # Not all characters are read?
  if s[i] != '\0': raise newException(EInvalidValue, "invalid float: " & s)
  # evaluate sign
  result = result * sign

proc toOct*(x: BiggestInt, len: int): string =
  ## converts `x` into its octal representation. The resulting string is
  ## always `len` characters long. No leading ``0o`` prefix is generated.
  var
    mask: BiggestInt = 7
    shift: BiggestInt = 0
  assert(len > 0)
  result = newString(len)
  for j in countdown(len-1, 0):
    result[j] = chr(int((x and mask) shr shift) + ord('0'))
    shift = shift + 3
    mask = mask shl 3

proc toBin*(x: BiggestInt, len: int): string =
  ## converts `x` into its binary representation. The resulting string is
  ## always `len` characters long. No leading ``0b`` prefix is generated.
  var
    mask: BiggestInt = 1
    shift: BiggestInt = 0
  assert(len > 0)
  result = newString(len)
  for j in countdown(len-1, 0):
    result[j] = chr(int((x and mask) shr shift) + ord('0'))
    shift = shift + 1
    mask = mask shl 1

proc escape*(s: string, prefix = "\"", suffix = "\""): string =
  ## Escapes a string `s`. This does these operations (at the same time):
  ## * replaces any ``\`` by ``\\``
  ## * replaces any ``'`` by ``\'``
  ## * replaces any ``"`` by ``\"``
  ## * replaces any other character in the set ``{'\0'..'\31', '\128'..'\255'}``
  ##   by ``\xHH`` where ``HH`` is its hexadecimal value.
  ## The procedure has been designed so that its output is usable for many
  ## different common syntaxes. The resulting string is prefixed with
  ## ``prefix`` and suffixed with ``suffix``. Both may be empty strings.
  result = prefix
  for c in items(s):
    case c
    of '\0'..'\31', '\128'..'\255':
      # BUGFIX: the 'x' was missing, so the output was ``\HH`` instead of the
      # documented ``\xHH``.
      add(result, "\\x")
      add(result, toHex(ord(c), 2))
    of '\\': add(result, "\\\\")
    of '\'': add(result, "\\'")
    of '\"': add(result, "\\\"")
    else: add(result, c)
  add(result, suffix)

proc validEmailAddress*(s: string): bool =
  ## returns true if `s` seems to be a valid e-mail address.
  ## The checking also uses a domain list.
  const
    chars = Letters + Digits + {'!','#','$','%','&',
      '\'','*','+','/','=','?','^','_','`','{','}','|','~','-','.'}
  var i = 0
  # local part: must not start with a dot or contain two dots in a row
  if s[i] notin chars or s[i] == '.': return false
  while s[i] in chars:
    if s[i] == '.' and s[i+1] == '.': return false
    inc(i)
  if s[i] != '@': return false
  # find the start of the top level domain by scanning backwards:
  var j = len(s)-1
  if s[j] notin letters: return false
  while j >= i and s[j] in letters: dec(j)
  inc(i) # skip '@'
  while s[i] in {'0'..'9', 'a'..'z', '-', '.'}: inc(i)
  if s[i] != '\0': return false

  var x = copy(s, j+1)
  # any two-letter top level domain is accepted (country codes):
  if len(x) == 2 and x[0] in Letters and x[1] in Letters: return true
  case toLower(x)
  of "com", "org", "net", "gov", "mil", "biz", "info", "mobi", "name",
     "aero", "jobs", "museum": return true
  return false

proc validIdentifier*(s: string): bool =
  ## returns true if `s` is a valid identifier. A valid identifier starts
  ## with a character of the set `IdentStartChars` and is followed by any
  ## number of characters of the set `IdentChars`.
  if s[0] in IdentStartChars:
    for i in 1..s.len-1:
      if s[i] notin IdentChars: return false
    return true

proc editDistance*(a, b: string): int =
  ## returns the edit distance between `a` and `b`. This uses the Levenshtein
  ## distance algorithm with only a linear memory overhead. This implementation
  ## is highly optimized!
  var len1 = a.len
  var len2 = b.len
  if len1 > len2:
    # make `b` the longer string
    return editDistance(b, a)

  # strip common prefix:
  var s = 0
  while a[s] == b[s] and a[s] != '\0':
    inc(s)
    dec(len1)
    dec(len2)
  # strip common suffix:
  while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]:
    dec(len1)
    dec(len2)
  # trivial cases:
  if len1 == 0: return len2
  if len2 == 0: return len1

  # another special case:
  if len1 == 1:
    # BUGFIX: the remaining part of `b` is `b[s..s+len2-1]`; the old upper
    # bound `len2-1` ignored the stripped prefix and missed matches.
    for j in s..s+len2-1:
      if a[s] == b[j]: return len2 - 1
    return len2

  inc(len1)
  inc(len2)
  var half = len1 shr 1
  # initalize first row:
  #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2 * sizeof(int)))
  var row: seq[int]
  newSeq(row, len2)
  var e = s + len2 - 1 # end marker
  for i in 1..len2 - half - 1: row[i] = i
  row[0] = len1 - half - 1
  for i in 1 .. len1 - 1:
    var char1 = a[i + s - 1]
    var char2p: int
    var D, x: int
    var p: int
    if i >= len1 - half:
      # skip the upper triangle:
      var offset = i - len1 + half
      char2p = offset
      p = offset
      var c3 = row[p] + ord(char1 != b[s + char2p])
      inc(p)
      inc(char2p)
      x = row[p] + 1
      D = x
      if x > c3: x = c3
      row[p] = x
      inc(p)
    else:
      p = 1
      char2p = 0
      D = i
      x = i
    if i <= half + 1:
      # skip the lower triangle:
      e = len2 + i - half - 2
    # main:
    while p <= e:
      dec(D)
      var c3 = D + ord(char1 != b[char2p + s])
      inc(char2p)
      inc(x)
      if x > c3: x = c3
      D = row[p] + 1
      if x > D: x = D
      row[p] = x
      inc(p)
    # lower triangle sentinel:
    if i <= half:
      dec(D)
      var c3 = D + ord(char1 != b[char2p + s])
      inc(x)
      if x > c3: x = c3
      row[p] = x
  result = row[e]
  #dealloc(row)

{.pop.}
diff --git a/nimlib/pure/terminal.nim b/nimlib/pure/terminal.nim
new file mode 100755
index 000000000..42bd80cb4
--- /dev/null
+++ b/nimlib/pure/terminal.nim
@@ -0,0 +1,310 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the
#    copyright.
#

## This module contains a few procedures to control the *terminal*
## (also called *console*). On UNIX, the implementation simply uses ANSI escape
## sequences and does not depend on any other module, on Windows it uses the
## Windows API.
## Changing the style is permanent even after program termination! Use the
## code ``system.addQuitProc(resetAttributes)`` to restore the defaults.

when defined(windows):
  import windows, os

  var
    conHandle: THandle
  # = createFile("CONOUT$", GENERIC_WRITE, 0, nil, OPEN_ALWAYS, 0, 0)

  block:
    # duplicate the standard output handle into `conHandle`:
    var hTemp = GetStdHandle(STD_OUTPUT_HANDLE())
    if DuplicateHandle(GetCurrentProcess(), hTemp, GetCurrentProcess(),
                       addr(conHandle), 0, 1, DUPLICATE_SAME_ACCESS) == 0:
      OSError()

  proc getCursorPos(): tuple [x,y: int] =
    var c: TCONSOLE_SCREEN_BUFFER_INFO
    if GetConsoleScreenBufferInfo(conHandle, addr(c)) == 0: OSError()
    return (int(c.dwCursorPosition.x), int(c.dwCursorPosition.y))

  proc getAttributes(): int16 =
    var c: TCONSOLE_SCREEN_BUFFER_INFO
    # workaround Windows bugs: try several times
    if GetConsoleScreenBufferInfo(conHandle, addr(c)) != 0:
      return c.wAttributes
    else:
      OSError()
    return 0x70'i16 # ERROR: return white background, black text

  var
    oldAttr = getAttributes() # attributes at startup; see `ResetAttributes`

proc setCursorPos*(x, y: int) =
  ## sets the terminal's cursor to the (x,y) position. (0,0) is the
  ## upper left of the screen.
  when defined(windows):
    var c: TCoord
    c.x = int16(x)
    c.y = int16(y)
    if SetConsoleCursorPosition(conHandle, c) == 0: OSError()
  else:
    # BUGFIX: ANSI cursor positions are 1-based (row;column), whereas this
    # API is documented as 0-based, so both coordinates are shifted by one.
    stdout.write("\e[" & $(y+1) & ';' & $(x+1) & 'f')

proc setCursorXPos*(x: int) =
  ## sets the terminal's cursor to the x position. The y position is
  ## not changed. Like for `setCursorPos`, 0 is the leftmost column.
  when defined(windows):
    var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO
    var hStdout = conHandle
    if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError()
    var origin = scrbuf.dwCursorPosition
    origin.x = int16(x)
    if SetConsoleCursorPosition(conHandle, origin) == 0: OSError()
  else:
    # BUGFIX: the ANSI column is 1-based (see `setCursorPos`).
    stdout.write("\e[" & $(x+1) & 'G')

when defined(windows):
  proc setCursorYPos*(y: int) =
    ## sets the terminal's cursor to the y position. The x position is
    ## not changed. **Warning**: This is not supported on UNIX!
    when defined(windows):
      var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO
      var hStdout = conHandle
      if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError()
      var origin = scrbuf.dwCursorPosition
      origin.y = int16(y)
      if SetConsoleCursorPosition(conHandle, origin) == 0: OSError()
    else:
      nil

proc CursorUp*(count=1) =
  ## Moves the cursor up by `count` rows.
  when defined(windows):
    var p = getCursorPos()
    dec(p.y, count)
    setCursorPos(p.x, p.y)
  else:
    stdout.write("\e[" & $count & 'A')

proc CursorDown*(count=1) =
  ## Moves the cursor down by `count` rows.
  when defined(windows):
    var p = getCursorPos()
    inc(p.y, count)
    setCursorPos(p.x, p.y)
  else:
    stdout.write("\e[" & $count & 'B')

proc CursorForward*(count=1) =
  ## Moves the cursor forward by `count` columns.
  when defined(windows):
    var p = getCursorPos()
    inc(p.x, count)
    setCursorPos(p.x, p.y)
  else:
    stdout.write("\e[" & $count & 'C')

proc CursorBackward*(count=1) =
  ## Moves the cursor backward by `count` columns.
  when defined(windows):
    var p = getCursorPos()
    dec(p.x, count)
    setCursorPos(p.x, p.y)
  else:
    stdout.write("\e[" & $count & 'D')

when true:
  nil
else:
  # disabled: there is no Windows implementation for these yet
  proc EraseLineEnd* =
    ## Erases from the current cursor position to the end of the current line.
    when defined(windows):
      nil
    else:
      stdout.write("\e[K")

  proc EraseLineStart* =
    ## Erases from the current cursor position to the start of the current line.
    when defined(windows):
      nil
    else:
      stdout.write("\e[1K")

  proc EraseDown* =
    ## Erases the screen from the current line down to the bottom of the screen.
    when defined(windows):
      nil
    else:
      stdout.write("\e[J")

  proc EraseUp* =
    ## Erases the screen from the current line up to the top of the screen.
    when defined(windows):
      nil
    else:
      stdout.write("\e[1J")

proc EraseLine* =
  ## Erases the entire current line and moves the cursor to its first column.
  when defined(windows):
    var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO
    var numwrote: DWORD
    var hStdout = conHandle
    if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError()
    var origin = scrbuf.dwCursorPosition
    origin.x = 0'i16
    if SetConsoleCursorPosition(conHandle, origin) == 0: OSError()
    # BUGFIX: only the cells of the current line are to be cleared. The old
    # code filled `ht*wt` cells (`ht` being the number of rows below the
    # cursor), which also wiped every line below the current one.
    var wt = scrbuf.dwSize.X - origin.X
    if FillConsoleOutputCharacter(hStdout,' ', wt,
                                  origin, addr(numwrote)) == 0:
      OSError()
    if FillConsoleOutputAttribute(hStdout, scrbuf.wAttributes, wt,
                                  scrbuf.dwCursorPosition, addr(numwrote)) == 0:
      OSError()
  else:
    stdout.write("\e[2K")
    setCursorXPos(0)

proc EraseScreen* =
  ## Erases the screen with the background colour and moves the cursor to home.
  when defined(windows):
    var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO
    var numwrote: DWORD
    var origin: TCoord # is inititalized to 0, 0
    var hStdout = conHandle
    if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError()
    if FillConsoleOutputCharacter(hStdout, ' ', scrbuf.dwSize.X*scrbuf.dwSize.Y,
                                  origin, addr(numwrote)) == 0:
      OSError()
    if FillConsoleOutputAttribute(hStdout, scrbuf.wAttributes,
                                  scrbuf.dwSize.X * scrbuf.dwSize.Y,
                                  origin, addr(numwrote)) == 0:
      OSError()
    setCursorXPos(0)
  else:
    stdout.write("\e[2J")

proc ResetAttributes* {.noconv.} =
  ## resets all attributes; it is advisable to register this as a quit proc
  ## with ``system.addQuitProc(resetAttributes)``.
  when defined(windows):
    discard SetConsoleTextAttribute(conHandle, oldAttr)
  else:
    stdout.write("\e[0m")

type
  TStyle* = enum         ## different styles for text output; on UNIX the
                         ## ordinal value is sent as the ANSI SGR code
    styleBright = 1,     ## bright text
    styleDim,            ## dim text
    styleUnknown,        ## unknown
    styleUnderscore = 4, ## underscored text
    styleBlink,          ## blinking/bold text
    styleBlinkRapid,     ## rapid blinking/bold text (not widely supported)
    styleReverse,        ## reversed (foreground and background swapped) text
    styleHidden          ## hidden text
  # BUGFIX: `styleReverse` and `styleHidden` used to have the ordinals 6 and 7
  # but the ANSI SGR codes for reverse video and concealed text are 7 and 8;
  # `styleBlinkRapid` (SGR 6) fills the gap.

when not defined(windows):
  var
    # the currently active ANSI colour codes; 0 means "not set"
    gFG = 0
    gBG = 0

proc WriteStyled*(txt: string, style: set[TStyle] = {styleBright}) =
  ## writes the text `txt` in a given `style`. The previously active
  ## attributes/colours are restored afterwards.
  when defined(windows):
    var a = 0'i16
    if styleBright in style: a = a or int16(FOREGROUND_INTENSITY)
    if styleBlink in style: a = a or int16(BACKGROUND_INTENSITY)
    if styleReverse in style: a = a or 0x4000'i16 # COMMON_LVB_REVERSE_VIDEO
    if styleUnderscore in style: a = a or 0x8000'i16 # COMMON_LVB_UNDERSCORE
    var old = getAttributes()
    discard SetConsoleTextAttribute(conHandle, old or a)
    stdout.write(txt)
    discard SetConsoleTextAttribute(conHandle, old)
  else:
    for s in items(style):
      stdout.write("\e[" & $ord(s) & 'm')
    stdout.write(txt)
    resetAttributes()
    # the reset also dropped the colours, so re-apply them:
    if gFG != 0:
      stdout.write("\e[" & $ord(gFG) & 'm')
    if gBG != 0:
      stdout.write("\e[" & $ord(gBG) & 'm')

type
  TForegroundColor* = enum ## terminal's foreground colors
    fgBlack = 30,          ## black
    fgRed,                 ## red
    fgGreen,               ## green
    fgYellow,              ## yellow
    fgBlue,                ## blue
    fgMagenta,             ## magenta
    fgCyan,                ## cyan
    fgWhite                ## white

  TBackgroundColor* = enum ## terminal's background colors
    bgBlack = 40,          ## black
    bgRed,                 ## red
    bgGreen,               ## green
    bgYellow,              ## yellow
    bgBlue,                ## blue
    bgMagenta,             ## magenta
    bgCyan,                ## cyan
    bgWhite                ## white

proc setForegroundColor*(fg: TForegroundColor, bright=false) =
  ## sets the terminal's foreground color
  when defined(windows):
    var old = getAttributes() and not 0x0007
    if bright:
      old = old or FOREGROUND_INTENSITY
    const lookup: array [TForegroundColor, int] = [
      0,
      (FOREGROUND_RED),
      (FOREGROUND_GREEN),
      (FOREGROUND_RED or FOREGROUND_GREEN),
      (FOREGROUND_BLUE),
      (FOREGROUND_RED or FOREGROUND_BLUE),
      (FOREGROUND_BLUE or FOREGROUND_GREEN),
      (FOREGROUND_BLUE or FOREGROUND_GREEN or FOREGROUND_RED)]
    discard SetConsoleTextAttribute(conHandle, toU16(old or lookup[fg]))
  else:
    gFG = ord(fg)
    if bright: inc(gFG, 60) # 90..97 are the bright foreground codes
    stdout.write("\e[" & $gFG & 'm')

proc setBackgroundColor*(bg: TBackgroundColor, bright=false) =
  ## sets the terminal's background color
  when defined(windows):
    var old = getAttributes() and not 0x0070
    if bright:
      old = old or BACKGROUND_INTENSITY
    const lookup: array [TBackgroundColor, int] = [
      0,
      (BACKGROUND_RED),
      (BACKGROUND_GREEN),
      (BACKGROUND_RED or BACKGROUND_GREEN),
      (BACKGROUND_BLUE),
      (BACKGROUND_RED or BACKGROUND_BLUE),
      (BACKGROUND_BLUE or BACKGROUND_GREEN),
      (BACKGROUND_BLUE or BACKGROUND_GREEN or BACKGROUND_RED)]
    discard SetConsoleTextAttribute(conHandle, toU16(old or lookup[bg]))
  else:
    gBG = ord(bg)
    if bright: inc(gBG, 60) # 100..107 are the bright background codes
    stdout.write("\e[" & $gBG & 'm')

when isMainModule:
  system.addQuitProc(resetAttributes)
  write(stdout, "never mind")
  eraseLine()
  #setCursorPos(2, 2)
  writeStyled("styled text ", {styleBright, styleBlink, styleUnderscore})
  setBackGroundColor(bgCyan, true)
  setForeGroundColor(fgBlue)
  writeln(stdout, "ordinary text")

diff --git a/nimlib/pure/times.nim b/nimlib/pure/times.nim
new file mode 100755
index 000000000..8c21b6027
--- /dev/null
+++ b/nimlib/pure/times.nim
@@ -0,0 +1,307 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#


## This module contains routines and types for dealing with time.
## This module is available for the ECMAScript target.

{.push debugger:off .} # the user does not want to trace a part
                       # of the standard library!
+ +import + strutils + +type + TMonth* = enum ## represents a month + mJan, mFeb, mMar, mApr, mMay, mJun, mJul, mAug, mSep, mOct, mNov, mDec + TWeekDay* = enum ## represents a weekday + dMon, dTue, dWed, dThu, dFri, dSat, dSun + +when defined(posix): + type + TTime* = distinct int ## distinct type that represents a time +elif defined(windows): + when defined(vcc): + # newest version of Visual C++ defines time_t to be of 64 bits + type TTime* = distinct int64 + else: + type TTime* = distinct int32 +elif defined(ECMAScript): + type + TTime* {.final.} = object + getDay: proc (): int + getFullYear: proc (): int + getHours: proc (): int + getMilliseconds: proc (): int + getMinutes: proc (): int + getMonth: proc (): int + getSeconds: proc (): int + getTime: proc (): int + getTimezoneOffset: proc (): int + getUTCDate: proc (): int + getUTCFullYear: proc (): int + getUTCHours: proc (): int + getUTCMilliseconds: proc (): int + getUTCMinutes: proc (): int + getUTCMonth: proc (): int + getUTCSeconds: proc (): int + getYear: proc (): int + parse: proc (s: cstring): TTime + setDate: proc (x: int) + setFullYear: proc (x: int) + setHours: proc (x: int) + setMilliseconds: proc (x: int) + setMinutes: proc (x: int) + setMonth: proc (x: int) + setSeconds: proc (x: int) + setTime: proc (x: int) + setUTCDate: proc (x: int) + setUTCFullYear: proc (x: int) + setUTCHours: proc (x: int) + setUTCMilliseconds: proc (x: int) + setUTCMinutes: proc (x: int) + setUTCMonth: proc (x: int) + setUTCSeconds: proc (x: int) + setYear: proc (x: int) + toGMTString: proc (): cstring + toLocaleString: proc (): cstring + UTC: proc (): int + +type + TTimeInfo* = object of TObject ## represents a time in different parts + second*: range[0..61] ## The number of seconds after the minute, + ## normally in the range 0 to 59, but can + ## be up to 61 to allow for leap seconds. + minute*: range[0..59] ## The number of minutes after the hour, + ## in the range 0 to 59. 
+ hour*: range[0..23] ## The number of hours past midnight, + ## in the range 0 to 23. + monthday*: range[1..31] ## The day of the month, in the range 1 to 31. + month*: TMonth ## The current month. + year*: int ## The current year. + weekday*: TWeekDay ## The current day of the week. + yearday*: range[0..365] ## The number of days since January 1, + ## in the range 0 to 365. + ## Always 0 if the target is ECMAScript. + +proc getTime*(): TTime ## gets the current calendar time +proc getLocalTime*(t: TTime): TTimeInfo + ## converts the calendar time `t` to broken-time representation, + ## expressed relative to the user's specified time zone. +proc getGMTime*(t: TTime): TTimeInfo + ## converts the calendar time `t` to broken-down time representation, + ## expressed in Coordinated Universal Time (UTC). + +proc TimeInfoToTime*(timeInfo: TTimeInfo): TTime + ## converts a broken-down time structure, expressed as local time, to + ## calendar time representation. The function ignores the specified + ## contents of the structure members `weekday` and `yearday` and recomputes + ## them from the other information in the broken-down time structure. + +proc `$` *(timeInfo: TTimeInfo): string + ## converts a `TTimeInfo` object to a string representation. +proc `$` *(time: TTime): string + ## converts a calendar time to a string representation. + +proc getDateStr*(): string + ## gets the current date as a string of the format + ## ``YYYY-MM-DD``. +proc getClockStr*(): string + ## gets the current clock time as a string of the format ``HH:MM:SS``. + +proc `-` *(a, b: TTime): int64 + ## computes the difference of two calendar times. Result is in seconds. + +proc `<` * (a, b: TTime): bool = + ## returns true iff ``a < b``, that is iff a happened before b. + result = a - b < 0 + +proc `<=` * (a, b: TTime): bool = + ## returns true iff ``a <= b``. 
+ result = a - b <= 0 + +proc getStartMilsecs*(): int + ## get the miliseconds from the start of the program + + +when not defined(ECMAScript): + # C wrapper: + type + structTM {.importc: "struct tm", final.} = object + second {.importc: "tm_sec".}, + minute {.importc: "tm_min".}, + hour {.importc: "tm_hour".}, + monthday {.importc: "tm_mday".}, + month {.importc: "tm_mon".}, + year {.importc: "tm_year".}, + weekday {.importc: "tm_wday".}, + yearday {.importc: "tm_yday".}, + isdst {.importc: "tm_isdst".}: cint + + PTimeInfo = ptr structTM + PTime = ptr TTime + + TClock {.importc: "clock_t".} = range[low(int)..high(int)] + + proc localtime(timer: PTime): PTimeInfo {. + importc: "localtime", header: "<time.h>".} + proc gmtime(timer: PTime): PTimeInfo {.importc: "gmtime", header: "<time.h>".} + proc timec(timer: PTime): TTime {.importc: "time", header: "<time.h>".} + proc mktime(t: structTM): TTime {.importc: "mktime", header: "<time.h>".} + proc asctime(tblock: structTM): CString {. + importc: "asctime", header: "<time.h>".} + proc ctime(time: PTime): CString {.importc: "ctime", header: "<time.h>".} + # strftime(s: CString, maxsize: int, fmt: CString, t: tm): int {. 
#   importc: "strftime", header: "<time.h>".}
  proc clock(): TClock {.importc: "clock", header: "<time.h>".}
  proc difftime(a, b: TTime): float {.importc: "difftime", header: "<time.h>".}

  var
    clocksPerSec {.importc: "CLOCKS_PER_SEC", nodecl.}: int


  # our own procs on top of that:
  proc tmToTimeInfo(tm: structTM): TTimeInfo =
    ## Copies a C ``struct tm`` into a `TTimeInfo`.
    const
      # `tm_wday` is used as an index here; entry 0 is Sunday.
      weekDays: array [0..6, TWeekDay] = [
        dSun, dMon, dTue, dWed, dThu, dFri, dSat]
    result.second = int(tm.second)
    result.minute = int(tm.minute)
    result.hour = int(tm.hour)
    result.monthday = int(tm.monthday)
    result.month = TMonth(tm.month)
    # `tm_year` is an offset from 1900
    result.year = tm.year + 1900'i32
    result.weekday = weekDays[int(tm.weekday)]
    result.yearday = int(tm.yearday)

  proc timeInfoToTM(t: TTimeInfo): structTM =
    ## Copies a `TTimeInfo` into a C ``struct tm`` (inverse of
    ## `tmToTimeInfo`).
    const
      # inverse of the table in `tmToTimeInfo`
      # (presumably TWeekDay runs dMon..dSun -- confirm against its declaration)
      weekDays: array [TWeekDay, int] = [1, 2, 3, 4, 5, 6, 0]
    result.second = t.second
    result.minute = t.minute
    result.hour = t.hour
    result.monthday = t.monthday
    result.month = ord(t.month)
    result.year = t.year - 1900
    result.weekday = weekDays[t.weekday]
    result.yearday = t.yearday
    # a negative `tm_isdst` marks the daylight-saving state as unknown
    result.isdst = -1

  proc `-` (a, b: TTime): int64 =
    result = toBiggestInt(difftime(a, b))

  proc getStartMilsecs(): int =
    result = clock() div (clocksPerSec div 1000)

  proc getTime(): TTime =
    result = timec(nil)

  proc getLocalTime(t: TTime): TTimeInfo =
    var tmp = t   # `localtime` needs an address, so take a local copy
    result = tmToTimeInfo(localtime(addr(tmp))^)
    # copying is needed anyway to provide reentrancy; thus
    # the conversion is not expensive

  proc getGMTime(t: TTime): TTimeInfo =
    var tmp = t   # `gmtime` needs an address, so take a local copy
    result = tmToTimeInfo(gmtime(addr(tmp))^)
    # copying is needed anyway to provide reentrancy; thus
    # the conversion is not expensive

  proc TimeInfoToTime(timeInfo: TTimeInfo): TTime =
    var cTimeInfo = timeInfo # for C++ we have to make a copy,
    # because the header of mktime is broken in my version of libc
    result = mktime(timeInfoToTM(cTimeInfo))

  proc toStringTillNL(p: cstring): string =
    ## Copies `p` up to (not including) the first NUL, LF or CR.
    result = ""
    var i = 0
    while p[i] != '\0' and p[i] != '\10' and
p[i] != '\13':
      add(result, p[i])
      inc(i)
    return result

  proc `$`(timeInfo: TTimeInfo): string =
    # BUGFIX: asctime returns a newline at the end!
    var p = asctime(timeInfoToTM(timeInfo))
    result = toStringTillNL(p)

  proc `$`(time: TTime): string =
    # BUGFIX: ctime returns a newline at the end!
    var a = time
    return toStringTillNL(ctime(addr(a)))

  const
    epochDiff = 116444736000000000'i64
    rateDiff = 10000000'i64 # 100 nsecs

  proc unixTimeToWinTime*(t: TTime): int64 =
    ## converts a UNIX `TTime` (``time_t``) to a Windows file time
    result = int64(t) * rateDiff + epochDiff

  proc winTimeToUnixTime*(t: int64): TTime =
    ## converts a Windows time to a UNIX `TTime` (``time_t``)
    result = TTime((t - epochDiff) div rateDiff)

else:
  # ECMAScript target: `TTime` is a JavaScript `Date` object.
  proc getTime(): TTime {.importc: "new Date", nodecl.}

  const
    # `Date.getDay()` is used as an index here; entry 0 is Sunday.
    weekDays: array [0..6, TWeekDay] = [
      dSun, dMon, dTue, dWed, dThu, dFri, dSat]

  proc getLocalTime(t: TTime): TTimeInfo =
    result.second = t.getSeconds()
    result.minute = t.getMinutes()
    result.hour = t.getHours()
    result.monthday = t.getDate()
    result.month = TMonth(t.getMonth())
    result.year = t.getFullYear()
    result.weekday = weekDays[t.getDay()]
    result.yearday = 0   # not computed on this backend

  proc getGMTime(t: TTime): TTimeInfo =
    result.second = t.getUTCSeconds()
    result.minute = t.getUTCMinutes()
    result.hour = t.getUTCHours()
    result.monthday = t.getUTCDate()
    result.month = TMonth(t.getUTCMonth())
    result.year = t.getUTCFullYear()
    # NOTE(review): every other field uses the UTC accessor, but the weekday
    # is read with the local-time `getDay()`. Near midnight the two can
    # differ; this should probably be `getUTCDay()` -- confirm that the
    # `TTime` binding exposes it before switching.
    result.weekday = weekDays[t.getDay()]
    result.yearday = 0   # not computed on this backend

  proc TimeInfoToTime*(timeInfo: TTimeInfo): TTime =
    ## Builds a `TTime` from the clock and date fields of `timeInfo`;
    ## `weekday` and `yearday` are not consulted.
    result = getTime()
    # `result` starts out as "now". Pin the day to 1 before changing month
    # and year: with a current day of 29..31, `setMonth`/`setFullYear` on a
    # shorter target month would roll the date over into the next month.
    result.setDate(1)
    result.setSeconds(timeInfo.second)
    result.setMinutes(timeInfo.minute)
    result.setHours(timeInfo.hour)
    result.setFullYear(timeInfo.year)
    result.setMonth(ord(timeInfo.month))
    result.setDate(timeInfo.monthday)

  proc `$`(timeInfo: TTimeInfo): string = return $(TimeInfoToTime(timeInfo))
  proc `$`(time: TTime): string = $time.toLocaleString()

  proc `-` (a, b: TTime):
int64 =
    # `Date.getTime()` counts milliseconds, while `-` is documented (and
    # implemented on the C backend) to return seconds.
    return (a.getTime() - b.getTime()) div 1000

  var
    startMilsecs = getTime()

  proc getStartMilsecs(): int =
    ## get the milliseconds from the start of the program
    # Read the millisecond counters directly; `-` on TTime yields seconds.
    var now = getTime()
    return int(now.getTime() - startMilsecs.getTime())

proc getDateStr(): string =
  # zero-padded ``YYYY-MM-DD``; `ord(month)` is 0-based, hence the +1
  var ti = getLocalTime(getTime())
  result = $ti.year & '-' & intToStr(ord(ti.month)+1, 2) &
    '-' & intToStr(ti.monthday, 2)

proc getClockStr(): string =
  # zero-padded ``HH:MM:SS``
  var ti = getLocalTime(getTime())
  result = intToStr(ti.hour, 2) & ':' & intToStr(ti.minute, 2) &
    ':' & intToStr(ti.second, 2)

{.pop.}
diff --git a/nimlib/pure/unicode.nim b/nimlib/pure/unicode.nim new file mode 100755 index 000000000..2a53d7660 --- /dev/null +++ b/nimlib/pure/unicode.nim @@ -0,0 +1,1178 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module provides support to handle the Unicode UTF-8 encoding.

{.deadCodeElim: on.}

type
  irune = int # underlying type of TRune
  TRune* = distinct irune   ## type that can hold any Unicode character
  TRune16* = distinct int16 ## 16 bit Unicode character

proc `<=%`*(a, b: TRune): bool {.borrow.}
proc `<%`*(a, b: TRune): bool {.borrow.}
proc `==`*(a, b: TRune): bool {.borrow.}

# bit mask with the lowest `n` bits set
template ones(n: expr): expr = ((1 shl n)-1)

proc runeLen*(s: string): int =
  ## returns the number of Unicode characters of the string `s`.
  var i = 0
  while i < len(s):
    # the lead byte alone determines how many bytes the rune occupies
    if ord(s[i]) <=% 127: inc(i)
    elif ord(s[i]) shr 5 == 0b110: inc(i, 2)
    elif ord(s[i]) shr 4 == 0b1110: inc(i, 3)
    elif ord(s[i]) shr 3 == 0b11110: inc(i, 4)
    else:
      assert(false)
      # Invalid lead byte: if assertions are disabled the loop must still
      # make progress, otherwise it never terminates. Count it as one rune.
      inc(i)
    inc(result)

proc runeLenAt*(s: string, i: int): int =
  ## returns the number of bytes the rune starting at ``s[i]`` takes.
# Body of runeLenAt; its header and doc comment sit on the previous line.
  if ord(s[i]) <=% 127: result = 1
  elif ord(s[i]) shr 5 == 0b110: result = 2
  elif ord(s[i]) shr 4 == 0b1110: result = 3
  elif ord(s[i]) shr 3 == 0b11110: result = 4
  else:
    assert(false)
    # Invalid lead byte: if assertions are disabled, report a length of one
    # so that callers stepping through `s` by this value always advance.
    result = 1

template fastRuneAt*(s: string, i: int, result: expr, doInc = true) =
  ## Returns the unicode character ``s[i]`` in `result`. If ``doInc == true``
  ## `i` is incremented by the number of bytes that have been processed.
  if ord(s[i]) <=% 127:
    # 1 byte: 0xxxxxxx
    result = TRune(ord(s[i]))
    when doInc: inc(i)
  elif ord(s[i]) shr 5 == 0b110:
    # 2 bytes: 110xxxxx 10xxxxxx
    assert(ord(s[i+1]) shr 6 == 0b10)
    result = TRune((ord(s[i]) and (bind ones(5))) shl 6 or
                   (ord(s[i+1]) and ones(6)))
    when doInc: inc(i, 2)
  elif ord(s[i]) shr 4 == 0b1110:
    # 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    assert(ord(s[i+1]) shr 6 == 0b10)
    assert(ord(s[i+2]) shr 6 == 0b10)
    result = TRune((ord(s[i]) and ones(4)) shl 12 or
             (ord(s[i+1]) and ones(6)) shl 6 or
             (ord(s[i+2]) and ones(6)))
    when doInc: inc(i, 3)
  elif ord(s[i]) shr 3 == 0b11110:
    # 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    assert(ord(s[i+1]) shr 6 == 0b10)
    assert(ord(s[i+2]) shr 6 == 0b10)
    assert(ord(s[i+3]) shr 6 == 0b10)
    result = TRune((ord(s[i]) and ones(3)) shl 18 or
             (ord(s[i+1]) and ones(6)) shl 12 or
             (ord(s[i+2]) and ones(6)) shl 6 or
             (ord(s[i+3]) and ones(6)))
    when doInc: inc(i, 4)
  else:
    assert(false)
    # Invalid lead byte: if assertions are disabled, hand the raw byte back
    # and step over it. Without this, `result` stays unset and a caller
    # looping with ``doInc == true`` would never advance.
    result = TRune(ord(s[i]))
    when doInc: inc(i)

proc runeAt*(s: string, i: int): TRune =
  ## returns the unicode character in `s` at byte index `i`
  fastRuneAt(s, i, result, false)

proc toUTF8*(c: TRune): string =
  ## converts a rune into its UTF8 representation
  var i = irune(c)
  if i <=% 127:
    result = newString(1)
    result[0] = chr(i)
  elif i <=% 0x07FF:
    result = newString(2)
    # The lead byte of a 2-byte sequence is 110xxxxx (0xC0). The previous
    # literal 0b110_0000 was only seven bits wide (0x60) and produced an
    # invalid lead byte.
    result[0] = chr(i shr 6 or 0b110_00000)
    result[1] = chr(i and ones(6) or 0b10_000000)
  elif i <=% 0xFFFF:
    result = newString(3)
    result[0] = chr(i shr 12 or 0b1110_0000)
    result[1] = chr(i shr 6 and ones(6) or 0b10_0000_00)
    result[2] = chr(i and ones(6) or 0b10_0000_00)
  elif i <=% 0x0010FFFF:
    result = newString(4)
    result[0] = chr(i shr 18 or 0b1111_0000)
    result[1] =
chr(i shr 12 and ones(6) or 0b10_0000_00) + result[2] = chr(i shr 6 and ones(6) or 0b10_0000_00) + result[3] = chr(i and ones(6) or 0b10_0000_00) + else: + assert false + +const + alphaRanges = [ + 0x00d8, 0x00f6, # Ø - ö + 0x00f8, 0x01f5, # ø - ǵ + 0x0250, 0x02a8, # ɐ - ʨ + 0x038e, 0x03a1, # Ύ - Ρ + 0x03a3, 0x03ce, # Σ - ώ + 0x03d0, 0x03d6, # ϐ - ϖ + 0x03e2, 0x03f3, # Ϣ - ϳ + 0x0490, 0x04c4, # Ґ - ӄ + 0x0561, 0x0587, # ա - և + 0x05d0, 0x05ea, # א - ת + 0x05f0, 0x05f2, # װ - ײ + 0x0621, 0x063a, # ء - غ + 0x0640, 0x064a, # ـ - ي + 0x0671, 0x06b7, # ٱ - ڷ + 0x06ba, 0x06be, # ں - ھ + 0x06c0, 0x06ce, # ۀ - ێ + 0x06d0, 0x06d3, # ې - ۓ + 0x0905, 0x0939, # अ - ह + 0x0958, 0x0961, # क़ - ॡ + 0x0985, 0x098c, # অ - ঌ + 0x098f, 0x0990, # এ - ঐ + 0x0993, 0x09a8, # ও - ন + 0x09aa, 0x09b0, # প - র + 0x09b6, 0x09b9, # শ - হ + 0x09dc, 0x09dd, # ড় - ঢ় + 0x09df, 0x09e1, # য় - ৡ + 0x09f0, 0x09f1, # ৰ - ৱ + 0x0a05, 0x0a0a, # ਅ - ਊ + 0x0a0f, 0x0a10, # ਏ - ਐ + 0x0a13, 0x0a28, # ਓ - ਨ + 0x0a2a, 0x0a30, # ਪ - ਰ + 0x0a32, 0x0a33, # ਲ - ਲ਼ + 0x0a35, 0x0a36, # ਵ - ਸ਼ + 0x0a38, 0x0a39, # ਸ - ਹ + 0x0a59, 0x0a5c, # ਖ਼ - ੜ + 0x0a85, 0x0a8b, # અ - ઋ + 0x0a8f, 0x0a91, # એ - ઑ + 0x0a93, 0x0aa8, # ઓ - ન + 0x0aaa, 0x0ab0, # પ - ર + 0x0ab2, 0x0ab3, # લ - ળ + 0x0ab5, 0x0ab9, # વ - હ + 0x0b05, 0x0b0c, # ଅ - ଌ + 0x0b0f, 0x0b10, # ଏ - ଐ + 0x0b13, 0x0b28, # ଓ - ନ + 0x0b2a, 0x0b30, # ପ - ର + 0x0b32, 0x0b33, # ଲ - ଳ + 0x0b36, 0x0b39, # ଶ - ହ + 0x0b5c, 0x0b5d, # ଡ଼ - ଢ଼ + 0x0b5f, 0x0b61, # ୟ - ୡ + 0x0b85, 0x0b8a, # அ - ஊ + 0x0b8e, 0x0b90, # எ - ஐ + 0x0b92, 0x0b95, # ஒ - க + 0x0b99, 0x0b9a, # ங - ச + 0x0b9e, 0x0b9f, # ஞ - ட + 0x0ba3, 0x0ba4, # ண - த + 0x0ba8, 0x0baa, # ந - ப + 0x0bae, 0x0bb5, # ம - வ + 0x0bb7, 0x0bb9, # ஷ - ஹ + 0x0c05, 0x0c0c, # అ - ఌ + 0x0c0e, 0x0c10, # ఎ - ఐ + 0x0c12, 0x0c28, # ఒ - న + 0x0c2a, 0x0c33, # ప - ళ + 0x0c35, 0x0c39, # వ - హ + 0x0c60, 0x0c61, # ౠ - ౡ + 0x0c85, 0x0c8c, # ಅ - ಌ + 0x0c8e, 0x0c90, # ಎ - ಐ + 0x0c92, 0x0ca8, # ಒ - ನ + 0x0caa, 0x0cb3, # ಪ - ಳ + 0x0cb5, 0x0cb9, # ವ - ಹ 
+ 0x0ce0, 0x0ce1, # ೠ - ೡ + 0x0d05, 0x0d0c, # അ - ഌ + 0x0d0e, 0x0d10, # എ - ഐ + 0x0d12, 0x0d28, # ഒ - ന + 0x0d2a, 0x0d39, # പ - ഹ + 0x0d60, 0x0d61, # ൠ - ൡ + 0x0e01, 0x0e30, # ก - ะ + 0x0e32, 0x0e33, # า - ำ + 0x0e40, 0x0e46, # เ - ๆ + 0x0e5a, 0x0e5b, # ๚ - ๛ + 0x0e81, 0x0e82, # ກ - ຂ + 0x0e87, 0x0e88, # ງ - ຈ + 0x0e94, 0x0e97, # ດ - ທ + 0x0e99, 0x0e9f, # ນ - ຟ + 0x0ea1, 0x0ea3, # ມ - ຣ + 0x0eaa, 0x0eab, # ສ - ຫ + 0x0ead, 0x0eae, # ອ - ຮ + 0x0eb2, 0x0eb3, # າ - ຳ + 0x0ec0, 0x0ec4, # ເ - ໄ + 0x0edc, 0x0edd, # ໜ - ໝ + 0x0f18, 0x0f19, # ༘ - ༙ + 0x0f40, 0x0f47, # ཀ - ཇ + 0x0f49, 0x0f69, # ཉ - ཀྵ + 0x10d0, 0x10f6, # ა - ჶ + 0x1100, 0x1159, # ᄀ - ᅙ + 0x115f, 0x11a2, # ᅟ - ᆢ + 0x11a8, 0x11f9, # ᆨ - ᇹ + 0x1e00, 0x1e9b, # Ḁ - ẛ + 0x1f50, 0x1f57, # ὐ - ὗ + 0x1f80, 0x1fb4, # ᾀ - ᾴ + 0x1fb6, 0x1fbc, # ᾶ - ᾼ + 0x1fc2, 0x1fc4, # ῂ - ῄ + 0x1fc6, 0x1fcc, # ῆ - ῌ + 0x1fd0, 0x1fd3, # ῐ - ΐ + 0x1fd6, 0x1fdb, # ῖ - Ί + 0x1fe0, 0x1fec, # ῠ - Ῥ + 0x1ff2, 0x1ff4, # ῲ - ῴ + 0x1ff6, 0x1ffc, # ῶ - ῼ + 0x210a, 0x2113, # ℊ - ℓ + 0x2115, 0x211d, # ℕ - ℝ + 0x2120, 0x2122, # ℠ - ™ + 0x212a, 0x2131, # K - ℱ + 0x2133, 0x2138, # ℳ - ℸ + 0x3041, 0x3094, # ぁ - ゔ + 0x30a1, 0x30fa, # ァ - ヺ + 0x3105, 0x312c, # ㄅ - ㄬ + 0x3131, 0x318e, # ㄱ - ㆎ + 0x3192, 0x319f, # ㆒ - ㆟ + 0x3260, 0x327b, # ㉠ - ㉻ + 0x328a, 0x32b0, # ㊊ - ㊰ + 0x32d0, 0x32fe, # ㋐ - ㋾ + 0x3300, 0x3357, # ㌀ - ㍗ + 0x3371, 0x3376, # ㍱ - ㍶ + 0x337b, 0x3394, # ㍻ - ㎔ + 0x3399, 0x339e, # ㎙ - ㎞ + 0x33a9, 0x33ad, # ㎩ - ㎭ + 0x33b0, 0x33c1, # ㎰ - ㏁ + 0x33c3, 0x33c5, # ㏃ - ㏅ + 0x33c7, 0x33d7, # ㏇ - ㏗ + 0x33d9, 0x33dd, # ㏙ - ㏝ + 0x4e00, 0x9fff, # 一 - 鿿 + 0xac00, 0xd7a3, # 가 - 힣 + 0xf900, 0xfb06, # 豈 - st + 0xfb13, 0xfb17, # ﬓ - ﬗ + 0xfb1f, 0xfb28, # ײַ - ﬨ + 0xfb2a, 0xfb36, # שׁ - זּ + 0xfb38, 0xfb3c, # טּ - לּ + 0xfb40, 0xfb41, # נּ - סּ + 0xfb43, 0xfb44, # ףּ - פּ + 0xfb46, 0xfbb1, # צּ - ﮱ + 0xfbd3, 0xfd3d, # ﯓ - ﴽ + 0xfd50, 0xfd8f, # ﵐ - ﶏ + 0xfd92, 0xfdc7, # ﶒ - ﷇ + 0xfdf0, 0xfdf9, # ﷰ - ﷹ + 0xfe70, 0xfe72, # ﹰ - ﹲ + 0xfe76, 0xfefc, # ﹶ - ﻼ + 0xff66, 
0xff6f, # ヲ - ッ + 0xff71, 0xff9d, # ア - ン + 0xffa0, 0xffbe, # ᅠ - ᄒ + 0xffc2, 0xffc7, # ᅡ - ᅦ + 0xffca, 0xffcf, # ᅧ - ᅬ + 0xffd2, 0xffd7, # ᅭ - ᅲ + 0xffda, 0xffdc] # ᅳ - ᅵ + + alphaSinglets = [ + 0x00aa, # ª + 0x00b5, # µ + 0x00ba, # º + 0x03da, # Ϛ + 0x03dc, # Ϝ + 0x03de, # Ϟ + 0x03e0, # Ϡ + 0x06d5, # ە + 0x09b2, # ল + 0x0a5e, # ਫ਼ + 0x0a8d, # ઍ + 0x0ae0, # ૠ + 0x0b9c, # ஜ + 0x0cde, # ೞ + 0x0e4f, # ๏ + 0x0e84, # ຄ + 0x0e8a, # ຊ + 0x0e8d, # ຍ + 0x0ea5, # ລ + 0x0ea7, # ວ + 0x0eb0, # ະ + 0x0ebd, # ຽ + 0x1fbe, # ι + 0x207f, # ⁿ + 0x20a8, # ₨ + 0x2102, # ℂ + 0x2107, # ℇ + 0x2124, # ℤ + 0x2126, # Ω + 0x2128, # ℨ + 0xfb3e, # מּ + 0xfe74] # ﹴ + + spaceRanges = [ + 0x0009, 0x000a, # tab and newline + 0x0020, 0x0020, # space + 0x00a0, 0x00a0, # + 0x2000, 0x200b, # - + 0x2028, 0x2029, # - 0x3000, 0x3000, # + 0xfeff, 0xfeff] # + + toupperRanges = [ + 0x0061, 0x007a, 468, # a-z A-Z + 0x00e0, 0x00f6, 468, # à-ö À-Ö + 0x00f8, 0x00fe, 468, # ø-þ Ø-Þ + 0x0256, 0x0257, 295, # ɖ-ɗ Ɖ-Ɗ + 0x0258, 0x0259, 298, # ɘ-ə Ǝ-Ə + 0x028a, 0x028b, 283, # ʊ-ʋ Ʊ-Ʋ + 0x03ad, 0x03af, 463, # έ-ί Έ-Ί + 0x03b1, 0x03c1, 468, # α-ρ Α-Ρ + 0x03c3, 0x03cb, 468, # σ-ϋ Σ-Ϋ + 0x03cd, 0x03ce, 437, # ύ-ώ Ύ-Ώ + 0x0430, 0x044f, 468, # а-я А-Я + 0x0451, 0x045c, 420, # ё-ќ Ё-Ќ + 0x045e, 0x045f, 420, # ў-џ Ў-Џ + 0x0561, 0x0586, 452, # ա-ֆ Ա-Ֆ + 0x1f00, 0x1f07, 508, # ἀ-ἇ Ἀ-Ἇ + 0x1f10, 0x1f15, 508, # ἐ-ἕ Ἐ-Ἕ + 0x1f20, 0x1f27, 508, # ἠ-ἧ Ἠ-Ἧ + 0x1f30, 0x1f37, 508, # ἰ-ἷ Ἰ-Ἷ + 0x1f40, 0x1f45, 508, # ὀ-ὅ Ὀ-Ὅ + 0x1f60, 0x1f67, 508, # ὠ-ὧ Ὠ-Ὧ + 0x1f70, 0x1f71, 574, # ὰ-ά Ὰ-Ά + 0x1f72, 0x1f75, 586, # ὲ-ή Ὲ-Ή + 0x1f76, 0x1f77, 600, # ὶ-ί Ὶ-Ί + 0x1f78, 0x1f79, 628, # ὸ-ό Ὸ-Ό + 0x1f7a, 0x1f7b, 612, # ὺ-ύ Ὺ-Ύ + 0x1f7c, 0x1f7d, 626, # ὼ-ώ Ὼ-Ώ + 0x1f80, 0x1f87, 508, # ᾀ-ᾇ ᾈ-ᾏ + 0x1f90, 0x1f97, 508, # ᾐ-ᾗ ᾘ-ᾟ + 0x1fa0, 0x1fa7, 508, # ᾠ-ᾧ ᾨ-ᾯ + 0x1fb0, 0x1fb1, 508, # ᾰ-ᾱ Ᾰ-Ᾱ + 0x1fd0, 0x1fd1, 508, # ῐ-ῑ Ῐ-Ῑ + 0x1fe0, 0x1fe1, 508, # ῠ-ῡ Ῠ-Ῡ + 0x2170, 0x217f, 484, # ⅰ-ⅿ Ⅰ-Ⅿ + 0x24d0, 0x24e9, 474, # ⓐ-ⓩ Ⓐ-Ⓩ + 0xff41, 0xff5a, 468] # 
a-z A-Z + + toupperSinglets = [ + 0x00ff, 621, # ÿ Ÿ + 0x0101, 499, # ā Ā + 0x0103, 499, # ă Ă + 0x0105, 499, # ą Ą + 0x0107, 499, # ć Ć + 0x0109, 499, # ĉ Ĉ + 0x010b, 499, # ċ Ċ + 0x010d, 499, # č Č + 0x010f, 499, # ď Ď + 0x0111, 499, # đ Đ + 0x0113, 499, # ē Ē + 0x0115, 499, # ĕ Ĕ + 0x0117, 499, # ė Ė + 0x0119, 499, # ę Ę + 0x011b, 499, # ě Ě + 0x011d, 499, # ĝ Ĝ + 0x011f, 499, # ğ Ğ + 0x0121, 499, # ġ Ġ + 0x0123, 499, # ģ Ģ + 0x0125, 499, # ĥ Ĥ + 0x0127, 499, # ħ Ħ + 0x0129, 499, # ĩ Ĩ + 0x012b, 499, # ī Ī + 0x012d, 499, # ĭ Ĭ + 0x012f, 499, # į Į + 0x0131, 268, # ı I + 0x0133, 499, # ij IJ + 0x0135, 499, # ĵ Ĵ + 0x0137, 499, # ķ Ķ + 0x013a, 499, # ĺ Ĺ + 0x013c, 499, # ļ Ļ + 0x013e, 499, # ľ Ľ + 0x0140, 499, # ŀ Ŀ + 0x0142, 499, # ł Ł + 0x0144, 499, # ń Ń + 0x0146, 499, # ņ Ņ + 0x0148, 499, # ň Ň + 0x014b, 499, # ŋ Ŋ + 0x014d, 499, # ō Ō + 0x014f, 499, # ŏ Ŏ + 0x0151, 499, # ő Ő + 0x0153, 499, # œ Œ + 0x0155, 499, # ŕ Ŕ + 0x0157, 499, # ŗ Ŗ + 0x0159, 499, # ř Ř + 0x015b, 499, # ś Ś + 0x015d, 499, # ŝ Ŝ + 0x015f, 499, # ş Ş + 0x0161, 499, # š Š + 0x0163, 499, # ţ Ţ + 0x0165, 499, # ť Ť + 0x0167, 499, # ŧ Ŧ + 0x0169, 499, # ũ Ũ + 0x016b, 499, # ū Ū + 0x016d, 499, # ŭ Ŭ + 0x016f, 499, # ů Ů + 0x0171, 499, # ű Ű + 0x0173, 499, # ų Ų + 0x0175, 499, # ŵ Ŵ + 0x0177, 499, # ŷ Ŷ + 0x017a, 499, # ź Ź + 0x017c, 499, # ż Ż + 0x017e, 499, # ž Ž + 0x017f, 200, # ſ S + 0x0183, 499, # ƃ Ƃ + 0x0185, 499, # ƅ Ƅ + 0x0188, 499, # ƈ Ƈ + 0x018c, 499, # ƌ Ƌ + 0x0192, 499, # ƒ Ƒ + 0x0199, 499, # ƙ Ƙ + 0x01a1, 499, # ơ Ơ + 0x01a3, 499, # ƣ Ƣ + 0x01a5, 499, # ƥ Ƥ + 0x01a8, 499, # ƨ Ƨ + 0x01ad, 499, # ƭ Ƭ + 0x01b0, 499, # ư Ư + 0x01b4, 499, # ƴ Ƴ + 0x01b6, 499, # ƶ Ƶ + 0x01b9, 499, # ƹ Ƹ + 0x01bd, 499, # ƽ Ƽ + 0x01c5, 499, # Dž DŽ + 0x01c6, 498, # dž DŽ + 0x01c8, 499, # Lj LJ + 0x01c9, 498, # lj LJ + 0x01cb, 499, # Nj NJ + 0x01cc, 498, # nj NJ + 0x01ce, 499, # ǎ Ǎ + 0x01d0, 499, # ǐ Ǐ + 0x01d2, 499, # ǒ Ǒ + 0x01d4, 499, # ǔ Ǔ + 0x01d6, 499, # ǖ Ǖ + 0x01d8, 499, # ǘ Ǘ + 0x01da, 499, # ǚ Ǚ 
+ 0x01dc, 499, # ǜ Ǜ + 0x01df, 499, # ǟ Ǟ + 0x01e1, 499, # ǡ Ǡ + 0x01e3, 499, # ǣ Ǣ + 0x01e5, 499, # ǥ Ǥ + 0x01e7, 499, # ǧ Ǧ + 0x01e9, 499, # ǩ Ǩ + 0x01eb, 499, # ǫ Ǫ + 0x01ed, 499, # ǭ Ǭ + 0x01ef, 499, # ǯ Ǯ + 0x01f2, 499, # Dz DZ + 0x01f3, 498, # dz DZ + 0x01f5, 499, # ǵ Ǵ + 0x01fb, 499, # ǻ Ǻ + 0x01fd, 499, # ǽ Ǽ + 0x01ff, 499, # ǿ Ǿ + 0x0201, 499, # ȁ Ȁ + 0x0203, 499, # ȃ Ȃ + 0x0205, 499, # ȅ Ȅ + 0x0207, 499, # ȇ Ȇ + 0x0209, 499, # ȉ Ȉ + 0x020b, 499, # ȋ Ȋ + 0x020d, 499, # ȍ Ȍ + 0x020f, 499, # ȏ Ȏ + 0x0211, 499, # ȑ Ȑ + 0x0213, 499, # ȓ Ȓ + 0x0215, 499, # ȕ Ȕ + 0x0217, 499, # ȗ Ȗ + 0x0253, 290, # ɓ Ɓ + 0x0254, 294, # ɔ Ɔ + 0x025b, 297, # ɛ Ɛ + 0x0260, 295, # ɠ Ɠ + 0x0263, 293, # ɣ Ɣ + 0x0268, 291, # ɨ Ɨ + 0x0269, 289, # ɩ Ɩ + 0x026f, 289, # ɯ Ɯ + 0x0272, 287, # ɲ Ɲ + 0x0283, 282, # ʃ Ʃ + 0x0288, 282, # ʈ Ʈ + 0x0292, 281, # ʒ Ʒ + 0x03ac, 462, # ά Ά + 0x03cc, 436, # ό Ό + 0x03d0, 438, # ϐ Β + 0x03d1, 443, # ϑ Θ + 0x03d5, 453, # ϕ Φ + 0x03d6, 446, # ϖ Π + 0x03e3, 499, # ϣ Ϣ + 0x03e5, 499, # ϥ Ϥ + 0x03e7, 499, # ϧ Ϧ + 0x03e9, 499, # ϩ Ϩ + 0x03eb, 499, # ϫ Ϫ + 0x03ed, 499, # ϭ Ϭ + 0x03ef, 499, # ϯ Ϯ + 0x03f0, 414, # ϰ Κ + 0x03f1, 420, # ϱ Ρ + 0x0461, 499, # ѡ Ѡ + 0x0463, 499, # ѣ Ѣ + 0x0465, 499, # ѥ Ѥ + 0x0467, 499, # ѧ Ѧ + 0x0469, 499, # ѩ Ѩ + 0x046b, 499, # ѫ Ѫ + 0x046d, 499, # ѭ Ѭ + 0x046f, 499, # ѯ Ѯ + 0x0471, 499, # ѱ Ѱ + 0x0473, 499, # ѳ Ѳ + 0x0475, 499, # ѵ Ѵ + 0x0477, 499, # ѷ Ѷ + 0x0479, 499, # ѹ Ѹ + 0x047b, 499, # ѻ Ѻ + 0x047d, 499, # ѽ Ѽ + 0x047f, 499, # ѿ Ѿ + 0x0481, 499, # ҁ Ҁ + 0x0491, 499, # ґ Ґ + 0x0493, 499, # ғ Ғ + 0x0495, 499, # ҕ Ҕ + 0x0497, 499, # җ Җ + 0x0499, 499, # ҙ Ҙ + 0x049b, 499, # қ Қ + 0x049d, 499, # ҝ Ҝ + 0x049f, 499, # ҟ Ҟ + 0x04a1, 499, # ҡ Ҡ + 0x04a3, 499, # ң Ң + 0x04a5, 499, # ҥ Ҥ + 0x04a7, 499, # ҧ Ҧ + 0x04a9, 499, # ҩ Ҩ + 0x04ab, 499, # ҫ Ҫ + 0x04ad, 499, # ҭ Ҭ + 0x04af, 499, # ү Ү + 0x04b1, 499, # ұ Ұ + 0x04b3, 499, # ҳ Ҳ + 0x04b5, 499, # ҵ Ҵ + 0x04b7, 499, # ҷ Ҷ + 0x04b9, 499, # ҹ Ҹ + 0x04bb, 499, # һ Һ + 0x04bd, 499, # ҽ Ҽ 
+ 0x04bf, 499, # ҿ Ҿ + 0x04c2, 499, # ӂ Ӂ + 0x04c4, 499, # ӄ Ӄ + 0x04c8, 499, # ӈ Ӈ + 0x04cc, 499, # ӌ Ӌ + 0x04d1, 499, # ӑ Ӑ + 0x04d3, 499, # ӓ Ӓ + 0x04d5, 499, # ӕ Ӕ + 0x04d7, 499, # ӗ Ӗ + 0x04d9, 499, # ә Ә + 0x04db, 499, # ӛ Ӛ + 0x04dd, 499, # ӝ Ӝ + 0x04df, 499, # ӟ Ӟ + 0x04e1, 499, # ӡ Ӡ + 0x04e3, 499, # ӣ Ӣ + 0x04e5, 499, # ӥ Ӥ + 0x04e7, 499, # ӧ Ӧ + 0x04e9, 499, # ө Ө + 0x04eb, 499, # ӫ Ӫ + 0x04ef, 499, # ӯ Ӯ + 0x04f1, 499, # ӱ Ӱ + 0x04f3, 499, # ӳ Ӳ + 0x04f5, 499, # ӵ Ӵ + 0x04f9, 499, # ӹ Ӹ + 0x1e01, 499, # ḁ Ḁ + 0x1e03, 499, # ḃ Ḃ + 0x1e05, 499, # ḅ Ḅ + 0x1e07, 499, # ḇ Ḇ + 0x1e09, 499, # ḉ Ḉ + 0x1e0b, 499, # ḋ Ḋ + 0x1e0d, 499, # ḍ Ḍ + 0x1e0f, 499, # ḏ Ḏ + 0x1e11, 499, # ḑ Ḑ + 0x1e13, 499, # ḓ Ḓ + 0x1e15, 499, # ḕ Ḕ + 0x1e17, 499, # ḗ Ḗ + 0x1e19, 499, # ḙ Ḙ + 0x1e1b, 499, # ḛ Ḛ + 0x1e1d, 499, # ḝ Ḝ + 0x1e1f, 499, # ḟ Ḟ + 0x1e21, 499, # ḡ Ḡ + 0x1e23, 499, # ḣ Ḣ + 0x1e25, 499, # ḥ Ḥ + 0x1e27, 499, # ḧ Ḧ + 0x1e29, 499, # ḩ Ḩ + 0x1e2b, 499, # ḫ Ḫ + 0x1e2d, 499, # ḭ Ḭ + 0x1e2f, 499, # ḯ Ḯ + 0x1e31, 499, # ḱ Ḱ + 0x1e33, 499, # ḳ Ḳ + 0x1e35, 499, # ḵ Ḵ + 0x1e37, 499, # ḷ Ḷ + 0x1e39, 499, # ḹ Ḹ + 0x1e3b, 499, # ḻ Ḻ + 0x1e3d, 499, # ḽ Ḽ + 0x1e3f, 499, # ḿ Ḿ + 0x1e41, 499, # ṁ Ṁ + 0x1e43, 499, # ṃ Ṃ + 0x1e45, 499, # ṅ Ṅ + 0x1e47, 499, # ṇ Ṇ + 0x1e49, 499, # ṉ Ṉ + 0x1e4b, 499, # ṋ Ṋ + 0x1e4d, 499, # ṍ Ṍ + 0x1e4f, 499, # ṏ Ṏ + 0x1e51, 499, # ṑ Ṑ + 0x1e53, 499, # ṓ Ṓ + 0x1e55, 499, # ṕ Ṕ + 0x1e57, 499, # ṗ Ṗ + 0x1e59, 499, # ṙ Ṙ + 0x1e5b, 499, # ṛ Ṛ + 0x1e5d, 499, # ṝ Ṝ + 0x1e5f, 499, # ṟ Ṟ + 0x1e61, 499, # ṡ Ṡ + 0x1e63, 499, # ṣ Ṣ + 0x1e65, 499, # ṥ Ṥ + 0x1e67, 499, # ṧ Ṧ + 0x1e69, 499, # ṩ Ṩ + 0x1e6b, 499, # ṫ Ṫ + 0x1e6d, 499, # ṭ Ṭ + 0x1e6f, 499, # ṯ Ṯ + 0x1e71, 499, # ṱ Ṱ + 0x1e73, 499, # ṳ Ṳ + 0x1e75, 499, # ṵ Ṵ + 0x1e77, 499, # ṷ Ṷ + 0x1e79, 499, # ṹ Ṹ + 0x1e7b, 499, # ṻ Ṻ + 0x1e7d, 499, # ṽ Ṽ + 0x1e7f, 499, # ṿ Ṿ + 0x1e81, 499, # ẁ Ẁ + 0x1e83, 499, # ẃ Ẃ + 0x1e85, 499, # ẅ Ẅ + 0x1e87, 499, # ẇ Ẇ + 0x1e89, 499, # ẉ Ẉ + 0x1e8b, 499, # ẋ Ẋ + 0x1e8d, 499, # ẍ Ẍ + 
0x1e8f, 499, # ẏ Ẏ + 0x1e91, 499, # ẑ Ẑ + 0x1e93, 499, # ẓ Ẓ + 0x1e95, 499, # ẕ Ẕ + 0x1ea1, 499, # ạ Ạ + 0x1ea3, 499, # ả Ả + 0x1ea5, 499, # ấ Ấ + 0x1ea7, 499, # ầ Ầ + 0x1ea9, 499, # ẩ Ẩ + 0x1eab, 499, # ẫ Ẫ + 0x1ead, 499, # ậ Ậ + 0x1eaf, 499, # ắ Ắ + 0x1eb1, 499, # ằ Ằ + 0x1eb3, 499, # ẳ Ẳ + 0x1eb5, 499, # ẵ Ẵ + 0x1eb7, 499, # ặ Ặ + 0x1eb9, 499, # ẹ Ẹ + 0x1ebb, 499, # ẻ Ẻ + 0x1ebd, 499, # ẽ Ẽ + 0x1ebf, 499, # ế Ế + 0x1ec1, 499, # ề Ề + 0x1ec3, 499, # ể Ể + 0x1ec5, 499, # ễ Ễ + 0x1ec7, 499, # ệ Ệ + 0x1ec9, 499, # ỉ Ỉ + 0x1ecb, 499, # ị Ị + 0x1ecd, 499, # ọ Ọ + 0x1ecf, 499, # ỏ Ỏ + 0x1ed1, 499, # ố Ố + 0x1ed3, 499, # ồ Ồ + 0x1ed5, 499, # ổ Ổ + 0x1ed7, 499, # ỗ Ỗ + 0x1ed9, 499, # ộ Ộ + 0x1edb, 499, # ớ Ớ + 0x1edd, 499, # ờ Ờ + 0x1edf, 499, # ở Ở + 0x1ee1, 499, # ỡ Ỡ + 0x1ee3, 499, # ợ Ợ + 0x1ee5, 499, # ụ Ụ + 0x1ee7, 499, # ủ Ủ + 0x1ee9, 499, # ứ Ứ + 0x1eeb, 499, # ừ Ừ + 0x1eed, 499, # ử Ử + 0x1eef, 499, # ữ Ữ + 0x1ef1, 499, # ự Ự + 0x1ef3, 499, # ỳ Ỳ + 0x1ef5, 499, # ỵ Ỵ + 0x1ef7, 499, # ỷ Ỷ + 0x1ef9, 499, # ỹ Ỹ + 0x1f51, 508, # ὑ Ὑ + 0x1f53, 508, # ὓ Ὓ + 0x1f55, 508, # ὕ Ὕ + 0x1f57, 508, # ὗ Ὗ + 0x1fb3, 509, # ᾳ ᾼ + 0x1fc3, 509, # ῃ ῌ + 0x1fe5, 507, # ῥ Ῥ + 0x1ff3, 509] # ῳ ῼ + + tolowerRanges = [ + 0x0041, 0x005a, 532, # A-Z a-z + 0x00c0, 0x00d6, 532, # À-Ö à-ö + 0x00d8, 0x00de, 532, # Ø-Þ ø-þ + 0x0189, 0x018a, 705, # Ɖ-Ɗ ɖ-ɗ + 0x018e, 0x018f, 702, # Ǝ-Ə ɘ-ə + 0x01b1, 0x01b2, 717, # Ʊ-Ʋ ʊ-ʋ + 0x0388, 0x038a, 537, # Έ-Ί έ-ί + 0x038e, 0x038f, 563, # Ύ-Ώ ύ-ώ + 0x0391, 0x03a1, 532, # Α-Ρ α-ρ + 0x03a3, 0x03ab, 532, # Σ-Ϋ σ-ϋ + 0x0401, 0x040c, 580, # Ё-Ќ ё-ќ + 0x040e, 0x040f, 580, # Ў-Џ ў-џ + 0x0410, 0x042f, 532, # А-Я а-я + 0x0531, 0x0556, 548, # Ա-Ֆ ա-ֆ + 0x10a0, 0x10c5, 548, # Ⴀ-Ⴥ ა-ჵ + 0x1f08, 0x1f0f, 492, # Ἀ-Ἇ ἀ-ἇ + 0x1f18, 0x1f1d, 492, # Ἐ-Ἕ ἐ-ἕ + 0x1f28, 0x1f2f, 492, # Ἠ-Ἧ ἠ-ἧ + 0x1f38, 0x1f3f, 492, # Ἰ-Ἷ ἰ-ἷ + 0x1f48, 0x1f4d, 492, # Ὀ-Ὅ ὀ-ὅ + 0x1f68, 0x1f6f, 492, # Ὠ-Ὧ ὠ-ὧ + 0x1f88, 0x1f8f, 492, # ᾈ-ᾏ ᾀ-ᾇ + 0x1f98, 0x1f9f, 492, # ᾘ-ᾟ ᾐ-ᾗ + 0x1fa8, 0x1faf, 492, 
# ᾨ-ᾯ ᾠ-ᾧ + 0x1fb8, 0x1fb9, 492, # Ᾰ-Ᾱ ᾰ-ᾱ + 0x1fba, 0x1fbb, 426, # Ὰ-Ά ὰ-ά + 0x1fc8, 0x1fcb, 414, # Ὲ-Ή ὲ-ή + 0x1fd8, 0x1fd9, 492, # Ῐ-Ῑ ῐ-ῑ + 0x1fda, 0x1fdb, 400, # Ὶ-Ί ὶ-ί + 0x1fe8, 0x1fe9, 492, # Ῠ-Ῡ ῠ-ῡ + 0x1fea, 0x1feb, 388, # Ὺ-Ύ ὺ-ύ + 0x1ff8, 0x1ff9, 372, # Ὸ-Ό ὸ-ό + 0x1ffa, 0x1ffb, 374, # Ὼ-Ώ ὼ-ώ + 0x2160, 0x216f, 516, # Ⅰ-Ⅿ ⅰ-ⅿ + 0x24b6, 0x24cf, 526, # Ⓐ-Ⓩ ⓐ-ⓩ + 0xff21, 0xff3a, 532] # A-Z a-z + + tolowerSinglets = [ + 0x0100, 501, # Ā ā + 0x0102, 501, # Ă ă + 0x0104, 501, # Ą ą + 0x0106, 501, # Ć ć + 0x0108, 501, # Ĉ ĉ + 0x010a, 501, # Ċ ċ + 0x010c, 501, # Č č + 0x010e, 501, # Ď ď + 0x0110, 501, # Đ đ + 0x0112, 501, # Ē ē + 0x0114, 501, # Ĕ ĕ + 0x0116, 501, # Ė ė + 0x0118, 501, # Ę ę + 0x011a, 501, # Ě ě + 0x011c, 501, # Ĝ ĝ + 0x011e, 501, # Ğ ğ + 0x0120, 501, # Ġ ġ + 0x0122, 501, # Ģ ģ + 0x0124, 501, # Ĥ ĥ + 0x0126, 501, # Ħ ħ + 0x0128, 501, # Ĩ ĩ + 0x012a, 501, # Ī ī + 0x012c, 501, # Ĭ ĭ + 0x012e, 501, # Į į + 0x0130, 301, # İ i + 0x0132, 501, # IJ ij + 0x0134, 501, # Ĵ ĵ + 0x0136, 501, # Ķ ķ + 0x0139, 501, # Ĺ ĺ + 0x013b, 501, # Ļ ļ + 0x013d, 501, # Ľ ľ + 0x013f, 501, # Ŀ ŀ + 0x0141, 501, # Ł ł + 0x0143, 501, # Ń ń + 0x0145, 501, # Ņ ņ + 0x0147, 501, # Ň ň + 0x014a, 501, # Ŋ ŋ + 0x014c, 501, # Ō ō + 0x014e, 501, # Ŏ ŏ + 0x0150, 501, # Ő ő + 0x0152, 501, # Œ œ + 0x0154, 501, # Ŕ ŕ + 0x0156, 501, # Ŗ ŗ + 0x0158, 501, # Ř ř + 0x015a, 501, # Ś ś + 0x015c, 501, # Ŝ ŝ + 0x015e, 501, # Ş ş + 0x0160, 501, # Š š + 0x0162, 501, # Ţ ţ + 0x0164, 501, # Ť ť + 0x0166, 501, # Ŧ ŧ + 0x0168, 501, # Ũ ũ + 0x016a, 501, # Ū ū + 0x016c, 501, # Ŭ ŭ + 0x016e, 501, # Ů ů + 0x0170, 501, # Ű ű + 0x0172, 501, # Ų ų + 0x0174, 501, # Ŵ ŵ + 0x0176, 501, # Ŷ ŷ + 0x0178, 379, # Ÿ ÿ + 0x0179, 501, # Ź ź + 0x017b, 501, # Ż ż + 0x017d, 501, # Ž ž + 0x0181, 710, # Ɓ ɓ + 0x0182, 501, # Ƃ ƃ + 0x0184, 501, # Ƅ ƅ + 0x0186, 706, # Ɔ ɔ + 0x0187, 501, # Ƈ ƈ + 0x018b, 501, # Ƌ ƌ + 0x0190, 703, # Ɛ ɛ + 0x0191, 501, # Ƒ ƒ + 0x0193, 705, # Ɠ ɠ + 0x0194, 707, # Ɣ ɣ + 0x0196, 711, # Ɩ ɩ + 0x0197, 
709, # Ɨ ɨ + 0x0198, 501, # Ƙ ƙ + 0x019c, 711, # Ɯ ɯ + 0x019d, 713, # Ɲ ɲ + 0x01a0, 501, # Ơ ơ + 0x01a2, 501, # Ƣ ƣ + 0x01a4, 501, # Ƥ ƥ + 0x01a7, 501, # Ƨ ƨ + 0x01a9, 718, # Ʃ ʃ + 0x01ac, 501, # Ƭ ƭ + 0x01ae, 718, # Ʈ ʈ + 0x01af, 501, # Ư ư + 0x01b3, 501, # Ƴ ƴ + 0x01b5, 501, # Ƶ ƶ + 0x01b7, 719, # Ʒ ʒ + 0x01b8, 501, # Ƹ ƹ + 0x01bc, 501, # Ƽ ƽ + 0x01c4, 502, # DŽ dž + 0x01c5, 501, # Dž dž + 0x01c7, 502, # LJ lj + 0x01c8, 501, # Lj lj + 0x01ca, 502, # NJ nj + 0x01cb, 501, # Nj nj + 0x01cd, 501, # Ǎ ǎ + 0x01cf, 501, # Ǐ ǐ + 0x01d1, 501, # Ǒ ǒ + 0x01d3, 501, # Ǔ ǔ + 0x01d5, 501, # Ǖ ǖ + 0x01d7, 501, # Ǘ ǘ + 0x01d9, 501, # Ǚ ǚ + 0x01db, 501, # Ǜ ǜ + 0x01de, 501, # Ǟ ǟ + 0x01e0, 501, # Ǡ ǡ + 0x01e2, 501, # Ǣ ǣ + 0x01e4, 501, # Ǥ ǥ + 0x01e6, 501, # Ǧ ǧ + 0x01e8, 501, # Ǩ ǩ + 0x01ea, 501, # Ǫ ǫ + 0x01ec, 501, # Ǭ ǭ + 0x01ee, 501, # Ǯ ǯ + 0x01f1, 502, # DZ dz + 0x01f2, 501, # Dz dz + 0x01f4, 501, # Ǵ ǵ + 0x01fa, 501, # Ǻ ǻ + 0x01fc, 501, # Ǽ ǽ + 0x01fe, 501, # Ǿ ǿ + 0x0200, 501, # Ȁ ȁ + 0x0202, 501, # Ȃ ȃ + 0x0204, 501, # Ȅ ȅ + 0x0206, 501, # Ȇ ȇ + 0x0208, 501, # Ȉ ȉ + 0x020a, 501, # Ȋ ȋ + 0x020c, 501, # Ȍ ȍ + 0x020e, 501, # Ȏ ȏ + 0x0210, 501, # Ȑ ȑ + 0x0212, 501, # Ȓ ȓ + 0x0214, 501, # Ȕ ȕ + 0x0216, 501, # Ȗ ȗ + 0x0386, 538, # Ά ά + 0x038c, 564, # Ό ό + 0x03e2, 501, # Ϣ ϣ + 0x03e4, 501, # Ϥ ϥ + 0x03e6, 501, # Ϧ ϧ + 0x03e8, 501, # Ϩ ϩ + 0x03ea, 501, # Ϫ ϫ + 0x03ec, 501, # Ϭ ϭ + 0x03ee, 501, # Ϯ ϯ + 0x0460, 501, # Ѡ ѡ + 0x0462, 501, # Ѣ ѣ + 0x0464, 501, # Ѥ ѥ + 0x0466, 501, # Ѧ ѧ + 0x0468, 501, # Ѩ ѩ + 0x046a, 501, # Ѫ ѫ + 0x046c, 501, # Ѭ ѭ + 0x046e, 501, # Ѯ ѯ + 0x0470, 501, # Ѱ ѱ + 0x0472, 501, # Ѳ ѳ + 0x0474, 501, # Ѵ ѵ + 0x0476, 501, # Ѷ ѷ + 0x0478, 501, # Ѹ ѹ + 0x047a, 501, # Ѻ ѻ + 0x047c, 501, # Ѽ ѽ + 0x047e, 501, # Ѿ ѿ + 0x0480, 501, # Ҁ ҁ + 0x0490, 501, # Ґ ґ + 0x0492, 501, # Ғ ғ + 0x0494, 501, # Ҕ ҕ + 0x0496, 501, # Җ җ + 0x0498, 501, # Ҙ ҙ + 0x049a, 501, # Қ қ + 0x049c, 501, # Ҝ ҝ + 0x049e, 501, # Ҟ ҟ + 0x04a0, 501, # Ҡ ҡ + 0x04a2, 501, # Ң ң + 0x04a4, 501, # Ҥ 
ҥ + 0x04a6, 501, # Ҧ ҧ + 0x04a8, 501, # Ҩ ҩ + 0x04aa, 501, # Ҫ ҫ + 0x04ac, 501, # Ҭ ҭ + 0x04ae, 501, # Ү ү + 0x04b0, 501, # Ұ ұ + 0x04b2, 501, # Ҳ ҳ + 0x04b4, 501, # Ҵ ҵ + 0x04b6, 501, # Ҷ ҷ + 0x04b8, 501, # Ҹ ҹ + 0x04ba, 501, # Һ һ + 0x04bc, 501, # Ҽ ҽ + 0x04be, 501, # Ҿ ҿ + 0x04c1, 501, # Ӂ ӂ + 0x04c3, 501, # Ӄ ӄ + 0x04c7, 501, # Ӈ ӈ + 0x04cb, 501, # Ӌ ӌ + 0x04d0, 501, # Ӑ ӑ + 0x04d2, 501, # Ӓ ӓ + 0x04d4, 501, # Ӕ ӕ + 0x04d6, 501, # Ӗ ӗ + 0x04d8, 501, # Ә ә + 0x04da, 501, # Ӛ ӛ + 0x04dc, 501, # Ӝ ӝ + 0x04de, 501, # Ӟ ӟ + 0x04e0, 501, # Ӡ ӡ + 0x04e2, 501, # Ӣ ӣ + 0x04e4, 501, # Ӥ ӥ + 0x04e6, 501, # Ӧ ӧ + 0x04e8, 501, # Ө ө + 0x04ea, 501, # Ӫ ӫ + 0x04ee, 501, # Ӯ ӯ + 0x04f0, 501, # Ӱ ӱ + 0x04f2, 501, # Ӳ ӳ + 0x04f4, 501, # Ӵ ӵ + 0x04f8, 501, # Ӹ ӹ + 0x1e00, 501, # Ḁ ḁ + 0x1e02, 501, # Ḃ ḃ + 0x1e04, 501, # Ḅ ḅ + 0x1e06, 501, # Ḇ ḇ + 0x1e08, 501, # Ḉ ḉ + 0x1e0a, 501, # Ḋ ḋ + 0x1e0c, 501, # Ḍ ḍ + 0x1e0e, 501, # Ḏ ḏ + 0x1e10, 501, # Ḑ ḑ + 0x1e12, 501, # Ḓ ḓ + 0x1e14, 501, # Ḕ ḕ + 0x1e16, 501, # Ḗ ḗ + 0x1e18, 501, # Ḙ ḙ + 0x1e1a, 501, # Ḛ ḛ + 0x1e1c, 501, # Ḝ ḝ + 0x1e1e, 501, # Ḟ ḟ + 0x1e20, 501, # Ḡ ḡ + 0x1e22, 501, # Ḣ ḣ + 0x1e24, 501, # Ḥ ḥ + 0x1e26, 501, # Ḧ ḧ + 0x1e28, 501, # Ḩ ḩ + 0x1e2a, 501, # Ḫ ḫ + 0x1e2c, 501, # Ḭ ḭ + 0x1e2e, 501, # Ḯ ḯ + 0x1e30, 501, # Ḱ ḱ + 0x1e32, 501, # Ḳ ḳ + 0x1e34, 501, # Ḵ ḵ + 0x1e36, 501, # Ḷ ḷ + 0x1e38, 501, # Ḹ ḹ + 0x1e3a, 501, # Ḻ ḻ + 0x1e3c, 501, # Ḽ ḽ + 0x1e3e, 501, # Ḿ ḿ + 0x1e40, 501, # Ṁ ṁ + 0x1e42, 501, # Ṃ ṃ + 0x1e44, 501, # Ṅ ṅ + 0x1e46, 501, # Ṇ ṇ + 0x1e48, 501, # Ṉ ṉ + 0x1e4a, 501, # Ṋ ṋ + 0x1e4c, 501, # Ṍ ṍ + 0x1e4e, 501, # Ṏ ṏ + 0x1e50, 501, # Ṑ ṑ + 0x1e52, 501, # Ṓ ṓ + 0x1e54, 501, # Ṕ ṕ + 0x1e56, 501, # Ṗ ṗ + 0x1e58, 501, # Ṙ ṙ + 0x1e5a, 501, # Ṛ ṛ + 0x1e5c, 501, # Ṝ ṝ + 0x1e5e, 501, # Ṟ ṟ + 0x1e60, 501, # Ṡ ṡ + 0x1e62, 501, # Ṣ ṣ + 0x1e64, 501, # Ṥ ṥ + 0x1e66, 501, # Ṧ ṧ + 0x1e68, 501, # Ṩ ṩ + 0x1e6a, 501, # Ṫ ṫ + 0x1e6c, 501, # Ṭ ṭ + 0x1e6e, 501, # Ṯ ṯ + 0x1e70, 501, # Ṱ ṱ + 0x1e72, 501, # Ṳ ṳ + 0x1e74, 501, # Ṵ ṵ + 
0x1e76, 501, # Ṷ ṷ + 0x1e78, 501, # Ṹ ṹ + 0x1e7a, 501, # Ṻ ṻ + 0x1e7c, 501, # Ṽ ṽ + 0x1e7e, 501, # Ṿ ṿ + 0x1e80, 501, # Ẁ ẁ + 0x1e82, 501, # Ẃ ẃ + 0x1e84, 501, # Ẅ ẅ + 0x1e86, 501, # Ẇ ẇ + 0x1e88, 501, # Ẉ ẉ + 0x1e8a, 501, # Ẋ ẋ + 0x1e8c, 501, # Ẍ ẍ + 0x1e8e, 501, # Ẏ ẏ + 0x1e90, 501, # Ẑ ẑ + 0x1e92, 501, # Ẓ ẓ + 0x1e94, 501, # Ẕ ẕ + 0x1ea0, 501, # Ạ ạ + 0x1ea2, 501, # Ả ả + 0x1ea4, 501, # Ấ ấ + 0x1ea6, 501, # Ầ ầ + 0x1ea8, 501, # Ẩ ẩ + 0x1eaa, 501, # Ẫ ẫ + 0x1eac, 501, # Ậ ậ + 0x1eae, 501, # Ắ ắ + 0x1eb0, 501, # Ằ ằ + 0x1eb2, 501, # Ẳ ẳ + 0x1eb4, 501, # Ẵ ẵ + 0x1eb6, 501, # Ặ ặ + 0x1eb8, 501, # Ẹ ẹ + 0x1eba, 501, # Ẻ ẻ + 0x1ebc, 501, # Ẽ ẽ + 0x1ebe, 501, # Ế ế + 0x1ec0, 501, # Ề ề + 0x1ec2, 501, # Ể ể + 0x1ec4, 501, # Ễ ễ + 0x1ec6, 501, # Ệ ệ + 0x1ec8, 501, # Ỉ ỉ + 0x1eca, 501, # Ị ị + 0x1ecc, 501, # Ọ ọ + 0x1ece, 501, # Ỏ ỏ + 0x1ed0, 501, # Ố ố + 0x1ed2, 501, # Ồ ồ + 0x1ed4, 501, # Ổ ổ + 0x1ed6, 501, # Ỗ ỗ + 0x1ed8, 501, # Ộ ộ + 0x1eda, 501, # Ớ ớ + 0x1edc, 501, # Ờ ờ + 0x1ede, 501, # Ở ở + 0x1ee0, 501, # Ỡ ỡ + 0x1ee2, 501, # Ợ ợ + 0x1ee4, 501, # Ụ ụ + 0x1ee6, 501, # Ủ ủ + 0x1ee8, 501, # Ứ ứ + 0x1eea, 501, # Ừ ừ + 0x1eec, 501, # Ử ử + 0x1eee, 501, # Ữ ữ + 0x1ef0, 501, # Ự ự + 0x1ef2, 501, # Ỳ ỳ + 0x1ef4, 501, # Ỵ ỵ + 0x1ef6, 501, # Ỷ ỷ + 0x1ef8, 501, # Ỹ ỹ + 0x1f59, 492, # Ὑ ὑ + 0x1f5b, 492, # Ὓ ὓ + 0x1f5d, 492, # Ὕ ὕ + 0x1f5f, 492, # Ὗ ὗ + 0x1fbc, 491, # ᾼ ᾳ + 0x1fcc, 491, # ῌ ῃ + 0x1fec, 493, # Ῥ ῥ + 0x1ffc, 491] # ῼ ῳ + + toTitleSinglets = [ + 0x01c4, 501, # DŽ Dž + 0x01c6, 499, # dž Dž + 0x01c7, 501, # LJ Lj + 0x01c9, 499, # lj Lj + 0x01ca, 501, # NJ Nj + 0x01cc, 499, # nj Nj + 0x01f1, 501, # DZ Dz + 0x01f3, 499] # dz Dz + +proc binarySearch(c: irune, tab: openArray[iRune], len, stride: int): int = + var n = len + var t = 0 + while n > 1: + var m = n div 2 + var p = t + m*stride + if c >= tab[p]: + t = p + n = n-m + else: + n = m + if n != 0 and c >= tab[t]: + return t + return -1 + +proc toLower*(c: TRune): TRune = + ## Converts `c` into lower case. 
# Rest of toLower; its header and first doc line sit on the previous line.
  ## This works for any Unicode character.
  ## If possible, prefer `toLower` over `toUpper`.
  var code = irune(c)
  # range table: triples of (first, last, offset + 500)
  var idx = binarySearch(code, tolowerRanges, len(tolowerRanges) div 3, 3)
  if idx >= 0 and tolowerRanges[idx] <= code and code <= tolowerRanges[idx+1]:
    return TRune(code + tolowerRanges[idx+2] - 500)
  # singlet table: pairs of (code point, offset + 500)
  idx = binarySearch(code, tolowerSinglets, len(tolowerSinglets) div 2, 2)
  if idx >= 0 and tolowerSinglets[idx] == code:
    return TRune(code + tolowerSinglets[idx+1] - 500)
  result = TRune(code)

proc toUpper*(c: TRune): TRune =
  ## Converts `c` into upper case. This works for any Unicode character.
  ## If possible, prefer `toLower` over `toUpper`.
  var code = irune(c)
  # range table: triples of (first, last, offset + 500)
  var idx = binarySearch(code, toupperRanges, len(toupperRanges) div 3, 3)
  if idx >= 0 and toupperRanges[idx] <= code and code <= toupperRanges[idx+1]:
    return TRune(code + toupperRanges[idx+2] - 500)
  # singlet table: pairs of (code point, offset + 500)
  idx = binarySearch(code, toupperSinglets, len(toupperSinglets) div 2, 2)
  if idx >= 0 and toupperSinglets[idx] == code:
    return TRune(code + toupperSinglets[idx+1] - 500)
  result = TRune(code)

proc toTitle*(c: TRune): TRune =
  ## Converts `c` into title case. Only the runes listed in
  ## `toTitleSinglets` are changed; every other rune is returned as is.
  var code = irune(c)
  var idx = binarySearch(code, toTitleSinglets, len(toTitleSinglets) div 2, 2)
  if idx >= 0 and toTitleSinglets[idx] == code:
    return TRune(code + toTitleSinglets[idx+1] - 500)
  result = TRune(code)

proc isLower*(c: TRune): bool =
  ## returns true iff `c` is a lower case Unicode character
  ## If possible, prefer `isLower` over `isUpper`.
  var code = irune(c)
  # Note: toUpperRanges is correct here! A rune counts as lower case
  # exactly when the to-upper tables hold a mapping for it.
  var idx = binarySearch(code, toupperRanges, len(toupperRanges) div 3, 3)
  if idx >= 0 and toupperRanges[idx] <= code and code <= toupperRanges[idx+1]:
    return true
  idx = binarySearch(code, toupperSinglets, len(toupperSinglets) div 2, 2)
  if idx >= 0 and toupperSinglets[idx] == code:
    return true

proc isUpper*(c: TRune): bool =
  ## returns true iff `c` is a upper case Unicode character
  ## If possible, prefer `isLower` over `isUpper`.
  var c = irune(c)
  # Note: toLowerRanges is correct here!
# Rest of isUpper; `c` is declared at the end of the previous line.
  var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3)
  if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]:
    return true
  p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2)
  if p >= 0 and c == toLowerSinglets[p]:
    return true

proc isAlpha*(c: TRune): bool =
  ## returns true iff `c` is an *alpha* Unicode character (i.e. a letter)
  # cased letters are covered by the case-mapping tables
  if isUpper(c) or isLower(c):
    return true
  var c = irune(c)
  # caseless letters: pairs of (first, last) ...
  var p = binarySearch(c, alphaRanges, len(alphaRanges) div 2, 2)
  if p >= 0 and c >= alphaRanges[p] and c <= alphaRanges[p+1]:
    return true
  # ... and isolated code points
  p = binarySearch(c, alphaSinglets, len(alphaSinglets), 1)
  if p >= 0 and c == alphaSinglets[p]:
    return true

proc isTitle*(c: TRune): bool =
  ## returns true iff `c` is reported by both `isUpper` and `isLower`,
  ## i.e. it has an entry in the to-lower *and* in the to-upper tables.
  return isUpper(c) and isLower(c)

proc isWhiteSpace*(c: TRune): bool =
  ## returns true iff `c` is a Unicode whitespace character
  var c = irune(c)
  var p = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2)
  if p >= 0 and c >= spaceRanges[p] and c <= spaceRanges[p+1]:
    return true

iterator runes*(s: string): TRune =
  ## iterates over any unicode character of the string `s`.
  var
    i = 0
    result: TRune
  while i < len(s):
    fastRuneAt(s, i, result, true)
    yield result

proc cmpRunesIgnoreCase*(a, b: string): int =
  ## compares two UTF8 strings and ignores the case. Returns:
  ##
  ## | 0 iff a == b
  ## | < 0 iff a < b
  ## | > 0 iff a > b
  var i = 0
  var j = 0
  var ar, br: TRune
  while i < a.len and j < b.len:
    # slow path:
    fastRuneAt(a, i, ar)
    fastRuneAt(b, j, br)
    result = irune(toLower(ar)) - irune(toLower(br))
    if result != 0: return
  # All compared runes matched, so the string with input left over is the
  # greater one. Compare the *unread* byte counts rather than the total
  # lengths: a rune and its lower-case form may differ in encoded size, so
  # strings that are equal ignoring case can have different byte lengths.
  result = (a.len - i) - (b.len - j)

diff --git a/nimlib/pure/variants.nim b/nimlib/pure/variants.nim new file mode 100755 index 000000000..f661f81a6 --- /dev/null +++ b/nimlib/pure/variants.nim @@ -0,0 +1,181 @@
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
+#
+
+## This module implements Nimrod's support for the ``variant`` datatype.
+## `TVariant` shows how the flexibility of dynamic typing is achieved
+## within a static type system.
+
+type
+  TVarType* = enum ## the kind of value a `TVariant` currently holds
+    vtNone,
+    vtBool,
+    vtChar,
+    vtEnum,
+    vtInt,
+    vtFloat,
+    vtString,
+    vtSet,
+    vtSeq,
+    vtDict
+  TVariant* {.final.} = object of TObject
+    ## a dynamically typed value; `vtype` selects the active branch
+    case vtype: TVarType
+    of vtNone: nil
+    # all ordinal kinds share one 64 bit integer slot
+    of vtBool, vtChar, vtEnum, vtInt: vint: int64
+    of vtFloat: vfloat: float64
+    of vtString: vstring: string
+    # sets and sequences are both stored as a sequence of variants
+    of vtSet, vtSeq: q: seq[TVariant]
+    # a dict is a sequence of (key, value) pairs
+    of vtDict: d: seq[tuple[key, val: TVariant]]
+
+iterator objectFields*[T](x: T, skipInherited: bool): tuple[
+  key: string, val: TVariant] {.magic: "ObjectFields"}
+
+# The `<>` overloads below box a value into a `TVariant`. They set the
+# discriminator through `vtype`, the field name declared in `TVariant`
+# (the type has no field called `kind`).
+
+proc `<>`*(x: ordinal): TVariant =
+  ## boxes an ordinal value as `vtEnum`
+  result.vtype = vtEnum
+  result.vint = x
+
+proc `<>`*(x: biggestInt): TVariant =
+  ## boxes an integer as `vtInt`
+  result.vtype = vtInt
+  result.vint = x
+
+proc `<>`*(x: char): TVariant =
+  ## boxes a character as `vtChar`; its ordinal value is stored
+  result.vtype = vtChar
+  result.vint = ord(x)
+
+proc `<>`*(x: bool): TVariant =
+  ## boxes a boolean as `vtBool`; its ordinal value is stored
+  result.vtype = vtBool
+  result.vint = ord(x)
+
+proc `<>`*(x: biggestFloat): TVariant =
+  ## boxes a float as `vtFloat`
+  result.vtype = vtFloat
+  result.vfloat = x
+
+proc `<>`*(x: string): TVariant =
+  ## boxes a string as `vtString`
+  result.vtype = vtString
+  result.vstring = x
+
+proc `<>`*[T](x: openArray[T]): TVariant =
+  ## boxes an array as `vtSeq`; every element is boxed with `<>` in turn
+  result.vtype = vtSeq
+  newSeq(result.q, x.len)
+  for i in 0..x.len-1: result.q[i] = <>x[i]
+
+proc `<>`*[T](x: set[T]): TVariant =
+  ## boxes a set as `vtSet`; every element is boxed with `<>` in turn
+  result.vtype = vtSet
+  result.q = @[]
+  for a in items(x): result.q.add(<>a)
+
+proc `<>`* [T: object](x: T): TVariant {.magic: "ToVariant".}
+  ## this converts a value to a variant ("boxing")
+
+proc `><`*[T](v: TVariant, typ: T): T {.magic: "FromVariant".}
+
+# NOTE(review): the next two lines look like scratch usage examples of
+# `<>` and `><`; they are not declarations -- confirm and move them into
+# a comment or a test.
+[<>5, <>67, <>"hallo"]
+myVar><int
+
+
+proc `==`* (x, y: TVariant): bool =
+  ## structural equality. Variants whose `vtype` differs are never equal:
+  ## `result` then keeps its default value (false).
+  if x.vtype == y.vtype:
+    case x.vtype
+    of vtNone: result = true
+    of vtBool, vtChar, vtEnum, vtInt: result = x.vint == y.vint
+    of vtFloat: result = x.vfloat == y.vfloat
+    of vtString: result = x.vstring == y.vstring
+    of vtSet:
+      # complicated! 
We check that each a in x also occurs in y and that the
+      # counts are identical:
+      if x.q.len == y.q.len:
+        for a in items(x.q):
+          block inner:
+            for b in items(y.q):
+              if a == b: break inner
+            # no element of y matched `a`
+            return false
+        result = true
+    of vtSeq:
+      # element-wise comparison, order matters
+      if x.q.len == y.q.len:
+        for i in 0..x.q.len-1:
+          if x.q[i] != y.q[i]: return false
+        result = true
+    of vtDict:
+      # it is an ordered dict:
+      if x.d.len == y.d.len:
+        for i in 0..x.d.len-1:
+          if x.d[i].key != y.d[i].key: return false
+          if x.d[i].val != y.d[i].val: return false
+        result = true
+
+proc `[]`* (a, b: TVariant): TVariant =
+  ## indexes the container `a` with `b`. A sequence is indexed by position
+  ## (`b` has to be a bool, char, enum or int). A dict is searched for the
+  ## key `b` first; if no key matches and `b` is integral, it is indexed by
+  ## position. Every other combination ends in `variantError()`.
+  case a.vtype
+  of vtSeq:
+    if b.vtype in {vtBool, vtChar, vtEnum, vtInt}:
+      result = a.q[b.vint]
+    else:
+      variantError()
+  of vtDict:
+    for i in 0..a.d.len-1:
+      if a.d[i].key == b: return a.d[i].val
+    if b.vtype in {vtBool, vtChar, vtEnum, vtInt}:
+      result = a.d[b.vint].val
+    else:
+      # only an unusable index is an error; a successful positional access
+      # must not fall through into the error call
+      variantError()
+  else: variantError()
+
+proc `[]=`* (a, b, c: TVariant) =
+  ## stores `c` in the container `a` at index/key `b`; the lookup rules are
+  ## the same as for `[]`.
+  # NOTE(review): `a` is not a `var` parameter although its fields are
+  # assigned below -- confirm the intended signature.
+  case a.vtype
+  of vtSeq:
+    if b.vtype in {vtBool, vtChar, vtEnum, vtInt}:
+      # store the value `c`, not the index `b`
+      a.q[b.vint] = c
+    else:
+      variantError()
+  of vtDict:
+    for i in 0..a.d.len-1:
+      if a.d[i].key == b:
+        a.d[i].val = c
+        return
+    if b.vtype in {vtBool, vtChar, vtEnum, vtInt}:
+      a.d[b.vint].val = c
+    else:
+      # only an unusable index is an error (see `[]`)
+      variantError()
+  else: variantError()
+
+# convenience overloads: box the index/key with `<>` and delegate
+proc `[]`* (a: TVariant, b: int): TVariant {.inline} = return a[<>b]
+proc `[]`* (a: TVariant, b: string): TVariant {.inline} = return a[<>b]
+proc `[]=`* (a: TVariant, b: int, c: TVariant) {.inline} = a[<>b] = c
+proc `[]=`* (a: TVariant, b: string, c: TVariant) {.inline} = a[<>b] = c
+
+# NOTE(review): `+` is unfinished. Its body stops after the inner `of`
+# branch, and the lines that follow look like a pasted copy of the
+# `TVariant` field list rather than code.
+proc `+`* (x, y: TVariant): TVariant =
+  case x.vtype
+  of vtBool, vtChar, vtEnum, vtInt:
+    if y.vtype == x.vtype:
+      result.vtype = x.vtype
+      result.vint = x.vint + y.vint
+    else:
+      case y.vtype
+      of vtBool, vtChar, vtEnum, vtInt:
+
+
+
+    vint: int64
+  of vtFloat: vfloat: float64
+  of vtString: vstring: string
+  of vtSet, vtSeq: q: seq[TVariant]
+  of vtDict: d: seq[tuple[key, val: TVariant]]
+
+proc `-`* (x, y: TVariant): TVariant
+proc `*`* (x, y: 
TVariant): TVariant
+proc `/`* (x, y: TVariant): TVariant
+proc `div`* (x, y: TVariant): TVariant
+proc `mod`* (x, y: TVariant): TVariant
+proc `&`* (x, y: TVariant): TVariant
+proc `$`* (x: TVariant): string =
+  # uses JS notation
+  # NOTE(review): no implementation follows; the body consists of the
+  # comment above only
+
+proc parseVariant*(s: string): TVariant
+proc `<`* (x, y: TVariant): bool
+proc `<=`* (x, y: TVariant): bool
+
+# NOTE(review): `hash` has an empty body
+proc hash*(x: TVariant): int =
+
+
diff --git a/nimlib/pure/xmlgen.nim b/nimlib/pure/xmlgen.nim
new file mode 100755
index 000000000..79a782252
--- /dev/null
+++ b/nimlib/pure/xmlgen.nim
@@ -0,0 +1,406 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2009 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module implements a simple `XML`:idx: and `HTML`:idx: code
+## generator. Each commonly used HTML tag has a corresponding macro
+## that generates a string with its HTML representation.
+##
+## Example:
+##
+## .. code-block:: nimrod
+##   var nim = "Nimrod"
+##   echo h1(a(href="http://force7.de/nimrod", nim))
+##
+## Writes the string::
+##
+##   <h1><a href="http://force7.de/nimrod">Nimrod</a></h1>
+##
+
+import
+  macros, strutils
+
+const
+  # attribute name lists; the names are separated (and padded) by spaces so
+  # that the lists can be concatenated with `&`
+  coreAttr* = " id class title style "
+  eventAttr* = " onclick ondblclick onmousedown onmouseup " &
+    "onmouseover onmousemove onmouseout onkeypress onkeydown onkeyup "
+  commonAttr* = coreAttr & eventAttr
+
+proc getIdent(e: PNimrodNode): string {.compileTime.} =
+  ## returns the normalized identifier held by `e`. Accent quoted nodes are
+  ## unwrapped by recursing into their first child; any other node kind is
+  ## reported through `error`.
+  case e.kind
+  of nnkIdent: result = normalize($e.ident)
+  of nnkAccQuoted: result = getIdent(e[0])
+  else: error("cannot extract identifier from node: " & toStrLit(e).strVal)
+
+proc delete[T](s: var seq[T], attr: T): bool =
+  ## removes the occurrence of `attr` that `find` reports from `s` and
+  ## returns true iff there was one. The slot is overwritten with the last
+  ## element and `s` is shortened by one, so the order of the remaining
+  ## elements is not preserved.
+  var idx = find(s, attr)
+  if idx >= 0:
+    var L = s.len
+    s[idx] = s[L-1]
+    setLen(s, L-1)
+    result = true
+
+proc xmlCheckedTag*(e: PNimrodNode, tag: string,
+    optAttr = "", reqAttr = "",
+    isLeaf = false): PNimrodNode {.compileTime.} =
+  ## use this procedure to define a new XML tag
+  ##
+  ## * `e`: the macro invocation; arguments of the form ``name=value`` are
+  ##   treated as attributes, all other arguments as nested content
+  ## * `tag`: the element name to emit
+  ## * `optAttr`, `reqAttr`: lists of the optional and the required
+  ##   attribute names (the callers in this module separate the names
+  ##   with spaces)
+  ## * `isLeaf`: if true the element is closed with ``/>`` and nested
+  ##   content is reported as an error
+
+  # copy the attributes; when iterating 
over them these lists
+  # will be modified, so that each attribute is only given one value
+  var req = splitSeq(reqAttr)
+  var opt = splitSeq(optAttr)
+  # the pieces of the generated string are collected in a bracket node
+  result = newNimNode(nnkBracket, e)
+  result.add(newStrLitNode("<"))
+  result.add(newStrLitNode(tag))
+  # first pass over attributes:
+  for i in 1..e.len-1:
+    if e[i].kind == nnkExprEqExpr:
+      var name = getIdent(e[i][0])
+      # `delete` removes the name from the pending lists, so an unknown
+      # attribute and a repeated attribute both end up in the error branch
+      if delete(req, name) or delete(opt, name):
+        result.add(newStrLitNode(" "))
+        result.add(newStrLitNode(name))
+        result.add(newStrLitNode("=\""))
+        # the value expression is inserted as is, between the two quotes
+        result.add(e[i][1])
+        result.add(newStrLitNode("\""))
+      else:
+        error("invalid attribute for '" & tag & "' element: " & name)
+  # check each required attribute exists:
+  # (whatever is still left in `req` was never supplied)
+  if req.len > 0:
+    error(req[0] & " attribute for '" & tag & "' element expected")
+  if isLeaf:
+    # a leaf takes attributes only
+    for i in 1..e.len-1:
+      if e[i].kind != nnkExprEqExpr:
+        error("element " & tag & " cannot be nested")
+    result.add(newStrLitNode(" />"))
+  else:
+    result.add(newStrLitNode(">"))
+    # second pass over elements:
+    for i in 1..e.len-1:
+      if e[i].kind != nnkExprEqExpr: result.add(e[i])
+    result.add(newStrLitNode("</"))
+    result.add(newStrLitNode(tag))
+    result.add(newStrLitNode(">"))
+  # presumably turns the collected pieces into one chain of `&`
+  # applications -- confirm against `NestList`
+  result = NestList(!"&", result)
+
+
+macro a*(e: expr): expr =
+  ## generates the HTML ``a`` element.
+  result = xmlCheckedTag(e, "a", "href charset type hreflang rel rev " &
+    "accesskey tabindex" & commonAttr)
+
+macro acronym*(e: expr): expr =
+  ## generates the HTML ``acronym`` element.
+  result = xmlCheckedTag(e, "acronym", commonAttr)
+
+macro address*(e: expr): expr =
+  ## generates the HTML ``address`` element.
+  result = xmlCheckedTag(e, "address", commonAttr)
+
+macro area*(e: expr): expr =
+  ## generates the HTML ``area`` element.
+  ## ``alt`` is a required attribute; leaf element (no nested content).
+  result = xmlCheckedTag(e, "area", "shape coords href nohref" &
+    " accesskey tabindex" & commonAttr, "alt", true)
+
+macro b*(e: expr): expr =
+  ## generates the HTML ``b`` element.
+  result = xmlCheckedTag(e, "b", commonAttr)
+
+macro base*(e: expr): expr =
+  ## generates the HTML ``base`` element.
+  ## ``href`` is a required attribute; leaf element (no nested content).
+  result = xmlCheckedTag(e, "base", "", "href", true)
+
+macro big*(e: expr): expr =
+  ## generates the HTML ``big`` element.
+  result = xmlCheckedTag(e, "big", commonAttr)
+
+macro blockquote*(e: expr): expr =
+  ## generates the HTML ``blockquote`` element.
+  result = xmlCheckedTag(e, "blockquote", " cite" & commonAttr)
+
+macro body*(e: expr): expr =
+  ## generates the HTML ``body`` element.
+  result = xmlCheckedTag(e, "body", commonAttr)
+
+macro br*(e: expr): expr =
+  ## generates the HTML ``br`` element.
+  ## Leaf element that accepts no attributes at all.
+  result = xmlCheckedTag(e, "br", "", "", true)
+
+macro button*(e: expr): expr =
+  ## generates the HTML ``button`` element.
+  result = xmlCheckedTag(e, "button", "accesskey tabindex " &
+    "disabled name type value" & commonAttr)
+
+macro caption*(e: expr): expr =
+  ## generates the HTML ``caption`` element.
+  result = xmlCheckedTag(e, "caption", commonAttr)
+
+macro cite*(e: expr): expr =
+  ## generates the HTML ``cite`` element.
+  result = xmlCheckedTag(e, "cite", commonAttr)
+
+macro code*(e: expr): expr =
+  ## generates the HTML ``code`` element.
+  result = xmlCheckedTag(e, "code", commonAttr)
+
+macro col*(e: expr): expr =
+  ## generates the HTML ``col`` element.
+  ## Leaf element (no nested content).
+  result = xmlCheckedTag(e, "col", "span align valign" & commonAttr, "", true)
+
+macro colgroup*(e: expr): expr =
+  ## generates the HTML ``colgroup`` element.
+  result = xmlCheckedTag(e, "colgroup", "span align valign" & commonAttr)
+
+macro dd*(e: expr): expr =
+  ## generates the HTML ``dd`` element.
+  result = xmlCheckedTag(e, "dd", commonAttr)
+
+macro del*(e: expr): expr =
+  ## generates the HTML ``del`` element.
+  result = xmlCheckedTag(e, "del", "cite datetime" & commonAttr)
+
+macro dfn*(e: expr): expr =
+  ## generates the HTML ``dfn`` element.
+  result = xmlCheckedTag(e, "dfn", commonAttr)
+
+macro `div`*(e: expr): expr =
+  ## generates the HTML ``div`` element.
+  result = xmlCheckedTag(e, "div", commonAttr)
+
+macro dl*(e: expr): expr =
+  ## generates the HTML ``dl`` element.
+  result = xmlCheckedTag(e, "dl", commonAttr)
+
+macro dt*(e: expr): expr =
+  ## generates the HTML ``dt`` element.
+  result = xmlCheckedTag(e, "dt", commonAttr)
+
+macro em*(e: expr): expr =
+  ## generates the HTML ``em`` element.
+  result = xmlCheckedTag(e, "em", commonAttr)
+
+macro fieldset*(e: expr): expr =
+  ## generates the HTML ``fieldset`` element.
+  result = xmlCheckedTag(e, "fieldset", commonAttr)
+
+macro form*(e: expr): expr =
+  ## generates the HTML ``form`` element.
+  ## ``action`` is a required attribute.
+  # the optional attribute is spelled ``enctype``; with the former spelling
+  # ``encype`` the real attribute was rejected as invalid
+  # NOTE(review): ``accept-charset`` is not a single identifier, so it can
+  # presumably never match the result of `getIdent` -- confirm.
+  result = xmlCheckedTag(e, "form", "method enctype accept accept-charset" &
+    commonAttr, "action")
+
+macro h1*(e: expr): expr =
+  ## generates the HTML ``h1`` element.
+  result = xmlCheckedTag(e, "h1", commonAttr)
+
+macro h2*(e: expr): expr =
+  ## generates the HTML ``h2`` element.
+  result = xmlCheckedTag(e, "h2", commonAttr)
+
+macro h3*(e: expr): expr =
+  ## generates the HTML ``h3`` element.
+  result = xmlCheckedTag(e, "h3", commonAttr)
+
+macro h4*(e: expr): expr =
+  ## generates the HTML ``h4`` element.
+  result = xmlCheckedTag(e, "h4", commonAttr)
+
+macro h5*(e: expr): expr =
+  ## generates the HTML ``h5`` element.
+  result = xmlCheckedTag(e, "h5", commonAttr)
+
+macro h6*(e: expr): expr =
+  ## generates the HTML ``h6`` element.
+  result = xmlCheckedTag(e, "h6", commonAttr)
+
+macro head*(e: expr): expr =
+  ## generates the HTML ``head`` element.
+  result = xmlCheckedTag(e, "head", "profile")
+
+macro html*(e: expr): expr =
+  ## generates the HTML ``html`` element.
+  ## ``xmlns`` is a required attribute.
+  result = xmlCheckedTag(e, "html", "", "xmlns")
+
+macro hr*(e: expr): expr =
+  ## generates the HTML ``hr`` element.
+  ## Leaf element (no nested content).
+  result = xmlCheckedTag(e, "hr", commonAttr, "", true)
+
+macro i*(e: expr): expr =
+  ## generates the HTML ``i`` element.
+  result = xmlCheckedTag(e, "i", commonAttr)
+
+macro img*(e: expr): expr =
+  ## generates the HTML ``img`` element.
+  # ``src`` and ``alt`` are required; leaf element (no nested content)
+  result = xmlCheckedTag(e, "img", "longdesc height width", "src alt", true)
+
+macro input*(e: expr): expr =
+  ## generates the HTML ``input`` element.
+  ## Leaf element (no nested content).
+  result = xmlCheckedTag(e, "input", "name type value checked maxlength src" &
+    " alt accept disabled readonly accesskey tabindex" & commonAttr, "", true)
+
+macro ins*(e: expr): expr =
+  ## generates the HTML ``ins`` element.
+  result = xmlCheckedTag(e, "ins", "cite datetime" & commonAttr)
+
+macro kbd*(e: expr): expr =
+  ## generates the HTML ``kbd`` element.
+  result = xmlCheckedTag(e, "kbd", commonAttr)
+
+macro label*(e: expr): expr =
+  ## generates the HTML ``label`` element.
+  result = xmlCheckedTag(e, "label", "for accesskey" & commonAttr)
+
+macro legend*(e: expr): expr =
+  ## generates the HTML ``legend`` element.
+  result = xmlCheckedTag(e, "legend", "accesskey" & commonAttr)
+
+macro li*(e: expr): expr =
+  ## generates the HTML ``li`` element.
+  result = xmlCheckedTag(e, "li", commonAttr)
+
+macro link*(e: expr): expr =
+  ## generates the HTML ``link`` element.
+  ## Leaf element (no nested content).
+  result = xmlCheckedTag(e, "link", "href charset hreflang type rel rev media" &
+    commonAttr, "", true)
+
+macro map*(e: expr): expr =
+  ## generates the HTML ``map`` element.
+  ## ``id`` is a required attribute.
+  result = xmlCheckedTag(e, "map", "class title" & eventAttr, "id", false)
+
+macro meta*(e: expr): expr =
+  ## generates the HTML ``meta`` element.
+  ## ``content`` is a required attribute; leaf element (no nested content).
+  result = xmlCheckedTag(e, "meta", "name http-equiv scheme", "content", true)
+
+macro noscript*(e: expr): expr =
+  ## generates the HTML ``noscript`` element.
+  result = xmlCheckedTag(e, "noscript", commonAttr)
+
+macro `object`*(e: expr): expr =
+  ## generates the HTML ``object`` element.
+  result = xmlCheckedTag(e, "object", "classid data codebase declare type " &
+    "codetype archive standby width height name tabindex" & commonAttr)
+
+macro ol*(e: expr): expr =
+  ## generates the HTML ``ol`` element.
+  result = xmlCheckedTag(e, "ol", commonAttr)
+
+macro optgroup*(e: expr): expr =
+  ## generates the HTML ``optgroup`` element.
+  ## ``label`` is a required attribute.
+  result = xmlCheckedTag(e, "optgroup", "disabled" & commonAttr, "label", false)
+
+macro option*(e: expr): expr =
+  ## generates the HTML ``option`` element.
+  result = xmlCheckedTag(e, "option", "selected value" & commonAttr)
+
+macro p*(e: expr): expr =
+  ## generates the HTML ``p`` element.
+  result = xmlCheckedTag(e, "p", commonAttr)
+
+macro param*(e: expr): expr =
+  ## generates the HTML ``param`` element.
+  ## ``name`` is a required attribute; leaf element (no nested content).
+  result = xmlCheckedTag(e, "param", "value id type valuetype", "name", true)
+
+macro pre*(e: expr): expr =
+  ## generates the HTML ``pre`` element.
+  result = xmlCheckedTag(e, "pre", commonAttr)
+
+macro q*(e: expr): expr =
+  ## generates the HTML ``q`` element.
+  result = xmlCheckedTag(e, "q", "cite" & commonAttr)
+
+macro samp*(e: expr): expr =
+  ## generates the HTML ``samp`` element.
+  result = xmlCheckedTag(e, "samp", commonAttr)
+
+macro script*(e: expr): expr =
+  ## generates the HTML ``script`` element.
+  ## ``type`` is a required attribute.
+  result = xmlCheckedTag(e, "script", "src charset defer", "type", false)
+
+macro select*(e: expr): expr =
+  ## generates the HTML ``select`` element.
+  result = xmlCheckedTag(e, "select", "name size multiple disabled tabindex" &
+    commonAttr)
+
+macro small*(e: expr): expr =
+  ## generates the HTML ``small`` element.
+  result = xmlCheckedTag(e, "small", commonAttr)
+
+macro span*(e: expr): expr =
+  ## generates the HTML ``span`` element.
+  result = xmlCheckedTag(e, "span", commonAttr)
+
+macro strong*(e: expr): expr =
+  ## generates the HTML ``strong`` element.
+  result = xmlCheckedTag(e, "strong", commonAttr)
+
+macro style*(e: expr): expr =
+  ## generates the HTML ``style`` element.
+  ## ``type`` is a required attribute.
+  result = xmlCheckedTag(e, "style", "media title", "type")
+
+macro sub*(e: expr): expr =
+  ## generates the HTML ``sub`` element.
+  result = xmlCheckedTag(e, "sub", commonAttr)
+
+macro sup*(e: expr): expr =
+  ## generates the HTML ``sup`` element.
+  result = xmlCheckedTag(e, "sup", commonAttr)
+
+macro table*(e: expr): expr =
+  ## generates the HTML ``table`` element.
+  result = xmlCheckedTag(e, "table", "summary border cellpadding cellspacing" &
+    " frame rules width" & commonAttr)
+
+macro tbody*(e: expr): expr =
+  ## generates the HTML ``tbody`` element.
+  result = xmlCheckedTag(e, "tbody", "align valign" & commonAttr)
+
+macro td*(e: expr): expr =
+  ## generates the HTML ``td`` element.
+  result = xmlCheckedTag(e, "td", "colspan rowspan abbr axis headers scope" &
+    " align valign" & commonAttr)
+
+macro textarea*(e: expr): expr =
+  ## generates the HTML ``textarea`` element.
+  ## ``rows`` and ``cols`` are required attributes.
+  result = xmlCheckedTag(e, "textarea", " name disabled readonly accesskey" &
+    " tabindex" & commonAttr, "rows cols", false)
+
+macro tfoot*(e: expr): expr =
+  ## generates the HTML ``tfoot`` element.
+  result = xmlCheckedTag(e, "tfoot", "align valign" & commonAttr)
+
+macro th*(e: expr): expr =
+  ## generates the HTML ``th`` element.
+  result = xmlCheckedTag(e, "th", "colspan rowspan abbr axis headers scope" &
+    " align valign" & commonAttr)
+
+macro thead*(e: expr): expr =
+  ## generates the HTML ``thead`` element.
+  result = xmlCheckedTag(e, "thead", "align valign" & commonAttr)
+
+macro title*(e: expr): expr =
+  ## generates the HTML ``title`` element.
+  ## No attributes are accepted.
+  result = xmlCheckedTag(e, "title")
+
+macro tr*(e: expr): expr =
+  ## generates the HTML ``tr`` element.
+  result = xmlCheckedTag(e, "tr", "align valign" & commonAttr)
+
+macro tt*(e: expr): expr =
+  ## generates the HTML ``tt`` element.
+  result = xmlCheckedTag(e, "tt", commonAttr)
+
+macro ul*(e: expr): expr =
+  ## generates the HTML ``ul`` element.
+  result = xmlCheckedTag(e, "ul", commonAttr)
+
+macro `var`*(e: expr): expr =
+  ## generates the HTML ``var`` element.
+  result = xmlCheckedTag(e, "var", commonAttr)
+
+# small demo, only compiled when this file is the main module: echoes an
+# ``h1`` element that wraps a link
+when isMainModule:
+  var nim = "Nimrod"
+  echo h1(a(href="http://force7.de/nimrod", nim))
+
|