diff options
author | Araq <rumpf_a@web.de> | 2011-04-12 01:13:42 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2011-04-12 01:13:42 +0200 |
commit | cd292568d775d55d9abb51e962882ecda12c03a9 (patch) | |
tree | 85451f0e1f17dc0463350915f12bdd0a82a73455 /nimlib/pure | |
parent | 46c41e43690cba9bc1caff6a994bb6915df8a1b7 (diff) | |
download | Nim-cd292568d775d55d9abb51e962882ecda12c03a9.tar.gz |
big repo cleanup
Diffstat (limited to 'nimlib/pure')
28 files changed, 0 insertions, 11926 deletions
diff --git a/nimlib/pure/cgi.nim b/nimlib/pure/cgi.nim deleted file mode 100755 index baae244e7..000000000 --- a/nimlib/pure/cgi.nim +++ /dev/null @@ -1,375 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements helper procs for CGI applictions. Example: -## -## .. code-block:: Nimrod -## -## import strtabs, cgi -## -## # Fill the values when debugging: -## when debug: -## setTestData("name", "Klaus", "password", "123456") -## # read the data into `myData` -## var myData = readData() -## # check that the data's variable names are "name" or "passwort" -## validateData(myData, "name", "password") -## # start generating content: -## writeContentType() -## # generate content: -## write(stdout, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n") -## write(stdout, "<html><head><title>Test</title></head><body>\n") -## writeln(stdout, "your name: " & myData["name"]) -## writeln(stdout, "your password: " & myData["password"]) -## writeln(stdout, "</body></html>") - -import strutils, os, strtabs - -proc URLencode*(s: string): string = - ## Encodes a value to be HTTP safe: This means that characters in the set - ## ``{'A'..'Z', 'a'..'z', '0'..'9', '_'}`` are carried over to the result, - ## a space is converted to ``'+'`` and every other character is encoded as - ## ``'%xx'`` where ``xx`` denotes its hexadecimal value. - result = "" - for i in 0..s.len-1: - case s[i] - of 'a'..'z', 'A'..'Z', '0'..'9', '_': add(result, s[i]) - of ' ': add(result, '+') - else: - add(result, '%') - add(result, toHex(ord(s[i]), 2)) - -proc handleHexChar(c: char, x: var int) {.inline.} = - case c - of '0'..'9': x = (x shl 4) or (ord(c) - ord('0')) - of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10) - of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10) - else: assert(false) - -proc URLdecode*(s: string): string = - ## Decodes a value from its HTTP representation: This means that a ``'+'`` - ## is converted to a space, ``'%xx'`` (where ``xx`` denotes a hexadecimal - ## value) is converted to the character with ordinal number ``xx``, and - ## and every other character is carried over. - result = "" - var i = 0 - while i < s.len: - case s[i] - of '%': - var x = 0 - handleHexChar(s[i+1], x) - handleHexChar(s[i+2], x) - inc(i, 2) - add(result, chr(x)) - of '+': add(result, ' ') - else: add(result, s[i]) - inc(i) - -proc addXmlChar(dest: var string, c: Char) {.inline.} = - case c - of '&': add(dest, "&") - of '<': add(dest, "<") - of '>': add(dest, ">") - of '\"': add(dest, """) - else: add(dest, c) - -proc XMLencode*(s: string): string = - ## Encodes a value to be XML safe: - ## * ``"`` is replaced by ``"`` - ## * ``<`` is replaced by ``<`` - ## * ``>`` is replaced by ``>`` - ## * ``&`` is replaced by ``&`` - ## * every other character is carried over. - result = "" - for i in 0..len(s)-1: addXmlChar(result, s[i]) - -type - ECgi* = object of EIO ## the exception that is raised, if a CGI error occurs - TRequestMethod* = enum ## the used request method - methodNone, ## no REQUEST_METHOD environment variable - methodPost, ## query uses the POST method - methodGet ## query uses the GET method - -proc cgiError*(msg: string) {.noreturn.} = - ## raises an ECgi exception with message `msg`. - var e: ref ECgi - new(e) - e.msg = msg - raise e - -proc getEncodedData(allowedMethods: set[TRequestMethod]): string = - case getenv("REQUEST_METHOD") - of "POST": - if methodPost notin allowedMethods: - cgiError("'REQUEST_METHOD' 'POST' is not supported") - var L = parseInt(getenv("CONTENT_LENGTH")) - result = newString(L) - if readBuffer(stdin, addr(result[0]), L) != L: - cgiError("cannot read from stdin") - of "GET": - if methodGet notin allowedMethods: - cgiError("'REQUEST_METHOD' 'GET' is not supported") - result = getenv("QUERY_STRING") - else: - if methodNone notin allowedMethods: - cgiError("'REQUEST_METHOD' must be 'POST' or 'GET'") - -iterator decodeData*(allowedMethods: set[TRequestMethod] = - {methodNone, methodPost, methodGet}): tuple[key, value: string] = - ## Reads and decodes CGI data and yields the (name, value) pairs the - ## data consists of. If the client does not use a method listed in the - ## `allowedMethods` set, an `ECgi` exception is raised. - var enc = getEncodedData(allowedMethods) - if not isNil(enc): - # decode everything in one pass: - var i = 0 - var name = "" - var value = "" - while enc[i] != '\0': - setLen(name, 0) # reuse memory - while true: - case enc[i] - of '\0': break - of '%': - var x = 0 - handleHexChar(enc[i+1], x) - handleHexChar(enc[i+2], x) - inc(i, 2) - add(name, chr(x)) - of '+': add(name, ' ') - of '=', '&': break - else: add(name, enc[i]) - inc(i) - if enc[i] != '=': cgiError("'=' expected") - inc(i) # skip '=' - setLen(value, 0) # reuse memory - while true: - case enc[i] - of '%': - var x = 0 - handleHexChar(enc[i+1], x) - handleHexChar(enc[i+2], x) - inc(i, 2) - add(value, chr(x)) - of '+': add(value, ' ') - of '&', '\0': break - else: add(value, enc[i]) - inc(i) - yield (name, value) - if enc[i] == '&': inc(i) - elif enc[i] == '\0': break - else: cgiError("'&' expected") - -proc readData*(allowedMethods: set[TRequestMethod] = - {methodNone, methodPost, methodGet}): PStringTable = - ## Read CGI data. If the client does not use a method listed in the - ## `allowedMethods` set, an `ECgi` exception is raised. - result = newStringTable() - for name, value in decodeData(allowedMethods): - result[name] = value - -proc validateData*(data: PStringTable, validKeys: openarray[string]) = - ## validates data; raises `ECgi` if this fails. This checks that each variable - ## name of the CGI `data` occurs in the `validKeys` array. - for key, val in pairs(data): - if find(validKeys, key) < 0: - cgiError("unknown variable name: " & key) - -proc getContentLength*(): string = - ## returns contents of the ``CONTENT_LENGTH`` environment variable - return getenv("CONTENT_LENGTH") - -proc getContentType*(): string = - ## returns contents of the ``CONTENT_TYPE`` environment variable - return getenv("CONTENT_Type") - -proc getDocumentRoot*(): string = - ## returns contents of the ``DOCUMENT_ROOT`` environment variable - return getenv("DOCUMENT_ROOT") - -proc getGatewayInterface*(): string = - ## returns contents of the ``GATEWAY_INTERFACE`` environment variable - return getenv("GATEWAY_INTERFACE") - -proc getHttpAccept*(): string = - ## returns contents of the ``HTTP_ACCEPT`` environment variable - return getenv("HTTP_ACCEPT") - -proc getHttpAcceptCharset*(): string = - ## returns contents of the ``HTTP_ACCEPT_CHARSET`` environment variable - return getenv("HTTP_ACCEPT_CHARSET") - -proc getHttpAcceptEncoding*(): string = - ## returns contents of the ``HTTP_ACCEPT_ENCODING`` environment variable - return getenv("HTTP_ACCEPT_ENCODING") - -proc getHttpAcceptLanguage*(): string = - ## returns contents of the ``HTTP_ACCEPT_LANGUAGE`` environment variable - return getenv("HTTP_ACCEPT_LANGUAGE") - -proc getHttpConnection*(): string = - ## returns contents of the ``HTTP_CONNECTION`` environment variable - return getenv("HTTP_CONNECTION") - -proc getHttpCookie*(): string = - ## returns contents of the ``HTTP_COOKIE`` environment variable - return getenv("HTTP_COOKIE") - -proc getHttpHost*(): string = - ## returns contents of the ``HTTP_HOST`` environment variable - return getenv("HTTP_HOST") - -proc getHttpReferer*(): string = - ## returns contents of the ``HTTP_REFERER`` environment variable - return getenv("HTTP_REFERER") - -proc getHttpUserAgent*(): string = - ## returns contents of the ``HTTP_USER_AGENT`` environment variable - return getenv("HTTP_USER_AGENT") - -proc getPathInfo*(): string = - ## returns contents of the ``PATH_INFO`` environment variable - return getenv("PATH_INFO") - -proc getPathTranslated*(): string = - ## returns contents of the ``PATH_TRANSLATED`` environment variable - return getenv("PATH_TRANSLATED") - -proc getQueryString*(): string = - ## returns contents of the ``QUERY_STRING`` environment variable - return getenv("QUERY_STRING") - -proc getRemoteAddr*(): string = - ## returns contents of the ``REMOTE_ADDR`` environment variable - return getenv("REMOTE_ADDR") - -proc getRemoteHost*(): string = - ## returns contents of the ``REMOTE_HOST`` environment variable - return getenv("REMOTE_HOST") - -proc getRemoteIdent*(): string = - ## returns contents of the ``REMOTE_IDENT`` environment variable - return getenv("REMOTE_IDENT") - -proc getRemotePort*(): string = - ## returns contents of the ``REMOTE_PORT`` environment variable - return getenv("REMOTE_PORT") - -proc getRemoteUser*(): string = - ## returns contents of the ``REMOTE_USER`` environment variable - return getenv("REMOTE_USER") - -proc getRequestMethod*(): string = - ## returns contents of the ``REQUEST_METHOD`` environment variable - return getenv("REQUEST_METHOD") - -proc getRequestURI*(): string = - ## returns contents of the ``REQUEST_URI`` environment variable - return getenv("REQUEST_URI") - -proc getScriptFilename*(): string = - ## returns contents of the ``SCRIPT_FILENAME`` environment variable - return getenv("SCRIPT_FILENAME") - -proc getScriptName*(): string = - ## returns contents of the ``SCRIPT_NAME`` environment variable - return getenv("SCRIPT_NAME") - -proc getServerAddr*(): string = - ## returns contents of the ``SERVER_ADDR`` environment variable - return getenv("SERVER_ADDR") - -proc getServerAdmin*(): string = - ## returns contents of the ``SERVER_ADMIN`` environment variable - return getenv("SERVER_ADMIN") - -proc getServerName*(): string = - ## returns contents of the ``SERVER_NAME`` environment variable - return getenv("SERVER_NAME") - -proc getServerPort*(): string = - ## returns contents of the ``SERVER_PORT`` environment variable - return getenv("SERVER_PORT") - -proc getServerProtocol*(): string = - ## returns contents of the ``SERVER_PROTOCOL`` environment variable - return getenv("SERVER_PROTOCOL") - -proc getServerSignature*(): string = - ## returns contents of the ``SERVER_SIGNATURE`` environment variable - return getenv("SERVER_SIGNATURE") - -proc getServerSoftware*(): string = - ## returns contents of the ``SERVER_SOFTWARE`` environment variable - return getenv("SERVER_SOFTWARE") - -proc setTestData*(keysvalues: openarray[string]) = - ## fills the appropriate environment variables to test your CGI application. - ## This can only simulate the 'GET' request method. `keysvalues` should - ## provide embedded (name, value)-pairs. Example: - ## - ## .. code-block:: Nimrod - ## setTestData("name", "Hanz", "password", "12345") - putenv("REQUEST_METHOD", "GET") - var i = 0 - var query = "" - while i < keysvalues.len: - add(query, URLencode(keysvalues[i])) - add(query, '=') - add(query, URLencode(keysvalues[i+1])) - add(query, '&') - inc(i, 2) - putenv("QUERY_STRING", query) - -proc writeContentType*() = - ## call this before starting to send your HTML data to `stdout`. This - ## implements this part of the CGI protocol: - ## - ## .. code-block:: Nimrod - ## write(stdout, "Content-type: text/html\n\n") - ## - ## It also modifies the debug stack traces so that they contain - ## ``<br />`` and are easily readable in a browser. - write(stdout, "Content-type: text/html\n\n") - system.stackTraceNewLine = "<br />\n" - -proc setCookie*(name, value: string) = - ## Sets a cookie. - write(stdout, "Set-Cookie: ", name, "=", value, "\n") - -var - cookies: PStringTable = nil - -proc parseCookies(s: string): PStringTable = - result = newStringTable(modeCaseInsensitive) - var i = 0 - while true: - while s[i] == ' ' or s[i] == '\t': inc(i) - var keystart = i - while s[i] != '=' and s[i] != '\0': inc(i) - var keyend = i-1 - if s[i] == '\0': break - inc(i) # skip '=' - var valstart = i - while s[i] != ';' and s[i] != '\0': inc(i) - result[copy(s, keystart, keyend)] = copy(s, valstart, i-1) - if s[i] == '\0': break - inc(i) # skip ';' - -proc getCookie*(name: string): string = - ## Gets a cookie. If no cookie of `name` exists, "" is returned. - if cookies == nil: cookies = parseCookies(getHttpCookie()) - result = cookies[name] - -proc existsCookie*(name: string): bool = - ## Checks if a cookie of `name` exists. - if cookies == nil: cookies = parseCookies(getHttpCookie()) - result = hasKey(cookies) - - diff --git a/nimlib/pure/complex.nim b/nimlib/pure/complex.nim deleted file mode 100755 index f50ff4bd0..000000000 --- a/nimlib/pure/complex.nim +++ /dev/null @@ -1,106 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2006 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - - - -## This module implements complex numbers. - -{.push checks:off, line_dir:off, stack_trace:off, debugger:off.} -# the user does not want to trace a part -# of the standard library! - -import - math - -type - TComplex* = tuple[re, im: float] - ## a complex number, consisting of a real and an imaginary part - -proc `==` *(x, y: TComplex): bool = - ## Compare two complex numbers `x` and `y` for equality. - result = x.re == y.re and x.im == y.im - -proc `+` *(x, y: TComplex): TComplex = - ## Add two complex numbers. - result.re = x.re + y.re - result.im = x.im + y.im - -proc `-` *(x, y: TComplex): TComplex = - ## Subtract two complex numbers. - result.re = x.re - y.re - result.im = x.im - y.im - -proc `-` *(z: TComplex): TComplex = - ## Unary minus for complex numbers. - result.re = -z.re - result.im = -z.im - -proc `/` *(x, y: TComplex): TComplex = - ## Divide `x` by `y`. - var - r, den: float - if abs(y.re) < abs(y.im): - r = y.re / y.im - den = y.im + r * y.re - result.re = (x.re * r + x.im) / den - result.im = (x.im * r - x.re) / den - else: - r = y.im / y.re - den = y.re + r * y.im - result.re = (x.re + r * x.im) / den - result.im = (x.im - r * x.re) / den - -proc `*` *(x, y: TComplex): TComplex = - ## Multiply `x` with `y`. - result.re = x.re * y.re - x.im * y.im - result.im = x.im * y.re + x.re * y.im - -proc abs*(z: TComplex): float = - ## Return the distance from (0,0) to `z`. - - # optimized by checking special cases (sqrt is expensive) - var x, y, temp: float - - x = abs(z.re) - y = abs(z.im) - if x == 0.0: - result = y - elif y == 0.0: - result = x - elif x > y: - temp = y / x - result = x * sqrt(1.0 + temp * temp) - else: - temp = x / y - result = y * sqrt(1.0 + temp * temp) - -proc sqrt*(z: TComplex): TComplex = - ## Square root for a complex number `z`. - var x, y, w, r: float - - if z.re == 0.0 and z.im == 0.0: - result = z - else: - x = abs(z.re) - y = abs(z.im) - if x >= y: - r = y / x - w = sqrt(x) * sqrt(0.5 * (1.0 + sqrt(1.0 + r * r))) - else: - r = x / y - w = sqrt(y) * sqrt(0.5 * (r + sqrt(1.0 + r * r))) - if z.re >= 0.0: - result.re = w - result.im = z.im / (w * 2) - else: - if z.im >= 0.0: result.im = w - else: result.im = -w - result.re = z.im / (c.im + c.im) - -{.pop.} diff --git a/nimlib/pure/dynlib.nim b/nimlib/pure/dynlib.nim deleted file mode 100755 index 592073e3d..000000000 --- a/nimlib/pure/dynlib.nim +++ /dev/null @@ -1,84 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements the ability to access symbols from shared -## libraries. On POSIX this uses the ``dlsym`` mechanism, on -## Windows ``LoadLibrary``. - -type - TLibHandle* = pointer ## a handle to a dynamically loaded library - -proc LoadLib*(path: string): TLibHandle - ## loads a library from `path`. Returns nil if the library could not - ## be loaded. - -proc UnloadLib*(lib: TLibHandle) - ## unloads the library `lib` - -proc symAddr*(lib: TLibHandle, name: string): pointer - ## retrieves the address of a procedure/variable from `lib`. Returns nil - ## if the symbol could not be found. - -proc checkedSymAddr*(lib: TLibHandle, name: string): pointer = - ## retrieves the address of a procedure/variable from `lib`. Raises - ## `EInvalidLibrary` if the symbol could not be found. - result = symAddr(lib, name) - if result == nil: - var e: ref EInvalidLibrary - new(e) - e.msg = "could not find symbol: " & name - raise e - -when defined(posix): - # - # ========================================================================= - # This is an implementation based on the dlfcn interface. - # The dlfcn interface is available in Linux, SunOS, Solaris, IRIX, FreeBSD, - # NetBSD, AIX 4.2, HPUX 11, and probably most other Unix flavors, at least - # as an emulation layer on top of native functions. - # ========================================================================= - # - var - RTLD_NOW {.importc: "RTLD_NOW", header: "<dlfcn.h>".}: int - - proc dlclose(lib: TLibHandle) {.importc, header: "<dlfcn.h>".} - proc dlopen(path: CString, mode: int): TLibHandle {. - importc, header: "<dlfcn.h>".} - proc dlsym(lib: TLibHandle, name: cstring): pointer {. - importc, header: "<dlfcn.h>".} - - proc LoadLib(path: string): TLibHandle = return dlopen(path, RTLD_NOW) - proc UnloadLib(lib: TLibHandle) = dlclose(lib) - proc symAddr(lib: TLibHandle, name: string): pointer = - return dlsym(lib, name) - -elif defined(windows) or defined(dos): - # - # ======================================================================= - # Native Windows Implementation - # ======================================================================= - # - type - THINSTANCE {.importc: "HINSTANCE".} = pointer - - proc FreeLibrary(lib: THINSTANCE) {.importc, header: "<windows.h>", stdcall.} - proc winLoadLibrary(path: cstring): THINSTANCE {. - importc: "LoadLibraryA", header: "<windows.h>", stdcall.} - proc GetProcAddress(lib: THINSTANCE, name: cstring): pointer {. - importc: "GetProcAddress", header: "<windows.h>", stdcall.} - - proc LoadLib(path: string): TLibHandle = - result = cast[TLibHandle](winLoadLibrary(path)) - proc UnloadLib(lib: TLibHandle) = FreeLibrary(cast[THINSTANCE](lib)) - - proc symAddr(lib: TLibHandle, name: string): pointer = - result = GetProcAddress(cast[THINSTANCE](lib), name) - -else: - {.error: "no implementation for dynlib".} diff --git a/nimlib/pure/hashes.nim b/nimlib/pure/hashes.nim deleted file mode 100755 index 1593119bd..000000000 --- a/nimlib/pure/hashes.nim +++ /dev/null @@ -1,97 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2008 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements efficient computations of hash values for diverse -## Nimrod types. - -import - strutils - -type - THash* = int ## a hash value; hash tables using these values should - ## always have a size of a power of two and can use the ``and`` - ## operator instead of ``mod`` for truncation of the hash value. - -proc concHash(h: THash, val: int): THash {.inline.} = - result = h +% val - result = result +% result shl 10 - result = result xor (result shr 6) - -proc finishHash(h: THash): THash {.inline.} = - result = h +% h shl 3 - result = result xor (result shr 11) - result = result +% result shl 15 - -proc hashData*(Data: Pointer, Size: int): THash = - ## hashes an array of bytes of size `size` - var - h: THash - p: cstring - i, s: int - h = 0 - p = cast[cstring](Data) - i = 0 - s = size - while s > 0: - h = concHash(h, ord(p[i])) - Inc(i) - Dec(s) - result = finishHash(h) - -proc hash*(x: Pointer): THash {.inline.} = - ## efficient hashing of pointers - result = (cast[THash](x)) shr 3 # skip the alignment - -proc hash*(x: int): THash {.inline.} = - ## efficient hashing of integers - result = x - -proc hash*(x: int64): THash {.inline.} = - ## efficient hashing of integers - result = toU32(x) - -proc hash*(x: char): THash {.inline.} = - ## efficient hashing of characters - result = ord(x) - -proc hash*(x: string): THash = - ## efficient hashing of strings - var h: THash - h = 0 - for i in 0..x.len-1: - h = concHash(h, ord(x[i])) - result = finishHash(h) - -proc hashIgnoreStyle*(x: string): THash = - ## efficient hashing of strings; style is ignored - var - h: THash - c: Char - h = 0 - for i in 0..x.len-1: - c = x[i] - if c == '_': - continue # skip _ - if c in {'A'..'Z'}: - c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() - h = concHash(h, ord(c)) - result = finishHash(h) - -proc hashIgnoreCase*(x: string): THash = - ## efficient hashing of strings; case is ignored - var - h: THash - c: Char - h = 0 - for i in 0..x.len-1: - c = x[i] - if c in {'A'..'Z'}: - c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() - h = concHash(h, ord(c)) - result = finishHash(h) diff --git a/nimlib/pure/hashtabs.nim b/nimlib/pure/hashtabs.nim deleted file mode 100755 index 68d19d63b..000000000 --- a/nimlib/pure/hashtabs.nim +++ /dev/null @@ -1,163 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## The ``hashtabs`` module implements an efficient generic hash -## table/dictionary data type. - -import - hashes - -const - growthFactor = 2 - startSize = 8 - sham = sizeof(THash)*8-2 # shift amount - mask = 0b11 shl sham - usedSlot = 0b10 shl sham - delSlot = 0b01 shl sham - emptySlot = 0 - -type - TTable*[TKey, TValue] = object - counter: int - data: seq[tuple[key: TKey, val: TValue, h: THash]] - -proc init*(t: var TTable, size = startSize) = - t.counter = 0 - newSeq(t.data, size) - -proc markUsed(h: THash): THash {.inline.} = - return h and not mask or usedSlot - -proc len*(t: TTable): int {.inline.} = - ## returns the number of keys in `t`. - result = t.counter - -proc mustRehash(length, counter: int): bool = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) - -proc nextTry(h, maxHash: THash): THash {.inline.} = - result = ((5 * h) + 1) and maxHash - -template eq(a, b: expr): expr = a == b - -proc rawGet(t: TTable, key: TKey, fullhash: THash): int = - var h = fullhash and high(t.data) - while (t.data[h].h and mask) != 0: - # If it is a deleted entry, the comparison with ``markUsed(fullhash)`` - # fails, so there is no need to check for this explicitely. - if t.data[h].h == markUsed(fullhash) and eq(t.data[h].key, key): return h - h = nextTry(h, high(t.data)) - result = - 1 - -proc `[]`*(t: TTable, key: TKey): TValue = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## `EInvalidValue` is raised. - var index = rawGet(t, key, hash(key)) - if index >= 0: result = t.data[index].val - else: - var e: ref EInvalidValue - new(e) - e.msg = "invalid key: " & $key - raise e - -proc hasKey*(t: TTable, key: TKey): bool = - ## returns true iff `key` is in the table `t`. - result = rawGet(t, key) >= 0 - -proc rawInsert[TKey, TValue]( - data: var seq[tuple[key: TKey, val: TValue, h: THash]], - tup: tuple[key: TKey, val: TValue, h: THash]) = - var h = tup.h and high(data) - while (data[h].h and mask) == usedSlot: h = nextTry(h, high(data)) - data[h] = tup - -proc enlarge(t: var TTable) = - var n: seq[tuple[key: TKey, val: TValue, h: THash]] - newSeq(n, len(t.data) * growthFactor) - for i in 0..high(t.data): - if (t.data[i].h and mask) == usedSlot: rawInsert(n, t.data[i]) - swap(t.data, n) - -proc `[]=`*(t: var TTable, key: TKey, val: TValue) = - ## puts a (key, value)-pair into `t`. - var fullhash = hash(key) - var index = rawGet(t, key, fullhash) - if index >= 0: - t.data[index].val = val - else: - if mustRehash(len(t.data), t.counter): enlarge(t) - rawInsert(t.data, (key, val, markUsed(fullhash))) - inc(t.counter) - -proc add*(t: var TTable, key: TKey, val: TValue) = - ## puts a (key, value)-pair into `t`, but does not check if key already - ## exists. - if mustRehash(len(t.data), t.counter): enlarge(t) - rawInsert(t.data, (key, val, markUsed(hash(key)))) - inc(t.counter) - -proc del*(t: var TTable, key: TKey) = - ## deletes a (key, val)-pair in `t`. - var index = rawGet(t, key) - if index >= 0: - t.data[index].h = delSlot - -proc delAll*(t: var TTable, key: TKey) = - ## deletes all (key, val)-pairs in `t`. - while true: - var index = rawGet(t, key) - if index < 0: break - t.data[index].h = delSlot - -iterator pairs*(t: TTable): tuple[key: TKey, value: TValue] = - ## iterates over any (key, value) pair in the table `t`. - for h in 0..high(t.data): - if (t.data[h].h and mask) == usedSlot: - yield (t.data[h].key, t.data[h].val) - -iterator keys*(t: TTable): TKey = - ## iterate over any key in the table `t`. If key occurs multiple times, it - ## is yielded multiple times. - for h in 0..high(t.data): - if (t.data[h].h and mask) == usedSlot: - yield t.data[h].key - -iterator values*(t: TTable): TValue = - ## iterate over any value in the table `t`. - for h in 0..high(t.data): - if (t.data[h].h and mask) == usedSlot: - yield t.data[h].val - -iterator values*(t: TTable, key: TKey): TValue = - ## iterate over any value associated with `key` in `t`. - var fullhash = hash(key) - var h = fullhash and high(t.data) - while (t.data[h].h and mask) != 0: - # If it is a deleted entry, the comparison with ``markUsed(fullhash)`` - # fails, so there is no need to check for this explicitely. - if t.data[h].h == markUsed(fullhash) and eq(t.data[h].key, key): - yield t.data[h].val - h = nextTry(h, high(t.data)) - -proc `$`*[KeyToStr=`$`, ValueToStr=`$`](t: TTable): string = - ## turns the table into its string representation. `$` must be available - ## for TKey and TValue for this to work. - if t.len == 0: - result = "{:}" - else: - result = "{" - var i = 0 - for k, v in pairs(t): - if i > 0: add(result, ", ") - add(result, KeyToStr(k)) - add(result, ": ") - add(result, ValueToStr(v)) - inc(i) - add(result, "}") diff --git a/nimlib/pure/lexbase.nim b/nimlib/pure/lexbase.nim deleted file mode 100755 index bb207e92a..000000000 --- a/nimlib/pure/lexbase.nim +++ /dev/null @@ -1,166 +0,0 @@ -# -# -# The Nimrod Compiler -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements a base object of a lexer with efficient buffer -## handling. Only at line endings checks are necessary if the buffer -## needs refilling. - -import - strutils, streams - -const - EndOfFile* = '\0' ## end of file marker - NewLines* = {'\c', '\L'} - -# Buffer handling: -# buf: -# "Example Text\n ha!" bufLen = 17 -# ^pos = 0 ^ sentinel = 12 -# - -type - TBaseLexer* = object of TObject ## the base lexer. Inherit your lexer from - ## this object. - bufpos*: int ## the current position within the buffer - buf*: cstring ## the buffer itself - bufLen*: int ## length of buffer in characters - input: PStream ## the input stream - LineNumber*: int ## the current line number - sentinel: int - lineStart: int # index of last line start in buffer - fileOpened: bool - -proc open*(L: var TBaseLexer, input: PStream, bufLen: int = 8192) - ## inits the TBaseLexer with a stream to read from - -proc close*(L: var TBaseLexer) - ## closes the base lexer. This closes `L`'s associated stream too. - -proc getCurrentLine*(L: TBaseLexer, marker: bool = true): string - ## retrieves the current line. - -proc getColNumber*(L: TBaseLexer, pos: int): int - ## retrieves the current column. - -proc HandleCR*(L: var TBaseLexer, pos: int): int - ## Call this if you scanned over '\c' in the buffer; it returns the the - ## position to continue the scanning from. `pos` must be the position - ## of the '\c'. -proc HandleLF*(L: var TBaseLexer, pos: int): int - ## Call this if you scanned over '\L' in the buffer; it returns the the - ## position to continue the scanning from. `pos` must be the position - ## of the '\L'. - -# implementation - -const - chrSize = sizeof(char) - -proc close(L: var TBaseLexer) = - dealloc(L.buf) - L.input.close(L.input) - -proc FillBuffer(L: var TBaseLexer) = - var - charsRead, toCopy, s: int # all are in characters, - # not bytes (in case this - # is not the same) - oldBufLen: int - # we know here that pos == L.sentinel, but not if this proc - # is called the first time by initBaseLexer() - assert(L.sentinel < L.bufLen) - toCopy = L.BufLen - L.sentinel - 1 - assert(toCopy >= 0) - if toCopy > 0: - MoveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize) # "moveMem" handles overlapping regions - charsRead = L.input.readData(L.input, addr(L.buf[toCopy]), - (L.sentinel + 1) * chrSize) div chrSize - s = toCopy + charsRead - if charsRead < L.sentinel + 1: - L.buf[s] = EndOfFile # set end marker - L.sentinel = s - else: - # compute sentinel: - dec(s) # BUGFIX (valgrind) - while true: - assert(s < L.bufLen) - while (s >= 0) and not (L.buf[s] in NewLines): Dec(s) - if s >= 0: - # we found an appropriate character for a sentinel: - L.sentinel = s - break - else: - # rather than to give up here because the line is too long, - # double the buffer's size and try again: - oldBufLen = L.BufLen - L.bufLen = L.BufLen * 2 - L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize)) - assert(L.bufLen - oldBuflen == oldBufLen) - charsRead = L.input.ReadData(L.input, addr(L.buf[oldBufLen]), - oldBufLen * chrSize) div chrSize - if charsRead < oldBufLen: - L.buf[oldBufLen + charsRead] = EndOfFile - L.sentinel = oldBufLen + charsRead - break - s = L.bufLen - 1 - -proc fillBaseLexer(L: var TBaseLexer, pos: int): int = - assert(pos <= L.sentinel) - if pos < L.sentinel: - result = pos + 1 # nothing to do - else: - fillBuffer(L) - L.bufpos = 0 # XXX: is this really correct? - result = 0 - L.lineStart = result - -proc HandleCR(L: var TBaseLexer, pos: int): int = - assert(L.buf[pos] == '\c') - inc(L.linenumber) - result = fillBaseLexer(L, pos) - if L.buf[result] == '\L': - result = fillBaseLexer(L, result) - -proc HandleLF(L: var TBaseLexer, pos: int): int = - assert(L.buf[pos] == '\L') - inc(L.linenumber) - result = fillBaseLexer(L, pos) #L.lastNL := result-1; // BUGFIX: was: result; - -proc skip_UTF_8_BOM(L: var TBaseLexer) = - if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'): - inc(L.bufpos, 3) - inc(L.lineStart, 3) - -proc open(L: var TBaseLexer, input: PStream, bufLen: int = 8192) = - assert(bufLen > 0) - assert(input != nil) - L.input = input - L.bufpos = 0 - L.bufLen = bufLen - L.buf = cast[cstring](alloc(bufLen * chrSize)) - L.sentinel = bufLen - 1 - L.lineStart = 0 - L.linenumber = 1 # lines start at 1 - fillBuffer(L) - skip_UTF_8_BOM(L) - -proc getColNumber(L: TBaseLexer, pos: int): int = - result = abs(pos - L.lineStart) - -proc getCurrentLine(L: TBaseLexer, marker: bool = true): string = - var i: int - result = "" - i = L.lineStart - while not (L.buf[i] in {'\c', '\L', EndOfFile}): - add(result, L.buf[i]) - inc(i) - add(result, "\n") - if marker: - add(result, RepeatChar(getColNumber(L, L.bufpos)) & "^\n") - diff --git a/nimlib/pure/logging.nim b/nimlib/pure/logging.nim deleted file mode 100755 index 6df39f50b..000000000 --- a/nimlib/pure/logging.nim +++ /dev/null @@ -1,146 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements a simple logger. It is based on the following design: -## * Runtime log formating is a bug: Sooner or later ever log file is parsed. -## * Keep it simple: If this library does not fullfill your needs, write your -## own. Trying to support every logging feature just leads to bloat. -## -## Format is:: -## -## DEBUG|INFO|... (2009-11-02 00:00:00)? (Component: )? Message -## -## - -type - TLevel* = enum ## logging level - lvlAll, ## all levels active - lvlDebug, ## debug level (and any above) active - lvlInfo, ## info level (and any above) active - lvlWarn, ## warn level (and any above) active - lvlError, ## error level (and any above) active - lvlFatal ## fatal level (and any above) active - -const - LevelNames*: array [TLevel, string] = [ - "DEBUG", "DEBUG", "INFO", "WARN", "ERROR", "FATAL" - ] - -type - TLogger* = object of TObject ## abstract logger; the base type of all loggers - levelThreshold*: TLevel ## only messages of level >= levelThreshold - ## should be processed - TConsoleLogger* = object of TLogger ## logger that writes the messages to the - ## console - - TFileLogger* = object of TLogger ## logger that writes the messages to a file - f: TFile - - TRollingFileLogger* = object of - TFileLogger ## logger that writes the message to a file - maxlines: int # maximum number of lines - lines: seq[string] - -method log*(L: ref TLogger, level: TLevel, - frmt: string, args: openArray[string]) = - ## override this method in custom loggers. Default implementation does - ## nothing. - nil - -method log*(L: ref TConsoleLogger, level: TLevel, - frmt: string, args: openArray[string]) = - Writeln(stdout, LevelNames[level], " ", frmt % args) - -method log*(L: ref TFileLogger, level: TLevel, - frmt: string, args: openArray[string]) = - Writeln(L.f, LevelNames[level], " ", frmt % args) - -proc defaultFilename*(): string = - ## returns the default filename for a logger - var (path, name, ext) = splitFile(getApplicationFilename()) - result = changeFileExt(path / name & "_" & getDateStr(), "log") - -proc substituteLog*(frmt: string): string = - ## converts $date to the current date - ## converts $time to the current time - ## converts $app to getApplicationFilename() - ## converts - result = "" - var i = 0 - while i < frmt.len: - if frmt[i] != '$': - result.add(frmt[i]) - inc(i) - else: - inc(i) - var v = "" - var app = getApplicationFilename() - while frmt[i] in IdentChars: - v.add(toLower(frmt[i])) - inc(i) - case v - of "date": result.add(getDateStr()) - of "time": result.add(getClockStr()) - of "app": result.add(app) - of "appdir": result.add(app.splitFile.dir) - of "appname": result.add(app.splitFile.name) - - -proc newFileLogger(filename = defaultFilename(), - mode: TFileMode = fmAppend, - levelThreshold = lvlNone): ref TFileLogger = - new(result) - result.levelThreshold = levelThreshold - if not open(result.f, filename, mode): - raiseException(EIO, "cannot open for writing: " & filename) - -proc newRollingFileLogger(filename = defaultFilename(), - mode: TFileMode = fmAppend, - levelThreshold = lvlNone, - maxLines = 1000): ref TFileLogger = - new(result) - result.levelThreshold = levelThreshold - result.maxLines = maxLines - if not open(result.f, filename, mode): - raiseException(EIO, "cannot open for writing: " & filename) - -var - level* = lvlNone - handlers*: seq[ref TLogger] = @[] - -proc logLoop(level: TLevel, msg: string) = - for logger in items(handlers): - if level >= logger.levelThreshold: - log(logger, level, msg) - -template log*(level: TLevel, msg: string) = - ## logs a message of the given level - if level >= logging.Level: - (bind logLoop)(level, frmt, args) - -template debug*(msg: string) = - ## logs a debug message - log(lvlDebug, msg) - -template info*(msg: string) = - ## logs an info message - log(lvlInfo, msg) - -template warn*(msg: string) = - ## logs a warning message - log(lvlWarn, msg) - -template error*(msg: string) = - ## logs an error message - log(lvlError, msg) - -template fatal*(msg: string) = - ## logs a fatal error message and calls ``quit(msg)`` - log(lvlFatal, msg) - diff --git a/nimlib/pure/macros.nim b/nimlib/pure/macros.nim deleted file mode 100755 index 677469ed2..000000000 --- a/nimlib/pure/macros.nim +++ /dev/null @@ -1,249 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - - -## This module contains the interface to the compiler's abstract syntax -## tree (`AST`:idx:). Macros operate on this tree. - -## .. include:: ../doc/astspec.txt - -#[[[cog -#def toEnum(name, elems): -# body = "" -# counter = 0 -# for e in elems: -# if counter % 4 == 0: p = "\n " -# else: p = "" -# body = body + p + 'n' + e + ', ' -# counter = counter + 1 -# -# return (" TNimrod%s* = enum%s\n TNim%ss* = set[TNimrod%s]\n" % -# (name, body[:-2], name, name)) -# -#enums = eval(open("data/ast.yml").read()) -#cog.out("type\n") -#for key, val in enums.items(): -# if key[-4:] == "Flag": continue -# cog.out(toEnum(key, val)) -#]]] -type - TNimrodNodeKind* = enum - nnkNone, nnkEmpty, nnkIdent, nnkSym, - nnkType, nnkCharLit, nnkIntLit, nnkInt8Lit, - nnkInt16Lit, nnkInt32Lit, nnkInt64Lit, nnkFloatLit, - nnkFloat32Lit, nnkFloat64Lit, nnkStrLit, nnkRStrLit, - nnkTripleStrLit, nnkMetaNode, nnkNilLit, nnkDotCall, - nnkCommand, nnkCall, nnkCallStrLit, nnkExprEqExpr, - nnkExprColonExpr, nnkIdentDefs, nnkVarTuple, nnkInfix, - nnkPrefix, nnkPostfix, nnkPar, nnkCurly, - nnkBracket, nnkBracketExpr, nnkPragmaExpr, nnkRange, - nnkDotExpr, nnkCheckedFieldExpr, nnkDerefExpr, nnkIfExpr, - nnkElifExpr, nnkElseExpr, nnkLambda, nnkAccQuoted, - nnkTableConstr, nnkBind, nnkSymChoice, nnkHiddenStdConv, - nnkHiddenSubConv, nnkHiddenCallConv, nnkConv, nnkCast, - nnkAddr, nnkHiddenAddr, nnkHiddenDeref, nnkObjDownConv, - nnkObjUpConv, nnkChckRangeF, nnkChckRange64, nnkChckRange, - nnkStringToCString, nnkCStringToString, nnkPassAsOpenArray, nnkAsgn, - nnkFastAsgn, nnkGenericParams, nnkFormalParams, nnkOfInherit, - nnkModule, nnkProcDef, nnkMethodDef, nnkConverterDef, - nnkMacroDef, nnkTemplateDef, nnkIteratorDef, nnkOfBranch, - nnkElifBranch, nnkExceptBranch, nnkElse, nnkMacroStmt, - nnkAsmStmt, nnkPragma, nnkIfStmt, nnkWhenStmt, - nnkForStmt, nnkWhileStmt, nnkCaseStmt, nnkVarSection, - nnkConstSection, nnkConstDef, nnkTypeSection, nnkTypeDef, - nnkYieldStmt, nnkTryStmt, nnkFinally, nnkRaiseStmt, - nnkReturnStmt, nnkBreakStmt, nnkContinueStmt, nnkBlockStmt, - nnkDiscardStmt, nnkStmtList, nnkImportStmt, nnkFromStmt, - nnkIncludeStmt, nnkCommentStmt, nnkStmtListExpr, nnkBlockExpr, - nnkStmtListType, nnkBlockType, nnkTypeOfExpr, nnkObjectTy, - nnkTupleTy, nnkRecList, nnkRecCase, nnkRecWhen, - nnkRefTy, nnkPtrTy, nnkVarTy, nnkDistinctTy, - nnkProcTy, nnkEnumTy, nnkEnumFieldDef, nnkReturnToken - TNimNodeKinds* = set[TNimrodNodeKind] - TNimrodTypeKind* = enum - ntyNone, ntyBool, ntyChar, ntyEmpty, - ntyArrayConstr, ntyNil, ntyExpr, ntyStmt, - ntyTypeDesc, ntyGenericInvokation, ntyGenericBody, ntyGenericInst, - ntyGenericParam, ntyDistinct, ntyEnum, ntyOrdinal, - ntyArray, ntyObject, ntyTuple, ntySet, - ntyRange, ntyPtr, ntyRef, ntyVar, - ntySequence, ntyProc, ntyPointer, ntyOpenArray, - ntyString, ntyCString, ntyForward, ntyInt, - ntyInt8, ntyInt16, ntyInt32, ntyInt64, - ntyFloat, ntyFloat32, ntyFloat64, ntyFloat128 - TNimTypeKinds* = set[TNimrodTypeKind] - TNimrodSymKind* = enum - nskUnknown, nskConditional, nskDynLib, nskParam, - nskGenericParam, nskTemp, nskType, nskConst, - nskVar, nskProc, nskMethod, nskIterator, - nskConverter, nskMacro, nskTemplate, nskField, - nskEnumField, nskForVar, nskModule, nskLabel, - nskStub - TNimSymKinds* = set[TNimrodSymKind] -#[[[end]]] - -type - TNimrodIdent* = object of TObject - ## represents a Nimrod identifier in the AST - - TNimrodSymbol {.final.} = object # hidden - TNimrodType {.final.} = object # hidden - - PNimrodType* {.compilerproc.} = ref TNimrodType - ## represents a Nimrod type in the compiler; currently this is not very - ## useful as there is no API to deal with Nimrod types. - - PNimrodSymbol* {.compilerproc.} = ref TNimrodSymbol - ## represents a Nimrod *symbol* in the compiler; a *symbol* is a looked-up - ## *ident*. - - PNimrodNode* = expr - ## represents a Nimrod AST node. Macros operate on this type. - -# Nodes should be reference counted to make the `copy` operation very fast! -# However, this is difficult to achieve: modify(n[0][1]) should propagate to -# its father. How to do this without back references? - -proc `[]`* (n: PNimrodNode, i: int): PNimrodNode {.magic: "NChild".} - ## get `n`'s `i`'th child. - -proc `[]=`* (n: PNimrodNode, i: int, child: PNimrodNode) {.magic: "NSetChild".} - ## set `n`'s `i`'th child to `child`. - -proc `!` *(s: string): TNimrodIdent {.magic: "StrToIdent".} - ## constructs an identifier from the string `s` - -proc `$`*(i: TNimrodIdent): string {.magic: "IdentToStr".} - ## converts a Nimrod identifier to a string - -proc `==`* (a, b: TNimrodIdent): bool {.magic: "EqIdent", noSideEffect.} - ## compares two Nimrod identifiers - -proc `==`* (a, b: PNimrodNode): bool {.magic: "EqNimrodNode", noSideEffect.} - ## compares two Nimrod nodes - -proc len*(n: PNimrodNode): int {.magic: "NLen".} - ## returns the number of children of `n`. - -proc add*(father, child: PNimrodNode) {.magic: "NAdd".} - ## adds the `child` to the `father` node - -proc add*(father: PNimrodNode, children: openArray[PNimrodNode]) {. - magic: "NAddMultiple".} - ## adds each child of `children` to the `father` node - -proc del*(father: PNimrodNode, idx = 0, n = 1) {.magic: "NDel".} - ## deletes `n` children of `father` starting at index `idx`. - -proc kind*(n: PNimrodNode): TNimrodNodeKind {.magic: "NKind".} - ## returns the `kind` of the node `n`. - -proc intVal*(n: PNimrodNode): biggestInt {.magic: "NIntVal".} -proc floatVal*(n: PNimrodNode): biggestFloat {.magic: "NFloatVal".} -proc symbol*(n: PNimrodNode): PNimrodSymbol {.magic: "NSymbol".} -proc ident*(n: PNimrodNode): TNimrodIdent {.magic: "NIdent".} -proc typ*(n: PNimrodNode): PNimrodType {.magic: "NGetType".} -proc strVal*(n: PNimrodNode): string {.magic: "NStrVal".} - -proc `intVal=`*(n: PNimrodNode, val: biggestInt) {.magic: "NSetIntVal".} -proc `floatVal=`*(n: PNimrodNode, val: biggestFloat) {.magic: "NSetFloatVal".} -proc `symbol=`*(n: PNimrodNode, val: PNimrodSymbol) {.magic: "NSetSymbol".} -proc `ident=`*(n: PNimrodNode, val: TNimrodIdent) {.magic: "NSetIdent".} -proc `typ=`*(n: PNimrodNode, typ: PNimrodType) {.magic: "NSetType".} -proc `strVal=`*(n: PNimrodNode, val: string) {.magic: "NSetStrVal".} - -proc newNimNode*(kind: TNimrodNodeKind, - n: PNimrodNode=nil): PNimrodNode {.magic: "NNewNimNode".} - -proc copyNimNode*(n: PNimrodNode): PNimrodNode {.magic: "NCopyNimNode".} -proc copyNimTree*(n: PNimrodNode): PNimrodNode {.magic: "NCopyNimTree".} - -proc error*(msg: string) {.magic: "NError".} - ## writes an error message at compile time - -proc warning*(msg: string) {.magic: "NWarning".} - ## writes a warning message at compile time - -proc hint*(msg: string) {.magic: "NHint".} - ## writes a hint message at compile time - -proc newStrLitNode*(s: string): PNimrodNode {.compileTime.} = - ## creates a string literal node from `s` - result = newNimNode(nnkStrLit) - result.strVal = s - -proc newIntLitNode*(i: biggestInt): PNimrodNode {.compileTime.} = - ## creates a int literal node from `i` - result = newNimNode(nnkIntLit) - result.intVal = i - -proc newFloatLitNode*(f: biggestFloat): PNimrodNode {.compileTime.} = - ## creates a float literal node from `f` - result = newNimNode(nnkFloatLit) - result.floatVal = f - -proc newIdentNode*(i: TNimrodIdent): PNimrodNode {.compileTime.} = - ## creates an identifier node from `i` - result = newNimNode(nnkIdent) - result.ident = i - -proc newIdentNode*(i: string): PNimrodNode {.compileTime.} = - ## creates an identifier node from `i` - result = newNimNode(nnkIdent) - result.ident = !i - -proc toStrLit*(n: PNimrodNode): PNimrodNode {.compileTime.} = - ## converts the AST `n` to the concrete Nimrod code and wraps that - ## in a string literal node - return newStrLitNode(repr(n)) - -proc expectKind*(n: PNimrodNode, k: TNimrodNodeKind) {.compileTime.} = - ## checks that `n` is of kind `k`. If this is not the case, - ## compilation aborts with an error message. This is useful for writing - ## macros that check the AST that is passed to them. - if n.kind != k: error("macro expects a node of kind: " & repr(k)) - -proc expectMinLen*(n: PNimrodNode, min: int) {.compileTime.} = - ## checks that `n` has at least `min` children. If this is not the case, - ## compilation aborts with an error message. This is useful for writing - ## macros that check its number of arguments. - if n.len < min: error("macro expects a node with " & $min & " children") - -proc expectLen*(n: PNimrodNode, len: int) {.compileTime.} = - ## checks that `n` has exactly `len` children. If this is not the case, - ## compilation aborts with an error message. This is useful for writing - ## macros that check its number of arguments. - if n.len != len: error("macro expects a node with " & $len & " children") - -proc newCall*(theProc: TNimrodIdent, - args: openArray[PNimrodNode]): PNimrodNode {.compileTime.} = - ## produces a new call node. `theProc` is the proc that is called with - ## the arguments ``args[0..]``. - result = newNimNode(nnkCall) - result.add(newIdentNode(theProc)) - result.add(args) - -proc newCall*(theProc: string, - args: openArray[PNimrodNode]): PNimrodNode {.compileTime.} = - ## produces a new call node. `theProc` is the proc that is called with - ## the arguments ``args[0..]``. - result = newNimNode(nnkCall) - result.add(newIdentNode(theProc)) - result.add(args) - -proc nestList*(theProc: TNimrodIdent, - x: PNimrodNode): PNimrodNode {.compileTime.} = - ## nests the list `x` into a tree of call expressions: - ## ``[a, b, c]`` is transformed into ``theProc(a, theProc(c, d))`` - var L = x.len - result = newCall(theProc, x[L-2], x[L-1]) - var a = result - for i in countdown(L-3, 0): - a = newCall(theProc, x[i], copyNimTree(a)) - diff --git a/nimlib/pure/math.nim b/nimlib/pure/math.nim deleted file mode 100755 index bca45894c..000000000 --- a/nimlib/pure/math.nim +++ /dev/null @@ -1,249 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Basic math routines for Nimrod. -## This module is available for the ECMAScript target. - -{.push debugger:off .} # the user does not want to trace a part - # of the standard library! - -{.push checks:off, line_dir:off, stack_trace:off.} - -when defined(Posix): - {.passl: "-lm".} - -const - PI* = 3.1415926535897932384626433 ## the circle constant PI (Ludolph's number) - E* = 2.71828182845904523536028747 ## Euler's number - -type - TFloatClass* = enum ## describes the class a floating point value belongs to. - ## This is the type that is returned by `classify`. - fcNormal, ## value is an ordinary nonzero floating point value - fcSubnormal, ## value is a subnormal (a very small) floating point value - fcZero, ## value is zero - fcNegZero, ## value is the negative zero - fcNan, ## value is Not-A-Number (NAN) - fcInf, ## value is positive infinity - fcNegInf ## value is negative infinity - -proc classify*(x: float): TFloatClass = - ## classifies a floating point value. Returns `x`'s class as specified by - ## `TFloatClass`. - - # ECMAScript and most C compilers have no classify: - if x == 0.0: - if 1.0/x == Inf: - return fcZero - else: - return fcNegZero - if x*0.5 == x: - if x > 0.0: return fcInf - else: return fcNegInf - if x != x: return fcNan - return fcNormal - # XXX: fcSubnormal is not detected! - - -proc binom*(n, k: int): int {.noSideEffect.} = - ## computes the binomial coefficient - if k <= 0: return 1 - if 2*k > n: return binom(n, n-k) - result = n - for i in countup(2, k): - result = (result * (n + 1 - i)) div i - -proc fac*(n: int): int {.noSideEffect.} = - ## computes the faculty function - result = 1 - for i in countup(2, n): - result = result * i - -proc isPowerOfTwo*(x: int): bool {.noSideEffect.} = - ## returns true, if x is a power of two, false otherwise. - ## Negative numbers are not a power of two. - return (x and -x) == x - -proc nextPowerOfTwo*(x: int): int = - ## returns the nearest power of two, so that - ## result**2 >= x > (result-1)**2. - result = x - 1 - when defined(cpu64): - result = result or (result shr 32) - result = result or (result shr 16) - result = result or (result shr 8) - result = result or (result shr 4) - result = result or (result shr 2) - result = result or (result shr 1) - Inc(result) - -proc countBits32*(n: int32): int {.noSideEffect.} = - ## counts the set bits in `n`. - var v = n - v = v -% ((v shr 1'i32) and 0x55555555'i32) - v = (v and 0x33333333'i32) +% ((v shr 2'i32) and 0x33333333'i32) - result = ((v +% (v shr 4'i32) and 0xF0F0F0F'i32) *% 0x1010101'i32) shr 24'i32 - -proc sum*[T](x: openarray[T]): T {.noSideEffect.} = - ## computes the sum of the elements in `x`. - ## If `x` is empty, 0 is returned. - for i in items(x): result = result + i - -proc mean*(x: openarray[float]): float {.noSideEffect.} = - ## computes the mean of the elements in `x`. - ## If `x` is empty, NaN is returned. - result = sum(x) / toFloat(len(x)) - -proc variance*(x: openarray[float]): float {.noSideEffect.} = - ## computes the mean of the elements in `x`. - ## If `x` is empty, NaN is returned. - result = 0.0 - var m = mean(x) - for i in 0 .. high(x): - var diff = x[i] - m - result = result + diff*diff - result = result / toFloat(len(x)) - -when not defined(ECMAScript): - proc random*(max: int): int - ## returns a random number in the range 0..max-1. The sequence of - ## random number is always the same, unless `randomize` is called - ## which initializes the random number generator with a "random" - ## number, i.e. a tickcount. - proc randomize*() - ## initializes the random number generator with a "random" - ## number, i.e. a tickcount. Note: Does nothing for the ECMAScript target, - ## as ECMAScript does not support this. - - proc sqrt*(x: float): float {.importc: "sqrt", header: "<math.h>".} - ## computes the square root of `x`. - - proc ln*(x: float): float {.importc: "log", header: "<math.h>".} - ## computes ln(x). - proc log10*(x: float): float {.importc: "log10", header: "<math.h>".} - proc log2*(x: float): float = return ln(x) / ln(2.0) - proc exp*(x: float): float {.importc: "exp", header: "<math.h>".} - ## computes e**x. - - proc frexp*(x: float, exponent: var int): float {. - importc: "frexp", header: "<math.h>".} - ## Split a number into mantissa and exponent. - ## `frexp` calculates the mantissa m (a float greater than or equal to 0.5 - ## and less than 1) and the integer value n such that `x` (the original - ## float value) equals m * 2**n. frexp stores n in `exponent` and returns - ## m. - - proc round*(x: float): int {.importc: "lrint", nodecl.} - ## converts a float to an int by rounding. - - proc arccos*(x: float): float {.importc: "acos", header: "<math.h>".} - proc arcsin*(x: float): float {.importc: "asin", header: "<math.h>".} - proc arctan*(x: float): float {.importc: "atan", header: "<math.h>".} - proc arctan2*(y, x: float): float {.importc: "atan2", header: "<math.h>".} - ## Calculate the arc tangent of `y` / `x`. - ## `atan2` returns the arc tangent of `y` / `x`; it produces correct - ## results even when the resulting angle is near pi/2 or -pi/2 - ## (`x` near 0). - - proc cos*(x: float): float {.importc: "cos", header: "<math.h>".} - proc cosh*(x: float): float {.importc: "cosh", header: "<math.h>".} - proc hypot*(x, y: float): float {.importc: "hypot", header: "<math.h>".} - ## same as ``sqrt(x*x + y*y)``. - - proc sinh*(x: float): float {.importc: "sinh", header: "<math.h>".} - proc tan*(x: float): float {.importc: "tan", header: "<math.h>".} - proc tanh*(x: float): float {.importc: "tanh", header: "<math.h>".} - proc pow*(x, y: float): float {.importc: "pow", header: "<math.h>".} - ## computes x to power raised of y. - - # C procs: - proc gettime(dummy: ptr cint): cint {.importc: "time", header: "<time.h>".} - proc srand(seed: cint) {.importc: "srand", nodecl.} - proc rand(): cint {.importc: "rand", nodecl.} - - proc randomize() = srand(gettime(nil)) - proc random(max: int): int = return int(rand()) mod max - -else: - proc mathrandom(): float {.importc: "Math.random", nodecl.} - proc mathfloor(x: float): float {.importc: "Math.floor", nodecl.} - proc random*(max: int): int = return mathfloor(mathrandom() * max) - proc randomize*() = nil - - proc sqrt*(x: float): float {.importc: "Math.sqrt", nodecl.} - proc ln*(x: float): float {.importc: "Math.log", nodecl.} - proc log10*(x: float): float = return ln(x) / ln(10.0) - proc log2*(x: float): float = return ln(x) / ln(2.0) - - proc exp*(x: float): float {.importc: "Math.exp", nodecl.} - proc round*(x: float): int {.importc: "Math.round", nodecl.} - proc pow*(x, y: float): float {.importc: "Math.pow", nodecl.} - - proc frexp*(x: float, exponent: var int): float = - if x == 0.0: - exponent = 0.0 - result = 0.0 - elif x < 0.0: - result = -frexp(-x, exponent) - else: - var ex = mathfloor(log2(x)) - exponent = round(ex) - result = x / pow(2.0, ex) - - proc arccos*(x: float): float {.importc: "Math.acos", nodecl.} - proc arcsin*(x: float): float {.importc: "Math.asin", nodecl.} - proc arctan*(x: float): float {.importc: "Math.atan", nodecl.} - proc arctan2*(y, x: float): float {.importc: "Math.atan2", nodecl.} - - proc cos*(x: float): float {.importc: "Math.cos", nodecl.} - proc cosh*(x: float): float = return (exp(x)+exp(-x))*0.5 - proc hypot*(x, y: float): float = return sqrt(x*x + y*y) - proc sinh*(x: float): float = return (exp(x)-exp(-x))*0.5 - proc tan*(x: float): float {.importc: "Math.tan", nodecl.} - proc tanh*(x: float): float = - var y = exp(2.0*x) - return (y-1.0)/(y+1.0) - - -type - TRunningStat* = object ## an accumulator for statistical data - n*: int ## number of pushed data - sum*, min*, max*, mean*: float ## self-explaining - oldM, oldS, newS: float - -proc push*(s: var TRunningStat, x: float) = - ## pushes a value `x` for processing - inc(s.n) - # See Knuth TAOCP vol 2, 3rd edition, page 232 - if s.n == 1: - s.oldM = x - s.mean = x - s.oldS = 0.0 - else: - s.mean = s.oldM + (x - s.oldM)/toFloat(s.n) - s.newS = s.oldS + (x - s.oldM)*(x - s.mean) - - # set up for next iteration: - s.oldM = s.mean - s.oldS = s.newS - - s.sum = s.sum + x - if s.min > x: s.min = x - if s.max < x: s.max = x - -proc variance*(s: TRunningStat): float = - ## computes the current variance of `s` - if s.n > 1: result = s.newS / (toFloat(s.n - 1)) - -proc standardDeviation*(s: TRunningStat): float = - ## computes the current standard deviation of `s` - result = sqrt(variance(s)) - -{.pop.} -{.pop.} diff --git a/nimlib/pure/md5.nim b/nimlib/pure/md5.nim deleted file mode 100755 index d9bb92949..000000000 --- a/nimlib/pure/md5.nim +++ /dev/null @@ -1,245 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Module for computing MD5 checksums. - -type - MD5State = array[0..3, int32] - MD5Block = array[0..15, int32] - MD5CBits = array[0..7, int8] - MD5Digest* = array[0..15, int8] - MD5Buffer = array[0..63, int8] - MD5Context* {.final.} = object - State: MD5State - Count: array[0..1, int32] - Buffer: MD5Buffer - -const - padding: cstring = "\x80\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0" - -proc F(x, y, z: int32): int32 {.inline.} = - Result = (x and y) or ((not x) and z) - -proc G(x, y, z: int32): int32 {.inline.} = - Result = (x and z) or (y and (not z)) - -proc H(x, y, z: int32): int32 {.inline.} = - Result = x xor y xor z - -proc I(x, y, z: int32): int32 {.inline.} = - Result = y xor (x or (not z)) - -proc rot(x: var int32, n: int8) {.inline.} = - x = toU32(x shl ze(n)) or (x shr toU32(32 -% ze(n))) - -proc FF(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = - a = a +% F(b, c, d) +% x +% ac - rot(a, s) - a = a +% b - -proc GG(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = - a = a +% G(b, c, d) +% x +% ac - rot(a, s) - a = a +% b - -proc HH(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = - a = a +% H(b, c, d) +% x +% ac - rot(a, s) - a = a +% b - -proc II(a: var int32, b, c, d, x: int32, s: int8, ac: int32) = - a = a +% I(b, c, d) +% x +% ac - rot(a, s) - a = a +% b - -proc encode(dest: var MD5Block, src: cstring) = - var j = 0 - for i in 0..high(dest): - dest[i] = toU32(ord(src[j]) or - ord(src[j+1]) shl 8 or - ord(src[j+2]) shl 16 or - ord(src[j+3]) shl 24) - inc(j, 4) - -proc decode(dest: var openarray[int8], src: openarray[int32]) = - var i = 0 - for j in 0..high(src): - dest[i] = toU8(src[j] and 0xff'i32) - dest[i+1] = toU8(src[j] shr 8'i32 and 0xff'i32) - dest[i+2] = toU8(src[j] shr 16'i32 and 0xff'i32) - dest[i+3] = toU8(src[j] shr 24'i32 and 0xff'i32) - inc(i, 4) - -proc transform(Buffer: pointer, State: var MD5State) = - var - myBlock: MD5Block - encode(myBlock, cast[cstring](buffer)) - var a = State[0] - var b = State[1] - var c = State[2] - var d = State[3] - FF(a, b, c, d, myBlock[0], 7'i8, 0xD76AA478'i32) - FF(d, a, b, c, myBlock[1], 12'i8, 0xE8C7B756'i32) - FF(c, d, a, b, myBlock[2], 17'i8, 0x242070DB'i32) - FF(b, c, d, a, myBlock[3], 22'i8, 0xC1BDCEEE'i32) - FF(a, b, c, d, myBlock[4], 7'i8, 0xF57C0FAF'i32) - FF(d, a, b, c, myBlock[5], 12'i8, 0x4787C62A'i32) - FF(c, d, a, b, myBlock[6], 17'i8, 0xA8304613'i32) - FF(b, c, d, a, myBlock[7], 22'i8, 0xFD469501'i32) - FF(a, b, c, d, myBlock[8], 7'i8, 0x698098D8'i32) - FF(d, a, b, c, myBlock[9], 12'i8, 0x8B44F7AF'i32) - FF(c, d, a, b, myBlock[10], 17'i8, 0xFFFF5BB1'i32) - FF(b, c, d, a, myBlock[11], 22'i8, 0x895CD7BE'i32) - FF(a, b, c, d, myBlock[12], 7'i8, 0x6B901122'i32) - FF(d, a, b, c, myBlock[13], 12'i8, 0xFD987193'i32) - FF(c, d, a, b, myBlock[14], 17'i8, 0xA679438E'i32) - FF(b, c, d, a, myBlock[15], 22'i8, 0x49B40821'i32) - GG(a, b, c, d, myBlock[1], 5'i8, 0xF61E2562'i32) - GG(d, a, b, c, myBlock[6], 9'i8, 0xC040B340'i32) - GG(c, d, a, b, myBlock[11], 14'i8, 0x265E5A51'i32) - GG(b, c, d, a, myBlock[0], 20'i8, 0xE9B6C7AA'i32) - GG(a, b, c, d, myBlock[5], 5'i8, 0xD62F105D'i32) - GG(d, a, b, c, myBlock[10], 9'i8, 0x02441453'i32) - GG(c, d, a, b, myBlock[15], 14'i8, 0xD8A1E681'i32) - GG(b, c, d, a, myBlock[4], 20'i8, 0xE7D3FBC8'i32) - GG(a, b, c, d, myBlock[9], 5'i8, 0x21E1CDE6'i32) - GG(d, a, b, c, myBlock[14], 9'i8, 0xC33707D6'i32) - GG(c, d, a, b, myBlock[3], 14'i8, 0xF4D50D87'i32) - GG(b, c, d, a, myBlock[8], 20'i8, 0x455A14ED'i32) - GG(a, b, c, d, myBlock[13], 5'i8, 0xA9E3E905'i32) - GG(d, a, b, c, myBlock[2], 9'i8, 0xFCEFA3F8'i32) - GG(c, d, a, b, myBlock[7], 14'i8, 0x676F02D9'i32) - GG(b, c, d, a, myBlock[12], 20'i8, 0x8D2A4C8A'i32) - HH(a, b, c, d, myBlock[5], 4'i8, 0xFFFA3942'i32) - HH(d, a, b, c, myBlock[8], 11'i8, 0x8771F681'i32) - HH(c, d, a, b, myBlock[11], 16'i8, 0x6D9D6122'i32) - HH(b, c, d, a, myBlock[14], 23'i8, 0xFDE5380C'i32) - HH(a, b, c, d, myBlock[1], 4'i8, 0xA4BEEA44'i32) - HH(d, a, b, c, myBlock[4], 11'i8, 0x4BDECFA9'i32) - HH(c, d, a, b, myBlock[7], 16'i8, 0xF6BB4B60'i32) - HH(b, c, d, a, myBlock[10], 23'i8, 0xBEBFBC70'i32) - HH(a, b, c, d, myBlock[13], 4'i8, 0x289B7EC6'i32) - HH(d, a, b, c, myBlock[0], 11'i8, 0xEAA127FA'i32) - HH(c, d, a, b, myBlock[3], 16'i8, 0xD4EF3085'i32) - HH(b, c, d, a, myBlock[6], 23'i8, 0x04881D05'i32) - HH(a, b, c, d, myBlock[9], 4'i8, 0xD9D4D039'i32) - HH(d, a, b, c, myBlock[12], 11'i8, 0xE6DB99E5'i32) - HH(c, d, a, b, myBlock[15], 16'i8, 0x1FA27CF8'i32) - HH(b, c, d, a, myBlock[2], 23'i8, 0xC4AC5665'i32) - II(a, b, c, d, myBlock[0], 6'i8, 0xF4292244'i32) - II(d, a, b, c, myBlock[7], 10'i8, 0x432AFF97'i32) - II(c, d, a, b, myBlock[14], 15'i8, 0xAB9423A7'i32) - II(b, c, d, a, myBlock[5], 21'i8, 0xFC93A039'i32) - II(a, b, c, d, myBlock[12], 6'i8, 0x655B59C3'i32) - II(d, a, b, c, myBlock[3], 10'i8, 0x8F0CCC92'i32) - II(c, d, a, b, myBlock[10], 15'i8, 0xFFEFF47D'i32) - II(b, c, d, a, myBlock[1], 21'i8, 0x85845DD1'i32) - II(a, b, c, d, myBlock[8], 6'i8, 0x6FA87E4F'i32) - II(d, a, b, c, myBlock[15], 10'i8, 0xFE2CE6E0'i32) - II(c, d, a, b, myBlock[6], 15'i8, 0xA3014314'i32) - II(b, c, d, a, myBlock[13], 21'i8, 0x4E0811A1'i32) - II(a, b, c, d, myBlock[4], 6'i8, 0xF7537E82'i32) - II(d, a, b, c, myBlock[11], 10'i8, 0xBD3AF235'i32) - II(c, d, a, b, myBlock[2], 15'i8, 0x2AD7D2BB'i32) - II(b, c, d, a, myBlock[9], 21'i8, 0xEB86D391'i32) - State[0] = State[0] +% a - State[1] = State[1] +% b - State[2] = State[2] +% c - State[3] = State[3] +% d - -proc MD5Init*(c: var MD5Context) = - ## initializes a MD5Context - c.State[0] = 0x67452301'i32 - c.State[1] = 0xEFCDAB89'i32 - c.State[2] = 0x98BADCFE'i32 - c.State[3] = 0x10325476'i32 - c.Count[0] = 0'i32 - c.Count[1] = 0'i32 - ZeroMem(addr(c.Buffer), SizeOf(MD5Buffer)) - -proc MD5Update*(c: var MD5Context, input: cstring, len: int) = - ## updates the MD5Context with the `input` data of length `len` - var input = input - var Index = (c.Count[0] shr 3) and 0x3F - c.Count[0] = c.count[0] +% toU32(len shl 3) - if c.Count[0] < (len shl 3): c.Count[1] = c.count[1] +% 1'i32 - c.Count[1] = c.count[1] +% toU32(len shr 29) - var PartLen = 64 - Index - if len >= PartLen: - CopyMem(addr(c.Buffer[Index]), Input, PartLen) - transform(addr(c.Buffer), c.State) - var i = PartLen - while i + 63 < len: - Transform(addr(Input[I]), c.State) - inc(i, 64) - CopyMem(addr(c.Buffer[0]), addr(Input[i]), len-i) - else: - CopyMem(addr(c.Buffer[Index]), addr(Input[0]), len) - -proc MD5Final*(c: var MD5Context, digest: var MD5Digest) = - ## finishes the MD5Context and stores the result in `digest` - var - Bits: MD5CBits - PadLen: int - decode(bits, c.Count) - var Index = (c.Count[0] shr 3) and 0x3F - if Index < 56: PadLen = 56 - Index - else: PadLen = 120 - Index - MD5Update(c, padding, PadLen) - MD5Update(c, cast[cstring](addr(Bits)), 8) - decode(digest, c.State) - ZeroMem(addr(c), SizeOf(MD5Context)) - -proc toMD5*(s: string): MD5Digest = - ## computes the MD5Digest value for a string `s` - var c: MD5Context - MD5Init(c) - MD5Update(c, cstring(s), len(s)) - MD5Final(c, result) - -proc `$`*(D: MD5Digest): string = - ## converts a MD5Digest value into its string representation - const digits = "0123456789abcdef" - result = "" - for i in 0..15: - add(result, Digits[(D[I] shr 4) and 0xF]) - add(result, Digits[D[I] and 0xF]) - -proc getMD5*(s: string): string = - ## computes an MD5 value of `s` and returns its string representation - var - c: MD5Context - d: MD5Digest - MD5Init(c) - MD5Update(c, cstring(s), len(s)) - MD5Final(c, d) - result = $d - -proc `==`*(D1, D2: MD5Digest): bool = - ## checks if two MD5Digest values are identical - for i in 0..15: - if D1[i] != D2[i]: return false - return true - -when isMainModule: - assert(getMD5("Franz jagt im komplett verwahrlosten Taxi quer durch Bayern") == - "a3cca2b2aa1e3b5b3b5aad99a8529074") - assert(getMD5("Frank jagt im komplett verwahrlosten Taxi quer durch Bayern") == - "7e716d0e702df0505fc72e2b89467910") - assert($toMD5("") == "d41d8cd98f00b204e9800998ecf8427e") - - diff --git a/nimlib/pure/os.nim b/nimlib/pure/os.nim deleted file mode 100755 index afa145e9f..000000000 --- a/nimlib/pure/os.nim +++ /dev/null @@ -1,1147 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module contains basic operating system facilities like -## retrieving environment variables, reading command line arguments, -## working with directories, running shell commands, etc. -{.deadCodeElim: on.} - -{.push debugger: off.} - -import - strutils, times - -when defined(windows): - import winlean -elif defined(posix): - import posix -else: - {.error: "OS module not ported to your operating system!".} - -include "system/ansi_c" - -# copied from excpt.nim, because I don't want to make this template public -template newException(exceptn, message: expr): expr = - block: # open a new scope - var - e: ref exceptn - new(e) - e.msg = message - e - -const - doslike = defined(windows) or defined(OS2) or defined(DOS) - # DOS-like filesystem - -when defined(Nimdoc): # only for proper documentation: - const - CurDir* = '.' - ## The constant string used by the operating system to refer to the - ## current directory. - ## - ## For example: '.' for POSIX or ':' for the classic Macintosh. - - ParDir* = ".." - ## The constant string used by the operating system to refer to the parent - ## directory. - ## - ## For example: ".." for POSIX or "::" for the classic Macintosh. - - DirSep* = '/' - ## The character used by the operating system to separate pathname - ## components, for example, '/' for POSIX or ':' for the classic - ## Macintosh. - - AltSep* = '/' - ## An alternative character used by the operating system to separate - ## pathname components, or the same as `DirSep` if only one separator - ## character exists. This is set to '/' on Windows systems where `DirSep` - ## is a backslash. - - PathSep* = ':' - ## The character conventionally used by the operating system to separate - ## search patch components (as in PATH), such as ':' for POSIX or ';' for - ## Windows. - - FileSystemCaseSensitive* = True - ## True if the file system is case sensitive, false otherwise. Used by - ## `cmpPaths` to compare filenames properly. - - ExeExt* = "" - ## The file extension of native executables. For example: - ## "" for POSIX, "exe" on Windows. - - ScriptExt* = "" - ## The file extension of a script file. For example: "" for POSIX, - ## "bat" on Windows. - -elif defined(macos): - const - curdir* = ':' - pardir* = "::" - dirsep* = ':' - altsep* = dirsep - pathsep* = ',' - FileSystemCaseSensitive* = false - ExeExt* = "" - ScriptExt* = "" - - # MacOS paths - # =========== - # MacOS directory separator is a colon ":" which is the only character not - # allowed in filenames. - # - # A path containing no colon or which begins with a colon is a partial path. - # E.g. ":kalle:petter" ":kalle" "kalle" - # - # All other paths are full (absolute) paths. E.g. "HD:kalle:" "HD:" - # When generating paths, one is safe if one ensures that all partial paths - # begin with a colon, and all full paths end with a colon. - # In full paths the first name (e g HD above) is the name of a mounted - # volume. - # These names are not unique, because, for instance, two diskettes with the - # same names could be inserted. This means that paths on MacOS are not - # waterproof. In case of equal names the first volume found will do. - # Two colons "::" are the relative path to the parent. Three is to the - # grandparent etc. -elif doslike: - const - curdir* = '.' - pardir* = ".." - dirsep* = '\\' # seperator within paths - altsep* = '/' - pathSep* = ';' # seperator between paths - FileSystemCaseSensitive* = false - ExeExt* = "exe" - ScriptExt* = "bat" -elif defined(PalmOS) or defined(MorphOS): - const - dirsep* = '/' - altsep* = dirsep - PathSep* = ';' - pardir* = ".." - FileSystemCaseSensitive* = false - ExeExt* = "" - ScriptExt* = "" -elif defined(RISCOS): - const - dirsep* = '.' - altsep* = '.' - pardir* = ".." # is this correct? - pathSep* = ',' - FileSystemCaseSensitive* = true - ExeExt* = "" - ScriptExt* = "" -else: # UNIX-like operating system - const - curdir* = '.' - pardir* = ".." - dirsep* = '/' - altsep* = dirsep - pathSep* = ':' - FileSystemCaseSensitive* = true - ExeExt* = "" - ScriptExt* = "" - -const - ExtSep* = '.' - ## The character which separates the base filename from the extension; - ## for example, the '.' in ``os.nim``. - -# procs dealing with command line arguments: -proc paramCount*(): int - ## Returns the number of command line arguments given to the - ## application. - -proc paramStr*(i: int): string - ## Returns the `i`-th command line arguments given to the - ## application. - ## - ## `i` should be in the range `1..paramCount()`, else - ## the `EOutOfIndex` exception is raised. - -proc OSError*(msg: string = "") {.noinline.} = - ## raises an EOS exception with the given message ``msg``. - ## If ``msg == ""``, the operating system's error flag - ## (``errno``) is converted to a readable error message. On Windows - ## ``GetLastError`` is checked before ``errno``. - ## If no error flag is set, the message ``unknown OS error`` is used. - if len(msg) == 0: - when defined(Windows): - var err = GetLastError() - if err != 0'i32: - # sigh, why is this is so difficult? - var msgbuf: cstring - if FormatMessageA(0x00000100 or 0x00001000 or 0x00000200, - nil, err, 0, addr(msgbuf), 0, nil) != 0'i32: - var m = $msgbuf - if msgbuf != nil: - LocalFree(msgbuf) - raise newException(EOS, m) - if errno != 0'i32: - raise newException(EOS, $os.strerror(errno)) - else: - raise newException(EOS, "unknown OS error") - else: - raise newException(EOS, msg) - -proc UnixToNativePath*(path: string): string {.noSideEffect.} = - ## Converts an UNIX-like path to a native one. - ## - ## On an UNIX system this does nothing. Else it converts - ## '/', '.', '..' to the appropriate things. - when defined(unix): - result = path - else: - var start: int - if path[0] == '/': - # an absolute path - when doslike: - result = r"C:\" - elif defined(macos): - result = "" # must not start with ':' - else: - result = $dirSep - start = 1 - elif path[0] == '.' and path[1] == '/': - # current directory - result = $curdir - start = 2 - else: - result = "" - start = 0 - - var i = start - while i < len(path): # ../../../ --> :::: - if path[i] == '.' and path[i+1] == '.' and path[i+2] == '/': - # parent directory - when defined(macos): - if result[high(result)] == ':': - add result, ':' - else: - add result, pardir - else: - add result, pardir & dirSep - inc(i, 3) - elif path[i] == '/': - add result, dirSep - inc(i) - else: - add result, path[i] - inc(i) - -proc existsFile*(filename: string): bool = - ## Returns true if the file exists, false otherwise. - when defined(windows): - var a = GetFileAttributesA(filename) - if a != -1'i32: - result = (a and FILE_ATTRIBUTE_DIRECTORY) == 0'i32 - else: - var res: TStat - return stat(filename, res) >= 0'i32 and S_ISREG(res.st_mode) - -proc existsDir*(dir: string): bool = - ## Returns true iff the directory `dir` exists. If `dir` is a file, false - ## is returned. - when defined(windows): - var a = GetFileAttributesA(dir) - if a != -1'i32: - result = (a and FILE_ATTRIBUTE_DIRECTORY) != 0'i32 - else: - var res: TStat - return stat(dir, res) >= 0'i32 and S_ISDIR(res.st_mode) - -proc getLastModificationTime*(file: string): TTime = - ## Returns the `file`'s last modification time. - when defined(posix): - var res: TStat - if stat(file, res) < 0'i32: OSError() - return res.st_mtime - else: - var f: TWIN32_Find_Data - var h = findfirstFileA(file, f) - if h == -1'i32: OSError() - result = winTimeToUnixTime(rdFileTime(f.ftLastWriteTime)) - findclose(h) - -proc getLastAccessTime*(file: string): TTime = - ## Returns the `file`'s last read or write access time. - when defined(posix): - var res: TStat - if stat(file, res) < 0'i32: OSError() - return res.st_atime - else: - var f: TWIN32_Find_Data - var h = findfirstFileA(file, f) - if h == -1'i32: OSError() - result = winTimeToUnixTime(rdFileTime(f.ftLastAccessTime)) - findclose(h) - -proc getCreationTime*(file: string): TTime = - ## Returns the `file`'s creation time. - when defined(posix): - var res: TStat - if stat(file, res) < 0'i32: OSError() - return res.st_ctime - else: - var f: TWIN32_Find_Data - var h = findfirstFileA(file, f) - if h == -1'i32: OSError() - result = winTimeToUnixTime(rdFileTime(f.ftCreationTime)) - findclose(h) - -proc fileNewer*(a, b: string): bool = - ## Returns true if the file `a` is newer than file `b`, i.e. if `a`'s - ## modification time is later than `b`'s. - result = getLastModificationTime(a) - getLastModificationTime(b) > 0 - -proc getCurrentDir*(): string = - ## Returns the current working directory. - const bufsize = 512 # should be enough - result = newString(bufsize) - when defined(windows): - var L = GetCurrentDirectoryA(bufsize, result) - if L == 0'i32: OSError() - setLen(result, L) - else: - if getcwd(result, bufsize) != nil: - setlen(result, c_strlen(result)) - else: - OSError() - -proc setCurrentDir*(newDir: string) {.inline.} = - ## Sets the current working directory; `EOS` is raised if - ## `newDir` cannot been set. - when defined(Windows): - if SetCurrentDirectoryA(newDir) == 0'i32: OSError() - else: - if chdir(newDir) != 0'i32: OSError() - -proc JoinPath*(head, tail: string): string {.noSideEffect.} = - ## Joins two directory names to one. - ## - ## For example on Unix: - ## - ## ..code-block:: nimrod - ## JoinPath("usr", "lib") - ## - ## results in: - ## - ## ..code-block:: nimrod - ## "usr/lib" - ## - ## If head is the empty string, tail is returned. - ## If tail is the empty string, head is returned. - if len(head) == 0: - result = tail - elif head[len(head)-1] in {DirSep, AltSep}: - if tail[0] in {DirSep, AltSep}: - result = head & copy(tail, 1) - else: - result = head & tail - else: - if tail[0] in {DirSep, AltSep}: - result = head & tail - else: - result = head & DirSep & tail - -proc JoinPath*(parts: openarray[string]): string {.noSideEffect.} = - ## The same as `JoinPath(head, tail)`, but works with any number - ## of directory parts. - result = parts[0] - for i in 1..high(parts): - result = JoinPath(result, parts[i]) - -proc `/` * (head, tail: string): string {.noSideEffect.} = - ## The same as ``joinPath(head, tail)`` - return joinPath(head, tail) - -proc SplitPath*(path: string, head, tail: var string) {.noSideEffect, - deprecated.} = - ## **Deprecated since version 0.8.2**: use the version that returns a tuple - ## instead - var - sepPos = -1 - for i in countdown(len(path)-1, 0): - if path[i] in {dirsep, altsep}: - sepPos = i - break - if sepPos >= 0: - head = copy(path, 0, sepPos-1) - tail = copy(path, sepPos+1) - else: - head = "" - tail = path # make a string copy here - -proc SplitPath*(path: string): tuple[head, tail: string] {.noSideEffect.} = - ## Splits a directory into (head, tail), so that - ## ``JoinPath(head, tail) == path``. - ## - ## Examples: - ## .. code-block:: nimrod - ## SplitPath("usr/local/bin") -> ("usr/local", "bin") - ## SplitPath("usr/local/bin/") -> ("usr/local/bin", "") - ## SplitPath("bin") -> ("", "bin") - ## SplitPath("/bin") -> ("", "bin") - ## SplitPath("") -> ("", "") - var - sepPos = -1 - for i in countdown(len(path)-1, 0): - if path[i] in {dirsep, altsep}: - sepPos = i - break - if sepPos >= 0: - result.head = copy(path, 0, sepPos-1) - result.tail = copy(path, sepPos+1) - else: - result.head = "" - result.tail = path - -proc parentDir*(path: string): string {.noSideEffect.} = - ## Returns the parent directory of `path`. - ## - ## This is often the same as the ``head`` result of ``splitPath``. - ## If there is no parent, ``path`` is returned. - ## Example: ``parentDir("/usr/local/bin") == "/usr/local"``. - ## Example: ``parentDir("/usr/local/bin/") == "/usr/local"``. - var - sepPos = -1 - q = 1 - if path[len(path)-1] in {dirsep, altsep}: - q = 2 - for i in countdown(len(path)-q, 0): - if path[i] in {dirsep, altsep}: - sepPos = i - break - if sepPos >= 0: - result = copy(path, 0, sepPos-1) - else: - result = path - -proc `/../` * (head, tail: string): string {.noSideEffect.} = - ## The same as ``parentDir(head) / tail`` - return parentDir(head) / tail - -proc normExt(ext: string): string = - if ext == "" or ext[0] == extSep: result = ext # no copy needed here - else: result = extSep & ext - -proc searchExtPos(s: string): int = - # BUGFIX: do not search until 0! .DS_Store is no file extension! - result = -1 - for i in countdown(len(s)-1, 1): - if s[i] == extsep: - result = i - break - elif s[i] in {dirsep, altsep}: - break # do not skip over path - -proc splitFile*(path: string): tuple[dir, name, ext: string] {.noSideEffect.} = - ## Splits a filename into (dir, filename, extension). - ## `dir` does not end in `DirSep`. - ## `extension` includes the leading dot. - ## - ## Example: - ## - ## .. code-block:: nimrod - ## var (dir, name, ext) = splitFile("usr/local/nimrodc.html") - ## assert dir == "usr/local" - ## assert name == "nimrodc" - ## assert ext == ".html" - ## - ## If `path` has no extension, `ext` is the empty string. - ## If `path` has no directory component, `dir` is the empty string. - ## If `path` has no filename component, `name` and `ext` are empty strings. - if path.len == 0 or path[path.len-1] in {dirSep, altSep}: - result = (path, "", "") - else: - var sepPos = -1 - var dotPos = path.len - for i in countdown(len(path)-1, 0): - if path[i] == ExtSep: - if dotPos == path.len and i > 0: dotPos = i - elif path[i] in {dirsep, altsep}: - sepPos = i - break - result.dir = copy(path, 0, sepPos-1) - result.name = copy(path, sepPos+1, dotPos-1) - result.ext = copy(path, dotPos) - -proc extractDir*(path: string): string {.noSideEffect, deprecated.} = - ## Extracts the directory of a given path. This is almost the - ## same as the `head` result of `splitPath`, except that - ## ``extractDir("/usr/lib/") == "/usr/lib/"``. - ## **Deprecated since version 0.8.2**: Use ``splitFile(path).dir`` instead. - result = splitFile(path).dir - -proc extractFilename*(path: string): string {.noSideEffect.} = - ## Extracts the filename of a given `path`. This is the same as - ## ``name & ext`` from ``splitFile(path)``. - if path.len == 0 or path[path.len-1] in {dirSep, altSep}: - result = "" - else: - result = splitPath(path).tail - -proc expandFilename*(filename: string): string = - ## Returns the full path of `filename`, raises EOS in case of an error. - when defined(windows): - var unused: cstring - result = newString(3072) - var L = GetFullPathNameA(filename, 3072'i32, result, unused) - if L <= 0'i32 or L >= 3072'i32: OSError() - setLen(result, L) - else: - var res = realpath(filename, nil) - if res == nil: OSError() - result = $res - c_free(res) - -proc SplitFilename*(filename: string, name, extension: var string) {. - noSideEffect, deprecated.} = - ## Splits a filename into (name, extension), so that - ## ``name & extension == filename``. - ## - ## Example: After ``SplitFilename("usr/local/nimrodc.html", name, ext)``, - ## `name` is "usr/local/nimrodc" and `ext` is ".html". - ## If the file has no extension, extension is the empty string. - ## **Deprecated since version 0.8.2**: Use ``splitFile(filename)`` instead. - var extPos = searchExtPos(filename) - if extPos >= 0: - name = copy(filename, 0, extPos-1) - extension = copy(filename, extPos) - else: - name = filename # make a string copy here - extension = "" - -proc extractFileExt*(filename: string): string {.noSideEffect, deprecated.} = - ## Extracts the file extension of a given `filename`. This is the - ## same as the `extension` result of `splitFilename`. - ## **Deprecated since version 0.8.2**: Use ``splitFile(filename).ext`` - ## instead. - result = splitFile(filename).ext - -proc extractFileTrunk*(filename: string): string {.noSideEffect, deprecated.} = - ## Extracts the file name of a given `filename`. This removes any - ## directory information and the file extension. - ## **Deprecated since version 0.8.2**: Use ``splitFile(path).name`` instead. - result = splitFile(filename).name - -proc ChangeFileExt*(filename, ext: string): string {.noSideEffect.} = - ## Changes the file extension to `ext`. - ## - ## If the `filename` has no extension, `ext` will be added. - ## If `ext` == "" then any extension is removed. - ## `Ext` should be given without the leading '.', because some - ## filesystems may use a different character. (Although I know - ## of none such beast.) - var extPos = searchExtPos(filename) - if extPos < 0: result = filename & normExt(ext) - else: result = copy(filename, 0, extPos-1) & normExt(ext) - -proc addFileExt*(filename, ext: string): string {.noSideEffect.} = - ## Adds the file extension `ext` to `filename`, unless - ## `filename` already has an extension. - ## - ## `Ext` should be given without the leading '.', because some - ## filesystems may use a different character. - ## (Although I know of none such beast.) - var extPos = searchExtPos(filename) - if extPos < 0: result = filename & normExt(ext) - else: result = filename - -proc AppendFileExt*(filename, ext: string): string {. - noSideEffect, deprecated.} = - ## **Deprecated since version 0.8.2**: Use `addFileExt` instead. - result = addFileExt(filename, ext) - -proc cmpPaths*(pathA, pathB: string): int {.noSideEffect.} = - ## Compares two paths. - ## - ## On a case-sensitive filesystem this is done - ## case-sensitively otherwise case-insensitively. Returns: - ## - ## | 0 iff pathA == pathB - ## | < 0 iff pathA < pathB - ## | > 0 iff pathA > pathB - if FileSystemCaseSensitive: - result = cmp(pathA, pathB) - else: - result = cmpIgnoreCase(pathA, pathB) - -proc sameFile*(path1, path2: string): bool = - ## Returns True if both pathname arguments refer to the same file or - ## directory (as indicated by device number and i-node number). - ## Raises an exception if an os.stat() call on either pathname fails. - when defined(Windows): - var - a, b: TWin32FindData - var resA = findfirstFileA(path1, a) - var resB = findfirstFileA(path2, b) - if resA != -1 and resB != -1: - result = $a.cFileName == $b.cFileName - else: - # work around some ``findfirstFileA`` bugs - result = cmpPaths(path1, path2) == 0 - if resA != -1: findclose(resA) - if resB != -1: findclose(resB) - else: - var - a, b: TStat - if stat(path1, a) < 0'i32 or stat(path2, b) < 0'i32: - result = cmpPaths(path1, path2) == 0 # be consistent with Windows - else: - result = a.st_dev == b.st_dev and a.st_ino == b.st_ino - -proc sameFileContent*(path1, path2: string): bool = - ## Returns True if both pathname arguments refer to files with identical - ## binary content. - const - bufSize = 8192 # 8K buffer - var - a, b: TFile - if not open(a, path1): return false - if not open(b, path2): - close(a) - return false - var bufA = alloc(bufsize) - var bufB = alloc(bufsize) - while True: - var readA = readBuffer(a, bufA, bufsize) - var readB = readBuffer(b, bufB, bufsize) - if readA != readB: - result = false - break - if readA == 0: - result = true - break - result = equalMem(bufA, bufB, readA) - if not result: break - if readA != bufSize: break # end of file - dealloc(bufA) - dealloc(bufB) - close(a) - close(b) - -proc copyFile*(dest, source: string) = - ## Copies a file from `source` to `dest`. If this fails, - ## `EOS` is raised. - when defined(Windows): - if CopyFileA(source, dest, 0'i32) == 0'i32: OSError() - else: - # generic version of copyFile which works for any platform: - const - bufSize = 8192 # 8K buffer - var - d, s: TFile - if not open(s, source): OSError() - if not open(d, dest, fmWrite): - close(s) - OSError() - var - buf: Pointer = alloc(bufsize) - bytesread, byteswritten: int - while True: - bytesread = readBuffer(s, buf, bufsize) - byteswritten = writeBuffer(d, buf, bytesread) - if bytesread != bufSize: break - if bytesread != bytesWritten: OSError() - dealloc(buf) - close(s) - close(d) - -proc moveFile*(dest, source: string) = - ## Moves a file from `source` to `dest`. If this fails, `EOS` is raised. - if crename(source, dest) != 0'i32: OSError() - -proc removeFile*(file: string) = - ## Removes the `file`. If this fails, `EOS` is raised. - if cremove(file) != 0'i32: OSError() - -proc executeShellCommand*(command: string): int {.deprecated.} = - ## **Deprecated since version 0.8.2**: Use `execShellCmd` instead. - result = csystem(command) - -proc execShellCmd*(command: string): int = - ## Executes a shell command. - ## - ## Command has the form 'program args' where args are the command - ## line arguments given to program. The proc returns the error code - ## of the shell when it has finished. The proc does not return until - ## the process has finished. To execute a program without having a - ## shell involved, use the `execProcess` proc of the `osproc` - ## module. - result = csystem(command) - -var - envComputed: bool = false - environment: seq[string] = @[] - -when defined(windows): - # because we support Windows GUI applications, things get really - # messy here... - proc strEnd(cstr: CString, c = 0'i32): CString {. - importc: "strchr", header: "<string.h>".} - - proc getEnvVarsC() = - if not envComputed: - var - env = getEnvironmentStringsA() - e = env - if e == nil: return # an error occured - while True: - var eend = strEnd(e) - add(environment, $e) - e = cast[CString](cast[TAddress](eend)+1) - if eend[1] == '\0': break - envComputed = true - discard FreeEnvironmentStringsA(env) - -else: - var - gEnv {.importc: "gEnv".}: ptr array [0..10_000, CString] - - proc getEnvVarsC() = - # retrieves the variables of char** env of C's main proc - if not envComputed: - var i = 0 - while True: - if gEnv[i] == nil: break - add environment, $gEnv[i] - inc(i) - envComputed = true - -proc findEnvVar(key: string): int = - getEnvVarsC() - var temp = key & '=' - for i in 0..high(environment): - if startsWith(environment[i], temp): return i - return -1 - -proc getEnv*(key: string): string = - ## Returns the value of the environment variable named `key`. - ## - ## If the variable does not exist, "" is returned. To distinguish - ## whether a variable exists or it's value is just "", call - ## `existsEnv(key)`. - var i = findEnvVar(key) - if i >= 0: - return copy(environment[i], find(environment[i], '=')+1) - else: - var env = cgetenv(key) - if env == nil: return "" - result = $env - -proc existsEnv*(key: string): bool = - ## Checks whether the environment variable named `key` exists. - ## Returns true if it exists, false otherwise. - if cgetenv(key) != nil: return true - else: return findEnvVar(key) >= 0 - -proc putEnv*(key, val: string) = - ## Sets the value of the environment variable named `key` to `val`. - ## If an error occurs, `EInvalidEnvVar` is raised. - - # Note: by storing the string in the environment sequence, - # we gurantee that we don't free the memory before the program - # ends (this is needed for POSIX compliance). It is also needed so that - # the process itself may access its modified environment variables! - var indx = findEnvVar(key) - if indx >= 0: - environment[indx] = key & '=' & val - else: - add environment, (key & '=' & val) - indx = high(environment) - when defined(unix): - if cputenv(environment[indx]) != 0'i32: - OSError() - else: - if SetEnvironmentVariableA(key, val) == 0'i32: - OSError() - -iterator iterOverEnvironment*(): tuple[key, value: string] {.deprecated.} = - ## Iterate over all environments variables. In the first component of the - ## tuple is the name of the current variable stored, in the second its value. - ## **Deprecated since version 0.8.2**: Use `envPairs` instead. - getEnvVarsC() - for i in 0..high(environment): - var p = find(environment[i], '=') - yield (copy(environment[i], 0, p-1), copy(environment[i], p+1)) - -iterator envPairs*(): tuple[key, value: string] = - ## Iterate over all environments variables. In the first component of the - ## tuple is the name of the current variable stored, in the second its value. - getEnvVarsC() - for i in 0..high(environment): - var p = find(environment[i], '=') - yield (copy(environment[i], 0, p-1), copy(environment[i], p+1)) - -iterator walkFiles*(pattern: string): string = - ## Iterate over all the files that match the `pattern`. - ## - ## `pattern` is OS dependant, but at least the "\*.ext" - ## notation is supported. - when defined(windows): - var - f: TWin32FindData - res: int - res = findfirstFileA(pattern, f) - if res != -1: - while true: - if f.cFileName[0] != '.': - yield splitFile(pattern).dir / extractFilename($f.cFileName) - if findnextFileA(res, f) == 0'i32: break - findclose(res) - else: # here we use glob - var - f: TGlob - res: int - f.gl_offs = 0 - f.gl_pathc = 0 - f.gl_pathv = nil - res = glob(pattern, 0, nil, addr(f)) - if res == 0: - for i in 0.. f.gl_pathc - 1: - assert(f.gl_pathv[i] != nil) - yield $f.gl_pathv[i] - globfree(addr(f)) - -type - TPathComponent* = enum ## Enumeration specifying a path component. - pcFile, ## path refers to a file - pcLinkToFile, ## path refers to a symbolic link to a file - pcDir, ## path refers to a directory - pcLinkToDir ## path refers to a symbolic link to a directory - -const - pcDirectory* {.deprecated.} = pcDir ## deprecated alias - pcLinkToDirectory* {.deprecated.} = pcLinkToDir ## deprecated alias - -iterator walkDir*(dir: string): tuple[kind: TPathComponent, path: string] = - ## walks over the directory `dir` and yields for each directory or file in - ## `dir`. The component type and full path for each item is returned. - ## Walking is not recursive. - ## Example: This directory structure:: - ## dirA / dirB / fileB1.txt - ## / dirC - ## / fileA1.txt - ## / fileA2.txt - ## - ## and this code: - ## - ## .. code-block:: Nimrod - ## for kind, path in walkDir("dirA"): - ## echo(path) - ## - ## produces this output (though not necessarily in this order!):: - ## dirA/dirB - ## dirA/dirC - ## dirA/fileA1.txt - ## dirA/fileA2.txt - when defined(windows): - var f: TWIN32_Find_Data - var h = findfirstFileA(dir / "*", f) - if h != -1: - while true: - var k = pcFile - if f.cFilename[0] != '.': - if (f.dwFileAttributes and FILE_ATTRIBUTE_DIRECTORY) != 0'i32: - k = pcDir - yield (k, dir / extractFilename($f.cFilename)) - if findnextFileA(h, f) == 0'i32: break - findclose(h) - else: - var d = openDir(dir) - if d != nil: - while true: - var x = readDir(d) - if x == nil: break - var y = $x.d_name - if y != "." and y != "..": - var s: TStat - y = dir / y - if stat(y, s) < 0'i32: break - var k = pcFile - if S_ISDIR(s.st_mode): k = pcDir - if S_ISLNK(s.st_mode): k = succ(k) - yield (k, y) - discard closeDir(d) - -iterator walkDirRec*(dir: string, filter={pcFile, pcDir}): string = - ## walks over the directory `dir` and yields for each file in `dir`. The - ## full path for each file is returned. - ## Walking is recursive. `filter` controls the behaviour of the iterator: - ## - ## --------------------- --------------------------------------------- - ## filter meaning - ## --------------------- --------------------------------------------- - ## ``pcFile`` yield real files - ## ``pcLinkToFile`` yield symbolic links to files - ## ``pcDir`` follow real directories - ## ``pcLinkToDir`` follow symbolic links to directories - ## --------------------- --------------------------------------------- - ## - var stack = @[dir] - while stack.len > 0: - for k,p in walkDir(stack.pop()): - if k in filter: - case k - of pcFile, pcLinkToFile: yield p - of pcDir, pcLinkToDir: stack.add(p) - -proc rawRemoveDir(dir: string) = - when defined(windows): - if RemoveDirectoryA(dir) == 0'i32: OSError() - else: - if rmdir(dir) != 0'i32: OSError() - -proc removeDir*(dir: string) = - ## Removes the directory `dir` including all subdirectories and files - ## in `dir` (recursively). If this fails, `EOS` is raised. - for kind, path in walkDir(dir): - case kind - of pcFile, pcLinkToFile, pcLinkToDir: removeFile(path) - of pcDir: removeDir(path) - rawRemoveDir(dir) - -proc rawCreateDir(dir: string) = - when defined(unix): - if mkdir(dir, 0o711) != 0'i32 and errno != EEXIST: - OSError() - else: - if CreateDirectoryA(dir, nil) == 0'i32 and GetLastError() != 183'i32: - OSError() - -proc createDir*(dir: string) = - ## Creates the directory `dir`. - ## - ## The directory may contain several subdirectories that do not exist yet. - ## The full path is created. If this fails, `EOS` is raised. It does **not** - ## fail if the path already exists because for most usages this does not - ## indicate an error. - for i in 1.. dir.len-1: - if dir[i] in {dirsep, altsep}: rawCreateDir(copy(dir, 0, i-1)) - rawCreateDir(dir) - -proc parseCmdLine*(c: string): seq[string] = - ## Splits a command line into several components; components are separated by - ## whitespace unless the whitespace occurs within ``"`` or ``'`` quotes. - ## This proc is only occassionally useful, better use the `parseopt` module. - result = @[] - var i = 0 - var a = "" - while true: - setLen(a, 0) - while c[i] >= '\1' and c[i] <= ' ': inc(i) # skip whitespace - case c[i] - of '\'', '\"': - var delim = c[i] - inc(i) # skip ' or " - while c[i] != '\0' and c[i] != delim: - add a, c[i] - inc(i) - if c[i] != '\0': inc(i) - of '\0': break - else: - while c[i] > ' ': - add(a, c[i]) - inc(i) - add(result, a) - -type - TFilePermission* = enum ## file access permission; modelled after UNIX - fpUserExec, ## execute access for the file owner - fpUserWrite, ## write access for the file owner - fpUserRead, ## read access for the file owner - fpGroupExec, ## execute access for the group - fpGroupWrite, ## write access for the group - fpGroupRead, ## read access for the group - fpOthersExec, ## execute access for others - fpOthersWrite, ## write access for others - fpOthersRead ## read access for others - -proc getFilePermissions*(filename: string): set[TFilePermission] = - ## retrieves file permissions for `filename`. `OSError` is raised in case of - ## an error. On Windows, only the ``readonly`` flag is checked, every other - ## permission is available in any case. - when defined(posix): - var a: TStat - if stat(filename, a) < 0'i32: OSError() - result = {} - if (a.st_mode and S_IRUSR) != 0'i32: result.incl(fpUserRead) - if (a.st_mode and S_IWUSR) != 0'i32: result.incl(fpUserWrite) - if (a.st_mode and S_IXUSR) != 0'i32: result.incl(fpUserExec) - - if (a.st_mode and S_IRGRP) != 0'i32: result.incl(fpGroupRead) - if (a.st_mode and S_IWGRP) != 0'i32: result.incl(fpGroupWrite) - if (a.st_mode and S_IXGRP) != 0'i32: result.incl(fpGroupExec) - - if (a.st_mode and S_IROTH) != 0'i32: result.incl(fpOthersRead) - if (a.st_mode and S_IWOTH) != 0'i32: result.incl(fpOthersWrite) - if (a.st_mode and S_IXOTH) != 0'i32: result.incl(fpOthersExec) - else: - var res = GetFileAttributesA(filename) - if res == -1'i32: OSError() - if (res and FILE_ATTRIBUTE_READONLY) != 0'i32: - result = {fpUserExec, fpUserRead, fpGroupExec, fpGroupRead, - fpOthersExec, fpOthersRead} - else: - result = {fpUserExec..fpOthersRead} - -proc setFilePermissions*(filename: string, permissions: set[TFilePermission]) = - ## sets the file permissions for `filename`. `OSError` is raised in case of - ## an error. On Windows, only the ``readonly`` flag is changed, depending on - ## ``fpUserWrite``. - when defined(posix): - var p = 0'i32 - if fpUserRead in permissions: p = p or S_IRUSR - if fpUserWrite in permissions: p = p or S_IWUSR - if fpUserExec in permissions: p = p or S_IXUSR - - if fpGroupRead in permissions: p = p or S_IRGRP - if fpGroupWrite in permissions: p = p or S_IWGRP - if fpGroupExec in permissions: p = p or S_IXGRP - - if fpOthersRead in permissions: p = p or S_IROTH - if fpOthersWrite in permissions: p = p or S_IWOTH - if fpOthersExec in permissions: p = p or S_IXOTH - - if chmod(filename, p) != 0: OSError() - else: - var res = GetFileAttributesA(filename) - if res == -1'i32: OSError() - if fpUserWrite in permissions: - res = res and not FILE_ATTRIBUTE_READONLY - else: - res = res or FILE_ATTRIBUTE_READONLY - if SetFileAttributesA(filename, res) != 0'i32: - OSError() - -proc inclFilePermissions*(filename: string, - permissions: set[TFilePermission]) = - ## a convenience procedure for: - ## - ## .. code-block:: nimrod - ## setFilePermissions(filename, getFilePermissions(filename)+permissions) - setFilePermissions(filename, getFilePermissions(filename)+permissions) - -proc exclFilePermissions*(filename: string, - permissions: set[TFilePermission]) = - ## a convenience procedure for: - ## - ## .. code-block:: nimrod - ## setFilePermissions(filename, getFilePermissions(filename)-permissions) - setFilePermissions(filename, getFilePermissions(filename)-permissions) - -proc getHomeDir*(): string = - ## Returns the home directory of the current user. - when defined(windows): return getEnv("USERPROFILE") & "\\" - else: return getEnv("HOME") & "/" - -proc getConfigDir*(): string = - ## Returns the config directory of the current user for applications. - when defined(windows): return getEnv("APPDATA") & "\\" - else: return getEnv("HOME") & "/.config/" - -when defined(windows): - # Since we support GUI applications with Nimrod, we sometimes generate - # a WinMain entry proc. But a WinMain proc has no access to the parsed - # command line arguments. The way to get them differs. Thus we parse them - # ourselves. This has the additional benefit that the program's behaviour - # is always the same -- independent of the used C compiler. - var - ownArgv: seq[string] - - proc paramStr(i: int): string = - if isNil(ownArgv): ownArgv = parseCmdLine($getCommandLineA()) - return ownArgv[i] - - proc paramCount(): int = - if isNil(ownArgv): ownArgv = parseCmdLine($getCommandLineA()) - result = ownArgv.len-1 - -else: - var - cmdCount {.importc: "cmdCount".}: cint - cmdLine {.importc: "cmdLine".}: cstringArray - - proc paramStr(i: int): string = - if i < cmdCount and i >= 0: return $cmdLine[i] - raise newException(EInvalidIndex, "invalid index") - - proc paramCount(): int = return cmdCount-1 - -when defined(linux) or defined(solaris) or defined(bsd) or defined(aix): - proc getApplAux(procPath: string): string = - result = newString(256) - var len = readlink(procPath, result, 256) - if len > 256: - result = newString(len+1) - len = readlink(procPath, result, len) - setlen(result, len) - -when defined(macosx): - # a really hacky solution: since we like to include 2 headers we have to - # define two procs which in reality are the same - proc getExecPath1(c: cstring, size: var int32) {. - importc: "_NSGetExecutablePath", header: "<sys/param.h>".} - proc getExecPath2(c: cstring, size: var int32): bool {. - importc: "_NSGetExecutablePath", header: "<mach-o/dyld.h>".} - -proc getApplicationFilename*(): string = - ## Returns the filename of the application's executable. - - # Linux: /proc/<pid>/exe - # Solaris: - # /proc/<pid>/object/a.out (filename only) - # /proc/<pid>/path/a.out (complete pathname) - # *BSD (and maybe Darwin too): - # /proc/<pid>/file - when defined(windows): - result = newString(256) - var len = getModuleFileNameA(0, result, 256) - setlen(result, int(len)) - elif defined(linux) or defined(aix): - result = getApplAux("/proc/self/exe") - elif defined(solaris): - result = getApplAux("/proc/" & $getpid() & "/path/a.out") - elif defined(bsd): - result = getApplAux("/proc/" & $getpid() & "/file") - elif defined(macosx): - var size: int32 - getExecPath1(nil, size) - result = newString(int(size)) - if getExecPath2(result, size): - result = "" # error! - else: - # little heuristic that may work on other POSIX-like systems: - result = getEnv("_") - if len(result) == 0: - result = ParamStr(0) # POSIX guaranties that this contains the executable - # as it has been executed by the calling process - if len(result) > 0 and result[0] != DirSep: # not an absolute path? - # iterate over any path in the $PATH environment variable - for p in split(getEnv("PATH"), {PathSep}): - var x = joinPath(p, result) - if ExistsFile(x): return x - -proc getApplicationDir*(): string = - ## Returns the directory of the application's executable. - result = splitFile(getApplicationFilename()).dir - -proc sleep*(milsecs: int) = - ## sleeps `milsecs` milliseconds. - when defined(windows): - winlean.sleep(int32(milsecs)) - else: - var a, b: Ttimespec - a.tv_sec = TTime(milsecs div 1000) - a.tv_nsec = (milsecs mod 1000) * 1000 - discard posix.nanosleep(a, b) - -{.pop.} diff --git a/nimlib/pure/osproc.nim b/nimlib/pure/osproc.nim deleted file mode 100755 index d76825531..000000000 --- a/nimlib/pure/osproc.nim +++ /dev/null @@ -1,543 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements an advanced facility for executing OS processes -## and process communication. - -import - strutils, os, strtabs, streams - -when defined(windows): - import winlean -else: - import posix - -type - TProcess = object of TObject - when defined(windows): - FProcessHandle: Thandle - inputHandle, outputHandle, errorHandle: TFileHandle - else: - inputHandle, outputHandle, errorHandle: TFileHandle - id: cint - exitCode: cint - - PProcess* = ref TProcess ## represents an operating system process - - TProcessOption* = enum ## options that can be passed `startProcess` - poEchoCmd, ## echo the command before execution - poUseShell, ## use the shell to execute the command; NOTE: This - ## often creates a security whole! - poStdErrToStdOut, ## merge stdout and stderr to the stdout stream - poParentStreams ## use the parent's streams - -proc execProcess*(command: string, - options: set[TProcessOption] = {poStdErrToStdOut, - poUseShell}): string - ## A convience procedure that executes ``command`` with ``startProcess`` - ## and returns its output as a string. - -proc executeProcess*(command: string, - options: set[TProcessOption] = {poStdErrToStdOut, - poUseShell}): string {. - deprecated.} = - ## **Deprecated since version 0.8.2**: Use `execProcess` instead. - result = execProcess(command, options) - -proc execCmd*(command: string): int - ## Executes ``command`` and returns its error code. Standard input, output, - ## error streams are inherited from the calling process. - -proc executeCommand*(command: string): int {.deprecated.} = - ## **Deprecated since version 0.8.2**: Use `execCmd` instead. - result = execCmd(command) - - -proc startProcess*(command: string, - workingDir: string = "", - args: openarray[string] = [], - env: PStringTable = nil, - options: set[TProcessOption] = {poStdErrToStdOut}): PProcess - ## Starts a process. `Command` is the executable file, `workingDir` is the - ## process's working directory. If ``workingDir == ""`` the current directory - ## is used. `args` are the command line arguments that are passed to the - ## process. On many operating systems, the first command line argument is the - ## name of the executable. `args` should not contain this argument! - ## `env` is the environment that will be passed to the process. - ## If ``env == nil`` the environment is inherited of - ## the parent process. `options` are additional flags that may be passed - ## to `startProcess`. See the documentation of ``TProcessOption`` for the - ## meaning of these flags. - ## - ## Return value: The newly created process object. Nil is never returned, - ## but ``EOS`` is raised in case of an error. - -proc suspend*(p: PProcess) - ## Suspends the process `p`. - -proc resume*(p: PProcess) - ## Resumes the process `p`. - -proc terminate*(p: PProcess) - ## Terminates the process `p`. - -proc running*(p: PProcess): bool - ## Returns true iff the process `p` is still running. Returns immediately. - -proc processID*(p: PProcess): int = - ## returns `p`'s process ID. - return p.id - -proc waitForExit*(p: PProcess): int - ## waits for the process to finish and returns `p`'s error code. - -proc inputStream*(p: PProcess): PStream - ## returns ``p``'s input stream for writing to - -proc outputStream*(p: PProcess): PStream - ## returns ``p``'s output stream for reading from - -proc errorStream*(p: PProcess): PStream - ## returns ``p``'s output stream for reading from - -when defined(macosx) or defined(bsd): - const - CTL_HW = 6 - HW_AVAILCPU = 25 - HW_NCPU = 3 - proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, - a: var int, b: pointer, c: int): cint {. - importc: "sysctl", header: "<sys/sysctl.h>".} - -proc countProcessors*(): int = - ## returns the numer of the processors/cores the machine has. - ## Returns 0 if it cannot be determined. - when defined(windows): - var x = getenv("NUMBER_OF_PROCESSORS") - if x.len > 0: result = parseInt(x) - elif defined(macosx) or defined(bsd): - var - mib: array[0..3, cint] - len, numCPU: int - mib[0] = CTL_HW - mib[1] = HW_AVAILCPU - len = sizeof(numCPU) - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - if numCPU < 1: - mib[1] = HW_NCPU - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - result = numCPU - elif defined(hpux): - result = mpctl(MPC_GETNUMSPUS, nil, nil) - elif defined(irix): - var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint - result = sysconf(SC_NPROC_ONLN) - else: - result = sysconf(SC_NPROCESSORS_ONLN) - if result <= 0: result = 1 - -proc startProcessAux(cmd: string, options: set[TProcessOption]): PProcess = - var c = parseCmdLine(cmd) - var a: seq[string] = @[] # slicing is not yet implemented :-( - for i in 1 .. c.len-1: add(a, c[i]) - result = startProcess(command=c[0], args=a, options=options) - -proc execProcesses*(cmds: openArray[string], - options = {poStdErrToStdOut, poParentStreams}, - n = countProcessors()): int = - ## executes the commands `cmds` in parallel. Creates `n` processes - ## that execute in parallel. The highest return value of all processes - ## is returned. - assert n > 0 - if n > 1: - var q: seq[PProcess] - newSeq(q, n) - var m = min(n, cmds.len) - for i in 0..m-1: - q[i] = startProcessAux(cmds[i], options=options) - when defined(noBusyWaiting): - var r = 0 - for i in m..high(cmds): - when defined(debugExecProcesses): - var err = "" - var outp = outputStream(q[r]) - while running(q[r]) or not outp.atEnd(outp): - err.add(outp.readLine()) - err.add("\n") - echo(err) - result = max(waitForExit(q[r]), result) - q[r] = startProcessAux(cmds[i], options=options) - r = (r + 1) mod n - else: - var i = m - while i <= high(cmds): - sleep(50) - for r in 0..n-1: - if not running(q[r]): - #echo(outputStream(q[r]).readLine()) - result = max(waitForExit(q[r]), result) - q[r] = startProcessAux(cmds[i], options=options) - inc(i) - if i > high(cmds): break - for i in 0..m-1: - result = max(waitForExit(q[i]), result) - else: - for i in 0..high(cmds): - var p = startProcessAux(cmds[i], options=options) - result = max(waitForExit(p), result) - -when true: - nil -else: - proc startGUIProcess*(command: string, - workingDir: string = "", - args: openarray[string] = [], - env: PStringTable = nil, - x = -1, - y = -1, - width = -1, - height = -1): PProcess - -proc execProcess(command: string, - options: set[TProcessOption] = {poStdErrToStdOut, - poUseShell}): string = - var p = startProcessAux(command, options=options) - var outp = outputStream(p) - result = "" - while running(p) or not outp.atEnd(outp): - result.add(outp.readLine()) - result.add("\n") - -when false: - proc deallocCStringArray(a: cstringArray) = - var i = 0 - while a[i] != nil: - dealloc(a[i]) - inc(i) - dealloc(a) - -when defined(Windows): - # We need to implement a handle stream for Windows: - type - PFileHandleStream = ref TFileHandleStream - TFileHandleStream = object of TStream - handle: THandle - atTheEnd: bool - - proc hsClose(s: PFileHandleStream) = nil # nothing to do here - proc hsAtEnd(s: PFileHandleStream): bool = return s.atTheEnd - - proc hsReadData(s: PFileHandleStream, buffer: pointer, bufLen: int): int = - if s.atTheEnd: return 0 - var br: int32 - var a = winlean.ReadFile(s.handle, buffer, bufLen, br, nil) - # TRUE and zero bytes returned (EOF). - # TRUE and n (>0) bytes returned (good data). - # FALSE and bytes returned undefined (system error). - if a == 0 and br != 0: OSError() - s.atTheEnd = br < bufLen - result = br - - proc hsWriteData(s: PFileHandleStream, buffer: pointer, bufLen: int) = - var bytesWritten: int32 - var a = winlean.writeFile(s.handle, buffer, bufLen, bytesWritten, nil) - if a == 0: OSError() - - proc newFileHandleStream(handle: THandle): PFileHandleStream = - new(result) - result.handle = handle - result.close = hsClose - result.atEnd = hsAtEnd - result.readData = hsReadData - result.writeData = hsWriteData - - proc buildCommandLine(a: string, args: openarray[string]): cstring = - var res = quoteIfContainsWhite(a) - for i in 0..high(args): - res.add(' ') - res.add(quoteIfContainsWhite(args[i])) - result = cast[cstring](alloc0(res.len+1)) - copyMem(result, cstring(res), res.len) - - proc buildEnv(env: PStringTable): cstring = - var L = 0 - for key, val in pairs(env): inc(L, key.len + val.len + 2) - result = cast[cstring](alloc0(L+2)) - L = 0 - for key, val in pairs(env): - var x = key & "=" & val - copyMem(addr(result[L]), cstring(x), x.len+1) # copy \0 - inc(L, x.len+1) - - #proc open_osfhandle(osh: THandle, mode: int): int {. - # importc: "_open_osfhandle", header: "<fcntl.h>".} - - #var - # O_WRONLY {.importc: "_O_WRONLY", header: "<fcntl.h>".}: int - # O_RDONLY {.importc: "_O_RDONLY", header: "<fcntl.h>".}: int - - proc CreatePipeHandles(Rdhandle, WrHandle: var THandle) = - var piInheritablePipe: TSecurityAttributes - piInheritablePipe.nlength = SizeOF(TSecurityAttributes) - piInheritablePipe.lpSecurityDescriptor = nil - piInheritablePipe.Binherithandle = 1 - if CreatePipe(Rdhandle, Wrhandle, piInheritablePipe, 1024) == 0'i32: - OSError() - - proc fileClose(h: THandle) {.inline.} = - if h > 4: discard CloseHandle(h) - - proc startProcess(command: string, - workingDir: string = "", - args: openarray[string] = [], - env: PStringTable = nil, - options: set[TProcessOption] = {poStdErrToStdOut}): PProcess = - var - SI: TStartupInfo - ProcInfo: TProcessInformation - success: int - hi, ho, he: THandle - new(result) - SI.cb = SizeOf(SI) - if poParentStreams notin options: - SI.dwFlags = STARTF_USESTDHANDLES # STARTF_USESHOWWINDOW or - CreatePipeHandles(SI.hStdInput, HI) - CreatePipeHandles(HO, Si.hStdOutput) - if poStdErrToStdOut in options: - SI.hStdError = SI.hStdOutput - HE = HO - else: - CreatePipeHandles(HE, Si.hStdError) - result.inputHandle = hi - result.outputHandle = ho - result.errorHandle = he - else: - SI.hStdError = GetStdHandle(STD_ERROR_HANDLE) - SI.hStdInput = GetStdHandle(STD_INPUT_HANDLE) - SI.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE) - result.inputHandle = si.hStdInput - result.outputHandle = si.hStdOutput - result.errorHandle = si.hStdError - - var cmdl: cstring - if false: # poUseShell in options: - cmdl = buildCommandLine(getEnv("COMSPEC"), @["/c", command] & args) - else: - cmdl = buildCommandLine(command, args) - var wd: cstring = nil - var e: cstring = nil - if len(workingDir) > 0: wd = workingDir - if env != nil: e = buildEnv(env) - if poEchoCmd in options: echo($cmdl) - success = winlean.CreateProcess(nil, - cmdl, nil, nil, 1, NORMAL_PRIORITY_CLASS, e, wd, SI, ProcInfo) - - if poParentStreams notin options: - FileClose(si.hStdInput) - FileClose(si.hStdOutput) - if poStdErrToStdOut notin options: - FileClose(si.hStdError) - - if e != nil: dealloc(e) - dealloc(cmdl) - if success == 0: OSError() - # Close the handle now so anyone waiting is woken: - discard closeHandle(procInfo.hThread) - result.FProcessHandle = procInfo.hProcess - result.id = procInfo.dwProcessID - - proc suspend(p: PProcess) = - discard SuspendThread(p.FProcessHandle) - - proc resume(p: PProcess) = - discard ResumeThread(p.FProcessHandle) - - proc running(p: PProcess): bool = - var x = waitForSingleObject(p.FProcessHandle, 50) - return x == WAIT_TIMEOUT - - proc terminate(p: PProcess) = - if running(p): - discard TerminateProcess(p.FProcessHandle, 0) - - proc waitForExit(p: PProcess): int = - discard WaitForSingleObject(p.FProcessHandle, Infinite) - var res: int32 - discard GetExitCodeProcess(p.FProcessHandle, res) - result = res - discard CloseHandle(p.FProcessHandle) - - proc inputStream(p: PProcess): PStream = - result = newFileHandleStream(p.inputHandle) - - proc outputStream(p: PProcess): PStream = - result = newFileHandleStream(p.outputHandle) - - proc errorStream(p: PProcess): PStream = - result = newFileHandleStream(p.errorHandle) - - proc execCmd(command: string): int = - var - SI: TStartupInfo - ProcInfo: TProcessInformation - process: THandle - L: int32 - SI.cb = SizeOf(SI) - SI.hStdError = GetStdHandle(STD_ERROR_HANDLE) - SI.hStdInput = GetStdHandle(STD_INPUT_HANDLE) - SI.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE) - if winlean.CreateProcess(nil, command, nil, nil, 0, - NORMAL_PRIORITY_CLASS, nil, nil, SI, ProcInfo) == 0: - OSError() - else: - Process = ProcInfo.hProcess - discard CloseHandle(ProcInfo.hThread) - if WaitForSingleObject(Process, INFINITE) != -1: - discard GetExitCodeProcess(Process, L) - result = int(L) - else: - result = -1 - discard CloseHandle(Process) - -else: - const - readIdx = 0 - writeIdx = 1 - - proc addCmdArgs(command: string, args: openarray[string]): string = - result = quoteIfContainsWhite(command) - for i in 0 .. high(args): - add(result, " ") - add(result, quoteIfContainsWhite(args[i])) - - proc toCStringArray(b, a: openarray[string]): cstringArray = - result = cast[cstringArray](alloc0((a.len + b.len + 1) * sizeof(cstring))) - for i in 0..high(b): - result[i] = cast[cstring](alloc(b[i].len+1)) - copyMem(result[i], cstring(b[i]), b[i].len+1) - for i in 0..high(a): - result[i+b.len] = cast[cstring](alloc(a[i].len+1)) - copyMem(result[i+b.len], cstring(a[i]), a[i].len+1) - - proc ToCStringArray(t: PStringTable): cstringArray = - result = cast[cstringArray](alloc0((t.len + 1) * sizeof(cstring))) - var i = 0 - for key, val in pairs(t): - var x = key & "=" & val - result[i] = cast[cstring](alloc(x.len+1)) - copyMem(result[i], addr(x[0]), x.len+1) - inc(i) - - proc startProcess(command: string, - workingDir: string = "", - args: openarray[string] = [], - env: PStringTable = nil, - options: set[TProcessOption] = {poStdErrToStdOut}): PProcess = - var - p_stdin, p_stdout, p_stderr: array [0..1, cint] - new(result) - result.exitCode = 3 # for ``waitForExit`` - if pipe(p_stdin) != 0'i32 or pipe(p_stdout) != 0'i32: - OSError("failed to create a pipe") - var Pid = fork() - if Pid < 0: - OSError("failed to fork process") - - if pid == 0: - ## child process: - discard close(p_stdin[writeIdx]) - if dup2(p_stdin[readIdx], readIdx) < 0: OSError() - discard close(p_stdout[readIdx]) - if dup2(p_stdout[writeIdx], writeIdx) < 0: OSError() - if poStdErrToStdOut in options: - if dup2(p_stdout[writeIdx], 2) < 0: OSError() - else: - if pipe(p_stderr) != 0'i32: OSError("failed to create a pipe") - discard close(p_stderr[readIdx]) - if dup2(p_stderr[writeIdx], 2) < 0: OSError() - - if workingDir.len > 0: - os.setCurrentDir(workingDir) - if poUseShell notin options: - var a = toCStringArray([extractFilename(command)], args) - if env == nil: - discard execv(command, a) - else: - discard execve(command, a, ToCStringArray(env)) - else: - var x = addCmdArgs(command, args) - var a = toCStringArray(["sh", "-c"], [x]) - if env == nil: - discard execv("/bin/sh", a) - else: - discard execve("/bin/sh", a, ToCStringArray(env)) - # too risky to raise an exception here: - quit("execve call failed: " & $strerror(errno)) - # Parent process. Copy process information. - if poEchoCmd in options: - echo(command & " " & join(args, " ")) - result.id = pid - - result.inputHandle = p_stdin[writeIdx] - result.outputHandle = p_stdout[readIdx] - if poStdErrToStdOut in options: - result.errorHandle = result.outputHandle - else: - result.errorHandle = p_stderr[readIdx] - discard close(p_stderr[writeIdx]) - discard close(p_stdin[readIdx]) - discard close(p_stdout[writeIdx]) - - proc suspend(p: PProcess) = - discard kill(p.id, SIGSTOP) - - proc resume(p: PProcess) = - discard kill(p.id, SIGCONT) - - proc running(p: PProcess): bool = - result = waitPid(p.id, p.exitCode, WNOHANG) == int(p.id) - - proc terminate(p: PProcess) = - if kill(p.id, SIGTERM) == 0'i32: - if running(p): discard kill(p.id, SIGKILL) - - proc waitForExit(p: PProcess): int = - #if waitPid(p.id, p.exitCode, 0) == int(p.id): - # ``waitPid`` fails if the process is not running anymore. But then - # ``running`` probably set ``p.exitCode`` for us. Since ``p.exitCode`` is - # initialized with 3, wrong success exit codes are prevented. - var oldExitCode = p.exitCode - if waitPid(p.id, p.exitCode, 0) < 0: - # failed, so restore old exitCode - p.exitCode = oldExitCode - result = int(p.exitCode) - - proc inputStream(p: PProcess): PStream = - var f: TFile - if not open(f, p.inputHandle, fmWrite): OSError() - result = newFileStream(f) - - proc outputStream(p: PProcess): PStream = - var f: TFile - if not open(f, p.outputHandle, fmRead): OSError() - result = newFileStream(f) - - proc errorStream(p: PProcess): PStream = - var f: TFile - if not open(f, p.errorHandle, fmRead): OSError() - result = newFileStream(f) - - proc csystem(cmd: cstring): cint {.nodecl, importc: "system".} - - proc execCmd(command: string): int = - result = csystem(command) - -when isMainModule: - var x = execProcess("gcc -v") - echo "ECHO ", x diff --git a/nimlib/pure/parsecfg.nim b/nimlib/pure/parsecfg.nim deleted file mode 100755 index c26dab099..000000000 --- a/nimlib/pure/parsecfg.nim +++ /dev/null @@ -1,352 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2008 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## The ``parsecfg`` module implements a high performance configuration file -## parser. The configuration file's syntax is similar to the Windows ``.ini`` -## format, but much more powerful, as it is not a line based parser. String -## literals, raw string literals and triple quoted string literals are supported -## as in the Nimrod programming language. - -## This is an example of how a configuration file may look like: -## -## .. include:: doc/mytest.cfg -## :literal: -## The file ``tests/tparscfg.nim`` demonstrates how to use the -## configuration file parser: -## -## .. code-block:: nimrod -## :file: tests/tparscfg.nim - - -import - hashes, strutils, lexbase, streams - -type - TCfgEventKind* = enum ## enumation of all events that may occur when parsing - cfgEof, ## end of file reached - cfgSectionStart, ## a ``[section]`` has been parsed - cfgKeyValuePair, ## a ``key=value`` pair has been detected - cfgOption, ## a ``--key=value`` command line option - cfgError ## an error ocurred during parsing - - TCfgEvent* = object of TObject ## describes a parsing event - case kind*: TCfgEventKind ## the kind of the event - of cfgEof: nil - of cfgSectionStart: - section*: string ## `section` contains the name of the - ## parsed section start (syntax: ``[section]``) - of cfgKeyValuePair, cfgOption: - key*, value*: string ## contains the (key, value) pair if an option - ## of the form ``--key: value`` or an ordinary - ## ``key= value`` pair has been parsed. - ## ``value==""`` if it was not specified in the - ## configuration file. - of cfgError: ## the parser encountered an error: `msg` - msg*: string ## contains the error message. No exceptions - ## are thrown if a parse error occurs. - - TTokKind = enum - tkInvalid, tkEof, - tkSymbol, tkEquals, tkColon, tkBracketLe, tkBracketRi, tkDashDash - TToken {.final.} = object # a token - kind: TTokKind # the type of the token - literal: string # the parsed (string) literal - - TParserState = enum - startState # , commaState # not yet used - TCfgParser* = object of TBaseLexer ## the parser object. - tok: TToken - state: TParserState - filename: string - -proc open*(c: var TCfgParser, input: PStream, filename: string) - ## initializes the parser with an input stream. `Filename` is only used - ## for nice error messages. - -proc close*(c: var TCfgParser) - ## closes the parser `c` and its associated input stream. - -proc next*(c: var TCfgParser): TCfgEvent - ## retrieves the first/next event. This controls the parser. - -proc getColumn*(c: TCfgParser): int - ## get the current column the parser has arrived at. - -proc getLine*(c: TCfgParser): int - ## get the current line the parser has arrived at. - -proc getFilename*(c: TCfgParser): string - ## get the filename of the file that the parser processes. - -proc errorStr*(c: TCfgParser, msg: string): string - ## returns a properly formated error message containing current line and - ## column information. - - -# implementation - -const - SymChars: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} - -proc rawGetTok(c: var TCfgParser, tok: var TToken) -proc open(c: var TCfgParser, input: PStream, filename: string) = - lexbase.open(c, input) - c.filename = filename - c.state = startState - c.tok.kind = tkInvalid - c.tok.literal = "" - rawGetTok(c, c.tok) - -proc close(c: var TCfgParser) = - lexbase.close(c) - -proc getColumn(c: TCfgParser): int = - result = getColNumber(c, c.bufPos) - -proc getLine(c: TCfgParser): int = - result = c.linenumber - -proc getFilename(c: TCfgParser): string = - result = c.filename - -proc handleHexChar(c: var TCfgParser, xi: var int) = - case c.buf[c.bufpos] - of '0'..'9': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - of 'a'..'f': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) - inc(c.bufpos) - of 'A'..'F': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) - inc(c.bufpos) - else: - nil - -proc handleDecChars(c: var TCfgParser, xi: var int) = - while c.buf[c.bufpos] in {'0'..'9'}: - xi = (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - -proc getEscapedChar(c: var TCfgParser, tok: var TToken) = - inc(c.bufpos) # skip '\' - case c.buf[c.bufpos] - of 'n', 'N': - add(tok.literal, "\n") - Inc(c.bufpos) - of 'r', 'R', 'c', 'C': - add(tok.literal, '\c') - Inc(c.bufpos) - of 'l', 'L': - add(tok.literal, '\L') - Inc(c.bufpos) - of 'f', 'F': - add(tok.literal, '\f') - inc(c.bufpos) - of 'e', 'E': - add(tok.literal, '\e') - Inc(c.bufpos) - of 'a', 'A': - add(tok.literal, '\a') - Inc(c.bufpos) - of 'b', 'B': - add(tok.literal, '\b') - Inc(c.bufpos) - of 'v', 'V': - add(tok.literal, '\v') - Inc(c.bufpos) - of 't', 'T': - add(tok.literal, '\t') - Inc(c.bufpos) - of '\'', '\"': - add(tok.literal, c.buf[c.bufpos]) - Inc(c.bufpos) - of '\\': - add(tok.literal, '\\') - Inc(c.bufpos) - of 'x', 'X': - inc(c.bufpos) - var xi = 0 - handleHexChar(c, xi) - handleHexChar(c, xi) - add(tok.literal, Chr(xi)) - of '0'..'9': - var xi = 0 - handleDecChars(c, xi) - if (xi <= 255): add(tok.literal, Chr(xi)) - else: tok.kind = tkInvalid - else: tok.kind = tkInvalid - -proc HandleCRLF(c: var TCfgParser, pos: int): int = - case c.buf[pos] - of '\c': result = lexbase.HandleCR(c, pos) - of '\L': result = lexbase.HandleLF(c, pos) - else: result = pos - -proc getString(c: var TCfgParser, tok: var TToken, rawMode: bool) = - var pos = c.bufPos + 1 # skip " - var buf = c.buf # put `buf` in a register - tok.kind = tkSymbol - if (buf[pos] == '\"') and (buf[pos + 1] == '\"'): - # long string literal: - inc(pos, 2) # skip "" - # skip leading newline: - pos = HandleCRLF(c, pos) - buf = c.buf - while true: - case buf[pos] - of '\"': - if (buf[pos + 1] == '\"') and (buf[pos + 2] == '\"'): break - add(tok.literal, '\"') - Inc(pos) - of '\c', '\L': - pos = HandleCRLF(c, pos) - buf = c.buf - add(tok.literal, "\n") - of lexbase.EndOfFile: - tok.kind = tkInvalid - break - else: - add(tok.literal, buf[pos]) - Inc(pos) - c.bufpos = pos + 3 # skip the three """ - else: - # ordinary string literal - while true: - var ch = buf[pos] - if ch == '\"': - inc(pos) # skip '"' - break - if ch in {'\c', '\L', lexbase.EndOfFile}: - tok.kind = tkInvalid - break - if (ch == '\\') and not rawMode: - c.bufPos = pos - getEscapedChar(c, tok) - pos = c.bufPos - else: - add(tok.literal, ch) - Inc(pos) - c.bufpos = pos - -proc getSymbol(c: var TCfgParser, tok: var TToken) = - var pos = c.bufpos - var buf = c.buf - while true: - add(tok.literal, buf[pos]) - Inc(pos) - if not (buf[pos] in SymChars): break - c.bufpos = pos - tok.kind = tkSymbol - -proc skip(c: var TCfgParser) = - var pos = c.bufpos - var buf = c.buf - while true: - case buf[pos] - of ' ', '\t': - Inc(pos) - of '#', ';': - while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) - of '\c', '\L': - pos = HandleCRLF(c, pos) - buf = c.buf - else: - break # EndOfFile also leaves the loop - c.bufpos = pos - -proc rawGetTok(c: var TCfgParser, tok: var TToken) = - tok.kind = tkInvalid - setlen(tok.literal, 0) - skip(c) - case c.buf[c.bufpos] - of '=': - tok.kind = tkEquals - inc(c.bufpos) - tok.literal = "=" - of '-': - inc(c.bufPos) - if c.buf[c.bufPos] == '-': inc(c.bufPos) - tok.kind = tkDashDash - tok.literal = "--" - of ':': - tok.kind = tkColon - inc(c.bufpos) - tok.literal = ":" - of 'r', 'R': - if c.buf[c.bufPos + 1] == '\"': - Inc(c.bufPos) - getString(c, tok, true) - else: - getSymbol(c, tok) - of '[': - tok.kind = tkBracketLe - inc(c.bufpos) - tok.literal = "]" - of ']': - tok.kind = tkBracketRi - Inc(c.bufpos) - tok.literal = "]" - of '\"': - getString(c, tok, false) - of lexbase.EndOfFile: - tok.kind = tkEof - tok.literal = "[EOF]" - else: getSymbol(c, tok) - -proc errorStr(c: TCfgParser, msg: string): string = - result = `%`("$1($2, $3) Error: $4", - [c.filename, $getLine(c), $getColumn(c), msg]) - -proc getKeyValPair(c: var TCfgParser, kind: TCfgEventKind): TCfgEvent = - if c.tok.kind == tkSymbol: - result.kind = kind - result.key = c.tok.literal - result.value = "" - rawGetTok(c, c.tok) - if c.tok.kind in {tkEquals, tkColon}: - rawGetTok(c, c.tok) - if c.tok.kind == tkSymbol: - result.value = c.tok.literal - else: - result.kind = cfgError - result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) - rawGetTok(c, c.tok) - else: - result.kind = cfgError - result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) - rawGetTok(c, c.tok) - -proc next(c: var TCfgParser): TCfgEvent = - case c.tok.kind - of tkEof: - result.kind = cfgEof - of tkDashDash: - rawGetTok(c, c.tok) - result = getKeyValPair(c, cfgOption) - of tkSymbol: - result = getKeyValPair(c, cfgKeyValuePair) - of tkBracketLe: - rawGetTok(c, c.tok) - if c.tok.kind == tkSymbol: - result.kind = cfgSectionStart - result.section = c.tok.literal - else: - result.kind = cfgError - result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) - rawGetTok(c, c.tok) - if c.tok.kind == tkBracketRi: - rawGetTok(c, c.tok) - else: - result.kind = cfgError - result.msg = errorStr(c, "\']\' expected, but found: " & c.tok.literal) - of tkInvalid, tkEquals, tkColon, tkBracketRi: - result.kind = cfgError - result.msg = errorStr(c, "invalid token: " & c.tok.literal) - rawGetTok(c, c.tok) diff --git a/nimlib/pure/parsecsv.nim b/nimlib/pure/parsecsv.nim deleted file mode 100755 index 5970f2090..000000000 --- a/nimlib/pure/parsecsv.nim +++ /dev/null @@ -1,178 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements a simple high performance `CSV`:idx: -## (`comma separated value`:idx:) parser. -## -## Example: How to use the parser -## ============================== -## -## .. code-block:: nimrod -## import os, parsecsv, streams -## var s = newFileStream(ParamStr(1), fmRead) -## if s == nil: quit("cannot open the file" & ParamStr(1)) -## var x: TCsvParser -## open(x, s, ParamStr(1)) -## while readRow(x): -## Echo "new row: " -## for val in items(x.row): -## Echo "##", val, "##" -## close(x) -## - -import - lexbase, streams - -type - TCsvRow* = seq[string] ## a row in a CSV file - TCsvParser* = object of TBaseLexer ## the parser object. - row*: TCsvRow ## the current row - filename: string - sep, quote, esc: char - skipWhite: bool - currRow: int - - EInvalidCsv* = object of EIO ## exception that is raised if - ## a parsing error occurs - -proc raiseEInvalidCsv(filename: string, line, col: int, - msg: string) {.noreturn.} = - var e: ref EInvalidCsv - new(e) - e.msg = filename & "(" & $line & ", " & $col & ") Error: " & msg - raise e - -proc error(my: TCsvParser, pos: int, msg: string) = - raiseEInvalidCsv(my.filename, my.LineNumber, getColNumber(my, pos), msg) - -proc open*(my: var TCsvParser, input: PStream, filename: string, - separator = ',', quote = '"', escape = '\0', - skipInitialSpace = false) = - ## initializes the parser with an input stream. `Filename` is only used - ## for nice error messages. The parser's behaviour can be controlled by - ## the diverse optional parameters: - ## - `separator`: character used to separate fields - ## - `quote`: Used to quote fields containing special characters like - ## `separator`, `quote` or new-line characters. '\0' disables the parsing - ## of quotes. - ## - `escape`: removes any special meaning from the following character; - ## '\0' disables escaping; if escaping is disabled and `quote` is not '\0', - ## two `quote` characters are parsed one literal `quote` character. - ## - `skipInitialSpace`: If true, whitespace immediately following the - ## `separator` is ignored. - lexbase.open(my, input) - my.filename = filename - my.sep = separator - my.quote = quote - my.esc = escape - my.skipWhite = skipInitialSpace - my.row = @[] - my.currRow = 0 - -proc parseField(my: var TCsvParser, a: var string) = - var pos = my.bufpos - var buf = my.buf - if my.skipWhite: - while buf[pos] in {' ', '\t'}: inc(pos) - setLen(a, 0) # reuse memory - if buf[pos] == my.quote and my.quote != '\0': - inc(pos) - while true: - var c = buf[pos] - if c == '\0': - my.bufpos = pos # can continue after exception? - error(my, pos, my.quote & " expected") - break - elif c == my.quote: - if my.esc == '\0' and buf[pos+1] == my.quote: - add(a, my.quote) - inc(pos, 2) - else: - inc(pos) - break - elif c == my.esc: - add(a, buf[pos+1]) - inc(pos, 2) - else: - case c - of '\c': - pos = handleCR(my, pos) - buf = my.buf - add(a, "\n") - of '\l': - pos = handleLF(my, pos) - buf = my.buf - add(a, "\n") - else: - add(a, c) - inc(pos) - else: - while true: - var c = buf[pos] - if c == my.sep: break - if c in {'\c', '\l', '\0'}: break - add(a, c) - inc(pos) - my.bufpos = pos - -proc processedRows*(my: var TCsvParser): int = - ## returns number of the processed rows - return my.currRow - -proc readRow*(my: var TCsvParser, columns = 0): bool = - ## reads the next row; if `columns` > 0, it expects the row to have - ## exactly this many columns. Returns false if the end of the file - ## has been encountered else true. - var col = 0 # current column - var oldpos = my.bufpos - while my.buf[my.bufpos] != '\0': - var oldlen = my.row.len - if oldlen < col+1: - setLen(my.row, col+1) - my.row[col] = "" - parseField(my, my.row[col]) - inc(col) - if my.buf[my.bufpos] == my.sep: - inc(my.bufpos) - else: - case my.buf[my.bufpos] - of '\c', '\l': - # skip empty lines: - while true: - case my.buf[my.bufpos] - of '\c': my.bufpos = handleCR(my, my.bufpos) - of '\l': my.bufpos = handleLF(my, my.bufpos) - else: break - of '\0': nil - else: error(my, my.bufpos, my.sep & " expected") - break - - setlen(my.row, col) - result = col > 0 - if result and col != columns and columns > 0: - error(my, oldpos+1, $columns & " columns expected, but found " & - $col & " columns") - inc(my.currRow) - -proc close*(my: var TCsvParser) {.inline.} = - ## closes the parser `my` and its associated input stream. - lexbase.close(my) - -when isMainModule: - import os - var s = newFileStream(ParamStr(1), fmRead) - if s == nil: quit("cannot open the file" & ParamStr(1)) - var x: TCsvParser - open(x, s, ParamStr(1)) - while readRow(x): - Echo "new row: " - for val in items(x.row): - Echo "##", val, "##" - close(x) - diff --git a/nimlib/pure/parseopt.nim b/nimlib/pure/parseopt.nim deleted file mode 100755 index 8f4be98f4..000000000 --- a/nimlib/pure/parseopt.nim +++ /dev/null @@ -1,152 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module provides the standard Nimrod command line parser. -## It supports one convenience iterator over all command line options and some -## lower-level features. - -{.push debugger: off.} - -import - os, strutils - -type - TCmdLineKind* = enum ## the detected command line token - cmdEnd, ## end of command line reached - cmdArgument, ## argument detected - cmdLongoption, ## a long option ``--option`` detected - cmdShortOption ## a short option ``-c`` detected - TOptParser* = - object of TObject ## this object implements the command line parser - cmd: string - pos: int - inShortState: bool - kind*: TCmdLineKind ## the dected command line token - key*, val*: string ## key and value pair; ``key`` is the option - ## or the argument, ``value`` is not "" if - ## the option was given a value - -proc initOptParser*(cmdline = ""): TOptParser = - ## inits the option parser. If ``cmdline == ""``, the real command line - ## (as provided by the ``OS`` module) is taken. - result.pos = 0 - result.inShortState = false - if cmdline != "": - result.cmd = cmdline - else: - result.cmd = "" - for i in countup(1, ParamCount()): - result.cmd = result.cmd & quoteIfContainsWhite(paramStr(i)) & ' ' - result.kind = cmdEnd - result.key = "" - result.val = "" - -proc init*(cmdline: string = ""): TOptParser {.deprecated.} = - ## **Deprecated since version 0.8.2**: Use `initOptParser` instead. - result = initOptParser(cmdline) - -proc parseWord(s: string, i: int, w: var string, - delim: TCharSet = {'\x09', ' ', '\0'}): int = - result = i - if s[result] == '\"': - inc(result) - while not (s[result] in {'\0', '\"'}): - add(w, s[result]) - inc(result) - if s[result] == '\"': inc(result) - else: - while not (s[result] in delim): - add(w, s[result]) - inc(result) - -proc handleShortOption(p: var TOptParser) = - var i = p.pos - p.kind = cmdShortOption - add(p.key, p.cmd[i]) - inc(i) - p.inShortState = true - while p.cmd[i] in {'\x09', ' '}: - inc(i) - p.inShortState = false - if p.cmd[i] in {':', '='}: - inc(i) - p.inShortState = false - while p.cmd[i] in {'\x09', ' '}: inc(i) - i = parseWord(p.cmd, i, p.val) - if p.cmd[i] == '\0': p.inShortState = false - p.pos = i - -proc next*(p: var TOptParser) = - ## parses the first or next option; ``p.kind`` describes what token has been - ## parsed. ``p.key`` and ``p.val`` are set accordingly. - var i = p.pos - while p.cmd[i] in {'\x09', ' '}: inc(i) - p.pos = i - setlen(p.key, 0) - setlen(p.val, 0) - if p.inShortState: - handleShortOption(p) - return - case p.cmd[i] - of '\0': - p.kind = cmdEnd - of '-': - inc(i) - if p.cmd[i] == '-': - p.kind = cmdLongOption - inc(i) - i = parseWord(p.cmd, i, p.key, {'\0', ' ', '\x09', ':', '='}) - while p.cmd[i] in {'\x09', ' '}: inc(i) - if p.cmd[i] in {':', '='}: - inc(i) - while p.cmd[i] in {'\x09', ' '}: inc(i) - p.pos = parseWord(p.cmd, i, p.val) - else: - p.pos = i - else: - p.pos = i - handleShortOption(p) - else: - p.kind = cmdArgument - p.pos = parseWord(p.cmd, i, p.key) - -proc cmdLineRest*(p: TOptParser): string = - ## retrieves the rest of the command line that has not been parsed yet. - result = strip(copy(p.cmd, p.pos, len(p.cmd) - 1)) - -proc getRestOfCommandLine*(p: TOptParser): string {.deprecated.} = - ## **Deprecated since version 0.8.2**: Use `cmdLineRest` instead. - result = cmdLineRest(p) - -iterator getopt*(): tuple[kind: TCmdLineKind, key, val: string] = - ## This is an convenience iterator for iterating over the command line. - ## This uses the TOptParser object. Example: - ## - ## .. code-block:: nimrod - ## var - ## filename = "" - ## for kind, key, val in getopt(): - ## case kind - ## of cmdArgument: - ## filename = key - ## of cmdLongOption, cmdShortOption: - ## case key - ## of "help", "h": writeHelp() - ## of "version", "v": writeVersion() - ## of cmdEnd: assert(false) # cannot happen - ## if filename == "": - ## # no filename has been given, so we show the help: - ## writeHelp() - var p = initOptParser() - while true: - next(p) - if p.kind == cmdEnd: break - yield (p.kind, p.key, p.val) - -{.pop.} diff --git a/nimlib/pure/parsesql.nim b/nimlib/pure/parsesql.nim deleted file mode 100755 index 2109c273a..000000000 --- a/nimlib/pure/parsesql.nim +++ /dev/null @@ -1,1345 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## The ``parsesql`` module implements a high performance SQL file -## parser. It parses PostgreSQL syntax and the SQL ANSI standard. - -import - hashes, strutils, lexbase, streams - -# ------------------- scanner ------------------------------------------------- - -type - TTokKind = enum ## enumeration of all SQL tokens - tkInvalid, ## invalid token - tkEof, ## end of file reached - tkIdentifier, ## abc - tkQuotedIdentifier, ## "abc" - tkStringConstant, ## 'abc' - tkEscapeConstant, ## e'abc' - tkDollarQuotedConstant, ## $tag$abc$tag$ - tkBitStringConstant, ## B'00011' - tkHexStringConstant, ## x'00011' - tkInteger, - tkNumeric, - tkOperator, ## + - * / < > = ~ ! @ # % ^ & | ` ? - tkSemicolon, ## ';' - tkColon, ## ':' - tkComma, ## ',' - tkParLe, ## '(' - tkParRi, ## ')' - tkBracketLe, ## '[' - tkBracketRi, ## ']' - tkDot ## '.' - - TToken {.final.} = object # a token - kind: TTokKind # the type of the token - literal: string # the parsed (string) literal - - TSqlLexer* = object of TBaseLexer ## the parser object. - filename: string - -const - tokKindToStr: array[TTokKind, string] = [ - "invalid", "[EOF]", "identifier", "quoted identifier", "string constant", - "escape string constant", "dollar quoted constant", "bit string constant", - "hex string constant", "integer constant", "numeric constant", "operator", - ";", ":", ",", "(", ")", "[", "]", "." - ] - -proc open(L: var TSqlLexer, input: PStream, filename: string) = - lexbase.open(L, input) - L.filename = filename - -proc close(L: var TSqlLexer) = - lexbase.close(L) - -proc getColumn(L: TSqlLexer): int = - ## get the current column the parser has arrived at. - result = getColNumber(L, L.bufPos) - -proc getLine(L: TSqlLexer): int = - result = L.linenumber - -proc handleHexChar(c: var TSqlLexer, xi: var int) = - case c.buf[c.bufpos] - of '0'..'9': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - of 'a'..'f': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) - inc(c.bufpos) - of 'A'..'F': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) - inc(c.bufpos) - else: - nil - -proc handleOctChar(c: var TSqlLexer, xi: var int) = - if c.buf[c.bufpos] in {'0'..'7'}: - xi = (xi shl 3) or (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - -proc getEscapedChar(c: var TSqlLexer, tok: var TToken) = - inc(c.bufpos) - case c.buf[c.bufpos] - of 'n', 'N': - add(tok.literal, '\L') - Inc(c.bufpos) - of 'r', 'R', 'c', 'C': - add(tok.literal, '\c') - Inc(c.bufpos) - of 'l', 'L': - add(tok.literal, '\L') - Inc(c.bufpos) - of 'f', 'F': - add(tok.literal, '\f') - inc(c.bufpos) - of 'e', 'E': - add(tok.literal, '\e') - Inc(c.bufpos) - of 'a', 'A': - add(tok.literal, '\a') - Inc(c.bufpos) - of 'b', 'B': - add(tok.literal, '\b') - Inc(c.bufpos) - of 'v', 'V': - add(tok.literal, '\v') - Inc(c.bufpos) - of 't', 'T': - add(tok.literal, '\t') - Inc(c.bufpos) - of '\'', '\"': - add(tok.literal, c.buf[c.bufpos]) - Inc(c.bufpos) - of '\\': - add(tok.literal, '\\') - Inc(c.bufpos) - of 'x', 'X': - inc(c.bufpos) - var xi = 0 - handleHexChar(c, xi) - handleHexChar(c, xi) - add(tok.literal, Chr(xi)) - of '0'..'7': - var xi = 0 - handleOctChar(c, xi) - handleOctChar(c, xi) - handleOctChar(c, xi) - if (xi <= 255): add(tok.literal, Chr(xi)) - else: tok.kind = tkInvalid - else: tok.kind = tkInvalid - -proc HandleCRLF(c: var TSqlLexer, pos: int): int = - case c.buf[pos] - of '\c': result = lexbase.HandleCR(c, pos) - of '\L': result = lexbase.HandleLF(c, pos) - else: result = pos - -proc skip(c: var TSqlLexer) = - var pos = c.bufpos - var buf = c.buf - var nested = 0 - while true: - case buf[pos] - of ' ', '\t': - Inc(pos) - of '-': - if buf[pos+1] == '-': - while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) - else: - break - of '/': - if buf[pos+1] == '*': - inc(pos,2) - while true: - case buf[pos] - of '\0': break - of '\c', '\L': - pos = HandleCRLF(c, pos) - buf = c.buf - of '*': - if buf[pos+1] == '/': - inc(pos, 2) - if nested <= 0: break - dec(nested) - else: - inc(pos) - of '/': - if buf[pos+1] == '*': - inc(pos, 2) - inc(nested) - else: - inc(pos) - else: inc(pos) - else: break - of '\c', '\L': - pos = HandleCRLF(c, pos) - buf = c.buf - else: - break # EndOfFile also leaves the loop - c.bufpos = pos - -proc getString(c: var TSqlLexer, tok: var TToken, kind: TTokKind) = - var pos = c.bufPos + 1 - var buf = c.buf - tok.kind = kind - block parseLoop: - while true: - while true: - var ch = buf[pos] - if ch == '\'': - if buf[pos+1] == '\'': - inc(pos, 2) - add(tok.literal, '\'') - else: - inc(pos) - break - elif ch in {'\c', '\L', lexbase.EndOfFile}: - tok.kind = tkInvalid - break parseLoop - elif (ch == '\\') and kind == tkEscapeConstant: - c.bufPos = pos - getEscapedChar(c, tok) - pos = c.bufPos - else: - add(tok.literal, ch) - Inc(pos) - c.bufpos = pos - var line = c.linenumber - skip(c) - if c.linenumber > line: - # a new line whitespace has been parsed, so we check if the string - # continues after the whitespace: - buf = c.buf # may have been reallocated - pos = c.bufpos - if buf[pos] == '\'': inc(pos) - else: break parseLoop - else: break parseLoop - c.bufpos = pos - -proc getDollarString(c: var TSqlLexer, tok: var TToken) = - var pos = c.bufPos + 1 - var buf = c.buf - tok.kind = tkDollarQuotedConstant - var tag = "$" - while buf[pos] in IdentChars: - add(tag, buf[pos]) - inc(pos) - if buf[pos] == '$': inc(pos) - else: - tok.kind = tkInvalid - return - while true: - case buf[pos] - of '\c', '\L': - pos = HandleCRLF(c, pos) - buf = c.buf - add(tok.literal, "\L") - of '\0': - tok.kind = tkInvalid - break - of '$': - inc(pos) - var tag2 = "$" - while buf[pos] in IdentChars: - add(tag2, buf[pos]) - inc(pos) - if buf[pos] == '$': inc(pos) - if tag2 == tag: break - add(tok.literal, tag2) - add(tok.literal, '$') - else: - add(tok.literal, buf[pos]) - inc(pos) - c.bufpos = pos - -proc getSymbol(c: var TSqlLexer, tok: var TToken) = - var pos = c.bufpos - var buf = c.buf - while true: - add(tok.literal, buf[pos]) - Inc(pos) - if not (buf[pos] in {'a'..'z','A'..'Z','0'..'9','_','$', '\128'..'\255'}): - break - c.bufpos = pos - tok.kind = tkIdentifier - -proc getQuotedIdentifier(c: var TSqlLexer, tok: var TToken) = - var pos = c.bufPos + 1 - var buf = c.buf - tok.kind = tkQuotedIdentifier - while true: - var ch = buf[pos] - if ch == '\"': - if buf[pos+1] == '\"': - inc(pos, 2) - add(tok.literal, '\"') - else: - inc(pos) - break - elif ch in {'\c', '\L', lexbase.EndOfFile}: - tok.kind = tkInvalid - break - else: - add(tok.literal, ch) - Inc(pos) - c.bufpos = pos - -proc getBitHexString(c: var TSqlLexer, tok: var TToken, validChars: TCharSet) = - var pos = c.bufPos + 1 - var buf = c.buf - block parseLoop: - while true: - while true: - var ch = buf[pos] - if ch in validChars: - add(tok.literal, ch) - Inc(pos) - elif ch == '\'': - inc(pos) - break - else: - tok.kind = tkInvalid - break parseLoop - c.bufpos = pos - var line = c.linenumber - skip(c) - if c.linenumber > line: - # a new line whitespace has been parsed, so we check if the string - # continues after the whitespace: - buf = c.buf # may have been reallocated - pos = c.bufpos - if buf[pos] == '\'': inc(pos) - else: break parseLoop - else: break parseLoop - c.bufpos = pos - -proc getNumeric(c: var TSqlLexer, tok: var TToken) = - tok.kind = tkInteger - var pos = c.bufPos - var buf = c.buf - while buf[pos] in Digits: - add(tok.literal, buf[pos]) - inc(pos) - if buf[pos] == '.': - tok.kind = tkNumeric - add(tok.literal, buf[pos]) - inc(pos) - while buf[pos] in Digits: - add(tok.literal, buf[pos]) - inc(pos) - if buf[pos] in {'E', 'e'}: - tok.kind = tkNumeric - add(tok.literal, buf[pos]) - inc(pos) - if buf[pos] == '+': - inc(pos) - elif buf[pos] == '-': - add(tok.literal, buf[pos]) - inc(pos) - if buf[pos] in Digits: - while buf[pos] in Digits: - add(tok.literal, buf[pos]) - inc(pos) - else: - tok.kind = tkInvalid - c.bufpos = pos - -proc getOperator(c: var TSqlLexer, tok: var TToken) = - const operators = {'+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%', - '^', '&', '|', '`', '?'} - tok.kind = tkOperator - var pos = c.bufPos - var buf = c.buf - var trailingPlusMinus = false - while true: - case buf[pos] - of '-': - if buf[pos] == '-': break - if not trailingPlusMinus and buf[pos+1] notin operators and - tok.literal.len > 0: break - of '/': - if buf[pos] == '*': break - of '~', '!', '@', '#', '%', '^', '&', '|', '`', '?': - trailingPlusMinus = true - of '+': - if not trailingPlusMinus and buf[pos+1] notin operators and - tok.literal.len > 0: break - of '*', '<', '>', '=': nil - else: break - add(tok.literal, buf[pos]) - inc(pos) - c.bufpos = pos - -proc getTok(c: var TSqlLexer, tok: var TToken) = - tok.kind = tkInvalid - setlen(tok.literal, 0) - skip(c) - case c.buf[c.bufpos] - of ';': - tok.kind = tkSemiColon - inc(c.bufPos) - add(tok.literal, ';') - of ',': - tok.kind = tkComma - inc(c.bufpos) - add(tok.literal, ',') - of ':': - tok.kind = tkColon - inc(c.bufpos) - add(tok.literal, ':') - of 'e', 'E': - if c.buf[c.bufPos + 1] == '\'': - Inc(c.bufPos) - getString(c, tok, tkEscapeConstant) - else: - getSymbol(c, tok) - of 'b', 'B': - if c.buf[c.bufPos + 1] == '\'': - tok.kind = tkBitStringConstant - getBitHexString(c, tok, {'0'..'1'}) - else: - getSymbol(c, tok) - of 'x', 'X': - if c.buf[c.bufPos + 1] == '\'': - tok.kind = tkHexStringConstant - getBitHexString(c, tok, {'a'..'f','A'..'F','0'..'9'}) - else: - getSymbol(c, tok) - of '$': getDollarString(c, tok) - of '[': - tok.kind = tkBracketLe - inc(c.bufpos) - add(tok.literal, '[') - of ']': - tok.kind = tkBracketRi - Inc(c.bufpos) - add(tok.literal, ']') - of '(': - tok.kind = tkParLe - Inc(c.bufpos) - add(tok.literal, '(') - of ')': - tok.kind = tkParRi - Inc(c.bufpos) - add(tok.literal, ')') - of '.': - if c.buf[c.bufPos + 1] in Digits: - getNumeric(c, tok) - else: - tok.kind = tkDot - inc(c.bufpos) - add(tok.literal, '.') - of '0'..'9': getNumeric(c, tok) - of '\'': getString(c, tok, tkStringConstant) - of '"': getQuotedIdentifier(c, tok) - of lexbase.EndOfFile: - tok.kind = tkEof - tok.literal = "[EOF]" - of 'a', 'c', 'd', 'f'..'w', 'y', 'z', 'A', 'C', 'D', 'F'..'W', 'Y', 'Z', '_', - '\128'..'\255': - getSymbol(c, tok) - of '+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%', - '^', '&', '|', '`', '?': - getOperator(c, tok) - else: - add(tok.literal, c.buf[c.bufpos]) - inc(c.bufpos) - -proc errorStr(L: TSqlLexer, msg: string): string = - result = "$1($2, $3) Error: $4" % [L.filename, $getLine(L), $getColumn(L), msg] - - -# ----------------------------- parser ---------------------------------------- - -# Operator/Element Associativity Description -# . left table/column name separator -# :: left PostgreSQL-style typecast -# [ ] left array element selection -# - right unary minus -# ^ left exponentiation -# * / % left multiplication, division, modulo -# + - left addition, subtraction -# IS IS TRUE, IS FALSE, IS UNKNOWN, IS NULL -# ISNULL test for null -# NOTNULL test for not null -# (any other) left all other native and user-defined oprs -# IN set membership -# BETWEEN range containment -# OVERLAPS time interval overlap -# LIKE ILIKE SIMILAR string pattern matching -# < > less than, greater than -# = right equality, assignment -# NOT right logical negation -# AND left logical conjunction -# OR left logical disjunction - -type - TSqlNodeKind* = enum ## kind of SQL abstract syntax tree - nkNone, - nkIdent, - nkStringLit, - nkBitStringLit, - nkHexStringLit, - nkIntegerLit, - nkNumericLit, - nkPrimaryKey, - nkForeignKey, - nkNotNull, - - nkStmtList, - nkDot, - nkDotDot, - nkPrefix, - nkInfix, - nkCall, - nkColumnReference, - nkReferences, - nkDefault, - nkCheck, - nkConstraint, - nkUnique, - nkIdentity, - nkColumnDef, ## name, datatype, constraints - nkInsert, - nkUpdate, - nkDelete, - nkSelect, - nkSelectDistinct, - nkSelectColumns, - nkAsgn, - nkFrom, - nkGroup, - nkHaving, - nkOrder, - nkDesc, - nkUnion, - nkIntersect, - nkExcept, - nkColumnList, - nkValueList, - nkWhere, - nkCreateTable, - nkCreateTableIfNotExists, - nkCreateType, - nkCreateTypeIfNotExists, - nkCreateIndex, - nkCreateIndexIfNotExists, - nkEnumDef - -type - EInvalidSql* = object of EBase ## Invalid SQL encountered - PSqlNode* = ref TSqlNode ## an SQL abstract syntax tree node - TSqlNode* = object ## an SQL abstract syntax tree node - case kind*: TSqlNodeKind ## kind of syntax tree - of nkIdent, nkStringLit, nkBitStringLit, nkHexStringLit, - nkIntegerLit, nkNumericLit: - strVal*: string ## AST leaf: the identifier, numeric literal - ## string literal, etc. - else: - sons*: seq[PSqlNode] ## the node's children - - TSqlParser* = object of TSqlLexer ## SQL parser object - tok: TToken - -proc newNode(k: TSqlNodeKind): PSqlNode = - new(result) - result.kind = k - -proc newNode(k: TSqlNodeKind, s: string): PSqlNode = - new(result) - result.kind = k - result.strVal = s - -proc len*(n: PSqlNode): int = - if isNil(n.sons): result = 0 - else: result = n.sons.len - -proc add*(father, n: PSqlNode) = - if isNil(father.sons): father.sons = @[] - add(father.sons, n) - -proc getTok(p: var TSqlParser) = - getTok(p, p.tok) - -proc sqlError(p: TSqlParser, msg: string) = - var e: ref EInvalidSql - new(e) - e.msg = errorStr(p, msg) - raise e - -proc isKeyw(p: TSqlParser, keyw: string): bool = - result = p.tok.kind == tkIdentifier and - cmpIgnoreCase(p.tok.literal, keyw) == 0 - -proc isOpr(p: TSqlParser, opr: string): bool = - result = p.tok.kind == tkOperator and - cmpIgnoreCase(p.tok.literal, opr) == 0 - -proc optKeyw(p: var TSqlParser, keyw: string) = - if p.tok.kind == tkIdentifier and cmpIgnoreCase(p.tok.literal, keyw) == 0: - getTok(p) - -proc expectIdent(p: TSqlParser) = - if p.tok.kind != tkIdentifier and p.tok.kind != tkQuotedIdentifier: - sqlError(p, "identifier expected") - -proc expect(p: TSqlParser, kind: TTokKind) = - if p.tok.kind != kind: - sqlError(p, tokKindToStr[kind] & " expected") - -proc eat(p: var TSqlParser, kind: TTokKind) = - if p.tok.kind == kind: - getTok(p) - else: - sqlError(p, tokKindToStr[kind] & " expected") - -proc eat(p: var TSqlParser, keyw: string) = - if isKeyw(p, keyw): - getTok(p) - else: - sqlError(p, keyw.toUpper() & " expected") - -proc parseDataType(p: var TSqlParser): PSqlNode = - if isKeyw(p, "enum"): - result = newNode(nkEnumDef) - getTok(p) - if p.tok.kind == tkParLe: - getTok(p) - result.add(newNode(nkStringLit, p.tok.literal)) - getTok(p) - while p.tok.kind == tkComma: - getTok(p) - result.add(newNode(nkStringLit, p.tok.literal)) - getTok(p) - eat(p, tkParRi) - else: - expectIdent(p) - result = newNode(nkIdent, p.tok.literal) - getTok(p) - # ignore (12, 13) part: - if p.tok.kind == tkParLe: - getTok(p) - expect(p, tkInteger) - getTok(p) - while p.tok.kind == tkComma: - getTok(p) - expect(p, tkInteger) - getTok(p) - eat(p, tkParRi) - -proc getPrecedence(p: TSqlParser): int = - if isOpr(p, "*") or isOpr(p, "/") or isOpr(p, "%"): - result = 6 - elif isOpr(p, "+") or isOpr(p, "-"): - result = 5 - elif isOpr(p, "=") or isOpr(p, "<") or isOpr(p, ">") or isOpr(p, ">=") or - isOpr(p, "<=") or isOpr(p, "<>") or isOpr(p, "!=") or isKeyw(p, "is") or - isKeyw(p, "like"): - result = 3 - elif isKeyw(p, "and"): - result = 2 - elif isKeyw(p, "or"): - result = 1 - elif p.tok.kind == tkOperator: - # user-defined operator: - result = 0 - else: - result = - 1 - -proc parseExpr(p: var TSqlParser): PSqlNode - -proc identOrLiteral(p: var TSqlParser): PSqlNode = - case p.tok.kind - of tkIdentifier, tkQuotedIdentifier: - result = newNode(nkIdent, p.tok.literal) - getTok(p) - of tkStringConstant, tkEscapeConstant, tkDollarQuotedConstant: - result = newNode(nkStringLit, p.tok.literal) - getTok(p) - of tkBitStringConstant: - result = newNode(nkBitStringLit, p.tok.literal) - getTok(p) - of tkHexStringConstant: - result = newNode(nkHexStringLit, p.tok.literal) - getTok(p) - of tkInteger: - result = newNode(nkIntegerLit, p.tok.literal) - getTok(p) - of tkNumeric: - result = newNode(nkNumericLit, p.tok.literal) - getTok(p) - of tkParLe: - getTok(p) - result = parseExpr(p) - eat(p, tkParRi) - else: - sqlError(p, "expression expected") - getTok(p) # we must consume a token here to prevend endless loops! - -proc primary(p: var TSqlParser): PSqlNode = - if p.tok.kind == tkOperator or isKeyw(p, "not"): - result = newNode(nkPrefix) - result.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - result.add(primary(p)) - return - result = identOrLiteral(p) - while true: - case p.tok.kind - of tkParLe: - var a = result - result = newNode(nkCall) - result.add(a) - getTok(p) - while true: - result.add(parseExpr(p)) - if p.tok.kind == tkComma: getTok(p) - else: break - eat(p, tkParRi) - of tkDot: - getTok(p) - var a = result - if p.tok.kind == tkDot: - getTok(p) - result = newNode(nkDotDot) - else: - result = newNode(nkDot) - result.add(a) - if isOpr(p, "*"): - result.add(newNode(nkIdent, "*")) - elif p.tok.kind in {tkIdentifier, tkQuotedIdentifier}: - result.add(newNode(nkIdent, p.tok.literal)) - else: - sqlError(p, "identifier expected") - getTok(p) - else: break - -proc lowestExprAux(p: var TSqlParser, v: var PSqlNode, limit: int): int = - var - v2, node, opNode: PSqlNode - v = primary(p) # expand while operators have priorities higher than 'limit' - var opPred = getPrecedence(p) - result = opPred - while opPred > limit: - node = newNode(nkInfix) - opNode = newNode(nkIdent, p.tok.literal) - getTok(p) - result = lowestExprAux(p, v2, opPred) - node.add(opNode) - node.add(v) - node.add(v2) - v = node - opPred = getPrecedence(p) - -proc parseExpr(p: var TSqlParser): PSqlNode = - discard lowestExprAux(p, result, - 1) - -proc parseTableName(p: var TSqlParser): PSqlNode = - expectIdent(p) - result = primary(p) - -proc parseColumnReference(p: var TSqlParser): PSqlNode = - result = parseTableName(p) - if p.tok.kind == tkParLe: - getTok(p) - var a = result - result = newNode(nkColumnReference) - result.add(a) - result.add(parseTableName(p)) - while p.tok.kind == tkComma: - getTok(p) - result.add(parseTableName(p)) - eat(p, tkParRi) - -proc parseCheck(p: var TSqlParser): PSqlNode = - getTok(p) - result = newNode(nkCheck) - result.add(parseExpr(p)) - -proc parseConstraint(p: var TSqlParser): PSqlNode = - getTok(p) - result = newNode(nkConstraint) - expectIdent(p) - result.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - eat(p, "check") - result.add(parseExpr(p)) - -proc parseColumnConstraints(p: var TSqlParser, result: PSqlNode) = - while true: - if isKeyw(p, "default"): - getTok(p) - var n = newNode(nkDefault) - n.add(parseExpr(p)) - result.add(n) - elif isKeyw(p, "references"): - getTok(p) - var n = newNode(nkReferences) - n.add(parseColumnReference(p)) - result.add(n) - elif isKeyw(p, "not"): - getTok(p) - eat(p, "null") - result.add(newNode(nkNotNull)) - elif isKeyw(p, "identity"): - getTok(p) - result.add(newNode(nkIdentity)) - elif isKeyw(p, "primary"): - getTok(p) - eat(p, "key") - result.add(newNode(nkPrimaryKey)) - elif isKeyw(p, "check"): - result.add(parseCheck(p)) - elif isKeyw(p, "constraint"): - result.add(parseConstraint(p)) - elif isKeyw(p, "unique"): - result.add(newNode(nkUnique)) - else: - break - -proc parseColumnDef(p: var TSqlParser): PSqlNode = - expectIdent(p) - result = newNode(nkColumnDef) - result.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - result.add(parseDataType(p)) - parseColumnConstraints(p, result) - -proc parseIfNotExists(p: var TSqlParser, k: TSqlNodeKind): PSqlNode = - getTok(p) - if isKeyw(p, "if"): - getTok(p) - eat(p, "not") - eat(p, "exists") - result = newNode(succ(k)) - else: - result = newNode(k) - -proc parseParIdentList(p: var TSqlParser, father: PSqlNode) = - eat(p, tkParLe) - while true: - expectIdent(p) - father.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - if p.tok.kind != tkComma: break - getTok(p) - eat(p, tkParRi) - -proc parseTableConstraint(p: var TSqlParser): PSqlNode = - if isKeyw(p, "primary"): - getTok(p) - eat(p, "key") - result = newNode(nkPrimaryKey) - parseParIdentList(p, result) - elif isKeyw(p, "foreign"): - getTok(p) - eat(p, "key") - result = newNode(nkForeignKey) - parseParIdentList(p, result) - eat(p, "references") - var m = newNode(nkReferences) - m.add(parseColumnReference(p)) - result.add(m) - elif isKeyw(p, "unique"): - getTok(p) - eat(p, "key") - result = newNode(nkUnique) - parseParIdentList(p, result) - elif isKeyw(p, "check"): - result = parseCheck(p) - elif isKeyw(p, "constraint"): - result = parseConstraint(p) - else: - sqlError(p, "column definition expected") - -proc parseTableDef(p: var TSqlParser): PSqlNode = - result = parseIfNotExists(p, nkCreateTable) - expectIdent(p) - result.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - if p.tok.kind == tkParLe: - while true: - getTok(p) - if p.tok.kind == tkIdentifier or p.tok.kind == tkQuotedIdentifier: - result.add(parseColumnDef(p)) - else: - result.add(parseTableConstraint(p)) - if p.tok.kind != tkComma: break - eat(p, tkParRi) - -proc parseTypeDef(p: var TSqlParser): PSqlNode = - result = parseIfNotExists(p, nkCreateType) - expectIdent(p) - result.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - eat(p, "as") - result.add(parseDataType(p)) - -proc parseWhere(p: var TSqlParser): PSqlNode = - getTok(p) - result = newNode(nkWhere) - result.add(parseExpr(p)) - -proc parseIndexDef(p: var TSqlParser): PSqlNode = - result = parseIfNotExists(p, nkCreateIndex) - if isKeyw(p, "primary"): - getTok(p) - eat(p, "key") - result.add(newNode(nkPrimaryKey)) - else: - expectIdent(p) - result.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - eat(p, "on") - expectIdent(p) - result.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - eat(p, tkParLe) - expectIdent(p) - result.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - while p.tok.kind == tkComma: - getTok(p) - expectIdent(p) - result.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - eat(p, tkParRi) - -proc parseInsert(p: var TSqlParser): PSqlNode = - getTok(p) - eat(p, "into") - expectIdent(p) - result = newNode(nkInsert) - result.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - if p.tok.kind == tkParLe: - var n = newNode(nkColumnList) - parseParIdentList(p, n) - else: - result.add(nil) - if isKeyw(p, "default"): - getTok(p) - eat(p, "values") - result.add(newNode(nkDefault)) - else: - eat(p, "values") - eat(p, tkParLe) - var n = newNode(nkValueList) - while true: - n.add(parseExpr(p)) - if p.tok.kind != tkComma: break - getTok(p) - result.add(n) - eat(p, tkParRi) - -proc parseUpdate(p: var TSqlParser): PSqlNode = - getTok(p) - result = newNode(nkUpdate) - result.add(primary(p)) - eat(p, "set") - while true: - var a = newNode(nkAsgn) - expectIdent(p) - a.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - if isOpr(p, "="): getTok(p) - else: sqlError(p, "= expected") - a.add(parseExpr(p)) - result.add(a) - if p.tok.kind != tkComma: break - getTok(p) - if isKeyw(p, "where"): - result.add(parseWhere(p)) - else: - result.add(nil) - -proc parseDelete(p: var TSqlParser): PSqlNode = - getTok(p) - result = newNode(nkDelete) - eat(p, "from") - result.add(primary(p)) - if isKeyw(p, "where"): - result.add(parseWhere(p)) - else: - result.add(nil) - -proc parseSelect(p: var TSqlParser): PSqlNode = - getTok(p) - if isKeyw(p, "distinct"): - getTok(p) - result = newNode(nkSelectDistinct) - elif isKeyw(p, "all"): - getTok(p) - result = newNode(nkSelect) - var a = newNode(nkSelectColumns) - while true: - if isOpr(p, "*"): - a.add(newNode(nkIdent, "*")) - getTok(p) - else: - a.add(parseExpr(p)) - if p.tok.kind != tkComma: break - getTok(p) - result.add(a) - if isKeyw(p, "from"): - var f = newNode(nkFrom) - while true: - getTok(p) - f.add(parseExpr(p)) - if p.tok.kind != tkComma: break - result.add(f) - if isKeyw(p, "where"): - result.add(parseWhere(p)) - if isKeyw(p, "group"): - getTok(p) - eat(p, "by") - var g = newNode(nkGroup) - while true: - g.add(parseExpr(p)) - if p.tok.kind != tkComma: break - getTok(p) - result.add(g) - if isKeyw(p, "having"): - var h = newNode(nkHaving) - while true: - getTok(p) - h.add(parseExpr(p)) - if p.tok.kind != tkComma: break - result.add(h) - if isKeyw(p, "union"): - result.add(newNode(nkUnion)) - getTok(p) - elif isKeyw(p, "intersect"): - result.add(newNode(nkIntersect)) - getTok(p) - elif isKeyw(p, "except"): - result.add(newNode(nkExcept)) - getTok(p) - if isKeyw(p, "order"): - getTok(p) - eat(p, "by") - var n = newNode(nkOrder) - while true: - var e = parseExpr(p) - if isKeyw(p, "asc"): getTok(p) # is default - elif isKeyw(p, "desc"): - getTok(p) - var x = newNode(nkDesc) - x.add(e) - e = x - n.add(e) - if p.tok.kind != tkComma: break - getTok(p) - result.add(n) - -proc parseStmt(p: var TSqlParser): PSqlNode = - if isKeyw(p, "create"): - getTok(p) - optKeyw(p, "cached") - optKeyw(p, "memory") - optKeyw(p, "temp") - optKeyw(p, "global") - optKeyw(p, "local") - optKeyw(p, "temporary") - optKeyw(p, "unique") - optKeyw(p, "hash") - if isKeyw(p, "table"): - result = parseTableDef(p) - elif isKeyw(p, "type"): - result = parseTypeDef(p) - elif isKeyw(p, "index"): - result = parseIndexDef(p) - else: - sqlError(p, "TABLE expected") - elif isKeyw(p, "insert"): - result = parseInsert(p) - elif isKeyw(p, "update"): - result = parseUpdate(p) - elif isKeyw(p, "delete"): - result = parseDelete(p) - elif isKeyw(p, "select"): - result = parseSelect(p) - else: - sqlError(p, "CREATE expected") - -proc open(p: var TSqlParser, input: PStream, filename: string) = - ## opens the parser `p` and assigns the input stream `input` to it. - ## `filename` is only used for error messages. - open(TSqlLexer(p), input, filename) - p.tok.kind = tkInvalid - p.tok.literal = "" - getTok(p) - -proc parse(p: var TSqlParser): PSqlNode = - ## parses the content of `p`'s input stream and returns the SQL AST. - ## Syntax errors raise an `EInvalidSql` exception. - result = newNode(nkStmtList) - while p.tok.kind != tkEof: - var s = parseStmt(p) - eat(p, tkSemiColon) - result.add(s) - if result.len == 1: - result = result.sons[0] - -proc close(p: var TSqlParser) = - ## closes the parser `p`. The associated input stream is closed too. - close(TSqlLexer(p)) - -proc parseSQL*(input: PStream, filename: string): PSqlNode = - ## parses the SQL from `input` into an AST and returns the AST. - ## `filename` is only used for error messages. - ## Syntax errors raise an `EInvalidSql` exception. - var p: TSqlParser - open(p, input, filename) - try: - result = parse(p) - finally: - close(p) - -proc ra(n: PSqlNode, s: var string, indent: int) - -proc rs(n: PSqlNode, s: var string, indent: int, - prefix = "(", suffix = ")", - sep = ", ") = - if n.len > 0: - s.add(prefix) - for i in 0 .. n.len-1: - if i > 0: s.add(sep) - ra(n.sons[i], s, indent) - s.add(suffix) - -proc ra(n: PSqlNode, s: var string, indent: int) = - if n == nil: return - case n.kind - of nkNone: nil - of nkIdent: - if allCharsInSet(n.strVal, {'\33'..'\127'}): - s.add(n.strVal) - else: - s.add("\"" & replace(n.strVal, "\"", "\"\"") & "\"") - of nkStringLit: - s.add(escape(n.strVal, "e'", "'")) - of nkBitStringLit: - s.add("b'" & n.strVal & "'") - of nkHexStringLit: - s.add("x'" & n.strVal & "'") - of nkIntegerLit, nkNumericLit: - s.add(n.strVal) - of nkPrimaryKey: - s.add(" primary key") - rs(n, s, indent) - of nkForeignKey: - s.add(" foreign key") - rs(n, s, indent) - of nkNotNull: - s.add(" not null") - of nkDot: - ra(n.sons[0], s, indent) - s.add(".") - ra(n.sons[1], s, indent) - of nkDotDot: - ra(n.sons[0], s, indent) - s.add(". .") - ra(n.sons[1], s, indent) - of nkPrefix: - s.add('(') - ra(n.sons[0], s, indent) - s.add(' ') - ra(n.sons[1], s, indent) - s.add(')') - of nkInfix: - s.add('(') - ra(n.sons[1], s, indent) - s.add(' ') - ra(n.sons[0], s, indent) - s.add(' ') - ra(n.sons[2], s, indent) - s.add(')') - of nkCall, nkColumnReference: - ra(n.sons[0], s, indent) - s.add('(') - for i in 1..n.len-1: - if i > 1: s.add(", ") - ra(n.sons[i], s, indent) - s.add(')') - of nkReferences: - s.add(" references ") - ra(n.sons[0], s, indent) - of nkDefault: - s.add(" default ") - ra(n.sons[0], s, indent) - of nkCheck: - s.add(" check ") - ra(n.sons[0], s, indent) - of nkConstraint: - s.add(" constraint ") - ra(n.sons[0], s, indent) - s.add(" check ") - ra(n.sons[1], s, indent) - of nkUnique: - s.add(" unique") - rs(n, s, indent) - of nkIdentity: - s.add(" identity") - of nkColumnDef: - s.add("\n ") - rs(n, s, indent, "", "", " ") - of nkStmtList: - for i in 0..n.len-1: - ra(n.sons[i], s, indent) - s.add("\n") - of nkInsert: - assert n.len == 3 - s.add("insert into ") - ra(n.sons[0], s, indent) - ra(n.sons[1], s, indent) - if n.sons[2].kind == nkDefault: - s.add("default values") - else: - s.add("\nvalues ") - ra(n.sons[2], s, indent) - s.add(';') - of nkUpdate: - s.add("update ") - ra(n.sons[0], s, indent) - s.add(" set ") - var L = n.len - for i in 1 .. L-2: - if i > 1: s.add(", ") - var it = n.sons[i] - assert it.kind == nkAsgn - ra(it, s, indent) - ra(n.sons[L-1], s, indent) - s.add(';') - of nkDelete: - s.add("delete from ") - ra(n.sons[0], s, indent) - ra(n.sons[1], s, indent) - s.add(';') - of nkSelect, nkSelectDistinct: - s.add("select ") - if n.kind == nkSelectDistinct: - s.add("distinct ") - rs(n.sons[0], s, indent, "", "", ", ") - for i in 1 .. n.len-1: ra(n.sons[i], s, indent) - s.add(';') - of nkSelectColumns: - assert(false) - of nkAsgn: - ra(n.sons[0], s, indent) - s.add(" = ") - ra(n.sons[1], s, indent) - of nkFrom: - s.add("\nfrom ") - rs(n, s, indent, "", "", ", ") - of nkGroup: - s.add("\ngroup by") - rs(n, s, indent, "", "", ", ") - of nkHaving: - s.add("\nhaving") - rs(n, s, indent, "", "", ", ") - of nkOrder: - s.add("\norder by ") - rs(n, s, indent, "", "", ", ") - of nkDesc: - ra(n.sons[0], s, indent) - s.add(" desc") - of nkUnion: - s.add(" union") - of nkIntersect: - s.add(" intersect") - of nkExcept: - s.add(" except") - of nkColumnList: - rs(n, s, indent) - of nkValueList: - s.add("values ") - rs(n, s, indent) - of nkWhere: - s.add("\nwhere ") - ra(n.sons[0], s, indent) - of nkCreateTable, nkCreateTableIfNotExists: - s.add("create table ") - if n.kind == nkCreateTableIfNotExists: - s.add("if not exists ") - ra(n.sons[0], s, indent) - s.add('(') - for i in 1..n.len-1: - if i > 1: s.add(", ") - ra(n.sons[i], s, indent) - s.add(");") - of nkCreateType, nkCreateTypeIfNotExists: - s.add("create type ") - if n.kind == nkCreateTypeIfNotExists: - s.add("if not exists ") - ra(n.sons[0], s, indent) - s.add(" as ") - ra(n.sons[1], s, indent) - s.add(';') - of nkCreateIndex, nkCreateIndexIfNotExists: - s.add("create index ") - if n.kind == nkCreateIndexIfNotExists: - s.add("if not exists ") - ra(n.sons[0], s, indent) - s.add(" on ") - ra(n.sons[1], s, indent) - s.add('(') - for i in 2..n.len-1: - if i > 2: s.add(", ") - ra(n.sons[i], s, indent) - s.add(");") - of nkEnumDef: - s.add("enum ") - rs(n, s, indent) - -# What I want: -# -#select(columns = [T1.all, T2.name], -# fromm = [T1, T2], -# where = T1.name ==. T2.name, -# orderby = [name]): -# -#for row in dbQuery(db, """select x, y, z -# from a, b -# where a.name = b.name"""): -# - -#select x, y, z: -# fromm: Table1, Table2 -# where: x.name == y.name -#db.select(fromm = [t1, t2], where = t1.name == t2.name): -#for x, y, z in db.select(fromm = a, b where = a.name == b.name): -# writeln x, y, z - -proc renderSQL*(n: PSqlNode): string = - ## Converts an SQL abstract syntax tree to its string representation. - result = "" - ra(n, result, 0) - -when isMainModule: - echo(renderSQL(parseSQL(newStringStream(""" - CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic'); - CREATE TABLE holidays ( - num_weeks int, - happiness happiness - ); - CREATE INDEX table1_attr1 ON table1(attr1); - - SELECT * FROM myTab WHERE col1 = 'happy'; - """), "stdin"))) - -# CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic'); -# CREATE TABLE holidays ( -# num_weeks int, -# happiness happiness -# ); -# CREATE INDEX table1_attr1 ON table1(attr1) diff --git a/nimlib/pure/parsexml.nim b/nimlib/pure/parsexml.nim deleted file mode 100755 index 54f62a9a4..000000000 --- a/nimlib/pure/parsexml.nim +++ /dev/null @@ -1,635 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements a simple high performance `XML`:idx: / `HTML`:idx: -## parser. -## The only encoding that is supported is UTF-8. The parser has been designed -## to be somewhat error correcting, so that even most "wild HTML" found on the -## web can be parsed with it. **Note:** This parser does not check that each -## ``<tag>`` has a corresponding ``</tag>``! These checks have do be -## implemented by the client code for various reasons: -## -## * Old HTML contains tags that have no end tag: ``<br>`` for example. -## * HTML tags are case insensitive, XML tags are case sensitive. Since this -## library can parse both, only the client knows which comparison is to be -## used. -## * Thus the checks would have been very difficult to implement properly with -## little benefit, especially since they are simple to implement in the -## client. The client should use the `errorMsgExpected` proc to generate -## a nice error message that fits the other error messages this library -## creates. -## -## -## Example 1: Retrieve HTML title -## ============================== -## -## The file ``examples/htmltitle.nim`` demonstrates how to use the -## XML parser to accomplish a simple task: To determine the title of an HTML -## document. -## -## .. code-block:: nimrod -## :file: examples/htmltitle.nim -## -## -## Example 2: Retrieve all HTML links -## ================================== -## -## The file ``examples/htmlrefs.nim`` demonstrates how to use the -## XML parser to accomplish another simple task: To determine all the links -## an HTML document contains. -## -## .. code-block:: nimrod -## :file: examples/htmlrefs.nim -## - -import - hashes, strutils, lexbase, streams, unicode - -# the parser treats ``<br />`` as ``<br></br>`` - -type - TXmlEventKind* = enum ## enumation of all events that may occur when parsing - xmlError, ## an error ocurred during parsing - xmlEof, ## end of file reached - xmlCharData, ## character data - xmlWhitespace, ## whitespace has been parsed - xmlComment, ## a comment has been parsed - xmlPI, ## processing instruction (``<?name something ?>``) - xmlElementStart, ## ``<elem>`` - xmlElementEnd, ## ``</elem>`` - xmlElementOpen, ## ``<elem - xmlAttribute, ## ``key = "value"`` pair - xmlElementClose, ## ``>`` - xmlCData, ## ``<![CDATA[`` ... data ... ``]]>`` - xmlEntity, ## &entity; - xmlSpecial ## ``<! ... data ... >`` - - TXmlError* = enum ## enumeration that lists all errors that can occur - errNone, ## no error - errEndOfCDataExpected, ## ``]]>`` expected - errNameExpected, ## name expected - errSemicolonExpected, ## ``;`` expected - errQmGtExpected, ## ``?>`` expected - errGtExpected, ## ``>`` expected - errEqExpected, ## ``=`` expected - errQuoteExpected, ## ``"`` or ``'`` expected - errEndOfCommentExpected ## ``-->`` expected - - TParserState = enum - stateStart, stateNormal, stateAttr, stateEmptyElementTag, stateError - - TXmlParseOption* = enum ## options for the XML parser - reportWhitespace, ## report whitespace - reportComments ## report comments - - TXmlParser* = object of TBaseLexer ## the parser object. - a, b: string - kind: TXmlEventKind - err: TXmlError - state: TParserState - filename: string - options: set[TXmlParseOption] - -const - errorMessages: array [TXmlError, string] = [ - "no error", - "']]>' expected", - "name expected", - "';' expected", - "'?>' expected", - "'>' expected", - "'=' expected", - "'\"' or \"'\" expected", - "'-->' expected" - ] - -proc open*(my: var TXmlParser, input: PStream, filename: string, - options: set[TXmlParseOption] = {}) = - ## initializes the parser with an input stream. `Filename` is only used - ## for nice error messages. The parser's behaviour can be controlled by - ## the `options` parameter: If `options` contains ``reportWhitespace`` - ## a whitespace token is reported as an ``xmlWhitespace`` event. - ## If `options` contains ``reportComments`` a comment token is reported as an - ## ``xmlComment`` event. - lexbase.open(my, input) - my.filename = filename - my.state = stateStart - my.kind = xmlError - my.a = "" - my.b = "" - my.options = options - -proc close*(my: var TXmlParser) {.inline.} = - ## closes the parser `my` and its associated input stream. - lexbase.close(my) - -proc charData*(my: TXmlParser): string {.inline.} = - ## returns the character data for the events: ``xmlCharData``, - ## ``xmlWhitespace``, ``xmlComment``, ``xmlCData``, ``xmlSpecial`` - assert(my.kind in {xmlCharData, xmlWhitespace, xmlComment, xmlCData, - xmlSpecial}) - return my.a - -proc kind*(my: TXmlParser): TXmlEventKind {.inline.} = - ## returns the current event type for the XML parser - return my.kind - -proc elementName*(my: TXmlParser): string {.inline.} = - ## returns the element name for the events: ``xmlElementStart``, - ## ``xmlElementEnd``, ``xmlElementOpen`` - assert(my.kind in {xmlElementStart, xmlElementEnd, xmlElementOpen}) - return my.a - -proc entityName*(my: TXmlParser): string {.inline.} = - ## returns the entity name for the event: ``xmlEntity`` - assert(my.kind == xmlEntity) - return my.a - -proc attrKey*(my: TXmlParser): string {.inline.} = - ## returns the attribute key for the event ``xmlAttribute`` - assert(my.kind == xmlAttribute) - return my.a - -proc attrValue*(my: TXmlParser): string {.inline.} = - ## returns the attribute value for the event ``xmlAttribute`` - assert(my.kind == xmlAttribute) - return my.b - -proc PIName*(my: TXmlParser): string {.inline.} = - ## returns the processing instruction name for the event ``xmlPI`` - assert(my.kind == xmlPI) - return my.a - -proc PIRest*(my: TXmlParser): string {.inline.} = - ## returns the rest of the processing instruction for the event ``xmlPI`` - assert(my.kind == xmlPI) - return my.b - -proc getColumn*(my: TXmlParser): int {.inline.} = - ## get the current column the parser has arrived at. - result = getColNumber(my, my.bufPos) - -proc getLine*(my: TXmlParser): int {.inline.} = - ## get the current line the parser has arrived at. - result = my.linenumber - -proc getFilename*(my: TXmlParser): string {.inline.} = - ## get the filename of the file that the parser processes. - result = my.filename - -proc errorMsg*(my: TXmlParser): string = - ## returns a helpful error message for the event ``xmlError`` - assert(my.kind == xmlError) - result = "$1($2, $3) Error: $4" % [ - my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]] - -proc errorMsgExpected*(my: TXmlParser, tag: string): string = - ## returns an error message "<tag> expected" in the same format as the - ## other error messages - result = "$1($2, $3) Error: $4" % [ - my.filename, $getLine(my), $getColumn(my), "<$1> expected" % tag] - -proc markError(my: var TXmlParser, kind: TXmlError) {.inline.} = - my.err = kind - my.state = stateError - -proc parseCDATA(my: var TXMLParser) = - var pos = my.bufpos + len("<![CDATA[") - var buf = my.buf - while true: - case buf[pos] - of ']': - if buf[pos+1] == ']' and buf[pos+2] == '>': - inc(pos, 3) - break - add(my.a, ']') - inc(pos) - of '\0': - markError(my, errEndOfCDataExpected) - break - of '\c': - pos = lexbase.HandleCR(my, pos) - buf = my.buf - add(my.a, '\L') - of '\L': - pos = lexbase.HandleLF(my, pos) - buf = my.buf - add(my.a, '\L') - else: - add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos # store back - my.kind = xmlCDATA - -proc parseComment(my: var TXMLParser) = - var pos = my.bufpos + len("<!--") - var buf = my.buf - while true: - case buf[pos] - of '-': - if buf[pos+1] == '-' and buf[pos+2] == '>': - inc(pos, 3) - break - if my.options.contains(reportComments): add(my.a, '-') - inc(pos) - of '\0': - markError(my, errEndOfCommentExpected) - break - of '\c': - pos = lexbase.HandleCR(my, pos) - buf = my.buf - if my.options.contains(reportComments): add(my.a, '\L') - of '\L': - pos = lexbase.HandleLF(my, pos) - buf = my.buf - if my.options.contains(reportComments): add(my.a, '\L') - else: - if my.options.contains(reportComments): add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos - my.kind = xmlComment - -proc parseWhitespace(my: var TXmlParser, skip=False) = - var pos = my.bufpos - var buf = my.buf - while true: - case buf[pos] - of ' ', '\t': - if not skip: add(my.a, buf[pos]) - Inc(pos) - of '\c': - # the specification says that CR-LF, CR are to be transformed to LF - pos = lexbase.HandleCR(my, pos) - buf = my.buf - if not skip: add(my.a, '\L') - of '\L': - pos = lexbase.HandleLF(my, pos) - buf = my.buf - if not skip: add(my.a, '\L') - else: - break - my.bufpos = pos - -const - NameStartChar = {'A'..'Z', 'a'..'z', '_', ':', '\128'..'\255'} - NameChar = {'A'..'Z', 'a'..'z', '0'..'9', '.', '-', '_', ':', '\128'..'\255'} - -proc parseName(my: var TXmlParser, dest: var string) = - var pos = my.bufpos - var buf = my.buf - if buf[pos] in nameStartChar: - while true: - add(dest, buf[pos]) - inc(pos) - if buf[pos] notin NameChar: break - my.bufpos = pos - else: - markError(my, errNameExpected) - -proc parseEntity(my: var TXmlParser, dest: var string) = - var pos = my.bufpos+1 - var buf = my.buf - my.kind = xmlCharData - if buf[pos] == '#': - var r: int - inc(pos) - if buf[pos] == 'x': - inc(pos) - while true: - case buf[pos] - of '0'..'9': r = (r shl 4) or (ord(buf[pos]) - ord('0')) - of 'a'..'f': r = (r shl 4) or (ord(buf[pos]) - ord('a') + 10) - of 'A'..'F': r = (r shl 4) or (ord(buf[pos]) - ord('A') + 10) - else: break - inc(pos) - else: - while buf[pos] in {'0'..'9'}: - r = r * 10 + (ord(buf[pos]) - ord('0')) - inc(pos) - add(dest, toUTF8(TRune(r))) - elif buf[pos] == 'l' and buf[pos+1] == 't': - add(dest, '<') - inc(pos, 2) - elif buf[pos] == 'g' and buf[pos+1] == 't': - add(dest, '>') - inc(pos, 2) - elif buf[pos] == 'a' and buf[pos+1] == 'm' and buf[pos+2] == 'p': - add(dest, '&') - inc(pos, 3) - elif buf[pos] == 'a' and buf[pos+1] == 'p' and buf[pos+2] == 'o' and - buf[pos+3] == 's': - add(dest, '\'') - inc(pos, 4) - elif buf[pos] == 'q' and buf[pos+1] == 'u' and buf[pos+2] == 'o' and - buf[pos+3] == 't': - add(dest, '"') - inc(pos, 4) - else: - my.bufpos = pos - parseName(my, dest) - pos = my.bufpos - if my.err != errNameExpected: - my.kind = xmlEntity - else: - add(dest, '&') - if buf[pos] == ';': - inc(pos) - else: - markError(my, errSemiColonExpected) - my.bufpos = pos - -proc parsePI(my: var TXmlParser) = - inc(my.bufpos, "<?".len) - parseName(my, my.a) - var pos = my.bufpos - var buf = my.buf - setLen(my.b, 0) - while true: - case buf[pos] - of '\0': - markError(my, errQmGtExpected) - break - of '?': - if buf[pos+1] == '>': - inc(pos, 2) - break - add(my.b, '?') - inc(pos) - of '\c': - # the specification says that CR-LF, CR are to be transformed to LF - pos = lexbase.HandleCR(my, pos) - buf = my.buf - add(my.b, '\L') - of '\L': - pos = lexbase.HandleLF(my, pos) - buf = my.buf - add(my.b, '\L') - else: - add(my.b, buf[pos]) - inc(pos) - my.bufpos = pos - my.kind = xmlPI - -proc parseSpecial(my: var TXmlParser) = - # things that start with <! - var pos = my.bufpos + 2 - var buf = my.buf - var opentags = 0 - while true: - case buf[pos] - of '\0': - markError(my, errGtExpected) - break - of '<': - inc(opentags) - inc(pos) - add(my.a, '<') - of '>': - if opentags <= 0: - inc(pos) - break - dec(opentags) - inc(pos) - add(my.a, '>') - of '\c': - pos = lexbase.HandleCR(my, pos) - buf = my.buf - add(my.a, '\L') - of '\L': - pos = lexbase.HandleLF(my, pos) - buf = my.buf - add(my.a, '\L') - else: - add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos - my.kind = xmlSpecial - -proc parseTag(my: var TXmlParser) = - inc(my.bufpos) - parseName(my, my.a) - # if we have no name, do not interpret the '<': - if my.a.len == 0: - my.kind = xmlCharData - add(my.a, '<') - return - parseWhitespace(my, skip=True) - if my.buf[my.bufpos] in NameStartChar: - # an attribute follows: - my.kind = xmlElementOpen - my.state = stateAttr - else: - my.kind = xmlElementStart - if my.buf[my.bufpos] == '/' and my.buf[my.bufpos+1] == '>': - inc(my.bufpos, 2) - my.state = stateEmptyElementTag - elif my.buf[my.bufpos] == '>': - inc(my.bufpos) - else: - markError(my, errGtExpected) - -proc parseEndTag(my: var TXmlParser) = - inc(my.bufpos, 2) - parseName(my, my.a) - parseWhitespace(my, skip=True) - if my.buf[my.bufpos] == '>': - inc(my.bufpos) - else: - markError(my, errGtExpected) - my.kind = xmlElementEnd - -proc parseAttribute(my: var TXmlParser) = - my.kind = xmlAttribute - setLen(my.a, 0) - setLen(my.b, 0) - parseName(my, my.a) - # if we have no name, we have '<tag attr= key %&$$%': - if my.a.len == 0: - markError(my, errGtExpected) - return - parseWhitespace(my, skip=True) - if my.buf[my.bufpos] != '=': - markError(my, errEqExpected) - return - inc(my.bufpos) - parseWhitespace(my, skip=True) - - var pos = my.bufpos - var buf = my.buf - if buf[pos] in {'\'', '"'}: - var quote = buf[pos] - var pendingSpace = false - inc(pos) - while true: - case buf[pos] - of '\0': - markError(my, errQuoteExpected) - break - of '&': - if pendingSpace: - add(my.b, ' ') - pendingSpace = false - my.bufpos = pos - parseEntity(my, my.b) - my.kind = xmlAttribute # parseEntity overwrites my.kind! - pos = my.bufpos - of ' ', '\t': - pendingSpace = true - inc(pos) - of '\c': - pos = lexbase.HandleCR(my, pos) - buf = my.buf - pendingSpace = true - of '\L': - pos = lexbase.HandleLF(my, pos) - buf = my.buf - pendingSpace = true - else: - if buf[pos] == quote: - inc(pos) - break - else: - if pendingSpace: - add(my.b, ' ') - pendingSpace = false - add(my.b, buf[pos]) - inc(pos) - else: - markError(my, errQuoteExpected) - my.bufpos = pos - parseWhitespace(my, skip=True) - -proc parseCharData(my: var TXmlParser) = - var pos = my.bufpos - var buf = my.buf - while true: - case buf[pos] - of '\0', '<', '&': break - of '\c': - # the specification says that CR-LF, CR are to be transformed to LF - pos = lexbase.HandleCR(my, pos) - buf = my.buf - add(my.a, '\L') - of '\L': - pos = lexbase.HandleLF(my, pos) - buf = my.buf - add(my.a, '\L') - else: - add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos - my.kind = xmlCharData - -proc rawGetTok(my: var TXmlParser) = - my.kind = xmlError - setLen(my.a, 0) - var pos = my.bufpos - var buf = my.buf - case buf[pos] - of '<': - case buf[pos+1] - of '/': - parseEndTag(my) - of '!': - if buf[pos+2] == '[' and buf[pos+3] == 'C' and buf[pos+4] == 'D' and - buf[pos+5] == 'A' and buf[pos+6] == 'T' and buf[pos+7] == 'A' and - buf[pos+8] == '[': - parseCDATA(my) - elif buf[pos+2] == '-' and buf[pos+3] == '-': - parseComment(my) - else: - parseSpecial(my) - of '?': - parsePI(my) - else: - parseTag(my) - of ' ', '\t', '\c', '\l': - parseWhiteSpace(my) - my.kind = xmlWhitespace - of '\0': - my.kind = xmlEof - of '&': - parseEntity(my, my.a) - else: - parseCharData(my) - assert my.kind != xmlError - -proc getTok(my: var TXmlParser) = - while true: - rawGetTok(my) - case my.kind - of xmlComment: - if my.options.contains(reportComments): break - of xmlWhitespace: - if my.options.contains(reportWhitespace): break - else: break - -proc next*(my: var TXmlParser) = - ## retrieves the first/next event. This controls the parser. - case my.state - of stateNormal: - getTok(my) - of stateStart: - getTok(my) - if my.kind == xmlPI and my.a == "xml": - # just skip the first ``<?xml >`` processing instruction - getTok(my) - my.state = stateNormal - of stateAttr: - # parse an attribute key-value pair: - if my.buf[my.bufpos] == '>': - my.kind = xmlElementClose - inc(my.bufpos) - my.state = stateNormal - elif my.buf[my.bufpos] == '/' and my.buf[my.bufpos+1] == '>': - my.kind = xmlElementClose - inc(my.bufpos, 2) - my.state = stateEmptyElementTag - else: - parseAttribute(my) - # state remains the same - of stateEmptyElementTag: - my.state = stateNormal - my.kind = xmlElementEnd - of stateError: - my.kind = xmlError - my.state = stateNormal - -when isMainModule: - import os - var s = newFileStream(ParamStr(1), fmRead) - if s == nil: quit("cannot open the file" & ParamStr(1)) - var x: TXmlParser - open(x, s, ParamStr(1)) - while true: - next(x) - case x.kind - of xmlError: Echo(x.errorMsg()) - of xmlEof: break - of xmlCharData: echo(x.charData) - of xmlWhitespace: echo("|$1|" % x.charData) - of xmlComment: echo("<!-- $1 -->" % x.charData) - of xmlPI: echo("<? $1 ## $2 ?>" % [x.PIName, x.PIRest]) - of xmlElementStart: echo("<$1>" % x.elementName) - of xmlElementEnd: echo("</$1>" % x.elementName) - - of xmlElementOpen: echo("<$1" % x.elementName) - of xmlAttribute: - echo("Key: " & x.attrKey) - echo("Value: " & x.attrValue) - - of xmlElementClose: echo(">") - of xmlCData: - echo("<![CDATA[$1]]>" % x.charData) - of xmlEntity: - echo("&$1;" % x.entityName) - of xmlSpecial: - echo("SPECIAL: " & x.charData) - close(x) - diff --git a/nimlib/pure/pegs.nim b/nimlib/pure/pegs.nim deleted file mode 100755 index 488e42c7d..000000000 --- a/nimlib/pure/pegs.nim +++ /dev/null @@ -1,1365 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Simple PEG (Parsing expression grammar) matching. Uses no memorization, but -## uses superoperators and symbol inlining to improve performance. Note: -## Matching performance is hopefully competitive with optimized regular -## expression engines. -## -## .. include:: ../doc/pegdocs.txt -## - -const - useUnicode = true ## change this to deactivate proper UTF-8 support - -import - strutils - -when useUnicode: - import unicode - -const - InlineThreshold = 5 ## number of leaves; -1 to disable inlining - -type - TPegKind = enum - pkEmpty, - pkAny, ## any character (.) - pkAnyRune, ## any Unicode character (_) - pkNewLine, ## CR-LF, LF, CR - pkTerminal, - pkTerminalIgnoreCase, - pkTerminalIgnoreStyle, - pkChar, ## single character to match - pkCharChoice, - pkNonTerminal, - pkSequence, ## a b c ... --> Internal DSL: peg(a, b, c) - pkOrderedChoice, ## a / b / ... --> Internal DSL: a / b or /[a, b, c] - pkGreedyRep, ## a* --> Internal DSL: *a - ## a+ --> (a a*) - pkGreedyRepChar, ## x* where x is a single character (superop) - pkGreedyRepSet, ## [set]* (superop) - pkGreedyAny, ## .* or _* (superop) - pkOption, ## a? --> Internal DSL: ?a - pkAndPredicate, ## &a --> Internal DSL: &a - pkNotPredicate, ## !a --> Internal DSL: !a - pkCapture, ## {a} --> Internal DSL: capture(a) - pkSearch, ## @a --> Internal DSL: @a - pkRule, ## a <- b - pkList ## a, b - TNonTerminalFlag = enum - ntDeclared, ntUsed - TNonTerminal {.final.} = object ## represents a non terminal symbol - name: string ## the name of the symbol - line: int ## the line the symbol has been declared/used in - col: int ## the column the symbol has been declared/used in - flags: set[TNonTerminalFlag] ## the nonterminal's flags - rule: TNode ## the rule that the symbol refers to - TNode {.final.} = object - case kind: TPegKind - of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine: nil - of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle: term: string - of pkChar, pkGreedyRepChar: ch: char - of pkCharChoice, pkGreedyRepSet: charChoice: ref set[char] - of pkNonTerminal: nt: PNonTerminal - else: sons: seq[TNode] - PNonTerminal* = ref TNonTerminal - - TPeg* = TNode ## type that represents a PEG - -proc term*(t: string): TPeg = - ## constructs a PEG from a terminal string - if t.len != 1: - result.kind = pkTerminal - result.term = t - else: - result.kind = pkChar - result.ch = t[0] - -proc termIgnoreCase*(t: string): TPeg = - ## constructs a PEG from a terminal string; ignore case for matching - result.kind = pkTerminalIgnoreCase - result.term = t - -proc termIgnoreStyle*(t: string): TPeg = - ## constructs a PEG from a terminal string; ignore style for matching - result.kind = pkTerminalIgnoreStyle - result.term = t - -proc term*(t: char): TPeg = - ## constructs a PEG from a terminal char - assert t != '\0' - result.kind = pkChar - result.ch = t - -proc charSet*(s: set[char]): TPeg = - ## constructs a PEG from a character set `s` - assert '\0' notin s - result.kind = pkCharChoice - new(result.charChoice) - result.charChoice^ = s - -proc len(a: TPeg): int {.inline.} = return a.sons.len -proc add(d: var TPeg, s: TPeg) {.inline.} = add(d.sons, s) - -proc addChoice(dest: var TPeg, elem: TPeg) = - var L = dest.len-1 - if L >= 0 and dest.sons[L].kind == pkCharChoice: - case elem.kind - of pkCharChoice: - dest.sons[L].charChoice^ = dest.sons[L].charChoice^ + elem.charChoice^ - of pkChar: incl(dest.sons[L].charChoice^, elem.ch) - else: add(dest, elem) - else: add(dest, elem) - -template multipleOp(k: TPegKind, localOpt: expr) = - result.kind = k - result.sons = @[] - for x in items(a): - if x.kind == k: - for y in items(x.sons): - localOpt(result, y) - else: - localOpt(result, x) - if result.len == 1: - result = result.sons[0] - -proc `/`*(a: openArray[TPeg]): TPeg = - ## constructs an ordered choice with the PEGs in `a` - multipleOp(pkOrderedChoice, addChoice) - -proc addSequence(dest: var TPeg, elem: TPeg) = - var L = dest.len-1 - if L >= 0 and dest.sons[L].kind == pkTerminal: - case elem.kind - of pkTerminal: add(dest.sons[L].term, elem.term) - of pkChar: add(dest.sons[L].term, elem.ch) - else: add(dest, elem) - else: add(dest, elem) - -proc sequence*(a: openArray[TPeg]): TPeg = - ## constructs a sequence with all the PEGs from `a` - multipleOp(pkSequence, addSequence) - -proc `?`*(a: TPeg): TPeg = - ## constructs an optional for the PEG `a` - if a.kind in {pkOption, pkGreedyRep, pkGreedyAny, pkGreedyRepChar, - pkGreedyRepSet}: - # a* ? --> a* - # a? ? --> a? - result = a - else: - result.kind = pkOption - result.sons = @[a] - -proc `*`*(a: TPeg): TPeg = - ## constructs a "greedy repetition" for the PEG `a` - case a.kind - of pkGreedyRep, pkGreedyRepChar, pkGreedyRepSet, pkGreedyAny, pkOption: - assert false - # produces endless loop! - of pkChar: - result.kind = pkGreedyRepChar - result.ch = a.ch - of pkCharChoice: - result.kind = pkGreedyRepSet - result.charChoice = a.charChoice # copying a reference suffices! - of pkAny, pkAnyRune: - result.kind = pkGreedyAny - else: - result.kind = pkGreedyRep - result.sons = @[a] - -proc `@`*(a: TPeg): TPeg = - ## constructs a "search" for the PEG `a` - result.kind = pkSearch - result.sons = @[a] - -when false: - proc contains(a: TPeg, k: TPegKind): bool = - if a.kind == k: return true - case a.kind - of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine, pkTerminal, - pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, pkGreedyRepChar, - pkCharChoice, pkGreedyRepSet: nil - of pkNonTerminal: return true - else: - for i in 0..a.sons.len-1: - if contains(a.sons[i], k): return true - -proc `+`*(a: TPeg): TPeg = - ## constructs a "greedy positive repetition" with the PEG `a` - return sequence(a, *a) - -proc `&`*(a: TPeg): TPeg = - ## constructs an "and predicate" with the PEG `a` - result.kind = pkAndPredicate - result.sons = @[a] - -proc `!`*(a: TPeg): TPeg = - ## constructs a "not predicate" with the PEG `a` - result.kind = pkNotPredicate - result.sons = @[a] - -proc any*: TPeg {.inline.} = - ## constructs the PEG `any character`:idx: (``.``) - result.kind = pkAny - -proc anyRune*: TPeg {.inline.} = - ## constructs the PEG `any rune`:idx: (``_``) - result.kind = pkAnyRune - -proc newLine*: TPeg {.inline.} = - ## constructs the PEG `newline`:idx: (``\n``) - result.kind = pkNewline - -proc capture*(a: TPeg): TPeg = - ## constructs a capture with the PEG `a` - result.kind = pkCapture - result.sons = @[a] - -proc spaceCost(n: TPeg): int = - case n.kind - of pkEmpty: nil - of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, - pkGreedyRepChar, pkCharChoice, pkGreedyRepSet, pkAny, pkAnyRune, - pkNewLine, pkGreedyAny: - result = 1 - of pkNonTerminal: - # we cannot inline a rule with a non-terminal - result = InlineThreshold+1 - else: - for i in 0..n.len-1: - inc(result, spaceCost(n.sons[i])) - if result >= InlineThreshold: break - -proc nonterminal*(n: PNonTerminal): TPeg = - ## constructs a PEG that consists of the nonterminal symbol - assert n != nil - if ntDeclared in n.flags and spaceCost(n.rule) < InlineThreshold: - when false: echo "inlining symbol: ", n.name - result = n.rule # inlining of rule enables better optimizations - else: - result.kind = pkNonTerminal - result.nt = n - -proc newNonTerminal*(name: string, line, column: int): PNonTerminal = - ## constructs a nonterminal symbol - new(result) - result.name = name - result.line = line - result.col = column - -template letters*: expr = - ## expands to ``charset({'A'..'Z', 'a'..'z'})`` - charset({'A'..'Z', 'a'..'z'}) - -template digits*: expr = - ## expands to ``charset({'0'..'9'})`` - charset({'0'..'9'}) - -template whitespace*: expr = - ## expands to ``charset({' ', '\9'..'\13'})`` - charset({' ', '\9'..'\13'}) - -template identChars*: expr = - ## expands to ``charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})`` - charset({'a'..'z', 'A'..'Z', '0'..'9', '_'}) - -template identStartChars*: expr = - ## expands to ``charset({'A'..'Z', 'a'..'z', '_'})`` - charset({'a'..'z', 'A'..'Z', '_'}) - -template ident*: expr = - ## same as ``[a-zA-Z_][a-zA-z_0-9]*``; standard identifier - sequence(charset({'a'..'z', 'A'..'Z', '_'}), - *charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})) - -template natural*: expr = - ## same as ``\d+`` - +digits - -const - MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that - ## can be captured. More subpatterns cannot be captured! - -# ------------------------- debugging ----------------------------------------- - -proc esc(c: char, reserved = {'\0'..'\255'}): string = - case c - of '\b': result = "\\b" - of '\t': result = "\\t" - of '\c': result = "\\c" - of '\L': result = "\\l" - of '\v': result = "\\v" - of '\f': result = "\\f" - of '\e': result = "\\e" - of '\a': result = "\\a" - of '\\': result = "\\\\" - of 'a'..'z', 'A'..'Z', '0'..'9', '_': result = $c - elif c < ' ' or c >= '\128': result = '\\' & $ord(c) - elif c in reserved: result = '\\' & c - else: result = $c - -proc singleQuoteEsc(c: Char): string = return "'" & esc(c, {'\''}) & "'" - -proc singleQuoteEsc(str: string): string = - result = "'" - for c in items(str): add result, esc(c, {'\''}) - add result, '\'' - -proc charSetEscAux(cc: set[char]): string = - const reserved = {'^', '-', ']'} - result = "" - var c1 = 0 - while c1 <= 0xff: - if chr(c1) in cc: - var c2 = c1 - while c2 < 0xff and chr(succ(c2)) in cc: inc(c2) - if c1 == c2: - add result, esc(chr(c1), reserved) - elif c2 == succ(c1): - add result, esc(chr(c1), reserved) & esc(chr(c2), reserved) - else: - add result, esc(chr(c1), reserved) & '-' & esc(chr(c2), reserved) - c1 = c2 - inc(c1) - -proc CharSetEsc(cc: set[char]): string = - if card(cc) >= 128+64: - result = "[^" & CharSetEscAux({'\1'..'\xFF'} - cc) & ']' - else: - result = '[' & CharSetEscAux(cc) & ']' - -proc toStrAux(r: TPeg, res: var string) = - case r.kind - of pkEmpty: add(res, "()") - of pkAny: add(res, '.') - of pkAnyRune: add(res, '_') - of pkNewline: add(res, "\\n") - of pkTerminal: add(res, singleQuoteEsc(r.term)) - of pkTerminalIgnoreCase: - add(res, 'i') - add(res, singleQuoteEsc(r.term)) - of pkTerminalIgnoreStyle: - add(res, 'y') - add(res, singleQuoteEsc(r.term)) - of pkChar: add(res, singleQuoteEsc(r.ch)) - of pkCharChoice: add(res, charSetEsc(r.charChoice^)) - of pkNonTerminal: add(res, r.nt.name) - of pkSequence: - add(res, '(') - toStrAux(r.sons[0], res) - for i in 1 .. high(r.sons): - add(res, ' ') - toStrAux(r.sons[i], res) - add(res, ')') - of pkOrderedChoice: - add(res, '(') - toStrAux(r.sons[0], res) - for i in 1 .. high(r.sons): - add(res, " / ") - toStrAux(r.sons[i], res) - add(res, ')') - of pkGreedyRep: - toStrAux(r.sons[0], res) - add(res, '*') - of pkGreedyRepChar: - add(res, singleQuoteEsc(r.ch)) - add(res, '*') - of pkGreedyRepSet: - add(res, charSetEsc(r.charChoice^)) - add(res, '*') - of pkGreedyAny: - add(res, ".*") - of pkOption: - toStrAux(r.sons[0], res) - add(res, '?') - of pkAndPredicate: - add(res, '&') - toStrAux(r.sons[0], res) - of pkNotPredicate: - add(res, '!') - toStrAux(r.sons[0], res) - of pkSearch: - add(res, '@') - toStrAux(r.sons[0], res) - of pkCapture: - add(res, '{') - toStrAux(r.sons[0], res) - add(res, '}') - of pkRule: - toStrAux(r.sons[0], res) - add(res, " <- ") - toStrAux(r.sons[1], res) - of pkList: - for i in 0 .. high(r.sons): - toStrAux(r.sons[i], res) - add(res, "\n") - -proc `$` *(r: TPeg): string = - ## converts a PEG to its string representation - result = "" - toStrAux(r, result) - -# --------------------- core engine ------------------------------------------- - -type - TMatchClosure {.final.} = object - matches: array[0..maxSubpatterns-1, tuple[first, last: int]] - ml: int - -when not useUnicode: - type - TRune = char - template fastRuneAt(s, i, ch: expr) = - ch = s[i] - inc(i) - template runeLenAt(s, i: expr): expr = 1 - -proc m(s: string, p: TPeg, start: int, c: var TMatchClosure): int = - ## this implements a simple PEG interpreter. Thanks to superoperators it - ## has competitive performance nevertheless. - ## Returns -1 if it does not match, else the length of the match - case p.kind - of pkEmpty: result = 0 # match of length 0 - of pkAny: - if s[start] != '\0': result = 1 - else: result = -1 - of pkAnyRune: - if s[start] != '\0': - result = runeLenAt(s, start) - else: - result = -1 - of pkGreedyAny: - result = len(s) - start - of pkNewLine: - if s[start] == '\L': result = 1 - elif s[start] == '\C': - if s[start+1] == '\L': result = 2 - else: result = 1 - else: result = -1 - of pkTerminal: - result = len(p.term) - for i in 0..result-1: - if p.term[i] != s[start+i]: - result = -1 - break - of pkTerminalIgnoreCase: - var - i = 0 - a, b: TRune - result = start - while i < len(p.term): - fastRuneAt(p.term, i, a) - fastRuneAt(s, result, b) - if toLower(a) != toLower(b): - result = -1 - break - dec(result, start) - of pkTerminalIgnoreStyle: - var - i = 0 - a, b: TRune - result = start - while i < len(p.term): - while true: - fastRuneAt(p.term, i, a) - if a != TRune('_'): break - while true: - fastRuneAt(s, result, b) - if b != TRune('_'): break - if toLower(a) != toLower(b): - result = -1 - break - dec(result, start) - of pkChar: - if p.ch == s[start]: result = 1 - else: result = -1 - of pkCharChoice: - if contains(p.charChoice^, s[start]): result = 1 - else: result = -1 - of pkNonTerminal: - var oldMl = c.ml - when false: echo "enter: ", p.nt.name - result = m(s, p.nt.rule, start, c) - when false: echo "leave: ", p.nt.name - if result < 0: c.ml = oldMl - of pkSequence: - var oldMl = c.ml - result = 0 - for i in 0..high(p.sons): - var x = m(s, p.sons[i], start+result, c) - if x < 0: - c.ml = oldMl - result = -1 - break - else: inc(result, x) - of pkOrderedChoice: - var oldMl = c.ml - for i in 0..high(p.sons): - result = m(s, p.sons[i], start, c) - if result >= 0: break - c.ml = oldMl - of pkSearch: - var oldMl = c.ml - result = 0 - while start+result < s.len: - var x = m(s, p.sons[0], start+result, c) - if x >= 0: - inc(result, x) - return - inc(result) - result = -1 - c.ml = oldMl - of pkGreedyRep: - result = 0 - while true: - var x = m(s, p.sons[0], start+result, c) - # if x == 0, we have an endless loop; so the correct behaviour would be - # not to break. But endless loops can be easily introduced: - # ``(comment / \w*)*`` is such an example. Breaking for x == 0 does the - # expected thing in this case. - if x <= 0: break - inc(result, x) - of pkGreedyRepChar: - result = 0 - var ch = p.ch - while ch == s[start+result]: inc(result) - of pkGreedyRepSet: - result = 0 - while contains(p.charChoice^, s[start+result]): inc(result) - of pkOption: - result = max(0, m(s, p.sons[0], start, c)) - of pkAndPredicate: - var oldMl = c.ml - result = m(s, p.sons[0], start, c) - if result >= 0: result = 0 # do not consume anything - else: c.ml = oldMl - of pkNotPredicate: - var oldMl = c.ml - result = m(s, p.sons[0], start, c) - if result < 0: result = 0 - else: - c.ml = oldMl - result = -1 - of pkCapture: - var idx = c.ml # reserve a slot for the subpattern - inc(c.ml) - result = m(s, p.sons[0], start, c) - if result >= 0: - if idx < maxSubpatterns: - c.matches[idx] = (start, start+result-1) - #else: silently ignore the capture - else: - c.ml = idx - of pkRule, pkList: assert false - -proc match*(s: string, pattern: TPeg, matches: var openarray[string], - start = 0): bool = - ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and - ## the captured substrings in the array ``matches``. If it does not - ## match, nothing is written into ``matches`` and ``false`` is - ## returned. - var c: TMatchClosure - result = m(s, pattern, start, c) == len(s) - if result: - for i in 0..c.ml-1: - matches[i] = copy(s, c.matches[i][0], c.matches[i][1]) - -proc match*(s: string, pattern: TPeg, start = 0): bool = - ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``. - var c: TMatchClosure - result = m(s, pattern, start, c) == len(s) - -proc matchLen*(s: string, pattern: TPeg, matches: var openarray[string], - start = 0): int = - ## the same as ``match``, but it returns the length of the match, - ## if there is no match, -1 is returned. Note that a match length - ## of zero can happen. It's possible that a suffix of `s` remains - ## that does not belong to the match. - var c: TMatchClosure - result = m(s, pattern, start, c) - if result >= 0: - for i in 0..c.ml-1: - matches[i] = copy(s, c.matches[i][0], c.matches[i][1]) - -proc matchLen*(s: string, pattern: TPeg, start = 0): int = - ## the same as ``match``, but it returns the length of the match, - ## if there is no match, -1 is returned. Note that a match length - ## of zero can happen. It's possible that a suffix of `s` remains - ## that does not belong to the match. - var c: TMatchClosure - result = m(s, pattern, start, c) - -proc find*(s: string, pattern: TPeg, matches: var openarray[string], - start = 0): int = - ## returns the starting position of ``pattern`` in ``s`` and the captured - ## substrings in the array ``matches``. If it does not match, nothing - ## is written into ``matches`` and -1 is returned. - for i in 0 .. s.len-1: - if matchLen(s, pattern, matches, i) >= 0: return i - return -1 - # could also use the pattern here: (!P .)* P - -proc find*(s: string, pattern: TPeg, start = 0): int = - ## returns the starting position of ``pattern`` in ``s``. If it does not - ## match, -1 is returned. - for i in 0 .. s.len-1: - if matchLen(s, pattern, i) >= 0: return i - return -1 - -template `=~`*(s: string, pattern: TPeg): expr = - ## This calls ``match`` with an implicit declared ``matches`` array that - ## can be used in the scope of the ``=~`` call: - ## - ## .. code-block:: nimrod - ## - ## if line =~ peg"\s* {\w+} \s* '=' \s* {\w+}": - ## # matches a key=value pair: - ## echo("Key: ", matches[0]) - ## echo("Value: ", matches[1]) - ## elif line =~ peg"\s*{'#'.*}": - ## # matches a comment - ## # note that the implicit ``matches`` array is different from the - ## # ``matches`` array of the first branch - ## echo("comment: ", matches[0]) - ## else: - ## echo("syntax error") - ## - when not definedInScope(matches): - var matches: array[0..maxSubpatterns-1, string] - match(s, pattern, matches) - -# ------------------------- more string handling ------------------------------ - -proc contains*(s: string, pattern: TPeg, start = 0): bool = - ## same as ``find(s, pattern, start) >= 0`` - return find(s, pattern, start) >= 0 - -proc contains*(s: string, pattern: TPeg, matches: var openArray[string], - start = 0): bool = - ## same as ``find(s, pattern, matches, start) >= 0`` - return find(s, pattern, matches, start) >= 0 - -proc startsWith*(s: string, prefix: TPeg): bool = - ## returns true if `s` starts with the pattern `prefix` - result = matchLen(s, prefix) >= 0 - -proc endsWith*(s: string, suffix: TPeg): bool = - ## returns true if `s` ends with the pattern `prefix` - for i in 0 .. s.len-1: - if matchLen(s, suffix, i) == s.len - i: return true - -proc replace*(s: string, sub: TPeg, by: string): string = - ## Replaces `sub` in `s` by the string `by`. Captures can be accessed in `by` - ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples: - ## - ## .. code-block:: nimrod - ## "var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## - ## "var1<-keykey; val2<-key2key2" - result = "" - var i = 0 - var caps: array[0..maxSubpatterns-1, string] - while i < s.len: - var x = matchLen(s, sub, caps, i) - if x <= 0: - add(result, s[i]) - inc(i) - else: - addf(result, by, caps) - inc(i, x) - # copy the rest: - add(result, copy(s, i)) - -proc parallelReplace*(s: string, subs: openArray[ - tuple[pattern: TPeg, repl: string]]): string = - ## Returns a modified copy of `s` with the substitutions in `subs` - ## applied in parallel. - result = "" - var i = 0 - var caps: array[0..maxSubpatterns-1, string] - while i < s.len: - block searchSubs: - for j in 0..high(subs): - var x = matchLen(s, subs[j][0], caps, i) - if x > 0: - addf(result, subs[j][1], caps) - inc(i, x) - break searchSubs - add(result, s[i]) - inc(i) - # copy the rest: - add(result, copy(s, i)) - -proc transformFile*(infile, outfile: string, - subs: openArray[tuple[pattern: TPeg, repl: string]]) = - ## reads in the file `infile`, performs a parallel replacement (calls - ## `parallelReplace`) and writes back to `outfile`. Calls ``quit`` if an - ## error occurs. This is supposed to be used for quick scripting. - var x = readFile(infile) - if not isNil(x): - var f: TFile - if open(f, outfile, fmWrite): - write(f, x.parallelReplace(subs)) - close(f) - else: - quit("cannot open for writing: " & outfile) - else: - quit("cannot open for reading: " & infile) - -iterator split*(s: string, sep: TPeg): string = - ## Splits the string `s` into substrings. - ## - ## Substrings are separated by the PEG `sep`. - ## Examples: - ## - ## .. code-block:: nimrod - ## for word in split("00232this02939is39an22example111", peg"\d+"): - ## writeln(stdout, word) - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## "this" - ## "is" - ## "an" - ## "example" - ## - var - first = 0 - last = 0 - while last < len(s): - var x = matchLen(s, sep, last) - if x > 0: inc(last, x) - first = last - while last < len(s): - inc(last) - x = matchLen(s, sep, last) - if x > 0: break - if first < last: - yield copy(s, first, last-1) - -proc split*(s: string, sep: TPeg): seq[string] {.noSideEffect.} = - ## Splits the string `s` into substrings. - accumulateResult(split(s, sep)) - -# ------------------- scanner ------------------------------------------------- - -type - TModifier = enum - modNone, - modVerbatim, - modIgnoreCase, - modIgnoreStyle - TTokKind = enum ## enumeration of all tokens - tkInvalid, ## invalid token - tkEof, ## end of file reached - tkAny, ## . - tkAnyRune, ## _ - tkIdentifier, ## abc - tkStringLit, ## "abc" or 'abc' - tkCharSet, ## [^A-Z] - tkParLe, ## '(' - tkParRi, ## ')' - tkCurlyLe, ## '{' - tkCurlyRi, ## '}' - tkArrow, ## '<-' - tkBar, ## '/' - tkStar, ## '*' - tkPlus, ## '+' - tkAmp, ## '&' - tkNot, ## '!' - tkOption, ## '?' - tkAt, ## '@' - tkBuiltin, ## \identifier - tkEscaped ## \\ - - TToken {.final.} = object ## a token - kind: TTokKind ## the type of the token - modifier: TModifier - literal: string ## the parsed (string) literal - charset: set[char] ## if kind == tkCharSet - - TPegLexer = object ## the lexer object. - bufpos: int ## the current position within the buffer - buf: cstring ## the buffer itself - LineNumber: int ## the current line number - lineStart: int ## index of last line start in buffer - colOffset: int ## column to add - filename: string - -const - tokKindToStr: array[TTokKind, string] = [ - "invalid", "[EOF]", ".", "_", "identifier", "string literal", - "character set", "(", ")", "{", "}", "<-", "/", "*", "+", "&", "!", "?", - "@", "built-in", "escaped" - ] - -proc HandleCR(L: var TPegLexer, pos: int): int = - assert(L.buf[pos] == '\c') - inc(L.linenumber) - result = pos+1 - if L.buf[result] == '\L': inc(result) - L.lineStart = result - -proc HandleLF(L: var TPegLexer, pos: int): int = - assert(L.buf[pos] == '\L') - inc(L.linenumber) - result = pos+1 - L.lineStart = result - -proc init(L: var TPegLexer, input, filename: string, line = 1, col = 0) = - L.buf = input - L.bufpos = 0 - L.lineNumber = line - L.colOffset = col - L.lineStart = 0 - L.filename = filename - -proc getColumn(L: TPegLexer): int {.inline.} = - result = abs(L.bufpos - L.lineStart) + L.colOffset - -proc getLine(L: TPegLexer): int {.inline.} = - result = L.linenumber - -proc errorStr(L: TPegLexer, msg: string, line = -1, col = -1): string = - var line = if line < 0: getLine(L) else: line - var col = if col < 0: getColumn(L) else: col - result = "$1($2, $3) Error: $4" % [L.filename, $line, $col, msg] - -proc handleHexChar(c: var TPegLexer, xi: var int) = - case c.buf[c.bufpos] - of '0'..'9': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - of 'a'..'f': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) - inc(c.bufpos) - of 'A'..'F': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) - inc(c.bufpos) - else: nil - -proc getEscapedChar(c: var TPegLexer, tok: var TToken) = - inc(c.bufpos) - case c.buf[c.bufpos] - of 'r', 'R', 'c', 'C': - add(tok.literal, '\c') - Inc(c.bufpos) - of 'l', 'L': - add(tok.literal, '\L') - Inc(c.bufpos) - of 'f', 'F': - add(tok.literal, '\f') - inc(c.bufpos) - of 'e', 'E': - add(tok.literal, '\e') - Inc(c.bufpos) - of 'a', 'A': - add(tok.literal, '\a') - Inc(c.bufpos) - of 'b', 'B': - add(tok.literal, '\b') - Inc(c.bufpos) - of 'v', 'V': - add(tok.literal, '\v') - Inc(c.bufpos) - of 't', 'T': - add(tok.literal, '\t') - Inc(c.bufpos) - of 'x', 'X': - inc(c.bufpos) - var xi = 0 - handleHexChar(c, xi) - handleHexChar(c, xi) - if xi == 0: tok.kind = tkInvalid - else: add(tok.literal, Chr(xi)) - of '0'..'9': - var val = ord(c.buf[c.bufpos]) - ord('0') - Inc(c.bufpos) - var i = 1 - while (i <= 3) and (c.buf[c.bufpos] in {'0'..'9'}): - val = val * 10 + ord(c.buf[c.bufpos]) - ord('0') - inc(c.bufpos) - inc(i) - if val > 0 and val <= 255: add(tok.literal, chr(val)) - else: tok.kind = tkInvalid - of '\0'..'\31': - tok.kind = tkInvalid - elif c.buf[c.bufpos] in strutils.letters: - tok.kind = tkInvalid - else: - add(tok.literal, c.buf[c.bufpos]) - Inc(c.bufpos) - -proc skip(c: var TPegLexer) = - var pos = c.bufpos - var buf = c.buf - while true: - case buf[pos] - of ' ', '\t': - Inc(pos) - of '#': - while not (buf[pos] in {'\c', '\L', '\0'}): inc(pos) - of '\c': - pos = HandleCR(c, pos) - buf = c.buf - of '\L': - pos = HandleLF(c, pos) - buf = c.buf - else: - break # EndOfFile also leaves the loop - c.bufpos = pos - -proc getString(c: var TPegLexer, tok: var TToken) = - tok.kind = tkStringLit - var pos = c.bufPos + 1 - var buf = c.buf - var quote = buf[pos-1] - while true: - case buf[pos] - of '\\': - c.bufpos = pos - getEscapedChar(c, tok) - pos = c.bufpos - of '\c', '\L', '\0': - tok.kind = tkInvalid - break - elif buf[pos] == quote: - inc(pos) - break - else: - add(tok.literal, buf[pos]) - Inc(pos) - c.bufpos = pos - -proc getCharSet(c: var TPegLexer, tok: var TToken) = - tok.kind = tkCharSet - tok.charset = {} - var pos = c.bufPos + 1 - var buf = c.buf - var caret = false - if buf[pos] == '^': - inc(pos) - caret = true - while true: - var ch: char - case buf[pos] - of ']': - inc(pos) - break - of '\\': - c.bufpos = pos - getEscapedChar(c, tok) - pos = c.bufpos - ch = tok.literal[tok.literal.len-1] - of '\C', '\L', '\0': - tok.kind = tkInvalid - break - else: - ch = buf[pos] - Inc(pos) - incl(tok.charset, ch) - if buf[pos] == '-': - if buf[pos+1] == ']': - incl(tok.charset, '-') - inc(pos) - else: - inc(pos) - var ch2: char - case buf[pos] - of '\\': - c.bufpos = pos - getEscapedChar(c, tok) - pos = c.bufpos - ch2 = tok.literal[tok.literal.len-1] - of '\C', '\L', '\0': - tok.kind = tkInvalid - break - else: - ch2 = buf[pos] - Inc(pos) - for i in ord(ch)+1 .. ord(ch2): - incl(tok.charset, chr(i)) - c.bufpos = pos - if caret: tok.charset = {'\1'..'\xFF'} - tok.charset - -proc getSymbol(c: var TPegLexer, tok: var TToken) = - var pos = c.bufpos - var buf = c.buf - while true: - add(tok.literal, buf[pos]) - Inc(pos) - if buf[pos] notin strutils.IdentChars: break - c.bufpos = pos - tok.kind = tkIdentifier - -proc getBuiltin(c: var TPegLexer, tok: var TToken) = - if c.buf[c.bufpos+1] in strutils.Letters: - inc(c.bufpos) - getSymbol(c, tok) - tok.kind = tkBuiltin - else: - tok.kind = tkEscaped - getEscapedChar(c, tok) # may set tok.kind to tkInvalid - -proc getTok(c: var TPegLexer, tok: var TToken) = - tok.kind = tkInvalid - tok.modifier = modNone - setlen(tok.literal, 0) - skip(c) - case c.buf[c.bufpos] - of '{': - tok.kind = tkCurlyLe - inc(c.bufpos) - add(tok.literal, '{') - of '}': - tok.kind = tkCurlyRi - inc(c.bufpos) - add(tok.literal, '}') - of '[': - getCharset(c, tok) - of '(': - tok.kind = tkParLe - Inc(c.bufpos) - add(tok.literal, '(') - of ')': - tok.kind = tkParRi - Inc(c.bufpos) - add(tok.literal, ')') - of '.': - tok.kind = tkAny - inc(c.bufpos) - add(tok.literal, '.') - of '_': - tok.kind = tkAnyRune - inc(c.bufpos) - add(tok.literal, '_') - of '\\': - getBuiltin(c, tok) - of '\'', '"': getString(c, tok) - of '\0': - tok.kind = tkEof - tok.literal = "[EOF]" - of 'a'..'z', 'A'..'Z', '\128'..'\255': - getSymbol(c, tok) - if c.buf[c.bufpos] in {'\'', '"'}: - case tok.literal - of "i": tok.modifier = modIgnoreCase - of "y": tok.modifier = modIgnoreStyle - of "v": tok.modifier = modVerbatim - else: nil - setLen(tok.literal, 0) - getString(c, tok) - if tok.modifier == modNone: tok.kind = tkInvalid - of '+': - tok.kind = tkPlus - inc(c.bufpos) - add(tok.literal, '+') - of '*': - tok.kind = tkStar - inc(c.bufpos) - add(tok.literal, '+') - of '<': - if c.buf[c.bufpos+1] == '-': - inc(c.bufpos, 2) - tok.kind = tkArrow - add(tok.literal, "<-") - else: - add(tok.literal, '<') - of '/': - tok.kind = tkBar - inc(c.bufpos) - add(tok.literal, '/') - of '?': - tok.kind = tkOption - inc(c.bufpos) - add(tok.literal, '?') - of '!': - tok.kind = tkNot - inc(c.bufpos) - add(tok.literal, '!') - of '&': - tok.kind = tkAmp - inc(c.bufpos) - add(tok.literal, '!') - of '@': - tok.kind = tkAt - inc(c.bufpos) - add(tok.literal, '@') - else: - add(tok.literal, c.buf[c.bufpos]) - inc(c.bufpos) - -proc arrowIsNextTok(c: TPegLexer): bool = - # the only look ahead we need - var pos = c.bufpos - while c.buf[pos] in {'\t', ' '}: inc(pos) - result = c.buf[pos] == '<' and c.buf[pos+1] == '-' - -# ----------------------------- parser ---------------------------------------- - -type - EInvalidPeg* = object of EBase ## raised if an invalid PEG has been detected - TPegParser = object of TPegLexer ## the PEG parser object - tok: TToken - nonterms: seq[PNonTerminal] - modifier: TModifier - -proc getTok(p: var TPegParser) = getTok(p, p.tok) - -proc pegError(p: TPegParser, msg: string, line = -1, col = -1) = - var e: ref EInvalidPeg - new(e) - e.msg = errorStr(p, msg, line, col) - raise e - -proc eat(p: var TPegParser, kind: TTokKind) = - if p.tok.kind == kind: getTok(p) - else: pegError(p, tokKindToStr[kind] & " expected") - -proc parseExpr(p: var TPegParser): TPeg - -proc getNonTerminal(p: TPegParser, name: string): PNonTerminal = - for i in 0..high(p.nonterms): - result = p.nonterms[i] - if cmpIgnoreStyle(result.name, name) == 0: return - # forward reference: - result = newNonTerminal(name, getLine(p), getColumn(p)) - add(p.nonterms, result) - -proc modifiedTerm(s: string, m: TModifier): TPeg = - case m - of modNone, modVerbatim: result = term(s) - of modIgnoreCase: result = termIgnoreCase(s) - of modIgnoreStyle: result = termIgnoreStyle(s) - -proc primary(p: var TPegParser): TPeg = - case p.tok.kind - of tkAmp: - getTok(p) - return &primary(p) - of tkNot: - getTok(p) - return !primary(p) - of tkAt: - getTok(p) - return @primary(p) - else: nil - case p.tok.kind - of tkIdentifier: - if not arrowIsNextTok(p): - var nt = getNonTerminal(p, p.tok.literal) - incl(nt.flags, ntUsed) - result = nonTerminal(nt) - getTok(p) - else: - pegError(p, "expression expected, but found: " & p.tok.literal) - of tkStringLit: - var m = p.tok.modifier - if m == modNone: m = p.modifier - result = modifiedTerm(p.tok.literal, m) - getTok(p) - of tkCharSet: - if '\0' in p.tok.charset: - pegError(p, "binary zero ('\\0') not allowed in character class") - result = charset(p.tok.charset) - getTok(p) - of tkParLe: - getTok(p) - result = parseExpr(p) - eat(p, tkParRi) - of tkCurlyLe: - getTok(p) - result = capture(parseExpr(p)) - eat(p, tkCurlyRi) - of tkAny: - result = any() - getTok(p) - of tkAnyRune: - result = anyRune() - getTok(p) - of tkBuiltin: - case p.tok.literal - of "n": result = newLine() - of "d": result = charset({'0'..'9'}) - of "D": result = charset({'\1'..'\xff'} - {'0'..'9'}) - of "s": result = charset({' ', '\9'..'\13'}) - of "S": result = charset({'\1'..'\xff'} - {' ', '\9'..'\13'}) - of "w": result = charset({'a'..'z', 'A'..'Z', '_'}) - of "W": result = charset({'\1'..'\xff'} - {'a'..'z', 'A'..'Z', '_'}) - of "ident": result = pegs.ident - else: pegError(p, "unknown built-in: " & p.tok.literal) - getTok(p) - of tkEscaped: - result = term(p.tok.literal[0]) - getTok(p) - else: - pegError(p, "expression expected, but found: " & p.tok.literal) - getTok(p) # we must consume a token here to prevent endless loops! - while true: - case p.tok.kind - of tkOption: - result = ?result - getTok(p) - of tkStar: - result = *result - getTok(p) - of tkPlus: - result = +result - getTok(p) - else: break - -proc seqExpr(p: var TPegParser): TPeg = - result = primary(p) - while true: - case p.tok.kind - of tkAmp, tkNot, tkAt, tkStringLit, tkCharset, tkParLe, tkCurlyLe, - tkAny, tkAnyRune, tkBuiltin, tkEscaped: - result = sequence(result, primary(p)) - of tkIdentifier: - if not arrowIsNextTok(p): - result = sequence(result, primary(p)) - else: break - else: break - -proc parseExpr(p: var TPegParser): TPeg = - result = seqExpr(p) - while p.tok.kind == tkBar: - getTok(p) - result = result / seqExpr(p) - -proc parseRule(p: var TPegParser): PNonTerminal = - if p.tok.kind == tkIdentifier and arrowIsNextTok(p): - result = getNonTerminal(p, p.tok.literal) - if ntDeclared in result.flags: - pegError(p, "attempt to redefine: " & result.name) - result.line = getLine(p) - result.col = getColumn(p) - getTok(p) - eat(p, tkArrow) - result.rule = parseExpr(p) - incl(result.flags, ntDeclared) # NOW inlining may be attempted - else: - pegError(p, "rule expected, but found: " & p.tok.literal) - -proc rawParse(p: var TPegParser): TPeg = - ## parses a rule or a PEG expression - if p.tok.kind == tkBuiltin: - case p.tok.literal - of "i": - p.modifier = modIgnoreCase - getTok(p) - of "y": - p.modifier = modIgnoreStyle - getTok(p) - else: nil - if p.tok.kind == tkIdentifier and arrowIsNextTok(p): - result = parseRule(p).rule - while p.tok.kind != tkEof: - discard parseRule(p) - else: - result = parseExpr(p) - if p.tok.kind != tkEof: - pegError(p, "EOF expected, but found: " & p.tok.literal) - for i in 0..high(p.nonterms): - var nt = p.nonterms[i] - if ntDeclared notin nt.flags: - pegError(p, "undeclared identifier: " & nt.name, nt.line, nt.col) - elif ntUsed notin nt.flags and i > 0: - pegError(p, "unused rule: " & nt.name, nt.line, nt.col) - -proc parsePeg*(input: string, filename = "pattern", line = 1, col = 0): TPeg = - var p: TPegParser - init(TPegLexer(p), input, filename, line, col) - p.tok.kind = tkInvalid - p.tok.modifier = modNone - p.tok.literal = "" - p.tok.charset = {} - p.nonterms = @[] - getTok(p) - result = rawParse(p) - -proc peg*(pattern: string): TPeg = - ## constructs a TPeg object from the `pattern`. The short name has been - ## chosen to encourage its use as a raw string modifier:: - ## - ## peg"{\ident} \s* '=' \s* {.*}" - result = parsePeg(pattern, "pattern") - -when isMainModule: - assert match("(a b c)", peg"'(' @ ')'") - assert match("W_HI_Le", peg"\y 'while'") - assert(not match("W_HI_L", peg"\y 'while'")) - assert(not match("W_HI_Le", peg"\y v'while'")) - assert match("W_HI_Le", peg"y'while'") - - assert($ +digits == $peg"\d+") - assert "0158787".match(peg"\d+") - assert "ABC 0232".match(peg"\w+\s+\d+") - assert "ABC".match(peg"\d+ / \w+") - - for word in split("00232this02939is39an22example111", peg"\d+"): - writeln(stdout, word) - - assert matchLen("key", ident) == 3 - - var pattern = sequence(ident, *whitespace, term('='), *whitespace, ident) - assert matchLen("key1= cal9", pattern) == 11 - - var ws = newNonTerminal("ws", 1, 1) - ws.rule = *whitespace - - var expr = newNonTerminal("expr", 1, 1) - expr.rule = sequence(capture(ident), *sequence( - nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr))) - - var c: TMatchClosure - var s = "a+b + c +d+e+f" - assert m(s, expr.rule, 0, c) == len(s) - var a = "" - for i in 0..c.ml-1: - a.add(copy(s, c.matches[i][0], c.matches[i][1])) - assert a == "abcdef" - #echo expr.rule - - #const filename = "lib/devel/peg/grammar.txt" - #var grammar = parsePeg(newFileStream(filename, fmRead), filename) - #echo "a <- [abc]*?".match(grammar) - assert find("_____abc_______", term("abc")) == 5 - assert match("_______ana", peg"A <- 'ana' / . A") - assert match("abcs%%%", peg"A <- ..A / .A / '%'") - - if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}": - assert matches[0] == "abc" - else: - assert false - - var g2 = peg"""S <- A B / C D - A <- 'a'+ - B <- 'b'+ - C <- 'c'+ - D <- 'd'+ - """ - assert($g2 == "((A B) / (C D))") - assert match("cccccdddddd", g2) - assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") == - "var1<-keykey; var2<-key2key2") - assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") - - if "aaaaaa" =~ peg"'aa' !. / ({'a'})+": - assert matches[0] == "a" - else: - assert false diff --git a/nimlib/pure/re.nim b/nimlib/pure/re.nim deleted file mode 100755 index 1328f5f1f..000000000 --- a/nimlib/pure/re.nim +++ /dev/null @@ -1,354 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Regular expression support for Nimrod. Consider using the pegs module -## instead. - -{.compile: "tre/tre_all.c".} - -from strutils import addf - -type - TRegExDesc {.pure, final.} = object - re_nsub: int # Number of parenthesized subexpressions. - value: pointer # For internal use only. - - TRegEx* = ref TRegExDesc ## a compiled regular expression - EInvalidRegEx* = object of EInvalidValue - ## is raised if the pattern is no valid regular expression. - - TRegMatch {.pure.} = object - so, eo: cint - -const - MaxSubpatterns* = 10 - ## defines the maximum number of subpatterns that can be captured. - ## More subpatterns cannot be captured! - -proc regnexec(preg: ptr TRegExDesc, s: cstring, len, nmatch: int, - pmatch: ptr array [0..maxSubpatterns-1, TRegMatch], - eflags: cint): cint {.importc.} -proc regncomp(preg: ptr TRegExDesc, regex: cstring, n: int, - cflags: cint): cint {.importc.} -proc regfree(preg: ptr TRegExDesc) {.importc.} - -const - # POSIX regcomp() flags - REG_EXTENDED = 1 - REG_ICASE = (REG_EXTENDED shl 1) - REG_NEWLINE = (REG_ICASE shl 1) - REG_NOSUB = (REG_NEWLINE shl 1) - # Extra regcomp() flags - REG_BASIC = 0 - REG_LITERAL = (REG_NOSUB shl 1) - REG_RIGHT_ASSOC = (REG_LITERAL shl 1) - REG_UNGREEDY = (REG_RIGHT_ASSOC shl 1) - - # POSIX regexec() flags - REG_NOTBOL = 1 - REG_NOTEOL = (REG_NOTBOL shl 1) - - # Extra regexec() flags - REG_APPROX_MATCHER = (REG_NOTEOL shl 1) - REG_BACKTRACKING_MATCHER = (REG_APPROX_MATCHER shl 1) - - ErrorMessages = [ - "No error", - "No match", - "Invalid regexp", - "Unknown collating element", - "Unknown character class name", - "Trailing backslash", - "Invalid back reference", - "Missing ']'", - "Missing ')'", - "Missing '}'", - "Invalid contents of {}", - "Invalid character range", - "Out of memory", - "Invalid use of repetition operators" - ] - -proc finalizeRegEx(x: TRegEx) = regfree(addr(x^)) - -proc re*(s: string): TRegEx = - ## Constructor of regular expressions. Note that Nimrod's - ## extended raw string literals supports this syntax ``re"[abc]"`` as - ## a short form for ``re(r"[abc]")``. - new(result, finalizeRegEx) - var err = int(regncomp(addr(result^), s, s.len, - cint(REG_EXTENDED or REG_NEWLINE))) - if err != 0: - var e: ref EInvalidRegEx - new(e) - e.msg = ErrorMessages[err] - raise e - -proc xre*(pattern: string): TRegEx = - ## deletes whitespace from a pattern that is not escaped or in a character - ## class. Then it constructs a regular expresion object via `re`. - ## This is modelled after Perl's ``/x`` modifier. - var p = "" - var i = 0 - while i < pattern.len: - case pattern[i] - of ' ', '\t': - inc i - of '\\': - add p, '\\' - add p, pattern[i+1] - inc i, 2 - of '[': - while pattern[i] != ']' and pattern[i] != '\0': - add p, pattern[i] - inc i - else: - add p, pattern[i] - inc i - result = re(p) - -proc rawmatch(s: string, pattern: TRegEx, matches: var openarray[string], - start: int): tuple[first, last: int] = - var - rawMatches: array [0..maxSubpatterns-1, TRegMatch] - cs = cstring(s) - res = int(regnexec(addr(pattern^), cast[cstring](addr(cs[start])), - s.len-start, maxSubpatterns, addr(rawMatches), cint(0))) - if res == 0: - for i in 0..min(matches.len, int(pattern.re_nsub))-1: - var a = int(rawMatches[i].so) - var b = int(rawMatches[i].eo) - echo "a: ", a, " b: ", b - if a >= 0 and b >= 0: - matches[i] = copy(s, a+start, b - 1 + start) - else: - matches[i] = "" - return (int(rawMatches[0].so), int(rawMatches[0].eo)-1) - return (-1, -1) - -proc match*(s: string, pattern: TRegEx, matches: var openarray[string], - start = 0): bool = - ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and - ## the captured substrings in the array ``matches``. If it does not - ## match, nothing is written into ``matches`` and ``false`` is - ## returned. - result = rawmatch(s, pattern, matches, start).first == 0 - -proc match*(s: string, pattern: TRegEx, start: int = 0): bool = - ## returns ``true`` if ``s`` matches the ``pattern`` beginning - ## from ``start``. - var matches: array [0..0, string] - result = rawmatch(s, pattern, matches, start).first == 0 - -proc matchLen*(s: string, pattern: TRegEx, matches: var openarray[string], - start = 0): int = - ## the same as ``match``, but it returns the length of the match, - ## if there is no match, -1 is returned. Note that a match length - ## of zero can happen. - var (a, b) = rawmatch(s, pattern, matches, start) - result = a - b + 1 - -proc matchLen*(s: string, pattern: TRegEx, start = 0): int = - ## the same as ``match``, but it returns the length of the match, - ## if there is no match, -1 is returned. Note that a match length - ## of zero can happen. - var matches: array [0..0, string] - var (a, b) = rawmatch(s, pattern, matches, start) - result = a - b + 1 - -proc find*(s: string, pattern: TRegEx, matches: var openarray[string], - start = 0): int = - ## returns ``true`` if ``pattern`` occurs in ``s`` and the captured - ## substrings in the array ``matches``. If it does not match, nothing - ## is written into ``matches``. - result = rawmatch(s, pattern, matches, start).first - if result >= 0: inc(result, start) - -proc find*(s: string, pattern: TRegEx, start = 0): int = - ## returns ``true`` if ``pattern`` occurs in ``s``. - var matches: array [0..0, string] - result = rawmatch(s, pattern, matches, start).first - if result >= 0: inc(result, start) - -template `=~`*(s: string, pattern: TRegEx): expr = - ## This calls ``match`` with an implicit declared ``matches`` array that - ## can be used in the scope of the ``=~`` call: - ## - ## .. code-block:: nimrod - ## - ## if line =~ r"\s*(\w+)\s*\=\s*(\w+)": - ## # matches a key=value pair: - ## echo("Key: ", matches[1]) - ## echo("Value: ", matches[2]) - ## elif line =~ r"\s*(\#.*)": - ## # matches a comment - ## # note that the implicit ``matches`` array is different from the - ## # ``matches`` array of the first branch - ## echo("comment: ", matches[1]) - ## else: - ## echo("syntax error") - ## - when not definedInScope(matches): - var matches: array[0..maxSubPatterns-1, string] - match(s, pattern, matches) - -# ------------------------- more string handling ------------------------------ - -proc contains*(s: string, pattern: TRegEx, start = 0): bool = - ## same as ``find(s, pattern, start) >= 0`` - return find(s, pattern, start) >= 0 - -proc contains*(s: string, pattern: TRegEx, matches: var openArray[string], - start = 0): bool = - ## same as ``find(s, pattern, matches, start) >= 0`` - return find(s, pattern, matches, start) >= 0 - -proc startsWith*(s: string, prefix: TRegEx): bool = - ## returns true if `s` starts with the pattern `prefix` - result = matchLen(s, prefix) >= 0 - -proc endsWith*(s: string, suffix: TRegEx): bool = - ## returns true if `s` ends with the pattern `prefix` - for i in 0 .. s.len-1: - if matchLen(s, suffix, i) == s.len - i: return true - -proc replace*(s: string, sub: TRegEx, by: string): string = - ## Replaces `sub` in `s` by the string `by`. Captures can be accessed in `by` - ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples: - ## - ## .. code-block:: nimrod - ## "var1=key; var2=key2".replace(re"(\w+)'='(\w+)", "$1<-$2$2") - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## - ## "var1<-keykey; val2<-key2key2" - result = "" - var i = 0 - var caps: array[0..maxSubpatterns-1, string] - while i < s.len: - var x = matchLen(s, sub, caps, i) - if x <= 0: - add(result, s[i]) - inc(i) - else: - addf(result, by, caps) - inc(i, x) - # copy the rest: - add(result, copy(s, i)) - -proc parallelReplace*(s: string, subs: openArray[ - tuple[pattern: TRegEx, repl: string]]): string = - ## Returns a modified copy of `s` with the substitutions in `subs` - ## applied in parallel. - result = "" - var i = 0 - var caps: array[0..maxSubpatterns-1, string] - while i < s.len: - block searchSubs: - for j in 0..high(subs): - var x = matchLen(s, subs[j][0], caps, i) - if x > 0: - addf(result, subs[j][1], caps) - inc(i, x) - break searchSubs - add(result, s[i]) - inc(i) - # copy the rest: - add(result, copy(s, i)) - -proc transformFile*(infile, outfile: string, - subs: openArray[tuple[pattern: TRegEx, repl: string]]) = - ## reads in the file `infile`, performs a parallel replacement (calls - ## `parallelReplace`) and writes back to `outfile`. Calls ``quit`` if an - ## error occurs. This is supposed to be used for quick scripting. - var x = readFile(infile) - if not isNil(x): - var f: TFile - if open(f, outfile, fmWrite): - write(f, x.parallelReplace(subs)) - close(f) - else: - quit("cannot open for writing: " & outfile) - else: - quit("cannot open for reading: " & infile) - -iterator split*(s: string, sep: TRegEx): string = - ## Splits the string `s` into substrings. - ## - ## Substrings are separated by the regular expression `sep`. - ## Examples: - ## - ## .. code-block:: nimrod - ## for word in split("00232this02939is39an22example111", re"\d+"): - ## writeln(stdout, word) - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## "this" - ## "is" - ## "an" - ## "example" - ## - var - first = 0 - last = 0 - while last < len(s): - var x = matchLen(s, sep, last) - if x > 0: inc(last, x) - first = last - while last < len(s): - inc(last) - x = matchLen(s, sep, last) - if x > 0: break - if first < last: - yield copy(s, first, last-1) - -proc split*(s: string, sep: TRegEx): seq[string] = - ## Splits the string `s` into substrings. - accumulateResult(split(s, sep)) - -const ## common regular expressions - reIdentifier* = r"\b[a-zA-Z_]+[a-zA-Z_0-9]*\b" ## describes an identifier - reNatural* = r"\b\d+\b" ## describes a natural number - reInteger* = r"\b[-+]?\d+\b" ## describes an integer - reHex* = r"\b0[xX][0-9a-fA-F]+\b" ## describes a hexadecimal number - reBinary* = r"\b0[bB][01]+\b" ## describes a binary number (example: 0b11101) - reOctal* = r"\b0[oO][0-7]+\b" ## describes an octal number (example: 0o777) - reFloat* = r"\b[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\b" - ## describes a floating point number - reEmail* = r"\b[a-zA-Z0-9!#$%&'*+/=?^_`{|}~\-]+(?:\. &" & - r"[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)" & - r"*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+" & - r"(?:[a-zA-Z]{2}|com|org|" & - r"net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\b" - ## describes a common email address - reURL* = r"\b(http(s)?|ftp|gopher|telnet|file|notes|ms\-help):" & - r"((//)|(\\\\))+[\w\d:#@%/;$()~_?\+\-\=\\\.\&]*\b" - ## describes an URL - -when isMainModule: - echo matchLen("key", re"[a-zA-Z_][a-zA-Z_0-9]*") - - var pattern = re"[a-zA-Z_][a-zA-Z_0-9]*\s*=\s*[a-zA-Z_][a-zA-Z_0-9]*" - echo matchLen("key1= cal9", pattern, 2) - - echo find("_____abc_______", re("abc"), 3) - #echo "var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") - #echo "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") - - if "abc" =~ re"(a)bc xyz|([a-z]+)": - echo matches[0] - else: - echo "BUG" - -# for word in split("00232this02939is39an22example111", peg"\d+"): -# writeln(stdout, word) diff --git a/nimlib/pure/regexprs.nim b/nimlib/pure/regexprs.nim deleted file mode 100755 index cff3152cf..000000000 --- a/nimlib/pure/regexprs.nim +++ /dev/null @@ -1,177 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Regular expression support for Nimrod. -## Currently this module is implemented by providing a wrapper around the -## `PRCE (Perl-Compatible Regular Expressions) <http://www.pcre.org>`_ -## C library. This means that your application will depend on the PRCE -## library's licence when using this module, which should not be a problem -## though. -## PRCE's licence follows: -## -## .. include:: ../doc/regexprs.txt -## - -# This is not just a convenient wrapper for the pcre library; the -# API will stay the same if the implementation should change. - -import - pcre, strutils - -type - EInvalidRegEx* = object of EInvalidValue - ## is raised if the pattern is no valid regular expression. - -const - MaxSubpatterns* = 10 - ## defines the maximum number of subpatterns that can be captured. - ## More subpatterns cannot be captured! - -proc match*(s, pattern: string, matches: var openarray[string], - start: int = 0): bool - ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and - ## the captured substrings in the array ``matches``. If it does not - ## match, nothing is written into ``matches`` and ``false`` is - ## returned. - -proc match*(s, pattern: string, start: int = 0): bool - ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``. - -proc matchLen*(s, pattern: string, matches: var openarray[string], - start: int = 0): int - ## the same as ``match``, but it returns the length of the match, - ## if there is no match, -1 is returned. Note that a match length - ## of zero can happen. - -proc find*(s, pattern: string, matches: var openarray[string], - start: int = 0): bool - ## returns ``true`` if ``pattern`` occurs in ``s`` and the captured - ## substrings in the array ``matches``. If it does not match, nothing - ## is written into ``matches``. - -proc find*(s, pattern: string, start: int = 0): bool - ## returns ``true`` if ``pattern`` occurs in ``s``. - -proc rawCompile(pattern: string, flags: cint): PPcre = - var - msg: CString - offset: int - com = pcreCompile(pattern, flags, addr(msg), addr(offset), nil) - if com == nil: - var e: ref EInvalidRegEx - new(e) - e.msg = $msg & "\n" & pattern & "\n" & repeatChar(offset) & "^\n" - raise e - return com - -proc matchOrFind(s: string, pattern: PPcre, matches: var openarray[string], - start: cint): cint = - var - rawMatches: array [0..maxSubpatterns * 3 - 1, cint] - res = int(pcreExec(pattern, nil, s, len(s), start, 0, - cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)) - dealloc(pattern) - if res < 0: return res - for i in 0..res-1: - var - a = rawMatches[i * 2] - b = rawMatches[i * 2 + 1] - if a >= 0'i32: matches[i] = copy(s, a, int(b)-1) - else: matches[i] = "" - return res - -proc matchOrFind(s: string, pattern: PPcre, start: cint): cint = - var - rawMatches: array [0..maxSubpatterns * 3 - 1, cint] - res = pcreExec(pattern, nil, s, len(s), start, 0, - cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) - dealloc(pattern) - return res - -proc match(s, pattern: string, matches: var openarray[string], - start: int = 0): bool = - return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED), - matches, start) >= 0'i32 - -proc matchLen(s, pattern: string, matches: var openarray[string], - start: int = 0): int = - return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED), matches, start) - -proc find(s, pattern: string, matches: var openarray[string], - start: int = 0): bool = - return matchOrFind(s, rawCompile(pattern, PCRE_MULTILINE), - matches, start) >= 0'i32 - -proc match(s, pattern: string, start: int = 0): bool = - return matchOrFind(s, rawCompile(pattern, PCRE_ANCHORED), start) >= 0'i32 - -proc find(s, pattern: string, start: int = 0): bool = - return matchOrFind(s, rawCompile(pattern, PCRE_MULTILINE), start) >= 0'i32 - -template `=~` *(s, pattern: expr): expr = - ## This calls ``match`` with an implicit declared ``matches`` array that - ## can be used in the scope of the ``=~`` call: - ## - ## .. code-block:: nimrod - ## - ## if line =~ r"\s*(\w+)\s*\=\s*(\w+)": - ## # matches a key=value pair: - ## echo("Key: ", matches[1]) - ## echo("Value: ", matches[2]) - ## elif line =~ r"\s*(\#.*)": - ## # matches a comment - ## # note that the implicit ``matches`` array is different from the - ## # ``matches`` array of the first branch - ## echo("comment: ", matches[1]) - ## else: - ## echo("syntax error") - ## - when not definedInScope(matches): - var matches: array[0..maxSubPatterns-1, string] - match(s, pattern, matches) - - -const ## common regular expressions - reIdentifier* = r"\b[a-zA-Z_][a-zA-Z_0-9]*\b" ## describes an identifier - reNatural* = r"\b\d+\b" ## describes a natural number - reInteger* = r"\b[-+]?\d+\b" ## describes an integer - reHex* = r"\b0[xX][0-9a-fA-F]+\b" ## describes a hexadecimal number - reBinary* = r"\b0[bB][01]+\b" ## describes a binary number (example: 0b11101) - reOctal* = r"\b0[oO][0-7]+\b" ## describes an octal number (example: 0o777) - reFloat* = r"\b[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\b" - ## describes a floating point number - reEmail* = r"\b[a-zA-Z0-9!#$%&'*+/=?^_`{|}~\-]+(?:\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)" & - r"*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+(?:[a-zA-Z]{2}|com|org|" & - r"net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\b" - ## describes a common email address - reURL* = r"\b(http(s)?|ftp|gopher|telnet|file|notes|ms\-help):" & - r"((//)|(\\\\))+[\w\d:#@%/;$()~_?\+\-\=\\\.\&]*\b" - ## describes an URL - -proc verbose*(pattern: string): string {.noSideEffect.} = - ## deletes whitespace from a pattern that is not escaped or in a character - ## class. This is modelled after Perl's ``/x`` modifier. - result = "" - var i = 0 - while i < pattern.len: - case pattern[i] - of ' ', '\t': - inc i - of '\\': - add result, '\\' - add result, pattern[i+1] - inc i, 2 - of '[': - while pattern[i] != ']' and pattern[i] != '\0': - add result, pattern[i] - inc i - else: - add result, pattern[i] - inc i - diff --git a/nimlib/pure/streams.nim b/nimlib/pure/streams.nim deleted file mode 100755 index f4d2911fc..000000000 --- a/nimlib/pure/streams.nim +++ /dev/null @@ -1,245 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module provides a stream interface and two implementations thereof: -## the `PFileStream` and the `PStringStream` which implement the stream -## interface for Nimrod file objects (`TFile`) and strings. Other modules -## may provide other implementations for this standard stream interface. - -proc newEIO(msg: string): ref EIO = - new(result) - result.msg = msg - -type - PStream* = ref TStream - TStream* = object of TObject ## Stream interface that supports - ## writing or reading. - close*: proc (s: PStream) - atEnd*: proc (s: PStream): bool - setPosition*: proc (s: PStream, pos: int) - getPosition*: proc (s: PStream): int - readData*: proc (s: PStream, buffer: pointer, bufLen: int): int - writeData*: proc (s: PStream, buffer: pointer, bufLen: int) - -proc write*[T](s: PStream, x: T) = - ## generic write procedure. Writes `x` to the stream `s`. Implementation: - ## - ## .. code-block:: Nimrod - ## - ## s.writeData(s, addr(x), sizeof(x)) - var x = x - s.writeData(s, addr(x), sizeof(x)) - -proc write*(s: PStream, x: string) = - ## writes the string `x` to the the stream `s`. No length field or - ## terminating zero is written. - s.writeData(s, cstring(x), x.len) - -proc read[T](s: PStream, result: var T) = - ## generic read procedure. Reads `result` from the stream `s`. - if s.readData(s, addr(result), sizeof(T)) != sizeof(T): - raise newEIO("cannot read from stream") - -proc readChar*(s: PStream): char = - ## reads a char from the stream `s`. Raises `EIO` if an error occured. - ## Returns '\0' as an EOF marker. - discard s.readData(s, addr(result), sizeof(result)) - -proc readBool*(s: PStream): bool = - ## reads a bool from the stream `s`. Raises `EIO` if an error occured. - read(s, result) - -proc readInt8*(s: PStream): int8 = - ## reads an int8 from the stream `s`. Raises `EIO` if an error occured. - read(s, result) - -proc readInt16*(s: PStream): int16 = - ## reads an int16 from the stream `s`. Raises `EIO` if an error occured. - read(s, result) - -proc readInt32*(s: PStream): int32 = - ## reads an int32 from the stream `s`. Raises `EIO` if an error occured. - read(s, result) - -proc readInt64*(s: PStream): int64 = - ## reads an int64 from the stream `s`. Raises `EIO` if an error occured. - read(s, result) - -proc readFloat32*(s: PStream): float32 = - ## reads a float32 from the stream `s`. Raises `EIO` if an error occured. - read(s, result) - -proc readFloat64*(s: PStream): float64 = - ## reads a float64 from the stream `s`. Raises `EIO` if an error occured. - read(s, result) - -proc readStr*(s: PStream, length: int): string = - ## reads a string of length `length` from the stream `s`. Raises `EIO` if - ## an error occured. - result = newString(length) - var L = s.readData(s, addr(result[0]), length) - if L != length: setLen(result, L) - -proc readLine*(s: PStream): string = - ## Reads a line from a stream `s`. Note: This is not very efficient. Raises - ## `EIO` if an error occured. - result = "" - while not s.atEnd(s): - var c = readChar(s) - if c == '\c': - c = readChar(s) - break - elif c == '\L' or c == '\0': break - result.add(c) - -type - PStringStream* = ref TStringStream ## a stream that encapsulates a string - TStringStream* = object of TStream - data*: string - pos: int - -proc ssAtEnd(s: PStringStream): bool = - return s.pos >= s.data.len - -proc ssSetPosition(s: PStringStream, pos: int) = - s.pos = min(pos, s.data.len-1) - -proc ssGetPosition(s: PStringStream): int = - return s.pos - -proc ssReadData(s: PStringStream, buffer: pointer, bufLen: int): int = - result = min(bufLen, s.data.len - s.pos) - if result > 0: - copyMem(buffer, addr(s.data[s.pos]), result) - inc(s.pos, result) - -proc ssWriteData(s: PStringStream, buffer: pointer, bufLen: int) = - if bufLen > 0: - setLen(s.data, s.data.len + bufLen) - copyMem(addr(s.data[s.pos]), buffer, bufLen) - inc(s.pos, bufLen) - -proc ssClose(s: PStringStream) = - s.data = nil - -proc newStringStream*(s: string = ""): PStringStream = - ## creates a new stream from the string `s`. - new(result) - result.data = s - result.pos = 0 - result.close = ssClose - result.atEnd = ssAtEnd - result.setPosition = ssSetPosition - result.getPosition = ssGetPosition - result.readData = ssReadData - result.writeData = ssWriteData - -type - PFileStream* = ref TFileStream ## a stream that encapsulates a `TFile` - TFileStream* = object of TStream - f: TFile - -proc fsClose(s: PFileStream) = close(s.f) -proc fsAtEnd(s: PFileStream): bool = return EndOfFile(s.f) -proc fsSetPosition(s: PFileStream, pos: int) = setFilePos(s.f, pos) -proc fsGetPosition(s: PFileStream): int = return int(getFilePos(s.f)) - -proc fsReadData(s: PFileStream, buffer: pointer, bufLen: int): int = - result = readBuffer(s.f, buffer, bufLen) - -proc fsWriteData(s: PFileStream, buffer: pointer, bufLen: int) = - if writeBuffer(s.f, buffer, bufLen) != bufLen: - raise newEIO("cannot write to stream") - -proc newFileStream*(f: TFile): PFileStream = - ## creates a new stream from the file `f`. - new(result) - result.f = f - result.close = fsClose - result.atEnd = fsAtEnd - result.setPosition = fsSetPosition - result.getPosition = fsGetPosition - result.readData = fsReadData - result.writeData = fsWriteData - -proc newFileStream*(filename: string, mode: TFileMode): PFileStream = - ## creates a new stream from the file named `filename` with the mode `mode`. - ## If the file cannot be opened, nil is returned. - var f: TFile - if Open(f, filename, mode): result = newFileStream(f) - - -when true: - nil -else: - type - TFileHandle* = cint ## Operating system file handle - PFileHandleStream* = ref TFileHandleStream - TFileHandleStream* = object of TStream - handle*: TFileHandle - pos: int - - proc newEOS(msg: string): ref EOS = - new(result) - result.msg = msg - - proc hsGetPosition(s: PFileHandleStream): int = - return s.pos - - when defined(windows): - # do not import windows as this increases compile times: - nil - else: - import posix - - proc hsSetPosition(s: PFileHandleStream, pos: int) = - discard lseek(s.handle, pos, SEEK_SET) - - proc hsClose(s: PFileHandleStream) = discard close(s.handle) - proc hsAtEnd(s: PFileHandleStream): bool = - var pos = hsGetPosition(s) - var theEnd = lseek(s.handle, 0, SEEK_END) - result = pos >= theEnd - hsSetPosition(s, pos) # set position back - - proc hsReadData(s: PFileHandleStream, buffer: pointer, bufLen: int): int = - result = posix.read(s.handle, buffer, bufLen) - inc(s.pos, result) - - proc hsWriteData(s: PFileHandleStream, buffer: pointer, bufLen: int) = - if posix.write(s.handle, buffer, bufLen) != bufLen: - raise newEIO("cannot write to stream") - inc(s.pos, bufLen) - - proc newFileHandleStream*(handle: TFileHandle): PFileHandleStream = - new(result) - result.handle = handle - result.pos = 0 - result.close = hsClose - result.atEnd = hsAtEnd - result.setPosition = hsSetPosition - result.getPosition = hsGetPosition - result.readData = hsReadData - result.writeData = hsWriteData - - proc newFileHandleStream*(filename: string, - mode: TFileMode): PFileHandleStream = - when defined(windows): - nil - else: - var flags: cint - case mode - of fmRead: flags = posix.O_RDONLY - of fmWrite: flags = O_WRONLY or int(O_CREAT) - of fmReadWrite: flags = O_RDWR or int(O_CREAT) - of fmReadWriteExisting: flags = O_RDWR - of fmAppend: flags = O_WRONLY or int(O_CREAT) or O_APPEND - var handle = open(filename, flags) - if handle < 0: raise newEOS("posix.open() call failed") - result = newFileHandleStream(handle) diff --git a/nimlib/pure/strtabs.nim b/nimlib/pure/strtabs.nim deleted file mode 100755 index 10cd0b933..000000000 --- a/nimlib/pure/strtabs.nim +++ /dev/null @@ -1,198 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2008 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## The ``strtabs`` module implements an efficient hash table that is a mapping -## from strings to strings. Supports a case-sensitive, case-insensitive and -## style-insensitive mode. An efficient string substitution operator ``%`` -## for the string table is also provided. - -import - os, hashes, strutils - -type - TStringTableMode* = enum ## describes the tables operation mode - modeCaseSensitive, ## the table is case sensitive - modeCaseInsensitive, ## the table is case insensitive - modeStyleInsensitive ## the table is style insensitive - TKeyValuePair = tuple[key, val: string] - TKeyValuePairSeq = seq[TKeyValuePair] - TStringTable* = object of TObject - counter: int - data: TKeyValuePairSeq - mode: TStringTableMode - - PStringTable* = ref TStringTable ## use this type to declare string tables - -proc newStringTable*(keyValuePairs: openarray[string], - mode: TStringTableMode = modeCaseSensitive): PStringTable - ## creates a new string table with given key value pairs. - ## Example:: - ## var mytab = newStringTable("key1", "val1", "key2", "val2", - ## modeCaseInsensitive) - -proc newStringTable*(mode: TStringTableMode = modeCaseSensitive): PStringTable - ## creates a new string table that is empty. - -proc `[]=`*(t: PStringTable, key, val: string) - ## puts a (key, value)-pair into `t`. - -proc `[]`*(t: PStringTable, key: string): string - ## retrieves the value at ``t[key]``. If `key` is not in `t`, "" is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. - -proc hasKey*(t: PStringTable, key: string): bool - ## returns true iff `key` is in the table `t`. - -proc len*(t: PStringTable): int = - ## returns the number of keys in `t`. - result = t.counter - -iterator pairs*(t: PStringTable): tuple[key, value: string] = - ## iterates over any (key, value) pair in the table `t`. - for h in 0..high(t.data): - if not isNil(t.data[h].key): - yield (t.data[h].key, t.data[h].val) - -type - TFormatFlag* = enum ## flags for the `%` operator - useEnvironment, ## use environment variable if the ``$key`` - ## is not found in the table - useEmpty, ## use the empty string as a default, thus it - ## won't throw an exception if ``$key`` is not - ## in the table - useKey ## do not replace ``$key`` if it is not found - ## in the table (or in the environment) - -proc `%`*(f: string, t: PStringTable, flags: set[TFormatFlag] = {}): string - ## The `%` operator for string tables. - -# implementation - -const - growthFactor = 2 - startSize = 64 - -proc newStringTable(mode: TStringTableMode = modeCaseSensitive): PStringTable = - new(result) - result.mode = mode - result.counter = 0 - newSeq(result.data, startSize) - -proc newStringTable(keyValuePairs: openarray[string], - mode: TStringTableMode = modeCaseSensitive): PStringTable = - result = newStringTable(mode) - var i = 0 - while i < high(keyValuePairs): - result[keyValuePairs[i]] = keyValuePairs[i + 1] - inc(i, 2) - -proc myhash(t: PStringTable, key: string): THash = - case t.mode - of modeCaseSensitive: result = hashes.hash(key) - of modeCaseInsensitive: result = hashes.hashIgnoreCase(key) - of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key) - -proc myCmp(t: PStringTable, a, b: string): bool = - case t.mode - of modeCaseSensitive: result = cmp(a, b) == 0 - of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0 - of modeStyleInsensitive: result = cmpIgnoreStyle(a, b) == 0 - -proc mustRehash(length, counter: int): bool = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) - -proc nextTry(h, maxHash: THash): THash = - result = ((5 * h) + 1) and maxHash - -proc RawGet(t: PStringTable, key: string): int = - var h: THash - h = myhash(t, key) and high(t.data) # start with real hash value - while not isNil(t.data[h].key): - if mycmp(t, t.data[h].key, key): - return h - h = nextTry(h, high(t.data)) - result = - 1 - -proc `[]`(t: PStringTable, key: string): string = - var index: int - index = RawGet(t, key) - if index >= 0: result = t.data[index].val - else: result = "" - -proc hasKey(t: PStringTable, key: string): bool = - result = rawGet(t, key) >= 0 - -proc RawInsert(t: PStringTable, data: var TKeyValuePairSeq, key, val: string) = - var h: THash - h = myhash(t, key) and high(data) - while not isNil(data[h].key): - h = nextTry(h, high(data)) - data[h].key = key - data[h].val = val - -proc Enlarge(t: PStringTable) = - var n: TKeyValuePairSeq - newSeq(n, len(t.data) * growthFactor) - for i in countup(0, high(t.data)): - if not isNil(t.data[i].key): RawInsert(t, n, t.data[i].key, t.data[i].val) - swap(t.data, n) - -proc `[]=`(t: PStringTable, key, val: string) = - var index = RawGet(t, key) - if index >= 0: - t.data[index].val = val - else: - if mustRehash(len(t.data), t.counter): Enlarge(t) - RawInsert(t, t.data, key, val) - inc(t.counter) - -proc RaiseFormatException(s: string) = - var e: ref EInvalidValue - new(e) - e.msg = "format string: key not found: " & s - raise e - -proc getValue(t: PStringTable, flags: set[TFormatFlag], key: string): string = - if hasKey(t, key): return t[key] - if useEnvironment in flags: result = os.getEnv(key) - else: result = "" - if result.len == 0: - if useKey in flags: result = '$' & key - elif not (useEmpty in flags): raiseFormatException(key) - -proc `%`(f: string, t: PStringTable, flags: set[TFormatFlag] = {}): string = - const - PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} - result = "" - var i = 0 - while i < len(f): - if f[i] == '$': - case f[i+1] - of '$': - add(result, '$') - inc(i, 2) - of '{': - var j = i + 1 - while j < f.len and f[j] != '}': inc(j) - add(result, getValue(t, flags, copy(f, i+2, j-1))) - i = j + 1 - of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '_': - var j = i + 1 - while j < f.len and f[j] in PatternChars: inc(j) - add(result, getValue(t, flags, copy(f, i+1, j-1))) - i = j - else: - add(result, f[i]) - inc(i) - else: - add(result, f[i]) - inc(i) - diff --git a/nimlib/pure/strutils.nim b/nimlib/pure/strutils.nim deleted file mode 100755 index d7fd69f61..000000000 --- a/nimlib/pure/strutils.nim +++ /dev/null @@ -1,973 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module contains various string utility routines. -## See the module `regexprs` for regular expression support. - -{.deadCodeElim: on.} - -{.push debugger:off .} # the user does not want to trace a part - # of the standard library! - -# copied from excpt.nim, because I don't want to make this template public -template newException(exceptn, message: expr): expr = - block: # open a new scope - var - e: ref exceptn - new(e) - e.msg = message - e - - -type - TCharSet* = set[char] # for compatibility with Nim - -const - Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'} - ## All the characters that count as whitespace. - - Letters* = {'A'..'Z', 'a'..'z'} - ## the set of letters - - Digits* = {'0'..'9'} - ## the set of digits - - IdentChars* = {'a'..'z', 'A'..'Z', '0'..'9', '_'} - ## the set of characters an identifier can consist of - - IdentStartChars* = {'a'..'z', 'A'..'Z', '_'} - ## the set of characters an identifier can start with - -proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect.} - ## The `substitution`:idx: operator performs string substitutions in - ## `formatstr` and returns a modified `formatstr`. This is often called - ## `string interpolation`:idx:. - ## - ## This is best explained by an example: - ## - ## .. code-block:: nimrod - ## "$1 eats $2." % ["The cat", "fish"] - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## "The cat eats fish." - ## - ## The substitution variables (the thing after the ``$``) are enumerated - ## from 1 to ``a.len``. - ## The notation ``$#`` can be used to refer to the next substitution variable: - ## - ## .. code-block:: nimrod - ## "$# eats $#." % ["The cat", "fish"] - ## - ## Substitution variables can also be words (that is - ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even - ## indices are keys and with odd indices are the corresponding values. - ## An example: - ## - ## .. code-block:: nimrod - ## "$animal eats $food." % ["animal", "The cat", "food", "fish"] - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## "The cat eats fish." - ## - ## The variables are compared with `cmpIgnoreStyle`. `EInvalidValue` is - ## raised if an ill-formed format string has been passed to the `%` operator. - -proc `%` *(formatstr, a: string): string {.noSideEffect.} - ## This is the same as ``formatstr % [a]``. - -proc addf*(s: var string, formatstr: string, a: openarray[string]) - ## The same as ``add(s, formatstr % a)``, but more efficient. - -proc strip*(s: string, leading = true, trailing = true): string {.noSideEffect.} - ## Strips whitespace from `s` and returns the resulting string. - ## If `leading` is true, leading whitespace is stripped. - ## If `trailing` is true, trailing whitespace is stripped. - -proc toLower*(s: string): string {.noSideEffect, procvar.} - ## Converts `s` into lower case. This works only for the letters A-Z. - ## See `unicode.toLower` for a version that works for any Unicode character. - -proc toLower*(c: Char): Char {.noSideEffect, procvar.} - ## Converts `c` into lower case. This works only for the letters A-Z. - ## See `unicode.toLower` for a version that works for any Unicode character. - -proc toUpper*(s: string): string {.noSideEffect, procvar.} - ## Converts `s` into upper case. This works only for the letters a-z. - ## See `unicode.toUpper` for a version that works for any Unicode character. - -proc toUpper*(c: Char): Char {.noSideEffect, procvar.} - ## Converts `c` into upper case. This works only for the letters a-z. - ## See `unicode.toUpper` for a version that works for any Unicode character. - -proc capitalize*(s: string): string {.noSideEffect, procvar.} - ## Converts the first character of `s` into upper case. - ## This works only for the letters a-z. - -proc normalize*(s: string): string {.noSideEffect, procvar.} - ## Normalizes the string `s`. That means to convert it to lower case and - ## remove any '_'. This is needed for Nimrod identifiers for example. - -proc find*(s, sub: string, start: int = 0): int {.noSideEffect.} - ## Searches for `sub` in `s` starting at position `start`. Searching is - ## case-sensitive. If `sub` is not in `s`, -1 is returned. - -proc find*(s: string, sub: char, start: int = 0): int {.noSideEffect.} - ## Searches for `sub` in `s` starting at position `start`. Searching is - ## case-sensitive. If `sub` is not in `s`, -1 is returned. - -proc find*(s: string, chars: set[char], start: int = 0): int {.noSideEffect.} - ## Searches for `chars` in `s` starting at position `start`. If `s` contains - ## none of the characters in `chars`, -1 is returned. - -proc replaceStr*(s, sub, by: string): string {.noSideEffect, deprecated.} - ## Replaces `sub` in `s` by the string `by`. - ## **Deprecated since version 0.8.0**: Use `replace` instead. - -proc replaceStr*(s: string, sub, by: char): string {.noSideEffect, deprecated.} - ## optimized version for characters. - ## **Deprecated since version 0.8.0**: Use `replace` instead. - -proc deleteStr*(s: var string, first, last: int) {.deprecated.} - ## Deletes in `s` the characters at position `first`..`last`. This modifies - ## `s` itself, it does not return a copy. - ## **Deprecated since version 0.8.0**: Use `delete` instead. - -proc toOctal*(c: char): string - ## Converts a character `c` to its octal representation. The resulting - ## string may not have a leading zero. Its length is always exactly 3. - -iterator split*(s: string, seps: set[char] = Whitespace): string = - ## Splits the string `s` into substrings. - ## - ## Substrings are separated by a substring containing only `seps`. - ## Examples: - ## - ## .. code-block:: nimrod - ## for word in split(" this is an example "): - ## writeln(stdout, word) - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## "this" - ## "is" - ## "an" - ## "example" - ## - ## for word in split(";;this;is;an;;example;;;", {';'}): - ## writeln(stdout, word) - ## - ## produces the same output. - var last = 0 - assert(not ('\0' in seps)) - while last < len(s): - while s[last] in seps: inc(last) - var first = last - while last < len(s) and s[last] not_in seps: inc(last) # BUGFIX! - if first <= last-1: - yield copy(s, first, last-1) - -iterator split*(s: string, sep: char): string = - ## Splits the string `s` into substrings. - ## - ## Substrings are separated by the character `sep`. - ## Example: - ## - ## .. code-block:: nimrod - ## for word in split(";;this;is;an;;example;;;", ';'): - ## writeln(stdout, word) - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## "" - ## "" - ## "this" - ## "is" - ## "an" - ## "" - ## "example" - ## "" - ## "" - ## "" - ## - var last = 0 - assert('\0' != sep) - if len(s) > 0: - # `<=` is correct here for the edge cases! - while last <= len(s): - var first = last - while last < len(s) and s[last] != sep: inc(last) - yield copy(s, first, last-1) - inc(last) - -iterator splitLines*(s: string): string = - ## Splits the string `s` into its containing lines. Every newline - ## combination (CR, LF, CR-LF) is supported. The result strings contain - ## no trailing ``\n``. - ## - ## Example: - ## - ## .. code-block:: nimrod - ## for line in lines("\nthis\nis\nan\n\nexample\n"): - ## writeln(stdout, line) - ## - ## Results in: - ## - ## .. code-block:: nimrod - ## "" - ## "this" - ## "is" - ## "an" - ## "" - ## "example" - ## "" - var first = 0 - var last = 0 - while true: - while s[last] notin {'\0', '\c', '\l'}: inc(last) - yield copy(s, first, last-1) - # skip newlines: - if s[last] == '\l': inc(last) - elif s[last] == '\c': - inc(last) - if s[last] == '\l': inc(last) - else: break # was '\0' - first = last - -proc splitLinesSeq*(s: string): seq[string] {.noSideEffect, deprecated.} = - ## The same as `splitLines`, but is a proc that returns a sequence - ## of substrings. - ## **Deprecated since version 0.8.0**: Use `splitLines` instead. - accumulateResult(splitLines(s)) - -proc splitSeq*(s: string, seps: set[char] = Whitespace): seq[string] {. - noSideEffect, deprecated.} = - ## The same as `split`, but is a proc that returns a sequence of substrings. - ## **Deprecated since version 0.8.0**: Use `split` instead. - accumulateResult(split(s, seps)) - -proc splitSeq*(s: string, sep: char): seq[string] {.noSideEffect, - deprecated.} = - ## The same as `split`, but is a proc that returns a sequence of substrings. - ## **Deprecated since version 0.8.0**: Use `split` instead. - accumulateResult(split(s, sep)) - -proc splitLines*(s: string): seq[string] {.noSideEffect.} = - ## The same as the `splitLines` iterator, but is a proc that returns a - ## sequence of substrings. - accumulateResult(splitLines(s)) - -proc split*(s: string, seps: set[char] = Whitespace): seq[string] {. - noSideEffect.} = - ## The same as the `split` iterator, but is a proc that returns a - ## sequence of substrings. - accumulateResult(split(s, seps)) - -proc split*(s: string, sep: char): seq[string] {.noSideEffect.} = - ## The same as the `split` iterator, but is a proc that returns a sequence - ## of substrings. - accumulateResult(split(s, sep)) - -proc cmpIgnoreCase*(a, b: string): int {.noSideEffect.} - ## Compares two strings in a case insensitive manner. Returns: - ## - ## | 0 iff a == b - ## | < 0 iff a < b - ## | > 0 iff a > b - -proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect.} - ## Compares two strings normalized (i.e. case and - ## underscores do not matter). Returns: - ## - ## | 0 iff a == b - ## | < 0 iff a < b - ## | > 0 iff a > b - -proc contains*(s: string, c: char): bool {.noSideEffect.} - ## Same as ``find(s, c) >= 0``. - -proc contains*(s, sub: string): bool {.noSideEffect.} - ## Same as ``find(s, sub) >= 0``. - -proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} - ## Same as ``find(s, chars) >= 0``. - -proc toHex*(x: BiggestInt, len: int): string {.noSideEffect.} - ## Converts `x` to its hexadecimal representation. The resulting string - ## will be exactly `len` characters long. No prefix like ``0x`` - ## is generated. `x` is treated as an unsigned value. - -proc intToStr*(x: int, minchars: int = 1): string - ## Converts `x` to its decimal representation. The resulting string - ## will be minimally `minchars` characters long. This is achieved by - ## adding leading zeros. - -proc ParseInt*(s: string): int {.noSideEffect, procvar.} - ## Parses a decimal integer value contained in `s`. If `s` is not - ## a valid integer, `EInvalidValue` is raised. - -proc ParseBiggestInt*(s: string): biggestInt {.noSideEffect, procvar.} - ## Parses a decimal integer value contained in `s`. If `s` is not - ## a valid integer, `EInvalidValue` is raised. - -proc ParseFloat*(s: string): float {.noSideEffect, procvar.} - ## Parses a decimal floating point value contained in `s`. If `s` is not - ## a valid floating point number, `EInvalidValue` is raised. ``NAN``, - ## ``INF``, ``-INF`` are also supported (case insensitive comparison). - -# the stringify and format operators: -proc toString*[Ty](x: Ty): string {.deprecated.} - ## This generic proc is the same as the stringify operator `$`. - ## - ## **Deprecated since version 0.8.2:** Use `$` instead. - -proc repeatChar*(count: int, c: Char = ' '): string - ## Returns a string of length `count` consisting only of - ## the character `c`. - -proc startsWith*(s, prefix: string): bool {.noSideEffect.} - ## Returns true iff ``s`` starts with ``prefix``. - ## If ``prefix == ""`` true is returned. - -proc endsWith*(s, suffix: string): bool {.noSideEffect.} - ## Returns true iff ``s`` ends with ``suffix``. - ## If ``suffix == ""`` true is returned. - -proc addSep*(dest: var string, sep = ", ", startLen = 0) {.noSideEffect, - inline.} = - ## A shorthand for: - ## - ## .. code-block:: nimrod - ## if dest.len > startLen: add(dest, sep) - ## - ## This is often useful for generating some code where the items need to - ## be *separated* by `sep`. `sep` is only added if `dest` is longer than - ## `startLen`. The following example creates a string describing - ## an array of integers: - ## - ## .. code-block:: nimrod - ## var arr = "[" - ## for x in items([2, 3, 5, 7, 11]): - ## addSep(arr, startLen=len("[")) - ## add(arr, $x) - ## add(arr, "]") - if dest.len > startLen: add(dest, sep) - -proc allCharsInSet*(s: string, theSet: TCharSet): bool = - ## returns true iff each character of `s` is in the set `theSet`. - for c in items(s): - if c notin theSet: return false - return true - -proc quoteIfContainsWhite*(s: string): string = - ## returns ``'"' & s & '"'`` if `s` contains a space and does not - ## start with a quote, else returns `s` - if find(s, {' ', '\t'}) >= 0 and s[0] != '"': - result = '"' & s & '"' - else: - result = s - -proc startsWith(s, prefix: string): bool = - var i = 0 - while true: - if prefix[i] == '\0': return true - if s[i] != prefix[i]: return false - inc(i) - -proc endsWith(s, suffix: string): bool = - var - i = 0 - j = len(s) - len(suffix) - while true: - if suffix[i] == '\0': return true - if s[i+j] != suffix[i]: return false - inc(i) - -when false: - proc abbrev(s: string, possibilities: openarray[string]): int = - ## returns the index of the first item in `possibilities` if not - ## ambiguous; -1 if no item has been found; -2 if multiple items - ## match. - result = -1 # none found - for i in 0..possibilities.len-1: - if possibilities[i].startsWith(s): - if result >= 0: return -2 # ambiguous - result = i - -proc repeatChar(count: int, c: Char = ' '): string = - result = newString(count) - for i in 0..count-1: - result[i] = c - -proc intToStr(x: int, minchars: int = 1): string = - result = $abs(x) - for i in 1 .. minchars - len(result): - result = '0' & result - if x < 0: - result = '-' & result - -proc toString[Ty](x: Ty): string = return $x - -proc toOctal(c: char): string = - result = newString(3) - var val = ord(c) - for i in countdown(2, 0): - result[i] = Chr(val mod 8 + ord('0')) - val = val div 8 - -proc `%`(formatstr: string, a: string): string = - return formatstr % [a] - -proc findNormalized(x: string, inArray: openarray[string]): int = - var i = 0 - while i < high(inArray): - if cmpIgnoreStyle(x, inArray[i]) == 0: return i - inc(i, 2) # incrementing by 1 would probably result in a - # security whole ... - return -1 - -proc addf(s: var string, formatstr: string, a: openarray[string]) = - const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'} - var i = 0 - var num = 0 - while i < len(formatstr): - if formatstr[i] == '$': - case formatstr[i+1] # again we use the fact that strings - # are zero-terminated here - of '#': - add s, a[num] - inc i, 2 - inc num - of '$': - add s, '$' - inc(i, 2) - of '1'..'9': - var j = 0 - inc(i) # skip $ - while formatstr[i] in {'0'..'9'}: - j = j * 10 + ord(formatstr[i]) - ord('0') - inc(i) - num = j - add s, a[j - 1] - of '{': - var j = i+1 - while formatstr[j] notin {'\0', '}'}: inc(j) - var x = findNormalized(copy(formatstr, i+2, j-1), a) - if x >= 0 and x < high(a): add s, a[x+1] - else: raise newException(EInvalidValue, "invalid format string") - i = j+1 - of 'a'..'z', 'A'..'Z', '\128'..'\255', '_': - var j = i+1 - while formatstr[j] in PatternChars: inc(j) - var x = findNormalized(copy(formatstr, i+1, j-1), a) - if x >= 0 and x < high(a): add s, a[x+1] - else: raise newException(EInvalidValue, "invalid format string") - i = j - else: raise newException(EInvalidValue, "invalid format string") - else: - add s, formatstr[i] - inc(i) - -proc `%`(formatstr: string, a: openarray[string]): string = - result = "" - addf(result, formatstr, a) - -proc cmpIgnoreCase(a, b: string): int = - # makes usage of the fact that strings are zero-terminated - for i in 0..len(a)-1: - var aa = toLower(a[i]) - var bb = toLower(b[i]) - result = ord(aa) - ord(bb) - if result != 0: break - -{.push checks: off, line_trace: off .} # this is a hot-spot in the compiler! - # thus we compile without checks here - -proc cmpIgnoreStyle(a, b: string): int = - var i = 0 - var j = 0 - while True: - while a[i] == '_': inc(i) - while b[j] == '_': inc(j) # BUGFIX: typo - var aa = toLower(a[i]) - var bb = toLower(b[j]) - result = ord(aa) - ord(bb) - if result != 0 or aa == '\0': break - inc(i) - inc(j) - -{.pop.} - -# --------------------------------------------------------------------------- - -proc join*(a: openArray[string], sep: string): string = - ## concatenates all strings in `a` separating them with `sep`. - if len(a) > 0: - var L = sep.len * (a.len-1) - for i in 0..high(a): inc(L, a[i].len) - result = newString(L) - setLen(result, 0) - add(result, a[0]) - for i in 1..high(a): - add(result, sep) - add(result, a[i]) - else: - result = "" - -proc join*(a: openArray[string]): string = - ## concatenates all strings in `a`. - if len(a) > 0: - var L = 0 - for i in 0..high(a): inc(L, a[i].len) - result = newString(L) - setLen(result, 0) - for i in 0..high(a): add(result, a[i]) - else: - result = "" - -proc strip(s: string, leading = true, trailing = true): string = - const - chars: set[Char] = Whitespace - var - first = 0 - last = len(s)-1 - if leading: - while s[first] in chars: inc(first) - if trailing: - while last >= 0 and s[last] in chars: dec(last) - result = copy(s, first, last) - -proc toLower(c: Char): Char = - if c in {'A'..'Z'}: - result = chr(ord(c) + (ord('a') - ord('A'))) - else: - result = c - -proc toLower(s: string): string = - result = newString(len(s)) - for i in 0..len(s) - 1: - result[i] = toLower(s[i]) - -proc toUpper(c: Char): Char = - if c in {'a'..'z'}: - result = Chr(Ord(c) - (Ord('a') - Ord('A'))) - else: - result = c - -proc toUpper(s: string): string = - result = newString(len(s)) - for i in 0..len(s) - 1: - result[i] = toUpper(s[i]) - -proc capitalize(s: string): string = - result = toUpper(s[0]) & copy(s, 1) - -proc normalize(s: string): string = - result = "" - for i in 0..len(s) - 1: - if s[i] in {'A'..'Z'}: - add result, Chr(Ord(s[i]) + (Ord('a') - Ord('A'))) - elif s[i] != '_': - add result, s[i] - -type - TSkipTable = array[Char, int] - -proc preprocessSub(sub: string, a: var TSkipTable) = - var m = len(sub) - for i in 0..0xff: a[chr(i)] = m+1 - for i in 0..m-1: a[sub[i]] = m-i - -proc findAux(s, sub: string, start: int, a: TSkipTable): int = - # fast "quick search" algorithm: - var - m = len(sub) - n = len(s) - # search: - var j = start - while j <= n - m: - block match: - for k in 0..m-1: - if sub[k] != s[k+j]: break match - return j - inc(j, a[s[j+m]]) - return -1 - -proc find(s, sub: string, start: int = 0): int = - var a: TSkipTable - preprocessSub(sub, a) - result = findAux(s, sub, start, a) - -proc find(s: string, sub: char, start: int = 0): int = - for i in start..len(s)-1: - if sub == s[i]: return i - return -1 - -proc find(s: string, chars: set[char], start: int = 0): int = - for i in start..s.len-1: - if s[i] in chars: return i - return -1 - -proc contains(s: string, chars: set[char]): bool = - return find(s, chars) >= 0 - -proc contains(s: string, c: char): bool = - return find(s, c) >= 0 - -proc contains(s, sub: string): bool = - return find(s, sub) >= 0 - -proc replace*(s, sub, by: string): string = - ## Replaces `sub` in `s` by the string `by`. - var a: TSkipTable - result = "" - preprocessSub(sub, a) - var i = 0 - while true: - var j = findAux(s, sub, i, a) - if j < 0: break - add result, copy(s, i, j - 1) - add result, by - i = j + len(sub) - # copy the rest: - add result, copy(s, i) - -proc replace*(s: string, sub, by: char): string = - ## optimized version for characters. - result = newString(s.len) - var i = 0 - while i < s.len: - if s[i] == sub: result[i] = by - else: result[i] = s[i] - inc(i) - -proc delete*(s: var string, first, last: int) = - ## Deletes in `s` the characters at position `first`..`last`. This modifies - ## `s` itself, it does not return a copy. - var - i = first - # example: "abc___uvwxyz\0" (___ is to be deleted) - # --> first == 3, last == 5 - # s[first..] = s[last+1..] - while last+i+1 < len(s): - s[i] = s[last+i+1] - inc(i) - setlen(s, len(s)-(last-first+1)) - -proc replaceStr(s, sub, by: string): string = return replace(s, sub, by) -proc replaceStr(s: string, sub, by: char): string = return replace(s, sub, by) -proc deleteStr*(s: var string, first, last: int) = delete(s, first, last) - -# parsing numbers: - -proc toHex(x: BiggestInt, len: int): string = - const - HexChars = "0123456789ABCDEF" - var - shift: BiggestInt - result = newString(len) - for j in countdown(len-1, 0): - result[j] = HexChars[toU32(x shr shift) and 0xF'i32] - shift = shift + 4 - -{.push overflowChecks: on.} -# this must be compiled with overflow checking turned on: -proc rawParseInt(s: string, index: var int): BiggestInt = - # index contains the start position at proc entry; end position will be - # an index before the proc returns; index = -1 on error (no number at all) - # the problem here is that integers have an asymmetrical range: there is - # one more valid negative than prositive integer. Thus we perform the - # computation as a negative number and then change the sign at the end. - var - i = index # a local i is more efficient than accessing a var parameter - sign: BiggestInt = -1 - if s[i] == '+': - inc(i) - elif s[i] == '-': - inc(i) - sign = 1 - if s[i] in {'0'..'9'}: - result = 0 - while s[i] in {'0'..'9'}: - result = result * 10 - (ord(s[i]) - ord('0')) - inc(i) - while s[i] == '_': - inc(i) # underscores are allowed and ignored - result = result * sign - if s[i] == '\0': - index = i # store index back - else: - index = -1 # BUGFIX: error! - else: - index = -1 - -{.pop.} # overflowChecks - -proc parseInt(s: string): int = - var - index = 0 - res = rawParseInt(s, index) - if index == -1: - raise newException(EInvalidValue, "invalid integer: " & s) - elif (sizeof(int) <= 4) and - ((res < low(int)) or (res > high(int))): - raise newException(EOverflow, "overflow") - else: - result = int(res) # convert to smaller integer type - -proc ParseBiggestInt(s: string): biggestInt = - var index = 0 - result = rawParseInt(s, index) - if index == -1: - raise newException(EInvalidValue, "invalid integer: " & s) - -proc ParseFloat(s: string): float = - var - esign = 1.0 - sign = 1.0 - i = 0 - exponent: int - flags: int - result = 0.0 - if s[i] == '+': inc(i) - elif s[i] == '-': - sign = -1.0 - inc(i) - if s[i] == 'N' or s[i] == 'n': - if s[i+1] == 'A' or s[i+1] == 'a': - if s[i+2] == 'N' or s[i+2] == 'n': - if s[i+3] == '\0': return NaN - raise newException(EInvalidValue, "invalid float: " & s) - if s[i] == 'I' or s[i] == 'i': - if s[i+1] == 'N' or s[i+1] == 'n': - if s[i+2] == 'F' or s[i+2] == 'f': - if s[i+3] == '\0': return Inf*sign - raise newException(EInvalidValue, "invalid float: " & s) - while s[i] in {'0'..'9'}: - # Read integer part - flags = flags or 1 - result = result * 10.0 + toFloat(ord(s[i]) - ord('0')) - inc(i) - while s[i] == '_': inc(i) - # Decimal? - if s[i] == '.': - var hd = 1.0 - inc(i) - while s[i] in {'0'..'9'}: - # Read fractional part - flags = flags or 2 - result = result * 10.0 + toFloat(ord(s[i]) - ord('0')) - hd = hd * 10.0 - inc(i) - while s[i] == '_': inc(i) - result = result / hd # this complicated way preserves precision - # Again, read integer and fractional part - if flags == 0: - raise newException(EInvalidValue, "invalid float: " & s) - # Exponent? - if s[i] in {'e', 'E'}: - inc(i) - if s[i] == '+': - inc(i) - elif s[i] == '-': - esign = -1.0 - inc(i) - if s[i] notin {'0'..'9'}: - raise newException(EInvalidValue, "invalid float: " & s) - while s[i] in {'0'..'9'}: - exponent = exponent * 10 + ord(s[i]) - ord('0') - inc(i) - while s[i] == '_': inc(i) - # Calculate Exponent - var hd = 1.0 - for j in 1..exponent: - hd = hd * 10.0 - if esign > 0.0: result = result * hd - else: result = result / hd - # Not all characters are read? - if s[i] != '\0': raise newException(EInvalidValue, "invalid float: " & s) - # evaluate sign - result = result * sign - -proc toOct*(x: BiggestInt, len: int): string = - ## converts `x` into its octal representation. The resulting string is - ## always `len` characters long. No leading ``0o`` prefix is generated. - var - mask: BiggestInt = 7 - shift: BiggestInt = 0 - assert(len > 0) - result = newString(len) - for j in countdown(len-1, 0): - result[j] = chr(int((x and mask) shr shift) + ord('0')) - shift = shift + 3 - mask = mask shl 3 - -proc toBin*(x: BiggestInt, len: int): string = - ## converts `x` into its binary representation. The resulting string is - ## always `len` characters long. No leading ``0b`` prefix is generated. - var - mask: BiggestInt = 1 - shift: BiggestInt = 0 - assert(len > 0) - result = newString(len) - for j in countdown(len-1, 0): - result[j] = chr(int((x and mask) shr shift) + ord('0')) - shift = shift + 1 - mask = mask shl 1 - -proc escape*(s: string, prefix = "\"", suffix = "\""): string = - ## Escapes a string `s`. This does these operations (at the same time): - ## * replaces any ``\`` by ``\\`` - ## * replaces any ``'`` by ``\'`` - ## * replaces any ``"`` by ``\"`` - ## * replaces any other character in the set ``{'\0'..'\31', '\128'..'\255'}`` - ## by ``\xHH`` where ``HH`` is its hexadecimal value. - ## The procedure has been designed so that its output is usable for many - ## different common syntaxes. The resulting string is prefixed with - ## ``prefix`` and suffixed with ``suffix``. Both may be empty strings. - result = prefix - for c in items(s): - case c - of '\0'..'\31', '\128'..'\255': - add(result, '\\') - add(result, toHex(ord(c), 2)) - of '\\': add(result, "\\\\") - of '\'': add(result, "\\'") - of '\"': add(result, "\\\"") - else: add(result, c) - add(result, suffix) - -proc validEmailAddress*(s: string): bool = - ## returns true if `s` seems to be a valid e-mail address. - ## The checking also uses a domain list. - const - chars = Letters + Digits + {'!','#','$','%','&', - '\'','*','+','/','=','?','^','_','`','{','}','|','~','-','.'} - var i = 0 - if s[i] notin chars or s[i] == '.': return false - while s[i] in chars: - if s[i] == '.' and s[i+1] == '.': return false - inc(i) - if s[i] != '@': return false - var j = len(s)-1 - if s[j] notin letters: return false - while j >= i and s[j] in letters: dec(j) - inc(i) # skip '@' - while s[i] in {'0'..'9', 'a'..'z', '-', '.'}: inc(i) - if s[i] != '\0': return false - - var x = copy(s, j+1) - if len(x) == 2 and x[0] in Letters and x[1] in Letters: return true - case toLower(x) - of "com", "org", "net", "gov", "mil", "biz", "info", "mobi", "name", - "aero", "jobs", "museum": return true - return false - -proc validIdentifier*(s: string): bool = - ## returns true if `s` is a valid identifier. A valid identifier starts - ## with a character of the set `IdentStartChars` and is followed by any - ## number of characters of the set `IdentChars`. - if s[0] in IdentStartChars: - for i in 1..s.len-1: - if s[i] notin IdentChars: return false - return true - -proc editDistance*(a, b: string): int = - ## returns the edit distance between `a` and `b`. This uses the Levenshtein - ## distance algorithm with only a linear memory overhead. This implementation - ## is highly optimized! - var len1 = a.len - var len2 = b.len - if len1 > len2: - # make `b` the longer string - return editDistance(b, a) - - # strip common prefix: - var s = 0 - while a[s] == b[s] and a[s] != '\0': - inc(s) - dec(len1) - dec(len2) - # strip common suffix: - while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]: - dec(len1) - dec(len2) - # trivial cases: - if len1 == 0: return len2 - if len2 == 0: return len1 - - # another special case: - if len1 == 1: - for j in s..len2-1: - if a[s] == b[j]: return len2 - 1 - return len2 - - inc(len1) - inc(len2) - var half = len1 shr 1 - # initalize first row: - #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2 * sizeof(int))) - var row: seq[int] - newSeq(row, len2) - var e = s + len2 - 1 # end marker - for i in 1..len2 - half - 1: row[i] = i - row[0] = len1 - half - 1 - for i in 1 .. len1 - 1: - var char1 = a[i + s - 1] - var char2p: int - var D, x: int - var p: int - if i >= len1 - half: - # skip the upper triangle: - var offset = i - len1 + half - char2p = offset - p = offset - var c3 = row[p] + ord(char1 != b[s + char2p]) - inc(p) - inc(char2p) - x = row[p] + 1 - D = x - if x > c3: x = c3 - row[p] = x - inc(p) - else: - p = 1 - char2p = 0 - D = i - x = i - if i <= half + 1: - # skip the lower triangle: - e = len2 + i - half - 2 - # main: - while p <= e: - dec(D) - var c3 = D + ord(char1 != b[char2p + s]) - inc(char2p) - inc(x) - if x > c3: x = c3 - D = row[p] + 1 - if x > D: x = D - row[p] = x - inc(p) - # lower triangle sentinel: - if i <= half: - dec(D) - var c3 = D + ord(char1 != b[char2p + s]) - inc(x) - if x > c3: x = c3 - row[p] = x - result = row[e] - #dealloc(row) - -{.pop.} diff --git a/nimlib/pure/terminal.nim b/nimlib/pure/terminal.nim deleted file mode 100755 index 42bd80cb4..000000000 --- a/nimlib/pure/terminal.nim +++ /dev/null @@ -1,310 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module contains a few procedures to control the *terminal* -## (also called *console*). On UNIX, the implementation simply uses ANSI escape -## sequences and does not depend on any other module, on Windows it uses the -## Windows API. -## Changing the style is permanent even after program termination! Use the -## code ``system.addQuitProc(resetAttributes)`` to restore the defaults. - -when defined(windows): - import windows, os - - var - conHandle: THandle - # = createFile("CONOUT$", GENERIC_WRITE, 0, nil, OPEN_ALWAYS, 0, 0) - - block: - var hTemp = GetStdHandle(STD_OUTPUT_HANDLE()) - if DuplicateHandle(GetCurrentProcess(), hTemp, GetCurrentProcess(), - addr(conHandle), 0, 1, DUPLICATE_SAME_ACCESS) == 0: - OSError() - - proc getCursorPos(): tuple [x,y: int] = - var c: TCONSOLE_SCREEN_BUFFER_INFO - if GetConsoleScreenBufferInfo(conHandle, addr(c)) == 0: OSError() - return (int(c.dwCursorPosition.x), int(c.dwCursorPosition.y)) - - proc getAttributes(): int16 = - var c: TCONSOLE_SCREEN_BUFFER_INFO - # workaround Windows bugs: try several times - if GetConsoleScreenBufferInfo(conHandle, addr(c)) != 0: - return c.wAttributes - else: - OSError() - return 0x70'i16 # ERROR: return white background, black text - - var - oldAttr = getAttributes() - -proc setCursorPos*(x, y: int) = - ## sets the terminal's cursor to the (x,y) position. (0,0) is the - ## upper left of the screen. - when defined(windows): - var c: TCoord - c.x = int16(x) - c.y = int16(y) - if SetConsoleCursorPosition(conHandle, c) == 0: OSError() - else: - stdout.write("\e[" & $y & ';' & $x & 'f') - -proc setCursorXPos*(x: int) = - ## sets the terminal's cursor to the x position. The y position is - ## not changed. - when defined(windows): - var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO - var hStdout = conHandle - if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError() - var origin = scrbuf.dwCursorPosition - origin.x = int16(x) - if SetConsoleCursorPosition(conHandle, origin) == 0: OSError() - else: - stdout.write("\e[" & $x & 'G') - -when defined(windows): - proc setCursorYPos*(y: int) = - ## sets the terminal's cursor to the y position. The x position is - ## not changed. **Warning**: This is not supported on UNIX! - when defined(windows): - var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO - var hStdout = conHandle - if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError() - var origin = scrbuf.dwCursorPosition - origin.y = int16(y) - if SetConsoleCursorPosition(conHandle, origin) == 0: OSError() - else: - nil - -proc CursorUp*(count=1) = - ## Moves the cursor up by `count` rows. - when defined(windows): - var p = getCursorPos() - dec(p.y, count) - setCursorPos(p.x, p.y) - else: - stdout.write("\e[" & $count & 'A') - -proc CursorDown*(count=1) = - ## Moves the cursor down by `count` rows. - when defined(windows): - var p = getCursorPos() - inc(p.y, count) - setCursorPos(p.x, p.y) - else: - stdout.write("\e[" & $count & 'B') - -proc CursorForward*(count=1) = - ## Moves the cursor forward by `count` columns. - when defined(windows): - var p = getCursorPos() - inc(p.x, count) - setCursorPos(p.x, p.y) - else: - stdout.write("\e[" & $count & 'C') - -proc CursorBackward*(count=1) = - ## Moves the cursor backward by `count` columns. - when defined(windows): - var p = getCursorPos() - dec(p.x, count) - setCursorPos(p.x, p.y) - else: - stdout.write("\e[" & $count & 'D') - -when true: - nil -else: - proc EraseLineEnd* = - ## Erases from the current cursor position to the end of the current line. - when defined(windows): - nil - else: - stdout.write("\e[K") - - proc EraseLineStart* = - ## Erases from the current cursor position to the start of the current line. - when defined(windows): - nil - else: - stdout.write("\e[1K") - - proc EraseDown* = - ## Erases the screen from the current line down to the bottom of the screen. - when defined(windows): - nil - else: - stdout.write("\e[J") - - proc EraseUp* = - ## Erases the screen from the current line up to the top of the screen. - when defined(windows): - nil - else: - stdout.write("\e[1J") - -proc EraseLine* = - ## Erases the entire current line. - when defined(windows): - var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO - var numwrote: DWORD - var hStdout = conHandle - if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError() - var origin = scrbuf.dwCursorPosition - origin.x = 0'i16 - if SetConsoleCursorPosition(conHandle, origin) == 0: OSError() - var ht = scrbuf.dwSize.Y - origin.Y - var wt = scrbuf.dwSize.X - origin.X - if FillConsoleOutputCharacter(hStdout,' ', ht*wt, - origin, addr(numwrote)) == 0: - OSError() - if FillConsoleOutputAttribute(hStdout, scrbuf.wAttributes, ht * wt, - scrbuf.dwCursorPosition, addr(numwrote)) == 0: - OSError() - else: - stdout.write("\e[2K") - setCursorXPos(0) - -proc EraseScreen* = - ## Erases the screen with the background colour and moves the cursor to home. - when defined(windows): - var scrbuf: TCONSOLE_SCREEN_BUFFER_INFO - var numwrote: DWORD - var origin: TCoord # is inititalized to 0, 0 - var hStdout = conHandle - if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: OSError() - if FillConsoleOutputCharacter(hStdout, ' ', scrbuf.dwSize.X*scrbuf.dwSize.Y, - origin, addr(numwrote)) == 0: - OSError() - if FillConsoleOutputAttribute(hStdout, scrbuf.wAttributes, - scrbuf.dwSize.X * scrbuf.dwSize.Y, - origin, addr(numwrote)) == 0: - OSError() - setCursorXPos(0) - else: - stdout.write("\e[2J") - -proc ResetAttributes* {.noconv.} = - ## resets all attributes; it is advisable to register this as a quit proc - ## with ``system.addQuitProc(resetAttributes)``. - when defined(windows): - discard SetConsoleTextAttribute(conHandle, oldAttr) - else: - stdout.write("\e[0m") - -type - TStyle* = enum ## different styles for text output - styleBright = 1, ## bright text - styleDim, ## dim text - styleUnknown, ## unknown - styleUnderscore = 4, ## underscored text - styleBlink, ## blinking/bold text - styleReverse, ## unknown - styleHidden ## hidden text - -when not defined(windows): - var - gFG = 0 - gBG = 0 - -proc WriteStyled*(txt: string, style: set[TStyle] = {styleBright}) = - ## writes the text `txt` in a given `style`. - when defined(windows): - var a = 0'i16 - if styleBright in style: a = a or int16(FOREGROUND_INTENSITY) - if styleBlink in style: a = a or int16(BACKGROUND_INTENSITY) - if styleReverse in style: a = a or 0x4000'i16 # COMMON_LVB_REVERSE_VIDEO - if styleUnderscore in style: a = a or 0x8000'i16 # COMMON_LVB_UNDERSCORE - var old = getAttributes() - discard SetConsoleTextAttribute(conHandle, old or a) - stdout.write(txt) - discard SetConsoleTextAttribute(conHandle, old) - else: - for s in items(style): - stdout.write("\e[" & $ord(s) & 'm') - stdout.write(txt) - resetAttributes() - if gFG != 0: - stdout.write("\e[" & $ord(gFG) & 'm') - if gBG != 0: - stdout.write("\e[" & $ord(gBG) & 'm') - -type - TForegroundColor* = enum ## terminal's foreground colors - fgBlack = 30, ## black - fgRed, ## red - fgGreen, ## green - fgYellow, ## yellow - fgBlue, ## blue - fgMagenta, ## magenta - fgCyan, ## cyan - fgWhite ## white - - TBackgroundColor* = enum ## terminal's background colors - bgBlack = 40, ## black - bgRed, ## red - bgGreen, ## green - bgYellow, ## yellow - bgBlue, ## blue - bgMagenta, ## magenta - bgCyan, ## cyan - bgWhite ## white - -proc setForegroundColor*(fg: TForegroundColor, bright=false) = - ## sets the terminal's foreground color - when defined(windows): - var old = getAttributes() and not 0x0007 - if bright: - old = old or FOREGROUND_INTENSITY - const lookup: array [TForegroundColor, int] = [ - 0, - (FOREGROUND_RED), - (FOREGROUND_GREEN), - (FOREGROUND_RED or FOREGROUND_GREEN), - (FOREGROUND_BLUE), - (FOREGROUND_RED or FOREGROUND_BLUE), - (FOREGROUND_BLUE or FOREGROUND_GREEN), - (FOREGROUND_BLUE or FOREGROUND_GREEN or FOREGROUND_RED)] - discard SetConsoleTextAttribute(conHandle, toU16(old or lookup[fg])) - else: - gFG = ord(fg) - if bright: inc(gFG, 60) - stdout.write("\e[" & $gFG & 'm') - -proc setBackgroundColor*(bg: TBackgroundColor, bright=false) = - ## sets the terminal's background color - when defined(windows): - var old = getAttributes() and not 0x0070 - if bright: - old = old or BACKGROUND_INTENSITY - const lookup: array [TBackgroundColor, int] = [ - 0, - (BACKGROUND_RED), - (BACKGROUND_GREEN), - (BACKGROUND_RED or BACKGROUND_GREEN), - (BACKGROUND_BLUE), - (BACKGROUND_RED or BACKGROUND_BLUE), - (BACKGROUND_BLUE or BACKGROUND_GREEN), - (BACKGROUND_BLUE or BACKGROUND_GREEN or BACKGROUND_RED)] - discard SetConsoleTextAttribute(conHandle, toU16(old or lookup[bg])) - else: - gBG = ord(bg) - if bright: inc(gBG, 60) - stdout.write("\e[" & $gBG & 'm') - -when isMainModule: - system.addQuitProc(resetAttributes) - write(stdout, "never mind") - eraseLine() - #setCursorPos(2, 2) - writeStyled("styled text ", {styleBright, styleBlink, styleUnderscore}) - setBackGroundColor(bgCyan, true) - setForeGroundColor(fgBlue) - writeln(stdout, "ordinary text") - diff --git a/nimlib/pure/times.nim b/nimlib/pure/times.nim deleted file mode 100755 index 8c21b6027..000000000 --- a/nimlib/pure/times.nim +++ /dev/null @@ -1,307 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - - -## This module contains routines and types for dealing with time. -## This module is available for the ECMAScript target. - -{.push debugger:off .} # the user does not want to trace a part - # of the standard library! - -import - strutils - -type - TMonth* = enum ## represents a month - mJan, mFeb, mMar, mApr, mMay, mJun, mJul, mAug, mSep, mOct, mNov, mDec - TWeekDay* = enum ## represents a weekday - dMon, dTue, dWed, dThu, dFri, dSat, dSun - -when defined(posix): - type - TTime* = distinct int ## distinct type that represents a time -elif defined(windows): - when defined(vcc): - # newest version of Visual C++ defines time_t to be of 64 bits - type TTime* = distinct int64 - else: - type TTime* = distinct int32 -elif defined(ECMAScript): - type - TTime* {.final.} = object - getDay: proc (): int - getFullYear: proc (): int - getHours: proc (): int - getMilliseconds: proc (): int - getMinutes: proc (): int - getMonth: proc (): int - getSeconds: proc (): int - getTime: proc (): int - getTimezoneOffset: proc (): int - getUTCDate: proc (): int - getUTCFullYear: proc (): int - getUTCHours: proc (): int - getUTCMilliseconds: proc (): int - getUTCMinutes: proc (): int - getUTCMonth: proc (): int - getUTCSeconds: proc (): int - getYear: proc (): int - parse: proc (s: cstring): TTime - setDate: proc (x: int) - setFullYear: proc (x: int) - setHours: proc (x: int) - setMilliseconds: proc (x: int) - setMinutes: proc (x: int) - setMonth: proc (x: int) - setSeconds: proc (x: int) - setTime: proc (x: int) - setUTCDate: proc (x: int) - setUTCFullYear: proc (x: int) - setUTCHours: proc (x: int) - setUTCMilliseconds: proc (x: int) - setUTCMinutes: proc (x: int) - setUTCMonth: proc (x: int) - setUTCSeconds: proc (x: int) - setYear: proc (x: int) - toGMTString: proc (): cstring - toLocaleString: proc (): cstring - UTC: proc (): int - -type - TTimeInfo* = object of TObject ## represents a time in different parts - second*: range[0..61] ## The number of seconds after the minute, - ## normally in the range 0 to 59, but can - ## be up to 61 to allow for leap seconds. - minute*: range[0..59] ## The number of minutes after the hour, - ## in the range 0 to 59. - hour*: range[0..23] ## The number of hours past midnight, - ## in the range 0 to 23. - monthday*: range[1..31] ## The day of the month, in the range 1 to 31. - month*: TMonth ## The current month. - year*: int ## The current year. - weekday*: TWeekDay ## The current day of the week. - yearday*: range[0..365] ## The number of days since January 1, - ## in the range 0 to 365. - ## Always 0 if the target is ECMAScript. - -proc getTime*(): TTime ## gets the current calendar time -proc getLocalTime*(t: TTime): TTimeInfo - ## converts the calendar time `t` to broken-time representation, - ## expressed relative to the user's specified time zone. -proc getGMTime*(t: TTime): TTimeInfo - ## converts the calendar time `t` to broken-down time representation, - ## expressed in Coordinated Universal Time (UTC). - -proc TimeInfoToTime*(timeInfo: TTimeInfo): TTime - ## converts a broken-down time structure, expressed as local time, to - ## calendar time representation. The function ignores the specified - ## contents of the structure members `weekday` and `yearday` and recomputes - ## them from the other information in the broken-down time structure. - -proc `$` *(timeInfo: TTimeInfo): string - ## converts a `TTimeInfo` object to a string representation. -proc `$` *(time: TTime): string - ## converts a calendar time to a string representation. - -proc getDateStr*(): string - ## gets the current date as a string of the format - ## ``YYYY-MM-DD``. -proc getClockStr*(): string - ## gets the current clock time as a string of the format ``HH:MM:SS``. - -proc `-` *(a, b: TTime): int64 - ## computes the difference of two calendar times. Result is in seconds. - -proc `<` * (a, b: TTime): bool = - ## returns true iff ``a < b``, that is iff a happened before b. - result = a - b < 0 - -proc `<=` * (a, b: TTime): bool = - ## returns true iff ``a <= b``. - result = a - b <= 0 - -proc getStartMilsecs*(): int - ## get the miliseconds from the start of the program - - -when not defined(ECMAScript): - # C wrapper: - type - structTM {.importc: "struct tm", final.} = object - second {.importc: "tm_sec".}, - minute {.importc: "tm_min".}, - hour {.importc: "tm_hour".}, - monthday {.importc: "tm_mday".}, - month {.importc: "tm_mon".}, - year {.importc: "tm_year".}, - weekday {.importc: "tm_wday".}, - yearday {.importc: "tm_yday".}, - isdst {.importc: "tm_isdst".}: cint - - PTimeInfo = ptr structTM - PTime = ptr TTime - - TClock {.importc: "clock_t".} = range[low(int)..high(int)] - - proc localtime(timer: PTime): PTimeInfo {. - importc: "localtime", header: "<time.h>".} - proc gmtime(timer: PTime): PTimeInfo {.importc: "gmtime", header: "<time.h>".} - proc timec(timer: PTime): TTime {.importc: "time", header: "<time.h>".} - proc mktime(t: structTM): TTime {.importc: "mktime", header: "<time.h>".} - proc asctime(tblock: structTM): CString {. - importc: "asctime", header: "<time.h>".} - proc ctime(time: PTime): CString {.importc: "ctime", header: "<time.h>".} - # strftime(s: CString, maxsize: int, fmt: CString, t: tm): int {. - # importc: "strftime", header: "<time.h>".} - proc clock(): TClock {.importc: "clock", header: "<time.h>".} - proc difftime(a, b: TTime): float {.importc: "difftime", header: "<time.h>".} - - var - clocksPerSec {.importc: "CLOCKS_PER_SEC", nodecl.}: int - - - # our own procs on top of that: - proc tmToTimeInfo(tm: structTM): TTimeInfo = - const - weekDays: array [0..6, TWeekDay] = [ - dSun, dMon, dTue, dWed, dThu, dFri, dSat] - result.second = int(tm.second) - result.minute = int(tm.minute) - result.hour = int(tm.hour) - result.monthday = int(tm.monthday) - result.month = TMonth(tm.month) - result.year = tm.year + 1900'i32 - result.weekday = weekDays[int(tm.weekDay)] - result.yearday = int(tm.yearday) - - proc timeInfoToTM(t: TTimeInfo): structTM = - const - weekDays: array [TWeekDay, int] = [1, 2, 3, 4, 5, 6, 0] - result.second = t.second - result.minute = t.minute - result.hour = t.hour - result.monthday = t.monthday - result.month = ord(t.month) - result.year = t.year - 1900 - result.weekday = weekDays[t.weekDay] - result.yearday = t.yearday - result.isdst = -1 - - proc `-` (a, b: TTime): int64 = - return toBiggestInt(difftime(a, b)) - - proc getStartMilsecs(): int = return clock() div (clocksPerSec div 1000) - proc getTime(): TTime = return timec(nil) - proc getLocalTime(t: TTime): TTimeInfo = - var a = t - result = tmToTimeInfo(localtime(addr(a))^) - # copying is needed anyway to provide reentrancity; thus - # the convertion is not expensive - - proc getGMTime(t: TTime): TTimeInfo = - var a = t - result = tmToTimeInfo(gmtime(addr(a))^) - # copying is needed anyway to provide reentrancity; thus - # the convertion is not expensive - - proc TimeInfoToTime(timeInfo: TTimeInfo): TTime = - var cTimeInfo = timeInfo # for C++ we have to make a copy, - # because the header of mktime is broken in my version of libc - return mktime(timeInfoToTM(cTimeInfo)) - - proc toStringTillNL(p: cstring): string = - result = "" - var i = 0 - while p[i] != '\0' and p[i] != '\10' and p[i] != '\13': - add(result, p[i]) - inc(i) - return result - - proc `$`(timeInfo: TTimeInfo): string = - # BUGFIX: asctime returns a newline at the end! - var p = asctime(timeInfoToTM(timeInfo)) - result = toStringTillNL(p) - - proc `$`(time: TTime): string = - # BUGFIX: ctime returns a newline at the end! - var a = time - return toStringTillNL(ctime(addr(a))) - - const - epochDiff = 116444736000000000'i64 - rateDiff = 10000000'i64 # 100 nsecs - - proc unixTimeToWinTime*(t: TTime): int64 = - ## converts a UNIX `TTime` (``time_t``) to a Windows file time - result = int64(t) * rateDiff + epochDiff - - proc winTimeToUnixTime*(t: int64): TTime = - ## converts a Windows time to a UNIX `TTime` (``time_t``) - result = TTime((t - epochDiff) div rateDiff) - -else: - proc getTime(): TTime {.importc: "new Date", nodecl.} - - const - weekDays: array [0..6, TWeekDay] = [ - dSun, dMon, dTue, dWed, dThu, dFri, dSat] - - proc getLocalTime(t: TTime): TTimeInfo = - result.second = t.getSeconds() - result.minute = t.getMinutes() - result.hour = t.getHours() - result.monthday = t.getDate() - result.month = TMonth(t.getMonth()) - result.year = t.getFullYear() - result.weekday = weekDays[t.getDay()] - result.yearday = 0 - - proc getGMTime(t: TTime): TTimeInfo = - result.second = t.getUTCSeconds() - result.minute = t.getUTCMinutes() - result.hour = t.getUTCHours() - result.monthday = t.getUTCDate() - result.month = TMonth(t.getUTCMonth()) - result.year = t.getUTCFullYear() - result.weekday = weekDays[t.getDay()] - result.yearday = 0 - - proc TimeInfoToTime*(timeInfo: TTimeInfo): TTime = - result = getTime() - result.setSeconds(timeInfo.second) - result.setMinutes(timeInfo.minute) - result.setHours(timeInfo.hour) - result.setMonth(ord(timeInfo.month)) - result.setFullYear(timeInfo.year) - result.setDate(timeInfo.monthday) - - proc `$`(timeInfo: TTimeInfo): string = return $(TimeInfoToTIme(timeInfo)) - proc `$`(time: TTime): string = $time.toLocaleString() - - proc `-` (a, b: TTime): int64 = - return a.getTime() - b.getTime() - - var - startMilsecs = getTime() - - proc getStartMilsecs(): int = - ## get the miliseconds from the start of the program - return int(getTime() - startMilsecs) - -proc getDateStr(): string = - var ti = getLocalTime(getTime()) - result = $ti.year & '-' & intToStr(ord(ti.month)+1, 2) & - '-' & intToStr(ti.monthDay, 2) - -proc getClockStr(): string = - var ti = getLocalTime(getTime()) - result = intToStr(ti.hour, 2) & ':' & intToStr(ti.minute, 2) & - ':' & intToStr(ti.second, 2) - -{.pop.} diff --git a/nimlib/pure/unicode.nim b/nimlib/pure/unicode.nim deleted file mode 100755 index 2a53d7660..000000000 --- a/nimlib/pure/unicode.nim +++ /dev/null @@ -1,1178 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module provides support to handle the Unicode UTF-8 encoding. - -{.deadCodeElim: on.} - -type - irune = int # underlying type of TRune - TRune* = distinct irune ## type that can hold any Unicode character - TRune16* = distinct int16 ## 16 bit Unicode character - -proc `<=%`*(a, b: TRune): bool {.borrow.} -proc `<%`*(a, b: TRune): bool {.borrow.} -proc `==`*(a, b: TRune): bool {.borrow.} - -template ones(n: expr): expr = ((1 shl n)-1) - -proc runeLen*(s: string): int = - ## returns the number of Unicode characters of the string `s`. - var i = 0 - while i < len(s): - if ord(s[i]) <=% 127: inc(i) - elif ord(s[i]) shr 5 == 0b110: inc(i, 2) - elif ord(s[i]) shr 4 == 0b1110: inc(i, 3) - elif ord(s[i]) shr 3 == 0b11110: inc(i, 4) - else: assert(false) - inc(result) - -proc runeLenAt*(s: string, i: int): int = - ## returns the number of bytes the rune starting at ``s[i]`` takes. - if ord(s[i]) <=% 127: result = 1 - elif ord(s[i]) shr 5 == 0b110: result = 2 - elif ord(s[i]) shr 4 == 0b1110: result = 3 - elif ord(s[i]) shr 3 == 0b11110: result = 4 - else: assert(false) - -template fastRuneAt*(s: string, i: int, result: expr, doInc = true) = - ## Returns the unicode character ``s[i]`` in `result`. If ``doInc == true`` - ## `i` is incremented by the number of bytes that have been processed. - if ord(s[i]) <=% 127: - result = TRune(ord(s[i])) - when doInc: inc(i) - elif ord(s[i]) shr 5 == 0b110: - assert(ord(s[i+1]) shr 6 == 0b10) - result = TRune((ord(s[i]) and (bind ones(5))) shl 6 or - (ord(s[i+1]) and ones(6))) - when doInc: inc(i, 2) - elif ord(s[i]) shr 4 == 0b1110: - assert(ord(s[i+1]) shr 6 == 0b10) - assert(ord(s[i+2]) shr 6 == 0b10) - result = TRune((ord(s[i]) and ones(4)) shl 12 or - (ord(s[i+1]) and ones(6)) shl 6 or - (ord(s[i+2]) and ones(6))) - when doInc: inc(i, 3) - elif ord(s[i]) shr 3 == 0b11110: - assert(ord(s[i+1]) shr 6 == 0b10) - assert(ord(s[i+2]) shr 6 == 0b10) - assert(ord(s[i+3]) shr 6 == 0b10) - result = TRune((ord(s[i]) and ones(3)) shl 18 or - (ord(s[i+1]) and ones(6)) shl 12 or - (ord(s[i+2]) and ones(6)) shl 6 or - (ord(s[i+3]) and ones(6))) - when doInc: inc(i, 4) - else: - assert(false) - -proc runeAt*(s: string, i: int): TRune = - ## returns the unicode character in `s` at byte index `i` - fastRuneAt(s, i, result, false) - -proc toUTF8*(c: TRune): string = - ## converts a rune into its UTF8 representation - var i = irune(c) - if i <=% 127: - result = newString(1) - result[0] = chr(i) - elif i <=% 0x07FF: - result = newString(2) - result[0] = chr(i shr 6 or 0b110_0000) - result[1] = chr(i and ones(6) or 0b10_000000) - elif i <=% 0xFFFF: - result = newString(3) - result[0] = chr(i shr 12 or 0b1110_0000) - result[1] = chr(i shr 6 and ones(6) or 0b10_0000_00) - result[2] = chr(i and ones(6) or 0b10_0000_00) - elif i <=% 0x0010FFFF: - result = newString(4) - result[0] = chr(i shr 18 or 0b1111_0000) - result[1] = chr(i shr 12 and ones(6) or 0b10_0000_00) - result[2] = chr(i shr 6 and ones(6) or 0b10_0000_00) - result[3] = chr(i and ones(6) or 0b10_0000_00) - else: - assert false - -const - alphaRanges = [ - 0x00d8, 0x00f6, # Ø - ö - 0x00f8, 0x01f5, # ø - ǵ - 0x0250, 0x02a8, # ɐ - ʨ - 0x038e, 0x03a1, # Ύ - Ρ - 0x03a3, 0x03ce, # Σ - ώ - 0x03d0, 0x03d6, # ϐ - ϖ - 0x03e2, 0x03f3, # Ϣ - ϳ - 0x0490, 0x04c4, # Ґ - ӄ - 0x0561, 0x0587, # ա - և - 0x05d0, 0x05ea, # א - ת - 0x05f0, 0x05f2, # װ - ײ - 0x0621, 0x063a, # ء - غ - 0x0640, 0x064a, # ـ - ي - 0x0671, 0x06b7, # ٱ - ڷ - 0x06ba, 0x06be, # ں - ھ - 0x06c0, 0x06ce, # ۀ - ێ - 0x06d0, 0x06d3, # ې - ۓ - 0x0905, 0x0939, # अ - ह - 0x0958, 0x0961, # क़ - ॡ - 0x0985, 0x098c, # অ - ঌ - 0x098f, 0x0990, # এ - ঐ - 0x0993, 0x09a8, # ও - ন - 0x09aa, 0x09b0, # প - র - 0x09b6, 0x09b9, # শ - হ - 0x09dc, 0x09dd, # ড় - ঢ় - 0x09df, 0x09e1, # য় - ৡ - 0x09f0, 0x09f1, # ৰ - ৱ - 0x0a05, 0x0a0a, # ਅ - ਊ - 0x0a0f, 0x0a10, # ਏ - ਐ - 0x0a13, 0x0a28, # ਓ - ਨ - 0x0a2a, 0x0a30, # ਪ - ਰ - 0x0a32, 0x0a33, # ਲ - ਲ਼ - 0x0a35, 0x0a36, # ਵ - ਸ਼ - 0x0a38, 0x0a39, # ਸ - ਹ - 0x0a59, 0x0a5c, # ਖ਼ - ੜ - 0x0a85, 0x0a8b, # અ - ઋ - 0x0a8f, 0x0a91, # એ - ઑ - 0x0a93, 0x0aa8, # ઓ - ન - 0x0aaa, 0x0ab0, # પ - ર - 0x0ab2, 0x0ab3, # લ - ળ - 0x0ab5, 0x0ab9, # વ - હ - 0x0b05, 0x0b0c, # ଅ - ଌ - 0x0b0f, 0x0b10, # ଏ - ଐ - 0x0b13, 0x0b28, # ଓ - ନ - 0x0b2a, 0x0b30, # ପ - ର - 0x0b32, 0x0b33, # ଲ - ଳ - 0x0b36, 0x0b39, # ଶ - ହ - 0x0b5c, 0x0b5d, # ଡ଼ - ଢ଼ - 0x0b5f, 0x0b61, # ୟ - ୡ - 0x0b85, 0x0b8a, # அ - ஊ - 0x0b8e, 0x0b90, # எ - ஐ - 0x0b92, 0x0b95, # ஒ - க - 0x0b99, 0x0b9a, # ங - ச - 0x0b9e, 0x0b9f, # ஞ - ட - 0x0ba3, 0x0ba4, # ண - த - 0x0ba8, 0x0baa, # ந - ப - 0x0bae, 0x0bb5, # ம - வ - 0x0bb7, 0x0bb9, # ஷ - ஹ - 0x0c05, 0x0c0c, # అ - ఌ - 0x0c0e, 0x0c10, # ఎ - ఐ - 0x0c12, 0x0c28, # ఒ - న - 0x0c2a, 0x0c33, # ప - ళ - 0x0c35, 0x0c39, # వ - హ - 0x0c60, 0x0c61, # ౠ - ౡ - 0x0c85, 0x0c8c, # ಅ - ಌ - 0x0c8e, 0x0c90, # ಎ - ಐ - 0x0c92, 0x0ca8, # ಒ - ನ - 0x0caa, 0x0cb3, # ಪ - ಳ - 0x0cb5, 0x0cb9, # ವ - ಹ - 0x0ce0, 0x0ce1, # ೠ - ೡ - 0x0d05, 0x0d0c, # അ - ഌ - 0x0d0e, 0x0d10, # എ - ഐ - 0x0d12, 0x0d28, # ഒ - ന - 0x0d2a, 0x0d39, # പ - ഹ - 0x0d60, 0x0d61, # ൠ - ൡ - 0x0e01, 0x0e30, # ก - ะ - 0x0e32, 0x0e33, # า - ำ - 0x0e40, 0x0e46, # เ - ๆ - 0x0e5a, 0x0e5b, # ๚ - ๛ - 0x0e81, 0x0e82, # ກ - ຂ - 0x0e87, 0x0e88, # ງ - ຈ - 0x0e94, 0x0e97, # ດ - ທ - 0x0e99, 0x0e9f, # ນ - ຟ - 0x0ea1, 0x0ea3, # ມ - ຣ - 0x0eaa, 0x0eab, # ສ - ຫ - 0x0ead, 0x0eae, # ອ - ຮ - 0x0eb2, 0x0eb3, # າ - ຳ - 0x0ec0, 0x0ec4, # ເ - ໄ - 0x0edc, 0x0edd, # ໜ - ໝ - 0x0f18, 0x0f19, # ༘ - ༙ - 0x0f40, 0x0f47, # ཀ - ཇ - 0x0f49, 0x0f69, # ཉ - ཀྵ - 0x10d0, 0x10f6, # ა - ჶ - 0x1100, 0x1159, # ᄀ - ᅙ - 0x115f, 0x11a2, # ᅟ - ᆢ - 0x11a8, 0x11f9, # ᆨ - ᇹ - 0x1e00, 0x1e9b, # Ḁ - ẛ - 0x1f50, 0x1f57, # ὐ - ὗ - 0x1f80, 0x1fb4, # ᾀ - ᾴ - 0x1fb6, 0x1fbc, # ᾶ - ᾼ - 0x1fc2, 0x1fc4, # ῂ - ῄ - 0x1fc6, 0x1fcc, # ῆ - ῌ - 0x1fd0, 0x1fd3, # ῐ - ΐ - 0x1fd6, 0x1fdb, # ῖ - Ί - 0x1fe0, 0x1fec, # ῠ - Ῥ - 0x1ff2, 0x1ff4, # ῲ - ῴ - 0x1ff6, 0x1ffc, # ῶ - ῼ - 0x210a, 0x2113, # ℊ - ℓ - 0x2115, 0x211d, # ℕ - ℝ - 0x2120, 0x2122, # ℠ - ™ - 0x212a, 0x2131, # K - ℱ - 0x2133, 0x2138, # ℳ - ℸ - 0x3041, 0x3094, # ぁ - ゔ - 0x30a1, 0x30fa, # ァ - ヺ - 0x3105, 0x312c, # ㄅ - ㄬ - 0x3131, 0x318e, # ㄱ - ㆎ - 0x3192, 0x319f, # ㆒ - ㆟ - 0x3260, 0x327b, # ㉠ - ㉻ - 0x328a, 0x32b0, # ㊊ - ㊰ - 0x32d0, 0x32fe, # ㋐ - ㋾ - 0x3300, 0x3357, # ㌀ - ㍗ - 0x3371, 0x3376, # ㍱ - ㍶ - 0x337b, 0x3394, # ㍻ - ㎔ - 0x3399, 0x339e, # ㎙ - ㎞ - 0x33a9, 0x33ad, # ㎩ - ㎭ - 0x33b0, 0x33c1, # ㎰ - ㏁ - 0x33c3, 0x33c5, # ㏃ - ㏅ - 0x33c7, 0x33d7, # ㏇ - ㏗ - 0x33d9, 0x33dd, # ㏙ - ㏝ - 0x4e00, 0x9fff, # 一 - 鿿 - 0xac00, 0xd7a3, # 가 - 힣 - 0xf900, 0xfb06, # 豈 - st - 0xfb13, 0xfb17, # ﬓ - ﬗ - 0xfb1f, 0xfb28, # ײַ - ﬨ - 0xfb2a, 0xfb36, # שׁ - זּ - 0xfb38, 0xfb3c, # טּ - לּ - 0xfb40, 0xfb41, # נּ - סּ - 0xfb43, 0xfb44, # ףּ - פּ - 0xfb46, 0xfbb1, # צּ - ﮱ - 0xfbd3, 0xfd3d, # ﯓ - ﴽ - 0xfd50, 0xfd8f, # ﵐ - ﶏ - 0xfd92, 0xfdc7, # ﶒ - ﷇ - 0xfdf0, 0xfdf9, # ﷰ - ﷹ - 0xfe70, 0xfe72, # ﹰ - ﹲ - 0xfe76, 0xfefc, # ﹶ - ﻼ - 0xff66, 0xff6f, # ヲ - ッ - 0xff71, 0xff9d, # ア - ン - 0xffa0, 0xffbe, # ᅠ - ᄒ - 0xffc2, 0xffc7, # ᅡ - ᅦ - 0xffca, 0xffcf, # ᅧ - ᅬ - 0xffd2, 0xffd7, # ᅭ - ᅲ - 0xffda, 0xffdc] # ᅳ - ᅵ - - alphaSinglets = [ - 0x00aa, # ª - 0x00b5, # µ - 0x00ba, # º - 0x03da, # Ϛ - 0x03dc, # Ϝ - 0x03de, # Ϟ - 0x03e0, # Ϡ - 0x06d5, # ە - 0x09b2, # ল - 0x0a5e, # ਫ਼ - 0x0a8d, # ઍ - 0x0ae0, # ૠ - 0x0b9c, # ஜ - 0x0cde, # ೞ - 0x0e4f, # ๏ - 0x0e84, # ຄ - 0x0e8a, # ຊ - 0x0e8d, # ຍ - 0x0ea5, # ລ - 0x0ea7, # ວ - 0x0eb0, # ະ - 0x0ebd, # ຽ - 0x1fbe, # ι - 0x207f, # ⁿ - 0x20a8, # ₨ - 0x2102, # ℂ - 0x2107, # ℇ - 0x2124, # ℤ - 0x2126, # Ω - 0x2128, # ℨ - 0xfb3e, # מּ - 0xfe74] # ﹴ - - spaceRanges = [ - 0x0009, 0x000a, # tab and newline - 0x0020, 0x0020, # space - 0x00a0, 0x00a0, # - 0x2000, 0x200b, # - - 0x2028, 0x2029, # - 0x3000, 0x3000, # - 0xfeff, 0xfeff] # - - toupperRanges = [ - 0x0061, 0x007a, 468, # a-z A-Z - 0x00e0, 0x00f6, 468, # à-ö À-Ö - 0x00f8, 0x00fe, 468, # ø-þ Ø-Þ - 0x0256, 0x0257, 295, # ɖ-ɗ Ɖ-Ɗ - 0x0258, 0x0259, 298, # ɘ-ə Ǝ-Ə - 0x028a, 0x028b, 283, # ʊ-ʋ Ʊ-Ʋ - 0x03ad, 0x03af, 463, # έ-ί Έ-Ί - 0x03b1, 0x03c1, 468, # α-ρ Α-Ρ - 0x03c3, 0x03cb, 468, # σ-ϋ Σ-Ϋ - 0x03cd, 0x03ce, 437, # ύ-ώ Ύ-Ώ - 0x0430, 0x044f, 468, # а-я А-Я - 0x0451, 0x045c, 420, # ё-ќ Ё-Ќ - 0x045e, 0x045f, 420, # ў-џ Ў-Џ - 0x0561, 0x0586, 452, # ա-ֆ Ա-Ֆ - 0x1f00, 0x1f07, 508, # ἀ-ἇ Ἀ-Ἇ - 0x1f10, 0x1f15, 508, # ἐ-ἕ Ἐ-Ἕ - 0x1f20, 0x1f27, 508, # ἠ-ἧ Ἠ-Ἧ - 0x1f30, 0x1f37, 508, # ἰ-ἷ Ἰ-Ἷ - 0x1f40, 0x1f45, 508, # ὀ-ὅ Ὀ-Ὅ - 0x1f60, 0x1f67, 508, # ὠ-ὧ Ὠ-Ὧ - 0x1f70, 0x1f71, 574, # ὰ-ά Ὰ-Ά - 0x1f72, 0x1f75, 586, # ὲ-ή Ὲ-Ή - 0x1f76, 0x1f77, 600, # ὶ-ί Ὶ-Ί - 0x1f78, 0x1f79, 628, # ὸ-ό Ὸ-Ό - 0x1f7a, 0x1f7b, 612, # ὺ-ύ Ὺ-Ύ - 0x1f7c, 0x1f7d, 626, # ὼ-ώ Ὼ-Ώ - 0x1f80, 0x1f87, 508, # ᾀ-ᾇ ᾈ-ᾏ - 0x1f90, 0x1f97, 508, # ᾐ-ᾗ ᾘ-ᾟ - 0x1fa0, 0x1fa7, 508, # ᾠ-ᾧ ᾨ-ᾯ - 0x1fb0, 0x1fb1, 508, # ᾰ-ᾱ Ᾰ-Ᾱ - 0x1fd0, 0x1fd1, 508, # ῐ-ῑ Ῐ-Ῑ - 0x1fe0, 0x1fe1, 508, # ῠ-ῡ Ῠ-Ῡ - 0x2170, 0x217f, 484, # ⅰ-ⅿ Ⅰ-Ⅿ - 0x24d0, 0x24e9, 474, # ⓐ-ⓩ Ⓐ-Ⓩ - 0xff41, 0xff5a, 468] # a-z A-Z - - toupperSinglets = [ - 0x00ff, 621, # ÿ Ÿ - 0x0101, 499, # ā Ā - 0x0103, 499, # ă Ă - 0x0105, 499, # ą Ą - 0x0107, 499, # ć Ć - 0x0109, 499, # ĉ Ĉ - 0x010b, 499, # ċ Ċ - 0x010d, 499, # č Č - 0x010f, 499, # ď Ď - 0x0111, 499, # đ Đ - 0x0113, 499, # ē Ē - 0x0115, 499, # ĕ Ĕ - 0x0117, 499, # ė Ė - 0x0119, 499, # ę Ę - 0x011b, 499, # ě Ě - 0x011d, 499, # ĝ Ĝ - 0x011f, 499, # ğ Ğ - 0x0121, 499, # ġ Ġ - 0x0123, 499, # ģ Ģ - 0x0125, 499, # ĥ Ĥ - 0x0127, 499, # ħ Ħ - 0x0129, 499, # ĩ Ĩ - 0x012b, 499, # ī Ī - 0x012d, 499, # ĭ Ĭ - 0x012f, 499, # į Į - 0x0131, 268, # ı I - 0x0133, 499, # ij IJ - 0x0135, 499, # ĵ Ĵ - 0x0137, 499, # ķ Ķ - 0x013a, 499, # ĺ Ĺ - 0x013c, 499, # ļ Ļ - 0x013e, 499, # ľ Ľ - 0x0140, 499, # ŀ Ŀ - 0x0142, 499, # ł Ł - 0x0144, 499, # ń Ń - 0x0146, 499, # ņ Ņ - 0x0148, 499, # ň Ň - 0x014b, 499, # ŋ Ŋ - 0x014d, 499, # ō Ō - 0x014f, 499, # ŏ Ŏ - 0x0151, 499, # ő Ő - 0x0153, 499, # œ Œ - 0x0155, 499, # ŕ Ŕ - 0x0157, 499, # ŗ Ŗ - 0x0159, 499, # ř Ř - 0x015b, 499, # ś Ś - 0x015d, 499, # ŝ Ŝ - 0x015f, 499, # ş Ş - 0x0161, 499, # š Š - 0x0163, 499, # ţ Ţ - 0x0165, 499, # ť Ť - 0x0167, 499, # ŧ Ŧ - 0x0169, 499, # ũ Ũ - 0x016b, 499, # ū Ū - 0x016d, 499, # ŭ Ŭ - 0x016f, 499, # ů Ů - 0x0171, 499, # ű Ű - 0x0173, 499, # ų Ų - 0x0175, 499, # ŵ Ŵ - 0x0177, 499, # ŷ Ŷ - 0x017a, 499, # ź Ź - 0x017c, 499, # ż Ż - 0x017e, 499, # ž Ž - 0x017f, 200, # ſ S - 0x0183, 499, # ƃ Ƃ - 0x0185, 499, # ƅ Ƅ - 0x0188, 499, # ƈ Ƈ - 0x018c, 499, # ƌ Ƌ - 0x0192, 499, # ƒ Ƒ - 0x0199, 499, # ƙ Ƙ - 0x01a1, 499, # ơ Ơ - 0x01a3, 499, # ƣ Ƣ - 0x01a5, 499, # ƥ Ƥ - 0x01a8, 499, # ƨ Ƨ - 0x01ad, 499, # ƭ Ƭ - 0x01b0, 499, # ư Ư - 0x01b4, 499, # ƴ Ƴ - 0x01b6, 499, # ƶ Ƶ - 0x01b9, 499, # ƹ Ƹ - 0x01bd, 499, # ƽ Ƽ - 0x01c5, 499, # Dž DŽ - 0x01c6, 498, # dž DŽ - 0x01c8, 499, # Lj LJ - 0x01c9, 498, # lj LJ - 0x01cb, 499, # Nj NJ - 0x01cc, 498, # nj NJ - 0x01ce, 499, # ǎ Ǎ - 0x01d0, 499, # ǐ Ǐ - 0x01d2, 499, # ǒ Ǒ - 0x01d4, 499, # ǔ Ǔ - 0x01d6, 499, # ǖ Ǖ - 0x01d8, 499, # ǘ Ǘ - 0x01da, 499, # ǚ Ǚ - 0x01dc, 499, # ǜ Ǜ - 0x01df, 499, # ǟ Ǟ - 0x01e1, 499, # ǡ Ǡ - 0x01e3, 499, # ǣ Ǣ - 0x01e5, 499, # ǥ Ǥ - 0x01e7, 499, # ǧ Ǧ - 0x01e9, 499, # ǩ Ǩ - 0x01eb, 499, # ǫ Ǫ - 0x01ed, 499, # ǭ Ǭ - 0x01ef, 499, # ǯ Ǯ - 0x01f2, 499, # Dz DZ - 0x01f3, 498, # dz DZ - 0x01f5, 499, # ǵ Ǵ - 0x01fb, 499, # ǻ Ǻ - 0x01fd, 499, # ǽ Ǽ - 0x01ff, 499, # ǿ Ǿ - 0x0201, 499, # ȁ Ȁ - 0x0203, 499, # ȃ Ȃ - 0x0205, 499, # ȅ Ȅ - 0x0207, 499, # ȇ Ȇ - 0x0209, 499, # ȉ Ȉ - 0x020b, 499, # ȋ Ȋ - 0x020d, 499, # ȍ Ȍ - 0x020f, 499, # ȏ Ȏ - 0x0211, 499, # ȑ Ȑ - 0x0213, 499, # ȓ Ȓ - 0x0215, 499, # ȕ Ȕ - 0x0217, 499, # ȗ Ȗ - 0x0253, 290, # ɓ Ɓ - 0x0254, 294, # ɔ Ɔ - 0x025b, 297, # ɛ Ɛ - 0x0260, 295, # ɠ Ɠ - 0x0263, 293, # ɣ Ɣ - 0x0268, 291, # ɨ Ɨ - 0x0269, 289, # ɩ Ɩ - 0x026f, 289, # ɯ Ɯ - 0x0272, 287, # ɲ Ɲ - 0x0283, 282, # ʃ Ʃ - 0x0288, 282, # ʈ Ʈ - 0x0292, 281, # ʒ Ʒ - 0x03ac, 462, # ά Ά - 0x03cc, 436, # ό Ό - 0x03d0, 438, # ϐ Β - 0x03d1, 443, # ϑ Θ - 0x03d5, 453, # ϕ Φ - 0x03d6, 446, # ϖ Π - 0x03e3, 499, # ϣ Ϣ - 0x03e5, 499, # ϥ Ϥ - 0x03e7, 499, # ϧ Ϧ - 0x03e9, 499, # ϩ Ϩ - 0x03eb, 499, # ϫ Ϫ - 0x03ed, 499, # ϭ Ϭ - 0x03ef, 499, # ϯ Ϯ - 0x03f0, 414, # ϰ Κ - 0x03f1, 420, # ϱ Ρ - 0x0461, 499, # ѡ Ѡ - 0x0463, 499, # ѣ Ѣ - 0x0465, 499, # ѥ Ѥ - 0x0467, 499, # ѧ Ѧ - 0x0469, 499, # ѩ Ѩ - 0x046b, 499, # ѫ Ѫ - 0x046d, 499, # ѭ Ѭ - 0x046f, 499, # ѯ Ѯ - 0x0471, 499, # ѱ Ѱ - 0x0473, 499, # ѳ Ѳ - 0x0475, 499, # ѵ Ѵ - 0x0477, 499, # ѷ Ѷ - 0x0479, 499, # ѹ Ѹ - 0x047b, 499, # ѻ Ѻ - 0x047d, 499, # ѽ Ѽ - 0x047f, 499, # ѿ Ѿ - 0x0481, 499, # ҁ Ҁ - 0x0491, 499, # ґ Ґ - 0x0493, 499, # ғ Ғ - 0x0495, 499, # ҕ Ҕ - 0x0497, 499, # җ Җ - 0x0499, 499, # ҙ Ҙ - 0x049b, 499, # қ Қ - 0x049d, 499, # ҝ Ҝ - 0x049f, 499, # ҟ Ҟ - 0x04a1, 499, # ҡ Ҡ - 0x04a3, 499, # ң Ң - 0x04a5, 499, # ҥ Ҥ - 0x04a7, 499, # ҧ Ҧ - 0x04a9, 499, # ҩ Ҩ - 0x04ab, 499, # ҫ Ҫ - 0x04ad, 499, # ҭ Ҭ - 0x04af, 499, # ү Ү - 0x04b1, 499, # ұ Ұ - 0x04b3, 499, # ҳ Ҳ - 0x04b5, 499, # ҵ Ҵ - 0x04b7, 499, # ҷ Ҷ - 0x04b9, 499, # ҹ Ҹ - 0x04bb, 499, # һ Һ - 0x04bd, 499, # ҽ Ҽ - 0x04bf, 499, # ҿ Ҿ - 0x04c2, 499, # ӂ Ӂ - 0x04c4, 499, # ӄ Ӄ - 0x04c8, 499, # ӈ Ӈ - 0x04cc, 499, # ӌ Ӌ - 0x04d1, 499, # ӑ Ӑ - 0x04d3, 499, # ӓ Ӓ - 0x04d5, 499, # ӕ Ӕ - 0x04d7, 499, # ӗ Ӗ - 0x04d9, 499, # ә Ә - 0x04db, 499, # ӛ Ӛ - 0x04dd, 499, # ӝ Ӝ - 0x04df, 499, # ӟ Ӟ - 0x04e1, 499, # ӡ Ӡ - 0x04e3, 499, # ӣ Ӣ - 0x04e5, 499, # ӥ Ӥ - 0x04e7, 499, # ӧ Ӧ - 0x04e9, 499, # ө Ө - 0x04eb, 499, # ӫ Ӫ - 0x04ef, 499, # ӯ Ӯ - 0x04f1, 499, # ӱ Ӱ - 0x04f3, 499, # ӳ Ӳ - 0x04f5, 499, # ӵ Ӵ - 0x04f9, 499, # ӹ Ӹ - 0x1e01, 499, # ḁ Ḁ - 0x1e03, 499, # ḃ Ḃ - 0x1e05, 499, # ḅ Ḅ - 0x1e07, 499, # ḇ Ḇ - 0x1e09, 499, # ḉ Ḉ - 0x1e0b, 499, # ḋ Ḋ - 0x1e0d, 499, # ḍ Ḍ - 0x1e0f, 499, # ḏ Ḏ - 0x1e11, 499, # ḑ Ḑ - 0x1e13, 499, # ḓ Ḓ - 0x1e15, 499, # ḕ Ḕ - 0x1e17, 499, # ḗ Ḗ - 0x1e19, 499, # ḙ Ḙ - 0x1e1b, 499, # ḛ Ḛ - 0x1e1d, 499, # ḝ Ḝ - 0x1e1f, 499, # ḟ Ḟ - 0x1e21, 499, # ḡ Ḡ - 0x1e23, 499, # ḣ Ḣ - 0x1e25, 499, # ḥ Ḥ - 0x1e27, 499, # ḧ Ḧ - 0x1e29, 499, # ḩ Ḩ - 0x1e2b, 499, # ḫ Ḫ - 0x1e2d, 499, # ḭ Ḭ - 0x1e2f, 499, # ḯ Ḯ - 0x1e31, 499, # ḱ Ḱ - 0x1e33, 499, # ḳ Ḳ - 0x1e35, 499, # ḵ Ḵ - 0x1e37, 499, # ḷ Ḷ - 0x1e39, 499, # ḹ Ḹ - 0x1e3b, 499, # ḻ Ḻ - 0x1e3d, 499, # ḽ Ḽ - 0x1e3f, 499, # ḿ Ḿ - 0x1e41, 499, # ṁ Ṁ - 0x1e43, 499, # ṃ Ṃ - 0x1e45, 499, # ṅ Ṅ - 0x1e47, 499, # ṇ Ṇ - 0x1e49, 499, # ṉ Ṉ - 0x1e4b, 499, # ṋ Ṋ - 0x1e4d, 499, # ṍ Ṍ - 0x1e4f, 499, # ṏ Ṏ - 0x1e51, 499, # ṑ Ṑ - 0x1e53, 499, # ṓ Ṓ - 0x1e55, 499, # ṕ Ṕ - 0x1e57, 499, # ṗ Ṗ - 0x1e59, 499, # ṙ Ṙ - 0x1e5b, 499, # ṛ Ṛ - 0x1e5d, 499, # ṝ Ṝ - 0x1e5f, 499, # ṟ Ṟ - 0x1e61, 499, # ṡ Ṡ - 0x1e63, 499, # ṣ Ṣ - 0x1e65, 499, # ṥ Ṥ - 0x1e67, 499, # ṧ Ṧ - 0x1e69, 499, # ṩ Ṩ - 0x1e6b, 499, # ṫ Ṫ - 0x1e6d, 499, # ṭ Ṭ - 0x1e6f, 499, # ṯ Ṯ - 0x1e71, 499, # ṱ Ṱ - 0x1e73, 499, # ṳ Ṳ - 0x1e75, 499, # ṵ Ṵ - 0x1e77, 499, # ṷ Ṷ - 0x1e79, 499, # ṹ Ṹ - 0x1e7b, 499, # ṻ Ṻ - 0x1e7d, 499, # ṽ Ṽ - 0x1e7f, 499, # ṿ Ṿ - 0x1e81, 499, # ẁ Ẁ - 0x1e83, 499, # ẃ Ẃ - 0x1e85, 499, # ẅ Ẅ - 0x1e87, 499, # ẇ Ẇ - 0x1e89, 499, # ẉ Ẉ - 0x1e8b, 499, # ẋ Ẋ - 0x1e8d, 499, # ẍ Ẍ - 0x1e8f, 499, # ẏ Ẏ - 0x1e91, 499, # ẑ Ẑ - 0x1e93, 499, # ẓ Ẓ - 0x1e95, 499, # ẕ Ẕ - 0x1ea1, 499, # ạ Ạ - 0x1ea3, 499, # ả Ả - 0x1ea5, 499, # ấ Ấ - 0x1ea7, 499, # ầ Ầ - 0x1ea9, 499, # ẩ Ẩ - 0x1eab, 499, # ẫ Ẫ - 0x1ead, 499, # ậ Ậ - 0x1eaf, 499, # ắ Ắ - 0x1eb1, 499, # ằ Ằ - 0x1eb3, 499, # ẳ Ẳ - 0x1eb5, 499, # ẵ Ẵ - 0x1eb7, 499, # ặ Ặ - 0x1eb9, 499, # ẹ Ẹ - 0x1ebb, 499, # ẻ Ẻ - 0x1ebd, 499, # ẽ Ẽ - 0x1ebf, 499, # ế Ế - 0x1ec1, 499, # ề Ề - 0x1ec3, 499, # ể Ể - 0x1ec5, 499, # ễ Ễ - 0x1ec7, 499, # ệ Ệ - 0x1ec9, 499, # ỉ Ỉ - 0x1ecb, 499, # ị Ị - 0x1ecd, 499, # ọ Ọ - 0x1ecf, 499, # ỏ Ỏ - 0x1ed1, 499, # ố Ố - 0x1ed3, 499, # ồ Ồ - 0x1ed5, 499, # ổ Ổ - 0x1ed7, 499, # ỗ Ỗ - 0x1ed9, 499, # ộ Ộ - 0x1edb, 499, # ớ Ớ - 0x1edd, 499, # ờ Ờ - 0x1edf, 499, # ở Ở - 0x1ee1, 499, # ỡ Ỡ - 0x1ee3, 499, # ợ Ợ - 0x1ee5, 499, # ụ Ụ - 0x1ee7, 499, # ủ Ủ - 0x1ee9, 499, # ứ Ứ - 0x1eeb, 499, # ừ Ừ - 0x1eed, 499, # ử Ử - 0x1eef, 499, # ữ Ữ - 0x1ef1, 499, # ự Ự - 0x1ef3, 499, # ỳ Ỳ - 0x1ef5, 499, # ỵ Ỵ - 0x1ef7, 499, # ỷ Ỷ - 0x1ef9, 499, # ỹ Ỹ - 0x1f51, 508, # ὑ Ὑ - 0x1f53, 508, # ὓ Ὓ - 0x1f55, 508, # ὕ Ὕ - 0x1f57, 508, # ὗ Ὗ - 0x1fb3, 509, # ᾳ ᾼ - 0x1fc3, 509, # ῃ ῌ - 0x1fe5, 507, # ῥ Ῥ - 0x1ff3, 509] # ῳ ῼ - - tolowerRanges = [ - 0x0041, 0x005a, 532, # A-Z a-z - 0x00c0, 0x00d6, 532, # À-Ö à-ö - 0x00d8, 0x00de, 532, # Ø-Þ ø-þ - 0x0189, 0x018a, 705, # Ɖ-Ɗ ɖ-ɗ - 0x018e, 0x018f, 702, # Ǝ-Ə ɘ-ə - 0x01b1, 0x01b2, 717, # Ʊ-Ʋ ʊ-ʋ - 0x0388, 0x038a, 537, # Έ-Ί έ-ί - 0x038e, 0x038f, 563, # Ύ-Ώ ύ-ώ - 0x0391, 0x03a1, 532, # Α-Ρ α-ρ - 0x03a3, 0x03ab, 532, # Σ-Ϋ σ-ϋ - 0x0401, 0x040c, 580, # Ё-Ќ ё-ќ - 0x040e, 0x040f, 580, # Ў-Џ ў-џ - 0x0410, 0x042f, 532, # А-Я а-я - 0x0531, 0x0556, 548, # Ա-Ֆ ա-ֆ - 0x10a0, 0x10c5, 548, # Ⴀ-Ⴥ ა-ჵ - 0x1f08, 0x1f0f, 492, # Ἀ-Ἇ ἀ-ἇ - 0x1f18, 0x1f1d, 492, # Ἐ-Ἕ ἐ-ἕ - 0x1f28, 0x1f2f, 492, # Ἠ-Ἧ ἠ-ἧ - 0x1f38, 0x1f3f, 492, # Ἰ-Ἷ ἰ-ἷ - 0x1f48, 0x1f4d, 492, # Ὀ-Ὅ ὀ-ὅ - 0x1f68, 0x1f6f, 492, # Ὠ-Ὧ ὠ-ὧ - 0x1f88, 0x1f8f, 492, # ᾈ-ᾏ ᾀ-ᾇ - 0x1f98, 0x1f9f, 492, # ᾘ-ᾟ ᾐ-ᾗ - 0x1fa8, 0x1faf, 492, # ᾨ-ᾯ ᾠ-ᾧ - 0x1fb8, 0x1fb9, 492, # Ᾰ-Ᾱ ᾰ-ᾱ - 0x1fba, 0x1fbb, 426, # Ὰ-Ά ὰ-ά - 0x1fc8, 0x1fcb, 414, # Ὲ-Ή ὲ-ή - 0x1fd8, 0x1fd9, 492, # Ῐ-Ῑ ῐ-ῑ - 0x1fda, 0x1fdb, 400, # Ὶ-Ί ὶ-ί - 0x1fe8, 0x1fe9, 492, # Ῠ-Ῡ ῠ-ῡ - 0x1fea, 0x1feb, 388, # Ὺ-Ύ ὺ-ύ - 0x1ff8, 0x1ff9, 372, # Ὸ-Ό ὸ-ό - 0x1ffa, 0x1ffb, 374, # Ὼ-Ώ ὼ-ώ - 0x2160, 0x216f, 516, # Ⅰ-Ⅿ ⅰ-ⅿ - 0x24b6, 0x24cf, 526, # Ⓐ-Ⓩ ⓐ-ⓩ - 0xff21, 0xff3a, 532] # A-Z a-z - - tolowerSinglets = [ - 0x0100, 501, # Ā ā - 0x0102, 501, # Ă ă - 0x0104, 501, # Ą ą - 0x0106, 501, # Ć ć - 0x0108, 501, # Ĉ ĉ - 0x010a, 501, # Ċ ċ - 0x010c, 501, # Č č - 0x010e, 501, # Ď ď - 0x0110, 501, # Đ đ - 0x0112, 501, # Ē ē - 0x0114, 501, # Ĕ ĕ - 0x0116, 501, # Ė ė - 0x0118, 501, # Ę ę - 0x011a, 501, # Ě ě - 0x011c, 501, # Ĝ ĝ - 0x011e, 501, # Ğ ğ - 0x0120, 501, # Ġ ġ - 0x0122, 501, # Ģ ģ - 0x0124, 501, # Ĥ ĥ - 0x0126, 501, # Ħ ħ - 0x0128, 501, # Ĩ ĩ - 0x012a, 501, # Ī ī - 0x012c, 501, # Ĭ ĭ - 0x012e, 501, # Į į - 0x0130, 301, # İ i - 0x0132, 501, # IJ ij - 0x0134, 501, # Ĵ ĵ - 0x0136, 501, # Ķ ķ - 0x0139, 501, # Ĺ ĺ - 0x013b, 501, # Ļ ļ - 0x013d, 501, # Ľ ľ - 0x013f, 501, # Ŀ ŀ - 0x0141, 501, # Ł ł - 0x0143, 501, # Ń ń - 0x0145, 501, # Ņ ņ - 0x0147, 501, # Ň ň - 0x014a, 501, # Ŋ ŋ - 0x014c, 501, # Ō ō - 0x014e, 501, # Ŏ ŏ - 0x0150, 501, # Ő ő - 0x0152, 501, # Œ œ - 0x0154, 501, # Ŕ ŕ - 0x0156, 501, # Ŗ ŗ - 0x0158, 501, # Ř ř - 0x015a, 501, # Ś ś - 0x015c, 501, # Ŝ ŝ - 0x015e, 501, # Ş ş - 0x0160, 501, # Š š - 0x0162, 501, # Ţ ţ - 0x0164, 501, # Ť ť - 0x0166, 501, # Ŧ ŧ - 0x0168, 501, # Ũ ũ - 0x016a, 501, # Ū ū - 0x016c, 501, # Ŭ ŭ - 0x016e, 501, # Ů ů - 0x0170, 501, # Ű ű - 0x0172, 501, # Ų ų - 0x0174, 501, # Ŵ ŵ - 0x0176, 501, # Ŷ ŷ - 0x0178, 379, # Ÿ ÿ - 0x0179, 501, # Ź ź - 0x017b, 501, # Ż ż - 0x017d, 501, # Ž ž - 0x0181, 710, # Ɓ ɓ - 0x0182, 501, # Ƃ ƃ - 0x0184, 501, # Ƅ ƅ - 0x0186, 706, # Ɔ ɔ - 0x0187, 501, # Ƈ ƈ - 0x018b, 501, # Ƌ ƌ - 0x0190, 703, # Ɛ ɛ - 0x0191, 501, # Ƒ ƒ - 0x0193, 705, # Ɠ ɠ - 0x0194, 707, # Ɣ ɣ - 0x0196, 711, # Ɩ ɩ - 0x0197, 709, # Ɨ ɨ - 0x0198, 501, # Ƙ ƙ - 0x019c, 711, # Ɯ ɯ - 0x019d, 713, # Ɲ ɲ - 0x01a0, 501, # Ơ ơ - 0x01a2, 501, # Ƣ ƣ - 0x01a4, 501, # Ƥ ƥ - 0x01a7, 501, # Ƨ ƨ - 0x01a9, 718, # Ʃ ʃ - 0x01ac, 501, # Ƭ ƭ - 0x01ae, 718, # Ʈ ʈ - 0x01af, 501, # Ư ư - 0x01b3, 501, # Ƴ ƴ - 0x01b5, 501, # Ƶ ƶ - 0x01b7, 719, # Ʒ ʒ - 0x01b8, 501, # Ƹ ƹ - 0x01bc, 501, # Ƽ ƽ - 0x01c4, 502, # DŽ dž - 0x01c5, 501, # Dž dž - 0x01c7, 502, # LJ lj - 0x01c8, 501, # Lj lj - 0x01ca, 502, # NJ nj - 0x01cb, 501, # Nj nj - 0x01cd, 501, # Ǎ ǎ - 0x01cf, 501, # Ǐ ǐ - 0x01d1, 501, # Ǒ ǒ - 0x01d3, 501, # Ǔ ǔ - 0x01d5, 501, # Ǖ ǖ - 0x01d7, 501, # Ǘ ǘ - 0x01d9, 501, # Ǚ ǚ - 0x01db, 501, # Ǜ ǜ - 0x01de, 501, # Ǟ ǟ - 0x01e0, 501, # Ǡ ǡ - 0x01e2, 501, # Ǣ ǣ - 0x01e4, 501, # Ǥ ǥ - 0x01e6, 501, # Ǧ ǧ - 0x01e8, 501, # Ǩ ǩ - 0x01ea, 501, # Ǫ ǫ - 0x01ec, 501, # Ǭ ǭ - 0x01ee, 501, # Ǯ ǯ - 0x01f1, 502, # DZ dz - 0x01f2, 501, # Dz dz - 0x01f4, 501, # Ǵ ǵ - 0x01fa, 501, # Ǻ ǻ - 0x01fc, 501, # Ǽ ǽ - 0x01fe, 501, # Ǿ ǿ - 0x0200, 501, # Ȁ ȁ - 0x0202, 501, # Ȃ ȃ - 0x0204, 501, # Ȅ ȅ - 0x0206, 501, # Ȇ ȇ - 0x0208, 501, # Ȉ ȉ - 0x020a, 501, # Ȋ ȋ - 0x020c, 501, # Ȍ ȍ - 0x020e, 501, # Ȏ ȏ - 0x0210, 501, # Ȑ ȑ - 0x0212, 501, # Ȓ ȓ - 0x0214, 501, # Ȕ ȕ - 0x0216, 501, # Ȗ ȗ - 0x0386, 538, # Ά ά - 0x038c, 564, # Ό ό - 0x03e2, 501, # Ϣ ϣ - 0x03e4, 501, # Ϥ ϥ - 0x03e6, 501, # Ϧ ϧ - 0x03e8, 501, # Ϩ ϩ - 0x03ea, 501, # Ϫ ϫ - 0x03ec, 501, # Ϭ ϭ - 0x03ee, 501, # Ϯ ϯ - 0x0460, 501, # Ѡ ѡ - 0x0462, 501, # Ѣ ѣ - 0x0464, 501, # Ѥ ѥ - 0x0466, 501, # Ѧ ѧ - 0x0468, 501, # Ѩ ѩ - 0x046a, 501, # Ѫ ѫ - 0x046c, 501, # Ѭ ѭ - 0x046e, 501, # Ѯ ѯ - 0x0470, 501, # Ѱ ѱ - 0x0472, 501, # Ѳ ѳ - 0x0474, 501, # Ѵ ѵ - 0x0476, 501, # Ѷ ѷ - 0x0478, 501, # Ѹ ѹ - 0x047a, 501, # Ѻ ѻ - 0x047c, 501, # Ѽ ѽ - 0x047e, 501, # Ѿ ѿ - 0x0480, 501, # Ҁ ҁ - 0x0490, 501, # Ґ ґ - 0x0492, 501, # Ғ ғ - 0x0494, 501, # Ҕ ҕ - 0x0496, 501, # Җ җ - 0x0498, 501, # Ҙ ҙ - 0x049a, 501, # Қ қ - 0x049c, 501, # Ҝ ҝ - 0x049e, 501, # Ҟ ҟ - 0x04a0, 501, # Ҡ ҡ - 0x04a2, 501, # Ң ң - 0x04a4, 501, # Ҥ ҥ - 0x04a6, 501, # Ҧ ҧ - 0x04a8, 501, # Ҩ ҩ - 0x04aa, 501, # Ҫ ҫ - 0x04ac, 501, # Ҭ ҭ - 0x04ae, 501, # Ү ү - 0x04b0, 501, # Ұ ұ - 0x04b2, 501, # Ҳ ҳ - 0x04b4, 501, # Ҵ ҵ - 0x04b6, 501, # Ҷ ҷ - 0x04b8, 501, # Ҹ ҹ - 0x04ba, 501, # Һ һ - 0x04bc, 501, # Ҽ ҽ - 0x04be, 501, # Ҿ ҿ - 0x04c1, 501, # Ӂ ӂ - 0x04c3, 501, # Ӄ ӄ - 0x04c7, 501, # Ӈ ӈ - 0x04cb, 501, # Ӌ ӌ - 0x04d0, 501, # Ӑ ӑ - 0x04d2, 501, # Ӓ ӓ - 0x04d4, 501, # Ӕ ӕ - 0x04d6, 501, # Ӗ ӗ - 0x04d8, 501, # Ә ә - 0x04da, 501, # Ӛ ӛ - 0x04dc, 501, # Ӝ ӝ - 0x04de, 501, # Ӟ ӟ - 0x04e0, 501, # Ӡ ӡ - 0x04e2, 501, # Ӣ ӣ - 0x04e4, 501, # Ӥ ӥ - 0x04e6, 501, # Ӧ ӧ - 0x04e8, 501, # Ө ө - 0x04ea, 501, # Ӫ ӫ - 0x04ee, 501, # Ӯ ӯ - 0x04f0, 501, # Ӱ ӱ - 0x04f2, 501, # Ӳ ӳ - 0x04f4, 501, # Ӵ ӵ - 0x04f8, 501, # Ӹ ӹ - 0x1e00, 501, # Ḁ ḁ - 0x1e02, 501, # Ḃ ḃ - 0x1e04, 501, # Ḅ ḅ - 0x1e06, 501, # Ḇ ḇ - 0x1e08, 501, # Ḉ ḉ - 0x1e0a, 501, # Ḋ ḋ - 0x1e0c, 501, # Ḍ ḍ - 0x1e0e, 501, # Ḏ ḏ - 0x1e10, 501, # Ḑ ḑ - 0x1e12, 501, # Ḓ ḓ - 0x1e14, 501, # Ḕ ḕ - 0x1e16, 501, # Ḗ ḗ - 0x1e18, 501, # Ḙ ḙ - 0x1e1a, 501, # Ḛ ḛ - 0x1e1c, 501, # Ḝ ḝ - 0x1e1e, 501, # Ḟ ḟ - 0x1e20, 501, # Ḡ ḡ - 0x1e22, 501, # Ḣ ḣ - 0x1e24, 501, # Ḥ ḥ - 0x1e26, 501, # Ḧ ḧ - 0x1e28, 501, # Ḩ ḩ - 0x1e2a, 501, # Ḫ ḫ - 0x1e2c, 501, # Ḭ ḭ - 0x1e2e, 501, # Ḯ ḯ - 0x1e30, 501, # Ḱ ḱ - 0x1e32, 501, # Ḳ ḳ - 0x1e34, 501, # Ḵ ḵ - 0x1e36, 501, # Ḷ ḷ - 0x1e38, 501, # Ḹ ḹ - 0x1e3a, 501, # Ḻ ḻ - 0x1e3c, 501, # Ḽ ḽ - 0x1e3e, 501, # Ḿ ḿ - 0x1e40, 501, # Ṁ ṁ - 0x1e42, 501, # Ṃ ṃ - 0x1e44, 501, # Ṅ ṅ - 0x1e46, 501, # Ṇ ṇ - 0x1e48, 501, # Ṉ ṉ - 0x1e4a, 501, # Ṋ ṋ - 0x1e4c, 501, # Ṍ ṍ - 0x1e4e, 501, # Ṏ ṏ - 0x1e50, 501, # Ṑ ṑ - 0x1e52, 501, # Ṓ ṓ - 0x1e54, 501, # Ṕ ṕ - 0x1e56, 501, # Ṗ ṗ - 0x1e58, 501, # Ṙ ṙ - 0x1e5a, 501, # Ṛ ṛ - 0x1e5c, 501, # Ṝ ṝ - 0x1e5e, 501, # Ṟ ṟ - 0x1e60, 501, # Ṡ ṡ - 0x1e62, 501, # Ṣ ṣ - 0x1e64, 501, # Ṥ ṥ - 0x1e66, 501, # Ṧ ṧ - 0x1e68, 501, # Ṩ ṩ - 0x1e6a, 501, # Ṫ ṫ - 0x1e6c, 501, # Ṭ ṭ - 0x1e6e, 501, # Ṯ ṯ - 0x1e70, 501, # Ṱ ṱ - 0x1e72, 501, # Ṳ ṳ - 0x1e74, 501, # Ṵ ṵ - 0x1e76, 501, # Ṷ ṷ - 0x1e78, 501, # Ṹ ṹ - 0x1e7a, 501, # Ṻ ṻ - 0x1e7c, 501, # Ṽ ṽ - 0x1e7e, 501, # Ṿ ṿ - 0x1e80, 501, # Ẁ ẁ - 0x1e82, 501, # Ẃ ẃ - 0x1e84, 501, # Ẅ ẅ - 0x1e86, 501, # Ẇ ẇ - 0x1e88, 501, # Ẉ ẉ - 0x1e8a, 501, # Ẋ ẋ - 0x1e8c, 501, # Ẍ ẍ - 0x1e8e, 501, # Ẏ ẏ - 0x1e90, 501, # Ẑ ẑ - 0x1e92, 501, # Ẓ ẓ - 0x1e94, 501, # Ẕ ẕ - 0x1ea0, 501, # Ạ ạ - 0x1ea2, 501, # Ả ả - 0x1ea4, 501, # Ấ ấ - 0x1ea6, 501, # Ầ ầ - 0x1ea8, 501, # Ẩ ẩ - 0x1eaa, 501, # Ẫ ẫ - 0x1eac, 501, # Ậ ậ - 0x1eae, 501, # Ắ ắ - 0x1eb0, 501, # Ằ ằ - 0x1eb2, 501, # Ẳ ẳ - 0x1eb4, 501, # Ẵ ẵ - 0x1eb6, 501, # Ặ ặ - 0x1eb8, 501, # Ẹ ẹ - 0x1eba, 501, # Ẻ ẻ - 0x1ebc, 501, # Ẽ ẽ - 0x1ebe, 501, # Ế ế - 0x1ec0, 501, # Ề ề - 0x1ec2, 501, # Ể ể - 0x1ec4, 501, # Ễ ễ - 0x1ec6, 501, # Ệ ệ - 0x1ec8, 501, # Ỉ ỉ - 0x1eca, 501, # Ị ị - 0x1ecc, 501, # Ọ ọ - 0x1ece, 501, # Ỏ ỏ - 0x1ed0, 501, # Ố ố - 0x1ed2, 501, # Ồ ồ - 0x1ed4, 501, # Ổ ổ - 0x1ed6, 501, # Ỗ ỗ - 0x1ed8, 501, # Ộ ộ - 0x1eda, 501, # Ớ ớ - 0x1edc, 501, # Ờ ờ - 0x1ede, 501, # Ở ở - 0x1ee0, 501, # Ỡ ỡ - 0x1ee2, 501, # Ợ ợ - 0x1ee4, 501, # Ụ ụ - 0x1ee6, 501, # Ủ ủ - 0x1ee8, 501, # Ứ ứ - 0x1eea, 501, # Ừ ừ - 0x1eec, 501, # Ử ử - 0x1eee, 501, # Ữ ữ - 0x1ef0, 501, # Ự ự - 0x1ef2, 501, # Ỳ ỳ - 0x1ef4, 501, # Ỵ ỵ - 0x1ef6, 501, # Ỷ ỷ - 0x1ef8, 501, # Ỹ ỹ - 0x1f59, 492, # Ὑ ὑ - 0x1f5b, 492, # Ὓ ὓ - 0x1f5d, 492, # Ὕ ὕ - 0x1f5f, 492, # Ὗ ὗ - 0x1fbc, 491, # ᾼ ᾳ - 0x1fcc, 491, # ῌ ῃ - 0x1fec, 493, # Ῥ ῥ - 0x1ffc, 491] # ῼ ῳ - - toTitleSinglets = [ - 0x01c4, 501, # DŽ Dž - 0x01c6, 499, # dž Dž - 0x01c7, 501, # LJ Lj - 0x01c9, 499, # lj Lj - 0x01ca, 501, # NJ Nj - 0x01cc, 499, # nj Nj - 0x01f1, 501, # DZ Dz - 0x01f3, 499] # dz Dz - -proc binarySearch(c: irune, tab: openArray[iRune], len, stride: int): int = - var n = len - var t = 0 - while n > 1: - var m = n div 2 - var p = t + m*stride - if c >= tab[p]: - t = p - n = n-m - else: - n = m - if n != 0 and c >= tab[t]: - return t - return -1 - -proc toLower*(c: TRune): TRune = - ## Converts `c` into lower case. This works for any Unicode character. - ## If possible, prefer `toLower` over `toUpper`. - var c = irune(c) - var p = binarySearch(c, tolowerRanges, len(toLowerRanges) div 3, 3) - if p >= 0 and c >= tolowerRanges[p] and c <= tolowerRanges[p+1]: - return TRune(c + tolowerRanges[p+2] - 500) - p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2) - if p >= 0 and c == toLowerSinglets[p]: - return TRune(c + toLowerSinglets[p+1] - 500) - return TRune(c) - -proc toUpper*(c: TRune): TRune = - ## Converts `c` into upper case. This works for any Unicode character. - ## If possible, prefer `toLower` over `toUpper`. - var c = irune(c) - var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3) - if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]: - return TRune(c + toUpperRanges[p+2] - 500) - p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2) - if p >= 0 and c == toUpperSinglets[p]: - return TRune(c + toUpperSinglets[p+1] - 500) - return TRune(c) - -proc toTitle*(c: TRune): TRune = - var c = irune(c) - var p = binarySearch(c, toTitleSinglets, len(toTitleSinglets) div 2, 2) - if p >= 0 and c == toTitleSinglets[p]: - return TRune(c + toTitleSinglets[p+1] - 500) - return TRune(c) - -proc isLower*(c: TRune): bool = - ## returns true iff `c` is a lower case Unicode character - ## If possible, prefer `isLower` over `isUpper`. - var c = irune(c) - # Note: toUpperRanges is correct here! - var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3) - if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]: - return true - p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2) - if p >= 0 and c == toUpperSinglets[p]: - return true - -proc isUpper*(c: TRune): bool = - ## returns true iff `c` is a upper case Unicode character - ## If possible, prefer `isLower` over `isUpper`. - var c = irune(c) - # Note: toLowerRanges is correct here! - var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3) - if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]: - return true - p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2) - if p >= 0 and c == toLowerSinglets[p]: - return true - -proc isAlpha*(c: TRune): bool = - ## returns true iff `c` is an *alpha* Unicode character (i.e. a letter) - if isUpper(c) or isLower(c): - return true - var c = irune(c) - var p = binarySearch(c, alphaRanges, len(alphaRanges) div 2, 2) - if p >= 0 and c >= alphaRanges[p] and c <= alphaRanges[p+1]: - return true - p = binarySearch(c, alphaSinglets, len(alphaSinglets), 1) - if p >= 0 and c == alphaSinglets[p]: - return true - -proc isTitle*(c: TRune): bool = - return isUpper(c) and isLower(c) - -proc isWhiteSpace*(c: TRune): bool = - ## returns true iff `c` is a Unicode whitespace character - var c = irune(c) - var p = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2) - if p >= 0 and c >= spaceRanges[p] and c <= spaceRanges[p+1]: - return true - -iterator runes*(s: string): TRune = - ## iterates over any unicode character of the string `s`. - var - i = 0 - result: TRune - while i < len(s): - fastRuneAt(s, i, result, true) - yield result - -proc cmpRunesIgnoreCase*(a, b: string): int = - ## compares two UTF8 strings and ignores the case. Returns: - ## - ## | 0 iff a == b - ## | < 0 iff a < b - ## | > 0 iff a > b - var i = 0 - var j = 0 - var ar, br: TRune - while i < a.len and j < b.len: - # slow path: - fastRuneAt(a, i, ar) - fastRuneAt(b, j, br) - result = irune(toLower(ar)) - irune(toLower(br)) - if result != 0: return - result = a.len - b.len - diff --git a/nimlib/pure/variants.nim b/nimlib/pure/variants.nim deleted file mode 100755 index 40679c779..000000000 --- a/nimlib/pure/variants.nim +++ /dev/null @@ -1,181 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements Nimrod's support for the ``variant`` datatype. -## `TVariant` shows how the flexibility of dynamic typing is achieved -## within a static type system. - -type - TVarType* = enum - vtNone, - vtBool, - vtChar, - vtEnum, - vtInt, - vtFloat, - vtString, - vtSet, - vtSeq, - vtDict - TVariant* {.final.} = object of TObject - case vtype: TVarType - of vtNone: nil - of vtBool, vtChar, vtEnum, vtInt: vint: int64 - of vtFloat: vfloat: float64 - of vtString: vstring: string - of vtSet, vtSeq: q: seq[TVariant] - of vtDict: d: seq[tuple[key, val: TVariant]] - -iterator objectFields*[T](x: T, skipInherited: bool): tuple[ - key: string, val: TVariant] {.magic: "ObjectFields"} - -proc `<>`*(x: ordinal): TVariant = - result.kind = vtEnum - result.vint = x - -proc `<>`*(x: biggestInt): TVariant = - result.kind = vtInt - result.vint = x - -proc `<>`*(x: char): TVariant = - result.kind = vtChar - result.vint = ord(x) - -proc `<>`*(x: bool): TVariant = - result.kind = vtBool - result.vint = ord(x) - -proc `<>`*(x: biggestFloat): TVariant = - result.kind = vtFloat - result.vfloat = x - -proc `<>`*(x: string): TVariant = - result.kind = vtString - result.vstring = x - -proc `<>`*[T](x: openArray[T]): TVariant = - result.kind = vtSeq - newSeq(result.q, x.len) - for i in 0..x.len-1: result.q[i] = <>x[i] - -proc `<>`*[T](x: set[T]): TVariant = - result.kind = vtSet - result.q = @[] - for a in items(x): result.q.add(<>a) - -proc `<>`* [T: object](x: T): TVariant {.magic: "ToVariant".} - ## this converts a value to a variant ("boxing") - -proc `><`*[T](v: TVariant, typ: T): T {.magic: "FromVariant".} - -[<>5, <>67, <>"hello"] -myVar><int - - -proc `==`* (x, y: TVariant): bool = - if x.vtype == y.vtype: - case x.vtype - of vtNone: result = true - of vtBool, vtChar, vtEnum, vtInt: result = x.vint == y.vint - of vtFloat: result = x.vfloat == y.vfloat - of vtString: result = x.vstring == y.vstring - of vtSet: - # complicated! We check that each a in x also occurs in y and that the - # counts are identical: - if x.q.len == y.q.len: - for a in items(x.q): - block inner: - for b in items(y.q): - if a == b: break inner - return false - result = true - of vtSeq: - if x.q.len == y.q.len: - for i in 0..x.q.len-1: - if x.q[i] != y.q[i]: return false - result = true - of vtDict: - # it is an ordered dict: - if x.d.len == y.d.len: - for i in 0..x.d.len-1: - if x.d[i].key != y.d[i].key: return false - if x.d[i].val != y.d[i].val: return false - result = true - -proc `[]`* (a, b: TVariant): TVariant = - case a.vtype - of vtSeq: - if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: - result = a.q[b.vint] - else: - variantError() - of vtDict: - for i in 0..a.d.len-1: - if a.d[i].key == b: return a.d[i].val - if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: - result = a.d[b.vint].val - variantError() - else: variantError() - -proc `[]=`* (a, b, c: TVariant) = - case a.vtype - of vtSeq: - if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: - a.q[b.vint] = b - else: - variantError() - of vtDict: - for i in 0..a.d.len-1: - if a.d[i].key == b: - a.d[i].val = c - return - if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: - a.d[b.vint].val = c - variantError() - else: variantError() - -proc `[]`* (a: TVariant, b: int): TVariant {.inline} = return a[<>b] -proc `[]`* (a: TVariant, b: string): TVariant {.inline} = return a[<>b] -proc `[]=`* (a: TVariant, b: int, c: TVariant) {.inline} = a[<>b] = c -proc `[]=`* (a: TVariant, b: string, c: TVariant) {.inline} = a[<>b] = c - -proc `+`* (x, y: TVariant): TVariant = - case x.vtype - of vtBool, vtChar, vtEnum, vtInt: - if y.vtype == x.vtype: - result.vtype = x.vtype - result.vint = x.vint + y.vint - else: - case y.vtype - of vtBool, vtChar, vtEnum, vtInt: - - - - vint: int64 - of vtFloat: vfloat: float64 - of vtString: vstring: string - of vtSet, vtSeq: q: seq[TVariant] - of vtDict: d: seq[tuple[key, val: TVariant]] - -proc `-`* (x, y: TVariant): TVariant -proc `*`* (x, y: TVariant): TVariant -proc `/`* (x, y: TVariant): TVariant -proc `div`* (x, y: TVariant): TVariant -proc `mod`* (x, y: TVariant): TVariant -proc `&`* (x, y: TVariant): TVariant -proc `$`* (x: TVariant): string = - # uses JS notation - -proc parseVariant*(s: string): TVariant -proc `<`* (x, y: TVariant): bool -proc `<=`* (x, y: TVariant): bool - -proc hash*(x: TVariant): int = - - diff --git a/nimlib/pure/xmlgen.nim b/nimlib/pure/xmlgen.nim deleted file mode 100755 index 79a782252..000000000 --- a/nimlib/pure/xmlgen.nim +++ /dev/null @@ -1,406 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2009 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements a simple `XML`:idx: and `HTML`:idx: code -## generator. Each commonly used HTML tag has a corresponding macro -## that generates a string with its HTML representation. -## -## Example: -## -## .. code-block:: nimrod -## var nim = "Nimrod" -## echo h1(a(href="http://force7.de/nimrod", nim)) -## -## Writes the string:: -## -## <h1><a href="http://force7.de/nimrod">Nimrod</a></h1> -## - -import - macros, strutils - -const - coreAttr* = " id class title style " - eventAttr* = " onclick ondblclick onmousedown onmouseup " & - "onmouseover onmousemove onmouseout onkeypress onkeydown onkeyup " - commonAttr* = coreAttr & eventAttr - -proc getIdent(e: PNimrodNode): string {.compileTime.} = - case e.kind - of nnkIdent: result = normalize($e.ident) - of nnkAccQuoted: result = getIdent(e[0]) - else: error("cannot extract identifier from node: " & toStrLit(e).strVal) - -proc delete[T](s: var seq[T], attr: T): bool = - var idx = find(s, attr) - if idx >= 0: - var L = s.len - s[idx] = s[L-1] - setLen(s, L-1) - result = true - -proc xmlCheckedTag*(e: PNimrodNode, tag: string, - optAttr = "", reqAttr = "", - isLeaf = false): PNimrodNode {.compileTime.} = - ## use this procedure to define a new XML tag - - # copy the attributes; when iterating over them these lists - # will be modified, so that each attribute is only given one value - var req = splitSeq(reqAttr) - var opt = splitSeq(optAttr) - result = newNimNode(nnkBracket, e) - result.add(newStrLitNode("<")) - result.add(newStrLitNode(tag)) - # first pass over attributes: - for i in 1..e.len-1: - if e[i].kind == nnkExprEqExpr: - var name = getIdent(e[i][0]) - if delete(req, name) or delete(opt, name): - result.add(newStrLitNode(" ")) - result.add(newStrLitNode(name)) - result.add(newStrLitNode("=\"")) - result.add(e[i][1]) - result.add(newStrLitNode("\"")) - else: - error("invalid attribute for '" & tag & "' element: " & name) - # check each required attribute exists: - if req.len > 0: - error(req[0] & " attribute for '" & tag & "' element expected") - if isLeaf: - for i in 1..e.len-1: - if e[i].kind != nnkExprEqExpr: - error("element " & tag & " cannot be nested") - result.add(newStrLitNode(" />")) - else: - result.add(newStrLitNode(">")) - # second pass over elements: - for i in 1..e.len-1: - if e[i].kind != nnkExprEqExpr: result.add(e[i]) - result.add(newStrLitNode("</")) - result.add(newStrLitNode(tag)) - result.add(newStrLitNode(">")) - result = NestList(!"&", result) - - -macro a*(e: expr): expr = - ## generates the HTML ``a`` element. - result = xmlCheckedTag(e, "a", "href charset type hreflang rel rev " & - "accesskey tabindex" & commonAttr) - -macro acronym*(e: expr): expr = - ## generates the HTML ``acronym`` element. - result = xmlCheckedTag(e, "acronym", commonAttr) - -macro address*(e: expr): expr = - ## generates the HTML ``address`` element. - result = xmlCheckedTag(e, "address", commonAttr) - -macro area*(e: expr): expr = - ## generates the HTML ``area`` element. - result = xmlCheckedTag(e, "area", "shape coords href nohref" & - " accesskey tabindex" & commonAttr, "alt", true) - -macro b*(e: expr): expr = - ## generates the HTML ``b`` element. - result = xmlCheckedTag(e, "b", commonAttr) - -macro base*(e: expr): expr = - ## generates the HTML ``base`` element. - result = xmlCheckedTag(e, "base", "", "href", true) - -macro big*(e: expr): expr = - ## generates the HTML ``big`` element. - result = xmlCheckedTag(e, "big", commonAttr) - -macro blockquote*(e: expr): expr = - ## generates the HTML ``blockquote`` element. - result = xmlCheckedTag(e, "blockquote", " cite" & commonAttr) - -macro body*(e: expr): expr = - ## generates the HTML ``body`` element. - result = xmlCheckedTag(e, "body", commonAttr) - -macro br*(e: expr): expr = - ## generates the HTML ``br`` element. - result = xmlCheckedTag(e, "br", "", "", true) - -macro button*(e: expr): expr = - ## generates the HTML ``button`` element. - result = xmlCheckedTag(e, "button", "accesskey tabindex " & - "disabled name type value" & commonAttr) - -macro caption*(e: expr): expr = - ## generates the HTML ``caption`` element. - result = xmlCheckedTag(e, "caption", commonAttr) - -macro cite*(e: expr): expr = - ## generates the HTML ``cite`` element. - result = xmlCheckedTag(e, "cite", commonAttr) - -macro code*(e: expr): expr = - ## generates the HTML ``code`` element. - result = xmlCheckedTag(e, "code", commonAttr) - -macro col*(e: expr): expr = - ## generates the HTML ``col`` element. - result = xmlCheckedTag(e, "col", "span align valign" & commonAttr, "", true) - -macro colgroup*(e: expr): expr = - ## generates the HTML ``colgroup`` element. - result = xmlCheckedTag(e, "colgroup", "span align valign" & commonAttr) - -macro dd*(e: expr): expr = - ## generates the HTML ``dd`` element. - result = xmlCheckedTag(e, "dd", commonAttr) - -macro del*(e: expr): expr = - ## generates the HTML ``del`` element. - result = xmlCheckedTag(e, "del", "cite datetime" & commonAttr) - -macro dfn*(e: expr): expr = - ## generates the HTML ``dfn`` element. - result = xmlCheckedTag(e, "dfn", commonAttr) - -macro `div`*(e: expr): expr = - ## generates the HTML ``div`` element. - result = xmlCheckedTag(e, "div", commonAttr) - -macro dl*(e: expr): expr = - ## generates the HTML ``dl`` element. - result = xmlCheckedTag(e, "dl", commonAttr) - -macro dt*(e: expr): expr = - ## generates the HTML ``dt`` element. - result = xmlCheckedTag(e, "dt", commonAttr) - -macro em*(e: expr): expr = - ## generates the HTML ``em`` element. - result = xmlCheckedTag(e, "em", commonAttr) - -macro fieldset*(e: expr): expr = - ## generates the HTML ``fieldset`` element. - result = xmlCheckedTag(e, "fieldset", commonAttr) - -macro form*(e: expr): expr = - ## generates the HTML ``form`` element. - result = xmlCheckedTag(e, "form", "method encype accept accept-charset" & - commonAttr, "action") - -macro h1*(e: expr): expr = - ## generates the HTML ``h1`` element. - result = xmlCheckedTag(e, "h1", commonAttr) - -macro h2*(e: expr): expr = - ## generates the HTML ``h2`` element. - result = xmlCheckedTag(e, "h2", commonAttr) - -macro h3*(e: expr): expr = - ## generates the HTML ``h3`` element. - result = xmlCheckedTag(e, "h3", commonAttr) - -macro h4*(e: expr): expr = - ## generates the HTML ``h4`` element. - result = xmlCheckedTag(e, "h4", commonAttr) - -macro h5*(e: expr): expr = - ## generates the HTML ``h5`` element. - result = xmlCheckedTag(e, "h5", commonAttr) - -macro h6*(e: expr): expr = - ## generates the HTML ``h6`` element. - result = xmlCheckedTag(e, "h6", commonAttr) - -macro head*(e: expr): expr = - ## generates the HTML ``head`` element. - result = xmlCheckedTag(e, "head", "profile") - -macro html*(e: expr): expr = - ## generates the HTML ``html`` element. - result = xmlCheckedTag(e, "html", "", "xmlns") - -macro hr*(e: expr): expr = - ## generates the HTML ``hr`` element. - result = xmlCheckedTag(e, "hr", commonAttr, "", true) - -macro i*(e: expr): expr = - ## generates the HTML ``i`` element. - result = xmlCheckedTag(e, "i", commonAttr) - -macro img*(e: expr): expr = - ## generates the HTML ``img`` element. - result = xmlCheckedTag(e, "img", "longdesc height width", "src alt", true) - -macro input*(e: expr): expr = - ## generates the HTML ``input`` element. - result = xmlCheckedTag(e, "input", "name type value checked maxlength src" & - " alt accept disabled readonly accesskey tabindex" & commonAttr, "", true) - -macro ins*(e: expr): expr = - ## generates the HTML ``ins`` element. - result = xmlCheckedTag(e, "ins", "cite datetime" & commonAttr) - -macro kbd*(e: expr): expr = - ## generates the HTML ``kbd`` element. - result = xmlCheckedTag(e, "kbd", commonAttr) - -macro label*(e: expr): expr = - ## generates the HTML ``label`` element. - result = xmlCheckedTag(e, "label", "for accesskey" & commonAttr) - -macro legend*(e: expr): expr = - ## generates the HTML ``legend`` element. - result = xmlCheckedTag(e, "legend", "accesskey" & commonAttr) - -macro li*(e: expr): expr = - ## generates the HTML ``li`` element. - result = xmlCheckedTag(e, "li", commonAttr) - -macro link*(e: expr): expr = - ## generates the HTML ``link`` element. - result = xmlCheckedTag(e, "link", "href charset hreflang type rel rev media" & - commonAttr, "", true) - -macro map*(e: expr): expr = - ## generates the HTML ``map`` element. - result = xmlCheckedTag(e, "map", "class title" & eventAttr, "id", false) - -macro meta*(e: expr): expr = - ## generates the HTML ``meta`` element. - result = xmlCheckedTag(e, "meta", "name http-equiv scheme", "content", true) - -macro noscript*(e: expr): expr = - ## generates the HTML ``noscript`` element. - result = xmlCheckedTag(e, "noscript", commonAttr) - -macro `object`*(e: expr): expr = - ## generates the HTML ``object`` element. - result = xmlCheckedTag(e, "object", "classid data codebase declare type " & - "codetype archive standby width height name tabindex" & commonAttr) - -macro ol*(e: expr): expr = - ## generates the HTML ``ol`` element. - result = xmlCheckedTag(e, "ol", commonAttr) - -macro optgroup*(e: expr): expr = - ## generates the HTML ``optgroup`` element. - result = xmlCheckedTag(e, "optgroup", "disabled" & commonAttr, "label", false) - -macro option*(e: expr): expr = - ## generates the HTML ``option`` element. - result = xmlCheckedTag(e, "option", "selected value" & commonAttr) - -macro p*(e: expr): expr = - ## generates the HTML ``p`` element. - result = xmlCheckedTag(e, "p", commonAttr) - -macro param*(e: expr): expr = - ## generates the HTML ``param`` element. - result = xmlCheckedTag(e, "param", "value id type valuetype", "name", true) - -macro pre*(e: expr): expr = - ## generates the HTML ``pre`` element. - result = xmlCheckedTag(e, "pre", commonAttr) - -macro q*(e: expr): expr = - ## generates the HTML ``q`` element. - result = xmlCheckedTag(e, "q", "cite" & commonAttr) - -macro samp*(e: expr): expr = - ## generates the HTML ``samp`` element. - result = xmlCheckedTag(e, "samp", commonAttr) - -macro script*(e: expr): expr = - ## generates the HTML ``script`` element. - result = xmlCheckedTag(e, "script", "src charset defer", "type", false) - -macro select*(e: expr): expr = - ## generates the HTML ``select`` element. - result = xmlCheckedTag(e, "select", "name size multiple disabled tabindex" & - commonAttr) - -macro small*(e: expr): expr = - ## generates the HTML ``small`` element. - result = xmlCheckedTag(e, "small", commonAttr) - -macro span*(e: expr): expr = - ## generates the HTML ``span`` element. - result = xmlCheckedTag(e, "span", commonAttr) - -macro strong*(e: expr): expr = - ## generates the HTML ``strong`` element. - result = xmlCheckedTag(e, "strong", commonAttr) - -macro style*(e: expr): expr = - ## generates the HTML ``style`` element. - result = xmlCheckedTag(e, "style", "media title", "type") - -macro sub*(e: expr): expr = - ## generates the HTML ``sub`` element. - result = xmlCheckedTag(e, "sub", commonAttr) - -macro sup*(e: expr): expr = - ## generates the HTML ``sup`` element. - result = xmlCheckedTag(e, "sup", commonAttr) - -macro table*(e: expr): expr = - ## generates the HTML ``table`` element. - result = xmlCheckedTag(e, "table", "summary border cellpadding cellspacing" & - " frame rules width" & commonAttr) - -macro tbody*(e: expr): expr = - ## generates the HTML ``tbody`` element. - result = xmlCheckedTag(e, "tbody", "align valign" & commonAttr) - -macro td*(e: expr): expr = - ## generates the HTML ``td`` element. - result = xmlCheckedTag(e, "td", "colspan rowspan abbr axis headers scope" & - " align valign" & commonAttr) - -macro textarea*(e: expr): expr = - ## generates the HTML ``textarea`` element. - result = xmlCheckedTag(e, "textarea", " name disabled readonly accesskey" & - " tabindex" & commonAttr, "rows cols", false) - -macro tfoot*(e: expr): expr = - ## generates the HTML ``tfoot`` element. - result = xmlCheckedTag(e, "tfoot", "align valign" & commonAttr) - -macro th*(e: expr): expr = - ## generates the HTML ``th`` element. - result = xmlCheckedTag(e, "th", "colspan rowspan abbr axis headers scope" & - " align valign" & commonAttr) - -macro thead*(e: expr): expr = - ## generates the HTML ``thead`` element. - result = xmlCheckedTag(e, "thead", "align valign" & commonAttr) - -macro title*(e: expr): expr = - ## generates the HTML ``title`` element. - result = xmlCheckedTag(e, "title") - -macro tr*(e: expr): expr = - ## generates the HTML ``tr`` element. - result = xmlCheckedTag(e, "tr", "align valign" & commonAttr) - -macro tt*(e: expr): expr = - ## generates the HTML ``tt`` element. - result = xmlCheckedTag(e, "tt", commonAttr) - -macro ul*(e: expr): expr = - ## generates the HTML ``ul`` element. - result = xmlCheckedTag(e, "ul", commonAttr) - -macro `var`*(e: expr): expr = - ## generates the HTML ``var`` element. - result = xmlCheckedTag(e, "var", commonAttr) - -when isMainModule: - var nim = "Nimrod" - echo h1(a(href="http://force7.de/nimrod", nim)) - |