diff options
author | rumpf_a@web.de <> | 2009-10-21 10:20:15 +0200 |
---|---|---|
committer | rumpf_a@web.de <> | 2009-10-21 10:20:15 +0200 |
commit | 053309e60aee1eda594a4817ac8ac2fb8c18fb04 (patch) | |
tree | 0f1ce8b0de0b493045eb97eeca6ebf06542de601 /lib | |
parent | 581572b28c65bc9fe47974cfd625210a69be0f3f (diff) | |
download | Nim-053309e60aee1eda594a4817ac8ac2fb8c18fb04.tar.gz |
version 0.8.2
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/impure/db_postgres.nim | 129 | ||||
-rwxr-xr-x | lib/nimbase.h | 6 | ||||
-rwxr-xr-x | lib/posix/posix.nim | 2 | ||||
-rwxr-xr-x | lib/pure/hashtabs.nim | 163 | ||||
-rwxr-xr-x | lib/pure/macros.nim | 45 | ||||
-rwxr-xr-x | lib/pure/os.nim | 54 | ||||
-rwxr-xr-x | lib/pure/osproc.nim | 266 | ||||
-rwxr-xr-x | lib/pure/pegs.nim | 1320 | ||||
-rwxr-xr-x | lib/pure/re.nim | 353 | ||||
-rwxr-xr-x | lib/pure/regexprs.nim | 3 | ||||
-rwxr-xr-x | lib/pure/streams.nim | 5 | ||||
-rwxr-xr-x | lib/pure/strutils.nim | 91 | ||||
-rwxr-xr-x | lib/pure/variants.nim | 181 | ||||
-rwxr-xr-x | lib/system.nim | 16 | ||||
-rwxr-xr-x | lib/system/mm.nim | 33 | ||||
-rwxr-xr-x | lib/windows/winlean.nim | 6 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/wrappers/tre/config.h | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/wrappers/tre/tre_all.c | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | lib/wrappers/tre/version.txt | 0 |
19 files changed, 2449 insertions, 224 deletions
diff --git a/lib/impure/db_postgres.nim b/lib/impure/db_postgres.nim new file mode 100755 index 000000000..197250d9c --- /dev/null +++ b/lib/impure/db_postgres.nim @@ -0,0 +1,129 @@ +# Nimrod PostgreSQL database wrapper +# (c) 2009 Andreas Rumpf + +import strutils, postgres + +type + TDbHandle* = PGconn + TRow* = seq[string] + EDb* = object of EIO + +proc dbError(db: TDbHandle) {.noreturn.} = + ## raises an EDb exception. + var e: ref EDb + new(e) + e.msg = PQerrorMessage(db) + raise e + +proc dbError*(msg: string) {.noreturn.} = + ## raises an EDb exception with message `msg`. + var e: ref EDb + new(e) + e.msg = msg + raise e + +when false: + proc dbQueryOpt*(db: TDbHandle, query: string, args: openarray[string]) = + var stmt = mysql_stmt_init(db) + if stmt == nil: dbError(db) + if mysql_stmt_prepare(stmt, query, len(query)) != 0: + dbError(db) + var + bind: seq[MYSQL_BIND] + discard mysql_stmt_close(stmt) + +proc dbQuote(s: string): string = + result = "'" + for c in items(s): + if c == '\'': add(result, "''") + else: add(result, c) + add(result, '\'') + +proc dbFormat(formatstr: string, args: openarray[string]): string = + result = "" + var a = 0 + for c in items(formatstr): + if c == '?': + add(result, dbQuote(args[a])) + inc(a) + else: + add(result, c) + +proc dbTryQuery*(db: TDbHandle, query: string, args: openarray[string]): bool = + var q = dbFormat(query, args) + var res = PQExec(db, q) + result = PQresultStatus(res) == PGRES_COMMAND_OK + PQclear(res) + +proc dbQuery*(db: TDbHandle, query: string, args: openarray[string]) = + var q = dbFormat(query, args) + var res = PQExec(db, q) + if PQresultStatus(res) != PGRES_COMMAND_OK: dbError(db) + PQclear(res) + +proc dbTryInsertID*(db: TDbHandle, query: string, + args: openarray[string]): int64 = + var q = dbFormat(query, args) + + + if mysqlRealQuery(db, q, q.len) != 0'i32: + result = -1'i64 + else: + result = mysql_insert_id(db) + LAST_INSERT_ID() + +proc dbInsertID*(db: TDbHandle, query: string, args: openArray[string]): int64 = + result = dbTryInsertID(db, query, args) + if result < 0: dbError(db) + +proc dbQueryAffectedRows*(db: TDbHandle, query: string, + args: openArray[string]): int64 = + ## runs the query (typically "UPDATE") and returns the + ## number of affected rows + var q = dbFormat(query, args) + var res = PQExec(db, q) + if PQresultStatus(res) != PGRES_COMMAND_OK: dbError(db) + result = parseBiggestInt($PQcmdTuples(res)) + PQclear(res) + +proc newRow(L: int): TRow = + newSeq(result, L) + for i in 0..L-1: result[i] = "" + +iterator dbFastRows*(db: TDbHandle, query: string, + args: openarray[string]): TRow = + var q = dbFormat(query, args) + var res = PQExec(db, q) + if PQresultStatus(res) != PGRES_TUPLES_OK: dbError(db) + var L = int(PQnfields(res)) + var result = newRow(L) + for i in 0..PQntuples(res)-1: + for j in 0..L-1: + setLen(result[j], 0) + add(result[j], PQgetvalue(res, i, j)) + yield result + PQclear(res) + +proc dbGetAllRows*(db: TDbHandle, query: string, + args: openarray[string]): seq[TRow] = + result = @[] + for r in dbFastRows(db, query, args): + result.add(r) + +iterator dbRows*(db: TDbHandle, query: string, + args: openarray[string]): TRow = + for r in items(dbGetAllRows(db, query, args)): yield r + +proc dbGetValue*(db: TDbHandle, query: string, + args: openarray[string]): string = + result = "" + for row in dbFastRows(db, query, args): + result = row[0] + break + +proc dbClose*(db: TDbHandle) = + if db != nil: PQfinish(db) + +proc dbOpen*(connection, user, password, database: string): TDbHandle = + result = PQsetdbLogin(nil, nil, nil, nil, database, user, password) + if PQStatus(result) != CONNECTION_OK: result = nil diff --git a/lib/nimbase.h b/lib/nimbase.h index e62ad7096..a0f08f4f3 100755 --- a/lib/nimbase.h +++ b/lib/nimbase.h @@ -92,8 +92,8 @@ __TINYC__ # define N_FASTCALL_PTR(rettype, name) rettype (__fastcall *name) # define N_SAFECALL_PTR(rettype, name) rettype (__safecall *name) -# define N_LIB_EXPORT __declspec(dllexport) -# define N_LIB_IMPORT __declspec(dllimport) +# define N_LIB_EXPORT extern __declspec(dllexport) +# define N_LIB_IMPORT extern __declspec(dllimport) #else # define N_CDECL(rettype, name) rettype name # define N_STDCALL(rettype, name) rettype name @@ -107,7 +107,7 @@ __TINYC__ # define N_FASTCALL_PTR(rettype, name) rettype (*name) # define N_SAFECALL_PTR(rettype, name) rettype (*name) -# define N_LIB_EXPORT +# define N_LIB_EXPORT extern # define N_LIB_IMPORT extern #endif diff --git a/lib/posix/posix.nim b/lib/posix/posix.nim index 5c8103b9a..ddeaec664 100755 --- a/lib/posix/posix.nim +++ b/lib/posix/posix.nim @@ -1222,6 +1222,8 @@ var SC_XOPEN_STREAMS*{.importc: "_SC_XOPEN_STREAMS", header: "<unistd.h>".}: cint SC_XOPEN_UNIX*{.importc: "_SC_XOPEN_UNIX", header: "<unistd.h>".}: cint SC_XOPEN_VERSION*{.importc: "_SC_XOPEN_VERSION", header: "<unistd.h>".}: cint + SC_NPROCESSORS_ONLN*{.importc: "_SC_NPROCESSORS_ONLN", + header: "<unistd.h>".}: cint SEM_FAILED* {.importc, header: "<semaphore.h>".}: pointer IPC_CREAT* {.importc, header: "<sys/ipc.h>".}: cint diff --git a/lib/pure/hashtabs.nim b/lib/pure/hashtabs.nim new file mode 100755 index 000000000..68d19d63b --- /dev/null +++ b/lib/pure/hashtabs.nim @@ -0,0 +1,163 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## The ``hashtabs`` module implements an efficient generic hash +## table/dictionary data type. + +import + hashes + +const + growthFactor = 2 + startSize = 8 + sham = sizeof(THash)*8-2 # shift amount + mask = 0b11 shl sham + usedSlot = 0b10 shl sham + delSlot = 0b01 shl sham + emptySlot = 0 + +type + TTable*[TKey, TValue] = object + counter: int + data: seq[tuple[key: TKey, val: TValue, h: THash]] + +proc init*(t: var TTable, size = startSize) = + t.counter = 0 + newSeq(t.data, size) + +proc markUsed(h: THash): THash {.inline.} = + return h and not mask or usedSlot + +proc len*(t: TTable): int {.inline.} = + ## returns the number of keys in `t`. + result = t.counter + +proc mustRehash(length, counter: int): bool = + assert(length > counter) + result = (length * 2 < counter * 3) or (length - counter < 4) + +proc nextTry(h, maxHash: THash): THash {.inline.} = + result = ((5 * h) + 1) and maxHash + +template eq(a, b: expr): expr = a == b + +proc rawGet(t: TTable, key: TKey, fullhash: THash): int = + var h = fullhash and high(t.data) + while (t.data[h].h and mask) != 0: + # If it is a deleted entry, the comparison with ``markUsed(fullhash)`` + # fails, so there is no need to check for this explicitely. + if t.data[h].h == markUsed(fullhash) and eq(t.data[h].key, key): return h + h = nextTry(h, high(t.data)) + result = - 1 + +proc `[]`*(t: TTable, key: TKey): TValue = + ## retrieves the value at ``t[key]``. If `key` is not in `t`, + ## `EInvalidValue` is raised. + var index = rawGet(t, key, hash(key)) + if index >= 0: result = t.data[index].val + else: + var e: ref EInvalidValue + new(e) + e.msg = "invalid key: " & $key + raise e + +proc hasKey*(t: TTable, key: TKey): bool = + ## returns true iff `key` is in the table `t`. + result = rawGet(t, key) >= 0 + +proc rawInsert[TKey, TValue]( + data: var seq[tuple[key: TKey, val: TValue, h: THash]], + tup: tuple[key: TKey, val: TValue, h: THash]) = + var h = tup.h and high(data) + while (data[h].h and mask) == usedSlot: h = nextTry(h, high(data)) + data[h] = tup + +proc enlarge(t: var TTable) = + var n: seq[tuple[key: TKey, val: TValue, h: THash]] + newSeq(n, len(t.data) * growthFactor) + for i in 0..high(t.data): + if (t.data[i].h and mask) == usedSlot: rawInsert(n, t.data[i]) + swap(t.data, n) + +proc `[]=`*(t: var TTable, key: TKey, val: TValue) = + ## puts a (key, value)-pair into `t`. + var fullhash = hash(key) + var index = rawGet(t, key, fullhash) + if index >= 0: + t.data[index].val = val + else: + if mustRehash(len(t.data), t.counter): enlarge(t) + rawInsert(t.data, (key, val, markUsed(fullhash))) + inc(t.counter) + +proc add*(t: var TTable, key: TKey, val: TValue) = + ## puts a (key, value)-pair into `t`, but does not check if key already + ## exists. + if mustRehash(len(t.data), t.counter): enlarge(t) + rawInsert(t.data, (key, val, markUsed(hash(key)))) + inc(t.counter) + +proc del*(t: var TTable, key: TKey) = + ## deletes a (key, val)-pair in `t`. + var index = rawGet(t, key) + if index >= 0: + t.data[index].h = delSlot + +proc delAll*(t: var TTable, key: TKey) = + ## deletes all (key, val)-pairs in `t`. + while true: + var index = rawGet(t, key) + if index < 0: break + t.data[index].h = delSlot + +iterator pairs*(t: TTable): tuple[key: TKey, value: TValue] = + ## iterates over any (key, value) pair in the table `t`. + for h in 0..high(t.data): + if (t.data[h].h and mask) == usedSlot: + yield (t.data[h].key, t.data[h].val) + +iterator keys*(t: TTable): TKey = + ## iterate over any key in the table `t`. If key occurs multiple times, it + ## is yielded multiple times. + for h in 0..high(t.data): + if (t.data[h].h and mask) == usedSlot: + yield t.data[h].key + +iterator values*(t: TTable): TValue = + ## iterate over any value in the table `t`. + for h in 0..high(t.data): + if (t.data[h].h and mask) == usedSlot: + yield t.data[h].val + +iterator values*(t: TTable, key: TKey): TValue = + ## iterate over any value associated with `key` in `t`. + var fullhash = hash(key) + var h = fullhash and high(t.data) + while (t.data[h].h and mask) != 0: + # If it is a deleted entry, the comparison with ``markUsed(fullhash)`` + # fails, so there is no need to check for this explicitely. + if t.data[h].h == markUsed(fullhash) and eq(t.data[h].key, key): + yield t.data[h].val + h = nextTry(h, high(t.data)) + +proc `$`*[KeyToStr=`$`, ValueToStr=`$`](t: TTable): string = + ## turns the table into its string representation. `$` must be available + ## for TKey and TValue for this to work. + if t.len == 0: + result = "{:}" + else: + result = "{" + var i = 0 + for k, v in pairs(t): + if i > 0: add(result, ", ") + add(result, KeyToStr(k)) + add(result, ": ") + add(result, ValueToStr(v)) + inc(i) + add(result, "}") diff --git a/lib/pure/macros.nim b/lib/pure/macros.nim index 341a7cb61..5129cde1b 100755 --- a/lib/pure/macros.nim +++ b/lib/pure/macros.nim @@ -45,27 +45,26 @@ type nnkBracket, nnkBracketExpr, nnkPragmaExpr, nnkRange, nnkDotExpr, nnkCheckedFieldExpr, nnkDerefExpr, nnkIfExpr, nnkElifExpr, nnkElseExpr, nnkLambda, nnkAccQuoted, - nnkTableConstr, nnkQualified, nnkBind, nnkSymChoice, - nnkHiddenStdConv, nnkHiddenSubConv, nnkHiddenCallConv, nnkConv, - nnkCast, nnkAddr, nnkHiddenAddr, nnkHiddenDeref, - nnkObjDownConv, nnkObjUpConv, nnkChckRangeF, nnkChckRange64, - nnkChckRange, nnkStringToCString, nnkCStringToString, nnkPassAsOpenArray, - nnkAsgn, nnkFastAsgn, nnkGenericParams, nnkFormalParams, - nnkOfInherit, nnkModule, nnkProcDef, nnkMethodDef, - nnkConverterDef, nnkMacroDef, nnkTemplateDef, nnkIteratorDef, - nnkOfBranch, nnkElifBranch, nnkExceptBranch, nnkElse, - nnkMacroStmt, nnkAsmStmt, nnkPragma, nnkIfStmt, - nnkWhenStmt, nnkForStmt, nnkWhileStmt, nnkCaseStmt, - nnkVarSection, nnkConstSection, nnkConstDef, nnkTypeSection, - nnkTypeDef, nnkYieldStmt, nnkTryStmt, nnkFinally, - nnkRaiseStmt, nnkReturnStmt, nnkBreakStmt, nnkContinueStmt, - nnkBlockStmt, nnkDiscardStmt, nnkStmtList, nnkImportStmt, - nnkFromStmt, nnkIncludeStmt, nnkCommentStmt, nnkStmtListExpr, - nnkBlockExpr, nnkStmtListType, nnkBlockType, nnkTypeOfExpr, - nnkObjectTy, nnkTupleTy, nnkRecList, nnkRecCase, - nnkRecWhen, nnkRefTy, nnkPtrTy, nnkVarTy, - nnkDistinctTy, nnkProcTy, nnkEnumTy, nnkEnumFieldDef, - nnkReturnToken + nnkTableConstr, nnkBind, nnkSymChoice, nnkHiddenStdConv, + nnkHiddenSubConv, nnkHiddenCallConv, nnkConv, nnkCast, + nnkAddr, nnkHiddenAddr, nnkHiddenDeref, nnkObjDownConv, + nnkObjUpConv, nnkChckRangeF, nnkChckRange64, nnkChckRange, + nnkStringToCString, nnkCStringToString, nnkPassAsOpenArray, nnkAsgn, + nnkFastAsgn, nnkGenericParams, nnkFormalParams, nnkOfInherit, + nnkModule, nnkProcDef, nnkMethodDef, nnkConverterDef, + nnkMacroDef, nnkTemplateDef, nnkIteratorDef, nnkOfBranch, + nnkElifBranch, nnkExceptBranch, nnkElse, nnkMacroStmt, + nnkAsmStmt, nnkPragma, nnkIfStmt, nnkWhenStmt, + nnkForStmt, nnkWhileStmt, nnkCaseStmt, nnkVarSection, + nnkConstSection, nnkConstDef, nnkTypeSection, nnkTypeDef, + nnkYieldStmt, nnkTryStmt, nnkFinally, nnkRaiseStmt, + nnkReturnStmt, nnkBreakStmt, nnkContinueStmt, nnkBlockStmt, + nnkDiscardStmt, nnkStmtList, nnkImportStmt, nnkFromStmt, + nnkIncludeStmt, nnkCommentStmt, nnkStmtListExpr, nnkBlockExpr, + nnkStmtListType, nnkBlockType, nnkTypeOfExpr, nnkObjectTy, + nnkTupleTy, nnkRecList, nnkRecCase, nnkRecWhen, + nnkRefTy, nnkPtrTy, nnkVarTy, nnkDistinctTy, + nnkProcTy, nnkEnumTy, nnkEnumFieldDef, nnkReturnToken TNimNodeKinds* = set[TNimrodNodeKind] TNimrodTypeKind* = enum ntyNone, ntyBool, ntyChar, ntyEmpty, @@ -90,7 +89,7 @@ type #[[[end]]] type - TNimrodIdent = object of TObject + TNimrodIdent* = object of TObject ## represents a Nimrod identifier in the AST TNimrodSymbol {.final.} = object # hidden @@ -134,7 +133,7 @@ proc add*(father, child: PNimrodNode) {.magic: "NAdd".} proc add*(father: PNimrodNode, children: openArray[PNimrodNode]) {. magic: "NAddMultiple".} - ## adds each `children` to the `father` node + ## adds each child of `children` to the `father` node proc del*(father: PNimrodNode, idx = 0, n = 1) {.magic: "NDel".} ## deletes `n` children of `father` starting at index `idx`. diff --git a/lib/pure/os.nim b/lib/pure/os.nim index 83cdbc0dd..a2a1830f7 100755 --- a/lib/pure/os.nim +++ b/lib/pure/os.nim @@ -790,7 +790,7 @@ iterator walkFiles*(pattern: string): string = if res != -1: while true: if f.cFileName[0] != '.': - yield extractDir(pattern) / extractFilename($f.cFileName) + yield splitFile(pattern).dir / extractFilename($f.cFileName) if findnextFileA(res, f) == 0'i32: break findclose(res) else: # here we use glob @@ -811,8 +811,12 @@ type TPathComponent* = enum ## Enumeration specifying a path component. pcFile, ## path refers to a file pcLinkToFile, ## path refers to a symbolic link to a file - pcDirectory, ## path refers to a directory - pcLinkToDirectory ## path refers to a symbolic link to a directory + pcDir, ## path refers to a directory + pcLinkToDir ## path refers to a symbolic link to a directory + +const + pcDirectory* {.deprecated.} = pcDir ## deprecated alias + pcLinkToDirectory* {.deprecated.} = pcLinkToDir ## deprecated alias iterator walkDir*(dir: string): tuple[kind: TPathComponent, path: string] = ## walks over the directory `dir` and yields for each directory or file in @@ -843,7 +847,7 @@ iterator walkDir*(dir: string): tuple[kind: TPathComponent, path: string] = var k = pcFile if f.cFilename[0] != '.': if (f.dwFileAttributes and FILE_ATTRIBUTE_DIRECTORY) != 0'i32: - k = pcDirectory + k = pcDir yield (k, dir / extractFilename($f.cFilename)) if findnextFileA(h, f) == 0'i32: break findclose(h) @@ -859,11 +863,33 @@ iterator walkDir*(dir: string): tuple[kind: TPathComponent, path: string] = y = dir / y if stat(y, s) < 0'i32: break var k = pcFile - if S_ISDIR(s.st_mode): k = pcDirectory + if S_ISDIR(s.st_mode): k = pcDir if S_ISLNK(s.st_mode): k = succ(k) yield (k, y) discard closeDir(d) +iterator walkDirRec*(dir: string, filter={pcFile, pcDir}): string = + ## walks over the directory `dir` and yields for each file in `dir`. The + ## full path for each file is returned. + ## Walking is recursive. `filter` controls the behaviour of the iterator: + ## + ## --------------------- --------------------------------------------- + ## filter meaning + ## --------------------- --------------------------------------------- + ## ``pcFile`` yield real files + ## ``pcLinkToFile`` yield symbol links to files + ## ``pcDir`` follow real directories + ## ``pcLinkToDir`` follow symbol links to directories + ## --------------------- --------------------------------------------- + ## + var stack = @[dir] + while stack.len > 0: + for k,p in walkDir(stack.pop()): + if k in filter: + case k + of pcFile, pcLinkToFile: yield p + of pcDir, pcLinkToDir: stack.add(p) + proc rawRemoveDir(dir: string) = when defined(windows): if RemoveDirectoryA(dir) == 0'i32: OSError() @@ -871,12 +897,12 @@ proc rawRemoveDir(dir: string) = if rmdir(dir) != 0'i32: OSError() proc removeDir*(dir: string) = - ## Removes the directory `dir` including all subdirectories or files + ## Removes the directory `dir` including all subdirectories and files ## in `dir` (recursively). If this fails, `EOS` is raised. for kind, path in walkDir(dir): case kind - of pcFile, pcLinkToFile, pcLinkToDirectory: removeFile(path) - of pcDirectory: removeDir(dir) + of pcFile, pcLinkToFile, pcLinkToDir: removeFile(path) + of pcDir: removeDir(dir) rawRemoveDir(dir) proc rawCreateDir(dir: string) = @@ -935,7 +961,7 @@ type fpOthersRead ## read access for others proc getFilePermissions*(filename: string): set[TFilePermission] = - ## retrives file permissions for `filename`. `OSError` is raised in case of + ## retrieves file permissions for `filename`. `OSError` is raised in case of ## an error. On Windows, only the ``readonly`` flag is checked, every other ## permission is available in any case. when defined(posix): @@ -1103,4 +1129,14 @@ proc getApplicationDir*(): string = ## Returns the directory of the application's executable. result = splitFile(getApplicationFilename()).dir +proc sleep*(milsecs: int) = + ## sleeps `milsecs` milliseconds. + when defined(windows): + winlean.sleep(int32(milsecs)) + else: + var a, b: Ttimespec + a.tv_sec = TTime(milsecs div 1000) + a.tv_nsec = (milsecs mod 1000) * 1000 + discard posix.nanosleep(a, b) + {.pop.} diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim index 7d017616e..d76825531 100755 --- a/lib/pure/osproc.nim +++ b/lib/pure/osproc.nim @@ -9,19 +9,19 @@ ## This module implements an advanced facility for executing OS processes ## and process communication. -## **On Windows this module does not work properly. Please help!** import - os, strtabs, streams + strutils, os, strtabs, streams when defined(windows): import winlean +else: + import posix type TProcess = object of TObject when defined(windows): FProcessHandle: Thandle - FThreadHandle: Thandle inputHandle, outputHandle, errorHandle: TFileHandle else: inputHandle, outputHandle, errorHandle: TFileHandle @@ -31,10 +31,11 @@ type PProcess* = ref TProcess ## represents an operating system process TProcessOption* = enum ## options that can be passed `startProcess` - poNone, ## none option + poEchoCmd, ## echo the command before execution poUseShell, ## use the shell to execute the command; NOTE: This ## often creates a security whole! - poStdErrToStdOut ## merge stdout and stderr to the stdout stream + poStdErrToStdOut, ## merge stdout and stderr to the stdout stream + poParentStreams ## use the parent's streams proc execProcess*(command: string, options: set[TProcessOption] = {poStdErrToStdOut, @@ -77,18 +78,6 @@ proc startProcess*(command: string, ## Return value: The newly created process object. Nil is never returned, ## but ``EOS`` is raised in case of an error. -when true: - nil -else: - proc startGUIProcess*(command: string, - workingDir: string = "", - args: openarray[string] = [], - env: PStringTable = nil, - x = -1, - y = -1, - width = -1, - height = -1): PProcess - proc suspend*(p: PProcess) ## Suspends the process `p`. @@ -117,13 +106,108 @@ proc outputStream*(p: PProcess): PStream proc errorStream*(p: PProcess): PStream ## returns ``p``'s output stream for reading from +when defined(macosx) or defined(bsd): + const + CTL_HW = 6 + HW_AVAILCPU = 25 + HW_NCPU = 3 + proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, + a: var int, b: pointer, c: int): cint {. + importc: "sysctl", header: "<sys/sysctl.h>".} + +proc countProcessors*(): int = + ## returns the numer of the processors/cores the machine has. + ## Returns 0 if it cannot be determined. + when defined(windows): + var x = getenv("NUMBER_OF_PROCESSORS") + if x.len > 0: result = parseInt(x) + elif defined(macosx) or defined(bsd): + var + mib: array[0..3, cint] + len, numCPU: int + mib[0] = CTL_HW + mib[1] = HW_AVAILCPU + len = sizeof(numCPU) + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + if numCPU < 1: + mib[1] = HW_NCPU + discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) + result = numCPU + elif defined(hpux): + result = mpctl(MPC_GETNUMSPUS, nil, nil) + elif defined(irix): + var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint + result = sysconf(SC_NPROC_ONLN) + else: + result = sysconf(SC_NPROCESSORS_ONLN) + if result <= 0: result = 1 + +proc startProcessAux(cmd: string, options: set[TProcessOption]): PProcess = + var c = parseCmdLine(cmd) + var a: seq[string] = @[] # slicing is not yet implemented :-( + for i in 1 .. c.len-1: add(a, c[i]) + result = startProcess(command=c[0], args=a, options=options) + +proc execProcesses*(cmds: openArray[string], + options = {poStdErrToStdOut, poParentStreams}, + n = countProcessors()): int = + ## executes the commands `cmds` in parallel. Creates `n` processes + ## that execute in parallel. The highest return value of all processes + ## is returned. + assert n > 0 + if n > 1: + var q: seq[PProcess] + newSeq(q, n) + var m = min(n, cmds.len) + for i in 0..m-1: + q[i] = startProcessAux(cmds[i], options=options) + when defined(noBusyWaiting): + var r = 0 + for i in m..high(cmds): + when defined(debugExecProcesses): + var err = "" + var outp = outputStream(q[r]) + while running(q[r]) or not outp.atEnd(outp): + err.add(outp.readLine()) + err.add("\n") + echo(err) + result = max(waitForExit(q[r]), result) + q[r] = startProcessAux(cmds[i], options=options) + r = (r + 1) mod n + else: + var i = m + while i <= high(cmds): + sleep(50) + for r in 0..n-1: + if not running(q[r]): + #echo(outputStream(q[r]).readLine()) + result = max(waitForExit(q[r]), result) + q[r] = startProcessAux(cmds[i], options=options) + inc(i) + if i > high(cmds): break + for i in 0..m-1: + result = max(waitForExit(q[i]), result) + else: + for i in 0..high(cmds): + var p = startProcessAux(cmds[i], options=options) + result = max(waitForExit(p), result) + +when true: + nil +else: + proc startGUIProcess*(command: string, + workingDir: string = "", + args: openarray[string] = [], + env: PStringTable = nil, + x = -1, + y = -1, + width = -1, + height = -1): PProcess + proc execProcess(command: string, options: set[TProcessOption] = {poStdErrToStdOut, poUseShell}): string = - var c = parseCmdLine(command) - var a: seq[string] = @[] # slicing is not yet implemented :-( - for i in 1 .. c.len-1: add(a, c[i]) - var p = startProcess(command=c[0], args=a, options=options) + var p = startProcessAux(command, options=options) var outp = outputStream(p) result = "" while running(p) or not outp.atEnd(outp): @@ -147,15 +231,19 @@ when defined(Windows): atTheEnd: bool proc hsClose(s: PFileHandleStream) = nil # nothing to do here - proc hsAtEnd(s: PFileHandleStream): bool = return true + proc hsAtEnd(s: PFileHandleStream): bool = return s.atTheEnd proc hsReadData(s: PFileHandleStream, buffer: pointer, bufLen: int): int = + if s.atTheEnd: return 0 var br: int32 var a = winlean.ReadFile(s.handle, buffer, bufLen, br, nil) - if a == 0: OSError() + # TRUE and zero bytes returned (EOF). + # TRUE and n (>0) bytes returned (good data). + # FALSE and bytes returned undefined (system error). + if a == 0 and br != 0: OSError() + s.atTheEnd = br < bufLen result = br - #atEnd = bytesRead < bufLen - + proc hsWriteData(s: PFileHandleStream, buffer: pointer, bufLen: int) = var bytesWritten: int32 var a = winlean.writeFile(s.handle, buffer, bufLen, bytesWritten, nil) @@ -168,18 +256,14 @@ when defined(Windows): result.atEnd = hsAtEnd result.readData = hsReadData result.writeData = hsWriteData - + proc buildCommandLine(a: string, args: openarray[string]): cstring = - var L = a.len - for i in 0..high(args): inc(L, args[i].len+1) - result = cast[cstring](alloc0(L+1)) - copyMem(result, cstring(a), a.len) - L = a.len - for i in 0..high(args): - result[L] = ' ' - inc(L) - copyMem(addr(result[L]), cstring(args[i]), args[i].len) - inc(L, args[i].len) + var res = quoteIfContainsWhite(a) + for i in 0..high(args): + res.add(' ') + res.add(quoteIfContainsWhite(args[i])) + result = cast[cstring](alloc0(res.len+1)) + copyMem(result, cstring(res), res.len) proc buildEnv(env: PStringTable): cstring = var L = 0 @@ -198,72 +282,81 @@ when defined(Windows): # O_WRONLY {.importc: "_O_WRONLY", header: "<fcntl.h>".}: int # O_RDONLY {.importc: "_O_RDONLY", header: "<fcntl.h>".}: int - proc CreatePipeHandles(Inhandle, OutHandle: var THandle) = + proc CreatePipeHandles(Rdhandle, WrHandle: var THandle) = var piInheritablePipe: TSecurityAttributes piInheritablePipe.nlength = SizeOF(TSecurityAttributes) piInheritablePipe.lpSecurityDescriptor = nil piInheritablePipe.Binherithandle = 1 - if CreatePipe(Inhandle, Outhandle, piInheritablePipe, 0) == 0'i32: + if CreatePipe(Rdhandle, Wrhandle, piInheritablePipe, 1024) == 0'i32: OSError() - proc startProcess*(command: string, + proc fileClose(h: THandle) {.inline.} = + if h > 4: discard CloseHandle(h) + + proc startProcess(command: string, workingDir: string = "", args: openarray[string] = [], env: PStringTable = nil, options: set[TProcessOption] = {poStdErrToStdOut}): PProcess = - new(result) var SI: TStartupInfo ProcInfo: TProcessInformation success: int hi, ho, he: THandle + new(result) SI.cb = SizeOf(SI) - SI.dwFlags = STARTF_USESHOWWINDOW or STARTF_USESTDHANDLES - CreatePipeHandles(SI.hStdInput, HI) - CreatePipeHandles(HO, Si.hStdOutput) - if poStdErrToStdOut in options: - SI.hStdError = SI.hStdOutput - HE = HO + if poParentStreams notin options: + SI.dwFlags = STARTF_USESTDHANDLES # STARTF_USESHOWWINDOW or + CreatePipeHandles(SI.hStdInput, HI) + CreatePipeHandles(HO, Si.hStdOutput) + if poStdErrToStdOut in options: + SI.hStdError = SI.hStdOutput + HE = HO + else: + CreatePipeHandles(HE, Si.hStdError) + result.inputHandle = hi + result.outputHandle = ho + result.errorHandle = he else: - CreatePipeHandles(HE, Si.hStdError) - result.inputHandle = hi - result.outputHandle = ho - result.errorHandle = he + SI.hStdError = GetStdHandle(STD_ERROR_HANDLE) + SI.hStdInput = GetStdHandle(STD_INPUT_HANDLE) + SI.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE) + result.inputHandle = si.hStdInput + result.outputHandle = si.hStdOutput + result.errorHandle = si.hStdError + var cmdl: cstring - if poUseShell in options: - var comspec = getEnv("COMSPEC") - var a: seq[string] = @[] - add(a, "/c") - add(a, command) - add(a, args) - cmdl = buildCommandLine(comspec, a) + if false: # poUseShell in options: + cmdl = buildCommandLine(getEnv("COMSPEC"), @["/c", command] & args) else: cmdl = buildCommandLine(command, args) var wd: cstring = nil + var e: cstring = nil if len(workingDir) > 0: wd = workingDir - if env == nil: - success = winlean.CreateProcess(nil, - cmdl, nil, nil, 0, NORMAL_PRIORITY_CLASS, nil, wd, SI, ProcInfo) - else: - var e = buildEnv(env) - success = winlean.CreateProcess(nil, - cmdl, nil, nil, 0, NORMAL_PRIORITY_CLASS, e, wd, SI, ProcInfo) - dealloc(e) + if env != nil: e = buildEnv(env) + if poEchoCmd in options: echo($cmdl) + success = winlean.CreateProcess(nil, + cmdl, nil, nil, 1, NORMAL_PRIORITY_CLASS, e, wd, SI, ProcInfo) + + if poParentStreams notin options: + FileClose(si.hStdInput) + FileClose(si.hStdOutput) + if poStdErrToStdOut notin options: + FileClose(si.hStdError) + + if e != nil: dealloc(e) dealloc(cmdl) - if success == 0: - OSError() - # NEW: - # Close the handles now so anyone waiting is woken. + if success == 0: OSError() + # Close the handle now so anyone waiting is woken: discard closeHandle(procInfo.hThread) result.FProcessHandle = procInfo.hProcess - result.FThreadHandle = procInfo.hThread result.id = procInfo.dwProcessID proc suspend(p: PProcess) = - discard SuspendThread(p.FThreadHandle) + discard SuspendThread(p.FProcessHandle) proc resume(p: PProcess) = - discard ResumeThread(p.FThreadHandle) + discard ResumeThread(p.FProcessHandle) proc running(p: PProcess): bool = var x = waitForSingleObject(p.FProcessHandle, 50) @@ -274,7 +367,6 @@ when defined(Windows): discard TerminateProcess(p.FProcessHandle, 0) proc waitForExit(p: PProcess): int = - #CloseHandle(p.FThreadHandle) discard WaitForSingleObject(p.FProcessHandle, Infinite) var res: int32 discard GetExitCodeProcess(p.FProcessHandle, res) @@ -314,17 +406,15 @@ when defined(Windows): discard CloseHandle(Process) else: - import posix - const readIdx = 0 writeIdx = 1 proc addCmdArgs(command: string, args: openarray[string]): string = - result = command + result = quoteIfContainsWhite(command) for i in 0 .. high(args): add(result, " ") - add(result, args[i]) + add(result, quoteIfContainsWhite(args[i])) proc toCStringArray(b, a: openarray[string]): cstringArray = result = cast[cstringArray](alloc0((a.len + b.len + 1) * sizeof(cstring))) @@ -344,14 +434,15 @@ else: copyMem(result[i], addr(x[0]), x.len+1) inc(i) - proc startProcess*(command: string, + proc startProcess(command: string, workingDir: string = "", args: openarray[string] = [], env: PStringTable = nil, options: set[TProcessOption] = {poStdErrToStdOut}): PProcess = - new(result) var p_stdin, p_stdout, p_stderr: array [0..1, cint] + new(result) + result.exitCode = 3 # for ``waitForExit`` if pipe(p_stdin) != 0'i32 or pipe(p_stdout) != 0'i32: OSError("failed to create a pipe") var Pid = fork() @@ -389,6 +480,8 @@ else: # too risky to raise an exception here: quit("execve call failed: " & $strerror(errno)) # Parent process. Copy process information. + if poEchoCmd in options: + echo(command & " " & join(args, " ")) result.id = pid result.inputHandle = p_stdin[writeIdx] @@ -415,9 +508,15 @@ else: if running(p): discard kill(p.id, SIGKILL) proc waitForExit(p: PProcess): int = - result = 1 - if waitPid(p.id, p.exitCode, 0) == int(p.id): - result = p.exitCode + #if waitPid(p.id, p.exitCode, 0) == int(p.id): + # ``waitPid`` fails if the process is not running anymore. But then + # ``running`` probably set ``p.exitCode`` for us. Since ``p.exitCode`` is + # initialized with 3, wrong success exit codes are prevented. + var oldExitCode = p.exitCode + if waitPid(p.id, p.exitCode, 0) < 0: + # failed, so restore old exitCode + p.exitCode = oldExitCode + result = int(p.exitCode) proc inputStream(p: PProcess): PStream = var f: TFile @@ -440,4 +539,5 @@ else: result = csystem(command) when isMainModule: - echo execCmd("gcc -v") + var x = execProcess("gcc -v") + echo "ECHO ", x diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim new file mode 100755 index 000000000..c029f92a2 --- /dev/null +++ b/lib/pure/pegs.nim @@ -0,0 +1,1320 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Simple PEG (Parsing expression grammar) matching. Uses no memorization, but +## uses superoperators and symbol inlining to improve performance. Note: +## Matching performance is hopefully competitive with optimized regular +## expression engines. +## +## .. include:: ../doc/pegdocs.txt +## + +const + useUnicode = true ## change this to deactivate proper UTF-8 support + +import + strutils + +when useUnicode: + import unicode + +const + InlineThreshold = 5 ## number of leaves; -1 to disable inlining + +type + TPegKind = enum + pkEmpty, + pkAny, ## any character (.) + pkAnyRune, ## any Unicode character (_) + pkNewLine, ## CR-LF, LF, CR + pkTerminal, + pkTerminalIgnoreCase, + pkTerminalIgnoreStyle, + pkChar, ## single character to match + pkCharChoice, + pkNonTerminal, + pkSequence, ## a b c ... --> Internal DSL: peg(a, b, c) + pkOrderedChoice, ## a / b / ... --> Internal DSL: a / b or /[a, b, c] + pkGreedyRep, ## a* --> Internal DSL: *a + ## a+ --> Internal DSL: +a; represented as (a a*) + pkGreedyRepChar, ## x* where x is a single character (superop) + pkGreedyRepSet, ## [set]* (superop) + pkGreedyAny, ## .* or _* (superop) + pkOption, ## a? --> Internal DSL: ?a + pkAndPredicate, ## &a --> Internal DSL: &a + pkNotPredicate, ## !a --> Internal DSL: !a + pkCapture, ## {a} --> Internal DSL: capture(a) + pkRule, ## a <- b + pkList ## a, b + TNonTerminalFlag = enum + ntDeclared, ntUsed + TNonTerminal {.final.} = object ## represents a non terminal symbol + name: string ## the name of the symbol + line: int ## the line the symbol has been declared/used in + col: int ## the column the symbol has been declared/used in + flags: set[TNonTerminalFlag] ## the nonterminal's flags + rule: TNode ## the rule that the symbol refers to + TNode {.final.} = object + case kind: TPegKind + of pkEmpty, pkAny, pkAnyRune, pkGreedyAny, pkNewLine: nil + of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle: term: string + of pkChar, pkGreedyRepChar: ch: char + of pkCharChoice, pkGreedyRepSet: charChoice: ref set[char] + of pkNonTerminal: nt: PNonTerminal + else: sons: seq[TNode] + PNonTerminal* = ref TNonTerminal + + TPeg* = TNode ## type that represents a PEG + +proc term*(t: string): TPeg = + ## constructs a PEG from a terminal string + if t.len != 1: + result.kind = pkTerminal + result.term = t + else: + result.kind = pkChar + result.ch = t[0] + +proc termIgnoreCase*(t: string): TPeg = + ## constructs a PEG from a terminal string; ignore case for matching + result.kind = pkTerminalIgnoreCase + result.term = t + +proc termIgnoreStyle*(t: string): TPeg = + ## constructs a PEG from a terminal string; ignore style for matching + result.kind = pkTerminalIgnoreStyle + result.term = t + +proc term*(t: char): TPeg = + ## constructs a PEG from a terminal char + assert t != '\0' + result.kind = pkChar + result.ch = t + +proc charSet*(s: set[char]): TPeg = + ## constructs a PEG from a character set `s` + assert '\0' notin s + result.kind = pkCharChoice + new(result.charChoice) + result.charChoice^ = s + +proc len(a: TPeg): int {.inline.} = return a.sons.len +proc add(d: var TPeg, s: TPeg) {.inline.} = add(d.sons, s) + +proc addChoice(dest: var TPeg, elem: TPeg) = + var L = dest.len-1 + if L >= 0 and dest.sons[L].kind == pkCharChoice: + case elem.kind + of pkCharChoice: + dest.sons[L].charChoice^ = dest.sons[L].charChoice^ + elem.charChoice^ + of pkChar: incl(dest.sons[L].charChoice^, elem.ch) + else: add(dest, elem) + else: add(dest, elem) + +template multipleOp(k: TPegKind, localOpt: expr) = + result.kind = k + result.sons = @[] + for x in items(a): + if x.kind == k: + for y in items(x.sons): + localOpt(result, y) + else: + localOpt(result, x) + if result.len == 1: + result = result.sons[0] + +proc `/`*(a: openArray[TPeg]): TPeg = + ## constructs an ordered choice with the PEGs in `a` + multipleOp(pkOrderedChoice, addChoice) + +proc addSequence(dest: var TPeg, elem: TPeg) = + var L = dest.len-1 + if L >= 0 and dest.sons[L].kind == pkTerminal: + case elem.kind + of pkTerminal: add(dest.sons[L].term, elem.term) + of pkChar: add(dest.sons[L].term, elem.ch) + else: add(dest, elem) + else: add(dest, elem) + +proc sequence*(a: openArray[TPeg]): TPeg = + ## constructs a sequence with all the PEGs from `a` + multipleOp(pkSequence, addSequence) + +proc `?`*(a: TPeg): TPeg = + ## constructs an optional piece with the PEG `a` + if a.kind in {pkOption, pkGreedyRep, pkGreedyAny, pkGreedyRepChar, + pkGreedyRepSet}: + # a* ? --> a* + # a? ? --> a? + result = a + else: + result.kind = pkOption + result.sons = @[a] + +proc `*`*(a: TPeg): TPeg = + ## constructs a "greedy repetition" piece the PEG `a` + case a.kind + of pkGreedyRep, pkGreedyRepChar, pkGreedyRepSet, pkGreedyAny, pkOption: + assert false + # produces endless loop! + of pkChar: + result.kind = pkGreedyRepChar + result.ch = a.ch + of pkCharChoice: + result.kind = pkGreedyRepSet + result.charChoice = a.charChoice # copying a reference suffices! + of pkAny, pkAnyRune: + result.kind = pkGreedyAny + else: + result.kind = pkGreedyRep + result.sons = @[a] + +proc `+`*(a: TPeg): TPeg = + ## constructs a "greedy positive repetition" with the PEG `a` + return sequence(a, *a) + +proc `&`*(a: TPeg): TPeg = + ## constructs an "and predicate" with the PEG `a` + result.kind = pkAndPredicate + result.sons = @[a] + +proc `!`*(a: TPeg): TPeg = + ## constructs a "not predicate" with the PEG `a` + result.kind = pkNotPredicate + result.sons = @[a] + +proc any*: TPeg {.inline.} = + ## constructs the PEG `any character`:idx: (``.``) + result.kind = pkAny + +proc anyRune*: TPeg {.inline.} = + ## constructs the PEG `any rune`:idx: (``_``) + result.kind = pkAnyRune + +proc newLine*: TPeg {.inline.} = + ## constructs the PEG `newline`:idx: (``\n``) + result.kind = pkNewline + +proc capture*(a: TPeg): TPeg = + ## constructs a capture with the PEG `a` + result.kind = pkCapture + result.sons = @[a] + +proc spaceCost(n: TPeg): int = + case n.kind + of pkEmpty: nil + of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, + pkGreedyRepChar, pkCharChoice, pkGreedyRepSet, pkAny, pkAnyRune, + pkNewLine, pkGreedyAny: + result = 1 + of pkNonTerminal: + # we cannot inline a rule with a non-terminal + result = InlineThreshold+1 + else: + for i in 0..n.len-1: + inc(result, spaceCost(n.sons[i])) + if result >= InlineThreshold: break + +proc nonterminal*(n: PNonTerminal): TPeg = + ## constructs a PEG that consists of the nonterminal symbol + assert n != nil + if ntDeclared in n.flags and spaceCost(n.rule) < InlineThreshold: + when false: echo "inlining symbol: ", n.name + result = n.rule # inlining of rule enables better optimizations + else: + result.kind = pkNonTerminal + result.nt = n + +proc newNonTerminal*(name: string, line, column: int): PNonTerminal = + ## constructs a nonterminal symbol + new(result) + result.name = name + result.line = line + result.col = column + +template letters*: expr = + ## expands to ``charset({'A'..'Z', 'a'..'z'})`` + charset({'A'..'Z', 'a'..'z'}) + +template digits*: expr = + ## expands to ``charset({'0'..'9'})`` + charset({'0'..'9'}) + +template whitespace*: expr = + ## expands to ``charset({' ', '\9'..'\13'})`` + charset({' ', '\9'..'\13'}) + +template identChars*: expr = + ## expands to ``charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})`` + charset({'a'..'z', 'A'..'Z', '0'..'9', '_'}) + +template identStartChars*: expr = + ## expands to ``charset({'A'..'Z', 'a'..'z', '_'})`` + charset({'a'..'z', 'A'..'Z', '_'}) + +template ident*: expr = + ## same as ``[a-zA-Z_][a-zA-z_0-9]*``; standard identifier + sequence(charset({'a'..'z', 'A'..'Z', '_'}), + *charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})) + +template natural*: expr = + ## same as ``\d+`` + +digits + +const + MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that + ## can be captured. More subpatterns cannot be captured! + +# --------------------- core engine ------------------------------------------- + +type + TMatchClosure {.final.} = object + matches: array[0..maxSubpatterns-1, tuple[first, last: int]] + ml: int + +when not useUnicode: + type + TRune = char + template fastRuneAt(s, i, ch: expr) = + ch = s[i] + inc(i) + template runeLenAt(s, i: expr): expr = 1 + +proc m(s: string, p: TPeg, start: int, c: var TMatchClosure): int = + ## this implements a simple PEG interpreter. Thanks to superoperators it + ## has competitive performance nevertheless. + ## Returns -1 if it does not match, else the length of the match + case p.kind + of pkEmpty: result = 0 # match of length 0 + of pkAny: + if s[start] != '\0': result = 1 + else: result = -1 + of pkAnyRune: + if s[start] != '\0': + result = runeLenAt(s, start) + else: + result = -1 + of pkGreedyAny: + result = len(s) - start + of pkNewLine: + if s[start] == '\L': result = 1 + elif s[start] == '\C': + if s[start+1] == '\L': result = 2 + else: result = 1 + else: result = -1 + of pkTerminal: + result = len(p.term) + for i in 0..result-1: + if p.term[i] != s[start+i]: + result = -1 + break + of pkTerminalIgnoreCase: + var + i = 0 + a, b: TRune + result = start + while i < len(p.term): + fastRuneAt(p.term, i, a) + fastRuneAt(s, result, b) + if toLower(a) != toLower(b): + result = -1 + break + dec(result, start) + of pkTerminalIgnoreStyle: + var + i = 0 + a, b: TRune + result = start + while i < len(p.term): + while true: + fastRuneAt(p.term, i, a) + if a != TRune('_'): break + while true: + fastRuneAt(s, result, b) + if b != TRune('_'): break + if toLower(a) != toLower(b): + result = -1 + break + dec(result, start) + of pkChar: + if p.ch == s[start]: result = 1 + else: result = -1 + of pkCharChoice: + if contains(p.charChoice^, s[start]): result = 1 + else: result = -1 + of pkNonTerminal: + var oldMl = c.ml + when false: echo "enter: ", p.nt.name + result = m(s, p.nt.rule, start, c) + when false: echo "leave: ", p.nt.name + if result < 0: c.ml = oldMl + of pkSequence: + var oldMl = c.ml + result = 0 + for i in 0..high(p.sons): + var x = m(s, p.sons[i], start+result, c) + if x < 0: + c.ml = oldMl + result = -1 + break + else: inc(result, x) + of pkOrderedChoice: + var oldMl = c.ml + for i in 0..high(p.sons): + result = m(s, p.sons[i], start, c) + if result >= 0: break + c.ml = oldMl + of pkGreedyRep: + result = 0 + while true: + var x = m(s, p.sons[0], start+result, c) + # if x == 0, we have an endless loop; so the correct behaviour would be + # not to break. But endless loops can be easily introduced: + # ``(comment / \w*)*`` is such an example. Breaking for x == 0 does the + # expected thing in this case. + if x <= 0: break + inc(result, x) + of pkGreedyRepChar: + result = 0 + var ch = p.ch + while ch == s[start+result]: inc(result) + of pkGreedyRepSet: + result = 0 + while contains(p.charChoice^, s[start+result]): inc(result) + of pkOption: + result = max(0, m(s, p.sons[0], start, c)) + of pkAndPredicate: + var oldMl = c.ml + result = m(s, p.sons[0], start, c) + if result >= 0: result = 0 # do not consume anything + else: c.ml = oldMl + of pkNotPredicate: + var oldMl = c.ml + result = m(s, p.sons[0], start, c) + if result < 0: result = 0 + else: + c.ml = oldMl + result = -1 + of pkCapture: + var idx = c.ml # reserve a slot for the subpattern + inc(c.ml) + result = m(s, p.sons[0], start, c) + if result >= 0: + if idx < maxSubpatterns: + c.matches[idx] = (start, start+result-1) + #else: silently ignore the capture + else: + c.ml = idx + of pkRule, pkList: assert false + +proc match*(s: string, pattern: TPeg, matches: var openarray[string], + start = 0): bool = + ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and + ## the captured substrings in the array ``matches``. If it does not + ## match, nothing is written into ``matches`` and ``false`` is + ## returned. + var c: TMatchClosure + result = m(s, pattern, start, c) == len(s) + if result: + for i in 0..c.ml-1: + matches[i] = copy(s, c.matches[i][0], c.matches[i][1]) + +proc match*(s: string, pattern: TPeg, start = 0): bool = + ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``. + var c: TMatchClosure + result = m(s, pattern, start, c) == len(s) + +proc matchLen*(s: string, pattern: TPeg, matches: var openarray[string], + start = 0): int = + ## the same as ``match``, but it returns the length of the match, + ## if there is no match, -1 is returned. Note that a match length + ## of zero can happen. It's possible that a suffix of `s` remains + ## that does not belong to the match. + var c: TMatchClosure + result = m(s, pattern, start, c) + if result >= 0: + for i in 0..c.ml-1: + matches[i] = copy(s, c.matches[i][0], c.matches[i][1]) + +proc matchLen*(s: string, pattern: TPeg, start = 0): int = + ## the same as ``match``, but it returns the length of the match, + ## if there is no match, -1 is returned. Note that a match length + ## of zero can happen. It's possible that a suffix of `s` remains + ## that does not belong to the match. + var c: TMatchClosure + result = m(s, pattern, start, c) + +proc find*(s: string, pattern: TPeg, matches: var openarray[string], + start = 0): int = + ## returns the starting position of ``pattern`` in ``s`` and the captured + ## substrings in the array ``matches``. If it does not match, nothing + ## is written into ``matches`` and -1 is returned. + for i in 0 .. s.len-1: + if matchLen(s, pattern, matches, i) >= 0: return i + return -1 + # could also use the pattern here: (!P .)* P + +proc find*(s: string, pattern: TPeg, start = 0): int = + ## returns the starting position of ``pattern`` in ``s``. If it does not + ## match, -1 is returned. + for i in 0 .. s.len-1: + if matchLen(s, pattern, i) >= 0: return i + return -1 + +template `=~`*(s: string, pattern: TPeg): expr = + ## This calls ``match`` with an implicit declared ``matches`` array that + ## can be used in the scope of the ``=~`` call: + ## + ## .. code-block:: nimrod + ## + ## if line =~ peg"\s* {\w+} \s* '=' \s* {\w+}": + ## # matches a key=value pair: + ## echo("Key: ", matches[0]) + ## echo("Value: ", matches[1]) + ## elif line =~ peg"\s*{'#'.*}": + ## # matches a comment + ## # note that the implicit ``matches`` array is different from the + ## # ``matches`` array of the first branch + ## echo("comment: ", matches[0]) + ## else: + ## echo("syntax error") + ## + when not definedInScope(matches): + var matches: array[0..maxSubpatterns-1, string] + match(s, pattern, matches) + +# ------------------------- more string handling ------------------------------ + +proc contains*(s: string, pattern: TPeg, start = 0): bool = + ## same as ``find(s, pattern, start) >= 0`` + return find(s, pattern, start) >= 0 + +proc contains*(s: string, pattern: TPeg, matches: var openArray[string], + start = 0): bool = + ## same as ``find(s, pattern, matches, start) >= 0`` + return find(s, pattern, matches, start) >= 0 + +proc startsWith*(s: string, prefix: TPeg): bool = + ## returns true if `s` starts with the pattern `prefix` + result = matchLen(s, prefix) >= 0 + +proc endsWith*(s: string, suffix: TPeg): bool = + ## returns true if `s` ends with the pattern `prefix` + for i in 0 .. s.len-1: + if matchLen(s, suffix, i) == s.len - i: return true + +proc replace*(s: string, sub: TPeg, by: string): string = + ## Replaces `sub` in `s` by the string `by`. Captures can be accessed in `by` + ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples: + ## + ## .. code-block:: nimrod + ## "var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## + ## "var1<-keykey; val2<-key2key2" + result = "" + var i = 0 + var caps: array[0..maxSubpatterns-1, string] + while i < s.len: + var x = matchLen(s, sub, caps, i) + if x <= 0: + add(result, s[i]) + inc(i) + else: + addf(result, by, caps) + inc(i, x) + # copy the rest: + add(result, copy(s, i)) + +proc parallelReplace*(s: string, subs: openArray[ + tuple[pattern: TPeg, repl: string]]): string = + ## Returns a modified copy of `s` with the substitutions in `subs` + ## applied in parallel. + result = "" + var i = 0 + var caps: array[0..maxSubpatterns-1, string] + while i < s.len: + block searchSubs: + for j in 0..high(subs): + var x = matchLen(s, subs[j][0], caps, i) + if x > 0: + addf(result, subs[j][1], caps) + inc(i, x) + break searchSubs + add(result, s[i]) + inc(i) + # copy the rest: + add(result, copy(s, i)) + +proc transformFile*(infile, outfile: string, + subs: openArray[tuple[pattern: TPeg, repl: string]]) = + ## reads in the file `infile`, performs a parallel replacement (calls + ## `parallelReplace`) and writes back to `outfile`. Calls ``quit`` if an + ## error occurs. This is supposed to be used for quick scripting. + var x = readFile(infile) + if not isNil(x): + var f: TFile + if open(f, outfile, fmWrite): + write(f, x.parallelReplace(subs)) + close(f) + else: + quit("cannot open for writing: " & outfile) + else: + quit("cannot open for reading: " & infile) + +iterator split*(s: string, sep: TPeg): string = + ## Splits the string `s` into substrings. + ## + ## Substrings are separated by the PEG `sep`. + ## Examples: + ## + ## .. code-block:: nimrod + ## for word in split("00232this02939is39an22example111", peg"\d+"): + ## writeln(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "this" + ## "is" + ## "an" + ## "example" + ## + var + first = 0 + last = 0 + while last < len(s): + var x = matchLen(s, sep, last) + if x > 0: inc(last, x) + first = last + while last < len(s): + inc(last) + x = matchLen(s, sep, last) + if x > 0: break + if first < last: + yield copy(s, first, last-1) + +proc split*(s: string, sep: TPeg): seq[string] {.noSideEffect.} = + ## Splits the string `s` into substrings. + accumulateResult(split(s, sep)) + +# ------------------------- debugging ----------------------------------------- + +proc esc(c: char, reserved = {'\0'..'\255'}): string = + case c + of '\b': result = "\\b" + of '\t': result = "\\t" + of '\c': result = "\\c" + of '\L': result = "\\l" + of '\v': result = "\\v" + of '\f': result = "\\f" + of '\e': result = "\\e" + of '\a': result = "\\a" + of '\\': result = "\\\\" + of 'a'..'z', 'A'..'Z', '0'..'9', '_': result = $c + elif c < ' ' or c >= '\128': result = '\\' & $ord(c) + elif c in reserved: result = '\\' & c + else: result = $c + +proc singleQuoteEsc(c: Char): string = return "'" & esc(c, {'\''}) & "'" + +proc singleQuoteEsc(str: string): string = + result = "'" + for c in items(str): add result, esc(c, {'\''}) + add result, '\'' + +proc charSetEscAux(cc: set[char]): string = + const reserved = {'^', '-', ']'} + result = "" + var c1 = 0 + while c1 <= 0xff: + if chr(c1) in cc: + var c2 = c1 + while c2 < 0xff and chr(succ(c2)) in cc: inc(c2) + if c1 == c2: + add result, esc(chr(c1), reserved) + elif c2 == succ(c1): + add result, esc(chr(c1), reserved) & esc(chr(c2), reserved) + else: + add result, esc(chr(c1), reserved) & '-' & esc(chr(c2), reserved) + c1 = c2 + inc(c1) + +proc CharSetEsc(cc: set[char]): string = + if card(cc) >= 128+64: + result = "[^" & CharSetEscAux({'\1'..'\xFF'} - cc) & ']' + else: + result = '[' & CharSetEscAux(cc) & ']' + +proc toStrAux(r: TPeg, res: var string) = + case r.kind + of pkEmpty: add(res, "()") + of pkAny: add(res, '.') + of pkAnyRune: add(res, '_') + of pkNewline: add(res, "\\n") + of pkTerminal: add(res, singleQuoteEsc(r.term)) + of pkTerminalIgnoreCase: + add(res, 'i') + add(res, singleQuoteEsc(r.term)) + of pkTerminalIgnoreStyle: + add(res, 'y') + add(res, singleQuoteEsc(r.term)) + of pkChar: add(res, singleQuoteEsc(r.ch)) + of pkCharChoice: add(res, charSetEsc(r.charChoice^)) + of pkNonTerminal: add(res, r.nt.name) + of pkSequence: + add(res, '(') + toStrAux(r.sons[0], res) + for i in 1 .. high(r.sons): + add(res, ' ') + toStrAux(r.sons[i], res) + add(res, ')') + of pkOrderedChoice: + add(res, '(') + toStrAux(r.sons[0], res) + for i in 1 .. high(r.sons): + add(res, " / ") + toStrAux(r.sons[i], res) + add(res, ')') + of pkGreedyRep: + toStrAux(r.sons[0], res) + add(res, '*') + of pkGreedyRepChar: + add(res, singleQuoteEsc(r.ch)) + add(res, '*') + of pkGreedyRepSet: + add(res, charSetEsc(r.charChoice^)) + add(res, '*') + of pkGreedyAny: + add(res, ".*") + of pkOption: + toStrAux(r.sons[0], res) + add(res, '?') + of pkAndPredicate: + add(res, '&') + toStrAux(r.sons[0], res) + of pkNotPredicate: + add(res, '!') + toStrAux(r.sons[0], res) + of pkCapture: + add(res, '{') + toStrAux(r.sons[0], res) + add(res, '}') + of pkRule: + toStrAux(r.sons[0], res) + add(res, " <- ") + toStrAux(r.sons[1], res) + of pkList: + for i in 0 .. high(r.sons): + toStrAux(r.sons[i], res) + add(res, "\n") + +proc `$` *(r: TPeg): string = + ## converts a PEG to its string representation + result = "" + toStrAux(r, result) + + +# ------------------- scanner ------------------------------------------------- + +type + TModifier = enum + modNone, + modVerbatim, + modIgnoreCase, + modIgnoreStyle + TTokKind = enum ## enumeration of all tokens + tkInvalid, ## invalid token + tkEof, ## end of file reached + tkAny, ## . + tkAnyRune, ## _ + tkIdentifier, ## abc + tkStringLit, ## "abc" or 'abc' + tkCharSet, ## [^A-Z] + tkParLe, ## '(' + tkParRi, ## ')' + tkCurlyLe, ## '{' + tkCurlyRi, ## '}' + tkArrow, ## '<-' + tkBar, ## '/' + tkStar, ## '*' + tkPlus, ## '+' + tkAmp, ## '&' + tkNot, ## '!' + tkOption, ## '?' + tkBuiltin, ## \identifier + tkEscaped ## \\ + + TToken {.final.} = object ## a token + kind: TTokKind ## the type of the token + modifier: TModifier + literal: string ## the parsed (string) literal + charset: set[char] ## if kind == tkCharSet + + TPegLexer = object ## the lexer object. + bufpos: int ## the current position within the buffer + buf: cstring ## the buffer itself + LineNumber: int ## the current line number + lineStart: int ## index of last line start in buffer + colOffset: int ## column to add + filename: string + +const + tokKindToStr: array[TTokKind, string] = [ + "invalid", "[EOF]", ".", "_", "identifier", "string literal", + "character set", "(", ")", "{", "}", "<-", "/", "*", "+", "&", "!", "?", + "built-in", "escaped" + ] + +proc HandleCR(L: var TPegLexer, pos: int): int = + assert(L.buf[pos] == '\c') + inc(L.linenumber) + result = pos+1 + if L.buf[result] == '\L': inc(result) + L.lineStart = result + +proc HandleLF(L: var TPegLexer, pos: int): int = + assert(L.buf[pos] == '\L') + inc(L.linenumber) + result = pos+1 + L.lineStart = result + +proc init(L: var TPegLexer, input, filename: string, line = 1, col = 0) = + L.buf = input + L.bufpos = 0 + L.lineNumber = line + L.colOffset = col + L.lineStart = 0 + L.filename = filename + +proc getColumn(L: TPegLexer): int {.inline.} = + result = abs(L.bufpos - L.lineStart) + L.colOffset + +proc getLine(L: TPegLexer): int {.inline.} = + result = L.linenumber + +proc errorStr(L: TPegLexer, msg: string, line = -1, col = -1): string = + var line = if line < 0: getLine(L) else: line + var col = if col < 0: getColumn(L) else: col + result = "$1($2, $3) Error: $4" % [L.filename, $line, $col, msg] + +proc handleHexChar(c: var TPegLexer, xi: var int) = + case c.buf[c.bufpos] + of '0'..'9': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) + inc(c.bufpos) + of 'a'..'f': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) + inc(c.bufpos) + of 'A'..'F': + xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) + inc(c.bufpos) + else: nil + +proc getEscapedChar(c: var TPegLexer, tok: var TToken) = + inc(c.bufpos) + case c.buf[c.bufpos] + of 'r', 'R', 'c', 'C': + add(tok.literal, '\c') + Inc(c.bufpos) + of 'l', 'L': + add(tok.literal, '\L') + Inc(c.bufpos) + of 'f', 'F': + add(tok.literal, '\f') + inc(c.bufpos) + of 'e', 'E': + add(tok.literal, '\e') + Inc(c.bufpos) + of 'a', 'A': + add(tok.literal, '\a') + Inc(c.bufpos) + of 'b', 'B': + add(tok.literal, '\b') + Inc(c.bufpos) + of 'v', 'V': + add(tok.literal, '\v') + Inc(c.bufpos) + of 't', 'T': + add(tok.literal, '\t') + Inc(c.bufpos) + of 'x', 'X': + inc(c.bufpos) + var xi = 0 + handleHexChar(c, xi) + handleHexChar(c, xi) + if xi == 0: tok.kind = tkInvalid + else: add(tok.literal, Chr(xi)) + of '0'..'9': + var val = ord(c.buf[c.bufpos]) - ord('0') + Inc(c.bufpos) + var i = 1 + while (i <= 3) and (c.buf[c.bufpos] in {'0'..'9'}): + val = val * 10 + ord(c.buf[c.bufpos]) - ord('0') + inc(c.bufpos) + inc(i) + if val > 0 and val <= 255: add(tok.literal, chr(val)) + else: tok.kind = tkInvalid + of '\0'..'\31': + tok.kind = tkInvalid + elif c.buf[c.bufpos] in strutils.letters: + tok.kind = tkInvalid + else: + add(tok.literal, c.buf[c.bufpos]) + Inc(c.bufpos) + +proc skip(c: var TPegLexer) = + var pos = c.bufpos + var buf = c.buf + while true: + case buf[pos] + of ' ', '\t': + Inc(pos) + of '#': + while not (buf[pos] in {'\c', '\L', '\0'}): inc(pos) + of '\c': + pos = HandleCR(c, pos) + buf = c.buf + of '\L': + pos = HandleLF(c, pos) + buf = c.buf + else: + break # EndOfFile also leaves the loop + c.bufpos = pos + +proc getString(c: var TPegLexer, tok: var TToken) = + tok.kind = tkStringLit + var pos = c.bufPos + 1 + var buf = c.buf + var quote = buf[pos-1] + while true: + case buf[pos] + of '\\': + c.bufpos = pos + getEscapedChar(c, tok) + pos = c.bufpos + of '\c', '\L', '\0': + tok.kind = tkInvalid + break + elif buf[pos] == quote: + inc(pos) + break + else: + add(tok.literal, buf[pos]) + Inc(pos) + c.bufpos = pos + +proc getCharSet(c: var TPegLexer, tok: var TToken) = + tok.kind = tkCharSet + tok.charset = {} + var pos = c.bufPos + 1 + var buf = c.buf + var caret = false + if buf[pos] == '^': + inc(pos) + caret = true + while true: + var ch: char + case buf[pos] + of ']': + inc(pos) + break + of '\\': + c.bufpos = pos + getEscapedChar(c, tok) + pos = c.bufpos + ch = tok.literal[tok.literal.len-1] + of '\C', '\L', '\0': + tok.kind = tkInvalid + break + else: + ch = buf[pos] + Inc(pos) + incl(tok.charset, ch) + if buf[pos] == '-': + if buf[pos+1] == ']': + incl(tok.charset, '-') + inc(pos) + else: + inc(pos) + var ch2: char + case buf[pos] + of '\\': + c.bufpos = pos + getEscapedChar(c, tok) + pos = c.bufpos + ch2 = tok.literal[tok.literal.len-1] + of '\C', '\L', '\0': + tok.kind = tkInvalid + break + else: + ch2 = buf[pos] + Inc(pos) + for i in ord(ch)+1 .. ord(ch2): + incl(tok.charset, chr(i)) + c.bufpos = pos + if caret: tok.charset = {'\1'..'\xFF'} - tok.charset + +proc getSymbol(c: var TPegLexer, tok: var TToken) = + var pos = c.bufpos + var buf = c.buf + while true: + add(tok.literal, buf[pos]) + Inc(pos) + if buf[pos] notin strutils.IdentChars: break + c.bufpos = pos + tok.kind = tkIdentifier + +proc getBuiltin(c: var TPegLexer, tok: var TToken) = + if c.buf[c.bufpos+1] in strutils.Letters: + inc(c.bufpos) + getSymbol(c, tok) + tok.kind = tkBuiltin + else: + tok.kind = tkEscaped + getEscapedChar(c, tok) # may set tok.kind to tkInvalid + +proc getTok(c: var TPegLexer, tok: var TToken) = + tok.kind = tkInvalid + tok.modifier = modNone + setlen(tok.literal, 0) + skip(c) + case c.buf[c.bufpos] + of '{': + tok.kind = tkCurlyLe + inc(c.bufpos) + add(tok.literal, '{') + of '}': + tok.kind = tkCurlyRi + inc(c.bufpos) + add(tok.literal, '}') + of '[': + getCharset(c, tok) + of '(': + tok.kind = tkParLe + Inc(c.bufpos) + add(tok.literal, '(') + of ')': + tok.kind = tkParRi + Inc(c.bufpos) + add(tok.literal, ')') + of '.': + tok.kind = tkAny + inc(c.bufpos) + add(tok.literal, '.') + of '_': + tok.kind = tkAnyRune + inc(c.bufpos) + add(tok.literal, '_') + of '\\': + getBuiltin(c, tok) + of '\'', '"': getString(c, tok) + of '\0': + tok.kind = tkEof + tok.literal = "[EOF]" + of 'a'..'z', 'A'..'Z', '\128'..'\255': + getSymbol(c, tok) + if c.buf[c.bufpos] in {'\'', '"'}: + case tok.literal + of "i": tok.modifier = modIgnoreCase + of "y": tok.modifier = modIgnoreStyle + of "v": tok.modifier = modVerbatim + else: nil + setLen(tok.literal, 0) + getString(c, tok) + if tok.modifier == modNone: tok.kind = tkInvalid + of '+': + tok.kind = tkPlus + inc(c.bufpos) + add(tok.literal, '+') + of '*': + tok.kind = tkStar + inc(c.bufpos) + add(tok.literal, '+') + of '<': + if c.buf[c.bufpos+1] == '-': + inc(c.bufpos, 2) + tok.kind = tkArrow + add(tok.literal, "<-") + else: + add(tok.literal, '<') + of '/': + tok.kind = tkBar + inc(c.bufpos) + add(tok.literal, '/') + of '?': + tok.kind = tkOption + inc(c.bufpos) + add(tok.literal, '?') + of '!': + tok.kind = tkNot + inc(c.bufpos) + add(tok.literal, '!') + of '&': + tok.kind = tkAmp + inc(c.bufpos) + add(tok.literal, '!') + else: + add(tok.literal, c.buf[c.bufpos]) + inc(c.bufpos) + +proc arrowIsNextTok(c: TPegLexer): bool = + # the only look ahead we need + var pos = c.bufpos + while c.buf[pos] in {'\t', ' '}: inc(pos) + result = c.buf[pos] == '<' and c.buf[pos+1] == '-' + +# ----------------------------- parser ---------------------------------------- + +type + EInvalidPeg* = object of EBase ## raised if an invalid PEG has been detected + TPegParser = object of TPegLexer ## the PEG parser object + tok: TToken + nonterms: seq[PNonTerminal] + modifier: TModifier + +proc getTok(p: var TPegParser) = getTok(p, p.tok) + +proc pegError(p: TPegParser, msg: string, line = -1, col = -1) = + var e: ref EInvalidPeg + new(e) + e.msg = errorStr(p, msg, line, col) + raise e + +proc eat(p: var TPegParser, kind: TTokKind) = + if p.tok.kind == kind: getTok(p) + else: pegError(p, tokKindToStr[kind] & " expected") + +proc parseExpr(p: var TPegParser): TPeg + +proc getNonTerminal(p: TPegParser, name: string): PNonTerminal = + for i in 0..high(p.nonterms): + result = p.nonterms[i] + if cmpIgnoreStyle(result.name, name) == 0: return + # forward reference: + result = newNonTerminal(name, getLine(p), getColumn(p)) + add(p.nonterms, result) + +proc modifiedTerm(s: string, m: TModifier): TPeg = + case m + of modNone, modVerbatim: result = term(s) + of modIgnoreCase: result = termIgnoreCase(s) + of modIgnoreStyle: result = termIgnoreStyle(s) + +proc primary(p: var TPegParser): TPeg = + case p.tok.kind + of tkAmp: + getTok(p) + return &primary(p) + of tkNot: + getTok(p) + return !primary(p) + else: nil + case p.tok.kind + of tkIdentifier: + if not arrowIsNextTok(p): + var nt = getNonTerminal(p, p.tok.literal) + incl(nt.flags, ntUsed) + result = nonTerminal(nt) + getTok(p) + else: + pegError(p, "expression expected, but found: " & p.tok.literal) + of tkStringLit: + var m = p.tok.modifier + if m == modNone: m = p.modifier + result = modifiedTerm(p.tok.literal, m) + getTok(p) + of tkCharSet: + if '\0' in p.tok.charset: + pegError(p, "binary zero ('\\0') not allowed in character class") + result = charset(p.tok.charset) + getTok(p) + of tkParLe: + getTok(p) + result = parseExpr(p) + eat(p, tkParRi) + of tkCurlyLe: + getTok(p) + result = capture(parseExpr(p)) + eat(p, tkCurlyRi) + of tkAny: + result = any() + getTok(p) + of tkAnyRune: + result = anyRune() + getTok(p) + of tkBuiltin: + case p.tok.literal + of "n": result = newLine() + of "d": result = charset({'0'..'9'}) + of "D": result = charset({'\1'..'\xff'} - {'0'..'9'}) + of "s": result = charset({' ', '\9'..'\13'}) + of "S": result = charset({'\1'..'\xff'} - {' ', '\9'..'\13'}) + of "w": result = charset({'a'..'z', 'A'..'Z', '_'}) + of "W": result = charset({'\1'..'\xff'} - {'a'..'z', 'A'..'Z', '_'}) + of "ident": result = pegs.ident + else: pegError(p, "unknown built-in: " & p.tok.literal) + getTok(p) + of tkEscaped: + result = term(p.tok.literal[0]) + getTok(p) + else: + pegError(p, "expression expected, but found: " & p.tok.literal) + getTok(p) # we must consume a token here to prevent endless loops! + while true: + case p.tok.kind + of tkOption: + result = ?result + getTok(p) + of tkStar: + result = *result + getTok(p) + of tkPlus: + result = +result + getTok(p) + else: break + +proc seqExpr(p: var TPegParser): TPeg = + result = primary(p) + while true: + case p.tok.kind + of tkAmp, tkNot, tkStringLit, tkCharset, tkParLe, tkCurlyLe, + tkAny, tkAnyRune, tkBuiltin, tkEscaped: + result = sequence(result, primary(p)) + of tkIdentifier: + if not arrowIsNextTok(p): + result = sequence(result, primary(p)) + else: break + else: break + +proc parseExpr(p: var TPegParser): TPeg = + result = seqExpr(p) + while p.tok.kind == tkBar: + getTok(p) + result = result / seqExpr(p) + +proc parseRule(p: var TPegParser): PNonTerminal = + if p.tok.kind == tkIdentifier and arrowIsNextTok(p): + result = getNonTerminal(p, p.tok.literal) + if ntDeclared in result.flags: + pegError(p, "attempt to redefine: " & result.name) + result.line = getLine(p) + result.col = getColumn(p) + getTok(p) + eat(p, tkArrow) + result.rule = parseExpr(p) + incl(result.flags, ntDeclared) # NOW inlining may be attempted + else: + pegError(p, "rule expected, but found: " & p.tok.literal) + +proc rawParse(p: var TPegParser): TPeg = + ## parses a rule or a PEG expression + if p.tok.kind == tkBuiltin: + case p.tok.literal + of "i": + p.modifier = modIgnoreCase + getTok(p) + of "y": + p.modifier = modIgnoreStyle + getTok(p) + else: nil + if p.tok.kind == tkIdentifier and arrowIsNextTok(p): + result = parseRule(p).rule + while p.tok.kind != tkEof: + discard parseRule(p) + else: + result = parseExpr(p) + if p.tok.kind != tkEof: + pegError(p, "EOF expected, but found: " & p.tok.literal) + for i in 0..high(p.nonterms): + var nt = p.nonterms[i] + if ntDeclared notin nt.flags: + pegError(p, "undeclared identifier: " & nt.name, nt.line, nt.col) + elif ntUsed notin nt.flags and i > 0: + pegError(p, "unused rule: " & nt.name, nt.line, nt.col) + +proc parsePeg*(input: string, filename = "pattern", line = 1, col = 0): TPeg = + var p: TPegParser + init(TPegLexer(p), input, filename, line, col) + p.tok.kind = tkInvalid + p.tok.modifier = modNone + p.tok.literal = "" + p.tok.charset = {} + p.nonterms = @[] + getTok(p) + result = rawParse(p) + +proc peg*(pattern: string): TPeg = + ## constructs a TPeg object from the `pattern`. The short name has been + ## chosen to encourage its use as a raw string modifier:: + ## + ## peg"{\ident} \s* '=' \s* {.*}" + result = parsePeg(pattern, "pattern") + +when isMainModule: + assert match("W_HI_Le", peg"\y 'while'") + assert(not match("W_HI_L", peg"\y 'while'")) + assert(not match("W_HI_Le", peg"\y v'while'")) + assert match("W_HI_Le", peg"y'while'") + + assert($ +digits == $peg"\d+") + assert "0158787".match(peg"\d+") + assert "ABC 0232".match(peg"\w+\s+\d+") + assert "ABC".match(peg"\d+ / \w+") + + for word in split("00232this02939is39an22example111", peg"\d+"): + writeln(stdout, word) + + assert matchLen("key", ident) == 3 + + var pattern = sequence(ident, *whitespace, term('='), *whitespace, ident) + assert matchLen("key1= cal9", pattern) == 11 + + var ws = newNonTerminal("ws", 1, 1) + ws.rule = *whitespace + + var expr = newNonTerminal("expr", 1, 1) + expr.rule = sequence(capture(ident), *sequence( + nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr))) + + var c: TMatchClosure + var s = "a+b + c +d+e+f" + assert m(s, expr.rule, 0, c) == len(s) + var a = "" + for i in 0..c.ml-1: + a.add(copy(s, c.matches[i][0], c.matches[i][1])) + assert a == "abcdef" + #echo expr.rule + + #const filename = "lib/devel/peg/grammar.txt" + #var grammar = parsePeg(newFileStream(filename, fmRead), filename) + #echo "a <- [abc]*?".match(grammar) + assert find("_____abc_______", term("abc")) == 5 + assert match("_______ana", peg"A <- 'ana' / . A") + assert match("abcs%%%", peg"A <- ..A / .A / '%'") + + if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}": + assert matches[0] == "abc" + else: + assert false + + var g2 = peg"""S <- A B / C D + A <- 'a'+ + B <- 'b'+ + C <- 'c'+ + D <- 'd'+ + """ + assert($g2 == "((A B) / (C D))") + assert match("cccccdddddd", g2) + echo "var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") + assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") + diff --git a/lib/pure/re.nim b/lib/pure/re.nim new file mode 100755 index 000000000..f854c07e5 --- /dev/null +++ b/lib/pure/re.nim @@ -0,0 +1,353 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Regular expression support for Nimrod. Consider using the pegs module +## instead. + +{.compile: "tre/tre_all.c".} + +from strutils import addf + +type + TRegExDesc {.pure, final.} = object + re_nsub: int # Number of parenthesized subexpressions. + value: pointer # For internal use only. + + TRegEx* = ref TRegExDesc ## a compiled regular expression + EInvalidRegEx* = object of EInvalidValue + ## is raised if the pattern is no valid regular expression. + + TRegMatch {.pure.} = object + so, eo: cint + +const + MaxSubpatterns* = 10 + ## defines the maximum number of subpatterns that can be captured. + ## More subpatterns cannot be captured! + +proc regnexec(preg: ptr TRegExDesc, s: cstring, len, nmatch: int, + pmatch: ptr array [0..maxSubpatterns-1, TRegMatch], + eflags: cint): cint {.importc.} +proc regncomp(preg: ptr TRegExDesc, regex: cstring, n: int, + cflags: cint): cint {.importc.} +proc regfree(preg: ptr TRegExDesc) {.importc.} + +const + # POSIX regcomp() flags + REG_EXTENDED = 1 + REG_ICASE = (REG_EXTENDED shl 1) + REG_NEWLINE = (REG_ICASE shl 1) + REG_NOSUB = (REG_NEWLINE shl 1) + # Extra regcomp() flags + REG_BASIC = 0 + REG_LITERAL = (REG_NOSUB shl 1) + REG_RIGHT_ASSOC = (REG_LITERAL shl 1) + REG_UNGREEDY = (REG_RIGHT_ASSOC shl 1) + + # POSIX regexec() flags + REG_NOTBOL = 1 + REG_NOTEOL = (REG_NOTBOL shl 1) + + # Extra regexec() flags + REG_APPROX_MATCHER = (REG_NOTEOL shl 1) + REG_BACKTRACKING_MATCHER = (REG_APPROX_MATCHER shl 1) + + ErrorMessages = [ + "No error", + "No match", + "Invalid regexp", + "Unknown collating element", + "Unknown character class name", + "Trailing backslash", + "Invalid back reference", + "Missing ']'", + "Missing ')'", + "Missing '}'", + "Invalid contents of {}", + "Invalid character range", + "Out of memory", + "Invalid use of repetition operators" + ] + +proc finalizeRegEx(x: TRegEx) = regfree(addr(x^)) + +proc re*(s: string): TRegEx = + ## Constructor of regular expressions. Note that Nimrod's + ## extended raw string literals supports this syntax ``re"[abc]"`` as + ## a short form for ``re(r"[abc]")``. + new(result, finalizeRegEx) + var err = int(regncomp(addr(result^), s, s.len, + cint(REG_EXTENDED or REG_NEWLINE))) + if err != 0: + var e: ref EInvalidRegEx + new(e) + e.msg = ErrorMessages[err] + raise e + +proc xre*(pattern: string): TRegEx = + ## deletes whitespace from a pattern that is not escaped or in a character + ## class. Then it constructs a regular expresion object via `re`. + ## This is modelled after Perl's ``/x`` modifier. + var p = "" + var i = 0 + while i < pattern.len: + case pattern[i] + of ' ', '\t': + inc i + of '\\': + add p, '\\' + add p, pattern[i+1] + inc i, 2 + of '[': + while pattern[i] != ']' and pattern[i] != '\0': + add p, pattern[i] + inc i + else: + add p, pattern[i] + inc i + result = re(p) + +proc rawmatch(s: string, pattern: TRegEx, matches: var openarray[string], + start: int): tuple[first, last: int] = + var + rawMatches: array [0..maxSubpatterns-1, TRegMatch] + cs = cstring(s) + res = int(regnexec(addr(pattern^), cast[cstring](addr(cs[start])), + s.len-start, maxSubpatterns, addr(rawMatches), cint(0))) + if res == 0: + for i in 0..min(matches.len, int(pattern.re_nsub))-1: + var + a = int(rawMatches[i].so) + b = int(rawMatches[i].eo) + echo "a: ", a, " b: ", b + if a >= 0 and b >= 0: + matches[i] = copy(s, a+start, b - 1 + start) + else: + matches[i] = "" + return (int(rawMatches[0].so), int(rawMatches[0].eo)-1) + return (-1, -1) + +proc match*(s: string, pattern: TRegEx, matches: var openarray[string], + start = 0): bool = + ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and + ## the captured substrings in the array ``matches``. If it does not + ## match, nothing is written into ``matches`` and ``false`` is + ## returned. + result = rawmatch(s, pattern, matches, start).first == 0 + +proc match*(s: string, pattern: TRegEx, start: int = 0): bool = + ## returns ``true`` if ``s`` matches the ``pattern`` beginning + ## from ``start``. + var matches: array [0..0, string] + result = rawmatch(s, pattern, matches, start).first == 0 + +proc matchLen*(s: string, pattern: TRegEx, matches: var openarray[string], + start = 0): int = + ## the same as ``match``, but it returns the length of the match, + ## if there is no match, -1 is returned. Note that a match length + ## of zero can happen. + var (a, b) = rawmatch(s, pattern, matches, start) + result = a - b + 1 + +proc matchLen*(s: string, pattern: TRegEx, start = 0): int = + ## the same as ``match``, but it returns the length of the match, + ## if there is no match, -1 is returned. Note that a match length + ## of zero can happen. + var matches: array [0..0, string] + var (a, b) = rawmatch(s, pattern, matches, start) + result = a - b + 1 + +proc find*(s: string, pattern: TRegEx, matches: var openarray[string], + start = 0): int = + ## returns ``true`` if ``pattern`` occurs in ``s`` and the captured + ## substrings in the array ``matches``. If it does not match, nothing + ## is written into ``matches``. + result = rawmatch(s, pattern, matches, start).first + if result >= 0: inc(result, start) + +proc find*(s: string, pattern: TRegEx, start = 0): int = + ## returns ``true`` if ``pattern`` occurs in ``s``. + var matches: array [0..0, string] + result = rawmatch(s, pattern, matches, start).first + if result >= 0: inc(result, start) + +template `=~`*(s: string, pattern: TRegEx): expr = + ## This calls ``match`` with an implicit declared ``matches`` array that + ## can be used in the scope of the ``=~`` call: + ## + ## .. code-block:: nimrod + ## + ## if line =~ r"\s*(\w+)\s*\=\s*(\w+)": + ## # matches a key=value pair: + ## echo("Key: ", matches[1]) + ## echo("Value: ", matches[2]) + ## elif line =~ r"\s*(\#.*)": + ## # matches a comment + ## # note that the implicit ``matches`` array is different from the + ## # ``matches`` array of the first branch + ## echo("comment: ", matches[1]) + ## else: + ## echo("syntax error") + ## + when not definedInScope(matches): + var matches: array[0..maxSubPatterns-1, string] + match(s, pattern, matches) + +# ------------------------- more string handling ------------------------------ + +proc contains*(s: string, pattern: TRegEx, start = 0): bool = + ## same as ``find(s, pattern, start) >= 0`` + return find(s, pattern, start) >= 0 + +proc contains*(s: string, pattern: TRegEx, matches: var openArray[string], + start = 0): bool = + ## same as ``find(s, pattern, matches, start) >= 0`` + return find(s, pattern, matches, start) >= 0 + +proc startsWith*(s: string, prefix: TRegEx): bool = + ## returns true if `s` starts with the pattern `prefix` + result = matchLen(s, prefix) >= 0 + +proc endsWith*(s: string, suffix: TRegEx): bool = + ## returns true if `s` ends with the pattern `prefix` + for i in 0 .. s.len-1: + if matchLen(s, suffix, i) == s.len - i: return true + +proc replace*(s: string, sub: TRegEx, by: string): string = + ## Replaces `sub` in `s` by the string `by`. Captures can be accessed in `by` + ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples: + ## + ## .. code-block:: nimrod + ## "var1=key; var2=key2".replace(p"{\ident}'='{\ident}", "$1<-$2$2") + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## + ## "var1<-keykey; val2<-key2key2" + result = "" + var i = 0 + var caps: array[0..maxSubpatterns-1, string] + while i < s.len: + var x = matchLen(s, sub, caps, i) + if x <= 0: + add(result, s[i]) + inc(i) + else: + addf(result, by, caps) + inc(i, x) + # copy the rest: + add(result, copy(s, i)) + +proc parallelReplace*(s: string, subs: openArray[ + tuple[pattern: TRegEx, repl: string]]): string = + ## Returns a modified copy of `s` with the substitutions in `subs` + ## applied in parallel. + result = "" + var i = 0 + var caps: array[0..maxSubpatterns-1, string] + while i < s.len: + block searchSubs: + for j in 0..high(subs): + var x = matchLen(s, subs[j][0], caps, i) + if x > 0: + addf(result, subs[j][1], caps) + inc(i, x) + break searchSubs + add(result, s[i]) + inc(i) + # copy the rest: + add(result, copy(s, i)) + +proc transformFile*(infile, outfile: string, + subs: openArray[tuple[pattern: TRegEx, repl: string]]) = + ## reads in the file `infile`, performs a parallel replacement (calls + ## `parallelReplace`) and writes back to `outfile`. Calls ``quit`` if an + ## error occurs. This is supposed to be used for quick scripting. + var x = readFile(infile) + if not isNil(x): + var f: TFile + if open(f, outfile, fmWrite): + write(f, x.parallelReplace(subs)) + close(f) + else: + quit("cannot open for writing: " & outfile) + else: + quit("cannot open for reading: " & infile) + +iterator split*(s: string, sep: TRegEx): string = + ## Splits the string `s` into substrings. + ## + ## Substrings are separated by the regular expression `sep`. + ## Examples: + ## + ## .. code-block:: nimrod + ## for word in split("00232this02939is39an22example111", re"\d+"): + ## writeln(stdout, word) + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## "this" + ## "is" + ## "an" + ## "example" + ## + var + first = 0 + last = 0 + while last < len(s): + var x = matchLen(s, sep, last) + if x > 0: inc(last, x) + first = last + while last < len(s): + inc(last) + x = matchLen(s, sep, last) + if x > 0: break + if first < last: + yield copy(s, first, last-1) + +proc split*(s: string, sep: TRegEx): seq[string] = + ## Splits the string `s` into substrings. + accumulateResult(split(s, sep)) + +const ## common regular expressions + reIdentifier* = r"\b[a-zA-Z_]+[a-zA-Z_0-9]*\b" ## describes an identifier + reNatural* = r"\b\d+\b" ## describes a natural number + reInteger* = r"\b[-+]?\d+\b" ## describes an integer + reHex* = r"\b0[xX][0-9a-fA-F]+\b" ## describes a hexadecimal number + reBinary* = r"\b0[bB][01]+\b" ## describes a binary number (example: 0b11101) + reOctal* = r"\b0[oO][0-7]+\b" ## describes an octal number (example: 0o777) + reFloat* = r"\b[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\b" + ## describes a floating point number + reEmail* = r"\b[a-zA-Z0-9!#$%&'*+/=?^_`{|}~\-]+(?:\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)" & + r"*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+(?:[a-zA-Z]{2}|com|org|" & + r"net|gov|mil|biz|info|mobi|name|aero|jobs|museum)\b" + ## describes a common email address + reURL* = r"\b(http(s)?|ftp|gopher|telnet|file|notes|ms\-help):" & + r"((//)|(\\\\))+[\w\d:#@%/;$()~_?\+\-\=\\\.\&]*\b" + ## describes an URL + +when isMainModule: + echo matchLen("key", re"[a-zA-Z_][a-zA-Z_0-9]*") + + var pattern = re"[a-zA-Z_][a-zA-Z_0-9]*\s*=\s*[a-zA-Z_][a-zA-Z_0-9]*" + echo matchLen("key1= cal9", pattern, 2) + + echo find("_____abc_______", re("abc"), 3) + #echo "var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") + #echo "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") + + if "abc" =~ re"(a)bc xyz|([a-z]+)": + echo matches[0] + else: + echo "BUG" + +# for word in split("00232this02939is39an22example111", peg"\d+"): +# writeln(stdout, word) diff --git a/lib/pure/regexprs.nim b/lib/pure/regexprs.nim index 54f9dc471..cff3152cf 100755 --- a/lib/pure/regexprs.nim +++ b/lib/pure/regexprs.nim @@ -8,9 +8,6 @@ # ## Regular expression support for Nimrod. -## **Deprecated** since version 0.8.2. Use the module `re` instead. -{.deprecated.} - ## Currently this module is implemented by providing a wrapper around the ## `PRCE (Perl-Compatible Regular Expressions) <http://www.pcre.org>`_ ## C library. This means that your application will depend on the PRCE diff --git a/lib/pure/streams.nim b/lib/pure/streams.nim index 337be31d1..f4d2911fc 100755 --- a/lib/pure/streams.nim +++ b/lib/pure/streams.nim @@ -48,7 +48,8 @@ proc read[T](s: PStream, result: var T) = proc readChar*(s: PStream): char = ## reads a char from the stream `s`. Raises `EIO` if an error occured. - read(s, result) + ## Returns '\0' as an EOF marker. + discard s.readData(s, addr(result), sizeof(result)) proc readBool*(s: PStream): bool = ## reads a bool from the stream `s`. Raises `EIO` if an error occured. @@ -94,7 +95,7 @@ proc readLine*(s: PStream): string = if c == '\c': c = readChar(s) break - elif c == '\L': break + elif c == '\L' or c == '\0': break result.add(c) type diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index cfbfef810..25b495f10 100755 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -45,10 +45,6 @@ const IdentStartChars* = {'a'..'z', 'A'..'Z', '_'} ## the set of characters an identifier can start with -# strStart* = 0 ## this is only for bootstraping -# ## XXX: remove this someday -# nl* = "\n" ## this is only for bootstraping XXX: remove this someday - proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect.} ## The `substitution`:idx: operator performs string substitutions in ## `formatstr` and returns a modified `formatstr`. This is often called @@ -98,51 +94,30 @@ proc strip*(s: string, leading = true, trailing = true): string {.noSideEffect.} ## If `leading` is true, leading whitespace is stripped. ## If `trailing` is true, trailing whitespace is stripped. -proc toLower*(s: string): string {.noSideEffect.} +proc toLower*(s: string): string {.noSideEffect, procvar.} ## Converts `s` into lower case. This works only for the letters A-Z. ## See `unicode.toLower` for a version that works for any Unicode character. -proc toLower*(c: Char): Char {.noSideEffect.} +proc toLower*(c: Char): Char {.noSideEffect, procvar.} ## Converts `c` into lower case. This works only for the letters A-Z. ## See `unicode.toLower` for a version that works for any Unicode character. -proc toUpper*(s: string): string {.noSideEffect.} +proc toUpper*(s: string): string {.noSideEffect, procvar.} ## Converts `s` into upper case. This works only for the letters a-z. ## See `unicode.toUpper` for a version that works for any Unicode character. -proc toUpper*(c: Char): Char {.noSideEffect.} +proc toUpper*(c: Char): Char {.noSideEffect, procvar.} ## Converts `c` into upper case. This works only for the letters a-z. ## See `unicode.toUpper` for a version that works for any Unicode character. -proc capitalize*(s: string): string {.noSideEffect.} +proc capitalize*(s: string): string {.noSideEffect, procvar.} ## Converts the first character of `s` into upper case. ## This works only for the letters a-z. -proc normalize*(s: string): string {.noSideEffect.} +proc normalize*(s: string): string {.noSideEffect, procvar.} ## Normalizes the string `s`. That means to convert it to lower case and ## remove any '_'. This is needed for Nimrod identifiers for example. -proc findSubStr*(sub, s: string, start: int = 0): int {. - noSideEffect, deprecated.} - ## Searches for `sub` in `s` starting at position `start`. Searching is - ## case-sensitive. If `sub` is not in `s`, -1 is returned. - ## **Deprecated since version 0.7.6**: Use `find` instead, but beware that - ## this has a different parameter order. - -proc findSubStr*(sub: char, s: string, start: int = 0): int {. - noSideEffect, deprecated.} - ## Searches for `sub` in `s` starting at position `start`. Searching is - ## case-sensitive. If `sub` is not in `s`, -1 is returned. - ## **Deprecated since version 0.7.6**: Use `find` instead, but beware that - ## this has a different parameter order. - -proc findChars*(chars: set[char], s: string, start: int = 0): int {. - noSideEffect, deprecated.} - ## Searches for `chars` in `s` starting at position `start`. If `s` contains - ## none of the characters in `chars`, -1 is returned. - ## **Deprecated since version 0.7.6**: Use `find` instead, but beware that - ## this has a different parameter order. - proc find*(s, sub: string, start: int = 0): int {.noSideEffect.} ## Searches for `sub` in `s` starting at position `start`. Searching is ## case-sensitive. If `sub` is not in `s`, -1 is returned. @@ -322,13 +297,13 @@ proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect.} ## | > 0 iff a > b proc contains*(s: string, c: char): bool {.noSideEffect.} - ## Same as ``findSubStr(c, s) >= 0``. + ## Same as ``find(s, c) >= 0``. proc contains*(s, sub: string): bool {.noSideEffect.} - ## Same as ``findSubStr(sub, s) >= 0``. + ## Same as ``find(s, sub) >= 0``. proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} - ## Same as ``findChars(s, chars) >= 0``. + ## Same as ``find(s, chars) >= 0``. proc toHex*(x: BiggestInt, len: int): string {.noSideEffect.} ## Converts `x` to its hexadecimal representation. The resulting string @@ -340,15 +315,15 @@ proc intToStr*(x: int, minchars: int = 1): string ## will be minimally `minchars` characters long. This is achieved by ## adding leading zeros. -proc ParseInt*(s: string): int {.noSideEffect.} +proc ParseInt*(s: string): int {.noSideEffect, procvar.} ## Parses a decimal integer value contained in `s`. If `s` is not ## a valid integer, `EInvalidValue` is raised. -proc ParseBiggestInt*(s: string): biggestInt {.noSideEffect.} +proc ParseBiggestInt*(s: string): biggestInt {.noSideEffect, procvar.} ## Parses a decimal integer value contained in `s`. If `s` is not ## a valid integer, `EInvalidValue` is raised. -proc ParseFloat*(s: string): float {.noSideEffect.} +proc ParseFloat*(s: string): float {.noSideEffect, procvar.} ## Parses a decimal floating point value contained in `s`. If `s` is not ## a valid floating point number, `EInvalidValue` is raised. ``NAN``, ## ``INF``, ``-INF`` are also supported (case insensitive comparison). @@ -616,7 +591,7 @@ proc preprocessSub(sub: string, a: var TSkipTable) = for i in 0..0xff: a[chr(i)] = m+1 for i in 0..m-1: a[sub[i]] = m-i -proc findSubStrAux(s, sub: string, start: int, a: TSkipTable): int = +proc findAux(s, sub: string, start: int, a: TSkipTable): int = # fast "quick search" algorithm: var m = len(sub) @@ -631,36 +606,10 @@ proc findSubStrAux(s, sub: string, start: int, a: TSkipTable): int = inc(j, a[s[j+m]]) return -1 -proc findSubStr(sub, s: string, start: int = 0): int = - var a: TSkipTable - preprocessSub(sub, a) - result = findSubStrAux(s, sub, start, a) - # slow linear search: - #var - # i, j, M, N: int - #M = len(sub) - #N = len(s) - #i = start - #j = 0 - #if i >= N: - # result = -1 - #else: - # while True: - # if s[i] == sub[j]: - # Inc(i) - # Inc(j) - # else: - # i = i - j + 1 - # j = 0 - # if (j >= M): - # return i - M - # elif (i >= N): - # return -1 - proc find(s, sub: string, start: int = 0): int = var a: TSkipTable preprocessSub(sub, a) - result = findSubStrAux(s, sub, start, a) + result = findAux(s, sub, start, a) proc find(s: string, sub: char, start: int = 0): int = for i in start..len(s)-1: @@ -672,16 +621,6 @@ proc find(s: string, chars: set[char], start: int = 0): int = if s[i] in chars: return i return -1 -proc findSubStr(sub: char, s: string, start: int = 0): int = - for i in start..len(s)-1: - if sub == s[i]: return i - return -1 - -proc findChars(chars: set[char], s: string, start: int = 0): int = - for i in start..s.len-1: - if s[i] in chars: return i - return -1 - proc contains(s: string, chars: set[char]): bool = return find(s, chars) >= 0 @@ -698,7 +637,7 @@ proc replace*(s, sub, by: string): string = preprocessSub(sub, a) var i = 0 while true: - var j = findSubStrAux(s, sub, i, a) + var j = findAux(s, sub, i, a) if j < 0: break add result, copy(s, i, j - 1) add result, by diff --git a/lib/pure/variants.nim b/lib/pure/variants.nim new file mode 100755 index 000000000..f661f81a6 --- /dev/null +++ b/lib/pure/variants.nim @@ -0,0 +1,181 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements Nimrod's support for the ``variant`` datatype. +## `TVariant` shows how the flexibility of dynamic typing is achieved +## within a static type system. + +type + TVarType* = enum + vtNone, + vtBool, + vtChar, + vtEnum, + vtInt, + vtFloat, + vtString, + vtSet, + vtSeq, + vtDict + TVariant* {.final.} = object of TObject + case vtype: TVarType + of vtNone: nil + of vtBool, vtChar, vtEnum, vtInt: vint: int64 + of vtFloat: vfloat: float64 + of vtString: vstring: string + of vtSet, vtSeq: q: seq[TVariant] + of vtDict: d: seq[tuple[key, val: TVariant]] + +iterator objectFields*[T](x: T, skipInherited: bool): tuple[ + key: string, val: TVariant] {.magic: "ObjectFields"} + +proc `<>`*(x: ordinal): TVariant = + result.kind = vtEnum + result.vint = x + +proc `<>`*(x: biggestInt): TVariant = + result.kind = vtInt + result.vint = x + +proc `<>`*(x: char): TVariant = + result.kind = vtChar + result.vint = ord(x) + +proc `<>`*(x: bool): TVariant = + result.kind = vtBool + result.vint = ord(x) + +proc `<>`*(x: biggestFloat): TVariant = + result.kind = vtFloat + result.vfloat = x + +proc `<>`*(x: string): TVariant = + result.kind = vtString + result.vstring = x + +proc `<>`*[T](x: openArray[T]): TVariant = + result.kind = vtSeq + newSeq(result.q, x.len) + for i in 0..x.len-1: result.q[i] = <>x[i] + +proc `<>`*[T](x: set[T]): TVariant = + result.kind = vtSet + result.q = @[] + for a in items(x): result.q.add(<>a) + +proc `<>`* [T: object](x: T): TVariant {.magic: "ToVariant".} + ## this converts a value to a variant ("boxing") + +proc `><`*[T](v: TVariant, typ: T): T {.magic: "FromVariant".} + +[<>5, <>67, <>"hallo"] +myVar><int + + +proc `==`* (x, y: TVariant): bool = + if x.vtype == y.vtype: + case x.vtype + of vtNone: result = true + of vtBool, vtChar, vtEnum, vtInt: result = x.vint == y.vint + of vtFloat: result = x.vfloat == y.vfloat + of vtString: result = x.vstring == y.vstring + of vtSet: + # complicated! We check that each a in x also occurs in y and that the + # counts are identical: + if x.q.len == y.q.len: + for a in items(x.q): + block inner: + for b in items(y.q): + if a == b: break inner + return false + result = true + of vtSeq: + if x.q.len == y.q.len: + for i in 0..x.q.len-1: + if x.q[i] != y.q[i]: return false + result = true + of vtDict: + # it is an ordered dict: + if x.d.len == y.d.len: + for i in 0..x.d.len-1: + if x.d[i].key != y.d[i].key: return false + if x.d[i].val != y.d[i].val: return false + result = true + +proc `[]`* (a, b: TVariant): TVariant = + case a.vtype + of vtSeq: + if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: + result = a.q[b.vint] + else: + variantError() + of vtDict: + for i in 0..a.d.len-1: + if a.d[i].key == b: return a.d[i].val + if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: + result = a.d[b.vint].val + variantError() + else: variantError() + +proc `[]=`* (a, b, c: TVariant) = + case a.vtype + of vtSeq: + if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: + a.q[b.vint] = b + else: + variantError() + of vtDict: + for i in 0..a.d.len-1: + if a.d[i].key == b: + a.d[i].val = c + return + if b.vtype in {vtBool, vtChar, vtEnum, vtInt}: + a.d[b.vint].val = c + variantError() + else: variantError() + +proc `[]`* (a: TVariant, b: int): TVariant {.inline} = return a[<>b] +proc `[]`* (a: TVariant, b: string): TVariant {.inline} = return a[<>b] +proc `[]=`* (a: TVariant, b: int, c: TVariant) {.inline} = a[<>b] = c +proc `[]=`* (a: TVariant, b: string, c: TVariant) {.inline} = a[<>b] = c + +proc `+`* (x, y: TVariant): TVariant = + case x.vtype + of vtBool, vtChar, vtEnum, vtInt: + if y.vtype == x.vtype: + result.vtype = x.vtype + result.vint = x.vint + y.vint + else: + case y.vtype + of vtBool, vtChar, vtEnum, vtInt: + + + + vint: int64 + of vtFloat: vfloat: float64 + of vtString: vstring: string + of vtSet, vtSeq: q: seq[TVariant] + of vtDict: d: seq[tuple[key, val: TVariant]] + +proc `-`* (x, y: TVariant): TVariant +proc `*`* (x, y: TVariant): TVariant +proc `/`* (x, y: TVariant): TVariant +proc `div`* (x, y: TVariant): TVariant +proc `mod`* (x, y: TVariant): TVariant +proc `&`* (x, y: TVariant): TVariant +proc `$`* (x: TVariant): string = + # uses JS notation + +proc parseVariant*(s: string): TVariant +proc `<`* (x, y: TVariant): bool +proc `<=`* (x, y: TVariant): bool + +proc hash*(x: TVariant): int = + + diff --git a/lib/system.nim b/lib/system.nim index f52a233f8..9a64362c5 100755 --- a/lib/system.nim +++ b/lib/system.nim @@ -668,7 +668,7 @@ else: """ proc add *[T](x: var seq[T], y: T) {.magic: "AppendSeqElem", noSideEffect.} -proc add *[T](x: var seq[T], y: seq[T]) {.noSideEffect.} = +proc add *[T](x: var seq[T], y: openArray[T]) {.noSideEffect.} = ## Generic proc for adding a data item `y` to a container `x`. ## For containers that have an order, `add` means *append*. New generic ## containers should also call their adding proc `add` for consistency. @@ -721,7 +721,8 @@ type # these work for most platforms: ## This C type is not supported by Nimrod's code generator cstringArray* {.importc: "char**", nodecl.} = ptr array [0..50_000, cstring] - ## This is the same as the type ``char**`` in *C*. + ## This is binary compatible to the type ``char**`` in *C*. The array's + ## high value is large enough to disable bounds checking in practice. TEndian* = enum ## is a type describing the endianness of a processor. littleEndian, bigEndian @@ -1072,28 +1073,25 @@ proc isNil*(x: cstring): bool {.noSideEffect, magic: "IsNil".} # This is an undocumented pragma that can only be used # once in the system module. -proc `&` *[T](x, y: seq[T]): seq[T] {.noSideEffect.} = +proc `&` *[T](x, y: openArray[T]): seq[T] {.noSideEffect.} = newSeq(result, x.len + y.len) for i in 0..x.len-1: result[i] = x[i] for i in 0..y.len-1: - result[i] = y[i] + result[i+x.len] = y[i] -proc `&` *[T](x: seq[T], y: T): seq[T] {.noSideEffect.} = +proc `&` *[T](x: openArray[T], y: T): seq[T] {.noSideEffect.} = newSeq(result, x.len + 1) for i in 0..x.len-1: result[i] = x[i] result[x.len] = y -proc `&` *[T](x: T, y: seq[T]): seq[T] {.noSideEffect.} = +proc `&` *[T](x: T, y: openArray[T]): seq[T] {.noSideEffect.} = newSeq(result, y.len + 1) for i in 0..y.len-1: result[i] = y[i] result[y.len] = x -proc `&` *[T](x, y: T): seq[T] {.noSideEffect.} = - return [x, y] - when not defined(NimrodVM): when not defined(ECMAScript): proc seqToPtr[T](x: seq[T]): pointer {.inline, nosideeffect.} = diff --git a/lib/system/mm.nim b/lib/system/mm.nim index 9e207fcf0..76b5d83bd 100755 --- a/lib/system/mm.nim +++ b/lib/system/mm.nim @@ -130,17 +130,22 @@ when defined(boehmgc): include "system/cellsets" elif defined(nogc): - proc alloc(size: int): pointer = - result = c_malloc(size) - if result == nil: raiseOutOfMem() - proc alloc0(size: int): pointer = - result = alloc(size) - zeroMem(result, size) - proc realloc(p: Pointer, newsize: int): pointer = - result = c_realloc(p, newsize) - if result == nil: raiseOutOfMem() - proc dealloc(p: Pointer) = - c_free(p) + include "system/alloc" + + when false: + proc alloc(size: int): pointer = + result = c_malloc(size) + if result == nil: raiseOutOfMem() + proc alloc0(size: int): pointer = + result = alloc(size) + zeroMem(result, size) + proc realloc(p: Pointer, newsize: int): pointer = + result = c_realloc(p, newsize) + if result == nil: raiseOutOfMem() + proc dealloc(p: Pointer) = c_free(p) + proc getOccupiedMem(): int = return -1 + proc getFreeMem(): int = return -1 + proc getTotalMem(): int = return -1 proc initGC() = nil proc GC_disable() = nil @@ -151,10 +156,7 @@ elif defined(nogc): proc GC_disableMarkAndSweep() = nil proc GC_getStatistics(): string = return "" - proc getOccupiedMem(): int = return -1 - proc getFreeMem(): int = return -1 - proc getTotalMem(): int = return -1 - + proc newObj(typ: PNimType, size: int): pointer {.compilerproc.} = result = alloc0(size) proc newSeq(typ: PNimType, len: int): pointer {.compilerproc.} = @@ -163,7 +165,6 @@ elif defined(nogc): cast[PGenericSeq](result).space = len proc growObj(old: pointer, newsize: int): pointer = result = realloc(old, newsize) - # XXX BUG: we need realloc0 here, but C does not support this... proc setStackBottom(theStackBottom: pointer) {.compilerproc.} = nil proc nimGCref(p: pointer) {.compilerproc, inline.} = nil diff --git a/lib/windows/winlean.nim b/lib/windows/winlean.nim index c0fbe2bb8..40e6e7b11 100755 --- a/lib/windows/winlean.nim +++ b/lib/windows/winlean.nim @@ -59,6 +59,8 @@ const STD_OUTPUT_HANDLE* = -11'i32 STD_ERROR_HANDLE* = -12'i32 + DETACHED_PROCESS* = 8'i32 + proc CloseHandle*(hObject: THANDLE): WINBOOL {.stdcall, dynlib: "kernel32", importc: "CloseHandle".} @@ -184,3 +186,7 @@ proc GetCommandLineA*(): CString {.importc, stdcall, dynlib: "kernel32".} proc rdFileTime*(f: FILETIME): int64 = result = ze64(f.dwLowDateTime) or (ze64(f.dwHighDateTime) shl 32) +proc Sleep*(dwMilliseconds: int32){.stdcall, dynlib: "kernel32", + importc: "Sleep".} + + diff --git a/lib/wrappers/tre/config.h b/lib/wrappers/tre/config.h index 1a3a0bc3a..1a3a0bc3a 100644..100755 --- a/lib/wrappers/tre/config.h +++ b/lib/wrappers/tre/config.h diff --git a/lib/wrappers/tre/tre_all.c b/lib/wrappers/tre/tre_all.c index 8272657a3..8272657a3 100644..100755 --- a/lib/wrappers/tre/tre_all.c +++ b/lib/wrappers/tre/tre_all.c diff --git a/lib/wrappers/tre/version.txt b/lib/wrappers/tre/version.txt index 4d01880a7..4d01880a7 100644..100755 --- a/lib/wrappers/tre/version.txt +++ b/lib/wrappers/tre/version.txt |