diff options
Diffstat (limited to 'lib/pure')
139 files changed, 56426 insertions, 33672 deletions
diff --git a/lib/pure/actors.nim b/lib/pure/actors.nim deleted file mode 100644 index 8c61ce7df..000000000 --- a/lib/pure/actors.nim +++ /dev/null @@ -1,239 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## `Actor`:idx: support for Nim. An actor is implemented as a thread with -## a channel as its inbox. This module requires the ``--threads:on`` -## command line switch. -## -## Example: -## -## .. code-block:: nim -## -## var -## a: TActorPool[int, void] -## createActorPool(a) -## for i in 0 .. < 300: -## a.spawn(i, proc (x: int) {.thread.} = echo x) -## a.join() -## -## **Note**: This whole module is deprecated. Use `threadpool` and ``spawn`` -## instead. - -{.deprecated.} - -from os import sleep - -type - TTask*[TIn, TOut] = object{.pure, final.} ## a task - when TOut isnot void: - receiver*: ptr TChannel[TOut] ## the receiver channel of the response - action*: proc (x: TIn): TOut {.thread.} ## action to execute; - ## sometimes useful - shutDown*: bool ## set to tell an actor to shut-down - data*: TIn ## the data to process - - TActor[TIn, TOut] = object{.pure, final.} - i: TChannel[TTask[TIn, TOut]] - t: TThread[ptr TActor[TIn, TOut]] - - PActor*[TIn, TOut] = ptr TActor[TIn, TOut] ## an actor - -proc spawn*[TIn, TOut](action: proc( - self: PActor[TIn, TOut]){.thread.}): PActor[TIn, TOut] = - ## creates an actor; that is a thread with an inbox. The caller MUST call - ## ``join`` because that also frees the actor's associated resources. - result = cast[PActor[TIn, TOut]](allocShared0(sizeof(result[]))) - open(result.i) - createThread(result.t, action, result) - -proc inbox*[TIn, TOut](self: PActor[TIn, TOut]): ptr TChannel[TIn] = - ## gets a pointer to the associated inbox of the actor `self`. - result = addr(self.i) - -proc running*[TIn, TOut](a: PActor[TIn, TOut]): bool = - ## returns true if the actor `a` is running. 
- result = running(a.t) - -proc ready*[TIn, TOut](a: PActor[TIn, TOut]): bool = - ## returns true if the actor `a` is ready to process new messages. - result = ready(a.i) - -proc join*[TIn, TOut](a: PActor[TIn, TOut]) = - ## joins an actor. - joinThread(a.t) - close(a.i) - deallocShared(a) - -proc recv*[TIn, TOut](a: PActor[TIn, TOut]): TTask[TIn, TOut] = - ## receives a task from `a`'s inbox. - result = recv(a.i) - -proc send*[TIn, TOut, X, Y](receiver: PActor[TIn, TOut], msg: TIn, - sender: PActor[X, Y]) = - ## sends a message to `a`'s inbox. - var t: TTask[TIn, TOut] - t.receiver = addr(sender.i) - shallowCopy(t.data, msg) - send(receiver.i, t) - -proc send*[TIn, TOut](receiver: PActor[TIn, TOut], msg: TIn, - sender: ptr TChannel[TOut] = nil) = - ## sends a message to `receiver`'s inbox. - var t: TTask[TIn, TOut] - t.receiver = sender - shallowCopy(t.data, msg) - send(receiver.i, t) - -proc sendShutdown*[TIn, TOut](receiver: PActor[TIn, TOut]) = - ## send a shutdown message to `receiver`. - var t: TTask[TIn, TOut] - t.shutdown = true - send(receiver.i, t) - -proc reply*[TIn, TOut](t: TTask[TIn, TOut], m: TOut) = - ## sends a message to io's output message box. - when TOut is void: - {.error: "you cannot reply to a void outbox".} - assert t.receiver != nil - send(t.receiver[], m) - - -# ----------------- actor pools ---------------------------------------------- - -type - TActorPool*[TIn, TOut] = object{.pure, final.} ## an actor pool - actors: seq[PActor[TIn, TOut]] - when TOut isnot void: - outputs: TChannel[TOut] - -proc `^`*[T](f: ptr TChannel[T]): T = - ## alias for 'recv'. - result = recv(f[]) - -proc poolWorker[TIn, TOut](self: PActor[TIn, TOut]) {.thread.} = - while true: - var m = self.recv - if m.shutDown: break - when TOut is void: - m.action(m.data) - else: - send(m.receiver[], m.action(m.data)) - #self.reply() - -proc createActorPool*[TIn, TOut](a: var TActorPool[TIn, TOut], poolSize = 4) = - ## creates an actor pool. 
- newSeq(a.actors, poolSize) - when TOut isnot void: - open(a.outputs) - for i in 0 .. < a.actors.len: - a.actors[i] = spawn(poolWorker[TIn, TOut]) - -proc sync*[TIn, TOut](a: var TActorPool[TIn, TOut], polling=50) = - ## waits for every actor of `a` to finish with its work. Currently this is - ## implemented as polling every `polling` ms and has a slight chance - ## of failing since we check for every actor to be in `ready` state and not - ## for messages still in ether. This will change in a later - ## version, however. - var allReadyCount = 0 - while true: - var wait = false - for i in 0..high(a.actors): - if not a.actors[i].i.ready: - wait = true - allReadyCount = 0 - break - if not wait: - # it's possible that some actor sent a message to some other actor but - # both appeared to be non-working as the message takes some time to - # arrive. We assume that this won't take longer than `polling` and - # simply attempt a second time and declare victory then. ;-) - inc allReadyCount - if allReadyCount > 1: break - sleep(polling) - -proc terminate*[TIn, TOut](a: var TActorPool[TIn, TOut]) = - ## terminates each actor in the actor pool `a` and frees the - ## resources attached to `a`. - var t: TTask[TIn, TOut] - t.shutdown = true - for i in 0.. <a.actors.len: send(a.actors[i].i, t) - for i in 0.. <a.actors.len: join(a.actors[i]) - when TOut isnot void: - close(a.outputs) - a.actors = nil - -proc join*[TIn, TOut](a: var TActorPool[TIn, TOut]) = - ## short-cut for `sync` and then `terminate`. - sync(a) - terminate(a) - -template setupTask = - t.action = action - shallowCopy(t.data, input) - -template schedule = - # extremely simple scheduler: We always try the first thread first, so that - # it remains 'hot' ;-). Round-robin hurts for keeping threads hot. 
- for i in 0..high(p.actors): - if p.actors[i].i.ready: - p.actors[i].i.send(t) - return - # no thread ready :-( --> send message to the thread which has the least - # messages pending: - var minIdx = -1 - var minVal = high(int) - for i in 0..high(p.actors): - var curr = p.actors[i].i.peek - if curr == 0: - # ok, is ready now: - p.actors[i].i.send(t) - return - if curr < minVal and curr >= 0: - minVal = curr - minIdx = i - if minIdx >= 0: - p.actors[minIdx].i.send(t) - else: - raise newException(DeadThreadError, "cannot send message; thread died") - -proc spawn*[TIn, TOut](p: var TActorPool[TIn, TOut], input: TIn, - action: proc (input: TIn): TOut {.thread.} - ): ptr TChannel[TOut] = - ## uses the actor pool to run ``action(input)`` concurrently. - ## `spawn` is guaranteed to not block. - var t: TTask[TIn, TOut] - setupTask() - result = addr(p.outputs) - t.receiver = result - schedule() - -proc spawn*[TIn](p: var TActorPool[TIn, void], input: TIn, - action: proc (input: TIn) {.thread.}) = - ## uses the actor pool to run ``action(input)`` concurrently. - ## `spawn` is guaranteed to not block. - var t: TTask[TIn, void] - setupTask() - schedule() - -when isMainModule: - var - a: TActorPool[int, void] - createActorPool(a) - for i in 0 .. 
< 300: - a.spawn(i, proc (x: int) {.thread.} = echo x) - - when false: - proc treeDepth(n: PNode): int {.thread.} = - var x = a.spawn(treeDepth, n.le) - var y = a.spawn(treeDepth, n.ri) - result = max(^x, ^y) + 1 - - a.join() - - diff --git a/lib/pure/actors.nim.cfg b/lib/pure/actors.nim.cfg deleted file mode 100644 index c6bb9c545..000000000 --- a/lib/pure/actors.nim.cfg +++ /dev/null @@ -1,3 +0,0 @@ -# to shut up the tester: ---threads:on - diff --git a/lib/pure/algorithm.nim b/lib/pure/algorithm.nim index a1ab7be13..b12ed7cdd 100644 --- a/lib/pure/algorithm.nim +++ b/lib/pure/algorithm.nim @@ -1,31 +1,118 @@ # # # Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2015 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # -## This module implements some common generic algorithms. +## This module implements some common generic algorithms on `openArray`s. +## +## Basic usage +## =========== +## -type - SortOrder* = enum ## sort order - Descending, Ascending +runnableExamples: + type People = tuple + year: int + name: string + + var a: seq[People] + + a.add((2000, "John")) + a.add((2005, "Marie")) + a.add((2010, "Jane")) + + # Sorting with default system.cmp + a.sort() + assert a == @[(year: 2000, name: "John"), (year: 2005, name: "Marie"), + (year: 2010, name: "Jane")] + + proc myCmp(x, y: People): int = + cmp(x.name, y.name) + + # Sorting with custom proc + a.sort(myCmp) + assert a == @[(year: 2010, name: "Jane"), (year: 2000, name: "John"), + (year: 2005, name: "Marie")] + +## See also +## ======== +## * `sequtils module<sequtils.html>`_ for working with the built-in seq type +## * `tables module<tables.html>`_ for sorting tables + +import std/private/since -{.deprecated: [TSortOrder: SortOrder].} +when defined(nimPreviewSlimSystem): + import std/assertions +type + SortOrder* = enum + Descending, Ascending + proc `*`*(x: int, order: SortOrder): int {.inline.} = - ## flips `x` if 
``order == Descending``; - ## if ``order == Ascending`` then `x` is returned. - ## `x` is supposed to be the result of a comparator, ie ``< 0`` for - ## *less than*, ``== 0`` for *equal*, ``> 0`` for *greater than*. + ## Flips the sign of `x` if `order == Descending`. + ## If `order == Ascending` then `x` is returned. + ## + ## `x` is supposed to be the result of a comparator, i.e. + ## | `< 0` for *less than*, + ## | `== 0` for *equal*, + ## | `> 0` for *greater than*. + runnableExamples: + assert -123 * Descending == 123 + assert 123 * Descending == -123 + assert -123 * Ascending == -123 + assert 123 * Ascending == 123 var y = order.ord - 1 result = (x xor y) - y -proc reverse*[T](a: var openArray[T], first, last: int) = - ## reverses the array ``a[first..last]``. +template fillImpl[T](a: var openArray[T], first, last: int, value: T) = + var x = first + while x <= last: + a[x] = value + inc(x) + +proc fill*[T](a: var openArray[T], first, last: Natural, value: T) = + ## Assigns `value` to all elements of the slice `a[first..last]`. + ## + ## If an invalid range is passed, it raises `IndexDefect`. + runnableExamples: + var a: array[6, int] + a.fill(1, 3, 9) + assert a == [0, 9, 9, 9, 0, 0] + a.fill(3, 5, 7) + assert a == [0, 9, 9, 7, 7, 7] + doAssertRaises(IndexDefect, a.fill(1, 7, 9)) + fillImpl(a, first, last, value) + +proc fill*[T](a: var openArray[T], value: T) = + ## Assigns `value` to all elements of the container `a`. + runnableExamples: + var a: array[6, int] + a.fill(9) + assert a == [9, 9, 9, 9, 9, 9] + a.fill(4) + assert a == [4, 4, 4, 4, 4, 4] + fillImpl(a, 0, a.high, value) + + +proc reverse*[T](a: var openArray[T], first, last: Natural) = + ## Reverses the slice `a[first..last]`. + ## + ## If an invalid range is passed, it raises `IndexDefect`. 
+ ## + ## **See also:** + ## * `reversed proc<#reversed,openArray[T],Natural,int>`_ reverse a slice and returns a `seq[T]` + ## * `reversed proc<#reversed,openArray[T]>`_ reverse and returns a `seq[T]` + runnableExamples: + var a = [1, 2, 3, 4, 5, 6] + a.reverse(1, 3) + assert a == [1, 4, 3, 2, 5, 6] + a.reverse(1, 3) + assert a == [1, 2, 3, 4, 5, 6] + doAssertRaises(IndexDefect, a.reverse(1, 7)) var x = first var y = last while x < y: @@ -34,88 +121,217 @@ proc reverse*[T](a: var openArray[T], first, last: int) = inc(x) proc reverse*[T](a: var openArray[T]) = - ## reverses the array `a`. - reverse(a, 0, a.high) + ## Reverses the contents of the container `a`. + ## + ## **See also:** + ## * `reversed proc<#reversed,openArray[T],Natural,int>`_ reverse a slice and returns a `seq[T]` + ## * `reversed proc<#reversed,openArray[T]>`_ reverse and returns a `seq[T]` + runnableExamples: + var a = [1, 2, 3, 4, 5, 6] + a.reverse() + assert a == [6, 5, 4, 3, 2, 1] + a.reverse() + assert a == [1, 2, 3, 4, 5, 6] + # the max is needed, since a.high is -1 if a is empty + reverse(a, 0, max(0, a.high)) -proc reversed*[T](a: openArray[T], first, last: int): seq[T] = - ## returns the reverse of the array `a[first..last]`. - result = newSeq[T](last - first + 1) - var x = first - var y = last - while x <= last: - result[x] = a[y] - dec(y) - inc(x) +proc reversed*[T](a: openArray[T]): seq[T] {.inline.} = + ## Returns the elements of `a` in reverse order. + ## + ## **See also:** + ## * `reverse proc<#reverse,openArray[T]>`_ + runnableExamples: + assert [10, 11, 12].reversed == @[12, 11, 10] + assert seq[string].default.reversed == @[] + let n = a.len + result.setLen(n) + for i in 0..<n: result[i] = a[n - (i + 1)] + +proc reversed*[T](a: openArray[T], first: Natural, last: int): seq[T] + {.inline, deprecated: "use: `reversed(toOpenArray(a, first, last))`".} = + reversed(toOpenArray(a, first, last)) -proc reversed*[T](a: openArray[T]): seq[T] = - ## returns the reverse of the array `a`. 
- reversed(a, 0, a.high) +when defined(nimHasEffectsOf): + {.experimental: "strictEffects".} +else: + {.pragma: effectsOf.} + +proc binarySearch*[T, K](a: openArray[T], key: K, + cmp: proc (x: T, y: K): int {.closure.}): int {.effectsOf: cmp.} = + ## Binary search for `key` in `a`. Return the index of `key` or -1 if not found. + ## Assumes that `a` is sorted according to `cmp`. + ## + ## `cmp` is the comparator function to use, the expected return values are + ## the same as those of system.cmp. + runnableExamples: + assert binarySearch(["a", "b", "c", "d"], "d", system.cmp[string]) == 3 + assert binarySearch(["a", "b", "c", "d"], "c", system.cmp[string]) == 2 + let len = a.len + + if len == 0: + return -1 + + if len == 1: + if cmp(a[0], key) == 0: + return 0 + else: + return -1 + + result = 0 + if (len and (len - 1)) == 0: + # when `len` is a power of 2, a faster shr can be used. + var step = len shr 1 + var cmpRes: int + while step > 0: + let i = result or step + cmpRes = cmp(a[i], key) + if cmpRes == 0: + return i + + if cmpRes < 0: + result = i + step = step shr 1 + if cmp(a[result], key) != 0: result = -1 + else: + var b = len + var cmpRes: int + while result < b: + var mid = (result + b) shr 1 + cmpRes = cmp(a[mid], key) + if cmpRes == 0: + return mid + + if cmpRes < 0: + result = mid + 1 + else: + b = mid + if result >= len or cmp(a[result], key) != 0: result = -1 proc binarySearch*[T](a: openArray[T], key: T): int = - ## binary search for `key` in `a`. Returns -1 if not found. - var b = len(a) - while result < b: - var mid = (result + b) div 2 - if a[mid] < key: result = mid + 1 - else: b = mid - if result >= len(a) or a[result] != key: result = -1 - -proc smartBinarySearch*[T](a: openArray[T], key: T): int = - ## ``a.len`` must be a power of 2 for this to work. - var step = a.len div 2 - while step > 0: - if a[result or step] <= key: - result = result or step - step = step shr 1 - if a[result] != key: result = -1 + ## Binary search for `key` in `a`. 
Return the index of `key` or -1 if not found. + ## Assumes that `a` is sorted. + runnableExamples: + assert binarySearch([0, 1, 2, 3, 4], 4) == 4 + assert binarySearch([0, 1, 2, 3, 4], 2) == 2 + binarySearch(a, key, cmp[T]) const onlySafeCode = true -proc lowerBound*[T](a: openArray[T], key: T, cmp: proc(x,y: T): int {.closure.}): int = - ## same as binarySearch except that if key is not in `a` then this - ## returns the location where `key` would be if it were. In other - ## words if you have a sorted sequence and you call - ## insert(thing, elm, lowerBound(thing, elm)) +proc lowerBound*[T, K](a: openArray[T], key: K, + cmp: proc(x: T, k: K): int {.closure.}): int {.effectsOf: cmp.} = + ## Returns the index of the first element in `a` that is not less than + ## (i.e. greater or equal to) `key`, or last if no such element is found. + ## In other words if you have a sorted sequence and you call + ## `insert(thing, elm, lowerBound(thing, elm))` ## the sequence will still be sorted. + ## Assumes that `a` is sorted according to `cmp`. ## - ## `cmp` is the comparator function to use, the expected return values are - ## the same as that of system.cmp. + ## If an invalid range is passed, it raises `IndexDefect`. ## - ## example:: + ## This version uses `cmp` to compare the elements. + ## The expected return values are the same as those of `system.cmp`. 
## - ## var arr = @[1,2,3,5,6,7,8,9] - ## arr.insert(4, arr.lowerBound(4)) - ## `after running the above arr is `[1,2,3,4,5,6,7,8,9]` + ## **See also:** + ## * `upperBound proc<#upperBound,openArray[T],K,proc(T,K)>`_ sorted by `cmp` in the specified order + ## * `upperBound proc<#upperBound,openArray[T],T>`_ + runnableExamples: + var arr = @[1, 2, 3, 5, 6, 7, 8, 9] + assert arr.lowerBound(3, system.cmp[int]) == 2 + assert arr.lowerBound(4, system.cmp[int]) == 3 + assert arr.lowerBound(5, system.cmp[int]) == 3 + arr.insert(4, arr.lowerBound(4, system.cmp[int])) + assert arr == [1, 2, 3, 4, 5, 6, 7, 8, 9] result = a.low - var pos = result - var count, step: int - count = a.high - a.low + 1 + var count = a.high - a.low + 1 + var step, pos: int while count != 0: - pos = result - step = count div 2 - pos += step + step = count shr 1 + pos = result + step if cmp(a[pos], key) < 0: - pos.inc - result = pos + result = pos + 1 count -= step + 1 else: count = step proc lowerBound*[T](a: openArray[T], key: T): int = lowerBound(a, key, cmp[T]) -proc merge[T](a, b: var openArray[T], lo, m, hi: int, - cmp: proc (x, y: T): int {.closure.}, order: SortOrder) = - template `<-` (a, b: expr) = - when false: - a = b - elif onlySafeCode: - shallowCopy(a, b) + ## Returns the index of the first element in `a` that is not less than + ## (i.e. greater or equal to) `key`, or last if no such element is found. + ## In other words if you have a sorted sequence and you call + ## `insert(thing, elm, lowerBound(thing, elm))` + ## the sequence will still be sorted. + ## Assumes that `a` is sorted. + ## + ## This version uses the default comparison function `cmp`. 
+ ## + ## **See also:** + ## * `upperBound proc<#upperBound,openArray[T],K,proc(T,K)>`_ sorted by `cmp` in the specified order + ## * `upperBound proc<#upperBound,openArray[T],T>`_ + +proc upperBound*[T, K](a: openArray[T], key: K, + cmp: proc(x: T, k: K): int {.closure.}): int {.effectsOf: cmp.} = + ## Returns the index of the first element in `a` that is greater than + ## `key`, or last if no such element is found. + ## In other words if you have a sorted sequence and you call + ## `insert(thing, elm, upperBound(thing, elm))` + ## the sequence will still be sorted. + ## Assumes that `a` is sorted according to `cmp`. + ## + ## If an invalid range is passed, it raises `IndexDefect`. + ## + ## This version uses `cmp` to compare the elements. The expected + ## return values are the same as those of `system.cmp`. + ## + ## **See also:** + ## * `lowerBound proc<#lowerBound,openArray[T],K,proc(T,K)>`_ sorted by `cmp` in the specified order + ## * `lowerBound proc<#lowerBound,openArray[T],T>`_ + runnableExamples: + var arr = @[1, 2, 3, 5, 6, 7, 8, 9] + assert arr.upperBound(2, system.cmp[int]) == 2 + assert arr.upperBound(3, system.cmp[int]) == 3 + assert arr.upperBound(4, system.cmp[int]) == 3 + arr.insert(4, arr.upperBound(3, system.cmp[int])) + assert arr == [1, 2, 3, 4, 5, 6, 7, 8, 9] + result = a.low + var count = a.high - a.low + 1 + var step, pos: int + while count != 0: + step = count shr 1 + pos = result + step + if cmp(a[pos], key) <= 0: + result = pos + 1 + count -= step + 1 else: - copyMem(addr(a), addr(b), sizeof(T)) - # optimization: If max(left) <= min(right) there is nothing to do! - # 1 2 3 4 ## 5 6 7 8 + count = step + +proc upperBound*[T](a: openArray[T], key: T): int = upperBound(a, key, cmp[T]) + ## Returns the index of the first element in `a` that is greater than + ## `key`, or last if no such element is found. 
+ ## In other words if you have a sorted sequence and you call + ## `insert(thing, elm, upperBound(thing, elm))` + ## the sequence will still be sorted. + ## Assumes that `a` is sorted. + ## + ## This version uses the default comparison function `cmp`. + ## + ## **See also:** + ## * `lowerBound proc<#lowerBound,openArray[T],K,proc(T,K)>`_ sorted by `cmp` in the specified order + ## * `lowerBound proc<#lowerBound,openArray[T],T>`_ + +template `<-`(a, b) = + when defined(gcDestructors): + a = move b + elif onlySafeCode: + shallowCopy(a, b) + else: + copyMem(addr(a), addr(b), sizeof(T)) + +proc mergeAlt[T](a, b: var openArray[T], lo, m, hi: int, + cmp: proc (x, y: T): int {.closure.}, order: SortOrder) {.effectsOf: cmp.} = + # Optimization: If max(left) <= min(right) there is nothing to do! + # 1 2 3 4 ## 5 6 7 8 # -> O(n) for sorted arrays. - # On random data this safes up to 40% of merge calls + # On random data this saves up to 40% of mergeAlt calls. if cmp(a[m], a[m+1]) * order <= 0: return var j = lo # copy a[j..m] into b: @@ -149,80 +365,138 @@ proc merge[T](a, b: var openArray[T], lo, m, hi: int, else: if k < j: copyMem(addr(a[k]), addr(b[i]), sizeof(T)*(j-k)) -proc sort*[T](a: var openArray[T], +func sort*[T](a: var openArray[T], cmp: proc (x, y: T): int {.closure.}, - order = SortOrder.Ascending) = - ## Default Nim sort. The sorting is guaranteed to be stable and - ## the worst case is guaranteed to be O(n log n). + order = SortOrder.Ascending) {.effectsOf: cmp.} = + ## Default Nim sort (an implementation of merge sort). The sorting + ## is guaranteed to be stable (that is, equal elements stay in the same order) + ## and the worst case is guaranteed to be O(n log n). + ## Sorts by `cmp` in the specified `order`. + ## ## The current implementation uses an iterative ## mergesort to achieve this. It uses a temporary sequence of - ## length ``a.len div 2``. 
Currently Nim does not support a - ## sensible default argument for ``cmp``, so you have to provide one - ## of your own. However, the ``system.cmp`` procs can be used: - ## - ## .. code-block:: nim + ## length `a.len div 2`. If you do not wish to provide your own + ## `cmp`, you may use `system.cmp` or instead call the overloaded + ## version of `sort`, which uses `system.cmp`. ## - ## sort(myIntArray, system.cmp[int]) - ## - ## # do not use cmp[string] here as we want to use the specialized - ## # overload: - ## sort(myStrArray, system.cmp) + ## ```nim + ## sort(myIntArray, system.cmp[int]) + ## # do not use cmp[string] here as we want to use the specialized + ## # overload: + ## sort(myStrArray, system.cmp) + ## ``` ## ## You can inline adhoc comparison procs with the `do notation - ## <manual.html#do-notation>`_. Example: - ## - ## .. code-block:: nim + ## <manual.html#procedures-do-notation>`_. Example: ## + ## ```nim ## people.sort do (x, y: Person) -> int: ## result = cmp(x.surname, y.surname) ## if result == 0: ## result = cmp(x.name, y.name) + ## ``` + ## + ## **See also:** + ## * `sort proc<#sort,openArray[T]>`_ + ## * `sorted proc<#sorted,openArray[T],proc(T,T)>`_ sorted by `cmp` in the specified order + ## * `sorted proc<#sorted,openArray[T]>`_ + ## * `sortedByIt template<#sortedByIt.t,untyped,untyped>`_ + runnableExamples: + var d = ["boo", "fo", "barr", "qux"] + proc myCmp(x, y: string): int = + if x.len() > y.len() or x.len() == y.len(): 1 + else: -1 + sort(d, myCmp) + assert d == ["fo", "qux", "boo", "barr"] var n = a.len - var b: seq[T] - newSeq(b, n div 2) + var b = newSeq[T](n div 2) var s = 1 while s < n: var m = n-1-s while m >= 0: - merge(a, b, max(m-s+1, 0), m, m+s, cmp, order) + mergeAlt(a, b, max(m-s+1, 0), m, m+s, cmp, order) dec(m, s*2) s = s*2 +proc sort*[T](a: var openArray[T], order = SortOrder.Ascending) = sort[T](a, + system.cmp[T], order) + ## Shortcut version of `sort` that uses `system.cmp[T]` as the comparison function. 
+ ## + ## **See also:** + ## * `sort func<#sort,openArray[T],proc(T,T)>`_ + ## * `sorted proc<#sorted,openArray[T],proc(T,T)>`_ sorted by `cmp` in the specified order + ## * `sorted proc<#sorted,openArray[T]>`_ + ## * `sortedByIt template<#sortedByIt.t,untyped,untyped>`_ + proc sorted*[T](a: openArray[T], cmp: proc(x, y: T): int {.closure.}, - order = SortOrder.Ascending): seq[T] = - ## returns `a` sorted by `cmp` in the specified `order`. + order = SortOrder.Ascending): seq[T] {.effectsOf: cmp.} = + ## Returns `a` sorted by `cmp` in the specified `order`. + ## + ## **See also:** + ## * `sort func<#sort,openArray[T],proc(T,T)>`_ + ## * `sort proc<#sort,openArray[T]>`_ + ## * `sortedByIt template<#sortedByIt.t,untyped,untyped>`_ + runnableExamples: + let + a = [2, 3, 1, 5, 4] + b = sorted(a, system.cmp[int]) + c = sorted(a, system.cmp[int], Descending) + d = sorted(["adam", "dande", "brian", "cat"], system.cmp[string]) + assert b == @[1, 2, 3, 4, 5] + assert c == @[5, 4, 3, 2, 1] + assert d == @["adam", "brian", "cat", "dande"] result = newSeq[T](a.len) for i in 0 .. a.high: result[i] = a[i] sort(result, cmp, order) -template sortedByIt*(seq1, op: expr): expr = - ## Convenience template around the ``sorted`` proc to reduce typing. - ## - ## The template injects the ``it`` variable which you can use directly in an - ## expression. Example: - ## - ## .. code-block:: nim - ## - ## type Person = tuple[name: string, age: int] - ## var - ## p1: Person = (name: "p1", age: 60) - ## p2: Person = (name: "p2", age: 20) - ## p3: Person = (name: "p3", age: 30) - ## p4: Person = (name: "p4", age: 30) - ## - ## people = @[p1,p2,p4,p3] - ## - ## echo people.sortedByIt(it.name) +proc sorted*[T](a: openArray[T], order = SortOrder.Ascending): seq[T] = + ## Shortcut version of `sorted` that uses `system.cmp[T]` as the comparison function. 
## - ## Because the underlying ``cmp()`` is defined for tuples you can do - ## a nested sort like in the following example: + ## **See also:** + ## * `sort func<#sort,openArray[T],proc(T,T)>`_ + ## * `sort proc<#sort,openArray[T]>`_ + ## * `sortedByIt template<#sortedByIt.t,untyped,untyped>`_ + runnableExamples: + let + a = [2, 3, 1, 5, 4] + b = sorted(a) + c = sorted(a, Descending) + d = sorted(["adam", "dande", "brian", "cat"]) + assert b == @[1, 2, 3, 4, 5] + assert c == @[5, 4, 3, 2, 1] + assert d == @["adam", "brian", "cat", "dande"] + sorted[T](a, system.cmp[T], order) + +template sortedByIt*(seq1, op: untyped): untyped = + ## Convenience template around the `sorted` proc to reduce typing. ## - ## .. code-block:: nim + ## The template injects the `it` variable which you can use directly in an + ## expression. ## - ## echo people.sortedByIt((it.age, it.name)) + ## Because the underlying `cmp()` is defined for tuples you can also do + ## a nested sort. ## - var result {.gensym.} = sorted(seq1, proc(x, y: type(seq1[0])): int = + ## **See also:** + ## * `sort func<#sort,openArray[T],proc(T,T)>`_ + ## * `sort proc<#sort,openArray[T]>`_ + ## * `sorted proc<#sorted,openArray[T],proc(T,T)>`_ sorted by `cmp` in the specified order + ## * `sorted proc<#sorted,openArray[T]>`_ + runnableExamples: + type Person = tuple[name: string, age: int] + var + p1: Person = (name: "p1", age: 60) + p2: Person = (name: "p2", age: 20) + p3: Person = (name: "p3", age: 30) + p4: Person = (name: "p4", age: 30) + people = @[p1, p2, p4, p3] + + assert people.sortedByIt(it.name) == @[(name: "p1", age: 60), (name: "p2", + age: 20), (name: "p3", age: 30), (name: "p4", age: 30)] + # Nested sort + assert people.sortedByIt((it.age, it.name)) == @[(name: "p2", age: 20), + (name: "p3", age: 30), (name: "p4", age: 30), (name: "p1", age: 60)] + var result = sorted(seq1, proc(x, y: typeof(items(seq1), typeOfIter)): int = var it {.inject.} = x let a = op it = y @@ -230,50 +504,203 @@ template 
sortedByIt*(seq1, op: expr): expr = result = cmp(a, b)) result +func isSorted*[T](a: openArray[T], + cmp: proc(x, y: T): int {.closure.}, + order = SortOrder.Ascending): bool {.effectsOf: cmp.} = + ## Checks to see whether `a` is already sorted in `order` + ## using `cmp` for the comparison. The parameters are identical + ## to `sort`. Requires O(n) time. + ## + ## **See also:** + ## * `isSorted proc<#isSorted,openArray[T]>`_ + runnableExamples: + let + a = [2, 3, 1, 5, 4] + b = [1, 2, 3, 4, 5] + c = [5, 4, 3, 2, 1] + d = ["adam", "brian", "cat", "dande"] + e = ["adam", "dande", "brian", "cat"] + assert isSorted(a) == false + assert isSorted(b) == true + assert isSorted(c) == false + assert isSorted(c, Descending) == true + assert isSorted(d) == true + assert isSorted(e) == false + result = true + for i in 0..<len(a)-1: + if cmp(a[i], a[i+1]) * order > 0: + return false + +proc isSorted*[T](a: openArray[T], order = SortOrder.Ascending): bool = + ## Shortcut version of `isSorted` that uses `system.cmp[T]` as the comparison function. + ## + ## **See also:** + ## * `isSorted func<#isSorted,openArray[T],proc(T,T)>`_ + runnableExamples: + let + a = [2, 3, 1, 5, 4] + b = [1, 2, 3, 4, 5] + c = [5, 4, 3, 2, 1] + d = ["adam", "brian", "cat", "dande"] + e = ["adam", "dande", "brian", "cat"] + assert isSorted(a) == false + assert isSorted(b) == true + assert isSorted(c) == false + assert isSorted(c, Descending) == true + assert isSorted(d) == true + assert isSorted(e) == false + isSorted(a, system.cmp[T], order) + +proc merge*[T]( + result: var seq[T], + x, y: openArray[T], cmp: proc(x, y: T): int {.closure.} +) {.since: (1, 5, 1), effectsOf: cmp.} = + ## Merges two sorted `openArray`. `x` and `y` are assumed to be sorted. + ## If you do not wish to provide your own `cmp`, + ## you may use `system.cmp` or instead call the overloaded + ## version of `merge`, which uses `system.cmp`. + ## + ## .. 
note:: The original data of `result` is not cleared, + ## new data is appended to `result`. + ## + ## **See also:** + ## * `merge proc<#merge,seq[T],openArray[T],openArray[T]>`_ + runnableExamples: + let x = @[1, 3, 6] + let y = @[2, 3, 4] + + block: + var merged = @[7] # new data is appended to merged sequence + merged.merge(x, y, system.cmp[int]) + assert merged == @[7, 1, 2, 3, 3, 4, 6] + + block: + var merged = @[7] # if you only want new data, clear merged sequence first + merged.setLen(0) + merged.merge(x, y, system.cmp[int]) + assert merged.isSorted + assert merged == @[1, 2, 3, 3, 4, 6] + + import std/sugar + + var res: seq[(int, int)] + res.merge([(1, 1)], [(1, 2)], (a, b) => a[0] - b[0]) + assert res == @[(1, 1), (1, 2)] + + assert seq[int].default.dup(merge([1, 3], [2, 4])) == @[1, 2, 3, 4] + + let + sizeX = x.len + sizeY = y.len + oldLen = result.len + + result.setLen(oldLen + sizeX + sizeY) + + var + ix = 0 + iy = 0 + i = oldLen + + while true: + if ix == sizeX: + while iy < sizeY: + result[i] = y[iy] + inc i + inc iy + return + + if iy == sizeY: + while ix < sizeX: + result[i] = x[ix] + inc i + inc ix + return + + let itemX = x[ix] + let itemY = y[iy] + + if cmp(itemX, itemY) > 0: # to have a stable sort + result[i] = itemY + inc iy + else: + result[i] = itemX + inc ix + + inc i + +proc merge*[T](result: var seq[T], x, y: openArray[T]) {.inline, since: (1, 5, 1).} = + ## Shortcut version of `merge` that uses `system.cmp[T]` as the comparison function. + ## + ## **See also:** + ## * `merge proc<#merge,seq[T],openArray[T],openArray[T],proc(T,T)>`_ + runnableExamples: + let x = [5, 10, 15, 20, 25] + let y = [50, 40, 30, 20, 10].sorted + + var merged: seq[int] + merged.merge(x, y) + assert merged.isSorted + assert merged == @[5, 10, 10, 15, 20, 20, 25, 30, 40, 50] + merge(result, x, y, system.cmp) + proc product*[T](x: openArray[seq[T]]): seq[seq[T]] = - ## produces the Cartesian product of the array. Warning: complexity - ## may explode. 
- result = @[] - if x.len == 0: + ## Produces the Cartesian product of the array. + ## Every element of the result is a combination of one element from each seq in `x`, + ## with the ith element coming from `x[i]`. + ## + ## .. warning:: complexity may explode. + runnableExamples: + assert product(@[@[1], @[2]]) == @[@[1, 2]] + assert product(@[@["A", "K"], @["Q"]]) == @[@["K", "Q"], @["A", "Q"]] + let xLen = x.len + result = newSeq[seq[T]]() + if xLen == 0: return - if x.len == 1: + if xLen == 1: result = @x return var - indexes = newSeq[int](x.len) - initial = newSeq[int](x.len) + indices = newSeq[int](xLen) + initial = newSeq[int](xLen) index = 0 - # replace with newSeq as soon as #853 is fixed - var next: seq[T] = @[] - next.setLen(x.len) - for i in 0..(x.len-1): + var next = newSeq[T](xLen) + for i in 0 ..< xLen: if len(x[i]) == 0: return - initial[i] = len(x[i])-1 - indexes = initial + initial[i] = len(x[i]) - 1 + indices = initial while true: - while indexes[index] == -1: - indexes[index] = initial[index] + while indices[index] == -1: + indices[index] = initial[index] index += 1 - if index == x.len: return - indexes[index] -= 1 - for ni, i in indexes: + if index == xLen: return + indices[index] -= 1 + for ni, i in indices: next[ni] = x[ni][i] - var res: seq[T] - shallowCopy(res, next) - result.add(res) + result.add(next) index = 0 - indexes[index] -= 1 + indices[index] -= 1 -proc nextPermutation*[T](x: var openarray[T]): bool {.discardable.} = - ## Calculates the next lexicographic permutation, directly modifying ``x``. +proc nextPermutation*[T](x: var openArray[T]): bool {.discardable.} = + ## Calculates the next lexicographic permutation, directly modifying `x`. ## The result is whether a permutation happened, otherwise we have reached ## the last-ordered permutation. ## - ## .. code-block:: nim + ## If you start with an unsorted array/seq, the repeated permutations + ## will **not** give you all permutations but stop with the last. 
## - ## var v = @[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - ## v.nextPermutation() - ## echo v + ## **See also:** + ## * `prevPermutation proc<#prevPermutation,openArray[T]>`_ + runnableExamples: + var v = @[0, 1, 2, 3] + assert v.nextPermutation() == true + assert v == @[0, 1, 3, 2] + assert v.nextPermutation() == true + assert v == @[0, 2, 1, 3] + assert v.prevPermutation() == true + assert v == @[0, 1, 3, 2] + v = @[3, 2, 1, 0] + assert v.nextPermutation() == false + assert v == @[3, 2, 1, 0] if x.len < 2: return false @@ -293,16 +720,21 @@ proc nextPermutation*[T](x: var openarray[T]): bool {.discardable.} = result = true -proc prevPermutation*[T](x: var openarray[T]): bool {.discardable.} = +proc prevPermutation*[T](x: var openArray[T]): bool {.discardable.} = ## Calculates the previous lexicographic permutation, directly modifying - ## ``x``. The result is whether a permutation happened, otherwise we have + ## `x`. The result is whether a permutation happened, otherwise we have ## reached the first-ordered permutation. ## - ## .. code-block:: nim - ## - ## var v = @[0, 1, 2, 3, 4, 5, 6, 7, 9, 8] - ## v.prevPermutation() - ## echo v + ## **See also:** + ## * `nextPermutation proc<#nextPermutation,openArray[T]>`_ + runnableExamples: + var v = @[0, 1, 2, 3] + assert v.prevPermutation() == false + assert v == @[0, 1, 2, 3] + assert v.nextPermutation() == true + assert v == @[0, 1, 3, 2] + assert v.prevPermutation() == true + assert v == @[0, 1, 2, 3] if x.len < 2: return false @@ -322,3 +754,164 @@ proc prevPermutation*[T](x: var openarray[T]): bool {.discardable.} = swap x[i-1], x[j] result = true + +proc rotateInternal[T](arg: var openArray[T]; first, middle, last: int): int = + ## A port of std::rotate from C++. + ## Ported from [this reference](http://www.cplusplus.com/reference/algorithm/rotate/). 
+ result = first + last - middle + + if first == middle or middle == last: + return + + assert first < middle + assert middle < last + + # m prefix for mutable + var + mFirst = first + mMiddle = middle + next = middle + + swap(arg[mFirst], arg[next]) + mFirst += 1 + next += 1 + if mFirst == mMiddle: + mMiddle = next + + while next != last: + swap(arg[mFirst], arg[next]) + mFirst += 1 + next += 1 + if mFirst == mMiddle: + mMiddle = next + + next = mMiddle + while next != last: + swap(arg[mFirst], arg[next]) + mFirst += 1 + next += 1 + if mFirst == mMiddle: + mMiddle = next + elif next == last: + next = mMiddle + +proc rotatedInternal[T](arg: openArray[T]; first, middle, last: int): seq[T] = + let argLen = arg.len + result = newSeq[T](argLen) + for i in 0 ..< first: + result[i] = arg[i] + let n = last - middle + let m = middle - first + for i in 0 ..< n: + result[first+i] = arg[middle+i] + for i in 0 ..< m: + result[first+n+i] = arg[first+i] + for i in last ..< argLen: + result[i] = arg[i] + +proc rotateLeft*[T](arg: var openArray[T]; slice: HSlice[int, int]; + dist: int): int {.discardable.} = + ## Performs a left rotation on a range of elements. If you want to rotate + ## right, use a negative `dist`. Specifically, `rotateLeft` rotates + ## the elements at `slice` by `dist` positions. + ## + ## | The element at index `slice.a + dist` will be at index `slice.a`. + ## | The element at index `slice.b` will be at `slice.b - dist`. + ## | The element at index `slice.a` will be at `slice.b + 1 - dist`. + ## | The element at index `slice.a + dist - 1` will be at `slice.b`. + ## + ## Elements outside of `slice` will be left unchanged. + ## The time complexity is linear to `slice.b - slice.a + 1`. + ## If an invalid range (`HSlice`) is passed, it raises `IndexDefect`. + ## + ## `slice` + ## : The indices of the element range that should be rotated. + ## + ## `dist` + ## : The distance in amount of elements that the data should be rotated. 
+ ## Can be negative, can be any number. + ## + ## **See also:** + ## * `rotateLeft proc<#rotateLeft,openArray[T],int>`_ for a version which rotates the whole container + ## * `rotatedLeft proc<#rotatedLeft,openArray[T],HSlice[int,int],int>`_ for a version which returns a `seq[T]` + runnableExamples: + var a = [0, 1, 2, 3, 4, 5] + a.rotateLeft(1 .. 4, 3) + assert a == [0, 4, 1, 2, 3, 5] + a.rotateLeft(1 .. 4, 3) + assert a == [0, 3, 4, 1, 2, 5] + a.rotateLeft(1 .. 4, -3) + assert a == [0, 4, 1, 2, 3, 5] + doAssertRaises(IndexDefect, a.rotateLeft(1 .. 7, 2)) + let sliceLen = slice.b + 1 - slice.a + let distLeft = ((dist mod sliceLen) + sliceLen) mod sliceLen + arg.rotateInternal(slice.a, slice.a + distLeft, slice.b + 1) + +proc rotateLeft*[T](arg: var openArray[T]; dist: int): int {.discardable.} = + ## Same as `rotateLeft`, but with default arguments for slice, + ## so that this procedure operates on the entire + ## `arg`, and not just on a part of it. + ## + ## **See also:** + ## * `rotateLeft proc<#rotateLeft,openArray[T],HSlice[int,int],int>`_ for a version which rotates a range + ## * `rotatedLeft proc<#rotatedLeft,openArray[T],int>`_ for a version which returns a `seq[T]` + runnableExamples: + var a = [1, 2, 3, 4, 5] + a.rotateLeft(2) + assert a == [3, 4, 5, 1, 2] + a.rotateLeft(4) + assert a == [2, 3, 4, 5, 1] + a.rotateLeft(-6) + assert a == [1, 2, 3, 4, 5] + let argLen = arg.len + let distLeft = ((dist mod argLen) + argLen) mod argLen + arg.rotateInternal(0, distLeft, argLen) + +proc rotatedLeft*[T](arg: openArray[T]; slice: HSlice[int, int], + dist: int): seq[T] = + ## Same as `rotateLeft`, just with the difference that it does + ## not modify the argument. It creates a new `seq` instead. + ## + ## Elements outside of `slice` will be left unchanged. + ## If an invalid range (`HSlice`) is passed, it raises `IndexDefect`. + ## + ## `slice` + ## : The indices of the element range that should be rotated. 
+ ## + ## `dist` + ## : The distance in amount of elements that the data should be rotated. + ## Can be negative, can be any number. + ## + ## **See also:** + ## * `rotateLeft proc<#rotateLeft,openArray[T],HSlice[int,int],int>`_ for the in-place version of this proc + ## * `rotatedLeft proc<#rotatedLeft,openArray[T],int>`_ for a version which rotates the whole container + runnableExamples: + var a = @[1, 2, 3, 4, 5] + a = rotatedLeft(a, 1 .. 4, 3) + assert a == @[1, 5, 2, 3, 4] + a = rotatedLeft(a, 1 .. 3, 2) + assert a == @[1, 3, 5, 2, 4] + a = rotatedLeft(a, 1 .. 3, -2) + assert a == @[1, 5, 2, 3, 4] + let sliceLen = slice.b + 1 - slice.a + let distLeft = ((dist mod sliceLen) + sliceLen) mod sliceLen + arg.rotatedInternal(slice.a, slice.a + distLeft, slice.b + 1) + +proc rotatedLeft*[T](arg: openArray[T]; dist: int): seq[T] = + ## Same as `rotateLeft`, just with the difference that it does + ## not modify the argument. It creates a new `seq` instead. + ## + ## **See also:** + ## * `rotateLeft proc<#rotateLeft,openArray[T],int>`_ for the in-place version of this proc + ## * `rotatedLeft proc<#rotatedLeft,openArray[T],HSlice[int,int],int>`_ for a version which rotates a range + runnableExamples: + var a = @[1, 2, 3, 4, 5] + a = rotatedLeft(a, 2) + assert a == @[3, 4, 5, 1, 2] + a = rotatedLeft(a, 4) + assert a == @[2, 3, 4, 5, 1] + a = rotatedLeft(a, -6) + assert a == @[1, 2, 3, 4, 5] + let argLen = arg.len + let distLeft = ((dist mod argLen) + argLen) mod argLen + arg.rotatedInternal(0, distLeft, argLen) diff --git a/lib/pure/async.nim b/lib/pure/async.nim new file mode 100644 index 000000000..e4d8d41c3 --- /dev/null +++ b/lib/pure/async.nim @@ -0,0 +1,9 @@ +## Exports [asyncmacro](asyncmacro.html) and [asyncfutures](asyncfutures.html) for native backends, +## and [asyncjs](asyncjs.html) on the JS backend. 
+ +when defined(js): + import std/asyncjs + export asyncjs +else: + import std/[asyncmacro, asyncfutures] + export asyncmacro, asyncfutures diff --git a/lib/pure/asyncdispatch.nim b/lib/pure/asyncdispatch.nim index 1b9887098..126db7a7f 100644 --- a/lib/pure/asyncdispatch.nim +++ b/lib/pure/asyncdispatch.nim @@ -7,39 +7,26 @@ # distribution, for details about the copyright. # -include "system/inclrtl" - -import os, oids, tables, strutils, macros, times - -import rawsockets, net - -export Port, SocketFlag - -#{.injectStmt: newGcInvariant().} - -## AsyncDispatch -## ************* -## ## This module implements asynchronous IO. This includes a dispatcher, -## a ``Future`` type implementation, and an ``async`` macro which allows -## asynchronous code to be written in a synchronous style with the ``await`` +## a `Future` type implementation, and an `async` macro which allows +## asynchronous code to be written in a synchronous style with the `await` ## keyword. ## -## The dispatcher acts as a kind of event loop. You must call ``poll`` on it -## (or a function which does so for you such as ``waitFor`` or ``runForever``) +## The dispatcher acts as a kind of event loop. You must call `poll` on it +## (or a function which does so for you such as `waitFor` or `runForever`) ## in order to poll for any outstanding events. The underlying implementation ## is based on epoll on Linux, IO Completion Ports on Windows and select on ## other operating systems. ## -## The ``poll`` function will not, on its own, return any events. Instead -## an appropriate ``Future`` object will be completed. A ``Future`` is a +## The `poll` function will not, on its own, return any events. Instead +## an appropriate `Future` object will be completed. A `Future` is a ## type which holds a value which is not yet available, but which *may* be ## available in the future. You can check whether a future is finished -## by using the ``finished`` function. 
When a future is finished it means that +## by using the `finished` function. When a future is finished it means that ## either the value that it holds is now available or it holds an error instead. ## The latter situation occurs when the operation to complete a future fails ## with an exception. You can distinguish between the two situations with the -## ``failed`` function. +## `failed` function. ## ## Future objects can also store a callback procedure which will be called ## automatically once the future completes. @@ -48,49 +35,50 @@ export Port, SocketFlag ## pattern. In this ## pattern you make a request for an action, and once that action is fulfilled ## a future is completed with the result of that action. Requests can be -## made by calling the appropriate functions. For example: calling the ``recv`` +## made by calling the appropriate functions. For example: calling the `recv` ## function will create a request for some data to be read from a socket. The -## future which the ``recv`` function returns will then complete once the +## future which the `recv` function returns will then complete once the ## requested amount of data is read **or** an exception occurs. ## ## Code to read some data from a socket may look something like this: +## ```Nim +## var future = socket.recv(100) +## future.addCallback( +## proc () = +## echo(future.read) +## ) +## ``` ## -## .. code-block::nim -## var future = socket.recv(100) -## future.callback = -## proc () = -## echo(future.read) -## -## All asynchronous functions returning a ``Future`` will not block. They +## All asynchronous functions returning a `Future` will not block. They ## will not however return immediately. An asynchronous function will have ## code which will be executed before an asynchronous request is made, in most ## cases this code sets up the request. 
## -## In the above example, the ``recv`` function will return a brand new -## ``Future`` instance once the request for data to be read from the socket -## is made. This ``Future`` instance will complete once the requested amount +## In the above example, the `recv` function will return a brand new +## `Future` instance once the request for data to be read from the socket +## is made. This `Future` instance will complete once the requested amount ## of data is read, in this case it is 100 bytes. The second line sets a ## callback on this future which will be called once the future completes. -## All the callback does is write the data stored in the future to ``stdout``. -## The ``read`` function is used for this and it checks whether the future -## completes with an error for you (if it did it will simply raise the -## error), if there is no error however it returns the value of the future. +## All the callback does is write the data stored in the future to `stdout`. +## The `read` function is used for this and it checks whether the future +## completes with an error for you (if it did, it will simply raise the +## error), if there is no error, however, it returns the value of the future. ## ## Asynchronous procedures -## ----------------------- +## ======================= ## ## Asynchronous procedures remove the pain of working with callbacks. They do ## this by allowing you to write asynchronous code the same way as you would ## write synchronous code. ## -## An asynchronous procedure is marked using the ``{.async.}`` pragma. -## When marking a procedure with the ``{.async.}`` pragma it must have a -## ``Future[T]`` return type or no return type at all. If you do not specify -## a return type then ``Future[void]`` is assumed. +## An asynchronous procedure is marked using the `{.async.}` pragma. +## When marking a procedure with the `{.async.}` pragma it must have a +## `Future[T]` return type or no return type at all. 
If you do not specify +## a return type then `Future[void]` is assumed. ## -## Inside asynchronous procedures ``await`` can be used to call any +## Inside asynchronous procedures `await` can be used to call any ## procedures which return a -## ``Future``; this includes asynchronous procedures. When a procedure is +## `Future`; this includes asynchronous procedures. When a procedure is ## "awaited", the asynchronous procedure it is awaited in will ## suspend its execution ## until the awaited procedure's Future completes. At which point the @@ -98,462 +86,423 @@ export Port, SocketFlag ## when an asynchronous procedure is suspended other asynchronous procedures ## will be run by the dispatcher. ## -## The ``await`` call may be used in many contexts. It can be used on the right -## hand side of a variable declaration: ``var data = await socket.recv(100)``, +## The `await` call may be used in many contexts. It can be used on the right +## hand side of a variable declaration: `var data = await socket.recv(100)`, ## in which case the variable will be set to the value of the future -## automatically. It can be used to await a ``Future`` object, and it can -## be used to await a procedure returning a ``Future[void]``: -## ``await socket.send("foobar")``. +## automatically. It can be used to await a `Future` object, and it can +## be used to await a procedure returning a `Future[void]`: +## `await socket.send("foobar")`. +## +## If an awaited future completes with an error, then `await` will re-raise +## this error. To avoid this, you can use the `yield` keyword instead of +## `await`. The following section shows different ways that you can handle +## exceptions in async procs. +## +## .. caution:: +## Procedures marked {.async.} do not support mutable parameters such +## as `var int`. References such as `ref int` should be used instead. 
+## +## Handling Exceptions +## ------------------- +## +## You can handle exceptions in the same way as in ordinary Nim code; +## by using the try statement: +## +## ```Nim +## try: +## let data = await sock.recv(100) +## echo("Received ", data) +## except: +## # Handle exception +## ``` +## +## An alternative approach to handling exceptions is to use `yield` on a future +## then check the future's `failed` property. For example: +## +## ```Nim +## var future = sock.recv(100) +## yield future +## if future.failed: +## # Handle exception +## ``` +## ## ## Discarding futures -## ------------------ +## ================== +## +## Futures should **never** be discarded directly because they may contain +## errors. If you do not care for the result of a Future then you should use +## the `asyncCheck` procedure instead of the `discard` keyword. Note that this +## does not wait for completion, and you should use `waitFor` or `await` for that purpose. +## +## .. note:: `await` also checks if the future fails, so you can safely discard +## its result. ## -## Futures should **never** be discarded. This is because they may contain -## errors. If you do not care for the result of a Future then you should -## use the ``asyncCheck`` procedure instead of the ``discard`` keyword. +## Handling futures +## ================ +## +## There are many different operations that apply to a future. +## The three primary high-level operations are `asyncCheck`, +## `waitFor`, and `await`. +## +## * `asyncCheck`: Raises an exception if the future fails. It neither waits +## for the future to finish nor returns the result of the future. +## * `waitFor`: Polls the event loop and blocks the current thread until the +## future finishes. This is often used to call an async procedure from a +## synchronous context and should never be used in an `async` proc. +## * `await`: Pauses execution in the current async procedure until the future +## finishes. 
While the current procedure is paused, other async procedures will +## continue running. Should be used instead of `waitFor` in an async +## procedure. +## +## Here is a handy quick reference chart showing their high-level differences: +## ============== ===================== ======================= +## Procedure Context Blocking +## ============== ===================== ======================= +## `asyncCheck` non-async and async non-blocking +## `waitFor` non-async blocks current thread +## `await` async suspends current proc +## ============== ===================== ======================= ## ## Examples -## -------- +## ======== ## ## For examples take a look at the documentation for the modules implementing ## asynchronous IO. A good place to start is the ## `asyncnet module <asyncnet.html>`_. ## +## Investigating pending futures +## ============================= +## +## It's possible to get into a situation where an async proc, or more accurately +## a `Future[T]` gets stuck and +## never completes. This can happen for various reasons and can cause serious +## memory leaks. When this occurs it's hard to identify the procedure that is +## stuck. +## +## Thankfully there is a mechanism which tracks the count of each pending future. +## All you need to do to enable it is compile with `-d:futureLogging` and +## use the `getFuturesInProgress` procedure to get the list of pending futures +## together with the stack traces to the moment of their creation. +## +## You may also find it useful to use this +## `prometheus package <https://github.com/dom96/prometheus>`_ which will log +## the pending futures into prometheus, allowing you to analyse them via a nice +## graph. +## +## +## ## Limitations/Bugs -## ---------------- +## ================ ## -## * ``except`` statement (without `try`) does not work inside async procedures. -## * The effect system (``raises: []``) does not work with async procedures. 
-## * Can't await in a ``except`` body - - -# TODO: Check if yielded future is nil and throw a more meaningful exception - -# -- Futures - -type - FutureBase* = ref object of RootObj ## Untyped future. - cb: proc () {.closure,gcsafe.} - finished: bool - error*: ref Exception ## Stored exception - errorStackTrace*: string - when not defined(release): - stackTrace: string ## For debugging purposes only. - id: int - fromProc: string - - Future*[T] = ref object of FutureBase ## Typed future. - value: T ## Stored value - -{.deprecated: [PFutureBase: FutureBase, PFuture: Future].} - - -var currentID = 0 -proc newFuture*[T](fromProc: string = "unspecified"): Future[T] = - ## Creates a new future. - ## - ## Specifying ``fromProc``, which is a string specifying the name of the proc - ## that this future belongs to, is a good habit as it helps with debugging. - new(result) - result.finished = false - when not defined(release): - result.stackTrace = getStackTrace() - result.id = currentID - result.fromProc = fromProc - currentID.inc() - -proc checkFinished[T](future: Future[T]) = - when not defined(release): - if future.finished: - echo("<-----> ", future.id, " ", future.fromProc) - echo(future.stackTrace) - echo("-----") - when T is string: - echo("Contents: ", future.value.repr) - echo("<----->") - echo("Future already finished, cannot finish twice.") - echo getStackTrace() - assert false - -proc complete*[T](future: Future[T], val: T) = - ## Completes ``future`` with value ``val``. - #assert(not future.finished, "Future already finished, cannot finish twice.") - checkFinished(future) - assert(future.error == nil) - future.value = val - future.finished = true - if future.cb != nil: - future.cb() - -proc complete*(future: Future[void]) = - ## Completes a void ``future``. 
- #assert(not future.finished, "Future already finished, cannot finish twice.") - checkFinished(future) - assert(future.error == nil) - future.finished = true - if future.cb != nil: - future.cb() - -proc fail*[T](future: Future[T], error: ref Exception) = - ## Completes ``future`` with ``error``. - #assert(not future.finished, "Future already finished, cannot finish twice.") - checkFinished(future) - future.finished = true - future.error = error - future.errorStackTrace = - if getStackTrace(error) == "": getStackTrace() else: getStackTrace(error) - if future.cb != nil: - future.cb() - else: - # This is to prevent exceptions from being silently ignored when a future - # is discarded. - # TODO: This may turn out to be a bad idea. - # Turns out this is a bad idea. - #raise error - discard - -proc `callback=`*(future: FutureBase, cb: proc () {.closure,gcsafe.}) = - ## Sets the callback proc to be called when the future completes. - ## - ## If future has already completed then ``cb`` will be called immediately. - ## - ## **Note**: You most likely want the other ``callback`` setter which - ## passes ``future`` as a param to the callback. - future.cb = cb - if future.finished: - future.cb() - -proc `callback=`*[T](future: Future[T], - cb: proc (future: Future[T]) {.closure,gcsafe.}) = - ## Sets the callback proc to be called when the future completes. - ## - ## If future has already completed then ``cb`` will be called immediately. - future.callback = proc () = cb(future) - -proc echoOriginalStackTrace[T](future: Future[T]) = - # TODO: Come up with something better. - when not defined(release): - echo("Original stack trace in ", future.fromProc, ":") - if not future.errorStackTrace.isNil and future.errorStackTrace != "": - echo(future.errorStackTrace) - else: - echo("Empty or nil stack trace.") - echo("Continuing...") - -proc read*[T](future: Future[T]): T = - ## Retrieves the value of ``future``. 
Future must be finished otherwise - ## this function will fail with a ``ValueError`` exception. - ## - ## If the result of the future is an error then that error will be raised. - if future.finished: - if future.error != nil: - echoOriginalStackTrace(future) - raise future.error - when T isnot void: - return future.value - else: - # TODO: Make a custom exception type for this? - raise newException(ValueError, "Future still in progress.") +## * The effect system (`raises: []`) does not work with async procedures. +## * Mutable parameters are not supported by async procedures. +## +## +## Multiple async backend support +## ============================== +## +## Thanks to its powerful macro support, Nim allows ``async``/``await`` to be +## implemented in libraries with only minimal support from the language - as +## such, multiple ``async`` libraries exist, including ``asyncdispatch`` and +## ``chronos``, and more may come to be developed in the future. +## +## Libraries built on top of async/await may wish to support multiple async +## backends - the best way to do so is to create separate modules for each backend +## that may be imported side-by-side. +## +## An alternative way is to select backend using a global compile flag - this +## method makes it difficult to compose applications that use both backends as may +## happen with transitive dependencies, but may be appropriate in some cases - +## libraries choosing this path should call the flag `asyncBackend`, allowing +## applications to choose the backend with `-d:asyncBackend=<backend_name>`. +## +## Known `async` backends include: +## +## * `-d:asyncBackend=none`: disable `async` support completely +## * `-d:asyncBackend=asyncdispatch`: https://nim-lang.org/docs/asyncdispatch.html +## * `-d:asyncBackend=chronos`: https://github.com/status-im/nim-chronos/ +## +## ``none`` can be used when a library supports both a synchronous and +## asynchronous API, to disable the latter. 
-proc readError*[T](future: Future[T]): ref Exception = - ## Retrieves the exception stored in ``future``. - ## - ## An ``ValueError`` exception will be thrown if no exception exists - ## in the specified Future. - if future.error != nil: return future.error - else: - raise newException(ValueError, "No error in future.") +import std/[os, tables, strutils, times, heapqueue, options, asyncstreams] +import std/[math, monotimes] +import std/asyncfutures except callSoon -proc finished*[T](future: Future[T]): bool = - ## Determines whether ``future`` has completed. - ## - ## ``True`` may indicate an error or a value. Use ``failed`` to distinguish. - future.finished +import std/[nativesockets, net, deques] -proc failed*(future: FutureBase): bool = - ## Determines whether ``future`` completed with an error. - return future.error != nil +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] -proc asyncCheck*[T](future: Future[T]) = - ## Sets a callback on ``future`` which raises an exception if the future - ## finished with an error. - ## - ## This should be used instead of ``discard`` to discard void futures. - future.callback = - proc () = - if future.failed: - echoOriginalStackTrace(future) - raise future.error - -proc `and`*[T, Y](fut1: Future[T], fut2: Future[Y]): Future[void] = - ## Returns a future which will complete once both ``fut1`` and ``fut2`` - ## complete. - var retFuture = newFuture[void]("asyncdispatch.`and`") - fut1.callback = - proc () = - if fut2.finished: retFuture.complete() - fut2.callback = - proc () = - if fut1.finished: retFuture.complete() - return retFuture +export Port, SocketFlag +export asyncfutures except callSoon +export asyncstreams -proc `or`*[T, Y](fut1: Future[T], fut2: Future[Y]): Future[void] = - ## Returns a future which will complete once either ``fut1`` or ``fut2`` - ## complete. 
- var retFuture = newFuture[void]("asyncdispatch.`or`") - proc cb() = - if not retFuture.finished: retFuture.complete() - fut1.callback = cb - fut2.callback = cb - return retFuture +# TODO: Check if yielded future is nil and throw a more meaningful exception type PDispatcherBase = ref object of RootRef - timers: seq[tuple[finishAt: float, fut: Future[void]]] - -proc processTimers(p: PDispatcherBase) = - var oldTimers = p.timers - p.timers = @[] - for t in oldTimers: - if epochTime() >= t.finishAt: - t.fut.complete() - else: - p.timers.add(t) + timers*: HeapQueue[tuple[finishAt: MonoTime, fut: Future[void]]] + callbacks*: Deque[proc () {.gcsafe.}] + +proc processTimers( + p: PDispatcherBase, didSomeWork: var bool +): Option[int] {.inline.} = + # Pop the timers in the order in which they will expire (smaller `finishAt`). + var count = p.timers.len + let t = getMonoTime() + while count > 0 and t >= p.timers[0].finishAt: + p.timers.pop().fut.complete() + dec count + didSomeWork = true + + # Return the number of milliseconds in which the next timer will expire. + if p.timers.len == 0: return + + let millisecs = (p.timers[0].finishAt - getMonoTime()).inMilliseconds + return some(millisecs.int + 1) + +proc processPendingCallbacks(p: PDispatcherBase; didSomeWork: var bool) = + while p.callbacks.len > 0: + var cb = p.callbacks.popFirst() + cb() + didSomeWork = true + +proc adjustTimeout( + p: PDispatcherBase, pollTimeout: int, nextTimer: Option[int] +): int {.inline.} = + if p.callbacks.len != 0: + return 0 + + if nextTimer.isNone() or pollTimeout == -1: + return pollTimeout + + result = max(nextTimer.get(), 0) + result = min(pollTimeout, result) + +proc runOnce(timeout: int): bool {.gcsafe.} + +proc callSoon*(cbproc: proc () {.gcsafe.}) {.gcsafe.} + ## Schedule `cbproc` to be called as soon as possible. + ## The callback is called when control returns to the event loop. 
+ +proc initCallSoonProc = + if asyncfutures.getCallSoonProc().isNil: + asyncfutures.setCallSoonProc(callSoon) + +template implementSetInheritable() {.dirty.} = + when declared(setInheritable): + proc setInheritable*(fd: AsyncFD, inheritable: bool): bool = + ## Control whether a file handle can be inherited by child processes. + ## Returns `true` on success. + ## + ## This procedure is not guaranteed to be available for all platforms. + ## Test for availability with `declared() <system.html#declared,untyped>`_. + fd.FileHandle.setInheritable(inheritable) when defined(windows) or defined(nimdoc): - import winlean, sets, hashes + import std/[winlean, sets, hashes] type - TCompletionKey = Dword + CompletionKey = ULONG_PTR - TCompletionData* = object - fd*: TAsyncFD # TODO: Rename this. - cb*: proc (fd: TAsyncFD, bytesTransferred: Dword, - errcode: OSErrorCode) {.closure,gcsafe.} + CompletionData* = object + fd*: AsyncFD # TODO: Rename this. + cb*: owned(proc (fd: AsyncFD, bytesTransferred: DWORD, + errcode: OSErrorCode) {.closure, gcsafe.}) + cell*: ForeignCell # we need this `cell` to protect our `cb` environment, + # when using RegisterWaitForSingleObject, because + # waiting is done in different thread. PDispatcher* = ref object of PDispatcherBase - ioPort: THandle - handles: HashSet[TAsyncFD] + ioPort: Handle + handles*: HashSet[AsyncFD] # Export handles so that an external library can register them. 
+ + CustomObj = object of OVERLAPPED + data*: CompletionData + + CustomRef* = ref CustomObj - TCustomOverlapped = object of TOVERLAPPED - data*: TCompletionData + AsyncFD* = distinct int - PCustomOverlapped* = ref TCustomOverlapped + PostCallbackData = object + ioPort: Handle + handleFd: AsyncFD + waitFd: Handle + ovl: owned CustomRef + PostCallbackDataPtr = ptr PostCallbackData - TAsyncFD* = distinct int + AsyncEventImpl = object + hEvent: Handle + hWaiter: Handle + pcd: PostCallbackDataPtr + AsyncEvent* = ptr AsyncEventImpl - proc hash(x: TAsyncFD): THash {.borrow.} - proc `==`*(x: TAsyncFD, y: TAsyncFD): bool {.borrow.} + Callback* = proc (fd: AsyncFD): bool {.closure, gcsafe.} - proc newDispatcher*(): PDispatcher = + proc hash(x: AsyncFD): Hash {.borrow.} + proc `==`*(x: AsyncFD, y: AsyncFD): bool {.borrow.} + + proc newDispatcher*(): owned PDispatcher = ## Creates a new Dispatcher instance. new result result.ioPort = createIoCompletionPort(INVALID_HANDLE_VALUE, 0, 0, 1) - result.handles = initSet[TAsyncFD]() - result.timers = @[] + result.handles = initHashSet[AsyncFD]() + result.timers.clear() + result.callbacks = initDeque[proc () {.closure, gcsafe.}](64) + + var gDisp{.threadvar.}: owned PDispatcher ## Global dispatcher + + proc setGlobalDispatcher*(disp: sink PDispatcher) = + if not gDisp.isNil: + assert gDisp.callbacks.len == 0 + gDisp = disp + initCallSoonProc() - var gDisp{.threadvar.}: PDispatcher ## Global dispatcher proc getGlobalDispatcher*(): PDispatcher = - ## Retrieves the global thread-local dispatcher. - if gDisp.isNil: gDisp = newDispatcher() + if gDisp.isNil: + setGlobalDispatcher(newDispatcher()) result = gDisp - proc register*(fd: TAsyncFD) = - ## Registers ``fd`` with the dispatcher. + proc getIoHandler*(disp: PDispatcher): Handle = + ## Returns the underlying IO Completion Port handle (Windows) or selector + ## (Unix) for the specified dispatcher. 
+ return disp.ioPort + + proc register*(fd: AsyncFD) = + ## Registers `fd` with the dispatcher. let p = getGlobalDispatcher() - if createIoCompletionPort(fd.THandle, p.ioPort, - cast[TCompletionKey](fd), 1) == 0: + + if createIoCompletionPort(fd.Handle, p.ioPort, + cast[CompletionKey](fd), 1) == 0: raiseOSError(osLastError()) p.handles.incl(fd) - proc verifyPresence(fd: TAsyncFD) = + proc verifyPresence(fd: AsyncFD) = ## Ensures that file descriptor has been registered with the dispatcher. + ## Raises ValueError if `fd` has not been registered. let p = getGlobalDispatcher() if fd notin p.handles: raise newException(ValueError, "Operation performed on a socket which has not been registered with" & " the dispatcher yet.") - proc poll*(timeout = 500) = - ## Waits for completion events and processes them. + proc hasPendingOperations*(): bool = + ## Returns `true` if the global dispatcher has pending operations. + let p = getGlobalDispatcher() + p.handles.len != 0 or p.timers.len != 0 or p.callbacks.len != 0 + + proc runOnce(timeout: int): bool = let p = getGlobalDispatcher() - if p.handles.len == 0 and p.timers.len == 0: + if p.handles.len == 0 and p.timers.len == 0 and p.callbacks.len == 0: raise newException(ValueError, "No handles or timers registered in dispatcher.") - let llTimeout = - if timeout == -1: winlean.INFINITE - else: timeout.int32 - var lpNumberOfBytesTransferred: Dword - var lpCompletionKey: ULONG - var customOverlapped: PCustomOverlapped + result = false + let nextTimer = processTimers(p, result) + let at = adjustTimeout(p, timeout, nextTimer) + var llTimeout = + if at == -1: winlean.INFINITE + else: at.int32 + + var lpNumberOfBytesTransferred: DWORD + var lpCompletionKey: ULONG_PTR + var customOverlapped: CustomRef let res = getQueuedCompletionStatus(p.ioPort, addr lpNumberOfBytesTransferred, addr lpCompletionKey, cast[ptr POVERLAPPED](addr customOverlapped), llTimeout).bool + result = true + # For 'gcDestructors' the destructor of 'customOverlapped' 
will + # be called at the end and we are the only owner here. This means + # We do not have to 'GC_unref(customOverlapped)' because the destructor + # does that for us. # http://stackoverflow.com/a/12277264/492186 # TODO: http://www.serverframework.com/handling-multiple-pending-socket-read-and-write-operations.html if res: # This is useful for ensuring the reliability of the overlapped struct. - assert customOverlapped.data.fd == lpCompletionKey.TAsyncFD + assert customOverlapped.data.fd == lpCompletionKey.AsyncFD customOverlapped.data.cb(customOverlapped.data.fd, lpNumberOfBytesTransferred, OSErrorCode(-1)) - GC_unref(customOverlapped) + + # If cell.data != nil, then system.protect(rawEnv(cb)) was called, + # so we need to dispose our `cb` environment, because it is not needed + # anymore. + if customOverlapped.data.cell.data != nil: + system.dispose(customOverlapped.data.cell) + + when not defined(gcDestructors): + GC_unref(customOverlapped) else: let errCode = osLastError() if customOverlapped != nil: - assert customOverlapped.data.fd == lpCompletionKey.TAsyncFD + assert customOverlapped.data.fd == lpCompletionKey.AsyncFD customOverlapped.data.cb(customOverlapped.data.fd, lpNumberOfBytesTransferred, errCode) - GC_unref(customOverlapped) + if customOverlapped.data.cell.data != nil: + system.dispose(customOverlapped.data.cell) + when not defined(gcDestructors): + GC_unref(customOverlapped) else: if errCode.int32 == WAIT_TIMEOUT: # Timed out - discard + result = false else: raiseOSError(errCode) # Timer processing. 
- processTimers(p) + discard processTimers(p, result) + # Callback queue processing + processPendingCallbacks(p, result) - var connectExPtr: pointer = nil - var acceptExPtr: pointer = nil - var getAcceptExSockAddrsPtr: pointer = nil - proc initPointer(s: SocketHandle, fun: var pointer, guid: var TGUID): bool = + var acceptEx: WSAPROC_ACCEPTEX + var connectEx: WSAPROC_CONNECTEX + var getAcceptExSockAddrs: WSAPROC_GETACCEPTEXSOCKADDRS + + proc initPointer(s: SocketHandle, fun: var pointer, guid: var GUID): bool = # Ref: https://github.com/powdahound/twisted/blob/master/twisted/internet/iocpreactor/iocpsupport/winsock_pointers.c - var bytesRet: Dword + var bytesRet: DWORD fun = nil result = WSAIoctl(s, SIO_GET_EXTENSION_FUNCTION_POINTER, addr guid, - sizeof(TGUID).Dword, addr fun, sizeof(pointer).Dword, + sizeof(GUID).DWORD, addr fun, sizeof(pointer).DWORD, addr bytesRet, nil, nil) == 0 proc initAll() = - let dummySock = newRawSocket() - if not initPointer(dummySock, connectExPtr, WSAID_CONNECTEX): + let dummySock = createNativeSocket() + if dummySock == INVALID_SOCKET: raiseOSError(osLastError()) - if not initPointer(dummySock, acceptExPtr, WSAID_ACCEPTEX): + var fun: pointer = nil + if not initPointer(dummySock, fun, WSAID_CONNECTEX): raiseOSError(osLastError()) - if not initPointer(dummySock, getAcceptExSockAddrsPtr, WSAID_GETACCEPTEXSOCKADDRS): + connectEx = cast[WSAPROC_CONNECTEX](fun) + if not initPointer(dummySock, fun, WSAID_ACCEPTEX): raiseOSError(osLastError()) - - proc connectEx(s: SocketHandle, name: ptr SockAddr, namelen: cint, - lpSendBuffer: pointer, dwSendDataLength: Dword, - lpdwBytesSent: PDword, lpOverlapped: POVERLAPPED): bool = - if connectExPtr.isNil: raise newException(ValueError, "Need to initialise ConnectEx().") - let fun = - cast[proc (s: SocketHandle, name: ptr SockAddr, namelen: cint, - lpSendBuffer: pointer, dwSendDataLength: Dword, - lpdwBytesSent: PDword, lpOverlapped: POVERLAPPED): bool {.stdcall,gcsafe.}](connectExPtr) - - result = 
fun(s, name, namelen, lpSendBuffer, dwSendDataLength, lpdwBytesSent, - lpOverlapped) - - proc acceptEx(listenSock, acceptSock: SocketHandle, lpOutputBuffer: pointer, - dwReceiveDataLength, dwLocalAddressLength, - dwRemoteAddressLength: Dword, lpdwBytesReceived: PDword, - lpOverlapped: POVERLAPPED): bool = - if acceptExPtr.isNil: raise newException(ValueError, "Need to initialise AcceptEx().") - let fun = - cast[proc (listenSock, acceptSock: SocketHandle, lpOutputBuffer: pointer, - dwReceiveDataLength, dwLocalAddressLength, - dwRemoteAddressLength: Dword, lpdwBytesReceived: PDword, - lpOverlapped: POVERLAPPED): bool {.stdcall,gcsafe.}](acceptExPtr) - result = fun(listenSock, acceptSock, lpOutputBuffer, dwReceiveDataLength, - dwLocalAddressLength, dwRemoteAddressLength, lpdwBytesReceived, - lpOverlapped) - - proc getAcceptExSockaddrs(lpOutputBuffer: pointer, - dwReceiveDataLength, dwLocalAddressLength, dwRemoteAddressLength: Dword, - LocalSockaddr: ptr ptr SockAddr, LocalSockaddrLength: LPInt, - RemoteSockaddr: ptr ptr SockAddr, RemoteSockaddrLength: LPInt) = - if getAcceptExSockAddrsPtr.isNil: - raise newException(ValueError, "Need to initialise getAcceptExSockAddrs().") - - let fun = - cast[proc (lpOutputBuffer: pointer, - dwReceiveDataLength, dwLocalAddressLength, - dwRemoteAddressLength: Dword, LocalSockaddr: ptr ptr SockAddr, - LocalSockaddrLength: LPInt, RemoteSockaddr: ptr ptr SockAddr, - RemoteSockaddrLength: LPInt) {.stdcall,gcsafe.}](getAcceptExSockAddrsPtr) - - fun(lpOutputBuffer, dwReceiveDataLength, dwLocalAddressLength, - dwRemoteAddressLength, LocalSockaddr, LocalSockaddrLength, - RemoteSockaddr, RemoteSockaddrLength) - - proc connect*(socket: TAsyncFD, address: string, port: Port, - af = AF_INET): Future[void] = - ## Connects ``socket`` to server at ``address:port``. - ## - ## Returns a ``Future`` which will complete when the connection succeeds - ## or an error occurs. 
- verifyPresence(socket) - var retFuture = newFuture[void]("connect") - # Apparently ``ConnectEx`` expects the socket to be initially bound: - var saddr: Sockaddr_in - saddr.sin_family = int16(toInt(af)) - saddr.sin_port = 0 - saddr.sin_addr.s_addr = INADDR_ANY - if bindAddr(socket.SocketHandle, cast[ptr SockAddr](addr(saddr)), - sizeof(saddr).SockLen) < 0'i32: + acceptEx = cast[WSAPROC_ACCEPTEX](fun) + if not initPointer(dummySock, fun, WSAID_GETACCEPTEXSOCKADDRS): raiseOSError(osLastError()) - - var aiList = getAddrInfo(address, port, af) - var success = false - var lastError: OSErrorCode - var it = aiList - while it != nil: - # "the OVERLAPPED structure must remain valid until the I/O completes" - # http://blogs.msdn.com/b/oldnewthing/archive/2011/02/02/10123392.aspx - var ol = PCustomOverlapped() - GC_ref(ol) - ol.data = TCompletionData(fd: socket, cb: - proc (fd: TAsyncFD, bytesCount: Dword, errcode: OSErrorCode) = - if not retFuture.finished: - if errcode == OSErrorCode(-1): - retFuture.complete() - else: - retFuture.fail(newException(OSError, osErrorMsg(errcode))) - ) - - var ret = connectEx(socket.SocketHandle, it.ai_addr, - sizeof(Sockaddr_in).cint, nil, 0, nil, - cast[POVERLAPPED](ol)) - if ret: - # Request to connect completed immediately. - success = true - retFuture.complete() - # We don't deallocate ``ol`` here because even though this completed - # immediately poll will still be notified about its completion and it will - # free ``ol``. - break - else: - lastError = osLastError() - if lastError.int32 == ERROR_IO_PENDING: - # In this case ``ol`` will be deallocated in ``poll``. - success = true - break - else: - GC_unref(ol) - success = false - it = it.ai_next - - dealloc(aiList) - if not success: - retFuture.fail(newException(OSError, osErrorMsg(lastError))) - return retFuture - - proc recv*(socket: TAsyncFD, size: int, - flags = {SocketFlag.SafeDisconn}): Future[string] = - ## Reads **up to** ``size`` bytes from ``socket``. 
Returned future will + getAcceptExSockAddrs = cast[WSAPROC_GETACCEPTEXSOCKADDRS](fun) + close(dummySock) + + proc newCustom*(): CustomRef = + result = CustomRef() # 0 + GC_ref(result) # 1 prevent destructor from doing a premature free. + # destructor of newCustom's caller --> 0. This means + # Windows holds a ref for us with RC == 0 (single owner). + # This is passed back to us in the IO completion port. + + proc recv*(socket: AsyncFD, size: int, + flags = {SocketFlag.SafeDisconn}): owned(Future[string]) = + ## Reads **up to** `size` bytes from `socket`. Returned future will ## complete once all the data requested is read, a part of the data has been ## read, or the socket has disconnected in which case the future will - ## complete with a value of ``""``. + ## complete with a value of `""`. ## - ## **Warning**: The ``Peek`` socket flag is not supported on Windows. + ## .. warning:: The `Peek` socket flag is not supported on Windows. # Things to note: - # * When WSARecv completes immediately then ``bytesReceived`` is very + # * When WSARecv completes immediately then `bytesReceived` is very # unreliable. # * Still need to implement message-oriented socket disconnection, # '\0' in the message currently signifies a socket disconnect. 
Who @@ -564,14 +513,13 @@ when defined(windows) or defined(nimdoc): var retFuture = newFuture[string]("recv") var dataBuf: TWSABuf dataBuf.buf = cast[cstring](alloc0(size)) - dataBuf.len = size - - var bytesReceived: Dword - var flagsio = flags.toOSFlags().Dword - var ol = PCustomOverlapped() - GC_ref(ol) - ol.data = TCompletionData(fd: socket, cb: - proc (fd: TAsyncFD, bytesCount: Dword, errcode: OSErrorCode) = + dataBuf.len = size.ULONG + + var bytesReceived: DWORD + var flagsio = flags.toOSFlags().DWORD + var ol = newCustom() + ol.data = CompletionData(fd: socket, cb: + proc (fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = if not retFuture.finished: if errcode == OSErrorCode(-1): if bytesCount == 0 and dataBuf.buf[0] == '\0': @@ -585,7 +533,7 @@ when defined(windows) or defined(nimdoc): if flags.isDisconnectionError(errcode): retFuture.complete("") else: - retFuture.fail(newException(OSError, osErrorMsg(errcode))) + retFuture.fail(newOSError(errcode)) if dataBuf.buf != nil: dealloc dataBuf.buf dataBuf.buf = nil @@ -603,53 +551,103 @@ when defined(windows) or defined(nimdoc): if flags.isDisconnectionError(err): retFuture.complete("") else: - retFuture.fail(newException(OSError, osErrorMsg(err))) - elif ret == 0 and bytesReceived == 0 and dataBuf.buf[0] == '\0': - # We have to ensure that the buffer is empty because WSARecv will tell - # us immediately when it was disconnected, even when there is still - # data in the buffer. - # We want to give the user as much data as we can. So we only return - # the empty string (which signals a disconnection) when there is - # nothing left to read. - retFuture.complete("") - # TODO: "For message-oriented sockets, where a zero byte message is often - # allowable, a failure with an error code of WSAEDISCON is used to - # indicate graceful closure." - # ~ http://msdn.microsoft.com/en-us/library/ms741688%28v=vs.85%29.aspx - else: - # Request to read completed immediately. - # From my tests bytesReceived isn't reliable. 
- let realSize = - if bytesReceived == 0: - size + retFuture.fail(newOSError(err)) + elif ret == 0: + # Request completed immediately. + if bytesReceived != 0: + var data = newString(bytesReceived) + assert bytesReceived <= size + copyMem(addr data[0], addr dataBuf.buf[0], bytesReceived) + retFuture.complete($data) + else: + if hasOverlappedIoCompleted(cast[POVERLAPPED](ol)): + retFuture.complete("") + return retFuture + + proc recvInto*(socket: AsyncFD, buf: pointer, size: int, + flags = {SocketFlag.SafeDisconn}): owned(Future[int]) = + ## Reads **up to** `size` bytes from `socket` into `buf`, which must + ## at least be of that size. Returned future will complete once all the + ## data requested is read, a part of the data has been read, or the socket + ## has disconnected in which case the future will complete with a value of + ## `0`. + ## + ## .. warning:: The `Peek` socket flag is not supported on Windows. + + + # Things to note: + # * When WSARecv completes immediately then `bytesReceived` is very + # unreliable. + # * Still need to implement message-oriented socket disconnection, + # '\0' in the message currently signifies a socket disconnect. Who + # knows what will happen when someone sends that to our socket. + verifyPresence(socket) + assert SocketFlag.Peek notin flags, "Peek not supported on Windows." 
+ + var retFuture = newFuture[int]("recvInto") + + #buf[] = '\0' + var dataBuf: TWSABuf + dataBuf.buf = cast[cstring](buf) + dataBuf.len = size.ULONG + + var bytesReceived: DWORD + var flagsio = flags.toOSFlags().DWORD + var ol = newCustom() + ol.data = CompletionData(fd: socket, cb: + proc (fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = + if not retFuture.finished: + if errcode == OSErrorCode(-1): + retFuture.complete(bytesCount) + else: + if flags.isDisconnectionError(errcode): + retFuture.complete(0) + else: + retFuture.fail(newOSError(errcode)) + if dataBuf.buf != nil: + dataBuf.buf = nil + ) + + let ret = WSARecv(socket.SocketHandle, addr dataBuf, 1, addr bytesReceived, + addr flagsio, cast[POVERLAPPED](ol), nil) + if ret == -1: + let err = osLastError() + if err.int32 != ERROR_IO_PENDING: + if dataBuf.buf != nil: + dataBuf.buf = nil + GC_unref(ol) + if flags.isDisconnectionError(err): + retFuture.complete(0) else: - bytesReceived - var data = newString(realSize) - assert realSize <= size - copyMem(addr data[0], addr dataBuf.buf[0], realSize) - #dealloc dataBuf.buf - retFuture.complete($data) - # We don't deallocate ``ol`` here because even though this completed - # immediately poll will still be notified about its completion and it will - # free ``ol``. + retFuture.fail(newOSError(err)) + elif ret == 0: + # Request completed immediately. + if bytesReceived != 0: + assert bytesReceived <= size + retFuture.complete(bytesReceived) + else: + if hasOverlappedIoCompleted(cast[POVERLAPPED](ol)): + retFuture.complete(bytesReceived) return retFuture - proc send*(socket: TAsyncFD, data: string, - flags = {SocketFlag.SafeDisconn}): Future[void] = - ## Sends ``data`` to ``socket``. The returned future will complete once all - ## data has been sent. + proc send*(socket: AsyncFD, buf: pointer, size: int, + flags = {SocketFlag.SafeDisconn}): owned(Future[void]) = + ## Sends `size` bytes from `buf` to `socket`. 
The returned future + ## will complete once all data has been sent. + ## + ## .. warning:: Use it with caution. If `buf` refers to GC'ed object, + ## you must use GC_ref/GC_unref calls to avoid early freeing of the buffer. verifyPresence(socket) var retFuture = newFuture[void]("send") var dataBuf: TWSABuf - dataBuf.buf = data # since this is not used in a callback, this is fine - dataBuf.len = data.len - - var bytesReceived, lowFlags: Dword - var ol = PCustomOverlapped() - GC_ref(ol) - ol.data = TCompletionData(fd: socket, cb: - proc (fd: TAsyncFD, bytesCount: Dword, errcode: OSErrorCode) = + dataBuf.buf = cast[cstring](buf) + dataBuf.len = size.ULONG + + var bytesReceived, lowFlags: DWORD + var ol = newCustom() + ol.data = CompletionData(fd: socket, cb: + proc (fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = if not retFuture.finished: if errcode == OSErrorCode(-1): retFuture.complete() @@ -657,7 +655,7 @@ when defined(windows) or defined(nimdoc): if flags.isDisconnectionError(errcode): retFuture.complete() else: - retFuture.fail(newException(OSError, osErrorMsg(errcode))) + retFuture.fail(newOSError(errcode)) ) let ret = WSASend(socket.SocketHandle, addr dataBuf, 1, addr bytesReceived, @@ -669,16 +667,110 @@ when defined(windows) or defined(nimdoc): if flags.isDisconnectionError(err): retFuture.complete() else: - retFuture.fail(newException(OSError, osErrorMsg(err))) + retFuture.fail(newOSError(err)) + else: + retFuture.complete() + # We don't deallocate `ol` here because even though this completed + # immediately poll will still be notified about its completion and it will + # free `ol`. + return retFuture + + proc sendTo*(socket: AsyncFD, data: pointer, size: int, saddr: ptr SockAddr, + saddrLen: SockLen, + flags = {SocketFlag.SafeDisconn}): owned(Future[void]) = + ## Sends `data` to specified destination `saddr`, using + ## socket `socket`. The returned future will complete once all data + ## has been sent. 
+ verifyPresence(socket) + var retFuture = newFuture[void]("sendTo") + var dataBuf: TWSABuf + dataBuf.buf = cast[cstring](data) + dataBuf.len = size.ULONG + var bytesSent = 0.DWORD + var lowFlags = 0.DWORD + + # we will preserve address in our stack + var staddr: array[128, char] # SOCKADDR_STORAGE size is 128 bytes + var stalen: cint = cint(saddrLen) + zeroMem(addr(staddr[0]), 128) + copyMem(addr(staddr[0]), saddr, saddrLen) + + var ol = newCustom() + ol.data = CompletionData(fd: socket, cb: + proc (fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = + if not retFuture.finished: + if errcode == OSErrorCode(-1): + retFuture.complete() + else: + retFuture.fail(newOSError(errcode)) + ) + + let ret = WSASendTo(socket.SocketHandle, addr dataBuf, 1, addr bytesSent, + lowFlags, cast[ptr SockAddr](addr(staddr[0])), + stalen, cast[POVERLAPPED](ol), nil) + if ret == -1: + let err = osLastError() + if err.int32 != ERROR_IO_PENDING: + GC_unref(ol) + retFuture.fail(newOSError(err)) else: retFuture.complete() - # We don't deallocate ``ol`` here because even though this completed + # We don't deallocate `ol` here because even though this completed # immediately poll will still be notified about its completion and it will - # free ``ol``. + # free `ol`. + return retFuture + + proc recvFromInto*(socket: AsyncFD, data: pointer, size: int, + saddr: ptr SockAddr, saddrLen: ptr SockLen, + flags = {SocketFlag.SafeDisconn}): owned(Future[int]) = + ## Receives a datagram data from `socket` into `buf`, which must + ## be at least of size `size`, address of datagram's sender will be + ## stored into `saddr` and `saddrLen`. Returned future will complete + ## once one datagram has been received, and will return size of packet + ## received. 
+ verifyPresence(socket) + var retFuture = newFuture[int]("recvFromInto") + + var dataBuf = TWSABuf(buf: cast[cstring](data), len: size.ULONG) + + var bytesReceived = 0.DWORD + var lowFlags = 0.DWORD + + var ol = newCustom() + ol.data = CompletionData(fd: socket, cb: + proc (fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = + if not retFuture.finished: + if errcode == OSErrorCode(-1): + assert bytesCount <= size + retFuture.complete(bytesCount) + else: + # datagram sockets don't have disconnection, + # so we can just raise an exception + retFuture.fail(newOSError(errcode)) + ) + + let res = WSARecvFrom(socket.SocketHandle, addr dataBuf, 1, + addr bytesReceived, addr lowFlags, + saddr, cast[ptr cint](saddrLen), + cast[POVERLAPPED](ol), nil) + if res == -1: + let err = osLastError() + if err.int32 != ERROR_IO_PENDING: + GC_unref(ol) + retFuture.fail(newOSError(err)) + else: + # Request completed immediately. + if bytesReceived != 0: + assert bytesReceived <= size + retFuture.complete(bytesReceived) + else: + if hasOverlappedIoCompleted(cast[POVERLAPPED](ol)): + retFuture.complete(bytesReceived) return retFuture - proc acceptAddr*(socket: TAsyncFD, flags = {SocketFlag.SafeDisconn}): - Future[tuple[address: string, client: TAsyncFD]] = + proc acceptAddr*(socket: AsyncFD, flags = {SocketFlag.SafeDisconn}, + inheritable = defined(nimInheritHandles)): + owned(Future[tuple[address: string, client: AsyncFD]]) {.gcsafe.} = ## Accepts a new connection. Returns a future containing the client socket ## corresponding to that connection and the remote address of the client. ## The future will complete when the connection is successfully accepted. @@ -686,44 +778,27 @@ when defined(windows) or defined(nimdoc): ## The resulting client socket is automatically registered to the ## dispatcher. ## - ## The ``accept`` call may result in an error if the connecting socket - ## disconnects during the duration of the ``accept``. 
If the ``SafeDisconn`` + ## If `inheritable` is false (the default), the resulting client socket will + ## not be inheritable by child processes. + ## + ## The `accept` call may result in an error if the connecting socket + ## disconnects during the duration of the `accept`. If the `SafeDisconn` ## flag is specified then this error will not be raised and instead ## accept will be called again. verifyPresence(socket) - var retFuture = newFuture[tuple[address: string, client: TAsyncFD]]("acceptAddr") + var retFuture = newFuture[tuple[address: string, client: AsyncFD]]("acceptAddr") - var clientSock = newRawSocket() + var clientSock = createNativeSocket(inheritable = inheritable) if clientSock == osInvalidSocket: raiseOSError(osLastError()) const lpOutputLen = 1024 var lpOutputBuf = newString(lpOutputLen) - var dwBytesReceived: Dword - let dwReceiveDataLength = 0.Dword # We don't want any data to be read. - let dwLocalAddressLength = Dword(sizeof (Sockaddr_in) + 16) - let dwRemoteAddressLength = Dword(sizeof(Sockaddr_in) + 16) + var dwBytesReceived: DWORD + let dwReceiveDataLength = 0.DWORD # We don't want any data to be read. + let dwLocalAddressLength = DWORD(sizeof(Sockaddr_in6) + 16) + let dwRemoteAddressLength = DWORD(sizeof(Sockaddr_in6) + 16) - template completeAccept(): stmt {.immediate, dirty.} = - var listenSock = socket - let setoptRet = setsockopt(clientSock, SOL_SOCKET, - SO_UPDATE_ACCEPT_CONTEXT, addr listenSock, - sizeof(listenSock).SockLen) - if setoptRet != 0: raiseOSError(osLastError()) - - var localSockaddr, remoteSockaddr: ptr SockAddr - var localLen, remoteLen: int32 - getAcceptExSockaddrs(addr lpOutputBuf[0], dwReceiveDataLength, - dwLocalAddressLength, dwRemoteAddressLength, - addr localSockaddr, addr localLen, - addr remoteSockaddr, addr remoteLen) - register(clientSock.TAsyncFD) - # TODO: IPv6. Check ``sa_family``. 
http://stackoverflow.com/a/9212542/492186 - retFuture.complete( - (address: $inet_ntoa(cast[ptr Sockaddr_in](remoteSockAddr).sin_addr), - client: clientSock.TAsyncFD) - ) - - template failAccept(errcode): stmt = + template failAccept(errcode) = if flags.isDisconnectionError(errcode): var newAcceptFut = acceptAddr(socket, flags) newAcceptFut.callback = @@ -733,12 +808,36 @@ when defined(windows) or defined(nimdoc): else: retFuture.complete(newAcceptFut.read) else: - retFuture.fail(newException(OSError, osErrorMsg(errcode))) + retFuture.fail(newOSError(errcode)) - var ol = PCustomOverlapped() - GC_ref(ol) - ol.data = TCompletionData(fd: socket, cb: - proc (fd: TAsyncFD, bytesCount: Dword, errcode: OSErrorCode) = + template completeAccept() {.dirty.} = + var listenSock = socket + let setoptRet = setsockopt(clientSock, SOL_SOCKET, + SO_UPDATE_ACCEPT_CONTEXT, addr listenSock, + sizeof(listenSock).SockLen) + if setoptRet != 0: + let errcode = osLastError() + discard clientSock.closesocket() + failAccept(errcode) + else: + var localSockaddr, remoteSockaddr: ptr SockAddr + var localLen, remoteLen: int32 + getAcceptExSockAddrs(addr lpOutputBuf[0], dwReceiveDataLength, + dwLocalAddressLength, dwRemoteAddressLength, + addr localSockaddr, addr localLen, + addr remoteSockaddr, addr remoteLen) + try: + let address = getAddrString(remoteSockaddr) + register(clientSock.AsyncFD) + retFuture.complete((address: address, client: clientSock.AsyncFD)) + except: + # getAddrString may raise + clientSock.close() + retFuture.fail(getCurrentException()) + + var ol = newCustom() + ol.data = CompletionData(fd: socket, cb: + proc (fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) {.gcsafe.} = if not retFuture.finished: if errcode == OSErrorCode(-1): completeAccept() @@ -760,213 +859,616 @@ when defined(windows) or defined(nimdoc): GC_unref(ol) else: completeAccept() - # We don't deallocate ``ol`` here because even though this completed + # We don't deallocate `ol` here because even though 
this completed # immediately poll will still be notified about its completion and it will - # free ``ol``. + # free `ol`. return retFuture - proc newAsyncRawSocket*(domain, typ, protocol: cint): TAsyncFD = - ## Creates a new socket and registers it with the dispatcher implicitly. - result = newRawSocket(domain, typ, protocol).TAsyncFD - result.SocketHandle.setBlocking(false) - register(result) - - proc newAsyncRawSocket*(domain: Domain = AF_INET, - typ: SockType = SOCK_STREAM, - protocol: Protocol = IPPROTO_TCP): TAsyncFD = - ## Creates a new socket and registers it with the dispatcher implicitly. - result = newRawSocket(domain, typ, protocol).TAsyncFD - result.SocketHandle.setBlocking(false) - register(result) - - proc closeSocket*(socket: TAsyncFD) = + implementSetInheritable() + + proc closeSocket*(socket: AsyncFD) = ## Closes a socket and ensures that it is unregistered. socket.SocketHandle.close() getGlobalDispatcher().handles.excl(socket) - proc unregister*(fd: TAsyncFD) = - ## Unregisters ``fd``. + proc unregister*(fd: AsyncFD) = + ## Unregisters `fd`. 
getGlobalDispatcher().handles.excl(fd) + proc contains*(disp: PDispatcher, fd: AsyncFD): bool = + return fd in disp.handles + + {.push stackTrace: off.} + proc waitableCallback(param: pointer, + timerOrWaitFired: WINBOOL) {.stdcall.} = + var p = cast[PostCallbackDataPtr](param) + discard postQueuedCompletionStatus(p.ioPort, timerOrWaitFired.DWORD, + ULONG_PTR(p.handleFd), + cast[pointer](p.ovl)) + {.pop.} + + proc registerWaitableEvent(fd: AsyncFD, cb: Callback; mask: DWORD) = + let p = getGlobalDispatcher() + var flags = (WT_EXECUTEINWAITTHREAD or WT_EXECUTEONLYONCE).DWORD + var hEvent = wsaCreateEvent() + if hEvent == 0: + raiseOSError(osLastError()) + var pcd = cast[PostCallbackDataPtr](allocShared0(sizeof(PostCallbackData))) + pcd.ioPort = p.ioPort + pcd.handleFd = fd + var ol = newCustom() + + ol.data = CompletionData(fd: fd, cb: + proc(fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) {.gcsafe.} = + # we excluding our `fd` because cb(fd) can register own handler + # for this `fd` + p.handles.excl(fd) + # unregisterWait() is called before callback, because appropriate + # winsockets function can re-enable event. + # https://msdn.microsoft.com/en-us/library/windows/desktop/ms741576(v=vs.85).aspx + if unregisterWait(pcd.waitFd) == 0: + let err = osLastError() + if err.int32 != ERROR_IO_PENDING: + deallocShared(cast[pointer](pcd)) + discard wsaCloseEvent(hEvent) + raiseOSError(err) + if cb(fd): + # callback returned `true`, so we free all allocated resources + deallocShared(cast[pointer](pcd)) + if not wsaCloseEvent(hEvent): + raiseOSError(osLastError()) + # pcd.ovl will be unrefed in poll(). + else: + # callback returned `false` we need to continue + if p.handles.contains(fd): + # new callback was already registered with `fd`, so we free all + # allocated resources. This happens because in callback `cb` + # addRead/addWrite was called with same `fd`. 
+ deallocShared(cast[pointer](pcd)) + if not wsaCloseEvent(hEvent): + raiseOSError(osLastError()) + else: + # we need to include `fd` again + p.handles.incl(fd) + # and register WaitForSingleObject again + if not registerWaitForSingleObject(addr(pcd.waitFd), hEvent, + cast[WAITORTIMERCALLBACK](waitableCallback), + cast[pointer](pcd), INFINITE, flags): + # pcd.ovl will be unrefed in poll() + let err = osLastError() + deallocShared(cast[pointer](pcd)) + discard wsaCloseEvent(hEvent) + raiseOSError(err) + else: + # we incref `pcd.ovl` and `protect` callback one more time, + # because it will be unrefed and disposed in `poll()` after + # callback finishes. + GC_ref(pcd.ovl) + pcd.ovl.data.cell = system.protect(rawEnv(pcd.ovl.data.cb)) + ) + # We need to protect our callback environment value, so GC will not free it + # accidentally. + ol.data.cell = system.protect(rawEnv(ol.data.cb)) + + # This is main part of `hacky way` is using WSAEventSelect, so `hEvent` + # will be signaled when appropriate `mask` events will be triggered. + if wsaEventSelect(fd.SocketHandle, hEvent, mask) != 0: + let err = osLastError() + GC_unref(ol) + deallocShared(cast[pointer](pcd)) + discard wsaCloseEvent(hEvent) + raiseOSError(err) + + pcd.ovl = ol + if not registerWaitForSingleObject(addr(pcd.waitFd), hEvent, + cast[WAITORTIMERCALLBACK](waitableCallback), + cast[pointer](pcd), INFINITE, flags): + let err = osLastError() + GC_unref(ol) + deallocShared(cast[pointer](pcd)) + discard wsaCloseEvent(hEvent) + raiseOSError(err) + p.handles.incl(fd) + + proc addRead*(fd: AsyncFD, cb: Callback) = + ## Start watching the file descriptor for read availability and then call + ## the callback `cb`. + ## + ## This is not `pure` mechanism for Windows Completion Ports (IOCP), + ## so if you can avoid it, please do it. Use `addRead` only if really + ## need it (main usecase is adaptation of unix-like libraries to be + ## asynchronous on Windows). 
+ ## + ## If you use this function, you don't need to use asyncdispatch.recv() + ## or asyncdispatch.accept(), because they are using IOCP, please use + ## nativesockets.recv() and nativesockets.accept() instead. + ## + ## Be sure your callback `cb` returns `true`, if you want to remove + ## watch of `read` notifications, and `false`, if you want to continue + ## receiving notifications. + registerWaitableEvent(fd, cb, FD_READ or FD_ACCEPT or FD_OOB or FD_CLOSE) + + proc addWrite*(fd: AsyncFD, cb: Callback) = + ## Start watching the file descriptor for write availability and then call + ## the callback `cb`. + ## + ## This is not `pure` mechanism for Windows Completion Ports (IOCP), + ## so if you can avoid it, please do it. Use `addWrite` only if really + ## need it (main usecase is adaptation of unix-like libraries to be + ## asynchronous on Windows). + ## + ## If you use this function, you don't need to use asyncdispatch.send() + ## or asyncdispatch.connect(), because they are using IOCP, please use + ## nativesockets.send() and nativesockets.connect() instead. + ## + ## Be sure your callback `cb` returns `true`, if you want to remove + ## watch of `write` notifications, and `false`, if you want to continue + ## receiving notifications. + registerWaitableEvent(fd, cb, FD_WRITE or FD_CONNECT or FD_CLOSE) + + template registerWaitableHandle(p, hEvent, flags, pcd, timeout, + handleCallback) = + let handleFD = AsyncFD(hEvent) + pcd.ioPort = p.ioPort + pcd.handleFd = handleFD + var ol = newCustom() + ol.data.fd = handleFD + ol.data.cb = handleCallback + # We need to protect our callback environment value, so GC will not free it + # accidentally. 
+ ol.data.cell = system.protect(rawEnv(ol.data.cb)) + + pcd.ovl = ol + if not registerWaitForSingleObject(addr(pcd.waitFd), hEvent, + cast[WAITORTIMERCALLBACK](waitableCallback), + cast[pointer](pcd), timeout.DWORD, flags): + let err = osLastError() + GC_unref(ol) + deallocShared(cast[pointer](pcd)) + discard closeHandle(hEvent) + raiseOSError(err) + p.handles.incl(handleFD) + + template closeWaitable(handle: untyped) = + let waitFd = pcd.waitFd + deallocShared(cast[pointer](pcd)) + p.handles.excl(fd) + if unregisterWait(waitFd) == 0: + let err = osLastError() + if err.int32 != ERROR_IO_PENDING: + discard closeHandle(handle) + raiseOSError(err) + if closeHandle(handle) == 0: + raiseOSError(osLastError()) + + proc addTimer*(timeout: int, oneshot: bool, cb: Callback) = + ## Registers callback `cb` to be called when timer expired. + ## + ## Parameters: + ## + ## * `timeout` - timeout value in milliseconds. + ## * `oneshot` + ## * `true` - generate only one timeout event + ## * `false` - generate timeout events periodically + + doAssert(timeout > 0) + let p = getGlobalDispatcher() + + var hEvent = createEvent(nil, 1, 0, nil) + if hEvent == INVALID_HANDLE_VALUE: + raiseOSError(osLastError()) + + var pcd = cast[PostCallbackDataPtr](allocShared0(sizeof(PostCallbackData))) + var flags = WT_EXECUTEINWAITTHREAD.DWORD + if oneshot: flags = flags or WT_EXECUTEONLYONCE + + proc timercb(fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = + let res = cb(fd) + if res or oneshot: + closeWaitable(hEvent) + else: + # if callback returned `false`, then it wants to be called again, so + # we need to ref and protect `pcd.ovl` again, because it will be + # unrefed and disposed in `poll()`. + GC_ref(pcd.ovl) + pcd.ovl.data.cell = system.protect(rawEnv(pcd.ovl.data.cb)) + + registerWaitableHandle(p, hEvent, flags, pcd, timeout, timercb) + + proc addProcess*(pid: int, cb: Callback) = + ## Registers callback `cb` to be called when process with process ID + ## `pid` exited. 
+ const NULL = Handle(0) + let p = getGlobalDispatcher() + let procFlags = SYNCHRONIZE + var hProcess = openProcess(procFlags, 0, pid.DWORD) + if hProcess == NULL: + raiseOSError(osLastError()) + + var pcd = cast[PostCallbackDataPtr](allocShared0(sizeof(PostCallbackData))) + var flags = WT_EXECUTEINWAITTHREAD.DWORD or WT_EXECUTEONLYONCE.DWORD + + proc proccb(fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = + closeWaitable(hProcess) + discard cb(fd) + + registerWaitableHandle(p, hProcess, flags, pcd, INFINITE, proccb) + + proc newAsyncEvent*(): AsyncEvent = + ## Creates a new thread-safe `AsyncEvent` object. + ## + ## New `AsyncEvent` object is not automatically registered with + ## dispatcher like `AsyncSocket`. + var sa = SECURITY_ATTRIBUTES( + nLength: sizeof(SECURITY_ATTRIBUTES).cint, + bInheritHandle: 1 + ) + var event = createEvent(addr(sa), 0'i32, 0'i32, nil) + if event == INVALID_HANDLE_VALUE: + raiseOSError(osLastError()) + result = cast[AsyncEvent](allocShared0(sizeof(AsyncEventImpl))) + result.hEvent = event + + proc trigger*(ev: AsyncEvent) = + ## Set event `ev` to signaled state. + if setEvent(ev.hEvent) == 0: + raiseOSError(osLastError()) + + proc unregister*(ev: AsyncEvent) = + ## Unregisters event `ev`. + doAssert(ev.hWaiter != 0, "Event is not registered in the queue!") + let p = getGlobalDispatcher() + p.handles.excl(AsyncFD(ev.hEvent)) + if unregisterWait(ev.hWaiter) == 0: + let err = osLastError() + if err.int32 != ERROR_IO_PENDING: + raiseOSError(err) + ev.hWaiter = 0 + + proc close*(ev: AsyncEvent) = + ## Closes event `ev`. 
+ let res = closeHandle(ev.hEvent) + deallocShared(cast[pointer](ev)) + if res == 0: + raiseOSError(osLastError()) + + proc addEvent*(ev: AsyncEvent, cb: Callback) = + ## Registers callback `cb` to be called when `ev` will be signaled + doAssert(ev.hWaiter == 0, "Event is already registered in the queue!") + + let p = getGlobalDispatcher() + let hEvent = ev.hEvent + + var pcd = cast[PostCallbackDataPtr](allocShared0(sizeof(PostCallbackData))) + var flags = WT_EXECUTEINWAITTHREAD.DWORD + + proc eventcb(fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = + if ev.hWaiter != 0: + if cb(fd): + # we need this check to avoid exception, if `unregister(event)` was + # called in callback. + deallocShared(cast[pointer](pcd)) + if ev.hWaiter != 0: + unregister(ev) + else: + # if callback returned `false`, then it wants to be called again, so + # we need to ref and protect `pcd.ovl` again, because it will be + # unrefed and disposed in `poll()`. + GC_ref(pcd.ovl) + pcd.ovl.data.cell = system.protect(rawEnv(pcd.ovl.data.cb)) + else: + # if ev.hWaiter == 0, then event was unregistered before `poll()` call. + deallocShared(cast[pointer](pcd)) + + registerWaitableHandle(p, hEvent, flags, pcd, INFINITE, eventcb) + ev.hWaiter = pcd.waitFd + initAll() else: - import selectors - when defined(windows): - import winlean - const - EINTR = WSAEINPROGRESS - EINPROGRESS = WSAEINPROGRESS - EWOULDBLOCK = WSAEWOULDBLOCK - EAGAIN = EINPROGRESS - MSG_NOSIGNAL = 0 - else: - from posix import EINTR, EAGAIN, EINPROGRESS, EWOULDBLOCK, MSG_PEEK, - MSG_NOSIGNAL - + import std/selectors + from std/posix import EINTR, EAGAIN, EINPROGRESS, EWOULDBLOCK, MSG_PEEK, + MSG_NOSIGNAL + when declared(posix.accept4): + from std/posix import accept4, SOCK_CLOEXEC + when defined(genode): + import genode/env # get the implicit Genode env + import genode/signals + + const + InitCallbackListSize = 4 # initial size of callbacks sequence, + # associated with file/socket descriptor. 
+ InitDelayedCallbackListSize = 64 # initial size of delayed callbacks + # queue. type - TAsyncFD* = distinct cint - TCallback = proc (fd: TAsyncFD): bool {.closure,gcsafe.} + AsyncFD* = distinct cint + Callback* = proc (fd: AsyncFD): bool {.closure, gcsafe.} - PData* = ref object of RootRef - fd: TAsyncFD - readCBs: seq[TCallback] - writeCBs: seq[TCallback] + AsyncData = object + readList: seq[Callback] + writeList: seq[Callback] + + AsyncEvent* = distinct SelectEvent PDispatcher* = ref object of PDispatcherBase - selector: Selector + selector: Selector[AsyncData] + when defined(genode): + signalHandler: SignalHandler + + proc `==`*(x, y: AsyncFD): bool {.borrow.} + proc `==`*(x, y: AsyncEvent): bool {.borrow.} - proc `==`*(x, y: TAsyncFD): bool {.borrow.} + template newAsyncData(): AsyncData = + AsyncData( + readList: newSeqOfCap[Callback](InitCallbackListSize), + writeList: newSeqOfCap[Callback](InitCallbackListSize) + ) - proc newDispatcher*(): PDispatcher = + proc newDispatcher*(): owned(PDispatcher) = new result - result.selector = newSelector() - result.timers = @[] + result.selector = newSelector[AsyncData]() + result.timers.clear() + result.callbacks = initDeque[proc () {.closure, gcsafe.}](InitDelayedCallbackListSize) + when defined(genode): + let entrypoint = ep(cast[GenodeEnv](runtimeEnv)) + result.signalHandler = newSignalHandler(entrypoint): + discard runOnce(0) + + var gDisp{.threadvar.}: owned PDispatcher ## Global dispatcher + + when defined(nuttx): + import std/exitprocs + + proc cleanDispatcher() {.noconv.} = + gDisp = nil + + proc addFinalyzer() = + addExitProc(cleanDispatcher) + + proc setGlobalDispatcher*(disp: owned PDispatcher) = + if not gDisp.isNil: + assert gDisp.callbacks.len == 0 + gDisp = disp + initCallSoonProc() - var gDisp{.threadvar.}: PDispatcher ## Global dispatcher proc getGlobalDispatcher*(): PDispatcher = - if gDisp.isNil: gDisp = newDispatcher() + if gDisp.isNil: + setGlobalDispatcher(newDispatcher()) + when defined(nuttx): + 
addFinalyzer() result = gDisp - proc update(fd: TAsyncFD, events: set[Event]) = - let p = getGlobalDispatcher() - assert fd.SocketHandle in p.selector - discard p.selector.update(fd.SocketHandle, events) + proc getIoHandler*(disp: PDispatcher): Selector[AsyncData] = + return disp.selector - proc register*(fd: TAsyncFD) = + proc register*(fd: AsyncFD) = let p = getGlobalDispatcher() - var data = PData(fd: fd, readCBs: @[], writeCBs: @[]) - p.selector.register(fd.SocketHandle, {}, data.RootRef) - - proc newAsyncRawSocket*(domain: cint, typ: cint, protocol: cint): TAsyncFD = - result = newRawSocket(domain, typ, protocol).TAsyncFD - result.SocketHandle.setBlocking(false) - register(result) - - proc newAsyncRawSocket*(domain: Domain = AF_INET, - typ: SockType = SOCK_STREAM, - protocol: Protocol = IPPROTO_TCP): TAsyncFD = - result = newRawSocket(domain, typ, protocol).TAsyncFD - result.SocketHandle.setBlocking(false) - register(result) - - proc closeSocket*(sock: TAsyncFD) = - let disp = getGlobalDispatcher() - sock.SocketHandle.close() - disp.selector.unregister(sock.SocketHandle) + var data = newAsyncData() + p.selector.registerHandle(fd.SocketHandle, {}, data) - proc unregister*(fd: TAsyncFD) = + proc unregister*(fd: AsyncFD) = getGlobalDispatcher().selector.unregister(fd.SocketHandle) - proc addRead*(fd: TAsyncFD, cb: TCallback) = + proc unregister*(ev: AsyncEvent) = + getGlobalDispatcher().selector.unregister(SelectEvent(ev)) + + proc contains*(disp: PDispatcher, fd: AsyncFD): bool = + return fd.SocketHandle in disp.selector + + proc addRead*(fd: AsyncFD, cb: Callback) = let p = getGlobalDispatcher() - if fd.SocketHandle notin p.selector: + var newEvents = {Event.Read} + withData(p.selector, fd.SocketHandle, adata) do: + adata.readList.add(cb) + newEvents.incl(Event.Read) + if len(adata.writeList) != 0: newEvents.incl(Event.Write) + do: raise newException(ValueError, "File descriptor not registered.") - p.selector[fd.SocketHandle].data.PData.readCBs.add(cb) - 
update(fd, p.selector[fd.SocketHandle].events + {EvRead}) + p.selector.updateHandle(fd.SocketHandle, newEvents) - proc addWrite*(fd: TAsyncFD, cb: TCallback) = + proc addWrite*(fd: AsyncFD, cb: Callback) = let p = getGlobalDispatcher() - if fd.SocketHandle notin p.selector: + var newEvents = {Event.Write} + withData(p.selector, fd.SocketHandle, adata) do: + adata.writeList.add(cb) + newEvents.incl(Event.Write) + if len(adata.readList) != 0: newEvents.incl(Event.Read) + do: raise newException(ValueError, "File descriptor not registered.") - p.selector[fd.SocketHandle].data.PData.writeCBs.add(cb) - update(fd, p.selector[fd.SocketHandle].events + {EvWrite}) + p.selector.updateHandle(fd.SocketHandle, newEvents) - proc poll*(timeout = 500) = + proc hasPendingOperations*(): bool = let p = getGlobalDispatcher() - for info in p.selector.select(timeout): - let data = PData(info.key.data) - assert data.fd == info.key.fd.TAsyncFD - #echo("In poll ", data.fd.cint) - if EvError in info.events: - closeSocket(data.fd) - continue - - if EvRead in info.events: - # Callback may add items to ``data.readCBs`` which causes issues if - # we are iterating over ``data.readCBs`` at the same time. We therefore - # make a copy to iterate over. - let currentCBs = data.readCBs - data.readCBs = @[] - for cb in currentCBs: - if not cb(data.fd): - # Callback wants to be called again. - data.readCBs.add(cb) - - if EvWrite in info.events: - let currentCBs = data.writeCBs - data.writeCBs = @[] - for cb in currentCBs: - if not cb(data.fd): - # Callback wants to be called again. 
- data.writeCBs.add(cb) - - if info.key in p.selector: - var newEvents: set[Event] - if data.readCBs.len != 0: newEvents = {EvRead} - if data.writeCBs.len != 0: newEvents = newEvents + {EvWrite} - if newEvents != info.key.events: - update(data.fd, newEvents) + not p.selector.isEmpty() or p.timers.len != 0 or p.callbacks.len != 0 + + proc prependSeq(dest: var seq[Callback]; src: sink seq[Callback]) = + var old = move dest + dest = src + for i in 0..high(old): + dest.add(move old[i]) + + proc processBasicCallbacks( + fd: AsyncFD, event: Event + ): tuple[readCbListCount, writeCbListCount: int] = + # Process pending descriptor and AsyncEvent callbacks. + # + # Invoke every callback stored in `rwlist`, until one + # returns `false` (which means callback wants to stay + # alive). In such case all remaining callbacks will be added + # to `rwlist` again, in the order they have been inserted. + # + # `rwlist` associated with file descriptor MUST BE emptied before + # dispatching callback (See https://github.com/nim-lang/Nim/issues/5128), + # or it can be possible to fall into endless cycle. + var curList: seq[Callback] + + let selector = getGlobalDispatcher().selector + withData(selector, fd.int, fdData): + case event + of Event.Read: + #shallowCopy(curList, fdData.readList) + curList = move fdData.readList + fdData.readList = newSeqOfCap[Callback](InitCallbackListSize) + of Event.Write: + #shallowCopy(curList, fdData.writeList) + curList = move fdData.writeList + fdData.writeList = newSeqOfCap[Callback](InitCallbackListSize) else: - # FD no longer a part of the selector. Likely been closed - # (e.g. socket disconnected). - discard + assert false, "Cannot process callbacks for " & $event + + let newLength = max(len(curList), InitCallbackListSize) + var newList = newSeqOfCap[Callback](newLength) + + var eventsExtinguished = false + for cb in curList: + if eventsExtinguished: + newList.add(cb) + elif not cb(fd): + # Callback wants to be called again. 
+ newList.add(cb) + # This callback has returned with EAGAIN, so we don't need to + # call any other callbacks as they are all waiting for the same event + # on the same fd. + # We do need to ensure they are called again though. + eventsExtinguished = true + + withData(selector, fd.int, fdData) do: + # Descriptor is still present in the queue. + case event + of Event.Read: prependSeq(fdData.readList, newList) + of Event.Write: prependSeq(fdData.writeList, newList) + else: + assert false, "Cannot process callbacks for " & $event + + result.readCbListCount = len(fdData.readList) + result.writeCbListCount = len(fdData.writeList) + do: + # Descriptor was unregistered in callback via `unregister()`. + result.readCbListCount = -1 + result.writeCbListCount = -1 + + proc processCustomCallbacks(p: PDispatcher; fd: AsyncFD) = + # Process pending custom event callbacks. Custom events are + # {Event.Timer, Event.Signal, Event.Process, Event.Vnode}. + # There can be only one callback registered with one descriptor, + # so there is no need to iterate over list. + var curList: seq[Callback] + + withData(p.selector, fd.int, adata) do: + curList = move adata.readList + adata.readList = newSeqOfCap[Callback](InitCallbackListSize) + + let newLength = len(curList) + var newList = newSeqOfCap[Callback](newLength) + + var cb = curList[0] + if not cb(fd): + newList.add(cb) + + withData(p.selector, fd.int, adata) do: + # descriptor still present in queue. + adata.readList = newList & adata.readList + if len(adata.readList) == 0: + # if no callbacks registered with descriptor, unregister it. + p.selector.unregister(fd.int) + do: + # descriptor was unregistered in callback via `unregister()`. 
+ discard + + implementSetInheritable() + + proc closeSocket*(sock: AsyncFD) = + let selector = getGlobalDispatcher().selector + if sock.SocketHandle notin selector: + raise newException(ValueError, "File descriptor not registered.") - processTimers(p) + let data = selector.getData(sock.SocketHandle) + sock.unregister() + sock.SocketHandle.close() + # We need to unblock the read and write callbacks which could still be + # waiting for the socket to become readable and/or writeable. + for cb in data.readList & data.writeList: + if not cb(sock): + raise newException( + ValueError, "Expecting async operations to stop when fd has closed." + ) + + proc runOnce(timeout: int): bool = + let p = getGlobalDispatcher() + if p.selector.isEmpty() and p.timers.len == 0 and p.callbacks.len == 0: + when defined(genode): + if timeout == 0: return + raise newException(ValueError, + "No handles or timers registered in dispatcher.") - proc connect*(socket: TAsyncFD, address: string, port: Port, - af = AF_INET): Future[void] = - var retFuture = newFuture[void]("connect") + result = false + var keys: array[64, ReadyKey] + let nextTimer = processTimers(p, result) + var count = + p.selector.selectInto(adjustTimeout(p, timeout, nextTimer), keys) + for i in 0..<count: + let fd = keys[i].fd.AsyncFD + let events = keys[i].events + var (readCbListCount, writeCbListCount) = (0, 0) + + if Event.Read in events or events == {Event.Error}: + (readCbListCount, writeCbListCount) = + processBasicCallbacks(fd, Event.Read) + result = true + + if Event.Write in events or events == {Event.Error}: + (readCbListCount, writeCbListCount) = + processBasicCallbacks(fd, Event.Write) + result = true + + var isCustomEvent = false + if Event.User in events: + (readCbListCount, writeCbListCount) = + processBasicCallbacks(fd, Event.Read) + isCustomEvent = true + if readCbListCount == 0: + p.selector.unregister(fd.int) + result = true + + when ioselSupportedPlatform: + const customSet = {Event.Timer, Event.Signal, 
Event.Process, + Event.Vnode} + if (customSet * events) != {}: + isCustomEvent = true + processCustomCallbacks(p, fd) + result = true - proc cb(fd: TAsyncFD): bool = - # We have connected. - retFuture.complete() - return true - - var aiList = getAddrInfo(address, port, af) - var success = false - var lastError: OSErrorCode - var it = aiList - while it != nil: - var ret = connect(socket.SocketHandle, it.ai_addr, it.ai_addrlen.Socklen) - if ret == 0: - # Request to connect completed immediately. - success = true - retFuture.complete() - break - else: - lastError = osLastError() - if lastError.int32 == EINTR or lastError.int32 == EINPROGRESS: - success = true - addWrite(socket, cb) - break - else: - success = false - it = it.ai_next + # because state `data` can be modified in callback we need to update + # descriptor events with currently registered callbacks. + if not isCustomEvent and (readCbListCount != -1 and writeCbListCount != -1): + var newEvents: set[Event] = {} + if readCbListCount > 0: incl(newEvents, Event.Read) + if writeCbListCount > 0: incl(newEvents, Event.Write) + p.selector.updateHandle(SocketHandle(fd), newEvents) - dealloc(aiList) - if not success: - retFuture.fail(newException(OSError, osErrorMsg(lastError))) - return retFuture + # Timer processing. 
+ discard processTimers(p, result) + # Callback queue processing + processPendingCallbacks(p, result) - proc recv*(socket: TAsyncFD, size: int, - flags = {SocketFlag.SafeDisconn}): Future[string] = + proc recv*(socket: AsyncFD, size: int, + flags = {SocketFlag.SafeDisconn}): owned(Future[string]) = var retFuture = newFuture[string]("recv") var readBuffer = newString(size) - proc cb(sock: TAsyncFD): bool = + proc cb(sock: AsyncFD): bool = result = true let res = recv(sock.SocketHandle, addr readBuffer[0], size.cint, flags.toOSFlags()) - #echo("recv cb res: ", res) if res < 0: let lastError = osLastError() - if lastError.int32 notin {EINTR, EWOULDBLOCK, EAGAIN}: + if lastError.int32 != EINTR and lastError.int32 != EWOULDBLOCK and + lastError.int32 != EAGAIN: if flags.isDisconnectionError(lastError): retFuture.complete("") else: - retFuture.fail(newException(OSError, osErrorMsg(lastError))) + retFuture.fail(newOSError(lastError)) else: result = false # We still want this callback to be called. elif res == 0: @@ -980,25 +1482,52 @@ else: addRead(socket, cb) return retFuture - proc send*(socket: TAsyncFD, data: string, - flags = {SocketFlag.SafeDisconn}): Future[void] = + proc recvInto*(socket: AsyncFD, buf: pointer, size: int, + flags = {SocketFlag.SafeDisconn}): owned(Future[int]) = + var retFuture = newFuture[int]("recvInto") + + proc cb(sock: AsyncFD): bool = + result = true + let res = recv(sock.SocketHandle, buf, size.cint, + flags.toOSFlags()) + if res < 0: + let lastError = osLastError() + if lastError.int32 != EINTR and lastError.int32 != EWOULDBLOCK and + lastError.int32 != EAGAIN: + if flags.isDisconnectionError(lastError): + retFuture.complete(0) + else: + retFuture.fail(newOSError(lastError)) + else: + result = false # We still want this callback to be called. + else: + retFuture.complete(res) + # TODO: The following causes a massive slowdown. 
+ #if not cb(socket): + addRead(socket, cb) + return retFuture + + proc send*(socket: AsyncFD, buf: pointer, size: int, + flags = {SocketFlag.SafeDisconn}): owned(Future[void]) = var retFuture = newFuture[void]("send") var written = 0 - proc cb(sock: TAsyncFD): bool = + proc cb(sock: AsyncFD): bool = result = true - let netSize = data.len-written - var d = data.cstring + let netSize = size-written + var d = cast[cstring](buf) let res = send(sock.SocketHandle, addr d[written], netSize.cint, MSG_NOSIGNAL) if res < 0: let lastError = osLastError() - if lastError.int32 notin {EINTR, EWOULDBLOCK, EAGAIN}: + if lastError.int32 != EINTR and + lastError.int32 != EWOULDBLOCK and + lastError.int32 != EAGAIN: if flags.isDisconnectionError(lastError): retFuture.complete() else: - retFuture.fail(newException(OSError, osErrorMsg(lastError))) + retFuture.fail(newOSError(lastError)) else: result = false # We still want this callback to be called. else: @@ -1012,49 +1541,429 @@ else: addWrite(socket, cb) return retFuture - proc acceptAddr*(socket: TAsyncFD, flags = {SocketFlag.SafeDisconn}): - Future[tuple[address: string, client: TAsyncFD]] = + proc sendTo*(socket: AsyncFD, data: pointer, size: int, saddr: ptr SockAddr, + saddrLen: SockLen, + flags = {SocketFlag.SafeDisconn}): owned(Future[void]) = + ## Sends `data` of size `size` in bytes to specified destination + ## (`saddr` of size `saddrLen` in bytes, using socket `socket`. + ## The returned future will complete once all data has been sent. 
+ var retFuture = newFuture[void]("sendTo") + + # we will preserve address in our stack + var staddr: array[128, char] # SOCKADDR_STORAGE size is 128 bytes + var stalen = saddrLen + zeroMem(addr(staddr[0]), 128) + copyMem(addr(staddr[0]), saddr, saddrLen) + + proc cb(sock: AsyncFD): bool = + result = true + let res = sendto(sock.SocketHandle, data, size, MSG_NOSIGNAL, + cast[ptr SockAddr](addr(staddr[0])), stalen) + if res < 0: + let lastError = osLastError() + if lastError.int32 != EINTR and lastError.int32 != EWOULDBLOCK and + lastError.int32 != EAGAIN: + retFuture.fail(newOSError(lastError)) + else: + result = false # We still want this callback to be called. + else: + retFuture.complete() + + addWrite(socket, cb) + return retFuture + + proc recvFromInto*(socket: AsyncFD, data: pointer, size: int, + saddr: ptr SockAddr, saddrLen: ptr SockLen, + flags = {SocketFlag.SafeDisconn}): owned(Future[int]) = + ## Receives a datagram data from `socket` into `data`, which must + ## be at least of size `size` in bytes, address of datagram's sender + ## will be stored into `saddr` and `saddrLen`. Returned future will + ## complete once one datagram has been received, and will return size + ## of packet received. 
+ var retFuture = newFuture[int]("recvFromInto") + proc cb(sock: AsyncFD): bool = + result = true + let res = recvfrom(sock.SocketHandle, data, size.cint, flags.toOSFlags(), + saddr, saddrLen) + if res < 0: + let lastError = osLastError() + if lastError.int32 != EINTR and lastError.int32 != EWOULDBLOCK and + lastError.int32 != EAGAIN: + retFuture.fail(newOSError(lastError)) + else: + result = false + else: + retFuture.complete(res) + addRead(socket, cb) + return retFuture + + proc acceptAddr*(socket: AsyncFD, flags = {SocketFlag.SafeDisconn}, + inheritable = defined(nimInheritHandles)): + owned(Future[tuple[address: string, client: AsyncFD]]) = var retFuture = newFuture[tuple[address: string, - client: TAsyncFD]]("acceptAddr") - proc cb(sock: TAsyncFD): bool = + client: AsyncFD]]("acceptAddr") + proc cb(sock: AsyncFD): bool {.gcsafe.} = result = true - var sockAddress: SockAddr_in - var addrLen = sizeof(sockAddress).Socklen - var client = accept(sock.SocketHandle, - cast[ptr SockAddr](addr(sockAddress)), addr(addrLen)) + var sockAddress: Sockaddr_storage + var addrLen = sizeof(sockAddress).SockLen + var client = + when declared(accept4): + accept4(sock.SocketHandle, cast[ptr SockAddr](addr(sockAddress)), + addr(addrLen), if inheritable: 0 else: SOCK_CLOEXEC) + else: + accept(sock.SocketHandle, cast[ptr SockAddr](addr(sockAddress)), + addr(addrLen)) + when declared(setInheritable) and not declared(accept4): + if client != osInvalidSocket and not setInheritable(client, inheritable): + # Set failure first because close() itself can fail, + # altering osLastError(). 
+ retFuture.fail(newOSError(osLastError())) + close client + return false + if client == osInvalidSocket: let lastError = osLastError() - assert lastError.int32 notin {EWOULDBLOCK, EAGAIN} + assert lastError.int32 != EWOULDBLOCK and lastError.int32 != EAGAIN if lastError.int32 == EINTR: return false else: if flags.isDisconnectionError(lastError): return false else: - retFuture.fail(newException(OSError, osErrorMsg(lastError))) + retFuture.fail(newOSError(lastError)) else: - register(client.TAsyncFD) - retFuture.complete(($inet_ntoa(sockAddress.sin_addr), client.TAsyncFD)) + try: + let address = getAddrString(cast[ptr SockAddr](addr sockAddress)) + register(client.AsyncFD) + retFuture.complete((address, client.AsyncFD)) + except: + # getAddrString may raise + client.close() + retFuture.fail(getCurrentException()) addRead(socket, cb) return retFuture -proc sleepAsync*(ms: int): Future[void] = + when ioselSupportedPlatform: + + proc addTimer*(timeout: int, oneshot: bool, cb: Callback) = + ## Start watching for timeout expiration, and then call the + ## callback `cb`. + ## `timeout` - time in milliseconds, + ## `oneshot` - if `true` only one event will be dispatched, + ## if `false` continuous events every `timeout` milliseconds. + let p = getGlobalDispatcher() + var data = newAsyncData() + data.readList.add(cb) + p.selector.registerTimer(timeout, oneshot, data) + + proc addSignal*(signal: int, cb: Callback) = + ## Start watching signal `signal`, and when signal appears, call the + ## callback `cb`. + let p = getGlobalDispatcher() + var data = newAsyncData() + data.readList.add(cb) + p.selector.registerSignal(signal, data) + + proc addProcess*(pid: int, cb: Callback) = + ## Start watching for process exit with pid `pid`, and then call + ## the callback `cb`. + let p = getGlobalDispatcher() + var data = newAsyncData() + data.readList.add(cb) + p.selector.registerProcess(pid, data) + + proc newAsyncEvent*(): AsyncEvent = + ## Creates new `AsyncEvent`. 
+ result = AsyncEvent(newSelectEvent()) + + proc trigger*(ev: AsyncEvent) = + ## Sets new `AsyncEvent` to signaled state. + trigger(SelectEvent(ev)) + + proc close*(ev: AsyncEvent) = + ## Closes `AsyncEvent` + close(SelectEvent(ev)) + + proc addEvent*(ev: AsyncEvent, cb: Callback) = + ## Start watching for event `ev`, and call callback `cb`, when + ## ev will be set to signaled state. + let p = getGlobalDispatcher() + var data = newAsyncData() + data.readList.add(cb) + p.selector.registerEvent(SelectEvent(ev), data) + +proc drain*(timeout = 500) = + ## Waits for completion of **all** events and processes them. Raises `ValueError` + ## if there are no pending operations. In contrast to `poll` this + ## processes as many events as are available until the timeout has elapsed. + var curTimeout = timeout + let start = now() + while hasPendingOperations(): + discard runOnce(curTimeout) + curTimeout -= (now() - start).inMilliseconds.int + if curTimeout < 0: + break + +proc poll*(timeout = 500) = + ## Waits for completion events and processes them. Raises `ValueError` + ## if there are no pending operations. This runs the underlying OS + ## `epoll`:idx: or `kqueue`:idx: primitive only once. 
+ discard runOnce(timeout) + +template createAsyncNativeSocketImpl(domain, sockType, protocol: untyped, + inheritable = defined(nimInheritHandles)) = + let handle = createNativeSocket(domain, sockType, protocol, inheritable) + if handle == osInvalidSocket: + return osInvalidSocket.AsyncFD + handle.setBlocking(false) + when defined(macosx) and not defined(nimdoc): + handle.setSockOptInt(SOL_SOCKET, SO_NOSIGPIPE, 1) + result = handle.AsyncFD + register(result) + +proc createAsyncNativeSocket*(domain: cint, sockType: cint, + protocol: cint, + inheritable = defined(nimInheritHandles)): AsyncFD = + createAsyncNativeSocketImpl(domain, sockType, protocol, inheritable) + +proc createAsyncNativeSocket*(domain: Domain = Domain.AF_INET, + sockType: SockType = SOCK_STREAM, + protocol: Protocol = IPPROTO_TCP, + inheritable = defined(nimInheritHandles)): AsyncFD = + createAsyncNativeSocketImpl(domain, sockType, protocol, inheritable) + +when defined(windows) or defined(nimdoc): + proc bindToDomain(handle: SocketHandle, domain: Domain) = + # Extracted into a separate proc, because connect() on Windows requires + # the socket to be initially bound. 
+ template doBind(saddr) = + if bindAddr(handle, cast[ptr SockAddr](addr(saddr)), + sizeof(saddr).SockLen) < 0'i32: + raiseOSError(osLastError()) + + if domain == Domain.AF_INET6: + var saddr: Sockaddr_in6 + saddr.sin6_family = uint16(toInt(domain)) + doBind(saddr) + else: + var saddr: Sockaddr_in + saddr.sin_family = uint16(toInt(domain)) + doBind(saddr) + + proc doConnect(socket: AsyncFD, addrInfo: ptr AddrInfo): owned(Future[void]) = + let retFuture = newFuture[void]("doConnect") + result = retFuture + + var ol = newCustom() + ol.data = CompletionData(fd: socket, cb: + proc (fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = + if not retFuture.finished: + if errcode == OSErrorCode(-1): + const SO_UPDATE_CONNECT_CONTEXT = 0x7010 + socket.SocketHandle.setSockOptInt(SOL_SOCKET, SO_UPDATE_CONNECT_CONTEXT, 1) # 15022 + retFuture.complete() + else: + retFuture.fail(newOSError(errcode)) + ) + + let ret = connectEx(socket.SocketHandle, addrInfo.ai_addr, + cint(addrInfo.ai_addrlen), nil, 0, nil, + cast[POVERLAPPED](ol)) + if ret: + # Request to connect completed immediately. + retFuture.complete() + # We don't deallocate `ol` here because even though this completed + # immediately poll will still be notified about its completion and it + # will free `ol`. + else: + let lastError = osLastError() + if lastError.int32 != ERROR_IO_PENDING: + # With ERROR_IO_PENDING `ol` will be deallocated in `poll`, + # and the future will be completed/failed there, too. + GC_unref(ol) + retFuture.fail(newOSError(lastError)) +else: + proc doConnect(socket: AsyncFD, addrInfo: ptr AddrInfo): owned(Future[void]) = + let retFuture = newFuture[void]("doConnect") + result = retFuture + + proc cb(fd: AsyncFD): bool = + let ret = SocketHandle(fd).getSockOptInt( + cint(SOL_SOCKET), cint(SO_ERROR)) + if ret == 0: + # We have connected. 
+ retFuture.complete() + return true + elif ret == EINTR: + # interrupted, keep waiting + return false + else: + retFuture.fail(newOSError(OSErrorCode(ret))) + return true + + let ret = connect(socket.SocketHandle, + addrInfo.ai_addr, + addrInfo.ai_addrlen.SockLen) + if ret == 0: + # Request to connect completed immediately. + retFuture.complete() + else: + let lastError = osLastError() + if lastError.int32 == EINTR or lastError.int32 == EINPROGRESS: + addWrite(socket, cb) + else: + retFuture.fail(newOSError(lastError)) + +template asyncAddrInfoLoop(addrInfo: ptr AddrInfo, fd: untyped, + protocol: Protocol = IPPROTO_RAW) = + ## Iterates through the AddrInfo linked list asynchronously + ## until the connection can be established. + const shouldCreateFd = not declared(fd) + + when shouldCreateFd: + let sockType = protocol.toSockType() + + var fdPerDomain: array[low(Domain).ord..high(Domain).ord, AsyncFD] + for i in low(fdPerDomain)..high(fdPerDomain): + fdPerDomain[i] = osInvalidSocket.AsyncFD + template closeUnusedFds(domainToKeep = -1) {.dirty.} = + for i, fd in fdPerDomain: + if fd != osInvalidSocket.AsyncFD and i != domainToKeep: + fd.closeSocket() + + var lastException: ref Exception + var curAddrInfo = addrInfo + var domain: Domain + when shouldCreateFd: + var curFd: AsyncFD + else: + var curFd = fd + proc tryNextAddrInfo(fut: Future[void]) {.gcsafe.} = + if fut == nil or fut.failed: + if fut != nil: + lastException = fut.readError() + + while curAddrInfo != nil: + let domainOpt = curAddrInfo.ai_family.toKnownDomain() + if domainOpt.isSome: + domain = domainOpt.unsafeGet() + break + curAddrInfo = curAddrInfo.ai_next + + if curAddrInfo == nil: + freeAddrInfo(addrInfo) + when shouldCreateFd: + closeUnusedFds() + if lastException != nil: + retFuture.fail(lastException) + else: + retFuture.fail(newException( + IOError, "Couldn't resolve address: " & address)) + return + + when shouldCreateFd: + curFd = fdPerDomain[ord(domain)] + if curFd == osInvalidSocket.AsyncFD: 
+ try: + curFd = createAsyncNativeSocket(domain, sockType, protocol) + except: + freeAddrInfo(addrInfo) + closeUnusedFds() + raise getCurrentException() + when defined(windows): + curFd.SocketHandle.bindToDomain(domain) + fdPerDomain[ord(domain)] = curFd + + doConnect(curFd, curAddrInfo).callback = tryNextAddrInfo + curAddrInfo = curAddrInfo.ai_next + else: + freeAddrInfo(addrInfo) + when shouldCreateFd: + closeUnusedFds(ord(domain)) + retFuture.complete(curFd) + else: + retFuture.complete() + + tryNextAddrInfo(nil) + +proc dial*(address: string, port: Port, + protocol: Protocol = IPPROTO_TCP): owned(Future[AsyncFD]) = + ## Establishes connection to the specified `address`:`port` pair via the + ## specified protocol. The procedure iterates through possible + ## resolutions of the `address` until it succeeds, meaning that it + ## seamlessly works with both IPv4 and IPv6. + ## Returns the async file descriptor, registered in the dispatcher of + ## the current thread, ready to send or receive data. + let retFuture = newFuture[AsyncFD]("dial") + result = retFuture + let sockType = protocol.toSockType() + + let aiList = getAddrInfo(address, port, Domain.AF_UNSPEC, sockType, protocol) + asyncAddrInfoLoop(aiList, noFD, protocol) + +proc connect*(socket: AsyncFD, address: string, port: Port, + domain = Domain.AF_INET): owned(Future[void]) = + let retFuture = newFuture[void]("connect") + result = retFuture + + when defined(windows): + verifyPresence(socket) + else: + assert getSockDomain(socket.SocketHandle) == domain + + let aiList = getAddrInfo(address, port, domain) + when defined(windows): + socket.SocketHandle.bindToDomain(domain) + asyncAddrInfoLoop(aiList, socket) + +proc sleepAsync*(ms: int | float): owned(Future[void]) = ## Suspends the execution of the current async procedure for the next - ## ``ms`` miliseconds. + ## `ms` milliseconds. 
var retFuture = newFuture[void]("sleepAsync") let p = getGlobalDispatcher() - p.timers.add((epochTime() + (ms / 1000), retFuture)) + when ms is int: + p.timers.push((getMonoTime() + initDuration(milliseconds = ms), retFuture)) + elif ms is float: + let ns = (ms * 1_000_000).int64 + p.timers.push((getMonoTime() + initDuration(nanoseconds = ns), retFuture)) + return retFuture + +proc withTimeout*[T](fut: Future[T], timeout: int): owned(Future[bool]) = + ## Returns a future which will complete once `fut` completes or after + ## `timeout` milliseconds has elapsed. + ## + ## If `fut` completes first the returned future will hold true, + ## otherwise, if `timeout` milliseconds has elapsed first, the returned + ## future will hold false. + + var retFuture = newFuture[bool]("asyncdispatch.`withTimeout`") + var timeoutFuture = sleepAsync(timeout) + fut.callback = + proc () = + if not retFuture.finished: + if fut.failed: + retFuture.fail(fut.error) + else: + retFuture.complete(true) + timeoutFuture.callback = + proc () = + if not retFuture.finished: retFuture.complete(false) return retFuture -proc accept*(socket: TAsyncFD, - flags = {SocketFlag.SafeDisconn}): Future[TAsyncFD] = +proc accept*(socket: AsyncFD, + flags = {SocketFlag.SafeDisconn}, + inheritable = defined(nimInheritHandles)): owned(Future[AsyncFD]) = ## Accepts a new connection. Returns a future containing the client socket ## corresponding to that connection. + ## + ## If `inheritable` is false (the default), the resulting client socket + ## will not be inheritable by child processes. + ## ## The future will complete when the connection is successfully accepted. 
- var retFut = newFuture[TAsyncFD]("accept") - var fut = acceptAddr(socket, flags) + var retFut = newFuture[AsyncFD]("accept") + var fut = acceptAddr(socket, flags, inheritable) fut.callback = - proc (future: Future[tuple[address: string, client: TAsyncFD]]) = + proc (future: Future[tuple[address: string, client: AsyncFD]]) = assert future.finished if future.failed: retFut.fail(future.error) @@ -1062,363 +1971,45 @@ proc accept*(socket: TAsyncFD, retFut.complete(future.read.client) return retFut -# -- Await Macro - -proc skipUntilStmtList(node: NimNode): NimNode {.compileTime.} = - # Skips a nest of StmtList's. - result = node - if node[0].kind == nnkStmtList: - result = skipUntilStmtList(node[0]) - -proc skipStmtList(node: NimNode): NimNode {.compileTime.} = - result = node - if node[0].kind == nnkStmtList: - result = node[0] - -template createCb(retFutureSym, iteratorNameSym, - name: expr): stmt {.immediate.} = - var nameIterVar = iteratorNameSym - #{.push stackTrace: off.} - proc cb {.closure,gcsafe.} = - try: - if not nameIterVar.finished: - var next = nameIterVar() - if next == nil: - assert retFutureSym.finished, "Async procedure's (" & - name & ") return Future was not finished." - else: - next.callback = cb - except: - if retFutureSym.finished: - # Take a look at tasyncexceptions for the bug which this fixes. - # That test explains it better than I can here. - raise - else: - retFutureSym.fail(getCurrentException()) - cb() - #{.pop.} -proc generateExceptionCheck(futSym, - tryStmt, rootReceiver, fromNode: NimNode): NimNode {.compileTime.} = - if tryStmt.kind == nnkNilLit: - result = rootReceiver - else: - var exceptionChecks: seq[tuple[cond, body: NimNode]] = @[] - let errorNode = newDotExpr(futSym, newIdentNode("error")) - for i in 1 .. <tryStmt.len: - let exceptBranch = tryStmt[i] - if exceptBranch[0].kind == nnkStmtList: - exceptionChecks.add((newIdentNode("true"), exceptBranch[0])) - else: - var exceptIdentCount = 0 - var ifCond: NimNode - for i in 0 .. 
<exceptBranch.len: - let child = exceptBranch[i] - if child.kind == nnkIdent: - let cond = infix(errorNode, "of", child) - if exceptIdentCount == 0: - ifCond = cond - else: - ifCond = infix(ifCond, "or", cond) - else: - break - exceptIdentCount.inc - - expectKind(exceptBranch[exceptIdentCount], nnkStmtList) - exceptionChecks.add((ifCond, exceptBranch[exceptIdentCount])) - # -> -> else: raise futSym.error - exceptionChecks.add((newIdentNode("true"), - newNimNode(nnkRaiseStmt).add(errorNode))) - # Read the future if there is no error. - # -> else: futSym.read - let elseNode = newNimNode(nnkElse, fromNode) - elseNode.add newNimNode(nnkStmtList, fromNode) - elseNode[0].add rootReceiver - - let ifBody = newStmtList() - ifBody.add newCall(newIdentNode("setCurrentException"), errorNode) - ifBody.add newIfStmt(exceptionChecks) - ifBody.add newCall(newIdentNode("setCurrentException"), newNilLit()) - - result = newIfStmt( - (newDotExpr(futSym, newIdentNode("failed")), ifBody) - ) - result.add elseNode - -template createVar(result: var NimNode, futSymName: string, - asyncProc: NimNode, - valueReceiver, rootReceiver: expr, - fromNode: NimNode) = - result = newNimNode(nnkStmtList, fromNode) - var futSym = genSym(nskVar, "future") - result.add newVarStmt(futSym, asyncProc) # -> var future<x> = y - result.add newNimNode(nnkYieldStmt, fromNode).add(futSym) # -> yield future<x> - valueReceiver = newDotExpr(futSym, newIdentNode("read")) # -> future<x>.read - result.add generateExceptionCheck(futSym, tryStmt, rootReceiver, fromNode) - -proc processBody(node, retFutureSym: NimNode, - subTypeIsVoid: bool, - tryStmt: NimNode): NimNode {.compileTime.} = - #echo(node.treeRepr) - result = node - case node.kind - of nnkReturnStmt: - result = newNimNode(nnkStmtList, node) - if node[0].kind == nnkEmpty: - if not subTypeIsVoid: - result.add newCall(newIdentNode("complete"), retFutureSym, - newIdentNode("result")) - else: - result.add newCall(newIdentNode("complete"), retFutureSym) - else: - 
result.add newCall(newIdentNode("complete"), retFutureSym, - node[0].processBody(retFutureSym, subTypeIsVoid, tryStmt)) - - result.add newNimNode(nnkReturnStmt, node).add(newNilLit()) - return # Don't process the children of this return stmt - of nnkCommand, nnkCall: - if node[0].kind == nnkIdent and node[0].ident == !"await": - case node[1].kind - of nnkIdent, nnkInfix: - # await x - result = newNimNode(nnkYieldStmt, node).add(node[1]) # -> yield x - of nnkCall, nnkCommand: - # await foo(p, x) - var futureValue: NimNode - result.createVar("future" & $node[1][0].toStrLit, node[1], futureValue, - futureValue, node) - else: - error("Invalid node kind in 'await', got: " & $node[1].kind) - elif node.len > 1 and node[1].kind == nnkCommand and - node[1][0].kind == nnkIdent and node[1][0].ident == !"await": - # foo await x - var newCommand = node - result.createVar("future" & $node[0].toStrLit, node[1][1], newCommand[1], - newCommand, node) - - of nnkVarSection, nnkLetSection: - case node[0][2].kind - of nnkCommand: - if node[0][2][0].kind == nnkIdent and node[0][2][0].ident == !"await": - # var x = await y - var newVarSection = node # TODO: Should this use copyNimNode? - result.createVar("future" & $node[0][0].ident, node[0][2][1], - newVarSection[0][2], newVarSection, node) - else: discard - of nnkAsgn: - case node[1].kind - of nnkCommand: - if node[1][0].ident == !"await": - # x = await y - var newAsgn = node - result.createVar("future" & $node[0].toStrLit, node[1][1], newAsgn[1], newAsgn, node) - else: discard - of nnkDiscardStmt: - # discard await x - if node[0].kind != nnkEmpty and node[0][0].kind == nnkIdent and - node[0][0].ident == !"await": - var newDiscard = node - result.createVar("futureDiscard_" & $toStrLit(node[0][1]), node[0][1], - newDiscard[0], newDiscard, node) - of nnkTryStmt: - # try: await x; except: ... 
- result = newNimNode(nnkStmtList, node) - template wrapInTry(n, tryBody: expr) = - var temp = n - n[0] = tryBody - tryBody = temp - - # Transform ``except`` body. - # TODO: Could we perform some ``await`` transformation here to get it - # working in ``except``? - tryBody[1] = processBody(n[1], retFutureSym, subTypeIsVoid, nil) - - proc processForTry(n: NimNode, i: var int, - res: NimNode): bool {.compileTime.} = - ## Transforms the body of the tryStmt. Does not transform the - ## body in ``except``. - ## Returns true if the tryStmt node was transformed into an ifStmt. - result = false - var skipped = n.skipStmtList() - while i < skipped.len: - var processed = processBody(skipped[i], retFutureSym, - subTypeIsVoid, n) - - # Check if we transformed the node into an exception check. - # This suggests skipped[i] contains ``await``. - if processed.kind != skipped[i].kind or processed.len != skipped[i].len: - processed = processed.skipUntilStmtList() - expectKind(processed, nnkStmtList) - expectKind(processed[2][1], nnkElse) - i.inc - - if not processForTry(n, i, processed[2][1][0]): - # We need to wrap the nnkElse nodes back into a tryStmt. - # As they are executed if an exception does not happen - # inside the awaited future. - # The following code will wrap the nodes inside the - # original tryStmt. - wrapInTry(n, processed[2][1][0]) - - res.add processed - result = true +proc keepAlive(x: string) = + discard "mark 'x' as escaping so that it is put into a closure for us to keep the data alive" + +proc send*(socket: AsyncFD, data: string, + flags = {SocketFlag.SafeDisconn}): owned(Future[void]) = + ## Sends `data` to `socket`. The returned future will complete once all + ## data has been sent. 
+ var retFuture = newFuture[void]("send") + if data.len > 0: + let sendFut = socket.send(unsafeAddr data[0], data.len, flags) + sendFut.callback = + proc () = + keepAlive(data) + if sendFut.failed: + retFuture.fail(sendFut.error) else: - res.add skipped[i] - i.inc - var i = 0 - if not processForTry(node, i, result): - # If the tryStmt hasn't been transformed we can just put the body - # back into it. - wrapInTry(node, result) - return - else: discard - - for i in 0 .. <result.len: - result[i] = processBody(result[i], retFutureSym, subTypeIsVoid, nil) - -proc getName(node: NimNode): string {.compileTime.} = - case node.kind - of nnkPostfix: - return $node[1].ident - of nnkIdent: - return $node.ident - of nnkEmpty: - return "anonymous" - else: - error("Unknown name.") - -macro async*(prc: stmt): stmt {.immediate.} = - ## Macro which processes async procedures into the appropriate - ## iterators and yield statements. - if prc.kind notin {nnkProcDef, nnkLambda}: - error("Cannot transform this node kind into an async proc." 
& - " Proc definition or lambda node expected.") - - hint("Processing " & prc[0].getName & " as an async proc.") - - let returnType = prc[3][0] - # Verify that the return type is a Future[T] - if returnType.kind == nnkIdent: - error("Expected return type of 'Future' got '" & $returnType & "'") - elif returnType.kind == nnkBracketExpr: - if $returnType[0] != "Future": - error("Expected return type of 'Future' got '" & $returnType[0] & "'") - - let subtypeIsVoid = returnType.kind == nnkEmpty or - (returnType.kind == nnkBracketExpr and - returnType[1].kind == nnkIdent and returnType[1].ident == !"void") - - var outerProcBody = newNimNode(nnkStmtList, prc[6]) - - # -> var retFuture = newFuture[T]() - var retFutureSym = genSym(nskVar, "retFuture") - var subRetType = - if returnType.kind == nnkEmpty: newIdentNode("void") - else: returnType[1] - outerProcBody.add( - newVarStmt(retFutureSym, - newCall( - newNimNode(nnkBracketExpr, prc[6]).add( - newIdentNode(!"newFuture"), # TODO: Strange bug here? Remove the `!`. 
- subRetType), - newLit(prc[0].getName)))) # Get type from return type of this proc - - # -> iterator nameIter(): FutureBase {.closure.} = - # -> {.push warning[resultshadowed]: off.} - # -> var result: T - # -> {.pop.} - # -> <proc_body> - # -> complete(retFuture, result) - var iteratorNameSym = genSym(nskIterator, $prc[0].getName & "Iter") - var procBody = prc[6].processBody(retFutureSym, subtypeIsVoid, nil) - if not subtypeIsVoid: - procBody.insert(0, newNimNode(nnkPragma).add(newIdentNode("push"), - newNimNode(nnkExprColonExpr).add(newNimNode(nnkBracketExpr).add( - newIdentNode("warning"), newIdentNode("resultshadowed")), - newIdentNode("off")))) # -> {.push warning[resultshadowed]: off.} - - procBody.insert(1, newNimNode(nnkVarSection, prc[6]).add( - newIdentDefs(newIdentNode("result"), returnType[1]))) # -> var result: T - - procBody.insert(2, newNimNode(nnkPragma).add( - newIdentNode("pop"))) # -> {.pop.}) - - procBody.add( - newCall(newIdentNode("complete"), - retFutureSym, newIdentNode("result"))) # -> complete(retFuture, result) + retFuture.complete() else: - # -> complete(retFuture) - procBody.add(newCall(newIdentNode("complete"), retFutureSym)) - - var closureIterator = newProc(iteratorNameSym, [newIdentNode("FutureBase")], - procBody, nnkIteratorDef) - closureIterator[4] = newNimNode(nnkPragma, prc[6]).add(newIdentNode("closure")) - outerProcBody.add(closureIterator) - - # -> createCb(retFuture) - #var cbName = newIdentNode("cb") - var procCb = newCall(bindSym"createCb", retFutureSym, iteratorNameSym, - newStrLitNode(prc[0].getName)) - outerProcBody.add procCb - - # -> return retFuture - outerProcBody.add newNimNode(nnkReturnStmt, prc[6][prc[6].len-1]).add(retFutureSym) - - result = prc - - # Remove the 'async' pragma. - for i in 0 .. <result[4].len: - if result[4][i].kind == nnkIdent and result[4][i].ident == !"async": - result[4].del(i) - if subtypeIsVoid: - # Add discardable pragma. 
- if returnType.kind == nnkEmpty: - # Add Future[void] - result[3][0] = parseExpr("Future[void]") - - result[6] = outerProcBody - - #echo(treeRepr(result)) - #if prc[0].getName == "test": - # echo(toStrLit(result)) - -proc recvLine*(socket: TAsyncFD): Future[string] {.async.} = - ## Reads a line of data from ``socket``. Returned future will complete once - ## a full line is read or an error occurs. - ## - ## If a full line is read ``\r\L`` is not - ## added to ``line``, however if solely ``\r\L`` is read then ``line`` - ## will be set to it. - ## - ## If the socket is disconnected, ``line`` will be set to ``""``. - ## - ## If the socket is disconnected in the middle of a line (before ``\r\L`` - ## is read) then line will be set to ``""``. - ## The partial line **will be lost**. - ## - ## **Warning**: This assumes that lines are delimited by ``\r\L``. - ## - ## **Note**: This procedure is mostly used for testing. You likely want to - ## use ``asyncnet.recvLine`` instead. + retFuture.complete() + + return retFuture - template addNLIfEmpty(): stmt = - if result.len == 0: - result.add("\c\L") +# -- Await Macro +import std/asyncmacro +export asyncmacro +proc readAll*(future: FutureStream[string]): owned(Future[string]) {.async.} = + ## Returns a future that will complete when all the string data from the + ## specified future stream is retrieved. result = "" - var c = "" while true: - c = await recv(socket, 1) - if c.len == 0: - return "" - if c == "\r": - c = await recv(socket, 1) - assert c == "\l" - addNLIfEmpty() - return - elif c == "\L": - addNLIfEmpty() - return - add(result, c) + let (hasValue, value) = await future.read() + if hasValue: + result.add(value) + else: + break + +proc callSoon(cbproc: proc () {.gcsafe.}) = + getGlobalDispatcher().callbacks.addLast(cbproc) proc runForever*() = ## Begins a never ending global dispatcher poll loop. 
@@ -1431,3 +2022,44 @@ proc waitFor*[T](fut: Future[T]): T = poll() fut.read + +proc activeDescriptors*(): int {.inline.} = + ## Returns the current number of active file descriptors for the current + ## event loop. This is a cheap operation that does not involve a system call. + when defined(windows): + result = getGlobalDispatcher().handles.len + elif not defined(nimdoc): + result = getGlobalDispatcher().selector.count + +when defined(posix): + import std/posix + +when defined(linux) or defined(windows) or defined(macosx) or defined(bsd) or + defined(solaris) or defined(zephyr) or defined(freertos) or defined(nuttx) or defined(haiku): + proc maxDescriptors*(): int {.raises: OSError.} = + ## Returns the maximum number of active file descriptors for the current + ## process. This involves a system call. For now `maxDescriptors` is + ## supported on the following OSes: Windows, Linux, OSX, BSD, Solaris. + when defined(windows): + result = 16_700_000 + elif defined(zephyr) or defined(freertos): + result = FD_MAX + else: + var fdLim: RLimit + if getrlimit(RLIMIT_NOFILE, fdLim) < 0: + raiseOSError(osLastError()) + result = int(fdLim.rlim_cur) - 1 + +when defined(genode): + proc scheduleCallbacks*(): bool {.discardable.} = + ## *Genode only.* + ## Schedule callback processing and return immediately. + ## Returns `false` if there is nothing to schedule. + ## RPC servers should call this to dispatch `callSoon` + ## bodies after retiring an RPC to its client. + ## This is effectively a non-blocking `poll(…)` and is + ## equivalent to scheduling a momentary no-op timeout + ## but faster and with less overhead. + let dis = getGlobalDispatcher() + result = dis.callbacks.len > 0 + if result: submit(dis.signalHandler.cap) diff --git a/lib/pure/asyncfile.nim b/lib/pure/asyncfile.nim index 25e121183..0f6504342 100644 --- a/lib/pure/asyncfile.nim +++ b/lib/pure/asyncfile.nim @@ -9,29 +9,37 @@ ## This module implements asynchronous file reading and writing. ## -## .. 
code-block:: Nim -## import asyncfile, asyncdispatch, os +## ```Nim +## import std/[asyncfile, asyncdispatch, os] ## -## proc main() {.async.} = -## var file = openAsync(getTempDir() / "foobar.txt", fmReadWrite) -## await file.write("test") -## file.setFilePos(0) -## let data = await file.readAll() -## doAssert data == "test" -## file.close() +## proc main() {.async.} = +## var file = openAsync(getTempDir() / "foobar.txt", fmReadWrite) +## await file.write("test") +## file.setFilePos(0) +## let data = await file.readAll() +## doAssert data == "test" +## file.close() ## -## waitFor main() +## waitFor main() +## ``` -import asyncdispatch, os +import std/[asyncdispatch, os] + +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] + when defined(windows) or defined(nimdoc): + import std/widestrs + +# TODO: Fix duplication introduced by PR #4683. when defined(windows) or defined(nimdoc): - import winlean + import std/winlean else: - import posix + import std/posix type AsyncFile* = ref object - fd: TAsyncFd + fd: AsyncFD offset: int64 when defined(windows) or defined(nimdoc): @@ -48,56 +56,60 @@ when defined(windows) or defined(nimdoc): case mode of fmRead, fmReadWriteExisting: OPEN_EXISTING - of fmAppend, fmReadWrite, fmWrite: - if fileExists(filename): - OPEN_EXISTING - else: - CREATE_NEW + of fmReadWrite, fmWrite: + CREATE_ALWAYS + of fmAppend: + OPEN_ALWAYS else: proc getPosixFlags(mode: FileMode): cint = case mode of fmRead: result = O_RDONLY of fmWrite: - result = O_WRONLY or O_CREAT + result = O_WRONLY or O_CREAT or O_TRUNC of fmAppend: result = O_WRONLY or O_CREAT or O_APPEND of fmReadWrite: - result = O_RDWR or O_CREAT + result = O_RDWR or O_CREAT or O_TRUNC of fmReadWriteExisting: result = O_RDWR result = result or O_NONBLOCK -proc getFileSize(f: AsyncFile): int64 = +proc getFileSize*(f: AsyncFile): int64 = ## Retrieves the specified file's size. 
when defined(windows) or defined(nimdoc): - var high: DWord - let low = getFileSize(f.fd.THandle, addr high) + var high: DWORD + let low = getFileSize(f.fd.Handle, addr high) if low == INVALID_FILE_SIZE: raiseOSError(osLastError()) - return (high shl 32) or low + result = (high shl 32) or low + else: + let curPos = lseek(f.fd.cint, 0, SEEK_CUR) + result = lseek(f.fd.cint, 0, SEEK_END) + f.offset = lseek(f.fd.cint, curPos, SEEK_SET) + assert(f.offset == curPos) -proc openAsync*(filename: string, mode = fmRead): AsyncFile = - ## Opens a file specified by the path in ``filename`` using - ## the specified ``mode`` asynchronously. +proc newAsyncFile*(fd: AsyncFD): AsyncFile = + ## Creates `AsyncFile` with a previously opened file descriptor `fd`. new result + result.fd = fd + register(fd) + +proc openAsync*(filename: string, mode = fmRead): AsyncFile = + ## Opens a file specified by the path in `filename` using + ## the specified FileMode `mode` asynchronously. when defined(windows) or defined(nimdoc): let flags = FILE_FLAG_OVERLAPPED or FILE_ATTRIBUTE_NORMAL let desiredAccess = getDesiredAccess(mode) let creationDisposition = getCreationDisposition(mode, filename) - when useWinUnicode: - result.fd = createFileW(newWideCString(filename), desiredAccess, - FILE_SHARE_READ, - nil, creationDisposition, flags, 0).TAsyncFd - else: - result.fd = createFileA(filename, desiredAccess, - FILE_SHARE_READ, - nil, creationDisposition, flags, 0).TAsyncFd + let fd = createFileW(newWideCString(filename), desiredAccess, + FILE_SHARE_READ, + nil, creationDisposition, flags, 0) - if result.fd.THandle == INVALID_HANDLE_VALUE: + if fd == INVALID_HANDLE_VALUE: raiseOSError(osLastError()) - register(result.fd) + result = newAsyncFile(fd.AsyncFD) if mode == fmAppend: result.offset = getFileSize(result) @@ -106,27 +118,104 @@ proc openAsync*(filename: string, mode = fmRead): AsyncFile = let flags = getPosixFlags(mode) # RW (Owner), RW (Group), R (Other) let perm = S_IRUSR or S_IWUSR or S_IRGRP 
or S_IWGRP or S_IROTH - result.fd = open(filename, flags, perm).TAsyncFD - if result.fd.cint == -1: + let fd = open(filename, flags, perm) + if fd == -1: raiseOSError(osLastError()) - register(result.fd) + result = newAsyncFile(fd.AsyncFD) -proc read*(f: AsyncFile, size: int): Future[string] = - ## Read ``size`` bytes from the specified file asynchronously starting at +proc readBuffer*(f: AsyncFile, buf: pointer, size: int): Future[int] = + ## Read `size` bytes from the specified file asynchronously starting at ## the current position of the file pointer. ## + ## If the file pointer is past the end of the file then zero is returned + ## and no bytes are read into `buf` + var retFuture = newFuture[int]("asyncfile.readBuffer") + + when defined(windows) or defined(nimdoc): + var ol = newCustom() + ol.data = CompletionData(fd: f.fd, cb: + proc (fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = + if not retFuture.finished: + if errcode == OSErrorCode(-1): + assert bytesCount > 0 + assert bytesCount <= size + f.offset.inc bytesCount + retFuture.complete(bytesCount) + else: + if errcode.int32 == ERROR_HANDLE_EOF: + retFuture.complete(0) + else: + retFuture.fail(newOSError(errcode)) + ) + ol.offset = DWORD(f.offset and 0xffffffff) + ol.offsetHigh = DWORD(f.offset shr 32) + + # According to MSDN we're supposed to pass nil to lpNumberOfBytesRead. + let ret = readFile(f.fd.Handle, buf, size.int32, nil, + cast[POVERLAPPED](ol)) + if not ret.bool: + let err = osLastError() + if err.int32 != ERROR_IO_PENDING: + GC_unref(ol) + if err.int32 == ERROR_HANDLE_EOF: + # This happens in Windows Server 2003 + retFuture.complete(0) + else: + retFuture.fail(newOSError(err)) + else: + # Request completed immediately. 
+ var bytesRead: DWORD + let overlappedRes = getOverlappedResult(f.fd.Handle, + cast[POVERLAPPED](ol), bytesRead, false.WINBOOL) + if not overlappedRes.bool: + let err = osLastError() + if err.int32 == ERROR_HANDLE_EOF: + retFuture.complete(0) + else: + retFuture.fail(newOSError(osLastError())) + else: + assert bytesRead > 0 + assert bytesRead <= size + f.offset.inc bytesRead + retFuture.complete(bytesRead) + else: + proc cb(fd: AsyncFD): bool = + result = true + let res = read(fd.cint, cast[cstring](buf), size.cint) + if res < 0: + let lastError = osLastError() + if lastError.int32 != EAGAIN: + retFuture.fail(newOSError(lastError)) + else: + result = false # We still want this callback to be called. + elif res == 0: + # EOF + retFuture.complete(0) + else: + f.offset.inc(res) + retFuture.complete(res) + + if not cb(f.fd): + addRead(f.fd, cb) + + return retFuture + +proc read*(f: AsyncFile, size: int): Future[string] = + ## Read `size` bytes from the specified file asynchronously starting at + ## the current position of the file pointer. `size` should be greater than zero. + ## ## If the file pointer is past the end of the file then an empty string is ## returned. 
+ assert size > 0 var retFuture = newFuture[string]("asyncfile.read") when defined(windows) or defined(nimdoc): var buffer = alloc0(size) - var ol = PCustomOverlapped() - GC_ref(ol) - ol.data = TCompletionData(fd: f.fd, cb: - proc (fd: TAsyncFD, bytesCount: Dword, errcode: OSErrorCode) = + var ol = newCustom() + ol.data = CompletionData(fd: f.fd, cb: + proc (fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = if not retFuture.finished: if errcode == OSErrorCode(-1): assert bytesCount > 0 @@ -139,16 +228,16 @@ proc read*(f: AsyncFile, size: int): Future[string] = if errcode.int32 == ERROR_HANDLE_EOF: retFuture.complete("") else: - retFuture.fail(newException(OSError, osErrorMsg(errcode))) + retFuture.fail(newOSError(errcode)) if buffer != nil: dealloc buffer buffer = nil ) - ol.offset = DWord(f.offset and 0xffffffff) - ol.offsetHigh = DWord(f.offset shr 32) + ol.offset = DWORD(f.offset and 0xffffffff) + ol.offsetHigh = DWORD(f.offset shr 32) # According to MSDN we're supposed to pass nil to lpNumberOfBytesRead. - let ret = readFile(f.fd.THandle, buffer, size.int32, nil, + let ret = readFile(f.fd.Handle, buffer, size.int32, nil, cast[POVERLAPPED](ol)) if not ret.bool: let err = osLastError() @@ -157,18 +246,23 @@ proc read*(f: AsyncFile, size: int): Future[string] = dealloc buffer buffer = nil GC_unref(ol) - retFuture.fail(newException(OSError, osErrorMsg(err))) + + if err.int32 == ERROR_HANDLE_EOF: + # This happens in Windows Server 2003 + retFuture.complete("") + else: + retFuture.fail(newOSError(err)) else: # Request completed immediately. 
- var bytesRead: DWord - let overlappedRes = getOverlappedResult(f.fd.THandle, - cast[POverlapped](ol)[], bytesRead, false.WinBool) + var bytesRead: DWORD + let overlappedRes = getOverlappedResult(f.fd.Handle, + cast[POVERLAPPED](ol), bytesRead, false.WINBOOL) if not overlappedRes.bool: let err = osLastError() if err.int32 == ERROR_HANDLE_EOF: retFuture.complete("") else: - retFuture.fail(newException(OSError, osErrorMsg(osLastError()))) + retFuture.fail(newOSError(osLastError())) else: assert bytesRead > 0 assert bytesRead <= size @@ -179,26 +273,27 @@ proc read*(f: AsyncFile, size: int): Future[string] = else: var readBuffer = newString(size) - proc cb(fd: TAsyncFD): bool = + proc cb(fd: AsyncFD): bool = result = true let res = read(fd.cint, addr readBuffer[0], size.cint) if res < 0: let lastError = osLastError() if lastError.int32 != EAGAIN: - retFuture.fail(newException(OSError, osErrorMsg(lastError))) + retFuture.fail(newOSError(lastError)) else: result = false # We still want this callback to be called. elif res == 0: # EOF + f.offset = lseek(fd.cint, 0, SEEK_CUR) retFuture.complete("") else: readBuffer.setLen(res) f.offset.inc(res) retFuture.complete(readBuffer) - + if not cb(f.fd): addRead(f.fd, cb) - + return retFuture proc readLine*(f: AsyncFile): Future[string] {.async.} = @@ -206,6 +301,8 @@ proc readLine*(f: AsyncFile): Future[string] {.async.} = result = "" while true: var c = await read(f, 1) + if c.len == 0: + break if c[0] == '\c': c = await read(f, 1) break @@ -222,10 +319,10 @@ proc getFilePos*(f: AsyncFile): int64 = proc setFilePos*(f: AsyncFile, pos: int64) = ## Sets the position of the file pointer that is used for read/write - ## operations. The file's first byte has the index zero. + ## operations. The file's first byte has the index zero. 
f.offset = pos when not defined(windows) and not defined(nimdoc): - let ret = lseek(f.fd.cint, pos, SEEK_SET) + let ret = lseek(f.fd.cint, pos.Off, SEEK_SET) if ret == -1: raiseOSError(osLastError()) @@ -238,8 +335,77 @@ proc readAll*(f: AsyncFile): Future[string] {.async.} = return result.add data +proc writeBuffer*(f: AsyncFile, buf: pointer, size: int): Future[void] = + ## Writes `size` bytes from `buf` to the file specified asynchronously. + ## + ## The returned Future will complete once all data has been written to the + ## specified file. + var retFuture = newFuture[void]("asyncfile.writeBuffer") + when defined(windows) or defined(nimdoc): + var ol = newCustom() + ol.data = CompletionData(fd: f.fd, cb: + proc (fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = + if not retFuture.finished: + if errcode == OSErrorCode(-1): + assert bytesCount == size.int32 + retFuture.complete() + else: + retFuture.fail(newOSError(errcode)) + ) + # passing -1 here should work according to MSDN, but doesn't. For more + # information see + # http://stackoverflow.com/questions/33650899/does-asynchronous-file- + # appending-in-windows-preserve-order + ol.offset = DWORD(f.offset and 0xffffffff) + ol.offsetHigh = DWORD(f.offset shr 32) + f.offset.inc(size) + + # According to MSDN we're supposed to pass nil to lpNumberOfBytesWritten. + let ret = writeFile(f.fd.Handle, buf, size.int32, nil, + cast[POVERLAPPED](ol)) + if not ret.bool: + let err = osLastError() + if err.int32 != ERROR_IO_PENDING: + GC_unref(ol) + retFuture.fail(newOSError(err)) + else: + # Request completed immediately. 
+ var bytesWritten: DWORD + let overlappedRes = getOverlappedResult(f.fd.Handle, + cast[POVERLAPPED](ol), bytesWritten, false.WINBOOL) + if not overlappedRes.bool: + retFuture.fail(newOSError(osLastError())) + else: + assert bytesWritten == size.int32 + retFuture.complete() + else: + var written = 0 + + proc cb(fd: AsyncFD): bool = + result = true + let remainderSize = size - written + var cbuf = cast[cstring](buf) + let res = write(fd.cint, addr cbuf[written], remainderSize.cint) + if res < 0: + let lastError = osLastError() + if lastError.int32 != EAGAIN: + retFuture.fail(newOSError(lastError)) + else: + result = false # We still want this callback to be called. + else: + written.inc res + f.offset.inc res + if res != remainderSize: + result = false # We still have data to write. + else: + retFuture.complete() + + if not cb(f.fd): + addWrite(f.fd, cb) + return retFuture + proc write*(f: AsyncFile, data: string): Future[void] = - ## Writes ``data`` to the file specified asynchronously. + ## Writes `data` to the file specified asynchronously. ## ## The returned Future will complete once all data has been written to the ## specified file. 
@@ -247,28 +413,27 @@ proc write*(f: AsyncFile, data: string): Future[void] = var copy = data when defined(windows) or defined(nimdoc): var buffer = alloc0(data.len) - copyMem(buffer, addr copy[0], data.len) + copyMem(buffer, copy.cstring, data.len) - var ol = PCustomOverlapped() - GC_ref(ol) - ol.data = TCompletionData(fd: f.fd, cb: - proc (fd: TAsyncFD, bytesCount: DWord, errcode: OSErrorCode) = + var ol = newCustom() + ol.data = CompletionData(fd: f.fd, cb: + proc (fd: AsyncFD, bytesCount: DWORD, errcode: OSErrorCode) = if not retFuture.finished: if errcode == OSErrorCode(-1): assert bytesCount == data.len.int32 - f.offset.inc(data.len) retFuture.complete() else: - retFuture.fail(newException(OSError, osErrorMsg(errcode))) + retFuture.fail(newOSError(errcode)) if buffer != nil: dealloc buffer buffer = nil ) - ol.offset = DWord(f.offset and 0xffffffff) - ol.offsetHigh = DWord(f.offset shr 32) + ol.offset = DWORD(f.offset and 0xffffffff) + ol.offsetHigh = DWORD(f.offset shr 32) + f.offset.inc(data.len) # According to MSDN we're supposed to pass nil to lpNumberOfBytesWritten. - let ret = writeFile(f.fd.THandle, buffer, data.len.int32, nil, + let ret = writeFile(f.fd.Handle, buffer, data.len.int32, nil, cast[POVERLAPPED](ol)) if not ret.bool: let err = osLastError() @@ -277,29 +442,35 @@ proc write*(f: AsyncFile, data: string): Future[void] = dealloc buffer buffer = nil GC_unref(ol) - retFuture.fail(newException(OSError, osErrorMsg(err))) + retFuture.fail(newOSError(err)) else: # Request completed immediately. 
- var bytesWritten: DWord - let overlappedRes = getOverlappedResult(f.fd.THandle, - cast[POverlapped](ol)[], bytesWritten, false.WinBool) + var bytesWritten: DWORD + let overlappedRes = getOverlappedResult(f.fd.Handle, + cast[POVERLAPPED](ol), bytesWritten, false.WINBOOL) if not overlappedRes.bool: - retFuture.fail(newException(OSError, osErrorMsg(osLastError()))) + retFuture.fail(newOSError(osLastError())) else: assert bytesWritten == data.len.int32 - f.offset.inc(data.len) retFuture.complete() else: var written = 0 - - proc cb(fd: TAsyncFD): bool = + + proc cb(fd: AsyncFD): bool = result = true - let remainderSize = data.len-written - let res = write(fd.cint, addr copy[written], remainderSize.cint) + + let remainderSize = data.len - written + + let res = + if data.len == 0: + write(fd.cint, copy.cstring, 0) + else: + write(fd.cint, addr copy[written], remainderSize.cint) + if res < 0: let lastError = osLastError() if lastError.int32 != EAGAIN: - retFuture.fail(newException(OSError, osErrorMsg(lastError))) + retFuture.fail(newOSError(lastError)) else: result = false # We still want this callback to be called. else: @@ -309,17 +480,58 @@ proc write*(f: AsyncFile, data: string): Future[void] = result = false # We still have data to write. else: retFuture.complete() - + if not cb(f.fd): addWrite(f.fd, cb) return retFuture +proc setFileSize*(f: AsyncFile, length: int64) = + ## Set a file length. + when defined(windows) or defined(nimdoc): + var + high = (length shr 32).DWORD + let + low = (length and 0xffffffff).DWORD + status = setFilePointer(f.fd.Handle, low, addr high, 0) + lastErr = osLastError() + if (status == INVALID_SET_FILE_POINTER and lastErr.int32 != NO_ERROR) or + (setEndOfFile(f.fd.Handle) == 0): + raiseOSError(osLastError()) + else: + # will truncate if Off is a 32-bit type! + if ftruncate(f.fd.cint, length.Off) == -1: + raiseOSError(osLastError()) + proc close*(f: AsyncFile) = ## Closes the file specified. 
+ unregister(f.fd) when defined(windows) or defined(nimdoc): - if not closeHandle(f.fd.THandle).bool: + if not closeHandle(f.fd.Handle).bool: raiseOSError(osLastError()) else: if close(f.fd.cint) == -1: raiseOSError(osLastError()) +proc writeFromStream*(f: AsyncFile, fs: FutureStream[string]) {.async.} = + ## Reads data from the specified future stream until it is completed. + ## The data which is read is written to the file immediately and + ## freed from memory. + ## + ## This procedure is perfect for saving streamed data to a file without + ## wasting memory. + while true: + let (hasValue, value) = await fs.read() + if hasValue: + await f.write(value) + else: + break + +proc readToStream*(f: AsyncFile, fs: FutureStream[string]) {.async.} = + ## Writes data to the specified future stream as the file is read. + while true: + let data = await read(f, 4000) + if data.len == 0: + break + await fs.write(data) + + fs.complete() diff --git a/lib/pure/asyncftpclient.nim b/lib/pure/asyncftpclient.nim deleted file mode 100644 index 96f54b49e..000000000 --- a/lib/pure/asyncftpclient.nim +++ /dev/null @@ -1,313 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2015 Dominik Picheta -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implement an asynchronous FTP client. -## -## Examples -## -------- -## -## .. 
code-block::nim -## -## var ftp = newAsyncFtpClient("example.com", user = "test", pass = "test") -## proc main(ftp: AsyncFtpClient) {.async.} = -## await ftp.connect() -## echo await ftp.pwd() -## echo await ftp.listDirs() -## await ftp.store("payload.jpg", "payload.jpg") -## await ftp.retrFile("payload.jpg", "payload2.jpg") -## echo("Finished") -## -## waitFor main(ftp) - -import asyncdispatch, asyncnet, strutils, parseutils, os, times - -from ftpclient import FtpBaseObj, ReplyError, FtpEvent -from net import BufferSize - -type - AsyncFtpClientObj* = FtpBaseObj[AsyncSocket] - AsyncFtpClient* = ref AsyncFtpClientObj - - ProgressChangedProc* = - proc (total, progress: BiggestInt, speed: float): - Future[void] {.closure, gcsafe.} - -proc expectReply(ftp: AsyncFtpClient): Future[TaintedString] = - result = ftp.csock.recvLine() - -proc send*(ftp: AsyncFtpClient, m: string): Future[TaintedString] {.async.} = - ## Send a message to the server, and wait for a primary reply. - ## ``\c\L`` is added for you. - await ftp.csock.send(m & "\c\L") - return await ftp.expectReply() - -proc assertReply(received: TaintedString, expected: varargs[string]) = - for i in items(expected): - if received.string.startsWith(i): return - raise newException(ReplyError, - "Expected reply '$1' got: $2" % - [expected.join("' or '"), received.string]) - -proc pasv(ftp: AsyncFtpClient) {.async.} = - ## Negotiate a data connection. - ftp.dsock = newAsyncSocket() - - var pasvMsg = (await ftp.send("PASV")).string.strip.TaintedString - assertReply(pasvMsg, "227") - var betweenParens = captureBetween(pasvMsg.string, '(', ')') - var nums = betweenParens.split(',') - var ip = nums[0.. ^3] - var port = nums[^2.. 
^1] - var properPort = port[0].parseInt()*256+port[1].parseInt() - await ftp.dsock.connect(ip.join("."), Port(properPort.toU16)) - ftp.dsockConnected = true - -proc normalizePathSep(path: string): string = - return replace(path, '\\', '/') - -proc connect*(ftp: AsyncFtpClient) {.async.} = - ## Connect to the FTP server specified by ``ftp``. - await ftp.csock.connect(ftp.address, ftp.port) - - var reply = await ftp.expectReply() - if reply.startsWith("120"): - # 120 Service ready in nnn minutes. - # We wait until we receive 220. - reply = await ftp.expectReply() - assertReply(reply, "220") - - if ftp.user != "": - assertReply(await(ftp.send("USER " & ftp.user)), "230", "331") - - if ftp.pass != "": - assertReply(await(ftp.send("PASS " & ftp.pass)), "230") - -proc pwd*(ftp: AsyncFtpClient): Future[TaintedString] {.async.} = - ## Returns the current working directory. - let wd = await ftp.send("PWD") - assertReply wd, "257" - return wd.string.captureBetween('"').TaintedString # " - -proc cd*(ftp: AsyncFtpClient, dir: string) {.async.} = - ## Changes the current directory on the remote FTP server to ``dir``. - assertReply(await(ftp.send("CWD " & dir.normalizePathSep)), "250") - -proc cdup*(ftp: AsyncFtpClient) {.async.} = - ## Changes the current directory to the parent of the current directory. - assertReply(await(ftp.send("CDUP")), "200") - -proc getLines(ftp: AsyncFtpClient): Future[string] {.async.} = - ## Downloads text data in ASCII mode - result = "" - assert ftp.dsockConnected - while ftp.dsockConnected: - let r = await ftp.dsock.recvLine() - if r.string == "": - ftp.dsockConnected = false - else: - result.add(r.string & "\n") - - assertReply(await(ftp.expectReply()), "226") - -proc listDirs*(ftp: AsyncFtpClient, dir = ""): Future[seq[string]] {.async.} = - ## Returns a list of filenames in the given directory. If ``dir`` is "", - ## the current directory is used. 
If ``async`` is true, this - ## function will return immediately and it will be your job to - ## use asyncio's ``poll`` to progress this operation. - await ftp.pasv() - - assertReply(await(ftp.send("NLST " & dir.normalizePathSep)), ["125", "150"]) - - result = splitLines(await ftp.getLines()) - -proc existsFile*(ftp: AsyncFtpClient, file: string): Future[bool] {.async.} = - ## Determines whether ``file`` exists. - var files = await ftp.listDirs() - for f in items(files): - if f.normalizePathSep == file.normalizePathSep: return true - -proc createDir*(ftp: AsyncFtpClient, dir: string, recursive = false){.async.} = - ## Creates a directory ``dir``. If ``recursive`` is true, the topmost - ## subdirectory of ``dir`` will be created first, following the secondmost... - ## etc. this allows you to give a full path as the ``dir`` without worrying - ## about subdirectories not existing. - if not recursive: - assertReply(await(ftp.send("MKD " & dir.normalizePathSep)), "257") - else: - var reply = TaintedString"" - var previousDirs = "" - for p in split(dir, {os.DirSep, os.AltSep}): - if p != "": - previousDirs.add(p) - reply = await ftp.send("MKD " & previousDirs) - previousDirs.add('/') - assertReply reply, "257" - -proc chmod*(ftp: AsyncFtpClient, path: string, - permissions: set[FilePermission]) {.async.} = - ## Changes permission of ``path`` to ``permissions``. 
- var userOctal = 0 - var groupOctal = 0 - var otherOctal = 0 - for i in items(permissions): - case i - of fpUserExec: userOctal.inc(1) - of fpUserWrite: userOctal.inc(2) - of fpUserRead: userOctal.inc(4) - of fpGroupExec: groupOctal.inc(1) - of fpGroupWrite: groupOctal.inc(2) - of fpGroupRead: groupOctal.inc(4) - of fpOthersExec: otherOctal.inc(1) - of fpOthersWrite: otherOctal.inc(2) - of fpOthersRead: otherOctal.inc(4) - - var perm = $userOctal & $groupOctal & $otherOctal - assertReply(await(ftp.send("SITE CHMOD " & perm & - " " & path.normalizePathSep)), "200") - -proc list*(ftp: AsyncFtpClient, dir = ""): Future[string] {.async.} = - ## Lists all files in ``dir``. If ``dir`` is ``""``, uses the current - ## working directory. - await ftp.pasv() - - let reply = await ftp.send("LIST" & " " & dir.normalizePathSep) - assertReply(reply, ["125", "150"]) - - result = await ftp.getLines() - -proc retrText*(ftp: AsyncFtpClient, file: string): Future[string] {.async.} = - ## Retrieves ``file``. File must be ASCII text. 
- await ftp.pasv() - let reply = await ftp.send("RETR " & file.normalizePathSep) - assertReply(reply, ["125", "150"]) - - result = await ftp.getLines() - -proc getFile(ftp: AsyncFtpClient, file: File, total: BiggestInt, - onProgressChanged: ProgressChangedProc) {.async.} = - assert ftp.dsockConnected - var progress = 0 - var progressInSecond = 0 - var countdownFut = sleepAsync(1000) - var dataFut = ftp.dsock.recv(BufferSize) - while ftp.dsockConnected: - await dataFut or countdownFut - if countdownFut.finished: - asyncCheck onProgressChanged(total, progress, - progressInSecond.float) - progressInSecond = 0 - countdownFut = sleepAsync(1000) - - if dataFut.finished: - let data = dataFut.read - if data != "": - progress.inc(data.len) - progressInSecond.inc(data.len) - file.write(data) - dataFut = ftp.dsock.recv(BufferSize) - else: - ftp.dsockConnected = false - - assertReply(await(ftp.expectReply()), "226") - -proc defaultOnProgressChanged*(total, progress: BiggestInt, - speed: float): Future[void] {.nimcall,gcsafe.} = - ## Default FTP ``onProgressChanged`` handler. Does nothing. - result = newFuture[void]() - #echo(total, " ", progress, " ", speed) - result.complete() - -proc retrFile*(ftp: AsyncFtpClient, file, dest: string, - onProgressChanged = defaultOnProgressChanged) {.async.} = - ## Downloads ``file`` and saves it to ``dest``. - ## The ``EvRetr`` event is passed to the specified ``handleEvent`` function - ## when the download is finished. The event's ``filename`` field will be equal - ## to ``file``. 
- var destFile = open(dest, mode = fmWrite) - await ftp.pasv() - var reply = await ftp.send("RETR " & file.normalizePathSep) - assertReply reply, ["125", "150"] - if {'(', ')'} notin reply.string: - raise newException(ReplyError, "Reply has no file size.") - var fileSize: BiggestInt - if reply.string.captureBetween('(', ')').parseBiggestInt(fileSize) == 0: - raise newException(ReplyError, "Reply has no file size.") - - await getFile(ftp, destFile, fileSize, onProgressChanged) - -proc doUpload(ftp: AsyncFtpClient, file: File, - onProgressChanged: ProgressChangedProc) {.async.} = - assert ftp.dsockConnected - - let total = file.getFileSize() - var data = newStringOfCap(4000) - var progress = 0 - var progressInSecond = 0 - var countdownFut = sleepAsync(1000) - var sendFut: Future[void] = nil - while ftp.dsockConnected: - if sendFut == nil or sendFut.finished: - progress.inc(data.len) - progressInSecond.inc(data.len) - # TODO: Async file reading. - let len = file.readBuffer(addr(data[0]), 4000) - setLen(data, len) - if len == 0: - # File finished uploading. - ftp.dsock.close() - ftp.dsockConnected = false - - assertReply(await(ftp.expectReply()), "226") - else: - sendFut = ftp.dsock.send(data) - - if countdownFut.finished: - asyncCheck onProgressChanged(total, progress, progressInSecond.float) - progressInSecond = 0 - countdownFut = sleepAsync(1000) - - await countdownFut or sendFut - -proc store*(ftp: AsyncFtpClient, file, dest: string, - onProgressChanged = defaultOnProgressChanged) {.async.} = - ## Uploads ``file`` to ``dest`` on the remote FTP server. Usage of this - ## function asynchronously is recommended to view the progress of - ## the download. - ## The ``EvStore`` event is passed to the specified ``handleEvent`` function - ## when the upload is finished, and the ``filename`` field will be - ## equal to ``file``. 
- var destFile = open(file) - await ftp.pasv() - - let reply = await ftp.send("STOR " & dest.normalizePathSep) - assertReply reply, ["125", "150"] - - await doUpload(ftp, destFile, onProgressChanged) - -proc newAsyncFtpClient*(address: string, port = Port(21), - user, pass = ""): AsyncFtpClient = - ## Creates a new ``AsyncFtpClient`` object. - new result - result.user = user - result.pass = pass - result.address = address - result.port = port - result.dsockConnected = false - result.csock = newAsyncSocket() - -when isMainModule: - var ftp = newAsyncFtpClient("example.com", user = "test", pass = "test") - proc main(ftp: AsyncFtpClient) {.async.} = - await ftp.connect() - echo await ftp.pwd() - echo await ftp.listDirs() - await ftp.store("payload.jpg", "payload.jpg") - await ftp.retrFile("payload.jpg", "payload2.jpg") - echo("Finished") - - waitFor main(ftp) diff --git a/lib/pure/asyncfutures.nim b/lib/pure/asyncfutures.nim new file mode 100644 index 000000000..29ebf8f89 --- /dev/null +++ b/lib/pure/asyncfutures.nim @@ -0,0 +1,527 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Dominik Picheta +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +import std/[os, tables, strutils, times, heapqueue, options, deques, cstrutils, typetraits] + +import system/stacktraces + +when defined(nimPreviewSlimSystem): + import std/objectdollar # for StackTraceEntry + import std/assertions + +# TODO: This shouldn't need to be included, but should ideally be exported. +type + CallbackFunc = proc () {.closure, gcsafe.} + + CallbackList = object + function: CallbackFunc + next: owned(ref CallbackList) + + FutureBase* = ref object of RootObj ## Untyped future. + callbacks: CallbackList + + finished: bool + error*: ref Exception ## Stored exception + errorStackTrace*: string + when not defined(release) or defined(futureLogging): + stackTrace: seq[StackTraceEntry] ## For debugging purposes only. 
+ id: int + fromProc: string + + Future*[T] = ref object of FutureBase ## Typed future. + value: T ## Stored value + + FutureVar*[T] = distinct Future[T] + + FutureError* = object of Defect + cause*: FutureBase + +when not defined(release): + var currentID = 0 + +const isFutureLoggingEnabled* = defined(futureLogging) + +const + NimAsyncContinueSuffix* = "NimAsyncContinue" ## For internal usage. Do not use. + +when isFutureLoggingEnabled: + import std/hashes + type + FutureInfo* = object + stackTrace*: seq[StackTraceEntry] + fromProc*: string + + var futuresInProgress {.threadvar.}: Table[FutureInfo, int] + + proc getFuturesInProgress*(): var Table[FutureInfo, int] = + return futuresInProgress + + proc hash(s: StackTraceEntry): Hash = + result = hash(s.procname) !& hash(s.line) !& + hash(s.filename) + result = !$result + + proc hash(fi: FutureInfo): Hash = + result = hash(fi.stackTrace) !& hash(fi.fromProc) + result = !$result + + proc getFutureInfo(fut: FutureBase): FutureInfo = + let info = FutureInfo( + stackTrace: fut.stackTrace, + fromProc: fut.fromProc + ) + return info + + proc logFutureStart(fut: FutureBase) = + let info = getFutureInfo(fut) + if info notin getFuturesInProgress(): + getFuturesInProgress()[info] = 0 + getFuturesInProgress()[info].inc() + + proc logFutureFinish(fut: FutureBase) = + getFuturesInProgress()[getFutureInfo(fut)].dec() + +var callSoonProc {.threadvar.}: proc (cbproc: proc ()) {.gcsafe.} + +proc getCallSoonProc*(): (proc(cbproc: proc ()) {.gcsafe.}) = + ## Get current implementation of `callSoon`. + return callSoonProc + +proc setCallSoonProc*(p: (proc(cbproc: proc ()) {.gcsafe.})) = + ## Change current implementation of `callSoon`. This is normally called when dispatcher from `asyncdispatcher` is initialized. + callSoonProc = p + +proc callSoon*(cbproc: proc () {.gcsafe.}) = + ## Call `cbproc` "soon". + ## + ## If async dispatcher is running, `cbproc` will be executed during next dispatcher tick. 
+ ## + ## If async dispatcher is not running, `cbproc` will be executed immediately. + if callSoonProc.isNil: + # Loop not initialized yet. Call the function directly to allow setup code to use futures. + cbproc() + else: + callSoonProc(cbproc) + +template setupFutureBase(fromProc: string) = + new(result) + result.finished = false + when not defined(release): + result.stackTrace = getStackTraceEntries() + result.id = currentID + result.fromProc = fromProc + currentID.inc() + +proc newFuture*[T](fromProc: string = "unspecified"): owned(Future[T]) = + ## Creates a new future. + ## + ## Specifying `fromProc`, which is a string specifying the name of the proc + ## that this future belongs to, is a good habit as it helps with debugging. + setupFutureBase(fromProc) + when isFutureLoggingEnabled: logFutureStart(result) + +proc newFutureVar*[T](fromProc = "unspecified"): owned(FutureVar[T]) = + ## Create a new `FutureVar`. This Future type is ideally suited for + ## situations where you want to avoid unnecessary allocations of Futures. + ## + ## Specifying `fromProc`, which is a string specifying the name of the proc + ## that this future belongs to, is a good habit as it helps with debugging. + let fo = newFuture[T](fromProc) + result = typeof(result)(fo) + when isFutureLoggingEnabled: logFutureStart(Future[T](result)) + +proc clean*[T](future: FutureVar[T]) = + ## Resets the `finished` status of `future`. + Future[T](future).finished = false + Future[T](future).error = nil + +proc checkFinished[T](future: Future[T]) = + ## Checks whether `future` is finished. If it is then raises a + ## `FutureError`. + when not defined(release): + if future.finished: + var msg = "" + msg.add("An attempt was made to complete a Future more than once. 
") + msg.add("Details:") + msg.add("\n Future ID: " & $future.id) + msg.add("\n Created in proc: " & future.fromProc) + msg.add("\n Stack trace to moment of creation:") + msg.add("\n" & indent(($future.stackTrace).strip(), 4)) + when T is string: + msg.add("\n Contents (string): ") + msg.add("\n" & indent($future.value, 4)) + msg.add("\n Stack trace to moment of secondary completion:") + msg.add("\n" & indent(getStackTrace().strip(), 4)) + var err = newException(FutureError, msg) + err.cause = future + raise err + +proc call(callbacks: var CallbackList) = + var current = callbacks + while true: + if not current.function.isNil: + callSoon(current.function) + + if current.next.isNil: + break + else: + current = current.next[] + # callback will be called only once, let GC collect them now + callbacks.next = nil + callbacks.function = nil + +proc add(callbacks: var CallbackList, function: CallbackFunc) = + if callbacks.function.isNil: + callbacks.function = function + assert callbacks.next == nil + else: + let newCallback = new(ref CallbackList) + newCallback.function = function + newCallback.next = nil + + if callbacks.next == nil: + callbacks.next = newCallback + else: + var last = callbacks.next + while last.next != nil: + last = last.next + last.next = newCallback + +proc completeImpl[T, U](future: Future[T], val: sink U, isVoid: static bool) = + #assert(not future.finished, "Future already finished, cannot finish twice.") + checkFinished(future) + assert(future.error == nil) + when not isVoid: + future.value = val + future.finished = true + future.callbacks.call() + when isFutureLoggingEnabled: logFutureFinish(future) + +proc complete*[T](future: Future[T], val: sink T) = + ## Completes `future` with value `val`. + completeImpl(future, val, false) + +proc complete*(future: Future[void], val = Future[void].default) = + completeImpl(future, (), true) + +proc complete*[T](future: FutureVar[T]) = + ## Completes a `FutureVar`. 
+ template fut: untyped = Future[T](future) + checkFinished(fut) + assert(fut.error == nil) + fut.finished = true + fut.callbacks.call() + when isFutureLoggingEnabled: logFutureFinish(Future[T](future)) + +proc complete*[T](future: FutureVar[T], val: sink T) = + ## Completes a `FutureVar` with value `val`. + ## + ## Any previously stored value will be overwritten. + template fut: untyped = Future[T](future) + checkFinished(fut) + assert(fut.error.isNil()) + fut.finished = true + fut.value = val + fut.callbacks.call() + when isFutureLoggingEnabled: logFutureFinish(fut) + +proc fail*[T](future: Future[T], error: ref Exception) = + ## Completes `future` with `error`. + #assert(not future.finished, "Future already finished, cannot finish twice.") + checkFinished(future) + future.finished = true + future.error = error + future.errorStackTrace = + if getStackTrace(error) == "": getStackTrace() else: getStackTrace(error) + future.callbacks.call() + when isFutureLoggingEnabled: logFutureFinish(future) + +proc clearCallbacks*(future: FutureBase) = + future.callbacks.function = nil + future.callbacks.next = nil + +proc addCallback*(future: FutureBase, cb: proc() {.closure, gcsafe.}) = + ## Adds the callbacks proc to be called when the future completes. + ## + ## If future has already completed then `cb` will be called immediately. + assert cb != nil + if future.finished: + callSoon(cb) + else: + future.callbacks.add cb + +proc addCallback*[T](future: Future[T], + cb: proc (future: Future[T]) {.closure, gcsafe.}) = + ## Adds the callbacks proc to be called when the future completes. + ## + ## If future has already completed then `cb` will be called immediately. + future.addCallback( + proc() = + cb(future) + ) + +proc `callback=`*(future: FutureBase, cb: proc () {.closure, gcsafe.}) = + ## Clears the list of callbacks and sets the callback proc to be called when the future completes. + ## + ## If future has already completed then `cb` will be called immediately. 
+ ## + ## It's recommended to use `addCallback` or `then` instead. + future.clearCallbacks + future.addCallback cb + +proc `callback=`*[T](future: Future[T], + cb: proc (future: Future[T]) {.closure, gcsafe.}) = + ## Sets the callback proc to be called when the future completes. + ## + ## If future has already completed then `cb` will be called immediately. + future.callback = proc () = cb(future) + +template getFilenameProcname(entry: StackTraceEntry): (string, string) = + when compiles(entry.filenameStr) and compiles(entry.procnameStr): + # We can't rely on "entry.filename" and "entry.procname" still being valid + # cstring pointers, because the "string.data" buffers they pointed to might + # be already garbage collected (this entry being a non-shallow copy, + # "entry.filename" no longer points to "entry.filenameStr.data", but to the + # buffer of the original object). + (entry.filenameStr, entry.procnameStr) + else: + ($entry.filename, $entry.procname) + +proc getHint(entry: StackTraceEntry): string = + ## We try to provide some hints about stack trace entries that the user + ## may not be familiar with, in particular calls inside the stdlib. + + let (filename, procname) = getFilenameProcname(entry) + + result = "" + if procname == "processPendingCallbacks": + if cmpIgnoreStyle(filename, "asyncdispatch.nim") == 0: + return "Executes pending callbacks" + elif procname == "poll": + if cmpIgnoreStyle(filename, "asyncdispatch.nim") == 0: + return "Processes asynchronous completion events" + + if procname.endsWith(NimAsyncContinueSuffix): + if cmpIgnoreStyle(filename, "asyncmacro.nim") == 0: + return "Resumes an async procedure" + +proc `$`*(stackTraceEntries: seq[StackTraceEntry]): string = + when defined(nimStackTraceOverride): + let entries = addDebuggingInfo(stackTraceEntries) + else: + let entries = stackTraceEntries + + result = "" + # Find longest filename & line number combo for alignment purposes. 
+ var longestLeft = 0 + for entry in entries: + let (filename, procname) = getFilenameProcname(entry) + + if procname == "": continue + + let leftLen = filename.len + len($entry.line) + if leftLen > longestLeft: + longestLeft = leftLen + + # Format the entries. + for entry in entries: + let (filename, procname) = getFilenameProcname(entry) + + if procname == "" and entry.line == reraisedFromBegin: + break + + let left = "$#($#)" % [filename, $entry.line] + result.add((spaces(2) & "$# $#\n") % [ + left, + procname + ]) + let hint = getHint(entry) + if hint.len > 0: + result.add(spaces(4) & "## " & hint & "\n") + +proc injectStacktrace[T](future: Future[T]) = + when not defined(release): + const header = "\nAsync traceback:\n" + + var exceptionMsg = future.error.msg + if header in exceptionMsg: + # This is messy: extract the original exception message from the msg + # containing the async traceback. + let start = exceptionMsg.find(header) + exceptionMsg = exceptionMsg[0..<start] + + + var newMsg = exceptionMsg & header + + let entries = getStackTraceEntries(future.error) + newMsg.add($entries) + + newMsg.add("Exception message: " & exceptionMsg & "\n") + + # # For debugging purposes + # newMsg.add("Exception type:") + # for entry in getStackTraceEntries(future.error): + # newMsg.add "\n" & $entry + future.error.msg = newMsg + +template readImpl(future, T) = + when future is Future[T]: + let fut {.cursor.} = future + else: + let fut {.cursor.} = Future[T](future) + if fut.finished: + if fut.error != nil: + injectStacktrace(fut) + raise fut.error + when T isnot void: + result = distinctBase(future).value + else: + # TODO: Make a custom exception type for this? + raise newException(ValueError, "Future still in progress.") + +proc read*[T](future: Future[T] | FutureVar[T]): lent T = + ## Retrieves the value of `future`. Future must be finished otherwise + ## this function will fail with a `ValueError` exception. 
+ ## + ## If the result of the future is an error then that error will be raised. + readImpl(future, T) + +proc read*(future: Future[void] | FutureVar[void]) = + readImpl(future, void) + +proc readError*[T](future: Future[T]): ref Exception = + ## Retrieves the exception stored in `future`. + ## + ## An `ValueError` exception will be thrown if no exception exists + ## in the specified Future. + if future.error != nil: return future.error + else: + raise newException(ValueError, "No error in future.") + +proc mget*[T](future: FutureVar[T]): var T = + ## Returns a mutable value stored in `future`. + ## + ## Unlike `read`, this function will not raise an exception if the + ## Future has not been finished. + result = Future[T](future).value + +proc finished*(future: FutureBase | FutureVar): bool = + ## Determines whether `future` has completed. + ## + ## `True` may indicate an error or a value. Use `failed` to distinguish. + when future is FutureVar: + result = (FutureBase(future)).finished + else: + result = future.finished + +proc failed*(future: FutureBase): bool = + ## Determines whether `future` completed with an error. + return future.error != nil + +proc asyncCheck*[T](future: Future[T]) = + ## Sets a callback on `future` which raises an exception if the future + ## finished with an error. + ## + ## This should be used instead of `discard` to discard void futures, + ## or use `waitFor` if you need to wait for the future's completion. + assert(not future.isNil, "Future is nil") + # TODO: We can likely look at the stack trace here and inject the location + # where the `asyncCheck` was called to give a better error stack message. + proc asyncCheckCallback() = + if future.failed: + injectStacktrace(future) + raise future.error + future.callback = asyncCheckCallback + +proc `and`*[T, Y](fut1: Future[T], fut2: Future[Y]): Future[void] = + ## Returns a future which will complete once both `fut1` and `fut2` + ## complete. 
+ var retFuture = newFuture[void]("asyncdispatch.`and`") + fut1.callback = + proc () = + if not retFuture.finished: + if fut1.failed: retFuture.fail(fut1.error) + elif fut2.finished: retFuture.complete() + fut2.callback = + proc () = + if not retFuture.finished: + if fut2.failed: retFuture.fail(fut2.error) + elif fut1.finished: retFuture.complete() + return retFuture + +proc `or`*[T, Y](fut1: Future[T], fut2: Future[Y]): Future[void] = + ## Returns a future which will complete once either `fut1` or `fut2` + ## complete. + var retFuture = newFuture[void]("asyncdispatch.`or`") + proc cb[X](fut: Future[X]) = + if not retFuture.finished: + if fut.failed: retFuture.fail(fut.error) + else: retFuture.complete() + fut1.callback = cb[T] + fut2.callback = cb[Y] + return retFuture + +proc all*[T](futs: varargs[Future[T]]): auto = + ## Returns a future which will complete once + ## all futures in `futs` complete. + ## If the argument is empty, the returned future completes immediately. + ## + ## If the awaited futures are not `Future[void]`, the returned future + ## will hold the values of all awaited futures in a sequence. + ## + ## If the awaited futures *are* `Future[void]`, + ## this proc returns `Future[void]`. 
+ + when T is void: + var + retFuture = newFuture[void]("asyncdispatch.all") + completedFutures = 0 + + let totalFutures = len(futs) + + for fut in futs: + fut.addCallback proc (f: Future[T]) = + inc(completedFutures) + if not retFuture.finished: + if f.failed: + retFuture.fail(f.error) + else: + if completedFutures == totalFutures: + retFuture.complete() + + if totalFutures == 0: + retFuture.complete() + + return retFuture + + else: + var + retFuture = newFuture[seq[T]]("asyncdispatch.all") + retValues = newSeq[T](len(futs)) + completedFutures = 0 + + for i, fut in futs: + proc setCallback(i: int) = + fut.addCallback proc (f: Future[T]) = + inc(completedFutures) + if not retFuture.finished: + if f.failed: + retFuture.fail(f.error) + else: + retValues[i] = f.read() + + if completedFutures == len(retValues): + retFuture.complete(retValues) + + setCallback(i) + + if retValues.len == 0: + retFuture.complete(retValues) + + return retFuture diff --git a/lib/pure/asynchttpserver.nim b/lib/pure/asynchttpserver.nim index 64242234c..39e945d5e 100644 --- a/lib/pure/asynchttpserver.nim +++ b/lib/pure/asynchttpserver.nim @@ -9,129 +9,141 @@ ## This module implements a high performance asynchronous HTTP server. ## -## Examples -## -------- -## -## This example will create an HTTP server on port 8080. The server will -## respond to all requests with a ``200 OK`` response code and "Hello World" -## as the response body. -## -## .. code-block::nim -## import asynchttpserver, asyncdispatch -## -## var server = newAsyncHttpServer() -## proc cb(req: Request) {.async.} = -## await req.respond(Http200, "Hello World") -## -## asyncCheck server.serve(Port(8080), cb) -## runForever() +## This HTTP server has not been designed to be used in production, but +## for testing applications locally. Because of this, when deploying your +## application in production you should use a reverse proxy (for example nginx) +## instead of allowing users to connect directly to this server. 
+ +runnableExamples("-r:off"): + # This example will create an HTTP server on an automatically chosen port. + # It will respond to all requests with a `200 OK` response code and "Hello World" + # as the response body. + import std/asyncdispatch + proc main {.async.} = + var server = newAsyncHttpServer() + proc cb(req: Request) {.async.} = + echo (req.reqMethod, req.url, req.headers) + let headers = {"Content-type": "text/plain; charset=utf-8"} + await req.respond(Http200, "Hello World", headers.newHttpHeaders()) + + server.listen(Port(0)) # or Port(8080) to hardcode the standard HTTP port. + let port = server.getPort + echo "test this with: curl localhost:" & $port.uint16 & "/" + while true: + if server.shouldAcceptRequest(): + await server.acceptRequest(cb) + else: + # too many concurrent connections, `maxFDs` exceeded + # wait 500ms for FDs to be closed + await sleepAsync(500) -import strtabs, asyncnet, asyncdispatch, parseutils, uri, strutils + waitFor main() + +import std/[asyncnet, asyncdispatch, parseutils, uri, strutils] +import std/httpcore +from std/nativesockets import getLocalAddr, Domain, AF_INET, AF_INET6 +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/assertions + +export httpcore except parseHeader + +const + maxLine = 8*1024 + +# TODO: If it turns out that the decisions that asynchttpserver makes +# explicitly, about whether to close the client sockets or upgrade them are +# wrong, then add a return value which determines what to do for the callback. +# Also, maybe move `client` out of `Request` object and into the args for +# the proc. type Request* = object client*: AsyncSocket # TODO: Separate this into a Response object? - reqMethod*: string - headers*: StringTableRef + reqMethod*: HttpMethod + headers*: HttpHeaders protocol*: tuple[orig: string, major, minor: int] url*: Uri - hostname*: string ## The hostname of the client that made the request. 
+ hostname*: string ## The hostname of the client that made the request. body*: string AsyncHttpServer* = ref object socket: AsyncSocket reuseAddr: bool + reusePort: bool + maxBody: int ## The maximum content-length that will be read for the body. + maxFDs: int - HttpCode* = enum - Http100 = "100 Continue", - Http101 = "101 Switching Protocols", - Http200 = "200 OK", - Http201 = "201 Created", - Http202 = "202 Accepted", - Http204 = "204 No Content", - Http205 = "205 Reset Content", - Http206 = "206 Partial Content", - Http300 = "300 Multiple Choices", - Http301 = "301 Moved Permanently", - Http302 = "302 Found", - Http303 = "303 See Other", - Http304 = "304 Not Modified", - Http305 = "305 Use Proxy", - Http307 = "307 Temporary Redirect", - Http400 = "400 Bad Request", - Http401 = "401 Unauthorized", - Http403 = "403 Forbidden", - Http404 = "404 Not Found", - Http405 = "405 Method Not Allowed", - Http406 = "406 Not Acceptable", - Http407 = "407 Proxy Authentication Required", - Http408 = "408 Request Timeout", - Http409 = "409 Conflict", - Http410 = "410 Gone", - Http411 = "411 Length Required", - Http418 = "418 I'm a teapot", - Http500 = "500 Internal Server Error", - Http501 = "501 Not Implemented", - Http502 = "502 Bad Gateway", - Http503 = "503 Service Unavailable", - Http504 = "504 Gateway Timeout", - Http505 = "505 HTTP Version Not Supported" - - HttpVersion* = enum - HttpVer11, - HttpVer10 - -{.deprecated: [TRequest: Request, PAsyncHttpServer: AsyncHttpServer, - THttpCode: HttpCode, THttpVersion: HttpVersion].} - -proc `==`*(protocol: tuple[orig: string, major, minor: int], - ver: HttpVersion): bool = - let major = - case ver - of HttpVer11, HttpVer10: 1 - let minor = - case ver - of HttpVer11: 1 - of HttpVer10: 0 - result = protocol.major == major and protocol.minor == minor - -proc newAsyncHttpServer*(reuseAddr = true): AsyncHttpServer = - ## Creates a new ``AsyncHttpServer`` instance. 
- new result - result.reuseAddr = reuseAddr - -proc addHeaders(msg: var string, headers: StringTableRef) = +proc getPort*(self: AsyncHttpServer): Port {.since: (1, 5, 1).} = + ## Returns the port `self` was bound to. + ## + ## Useful for identifying what port `self` is bound to, if it + ## was chosen automatically, for example via `listen(Port(0))`. + runnableExamples: + from std/nativesockets import Port + let server = newAsyncHttpServer() + server.listen(Port(0)) + assert server.getPort.uint16 > 0 + server.close() + result = getLocalAddr(self.socket)[1] + +proc newAsyncHttpServer*(reuseAddr = true, reusePort = false, + maxBody = 8388608): AsyncHttpServer = + ## Creates a new `AsyncHttpServer` instance. + result = AsyncHttpServer(reuseAddr: reuseAddr, reusePort: reusePort, maxBody: maxBody) + +proc addHeaders(msg: var string, headers: HttpHeaders) = for k, v in headers: msg.add(k & ": " & v & "\c\L") -proc sendHeaders*(req: Request, headers: StringTableRef): Future[void] = +proc sendHeaders*(req: Request, headers: HttpHeaders): Future[void] = ## Sends the specified headers to the requesting client. var msg = "" addHeaders(msg, headers) return req.client.send(msg) -proc respond*(req: Request, code: HttpCode, - content: string, headers = newStringTable()) {.async.} = - ## Responds to the request with the specified ``HttpCode``, headers and +proc respond*(req: Request, code: HttpCode, content: string, + headers: HttpHeaders = nil): Future[void] = + ## Responds to the request with the specified `HttpCode`, headers and ## content. ## ## This procedure will **not** close the client socket. 
- var customHeaders = headers - customHeaders["Content-Length"] = $content.len + ## + ## Example: + ## ```Nim + ## import std/json + ## proc handler(req: Request) {.async.} = + ## if req.url.path == "/hello-world": + ## let msg = %* {"message": "Hello World"} + ## let headers = newHttpHeaders([("Content-Type","application/json")]) + ## await req.respond(Http200, $msg, headers) + ## else: + ## await req.respond(Http404, "Not Found") + ## ``` var msg = "HTTP/1.1 " & $code & "\c\L" - msg.addHeaders(customHeaders) - await req.client.send(msg & "\c\L" & content) -proc newRequest(): Request = - result.headers = newStringTable(modeCaseInsensitive) - result.hostname = "" - result.body = "" + if headers != nil: + msg.addHeaders(headers) -proc parseHeader(line: string): tuple[key, value: string] = - var i = 0 - i = line.parseUntil(result.key, ':') - inc(i) # skip : - i += line.skipWhiteSpace(i) - i += line.parseUntil(result.value, {'\c', '\L'}, i) + # If the headers did not contain a Content-Length use our own + if headers.isNil() or not headers.hasKey("Content-Length"): + msg.add("Content-Length: ") + # this particular way saves allocations: + msg.addInt content.len + msg.add "\c\L" + + msg.add "\c\L" + msg.add(content) + result = req.client.send(msg) + +proc respondError(req: Request, code: HttpCode): Future[void] = + ## Responds to the request with the specified `HttpCode`. + let content = $code + var msg = "HTTP/1.1 " & content & "\c\L" + + msg.add("Content-Length: " & $content.len & "\c\L\c\L") + msg.add(content) + result = req.client.send(msg) proc parseProtocol(protocol: string): tuple[orig: string, major, minor: int] = var i = protocol.skipIgnoreCase("HTTP/") @@ -139,137 +151,290 @@ proc parseProtocol(protocol: string): tuple[orig: string, major, minor: int] = raise newException(ValueError, "Invalid request protocol. 
Got: " & protocol) result.orig = protocol - i.inc protocol.parseInt(result.major, i) + i.inc protocol.parseSaturatedNatural(result.major, i) i.inc # Skip . - i.inc protocol.parseInt(result.minor, i) + i.inc protocol.parseSaturatedNatural(result.minor, i) proc sendStatus(client: AsyncSocket, status: string): Future[void] = - client.send("HTTP/1.1 " & status & "\c\L") + client.send("HTTP/1.1 " & status & "\c\L\c\L") -proc processClient(client: AsyncSocket, address: string, - callback: proc (request: Request): - Future[void] {.closure, gcsafe.}) {.async.} = - while not client.isClosed: - # GET /path HTTP/1.1 - # Header: val - # \n - var request = newRequest() +func hasChunkedEncoding(request: Request): bool = + ## Searches for a chunked transfer encoding + const transferEncoding = "Transfer-Encoding" + + if request.headers.hasKey(transferEncoding): + for encoding in seq[string](request.headers[transferEncoding]): + if "chunked" == encoding.strip: + # Returns true if it is both an HttpPost and has chunked encoding + return request.reqMethod == HttpPost + return false + +proc processRequest( + server: AsyncHttpServer, + req: FutureVar[Request], + client: AsyncSocket, + address: sink string, + lineFut: FutureVar[string], + callback: proc (request: Request): Future[void] {.closure, gcsafe.}, +): Future[bool] {.async.} = + + # Alias `request` to `req.mget()` so we don't have to write `mget` everywhere. + template request(): Request = + req.mget() + + # GET /path HTTP/1.1 + # Header: val + # \n + request.headers.clear() + request.body = "" + when defined(gcArc) or defined(gcOrc) or defined(gcAtomicArc): request.hostname = address - assert client != nil - request.client = client + else: + request.hostname.shallowCopy(address) + assert client != nil + request.client = client - # First line - GET /path HTTP/1.1 - let line = await client.recvLine() # TODO: Timeouts. 
- if line == "": + # We should skip at least one empty line before the request + # https://tools.ietf.org/html/rfc7230#section-3.5 + for i in 0..1: + lineFut.mget().setLen(0) + lineFut.clean() + await client.recvLineInto(lineFut, maxLength = maxLine) # TODO: Timeouts. + + if lineFut.mget == "": client.close() - return - let lineParts = line.split(' ') - if lineParts.len != 3: - await request.respond(Http400, "Invalid request. Got: " & line) - continue - - let reqMethod = lineParts[0] - let path = lineParts[1] - let protocol = lineParts[2] - - # Headers - var i = 0 - while true: - i = 0 - let headerLine = await client.recvLine() - if headerLine == "": - client.close(); return - if headerLine == "\c\L": break - # TODO: Compiler crash - #let (key, value) = parseHeader(headerLine) - let kv = parseHeader(headerLine) - request.headers[kv.key] = kv.value - - request.reqMethod = reqMethod - request.url = parseUri(path) - try: - request.protocol = protocol.parseProtocol() - except ValueError: - asyncCheck request.respond(Http400, "Invalid request protocol. Got: " & - protocol) - continue - - if reqMethod.normalize == "post": - # Check for Expect header - if request.headers.hasKey("Expect"): - if request.headers["Expect"].toLower == "100-continue": - await client.sendStatus("100 Continue") - else: - await client.sendStatus("417 Expectation Failed") - - # Read the body - # - Check for Content-length header - if request.headers.hasKey("Content-Length"): - var contentLength = 0 - if parseInt(request.headers["Content-Length"], contentLength) == 0: - await request.respond(Http400, "Bad Request. Invalid Content-Length.") - else: - request.body = await client.recv(contentLength) - assert request.body.len == contentLength - else: - await request.respond(Http400, "Bad Request. 
No Content-Length.") - continue + return false - case reqMethod.normalize - of "get", "post", "head", "put", "delete", "trace", "options", "connect", "patch": - await callback(request) - else: - await request.respond(Http400, "Invalid request method. Got: " & reqMethod) - - # Persistent connections - if (request.protocol == HttpVer11 and - request.headers["connection"].normalize != "close") or - (request.protocol == HttpVer10 and - request.headers["connection"].normalize == "keep-alive"): - # In HTTP 1.1 we assume that connection is persistent. Unless connection - # header states otherwise. - # In HTTP 1.0 we assume that the connection should not be persistent. - # Unless the connection header states otherwise. - discard + if lineFut.mget.len > maxLine: + await request.respondError(Http413) + client.close() + return false + if lineFut.mget != "\c\L": + break + + # First line - GET /path HTTP/1.1 + var i = 0 + for linePart in lineFut.mget.split(' '): + case i + of 0: + case linePart + of "GET": request.reqMethod = HttpGet + of "POST": request.reqMethod = HttpPost + of "HEAD": request.reqMethod = HttpHead + of "PUT": request.reqMethod = HttpPut + of "DELETE": request.reqMethod = HttpDelete + of "PATCH": request.reqMethod = HttpPatch + of "OPTIONS": request.reqMethod = HttpOptions + of "CONNECT": request.reqMethod = HttpConnect + of "TRACE": request.reqMethod = HttpTrace + else: + asyncCheck request.respondError(Http400) + return true # Retry processing of request + of 1: + try: + parseUri(linePart, request.url) + except ValueError: + asyncCheck request.respondError(Http400) + return true + of 2: + try: + request.protocol = parseProtocol(linePart) + except ValueError: + asyncCheck request.respondError(Http400) + return true else: + await request.respondError(Http400) + return true + inc i + + # Headers + while true: + i = 0 + lineFut.mget.setLen(0) + lineFut.clean() + await client.recvLineInto(lineFut, maxLength = maxLine) + + if lineFut.mget == "": + client.close(); 
return false + if lineFut.mget.len > maxLine: + await request.respondError(Http413) + client.close(); return false + if lineFut.mget == "\c\L": break + let (key, value) = parseHeader(lineFut.mget) + request.headers[key] = value + # Ensure the client isn't trying to DoS us. + if request.headers.len > headerLimit: + await client.sendStatus("400 Bad Request") request.client.close() + return false + + if request.reqMethod == HttpPost: + # Check for Expect header + if request.headers.hasKey("Expect"): + if "100-continue" in request.headers["Expect"]: + await client.sendStatus("100 Continue") + else: + await client.sendStatus("417 Expectation Failed") + + # Read the body + # - Check for Content-length header + if request.headers.hasKey("Content-Length"): + var contentLength = 0 + if parseSaturatedNatural(request.headers["Content-Length"], contentLength) == 0: + await request.respond(Http400, "Bad Request. Invalid Content-Length.") + return true + else: + if contentLength > server.maxBody: + await request.respondError(Http413) + return false + request.body = await client.recv(contentLength) + if request.body.len != contentLength: + await request.respond(Http400, "Bad Request. 
Content-Length does not match actual.") + return true + elif hasChunkedEncoding(request): + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Transfer-Encoding + var sizeOrData = 0 + var bytesToRead = 0 + request.body = "" + + while true: + lineFut.mget.setLen(0) + lineFut.clean() + + # The encoding format alternates between specifying a number of bytes to read + # and the data to be read, of the previously specified size + if sizeOrData mod 2 == 0: + # Expect a number of chars to read + await client.recvLineInto(lineFut, maxLength = maxLine) + try: + bytesToRead = lineFut.mget.parseHexInt + except ValueError: + # Malformed request + await request.respond(Http411, ("Invalid chunked transfer encoding - " & + "chunk data size must be hex encoded")) + return true + else: + if bytesToRead == 0: + # Done reading chunked data + break + + # Read bytesToRead and add to body + let chunk = await client.recv(bytesToRead) + request.body.add(chunk) + # Skip \r\n (chunk terminating bytes per spec) + let separator = await client.recv(2) + if separator != "\r\n": + await request.respond(Http400, "Bad Request. Encoding separator must be \\r\\n") + return true + + inc sizeOrData + elif request.reqMethod == HttpPost: + await request.respond(Http411, "Content-Length required.") + return true + + # Call the user's callback. + await callback(request) + + if "upgrade" in request.headers.getOrDefault("connection"): + return false + + # The request has been served, from this point on returning `true` means the + # connection will not be closed and will be kept in the connection pool. + + # Persistent connections + if (request.protocol == HttpVer11 and + cmpIgnoreCase(request.headers.getOrDefault("connection"), "close") != 0) or + (request.protocol == HttpVer10 and + cmpIgnoreCase(request.headers.getOrDefault("connection"), "keep-alive") == 0): + # In HTTP 1.1 we assume that connection is persistent. Unless connection + # header states otherwise. 
+ # In HTTP 1.0 we assume that the connection should not be persistent. + # Unless the connection header states otherwise. + return true + else: + request.client.close() + return false + +proc processClient(server: AsyncHttpServer, client: AsyncSocket, address: string, + callback: proc (request: Request): + Future[void] {.closure, gcsafe.}) {.async.} = + var request = newFutureVar[Request]("asynchttpserver.processClient") + request.mget().url = initUri() + request.mget().headers = newHttpHeaders() + var lineFut = newFutureVar[string]("asynchttpserver.processClient") + lineFut.mget() = newStringOfCap(80) + + while not client.isClosed: + let retry = await processRequest( + server, request, client, address, lineFut, callback + ) + if not retry: + client.close() break +const + nimMaxDescriptorsFallback* {.intdefine.} = 16_000 ## fallback value for \ + ## when `maxDescriptors` is not available. + ## This can be set on the command line during compilation + ## via `-d:nimMaxDescriptorsFallback=N` + +proc listen*(server: AsyncHttpServer; port: Port; address = ""; domain = AF_INET) = + ## Listen to the given port and address. + when declared(maxDescriptors): + server.maxFDs = try: maxDescriptors() except: nimMaxDescriptorsFallback + else: + server.maxFDs = nimMaxDescriptorsFallback + server.socket = newAsyncSocket(domain) + if server.reuseAddr: + server.socket.setSockOpt(OptReuseAddr, true) + when not defined(nuttx): + if server.reusePort: + server.socket.setSockOpt(OptReusePort, true) + server.socket.bindAddr(port, address) + server.socket.listen() + +proc shouldAcceptRequest*(server: AsyncHttpServer; + assumedDescriptorsPerRequest = 5): bool {.inline.} = + ## Returns true if the process's current number of opened file + ## descriptors is still within the maximum limit and so it's reasonable to + ## accept yet another request. 
+ result = assumedDescriptorsPerRequest < 0 or + (activeDescriptors() + assumedDescriptorsPerRequest < server.maxFDs) + +proc acceptRequest*(server: AsyncHttpServer, + callback: proc (request: Request): Future[void] {.closure, gcsafe.}) {.async.} = + ## Accepts a single request. Write an explicit loop around this proc so that + ## errors can be handled properly. + var (address, client) = await server.socket.acceptAddr() + asyncCheck processClient(server, client, address, callback) + proc serve*(server: AsyncHttpServer, port: Port, - callback: proc (request: Request): Future[void] {.closure,gcsafe.}, - address = "") {.async.} = + callback: proc (request: Request): Future[void] {.closure, gcsafe.}, + address = ""; + assumedDescriptorsPerRequest = -1; + domain = AF_INET) {.async.} = ## Starts the process of listening for incoming HTTP connections on the ## specified address and port. ## ## When a request is made by a client the specified callback will be called. - server.socket = newAsyncSocket() - if server.reuseAddr: - server.socket.setSockOpt(OptReuseAddr, true) - server.socket.bindAddr(port, address) - server.socket.listen() - + ## + ## If `assumedDescriptorsPerRequest` is 0 or greater the server cares about + ## the process's maximum file descriptor limit. It then ensures that the + ## process still has the resources for `assumedDescriptorsPerRequest` + ## file descriptors before accepting a connection. + ## + ## You should prefer to call `acceptRequest` instead with a custom server + ## loop so that you're in control over the error handling and logging. + listen server, port, address, domain while true: - # TODO: Causes compiler crash. 
- #var (address, client) = await server.socket.acceptAddr() - var fut = await server.socket.acceptAddr() - asyncCheck processClient(fut.client, fut.address, callback) + if shouldAcceptRequest(server, assumedDescriptorsPerRequest): + var (address, client) = await server.socket.acceptAddr() + asyncCheck processClient(server, client, address, callback) + else: + poll() #echo(f.isNil) #echo(f.repr) proc close*(server: AsyncHttpServer) = ## Terminates the async http server instance. server.socket.close() - -when isMainModule: - proc main = - var server = newAsyncHttpServer() - proc cb(req: Request) {.async.} = - #echo(req.reqMethod, " ", req.url) - #echo(req.headers) - let headers = {"Date": "Tue, 29 Apr 2014 23:40:08 GMT", - "Content-type": "text/plain; charset=utf-8"} - await req.respond(Http200, "Hello World", headers.newStringTable()) - - asyncCheck server.serve(Port(5555), cb) - runForever() - main() diff --git a/lib/pure/asyncio.nim b/lib/pure/asyncio.nim deleted file mode 100644 index f58bb4302..000000000 --- a/lib/pure/asyncio.nim +++ /dev/null @@ -1,712 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf, Dominik Picheta -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -include "system/inclrtl" - -import sockets, os - -## -## **Warning:** This module is deprecated since version 0.10.2. -## Use the brand new `asyncdispatch <asyncdispatch.html>`_ module together -## with the `asyncnet <asyncnet.html>`_ module. - -## This module implements an asynchronous event loop together with asynchronous -## sockets which use this event loop. -## It is akin to Python's asyncore module. Many modules that use sockets -## have an implementation for this module, those modules should all have a -## ``register`` function which you should use to add the desired objects to a -## dispatcher which you created so -## that you can receive the events associated with that module's object. 
-## -## Once everything is registered in a dispatcher, you need to call the ``poll`` -## function in a while loop. -## -## **Note:** Most modules have tasks which need to be ran regularly, this is -## why you should not call ``poll`` with a infinite timeout, or even a -## very long one. In most cases the default timeout is fine. -## -## **Note:** This module currently only supports select(), this is limited by -## FD_SETSIZE, which is usually 1024. So you may only be able to use 1024 -## sockets at a time. -## -## Most (if not all) modules that use asyncio provide a userArg which is passed -## on with the events. The type that you set userArg to must be inheriting from -## ``RootObj``! -## -## **Note:** If you want to provide async ability to your module please do not -## use the ``Delegate`` object, instead use ``AsyncSocket``. It is possible -## that in the future this type's fields will not be exported therefore breaking -## your code. -## -## **Warning:** The API of this module is unstable, and therefore is subject -## to change. -## -## Asynchronous sockets -## ==================== -## -## For most purposes you do not need to worry about the ``Delegate`` type. The -## ``AsyncSocket`` is what you are after. It's a reference to -## the ``AsyncSocketObj`` object. This object defines events which you should -## overwrite by your own procedures. -## -## For server sockets the only event you need to worry about is the ``handleAccept`` -## event, in your handleAccept proc you should call ``accept`` on the server -## socket which will give you the client which is connecting. You should then -## set any events that you want to use on that client and add it to your dispatcher -## using the ``register`` procedure. -## -## An example ``handleAccept`` follows: -## -## .. code-block:: nim -## -## var disp = newDispatcher() -## ... 
-## proc handleAccept(s: AsyncSocket) = -## echo("Accepted client.") -## var client: AsyncSocket -## new(client) -## s.accept(client) -## client.handleRead = ... -## disp.register(client) -## ... -## -## For client sockets you should only be interested in the ``handleRead`` and -## ``handleConnect`` events. The former gets called whenever the socket has -## received messages and can be read from and the latter gets called whenever -## the socket has established a connection to a server socket; from that point -## it can be safely written to. -## -## Getting a blocking client from an AsyncSocket -## ============================================= -## -## If you need a asynchronous server socket but you wish to process the clients -## synchronously then you can use the ``getSocket`` converter to get -## a ``Socket`` from the ``AsyncSocket`` object, this can then be combined -## with ``accept`` like so: -## -## .. code-block:: nim -## -## proc handleAccept(s: AsyncSocket) = -## var client: Socket -## getSocket(s).accept(client) - -{.deprecated.} - -when defined(windows): - from winlean import TimeVal, SocketHandle, FD_SET, FD_ZERO, TFdSet, - FD_ISSET, select -else: - from posix import TimeVal, SocketHandle, FD_SET, FD_ZERO, TFdSet, - FD_ISSET, select - -type - DelegateObj* = object - fd*: SocketHandle - deleVal*: RootRef - - handleRead*: proc (h: RootRef) {.nimcall, gcsafe.} - handleWrite*: proc (h: RootRef) {.nimcall, gcsafe.} - handleError*: proc (h: RootRef) {.nimcall, gcsafe.} - hasDataBuffered*: proc (h: RootRef): bool {.nimcall, gcsafe.} - - open*: bool - task*: proc (h: RootRef) {.nimcall, gcsafe.} - mode*: FileMode - - Delegate* = ref DelegateObj - - Dispatcher* = ref DispatcherObj - DispatcherObj = object - delegates: seq[Delegate] - - AsyncSocket* = ref AsyncSocketObj - AsyncSocketObj* = object of RootObj - socket: Socket - info: SocketStatus - - handleRead*: proc (s: AsyncSocket) {.closure, gcsafe.} - handleWrite: proc (s: AsyncSocket) {.closure, gcsafe.} - 
handleConnect*: proc (s: AsyncSocket) {.closure, gcsafe.} - - handleAccept*: proc (s: AsyncSocket) {.closure, gcsafe.} - - handleTask*: proc (s: AsyncSocket) {.closure, gcsafe.} - - lineBuffer: TaintedString ## Temporary storage for ``readLine`` - sendBuffer: string ## Temporary storage for ``send`` - sslNeedAccept: bool - proto: Protocol - deleg: Delegate - - SocketStatus* = enum - SockIdle, SockConnecting, SockConnected, SockListening, SockClosed, - SockUDPBound - -{.deprecated: [TDelegate: DelegateObj, PDelegate: Delegate, - TInfo: SocketStatus, PAsyncSocket: AsyncSocket, TAsyncSocket: AsyncSocketObj, - TDispatcher: DispatcherObj, PDispatcher: Dispatcher, - ].} - - -proc newDelegate*(): Delegate = - ## Creates a new delegate. - new(result) - result.handleRead = (proc (h: RootRef) = discard) - result.handleWrite = (proc (h: RootRef) = discard) - result.handleError = (proc (h: RootRef) = discard) - result.hasDataBuffered = (proc (h: RootRef): bool = return false) - result.task = (proc (h: RootRef) = discard) - result.mode = fmRead - -proc newAsyncSocket(): AsyncSocket = - new(result) - result.info = SockIdle - - result.handleRead = (proc (s: AsyncSocket) = discard) - result.handleWrite = nil - result.handleConnect = (proc (s: AsyncSocket) = discard) - result.handleAccept = (proc (s: AsyncSocket) = discard) - result.handleTask = (proc (s: AsyncSocket) = discard) - - result.lineBuffer = "".TaintedString - result.sendBuffer = "" - -proc asyncSocket*(domain: Domain = AF_INET, typ: SockType = SOCK_STREAM, - protocol: Protocol = IPPROTO_TCP, - buffered = true): AsyncSocket = - ## Initialises an AsyncSocket object. If a socket cannot be initialised - ## EOS is raised. 
- result = newAsyncSocket() - result.socket = socket(domain, typ, protocol, buffered) - result.proto = protocol - if result.socket == invalidSocket: raiseOSError(osLastError()) - result.socket.setBlocking(false) - -proc toAsyncSocket*(sock: Socket, state: SocketStatus = SockConnected): AsyncSocket = - ## Wraps an already initialized ``TSocket`` into a AsyncSocket. - ## This is useful if you want to use an already connected TSocket as an - ## asynchronous AsyncSocket in asyncio's event loop. - ## - ## ``state`` may be overriden, i.e. if ``sock`` is not connected it should be - ## adjusted properly. By default it will be assumed that the socket is - ## connected. Please note this is only applicable to TCP client sockets, if - ## ``sock`` is a different type of socket ``state`` needs to be adjusted!!! - ## - ## ================ ================================================================ - ## Value Meaning - ## ================ ================================================================ - ## SockIdle Socket has only just been initialised, not connected or closed. - ## SockConnected Socket is connected to a server. - ## SockConnecting Socket is in the process of connecting to a server. - ## SockListening Socket is a server socket and is listening for connections. - ## SockClosed Socket has been closed. - ## SockUDPBound Socket is a UDP socket which is listening for data. - ## ================ ================================================================ - ## - ## **Warning**: If ``state`` is set incorrectly the resulting ``AsyncSocket`` - ## object may not work properly. - ## - ## **Note**: This will set ``sock`` to be non-blocking. 
- result = newAsyncSocket() - result.socket = sock - result.proto = if state == SockUDPBound: IPPROTO_UDP else: IPPROTO_TCP - result.socket.setBlocking(false) - result.info = state - -proc asyncSockHandleRead(h: RootRef) = - when defined(ssl): - if AsyncSocket(h).socket.isSSL and not - AsyncSocket(h).socket.gotHandshake: - return - - if AsyncSocket(h).info != SockListening: - if AsyncSocket(h).info != SockConnecting: - AsyncSocket(h).handleRead(AsyncSocket(h)) - else: - AsyncSocket(h).handleAccept(AsyncSocket(h)) - -proc close*(sock: AsyncSocket) {.gcsafe.} -proc asyncSockHandleWrite(h: RootRef) = - when defined(ssl): - if AsyncSocket(h).socket.isSSL and not - AsyncSocket(h).socket.gotHandshake: - return - - if AsyncSocket(h).info == SockConnecting: - AsyncSocket(h).handleConnect(AsyncSocket(h)) - AsyncSocket(h).info = SockConnected - # Stop receiving write events if there is no handleWrite event. - if AsyncSocket(h).handleWrite == nil: - AsyncSocket(h).deleg.mode = fmRead - else: - AsyncSocket(h).deleg.mode = fmReadWrite - else: - if AsyncSocket(h).sendBuffer != "": - let sock = AsyncSocket(h) - try: - let bytesSent = sock.socket.sendAsync(sock.sendBuffer) - if bytesSent == 0: - # Apparently the socket cannot be written to. Even though select - # just told us that it can be... This used to be an assert. Just - # do nothing instead. - discard - elif bytesSent != sock.sendBuffer.len: - sock.sendBuffer = sock.sendBuffer[bytesSent .. ^1] - elif bytesSent == sock.sendBuffer.len: - sock.sendBuffer = "" - - if AsyncSocket(h).handleWrite != nil: - AsyncSocket(h).handleWrite(AsyncSocket(h)) - except OSError: - # Most likely the socket closed before the full buffer could be sent to it. - sock.close() # TODO: Provide a handleError for users? 
- else: - if AsyncSocket(h).handleWrite != nil: - AsyncSocket(h).handleWrite(AsyncSocket(h)) - else: - AsyncSocket(h).deleg.mode = fmRead - -when defined(ssl): - proc asyncSockDoHandshake(h: PObject) {.gcsafe.} = - if AsyncSocket(h).socket.isSSL and not - AsyncSocket(h).socket.gotHandshake: - if AsyncSocket(h).sslNeedAccept: - var d = "" - let ret = AsyncSocket(h).socket.acceptAddrSSL(AsyncSocket(h).socket, d) - assert ret != AcceptNoClient - if ret == AcceptSuccess: - AsyncSocket(h).info = SockConnected - else: - # handshake will set socket's ``sslNoHandshake`` field. - discard AsyncSocket(h).socket.handshake() - - -proc asyncSockTask(h: RootRef) = - when defined(ssl): - h.asyncSockDoHandshake() - - AsyncSocket(h).handleTask(AsyncSocket(h)) - -proc toDelegate(sock: AsyncSocket): Delegate = - result = newDelegate() - result.deleVal = sock - result.fd = getFD(sock.socket) - # We need this to get write events, just to know when the socket connects. - result.mode = fmReadWrite - result.handleRead = asyncSockHandleRead - result.handleWrite = asyncSockHandleWrite - result.task = asyncSockTask - # TODO: Errors? - #result.handleError = (proc (h: PObject) = assert(false)) - - result.hasDataBuffered = - proc (h: RootRef): bool {.nimcall.} = - return AsyncSocket(h).socket.hasDataBuffered() - - sock.deleg = result - if sock.info notin {SockIdle, SockClosed}: - sock.deleg.open = true - else: - sock.deleg.open = false - -proc connect*(sock: AsyncSocket, name: string, port = Port(0), - af: Domain = AF_INET) = - ## Begins connecting ``sock`` to ``name``:``port``. - sock.socket.connectAsync(name, port, af) - sock.info = SockConnecting - if sock.deleg != nil: - sock.deleg.open = true - -proc close*(sock: AsyncSocket) = - ## Closes ``sock``. Terminates any current connections. - sock.socket.close() - sock.info = SockClosed - if sock.deleg != nil: - sock.deleg.open = false - -proc bindAddr*(sock: AsyncSocket, port = Port(0), address = "") = - ## Equivalent to ``sockets.bindAddr``. 
- sock.socket.bindAddr(port, address) - if sock.proto == IPPROTO_UDP: - sock.info = SockUDPBound - if sock.deleg != nil: - sock.deleg.open = true - -proc listen*(sock: AsyncSocket) = - ## Equivalent to ``sockets.listen``. - sock.socket.listen() - sock.info = SockListening - if sock.deleg != nil: - sock.deleg.open = true - -proc acceptAddr*(server: AsyncSocket, client: var AsyncSocket, - address: var string) = - ## Equivalent to ``sockets.acceptAddr``. This procedure should be called in - ## a ``handleAccept`` event handler **only** once. - ## - ## **Note**: ``client`` needs to be initialised. - assert(client != nil) - client = newAsyncSocket() - var c: Socket - new(c) - when defined(ssl): - if server.socket.isSSL: - var ret = server.socket.acceptAddrSSL(c, address) - # The following shouldn't happen because when this function is called - # it is guaranteed that there is a client waiting. - # (This should be called in handleAccept) - assert(ret != AcceptNoClient) - if ret == AcceptNoHandshake: - client.sslNeedAccept = true - else: - client.sslNeedAccept = false - client.info = SockConnected - else: - server.socket.acceptAddr(c, address) - client.sslNeedAccept = false - client.info = SockConnected - else: - server.socket.acceptAddr(c, address) - client.sslNeedAccept = false - client.info = SockConnected - - if c == invalidSocket: raiseSocketError(server.socket) - c.setBlocking(false) # TODO: Needs to be tested. - - # deleg.open is set in ``toDelegate``. - - client.socket = c - client.lineBuffer = "".TaintedString - client.sendBuffer = "" - client.info = SockConnected - -proc accept*(server: AsyncSocket, client: var AsyncSocket) = - ## Equivalent to ``sockets.accept``. - var dummyAddr = "" - server.acceptAddr(client, dummyAddr) - -proc acceptAddr*(server: AsyncSocket): tuple[sock: AsyncSocket, - address: string] {.deprecated.} = - ## Equivalent to ``sockets.acceptAddr``. - ## - ## **Deprecated since version 0.9.0:** Please use the function above. 
- var client = newAsyncSocket() - var address: string = "" - acceptAddr(server, client, address) - return (client, address) - -proc accept*(server: AsyncSocket): AsyncSocket {.deprecated.} = - ## Equivalent to ``sockets.accept``. - ## - ## **Deprecated since version 0.9.0:** Please use the function above. - new(result) - var address = "" - server.acceptAddr(result, address) - -proc newDispatcher*(): Dispatcher = - new(result) - result.delegates = @[] - -proc register*(d: Dispatcher, deleg: Delegate) = - ## Registers delegate ``deleg`` with dispatcher ``d``. - d.delegates.add(deleg) - -proc register*(d: Dispatcher, sock: AsyncSocket): Delegate {.discardable.} = - ## Registers async socket ``sock`` with dispatcher ``d``. - result = sock.toDelegate() - d.register(result) - -proc unregister*(d: Dispatcher, deleg: Delegate) = - ## Unregisters deleg ``deleg`` from dispatcher ``d``. - for i in 0..len(d.delegates)-1: - if d.delegates[i] == deleg: - d.delegates.del(i) - return - raise newException(IndexError, "Could not find delegate.") - -proc isWriteable*(s: AsyncSocket): bool = - ## Determines whether socket ``s`` is ready to be written to. - var writeSock = @[s.socket] - return selectWrite(writeSock, 1) != 0 and s.socket notin writeSock - -converter getSocket*(s: AsyncSocket): Socket = - return s.socket - -proc isConnected*(s: AsyncSocket): bool = - ## Determines whether ``s`` is connected. - return s.info == SockConnected -proc isListening*(s: AsyncSocket): bool = - ## Determines whether ``s`` is listening for incoming connections. - return s.info == SockListening -proc isConnecting*(s: AsyncSocket): bool = - ## Determines whether ``s`` is connecting. - return s.info == SockConnecting -proc isClosed*(s: AsyncSocket): bool = - ## Determines whether ``s`` has been closed. - return s.info == SockClosed -proc isSendDataBuffered*(s: AsyncSocket): bool = - ## Determines whether ``s`` has data waiting to be sent, i.e. whether this - ## socket's sendBuffer contains data. 
- return s.sendBuffer.len != 0 - -proc setHandleWrite*(s: AsyncSocket, - handleWrite: proc (s: AsyncSocket) {.closure, gcsafe.}) = - ## Setter for the ``handleWrite`` event. - ## - ## To remove this event you should use the ``delHandleWrite`` function. - ## It is advised to use that function instead of just setting the event to - ## ``proc (s: AsyncSocket) = nil`` as that would mean that that function - ## would be called constantly. - s.deleg.mode = fmReadWrite - s.handleWrite = handleWrite - -proc delHandleWrite*(s: AsyncSocket) = - ## Removes the ``handleWrite`` event handler on ``s``. - s.handleWrite = nil - -{.push warning[deprecated]: off.} -proc recvLine*(s: AsyncSocket, line: var TaintedString): bool {.deprecated.} = - ## Behaves similar to ``sockets.recvLine``, however it handles non-blocking - ## sockets properly. This function guarantees that ``line`` is a full line, - ## if this function can only retrieve some data; it will save this data and - ## add it to the result when a full line is retrieved. - ## - ## Unlike ``sockets.recvLine`` this function will raise an EOS or ESSL - ## exception if an error occurs. - ## - ## **Deprecated since version 0.9.2**: This function has been deprecated in - ## favour of readLine. - setLen(line.string, 0) - var dataReceived = "".TaintedString - var ret = s.socket.recvLineAsync(dataReceived) - case ret - of RecvFullLine: - if s.lineBuffer.len > 0: - string(line).add(s.lineBuffer.string) - setLen(s.lineBuffer.string, 0) - string(line).add(dataReceived.string) - if string(line) == "": - line = "\c\L".TaintedString - result = true - of RecvPartialLine: - string(s.lineBuffer).add(dataReceived.string) - result = false - of RecvDisconnected: - result = true - of RecvFail: - s.raiseSocketError(async = true) - result = false -{.pop.} - -proc readLine*(s: AsyncSocket, line: var TaintedString): bool = - ## Behaves similar to ``sockets.readLine``, however it handles non-blocking - ## sockets properly. 
This function guarantees that ``line`` is a full line, - ## if this function can only retrieve some data; it will save this data and - ## add it to the result when a full line is retrieved, when this happens - ## False will be returned. True will only be returned if a full line has been - ## retrieved or the socket has been disconnected in which case ``line`` will - ## be set to "". - ## - ## This function will raise an EOS exception when a socket error occurs. - setLen(line.string, 0) - var dataReceived = "".TaintedString - var ret = s.socket.readLineAsync(dataReceived) - case ret - of ReadFullLine: - if s.lineBuffer.len > 0: - string(line).add(s.lineBuffer.string) - setLen(s.lineBuffer.string, 0) - string(line).add(dataReceived.string) - if string(line) == "": - line = "\c\L".TaintedString - result = true - of ReadPartialLine: - string(s.lineBuffer).add(dataReceived.string) - result = false - of ReadNone: - result = false - of ReadDisconnected: - result = true - -proc send*(sock: AsyncSocket, data: string) = - ## Sends ``data`` to socket ``sock``. This is basically a nicer implementation - ## of ``sockets.sendAsync``. - ## - ## If ``data`` cannot be sent immediately it will be buffered and sent - ## when ``sock`` becomes writeable (during the ``handleWrite`` event). - ## It's possible that only a part of ``data`` will be sent immediately, while - ## the rest of it will be buffered and sent later. - if sock.sendBuffer.len != 0: - sock.sendBuffer.add(data) - return - let bytesSent = sock.socket.sendAsync(data) - assert bytesSent >= 0 - if bytesSent == 0: - sock.sendBuffer.add(data) - sock.deleg.mode = fmReadWrite - elif bytesSent != data.len: - sock.sendBuffer.add(data[bytesSent .. 
^1]) - sock.deleg.mode = fmReadWrite - -proc timeValFromMilliseconds(timeout = 500): Timeval = - if timeout != -1: - var seconds = timeout div 1000 - result.tv_sec = seconds.int32 - result.tv_usec = ((timeout - seconds * 1000) * 1000).int32 - -proc createFdSet(fd: var TFdSet, s: seq[Delegate], m: var int) = - FD_ZERO(fd) - for i in items(s): - m = max(m, int(i.fd)) - FD_SET(i.fd, fd) - -proc pruneSocketSet(s: var seq[Delegate], fd: var TFdSet) = - var i = 0 - var L = s.len - while i < L: - if FD_ISSET(s[i].fd, fd) != 0'i32: - s[i] = s[L-1] - dec(L) - else: - inc(i) - setLen(s, L) - -proc select(readfds, writefds, exceptfds: var seq[Delegate], - timeout = 500): int = - var tv {.noInit.}: Timeval = timeValFromMilliseconds(timeout) - - var rd, wr, ex: TFdSet - var m = 0 - createFdSet(rd, readfds, m) - createFdSet(wr, writefds, m) - createFdSet(ex, exceptfds, m) - - if timeout != -1: - result = int(select(cint(m+1), addr(rd), addr(wr), addr(ex), addr(tv))) - else: - result = int(select(cint(m+1), addr(rd), addr(wr), addr(ex), nil)) - - pruneSocketSet(readfds, (rd)) - pruneSocketSet(writefds, (wr)) - pruneSocketSet(exceptfds, (ex)) - -proc poll*(d: Dispatcher, timeout: int = 500): bool = - ## This function checks for events on all the delegates in the `PDispatcher`. - ## It then proceeds to call the correct event handler. - ## - ## This function returns ``True`` if there are file descriptors that are still - ## open, otherwise ``False``. File descriptors that have been - ## closed are immediately removed from the dispatcher automatically. - ## - ## **Note:** Each delegate has a task associated with it. This gets called - ## after each select() call, if you set timeout to ``-1`` the tasks will - ## only be executed after one or more file descriptors becomes readable or - ## writeable. 
- result = true - var readDg, writeDg, errorDg: seq[Delegate] = @[] - var len = d.delegates.len - var dc = 0 - - while dc < len: - let deleg = d.delegates[dc] - if (deleg.mode != fmWrite or deleg.mode != fmAppend) and deleg.open: - readDg.add(deleg) - if (deleg.mode != fmRead) and deleg.open: - writeDg.add(deleg) - if deleg.open: - errorDg.add(deleg) - inc dc - else: - # File/socket has been closed. Remove it from dispatcher. - d.delegates[dc] = d.delegates[len-1] - dec len - - d.delegates.setLen(len) - - var hasDataBufferedCount = 0 - for d in d.delegates: - if d.hasDataBuffered(d.deleVal): - hasDataBufferedCount.inc() - d.handleRead(d.deleVal) - if hasDataBufferedCount > 0: return true - - if readDg.len() == 0 and writeDg.len() == 0: - ## TODO: Perhaps this shouldn't return if errorDg has something? - return false - - if select(readDg, writeDg, errorDg, timeout) != 0: - for i in 0..len(d.delegates)-1: - if i > len(d.delegates)-1: break # One delegate might've been removed. - let deleg = d.delegates[i] - if not deleg.open: continue # This delegate might've been closed. - if (deleg.mode != fmWrite or deleg.mode != fmAppend) and - deleg notin readDg: - deleg.handleRead(deleg.deleVal) - if (deleg.mode != fmRead) and deleg notin writeDg: - deleg.handleWrite(deleg.deleVal) - if deleg notin errorDg: - deleg.handleError(deleg.deleVal) - - # Execute tasks - for i in items(d.delegates): - i.task(i.deleVal) - -proc len*(disp: Dispatcher): int = - ## Retrieves the amount of delegates in ``disp``. - return disp.delegates.len - -when isMainModule: - - proc testConnect(s: AsyncSocket, no: int) = - echo("Connected! " & $no) - - proc testRead(s: AsyncSocket, no: int) = - echo("Reading! " & $no) - var data = "" - if not s.readLine(data): return - if data == "": - echo("Closing connection. " & $no) - s.close() - echo(data) - echo("Finished reading! " & $no) - - proc testAccept(s: AsyncSocket, disp: Dispatcher, no: int) = - echo("Accepting client! 
" & $no) - var client: AsyncSocket - new(client) - var address = "" - s.acceptAddr(client, address) - echo("Accepted ", address) - client.handleRead = - proc (s: AsyncSocket) = - testRead(s, 2) - disp.register(client) - - proc main = - var d = newDispatcher() - - var s = asyncSocket() - s.connect("amber.tenthbit.net", Port(6667)) - s.handleConnect = - proc (s: AsyncSocket) = - testConnect(s, 1) - s.handleRead = - proc (s: AsyncSocket) = - testRead(s, 1) - d.register(s) - - var server = asyncSocket() - server.handleAccept = - proc (s: AsyncSocket) = - testAccept(s, d, 78) - server.bindAddr(Port(5555)) - server.listen() - d.register(server) - - while d.poll(-1): discard - main() diff --git a/lib/pure/asyncmacro.nim b/lib/pure/asyncmacro.nim new file mode 100644 index 000000000..d4e72c28a --- /dev/null +++ b/lib/pure/asyncmacro.nim @@ -0,0 +1,383 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Dominik Picheta +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Implements the `async` and `multisync` macros for `asyncdispatch`. + +import std/[macros, strutils, asyncfutures] + +type + Context = ref object + inTry: int + hasRet: bool + +# TODO: Ref https://github.com/nim-lang/Nim/issues/5617 +# TODO: Add more line infos +proc newCallWithLineInfo(fromNode: NimNode; theProc: NimNode, args: varargs[NimNode]): NimNode = + result = newCall(theProc, args) + result.copyLineInfo(fromNode) + +template createCb(retFutureSym, iteratorNameSym, + strName, identName, futureVarCompletions: untyped) = + bind finished + var nameIterVar = iteratorNameSym + proc identName {.closure, stackTrace: off.} = + try: + if not nameIterVar.finished: + var next = nameIterVar() + # Continue while the yielded future is already finished. 
+ while (not next.isNil) and next.finished: + next = nameIterVar() + if nameIterVar.finished: + break + + if next == nil: + if not retFutureSym.finished: + let msg = "Async procedure ($1) yielded `nil`, are you await'ing a `nil` Future?" + raise newException(AssertionDefect, msg % strName) + else: + {.gcsafe.}: + next.addCallback cast[proc() {.closure, gcsafe.}](identName) + except: + futureVarCompletions + if retFutureSym.finished: + # Take a look at tasyncexceptions for the bug which this fixes. + # That test explains it better than I can here. + raise + else: + retFutureSym.fail(getCurrentException()) + identName() + +proc createFutureVarCompletions(futureVarIdents: seq[NimNode], fromNode: NimNode): NimNode = + result = newNimNode(nnkStmtList, fromNode) + # Add calls to complete each FutureVar parameter. + for ident in futureVarIdents: + # Only complete them if they have not been completed already by the user. + # In the meantime, this was really useful for debugging :) + #result.add(newCall(newIdentNode("echo"), newStrLitNode(fromNode.lineinfo))) + result.add newIfStmt( + ( + newCall(newIdentNode("not"), + newDotExpr(ident, newIdentNode("finished"))), + newCallWithLineInfo(fromNode, newIdentNode("complete"), ident) + ) + ) + +proc processBody(ctx: Context; node, needsCompletionSym, retFutureSym: NimNode, futureVarIdents: seq[NimNode]): NimNode = + result = node + case node.kind + of nnkReturnStmt: + result = newNimNode(nnkStmtList, node) + + # As I've painfully found out, the order here really DOES matter. 
+ result.add createFutureVarCompletions(futureVarIdents, node) + + ctx.hasRet = true + if node[0].kind == nnkEmpty: + if ctx.inTry == 0: + result.add newCallWithLineInfo(node, newIdentNode("complete"), retFutureSym, newIdentNode("result")) + else: + result.add newAssignment(needsCompletionSym, newLit(true)) + else: + let x = processBody(ctx, node[0], needsCompletionSym, retFutureSym, futureVarIdents) + if x.kind == nnkYieldStmt: result.add x + elif ctx.inTry == 0: + result.add newCallWithLineInfo(node, newIdentNode("complete"), retFutureSym, x) + else: + result.add newAssignment(newIdentNode("result"), x) + result.add newAssignment(needsCompletionSym, newLit(true)) + + result.add newNimNode(nnkReturnStmt, node).add(newNilLit()) + return # Don't process the children of this return stmt + of RoutineNodes-{nnkTemplateDef}: + # skip all the nested procedure definitions + return + of nnkTryStmt: + if result[^1].kind == nnkFinally: + inc ctx.inTry + result[0] = processBody(ctx, result[0], needsCompletionSym, retFutureSym, futureVarIdents) + dec ctx.inTry + for i in 1 ..< result.len: + result[i] = processBody(ctx, result[i], needsCompletionSym, retFutureSym, futureVarIdents) + if ctx.inTry == 0 and ctx.hasRet: + let finallyNode = copyNimNode(result[^1]) + let stmtNode = newNimNode(nnkStmtList) + for child in result[^1]: + stmtNode.add child + stmtNode.add newIfStmt( + ( needsCompletionSym, + newCallWithLineInfo(node, newIdentNode("complete"), retFutureSym, + newIdentNode("result") + ) + ) + ) + finallyNode.add stmtNode + result[^1] = finallyNode + else: + for i in 0 ..< result.len: + result[i] = processBody(ctx, result[i], needsCompletionSym, retFutureSym, futureVarIdents) + else: + for i in 0 ..< result.len: + result[i] = processBody(ctx, result[i], needsCompletionSym, retFutureSym, futureVarIdents) + + # echo result.repr + +proc getName(node: NimNode): string = + case node.kind + of nnkPostfix: + return node[1].strVal + of nnkIdent, nnkSym: + return node.strVal + of 
nnkEmpty: + return "anonymous" + else: + error("Unknown name.", node) + +proc getFutureVarIdents(params: NimNode): seq[NimNode] = + result = @[] + for i in 1 ..< len(params): + expectKind(params[i], nnkIdentDefs) + if params[i][1].kind == nnkBracketExpr and + params[i][1][0].eqIdent(FutureVar.astToStr): + ## eqIdent: first char is case sensitive!!! + result.add(params[i][0]) + +proc isInvalidReturnType(typeName: string): bool = + return typeName notin ["Future"] #, "FutureStream"] + +proc verifyReturnType(typeName: string, node: NimNode = nil) = + if typeName.isInvalidReturnType: + error("Expected return type of 'Future' got '$1'" % + typeName, node) + +template await*(f: typed): untyped {.used.} = + static: + error "await expects Future[T], got " & $typeof(f) + +template await*[T](f: Future[T]): auto {.used.} = + when not defined(nimHasTemplateRedefinitionPragma): + {.pragma: redefine.} + template yieldFuture {.redefine.} = yield FutureBase() + + when compiles(yieldFuture): + var internalTmpFuture: FutureBase = f + yield internalTmpFuture + (cast[typeof(f)](internalTmpFuture)).read() + else: + macro errorAsync(futureError: Future[T]) = + error( + "Can only 'await' inside a proc marked as 'async'. Use " & + "'waitFor' when calling an 'async' proc in a non-async scope instead", + futureError) + errorAsync(f) + +proc asyncSingleProc(prc: NimNode): NimNode = + ## This macro transforms a single procedure into a closure iterator. + ## The `async` macro supports a stmtList holding multiple async procedures. + if prc.kind == nnkProcTy: + result = prc + if prc[0][0].kind == nnkEmpty: + result[0][0] = quote do: Future[void] + return result + + if prc.kind in RoutineNodes and prc.name.kind != nnkEmpty: + # Only non anonymous functions need/can have stack trace disabled + prc.addPragma(nnkExprColonExpr.newTree(ident"stackTrace", ident"off")) + + if prc.kind notin {nnkProcDef, nnkLambda, nnkMethodDef, nnkDo}: + error("Cannot transform this node kind into an async proc." 
& + " proc/method definition or lambda node expected.", prc) + + if prc[4].kind != nnkEmpty: + for prag in prc[4]: + if prag.eqIdent("discardable"): + error("Cannot make async proc discardable. Futures have to be " & + "checked with `asyncCheck` instead of discarded", prag) + + let prcName = prc.name.getName + + var returnType = prc.params[0] + var baseType: NimNode + if returnType.kind in nnkCallKinds and returnType[0].eqIdent("owned") and + returnType.len == 2: + returnType = returnType[1] + # Verify that the return type is a Future[T] + if returnType.kind == nnkBracketExpr: + let fut = repr(returnType[0]) + verifyReturnType(fut, returnType[0]) + baseType = returnType[1] + elif returnType.kind in nnkCallKinds and returnType[0].eqIdent("[]"): + let fut = repr(returnType[1]) + verifyReturnType(fut, returnType[0]) + baseType = returnType[2] + elif returnType.kind == nnkEmpty: + baseType = returnType + else: + verifyReturnType(repr(returnType), returnType) + + let futureVarIdents = getFutureVarIdents(prc.params) + var outerProcBody = newNimNode(nnkStmtList, prc.body) + + # Extract the documentation comment from the original procedure declaration. + # Note that we're not removing it from the body in order not to make this + # transformation even more complex. 
+ let body2 = extractDocCommentsAndRunnables(prc.body) + + # -> var retFuture = newFuture[T]() + var retFutureSym = genSym(nskVar, "retFuture") + var subRetType = + if returnType.kind == nnkEmpty: newIdentNode("void") + else: baseType + outerProcBody.add( + newVarStmt(retFutureSym, + newCall( + newNimNode(nnkBracketExpr, prc.body).add( + newIdentNode("newFuture"), + subRetType), + newLit(prcName)))) # Get type from return type of this proc + + # -> iterator nameIter(): FutureBase {.closure.} = + # -> {.push warning[resultshadowed]: off.} + # -> var result: T + # -> {.pop.} + # -> <proc_body> + # -> complete(retFuture, result) + var iteratorNameSym = genSym(nskIterator, $prcName & " (Async)") + var needsCompletionSym = genSym(nskVar, "needsCompletion") + var ctx = Context() + var procBody = processBody(ctx, prc.body, needsCompletionSym, retFutureSym, futureVarIdents) + # don't do anything with forward bodies (empty) + if procBody.kind != nnkEmpty: + # fix #13899, defer should not escape its original scope + let blockStmt = newStmtList(newTree(nnkBlockStmt, newEmptyNode(), procBody)) + procBody = newStmtList() + let resultIdent = ident"result" + procBody.add quote do: + # Check whether there is an implicit return + when typeof(`blockStmt`) is void: + `blockStmt` + else: + `resultIdent` = `blockStmt` + procBody.add(createFutureVarCompletions(futureVarIdents, nil)) + procBody.insert(0): quote do: + {.push warning[resultshadowed]: off.} + when `subRetType` isnot void: + var `resultIdent`: `subRetType` + else: + var `resultIdent`: Future[void] + {.pop.} + + var `needsCompletionSym` = false + procBody.add quote do: + complete(`retFutureSym`, `resultIdent`) + + var closureIterator = newProc(iteratorNameSym, [quote do: owned(FutureBase)], + procBody, nnkIteratorDef) + closureIterator.pragma = newNimNode(nnkPragma, lineInfoFrom = prc.body) + closureIterator.addPragma(newIdentNode("closure")) + + # If proc has an explicit gcsafe pragma, we add it to iterator as well. 
+ if prc.pragma.findChild(it.kind in {nnkSym, nnkIdent} and $it == "gcsafe") != nil: + closureIterator.addPragma(newIdentNode("gcsafe")) + outerProcBody.add(closureIterator) + + # -> createCb(retFuture) + # NOTE: The NimAsyncContinueSuffix is checked for in asyncfutures.nim to produce + # friendlier stack traces: + var cbName = genSym(nskProc, prcName & NimAsyncContinueSuffix) + var procCb = getAst createCb(retFutureSym, iteratorNameSym, + newStrLitNode(prcName), + cbName, + createFutureVarCompletions(futureVarIdents, nil) + ) + outerProcBody.add procCb + + # -> return retFuture + outerProcBody.add newNimNode(nnkReturnStmt, prc.body[^1]).add(retFutureSym) + + result = prc + # Add discardable pragma. + if returnType.kind == nnkEmpty: + # xxx consider removing `owned`? it's inconsistent with non-void case + result.params[0] = quote do: owned(Future[void]) + + # based on the yglukhov's patch to chronos: https://github.com/status-im/nim-chronos/pull/47 + if procBody.kind != nnkEmpty: + body2.add quote do: + `outerProcBody` + result.body = body2 + +macro async*(prc: untyped): untyped = + ## Macro which processes async procedures into the appropriate + ## iterators and yield statements. + if prc.kind == nnkStmtList: + result = newStmtList() + for oneProc in prc: + result.add asyncSingleProc(oneProc) + else: + result = asyncSingleProc(prc) + when defined(nimDumpAsync): + echo repr result + +proc splitParamType(paramType: NimNode, async: bool): NimNode = + result = paramType + if paramType.kind == nnkInfix and paramType[0].strVal in ["|", "or"]: + let firstAsync = "async" in paramType[1].toStrLit().strVal.normalize + let secondAsync = "async" in paramType[2].toStrLit().strVal.normalize + + if firstAsync: + result = paramType[if async: 1 else: 2] + elif secondAsync: + result = paramType[if async: 2 else: 1] + +proc stripReturnType(returnType: NimNode): NimNode = + # Strip out the 'Future' from 'Future[T]'. 
+ result = returnType + if returnType.kind == nnkBracketExpr: + let fut = repr(returnType[0]) + verifyReturnType(fut, returnType) + result = returnType[1] + +proc splitProc(prc: NimNode): (NimNode, NimNode) = + ## Takes a procedure definition which takes a generic union of arguments, + ## for example: proc (socket: Socket | AsyncSocket). + ## It transforms them so that `proc (socket: Socket)` and + ## `proc (socket: AsyncSocket)` are returned. + + result[0] = prc.copyNimTree() + # Retrieve the `T` inside `Future[T]`. + let returnType = stripReturnType(result[0][3][0]) + result[0][3][0] = splitParamType(returnType, async = false) + for i in 1 ..< result[0][3].len: + # Sync proc (0) -> FormalParams (3) -> IdentDefs, the parameter (i) -> + # parameter type (1). + result[0][3][i][1] = splitParamType(result[0][3][i][1], async=false) + var multisyncAwait = quote: + template await(value: typed): untyped = + value + + result[0][^1] = nnkStmtList.newTree(multisyncAwait, result[0][^1]) + + result[1] = prc.copyNimTree() + if result[1][3][0].kind == nnkBracketExpr: + result[1][3][0][1] = splitParamType(result[1][3][0][1], async = true) + for i in 1 ..< result[1][3].len: + # Async proc (1) -> FormalParams (3) -> IdentDefs, the parameter (i) -> + # parameter type (1). + result[1][3][i][1] = splitParamType(result[1][3][i][1], async = true) + +macro multisync*(prc: untyped): untyped = + ## Macro which processes async procedures into both asynchronous and + ## synchronous procedures. + ## + ## The generated async procedures use the `async` macro, whereas the + ## generated synchronous procedures simply strip off the `await` calls. 
+ let (sync, asyncPrc) = splitProc(prc) + result = newStmtList() + result.add(asyncSingleProc(asyncPrc)) + result.add(sync) diff --git a/lib/pure/asyncnet.nim b/lib/pure/asyncnet.nim index e7325e0d7..ee07e599e 100644 --- a/lib/pure/asyncnet.nim +++ b/lib/pure/asyncnet.nim @@ -1,47 +1,86 @@ # # # Nim's Runtime Library -# (c) Copyright 2015 Dominik Picheta +# (c) Copyright 2017 Dominik Picheta # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## This module implements a high-level asynchronous sockets API based on the -## asynchronous dispatcher defined in the ``asyncdispatch`` module. +## asynchronous dispatcher defined in the `asyncdispatch` module. +## +## Asynchronous IO in Nim +## ====================== +## +## Async IO in Nim consists of multiple layers (from highest to lowest): +## +## * `asyncnet` module +## +## * Async await +## +## * `asyncdispatch` module (event loop) +## +## * `selectors` module +## +## Each builds on top of the layers below it. The selectors module is an +## abstraction for the various system `select()` mechanisms such as epoll or +## kqueue. If you wish you can use it directly, and some people have done so +## `successfully <http://goran.krampe.se/2014/10/25/nim-socketserver/>`_. +## But you must be aware that on Windows it only supports +## `select()`. +## +## The async dispatcher implements the proactor pattern and also has an +## implementation of IOCP. It implements the proactor pattern for other +## OS' via the selectors module. Futures are also implemented here, and +## indeed all the procedures return a future. +## +## The final layer is the async await transformation. This allows you to +## write asynchronous code in a synchronous style and works similar to +## C#'s await. The transformation works by converting any async procedures +## into an iterator. +## +## This is all single threaded, fully non-blocking and does give you a +## lot of control. 
In theory you should be able to work with any of these +## layers interchangeably (as long as you only care about non-Windows +## platforms). +## +## For most applications using `asyncnet` is the way to go as it builds +## over all the layers, providing some extra features such as buffering. ## ## SSL -## --- +## === ## -## SSL can be enabled by compiling with the ``-d:ssl`` flag. +## SSL can be enabled by compiling with the `-d:ssl` flag. ## -## You must create a new SSL context with the ``newContext`` function defined -## in the ``net`` module. You may then call ``wrapSocket`` on your socket using +## You must create a new SSL context with the `newContext` function defined +## in the `net` module. You may then call `wrapSocket` on your socket using ## the newly created SSL context to get an SSL socket. ## ## Examples -## -------- +## ======== ## ## Chat server -## ^^^^^^^^^^^ -## -## The following example demonstrates a simple chat server. +## ----------- ## -## .. code-block::nim +## The following example demonstrates a simple chat server. ## -## import asyncnet, asyncdispatch +## ```Nim +## import std/[asyncnet, asyncdispatch] ## ## var clients {.threadvar.}: seq[AsyncSocket] ## ## proc processClient(client: AsyncSocket) {.async.} = ## while true: ## let line = await client.recvLine() +## if line.len == 0: break ## for c in clients: ## await c.send(line & "\c\L") ## ## proc serve() {.async.} = ## clients = @[] ## var server = newAsyncSocket() +## server.setSockOpt(OptReuseAddr, true) ## server.bindAddr(Port(12345)) ## server.listen() ## @@ -53,72 +92,125 @@ ## ## asyncCheck serve() ## runForever() -## +## ``` + +import std/private/since -import asyncdispatch -import rawsockets -import net -import os +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] + +import std/[asyncdispatch, nativesockets, net, os] export SOBool -when defined(ssl): - import openssl +# TODO: Remove duplication introduced by PR #4683. 
+ +const defineSsl = defined(ssl) or defined(nimdoc) +const useNimNetLite = defined(nimNetLite) or defined(freertos) or defined(zephyr) or + defined(nuttx) + +when defineSsl: + import std/openssl type # TODO: I would prefer to just do: # AsyncSocket* {.borrow: `.`.} = distinct Socket. But that doesn't work. - AsyncSocketDesc = object + AsyncSocketDesc = object fd: SocketHandle - closed: bool ## determines whether this socket has been closed - case isBuffered: bool ## determines whether this socket is buffered. - of true: - buffer: array[0..BufferSize, char] - currPos: int # current index in buffer - bufLen: int # current length of buffer - of false: nil - case isSsl: bool - of true: - when defined(ssl): - sslHandle: SslPtr - sslContext: SslContext - bioIn: BIO - bioOut: BIO - of false: nil + closed: bool ## determines whether this socket has been closed + isBuffered: bool ## determines whether this socket is buffered. + buffer: array[0..BufferSize, char] + currPos: int # current index in buffer + bufLen: int # current length of buffer + isSsl: bool + when defineSsl: + sslHandle: SslPtr + sslContext: SslContext + bioIn: BIO + bioOut: BIO + sslNoShutdown: bool + domain: Domain + sockType: SockType + protocol: Protocol AsyncSocket* = ref AsyncSocketDesc -{.deprecated: [PAsyncSocket: AsyncSocket].} - -# TODO: Save AF, domain etc info and reuse it in procs which need it like connect. - -proc newAsyncSocket*(fd: TAsyncFD, isBuff: bool): AsyncSocket = - ## Creates a new ``AsyncSocket`` based on the supplied params. - assert fd != osInvalidSocket.TAsyncFD +proc newAsyncSocket*(fd: AsyncFD, domain: Domain = AF_INET, + sockType: SockType = SOCK_STREAM, + protocol: Protocol = IPPROTO_TCP, + buffered = true, + inheritable = defined(nimInheritHandles)): owned(AsyncSocket) = + ## Creates a new `AsyncSocket` based on the supplied params. + ## + ## The supplied `fd`'s non-blocking state will be enabled implicitly. 
+ ## + ## If `inheritable` is false (the default), the supplied `fd` will not + ## be inheritable by child processes. + ## + ## **Note**: This procedure will **NOT** register `fd` with the global + ## async dispatcher. You need to do this manually. If you have used + ## `newAsyncNativeSocket` to create `fd` then it's already registered. + assert fd != osInvalidSocket.AsyncFD new(result) result.fd = fd.SocketHandle - result.isBuffered = isBuff - if isBuff: + fd.SocketHandle.setBlocking(false) + if not fd.SocketHandle.setInheritable(inheritable): + raiseOSError(osLastError()) + result.isBuffered = buffered + result.domain = domain + result.sockType = sockType + result.protocol = protocol + if buffered: result.currPos = 0 -proc newAsyncSocket*(domain: Domain = AF_INET, typ: SockType = SOCK_STREAM, - protocol: Protocol = IPPROTO_TCP, buffered = true): AsyncSocket = +proc newAsyncSocket*(domain: Domain = AF_INET, sockType: SockType = SOCK_STREAM, + protocol: Protocol = IPPROTO_TCP, buffered = true, + inheritable = defined(nimInheritHandles)): owned(AsyncSocket) = ## Creates a new asynchronous socket. ## ## This procedure will also create a brand new file descriptor for ## this socket. - result = newAsyncSocket(newAsyncRawSocket(domain, typ, protocol), buffered) + ## + ## If `inheritable` is false (the default), the new file descriptor will not + ## be inheritable by child processes. + let fd = createAsyncNativeSocket(domain, sockType, protocol, inheritable) + if fd.SocketHandle == osInvalidSocket: + raiseOSError(osLastError()) + result = newAsyncSocket(fd, domain, sockType, protocol, buffered, inheritable) + +proc getLocalAddr*(socket: AsyncSocket): (string, Port) = + ## Get the socket's local address and port number. + ## + ## This is high-level interface for `getsockname`:idx:. 
+ getLocalAddr(socket.fd, socket.domain) -proc newAsyncSocket*(domain, typ, protocol: cint, buffered = true): AsyncSocket = +when not useNimNetLite: + proc getPeerAddr*(socket: AsyncSocket): (string, Port) = + ## Get the socket's peer address and port number. + ## + ## This is high-level interface for `getpeername`:idx:. + getPeerAddr(socket.fd, socket.domain) + +proc newAsyncSocket*(domain, sockType, protocol: cint, + buffered = true, + inheritable = defined(nimInheritHandles)): owned(AsyncSocket) = ## Creates a new asynchronous socket. ## ## This procedure will also create a brand new file descriptor for ## this socket. - result = newAsyncSocket(newAsyncRawSocket(domain, typ, protocol), buffered) - -when defined(ssl): - proc getSslError(handle: SslPtr, err: cint): cint = + ## + ## If `inheritable` is false (the default), the new file descriptor will not + ## be inheritable by child processes. + let fd = createAsyncNativeSocket(domain, sockType, protocol, inheritable) + if fd.SocketHandle == osInvalidSocket: + raiseOSError(osLastError()) + result = newAsyncSocket(fd, Domain(domain), SockType(sockType), + Protocol(protocol), buffered, inheritable) + +when defineSsl: + proc getSslError(socket: AsyncSocket, err: cint): cint = + assert socket.isSsl assert err < 0 - var ret = SSLGetError(handle, err.cint) + var ret = SSL_get_error(socket.sslHandle, err.cint) case ret of SSL_ERROR_ZERO_RETURN: raiseSSLError("TLS/SSL connection failed to initiate, socket closed prematurely.") @@ -129,6 +221,7 @@ when defined(ssl): of SSL_ERROR_WANT_X509_LOOKUP: raiseSSLError("Function for x509 lookup has been called.") of SSL_ERROR_SYSCALL, SSL_ERROR_SSL: + socket.sslNoShutdown = true raiseSSLError() else: raiseSSLError("Unknown Error") @@ -136,79 +229,170 @@ when defined(ssl): flags: set[SocketFlag]) {.async.} = let len = bioCtrlPending(socket.bioOut) if len > 0: - var data = newStringOfCap(len) - let read = bioRead(socket.bioOut, addr data[0], len) + var data = newString(len) + let 
read = bioRead(socket.bioOut, cast[cstring](addr data[0]), len) assert read != 0 if read < 0: - raiseSslError() + raiseSSLError() data.setLen(read) - await socket.fd.TAsyncFd.send(data, flags) + await socket.fd.AsyncFD.send(data, flags) proc appeaseSsl(socket: AsyncSocket, flags: set[SocketFlag], - sslError: cint) {.async.} = + sslError: cint): owned(Future[bool]) {.async.} = + ## Returns `true` if `socket` is still connected, otherwise `false`. + result = true case sslError of SSL_ERROR_WANT_WRITE: await sendPendingSslData(socket, flags) of SSL_ERROR_WANT_READ: - var data = await recv(socket.fd.TAsyncFD, BufferSize, flags) - let ret = bioWrite(socket.bioIn, addr data[0], data.len.cint) - if ret < 0: - raiseSSLError() + var data = await recv(socket.fd.AsyncFD, BufferSize, flags) + let length = len(data) + if length > 0: + let ret = bioWrite(socket.bioIn, cast[cstring](addr data[0]), length.cint) + if ret < 0: + raiseSSLError() + elif length == 0: + # connection not properly closed by remote side or connection dropped + SSL_set_shutdown(socket.sslHandle, SSL_RECEIVED_SHUTDOWN) + result = false else: raiseSSLError("Cannot appease SSL.") template sslLoop(socket: AsyncSocket, flags: set[SocketFlag], - op: expr) = + op: untyped) = var opResult {.inject.} = -1.cint while opResult < 0: + ErrClearError() + # Call the desired operation. opResult = op - # Bit hackish here. - # TODO: Introduce an async template transformation pragma? - yield sendPendingSslData(socket, flags) - if opResult < 0: - let err = getSslError(socket.sslHandle, opResult.cint) - yield appeaseSsl(socket, flags, err.cint) + let err = + if opResult < 0: + getSslError(socket, opResult.cint) + else: + SSL_ERROR_NONE + # Send any remaining pending SSL data. + await sendPendingSslData(socket, flags) -proc connect*(socket: AsyncSocket, address: string, port: Port, - af = AF_INET) {.async.} = - ## Connects ``socket`` to server at ``address:port``. 
+ # If the operation failed, try to see if SSL has some data to read + # or write. + if opResult < 0: + let fut = appeaseSsl(socket, flags, err.cint) + yield fut + if not fut.read(): + # Socket disconnected. + if SocketFlag.SafeDisconn in flags: + opResult = 0.cint + break + else: + raiseSSLError("Socket has been disconnected") + +proc dial*(address: string, port: Port, protocol = IPPROTO_TCP, + buffered = true): owned(Future[AsyncSocket]) {.async.} = + ## Establishes connection to the specified `address`:`port` pair via the + ## specified protocol. The procedure iterates through possible + ## resolutions of the `address` until it succeeds, meaning that it + ## seamlessly works with both IPv4 and IPv6. + ## Returns AsyncSocket ready to send or receive data. + let asyncFd = await asyncdispatch.dial(address, port, protocol) + let sockType = protocol.toSockType() + let domain = getSockDomain(asyncFd.SocketHandle) + result = newAsyncSocket(asyncFd, domain, sockType, protocol, buffered) + +proc connect*(socket: AsyncSocket, address: string, port: Port) {.async.} = + ## Connects `socket` to server at `address:port`. ## - ## Returns a ``Future`` which will complete when the connection succeeds + ## Returns a `Future` which will complete when the connection succeeds ## or an error occurs. - await connect(socket.fd.TAsyncFD, address, port, af) + await connect(socket.fd.AsyncFD, address, port, socket.domain) if socket.isSsl: - when defined(ssl): + when defineSsl: + if not isIpAddress(address): + # Set the SNI address for this connection. This call can fail if + # we're not using TLSv1+. 
+ discard SSL_set_tlsext_host_name(socket.sslHandle, address) + let flags = {SocketFlag.SafeDisconn} sslSetConnectState(socket.sslHandle) sslLoop(socket, flags, sslDoHandshake(socket.sslHandle)) -proc readInto(buf: cstring, size: int, socket: AsyncSocket, - flags: set[SocketFlag]): Future[int] {.async.} = +template readInto(buf: pointer, size: int, socket: AsyncSocket, + flags: set[SocketFlag]): int = + ## Reads **up to** `size` bytes from `socket` into `buf`. Note that + ## this is a template and not a proc. + assert(not socket.closed, "Cannot `recv` on a closed socket") + var res = 0 if socket.isSsl: - when defined(ssl): + when defineSsl: # SSL mode. sslLoop(socket, flags, - sslRead(socket.sslHandle, buf, size.cint)) - result = opResult + sslRead(socket.sslHandle, cast[cstring](buf), size.cint)) + res = opResult else: - var data = await recv(socket.fd.TAsyncFD, size, flags) - if data.len != 0: - copyMem(buf, addr data[0], data.len) # Not in SSL mode. - result = data.len + res = await asyncdispatch.recvInto(socket.fd.AsyncFD, buf, size, flags) + res -proc readIntoBuf(socket: AsyncSocket, - flags: set[SocketFlag]): Future[int] {.async.} = - result = await readInto(addr socket.buffer[0], BufferSize, socket, flags) +template readIntoBuf(socket: AsyncSocket, + flags: set[SocketFlag]): int = + var size = readInto(addr socket.buffer[0], BufferSize, socket, flags) socket.currPos = 0 - socket.bufLen = result + socket.bufLen = size + size + +proc recvInto*(socket: AsyncSocket, buf: pointer, size: int, + flags = {SocketFlag.SafeDisconn}): owned(Future[int]) {.async.} = + ## Reads **up to** `size` bytes from `socket` into `buf`. + ## + ## For buffered sockets this function will attempt to read all the requested + ## data. It will read this data in `BufferSize` chunks. + ## + ## For unbuffered sockets this function makes no effort to read + ## all the data requested. It will return as much data as the operating system + ## gives it. 
+ ## + ## If socket is disconnected during the + ## recv operation then the future may complete with only a part of the + ## requested data. + ## + ## If socket is disconnected and no data is available + ## to be read then the future will complete with a value of `0`. + if socket.isBuffered: + let originalBufPos = socket.currPos + + if socket.bufLen == 0: + let res = socket.readIntoBuf(flags - {SocketFlag.Peek}) + if res == 0: + return 0 + + var read = 0 + var cbuf = cast[cstring](buf) + while read < size: + if socket.currPos >= socket.bufLen: + if SocketFlag.Peek in flags: + # We don't want to get another buffer if we're peeking. + break + let res = socket.readIntoBuf(flags - {SocketFlag.Peek}) + if res == 0: + break + + let chunk = min(socket.bufLen-socket.currPos, size-read) + copyMem(addr(cbuf[read]), addr(socket.buffer[socket.currPos]), chunk) + read.inc(chunk) + socket.currPos.inc(chunk) + + if SocketFlag.Peek in flags: + # Restore old buffer cursor position. + socket.currPos = originalBufPos + result = read + else: + result = readInto(buf, size, socket, flags) proc recv*(socket: AsyncSocket, size: int, - flags = {SocketFlag.SafeDisconn}): Future[string] {.async.} = - ## Reads **up to** ``size`` bytes from ``socket``. + flags = {SocketFlag.SafeDisconn}): owned(Future[string]) {.async.} = + ## Reads **up to** `size` bytes from `socket`. ## ## For buffered sockets this function will attempt to read all the requested - ## data. It will read this data in ``BufferSize`` chunks. + ## data. It will read this data in `BufferSize` chunks. ## ## For unbuffered sockets this function makes no effort to read ## all the data requested. It will return as much data as the operating system @@ -219,13 +403,15 @@ proc recv*(socket: AsyncSocket, size: int, ## requested data. ## ## If socket is disconnected and no data is available - ## to be read then the future will complete with a value of ``""``. + ## to be read then the future will complete with a value of `""`. 
if socket.isBuffered: result = newString(size) + when not defined(nimSeqsV2): + shallow(result) let originalBufPos = socket.currPos if socket.bufLen == 0: - let res = await socket.readIntoBuf(flags - {SocketFlag.Peek}) + let res = socket.readIntoBuf(flags - {SocketFlag.Peek}) if res == 0: result.setLen(0) return @@ -236,7 +422,7 @@ proc recv*(socket: AsyncSocket, size: int, if SocketFlag.Peek in flags: # We don't want to get another buffer if we're peeking. break - let res = await socket.readIntoBuf(flags - {SocketFlag.Peek}) + let res = socket.readIntoBuf(flags - {SocketFlag.Peek}) if res == 0: break @@ -251,45 +437,67 @@ proc recv*(socket: AsyncSocket, size: int, result.setLen(read) else: result = newString(size) - let read = await readInto(addr result[0], size, socket, flags) + let read = readInto(addr result[0], size, socket, flags) result.setLen(read) +proc send*(socket: AsyncSocket, buf: pointer, size: int, + flags = {SocketFlag.SafeDisconn}) {.async.} = + ## Sends `size` bytes from `buf` to `socket`. The returned future will complete once all + ## data has been sent. + assert socket != nil + assert(not socket.closed, "Cannot `send` on a closed socket") + if socket.isSsl: + when defineSsl: + sslLoop(socket, flags, + sslWrite(socket.sslHandle, cast[cstring](buf), size.cint)) + await sendPendingSslData(socket, flags) + else: + await send(socket.fd.AsyncFD, buf, size, flags) + proc send*(socket: AsyncSocket, data: string, flags = {SocketFlag.SafeDisconn}) {.async.} = - ## Sends ``data`` to ``socket``. The returned future will complete once all + ## Sends `data` to `socket`. The returned future will complete once all ## data has been sent. 
assert socket != nil if socket.isSsl: - when defined(ssl): + when defineSsl: var copy = data sslLoop(socket, flags, - sslWrite(socket.sslHandle, addr copy[0], copy.len.cint)) + sslWrite(socket.sslHandle, cast[cstring](addr copy[0]), copy.len.cint)) await sendPendingSslData(socket, flags) else: - await send(socket.fd.TAsyncFD, data, flags) + await send(socket.fd.AsyncFD, data, flags) -proc acceptAddr*(socket: AsyncSocket, flags = {SocketFlag.SafeDisconn}): - Future[tuple[address: string, client: AsyncSocket]] = +proc acceptAddr*(socket: AsyncSocket, flags = {SocketFlag.SafeDisconn}, + inheritable = defined(nimInheritHandles)): + owned(Future[tuple[address: string, client: AsyncSocket]]) = ## Accepts a new connection. Returns a future containing the client socket ## corresponding to that connection and the remote address of the client. + ## + ## If `inheritable` is false (the default), the resulting client socket will + ## not be inheritable by child processes. + ## ## The future will complete when the connection is successfully accepted. var retFuture = newFuture[tuple[address: string, client: AsyncSocket]]("asyncnet.acceptAddr") - var fut = acceptAddr(socket.fd.TAsyncFD, flags) + var fut = acceptAddr(socket.fd.AsyncFD, flags, inheritable) fut.callback = - proc (future: Future[tuple[address: string, client: TAsyncFD]]) = + proc (future: Future[tuple[address: string, client: AsyncFD]]) = assert future.finished if future.failed: retFuture.fail(future.readError) else: let resultTup = (future.read.address, - newAsyncSocket(future.read.client, socket.isBuffered)) + newAsyncSocket(future.read.client, socket.domain, + socket.sockType, socket.protocol, socket.isBuffered, inheritable)) retFuture.complete(resultTup) return retFuture proc accept*(socket: AsyncSocket, - flags = {SocketFlag.SafeDisconn}): Future[AsyncSocket] = + flags = {SocketFlag.SafeDisconn}): owned(Future[AsyncSocket]) = ## Accepts a new connection. 
Returns a future containing the client socket ## corresponding to that connection. + ## If `inheritable` is false (the default), the resulting client socket will + ## not be inheritable by child processes. ## The future will complete when the connection is successfully accepted. var retFut = newFuture[AsyncSocket]("asyncnet.accept") var fut = acceptAddr(socket, flags) @@ -302,43 +510,52 @@ proc accept*(socket: AsyncSocket, retFut.complete(future.read.client) return retFut -proc recvLine*(socket: AsyncSocket, - flags = {SocketFlag.SafeDisconn}): Future[string] {.async.} = - ## Reads a line of data from ``socket``. Returned future will complete once - ## a full line is read or an error occurs. +proc recvLineInto*(socket: AsyncSocket, resString: FutureVar[string], + flags = {SocketFlag.SafeDisconn}, maxLength = MaxLineLength) {.async.} = + ## Reads a line of data from `socket` into `resString`. ## - ## If a full line is read ``\r\L`` is not - ## added to ``line``, however if solely ``\r\L`` is read then ``line`` + ## If a full line is read `\r\L` is not + ## added to `line`, however if solely `\r\L` is read then `line` ## will be set to it. - ## - ## If the socket is disconnected, ``line`` will be set to ``""``. ## - ## If the socket is disconnected in the middle of a line (before ``\r\L`` - ## is read) then line will be set to ``""``. + ## If the socket is disconnected, `line` will be set to `""`. + ## + ## If the socket is disconnected in the middle of a line (before `\r\L` + ## is read) then line will be set to `""`. ## The partial line **will be lost**. ## - ## **Warning**: The ``Peek`` flag is not yet implemented. - ## - ## **Warning**: ``recvLine`` on unbuffered sockets assumes that the protocol - ## uses ``\r\L`` to delimit a new line. - template addNLIfEmpty(): stmt = - if result.len == 0: - result.add("\c\L") + ## The `maxLength` parameter determines the maximum amount of characters + ## that can be read. `resString` will be truncated after that. + ## + ## .. 
warning:: The `Peek` flag is not yet implemented. + ## + ## .. warning:: `recvLineInto` on unbuffered sockets assumes that the protocol uses `\r\L` to delimit a new line. assert SocketFlag.Peek notin flags ## TODO: + result = newFuture[void]("asyncnet.recvLineInto") + + # TODO: Make the async transformation check for FutureVar params and complete + # them when the result future is completed. + # Can we replace the result future with the FutureVar? + + template addNLIfEmpty(): untyped = + if resString.mget.len == 0: + resString.mget.add("\c\L") + if socket.isBuffered: - result = "" if socket.bufLen == 0: - let res = await socket.readIntoBuf(flags) + let res = socket.readIntoBuf(flags) if res == 0: + resString.complete() return var lastR = false while true: if socket.currPos >= socket.bufLen: - let res = await socket.readIntoBuf(flags) + let res = socket.readIntoBuf(flags) if res == 0: - result = "" - break + resString.mget.setLen(0) + resString.complete() + return case socket.buffer[socket.currPos] of '\r': @@ -347,107 +564,261 @@ proc recvLine*(socket: AsyncSocket, of '\L': addNLIfEmpty() socket.currPos.inc() + resString.complete() return else: if lastR: socket.currPos.inc() + resString.complete() return else: - result.add socket.buffer[socket.currPos] + resString.mget.add socket.buffer[socket.currPos] socket.currPos.inc() + + # Verify that this isn't a DOS attack: #3847. + if resString.mget.len > maxLength: break else: - result = "" var c = "" while true: c = await recv(socket, 1, flags) if c.len == 0: - return "" + resString.mget.setLen(0) + resString.complete() + return if c == "\r": c = await recv(socket, 1, flags) # Skip \L assert c == "\L" addNLIfEmpty() + resString.complete() return elif c == "\L": addNLIfEmpty() + resString.complete() return - add(result.string, c) + resString.mget.add c -proc listen*(socket: AsyncSocket, backlog = SOMAXCONN) {.tags: [ReadIOEffect].} = - ## Marks ``socket`` as accepting connections. 
- ## ``Backlog`` specifies the maximum length of the + # Verify that this isn't a DOS attack: #3847. + if resString.mget.len > maxLength: break + resString.complete() + +proc recvLine*(socket: AsyncSocket, + flags = {SocketFlag.SafeDisconn}, + maxLength = MaxLineLength): owned(Future[string]) {.async.} = + ## Reads a line of data from `socket`. Returned future will complete once + ## a full line is read or an error occurs. + ## + ## If a full line is read `\r\L` is not + ## added to `line`, however if solely `\r\L` is read then `line` + ## will be set to it. + ## + ## If the socket is disconnected, `line` will be set to `""`. + ## + ## If the socket is disconnected in the middle of a line (before `\r\L` + ## is read) then line will be set to `""`. + ## The partial line **will be lost**. + ## + ## The `maxLength` parameter determines the maximum amount of characters + ## that can be read. The result is truncated after that. + ## + ## .. warning:: The `Peek` flag is not yet implemented. + ## + ## .. warning:: `recvLine` on unbuffered sockets assumes that the protocol uses `\r\L` to delimit a new line. + assert SocketFlag.Peek notin flags ## TODO: + + # TODO: Optimise this + var resString = newFutureVar[string]("asyncnet.recvLine") + resString.mget() = "" + await socket.recvLineInto(resString, flags, maxLength) + result = resString.mget() + +proc listen*(socket: AsyncSocket, backlog = SOMAXCONN) {.tags: [ + ReadIOEffect].} = + ## Marks `socket` as accepting connections. + ## `Backlog` specifies the maximum length of the ## queue of pending connections. ## - ## Raises an EOS error upon failure. + ## Raises an OSError error upon failure. if listen(socket.fd, backlog) < 0'i32: raiseOSError(osLastError()) proc bindAddr*(socket: AsyncSocket, port = Port(0), address = "") {. tags: [ReadIOEffect].} = - ## Binds ``address``:``port`` to the socket. + ## Binds `address`:`port` to the socket. ## - ## If ``address`` is "" then ADDR_ANY will be bound. 
- - if address == "": - var name: Sockaddr_in - when defined(Windows) or defined(nimdoc): - name.sin_family = toInt(AF_INET).int16 + ## If `address` is "" then ADDR_ANY will be bound. + var realaddr = address + if realaddr == "": + case socket.domain + of AF_INET6: realaddr = "::" + of AF_INET: realaddr = "0.0.0.0" else: - name.sin_family = toInt(AF_INET) - name.sin_port = htons(int16(port)) - name.sin_addr.s_addr = htonl(INADDR_ANY) - if bindAddr(socket.fd, cast[ptr SockAddr](addr(name)), - sizeof(name).Socklen) < 0'i32: - raiseOSError(osLastError()) - else: - var aiList = getAddrInfo(address, port, AF_INET) - if bindAddr(socket.fd, aiList.ai_addr, aiList.ai_addrlen.Socklen) < 0'i32: - dealloc(aiList) - raiseOSError(osLastError()) - dealloc(aiList) + raise newException(ValueError, + "Unknown socket address family and no address specified to bindAddr") + + var aiList = getAddrInfo(realaddr, port, socket.domain) + if bindAddr(socket.fd, aiList.ai_addr, aiList.ai_addrlen.SockLen) < 0'i32: + freeAddrInfo(aiList) + raiseOSError(osLastError()) + freeAddrInfo(aiList) + +proc hasDataBuffered*(s: AsyncSocket): bool {.since: (1, 5).} = + ## Determines whether an AsyncSocket has data buffered. + # xxx dedup with std/net + s.isBuffered and s.bufLen > 0 and s.currPos != s.bufLen + +when defined(posix) and not useNimNetLite: + + proc connectUnix*(socket: AsyncSocket, path: string): owned(Future[void]) = + ## Binds Unix socket to `path`. 
+ ## This only works on Unix-style systems: Mac OS X, BSD and Linux + when not defined(nimdoc): + let retFuture = newFuture[void]("connectUnix") + result = retFuture + + proc cb(fd: AsyncFD): bool = + let ret = SocketHandle(fd).getSockOptInt(cint(SOL_SOCKET), cint(SO_ERROR)) + if ret == 0: + retFuture.complete() + return true + elif ret == EINTR: + return false + else: + retFuture.fail(newOSError(OSErrorCode(ret))) + return true + + var socketAddr = makeUnixAddr(path) + let ret = socket.fd.connect(cast[ptr SockAddr](addr socketAddr), + (offsetOf(socketAddr, sun_path) + path.len + 1).SockLen) + if ret == 0: + # Request to connect completed immediately. + retFuture.complete() + else: + let lastError = osLastError() + if lastError.int32 == EINTR or lastError.int32 == EINPROGRESS: + addWrite(AsyncFD(socket.fd), cb) + else: + retFuture.fail(newOSError(lastError)) + + proc bindUnix*(socket: AsyncSocket, path: string) {. + tags: [ReadIOEffect].} = + ## Binds Unix socket to `path`. + ## This only works on Unix-style systems: Mac OS X, BSD and Linux + when not defined(nimdoc): + var socketAddr = makeUnixAddr(path) + if socket.fd.bindAddr(cast[ptr SockAddr](addr socketAddr), + (offsetOf(socketAddr, sun_path) + path.len + 1).SockLen) != 0'i32: + raiseOSError(osLastError()) + +elif defined(nimdoc): + + proc connectUnix*(socket: AsyncSocket, path: string): owned(Future[void]) = + ## Binds Unix socket to `path`. + ## This only works on Unix-style systems: Mac OS X, BSD and Linux + discard + + proc bindUnix*(socket: AsyncSocket, path: string) = + ## Binds Unix socket to `path`. + ## This only works on Unix-style systems: Mac OS X, BSD and Linux + discard proc close*(socket: AsyncSocket) = ## Closes the socket. + if socket.closed: return + defer: - socket.fd.TAsyncFD.closeSocket() - when defined(ssl): - if socket.isSSL: - let res = SslShutdown(socket.sslHandle) + socket.fd.AsyncFD.closeSocket() + socket.closed = true # TODO: Add extra debugging checks for this. 
+ + when defineSsl: + if socket.isSsl: + let res = + # Don't call SSL_shutdown if the connection has not been fully + # established, see: + # https://github.com/openssl/openssl/issues/710#issuecomment-253897666 + if not socket.sslNoShutdown and SSL_in_init(socket.sslHandle) == 0: + ErrClearError() + SSL_shutdown(socket.sslHandle) + else: + 0 + SSL_free(socket.sslHandle) if res == 0: discard elif res != 1: - raiseSslError() - socket.closed = true # TODO: Add extra debugging checks for this. + raiseSSLError() -when defined(ssl): +when defineSsl: + proc sslHandle*(self: AsyncSocket): SslPtr = + ## Retrieve the ssl pointer of `socket`. + ## Useful for interfacing with `openssl`. + self.sslHandle + proc wrapSocket*(ctx: SslContext, socket: AsyncSocket) = ## Wraps a socket in an SSL context. This function effectively turns - ## ``socket`` into an SSL socket. + ## `socket` into an SSL socket. ## ## **Disclaimer**: This code is not well tested, may be very unsafe and ## prone to security vulnerabilities. socket.isSsl = true socket.sslContext = ctx - socket.sslHandle = SSLNew(SSLCTX(socket.sslContext)) + socket.sslHandle = SSL_new(socket.sslContext.context) if socket.sslHandle == nil: - raiseSslError() + raiseSSLError() - socket.bioIn = bioNew(bio_s_mem()) - socket.bioOut = bioNew(bio_s_mem()) + socket.bioIn = bioNew(bioSMem()) + socket.bioOut = bioNew(bioSMem()) sslSetBio(socket.sslHandle, socket.bioIn, socket.bioOut) + socket.sslNoShutdown = true + + proc wrapConnectedSocket*(ctx: SslContext, socket: AsyncSocket, + handshake: SslHandshakeType, + hostname: string = "") = + ## Wraps a connected socket in an SSL context. This function effectively + ## turns `socket` into an SSL socket. + ## `hostname` should be specified so that the client knows which hostname + ## the server certificate should be validated against. + ## + ## This should be called on a connected socket, and will perform + ## an SSL handshake immediately. 
+ ## + ## **Disclaimer**: This code is not well tested, may be very unsafe and + ## prone to security vulnerabilities. + wrapSocket(ctx, socket) + + case handshake + of handshakeAsClient: + if hostname.len > 0 and not isIpAddress(hostname): + # Set the SNI address for this connection. This call can fail if + # we're not using TLSv1+. + discard SSL_set_tlsext_host_name(socket.sslHandle, hostname) + sslSetConnectState(socket.sslHandle) + of handshakeAsServer: + sslSetAcceptState(socket.sslHandle) + + proc getPeerCertificates*(socket: AsyncSocket): seq[Certificate] {.since: (1, 1).} = + ## Returns the certificate chain received by the peer we are connected to + ## through the given socket. + ## The handshake must have been completed and the certificate chain must + ## have been verified successfully or else an empty sequence is returned. + ## The chain is ordered from leaf certificate to root certificate. + if not socket.isSsl: + result = newSeq[Certificate]() + else: + result = getPeerCertificates(socket.sslHandle) + proc getSockOpt*(socket: AsyncSocket, opt: SOBool, level = SOL_SOCKET): bool {. tags: [ReadIOEffect].} = - ## Retrieves option ``opt`` as a boolean value. + ## Retrieves option `opt` as a boolean value. var res = getSockOptInt(socket.fd, cint(level), toCInt(opt)) result = res != 0 proc setSockOpt*(socket: AsyncSocket, opt: SOBool, value: bool, level = SOL_SOCKET) {.tags: [WriteIOEffect].} = - ## Sets option ``opt`` to a boolean value specified by ``value``. + ## Sets option `opt` to a boolean value specified by `value`. var valuei = cint(if value: 1 else: 0) setSockOptInt(socket.fd, cint(level), toCInt(opt), valuei) proc isSsl*(socket: AsyncSocket): bool = - ## Determines whether ``socket`` is a SSL socket. + ## Determines whether `socket` is a SSL socket. socket.isSsl proc getFd*(socket: AsyncSocket): SocketHandle = @@ -458,7 +829,131 @@ proc isClosed*(socket: AsyncSocket): bool = ## Determines whether the socket has been closed. 
return socket.closed -when isMainModule: +proc sendTo*(socket: AsyncSocket, address: string, port: Port, data: string, + flags = {SocketFlag.SafeDisconn}): owned(Future[void]) + {.async, since: (1, 3).} = + ## This proc sends `data` to the specified `address`, which may be an IP + ## address or a hostname. If a hostname is specified this function will try + ## each IP of that hostname. The returned future will complete once all data + ## has been sent. + ## + ## If an error occurs an OSError exception will be raised. + ## + ## This proc is normally used with connectionless sockets (UDP sockets). + assert(socket.protocol != IPPROTO_TCP, + "Cannot `sendTo` on a TCP socket. Use `send` instead") + assert(not socket.closed, "Cannot `sendTo` on a closed socket") + + let aiList = getAddrInfo(address, port, socket.domain, socket.sockType, + socket.protocol) + + var + it = aiList + success = false + lastException: ref Exception + + while it != nil: + let fut = sendTo(socket.fd.AsyncFD, cstring(data), len(data), it.ai_addr, + it.ai_addrlen.SockLen, flags) + + yield fut + + if not fut.failed: + success = true + + break + + lastException = fut.readError() + + it = it.ai_next + + freeAddrInfo(aiList) + + if not success: + if lastException != nil: + raise lastException + else: + raise newException(IOError, "Couldn't resolve address: " & address) + +proc recvFrom*(socket: AsyncSocket, data: FutureVar[string], size: int, + address: FutureVar[string], port: FutureVar[Port], + flags = {SocketFlag.SafeDisconn}): owned(Future[int]) + {.async, since: (1, 3).} = + ## Receives a datagram data from `socket` into `data`, which must be at + ## least of size `size`. The address and port of datagram's sender will be + ## stored into `address` and `port`, respectively. Returned future will + ## complete once one datagram has been received, and will return size of + ## packet received. + ## + ## If an error occurs an OSError exception will be raised. 
+ ## + ## This proc is normally used with connectionless sockets (UDP sockets). + ## + ## **Notes** + ## * `data` must be initialized to the length of `size`. + ## * `address` must be initialized to 46 in length. + template adaptRecvFromToDomain(domain: Domain) = + var lAddr = sizeof(sAddr).SockLen + + result = await recvFromInto(AsyncFD(getFd(socket)), cstring(data.mget()), size, + cast[ptr SockAddr](addr sAddr), addr lAddr, + flags) + + data.mget().setLen(result) + data.complete() + + getAddrString(cast[ptr SockAddr](addr sAddr), address.mget()) + + address.complete() + + when domain == AF_INET6: + port.complete(ntohs(sAddr.sin6_port).Port) + else: + port.complete(ntohs(sAddr.sin_port).Port) + + assert(socket.protocol != IPPROTO_TCP, + "Cannot `recvFrom` on a TCP socket. Use `recv` or `recvInto` instead") + assert(not socket.closed, "Cannot `recvFrom` on a closed socket") + assert(size == len(data.mget()), + "`date` was not initialized correctly. `size` != `len(data.mget())`") + assert(46 == len(address.mget()), + "`address` was not initialized correctly. 46 != `len(address.mget())`") + + case socket.domain + of AF_INET6: + var sAddr: Sockaddr_in6 + adaptRecvFromToDomain(AF_INET6) + of AF_INET: + var sAddr: Sockaddr_in + adaptRecvFromToDomain(AF_INET) + else: + raise newException(ValueError, "Unknown socket address family") + +proc recvFrom*(socket: AsyncSocket, size: int, + flags = {SocketFlag.SafeDisconn}): + owned(Future[tuple[data: string, address: string, port: Port]]) + {.async, since: (1, 3).} = + ## Receives a datagram data from `socket`, which must be at least of size + ## `size`. Returned future will complete once one datagram has been received + ## and will return tuple with: data of packet received; and address and port + ## of datagram's sender. + ## + ## If an error occurs an OSError exception will be raised. + ## + ## This proc is normally used with connectionless sockets (UDP sockets). 
+ var + data = newFutureVar[string]() + address = newFutureVar[string]() + port = newFutureVar[Port]() + + data.mget().setLen(size) + address.mget().setLen(46) + + let read = await recvFrom(socket, data, size, address, port, flags) + + result = (data.mget(), address.mget(), port.mget()) + +when not defined(testing) and isMainModule: type TestCases = enum HighClient, LowClient, LowServer @@ -500,11 +995,10 @@ when isMainModule: proc (future: Future[void]) = echo("Send") client.close() - + var f = accept(sock) f.callback = onAccept - + var f = accept(sock) f.callback = onAccept runForever() - diff --git a/lib/pure/asyncstreams.nim b/lib/pure/asyncstreams.nim new file mode 100644 index 000000000..c97b98d55 --- /dev/null +++ b/lib/pure/asyncstreams.nim @@ -0,0 +1,147 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Dominik Picheta +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Unstable API. + +import std/asyncfutures + +when defined(nimPreviewSlimSystem): + import std/assertions + +import std/deques + +type + FutureStream*[T] = ref object ## Special future that acts as + ## a queue. Its API is still + ## experimental and so is + ## subject to change. + queue: Deque[T] + finished: bool + cb: proc () {.closure, gcsafe.} + error*: ref Exception + +proc newFutureStream*[T](fromProc = "unspecified"): FutureStream[T] = + ## Create a new `FutureStream`. This future's callback is activated when + ## two events occur: + ## + ## * New data is written into the future stream. + ## * The future stream is completed (this means that no more data will be + ## written). + ## + ## Specifying `fromProc`, which is a string specifying the name of the proc + ## that this future belongs to, is a good habit as it helps with debugging. + ## + ## **Note:** The API of FutureStream is still new and so has a higher + ## likelihood of changing in the future. 
+ result = FutureStream[T](finished: false, cb: nil) + result.queue = initDeque[T]() + +proc complete*[T](future: FutureStream[T]) = + ## Completes a `FutureStream` signalling the end of data. + assert(future.error == nil, "Trying to complete failed stream") + future.finished = true + if not future.cb.isNil: + future.cb() + +proc fail*[T](future: FutureStream[T], error: ref Exception) = + ## Completes `future` with `error`. + assert(not future.finished) + future.finished = true + future.error = error + if not future.cb.isNil: + future.cb() + +proc `callback=`*[T](future: FutureStream[T], + cb: proc (future: FutureStream[T]) {.closure, gcsafe.}) = + ## Sets the callback proc to be called when data was placed inside the + ## future stream. + ## + ## The callback is also called when the future is completed. So you should + ## use `finished` to check whether data is available. + ## + ## If the future stream already has data or is finished then `cb` will be + ## called immediately. + proc named() = cb(future) + future.cb = named + if future.queue.len > 0 or future.finished: + callSoon(future.cb) + +proc finished*[T](future: FutureStream[T]): bool = + ## Check if a `FutureStream` is finished. `true` value means that + ## no more data will be placed inside the stream *and* that there is + ## no data waiting to be retrieved. + result = future.finished and future.queue.len == 0 + +proc failed*[T](future: FutureStream[T]): bool = + ## Determines whether `future` completed with an error. + return future.error != nil + +proc write*[T](future: FutureStream[T], value: T): Future[void] = + ## Writes the specified value inside the specified future stream. + ## + ## This will raise `ValueError` if `future` is finished. + result = newFuture[void]("FutureStream.put") + if future.finished: + let msg = "FutureStream is finished and so no longer accepts new data." 
+ result.fail(newException(ValueError, msg)) + return + # TODO: Implement limiting of the streams storage to prevent it growing + # infinitely when no reads are occurring. + future.queue.addLast(value) + if not future.cb.isNil: future.cb() + result.complete() + +proc read*[T](future: FutureStream[T]): owned(Future[(bool, T)]) = + ## Returns a future that will complete when the `FutureStream` has data + ## placed into it. The future will be completed with the oldest + ## value stored inside the stream. The return value will also determine + ## whether data was retrieved, `false` means that the future stream was + ## completed and no data was retrieved. + ## + ## This function will remove the data that was returned from the underlying + ## `FutureStream`. + var resFut = newFuture[(bool, T)]("FutureStream.take") + let savedCb = future.cb + proc newCb(fs: FutureStream[T]) = + # Exit early if `resFut` is already complete. (See #8994). + if resFut.finished: return + + # We don't want this callback called again. + #future.cb = nil + + # The return value depends on whether the FutureStream has finished. + var res: (bool, T) + if finished(fs): + # Remember, this callback is called when the FutureStream is completed. + res[0] = false + else: + res[0] = true + res[1] = fs.queue.popFirst() + + if fs.failed: + resFut.fail(fs.error) + else: + resFut.complete(res) + + # If the saved callback isn't nil then let's call it. + if not savedCb.isNil: + if fs.queue.len > 0: + savedCb() + else: + future.cb = savedCb + + if future.queue.len > 0 or future.finished: + newCb(future) + else: + future.callback = newCb + return resFut + +proc len*[T](future: FutureStream[T]): int = + ## Returns the amount of data pieces inside the stream. + future.queue.len diff --git a/lib/pure/base64.nim b/lib/pure/base64.nim index 41d19dc0f..591d22cc0 100644 --- a/lib/pure/base64.nim +++ b/lib/pure/base64.nim @@ -8,121 +8,266 @@ # ## This module implements a base64 encoder and decoder. +## +## Unstable API. 
+## +## Base64 is an encoding and decoding technique used to convert binary +## data to an ASCII string format. +## Each Base64 digit represents exactly 6 bits of data. Three 8-bit +## bytes (i.e., a total of 24 bits) can therefore be represented by +## four 6-bit Base64 digits. -const - cb64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" - -template encodeInternal(s: expr, lineLen: int, newLine: string): stmt {.immediate.} = - ## encodes `s` into base64 representation. After `lineLen` characters, a - ## `newline` is added. - var total = ((len(s) + 2) div 3) * 4 - var numLines = (total + lineLen - 1) div lineLen - if numLines > 0: inc(total, (numLines-1) * newLine.len) - - result = newString(total) - var i = 0 - var r = 0 - var currLine = 0 - while i < s.len - 2: - var a = ord(s[i]) - var b = ord(s[i+1]) - var c = ord(s[i+2]) - result[r] = cb64[a shr 2] - result[r+1] = cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)] - result[r+2] = cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)] - result[r+3] = cb64[c and 0x3F] - inc(r, 4) - inc(i, 3) - inc(currLine, 4) - if currLine >= lineLen and i != s.len-2: - for x in items(newLine): - result[r] = x - inc(r) - currLine = 0 - - if i < s.len-1: - var a = ord(s[i]) - var b = ord(s[i+1]) - result[r] = cb64[a shr 2] - result[r+1] = cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)] - result[r+2] = cb64[((b and 0x0F) shl 2)] - result[r+3] = '=' - if r+4 != result.len: - setLen(result, r+4) - elif i < s.len: - var a = ord(s[i]) - result[r] = cb64[a shr 2] - result[r+1] = cb64[(a and 3) shl 4] - result[r+2] = '=' - result[r+3] = '=' - if r+4 != result.len: - setLen(result, r+4) +##[ +# Basic usage +## Encoding data +]## + +runnableExamples: + let encoded = encode("Hello World") + assert encoded == "SGVsbG8gV29ybGQ=" + +## +## Apart from strings you can also encode lists of integers or characters: +## + +runnableExamples: + let encodedInts = encode([1'u8,2,3]) + assert encodedInts == "AQID" + let encodedChars = 
encode(['h','e','y']) + assert encodedChars == "aGV5" + +##[ +## Decoding data +]## + +runnableExamples: + let decoded = decode("SGVsbG8gV29ybGQ=") + assert decoded == "Hello World" + +##[ +## URL Safe Base64 +]## + +runnableExamples: + assert encode("c\xf7>", safe = true) == "Y_c-" + assert encode("c\xf7>", safe = false) == "Y/c+" + +## See also +## ======== +## +## * `hashes module<hashes.html>`_ for efficient computations of hash values for diverse Nim types +## * `md5 module<md5.html>`_ for the MD5 checksum algorithm +## * `sha1 module<sha1.html>`_ for the SHA-1 checksum algorithm + +template cbBase(a, b): untyped = [ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', a, b] + +const + cb64 = cbBase('+', '/') + cb64safe = cbBase('-', '_') + +const + invalidChar = 255 + +template encodeSize(size: int): int = (size * 4 div 3) + 6 + +template encodeInternal(s, alphabet: typed): untyped = + ## encodes `s` into base64 representation. 
+ + result.setLen(encodeSize(s.len)) + + let + padding = s.len mod 3 + inputEnds = s.len - padding + + var + inputIndex = 0 + outputIndex = 0 + n: uint32 + b: uint32 + + template inputByte(exp: untyped) = + b = uint32(s[inputIndex]) + n = exp + inc inputIndex + + template outputChar(x: typed) = + result[outputIndex] = alphabet[x and 63] + inc outputIndex + + template outputChar(c: char) = + result[outputIndex] = c + inc outputIndex + + while inputIndex != inputEnds: + inputByte(b shl 16) + inputByte(n or b shl 8) + inputByte(n or b shl 0) + outputChar(n shr 18) + outputChar(n shr 12) + outputChar(n shr 6) + outputChar(n shr 0) + + if padding == 1: + inputByte(b shl 16) + outputChar(n shr 18) + outputChar(n shr 12) + outputChar('=') + outputChar('=') + + elif padding == 2: + inputByte(b shl 16) + inputByte(n or b shl 8) + outputChar(n shr 18) + outputChar(n shr 12) + outputChar(n shr 6) + outputChar('=') + + result.setLen(outputIndex) + +template encodeImpl() {.dirty.} = + if safe: + encodeInternal(s, cb64safe) else: - #assert(r == result.len) - discard - -proc encode*[T:SomeInteger|char](s: openarray[T], lineLen = 75, newLine="\13\10"): string = - ## encodes `s` into base64 representation. After `lineLen` characters, a - ## `newline` is added. - encodeInternal(s, lineLen, newLine) - -proc encode*(s: string, lineLen = 75, newLine="\13\10"): string = - ## encodes `s` into base64 representation. After `lineLen` characters, a - ## `newline` is added. - encodeInternal(s, lineLen, newLine) - -proc decodeByte(b: char): int {.inline.} = - case b - of '+': result = ord('>') - of '0'..'9': result = ord(b) + 4 - of 'A'..'Z': result = ord(b) - ord('A') - of 'a'..'z': result = ord(b) - 71 - else: result = 63 - -proc decode*(s: string): string = - ## decodes a string in base64 representation back into its original form. - ## Whitespace is skipped. 
- const Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'} - var total = ((len(s) + 3) div 4) * 3 - # total is an upper bound, as we will skip arbitrary whitespace: - result = newString(total) - - var i = 0 - var r = 0 - while true: - while s[i] in Whitespace: inc(i) - if i < s.len-3: - var a = s[i].decodeByte - var b = s[i+1].decodeByte - var c = s[i+2].decodeByte - var d = s[i+3].decodeByte - - result[r] = chr((a shl 2) and 0xff or ((b shr 4) and 0x03)) - result[r+1] = chr((b shl 4) and 0xff or ((c shr 2) and 0x0F)) - result[r+2] = chr((c shl 6) and 0xff or (d and 0x3F)) - inc(r, 3) - inc(i, 4) - else: break - assert i == s.len - # adjust the length: - if i > 0 and s[i-1] == '=': - dec(r) - if i > 1 and s[i-2] == '=': dec(r) - setLen(result, r) - -when isMainModule: - assert encode("leasure.") == "bGVhc3VyZS4=" - assert encode("easure.") == "ZWFzdXJlLg==" - assert encode("asure.") == "YXN1cmUu" - assert encode("sure.") == "c3VyZS4=" - - const longText = """Man is distinguished, not only by his reason, but by this - singular passion from other animals, which is a lust of the mind, - that by a perseverance of delight in the continued and indefatigable - generation of knowledge, exceeds the short vehemence of any carnal - pleasure.""" - const tests = ["", "abc", "xyz", "man", "leasure.", "sure.", "easure.", - "asure.", longText] - for t in items(tests): - assert decode(encode(t)) == t + encodeInternal(s, cb64) + +proc encode*[T: byte|char](s: openArray[T], safe = false): string = + ## Encodes `s` into base64 representation. + ## + ## If `safe` is `true` then it will encode using the + ## URL-Safe and Filesystem-safe standard alphabet characters, + ## which substitutes `-` instead of `+` and `_` instead of `/`. 
+ ## * https://en.wikipedia.org/wiki/Base64#URL_applications + ## * https://tools.ietf.org/html/rfc4648#page-7 + ## + ## **See also:** + ## * `decode proc<#decode,string>`_ for decoding a string + runnableExamples: + assert encode("Hello World") == "SGVsbG8gV29ybGQ=" + assert encode(['n', 'i', 'm']) == "bmlt" + assert encode(@['n', 'i', 'm']) == "bmlt" + assert encode([1'u8, 2, 3, 4, 5]) == "AQIDBAU=" + encodeImpl() + +proc encode*[T: SomeInteger and not byte](s: openArray[T], safe = false): string + {.deprecated: "use `byte` or `char` instead".} = + encodeImpl() + +proc encodeMime*(s: string, lineLen = 75.Positive, newLine = "\r\n", + safe = false): string = + ## Encodes `s` into base64 representation as lines. + ## Used in email MIME format, use `lineLen` and `newline`. + ## + ## This procedure encodes a string according to MIME spec. + ## + ## If `safe` is `true` then it will encode using the + ## URL-Safe and Filesystem-safe standard alphabet characters, + ## which substitutes `-` instead of `+` and `_` instead of `/`. 
+ ## * https://en.wikipedia.org/wiki/Base64#URL_applications + ## * https://tools.ietf.org/html/rfc4648#page-7 + ## + ## **See also:** + ## * `encode proc<#encode,openArray[T]>`_ for encoding an openArray + ## * `decode proc<#decode,string>`_ for decoding a string + runnableExamples: + assert encodeMime("Hello World", 4, "\n") == "SGVs\nbG8g\nV29y\nbGQ=" + template cpy(l, src, idx) = + b = l + while i < b: + result[i] = src[idx] + inc i + inc idx + + if s.len == 0: return + let e = encode(s, safe) + if e.len <= lineLen or newLine.len == 0: + return e + result = newString(e.len + newLine.len * ((e.len div lineLen) - int(e.len mod lineLen == 0))) + var i, j, k, b: int + let nd = e.len - lineLen + while j < nd: + cpy(i + lineLen, e, j) + cpy(i + newLine.len, newLine, k) + k = 0 + cpy(result.len, e, j) + +proc initDecodeTable*(): array[256, char] = + # computes a decode table at compile time + for i in 0 ..< 256: + let ch = char(i) + var code = invalidChar + if ch >= 'A' and ch <= 'Z': code = i - 0x00000041 + if ch >= 'a' and ch <= 'z': code = i - 0x00000047 + if ch >= '0' and ch <= '9': code = i + 0x00000004 + if ch == '+' or ch == '-': code = 0x0000003E + if ch == '/' or ch == '_': code = 0x0000003F + result[i] = char(code) + +const + decodeTable = initDecodeTable() + +proc decode*(s: string): string = + ## Decodes string `s` in base64 representation back into its original form. + ## The initial whitespace is skipped. 
+ ## + ## **See also:** + ## * `encode proc<#encode,openArray[T]>`_ for encoding an openarray + runnableExamples: + assert decode("SGVsbG8gV29ybGQ=") == "Hello World" + assert decode(" SGVsbG8gV29ybGQ=") == "Hello World" + if s.len == 0: return + + proc decodeSize(size: int): int = + return (size * 3 div 4) + 6 + + template inputChar(x: untyped) = + let x = int decodeTable[ord(s[inputIndex])] + if x == invalidChar: + raise newException(ValueError, + "Invalid base64 format character `" & s[inputIndex] & + "` (ord " & $s[inputIndex].ord & ") at location " & $inputIndex & ".") + inc inputIndex + + template outputChar(x: untyped) = + result[outputIndex] = char(x and 255) + inc outputIndex + # pre allocate output string once + result.setLen(decodeSize(s.len)) + var + inputIndex = 0 + outputIndex = 0 + inputLen = s.len + inputEnds = 0 + # strip trailing characters + while inputLen > 0 and s[inputLen - 1] in {'\n', '\r', ' ', '='}: + dec inputLen + # hot loop: read 4 characters at at time + inputEnds = inputLen - 4 + while inputIndex <= inputEnds: + while s[inputIndex] in {'\n', '\r', ' '}: + inc inputIndex + inputChar(a) + inputChar(b) + inputChar(c) + inputChar(d) + outputChar(a shl 2 or b shr 4) + outputChar(b shl 4 or c shr 2) + outputChar(c shl 6 or d shr 0) + # do the last 2 or 3 characters + var leftLen = abs((inputIndex - inputLen) mod 4) + if leftLen == 2: + inputChar(a) + inputChar(b) + outputChar(a shl 2 or b shr 4) + elif leftLen == 3: + inputChar(a) + inputChar(b) + inputChar(c) + outputChar(a shl 2 or b shr 4) + outputChar(b shl 4 or c shr 2) + result.setLen(outputIndex) diff --git a/lib/pure/basic2d.nim b/lib/pure/basic2d.nim deleted file mode 100644 index a344cd053..000000000 --- a/lib/pure/basic2d.nim +++ /dev/null @@ -1,855 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2013 Robert Persson -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. 
-# - -import math -import strutils - - -## Basic 2d support with vectors, points, matrices and some basic utilities. -## Vectors are implemented as direction vectors, ie. when transformed with a matrix -## the translation part of matrix is ignored. -## Operators `+` , `-` , `*` , `/` , `+=` , `-=` , `*=` and `/=` are implemented for vectors and scalars. -## -## Quick start example: -## -## # Create a matrix which first rotates, then scales and at last translates -## -## var m:TMatrix2d=rotate(DEG90) & scale(2.0) & move(100.0,200.0) -## -## # Create a 2d point at (100,0) and a vector (5,2) -## -## var pt:TPoint2d=point2d(100.0,0.0) -## -## var vec:TVector2d=vector2d(5.0,2.0) -## -## -## pt &= m # transforms pt in place -## -## var pt2:TPoint2d=pt & m #concatenates pt with m and returns a new point -## -## var vec2:TVector2d=vec & m #concatenates vec with m and returns a new vector - - -const - DEG360* = PI * 2.0 - ## 360 degrees in radians. - DEG270* = PI * 1.5 - ## 270 degrees in radians. - DEG180* = PI - ## 180 degrees in radians. - DEG90* = PI / 2.0 - ## 90 degrees in radians. - DEG60* = PI / 3.0 - ## 60 degrees in radians. - DEG45* = PI / 4.0 - ## 45 degrees in radians. - DEG30* = PI / 6.0 - ## 30 degrees in radians. - DEG15* = PI / 12.0 - ## 15 degrees in radians. - RAD2DEGCONST = 180.0 / PI - ## used internally by DegToRad and RadToDeg - -type - TMatrix2d* = object - ## Implements a row major 2d matrix, which means - ## transformations are applied the order they are concatenated. - ## The rightmost column of the 3x3 matrix is left out since normally - ## not used for geometric transformations in 2d. - ax*,ay*,bx*,by*,tx*,ty*:float - TPoint2d* = object - ## Implements a non-homegeneous 2d point stored as - ## an `x` coordinate and an `y` coordinate. - x*,y*:float - TVector2d* = object - ## Implements a 2d **direction vector** stored as - ## an `x` coordinate and an `y` coordinate. 
Direction vector means, - ## that when transforming a vector with a matrix, the translational - ## part of the matrix is ignored. - x*,y*:float - - - -# Some forward declarations... -proc matrix2d*(ax,ay,bx,by,tx,ty:float):TMatrix2d {.noInit.} - ## Creates a new matrix. - ## `ax`,`ay` is the local x axis - ## `bx`,`by` is the local y axis - ## `tx`,`ty` is the translation -proc vector2d*(x,y:float):TVector2d {.noInit,inline.} - ## Returns a new vector (`x`,`y`) -proc point2d*(x,y:float):TPoint2d {.noInit,inline.} - ## Returns a new point (`x`,`y`) - - - -let - IDMATRIX*:TMatrix2d=matrix2d(1.0,0.0,0.0,1.0,0.0,0.0) - ## Quick access to an identity matrix - ORIGO*:TPoint2d=point2d(0.0,0.0) - ## Quick acces to point (0,0) - XAXIS*:TVector2d=vector2d(1.0,0.0) - ## Quick acces to an 2d x-axis unit vector - YAXIS*:TVector2d=vector2d(0.0,1.0) - ## Quick acces to an 2d y-axis unit vector - - -# *************************************** -# Private utils -# *************************************** - -proc rtos(val:float):string= - return formatFloat(val,ffDefault,0) - -proc safeArccos(v:float):float= - ## assumes v is in range 0.0-1.0, but clamps - ## the value to avoid out of domain errors - ## due to rounding issues - return arccos(clamp(v,-1.0,1.0)) - - -template makeBinOpVector(s:expr)= - ## implements binary operators + , - , * and / for vectors - proc s*(a,b:TVector2d):TVector2d {.inline,noInit.} = vector2d(s(a.x,b.x),s(a.y,b.y)) - proc s*(a:TVector2d,b:float):TVector2d {.inline,noInit.} = vector2d(s(a.x,b),s(a.y,b)) - proc s*(a:float,b:TVector2d):TVector2d {.inline,noInit.} = vector2d(s(a,b.x),s(a,b.y)) - -template makeBinOpAssignVector(s:expr)= - ## implements inplace binary operators += , -= , /= and *= for vectors - proc s*(a:var TVector2d,b:TVector2d) {.inline.} = s(a.x,b.x) ; s(a.y,b.y) - proc s*(a:var TVector2d,b:float) {.inline.} = s(a.x,b) ; s(a.y,b) - - -# *************************************** -# TMatrix2d implementation -# 
*************************************** - -proc setElements*(t:var TMatrix2d,ax,ay,bx,by,tx,ty:float) {.inline.}= - ## Sets arbitrary elements in an existing matrix. - t.ax=ax - t.ay=ay - t.bx=bx - t.by=by - t.tx=tx - t.ty=ty - -proc matrix2d*(ax,ay,bx,by,tx,ty:float):TMatrix2d = - result.setElements(ax,ay,bx,by,tx,ty) - -proc `&`*(a,b:TMatrix2d):TMatrix2d {.noInit.} = #concatenate matrices - ## Concatenates matrices returning a new matrix. - - # | a.AX a.AY 0 | | b.AX b.AY 0 | - # | a.BX a.BY 0 | * | b.BX b.BY 0 | - # | a.TX a.TY 1 | | b.TX b.TY 1 | - result.setElements( - a.ax * b.ax + a.ay * b.bx, - a.ax * b.ay + a.ay * b.by, - a.bx * b.ax + a.by * b.bx, - a.bx * b.ay + a.by * b.by, - a.tx * b.ax + a.ty * b.bx + b.tx, - a.tx * b.ay + a.ty * b.by + b.ty) - - -proc scale*(s:float):TMatrix2d {.noInit.} = - ## Returns a new scale matrix. - result.setElements(s,0,0,s,0,0) - -proc scale*(s:float,org:TPoint2d):TMatrix2d {.noInit.} = - ## Returns a new scale matrix using, `org` as scale origin. - result.setElements(s,0,0,s,org.x-s*org.x,org.y-s*org.y) - -proc stretch*(sx,sy:float):TMatrix2d {.noInit.} = - ## Returns new a stretch matrix, which is a - ## scale matrix with non uniform scale in x and y. - result.setElements(sx,0,0,sy,0,0) - -proc stretch*(sx,sy:float,org:TPoint2d):TMatrix2d {.noInit.} = - ## Returns a new stretch matrix, which is a - ## scale matrix with non uniform scale in x and y. - ## `org` is used as stretch origin. - result.setElements(sx,0,0,sy,org.x-sx*org.x,org.y-sy*org.y) - -proc move*(dx,dy:float):TMatrix2d {.noInit.} = - ## Returns a new translation matrix. - result.setElements(1,0,0,1,dx,dy) - -proc move*(v:TVector2d):TMatrix2d {.noInit.} = - ## Returns a new translation matrix from a vector. 
- result.setElements(1,0,0,1,v.x,v.y) - -proc rotate*(rad:float):TMatrix2d {.noInit.} = - ## Returns a new rotation matrix, which - ## represents a rotation by `rad` radians - let - s=sin(rad) - c=cos(rad) - result.setElements(c,s,-s,c,0,0) - -proc rotate*(rad:float,org:TPoint2d):TMatrix2d {.noInit.} = - ## Returns a new rotation matrix, which - ## represents a rotation by `rad` radians around - ## the origin `org` - let - s=sin(rad) - c=cos(rad) - result.setElements(c,s,-s,c,org.x+s*org.y-c*org.x,org.y-c*org.y-s*org.x) - -proc mirror*(v:TVector2d):TMatrix2d {.noInit.} = - ## Returns a new mirror matrix, mirroring - ## around the line that passes through origo and - ## has the direction of `v` - let - sqx=v.x*v.x - sqy=v.y*v.y - nd=1.0/(sqx+sqy) #used to normalize invector - xy2=v.x*v.y*2.0*nd - sqd=nd*(sqx-sqy) - - if nd==Inf or nd==NegInf: - return IDMATRIX #mirroring around a zero vector is arbitrary=>just use identity - - result.setElements( - sqd,xy2, - xy2,-sqd, - 0.0,0.0) - -proc mirror*(org:TPoint2d,v:TVector2d):TMatrix2d {.noInit.} = - ## Returns a new mirror matrix, mirroring - ## around the line that passes through `org` and - ## has the direction of `v` - let - sqx=v.x*v.x - sqy=v.y*v.y - nd=1.0/(sqx+sqy) #used to normalize invector - xy2=v.x*v.y*2.0*nd - sqd=nd*(sqx-sqy) - - if nd==Inf or nd==NegInf: - return IDMATRIX #mirroring around a zero vector is arbitrary=>just use identity - - result.setElements( - sqd,xy2, - xy2,-sqd, - org.x-org.y*xy2-org.x*sqd,org.y-org.x*xy2+org.y*sqd) - - - -proc skew*(xskew,yskew:float):TMatrix2d {.noInit.} = - ## Returns a new skew matrix, which has its - ## x axis rotated `xskew` radians from the local x axis, and - ## y axis rotated `yskew` radians from the local y axis - result.setElements(cos(yskew),sin(yskew),-sin(xskew),cos(xskew),0,0) - - -proc `$`* (t:TMatrix2d):string {.noInit.} = - ## Returns a string representation of the matrix - return rtos(t.ax) & "," & rtos(t.ay) & - "," & rtos(t.bx) & "," & rtos(t.by) & - 
"," & rtos(t.tx) & "," & rtos(t.ty) - -proc isUniform*(t:TMatrix2d,tol=1.0e-6):bool= - ## Checks if the transform is uniform, that is - ## perpendicular axes of equal length, which means (for example) - ## it cannot transform a circle into an ellipse. - ## `tol` is used as tolerance for both equal length comparison - ## and perp. comparison. - - #dot product=0 means perpendicular coord. system: - if abs(t.ax*t.bx+t.ay*t.by)<=tol: - #subtract squared lengths of axes to check if uniform scaling: - if abs((t.ax*t.ax+t.ay*t.ay)-(t.bx*t.bx+t.by*t.by))<=tol: - return true - return false - -proc determinant*(t:TMatrix2d):float= - ## Computes the determinant of the matrix. - - #NOTE: equivalent with perp.dot product for two 2d vectors - return t.ax*t.by-t.bx*t.ay - -proc isMirroring* (m:TMatrix2d):bool= - ## Checks if the `m` is a mirroring matrix, - ## which means it will reverse direction of a curve transformed with it - return m.determinant<0.0 - -proc inverse*(m:TMatrix2d):TMatrix2d {.noInit.} = - ## Returns a new matrix, which is the inverse of the matrix - ## If the matrix is not invertible (determinant=0), an EDivByZero - ## will be raised. - let d=m.determinant - if d==0.0: - raise newException(DivByZeroError,"Cannot invert a zero determinant matrix") - - result.setElements( - m.by/d,-m.ay/d, - -m.bx/d,m.ax/d, - (m.bx*m.ty-m.by*m.tx)/d, - (m.ay*m.tx-m.ax*m.ty)/d) - -proc equals*(m1:TMatrix2d,m2:TMatrix2d,tol=1.0e-6):bool= - ## Checks if all elements of `m1`and `m2` is equal within - ## a given tolerance `tol`. - return - abs(m1.ax-m2.ax)<=tol and - abs(m1.ay-m2.ay)<=tol and - abs(m1.bx-m2.bx)<=tol and - abs(m1.by-m2.by)<=tol and - abs(m1.tx-m2.tx)<=tol and - abs(m1.ty-m2.ty)<=tol - -proc `=~`*(m1,m2:TMatrix2d):bool= - ## Checks if `m1`and `m2` is approximately equal, using a - ## tolerance of 1e-6. 
- equals(m1,m2) - -proc isIdentity*(m:TMatrix2d,tol=1.0e-6):bool= - ## Checks is a matrix is approximately an identity matrix, - ## using `tol` as tolerance for each element. - return equals(m,IDMATRIX,tol) - -proc apply*(m:TMatrix2d,x,y:var float,translate=false)= - ## Applies transformation `m` onto `x`,`y`, optionally - ## using the translation part of the matrix. - if translate: # positional style transform - let newx=x*m.ax+y*m.bx+m.tx - y=x*m.ay+y*m.by+m.ty - x=newx - else: # delta style transform - let newx=x*m.ax+y*m.bx - y=x*m.ay+y*m.by - x=newx - - - -# *************************************** -# TVector2d implementation -# *************************************** -proc vector2d*(x,y:float):TVector2d = #forward decl. - result.x=x - result.y=y - -proc polarVector2d*(ang:float,len:float):TVector2d {.noInit.} = - ## Returns a new vector with angle `ang` and magnitude `len` - result.x=cos(ang)*len - result.y=sin(ang)*len - -proc slopeVector2d*(slope:float,len:float):TVector2d {.noInit.} = - ## Returns a new vector having slope (dy/dx) given by - ## `slope`, and a magnitude of `len` - let ang=arctan(slope) - result.x=cos(ang)*len - result.y=sin(ang)*len - -proc len*(v:TVector2d):float {.inline.}= - ## Returns the length of the vector. - sqrt(v.x*v.x+v.y*v.y) - -proc `len=`*(v:var TVector2d,newlen:float) {.noInit.} = - ## Sets the length of the vector, keeping its angle. - let fac=newlen/v.len - - if newlen==0.0: - v.x=0.0 - v.y=0.0 - return - - if fac==Inf or fac==NegInf: - #to short for float accuracy - #do as good as possible: - v.x=newlen - v.y=0.0 - else: - v.x*=fac - v.y*=fac - -proc sqrLen*(v:TVector2d):float {.inline.}= - ## Computes the squared length of the vector, which is - ## faster than computing the absolute length. - v.x*v.x+v.y*v.y - -proc angle*(v:TVector2d):float= - ## Returns the angle of the vector. 
- ## (The counter clockwise plane angle between posetive x axis and `v`) - result=arctan2(v.y,v.x) - if result<0.0: result+=DEG360 - -proc `$` *(v:TVector2d):string= - ## String representation of `v` - result=rtos(v.x) - result.add(",") - result.add(rtos(v.y)) - - -proc `&` *(v:TVector2d,m:TMatrix2d):TVector2d {.noInit.} = - ## Concatenate vector `v` with a transformation matrix. - ## Transforming a vector ignores the translational part - ## of the matrix. - - # | AX AY 0 | - # | X Y 1 | * | BX BY 0 | - # | 0 0 1 | - result.x=v.x*m.ax+v.y*m.bx - result.y=v.x*m.ay+v.y*m.by - - -proc `&=`*(v:var TVector2d,m:TMatrix2d) {.inline.}= - ## Applies transformation `m` onto `v` in place. - ## Transforming a vector ignores the translational part - ## of the matrix. - - # | AX AY 0 | - # | X Y 1 | * | BX BY 0 | - # | 0 0 1 | - let newx=v.x*m.ax+v.y*m.bx - v.y=v.x*m.ay+v.y*m.by - v.x=newx - - -proc tryNormalize*(v:var TVector2d):bool= - ## Modifies `v` to have a length of 1.0, keeping its angle. - ## If `v` has zero length (and thus no angle), it is left unmodified and - ## false is returned, otherwise true is returned. - - let mag=v.len - - if mag==0.0: - return false - - v.x/=mag - v.y/=mag - return true - - -proc normalize*(v:var TVector2d) {.inline.}= - ## Modifies `v` to have a length of 1.0, keeping its angle. - ## If `v` has zero length, an EDivByZero will be raised. - if not tryNormalize(v): - raise newException(DivByZeroError,"Cannot normalize zero length vector") - -proc transformNorm*(v:var TVector2d,t:TMatrix2d)= - ## Applies a normal direction transformation `t` onto `v` in place. - ## The resulting vector is *not* normalized. Transforming a vector ignores the - ## translational part of the matrix. If the matrix is not invertible - ## (determinant=0), an EDivByZero will be raised. 
- - # transforming a normal is done by transforming - # by the transpose of the inverse of the original matrix - # this can be heavily optimized by precompute and inline - # | | AX AY 0 | ^-1| ^T - # | X Y 1 | * | | BX BY 0 | | - # | | 0 0 1 | | - let d=t.determinant - if(d==0.0): - raise newException(DivByZeroError,"Matrix is not invertible") - let newx = (t.by*v.x-t.ay*v.y)/d - v.y = (t.ax*v.y-t.bx*v.x)/d - v.x = newx - -proc transformInv*(v:var TVector2d,t:TMatrix2d)= - ## Applies inverse of a transformation `t` to `v` in place. - ## This is faster than creating an inverse matrix and apply() it. - ## Transforming a vector ignores the translational part - ## of the matrix. If the matrix is not invertible (determinant=0), an EDivByZero - ## will be raised. - let d=t.determinant - - if(d==0.0): - raise newException(DivByZeroError,"Matrix is not invertible") - - let newx=(t.by*v.x-t.bx*v.y)/d - v.y = (t.ax*v.y-t.ay*v.x)/d - v.x = newx - -proc transformNormInv*(v:var TVector2d,t:TMatrix2d)= - ## Applies an inverse normal direction transformation `t` onto `v` in place. - ## This is faster than creating an inverse - ## matrix and transformNorm(...) it. Transforming a vector ignores the - ## translational part of the matrix. - - # normal inverse transform is done by transforming - # by the inverse of the transpose of the inverse of the org. matrix - # which is equivalent with transforming with the transpose. - # | | | AX AY 0 |^-1|^T|^-1 | AX BX 0 | - # | X Y 1 | * | | | BX BY 0 | | | = | X Y 1 | * | AY BY 0 | - # | | | 0 0 1 | | | | 0 0 1 | - # This can be heavily reduced to: - let newx=t.ay*v.y+t.ax*v.x - v.y=t.by*v.y+t.bx*v.x - v.x=newx - -proc rotate90*(v:var TVector2d) {.inline.}= - ## Quickly rotates vector `v` 90 degrees counter clockwise, - ## without using any trigonometrics. - swap(v.x,v.y) - v.x= -v.x - -proc rotate180*(v:var TVector2d){.inline.}= - ## Quickly rotates vector `v` 180 degrees counter clockwise, - ## without using any trigonometrics. 
- v.x= -v.x - v.y= -v.y - -proc rotate270*(v:var TVector2d) {.inline.}= - ## Quickly rotates vector `v` 270 degrees counter clockwise, - ## without using any trigonometrics. - swap(v.x,v.y) - v.y= -v.y - -proc rotate*(v:var TVector2d,rad:float) = - ## Rotates vector `v` `rad` radians in place. - let - s=sin(rad) - c=cos(rad) - newx=c*v.x-s*v.y - v.y=c*v.y+s*v.x - v.x=newx - -proc scale*(v:var TVector2d,fac:float){.inline.}= - ## Scales vector `v` `rad` radians in place. - v.x*=fac - v.y*=fac - -proc stretch*(v:var TVector2d,facx,facy:float){.inline.}= - ## Stretches vector `v` `facx` times horizontally, - ## and `facy` times vertically. - v.x*=facx - v.y*=facy - -proc mirror*(v:var TVector2d,mirrvec:TVector2d)= - ## Mirrors vector `v` using `mirrvec` as mirror direction. - let - sqx=mirrvec.x*mirrvec.x - sqy=mirrvec.y*mirrvec.y - nd=1.0/(sqx+sqy) #used to normalize invector - xy2=mirrvec.x*mirrvec.y*2.0*nd - sqd=nd*(sqx-sqy) - - if nd==Inf or nd==NegInf: - return #mirroring around a zero vector is arbitrary=>keep as is is fastest - - let newx=xy2*v.y+sqd*v.x - v.y=v.x*xy2-sqd*v.y - v.x=newx - - -proc `-` *(v:TVector2d):TVector2d= - ## Negates a vector - result.x= -v.x - result.y= -v.y - -# declare templated binary operators -makeBinOpVector(`+`) -makeBinOpVector(`-`) -makeBinOpVector(`*`) -makeBinOpVector(`/`) -makeBinOpAssignVector(`+=`) -makeBinOpAssignVector(`-=`) -makeBinOpAssignVector(`*=`) -makeBinOpAssignVector(`/=`) - - -proc dot*(v1,v2:TVector2d):float= - ## Computes the dot product of two vectors. - ## Returns 0.0 if the vectors are perpendicular. - return v1.x*v2.x+v1.y*v2.y - -proc cross*(v1,v2:TVector2d):float= - ## Computes the cross product of two vectors, also called - ## the 'perpendicular dot product' in 2d. Returns 0.0 if the vectors - ## are parallel. - return v1.x*v2.y-v1.y*v2.x - -proc equals*(v1,v2:TVector2d,tol=1.0e-6):bool= - ## Checks if two vectors approximately equals with a tolerance. 
- return abs(v2.x-v1.x)<=tol and abs(v2.y-v1.y)<=tol - -proc `=~` *(v1,v2:TVector2d):bool= - ## Checks if two vectors approximately equals with a - ## hardcoded tolerance 1e-6 - equals(v1,v2) - -proc angleTo*(v1,v2:TVector2d):float= - ## Returns the smallest of the two possible angles - ## between `v1` and `v2` in radians. - var - nv1=v1 - nv2=v2 - if not nv1.tryNormalize or not nv2.tryNormalize: - return 0.0 # zero length vector has zero angle to any other vector - return safeArccos(dot(nv1,nv2)) - -proc angleCCW*(v1,v2:TVector2d):float= - ## Returns the counter clockwise plane angle from `v1` to `v2`, - ## in range 0 - 2*PI - let a=v1.angleTo(v2) - if v1.cross(v2)>=0.0: - return a - return DEG360-a - -proc angleCW*(v1,v2:TVector2d):float= - ## Returns the clockwise plane angle from `v1` to `v2`, - ## in range 0 - 2*PI - let a=v1.angleTo(v2) - if v1.cross(v2)<=0.0: - return a - return DEG360-a - -proc turnAngle*(v1,v2:TVector2d):float= - ## Returns the amount v1 should be rotated (in radians) to equal v2, - ## in range -PI to PI - let a=v1.angleTo(v2) - if v1.cross(v2)<=0.0: - return -a - return a - -proc bisect*(v1,v2:TVector2d):TVector2d {.noInit.}= - ## Computes the bisector between v1 and v2 as a normalized vector. - ## If one of the input vectors has zero length, a normalized version - ## of the other is returned. If both input vectors has zero length, - ## an arbitrary normalized vector is returned. - var - vmag1=v1.len - vmag2=v2.len - - # zero length vector equals arbitrary vector, just change to magnitude to one to - # avoid zero division - if vmag1==0.0: - if vmag2==0: #both are zero length return any normalized vector - return XAXIS - vmag1=1.0 - if vmag2==0.0: vmag2=1.0 - - let - x1=v1.x/vmag1 - y1=v1.y/vmag1 - x2=v2.x/vmag2 - y2=v2.y/vmag2 - - result.x=(x1 + x2) * 0.5 - result.y=(y1 + y2) * 0.5 - - if not result.tryNormalize(): - # This can happen if vectors are colinear. 
In this special case - # there are actually two bisectors, we select just - # one of them (x1,y1 rotated 90 degrees ccw). - result.x = -y1 - result.y = x1 - - - -# *************************************** -# TPoint2d implementation -# *************************************** - -proc point2d*(x,y:float):TPoint2d = - result.x=x - result.y=y - -proc sqrDist*(a,b:TPoint2d):float= - ## Computes the squared distance between `a` and `b` - let dx=b.x-a.x - let dy=b.y-a.y - result=dx*dx+dy*dy - -proc dist*(a,b:TPoint2d):float {.inline.}= - ## Computes the absolute distance between `a` and `b` - result=sqrt(sqrDist(a,b)) - -proc angle*(a,b:TPoint2d):float= - ## Computes the angle of the vector `b`-`a` - let dx=b.x-a.x - let dy=b.y-a.y - result=arctan2(dy,dx) - if result<0: - result += DEG360 - -proc `$` *(p:TPoint2d):string= - ## String representation of `p` - result=rtos(p.x) - result.add(",") - result.add(rtos(p.y)) - -proc `&`*(p:TPoint2d,t:TMatrix2d):TPoint2d {.noInit,inline.} = - ## Concatenates a point `p` with a transform `t`, - ## resulting in a new, transformed point. - - # | AX AY 0 | - # | X Y 1 | * | BX BY 0 | - # | TX TY 1 | - result.x=p.x*t.ax+p.y*t.bx+t.tx - result.y=p.x*t.ay+p.y*t.by+t.ty - -proc `&=` *(p:var TPoint2d,t:TMatrix2d) {.inline.}= - ## Applies transformation `t` onto `p` in place. - let newx=p.x*t.ax+p.y*t.bx+t.tx - p.y=p.x*t.ay+p.y*t.by+t.ty - p.x=newx - - -proc transformInv*(p:var TPoint2d,t:TMatrix2d){.inline.}= - ## Applies the inverse of transformation `t` onto `p` in place. - ## If the matrix is not invertable (determinant=0) , EDivByZero will - ## be raised. 
- - # | AX AY 0 | ^-1 - # | X Y 1 | * | BX BY 0 | - # | TX TY 1 | - let d=t.determinant - if d==0.0: - raise newException(DivByZeroError,"Cannot invert a zero determinant matrix") - let - newx= (t.bx*t.ty-t.by*t.tx+p.x*t.by-p.y*t.bx)/d - p.y = -(t.ax*t.ty-t.ay*t.tx+p.x*t.ay-p.y*t.ax)/d - p.x=newx - - -proc `+`*(p:TPoint2d,v:TVector2d):TPoint2d {.noInit,inline.} = - ## Adds a vector `v` to a point `p`, resulting - ## in a new point. - result.x=p.x+v.x - result.y=p.y+v.y - -proc `+=`*(p:var TPoint2d,v:TVector2d) {.noInit,inline.} = - ## Adds a vector `v` to a point `p` in place. - p.x+=v.x - p.y+=v.y - -proc `-`*(p:TPoint2d,v:TVector2d):TPoint2d {.noInit,inline.} = - ## Subtracts a vector `v` from a point `p`, resulting - ## in a new point. - result.x=p.x-v.x - result.y=p.y-v.y - -proc `-`*(p1,p2:TPoint2d):TVector2d {.noInit,inline.} = - ## Subtracts `p2`from `p1` resulting in a difference vector. - result.x=p1.x-p2.x - result.y=p1.y-p2.y - -proc `-=`*(p:var TPoint2d,v:TVector2d) {.noInit,inline.} = - ## Subtracts a vector `v` from a point `p` in place. - p.x-=v.x - p.y-=v.y - -proc equals(p1,p2:TPoint2d,tol=1.0e-6):bool {.inline.}= - ## Checks if two points approximately equals with a tolerance. - return abs(p2.x-p1.x)<=tol and abs(p2.y-p1.y)<=tol - -proc `=~`*(p1,p2:TPoint2d):bool {.inline.}= - ## Checks if two vectors approximately equals with a - ## hardcoded tolerance 1e-6 - equals(p1,p2) - -proc polar*(p:TPoint2d,ang,dist:float):TPoint2d {.noInit.} = - ## Returns a point with a given angle and distance away from `p` - result.x=p.x+cos(ang)*dist - result.y=p.y+sin(ang)*dist - -proc rotate*(p:var TPoint2d,rad:float)= - ## Rotates a point in place `rad` radians around origo. - let - c=cos(rad) - s=sin(rad) - newx=p.x*c-p.y*s - p.y=p.y*c+p.x*s - p.x=newx - -proc rotate*(p:var TPoint2d,rad:float,org:TPoint2d)= - ## Rotates a point in place `rad` radians using `org` as - ## center of rotation. 
- let - c=cos(rad) - s=sin(rad) - newx=(p.x - org.x) * c - (p.y - org.y) * s + org.x - p.y=(p.y - org.y) * c + (p.x - org.x) * s + org.y - p.x=newx - -proc scale*(p:var TPoint2d,fac:float) {.inline.}= - ## Scales a point in place `fac` times with world origo as origin. - p.x*=fac - p.y*=fac - -proc scale*(p:var TPoint2d,fac:float,org:TPoint2d){.inline.}= - ## Scales the point in place `fac` times with `org` as origin. - p.x=(p.x - org.x) * fac + org.x - p.y=(p.y - org.y) * fac + org.y - -proc stretch*(p:var TPoint2d,facx,facy:float){.inline.}= - ## Scales a point in place non uniformly `facx` and `facy` times with - ## world origo as origin. - p.x*=facx - p.y*=facy - -proc stretch*(p:var TPoint2d,facx,facy:float,org:TPoint2d){.inline.}= - ## Scales the point in place non uniformly `facx` and `facy` times with - ## `org` as origin. - p.x=(p.x - org.x) * facx + org.x - p.y=(p.y - org.y) * facy + org.y - -proc move*(p:var TPoint2d,dx,dy:float){.inline.}= - ## Translates a point `dx`, `dy` in place. - p.x+=dx - p.y+=dy - -proc move*(p:var TPoint2d,v:TVector2d){.inline.}= - ## Translates a point with vector `v` in place. - p.x+=v.x - p.y+=v.y - -proc sgnArea*(a,b,c:TPoint2d):float= - ## Computes the signed area of the triangle thru points `a`,`b` and `c` - ## result>0.0 for counter clockwise triangle - ## result<0.0 for clockwise triangle - ## This is commonly used to determinate side of a point with respect to a line. 
- return ((b.x - c.x) * (b.y - a.y)-(b.y - c.y) * (b.x - a.x))*0.5 - -proc area*(a,b,c:TPoint2d):float= - ## Computes the area of the triangle thru points `a`,`b` and `c` - return abs(sgnArea(a,b,c)) - -proc closestPoint*(p:TPoint2d,pts:varargs[TPoint2d]):TPoint2d= - ## Returns a point selected from `pts`, that has the closest - ## euclidean distance to `p` - assert(pts.len>0) # must have at least one point - - var - bestidx=0 - bestdist=p.sqrDist(pts[0]) - curdist:float - - for idx in 1..high(pts): - curdist=p.sqrDist(pts[idx]) - if curdist<bestdist: - bestidx=idx - bestdist=curdist - - result=pts[bestidx] - - -# *************************************** -# Misc. math utilities that should -# probably be in another module. -# *************************************** -proc normAngle*(ang:float):float= - ## Returns an angle in radians, that is equal to `ang`, - ## but in the range 0 to <2*PI - if ang>=0.0 and ang<DEG360: - return ang - - return ang mod DEG360 - -proc degToRad*(deg:float):float {.inline.}= - ## converts `deg` degrees to radians - deg / RAD2DEGCONST - -proc radToDeg*(rad:float):float {.inline.}= - ## converts `rad` radians to degrees - rad * RAD2DEGCONST - - diff --git a/lib/pure/basic3d.nim b/lib/pure/basic3d.nim deleted file mode 100644 index 18ebed67b..000000000 --- a/lib/pure/basic3d.nim +++ /dev/null @@ -1,1040 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2013 Robert Persson -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -import math -import strutils -import times - - -## Basic 3d support with vectors, points, matrices and some basic utilities. -## Vectors are implemented as direction vectors, ie. when transformed with a matrix -## the translation part of matrix is ignored. The coordinate system used is -## right handed, because its compatible with 2d coordinate system (rotation around -## zaxis equals 2d rotation). 
-## Operators `+` , `-` , `*` , `/` , `+=` , `-=` , `*=` and `/=` are implemented -## for vectors and scalars. -## -## -## Quick start example: -## -## # Create a matrix which first rotates, then scales and at last translates -## -## var m:TMatrix3d=rotate(PI,vector3d(1,1,2.5)) & scale(2.0) & move(100.0,200.0,300.0) -## -## # Create a 3d point at (100,150,200) and a vector (5,2,3) -## -## var pt:TPoint3d=point3d(100.0,150.0,200.0) -## -## var vec:TVector3d=vector3d(5.0,2.0,3.0) -## -## -## pt &= m # transforms pt in place -## -## var pt2:TPoint3d=pt & m #concatenates pt with m and returns a new point -## -## var vec2:TVector3d=vec & m #concatenates vec with m and returns a new vector - - - -type - TMatrix3d* =object - ## Implements a row major 3d matrix, which means - ## transformations are applied the order they are concatenated. - ## This matrix is stored as an 4x4 matrix: - ## [ ax ay az aw ] - ## [ bx by bz bw ] - ## [ cx cy cz cw ] - ## [ tx ty tz tw ] - ax*,ay*,az*,aw*, bx*,by*,bz*,bw*, cx*,cy*,cz*,cw*, tx*,ty*,tz*,tw*:float - TPoint3d* = object - ## Implements a non-homegeneous 2d point stored as - ## an `x` , `y` and `z` coordinate. - x*,y*,z*:float - TVector3d* = object - ## Implements a 3d **direction vector** stored as - ## an `x` , `y` and `z` coordinate. Direction vector means, - ## that when transforming a vector with a matrix, the translational - ## part of the matrix is ignored. - x*,y*,z*:float - - - -# Some forward declarations -proc matrix3d*(ax,ay,az,aw,bx,by,bz,bw,cx,cy,cz,cw,tx,ty,tz,tw:float):TMatrix3d {.noInit.} - ## Creates a new 4x4 3d transformation matrix. - ## `ax` , `ay` , `az` is the local x axis. - ## `bx` , `by` , `bz` is the local y axis. - ## `cx` , `cy` , `cz` is the local z axis. - ## `tx` , `ty` , `tz` is the translation. 
-proc vector3d*(x,y,z:float):TVector3d {.noInit,inline.} - ## Returns a new 3d vector (`x`,`y`,`z`) -proc point3d*(x,y,z:float):TPoint3d {.noInit,inline.} - ## Returns a new 4d point (`x`,`y`,`z`) -proc tryNormalize*(v:var TVector3d):bool - ## Modifies `v` to have a length of 1.0, keeping its angle. - ## If `v` has zero length (and thus no angle), it is left unmodified and false is - ## returned, otherwise true is returned. - - - -let - IDMATRIX*:TMatrix3d=matrix3d( - 1.0,0.0,0.0,0.0, - 0.0,1.0,0.0,0.0, - 0.0,0.0,1.0,0.0, - 0.0,0.0,0.0,1.0) - ## Quick access to a 3d identity matrix - ORIGO*:TPoint3d=point3d(0.0,0.0,0.0) - ## Quick access to point (0,0) - XAXIS*:TVector3d=vector3d(1.0,0.0,0.0) - ## Quick access to an 3d x-axis unit vector - YAXIS*:TVector3d=vector3d(0.0,1.0,0.0) - ## Quick access to an 3d y-axis unit vector - ZAXIS*:TVector3d=vector3d(0.0,0.0,1.0) - ## Quick access to an 3d z-axis unit vector - - - -# *************************************** -# Private utils -# *************************************** - -proc rtos(val:float):string= - return formatFloat(val,ffDefault,0) - -proc safeArccos(v:float):float= - ## assumes v is in range 0.0-1.0, but clamps - ## the value to avoid out of domain errors - ## due to rounding issues - return arccos(clamp(v,-1.0,1.0)) - -template makeBinOpVector(s:expr)= - ## implements binary operators + , - , * and / for vectors - proc s*(a,b:TVector3d):TVector3d {.inline,noInit.} = - vector3d(s(a.x,b.x),s(a.y,b.y),s(a.z,b.z)) - proc s*(a:TVector3d,b:float):TVector3d {.inline,noInit.} = - vector3d(s(a.x,b),s(a.y,b),s(a.z,b)) - proc s*(a:float,b:TVector3d):TVector3d {.inline,noInit.} = - vector3d(s(a,b.x),s(a,b.y),s(a,b.z)) - -template makeBinOpAssignVector(s:expr)= - ## implements inplace binary operators += , -= , /= and *= for vectors - proc s*(a:var TVector3d,b:TVector3d) {.inline.} = - s(a.x,b.x) ; s(a.y,b.y) ; s(a.z,b.z) - proc s*(a:var TVector3d,b:float) {.inline.} = - s(a.x,b) ; s(a.y,b) ; s(a.z,b) - - - -# 
*************************************** -# TMatrix3d implementation -# *************************************** - -proc setElements*(t:var TMatrix3d,ax,ay,az,aw,bx,by,bz,bw,cx,cy,cz,cw,tx,ty,tz,tw:float) {.inline.}= - ## Sets arbitrary elements in an exisitng matrix. - t.ax=ax - t.ay=ay - t.az=az - t.aw=aw - t.bx=bx - t.by=by - t.bz=bz - t.bw=bw - t.cx=cx - t.cy=cy - t.cz=cz - t.cw=cw - t.tx=tx - t.ty=ty - t.tz=tz - t.tw=tw - -proc matrix3d*(ax,ay,az,aw,bx,by,bz,bw,cx,cy,cz,cw,tx,ty,tz,tw:float):TMatrix3d = - result.setElements(ax,ay,az,aw,bx,by,bz,bw,cx,cy,cz,cw,tx,ty,tz,tw) - -proc `&`*(a,b:TMatrix3d):TMatrix3d {.noinit.} = - ## Concatenates matrices returning a new matrix. - result.setElements( - a.aw*b.tx+a.az*b.cx+a.ay*b.bx+a.ax*b.ax, - a.aw*b.ty+a.az*b.cy+a.ay*b.by+a.ax*b.ay, - a.aw*b.tz+a.az*b.cz+a.ay*b.bz+a.ax*b.az, - a.aw*b.tw+a.az*b.cw+a.ay*b.bw+a.ax*b.aw, - - a.bw*b.tx+a.bz*b.cx+a.by*b.bx+a.bx*b.ax, - a.bw*b.ty+a.bz*b.cy+a.by*b.by+a.bx*b.ay, - a.bw*b.tz+a.bz*b.cz+a.by*b.bz+a.bx*b.az, - a.bw*b.tw+a.bz*b.cw+a.by*b.bw+a.bx*b.aw, - - a.cw*b.tx+a.cz*b.cx+a.cy*b.bx+a.cx*b.ax, - a.cw*b.ty+a.cz*b.cy+a.cy*b.by+a.cx*b.ay, - a.cw*b.tz+a.cz*b.cz+a.cy*b.bz+a.cx*b.az, - a.cw*b.tw+a.cz*b.cw+a.cy*b.bw+a.cx*b.aw, - - a.tw*b.tx+a.tz*b.cx+a.ty*b.bx+a.tx*b.ax, - a.tw*b.ty+a.tz*b.cy+a.ty*b.by+a.tx*b.ay, - a.tw*b.tz+a.tz*b.cz+a.ty*b.bz+a.tx*b.az, - a.tw*b.tw+a.tz*b.cw+a.ty*b.bw+a.tx*b.aw) - - -proc scale*(s:float):TMatrix3d {.noInit.} = - ## Returns a new scaling matrix. - result.setElements(s,0,0,0, 0,s,0,0, 0,0,s,0, 0,0,0,1) - -proc scale*(s:float,org:TPoint3d):TMatrix3d {.noInit.} = - ## Returns a new scaling matrix using, `org` as scale origin. - result.setElements(s,0,0,0, 0,s,0,0, 0,0,s,0, - org.x-s*org.x,org.y-s*org.y,org.z-s*org.z,1.0) - -proc stretch*(sx,sy,sz:float):TMatrix3d {.noInit.} = - ## Returns new a stretch matrix, which is a - ## scale matrix with non uniform scale in x,y and z. 
- result.setElements(sx,0,0,0, 0,sy,0,0, 0,0,sz,0, 0,0,0,1) - -proc stretch*(sx,sy,sz:float,org:TPoint3d):TMatrix3d {.noInit.} = - ## Returns a new stretch matrix, which is a - ## scale matrix with non uniform scale in x,y and z. - ## `org` is used as stretch origin. - result.setElements(sx,0,0,0, 0,sy,0,0, 0,0,sz,0, org.x-sx*org.x,org.y-sy*org.y,org.z-sz*org.z,1) - -proc move*(dx,dy,dz:float):TMatrix3d {.noInit.} = - ## Returns a new translation matrix. - result.setElements(1,0,0,0, 0,1,0,0, 0,0,1,0, dx,dy,dz,1) - -proc move*(v:TVector3d):TMatrix3d {.noInit.} = - ## Returns a new translation matrix from a vector. - result.setElements(1,0,0,0, 0,1,0,0, 0,0,1,0, v.x,v.y,v.z,1) - - -proc rotate*(angle:float,axis:TVector3d):TMatrix3d {.noInit.}= - ## Creates a rotation matrix that rotates `angle` radians over - ## `axis`, which passes through origo. - - # see PDF document http://inside.mines.edu/~gmurray/ArbitraryAxisRotation/ArbitraryAxisRotation.pdf - # for how this is computed - - var normax=axis - if not normax.tryNormalize: #simplifies matrix computation below a lot - raise newException(DivByZeroError,"Cannot rotate around zero length axis") - - let - cs=cos(angle) - si=sin(angle) - omc=1.0-cs - usi=normax.x*si - vsi=normax.y*si - wsi=normax.z*si - u2=normax.x*normax.x - v2=normax.y*normax.y - w2=normax.z*normax.z - uvomc=normax.x*normax.y*omc - uwomc=normax.x*normax.z*omc - vwomc=normax.y*normax.z*omc - - result.setElements( - u2+(1.0-u2)*cs, uvomc+wsi, uwomc-vsi, 0.0, - uvomc-wsi, v2+(1.0-v2)*cs, vwomc+usi, 0.0, - uwomc+vsi, vwomc-usi, w2+(1.0-w2)*cs, 0.0, - 0.0,0.0,0.0,1.0) - -proc rotate*(angle:float,org:TPoint3d,axis:TVector3d):TMatrix3d {.noInit.}= - ## Creates a rotation matrix that rotates `angle` radians over - ## `axis`, which passes through `org`. 
- - # see PDF document http://inside.mines.edu/~gmurray/ArbitraryAxisRotation/ArbitraryAxisRotation.pdf - # for how this is computed - - var normax=axis - if not normax.tryNormalize: #simplifies matrix computation below a lot - raise newException(DivByZeroError,"Cannot rotate around zero length axis") - - let - u=normax.x - v=normax.y - w=normax.z - u2=u*u - v2=v*v - w2=w*w - cs=cos(angle) - omc=1.0-cs - si=sin(angle) - a=org.x - b=org.y - c=org.z - usi=u*si - vsi=v*si - wsi=w*si - uvomc=normax.x*normax.y*omc - uwomc=normax.x*normax.z*omc - vwomc=normax.y*normax.z*omc - - result.setElements( - u2+(v2+w2)*cs, uvomc+wsi, uwomc-vsi, 0.0, - uvomc-wsi, v2+(u2+w2)*cs, vwomc+usi, 0.0, - uwomc+vsi, vwomc-usi, w2+(u2+v2)*cs, 0.0, - (a*(v2+w2)-u*(b*v+c*w))*omc+(b*w-c*v)*si, - (b*(u2+w2)-v*(a*u+c*w))*omc+(c*u-a*w)*si, - (c*(u2+v2)-w*(a*u+b*v))*omc+(a*v-b*u)*si,1.0) - - -proc rotateX*(angle:float):TMatrix3d {.noInit.}= - ## Creates a matrix that rotates around the x-axis with `angle` radians, - ## which is also called a 'roll' matrix. - let - c=cos(angle) - s=sin(angle) - result.setElements( - 1,0,0,0, - 0,c,s,0, - 0,-s,c,0, - 0,0,0,1) - -proc rotateY*(angle:float):TMatrix3d {.noInit.}= - ## Creates a matrix that rotates around the y-axis with `angle` radians, - ## which is also called a 'pitch' matrix. - let - c=cos(angle) - s=sin(angle) - result.setElements( - c,0,-s,0, - 0,1,0,0, - s,0,c,0, - 0,0,0,1) - -proc rotateZ*(angle:float):TMatrix3d {.noInit.}= - ## Creates a matrix that rotates around the z-axis with `angle` radians, - ## which is also called a 'yaw' matrix. - let - c=cos(angle) - s=sin(angle) - result.setElements( - c,s,0,0, - -s,c,0,0, - 0,0,1,0, - 0,0,0,1) - -proc isUniform*(m:TMatrix3d,tol=1.0e-6):bool= - ## Checks if the transform is uniform, that is - ## perpendicular axes of equal length, which means (for example) - ## it cannot transform a sphere into an ellipsoid. 
- ## `tol` is used as tolerance for both equal length comparison - ## and perpendicular comparison. - - #dot product=0 means perpendicular coord. system, check xaxis vs yaxis and xaxis vs zaxis - if abs(m.ax*m.bx+m.ay*m.by+m.az*m.bz)<=tol and # x vs y - abs(m.ax*m.cx+m.ay*m.cy+m.az*m.cz)<=tol and #x vs z - abs(m.bx*m.cx+m.by*m.cy+m.bz*m.cz)<=tol: #y vs z - - #subtract squared lengths of axes to check if uniform scaling: - let - sqxlen=(m.ax*m.ax+m.ay*m.ay+m.az*m.az) - sqylen=(m.bx*m.bx+m.by*m.by+m.bz*m.bz) - sqzlen=(m.cx*m.cx+m.cy*m.cy+m.cz*m.cz) - if abs(sqxlen-sqylen)<=tol and abs(sqxlen-sqzlen)<=tol: - return true - return false - - - -proc mirror*(planeperp:TVector3d):TMatrix3d {.noInit.}= - ## Creates a matrix that mirrors over the plane that has `planeperp` as normal, - ## and passes through origo. `planeperp` does not need to be normalized. - - # https://en.wikipedia.org/wiki/Transformation_matrix - var n=planeperp - if not n.tryNormalize: - raise newException(DivByZeroError,"Cannot mirror over a plane with a zero length normal") - - let - a=n.x - b=n.y - c=n.z - ab=a*b - ac=a*c - bc=b*c - - result.setElements( - 1-2*a*a , -2*ab,-2*ac,0, - -2*ab , 1-2*b*b, -2*bc, 0, - -2*ac, -2*bc, 1-2*c*c,0, - 0,0,0,1) - - -proc mirror*(org:TPoint3d,planeperp:TVector3d):TMatrix3d {.noInit.}= - ## Creates a matrix that mirrors over the plane that has `planeperp` as normal, - ## and passes through `org`. `planeperp` does not need to be normalized. - - # constructs a mirror M like the simpler mirror matrix constructor - # above but premultiplies with the inverse traslation of org - # and postmultiplies with the translation of org. 
- # With some fiddling this becomes reasonably simple: - var n=planeperp - if not n.tryNormalize: - raise newException(DivByZeroError,"Cannot mirror over a plane with a zero length normal") - - let - a=n.x - b=n.y - c=n.z - ab=a*b - ac=a*c - bc=b*c - aa=a*a - bb=b*b - cc=c*c - tx=org.x - ty=org.y - tz=org.z - - result.setElements( - 1-2*aa , -2*ab,-2*ac,0, - -2*ab , 1-2*bb, -2*bc, 0, - -2*ac, -2*bc, 1-2*cc,0, - 2*(ac*tz+ab*ty+aa*tx), - 2*(bc*tz+bb*ty+ab*tx), - 2*(cc*tz+bc*ty+ac*tx) ,1) - - -proc determinant*(m:TMatrix3d):float= - ## Computes the determinant of matrix `m`. - - # This computation is gotten from ratsimp(optimize(determinant(m))) - # in maxima CAS - let - O1=m.cx*m.tw-m.cw*m.tx - O2=m.cy*m.tw-m.cw*m.ty - O3=m.cx*m.ty-m.cy*m.tx - O4=m.cz*m.tw-m.cw*m.tz - O5=m.cx*m.tz-m.cz*m.tx - O6=m.cy*m.tz-m.cz*m.ty - - return (O1*m.ay-O2*m.ax-O3*m.aw)*m.bz+ - (-O1*m.az+O4*m.ax+O5*m.aw)*m.by+ - (O2*m.az-O4*m.ay-O6*m.aw)*m.bx+ - (O3*m.az-O5*m.ay+O6*m.ax)*m.bw - - -proc inverse*(m:TMatrix3d):TMatrix3d {.noInit.}= - ## Computes the inverse of matrix `m`. If the matrix - ## determinant is zero, thus not invertible, a EDivByZero - ## will be raised. 
- - # this computation comes from optimize(invert(m)) in maxima CAS - - let - det=m.determinant - O2=m.cy*m.tw-m.cw*m.ty - O3=m.cz*m.tw-m.cw*m.tz - O4=m.cy*m.tz-m.cz*m.ty - O5=m.by*m.tw-m.bw*m.ty - O6=m.bz*m.tw-m.bw*m.tz - O7=m.by*m.tz-m.bz*m.ty - O8=m.by*m.cw-m.bw*m.cy - O9=m.bz*m.cw-m.bw*m.cz - O10=m.by*m.cz-m.bz*m.cy - O11=m.cx*m.tw-m.cw*m.tx - O12=m.cx*m.tz-m.cz*m.tx - O13=m.bx*m.tw-m.bw*m.tx - O14=m.bx*m.tz-m.bz*m.tx - O15=m.bx*m.cw-m.bw*m.cx - O16=m.bx*m.cz-m.bz*m.cx - O17=m.cx*m.ty-m.cy*m.tx - O18=m.bx*m.ty-m.by*m.tx - O19=m.bx*m.cy-m.by*m.cx - - if det==0.0: - raise newException(DivByZeroError,"Cannot normalize zero length vector") - - result.setElements( - (m.bw*O4+m.by*O3-m.bz*O2)/det , (-m.aw*O4-m.ay*O3+m.az*O2)/det, - (m.aw*O7+m.ay*O6-m.az*O5)/det , (-m.aw*O10-m.ay*O9+m.az*O8)/det, - (-m.bw*O12-m.bx*O3+m.bz*O11)/det , (m.aw*O12+m.ax*O3-m.az*O11)/det, - (-m.aw*O14-m.ax*O6+m.az*O13)/det , (m.aw*O16+m.ax*O9-m.az*O15)/det, - (m.bw*O17+m.bx*O2-m.by*O11)/det , (-m.aw*O17-m.ax*O2+m.ay*O11)/det, - (m.aw*O18+m.ax*O5-m.ay*O13)/det , (-m.aw*O19-m.ax*O8+m.ay*O15)/det, - (-m.bx*O4+m.by*O12-m.bz*O17)/det , (m.ax*O4-m.ay*O12+m.az*O17)/det, - (-m.ax*O7+m.ay*O14-m.az*O18)/det , (m.ax*O10-m.ay*O16+m.az*O19)/det) - - -proc equals*(m1:TMatrix3d,m2:TMatrix3d,tol=1.0e-6):bool= - ## Checks if all elements of `m1`and `m2` is equal within - ## a given tolerance `tol`. - return - abs(m1.ax-m2.ax)<=tol and - abs(m1.ay-m2.ay)<=tol and - abs(m1.az-m2.az)<=tol and - abs(m1.aw-m2.aw)<=tol and - abs(m1.bx-m2.bx)<=tol and - abs(m1.by-m2.by)<=tol and - abs(m1.bz-m2.bz)<=tol and - abs(m1.bw-m2.bw)<=tol and - abs(m1.cx-m2.cx)<=tol and - abs(m1.cy-m2.cy)<=tol and - abs(m1.cz-m2.cz)<=tol and - abs(m1.cw-m2.cw)<=tol and - abs(m1.tx-m2.tx)<=tol and - abs(m1.ty-m2.ty)<=tol and - abs(m1.tz-m2.tz)<=tol and - abs(m1.tw-m2.tw)<=tol - -proc `=~`*(m1,m2:TMatrix3d):bool= - ## Checks if `m1` and `m2` is approximately equal, using a - ## tolerance of 1e-6. 
- equals(m1,m2) - -proc transpose*(m:TMatrix3d):TMatrix3d {.noInit.}= - ## Returns the transpose of `m` - result.setElements(m.ax,m.bx,m.cx,m.tx,m.ay,m.by,m.cy,m.ty,m.az,m.bz,m.cz,m.tz,m.aw,m.bw,m.cw,m.tw) - -proc getXAxis*(m:TMatrix3d):TVector3d {.noInit.}= - ## Gets the local x axis of `m` - result.x=m.ax - result.y=m.ay - result.z=m.az - -proc getYAxis*(m:TMatrix3d):TVector3d {.noInit.}= - ## Gets the local y axis of `m` - result.x=m.bx - result.y=m.by - result.z=m.bz - -proc getZAxis*(m:TMatrix3d):TVector3d {.noInit.}= - ## Gets the local y axis of `m` - result.x=m.cx - result.y=m.cy - result.z=m.cz - - -proc `$`*(m:TMatrix3d):string= - ## String representation of `m` - return rtos(m.ax) & "," & rtos(m.ay) & "," &rtos(m.az) & "," & rtos(m.aw) & - "\n" & rtos(m.bx) & "," & rtos(m.by) & "," &rtos(m.bz) & "," & rtos(m.bw) & - "\n" & rtos(m.cx) & "," & rtos(m.cy) & "," &rtos(m.cz) & "," & rtos(m.cw) & - "\n" & rtos(m.tx) & "," & rtos(m.ty) & "," &rtos(m.tz) & "," & rtos(m.tw) - -proc apply*(m:TMatrix3d, x,y,z:var float, translate=false)= - ## Applies transformation `m` onto `x` , `y` , `z` , optionally - ## using the translation part of the matrix. - let - oldx=x - oldy=y - oldz=z - - x=m.cx*oldz+m.bx*oldy+m.ax*oldx - y=m.cy*oldz+m.by*oldy+m.ay*oldx - z=m.cz*oldz+m.bz*oldy+m.az*oldx - - if translate: - x+=m.tx - y+=m.ty - z+=m.tz - -# *************************************** -# TVector3d implementation -# *************************************** -proc vector3d*(x,y,z:float):TVector3d= - result.x=x - result.y=y - result.z=z - -proc len*(v:TVector3d):float= - ## Returns the length of the vector `v`. - sqrt(v.x*v.x+v.y*v.y+v.z*v.z) - -proc `len=`*(v:var TVector3d,newlen:float) {.noInit.} = - ## Sets the length of the vector, keeping its direction. - ## If the vector has zero length before changing it's length, - ## an arbitrary vector of the requested length is returned. 
- - let fac=newlen/v.len - - if newlen==0.0: - v.x=0.0 - v.y=0.0 - v.z=0.0 - return - - if fac==Inf or fac==NegInf: - #to short for float accuracy - #do as good as possible: - v.x=newlen - v.y=0.0 - v.z=0.0 - else: - v.x*=fac - v.y*=fac - v.z*=fac - - -proc sqrLen*(v:TVector3d):float {.inline.}= - ## Computes the squared length of the vector, which is - ## faster than computing the absolute length. - return v.x*v.x+v.y*v.y+v.z*v.z - -proc `$` *(v:TVector3d):string= - ## String representation of `v` - result=rtos(v.x) - result.add(",") - result.add(rtos(v.y)) - result.add(",") - result.add(rtos(v.z)) - -proc `&` *(v:TVector3d,m:TMatrix3d):TVector3d {.noInit.} = - ## Concatenate vector `v` with a transformation matrix. - ## Transforming a vector ignores the translational part - ## of the matrix. - - # | AX AY AZ AW | - # | X Y Z 1 | * | BX BY BZ BW | - # | CX CY CZ CW | - # | 0 0 0 1 | - let - newx=m.cx*v.z+m.bx*v.y+m.ax*v.x - newy=m.cy*v.z+m.by*v.y+m.ay*v.x - result.z=m.cz*v.z+m.bz*v.y+m.az*v.x - result.y=newy - result.x=newx - - -proc `&=` *(v:var TVector3d,m:TMatrix3d) {.noInit.} = - ## Applies transformation `m` onto `v` in place. - ## Transforming a vector ignores the translational part - ## of the matrix. - - # | AX AY AZ AW | - # | X Y Z 1 | * | BX BY BZ BW | - # | CX CY CZ CW | - # | 0 0 0 1 | - - let - newx=m.cx*v.z+m.bx*v.y+m.ax*v.x - newy=m.cy*v.z+m.by*v.y+m.ay*v.x - v.z=m.cz*v.z+m.bz*v.y+m.az*v.x - v.y=newy - v.x=newx - -proc transformNorm*(v:var TVector3d,m:TMatrix3d)= - ## Applies a normal direction transformation `m` onto `v` in place. - ## The resulting vector is *not* normalized. Transforming a vector ignores the - ## translational part of the matrix. If the matrix is not invertible - ## (determinant=0), an EDivByZero will be raised. 
- - # transforming a normal is done by transforming - # by the transpose of the inverse of the original matrix - - # Major reason this simple function is here is that this function can be optimized in the future, - # (possibly by hardware) as well as having a consistent API with the 2d version. - v&=transpose(inverse(m)) - -proc transformInv*(v:var TVector3d,m:TMatrix3d)= - ## Applies the inverse of `m` on vector `v`. Transforming a vector ignores - ## the translational part of the matrix. Transforming a vector ignores the - ## translational part of the matrix. - ## If the matrix is not invertible (determinant=0), an EDivByZero - ## will be raised. - - # Major reason this simple function is here is that this function can be optimized in the future, - # (possibly by hardware) as well as having a consistent API with the 2d version. - v&=m.inverse - -proc transformNormInv*(vec:var TVector3d,m:TMatrix3d)= - ## Applies an inverse normal direction transformation `m` onto `v` in place. - ## This is faster than creating an inverse - ## matrix and transformNorm(...) it. Transforming a vector ignores the - ## translational part of the matrix. - - # see vector2d:s equivalent for a deeper look how/why this works - vec&=m.transpose - -proc tryNormalize*(v:var TVector3d):bool= - ## Modifies `v` to have a length of 1.0, keeping its angle. - ## If `v` has zero length (and thus no angle), it is left unmodified and false is - ## returned, otherwise true is returned. - let mag=v.len - - if mag==0.0: - return false - - v.x/=mag - v.y/=mag - v.z/=mag - - return true - -proc normalize*(v:var TVector3d) {.inline.}= - ## Modifies `v` to have a length of 1.0, keeping its angle. - ## If `v` has zero length, an EDivByZero will be raised. 
- if not tryNormalize(v): - raise newException(DivByZeroError,"Cannot normalize zero length vector") - -proc rotate*(vec:var TVector3d,angle:float,axis:TVector3d)= - ## Rotates `vec` in place, with `angle` radians over `axis`, which passes - ## through origo. - - # see PDF document http://inside.mines.edu/~gmurray/ArbitraryAxisRotation/ArbitraryAxisRotation.pdf - # for how this is computed - - var normax=axis - if not normax.tryNormalize: - raise newException(DivByZeroError,"Cannot rotate around zero length axis") - - let - cs=cos(angle) - si=sin(angle) - omc=1.0-cs - u=normax.x - v=normax.y - w=normax.z - x=vec.x - y=vec.y - z=vec.z - uxyzomc=(u*x+v*y+w*z)*omc - - vec.x=u*uxyzomc+x*cs+(v*z-w*y)*si - vec.y=v*uxyzomc+y*cs+(w*x-u*z)*si - vec.z=w*uxyzomc+z*cs+(u*y-v*x)*si - -proc scale*(v:var TVector3d,s:float)= - ## Scales the vector in place with factor `s` - v.x*=s - v.y*=s - v.z*=s - -proc stretch*(v:var TVector3d,sx,sy,sz:float)= - ## Scales the vector non uniformly with factors `sx` , `sy` , `sz` - v.x*=sx - v.y*=sy - v.z*=sz - -proc mirror*(v:var TVector3d,planeperp:TVector3d)= - ## Computes the mirrored vector of `v` over the plane - ## that has `planeperp` as normal direction. - ## `planeperp` does not need to be normalized. - - var n=planeperp - n.normalize - - let - x=v.x - y=v.y - z=v.z - a=n.x - b=n.y - c=n.z - ac=a*c - ab=a*b - bc=b*c - - v.x= -2*(ac*z+ab*y+a*a*x)+x - v.y= -2*(bc*z+b*b*y+ab*x)+y - v.z= -2*(c*c*z+bc*y+ac*x)+z - - -proc `-` *(v:TVector3d):TVector3d= - ## Negates a vector - result.x= -v.x - result.y= -v.y - result.z= -v.z - -# declare templated binary operators -makeBinOpVector(`+`) -makeBinOpVector(`-`) -makeBinOpVector(`*`) -makeBinOpVector(`/`) -makeBinOpAssignVector(`+=`) -makeBinOpAssignVector(`-=`) -makeBinOpAssignVector(`*=`) -makeBinOpAssignVector(`/=`) - -proc dot*(v1,v2:TVector3d):float {.inline.}= - ## Computes the dot product of two vectors. - ## Returns 0.0 if the vectors are perpendicular. 
- return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z - -proc cross*(v1,v2:TVector3d):TVector3d {.inline.}= - ## Computes the cross product of two vectors. - ## The result is a vector which is perpendicular - ## to the plane of `v1` and `v2`, which means - ## cross(xaxis,yaxis)=zaxis. The magnitude of the result is - ## zero if the vectors are colinear. - result.x = (v1.y * v2.z) - (v2.y * v1.z) - result.y = (v1.z * v2.x) - (v2.z * v1.x) - result.z = (v1.x * v2.y) - (v2.x * v1.y) - -proc equals*(v1,v2:TVector3d,tol=1.0e-6):bool= - ## Checks if two vectors approximately equals with a tolerance. - return abs(v2.x-v1.x)<=tol and abs(v2.y-v1.y)<=tol and abs(v2.z-v1.z)<=tol - -proc `=~` *(v1,v2:TVector3d):bool= - ## Checks if two vectors approximately equals with a - ## hardcoded tolerance 1e-6 - equals(v1,v2) - -proc angleTo*(v1,v2:TVector3d):float= - ## Returns the smallest angle between v1 and v2, - ## which is in range 0-PI - var - nv1=v1 - nv2=v2 - if not nv1.tryNormalize or not nv2.tryNormalize: - return 0.0 # zero length vector has zero angle to any other vector - return safeArccos(dot(nv1,nv2)) - -proc arbitraryAxis*(norm:TVector3d):TMatrix3d {.noInit.}= - ## Computes the rotation matrix that would transform - ## world z vector into `norm`. The inverse of this matrix - ## is useful to transform a planar 3d object to 2d space. - ## This is the same algorithm used to interpret DXF and DWG files. - const lim=1.0/64.0 - var ax,ay,az:TVector3d - if abs(norm.x)<lim and abs(norm.y)<lim: - ax=cross(YAXIS,norm) - else: - ax=cross(ZAXIS,norm) - - ax.normalize() - ay=cross(norm,ax) - ay.normalize() - az=cross(ax,ay) - - result.setElements( - ax.x,ax.y,ax.z,0.0, - ay.x,ay.y,ay.z,0.0, - az.x,az.y,az.z,0.0, - 0.0,0.0,0.0,1.0) - -proc bisect*(v1,v2:TVector3d):TVector3d {.noInit.}= - ## Computes the bisector between v1 and v2 as a normalized vector. - ## If one of the input vectors has zero length, a normalized version - ## of the other is returned. 
If both input vectors has zero length, - ## an arbitrary normalized vector `v1` is returned. - var - vmag1=v1.len - vmag2=v2.len - - # zero length vector equals arbitrary vector, just change - # magnitude to one to avoid zero division - if vmag1==0.0: - if vmag2==0: #both are zero length return any normalized vector - return XAXIS - vmag1=1.0 - if vmag2==0.0: vmag2=1.0 - - let - x1=v1.x/vmag1 - y1=v1.y/vmag1 - z1=v1.z/vmag1 - x2=v2.x/vmag2 - y2=v2.y/vmag2 - z2=v2.z/vmag2 - - result.x=(x1 + x2) * 0.5 - result.y=(y1 + y2) * 0.5 - result.z=(z1 + z2) * 0.5 - - if not result.tryNormalize(): - # This can happen if vectors are colinear. In this special case - # there are actually inifinitely many bisectors, we select just - # one of them. - result=v1.cross(XAXIS) - if result.sqrLen<1.0e-9: - result=v1.cross(YAXIS) - if result.sqrLen<1.0e-9: - result=v1.cross(ZAXIS) # now we should be guaranteed to have succeeded - result.normalize - - - -# *************************************** -# TPoint3d implementation -# *************************************** -proc point3d*(x,y,z:float):TPoint3d= - result.x=x - result.y=y - result.z=z - -proc sqrDist*(a,b:TPoint3d):float= - ## Computes the squared distance between `a`and `b` - let dx=b.x-a.x - let dy=b.y-a.y - let dz=b.z-a.z - result=dx*dx+dy*dy+dz*dz - -proc dist*(a,b:TPoint3d):float {.inline.}= - ## Computes the absolute distance between `a`and `b` - result=sqrt(sqrDist(a,b)) - -proc `$` *(p:TPoint3d):string= - ## String representation of `p` - result=rtos(p.x) - result.add(",") - result.add(rtos(p.y)) - result.add(",") - result.add(rtos(p.z)) - -proc `&`*(p:TPoint3d,m:TMatrix3d):TPoint3d= - ## Concatenates a point `p` with a transform `m`, - ## resulting in a new, transformed point. - result.z=m.cz*p.z+m.bz*p.y+m.az*p.x+m.tz - result.y=m.cy*p.z+m.by*p.y+m.ay*p.x+m.ty - result.x=m.cx*p.z+m.bx*p.y+m.ax*p.x+m.tx - -proc `&=` *(p:var TPoint3d,m:TMatrix3d)= - ## Applies transformation `m` onto `p` in place. 
- let - x=p.x - y=p.y - z=p.z - p.x=m.cx*z+m.bx*y+m.ax*x+m.tx - p.y=m.cy*z+m.by*y+m.ay*x+m.ty - p.z=m.cz*z+m.bz*y+m.az*x+m.tz - -proc transformInv*(p:var TPoint3d,m:TMatrix3d)= - ## Applies the inverse of transformation `m` onto `p` in place. - ## If the matrix is not invertable (determinant=0) , EDivByZero will - ## be raised. - - # can possibly be more optimized in the future so use this function when possible - p&=inverse(m) - - -proc `+`*(p:TPoint3d,v:TVector3d):TPoint3d {.noInit,inline.} = - ## Adds a vector `v` to a point `p`, resulting - ## in a new point. - result.x=p.x+v.x - result.y=p.y+v.y - result.z=p.z+v.z - -proc `+=`*(p:var TPoint3d,v:TVector3d) {.noInit,inline.} = - ## Adds a vector `v` to a point `p` in place. - p.x+=v.x - p.y+=v.y - p.z+=v.z - -proc `-`*(p:TPoint3d,v:TVector3d):TPoint3d {.noInit,inline.} = - ## Subtracts a vector `v` from a point `p`, resulting - ## in a new point. - result.x=p.x-v.x - result.y=p.y-v.y - result.z=p.z-v.z - -proc `-`*(p1,p2:TPoint3d):TVector3d {.noInit,inline.} = - ## Subtracts `p2`from `p1` resulting in a difference vector. - result.x=p1.x-p2.x - result.y=p1.y-p2.y - result.z=p1.z-p2.z - -proc `-=`*(p:var TPoint3d,v:TVector3d) {.noInit,inline.} = - ## Subtracts a vector `v` from a point `p` in place. - p.x-=v.x - p.y-=v.y - p.z-=v.z - -proc equals(p1,p2:TPoint3d,tol=1.0e-6):bool {.inline.}= - ## Checks if two points approximately equals with a tolerance. - return abs(p2.x-p1.x)<=tol and abs(p2.y-p1.y)<=tol and abs(p2.z-p1.z)<=tol - -proc `=~`*(p1,p2:TPoint3d):bool {.inline.}= - ## Checks if two vectors approximately equals with a - ## hardcoded tolerance 1e-6 - equals(p1,p2) - -proc rotate*(p:var TPoint3d,rad:float,axis:TVector3d)= - ## Rotates point `p` in place `rad` radians about an axis - ## passing through origo. 
- - var v=vector3d(p.x,p.y,p.z) - v.rotate(rad,axis) # reuse this code here since doing the same thing and quite complicated - p.x=v.x - p.y=v.y - p.z=v.z - -proc rotate*(p:var TPoint3d,angle:float,org:TPoint3d,axis:TVector3d)= - ## Rotates point `p` in place `rad` radians about an axis - ## passing through `org` - - # see PDF document http://inside.mines.edu/~gmurray/ArbitraryAxisRotation/ArbitraryAxisRotation.pdf - # for how this is computed - - var normax=axis - normax.normalize - - let - cs=cos(angle) - omc=1.0-cs - si=sin(angle) - u=normax.x - v=normax.y - w=normax.z - a=org.x - b=org.y - c=org.z - x=p.x - y=p.y - z=p.z - uu=u*u - vv=v*v - ww=w*w - ux=u*p.x - vy=v*p.y - wz=w*p.z - au=a*u - bv=b*v - cw=c*w - uxmvymwz=ux-vy-wz - - p.x=(a*(vv+ww)-u*(bv+cw-uxmvymwz))*omc + x*cs + (b*w+v*z-c*v-w*y)*si - p.y=(b*(uu+ww)-v*(au+cw-uxmvymwz))*omc + y*cs + (c*u-a*w+w*x-u*z)*si - p.z=(c*(uu+vv)-w*(au+bv-uxmvymwz))*omc + z*cs + (a*v+u*y-b*u-v*x)*si - -proc scale*(p:var TPoint3d,fac:float) {.inline.}= - ## Scales a point in place `fac` times with world origo as origin. - p.x*=fac - p.y*=fac - p.z*=fac - -proc scale*(p:var TPoint3d,fac:float,org:TPoint3d){.inline.}= - ## Scales the point in place `fac` times with `org` as origin. - p.x=(p.x - org.x) * fac + org.x - p.y=(p.y - org.y) * fac + org.y - p.z=(p.z - org.z) * fac + org.z - -proc stretch*(p:var TPoint3d,facx,facy,facz:float){.inline.}= - ## Scales a point in place non uniformly `facx` , `facy` , `facz` times - ## with world origo as origin. - p.x*=facx - p.y*=facy - p.z*=facz - -proc stretch*(p:var TPoint3d,facx,facy,facz:float,org:TPoint3d){.inline.}= - ## Scales the point in place non uniformly `facx` , `facy` , `facz` times - ## with `org` as origin. - p.x=(p.x - org.x) * facx + org.x - p.y=(p.y - org.y) * facy + org.y - p.z=(p.z - org.z) * facz + org.z - - -proc move*(p:var TPoint3d,dx,dy,dz:float){.inline.}= - ## Translates a point `dx` , `dy` , `dz` in place. 
- p.x+=dx - p.y+=dy - p.z+=dz - -proc move*(p:var TPoint3d,v:TVector3d){.inline.}= - ## Translates a point with vector `v` in place. - p.x+=v.x - p.y+=v.y - p.z+=v.z - -proc area*(a,b,c:TPoint3d):float {.inline.}= - ## Computes the area of the triangle thru points `a` , `b` and `c` - - # The area of a planar 3d quadliteral is the magnitude of the cross - # product of two edge vectors. Taking this time 0.5 gives the triangle area. - return cross(b-a,c-a).len*0.5 - diff --git a/lib/pure/bitops.nim b/lib/pure/bitops.nim new file mode 100644 index 000000000..0d3351ee5 --- /dev/null +++ b/lib/pure/bitops.nim @@ -0,0 +1,883 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2017 Nim Authors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a series of low level methods for bit manipulation. +## +## By default, compiler intrinsics are used where possible to improve performance +## on supported compilers: `GCC`, `LLVM_GCC`, `CLANG`, `VCC`, `ICC`. +## +## The module will fallback to pure nim procs in case the backend is not supported. +## You can also use the flag `noIntrinsicsBitOpts` to disable compiler intrinsics. +## +## This module is also compatible with other backends: `JavaScript`, `NimScript` +## as well as the `compiletime VM`. +## +## As a result of using optimized functions/intrinsics, some functions can return +## undefined results if the input is invalid. You can use the flag `noUndefinedBitOpts` +## to force predictable behaviour for all input, causing a small performance hit. +## +## At this time only `fastLog2`, `firstSetBit`, `countLeadingZeroBits` and `countTrailingZeroBits` +## may return undefined and/or platform dependent values if given invalid input. 
+ +import std/macros +import std/private/since +from std/private/bitops_utils import forwardImpl, castToUnsigned + +func bitnot*[T: SomeInteger](x: T): T {.magic: "BitnotI".} + ## Computes the `bitwise complement` of the integer `x`. + +func internalBitand[T: SomeInteger](x, y: T): T {.magic: "BitandI".} + +func internalBitor[T: SomeInteger](x, y: T): T {.magic: "BitorI".} + +func internalBitxor[T: SomeInteger](x, y: T): T {.magic: "BitxorI".} + +macro bitand*[T: SomeInteger](x, y: T; z: varargs[T]): T = + ## Computes the `bitwise and` of all arguments collectively. + let fn = bindSym("internalBitand") + result = newCall(fn, x, y) + for extra in z: + result = newCall(fn, result, extra) + +macro bitor*[T: SomeInteger](x, y: T; z: varargs[T]): T = + ## Computes the `bitwise or` of all arguments collectively. + let fn = bindSym("internalBitor") + result = newCall(fn, x, y) + for extra in z: + result = newCall(fn, result, extra) + +macro bitxor*[T: SomeInteger](x, y: T; z: varargs[T]): T = + ## Computes the `bitwise xor` of all arguments collectively. + let fn = bindSym("internalBitxor") + result = newCall(fn, x, y) + for extra in z: + result = newCall(fn, result, extra) + + +type BitsRange*[T] = range[0..sizeof(T)*8-1] + ## A range with all bit positions for type `T`. + +template typeMasked[T: SomeInteger](x: T): T = + when defined(js): + T(x and ((0xffffffff_ffffffff'u shr (64 - sizeof(T) * 8)))) + else: + x + +func bitsliced*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} = + ## Returns an extracted (and shifted) slice of bits from `v`. + runnableExamples: + doAssert 0b10111.bitsliced(2 .. 4) == 0b101 + doAssert 0b11100.bitsliced(0 .. 
2) == 0b100 + doAssert 0b11100.bitsliced(0 ..< 3) == 0b100 + + let + upmost = sizeof(T) * 8 - 1 + uv = v.castToUnsigned + ((uv shl (upmost - slice.b)).typeMasked shr (upmost - slice.b + slice.a)).T + +proc bitslice*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} = + ## Mutates `v` into an extracted (and shifted) slice of bits from `v`. + runnableExamples: + var x = 0b101110 + x.bitslice(2 .. 4) + doAssert x == 0b011 + + let + upmost = sizeof(T) * 8 - 1 + uv = v.castToUnsigned + v = ((uv shl (upmost - slice.b)).typeMasked shr (upmost - slice.b + slice.a)).T + +func toMask*[T: SomeInteger](slice: Slice[int]): T {.inline, since: (1, 3).} = + ## Creates a bitmask based on a slice of bits. + runnableExamples: + doAssert toMask[int32](1 .. 3) == 0b1110'i32 + doAssert toMask[int32](0 .. 3) == 0b1111'i32 + + let + upmost = sizeof(T) * 8 - 1 + bitmask = bitnot(0.T).castToUnsigned + ((bitmask shl (upmost - slice.b + slice.a)).typeMasked shr (upmost - slice.b)).T + +proc masked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} = + ## Returns `v`, with only the `1` bits from `mask` matching those of + ## `v` set to 1. + ## + ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation. + runnableExamples: + let v = 0b0000_0011'u8 + doAssert v.masked(0b0000_1010'u8) == 0b0000_0010'u8 + + bitand(v, mask) + +func masked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} = + ## Returns `v`, with only the `1` bits in the range of `slice` + ## matching those of `v` set to 1. + ## + ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation. + runnableExamples: + let v = 0b0000_1011'u8 + doAssert v.masked(1 .. 3) == 0b0000_1010'u8 + + bitand(v, toMask[T](slice)) + +proc mask*[T: SomeInteger](v: var T; mask: T) {.inline, since: (1, 3).} = + ## Mutates `v`, with only the `1` bits from `mask` matching those of + ## `v` set to 1. + ## + ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation. 
+ runnableExamples: + var v = 0b0000_0011'u8 + v.mask(0b0000_1010'u8) + doAssert v == 0b0000_0010'u8 + + v = bitand(v, mask) + +proc mask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} = + ## Mutates `v`, with only the `1` bits in the range of `slice` + ## matching those of `v` set to 1. + ## + ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation. + runnableExamples: + var v = 0b0000_1011'u8 + v.mask(1 .. 3) + doAssert v == 0b0000_1010'u8 + + v = bitand(v, toMask[T](slice)) + +func setMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} = + ## Returns `v`, with all the `1` bits from `mask` set to 1. + ## + ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation. + runnableExamples: + let v = 0b0000_0011'u8 + doAssert v.setMasked(0b0000_1010'u8) == 0b0000_1011'u8 + + bitor(v, mask) + +func setMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} = + ## Returns `v`, with all the `1` bits in the range of `slice` set to 1. + ## + ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation. + runnableExamples: + let v = 0b0000_0011'u8 + doAssert v.setMasked(2 .. 3) == 0b0000_1111'u8 + + bitor(v, toMask[T](slice)) + +proc setMask*[T: SomeInteger](v: var T; mask: T) {.inline.} = + ## Mutates `v`, with all the `1` bits from `mask` set to 1. + ## + ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation. + runnableExamples: + var v = 0b0000_0011'u8 + v.setMask(0b0000_1010'u8) + doAssert v == 0b0000_1011'u8 + + v = bitor(v, mask) + +proc setMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} = + ## Mutates `v`, with all the `1` bits in the range of `slice` set to 1. + ## + ## Effectively maps to a `bitor <#bitor.m,T,T,varargs[T]>`_ operation. + runnableExamples: + var v = 0b0000_0011'u8 + v.setMask(2 .. 
3) + doAssert v == 0b0000_1111'u8 + + v = bitor(v, toMask[T](slice)) + +func clearMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} = + ## Returns `v`, with all the `1` bits from `mask` set to 0. + ## + ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation + ## with an *inverted mask*. + runnableExamples: + let v = 0b0000_0011'u8 + doAssert v.clearMasked(0b0000_1010'u8) == 0b0000_0001'u8 + + bitand(v, bitnot(mask)) + +func clearMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} = + ## Returns `v`, with all the `1` bits in the range of `slice` set to 0. + ## + ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation + ## with an *inverted mask*. + runnableExamples: + let v = 0b0000_0011'u8 + doAssert v.clearMasked(1 .. 3) == 0b0000_0001'u8 + + bitand(v, bitnot(toMask[T](slice))) + +proc clearMask*[T: SomeInteger](v: var T; mask: T) {.inline.} = + ## Mutates `v`, with all the `1` bits from `mask` set to 0. + ## + ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation + ## with an *inverted mask*. + runnableExamples: + var v = 0b0000_0011'u8 + v.clearMask(0b0000_1010'u8) + doAssert v == 0b0000_0001'u8 + + v = bitand(v, bitnot(mask)) + +proc clearMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} = + ## Mutates `v`, with all the `1` bits in the range of `slice` set to 0. + ## + ## Effectively maps to a `bitand <#bitand.m,T,T,varargs[T]>`_ operation + ## with an *inverted mask*. + runnableExamples: + var v = 0b0000_0011'u8 + v.clearMask(1 .. 3) + doAssert v == 0b0000_0001'u8 + + v = bitand(v, bitnot(toMask[T](slice))) + +func flipMasked*[T: SomeInteger](v, mask :T): T {.inline, since: (1, 3).} = + ## Returns `v`, with all the `1` bits from `mask` flipped. + ## + ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation. 
+ runnableExamples: + let v = 0b0000_0011'u8 + doAssert v.flipMasked(0b0000_1010'u8) == 0b0000_1001'u8 + + bitxor(v, mask) + +func flipMasked*[T: SomeInteger](v: T; slice: Slice[int]): T {.inline, since: (1, 3).} = + ## Returns `v`, with all the `1` bits in the range of `slice` flipped. + ## + ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation. + runnableExamples: + let v = 0b0000_0011'u8 + doAssert v.flipMasked(1 .. 3) == 0b0000_1101'u8 + + bitxor(v, toMask[T](slice)) + +proc flipMask*[T: SomeInteger](v: var T; mask: T) {.inline.} = + ## Mutates `v`, with all the `1` bits from `mask` flipped. + ## + ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation. + runnableExamples: + var v = 0b0000_0011'u8 + v.flipMask(0b0000_1010'u8) + doAssert v == 0b0000_1001'u8 + + v = bitxor(v, mask) + +proc flipMask*[T: SomeInteger](v: var T; slice: Slice[int]) {.inline, since: (1, 3).} = + ## Mutates `v`, with all the `1` bits in the range of `slice` flipped. + ## + ## Effectively maps to a `bitxor <#bitxor.m,T,T,varargs[T]>`_ operation. + runnableExamples: + var v = 0b0000_0011'u8 + v.flipMask(1 .. 3) + doAssert v == 0b0000_1101'u8 + + v = bitxor(v, toMask[T](slice)) + +proc setBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} = + ## Mutates `v`, with the bit at position `bit` set to 1. + runnableExamples: + var v = 0b0000_0011'u8 + v.setBit(5'u8) + doAssert v == 0b0010_0011'u8 + + v.setMask(1.T shl bit) + +proc clearBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} = + ## Mutates `v`, with the bit at position `bit` set to 0. + runnableExamples: + var v = 0b0000_0011'u8 + v.clearBit(1'u8) + doAssert v == 0b0000_0001'u8 + + v.clearMask(1.T shl bit) + +proc flipBit*[T: SomeInteger](v: var T; bit: BitsRange[T]) {.inline.} = + ## Mutates `v`, with the bit at position `bit` flipped. 
+ runnableExamples: + var v = 0b0000_0011'u8 + v.flipBit(1'u8) + doAssert v == 0b0000_0001'u8 + + v = 0b0000_0011'u8 + v.flipBit(2'u8) + doAssert v == 0b0000_0111'u8 + + v.flipMask(1.T shl bit) + +macro setBits*(v: typed; bits: varargs[typed]): untyped = + ## Mutates `v`, with the bits at positions `bits` set to 1. + runnableExamples: + var v = 0b0000_0011'u8 + v.setBits(3, 5, 7) + doAssert v == 0b1010_1011'u8 + + bits.expectKind(nnkBracket) + result = newStmtList() + for bit in bits: + result.add newCall("setBit", v, bit) + +macro clearBits*(v: typed; bits: varargs[typed]): untyped = + ## Mutates `v`, with the bits at positions `bits` set to 0. + runnableExamples: + var v = 0b1111_1111'u8 + v.clearBits(1, 3, 5, 7) + doAssert v == 0b0101_0101'u8 + + bits.expectKind(nnkBracket) + result = newStmtList() + for bit in bits: + result.add newCall("clearBit", v, bit) + +macro flipBits*(v: typed; bits: varargs[typed]): untyped = + ## Mutates `v`, with the bits at positions `bits` flipped. + runnableExamples: + var v = 0b0000_1111'u8 + v.flipBits(1, 3, 5, 7) + doAssert v == 0b1010_0101'u8 + + bits.expectKind(nnkBracket) + result = newStmtList() + for bit in bits: + result.add newCall("flipBit", v, bit) + + +proc testBit*[T: SomeInteger](v: T; bit: BitsRange[T]): bool {.inline.} = + ## Returns true if the bit in `v` at position `bit` is set to 1. + runnableExamples: + let v = 0b0000_1111'u8 + doAssert v.testBit(0) + doAssert not v.testBit(7) + + let mask = 1.T shl bit + return (v and mask) == mask + +# #### Pure Nim version #### + +func firstSetBitNim(x: uint32): int {.inline.} = + ## Returns the 1-based index of the least significant set bit of x. `x` must be nonzero; zero input does not yield 0. 
+ # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup + const lookup: array[32, uint8] = [0'u8, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, + 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9] + let v = x.uint32 + let k = not v + 1 # get two's complement # cast[uint32](-cast[int32](v)) + result = 1 + lookup[uint32((v and k) * 0x077CB531'u32) shr 27].int + +func firstSetBitNim(x: uint64): int {.inline.} = + ## Returns the 1-based index of the least significant set bit of x. `x` must be nonzero; zero input does not yield 0. + # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup + let v = uint64(x) + var k = uint32(v and 0xFFFFFFFF'u32) + if k == 0: + k = uint32(v shr 32'u32) and 0xFFFFFFFF'u32 + result = 32 + else: + result = 0 + result += firstSetBitNim(k) + +func fastlog2Nim(x: uint32): int {.inline.} = + ## Quickly find the log base 2 of a 32-bit or less integer. + # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn + # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers + const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, + 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31] + var v = x.uint32 + v = v or v shr 1 # first round down to one less than a power of 2 + v = v or v shr 2 + v = v or v shr 4 + v = v or v shr 8 + v = v or v shr 16 + result = lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int + +func fastlog2Nim(x: uint64): int {.inline.} = + ## Quickly find the log base 2 of a 64-bit integer. 
+ # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn + # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers + const lookup: array[64, uint8] = [0'u8, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54, + 33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62, + 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31, + 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63] + var v = x.uint64 + v = v or v shr 1 # first round down to one less than a power of 2 + v = v or v shr 2 + v = v or v shr 4 + v = v or v shr 8 + v = v or v shr 16 + v = v or v shr 32 + result = lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int + +import system/countbits_impl + +const useBuiltinsRotate = (defined(amd64) or defined(i386)) and + (defined(gcc) or defined(clang) or defined(vcc) or + (defined(icl) and not defined(cpp))) and useBuiltins + +template parityImpl[T](value: T): int = + # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel + var v = value + when sizeof(T) == 8: + v = v xor (v shr 32) + when sizeof(T) >= 4: + v = v xor (v shr 16) + when sizeof(T) >= 2: + v = v xor (v shr 8) + v = v xor (v shr 4) + v = v and 0xf + ((0x6996'u shr v) and 1).int + + +when useGCC_builtins: + # Returns the bit parity in value + proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.} + proc builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.} + + # Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero. + proc builtin_ffs(x: cint): cint {.importc: "__builtin_ffs", cdecl.} + proc builtin_ffsll(x: clonglong): cint {.importc: "__builtin_ffsll", cdecl.} + + # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined. 
+ proc builtin_clz(x: cuint): cint {.importc: "__builtin_clz", cdecl.} + proc builtin_clzll(x: culonglong): cint {.importc: "__builtin_clzll", cdecl.} + + # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined. + proc builtin_ctz(x: cuint): cint {.importc: "__builtin_ctz", cdecl.} + proc builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.} + +elif useVCC_builtins: + # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1). + func bitScanReverse(index: ptr culong, mask: culong): uint8 {. + importc: "_BitScanReverse", header: "<intrin.h>".} + func bitScanReverse64(index: ptr culong, mask: uint64): uint8 {. + importc: "_BitScanReverse64", header: "<intrin.h>".} + + # Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1). + func bitScanForward(index: ptr culong, mask: culong): uint8 {. + importc: "_BitScanForward", header: "<intrin.h>".} + func bitScanForward64(index: ptr culong, mask: uint64): uint8 {. + importc: "_BitScanForward64", header: "<intrin.h>".} + + template vcc_scan_impl(fnc: untyped; v: untyped): int = + var index {.inject.}: culong = 0 + discard fnc(index.addr, v) + index.int + +elif useICC_builtins: + # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined. + func bitScanForward(p: ptr uint32, b: uint32): uint8 {. + importc: "_BitScanForward", header: "<immintrin.h>".} + func bitScanForward64(p: ptr uint32, b: uint64): uint8 {. + importc: "_BitScanForward64", header: "<immintrin.h>".} + + # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined. + func bitScanReverse(p: ptr uint32, b: uint32): uint8 {. + importc: "_BitScanReverse", header: "<immintrin.h>".} + func bitScanReverse64(p: ptr uint32, b: uint64): uint8 {. 
+ importc: "_BitScanReverse64", header: "<immintrin.h>".} + + template icc_scan_impl(fnc: untyped; v: untyped): int = + var index: uint32 + discard fnc(index.addr, v) + index.int + +func countSetBits*(x: SomeInteger): int {.inline.} = + ## Counts the set bits in an integer (also called `Hamming weight`:idx:). + runnableExamples: + doAssert countSetBits(0b0000_0011'u8) == 2 + doAssert countSetBits(0b1010_1010'u8) == 4 + + result = countSetBitsImpl(x) + +func popcount*(x: SomeInteger): int {.inline.} = + ## Alias for `countSetBits <#countSetBits,SomeInteger>`_ (Hamming weight). + result = countSetBits(x) + +func parityBits*(x: SomeInteger): int {.inline.} = + ## Calculate the bit parity in an integer. If the number of 1-bits + ## is odd, the parity is 1, otherwise 0. + runnableExamples: + doAssert parityBits(0b0000_0000'u8) == 0 + doAssert parityBits(0b0101_0001'u8) == 1 + doAssert parityBits(0b0110_1001'u8) == 0 + doAssert parityBits(0b0111_1111'u8) == 1 + + # Can be used as a base when creating an ASM version. + # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd + let x = x.castToUnsigned + when nimvm: + result = forwardImpl(parityImpl, x) + else: + when useGCC_builtins: + when sizeof(x) <= 4: result = builtin_parity(x.uint32).int + else: result = builtin_parityll(x.uint64).int + else: + when sizeof(x) <= 4: result = parityImpl(x.uint32) + else: result = parityImpl(x.uint64) + +func firstSetBit*(x: SomeInteger): int {.inline.} = + ## Returns the 1-based index of the least significant set bit of `x`. + ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0, + ## otherwise the result is undefined. + runnableExamples: + doAssert firstSetBit(0b0000_0001'u8) == 1 + doAssert firstSetBit(0b0000_0010'u8) == 2 + doAssert firstSetBit(0b0000_0100'u8) == 3 + doAssert firstSetBit(0b0000_1000'u8) == 4 + doAssert firstSetBit(0b0000_1111'u8) == 1 + + # GCC builtin 'builtin_ffs' already handles zero input. 
+ let x = x.castToUnsigned + when nimvm: + when noUndefined: + if x == 0: + return 0 + result = forwardImpl(firstSetBitNim, x) + else: + when noUndefined and not useGCC_builtins: + if x == 0: + return 0 + when useGCC_builtins: + when sizeof(x) <= 4: result = builtin_ffs(cast[cint](x.cuint)).int + else: result = builtin_ffsll(cast[clonglong](x.culonglong)).int + elif useVCC_builtins: + when sizeof(x) <= 4: + result = 1 + vcc_scan_impl(bitScanForward, x.culong) + elif arch64: + result = 1 + vcc_scan_impl(bitScanForward64, x.uint64) + else: + result = firstSetBitNim(x.uint64) + elif useICC_builtins: + when sizeof(x) <= 4: + result = 1 + icc_scan_impl(bitScanForward, x.uint32) + elif arch64: + result = 1 + icc_scan_impl(bitScanForward64, x.uint64) + else: + result = firstSetBitNim(x.uint64) + else: + when sizeof(x) <= 4: result = firstSetBitNim(x.uint32) + else: result = firstSetBitNim(x.uint64) + +func fastLog2*(x: SomeInteger): int {.inline.} = + ## Quickly find the log base 2 of an integer. + ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is -1, + ## otherwise the result is undefined. 
+ runnableExamples: + doAssert fastLog2(0b0000_0001'u8) == 0 + doAssert fastLog2(0b0000_0010'u8) == 1 + doAssert fastLog2(0b0000_0100'u8) == 2 + doAssert fastLog2(0b0000_1000'u8) == 3 + doAssert fastLog2(0b0000_1111'u8) == 3 + + let x = x.castToUnsigned + when noUndefined: + if x == 0: + return -1 + when nimvm: + result = forwardImpl(fastlog2Nim, x) + else: + when useGCC_builtins: + when sizeof(x) <= 4: result = 31 - builtin_clz(x.uint32).int + else: result = 63 - builtin_clzll(x.uint64).int + elif useVCC_builtins: + when sizeof(x) <= 4: + result = vcc_scan_impl(bitScanReverse, x.culong) + elif arch64: + result = vcc_scan_impl(bitScanReverse64, x.uint64) + else: + result = fastlog2Nim(x.uint64) + elif useICC_builtins: + when sizeof(x) <= 4: + result = icc_scan_impl(bitScanReverse, x.uint32) + elif arch64: + result = icc_scan_impl(bitScanReverse64, x.uint64) + else: + result = fastlog2Nim(x.uint64) + else: + when sizeof(x) <= 4: result = fastlog2Nim(x.uint32) + else: result = fastlog2Nim(x.uint64) + +func countLeadingZeroBits*(x: SomeInteger): int {.inline.} = + ## Returns the number of leading zero bits in an integer. + ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0, + ## otherwise the result is undefined. 
+ ## + ## **See also:** + ## * `countTrailingZeroBits proc <#countTrailingZeroBits,SomeInteger>`_ + runnableExamples: + doAssert countLeadingZeroBits(0b0000_0001'u8) == 7 + doAssert countLeadingZeroBits(0b0000_0010'u8) == 6 + doAssert countLeadingZeroBits(0b0000_0100'u8) == 5 + doAssert countLeadingZeroBits(0b0000_1000'u8) == 4 + doAssert countLeadingZeroBits(0b0000_1111'u8) == 4 + + let x = x.castToUnsigned + when noUndefined: + if x == 0: + return 0 + when nimvm: + result = sizeof(x)*8 - 1 - forwardImpl(fastlog2Nim, x) + else: + when useGCC_builtins: + when sizeof(x) <= 4: result = builtin_clz(x.uint32).int - (32 - sizeof(x)*8) + else: result = builtin_clzll(x.uint64).int + else: + when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint32) + else: result = sizeof(x)*8 - 1 - fastlog2Nim(x.uint64) + +func countTrailingZeroBits*(x: SomeInteger): int {.inline.} = + ## Returns the number of trailing zeros in an integer. + ## If `x` is zero, when `noUndefinedBitOpts` is set, the result is 0, + ## otherwise the result is undefined. + ## + ## **See also:** + ## * `countLeadingZeroBits proc <#countLeadingZeroBits,SomeInteger>`_ + runnableExamples: + doAssert countTrailingZeroBits(0b0000_0001'u8) == 0 + doAssert countTrailingZeroBits(0b0000_0010'u8) == 1 + doAssert countTrailingZeroBits(0b0000_0100'u8) == 2 + doAssert countTrailingZeroBits(0b0000_1000'u8) == 3 + doAssert countTrailingZeroBits(0b0000_1111'u8) == 0 + + let x = x.castToUnsigned + when noUndefined: + if x == 0: + return 0 + when nimvm: + result = firstSetBit(x) - 1 + else: + when useGCC_builtins: + when sizeof(x) <= 4: result = builtin_ctz(x.uint32).int + else: result = builtin_ctzll(x.uint64).int + else: + result = firstSetBit(x) - 1 + +when useBuiltinsRotate: + when defined(gcc): + # GCC was tested until version 4.8.1 and intrinsics were present. Not tested + # in previous versions. 
+ func builtin_rotl8(value: uint8, shift: cint): uint8 + {.importc: "__rolb", header: "<x86intrin.h>".} + func builtin_rotl16(value: cushort, shift: cint): cushort + {.importc: "__rolw", header: "<x86intrin.h>".} + func builtin_rotl32(value: cuint, shift: cint): cuint + {.importc: "__rold", header: "<x86intrin.h>".} + when defined(amd64): + func builtin_rotl64(value: culonglong, shift: cint): culonglong + {.importc: "__rolq", header: "<x86intrin.h>".} + + func builtin_rotr8(value: uint8, shift: cint): uint8 + {.importc: "__rorb", header: "<x86intrin.h>".} + func builtin_rotr16(value: cushort, shift: cint): cushort + {.importc: "__rorw", header: "<x86intrin.h>".} + func builtin_rotr32(value: cuint, shift: cint): cuint + {.importc: "__rord", header: "<x86intrin.h>".} + when defined(amd64): + func builtin_rotr64(value: culonglong, shift: cint): culonglong + {.importc: "__rorq", header: "<x86intrin.h>".} + elif defined(clang): + # In CLANG, builtins have been present since version 8.0.0 and intrinsics + # since version 9.0.0. This implementation chose the builtins, as they have + # been around for longer. 
+ # https://releases.llvm.org/8.0.0/tools/clang/docs/ReleaseNotes.html#non-comprehensive-list-of-changes-in-this-release + # https://releases.llvm.org/8.0.0/tools/clang/docs/LanguageExtensions.html#builtin-rotateleft + # source for correct declarations: https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Basic/Builtins.def + func builtin_rotl8(value: uint8, shift: uint8): uint8 + {.importc: "__builtin_rotateleft8", nodecl.} + func builtin_rotl16(value: cushort, shift: cushort): cushort + {.importc: "__builtin_rotateleft16", nodecl.} + func builtin_rotl32(value: cuint, shift: cuint): cuint + {.importc: "__builtin_rotateleft32", nodecl.} + when defined(amd64): + func builtin_rotl64(value: culonglong, shift: culonglong): culonglong + {.importc: "__builtin_rotateleft64", nodecl.} + + func builtin_rotr8(value: uint8, shift: uint8): uint8 + {.importc: "__builtin_rotateright8", nodecl.} + func builtin_rotr16(value: cushort, shift: cushort): cushort + {.importc: "__builtin_rotateright16", nodecl.} + func builtin_rotr32(value: cuint, shift: cuint): cuint + {.importc: "__builtin_rotateright32", nodecl.} + when defined(amd64): + # shift is unsigned, refs https://github.com/llvm-mirror/clang/commit/892de415b7fde609dafc4e6c1643b7eaa0150a4d + func builtin_rotr64(value: culonglong, shift: culonglong): culonglong + {.importc: "__builtin_rotateright64", nodecl.} + elif defined(vcc): + # Tested on Microsoft (R) C/C++ Optimizing Compiler 19.28.29335 x64 and x86. + # Not tested in previous versions. 
+ # https://docs.microsoft.com/en-us/cpp/intrinsics/rotl8-rotl16?view=msvc-160 + # https://docs.microsoft.com/en-us/cpp/intrinsics/rotr8-rotr16?view=msvc-160 + # https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/rotl-rotl64-rotr-rotr64?view=msvc-160 + func builtin_rotl8(value: uint8, shift: uint8): uint8 + {.importc: "_rotl8", header: "<intrin.h>".} + func builtin_rotl16(value: cushort, shift: uint8): cushort + {.importc: "_rotl16", header: "<intrin.h>".} + func builtin_rotl32(value: cuint, shift: cint): cuint + {.importc: "_rotl", header: "<stdlib.h>".} + when defined(amd64): + func builtin_rotl64(value: culonglong, shift: cint): culonglong + {.importc: "_rotl64", header: "<stdlib.h>".} + + func builtin_rotr8(value: uint8, shift: uint8): uint8 + {.importc: "_rotr8", header: "<intrin.h>".} + func builtin_rotr16(value: cushort, shift: uint8): cushort + {.importc: "_rotr16", header: "<intrin.h>".} + func builtin_rotr32(value: cuint, shift: cint): cuint + {.importc: "_rotr", header: "<stdlib.h>".} + when defined(amd64): + func builtin_rotr64(value: culonglong, shift: cint): culonglong + {.importc: "_rotr64", header: "<stdlib.h>".} + elif defined(icl): + # Tested on Intel(R) C++ Intel(R) 64 Compiler Classic Version 2021.1.2 Build + # 20201208_000000 x64 and x86. Not tested in previous versions. 
+ func builtin_rotl8(value: uint8, shift: cint): uint8 + {.importc: "__rolb", header: "<immintrin.h>".} + func builtin_rotl16(value: cushort, shift: cint): cushort + {.importc: "__rolw", header: "<immintrin.h>".} + func builtin_rotl32(value: cuint, shift: cint): cuint + {.importc: "__rold", header: "<immintrin.h>".} + when defined(amd64): + func builtin_rotl64(value: culonglong, shift: cint): culonglong + {.importc: "__rolq", header: "<immintrin.h>".} + + func builtin_rotr8(value: uint8, shift: cint): uint8 + {.importc: "__rorb", header: "<immintrin.h>".} + func builtin_rotr16(value: cushort, shift: cint): cushort + {.importc: "__rorw", header: "<immintrin.h>".} + func builtin_rotr32(value: cuint, shift: cint): cuint + {.importc: "__rord", header: "<immintrin.h>".} + when defined(amd64): + func builtin_rotr64(value: culonglong, shift: cint): culonglong + {.importc: "__rorq", header: "<immintrin.h>".} + +func rotl[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} = + ## Left-rotate bits in a `value`. + # https://stackoverflow.com/a/776523 + const mask = 8 * sizeof(value) - 1 + let rot = rot and mask + (value shl rot) or (value shr ((-rot) and mask)) + +func rotr[T: SomeUnsignedInt](value: T, rot: int32): T {.inline.} = + ## Right-rotate bits in a `value`. + const mask = 8 * sizeof(value) - 1 + let rot = rot and mask + (value shr rot) or (value shl ((-rot) and mask)) + +func shiftTypeTo(size: static int, shift: int): auto {.inline.} = + ## Returns the `shift` for the rotation according to the compiler and the + ## `size`. 
+ when (defined(vcc) and (size in [4, 8])) or defined(gcc) or defined(icl): + cint(shift) + elif (defined(vcc) and (size in [1, 2])) or (defined(clang) and size == 1): + uint8(shift) + elif defined(clang): + when size == 2: + cushort(shift) + elif size == 4: + cuint(shift) + elif size == 8: + culonglong(shift) + +func rotateLeftBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} = + ## Left-rotate bits in a `value`. + runnableExamples: + doAssert rotateLeftBits(0b0110_1001'u8, 4) == 0b1001_0110'u8 + doAssert rotateLeftBits(0b00111100_11000011'u16, 8) == + 0b11000011_00111100'u16 + doAssert rotateLeftBits(0b0000111111110000_1111000000001111'u32, 16) == + 0b1111000000001111_0000111111110000'u32 + doAssert rotateLeftBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) == + 0b11111111000000000000000011111111_00000000111111111111111100000000'u64 + when nimvm: + rotl(value, shift.int32) + else: + when useBuiltinsRotate: + const size = sizeof(T) + when size == 1: + builtin_rotl8(value.uint8, shiftTypeTo(size, shift)).T + elif size == 2: + builtin_rotl16(value.cushort, shiftTypeTo(size, shift)).T + elif size == 4: + builtin_rotl32(value.cuint, shiftTypeTo(size, shift)).T + elif size == 8 and arch64: + builtin_rotl64(value.culonglong, shiftTypeTo(size, shift)).T + else: + rotl(value, shift.int32) + else: + rotl(value, shift.int32) + +func rotateRightBits*[T: SomeUnsignedInt](value: T, shift: range[0..(sizeof(T) * 8)]): T {.inline.} = + ## Right-rotate bits in a `value`. 
+ runnableExamples: + doAssert rotateRightBits(0b0110_1001'u8, 4) == 0b1001_0110'u8 + doAssert rotateRightBits(0b00111100_11000011'u16, 8) == + 0b11000011_00111100'u16 + doAssert rotateRightBits(0b0000111111110000_1111000000001111'u32, 16) == + 0b1111000000001111_0000111111110000'u32 + doAssert rotateRightBits(0b00000000111111111111111100000000_11111111000000000000000011111111'u64, 32) == + 0b11111111000000000000000011111111_00000000111111111111111100000000'u64 + when nimvm: + rotr(value, shift.int32) + else: + when useBuiltinsRotate: + const size = sizeof(T) + when size == 1: + builtin_rotr8(value.uint8, shiftTypeTo(size, shift)).T + elif size == 2: + builtin_rotr16(value.cushort, shiftTypeTo(size, shift)).T + elif size == 4: + builtin_rotr32(value.cuint, shiftTypeTo(size, shift)).T + elif size == 8 and arch64: + builtin_rotr64(value.culonglong, shiftTypeTo(size, shift)).T + else: + rotr(value, shift.int32) + else: + rotr(value, shift.int32) + +func repeatBits[T: SomeUnsignedInt](x: SomeUnsignedInt; retType: type[T]): T = + result = x + var i = 1 + while i != (sizeof(T) div sizeof(x)): + result = (result shl (sizeof(x)*8*i)) or result + i *= 2 + +func reverseBits*[T: SomeUnsignedInt](x: T): T = + ## Return the bit reversal of x. 
+ runnableExamples: + doAssert reverseBits(0b10100100'u8) == 0b00100101'u8 + doAssert reverseBits(0xdd'u8) == 0xbb'u8 + doAssert reverseBits(0xddbb'u16) == 0xddbb'u16 + doAssert reverseBits(0xdeadbeef'u32) == 0xf77db57b'u32 + + template repeat(x: SomeUnsignedInt): T = repeatBits(x, T) + + result = x + result = + ((repeat(0x55u8) and result) shl 1) or + ((repeat(0xaau8) and result) shr 1) + result = + ((repeat(0x33u8) and result) shl 2) or + ((repeat(0xccu8) and result) shr 2) + when sizeof(T) == 1: + result = (result shl 4) or (result shr 4) + when sizeof(T) >= 2: + result = + ((repeat(0x0fu8) and result) shl 4) or + ((repeat(0xf0u8) and result) shr 4) + when sizeof(T) == 2: + result = (result shl 8) or (result shr 8) + when sizeof(T) >= 4: + result = + ((repeat(0x00ffu16) and result) shl 8) or + ((repeat(0xff00u16) and result) shr 8) + when sizeof(T) == 4: + result = (result shl 16) or (result shr 16) + when sizeof(T) == 8: + result = + ((repeat(0x0000ffffu32) and result) shl 16) or + ((repeat(0xffff0000u32) and result) shr 16) + result = (result shl 32) or (result shr 32) diff --git a/lib/pure/browsers.nim b/lib/pure/browsers.nim index c6a603318..59e2078df 100644 --- a/lib/pure/browsers.nim +++ b/lib/pure/browsers.nim @@ -9,40 +9,104 @@ ## This module implements a simple proc for opening URLs with the user's ## default browser. +## +## Unstable API. 
-import strutils +import std/private/since # used by the deprecated `openDefaultBrowser()` + +import std/strutils + +when defined(nimPreviewSlimSystem): + import std/assertions when defined(windows): - import winlean + import std/winlean + when defined(nimPreviewSlimSystem): + import std/widestrs + from std/os import absolutePath else: - import os, osproc + import std/os + when not defined(osx): + import std/osproc + +const osOpenCmd* = + when defined(macos) or defined(macosx) or defined(windows): "open" else: "xdg-open" ## \ + ## Alias for the operating system specific *"open"* command, + ## `"open"` on OSX, MacOS and Windows, `"xdg-open"` on Linux, BSD, etc. + +proc prepare(s: string): string = + if s.contains("://"): + result = s + else: + result = "file://" & absolutePath(s) + +proc openDefaultBrowserRaw(url: string) = + ## note the url argument should be already prepared, i.e. the url is passed "AS IS" -proc openDefaultBrowser*(url: string) = - ## opens `url` with the user's default browser. This does not block. - ## - ## Under Windows, ``ShellExecute`` is used. Under Mac OS X the ``open`` - ## command is used. Under Unix, it is checked if ``gnome-open`` exists and - ## used if it does. Next attempt is ``kde-open``, then ``xdg-open``. - ## Otherwise the environment variable ``BROWSER`` is used to determine the - ## default browser to use. 
when defined(windows): - when useWinUnicode: - var o = newWideCString("open") - var u = newWideCString(url) - discard shellExecuteW(0'i32, o, u, nil, nil, SW_SHOWNORMAL) - else: - discard shellExecuteA(0'i32, "open", url, nil, nil, SW_SHOWNORMAL) + var o = newWideCString(osOpenCmd) + var u = newWideCString(url) + discard shellExecuteW(0'i32, o, u, nil, nil, SW_SHOWNORMAL) elif defined(macosx): - discard execShellCmd("open " & quoteShell(url)) + discard execShellCmd(osOpenCmd & " " & quoteShell(url)) else: - const attempts = ["gnome-open ", "kde-open ", "xdg-open "] var u = quoteShell(url) - for a in items(attempts): - if execShellCmd(a & u) == 0: return - for b in getEnv("BROWSER").string.split(PathSep): + if execShellCmd(osOpenCmd & " " & u) == 0: return + for b in getEnv("BROWSER").split(PathSep): try: - # we use ``startProcess`` here because we don't want to block! - discard startProcess(command=b, args=[url], options={poUsePath}) + # we use `startProcess` here because we don't want to block! + discard startProcess(command = b, args = [url], options = {poUsePath}) return except OSError: discard + +proc openDefaultBrowser*(url: string) = + ## Opens `url` with the user's default browser. This does not block. + ## The URL must not be empty string, to open on a blank page see `openDefaultBrowser()`. + ## + ## Under Windows, `ShellExecute` is used. Under Mac OS X the `open` + ## command is used. Under Unix, it is checked if `xdg-open` exists and + ## used if it does. Otherwise the environment variable `BROWSER` is + ## used to determine the default browser to use. + ## + ## This proc doesn't raise an exception on error, beware. 
+ ## + ## ```nim + ## block: openDefaultBrowser("https://nim-lang.org") + ## ``` + doAssert url.len > 0, "URL must not be empty string" + openDefaultBrowserRaw(url) + +proc openDefaultBrowser*() {.since: (1, 1), deprecated: + "not implemented, please open with a specific url instead".} = + ## Intends to open the user's default browser without any `url` (blank page). + ## This does not block. + ## Intends to implement IETF RFC-6694 Section 3, + ## ("about:blank" is reserved for a blank page). + ## + ## Beware that this intended behavior is **not** implemented and + ## considered not worthy to implement here. + ## + ## The following describes the behavior of current implementation: + ## + ## - Under Windows, this will only cause a pop-up dialog \ + ## asking for the application associated with `about` \ + ## (as Windows simply treats `about:` as a protocol like `http`). + ## - Under Mac OS X the `open "about:blank"` command is used. + ## - Under Unix, it is checked if `xdg-open` exists and used \ + ## if it does and open the application associated with `text/html` mime \ + ## (not `x-scheme-handler/http`, so maybe html-viewer \ + ## other than your default browser is opened). \ + ## Otherwise the environment variable `BROWSER` is used \ + ## to determine the default browser to use. + ## + ## This proc doesn't raise an exception on error, beware. + ## + ## ```nim + ## block: openDefaultBrowser() + ## ``` + ## + ## **See also:** + ## + ## * https://tools.ietf.org/html/rfc6694#section-3 + openDefaultBrowserRaw("about:blank") # See IETF RFC-6694 Section 3. diff --git a/lib/pure/cgi.nim b/lib/pure/cgi.nim index e8977b80b..034f224ac 100644 --- a/lib/pure/cgi.nim +++ b/lib/pure/cgi.nim @@ -9,72 +9,32 @@ ## This module implements helper procs for CGI applications. Example: ## -## .. 
code-block:: Nim +## ```Nim +## import std/[strtabs, cgi] ## -## import strtabs, cgi -## -## # Fill the values when debugging: -## when debug: -## setTestData("name", "Klaus", "password", "123456") -## # read the data into `myData` -## var myData = readData() -## # check that the data's variable names are "name" or "password" -## validateData(myData, "name", "password") -## # start generating content: -## writeContentType() -## # generate content: -## write(stdout, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n") -## write(stdout, "<html><head><title>Test</title></head><body>\n") -## writeln(stdout, "your name: " & myData["name"]) -## writeln(stdout, "your password: " & myData["password"]) -## writeln(stdout, "</body></html>") - -import strutils, os, strtabs, cookies - -proc encodeUrl*(s: string): string = - ## Encodes a value to be HTTP safe: This means that characters in the set - ## ``{'A'..'Z', 'a'..'z', '0'..'9', '_'}`` are carried over to the result, - ## a space is converted to ``'+'`` and every other character is encoded as - ## ``'%xx'`` where ``xx`` denotes its hexadecimal value. - result = newStringOfCap(s.len + s.len shr 2) # assume 12% non-alnum-chars - for i in 0..s.len-1: - case s[i] - of 'a'..'z', 'A'..'Z', '0'..'9', '_': add(result, s[i]) - of ' ': add(result, '+') - else: - add(result, '%') - add(result, toHex(ord(s[i]), 2)) - -proc handleHexChar(c: char, x: var int) {.inline.} = - case c - of '0'..'9': x = (x shl 4) or (ord(c) - ord('0')) - of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10) - of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10) - else: assert(false) - -proc decodeUrl*(s: string): string = - ## Decodes a value from its HTTP representation: This means that a ``'+'`` - ## is converted to a space, ``'%xx'`` (where ``xx`` denotes a hexadecimal - ## value) is converted to the character with ordinal number ``xx``, and - ## and every other character is carried over. 
- result = newString(s.len) - var i = 0 - var j = 0 - while i < s.len: - case s[i] - of '%': - var x = 0 - handleHexChar(s[i+1], x) - handleHexChar(s[i+2], x) - inc(i, 2) - result[j] = chr(x) - of '+': result[j] = ' ' - else: result[j] = s[i] - inc(i) - inc(j) - setLen(result, j) - -{.deprecated: [URLDecode: decodeUrl, URLEncode: encodeUrl].} +## # Fill the values when debugging: +## when debug: +## setTestData("name", "Klaus", "password", "123456") +## # read the data into `myData` +## var myData = readData() +## # check that the data's variable names are "name" or "password" +## validateData(myData, "name", "password") +## # start generating content: +## writeContentType() +## # generate content: +## write(stdout, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n") +## write(stdout, "<html><head><title>Test</title></head><body>\n") +## writeLine(stdout, "your name: " & myData["name"]) +## writeLine(stdout, "your password: " & myData["password"]) +## writeLine(stdout, "</body></html>") +## ``` + +import std/[strutils, os, strtabs, cookies, uri] +export uri.encodeUrl, uri.decodeUrl + +when defined(nimPreviewSlimSystem): + import std/syncio + proc addXmlChar(dest: var string, c: char) {.inline.} = case c @@ -86,250 +46,215 @@ proc addXmlChar(dest: var string, c: char) {.inline.} = proc xmlEncode*(s: string): string = ## Encodes a value to be XML safe: - ## * ``"`` is replaced by ``"`` - ## * ``<`` is replaced by ``<`` - ## * ``>`` is replaced by ``>`` - ## * ``&`` is replaced by ``&`` + ## * `"` is replaced by `"` + ## * `<` is replaced by `<` + ## * `>` is replaced by `>` + ## * `&` is replaced by `&` ## * every other character is carried over. 
result = newStringOfCap(s.len + s.len shr 2) for i in 0..len(s)-1: addXmlChar(result, s[i]) type - CgiError* = object of IOError ## exception that is raised if a CGI error occurs - RequestMethod* = enum ## the used request method - methodNone, ## no REQUEST_METHOD environment variable - methodPost, ## query uses the POST method - methodGet ## query uses the GET method - -{.deprecated: [TRequestMethod: RequestMethod, ECgi: CgiError, - XMLencode: xmlEncode].} + CgiError* = object of IOError ## Exception that is raised if a CGI error occurs. + RequestMethod* = enum ## The used request method. + methodNone, ## no REQUEST_METHOD environment variable + methodPost, ## query uses the POST method + methodGet ## query uses the GET method proc cgiError*(msg: string) {.noreturn.} = - ## raises an ECgi exception with message `msg`. - var e: ref CgiError - new(e) - e.msg = msg - raise e + ## Raises a `CgiError` exception with message `msg`. + raise newException(CgiError, msg) proc getEncodedData(allowedMethods: set[RequestMethod]): string = - case getEnv("REQUEST_METHOD").string + case getEnv("REQUEST_METHOD") of "POST": if methodPost notin allowedMethods: cgiError("'REQUEST_METHOD' 'POST' is not supported") - var L = parseInt(getEnv("CONTENT_LENGTH").string) + var L = parseInt(getEnv("CONTENT_LENGTH")) + if L == 0: + return "" result = newString(L) if readBuffer(stdin, addr(result[0]), L) != L: cgiError("cannot read from stdin") of "GET": if methodGet notin allowedMethods: cgiError("'REQUEST_METHOD' 'GET' is not supported") - result = getEnv("QUERY_STRING").string + result = getEnv("QUERY_STRING") else: if methodNone notin allowedMethods: cgiError("'REQUEST_METHOD' must be 'POST' or 'GET'") -iterator decodeData*(data: string): tuple[key, value: TaintedString] = +iterator decodeData*(data: string): tuple[key, value: string] = ## Reads and decodes CGI data and yields the (name, value) pairs the ## data consists of. 
- var i = 0 - var name = "" - var value = "" - # decode everything in one pass: - while data[i] != '\0': - setLen(name, 0) # reuse memory - while true: - case data[i] - of '\0': break - of '%': - var x = 0 - handleHexChar(data[i+1], x) - handleHexChar(data[i+2], x) - inc(i, 2) - add(name, chr(x)) - of '+': add(name, ' ') - of '=', '&': break - else: add(name, data[i]) - inc(i) - if data[i] != '=': cgiError("'=' expected") - inc(i) # skip '=' - setLen(value, 0) # reuse memory - while true: - case data[i] - of '%': - var x = 0 - handleHexChar(data[i+1], x) - handleHexChar(data[i+2], x) - inc(i, 2) - add(value, chr(x)) - of '+': add(value, ' ') - of '&', '\0': break - else: add(value, data[i]) - inc(i) - yield (name.TaintedString, value.TaintedString) - if data[i] == '&': inc(i) - elif data[i] == '\0': break - else: cgiError("'&' expected") + for (key, value) in uri.decodeQuery(data): + yield (key, value) iterator decodeData*(allowedMethods: set[RequestMethod] = - {methodNone, methodPost, methodGet}): tuple[key, value: TaintedString] = + {methodNone, methodPost, methodGet}): tuple[key, value: string] = ## Reads and decodes CGI data and yields the (name, value) pairs the ## data consists of. If the client does not use a method listed in the - ## `allowedMethods` set, an `ECgi` exception is raised. - var data = getEncodedData(allowedMethods) - if not isNil(data): - for key, value in decodeData(data): - yield (key, value) + ## `allowedMethods` set, a `CgiError` exception is raised. + let data = getEncodedData(allowedMethods) + for (key, value) in uri.decodeQuery(data): + yield (key, value) proc readData*(allowedMethods: set[RequestMethod] = {methodNone, methodPost, methodGet}): StringTableRef = - ## Read CGI data. If the client does not use a method listed in the - ## `allowedMethods` set, an `ECgi` exception is raised. + ## Reads CGI data. If the client does not use a method listed in the + ## `allowedMethods` set, a `CgiError` exception is raised. 
result = newStringTable() for name, value in decodeData(allowedMethods): - result[name.string] = value.string + result[name] = value + +proc readData*(data: string): StringTableRef = + ## Reads CGI data from a string. + result = newStringTable() + for name, value in decodeData(data): + result[name] = value proc validateData*(data: StringTableRef, validKeys: varargs[string]) = - ## validates data; raises `ECgi` if this fails. This checks that each variable + ## Validates data; raises `CgiError` if this fails. This checks that each variable ## name of the CGI `data` occurs in the `validKeys` array. for key, val in pairs(data): if find(validKeys, key) < 0: cgiError("unknown variable name: " & key) proc getContentLength*(): string = - ## returns contents of the ``CONTENT_LENGTH`` environment variable - return getEnv("CONTENT_LENGTH").string + ## Returns contents of the `CONTENT_LENGTH` environment variable. + return getEnv("CONTENT_LENGTH") proc getContentType*(): string = - ## returns contents of the ``CONTENT_TYPE`` environment variable - return getEnv("CONTENT_Type").string + ## Returns contents of the `CONTENT_TYPE` environment variable. + return getEnv("CONTENT_TYPE") proc getDocumentRoot*(): string = - ## returns contents of the ``DOCUMENT_ROOT`` environment variable - return getEnv("DOCUMENT_ROOT").string + ## Returns contents of the `DOCUMENT_ROOT` environment variable. + return getEnv("DOCUMENT_ROOT") proc getGatewayInterface*(): string = - ## returns contents of the ``GATEWAY_INTERFACE`` environment variable - return getEnv("GATEWAY_INTERFACE").string + ## Returns contents of the `GATEWAY_INTERFACE` environment variable. + return getEnv("GATEWAY_INTERFACE") proc getHttpAccept*(): string = - ## returns contents of the ``HTTP_ACCEPT`` environment variable - return getEnv("HTTP_ACCEPT").string + ## Returns contents of the `HTTP_ACCEPT` environment variable. 
+ return getEnv("HTTP_ACCEPT") proc getHttpAcceptCharset*(): string = - ## returns contents of the ``HTTP_ACCEPT_CHARSET`` environment variable - return getEnv("HTTP_ACCEPT_CHARSET").string + ## Returns contents of the `HTTP_ACCEPT_CHARSET` environment variable. + return getEnv("HTTP_ACCEPT_CHARSET") proc getHttpAcceptEncoding*(): string = - ## returns contents of the ``HTTP_ACCEPT_ENCODING`` environment variable - return getEnv("HTTP_ACCEPT_ENCODING").string + ## Returns contents of the `HTTP_ACCEPT_ENCODING` environment variable. + return getEnv("HTTP_ACCEPT_ENCODING") proc getHttpAcceptLanguage*(): string = - ## returns contents of the ``HTTP_ACCEPT_LANGUAGE`` environment variable - return getEnv("HTTP_ACCEPT_LANGUAGE").string + ## Returns contents of the `HTTP_ACCEPT_LANGUAGE` environment variable. + return getEnv("HTTP_ACCEPT_LANGUAGE") proc getHttpConnection*(): string = - ## returns contents of the ``HTTP_CONNECTION`` environment variable - return getEnv("HTTP_CONNECTION").string + ## Returns contents of the `HTTP_CONNECTION` environment variable. + return getEnv("HTTP_CONNECTION") proc getHttpCookie*(): string = - ## returns contents of the ``HTTP_COOKIE`` environment variable - return getEnv("HTTP_COOKIE").string + ## Returns contents of the `HTTP_COOKIE` environment variable. + return getEnv("HTTP_COOKIE") proc getHttpHost*(): string = - ## returns contents of the ``HTTP_HOST`` environment variable - return getEnv("HTTP_HOST").string + ## Returns contents of the `HTTP_HOST` environment variable. + return getEnv("HTTP_HOST") proc getHttpReferer*(): string = - ## returns contents of the ``HTTP_REFERER`` environment variable - return getEnv("HTTP_REFERER").string + ## Returns contents of the `HTTP_REFERER` environment variable. 
+ return getEnv("HTTP_REFERER") proc getHttpUserAgent*(): string = - ## returns contents of the ``HTTP_USER_AGENT`` environment variable - return getEnv("HTTP_USER_AGENT").string + ## Returns contents of the `HTTP_USER_AGENT` environment variable. + return getEnv("HTTP_USER_AGENT") proc getPathInfo*(): string = - ## returns contents of the ``PATH_INFO`` environment variable - return getEnv("PATH_INFO").string + ## Returns contents of the `PATH_INFO` environment variable. + return getEnv("PATH_INFO") proc getPathTranslated*(): string = - ## returns contents of the ``PATH_TRANSLATED`` environment variable - return getEnv("PATH_TRANSLATED").string + ## Returns contents of the `PATH_TRANSLATED` environment variable. + return getEnv("PATH_TRANSLATED") proc getQueryString*(): string = - ## returns contents of the ``QUERY_STRING`` environment variable - return getEnv("QUERY_STRING").string + ## Returns contents of the `QUERY_STRING` environment variable. + return getEnv("QUERY_STRING") proc getRemoteAddr*(): string = - ## returns contents of the ``REMOTE_ADDR`` environment variable - return getEnv("REMOTE_ADDR").string + ## Returns contents of the `REMOTE_ADDR` environment variable. + return getEnv("REMOTE_ADDR") proc getRemoteHost*(): string = - ## returns contents of the ``REMOTE_HOST`` environment variable - return getEnv("REMOTE_HOST").string + ## Returns contents of the `REMOTE_HOST` environment variable. + return getEnv("REMOTE_HOST") proc getRemoteIdent*(): string = - ## returns contents of the ``REMOTE_IDENT`` environment variable - return getEnv("REMOTE_IDENT").string + ## Returns contents of the `REMOTE_IDENT` environment variable. + return getEnv("REMOTE_IDENT") proc getRemotePort*(): string = - ## returns contents of the ``REMOTE_PORT`` environment variable - return getEnv("REMOTE_PORT").string + ## Returns contents of the `REMOTE_PORT` environment variable. 
+ return getEnv("REMOTE_PORT") proc getRemoteUser*(): string = - ## returns contents of the ``REMOTE_USER`` environment variable - return getEnv("REMOTE_USER").string + ## Returns contents of the `REMOTE_USER` environment variable. + return getEnv("REMOTE_USER") proc getRequestMethod*(): string = - ## returns contents of the ``REQUEST_METHOD`` environment variable - return getEnv("REQUEST_METHOD").string + ## Returns contents of the `REQUEST_METHOD` environment variable. + return getEnv("REQUEST_METHOD") proc getRequestURI*(): string = - ## returns contents of the ``REQUEST_URI`` environment variable - return getEnv("REQUEST_URI").string + ## Returns contents of the `REQUEST_URI` environment variable. + return getEnv("REQUEST_URI") proc getScriptFilename*(): string = - ## returns contents of the ``SCRIPT_FILENAME`` environment variable - return getEnv("SCRIPT_FILENAME").string + ## Returns contents of the `SCRIPT_FILENAME` environment variable. + return getEnv("SCRIPT_FILENAME") proc getScriptName*(): string = - ## returns contents of the ``SCRIPT_NAME`` environment variable - return getEnv("SCRIPT_NAME").string + ## Returns contents of the `SCRIPT_NAME` environment variable. + return getEnv("SCRIPT_NAME") proc getServerAddr*(): string = - ## returns contents of the ``SERVER_ADDR`` environment variable - return getEnv("SERVER_ADDR").string + ## Returns contents of the `SERVER_ADDR` environment variable. + return getEnv("SERVER_ADDR") proc getServerAdmin*(): string = - ## returns contents of the ``SERVER_ADMIN`` environment variable - return getEnv("SERVER_ADMIN").string + ## Returns contents of the `SERVER_ADMIN` environment variable. + return getEnv("SERVER_ADMIN") proc getServerName*(): string = - ## returns contents of the ``SERVER_NAME`` environment variable - return getEnv("SERVER_NAME").string + ## Returns contents of the `SERVER_NAME` environment variable. 
+ return getEnv("SERVER_NAME") proc getServerPort*(): string = - ## returns contents of the ``SERVER_PORT`` environment variable - return getEnv("SERVER_PORT").string + ## Returns contents of the `SERVER_PORT` environment variable. + return getEnv("SERVER_PORT") proc getServerProtocol*(): string = - ## returns contents of the ``SERVER_PROTOCOL`` environment variable - return getEnv("SERVER_PROTOCOL").string + ## Returns contents of the `SERVER_PROTOCOL` environment variable. + return getEnv("SERVER_PROTOCOL") proc getServerSignature*(): string = - ## returns contents of the ``SERVER_SIGNATURE`` environment variable - return getEnv("SERVER_SIGNATURE").string + ## Returns contents of the `SERVER_SIGNATURE` environment variable. + return getEnv("SERVER_SIGNATURE") proc getServerSoftware*(): string = - ## returns contents of the ``SERVER_SOFTWARE`` environment variable - return getEnv("SERVER_SOFTWARE").string + ## Returns contents of the `SERVER_SOFTWARE` environment variable. + return getEnv("SERVER_SOFTWARE") proc setTestData*(keysvalues: varargs[string]) = - ## fills the appropriate environment variables to test your CGI application. + ## Fills the appropriate environment variables to test your CGI application. ## This can only simulate the 'GET' request method. `keysvalues` should ## provide embedded (name, value)-pairs. Example: - ## - ## .. code-block:: Nim - ## setTestData("name", "Hanz", "password", "12345") + ## ```Nim + ## setTestData("name", "Hanz", "password", "12345") + ## ``` putEnv("REQUEST_METHOD", "GET") var i = 0 var query = "" @@ -342,11 +267,11 @@ proc setTestData*(keysvalues: varargs[string]) = putEnv("QUERY_STRING", query) proc writeContentType*() = - ## call this before starting to send your HTML data to `stdout`. This + ## Call this before starting to send your HTML data to `stdout`. This ## implements this part of the CGI protocol: - ## - ## .. 
code-block:: Nim - ## write(stdout, "Content-type: text/html\n\n") + ## ```Nim + ## write(stdout, "Content-type: text/html\n\n") + ## ``` write(stdout, "Content-type: text/html\n\n") proc resetForStacktrace() = @@ -372,11 +297,6 @@ proc setStackTraceStdout*() = ## Makes Nim output stacktraces to stdout, instead of server log. errorMessageWriter = writeErrorMessage -proc setStackTraceNewLine*() {.deprecated.} = - ## Makes Nim output stacktraces to stdout, instead of server log. - ## Depracated alias for setStackTraceStdout. - setStackTraceStdout() - proc setCookie*(name, value: string) = ## Sets a cookie. write(stdout, "Set-Cookie: ", name, "=", value, "\n") @@ -384,17 +304,12 @@ proc setCookie*(name, value: string) = var gcookies {.threadvar.}: StringTableRef -proc getCookie*(name: string): TaintedString = +proc getCookie*(name: string): string = ## Gets a cookie. If no cookie of `name` exists, "" is returned. if gcookies == nil: gcookies = parseCookies(getHttpCookie()) - result = TaintedString(gcookies[name]) + result = gcookies.getOrDefault(name) proc existsCookie*(name: string): bool = ## Checks if a cookie of `name` exists. 
if gcookies == nil: gcookies = parseCookies(getHttpCookie()) result = hasKey(gcookies, name) - -when isMainModule: - const test1 = "abc\L+def xyz" - assert encodeUrl(test1) == "abc%0A%2Bdef+xyz" - assert decodeUrl(encodeUrl(test1)) == test1 diff --git a/lib/pure/collections/LockFreeHash.nim b/lib/pure/collections/LockFreeHash.nim deleted file mode 100644 index c3954468a..000000000 --- a/lib/pure/collections/LockFreeHash.nim +++ /dev/null @@ -1,607 +0,0 @@ -#nim c -t:-march=i686 --cpu:amd64 --threads:on -d:release lockfreehash.nim - -import unsigned, math, hashes - -#------------------------------------------------------------------------------ -## Memory Utility Functions - -proc newHeap*[T](): ptr T = - result = cast[ptr T](alloc0(sizeof(T))) - -proc copyNew*[T](x: var T): ptr T = - var - size = sizeof(T) - mem = alloc(size) - copyMem(mem, x.addr, size) - return cast[ptr T](mem) - -proc copyTo*[T](val: var T, dest: int) = - copyMem(pointer(dest), val.addr, sizeof(T)) - -proc allocType*[T](): pointer = alloc(sizeof(T)) - -proc newShared*[T](): ptr T = - result = cast[ptr T](allocShared0(sizeof(T))) - -proc copyShared*[T](x: var T): ptr T = - var - size = sizeof(T) - mem = allocShared(size) - copyMem(mem, x.addr, size) - return cast[ptr T](mem) - -#------------------------------------------------------------------------------ -## Pointer arithmetic - -proc `+`*(p: pointer, i: int): pointer {.inline.} = - cast[pointer](cast[int](p) + i) - -const - minTableSize = 8 - reProbeLimit = 12 - minCopyWork = 4096 - intSize = sizeof(int) - - - -when sizeof(int) == 4: # 32bit - type - TRaw = range[0..1073741823] - ## The range of uint values that can be stored directly in a value slot - ## when on a 32 bit platform - -elif sizeof(int) == 8: # 64bit - type - TRaw = range[0..4611686018427387903] - ## The range of uint values that can be stored directly in a value slot - ## when on a 64 bit platform -else: - {.error: "unsupported platform".} - -type - TEntry = tuple - key: int - 
value: int - - TEntryArr = ptr array[0..10_000_000, TEntry] - - PConcTable[K,V] = ptr object {.pure.} - len: int - used: int - active: int - copyIdx: int - copyDone: int - next: PConcTable[K,V] - data: TEntryArr - - -proc setVal[K,V](table: var PConcTable[K,V], key: int, val: int, - expVal: int, match: bool): int - -#------------------------------------------------------------------------------ - -# Create a new table -proc newLFTable*[K,V](size: int = minTableSize): PConcTable[K,V] = - let - dataLen = max(nextPowerOfTwo(size), minTableSize) - dataSize = dataLen*sizeof(TEntry) - dataMem = allocShared0(dataSize) - tableSize = 7 * intSize - tableMem = allocShared0(tableSize) - table = cast[PConcTable[K,V]](tableMem) - table.len = dataLen - table.used = 0 - table.active = 0 - table.copyIdx = 0 - table.copyDone = 0 - table.next = nil - table.data = cast[TEntryArr](dataMem) - result = table - -#------------------------------------------------------------------------------ - -# Delete a table -proc deleteConcTable[K,V](tbl: PConcTable[K,V]) = - deallocShared(tbl.data) - deallocShared(tbl) - -#------------------------------------------------------------------------------ - -proc `[]`[K,V](table: var PConcTable[K,V], i: int): var TEntry {.inline.} = - table.data[i] - -#------------------------------------------------------------------------------ -# State flags stored in ptr - - -proc pack[T](x: T): int {.inline.} = - result = (cast[int](x) shl 2) - #echo("packKey ",cast[int](x) , " -> ", result) - -# Pop the flags off returning a 4 byte aligned ptr to our Key or Val -proc pop(x: int): int {.inline.} = - result = x and 0xFFFFFFFC'i32 - -# Pop the raw value off of our Key or Val -proc popRaw(x: int): int {.inline.} = - result = x shr 2 - -# Pop the flags off returning a 4 byte aligned ptr to our Key or Val -proc popPtr[V](x: int): ptr V {.inline.} = - result = cast[ptr V](pop(x)) - #echo("popPtr " & $x & " -> " & $cast[int](result)) - -# Ghost (sentinel) -# K or V is no 
longer valid use new table -const Ghost = 0xFFFFFFFC -proc isGhost(x: int): bool {.inline.} = - result = x == 0xFFFFFFFC - -# Tombstone -# applied to V = K is dead -proc isTomb(x: int): bool {.inline.} = - result = (x and 0x00000002) != 0 - -proc setTomb(x: int): int {.inline.} = - result = x or 0x00000002 - -# Prime -# K or V is in new table copied from old -proc isPrime(x: int): bool {.inline.} = - result = (x and 0x00000001) != 0 - -proc setPrime(x: int): int {.inline.} = - result = x or 0x00000001 - -#------------------------------------------------------------------------------ - -##This is for i32 only need to override for i64 -proc hashInt(x: int):int {.inline.} = - var h = uint32(x) #shr 2'u32 - h = h xor (h shr 16'u32) - h *= 0x85ebca6b'u32 - h = h xor (h shr 13'u32) - h *= 0xc2b2ae35'u32 - h = h xor (h shr 16'u32) - result = int(h) - -#------------------------------------------------------------------------------ - -proc resize[K,V](self: PConcTable[K,V]): PConcTable[K,V] = - var next = atomic_load_n(self.next.addr, ATOMIC_RELAXED) - #echo("next = " & $cast[int](next)) - if next != nil: - #echo("A new table already exists, copy in progress") - return next - var - oldLen = atomic_load_n(self.len.addr, ATOMIC_RELAXED) - newTable = newLFTable[K,V](oldLen*2) - success = atomic_compare_exchange_n(self.next.addr, next.addr, newTable, - false, ATOMIC_RELAXED, ATOMIC_RELAXED) - if not success: - echo("someone beat us to it! delete table we just created and return his " & $cast[int](next)) - deleteConcTable(newTable) - return next - else: - echo("Created New Table! 
" & $cast[int](newTable) & " Size = " & $newTable.len) - return newTable - - -#------------------------------------------------------------------------------ -#proc keyEQ[K](key1: ptr K, key2: ptr K): bool {.inline.} = -proc keyEQ[K](key1: int, key2: int): bool {.inline.} = - result = false - when K is TRaw: - if key1 == key2: - result = true - else: - var - p1 = popPtr[K](key1) - p2 = popPtr[K](key2) - if p1 != nil and p2 != nil: - if cast[int](p1) == cast[int](p2): - return true - if p1[] == p2[]: - return true - -#------------------------------------------------------------------------------ - -#proc tableFull(self: var PConcTable[K,V]) : bool {.inline.} = - - -#------------------------------------------------------------------------------ - -proc copySlot[K,V](idx: int, oldTbl: var PConcTable[K,V], newTbl: var PConcTable[K,V]): bool = - #echo("Copy idx " & $idx) - var - oldVal = 0 - oldkey = 0 - ok = false - result = false - #Block the key so no other threads waste time here - while not ok: - ok = atomic_compare_exchange_n(oldTbl[idx].key.addr, oldKey.addr, - setTomb(oldKey), false, ATOMIC_RELAXED, ATOMIC_RELAXED) - #echo("oldKey was = " & $oldKey & " set it to tomb " & $setTomb(oldKey)) - #Prevent new values from appearing in the old table by priming - oldVal = atomic_load_n(oldTbl[idx].value.addr, ATOMIC_RELAXED) - while not isPrime(oldVal): - var box = if oldVal == 0 or isTomb(oldVal) : oldVal.setTomb.setPrime - else: oldVal.setPrime - if atomic_compare_exchange_n(oldTbl[idx].value.addr, oldVal.addr, - box, false, ATOMIC_RELAXED, ATOMIC_RELAXED): - if isPrime(box) and isTomb(box): - return true - oldVal = box - break - #echo("oldVal was = ", oldVal, " set it to prime ", box) - if isPrime(oldVal) and isTomb(oldVal): - #when not (K is TRaw): - # deallocShared(popPtr[K](oldKey)) - return false - if isTomb(oldVal): - echo("oldVal is Tomb!!!, should not happen") - if pop(oldVal) != 0: - result = setVal(newTbl, pop(oldKey), pop(oldVal), 0, true) == 0 - if result: 
- #echo("Copied a Slot! idx= " & $idx & " key= " & $oldKey & " val= " & $oldVal) - else: - #echo("copy slot failed") - # Our copy is done so we disable the old slot - while not ok: - ok = atomic_compare_exchange_n(oldTbl[idx].value.addr, oldVal.addr, - oldVal.setTomb.setPrime , false, ATOMIC_RELAXED, ATOMIC_RELAXED) - #echo("disabled old slot") - #echo"---------------------" - -#------------------------------------------------------------------------------ - -proc promote[K,V](table: var PConcTable[K,V]) = - var - newData = atomic_load_n(table.next.data.addr, ATOMIC_RELAXED) - newLen = atomic_load_n(table.next.len.addr, ATOMIC_RELAXED) - newUsed = atomic_load_n(table.next.used.addr, ATOMIC_RELAXED) - - deallocShared(table.data) - atomic_store_n(table.data.addr, newData, ATOMIC_RELAXED) - atomic_store_n(table.len.addr, newLen, ATOMIC_RELAXED) - atomic_store_n(table.used.addr, newUsed, ATOMIC_RELAXED) - atomic_store_n(table.copyIdx.addr, 0, ATOMIC_RELAXED) - atomic_store_n(table.copyDone.addr, 0, ATOMIC_RELAXED) - deallocShared(table.next) - atomic_store_n(table.next.addr, nil, ATOMIC_RELAXED) - echo("new table swapped!") - -#------------------------------------------------------------------------------ - -proc checkAndPromote[K,V](table: var PConcTable[K,V], workDone: int): bool = - var - oldLen = atomic_load_n(table.len.addr, ATOMIC_RELAXED) - copyDone = atomic_load_n(table.copyDone.addr, ATOMIC_RELAXED) - ok: bool - result = false - if workDone > 0: - #echo("len to copy =" & $oldLen) - #echo("copyDone + workDone = " & $copyDone & " + " & $workDone) - while not ok: - ok = atomic_compare_exchange_n(table.copyDone.addr, copyDone.addr, - copyDone + workDone, false, ATOMIC_RELAXED, ATOMIC_RELAXED) - #if ok: echo("set copyDone") - # If the copy is done we can promote this table - if copyDone + workDone >= oldLen: - # Swap new data - #echo("work is done!") - table.promote - result = true - -#------------------------------------------------------------------------------ - 
-proc copySlotAndCheck[K,V](table: var PConcTable[K,V], idx: int): - PConcTable[K,V] = - var - newTable = cast[PConcTable[K,V]](atomic_load_n(table.next.addr, ATOMIC_RELAXED)) - result = newTable - if newTable != nil and copySlot(idx, table, newTable): - #echo("copied a single slot, idx = " & $idx) - if checkAndPromote(table, 1): return table - - -#------------------------------------------------------------------------------ - -proc helpCopy[K,V](table: var PConcTable[K,V]): PConcTable[K,V] = - var - newTable = cast[PConcTable[K,V]](atomic_load_n(table.next.addr, ATOMIC_RELAXED)) - result = newTable - if newTable != nil: - var - oldLen = atomic_load_n(table.len.addr, ATOMIC_RELAXED) - copyDone = atomic_load_n(table.copyDone.addr, ATOMIC_RELAXED) - copyIdx = 0 - work = min(oldLen, minCopyWork) - #panicStart = -1 - workDone = 0 - if copyDone < oldLen: - var ok: bool - while not ok: - ok = atomic_compare_exchange_n(table.copyIdx.addr, copyIdx.addr, - copyIdx + work, false, ATOMIC_RELAXED, ATOMIC_RELAXED) - #echo("copy idx = ", copyIdx) - for i in 0..work-1: - var idx = (copyIdx + i) and (oldLen - 1) - if copySlot(idx, table, newTable): - workDone += 1 - if workDone > 0: - #echo("did work ", workDone, " on thread ", cast[int](myThreadID[pointer]())) - if checkAndPromote(table, workDone): return table - # In case a thread finished all the work then got stalled before promotion - if checkAndPromote(table, 0): return table - - - -#------------------------------------------------------------------------------ - -proc setVal[K,V](table: var PConcTable[K,V], key: int, val: int, - expVal: int, match: bool): int = - #echo("-try set- in table ", " key = ", (popPtr[K](key)[]), " val = ", val) - when K is TRaw: - var idx = hashInt(key) - else: - var idx = popPtr[K](key)[].hash - var - nextTable: PConcTable[K,V] - probes = 1 - # spin until we find a key slot or build and jump to next table - while true: - idx = idx and (table.len - 1) - #echo("try set idx = " & $idx & "for" & 
$key) - var - probedKey = 0 - openKey = atomic_compare_exchange_n(table[idx].key.addr, probedKey.addr, - key, false, ATOMIC_RELAXED, ATOMIC_RELAXED) - if openKey: - if val.isTomb: - #echo("val was tomb, bail, no reason to set an open slot to tomb") - return val - #increment used slots - #echo("found an open slot, total used = " & - #$atomic_add_fetch(table.used.addr, 1, ATOMIC_RELAXED)) - discard atomic_add_fetch(table.used.addr, 1, ATOMIC_RELAXED) - break # We found an open slot - #echo("set idx ", idx, " key = ", key, " probed = ", probedKey) - if keyEQ[K](probedKey, key): - #echo("we found the matching slot") - break # We found a matching slot - if (not(expVal != 0 and match)) and (probes >= reProbeLimit or key.isTomb): - if key.isTomb: echo("Key is Tombstone") - #if probes >= reProbeLimit: echo("Too much probing " & $probes) - #echo("try to resize") - #create next bigger table - nextTable = resize(table) - #help do some copying - #echo("help copy old table to new") - nextTable = helpCopy(table) - #now setVal in the new table instead - #echo("jumping to next table to set val") - return setVal(nextTable, key, val, expVal, match) - else: - idx += 1 - probes += 1 - # Done spinning for a new slot - var oldVal = atomic_load_n(table[idx].value.addr, ATOMIC_RELAXED) - if val == oldVal: - #echo("this val is alredy in the slot") - return oldVal - nextTable = atomic_load_n(table.next.addr, ATOMIC_SEQ_CST) - if nextTable == nil and - ((oldVal == 0 and - (probes >= reProbeLimit or table.used / table.len > 0.8)) or - (isPrime(oldVal))): - if table.used / table.len > 0.8: echo("resize because usage ratio = " & - $(table.used / table.len)) - if isPrime(oldVal): echo("old val isPrime, should be a rare mem ordering event") - nextTable = resize(table) - if nextTable != nil: - #echo("tomb old slot then set in new table") - nextTable = copySlotAndCheck(table,idx) - return setVal(nextTable, key, val, expVal, match) - # Finally ready to add new val to table - while true: - if match 
and oldVal != expVal: - #echo("set failed, no match oldVal= " & $oldVal & " expVal= " & $expVal) - return oldVal - if atomic_compare_exchange_n(table[idx].value.addr, oldVal.addr, - val, false, ATOMIC_RELEASE, ATOMIC_RELAXED): - #echo("val set at table " & $cast[int](table)) - if expVal != 0: - if (oldVal == 0 or isTomb(oldVal)) and not isTomb(val): - discard atomic_add_fetch(table.active.addr, 1, ATOMIC_RELAXED) - elif not (oldVal == 0 or isTomb(oldVal)) and isTomb(val): - discard atomic_add_fetch(table.active.addr, -1, ATOMIC_RELAXED) - if oldVal == 0 and expVal != 0: - return setTomb(oldVal) - else: return oldVal - if isPrime(oldVal): - nextTable = copySlotAndCheck(table, idx) - return setVal(nextTable, key, val, expVal, match) - -#------------------------------------------------------------------------------ - -proc getVal[K,V](table: var PConcTable[K,V], key: int): int = - #echo("-try get- key = " & $key) - when K is TRaw: - var idx = hashInt(key) - else: - var idx = popPtr[K](key)[].hash - #echo("get idx ", idx) - var - probes = 0 - val: int - while true: - idx = idx and (table.len - 1) - var - newTable: PConcTable[K,V] # = atomic_load_n(table.next.addr, ATOMIC_ACQUIRE) - probedKey = atomic_load_n(table[idx].key.addr, ATOMIC_SEQ_CST) - if keyEQ[K](probedKey, key): - #echo("found key after ", probes+1) - val = atomic_load_n(table[idx].value.addr, ATOMIC_ACQUIRE) - if not isPrime(val): - if isTomb(val): - #echo("val was tomb but not prime") - return 0 - else: - #echo("-GotIt- idx = ", idx, " key = ", key, " val ", val ) - return val - else: - newTable = copySlotAndCheck(table, idx) - return getVal(newTable, key) - else: - #echo("probe ", probes, " idx = ", idx, " key = ", key, " found ", probedKey ) - if probes >= reProbeLimit*4 or key.isTomb: - if newTable == nil: - #echo("too many probes and no new table ", key, " ", idx ) - return 0 - else: - newTable = helpCopy(table) - return getVal(newTable, key) - idx += 1 - probes += 1 - 
-#------------------------------------------------------------------------------ - -#proc set*(table: var PConcTable[TRaw,TRaw], key: TRaw, val: TRaw) = -# discard setVal(table, pack(key), pack(key), 0, false) - -#proc set*[V](table: var PConcTable[TRaw,V], key: TRaw, val: ptr V) = -# discard setVal(table, pack(key), cast[int](val), 0, false) - -proc set*[K,V](table: var PConcTable[K,V], key: var K, val: var V) = - when not (K is TRaw): - var newKey = cast[int](copyShared(key)) - else: - var newKey = pack(key) - when not (V is TRaw): - var newVal = cast[int](copyShared(val)) - else: - var newVal = pack(val) - var oldPtr = pop(setVal(table, newKey, newVal, 0, false)) - #echo("oldPtr = ", cast[int](oldPtr), " newPtr = ", cast[int](newPtr)) - when not (V is TRaw): - if newVal != oldPtr and oldPtr != 0: - deallocShared(cast[ptr V](oldPtr)) - - - -proc get*[K,V](table: var PConcTable[K,V], key: var K): V = - when not (V is TRaw): - when not (K is TRaw): - return popPtr[V](getVal(table, cast[int](key.addr)))[] - else: - return popPtr[V](getVal(table, pack(key)))[] - else: - when not (K is TRaw): - return popRaw(getVal(table, cast[int](key.addr))) - else: - return popRaw(getVal(table, pack(key))) - - - - - - - - - - - -#proc `[]`[K,V](table: var PConcTable[K,V], key: K): PEntry[K,V] {.inline.} = -# getVal(table, key) - -#proc `[]=`[K,V](table: var PConcTable[K,V], key: K, val: V): PEntry[K,V] {.inline.} = -# setVal(table, key, val) - - - - - - -#Tests ---------------------------- -when isMainModule: - import locks, times, mersenne - - const - numTests = 100000 - numThreads = 10 - - - - type - TTestObj = tuple - thr: int - f0: int - f1: int - - TData = tuple[k: string,v: TTestObj] - PDataArr = array[0..numTests-1, TData] - Dict = PConcTable[string,TTestObj] - - var - thr: array[0..numThreads-1, TThread[Dict]] - - table = newLFTable[string,TTestObj](8) - rand = newMersenneTwister(2525) - - proc createSampleData(len: int): PDataArr = - #result = 
cast[PDataArr](allocShared0(sizeof(TData)*numTests)) - for i in 0..len-1: - result[i].k = "mark" & $(i+1) - #echo("mark" & $(i+1), " ", hash("mark" & $(i+1))) - result[i].v.thr = 0 - result[i].v.f0 = i+1 - result[i].v.f1 = 0 - #echo("key = " & $(i+1) & " Val ptr = " & $cast[int](result[i].v.addr)) - - - - proc threadProc(tp: Dict) {.thread.} = - var t = cpuTime(); - for i in 1..numTests: - var key = "mark" & $(i) - var got = table.get(key) - got.thr = cast[int](myThreadID[pointer]()) - got.f1 = got.f1 + 1 - table.set(key, got) - t = cpuTime() - t - echo t - - - var testData = createSampleData(numTests) - - for i in 0..numTests-1: - table.set(testData[i].k, testData[i].v) - - var i = 0 - while i < numThreads: - createThread(thr[i], threadProc, table) - i += 1 - - joinThreads(thr) - - - - - - var fails = 0 - - for i in 0..numTests-1: - var got = table.get(testData[i].k) - if got.f0 != i+1 or got.f1 != numThreads: - fails += 1 - echo(got) - - echo("Failed read or write = ", fails) - - - #for i in 1..numTests: - # echo(i, " = ", hashInt(i) and 8191) - - deleteConcTable(table) diff --git a/lib/pure/collections/chains.nim b/lib/pure/collections/chains.nim new file mode 100644 index 000000000..6b2ecd272 --- /dev/null +++ b/lib/pure/collections/chains.nim @@ -0,0 +1,44 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Template based implementation of singly and doubly linked lists. +## The involved types should have 'prev' or 'next' fields and the +## list header should have 'head' or 'tail' fields. 
+ +template prepend*(header, node) = + when compiles(header.head): + when compiles(node.prev): + if header.head != nil: + header.head.prev = node + node.next = header.head + header.head = node + when compiles(header.tail): + if header.tail == nil: + header.tail = node + +template append*(header, node) = + when compiles(header.head): + if header.head == nil: + header.head = node + when compiles(header.tail): + when compiles(node.prev): + node.prev = header.tail + if header.tail != nil: + header.tail.next = node + header.tail = node + +template unlink*(header, node) = + if node.next != nil: + node.next.prev = node.prev + if node.prev != nil: + node.prev.next = node.next + if header.head == node: + header.head = node.prev + if header.tail == node: + header.tail = node.next diff --git a/lib/pure/collections/critbits.nim b/lib/pure/collections/critbits.nim index 3d10e39aa..24257dacb 100644 --- a/lib/pure/collections/critbits.nim +++ b/lib/pure/collections/critbits.nim @@ -8,32 +8,62 @@ # ## This module implements a `crit bit tree`:idx: which is an efficient -## container for a set or a mapping of strings. Based on the excellent paper -## by Adam Langley. +## container for a sorted set of strings, or for a sorted mapping of strings. Based on the +## [excellent paper by Adam Langley](https://www.imperialviolet.org/binary/critbit.pdf). +## (A crit bit tree is a form of `radix tree`:idx: or `patricia trie`:idx:.) 
+ +runnableExamples: + from std/sequtils import toSeq + + var critbitAsSet: CritBitTree[void] = ["kitten", "puppy"].toCritBitTree + doAssert critbitAsSet.len == 2 + critbitAsSet.incl("") + doAssert "" in critbitAsSet + critbitAsSet.excl("") + doAssert "" notin critbitAsSet + doAssert toSeq(critbitAsSet.items) == @["kitten", "puppy"] + let same = ["puppy", "kitten", "puppy"].toCritBitTree + doAssert toSeq(same.keys) == toSeq(critbitAsSet.keys) + + var critbitAsDict: CritBitTree[int] = {"key1": 42}.toCritBitTree + doAssert critbitAsDict.len == 1 + critbitAsDict["key2"] = 0 + doAssert "key2" in critbitAsDict + doAssert critbitAsDict["key2"] == 0 + critbitAsDict.excl("key1") + doAssert "key1" notin critbitAsDict + doAssert toSeq(critbitAsDict.pairs) == @[("key2", 0)] + +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/assertions type - NodeObj[T] = object {.acyclic.} + NodeObj[T] {.acyclic.} = object byte: int ## byte index of the difference - otherbits: char + otherBits: char case isLeaf: bool of false: child: array[0..1, ref NodeObj[T]] - of true: + of true: key: string when T isnot void: val: T - + Node[T] = ref NodeObj[T] CritBitTree*[T] = object ## The crit bit tree can either be used ## as a mapping from strings to - ## some type ``T`` or as a set of - ## strings if ``T`` is void. + ## some type `T` or as a set of + ## strings if `T` is `void`. root: Node[T] count: int -{.deprecated: [TCritBitTree: CritBitTree].} +func len*[T](c: CritBitTree[T]): int {.inline.} = + ## Returns the number of elements in `c` in O(1). + runnableExamples: + let c = ["key1", "key2"].toCritBitTree + doAssert c.len == 2 -proc len*[T](c: CritBitTree[T]): int = - ## returns the number of elements in `c` in O(1). 
result = c.count proc rawGet[T](c: CritBitTree[T], key: string): Node[T] = @@ -46,19 +76,22 @@ proc rawGet[T](c: CritBitTree[T], key: string): Node[T] = else: return if it.key == key: it else: nil -proc contains*[T](c: CritBitTree[T], key: string): bool {.inline.} = - ## returns true iff `c` contains the given `key`. +func contains*[T](c: CritBitTree[T], key: string): bool {.inline.} = + ## Returns true if `c` contains the given `key`. + runnableExamples: + var c: CritBitTree[void] + incl(c, "key") + doAssert c.contains("key") + result = rawGet(c, key) != nil -proc hasKey*[T](c: CritBitTree[T], key: string): bool {.inline.} = - ## alias for `contains`. +func hasKey*[T](c: CritBitTree[T], key: string): bool {.inline.} = + ## Alias for `contains <#contains,CritBitTree[T],string>`_. result = rawGet(c, key) != nil proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] = if c.root == nil: - new c.root - c.root.isleaf = true - c.root.key = key + c.root = Node[T](isleaf: true, key: key) result = c.root else: var it = c.root @@ -66,34 +99,33 @@ proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] = let ch = if it.byte < key.len: key[it.byte] else: '\0' let dir = (1 + (ch.ord or it.otherBits.ord)) shr 8 it = it.child[dir] - + var newOtherBits = 0 var newByte = 0 block blockX: - while newbyte < key.len: - if it.key[newbyte] != key[newbyte]: - newotherbits = it.key[newbyte].ord xor key[newbyte].ord + while newByte < key.len: + let ch = if newByte < it.key.len: it.key[newByte] else: '\0' + if ch != key[newByte]: + newOtherBits = ch.ord xor key[newByte].ord break blockX - inc newbyte - if it.key[newbyte] != '\0': - newotherbits = it.key[newbyte].ord + inc newByte + if newByte < it.key.len: + newOtherBits = it.key[newByte].ord else: return it while (newOtherBits and (newOtherBits-1)) != 0: newOtherBits = newOtherBits and (newOtherBits-1) newOtherBits = newOtherBits xor 255 - let ch = it.key[newByte] + let ch = if newByte < it.key.len: it.key[newByte] else: '\0' 
let dir = (1 + (ord(ch) or newOtherBits)) shr 8 - + var inner: Node[T] new inner - new result - result.isLeaf = true - result.key = key + result = Node[T](isLeaf: true, key: key) inner.otherBits = chr(newOtherBits) inner.byte = newByte inner.child[1 - dir] = result - + var wherep = addr(c.root) while true: var p = wherep[] @@ -107,9 +139,87 @@ proc rawInsert[T](c: var CritBitTree[T], key: string): Node[T] = wherep[] = inner inc c.count -proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: T): bool = - ## returns true iff `c` contains the given `key`. If the key does not exist - ## ``c[key] = val`` is performed. +func exclImpl[T](c: var CritBitTree[T], key: string): int = + var p = c.root + var wherep = addr(c.root) + var whereq: ptr Node[T] = nil + if p == nil: return c.count + var dir = 0 + var q: Node[T] + while not p.isLeaf: + whereq = wherep + q = p + let ch = if p.byte < key.len: key[p.byte] else: '\0' + dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 + wherep = addr(p.child[dir]) + p = wherep[] + if p.key == key: + # else: not in tree at all + if whereq == nil: + c.root = nil + else: + whereq[] = q.child[1 - dir] + dec c.count + + return c.count + +proc excl*[T](c: var CritBitTree[T], key: string) = + ## Removes `key` (and its associated value) from the set `c`. + ## If the `key` does not exist, nothing happens. + ## + ## **See also:** + ## * `incl proc <#incl,CritBitTree[void],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + runnableExamples: + var c: CritBitTree[void] + incl(c, "key") + excl(c, "key") + doAssert not c.contains("key") + + discard exclImpl(c, key) + +proc missingOrExcl*[T](c: var CritBitTree[T], key: string): bool = + ## Returns true if `c` does not contain the given `key`. If the key + ## does exist, `c.excl(key)` is performed. 
+ ## + ## **See also:** + ## * `excl proc <#excl,CritBitTree[T],string>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_ + runnableExamples: + block: + var c: CritBitTree[void] + doAssert c.missingOrExcl("key") + block: + var c: CritBitTree[void] + incl(c, "key") + doAssert not c.missingOrExcl("key") + doAssert not c.contains("key") + + let oldCount = c.count + discard exclImpl(c, key) + result = c.count == oldCount + +proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: sink T): bool = + ## Returns true if `c` contains the given `key`. If the key does not exist, + ## `c[key] = val` is performed. + ## + ## **See also:** + ## * `incl proc <#incl,CritBitTree[void],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[void],string>`_ + ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_ + runnableExamples: + block: + var c: CritBitTree[int] + doAssert not c.containsOrIncl("key", 42) + doAssert c.contains("key") + block: + var c: CritBitTree[int] + incl(c, "key", 21) + doAssert c.containsOrIncl("key", 42) + doAssert c["key"] == 21 + let oldCount = c.count var n = rawInsert(c, key) result = c.count == oldCount @@ -117,66 +227,107 @@ proc containsOrIncl*[T](c: var CritBitTree[T], key: string, val: T): bool = if not result: n.val = val proc containsOrIncl*(c: var CritBitTree[void], key: string): bool = - ## returns true iff `c` contains the given `key`. If the key does not exist + ## Returns true if `c` contains the given `key`. If the key does not exist, ## it is inserted into `c`. 
+ ## + ## **See also:** + ## * `incl proc <#incl,CritBitTree[void],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + ## * `containsOrIncl proc <#containsOrIncl,CritBitTree[T],string,T>`_ + ## * `missingOrExcl proc <#missingOrExcl,CritBitTree[T],string>`_ + runnableExamples: + block: + var c: CritBitTree[void] + doAssert not c.containsOrIncl("key") + doAssert c.contains("key") + block: + var c: CritBitTree[void] + incl(c, "key") + doAssert c.containsOrIncl("key") + let oldCount = c.count - var n = rawInsert(c, key) + discard rawInsert(c, key) result = c.count == oldCount +proc inc*(c: var CritBitTree[int]; key: string, val: int = 1) = + ## Increments `c[key]` by `val`. + runnableExamples: + var c: CritBitTree[int] + c["key"] = 1 + inc(c, "key") + doAssert c["key"] == 2 + + var n = rawInsert(c, key) + inc n.val, val + proc incl*(c: var CritBitTree[void], key: string) = - ## includes `key` in `c`. + ## Includes `key` in `c`. + ## + ## **See also:** + ## * `excl proc <#excl,CritBitTree[T],string>`_ + ## * `incl proc <#incl,CritBitTree[T],string,T>`_ + runnableExamples: + var c: CritBitTree[void] + incl(c, "key") + doAssert c.hasKey("key") + discard rawInsert(c, key) -proc `[]=`*[T](c: var CritBitTree[T], key: string, val: T) = - ## puts a (key, value)-pair into `t`. +proc incl*[T](c: var CritBitTree[T], key: string, val: sink T) = + ## Inserts `key` with value `val` into `c`. + ## + ## **See also:** + ## * `excl proc <#excl,CritBitTree[T],string>`_ + ## * `incl proc <#incl,CritBitTree[void],string>`_ + runnableExamples: + var c: CritBitTree[int] + incl(c, "key", 42) + doAssert c["key"] == 42 + var n = rawInsert(c, key) n.val = val -proc `[]`*[T](c: CritBitTree[T], key: string): T {.inline.} = - ## retrieves the value at ``c[key]``. If `key` is not in `t`, - ## default empty value for the type `B` is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. 
- let n = rawGet(c, key) - if n != nil: result = n.val +proc `[]=`*[T](c: var CritBitTree[T], key: string, val: sink T) = + ## Alias for `incl <#incl,CritBitTree[T],string,T>`_. + ## + ## **See also:** + ## * `[] proc <#[],CritBitTree[T],string>`_ + ## * `[] proc <#[],CritBitTree[T],string_2>`_ + var n = rawInsert(c, key) + n.val = val -proc mget*[T](c: var CritBitTree[T], key: string): var T {.inline.} = - ## retrieves the value at ``c[key]``. The value can be modified. - ## If `key` is not in `t`, the ``KeyError`` exception is raised. +template get[T](c: CritBitTree[T], key: string): T = let n = rawGet(c, key) - if n != nil: result = n.val - else: raise newException(KeyError, "key not found: " & $key) + if n == nil: + raise newException(KeyError, "key not found: " & key) -proc excl*[T](c: var CritBitTree[T], key: string) = - ## removes `key` (and its associated value) from the set `c`. - ## If the `key` does not exist, nothing happens. - var p = c.root - var wherep = addr(c.root) - var whereq: ptr Node[T] = nil - if p == nil: return - var dir = 0 - var q: Node[T] - while not p.isLeaf: - whereq = wherep - q = p - let ch = if p.byte < key.len: key[p.byte] else: '\0' - dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 - wherep = addr(p.child[dir]) - p = wherep[] - if p.key == key: - # else: not in tree at all - if whereq == nil: - c.root = nil - else: - whereq[] = q.child[1 - dir] - dec c.count + n.val + +func `[]`*[T](c: CritBitTree[T], key: string): lent T {.inline.} = + ## Retrieves the value at `c[key]`. If `key` is not in `t`, the + ## `KeyError` exception is raised. One can check with `hasKey` whether + ## the key exists. + ## + ## **See also:** + ## * `[] proc <#[],CritBitTree[T],string_2>`_ + ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_ + get(c, key) + +func `[]`*[T](c: var CritBitTree[T], key: string): var T {.inline.} = + ## Retrieves the value at `c[key]`. The value can be modified. + ## If `key` is not in `t`, the `KeyError` exception is raised. 
+ ## + ## **See also:** + ## * `[] proc <#[],CritBitTree[T],string>`_ + ## * `[]= proc <#[]=,CritBitTree[T],string,T>`_ + get(c, key) iterator leaves[T](n: Node[T]): Node[T] = if n != nil: # XXX actually we could compute the necessary stack size in advance: # it's roughly log2(c.count). var stack = @[n] - while stack.len > 0: + while stack.len > 0: var it = stack.pop while not it.isLeaf: stack.add(it.child[1]) @@ -185,29 +336,61 @@ iterator leaves[T](n: Node[T]): Node[T] = yield it iterator keys*[T](c: CritBitTree[T]): string = - ## yields all keys in lexicographical order. + ## Yields all keys in lexicographical order. + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 1, "key2": 2}.toCritBitTree + doAssert toSeq(c.keys) == @["key1", "key2"] + for x in leaves(c.root): yield x.key -iterator values*[T](c: CritBitTree[T]): T = - ## yields all values of `c` in the lexicographical order of the +iterator values*[T](c: CritBitTree[T]): lent T = + ## Yields all values of `c` in the lexicographical order of the ## corresponding keys. + ## + ## **See also:** + ## * `mvalues iterator <#mvalues.i,CritBitTree[T]>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 1, "key2": 2}.toCritBitTree + doAssert toSeq(c.values) == @[1, 2] + for x in leaves(c.root): yield x.val iterator mvalues*[T](c: var CritBitTree[T]): var T = - ## yields all values of `c` in the lexicographical order of the + ## Yields all values of `c` in the lexicographical order of the ## corresponding keys. The values can be modified. + ## + ## **See also:** + ## * `values iterator <#values.i,CritBitTree[T]>`_ for x in leaves(c.root): yield x.val iterator items*[T](c: CritBitTree[T]): string = - ## yields all keys in lexicographical order. + ## Alias for `keys <#keys.i,CritBitTree[T]>`_. for x in leaves(c.root): yield x.key iterator pairs*[T](c: CritBitTree[T]): tuple[key: string, val: T] = - ## yields all (key, value)-pairs of `c`. 
+ ## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the + ## corresponding keys. + ## + ## **See also:** + ## * `mpairs iterator <#mpairs.i,CritBitTree[T]>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 1, "key2": 2}.toCritBitTree + doAssert toSeq(c.pairs) == @[(key: "key1", val: 1), (key: "key2", val: 2)] + for x in leaves(c.root): yield (x.key, x.val) - + iterator mpairs*[T](c: var CritBitTree[T]): tuple[key: string, val: var T] = - ## yields all (key, value)-pairs of `c`. The yielded values can be modified. + ## Yields all `(key, value)`-pairs of `c` in the lexicographical order of the + ## corresponding keys. The yielded values can be modified. + ## + ## **See also:** + ## * `pairs iterator <#pairs.i,CritBitTree[T]>`_ for x in leaves(c.root): yield (x.key, x.val) proc allprefixedAux[T](c: CritBitTree[T], key: string): Node[T] = @@ -220,50 +403,83 @@ proc allprefixedAux[T](c: CritBitTree[T], key: string): Node[T] = let dir = (1 + (ch.ord or p.otherBits.ord)) shr 8 p = p.child[dir] if q.byte < key.len: top = p - for i in 0 .. <key.len: - if p.key[i] != key[i]: return + for i in 0 ..< key.len: + if i >= p.key.len or p.key[i] != key[i]: return result = top -iterator itemsWithPrefix*[T](c: CritBitTree[T], prefix: string): string = - ## yields all keys starting with `prefix`. - let top = allprefixedAux(c, prefix) - for x in leaves(top): yield x.key - iterator keysWithPrefix*[T](c: CritBitTree[T], prefix: string): string = - ## yields all keys starting with `prefix`. + ## Yields all keys starting with `prefix`. 
+ runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 42, "key2": 43}.toCritBitTree + doAssert toSeq(c.keysWithPrefix("key")) == @["key1", "key2"] + let top = allprefixedAux(c, prefix) for x in leaves(top): yield x.key -iterator valuesWithPrefix*[T](c: CritBitTree[T], prefix: string): T = - ## yields all values of `c` starting with `prefix` of the +iterator valuesWithPrefix*[T](c: CritBitTree[T], prefix: string): lent T = + ## Yields all values of `c` starting with `prefix` of the ## corresponding keys. + ## + ## **See also:** + ## * `mvaluesWithPrefix iterator <#mvaluesWithPrefix.i,CritBitTree[T],string>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 42, "key2": 43}.toCritBitTree + doAssert toSeq(c.valuesWithPrefix("key")) == @[42, 43] + let top = allprefixedAux(c, prefix) for x in leaves(top): yield x.val iterator mvaluesWithPrefix*[T](c: var CritBitTree[T], prefix: string): var T = - ## yields all values of `c` starting with `prefix` of the + ## Yields all values of `c` starting with `prefix` of the ## corresponding keys. The values can be modified. + ## + ## **See also:** + ## * `valuesWithPrefix iterator <#valuesWithPrefix.i,CritBitTree[T],string>`_ let top = allprefixedAux(c, prefix) for x in leaves(top): yield x.val +iterator itemsWithPrefix*[T](c: CritBitTree[T], prefix: string): string = + ## Alias for `keysWithPrefix <#keysWithPrefix.i,CritBitTree[T],string>`_. + let top = allprefixedAux(c, prefix) + for x in leaves(top): yield x.key + iterator pairsWithPrefix*[T](c: CritBitTree[T], prefix: string): tuple[key: string, val: T] = - ## yields all (key, value)-pairs of `c` starting with `prefix`. + ## Yields all (key, value)-pairs of `c` starting with `prefix`. 
+ ## + ## **See also:** + ## * `mpairsWithPrefix iterator <#mpairsWithPrefix.i,CritBitTree[T],string>`_ + runnableExamples: + from std/sequtils import toSeq + + let c = {"key1": 42, "key2": 43}.toCritBitTree + doAssert toSeq(c.pairsWithPrefix("key")) == @[(key: "key1", val: 42), (key: "key2", val: 43)] + let top = allprefixedAux(c, prefix) for x in leaves(top): yield (x.key, x.val) - + iterator mpairsWithPrefix*[T](c: var CritBitTree[T], prefix: string): tuple[key: string, val: var T] = - ## yields all (key, value)-pairs of `c` starting with `prefix`. + ## Yields all (key, value)-pairs of `c` starting with `prefix`. ## The yielded values can be modified. + ## + ## **See also:** + ## * `pairsWithPrefix iterator <#pairsWithPrefix.i,CritBitTree[T],string>`_ let top = allprefixedAux(c, prefix) for x in leaves(top): yield (x.key, x.val) -proc `$`*[T](c: CritBitTree[T]): string = - ## turns `c` into a string representation. Example outputs: - ## ``{keyA: value, keyB: value}``, ``{:}`` - ## If `T` is void the outputs look like: - ## ``{keyA, keyB}``, ``{}``. +func `$`*[T](c: CritBitTree[T]): string = + ## Turns `c` into a string representation. 
+ runnableExamples: + doAssert $CritBitTree[int].default == "{:}" + doAssert $toCritBitTree({"key1": 1, "key2": 2}) == """{"key1": 1, "key2": 2}""" + doAssert $CritBitTree[void].default == "{}" + doAssert $toCritBitTree(["key1", "key2"]) == """{"key1", "key2"}""" + if c.len == 0: when T is void: result = "{}" @@ -277,27 +493,45 @@ proc `$`*[T](c: CritBitTree[T]): string = const avgItemLen = 16 result = newStringOfCap(c.count * avgItemLen) result.add("{") - for key, val in pairs(c): - if result.len > 1: result.add(", ") - result.add($key) - when T isnot void: + when T is void: + for key in keys(c): + if result.len > 1: result.add(", ") + result.addQuoted(key) + else: + for key, val in pairs(c): + if result.len > 1: result.add(", ") + result.addQuoted(key) result.add(": ") - result.add($val) + result.addQuoted(val) result.add("}") -when isMainModule: - var r: CritBitTree[void] - r.incl "abc" - r.incl "xyz" - r.incl "def" - r.incl "definition" - r.incl "prefix" - doAssert r.contains"def" - #r.del "def" - - for w in r.items: - echo w - - for w in r.itemsWithPrefix("de"): - echo w +func commonPrefixLen*[T](c: CritBitTree[T]): int {.inline, since((1, 3)).} = + ## Returns the length of the longest common prefix of all keys in `c`. + ## If `c` is empty, returns 0. + runnableExamples: + var c: CritBitTree[void] + doAssert c.commonPrefixLen == 0 + incl(c, "key1") + doAssert c.commonPrefixLen == 4 + incl(c, "key2") + doAssert c.commonPrefixLen == 3 + + if c.root != nil: + if c.root.isLeaf: len(c.root.key) + else: c.root.byte + else: 0 + +proc toCritBitTree*[T](pairs: sink openArray[(string, T)]): CritBitTree[T] {.since: (1, 3).} = + ## Creates a new `CritBitTree` that contains the given `pairs`. 
+ runnableExamples: + doAssert {"a": "0", "b": "1", "c": "2"}.toCritBitTree is CritBitTree[string] + doAssert {"a": 0, "b": 1, "c": 2}.toCritBitTree is CritBitTree[int] + + for item in pairs: result.incl item[0], item[1] + +proc toCritBitTree*(items: sink openArray[string]): CritBitTree[void] {.since: (1, 3).} = + ## Creates a new `CritBitTree` that contains the given `items`. + runnableExamples: + doAssert ["a", "b", "c"].toCritBitTree is CritBitTree[void] + for item in items: result.incl item diff --git a/lib/pure/collections/deques.nim b/lib/pure/collections/deques.nim new file mode 100644 index 000000000..d2b0099f2 --- /dev/null +++ b/lib/pure/collections/deques.nim @@ -0,0 +1,480 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2012 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## An implementation of a `deque`:idx: (double-ended queue). +## The underlying implementation uses a `seq`. +## +## .. note:: None of the procs that get an individual value from the deque should be used +## on an empty deque. +## +## If compiled with the `boundChecks` option, those procs will raise an `IndexDefect` +## on such access. This should not be relied upon, as `-d:danger` or `--checks:off` will +## disable those checks and then the procs may return garbage or crash the program. +## +## As such, a check to see if the deque is empty is needed before any +## access, unless your program logic guarantees it indirectly. 
+ +runnableExamples: + var a = [10, 20, 30, 40].toDeque + + doAssertRaises(IndexDefect, echo a[4]) + + a.addLast(50) + assert $a == "[10, 20, 30, 40, 50]" + + assert a.peekFirst == 10 + assert a.peekLast == 50 + assert len(a) == 5 + + assert a.popFirst == 10 + assert a.popLast == 50 + assert len(a) == 3 + + a.addFirst(11) + a.addFirst(22) + a.addFirst(33) + assert $a == "[33, 22, 11, 20, 30, 40]" + + a.shrink(fromFirst = 1, fromLast = 2) + assert $a == "[22, 11, 20]" + +## See also +## ======== +## * `lists module <lists.html>`_ for singly and doubly linked lists and rings + +import std/private/since + +import std/[assertions, hashes, math] + +type + Deque*[T] = object + ## A double-ended queue backed with a ringed `seq` buffer. + ## + ## To initialize an empty deque, + ## use the `initDeque proc <#initDeque,int>`_. + data: seq[T] + + # `head` and `tail` are masked only when accessing an element of `data` + # so that `tail - head == data.len` when the deque is full. + # They are uint so that incrementing/decrementing them doesn't cause + # over/underflow. You can get the number of items with `tail - head` + # even if `tail` or `head` wraps around and `tail < head`, because + # `tail - head == (uint.high + 1 + tail) - head` when `tail < head`. + head, tail: uint + +const + defaultInitialSize* = 4 + +template initImpl(result: typed, initialSize: int) = + let correctSize = nextPowerOfTwo(initialSize) + newSeq(result.data, correctSize) + +template checkIfInitialized(deq: typed) = + if deq.data.len == 0: + initImpl(deq, defaultInitialSize) + +func mask[T](deq: Deque[T]): uint {.inline.} = + uint(deq.data.len) - 1 + +proc initDeque*[T](initialSize: int = defaultInitialSize): Deque[T] = + ## Creates a new empty deque. + ## + ## Optionally, the initial capacity can be reserved via `initialSize` + ## as a performance optimization + ## (default: `defaultInitialSize <#defaultInitialSize>`_). + ## The length of a newly created deque will still be 0.
+ ## + ## **See also:** + ## * `toDeque proc <#toDeque,openArray[T]>`_ + result.initImpl(initialSize) + +func len*[T](deq: Deque[T]): int {.inline.} = + ## Returns the number of elements of `deq`. + int(deq.tail - deq.head) + +template emptyCheck(deq) = + # Bounds check for the regular deque access. + when compileOption("boundChecks"): + if unlikely(deq.len < 1): + raise newException(IndexDefect, "Empty deque.") + +template xBoundsCheck(deq, i) = + # Bounds check for the array like accesses. + when compileOption("boundChecks"): # `-d:danger` or `--checks:off` should disable this. + if unlikely(i >= deq.len): # x < deq.low is taken care by the Natural parameter + raise newException(IndexDefect, + "Out of bounds: " & $i & " > " & $(deq.len - 1)) + if unlikely(i < 0): # when used with BackwardsIndex + raise newException(IndexDefect, + "Out of bounds: " & $i & " < 0") + +proc `[]`*[T](deq: Deque[T], i: Natural): lent T {.inline.} = + ## Accesses the `i`-th element of `deq`. + runnableExamples: + let a = [10, 20, 30, 40, 50].toDeque + assert a[0] == 10 + assert a[3] == 40 + doAssertRaises(IndexDefect, echo a[8]) + + xBoundsCheck(deq, i) + return deq.data[(deq.head + i.uint) and deq.mask] + +proc `[]`*[T](deq: var Deque[T], i: Natural): var T {.inline.} = + ## Accesses the `i`-th element of `deq` and returns a mutable + ## reference to it. + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + inc(a[0]) + assert a[0] == 11 + + xBoundsCheck(deq, i) + return deq.data[(deq.head + i.uint) and deq.mask] + +proc `[]=`*[T](deq: var Deque[T], i: Natural, val: sink T) {.inline.} = + ## Sets the `i`-th element of `deq` to `val`. + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + a[0] = 99 + a[3] = 66 + assert $a == "[99, 20, 30, 66, 50]" + + checkIfInitialized(deq) + xBoundsCheck(deq, i) + deq.data[(deq.head + i.uint) and deq.mask] = val + +proc `[]`*[T](deq: Deque[T], i: BackwardsIndex): lent T {.inline.} = + ## Accesses the backwards indexed `i`-th element. 
+ ## + ## `deq[^1]` is the last element. + runnableExamples: + let a = [10, 20, 30, 40, 50].toDeque + assert a[^1] == 50 + assert a[^4] == 20 + doAssertRaises(IndexDefect, echo a[^9]) + + xBoundsCheck(deq, deq.len - int(i)) + return deq[deq.len - int(i)] + +proc `[]`*[T](deq: var Deque[T], i: BackwardsIndex): var T {.inline.} = + ## Accesses the backwards indexed `i`-th element and returns a mutable + ## reference to it. + ## + ## `deq[^1]` is the last element. + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + inc(a[^1]) + assert a[^1] == 51 + + xBoundsCheck(deq, deq.len - int(i)) + return deq[deq.len - int(i)] + +proc `[]=`*[T](deq: var Deque[T], i: BackwardsIndex, x: sink T) {.inline.} = + ## Sets the backwards indexed `i`-th element of `deq` to `x`. + ## + ## `deq[^1]` is the last element. + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + a[^1] = 99 + a[^3] = 77 + assert $a == "[10, 20, 77, 40, 99]" + + checkIfInitialized(deq) + xBoundsCheck(deq, deq.len - int(i)) + deq[deq.len - int(i)] = x + +iterator items*[T](deq: Deque[T]): lent T = + ## Yields every element of `deq`. + ## + ## **See also:** + ## * `mitems iterator <#mitems.i,Deque[T]>`_ + runnableExamples: + from std/sequtils import toSeq + + let a = [10, 20, 30, 40, 50].toDeque + assert toSeq(a.items) == @[10, 20, 30, 40, 50] + + for c in 0 ..< deq.len: + yield deq.data[(deq.head + c.uint) and deq.mask] + +iterator mitems*[T](deq: var Deque[T]): var T = + ## Yields every element of `deq`, which can be modified. + ## + ## **See also:** + ## * `items iterator <#items.i,Deque[T]>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + for x in mitems(a): + x = 5 * x - 1 + assert $a == "[49, 99, 149, 199, 249]" + + for c in 0 ..< deq.len: + yield deq.data[(deq.head + c.uint) and deq.mask] + +iterator pairs*[T](deq: Deque[T]): tuple[key: int, val: T] = + ## Yields every `(position, value)`-pair of `deq`. 
+ runnableExamples: + from std/sequtils import toSeq + + let a = [10, 20, 30].toDeque + assert toSeq(a.pairs) == @[(0, 10), (1, 20), (2, 30)] + + for c in 0 ..< deq.len: + yield (c, deq.data[(deq.head + c.uint) and deq.mask]) + +proc contains*[T](deq: Deque[T], item: T): bool {.inline.} = + ## Returns true if `item` is in `deq` or false if not found. + ## + ## Usually used via the `in` operator. + ## It is the equivalent of `deq.find(item) >= 0`. + runnableExamples: + let q = [7, 9].toDeque + assert 7 in q + assert q.contains(7) + assert 8 notin q + + for e in deq: + if e == item: return true + return false + +proc expandIfNeeded[T](deq: var Deque[T]) = + checkIfInitialized(deq) + let cap = deq.data.len + assert deq.len <= cap + if unlikely(deq.len == cap): + var n = newSeq[T](cap * 2) + var i = 0 + for x in mitems(deq): + when nimvm: n[i] = x # workaround for VM bug + else: n[i] = move(x) + inc i + deq.data = move(n) + deq.tail = cap.uint + deq.head = 0 + +proc addFirst*[T](deq: var Deque[T], item: sink T) = + ## Adds an `item` to the beginning of `deq`. + ## + ## **See also:** + ## * `addLast proc <#addLast,Deque[T],sinkT>`_ + runnableExamples: + var a = initDeque[int]() + for i in 1 .. 5: + a.addFirst(10 * i) + assert $a == "[50, 40, 30, 20, 10]" + + expandIfNeeded(deq) + dec deq.head + deq.data[deq.head and deq.mask] = item + +proc addLast*[T](deq: var Deque[T], item: sink T) = + ## Adds an `item` to the end of `deq`. + ## + ## **See also:** + ## * `addFirst proc <#addFirst,Deque[T],sinkT>`_ + runnableExamples: + var a = initDeque[int]() + for i in 1 .. 5: + a.addLast(10 * i) + assert $a == "[10, 20, 30, 40, 50]" + + expandIfNeeded(deq) + deq.data[deq.tail and deq.mask] = item + inc deq.tail + +proc toDeque*[T](x: openArray[T]): Deque[T] {.since: (1, 3).} = + ## Creates a new deque that contains the elements of `x` (in the same order). 
+ ## + ## **See also:** + ## * `initDeque proc <#initDeque,int>`_ + runnableExamples: + let a = toDeque([7, 8, 9]) + assert len(a) == 3 + assert $a == "[7, 8, 9]" + + result.initImpl(x.len) + for item in items(x): + result.addLast(item) + +proc peekFirst*[T](deq: Deque[T]): lent T {.inline.} = + ## Returns the first element of `deq`, but does not remove it from the deque. + ## + ## **See also:** + ## * `peekFirst proc <#peekFirst,Deque[T]_2>`_ which returns a mutable reference + ## * `peekLast proc <#peekLast,Deque[T]>`_ + runnableExamples: + let a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + assert a.peekFirst == 10 + assert len(a) == 5 + + emptyCheck(deq) + result = deq.data[deq.head and deq.mask] + +proc peekLast*[T](deq: Deque[T]): lent T {.inline.} = + ## Returns the last element of `deq`, but does not remove it from the deque. + ## + ## **See also:** + ## * `peekLast proc <#peekLast,Deque[T]_2>`_ which returns a mutable reference + ## * `peekFirst proc <#peekFirst,Deque[T]>`_ + runnableExamples: + let a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + assert a.peekLast == 50 + assert len(a) == 5 + + emptyCheck(deq) + result = deq.data[(deq.tail - 1) and deq.mask] + +proc peekFirst*[T](deq: var Deque[T]): var T {.inline, since: (1, 3).} = + ## Returns a mutable reference to the first element of `deq`, + ## but does not remove it from the deque. + ## + ## **See also:** + ## * `peekFirst proc <#peekFirst,Deque[T]>`_ + ## * `peekLast proc <#peekLast,Deque[T]_2>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + a.peekFirst() = 99 + assert $a == "[99, 20, 30, 40, 50]" + + emptyCheck(deq) + result = deq.data[deq.head and deq.mask] + +proc peekLast*[T](deq: var Deque[T]): var T {.inline, since: (1, 3).} = + ## Returns a mutable reference to the last element of `deq`, + ## but does not remove it from the deque. 
+ ## + ## **See also:** + ## * `peekFirst proc <#peekFirst,Deque[T]_2>`_ + ## * `peekLast proc <#peekLast,Deque[T]>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + a.peekLast() = 99 + assert $a == "[10, 20, 30, 40, 99]" + + emptyCheck(deq) + result = deq.data[(deq.tail - 1) and deq.mask] + +template destroy(x: untyped) = + reset(x) + +proc popFirst*[T](deq: var Deque[T]): T {.inline, discardable.} = + ## Removes and returns the first element of the `deq`. + ## + ## **See also:** + ## * `popLast proc <#popLast,Deque[T]>`_ + ## * `shrink proc <#shrink,Deque[T],int,int>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + assert a.popFirst == 10 + assert $a == "[20, 30, 40, 50]" + + emptyCheck(deq) + result = move deq.data[deq.head and deq.mask] + inc deq.head + +proc popLast*[T](deq: var Deque[T]): T {.inline, discardable.} = + ## Removes and returns the last element of the `deq`. + ## + ## **See also:** + ## * `popFirst proc <#popFirst,Deque[T]>`_ + ## * `shrink proc <#shrink,Deque[T],int,int>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + assert a.popLast == 50 + assert $a == "[10, 20, 30, 40]" + + emptyCheck(deq) + dec deq.tail + result = move deq.data[deq.tail and deq.mask] + +proc clear*[T](deq: var Deque[T]) {.inline.} = + ## Resets the deque so that it is empty. + ## + ## **See also:** + ## * `shrink proc <#shrink,Deque[T],int,int>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + clear(a) + assert len(a) == 0 + + for el in mitems(deq): destroy(el) + deq.tail = deq.head + +proc shrink*[T](deq: var Deque[T], fromFirst = 0, fromLast = 0) = + ## Removes `fromFirst` elements from the front of the deque and + ## `fromLast` elements from the back. + ## + ## If the supplied number of elements exceeds the total number of elements + ## in the deque, the deque will be left empty.
+ ## + ## **See also:** + ## * `clear proc <#clear,Deque[T]>`_ + ## * `popFirst proc <#popFirst,Deque[T]>`_ + ## * `popLast proc <#popLast,Deque[T]>`_ + runnableExamples: + var a = [10, 20, 30, 40, 50].toDeque + assert $a == "[10, 20, 30, 40, 50]" + a.shrink(fromFirst = 2, fromLast = 1) + assert $a == "[30, 40]" + + if fromFirst + fromLast > deq.len: + clear(deq) + return + + for i in 0 ..< fromFirst: + destroy(deq.data[deq.head and deq.mask]) + inc deq.head + + for i in 0 ..< fromLast: + destroy(deq.data[(deq.tail - 1) and deq.mask]) + dec deq.tail + +proc `$`*[T](deq: Deque[T]): string = + ## Turns a deque into its string representation. + runnableExamples: + let a = [10, 20, 30].toDeque + assert $a == "[10, 20, 30]" + + result = "[" + for x in deq: + if result.len > 1: result.add(", ") + result.addQuoted(x) + result.add("]") + +func `==`*[T](deq1, deq2: Deque[T]): bool = + ## The `==` operator for Deque. + ## Returns `true` if both deques contain the same values in the same order. + runnableExamples: + var a, b = initDeque[int]() + a.addFirst(2) + a.addFirst(1) + b.addLast(1) + b.addLast(2) + doAssert a == b + + if deq1.len != deq2.len: + return false + + for i in 0 ..< deq1.len: + if deq1.data[(deq1.head + i.uint) and deq1.mask] != deq2.data[(deq2.head + i.uint) and deq2.mask]: + return false + + true + +func hash*[T](deq: Deque[T]): Hash = + ## Hashing of Deque. + var h: Hash = 0 + for x in deq: + h = h !& hash(x) + !$h diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim new file mode 100644 index 000000000..17785c8c7 --- /dev/null +++ b/lib/pure/collections/hashcommon.nim @@ -0,0 +1,76 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2019 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# An `include` file which contains common code for +# hash sets and tables.
+ +when defined(nimPreviewSlimSystem): + import std/assertions + +import std / outparams + +const + growthFactor = 2 + +# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These +# two procs retain clarity of that encoding without the space cost of an enum. +proc isEmpty(hcode: Hash): bool {.inline.} = + result = hcode == 0 + +proc isFilled(hcode: Hash): bool {.inline.} = + result = hcode != 0 + +proc nextTry(h, maxHash: Hash): Hash {.inline.} = + result = (h + 1) and maxHash + +proc mustRehash[T](t: T): bool {.inline.} = + # If this is changed, make sure to synchronize it with `slotsNeeded` below + assert(t.dataLen > t.counter) + result = (t.dataLen * 2 < t.counter * 3) or (t.dataLen - t.counter < 4) + +proc slotsNeeded(count: Natural): int {.inline.} = + # Make sure to synchronize with `mustRehash` above + result = nextPowerOfTwo(count * 3 div 2 + 4) + +template rawGetKnownHCImpl() {.dirty.} = + if t.dataLen == 0: + return -1 + var h: Hash = hc and maxHash(t) # start with real hash value + while isFilled(t.data[h].hcode): + # Compare hc THEN key with boolean short circuit. This makes the common case + # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. + # It does slow down succeeding lookups by one extra Hash cmp&and..usually + # just a few clock cycles, generally worth it for any non-integer-like A. + if t.data[h].hcode == hc and t.data[h].key == key: + return h + h = nextTry(h, maxHash(t)) + result = -1 - h # < 0 => MISSING; insert idx = -1 - result + +proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} = + rawGetKnownHCImpl() + +template genHashImpl(key, hc: typed) = + hc = hash(key) + if hc == 0: # This almost never taken branch should be very predictable. + when sizeof(int) < 4: + hc = 31415 # Value doesn't matter; Any non-zero favorite is fine <= 16-bit. + else: + hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine. 
+ +template genHash(key: typed): Hash = + var res: Hash + genHashImpl(key, res) + res + +template rawGetImpl() {.dirty.} = + genHashImpl(key, hc) + rawGetKnownHCImpl() + +proc rawGet[X, A](t: X, key: A, hc: var Hash): int {.inline, outParamsAt: [3].} = + rawGetImpl() diff --git a/lib/pure/collections/heapqueue.nim b/lib/pure/collections/heapqueue.nim new file mode 100644 index 000000000..96f9b4430 --- /dev/null +++ b/lib/pure/collections/heapqueue.nim @@ -0,0 +1,266 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Yuriy Glukhov +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. + + +## The `heapqueue` module implements a +## `binary heap data structure<https://en.wikipedia.org/wiki/Binary_heap>`_ +## that can be used as a `priority queue<https://en.wikipedia.org/wiki/Priority_queue>`_. +## They are represented as arrays for which `a[k] <= a[2*k+1]` and `a[k] <= a[2*k+2]` +## for all indices `k` (counting elements from 0). The interesting property of a heap is that +## `a[0]` is always its smallest element. +## +## Basic usage +## ----------- +## +runnableExamples: + var heap = [8, 2].toHeapQueue + heap.push(5) + # the first element is the lowest element + assert heap[0] == 2 + # remove and return the lowest element + assert heap.pop() == 2 + # the lowest element remaining is 5 + assert heap[0] == 5 + +## Usage with custom objects +## ------------------------- +## To use a `HeapQueue` with a custom object, the `<` operator must be +## implemented. + +runnableExamples: + type Job = object + priority: int + + proc `<`(a, b: Job): bool = a.priority < b.priority + + var jobs = initHeapQueue[Job]() + jobs.push(Job(priority: 1)) + jobs.push(Job(priority: 2)) + + assert jobs[0].priority == 1 + + +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/assertions + +type HeapQueue*[T] = object + ## A heap queue, commonly known as a priority queue. 
+ data: seq[T] + +proc initHeapQueue*[T](): HeapQueue[T] = + ## Creates a new empty heap. + ## + ## Heaps are initialized by default, so it is not necessary to call + ## this function explicitly. + ## + ## **See also:** + ## * `toHeapQueue proc <#toHeapQueue,openArray[T]>`_ + result = default(HeapQueue[T]) + +proc len*[T](heap: HeapQueue[T]): int {.inline.} = + ## Returns the number of elements of `heap`. + runnableExamples: + let heap = [9, 5, 8].toHeapQueue + assert heap.len == 3 + + heap.data.len + +proc `[]`*[T](heap: HeapQueue[T], i: Natural): lent T {.inline.} = + ## Accesses the i-th element of `heap`. + heap.data[i] + +iterator items*[T](heap: HeapQueue[T]): lent T {.inline, since: (2, 1, 1).} = + ## Iterates over each item of `heap`. + let L = len(heap) + for i in 0 .. high(heap.data): + yield heap.data[i] + assert(len(heap) == L, "the length of the HeapQueue changed while iterating over it") + +proc heapCmp[T](x, y: T): bool {.inline.} = x < y + +proc siftup[T](heap: var HeapQueue[T], startpos, p: int) = + ## `heap` is a heap at all indices >= `startpos`, except possibly for `p`. `p` + ## is the index of a leaf with a possibly out-of-order value. Restores the + ## heap invariant. + var pos = p + let newitem = heap[pos] + # Follow the path to the root, moving parents down until finding a place + # newitem fits. + while pos > startpos: + let parentpos = (pos - 1) shr 1 + let parent = heap[parentpos] + if heapCmp(newitem, parent): + heap.data[pos] = parent + pos = parentpos + else: + break + heap.data[pos] = newitem + +proc siftdownToBottom[T](heap: var HeapQueue[T], p: int) = + # This is faster when the element should be close to the bottom. + let endpos = len(heap) + var pos = p + let startpos = pos + let newitem = heap[pos] + # Bubble up the smaller child until hitting a leaf. + var childpos = 2 * pos + 1 # leftmost child position + while childpos < endpos: + # Set childpos to index of smaller child. 
+ let rightpos = childpos + 1 + if rightpos < endpos and not heapCmp(heap[childpos], heap[rightpos]): + childpos = rightpos + # Move the smaller child up. + heap.data[pos] = heap[childpos] + pos = childpos + childpos = 2 * pos + 1 + # The leaf at pos is empty now. Put newitem there, and bubble it up + # to its final resting place (by sifting its parents down). + heap.data[pos] = newitem + siftup(heap, startpos, pos) + +proc siftdown[T](heap: var HeapQueue[T], p: int) = + let endpos = len(heap) + var pos = p + let newitem = heap[pos] + var childpos = 2 * pos + 1 + while childpos < endpos: + let rightpos = childpos + 1 + if rightpos < endpos and not heapCmp(heap[childpos], heap[rightpos]): + childpos = rightpos + if not heapCmp(heap[childpos], newitem): + break + heap.data[pos] = heap[childpos] + pos = childpos + childpos = 2 * pos + 1 + heap.data[pos] = newitem + +proc push*[T](heap: var HeapQueue[T], item: sink T) = + ## Pushes `item` onto `heap`, maintaining the heap invariant. + heap.data.add(item) + siftup(heap, 0, len(heap) - 1) + +proc toHeapQueue*[T](x: openArray[T]): HeapQueue[T] {.since: (1, 3).} = + ## Creates a new HeapQueue that contains the elements of `x`. + ## + ## **See also:** + ## * `initHeapQueue proc <#initHeapQueue>`_ + runnableExamples: + var heap = [9, 5, 8].toHeapQueue + assert heap.pop() == 5 + assert heap[0] == 8 + + # see https://en.wikipedia.org/wiki/Binary_heap#Building_a_heap + result.data = @x + for i in countdown(x.len div 2 - 1, 0): + siftdown(result, i) + +proc pop*[T](heap: var HeapQueue[T]): T = + ## Pops and returns the smallest item from `heap`, + ## maintaining the heap invariant. 
+ runnableExamples: + var heap = [9, 5, 8].toHeapQueue + assert heap.pop() == 5 + + let lastelt = heap.data.pop() + if heap.len > 0: + result = heap[0] + heap.data[0] = lastelt + siftdownToBottom(heap, 0) + else: + result = lastelt + +proc find*[T](heap: HeapQueue[T], x: T): int {.since: (1, 3).} = + ## Linear scan to find the index of the item `x` or -1 if not found. + runnableExamples: + let heap = [9, 5, 8].toHeapQueue + assert heap.find(5) == 0 + assert heap.find(9) == 1 + assert heap.find(777) == -1 + + result = -1 + for i in 0 ..< heap.len: + if heap[i] == x: return i + +proc contains*[T](heap: HeapQueue[T], x: T): bool {.since: (2, 1, 1).} = + ## Returns true if `x` is in `heap` or false if not found. This is a shortcut + ## for `find(heap, x) >= 0`. + result = find(heap, x) >= 0 + +proc del*[T](heap: var HeapQueue[T], index: Natural) = + ## Removes the element at `index` from `heap`, maintaining the heap invariant. + runnableExamples: + var heap = [9, 5, 8].toHeapQueue + heap.del(1) + assert heap[0] == 5 + assert heap[1] == 8 + + swap(heap.data[^1], heap.data[index]) + let newLen = heap.len - 1 + heap.data.setLen(newLen) + if index < newLen: + siftdownToBottom(heap, index) + +proc replace*[T](heap: var HeapQueue[T], item: sink T): T = + ## Pops and returns the current smallest value, and adds the new item. + ## This is more efficient than `pop()` followed by `push()`, and can be + ## more appropriate when using a fixed-size heap. Note that the value + ## returned may be larger than `item`! That constrains reasonable uses of + ## this routine unless written as part of a conditional replacement.
+ ## + ## **See also:** + ## * `pushpop proc <#pushpop,HeapQueue[T],sinkT>`_ + runnableExamples: + var heap = [5, 12].toHeapQueue + assert heap.replace(6) == 5 + assert heap.len == 2 + assert heap[0] == 6 + assert heap.replace(4) == 6 + + result = heap[0] + heap.data[0] = item + siftdown(heap, 0) + +proc pushpop*[T](heap: var HeapQueue[T], item: sink T): T = + ## Fast version of a `push()` followed by a `pop()`. + ## + ## **See also:** + ## * `replace proc <#replace,HeapQueue[T],sinkT>`_ + runnableExamples: + var heap = [5, 12].toHeapQueue + assert heap.pushpop(6) == 5 + assert heap.len == 2 + assert heap[0] == 6 + assert heap.pushpop(4) == 4 + + result = item + if heap.len > 0 and heapCmp(heap.data[0], result): + swap(result, heap.data[0]) + siftdown(heap, 0) + +proc clear*[T](heap: var HeapQueue[T]) = + ## Removes all elements from `heap`, making it empty. + runnableExamples: + var heap = [9, 5, 8].toHeapQueue + heap.clear() + assert heap.len == 0 + + heap.data.setLen(0) + +proc `$`*[T](heap: HeapQueue[T]): string = + ## Turns a heap into its string representation. + runnableExamples: + let heap = [1, 2].toHeapQueue + assert $heap == "[1, 2]" + + result = "[" + for x in heap.data: + if result.len > 1: result.add(", ") + result.addQuoted(x) + result.add("]") diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim index 7520e6e46..765a23e97 100644 --- a/lib/pure/collections/intsets.nim +++ b/lib/pure/collections/intsets.nim @@ -7,205 +7,17 @@ # distribution, for details about the copyright. # -## The ``intsets`` module implements an efficient int set implemented as a -## `sparse bit set`:idx:. -## **Note**: Since Nim currently does not allow the assignment operator to -## be overloaded, ``=`` for int sets performs some rather meaningless shallow -## copy; use ``assign`` to get a deep copy. +## Specialization of the generic `packedsets module <packedsets.html>`_ +## (see its documentation for more examples) for ordinal sparse sets. 
-import - os, hashes, math +import std/private/since +import std/packedsets +export packedsets type - BitScalar = int + IntSet* = PackedSet[int] -const - InitIntSetSize = 8 # must be a power of two! - TrunkShift = 9 - BitsPerTrunk = 1 shl TrunkShift # needs to be a power of 2 and - # divisible by 64 - TrunkMask = BitsPerTrunk - 1 - IntsPerTrunk = BitsPerTrunk div (sizeof(BitScalar) * 8) - IntShift = 5 + ord(sizeof(BitScalar) == 8) # 5 or 6, depending on int width - IntMask = 1 shl IntShift - 1 +proc toIntSet*(x: openArray[int]): IntSet {.since: (1, 3), inline.} = toPackedSet[int](x) -type - PTrunk = ref TTrunk - TTrunk {.final.} = object - next: PTrunk # all nodes are connected with this pointer - key: int # start address at bit 0 - bits: array[0..IntsPerTrunk - 1, BitScalar] # a bit vector - - TTrunkSeq = seq[PTrunk] - IntSet* = object ## an efficient set of 'int' implemented as a sparse bit set - counter, max: int - head: PTrunk - data: TTrunkSeq - -{.deprecated: [TIntSet: IntSet].} - -proc mustRehash(length, counter: int): bool {.inline.} = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) - -proc nextTry(h, maxHash: THash): THash {.inline.} = - result = ((5 * h) + 1) and maxHash - -proc intSetGet(t: IntSet, key: int): PTrunk = - var h = key and t.max - while t.data[h] != nil: - if t.data[h].key == key: - return t.data[h] - h = nextTry(h, t.max) - result = nil - -proc intSetRawInsert(t: IntSet, data: var TTrunkSeq, desc: PTrunk) = - var h = desc.key and t.max - while data[h] != nil: - assert(data[h] != desc) - h = nextTry(h, t.max) - assert(data[h] == nil) - data[h] = desc - -proc intSetEnlarge(t: var IntSet) = - var n: TTrunkSeq - var oldMax = t.max - t.max = ((t.max + 1) * 2) - 1 - newSeq(n, t.max + 1) - for i in countup(0, oldMax): - if t.data[i] != nil: intSetRawInsert(t, n, t.data[i]) - swap(t.data, n) - -proc intSetPut(t: var IntSet, key: int): PTrunk = - var h = key and t.max - while t.data[h] != nil: - if 
t.data[h].key == key: - return t.data[h] - h = nextTry(h, t.max) - if mustRehash(t.max + 1, t.counter): intSetEnlarge(t) - inc(t.counter) - h = key and t.max - while t.data[h] != nil: h = nextTry(h, t.max) - assert(t.data[h] == nil) - new(result) - result.next = t.head - result.key = key - t.head = result - t.data[h] = result - -proc contains*(s: IntSet, key: int): bool = - ## returns true iff `key` is in `s`. - var t = intSetGet(s, `shr`(key, TrunkShift)) - if t != nil: - var u = key and TrunkMask - result = (t.bits[`shr`(u, IntShift)] and `shl`(1, u and IntMask)) != 0 - else: - result = false - -proc incl*(s: var IntSet, key: int) = - ## includes an element `key` in `s`. - var t = intSetPut(s, `shr`(key, TrunkShift)) - var u = key and TrunkMask - t.bits[`shr`(u, IntShift)] = t.bits[`shr`(u, IntShift)] or - `shl`(1, u and IntMask) - -proc excl*(s: var IntSet, key: int) = - ## excludes `key` from the set `s`. - var t = intSetGet(s, `shr`(key, TrunkShift)) - if t != nil: - var u = key and TrunkMask - t.bits[`shr`(u, IntShift)] = t.bits[`shr`(u, IntShift)] and - not `shl`(1, u and IntMask) - -proc containsOrIncl*(s: var IntSet, key: int): bool = - ## returns true if `s` contains `key`, otherwise `key` is included in `s` - ## and false is returned. - var t = intSetGet(s, `shr`(key, TrunkShift)) - if t != nil: - var u = key and TrunkMask - result = (t.bits[`shr`(u, IntShift)] and `shl`(1, u and IntMask)) != 0 - if not result: - t.bits[`shr`(u, IntShift)] = t.bits[`shr`(u, IntShift)] or - `shl`(1, u and IntMask) - else: - incl(s, key) - result = false - -proc initIntSet*: IntSet = - ## creates a new int set that is empty. - newSeq(result.data, InitIntSetSize) - result.max = InitIntSetSize-1 - result.counter = 0 - result.head = nil - -proc assign*(dest: var IntSet, src: IntSet) = - ## copies `src` to `dest`. `dest` does not need to be initialized by - ## `initIntSet`. 
- dest.counter = src.counter - dest.max = src.max - newSeq(dest.data, src.data.len) - - var it = src.head - while it != nil: - - var h = it.key and dest.max - while dest.data[h] != nil: h = nextTry(h, dest.max) - assert(dest.data[h] == nil) - - var n: PTrunk - new(n) - n.next = dest.head - n.key = it.key - n.bits = it.bits - dest.head = n - dest.data[h] = n - - it = it.next - -iterator items*(s: IntSet): int {.inline.} = - ## iterates over any included element of `s`. - var r = s.head - while r != nil: - var i = 0 - while i <= high(r.bits): - var w = r.bits[i] - # taking a copy of r.bits[i] here is correct, because - # modifying operations are not allowed during traversation - var j = 0 - while w != 0: # test all remaining bits for zero - if (w and 1) != 0: # the bit is set! - yield (r.key shl TrunkShift) or (i shl IntShift +% j) - inc(j) - w = w shr 1 - inc(i) - r = r.next - -template dollarImpl(): stmt = - result = "{" - for key in items(s): - if result.len > 1: result.add(", ") - result.add($key) - result.add("}") - -proc `$`*(s: IntSet): string = - ## The `$` operator for int sets. - dollarImpl() - -proc empty*(s: IntSet): bool {.inline, deprecated.} = - ## returns true if `s` is empty. This is safe to call even before - ## the set has been initialized with `initIntSet`. Note this never - ## worked reliably and so is deprecated. - result = s.counter == 0 - -when isMainModule: - var x = initIntSet() - x.incl(1) - x.incl(2) - x.incl(7) - x.incl(1056) - for e in items(x): echo e - - var y: TIntSet - assign(y, x) - for e in items(y): echo e +proc initIntSet*(): IntSet {.inline.} = initPackedSet[int]() diff --git a/lib/pure/collections/lists.nim b/lib/pure/collections/lists.nim index 535d5e21d..6b88747ef 100644 --- a/lib/pure/collections/lists.nim +++ b/lib/pure/collections/lists.nim @@ -7,246 +7,574 @@ # distribution, for details about the copyright. # -## Implementation of singly and doubly linked lists. 
Because it makes no sense -## to do so, the 'next' and 'prev' pointers are not hidden from you and can -## be manipulated directly for efficiency. - -when not defined(nimhygiene): - {.pragma: dirty.} +## Implementation of: +## * `singly linked lists <#SinglyLinkedList>`_ +## * `doubly linked lists <#DoublyLinkedList>`_ +## * `singly linked rings <#SinglyLinkedRing>`_ (circular lists) +## * `doubly linked rings <#DoublyLinkedRing>`_ (circular lists) +## +## # Basic Usage +## Because it makes no sense to do otherwise, the `next` and `prev` pointers +## are not hidden from you and can be manipulated directly for efficiency. +## +## ## Lists +runnableExamples: + var list = initDoublyLinkedList[int]() + let + a = newDoublyLinkedNode[int](3) + b = newDoublyLinkedNode[int](7) + c = newDoublyLinkedNode[int](9) + + list.add(a) + list.add(b) + list.prepend(c) + + assert a.next == b + assert a.prev == c + assert c.next == a + assert c.next.next == b + assert c.prev == nil + assert b.next == nil + +## ## Rings +runnableExamples: + var ring = initSinglyLinkedRing[int]() + let + a = newSinglyLinkedNode[int](3) + b = newSinglyLinkedNode[int](7) + c = newSinglyLinkedNode[int](9) + + ring.add(a) + ring.add(b) + ring.prepend(c) + + assert c.next == a + assert a.next == b + assert c.next.next == b + assert b.next == c + assert c.next.next.next == c + +## # See also +## * `deques module <deques.html>`_ for double-ended queues + +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/assertions type - DoublyLinkedNodeObj*[T] = object ## a node a doubly linked list consists of - next*, prev*: ref DoublyLinkedNodeObj[T] + DoublyLinkedNodeObj*[T] = object + ## A node of a doubly linked list. + ## + ## It consists of a `value` field, and pointers to `next` and `prev`. 
+ next*: DoublyLinkedNode[T] + prev* {.cursor.}: DoublyLinkedNode[T] value*: T DoublyLinkedNode*[T] = ref DoublyLinkedNodeObj[T] - SinglyLinkedNodeObj*[T] = object ## a node a singly linked list consists of - next*: ref SinglyLinkedNodeObj[T] + SinglyLinkedNodeObj*[T] = object + ## A node of a singly linked list. + ## + ## It consists of a `value` field, and a pointer to `next`. + next*: SinglyLinkedNode[T] value*: T SinglyLinkedNode*[T] = ref SinglyLinkedNodeObj[T] - SinglyLinkedList*[T] = object ## a singly linked list - head*, tail*: SinglyLinkedNode[T] - - DoublyLinkedList*[T] = object ## a doubly linked list - head*, tail*: DoublyLinkedNode[T] + SinglyLinkedList*[T] = object + ## A singly linked list. + head*: SinglyLinkedNode[T] + tail* {.cursor.}: SinglyLinkedNode[T] + + DoublyLinkedList*[T] = object + ## A doubly linked list. + head*: DoublyLinkedNode[T] + tail* {.cursor.}: DoublyLinkedNode[T] + + SinglyLinkedRing*[T] = object + ## A singly linked ring. + head*: SinglyLinkedNode[T] + tail* {.cursor.}: SinglyLinkedNode[T] - SinglyLinkedRing*[T] = object ## a singly linked ring - head*, tail*: SinglyLinkedNode[T] - - DoublyLinkedRing*[T] = object ## a doubly linked ring + DoublyLinkedRing*[T] = object + ## A doubly linked ring. head*: DoublyLinkedNode[T] -{.deprecated: [TDoublyLinkedNode: DoublyLinkedNodeObj, - PDoublyLinkedNode: DoublyLinkedNode, - TSinglyLinkedNode: SinglyLinkedNodeObj, - PSinglyLinkedNode: SinglyLinkedNode, - TDoublyLinkedList: DoublyLinkedList, - TSinglyLinkedRing: SinglyLinkedRing, - TDoublyLinkedRing: DoublyLinkedRing, - TSinglyLinkedList: SinglyLinkedList].} + SomeLinkedList*[T] = SinglyLinkedList[T] | DoublyLinkedList[T] + + SomeLinkedRing*[T] = SinglyLinkedRing[T] | DoublyLinkedRing[T] + + SomeLinkedCollection*[T] = SomeLinkedList[T] | SomeLinkedRing[T] + + SomeLinkedNode*[T] = SinglyLinkedNode[T] | DoublyLinkedNode[T] proc initSinglyLinkedList*[T](): SinglyLinkedList[T] = - ## creates a new singly linked list that is empty. 
+ ## Creates a new singly linked list that is empty. + ## + ## Singly linked lists are initialized by default, so it is not necessary to + ## call this function explicitly. + runnableExamples: + let a = initSinglyLinkedList[int]() + discard proc initDoublyLinkedList*[T](): DoublyLinkedList[T] = - ## creates a new doubly linked list that is empty. + ## Creates a new doubly linked list that is empty. + ## + ## Doubly linked lists are initialized by default, so it is not necessary to + ## call this function explicitly. + runnableExamples: + let a = initDoublyLinkedList[int]() + discard proc initSinglyLinkedRing*[T](): SinglyLinkedRing[T] = - ## creates a new singly linked ring that is empty. + ## Creates a new singly linked ring that is empty. + ## + ## Singly linked rings are initialized by default, so it is not necessary to + ## call this function explicitly. + runnableExamples: + let a = initSinglyLinkedRing[int]() + discard proc initDoublyLinkedRing*[T](): DoublyLinkedRing[T] = - ## creates a new doubly linked ring that is empty. + ## Creates a new doubly linked ring that is empty. + ## + ## Doubly linked rings are initialized by default, so it is not necessary to + ## call this function explicitly. + runnableExamples: + let a = initDoublyLinkedRing[int]() + discard proc newDoublyLinkedNode*[T](value: T): DoublyLinkedNode[T] = - ## creates a new doubly linked node with the given `value`. + ## Creates a new doubly linked node with the given `value`. + runnableExamples: + let n = newDoublyLinkedNode[int](5) + assert n.value == 5 + new(result) result.value = value proc newSinglyLinkedNode*[T](value: T): SinglyLinkedNode[T] = - ## creates a new singly linked node with the given `value`. + ## Creates a new singly linked node with the given `value`. 
+ runnableExamples: + let n = newSinglyLinkedNode[int](5) + assert n.value == 5 + new(result) result.value = value template itemsListImpl() {.dirty.} = - var it = L.head + var it {.cursor.} = L.head while it != nil: yield it.value it = it.next template itemsRingImpl() {.dirty.} = - var it = L.head + var it {.cursor.} = L.head if it != nil: while true: yield it.value it = it.next if it == L.head: break -template nodesListImpl() {.dirty.} = - var it = L.head - while it != nil: - var nxt = it.next - yield it - it = nxt +iterator items*[T](L: SomeLinkedList[T]): T = + ## Yields every value of `L`. + ## + ## **See also:** + ## * `mitems iterator <#mitems.i,SomeLinkedList[T]>`_ + ## * `nodes iterator <#nodes.i,SomeLinkedList[T]>`_ + runnableExamples: + from std/sugar import collect + from std/sequtils import toSeq + let a = collect(initSinglyLinkedList): + for i in 1..3: 10 * i + assert toSeq(items(a)) == toSeq(a) + assert toSeq(a) == @[10, 20, 30] -template nodesRingImpl() {.dirty.} = - var it = L.head - if it != nil: - while true: - var nxt = it.next - yield it - it = nxt - if it == L.head: break - -template findImpl() {.dirty.} = - for x in nodes(L): - if x.value == value: return x - -iterator items*[T](L: DoublyLinkedList[T]): T = - ## yields every value of `L`. - itemsListImpl() - -iterator items*[T](L: SinglyLinkedList[T]): T = - ## yields every value of `L`. itemsListImpl() -iterator items*[T](L: SinglyLinkedRing[T]): T = - ## yields every value of `L`. - itemsRingImpl() +iterator items*[T](L: SomeLinkedRing[T]): T = + ## Yields every value of `L`. 
+ ## + ## **See also:** + ## * `mitems iterator <#mitems.i,SomeLinkedRing[T]>`_ + ## * `nodes iterator <#nodes.i,SomeLinkedRing[T]>`_ + runnableExamples: + from std/sugar import collect + from std/sequtils import toSeq + let a = collect(initSinglyLinkedRing): + for i in 1..3: 10 * i + assert toSeq(items(a)) == toSeq(a) + assert toSeq(a) == @[10, 20, 30] -iterator items*[T](L: DoublyLinkedRing[T]): T = - ## yields every value of `L`. itemsRingImpl() -iterator mitems*[T](L: var DoublyLinkedList[T]): var T = - ## yields every value of `L` so that you can modify it. - itemsListImpl() +iterator mitems*[T](L: var SomeLinkedList[T]): var T = + ## Yields every value of `L` so that you can modify it. + ## + ## **See also:** + ## * `items iterator <#items.i,SomeLinkedList[T]>`_ + ## * `nodes iterator <#nodes.i,SomeLinkedList[T]>`_ + runnableExamples: + var a = initSinglyLinkedList[int]() + for i in 1..5: + a.add(10 * i) + assert $a == "[10, 20, 30, 40, 50]" + for x in mitems(a): + x = 5 * x - 1 + assert $a == "[49, 99, 149, 199, 249]" -iterator mitems*[T](L: var SinglyLinkedList[T]): var T = - ## yields every value of `L` so that you can modify it. itemsListImpl() -iterator mitems*[T](L: var SinglyLinkedRing[T]): var T = - ## yields every value of `L` so that you can modify it. - itemsRingImpl() +iterator mitems*[T](L: var SomeLinkedRing[T]): var T = + ## Yields every value of `L` so that you can modify it. + ## + ## **See also:** + ## * `items iterator <#items.i,SomeLinkedRing[T]>`_ + ## * `nodes iterator <#nodes.i,SomeLinkedRing[T]>`_ + runnableExamples: + var a = initSinglyLinkedRing[int]() + for i in 1..5: + a.add(10 * i) + assert $a == "[10, 20, 30, 40, 50]" + for x in mitems(a): + x = 5 * x - 1 + assert $a == "[49, 99, 149, 199, 249]" -iterator mitems*[T](L: var DoublyLinkedRing[T]): var T = - ## yields every value of `L` so that you can modify it. itemsRingImpl() -iterator nodes*[T](L: SinglyLinkedList[T]): SinglyLinkedNode[T] = - ## iterates over every node of `x`. 
Removing the current node from the - ## list during traversal is supported. - nodesListImpl() - -iterator nodes*[T](L: DoublyLinkedList[T]): DoublyLinkedNode[T] = - ## iterates over every node of `x`. Removing the current node from the +iterator nodes*[T](L: SomeLinkedList[T]): SomeLinkedNode[T] = + ## Iterates over every node of `L`. Removing the current node from the ## list during traversal is supported. - nodesListImpl() + ## + ## **See also:** + ## * `items iterator <#items.i,SomeLinkedList[T]>`_ + ## * `mitems iterator <#mitems.i,SomeLinkedList[T]>`_ + runnableExamples: + var a = initDoublyLinkedList[int]() + for i in 1..5: + a.add(10 * i) + assert $a == "[10, 20, 30, 40, 50]" + for x in nodes(a): + if x.value == 30: + a.remove(x) + else: + x.value = 5 * x.value - 1 + assert $a == "[49, 99, 199, 249]" + + var it {.cursor.} = L.head + while it != nil: + let nxt = it.next + yield it + it = nxt -iterator nodes*[T](L: SinglyLinkedRing[T]): SinglyLinkedNode[T] = - ## iterates over every node of `x`. Removing the current node from the +iterator nodes*[T](L: SomeLinkedRing[T]): SomeLinkedNode[T] = + ## Iterates over every node of `L`. Removing the current node from the ## list during traversal is supported. - nodesRingImpl() + ## + ## **See also:** + ## * `items iterator <#items.i,SomeLinkedRing[T]>`_ + ## * `mitems iterator <#mitems.i,SomeLinkedRing[T]>`_ + runnableExamples: + var a = initDoublyLinkedRing[int]() + for i in 1..5: + a.add(10 * i) + assert $a == "[10, 20, 30, 40, 50]" + for x in nodes(a): + if x.value == 30: + a.remove(x) + else: + x.value = 5 * x.value - 1 + assert $a == "[49, 99, 199, 249]" + + var it {.cursor.} = L.head + if it != nil: + while true: + let nxt = it.next + yield it + it = nxt + if it == L.head: break -iterator nodes*[T](L: DoublyLinkedRing[T]): DoublyLinkedNode[T] = - ## iterates over every node of `x`. Removing the current node from the - ## list during traversal is supported. 
- nodesRingImpl() +proc `$`*[T](L: SomeLinkedCollection[T]): string = + ## Turns a list into its string representation for logging and printing. + runnableExamples: + let a = [1, 2, 3, 4].toSinglyLinkedList + assert $a == "[1, 2, 3, 4]" -template dollarImpl() {.dirty.} = result = "[" for x in nodes(L): if result.len > 1: result.add(", ") - result.add($x.value) + result.addQuoted(x.value) result.add("]") -proc `$`*[T](L: SinglyLinkedList[T]): string = - ## turns a list into its string representation. - dollarImpl() - -proc `$`*[T](L: DoublyLinkedList[T]): string = - ## turns a list into its string representation. - dollarImpl() - -proc `$`*[T](L: SinglyLinkedRing[T]): string = - ## turns a list into its string representation. - dollarImpl() - -proc `$`*[T](L: DoublyLinkedRing[T]): string = - ## turns a list into its string representation. - dollarImpl() - -proc find*[T](L: SinglyLinkedList[T], value: T): SinglyLinkedNode[T] = - ## searches in the list for a value. Returns nil if the value does not - ## exist. - findImpl() - -proc find*[T](L: DoublyLinkedList[T], value: T): DoublyLinkedNode[T] = - ## searches in the list for a value. Returns nil if the value does not +proc find*[T](L: SomeLinkedCollection[T], value: T): SomeLinkedNode[T] = + ## Searches in the list for a value. Returns `nil` if the value does not ## exist. - findImpl() + ## + ## **See also:** + ## * `contains proc <#contains,SomeLinkedCollection[T],T>`_ + runnableExamples: + let a = [9, 8].toSinglyLinkedList + assert a.find(9).value == 9 + assert a.find(1) == nil -proc find*[T](L: SinglyLinkedRing[T], value: T): SinglyLinkedNode[T] = - ## searches in the list for a value. Returns nil if the value does not - ## exist. - findImpl() + for x in nodes(L): + if x.value == value: return x -proc find*[T](L: DoublyLinkedRing[T], value: T): DoublyLinkedNode[T] = - ## searches in the list for a value. Returns nil if the value does not - ## exist. 
- findImpl() +proc contains*[T](L: SomeLinkedCollection[T], value: T): bool {.inline.} = + ## Searches in the list for a value. Returns `false` if the value does not + ## exist, `true` otherwise. This allows the usage of the `in` and `notin` + ## operators. + ## + ## **See also:** + ## * `find proc <#find,SomeLinkedCollection[T],T>`_ + runnableExamples: + let a = [9, 8].toSinglyLinkedList + assert a.contains(9) + assert 8 in a + assert(not a.contains(1)) + assert 2 notin a -proc contains*[T](L: SinglyLinkedList[T], value: T): bool {.inline.} = - ## searches in the list for a value. Returns false if the value does not - ## exist, true otherwise. result = find(L, value) != nil -proc contains*[T](L: DoublyLinkedList[T], value: T): bool {.inline.} = - ## searches in the list for a value. Returns false if the value does not - ## exist, true otherwise. - result = find(L, value) != nil +proc prepend*[T: SomeLinkedList](a: var T, b: T) {.since: (1, 5, 1).} = + ## Prepends a shallow copy of `b` to the beginning of `a`. + ## + ## **See also:** + ## * `prependMoved proc <#prependMoved,T,T>`_ + ## for moving the second list instead of copying + runnableExamples: + from std/sequtils import toSeq + var a = [4, 5].toSinglyLinkedList + let b = [1, 2, 3].toSinglyLinkedList + a.prepend(b) + assert a.toSeq == [1, 2, 3, 4, 5] + assert b.toSeq == [1, 2, 3] + a.prepend(a) + assert a.toSeq == [1, 2, 3, 4, 5, 1, 2, 3, 4, 5] + + var tmp = b.copy + tmp.addMoved(a) + a = tmp + +proc prependMoved*[T: SomeLinkedList](a, b: var T) {.since: (1, 5, 1).} = + ## Moves `b` before the head of `a`. Efficiency: O(1). + ## Note that `b` becomes empty after the operation unless it has the same address as `a`. + ## Self-prepending results in a cycle. 
+ ## + ## **See also:** + ## * `prepend proc <#prepend,T,T>`_ + ## for prepending a copy of a list + runnableExamples: + import std/[sequtils, enumerate, sugar] + var + a = [4, 5].toSinglyLinkedList + b = [1, 2, 3].toSinglyLinkedList + c = [0, 1].toSinglyLinkedList + a.prependMoved(b) + assert a.toSeq == [1, 2, 3, 4, 5] + assert b.toSeq == [] + c.prependMoved(c) + let s = collect: + for i, ci in enumerate(c): + if i == 6: break + ci + assert s == [0, 1, 0, 1, 0, 1] + + b.addMoved(a) + swap a, b + +proc add*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]) {.inline.} = + ## Appends (adds to the end) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,SinglyLinkedList[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedList[int]() + let n = newSinglyLinkedNode[int](9) + a.add(n) + assert a.contains(9) -proc contains*[T](L: SinglyLinkedRing[T], value: T): bool {.inline.} = - ## searches in the list for a value. Returns false if the value does not - ## exist, true otherwise. - result = find(L, value) != nil + n.next = nil + if L.tail != nil: + assert(L.tail.next == nil) + L.tail.next = n + L.tail = n + if L.head == nil: L.head = n -proc contains*[T](L: DoublyLinkedRing[T], value: T): bool {.inline.} = - ## searches in the list for a value. Returns false if the value does not - ## exist, true otherwise. - result = find(L, value) != nil +proc add*[T](L: var SinglyLinkedList[T], value: T) {.inline.} = + ## Appends (adds to the end) a value to `L`. Efficiency: O(1). 
+ ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ for appending a node + ## * `prepend proc <#prepend,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,SinglyLinkedList[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedList[int]() + a.add(9) + a.add(8) + assert a.contains(9) + + add(L, newSinglyLinkedNode(value)) + +proc prepend*[T](L: var SinglyLinkedList[T], + n: SinglyLinkedNode[T]) {.inline.} = + ## Prepends (adds to the beginning) a node to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,SinglyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedList[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedList[int]() + let n = newSinglyLinkedNode[int](9) + a.prepend(n) + assert a.contains(9) -proc prepend*[T](L: var SinglyLinkedList[T], - n: SinglyLinkedNode[T]) {.inline.} = - ## prepends a node to `L`. Efficiency: O(1). n.next = L.head L.head = n + if L.tail == nil: L.tail = n + +proc prepend*[T](L: var SinglyLinkedList[T], value: T) {.inline.} = + ## Prepends (adds to the beginning) a value to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,SinglyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + runnableExamples: + var a = initSinglyLinkedList[int]() + a.prepend(9) + a.prepend(8) + assert a.contains(9) -proc prepend*[T](L: var SinglyLinkedList[T], value: T) {.inline.} = - ## prepends a node to `L`. Efficiency: O(1). prepend(L, newSinglyLinkedNode(value)) - -proc append*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = - ## appends a node `n` to `L`. Efficiency: O(1). 
+ +func copy*[T](a: SinglyLinkedList[T]): SinglyLinkedList[T] {.since: (1, 5, 1).} = + ## Creates a shallow copy of `a`. + runnableExamples: + from std/sequtils import toSeq + type Foo = ref object + x: int + var + f = Foo(x: 1) + a = [f].toSinglyLinkedList + let b = a.copy + a.add([f].toSinglyLinkedList) + assert a.toSeq == [f, f] + assert b.toSeq == [f] # b isn't modified... + f.x = 42 + assert a.head.value.x == 42 + assert b.head.value.x == 42 # ... but the elements are not deep copied + + let c = [1, 2, 3].toSinglyLinkedList + assert $c == $c.copy + + result = initSinglyLinkedList[T]() + for x in a.items: + result.add(x) + +proc addMoved*[T](a, b: var SinglyLinkedList[T]) {.since: (1, 5, 1).} = + ## Moves `b` to the end of `a`. Efficiency: O(1). + ## Note that `b` becomes empty after the operation unless it has the same address as `a`. + ## Self-adding results in a cycle. + ## + ## **See also:** + ## * `add proc <#add,T,T>`_ for adding a copy of a list + runnableExamples: + import std/[sequtils, enumerate, sugar] + var + a = [1, 2, 3].toSinglyLinkedList + b = [4, 5].toSinglyLinkedList + c = [0, 1].toSinglyLinkedList + a.addMoved(b) + assert a.toSeq == [1, 2, 3, 4, 5] + assert b.toSeq == [] + c.addMoved(c) + let s = collect: + for i, ci in enumerate(c): + if i == 6: break + ci + assert s == [0, 1, 0, 1, 0, 1] + + if b.head != nil: + if a.head == nil: + a.head = b.head + else: + a.tail.next = b.head + a.tail = b.tail + if a.addr != b.addr: + b.head = nil + b.tail = nil + +proc add*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = + ## Appends (adds to the end) a node `n` to `L`. Efficiency: O(1). 
+ ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,DoublyLinkedList[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedList[int]() + let n = newDoublyLinkedNode[int](9) + a.add(n) + assert a.contains(9) + n.next = nil n.prev = L.tail - if L.tail != nil: + if L.tail != nil: assert(L.tail.next == nil) L.tail.next = n L.tail = n if L.head == nil: L.head = n -proc append*[T](L: var DoublyLinkedList[T], value: T) = - ## appends a value to `L`. Efficiency: O(1). - append(L, newDoublyLinkedNode(value)) +proc add*[T](L: var DoublyLinkedList[T], value: T) = + ## Appends (adds to the end) a value to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `prepend proc <#prepend,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,DoublyLinkedList[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedList[int]() + a.add(9) + a.add(8) + assert a.contains(9) + + add(L, newDoublyLinkedNode(value)) + +proc prepend*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = + ## Prepends (adds to the beginning) a node `n` to `L`. Efficiency: O(1). 
+ ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,DoublyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedList[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedList[int]() + let n = newDoublyLinkedNode[int](9) + a.prepend(n) + assert a.contains(9) -proc prepend*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = - ## prepends a node `n` to `L`. Efficiency: O(1). n.prev = nil n.next = L.head if L.head != nil: @@ -255,36 +583,225 @@ proc prepend*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = L.head = n if L.tail == nil: L.tail = n -proc prepend*[T](L: var DoublyLinkedList[T], value: T) = - ## prepends a value to `L`. Efficiency: O(1). +proc prepend*[T](L: var DoublyLinkedList[T], value: T) = + ## Prepends (adds to the beginning) a value to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,DoublyLinkedList[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `remove proc <#remove,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedList[int]() + a.prepend(9) + a.prepend(8) + assert a.contains(9) + prepend(L, newDoublyLinkedNode(value)) - -proc remove*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = - ## removes `n` from `L`. Efficiency: O(1). + +func copy*[T](a: DoublyLinkedList[T]): DoublyLinkedList[T] {.since: (1, 5, 1).} = + ## Creates a shallow copy of `a`. 
+ runnableExamples: + from std/sequtils import toSeq + type Foo = ref object + x: int + var + f = Foo(x: 1) + a = [f].toDoublyLinkedList + let b = a.copy + a.add([f].toDoublyLinkedList) + assert a.toSeq == [f, f] + assert b.toSeq == [f] # b isn't modified... + f.x = 42 + assert a.head.value.x == 42 + assert b.head.value.x == 42 # ... but the elements are not deep copied + + let c = [1, 2, 3].toDoublyLinkedList + assert $c == $c.copy + + result = initDoublyLinkedList[T]() + for x in a.items: + result.add(x) + +proc addMoved*[T](a, b: var DoublyLinkedList[T]) {.since: (1, 5, 1).} = + ## Moves `b` to the end of `a`. Efficiency: O(1). + ## Note that `b` becomes empty after the operation unless it has the same address as `a`. + ## Self-adding results in a cycle. + ## + ## **See also:** + ## * `add proc <#add,T,T>`_ + ## for adding a copy of a list + runnableExamples: + import std/[sequtils, enumerate, sugar] + var + a = [1, 2, 3].toDoublyLinkedList + b = [4, 5].toDoublyLinkedList + c = [0, 1].toDoublyLinkedList + a.addMoved(b) + assert a.toSeq == [1, 2, 3, 4, 5] + assert b.toSeq == [] + c.addMoved(c) + let s = collect: + for i, ci in enumerate(c): + if i == 6: break + ci + assert s == [0, 1, 0, 1, 0, 1] + + if b.head != nil: + if a.head == nil: + a.head = b.head + else: + b.head.prev = a.tail + a.tail.next = b.head + a.tail = b.tail + if a.addr != b.addr: + b.head = nil + b.tail = nil + +proc add*[T: SomeLinkedList](a: var T, b: T) {.since: (1, 5, 1).} = + ## Appends a shallow copy of `b` to the end of `a`. 
+ ## + ## **See also:** + ## * `addMoved proc <#addMoved,SinglyLinkedList[T],SinglyLinkedList[T]>`_ + ## * `addMoved proc <#addMoved,DoublyLinkedList[T],DoublyLinkedList[T]>`_ + ## for moving the second list instead of copying + runnableExamples: + from std/sequtils import toSeq + var a = [1, 2, 3].toSinglyLinkedList + let b = [4, 5].toSinglyLinkedList + a.add(b) + assert a.toSeq == [1, 2, 3, 4, 5] + assert b.toSeq == [4, 5] + a.add(a) + assert a.toSeq == [1, 2, 3, 4, 5, 1, 2, 3, 4, 5] + + var tmp = b.copy + a.addMoved(tmp) + +proc remove*[T](L: var SinglyLinkedList[T], n: SinglyLinkedNode[T]): bool {.discardable.} = + ## Removes a node `n` from `L`. + ## Returns `true` if `n` was found in `L`. + ## Efficiency: O(n); the list is traversed until `n` is found. + ## Attempting to remove an element not contained in the list is a no-op. + ## When the list is cyclic, the cycle is preserved after removal. + runnableExamples: + import std/[sequtils, enumerate, sugar] + var a = [0, 1, 2].toSinglyLinkedList + let n = a.head.next + assert n.value == 1 + assert a.remove(n) == true + assert a.toSeq == [0, 2] + assert a.remove(n) == false + assert a.toSeq == [0, 2] + a.addMoved(a) # cycle: [0, 2, 0, 2, ...] + a.remove(a.head) + let s = collect: + for i, ai in enumerate(a): + if i == 4: break + ai + assert s == [2, 2, 2, 2] + + if n == L.head: + L.head = n.next + if L.tail.next == n: + L.tail.next = L.head # restore cycle + else: + var prev {.cursor.} = L.head + while prev.next != n and prev.next != nil: + prev = prev.next + if prev.next == nil: + return false + prev.next = n.next + if L.tail == n: + L.tail = prev # update tail if we removed the last node + true + +proc remove*[T](L: var DoublyLinkedList[T], n: DoublyLinkedNode[T]) = + ## Removes a node `n` from `L`. Efficiency: O(1). + ## This function assumes, for the sake of efficiency, that `n` is contained in `L`, + ## otherwise the effects are undefined. + ## When the list is cyclic, the cycle is preserved after removal. 
+ runnableExamples: + import std/[sequtils, enumerate, sugar] + var a = [0, 1, 2].toSinglyLinkedList + let n = a.head.next + assert n.value == 1 + a.remove(n) + assert a.toSeq == [0, 2] + a.remove(n) + assert a.toSeq == [0, 2] + a.addMoved(a) # cycle: [0, 2, 0, 2, ...] + a.remove(a.head) + let s = collect: + for i, ai in enumerate(a): + if i == 4: break + ai + assert s == [2, 2, 2, 2] + if n == L.tail: L.tail = n.prev if n == L.head: L.head = n.next if n.next != nil: n.next.prev = n.prev if n.prev != nil: n.prev.next = n.next -proc append*[T](L: var SinglyLinkedRing[T], n: SinglyLinkedNode[T]) = - ## appends a node `n` to `L`. Efficiency: O(1). + +proc add*[T](L: var SinglyLinkedRing[T], n: SinglyLinkedNode[T]) = + ## Appends (adds to the end) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,SinglyLinkedRing[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedRing[int]() + let n = newSinglyLinkedNode[int](9) + a.add(n) + assert a.contains(9) + if L.head != nil: n.next = L.head assert(L.tail != nil) L.tail.next = n - L.tail = n else: n.next = n L.head = n - L.tail = n + L.tail = n -proc append*[T](L: var SinglyLinkedRing[T], value: T) = - ## appends a value to `L`. Efficiency: O(1). - append(L, newSinglyLinkedNode(value)) +proc add*[T](L: var SinglyLinkedRing[T], value: T) = + ## Appends (adds to the end) a value to `L`. Efficiency: O(1). 
+ ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for appending a node + ## * `prepend proc <#prepend,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,SinglyLinkedRing[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedRing[int]() + a.add(9) + a.add(8) + assert a.contains(9) + + add(L, newSinglyLinkedNode(value)) + +proc prepend*[T](L: var SinglyLinkedRing[T], n: SinglyLinkedNode[T]) = + ## Prepends (adds to the beginning) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,SinglyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedRing[T],T>`_ for prepending a value + runnableExamples: + var a = initSinglyLinkedRing[int]() + let n = newSinglyLinkedNode[int](9) + a.prepend(n) + assert a.contains(9) -proc prepend*[T](L: var SinglyLinkedRing[T], n: SinglyLinkedNode[T]) = - ## prepends a node `n` to `L`. Efficiency: O(1). if L.head != nil: n.next = L.head assert(L.tail != nil) @@ -294,12 +811,41 @@ proc prepend*[T](L: var SinglyLinkedRing[T], n: SinglyLinkedNode[T]) = L.tail = n L.head = n -proc prepend*[T](L: var SinglyLinkedRing[T], value: T) = - ## prepends a value to `L`. Efficiency: O(1). +proc prepend*[T](L: var SinglyLinkedRing[T], value: T) = + ## Prepends (adds to the beginning) a value to `L`. Efficiency: O(1). 
+ ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,SinglyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,SinglyLinkedRing[T],SinglyLinkedNode[T]>`_ + ## for prepending a node + runnableExamples: + var a = initSinglyLinkedRing[int]() + a.prepend(9) + a.prepend(8) + assert a.contains(9) + prepend(L, newSinglyLinkedNode(value)) -proc append*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = - ## appends a node `n` to `L`. Efficiency: O(1). + + +proc add*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = + ## Appends (adds to the end) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,DoublyLinkedRing[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedRing[int]() + let n = newDoublyLinkedNode[int](9) + a.add(n) + assert a.contains(9) + if L.head != nil: n.next = L.head n.prev = L.head.prev @@ -310,13 +856,42 @@ proc append*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = n.next = n L.head = n -proc append*[T](L: var DoublyLinkedRing[T], value: T) = - ## appends a value to `L`. Efficiency: O(1). - append(L, newDoublyLinkedNode(value)) +proc add*[T](L: var DoublyLinkedRing[T], value: T) = + ## Appends (adds to the end) a value to `L`. Efficiency: O(1). 
+ ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `prepend proc <#prepend,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `prepend proc <#prepend,DoublyLinkedRing[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedRing[int]() + a.add(9) + a.add(8) + assert a.contains(9) + + add(L, newDoublyLinkedNode(value)) + +proc prepend*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = + ## Prepends (adds to the beginning) a node `n` to `L`. Efficiency: O(1). + ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,DoublyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedRing[T],T>`_ for prepending a value + ## * `remove proc <#remove,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedRing[int]() + let n = newDoublyLinkedNode[int](9) + a.prepend(n) + assert a.contains(9) -proc prepend*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = - ## prepends a node `n` to `L`. Efficiency: O(1). - if L.head != nil: + if L.head != nil: n.next = L.head n.prev = L.head.prev L.head.prev.next = n @@ -326,18 +901,115 @@ proc prepend*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = n.next = n L.head = n -proc prepend*[T](L: var DoublyLinkedRing[T], value: T) = - ## prepends a value to `L`. Efficiency: O(1). +proc prepend*[T](L: var DoublyLinkedRing[T], value: T) = + ## Prepends (adds to the beginning) a value to `L`. Efficiency: O(1). 
+ ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for appending a node + ## * `add proc <#add,DoublyLinkedRing[T],T>`_ for appending a value + ## * `prepend proc <#prepend,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for prepending a node + ## * `remove proc <#remove,DoublyLinkedRing[T],DoublyLinkedNode[T]>`_ + ## for removing a node + runnableExamples: + var a = initDoublyLinkedRing[int]() + a.prepend(9) + a.prepend(8) + assert a.contains(9) + prepend(L, newDoublyLinkedNode(value)) - -proc remove*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = - ## removes `n` from `L`. Efficiency: O(1). + +proc remove*[T](L: var DoublyLinkedRing[T], n: DoublyLinkedNode[T]) = + ## Removes `n` from `L`. Efficiency: O(1). + ## This function assumes, for the sake of efficiency, that `n` is contained in `L`, + ## otherwise the effects are undefined. + runnableExamples: + var a = initDoublyLinkedRing[int]() + let n = newDoublyLinkedNode[int](5) + a.add(n) + assert 5 in a + a.remove(n) + assert 5 notin a + n.next.prev = n.prev n.prev.next = n.next - if n == L.head: - var p = L.head.prev - if p == L.head: + if n == L.head: + let p = L.head.prev + if p == L.head: # only one element left: L.head = nil else: - L.head = L.head.prev + L.head = p + +proc append*[T](a: var (SinglyLinkedList[T] | SinglyLinkedRing[T]), + b: SinglyLinkedList[T] | SinglyLinkedNode[T] | T) = + ## Alias for `a.add(b)`. + ## + ## **See also:** + ## * `add proc <#add,SinglyLinkedList[T],SinglyLinkedNode[T]>`_ + ## * `add proc <#add,SinglyLinkedList[T],T>`_ + ## * `add proc <#add,T,T>`_ + a.add(b) + +proc append*[T](a: var (DoublyLinkedList[T] | DoublyLinkedRing[T]), + b: DoublyLinkedList[T] | DoublyLinkedNode[T] | T) = + ## Alias for `a.add(b)`. 
+ ## + ## **See also:** + ## * `add proc <#add,DoublyLinkedList[T],DoublyLinkedNode[T]>`_ + ## * `add proc <#add,DoublyLinkedList[T],T>`_ + ## * `add proc <#add,T,T>`_ + a.add(b) + +proc appendMoved*[T: SomeLinkedList](a, b: var T) {.since: (1, 5, 1).} = + ## Alias for `a.addMoved(b)`. + ## + ## **See also:** + ## * `addMoved proc <#addMoved,SinglyLinkedList[T],SinglyLinkedList[T]>`_ + ## * `addMoved proc <#addMoved,DoublyLinkedList[T],DoublyLinkedList[T]>`_ + a.addMoved(b) + +func toSinglyLinkedList*[T](elems: openArray[T]): SinglyLinkedList[T] {.since: (1, 5, 1).} = + ## Creates a new `SinglyLinkedList` from the members of `elems`. + runnableExamples: + from std/sequtils import toSeq + let a = [1, 2, 3, 4, 5].toSinglyLinkedList + assert a.toSeq == [1, 2, 3, 4, 5] + + result = initSinglyLinkedList[T]() + for elem in elems.items: + result.add(elem) + +func toSinglyLinkedRing*[T](elems: openArray[T]): SinglyLinkedRing[T] = + ## Creates a new `SinglyLinkedRing` from the members of `elems`. + runnableExamples: + from std/sequtils import toSeq + let a = [1, 2, 3, 4, 5].toSinglyLinkedRing + assert a.toSeq == [1, 2, 3, 4, 5] + + result = initSinglyLinkedRing[T]() + for elem in elems.items: + result.add(elem) + +func toDoublyLinkedList*[T](elems: openArray[T]): DoublyLinkedList[T] {.since: (1, 5, 1).} = + ## Creates a new `DoublyLinkedList` from the members of `elems`. + runnableExamples: + from std/sequtils import toSeq + let a = [1, 2, 3, 4, 5].toDoublyLinkedList + assert a.toSeq == [1, 2, 3, 4, 5] + + result = initDoublyLinkedList[T]() + for elem in elems.items: + result.add(elem) + +func toDoublyLinkedRing*[T](elems: openArray[T]): DoublyLinkedRing[T] = + ## Creates a new `DoublyLinkedRing` from the members of `elems`. 
+ runnableExamples: + from std/sequtils import toSeq + let a = [1, 2, 3, 4, 5].toDoublyLinkedRing + assert a.toSeq == [1, 2, 3, 4, 5] + + result = initDoublyLinkedRing[T]() + for elem in elems.items: + result.add(elem) diff --git a/lib/pure/collections/queues.nim b/lib/pure/collections/queues.nim deleted file mode 100644 index af5e7b6cd..000000000 --- a/lib/pure/collections/queues.nim +++ /dev/null @@ -1,102 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Implementation of a `queue`:idx:. The underlying implementation uses a ``seq``. -## Note: For inter thread communication use -## a `TChannel <channels.html>`_ instead. - -import math - -type - Queue*[T] = object ## a queue - data: seq[T] - rd, wr, count, mask: int - -{.deprecated: [TQueue: Queue].} - -proc initQueue*[T](initialSize=4): Queue[T] = - ## creates a new queue. `initialSize` needs to be a power of 2. - assert isPowerOfTwo(initialSize) - result.mask = initialSize-1 - newSeq(result.data, initialSize) - -proc len*[T](q: Queue[T]): int = - ## returns the number of elements of `q`. - result = q.count - -iterator items*[T](q: Queue[T]): T = - ## yields every element of `q`. - var i = q.rd - var c = q.count - while c > 0: - dec c - yield q.data[i] - i = (i + 1) and q.mask - -iterator mitems*[T](q: var Queue[T]): var T = - ## yields every element of `q`. - var i = q.rd - var c = q.count - while c > 0: - dec c - yield q.data[i] - i = (i + 1) and q.mask - -proc add*[T](q: var Queue[T], item: T) = - ## adds an `item` to the end of the queue `q`. 
- var cap = q.mask+1 - if q.count >= cap: - var n: seq[T] - newSeq(n, cap*2) - var i = 0 - for x in items(q): - shallowCopy(n[i], x) - inc i - shallowCopy(q.data, n) - q.mask = cap*2 - 1 - q.wr = q.count - q.rd = 0 - inc q.count - q.data[q.wr] = item - q.wr = (q.wr + 1) and q.mask - -proc enqueue*[T](q: var Queue[T], item: T) = - ## alias for the ``add`` operation. - add(q, item) - -proc dequeue*[T](q: var Queue[T]): T = - ## removes and returns the first element of the queue `q`. - assert q.count > 0 - dec q.count - result = q.data[q.rd] - q.rd = (q.rd + 1) and q.mask - -proc `$`*[T](q: Queue[T]): string = - ## turns a queue into its string representation. - result = "[" - for x in items(q): - if result.len > 1: result.add(", ") - result.add($x) - result.add("]") - -when isMainModule: - var q = initQueue[int]() - q.add(123) - q.add(9) - q.add(4) - var first = q.dequeue - q.add(56) - q.add(6) - var second = q.dequeue - q.add(789) - - assert first == 123 - assert second == 9 - assert($q == "[4, 56, 6, 789]") - diff --git a/lib/pure/collections/rtarrays.nim b/lib/pure/collections/rtarrays.nim index 9d8085643..3c3ffda7c 100644 --- a/lib/pure/collections/rtarrays.nim +++ b/lib/pure/collections/rtarrays.nim @@ -10,18 +10,19 @@ ## Module that implements a fixed length array whose size ## is determined at runtime. Note: This is not ready for other people to use! +## +## Unstable API. 
const ArrayPartSize = 10 type - RtArray*[T] = object ## + RtArray*[T] = object ## L: Natural spart: seq[T] - apart: array [ArrayPartSize, T] - UncheckedArray* {.unchecked.}[T] = array[0..100_000_000, T] + apart: array[ArrayPartSize, T] -template usesSeqPart(x): expr = x.L > ArrayPartSize +template usesSeqPart(x): untyped = x.L > ArrayPartSize proc initRtArray*[T](len: Natural): RtArray[T] = result.L = len diff --git a/lib/pure/collections/sequtils.nim b/lib/pure/collections/sequtils.nim index 3f37d1ef0..3c0d8dc0e 100644 --- a/lib/pure/collections/sequtils.nim +++ b/lib/pure/collections/sequtils.nim @@ -1,43 +1,137 @@ # # # Nim's Runtime Library -# (c) Copyright 2011 Alex Mitchell +# (c) Copyright 2011 Alexander Mitchell-Robinson # # See the file "copying.txt", included in this # distribution, for details about the copyright. # -## :Author: Alex Mitchell +## Although this module has `seq` in its name, it implements operations +## not only for the `seq`:idx: type, but for three built-in container types +## under the `openArray` umbrella: +## * sequences +## * strings +## * array ## -## This module implements operations for the built-in `seq`:idx: type which -## were inspired by functional programming languages. If you are looking for -## the typical `map` function which applies a function to every element in a -## sequence, it already exists in the `system <system.html>`_ module in both -## mutable and immutable styles. +## The `system` module defines several common functions, such as: +## * `newSeq[T]` for creating new sequences of type `T` +## * `@` for converting arrays and strings to sequences +## * `add` for adding new elements to strings and sequences +## * `&` for string and seq concatenation +## * `in` (alias for `contains`) and `notin` for checking if an item is +## in a container ## -## Also, for functional style programming you may want to pass `anonymous procs -## <manual.html#anonymous-procs>`_ to procs like ``filter`` to reduce typing. 
-## Anonymous procs can use `the special do notation <manual.html#do-notation>`_ -## which is more convenient in certain situations. +## This module builds upon that, providing additional functionality in form of +## procs, iterators and templates inspired by functional programming +## languages. ## -## **Note**: This interface will change as soon as the compiler supports -## closures and proper coroutines. +## For functional style programming you have different options at your disposal: +## * the `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ +## * pass an `anonymous proc<manual.html#procedures-anonymous-procs>`_ +## * import the `sugar module<sugar.html>`_ and use +## the `=> macro<sugar.html#%3D>.m,untyped,untyped>`_ +## * use `...It templates<#18>`_ +## (`mapIt<#mapIt.t,typed,untyped>`_, +## `filterIt<#filterIt.t,untyped,untyped>`_, etc.) +## +## Chaining of functions is possible thanks to the +## `method call syntax<manual.html#procedures-method-call-syntax>`_. + +runnableExamples: + import std/sugar + + # Creating a sequence from 1 to 10, multiplying each member by 2, + # keeping only the members which are not divisible by 6. 
+ let + foo = toSeq(1..10).map(x => x * 2).filter(x => x mod 6 != 0) + bar = toSeq(1..10).mapIt(it * 2).filterIt(it mod 6 != 0) + baz = collect: + for i in 1..10: + let j = 2 * i + if j mod 6 != 0: + j + + doAssert foo == bar + doAssert foo == baz + doAssert foo == @[2, 4, 8, 10, 14, 16, 20] + + doAssert foo.any(x => x > 17) + doAssert not bar.allIt(it < 20) + doAssert foo.foldl(a + b) == 74 # sum of all members + + +runnableExamples: + from std/strutils import join + + let + vowels = @"aeiou" + foo = "sequtils is an awesome module" + + doAssert (vowels is seq[char]) and (vowels == @['a', 'e', 'i', 'o', 'u']) + doAssert foo.filterIt(it notin vowels).join == "sqtls s n wsm mdl" + +## See also +## ======== +## * `strutils module<strutils.html>`_ for common string functions +## * `sugar module<sugar.html>`_ for syntactic sugar macros +## * `algorithm module<algorithm.html>`_ for common generic algorithms +## * `json module<json.html>`_ for a structure which allows +## heterogeneous members + + +import std/private/since + +import std/macros +from std/typetraits import supportsCopyMem + +when defined(nimPreviewSlimSystem): + import std/assertions + + +when defined(nimHasEffectsOf): + {.experimental: "strictEffects".} +else: + {.pragma: effectsOf.} + +macro evalOnceAs(expAlias, exp: untyped, + letAssigneable: static[bool]): untyped = + ## Injects `expAlias` in caller scope, to avoid bugs involving multiple + ## substitution in macro arguments such as + ## https://github.com/nim-lang/Nim/issues/7187. + ## `evalOnceAs(myAlias, myExp)` will behave as `let myAlias = myExp` + ## except when `letAssigneable` is false (e.g. to handle openArray) where + ## it just forwards `exp` unchanged. 
+ expectKind(expAlias, nnkIdent) + var val = exp -when not defined(nimhygiene): - {.pragma: dirty.} + result = newStmtList() + # If `exp` is not a symbol we evaluate it once here and then use the temporary + # symbol as alias + if exp.kind != nnkSym and letAssigneable: + val = genSym() + result.add(newLetStmt(val, exp)) -proc concat*[T](seqs: varargs[seq[T]]): seq[T] = + result.add( + newProc(name = genSym(nskTemplate, $expAlias), params = [getType(untyped)], + body = val, procType = nnkTemplateDef)) + +func concat*[T](seqs: varargs[seq[T]]): seq[T] = ## Takes several sequences' items and returns them inside a new sequence. + ## All sequences must be of the same type. ## - ## Example: + ## **See also:** + ## * `distribute func<#distribute,seq[T],Positive>`_ for a reverse + ## operation ## - ## .. code-block:: - ## let - ## s1 = @[1, 2, 3] - ## s2 = @[4, 5] - ## s3 = @[6, 7] - ## total = concat(s1, s2, s3) - ## assert total == @[1, 2, 3, 4, 5, 6, 7] + runnableExamples: + let + s1 = @[1, 2, 3] + s2 = @[4, 5] + s3 = @[6, 7] + total = concat(s1, s2, s3) + assert total == @[1, 2, 3, 4, 5, 6, 7] + var L = 0 for seqitm in items(seqs): inc(L, len(seqitm)) newSeq(result, L) @@ -47,96 +141,226 @@ proc concat*[T](seqs: varargs[seq[T]]): seq[T] = result[i] = itm inc(i) -proc repeat*[T](s: seq[T], n: Natural): seq[T] = - ## Returns a new sequence with the items of `s` repeated `n` times. - ## - ## Example: +func addUnique*[T](s: var seq[T], x: sink T) = + ## Adds `x` to the container `s` if it is not already present. + ## Uses `==` to check if the item is already present. + runnableExamples: + var a = @[1, 2, 3] + a.addUnique(4) + a.addUnique(4) + assert a == @[1, 2, 3, 4] + + for i in 0..high(s): + if s[i] == x: return + when declared(ensureMove): + s.add ensureMove(x) + else: + s.add x + +func count*[T](s: openArray[T], x: T): int = + ## Returns the number of occurrences of the item `x` in the container `s`. ## - ## .. 
code-block: + runnableExamples: + let + a = @[1, 2, 2, 3, 2, 4, 2] + b = "abracadabra" + assert count(a, 2) == 4 + assert count(a, 99) == 0 + assert count(b, 'r') == 2 + + for itm in items(s): + if itm == x: + inc result + +func cycle*[T](s: openArray[T], n: Natural): seq[T] = + ## Returns a new sequence with the items of the container `s` repeated + ## `n` times. + ## `n` must be a non-negative number (zero or more). ## - ## let - ## s = @[1, 2, 3] - ## total = s.repeat(3) - ## assert total == @[1, 2, 3, 1, 2, 3, 1, 2, 3] + runnableExamples: + let + s = @[1, 2, 3] + total = s.cycle(3) + assert total == @[1, 2, 3, 1, 2, 3, 1, 2, 3] + result = newSeq[T](n * s.len) var o = 0 - for x in 1..n: + for x in 0 ..< n: for e in s: result[o] = e inc o -proc deduplicate*[T](seq1: seq[T]): seq[T] = +proc repeat*[T](x: T, n: Natural): seq[T] = + ## Returns a new sequence with the item `x` repeated `n` times. + ## `n` must be a non-negative number (zero or more). + ## + runnableExamples: + let + total = repeat(5, 3) + assert total == @[5, 5, 5] + + result = newSeq[T](n) + for i in 0 ..< n: + result[i] = x + +func deduplicate*[T](s: openArray[T], isSorted: bool = false): seq[T] = ## Returns a new sequence without duplicates. ## - ## .. code-block:: - ## let - ## dup1 = @[1, 1, 3, 4, 2, 2, 8, 1, 4] - ## dup2 = @["a", "a", "c", "d", "d"] - ## unique1 = deduplicate(dup1) - ## unique2 = deduplicate(dup2) - ## assert unique1 == @[1, 3, 4, 2, 8] - ## assert unique2 == @["a", "c", "d"] + ## Setting the optional argument `isSorted` to true (default: false) + ## uses a faster algorithm for deduplication. 
+ ## + runnableExamples: + let + dup1 = @[1, 1, 3, 4, 2, 2, 8, 1, 4] + dup2 = @["a", "a", "c", "d", "d"] + unique1 = deduplicate(dup1) + unique2 = deduplicate(dup2, isSorted = true) + assert unique1 == @[1, 3, 4, 2, 8] + assert unique2 == @["a", "c", "d"] + result = @[] - for itm in items(seq1): - if not result.contains(itm): result.add(itm) - -{.deprecated: [distnct: deduplicate].} - -proc zip*[S, T](seq1: seq[S], seq2: seq[T]): seq[tuple[a: S, b: T]] = - ## Returns a new sequence with a combination of the two input sequences. - ## - ## For convenience you can access the returned tuples through the named - ## fields `a` and `b`. If one sequence is shorter, the remaining items in the - ## longer sequence are discarded. Example: - ## - ## .. code-block:: - ## let - ## short = @[1, 2, 3] - ## long = @[6, 5, 4, 3, 2, 1] - ## words = @["one", "two", "three"] - ## zip1 = zip(short, long) - ## zip2 = zip(short, words) - ## assert zip1 == @[(1, 6), (2, 5), (3, 4)] - ## assert zip2 == @[(1, "one"), (2, "two"), (3, "three")] - ## assert zip1[2].b == 4 - ## assert zip2[2].b == "three" - var m = min(seq1.len, seq2.len) - newSeq(result, m) - for i in 0 .. m-1: result[i] = (seq1[i], seq2[i]) - -proc distribute*[T](s: seq[T], num: Positive, spread = true): seq[seq[T]] = - ## Splits and distributes a sequence `s` into `num` sub sequences. - ## - ## Returns a sequence of `num` sequences. For some input values this is the - ## inverse of the `concat <#concat>`_ proc. The proc will assert in debug - ## builds if `s` is nil or `num` is less than one, and will likely crash on - ## release builds. The input sequence `s` can be empty, which will produce + if s.len > 0: + if isSorted: + var prev = s[0] + result.add(prev) + for i in 1..s.high: + if s[i] != prev: + prev = s[i] + result.add(prev) + else: + for itm in items(s): + if not result.contains(itm): result.add(itm) + +func minIndex*[T](s: openArray[T]): int {.since: (1, 1).} = + ## Returns the index of the minimum value of `s`. 
+ ## `T` needs to have a `<` operator. + runnableExamples: + let + a = @[1, 2, 3, 4] + b = @[6, 5, 4, 3] + c = [2, -7, 8, -5] + d = "ziggy" + assert minIndex(a) == 0 + assert minIndex(b) == 3 + assert minIndex(c) == 1 + assert minIndex(d) == 2 + + for i in 1..high(s): + if s[i] < s[result]: result = i + +func maxIndex*[T](s: openArray[T]): int {.since: (1, 1).} = + ## Returns the index of the maximum value of `s`. + ## `T` needs to have a `<` operator. + runnableExamples: + let + a = @[1, 2, 3, 4] + b = @[6, 5, 4, 3] + c = [2, -7, 8, -5] + d = "ziggy" + assert maxIndex(a) == 3 + assert maxIndex(b) == 0 + assert maxIndex(c) == 2 + assert maxIndex(d) == 0 + + for i in 1..high(s): + if s[i] > s[result]: result = i + +func minmax*[T](x: openArray[T]): (T, T) = + ## The minimum and maximum values of `x`. `T` needs to have a `<` operator. + var l = x[0] + var h = x[0] + for i in 1..high(x): + if x[i] < l: l = x[i] + if h < x[i]: h = x[i] + result = (l, h) + + +template zipImpl(s1, s2, retType: untyped): untyped = + proc zip*[S, T](s1: openArray[S], s2: openArray[T]): retType = + ## Returns a new sequence with a combination of the two input containers. + ## + ## The input containers can be of different types. + ## If one container is shorter, the remaining items in the longer container + ## are discarded. + ## + ## **Note**: For Nim 1.0.x and older version, `zip` returned a seq of + ## named tuples with fields `a` and `b`. For Nim versions 1.1.x and newer, + ## `zip` returns a seq of unnamed tuples. 
+ runnableExamples: + let + short = @[1, 2, 3] + long = @[6, 5, 4, 3, 2, 1] + words = @["one", "two", "three"] + letters = "abcd" + zip1 = zip(short, long) + zip2 = zip(short, words) + assert zip1 == @[(1, 6), (2, 5), (3, 4)] + assert zip2 == @[(1, "one"), (2, "two"), (3, "three")] + assert zip1[2][0] == 3 + assert zip2[1][1] == "two" + when (NimMajor, NimMinor) <= (1, 0): + let + zip3 = zip(long, letters) + assert zip3 == @[(a: 6, b: 'a'), (5, 'b'), (4, 'c'), (3, 'd')] + assert zip3[0].b == 'a' + else: + let + zip3: seq[tuple[num: int, letter: char]] = zip(long, letters) + assert zip3 == @[(6, 'a'), (5, 'b'), (4, 'c'), (3, 'd')] + assert zip3[0].letter == 'a' + + var m = min(s1.len, s2.len) + newSeq(result, m) + for i in 0 ..< m: + result[i] = (s1[i], s2[i]) + +when (NimMajor, NimMinor) <= (1, 0): + zipImpl(s1, s2, seq[tuple[a: S, b: T]]) +else: + zipImpl(s1, s2, seq[(S, T)]) + +proc unzip*[S, T](s: openArray[(S, T)]): (seq[S], seq[T]) {.since: (1, 1).} = + ## Returns a tuple of two sequences split out from a sequence of 2-field tuples. + runnableExamples: + let + zipped = @[(1, 'a'), (2, 'b'), (3, 'c')] + unzipped1 = @[1, 2, 3] + unzipped2 = @['a', 'b', 'c'] + assert zipped.unzip() == (unzipped1, unzipped2) + assert zip(unzipped1, unzipped2).unzip() == (unzipped1, unzipped2) + result = (newSeq[S](s.len), newSeq[T](s.len)) + for i in 0..<s.len: + result[0][i] = s[i][0] + result[1][i] = s[i][1] + +func distribute*[T](s: seq[T], num: Positive, spread = true): seq[seq[T]] = + ## Splits and distributes a sequence `s` into `num` sub-sequences. + ## + ## Returns a sequence of `num` sequences. For *some* input values this is the + ## inverse of the `concat <#concat,varargs[seq[T]]>`_ func. + ## The input sequence `s` can be empty, which will produce ## `num` empty sequences. 
## ## If `spread` is false and the length of `s` is not a multiple of `num`, the - ## proc will max out the first sub sequences with ``1 + len(s) div num`` + ## func will max out the first sub-sequence with `1 + len(s) div num` ## entries, leaving the remainder of elements to the last sequence. ## - ## On the other hand, if `spread` is true, the proc will distribute evenly + ## On the other hand, if `spread` is true, the func will distribute evenly ## the remainder of the division across all sequences, which makes the result ## more suited to multithreading where you are passing equal sized work units ## to a thread pool and want to maximize core usage. ## - ## Example: - ## - ## .. code-block:: - ## let numbers = @[1, 2, 3, 4, 5, 6, 7] - ## assert numbers.distribute(3) == @[@[1, 2, 3], @[4, 5], @[6, 7]] - ## assert numbers.distribute(3, false) == @[@[1, 2, 3], @[4, 5, 6], @[7]] - ## assert numbers.distribute(6)[0] == @[1, 2] - ## assert numbers.distribute(6)[5] == @[7] - assert(not s.isNil, "`s` can't be nil") + runnableExamples: + let numbers = @[1, 2, 3, 4, 5, 6, 7] + assert numbers.distribute(3) == @[@[1, 2, 3], @[4, 5], @[6, 7]] + assert numbers.distribute(3, false) == @[@[1, 2, 3], @[4, 5, 6], @[7]] + assert numbers.distribute(6)[0] == @[1, 2] + assert numbers.distribute(6)[1] == @[3] + if num < 2: result = @[s] return - let num = int(num) # XXX probably only needed because of .. bug - # Create the result and calculate the stride size and the remainder if any. result = newSeq[seq[T]](num) var @@ -148,117 +372,266 @@ proc distribute*[T](s: seq[T], num: Positive, spread = true): seq[seq[T]] = if extra == 0 or spread == false: # Use an algorithm which overcounts the stride and minimizes reading limits. if extra > 0: inc(stride) - - for i in 0 .. <num: + for i in 0 ..< num: result[i] = newSeq[T]() - for g in first .. 
<min(s.len, first + stride): + for g in first ..< min(s.len, first + stride): result[i].add(s[g]) first += stride - else: # Use an undercounting algorithm which *adds* the remainder each iteration. - for i in 0 .. <num: + for i in 0 ..< num: last = first + stride if extra > 0: extra -= 1 inc(last) - result[i] = newSeq[T]() - for g in first .. <last: + for g in first ..< last: result[i].add(s[g]) first = last +proc map*[T, S](s: openArray[T], op: proc (x: T): S {.closure.}): + seq[S] {.inline, effectsOf: op.} = + ## Returns a new sequence with the results of the `op` proc applied to every + ## item in the container `s`. + ## + ## Since the input is not modified, you can use it to + ## transform the type of the elements in the input container. + ## + ## Instead of using `map` and `filter`, consider using the `collect` macro + ## from the `sugar` module. + ## + ## **See also:** + ## * `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ + ## * `mapIt template<#mapIt.t,typed,untyped>`_ + ## * `apply proc<#apply,openArray[T],proc(T)_2>`_ for the in-place version + ## + runnableExamples: + let + a = @[1, 2, 3, 4] + b = map(a, proc(x: int): string = $x) + assert b == @["1", "2", "3", "4"] + newSeq(result, s.len) + for i in 0 ..< s.len: + result[i] = op(s[i]) -iterator filter*[T](seq1: seq[T], pred: proc(item: T): bool {.closure.}): T = - ## Iterates through a sequence and yields every item that fulfills the - ## predicate. +proc apply*[T](s: var openArray[T], op: proc (x: var T) {.closure.}) + {.inline, effectsOf: op.} = + ## Applies `op` to every item in `s`, modifying it directly. + ## + ## Note that the container `s` must be declared as a `var`, + ## since `s` is modified in-place. + ## The parameter function takes a `var T` type parameter. 
+ ## + ## **See also:** + ## * `applyIt template<#applyIt.t,untyped,untyped>`_ + ## * `map proc<#map,openArray[T],proc(T)>`_ + ## + runnableExamples: + var a = @["1", "2", "3", "4"] + apply(a, proc(x: var string) = x &= "42") + assert a == @["142", "242", "342", "442"] + + for i in 0 ..< s.len: op(s[i]) + +proc apply*[T](s: var openArray[T], op: proc (x: T): T {.closure.}) + {.inline, effectsOf: op.} = + ## Applies `op` to every item in `s` modifying it directly. + ## + ## Note that the container `s` must be declared as a `var` + ## and it is required for your input and output types to + ## be the same, since `s` is modified in-place. + ## The parameter function takes and returns a `T` type variable. + ## + ## **See also:** + ## * `applyIt template<#applyIt.t,untyped,untyped>`_ + ## * `map proc<#map,openArray[T],proc(T)>`_ + ## + runnableExamples: + var a = @["1", "2", "3", "4"] + apply(a, proc(x: string): string = x & "42") + assert a == @["142", "242", "342", "442"] + + for i in 0 ..< s.len: s[i] = op(s[i]) + +proc apply*[T](s: openArray[T], op: proc (x: T) {.closure.}) {.inline, since: (1, 3), effectsOf: op.} = + ## Same as `apply` but for a proc that does not return anything + ## and does not mutate `s` directly. + runnableExamples: + var message: string + apply([0, 1, 2, 3, 4], proc(item: int) = message.addInt item) + assert message == "01234" + for i in 0 ..< s.len: op(s[i]) + +iterator filter*[T](s: openArray[T], pred: proc(x: T): bool {.closure.}): T {.effectsOf: pred.} = + ## Iterates through a container `s` and yields every item that fulfills the + ## predicate `pred` (a function that returns a `bool`). + ## + ## Instead of using `map` and `filter`, consider using the `collect` macro + ## from the `sugar` module. 
+ ## + ## **See also:** + ## * `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ + ## * `filter proc<#filter,openArray[T],proc(T)>`_ + ## * `filterIt template<#filterIt.t,untyped,untyped>`_ + ## + runnableExamples: + let numbers = @[1, 4, 5, 8, 9, 7, 4] + var evens = newSeq[int]() + for n in filter(numbers, proc (x: int): bool = x mod 2 == 0): + evens.add(n) + assert evens == @[4, 8, 4] + + for i in 0 ..< s.len: + if pred(s[i]): + yield s[i] + +proc filter*[T](s: openArray[T], pred: proc(x: T): bool {.closure.}): seq[T] + {.inline, effectsOf: pred.} = + ## Returns a new sequence with all the items of `s` that fulfill the + ## predicate `pred` (a function that returns a `bool`). + ## + ## Instead of using `map` and `filter`, consider using the `collect` macro + ## from the `sugar` module. + ## + ## **See also:** + ## * `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ + ## * `filterIt template<#filterIt.t,untyped,untyped>`_ + ## * `filter iterator<#filter.i,openArray[T],proc(T)>`_ + ## * `keepIf proc<#keepIf,seq[T],proc(T)>`_ for the in-place version + ## + runnableExamples: + let + colors = @["red", "yellow", "black"] + f1 = filter(colors, proc(x: string): bool = x.len < 6) + f2 = filter(colors, proc(x: string): bool = x.contains('y')) + assert f1 == @["red", "black"] + assert f2 == @["yellow"] + + result = newSeq[T]() + for i in 0 ..< s.len: + if pred(s[i]): + result.add(s[i]) + +proc keepIf*[T](s: var seq[T], pred: proc(x: T): bool {.closure.}) + {.inline, effectsOf: pred.} = + ## Keeps the items in the passed sequence `s` if they fulfill the + ## predicate `pred` (a function that returns a `bool`). ## - ## Example: - ## - ## .. 
code-block:: - ## let numbers = @[1, 4, 5, 8, 9, 7, 4] - ## for n in filter(numbers, proc (x: int): bool = x mod 2 == 0): - ## echo($n) - ## # echoes 4, 8, 4 in separate lines - for i in countup(0, len(seq1)-1): - var item = seq1[i] - if pred(item): yield seq1[i] - -proc filter*[T](seq1: seq[T], pred: proc(item: T): bool {.closure.}): seq[T] = - ## Returns a new sequence with all the items that fulfilled the predicate. - ## - ## Example: - ## - ## .. code-block:: - ## let - ## colors = @["red", "yellow", "black"] - ## f1 = filter(colors, proc(x: string): bool = x.len < 6) - ## f2 = filter(colors) do (x: string) -> bool : x.len > 5 - ## assert f1 == @["red", "black"] - ## assert f2 == @["yellow"] - accumulateResult(filter(seq1, pred)) - -proc keepIf*[T](seq1: var seq[T], pred: proc(item: T): bool {.closure.}) = - ## Keeps the items in the passed sequence if they fulfilled the predicate. - ## Same as the ``filter`` proc, but modifies the sequence directly. - ## - ## Example: - ## - ## .. code-block:: - ## var floats = @[13.0, 12.5, 5.8, 2.0, 6.1, 9.9, 10.1] - ## keepIf(floats, proc(x: float): bool = x > 10) - ## assert floats == @[13.0, 12.5, 10.1] + ## Note that `s` must be declared as a `var`. + ## + ## Similar to the `filter proc<#filter,openArray[T],proc(T)>`_, + ## but modifies the sequence directly. + ## + ## **See also:** + ## * `keepItIf template<#keepItIf.t,seq,untyped>`_ + ## * `filter proc<#filter,openArray[T],proc(T)>`_ + ## + runnableExamples: + var floats = @[13.0, 12.5, 5.8, 2.0, 6.1, 9.9, 10.1] + keepIf(floats, proc(x: float): bool = x > 10) + assert floats == @[13.0, 12.5, 10.1] + var pos = 0 - for i in 0 .. <len(seq1): - if pred(seq1[i]): + for i in 0 ..< len(s): + if pred(s[i]): if pos != i: - seq1[pos] = seq1[i] + when defined(gcDestructors): + s[pos] = move(s[i]) + else: + shallowCopy(s[pos], s[i]) inc(pos) - setLen(seq1, pos) + setLen(s, pos) -proc delete*[T](s: var seq[T], first=0, last=0) = - ## Deletes in `s` the items at position `first` .. 
`last`. This modifies - ## `s` itself, it does not return a copy. +func delete*[T](s: var seq[T]; slice: Slice[int]) = + ## Deletes the items `s[slice]`, raising `IndexDefect` if the slice contains + ## elements out of range. ## - ## Example: - ## - ##.. code-block:: - ## let outcome = @[1,1,1,1,1,1,1,1] - ## var dest = @[1,1,1,2,2,2,2,2,2,1,1,1,1,1] - ## dest.delete(3, 8) - ## assert outcome == dest + ## This operation moves all elements after `s[slice]` in linear time. + runnableExamples: + var a = @[10, 11, 12, 13, 14] + doAssertRaises(IndexDefect): a.delete(4..5) + assert a == @[10, 11, 12, 13, 14] + a.delete(4..4) + assert a == @[10, 11, 12, 13] + a.delete(1..2) + assert a == @[10, 13] + a.delete(1..<1) # empty slice + assert a == @[10, 13] + when compileOption("boundChecks"): + if not (slice.a < s.len and slice.a >= 0 and slice.b < s.len): + raise newException(IndexDefect, $(slice: slice, len: s.len)) + if slice.b >= slice.a: + template defaultImpl = + var i = slice.a + var j = slice.b + 1 + var newLen = s.len - j + i + while i < newLen: + when defined(gcDestructors): + s[i] = move(s[j]) + else: + s[i].shallowCopy(s[j]) + inc(i) + inc(j) + setLen(s, newLen) + when nimvm: defaultImpl() + else: + when defined(js): + let n = slice.b - slice.a + 1 + let first = slice.a + {.emit: "`s`.splice(`first`, `n`);".} + else: + defaultImpl() +func delete*[T](s: var seq[T]; first, last: Natural) {.deprecated: "use `delete(s, first..last)`".} = + ## Deletes the items of a sequence `s` at positions `first..last` + ## (including both ends of the range). + ## This modifies `s` itself, it does not return a copy. 
+ runnableExamples("--warning:deprecated:off"): + let outcome = @[1, 1, 1, 1, 1, 1, 1, 1] + var dest = @[1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1] + dest.delete(3, 8) + assert outcome == dest + doAssert first <= last + if first >= s.len: + return var i = first - var j = last+1 - var newLen = len(s)-j+i + var j = min(len(s), last + 1) + var newLen = len(s) - j + i while i < newLen: - s[i].shallowCopy(s[j]) + when defined(gcDestructors): + s[i] = move(s[j]) + else: + s[i].shallowCopy(s[j]) inc(i) inc(j) setLen(s, newLen) -proc insert*[T](dest: var seq[T], src: openArray[T], pos=0) = +func insert*[T](dest: var seq[T], src: openArray[T], pos = 0) = ## Inserts items from `src` into `dest` at position `pos`. This modifies ## `dest` itself, it does not return a copy. ## - ## Example: + ## Note that the elements of `src` and `dest` must be of the same type. ## - ##.. code-block:: - ## var dest = @[1,1,1,1,1,1,1,1] - ## let - ## src = @[2,2,2,2,2,2] - ## outcome = @[1,1,1,2,2,2,2,2,2,1,1,1,1,1] - ## dest.insert(src, 3) - ## assert dest == outcome + runnableExamples: + var dest = @[1, 1, 1, 1, 1, 1, 1, 1] + let + src = @[2, 2, 2, 2, 2, 2] + outcome = @[1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1] + dest.insert(src, 3) + assert dest == outcome var j = len(dest) - 1 - var i = len(dest) + len(src) - 1 + var i = j + len(src) + if i == j: return dest.setLen(i + 1) # Move items after `pos` to the end of the sequence. while j >= pos: - dest[i].shallowCopy(dest[j]) + when defined(gcDestructors): + dest[i] = move(dest[j]) + else: + dest[i].shallowCopy(dest[j]) dec(i) dec(j) # Insert items from `dest` into `dest` at `pos` @@ -268,261 +641,266 @@ proc insert*[T](dest: var seq[T], src: openArray[T], pos=0) = inc(j) -template filterIt*(seq1, pred: expr): expr {.immediate.} = - ## Returns a new sequence with all the items that fulfilled the predicate. +template filterIt*(s, pred: untyped): untyped = + ## Returns a new sequence with all the items of `s` that fulfill the + ## predicate `pred`. 
+ ## + ## Unlike the `filter proc<#filter,openArray[T],proc(T)>`_ and + ## `filter iterator<#filter.i,openArray[T],proc(T)>`_, + ## the predicate needs to be an expression using the `it` variable + ## for testing, like: `filterIt("abcxyz", it == 'x')`. ## - ## Unlike the `proc` version, the predicate needs to be an expression using - ## the ``it`` variable for testing, like: ``filterIt("abcxyz", it == 'x')``. - ## Example: + ## Instead of using `mapIt` and `filterIt`, consider using the `collect` macro + ## from the `sugar` module. ## - ## .. code-block:: - ## let - ## temperatures = @[-272.15, -2.0, 24.5, 44.31, 99.9, -113.44] - ## acceptable = filterIt(temperatures, it < 50 and it > -10) - ## notAcceptable = filterIt(temperatures, it > 50 or it < -10) - ## assert acceptable == @[-2.0, 24.5, 44.31] - ## assert notAcceptable == @[-272.15, 99.9, -113.44] - var result {.gensym.}: type(seq1) = @[] - for it {.inject.} in items(seq1): + ## **See also:** + ## * `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ + ## * `filter proc<#filter,openArray[T],proc(T)>`_ + ## * `filter iterator<#filter.i,openArray[T],proc(T)>`_ + ## + runnableExamples: + let + temperatures = @[-272.15, -2.0, 24.5, 44.31, 99.9, -113.44] + acceptable = temperatures.filterIt(it < 50 and it > -10) + notAcceptable = temperatures.filterIt(it > 50 or it < -10) + assert acceptable == @[-2.0, 24.5, 44.31] + assert notAcceptable == @[-272.15, 99.9, -113.44] + + var result = newSeq[typeof(s[0])]() + for it {.inject.} in items(s): if pred: result.add(it) result -template keepItIf*(varSeq, pred: expr) = - ## Convenience template around the ``keepIf`` proc to reduce typing. +template keepItIf*(varSeq: seq, pred: untyped) = + ## Keeps the items in the passed sequence (must be declared as a `var`) + ## if they fulfill the predicate. 
+ ## + ## Unlike the `keepIf proc<#keepIf,seq[T],proc(T)>`_, + ## the predicate needs to be an expression using + ## the `it` variable for testing, like: `keepItIf("abcxyz", it == 'x')`. ## - ## Unlike the `proc` version, the predicate needs to be an expression using - ## the ``it`` variable for testing, like: ``keepItIf("abcxyz", it == 'x')``. - ## Example: + ## **See also:** + ## * `keepIf proc<#keepIf,seq[T],proc(T)>`_ + ## * `filterIt template<#filterIt.t,untyped,untyped>`_ ## - ## .. code-block:: - ## var candidates = @["foo", "bar", "baz", "foobar"] - ## keepItIf(candidates, it.len == 3 and it[0] == 'b') - ## assert candidates == @["bar", "baz"] + runnableExamples: + var candidates = @["foo", "bar", "baz", "foobar"] + candidates.keepItIf(it.len == 3 and it[0] == 'b') + assert candidates == @["bar", "baz"] + var pos = 0 - for i in 0 .. <len(varSeq): + for i in 0 ..< len(varSeq): let it {.inject.} = varSeq[i] if pred: if pos != i: - varSeq[pos] = varSeq[i] + when defined(gcDestructors): + varSeq[pos] = move(varSeq[i]) + else: + shallowCopy(varSeq[pos], varSeq[i]) inc(pos) setLen(varSeq, pos) +since (1, 1): + template countIt*(s, pred: untyped): int = + ## Returns a count of all the items that fulfill the predicate. + ## + ## The predicate needs to be an expression using + ## the `it` variable for testing, like: `countIt(@[1, 2, 3], it > 2)`. + ## + runnableExamples: + let numbers = @[-3, -2, -1, 0, 1, 2, 3, 4, 5, 6] + iterator iota(n: int): int = + for i in 0..<n: yield i + assert numbers.countIt(it < 0) == 3 + assert countIt(iota(10), it < 2) == 2 -template toSeq*(iter: expr): expr {.immediate.} = - ## Transforms any iterator into a sequence. - ## - ## Example: - ## - ## .. 
code-block:: - ## let - ## numeric = @[1, 2, 3, 4, 5, 6, 7, 8, 9] - ## odd_numbers = toSeq(filter(numeric) do (x: int) -> bool: - ## if x mod 2 == 1: - ## result = true) - ## assert odd_numbers == @[1, 3, 5, 7, 9] - ## - ## **Note**: Since this is an immediate macro, you cannot always invoke this - ## as ``x.toSeq``, depending on the ``x``. - ## See `this <manual.html#limitations-of-the-method-call-syntax>`_ - ## for an explanation. - var result {.gensym.}: seq[type(iter)] = @[] - for x in iter: add(result, x) - result + var result = 0 + for it {.inject.} in s: + if pred: result += 1 + result -template foldl*(sequence, operation: expr): expr = - ## Template to fold a sequence from left to right, returning the accumulation. +proc all*[T](s: openArray[T], pred: proc(x: T): bool {.closure.}): bool {.effectsOf: pred.} = + ## Iterates through a container and checks if every item fulfills the + ## predicate. ## - ## The sequence is required to have at least a single element. Debug versions - ## of your program will assert in this situation but release versions will - ## happily go ahead. If the sequence has a single element it will be returned - ## without applying ``operation``. + ## **See also:** + ## * `allIt template<#allIt.t,untyped,untyped>`_ + ## * `any proc<#any,openArray[T],proc(T)>`_ ## - ## The ``operation`` parameter should be an expression which uses the - ## variables ``a`` and ``b`` for each step of the fold. Since this is a left - ## fold, for non associative binary operations like subtraction think that - ## the sequence of numbers 1, 2 and 3 will be parenthesized as (((1) - 2) - - ## 3). Example: - ## - ## .. 
code-block:: - ## let - ## numbers = @[5, 9, 11] - ## addition = foldl(numbers, a + b) - ## subtraction = foldl(numbers, a - b) - ## multiplication = foldl(numbers, a * b) - ## words = @["nim", "is", "cool"] - ## concatenation = foldl(words, a & b) - ## assert addition == 25, "Addition is (((5)+9)+11)" - ## assert subtraction == -15, "Subtraction is (((5)-9)-11)" - ## assert multiplication == 495, "Multiplication is (((5)*9)*11)" - ## assert concatenation == "nimiscool" - assert sequence.len > 0, "Can't fold empty sequences" - var result {.gensym.}: type(sequence[0]) - result = sequence[0] - for i in countup(1, sequence.len - 1): - let - a {.inject.} = result - b {.inject.} = sequence[i] - result = operation - result + runnableExamples: + let numbers = @[1, 4, 5, 8, 9, 7, 4] + assert all(numbers, proc (x: int): bool = x < 10) == true + assert all(numbers, proc (x: int): bool = x < 9) == false -template foldr*(sequence, operation: expr): expr = - ## Template to fold a sequence from right to left, returning the accumulation. + for i in s: + if not pred(i): + return false + true + +template allIt*(s, pred: untyped): bool = + ## Iterates through a container and checks if every item fulfills the + ## predicate. ## - ## The sequence is required to have at least a single element. Debug versions - ## of your program will assert in this situation but release versions will - ## happily go ahead. If the sequence has a single element it will be returned - ## without applying ``operation``. + ## Unlike the `all proc<#all,openArray[T],proc(T)>`_, + ## the predicate needs to be an expression using + ## the `it` variable for testing, like: `allIt("abba", it == 'a')`. ## - ## The ``operation`` parameter should be an expression which uses the - ## variables ``a`` and ``b`` for each step of the fold. Since this is a right - ## fold, for non associative binary operations like subtraction think that - ## the sequence of numbers 1, 2 and 3 will be parenthesized as (1 - (2 - - ## (3))). 
Example: - ## - ## .. code-block:: - ## let - ## numbers = @[5, 9, 11] - ## addition = foldr(numbers, a + b) - ## subtraction = foldr(numbers, a - b) - ## multiplication = foldr(numbers, a * b) - ## words = @["nim", "is", "cool"] - ## concatenation = foldr(words, a & b) - ## assert addition == 25, "Addition is (5+(9+(11)))" - ## assert subtraction == 7, "Subtraction is (5-(9-(11)))" - ## assert multiplication == 495, "Multiplication is (5*(9*(11)))" - ## assert concatenation == "nimiscool" - assert sequence.len > 0, "Can't fold empty sequences" - var result {.gensym.}: type(sequence[0]) - result = sequence[sequence.len - 1] - for i in countdown(sequence.len - 2, 0): - let - a {.inject.} = sequence[i] - b {.inject.} = result - result = operation - result + ## **See also:** + ## * `all proc<#all,openArray[T],proc(T)>`_ + ## * `anyIt template<#anyIt.t,untyped,untyped>`_ + ## + runnableExamples: + let numbers = @[1, 4, 5, 8, 9, 7, 4] + assert numbers.allIt(it < 10) == true + assert numbers.allIt(it < 9) == false -template mapIt*(seq1, typ, op: expr): expr = - ## Convenience template around the ``map`` proc to reduce typing. - ## - ## The template injects the ``it`` variable which you can use directly in an - ## expression. You also need to pass as `typ` the type of the expression, - ## since the new returned sequence can have a different type than the - ## original. Example: - ## - ## .. code-block:: - ## let - ## nums = @[1, 2, 3, 4] - ## strings = nums.mapIt(string, $(4 * it)) - ## assert strings == @["4", "8", "12", "16"] - var result {.gensym.}: seq[typ] = @[] - for it {.inject.} in items(seq1): - result.add(op) + var result = true + for it {.inject.} in items(s): + if not pred: + result = false + break result -template mapIt*(varSeq, op: expr) = - ## Convenience template around the mutable ``map`` proc to reduce typing. 
+proc any*[T](s: openArray[T], pred: proc(x: T): bool {.closure.}): bool {.effectsOf: pred.} = + ## Iterates through a container and checks if at least one item + ## fulfills the predicate. ## - ## The template injects the ``it`` variable which you can use directly in an - ## expression. The expression has to return the same type as the sequence you - ## are mutating. Example: + ## **See also:** + ## * `anyIt template<#anyIt.t,untyped,untyped>`_ + ## * `all proc<#all,openArray[T],proc(T)>`_ ## - ## .. code-block:: - ## var nums = @[1, 2, 3, 4] - ## nums.mapIt(it * 3) - ## assert nums[0] + nums[3] == 15 - for i in 0 .. <len(varSeq): - let it {.inject.} = varSeq[i] - varSeq[i] = op + runnableExamples: + let numbers = @[1, 4, 5, 8, 9, 7, 4] + assert any(numbers, proc (x: int): bool = x > 8) == true + assert any(numbers, proc (x: int): bool = x > 9) == false -template newSeqWith*(len: int, init: expr): expr = - ## creates a new sequence, calling `init` to initialize each value. Example: + for i in s: + if pred(i): + return true + false + +template anyIt*(s, pred: untyped): bool = + ## Iterates through a container and checks if at least one item + ## fulfills the predicate. ## - ## .. code-block:: - ## var seq2D = newSeqWith(20, newSeq[bool](10)) - ## seq2D[0][0] = true - ## seq2D[1][0] = true - ## seq2D[0][1] = true + ## Unlike the `any proc<#any,openArray[T],proc(T)>`_, + ## the predicate needs to be an expression using + ## the `it` variable for testing, like: `anyIt("abba", it == 'a')`. ## - ## import math - ## var seqRand = newSeqWith(20, random(10)) - ## echo seqRand - var result {.gensym.} = newSeq[type(init)](len) - for i in 0 .. 
<len: - result[i] = init + ## **See also:** + ## * `any proc<#any,openArray[T],proc(T)>`_ + ## * `allIt template<#allIt.t,untyped,untyped>`_ + ## + runnableExamples: + let numbers = @[1, 4, 5, 8, 9, 7, 4] + assert numbers.anyIt(it > 8) == true + assert numbers.anyIt(it > 9) == false + + var result = false + for it {.inject.} in items(s): + if pred: + result = true + break result -when isMainModule: - import strutils - block: # concat test - let - s1 = @[1, 2, 3] - s2 = @[4, 5] - s3 = @[6, 7] - total = concat(s1, s2, s3) - assert total == @[1, 2, 3, 4, 5, 6, 7] +template toSeq1(s: not iterator): untyped = + # overload for typed but not iterator + type OutType = typeof(items(s)) + when compiles(s.len): + block: + evalOnceAs(s2, s, compiles((let _ = s))) + var i = 0 + var result = newSeq[OutType](s2.len) + for it in s2: + result[i] = it + i += 1 + result + else: + var result: seq[OutType]# = @[] + for it in s: + result.add(it) + result - block: # duplicates test - let - dup1 = @[1, 1, 3, 4, 2, 2, 8, 1, 4] - dup2 = @["a", "a", "c", "d", "d"] - unique1 = deduplicate(dup1) - unique2 = deduplicate(dup2) - assert unique1 == @[1, 3, 4, 2, 8] - assert unique2 == @["a", "c", "d"] +template toSeq2(iter: iterator): untyped = + # overload for iterator + evalOnceAs(iter2, iter(), false) + when compiles(iter2.len): + var i = 0 + var result = newSeq[typeof(iter2)](iter2.len) + for x in iter2: + result[i] = x + inc i + result + else: + type OutType = typeof(iter2()) + var result: seq[OutType]# = @[] + when compiles(iter2()): + evalOnceAs(iter4, iter, false) + let iter3 = iter4() + for x in iter3(): + result.add(x) + else: + for x in iter2(): + result.add(x) + result - block: # zip test - let - short = @[1, 2, 3] - long = @[6, 5, 4, 3, 2, 1] - words = @["one", "two", "three"] - zip1 = zip(short, long) - zip2 = zip(short, words) - assert zip1 == @[(1, 6), (2, 5), (3, 4)] - assert zip2 == @[(1, "one"), (2, "two"), (3, "three")] - assert zip1[2].b == 4 - assert zip2[2].b == "three" - - 
block: # filter proc test +template toSeq*(iter: untyped): untyped = + ## Transforms any iterable (anything that can be iterated over, e.g. with + ## a for-loop) into a sequence. + ## + runnableExamples: let - colors = @["red", "yellow", "black"] - f1 = filter(colors, proc(x: string): bool = x.len < 6) - f2 = filter(colors) do (x: string) -> bool : x.len > 5 - assert f1 == @["red", "black"] - assert f2 == @["yellow"] - - block: # filter iterator test - let numbers = @[1, 4, 5, 8, 9, 7, 4] - for n in filter(numbers, proc (x: int): bool = x mod 2 == 0): - echo($n) - # echoes 4, 8, 4 in separate lines + myRange = 1..5 + mySet: set[int8] = {5'i8, 3, 1} + assert typeof(myRange) is HSlice[system.int, system.int] + assert typeof(mySet) is set[int8] - block: # keepIf test - var floats = @[13.0, 12.5, 5.8, 2.0, 6.1, 9.9, 10.1] - keepIf(floats, proc(x: float): bool = x > 10) - assert floats == @[13.0, 12.5, 10.1] - - block: # filterIt test let - temperatures = @[-272.15, -2.0, 24.5, 44.31, 99.9, -113.44] - acceptable = filterIt(temperatures, it < 50 and it > -10) - notAcceptable = filterIt(temperatures, it > 50 or it < -10) - assert acceptable == @[-2.0, 24.5, 44.31] - assert notAcceptable == @[-272.15, 99.9, -113.44] + mySeq1 = toSeq(myRange) + mySeq2 = toSeq(mySet) + assert mySeq1 == @[1, 2, 3, 4, 5] + assert mySeq2 == @[1'i8, 3, 5] - block: # keepItIf test - var candidates = @["foo", "bar", "baz", "foobar"] - keepItIf(candidates, it.len == 3 and it[0] == 'b') - assert candidates == @["bar", "baz"] - - block: # toSeq test - let - numeric = @[1, 2, 3, 4, 5, 6, 7, 8, 9] - odd_numbers = toSeq(filter(numeric) do (x: int) -> bool: - if x mod 2 == 1: - result = true) - assert odd_numbers == @[1, 3, 5, 7, 9] + when compiles(toSeq1(iter)): + toSeq1(iter) + elif compiles(toSeq2(iter)): + toSeq2(iter) + else: + # overload for untyped, e.g.: `toSeq(myInlineIterator(3))` + when compiles(iter.len): + block: + evalOnceAs(iter2, iter, true) + var result = newSeq[typeof(iter)](iter2.len) 
+ var i = 0 + for x in iter2: + result[i] = x + inc i + result + else: + var result: seq[typeof(iter)] = @[] + for x in iter: + result.add(x) + result - block: # foldl tests +template foldl*(sequence, operation: untyped): untyped = + ## Template to fold a sequence from left to right, returning the accumulation. + ## + ## The sequence is required to have at least a single element. Debug versions + ## of your program will assert in this situation but release versions will + ## happily go ahead. If the sequence has a single element it will be returned + ## without applying `operation`. + ## + ## The `operation` parameter should be an expression which uses the + ## variables `a` and `b` for each step of the fold. Since this is a left + ## fold, for non associative binary operations like subtraction think that + ## the sequence of numbers 1, 2 and 3 will be parenthesized as (((1) - 2) - + ## 3). + ## + ## **See also:** + ## * `foldl template<#foldl.t,,,>`_ with a starting parameter + ## * `foldr template<#foldr.t,untyped,untyped>`_ + ## + runnableExamples: let numbers = @[5, 9, 11] addition = foldl(numbers, a + b) @@ -530,12 +908,75 @@ when isMainModule: multiplication = foldl(numbers, a * b) words = @["nim", "is", "cool"] concatenation = foldl(words, a & b) + procs = @["proc", "Is", "Also", "Fine"] + + + func foo(acc, cur: string): string = + result = acc & cur + assert addition == 25, "Addition is (((5)+9)+11)" assert subtraction == -15, "Subtraction is (((5)-9)-11)" assert multiplication == 495, "Multiplication is (((5)*9)*11)" assert concatenation == "nimiscool" + assert foldl(procs, foo(a, b)) == "procIsAlsoFine" + + let s = sequence + assert s.len > 0, "Can't fold empty sequences" + var result: typeof(s[0]) + result = s[0] + for i in 1..<s.len: + let + a {.inject.} = result + b {.inject.} = s[i] + result = operation + result + +template foldl*(sequence, operation, first): untyped = + ## Template to fold a sequence from left to right, returning the accumulation. 
+ ## + ## This version of `foldl` gets a **starting parameter**. This makes it possible + ## to accumulate the sequence into a different type than the sequence elements. + ## + ## The `operation` parameter should be an expression which uses the variables + ## `a` and `b` for each step of the fold. The `first` parameter is the + ## start value (the first `a`) and therefore defines the type of the result. + ## + ## **See also:** + ## * `foldr template<#foldr.t,untyped,untyped>`_ + ## + runnableExamples: + let + numbers = @[0, 8, 1, 5] + digits = foldl(numbers, a & (chr(b + ord('0'))), "") + assert digits == "0815" - block: # foldr tests + var result: typeof(first) = first + for x in items(sequence): + let + a {.inject.} = result + b {.inject.} = x + result = operation + result + +template foldr*(sequence, operation: untyped): untyped = + ## Template to fold a sequence from right to left, returning the accumulation. + ## + ## The sequence is required to have at least a single element. Debug versions + ## of your program will assert in this situation but release versions will + ## happily go ahead. If the sequence has a single element it will be returned + ## without applying `operation`. + ## + ## The `operation` parameter should be an expression which uses the + ## variables `a` and `b` for each step of the fold. Since this is a right + ## fold, for non associative binary operations like subtraction think that + ## the sequence of numbers 1, 2 and 3 will be parenthesized as (1 - (2 - + ## (3))). 
+ ## + ## **See also:** + ## * `foldl template<#foldl.t,untyped,untyped>`_ + ## * `foldl template<#foldl.t,,,>`_ with a starting parameter + ## + runnableExamples: let numbers = @[5, 9, 11] addition = foldr(numbers, a + b) @@ -548,72 +989,174 @@ when isMainModule: assert multiplication == 495, "Multiplication is (5*(9*(11)))" assert concatenation == "nimiscool" - block: # delete tests - let outcome = @[1,1,1,1,1,1,1,1] - var dest = @[1,1,1,2,2,2,2,2,2,1,1,1,1,1] - dest.delete(3, 8) - assert outcome == dest, """\ - Deleting range 3-9 from [1,1,1,2,2,2,2,2,2,1,1,1,1,1] - is [1,1,1,1,1,1,1,1]""" - - block: # insert tests - var dest = @[1,1,1,1,1,1,1,1] + let s = sequence # xxx inefficient, use {.evalonce.} pending #13750 + let n = s.len + assert n > 0, "Can't fold empty sequences" + var result = s[n - 1] + for i in countdown(n - 2, 0): let - src = @[2,2,2,2,2,2] - outcome = @[1,1,1,2,2,2,2,2,2,1,1,1,1,1] - dest.insert(src, 3) - assert dest == outcome, """\ - Inserting [2,2,2,2,2,2] into [1,1,1,1,1,1,1,1] - at 3 is [1,1,1,2,2,2,2,2,2,1,1,1,1,1]""" + a {.inject.} = s[i] + b {.inject.} = result + result = operation + result - block: # mapIt tests - var +template mapIt*(s: typed, op: untyped): untyped = + ## Returns a new sequence with the results of the `op` proc applied to every + ## item in the container `s`. + ## + ## Since the input is not modified you can use it to + ## transform the type of the elements in the input container. + ## + ## The template injects the `it` variable which you can use directly in an + ## expression. + ## + ## Instead of using `mapIt` and `filterIt`, consider using the `collect` macro + ## from the `sugar` module. 
+ ## + ## **See also:** + ## * `sugar.collect macro<sugar.html#collect.m%2Cuntyped%2Cuntyped>`_ + ## * `map proc<#map,openArray[T],proc(T)>`_ + ## * `applyIt template<#applyIt.t,untyped,untyped>`_ for the in-place version + ## + runnableExamples: + let nums = @[1, 2, 3, 4] - strings = nums.mapIt(string, $(4 * it)) - nums.mapIt(it * 3) + strings = nums.mapIt($(4 * it)) + assert strings == @["4", "8", "12", "16"] + + type OutType = typeof(( + block: + var it{.inject.}: typeof(items(s), typeOfIter); + op), typeOfProc) + when OutType is not (proc): + # Here, we avoid to create closures in loops. + # This avoids https://github.com/nim-lang/Nim/issues/12625 + when compiles(s.len): + block: # using a block avoids https://github.com/nim-lang/Nim/issues/8580 + + # BUG: `evalOnceAs(s2, s, false)` would lead to C compile errors + # (`error: use of undeclared identifier`) instead of Nim compile errors + evalOnceAs(s2, s, compiles((let _ = s))) + + var i = 0 + var result = newSeq[OutType](s2.len) + for it {.inject.} in s2: + result[i] = op + i += 1 + result + else: + var result: seq[OutType]# = @[] + # use `items` to avoid https://github.com/nim-lang/Nim/issues/12639 + for it {.inject.} in items(s): + result.add(op) + result + else: + # `op` is going to create closures in loops, let's fallback to `map`. + # NOTE: Without this fallback, developers have to define a helper function and + # call `map`: + # [1, 2].map((it) => ((x: int) => it + x)) + # With this fallback, above code can be simplified to: + # [1, 2].mapIt((x: int) => it + x) + # In this case, `mapIt` is just syntax sugar for `map`. + type InType = typeof(items(s), typeOfIter) + # Use a help proc `f` to create closures for each element in `s` + let f = proc (x: InType): OutType = + let it {.inject.} = x + op + map(s, f) + +template applyIt*(varSeq, op: untyped) = + ## Convenience template around the mutable `apply` proc to reduce typing. 
+ ## + ## The template injects the `it` variable which you can use directly in an + ## expression. The expression has to return the same type as the elements + ## of the sequence you are mutating. + ## + ## **See also:** + ## * `apply proc<#apply,openArray[T],proc(T)_2>`_ + ## * `mapIt template<#mapIt.t,typed,untyped>`_ + ## + runnableExamples: + var nums = @[1, 2, 3, 4] + nums.applyIt(it * 3) assert nums[0] + nums[3] == 15 - block: # distribute tests - let numbers = @[1, 2, 3, 4, 5, 6, 7] - doAssert numbers.distribute(3) == @[@[1, 2, 3], @[4, 5], @[6, 7]] - doAssert numbers.distribute(6)[0] == @[1, 2] - doAssert numbers.distribute(6)[5] == @[7] - let a = @[1, 2, 3, 4, 5, 6, 7] - doAssert a.distribute(1, true) == @[@[1, 2, 3, 4, 5, 6, 7]] - doAssert a.distribute(1, false) == @[@[1, 2, 3, 4, 5, 6, 7]] - doAssert a.distribute(2, true) == @[@[1, 2, 3, 4], @[5, 6, 7]] - doAssert a.distribute(2, false) == @[@[1, 2, 3, 4], @[5, 6, 7]] - doAssert a.distribute(3, true) == @[@[1, 2, 3], @[4, 5], @[6, 7]] - doAssert a.distribute(3, false) == @[@[1, 2, 3], @[4, 5, 6], @[7]] - doAssert a.distribute(4, true) == @[@[1, 2], @[3, 4], @[5, 6], @[7]] - doAssert a.distribute(4, false) == @[@[1, 2], @[3, 4], @[5, 6], @[7]] - doAssert a.distribute(5, true) == @[@[1, 2], @[3, 4], @[5], @[6], @[7]] - doAssert a.distribute(5, false) == @[@[1, 2], @[3, 4], @[5, 6], @[7], @[]] - doAssert a.distribute(6, true) == @[@[1, 2], @[3], @[4], @[5], @[6], @[7]] - doAssert a.distribute(6, false) == @[ - @[1, 2], @[3, 4], @[5, 6], @[7], @[], @[]] - doAssert a.distribute(8, false) == a.distribute(8, true) - doAssert a.distribute(90, false) == a.distribute(90, true) - var b = @[0] - for f in 1 .. 
25: b.add(f) - doAssert b.distribute(5, true)[4].len == 5 - doAssert b.distribute(5, false)[4].len == 2 - - block: # newSeqWith tests - var seq2D = newSeqWith(4, newSeq[bool](2)) - seq2D[0][0] = true - seq2D[1][0] = true - seq2D[0][1] = true - doAssert seq2D == @[@[true, true], @[true, false], @[false, false], @[false, false]] - - block: # repeat tests - let - a = @[1, 2, 3] - b: seq[int] = @[] + for i in low(varSeq) .. high(varSeq): + let it {.inject.} = varSeq[i] + varSeq[i] = op + + +template newSeqWith*(len: int, init: untyped): untyped = + ## Creates a new `seq` of length `len`, calling `init` to initialize + ## each value of the seq. + ## + ## Useful for creating "2D" seqs - seqs containing other seqs + ## or to populate fields of the created seq. + runnableExamples: + ## Creates a seq containing 5 bool seqs, each of length of 3. + var seq2D = newSeqWith(5, newSeq[bool](3)) + assert seq2D.len == 5 + assert seq2D[0].len == 3 + assert seq2D[4][2] == false + + ## Creates a seq with random numbers + import std/random + var seqRand = newSeqWith(20, rand(1.0)) + assert seqRand[0] != seqRand[1] + type T = typeof(init) + let newLen = len + when supportsCopyMem(T) and declared(newSeqUninit): + var result = newSeqUninit[T](newLen) + else: # TODO: use `newSeqUnsafe` when that's available + var result = newSeq[T](newLen) + for i in 0 ..< newLen: + result[i] = init + move(result) # refs bug #7295 + +func mapLitsImpl(constructor: NimNode; op: NimNode; nested: bool; + filter = nnkLiterals): NimNode = + if constructor.kind in filter: + result = newNimNode(nnkCall, lineInfoFrom = constructor) + result.add op + result.add constructor + else: + result = copyNimNode(constructor) + for v in constructor: + if nested or v.kind in filter: + result.add mapLitsImpl(v, op, nested, filter) + else: + result.add v + +macro mapLiterals*(constructor, op: untyped; + nested = true): untyped = + ## Applies `op` to each of the **atomic** literals like `3` + ## or `"abc"` in the specified 
`constructor` AST. This can + ## be used to map every array element to some target type: + runnableExamples: + let x = mapLiterals([0.1, 1.2, 2.3, 3.4], int) + doAssert x is array[4, int] + doAssert x == [int(0.1), int(1.2), int(2.3), int(3.4)] + ## If `nested` is true (which is the default), the literals are replaced + ## everywhere in the `constructor` AST, otherwise only the first level + ## is considered: + runnableExamples: + let a = mapLiterals((1.2, (2.3, 3.4), 4.8), int) + let b = mapLiterals((1.2, (2.3, 3.4), 4.8), int, nested=false) + assert a == (1, (2, 3), 4) + assert b == (1, (2.3, 3.4), 4) - doAssert a.repeat(3) == @[1, 2, 3, 1, 2, 3, 1, 2, 3] - doAssert a.repeat(0) == @[] - #doAssert a.repeat(-1) == @[] # will not compile! - doAssert b.repeat(3) == @[] + let c = mapLiterals((1, (2, 3), 4, (5, 6)), `$`) + let d = mapLiterals((1, (2, 3), 4, (5, 6)), `$`, nested=false) + assert c == ("1", ("2", "3"), "4", ("5", "6")) + assert d == ("1", (2, 3), "4", (5, 6)) + ## There are no constraints for the `constructor` AST, it + ## works for nested tuples of arrays of sets etc. + result = mapLitsImpl(constructor, op, nested.boolVal) - echo "Finished doc tests" +iterator items*[T](xs: iterator: T): T = + ## Iterates over each element yielded by a closure iterator. This may + ## not seem particularly useful on its own, but this allows closure + ## iterators to be used by the mapIt, filterIt, allIt, anyIt, etc. + ## templates. + for x in xs(): + yield x diff --git a/lib/pure/collections/setimpl.nim b/lib/pure/collections/setimpl.nim new file mode 100644 index 000000000..360a075d6 --- /dev/null +++ b/lib/pure/collections/setimpl.nim @@ -0,0 +1,156 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2019 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# An `include` file for the different hash set implementations. 
+ + +template maxHash(t): untyped = high(t.data) +template dataLen(t): untyped = len(t.data) + +include hashcommon + +template initImpl(s: typed, size: int) = + let correctSize = slotsNeeded(size) + when s is OrderedSet: + s.first = -1 + s.last = -1 + s.counter = 0 + newSeq(s.data, correctSize) + +template rawInsertImpl() {.dirty.} = + if data.len == 0: + initImpl(s, defaultInitialSize) + data[h].key = key + data[h].hcode = hc + +proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A, + hc: Hash, h: Hash) = + rawInsertImpl() + +proc enlarge[A](s: var HashSet[A]) = + var n: KeyValuePairSeq[A] + newSeq(n, len(s.data) * growthFactor) + swap(s.data, n) # n is now old seq + for i in countup(0, high(n)): + if isFilled(n[i].hcode): + var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode) + rawInsert(s, s.data, n[i].key, n[i].hcode, j) + +template inclImpl() {.dirty.} = + if s.data.len == 0: + initImpl(s, defaultInitialSize) + var hc: Hash + var index = rawGet(s, key, hc) + if index < 0: + if mustRehash(s): + enlarge(s) + index = rawGetKnownHC(s, key, hc) + rawInsert(s, s.data, key, hc, -1 - index) + inc(s.counter) + +template containsOrInclImpl() {.dirty.} = + if s.data.len == 0: + initImpl(s, defaultInitialSize) + var hc: Hash + var index = rawGet(s, key, hc) + if index >= 0: + result = true + else: + result = false + if mustRehash(s): + enlarge(s) + index = rawGetKnownHC(s, key, hc) + rawInsert(s, s.data, key, hc, -1 - index) + inc(s.counter) + +template doWhile(a, b) = + while true: + b + if not a: break + +proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} = + var hc: Hash + var i = rawGet(s, key, hc) + var msk = high(s.data) + result = true + + if i >= 0: + result = false + dec(s.counter) + while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 + var j = i # The correctness of this depends on (h+1) in nextTry, + var r = j # though may be adaptable to other simple sequences. 
+ s.data[i].hcode = 0 # mark current EMPTY + {.push warning[UnsafeDefault]:off.} + reset(s.data[i].key) + {.pop.} + doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): + i = (i + 1) and msk # increment mod table size + if isEmpty(s.data[i].hcode): # end of collision cluster; So all done + return + r = s.data[i].hcode and msk # "home" location of key@i + s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop + +template dollarImpl() {.dirty.} = + result = "{" + for key in items(s): + if result.len > 1: result.add(", ") + result.addQuoted(key) + result.add("}") + + + +# --------------------------- OrderedSet ------------------------------ + +proc rawGet[A](t: OrderedSet[A], key: A, hc: var Hash): int {.inline.} = + rawGetImpl() + +proc rawInsert[A](s: var OrderedSet[A], data: var OrderedKeyValuePairSeq[A], + key: A, hc: Hash, h: Hash) = + rawInsertImpl() + data[h].next = -1 + if s.first < 0: s.first = h + if s.last >= 0: data[s.last].next = h + s.last = h + +proc enlarge[A](s: var OrderedSet[A]) = + var n: OrderedKeyValuePairSeq[A] + newSeq(n, len(s.data) * growthFactor) + var h = s.first + s.first = -1 + s.last = -1 + swap(s.data, n) + while h >= 0: + var nxt = n[h].next + if isFilled(n[h].hcode): + var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode) + rawInsert(s, s.data, n[h].key, n[h].hcode, j) + h = nxt + +proc exclImpl[A](s: var OrderedSet[A], key: A): bool {.inline.} = + if len(s.data) == 0: + return true + var n: OrderedKeyValuePairSeq[A] + newSeq(n, len(s.data)) + var h = s.first + s.first = -1 + s.last = -1 + swap(s.data, n) + let hc = genHash(key) + result = true + while h >= 0: + var nxt = n[h].next + if isFilled(n[h].hcode): + if n[h].hcode == hc and n[h].key == key: + dec s.counter + result = false + else: + var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode) + rawInsert(s, s.data, n[h].key, n[h].hcode, j) + h = nxt diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index 4a20d00a4..af13135aa 
100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -7,94 +7,255 @@ # distribution, for details about the copyright. # -## The ``sets`` module implements an efficient `hash set`:idx: and +## The `sets` module implements an efficient `hash set`:idx: and ## ordered hash set. ## ## Hash sets are different from the `built in set type -## <manual.html#set-type>`_. Sets allow you to store any value that can be +## <manual.html#types-set-type>`_. Sets allow you to store any value that can be ## `hashed <hashes.html>`_ and they don't contain duplicate entries. ## -## **Note**: The data types declared here have *value semantics*: This means -## that ``=`` performs a copy of the set. +## Common usages of sets: +## * removing duplicates from a container by converting it with `toHashSet proc +## <#toHashSet,openArray[A]>`_ (see also `sequtils.deduplicate func +## <sequtils.html#deduplicate,openArray[T],bool>`_) +## * membership testing +## * mathematical operations on two sets, such as +## `union <#union,HashSet[A],HashSet[A]>`_, +## `intersection <#intersection,HashSet[A],HashSet[A]>`_, +## `difference <#difference,HashSet[A],HashSet[A]>`_, and +## `symmetric difference <#symmetricDifference,HashSet[A],HashSet[A]>`_ +## +## **Examples:** +## +## ```Nim +## echo toHashSet([9, 5, 1]) # {9, 1, 5} +## echo toOrderedSet([9, 5, 1]) # {9, 5, 1} +## +## let +## s1 = toHashSet([9, 5, 1]) +## s2 = toHashSet([3, 5, 7]) +## +## echo s1 + s2 # {9, 1, 3, 5, 7} +## echo s1 - s2 # {1, 9} +## echo s1 * s2 # {5} +## echo s1 -+- s2 # {9, 1, 3, 7} +## ``` +## +## Note: The data types declared here have *value semantics*: This means +## that `=` performs a copy of the set. 
+## +## **See also:** +## * `intsets module <intsets.html>`_ for efficient int sets +## * `tables module <tables.html>`_ for hash tables + import - os, hashes, math + std/[hashes, math] -{.pragma: myShallow.} -when not defined(nimhygiene): - {.pragma: dirty.} +when not defined(nimHasEffectsOf): + {.pragma: effectsOf.} +{.pragma: myShallow.} # For "integer-like A" that are too big for intsets/bit-vectors to be practical, # it would be best to shrink hcode to the same size as the integer. Larger # codes should never be needed, and this can pack more entries per cache-line. # Losing hcode entirely is also possible - if some element value is forbidden. type - KeyValuePair[A] = tuple[hcode: THash, key: A] + KeyValuePair[A] = tuple[hcode: Hash, key: A] KeyValuePairSeq[A] = seq[KeyValuePair[A]] - HashSet* {.myShallow.}[A] = object ## \ + HashSet*[A] {.myShallow.} = object ## \ ## A generic hash set. ## - ## Use `init() <#init,HashSet[A],int>`_ or `initSet[type]() <#initSet>`_ + ## Use `init proc <#init,HashSet[A]>`_ or `initHashSet proc <#initHashSet>`_ ## before calling other procs on it. data: KeyValuePairSeq[A] counter: int -{.deprecated: [TSet: HashSet].} +type + OrderedKeyValuePair[A] = tuple[ + hcode: Hash, next: int, key: A] + OrderedKeyValuePairSeq[A] = seq[OrderedKeyValuePair[A]] + OrderedSet*[A] {.myShallow.} = object ## \ + ## A generic hash set that remembers insertion order. + ## + ## Use `init proc <#init,OrderedSet[A]>`_ or `initOrderedSet proc + ## <#initOrderedSet>`_ before calling other procs on it. + data: OrderedKeyValuePairSeq[A] + counter, first, last: int + SomeSet*[A] = HashSet[A] | OrderedSet[A] + ## Type union representing `HashSet` or `OrderedSet`. + +const + defaultInitialSize* = 64 + +include setimpl -# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These -# two procs retain clarity of that encoding without the space cost of an enum. 
-proc isEmpty(hcode: THash): bool {.inline.} = - result = hcode == 0 +# --------------------------------------------------------------------- +# ------------------------------ HashSet ------------------------------ +# --------------------------------------------------------------------- -proc isFilled(hcode: THash): bool {.inline.} = - result = hcode != 0 -proc isValid*[A](s: HashSet[A]): bool = - ## Returns `true` if the set has been initialized with `initSet <#initSet>`_. +proc init*[A](s: var HashSet[A], initialSize = defaultInitialSize) = + ## Initializes a hash set. ## - ## Most operations over an uninitialized set will crash at runtime and - ## `assert <system.html#assert>`_ in debug builds. You can use this proc in - ## your own procs to verify that sets passed to your procs are correctly - ## initialized. Example: + ## Starting from Nim v0.20, sets are initialized by default and it is + ## not necessary to call this function explicitly. ## - ## .. code-block :: - ## proc savePreferences(options: TSet[string]) = - ## assert options.isValid, "Pass an initialized set!" - ## # Do stuff here, may crash in release builds! - result = not s.data.isNil + ## You can call this proc on a previously initialized hash set, which will + ## discard all its values. This might be more convenient than iterating over + ## existing values and calling `excl() <#excl,HashSet[A],A>`_ on them. + ## + ## See also: + ## * `initHashSet proc <#initHashSet>`_ + ## * `toHashSet proc <#toHashSet,openArray[A]>`_ + runnableExamples: + var a: HashSet[int] + init(a) + + initImpl(s, initialSize) + +proc initHashSet*[A](initialSize = defaultInitialSize): HashSet[A] = + ## Wrapper around `init proc <#init,HashSet[A]>`_ for initialization of + ## hash sets. + ## + ## Returns an empty hash set you can assign directly in `var` blocks in a + ## single line. + ## + ## Starting from Nim v0.20, sets are initialized by default and it is + ## not necessary to call this function explicitly. 
+ ## + ## See also: + ## * `toHashSet proc <#toHashSet,openArray[A]>`_ + runnableExamples: + var a = initHashSet[int]() + a.incl(3) + assert len(a) == 1 + result = default(HashSet[A]) + result.init(initialSize) + +proc `[]`*[A](s: var HashSet[A], key: A): var A = + ## Returns the element that is actually stored in `s` which has the same + ## value as `key` or raises the `KeyError` exception. + ## + ## This is useful when one overloaded `hash` and `==` but still needs + ## reference semantics for sharing. + var hc: Hash + var index = rawGet(s, key, hc) + if index >= 0: result = s.data[index].key + else: + when compiles($key): + raise newException(KeyError, "key not found: " & $key) + else: + raise newException(KeyError, "key not found") + +proc contains*[A](s: HashSet[A], key: A): bool = + ## Returns true if `key` is in `s`. + ## + ## This allows the usage of `in` operator. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var values = initHashSet[int]() + assert(not values.contains(2)) + assert 2 notin values + + values.incl(2) + assert values.contains(2) + assert 2 in values + + var hc: Hash + var index = rawGet(s, key, hc) + result = index >= 0 proc len*[A](s: HashSet[A]): int = - ## Returns the number of keys in `s`. + ## Returns the number of elements in `s`. ## ## Due to an implementation detail you can call this proc on variables which ## have not been initialized yet. The proc will return zero as the length - ## then. Example: - ## - ## .. code-block:: - ## - ## var values: TSet[int] - ## assert(not values.isValid) - ## assert values.len == 0 + ## then. + runnableExamples: + var a: HashSet[string] + assert len(a) == 0 + let s = toHashSet([3, 5, 7]) + assert len(s) == 3 + result = s.counter proc card*[A](s: HashSet[A]): int = - ## Alias for `len() <#len,TSet[A]>`_. + ## Alias for `len() <#len,HashSet[A]>`_. 
## ## Card stands for the `cardinality ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. result = s.counter +proc incl*[A](s: var HashSet[A], key: A) = + ## Includes an element `key` in `s`. + ## + ## This doesn't do anything if `key` is already in `s`. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var values = initHashSet[int]() + values.incl(2) + values.incl(2) + assert values.len == 1 + + inclImpl() + +proc incl*[A](s: var HashSet[A], other: HashSet[A]) = + ## Includes all elements from `other` set into `s` (must be declared as `var`). + ## + ## This is the in-place version of `s + other <#+,HashSet[A],HashSet[A]>`_. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var + values = toHashSet([1, 2, 3]) + others = toHashSet([3, 4, 5]) + values.incl(others) + assert values.len == 5 + + for item in other: incl(s, item) + +proc toHashSet*[A](keys: openArray[A]): HashSet[A] = + ## Creates a new hash set that contains the members of the given + ## collection (seq, array, or string) `keys`. + ## + ## Duplicates are removed. + ## + ## See also: + ## * `initHashSet proc <#initHashSet>`_ + runnableExamples: + let + a = toHashSet([5, 3, 2]) + b = toHashSet("abracadabra") + assert len(a) == 3 + ## a == {2, 3, 5} + assert len(b) == 5 + ## b == {'a', 'b', 'c', 'd', 'r'} + + result = initHashSet[A](keys.len) + for key in items(keys): result.incl(key) + iterator items*[A](s: HashSet[A]): A = - ## Iterates over keys in the set `s`. + ## Iterates over elements of the set `s`. 
## - ## If you need a sequence with the keys you can use `sequtils.toSeq() - ## <sequtils.html#toSeq>`_ on the iterator. Usage example: + ## If you need a sequence with the elements you can use `sequtils.toSeq + ## template <sequtils.html#toSeq.t,untyped>`_. ## - ## .. code-block:: + ## ```Nim ## type ## pair = tuple[a, b: int] ## var - ## a, b = initSet[pair]() + ## a, b = initHashSet[pair]() ## a.incl((2, 3)) ## a.incl((3, 2)) ## a.incl((2, 3)) @@ -103,334 +264,202 @@ iterator items*[A](s: HashSet[A]): A = ## assert a.len == 2 ## echo b ## # --> {(a: 1, b: 3), (a: 0, b: 4)} - assert s.isValid, "The set needs to be initialized." - for h in 0..high(s.data): - if isFilled(s.data[h].hcode): yield s.data[h].key - -const - growthFactor = 2 - -proc mustRehash(length, counter: int): bool {.inline.} = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) - -proc rightSize*(count: int): int {.inline.} = - ## Return the value of `initialSize` to support `count` items. - ## - ## If more items are expected to be added, simply add that - ## expected extra amount to the parameter before calling this. - ## - ## Internally, we want mustRehash(rightSize(x), x) == false. - result = nextPowerOfTwo(count * 3 div 2 + 4) - -proc nextTry(h, maxHash: THash): THash {.inline.} = - result = (h + 1) and maxHash - -template rawGetKnownHCImpl() {.dirty.} = - var h: THash = hc and high(s.data) # start with real hash value - while isFilled(s.data[h].hcode): - # Compare hc THEN key with boolean short circuit. This makes the common case - # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. - # It does slow down succeeding lookups by one extra THash cmp&and..usually - # just a few clock cycles, generally worth it for any non-integer-like A. 
- if s.data[h].hcode == hc and s.data[h].key == key: # compare hc THEN key - return h - h = nextTry(h, high(s.data)) - result = -1 - h # < 0 => MISSING; insert idx = -1 - result - -template rawGetImpl() {.dirty.} = - hc = hash(key) - if hc == 0: # This almost never taken branch should be very predictable. - hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine. - rawGetKnownHCImpl() - -template rawInsertImpl() {.dirty.} = - data[h].key = key - data[h].hcode = hc - -proc rawGetKnownHC[A](s: HashSet[A], key: A, hc: THash): int {.inline.} = - rawGetKnownHCImpl() - -proc rawGet[A](s: HashSet[A], key: A, hc: var THash): int {.inline.} = - rawGetImpl() - -proc mget*[A](s: var HashSet[A], key: A): var A = - ## returns the element that is actually stored in 's' which has the same - ## value as 'key' or raises the ``EInvalidKey`` exception. This is useful - ## when one overloaded 'hash' and '==' but still needs reference semantics - ## for sharing. - assert s.isValid, "The set needs to be initialized." - var hc: THash - var index = rawGet(s, key, hc) - if index >= 0: result = s.data[index].key - else: raise newException(KeyError, "key not found: " & $key) + ## ``` + let length = s.len + for h in 0 .. high(s.data): + if isFilled(s.data[h].hcode): + yield s.data[h].key + assert(len(s) == length, "the length of the HashSet changed while iterating over it") -proc contains*[A](s: HashSet[A], key: A): bool = - ## Returns true iff `key` is in `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## assert(not values.contains(2)) - ## values.incl(2) - ## assert values.contains(2) - ## values.excl(2) - ## assert(not values.contains(2)) - assert s.isValid, "The set needs to be initialized." 
- var hc: THash - var index = rawGet(s, key, hc) - result = index >= 0 - -proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A, - hc: THash, h: THash) = - rawInsertImpl() - -proc enlarge[A](s: var HashSet[A]) = - var n: KeyValuePairSeq[A] - newSeq(n, len(s.data) * growthFactor) - swap(s.data, n) # n is now old seq - for i in countup(0, high(n)): - if isFilled(n[i].hcode): - var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode) - rawInsert(s, s.data, n[i].key, n[i].hcode, j) - -template inclImpl() {.dirty.} = - var hc: THash - var index = rawGet(s, key, hc) - if index < 0: - if mustRehash(len(s.data), s.counter): - enlarge(s) - index = rawGetKnownHC(s, key, hc) - rawInsert(s, s.data, key, hc, -1 - index) - inc(s.counter) - -template containsOrInclImpl() {.dirty.} = - var hc: THash - var index = rawGet(s, key, hc) - if index >= 0: - result = true - else: - if mustRehash(len(s.data), s.counter): - enlarge(s) - index = rawGetKnownHC(s, key, hc) - rawInsert(s, s.data, key, hc, -1 - index) - inc(s.counter) - -proc incl*[A](s: var HashSet[A], key: A) = - ## Includes an element `key` in `s`. - ## - ## This doesn't do anything if `key` is already in `s`. Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## values.incl(2) - ## values.incl(2) - ## assert values.len == 1 - assert s.isValid, "The set needs to be initialized." - inclImpl() - -proc incl*[A](s: var HashSet[A], other: HashSet[A]) = - ## Includes all elements from `other` into `s`. - ## - ## Example: - ## - ## .. code-block:: - ## var values = initSet[int]() - ## values.incl(2) - ## var others = toSet([6, 7]) - ## values.incl(others) - ## assert values.len == 3 - assert s.isValid, "The set `s` needs to be initialized." - assert other.isValid, "The set `other` needs to be initialized." - for item in other: incl(s, item) +proc containsOrIncl*[A](s: var HashSet[A], key: A): bool = + ## Includes `key` in the set `s` and tells if `key` was already in `s`. 
+ ## + ## The difference with regards to the `incl proc <#incl,HashSet[A],A>`_ is + ## that this proc returns `true` if `s` already contained `key`. The + ## proc will return `false` if `key` was added as a new value to `s` during + ## this call. + ## + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var values = initHashSet[int]() + assert values.containsOrIncl(2) == false + assert values.containsOrIncl(2) == true + assert values.containsOrIncl(3) == false -template doWhile(a: expr, b: stmt): stmt = - while true: - b - if not a: break + containsOrInclImpl() proc excl*[A](s: var HashSet[A], key: A) = ## Excludes `key` from the set `s`. ## - ## This doesn't do anything if `key` is not found in `s`. Example: - ## - ## .. code-block:: - ## var s = toSet([2, 3, 6, 7]) - ## s.excl(2) - ## s.excl(2) - ## assert s.len == 3 - assert s.isValid, "The set needs to be initialized." - var hc: THash - var i = rawGet(s, key, hc) - var msk = high(s.data) - if i >= 0: - s.data[i].hcode = 0 - dec(s.counter) - while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 - var j = i # The correctness of this depends on (h+1) in nextTry, - var r = j # though may be adaptable to other simple sequences. - s.data[i].hcode = 0 # mark current EMPTY - doWhile ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): - i = (i + 1) and msk # increment mod table size - if isEmpty(s.data[i].hcode): # end of collision cluster; So all done - return - r = s.data[i].hcode and msk # "home" location of key@i - shallowCopy(s.data[j], s.data[i]) # data[j] will be marked EMPTY next loop - -proc excl*[A](s: var HashSet[A], other: HashSet[A]) = - ## Excludes everything in `other` from `s`. - ## - ## Example: - ## - ## .. 
code-block:: - ## var - ## numbers = toSet([1, 2, 3, 4, 5]) - ## even = toSet([2, 4, 6, 8]) - ## numbers.excl(even) - ## echo numbers - ## # --> {1, 3, 5} - assert s.isValid, "The set `s` needs to be initialized." - assert other.isValid, "The set `other` needs to be initialized." - for item in other: excl(s, item) - -proc containsOrIncl*[A](s: var HashSet[A], key: A): bool = - ## Includes `key` in the set `s` and tells if `key` was added to `s`. - ## - ## The difference with regards to the `incl() <#incl,TSet[A],A>`_ proc is - ## that this proc returns `true` if `key` was already present in `s`. The - ## proc will return false if `key` was added as a new value to `s` during - ## this call. Example: + ## This doesn't do anything if `key` is not found in `s`. ## - ## .. code-block:: - ## var values = initSet[int]() - ## assert values.containsOrIncl(2) == false - ## assert values.containsOrIncl(2) == true - assert s.isValid, "The set needs to be initialized." - containsOrInclImpl() - -proc init*[A](s: var HashSet[A], initialSize=64) = - ## Initializes a hash set. - ## - ## The `initialSize` parameter needs to be a power of two. You can use - ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to - ## guarantee that at runtime. All set variables must be initialized before - ## use with other procs from this module with the exception of `isValid() - ## <#isValid,TSet[A]>`_ and `len() <#len,TSet[A]>`_. - ## - ## You can call this proc on a previously initialized hash set, which will - ## discard all its values. This might be more convenient than iterating over - ## existing values and calling `excl() <#excl,TSet[A],A>`_ on them. Example: - ## - ## .. 
code-block :: - ## var a: TSet[int] - ## a.init(4) - ## a.incl(2) - ## a.init - ## assert a.len == 0 and a.isValid - assert isPowerOfTwo(initialSize) - s.counter = 0 - newSeq(s.data, initialSize) + ## See also: + ## * `incl proc <#incl,HashSet[A],A>`_ for including an element + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var s = toHashSet([2, 3, 6, 7]) + s.excl(2) + s.excl(2) + assert s.len == 3 -proc initSet*[A](initialSize=64): HashSet[A] = - ## Wrapper around `init() <#init,TSet[A],int>`_ for initialization of hash - ## sets. - ## - ## Returns an empty hash set you can assign directly in ``var`` blocks in a - ## single line. Example: - ## - ## .. code-block :: - ## var a = initSet[int](4) - ## a.incl(2) - result.init(initialSize) + discard exclImpl(s, key) -proc toSet*[A](keys: openArray[A]): HashSet[A] = - ## Creates a new hash set that contains the given `keys`. +proc excl*[A](s: var HashSet[A], other: HashSet[A]) = + ## Excludes all elements of `other` set from `s`. ## - ## Example: + ## This is the in-place version of `s - other <#-,HashSet[A],HashSet[A]>`_. ## - ## .. code-block:: - ## var numbers = toSet([1, 2, 3, 4, 5]) - ## assert numbers.contains(2) - ## assert numbers.contains(4) - result = initSet[A](rightSize(keys.len)) - for key in items(keys): result.incl(key) + ## See also: + ## * `incl proc <#incl,HashSet[A],HashSet[A]>`_ for including other set + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `missingOrExcl proc <#missingOrExcl,HashSet[A],A>`_ + runnableExamples: + var + numbers = toHashSet([1, 2, 3, 4, 5]) + even = toHashSet([2, 4, 6, 8]) + numbers.excl(even) + assert len(numbers) == 3 + ## numbers == {1, 3, 5} + + for item in other: discard exclImpl(s, item) + +proc missingOrExcl*[A](s: var HashSet[A], key: A): bool = + ## Excludes `key` in the set `s` and tells if `key` was already missing from `s`. 
+ ## + ## The difference with regards to the `excl proc <#excl,HashSet[A],A>`_ is + ## that this proc returns `true` if `key` was missing from `s`. + ## The proc will return `false` if `key` was in `s` and it was removed + ## during this call. + ## + ## See also: + ## * `excl proc <#excl,HashSet[A],A>`_ for excluding an element + ## * `excl proc <#excl,HashSet[A],HashSet[A]>`_ for excluding other set + ## * `containsOrIncl proc <#containsOrIncl,HashSet[A],A>`_ + runnableExamples: + var s = toHashSet([2, 3, 6, 7]) + assert s.missingOrExcl(4) == true + assert s.missingOrExcl(6) == false + assert s.missingOrExcl(6) == true + + exclImpl(s, key) + +proc pop*[A](s: var HashSet[A]): A = + ## Removes and returns an arbitrary element from the set `s`. + ## + ## Raises `KeyError` if the set `s` is empty. + ## + ## See also: + ## * `clear proc <#clear,HashSet[A]>`_ + runnableExamples: + var s = toHashSet([2, 1]) + assert [s.pop, s.pop] in [[1, 2], [2,1]] # order unspecified + doAssertRaises(KeyError, echo s.pop) + + for h in 0 .. high(s.data): + if isFilled(s.data[h].hcode): + result = s.data[h].key + excl(s, result) + return result + raise newException(KeyError, "set is empty") + +proc clear*[A](s: var HashSet[A]) = + ## Clears the HashSet back to an empty state, without shrinking + ## any of the existing storage. + ## + ## `O(n)` operation, where `n` is the size of the hash bucket. + ## + ## See also: + ## * `pop proc <#pop,HashSet[A]>`_ + runnableExamples: + var s = toHashSet([3, 5, 7]) + clear(s) + assert len(s) == 0 -template dollarImpl(): stmt {.dirty.} = - result = "{" - for key in items(s): - if result.len > 1: result.add(", ") - result.add($key) - result.add("}") + s.counter = 0 + for i in 0 ..< s.data.len: + s.data[i].hcode = 0 + {.push warning[UnsafeDefault]:off.} + reset(s.data[i].key) + {.pop.} -proc `$`*[A](s: HashSet[A]): string = - ## Converts the set `s` to a string, mostly for logging purposes. 
- ## - ## Don't use this proc for serialization, the representation may change at - ## any moment and values are not escaped. Example: - ## - ## Example: - ## - ## .. code-block:: - ## echo toSet([2, 4, 5]) - ## # --> {2, 4, 5} - ## echo toSet(["no", "esc'aping", "is \" provided"]) - ## # --> {no, esc'aping, is " provided} - assert s.isValid, "The set needs to be initialized." - dollarImpl() proc union*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the union of the sets `s1` and `s2`. ## - ## The union of two sets is represented mathematically as *A ∪ B* and is the - ## set of all objects that are members of `s1`, `s2` or both. Example: + ## The same as `s1 + s2 <#+,HashSet[A],HashSet[A]>`_. ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = union(a, b) - ## assert c == toSet(["a", "b", "c"]) - assert s1.isValid, "The set `s1` needs to be initialized." - assert s2.isValid, "The set `s2` needs to be initialized." + ## The union of two sets is represented mathematically as *A ∪ B* and is the + ## set of all objects that are members of `s1`, `s2` or both. + ## + ## See also: + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = union(a, b) + assert c == toHashSet(["a", "b", "c"]) + result = s1 incl(result, s2) proc intersection*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the intersection of the sets `s1` and `s2`. ## + ## The same as `s1 * s2 <#*,HashSet[A],HashSet[A]>`_. + ## ## The intersection of two sets is represented mathematically as *A ∩ B* and ## is the set of all objects that are members of `s1` and `s2` at the same - ## time. Example: - ## - ## .. 
code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c == toSet(["b"]) - assert s1.isValid, "The set `s1` needs to be initialized." - assert s2.isValid, "The set `s2` needs to be initialized." - result = initSet[A](min(s1.data.len, s2.data.len)) - for item in s1: - if item in s2: incl(result, item) + ## time. + ## + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = intersection(a, b) + assert c == toHashSet(["b"]) + + result = initHashSet[A](max(min(s1.data.len, s2.data.len), 2)) + + # iterate over the elements of the smaller set + if s1.data.len < s2.data.len: + for item in s1: + if item in s2: incl(result, item) + else: + for item in s2: + if item in s1: incl(result, item) + proc difference*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the difference of the sets `s1` and `s2`. ## - ## The difference of two sets is represented mathematically as *A \ B* and is + ## The same as `s1 - s2 <#-,HashSet[A],HashSet[A]>`_. + ## + ## The difference of two sets is represented mathematically as *A ∖ B* and is ## the set of all objects that are members of `s1` and not members of `s2`. - ## Example: ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = difference(a, b) - ## assert c == toSet(["a"]) - assert s1.isValid, "The set `s1` needs to be initialized." - assert s2.isValid, "The set `s2` needs to be initialized." 
- result = initSet[A]() + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `symmetricDifference proc <#symmetricDifference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = difference(a, b) + assert c == toHashSet(["a"]) + + result = initHashSet[A]() for item in s1: if not contains(s2, item): incl(result, item) @@ -438,51 +467,53 @@ proc difference*[A](s1, s2: HashSet[A]): HashSet[A] = proc symmetricDifference*[A](s1, s2: HashSet[A]): HashSet[A] = ## Returns the symmetric difference of the sets `s1` and `s2`. ## + ## The same as `s1 -+- s2 <#-+-,HashSet[A],HashSet[A]>`_. + ## ## The symmetric difference of two sets is represented mathematically as *A △ ## B* or *A ⊖ B* and is the set of all objects that are members of `s1` or - ## `s2` but not both at the same time. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = symmetricDifference(a, b) - ## assert c == toSet(["a", "c"]) - assert s1.isValid, "The set `s1` needs to be initialized." - assert s2.isValid, "The set `s2` needs to be initialized." + ## `s2` but not both at the same time. + ## + ## See also: + ## * `union proc <#union,HashSet[A],HashSet[A]>`_ + ## * `intersection proc <#intersection,HashSet[A],HashSet[A]>`_ + ## * `difference proc <#difference,HashSet[A],HashSet[A]>`_ + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = symmetricDifference(a, b) + assert c == toHashSet(["a", "c"]) + result = s1 for item in s2: if containsOrIncl(result, item): excl(result, item) proc `+`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `union(s1, s2) <#union>`_. + ## Alias for `union(s1, s2) <#union,HashSet[A],HashSet[A]>`_. result = union(s1, s2) proc `*`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `intersection(s1, s2) <#intersection>`_. 
+ ## Alias for `intersection(s1, s2) <#intersection,HashSet[A],HashSet[A]>`_. result = intersection(s1, s2) proc `-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `difference(s1, s2) <#difference>`_. + ## Alias for `difference(s1, s2) <#difference,HashSet[A],HashSet[A]>`_. result = difference(s1, s2) proc `-+-`*[A](s1, s2: HashSet[A]): HashSet[A] {.inline.} = - ## Alias for `symmetricDifference(s1, s2) <#symmetricDifference>`_. + ## Alias for `symmetricDifference(s1, s2) + ## <#symmetricDifference,HashSet[A],HashSet[A]>`_. result = symmetricDifference(s1, s2) proc disjoint*[A](s1, s2: HashSet[A]): bool = - ## Returns true iff the sets `s1` and `s2` have no items in common. - ## - ## Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## assert disjoint(a, b) == false - ## assert disjoint(a, b - a) == true - assert s1.isValid, "The set `s1` needs to be initialized." - assert s2.isValid, "The set `s2` needs to be initialized." + ## Returns `true` if the sets `s1` and `s2` have no items in common. + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + assert disjoint(a, b) == false + assert disjoint(a, b - a) == true + for item in s1: if item in s2: return false return true @@ -491,306 +522,344 @@ proc `<`*[A](s, t: HashSet[A]): bool = ## Returns true if `s` is a strict or proper subset of `t`. ## ## A strict or proper subset `s` has all of its members in `t` but `t` has - ## more elements than `s`. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c < a and c < b - ## assert((a < a) == false) + ## more elements than `s`. 
+ runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = intersection(a, b) + assert c < a and c < b + assert(not (a < a)) + s.counter != t.counter and s <= t proc `<=`*[A](s, t: HashSet[A]): bool = - ## Returns true if `s` is subset of `t`. + ## Returns true if `s` is a subset of `t`. ## ## A subset `s` has all of its members in `t` and `t` doesn't necessarily - ## have more members than `s`. That is, `s` can be equal to `t`. Example: - ## - ## .. code-block:: - ## var - ## a = toSet(["a", "b"]) - ## b = toSet(["b", "c"]) - ## c = intersection(a, b) - ## assert c <= a and c <= b - ## assert((a <= a)) + ## have more members than `s`. That is, `s` can be equal to `t`. + runnableExamples: + let + a = toHashSet(["a", "b"]) + b = toHashSet(["b", "c"]) + c = intersection(a, b) + assert c <= a and c <= b + assert a <= a + result = false if s.counter > t.counter: return result = true - for item in s: + for item in items(s): if not(t.contains(item)): result = false return proc `==`*[A](s, t: HashSet[A]): bool = ## Returns true if both `s` and `t` have the same members and set size. - ## - ## Example: - ## - ## .. code-block:: - ## var - ## a = toSet([1, 2]) - ## b = toSet([1]) - ## b.incl(2) - ## assert a == b + runnableExamples: + var + a = toHashSet([1, 2]) + b = toHashSet([2, 1]) + assert a == b + s.counter == t.counter and s <= t -proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] = - ## Returns a new set after applying `op` on each of the elements of `data`. - ## - ## You can use this proc to transform the elements from a set. Example: +proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] {.effectsOf: op.} = + ## Returns a new set after applying `op` proc on each of the elements of + ##`data` set. ## - ## .. 
code-block:: - ## var a = toSet([1, 2, 3]) - ## var b = a.map(proc (x: int): string = $x) - ## assert b == toSet(["1", "2", "3"]) - result = initSet[B]() - for item in data: result.incl(op(item)) + ## You can use this proc to transform the elements from a set. + runnableExamples: + let + a = toHashSet([1, 2, 3]) + b = a.map(proc (x: int): string = $x) + assert b == toHashSet(["1", "2", "3"]) -# ------------------------------ ordered set ------------------------------ - -type - OrderedKeyValuePair[A] = tuple[ - hcode: THash, next: int, key: A] - OrderedKeyValuePairSeq[A] = seq[OrderedKeyValuePair[A]] - OrderedSet* {.myShallow.}[A] = object ## \ - ## A generic hash set that remembers insertion order. - ## - ## Use `init() <#init,OrderedSet[A],int>`_ or `initOrderedSet[type]() - ## <#initOrderedSet>`_ before calling other procs on it. - data: OrderedKeyValuePairSeq[A] - counter, first, last: int + result = initHashSet[B]() + for item in items(data): result.incl(op(item)) -{.deprecated: [TOrderedSet: OrderedSet].} +proc hash*[A](s: HashSet[A]): Hash = + ## Hashing of HashSet. + for h in 0 .. high(s.data): + result = result xor s.data[h].hcode + result = !$result -proc isValid*[A](s: OrderedSet[A]): bool = - ## Returns `true` if the ordered set has been initialized with `initSet - ## <#initOrderedSet>`_. +proc `$`*[A](s: HashSet[A]): string = + ## Converts the set `s` to a string, mostly for logging and printing purposes. ## - ## Most operations over an uninitialized ordered set will crash at runtime - ## and `assert <system.html#assert>`_ in debug builds. You can use this proc - ## in your own procs to verify that ordered sets passed to your procs are - ## correctly initialized. Example: + ## Don't use this proc for serialization, the representation may change at + ## any moment and values are not escaped. ## - ## .. code-block:: - ## proc saveTarotCards(cards: TOrderedSet[int]) = - ## assert cards.isValid, "Pass an initialized set!" 
- ## # Do stuff here, may crash in release builds! - result = not s.data.isNil + ## **Examples:** + ## ```Nim + ## echo toHashSet([2, 4, 5]) + ## # --> {2, 4, 5} + ## echo toHashSet(["no", "esc'aping", "is \" provided"]) + ## # --> {no, esc'aping, is " provided} + ## ``` + dollarImpl() -proc len*[A](s: OrderedSet[A]): int {.inline.} = - ## Returns the number of keys in `s`. + +proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated: + "Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize) + +proc toSet*[A](keys: openArray[A]): HashSet[A] {.deprecated: + "Deprecated since v0.20, use 'toHashSet'".} = toHashSet[A](keys) + +proc isValid*[A](s: HashSet[A]): bool {.deprecated: + "Deprecated since v0.20; sets are initialized by default".} = + ## Returns `true` if the set has been initialized (with `initHashSet proc + ## <#initHashSet>`_ or `init proc <#init,HashSet[A]>`_). ## - ## Due to an implementation detail you can call this proc on variables which - ## have not been initialized yet. The proc will return zero as the length - ## then. Example: + runnableExamples: + proc savePreferences(options: HashSet[string]) = + assert options.isValid, "Pass an initialized set!" + # Do stuff here, may crash in release builds! + result = s.data.len > 0 + + + +# --------------------------------------------------------------------- +# --------------------------- OrderedSet ------------------------------ +# --------------------------------------------------------------------- + +template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = + if s.data.len > 0: + var h = s.first + var idx = 0 + while h >= 0: + var nxt = s.data[h].next + if isFilled(s.data[h].hcode): + yieldStmt + inc(idx) + h = nxt + + +proc init*[A](s: var OrderedSet[A], initialSize = defaultInitialSize) = + ## Initializes an ordered hash set. ## - ## .. 
code-block:: + ## Starting from Nim v0.20, sets are initialized by default and it is + ## not necessary to call this function explicitly. ## - ## var values: TOrderedSet[int] - ## assert(not values.isValid) - ## assert values.len == 0 - result = s.counter - -proc card*[A](s: OrderedSet[A]): int {.inline.} = - ## Alias for `len() <#len,TOrderedSet[A]>`_. + ## You can call this proc on a previously initialized hash set, which will + ## discard all its values. This might be more convenient than iterating over + ## existing values and calling `excl() <#excl,HashSet[A],A>`_ on them. ## - ## Card stands for the `cardinality - ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. - result = s.counter + ## See also: + ## * `initOrderedSet proc <#initOrderedSet>`_ + ## * `toOrderedSet proc <#toOrderedSet,openArray[A]>`_ + runnableExamples: + var a: OrderedSet[int] + init(a) -template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = - var h = s.first - while h >= 0: - var nxt = s.data[h].next - if isFilled(s.data[h].hcode): yieldStmt - h = nxt + initImpl(s, initialSize) -iterator items*[A](s: OrderedSet[A]): A = - ## Iterates over keys in the ordered set `s` in insertion order. +proc initOrderedSet*[A](initialSize = defaultInitialSize): OrderedSet[A] = + ## Wrapper around `init proc <#init,OrderedSet[A]>`_ for initialization of + ## ordered hash sets. ## - ## If you need a sequence with the keys you can use `sequtils.toSeq() - ## <sequtils.html#toSeq>`_ on the iterator. Usage example: + ## Returns an empty ordered hash set you can assign directly in `var` blocks + ## in a single line. ## - ## .. code-block:: - ## var a = initOrderedSet[int]() - ## for value in [9, 2, 1, 5, 1, 8, 4, 2]: - ## a.incl(value) - ## for value in a.items: - ## echo "Got ", value - ## # --> Got 9 - ## # --> Got 2 - ## # --> Got 1 - ## # --> Got 5 - ## # --> Got 8 - ## # --> Got 4 - assert s.isValid, "The set needs to be initialized." 
- forAllOrderedPairs: - yield s.data[h].key + ## Starting from Nim v0.20, sets are initialized by default and it is + ## not necessary to call this function explicitly. + ## + ## See also: + ## * `toOrderedSet proc <#toOrderedSet,openArray[A]>`_ + runnableExamples: + var a = initOrderedSet[int]() + a.incl(3) + assert len(a) == 1 -proc rawGetKnownHC[A](s: OrderedSet[A], key: A, hc: THash): int {.inline.} = - rawGetKnownHCImpl() + result.init(initialSize) -proc rawGet[A](s: OrderedSet[A], key: A, hc: var THash): int {.inline.} = - rawGetImpl() +proc toOrderedSet*[A](keys: openArray[A]): OrderedSet[A] = + ## Creates a new hash set that contains the members of the given + ## collection (seq, array, or string) `keys`. + ## + ## Duplicates are removed. + ## + ## See also: + ## * `initOrderedSet proc <#initOrderedSet>`_ + runnableExamples: + let + a = toOrderedSet([5, 3, 2]) + b = toOrderedSet("abracadabra") + assert len(a) == 3 + ## a == {5, 3, 2} # different than in HashSet + assert len(b) == 5 + ## b == {'a', 'b', 'r', 'c', 'd'} # different than in HashSet + + result = initOrderedSet[A](keys.len) + for key in items(keys): result.incl(key) proc contains*[A](s: OrderedSet[A], key: A): bool = - ## Returns true iff `key` is in `s`. + ## Returns true if `key` is in `s`. ## - ## Example: + ## This allows the usage of `in` operator. ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## assert(not values.contains(2)) - ## values.incl(2) - ## assert values.contains(2) - assert s.isValid, "The set needs to be initialized." 
- var hc: THash + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + assert(not values.contains(2)) + assert 2 notin values + + values.incl(2) + assert values.contains(2) + assert 2 in values + + var hc: Hash var index = rawGet(s, key, hc) result = index >= 0 -proc rawInsert[A](s: var OrderedSet[A], data: var OrderedKeyValuePairSeq[A], - key: A, hc: THash, h: THash) = - rawInsertImpl() - data[h].next = -1 - if s.first < 0: s.first = h - if s.last >= 0: data[s.last].next = h - s.last = h - -proc enlarge[A](s: var OrderedSet[A]) = - var n: OrderedKeyValuePairSeq[A] - newSeq(n, len(s.data) * growthFactor) - var h = s.first - s.first = -1 - s.last = -1 - swap(s.data, n) - while h >= 0: - var nxt = n[h].next - if isFilled(n[h].hcode): - var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode) - rawInsert(s, s.data, n[h].key, n[h].hcode, j) - h = nxt - proc incl*[A](s: var OrderedSet[A], key: A) = ## Includes an element `key` in `s`. ## - ## This doesn't do anything if `key` is already in `s`. Example: + ## This doesn't do anything if `key` is already in `s`. ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## values.incl(2) - ## values.incl(2) - ## assert values.len == 1 - assert s.isValid, "The set needs to be initialized." + ## See also: + ## * `excl proc <#excl,OrderedSet[A],A>`_ for excluding an element + ## * `incl proc <#incl,HashSet[A],OrderedSet[A]>`_ for including other set + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + values.incl(2) + values.incl(2) + assert values.len == 1 + inclImpl() proc incl*[A](s: var HashSet[A], other: OrderedSet[A]) = - ## Includes all elements from `other` into `s`. - ## - ## Example: - ## - ## .. 
code-block:: - ## var values = initOrderedSet[int]() - ## values.incl(2) - ## var others = toOrderedSet([6, 7]) - ## values.incl(others) - ## assert values.len == 3 - assert s.isValid, "The set `s` needs to be initialized." - assert other.isValid, "The set `other` needs to be initialized." - for item in other: incl(s, item) + ## Includes all elements from the OrderedSet `other` into + ## HashSet `s` (must be declared as `var`). + ## + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var + values = toHashSet([1, 2, 3]) + others = toOrderedSet([3, 4, 5]) + values.incl(others) + assert values.len == 5 + + for item in items(other): incl(s, item) proc containsOrIncl*[A](s: var OrderedSet[A], key: A): bool = - ## Includes `key` in the set `s` and tells if `key` was added to `s`. + ## Includes `key` in the set `s` and tells if `key` was already in `s`. ## - ## The difference with regards to the `incl() <#incl,TOrderedSet[A],A>`_ proc - ## is that this proc returns `true` if `key` was already present in `s`. The + ## The difference with regards to the `incl proc <#incl,OrderedSet[A],A>`_ is + ## that this proc returns `true` if `s` already contained `key`. The ## proc will return false if `key` was added as a new value to `s` during - ## this call. Example: + ## this call. ## - ## .. code-block:: - ## var values = initOrderedSet[int]() - ## assert values.containsOrIncl(2) == false - ## assert values.containsOrIncl(2) == true - assert s.isValid, "The set needs to be initialized." 
- containsOrInclImpl() + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `missingOrExcl proc <#missingOrExcl,OrderedSet[A],A>`_ + runnableExamples: + var values = initOrderedSet[int]() + assert values.containsOrIncl(2) == false + assert values.containsOrIncl(2) == true + assert values.containsOrIncl(3) == false -proc init*[A](s: var OrderedSet[A], initialSize=64) = - ## Initializes an ordered hash set. - ## - ## The `initialSize` parameter needs to be a power of two. You can use - ## `math.nextPowerOfTwo() <math.html#nextPowerOfTwo>`_ or `rightSize` to - ## guarantee that at runtime. All set variables must be initialized before - ## use with other procs from this module with the exception of `isValid() - ## <#isValid,TOrderedSet[A]>`_ and `len() <#len,TOrderedSet[A]>`_. - ## - ## You can call this proc on a previously initialized ordered hash set to - ## discard its values. At the moment this is the only proc to remove elements - ## from an ordered hash set. Example: - ## - ## .. code-block :: - ## var a: TOrderedSet[int] - ## a.init(4) - ## a.incl(2) - ## a.init - ## assert a.len == 0 and a.isValid - assert isPowerOfTwo(initialSize) - s.counter = 0 - s.first = -1 - s.last = -1 - newSeq(s.data, initialSize) + containsOrInclImpl() -proc initOrderedSet*[A](initialSize=64): OrderedSet[A] = - ## Wrapper around `init() <#init,TOrderedSet[A],int>`_ for initialization of - ## ordered hash sets. +proc excl*[A](s: var OrderedSet[A], key: A) = + ## Excludes `key` from the set `s`. Efficiency: `O(n)`. ## - ## Returns an empty ordered hash set you can assign directly in ``var`` - ## blocks in a single line. Example: + ## This doesn't do anything if `key` is not found in `s`. ## - ## .. 
code-block :: - ## var a = initOrderedSet[int](4) - ## a.incl(2) - result.init(initialSize) + ## See also: + ## * `incl proc <#incl,OrderedSet[A],A>`_ for including an element + ## * `missingOrExcl proc <#missingOrExcl,OrderedSet[A],A>`_ + runnableExamples: + var s = toOrderedSet([2, 3, 6, 7]) + s.excl(2) + s.excl(2) + assert s.len == 3 -proc toOrderedSet*[A](keys: openArray[A]): OrderedSet[A] = - ## Creates a new ordered hash set that contains the given `keys`. + discard exclImpl(s, key) + +proc missingOrExcl*[A](s: var OrderedSet[A], key: A): bool = + ## Excludes `key` in the set `s` and tells if `key` was already missing from `s`. + ## Efficiency: O(n). ## - ## Example: + ## The difference with regards to the `excl proc <#excl,OrderedSet[A],A>`_ is + ## that this proc returns `true` if `key` was missing from `s`. + ## The proc will return `false` if `key` was in `s` and it was removed + ## during this call. ## - ## .. code-block:: - ## var numbers = toOrderedSet([1, 2, 3, 4, 5]) - ## assert numbers.contains(2) - ## assert numbers.contains(4) - result = initOrderedSet[A](rightSize(keys.len)) - for key in items(keys): result.incl(key) + ## See also: + ## * `excl proc <#excl,OrderedSet[A],A>`_ + ## * `containsOrIncl proc <#containsOrIncl,OrderedSet[A],A>`_ + runnableExamples: + var s = toOrderedSet([2, 3, 6, 7]) + assert s.missingOrExcl(4) == true + assert s.missingOrExcl(6) == false + assert s.missingOrExcl(6) == true -proc `$`*[A](s: OrderedSet[A]): string = - ## Converts the ordered hash set `s` to a string, mostly for logging purposes. + exclImpl(s, key) + +proc clear*[A](s: var OrderedSet[A]) = + ## Clears the OrderedSet back to an empty state, without shrinking + ## any of the existing storage. ## - ## Don't use this proc for serialization, the representation may change at - ## any moment and values are not escaped. Example: + ## `O(n)` operation where `n` is the size of the hash bucket. 
+ runnableExamples: + var s = toOrderedSet([3, 5, 7]) + clear(s) + assert len(s) == 0 + + s.counter = 0 + s.first = -1 + s.last = -1 + for i in 0 ..< s.data.len: + s.data[i].hcode = 0 + s.data[i].next = 0 + {.push warning[UnsafeDefault]:off.} + reset(s.data[i].key) + {.pop.} + +proc len*[A](s: OrderedSet[A]): int {.inline.} = + ## Returns the number of elements in `s`. ## - ## Example: + ## Due to an implementation detail you can call this proc on variables which + ## have not been initialized yet. The proc will return zero as the length + ## then. + runnableExamples: + var a: OrderedSet[string] + assert len(a) == 0 + let s = toHashSet([3, 5, 7]) + assert len(s) == 3 + + result = s.counter + +proc card*[A](s: OrderedSet[A]): int {.inline.} = + ## Alias for `len() <#len,OrderedSet[A]>`_. ## - ## .. code-block:: - ## echo toOrderedSet([2, 4, 5]) - ## # --> {2, 4, 5} - ## echo toOrderedSet(["no", "esc'aping", "is \" provided"]) - ## # --> {no, esc'aping, is " provided} - assert s.isValid, "The set needs to be initialized." - dollarImpl() + ## Card stands for the `cardinality + ## <http://en.wikipedia.org/wiki/Cardinality>`_ of a set. + result = s.counter proc `==`*[A](s, t: OrderedSet[A]): bool = ## Equality for ordered sets. + runnableExamples: + let + a = toOrderedSet([1, 2]) + b = toOrderedSet([2, 1]) + assert(not (a == b)) + if s.counter != t.counter: return false var h = s.first - var g = s.first + var g = t.first var compared = 0 while h >= 0 and g >= 0: var nxh = s.data[h].next var nxg = t.data[g].next - if isFilled(s.data[h].hcode) and isFilled(s.data[g].hcode): - if s.data[h].key == s.data[g].key: + if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode): + if s.data[h].key == t.data[g].key: inc compared else: return false @@ -798,177 +867,64 @@ proc `==`*[A](s, t: OrderedSet[A]): bool = g = nxg result = compared == s.counter -proc testModule() = - ## Internal micro test to validate docstrings and such. 
- block isValidTest: - var options: HashSet[string] - proc savePreferences(options: HashSet[string]) = - assert options.isValid, "Pass an initialized set!" - options = initSet[string]() - options.savePreferences - - block lenTest: - var values: HashSet[int] - assert(not values.isValid) - assert values.len == 0 - assert values.card == 0 - - block setIterator: - type pair = tuple[a, b: int] - var a, b = initSet[pair]() - a.incl((2, 3)) - a.incl((3, 2)) - a.incl((2, 3)) - for x, y in a.items: - b.incl((x - 2, y + 1)) - assert a.len == b.card - assert a.len == 2 - #echo b - - block setContains: - var values = initSet[int]() - assert(not values.contains(2)) - values.incl(2) - assert values.contains(2) - values.excl(2) - assert(not values.contains(2)) - - values.incl(4) - var others = toSet([6, 7]) - values.incl(others) - assert values.len == 3 - - values.init - assert values.containsOrIncl(2) == false - assert values.containsOrIncl(2) == true - var - a = toSet([1, 2]) - b = toSet([1]) - b.incl(2) - assert a == b - - block exclusions: - var s = toSet([2, 3, 6, 7]) - s.excl(2) - s.excl(2) - assert s.len == 3 +proc hash*[A](s: OrderedSet[A]): Hash = + ## Hashing of OrderedSet. 
+ forAllOrderedPairs: + result = result !& s.data[h].hcode + result = !$result - var - numbers = toSet([1, 2, 3, 4, 5]) - even = toSet([2, 4, 6, 8]) - numbers.excl(even) - #echo numbers - # --> {1, 3, 5} - - block toSeqAndString: - var a = toSet([2, 4, 5]) - var b = initSet[int]() - for x in [2, 4, 5]: b.incl(x) - assert($a == $b) - #echo a - #echo toSet(["no", "esc'aping", "is \" provided"]) - - #block orderedToSeqAndString: - # echo toOrderedSet([2, 4, 5]) - # echo toOrderedSet(["no", "esc'aping", "is \" provided"]) - - block setOperations: - var - a = toSet(["a", "b"]) - b = toSet(["b", "c"]) - c = union(a, b) - assert c == toSet(["a", "b", "c"]) - var d = intersection(a, b) - assert d == toSet(["b"]) - var e = difference(a, b) - assert e == toSet(["a"]) - var f = symmetricDifference(a, b) - assert f == toSet(["a", "c"]) - assert d < a and d < b - assert((a < a) == false) - assert d <= a and d <= b - assert((a <= a)) - # Alias test. - assert a + b == toSet(["a", "b", "c"]) - assert a * b == toSet(["b"]) - assert a - b == toSet(["a"]) - assert a -+- b == toSet(["a", "c"]) - assert disjoint(a, b) == false - assert disjoint(a, b - a) == true +proc `$`*[A](s: OrderedSet[A]): string = + ## Converts the ordered hash set `s` to a string, mostly for logging and + ## printing purposes. + ## + ## Don't use this proc for serialization, the representation may change at + ## any moment and values are not escaped. + ## + ## **Examples:** + ## ```Nim + ## echo toOrderedSet([2, 4, 5]) + ## # --> {2, 4, 5} + ## echo toOrderedSet(["no", "esc'aping", "is \" provided"]) + ## # --> {no, esc'aping, is " provided} + ## ``` + dollarImpl() - block mapSet: - var a = toSet([1, 2, 3]) - var b = a.map(proc (x: int): string = $x) - assert b == toSet(["1", "2", "3"]) - - block isValidTest: - var cards: OrderedSet[string] - proc saveTarotCards(cards: OrderedSet[string]) = - assert cards.isValid, "Pass an initialized set!" 
- cards = initOrderedSet[string]() - cards.saveTarotCards - - block lenTest: - var values: OrderedSet[int] - assert(not values.isValid) - assert values.len == 0 - assert values.card == 0 - - block setIterator: - type pair = tuple[a, b: int] - var a, b = initOrderedSet[pair]() - a.incl((2, 3)) - a.incl((3, 2)) - a.incl((2, 3)) - for x, y in a.items: - b.incl((x - 2, y + 1)) - assert a.len == b.card - assert a.len == 2 - - #block orderedSetIterator: - # var a = initOrderedSet[int]() - # for value in [9, 2, 1, 5, 1, 8, 4, 2]: - # a.incl(value) - # for value in a.items: - # echo "Got ", value - - block setContains: - var values = initOrderedSet[int]() - assert(not values.contains(2)) - values.incl(2) - assert values.contains(2) - block toSeqAndString: - var a = toOrderedSet([2, 4, 5]) - var b = initOrderedSet[int]() - for x in [2, 4, 5]: b.incl(x) - assert($a == $b) - assert(a == b) # https://github.com/Araq/Nimrod/issues/1413 - block initBlocks: - var a: OrderedSet[int] - a.init(4) - a.incl(2) - a.init - assert a.len == 0 and a.isValid - a = initOrderedSet[int](4) - a.incl(2) - assert a.len == 1 - - var b: HashSet[int] - b.init(4) - b.incl(2) - b.init - assert b.len == 0 and b.isValid - b = initSet[int](4) - b.incl(2) - assert b.len == 1 - - for i in 0 .. 32: - var s = rightSize(i) - if s <= i or mustRehash(s, i): - echo "performance issue: rightSize() will not elide enlarge() at ", i - - echo "Micro tests run successfully." - -when isMainModule and not defined(release): testModule() +iterator items*[A](s: OrderedSet[A]): A = + ## Iterates over keys in the ordered set `s` in insertion order. + ## + ## If you need a sequence with the elements you can use `sequtils.toSeq + ## template <sequtils.html#toSeq.t,untyped>`_. 
+ ## + ## ```Nim + ## var a = initOrderedSet[int]() + ## for value in [9, 2, 1, 5, 1, 8, 4, 2]: + ## a.incl(value) + ## for value in a.items: + ## echo "Got ", value + ## # --> Got 9 + ## # --> Got 2 + ## # --> Got 1 + ## # --> Got 5 + ## # --> Got 8 + ## # --> Got 4 + ## ``` + let length = s.len + forAllOrderedPairs: + yield s.data[h].key + assert(len(s) == length, "the length of the OrderedSet changed while iterating over it") + +iterator pairs*[A](s: OrderedSet[A]): tuple[a: int, b: A] = + ## Iterates through (position, value) tuples of OrderedSet `s`. + runnableExamples: + let a = toOrderedSet("abracadabra") + var p = newSeq[(int, char)]() + for x in pairs(a): + p.add(x) + assert p == @[(0, 'a'), (1, 'b'), (2, 'r'), (3, 'c'), (4, 'd')] + + let length = s.len + forAllOrderedPairs: + yield (idx, s.data[h].key) + assert(len(s) == length, "the length of the OrderedSet changed while iterating over it") diff --git a/lib/pure/collections/sharedlist.nim b/lib/pure/collections/sharedlist.nim new file mode 100644 index 000000000..ec8f1cd86 --- /dev/null +++ b/lib/pure/collections/sharedlist.nim @@ -0,0 +1,105 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Shared list support. +## +## Unstable API. + +{.deprecated.} + +{.push stackTrace: off.} + +import + std/locks + +const + ElemsPerNode = 100 + +type + SharedListNode[A] = ptr object + next: SharedListNode[A] + dataLen: int + d: array[ElemsPerNode, A] + + SharedList*[A] = object ## generic shared list + head, tail: SharedListNode[A] + lock*: Lock + +template withLock(t, x: untyped) = + acquire(t.lock) + x + release(t.lock) + +proc iterAndMutate*[A](x: var SharedList[A]; action: proc(x: A): bool) = + ## Iterates over the list. If `action` returns true, the + ## current item is removed from the list. + ## + ## .. warning:: It may not preserve the element order after some modifications. 
+ withLock(x): + var n = x.head + while n != nil: + var i = 0 + while i < n.dataLen: + # action can add new items at the end, so release the lock: + release(x.lock) + if action(n.d[i]): + acquire(x.lock) + let t = x.tail + dec t.dataLen # TODO considering t.dataLen == 0, + # probably the module should be refactored using doubly linked lists + n.d[i] = t.d[t.dataLen] + else: + acquire(x.lock) + inc i + n = n.next + +iterator items*[A](x: var SharedList[A]): A = + withLock(x): + var it = x.head + while it != nil: + for i in 0..it.dataLen-1: + yield it.d[i] + it = it.next + +proc add*[A](x: var SharedList[A]; y: A) = + withLock(x): + var node: SharedListNode[A] + if x.tail == nil: + node = cast[typeof node](allocShared0(sizeof(node[]))) + x.tail = node + x.head = node + elif x.tail.dataLen == ElemsPerNode: + node = cast[typeof node](allocShared0(sizeof(node[]))) + x.tail.next = node + x.tail = node + else: + node = x.tail + node.d[node.dataLen] = y + inc(node.dataLen) + +proc init*[A](t: var SharedList[A]) = + initLock t.lock + t.head = nil + t.tail = nil + +proc clear*[A](t: var SharedList[A]) = + withLock(t): + var it = t.head + while it != nil: + let nxt = it.next + deallocShared(it) + it = nxt + t.head = nil + t.tail = nil + +proc deinitSharedList*[A](t: var SharedList[A]) = + clear(t) + deinitLock t.lock + +{.pop.} diff --git a/lib/pure/collections/sharedtables.nim b/lib/pure/collections/sharedtables.nim new file mode 100644 index 000000000..b474ecd31 --- /dev/null +++ b/lib/pure/collections/sharedtables.nim @@ -0,0 +1,252 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Shared table support for Nim. Use plain old non GC'ed keys and values or +## you'll be in trouble. 
Uses a single lock to protect the table, lockfree +## implementations welcome but if lock contention is so high that you need a +## lockfree hash table, you're doing it wrong. +## +## Unstable API. + +{.deprecated.} + +import + std/[hashes, math, locks] + +type + KeyValuePair[A, B] = tuple[hcode: Hash, key: A, val: B] + KeyValuePairSeq[A, B] = ptr UncheckedArray[KeyValuePair[A, B]] + SharedTable*[A, B] = object ## generic hash SharedTable + data: KeyValuePairSeq[A, B] + counter, dataLen: int + lock: Lock + +template maxHash(t): untyped = t.dataLen-1 + +include tableimpl + +template st_maybeRehashPutImpl(enlarge) {.dirty.} = + if mustRehash(t): + enlarge(t) + index = rawGetKnownHC(t, key, hc) + index = -1 - index # important to transform for mgetOrPutImpl + rawInsert(t, t.data, key, val, hc, index) + inc(t.counter) + +proc enlarge[A, B](t: var SharedTable[A, B]) = + let oldSize = t.dataLen + let size = oldSize * growthFactor + var n = cast[KeyValuePairSeq[A, B]](allocShared0( + sizeof(KeyValuePair[A, B]) * size)) + t.dataLen = size + swap(t.data, n) + for i in 0..<oldSize: + let eh = n[i].hcode + if isFilled(eh): + var j: Hash = eh and maxHash(t) + while isFilled(t.data[j].hcode): + j = nextTry(j, maxHash(t)) + rawInsert(t, t.data, n[i].key, n[i].val, eh, j) + deallocShared(n) + +template withLock(t, x: untyped) = + acquire(t.lock) + x + release(t.lock) + +template withValue*[A, B](t: var SharedTable[A, B], key: A, + value, body: untyped) = + ## Retrieves the value at `t[key]`. + ## `value` can be modified in the scope of the `withValue` call. 
+ runnableExamples: + var table: SharedTable[string, string] + init(table) + + table["a"] = "x" + table["b"] = "y" + table["c"] = "z" + + table.withValue("a", value): + assert value[] == "x" + + table.withValue("b", value): + value[] = "modified" + + table.withValue("b", value): + assert value[] == "modified" + + table.withValue("nonexistent", value): + assert false # not called + acquire(t.lock) + try: + var hc: Hash + var index = rawGet(t, key, hc) + let hasKey = index >= 0 + if hasKey: + var value {.inject.} = addr(t.data[index].val) + body + finally: + release(t.lock) + +template withValue*[A, B](t: var SharedTable[A, B], key: A, + value, body1, body2: untyped) = + ## Retrieves the value at `t[key]`. + ## `value` can be modified in the scope of the `withValue` call. + runnableExamples: + var table: SharedTable[string, string] + init(table) + + table["a"] = "x" + table["b"] = "y" + table["c"] = "z" + + + table.withValue("a", value): + value[] = "m" + + var flag = false + table.withValue("d", value): + discard value + doAssert false + do: # if "d" notin table + flag = true + + if flag: + table["d"] = "n" + + assert table.mget("a") == "m" + assert table.mget("d") == "n" + + acquire(t.lock) + try: + var hc: Hash + var index = rawGet(t, key, hc) + let hasKey = index >= 0 + if hasKey: + var value {.inject.} = addr(t.data[index].val) + body1 + else: + body2 + finally: + release(t.lock) + +proc mget*[A, B](t: var SharedTable[A, B], key: A): var B = + ## Retrieves the value at `t[key]`. The value can be modified. + ## If `key` is not in `t`, the `KeyError` exception is raised. 
+ withLock t: + var hc: Hash + var index = rawGet(t, key, hc) + let hasKey = index >= 0 + if hasKey: result = t.data[index].val + if not hasKey: + when compiles($key): + raise newException(KeyError, "key not found: " & $key) + else: + raise newException(KeyError, "key not found") + +proc mgetOrPut*[A, B](t: var SharedTable[A, B], key: A, val: B): var B = + ## Retrieves value at `t[key]` or puts `val` if not present, either way + ## returning a value which can be modified. **Note**: This is inherently + ## unsafe in the context of multi-threading since it returns a pointer + ## to `B`. + withLock t: + mgetOrPutImpl(enlarge) + +proc hasKeyOrPut*[A, B](t: var SharedTable[A, B], key: A, val: B): bool = + ## Returns true if `key` is in the table, otherwise inserts `value`. + withLock t: + hasKeyOrPutImpl(enlarge) + +template tabMakeEmpty(i) = t.data[i].hcode = 0 +template tabCellEmpty(i) = isEmpty(t.data[i].hcode) +template tabCellHash(i) = t.data[i].hcode + +proc withKey*[A, B](t: var SharedTable[A, B], key: A, + mapper: proc(key: A, val: var B, pairExists: var bool)) = + ## Computes a new mapping for the `key` with the specified `mapper` + ## procedure. + ## + ## The `mapper` takes 3 arguments: + ## + ## 1. `key` - the current key, if it exists, or the key passed to + ## `withKey` otherwise; + ## 2. `val` - the current value, if the key exists, or default value + ## of the type otherwise; + ## 3. `pairExists` - `true` if the key exists, `false` otherwise. + ## + ## The `mapper` can can modify `val` and `pairExists` values to change + ## the mapping of the key or delete it from the table. + ## When adding a value, make sure to set `pairExists` to `true` along + ## with modifying the `val`. + ## + ## The operation is performed atomically and other operations on the table + ## will be blocked while the `mapper` is invoked, so it should be short and + ## simple. + ## + ## Example usage: + ## + ## ```nim + ## # If value exists, decrement it. 
+ ## # If it becomes zero or less, delete the key + ## t.withKey(1'i64) do (k: int64, v: var int, pairExists: var bool): + ## if pairExists: + ## dec v + ## if v <= 0: + ## pairExists = false + ## ``` + withLock t: + var hc: Hash + var index = rawGet(t, key, hc) + + var pairExists = index >= 0 + if pairExists: + mapper(t.data[index].key, t.data[index].val, pairExists) + if not pairExists: + delImplIdx(t, index, tabMakeEmpty, tabCellEmpty, tabCellHash) + else: + var val: B + mapper(key, val, pairExists) + if pairExists: + st_maybeRehashPutImpl(enlarge) + +proc `[]=`*[A, B](t: var SharedTable[A, B], key: A, val: B) = + ## Puts a (key, value)-pair into `t`. + withLock t: + putImpl(enlarge) + +proc add*[A, B](t: var SharedTable[A, B], key: A, val: B) = + ## Puts a new (key, value)-pair into `t` even if `t[key]` already exists. + ## This can introduce duplicate keys into the table! + withLock t: + addImpl(enlarge) + +proc del*[A, B](t: var SharedTable[A, B], key: A) = + ## Deletes `key` from hash table `t`. + withLock t: + delImpl(tabMakeEmpty, tabCellEmpty, tabCellHash) + +proc len*[A, B](t: var SharedTable[A, B]): int = + ## Number of elements in `t`. + withLock t: + result = t.counter + +proc init*[A, B](t: var SharedTable[A, B], initialSize = 32) = + ## Creates a new hash table that is empty. + ## + ## This proc must be called before any other usage of `t`. 
+ let initialSize = slotsNeeded(initialSize) + t.counter = 0 + t.dataLen = initialSize + t.data = cast[KeyValuePairSeq[A, B]](allocShared0( + sizeof(KeyValuePair[A, B]) * initialSize)) + initLock t.lock + +proc deinitSharedTable*[A, B](t: var SharedTable[A, B]) = + deallocShared(t.data) + deinitLock t.lock diff --git a/lib/pure/collections/tableimpl.nim b/lib/pure/collections/tableimpl.nim new file mode 100644 index 000000000..3542741fa --- /dev/null +++ b/lib/pure/collections/tableimpl.nim @@ -0,0 +1,231 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# An `include` file for the different table implementations. + +include hashcommon + +const + defaultInitialSize* = 32 + +template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add + genHashImpl(key, hc) + var h: Hash = hc and maxHash(t) + while isFilled(t.data[h].hcode): + h = nextTry(h, maxHash(t)) + result = h + +template rawInsertImpl() {.dirty.} = + data[h].key = key + data[h].val = val + data[h].hcode = hc + +proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline, outParamsAt: [3].} = + rawGetDeepImpl() + +proc rawInsert[X, A, B](t: var X, data: var KeyValuePairSeq[A, B], + key: A, val: sink B, hc: Hash, h: Hash) = + rawInsertImpl() + +template checkIfInitialized() = + if t.dataLen == 0: + initImpl(t, defaultInitialSize) + +template addImpl(enlarge) {.dirty.} = + checkIfInitialized() + if mustRehash(t): enlarge(t) + var hc: Hash + var j = rawGetDeep(t, key, hc) + rawInsert(t, t.data, key, val, hc, j) + inc(t.counter) + +template maybeRehashPutImpl(enlarge, val) {.dirty.} = + checkIfInitialized() + if mustRehash(t): + enlarge(t) + index = rawGetKnownHC(t, key, hc) + index = -1 - index # important to transform for mgetOrPutImpl + rawInsert(t, t.data, key, val, hc, index) + inc(t.counter) + +template putImpl(enlarge) {.dirty.} = + checkIfInitialized() + var hc: 
Hash = default(Hash) + var index = rawGet(t, key, hc) + if index >= 0: t.data[index].val = val + else: maybeRehashPutImpl(enlarge, val) + +template mgetOrPutImpl(enlarge) {.dirty.} = + checkIfInitialized() + var hc: Hash = default(Hash) + var index = rawGet(t, key, hc) + if index < 0: + # not present: insert (flipping index) + when declared(val): + maybeRehashPutImpl(enlarge, val) + else: + maybeRehashPutImpl(enlarge, default(B)) + # either way return modifiable val + result = t.data[index].val + +# template mgetOrPutDefaultImpl(enlarge) {.dirty.} = +# checkIfInitialized() +# var hc: Hash = default(Hash) +# var index = rawGet(t, key, hc) +# if index < 0: +# # not present: insert (flipping index) +# maybeRehashPutImpl(enlarge, default(B)) +# # either way return modifiable val +# result = t.data[index].val + +template hasKeyOrPutImpl(enlarge) {.dirty.} = + checkIfInitialized() + var hc: Hash = default(Hash) + var index = rawGet(t, key, hc) + if index < 0: + result = false + maybeRehashPutImpl(enlarge, val) + else: result = true + +# delImplIdx is KnuthV3 Algo6.4R adapted to i=i+1 (from i=i-1) which has come to +# be called "back shift delete". It shifts elements in the collision cluster of +# a victim backward to make things as-if the victim were never inserted in the +# first place. This is desirable to keep things "ageless" after many deletes. +# It is trickier than you might guess since initial probe (aka "home") locations +# of keys in a cluster may collide and since table addresses wrap around. +# +# A before-after diagram might look like ('.' means empty): +# slot: 0 1 2 3 4 5 6 7 +# before(1) +# hash1: 6 7 . 3 . 5 5 6 ; Really hash() and msk +# data1: E F . A . B C D ; About to delete C @index 6 +# after(2) +# hash2: 7 . . 3 . 5 6 6 ; Really hash() and msk +# data2: F . . A . B D E ; After deletion of C +# +# This lowers total search depth over the whole table from 1+1+2+2+2+2=10 to 7. +# Had the victim been B@5, C would need back shifting to slot 5. 
Total depth is +# always lowered by at least 1, e.g. victim A@3. This is all quite fast when +# empty slots are frequent (also needed to keep insert/miss searches fast) and +# hash() is either fast or avoided (via `.hcode`). It need not compare keys. +# +# delImplIdx realizes the above transformation, but only works for dense Linear +# Probing, nextTry(h)=h+1. This is not an important limitation since that's the +# fastest sequence on any CPU made since the 1980s. { Performance analysis often +# overweights "key cmp" neglecting cache behavior, giving bad ideas how big/slow +# tables behave (when perf matters most!). Comparing hcode first means usually +# only 1 key cmp is needed for *any* seq. Timing only predictable activity, +# small tables, and/or integer keys often perpetuates such bad ideas. } + +template delImplIdx(t, i, makeEmpty, cellEmpty, cellHash) = + let msk = maxHash(t) + if i >= 0: + dec(t.counter) + block outer: + while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 + var j = i # The correctness of this depends on (h+1) in nextTry + var r = j # though may be adaptable to other simple sequences. 
+ makeEmpty(i) # mark current EMPTY + {.push warning[UnsafeDefault]:off.} + reset(t.data[i].key) + reset(t.data[i].val) + {.pop.} + while true: + i = (i + 1) and msk # increment mod table size + if cellEmpty(i): # end of collision cluster; So all done + break outer + r = cellHash(i) and msk # initial probe index for key@slot i + if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): + break + when defined(js): + t.data[j] = t.data[i] + else: + t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop + +template delImpl(makeEmpty, cellEmpty, cellHash) {.dirty.} = + var hc: Hash + var i = rawGet(t, key, hc) + delImplIdx(t, i, makeEmpty, cellEmpty, cellHash) + +template delImplNoHCode(makeEmpty, cellEmpty, cellHash) {.dirty.} = + if t.dataLen > 0: + var i: Hash = hash(key) and maxHash(t) + while not cellEmpty(i): + if t.data[i].key == key: + delImplIdx(t, i, makeEmpty, cellEmpty, cellHash) + break + i = nextTry(i, maxHash(t)) + +template clearImpl() {.dirty.} = + for i in 0 ..< t.dataLen: + when compiles(t.data[i].hcode): # CountTable records don't contain a hcode + t.data[i].hcode = 0 + {.push warning[UnsafeDefault]:off.} + reset(t.data[i].key) + reset(t.data[i].val) + {.pop.} + t.counter = 0 + +template ctAnd(a, b): bool = + when a: + when b: true + else: false + else: false + +template initImpl(result: typed, size: int) = + let correctSize = slotsNeeded(size) + when ctAnd(declared(SharedTable), typeof(result) is SharedTable): + init(result, correctSize) + else: + result.counter = 0 + newSeq(result.data, correctSize) + when compiles(result.first): + result.first = -1 + result.last = -1 + +template insertImpl() = # for CountTable + if t.dataLen == 0: initImpl(t, defaultInitialSize) + if mustRehash(t): enlarge(t) + ctRawInsert(t, t.data, key, val) + inc(t.counter) + +template getOrDefaultImpl(t, key): untyped = + mixin rawGet + var hc: Hash + var index = rawGet(t, key, hc) + if index >= 0: result = t.data[index].val + +template 
getOrDefaultImpl(t, key, default: untyped): untyped = + mixin rawGet + var hc: Hash + var index = rawGet(t, key, hc) + result = if index >= 0: t.data[index].val else: default + +template dollarImpl(): untyped {.dirty.} = + if t.len == 0: + result = "{:}" + else: + result = "{" + for key, val in pairs(t): + if result.len > 1: result.add(", ") + result.addQuoted(key) + result.add(": ") + result.addQuoted(val) + result.add("}") + +template equalsImpl(s, t: typed) = + if s.counter == t.counter: + # different insertion orders mean different 'data' seqs, so we have + # to use the slow route here: + for key, val in s: + if not t.hasKey(key): return false + if t.getOrDefault(key) != val: return false + return true + else: + return false diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index f85acef22..d414caeed 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -7,347 +7,591 @@ # distribution, for details about the copyright. # -## The ``tables`` module implements variants of an efficient `hash table`:idx: +## The `tables` module implements variants of an efficient `hash table`:idx: ## (also often named `dictionary`:idx: in other programming languages) that is -## a mapping from keys to values. ``Table`` is the usual hash table, -## ``OrderedTable`` is like ``Table`` but remembers insertion order -## and ``CountTable`` is a mapping from a key to its number of occurrences. -## For consistency with every other data type in Nim these have **value** -## semantics, this means that ``=`` performs a copy of the hash table. -## For **reference** semantics use the ``Ref`` variant: ``TableRef``, -## ``OrderedTableRef``, ``CountTableRef``. 
-## -## If you are using simple standard types like ``int`` or ``string`` for the -## keys of the table you won't have any problems, but as soon as you try to use -## a more complex object as a key you will be greeted by a strange compiler -## error:: +## a mapping from keys to values. ## -## Error: type mismatch: got (Person) -## but expected one of: -## hashes.hash(x: openarray[A]): THash -## hashes.hash(x: int): THash -## hashes.hash(x: float): THash -## … +## There are several different types of hash tables available: +## * `Table<#Table>`_ is the usual hash table, +## * `OrderedTable<#OrderedTable>`_ is like `Table` but remembers insertion order, +## * `CountTable<#CountTable>`_ is a mapping from a key to its number of occurrences ## -## What is happening here is that the types used for table keys require to have -## a ``hash()`` proc which will convert them to a `THash <hashes.html#THash>`_ -## value, and the compiler is listing all the hash functions it knows. -## Additionally there has to be a ``==`` operator that provides the same -## semantics as its corresponding ``hash`` proc. -## -## After you add ``hash`` and ``==`` for your custom type everything will work. -## Currently however ``hash`` for objects is not defined, whereas -## ``system.==`` for objects does exist and performs a "deep" comparison (every -## field is compared) which is usually what you want. So in the following -## example implementing only ``hash`` suffices: -## -## .. code-block:: -## type -## Person = object -## firstName, lastName: string +## For consistency with every other data type in Nim these have **value** +## semantics, this means that `=` performs a copy of the hash table. ## -## proc hash(x: Person): THash = -## ## Piggyback on the already available string hash proc. -## ## -## ## Without this proc nothing works! 
-## result = x.firstName.hash !& x.lastName.hash -## result = !$result +## For `ref semantics<manual.html#types-reference-and-pointer-types>`_ +## use their `Ref` variants: `TableRef<#TableRef>`_, +## `OrderedTableRef<#OrderedTableRef>`_, and `CountTableRef<#CountTableRef>`_. ## -## var -## salaries = initTable[Person, int]() -## p1, p2: Person +## To give an example, when `a` is a `Table`, then `var b = a` gives `b` +## as a new independent table. `b` is initialised with the contents of `a`. +## Changing `b` does not affect `a` and vice versa: + +runnableExamples: + var + a = {1: "one", 2: "two"}.toTable # creates a Table + b = a + + assert a == b + + b[3] = "three" + assert 3 notin a + assert 3 in b + assert a != b + +## On the other hand, when `a` is a `TableRef` instead, then changes to `b` +## also affect `a`. Both `a` and `b` **ref** the same data structure: + +runnableExamples: + var + a = {1: "one", 2: "two"}.newTable # creates a TableRef + b = a + + assert a == b + + b[3] = "three" + + assert 3 in a + assert 3 in b + assert a == b + ## -## p1.firstName = "Jon" -## p1.lastName = "Ross" -## salaries[p1] = 30_000 +## ---- ## -## p2.firstName = "소진" -## p2.lastName = "박" -## salaries[p2] = 45_000 -import - hashes, math +## # Basic usage -{.pragma: myShallow.} -type - KeyValuePair[A, B] = tuple[hcode: THash, key: A, val: B] - KeyValuePairSeq[A, B] = seq[KeyValuePair[A, B]] - Table* {.myShallow.}[A, B] = object ## generic hash table - data: KeyValuePairSeq[A, B] - counter: int - TableRef*[A,B] = ref Table[A, B] +## ## Table +runnableExamples: + from std/sequtils import zip -{.deprecated: [TTable: Table, PTable: TableRef].} + let + names = ["John", "Paul", "George", "Ringo"] + years = [1940, 1942, 1943, 1940] -when not defined(nimhygiene): - {.pragma: dirty.} + var beatles = initTable[string, int]() -# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These -# two procs retain clarity of that encoding without the space cost of an enum. 
-proc isEmpty(hcode: THash): bool {.inline.} = - result = hcode == 0 + for pairs in zip(names, years): + let (name, birthYear) = pairs + beatles[name] = birthYear -proc isFilled(hcode: THash): bool {.inline.} = - result = hcode != 0 + assert beatles == {"George": 1943, "Ringo": 1940, "Paul": 1942, "John": 1940}.toTable -proc len*[A, B](t: Table[A, B]): int = - ## returns the number of keys in `t`. - result = t.counter -iterator pairs*[A, B](t: Table[A, B]): (A, B) = - ## iterates over any (key, value) pair in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) + var beatlesByYear = initTable[int, seq[string]]() -iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) = - ## iterates over any (key, value) pair in the table `t`. The values - ## can be modified. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) + for pairs in zip(years, names): + let (birthYear, name) = pairs + if not beatlesByYear.hasKey(birthYear): + # if a key doesn't exist, we create one with an empty sequence + # before we can add elements to it + beatlesByYear[birthYear] = @[] + beatlesByYear[birthYear].add(name) -iterator keys*[A, B](t: Table[A, B]): A = - ## iterates over any key in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].key + assert beatlesByYear == {1940: @["John", "Ringo"], 1942: @["Paul"], 1943: @["George"]}.toTable -iterator values*[A, B](t: Table[A, B]): B = - ## iterates over any value in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].val +## ## OrderedTable +## `OrderedTable<#OrderedTable>`_ is used when it is important to preserve +## the insertion order of keys. -iterator mvalues*[A, B](t: var Table[A, B]): var B = - ## iterates over any value in the table `t`. The values can be modified. 
- for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].val +runnableExamples: + let + a = [('z', 1), ('y', 2), ('x', 3)] + ot = a.toOrderedTable # ordered tables -const - growthFactor = 2 + assert $ot == """{'z': 1, 'y': 2, 'x': 3}""" -proc mustRehash(length, counter: int): bool {.inline.} = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) +## ## CountTable +## `CountTable<#CountTable>`_ is useful for counting the number of items of some +## container (e.g. string, sequence or array), as it is a mapping where the +## items are the keys, and their number of occurrences are the values. +## For that purpose `toCountTable proc<#toCountTable,openArray[A]>`_ +## comes in handy: -proc rightSize*(count: int): int {.inline.} = - ## Return the value of `initialSize` to support `count` items. - ## - ## If more items are expected to be added, simply add that - ## expected extra amount to the parameter before calling this. - ## - ## Internally, we want mustRehash(rightSize(x), x) == false. - result = nextPowerOfTwo(count * 3 div 2 + 4) +runnableExamples: + let myString = "abracadabra" + let letterFrequencies = toCountTable(myString) + assert $letterFrequencies == "{'a': 5, 'd': 1, 'b': 2, 'r': 2, 'c': 1}" -proc nextTry(h, maxHash: THash): THash {.inline.} = - result = (h + 1) and maxHash +## The same could have been achieved by manually iterating over a container +## and increasing each key's value with `inc proc +## <#inc,CountTable[A],A,int>`_: -template rawGetKnownHCImpl() {.dirty.} = - var h: THash = hc and high(t.data) # start with real hash value - while isFilled(t.data[h].hcode): - # Compare hc THEN key with boolean short circuit. This makes the common case - # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. - # It does slow down succeeding lookups by one extra THash cmp&and..usually - # just a few clock cycles, generally worth it for any non-integer-like A. 
- if t.data[h].hcode == hc and t.data[h].key == key: - return h - h = nextTry(h, high(t.data)) - result = -1 - h # < 0 => MISSING; insert idx = -1 - result +runnableExamples: + let myString = "abracadabra" + var letterFrequencies = initCountTable[char]() + for c in myString: + letterFrequencies.inc(c) + assert $letterFrequencies == "{'d': 1, 'r': 2, 'c': 1, 'a': 5, 'b': 2}" -template rawGetImpl() {.dirty.} = - hc = hash(key) - if hc == 0: # This almost never taken branch should be very predictable. - hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine. - rawGetKnownHCImpl() +## +## ---- +## -template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add - hc = hash(key) - if hc == 0: - hc = 314159265 - var h: THash = hc and high(t.data) - while isFilled(t.data[h].hcode): - h = nextTry(h, high(t.data)) - result = h +## ## Hashing +## +## If you are using simple standard types like `int` or `string` for the +## keys of the table you won't have any problems, but as soon as you try to use +## a more complex object as a key you will be greeted by a strange compiler +## error: +## +## Error: type mismatch: got (Person) +## but expected one of: +## hashes.hash(x: openArray[A]): Hash +## hashes.hash(x: int): Hash +## hashes.hash(x: float): Hash +## +## What is happening here is that the types used for table keys require to have +## a `hash()` proc which will convert them to a `Hash <hashes.html#Hash>`_ +## value, and the compiler is listing all the hash functions it knows. +## Additionally there has to be a `==` operator that provides the same +## semantics as its corresponding `hash` proc. +## +## After you add `hash` and `==` for your custom type everything will work. +## Currently, however, `hash` for objects is not defined, whereas +## `system.==` for objects does exist and performs a "deep" comparison (every +## field is compared) which is usually what you want. 
So in the following +## example implementing only `hash` suffices: -template rawInsertImpl() {.dirty.} = - data[h].key = key - data[h].val = val - data[h].hcode = hc +runnableExamples: + import std/hashes -proc rawGetKnownHC[A, B](t: Table[A, B], key: A, hc: THash): int {.inline.} = - rawGetKnownHCImpl() + type + Person = object + firstName, lastName: string -proc rawGetDeep[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} = - rawGetDeepImpl() + proc hash(x: Person): Hash = + ## Piggyback on the already available string hash proc. + ## + ## Without this proc nothing works! + result = x.firstName.hash !& x.lastName.hash + result = !$result -proc rawGet[A, B](t: Table[A, B], key: A, hc: var THash): int {.inline.} = - rawGetImpl() + var + salaries = initTable[Person, int]() + p1, p2: Person -proc `[]`*[A, B](t: Table[A, B], key: A): B = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## default empty value for the type `B` is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. - var hc: THash - var index = rawGet(t, key, hc) - if index >= 0: result = t.data[index].val + p1.firstName = "Jon" + p1.lastName = "Ross" + salaries[p1] = 30_000 -proc mget*[A, B](t: var Table[A, B], key: A): var B = - ## retrieves the value at ``t[key]``. The value can be modified. - ## If `key` is not in `t`, the ``KeyError`` exception is raised. - var hc: THash - var index = rawGet(t, key, hc) - if index >= 0: result = t.data[index].val - else: - when compiles($key): - raise newException(KeyError, "key not found: " & $key) - else: - raise newException(KeyError, "key not found") + p2.firstName = "소진" + p2.lastName = "박" + salaries[p2] = 45_000 -iterator allValues*[A, B](t: Table[A, B]; key: A): B = - ## iterates over any value in the table `t` that belongs to the given `key`. 
- var h: THash = hash(key) and high(t.data) - while isFilled(t.data[h].hcode): - if t.data[h].key == key: - yield t.data[h].val - h = nextTry(h, high(t.data)) +## +## ---- +## -proc hasKey*[A, B](t: Table[A, B], key: A): bool = - ## returns true iff `key` is in the table `t`. - var hc: THash - result = rawGet(t, key, hc) >= 0 +## # See also +## +## * `json module<json.html>`_ for a table-like structure which allows +## heterogeneous members +## * `strtabs module<strtabs.html>`_ for efficient hash tables +## mapping from strings to strings +## * `hashes module<hashes.html>`_ for helper functions for hashing -proc rawInsert[A, B](t: var Table[A, B], data: var KeyValuePairSeq[A, B], - key: A, val: B, hc: THash, h: THash) = - rawInsertImpl() + +import std/private/since +import std/[hashes, math, algorithm] + + +when not defined(nimHasEffectsOf): + {.pragma: effectsOf.} + +type + KeyValuePair[A, B] = tuple[hcode: Hash, key: A, val: B] + KeyValuePairSeq[A, B] = seq[KeyValuePair[A, B]] + Table*[A, B] = object + ## Generic hash table, consisting of key-value pairs. + ## + ## `data` and `counter` are internal implementation details which + ## can't be accessed. + ## + ## For creating an empty Table, use `initTable proc<#initTable>`_. + data: KeyValuePairSeq[A, B] + counter: int + TableRef*[A, B] = ref Table[A, B] ## Ref version of `Table<#Table>`_. + ## + ## For creating a new empty TableRef, use `newTable proc + ## <#newTable>`_. + + +# ------------------------------ helpers --------------------------------- + +# Do NOT move these to tableimpl.nim, because sharedtables uses that +# file and has its own implementation. 
+template maxHash(t): untyped = high(t.data) +template dataLen(t): untyped = len(t.data) + +include tableimpl + +proc raiseKeyError[T](key: T) {.noinline, noreturn.} = + when compiles($key): + raise newException(KeyError, "key not found: " & $key) + else: + raise newException(KeyError, "key not found") + +template get(t, key): untyped = + ## retrieves the value at `t[key]`. The value can be modified. + ## If `key` is not in `t`, the `KeyError` exception is raised. + mixin rawGet + var hc: Hash + var index = rawGet(t, key, hc) + if index >= 0: result = t.data[index].val + else: + raiseKeyError(key) proc enlarge[A, B](t: var Table[A, B]) = var n: KeyValuePairSeq[A, B] newSeq(n, len(t.data) * growthFactor) swap(t.data, n) for i in countup(0, high(n)): - if isFilled(n[i].hcode): - var j = -1 - rawGetKnownHC(t, n[i].key, n[i].hcode) - rawInsert(t, t.data, n[i].key, n[i].val, n[i].hcode, j) - -template addImpl() {.dirty.} = - if mustRehash(len(t.data), t.counter): enlarge(t) - var hc: THash - var j = rawGetDeep(t, key, hc) - rawInsert(t, t.data, key, val, hc, j) - inc(t.counter) - -template maybeRehashPutImpl() {.dirty.} = - if mustRehash(len(t.data), t.counter): - enlarge(t) - index = rawGetKnownHC(t, key, hc) - index = -1 - index # important to transform for mgetOrPutImpl - rawInsert(t, t.data, key, val, hc, index) - inc(t.counter) - -template putImpl() {.dirty.} = - var hc: THash - var index = rawGet(t, key, hc) - if index >= 0: t.data[index].val = val - else: maybeRehashPutImpl() + let eh = n[i].hcode + if isFilled(eh): + var j: Hash = eh and maxHash(t) + while isFilled(t.data[j].hcode): + j = nextTry(j, maxHash(t)) + when defined(js): + rawInsert(t, t.data, n[i].key, n[i].val, eh, j) + else: + rawInsert(t, t.data, move n[i].key, move n[i].val, eh, j) -template mgetOrPutImpl() {.dirty.} = - var hc: THash - var index = rawGet(t, key, hc) - if index < 0: maybeRehashPutImpl() # not present: insert (flipping index) - result = t.data[index].val # either way return 
modifiable val -template hasKeyOrPutImpl() {.dirty.} = - var hc: THash - var index = rawGet(t, key, hc) - if index < 0: - result = false - maybeRehashPutImpl() - else: result = true + + +# ------------------------------------------------------------------- +# ------------------------------ Table ------------------------------ +# ------------------------------------------------------------------- + +proc initTable*[A, B](initialSize = defaultInitialSize): Table[A, B] = + ## Creates a new hash table that is empty. + ## + ## Starting from Nim v0.20, tables are initialized by default and it is + ## not necessary to call this function explicitly. + ## + ## See also: + ## * `toTable proc<#toTable,openArray[]>`_ + ## * `newTable proc<#newTable>`_ for creating a `TableRef` + runnableExamples: + let + a = initTable[int, string]() + b = initTable[char, seq[int]]() + result = default(Table[A, B]) + initImpl(result, initialSize) + +proc `[]=`*[A, B](t: var Table[A, B], key: A, val: sink B) = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `hasKeyOrPut proc<#hasKeyOrPut,Table[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,Table[A,B],A,B>`_ + ## * `del proc<#del,Table[A,B],A>`_ for removing a key from the table + runnableExamples: + var a = initTable[char, int]() + a['x'] = 7 + a['y'] = 33 + doAssert a == {'x': 7, 'y': 33}.toTable + + putImpl(enlarge) + +proc toTable*[A, B](pairs: openArray[(A, B)]): Table[A, B] = + ## Creates a new hash table that contains the given `pairs`. + ## + ## `pairs` is a container consisting of `(key, value)` tuples. 
+ ## + ## See also: + ## * `initTable proc<#initTable>`_ + ## * `newTable proc<#newTable,openArray[]>`_ for a `TableRef` version + runnableExamples: + let a = [('a', 5), ('b', 9)] + let b = toTable(a) + assert b == {'a': 5, 'b': 9}.toTable + + result = initTable[A, B](pairs.len) + for key, val in items(pairs): result[key] = val + +proc `[]`*[A, B](t: Table[A, B], key: A): lent B = + ## Retrieves the value at `t[key]`. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## One can check with `hasKey proc<#hasKey,Table[A,B],A>`_ whether + ## the key exists. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,Table[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ for checking if a key is in + ## the table + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert a['a'] == 5 + doAssertRaises(KeyError): + echo a['z'] + get(t, key) + +proc `[]`*[A, B](t: var Table[A, B], key: A): var B = + ## Retrieves the value at `t[key]`. The value can be modified. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,Table[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ for checking if a key is in + ## the table + get(t, key) + +proc hasKey*[A, B](t: Table[A, B], key: A): bool = + ## Returns true if `key` is in the table `t`. 
+ ## + ## See also: + ## * `contains proc<#contains,Table[A,B],A>`_ for use with the `in` operator + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert a.hasKey('a') == true + doAssert a.hasKey('z') == false + + var hc: Hash + result = rawGet(t, key, hc) >= 0 + +proc contains*[A, B](t: Table[A, B], key: A): bool = + ## Alias of `hasKey proc<#hasKey,Table[A,B],A>`_ for use with + ## the `in` operator. + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert 'b' in a == true + doAssert a.contains('z') == false + + return hasKey[A, B](t, key) + +proc hasKeyOrPut*[A, B](t: var Table[A, B], key: A, val: B): bool = + ## Returns true if `key` is in the table, otherwise inserts `val` and returns false. + ## + ## See also: + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.toTable + if a.hasKeyOrPut('a', 50): + a['a'] = 99 + if a.hasKeyOrPut('z', 50): + a['z'] = 99 + doAssert a == {'a': 99, 'b': 9, 'z': 50}.toTable + + hasKeyOrPutImpl(enlarge) + +proc getOrDefault*[A, B](t: Table[A, B], key: A): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## default initialization value for type `B` is returned (e.g. 0 for any + ## integer type). 
+ ## + ## See also: + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,Table[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,Table[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert a.getOrDefault('a') == 5 + doAssert a.getOrDefault('z') == 0 + result = default(B) + getOrDefaultImpl(t, key) + +proc getOrDefault*[A, B](t: Table[A, B], key: A, default: B): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise, `default` is returned. + ## + ## See also: + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,Table[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,Table[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert a.getOrDefault('a', 99) == 5 + doAssert a.getOrDefault('z', 99) == 99 + result = default(B) + getOrDefaultImpl(t, key, default) proc mgetOrPut*[A, B](t: var Table[A, B], key: A, val: B): var B = - ## retrieves value at ``t[key]`` or puts ``val`` if not present, either way + ## Retrieves the value at `t[key]` or puts `val` if not present, either way ## returning a value which can be modified. - mgetOrPutImpl() + ## + ## + ## Note that while the value returned is of type `var B`, + ## it is easy to accidentally create a copy of the value at `t[key]`. + ## Remember that seqs and strings are value types: assigning the result + ## to a separate variable copies it, so modifying that variable does not + ## modify the value stored in the table. + ## See the example below. 
+ ## + ## See also: + ## * `[] proc<#[],Table[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,Table[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,Table[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,Table[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.toTable + doAssert a.mgetOrPut('a', 99) == 5 + doAssert a.mgetOrPut('z', 99) == 99 + doAssert a == {'a': 5, 'b': 9, 'z': 99}.toTable + + # An example of accidentally creating a copy + var t = initTable[int, seq[int]]() + # In this example, we expect t[10] to be modified, + # but it is not. + var copiedSeq = t.mgetOrPut(10, @[10]) + copiedSeq.add(20) + doAssert t[10] == @[10] + # Correct + t.mgetOrPut(25, @[25]).add(35) + doAssert t[25] == @[25, 35] + + mgetOrPutImpl(enlarge) + +proc mgetOrPut*[A, B](t: var Table[A, B], key: A): var B = + ## Retrieves the value at `t[key]` or puts the + ## default initialization value for type `B` (e.g. 0 for any + ## integer type). + runnableExamples: + var a = {'a': 5}.newTable + doAssert a.mgetOrPut('a') == 5 + a.mgetOrPut('z').inc + doAssert a == {'a': 5, 'z': 1}.newTable + + mgetOrPutImpl(enlarge) -proc hasKeyOrPut*[A, B](t: var Table[A, B], key: A, val: B): bool = - ## returns true iff `key` is in the table, otherwise inserts `value`. - hasKeyOrPutImpl() +proc len*[A, B](t: Table[A, B]): int = + ## Returns the number of keys in `t`. + runnableExamples: + let a = {'a': 5, 'b': 9}.toTable + doAssert len(a) == 2 -proc `[]=`*[A, B](t: var Table[A, B], key: A, val: B) = - ## puts a (key, value)-pair into `t`. - putImpl() + result = t.counter -proc add*[A, B](t: var Table[A, B], key: A, val: B) = - ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. 
- addImpl() +proc add*[A, B](t: var Table[A, B], key: A, val: sink B) {.deprecated: + "Deprecated since v1.4; it was more confusing than useful, use `[]=`".} = + ## Puts a new `(key, value)` pair into `t` even if `t[key]` already exists. + ## + ## **This can introduce duplicate keys into the table!** + ## + ## Use `[]= proc<#[]=,Table[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table without introducing duplicates. + addImpl(enlarge) -template doWhile(a: expr, b: stmt): stmt = - while true: - b - if not a: break +template tabMakeEmpty(i) = t.data[i].hcode = 0 +template tabCellEmpty(i) = isEmpty(t.data[i].hcode) +template tabCellHash(i) = t.data[i].hcode proc del*[A, B](t: var Table[A, B], key: A) = - ## deletes `key` from hash table `t`. - var hc: THash - var i = rawGet(t, key, hc) - let msk = high(t.data) - if i >= 0: - t.data[i].hcode = 0 - dec(t.counter) - while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 - var j = i # The correctness of this depends on (h+1) in nextTry, - var r = j # though may be adaptable to other simple sequences. - t.data[i].hcode = 0 # mark current EMPTY - doWhile ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): - i = (i + 1) and msk # increment mod table size - if isEmpty(t.data[i].hcode): # end of collision cluster; So all done - return - r = t.data[i].hcode and msk # "home" location of key@i - shallowCopy(t.data[j], t.data[i]) # data[j] will be marked EMPTY next loop - -proc initTable*[A, B](initialSize=64): Table[A, B] = - ## creates a new hash table that is empty. - ## - ## `initialSize` needs to be a power of two. If you need to accept runtime - ## values for this you could use the ``nextPowerOfTwo`` proc from the - ## `math <math.html>`_ module or the ``rightSize`` proc from this module. 
- assert isPowerOfTwo(initialSize) - result.counter = 0 - newSeq(result.data, initialSize) - -proc toTable*[A, B](pairs: openArray[(A, - B)]): Table[A, B] = - ## creates a new hash table that contains the given `pairs`. - result = initTable[A, B](rightSize(pairs.len)) - for key, val in items(pairs): result[key] = val + ## Deletes `key` from hash table `t`. Does nothing if the key does not exist. + ## + ## .. warning:: If duplicate keys were added (via the now deprecated `add` proc), + ## this may need to be called multiple times. + ## + ## See also: + ## * `pop proc<#pop,Table[A,B],A,B>`_ + ## * `clear proc<#clear,Table[A,B]>`_ to empty the whole table + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.toTable + a.del('a') + doAssert a == {'b': 9, 'c': 13}.toTable + a.del('z') + doAssert a == {'b': 9, 'c': 13}.toTable + + delImpl(tabMakeEmpty, tabCellEmpty, tabCellHash) + +proc pop*[A, B](t: var Table[A, B], key: A, val: var B): bool = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. + ## + ## .. warning:: If duplicate keys were added (via the now deprecated `add` proc), + ## this may need to be called multiple times. 
+ ## + ## See also: + ## * `del proc<#del,Table[A,B],A>`_ + ## * `clear proc<#clear,Table[A,B]>`_ to empty the whole table + runnableExamples: + var + a = {'a': 5, 'b': 9, 'c': 13}.toTable + i: int + doAssert a.pop('b', i) == true + doAssert a == {'a': 5, 'c': 13}.toTable + doAssert i == 9 + i = 0 + doAssert a.pop('z', i) == false + doAssert a == {'a': 5, 'c': 13}.toTable + doAssert i == 0 + + var hc: Hash + var index = rawGet(t, key, hc) + result = index >= 0 + if result: + val = move(t.data[index].val) + delImplIdx(t, index, tabMakeEmpty, tabCellEmpty, tabCellHash) + +proc take*[A, B](t: var Table[A, B], key: A, val: var B): bool {.inline.} = + ## Alias for: + ## * `pop proc<#pop,Table[A,B],A,B>`_ + pop(t, key, val) + +proc clear*[A, B](t: var Table[A, B]) = + ## Resets the table so that it is empty. + ## + ## See also: + ## * `del proc<#del,Table[A,B],A>`_ + ## * `pop proc<#pop,Table[A,B],A,B>`_ + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.toTable + doAssert len(a) == 3 + clear(a) + doAssert len(a) == 0 -template dollarImpl(): stmt {.dirty.} = - if t.len == 0: - result = "{:}" - else: - result = "{" - for key, val in pairs(t): - if result.len > 1: result.add(", ") - result.add($key) - result.add(": ") - result.add($val) - result.add("}") + clearImpl() proc `$`*[A, B](t: Table[A, B]): string = - ## The `$` operator for hash tables. + ## The `$` operator for hash tables. Used internally when calling `echo` + ## on a table. dollarImpl() -template equalsImpl() = - if s.counter == t.counter: - # different insertion orders mean different 'data' seqs, so we have - # to use the slow route here: - for key, val in s: - # prefix notation leads to automatic dereference in case of PTable - if not t.hasKey(key): return false - if t[key] != val: return false - return true - proc `==`*[A, B](s, t: Table[A, B]): bool = - equalsImpl() + ## The `==` operator for hash tables. Returns `true` if the content of both + ## tables contains the same key-value pairs. 
Insert order does not matter. + runnableExamples: + let + a = {'a': 5, 'b': 9, 'c': 13}.toTable + b = {'b': 9, 'c': 13, 'a': 5}.toTable + doAssert a == b + + equalsImpl(s, t) proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] = ## Index the collection with the proc provided. @@ -356,185 +600,705 @@ proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] = for item in collection: result[index(item)] = item -proc len*[A, B](t: TableRef[A, B]): int = - ## returns the number of keys in `t`. - result = t.counter -iterator pairs*[A, B](t: TableRef[A, B]): (A, B) = - ## iterates over any (key, value) pair in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) -iterator mpairs*[A, B](t: TableRef[A, B]): (A, var B) = - ## iterates over any (key, value) pair in the table `t`. The values - ## can be modified. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) +template withValue*[A, B](t: var Table[A, B], key: A, value, body: untyped) = + ## Retrieves the value at `t[key]`. + ## + ## `value` can be modified in the scope of the `withValue` call. + runnableExamples: + type + User = object + name: string + uid: int + + var t = initTable[int, User]() + let u = User(name: "Hello", uid: 99) + t[1] = u + + t.withValue(1, value): + # block is executed only if `key` in `t` + value.name = "Nim" + value.uid = 1314 + + t.withValue(2, value): + value.name = "No" + value.uid = 521 + + assert t[1].name == "Nim" + assert t[1].uid == 1314 + + mixin rawGet + var hc: Hash + var index = rawGet(t, key, hc) + let hasKey = index >= 0 + if hasKey: + var value {.inject.} = addr(t.data[index].val) + body + +template withValue*[A, B](t: var Table[A, B], key: A, + value, body1, body2: untyped) = + ## Retrieves the value at `t[key]`. + ## + ## `value` can be modified in the scope of the `withValue` call. 
+ runnableExamples: + type + User = object + name: string + uid: int + + var t = initTable[int, User]() + let u = User(name: "Hello", uid: 99) + t[1] = u + + t.withValue(1, value): + # block is executed only if `key` in `t` + value.name = "Nim" + value.uid = 1314 + + t.withValue(521, value): + doAssert false + do: + # block is executed when `key` not in `t` + t[1314] = User(name: "exist", uid: 521) + + assert t[1].name == "Nim" + assert t[1].uid == 1314 + assert t[1314].name == "exist" + assert t[1314].uid == 521 + + mixin rawGet + var hc: Hash + var index = rawGet(t, key, hc) + let hasKey = index >= 0 + if hasKey: + var value {.inject.} = addr(t.data[index].val) + body1 + else: + body2 -iterator keys*[A, B](t: TableRef[A, B]): A = - ## iterates over any key in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].key -iterator values*[A, B](t: TableRef[A, B]): B = - ## iterates over any value in the table `t`. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].val +iterator pairs*[A, B](t: Table[A, B]): (A, B) = + ## Iterates over any `(key, value)` pair in the table `t`. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,Table[A,B]>`_ + ## * `keys iterator<#keys.i,Table[A,B]>`_ + ## * `values iterator<#values.i,Table[A,B]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = { + ## 'o': [1, 5, 7, 9], + ## 'e': [2, 4, 6, 8] + ## }.toTable + ## + ## for k, v in a.pairs: + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: e + ## # value: [2, 4, 6, 8] + ## # key: o + ## # value: [1, 5, 7, 9] + ## ``` + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) = + ## Iterates over any `(key, value)` pair in the table `t` (must be + ## declared as `var`). The values can be modified. 
+ ## + ## See also: + ## * `pairs iterator<#pairs.i,Table[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,Table[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toTable + for k, v in a.mpairs: + v.add(v[0] + 10) + doAssert a == {'e': @[2, 4, 6, 8, 12], 'o': @[1, 5, 7, 9, 11]}.toTable + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator keys*[A, B](t: Table[A, B]): lent A = + ## Iterates over any key in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,Table[A,B]>`_ + ## * `values iterator<#values.i,Table[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toTable + for k in a.keys: + a[k].add(99) + doAssert a == {'e': @[2, 4, 6, 8, 99], 'o': @[1, 5, 7, 9, 99]}.toTable + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator values*[A, B](t: Table[A, B]): lent B = + ## Iterates over any value in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,Table[A,B]>`_ + ## * `keys iterator<#keys.i,Table[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,Table[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toTable + for v in a.values: + doAssert v.len == 4 + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator mvalues*[A, B](t: var Table[A, B]): var B = + ## Iterates over any value in the table `t` (must be + ## declared as `var`). The values can be modified. 
+ ## + ## See also: + ## * `mpairs iterator<#mpairs.i,Table[A,B]>`_ + ## * `values iterator<#values.i,Table[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toTable + for v in a.mvalues: + v.add(99) + doAssert a == {'e': @[2, 4, 6, 8, 99], 'o': @[1, 5, 7, 9, 99]}.toTable + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator allValues*[A, B](t: Table[A, B]; key: A): B {.deprecated: + "Deprecated since v1.4; tables with duplicated keys are deprecated".} = + ## Iterates over any value in the table `t` that belongs to the given `key`. + ## + ## Used if you have a table with duplicate keys (as a result of using + ## `add proc<#add,Table[A,B],A,sinkB>`_). + ## + runnableExamples: + import std/[sequtils, algorithm] + + var a = {'a': 3, 'b': 5}.toTable + for i in 1..3: a.add('z', 10*i) + doAssert toSeq(a.pairs).sorted == @[('a', 3), ('b', 5), ('z', 10), ('z', 20), ('z', 30)] + doAssert sorted(toSeq(a.allValues('z'))) == @[10, 20, 30] + var h: Hash = genHash(key) and high(t.data) + let L = len(t) + while isFilled(t.data[h].hcode): + if t.data[h].key == key: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + h = nextTry(h, high(t.data)) + + + +# ------------------------------------------------------------------- +# ---------------------------- TableRef ----------------------------- +# ------------------------------------------------------------------- + + +proc newTable*[A, B](initialSize = defaultInitialSize): TableRef[A, B] = + ## Creates a new ref hash table that is empty. 
+ ## + ## See also: + ## * `newTable proc<#newTable,openArray[]>`_ for creating a `TableRef` + ## from a collection of `(key, value)` pairs + ## * `initTable proc<#initTable>`_ for creating a `Table` + runnableExamples: + let + a = newTable[int, string]() + b = newTable[char, seq[int]]() + + new(result) + {.noSideEffect.}: + result[] = initTable[A, B](initialSize) + +proc newTable*[A, B](pairs: openArray[(A, B)]): TableRef[A, B] = + ## Creates a new ref hash table that contains the given `pairs`. + ## + ## `pairs` is a container consisting of `(key, value)` tuples. + ## + ## See also: + ## * `newTable proc<#newTable>`_ + ## * `toTable proc<#toTable,openArray[]>`_ for a `Table` version + runnableExamples: + let a = [('a', 5), ('b', 9)] + let b = newTable(a) + assert b == {'a': 5, 'b': 9}.newTable + + new(result) + {.noSideEffect.}: + result[] = toTable[A, B](pairs) + +proc newTableFrom*[A, B, C](collection: A, index: proc(x: B): C): TableRef[C, B] = + ## Index the collection with the proc provided. + # TODO: As soon as supported, change collection: A to collection: A[B] + result = newTable[C, B]() + {.noSideEffect.}: + for item in collection: + result[index(item)] = item + +proc `[]`*[A, B](t: TableRef[A, B], key: A): var B = + ## Retrieves the value at `t[key]`. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## One can check with `hasKey proc<#hasKey,TableRef[A,B],A>`_ whether + ## the key exists. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A>`_ to return + ## a default value (e.g. 
zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,TableRef[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,TableRef[A,B],A>`_ for checking if a key is in + ## the table + runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert a['a'] == 5 + doAssertRaises(KeyError): + echo a['z'] -iterator mvalues*[A, B](t: TableRef[A, B]): var B = - ## iterates over any value in the table `t`. The values can be modified. - for h in 0..high(t.data): - if isFilled(t.data[h].hcode): yield t.data[h].val - -proc `[]`*[A, B](t: TableRef[A, B], key: A): B = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## default empty value for the type `B` is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. result = t[][key] -proc mget*[A, B](t: TableRef[A, B], key: A): var B = - ## retrieves the value at ``t[key]``. The value can be modified. - ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. - t[].mget(key) +proc `[]=`*[A, B](t: TableRef[A, B], key: A, val: sink B) = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKeyOrPut proc<#hasKeyOrPut,TableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,TableRef[A,B],A,B>`_ + ## * `del proc<#del,TableRef[A,B],A>`_ for removing a key from the table + runnableExamples: + var a = newTable[char, int]() + a['x'] = 7 + a['y'] = 33 + doAssert a == {'x': 7, 'y': 33}.newTable + + t[][key] = val + +proc hasKey*[A, B](t: TableRef[A, B], key: A): bool = + ## Returns true if `key` is in the table `t`. 
+ ## + ## See also: + ## * `contains proc<#contains,TableRef[A,B],A>`_ for use with the `in` + ## operator + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert a.hasKey('a') == true + doAssert a.hasKey('z') == false + + result = t[].hasKey(key) + +proc contains*[A, B](t: TableRef[A, B], key: A): bool = + ## Alias of `hasKey proc<#hasKey,TableRef[A,B],A>`_ for use with + ## the `in` operator. + runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert 'b' in a == true + doAssert a.contains('z') == false + + return hasKey[A, B](t, key) + +proc hasKeyOrPut*[A, B](t: TableRef[A, B], key: A, val: B): bool = + ## Returns true if `key` is in the table, otherwise inserts `value`. + ## + ## See also: + ## * `hasKey proc<#hasKey,TableRef[A,B],A>`_ + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.newTable + if a.hasKeyOrPut('a', 50): + a['a'] = 99 + if a.hasKeyOrPut('z', 50): + a['z'] = 99 + doAssert a == {'a': 99, 'b': 9, 'z': 50}.newTable + + t[].hasKeyOrPut(key, val) + +proc getOrDefault*[A, B](t: TableRef[A, B], key: A): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## default initialization value for type `B` is returned (e.g. 0 for any + ## integer type). 
+ ## + ## See also: + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,TableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,TableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,TableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert a.getOrDefault('a') == 5 + doAssert a.getOrDefault('z') == 0 + + getOrDefault(t[], key) + +proc getOrDefault*[A, B](t: TableRef[A, B], key: A, default: B): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise, `default` is returned. + ## + ## See also: + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,TableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,TableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,TableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert a.getOrDefault('a', 99) == 5 + doAssert a.getOrDefault('z', 99) == 99 + + getOrDefault(t[], key, default) proc mgetOrPut*[A, B](t: TableRef[A, B], key: A, val: B): var B = - ## retrieves value at ``t[key]`` or puts ``val`` if not present, either way + ## Retrieves value at `t[key]` or puts `val` if not present, either way ## returning a value which can be modified. + ## + ## Note that while the value returned is of type `var B`, + ## it is easy to accidentally create a copy of the value at `t[key]`. + ## Remember that seqs and strings are value types, and therefore + ## cannot be copied into a separate variable for modification. + ## See the example below. 
+ ## + ## See also: + ## * `[] proc<#[],TableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,TableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,TableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,TableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.newTable + doAssert a.mgetOrPut('a', 99) == 5 + doAssert a.mgetOrPut('z', 99) == 99 + doAssert a == {'a': 5, 'b': 9, 'z': 99}.newTable + + # An example of accidentally creating a copy + var t = newTable[int, seq[int]]() + # In this example, we expect t[10] to be modified, + # but it is not. + var copiedSeq = t.mgetOrPut(10, @[10]) + copiedSeq.add(20) + doAssert t[10] == @[10] + # Correct + t.mgetOrPut(25, @[25]).add(35) + doAssert t[25] == @[25, 35] t[].mgetOrPut(key, val) -proc hasKeyOrPut*[A, B](t: var TableRef[A, B], key: A, val: B): bool = - ## returns true iff `key` is in the table, otherwise inserts `value`. - t[].hasKeyOrPut(key, val) +proc mgetOrPut*[A, B](t: TableRef[A, B], key: A): var B = + ## Retrieves the value at `t[key]` or puts the + ## default initialization value for type `B` (e.g. 0 for any + ## integer type). + runnableExamples: + var a = {'a': 5}.newTable + doAssert a.mgetOrPut('a') == 5 + a.mgetOrPut('z').inc + doAssert a == {'a': 5, 'z': 1}.newTable -proc hasKey*[A, B](t: TableRef[A, B], key: A): bool = - ## returns true iff `key` is in the table `t`. - result = t[].hasKey(key) + t[].mgetOrPut(key) -proc `[]=`*[A, B](t: TableRef[A, B], key: A, val: B) = - ## puts a (key, value)-pair into `t`. - t[][key] = val +proc len*[A, B](t: TableRef[A, B]): int = + ## Returns the number of keys in `t`. 
+ runnableExamples: + let a = {'a': 5, 'b': 9}.newTable + doAssert len(a) == 2 -proc add*[A, B](t: TableRef[A, B], key: A, val: B) = - ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. + result = t.counter + +proc add*[A, B](t: TableRef[A, B], key: A, val: sink B) {.deprecated: + "Deprecated since v1.4; it was more confusing than useful, use `[]=`".} = + ## Puts a new `(key, value)` pair into `t` even if `t[key]` already exists. + ## + ## **This can introduce duplicate keys into the table!** + ## + ## Use `[]= proc<#[]=,TableRef[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table without introducing duplicates. t[].add(key, val) proc del*[A, B](t: TableRef[A, B], key: A) = - ## deletes `key` from hash table `t`. + ## Deletes `key` from hash table `t`. Does nothing if the key does not exist. + ## + ## .. warning:: If duplicate keys were added (via the now deprecated `add` proc), + ## this may need to be called multiple times. + ## + ## See also: + ## * `pop proc<#pop,TableRef[A,B],A,B>`_ + ## * `clear proc<#clear,TableRef[A,B]>`_ to empty the whole table + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.newTable + a.del('a') + doAssert a == {'b': 9, 'c': 13}.newTable + a.del('z') + doAssert a == {'b': 9, 'c': 13}.newTable + t[].del(key) -proc newTable*[A, B](initialSize=64): TableRef[A, B] = - new(result) - result[] = initTable[A, B](initialSize) +proc pop*[A, B](t: TableRef[A, B], key: A, val: var B): bool = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. + ## + ## .. warning:: If duplicate keys were added (via the now deprecated `add` proc), + ## this may need to be called multiple times. 
+ ## + ## See also: + ## * `del proc<#del,TableRef[A,B],A>`_ + ## * `clear proc<#clear,TableRef[A,B]>`_ to empty the whole table + runnableExamples: + var + a = {'a': 5, 'b': 9, 'c': 13}.newTable + i: int + doAssert a.pop('b', i) == true + doAssert a == {'a': 5, 'c': 13}.newTable + doAssert i == 9 + i = 0 + doAssert a.pop('z', i) == false + doAssert a == {'a': 5, 'c': 13}.newTable + doAssert i == 0 + + result = t[].pop(key, val) + +proc take*[A, B](t: TableRef[A, B], key: A, val: var B): bool {.inline.} = + ## Alias for: + ## * `pop proc<#pop,TableRef[A,B],A,B>`_ + pop(t, key, val) + +proc clear*[A, B](t: TableRef[A, B]) = + ## Resets the table so that it is empty. + ## + ## See also: + ## * `del proc<#del,Table[A,B],A>`_ + ## * `pop proc<#pop,Table[A,B],A,B>`_ + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.newTable + doAssert len(a) == 3 + clear(a) + doAssert len(a) == 0 -proc newTable*[A, B](pairs: openArray[(A, B)]): TableRef[A, B] = - ## creates a new hash table that contains the given `pairs`. - new(result) - result[] = toTable[A, B](pairs) + clearImpl() proc `$`*[A, B](t: TableRef[A, B]): string = - ## The `$` operator for hash tables. + ## The `$` operator for hash tables. Used internally when calling `echo` + ## on a table. dollarImpl() proc `==`*[A, B](s, t: TableRef[A, B]): bool = + ## The `==` operator for hash tables. Returns `true` if either both tables + ## are `nil`, or neither is `nil` and the content of both tables contains the + ## same key-value pairs. Insert order does not matter. + runnableExamples: + let + a = {'a': 5, 'b': 9, 'c': 13}.newTable + b = {'b': 9, 'c': 13, 'a': 5}.newTable + doAssert a == b + if isNil(s): result = isNil(t) elif isNil(t): result = false - else: equalsImpl() + else: equalsImpl(s[], t[]) -proc newTableFrom*[A, B, C](collection: A, index: proc(x: B): C): TableRef[C, B] = - ## Index the collection with the proc provided. 
- # TODO: As soon as supported, change collection: A to collection: A[B] - result = newTable[C, B]() - for item in collection: - result[index(item)] = item -# ------------------------------ ordered table ------------------------------ -type - OrderedKeyValuePair[A, B] = tuple[ - hcode: THash, next: int, key: A, val: B] - OrderedKeyValuePairSeq[A, B] = seq[OrderedKeyValuePair[A, B]] - OrderedTable* {. - myShallow.}[A, B] = object ## table that remembers insertion order - data: OrderedKeyValuePairSeq[A, B] - counter, first, last: int - OrderedTableRef*[A, B] = ref OrderedTable[A, B] +iterator pairs*[A, B](t: TableRef[A, B]): (A, B) = + ## Iterates over any `(key, value)` pair in the table `t`. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,TableRef[A,B]>`_ + ## * `keys iterator<#keys.i,TableRef[A,B]>`_ + ## * `values iterator<#values.i,TableRef[A,B]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = { + ## 'o': [1, 5, 7, 9], + ## 'e': [2, 4, 6, 8] + ## }.newTable + ## + ## for k, v in a.pairs: + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: e + ## # value: [2, 4, 6, 8] + ## # key: o + ## # value: [1, 5, 7, 9] + ## ``` + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -{.deprecated: [TOrderedTable: OrderedTable, POrderedTable: OrderedTableRef].} +iterator mpairs*[A, B](t: TableRef[A, B]): (A, var B) = + ## Iterates over any `(key, value)` pair in the table `t`. The values + ## can be modified. + ## + ## See also: + ## * `pairs iterator<#pairs.i,TableRef[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,TableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newTable + for k, v in a.mpairs: + v.add(v[0] + 10) + doAssert a == {'e': @[2, 4, 6, 8, 12], 'o': @[1, 5, 7, 9, 11]}.newTable + + let L = len(t) + for h in 0 .. 
high(t.data): + if isFilled(t.data[h].hcode): + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator keys*[A, B](t: TableRef[A, B]): lent A = + ## Iterates over any key in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,TableRef[A,B]>`_ + ## * `values iterator<#values.i,TableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newTable + for k in a.keys: + a[k].add(99) + doAssert a == {'e': @[2, 4, 6, 8, 99], 'o': @[1, 5, 7, 9, 99]}.newTable + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator values*[A, B](t: TableRef[A, B]): lent B = + ## Iterates over any value in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,TableRef[A,B]>`_ + ## * `keys iterator<#keys.i,TableRef[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,TableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newTable + for v in a.values: + doAssert v.len == 4 + + let L = len(t) + for h in 0 .. high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") -proc len*[A, B](t: OrderedTable[A, B]): int {.inline.} = - ## returns the number of keys in `t`. - result = t.counter +iterator mvalues*[A, B](t: TableRef[A, B]): var B = + ## Iterates over any value in the table `t`. The values can be modified. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,TableRef[A,B]>`_ + ## * `values iterator<#values.i,TableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newTable + for v in a.mvalues: + v.add(99) + doAssert a == {'e': @[2, 4, 6, 8, 99], 'o': @[1, 5, 7, 9, 99]}.newTable + + let L = len(t) + for h in 0 .. 
high(t.data): + if isFilled(t.data[h].hcode): + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") -template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = - var h = t.first - while h >= 0: - var nxt = t.data[h].next - if isFilled(t.data[h].hcode): yieldStmt - h = nxt -iterator pairs*[A, B](t: OrderedTable[A, B]): (A, B) = - ## iterates over any (key, value) pair in the table `t` in insertion - ## order. - forAllOrderedPairs: - yield (t.data[h].key, t.data[h].val) -iterator mpairs*[A, B](t: var OrderedTable[A, B]): (A, var B) = - ## iterates over any (key, value) pair in the table `t` in insertion - ## order. The values can be modified. - forAllOrderedPairs: - yield (t.data[h].key, t.data[h].val) -iterator keys*[A, B](t: OrderedTable[A, B]): A = - ## iterates over any key in the table `t` in insertion order. - forAllOrderedPairs: - yield t.data[h].key -iterator values*[A, B](t: OrderedTable[A, B]): B = - ## iterates over any value in the table `t` in insertion order. - forAllOrderedPairs: - yield t.data[h].val -iterator mvalues*[A, B](t: var OrderedTable[A, B]): var B = - ## iterates over any value in the table `t` in insertion order. The values - ## can be modified. 
- forAllOrderedPairs: - yield t.data[h].val -proc rawGetKnownHC[A, B](t: OrderedTable[A, B], key: A, hc: THash): int = - rawGetKnownHCImpl() -proc rawGetDeep[A, B](t: OrderedTable[A, B], key: A, hc: var THash): int {.inline.} = - rawGetDeepImpl() +# --------------------------------------------------------------------------- +# ------------------------------ OrderedTable ------------------------------- +# --------------------------------------------------------------------------- -proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var THash): int = - rawGetImpl() +type + OrderedKeyValuePair[A, B] = tuple[ + hcode: Hash, next: int, key: A, val: B] + OrderedKeyValuePairSeq[A, B] = seq[OrderedKeyValuePair[A, B]] + OrderedTable*[A, B] = object + ## Hash table that remembers insertion order. + ## + ## For creating an empty OrderedTable, use `initOrderedTable proc + ## <#initOrderedTable>`_. + data: OrderedKeyValuePairSeq[A, B] + counter, first, last: int + OrderedTableRef*[A, B] = ref OrderedTable[A, B] ## Ref version of + ## `OrderedTable<#OrderedTable>`_. + ## + ## For creating a new empty OrderedTableRef, use `newOrderedTable proc + ## <#newOrderedTable>`_. -proc `[]`*[A, B](t: OrderedTable[A, B], key: A): B = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## default empty value for the type `B` is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. - var hc: THash - var index = rawGet(t, key, hc) - if index >= 0: result = t.data[index].val -proc mget*[A, B](t: var OrderedTable[A, B], key: A): var B = - ## retrieves the value at ``t[key]``. The value can be modified. - ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. 
- var hc: THash - var index = rawGet(t, key, hc) - if index >= 0: result = t.data[index].val - else: raise newException(KeyError, "key not found: " & $key) +# ------------------------------ helpers --------------------------------- -proc hasKey*[A, B](t: OrderedTable[A, B], key: A): bool = - ## returns true iff `key` is in the table `t`. - var hc: THash - result = rawGet(t, key, hc) >= 0 +proc rawGetKnownHC[A, B](t: OrderedTable[A, B], key: A, hc: Hash): int = + rawGetKnownHCImpl() + +proc rawGetDeep[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int {.inline.} = + rawGetDeepImpl() + +proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int = + rawGetImpl() proc rawInsert[A, B](t: var OrderedTable[A, B], data: var OrderedKeyValuePairSeq[A, B], - key: A, val: B, hc: THash, h: THash) = + key: A, val: sink B, hc: Hash, h: Hash) = rawInsertImpl() data[h].next = -1 if t.first < 0: t.first = h @@ -550,56 +1314,349 @@ proc enlarge[A, B](t: var OrderedTable[A, B]) = swap(t.data, n) while h >= 0: var nxt = n[h].next - if isFilled(n[h].hcode): - var j = -1 - rawGetKnownHC(t, n[h].key, n[h].hcode) - rawInsert(t, t.data, n[h].key, n[h].val, n[h].hcode, j) + let eh = n[h].hcode + if isFilled(eh): + var j: Hash = eh and maxHash(t) + while isFilled(t.data[j].hcode): + j = nextTry(j, maxHash(t)) + rawInsert(t, t.data, move n[h].key, move n[h].val, n[h].hcode, j) h = nxt -proc `[]=`*[A, B](t: var OrderedTable[A, B], key: A, val: B) = - ## puts a (key, value)-pair into `t`. - putImpl() +template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = + if t.counter > 0: + var h = t.first + while h >= 0: + var nxt = t.data[h].next + if isFilled(t.data[h].hcode): + yieldStmt + h = nxt + +# ---------------------------------------------------------------------- -proc add*[A, B](t: var OrderedTable[A, B], key: A, val: B) = - ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. 
- addImpl() +proc initOrderedTable*[A, B](initialSize = defaultInitialSize): OrderedTable[A, B] = + ## Creates a new ordered hash table that is empty. + ## + ## Starting from Nim v0.20, tables are initialized by default and it is + ## not necessary to call this function explicitly. + ## + ## See also: + ## * `toOrderedTable proc<#toOrderedTable,openArray[]>`_ + ## * `newOrderedTable proc<#newOrderedTable>`_ for creating an + ## `OrderedTableRef` + runnableExamples: + let + a = initOrderedTable[int, string]() + b = initOrderedTable[char, seq[int]]() + result = default(OrderedTable[A, B]) + initImpl(result, initialSize) + +proc `[]=`*[A, B](t: var OrderedTable[A, B], key: A, val: sink B) = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTable[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTable[A,B],A,B>`_ + ## * `del proc<#del,OrderedTable[A,B],A>`_ for removing a key from the table + runnableExamples: + var a = initOrderedTable[char, int]() + a['x'] = 7 + a['y'] = 33 + doAssert a == {'x': 7, 'y': 33}.toOrderedTable + + putImpl(enlarge) + +proc toOrderedTable*[A, B](pairs: openArray[(A, B)]): OrderedTable[A, B] = + ## Creates a new ordered hash table that contains the given `pairs`. + ## + ## `pairs` is a container consisting of `(key, value)` tuples. + ## + ## See also: + ## * `initOrderedTable proc<#initOrderedTable>`_ + ## * `newOrderedTable proc<#newOrderedTable,openArray[]>`_ for an + ## `OrderedTableRef` version + runnableExamples: + let a = [('a', 5), ('b', 9)] + let b = toOrderedTable(a) + assert b == {'a': 5, 'b': 9}.toOrderedTable + + result = initOrderedTable[A, B](pairs.len) + for key, val in items(pairs): result[key] = val + +proc `[]`*[A, B](t: OrderedTable[A, B], key: A): lent B = + ## Retrieves the value at `t[key]`. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. 
+ ## One can check with `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ whether + ## the key exists. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,OrderedTable[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ for checking if a + ## key is in the table + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert a['a'] == 5 + doAssertRaises(KeyError): + echo a['z'] + + get(t, key) + +proc `[]`*[A, B](t: var OrderedTable[A, B], key: A): var B = + ## Retrieves the value at `t[key]`. The value can be modified. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,OrderedTable[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ for checking if a + ## key is in the table + get(t, key) + +proc hasKey*[A, B](t: OrderedTable[A, B], key: A): bool = + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `contains proc<#contains,OrderedTable[A,B],A>`_ for use with the `in` + ## operator + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. 
zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert a.hasKey('a') == true + doAssert a.hasKey('z') == false + + var hc: Hash = default(Hash) + result = rawGet(t, key, hc) >= 0 + +proc contains*[A, B](t: OrderedTable[A, B], key: A): bool = + ## Alias of `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ for use with + ## the `in` operator. + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert 'b' in a == true + doAssert a.contains('z') == false + + return hasKey[A, B](t, key) + +proc hasKeyOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): bool = + ## Returns true if `key` is in the table, otherwise inserts `value`. + ## + ## See also: + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.toOrderedTable + if a.hasKeyOrPut('a', 50): + a['a'] = 99 + if a.hasKeyOrPut('z', 50): + a['z'] = 99 + doAssert a == {'a': 99, 'b': 9, 'z': 50}.toOrderedTable + + hasKeyOrPutImpl(enlarge) + +proc getOrDefault*[A, B](t: OrderedTable[A, B], key: A): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## default initialization value for type `B` is returned (e.g. 0 for any + ## integer type). 
+ ## + ## See also: + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTable[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTable[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert a.getOrDefault('a') == 5 + doAssert a.getOrDefault('z') == 0 + result = default(B) + getOrDefaultImpl(t, key) + +proc getOrDefault*[A, B](t: OrderedTable[A, B], key: A, default: B): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise, `default` is returned. + ## + ## See also: + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTable[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTable[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert a.getOrDefault('a', 99) == 5 + doAssert a.getOrDefault('z', 99) == 99 + result = default(B) + getOrDefaultImpl(t, key, default) proc mgetOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): var B = - ## retrieves value at ``t[key]`` or puts ``value`` if not present, either way + ## Retrieves value at `t[key]` or puts `val` if not present, either way ## returning a value which can be modified. - mgetOrPutImpl() + ## + ## See also: + ## * `[] proc<#[],OrderedTable[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTable[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTable[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A>`_ to return + ## a default value (e.g. 
zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTable[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.toOrderedTable + doAssert a.mgetOrPut('a', 99) == 5 + doAssert a.mgetOrPut('z', 99) == 99 + doAssert a == {'a': 5, 'b': 9, 'z': 99}.toOrderedTable + + mgetOrPutImpl(enlarge) + +proc mgetOrPut*[A, B](t: var OrderedTable[A, B], key: A): var B = + ## Retrieves the value at `t[key]` or puts the + ## default initialization value for type `B` (e.g. 0 for any + ## integer type). + runnableExamples: + var a = {'a': 5}.toOrderedTable + doAssert a.mgetOrPut('a') == 5 + a.mgetOrPut('z').inc + doAssert a == {'a': 5, 'z': 1}.toOrderedTable + + mgetOrPutImpl(enlarge) -proc hasKeyOrPut*[A, B](t: var OrderedTable[A, B], key: A, val: B): bool = - ## returns true iff `key` is in the table, otherwise inserts `value`. - hasKeyOrPutImpl() - -proc initOrderedTable*[A, B](initialSize=64): OrderedTable[A, B] = - ## creates a new ordered hash table that is empty. - ## - ## `initialSize` needs to be a power of two. If you need to accept runtime - ## values for this you could use the ``nextPowerOfTwo`` proc from the - ## `math <math.html>`_ module or the ``rightSize`` proc from this module. - assert isPowerOfTwo(initialSize) - result.counter = 0 - result.first = -1 - result.last = -1 - newSeq(result.data, initialSize) - -proc toOrderedTable*[A, B](pairs: openArray[(A, - B)]): OrderedTable[A, B] = - ## creates a new ordered hash table that contains the given `pairs`. - result = initOrderedTable[A, B](rightSize(pairs.len)) - for key, val in items(pairs): result[key] = val +proc len*[A, B](t: OrderedTable[A, B]): int {.inline.} = + ## Returns the number of keys in `t`. + runnableExamples: + let a = {'a': 5, 'b': 9}.toOrderedTable + doAssert len(a) == 2 -proc `$`*[A, B](t: OrderedTable[A, B]): string = - ## The `$` operator for ordered hash tables. 
- dollarImpl() + result = t.counter + +proc add*[A, B](t: var OrderedTable[A, B], key: A, val: sink B) {.deprecated: + "Deprecated since v1.4; it was more confusing than useful, use `[]=`".} = + ## Puts a new `(key, value)` pair into `t` even if `t[key]` already exists. + ## + ## **This can introduce duplicate keys into the table!** + ## + ## Use `[]= proc<#[]=,OrderedTable[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table without introducing duplicates. + addImpl(enlarge) + +proc del*[A, B](t: var OrderedTable[A, B], key: A) = + ## Deletes `key` from hash table `t`. Does nothing if the key does not exist. + ## + ## O(n) complexity. + ## + ## See also: + ## * `pop proc<#pop,OrderedTable[A,B],A,B>`_ + ## * `clear proc<#clear,OrderedTable[A,B]>`_ to empty the whole table + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.toOrderedTable + a.del('a') + doAssert a == {'b': 9, 'c': 13}.toOrderedTable + a.del('z') + doAssert a == {'b': 9, 'c': 13}.toOrderedTable + + if t.counter == 0: return + var n: OrderedKeyValuePairSeq[A, B] + newSeq(n, len(t.data)) + var h = t.first + t.first = -1 + t.last = -1 + swap(t.data, n) + let hc = genHash(key) + while h >= 0: + var nxt = n[h].next + if isFilled(n[h].hcode): + if n[h].hcode == hc and n[h].key == key: + dec t.counter + else: + var j = -1 - rawGetKnownHC(t, n[h].key, n[h].hcode) + rawInsert(t, t.data, move n[h].key, move n[h].val, n[h].hcode, j) + h = nxt + +proc pop*[A, B](t: var OrderedTable[A, B], key: A, val: var B): bool {.since: (1, 1).} = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. + ## + ## O(n) complexity. 
+ ## + ## See also: + ## * `del proc<#del,OrderedTable[A,B],A>`_ + ## * `clear proc<#clear,OrderedTable[A,B]>`_ to empty the whole table + runnableExamples: + var + a = {'c': 5, 'b': 9, 'a': 13}.toOrderedTable + i: int + doAssert a.pop('b', i) == true + doAssert a == {'c': 5, 'a': 13}.toOrderedTable + doAssert i == 9 + i = 0 + doAssert a.pop('z', i) == false + doAssert a == {'c': 5, 'a': 13}.toOrderedTable + doAssert i == 0 + + var hc: Hash + var index = rawGet(t, key, hc) + result = index >= 0 + if result: + val = move(t.data[index].val) + del(t, key) + +proc clear*[A, B](t: var OrderedTable[A, B]) = + ## Resets the table so that it is empty. + ## + ## See also: + ## * `del proc<#del,OrderedTable[A,B],A>`_ + ## * `pop proc<#pop,OrderedTable[A,B],A,B>`_ + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.toOrderedTable + doAssert len(a) == 3 + clear(a) + doAssert len(a) == 0 + + clearImpl() + t.first = -1 + t.last = -1 -proc sort*[A, B](t: var OrderedTable[A, B], - cmp: proc (x,y: (A, B)): int) = - ## sorts `t` according to `cmp`. This modifies the internal list +proc sort*[A, B](t: var OrderedTable[A, B], cmp: proc (x, y: (A, B)): int, + order = SortOrder.Ascending) {.effectsOf: cmp.} = + ## Sorts `t` according to the function `cmp`. + ## + ## This modifies the internal list ## that kept the insertion order, so insertion order is lost after this ## call but key lookup and insertions remain possible after `sort` (in - ## contrast to the `sort` for count tables). + ## contrast to the `sort proc<#sort,CountTable[A]>`_ for count tables). 
+ runnableExamples: + import std/[algorithm] + var a = initOrderedTable[char, int]() + for i, c in "cab": + a[c] = 10*i + doAssert a == {'c': 0, 'a': 10, 'b': 20}.toOrderedTable + a.sort(system.cmp) + doAssert a == {'a': 10, 'b': 20, 'c': 0}.toOrderedTable + a.sort(system.cmp, order = SortOrder.Descending) + doAssert a == {'c': 0, 'b': 20, 'a': 10}.toOrderedTable + var list = t.first var p, q, e, tail, oldhead: int @@ -626,7 +1683,7 @@ proc sort*[A, B](t: var OrderedTable[A, B], elif qsize == 0 or q < 0: e = p; p = t.data[p].next; dec(psize) elif cmp((t.data[p].key, t.data[p].val), - (t.data[q].key, t.data[q].val)) <= 0: + (t.data[q].key, t.data[q].val)) * order <= 0: e = p; p = t.data[p].next; dec(psize) else: e = q; q = t.data[q].next; dec(qsize) @@ -640,173 +1697,609 @@ proc sort*[A, B](t: var OrderedTable[A, B], t.first = list t.last = tail -proc len*[A, B](t: OrderedTableRef[A, B]): int {.inline.} = - ## returns the number of keys in `t`. - result = t.counter +proc `$`*[A, B](t: OrderedTable[A, B]): string = + ## The `$` operator for ordered hash tables. Used internally when calling + ## `echo` on a table. + dollarImpl() -template forAllOrderedPairs(yieldStmt: stmt) {.dirty, immediate.} = - var h = t.first - while h >= 0: - var nxt = t.data[h].next - if isFilled(t.data[h].hcode): yieldStmt - h = nxt +proc `==`*[A, B](s, t: OrderedTable[A, B]): bool = + ## The `==` operator for ordered hash tables. Returns `true` if both the + ## content and the order are equal. 
+ runnableExamples: + let + a = {'a': 5, 'b': 9, 'c': 13}.toOrderedTable + b = {'b': 9, 'c': 13, 'a': 5}.toOrderedTable + doAssert a != b + + if s.counter != t.counter: + return false + if s.counter == 0 and t.counter == 0: + return true + var ht = t.first + var hs = s.first + while ht >= 0 and hs >= 0: + var nxtt = t.data[ht].next + var nxts = s.data[hs].next + if isFilled(t.data[ht].hcode) and isFilled(s.data[hs].hcode): + if (s.data[hs].key != t.data[ht].key) or (s.data[hs].val != t.data[ht].val): + return false + ht = nxtt + hs = nxts + return true -iterator pairs*[A, B](t: OrderedTableRef[A, B]): (A, B) = - ## iterates over any (key, value) pair in the table `t` in insertion + + +iterator pairs*[A, B](t: OrderedTable[A, B]): (A, B) = + ## Iterates over any `(key, value)` pair in the table `t` in insertion ## order. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,OrderedTable[A,B]>`_ + ## * `keys iterator<#keys.i,OrderedTable[A,B]>`_ + ## * `values iterator<#values.i,OrderedTable[A,B]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = { + ## 'o': [1, 5, 7, 9], + ## 'e': [2, 4, 6, 8] + ## }.toOrderedTable + ## + ## for k, v in a.pairs: + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: o + ## # value: [1, 5, 7, 9] + ## # key: e + ## # value: [2, 4, 6, 8] + ## ``` + + let L = len(t) forAllOrderedPairs: yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator mpairs*[A, B](t: OrderedTableRef[A, B]): (A, var B) = - ## iterates over any (key, value) pair in the table `t` in insertion - ## order. The values can be modified. +iterator mpairs*[A, B](t: var OrderedTable[A, B]): (A, var B) = + ## Iterates over any `(key, value)` pair in the table `t` (must be + ## declared as `var`) in insertion order. The values can be modified. 
+ ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTable[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,OrderedTable[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toOrderedTable + for k, v in a.mpairs: + v.add(v[0] + 10) + doAssert a == {'o': @[1, 5, 7, 9, 11], + 'e': @[2, 4, 6, 8, 12]}.toOrderedTable + + let L = len(t) forAllOrderedPairs: yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator keys*[A, B](t: OrderedTableRef[A, B]): A = - ## iterates over any key in the table `t` in insertion order. +iterator keys*[A, B](t: OrderedTable[A, B]): lent A = + ## Iterates over any key in the table `t` in insertion order. + ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTable[A,B]>`_ + ## * `values iterator<#values.i,OrderedTable[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toOrderedTable + for k in a.keys: + a[k].add(99) + doAssert a == {'o': @[1, 5, 7, 9, 99], + 'e': @[2, 4, 6, 8, 99]}.toOrderedTable + + let L = len(t) forAllOrderedPairs: yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator values*[A, B](t: OrderedTableRef[A, B]): B = - ## iterates over any value in the table `t` in insertion order. +iterator values*[A, B](t: OrderedTable[A, B]): lent B = + ## Iterates over any value in the table `t` in insertion order. 
+ ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTable[A,B]>`_ + ## * `keys iterator<#keys.i,OrderedTable[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,OrderedTable[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toOrderedTable + for v in a.values: + doAssert v.len == 4 + + let L = len(t) forAllOrderedPairs: yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator mvalues*[A, B](t: OrderedTableRef[A, B]): var B = - ## iterates over any value in the table `t` in insertion order. The values +iterator mvalues*[A, B](t: var OrderedTable[A, B]): var B = + ## Iterates over any value in the table `t` (must be + ## declared as `var`) in insertion order. The values ## can be modified. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,OrderedTable[A,B]>`_ + ## * `values iterator<#values.i,OrderedTable[A,B]>`_ + runnableExamples: + var a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.toOrderedTable + for v in a.mvalues: + v.add(99) + doAssert a == {'o': @[1, 5, 7, 9, 99], + 'e': @[2, 4, 6, 8, 99]}.toOrderedTable + + let L = len(t) forAllOrderedPairs: yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +# --------------------------------------------------------------------------- +# --------------------------- OrderedTableRef ------------------------------- +# --------------------------------------------------------------------------- + +proc newOrderedTable*[A, B](initialSize = defaultInitialSize): OrderedTableRef[A, B] = + ## Creates a new ordered ref hash table that is empty. 
+ ## + ## See also: + ## * `newOrderedTable proc<#newOrderedTable,openArray[]>`_ for creating + ## an `OrderedTableRef` from a collection of `(key, value)` pairs + ## * `initOrderedTable proc<#initOrderedTable>`_ for creating an + ## `OrderedTable` + runnableExamples: + let + a = newOrderedTable[int, string]() + b = newOrderedTable[char, seq[int]]() + new(result) + {.noSideEffect.}: + result[] = initOrderedTable[A, B](initialSize) -proc `[]`*[A, B](t: OrderedTableRef[A, B], key: A): B = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## default empty value for the type `B` is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. +proc newOrderedTable*[A, B](pairs: openArray[(A, B)]): OrderedTableRef[A, B] = + ## Creates a new ordered ref hash table that contains the given `pairs`. + ## + ## `pairs` is a container consisting of `(key, value)` tuples. + ## + ## See also: + ## * `newOrderedTable proc<#newOrderedTable>`_ + ## * `toOrderedTable proc<#toOrderedTable,openArray[]>`_ for an + ## `OrderedTable` version + runnableExamples: + let a = [('a', 5), ('b', 9)] + let b = newOrderedTable(a) + assert b == {'a': 5, 'b': 9}.newOrderedTable + + result = newOrderedTable[A, B](pairs.len) + {.noSideEffect.}: + for key, val in items(pairs): result[key] = val + + +proc `[]`*[A, B](t: OrderedTableRef[A, B], key: A): var B = + ## Retrieves the value at `t[key]`. + ## + ## If `key` is not in `t`, the `KeyError` exception is raised. + ## One can check with `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ whether + ## the key exists. + ## + ## See also: + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A>`_ to return + ## a default value (e.g. 
zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]=,OrderedTableRef[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ for checking if + ## a key is in the table + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert a['a'] == 5 + doAssertRaises(KeyError): + echo a['z'] result = t[][key] -proc mget*[A, B](t: OrderedTableRef[A, B], key: A): var B = - ## retrieves the value at ``t[key]``. The value can be modified. - ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. - result = t[].mget(key) +proc `[]=`*[A, B](t: OrderedTableRef[A, B], key: A, val: sink B) = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `del proc<#del,OrderedTableRef[A,B],A>`_ for removing a key from the table + runnableExamples: + var a = newOrderedTable[char, int]() + a['x'] = 7 + a['y'] = 33 + doAssert a == {'x': 7, 'y': 33}.newOrderedTable + + t[][key] = val + +proc hasKey*[A, B](t: OrderedTableRef[A, B], key: A): bool = + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `contains proc<#contains,OrderedTableRef[A,B],A>`_ for use with the `in` + ## operator + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A>`_ to return + ## a default value (e.g. 
zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert a.hasKey('a') == true + doAssert a.hasKey('z') == false + + result = t[].hasKey(key) + +proc contains*[A, B](t: OrderedTableRef[A, B], key: A): bool = + ## Alias of `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ for use with + ## the `in` operator. + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert 'b' in a == true + doAssert a.contains('z') == false + + return hasKey[A, B](t, key) + +proc hasKeyOrPut*[A, B](t: OrderedTableRef[A, B], key: A, val: B): bool = + ## Returns true if `key` is in the table, otherwise inserts `value`. + ## + ## See also: + ## * `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.newOrderedTable + if a.hasKeyOrPut('a', 50): + a['a'] = 99 + if a.hasKeyOrPut('z', 50): + a['z'] = 99 + doAssert a == {'a': 99, 'b': 9, 'z': 50}.newOrderedTable + + result = t[].hasKeyOrPut(key, val) + +proc getOrDefault*[A, B](t: OrderedTableRef[A, B], key: A): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## default initialization value for type `B` is returned (e.g. 0 for any + ## integer type). 
+ ## + ## See also: + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert a.getOrDefault('a') == 5 + doAssert a.getOrDefault('z') == 0 + + getOrDefault(t[], key) + +proc getOrDefault*[A, B](t: OrderedTableRef[A, B], key: A, default: B): B = + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise, `default` is returned. + ## + ## See also: + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `mgetOrPut proc<#mgetOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A>`_ to return + ## a default value (e.g. zero for int) if the key doesn't exist + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert a.getOrDefault('a', 99) == 5 + doAssert a.getOrDefault('z', 99) == 99 + + getOrDefault(t[], key, default) proc mgetOrPut*[A, B](t: OrderedTableRef[A, B], key: A, val: B): var B = - ## retrieves value at ``t[key]`` or puts ``val`` if not present, either way + ## Retrieves value at `t[key]` or puts `val` if not present, either way ## returning a value which can be modified. + ## + ## See also: + ## * `[] proc<#[],OrderedTableRef[A,B],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,OrderedTableRef[A,B],A>`_ + ## * `hasKeyOrPut proc<#hasKeyOrPut,OrderedTableRef[A,B],A,B>`_ + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A>`_ to return + ## a default value (e.g. 
zero for int) if the key doesn't exist + ## * `getOrDefault proc<#getOrDefault,OrderedTableRef[A,B],A,B>`_ to return + ## a custom value if the key doesn't exist + runnableExamples: + var a = {'a': 5, 'b': 9}.newOrderedTable + doAssert a.mgetOrPut('a', 99) == 5 + doAssert a.mgetOrPut('z', 99) == 99 + doAssert a == {'a': 5, 'b': 9, 'z': 99}.newOrderedTable + result = t[].mgetOrPut(key, val) -proc hasKeyOrPut*[A, B](t: var OrderedTableRef[A, B], key: A, val: B): bool = - ## returns true iff `key` is in the table, otherwise inserts `val`. - result = t[].hasKeyOrPut(key, val) +proc mgetOrPut*[A, B](t: OrderedTableRef[A, B], key: A): var B = + ## Retrieves the value at `t[key]` or puts the + ## default initialization value for type `B` (e.g. 0 for any + ## integer type). + runnableExamples: + var a = {'a': 5}.toOrderedTable + doAssert a.mgetOrPut('a') == 5 + a.mgetOrPut('z').inc + doAssert a == {'a': 5, 'z': 1}.toOrderedTable -proc hasKey*[A, B](t: OrderedTableRef[A, B], key: A): bool = - ## returns true iff `key` is in the table `t`. - result = t[].hasKey(key) + t[].mgetOrPut(key) -proc `[]=`*[A, B](t: OrderedTableRef[A, B], key: A, val: B) = - ## puts a (key, value)-pair into `t`. - t[][key] = val +proc len*[A, B](t: OrderedTableRef[A, B]): int {.inline.} = + ## Returns the number of keys in `t`. + runnableExamples: + let a = {'a': 5, 'b': 9}.newOrderedTable + doAssert len(a) == 2 -proc add*[A, B](t: OrderedTableRef[A, B], key: A, val: B) = - ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. + result = t.counter + +proc add*[A, B](t: OrderedTableRef[A, B], key: A, val: sink B) {.deprecated: + "Deprecated since v1.4; it was more confusing than useful, use `[]=`".} = + ## Puts a new `(key, value)` pair into `t` even if `t[key]` already exists. 
+ ## + ## **This can introduce duplicate keys into the table!** + ## + ## Use `[]= proc<#[]=,OrderedTableRef[A,B],A,sinkB>`_ for inserting a new + ## (key, value) pair in the table without introducing duplicates. t[].add(key, val) -proc newOrderedTable*[A, B](initialSize=64): OrderedTableRef[A, B] = - ## creates a new ordered hash table that is empty. +proc del*[A, B](t: OrderedTableRef[A, B], key: A) = + ## Deletes `key` from hash table `t`. Does nothing if the key does not exist. ## - ## `initialSize` needs to be a power of two. If you need to accept runtime - ## values for this you could use the ``nextPowerOfTwo`` proc from the - ## `math <math.html>`_ module or the ``rightSize`` proc from this module. - new(result) - result[] = initOrderedTable[A, B]() + ## See also: + ## * `clear proc<#clear,OrderedTableRef[A,B]>`_ to empty the whole table + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.newOrderedTable + a.del('a') + doAssert a == {'b': 9, 'c': 13}.newOrderedTable + a.del('z') + doAssert a == {'b': 9, 'c': 13}.newOrderedTable -proc newOrderedTable*[A, B](pairs: openArray[(A, B)]): OrderedTableRef[A, B] = - ## creates a new ordered hash table that contains the given `pairs`. - result = newOrderedTable[A, B](rightSize(pairs.len)) - for key, val in items(pairs): result[key] = val + t[].del(key) + +proc pop*[A, B](t: OrderedTableRef[A, B], key: A, val: var B): bool {.since: (1, 1).} = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. 
+ ## + ## See also: + ## * `del proc<#del,OrderedTableRef[A,B],A>`_ + ## * `clear proc<#clear,OrderedTableRef[A,B]>`_ to empty the whole table + runnableExamples: + var + a = {'c': 5, 'b': 9, 'a': 13}.newOrderedTable + i: int + doAssert a.pop('b', i) == true + doAssert a == {'c': 5, 'a': 13}.newOrderedTable + doAssert i == 9 + i = 0 + doAssert a.pop('z', i) == false + doAssert a == {'c': 5, 'a': 13}.newOrderedTable + doAssert i == 0 + + pop(t[], key, val) + +proc clear*[A, B](t: OrderedTableRef[A, B]) = + ## Resets the table so that it is empty. + ## + ## See also: + ## * `del proc<#del,OrderedTableRef[A,B],A>`_ + runnableExamples: + var a = {'a': 5, 'b': 9, 'c': 13}.newOrderedTable + doAssert len(a) == 3 + clear(a) + doAssert len(a) == 0 + + clear(t[]) + +proc sort*[A, B](t: OrderedTableRef[A, B], cmp: proc (x, y: (A, B)): int, + order = SortOrder.Ascending) {.effectsOf: cmp.} = + ## Sorts `t` according to the function `cmp`. + ## + ## This modifies the internal list + ## that kept the insertion order, so insertion order is lost after this + ## call but key lookup and insertions remain possible after `sort` (in + ## contrast to the `sort proc<#sort,CountTableRef[A]>`_ for count tables). + runnableExamples: + import std/[algorithm] + var a = newOrderedTable[char, int]() + for i, c in "cab": + a[c] = 10*i + doAssert a == {'c': 0, 'a': 10, 'b': 20}.newOrderedTable + a.sort(system.cmp) + doAssert a == {'a': 10, 'b': 20, 'c': 0}.newOrderedTable + a.sort(system.cmp, order = SortOrder.Descending) + doAssert a == {'c': 0, 'b': 20, 'a': 10}.newOrderedTable + + t[].sort(cmp, order = order) proc `$`*[A, B](t: OrderedTableRef[A, B]): string = - ## The `$` operator for ordered hash tables. + ## The `$` operator for hash tables. Used internally when calling `echo` + ## on a table. dollarImpl() -proc sort*[A, B](t: OrderedTableRef[A, B], - cmp: proc (x,y: (A, B)): int) = - ## sorts `t` according to `cmp`. 
This modifies the internal list - ## that kept the insertion order, so insertion order is lost after this - ## call but key lookup and insertions remain possible after `sort` (in - ## contrast to the `sort` for count tables). - t[].sort(cmp) +proc `==`*[A, B](s, t: OrderedTableRef[A, B]): bool = + ## The `==` operator for ordered hash tables. Returns true if either both + ## tables are `nil`, or neither is `nil` and the content and the order of + ## both are equal. + runnableExamples: + let + a = {'a': 5, 'b': 9, 'c': 13}.newOrderedTable + b = {'b': 9, 'c': 13, 'a': 5}.newOrderedTable + doAssert a != b -# ------------------------------ count tables ------------------------------- + if isNil(s): result = isNil(t) + elif isNil(t): result = false + else: result = s[] == t[] -type - CountTable* {.myShallow.}[ - A] = object ## table that counts the number of each key - data: seq[tuple[key: A, val: int]] - counter: int - CountTableRef*[A] = ref CountTable[A] -{.deprecated: [TCountTable: CountTable, PCountTable: CountTableRef].} -proc len*[A](t: CountTable[A]): int = - ## returns the number of keys in `t`. - result = t.counter +iterator pairs*[A, B](t: OrderedTableRef[A, B]): (A, B) = + ## Iterates over any `(key, value)` pair in the table `t` in insertion + ## order. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,OrderedTableRef[A,B]>`_ + ## * `keys iterator<#keys.i,OrderedTableRef[A,B]>`_ + ## * `values iterator<#values.i,OrderedTableRef[A,B]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = { + ## 'o': [1, 5, 7, 9], + ## 'e': [2, 4, 6, 8] + ## }.newOrderedTable + ## + ## for k, v in a.pairs: + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: o + ## # value: [1, 5, 7, 9] + ## # key: e + ## # value: [2, 4, 6, 8] + ## ``` -iterator pairs*[A](t: CountTable[A]): (A, int) = - ## iterates over any (key, value) pair in the table `t`. 
- for h in 0..high(t.data): - if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) + let L = len(t) + forAllOrderedPairs: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator mpairs*[A](t: var CountTable[A]): (A, var int) = - ## iterates over any (key, value) pair in the table `t`. The values can - ## be modified. - for h in 0..high(t.data): - if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) +iterator mpairs*[A, B](t: OrderedTableRef[A, B]): (A, var B) = + ## Iterates over any `(key, value)` pair in the table `t` in insertion + ## order. The values can be modified. + ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTableRef[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,OrderedTableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newOrderedTable + for k, v in a.mpairs: + v.add(v[0] + 10) + doAssert a == {'o': @[1, 5, 7, 9, 11], + 'e': @[2, 4, 6, 8, 12]}.newOrderedTable + + let L = len(t) + forAllOrderedPairs: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator keys*[A](t: CountTable[A]): A = - ## iterates over any key in the table `t`. - for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].key +iterator keys*[A, B](t: OrderedTableRef[A, B]): lent A = + ## Iterates over any key in the table `t` in insertion order. 
+ ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTableRef[A,B]>`_ + ## * `values iterator<#values.i,OrderedTableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newOrderedTable + for k in a.keys: + a[k].add(99) + doAssert a == {'o': @[1, 5, 7, 9, 99], 'e': @[2, 4, 6, 8, + 99]}.newOrderedTable + + let L = len(t) + forAllOrderedPairs: + yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator values*[A, B](t: OrderedTableRef[A, B]): lent B = + ## Iterates over any value in the table `t` in insertion order. + ## + ## See also: + ## * `pairs iterator<#pairs.i,OrderedTableRef[A,B]>`_ + ## * `keys iterator<#keys.i,OrderedTableRef[A,B]>`_ + ## * `mvalues iterator<#mvalues.i,OrderedTableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newOrderedTable + for v in a.values: + doAssert v.len == 4 + + let L = len(t) + forAllOrderedPairs: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator mvalues*[A, B](t: OrderedTableRef[A, B]): var B = + ## Iterates over any value in the table `t` in insertion order. The values + ## can be modified. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,OrderedTableRef[A,B]>`_ + ## * `values iterator<#values.i,OrderedTableRef[A,B]>`_ + runnableExamples: + let a = { + 'o': @[1, 5, 7, 9], + 'e': @[2, 4, 6, 8] + }.newOrderedTable + for v in a.mvalues: + v.add(99) + doAssert a == {'o': @[1, 5, 7, 9, 99], + 'e': @[2, 4, 6, 8, 99]}.newOrderedTable + + let L = len(t) + forAllOrderedPairs: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") -iterator values*[A](t: CountTable[A]): int = - ## iterates over any value in the table `t`. 
- for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].val -iterator mvalues*[A](t: CountTable[A]): var int = - ## iterates over any value in the table `t`. The values can be modified. - for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].val -proc rawGet[A](t: CountTable[A], key: A): int = - var h: THash = hash(key) and high(t.data) # start with real hash value - while t.data[h].val != 0: - if t.data[h].key == key: return h - h = nextTry(h, high(t.data)) - result = -1 - h # < 0 => MISSING; insert idx = -1 - result -proc `[]`*[A](t: CountTable[A], key: A): int = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## 0 is returned. One can check with ``hasKey`` whether the key - ## exists. - var index = rawGet(t, key) - if index >= 0: result = t.data[index].val -proc mget*[A](t: var CountTable[A], key: A): var int = - ## retrieves the value at ``t[key]``. The value can be modified. - ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. - var index = rawGet(t, key) - if index >= 0: result = t.data[index].val - else: raise newException(KeyError, "key not found: " & $key) -proc hasKey*[A](t: CountTable[A], key: A): bool = - ## returns true iff `key` is in the table `t`. - result = rawGet(t, key) >= 0 -proc rawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]], +# ------------------------------------------------------------------------- +# ------------------------------ CountTable ------------------------------- +# ------------------------------------------------------------------------- + +type + CountTable*[A] = object + ## Hash table that counts the number of each key. + ## + ## For creating an empty CountTable, use `initCountTable proc + ## <#initCountTable>`_. + data: seq[tuple[key: A, val: int]] + counter: int + isSorted: bool + CountTableRef*[A] = ref CountTable[A] ## Ref version of + ## `CountTable<#CountTable>`_. 
+ ## + ## For creating a new empty CountTableRef, use `newCountTable proc + ## <#newCountTable>`_. + + +# ------------------------------ helpers --------------------------------- + +proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]], key: A, val: int) = - var h: THash = hash(key) and high(data) + var h: Hash = hash(key) and high(data) while data[h].val != 0: h = nextTry(h, high(data)) data[h].key = key data[h].val = val @@ -815,200 +2308,665 @@ proc enlarge[A](t: var CountTable[A]) = var n: seq[tuple[key: A, val: int]] newSeq(n, len(t.data) * growthFactor) for i in countup(0, high(t.data)): - if t.data[i].val != 0: rawInsert(t, n, t.data[i].key, t.data[i].val) + if t.data[i].val != 0: ctRawInsert(t, n, move t.data[i].key, move t.data[i].val) swap(t.data, n) -proc `[]=`*[A](t: var CountTable[A], key: A, val: int) = - ## puts a (key, value)-pair into `t`. `val` has to be positive. - assert val > 0 - var h = rawGet(t, key) - if h >= 0: - t.data[h].val = val - else: - h = -1 - h - t.data[h].key = key - t.data[h].val = val +proc rawGet[A](t: CountTable[A], key: A): int = + if t.data.len == 0: + return -1 + var h: Hash = hash(key) and high(t.data) # start with real hash value + while t.data[h].val != 0: + if t.data[h].key == key: return h + h = nextTry(h, high(t.data)) + result = -1 - h # < 0 => MISSING; insert idx = -1 - result + +template ctget(t, key, default: untyped): untyped = + var index = rawGet(t, key) + result = if index >= 0: t.data[index].val else: default + +proc inc*[A](t: var CountTable[A], key: A, val = 1) + +# ---------------------------------------------------------------------- -proc initCountTable*[A](initialSize=64): CountTable[A] = - ## creates a new count table that is empty. +proc initCountTable*[A](initialSize = defaultInitialSize): CountTable[A] = + ## Creates a new count table that is empty. ## - ## `initialSize` needs to be a power of two. 
If you need to accept runtime - ## values for this you could use the ``nextPowerOfTwo`` proc from the - ## `math <math.html>`_ module or the ``rightSize`` proc in this module. - assert isPowerOfTwo(initialSize) - result.counter = 0 - newSeq(result.data, initialSize) + ## Starting from Nim v0.20, tables are initialized by default and it is + ## not necessary to call this function explicitly. + ## + ## See also: + ## * `toCountTable proc<#toCountTable,openArray[A]>`_ + ## * `newCountTable proc<#newCountTable>`_ for creating a + ## `CountTableRef` + result = default(CountTable[A]) + initImpl(result, initialSize) proc toCountTable*[A](keys: openArray[A]): CountTable[A] = - ## creates a new count table with every key in `keys` having a count of 1. - result = initCountTable[A](rightSize(keys.len)) - for key in items(keys): result[key] = 1 + ## Creates a new count table with every member of a container `keys` + ## having a count of how many times it occurs in that container. + result = initCountTable[A](keys.len) + for key in items(keys): result.inc(key) -proc `$`*[A](t: CountTable[A]): string = - ## The `$` operator for count tables. - dollarImpl() +proc `[]`*[A](t: CountTable[A], key: A): int = + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise `0` is returned. + ## + ## See also: + ## * `getOrDefault<#getOrDefault,CountTable[A],A,int>`_ to return + ## a custom value if the key doesn't exist + ## * `[]= proc<#[]%3D,CountTable[A],A,int>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,CountTable[A],A>`_ for checking if a key + ## is in the table + assert(not t.isSorted, "CountTable must not be used after sorting") + ctget(t, key, 0) + +template cntMakeEmpty(i) = t.data[i].val = 0 +template cntCellEmpty(i) = t.data[i].val == 0 +template cntCellHash(i) = hash(t.data[i].key) + +proc `[]=`*[A](t: var CountTable[A], key: A, val: int) = + ## Inserts a `(key, value)` pair into `t`. 
+ ## + ## See also: + ## * `[] proc<#[],CountTable[A],A>`_ for retrieving a value of a key + ## * `inc proc<#inc,CountTable[A],A,int>`_ for incrementing a + ## value of a key + assert(not t.isSorted, "CountTable must not be used after sorting") + assert val >= 0 + if val == 0: + delImplNoHCode(cntMakeEmpty, cntCellEmpty, cntCellHash) + else: + let h = rawGet(t, key) + if h >= 0: + t.data[h].val = val + else: + insertImpl() proc inc*[A](t: var CountTable[A], key: A, val = 1) = - ## increments `t[key]` by `val`. + ## Increments `t[key]` by `val` (default: 1). + runnableExamples: + var a = toCountTable("aab") + a.inc('a') + a.inc('b', 10) + doAssert a == toCountTable("aaabbbbbbbbbbb") + + assert(not t.isSorted, "CountTable must not be used after sorting") var index = rawGet(t, key) if index >= 0: inc(t.data[index].val, val) + if t.data[index].val == 0: + delImplIdx(t, index, cntMakeEmpty, cntCellEmpty, cntCellHash) else: - if mustRehash(len(t.data), t.counter): enlarge(t) - rawInsert(t, t.data, key, val) - inc(t.counter) + if val != 0: + insertImpl() + +proc len*[A](t: CountTable[A]): int = + ## Returns the number of keys in `t`. + result = t.counter proc smallest*[A](t: CountTable[A]): tuple[key: A, val: int] = - ## returns the largest (key,val)-pair. Efficiency: O(n) - assert t.len > 0 - var minIdx = 0 - for h in 1..high(t.data): - if t.data[h].val > 0 and t.data[minIdx].val > t.data[h].val: minIdx = h + ## Returns the `(key, value)` pair with the smallest `val`. Efficiency: O(n) + ## + ## See also: + ## * `largest proc<#largest,CountTable[A]>`_ + assert t.len > 0, "counttable is empty" + var minIdx = -1 + for h in 0 .. high(t.data): + if t.data[h].val > 0 and (minIdx == -1 or t.data[minIdx].val > t.data[h].val): + minIdx = h result.key = t.data[minIdx].key result.val = t.data[minIdx].val proc largest*[A](t: CountTable[A]): tuple[key: A, val: int] = - ## returns the (key,val)-pair with the largest `val`. 
Efficiency: O(n) - assert t.len > 0 + ## Returns the `(key, value)` pair with the largest `val`. Efficiency: O(n) + ## + ## See also: + ## * `smallest proc<#smallest,CountTable[A]>`_ + assert t.len > 0, "counttable is empty" var maxIdx = 0 - for h in 1..high(t.data): + for h in 1 .. high(t.data): if t.data[maxIdx].val < t.data[h].val: maxIdx = h result.key = t.data[maxIdx].key result.val = t.data[maxIdx].val -proc sort*[A](t: var CountTable[A]) = - ## sorts the count table so that the entry with the highest counter comes - ## first. This is destructive! You must not modify `t` afterwards! - ## You can use the iterators `pairs`, `keys`, and `values` to iterate over - ## `t` in the sorted order. +proc hasKey*[A](t: CountTable[A], key: A): bool = + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `contains proc<#contains,CountTable[A],A>`_ for use with the `in` + ## operator + ## * `[] proc<#[],CountTable[A],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,CountTable[A],A,int>`_ to return + ## a custom value if the key doesn't exist + assert(not t.isSorted, "CountTable must not be used after sorting") + result = rawGet(t, key) >= 0 - # we use shellsort here; fast enough and simple - var h = 1 - while true: - h = 3 * h + 1 - if h >= high(t.data): break - while true: - h = h div 3 - for i in countup(h, high(t.data)): - var j = i - while t.data[j-h].val <= t.data[j].val: - swap(t.data[j], t.data[j-h]) - j = j-h - if j < h: break - if h == 1: break +proc contains*[A](t: CountTable[A], key: A): bool = + ## Alias of `hasKey proc<#hasKey,CountTable[A],A>`_ for use with + ## the `in` operator. + return hasKey[A](t, key) -proc len*[A](t: CountTableRef[A]): int = - ## returns the number of keys in `t`. - result = t.counter +proc getOrDefault*[A](t: CountTable[A], key: A; default: int = 0): int = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## integer value of `default` is returned. 
+ ## + ## See also: + ## * `[] proc<#[],CountTable[A],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,CountTable[A],A>`_ for checking if a key + ## is in the table + ctget(t, key, default) + +proc del*[A](t: var CountTable[A], key: A) {.since: (1, 1).} = + ## Deletes `key` from table `t`. Does nothing if the key does not exist. + ## + ## See also: + ## * `pop proc<#pop,CountTable[A],A,int>`_ + ## * `clear proc<#clear,CountTable[A]>`_ to empty the whole table + runnableExamples: + var a = toCountTable("aabbbccccc") + a.del('b') + assert a == toCountTable("aaccccc") + a.del('b') + assert a == toCountTable("aaccccc") + a.del('c') + assert a == toCountTable("aa") + + delImplNoHCode(cntMakeEmpty, cntCellEmpty, cntCellHash) + +proc pop*[A](t: var CountTable[A], key: A, val: var int): bool {.since: (1, 1).} = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. + ## + ## See also: + ## * `del proc<#del,CountTable[A],A>`_ + ## * `clear proc<#clear,CountTable[A]>`_ to empty the whole table + runnableExamples: + var a = toCountTable("aabbbccccc") + var i = 0 + assert a.pop('b', i) + assert i == 3 + i = 99 + assert not a.pop('b', i) + assert i == 99 -iterator pairs*[A](t: CountTableRef[A]): (A, int) = - ## iterates over any (key, value) pair in the table `t`. - for h in 0..high(t.data): - if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) + var index = rawGet(t, key) + result = index >= 0 + if result: + val = move(t.data[index].val) + delImplIdx(t, index, cntMakeEmpty, cntCellEmpty, cntCellHash) -iterator mpairs*[A](t: CountTableRef[A]): (A, var int) = - ## iterates over any (key, value) pair in the table `t`. The values can - ## be modified. - for h in 0..high(t.data): - if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) +proc clear*[A](t: var CountTable[A]) = + ## Resets the table so that it is empty. 
+ ## + ## See also: + ## * `del proc<#del,CountTable[A],A>`_ + ## * `pop proc<#pop,CountTable[A],A,int>`_ + clearImpl() + t.isSorted = false + +func ctCmp[T](a, b: tuple[key: T, val: int]): int = + result = system.cmp(a.val, b.val) + +proc sort*[A](t: var CountTable[A], order = SortOrder.Descending) = + ## Sorts the count table so that, by default, the entry with the + ## highest counter comes first. + ## + ## .. warning:: This is destructive! Once sorted, you must not modify `t` afterwards! + ## + ## You can use the iterators `pairs<#pairs.i,CountTable[A]>`_, + ## `keys<#keys.i,CountTable[A]>`_, and `values<#values.i,CountTable[A]>`_ + ## to iterate over `t` in the sorted order. + runnableExamples: + import std/[algorithm, sequtils] + var a = toCountTable("abracadabra") + doAssert a == "aaaaabbrrcd".toCountTable + a.sort() + doAssert toSeq(a.values) == @[5, 2, 2, 1, 1] + a.sort(SortOrder.Ascending) + doAssert toSeq(a.values) == @[1, 1, 2, 2, 5] + + t.data.sort(cmp = ctCmp, order = order) + t.isSorted = true + +proc merge*[A](s: var CountTable[A], t: CountTable[A]) = + ## Merges the second table into the first one (must be declared as `var`). + runnableExamples: + var a = toCountTable("aaabbc") + let b = toCountTable("bcc") + a.merge(b) + doAssert a == toCountTable("aaabbbccc") + + assert(not s.isSorted, "CountTable must not be used after sorting") + for key, value in t: + s.inc(key, value) + +when (NimMajor, NimMinor) <= (1, 0): + proc merge*[A](s, t: CountTable[A]): CountTable[A] = + ## Merges the two tables into a new one. + runnableExamples: + let + a = toCountTable("aaabbc") + b = toCountTable("bcc") + doAssert merge(a, b) == toCountTable("aaabbbccc") + + result = initCountTable[A](nextPowerOfTwo(max(s.len, t.len))) + for table in @[s, t]: + for key, value in table: + result.inc(key, value) -iterator keys*[A](t: CountTableRef[A]): A = - ## iterates over any key in the table `t`. 
- for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].key +proc `$`*[A](t: CountTable[A]): string = + ## The `$` operator for count tables. Used internally when calling `echo` + ## on a table. + dollarImpl() -iterator values*[A](t: CountTableRef[A]): int = - ## iterates over any value in the table `t`. - for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].val +proc `==`*[A](s, t: CountTable[A]): bool = + ## The `==` operator for count tables. Returns `true` if both tables + ## contain the same keys with the same count. Insert order does not matter. + equalsImpl(s, t) -iterator mvalues*[A](t: CountTableRef[A]): var int = - ## iterates over any value in the table `t`. The values can be modified. - for h in 0..high(t.data): - if t.data[h].val != 0: yield t.data[h].val + +iterator pairs*[A](t: CountTable[A]): (A, int) = + ## Iterates over any `(key, value)` pair in the table `t`. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,CountTable[A]>`_ + ## * `keys iterator<#keys.i,CountTable[A]>`_ + ## * `values iterator<#values.i,CountTable[A]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = toCountTable("abracadabra") + ## + ## for k, v in pairs(a): + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: a + ## # value: 5 + ## # key: b + ## # value: 2 + ## # key: c + ## # value: 1 + ## # key: d + ## # value: 1 + ## # key: r + ## # value: 2 + ## ``` + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator mpairs*[A](t: var CountTable[A]): (A, var int) = + ## Iterates over any `(key, value)` pair in the table `t` (must be + ## declared as `var`). The values can be modified. 
+ ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTable[A]>`_ + ## * `mvalues iterator<#mvalues.i,CountTable[A]>`_ + runnableExamples: + var a = toCountTable("abracadabra") + for k, v in mpairs(a): + v = 2 + doAssert a == toCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator keys*[A](t: CountTable[A]): lent A = + ## Iterates over any key in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTable[A]>`_ + ## * `values iterator<#values.i,CountTable[A]>`_ + runnableExamples: + var a = toCountTable("abracadabra") + for k in keys(a): + a[k] = 2 + doAssert a == toCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator values*[A](t: CountTable[A]): int = + ## Iterates over any value in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTable[A]>`_ + ## * `keys iterator<#keys.i,CountTable[A]>`_ + ## * `mvalues iterator<#mvalues.i,CountTable[A]>`_ + runnableExamples: + let a = toCountTable("abracadabra") + for v in values(a): + assert v < 10 + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +iterator mvalues*[A](t: var CountTable[A]): var int = + ## Iterates over any value in the table `t` (must be + ## declared as `var`). The values can be modified. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,CountTable[A]>`_ + ## * `values iterator<#values.i,CountTable[A]>`_ + runnableExamples: + var a = toCountTable("abracadabra") + for v in mvalues(a): + v = 2 + doAssert a == toCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. 
high(t.data): + if t.data[h].val != 0: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + + + + + + + +# --------------------------------------------------------------------------- +# ---------------------------- CountTableRef -------------------------------- +# --------------------------------------------------------------------------- + +proc inc*[A](t: CountTableRef[A], key: A, val = 1) + +proc newCountTable*[A](initialSize = defaultInitialSize): CountTableRef[A] = + ## Creates a new ref count table that is empty. + ## + ## See also: + ## * `newCountTable proc<#newCountTable,openArray[A]>`_ for creating + ## a `CountTableRef` from a collection + ## * `initCountTable proc<#initCountTable>`_ for creating a + ## `CountTable` + new(result) + {.noSideEffect.}: + result[] = initCountTable[A](initialSize) + +proc newCountTable*[A](keys: openArray[A]): CountTableRef[A] = + ## Creates a new ref count table with every member of a container `keys` + ## having a count of how many times it occurs in that container. + result = newCountTable[A](keys.len) + {.noSideEffect.}: + for key in items(keys): result.inc(key) proc `[]`*[A](t: CountTableRef[A], key: A): int = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, - ## 0 is returned. One can check with ``hasKey`` whether the key - ## exists. + ## Retrieves the value at `t[key]` if `key` is in `t`. + ## Otherwise `0` is returned. + ## + ## See also: + ## * `getOrDefault<#getOrDefault,CountTableRef[A],A,int>`_ to return + ## a custom value if the key doesn't exist + ## * `inc proc<#inc,CountTableRef[A],A,int>`_ to inc even if missing + ## * `[]= proc<#[]%3D,CountTableRef[A],A,int>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc<#hasKey,CountTableRef[A],A>`_ for checking if a key + ## is in the table result = t[][key] -proc mget*[A](t: CountTableRef[A], key: A): var int = - ## retrieves the value at ``t[key]``. 
The value can be modified. - ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. - result = t[].mget(key) +proc `[]=`*[A](t: CountTableRef[A], key: A, val: int) = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc<#[],CountTableRef[A],A>`_ for retrieving a value of a key + ## * `inc proc<#inc,CountTableRef[A],A,int>`_ for incrementing a + ## value of a key + assert val > 0 + {.noSideEffect.}: + t[][key] = val + +proc inc*[A](t: CountTableRef[A], key: A, val = 1) = + ## Increments `t[key]` by `val` (default: 1). + runnableExamples: + var a = newCountTable("aab") + a.inc('a') + a.inc('b', 10) + doAssert a == newCountTable("aaabbbbbbbbbbb") + {.noSideEffect.}: + t[].inc(key, val) + +proc smallest*[A](t: CountTableRef[A]): tuple[key: A, val: int] = + ## Returns the `(key, value)` pair with the smallest `val`. Efficiency: O(n) + ## + ## See also: + ## * `largest proc<#largest,CountTableRef[A]>`_ + t[].smallest + +proc largest*[A](t: CountTableRef[A]): tuple[key: A, val: int] = + ## Returns the `(key, value)` pair with the largest `val`. Efficiency: O(n) + ## + ## See also: + ## * `smallest proc<#smallest,CountTable[A]>`_ + t[].largest proc hasKey*[A](t: CountTableRef[A], key: A): bool = - ## returns true iff `key` is in the table `t`. + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `contains proc<#contains,CountTableRef[A],A>`_ for use with the `in` + ## operator + ## * `[] proc<#[],CountTableRef[A],A>`_ for retrieving a value of a key + ## * `getOrDefault proc<#getOrDefault,CountTableRef[A],A,int>`_ to return + ## a custom value if the key doesn't exist result = t[].hasKey(key) -proc `[]=`*[A](t: CountTableRef[A], key: A, val: int) = - ## puts a (key, value)-pair into `t`. `val` has to be positive. - assert val > 0 - t[][key] = val +proc contains*[A](t: CountTableRef[A], key: A): bool = + ## Alias of `hasKey proc<#hasKey,CountTableRef[A],A>`_ for use with + ## the `in` operator. 
+ return hasKey[A](t, key) -proc newCountTable*[A](initialSize=64): CountTableRef[A] = - ## creates a new count table that is empty. +proc getOrDefault*[A](t: CountTableRef[A], key: A, default: int): int = + ## Retrieves the value at `t[key]` if `key` is in `t`. Otherwise, the + ## integer value of `default` is returned. ## - ## `initialSize` needs to be a power of two. If you need to accept runtime - ## values for this you could use the ``nextPowerOfTwo`` proc from the - ## `math <math.html>`_ module or the ``rightSize`` method in this module. - new(result) - result[] = initCountTable[A](initialSize) + ## See also: + ## * `[] proc<#[],CountTableRef[A],A>`_ for retrieving a value of a key + ## * `hasKey proc<#hasKey,CountTableRef[A],A>`_ for checking if a key + ## is in the table + result = t[].getOrDefault(key, default) -proc newCountTable*[A](keys: openArray[A]): CountTableRef[A] = - ## creates a new count table with every key in `keys` having a count of 1. - result = newCountTable[A](rightSize(keys.len)) - for key in items(keys): result[key] = 1 +proc len*[A](t: CountTableRef[A]): int = + ## Returns the number of keys in `t`. + result = t.counter + +proc del*[A](t: CountTableRef[A], key: A) {.since: (1, 1).} = + ## Deletes `key` from table `t`. Does nothing if the key does not exist. + ## + ## See also: + ## * `pop proc<#pop,CountTableRef[A],A,int>`_ + ## * `clear proc<#clear,CountTableRef[A]>`_ to empty the whole table + del(t[], key) + +proc pop*[A](t: CountTableRef[A], key: A, val: var int): bool {.since: (1, 1).} = + ## Deletes the `key` from the table. + ## Returns `true`, if the `key` existed, and sets `val` to the + ## mapping of the key. Otherwise, returns `false`, and the `val` is + ## unchanged. + ## + ## See also: + ## * `del proc<#del,CountTableRef[A],A>`_ + ## * `clear proc<#clear,CountTableRef[A]>`_ to empty the whole table + pop(t[], key, val) + +proc clear*[A](t: CountTableRef[A]) = + ## Resets the table so that it is empty. 
+ ## + ## See also: + ## * `del proc<#del,CountTableRef[A],A>`_ + ## * `pop proc<#pop,CountTableRef[A],A,int>`_ + clear(t[]) + +proc sort*[A](t: CountTableRef[A], order = SortOrder.Descending) = + ## Sorts the count table so that, by default, the entry with the + ## highest counter comes first. + ## + ## **This is destructive! You must not modify `t` afterwards!** + ## + ## You can use the iterators `pairs<#pairs.i,CountTableRef[A]>`_, + ## `keys<#keys.i,CountTableRef[A]>`_, and `values<#values.i,CountTableRef[A]>`_ + ## to iterate over `t` in the sorted order. + t[].sort(order = order) + +proc merge*[A](s, t: CountTableRef[A]) = + ## Merges the second table into the first one. + runnableExamples: + let + a = newCountTable("aaabbc") + b = newCountTable("bcc") + a.merge(b) + doAssert a == newCountTable("aaabbbccc") + + s[].merge(t[]) proc `$`*[A](t: CountTableRef[A]): string = - ## The `$` operator for count tables. + ## The `$` operator for count tables. Used internally when calling `echo` + ## on a table. dollarImpl() -proc inc*[A](t: CountTableRef[A], key: A, val = 1) = - ## increments `t[key]` by `val`. - t[].inc(key, val) +proc `==`*[A](s, t: CountTableRef[A]): bool = + ## The `==` operator for count tables. Returns `true` if either both tables + ## are `nil`, or neither is `nil` and both contain the same keys with the same + ## count. Insert order does not matter. + if isNil(s): result = isNil(t) + elif isNil(t): result = false + else: result = s[] == t[] -proc smallest*[A](t: CountTableRef[A]): (A, int) = - ## returns the largest (key,val)-pair. Efficiency: O(n) - t[].smallest -proc largest*[A](t: CountTableRef[A]): (A, int) = - ## returns the (key,val)-pair with the largest `val`. Efficiency: O(n) - t[].largest +iterator pairs*[A](t: CountTableRef[A]): (A, int) = + ## Iterates over any `(key, value)` pair in the table `t`. 
+ ## + ## See also: + ## * `mpairs iterator<#mpairs.i,CountTableRef[A]>`_ + ## * `keys iterator<#keys.i,CountTableRef[A]>`_ + ## * `values iterator<#values.i,CountTableRef[A]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## let a = newCountTable("abracadabra") + ## + ## for k, v in pairs(a): + ## echo "key: ", k + ## echo "value: ", v + ## + ## # key: a + ## # value: 5 + ## # key: b + ## # value: 2 + ## # key: c + ## # value: 1 + ## # key: d + ## # value: 1 + ## # key: r + ## # value: 2 + ## ``` + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "the length of the table changed while iterating over it") -proc sort*[A](t: CountTableRef[A]) = - ## sorts the count table so that the entry with the highest counter comes - ## first. This is destructive! You must not modify `t` afterwards! - ## You can use the iterators `pairs`, `keys`, and `values` to iterate over - ## `t` in the sorted order. - t[].sort +iterator mpairs*[A](t: CountTableRef[A]): (A, var int) = + ## Iterates over any `(key, value)` pair in the table `t`. The values can + ## be modified. + ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTableRef[A]>`_ + ## * `mvalues iterator<#mvalues.i,CountTableRef[A]>`_ + runnableExamples: + let a = newCountTable("abracadabra") + for k, v in mpairs(a): + v = 2 + doAssert a == newCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield (t.data[h].key, t.data[h].val) + assert(len(t) == L, "table modified while iterating over it") -when isMainModule: - type - Person = object - firstName, lastName: string +iterator keys*[A](t: CountTableRef[A]): A = + ## Iterates over any key in the table `t`. 
+ ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTable[A]>`_ + ## * `values iterator<#values.i,CountTable[A]>`_ + runnableExamples: + let a = newCountTable("abracadabra") + for k in keys(a): + a[k] = 2 + doAssert a == newCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield t.data[h].key + assert(len(t) == L, "the length of the table changed while iterating over it") - proc hash(x: Person): THash = - ## Piggyback on the already available string hash proc. - ## - ## Without this proc nothing works! - result = x.firstName.hash !& x.lastName.hash - result = !$result +iterator values*[A](t: CountTableRef[A]): int = + ## Iterates over any value in the table `t`. + ## + ## See also: + ## * `pairs iterator<#pairs.i,CountTableRef[A]>`_ + ## * `keys iterator<#keys.i,CountTableRef[A]>`_ + ## * `mvalues iterator<#mvalues.i,CountTableRef[A]>`_ + runnableExamples: + let a = newCountTable("abracadabra") + for v in values(a): + assert v < 10 + + let L = len(t) + for h in 0 .. high(t.data): + if t.data[h].val != 0: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") - var - salaries = initTable[Person, int]() - p1, p2: Person - p1.firstName = "Jon" - p1.lastName = "Ross" - salaries[p1] = 30_000 - p2.firstName = "소진" - p2.lastName = "박" - salaries[p2] = 45_000 - var - s2 = initOrderedTable[Person, int]() - s3 = initCountTable[Person]() - s2[p1] = 30_000 - s2[p2] = 45_000 - s3[p1] = 30_000 - s3[p2] = 45_000 +iterator mvalues*[A](t: CountTableRef[A]): var int = + ## Iterates over any value in the table `t`. The values can be modified. + ## + ## See also: + ## * `mpairs iterator<#mpairs.i,CountTableRef[A]>`_ + ## * `values iterator<#values.i,CountTableRef[A]>`_ + runnableExamples: + var a = newCountTable("abracadabra") + for v in mvalues(a): + v = 2 + doAssert a == newCountTable("aabbccddrr") + + let L = len(t) + for h in 0 .. 
high(t.data): + if t.data[h].val != 0: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + +proc hash*[K,V](s: Table[K,V]): Hash = + for p in pairs(s): + result = result xor hash(p) + result = !$result + +proc hash*[K,V](s: OrderedTable[K,V]): Hash = + for p in pairs(s): + result = result !& hash(p) + result = !$result + +proc hash*[V](s: CountTable[V]): Hash = + for p in pairs(s): + result = result xor hash(p) + result = !$result diff --git a/lib/pure/colors.nim b/lib/pure/colors.nim index f24cc0072..d3e6dc063 100644 --- a/lib/pure/colors.nim +++ b/lib/pure/colors.nim @@ -6,31 +6,39 @@ # distribution, for details about the copyright. # -## This module implements color handling for Nimrod. It is used by -## the ``graphics`` module. +## This module implements color handling for Nim, +## namely color mixing and parsing the CSS color names. -import strutils +import std/strutils +from std/algorithm import binarySearch type - Color* = distinct int ## a color stored as RGB + Color* = distinct int ## A color stored as RGB, e.g. `0xff00cc`. -{.deprecated: [TColor: Color].} +proc `==`*(a, b: Color): bool {.borrow.} + ## Compares two colors. + ## + ## ```Nim + ## var + ## a = Color(0xff_00_ff) + ## b = colFuchsia + ## c = Color(0x00_ff_cc) + ## assert a == b + ## assert not (a == c) + ## ``` -proc `==` *(a, b: Color): bool {.borrow.} - ## compares two colors. 
- -template extract(a: Color, r, g, b: expr) {.immediate.}= +template extract(a: Color, r, g, b: untyped) = var r = a.int shr 16 and 0xff var g = a.int shr 8 and 0xff var b = a.int and 0xff - -template rawRGB(r, g, b: int): expr = + +template rawRGB(r, g, b: int): Color = Color(r shl 16 or g shl 8 or b) - -template colorOp(op: expr) {.immediate.} = + +template colorOp(op): Color = extract(a, ar, ag, ab) extract(b, br, bg, bb) - result = rawRGB(op(ar, br), op(ag, bg), op(ab, bb)) + rawRGB(op(ar, br), op(ag, bg), op(ab, bb)) proc satPlus(a, b: int): int {.inline.} = result = a +% b @@ -39,26 +47,66 @@ proc satPlus(a, b: int): int {.inline.} = proc satMinus(a, b: int): int {.inline.} = result = a -% b if result < 0: result = 0 - + proc `+`*(a, b: Color): Color = - ## adds two colors: This uses saturated artithmetic, so that each color + ## Adds two colors. + ## + ## This uses saturated arithmetic, so that each color ## component cannot overflow (255 is used as a maximum). + ## + runnableExamples: + var + a = Color(0xaa_00_ff) + b = Color(0x11_cc_cc) + assert a + b == Color(0xbb_cc_ff) + colorOp(satPlus) - + proc `-`*(a, b: Color): Color = - ## subtracts two colors: This uses saturated artithmetic, so that each color - ## component cannot overflow (255 is used as a maximum). + ## Subtracts two colors. + ## + ## This uses saturated arithmetic, so that each color + ## component cannot underflow (0 is used as a minimum). + ## + runnableExamples: + var + a = Color(0xff_33_ff) + b = Color(0x11_ff_cc) + assert a - b == Color(0xee_00_33) + colorOp(satMinus) - + proc extractRGB*(a: Color): tuple[r, g, b: range[0..255]] = - ## extracts the red/green/blue components of the color `a`. + ## Extracts the red/green/blue components of the color `a`. 
+ ## + runnableExamples: + var + a = Color(0xff_00_ff) + b = Color(0x00_ff_cc) + type + Col = range[0..255] + # assert extractRGB(a) == (r: 255.Col, g: 0.Col, b: 255.Col) + # assert extractRGB(b) == (r: 0.Col, g: 255.Col, b: 204.Col) + echo extractRGB(a) + echo typeof(extractRGB(a)) + echo extractRGB(b) + echo typeof(extractRGB(b)) + result.r = a.int shr 16 and 0xff result.g = a.int shr 8 and 0xff result.b = a.int and 0xff - -proc intensity*(a: Color, f: float): Color = - ## returns `a` with intensity `f`. `f` should be a float from 0.0 (completely + +proc intensity*(a: Color, f: float): Color = + ## Returns `a` with intensity `f`. `f` should be a float from 0.0 (completely ## dark) to 1.0 (full color intensity). + ## + runnableExamples: + var + a = Color(0xff_00_ff) + b = Color(0x00_42_cc) + assert a.intensity(0.5) == Color(0x80_00_80) + assert b.intensity(0.5) == Color(0x00_21_66) + var r = toInt(toFloat(a.int shr 16 and 0xff) * f) var g = toInt(toFloat(a.int shr 8 and 0xff) * f) var b = toInt(toFloat(a.int and 0xff) * f) @@ -66,23 +114,35 @@ proc intensity*(a: Color, f: float): Color = if g >% 255: g = 255 if b >% 255: b = 255 result = rawRGB(r, g, b) - -template mix*(a, b: Color, fn: expr): expr = - ## uses `fn` to mix the colors `a` and `b`. `fn` is invoked for each component - ## R, G, and B. This is a template because `fn` should be inlined and the - ## compiler cannot inline proc pointers yet. If `fn`'s result is not in the - ## range[0..255], it will be saturated to be so. - template `><` (x: expr): expr = + +template mix*(a, b: Color, fn: untyped): untyped = + ## Uses `fn` to mix the colors `a` and `b`. + ## + ## `fn` is invoked for each component R, G, and B. + ## If `fn`'s result is not in the `range[0..255]`, + ## it will be saturated to be so. 
+ ## + runnableExamples: + var + a = Color(0x0a2814) + b = Color(0x050a03) + + proc myMix(x, y: int): int = + 2 * x - 3 * y + + assert mix(a, b, myMix) == Color(0x05_32_1f) + + template `><` (x: untyped): untyped = # keep it in the range 0..255 block: var y = x # eval only once if y >% 255: y = if y < 0: 0 else: 255 y - - (bind extract)(a, ar, ag, ab) - (bind extract)(b, br, bg, bb) - (bind rawRGB)(><fn(ar, br), ><fn(ag, bg), ><fn(ab, bb)) + + extract(a, ar, ag, ab) + extract(b, br, bg, bb) + rawRGB(><fn(ar, br), ><fn(ag, bg), ><fn(ab, bb)) const @@ -112,6 +172,7 @@ const colDarkGoldenRod* = Color(0xB8860B) colDarkGray* = Color(0xA9A9A9) colDarkGreen* = Color(0x006400) + colDarkGrey* = Color(0xA9A9A9) colDarkKhaki* = Color(0xBDB76B) colDarkMagenta* = Color(0x8B008B) colDarkOliveGreen* = Color(0x556B2F) @@ -122,11 +183,13 @@ const colDarkSeaGreen* = Color(0x8FBC8F) colDarkSlateBlue* = Color(0x483D8B) colDarkSlateGray* = Color(0x2F4F4F) + colDarkSlateGrey* = Color(0x2F4F4F) colDarkTurquoise* = Color(0x00CED1) colDarkViolet* = Color(0x9400D3) colDeepPink* = Color(0xFF1493) colDeepSkyBlue* = Color(0x00BFFF) colDimGray* = Color(0x696969) + colDimGrey* = Color(0x696969) colDodgerBlue* = Color(0x1E90FF) colFireBrick* = Color(0xB22222) colFloralWhite* = Color(0xFFFAF0) @@ -139,6 +202,7 @@ const colGray* = Color(0x808080) colGreen* = Color(0x008000) colGreenYellow* = Color(0xADFF2F) + colGrey* = Color(0x808080) colHoneyDew* = Color(0xF0FFF0) colHotPink* = Color(0xFF69B4) colIndianRed* = Color(0xCD5C5C) @@ -153,13 +217,15 @@ const colLightCoral* = Color(0xF08080) colLightCyan* = Color(0xE0FFFF) colLightGoldenRodYellow* = Color(0xFAFAD2) - colLightGrey* = Color(0xD3D3D3) + colLightGray* = Color(0xD3D3D3) colLightGreen* = Color(0x90EE90) + colLightGrey* = Color(0xD3D3D3) colLightPink* = Color(0xFFB6C1) colLightSalmon* = Color(0xFFA07A) colLightSeaGreen* = Color(0x20B2AA) colLightSkyBlue* = Color(0x87CEFA) colLightSlateGray* = Color(0x778899) + colLightSlateGrey* = 
Color(0x778899) colLightSteelBlue* = Color(0xB0C4DE) colLightYellow* = Color(0xFFFFE0) colLime* = Color(0x00FF00) @@ -170,7 +236,7 @@ const colMediumAquaMarine* = Color(0x66CDAA) colMediumBlue* = Color(0x0000CD) colMediumOrchid* = Color(0xBA55D3) - colMediumPurple* = Color(0x9370D8) + colMediumPurple* = Color(0x9370DB) colMediumSeaGreen* = Color(0x3CB371) colMediumSlateBlue* = Color(0x7B68EE) colMediumSpringGreen* = Color(0x00FA9A) @@ -191,7 +257,7 @@ const colPaleGoldenRod* = Color(0xEEE8AA) colPaleGreen* = Color(0x98FB98) colPaleTurquoise* = Color(0xAFEEEE) - colPaleVioletRed* = Color(0xD87093) + colPaleVioletRed* = Color(0xDB7093) colPapayaWhip* = Color(0xFFEFD5) colPeachPuff* = Color(0xFFDAB9) colPeru* = Color(0xCD853F) @@ -199,6 +265,7 @@ const colPlum* = Color(0xDDA0DD) colPowderBlue* = Color(0xB0E0E6) colPurple* = Color(0x800080) + colRebeccaPurple* = Color(0x663399) colRed* = Color(0xFF0000) colRosyBrown* = Color(0xBC8F8F) colRoyalBlue* = Color(0x4169E1) @@ -212,6 +279,7 @@ const colSkyBlue* = Color(0x87CEEB) colSlateBlue* = Color(0x6A5ACD) colSlateGray* = Color(0x708090) + colSlateGrey* = Color(0x708090) colSnow* = Color(0xFFFAFA) colSpringGreen* = Color(0x00FF7F) colSteelBlue* = Color(0x4682B4) @@ -226,186 +294,214 @@ const colWhiteSmoke* = Color(0xF5F5F5) colYellow* = Color(0xFFFF00) colYellowGreen* = Color(0x9ACD32) - - colorNames = [ - ("aliceblue", colAliceBlue), - ("antiquewhite", colAntiqueWhite), - ("aqua", colAqua), - ("aquamarine", colAquamarine), - ("azure", colAzure), - ("beige", colBeige), - ("bisque", colBisque), - ("black", colBlack), - ("blanchedalmond", colBlanchedAlmond), - ("blue", colBlue), - ("blueviolet", colBlueViolet), - ("brown", colBrown), - ("burlywood", colBurlyWood), - ("cadetblue", colCadetBlue), - ("chartreuse", colChartreuse), - ("chocolate", colChocolate), - ("coral", colCoral), - ("cornflowerblue", colCornflowerBlue), - ("cornsilk", colCornsilk), - ("crimson", colCrimson), - ("cyan", colCyan), - ("darkblue", colDarkBlue), 
- ("darkcyan", colDarkCyan), - ("darkgoldenrod", colDarkGoldenRod), - ("darkgray", colDarkGray), - ("darkgreen", colDarkGreen), - ("darkkhaki", colDarkKhaki), - ("darkmagenta", colDarkMagenta), - ("darkolivegreen", colDarkOliveGreen), - ("darkorange", colDarkorange), - ("darkorchid", colDarkOrchid), - ("darkred", colDarkRed), - ("darksalmon", colDarkSalmon), - ("darkseagreen", colDarkSeaGreen), - ("darkslateblue", colDarkSlateBlue), - ("darkslategray", colDarkSlateGray), - ("darkturquoise", colDarkTurquoise), - ("darkviolet", colDarkViolet), - ("deeppink", colDeepPink), - ("deepskyblue", colDeepSkyBlue), - ("dimgray", colDimGray), - ("dodgerblue", colDodgerBlue), - ("firebrick", colFireBrick), - ("floralwhite", colFloralWhite), - ("forestgreen", colForestGreen), - ("fuchsia", colFuchsia), - ("gainsboro", colGainsboro), - ("ghostwhite", colGhostWhite), - ("gold", colGold), - ("goldenrod", colGoldenRod), - ("gray", colGray), - ("green", colGreen), - ("greenyellow", colGreenYellow), - ("honeydew", colHoneyDew), - ("hotpink", colHotPink), - ("indianred", colIndianRed), - ("indigo", colIndigo), - ("ivory", colIvory), - ("khaki", colKhaki), - ("lavender", colLavender), - ("lavenderblush", colLavenderBlush), - ("lawngreen", colLawnGreen), - ("lemonchiffon", colLemonChiffon), - ("lightblue", colLightBlue), - ("lightcoral", colLightCoral), - ("lightcyan", colLightCyan), - ("lightgoldenrodyellow", colLightGoldenRodYellow), - ("lightgrey", colLightGrey), - ("lightgreen", colLightGreen), - ("lightpink", colLightPink), - ("lightsalmon", colLightSalmon), - ("lightseagreen", colLightSeaGreen), - ("lightskyblue", colLightSkyBlue), - ("lightslategray", colLightSlateGray), - ("lightsteelblue", colLightSteelBlue), - ("lightyellow", colLightYellow), - ("lime", colLime), - ("limegreen", colLimeGreen), - ("linen", colLinen), - ("magenta", colMagenta), - ("maroon", colMaroon), - ("mediumaquamarine", colMediumAquaMarine), - ("mediumblue", colMediumBlue), - ("mediumorchid", 
colMediumOrchid), - ("mediumpurple", colMediumPurple), - ("mediumseagreen", colMediumSeaGreen), - ("mediumslateblue", colMediumSlateBlue), - ("mediumspringgreen", colMediumSpringGreen), - ("mediumturquoise", colMediumTurquoise), - ("mediumvioletred", colMediumVioletRed), - ("midnightblue", colMidnightBlue), - ("mintcream", colMintCream), - ("mistyrose", colMistyRose), - ("moccasin", colMoccasin), - ("navajowhite", colNavajoWhite), - ("navy", colNavy), - ("oldlace", colOldLace), - ("olive", colOlive), - ("olivedrab", colOliveDrab), - ("orange", colOrange), - ("orangered", colOrangeRed), - ("orchid", colOrchid), - ("palegoldenrod", colPaleGoldenRod), - ("palegreen", colPaleGreen), - ("paleturquoise", colPaleTurquoise), - ("palevioletred", colPaleVioletRed), - ("papayawhip", colPapayaWhip), - ("peachpuff", colPeachPuff), - ("peru", colPeru), - ("pink", colPink), - ("plum", colPlum), - ("powderblue", colPowderBlue), - ("purple", colPurple), - ("red", colRed), - ("rosybrown", colRosyBrown), - ("royalblue", colRoyalBlue), - ("saddlebrown", colSaddleBrown), - ("salmon", colSalmon), - ("sandybrown", colSandyBrown), - ("seagreen", colSeaGreen), - ("seashell", colSeaShell), - ("sienna", colSienna), - ("silver", colSilver), - ("skyblue", colSkyBlue), - ("slateblue", colSlateBlue), - ("slategray", colSlateGray), - ("snow", colSnow), - ("springgreen", colSpringGreen), - ("steelblue", colSteelBlue), - ("tan", colTan), - ("teal", colTeal), - ("thistle", colThistle), - ("tomato", colTomato), - ("turquoise", colTurquoise), - ("violet", colViolet), - ("wheat", colWheat), - ("white", colWhite), - ("whitesmoke", colWhiteSmoke), - ("yellow", colYellow), - ("yellowgreen", colYellowGreen)] - -proc `$`*(c: Color): string = - ## converts a color into its textual representation. Example: ``#00FF00``. 
+ + colorNames = { + "aliceblue": colAliceBlue, + "antiquewhite": colAntiqueWhite, + "aqua": colAqua, + "aquamarine": colAquamarine, + "azure": colAzure, + "beige": colBeige, + "bisque": colBisque, + "black": colBlack, + "blanchedalmond": colBlanchedAlmond, + "blue": colBlue, + "blueviolet": colBlueViolet, + "brown": colBrown, + "burlywood": colBurlyWood, + "cadetblue": colCadetBlue, + "chartreuse": colChartreuse, + "chocolate": colChocolate, + "coral": colCoral, + "cornflowerblue": colCornflowerBlue, + "cornsilk": colCornsilk, + "crimson": colCrimson, + "cyan": colCyan, + "darkblue": colDarkBlue, + "darkcyan": colDarkCyan, + "darkgoldenrod": colDarkGoldenRod, + "darkgray": colDarkGray, + "darkgreen": colDarkGreen, + "darkgrey": colDarkGrey, + "darkkhaki": colDarkKhaki, + "darkmagenta": colDarkMagenta, + "darkolivegreen": colDarkOliveGreen, + "darkorange": colDarkorange, + "darkorchid": colDarkOrchid, + "darkred": colDarkRed, + "darksalmon": colDarkSalmon, + "darkseagreen": colDarkSeaGreen, + "darkslateblue": colDarkSlateBlue, + "darkslategray": colDarkSlateGray, + "darkslategrey": colDarkSlateGrey, + "darkturquoise": colDarkTurquoise, + "darkviolet": colDarkViolet, + "deeppink": colDeepPink, + "deepskyblue": colDeepSkyBlue, + "dimgray": colDimGray, + "dimgrey": colDimGrey, + "dodgerblue": colDodgerBlue, + "firebrick": colFireBrick, + "floralwhite": colFloralWhite, + "forestgreen": colForestGreen, + "fuchsia": colFuchsia, + "gainsboro": colGainsboro, + "ghostwhite": colGhostWhite, + "gold": colGold, + "goldenrod": colGoldenRod, + "gray": colGray, + "green": colGreen, + "greenyellow": colGreenYellow, + "grey": colGrey, + "honeydew": colHoneyDew, + "hotpink": colHotPink, + "indianred": colIndianRed, + "indigo": colIndigo, + "ivory": colIvory, + "khaki": colKhaki, + "lavender": colLavender, + "lavenderblush": colLavenderBlush, + "lawngreen": colLawnGreen, + "lemonchiffon": colLemonChiffon, + "lightblue": colLightBlue, + "lightcoral": colLightCoral, + "lightcyan": 
colLightCyan, + "lightgoldenrodyellow": colLightGoldenRodYellow, + "lightgray": colLightGray, + "lightgreen": colLightGreen, + "lightgrey": colLightGrey, + "lightpink": colLightPink, + "lightsalmon": colLightSalmon, + "lightseagreen": colLightSeaGreen, + "lightskyblue": colLightSkyBlue, + "lightslategray": colLightSlateGray, + "lightslategrey": colLightSlateGrey, + "lightsteelblue": colLightSteelBlue, + "lightyellow": colLightYellow, + "lime": colLime, + "limegreen": colLimeGreen, + "linen": colLinen, + "magenta": colMagenta, + "maroon": colMaroon, + "mediumaquamarine": colMediumAquaMarine, + "mediumblue": colMediumBlue, + "mediumorchid": colMediumOrchid, + "mediumpurple": colMediumPurple, + "mediumseagreen": colMediumSeaGreen, + "mediumslateblue": colMediumSlateBlue, + "mediumspringgreen": colMediumSpringGreen, + "mediumturquoise": colMediumTurquoise, + "mediumvioletred": colMediumVioletRed, + "midnightblue": colMidnightBlue, + "mintcream": colMintCream, + "mistyrose": colMistyRose, + "moccasin": colMoccasin, + "navajowhite": colNavajoWhite, + "navy": colNavy, + "oldlace": colOldLace, + "olive": colOlive, + "olivedrab": colOliveDrab, + "orange": colOrange, + "orangered": colOrangeRed, + "orchid": colOrchid, + "palegoldenrod": colPaleGoldenRod, + "palegreen": colPaleGreen, + "paleturquoise": colPaleTurquoise, + "palevioletred": colPaleVioletRed, + "papayawhip": colPapayaWhip, + "peachpuff": colPeachPuff, + "peru": colPeru, + "pink": colPink, + "plum": colPlum, + "powderblue": colPowderBlue, + "purple": colPurple, + "rebeccapurple": colRebeccaPurple, + "red": colRed, + "rosybrown": colRosyBrown, + "royalblue": colRoyalBlue, + "saddlebrown": colSaddleBrown, + "salmon": colSalmon, + "sandybrown": colSandyBrown, + "seagreen": colSeaGreen, + "seashell": colSeaShell, + "sienna": colSienna, + "silver": colSilver, + "skyblue": colSkyBlue, + "slateblue": colSlateBlue, + "slategray": colSlateGray, + "slategrey": colSlateGrey, + "snow": colSnow, + "springgreen": 
colSpringGreen, + "steelblue": colSteelBlue, + "tan": colTan, + "teal": colTeal, + "thistle": colThistle, + "tomato": colTomato, + "turquoise": colTurquoise, + "violet": colViolet, + "wheat": colWheat, + "white": colWhite, + "whitesmoke": colWhiteSmoke, + "yellow": colYellow, + "yellowgreen": colYellowGreen} + +proc `$`*(c: Color): string = + ## Converts a color into its textual representation. + ## + runnableExamples: + assert $colFuchsia == "#FF00FF" result = '#' & toHex(int(c), 6) -proc binaryStrSearch(x: openArray[tuple[name: string, col: Color]], - y: string): int = - var a = 0 - var b = len(x) - 1 - while a <= b: - var mid = (a + b) div 2 - var c = cmp(x[mid].name, y) - if c < 0: a = mid + 1 - elif c > 0: b = mid - 1 - else: return mid - result = - 1 - -proc parseColor*(name: string): Color = - ## parses `name` to a color value. If no valid color could be - ## parsed ``EInvalidValue`` is raised. - if name[0] == '#': +proc colorNameCmp(x: tuple[name: string, col: Color], y: string): int = + result = cmpIgnoreCase(x.name, y) + +proc parseColor*(name: string): Color = + ## Parses `name` to a color value. + ## + ## If no valid color could be parsed `ValueError` is raised. + ## Case insensitive. + ## + runnableExamples: + var + a = "silver" + b = "#0179fc" + c = "#zzmmtt" + assert parseColor(a) == Color(0xc0_c0_c0) + assert parseColor(b) == Color(0x01_79_fc) + doAssertRaises(ValueError): discard parseColor(c) + + if name.len > 0 and name[0] == '#': result = Color(parseHexInt(name)) else: - var idx = binaryStrSearch(colorNames, name) + var idx = binarySearch(colorNames, name, colorNameCmp) if idx < 0: raise newException(ValueError, "unknown color: " & name) result = colorNames[idx][1] proc isColor*(name: string): bool = - ## returns true if `name` is a known color name or a hexadecimal color - ## prefixed with ``#``. - if name[0] == '#': - for i in 1 .. 
name.len-1: - if name[i] notin {'0'..'9', 'a'..'f', 'A'..'F'}: return false + ## Returns true if `name` is a known color name or a hexadecimal color + ## prefixed with `#`. Case insensitive. + ## + runnableExamples: + var + a = "silver" + b = "#0179fc" + c = "#zzmmtt" + assert a.isColor + assert b.isColor + assert not c.isColor + + if name.len == 0: return false + if name[0] == '#': + for i in 1 .. name.len-1: + if name[i] notin HexDigits: return false result = true else: - result = binaryStrSearch(colorNames, name) >= 0 + result = binarySearch(colorNames, name, colorNameCmp) >= 0 proc rgb*(r, g, b: range[0..255]): Color = - ## constructs a color from RGB values. - result = rawRGB(r, g, b) + ## Constructs a color from RGB values. + ## + runnableExamples: + assert rgb(0, 255, 128) == Color(0x00_ff_80) + result = rawRGB(r, g, b) diff --git a/lib/pure/complex.nim b/lib/pure/complex.nim index 8577bf7a1..b48811eae 100644 --- a/lib/pure/complex.nim +++ b/lib/pure/complex.nim @@ -7,184 +7,187 @@ # distribution, for details about the copyright. # +## This module implements complex numbers +## and basic mathematical operations on them. +## +## Complex numbers are currently generic over 64-bit or 32-bit floats. +runnableExamples: + from std/math import almostEqual, sqrt -## This module implements complex numbers. -{.push checks:off, line_dir:off, stack_trace:off, debugger:off.} -# the user does not want to trace a part -# of the standard library! + let + z1 = complex(1.0, 2.0) + z2 = complex(3.0, -4.0) + assert almostEqual(z1 + z2, complex(4.0, -2.0)) + assert almostEqual(z1 - z2, complex(-2.0, 6.0)) + assert almostEqual(z1 * z2, complex(11.0, 2.0)) + assert almostEqual(z1 / z2, complex(-0.2, 0.4)) -import - math - -const - EPS = 1.0e-7 ## Epsilon used for float comparisons. 
+ assert almostEqual(abs(z1), sqrt(5.0)) + assert almostEqual(conjugate(z1), complex(1.0, -2.0)) -type - Complex* = tuple[re, im: float] - ## a complex number, consisting of a real and an imaginary part - -{.deprecated: [TComplex: Complex].} + let (r, phi) = z1.polar + assert almostEqual(rect(r, phi), z1) -proc toComplex*(x: SomeInteger): Complex = - ## Convert some integer ``x`` to a complex number. - result.re = x - result.im = 0 +{.push checks: off, line_dir: off, stack_trace: off, debugger: off.} +# the user does not want to trace a part of the standard library! -proc `==` *(x, y: Complex): bool = - ## Compare two complex numbers `x` and `y` for equality. - result = x.re == y.re and x.im == y.im +import std/[math, strformat] -proc `=~` *(x, y: Complex): bool = - ## Compare two complex numbers `x` and `y` approximately. - result = abs(x.re-y.re)<EPS and abs(x.im-y.im)<EPS +type + Complex*[T: SomeFloat] = object + ## A complex number, consisting of a real and an imaginary part. + re*, im*: T + Complex64* = Complex[float64] + ## Alias for a complex number using 64-bit floats. + Complex32* = Complex[float32] + ## Alias for a complex number using 32-bit floats. + +func complex*[T: SomeFloat](re: T; im: T = 0.0): Complex[T] = + ## Returns a `Complex[T]` with real part `re` and imaginary part `im`. + result.re = re + result.im = im + +func complex32*(re: float32; im: float32 = 0.0): Complex32 = + ## Returns a `Complex32` with real part `re` and imaginary part `im`. + result.re = re + result.im = im + +func complex64*(re: float64; im: float64 = 0.0): Complex64 = + ## Returns a `Complex64` with real part `re` and imaginary part `im`. + result.re = re + result.im = im + +template im*(arg: typedesc[float32]): Complex32 = complex32(0, 1) + ## Returns the imaginary unit (`complex32(0, 1)`). +template im*(arg: typedesc[float64]): Complex64 = complex64(0, 1) + ## Returns the imaginary unit (`complex64(0, 1)`). 
+template im*(arg: float32): Complex32 = complex32(0, arg) + ## Returns `arg` as an imaginary number (`complex32(0, arg)`). +template im*(arg: float64): Complex64 = complex64(0, arg) + ## Returns `arg` as an imaginary number (`complex64(0, arg)`). + +func abs*[T](z: Complex[T]): T = + ## Returns the absolute value of `z`, + ## that is the distance from (0, 0) to `z`. + result = hypot(z.re, z.im) + +func abs2*[T](z: Complex[T]): T = + ## Returns the squared absolute value of `z`, + ## that is the squared distance from (0, 0) to `z`. + ## This is more efficient than `abs(z) ^ 2`. + result = z.re * z.re + z.im * z.im + +func sgn*[T](z: Complex[T]): Complex[T] = + ## Returns the phase of `z` as a unit complex number, + ## or 0 if `z` is 0. + let a = abs(z) + if a != 0: + result = z / a + +func conjugate*[T](z: Complex[T]): Complex[T] = + ## Returns the complex conjugate of `z` (`complex(z.re, -z.im)`). + result.re = z.re + result.im = -z.im -proc `+` *(x, y: Complex): Complex = - ## Add two complex numbers. - result.re = x.re + y.re - result.im = x.im + y.im +func inv*[T](z: Complex[T]): Complex[T] = + ## Returns the multiplicative inverse of `z` (`1/z`). + conjugate(z) / abs2(z) -proc `+` *(x: Complex, y: float): Complex = - ## Add complex `x` to float `y`. - result.re = x.re + y - result.im = x.im +func `==`*[T](x, y: Complex[T]): bool = + ## Compares two complex numbers for equality. + result = x.re == y.re and x.im == y.im -proc `+` *(x: float, y: Complex): Complex = - ## Add float `x` to complex `y`. +func `+`*[T](x: T; y: Complex[T]): Complex[T] = + ## Adds a real number to a complex number. result.re = x + y.re result.im = y.im +func `+`*[T](x: Complex[T]; y: T): Complex[T] = + ## Adds a complex number to a real number. + result.re = x.re + y + result.im = x.im -proc `-` *(z: Complex): Complex = +func `+`*[T](x, y: Complex[T]): Complex[T] = + ## Adds two complex numbers. 
+ result.re = x.re + y.re + result.im = x.im + y.im + +func `-`*[T](z: Complex[T]): Complex[T] = ## Unary minus for complex numbers. result.re = -z.re result.im = -z.im -proc `-` *(x, y: Complex): Complex = - ## Subtract two complex numbers. - result.re = x.re - y.re - result.im = x.im - y.im - -proc `-` *(x: Complex, y: float): Complex = - ## Subtracts float `y` from complex `x`. - result = x + (-y) +func `-`*[T](x: T; y: Complex[T]): Complex[T] = + ## Subtracts a complex number from a real number. + result.re = x - y.re + result.im = -y.im -proc `-` *(x: float, y: Complex): Complex = - ## Subtracts complex `y` from float `x`. - result = x + (-y) - - -proc `/` *(x, y: Complex): Complex = - ## Divide `x` by `y`. - var - r, den: float - if abs(y.re) < abs(y.im): - r = y.re / y.im - den = y.im + r * y.re - result.re = (x.re * r + x.im) / den - result.im = (x.im * r - x.re) / den - else: - r = y.im / y.re - den = y.re + r * y.im - result.re = (x.re + r * x.im) / den - result.im = (x.im - r * x.re) / den +func `-`*[T](x: Complex[T]; y: T): Complex[T] = + ## Subtracts a real number from a complex number. + result.re = x.re - y + result.im = x.im -proc `/` *(x : Complex, y: float ): Complex = - ## Divide complex `x` by float `y`. - result.re = x.re/y - result.im = x.im/y +func `-`*[T](x, y: Complex[T]): Complex[T] = + ## Subtracts two complex numbers. + result.re = x.re - y.re + result.im = x.im - y.im -proc `/` *(x : float, y: Complex ): Complex = - ## Divide float `x` by complex `y`. - var num : Complex = (x, 0.0) - result = num/y +func `*`*[T](x: T; y: Complex[T]): Complex[T] = + ## Multiplies a real number with a complex number. + result.re = x * y.re + result.im = x * y.im +func `*`*[T](x: Complex[T]; y: T): Complex[T] = + ## Multiplies a complex number with a real number. + result.re = x.re * y + result.im = x.im * y -proc `*` *(x, y: Complex): Complex = - ## Multiply `x` with `y`. +func `*`*[T](x, y: Complex[T]): Complex[T] = + ## Multiplies two complex numbers. 
result.re = x.re * y.re - x.im * y.im result.im = x.im * y.re + x.re * y.im -proc `*` *(x: float, y: Complex): Complex = - ## Multiply float `x` with complex `y`. - result.re = x * y.re - result.im = x * y.im +func `/`*[T](x: Complex[T]; y: T): Complex[T] = + ## Divides a complex number by a real number. + result.re = x.re / y + result.im = x.im / y -proc `*` *(x: Complex, y: float): Complex = - ## Multiply complex `x` with float `y`. - result.re = x.re * y - result.im = x.im * y +func `/`*[T](x: T; y: Complex[T]): Complex[T] = + ## Divides a real number by a complex number. + result = x * inv(y) +func `/`*[T](x, y: Complex[T]): Complex[T] = + ## Divides two complex numbers. + x * conjugate(y) / abs2(y) -proc `+=` *(x: var Complex, y: Complex) = - ## Add `y` to `x`. +func `+=`*[T](x: var Complex[T]; y: Complex[T]) = + ## Adds `y` to `x`. x.re += y.re x.im += y.im -proc `+=` *(x: var Complex, y: float) = - ## Add `y` to the complex number `x`. - x.re += y - -proc `-=` *(x: var Complex, y: Complex) = - ## Subtract `y` from `x`. +func `-=`*[T](x: var Complex[T]; y: Complex[T]) = + ## Subtracts `y` from `x`. x.re -= y.re x.im -= y.im -proc `-=` *(x: var Complex, y: float) = - ## Subtract `y` from the complex number `x`. - x.re -= y - -proc `*=` *(x: var Complex, y: Complex) = - ## Multiply `y` to `x`. +func `*=`*[T](x: var Complex[T]; y: Complex[T]) = + ## Multiplies `x` by `y`. let im = x.im * y.re + x.re * y.im x.re = x.re * y.re - x.im * y.im x.im = im -proc `*=` *(x: var Complex, y: float) = - ## Multiply `y` to the complex number `x`. - x.re *= y - x.im *= y - -proc `/=` *(x: var Complex, y: Complex) = - ## Divide `x` by `y` in place. +func `/=`*[T](x: var Complex[T]; y: Complex[T]) = + ## Divides `x` by `y` in place. x = x / y -proc `/=` *(x : var Complex, y: float) = - ## Divide complex `x` by float `y` in place. - x.re /= y - x.im /= y - - -proc abs*(z: Complex): float = - ## Return the distance from (0,0) to `z`. 
- # optimized by checking special cases (sqrt is expensive) - var x, y, temp: float - - x = abs(z.re) - y = abs(z.im) - if x == 0.0: - result = y - elif y == 0.0: - result = x - elif x > y: - temp = y / x - result = x * sqrt(1.0 + temp * temp) - else: - temp = x / y - result = y * sqrt(1.0 + temp * temp) - - -proc conjugate*(z: Complex): Complex = - ## Conjugate of complex number `z`. - result.re = z.re - result.im = -z.im - - -proc sqrt*(z: Complex): Complex = - ## Square root for a complex number `z`. - var x, y, w, r: float +func sqrt*[T](z: Complex[T]): Complex[T] = + ## Computes the + ## ([principal](https://en.wikipedia.org/wiki/Square_root#Principal_square_root_of_a_complex_number)) + ## square root of a complex number `z`. + var x, y, w, r: T if z.re == 0.0 and z.im == 0.0: result = z @@ -197,247 +200,274 @@ proc sqrt*(z: Complex): Complex = else: r = x / y w = sqrt(y) * sqrt(0.5 * (r + sqrt(1.0 + r * r))) + if z.re >= 0.0: result.re = w result.im = z.im / (w * 2.0) else: - if z.im >= 0.0: result.im = w - else: result.im = -w + result.im = if z.im >= 0.0: w else: -w result.re = z.im / (result.im + result.im) - -proc exp*(z: Complex): Complex = - ## e raised to the power `z`. - var rho = exp(z.re) - var theta = z.im - result.re = rho*cos(theta) - result.im = rho*sin(theta) - - -proc ln*(z: Complex): Complex = - ## Returns the natural log of `z`. +func exp*[T](z: Complex[T]): Complex[T] = + ## Computes the exponential function (`e^z`). + let + rho = exp(z.re) + theta = z.im + result.re = rho * cos(theta) + result.im = rho * sin(theta) + +func ln*[T](z: Complex[T]): Complex[T] = + ## Returns the + ## ([principal value](https://en.wikipedia.org/wiki/Complex_logarithm#Principal_value) + ## of the) natural logarithm of `z`. result.re = ln(abs(z)) - result.im = arctan2(z.im,z.re) - -proc log10*(z: Complex): Complex = - ## Returns the log base 10 of `z`. - result = ln(z)/ln(10.0) - -proc log2*(z: Complex): Complex = - ## Returns the log base 2 of `z`. 
- result = ln(z)/ln(2.0) - - -proc pow*(x, y: Complex): Complex = - ## `x` raised to the power `y`. - if x.re == 0.0 and x.im == 0.0: - if y.re == 0.0 and y.im == 0.0: + result.im = arctan2(z.im, z.re) + +func log10*[T](z: Complex[T]): Complex[T] = + ## Returns the logarithm base 10 of `z`. + ## + ## **See also:** + ## * `ln func<#ln,Complex[T]>`_ + result = ln(z) / ln(10.0) + +func log2*[T](z: Complex[T]): Complex[T] = + ## Returns the logarithm base 2 of `z`. + ## + ## **See also:** + ## * `ln func<#ln,Complex[T]>`_ + result = ln(z) / ln(2.0) + +func pow*[T](x, y: Complex[T]): Complex[T] = + ## `x` raised to the power of `y`. + if x.re == 0.0 and x.im == 0.0: + if y.re == 0.0 and y.im == 0.0: result.re = 1.0 result.im = 0.0 else: result.re = 0.0 result.im = 0.0 - elif y.re == 1.0 and y.im == 0.0: - result = x - elif y.re == -1.0 and y.im == 0.0: - result = 1.0/x + elif y.im == 0.0: + if y.re == 1.0: + result = x + elif y.re == -1.0: + result = T(1.0) / x + elif y.re == 2.0: + result = x * x + elif y.re == 0.5: + result = sqrt(x) + elif x.im == 0.0: + # Revert to real pow when both base and exponent are real + result.re = pow(x.re, y.re) + result.im = 0.0 + else: + # Special case when the exponent is real + let + rho = abs(x) + theta = arctan2(x.im, x.re) + s = pow(rho, y.re) + r = y.re * theta + result.re = s * cos(r) + result.im = s * sin(r) + elif x.im == 0.0 and x.re == E: + # Special case Euler's formula + result = exp(y) else: - var rho = sqrt(x.re*x.re + x.im*x.im) - var theta = arctan2(x.im,x.re) - var s = pow(rho,y.re) * exp(-y.im*theta) - var r = y.re*theta + y.im*ln(rho) - result.re = s*cos(r) - result.im = s*sin(r) - - -proc sin*(z: Complex): Complex = + let + rho = abs(x) + theta = arctan2(x.im, x.re) + s = pow(rho, y.re) * exp(-y.im * theta) + r = y.re * theta + y.im * ln(rho) + result.re = s * cos(r) + result.im = s * sin(r) + +func pow*[T](x: Complex[T]; y: T): Complex[T] = + ## The complex number `x` raised to the power of the real number `y`. 
+ pow(x, complex[T](y)) + + +func sin*[T](z: Complex[T]): Complex[T] = ## Returns the sine of `z`. - result.re = sin(z.re)*cosh(z.im) - result.im = cos(z.re)*sinh(z.im) + result.re = sin(z.re) * cosh(z.im) + result.im = cos(z.re) * sinh(z.im) -proc arcsin*(z: Complex): Complex = +func arcsin*[T](z: Complex[T]): Complex[T] = ## Returns the inverse sine of `z`. - var i: Complex = (0.0,1.0) - result = -i*ln(i*z + sqrt(1.0-z*z)) + result = -im(T) * ln(im(T) * z + sqrt(T(1.0) - z*z)) -proc cos*(z: Complex): Complex = +func cos*[T](z: Complex[T]): Complex[T] = ## Returns the cosine of `z`. - result.re = cos(z.re)*cosh(z.im) - result.im = -sin(z.re)*sinh(z.im) + result.re = cos(z.re) * cosh(z.im) + result.im = -sin(z.re) * sinh(z.im) -proc arccos*(z: Complex): Complex = +func arccos*[T](z: Complex[T]): Complex[T] = ## Returns the inverse cosine of `z`. - var i: Complex = (0.0,1.0) - result = -i*ln(z + sqrt(z*z-1.0)) + result = -im(T) * ln(z + sqrt(z*z - T(1.0))) -proc tan*(z: Complex): Complex = +func tan*[T](z: Complex[T]): Complex[T] = ## Returns the tangent of `z`. - result = sin(z)/cos(z) + result = sin(z) / cos(z) -proc arctan*(z: Complex): Complex = +func arctan*[T](z: Complex[T]): Complex[T] = ## Returns the inverse tangent of `z`. - var i: Complex = (0.0,1.0) - result = 0.5*i*(ln(1-i*z)-ln(1+i*z)) + result = T(0.5)*im(T) * (ln(T(1.0) - im(T)*z) - ln(T(1.0) + im(T)*z)) -proc cot*(z: Complex): Complex = +func cot*[T](z: Complex[T]): Complex[T] = ## Returns the cotangent of `z`. result = cos(z)/sin(z) -proc arccot*(z: Complex): Complex = +func arccot*[T](z: Complex[T]): Complex[T] = ## Returns the inverse cotangent of `z`. - var i: Complex = (0.0,1.0) - result = 0.5*i*(ln(1-i/z)-ln(1+i/z)) + result = T(0.5)*im(T) * (ln(T(1.0) - im(T)/z) - ln(T(1.0) + im(T)/z)) -proc sec*(z: Complex): Complex = +func sec*[T](z: Complex[T]): Complex[T] = ## Returns the secant of `z`. 
- result = 1.0/cos(z) + result = T(1.0) / cos(z) -proc arcsec*(z: Complex): Complex = +func arcsec*[T](z: Complex[T]): Complex[T] = ## Returns the inverse secant of `z`. - var i: Complex = (0.0,1.0) - result = -i*ln(i*sqrt(1-1/(z*z))+1/z) + result = -im(T) * ln(im(T) * sqrt(1.0 - 1.0/(z*z)) + T(1.0)/z) -proc csc*(z: Complex): Complex = +func csc*[T](z: Complex[T]): Complex[T] = ## Returns the cosecant of `z`. - result = 1.0/sin(z) + result = T(1.0) / sin(z) -proc arccsc*(z: Complex): Complex = +func arccsc*[T](z: Complex[T]): Complex[T] = ## Returns the inverse cosecant of `z`. - var i: Complex = (0.0,1.0) - result = -i*ln(sqrt(1-1/(z*z))+i/z) - + result = -im(T) * ln(sqrt(T(1.0) - T(1.0)/(z*z)) + im(T)/z) -proc sinh*(z: Complex): Complex = +func sinh*[T](z: Complex[T]): Complex[T] = ## Returns the hyperbolic sine of `z`. - result = 0.5*(exp(z)-exp(-z)) + result = T(0.5) * (exp(z) - exp(-z)) -proc arcsinh*(z: Complex): Complex = +func arcsinh*[T](z: Complex[T]): Complex[T] = ## Returns the inverse hyperbolic sine of `z`. - result = ln(z+sqrt(z*z+1)) + result = ln(z + sqrt(z*z + 1.0)) -proc cosh*(z: Complex): Complex = +func cosh*[T](z: Complex[T]): Complex[T] = ## Returns the hyperbolic cosine of `z`. - result = 0.5*(exp(z)+exp(-z)) + result = T(0.5) * (exp(z) + exp(-z)) -proc arccosh*(z: Complex): Complex = +func arccosh*[T](z: Complex[T]): Complex[T] = ## Returns the inverse hyperbolic cosine of `z`. - result = ln(z+sqrt(z*z-1)) + result = ln(z + sqrt(z*z - T(1.0))) -proc tanh*(z: Complex): Complex = +func tanh*[T](z: Complex[T]): Complex[T] = ## Returns the hyperbolic tangent of `z`. - result = sinh(z)/cosh(z) + result = sinh(z) / cosh(z) -proc arctanh*(z: Complex): Complex = +func arctanh*[T](z: Complex[T]): Complex[T] = ## Returns the inverse hyperbolic tangent of `z`. - result = 0.5*(ln((1+z)/(1-z))) + result = T(0.5) * (ln((T(1.0)+z) / (T(1.0)-z))) + +func coth*[T](z: Complex[T]): Complex[T] = + ## Returns the hyperbolic cotangent of `z`. 
+ result = cosh(z) / sinh(z) -proc sech*(z: Complex): Complex = +func arccoth*[T](z: Complex[T]): Complex[T] = + ## Returns the inverse hyperbolic cotangent of `z`. + result = T(0.5) * (ln(T(1.0) + T(1.0)/z) - ln(T(1.0) - T(1.0)/z)) + +func sech*[T](z: Complex[T]): Complex[T] = ## Returns the hyperbolic secant of `z`. - result = 2/(exp(z)+exp(-z)) + result = T(2.0) / (exp(z) + exp(-z)) -proc arcsech*(z: Complex): Complex = +func arcsech*[T](z: Complex[T]): Complex[T] = ## Returns the inverse hyperbolic secant of `z`. - result = ln(1/z+sqrt(1/z+1)*sqrt(1/z-1)) + result = ln(1.0/z + sqrt(T(1.0)/z+T(1.0)) * sqrt(T(1.0)/z-T(1.0))) -proc csch*(z: Complex): Complex = +func csch*[T](z: Complex[T]): Complex[T] = ## Returns the hyperbolic cosecant of `z`. - result = 2/(exp(z)-exp(-z)) + result = T(2.0) / (exp(z) - exp(-z)) -proc arccsch*(z: Complex): Complex = +func arccsch*[T](z: Complex[T]): Complex[T] = ## Returns the inverse hyperbolic cosecant of `z`. - result = ln(1/z+sqrt(1/(z*z)+1)) - -proc coth*(z: Complex): Complex = - ## Returns the hyperbolic cotangent of `z`. - result = cosh(z)/sinh(z) + result = ln(T(1.0)/z + sqrt(T(1.0)/(z*z) + T(1.0))) -proc arccoth*(z: Complex): Complex = - ## Returns the inverse hyperbolic cotangent of `z`. - result = 0.5*(ln(1+1/z)-ln(1-1/z)) - -proc phase*(z: Complex): float = - ## Returns the phase of `z`. +func phase*[T](z: Complex[T]): T = + ## Returns the phase (or argument) of `z`, that is the angle in polar representation. + ## + ## | `result = arctan2(z.im, z.re)` arctan2(z.im, z.re) -proc polar*(z: Complex): tuple[r, phi: float] = +func polar*[T](z: Complex[T]): tuple[r, phi: T] = ## Returns `z` in polar coordinates. 
- result.r = abs(z) - result.phi = phase(z) - -proc rect*(r: float, phi: float): Complex = + ## + ## | `result.r = abs(z)` + ## | `result.phi = phase(z)` + ## + ## **See also:** + ## * `rect func<#rect,T,T>`_ for the inverse operation + (r: abs(z), phi: phase(z)) + +func rect*[T](r, phi: T): Complex[T] = ## Returns the complex number with polar coordinates `r` and `phi`. - result.re = r * cos(phi) - result.im = r * sin(phi) + ## + ## | `result.re = r * cos(phi)` + ## | `result.im = r * sin(phi)` + ## + ## **See also:** + ## * `polar func<#polar,Complex[T]>`_ for the inverse operation + complex(r * cos(phi), r * sin(phi)) + +func almostEqual*[T: SomeFloat](x, y: Complex[T]; unitsInLastPlace: Natural = 4): bool = + ## Checks if two complex values are almost equal, using the + ## [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon). + ## + ## Two complex values are considered almost equal if their real and imaginary + ## components are almost equal. + ## + ## `unitsInLastPlace` is the max number of + ## [units in the last place](https://en.wikipedia.org/wiki/Unit_in_the_last_place) + ## difference tolerated when comparing two numbers. The larger the value, the + ## more error is allowed. A `0` value means that two numbers must be exactly the + ## same to be considered equal. + ## + ## The machine epsilon has to be scaled to the magnitude of the values used + ## and multiplied by the desired precision in ULPs unless the difference is + ## subnormal. + almostEqual(x.re, y.re, unitsInLastPlace = unitsInLastPlace) and + almostEqual(x.im, y.im, unitsInLastPlace = unitsInLastPlace) + +func `$`*(z: Complex): string = + ## Returns `z`'s string representation as `"(re, im)"`. + runnableExamples: + doAssert $complex(1.0, 2.0) == "(1.0, 2.0)" - -proc `$`*(z: Complex): string = - ## Returns `z`'s string representation as ``"(re, im)"``. 
result = "(" & $z.re & ", " & $z.im & ")" -{.pop.} - +proc formatValueAsTuple(result: var string; value: Complex; specifier: string) = + ## Format implementation for `Complex` representing the value as a (real, imaginary) tuple. + result.add "(" + formatValue(result, value.re, specifier) + result.add ", " + formatValue(result, value.im, specifier) + result.add ")" + +proc formatValueAsComplexNumber(result: var string; value: Complex; specifier: string) = + ## Format implementation for `Complex` representing the value as a (RE+IMj) number + ## By default, the real and imaginary parts are formatted using the general ('g') format + let specifier = if specifier.contains({'e', 'E', 'f', 'F', 'g', 'G'}): + specifier.replace("j") + else: + specifier.replace('j', 'g') + result.add "(" + formatValue(result, value.re, specifier) + if value.im >= 0 and not specifier.contains({'+', '-'}): + result.add "+" + formatValue(result, value.im, specifier) + result.add "j)" + +proc formatValue*(result: var string; value: Complex; specifier: string) = + ## Standard format implementation for `Complex`. It makes little + ## sense to call this directly, but it is required to exist + ## by the `&` macro. + ## For complex numbers, we add a specific 'j' specifier, which formats + ## the value as (A+Bj) like in mathematics. 
+ if specifier.len == 0: + result.add $value + elif 'j' in specifier: + formatValueAsComplexNumber(result, value, specifier) + else: + formatValueAsTuple(result, value, specifier) -when isMainModule: - var z = (0.0, 0.0) - var oo = (1.0,1.0) - var a = (1.0, 2.0) - var b = (-1.0, -2.0) - var m1 = (-1.0, 0.0) - var i = (0.0,1.0) - var one = (1.0,0.0) - var tt = (10.0, 20.0) - var ipi = (0.0, -PI) - - assert( a == a ) - assert( (a-a) == z ) - assert( (a+b) == z ) - assert( (a/b) == m1 ) - assert( (1.0/a) == (0.2, -0.4) ) - assert( (a*b) == (3.0, -4.0) ) - assert( 10.0*a == tt ) - assert( a*10.0 == tt ) - assert( tt/10.0 == a ) - assert( oo+(-1.0) == i ) - assert( (-1.0)+oo == i ) - assert( abs(oo) == sqrt(2.0) ) - assert( conjugate(a) == (1.0, -2.0) ) - assert( sqrt(m1) == i ) - assert( exp(ipi) =~ m1 ) - - assert( pow(a,b) =~ (-3.72999124927876, -1.68815826725068) ) - assert( pow(z,a) =~ (0.0, 0.0) ) - assert( pow(z,z) =~ (1.0, 0.0) ) - assert( pow(a,one) =~ a ) - assert( pow(a,m1) =~ (0.2, -0.4) ) - - assert( ln(a) =~ (0.804718956217050, 1.107148717794090) ) - assert( log10(a) =~ (0.349485002168009, 0.480828578784234) ) - assert( log2(a) =~ (1.16096404744368, 1.59727796468811) ) - - assert( sin(a) =~ (3.16577851321617, 1.95960104142161) ) - assert( cos(a) =~ (2.03272300701967, -3.05189779915180) ) - assert( tan(a) =~ (0.0338128260798967, 1.0147936161466335) ) - assert( cot(a) =~ 1.0/tan(a) ) - assert( sec(a) =~ 1.0/cos(a) ) - assert( csc(a) =~ 1.0/sin(a) ) - assert( arcsin(a) =~ (0.427078586392476, 1.528570919480998) ) - assert( arccos(a) =~ (1.14371774040242, -1.52857091948100) ) - assert( arctan(a) =~ (1.338972522294494, 0.402359478108525) ) - - assert( cosh(a) =~ (-0.642148124715520, 1.068607421382778) ) - assert( sinh(a) =~ (-0.489056259041294, 1.403119250622040) ) - assert( tanh(a) =~ (1.1667362572409199,-0.243458201185725) ) - assert( sech(a) =~ 1/cosh(a) ) - assert( csch(a) =~ 1/sinh(a) ) - assert( coth(a) =~ 1/tanh(a) ) - assert( arccosh(a) =~ 
(1.528570919480998, 1.14371774040242) ) - assert( arcsinh(a) =~ (1.469351744368185, 1.06344002357775) ) - assert( arctanh(a) =~ (0.173286795139986, 1.17809724509617) ) - assert( arcsech(a) =~ arccosh(1/a) ) - assert( arccsch(a) =~ arcsinh(1/a) ) - assert( arccoth(a) =~ arctanh(1/a) ) - - assert( phase(a) == 1.1071487177940904 ) - var t = polar(a) - assert( rect(t.r, t.phi) =~ a ) - assert( rect(1.0, 2.0) =~ (-0.4161468365471424, 0.9092974268256817) ) +{.pop.} diff --git a/lib/pure/concurrency/atomics.nim b/lib/pure/concurrency/atomics.nim new file mode 100644 index 000000000..818f1b37a --- /dev/null +++ b/lib/pure/concurrency/atomics.nim @@ -0,0 +1,433 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2018 Jörg Wollenschläger +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Types and operations for atomic operations and lockless algorithms. +## +## Unstable API. +## +## By default, C++ uses C11 atomic primitives. To use C++ `std::atomic`, +## `-d:nimUseCppAtomics` can be defined. 
+ +runnableExamples: + # Atomic + var loc: Atomic[int] + loc.store(4) + assert loc.load == 4 + loc.store(2) + assert loc.load(moRelaxed) == 2 + loc.store(9) + assert loc.load(moAcquire) == 9 + loc.store(0, moRelease) + assert loc.load == 0 + + assert loc.exchange(7) == 0 + assert loc.load == 7 + + var expected = 7 + assert loc.compareExchange(expected, 5, moRelaxed, moRelaxed) + assert expected == 7 + assert loc.load == 5 + + assert not loc.compareExchange(expected, 12, moRelaxed, moRelaxed) + assert expected == 5 + assert loc.load == 5 + + assert loc.fetchAdd(1) == 5 + assert loc.fetchAdd(2) == 6 + assert loc.fetchSub(3) == 8 + + loc.atomicInc(1) + assert loc.load == 6 + + # AtomicFlag + var flag: AtomicFlag + + assert not flag.testAndSet + assert flag.testAndSet + flag.clear(moRelaxed) + assert not flag.testAndSet + +when (defined(cpp) and defined(nimUseCppAtomics)) or defined(nimdoc): + # For the C++ backend, types and operations map directly to C++11 atomics. + + {.push, header: "<atomic>".} + + type + MemoryOrder* {.importcpp: "std::memory_order".} = enum + ## Specifies how non-atomic operations can be reordered around atomic + ## operations. + + moRelaxed + ## No ordering constraints. Only the atomicity and ordering against + ## other atomic operations are guaranteed. + + moConsume + ## This ordering is currently discouraged as its semantics are + ## being revised. Acquire operations should be preferred. + + moAcquire + ## When applied to a load operation, no reads or writes in the + ## current thread can be reordered before this operation. + + moRelease + ## When applied to a store operation, no reads or writes in the + ## current thread can be reordered after this operation. + + moAcquireRelease + ## When applied to a read-modify-write operation, this behaves like + ## both an acquire and a release operation. 
+ + moSequentiallyConsistent + ## Behaves like Acquire when applied to load, like Release when + ## applied to a store and like AcquireRelease when applied to a + ## read-modify-write operation. + ## Also guarantees that all threads observe the same total ordering + ## with other moSequentiallyConsistent operations. + + type + Atomic*[T] {.importcpp: "std::atomic", completeStruct.} = object + ## An atomic object with underlying type `T`. + raw: T + + AtomicFlag* {.importcpp: "std::atomic_flag", size: 1.} = object + ## An atomic boolean state. + + # Access operations + + proc load*[T](location: var Atomic[T]; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.load(@)".} + ## Atomically obtains the value of the atomic object. + + proc store*[T](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.importcpp: "#.store(@)".} + ## Atomically replaces the value of the atomic object with the `desired` + ## value. + + proc exchange*[T](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.exchange(@)".} + ## Atomically replaces the value of the atomic object with the `desired` + ## value and returns the old value. + + proc compareExchange*[T](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.importcpp: "#.compare_exchange_strong(@)".} + ## Atomically compares the value of the atomic object with the `expected` + ## value and performs exchange with the `desired` one if equal or load if + ## not. Returns true if the exchange was successful. + + proc compareExchange*[T](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.importcpp: "#.compare_exchange_strong(@)".} + ## Same as above, but allows for different memory orders for success and + ## failure. 
+ + proc compareExchangeWeak*[T](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.importcpp: "#.compare_exchange_weak(@)".} + ## Same as `compareExchange`, but is allowed to fail spuriously. + + proc compareExchangeWeak*[T](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.importcpp: "#.compare_exchange_weak(@)".} + ## Same as the single-order `compareExchangeWeak`, but allows for different + ## memory orders for success and failure. + + # Numerical operations + + proc fetchAdd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.fetch_add(@)".} + ## Atomically adds a `value` to the atomic integer and returns the + ## original value. + + proc fetchSub*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.fetch_sub(@)".} + ## Atomically subtracts a `value` from the atomic integer and returns the + ## original value. + + proc fetchAnd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.fetch_and(@)".} + ## Atomically replaces the atomic integer with its bitwise AND + ## with the specified `value` and returns the original value. + + proc fetchOr*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.fetch_or(@)".} + ## Atomically replaces the atomic integer with its bitwise OR + ## with the specified `value` and returns the original value. + + proc fetchXor*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importcpp: "#.fetch_xor(@)".} + ## Atomically replaces the atomic integer with its bitwise XOR + ## with the specified `value` and returns the original value. 
+ + # Flag operations + + proc testAndSet*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent): bool {.importcpp: "#.test_and_set(@)".} + ## Atomically sets the atomic flag to true and returns the original value. + + proc clear*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent) {.importcpp: "#.clear(@)".} + ## Atomically sets the value of the atomic flag to false. + + proc fence*(order: MemoryOrder) {.importcpp: "std::atomic_thread_fence(@)".} + ## Ensures memory ordering without using atomic operations. + + proc signalFence*(order: MemoryOrder) {.importcpp: "std::atomic_signal_fence(@)".} + ## Prevents reordering of accesses by the compiler as would fence, but + ## inserts no CPU instructions for memory ordering. + + {.pop.} + +else: + # For the C backend, atomics map to C11 built-ins on GCC and Clang for + # trivial Nim types. Other types are implemented using spin locks. + # This could be overcome by supporting advanced importc-patterns. + + # Since MSVC does not implement C11, we fall back to MS intrinsics + # where available. + + type + Trivial = SomeNumber | bool | enum | ptr | pointer + # A type that is known to be atomic and whose size is known at + # compile time to be 8 bytes or less + + template nonAtomicType*(T: typedesc[Trivial]): untyped = + # Maps types to integers of the same size + when sizeof(T) == 1: int8 + elif sizeof(T) == 2: int16 + elif sizeof(T) == 4: int32 + elif sizeof(T) == 8: int64 + + when defined(vcc): + + # TODO: Trivial types should be volatile and use VC's special volatile + # semantics for store and loads. 
+ + type + MemoryOrder* = enum + moRelaxed + moConsume + moAcquire + moRelease + moAcquireRelease + moSequentiallyConsistent + + Atomic*[T] = object + when T is Trivial: + value: T.nonAtomicType + else: + nonAtomicValue: T + guard: AtomicFlag + + AtomicFlag* = distinct int8 + + {.push header: "<intrin.h>".} + + # MSVC intrinsics + proc interlockedExchange(location: pointer; desired: int8): int8 {.importc: "_InterlockedExchange8".} + proc interlockedExchange(location: pointer; desired: int16): int16 {.importc: "_InterlockedExchange16".} + proc interlockedExchange(location: pointer; desired: int32): int32 {.importc: "_InterlockedExchange".} + proc interlockedExchange(location: pointer; desired: int64): int64 {.importc: "_InterlockedExchange64".} + + proc interlockedCompareExchange(location: pointer; desired, expected: int8): int8 {.importc: "_InterlockedCompareExchange8".} + proc interlockedCompareExchange(location: pointer; desired, expected: int16): int16 {.importc: "_InterlockedCompareExchange16".} + proc interlockedCompareExchange(location: pointer; desired, expected: int32): int32 {.importc: "_InterlockedCompareExchange".} + proc interlockedCompareExchange(location: pointer; desired, expected: int64): int64 {.importc: "_InterlockedCompareExchange64".} + + proc interlockedAnd(location: pointer; value: int8): int8 {.importc: "_InterlockedAnd8".} + proc interlockedAnd(location: pointer; value: int16): int16 {.importc: "_InterlockedAnd16".} + proc interlockedAnd(location: pointer; value: int32): int32 {.importc: "_InterlockedAnd".} + proc interlockedAnd(location: pointer; value: int64): int64 {.importc: "_InterlockedAnd64".} + + proc interlockedOr(location: pointer; value: int8): int8 {.importc: "_InterlockedOr8".} + proc interlockedOr(location: pointer; value: int16): int16 {.importc: "_InterlockedOr16".} + proc interlockedOr(location: pointer; value: int32): int32 {.importc: "_InterlockedOr".} + proc interlockedOr(location: pointer; value: int64): int64 {.importc: 
"_InterlockedOr64".} + + proc interlockedXor(location: pointer; value: int8): int8 {.importc: "_InterlockedXor8".} + proc interlockedXor(location: pointer; value: int16): int16 {.importc: "_InterlockedXor16".} + proc interlockedXor(location: pointer; value: int32): int32 {.importc: "_InterlockedXor".} + proc interlockedXor(location: pointer; value: int64): int64 {.importc: "_InterlockedXor64".} + + proc fence(order: MemoryOrder): int64 {.importc: "_ReadWriteBarrier()".} + proc signalFence(order: MemoryOrder): int64 {.importc: "_ReadWriteBarrier()".} + + {.pop.} + + proc testAndSet*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent): bool = + interlockedOr(addr(location), 1'i8) == 1'i8 + proc clear*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent) = + discard interlockedAnd(addr(location), 0'i8) + + proc load*[T: Trivial](location: var Atomic[T]; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](interlockedOr(addr(location.value), (nonAtomicType(T))0)) + proc store*[T: Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.inline.} = + discard interlockedExchange(addr(location.value), cast[nonAtomicType(T)](desired)) + + proc exchange*[T: Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + ## Atomically replaces the stored value with `desired` and returns the old value. + # Cast to the same-sized integer (exactly as `store` does) so that the intrinsic overload matching the width of `location.value` is selected; a fixed `int64` cast would pick the 64-bit intrinsic for every `T`. + cast[T](interlockedExchange(addr(location.value), cast[nonAtomicType(T)](desired))) + proc compareExchange*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.inline.} = + ## Atomically stores `desired` if the current value equals `expected` and returns true; otherwise copies the current value into `expected` and returns false. + let comparand = cast[nonAtomicType(T)](expected) + let previous = interlockedCompareExchange(addr(location.value), cast[nonAtomicType(T)](desired), comparand) + result = previous == comparand + # On failure, hand the value that was actually observed back through `expected`. + if not result: expected = cast[T](previous) + proc compareExchange*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchange(location, expected, desired, order, order) + proc compareExchangeWeak*[T: Trivial](location: var 
Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.inline.} = + compareExchange(location, expected, desired, success, failure) + proc compareExchangeWeak*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchangeWeak(location, expected, desired, order, order) + + proc fetchAdd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + ## Atomically adds `value` to the atomic integer and returns the original value. + result = location.load() + # Retry until the swap succeeds; reload after every failed attempt so that `result` is the value that was actually replaced. + while not compareExchangeWeak(location, result, result + value): result = location.load() + proc fetchSub*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + ## Atomically subtracts `value` from the atomic integer and returns the original value. + # `T(0) - value` rather than `-value`: unary minus is not defined for unsigned integer types. + fetchAdd(location, T(0) - value, order) + proc fetchAnd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](interlockedAnd(addr(location.value), cast[nonAtomicType(T)](value))) + proc fetchOr*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](interlockedOr(addr(location.value), cast[nonAtomicType(T)](value))) + proc fetchXor*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](interlockedXor(addr(location.value), cast[nonAtomicType(T)](value))) + + else: + when defined(cpp): + {.push, header: "<atomic>".} + template maybeWrapStd(x: string): string = + "std::" & x + else: + {.push, header: "<stdatomic.h>".} + template maybeWrapStd(x: string): string = + x + + type + MemoryOrder* {.importc: "memory_order".maybeWrapStd.} = enum + moRelaxed + moConsume + moAcquire + moRelease + moAcquireRelease + moSequentiallyConsistent + + when defined(cpp): + type + # Atomic*[T] {.importcpp: "_Atomic('0)".} = object + + AtomicInt8 {.importc: "std::atomic<NI8>".} = int8 + AtomicInt16 {.importc: "std::atomic<NI16>".} = 
int16 + AtomicInt32 {.importc: "std::atomic<NI32>".} = int32 + AtomicInt64 {.importc: "std::atomic<NI64>".} = int64 + else: + type + # Atomic*[T] {.importcpp: "_Atomic('0)".} = object + + AtomicInt8 {.importc: "_Atomic NI8".} = int8 + AtomicInt16 {.importc: "_Atomic NI16".} = int16 + AtomicInt32 {.importc: "_Atomic NI32".} = int32 + AtomicInt64 {.importc: "_Atomic NI64".} = int64 + + type + AtomicFlag* {.importc: "atomic_flag".maybeWrapStd, size: 1.} = object + + Atomic*[T] = object + when T is Trivial: + # Maps the size of a trivial type to it's internal atomic type + when sizeof(T) == 1: value: AtomicInt8 + elif sizeof(T) == 2: value: AtomicInt16 + elif sizeof(T) == 4: value: AtomicInt32 + elif sizeof(T) == 8: value: AtomicInt64 + else: + nonAtomicValue: T + guard: AtomicFlag + + #proc init*[T](location: var Atomic[T]; value: T): T {.importcpp: "atomic_init(@)".} + proc atomic_load_explicit[T, A](location: ptr A; order: MemoryOrder): T {.importc: "atomic_load_explicit".maybeWrapStd.} + proc atomic_store_explicit[T, A](location: ptr A; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.importc: "atomic_store_explicit".maybeWrapStd.} + proc atomic_exchange_explicit[T, A](location: ptr A; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_exchange_explicit".maybeWrapStd.} + proc atomic_compare_exchange_strong_explicit[T, A](location: ptr A; expected: ptr T; desired: T; success, failure: MemoryOrder): bool {.importc: "atomic_compare_exchange_strong_explicit".maybeWrapStd.} + proc atomic_compare_exchange_weak_explicit[T, A](location: ptr A; expected: ptr T; desired: T; success, failure: MemoryOrder): bool {.importc: "atomic_compare_exchange_weak_explicit".maybeWrapStd.} + + # Numerical operations + proc atomic_fetch_add_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_add_explicit".maybeWrapStd.} + proc atomic_fetch_sub_explicit[T, A](location: ptr A; value: T; 
order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_sub_explicit".maybeWrapStd.} + proc atomic_fetch_and_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_and_explicit".maybeWrapStd.} + proc atomic_fetch_or_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_or_explicit".maybeWrapStd.} + proc atomic_fetch_xor_explicit[T, A](location: ptr A; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.importc: "atomic_fetch_xor_explicit".maybeWrapStd.} + + # Flag operations + # var ATOMIC_FLAG_INIT {.importc, nodecl.}: AtomicFlag + # proc init*(location: var AtomicFlag) {.inline.} = location = ATOMIC_FLAG_INIT + proc testAndSet*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent): bool {.importc: "atomic_flag_test_and_set_explicit".maybeWrapStd.} + proc clear*(location: var AtomicFlag; order: MemoryOrder = moSequentiallyConsistent) {.importc: "atomic_flag_clear_explicit".maybeWrapStd.} + + proc fence*(order: MemoryOrder) {.importc: "atomic_thread_fence".maybeWrapStd.} + proc signalFence*(order: MemoryOrder) {.importc: "atomic_signal_fence".maybeWrapStd.} + + {.pop.} + + proc load*[T: Trivial](location: var Atomic[T]; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_load_explicit[nonAtomicType(T), typeof(location.value)](addr(location.value), order)) + proc store*[T: Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.inline.} = + atomic_store_explicit(addr(location.value), cast[nonAtomicType(T)](desired), order) + proc exchange*[T: Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_exchange_explicit(addr(location.value), cast[nonAtomicType(T)](desired), order)) + proc compareExchange*[T: Trivial](location: var Atomic[T]; expected: var T; desired: 
T; success, failure: MemoryOrder): bool {.inline.} = + atomic_compare_exchange_strong_explicit(addr(location.value), cast[ptr nonAtomicType(T)](addr(expected)), cast[nonAtomicType(T)](desired), success, failure) + proc compareExchange*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchange(location, expected, desired, order, order) + + proc compareExchangeWeak*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.inline.} = + atomic_compare_exchange_weak_explicit(addr(location.value), cast[ptr nonAtomicType(T)](addr(expected)), cast[nonAtomicType(T)](desired), success, failure) + proc compareExchangeWeak*[T: Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchangeWeak(location, expected, desired, order, order) + + # Numerical operations + proc fetchAdd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_fetch_add_explicit(addr(location.value), cast[nonAtomicType(T)](value), order)) + proc fetchSub*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_fetch_sub_explicit(addr(location.value), cast[nonAtomicType(T)](value), order)) + proc fetchAnd*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_fetch_and_explicit(addr(location.value), cast[nonAtomicType(T)](value), order)) + proc fetchOr*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_fetch_or_explicit(addr(location.value), cast[nonAtomicType(T)](value), order)) + proc fetchXor*[T: SomeInteger](location: var Atomic[T]; value: T; order: MemoryOrder = 
moSequentiallyConsistent): T {.inline.} = + cast[T](atomic_fetch_xor_explicit(addr(location.value), cast[nonAtomicType(T)](value), order)) + + template withLock[T: not Trivial](location: var Atomic[T]; order: MemoryOrder; body: untyped): untyped = + while testAndSet(location.guard, moAcquire): discard + try: + body + finally: + clear(location.guard, moRelease) + + proc load*[T: not Trivial](location: var Atomic[T]; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + withLock(location, order): + result = location.nonAtomicValue + + proc store*[T: not Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent) {.inline.} = + withLock(location, order): + location.nonAtomicValue = desired + + proc exchange*[T: not Trivial](location: var Atomic[T]; desired: T; order: MemoryOrder = moSequentiallyConsistent): T {.inline.} = + withLock(location, order): + result = location.nonAtomicValue + location.nonAtomicValue = desired + + proc compareExchange*[T: not Trivial](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.inline.} = + withLock(location, success): + if location.nonAtomicValue != expected: + expected = location.nonAtomicValue + return false + expected = desired + swap(location.nonAtomicValue, expected) + return true + + proc compareExchangeWeak*[T: not Trivial](location: var Atomic[T]; expected: var T; desired: T; success, failure: MemoryOrder): bool {.inline.} = + compareExchange(location, expected, desired, success, failure) + + proc compareExchange*[T: not Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchange(location, expected, desired, order, order) + + proc compareExchangeWeak*[T: not Trivial](location: var Atomic[T]; expected: var T; desired: T; order: MemoryOrder = moSequentiallyConsistent): bool {.inline.} = + compareExchangeWeak(location, expected, desired, order, order) + +proc 
atomicInc*[T: SomeInteger](location: var Atomic[T]; value: T = 1) {.inline.} = + ## Atomically increments the atomic integer by some `value`. + discard location.fetchAdd(value) + +proc atomicDec*[T: SomeInteger](location: var Atomic[T]; value: T = 1) {.inline.} = + ## Atomically decrements the atomic integer by some `value`. + discard location.fetchSub(value) + +proc `+=`*[T: SomeInteger](location: var Atomic[T]; value: T) {.inline.} = + ## Atomically increments the atomic integer by some `value`. + discard location.fetchAdd(value) + +proc `-=`*[T: SomeInteger](location: var Atomic[T]; value: T) {.inline.} = + ## Atomically decrements the atomic integer by some `value`. + discard location.fetchSub(value) diff --git a/lib/pure/concurrency/cpuinfo.nim b/lib/pure/concurrency/cpuinfo.nim index 6f2bc4491..9bc3fd579 100644 --- a/lib/pure/concurrency/cpuinfo.nim +++ b/lib/pure/concurrency/cpuinfo.nim @@ -7,61 +7,104 @@ # distribution, for details about the copyright. # -## This module implements procs to determine the number of CPUs / cores. +## This module implements a proc to determine the number of CPUs / cores. + +runnableExamples: + doAssert countProcessors() > 0 + include "system/inclrtl" -import strutils, os +when defined(js): + import std/jsffi + proc countProcessorsImpl(): int = + when defined(nodejs): + let jsOs = require("os") + let jsObj = jsOs.cpus().length + else: + # `navigator.hardwareConcurrency` + # works on browser as well as deno. 
+ let navigator{.importcpp.}: JsObject + let jsObj = navigator.hardwareConcurrency + result = jsObj.to int +else: + when defined(posix) and not (defined(macosx) or defined(bsd)): + import std/posix + + when defined(windows): + import std/private/win_getsysteminfo + + when defined(freebsd) or defined(macosx): + {.emit: "#include <sys/types.h>".} + + when defined(openbsd) or defined(netbsd): + {.emit: "#include <sys/param.h>".} -when not defined(windows): - import posix + when defined(macosx) or defined(bsd): + # we HAVE to emit param.h before sysctl.h so we cannot use .header here + # either. The amount of archaic bullshit in Poonix based OSes is just insane. + {.emit: "#include <sys/sysctl.h>".} + {.push nodecl.} + when defined(macosx): + proc sysctlbyname(name: cstring, + oldp: pointer, oldlenp: var csize_t, + newp: pointer, newlen: csize_t): cint {.importc.} + let + CTL_HW{.importc.}: cint + HW_NCPU{.importc.}: cint + proc sysctl[I: static[int]](name: var array[I, cint], namelen: cuint, + oldp: pointer, oldlenp: var csize_t, + newp: pointer, newlen: csize_t): cint {.importc.} + {.pop.} -when defined(linux): - import linux - -when defined(freebsd) or defined(macosx): - {.emit:"#include <sys/types.h>".} + when defined(genode): + import genode/env + + proc affinitySpaceTotal(env: GenodeEnvPtr): cuint {. 
+ importcpp: "@->cpu().affinity_space().total()".} + + when defined(haiku): + type + SystemInfo {.importc: "system_info", header: "<OS.h>".} = object + cpuCount {.importc: "cpu_count".}: uint32 + + proc getSystemInfo(info: ptr SystemInfo): int32 {.importc: "get_system_info", + header: "<OS.h>".} + + proc countProcessorsImpl(): int {.inline.} = + when defined(windows): + var + si: SystemInfo + getSystemInfo(addr si) + result = int(si.dwNumberOfProcessors) + elif defined(macosx) or defined(bsd): + let dest = addr result + var len = sizeof(result).csize_t + when defined(macosx): + # alias of "hw.activecpu" + if sysctlbyname("hw.logicalcpu", dest, len, nil, 0) == 0: + return + var mib = [CTL_HW, HW_NCPU] + if sysctl(mib, 2, dest, len, nil, 0) == 0: + return + elif defined(hpux): + result = mpctl(MPC_GETNUMSPUS, nil, nil) + elif defined(irix): + var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint + result = sysconf(SC_NPROC_ONLN) + elif defined(genode): + result = runtimeEnv.affinitySpaceTotal().int + elif defined(haiku): + var sysinfo: SystemInfo + if getSystemInfo(addr sysinfo) == 0: + result = sysinfo.cpuCount.int + else: + result = sysconf(SC_NPROCESSORS_ONLN) + if result < 0: result = 0 -when defined(openbsd) or defined(netbsd): - {.emit:"#include <sys/param.h>".} -when defined(macosx) or defined(bsd): - # we HAVE to emit param.h before sysctl.h so we cannot use .header here - # either. The amount of archaic bullshit in Poonix based OSes is just insane. - {.emit:"#include <sys/sysctl.h>".} - const - CTL_HW = 6 - HW_AVAILCPU = 25 - HW_NCPU = 3 - proc sysctl(x: ptr array[0..3, cint], y: cint, z: pointer, - a: var csize, b: pointer, c: int): cint {. - importc: "sysctl", nodecl.} proc countProcessors*(): int {.rtl, extern: "ncpi$1".} = - ## returns the numer of the processors/cores the machine has. + ## Returns the number of the processors/cores the machine has. ## Returns 0 if it cannot be detected. 
- when defined(windows): - var x = getEnv("NUMBER_OF_PROCESSORS") - if x.len > 0: result = parseInt(x.string) - elif defined(macosx) or defined(bsd): - var - mib: array[0..3, cint] - numCPU: int - len: csize - mib[0] = CTL_HW - mib[1] = HW_AVAILCPU - len = sizeof(numCPU) - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - if numCPU < 1: - mib[1] = HW_NCPU - discard sysctl(addr(mib), 2, addr(numCPU), len, nil, 0) - result = numCPU - elif defined(hpux): - result = mpctl(MPC_GETNUMSPUS, nil, nil) - elif defined(irix): - var SC_NPROC_ONLN {.importc: "_SC_NPROC_ONLN", header: "<unistd.h>".}: cint - result = sysconf(SC_NPROC_ONLN) - else: - result = sysconf(SC_NPROCESSORS_ONLN) - if result <= 0: result = 1 - + countProcessorsImpl() diff --git a/lib/pure/concurrency/cpuload.nim b/lib/pure/concurrency/cpuload.nim index c1796089a..bfbf16721 100644 --- a/lib/pure/concurrency/cpuload.nim +++ b/lib/pure/concurrency/cpuload.nim @@ -9,13 +9,18 @@ ## This module implements a helper for a thread pool to determine whether ## creating a thread is a good idea. +## +## Unstable API. 
when defined(windows): - import winlean, os, strutils, math + import std/[winlean, os, strutils, math] - proc `-`(a, b: TFILETIME): int64 = a.rdFileTime - b.rdFileTime + proc `-`(a, b: FILETIME): int64 = a.rdFileTime - b.rdFileTime elif defined(linux): - from cpuinfo import countProcessors + from std/cpuinfo import countProcessors + +when defined(nimPreviewSlimSystem): + import std/syncio type ThreadPoolAdvice* = enum @@ -25,16 +30,16 @@ type ThreadPoolState* = object when defined(windows): - prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: TFILETIME + prevSysKernel, prevSysUser, prevProcKernel, prevProcUser: FILETIME calls*: int proc advice*(s: var ThreadPoolState): ThreadPoolAdvice = when defined(windows): var sysIdle, sysKernel, sysUser, - procCreation, procExit, procKernel, procUser: TFILETIME + procCreation, procExit, procKernel, procUser: FILETIME if getSystemTimes(sysIdle, sysKernel, sysUser) == 0 or - getProcessTimes(THandle(-1), procCreation, procExit, + getProcessTimes(Handle(-1), procCreation, procExit, procKernel, procUser) == 0: return doNothing if s.calls > 0: @@ -45,32 +50,35 @@ proc advice*(s: var ThreadPoolState): ThreadPoolAdvice = procKernelDiff = procKernel - s.prevProcKernel procUserDiff = procUser - s.prevProcUser - sysTotal = int(sysKernelDiff + sysUserDiff) - procTotal = int(procKernelDiff + procUserDiff) + sysTotal = sysKernelDiff + sysUserDiff + procTotal = procKernelDiff + procUserDiff # total CPU usage < 85% --> create a new worker thread. # Measurements show that 100% and often even 90% is not reached even # if all my cores are busy. 
- if sysTotal == 0 or procTotal / sysTotal < 0.85: + if sysTotal == 0 or procTotal.float / sysTotal.float < 0.85: result = doCreateThread s.prevSysKernel = sysKernel s.prevSysUser = sysUser s.prevProcKernel = procKernel s.prevProcUser = procUser elif defined(linux): - proc fscanf(c: File, frmt: cstring) {.varargs, importc, + proc fscanf(c: File, frmt: cstring) {.varargs, importc, header: "<stdio.h>".} - var f = open("/proc/loadavg") - var b: float - var busy, total: int - fscanf(f,"%lf %lf %lf %ld/%ld", - addr b, addr b, addr b, addr busy, addr total) - f.close() - let cpus = countProcessors() - if busy-1 < cpus: - result = doCreateThread - elif busy-1 >= cpus*2: - result = doShutdownThread + var f: File + if f.open("/proc/loadavg"): + var b: float + var busy, total: int + fscanf(f,"%lf %lf %lf %ld/%ld", + addr b, addr b, addr b, addr busy, addr total) + f.close() + let cpus = countProcessors() + if busy-1 < cpus: + result = doCreateThread + elif busy-1 >= cpus*2: + result = doShutdownThread + else: + result = doNothing else: result = doNothing else: @@ -78,10 +86,12 @@ proc advice*(s: var ThreadPoolState): ThreadPoolAdvice = result = doNothing inc s.calls -when isMainModule: +when not defined(testing) and isMainModule and not defined(nimdoc): + import std/random + proc busyLoop() = while true: - discard random(80) + discard rand(80) os.sleep(100) spawn busyLoop() diff --git a/lib/pure/concurrency/threadpool.nim b/lib/pure/concurrency/threadpool.nim index 9f1e53fb8..06ed2fe54 100644 --- a/lib/pure/concurrency/threadpool.nim +++ b/lib/pure/concurrency/threadpool.nim @@ -7,30 +7,43 @@ # distribution, for details about the copyright. # -## Implements Nim's 'spawn'. +{.deprecated: "use the nimble packages `malebolgia`, `taskpools` or `weave` instead".} + +## Implements Nim's `parallel & spawn statements <manual_experimental.html#parallel-amp-spawn>`_. +## +## Unstable API. 
+## +## See also +## ======== +## * `threads module <typedthreads.html>`_ for basic thread support +## * `locks module <locks.html>`_ for locks and condition variables +## * `asyncdispatch module <asyncdispatch.html>`_ for asynchronous IO when not compileOption("threads"): {.error: "Threadpool requires --threads:on option.".} -import cpuinfo, cpuload, locks +import std/[cpuinfo, cpuload, locks, os] + +when defined(nimPreviewSlimSystem): + import std/[assertions, typedthreads, sysatomics] {.push stackTrace:off.} type Semaphore = object - c: TCond - L: TLock + c: Cond + L: Lock counter: int -proc createSemaphore(): Semaphore = - initCond(result.c) - initLock(result.L) +proc initSemaphore(cv: var Semaphore) = + initCond(cv.c) + initLock(cv.L) proc destroySemaphore(cv: var Semaphore) {.inline.} = deinitCond(cv.c) deinitLock(cv.L) -proc await(cv: var Semaphore) = +proc blockUntil(cv: var Semaphore) = acquire(cv.L) while cv.counter <= 0: wait(cv.c, cv.L) @@ -43,45 +56,42 @@ proc signal(cv: var Semaphore) = release(cv.L) signal(cv.c) -const CacheLineSize = 32 # true for most archs +const CacheLineSize = 64 # true for most archs type - Barrier {.compilerProc.} = object + Barrier {.compilerproc.} = object entered: int cv: Semaphore # Semaphore takes 3 words at least - when sizeof(int) < 8: - cacheAlign: array[CacheLineSize-4*sizeof(int), byte] - left: int - cacheAlign2: array[CacheLineSize-sizeof(int), byte] - interest: bool ## wether the master is interested in the "all done" event + left {.align(CacheLineSize).}: int + interest {.align(CacheLineSize).} : bool # whether the master is interested in the "all done" event -proc barrierEnter(b: ptr Barrier) {.compilerProc, inline.} = +proc barrierEnter(b: ptr Barrier) {.compilerproc, inline.} = # due to the signaling between threads, it is ensured we are the only # one with access to 'entered' so we don't need 'atomicInc' here: inc b.entered # also we need no 'fence' instructions here as soon 'nimArgsPassingDone' # will be 
called which already will perform a fence for us. -proc barrierLeave(b: ptr Barrier) {.compilerProc, inline.} = +proc barrierLeave(b: ptr Barrier) {.compilerproc, inline.} = atomicInc b.left when not defined(x86): fence() # We may not have seen the final value of b.entered yet, # so we need to check for >= instead of ==. if b.interest and b.left >= b.entered: signal(b.cv) -proc openBarrier(b: ptr Barrier) {.compilerProc, inline.} = +proc openBarrier(b: ptr Barrier) {.compilerproc, inline.} = b.entered = 0 b.left = 0 b.interest = false -proc closeBarrier(b: ptr Barrier) {.compilerProc.} = +proc closeBarrier(b: ptr Barrier) {.compilerproc.} = fence() if b.left != b.entered: - b.cv = createSemaphore() + b.cv.initSemaphore() fence() b.interest = true fence() - while b.left != b.entered: await(b.cv) + while b.left != b.entered: blockUntil(b.cv) destroySemaphore(b.cv) {.pop.} @@ -89,31 +99,28 @@ proc closeBarrier(b: ptr Barrier) {.compilerProc.} = # ---------------------------------------------------------------------------- type - foreign* = object ## a region that indicates the pointer comes from a - ## foreign thread heap. AwaitInfo = object cv: Semaphore idx: int - FlowVarBase* = ref FlowVarBaseObj ## untyped base class for 'FlowVar[T]' - FlowVarBaseObj = object of RootObj + FlowVarBase* = ref FlowVarBaseObj ## Untyped base class for `FlowVar[T] <#FlowVar>`_. + FlowVarBaseObj {.acyclic.} = object of RootObj ready, usesSemaphore, awaited: bool - cv: Semaphore #\ - # for 'awaitAny' support + cv: Semaphore # for 'blockUntilAny' support ai: ptr AwaitInfo idx: int data: pointer # we incRef and unref it to keep it alive; note this MUST NOT # be RootRef here otherwise the wrong GC keeps track of it! 
owner: pointer # ptr Worker - FlowVarObj[T] = object of FlowVarBaseObj + FlowVarObj[T] {.acyclic.} = object of FlowVarBaseObj blob: T - FlowVar*{.compilerProc.}[T] = ref FlowVarObj[T] ## a data flow variable + FlowVar*[T] {.compilerproc.} = ref FlowVarObj[T] ## A data flow variable. ToFreeQueue = object len: int - lock: TLock + lock: Lock empty: Semaphore data: array[128, pointer] @@ -128,13 +135,17 @@ type initialized: bool # whether it has even been initialized shutdown: bool # the pool requests to shut down this worker thread q: ToFreeQueue + readyForTask: Semaphore + +const threadpoolWaitMs {.intdefine.}: int = 100 -proc await*(fv: FlowVarBase) = - ## waits until the value for the flowVar arrives. Usually it is not necessary - ## to call this explicitly. +proc blockUntil*(fv: var FlowVarBaseObj) = + ## Waits until the value for `fv` arrives. + ## + ## Usually it is not necessary to call this explicitly. if fv.usesSemaphore and not fv.awaited: fv.awaited = true - await(fv.cv) + blockUntil(fv.cv) destroySemaphore(fv.cv) proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool = @@ -142,13 +153,13 @@ proc selectWorker(w: ptr Worker; fn: WorkerProc; data: pointer): bool = w.data = data w.f = fn signal(w.taskArrived) - await(w.taskStarted) + blockUntil(w.taskStarted) result = true proc cleanFlowVars(w: ptr Worker) = let q = addr(w.q) acquire(q.lock) - for i in 0 .. 
<q.len: + for i in 0 ..< q.len: GC_unref(cast[RootRef](q.data[i])) #echo "GC_unref" q.len = 0 @@ -167,12 +178,21 @@ proc wakeupWorkerToProcessQueue(w: ptr Worker) = signal(w.q.empty) signal(w.taskArrived) -proc finished(fv: FlowVarBase) = - doAssert fv.ai.isNil, "flowVar is still attached to an 'awaitAny'" +proc attach(fv: FlowVarBase; i: int): bool = + acquire(fv.cv.L) + if fv.cv.counter <= 0: + fv.idx = i + result = true + else: + result = false + release(fv.cv.L) + +proc finished(fv: var FlowVarBaseObj) = + doAssert fv.ai.isNil, "flowVar is still attached to an 'blockUntilAny'" # we have to protect against the rare cases where the owner of the flowVar # simply disregards the flowVar and yet the "flowVar" has not yet written # anything to it: - await(fv) + blockUntil(fv) if fv.data.isNil: return let owner = cast[ptr Worker](fv.owner) let q = addr(owner.q) @@ -181,23 +201,27 @@ proc finished(fv: FlowVarBase) = #echo "EXHAUSTED!" release(q.lock) wakeupWorkerToProcessQueue(owner) - await(q.empty) + blockUntil(q.empty) acquire(q.lock) q.data[q.len] = cast[pointer](fv.data) inc q.len release(q.lock) fv.data = nil + # the worker thread waits for "data" to be set to nil before shutting down + owner.data = nil -proc fvFinalizer[T](fv: FlowVar[T]) = finished(fv) +proc `=destroy`[T](fv: var FlowVarObj[T]) = + finished(fv) + `=destroy`(fv.blob) -proc nimCreateFlowVar[T](): FlowVar[T] {.compilerProc.} = - new(result, fvFinalizer) +proc nimCreateFlowVar[T](): FlowVar[T] {.compilerproc.} = + new(result) -proc nimFlowVarCreateSemaphore(fv: FlowVarBase) {.compilerProc.} = - fv.cv = createSemaphore() +proc nimFlowVarCreateSemaphore(fv: FlowVarBase) {.compilerproc.} = + fv.cv.initSemaphore() fv.usesSemaphore = true -proc nimFlowVarSignal(fv: FlowVarBase) {.compilerProc.} = +proc nimFlowVarSignal(fv: FlowVarBase) {.compilerproc.} = if fv.ai != nil: acquire(fv.ai.cv.L) fv.ai.idx = fv.idx @@ -208,169 +232,295 @@ proc nimFlowVarSignal(fv: FlowVarBase) {.compilerProc.} = signal(fv.cv) 
proc awaitAndThen*[T](fv: FlowVar[T]; action: proc (x: T) {.closure.}) = - ## blocks until the ``fv`` is available and then passes its value - ## to ``action``. Note that due to Nim's parameter passing semantics this - ## means that ``T`` doesn't need to be copied and so ``awaitAndThen`` can - ## sometimes be more efficient than ``^``. - await(fv) - when T is string or T is seq: + ## Blocks until `fv` is available and then passes its value + ## to `action`. + ## + ## Note that due to Nim's parameter passing semantics, this + ## means that `T` doesn't need to be copied, so `awaitAndThen` can + ## sometimes be more efficient than the `^ proc <#^,FlowVar[T]>`_. + blockUntil(fv[]) + when defined(nimV2): + action(fv.blob) + elif T is string or T is seq: action(cast[T](fv.data)) elif T is ref: {.error: "'awaitAndThen' not available for FlowVar[ref]".} else: action(fv.blob) - finished(fv) + finished(fv[]) -proc `^`*[T](fv: FlowVar[ref T]): foreign ptr T = - ## blocks until the value is available and then returns this value. - await(fv) - result = cast[foreign ptr T](fv.data) +proc unsafeRead*[T](fv: FlowVar[ref T]): ptr T = + ## Blocks until the value is available and then returns this value. + blockUntil(fv[]) + when defined(nimV2): + result = cast[ptr T](fv.blob) + else: + result = cast[ptr T](fv.data) + finished(fv[]) proc `^`*[T](fv: FlowVar[T]): T = - ## blocks until the value is available and then returns this value. - await(fv) - when T is string or T is seq: - # XXX closures? deepCopy? - result = cast[T](fv.data) + ## Blocks until the value is available and then returns this value. + blockUntil(fv[]) + when not defined(nimV2) and (T is string or T is seq or T is ref): + deepCopy result, cast[T](fv.data) else: result = fv.blob - -proc awaitAny*(flowVars: openArray[FlowVarBase]): int = - ## awaits any of the given flowVars. Returns the index of one flowVar for - ## which a value arrived. A flowVar only supports one call to 'awaitAny' at - ## the same time. 
That means if you await([a,b]) and await([b,c]) the second - ## call will only await 'c'. If there is no flowVar left to be able to wait - ## on, -1 is returned. - ## **Note**: This results in non-deterministic behaviour and so should be - ## avoided. + finished(fv[]) + +proc blockUntilAny*(flowVars: openArray[FlowVarBase]): int = + ## Awaits any of the given `flowVars`. Returns the index of one `flowVar` + ## for which a value arrived. + ## + ## A `flowVar` only supports one call to `blockUntilAny` at the same time. + ## That means if you `blockUntilAny([a,b])` and `blockUntilAny([b,c])` + ## the second call will only block until `c`. If there is no `flowVar` left + ## to be able to wait on, -1 is returned. + ## + ## **Note:** This results in non-deterministic behaviour and should be avoided. var ai: AwaitInfo - ai.cv = createSemaphore() + ai.cv.initSemaphore() var conflicts = 0 + result = -1 for i in 0 .. flowVars.high: if cas(addr flowVars[i].ai, nil, addr ai): - flowVars[i].idx = i + if not attach(flowVars[i], i): + result = i + break else: inc conflicts if conflicts < flowVars.len: - await(ai.cv) - result = ai.idx + if result < 0: + blockUntil(ai.cv) + result = ai.idx for i in 0 .. flowVars.high: discard cas(addr flowVars[i].ai, addr ai, nil) - else: - result = -1 destroySemaphore(ai.cv) -proc nimArgsPassingDone(p: pointer) {.compilerProc.} = +proc isReady*(fv: FlowVarBase): bool = + ## Determines whether the specified `FlowVarBase`'s value is available. + ## + ## If `true`, awaiting `fv` will not block. + if fv.usesSemaphore and not fv.awaited: + acquire(fv.cv.L) + result = fv.cv.counter > 0 + release(fv.cv.L) + else: + result = true + +proc nimArgsPassingDone(p: pointer) {.compilerproc.} = let w = cast[ptr Worker](p) signal(w.taskStarted) const - MaxThreadPoolSize* = 256 ## maximal size of the thread pool. 256 threads - ## should be good enough for anybody ;-) + MaxThreadPoolSize* {.intdefine.} = 256 ## Maximum size of the thread pool. 
256 threads + ## should be good enough for anybody ;-) + MaxDistinguishedThread* {.intdefine.} = 32 ## Maximum number of "distinguished" threads. + +type + ThreadId* = range[0..MaxDistinguishedThread-1] ## A thread identifier. var currentPoolSize: int maxPoolSize = MaxThreadPoolSize minPoolSize = 4 - gSomeReady = createSemaphore() + gSomeReady: Semaphore readyWorker: ptr Worker +# A workaround for recursion deadlock issue +# https://github.com/nim-lang/Nim/issues/4597 +var + numSlavesLock: Lock + numSlavesRunning {.guard: numSlavesLock.}: int + numSlavesWaiting {.guard: numSlavesLock.}: int + isSlave {.threadvar.}: bool + +numSlavesLock.initLock + +gSomeReady.initSemaphore() + proc slave(w: ptr Worker) {.thread.} = + isSlave = true while true: + if w.shutdown: + w.shutdown = false + atomicDec currentPoolSize + while true: + if w.data != nil: + sleep(threadpoolWaitMs) + else: + # The flowvar finalizer ("finished()") set w.data to nil, so we can + # safely terminate the thread. + # + # TODO: look for scenarios in which the flowvar is never finalized, so + # a shut down thread gets stuck in this loop until the main thread exits. + break + break when declared(atomicStoreN): atomicStoreN(addr(w.ready), true, ATOMIC_SEQ_CST) else: w.ready = true readyWorker = w signal(gSomeReady) - await(w.taskArrived) + blockUntil(w.taskArrived) + # XXX Somebody needs to look into this (why does this assertion fail + # in Visual Studio?) 
+ when not defined(vcc) and not defined(tcc): assert(not w.ready) + + withLock numSlavesLock: + inc numSlavesRunning + + w.f(w, w.data) + + withLock numSlavesLock: + dec numSlavesRunning + + if w.q.len != 0: w.cleanFlowVars + +proc distinguishedSlave(w: ptr Worker) {.thread.} = + while true: + when declared(atomicStoreN): + atomicStoreN(addr(w.ready), true, ATOMIC_SEQ_CST) + else: + w.ready = true + signal(w.readyForTask) + blockUntil(w.taskArrived) assert(not w.ready) w.f(w, w.data) if w.q.len != 0: w.cleanFlowVars - if w.shutdown: - w.shutdown = false - atomicDec currentPoolSize var - workers: array[MaxThreadPoolSize, TThread[ptr Worker]] + workers: array[MaxThreadPoolSize, Thread[ptr Worker]] workersData: array[MaxThreadPoolSize, Worker] + distinguished: array[MaxDistinguishedThread, Thread[ptr Worker]] + distinguishedData: array[MaxDistinguishedThread, Worker] + +when defined(nimPinToCpu): + var gCpus: Natural + proc setMinPoolSize*(size: range[1..MaxThreadPoolSize]) = - ## sets the minimal thread pool size. The default value of this is 4. + ## Sets the minimum thread pool size. The default value of this is 4. minPoolSize = size proc setMaxPoolSize*(size: range[1..MaxThreadPoolSize]) = - ## sets the minimal thread pool size. The default value of this - ## is ``MaxThreadPoolSize``. + ## Sets the maximum thread pool size. The default value of this + ## is `MaxThreadPoolSize <#MaxThreadPoolSize>`_. 
maxPoolSize = size if currentPoolSize > maxPoolSize: for i in maxPoolSize..currentPoolSize-1: let w = addr(workersData[i]) w.shutdown = true -proc activateThread(i: int) {.noinline.} = - workersData[i].taskArrived = createSemaphore() - workersData[i].taskStarted = createSemaphore() +when defined(nimRecursiveSpawn): + var localThreadId {.threadvar.}: int + +proc activateWorkerThread(i: int) {.noinline.} = + workersData[i].taskArrived.initSemaphore() + workersData[i].taskStarted.initSemaphore() workersData[i].initialized = true - workersData[i].q.empty = createSemaphore() + workersData[i].q.empty.initSemaphore() initLock(workersData[i].q.lock) createThread(workers[i], slave, addr(workersData[i])) + when defined(nimRecursiveSpawn): + localThreadId = i+1 + when defined(nimPinToCpu): + if gCpus > 0: pinToCpu(workers[i], i mod gCpus) + +proc activateDistinguishedThread(i: int) {.noinline.} = + distinguishedData[i].taskArrived.initSemaphore() + distinguishedData[i].taskStarted.initSemaphore() + distinguishedData[i].initialized = true + distinguishedData[i].q.empty.initSemaphore() + initLock(distinguishedData[i].q.lock) + distinguishedData[i].readyForTask.initSemaphore() + createThread(distinguished[i], distinguishedSlave, addr(distinguishedData[i])) proc setup() = - currentPoolSize = min(countProcessors(), MaxThreadPoolSize) + let p = countProcessors() + when defined(nimPinToCpu): + gCpus = p + currentPoolSize = min(p, MaxThreadPoolSize) readyWorker = addr(workersData[0]) - for i in 0.. <currentPoolSize: activateThread(i) + for i in 0..<currentPoolSize: activateWorkerThread(i) proc preferSpawn*(): bool = - ## Use this proc to determine quickly if a 'spawn' or a direct call is - ## preferable. If it returns 'true' a 'spawn' may make sense. In general - ## it is not necessary to call this directly; use 'spawnX' instead. + ## Use this proc to determine quickly if a `spawn` or a direct call is + ## preferable. + ## + ## If it returns `true`, a `spawn` may make sense. 
In general + ## it is not necessary to call this directly; use the `spawnX template + ## <#spawnX.t>`_ instead. result = gSomeReady.counter > 0 -proc spawn*(call: expr): expr {.magic: "Spawn".} - ## always spawns a new task, so that the 'call' is never executed on - ## the calling thread. 'call' has to be proc call 'p(...)' where 'p' - ## is gcsafe and has a return type that is either 'void' or compatible - ## with ``FlowVar[T]``. - -template spawnX*(call: expr): expr = - ## spawns a new task if a CPU core is ready, otherwise executes the - ## call in the calling thread. Usually it is advised to - ## use 'spawn' in order to not block the producer for an unknown - ## amount of time. 'call' has to be proc call 'p(...)' where 'p' - ## is gcsafe and has a return type that is either 'void' or compatible - ## with ``FlowVar[T]``. +proc spawn*(call: sink typed) {.magic: "Spawn".} = + ## Always spawns a new task, so that the `call` is never executed on + ## the calling thread. + ## + ## `call` has to be a proc call `p(...)` where `p` is gcsafe and has a + ## return type that is either `void` or compatible with `FlowVar[T]`. + discard "It uses `nimSpawn3` internally" + +proc pinnedSpawn*(id: ThreadId; call: sink typed) {.magic: "Spawn".} = + ## Always spawns a new task on the worker thread with `id`, so that + ## the `call` is **always** executed on the thread. + ## + ## `call` has to be a proc call `p(...)` where `p` is gcsafe and has a + ## return type that is either `void` or compatible with `FlowVar[T]`. + discard "It uses `nimSpawn4` internally" + +template spawnX*(call) = + ## Spawns a new task if a CPU core is ready, otherwise executes the + ## call in the calling thread. + ## + ## Usually, it is advised to use the `spawn proc <#spawn,sinktyped>`_ + ## in order to not block the producer for an unknown amount of time. + ## + ## `call` has to be a proc call `p(...)` where `p` is gcsafe and has a + ## return type that is either 'void' or compatible with `FlowVar[T]`. 
(if preferSpawn(): spawn call else: call) -proc parallel*(body: stmt) {.magic: "Parallel".} - ## a parallel section can be used to execute a block in parallel. ``body`` - ## has to be in a DSL that is a particular subset of the language. Please - ## refer to the manual for further information. +proc parallel*(body: untyped) {.magic: "Parallel".} + ## A parallel section can be used to execute a block in parallel. + ## + ## `body` has to be in a DSL that is a particular subset of the language. + ## + ## Please refer to `the manual <manual_experimental.html#parallel-amp-spawn>`_ + ## for further information. var state: ThreadPoolState - stateLock: TLock + stateLock: Lock initLock stateLock -proc nimSpawn(fn: WorkerProc; data: pointer) {.compilerProc.} = +proc nimSpawn3(fn: WorkerProc; data: pointer) {.compilerproc.} = # implementation of 'spawn' that is used by the code generator. while true: if selectWorker(readyWorker, fn, data): return - for i in 0.. <currentPoolSize: + for i in 0..<currentPoolSize: if selectWorker(addr(workersData[i]), fn, data): return + # determine what to do, but keep in mind this is expensive too: # state.calls < maxPoolSize: warmup phase # (state.calls and 127) == 0: periodic check if state.calls < maxPoolSize or (state.calls and 127) == 0: # ensure the call to 'advice' is atomic: if tryAcquire(stateLock): + if currentPoolSize < minPoolSize: + if not workersData[currentPoolSize].initialized: + activateWorkerThread(currentPoolSize) + let w = addr(workersData[currentPoolSize]) + atomicInc currentPoolSize + if selectWorker(w, fn, data): + release(stateLock) + return + case advice(state) of doNothing: discard of doCreateThread: if currentPoolSize < maxPoolSize: if not workersData[currentPoolSize].initialized: - activateThread(currentPoolSize) + activateWorkerThread(currentPoolSize) let w = addr(workersData[currentPoolSize]) atomicInc currentPoolSize if selectWorker(w, fn, data): @@ -385,17 +535,72 @@ proc nimSpawn(fn: WorkerProc; data: pointer) 
{.compilerProc.} = release(stateLock) # else the acquire failed, but this means some # other thread succeeded, so we don't need to do anything here. - await(gSomeReady) + when defined(nimRecursiveSpawn): + if localThreadId > 0: + # we are a worker thread, so instead of waiting for something which + # might as well never happen (see tparallel_quicksort), we run the task + # on the current thread instead. + var self = addr(workersData[localThreadId-1]) + fn(self, data) + blockUntil(self.taskStarted) + return + + if isSlave: + # Run under lock until `numSlavesWaiting` increment to avoid a + # race (otherwise two last threads might start waiting together) + withLock numSlavesLock: + if numSlavesRunning <= numSlavesWaiting + 1: + # All the other slaves are waiting + # If we wait now, we-re deadlocked until + # an external spawn happens ! + if currentPoolSize < maxPoolSize: + if not workersData[currentPoolSize].initialized: + activateWorkerThread(currentPoolSize) + let w = addr(workersData[currentPoolSize]) + atomicInc currentPoolSize + if selectWorker(w, fn, data): + return + else: + # There is no place in the pool. We're deadlocked. + # echo "Deadlock!" + discard + + inc numSlavesWaiting + + blockUntil(gSomeReady) + + if isSlave: + withLock numSlavesLock: + dec numSlavesWaiting + +var + distinguishedLock: Lock + +initLock distinguishedLock + +proc nimSpawn4(fn: WorkerProc; data: pointer; id: ThreadId) {.compilerproc.} = + acquire(distinguishedLock) + if not distinguishedData[id].initialized: + activateDistinguishedThread(id) + release(distinguishedLock) + while true: + if selectWorker(addr(distinguishedData[id]), fn, data): break + blockUntil(distinguishedData[id].readyForTask) + proc sync*() = - ## a simple barrier to wait for all spawn'ed tasks. If you need more elaborate - ## waiting, you have to use an explicit barrier. + ## A simple barrier to wait for all `spawn`ed tasks. + ## + ## If you need more elaborate waiting, you have to use an explicit barrier. 
while true: var allReady = true - for i in 0 .. <currentPoolSize: + for i in 0 ..< currentPoolSize: if not allReady: break allReady = allReady and workersData[i].ready if allReady: break - await(gSomeReady) + sleep(threadpoolWaitMs) + # We cannot "blockUntil(gSomeReady)" because workers may be shut down between + # the time we establish that some are not "ready" and the time we wait for a + # "signal(gSomeReady)" from inside "slave()" that can never come. setup() diff --git a/lib/pure/cookies.nim b/lib/pure/cookies.nim index 6247efed2..f628aaf6b 100644 --- a/lib/pure/cookies.nim +++ b/lib/pure/cookies.nim @@ -9,53 +9,73 @@ ## This module implements helper procs for parsing Cookies. -import strtabs, times +import std/[strtabs, times, options] + +when defined(nimPreviewSlimSystem): + import std/assertions + + +type + SameSite* {.pure.} = enum ## The SameSite cookie attribute. + ## `Default` means that `setCookie` + ## proc will not set `SameSite` attribute. + Default, None, Lax, Strict + +proc parseCookies*(s: string): StringTableRef = + ## Parses cookies into a string table. + ## + ## The proc is meant to parse the Cookie header set by a client, not the + ## "Set-Cookie" header set by servers. + runnableExamples: + import std/strtabs + let cookieJar = parseCookies("a=1; foo=bar") + assert cookieJar["a"] == "1" + assert cookieJar["foo"] == "bar" -proc parseCookies*(s: string): StringTableRef = - ## parses cookies into a string table. 
result = newStringTable(modeCaseInsensitive) var i = 0 while true: - while s[i] == ' ' or s[i] == '\t': inc(i) + while i < s.len and (s[i] == ' ' or s[i] == '\t'): inc(i) var keystart = i - while s[i] != '=' and s[i] != '\0': inc(i) + while i < s.len and s[i] != '=': inc(i) var keyend = i-1 - if s[i] == '\0': break + if i >= s.len: break inc(i) # skip '=' var valstart = i - while s[i] != ';' and s[i] != '\0': inc(i) + while i < s.len and s[i] != ';': inc(i) result[substr(s, keystart, keyend)] = substr(s, valstart, i-1) - if s[i] == '\0': break + if i >= s.len: break inc(i) # skip ';' proc setCookie*(key, value: string, domain = "", path = "", expires = "", noName = false, - secure = false, httpOnly = false): string = - ## Creates a command in the format of - ## ``Set-Cookie: key=value; Domain=...; ...`` + secure = false, httpOnly = false, + maxAge = none(int), sameSite = SameSite.Default): string = + ## Creates a command in the format of + ## `Set-Cookie: key=value; Domain=...; ...` + ## + ## .. tip:: Cookies can be vulnerable. Consider setting `secure=true`, `httpOnly=true` and `sameSite=Strict`. result = "" if not noName: result.add("Set-Cookie: ") result.add key & "=" & value if domain != "": result.add("; Domain=" & domain) if path != "": result.add("; Path=" & path) if expires != "": result.add("; Expires=" & expires) - if secure: result.add("; secure") + if secure: result.add("; Secure") if httpOnly: result.add("; HttpOnly") + if maxAge.isSome: result.add("; Max-Age=" & $maxAge.unsafeGet) -proc setCookie*(key, value: string, expires: TimeInfo, - domain = "", path = "", noName = false, - secure = false, httpOnly = false): string = - ## Creates a command in the format of - ## ``Set-Cookie: key=value; Domain=...; ...`` - ## - ## **Note:** UTC is assumed as the timezone for ``expires``. 
- return setCookie(key, value, domain, path, - format(expires, "ddd',' dd MMM yyyy HH:mm:ss 'UTC'"), - noname, secure, httpOnly) + if sameSite != SameSite.Default: + if sameSite == SameSite.None: + doAssert secure, "Cookies with SameSite=None must specify the Secure attribute!" + result.add("; SameSite=" & $sameSite) -when isMainModule: - var tim = Time(int(getTime()) + 76 * (60 * 60 * 24)) - - echo(setCookie("test", "value", tim.getGMTime())) - - echo parseCookies("uid=1; kp=2") +proc setCookie*(key, value: string, expires: DateTime|Time, + domain = "", path = "", noName = false, + secure = false, httpOnly = false, + maxAge = none(int), sameSite = SameSite.Default): string = + ## Creates a command in the format of + ## `Set-Cookie: key=value; Domain=...; ...` + result = setCookie(key, value, domain, path, + format(expires.utc, "ddd',' dd MMM yyyy HH:mm:ss 'GMT'"), + noName, secure, httpOnly, maxAge, sameSite) diff --git a/lib/pure/coro.nim b/lib/pure/coro.nim new file mode 100644 index 000000000..24836e316 --- /dev/null +++ b/lib/pure/coro.nim @@ -0,0 +1,345 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Rokas Kupstys +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Nim coroutines implementation, supports several context switching methods: +## ======== ============ +## ucontext available on unix and alike (default) +## setjmp available on unix and alike (x86/64 only) +## fibers available and required on windows. +## ======== ============ +## +## -d:nimCoroutines Required to build this module. +## -d:nimCoroutinesUcontext Use ucontext backend. +## -d:nimCoroutinesSetjmp Use setjmp backend. +## -d:nimCoroutinesSetjmpBundled Use bundled setjmp implementation. +## +## Unstable API. 
+ +import system/coro_detection + +when not nimCoroutines and not defined(nimdoc): + when defined(noNimCoroutines): + {.error: "Coroutines can not be used with -d:noNimCoroutines".} + else: + {.error: "Coroutines require -d:nimCoroutines".} + +import std/[os, lists] +include system/timers + +when defined(nimPreviewSlimSystem): + import std/assertions + +const defaultStackSize = 512 * 1024 +const useOrcArc = defined(gcArc) or defined(gcOrc) or defined(gcAtomicArc) + +when useOrcArc: + proc nimGC_setStackBottom*(theStackBottom: pointer) = discard + +proc GC_addStack(bottom: pointer) {.cdecl, importc.} +proc GC_removeStack(bottom: pointer) {.cdecl, importc.} +proc GC_setActiveStack(bottom: pointer) {.cdecl, importc.} +proc GC_getActiveStack() : pointer {.cdecl, importc.} + +const + CORO_BACKEND_UCONTEXT = 0 + CORO_BACKEND_SETJMP = 1 + CORO_BACKEND_FIBERS = 2 + +when defined(windows): + const coroBackend = CORO_BACKEND_FIBERS + when defined(nimCoroutinesUcontext): + {.warning: "ucontext coroutine backend is not available on windows, defaulting to fibers.".} + when defined(nimCoroutinesSetjmp): + {.warning: "setjmp coroutine backend is not available on windows, defaulting to fibers.".} +elif defined(haiku) or defined(openbsd): + const coroBackend = CORO_BACKEND_SETJMP + when defined(nimCoroutinesUcontext): + {.warning: "ucontext coroutine backend is not available on haiku, defaulting to setjmp".} +elif defined(nimCoroutinesSetjmp) or defined(nimCoroutinesSetjmpBundled): + const coroBackend = CORO_BACKEND_SETJMP +else: + const coroBackend = CORO_BACKEND_UCONTEXT + +when coroBackend == CORO_BACKEND_FIBERS: + import std/winlean + type + Context = pointer + +elif coroBackend == CORO_BACKEND_UCONTEXT: + type + stack_t {.importc, header: "<ucontext.h>".} = object + ss_sp: pointer + ss_flags: int + ss_size: int + + ucontext_t {.importc, header: "<ucontext.h>".} = object + uc_link: ptr ucontext_t + uc_stack: stack_t + + Context = ucontext_t + + proc getcontext(context: var 
ucontext_t): int32 {.importc, + header: "<ucontext.h>".} + proc setcontext(context: var ucontext_t): int32 {.importc, + header: "<ucontext.h>".} + proc swapcontext(fromCtx, toCtx: var ucontext_t): int32 {.importc, + header: "<ucontext.h>".} + proc makecontext(context: var ucontext_t, fn: pointer, argc: int32) {.importc, + header: "<ucontext.h>", varargs.} + +elif coroBackend == CORO_BACKEND_SETJMP: + proc coroExecWithStack*(fn: pointer, stack: pointer) {.noreturn, + importc: "narch_$1", fastcall.} + when defined(amd64): + {.compile: "../arch/x86/amd64.S".} + elif defined(i386): + {.compile: "../arch/x86/i386.S".} + else: + # coroExecWithStack is defined in assembly. To support other platforms + # please provide implementation of this procedure. + {.error: "Unsupported architecture.".} + + when defined(nimCoroutinesSetjmpBundled): + # Use setjmp/longjmp implementation shipped with compiler. + when defined(amd64): + type + JmpBuf = array[0x50 + 0x10, uint8] + elif defined(i386): + type + JmpBuf = array[0x1C, uint8] + else: + # Bundled setjmp/longjmp are defined in assembly. To support other + # platforms please provide implementations of these procedures. + {.error: "Unsupported architecture.".} + + proc setjmp(ctx: var JmpBuf): int {.importc: "narch_$1".} + proc longjmp(ctx: JmpBuf, ret = 1) {.importc: "narch_$1".} + else: + # Use setjmp/longjmp implementation provided by the system. + type + JmpBuf {.importc: "jmp_buf", header: "<setjmp.h>".} = object + + proc setjmp(ctx: var JmpBuf): int {.importc, header: "<setjmp.h>".} + proc longjmp(ctx: JmpBuf, ret = 1) {.importc, header: "<setjmp.h>".} + + type + Context = JmpBuf + +when defined(unix): + # GLibc fails with "*** longjmp causes uninitialized stack frame ***" because + # our custom stacks are not initialized to a magic value. 
+ when defined(osx): + # workaround: error: The deprecated ucontext routines require _XOPEN_SOURCE to be defined + const extra = " -D_XOPEN_SOURCE" + else: + const extra = "" + {.passc: "-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0" & extra.} + +const + CORO_CREATED = 0 + CORO_EXECUTING = 1 + CORO_FINISHED = 2 + +type + Stack {.pure.} = object + top: pointer # Top of the stack. Pointer used for deallocating stack if we own it. + bottom: pointer # Very bottom of the stack, acts as unique stack identifier. + size: int + + Coroutine {.pure.} = object + execContext: Context + fn: proc() + state: int + lastRun: Ticks + sleepTime: float + stack: Stack + reference: CoroutineRef + + CoroutinePtr = ptr Coroutine + + CoroutineRef* = ref object + ## CoroutineRef holds a pointer to actual coroutine object. Public API always returns + ## CoroutineRef instead of CoroutinePtr in order to allow holding a reference to coroutine + ## object while it can be safely deallocated by coroutine scheduler loop. In this case + ## Coroutine.reference.coro is set to nil. Public API checks for it being nil and + ## gracefully fails if it is nil. + coro: CoroutinePtr + + CoroutineLoopContext = ref object + coroutines: DoublyLinkedList[CoroutinePtr] + current: DoublyLinkedNode[CoroutinePtr] + loop: Coroutine + ncbottom: pointer # non coroutine stack bottom + +var ctx {.threadvar.}: CoroutineLoopContext + +proc getCurrent(): CoroutinePtr = + ## Returns currently executing coroutine object. + var node = ctx.current + if node != nil: + return node.value + return nil + +proc initialize() = + ## Initializes coroutine state of current thread.
+ if ctx == nil: + ctx = CoroutineLoopContext() + ctx.coroutines = initDoublyLinkedList[CoroutinePtr]() + ctx.loop = Coroutine() + ctx.loop.state = CORO_EXECUTING + when not useOrcArc: + ctx.ncbottom = GC_getActiveStack() + when coroBackend == CORO_BACKEND_FIBERS: + ctx.loop.execContext = ConvertThreadToFiberEx(nil, FIBER_FLAG_FLOAT_SWITCH) + +proc runCurrentTask() + +proc switchTo(current, to: CoroutinePtr) = + ## Switches execution from `current` into `to` context. + to.lastRun = getTicks() + # Update position of current stack so gc invoked from another stack knows how much to scan. + when not useOrcArc: + GC_setActiveStack(current.stack.bottom) + nimGC_setStackBottom(current.stack.bottom) + var frame = getFrameState() + block: + # Execution will switch to another fiber now. We do not need to update current stack + when coroBackend == CORO_BACKEND_FIBERS: + SwitchToFiber(to.execContext) + elif coroBackend == CORO_BACKEND_UCONTEXT: + discard swapcontext(current.execContext, to.execContext) + elif coroBackend == CORO_BACKEND_SETJMP: + var res = setjmp(current.execContext) + if res == 0: + if to.state == CORO_EXECUTING: + # Coroutine is resumed. + longjmp(to.execContext, 1) + elif to.state == CORO_CREATED: + # Coroutine is started. + coroExecWithStack(runCurrentTask, to.stack.bottom) + #raiseAssert "unreachable" + else: + {.error: "Invalid coroutine backend set.".} + # Execution was just resumed. Restore frame information and set active stack. + setFrameState(frame) + when not useOrcArc: + GC_setActiveStack(current.stack.bottom) + nimGC_setStackBottom(ctx.ncbottom) + +proc suspend*(sleepTime: float = 0) = + ## Stops coroutine execution and resumes no sooner than after `sleeptime` seconds. + ## Until then other coroutines are executed. 
+ var current = getCurrent() + current.sleepTime = sleepTime + nimGC_setStackBottom(ctx.ncbottom) + switchTo(current, addr(ctx.loop)) + +proc runCurrentTask() = + ## Starts execution of current coroutine and updates its state through coroutine's life. + var sp {.volatile.}: pointer + sp = addr(sp) + block: + var current = getCurrent() + current.stack.bottom = sp + nimGC_setStackBottom(current.stack.bottom) + # Execution of new fiber just started. Since it was entered not through `switchTo` we + # have to set active stack here as well. GC_removeStack() has to be called in main loop + # because we still need stack available in final suspend(0) call from which we will not + # return. + when not useOrcArc: + GC_addStack(sp) + # Activate current stack because we are executing in a new coroutine. + GC_setActiveStack(sp) + current.state = CORO_EXECUTING + try: + current.fn() # Start coroutine execution + except: + echo "Unhandled exception in coroutine." + writeStackTrace() + current.state = CORO_FINISHED + nimGC_setStackBottom(ctx.ncbottom) + suspend(0) # Exit coroutine without returning from coroExecWithStack() + raiseAssert "unreachable" + +proc start*(c: proc(), stacksize: int = defaultStackSize): CoroutineRef {.discardable.} = + ## Schedule coroutine for execution. It does not run immediately.
+ if ctx == nil: + initialize() + + var coro: CoroutinePtr + when coroBackend == CORO_BACKEND_FIBERS: + coro = cast[CoroutinePtr](alloc0(sizeof(Coroutine))) + coro.execContext = CreateFiberEx(stacksize, stacksize, + FIBER_FLAG_FLOAT_SWITCH, + (proc(p: pointer) {.stdcall.} = runCurrentTask()), nil) + else: + coro = cast[CoroutinePtr](alloc0(sizeof(Coroutine) + stacksize)) + coro.stack.top = cast[pointer](cast[int](coro) + sizeof(Coroutine)) + coro.stack.bottom = cast[pointer](cast[int](coro.stack.top) + stacksize) + when coroBackend == CORO_BACKEND_UCONTEXT: + discard getcontext(coro.execContext) + coro.execContext.uc_stack.ss_sp = coro.stack.top + coro.execContext.uc_stack.ss_size = stacksize + coro.execContext.uc_link = addr(ctx.loop.execContext) + makecontext(coro.execContext, runCurrentTask, 0) + coro.fn = c + coro.stack.size = stacksize + coro.state = CORO_CREATED + coro.reference = CoroutineRef(coro: coro) + ctx.coroutines.append(coro) + return coro.reference + +proc run*() = + ## Starts main coroutine scheduler loop which exits when all coroutines exit. + ## Calling this proc starts execution of first coroutine. + initialize() + ctx.current = ctx.coroutines.head + var minDelay: float = 0 + while ctx.current != nil: + var current = getCurrent() + + var remaining = current.sleepTime - (float(getTicks() - current.lastRun) / 1_000_000_000) + if remaining <= 0: + # Save main loop context. Suspending coroutine will resume after this statement with + switchTo(addr(ctx.loop), current) + else: + if minDelay > 0 and remaining > 0: + minDelay = min(remaining, minDelay) + else: + minDelay = remaining + + if current.state == CORO_FINISHED: + var next = ctx.current.prev + if next == nil: + # If first coroutine ends then `prev` is nil even if more coroutines + # are to be scheduled. 
+ next = ctx.current.next + current.reference.coro = nil + ctx.coroutines.remove(ctx.current) + when not useOrcArc: + GC_removeStack(current.stack.bottom) + when coroBackend == CORO_BACKEND_FIBERS: + DeleteFiber(current.execContext) + else: + dealloc(current.stack.top) + dealloc(current) + ctx.current = next + elif ctx.current == nil or ctx.current.next == nil: + ctx.current = ctx.coroutines.head + os.sleep(int(minDelay * 1000)) + else: + ctx.current = ctx.current.next + +proc alive*(c: CoroutineRef): bool = c.coro != nil and c.coro.state != CORO_FINISHED + ## Returns `true` if coroutine has not returned, `false` otherwise. + +proc wait*(c: CoroutineRef, interval = 0.01) = + ## Returns only after coroutine `c` has returned. `interval` is the time in seconds between checks of whether `c` is still alive. + while alive(c): + suspend(interval) diff --git a/lib/pure/coro.nimcfg b/lib/pure/coro.nimcfg new file mode 100644 index 000000000..b011bc585 --- /dev/null +++ b/lib/pure/coro.nimcfg @@ -0,0 +1 @@ +-d:nimCoroutines diff --git a/lib/pure/cstrutils.nim b/lib/pure/cstrutils.nim new file mode 100644 index 000000000..c907e54d8 --- /dev/null +++ b/lib/pure/cstrutils.nim @@ -0,0 +1,122 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2017 Nim contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module supports helper routines for working with `cstring` +## without having to convert `cstring` to `string`, in order to +## save allocations. +## +## See also +## ======== +## * `strutils module <strutils.html>`_ for working with `string` + +include system/inclrtl +import std/private/strimpl + + +when defined(js): + func jsStartsWith(s, prefix: cstring): bool {.importjs: "#.startsWith(#)".} + func jsEndsWith(s, suffix: cstring): bool {.importjs: "#.endsWith(#)".} + + +func startsWith*(s, prefix: cstring): bool {.rtl, extern: "csuStartsWith".} = + ## Returns true if `s` starts with `prefix`.
+ ## + ## The JS backend uses the native `String.prototype.startsWith` function. + runnableExamples: + assert startsWith(cstring"Hello, Nimion", cstring"Hello") + assert not startsWith(cstring"Hello, Nimion", cstring"Nimion") + assert startsWith(cstring"Hello", cstring"") + + when nimvm: + startsWithImpl(s, prefix) + else: + when defined(js): + result = jsStartsWith(s, prefix) + else: + var i = 0 + while true: + if prefix[i] == '\0': return true + if s[i] != prefix[i]: return false + inc(i) + +func endsWith*(s, suffix: cstring): bool {.rtl, extern: "csuEndsWith".} = + ## Returns true if `s` ends with `suffix`. + ## + ## The JS backend uses the native `String.prototype.endsWith` function. + runnableExamples: + assert endsWith(cstring"Hello, Nimion", cstring"Nimion") + assert not endsWith(cstring"Hello, Nimion", cstring"Hello") + assert endsWith(cstring"Hello", cstring"") + + when nimvm: + endsWithImpl(s, suffix) + else: + when defined(js): + result = jsEndsWith(s, suffix) + else: + let slen = s.len + var i = 0 + var j = slen - len(suffix) + while i + j <% slen: + if s[i + j] != suffix[i]: return false + inc(i) + if suffix[i] == '\0': return true + +func cmpIgnoreStyle*(a, b: cstring): int {.rtl, extern: "csuCmpIgnoreStyle".} = + ## Semantically the same as `cmp(normalize($a), normalize($b))`. It + ## is just optimized to not allocate temporary strings. This should + ## NOT be used to compare Nim identifier names, use `macros.eqIdent` + ## for that. 
Returns: + ## * 0 if `a == b` + ## * < 0 if `a < b` + ## * \> 0 if `a > b` + runnableExamples: + assert cmpIgnoreStyle(cstring"hello", cstring"H_e_L_Lo") == 0 + + when nimvm: + cmpIgnoreStyleImpl(a, b) + else: + when defined(js): + cmpIgnoreStyleImpl(a, b) + else: + var i = 0 + var j = 0 + while true: + while a[i] == '_': inc(i) + while b[j] == '_': inc(j) # BUGFIX: typo + var aa = toLowerAscii(a[i]) + var bb = toLowerAscii(b[j]) + result = ord(aa) - ord(bb) + if result != 0 or aa == '\0': break + inc(i) + inc(j) + +func cmpIgnoreCase*(a, b: cstring): int {.rtl, extern: "csuCmpIgnoreCase".} = + ## Compares two strings in a case insensitive manner. Returns: + ## * 0 if `a == b` + ## * < 0 if `a < b` + ## * \> 0 if `a > b` + runnableExamples: + assert cmpIgnoreCase(cstring"hello", cstring"HeLLo") == 0 + assert cmpIgnoreCase(cstring"echo", cstring"hello") < 0 + assert cmpIgnoreCase(cstring"yellow", cstring"hello") > 0 + + when nimvm: + cmpIgnoreCaseImpl(a, b) + else: + when defined(js): + cmpIgnoreCaseImpl(a, b) + else: + var i = 0 + while true: + var aa = toLowerAscii(a[i]) + var bb = toLowerAscii(b[i]) + result = ord(aa) - ord(bb) + if result != 0 or aa == '\0': break + inc(i) diff --git a/lib/pure/distros.nim b/lib/pure/distros.nim new file mode 100644 index 000000000..9e71d4ce0 --- /dev/null +++ b/lib/pure/distros.nim @@ -0,0 +1,279 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements the basics for Linux distribution ("distro") +## detection and the OS's native package manager. 
Its primary purpose is to +## produce output for Nimble packages, like: +## +## To complete the installation, run: +## +## sudo apt-get install libblas-dev +## sudo apt-get install libvoodoo +## +## The above output could be the result of a code snippet like: +## +## ```nim +## if detectOs(Ubuntu): +## foreignDep "libblas-dev" +## foreignDep "libvoodoo" +## ``` +## +## See `packaging <packaging.html>`_ for hints on distributing Nim using OS packages. + +from std/strutils import contains, toLowerAscii + +when not defined(nimscript): + from std/osproc import execProcess + from std/envvars import existsEnv + +type + Distribution* {.pure.} = enum ## the list of known distributions + Windows ## some version of Windows + Posix ## some POSIX system + MacOSX ## some version of OSX + Linux ## some version of Linux + Ubuntu + Debian + Gentoo + Fedora + RedHat + + OpenSUSE + Manjaro + Elementary + Zorin + CentOS + Deepin + ArchLinux + Artix + Antergos + PCLinuxOS + Mageia + LXLE + Solus + Lite + Slackware + Androidx86 + Puppy + Peppermint + Tails + AntiX + Kali + SparkyLinux + Apricity + BlackLab + Bodhi + TrueOS + ArchBang + KaOS + WattOS + Korora + Simplicity + RemixOS + OpenMandriva + Netrunner + Alpine + BlackArch + Ultimate + Gecko + Parrot + KNOPPIX + GhostBSD + Sabayon + Salix + Q4OS + ClearOS + Container + ROSA + Zenwalk + Parabola + ChaletOS + BackBox + MXLinux + Vector + Maui + Qubes + RancherOS + Oracle + TinyCore + Robolinux + Trisquel + Voyager + Clonezilla + SteamOS + Absolute + NixOS ## NixOS or a Nix build environment + AUSTRUMI + Arya + Porteus + AVLinux + Elive + Bluestar + SliTaz + Solaris + Chakra + Wifislax + Scientific + ExTiX + Rockstor + GoboLinux + Void + + BSD + FreeBSD + NetBSD + OpenBSD + DragonFlyBSD + + Haiku + + +const + LacksDevPackages* = {Distribution.Gentoo, Distribution.Slackware, + Distribution.ArchLinux, Distribution.Artix, Distribution.Antergos, + Distribution.BlackArch, Distribution.ArchBang} + +# we cache the result of the 'cmdRelease'
+# execution for faster platform detections. +var unameRes, osReleaseIDRes, releaseRes, hostnamectlRes: string + +template cmdRelease(cmd, cache): untyped = + if cache.len == 0: + cache = (when defined(nimscript): gorge(cmd) else: execProcess(cmd)) + cache + +template uname(): untyped = cmdRelease("uname -a", unameRes) +template osReleaseID(): untyped = cmdRelease("cat /etc/os-release | grep ^ID=", osReleaseIDRes) +template release(): untyped = cmdRelease("lsb_release -d", releaseRes) +template hostnamectl(): untyped = cmdRelease("hostnamectl", hostnamectlRes) + +proc detectOsWithAllCmd(d: Distribution): bool = + let dd = toLowerAscii($d) + result = dd in toLowerAscii(osReleaseID()) or dd in toLowerAscii(release()) or + dd in toLowerAscii(uname()) or ("operating system: " & dd) in + toLowerAscii(hostnamectl()) + +proc detectOsImpl(d: Distribution): bool = + case d + of Distribution.Windows: result = defined(windows) + of Distribution.Posix: result = defined(posix) + of Distribution.MacOSX: result = defined(macosx) + of Distribution.Linux: result = defined(linux) + of Distribution.BSD: result = defined(bsd) + else: + when defined(bsd): + case d + of Distribution.FreeBSD, Distribution.NetBSD, Distribution.OpenBSD: + result = $d in uname() + else: + result = false + elif defined(linux): + case d + of Distribution.Gentoo: + result = ("-" & $d & " ") in uname() + of Distribution.Elementary, Distribution.Ubuntu, Distribution.Debian, + Distribution.Fedora, Distribution.OpenMandriva, Distribution.CentOS, + Distribution.Alpine, Distribution.Mageia, Distribution.Zorin, Distribution.Void: + result = toLowerAscii($d) in osReleaseID() + of Distribution.RedHat: + result = "rhel" in osReleaseID() + of Distribution.ArchLinux: + result = "arch" in osReleaseID() + of Distribution.Artix: + result = "artix" in osReleaseID() + of Distribution.NixOS: + # Check if this is a Nix build or NixOS environment + result = existsEnv("NIX_BUILD_TOP") or existsEnv("__NIXOS_SET_ENVIRONMENT_DONE") + 
of Distribution.OpenSUSE: + result = "suse" in toLowerAscii(uname()) or "suse" in toLowerAscii(release()) + of Distribution.GoboLinux: + result = "-Gobo " in uname() + of Distribution.Solaris: + let uname = toLowerAscii(uname()) + result = ("sun" in uname) or ("solaris" in uname) + of Distribution.Haiku: + result = defined(haiku) + else: + result = detectOsWithAllCmd(d) + else: + result = false + +template detectOs*(d: untyped): bool = + ## Distro/OS detection. For convenience, the + ## required `Distribution.` qualifier is added to the + ## enum value. + detectOsImpl(Distribution.d) + +when not defined(nimble): + var foreignDeps*: seq[string] = @[] ## Registered foreign deps. + +proc foreignCmd*(cmd: string; requiresSudo = false) = + ## Registers a foreign command to the internal list of commands + ## that can be queried later. + let c = (if requiresSudo: "sudo " else: "") & cmd + when defined(nimble): + nimscriptapi.foreignDeps.add(c) + else: + foreignDeps.add(c) + +proc foreignDepInstallCmd*(foreignPackageName: string): (string, bool) = + ## Returns the distro's native command to install `foreignPackageName` + ## and whether it requires root/admin rights. 
+ let p = foreignPackageName + when defined(windows): + result = ("choco install " & p, false) + elif defined(bsd): + result = ("ports install " & p, true) + elif defined(linux): + if detectOs(Ubuntu) or detectOs(Elementary) or detectOs(Debian) or + detectOs(KNOPPIX) or detectOs(SteamOS): + result = ("apt-get install " & p, true) + elif detectOs(Gentoo): + result = ("emerge install " & p, true) + elif detectOs(Fedora): + result = ("yum install " & p, true) + elif detectOs(RedHat): + result = ("rpm install " & p, true) + elif detectOs(OpenSUSE): + result = ("yast -i " & p, true) + elif detectOs(Slackware): + result = ("installpkg " & p, true) + elif detectOs(OpenMandriva): + result = ("urpmi " & p, true) + elif detectOs(ZenWalk): + result = ("netpkg install " & p, true) + elif detectOs(NixOS): + result = ("nix-env -i " & p, false) + elif detectOs(Solaris) or detectOs(FreeBSD): + result = ("pkg install " & p, true) + elif detectOs(NetBSD) or detectOs(OpenBSD): + result = ("pkg_add " & p, true) + elif detectOs(PCLinuxOS): + result = ("rpm -ivh " & p, true) + elif detectOs(ArchLinux) or detectOs(Manjaro) or detectOs(Artix): + result = ("pacman -S " & p, true) + elif detectOs(Void): + result = ("xbps-install " & p, true) + else: + result = ("<your package manager here> install " & p, true) + elif defined(haiku): + result = ("pkgman install " & p, true) + else: + result = ("brew install " & p, false) + +proc foreignDep*(foreignPackageName: string) = + ## Registers `foreignPackageName` to the internal list of foreign deps. + ## It is your job to ensure that the package name is correct. + let (installCmd, sudo) = foreignDepInstallCmd(foreignPackageName) + foreignCmd(installCmd, sudo) + +proc echoForeignDeps*() = + ## Writes the list of registered foreign deps to stdout. 
+ for d in foreignDeps: + echo d diff --git a/lib/pure/dynlib.nim b/lib/pure/dynlib.nim index c6794be67..a162fe37f 100644 --- a/lib/pure/dynlib.nim +++ b/lib/pure/dynlib.nim @@ -8,43 +8,95 @@ # ## This module implements the ability to access symbols from shared -## libraries. On POSIX this uses the ``dlsym`` mechanism, on -## Windows ``LoadLibrary``. +## libraries. On POSIX this uses the `dlsym` mechanism, on +## Windows `LoadLibrary`. +## +## Examples +## ======== +## +## Loading a simple C function +## --------------------------- +## +## The following example demonstrates loading a function called `greet` +## from a library that is determined at runtime based upon a language choice. +## If the library fails to load or the function `greet` is not found, +## it quits with a failure error code. +## +runnableExamples: + type + GreetFunction = proc (): cstring {.gcsafe, stdcall.} -type - LibHandle* = pointer ## a handle to a dynamically loaded library + proc loadGreet(lang: string) = + let lib = + case lang + of "french": + loadLib("french.dll") + else: + loadLib("english.dll") + assert lib != nil, "Error loading library" + + let greet = cast[GreetFunction](lib.symAddr("greet")) + assert greet != nil, "Error loading 'greet' function from library" + + echo greet() -{.deprecated: [TLibHandle: LibHandle].} + unloadLib(lib) + + +import std/strutils + +type + LibHandle* = pointer ## A handle to a dynamically loaded library. -proc loadLib*(path: string, global_symbols=false): LibHandle - ## loads a library from `path`. Returns nil if the library could not +proc loadLib*(path: string, globalSymbols = false): LibHandle {.gcsafe.} + ## Loads a library from `path`. Returns nil if the library could not ## be loaded. -proc loadLib*(): LibHandle - ## gets the handle from the current executable. Returns nil if the +proc loadLib*(): LibHandle {.gcsafe.} + ## Gets the handle from the current executable. Returns nil if the ## library could not be loaded. 
-proc unloadLib*(lib: LibHandle) - ## unloads the library `lib` +proc unloadLib*(lib: LibHandle) {.gcsafe.} + ## Unloads the library `lib`. proc raiseInvalidLibrary*(name: cstring) {.noinline, noreturn.} = - ## raises an `EInvalidLibrary` exception. - var e: ref LibraryError - new(e) - e.msg = "could not find symbol: " & $name - raise e - -proc symAddr*(lib: LibHandle, name: cstring): pointer - ## retrieves the address of a procedure/variable from `lib`. Returns nil + ## Raises a `LibraryError` exception. + raise newException(LibraryError, "could not find symbol: " & $name) + +proc symAddr*(lib: LibHandle, name: cstring): pointer {.gcsafe.} + ## Retrieves the address of a procedure/variable from `lib`. Returns nil ## if the symbol could not be found. proc checkedSymAddr*(lib: LibHandle, name: cstring): pointer = - ## retrieves the address of a procedure/variable from `lib`. Raises - ## `EInvalidLibrary` if the symbol could not be found. + ## Retrieves the address of a procedure/variable from `lib`. Raises + ## `LibraryError` if the symbol could not be found. result = symAddr(lib, name) if result == nil: raiseInvalidLibrary(name) -when defined(posix): +proc libCandidates*(s: string, dest: var seq[string]) = + ## Given a library name pattern `s`, write possible library names to `dest`. + var le = strutils.find(s, '(') + var ri = strutils.find(s, ')', le+1) + if le >= 0 and ri > le: + var prefix = substr(s, 0, le - 1) + var suffix = substr(s, ri + 1) + for middle in split(substr(s, le + 1, ri - 1), '|'): + libCandidates(prefix & middle & suffix, dest) + else: + add(dest, s) + +proc loadLibPattern*(pattern: string, globalSymbols = false): LibHandle = + ## Loads a library with name matching `pattern`, similar to what the `dynlib` + ## pragma does. Returns nil if the library could not be loaded. + ## + ## .. warning:: this proc uses the GC and so cannot be used to load the GC. 
+ var candidates = newSeq[string]() + libCandidates(pattern, candidates) + for c in candidates: + result = loadLib(c, globalSymbols) + if not result.isNil: break + +when defined(posix) and not defined(nintendoswitch): # # ========================================================================= # This is an implementation based on the dlfcn interface. @@ -53,24 +105,66 @@ when defined(posix): # as an emulation layer on top of native functions. # ========================================================================= # - var - RTLD_NOW {.importc: "RTLD_NOW", header: "<dlfcn.h>".}: int - RTLD_GLOBAL {.importc: "RTLD_GLOBAL", header: "<dlfcn.h>".}: int - - proc dlclose(lib: LibHandle) {.importc, header: "<dlfcn.h>".} - proc dlopen(path: cstring, mode: int): LibHandle {. - importc, header: "<dlfcn.h>".} - proc dlsym(lib: LibHandle, name: cstring): pointer {. - importc, header: "<dlfcn.h>".} - - proc loadLib(path: string, global_symbols=false): LibHandle = - var flags = RTLD_NOW - if global_symbols: flags = flags or RTLD_GLOBAL - return dlopen(path, flags) - proc loadLib(): LibHandle = return dlopen(nil, RTLD_NOW) - proc unloadLib(lib: LibHandle) = dlclose(lib) - proc symAddr(lib: LibHandle, name: cstring): pointer = - return dlsym(lib, name) + import std/posix + + proc loadLib(path: string, globalSymbols = false): LibHandle = + let flags = + if globalSymbols: RTLD_NOW or RTLD_GLOBAL + else: RTLD_NOW + + dlopen(path, flags) + + proc loadLib(): LibHandle = dlopen(nil, RTLD_NOW) + proc unloadLib(lib: LibHandle) = discard dlclose(lib) + proc symAddr(lib: LibHandle, name: cstring): pointer = dlsym(lib, name) + +elif defined(nintendoswitch): + # + # ========================================================================= + # Nintendo switch DevkitPro sdk does not have these. Raise an error if called. 
+ # ========================================================================= + # + + proc dlclose(lib: LibHandle) = + raise newException(OSError, "dlclose not implemented on Nintendo Switch!") + proc dlopen(path: cstring, mode: int): LibHandle = + raise newException(OSError, "dlopen not implemented on Nintendo Switch!") + proc dlsym(lib: LibHandle, name: cstring): pointer = + raise newException(OSError, "dlsym not implemented on Nintendo Switch!") + proc loadLib(path: string, global_symbols = false): LibHandle = + raise newException(OSError, "loadLib not implemented on Nintendo Switch!") + proc loadLib(): LibHandle = + raise newException(OSError, "loadLib not implemented on Nintendo Switch!") + proc unloadLib(lib: LibHandle) = + raise newException(OSError, "unloadLib not implemented on Nintendo Switch!") + proc symAddr(lib: LibHandle, name: cstring): pointer = + raise newException(OSError, "symAddr not implemented on Nintendo Switch!") + +elif defined(genode): + # + # ========================================================================= + # Not implemented for Genode without POSIX. Raise an error if called. 
+ # ========================================================================= + # + + template raiseErr(prc: string) = + raise newException(OSError, prc & " not implemented, compile with POSIX support") + + proc dlclose(lib: LibHandle) = + raiseErr("dlclose") + proc dlopen(path: cstring, mode: int): LibHandle = + raiseErr("dlopen") + proc dlsym(lib: LibHandle, name: cstring): pointer = + raiseErr("dlsym") + proc loadLib(path: string, global_symbols = false): LibHandle = + raiseErr("loadLib") + proc loadLib(): LibHandle = + raiseErr("loadLib") + proc unloadLib(lib: LibHandle) = + raiseErr("unloadLib") + proc symAddr(lib: LibHandle, name: cstring): pointer = + raiseErr("symAddr") + elif defined(windows) or defined(dos): # @@ -79,22 +173,23 @@ elif defined(windows) or defined(dos): # ======================================================================= # type - THINSTANCE {.importc: "HINSTANCE".} = pointer + HMODULE {.importc: "HMODULE".} = pointer + FARPROC {.importc: "FARPROC".} = pointer - proc FreeLibrary(lib: THINSTANCE) {.importc, header: "<windows.h>", stdcall.} - proc winLoadLibrary(path: cstring): THINSTANCE {. + proc FreeLibrary(lib: HMODULE) {.importc, header: "<windows.h>", stdcall.} + proc winLoadLibrary(path: cstring): HMODULE {. importc: "LoadLibraryA", header: "<windows.h>", stdcall.} - proc getProcAddress(lib: THINSTANCE, name: cstring): pointer {. + proc getProcAddress(lib: HMODULE, name: cstring): FARPROC {. 
importc: "GetProcAddress", header: "<windows.h>", stdcall.} - proc loadLib(path: string, global_symbols=false): LibHandle = + proc loadLib(path: string, globalSymbols = false): LibHandle = result = cast[LibHandle](winLoadLibrary(path)) proc loadLib(): LibHandle = result = cast[LibHandle](winLoadLibrary(nil)) - proc unloadLib(lib: LibHandle) = FreeLibrary(cast[THINSTANCE](lib)) + proc unloadLib(lib: LibHandle) = FreeLibrary(cast[HMODULE](lib)) proc symAddr(lib: LibHandle, name: cstring): pointer = - result = getProcAddress(cast[THINSTANCE](lib), name) + result = cast[pointer](getProcAddress(cast[HMODULE](lib), name)) else: {.error: "no implementation for dynlib".} diff --git a/lib/pure/encodings.nim b/lib/pure/encodings.nim index 25c7ad9ef..bbadca655 100644 --- a/lib/pure/encodings.nim +++ b/lib/pure/encodings.nim @@ -7,15 +7,46 @@ # distribution, for details about the copyright. # -## Converts between different character encodings. On UNIX, this uses +## Routines for converting between different character encodings. On UNIX, this uses ## the `iconv`:idx: library, on Windows the Windows API. +## +## The following example shows how to change character encodings. +runnableExamples: + when defined(windows): + let + orig = "öäüß" + # convert `orig` from "UTF-8" to "CP1252" + cp1252 = convert(orig, "CP1252", "UTF-8") + # convert `cp1252` from "CP1252" to "ibm850" + ibm850 = convert(cp1252, "ibm850", "CP1252") + current = getCurrentEncoding() + assert orig == "\195\182\195\164\195\188\195\159" + assert ibm850 == "\148\132\129\225" + assert convert(ibm850, current, "ibm850") == orig + +## The example below uses a reusable `EncodingConverter` object which is +## created by `open` with `destEncoding` and `srcEncoding` specified. You can use +## `convert` on this object multiple times. 
+runnableExamples: + when defined(windows): + var fromGB2312 = open("utf-8", "gb2312") + let first = "\203\173\197\194\163\191\210\187" & + "\203\242\209\204\211\234\200\206\198\189\201\250" + assert fromGB2312.convert(first) == "谁怕?一蓑烟雨任平生" + + let second = "\211\208\176\215\205\183\200\231" & + "\208\194\163\172\199\227\184\199\200\231\185\202" + assert fromGB2312.convert(second) == "有白头如新,倾盖如故" + -import os, parseutils, strutils +import std/os +when defined(nimPreviewSlimSystem): + import std/assertions when not defined(windows): type ConverterObj = object - EncodingConverter* = ptr ConverterObj ## can convert between two character sets + EncodingConverter* = ptr ConverterObj ## Can convert between two character sets. else: type @@ -24,216 +55,220 @@ else: dest, src: CodePage type - EncodingError* = object of ValueError ## exception that is raised - ## for encoding errors - -{.deprecated: [EInvalidEncoding: EncodingError, PConverter: EncodingConverter].} + EncodingError* = object of ValueError ## Exception that is raised + ## for encoding errors. 
when defined(windows): + import std/[parseutils, strutils] proc eqEncodingNames(a, b: string): bool = var i = 0 var j = 0 while i < a.len and j < b.len: if a[i] in {'-', '_'}: inc i if b[j] in {'-', '_'}: inc j - if a[i].toLower != b[j].toLower: return false + if i < a.len and j < b.len and + a[i].toLowerAscii != b[j].toLowerAscii: + return false inc i inc j result = i == a.len and j == b.len - const + const winEncodings = [ - (1, "OEMCP"), # current OEM codepage - (037, "IBM037"), # IBM EBCDIC US-Canada - (437, "IBM437"), # OEM United States - (500, "IBM500"), # IBM EBCDIC International - (708, "ASMO-708"), # Arabic (ASMO 708) - (709, "ASMO_449"), # Arabic (ASMO-449+, BCON V4) - (710, ""), # Arabic - Transparent Arabic - (720, "DOS-720"), # Arabic (Transparent ASMO); Arabic (DOS) - (737, "ibm737"), # OEM Greek (formerly 437G); Greek (DOS) - (775, "ibm775"), # OEM Baltic; Baltic (DOS) - (850, "ibm850"), # OEM Multilingual Latin 1; Western European (DOS) - (852, "ibm852"), # OEM Latin 2; Central European (DOS) - (855, "IBM855"), # OEM Cyrillic (primarily Russian) - (857, "ibm857"), # OEM Turkish; Turkish (DOS) - (858, "IBM00858"), # OEM Multilingual Latin 1 + Euro symbol - (860, "IBM860"), # OEM Portuguese; Portuguese (DOS) - (861, "ibm861"), # OEM Icelandic; Icelandic (DOS) - (862, "DOS-862"), # OEM Hebrew; Hebrew (DOS) - (863, "IBM863"), # OEM French Canadian; French Canadian (DOS) - (864, "IBM864"), # OEM Arabic; Arabic (864) - (865, "IBM865"), # OEM Nordic; Nordic (DOS) - (866, "cp866"), # OEM Russian; Cyrillic (DOS) - (869, "ibm869"), # OEM Modern Greek; Greek, Modern (DOS) - (870, "IBM870"), # IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 - (874, "windows-874"), # ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) - (875, "cp875"), # IBM EBCDIC Greek Modern - (932, "shift_jis"), # ANSI/OEM Japanese; Japanese (Shift-JIS) - (936, "gb2312"), # ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) - (949, 
"ks_c_5601-1987"), # ANSI/OEM Korean (Unified Hangul Code) - (950, "big5"), # ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) - (1026, "IBM1026"), # IBM EBCDIC Turkish (Latin 5) - (1047, "IBM01047"), # IBM EBCDIC Latin 1/Open System - (1140, "IBM01140"), # IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) - (1141, "IBM01141"), # IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) - (1142, "IBM01142"), # IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) - (1143, "IBM01143"), # IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) - (1144, "IBM01144"), # IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) - (1145, "IBM01145"), # IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) - (1146, "IBM01146"), # IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) - (1147, "IBM01147"), # IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) - (1148, "IBM01148"), # IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) - (1149, "IBM01149"), # IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) - (1200, "utf-16"), # Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications - (1201, "unicodeFFFE"), # Unicode UTF-16, big endian byte order; available only to managed applications - (1250, "windows-1250"), # ANSI Central European; Central European (Windows) - (1251, "windows-1251"), # ANSI Cyrillic; Cyrillic (Windows) - (1252, "windows-1252"), # ANSI Latin 1; Western European (Windows) - (1253, "windows-1253"), # ANSI Greek; Greek (Windows) - (1254, "windows-1254"), # ANSI Turkish; Turkish (Windows) - (1255, "windows-1255"), # ANSI Hebrew; Hebrew (Windows) - (1256, "windows-1256"), # ANSI Arabic; Arabic (Windows) - (1257, "windows-1257"), # ANSI Baltic; Baltic (Windows) - (1258, "windows-1258"), # 
ANSI/OEM Vietnamese; Vietnamese (Windows) - - (1250, "cp-1250"), # ANSI Central European; Central European (Windows) - (1251, "cp-1251"), # ANSI Cyrillic; Cyrillic (Windows) - (1252, "cp-1252"), # ANSI Latin 1; Western European (Windows) - (1253, "cp-1253"), # ANSI Greek; Greek (Windows) - (1254, "cp-1254"), # ANSI Turkish; Turkish (Windows) - (1255, "cp-1255"), # ANSI Hebrew; Hebrew (Windows) - (1256, "cp-1256"), # ANSI Arabic; Arabic (Windows) - (1257, "cp-1257"), # ANSI Baltic; Baltic (Windows) - (1258, "cp-1258"), # ANSI/OEM Vietnamese; Vietnamese (Windows) - - (1361, "Johab"), # Korean (Johab) - (10000, "macintosh"), # MAC Roman; Western European (Mac) - (10001, "x-mac-japanese"), # Japanese (Mac) - (10002, "x-mac-chinesetrad"), # MAC Traditional Chinese (Big5); Chinese Traditional (Mac) - (10003, "x-mac-korean"), # Korean (Mac) - (10004, "x-mac-arabic"), # Arabic (Mac) - (10005, "x-mac-hebrew"), # Hebrew (Mac) - (10006, "x-mac-greek"), # Greek (Mac) - (10007, "x-mac-cyrillic"), # Cyrillic (Mac) - (10008, "x-mac-chinesesimp"), # MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) - (10010, "x-mac-romanian"), # Romanian (Mac) - (10017, "x-mac-ukrainian"), # Ukrainian (Mac) - (10021, "x-mac-thai"), # Thai (Mac) - (10029, "x-mac-ce"), # MAC Latin 2; Central European (Mac) - (10079, "x-mac-icelandic"), # Icelandic (Mac) - (10081, "x-mac-turkish"), # Turkish (Mac) - (10082, "x-mac-croatian"), # Croatian (Mac) - (12000, "utf-32"), # Unicode UTF-32, little endian byte order; available only to managed applications - (12001, "utf-32BE"), # Unicode UTF-32, big endian byte order; available only to managed applications - (20000, "x-Chinese_CNS"), # CNS Taiwan; Chinese Traditional (CNS) - (20001, "x-cp20001"), # TCA Taiwan - (20002, "x_Chinese-Eten"), # Eten Taiwan; Chinese Traditional (Eten) - (20003, "x-cp20003"), # IBM5550 Taiwan - (20004, "x-cp20004"), # TeleText Taiwan - (20005, "x-cp20005"), # Wang Taiwan - (20105, "x-IA5"), # IA5 (IRV International Alphabet 
No. 5, 7-bit); Western European (IA5) - (20106, "x-IA5-German"), # IA5 German (7-bit) - (20107, "x-IA5-Swedish"), # IA5 Swedish (7-bit) - (20108, "x-IA5-Norwegian"), # IA5 Norwegian (7-bit) - (20127, "us-ascii"), # US-ASCII (7-bit) - (20261, "x-cp20261"), # T.61 - (20269, "x-cp20269"), # ISO 6937 Non-Spacing Accent - (20273, "IBM273"), # IBM EBCDIC Germany - (20277, "IBM277"), # IBM EBCDIC Denmark-Norway - (20278, "IBM278"), # IBM EBCDIC Finland-Sweden - (20280, "IBM280"), # IBM EBCDIC Italy - (20284, "IBM284"), # IBM EBCDIC Latin America-Spain - (20285, "IBM285"), # IBM EBCDIC United Kingdom - (20290, "IBM290"), # IBM EBCDIC Japanese Katakana Extended - (20297, "IBM297"), # IBM EBCDIC France - (20420, "IBM420"), # IBM EBCDIC Arabic - (20423, "IBM423"), # IBM EBCDIC Greek - (20424, "IBM424"), # IBM EBCDIC Hebrew - (20833, "x-EBCDIC-KoreanExtended"), # IBM EBCDIC Korean Extended - (20838, "IBM-Thai"), # IBM EBCDIC Thai - (20866, "koi8-r"), # Russian (KOI8-R); Cyrillic (KOI8-R) - (20871, "IBM871"), # IBM EBCDIC Icelandic - (20880, "IBM880"), # IBM EBCDIC Cyrillic Russian - (20905, "IBM905"), # IBM EBCDIC Turkish - (20924, "IBM00924"), # IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) - (20932, "EUC-JP"), # Japanese (JIS 0208-1990 and 0121-1990) - (20936, "x-cp20936"), # Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) - (20949, "x-cp20949"), # Korean Wansung - (21025, "cp1025"), # IBM EBCDIC Cyrillic Serbian-Bulgarian - (21027, ""), # (deprecated) - (21866, "koi8-u"), # Ukrainian (KOI8-U); Cyrillic (KOI8-U) - (28591, "iso-8859-1"), # ISO 8859-1 Latin 1; Western European (ISO) - (28592, "iso-8859-2"), # ISO 8859-2 Central European; Central European (ISO) - (28593, "iso-8859-3"), # ISO 8859-3 Latin 3 - (28594, "iso-8859-4"), # ISO 8859-4 Baltic - (28595, "iso-8859-5"), # ISO 8859-5 Cyrillic - (28596, "iso-8859-6"), # ISO 8859-6 Arabic - (28597, "iso-8859-7"), # ISO 8859-7 Greek - (28598, "iso-8859-8"), # ISO 8859-8 Hebrew; Hebrew (ISO-Visual) - (28599, 
"iso-8859-9"), # ISO 8859-9 Turkish - (28603, "iso-8859-13"), # ISO 8859-13 Estonian - (28605, "iso-8859-15"), # ISO 8859-15 Latin 9 - (29001, "x-Europa"), # Europa 3 - (38598, "iso-8859-8-i"), # ISO 8859-8 Hebrew; Hebrew (ISO-Logical) - (50220, "iso-2022-jp"), # ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) - (50221, "csISO2022JP"), # ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) - (50222, "iso-2022-jp"), # ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) - (50225, "iso-2022-kr"), # ISO 2022 Korean - (50227, "x-cp50227"), # ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) - (50229, ""), # ISO 2022 Traditional Chinese - (50930, ""), # EBCDIC Japanese (Katakana) Extended - (50931, ""), # EBCDIC US-Canada and Japanese - (50933, ""), # EBCDIC Korean Extended and Korean - (50935, ""), # EBCDIC Simplified Chinese Extended and Simplified Chinese - (50936, ""), # EBCDIC Simplified Chinese - (50937, ""), # EBCDIC US-Canada and Traditional Chinese - (50939, ""), # EBCDIC Japanese (Latin) Extended and Japanese - (51932, "euc-jp"), # EUC Japanese - (51936, "EUC-CN"), # EUC Simplified Chinese; Chinese Simplified (EUC) - (51949, "euc-kr"), # EUC Korean - (51950, ""), # EUC Traditional Chinese - (52936, "hz-gb-2312"), # HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) - (54936, "GB18030"), # Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) - (57002, "x-iscii-de"), # ISCII Devanagari - (57003, "x-iscii-be"), # ISCII Bengali - (57004, "x-iscii-ta"), # ISCII Tamil - (57005, "x-iscii-te"), # ISCII Telugu - (57006, "x-iscii-as"), # ISCII Assamese - (57007, "x-iscii-or"), # ISCII Oriya - (57008, "x-iscii-ka"), # ISCII Kannada - (57009, "x-iscii-ma"), # ISCII Malayalam - (57010, "x-iscii-gu"), # ISCII Gujarati - (57011, "x-iscii-pa"), # ISCII Punjabi - (65000, "utf-7"), # Unicode (UTF-7) - (65001, "utf-8")] # Unicode (UTF-8) - + (1, "OEMCP"), # current OEM 
codepage + (037, "IBM037"), # IBM EBCDIC US-Canada + (437, "IBM437"), # OEM United States + (500, "IBM500"), # IBM EBCDIC International + (708, "ASMO-708"), # Arabic (ASMO 708) + (709, "ASMO_449"), # Arabic (ASMO-449+, BCON V4) + (710, ""), # Arabic - Transparent Arabic + (720, "DOS-720"), # Arabic (Transparent ASMO); Arabic (DOS) + (737, "ibm737"), # OEM Greek (formerly 437G); Greek (DOS) + (775, "ibm775"), # OEM Baltic; Baltic (DOS) + (850, "ibm850"), # OEM Multilingual Latin 1; Western European (DOS) + (852, "ibm852"), # OEM Latin 2; Central European (DOS) + (855, "IBM855"), # OEM Cyrillic (primarily Russian) + (857, "ibm857"), # OEM Turkish; Turkish (DOS) + (858, "IBM00858"), # OEM Multilingual Latin 1 + Euro symbol + (860, "IBM860"), # OEM Portuguese; Portuguese (DOS) + (861, "ibm861"), # OEM Icelandic; Icelandic (DOS) + (862, "DOS-862"), # OEM Hebrew; Hebrew (DOS) + (863, "IBM863"), # OEM French Canadian; French Canadian (DOS) + (864, "IBM864"), # OEM Arabic; Arabic (864) + (865, "IBM865"), # OEM Nordic; Nordic (DOS) + (866, "cp866"), # OEM Russian; Cyrillic (DOS) + (869, "ibm869"), # OEM Modern Greek; Greek, Modern (DOS) + (870, "IBM870"), # IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 + (874, "windows-874"), # ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) + (875, "cp875"), # IBM EBCDIC Greek Modern + (932, "shift_jis"), # ANSI/OEM Japanese; Japanese (Shift-JIS) + (936, "gb2312"), # ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) + (936, "gbk"), # Alias for GB2312 encoding + (949, "ks_c_5601-1987"), # ANSI/OEM Korean (Unified Hangul Code) + (950, "big5"), # ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) + (1026, "IBM1026"), # IBM EBCDIC Turkish (Latin 5) + (1047, "IBM01047"), # IBM EBCDIC Latin 1/Open System + (1140, "IBM01140"), # IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) + (1141, "IBM01141"), # IBM EBCDIC Germany (20273 + 
Euro symbol); IBM EBCDIC (Germany-Euro) + (1142, "IBM01142"), # IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) + (1143, "IBM01143"), # IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) + (1144, "IBM01144"), # IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) + (1145, "IBM01145"), # IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) + (1146, "IBM01146"), # IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) + (1147, "IBM01147"), # IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) + (1148, "IBM01148"), # IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) + (1149, "IBM01149"), # IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) + (1200, "utf-16"), # Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications + (1201, "unicodeFFFE"), # Unicode UTF-16, big endian byte order; available only to managed applications + (1250, "windows-1250"), # ANSI Central European; Central European (Windows) + (1251, "windows-1251"), # ANSI Cyrillic; Cyrillic (Windows) + (1252, "windows-1252"), # ANSI Latin 1; Western European (Windows) + (1253, "windows-1253"), # ANSI Greek; Greek (Windows) + (1254, "windows-1254"), # ANSI Turkish; Turkish (Windows) + (1255, "windows-1255"), # ANSI Hebrew; Hebrew (Windows) + (1256, "windows-1256"), # ANSI Arabic; Arabic (Windows) + (1257, "windows-1257"), # ANSI Baltic; Baltic (Windows) + (1258, "windows-1258"), # ANSI/OEM Vietnamese; Vietnamese (Windows) + + (1250, "cp-1250"), # ANSI Central European; Central European (Windows) + (1251, "cp-1251"), # ANSI Cyrillic; Cyrillic (Windows) + (1252, "cp-1252"), # ANSI Latin 1; Western European (Windows) + (1253, "cp-1253"), # ANSI Greek; Greek (Windows) + (1254, "cp-1254"), # ANSI Turkish; Turkish (Windows) + (1255, "cp-1255"), # ANSI Hebrew; Hebrew (Windows) + (1256, "cp-1256"), # 
ANSI Arabic; Arabic (Windows) + (1257, "cp-1257"), # ANSI Baltic; Baltic (Windows) + (1258, "cp-1258"), # ANSI/OEM Vietnamese; Vietnamese (Windows) + + (1361, "Johab"), # Korean (Johab) + (10000, "macintosh"), # MAC Roman; Western European (Mac) + (10001, "x-mac-japanese"), # Japanese (Mac) + (10002, "x-mac-chinesetrad"), # MAC Traditional Chinese (Big5); Chinese Traditional (Mac) + (10003, "x-mac-korean"), # Korean (Mac) + (10004, "x-mac-arabic"), # Arabic (Mac) + (10005, "x-mac-hebrew"), # Hebrew (Mac) + (10006, "x-mac-greek"), # Greek (Mac) + (10007, "x-mac-cyrillic"), # Cyrillic (Mac) + (10008, "x-mac-chinesesimp"), # MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) + (10010, "x-mac-romanian"), # Romanian (Mac) + (10017, "x-mac-ukrainian"), # Ukrainian (Mac) + (10021, "x-mac-thai"), # Thai (Mac) + (10029, "x-mac-ce"), # MAC Latin 2; Central European (Mac) + (10079, "x-mac-icelandic"), # Icelandic (Mac) + (10081, "x-mac-turkish"), # Turkish (Mac) + (10082, "x-mac-croatian"), # Croatian (Mac) + (12000, "utf-32"), # Unicode UTF-32, little endian byte order; available only to managed applications + (12001, "utf-32BE"), # Unicode UTF-32, big endian byte order; available only to managed applications + (20000, "x-Chinese_CNS"), # CNS Taiwan; Chinese Traditional (CNS) + (20001, "x-cp20001"), # TCA Taiwan + (20002, "x_Chinese-Eten"), # Eten Taiwan; Chinese Traditional (Eten) + (20003, "x-cp20003"), # IBM5550 Taiwan + (20004, "x-cp20004"), # TeleText Taiwan + (20005, "x-cp20005"), # Wang Taiwan + (20105, "x-IA5"), # IA5 (IRV International Alphabet No. 
5, 7-bit); Western European (IA5) + (20106, "x-IA5-German"), # IA5 German (7-bit) + (20107, "x-IA5-Swedish"), # IA5 Swedish (7-bit) + (20108, "x-IA5-Norwegian"), # IA5 Norwegian (7-bit) + (20127, "us-ascii"), # US-ASCII (7-bit) + (20261, "x-cp20261"), # T.61 + (20269, "x-cp20269"), # ISO 6937 Non-Spacing Accent + (20273, "IBM273"), # IBM EBCDIC Germany + (20277, "IBM277"), # IBM EBCDIC Denmark-Norway + (20278, "IBM278"), # IBM EBCDIC Finland-Sweden + (20280, "IBM280"), # IBM EBCDIC Italy + (20284, "IBM284"), # IBM EBCDIC Latin America-Spain + (20285, "IBM285"), # IBM EBCDIC United Kingdom + (20290, "IBM290"), # IBM EBCDIC Japanese Katakana Extended + (20297, "IBM297"), # IBM EBCDIC France + (20420, "IBM420"), # IBM EBCDIC Arabic + (20423, "IBM423"), # IBM EBCDIC Greek + (20424, "IBM424"), # IBM EBCDIC Hebrew + (20833, "x-EBCDIC-KoreanExtended"), # IBM EBCDIC Korean Extended + (20838, "IBM-Thai"), # IBM EBCDIC Thai + (20866, "koi8-r"), # Russian (KOI8-R); Cyrillic (KOI8-R) + (20871, "IBM871"), # IBM EBCDIC Icelandic + (20880, "IBM880"), # IBM EBCDIC Cyrillic Russian + (20905, "IBM905"), # IBM EBCDIC Turkish + (20924, "IBM00924"), # IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) + (20932, "EUC-JP"), # Japanese (JIS 0208-1990 and 0121-1990) + (20936, "x-cp20936"), # Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) + (20949, "x-cp20949"), # Korean Wansung + (21025, "cp1025"), # IBM EBCDIC Cyrillic Serbian-Bulgarian + (21027, ""), # (deprecated) + (21866, "koi8-u"), # Ukrainian (KOI8-U); Cyrillic (KOI8-U) + (28591, "iso-8859-1"), # ISO 8859-1 Latin 1; Western European (ISO) + (28592, "iso-8859-2"), # ISO 8859-2 Central European; Central European (ISO) + (28593, "iso-8859-3"), # ISO 8859-3 Latin 3 + (28594, "iso-8859-4"), # ISO 8859-4 Baltic + (28595, "iso-8859-5"), # ISO 8859-5 Cyrillic + (28596, "iso-8859-6"), # ISO 8859-6 Arabic + (28597, "iso-8859-7"), # ISO 8859-7 Greek + (28598, "iso-8859-8"), # ISO 8859-8 Hebrew; Hebrew (ISO-Visual) + (28599, 
"iso-8859-9"), # ISO 8859-9 Turkish + (28603, "iso-8859-13"), # ISO 8859-13 Estonian + (28605, "iso-8859-15"), # ISO 8859-15 Latin 9 + (29001, "x-Europa"), # Europa 3 + (38598, "iso-8859-8-i"), # ISO 8859-8 Hebrew; Hebrew (ISO-Logical) + (50220, "iso-2022-jp"), # ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) + (50221, "csISO2022JP"), # ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) + (50222, "iso-2022-jp"), # ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) + (50225, "iso-2022-kr"), # ISO 2022 Korean + (50227, "x-cp50227"), # ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) + (50229, ""), # ISO 2022 Traditional Chinese + (50930, ""), # EBCDIC Japanese (Katakana) Extended + (50931, ""), # EBCDIC US-Canada and Japanese + (50933, ""), # EBCDIC Korean Extended and Korean + (50935, ""), # EBCDIC Simplified Chinese Extended and Simplified Chinese + (50936, ""), # EBCDIC Simplified Chinese + (50937, ""), # EBCDIC US-Canada and Traditional Chinese + (50939, ""), # EBCDIC Japanese (Latin) Extended and Japanese + (51932, "euc-jp"), # EUC Japanese + (51936, "EUC-CN"), # EUC Simplified Chinese; Chinese Simplified (EUC) + (51949, "euc-kr"), # EUC Korean + (51950, ""), # EUC Traditional Chinese + (52936, "hz-gb-2312"), # HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) + (54936, "GB18030"), # Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) + (57002, "x-iscii-de"), # ISCII Devanagari + (57003, "x-iscii-be"), # ISCII Bengali + (57004, "x-iscii-ta"), # ISCII Tamil + (57005, "x-iscii-te"), # ISCII Telugu + (57006, "x-iscii-as"), # ISCII Assamese + (57007, "x-iscii-or"), # ISCII Oriya + (57008, "x-iscii-ka"), # ISCII Kannada + (57009, "x-iscii-ma"), # ISCII Malayalam + (57010, "x-iscii-gu"), # ISCII Gujarati + (57011, "x-iscii-pa"), # ISCII Punjabi + (65000, "utf-7"), # Unicode (UTF-7) + (65001, "utf-8")] # Unicode (UTF-8) + when false: # not needed yet: type 
- TCpInfo = object + CpInfo = object maxCharSize: int32 defaultChar: array[0..1, char] leadByte: array[0..12-1, char] - proc getCPInfo(codePage: CodePage, lpCPInfo: var TCpInfo): int32 {. + proc getCPInfo(codePage: CodePage, lpCPInfo: var CpInfo): int32 {. stdcall, importc: "GetCPInfo", dynlib: "kernel32".} - - proc nameToCodePage(name: string): CodePage = + + proc nameToCodePage*(name: string): CodePage = var nameAsInt: int if parseInt(name, nameAsInt) == 0: nameAsInt = -1 for no, na in items(winEncodings): if no == nameAsInt or eqEncodingNames(na, name): return CodePage(no) result = CodePage(-1) - - proc codePageToName(c: CodePage): string = + + proc codePageToName*(c: CodePage): string = for no, na in items(winEncodings): if no == int(c): return if na.len != 0: na else: $no result = "" - + proc getACP(): CodePage {.stdcall, importc: "GetACP", dynlib: "kernel32".} - + proc getGetConsoleCP(): CodePage {.stdcall, importc: "GetConsoleCP", + dynlib: "kernel32".} + proc multiByteToWideChar( codePage: CodePage, dwFlags: int32, @@ -250,23 +285,18 @@ when defined(windows): cchWideChar: cint, lpMultiByteStr: cstring, cbMultiByte: cint, - lpDefaultChar: cstring=nil, - lpUsedDefaultChar: pointer=nil): cint {. + lpDefaultChar: cstring = nil, + lpUsedDefaultChar: pointer = nil): cint {. 
stdcall, importc: "WideCharToMultiByte", dynlib: "kernel32".} - + else: when defined(haiku): - const iconvDll = "(libc.so.6|libiconv.so|libtextencoding.so)" + const iconvDll = "libiconv.so" elif defined(macosx): const iconvDll = "libiconv.dylib" else: const iconvDll = "(libc.so.6|libiconv.so)" - when defined(macosx) and defined(powerpc): - const prefix = "lib" - else: - const prefix = "" - const E2BIG = 7.cint EINVAL = 22.cint @@ -278,78 +308,83 @@ else: const EILSEQ = 86.cint elif defined(solaris): const EILSEQ = 88.cint + elif defined(haiku): + const EILSEQ = -2147454938.cint var errno {.importc, header: "<errno.h>".}: cint + when defined(bsd): + {.pragma: importIconv, cdecl, header: "<iconv.h>".} + when defined(openbsd): + {.passL: "-liconv".} + else: + {.pragma: importIconv, cdecl, dynlib: iconvDll.} + proc iconvOpen(tocode, fromcode: cstring): EncodingConverter {. - importc: prefix & "iconv_open", cdecl, dynlib: iconvDll.} + importc: "iconv_open", importIconv.} proc iconvClose(c: EncodingConverter) {. - importc: prefix & "iconv_close", cdecl, dynlib: iconvDll.} - proc iconv(c: EncodingConverter, inbuf: var cstring, inbytesLeft: var int, - outbuf: var cstring, outbytesLeft: var int): int {. - importc: prefix & "iconv", cdecl, dynlib: iconvDll.} - proc iconv(c: EncodingConverter, inbuf: pointer, inbytesLeft: pointer, - outbuf: var cstring, outbytesLeft: var int): int {. - importc: prefix & "iconv", cdecl, dynlib: iconvDll.} - -proc getCurrentEncoding*(): string = - ## retrieves the current encoding. On Unix, always "UTF-8" is returned. + importc: "iconv_close", importIconv.} + proc iconv(c: EncodingConverter, inbuf: ptr cstring, inbytesLeft: ptr csize_t, + outbuf: ptr cstring, outbytesLeft: ptr csize_t): csize_t {. + importc: "iconv", importIconv.} + +proc getCurrentEncoding*(uiApp = false): string = + ## Retrieves the current encoding. On Unix, "UTF-8" is always returned. + ## The `uiApp` parameter is Windows specific. 
If true, the UI's code-page + ## is returned, if false, the Console's code-page is returned. when defined(windows): - result = codePageToName(getACP()) + result = codePageToName(if uiApp: getACP() else: getGetConsoleCP()) else: result = "UTF-8" - + proc open*(destEncoding = "UTF-8", srcEncoding = "CP1252"): EncodingConverter = - ## opens a converter that can convert from `srcEncoding` to `destEncoding`. - ## Raises `EIO` if it cannot fulfill the request. + ## Opens a converter that can convert from `srcEncoding` to `destEncoding`. + ## Raises `EncodingError` if it cannot fulfill the request. when not defined(windows): result = iconvOpen(destEncoding, srcEncoding) - if result == nil: - raise newException(EncodingError, - "cannot create encoding converter from " & + if result == cast[EncodingConverter](-1): + raise newException(EncodingError, + "cannot create encoding converter from " & srcEncoding & " to " & destEncoding) else: result.dest = nameToCodePage(destEncoding) result.src = nameToCodePage(srcEncoding) if int(result.dest) == -1: - raise newException(EncodingError, + raise newException(EncodingError, "cannot find encoding " & destEncoding) if int(result.src) == -1: - raise newException(EncodingError, + raise newException(EncodingError, "cannot find encoding " & srcEncoding) proc close*(c: EncodingConverter) = - ## frees the resources the converter `c` holds. + ## Frees the resources the converter `c` holds. when not defined(windows): iconvClose(c) when defined(windows): - proc convert*(c: EncodingConverter, s: string): string = - ## converts `s` to `destEncoding` that was given to the converter `c`. It - ## assumed that `s` is in `srcEncoding`. 
- - # special case: empty string: needed because MultiByteToWideChar - # return 0 in case of error: - if s.len == 0: return "" + proc convertToWideString(codePage: CodePage, s: string): string = # educated guess of capacity: var cap = s.len + s.len shr 2 - result = newStringOfCap(cap*2) + result = newString(cap*2) # convert to utf-16 LE - var m = multiByteToWideChar(codePage = c.src, dwFlags = 0'i32, + var m = multiByteToWideChar(codePage, + dwFlags = 0'i32, lpMultiByteStr = cstring(s), cbMultiByte = cint(s.len), lpWideCharStr = cstring(result), cchWideChar = cint(cap)) - if m == 0: + if m == 0: # try again; ask for capacity: - cap = multiByteToWideChar(codePage = c.src, dwFlags = 0'i32, + cap = multiByteToWideChar(codePage, + dwFlags = 0'i32, lpMultiByteStr = cstring(s), cbMultiByte = cint(s.len), lpWideCharStr = nil, cchWideChar = cint(0)) # and do the conversion properly: - result = newStringOfCap(cap*2) - m = multiByteToWideChar(codePage = c.src, dwFlags = 0'i32, + result = newString(cap*2) + m = multiByteToWideChar(codePage, + dwFlags = 0'i32, lpMultiByteStr = cstring(s), cbMultiByte = cint(s.len), lpWideCharStr = cstring(result), @@ -360,57 +395,79 @@ when defined(windows): setLen(result, m*2) else: assert(false) # cannot happen - - # if already utf-16 LE, no further need to do something: - if int(c.dest) == 1200: return - # otherwise the fun starts again: - cap = s.len + s.len shr 2 - var res = newStringOfCap(cap) - m = wideCharToMultiByte( - codePage = c.dest, - dwFlags = 0'i32, - lpWideCharStr = cstring(result), - cchWideChar = cint(result.len div 2), - lpMultiByteStr = cstring(res), - cbMultiByte = cap.cint) + + proc convertFromWideString(codePage: CodePage, s: string): string = + let charCount = s.len div 2 + var cap = s.len + s.len shr 2 + result = newString(cap) + var m = wideCharToMultiByte(codePage, + dwFlags = 0'i32, + lpWideCharStr = cstring(s), + cchWideChar = cint(charCount), + lpMultiByteStr = cstring(result), + cbMultiByte = cap.cint) if m == 
0: # try again; ask for capacity: - cap = wideCharToMultiByte( - codePage = c.dest, - dwFlags = 0'i32, - lpWideCharStr = cstring(result), - cchWideChar = cint(result.len div 2), - lpMultiByteStr = nil, - cbMultiByte = cint(0)) + cap = wideCharToMultiByte(codePage, + dwFlags = 0'i32, + lpWideCharStr = cstring(s), + cchWideChar = cint(charCount), + lpMultiByteStr = nil, + cbMultiByte = cint(0)) # and do the conversion properly: - res = newStringOfCap(cap) - m = wideCharToMultiByte( - codePage = c.dest, - dwFlags = 0'i32, - lpWideCharStr = cstring(result), - cchWideChar = cint(result.len div 2), - lpMultiByteStr = cstring(res), - cbMultiByte = cap.cint) + result = newString(cap) + m = wideCharToMultiByte(codePage, + dwFlags = 0'i32, + lpWideCharStr = cstring(s), + cchWideChar = cint(charCount), + lpMultiByteStr = cstring(result), + cbMultiByte = cap.cint) if m == 0: raiseOSError(osLastError()) - setLen(res, m) - result = res + setLen(result, m) elif m <= cap: - setLen(res, m) - result = res + setLen(result, m) else: assert(false) # cannot happen + proc convertWin(codePageFrom: CodePage, codePageTo: CodePage, + s: string): string = + # special case: empty string: needed because MultiByteToWideChar, WideCharToMultiByte + # return 0 in case of error + if s.len == 0: return "" + # multiByteToWideChar does not support encoding from code pages below + let unsupported = [1201, 12000, 12001] + + if int(codePageFrom) in unsupported: + let message = "encoding from " & codePageToName(codePageFrom) & " is not supported on windows" + raise newException(EncodingError, message) + + if int(codePageTo) in unsupported: + let message = "encoding to " & codePageToName(codePageTo) & " is not supported on windows" + raise newException(EncodingError, message) + + # in case it's already UTF-16 little endian - conversion can be simplified + let wideString = if int(codePageFrom) == 1200: s + else: convertToWideString(codePageFrom, s) + return if int(codePageTo) == 1200: wideString + else: 
convertFromWideString(codePageTo, wideString) + + proc convert*(c: EncodingConverter, s: string): string = + result = convertWin(c.src, c.dest, s) else: proc convert*(c: EncodingConverter, s: string): string = + ## Converts `s` to `destEncoding` that was given to the converter `c`. It + ## assumes that `s` is in `srcEncoding`. + ## + ## .. warning:: UTF-16BE and UTF-32 conversions are not supported on Windows. result = newString(s.len) - var inLen = len(s) - var outLen = len(result) + var inLen = csize_t len(s) + var outLen = csize_t len(result) var src = cstring(s) var dst = cstring(result) - var iconvres: int + var iconvres: csize_t while inLen > 0: - iconvres = iconv(c, src, inLen, dst, outLen) - if iconvres == -1: + iconvres = iconv(c, addr src, addr inLen, addr dst, addr outLen) + if iconvres == high(csize_t): var lerr = errno if lerr == EILSEQ or lerr == EINVAL: # unknown char, skip @@ -421,44 +478,34 @@ else: dec(outLen) elif lerr == E2BIG: var offset = cast[int](dst) - cast[int](cstring(result)) - setLen(result, len(result)+inLen*2+5) + setLen(result, len(result) + inLen.int * 2 + 5) # 5 is minimally one utf-8 char dst = cast[cstring](cast[int](cstring(result)) + offset) - outLen = len(result) - offset + outLen = csize_t(len(result) - offset) else: raiseOSError(lerr.OSErrorCode) - # iconv has a buffer that needs flushing, specially if the last char is + # iconv has a buffer that needs flushing, specially if the last char is # not '\0' - discard iconv(c, nil, nil, dst, outLen) - if iconvres == cint(-1) and errno == E2BIG: + discard iconv(c, nil, nil, addr dst, addr outLen) + if iconvres == high(csize_t) and errno == E2BIG: var offset = cast[int](dst) - cast[int](cstring(result)) - setLen(result, len(result)+inLen*2+5) + setLen(result, len(result) + inLen.int * 2 + 5) # 5 is minimally one utf-8 char dst = cast[cstring](cast[int](cstring(result)) + offset) - outLen = len(result) - offset - discard iconv(c, nil, nil, dst, outLen) + outLen = csize_t(len(result) 
- offset) + discard iconv(c, nil, nil, addr dst, addr outLen) # trim output buffer - setLen(result, len(result) - outLen) + setLen(result, len(result) - outLen.int) -proc convert*(s: string, destEncoding = "UTF-8", +proc convert*(s: string, destEncoding = "UTF-8", srcEncoding = "CP1252"): string = - ## converts `s` to `destEncoding`. It assumed that `s` is in `srcEncoding`. + ## Converts `s` to `destEncoding`. It assumes that `s` is in `srcEncoding`. ## This opens a converter, uses it and closes it again and is thus more - ## convienent but also likely less efficient than re-using a converter. + ## convenient but also likely less efficient than re-using a converter. + ## + ## .. warning:: UTF-16BE and UTF-32 conversions are not supported on Windows. var c = open(destEncoding, srcEncoding) try: result = convert(c, s) finally: close(c) - -when isMainModule: - let - orig = "öäüß" - cp1252 = convert(orig, "CP1252", "UTF-8") - ibm850 = convert(cp1252, "ibm850", "CP1252") - current = getCurrentEncoding() - echo "Original string from source code: ", orig - echo "Forced ibm850 encoding: ", ibm850 - echo "Current encoding: ", current - echo "From ibm850 to current: ", convert(ibm850, current, "ibm850") - diff --git a/lib/pure/endians.nim b/lib/pure/endians.nim index 6e33d4624..4c1d45ae5 100644 --- a/lib/pure/endians.nim +++ b/lib/pure/endians.nim @@ -9,39 +9,116 @@ ## This module contains helpers that deal with different byte orders ## (`endian`:idx:). +## +## Endianness is the order of bytes of a value in memory. Big-endian means that +## the most significant byte is stored at the smallest memory address, +## while little-endian means that the least-significant byte is stored +## at the smallest address. See also https://en.wikipedia.org/wiki/Endianness. +## +## Unstable API. -proc swapEndian64*(outp, inp: pointer) = - ## copies `inp` to `outp` swapping bytes. Both buffers are supposed to - ## contain at least 8 bytes. 
- var i = cast[cstring](inp) - var o = cast[cstring](outp) - o[0] = i[7] - o[1] = i[6] - o[2] = i[5] - o[3] = i[4] - o[4] = i[3] - o[5] = i[2] - o[6] = i[1] - o[7] = i[0] - -proc swapEndian32*(outp, inp: pointer) = - ## copies `inp` to `outp` swapping bytes. Both buffers are supposed to - ## contain at least 4 bytes. - var i = cast[cstring](inp) - var o = cast[cstring](outp) - o[0] = i[3] - o[1] = i[2] - o[2] = i[1] - o[3] = i[0] - -proc swapEndian16*(outp, inp: pointer) = - ## copies `inp` to `outp` swapping bytes. Both buffers are supposed to - ## contain at least 2 bytes. - var - i = cast[cstring](inp) - o = cast[cstring](outp) - o[0] = i[1] - o[1] = i[0] +when defined(gcc) or defined(llvm_gcc) or defined(clang): + const useBuiltinSwap = true + proc builtin_bswap16(a: uint16): uint16 {. + importc: "__builtin_bswap16", nodecl, noSideEffect.} + + proc builtin_bswap32(a: uint32): uint32 {. + importc: "__builtin_bswap32", nodecl, noSideEffect.} + + proc builtin_bswap64(a: uint64): uint64 {. + importc: "__builtin_bswap64", nodecl, noSideEffect.} +elif defined(icc): + const useBuiltinSwap = true + proc builtin_bswap16(a: uint16): uint16 {. + importc: "_bswap16", nodecl, noSideEffect.} + + proc builtin_bswap32(a: uint32): uint32 {. + importc: "_bswap", nodecl, noSideEffect.} + + proc builtin_bswap64(a: uint64): uint64 {. + importc: "_bswap64", nodecl, noSideEffect.} +elif defined(vcc): + const useBuiltinSwap = true + proc builtin_bswap16(a: uint16): uint16 {. + importc: "_byteswap_ushort", nodecl, header: "<intrin.h>", noSideEffect.} + + proc builtin_bswap32(a: uint32): uint32 {. + importc: "_byteswap_ulong", nodecl, header: "<intrin.h>", noSideEffect.} + + proc builtin_bswap64(a: uint64): uint64 {. 
+ importc: "_byteswap_uint64", nodecl, header: "<intrin.h>", noSideEffect.} +else: + const useBuiltinSwap = false + +when useBuiltinSwap: + template swapOpImpl(T: typedesc, op: untyped) = + ## We have to use `copyMem` here instead of a simple dereference because + ## `inp` and `outp` may point to an unaligned address. A sufficiently smart + ## compiler _should_ be able to elide the copies when they're not necessary. + var tmp: T + copyMem(addr tmp, inp, sizeof(T)) + tmp = op(tmp) + copyMem(outp, addr tmp, sizeof(T)) + + proc swapEndian64*(outp, inp: pointer) {.inline, noSideEffect.} = + ## Copies `inp` to `outp`, reversing the byte order. + ## Both buffers are supposed to contain at least 8 bytes. + runnableExamples: + var a = [1'u8, 2, 3, 4, 5, 6, 7, 8] + var b: array[8, uint8] + swapEndian64(addr b, addr a) + assert b == [8'u8, 7, 6, 5, 4, 3, 2, 1] + + swapOpImpl(uint64, builtin_bswap64) + + proc swapEndian32*(outp, inp: pointer) {.inline, noSideEffect.} = + ## Copies `inp` to `outp`, reversing the byte order. + ## Both buffers are supposed to contain at least 4 bytes. + runnableExamples: + var a = [1'u8, 2, 3, 4] + var b: array[4, uint8] + swapEndian32(addr b, addr a) + assert b == [4'u8, 3, 2, 1] + + swapOpImpl(uint32, builtin_bswap32) + + proc swapEndian16*(outp, inp: pointer) {.inline, noSideEffect.} = + ## Copies `inp` to `outp`, reversing the byte order. + ## Both buffers are supposed to contain at least 2 bytes. 
+ runnableExamples: + var a = [1'u8, 2] + var b: array[2, uint8] + swapEndian16(addr b, addr a) + assert b == [2'u8, 1] + + swapOpImpl(uint16, builtin_bswap16) + +else: + proc swapEndian64*(outp, inp: pointer) = + var i = cast[cstring](inp) + var o = cast[cstring](outp) + o[0] = i[7] + o[1] = i[6] + o[2] = i[5] + o[3] = i[4] + o[4] = i[3] + o[5] = i[2] + o[6] = i[1] + o[7] = i[0] + + proc swapEndian32*(outp, inp: pointer) = + var i = cast[cstring](inp) + var o = cast[cstring](outp) + o[0] = i[3] + o[1] = i[2] + o[2] = i[1] + o[3] = i[0] + + proc swapEndian16*(outp, inp: pointer) = + var i = cast[cstring](inp) + var o = cast[cstring](outp) + o[0] = i[1] + o[1] = i[0] when system.cpuEndian == bigEndian: proc littleEndian64*(outp, inp: pointer) {.inline.} = swapEndian64(outp, inp) @@ -50,10 +127,22 @@ when system.cpuEndian == bigEndian: proc bigEndian64*(outp, inp: pointer) {.inline.} = copyMem(outp, inp, 8) proc bigEndian32*(outp, inp: pointer) {.inline.} = copyMem(outp, inp, 4) proc bigEndian16*(outp, inp: pointer) {.inline.} = copyMem(outp, inp, 2) -else: +else: proc littleEndian64*(outp, inp: pointer) {.inline.} = copyMem(outp, inp, 8) + ## Copies `inp` to `outp`, storing it in 64-bit little-endian order. + ## Both buffers are supposed to contain at least 8 bytes. proc littleEndian32*(outp, inp: pointer) {.inline.} = copyMem(outp, inp, 4) + ## Copies `inp` to `outp`, storing it in 32-bit little-endian order. + ## Both buffers are supposed to contain at least 4 bytes. proc littleEndian16*(outp, inp: pointer){.inline.} = copyMem(outp, inp, 2) + ## Copies `inp` to `outp`, storing it in 16-bit little-endian order. + ## Both buffers are supposed to contain at least 2 bytes. proc bigEndian64*(outp, inp: pointer) {.inline.} = swapEndian64(outp, inp) + ## Copies `inp` to `outp`, storing it in 64-bit big-endian order. + ## Both buffers are supposed to contain at least 8 bytes. 
proc bigEndian32*(outp, inp: pointer) {.inline.} = swapEndian32(outp, inp) + ## Copies `inp` to `outp`, storing it in 32-bit big-endian order. + ## Both buffers are supposed to contain at least 4 bytes. proc bigEndian16*(outp, inp: pointer) {.inline.} = swapEndian16(outp, inp) + ## Copies `inp` to `outp`, storing it in 16-bit big-endian order. + ## Both buffers are supposed to contain at least 2 bytes. diff --git a/lib/pure/events.nim b/lib/pure/events.nim deleted file mode 100644 index 44e9ed286..000000000 --- a/lib/pure/events.nim +++ /dev/null @@ -1,103 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2011 Alex Mitchell -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## :Author: Alex Mitchell -## -## This module implements an event system that is not dependent on external -## graphical toolkits. It was originally called ``NimEE`` because -## it was inspired by Python's PyEE module. There are two ways you can use -## events: one is a python-inspired way; the other is more of a C-style way. -## -## .. code-block:: Nim -## var ee = initEventEmitter() -## var genericargs: EventArgs -## proc handleevent(e: EventArgs) = -## echo("Handled!") -## -## # Python way -## ee.on("EventName", handleevent) -## ee.emit("EventName", genericargs) -## -## # C/Java way -## # Declare a type -## type -## SomeObject = object of RootObj -## SomeEvent: EventHandler -## var myobj: SomeObject -## myobj.SomeEvent = initEventHandler("SomeEvent") -## myobj.SomeEvent.addHandler(handleevent) -## ee.emit(myobj.SomeEvent, genericargs) - -type - EventArgs* = object of RootObj ## Base object for event arguments that are passed to callback functions. - EventHandler* = tuple[name: string, handlers: seq[proc(e: EventArgs) {.closure.}]] ## An eventhandler for an event. - -type - EventEmitter* = object ## An object that fires events and holds event handlers for an object. 
- s: seq[EventHandler] - EventError* = object of ValueError - -{.deprecated: [TEventArgs: EventArgs, TEventHandler: EventHandler, - TEventEmitter: EventEmitter, EInvalidEvent: EventError].} - -proc initEventHandler*(name: string): EventHandler = - ## Initializes an EventHandler with the specified name and returns it. - result.handlers = @[] - result.name = name - -proc addHandler*(handler: var EventHandler, fn: proc(e: EventArgs) {.closure.}) = - ## Adds the callback to the specified event handler. - handler.handlers.add(fn) - -proc removeHandler*(handler: var EventHandler, fn: proc(e: EventArgs) {.closure.}) = - ## Removes the callback from the specified event handler. - for i in countup(0, len(handler.handlers) -1): - if fn == handler.handlers[i]: - handler.handlers.del(i) - break - -proc containsHandler*(handler: var EventHandler, fn: proc(e: EventArgs) {.closure.}): bool = - ## Checks if a callback is registered to this event handler. - return handler.handlers.contains(fn) - - -proc clearHandlers*(handler: var EventHandler) = - ## Clears all of the callbacks from the event handler. - setLen(handler.handlers, 0) - -proc getEventHandler(emitter: var EventEmitter, event: string): int = - for k in 0..high(emitter.s): - if emitter.s[k].name == event: return k - return -1 - -proc on*(emitter: var EventEmitter, event: string, fn: proc(e: EventArgs) {.closure.}) = - ## Assigns a event handler with the specified callback. If the event - ## doesn't exist, it will be created. - var i = getEventHandler(emitter, event) - if i < 0: - var eh = initEventHandler(event) - addHandler(eh, fn) - emitter.s.add(eh) - else: - addHandler(emitter.s[i], fn) - -proc emit*(emitter: var EventEmitter, eventhandler: var EventHandler, - args: EventArgs) = - ## Fires an event handler with specified event arguments. 
- for fn in items(eventhandler.handlers): fn(args) - -proc emit*(emitter: var EventEmitter, event: string, args: EventArgs) = - ## Fires an event handler with specified event arguments. - var i = getEventHandler(emitter, event) - if i >= 0: - emit(emitter, emitter.s[i], args) - -proc initEventEmitter*(): EventEmitter = - ## Creates and returns a new EventEmitter. - result.s = @[] diff --git a/lib/pure/fenv.nim b/lib/pure/fenv.nim index f8f115ecc..1d96fd6be 100644 --- a/lib/pure/fenv.nim +++ b/lib/pure/fenv.nim @@ -9,10 +9,10 @@ ## Floating-point environment. Handling of floating-point rounding and ## exceptions (overflow, division by zero, etc.). +## The types, vars and procs are bindings for the C standard library +## [<fenv.h>](https://en.cppreference.com/w/c/numeric/fenv) header. -{.deadCodeElim:on.} - -when defined(Posix) and not defined(haiku): +when defined(posix) and not defined(genode) and not defined(macosx): {.passl: "-lm".} var @@ -37,8 +37,8 @@ var FE_UPWARD* {.importc, header: "<fenv.h>".}: cint ## round toward +Inf FE_DFL_ENV* {.importc, header: "<fenv.h>".}: cint - ## macro of type pointer to fenv_t to be used as the argument - ## to functions taking an argument of type fenv_t; in this + ## macro of type pointer to `fenv_t` to be used as the argument + ## to functions taking an argument of type `fenv_t`; in this ## case the default environment will be used type @@ -102,80 +102,81 @@ proc feupdateenv*(envp: ptr Tfenv): cint {.importc, header: "<fenv.h>".} ## represented by object pointed to by `envp` and raise exceptions ## according to saved exceptions. -var FP_RADIX_INTERNAL {. 
importc: "FLT_RADIX" header: "<float.h>" .} : int - -template fpRadix* : int = FP_RADIX_INTERNAL +const + FLT_RADIX = 2 ## the radix of the exponent representation + + FLT_MANT_DIG = 24 ## the number of base FLT_RADIX digits in the mantissa part of a float + FLT_DIG = 6 ## the number of digits of precision of a float + FLT_MIN_EXP = -125 ## the minimum value of base FLT_RADIX in the exponent part of a float + FLT_MAX_EXP = 128 ## the maximum value of base FLT_RADIX in the exponent part of a float + FLT_MIN_10_EXP = -37 ## the minimum value in base 10 of the exponent part of a float + FLT_MAX_10_EXP = 38 ## the maximum value in base 10 of the exponent part of a float + FLT_MIN = 1.17549435e-38'f32 ## the minimum value of a float + FLT_MAX = 3.40282347e+38'f32 ## the maximum value of a float + FLT_EPSILON = 1.19209290e-07'f32 ## the difference between 1 and the least value greater than 1 of a float + + DBL_MANT_DIG = 53 ## the number of base FLT_RADIX digits in the mantissa part of a double + DBL_DIG = 15 ## the number of digits of precision of a double + DBL_MIN_EXP = -1021 ## the minimum value of base FLT_RADIX in the exponent part of a double + DBL_MAX_EXP = 1024 ## the maximum value of base FLT_RADIX in the exponent part of a double + DBL_MIN_10_EXP = -307 ## the minimum value in base 10 of the exponent part of a double + DBL_MAX_10_EXP = 308 ## the maximum value in base 10 of the exponent part of a double + DBL_MIN = 2.2250738585072014E-308 ## the minimal value of a double + DBL_MAX = 1.7976931348623157E+308 ## the maximal value of a double + DBL_EPSILON = 2.2204460492503131E-16 ## the difference between 1 and the least value greater than 1 of a double + +template fpRadix*: int = FLT_RADIX ## The (integer) value of the radix used to represent any floating ## point type on the architecture used to build the program. -var FLT_MANT_DIG {. importc: "FLT_MANT_DIG" header: "<float.h>" .} : int -var FLT_DIG {. 
importc: "FLT_DIG" header: "<float.h>" .} : int -var FLT_MIN_EXP {. importc: "FLT_MIN_EXP" header: "<float.h>" .} : int -var FLT_MAX_EXP {. importc: "FLT_MAX_EXP" header: "<float.h>" .} : int -var FLT_MIN_10_EXP {. importc: "FLT_MIN_10_EXP" header: "<float.h>" .} : int -var FLT_MAX_10_EXP {. importc: "FLT_MAX_10_EXP" header: "<float.h>" .} : int -var FLT_MIN {. importc: "FLT_MIN" header: "<float.h>" .} : cfloat -var FLT_MAX {. importc: "FLT_MAX" header: "<float.h>" .} : cfloat -var FLT_EPSILON {. importc: "FLT_EPSILON" header: "<float.h>" .} : cfloat - -var DBL_MANT_DIG {. importc: "DBL_MANT_DIG" header: "<float.h>" .} : int -var DBL_DIG {. importc: "DBL_DIG" header: "<float.h>" .} : int -var DBL_MIN_EXP {. importc: "DBL_MIN_EXP" header: "<float.h>" .} : int -var DBL_MAX_EXP {. importc: "DBL_MAX_EXP" header: "<float.h>" .} : int -var DBL_MIN_10_EXP {. importc: "DBL_MIN_10_EXP" header: "<float.h>" .} : int -var DBL_MAX_10_EXP {. importc: "DBL_MAX_10_EXP" header: "<float.h>" .} : int -var DBL_MIN {. importc: "DBL_MIN" header: "<float.h>" .} : cdouble -var DBL_MAX {. importc: "DBL_MAX" header: "<float.h>" .} : cdouble -var DBL_EPSILON {. importc: "DBL_EPSILON" header: "<float.h>" .} : cdouble - -template mantissaDigits*(T : typedesc[float32]) : int = FLT_MANT_DIG - ## Number of digits (in base ``floatingPointRadix``) in the mantissa +template mantissaDigits*(T: typedesc[float32]): int = FLT_MANT_DIG + ## Number of digits (in base `floatingPointRadix`) in the mantissa ## of 32-bit floating-point numbers. -template digits*(T : typedesc[float32]) : int = FLT_DIG +template digits*(T: typedesc[float32]): int = FLT_DIG ## Number of decimal digits that can be represented in a ## 32-bit floating-point type without losing precision. -template minExponent*(T : typedesc[float32]) : int = FLT_MIN_EXP +template minExponent*(T: typedesc[float32]): int = FLT_MIN_EXP ## Minimum (negative) exponent for 32-bit floating-point numbers. 
-template maxExponent*(T : typedesc[float32]) : int = FLT_MAX_EXP +template maxExponent*(T: typedesc[float32]): int = FLT_MAX_EXP ## Maximum (positive) exponent for 32-bit floating-point numbers. -template min10Exponent*(T : typedesc[float32]) : int = FLT_MIN_10_EXP +template min10Exponent*(T: typedesc[float32]): int = FLT_MIN_10_EXP ## Minimum (negative) exponent in base 10 for 32-bit floating-point ## numbers. -template max10Exponent*(T : typedesc[float32]) : int = FLT_MAX_10_EXP +template max10Exponent*(T: typedesc[float32]): int = FLT_MAX_10_EXP ## Maximum (positive) exponent in base 10 for 32-bit floating-point ## numbers. -template minimumPositiveValue*(T : typedesc[float32]) : float32 = FLT_MIN +template minimumPositiveValue*(T: typedesc[float32]): float32 = FLT_MIN ## The smallest positive (nonzero) number that can be represented in a ## 32-bit floating-point type. -template maximumPositiveValue*(T : typedesc[float32]) : float32 = FLT_MAX +template maximumPositiveValue*(T: typedesc[float32]): float32 = FLT_MAX ## The largest positive number that can be represented in a 32-bit ## floating-point type. -template epsilon*(T : typedesc[float32]): float32 = FLT_EPSILON +template epsilon*(T: typedesc[float32]): float32 = FLT_EPSILON ## The difference between 1.0 and the smallest number greater than ## 1.0 that can be represented in a 32-bit floating-point type. -template mantissaDigits*(T : typedesc[float64]) : int = DBL_MANT_DIG - ## Number of digits (in base ``floatingPointRadix``) in the mantissa +template mantissaDigits*(T: typedesc[float64]): int = DBL_MANT_DIG + ## Number of digits (in base `floatingPointRadix`) in the mantissa ## of 64-bit floating-point numbers. -template digits*(T : typedesc[float64]) : int = DBL_DIG +template digits*(T: typedesc[float64]): int = DBL_DIG ## Number of decimal digits that can be represented in a ## 64-bit floating-point type without losing precision. 
-template minExponent*(T : typedesc[float64]) : int = DBL_MIN_EXP +template minExponent*(T: typedesc[float64]): int = DBL_MIN_EXP ## Minimum (negative) exponent for 64-bit floating-point numbers. -template maxExponent*(T : typedesc[float64]) : int = DBL_MAX_EXP +template maxExponent*(T: typedesc[float64]): int = DBL_MAX_EXP ## Maximum (positive) exponent for 64-bit floating-point numbers. -template min10Exponent*(T : typedesc[float64]) : int = DBL_MIN_10_EXP +template min10Exponent*(T: typedesc[float64]): int = DBL_MIN_10_EXP ## Minimum (negative) exponent in base 10 for 64-bit floating-point ## numbers. -template max10Exponent*(T : typedesc[float64]) : int = DBL_MAX_10_EXP +template max10Exponent*(T: typedesc[float64]): int = DBL_MAX_10_EXP ## Maximum (positive) exponent in base 10 for 64-bit floating-point ## numbers. -template minimumPositiveValue*(T : typedesc[float64]) : float64 = DBL_MIN +template minimumPositiveValue*(T: typedesc[float64]): float64 = DBL_MIN ## The smallest positive (nonzero) number that can be represented in a ## 64-bit floating-point type. -template maximumPositiveValue*(T : typedesc[float64]) : float64 = DBL_MAX +template maximumPositiveValue*(T: typedesc[float64]): float64 = DBL_MAX ## The largest positive number that can be represented in a 64-bit ## floating-point type. -template epsilon*(T : typedesc[float64]): float64 = DBL_EPSILON +template epsilon*(T: typedesc[float64]): float64 = DBL_EPSILON ## The difference between 1.0 and the smallest number greater than ## 1.0 that can be represented in a 64-bit floating-point type. diff --git a/lib/pure/fsmonitor.nim b/lib/pure/fsmonitor.nim deleted file mode 100644 index e6919b661..000000000 --- a/lib/pure/fsmonitor.nim +++ /dev/null @@ -1,217 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2012 Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. 
-# - -## This module allows you to monitor files or directories for changes using -## asyncio. -## -## Windows support is not yet implemented. -## -## **Note:** This module uses ``inotify`` on Linux (Other Unixes are not yet -## supported). ``inotify`` was merged into the 2.6.13 Linux kernel, this -## module will therefore not work with any Linux kernel prior to that, unless -## it has been patched to support inotify. - -when defined(linux) or defined(nimdoc): - from posix import read -else: - {.error: "Your platform is not supported.".} - -import inotify, os, asyncio, tables - -type - FSMonitor* = ref FSMonitorObj - FSMonitorObj = object of RootObj - fd: cint - handleEvent: proc (m: FSMonitor, ev: MonitorEvent) {.closure.} - targets: Table[cint, string] - - MonitorEventType* = enum ## Monitor event type - MonitorAccess, ## File was accessed. - MonitorAttrib, ## Metadata changed. - MonitorCloseWrite, ## Writtable file was closed. - MonitorCloseNoWrite, ## Unwrittable file closed. - MonitorCreate, ## Subfile was created. - MonitorDelete, ## Subfile was deleted. - MonitorDeleteSelf, ## Watched file/directory was itself deleted. - MonitorModify, ## File was modified. - MonitorMoveSelf, ## Self was moved. - MonitorMoved, ## File was moved. - MonitorOpen, ## File was opened. - MonitorAll ## Filter for all event types. - - MonitorEvent* = object - case kind*: MonitorEventType ## Type of the event. - of MonitorMoveSelf, MonitorMoved: - oldPath*: string ## Old absolute location - newPath*: string ## New absolute location - else: - fullname*: string ## Absolute filename of the file/directory affected. - name*: string ## Non absolute filepath of the file/directory - ## affected relative to the directory watched. - ## "" if this event refers to the file/directory - ## watched. - wd*: cint ## Watch descriptor. 
- -{.deprecated: [PFSMonitor: FSMonitor, TFSMonitor: FSMonitorObj, - TMonitorEventType: MonitorEventType, TMonitorEvent: MonitorEvent].} - -const - MaxEvents = 100 - -proc newMonitor*(): FSMonitor = - ## Creates a new file system monitor. - new(result) - result.targets = initTable[cint, string]() - result.fd = inotifyInit() - if result.fd < 0: - raiseOSError(osLastError()) - -proc add*(monitor: FSMonitor, target: string, - filters = {MonitorAll}): cint {.discardable.} = - ## Adds ``target`` which may be a directory or a file to the list of - ## watched paths of ``monitor``. - ## You can specify the events to report using the ``filters`` parameter. - - var INFilter = -1 - for f in filters: - case f - of MonitorAccess: INFilter = INFilter and IN_ACCESS - of MonitorAttrib: INFilter = INFilter and IN_ATTRIB - of MonitorCloseWrite: INFilter = INFilter and IN_CLOSE_WRITE - of MonitorCloseNoWrite: INFilter = INFilter and IN_CLOSE_NO_WRITE - of MonitorCreate: INFilter = INFilter and IN_CREATE - of MonitorDelete: INFilter = INFilter and IN_DELETE - of MonitorDeleteSelf: INFilter = INFilter and IN_DELETE_SELF - of MonitorModify: INFilter = INFilter and IN_MODIFY - of MonitorMoveSelf: INFilter = INFilter and IN_MOVE_SELF - of MonitorMoved: INFilter = INFilter and IN_MOVED_FROM and IN_MOVED_TO - of MonitorOpen: INFilter = INFilter and IN_OPEN - of MonitorAll: INFilter = INFilter and IN_ALL_EVENTS - - result = inotifyAddWatch(monitor.fd, target, INFilter.uint32) - if result < 0: - raiseOSError(osLastError()) - monitor.targets.add(result, target) - -proc del*(monitor: FSMonitor, wd: cint) = - ## Removes watched directory or file as specified by ``wd`` from ``monitor``. - ## - ## If ``wd`` is not a part of ``monitor`` an EOS error is raised. 
- if inotifyRmWatch(monitor.fd, wd) < 0: - raiseOSError(osLastError()) - -proc getEvent(m: FSMonitor, fd: cint): seq[MonitorEvent] = - result = @[] - let size = (sizeof(TINotifyEvent)+2000)*MaxEvents - var buffer = newString(size) - - let le = read(fd, addr(buffer[0]), size) - - var movedFrom = initTable[cint, tuple[wd: cint, old: string]]() - - var i = 0 - while i < le: - var event = cast[ptr TINotifyEvent](addr(buffer[i])) - var mev: MonitorEvent - mev.wd = event.wd - if event.len.int != 0: - let cstr = event.name.addr.cstring - mev.name = $cstr - else: - mev.name = "" - - if (event.mask.int and IN_MOVED_FROM) != 0: - # Moved from event, add to m's collection - movedFrom.add(event.cookie.cint, (mev.wd, mev.name)) - inc(i, sizeof(TINotifyEvent) + event.len.int) - continue - elif (event.mask.int and IN_MOVED_TO) != 0: - mev.kind = MonitorMoved - assert movedFrom.hasKey(event.cookie.cint) - # Find the MovedFrom event. - mev.oldPath = movedFrom[event.cookie.cint].old - mev.newPath = "" # Set later - # Delete it from the Table - movedFrom.del(event.cookie.cint) - elif (event.mask.int and IN_ACCESS) != 0: mev.kind = MonitorAccess - elif (event.mask.int and IN_ATTRIB) != 0: mev.kind = MonitorAttrib - elif (event.mask.int and IN_CLOSE_WRITE) != 0: - mev.kind = MonitorCloseWrite - elif (event.mask.int and IN_CLOSE_NOWRITE) != 0: - mev.kind = MonitorCloseNoWrite - elif (event.mask.int and IN_CREATE) != 0: mev.kind = MonitorCreate - elif (event.mask.int and IN_DELETE) != 0: - mev.kind = MonitorDelete - elif (event.mask.int and IN_DELETE_SELF) != 0: - mev.kind = MonitorDeleteSelf - elif (event.mask.int and IN_MODIFY) != 0: mev.kind = MonitorModify - elif (event.mask.int and IN_MOVE_SELF) != 0: - mev.kind = MonitorMoveSelf - elif (event.mask.int and IN_OPEN) != 0: mev.kind = MonitorOpen - - if mev.kind != MonitorMoved: - mev.fullname = "" - - result.add(mev) - inc(i, sizeof(TINotifyEvent) + event.len.int) - - # If movedFrom events have not been matched with a moveTo. 
File has - # been moved to an unwatched location, emit a MonitorDelete. - for cookie, t in pairs(movedFrom): - var mev: MonitorEvent - mev.kind = MonitorDelete - mev.wd = t.wd - mev.name = t.old - result.add(mev) - -proc FSMonitorRead(h: RootRef) = - var events = FSMonitor(h).getEvent(FSMonitor(h).fd) - #var newEv: MonitorEvent - for ev in events: - var target = FSMonitor(h).targets[ev.wd] - var newEv = ev - if newEv.kind == MonitorMoved: - newEv.oldPath = target / newEv.oldPath - newEv.newPath = target / newEv.name - else: - newEv.fullName = target / newEv.name - FSMonitor(h).handleEvent(FSMonitor(h), newEv) - -proc toDelegate(m: FSMonitor): Delegate = - result = newDelegate() - result.deleVal = m - result.fd = (type(result.fd))(m.fd) - result.mode = fmRead - result.handleRead = FSMonitorRead - result.open = true - -proc register*(d: Dispatcher, monitor: FSMonitor, - handleEvent: proc (m: FSMonitor, ev: MonitorEvent) {.closure.}) = - ## Registers ``monitor`` with dispatcher ``d``. - monitor.handleEvent = handleEvent - var deleg = toDelegate(monitor) - d.register(deleg) - -when isMainModule: - proc main = - var disp = newDispatcher() - var monitor = newMonitor() - echo monitor.add("/home/dom/inotifytests/") - disp.register(monitor, - proc (m: FSMonitor, ev: MonitorEvent) = - echo("Got event: ", ev.kind) - if ev.kind == MonitorMoved: - echo("From ", ev.oldPath, " to ", ev.newPath) - echo("Name is ", ev.name) - else: - echo("Name ", ev.name, " fullname ", ev.fullName)) - - while true: - if not disp.poll(): break - main() diff --git a/lib/pure/ftpclient.nim b/lib/pure/ftpclient.nim deleted file mode 100644 index dc387b79c..000000000 --- a/lib/pure/ftpclient.nim +++ /dev/null @@ -1,646 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2015 Dominik Picheta -# See the file "copying.txt", included in this -# distribution, for details about the copyright. 
-# - -include "system/inclrtl" - -import sockets, strutils, parseutils, times, os, asyncio - -from asyncnet import nil -from rawsockets import nil -from asyncdispatch import PFuture - -## This module **partially** implements an FTP client as specified -## by `RFC 959 <http://tools.ietf.org/html/rfc959>`_. -## -## This module provides both a synchronous and asynchronous implementation. -## The asynchronous implementation requires you to use the ``asyncFTPClient`` -## function. You are then required to register the ``AsyncFTPClient`` with a -## asyncio dispatcher using the ``register`` function. Take a look at the -## asyncio module documentation for more information. -## -## **Note**: The asynchronous implementation is only asynchronous for long -## file transfers, calls to functions which use the command socket will block. -## -## Here is some example usage of this module: -## -## .. code-block:: Nim -## var ftp = ftpClient("example.org", user = "user", pass = "pass") -## ftp.connect() -## ftp.retrFile("file.ext", "file.ext") -## -## **Warning:** The API of this module is unstable, and therefore is subject -## to change. - -type - FtpBase*[SockType] = ref FtpBaseObj[SockType] - FtpBaseObj*[SockType] = object - csock*: SockType - dsock*: SockType - when SockType is asyncio.AsyncSocket: - handleEvent*: proc (ftp: AsyncFTPClient, ev: FTPEvent){.closure,gcsafe.} - disp: Dispatcher - asyncDSockID: Delegate - user*, pass*: string - address*: string - when SockType is asyncnet.AsyncSocket: - port*: rawsockets.Port - else: - port*: Port - - jobInProgress*: bool - job*: FTPJob[SockType] - - dsockConnected*: bool - - FTPJobType* = enum - JRetrText, JRetr, JStore - - FtpJob[T] = ref FtpJobObj[T] - FTPJobObj[T] = object - prc: proc (ftp: FTPBase[T], async: bool): bool {.nimcall, gcsafe.} - case typ*: FTPJobType - of JRetrText: - lines: string - of JRetr, JStore: - file: File - filename: string - total: BiggestInt # In bytes. - progress: BiggestInt # In bytes. 
- oneSecond: BiggestInt # Bytes transferred in one second. - lastProgressReport: float # Time - toStore: string # Data left to upload (Only used with async) - else: nil - - FtpClientObj* = FtpBaseObj[Socket] - FtpClient* = ref FtpClientObj - - AsyncFtpClient* = ref AsyncFtpClientObj ## Async alternative to TFTPClient. - AsyncFtpClientObj* = FtpBaseObj[asyncio.AsyncSocket] - - FTPEventType* = enum - EvTransferProgress, EvLines, EvRetr, EvStore - - FTPEvent* = object ## Event - filename*: string - case typ*: FTPEventType - of EvLines: - lines*: string ## Lines that have been transferred. - of EvRetr, EvStore: ## Retr/Store operation finished. - nil - of EvTransferProgress: - bytesTotal*: BiggestInt ## Bytes total. - bytesFinished*: BiggestInt ## Bytes transferred. - speed*: BiggestInt ## Speed in bytes/s - currentJob*: FTPJobType ## The current job being performed. - - ReplyError* = object of IOError - FTPError* = object of IOError - -{.deprecated: [ - TFTPClient: FTPClientObj, TFTPJob: FTPJob, PAsyncFTPClient: AsyncFTPClient, - TAsyncFTPClient: AsyncFTPClientObj, TFTPEvent: FTPEvent, - EInvalidReply: ReplyError, EFTP: FTPError -].} - -proc ftpClient*(address: string, port = Port(21), - user, pass = ""): FtpClient = - ## Create a ``FtpClient`` object. 
- new(result) - result.user = user - result.pass = pass - result.address = address - result.port = port - - result.dsockConnected = false - result.csock = socket() - if result.csock == invalidSocket: raiseOSError(osLastError()) - -template blockingOperation(sock: Socket, body: stmt) {.immediate.} = - body - -template blockingOperation(sock: asyncio.AsyncSocket, body: stmt) {.immediate.} = - sock.setBlocking(true) - body - sock.setBlocking(false) - -proc expectReply[T](ftp: FtpBase[T]): TaintedString = - result = TaintedString"" - blockingOperation(ftp.csock): - when T is Socket: - ftp.csock.readLine(result) - else: - discard ftp.csock.readLine(result) - -proc send*[T](ftp: FtpBase[T], m: string): TaintedString = - ## Send a message to the server, and wait for a primary reply. - ## ``\c\L`` is added for you. - blockingOperation(ftp.csock): - ftp.csock.send(m & "\c\L") - return ftp.expectReply() - -proc assertReply(received: TaintedString, expected: string) = - if not received.string.startsWith(expected): - raise newException(ReplyError, - "Expected reply '$1' got: $2" % [ - expected, received.string]) - -proc assertReply(received: TaintedString, expected: varargs[string]) = - for i in items(expected): - if received.string.startsWith(i): return - raise newException(ReplyError, - "Expected reply '$1' got: $2" % - [expected.join("' or '"), received.string]) - -proc createJob[T](ftp: FtpBase[T], - prc: proc (ftp: FtpBase[T], async: bool): bool {. 
- nimcall,gcsafe.}, - cmd: FTPJobType) = - if ftp.jobInProgress: - raise newException(FTPError, "Unable to do two jobs at once.") - ftp.jobInProgress = true - new(ftp.job) - ftp.job.prc = prc - ftp.job.typ = cmd - case cmd - of JRetrText: - ftp.job.lines = "" - of JRetr, JStore: - ftp.job.toStore = "" - -proc deleteJob[T](ftp: FtpBase[T]) = - assert ftp.jobInProgress - ftp.jobInProgress = false - case ftp.job.typ - of JRetrText: - ftp.job.lines = "" - of JRetr, JStore: - ftp.job.file.close() - ftp.dsock.close() - -proc handleTask(s: AsyncSocket, ftp: AsyncFTPClient) = - if ftp.jobInProgress: - if ftp.job.typ in {JRetr, JStore}: - if epochTime() - ftp.job.lastProgressReport >= 1.0: - var r: FTPEvent - ftp.job.lastProgressReport = epochTime() - r.typ = EvTransferProgress - r.bytesTotal = ftp.job.total - r.bytesFinished = ftp.job.progress - r.speed = ftp.job.oneSecond - r.filename = ftp.job.filename - r.currentJob = ftp.job.typ - ftp.job.oneSecond = 0 - ftp.handleEvent(ftp, r) - -proc handleWrite(s: AsyncSocket, ftp: AsyncFTPClient) = - if ftp.jobInProgress: - if ftp.job.typ == JStore: - assert (not ftp.job.prc(ftp, true)) - -proc handleConnect(s: AsyncSocket, ftp: AsyncFTPClient) = - ftp.dsockConnected = true - assert(ftp.jobInProgress) - if ftp.job.typ == JStore: - s.setHandleWrite(proc (s: AsyncSocket) = handleWrite(s, ftp)) - else: - s.delHandleWrite() - -proc handleRead(s: AsyncSocket, ftp: AsyncFTPClient) = - assert ftp.jobInProgress - assert ftp.job.typ != JStore - # This can never return true, because it shouldn't check for code - # 226 from csock. - assert(not ftp.job.prc(ftp, true)) - -proc pasv[T](ftp: FtpBase[T]) = - ## Negotiate a data connection. 
- when T is Socket: - ftp.dsock = socket() - if ftp.dsock == invalidSocket: raiseOSError(osLastError()) - elif T is AsyncSocket: - ftp.dsock = asyncSocket() - ftp.dsock.handleRead = - proc (s: AsyncSocket) = - handleRead(s, ftp) - ftp.dsock.handleConnect = - proc (s: AsyncSocket) = - handleConnect(s, ftp) - ftp.dsock.handleTask = - proc (s: AsyncSocket) = - handleTask(s, ftp) - ftp.disp.register(ftp.dsock) - else: - {.fatal: "Incorrect socket instantiation".} - - var pasvMsg = ftp.send("PASV").string.strip.TaintedString - assertReply(pasvMsg, "227") - var betweenParens = captureBetween(pasvMsg.string, '(', ')') - var nums = betweenParens.split(',') - var ip = nums[0.. ^3] - var port = nums[^2.. ^1] - var properPort = port[0].parseInt()*256+port[1].parseInt() - ftp.dsock.connect(ip.join("."), Port(properPort.toU16)) - when T is AsyncSocket: - ftp.dsockConnected = false - else: - ftp.dsockConnected = true - -proc normalizePathSep(path: string): string = - return replace(path, '\\', '/') - -proc connect*[T](ftp: FtpBase[T]) = - ## Connect to the FTP server specified by ``ftp``. - when T is AsyncSocket: - blockingOperation(ftp.csock): - ftp.csock.connect(ftp.address, ftp.port) - elif T is Socket: - ftp.csock.connect(ftp.address, ftp.port) - else: - {.fatal: "Incorrect socket instantiation".} - - # TODO: Handle 120? or let user handle it. - assertReply ftp.expectReply(), "220" - - if ftp.user != "": - assertReply(ftp.send("USER " & ftp.user), "230", "331") - - if ftp.pass != "": - assertReply ftp.send("PASS " & ftp.pass), "230" - -proc pwd*[T](ftp: FtpBase[T]): string = - ## Returns the current working directory. - var wd = ftp.send("PWD") - assertReply wd, "257" - return wd.string.captureBetween('"') # " - -proc cd*[T](ftp: FtpBase[T], dir: string) = - ## Changes the current directory on the remote FTP server to ``dir``. 
- assertReply ftp.send("CWD " & dir.normalizePathSep), "250" - -proc cdup*[T](ftp: FtpBase[T]) = - ## Changes the current directory to the parent of the current directory. - assertReply ftp.send("CDUP"), "200" - -proc getLines[T](ftp: FtpBase[T], async: bool = false): bool = - ## Downloads text data in ASCII mode - ## Returns true if the download is complete. - ## It doesn't if `async` is true, because it doesn't check for 226 then. - if ftp.dsockConnected: - var r = TaintedString"" - when T is AsyncSocket: - if ftp.asyncDSock.readLine(r): - if r.string == "": - ftp.dsockConnected = false - else: - ftp.job.lines.add(r.string & "\n") - elif T is Socket: - assert(not async) - ftp.dsock.readLine(r) - if r.string == "": - ftp.dsockConnected = false - else: - ftp.job.lines.add(r.string & "\n") - else: - {.fatal: "Incorrect socket instantiation".} - - if not async: - var readSocks: seq[Socket] = @[ftp.csock] - # This is only needed here. Asyncio gets this socket... - blockingOperation(ftp.csock): - if readSocks.select(1) != 0 and ftp.csock in readSocks: - assertReply ftp.expectReply(), "226" - return true - -proc listDirs*[T](ftp: FtpBase[T], dir: string = "", - async = false): seq[string] = - ## Returns a list of filenames in the given directory. If ``dir`` is "", - ## the current directory is used. If ``async`` is true, this - ## function will return immediately and it will be your job to - ## use asyncio's ``poll`` to progress this operation. - - ftp.createJob(getLines[T], JRetrText) - ftp.pasv() - - assertReply ftp.send("NLST " & dir.normalizePathSep), ["125", "150"] - - if not async: - while not ftp.job.prc(ftp, false): discard - result = splitLines(ftp.job.lines) - ftp.deleteJob() - else: return @[] - -proc fileExists*(ftp: FtpClient, file: string): bool {.deprecated.} = - ## **Deprecated since version 0.9.0:** Please use ``existsFile``. - ## - ## Determines whether ``file`` exists. - ## - ## Warning: This function may block. 
Especially on directories with many - ## files, because a full list of file names must be retrieved. - var files = ftp.listDirs() - for f in items(files): - if f.normalizePathSep == file.normalizePathSep: return true - -proc existsFile*(ftp: FtpClient, file: string): bool = - ## Determines whether ``file`` exists. - ## - ## Warning: This function may block. Especially on directories with many - ## files, because a full list of file names must be retrieved. - var files = ftp.listDirs() - for f in items(files): - if f.normalizePathSep == file.normalizePathSep: return true - -proc createDir*[T](ftp: FtpBase[T], dir: string, recursive: bool = false) = - ## Creates a directory ``dir``. If ``recursive`` is true, the topmost - ## subdirectory of ``dir`` will be created first, following the secondmost... - ## etc. this allows you to give a full path as the ``dir`` without worrying - ## about subdirectories not existing. - if not recursive: - assertReply ftp.send("MKD " & dir.normalizePathSep), "257" - else: - var reply = TaintedString"" - var previousDirs = "" - for p in split(dir, {os.DirSep, os.AltSep}): - if p != "": - previousDirs.add(p) - reply = ftp.send("MKD " & previousDirs) - previousDirs.add('/') - assertReply reply, "257" - -proc chmod*[T](ftp: FtpBase[T], path: string, - permissions: set[FilePermission]) = - ## Changes permission of ``path`` to ``permissions``. 
- var userOctal = 0 - var groupOctal = 0 - var otherOctal = 0 - for i in items(permissions): - case i - of fpUserExec: userOctal.inc(1) - of fpUserWrite: userOctal.inc(2) - of fpUserRead: userOctal.inc(4) - of fpGroupExec: groupOctal.inc(1) - of fpGroupWrite: groupOctal.inc(2) - of fpGroupRead: groupOctal.inc(4) - of fpOthersExec: otherOctal.inc(1) - of fpOthersWrite: otherOctal.inc(2) - of fpOthersRead: otherOctal.inc(4) - - var perm = $userOctal & $groupOctal & $otherOctal - assertReply ftp.send("SITE CHMOD " & perm & - " " & path.normalizePathSep), "200" - -proc list*[T](ftp: FtpBase[T], dir: string = "", async = false): string = - ## Lists all files in ``dir``. If ``dir`` is ``""``, uses the current - ## working directory. If ``async`` is true, this function will return - ## immediately and it will be your job to call asyncio's - ## ``poll`` to progress this operation. - ftp.createJob(getLines[T], JRetrText) - ftp.pasv() - - assertReply(ftp.send("LIST" & " " & dir.normalizePathSep), ["125", "150"]) - - if not async: - while not ftp.job.prc(ftp, false): discard - result = ftp.job.lines - ftp.deleteJob() - else: - return "" - -proc retrText*[T](ftp: FtpBase[T], file: string, async = false): string = - ## Retrieves ``file``. File must be ASCII text. - ## If ``async`` is true, this function will return immediately and - ## it will be your job to call asyncio's ``poll`` to progress this operation. 
- ftp.createJob(getLines[T], JRetrText) - ftp.pasv() - assertReply ftp.send("RETR " & file.normalizePathSep), ["125", "150"] - - if not async: - while not ftp.job.prc(ftp, false): discard - result = ftp.job.lines - ftp.deleteJob() - else: - return "" - -proc getFile[T](ftp: FtpBase[T], async = false): bool = - if ftp.dsockConnected: - var r = "".TaintedString - var bytesRead = 0 - var returned = false - if async: - when T is Socket: - raise newException(FTPError, "FTPClient must be async.") - else: - bytesRead = ftp.dsock.recvAsync(r, BufferSize) - returned = bytesRead != -1 - else: - bytesRead = ftp.dsock.recv(r, BufferSize) - returned = true - let r2 = r.string - if r2 != "": - ftp.job.progress.inc(r2.len) - ftp.job.oneSecond.inc(r2.len) - ftp.job.file.write(r2) - elif returned and r2 == "": - ftp.dsockConnected = false - - when T is Socket: - if not async: - var readSocks: seq[Socket] = @[ftp.csock] - blockingOperation(ftp.csock): - if readSocks.select(1) != 0 and ftp.csock in readSocks: - assertReply ftp.expectReply(), "226" - return true - -proc retrFile*[T](ftp: FtpBase[T], file, dest: string, async = false) = - ## Downloads ``file`` and saves it to ``dest``. Usage of this function - ## asynchronously is recommended to view the progress of the download. - ## The ``EvRetr`` event is passed to the specified ``handleEvent`` function - ## when the download is finished, and the ``filename`` field will be equal - ## to ``file``. 
- ftp.createJob(getFile[T], JRetr) - ftp.job.file = open(dest, mode = fmWrite) - ftp.pasv() - var reply = ftp.send("RETR " & file.normalizePathSep) - assertReply reply, ["125", "150"] - if {'(', ')'} notin reply.string: - raise newException(ReplyError, "Reply has no file size.") - var fileSize: BiggestInt - if reply.string.captureBetween('(', ')').parseBiggestInt(fileSize) == 0: - raise newException(ReplyError, "Reply has no file size.") - - ftp.job.total = fileSize - ftp.job.lastProgressReport = epochTime() - ftp.job.filename = file.normalizePathSep - - if not async: - while not ftp.job.prc(ftp, false): discard - ftp.deleteJob() - -proc doUpload[T](ftp: FtpBase[T], async = false): bool = - if ftp.dsockConnected: - if ftp.job.toStore.len() > 0: - assert(async) - let bytesSent = ftp.dsock.sendAsync(ftp.job.toStore) - if bytesSent == ftp.job.toStore.len: - ftp.job.toStore = "" - elif bytesSent != ftp.job.toStore.len and bytesSent != 0: - ftp.job.toStore = ftp.job.toStore[bytesSent .. ^1] - ftp.job.progress.inc(bytesSent) - ftp.job.oneSecond.inc(bytesSent) - else: - var s = newStringOfCap(4000) - var len = ftp.job.file.readBuffer(addr(s[0]), 4000) - setLen(s, len) - if len == 0: - # File finished uploading. - ftp.dsock.close() - ftp.dsockConnected = false - - if not async: - assertReply ftp.expectReply(), "226" - return true - return false - - if not async: - ftp.dsock.send(s) - else: - let bytesSent = ftp.dsock.sendAsync(s) - if bytesSent == 0: - ftp.job.toStore.add(s) - elif bytesSent != s.len: - ftp.job.toStore.add(s[bytesSent .. ^1]) - len = bytesSent - - ftp.job.progress.inc(len) - ftp.job.oneSecond.inc(len) - -proc store*[T](ftp: FtpBase[T], file, dest: string, async = false) = - ## Uploads ``file`` to ``dest`` on the remote FTP server. Usage of this - ## function asynchronously is recommended to view the progress of - ## the download. 
- ## The ``EvStore`` event is passed to the specified ``handleEvent`` function - ## when the upload is finished, and the ``filename`` field will be - ## equal to ``file``. - ftp.createJob(doUpload[T], JStore) - ftp.job.file = open(file) - ftp.job.total = ftp.job.file.getFileSize() - ftp.job.lastProgressReport = epochTime() - ftp.job.filename = file - ftp.pasv() - - assertReply ftp.send("STOR " & dest.normalizePathSep), ["125", "150"] - - if not async: - while not ftp.job.prc(ftp, false): discard - ftp.deleteJob() - -proc close*[T](ftp: FtpBase[T]) = - ## Terminates the connection to the server. - assertReply ftp.send("QUIT"), "221" - if ftp.jobInProgress: ftp.deleteJob() - ftp.csock.close() - ftp.dsock.close() - -proc csockHandleRead(s: AsyncSocket, ftp: AsyncFTPClient) = - if ftp.jobInProgress: - assertReply ftp.expectReply(), "226" # Make sure the transfer completed. - var r: FTPEvent - case ftp.job.typ - of JRetrText: - r.typ = EvLines - r.lines = ftp.job.lines - of JRetr: - r.typ = EvRetr - r.filename = ftp.job.filename - if ftp.job.progress != ftp.job.total: - raise newException(FTPError, "Didn't download full file.") - of JStore: - r.typ = EvStore - r.filename = ftp.job.filename - if ftp.job.progress != ftp.job.total: - raise newException(FTPError, "Didn't upload full file.") - ftp.deleteJob() - - ftp.handleEvent(ftp, r) - -proc asyncFTPClient*(address: string, port = Port(21), - user, pass = "", - handleEvent: proc (ftp: AsyncFTPClient, ev: FTPEvent) {.closure,gcsafe.} = - (proc (ftp: AsyncFTPClient, ev: FTPEvent) = discard)): AsyncFTPClient = - ## Create a ``AsyncFTPClient`` object. - ## - ## Use this if you want to use asyncio's dispatcher. 
- var dres: AsyncFtpClient - new(dres) - dres.user = user - dres.pass = pass - dres.address = address - dres.port = port - dres.dsockConnected = false - dres.handleEvent = handleEvent - dres.csock = asyncSocket() - dres.csock.handleRead = - proc (s: AsyncSocket) = - csockHandleRead(s, dres) - result = dres - -proc register*(d: Dispatcher, ftp: AsyncFTPClient): Delegate {.discardable.} = - ## Registers ``ftp`` with dispatcher ``d``. - ftp.disp = d - return ftp.disp.register(ftp.csock) - -when isMainModule: - proc main = - var d = newDispatcher() - let hev = - proc (ftp: AsyncFTPClient, event: FTPEvent) = - case event.typ - of EvStore: - echo("Upload finished!") - ftp.retrFile("payload.jpg", "payload2.jpg", async = true) - of EvTransferProgress: - var time: int64 = -1 - if event.speed != 0: - time = (event.bytesTotal - event.bytesFinished) div event.speed - echo(event.currentJob) - echo(event.speed div 1000, " kb/s. - ", - event.bytesFinished, "/", event.bytesTotal, - " - ", time, " seconds") - echo(d.len) - of EvRetr: - echo("Download finished!") - ftp.close() - echo d.len - else: assert(false) - var ftp = asyncFTPClient("example.com", user = "foo", pass = "bar", handleEvent = hev) - - d.register(ftp) - d.len.echo() - ftp.connect() - echo "connected" - ftp.store("payload.jpg", "payload.jpg", async = true) - d.len.echo() - echo "uploading..." 
- while true: - if not d.poll(): break - main() - -when isMainModule and false: - var ftp = ftpClient("example.com", user = "foo", pass = "bar") - ftp.connect() - echo ftp.pwd() - echo ftp.list() - echo("uploading") - ftp.store("payload.jpg", "payload.jpg", async = false) - - echo("Upload complete") - ftp.retrFile("payload.jpg", "payload2.jpg", async = false) - - echo("Download complete") - sleep(5000) - ftp.close() - sleep(200) diff --git a/lib/pure/future.nim b/lib/pure/future.nim deleted file mode 100644 index 661afd7b3..000000000 --- a/lib/pure/future.nim +++ /dev/null @@ -1,183 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2015 Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements experimental features which may soon be moved to -## the system module (or other more appropriate modules). - -import macros - -proc createProcType(p, b: NimNode): NimNode {.compileTime.} = - #echo treeRepr(p) - #echo treeRepr(b) - result = newNimNode(nnkProcTy) - var formalParams = newNimNode(nnkFormalParams) - - formalParams.add b - - case p.kind - of nnkPar: - for i in 0 .. <p.len: - let ident = p[i] - var identDefs = newNimNode(nnkIdentDefs) - case ident.kind - of nnkExprColonExpr: - identDefs.add ident[0] - identDefs.add ident[1] - of nnkIdent: - identDefs.add newIdentNode("i" & $i) - identDefs.add(ident) - else: - error("Incorrect type list in proc type declaration.") - identDefs.add newEmptyNode() - formalParams.add identDefs - of nnkIdent: - var identDefs = newNimNode(nnkIdentDefs) - identDefs.add newIdentNode("i0") - identDefs.add(p) - identDefs.add newEmptyNode() - formalParams.add identDefs - else: - error("Incorrect type list in proc type declaration.") - - result.add formalParams - result.add newEmptyNode() - #echo(treeRepr(result)) - #echo(result.toStrLit()) - -macro `=>`*(p, b: expr): expr {.immediate.} = - ## Syntax sugar for anonymous procedures. - ## - ## .. 
code-block:: nim - ## - ## proc passTwoAndTwo(f: (int, int) -> int): int = - ## f(2, 2) - ## - ## passTwoAndTwo((x, y) => x + y) # 4 - - #echo treeRepr(p) - #echo(treeRepr(b)) - var params: seq[NimNode] = @[newIdentNode("auto")] - - case p.kind - of nnkPar: - for c in children(p): - var identDefs = newNimNode(nnkIdentDefs) - case c.kind - of nnkExprColonExpr: - identDefs.add(c[0]) - identDefs.add(c[1]) - identDefs.add(newEmptyNode()) - of nnkIdent: - identDefs.add(c) - identDefs.add(newEmptyNode()) - identDefs.add(newEmptyNode()) - of nnkInfix: - if c[0].kind == nnkIdent and c[0].ident == !"->": - var procTy = createProcType(c[1], c[2]) - params[0] = procTy[0][0] - for i in 1 .. <procTy[0].len: - params.add(procTy[0][i]) - else: - error("Expected proc type (->) got (" & $c[0].ident & ").") - break - else: - echo treeRepr c - error("Incorrect procedure parameter list.") - params.add(identDefs) - of nnkIdent: - var identDefs = newNimNode(nnkIdentDefs) - identDefs.add(p) - identDefs.add(newEmptyNode()) - identDefs.add(newEmptyNode()) - params.add(identDefs) - of nnkInfix: - if p[0].kind == nnkIdent and p[0].ident == !"->": - var procTy = createProcType(p[1], p[2]) - params[0] = procTy[0][0] - for i in 1 .. <procTy[0].len: - params.add(procTy[0][i]) - else: - error("Expected proc type (->) got (" & $p[0].ident & ").") - else: - error("Incorrect procedure parameter list.") - result = newProc(params = params, body = b, procType = nnkLambda) - #echo(result.treeRepr) - #echo(result.toStrLit()) - #return result # TODO: Bug? - -macro `->`*(p, b: expr): expr {.immediate.} = - ## Syntax sugar for procedure types. - ## - ## .. 
code-block:: nim - ## - ## proc pass2(f: (float, float) -> float): float = - ## f(2, 2) - ## - ## # is the same as: - ## - ## proc pass2(f: proc (x, y: float): float): float = - ## f(2, 2) - - result = createProcType(p, b) - -type ListComprehension = object -var lc*: ListComprehension - -macro `[]`*(lc: ListComprehension, comp, typ: expr): expr = - ## List comprehension, returns a sequence. `comp` is the actual list - ## comprehension, for example ``x | (x <- 1..10, x mod 2 == 0)``. `typ` is - ## the type that will be stored inside the result seq. - ## - ## .. code-block:: nimrod - ## - ## echo lc[x | (x <- 1..10, x mod 2 == 0), int] - ## - ## const n = 20 - ## echo lc[(x,y,z) | (x <- 1..n, y <- x..n, z <- y..n, x*x + y*y == z*z), - ## tuple[a,b,c: int]] - - expectLen(comp, 3) - expectKind(comp, nnkInfix) - expectKind(comp[0], nnkIdent) - assert($comp[0].ident == "|") - - result = newCall( - newDotExpr( - newIdentNode("result"), - newIdentNode("add")), - comp[1]) - - for i in countdown(comp[2].len-1, 0): - let x = comp[2][i] - expectMinLen(x, 1) - if x[0].kind == nnkIdent and $x[0].ident == "<-": - expectLen(x, 3) - result = newNimNode(nnkForStmt).add(x[1], x[2], result) - else: - result = newIfStmt((x, result)) - - result = newNimNode(nnkCall).add( - newNimNode(nnkPar).add( - newNimNode(nnkLambda).add( - newEmptyNode(), - newEmptyNode(), - newEmptyNode(), - newNimNode(nnkFormalParams).add( - newNimNode(nnkBracketExpr).add( - newIdentNode("seq"), - typ)), - newEmptyNode(), - newEmptyNode(), - newStmtList( - newAssignment( - newIdentNode("result"), - newNimNode(nnkPrefix).add( - newIdentNode("@"), - newNimNode(nnkBracket))), - result)))) diff --git a/lib/pure/gentabs.nim b/lib/pure/gentabs.nim deleted file mode 100644 index a6128efc9..000000000 --- a/lib/pure/gentabs.nim +++ /dev/null @@ -1,193 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the 
copyright. -# - -## The ``gentabs`` module implements an efficient hash table that is a -## key-value mapping. The keys are required to be strings, but the values -## may be any Nim or user defined type. This module supports matching -## of keys in case-sensitive, case-insensitive and style-insensitive modes. - -{.deprecated.} - -import - os, hashes, strutils - -type - TGenTableMode* = enum ## describes the table's key matching mode - modeCaseSensitive, ## case sensitive matching of keys - modeCaseInsensitive, ## case insensitive matching of keys - modeStyleInsensitive ## style sensitive matching of keys - - TGenKeyValuePair[T] = tuple[key: string, val: T] - TGenKeyValuePairSeq[T] = seq[TGenKeyValuePair[T]] - TGenTable*[T] = object of RootObj - counter: int - data: TGenKeyValuePairSeq[T] - mode: TGenTableMode - - PGenTable*[T] = ref TGenTable[T] ## use this type to declare hash tables - - -const - growthFactor = 2 - startSize = 64 - - -proc len*[T](tbl: PGenTable[T]): int {.inline.} = - ## returns the number of keys in `tbl`. - result = tbl.counter - -iterator pairs*[T](tbl: PGenTable[T]): tuple[key: string, value: T] = - ## iterates over any (key, value) pair in the table `tbl`. 
- for h in 0..high(tbl.data): - if not isNil(tbl.data[h].key): - yield (tbl.data[h].key, tbl.data[h].val) - -proc myhash[T](tbl: PGenTable[T], key: string): THash = - case tbl.mode - of modeCaseSensitive: result = hashes.hash(key) - of modeCaseInsensitive: result = hashes.hashIgnoreCase(key) - of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key) - -proc myCmp[T](tbl: PGenTable[T], a, b: string): bool = - case tbl.mode - of modeCaseSensitive: result = cmp(a, b) == 0 - of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0 - of modeStyleInsensitive: result = cmpIgnoreStyle(a, b) == 0 - -proc mustRehash(length, counter: int): bool = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) - -proc newGenTable*[T](mode: TGenTableMode): PGenTable[T] = - ## creates a new generic hash table that is empty. - new(result) - result.mode = mode - result.counter = 0 - newSeq(result.data, startSize) - -proc nextTry(h, maxHash: THash): THash {.inline.} = - result = ((5 * h) + 1) and maxHash - -proc rawGet[T](tbl: PGenTable[T], key: string): int = - var h: THash - h = myhash(tbl, key) and high(tbl.data) # start with real hash value - while not isNil(tbl.data[h].key): - if myCmp(tbl, tbl.data[h].key, key): - return h - h = nextTry(h, high(tbl.data)) - result = - 1 - -proc rawInsert[T](tbl: PGenTable[T], data: var TGenKeyValuePairSeq[T], - key: string, val: T) = - var h: THash - h = myhash(tbl, key) and high(data) - while not isNil(data[h].key): - h = nextTry(h, high(data)) - data[h].key = key - data[h].val = val - -proc enlarge[T](tbl: PGenTable[T]) = - var n: TGenKeyValuePairSeq[T] - newSeq(n, len(tbl.data) * growthFactor) - for i in countup(0, high(tbl.data)): - if not isNil(tbl.data[i].key): - rawInsert[T](tbl, n, tbl.data[i].key, tbl.data[i].val) - swap(tbl.data, n) - -proc hasKey*[T](tbl: PGenTable[T], key: string): bool = - ## returns true iff `key` is in the table `tbl`. 
- result = rawGet(tbl, key) >= 0 - -proc `[]`*[T](tbl: PGenTable[T], key: string): T = - ## retrieves the value at ``tbl[key]``. If `key` is not in `tbl`, - ## default(T) is returned and no exception is raised. One can check - ## with ``hasKey`` whether the key exists. - var index = rawGet(tbl, key) - if index >= 0: result = tbl.data[index].val - -proc `[]=`*[T](tbl: PGenTable[T], key: string, val: T) = - ## puts a (key, value)-pair into `tbl`. - var index = rawGet(tbl, key) - if index >= 0: - tbl.data[index].val = val - else: - if mustRehash(len(tbl.data), tbl.counter): enlarge(tbl) - rawInsert(tbl, tbl.data, key, val) - inc(tbl.counter) - - -when isMainModule: - # - # Verify tables of integer values (string keys) - # - var x = newGenTable[int](modeCaseInsensitive) - x["one"] = 1 - x["two"] = 2 - x["three"] = 3 - x["four"] = 4 - x["five"] = 5 - assert(len(x) == 5) # length procedure works - assert(x["one"] == 1) # case-sensitive lookup works - assert(x["ONE"] == 1) # case-insensitive should work for this table - assert(x["one"]+x["two"] == 3) # make sure we're getting back ints - assert(x.hasKey("one")) # hasKey should return 'true' for a key - # of "one"... - assert(not x.hasKey("NOPE")) # ...but key "NOPE" is not in the table. 
- for k,v in pairs(x): # make sure the 'pairs' iterator works - assert(x[k]==v) - - # - # Verify a table of user-defined types - # - type - TMyType = tuple[first, second: string] # a pair of strings - - var y = newGenTable[TMyType](modeCaseInsensitive) # hash table where each - # value is TMyType tuple - - #var junk: TMyType = ("OK", "Here") - - #echo junk.first, " ", junk.second - - y["Hello"] = ("Hello", "World") - y["Goodbye"] = ("Goodbye", "Everyone") - #y["Hello"] = TMyType( ("Hello", "World") ) - #y["Goodbye"] = TMyType( ("Goodbye", "Everyone") ) - - assert( not isNil(y["Hello"].first) ) - assert( y["Hello"].first == "Hello" ) - assert( y["Hello"].second == "World" ) - - # - # Verify table of tables - # - var z: PGenTable[ PGenTable[int] ] # hash table where each value is - # a hash table of ints - - z = newGenTable[PGenTable[int]](modeCaseInsensitive) - z["first"] = newGenTable[int](modeCaseInsensitive) - z["first"]["one"] = 1 - z["first"]["two"] = 2 - z["first"]["three"] = 3 - - z["second"] = newGenTable[int](modeCaseInsensitive) - z["second"]["red"] = 10 - z["second"]["blue"] = 20 - - assert(len(z) == 2) # length of outer table - assert(len(z["first"]) == 3) # length of "first" table - assert(len(z["second"]) == 2) # length of "second" table - assert( z["first"]["one"] == 1) # retrieve from first inner table - assert( z["second"]["red"] == 10) # retrieve from second inner table - - for k,v in pairs(z): - echo( "$# ($#) ->" % [k,$len(v)] ) - #for k2,v2 in pairs(v): - # echo( " $# <-> $#" % [k2,$v2] ) - echo() diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index a16342d44..1038d55a1 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -8,80 +8,216 @@ # ## This module implements efficient computations of hash values for diverse -## Nim types. All the procs are based on these two building blocks: the `!& -## proc <#!&>`_ used to start or mix a hash value, and the `!$ proc <#!$>`_ -## used to *finish* the hash value. 
If you want to implement hash procs for -## your custom types you will end up writing the following kind of skeleton of -## code: +## Nim types. All the procs are based on these two building blocks: +## - `!& proc <#!&,Hash,int>`_ used to start or mix a hash value, and +## - `!$ proc <#!$,Hash>`_ used to finish the hash value. ## -## .. code-block:: Nim -## proc hash(x: Something): THash = -## ## Computes a THash from `x`. -## var h: THash = 0 -## # Iterate over parts of `x`. -## for xAtom in x: -## # Mix the atom with the partial hash. -## h = h !& xAtom -## # Finish the hash. -## result = !$h +## If you want to implement hash procs for your custom types, +## you will end up writing the following kind of skeleton of code: + +runnableExamples: + type + Something = object + foo: int + bar: string + + iterator items(x: Something): Hash = + yield hash(x.foo) + yield hash(x.bar) + + proc hash(x: Something): Hash = + ## Computes a Hash from `x`. + var h: Hash = 0 + # Iterate over parts of `x`. + for xAtom in x: + # Mix the atom with the partial hash. + h = h !& xAtom + # Finish the hash. + result = !$h + +## If your custom types contain fields for which there already is a `hash` proc, +## you can simply hash together the hash values of the individual fields: + +runnableExamples: + type + Something = object + foo: int + bar: string + + proc hash(x: Something): Hash = + ## Computes a Hash from `x`. + var h: Hash = 0 + h = h !& hash(x.foo) + h = h !& hash(x.bar) + result = !$h + +## .. important:: Use `-d:nimPreviewHashRef` to +## enable hashing `ref`s. It is expected that this behavior +## becomes the new default in upcoming versions. ## -## If your custom types contain fields for which there already is a hash proc, -## like for example objects made up of ``strings``, you can simply hash -## together the hash value of the individual fields: +## .. 
note:: If the type has a `==` operator, the following must hold: +## If two values compare equal, their hashes must also be equal. ## -## .. code-block:: Nim -## proc hash(x: Something): THash = -## ## Computes a THash from `x`. -## var h: THash = 0 -## h = h !& hash(x.foo) -## h = h !& hash(x.bar) -## result = !$h - -import - strutils - -type - THash* = int ## a hash value; hash tables using these values should - ## always have a size of a power of two and can use the ``and`` - ## operator instead of ``mod`` for truncation of the hash value. - -proc `!&`*(h: THash, val: int): THash {.inline.} = - ## mixes a hash value `h` with `val` to produce a new hash value. This is - ## only needed if you need to implement a hash proc for a new datatype. - result = h +% val - result = result +% result shl 10 - result = result xor (result shr 6) - -proc `!$`*(h: THash): THash {.inline.} = - ## finishes the computation of the hash value. This is - ## only needed if you need to implement a hash proc for a new datatype. - result = h +% h shl 3 - result = result xor (result shr 11) - result = result +% result shl 15 - -proc hashData*(data: pointer, size: int): THash = - ## hashes an array of bytes of size `size` - var h: THash = 0 +## See also +## ======== +## * `md5 module <md5.html>`_ for the MD5 checksum algorithm +## * `base64 module <base64.html>`_ for a Base64 encoder and decoder +## * `sha1 module <sha1.html>`_ for the SHA-1 checksum algorithm +## * `tables module <tables.html>`_ for hash tables + +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/assertions + + +type + Hash* = int ## A hash value. Hash tables using these values should + ## always have a size of a power of two so they can use the `and` + ## operator instead of `mod` for truncation of the hash value. + +proc `!&`*(h: Hash, val: int): Hash {.inline.} = + ## Mixes a hash value `h` with `val` to produce a new hash value. 
+ ## + ## This is only needed if you need to implement a `hash` proc for a new datatype. + let h = cast[uint](h) + let val = cast[uint](val) + var res = h + val + res = res + res shl 10 + res = res xor (res shr 6) + result = cast[Hash](res) + +proc `!$`*(h: Hash): Hash {.inline.} = + ## Finishes the computation of the hash value. + ## + ## This is only needed if you need to implement a `hash` proc for a new datatype. + let h = cast[uint](h) # Hash is practically unsigned. + var res = h + h shl 3 + res = res xor (res shr 11) + res = res + res shl 15 + result = cast[Hash](res) + +proc hiXorLoFallback64(a, b: uint64): uint64 {.inline.} = + let # Fall back in 64-bit arithmetic + aH = a shr 32 + aL = a and 0xFFFFFFFF'u64 + bH = b shr 32 + bL = b and 0xFFFFFFFF'u64 + rHH = aH * bH + rHL = aH * bL + rLH = aL * bH + rLL = aL * bL + t = rLL + (rHL shl 32) + var c = if t < rLL: 1'u64 else: 0'u64 + let lo = t + (rLH shl 32) + c += (if lo < t: 1'u64 else: 0'u64) + let hi = rHH + (rHL shr 32) + (rLH shr 32) + c + return hi xor lo + +proc hiXorLo(a, b: uint64): uint64 {.inline.} = + # XOR of the high & low 8 bytes of the full 16 byte product. + when nimvm: + result = hiXorLoFallback64(a, b) # `result =` is necessary here. 
+ else: + when Hash.sizeof < 8: + result = hiXorLoFallback64(a, b) + elif defined(gcc) or defined(llvm_gcc) or defined(clang): + {.emit: """__uint128_t r = `a`; r *= `b`; `result` = (r >> 64) ^ r;""".} + elif defined(windows) and not defined(tcc): + proc umul128(a, b: uint64, c: ptr uint64): uint64 {.importc: "_umul128", header: "intrin.h".} + var b = b + let c = umul128(a, b, addr b) + result = c xor b + else: + result = hiXorLoFallback64(a, b) + +when defined(js): + import std/jsbigints + import std/private/jsutils + + proc hiXorLoJs(a, b: JsBigInt): JsBigInt = + let + prod = a * b + mask = big"0xffffffffffffffff" # (big"1" shl big"64") - big"1" + result = (prod shr big"64") xor (prod and mask) + + template hashWangYiJS(x: JsBigInt): Hash = + let + P0 = big"0xa0761d6478bd642f" + P1 = big"0xe7037ed1a0b428db" + P58 = big"0xeb44accab455d16d" # big"0xeb44accab455d165" xor big"8" + res = hiXorLoJs(hiXorLoJs(P0, x xor P1), P58) + cast[Hash](toNumber(wrapToInt(res, 32))) + + template toBits(num: float): JsBigInt = + let + x = newArrayBuffer(8) + y = newFloat64Array(x) + if hasBigUint64Array(): + let z = newBigUint64Array(x) + y[0] = num + z[0] + else: + let z = newUint32Array(x) + y[0] = num + big(z[0]) + big(z[1]) shl big(32) + +proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} = + ## Wang Yi's hash_v1 for 64-bit ints (see https://github.com/rurban/smhasher for + ## more details). This passed all scrambling tests in Spring 2019 and is simple. + ## + ## **Note:** It's ok to define `proc(x: int16): Hash = hashWangYi1(Hash(x))`. 
+ const P0 = 0xa0761d6478bd642f'u64 + const P1 = 0xe7037ed1a0b428db'u64 + const P58 = 0xeb44accab455d165'u64 xor 8'u64 + template h(x): untyped = hiXorLo(hiXorLo(P0, uint64(x) xor P1), P58) + when nimvm: + when defined(js): # Nim int64<->JS Number & VM match => JS gets 32-bit hash + result = cast[Hash](h(x)) and cast[Hash](0xFFFFFFFF) + else: + result = cast[Hash](h(x)) + else: + when defined(js): + if hasJsBigInt(): + result = hashWangYiJS(big(x)) + else: + result = cast[Hash](x) and cast[Hash](0xFFFFFFFF) + else: + result = cast[Hash](h(x)) + +proc hashData*(data: pointer, size: int): Hash = + ## Hashes an array of bytes of size `size`. + var h: Hash = 0 when defined(js): var p: cstring - asm """`p` = `Data`;""" + {.emit: """`p` = `Data`;""".} else: var p = cast[cstring](data) var i = 0 var s = size - while s > 0: + while s > 0: h = h !& ord(p[i]) inc(i) dec(s) result = !$h +proc hashIdentity*[T: Ordinal|enum](x: T): Hash {.inline, since: (1, 3).} = + ## The identity hash, i.e. `hashIdentity(x) = x`. + cast[Hash](ord(x)) + +when defined(nimIntHash1): + proc hash*[T: Ordinal|enum](x: T): Hash {.inline.} = + ## Efficient hashing of integers. + cast[Hash](ord(x)) +else: + proc hash*[T: Ordinal|enum](x: T): Hash {.inline.} = + ## Efficient hashing of integers. + hashWangYi1(uint64(ord(x))) + when defined(js): var objectID = 0 - -proc hash*(x: pointer): THash {.inline.} = - ## efficient hashing of pointers - when defined(js): - asm """ + proc getObjectId(x: pointer): int = + {.emit: """ if (typeof `x` == "object") { if ("_NimID" in `x`) `result` = `x`["_NimID"]; @@ -90,73 +226,554 @@ proc hash*(x: pointer): THash {.inline.} = `x`["_NimID"] = `result`; } } - """ + """.} + +proc hash*(x: pointer): Hash {.inline.} = + ## Efficient `hash` overload. + when defined(js): + let y = getObjectId(x) + else: + let y = cast[int](x) + hash(y) # consistent with code expecting scrambled hashes depending on `nimIntHash1`. 
+ +proc hash*[T](x: ptr[T]): Hash {.inline.} = + ## Efficient `hash` overload. + runnableExamples: + var a: array[10, uint8] + assert a[0].addr.hash != a[1].addr.hash + assert cast[pointer](a[0].addr).hash == a[0].addr.hash + hash(cast[pointer](x)) + +when defined(nimPreviewHashRef) or defined(nimdoc): + proc hash*[T](x: ref[T]): Hash {.inline.} = + ## Efficient `hash` overload. + ## + ## .. important:: Use `-d:nimPreviewHashRef` to + ## enable hashing `ref`s. It is expected that this behavior + ## becomes the new default in upcoming versions. + runnableExamples("-d:nimPreviewHashRef"): + type A = ref object + x: int + let a = A(x: 3) + let ha = a.hash + assert ha != A(x: 3).hash # A(x: 3) is a different ref object from `a`. + a.x = 4 + assert ha == a.hash # the hash only depends on the address + runnableExamples("-d:nimPreviewHashRef"): + # you can overload `hash` if you want to customize semantics + type A[T] = ref object + x, y: T + proc hash(a: A): Hash = hash(a.x) + assert A[int](x: 3, y: 4).hash == A[int](x: 3, y: 5).hash + # xxx pending bug #17733, merge as `proc hash*(pointer | ref | ptr): Hash` + # or `proc hash*[T: ref | ptr](x: T): Hash` + hash(cast[pointer](x)) + +proc hash*(x: float): Hash {.inline.} = + ## Efficient hashing of floats. + let y = x + 0.0 # for denormalization + when nimvm: + # workaround a JS VM bug: bug #16547 + result = hashWangYi1(cast[int64](float64(y))) else: - result = (cast[THash](x)) shr 3 # skip the alignment - -when not defined(booting): - proc hash*[T: proc](x: T): THash {.inline.} = - ## efficient hashing of proc vars; closures are supported too. - when T is "closure": - result = hash(rawProc(x)) !& hash(rawEnv(x)) + when not defined(js): + result = hashWangYi1(cast[Hash](y)) + else: + result = hashWangYiJS(toBits(y)) + +# Forward declarations before methods that hash containers. 
This allows +# containers to contain other containers +proc hash*[A](x: openArray[A]): Hash +proc hash*[A](x: set[A]): Hash + + +when defined(js): + proc imul(a, b: uint32): uint32 = + # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/imul + let mask = 0xffff'u32 + var + aHi = (a shr 16) and mask + aLo = a and mask + bHi = (b shr 16) and mask + bLo = b and mask + result = (aLo * bLo) + (aHi * bLo + aLo * bHi) shl 16 +else: + template imul(a, b: uint32): untyped = a * b + +proc rotl32(x: uint32, r: int): uint32 {.inline.} = + (x shl r) or (x shr (32 - r)) + +proc murmurHash(x: openArray[byte]): Hash = + # https://github.com/PeterScott/murmur3/blob/master/murmur3.c + const + c1 = 0xcc9e2d51'u32 + c2 = 0x1b873593'u32 + n1 = 0xe6546b64'u32 + m1 = 0x85ebca6b'u32 + m2 = 0xc2b2ae35'u32 + let + size = len(x) + stepSize = 4 # 32-bit + n = size div stepSize + var + h1: uint32 + i = 0 + + + template impl = + var j = stepSize + while j > 0: + dec j + k1 = (k1 shl 8) or (ord(x[i+j])).uint32 + + # body + while i < n * stepSize: + var k1: uint32 + + when nimvm: + impl() else: - result = hash(pointer(x)) - -proc hash*(x: int): THash {.inline.} = - ## efficient hashing of integers - result = x - -proc hash*(x: int64): THash {.inline.} = - ## efficient hashing of integers - result = toU32(x) - -proc hash*(x: char): THash {.inline.} = - ## efficient hashing of characters - result = ord(x) - -proc hash*(x: string): THash = - ## efficient hashing of strings - var h: THash = 0 - for i in 0..x.len-1: - h = h !& ord(x[i]) + when declared(copyMem): + copyMem(addr k1, addr x[i], 4) + else: + impl() + inc i, stepSize + + k1 = imul(k1, c1) + k1 = rotl32(k1, 15) + k1 = imul(k1, c2) + + h1 = h1 xor k1 + h1 = rotl32(h1, 13) + h1 = h1*5 + n1 + + # tail + var k1: uint32 + var rem = size mod stepSize + while rem > 0: + dec rem + k1 = (k1 shl 8) or (ord(x[i+rem])).uint32 + k1 = imul(k1, c1) + k1 = rotl32(k1, 15) + k1 = imul(k1, c2) + h1 = h1 xor k1 + + # 
finalization + h1 = h1 xor size.uint32 + h1 = h1 xor (h1 shr 16) + h1 = imul(h1, m1) + h1 = h1 xor (h1 shr 13) + h1 = imul(h1, m2) + h1 = h1 xor (h1 shr 16) + return cast[Hash](h1) + +proc hashVmImpl(x: cstring, sPos, ePos: int): Hash = + raiseAssert "implementation override in compiler/vmops.nim" + +proc hashVmImpl(x: string, sPos, ePos: int): Hash = + raiseAssert "implementation override in compiler/vmops.nim" + +proc hashVmImplChar(x: openArray[char], sPos, ePos: int): Hash = + raiseAssert "implementation override in compiler/vmops.nim" + +proc hashVmImplByte(x: openArray[byte], sPos, ePos: int): Hash = + raiseAssert "implementation override in compiler/vmops.nim" + +const k0 = 0xc3a5c85c97cb3127u64 # Primes on (2^63, 2^64) for various uses +const k1 = 0xb492b66fbe98f273u64 +const k2 = 0x9ae16a3b2f90404fu64 + +proc load4e(s: openArray[byte], o=0): uint32 {.inline.} = + uint32(s[o + 3]) shl 24 or uint32(s[o + 2]) shl 16 or + uint32(s[o + 1]) shl 8 or uint32(s[o + 0]) + +proc load8e(s: openArray[byte], o=0): uint64 {.inline.} = + uint64(s[o + 7]) shl 56 or uint64(s[o + 6]) shl 48 or + uint64(s[o + 5]) shl 40 or uint64(s[o + 4]) shl 32 or + uint64(s[o + 3]) shl 24 or uint64(s[o + 2]) shl 16 or + uint64(s[o + 1]) shl 8 or uint64(s[o + 0]) + +proc load4(s: openArray[byte], o=0): uint32 {.inline.} = + when nimvm: result = load4e(s, o) + else: + when declared copyMem: copyMem result.addr, s[o].addr, result.sizeof + else: result = load4e(s, o) + +proc load8(s: openArray[byte], o=0): uint64 {.inline.} = + when nimvm: result = load8e(s, o) + else: + when declared copyMem: copyMem result.addr, s[o].addr, result.sizeof + else: result = load8e(s, o) + +proc lenU(s: openArray[byte]): uint64 {.inline.} = s.len.uint64 + +proc shiftMix(v: uint64): uint64 {.inline.} = v xor (v shr 47) + +proc rotR(v: uint64; bits: cint): uint64 {.inline.} = + (v shr bits) or (v shl (64 - bits)) + +proc len16(u: uint64; v: uint64; mul: uint64): uint64 {.inline.} = + var a = (u xor v)*mul + a = a 
xor (a shr 47) + var b = (v xor a)*mul + b = b xor (b shr 47) + b*mul + +proc len0_16(s: openArray[byte]): uint64 {.inline.} = + if s.len >= 8: + let mul = k2 + 2*s.lenU + let a = load8(s) + k2 + let b = load8(s, s.len - 8) + let c = rotR(b, 37)*mul + a + let d = (rotR(a, 25) + b)*mul + len16 c, d, mul + elif s.len >= 4: + let mul = k2 + 2*s.lenU + let a = load4(s).uint64 + len16 s.lenU + (a shl 3), load4(s, s.len - 4), mul + elif s.len > 0: + let a = uint32(s[0]) + let b = uint32(s[s.len shr 1]) + let c = uint32(s[s.len - 1]) + let y = a + (b shl 8) + let z = s.lenU + (c shl 2) + shiftMix(y*k2 xor z*k0)*k2 + else: k2 # s.len == 0 + +proc len17_32(s: openArray[byte]): uint64 {.inline.} = + let mul = k2 + 2*s.lenU + let a = load8(s)*k1 + let b = load8(s, 8) + let c = load8(s, s.len - 8)*mul + let d = load8(s, s.len - 16)*k2 + len16 rotR(a + b, 43) + rotR(c, 30) + d, a + rotR(b + k2, 18) + c, mul + +proc len33_64(s: openArray[byte]): uint64 {.inline.} = + let mul = k2 + 2*s.lenU + let a = load8(s)*k2 + let b = load8(s, 8) + let c = load8(s, s.len - 8)*mul + let d = load8(s, s.len - 16)*k2 + let y = rotR(a + b, 43) + rotR(c, 30) + d + let z = len16(y, a + rotR(b + k2, 18) + c, mul) + let e = load8(s, 16)*mul + let f = load8(s, 24) + let g = (y + load8(s, s.len - 32))*mul + let h = (z + load8(s, s.len - 24))*mul + len16 rotR(e + f, 43) + rotR(g, 30) + h, e + rotR(f + a, 18) + g, mul + +type Pair = tuple[first, second: uint64] + +proc weakLen32withSeeds2(w, x, y, z, a, b: uint64): Pair {.inline.} = + var a = a + w + var b = rotR(b + a + z, 21) + let c = a + a += x + a += y + b += rotR(a, 44) + result[0] = a + z + result[1] = b + c + +proc weakLen32withSeeds(s: openArray[byte]; o: int; a,b: uint64): Pair {.inline.} = + weakLen32withSeeds2 load8(s, o ), load8(s, o + 8), + load8(s, o + 16), load8(s, o + 24), a, b + +proc hashFarm(s: openArray[byte]): uint64 {.inline.} = + if s.len <= 16: return len0_16(s) + if s.len <= 32: return len17_32(s) + if s.len <= 64: return 
len33_64(s) + const seed = 81u64 # not const to use input `h` + var + o = 0 # s[] ptr arith -> variable origin variable `o` + x = seed + y = seed*k1 + 113 + z = shiftMix(y*k2 + 113)*k2 + v, w: Pair + x = x*k2 + load8(s) + let eos = ((s.len - 1) div 64)*64 + let last64 = eos + ((s.len - 1) and 63) - 63 + while true: + x = rotR(x + y + v[0] + load8(s, o+8), 37)*k1 + y = rotR(y + v[1] + load8(s, o+48), 42)*k1 + x = x xor w[1] + y += v[0] + load8(s, o+40) + z = rotR(z + w[0], 33)*k1 + v = weakLen32withSeeds(s, o+0 , v[1]*k1, x + w[0]) + w = weakLen32withSeeds(s, o+32, z + w[1], y + load8(s, o+16)) + swap z, x + inc o, 64 + if o == eos: break + let mul = k1 + ((z and 0xff) shl 1) + o = last64 + w[0] += (s.lenU - 1) and 63 + v[0] += w[0] + w[0] += v[0] + x = rotR(x + y + v[0] + load8(s, o+8), 37)*mul + y = rotR(y + v[1] + load8(s, o+48), 42)*mul + x = x xor w[1]*9 + y += v[0]*9 + load8(s, o+40) + z = rotR(z + w[0], 33)*mul + v = weakLen32withSeeds(s, o+0 , v[1]*mul, x + w[0]) + w = weakLen32withSeeds(s, o+32, z + w[1], y + load8(s, o+16)) + swap z, x + len16 len16(v[0],w[0],mul) + shiftMix(y)*k0 + z, len16(v[1],w[1],mul) + x, mul + +template jsNoInt64: untyped = + when defined js: + when compiles(compileOption("jsbigint64")): + when not compileOption("jsbigint64"): true + else: false + else: false + else: false +const sHash2 = (when defined(nimStringHash2) or jsNoInt64(): true else: false) + +template maybeFailJS_Number = + when jsNoInt64() and not defined(nimStringHash2): + {.error: "Must use `-d:nimStringHash2` when using `--jsbigint64:off`".} + +proc hash*(x: string): Hash = + ## Efficient hashing of strings. 
+ ## + ## **See also:** + ## * `hashIgnoreStyle <#hashIgnoreStyle,string>`_ + ## * `hashIgnoreCase <#hashIgnoreCase,string>`_ + runnableExamples: + doAssert hash("abracadabra") != hash("AbracadabrA") + maybeFailJS_Number() + when not sHash2: + result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high))) + else: + #when nimvm: + # result = hashVmImpl(x, 0, high(x)) + when true: + result = murmurHash(toOpenArrayByte(x, 0, high(x))) + +proc hash*(x: cstring): Hash = + ## Efficient hashing of null-terminated strings. + runnableExamples: + doAssert hash(cstring"abracadabra") == hash("abracadabra") + doAssert hash(cstring"AbracadabrA") == hash("AbracadabrA") + doAssert hash(cstring"abracadabra") != hash(cstring"AbracadabrA") + + maybeFailJS_Number() + when not sHash2: + when defined js: + let xx = $x + result = cast[Hash](hashFarm(toOpenArrayByte(xx, 0, xx.high))) + else: + result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high))) + else: + #when nimvm: + # result = hashVmImpl(x, 0, high(x)) + when true: + when not defined(js): + result = murmurHash(toOpenArrayByte(x, 0, x.high)) + else: + let xx = $x + result = murmurHash(toOpenArrayByte(xx, 0, high(xx))) + +proc hash*(sBuf: string, sPos, ePos: int): Hash = + ## Efficient hashing of a string buffer, from starting + ## position `sPos` to ending position `ePos` (included). + ## + ## `hash(myStr, 0, myStr.high)` is equivalent to `hash(myStr)`. + runnableExamples: + var a = "abracadabra" + doAssert hash(a, 0, 3) == hash(a, 7, 10) + + maybeFailJS_Number() + when not sHash2: + result = cast[Hash](hashFarm(toOpenArrayByte(sBuf, sPos, ePos))) + else: + murmurHash(toOpenArrayByte(sBuf, sPos, ePos)) + +proc hashIgnoreStyle*(x: string): Hash = + ## Efficient hashing of strings; style is ignored. + ## + ## **Note:** This uses a different hashing algorithm than `hash(string)`. 
+ ## + ## **See also:** + ## * `hashIgnoreCase <#hashIgnoreCase,string>`_ + runnableExamples: + doAssert hashIgnoreStyle("aBr_aCa_dAB_ra") == hashIgnoreStyle("abracadabra") + doAssert hashIgnoreStyle("abcdefghi") != hash("abcdefghi") + + var h: Hash = 0 + var i = 0 + let xLen = x.len + while i < xLen: + var c = x[i] + if c == '_': + inc(i) + else: + if c in {'A'..'Z'}: + c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() + h = h !& ord(c) + inc(i) result = !$h - -proc hashIgnoreStyle*(x: string): THash = - ## efficient hashing of strings; style is ignored - var h: THash = 0 - for i in 0..x.len-1: + +proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash = + ## Efficient hashing of a string buffer, from starting + ## position `sPos` to ending position `ePos` (included); style is ignored. + ## + ## **Note:** This uses a different hashing algorithm than `hash(string)`. + ## + ## `hashIgnoreStyle(myBuf, 0, myBuf.high)` is equivalent + ## to `hashIgnoreStyle(myBuf)`. + runnableExamples: + var a = "ABracada_b_r_a" + doAssert hashIgnoreStyle(a, 0, 3) == hashIgnoreStyle(a, 7, a.high) + + var h: Hash = 0 + var i = sPos + while i <= ePos: + var c = sBuf[i] + if c == '_': + inc(i) + else: + if c in {'A'..'Z'}: + c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() + h = h !& ord(c) + inc(i) + result = !$h + +proc hashIgnoreCase*(x: string): Hash = + ## Efficient hashing of strings; case is ignored. + ## + ## **Note:** This uses a different hashing algorithm than `hash(string)`. 
+ ## + ## **See also:** + ## * `hashIgnoreStyle <#hashIgnoreStyle,string>`_ + runnableExamples: + doAssert hashIgnoreCase("ABRAcaDABRA") == hashIgnoreCase("abRACAdabra") + doAssert hashIgnoreCase("abcdefghi") != hash("abcdefghi") + + var h: Hash = 0 + for i in 0..x.len-1: var c = x[i] - if c == '_': - continue # skip _ - if c in {'A'..'Z'}: + if c in {'A'..'Z'}: c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() h = h !& ord(c) result = !$h -proc hashIgnoreCase*(x: string): THash = - ## efficient hashing of strings; case is ignored - var h: THash = 0 - for i in 0..x.len-1: - var c = x[i] - if c in {'A'..'Z'}: +proc hashIgnoreCase*(sBuf: string, sPos, ePos: int): Hash = + ## Efficient hashing of a string buffer, from starting + ## position `sPos` to ending position `ePos` (included); case is ignored. + ## + ## **Note:** This uses a different hashing algorithm than `hash(string)`. + ## + ## `hashIgnoreCase(myBuf, 0, myBuf.high)` is equivalent + ## to `hashIgnoreCase(myBuf)`. + runnableExamples: + var a = "ABracadabRA" + doAssert hashIgnoreCase(a, 0, 3) == hashIgnoreCase(a, 7, 10) + + var h: Hash = 0 + for i in sPos..ePos: + var c = sBuf[i] + if c in {'A'..'Z'}: c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() h = h !& ord(c) result = !$h - -proc hash*[T: tuple](x: T): THash = - ## efficient hashing of tuples. - for f in fields(x): - result = result !& hash(f) - result = !$result -proc hash*(x: float): THash {.inline.} = - var y = x + 1.0 - result = cast[ptr THash](addr(y))[] +proc hash*[T: tuple | object | proc | iterator {.closure.}](x: T): Hash = + ## Efficient `hash` overload. + runnableExamples: + # for `tuple|object`, `hash` must be defined for each component of `x`. 
+ type Obj = object + x: int + y: string + type Obj2[T] = object + x: int + y: string + assert hash(Obj(x: 520, y: "Nim")) != hash(Obj(x: 520, y: "Nim2")) + # you can define custom hashes for objects (even if they're generic): + proc hash(a: Obj2): Hash = hash((a.x)) + assert hash(Obj2[float](x: 520, y: "Nim")) == hash(Obj2[float](x: 520, y: "Nim2")) + runnableExamples: + # proc + proc fn1() = discard + const fn1b = fn1 + assert hash(fn1b) == hash(fn1) -proc hash*[A](x: openArray[A]): THash = - for it in items(x): result = result !& hash(it) - result = !$result + # closure + proc outer = + var a = 0 + proc fn2() = a.inc + assert fn2 is "closure" + let fn2b = fn2 + assert hash(fn2b) == hash(fn2) + assert hash(fn2) != hash(fn1) + outer() + + when T is "closure": + result = hash((rawProc(x), rawEnv(x))) + elif T is (proc): + result = hash(cast[pointer](x)) + else: + result = 0 + for f in fields(x): + result = result !& hash(f) + result = !$result + +proc hash*[A](x: openArray[A]): Hash = + ## Efficient hashing of arrays and sequences. + ## There must be a `hash` proc defined for the element type `A`. + when A is byte: + when not sHash2: + result = cast[Hash](hashFarm(x)) + else: + result = murmurHash(x) + elif A is char: + when not sHash2: + result = cast[Hash](hashFarm(toOpenArrayByte(x, 0, x.high))) + else: + #when nimvm: + # result = hashVmImplChar(x, 0, x.high) + when true: + result = murmurHash(toOpenArrayByte(x, 0, x.high)) + else: + result = 0 + for a in x: + result = result !& hash(a) + result = !$result + +proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash = + ## Efficient hashing of portions of arrays and sequences, from starting + ## position `sPos` to ending position `ePos` (included). + ## There must be a `hash` proc defined for the element type `A`. + ## + ## `hash(myBuf, 0, myBuf.high)` is equivalent to `hash(myBuf)`. 
+ runnableExamples: + let a = [1, 2, 5, 1, 2, 6] + doAssert hash(a, 0, 1) == hash(a, 3, 4) + when A is byte: + maybeFailJS_Number() + when not sHash2: + result = cast[Hash](hashFarm(toOpenArray(aBuf, sPos, ePos))) + else: + #when nimvm: + # result = hashVmImplByte(aBuf, sPos, ePos) + when true: + result = murmurHash(toOpenArray(aBuf, sPos, ePos)) + elif A is char: + maybeFailJS_Number() + when not sHash2: + result = cast[Hash](hashFarm(toOpenArrayByte(aBuf, sPos, ePos))) + else: + #when nimvm: + # result = hashVmImplChar(aBuf, sPos, ePos) + when true: + result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos)) + else: + for i in sPos .. ePos: + result = result !& hash(aBuf[i]) + result = !$result -proc hash*[A](x: set[A]): THash = - for it in items(x): result = result !& hash(it) +proc hash*[A](x: set[A]): Hash = + ## Efficient hashing of sets. + ## There must be a `hash` proc defined for the element type `A`. + result = 0 + for it in items(x): + result = result !& hash(it) result = !$result diff --git a/lib/pure/htmlgen.nim b/lib/pure/htmlgen.nim index d712e53f3..fafa72463 100644 --- a/lib/pure/htmlgen.nim +++ b/lib/pure/htmlgen.nim @@ -7,43 +7,65 @@ # distribution, for details about the copyright. # -## **Warning**: This module uses ``immediate`` macros which are known to -## cause problems. Do yourself a favor and import the module -## as ``from htmlgen import nil`` and then fully qualify the macros. +## Do yourself a favor and import the module +## as `from std/htmlgen import nil` and then fully qualify the macros. +## +## *Note*: The Karax project (`nimble install karax`) has a better +## way to achieve the same, see https://github.com/pragmagic/karax/blob/master/tests/nativehtmlgen.nim +## for an example. ## ## ## This module implements a simple `XML`:idx: and `HTML`:idx: code ## generator. Each commonly used HTML tag has a corresponding macro ## that generates a string with its HTML representation. 
## -## Example: +## MathML +## ====== +## +## `MathML <https://wikipedia.org/wiki/MathML>`_ is supported, MathML is part of HTML5. +## `MathML <https://wikipedia.org/wiki/MathML>`_ is an Standard ISO/IEC 40314 from year 2015. +## MathML allows you to `draw advanced math on the web <https://developer.mozilla.org/en-US/docs/Web/MathML/Element/math#Examples>`_, +## `visually similar to Latex math. <https://developer.mozilla.org/en-US/docs/Web/MathML/Element/semantics#Example>`_ ## -## .. code-block:: Nim +## Examples +## ======== +## +## ```Nim ## var nim = "Nim" -## echo h1(a(href="http://nim-lang.org", nim)) +## echo h1(a(href="https://nim-lang.org", nim)) +## ``` ## -## Writes the string:: +## Writes the string: ## -## <h1><a href="http://nim-lang.org">Nim</a></h1> +## <h1><a href="https://nim-lang.org">Nim</a></h1> ## import - macros, strutils + std/[macros, strutils] const - coreAttr* = " id class title style " - eventAttr* = " onclick ondblclick onmousedown onmouseup " & - "onmouseover onmousemove onmouseout onkeypress onkeydown onkeyup " - commonAttr* = coreAttr & eventAttr - -proc getIdent(e: NimNode): string {.compileTime.} = + coreAttr* = " accesskey class contenteditable dir hidden id lang " & + "spellcheck style tabindex title translate " ## HTML DOM Core Attributes + eventAttr* = "onabort onblur oncancel oncanplay oncanplaythrough onchange " & + "onclick oncuechange ondblclick ondurationchange onemptied onended " & + "onerror onfocus oninput oninvalid onkeydown onkeypress onkeyup onload " & + "onloadeddata onloadedmetadata onloadstart onmousedown onmouseenter " & + "onmouseleave onmousemove onmouseout onmouseover onmouseup onmousewheel " & + "onpause onplay onplaying onprogress onratechange onreset onresize " & + "onscroll onseeked onseeking onselect onshow onstalled onsubmit " & + "onsuspend ontimeupdate ontoggle onvolumechange onwaiting " ## HTML DOM Event Attributes + ariaAttr* = " role " ## HTML DOM Aria Attributes + commonAttr* = coreAttr & eventAttr 
& ariaAttr ## HTML DOM Common Attributes + +proc getIdent(e: NimNode): string = case e.kind - of nnkIdent: result = normalize($e.ident) + of nnkIdent: + result = e.strVal.normalize of nnkAccQuoted: result = getIdent(e[0]) for i in 1 .. e.len-1: result.add getIdent(e[i]) - else: error("cannot extract identifier from node: " & toStrLit(e).strVal) + else: error("cannot extract identifier from node: " & toStrLit(e).strVal, e) proc delete[T](s: var seq[T], attr: T): bool = var idx = find(s, attr) @@ -53,437 +75,738 @@ proc delete[T](s: var seq[T], attr: T): bool = setLen(s, L-1) result = true -proc xmlCheckedTag*(e: NimNode, tag: string, optAttr = "", reqAttr = "", - isLeaf = false): NimNode {.compileTime.} = +proc xmlCheckedTag*(argsList: NimNode, tag: string, optAttr = "", reqAttr = "", + isLeaf = false): NimNode = ## use this procedure to define a new XML tag # copy the attributes; when iterating over them these lists # will be modified, so that each attribute is only given one value - var req = split(reqAttr) - var opt = split(optAttr) - result = newNimNode(nnkBracket, e) + var req = splitWhitespace(reqAttr) + var opt = splitWhitespace(optAttr) + result = newNimNode(nnkBracket) result.add(newStrLitNode("<")) result.add(newStrLitNode(tag)) # first pass over attributes: - for i in 1..e.len-1: - if e[i].kind == nnkExprEqExpr: - var name = getIdent(e[i][0]) - if delete(req, name) or delete(opt, name): + for i in 0 ..< argsList.len: + if argsList[i].kind == nnkExprEqExpr: + var name = getIdent(argsList[i][0]) + if name.startsWith("data-") or delete(req, name) or delete(opt, name): result.add(newStrLitNode(" ")) result.add(newStrLitNode(name)) result.add(newStrLitNode("=\"")) - result.add(e[i][1]) + result.add(argsList[i][1]) result.add(newStrLitNode("\"")) else: - error("invalid attribute for '" & tag & "' element: " & name) + error("invalid attribute for '" & tag & "' element: " & name, argsList[i]) # check each required attribute exists: if req.len > 0: - error(req[0] 
& " attribute for '" & tag & "' element expected") + error(req[0] & " attribute for '" & tag & "' element expected", argsList) if isLeaf: - for i in 1..e.len-1: - if e[i].kind != nnkExprEqExpr: - error("element " & tag & " cannot be nested") + for i in 0 ..< argsList.len: + if argsList[i].kind != nnkExprEqExpr: + error("element " & tag & " cannot be nested", argsList[i]) result.add(newStrLitNode(" />")) else: result.add(newStrLitNode(">")) # second pass over elements: - for i in 1..e.len-1: - if e[i].kind != nnkExprEqExpr: result.add(e[i]) + for i in 0 ..< argsList.len: + if argsList[i].kind != nnkExprEqExpr: result.add(argsList[i]) result.add(newStrLitNode("</")) result.add(newStrLitNode(tag)) result.add(newStrLitNode(">")) - result = nestList(!"&", result) + result = nestList(ident"&", result) +macro a*(e: varargs[untyped]): untyped = + ## Generates the HTML `a` element. + result = xmlCheckedTag(e, "a", "href target download rel hreflang type " & + commonAttr) -macro a*(e: expr): expr {.immediate.} = - ## generates the HTML ``a`` element. - let e = callsite() - result = xmlCheckedTag(e, "a", "href charset type hreflang rel rev " & - "accesskey tabindex" & commonAttr) - -macro acronym*(e: expr): expr {.immediate.} = - ## generates the HTML ``acronym`` element. - let e = callsite() - result = xmlCheckedTag(e, "acronym", commonAttr) +macro abbr*(e: varargs[untyped]): untyped = + ## Generates the HTML `abbr` element. + result = xmlCheckedTag(e, "abbr", commonAttr) -macro address*(e: expr): expr {.immediate.} = - ## generates the HTML ``address`` element. - let e = callsite() +macro address*(e: varargs[untyped]): untyped = + ## Generates the HTML `address` element. result = xmlCheckedTag(e, "address", commonAttr) -macro area*(e: expr): expr {.immediate.} = - ## generates the HTML ``area`` element. 
- let e = callsite() - result = xmlCheckedTag(e, "area", "shape coords href nohref" & - " accesskey tabindex" & commonAttr, "alt", true) +macro area*(e: varargs[untyped]): untyped = + ## Generates the HTML `area` element. + result = xmlCheckedTag(e, "area", "coords download href hreflang rel " & + "shape target type" & commonAttr, "alt", true) + +macro article*(e: varargs[untyped]): untyped = + ## Generates the HTML `article` element. + result = xmlCheckedTag(e, "article", commonAttr) + +macro aside*(e: varargs[untyped]): untyped = + ## Generates the HTML `aside` element. + result = xmlCheckedTag(e, "aside", commonAttr) -macro b*(e: expr): expr {.immediate.} = - ## generates the HTML ``b`` element. - let e = callsite() +macro audio*(e: varargs[untyped]): untyped = + ## Generates the HTML `audio` element. + result = xmlCheckedTag(e, "audio", "src crossorigin preload " & + "autoplay mediagroup loop muted controls" & commonAttr) + +macro b*(e: varargs[untyped]): untyped = + ## Generates the HTML `b` element. result = xmlCheckedTag(e, "b", commonAttr) -macro base*(e: expr): expr {.immediate.} = - ## generates the HTML ``base`` element. - let e = callsite() - result = xmlCheckedTag(e, "base", "", "href", true) +macro base*(e: varargs[untyped]): untyped = + ## Generates the HTML `base` element. + result = xmlCheckedTag(e, "base", "href target" & commonAttr, "", true) + +macro bdi*(e: varargs[untyped]): untyped = + ## Generates the HTML `bdi` element. + result = xmlCheckedTag(e, "bdi", commonAttr) -macro big*(e: expr): expr {.immediate.} = - ## generates the HTML ``big`` element. - let e = callsite() +macro bdo*(e: varargs[untyped]): untyped = + ## Generates the HTML `bdo` element. + result = xmlCheckedTag(e, "bdo", commonAttr) + +macro big*(e: varargs[untyped]): untyped = + ## Generates the HTML `big` element. result = xmlCheckedTag(e, "big", commonAttr) -macro blockquote*(e: expr): expr {.immediate.} = - ## generates the HTML ``blockquote`` element. 
- let e = callsite() +macro blockquote*(e: varargs[untyped]): untyped = + ## Generates the HTML `blockquote` element. result = xmlCheckedTag(e, "blockquote", " cite" & commonAttr) -macro body*(e: expr): expr {.immediate.} = - ## generates the HTML ``body`` element. - let e = callsite() - result = xmlCheckedTag(e, "body", commonAttr) - -macro br*(e: expr): expr {.immediate.} = - ## generates the HTML ``br`` element. - let e = callsite() - result = xmlCheckedTag(e, "br", "", "", true) - -macro button*(e: expr): expr {.immediate.} = - ## generates the HTML ``button`` element. - let e = callsite() - result = xmlCheckedTag(e, "button", "accesskey tabindex " & - "disabled name type value" & commonAttr) - -macro caption*(e: expr): expr {.immediate.} = - ## generates the HTML ``caption`` element. - let e = callsite() +macro body*(e: varargs[untyped]): untyped = + ## Generates the HTML `body` element. + result = xmlCheckedTag(e, "body", "onafterprint onbeforeprint " & + "onbeforeunload onhashchange onmessage onoffline ononline onpagehide " & + "onpageshow onpopstate onstorage onunload" & commonAttr) + +macro br*(e: varargs[untyped]): untyped = + ## Generates the HTML `br` element. + result = xmlCheckedTag(e, "br", commonAttr, "", true) + +macro button*(e: varargs[untyped]): untyped = + ## Generates the HTML `button` element. + result = xmlCheckedTag(e, "button", "autofocus disabled form formaction " & + "formenctype formmethod formnovalidate formtarget menu name type value" & + commonAttr) + +macro canvas*(e: varargs[untyped]): untyped = + ## Generates the HTML `canvas` element. + result = xmlCheckedTag(e, "canvas", "width height" & commonAttr) + +macro caption*(e: varargs[untyped]): untyped = + ## Generates the HTML `caption` element. result = xmlCheckedTag(e, "caption", commonAttr) -macro cite*(e: expr): expr {.immediate.} = - ## generates the HTML ``cite`` element. 
- let e = callsite() +macro center*(e: varargs[untyped]): untyped = + ## Generates the HTML `center` element. + result = xmlCheckedTag(e, "center", commonAttr) + +macro cite*(e: varargs[untyped]): untyped = + ## Generates the HTML `cite` element. result = xmlCheckedTag(e, "cite", commonAttr) -macro code*(e: expr): expr {.immediate.} = - ## generates the HTML ``code`` element. - let e = callsite() +macro code*(e: varargs[untyped]): untyped = + ## Generates the HTML `code` element. result = xmlCheckedTag(e, "code", commonAttr) -macro col*(e: expr): expr {.immediate.} = - ## generates the HTML ``col`` element. - let e = callsite() - result = xmlCheckedTag(e, "col", "span align valign" & commonAttr, "", true) +macro col*(e: varargs[untyped]): untyped = + ## Generates the HTML `col` element. + result = xmlCheckedTag(e, "col", "span" & commonAttr, "", true) + +macro colgroup*(e: varargs[untyped]): untyped = + ## Generates the HTML `colgroup` element. + result = xmlCheckedTag(e, "colgroup", "span" & commonAttr) + +macro data*(e: varargs[untyped]): untyped = + ## Generates the HTML `data` element. + result = xmlCheckedTag(e, "data", "value" & commonAttr) -macro colgroup*(e: expr): expr {.immediate.} = - ## generates the HTML ``colgroup`` element. - let e = callsite() - result = xmlCheckedTag(e, "colgroup", "span align valign" & commonAttr) +macro datalist*(e: varargs[untyped]): untyped = + ## Generates the HTML `datalist` element. + result = xmlCheckedTag(e, "datalist", commonAttr) -macro dd*(e: expr): expr {.immediate.} = - ## generates the HTML ``dd`` element. - let e = callsite() +macro dd*(e: varargs[untyped]): untyped = + ## Generates the HTML `dd` element. result = xmlCheckedTag(e, "dd", commonAttr) -macro del*(e: expr): expr {.immediate.} = - ## generates the HTML ``del`` element. - let e = callsite() +macro del*(e: varargs[untyped]): untyped = + ## Generates the HTML `del` element. 
result = xmlCheckedTag(e, "del", "cite datetime" & commonAttr) -macro dfn*(e: expr): expr {.immediate.} = - ## generates the HTML ``dfn`` element. - let e = callsite() +macro details*(e: varargs[untyped]): untyped = + ## Generates the HTML `details` element. + result = xmlCheckedTag(e, "details", commonAttr & "open") + +macro dfn*(e: varargs[untyped]): untyped = + ## Generates the HTML `dfn` element. result = xmlCheckedTag(e, "dfn", commonAttr) -macro `div`*(e: expr): expr {.immediate.} = - ## generates the HTML ``div`` element. - let e = callsite() +macro dialog*(e: varargs[untyped]): untyped = + ## Generates the HTML `dialog` element. + result = xmlCheckedTag(e, "dialog", commonAttr & "open") + +macro `div`*(e: varargs[untyped]): untyped = + ## Generates the HTML `div` element. result = xmlCheckedTag(e, "div", commonAttr) -macro dl*(e: expr): expr {.immediate.} = - ## generates the HTML ``dl`` element. - let e = callsite() +macro dl*(e: varargs[untyped]): untyped = + ## Generates the HTML `dl` element. result = xmlCheckedTag(e, "dl", commonAttr) -macro dt*(e: expr): expr {.immediate.} = - ## generates the HTML ``dt`` element. - let e = callsite() +macro dt*(e: varargs[untyped]): untyped = + ## Generates the HTML `dt` element. result = xmlCheckedTag(e, "dt", commonAttr) -macro em*(e: expr): expr {.immediate.} = - ## generates the HTML ``em`` element. - let e = callsite() +macro em*(e: varargs[untyped]): untyped = + ## Generates the HTML `em` element. result = xmlCheckedTag(e, "em", commonAttr) -macro fieldset*(e: expr): expr {.immediate.} = - ## generates the HTML ``fieldset`` element. - let e = callsite() - result = xmlCheckedTag(e, "fieldset", commonAttr) +macro embed*(e: varargs[untyped]): untyped = + ## Generates the HTML `embed` element. + result = xmlCheckedTag(e, "embed", "src type height width" & + commonAttr, "", true) + +macro fieldset*(e: varargs[untyped]): untyped = + ## Generates the HTML `fieldset` element. 
+ result = xmlCheckedTag(e, "fieldset", "disabled form name" & commonAttr) -macro form*(e: expr): expr {.immediate.} = - ## generates the HTML ``form`` element. - let e = callsite() - result = xmlCheckedTag(e, "form", "method encype accept accept-charset" & - commonAttr, "action") +macro figure*(e: varargs[untyped]): untyped = + ## Generates the HTML `figure` element. + result = xmlCheckedTag(e, "figure", commonAttr) -macro h1*(e: expr): expr {.immediate.} = - ## generates the HTML ``h1`` element. - let e = callsite() +macro figcaption*(e: varargs[untyped]): untyped = + ## Generates the HTML `figcaption` element. + result = xmlCheckedTag(e, "figcaption", commonAttr) + +macro footer*(e: varargs[untyped]): untyped = + ## Generates the HTML `footer` element. + result = xmlCheckedTag(e, "footer", commonAttr) + +macro form*(e: varargs[untyped]): untyped = + ## Generates the HTML `form` element. + result = xmlCheckedTag(e, "form", "accept-charset action autocomplete " & + "enctype method name novalidate target" & commonAttr) + +macro h1*(e: varargs[untyped]): untyped = + ## Generates the HTML `h1` element. result = xmlCheckedTag(e, "h1", commonAttr) -macro h2*(e: expr): expr {.immediate.} = - ## generates the HTML ``h2`` element. - let e = callsite() +macro h2*(e: varargs[untyped]): untyped = + ## Generates the HTML `h2` element. result = xmlCheckedTag(e, "h2", commonAttr) -macro h3*(e: expr): expr {.immediate.} = - ## generates the HTML ``h3`` element. - let e = callsite() +macro h3*(e: varargs[untyped]): untyped = + ## Generates the HTML `h3` element. result = xmlCheckedTag(e, "h3", commonAttr) -macro h4*(e: expr): expr {.immediate.} = - ## generates the HTML ``h4`` element. - let e = callsite() +macro h4*(e: varargs[untyped]): untyped = + ## Generates the HTML `h4` element. result = xmlCheckedTag(e, "h4", commonAttr) -macro h5*(e: expr): expr {.immediate.} = - ## generates the HTML ``h5`` element. 
- let e = callsite() +macro h5*(e: varargs[untyped]): untyped = + ## Generates the HTML `h5` element. result = xmlCheckedTag(e, "h5", commonAttr) -macro h6*(e: expr): expr {.immediate.} = - ## generates the HTML ``h6`` element. - let e = callsite() +macro h6*(e: varargs[untyped]): untyped = + ## Generates the HTML `h6` element. result = xmlCheckedTag(e, "h6", commonAttr) -macro head*(e: expr): expr {.immediate.} = - ## generates the HTML ``head`` element. - let e = callsite() - result = xmlCheckedTag(e, "head", "profile") +macro head*(e: varargs[untyped]): untyped = + ## Generates the HTML `head` element. + result = xmlCheckedTag(e, "head", commonAttr) -macro html*(e: expr): expr {.immediate.} = - ## generates the HTML ``html`` element. - let e = callsite() - result = xmlCheckedTag(e, "html", "xmlns", "") +macro header*(e: varargs[untyped]): untyped = + ## Generates the HTML `header` element. + result = xmlCheckedTag(e, "header", commonAttr) -macro hr*(): expr {.immediate.} = - ## generates the HTML ``hr`` element. - let e = callsite() - result = xmlCheckedTag(e, "hr", commonAttr, "", true) +macro html*(e: varargs[untyped]): untyped = + ## Generates the HTML `html` element. + result = xmlCheckedTag(e, "html", "xmlns" & commonAttr, "") -macro i*(e: expr): expr {.immediate.} = - ## generates the HTML ``i`` element. - let e = callsite() - result = xmlCheckedTag(e, "i", commonAttr) +macro hr*(): untyped = + ## Generates the HTML `hr` element. + result = xmlCheckedTag(newNimNode(nnkArgList), "hr", commonAttr, "", true) -macro img*(e: expr): expr {.immediate.} = - ## generates the HTML ``img`` element. - let e = callsite() - result = xmlCheckedTag(e, "img", "longdesc height width", "src alt", true) +macro i*(e: varargs[untyped]): untyped = + ## Generates the HTML `i` element. + result = xmlCheckedTag(e, "i", commonAttr) -macro input*(e: expr): expr {.immediate.} = - ## generates the HTML ``input`` element. 
- let e = callsite() - result = xmlCheckedTag(e, "input", "name type value checked maxlength src" & - " alt accept disabled readonly accesskey tabindex" & commonAttr, "", true) +macro iframe*(e: varargs[untyped]): untyped = + ## Generates the HTML `iframe` element. + result = xmlCheckedTag(e, "iframe", "src srcdoc name sandbox width height loading" & + commonAttr) -macro ins*(e: expr): expr {.immediate.} = - ## generates the HTML ``ins`` element. - let e = callsite() +macro img*(e: varargs[untyped]): untyped = + ## Generates the HTML `img` element. + result = xmlCheckedTag(e, "img", "crossorigin usemap ismap height width loading" & + commonAttr, "src alt", true) + +macro input*(e: varargs[untyped]): untyped = + ## Generates the HTML `input` element. + result = xmlCheckedTag(e, "input", "accept alt autocomplete autofocus " & + "checked dirname disabled form formaction formenctype formmethod " & + "formnovalidate formtarget height inputmode list max maxlength min " & + "minlength multiple name pattern placeholder readonly required size " & + "src step type value width" & commonAttr, "", true) + +macro ins*(e: varargs[untyped]): untyped = + ## Generates the HTML `ins` element. result = xmlCheckedTag(e, "ins", "cite datetime" & commonAttr) -macro kbd*(e: expr): expr {.immediate.} = - ## generates the HTML ``kbd`` element. - let e = callsite() +macro kbd*(e: varargs[untyped]): untyped = + ## Generates the HTML `kbd` element. result = xmlCheckedTag(e, "kbd", commonAttr) -macro label*(e: expr): expr {.immediate.} = - ## generates the HTML ``label`` element. - let e = callsite() - result = xmlCheckedTag(e, "label", "for accesskey" & commonAttr) - -macro legend*(e: expr): expr {.immediate.} = - ## generates the HTML ``legend`` element. - let e = callsite() - result = xmlCheckedTag(e, "legend", "accesskey" & commonAttr) - -macro li*(e: expr): expr {.immediate.} = - ## generates the HTML ``li`` element. 
- let e = callsite() - result = xmlCheckedTag(e, "li", commonAttr) - -macro link*(e: expr): expr {.immediate.} = - ## generates the HTML ``link`` element. - let e = callsite() - result = xmlCheckedTag(e, "link", "href charset hreflang type rel rev media" & +macro keygen*(e: varargs[untyped]): untyped = + ## Generates the HTML `keygen` element. + result = xmlCheckedTag(e, "keygen", "autofocus challenge disabled " & + "form keytype name" & commonAttr) + +macro label*(e: varargs[untyped]): untyped = + ## Generates the HTML `label` element. + result = xmlCheckedTag(e, "label", "form for" & commonAttr) + +macro legend*(e: varargs[untyped]): untyped = + ## Generates the HTML `legend` element. + result = xmlCheckedTag(e, "legend", commonAttr) + +macro li*(e: varargs[untyped]): untyped = + ## Generates the HTML `li` element. + result = xmlCheckedTag(e, "li", "value" & commonAttr) + +macro link*(e: varargs[untyped]): untyped = + ## Generates the HTML `link` element. + result = xmlCheckedTag(e, "link", "href crossorigin rel media hreflang " & + "type sizes" & commonAttr, "", true) + +macro main*(e: varargs[untyped]): untyped = + ## Generates the HTML `main` element. + result = xmlCheckedTag(e, "main", commonAttr) + +macro map*(e: varargs[untyped]): untyped = + ## Generates the HTML `map` element. + result = xmlCheckedTag(e, "map", "name" & commonAttr) + +macro mark*(e: varargs[untyped]): untyped = + ## Generates the HTML `mark` element. + result = xmlCheckedTag(e, "mark", commonAttr) + +macro marquee*(e: varargs[untyped]): untyped = + ## Generates the HTML `marquee` element. + result = xmlCheckedTag(e, "marquee", coreAttr & + "behavior bgcolor direction height hspace loop scrollamount " & + "scrolldelay truespeed vspace width onbounce onfinish onstart") + +macro meta*(e: varargs[untyped]): untyped = + ## Generates the HTML `meta` element. 
+ result = xmlCheckedTag(e, "meta", "name http-equiv content charset" & commonAttr, "", true) -macro map*(e: expr): expr {.immediate.} = - ## generates the HTML ``map`` element. - let e = callsite() - result = xmlCheckedTag(e, "map", "class title" & eventAttr, "id", false) +macro meter*(e: varargs[untyped]): untyped = + ## Generates the HTML `meter` element. + result = xmlCheckedTag(e, "meter", "value min max low high optimum" & + commonAttr) -macro meta*(e: expr): expr {.immediate.} = - ## generates the HTML ``meta`` element. - let e = callsite() - result = xmlCheckedTag(e, "meta", "name http-equiv scheme", "content", true) +macro nav*(e: varargs[untyped]): untyped = + ## Generates the HTML `nav` element. + result = xmlCheckedTag(e, "nav", commonAttr) -macro noscript*(e: expr): expr {.immediate.} = - ## generates the HTML ``noscript`` element. - let e = callsite() +macro noscript*(e: varargs[untyped]): untyped = + ## Generates the HTML `noscript` element. result = xmlCheckedTag(e, "noscript", commonAttr) -macro `object`*(e: expr): expr {.immediate.} = - ## generates the HTML ``object`` element. - let e = callsite() - result = xmlCheckedTag(e, "object", "classid data codebase declare type " & - "codetype archive standby width height name tabindex" & commonAttr) +macro `object`*(e: varargs[untyped]): untyped = + ## Generates the HTML `object` element. + result = xmlCheckedTag(e, "object", "data type typemustmatch name usemap " & + "form width height" & commonAttr) -macro ol*(e: expr): expr {.immediate.} = - ## generates the HTML ``ol`` element. - let e = callsite() - result = xmlCheckedTag(e, "ol", commonAttr) +macro ol*(e: varargs[untyped]): untyped = + ## Generates the HTML `ol` element. + result = xmlCheckedTag(e, "ol", "reversed start type" & commonAttr) -macro optgroup*(e: expr): expr {.immediate.} = - ## generates the HTML ``optgroup`` element. - let e = callsite() +macro optgroup*(e: varargs[untyped]): untyped = + ## Generates the HTML `optgroup` element. 
result = xmlCheckedTag(e, "optgroup", "disabled" & commonAttr, "label", false) -macro option*(e: expr): expr {.immediate.} = - ## generates the HTML ``option`` element. - let e = callsite() - result = xmlCheckedTag(e, "option", "selected value" & commonAttr) +macro option*(e: varargs[untyped]): untyped = + ## Generates the HTML `option` element. + result = xmlCheckedTag(e, "option", "disabled label selected value" & + commonAttr) + +macro output*(e: varargs[untyped]): untyped = + ## Generates the HTML `output` element. + result = xmlCheckedTag(e, "output", "for form name" & commonAttr) -macro p*(e: expr): expr {.immediate.} = - ## generates the HTML ``p`` element. - let e = callsite() +macro p*(e: varargs[untyped]): untyped = + ## Generates the HTML `p` element. result = xmlCheckedTag(e, "p", commonAttr) -macro param*(e: expr): expr {.immediate.} = - ## generates the HTML ``param`` element. - let e = callsite() - result = xmlCheckedTag(e, "param", "value id type valuetype", "name", true) +macro param*(e: varargs[untyped]): untyped = + ## Generates the HTML `param` element. + result = xmlCheckedTag(e, "param", commonAttr, "name value", true) -macro pre*(e: expr): expr {.immediate.} = - ## generates the HTML ``pre`` element. - let e = callsite() +macro picture*(e: varargs[untyped]): untyped = + ## Generates the HTML `picture` element. + result = xmlCheckedTag(e, "picture", commonAttr) + +macro pre*(e: varargs[untyped]): untyped = + ## Generates the HTML `pre` element. result = xmlCheckedTag(e, "pre", commonAttr) -macro q*(e: expr): expr {.immediate.} = - ## generates the HTML ``q`` element. - let e = callsite() +macro progress*(e: varargs[untyped]): untyped = + ## Generates the HTML `progress` element. + result = xmlCheckedTag(e, "progress", "value max" & commonAttr) + +macro q*(e: varargs[untyped]): untyped = + ## Generates the HTML `q` element. 
result = xmlCheckedTag(e, "q", "cite" & commonAttr) -macro samp*(e: expr): expr {.immediate.} = - ## generates the HTML ``samp`` element. - let e = callsite() +macro rb*(e: varargs[untyped]): untyped = + ## Generates the HTML `rb` element. + result = xmlCheckedTag(e, "rb", commonAttr) + +macro rp*(e: varargs[untyped]): untyped = + ## Generates the HTML `rp` element. + result = xmlCheckedTag(e, "rp", commonAttr) + +macro rt*(e: varargs[untyped]): untyped = + ## Generates the HTML `rt` element. + result = xmlCheckedTag(e, "rt", commonAttr) + +macro rtc*(e: varargs[untyped]): untyped = + ## Generates the HTML `rtc` element. + result = xmlCheckedTag(e, "rtc", commonAttr) + +macro ruby*(e: varargs[untyped]): untyped = + ## Generates the HTML `ruby` element. + result = xmlCheckedTag(e, "ruby", commonAttr) + +macro s*(e: varargs[untyped]): untyped = + ## Generates the HTML `s` element. + result = xmlCheckedTag(e, "s", commonAttr) + +macro samp*(e: varargs[untyped]): untyped = + ## Generates the HTML `samp` element. result = xmlCheckedTag(e, "samp", commonAttr) -macro script*(e: expr): expr {.immediate.} = - ## generates the HTML ``script`` element. - let e = callsite() - result = xmlCheckedTag(e, "script", "src charset defer", "type", false) +macro script*(e: varargs[untyped]): untyped = + ## Generates the HTML `script` element. + result = xmlCheckedTag(e, "script", "src type charset async defer " & + "crossorigin" & commonAttr) -macro select*(e: expr): expr {.immediate.} = - ## generates the HTML ``select`` element. - let e = callsite() - result = xmlCheckedTag(e, "select", "name size multiple disabled tabindex" & - commonAttr) +macro section*(e: varargs[untyped]): untyped = + ## Generates the HTML `section` element. + result = xmlCheckedTag(e, "section", commonAttr) + +macro select*(e: varargs[untyped]): untyped = + ## Generates the HTML `select` element. 
+ result = xmlCheckedTag(e, "select", "autofocus disabled form multiple " & + "name required size" & commonAttr) -macro small*(e: expr): expr {.immediate.} = - ## generates the HTML ``small`` element. - let e = callsite() +macro slot*(e: varargs[untyped]): untyped = + ## Generates the HTML `slot` element. + result = xmlCheckedTag(e, "slot", commonAttr) + +macro small*(e: varargs[untyped]): untyped = + ## Generates the HTML `small` element. result = xmlCheckedTag(e, "small", commonAttr) -macro span*(e: expr): expr {.immediate.} = - ## generates the HTML ``span`` element. - let e = callsite() +macro source*(e: varargs[untyped]): untyped = + ## Generates the HTML `source` element. + result = xmlCheckedTag(e, "source", "type" & commonAttr, "src", true) + +macro span*(e: varargs[untyped]): untyped = + ## Generates the HTML `span` element. result = xmlCheckedTag(e, "span", commonAttr) -macro strong*(e: expr): expr {.immediate.} = - ## generates the HTML ``strong`` element. - let e = callsite() +macro strong*(e: varargs[untyped]): untyped = + ## Generates the HTML `strong` element. result = xmlCheckedTag(e, "strong", commonAttr) -macro style*(e: expr): expr {.immediate.} = - ## generates the HTML ``style`` element. - let e = callsite() - result = xmlCheckedTag(e, "style", "media title", "type") +macro style*(e: varargs[untyped]): untyped = + ## Generates the HTML `style` element. + result = xmlCheckedTag(e, "style", "media type" & commonAttr) -macro sub*(e: expr): expr {.immediate.} = - ## generates the HTML ``sub`` element. - let e = callsite() +macro sub*(e: varargs[untyped]): untyped = + ## Generates the HTML `sub` element. result = xmlCheckedTag(e, "sub", commonAttr) -macro sup*(e: expr): expr {.immediate.} = - ## generates the HTML ``sup`` element. - let e = callsite() +macro summary*(e: varargs[untyped]): untyped = + ## Generates the HTML `summary` element. 
+ result = xmlCheckedTag(e, "summary", commonAttr) + +macro sup*(e: varargs[untyped]): untyped = + ## Generates the HTML `sup` element. result = xmlCheckedTag(e, "sup", commonAttr) -macro table*(e: expr): expr {.immediate.} = - ## generates the HTML ``table`` element. - let e = callsite() - result = xmlCheckedTag(e, "table", "summary border cellpadding cellspacing" & - " frame rules width" & commonAttr) - -macro tbody*(e: expr): expr {.immediate.} = - ## generates the HTML ``tbody`` element. - let e = callsite() - result = xmlCheckedTag(e, "tbody", "align valign" & commonAttr) - -macro td*(e: expr): expr {.immediate.} = - ## generates the HTML ``td`` element. - let e = callsite() - result = xmlCheckedTag(e, "td", "colspan rowspan abbr axis headers scope" & - " align valign" & commonAttr) - -macro textarea*(e: expr): expr {.immediate.} = - ## generates the HTML ``textarea`` element. - let e = callsite() - result = xmlCheckedTag(e, "textarea", " name disabled readonly accesskey" & - " tabindex" & commonAttr, "rows cols", false) - -macro tfoot*(e: expr): expr {.immediate.} = - ## generates the HTML ``tfoot`` element. - let e = callsite() - result = xmlCheckedTag(e, "tfoot", "align valign" & commonAttr) - -macro th*(e: expr): expr {.immediate.} = - ## generates the HTML ``th`` element. - let e = callsite() - result = xmlCheckedTag(e, "th", "colspan rowspan abbr axis headers scope" & - " align valign" & commonAttr) - -macro thead*(e: expr): expr {.immediate.} = - ## generates the HTML ``thead`` element. - let e = callsite() - result = xmlCheckedTag(e, "thead", "align valign" & commonAttr) - -macro title*(e: expr): expr {.immediate.} = - ## generates the HTML ``title`` element. - let e = callsite() - result = xmlCheckedTag(e, "title") - -macro tr*(e: expr): expr {.immediate.} = - ## generates the HTML ``tr`` element. 
- let e = callsite() - result = xmlCheckedTag(e, "tr", "align valign" & commonAttr) - -macro tt*(e: expr): expr {.immediate.} = - ## generates the HTML ``tt`` element. - let e = callsite() +macro table*(e: varargs[untyped]): untyped = + ## Generates the HTML `table` element. + result = xmlCheckedTag(e, "table", "border sortable" & commonAttr) + +macro tbody*(e: varargs[untyped]): untyped = + ## Generates the HTML `tbody` element. + result = xmlCheckedTag(e, "tbody", commonAttr) + +macro td*(e: varargs[untyped]): untyped = + ## Generates the HTML `td` element. + result = xmlCheckedTag(e, "td", "colspan rowspan headers" & commonAttr) + +macro `template`*(e: varargs[untyped]): untyped = + ## Generates the HTML `template` element. + result = xmlCheckedTag(e, "template", commonAttr) + +macro textarea*(e: varargs[untyped]): untyped = + ## Generates the HTML `textarea` element. + result = xmlCheckedTag(e, "textarea", "autocomplete autofocus cols " & + "dirname disabled form inputmode maxlength minlength name placeholder " & + "readonly required rows wrap" & commonAttr) + +macro tfoot*(e: varargs[untyped]): untyped = + ## Generates the HTML `tfoot` element. + result = xmlCheckedTag(e, "tfoot", commonAttr) + +macro th*(e: varargs[untyped]): untyped = + ## Generates the HTML `th` element. + result = xmlCheckedTag(e, "th", "colspan rowspan headers abbr scope axis" & + " sorted" & commonAttr) + +macro thead*(e: varargs[untyped]): untyped = + ## Generates the HTML `thead` element. + result = xmlCheckedTag(e, "thead", commonAttr) + +macro time*(e: varargs[untyped]): untyped = + ## Generates the HTML `time` element. + result = xmlCheckedTag(e, "time", "datetime" & commonAttr) + +macro title*(e: varargs[untyped]): untyped = + ## Generates the HTML `title` element. + result = xmlCheckedTag(e, "title", commonAttr) + +macro tr*(e: varargs[untyped]): untyped = + ## Generates the HTML `tr` element. 
+ result = xmlCheckedTag(e, "tr", commonAttr) + +macro track*(e: varargs[untyped]): untyped = + ## Generates the HTML `track` element. + result = xmlCheckedTag(e, "track", "kind srclang label default" & + commonAttr, "src", true) + +macro tt*(e: varargs[untyped]): untyped = + ## Generates the HTML `tt` element. result = xmlCheckedTag(e, "tt", commonAttr) -macro ul*(e: expr): expr {.immediate.} = - ## generates the HTML ``ul`` element. - let e = callsite() +macro u*(e: varargs[untyped]): untyped = + ## Generates the HTML `u` element. + result = xmlCheckedTag(e, "u", commonAttr) + +macro ul*(e: varargs[untyped]): untyped = + ## Generates the HTML `ul` element. result = xmlCheckedTag(e, "ul", commonAttr) -macro `var`*(e: expr): expr {.immediate.} = - ## generates the HTML ``var`` element. - let e = callsite() +macro `var`*(e: varargs[untyped]): untyped = + ## Generates the HTML `var` element. result = xmlCheckedTag(e, "var", commonAttr) -when isMainModule: - var nim = "Nim" - echo h1(a(href="http://nim-lang.org", nim)) - echo form(action="test", `accept-charset` = "Content-Type") - +macro video*(e: varargs[untyped]): untyped = + ## Generates the HTML `video` element. + result = xmlCheckedTag(e, "video", "src crossorigin poster preload " & + "autoplay mediagroup loop muted controls width height" & commonAttr) + +macro wbr*(e: varargs[untyped]): untyped = + ## Generates the HTML `wbr` element. + result = xmlCheckedTag(e, "wbr", commonAttr, "", true) + +macro portal*(e: varargs[untyped]): untyped = + ## Generates the HTML `portal` element. + result = xmlCheckedTag(e, "portal", "width height type src disabled" & commonAttr, "", false) + + +macro math*(e: varargs[untyped]): untyped = + ## Generates the HTML `math` element. 
MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/math#Examples + result = xmlCheckedTag(e, "math", "mathbackground mathcolor href overflow" & commonAttr) + +macro maction*(e: varargs[untyped]): untyped = + ## Generates the HTML `maction` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/maction + result = xmlCheckedTag(e, "maction", "mathbackground mathcolor href" & commonAttr) + +macro menclose*(e: varargs[untyped]): untyped = + ## Generates the HTML `menclose` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/menclose + result = xmlCheckedTag(e, "menclose", "mathbackground mathcolor href notation" & commonAttr) + +macro merror*(e: varargs[untyped]): untyped = + ## Generates the HTML `merror` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/merror + result = xmlCheckedTag(e, "merror", "mathbackground mathcolor href" & commonAttr) + +macro mfenced*(e: varargs[untyped]): untyped = + ## Generates the HTML `mfenced` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mfenced + result = xmlCheckedTag(e, "mfenced", "mathbackground mathcolor href open separators" & commonAttr) + +macro mfrac*(e: varargs[untyped]): untyped = + ## Generates the HTML `mfrac` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mfrac + result = xmlCheckedTag(e, "mfrac", "mathbackground mathcolor href linethickness numalign" & commonAttr) + +macro mglyph*(e: varargs[untyped]): untyped = + ## Generates the HTML `mglyph` element. 
MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mglyph + result = xmlCheckedTag(e, "mglyph", "mathbackground mathcolor href src valign" & commonAttr) + +macro mi*(e: varargs[untyped]): untyped = + ## Generates the HTML `mi` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mi + result = xmlCheckedTag(e, "mi", "mathbackground mathcolor href mathsize mathvariant" & commonAttr) + +macro mlabeledtr*(e: varargs[untyped]): untyped = + ## Generates the HTML `mlabeledtr` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mlabeledtr + result = xmlCheckedTag(e, "mlabeledtr", "mathbackground mathcolor href columnalign groupalign rowalign" & commonAttr) + +macro mmultiscripts*(e: varargs[untyped]): untyped = + ## Generates the HTML `mmultiscripts` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mmultiscripts + result = xmlCheckedTag(e, "mmultiscripts", "mathbackground mathcolor href subscriptshift superscriptshift" & commonAttr) + +macro mn*(e: varargs[untyped]): untyped = + ## Generates the HTML `mn` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mn + result = xmlCheckedTag(e, "mn", "mathbackground mathcolor href mathsize mathvariant" & commonAttr) + +macro mo*(e: varargs[untyped]): untyped = + ## Generates the HTML `mo` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mo + result = xmlCheckedTag(e, "mo", + "mathbackground mathcolor fence form largeop lspace mathsize mathvariant movablelimits rspace separator stretchy symmetric" & commonAttr) + +macro mover*(e: varargs[untyped]): untyped = + ## Generates the HTML `mover` element. 
MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mover + result = xmlCheckedTag(e, "mover", "mathbackground mathcolor accent href" & commonAttr) + +macro mpadded*(e: varargs[untyped]): untyped = + ## Generates the HTML `mpadded` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mpadded + result = xmlCheckedTag(e, "mpadded", "mathbackground mathcolor depth href lspace voffset" & commonAttr) + +macro mphantom*(e: varargs[untyped]): untyped = + ## Generates the HTML `mphantom` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mphantom + result = xmlCheckedTag(e, "mphantom", "mathbackground" & commonAttr) + +macro mroot*(e: varargs[untyped]): untyped = + ## Generates the HTML `mroot` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mroot + result = xmlCheckedTag(e, "mroot", "mathbackground mathcolor href" & commonAttr) + +macro mrow*(e: varargs[untyped]): untyped = + ## Generates the HTML `mrow` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mrow + result = xmlCheckedTag(e, "mrow", "mathbackground mathcolor href" & commonAttr) + +macro ms*(e: varargs[untyped]): untyped = + ## Generates the HTML `ms` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/ms + result = xmlCheckedTag(e, "ms", "mathbackground mathcolor href lquote mathsize mathvariant rquote" & commonAttr) + +macro mspace*(e: varargs[untyped]): untyped = + ## Generates the HTML `mspace` element. 
MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mspace + result = xmlCheckedTag(e, "mspace", "mathbackground mathcolor href linebreak" & commonAttr) + +macro msqrt*(e: varargs[untyped]): untyped = + ## Generates the HTML `msqrt` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/msqrt + result = xmlCheckedTag(e, "msqrt", "mathbackground mathcolor href" & commonAttr) + +macro mstyle*(e: varargs[untyped]): untyped = + ## Generates the HTML `mstyle` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mstyle + result = xmlCheckedTag(e, "mstyle", ("mathbackground mathcolor href decimalpoint displaystyle " & + "infixlinebreakstyle scriptlevel scriptminsize scriptsizemultiplier" & commonAttr)) + +macro msub*(e: varargs[untyped]): untyped = + ## Generates the HTML `msub` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/msub + result = xmlCheckedTag(e, "msub", "mathbackground mathcolor href subscriptshift" & commonAttr) + +macro msubsup*(e: varargs[untyped]): untyped = + ## Generates the HTML `msubsup` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/msubsup + result = xmlCheckedTag(e, "msubsup", "mathbackground mathcolor href subscriptshift superscriptshift" & commonAttr) + +macro msup*(e: varargs[untyped]): untyped = + ## Generates the HTML `msup` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/msup + result = xmlCheckedTag(e, "msup", "mathbackground mathcolor href superscriptshift" & commonAttr) + +macro mtable*(e: varargs[untyped]): untyped = + ## Generates the HTML `mtable` element. 
MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mtable + result = xmlCheckedTag(e, "mtable", ("mathbackground mathcolor href align " & + "alignmentscope columnalign columnlines columnspacing columnwidth " & + "displaystyle equalcolumns equalrows frame framespacing groupalign " & + "rowalign rowlines rowspacing side width" & commonAttr)) + +macro mtd*(e: varargs[untyped]): untyped = + ## Generates the HTML `mtd` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mtd + result = xmlCheckedTag(e, "mtd", + "mathbackground mathcolor href columnalign columnspan groupalign rowalign rowspan" & commonAttr) + +macro mtext*(e: varargs[untyped]): untyped = + ## Generates the HTML `mtext` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/mtext + result = xmlCheckedTag(e, "mtext", "mathbackground mathcolor href mathsize mathvariant" & commonAttr) + +macro munder*(e: varargs[untyped]): untyped = + ## Generates the HTML `munder` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/munder + result = xmlCheckedTag(e, "munder", "mathbackground mathcolor href accentunder align" & commonAttr) + +macro munderover*(e: varargs[untyped]): untyped = + ## Generates the HTML `munderover` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/munderover + result = xmlCheckedTag(e, "munderover", "mathbackground mathcolor href accentunder accent align" & commonAttr) + +macro semantics*(e: varargs[untyped]): untyped = + ## Generates the HTML `semantics` element. 
MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/semantics + result = xmlCheckedTag(e, "semantics", "mathbackground mathcolor href definitionURL encoding cd src" & commonAttr) + +macro annotation*(e: varargs[untyped]): untyped = + ## Generates the HTML `annotation` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/semantics + result = xmlCheckedTag(e, "annotation", "mathbackground mathcolor href definitionURL encoding cd src" & commonAttr) + +macro `annotation-xml`*(e: varargs[untyped]): untyped = + ## Generates the HTML `annotation-xml` element. MathML https://wikipedia.org/wiki/MathML + ## https://developer.mozilla.org/en-US/docs/Web/MathML/Element/semantics + result = xmlCheckedTag(e, "annotation-xml", "mathbackground mathcolor href definitionURL encoding cd src" & commonAttr) + + +runnableExamples: + let nim = "Nim" + assert h1(a(href = "https://nim-lang.org", nim)) == + """<h1><a href="https://nim-lang.org">Nim</a></h1>""" + assert form(action = "test", `accept-charset` = "Content-Type") == + """<form action="test" accept-charset="Content-Type"></form>""" + + + assert math( + semantics( + mrow( + msup( + mi("x"), + mn("42") + ) + ) + ) + ) == "<math><semantics><mrow><msup><mi>x</mi><mn>42</mn></msup></mrow></semantics></math>" + + assert math( + semantics( + annotation(encoding = "application/x-tex", title = "Latex on Web", r"x^{2} + y") + ) + ) == """<math><semantics><annotation encoding="application/x-tex" title="Latex on Web">x^{2} + y</annotation></semantics></math>""" diff --git a/lib/pure/htmlparser.nim b/lib/pure/htmlparser.nim index 5e4eba4e5..62919546f 100644 --- a/lib/pure/htmlparser.nim +++ b/lib/pure/htmlparser.nim @@ -7,19 +7,18 @@ # distribution, for details about the copyright. # -## This module parses an HTML document and creates its XML tree representation. -## It is supposed to handle the *wild* HTML the real world uses. 
-## +## **NOTE**: The behaviour might change in future versions as it is not +## clear what "*wild* HTML the real world uses" really implies. +## ## It can be used to parse a wild HTML document and output it as valid XHTML ## document (well, if you are lucky): -## -## .. code-block:: Nim -## +## ```Nim ## echo loadHtml("mydirty.html") +## ``` ## ## Every tag in the resulting tree is in lower case. ## -## **Note:** The resulting ``PXmlNode`` already uses the ``clientData`` field, +## **Note:** The resulting `XmlNode` already uses the `clientData` field, ## so it cannot be used by clients of this library. ## ## Example: Transforming hyperlinks @@ -27,178 +26,182 @@ ## ## This code demonstrates how you can iterate over all the tags in an HTML file ## and write back the modified version. In this case we look for hyperlinks -## ending with the extension ``.rst`` and convert them to ``.html``. +## ending with the extension `.rst` and convert them to `.html`. ## -## .. code-block:: Nim -## -## import htmlparser -## import xmltree # To use '$' for PXmlNode -## import strtabs # To access PXmlAttributes -## import os # To use splitFile -## import strutils # To use cmpIgnoreCase +## ```Nim test +## import std/htmlparser +## import std/xmltree # To use '$' for XmlNode +## import std/strtabs # To access XmlAttributes +## import std/os # To use splitFile +## import std/strutils # To use cmpIgnoreCase ## ## proc transformHyperlinks() = -## let html = loadHTML("input.html") +## let html = loadHtml("input.html") ## ## for a in html.findAll("a"): -## let href = a.attrs["href"] -## if not href.isNil: -## let (dir, filename, ext) = splitFile(href) +## if a.attrs.hasKey "href": +## let (dir, filename, ext) = splitFile(a.attrs["href"]) ## if cmpIgnoreCase(ext, ".rst") == 0: ## a.attrs["href"] = dir / filename & ".html" ## ## writeFile("output.html", $html) +## ``` + +{.deprecated: "use `nimble install htmlparser` and import `pkg/htmlparser` instead".} -import strutils, streams, 
parsexml, xmltree, unicode, strtabs +import std/[strutils, streams, parsexml, xmltree, unicode, strtabs] + +when defined(nimPreviewSlimSystem): + import std/syncio type - THtmlTag* = enum ## list of all supported HTML tags; order will always be + HtmlTag* = enum ## list of all supported HTML tags; order will always be ## alphabetically tagUnknown, ## unknown HTML element - tagA, ## the HTML ``a`` element - tagAbbr, ## the deprecated HTML ``abbr`` element - tagAcronym, ## the HTML ``acronym`` element - tagAddress, ## the HTML ``address`` element - tagApplet, ## the deprecated HTML ``applet`` element - tagArea, ## the HTML ``area`` element - tagArticle, ## the HTML ``article`` element - tagAside, ## the HTML ``aside`` element - tagAudio, ## the HTML ``audio`` element - tagB, ## the HTML ``b`` element - tagBase, ## the HTML ``base`` element - tagBdi, ## the HTML ``bdi`` element - tagBdo, ## the deprecated HTML ``dbo`` element - tagBasefont, ## the deprecated HTML ``basefont`` element - tagBig, ## the HTML ``big`` element - tagBlockquote, ## the HTML ``blockquote`` element - tagBody, ## the HTML ``body`` element - tagBr, ## the HTML ``br`` element - tagButton, ## the HTML ``button`` element - tagCanvas, ## the HTML ``canvas`` element - tagCaption, ## the HTML ``caption`` element - tagCenter, ## the deprecated HTML ``center`` element - tagCite, ## the HTML ``cite`` element - tagCode, ## the HTML ``code`` element - tagCol, ## the HTML ``col`` element - tagColgroup, ## the HTML ``colgroup`` element - tagCommand, ## the HTML ``command`` element - tagDatalist, ## the HTML ``datalist`` element - tagDd, ## the HTML ``dd`` element - tagDel, ## the HTML ``del`` element - tagDetails, ## the HTML ``details`` element - tagDfn, ## the HTML ``dfn`` element - tagDialog, ## the HTML ``dialog`` element - tagDiv, ## the HTML ``div`` element - tagDir, ## the deprecated HTLM ``dir`` element - tagDl, ## the HTML ``dl`` element - tagDt, ## the HTML ``dt`` element - tagEm, ## the HTML ``em`` 
element - tagEmbed, ## the HTML ``embed`` element - tagFieldset, ## the HTML ``fieldset`` element - tagFigcaption, ## the HTML ``figcaption`` element - tagFigure, ## the HTML ``figure`` element - tagFont, ## the deprecated HTML ``font`` element - tagFooter, ## the HTML ``footer`` element - tagForm, ## the HTML ``form`` element - tagFrame, ## the HTML ``frame`` element - tagFrameset, ## the deprecated HTML ``frameset`` element - tagH1, ## the HTML ``h1`` element - tagH2, ## the HTML ``h2`` element - tagH3, ## the HTML ``h3`` element - tagH4, ## the HTML ``h4`` element - tagH5, ## the HTML ``h5`` element - tagH6, ## the HTML ``h6`` element - tagHead, ## the HTML ``head`` element - tagHeader, ## the HTML ``header`` element - tagHgroup, ## the HTML ``hgroup`` element - tagHtml, ## the HTML ``html`` element - tagHr, ## the HTML ``hr`` element - tagI, ## the HTML ``i`` element - tagIframe, ## the deprecated HTML ``iframe`` element - tagImg, ## the HTML ``img`` element - tagInput, ## the HTML ``input`` element - tagIns, ## the HTML ``ins`` element - tagIsindex, ## the deprecated HTML ``isindex`` element - tagKbd, ## the HTML ``kbd`` element - tagKeygen, ## the HTML ``keygen`` element - tagLabel, ## the HTML ``label`` element - tagLegend, ## the HTML ``legend`` element - tagLi, ## the HTML ``li`` element - tagLink, ## the HTML ``link`` element - tagMap, ## the HTML ``map`` element - tagMark, ## the HTML ``mark`` element - tagMenu, ## the deprecated HTML ``menu`` element - tagMeta, ## the HTML ``meta`` element - tagMeter, ## the HTML ``meter`` element - tagNav, ## the HTML ``nav`` element - tagNobr, ## the deprecated HTML ``nobr`` element - tagNoframes, ## the deprecated HTML ``noframes`` element - tagNoscript, ## the HTML ``noscript`` element - tagObject, ## the HTML ``object`` element - tagOl, ## the HTML ``ol`` element - tagOptgroup, ## the HTML ``optgroup`` element - tagOption, ## the HTML ``option`` element - tagOutput, ## the HTML ``output`` element - tagP, ## the 
HTML ``p`` element - tagParam, ## the HTML ``param`` element - tagPre, ## the HTML ``pre`` element - tagProgress, ## the HTML ``progress`` element - tagQ, ## the HTML ``q`` element - tagRp, ## the HTML ``rp`` element - tagRt, ## the HTML ``rt`` element - tagRuby, ## the HTML ``ruby`` element - tagS, ## the deprecated HTML ``s`` element - tagSamp, ## the HTML ``samp`` element - tagScript, ## the HTML ``script`` element - tagSection, ## the HTML ``section`` element - tagSelect, ## the HTML ``select`` element - tagSmall, ## the HTML ``small`` element - tagSource, ## the HTML ``source`` element - tagSpan, ## the HTML ``span`` element - tagStrike, ## the deprecated HTML ``strike`` element - tagStrong, ## the HTML ``strong`` element - tagStyle, ## the HTML ``style`` element - tagSub, ## the HTML ``sub`` element - tagSummary, ## the HTML ``summary`` element - tagSup, ## the HTML ``sup`` element - tagTable, ## the HTML ``table`` element - tagTbody, ## the HTML ``tbody`` element - tagTd, ## the HTML ``td`` element - tagTextarea, ## the HTML ``textarea`` element - tagTfoot, ## the HTML ``tfoot`` element - tagTh, ## the HTML ``th`` element - tagThead, ## the HTML ``thead`` element - tagTime, ## the HTML ``time`` element - tagTitle, ## the HTML ``title`` element - tagTr, ## the HTML ``tr`` element - tagTrack, ## the HTML ``track`` element - tagTt, ## the HTML ``tt`` element - tagU, ## the deprecated HTML ``u`` element - tagUl, ## the HTML ``ul`` element - tagVar, ## the HTML ``var`` element - tagVideo, ## the HTML ``video`` element - tagWbr ## the HTML ``wbr`` element + tagA, ## the HTML `a` element + tagAbbr, ## the deprecated HTML `abbr` element + tagAcronym, ## the HTML `acronym` element + tagAddress, ## the HTML `address` element + tagApplet, ## the deprecated HTML `applet` element + tagArea, ## the HTML `area` element + tagArticle, ## the HTML `article` element + tagAside, ## the HTML `aside` element + tagAudio, ## the HTML `audio` element + tagB, ## the HTML `b` element 
+ tagBase, ## the HTML `base` element + tagBdi, ## the HTML `bdi` element + tagBdo, ## the deprecated HTML `dbo` element + tagBasefont, ## the deprecated HTML `basefont` element + tagBig, ## the HTML `big` element + tagBlockquote, ## the HTML `blockquote` element + tagBody, ## the HTML `body` element + tagBr, ## the HTML `br` element + tagButton, ## the HTML `button` element + tagCanvas, ## the HTML `canvas` element + tagCaption, ## the HTML `caption` element + tagCenter, ## the deprecated HTML `center` element + tagCite, ## the HTML `cite` element + tagCode, ## the HTML `code` element + tagCol, ## the HTML `col` element + tagColgroup, ## the HTML `colgroup` element + tagCommand, ## the HTML `command` element + tagDatalist, ## the HTML `datalist` element + tagDd, ## the HTML `dd` element + tagDel, ## the HTML `del` element + tagDetails, ## the HTML `details` element + tagDfn, ## the HTML `dfn` element + tagDialog, ## the HTML `dialog` element + tagDiv, ## the HTML `div` element + tagDir, ## the deprecated HTLM `dir` element + tagDl, ## the HTML `dl` element + tagDt, ## the HTML `dt` element + tagEm, ## the HTML `em` element + tagEmbed, ## the HTML `embed` element + tagFieldset, ## the HTML `fieldset` element + tagFigcaption, ## the HTML `figcaption` element + tagFigure, ## the HTML `figure` element + tagFont, ## the deprecated HTML `font` element + tagFooter, ## the HTML `footer` element + tagForm, ## the HTML `form` element + tagFrame, ## the HTML `frame` element + tagFrameset, ## the deprecated HTML `frameset` element + tagH1, ## the HTML `h1` element + tagH2, ## the HTML `h2` element + tagH3, ## the HTML `h3` element + tagH4, ## the HTML `h4` element + tagH5, ## the HTML `h5` element + tagH6, ## the HTML `h6` element + tagHead, ## the HTML `head` element + tagHeader, ## the HTML `header` element + tagHgroup, ## the HTML `hgroup` element + tagHtml, ## the HTML `html` element + tagHr, ## the HTML `hr` element + tagI, ## the HTML `i` element + tagIframe, ## the 
deprecated HTML `iframe` element + tagImg, ## the HTML `img` element + tagInput, ## the HTML `input` element + tagIns, ## the HTML `ins` element + tagIsindex, ## the deprecated HTML `isindex` element + tagKbd, ## the HTML `kbd` element + tagKeygen, ## the HTML `keygen` element + tagLabel, ## the HTML `label` element + tagLegend, ## the HTML `legend` element + tagLi, ## the HTML `li` element + tagLink, ## the HTML `link` element + tagMap, ## the HTML `map` element + tagMark, ## the HTML `mark` element + tagMenu, ## the deprecated HTML `menu` element + tagMeta, ## the HTML `meta` element + tagMeter, ## the HTML `meter` element + tagNav, ## the HTML `nav` element + tagNobr, ## the deprecated HTML `nobr` element + tagNoframes, ## the deprecated HTML `noframes` element + tagNoscript, ## the HTML `noscript` element + tagObject, ## the HTML `object` element + tagOl, ## the HTML `ol` element + tagOptgroup, ## the HTML `optgroup` element + tagOption, ## the HTML `option` element + tagOutput, ## the HTML `output` element + tagP, ## the HTML `p` element + tagParam, ## the HTML `param` element + tagPre, ## the HTML `pre` element + tagProgress, ## the HTML `progress` element + tagQ, ## the HTML `q` element + tagRp, ## the HTML `rp` element + tagRt, ## the HTML `rt` element + tagRuby, ## the HTML `ruby` element + tagS, ## the deprecated HTML `s` element + tagSamp, ## the HTML `samp` element + tagScript, ## the HTML `script` element + tagSection, ## the HTML `section` element + tagSelect, ## the HTML `select` element + tagSmall, ## the HTML `small` element + tagSource, ## the HTML `source` element + tagSpan, ## the HTML `span` element + tagStrike, ## the deprecated HTML `strike` element + tagStrong, ## the HTML `strong` element + tagStyle, ## the HTML `style` element + tagSub, ## the HTML `sub` element + tagSummary, ## the HTML `summary` element + tagSup, ## the HTML `sup` element + tagTable, ## the HTML `table` element + tagTbody, ## the HTML `tbody` element + tagTd, ## the HTML 
`td` element + tagTextarea, ## the HTML `textarea` element + tagTfoot, ## the HTML `tfoot` element + tagTh, ## the HTML `th` element + tagThead, ## the HTML `thead` element + tagTime, ## the HTML `time` element + tagTitle, ## the HTML `title` element + tagTr, ## the HTML `tr` element + tagTrack, ## the HTML `track` element + tagTt, ## the HTML `tt` element + tagU, ## the deprecated HTML `u` element + tagUl, ## the HTML `ul` element + tagVar, ## the HTML `var` element + tagVideo, ## the HTML `video` element + tagWbr ## the HTML `wbr` element const tagToStr* = [ - "a", "abbr", "acronym", "address", "applet", "area", "article", + "a", "abbr", "acronym", "address", "applet", "area", "article", "aside", "audio", - "b", "base", "basefont", "bdi", "bdo", "big", "blockquote", "body", - "br", "button", "canvas", "caption", "center", "cite", "code", + "b", "base", "basefont", "bdi", "bdo", "big", "blockquote", "body", + "br", "button", "canvas", "caption", "center", "cite", "code", "col", "colgroup", "command", - "datalist", "dd", "del", "details", "dfn", "dialog", "div", - "dir", "dl", "dt", "em", "embed", "fieldset", + "datalist", "dd", "del", "details", "dfn", "dialog", "div", + "dir", "dl", "dt", "em", "embed", "fieldset", "figcaption", "figure", "font", "footer", - "form", "frame", "frameset", "h1", "h2", "h3", - "h4", "h5", "h6", "head", "header", "hgroup", "html", "hr", - "i", "iframe", "img", "input", "ins", "isindex", + "form", "frame", "frameset", "h1", "h2", "h3", + "h4", "h5", "h6", "head", "header", "hgroup", "html", "hr", + "i", "iframe", "img", "input", "ins", "isindex", "kbd", "keygen", "label", "legend", "li", "link", "map", "mark", - "menu", "meta", "meter", "nav", "nobr", "noframes", "noscript", - "object", "ol", - "optgroup", "option", "output", "p", "param", "pre", "progress", "q", - "rp", "rt", "ruby", "s", "samp", "script", "section", "select", "small", - "source", "span", "strike", "strong", "style", - "sub", "summary", "sup", "table", + "menu", 
"meta", "meter", "nav", "nobr", "noframes", "noscript", + "object", "ol", + "optgroup", "option", "output", "p", "param", "pre", "progress", "q", + "rp", "rt", "ruby", "s", "samp", "script", "section", "select", "small", + "source", "span", "strike", "strong", "style", + "sub", "summary", "sup", "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "time", "title", "tr", "track", "tt", "u", "ul", "var", "video", "wbr"] InlineTags* = {tagA, tagAbbr, tagAcronym, tagApplet, tagB, tagBasefont, @@ -206,96 +209,23 @@ const tagEm, tagFont, tagI, tagImg, tagIns, tagInput, tagIframe, tagKbd, tagLabel, tagMap, tagObject, tagQ, tagSamp, tagScript, tagSelect, tagSmall, tagSpan, tagStrong, tagSub, tagSup, tagTextarea, tagTt, - tagVar, tagApplet, tagBasefont, tagFont, tagIframe, tagU, tagS, + tagVar, tagApplet, tagBasefont, tagFont, tagIframe, tagU, tagS, tagStrike, tagWbr} - BlockTags* = {tagAddress, tagBlockquote, tagCenter, tagDel, tagDir, tagDiv, - tagDl, tagFieldset, tagForm, tagH1, tagH2, tagH3, tagH4, - tagH5, tagH6, tagHr, tagIns, tagIsindex, tagMenu, tagNoframes, tagNoscript, - tagOl, tagP, tagPre, tagTable, tagUl, tagCenter, tagDir, tagIsindex, + BlockTags* = {tagAddress, tagBlockquote, tagCenter, tagDel, tagDir, tagDiv, + tagDl, tagFieldset, tagForm, tagH1, tagH2, tagH3, tagH4, + tagH5, tagH6, tagHr, tagIns, tagIsindex, tagMenu, tagNoframes, tagNoscript, + tagOl, tagP, tagPre, tagTable, tagUl, tagCenter, tagDir, tagIsindex, tagMenu, tagNoframes} - SingleTags* = {tagArea, tagBase, tagBasefont, + SingleTags* = {tagArea, tagBase, tagBasefont, tagBr, tagCol, tagFrame, tagHr, tagImg, tagIsindex, - tagLink, tagMeta, tagParam, tagWbr} - - Entities = [ - ("nbsp", 0x00A0), ("iexcl", 0x00A1), ("cent", 0x00A2), ("pound", 0x00A3), - ("curren", 0x00A4), ("yen", 0x00A5), ("brvbar", 0x00A6), ("sect", 0x00A7), - ("uml", 0x00A8), ("copy", 0x00A9), ("ordf", 0x00AA), ("laquo", 0x00AB), - ("not", 0x00AC), ("shy", 0x00AD), ("reg", 0x00AE), ("macr", 0x00AF), - ("deg", 0x00B0), 
("plusmn", 0x00B1), ("sup2", 0x00B2), ("sup3", 0x00B3), - ("acute", 0x00B4), ("micro", 0x00B5), ("para", 0x00B6), ("middot", 0x00B7), - ("cedil", 0x00B8), ("sup1", 0x00B9), ("ordm", 0x00BA), ("raquo", 0x00BB), - ("frac14", 0x00BC), ("frac12", 0x00BD), ("frac34", 0x00BE), - ("iquest", 0x00BF), ("Agrave", 0x00C0), ("Aacute", 0x00C1), - ("Acirc", 0x00C2), ("Atilde", 0x00C3), ("Auml", 0x00C4), ("Aring", 0x00C5), - ("AElig", 0x00C6), ("Ccedil", 0x00C7), ("Egrave", 0x00C8), - ("Eacute", 0x00C9), ("Ecirc", 0x00CA), ("Euml", 0x00CB), ("Igrave", 0x00CC), - ("Iacute", 0x00CD), ("Icirc", 0x00CE), ("Iuml", 0x00CF), ("ETH", 0x00D0), - ("Ntilde", 0x00D1), ("Ograve", 0x00D2), ("Oacute", 0x00D3), - ("Ocirc", 0x00D4), ("Otilde", 0x00D5), ("Ouml", 0x00D6), ("times", 0x00D7), - ("Oslash", 0x00D8), ("Ugrave", 0x00D9), ("Uacute", 0x00DA), - ("Ucirc", 0x00DB), ("Uuml", 0x00DC), ("Yacute", 0x00DD), ("THORN", 0x00DE), - ("szlig", 0x00DF), ("agrave", 0x00E0), ("aacute", 0x00E1), - ("acirc", 0x00E2), ("atilde", 0x00E3), ("auml", 0x00E4), ("aring", 0x00E5), - ("aelig", 0x00E6), ("ccedil", 0x00E7), ("egrave", 0x00E8), - ("eacute", 0x00E9), ("ecirc", 0x00EA), ("euml", 0x00EB), ("igrave", 0x00EC), - ("iacute", 0x00ED), ("icirc", 0x00EE), ("iuml", 0x00EF), ("eth", 0x00F0), - ("ntilde", 0x00F1), ("ograve", 0x00F2), ("oacute", 0x00F3), - ("ocirc", 0x00F4), ("otilde", 0x00F5), ("ouml", 0x00F6), ("divide", 0x00F7), - ("oslash", 0x00F8), ("ugrave", 0x00F9), ("uacute", 0x00FA), - ("ucirc", 0x00FB), ("uuml", 0x00FC), ("yacute", 0x00FD), ("thorn", 0x00FE), - ("yuml", 0x00FF), ("OElig", 0x0152), ("oelig", 0x0153), ("Scaron", 0x0160), - ("scaron", 0x0161), ("Yuml", 0x0178), ("fnof", 0x0192), ("circ", 0x02C6), - ("tilde", 0x02DC), ("Alpha", 0x0391), ("Beta", 0x0392), ("Gamma", 0x0393), - ("Delta", 0x0394), ("Epsilon", 0x0395), ("Zeta", 0x0396), ("Eta", 0x0397), - ("Theta", 0x0398), ("Iota", 0x0399), ("Kappa", 0x039A), ("Lambda", 0x039B), - ("Mu", 0x039C), ("Nu", 0x039D), ("Xi", 0x039E), ("Omicron", 
0x039F), - ("Pi", 0x03A0), ("Rho", 0x03A1), ("Sigma", 0x03A3), ("Tau", 0x03A4), - ("Upsilon", 0x03A5), ("Phi", 0x03A6), ("Chi", 0x03A7), ("Psi", 0x03A8), - ("Omega", 0x03A9), ("alpha", 0x03B1), ("beta", 0x03B2), ("gamma", 0x03B3), - ("delta", 0x03B4), ("epsilon", 0x03B5), ("zeta", 0x03B6), ("eta", 0x03B7), - ("theta", 0x03B8), ("iota", 0x03B9), ("kappa", 0x03BA), ("lambda", 0x03BB), - ("mu", 0x03BC), ("nu", 0x03BD), ("xi", 0x03BE), ("omicron", 0x03BF), - ("pi", 0x03C0), ("rho", 0x03C1), ("sigmaf", 0x03C2), ("sigma", 0x03C3), - ("tau", 0x03C4), ("upsilon", 0x03C5), ("phi", 0x03C6), ("chi", 0x03C7), - ("psi", 0x03C8), ("omega", 0x03C9), ("thetasym", 0x03D1), ("upsih", 0x03D2), - ("piv", 0x03D6), ("ensp", 0x2002), ("emsp", 0x2003), ("thinsp", 0x2009), - ("zwnj", 0x200C), ("zwj", 0x200D), ("lrm", 0x200E), ("rlm", 0x200F), - ("ndash", 0x2013), ("mdash", 0x2014), ("lsquo", 0x2018), ("rsquo", 0x2019), - ("sbquo", 0x201A), ("ldquo", 0x201C), ("rdquo", 0x201D), ("bdquo", 0x201E), - ("dagger", 0x2020), ("Dagger", 0x2021), ("bull", 0x2022), - ("hellip", 0x2026), ("permil", 0x2030), ("prime", 0x2032), - ("Prime", 0x2033), ("lsaquo", 0x2039), ("rsaquo", 0x203A), - ("oline", 0x203E), ("frasl", 0x2044), ("euro", 0x20AC), - ("image", 0x2111), ("weierp", 0x2118), ("real", 0x211C), - ("trade", 0x2122), ("alefsym", 0x2135), ("larr", 0x2190), - ("uarr", 0x2191), ("rarr", 0x2192), ("darr", 0x2193), - ("harr", 0x2194), ("crarr", 0x21B5), ("lArr", 0x21D0), - ("uArr", 0x21D1), ("rArr", 0x21D2), ("dArr", 0x21D3), - ("hArr", 0x21D4), ("forall", 0x2200), ("part", 0x2202), - ("exist", 0x2203), ("empty", 0x2205), ("nabla", 0x2207), - ("isin", 0x2208), ("notin", 0x2209), ("ni", 0x220B), - ("prod", 0x220F), ("sum", 0x2211), ("minus", 0x2212), - ("lowast", 0x2217), ("radic", 0x221A), ("prop", 0x221D), - ("infin", 0x221E), ("ang", 0x2220), ("and", 0x2227), - ("or", 0x2228), ("cap", 0x2229), ("cup", 0x222A), - ("int", 0x222B), ("there4", 0x2234), ("sim", 0x223C), - ("cong", 0x2245), ("asymp", 
0x2248), ("ne", 0x2260), - ("equiv", 0x2261), ("le", 0x2264), ("ge", 0x2265), - ("sub", 0x2282), ("sup", 0x2283), ("nsub", 0x2284), - ("sube", 0x2286), ("supe", 0x2287), ("oplus", 0x2295), - ("otimes", 0x2297), ("perp", 0x22A5), ("sdot", 0x22C5), - ("lceil", 0x2308), ("rceil", 0x2309), ("lfloor", 0x230A), - ("rfloor", 0x230B), ("lang", 0x2329), ("rang", 0x232A), - ("loz", 0x25CA), ("spades", 0x2660), ("clubs", 0x2663), - ("hearts", 0x2665), ("diams", 0x2666)] + tagLink, tagMeta, tagParam, tagWbr, tagSource} proc allLower(s: string): bool = for c in s: if c < 'a' or c > 'z': return false return true -proc toHtmlTag(s: string): THtmlTag = +proc toHtmlTag(s: string): HtmlTag = case s of "a": tagA of "abbr": tagAbbr @@ -422,39 +352,1563 @@ proc toHtmlTag(s: string): THtmlTag = of "wbr": tagWbr else: tagUnknown -proc htmlTag*(n: XmlNode): THtmlTag = - ## gets `n`'s tag as a ``THtmlTag``. + +proc htmlTag*(n: XmlNode): HtmlTag = + ## Gets `n`'s tag as a `HtmlTag`. if n.clientData == 0: n.clientData = toHtmlTag(n.tag).ord - result = THtmlTag(n.clientData) + result = HtmlTag(n.clientData) -proc htmlTag*(s: string): THtmlTag = - ## converts `s` to a ``THtmlTag``. If `s` is no HTML tag, ``tagUnknown`` is +proc htmlTag*(s: string): HtmlTag = + ## Converts `s` to a `HtmlTag`. If `s` is no HTML tag, `tagUnknown` is ## returned. - let s = if allLower(s): s else: s.toLower + let s = if allLower(s): s else: toLowerAscii(s) result = toHtmlTag(s) -proc entityToUtf8*(entity: string): string = - ## converts an HTML entity name like ``Ü`` to its UTF-8 equivalent. +proc runeToEntity*(rune: Rune): string = + ## converts a Rune to its numeric HTML entity equivalent. 
+ runnableExamples: + import std/unicode + doAssert runeToEntity(Rune(0)) == "" + doAssert runeToEntity(Rune(-1)) == "" + doAssert runeToEntity("Ü".runeAt(0)) == "#220" + doAssert runeToEntity("∈".runeAt(0)) == "#8712" + if rune.ord <= 0: result = "" + else: result = '#' & $rune.ord + +proc entityToRune*(entity: string): Rune = + ## Converts an HTML entity name like `Ü` or values like `Ü` + ## or `Ü` to its UTF-8 equivalent. + ## Rune(0) is returned if the entity name is unknown. + runnableExamples: + import std/unicode + doAssert entityToRune("") == Rune(0) + doAssert entityToRune("a") == Rune(0) + doAssert entityToRune("gt") == ">".runeAt(0) + doAssert entityToRune("Uuml") == "Ü".runeAt(0) + doAssert entityToRune("quest") == "?".runeAt(0) + doAssert entityToRune("#x0003F") == "?".runeAt(0) + if entity.len < 2: return # smallest entity has length 2 + if entity[0] == '#': + var runeValue = 0 + case entity[1] + of '0'..'9': + try: runeValue = parseInt(entity[1..^1]) + except ValueError: discard + of 'x', 'X': # not case sensitive here + try: runeValue = parseHexInt(entity[2..^1]) + except ValueError: discard + else: discard # other entities are not defined with prefix `#` + if runeValue notin 0..0x10FFFF: runeValue = 0 # only return legal values + return Rune(runeValue) + case entity # entity names are case sensitive + of "Tab": Rune(0x00009) + of "NewLine": Rune(0x0000A) + of "excl": Rune(0x00021) + of "quot", "QUOT": Rune(0x00022) + of "num": Rune(0x00023) + of "dollar": Rune(0x00024) + of "percnt": Rune(0x00025) + of "amp", "AMP": Rune(0x00026) + of "apos": Rune(0x00027) + of "lpar": Rune(0x00028) + of "rpar": Rune(0x00029) + of "ast", "midast": Rune(0x0002A) + of "plus": Rune(0x0002B) + of "comma": Rune(0x0002C) + of "period": Rune(0x0002E) + of "sol": Rune(0x0002F) + of "colon": Rune(0x0003A) + of "semi": Rune(0x0003B) + of "lt", "LT": Rune(0x0003C) + of "equals": Rune(0x0003D) + of "gt", "GT": Rune(0x0003E) + of "quest": Rune(0x0003F) + of "commat": 
Rune(0x00040) + of "lsqb", "lbrack": Rune(0x0005B) + of "bsol": Rune(0x0005C) + of "rsqb", "rbrack": Rune(0x0005D) + of "Hat": Rune(0x0005E) + of "lowbar": Rune(0x0005F) + of "grave", "DiacriticalGrave": Rune(0x00060) + of "lcub", "lbrace": Rune(0x0007B) + of "verbar", "vert", "VerticalLine": Rune(0x0007C) + of "rcub", "rbrace": Rune(0x0007D) + of "nbsp", "NonBreakingSpace": Rune(0x000A0) + of "iexcl": Rune(0x000A1) + of "cent": Rune(0x000A2) + of "pound": Rune(0x000A3) + of "curren": Rune(0x000A4) + of "yen": Rune(0x000A5) + of "brvbar": Rune(0x000A6) + of "sect": Rune(0x000A7) + of "Dot", "die", "DoubleDot", "uml": Rune(0x000A8) + of "copy", "COPY": Rune(0x000A9) + of "ordf": Rune(0x000AA) + of "laquo": Rune(0x000AB) + of "not": Rune(0x000AC) + of "shy": Rune(0x000AD) + of "reg", "circledR", "REG": Rune(0x000AE) + of "macr", "OverBar", "strns": Rune(0x000AF) + of "deg": Rune(0x000B0) + of "plusmn", "pm", "PlusMinus": Rune(0x000B1) + of "sup2": Rune(0x000B2) + of "sup3": Rune(0x000B3) + of "acute", "DiacriticalAcute": Rune(0x000B4) + of "micro": Rune(0x000B5) + of "para": Rune(0x000B6) + of "middot", "centerdot", "CenterDot": Rune(0x000B7) + of "cedil", "Cedilla": Rune(0x000B8) + of "sup1": Rune(0x000B9) + of "ordm": Rune(0x000BA) + of "raquo": Rune(0x000BB) + of "frac14": Rune(0x000BC) + of "frac12", "half": Rune(0x000BD) + of "frac34": Rune(0x000BE) + of "iquest": Rune(0x000BF) + of "Agrave": Rune(0x000C0) + of "Aacute": Rune(0x000C1) + of "Acirc": Rune(0x000C2) + of "Atilde": Rune(0x000C3) + of "Auml": Rune(0x000C4) + of "Aring": Rune(0x000C5) + of "AElig": Rune(0x000C6) + of "Ccedil": Rune(0x000C7) + of "Egrave": Rune(0x000C8) + of "Eacute": Rune(0x000C9) + of "Ecirc": Rune(0x000CA) + of "Euml": Rune(0x000CB) + of "Igrave": Rune(0x000CC) + of "Iacute": Rune(0x000CD) + of "Icirc": Rune(0x000CE) + of "Iuml": Rune(0x000CF) + of "ETH": Rune(0x000D0) + of "Ntilde": Rune(0x000D1) + of "Ograve": Rune(0x000D2) + of "Oacute": Rune(0x000D3) + of "Ocirc": Rune(0x000D4) + 
of "Otilde": Rune(0x000D5) + of "Ouml": Rune(0x000D6) + of "times": Rune(0x000D7) + of "Oslash": Rune(0x000D8) + of "Ugrave": Rune(0x000D9) + of "Uacute": Rune(0x000DA) + of "Ucirc": Rune(0x000DB) + of "Uuml": Rune(0x000DC) + of "Yacute": Rune(0x000DD) + of "THORN": Rune(0x000DE) + of "szlig": Rune(0x000DF) + of "agrave": Rune(0x000E0) + of "aacute": Rune(0x000E1) + of "acirc": Rune(0x000E2) + of "atilde": Rune(0x000E3) + of "auml": Rune(0x000E4) + of "aring": Rune(0x000E5) + of "aelig": Rune(0x000E6) + of "ccedil": Rune(0x000E7) + of "egrave": Rune(0x000E8) + of "eacute": Rune(0x000E9) + of "ecirc": Rune(0x000EA) + of "euml": Rune(0x000EB) + of "igrave": Rune(0x000EC) + of "iacute": Rune(0x000ED) + of "icirc": Rune(0x000EE) + of "iuml": Rune(0x000EF) + of "eth": Rune(0x000F0) + of "ntilde": Rune(0x000F1) + of "ograve": Rune(0x000F2) + of "oacute": Rune(0x000F3) + of "ocirc": Rune(0x000F4) + of "otilde": Rune(0x000F5) + of "ouml": Rune(0x000F6) + of "divide", "div": Rune(0x000F7) + of "oslash": Rune(0x000F8) + of "ugrave": Rune(0x000F9) + of "uacute": Rune(0x000FA) + of "ucirc": Rune(0x000FB) + of "uuml": Rune(0x000FC) + of "yacute": Rune(0x000FD) + of "thorn": Rune(0x000FE) + of "yuml": Rune(0x000FF) + of "Amacr": Rune(0x00100) + of "amacr": Rune(0x00101) + of "Abreve": Rune(0x00102) + of "abreve": Rune(0x00103) + of "Aogon": Rune(0x00104) + of "aogon": Rune(0x00105) + of "Cacute": Rune(0x00106) + of "cacute": Rune(0x00107) + of "Ccirc": Rune(0x00108) + of "ccirc": Rune(0x00109) + of "Cdot": Rune(0x0010A) + of "cdot": Rune(0x0010B) + of "Ccaron": Rune(0x0010C) + of "ccaron": Rune(0x0010D) + of "Dcaron": Rune(0x0010E) + of "dcaron": Rune(0x0010F) + of "Dstrok": Rune(0x00110) + of "dstrok": Rune(0x00111) + of "Emacr": Rune(0x00112) + of "emacr": Rune(0x00113) + of "Edot": Rune(0x00116) + of "edot": Rune(0x00117) + of "Eogon": Rune(0x00118) + of "eogon": Rune(0x00119) + of "Ecaron": Rune(0x0011A) + of "ecaron": Rune(0x0011B) + of "Gcirc": Rune(0x0011C) + of "gcirc": 
Rune(0x0011D) + of "Gbreve": Rune(0x0011E) + of "gbreve": Rune(0x0011F) + of "Gdot": Rune(0x00120) + of "gdot": Rune(0x00121) + of "Gcedil": Rune(0x00122) + of "Hcirc": Rune(0x00124) + of "hcirc": Rune(0x00125) + of "Hstrok": Rune(0x00126) + of "hstrok": Rune(0x00127) + of "Itilde": Rune(0x00128) + of "itilde": Rune(0x00129) + of "Imacr": Rune(0x0012A) + of "imacr": Rune(0x0012B) + of "Iogon": Rune(0x0012E) + of "iogon": Rune(0x0012F) + of "Idot": Rune(0x00130) + of "imath", "inodot": Rune(0x00131) + of "IJlig": Rune(0x00132) + of "ijlig": Rune(0x00133) + of "Jcirc": Rune(0x00134) + of "jcirc": Rune(0x00135) + of "Kcedil": Rune(0x00136) + of "kcedil": Rune(0x00137) + of "kgreen": Rune(0x00138) + of "Lacute": Rune(0x00139) + of "lacute": Rune(0x0013A) + of "Lcedil": Rune(0x0013B) + of "lcedil": Rune(0x0013C) + of "Lcaron": Rune(0x0013D) + of "lcaron": Rune(0x0013E) + of "Lmidot": Rune(0x0013F) + of "lmidot": Rune(0x00140) + of "Lstrok": Rune(0x00141) + of "lstrok": Rune(0x00142) + of "Nacute": Rune(0x00143) + of "nacute": Rune(0x00144) + of "Ncedil": Rune(0x00145) + of "ncedil": Rune(0x00146) + of "Ncaron": Rune(0x00147) + of "ncaron": Rune(0x00148) + of "napos": Rune(0x00149) + of "ENG": Rune(0x0014A) + of "eng": Rune(0x0014B) + of "Omacr": Rune(0x0014C) + of "omacr": Rune(0x0014D) + of "Odblac": Rune(0x00150) + of "odblac": Rune(0x00151) + of "OElig": Rune(0x00152) + of "oelig": Rune(0x00153) + of "Racute": Rune(0x00154) + of "racute": Rune(0x00155) + of "Rcedil": Rune(0x00156) + of "rcedil": Rune(0x00157) + of "Rcaron": Rune(0x00158) + of "rcaron": Rune(0x00159) + of "Sacute": Rune(0x0015A) + of "sacute": Rune(0x0015B) + of "Scirc": Rune(0x0015C) + of "scirc": Rune(0x0015D) + of "Scedil": Rune(0x0015E) + of "scedil": Rune(0x0015F) + of "Scaron": Rune(0x00160) + of "scaron": Rune(0x00161) + of "Tcedil": Rune(0x00162) + of "tcedil": Rune(0x00163) + of "Tcaron": Rune(0x00164) + of "tcaron": Rune(0x00165) + of "Tstrok": Rune(0x00166) + of "tstrok": Rune(0x00167) + of 
"Utilde": Rune(0x00168) + of "utilde": Rune(0x00169) + of "Umacr": Rune(0x0016A) + of "umacr": Rune(0x0016B) + of "Ubreve": Rune(0x0016C) + of "ubreve": Rune(0x0016D) + of "Uring": Rune(0x0016E) + of "uring": Rune(0x0016F) + of "Udblac": Rune(0x00170) + of "udblac": Rune(0x00171) + of "Uogon": Rune(0x00172) + of "uogon": Rune(0x00173) + of "Wcirc": Rune(0x00174) + of "wcirc": Rune(0x00175) + of "Ycirc": Rune(0x00176) + of "ycirc": Rune(0x00177) + of "Yuml": Rune(0x00178) + of "Zacute": Rune(0x00179) + of "zacute": Rune(0x0017A) + of "Zdot": Rune(0x0017B) + of "zdot": Rune(0x0017C) + of "Zcaron": Rune(0x0017D) + of "zcaron": Rune(0x0017E) + of "fnof": Rune(0x00192) + of "imped": Rune(0x001B5) + of "gacute": Rune(0x001F5) + of "jmath": Rune(0x00237) + of "circ": Rune(0x002C6) + of "caron", "Hacek": Rune(0x002C7) + of "breve", "Breve": Rune(0x002D8) + of "dot", "DiacriticalDot": Rune(0x002D9) + of "ring": Rune(0x002DA) + of "ogon": Rune(0x002DB) + of "tilde", "DiacriticalTilde": Rune(0x002DC) + of "dblac", "DiacriticalDoubleAcute": Rune(0x002DD) + of "DownBreve": Rune(0x00311) + of "UnderBar": Rune(0x00332) + of "Alpha": Rune(0x00391) + of "Beta": Rune(0x00392) + of "Gamma": Rune(0x00393) + of "Delta": Rune(0x00394) + of "Epsilon": Rune(0x00395) + of "Zeta": Rune(0x00396) + of "Eta": Rune(0x00397) + of "Theta": Rune(0x00398) + of "Iota": Rune(0x00399) + of "Kappa": Rune(0x0039A) + of "Lambda": Rune(0x0039B) + of "Mu": Rune(0x0039C) + of "Nu": Rune(0x0039D) + of "Xi": Rune(0x0039E) + of "Omicron": Rune(0x0039F) + of "Pi": Rune(0x003A0) + of "Rho": Rune(0x003A1) + of "Sigma": Rune(0x003A3) + of "Tau": Rune(0x003A4) + of "Upsilon": Rune(0x003A5) + of "Phi": Rune(0x003A6) + of "Chi": Rune(0x003A7) + of "Psi": Rune(0x003A8) + of "Omega": Rune(0x003A9) + of "alpha": Rune(0x003B1) + of "beta": Rune(0x003B2) + of "gamma": Rune(0x003B3) + of "delta": Rune(0x003B4) + of "epsiv", "varepsilon", "epsilon": Rune(0x003B5) + of "zeta": Rune(0x003B6) + of "eta": Rune(0x003B7) + of 
"theta": Rune(0x003B8) + of "iota": Rune(0x003B9) + of "kappa": Rune(0x003BA) + of "lambda": Rune(0x003BB) + of "mu": Rune(0x003BC) + of "nu": Rune(0x003BD) + of "xi": Rune(0x003BE) + of "omicron": Rune(0x003BF) + of "pi": Rune(0x003C0) + of "rho": Rune(0x003C1) + of "sigmav", "varsigma", "sigmaf": Rune(0x003C2) + of "sigma": Rune(0x003C3) + of "tau": Rune(0x003C4) + of "upsi", "upsilon": Rune(0x003C5) + of "phi", "phiv", "varphi": Rune(0x003C6) + of "chi": Rune(0x003C7) + of "psi": Rune(0x003C8) + of "omega": Rune(0x003C9) + of "thetav", "vartheta", "thetasym": Rune(0x003D1) + of "Upsi", "upsih": Rune(0x003D2) + of "straightphi": Rune(0x003D5) + of "piv", "varpi": Rune(0x003D6) + of "Gammad": Rune(0x003DC) + of "gammad", "digamma": Rune(0x003DD) + of "kappav", "varkappa": Rune(0x003F0) + of "rhov", "varrho": Rune(0x003F1) + of "epsi", "straightepsilon": Rune(0x003F5) + of "bepsi", "backepsilon": Rune(0x003F6) + of "IOcy": Rune(0x00401) + of "DJcy": Rune(0x00402) + of "GJcy": Rune(0x00403) + of "Jukcy": Rune(0x00404) + of "DScy": Rune(0x00405) + of "Iukcy": Rune(0x00406) + of "YIcy": Rune(0x00407) + of "Jsercy": Rune(0x00408) + of "LJcy": Rune(0x00409) + of "NJcy": Rune(0x0040A) + of "TSHcy": Rune(0x0040B) + of "KJcy": Rune(0x0040C) + of "Ubrcy": Rune(0x0040E) + of "DZcy": Rune(0x0040F) + of "Acy": Rune(0x00410) + of "Bcy": Rune(0x00411) + of "Vcy": Rune(0x00412) + of "Gcy": Rune(0x00413) + of "Dcy": Rune(0x00414) + of "IEcy": Rune(0x00415) + of "ZHcy": Rune(0x00416) + of "Zcy": Rune(0x00417) + of "Icy": Rune(0x00418) + of "Jcy": Rune(0x00419) + of "Kcy": Rune(0x0041A) + of "Lcy": Rune(0x0041B) + of "Mcy": Rune(0x0041C) + of "Ncy": Rune(0x0041D) + of "Ocy": Rune(0x0041E) + of "Pcy": Rune(0x0041F) + of "Rcy": Rune(0x00420) + of "Scy": Rune(0x00421) + of "Tcy": Rune(0x00422) + of "Ucy": Rune(0x00423) + of "Fcy": Rune(0x00424) + of "KHcy": Rune(0x00425) + of "TScy": Rune(0x00426) + of "CHcy": Rune(0x00427) + of "SHcy": Rune(0x00428) + of "SHCHcy": Rune(0x00429) + of 
"HARDcy": Rune(0x0042A) + of "Ycy": Rune(0x0042B) + of "SOFTcy": Rune(0x0042C) + of "Ecy": Rune(0x0042D) + of "YUcy": Rune(0x0042E) + of "YAcy": Rune(0x0042F) + of "acy": Rune(0x00430) + of "bcy": Rune(0x00431) + of "vcy": Rune(0x00432) + of "gcy": Rune(0x00433) + of "dcy": Rune(0x00434) + of "iecy": Rune(0x00435) + of "zhcy": Rune(0x00436) + of "zcy": Rune(0x00437) + of "icy": Rune(0x00438) + of "jcy": Rune(0x00439) + of "kcy": Rune(0x0043A) + of "lcy": Rune(0x0043B) + of "mcy": Rune(0x0043C) + of "ncy": Rune(0x0043D) + of "ocy": Rune(0x0043E) + of "pcy": Rune(0x0043F) + of "rcy": Rune(0x00440) + of "scy": Rune(0x00441) + of "tcy": Rune(0x00442) + of "ucy": Rune(0x00443) + of "fcy": Rune(0x00444) + of "khcy": Rune(0x00445) + of "tscy": Rune(0x00446) + of "chcy": Rune(0x00447) + of "shcy": Rune(0x00448) + of "shchcy": Rune(0x00449) + of "hardcy": Rune(0x0044A) + of "ycy": Rune(0x0044B) + of "softcy": Rune(0x0044C) + of "ecy": Rune(0x0044D) + of "yucy": Rune(0x0044E) + of "yacy": Rune(0x0044F) + of "iocy": Rune(0x00451) + of "djcy": Rune(0x00452) + of "gjcy": Rune(0x00453) + of "jukcy": Rune(0x00454) + of "dscy": Rune(0x00455) + of "iukcy": Rune(0x00456) + of "yicy": Rune(0x00457) + of "jsercy": Rune(0x00458) + of "ljcy": Rune(0x00459) + of "njcy": Rune(0x0045A) + of "tshcy": Rune(0x0045B) + of "kjcy": Rune(0x0045C) + of "ubrcy": Rune(0x0045E) + of "dzcy": Rune(0x0045F) + of "ensp": Rune(0x02002) + of "emsp": Rune(0x02003) + of "emsp13": Rune(0x02004) + of "emsp14": Rune(0x02005) + of "numsp": Rune(0x02007) + of "puncsp": Rune(0x02008) + of "thinsp", "ThinSpace": Rune(0x02009) + of "hairsp", "VeryThinSpace": Rune(0x0200A) + of "ZeroWidthSpace", "NegativeVeryThinSpace", "NegativeThinSpace", + "NegativeMediumSpace", "NegativeThickSpace": Rune(0x0200B) + of "zwnj": Rune(0x0200C) + of "zwj": Rune(0x0200D) + of "lrm": Rune(0x0200E) + of "rlm": Rune(0x0200F) + of "hyphen", "dash": Rune(0x02010) + of "ndash": Rune(0x02013) + of "mdash": Rune(0x02014) + of "horbar": 
Rune(0x02015) + of "Verbar", "Vert": Rune(0x02016) + of "lsquo", "OpenCurlyQuote": Rune(0x02018) + of "rsquo", "rsquor", "CloseCurlyQuote": Rune(0x02019) + of "lsquor", "sbquo": Rune(0x0201A) + of "ldquo", "OpenCurlyDoubleQuote": Rune(0x0201C) + of "rdquo", "rdquor", "CloseCurlyDoubleQuote": Rune(0x0201D) + of "ldquor", "bdquo": Rune(0x0201E) + of "dagger": Rune(0x02020) + of "Dagger", "ddagger": Rune(0x02021) + of "bull", "bullet": Rune(0x02022) + of "nldr": Rune(0x02025) + of "hellip", "mldr": Rune(0x02026) + of "permil": Rune(0x02030) + of "pertenk": Rune(0x02031) + of "prime": Rune(0x02032) + of "Prime": Rune(0x02033) + of "tprime": Rune(0x02034) + of "bprime", "backprime": Rune(0x02035) + of "lsaquo": Rune(0x02039) + of "rsaquo": Rune(0x0203A) + of "oline": Rune(0x0203E) + of "caret": Rune(0x02041) + of "hybull": Rune(0x02043) + of "frasl": Rune(0x02044) + of "bsemi": Rune(0x0204F) + of "qprime": Rune(0x02057) + of "MediumSpace": Rune(0x0205F) + of "NoBreak": Rune(0x02060) + of "ApplyFunction", "af": Rune(0x02061) + of "InvisibleTimes", "it": Rune(0x02062) + of "InvisibleComma", "ic": Rune(0x02063) + of "euro": Rune(0x020AC) + of "tdot", "TripleDot": Rune(0x020DB) + of "DotDot": Rune(0x020DC) + of "Copf", "complexes": Rune(0x02102) + of "incare": Rune(0x02105) + of "gscr": Rune(0x0210A) + of "hamilt", "HilbertSpace", "Hscr": Rune(0x0210B) + of "Hfr", "Poincareplane": Rune(0x0210C) + of "quaternions", "Hopf": Rune(0x0210D) + of "planckh": Rune(0x0210E) + of "planck", "hbar", "plankv", "hslash": Rune(0x0210F) + of "Iscr", "imagline": Rune(0x02110) + of "image", "Im", "imagpart", "Ifr": Rune(0x02111) + of "Lscr", "lagran", "Laplacetrf": Rune(0x02112) + of "ell": Rune(0x02113) + of "Nopf", "naturals": Rune(0x02115) + of "numero": Rune(0x02116) + of "copysr": Rune(0x02117) + of "weierp", "wp": Rune(0x02118) + of "Popf", "primes": Rune(0x02119) + of "rationals", "Qopf": Rune(0x0211A) + of "Rscr", "realine": Rune(0x0211B) + of "real", "Re", "realpart", "Rfr": 
Rune(0x0211C) + of "reals", "Ropf": Rune(0x0211D) + of "rx": Rune(0x0211E) + of "trade", "TRADE": Rune(0x02122) + of "integers", "Zopf": Rune(0x02124) + of "ohm": Rune(0x02126) + of "mho": Rune(0x02127) + of "Zfr", "zeetrf": Rune(0x02128) + of "iiota": Rune(0x02129) + of "angst": Rune(0x0212B) + of "bernou", "Bernoullis", "Bscr": Rune(0x0212C) + of "Cfr", "Cayleys": Rune(0x0212D) + of "escr": Rune(0x0212F) + of "Escr", "expectation": Rune(0x02130) + of "Fscr", "Fouriertrf": Rune(0x02131) + of "phmmat", "Mellintrf", "Mscr": Rune(0x02133) + of "order", "orderof", "oscr": Rune(0x02134) + of "alefsym", "aleph": Rune(0x02135) + of "beth": Rune(0x02136) + of "gimel": Rune(0x02137) + of "daleth": Rune(0x02138) + of "CapitalDifferentialD", "DD": Rune(0x02145) + of "DifferentialD", "dd": Rune(0x02146) + of "ExponentialE", "exponentiale", "ee": Rune(0x02147) + of "ImaginaryI", "ii": Rune(0x02148) + of "frac13": Rune(0x02153) + of "frac23": Rune(0x02154) + of "frac15": Rune(0x02155) + of "frac25": Rune(0x02156) + of "frac35": Rune(0x02157) + of "frac45": Rune(0x02158) + of "frac16": Rune(0x02159) + of "frac56": Rune(0x0215A) + of "frac18": Rune(0x0215B) + of "frac38": Rune(0x0215C) + of "frac58": Rune(0x0215D) + of "frac78": Rune(0x0215E) + of "larr", "leftarrow", "LeftArrow", "slarr", + "ShortLeftArrow": Rune(0x02190) + of "uarr", "uparrow", "UpArrow", "ShortUpArrow": Rune(0x02191) + of "rarr", "rightarrow", "RightArrow", "srarr", + "ShortRightArrow": Rune(0x02192) + of "darr", "downarrow", "DownArrow", + "ShortDownArrow": Rune(0x02193) + of "harr", "leftrightarrow", "LeftRightArrow": Rune(0x02194) + of "varr", "updownarrow", "UpDownArrow": Rune(0x02195) + of "nwarr", "UpperLeftArrow", "nwarrow": Rune(0x02196) + of "nearr", "UpperRightArrow", "nearrow": Rune(0x02197) + of "searr", "searrow", "LowerRightArrow": Rune(0x02198) + of "swarr", "swarrow", "LowerLeftArrow": Rune(0x02199) + of "nlarr", "nleftarrow": Rune(0x0219A) + of "nrarr", "nrightarrow": Rune(0x0219B) + of 
"rarrw", "rightsquigarrow": Rune(0x0219D) + of "Larr", "twoheadleftarrow": Rune(0x0219E) + of "Uarr": Rune(0x0219F) + of "Rarr", "twoheadrightarrow": Rune(0x021A0) + of "Darr": Rune(0x021A1) + of "larrtl", "leftarrowtail": Rune(0x021A2) + of "rarrtl", "rightarrowtail": Rune(0x021A3) + of "LeftTeeArrow", "mapstoleft": Rune(0x021A4) + of "UpTeeArrow", "mapstoup": Rune(0x021A5) + of "map", "RightTeeArrow", "mapsto": Rune(0x021A6) + of "DownTeeArrow", "mapstodown": Rune(0x021A7) + of "larrhk", "hookleftarrow": Rune(0x021A9) + of "rarrhk", "hookrightarrow": Rune(0x021AA) + of "larrlp", "looparrowleft": Rune(0x021AB) + of "rarrlp", "looparrowright": Rune(0x021AC) + of "harrw", "leftrightsquigarrow": Rune(0x021AD) + of "nharr", "nleftrightarrow": Rune(0x021AE) + of "lsh", "Lsh": Rune(0x021B0) + of "rsh", "Rsh": Rune(0x021B1) + of "ldsh": Rune(0x021B2) + of "rdsh": Rune(0x021B3) + of "crarr": Rune(0x021B5) + of "cularr", "curvearrowleft": Rune(0x021B6) + of "curarr", "curvearrowright": Rune(0x021B7) + of "olarr", "circlearrowleft": Rune(0x021BA) + of "orarr", "circlearrowright": Rune(0x021BB) + of "lharu", "LeftVector", "leftharpoonup": Rune(0x021BC) + of "lhard", "leftharpoondown", "DownLeftVector": Rune(0x021BD) + of "uharr", "upharpoonright", "RightUpVector": Rune(0x021BE) + of "uharl", "upharpoonleft", "LeftUpVector": Rune(0x021BF) + of "rharu", "RightVector", "rightharpoonup": Rune(0x021C0) + of "rhard", "rightharpoondown", "DownRightVector": Rune(0x021C1) + of "dharr", "RightDownVector", "downharpoonright": Rune(0x021C2) + of "dharl", "LeftDownVector", "downharpoonleft": Rune(0x021C3) + of "rlarr", "rightleftarrows", "RightArrowLeftArrow": Rune(0x021C4) + of "udarr", "UpArrowDownArrow": Rune(0x021C5) + of "lrarr", "leftrightarrows", "LeftArrowRightArrow": Rune(0x021C6) + of "llarr", "leftleftarrows": Rune(0x021C7) + of "uuarr", "upuparrows": Rune(0x021C8) + of "rrarr", "rightrightarrows": Rune(0x021C9) + of "ddarr", "downdownarrows": Rune(0x021CA) + of "lrhar", 
"ReverseEquilibrium", + "leftrightharpoons": Rune(0x021CB) + of "rlhar", "rightleftharpoons", "Equilibrium": Rune(0x021CC) + of "nlArr", "nLeftarrow": Rune(0x021CD) + of "nhArr", "nLeftrightarrow": Rune(0x021CE) + of "nrArr", "nRightarrow": Rune(0x021CF) + of "lArr", "Leftarrow", "DoubleLeftArrow": Rune(0x021D0) + of "uArr", "Uparrow", "DoubleUpArrow": Rune(0x021D1) + of "rArr", "Rightarrow", "Implies", + "DoubleRightArrow": Rune(0x021D2) + of "dArr", "Downarrow", "DoubleDownArrow": Rune(0x021D3) + of "hArr", "Leftrightarrow", "DoubleLeftRightArrow", + "iff": Rune(0x021D4) + of "vArr", "Updownarrow", "DoubleUpDownArrow": Rune(0x021D5) + of "nwArr": Rune(0x021D6) + of "neArr": Rune(0x021D7) + of "seArr": Rune(0x021D8) + of "swArr": Rune(0x021D9) + of "lAarr", "Lleftarrow": Rune(0x021DA) + of "rAarr", "Rrightarrow": Rune(0x021DB) + of "zigrarr": Rune(0x021DD) + of "larrb", "LeftArrowBar": Rune(0x021E4) + of "rarrb", "RightArrowBar": Rune(0x021E5) + of "duarr", "DownArrowUpArrow": Rune(0x021F5) + of "loarr": Rune(0x021FD) + of "roarr": Rune(0x021FE) + of "hoarr": Rune(0x021FF) + of "forall", "ForAll": Rune(0x02200) + of "comp", "complement": Rune(0x02201) + of "part", "PartialD": Rune(0x02202) + of "exist", "Exists": Rune(0x02203) + of "nexist", "NotExists", "nexists": Rune(0x02204) + of "empty", "emptyset", "emptyv", "varnothing": Rune(0x02205) + of "nabla", "Del": Rune(0x02207) + of "isin", "isinv", "Element", "in": Rune(0x02208) + of "notin", "NotElement", "notinva": Rune(0x02209) + of "niv", "ReverseElement", "ni", "SuchThat": Rune(0x0220B) + of "notni", "notniva", "NotReverseElement": Rune(0x0220C) + of "prod", "Product": Rune(0x0220F) + of "coprod", "Coproduct": Rune(0x02210) + of "sum", "Sum": Rune(0x02211) + of "minus": Rune(0x02212) + of "mnplus", "mp", "MinusPlus": Rune(0x02213) + of "plusdo", "dotplus": Rune(0x02214) + of "setmn", "setminus", "Backslash", "ssetmn", + "smallsetminus": Rune(0x02216) + of "lowast": Rune(0x02217) + of "compfn", "SmallCircle": 
Rune(0x02218) + of "radic", "Sqrt": Rune(0x0221A) + of "prop", "propto", "Proportional", "vprop", + "varpropto": Rune(0x0221D) + of "infin": Rune(0x0221E) + of "angrt": Rune(0x0221F) + of "ang", "angle": Rune(0x02220) + of "angmsd", "measuredangle": Rune(0x02221) + of "angsph": Rune(0x02222) + of "mid", "VerticalBar", "smid", "shortmid": Rune(0x02223) + of "nmid", "NotVerticalBar", "nsmid", "nshortmid": Rune(0x02224) + of "par", "parallel", "DoubleVerticalBar", "spar", + "shortparallel": Rune(0x02225) + of "npar", "nparallel", "NotDoubleVerticalBar", "nspar", + "nshortparallel": Rune(0x02226) + of "and", "wedge": Rune(0x02227) + of "or", "vee": Rune(0x02228) + of "cap": Rune(0x02229) + of "cup": Rune(0x0222A) + of "int", "Integral": Rune(0x0222B) + of "Int": Rune(0x0222C) + of "tint", "iiint": Rune(0x0222D) + of "conint", "oint", "ContourIntegral": Rune(0x0222E) + of "Conint", "DoubleContourIntegral": Rune(0x0222F) + of "Cconint": Rune(0x02230) + of "cwint": Rune(0x02231) + of "cwconint", "ClockwiseContourIntegral": Rune(0x02232) + of "awconint", "CounterClockwiseContourIntegral": Rune(0x02233) + of "there4", "therefore", "Therefore": Rune(0x02234) + of "becaus", "because", "Because": Rune(0x02235) + of "ratio": Rune(0x02236) + of "Colon", "Proportion": Rune(0x02237) + of "minusd", "dotminus": Rune(0x02238) + of "mDDot": Rune(0x0223A) + of "homtht": Rune(0x0223B) + of "sim", "Tilde", "thksim", "thicksim": Rune(0x0223C) + of "bsim", "backsim": Rune(0x0223D) + of "ac", "mstpos": Rune(0x0223E) + of "acd": Rune(0x0223F) + of "wreath", "VerticalTilde", "wr": Rune(0x02240) + of "nsim", "NotTilde": Rune(0x02241) + of "esim", "EqualTilde", "eqsim": Rune(0x02242) + of "sime", "TildeEqual", "simeq": Rune(0x02243) + of "nsime", "nsimeq", "NotTildeEqual": Rune(0x02244) + of "cong", "TildeFullEqual": Rune(0x02245) + of "simne": Rune(0x02246) + of "ncong", "NotTildeFullEqual": Rune(0x02247) + of "asymp", "ap", "TildeTilde", "approx", "thkap", + "thickapprox": Rune(0x02248) + of 
"nap", "NotTildeTilde", "napprox": Rune(0x02249) + of "ape", "approxeq": Rune(0x0224A) + of "apid": Rune(0x0224B) + of "bcong", "backcong": Rune(0x0224C) + of "asympeq", "CupCap": Rune(0x0224D) + of "bump", "HumpDownHump", "Bumpeq": Rune(0x0224E) + of "bumpe", "HumpEqual", "bumpeq": Rune(0x0224F) + of "esdot", "DotEqual", "doteq": Rune(0x02250) + of "eDot", "doteqdot": Rune(0x02251) + of "efDot", "fallingdotseq": Rune(0x02252) + of "erDot", "risingdotseq": Rune(0x02253) + of "colone", "coloneq", "Assign": Rune(0x02254) + of "ecolon", "eqcolon": Rune(0x02255) + of "ecir", "eqcirc": Rune(0x02256) + of "cire", "circeq": Rune(0x02257) + of "wedgeq": Rune(0x02259) + of "veeeq": Rune(0x0225A) + of "trie", "triangleq": Rune(0x0225C) + of "equest", "questeq": Rune(0x0225F) + of "ne", "NotEqual": Rune(0x02260) + of "equiv", "Congruent": Rune(0x02261) + of "nequiv", "NotCongruent": Rune(0x02262) + of "le", "leq": Rune(0x02264) + of "ge", "GreaterEqual", "geq": Rune(0x02265) + of "lE", "LessFullEqual", "leqq": Rune(0x02266) + of "gE", "GreaterFullEqual", "geqq": Rune(0x02267) + of "lnE", "lneqq": Rune(0x02268) + of "gnE", "gneqq": Rune(0x02269) + of "Lt", "NestedLessLess", "ll": Rune(0x0226A) + of "Gt", "NestedGreaterGreater", "gg": Rune(0x0226B) + of "twixt", "between": Rune(0x0226C) + of "NotCupCap": Rune(0x0226D) + of "nlt", "NotLess", "nless": Rune(0x0226E) + of "ngt", "NotGreater", "ngtr": Rune(0x0226F) + of "nle", "NotLessEqual", "nleq": Rune(0x02270) + of "nge", "NotGreaterEqual", "ngeq": Rune(0x02271) + of "lsim", "LessTilde", "lesssim": Rune(0x02272) + of "gsim", "gtrsim", "GreaterTilde": Rune(0x02273) + of "nlsim", "NotLessTilde": Rune(0x02274) + of "ngsim", "NotGreaterTilde": Rune(0x02275) + of "lg", "lessgtr", "LessGreater": Rune(0x02276) + of "gl", "gtrless", "GreaterLess": Rune(0x02277) + of "ntlg", "NotLessGreater": Rune(0x02278) + of "ntgl", "NotGreaterLess": Rune(0x02279) + of "pr", "Precedes", "prec": Rune(0x0227A) + of "sc", "Succeeds", "succ": 
Rune(0x0227B) + of "prcue", "PrecedesSlantEqual", "preccurlyeq": Rune(0x0227C) + of "sccue", "SucceedsSlantEqual", "succcurlyeq": Rune(0x0227D) + of "prsim", "precsim", "PrecedesTilde": Rune(0x0227E) + of "scsim", "succsim", "SucceedsTilde": Rune(0x0227F) + of "npr", "nprec", "NotPrecedes": Rune(0x02280) + of "nsc", "nsucc", "NotSucceeds": Rune(0x02281) + of "sub", "subset": Rune(0x02282) + of "sup", "supset", "Superset": Rune(0x02283) + of "nsub": Rune(0x02284) + of "nsup": Rune(0x02285) + of "sube", "SubsetEqual", "subseteq": Rune(0x02286) + of "supe", "supseteq", "SupersetEqual": Rune(0x02287) + of "nsube", "nsubseteq", "NotSubsetEqual": Rune(0x02288) + of "nsupe", "nsupseteq", "NotSupersetEqual": Rune(0x02289) + of "subne", "subsetneq": Rune(0x0228A) + of "supne", "supsetneq": Rune(0x0228B) + of "cupdot": Rune(0x0228D) + of "uplus", "UnionPlus": Rune(0x0228E) + of "sqsub", "SquareSubset", "sqsubset": Rune(0x0228F) + of "sqsup", "SquareSuperset", "sqsupset": Rune(0x02290) + of "sqsube", "SquareSubsetEqual", "sqsubseteq": Rune(0x02291) + of "sqsupe", "SquareSupersetEqual", "sqsupseteq": Rune(0x02292) + of "sqcap", "SquareIntersection": Rune(0x02293) + of "sqcup", "SquareUnion": Rune(0x02294) + of "oplus", "CirclePlus": Rune(0x02295) + of "ominus", "CircleMinus": Rune(0x02296) + of "otimes", "CircleTimes": Rune(0x02297) + of "osol": Rune(0x02298) + of "odot", "CircleDot": Rune(0x02299) + of "ocir", "circledcirc": Rune(0x0229A) + of "oast", "circledast": Rune(0x0229B) + of "odash", "circleddash": Rune(0x0229D) + of "plusb", "boxplus": Rune(0x0229E) + of "minusb", "boxminus": Rune(0x0229F) + of "timesb", "boxtimes": Rune(0x022A0) + of "sdotb", "dotsquare": Rune(0x022A1) + of "vdash", "RightTee": Rune(0x022A2) + of "dashv", "LeftTee": Rune(0x022A3) + of "top", "DownTee": Rune(0x022A4) + of "bottom", "bot", "perp", "UpTee": Rune(0x022A5) + of "models": Rune(0x022A7) + of "vDash", "DoubleRightTee": Rune(0x022A8) + of "Vdash": Rune(0x022A9) + of "Vvdash": Rune(0x022AA) 
+ of "VDash": Rune(0x022AB) + of "nvdash": Rune(0x022AC) + of "nvDash": Rune(0x022AD) + of "nVdash": Rune(0x022AE) + of "nVDash": Rune(0x022AF) + of "prurel": Rune(0x022B0) + of "vltri", "vartriangleleft", "LeftTriangle": Rune(0x022B2) + of "vrtri", "vartriangleright", "RightTriangle": Rune(0x022B3) + of "ltrie", "trianglelefteq", "LeftTriangleEqual": Rune(0x022B4) + of "rtrie", "trianglerighteq", "RightTriangleEqual": Rune(0x022B5) + of "origof": Rune(0x022B6) + of "imof": Rune(0x022B7) + of "mumap", "multimap": Rune(0x022B8) + of "hercon": Rune(0x022B9) + of "intcal", "intercal": Rune(0x022BA) + of "veebar": Rune(0x022BB) + of "barvee": Rune(0x022BD) + of "angrtvb": Rune(0x022BE) + of "lrtri": Rune(0x022BF) + of "xwedge", "Wedge", "bigwedge": Rune(0x022C0) + of "xvee", "Vee", "bigvee": Rune(0x022C1) + of "xcap", "Intersection", "bigcap": Rune(0x022C2) + of "xcup", "Union", "bigcup": Rune(0x022C3) + of "diam", "diamond", "Diamond": Rune(0x022C4) + of "sdot": Rune(0x022C5) + of "sstarf", "Star": Rune(0x022C6) + of "divonx", "divideontimes": Rune(0x022C7) + of "bowtie": Rune(0x022C8) + of "ltimes": Rune(0x022C9) + of "rtimes": Rune(0x022CA) + of "lthree", "leftthreetimes": Rune(0x022CB) + of "rthree", "rightthreetimes": Rune(0x022CC) + of "bsime", "backsimeq": Rune(0x022CD) + of "cuvee", "curlyvee": Rune(0x022CE) + of "cuwed", "curlywedge": Rune(0x022CF) + of "Sub", "Subset": Rune(0x022D0) + of "Sup", "Supset": Rune(0x022D1) + of "Cap": Rune(0x022D2) + of "Cup": Rune(0x022D3) + of "fork", "pitchfork": Rune(0x022D4) + of "epar": Rune(0x022D5) + of "ltdot", "lessdot": Rune(0x022D6) + of "gtdot", "gtrdot": Rune(0x022D7) + of "Ll": Rune(0x022D8) + of "Gg", "ggg": Rune(0x022D9) + of "leg", "LessEqualGreater", "lesseqgtr": Rune(0x022DA) + of "gel", "gtreqless", "GreaterEqualLess": Rune(0x022DB) + of "cuepr", "curlyeqprec": Rune(0x022DE) + of "cuesc", "curlyeqsucc": Rune(0x022DF) + of "nprcue", "NotPrecedesSlantEqual": Rune(0x022E0) + of "nsccue", "NotSucceedsSlantEqual": 
Rune(0x022E1) + of "nsqsube", "NotSquareSubsetEqual": Rune(0x022E2) + of "nsqsupe", "NotSquareSupersetEqual": Rune(0x022E3) + of "lnsim": Rune(0x022E6) + of "gnsim": Rune(0x022E7) + of "prnsim", "precnsim": Rune(0x022E8) + of "scnsim", "succnsim": Rune(0x022E9) + of "nltri", "ntriangleleft", "NotLeftTriangle": Rune(0x022EA) + of "nrtri", "ntriangleright", "NotRightTriangle": Rune(0x022EB) + of "nltrie", "ntrianglelefteq", + "NotLeftTriangleEqual": Rune(0x022EC) + of "nrtrie", "ntrianglerighteq", + "NotRightTriangleEqual": Rune(0x022ED) + of "vellip": Rune(0x022EE) + of "ctdot": Rune(0x022EF) + of "utdot": Rune(0x022F0) + of "dtdot": Rune(0x022F1) + of "disin": Rune(0x022F2) + of "isinsv": Rune(0x022F3) + of "isins": Rune(0x022F4) + of "isindot": Rune(0x022F5) + of "notinvc": Rune(0x022F6) + of "notinvb": Rune(0x022F7) + of "isinE": Rune(0x022F9) + of "nisd": Rune(0x022FA) + of "xnis": Rune(0x022FB) + of "nis": Rune(0x022FC) + of "notnivc": Rune(0x022FD) + of "notnivb": Rune(0x022FE) + of "barwed", "barwedge": Rune(0x02305) + of "Barwed", "doublebarwedge": Rune(0x02306) + of "lceil", "LeftCeiling": Rune(0x02308) + of "rceil", "RightCeiling": Rune(0x02309) + of "lfloor", "LeftFloor": Rune(0x0230A) + of "rfloor", "RightFloor": Rune(0x0230B) + of "drcrop": Rune(0x0230C) + of "dlcrop": Rune(0x0230D) + of "urcrop": Rune(0x0230E) + of "ulcrop": Rune(0x0230F) + of "bnot": Rune(0x02310) + of "profline": Rune(0x02312) + of "profsurf": Rune(0x02313) + of "telrec": Rune(0x02315) + of "target": Rune(0x02316) + of "ulcorn", "ulcorner": Rune(0x0231C) + of "urcorn", "urcorner": Rune(0x0231D) + of "dlcorn", "llcorner": Rune(0x0231E) + of "drcorn", "lrcorner": Rune(0x0231F) + of "frown", "sfrown": Rune(0x02322) + of "smile", "ssmile": Rune(0x02323) + of "cylcty": Rune(0x0232D) + of "profalar": Rune(0x0232E) + of "topbot": Rune(0x02336) + of "ovbar": Rune(0x0233D) + of "solbar": Rune(0x0233F) + of "angzarr": Rune(0x0237C) + of "lmoust", "lmoustache": Rune(0x023B0) + of "rmoust", 
"rmoustache": Rune(0x023B1) + of "tbrk", "OverBracket": Rune(0x023B4) + of "bbrk", "UnderBracket": Rune(0x023B5) + of "bbrktbrk": Rune(0x023B6) + of "OverParenthesis": Rune(0x023DC) + of "UnderParenthesis": Rune(0x023DD) + of "OverBrace": Rune(0x023DE) + of "UnderBrace": Rune(0x023DF) + of "trpezium": Rune(0x023E2) + of "elinters": Rune(0x023E7) + of "blank": Rune(0x02423) + of "oS", "circledS": Rune(0x024C8) + of "boxh", "HorizontalLine": Rune(0x02500) + of "boxv": Rune(0x02502) + of "boxdr": Rune(0x0250C) + of "boxdl": Rune(0x02510) + of "boxur": Rune(0x02514) + of "boxul": Rune(0x02518) + of "boxvr": Rune(0x0251C) + of "boxvl": Rune(0x02524) + of "boxhd": Rune(0x0252C) + of "boxhu": Rune(0x02534) + of "boxvh": Rune(0x0253C) + of "boxH": Rune(0x02550) + of "boxV": Rune(0x02551) + of "boxdR": Rune(0x02552) + of "boxDr": Rune(0x02553) + of "boxDR": Rune(0x02554) + of "boxdL": Rune(0x02555) + of "boxDl": Rune(0x02556) + of "boxDL": Rune(0x02557) + of "boxuR": Rune(0x02558) + of "boxUr": Rune(0x02559) + of "boxUR": Rune(0x0255A) + of "boxuL": Rune(0x0255B) + of "boxUl": Rune(0x0255C) + of "boxUL": Rune(0x0255D) + of "boxvR": Rune(0x0255E) + of "boxVr": Rune(0x0255F) + of "boxVR": Rune(0x02560) + of "boxvL": Rune(0x02561) + of "boxVl": Rune(0x02562) + of "boxVL": Rune(0x02563) + of "boxHd": Rune(0x02564) + of "boxhD": Rune(0x02565) + of "boxHD": Rune(0x02566) + of "boxHu": Rune(0x02567) + of "boxhU": Rune(0x02568) + of "boxHU": Rune(0x02569) + of "boxvH": Rune(0x0256A) + of "boxVh": Rune(0x0256B) + of "boxVH": Rune(0x0256C) + of "uhblk": Rune(0x02580) + of "lhblk": Rune(0x02584) + of "block": Rune(0x02588) + of "blk14": Rune(0x02591) + of "blk12": Rune(0x02592) + of "blk34": Rune(0x02593) + of "squ", "square", "Square": Rune(0x025A1) + of "squf", "squarf", "blacksquare", + "FilledVerySmallSquare": Rune(0x025AA) + of "EmptyVerySmallSquare": Rune(0x025AB) + of "rect": Rune(0x025AD) + of "marker": Rune(0x025AE) + of "fltns": Rune(0x025B1) + of "xutri", "bigtriangleup": 
Rune(0x025B3) + of "utrif", "blacktriangle": Rune(0x025B4) + of "utri", "triangle": Rune(0x025B5) + of "rtrif", "blacktriangleright": Rune(0x025B8) + of "rtri", "triangleright": Rune(0x025B9) + of "xdtri", "bigtriangledown": Rune(0x025BD) + of "dtrif", "blacktriangledown": Rune(0x025BE) + of "dtri", "triangledown": Rune(0x025BF) + of "ltrif", "blacktriangleleft": Rune(0x025C2) + of "ltri", "triangleleft": Rune(0x025C3) + of "loz", "lozenge": Rune(0x025CA) + of "cir": Rune(0x025CB) + of "tridot": Rune(0x025EC) + of "xcirc", "bigcirc": Rune(0x025EF) + of "ultri": Rune(0x025F8) + of "urtri": Rune(0x025F9) + of "lltri": Rune(0x025FA) + of "EmptySmallSquare": Rune(0x025FB) + of "FilledSmallSquare": Rune(0x025FC) + of "starf", "bigstar": Rune(0x02605) + of "star": Rune(0x02606) + of "phone": Rune(0x0260E) + of "female": Rune(0x02640) + of "male": Rune(0x02642) + of "spades", "spadesuit": Rune(0x02660) + of "clubs", "clubsuit": Rune(0x02663) + of "hearts", "heartsuit": Rune(0x02665) + of "diams", "diamondsuit": Rune(0x02666) + of "sung": Rune(0x0266A) + of "flat": Rune(0x0266D) + of "natur", "natural": Rune(0x0266E) + of "sharp": Rune(0x0266F) + of "check", "checkmark": Rune(0x02713) + of "cross": Rune(0x02717) + of "malt", "maltese": Rune(0x02720) + of "sext": Rune(0x02736) + of "VerticalSeparator": Rune(0x02758) + of "lbbrk": Rune(0x02772) + of "rbbrk": Rune(0x02773) + of "lobrk", "LeftDoubleBracket": Rune(0x027E6) + of "robrk", "RightDoubleBracket": Rune(0x027E7) + of "lang", "LeftAngleBracket", "langle": Rune(0x027E8) + of "rang", "RightAngleBracket", "rangle": Rune(0x027E9) + of "Lang": Rune(0x027EA) + of "Rang": Rune(0x027EB) + of "loang": Rune(0x027EC) + of "roang": Rune(0x027ED) + of "xlarr", "longleftarrow", "LongLeftArrow": Rune(0x027F5) + of "xrarr", "longrightarrow", "LongRightArrow": Rune(0x027F6) + of "xharr", "longleftrightarrow", + "LongLeftRightArrow": Rune(0x027F7) + of "xlArr", "Longleftarrow", "DoubleLongLeftArrow": Rune(0x027F8) + of "xrArr", 
"Longrightarrow", "DoubleLongRightArrow": Rune(0x027F9) + of "xhArr", "Longleftrightarrow", + "DoubleLongLeftRightArrow": Rune(0x027FA) + of "xmap", "longmapsto": Rune(0x027FC) + of "dzigrarr": Rune(0x027FF) + of "nvlArr": Rune(0x02902) + of "nvrArr": Rune(0x02903) + of "nvHarr": Rune(0x02904) + of "Map": Rune(0x02905) + of "lbarr": Rune(0x0290C) + of "rbarr", "bkarow": Rune(0x0290D) + of "lBarr": Rune(0x0290E) + of "rBarr", "dbkarow": Rune(0x0290F) + of "RBarr", "drbkarow": Rune(0x02910) + of "DDotrahd": Rune(0x02911) + of "UpArrowBar": Rune(0x02912) + of "DownArrowBar": Rune(0x02913) + of "Rarrtl": Rune(0x02916) + of "latail": Rune(0x02919) + of "ratail": Rune(0x0291A) + of "lAtail": Rune(0x0291B) + of "rAtail": Rune(0x0291C) + of "larrfs": Rune(0x0291D) + of "rarrfs": Rune(0x0291E) + of "larrbfs": Rune(0x0291F) + of "rarrbfs": Rune(0x02920) + of "nwarhk": Rune(0x02923) + of "nearhk": Rune(0x02924) + of "searhk", "hksearow": Rune(0x02925) + of "swarhk", "hkswarow": Rune(0x02926) + of "nwnear": Rune(0x02927) + of "nesear", "toea": Rune(0x02928) + of "seswar", "tosa": Rune(0x02929) + of "swnwar": Rune(0x0292A) + of "rarrc": Rune(0x02933) + of "cudarrr": Rune(0x02935) + of "ldca": Rune(0x02936) + of "rdca": Rune(0x02937) + of "cudarrl": Rune(0x02938) + of "larrpl": Rune(0x02939) + of "curarrm": Rune(0x0293C) + of "cularrp": Rune(0x0293D) + of "rarrpl": Rune(0x02945) + of "harrcir": Rune(0x02948) + of "Uarrocir": Rune(0x02949) + of "lurdshar": Rune(0x0294A) + of "ldrushar": Rune(0x0294B) + of "LeftRightVector": Rune(0x0294E) + of "RightUpDownVector": Rune(0x0294F) + of "DownLeftRightVector": Rune(0x02950) + of "LeftUpDownVector": Rune(0x02951) + of "LeftVectorBar": Rune(0x02952) + of "RightVectorBar": Rune(0x02953) + of "RightUpVectorBar": Rune(0x02954) + of "RightDownVectorBar": Rune(0x02955) + of "DownLeftVectorBar": Rune(0x02956) + of "DownRightVectorBar": Rune(0x02957) + of "LeftUpVectorBar": Rune(0x02958) + of "LeftDownVectorBar": Rune(0x02959) + of 
"LeftTeeVector": Rune(0x0295A) + of "RightTeeVector": Rune(0x0295B) + of "RightUpTeeVector": Rune(0x0295C) + of "RightDownTeeVector": Rune(0x0295D) + of "DownLeftTeeVector": Rune(0x0295E) + of "DownRightTeeVector": Rune(0x0295F) + of "LeftUpTeeVector": Rune(0x02960) + of "LeftDownTeeVector": Rune(0x02961) + of "lHar": Rune(0x02962) + of "uHar": Rune(0x02963) + of "rHar": Rune(0x02964) + of "dHar": Rune(0x02965) + of "luruhar": Rune(0x02966) + of "ldrdhar": Rune(0x02967) + of "ruluhar": Rune(0x02968) + of "rdldhar": Rune(0x02969) + of "lharul": Rune(0x0296A) + of "llhard": Rune(0x0296B) + of "rharul": Rune(0x0296C) + of "lrhard": Rune(0x0296D) + of "udhar", "UpEquilibrium": Rune(0x0296E) + of "duhar", "ReverseUpEquilibrium": Rune(0x0296F) + of "RoundImplies": Rune(0x02970) + of "erarr": Rune(0x02971) + of "simrarr": Rune(0x02972) + of "larrsim": Rune(0x02973) + of "rarrsim": Rune(0x02974) + of "rarrap": Rune(0x02975) + of "ltlarr": Rune(0x02976) + of "gtrarr": Rune(0x02978) + of "subrarr": Rune(0x02979) + of "suplarr": Rune(0x0297B) + of "lfisht": Rune(0x0297C) + of "rfisht": Rune(0x0297D) + of "ufisht": Rune(0x0297E) + of "dfisht": Rune(0x0297F) + of "lopar": Rune(0x02985) + of "ropar": Rune(0x02986) + of "lbrke": Rune(0x0298B) + of "rbrke": Rune(0x0298C) + of "lbrkslu": Rune(0x0298D) + of "rbrksld": Rune(0x0298E) + of "lbrksld": Rune(0x0298F) + of "rbrkslu": Rune(0x02990) + of "langd": Rune(0x02991) + of "rangd": Rune(0x02992) + of "lparlt": Rune(0x02993) + of "rpargt": Rune(0x02994) + of "gtlPar": Rune(0x02995) + of "ltrPar": Rune(0x02996) + of "vzigzag": Rune(0x0299A) + of "vangrt": Rune(0x0299C) + of "angrtvbd": Rune(0x0299D) + of "ange": Rune(0x029A4) + of "range": Rune(0x029A5) + of "dwangle": Rune(0x029A6) + of "uwangle": Rune(0x029A7) + of "angmsdaa": Rune(0x029A8) + of "angmsdab": Rune(0x029A9) + of "angmsdac": Rune(0x029AA) + of "angmsdad": Rune(0x029AB) + of "angmsdae": Rune(0x029AC) + of "angmsdaf": Rune(0x029AD) + of "angmsdag": Rune(0x029AE) + of 
"angmsdah": Rune(0x029AF) + of "bemptyv": Rune(0x029B0) + of "demptyv": Rune(0x029B1) + of "cemptyv": Rune(0x029B2) + of "raemptyv": Rune(0x029B3) + of "laemptyv": Rune(0x029B4) + of "ohbar": Rune(0x029B5) + of "omid": Rune(0x029B6) + of "opar": Rune(0x029B7) + of "operp": Rune(0x029B9) + of "olcross": Rune(0x029BB) + of "odsold": Rune(0x029BC) + of "olcir": Rune(0x029BE) + of "ofcir": Rune(0x029BF) + of "olt": Rune(0x029C0) + of "ogt": Rune(0x029C1) + of "cirscir": Rune(0x029C2) + of "cirE": Rune(0x029C3) + of "solb": Rune(0x029C4) + of "bsolb": Rune(0x029C5) + of "boxbox": Rune(0x029C9) + of "trisb": Rune(0x029CD) + of "rtriltri": Rune(0x029CE) + of "LeftTriangleBar": Rune(0x029CF) + of "RightTriangleBar": Rune(0x029D0) + of "race": Rune(0x029DA) + of "iinfin": Rune(0x029DC) + of "infintie": Rune(0x029DD) + of "nvinfin": Rune(0x029DE) + of "eparsl": Rune(0x029E3) + of "smeparsl": Rune(0x029E4) + of "eqvparsl": Rune(0x029E5) + of "lozf", "blacklozenge": Rune(0x029EB) + of "RuleDelayed": Rune(0x029F4) + of "dsol": Rune(0x029F6) + of "xodot", "bigodot": Rune(0x02A00) + of "xoplus", "bigoplus": Rune(0x02A01) + of "xotime", "bigotimes": Rune(0x02A02) + of "xuplus", "biguplus": Rune(0x02A04) + of "xsqcup", "bigsqcup": Rune(0x02A06) + of "qint", "iiiint": Rune(0x02A0C) + of "fpartint": Rune(0x02A0D) + of "cirfnint": Rune(0x02A10) + of "awint": Rune(0x02A11) + of "rppolint": Rune(0x02A12) + of "scpolint": Rune(0x02A13) + of "npolint": Rune(0x02A14) + of "pointint": Rune(0x02A15) + of "quatint": Rune(0x02A16) + of "intlarhk": Rune(0x02A17) + of "pluscir": Rune(0x02A22) + of "plusacir": Rune(0x02A23) + of "simplus": Rune(0x02A24) + of "plusdu": Rune(0x02A25) + of "plussim": Rune(0x02A26) + of "plustwo": Rune(0x02A27) + of "mcomma": Rune(0x02A29) + of "minusdu": Rune(0x02A2A) + of "loplus": Rune(0x02A2D) + of "roplus": Rune(0x02A2E) + of "Cross": Rune(0x02A2F) + of "timesd": Rune(0x02A30) + of "timesbar": Rune(0x02A31) + of "smashp": Rune(0x02A33) + of "lotimes": 
Rune(0x02A34) + of "rotimes": Rune(0x02A35) + of "otimesas": Rune(0x02A36) + of "Otimes": Rune(0x02A37) + of "odiv": Rune(0x02A38) + of "triplus": Rune(0x02A39) + of "triminus": Rune(0x02A3A) + of "tritime": Rune(0x02A3B) + of "iprod", "intprod": Rune(0x02A3C) + of "amalg": Rune(0x02A3F) + of "capdot": Rune(0x02A40) + of "ncup": Rune(0x02A42) + of "ncap": Rune(0x02A43) + of "capand": Rune(0x02A44) + of "cupor": Rune(0x02A45) + of "cupcap": Rune(0x02A46) + of "capcup": Rune(0x02A47) + of "cupbrcap": Rune(0x02A48) + of "capbrcup": Rune(0x02A49) + of "cupcup": Rune(0x02A4A) + of "capcap": Rune(0x02A4B) + of "ccups": Rune(0x02A4C) + of "ccaps": Rune(0x02A4D) + of "ccupssm": Rune(0x02A50) + of "And": Rune(0x02A53) + of "Or": Rune(0x02A54) + of "andand": Rune(0x02A55) + of "oror": Rune(0x02A56) + of "orslope": Rune(0x02A57) + of "andslope": Rune(0x02A58) + of "andv": Rune(0x02A5A) + of "orv": Rune(0x02A5B) + of "andd": Rune(0x02A5C) + of "ord": Rune(0x02A5D) + of "wedbar": Rune(0x02A5F) + of "sdote": Rune(0x02A66) + of "simdot": Rune(0x02A6A) + of "congdot": Rune(0x02A6D) + of "easter": Rune(0x02A6E) + of "apacir": Rune(0x02A6F) + of "apE": Rune(0x02A70) + of "eplus": Rune(0x02A71) + of "pluse": Rune(0x02A72) + of "Esim": Rune(0x02A73) + of "Colone": Rune(0x02A74) + of "Equal": Rune(0x02A75) + of "eDDot", "ddotseq": Rune(0x02A77) + of "equivDD": Rune(0x02A78) + of "ltcir": Rune(0x02A79) + of "gtcir": Rune(0x02A7A) + of "ltquest": Rune(0x02A7B) + of "gtquest": Rune(0x02A7C) + of "les", "LessSlantEqual", "leqslant": Rune(0x02A7D) + of "ges", "GreaterSlantEqual", "geqslant": Rune(0x02A7E) + of "lesdot": Rune(0x02A7F) + of "gesdot": Rune(0x02A80) + of "lesdoto": Rune(0x02A81) + of "gesdoto": Rune(0x02A82) + of "lesdotor": Rune(0x02A83) + of "gesdotol": Rune(0x02A84) + of "lap", "lessapprox": Rune(0x02A85) + of "gap", "gtrapprox": Rune(0x02A86) + of "lne", "lneq": Rune(0x02A87) + of "gne", "gneq": Rune(0x02A88) + of "lnap", "lnapprox": Rune(0x02A89) + of "gnap", "gnapprox": 
Rune(0x02A8A) + of "lEg", "lesseqqgtr": Rune(0x02A8B) + of "gEl", "gtreqqless": Rune(0x02A8C) + of "lsime": Rune(0x02A8D) + of "gsime": Rune(0x02A8E) + of "lsimg": Rune(0x02A8F) + of "gsiml": Rune(0x02A90) + of "lgE": Rune(0x02A91) + of "glE": Rune(0x02A92) + of "lesges": Rune(0x02A93) + of "gesles": Rune(0x02A94) + of "els", "eqslantless": Rune(0x02A95) + of "egs", "eqslantgtr": Rune(0x02A96) + of "elsdot": Rune(0x02A97) + of "egsdot": Rune(0x02A98) + of "el": Rune(0x02A99) + of "eg": Rune(0x02A9A) + of "siml": Rune(0x02A9D) + of "simg": Rune(0x02A9E) + of "simlE": Rune(0x02A9F) + of "simgE": Rune(0x02AA0) + of "LessLess": Rune(0x02AA1) + of "GreaterGreater": Rune(0x02AA2) + of "glj": Rune(0x02AA4) + of "gla": Rune(0x02AA5) + of "ltcc": Rune(0x02AA6) + of "gtcc": Rune(0x02AA7) + of "lescc": Rune(0x02AA8) + of "gescc": Rune(0x02AA9) + of "smt": Rune(0x02AAA) + of "lat": Rune(0x02AAB) + of "smte": Rune(0x02AAC) + of "late": Rune(0x02AAD) + of "bumpE": Rune(0x02AAE) + of "pre", "preceq", "PrecedesEqual": Rune(0x02AAF) + of "sce", "succeq", "SucceedsEqual": Rune(0x02AB0) + of "prE": Rune(0x02AB3) + of "scE": Rune(0x02AB4) + of "prnE", "precneqq": Rune(0x02AB5) + of "scnE", "succneqq": Rune(0x02AB6) + of "prap", "precapprox": Rune(0x02AB7) + of "scap", "succapprox": Rune(0x02AB8) + of "prnap", "precnapprox": Rune(0x02AB9) + of "scnap", "succnapprox": Rune(0x02ABA) + of "Pr": Rune(0x02ABB) + of "Sc": Rune(0x02ABC) + of "subdot": Rune(0x02ABD) + of "supdot": Rune(0x02ABE) + of "subplus": Rune(0x02ABF) + of "supplus": Rune(0x02AC0) + of "submult": Rune(0x02AC1) + of "supmult": Rune(0x02AC2) + of "subedot": Rune(0x02AC3) + of "supedot": Rune(0x02AC4) + of "subE", "subseteqq": Rune(0x02AC5) + of "supE", "supseteqq": Rune(0x02AC6) + of "subsim": Rune(0x02AC7) + of "supsim": Rune(0x02AC8) + of "subnE", "subsetneqq": Rune(0x02ACB) + of "supnE", "supsetneqq": Rune(0x02ACC) + of "csub": Rune(0x02ACF) + of "csup": Rune(0x02AD0) + of "csube": Rune(0x02AD1) + of "csupe": 
Rune(0x02AD2) + of "subsup": Rune(0x02AD3) + of "supsub": Rune(0x02AD4) + of "subsub": Rune(0x02AD5) + of "supsup": Rune(0x02AD6) + of "suphsub": Rune(0x02AD7) + of "supdsub": Rune(0x02AD8) + of "forkv": Rune(0x02AD9) + of "topfork": Rune(0x02ADA) + of "mlcp": Rune(0x02ADB) + of "Dashv", "DoubleLeftTee": Rune(0x02AE4) + of "Vdashl": Rune(0x02AE6) + of "Barv": Rune(0x02AE7) + of "vBar": Rune(0x02AE8) + of "vBarv": Rune(0x02AE9) + of "Vbar": Rune(0x02AEB) + of "Not": Rune(0x02AEC) + of "bNot": Rune(0x02AED) + of "rnmid": Rune(0x02AEE) + of "cirmid": Rune(0x02AEF) + of "midcir": Rune(0x02AF0) + of "topcir": Rune(0x02AF1) + of "nhpar": Rune(0x02AF2) + of "parsim": Rune(0x02AF3) + of "parsl": Rune(0x02AFD) + of "fflig": Rune(0x0FB00) + of "filig": Rune(0x0FB01) + of "fllig": Rune(0x0FB02) + of "ffilig": Rune(0x0FB03) + of "ffllig": Rune(0x0FB04) + of "Ascr": Rune(0x1D49C) + of "Cscr": Rune(0x1D49E) + of "Dscr": Rune(0x1D49F) + of "Gscr": Rune(0x1D4A2) + of "Jscr": Rune(0x1D4A5) + of "Kscr": Rune(0x1D4A6) + of "Nscr": Rune(0x1D4A9) + of "Oscr": Rune(0x1D4AA) + of "Pscr": Rune(0x1D4AB) + of "Qscr": Rune(0x1D4AC) + of "Sscr": Rune(0x1D4AE) + of "Tscr": Rune(0x1D4AF) + of "Uscr": Rune(0x1D4B0) + of "Vscr": Rune(0x1D4B1) + of "Wscr": Rune(0x1D4B2) + of "Xscr": Rune(0x1D4B3) + of "Yscr": Rune(0x1D4B4) + of "Zscr": Rune(0x1D4B5) + of "ascr": Rune(0x1D4B6) + of "bscr": Rune(0x1D4B7) + of "cscr": Rune(0x1D4B8) + of "dscr": Rune(0x1D4B9) + of "fscr": Rune(0x1D4BB) + of "hscr": Rune(0x1D4BD) + of "iscr": Rune(0x1D4BE) + of "jscr": Rune(0x1D4BF) + of "kscr": Rune(0x1D4C0) + of "lscr": Rune(0x1D4C1) + of "mscr": Rune(0x1D4C2) + of "nscr": Rune(0x1D4C3) + of "pscr": Rune(0x1D4C5) + of "qscr": Rune(0x1D4C6) + of "rscr": Rune(0x1D4C7) + of "sscr": Rune(0x1D4C8) + of "tscr": Rune(0x1D4C9) + of "uscr": Rune(0x1D4CA) + of "vscr": Rune(0x1D4CB) + of "wscr": Rune(0x1D4CC) + of "xscr": Rune(0x1D4CD) + of "yscr": Rune(0x1D4CE) + of "zscr": Rune(0x1D4CF) + of "Afr": Rune(0x1D504) + of "Bfr": 
Rune(0x1D505) + of "Dfr": Rune(0x1D507) + of "Efr": Rune(0x1D508) + of "Ffr": Rune(0x1D509) + of "Gfr": Rune(0x1D50A) + of "Jfr": Rune(0x1D50D) + of "Kfr": Rune(0x1D50E) + of "Lfr": Rune(0x1D50F) + of "Mfr": Rune(0x1D510) + of "Nfr": Rune(0x1D511) + of "Ofr": Rune(0x1D512) + of "Pfr": Rune(0x1D513) + of "Qfr": Rune(0x1D514) + of "Sfr": Rune(0x1D516) + of "Tfr": Rune(0x1D517) + of "Ufr": Rune(0x1D518) + of "Vfr": Rune(0x1D519) + of "Wfr": Rune(0x1D51A) + of "Xfr": Rune(0x1D51B) + of "Yfr": Rune(0x1D51C) + of "afr": Rune(0x1D51E) + of "bfr": Rune(0x1D51F) + of "cfr": Rune(0x1D520) + of "dfr": Rune(0x1D521) + of "efr": Rune(0x1D522) + of "ffr": Rune(0x1D523) + of "gfr": Rune(0x1D524) + of "hfr": Rune(0x1D525) + of "ifr": Rune(0x1D526) + of "jfr": Rune(0x1D527) + of "kfr": Rune(0x1D528) + of "lfr": Rune(0x1D529) + of "mfr": Rune(0x1D52A) + of "nfr": Rune(0x1D52B) + of "ofr": Rune(0x1D52C) + of "pfr": Rune(0x1D52D) + of "qfr": Rune(0x1D52E) + of "rfr": Rune(0x1D52F) + of "sfr": Rune(0x1D530) + of "tfr": Rune(0x1D531) + of "ufr": Rune(0x1D532) + of "vfr": Rune(0x1D533) + of "wfr": Rune(0x1D534) + of "xfr": Rune(0x1D535) + of "yfr": Rune(0x1D536) + of "zfr": Rune(0x1D537) + of "Aopf": Rune(0x1D538) + of "Bopf": Rune(0x1D539) + of "Dopf": Rune(0x1D53B) + of "Eopf": Rune(0x1D53C) + of "Fopf": Rune(0x1D53D) + of "Gopf": Rune(0x1D53E) + of "Iopf": Rune(0x1D540) + of "Jopf": Rune(0x1D541) + of "Kopf": Rune(0x1D542) + of "Lopf": Rune(0x1D543) + of "Mopf": Rune(0x1D544) + of "Oopf": Rune(0x1D546) + of "Sopf": Rune(0x1D54A) + of "Topf": Rune(0x1D54B) + of "Uopf": Rune(0x1D54C) + of "Vopf": Rune(0x1D54D) + of "Wopf": Rune(0x1D54E) + of "Xopf": Rune(0x1D54F) + of "Yopf": Rune(0x1D550) + of "aopf": Rune(0x1D552) + of "bopf": Rune(0x1D553) + of "copf": Rune(0x1D554) + of "dopf": Rune(0x1D555) + of "eopf": Rune(0x1D556) + of "fopf": Rune(0x1D557) + of "gopf": Rune(0x1D558) + of "hopf": Rune(0x1D559) + of "iopf": Rune(0x1D55A) + of "jopf": Rune(0x1D55B) + of "kopf": Rune(0x1D55C) + of 
"lopf": Rune(0x1D55D) + of "mopf": Rune(0x1D55E) + of "nopf": Rune(0x1D55F) + of "oopf": Rune(0x1D560) + of "popf": Rune(0x1D561) + of "qopf": Rune(0x1D562) + of "ropf": Rune(0x1D563) + of "sopf": Rune(0x1D564) + of "topf": Rune(0x1D565) + of "uopf": Rune(0x1D566) + of "vopf": Rune(0x1D567) + of "wopf": Rune(0x1D568) + of "xopf": Rune(0x1D569) + of "yopf": Rune(0x1D56A) + of "zopf": Rune(0x1D56B) + else: Rune(0) + +proc entityToUtf8*(entity: string): string = + ## Converts an HTML entity name like `&Uuml;` or values like `&#220;` + ## or `&#x000DC;` to its UTF-8 equivalent. ## "" is returned if the entity name is unknown. The HTML parser ## already converts entities to UTF-8. - for name, val in items(Entities): - if name == entity: return toUTF8(Rune(val)) - result = "" + runnableExamples: + const sigma = "Σ" + doAssert entityToUtf8("") == "" + doAssert entityToUtf8("a") == "" + doAssert entityToUtf8("gt") == ">" + doAssert entityToUtf8("Uuml") == "Ü" + doAssert entityToUtf8("quest") == "?" + doAssert entityToUtf8("#63") == "?" 
+ doAssert entityToUtf8("Sigma") == sigma + doAssert entityToUtf8("#931") == sigma + doAssert entityToUtf8("#0931") == sigma + doAssert entityToUtf8("#x3A3") == sigma + doAssert entityToUtf8("#x03A3") == sigma + doAssert entityToUtf8("#x3a3") == sigma + doAssert entityToUtf8("#X3a3") == sigma + let rune = entityToRune(entity) + if rune.ord <= 0: result = "" + else: result = toUTF8(rune) -proc addNode(father, son: XmlNode) = +proc addNode(father, son: XmlNode) = if son != nil: add(father, son) -proc parse(x: var XmlParser, errors: var seq[string]): XmlNode +proc parse(x: var XmlParser, errors: var seq[string]): XmlNode {.gcsafe.} proc expected(x: var XmlParser, n: XmlNode): string = result = errorMsg(x, "</" & n.tag & "> expected") -template elemName(x: expr): expr = rawData(x) +template elemName(x: untyped): untyped = rawData(x) + +template adderr(x: untyped) = + errors.add(x) -proc untilElementEnd(x: var XmlParser, result: XmlNode, +proc untilElementEnd(x: var XmlParser, result: XmlNode, errors: var seq[string]) = - # we parsed e.g. ``<br>`` and don't really expect a ``</br>``: + # we parsed e.g. 
`<br>` and don't really expect a `</br>`: if result.htmlTag in SingleTags: if x.kind != xmlElementEnd or cmpIgnoreCase(x.elemName, result.tag) != 0: return @@ -462,43 +1916,53 @@ proc untilElementEnd(x: var XmlParser, result: XmlNode, case x.kind of xmlElementStart, xmlElementOpen: case result.htmlTag - of tagLi, tagP, tagDt, tagDd, tagInput, tagOption: - # some tags are common to have no ``</end>``, like ``<li>``: + of tagP, tagInput, tagOption: + # some tags are common to have no `</end>`, like `<li>` but + # allow `<p>` in `<dd>`, `<dt>` and `<li>` in next case if htmlTag(x.elemName) in {tagLi, tagP, tagDt, tagDd, tagInput, tagOption}: - errors.add(expected(x, result)) + adderr(expected(x, result)) break - of tagTd, tagTh, tagTfoot, tagThead: + of tagDd, tagDt, tagLi: + if htmlTag(x.elemName) in {tagLi, tagDt, tagDd, tagInput, + tagOption}: + adderr(expected(x, result)) + break + of tagTd, tagTh: if htmlTag(x.elemName) in {tagTr, tagTd, tagTh, tagTfoot, tagThead}: - errors.add(expected(x, result)) + adderr(expected(x, result)) break of tagTr: if htmlTag(x.elemName) == tagTr: - errors.add(expected(x, result)) + adderr(expected(x, result)) break of tagOptgroup: if htmlTag(x.elemName) in {tagOption, tagOptgroup}: - errors.add(expected(x, result)) + adderr(expected(x, result)) break else: discard result.addNode(parse(x, errors)) - of xmlElementEnd: - if cmpIgnoreCase(x.elemName, result.tag) == 0: - next(x) - else: - #echo "5; expected: ", result.htmltag, " ", x.elemName - errors.add(expected(x, result)) - # do not skip it here! 
+ of xmlElementEnd: + if cmpIgnoreCase(x.elemName, result.tag) != 0: + #echo "5; expected: ", result.htmltag, " ", x.elemName + adderr(expected(x, result)) + # this seems to do better match error corrections in browsers: + while x.kind in {xmlElementEnd, xmlWhitespace}: + if x.kind == xmlElementEnd and cmpIgnoreCase(x.elemName, + result.tag) == 0: + break + next(x) + next(x) break of xmlEof: - errors.add(expected(x, result)) + adderr(expected(x, result)) break else: result.addNode(parse(x, errors)) proc parse(x: var XmlParser, errors: var seq[string]): XmlNode = case x.kind - of xmlComment: + of xmlComment: result = newComment(x.rawData) next(x) of xmlCharData, xmlWhitespace: @@ -508,19 +1972,19 @@ proc parse(x: var XmlParser, errors: var seq[string]): XmlNode = # we just ignore processing instructions for now next(x) of xmlError: - errors.add(errorMsg(x)) + adderr(errorMsg(x)) next(x) of xmlElementStart: - result = newElement(x.elemName.toLower) + result = newElement(toLowerAscii(x.elemName)) next(x) untilElementEnd(x, result, errors) of xmlElementEnd: - errors.add(errorMsg(x, "unexpected ending tag: " & x.elemName)) - of xmlElementOpen: - result = newElement(x.elemName.toLower) + adderr(errorMsg(x, "unexpected ending tag: " & x.elemName)) + of xmlElementOpen: + result = newElement(toLowerAscii(x.elemName)) next(x) result.attrs = newStringTable() - while true: + while true: case x.kind of xmlAttribute: result.attrs[x.rawData] = x.rawData2 @@ -529,18 +1993,18 @@ proc parse(x: var XmlParser, errors: var seq[string]): XmlNode = next(x) break of xmlError: - errors.add(errorMsg(x)) + adderr(errorMsg(x)) next(x) break else: - errors.add(errorMsg(x, "'>' expected")) + adderr(errorMsg(x, "'>' expected")) next(x) break untilElementEnd(x, result, errors) of xmlAttribute, xmlElementClose: - errors.add(errorMsg(x, "<some_tag> expected")) + adderr(errorMsg(x, "<some_tag> expected")) next(x) - of xmlCData: + of xmlCData: result = newCData(x.rawData) next(x) of xmlEntity: @@ 
-549,22 +2013,23 @@ proc parse(x: var XmlParser, errors: var seq[string]): XmlNode = next(x) of xmlEof: discard -proc parseHtml*(s: Stream, filename: string, - errors: var seq[string]): XmlNode = - ## parses the XML from stream `s` and returns a ``PXmlNode``. Every +proc parseHtml*(s: Stream, filename: string, + errors: var seq[string]): XmlNode = + ## Parses the XML from stream `s` and returns a `XmlNode`. Every ## occurred parsing error is added to the `errors` sequence. var x: XmlParser - open(x, s, filename, {reportComments, reportWhitespace}) + open(x, s, filename, {reportComments, reportWhitespace, allowUnquotedAttribs, + allowEmptyAttribs}) next(x) # skip the DOCTYPE: if x.kind == xmlSpecial: next(x) - + result = newElement("document") result.addNode(parse(x, errors)) #if x.kind != xmlEof: - # errors.add(errorMsg(x, "EOF expected")) + # adderr(errorMsg(x, "EOF expected")) while x.kind != xmlEof: - var oldPos = x.bufpos # little hack to see if we made any progess + var oldPos = x.bufpos # little hack to see if we made any progress result.addNode(parse(x, errors)) if x.bufpos == oldPos: # force progress! @@ -573,33 +2038,38 @@ proc parseHtml*(s: Stream, filename: string, if result.len == 1: result = result[0] -proc parseHtml*(s: Stream): XmlNode = - ## parses the XTML from stream `s` and returns a ``PXmlNode``. All parsing +proc parseHtml*(s: Stream): XmlNode = + ## Parses the HTML from stream `s` and returns a `XmlNode`. All parsing ## errors are ignored. var errors: seq[string] = @[] result = parseHtml(s, "unknown_html_doc", errors) -proc loadHtml*(path: string, errors: var seq[string]): XmlNode = - ## Loads and parses HTML from file specified by ``path``, and returns - ## a ``PXmlNode``. Every occurred parsing error is added to +proc parseHtml*(html: string): XmlNode = + ## Parses the HTML from string `html` and returns a `XmlNode`. All parsing + ## errors are ignored. 
+ parseHtml(newStringStream(html)) + +proc loadHtml*(path: string, errors: var seq[string]): XmlNode = + ## Loads and parses HTML from file specified by `path`, and returns + ## a `XmlNode`. Every occurred parsing error is added to ## the `errors` sequence. var s = newFileStream(path, fmRead) if s == nil: raise newException(IOError, "Unable to read file: " & path) result = parseHtml(s, path, errors) -proc loadHtml*(path: string): XmlNode = - ## Loads and parses HTML from file specified by ``path``, and returns - ## a ``PXmlNode``. All parsing errors are ignored. +proc loadHtml*(path: string): XmlNode = + ## Loads and parses HTML from file specified by `path`, and returns + ## a `XmlNode`. All parsing errors are ignored. var errors: seq[string] = @[] result = loadHtml(path, errors) -when isMainModule: - import os +when not defined(testing) and isMainModule: + import std/os - var errors: seq[string] = @[] + var errors: seq[string] = @[] var x = loadHtml(paramStr(1), errors) for e in items(errors): echo e - + var f: File if open(f, "test.txt", fmWrite): f.write($x) diff --git a/lib/pure/httpclient.nim b/lib/pure/httpclient.nim index 4c2580da0..08ea99627 100644 --- a/lib/pure/httpclient.nim +++ b/lib/pure/httpclient.nim @@ -1,116 +1,365 @@ # # # Nim's Runtime Library -# (c) Copyright 2010 Dominik Picheta, Andreas Rumpf +# (c) Copyright 2019 Nim Contributors # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## This module implements a simple HTTP client that can be used to retrieve -## webpages/other data. +## webpages and other data. ## -## -## **Note**: This module is not ideal, connection is not kept alive so sites with -## many redirects are expensive. As such in the future this module may change, -## and the current procedures will be deprecated. +## .. warning:: Validate untrusted inputs: URI parsers and getters are not detecting malicious URIs. 
## ## Retrieving a website ## ==================== ## ## This example uses HTTP GET to retrieve -## ``http://google.com`` +## `http://google.com`: +## +## ```Nim +## import std/httpclient +## var client = newHttpClient() +## try: +## echo client.getContent("http://google.com") +## finally: +## client.close() +## ``` +## +## The same action can also be performed asynchronously, simply use the +## `AsyncHttpClient`: +## +## ```Nim +## import std/[asyncdispatch, httpclient] +## +## proc asyncProc(): Future[string] {.async.} = +## var client = newAsyncHttpClient() +## try: +## return await client.getContent("http://google.com") +## finally: +## client.close() ## -## .. code-block:: Nim -## echo(getContent("http://google.com")) +## echo waitFor asyncProc() +## ``` +## +## The functionality implemented by `HttpClient` and `AsyncHttpClient` +## is the same, so you can use whichever one suits you best in the examples +## shown here. +## +## **Note:** You need to run asynchronous examples in an async proc +## otherwise you will get an `Undeclared identifier: 'await'` error. +## +## **Note:** An asynchronous client instance can only deal with one +## request at a time. To send multiple requests in parallel, use +## multiple client instances. ## ## Using HTTP POST ## =============== ## ## This example demonstrates the usage of the W3 HTML Validator, it -## uses ``multipart/form-data`` as the ``Content-Type`` to send the HTML to -## the server. +## uses `multipart/form-data` as the `Content-Type` to send the HTML to be +## validated to the server. ## -## .. 
code-block:: Nim +## ```Nim +## var client = newHttpClient() ## var data = newMultipartData() ## data["output"] = "soap12" ## data["uploaded_file"] = ("test.html", "text/html", ## "<html><head></head><body><p>test</p></body></html>") +## try: +## echo client.postContent("http://validator.w3.org/check", multipart=data) +## finally: +## client.close() +## ``` +## +## To stream files from disk when performing the request, use `addFiles`. +## +## **Note:** This will allocate a new `Mimetypes` database every time you call +## it, you can pass your own via the `mimeDb` parameter to avoid this. +## +## ```Nim +## let mimes = newMimetypes() +## var client = newHttpClient() +## var data = newMultipartData() +## data.addFiles({"uploaded_file": "test.html"}, mimeDb = mimes) +## try: +## echo client.postContent("http://validator.w3.org/check", multipart=data) +## finally: +## client.close() +## ``` +## +## You can also make post requests with custom headers. +## This example sets `Content-Type` to `application/json` +## and uses a json object for the body +## +## ```Nim +## import std/[httpclient, json] +## +## let client = newHttpClient() +## client.headers = newHttpHeaders({ "Content-Type": "application/json" }) +## let body = %*{ +## "data": "some text" +## } +## try: +## let response = client.request("http://some.api", httpMethod = HttpPost, body = $body) +## echo response.status +## finally: +## client.close() +## ``` +## +## Progress reporting +## ================== +## +## You may specify a callback procedure to be called during an HTTP request. +## This callback will be executed every second with information about the +## progress of the HTTP request. 
## -## echo postContent("http://validator.w3.org/check", multipart=data) +## ```Nim +## import std/[asyncdispatch, httpclient] ## -## Asynchronous HTTP requests -## ========================== +## proc onProgressChanged(total, progress, speed: BiggestInt) {.async.} = +## echo("Downloaded ", progress, " of ", total) +## echo("Current rate: ", speed div 1000, "kb/s") ## -## You simply have to create a new instance of the ``AsyncHttpClient`` object. -## You may then use ``await`` on the functions defined for that object. -## Keep in mind that the following code needs to be inside an asynchronous -## procedure. +## proc asyncProc() {.async.} = +## var client = newAsyncHttpClient() +## client.onProgressChanged = onProgressChanged +## try: +## discard await client.getContent("http://speedtest-ams2.digitalocean.com/100mb.test") +## finally: +## client.close() ## -## .. code-block::nim +## waitFor asyncProc() +## ``` +## +## If you would like to remove the callback simply set it to `nil`. +## +## ```Nim +## client.onProgressChanged = nil +## ``` +## +## .. warning:: The `total` reported by httpclient may be 0 in some cases. ## -## var client = newAsyncHttpClient() -## var resp = await client.request("http://google.com") ## ## SSL/TLS support ## =============== -## This requires the OpenSSL library, fortunately it's widely used and installed +## This requires the OpenSSL library. Fortunately it's widely used and installed ## on many operating systems. httpclient will use SSL automatically if you give -## any of the functions a url with the ``https`` schema, for example: -## ``https://github.com/``, you also have to compile with ``ssl`` defined like so: -## ``nim c -d:ssl ...``. +## any of the functions a url with the `https` schema, for example: +## `https://github.com/`. +## +## You will also have to compile with `ssl` defined like so: +## `nim c -d:ssl ...`. +## +## Certificate validation is performed by default. 
+## +## A set of directories and files from the `ssl_certs <ssl_certs.html>`_ +## module are scanned to locate CA certificates. +## +## Example of setting SSL verification parameters in a new client: +## +## ```Nim +## import httpclient +## var client = newHttpClient(sslContext=newContext(verifyMode=CVerifyPeer)) +## ``` +## +## There are three options for verify mode: +## +## * ``CVerifyNone``: certificates are not verified; +## * ``CVerifyPeer``: certificates are verified; +## * ``CVerifyPeerUseEnvVars``: certificates are verified and the optional +## environment variables SSL_CERT_FILE and SSL_CERT_DIR are also used to +## locate certificates +## +## See `newContext <net.html#newContext.string,string,string,string>`_ to tweak or disable certificate validation. ## ## Timeouts ## ======== -## Currently all functions support an optional timeout, by default the timeout is set to -## `-1` which means that the function will never time out. The timeout is -## measured in miliseconds, once it is set any call on a socket which may -## block will be susceptible to this timeout, however please remember that the +## +## Currently only the synchronous functions support a timeout. +## The timeout is +## measured in milliseconds, once it is set any call on a socket which may +## block will be susceptible to this timeout. +## +## It may be surprising but the ## function as a whole can take longer than the specified timeout, only ## individual internal calls on the socket are affected. In practice this means ## that as long as the server is sending data an exception will not be raised, -## if however data does not reach client within the specified timeout an ETimeout -## exception will then be raised. +## if however data does not reach the client within the specified timeout a +## `TimeoutError` exception will be raised. 
+## +## Here is how to set a timeout when creating an `HttpClient` instance: +## +## ```Nim +## import std/httpclient +## +## let client = newHttpClient(timeout = 42) +## ``` ## ## Proxy ## ===== ## -## A proxy can be specified as a param to any of these procedures, the ``newProxy`` -## constructor should be used for this purpose. However, -## currently only basic authentication is supported. +## A proxy can be specified as a param to any of the procedures defined in +## this module. To do this, use the `newProxy` constructor. Unfortunately, +## only basic authentication is supported at the moment. +## +## Some examples on how to configure a Proxy for `HttpClient`: +## +## ```Nim +## import std/httpclient +## +## let myProxy = newProxy("http://myproxy.network") +## let client = newHttpClient(proxy = myProxy) +## ``` +## +## Use proxies with basic authentication: +## +## ```Nim +## import std/httpclient +## +## let myProxy = newProxy("http://myproxy.network", auth="user:password") +## let client = newHttpClient(proxy = myProxy) +## ``` +## +## Get Proxy URL from environment variables: +## +## ```Nim +## import std/httpclient +## +## var url = "" +## try: +## if existsEnv("http_proxy"): +## url = getEnv("http_proxy") +## elif existsEnv("https_proxy"): +## url = getEnv("https_proxy") +## except ValueError: +## echo "Unable to parse proxy from environment variables." +## +## let myProxy = newProxy(url = url) +## let client = newHttpClient(proxy = myProxy) +## ``` +## +## Redirects +## ========= +## +## The maximum redirects can be set with the `maxRedirects` of `int` type, +## it specifies the maximum amount of redirects to follow, +## it defaults to `5`, you can set it to `0` to disable redirects. 
+## +## Here you can see an example about how to set the `maxRedirects` of `HttpClient`: +## +## ```Nim +## import std/httpclient +## +## let client = newHttpClient(maxRedirects = 0) +## ``` +## + +import std/private/since -import net, strutils, uri, parseutils, strtabs, base64, os, mimetypes, math -import asyncnet, asyncdispatch -import rawsockets +import std/[ + net, strutils, uri, parseutils, base64, os, mimetypes, + math, random, httpcore, times, tables, streams, monotimes, + asyncnet, asyncdispatch, asyncfile, nativesockets, +] + +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] + +export httpcore except parseHeader # TODO: The `except` doesn't work type - Response* = tuple[ - version: string, - status: string, - headers: StringTableRef, - body: string] + Response* = ref object + version*: string + status*: string + headers*: HttpHeaders + body: string + bodyStream*: Stream + + AsyncResponse* = ref object + version*: string + status*: string + headers*: HttpHeaders + body: string + bodyStream*: FutureStream[string] + +proc code*(response: Response | AsyncResponse): HttpCode + {.raises: [ValueError, OverflowDefect].} = + ## Retrieves the specified response's `HttpCode`. + ## + ## Raises a `ValueError` if the response's `status` does not have a + ## corresponding `HttpCode`. + return response.status[0 .. 2].parseInt.HttpCode + +proc contentType*(response: Response | AsyncResponse): string {.inline.} = + ## Retrieves the specified response's content type. + ## + ## This is effectively the value of the "Content-Type" header. + response.headers.getOrDefault("content-type") + +proc contentLength*(response: Response | AsyncResponse): int = + ## Retrieves the specified response's content length. + ## + ## This is effectively the value of the "Content-Length" header. + ## + ## A `ValueError` exception will be raised if the value is not an integer. + ## If the Content-Length header is not set in the response, ContentLength is set to the value -1. 
+ var contentLengthHeader = response.headers.getOrDefault("Content-Length", HttpHeaderValues(@["-1"])) + result = contentLengthHeader.parseInt() +proc lastModified*(response: Response | AsyncResponse): DateTime = + ## Retrieves the specified response's last modified time. + ## + ## This is effectively the value of the "Last-Modified" header. + ## + ## Raises a `ValueError` if the parsing fails or the value is not a correctly + ## formatted time. + var lastModifiedHeader = response.headers.getOrDefault("last-modified") + result = parse(lastModifiedHeader, "ddd, dd MMM yyyy HH:mm:ss 'GMT'", utc()) + +proc body*(response: Response): string = + ## Retrieves the specified response's body. + ## + ## The response's body stream is read synchronously. + if response.body.len == 0: + response.body = response.bodyStream.readAll() + return response.body + +proc body*(response: AsyncResponse): Future[string] {.async.} = + ## Reads the response's body and caches it. The read is performed only + ## once. 
+ if response.body.len == 0: + response.body = await readAll(response.bodyStream) + return response.body + +type Proxy* = ref object url*: Uri auth*: string - MultipartEntries* = openarray[tuple[name, content: string]] + MultipartEntry = object + name, content: string + case isFile: bool + of true: + filename, contentType: string + fileSize: int64 + isStream: bool + else: discard + + MultipartEntries* = openArray[tuple[name, content: string]] MultipartData* = ref object - content: seq[string] + content: seq[MultipartEntry] - ProtocolError* = object of IOError ## exception that is raised when server - ## does not conform to the implemented - ## protocol + ProtocolError* = object of IOError ## exception that is raised when server + ## does not conform to the implemented + ## protocol - HttpRequestError* = object of IOError ## Thrown in the ``getContent`` proc - ## and ``postContent`` proc, + HttpRequestError* = object of IOError ## Thrown in the `getContent` proc + ## and `postContent` proc, ## when the server returns an error -{.deprecated: [TResponse: Response, PProxy: Proxy, - EInvalidProtocol: ProtocolError, EHttpRequestErr: HttpRequestError -].} - -const defUserAgent* = "Nim httpclient/0.1" +const defUserAgent* = "Nim-httpclient/" & NimVersion proc httpError(msg: string) = var e: ref ProtocolError @@ -124,192 +373,78 @@ proc fileError(msg: string) = e.msg = msg raise e -proc parseChunks(s: Socket, timeout: int): string = - result = "" - var ri = 0 - while true: - var chunkSizeStr = "" - var chunkSize = 0 - s.readLine(chunkSizeStr, timeout) - var i = 0 - if chunkSizeStr == "": - httpError("Server terminated connection prematurely") - while true: - case chunkSizeStr[i] - of '0'..'9': - chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('0')) - of 'a'..'f': - chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('a') + 10) - of 'A'..'F': - chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('A') + 10) - of '\0': - break - of ';': - # 
http://tools.ietf.org/html/rfc2616#section-3.6.1 - # We don't care about chunk-extensions. - break - else: - httpError("Invalid chunk size: " & chunkSizeStr) - inc(i) - if chunkSize <= 0: - s.skip(2, timeout) # Skip \c\L - break - result.setLen(ri+chunkSize) - var bytesRead = 0 - while bytesRead != chunkSize: - let ret = recv(s, addr(result[ri]), chunkSize-bytesRead, timeout) - ri += ret - bytesRead += ret - s.skip(2, timeout) # Skip \c\L - # Trailer headers will only be sent if the request specifies that we want - # them: http://tools.ietf.org/html/rfc2616#section-3.6.1 - -proc parseBody(s: Socket, headers: StringTableRef, timeout: int): string = - result = "" - if headers["Transfer-Encoding"] == "chunked": - result = parseChunks(s, timeout) - else: - # -REGION- Content-Length - # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.3 - var contentLengthHeader = headers["Content-Length"] - if contentLengthHeader != "": - var length = contentLengthHeader.parseint() - if length > 0: - result = newString(length) - var received = 0 - while true: - if received >= length: break - let r = s.recv(addr(result[received]), length-received, timeout) - if r == 0: break - received += r - if received != length: - httpError("Got invalid content length. Expected: " & $length & - " got: " & $received) - else: - # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.4 TODO - - # -REGION- Connection: Close - # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.5 - if headers["Connection"] == "close": - var buf = "" - while true: - buf = newString(4000) - let r = s.recv(addr(buf[0]), 4000, timeout) - if r == 0: break - buf.setLen(r) - result.add(buf) - -proc parseResponse(s: Socket, getBody: bool, timeout: int): Response = - var parsedStatus = false - var linei = 0 - var fullyRead = false - var line = "" - result.headers = newStringTable(modeCaseInsensitive) - while true: - line = "" - linei = 0 - s.readLine(line, timeout) - if line == "": break # We've been disconnected. 
- if line == "\c\L": - fullyRead = true - break - if not parsedStatus: - # Parse HTTP version info and status code. - var le = skipIgnoreCase(line, "HTTP/", linei) - if le <= 0: httpError("invalid http version") - inc(linei, le) - le = skipIgnoreCase(line, "1.1", linei) - if le > 0: result.version = "1.1" - else: - le = skipIgnoreCase(line, "1.0", linei) - if le <= 0: httpError("unsupported http version") - result.version = "1.0" - inc(linei, le) - # Status code - linei.inc skipWhitespace(line, linei) - result.status = line[linei .. ^1] - parsedStatus = true - else: - # Parse headers - var name = "" - var le = parseUntil(line, name, ':', linei) - if le <= 0: httpError("invalid headers") - inc(linei, le) - if line[linei] != ':': httpError("invalid headers") - inc(linei) # Skip : - - result.headers[name] = line[linei.. ^1].strip() - if not fullyRead: - httpError("Connection was closed before full request has been made") - if getBody: - result.body = parseBody(s, result.headers, timeout) - else: - result.body = "" - -type - HttpMethod* = enum ## the requested HttpMethod - httpHEAD, ## Asks for the response identical to the one that would - ## correspond to a GET request, but without the response - ## body. - httpGET, ## Retrieves the specified resource. - httpPOST, ## Submits data to be processed to the identified - ## resource. The data is included in the body of the - ## request. - httpPUT, ## Uploads a representation of the specified resource. - httpDELETE, ## Deletes the specified resource. - httpTRACE, ## Echoes back the received request, so that a client - ## can see what intermediate servers are adding or - ## changing in the request. - httpOPTIONS, ## Returns the HTTP methods that the server supports - ## for specified address. - httpCONNECT ## Converts the request connection to a transparent - ## TCP/IP tunnel, usually used for proxies. 
- -{.deprecated: [THttpMethod: HttpMethod].} - when not defined(ssl): - type SSLContext = ref object - let defaultSSLContext: SSLContext = nil -else: - let defaultSSLContext = newContext(verifyMode = CVerifyNone) + type SslContext = ref object +var defaultSslContext {.threadvar.}: SslContext -proc newProxy*(url: string, auth = ""): Proxy = - ## Constructs a new ``TProxy`` object. - result = Proxy(url: parseUri(url), auth: auth) +proc getDefaultSSL(): SslContext = + result = defaultSslContext + when defined(ssl): + if result == nil: + defaultSslContext = newContext(verifyMode = CVerifyPeer) + result = defaultSslContext + doAssert result != nil, "failure to initialize the SSL context" -proc newMultipartData*: MultipartData = - ## Constructs a new ``MultipartData`` object. - MultipartData(content: @[]) +proc newProxy*(url: string; auth = ""): Proxy = + ## Constructs a new `TProxy` object. + result = Proxy(url: parseUri(url), auth: auth) -proc add*(p: var MultipartData, name, content: string, filename: string = nil, - contentType: string = nil) = - ## Add a value to the multipart data. Raises a `ValueError` exception if +proc newProxy*(url: Uri; auth = ""): Proxy = + ## Constructs a new `TProxy` object. + result = Proxy(url: url, auth: auth) + +proc newMultipartData*: MultipartData {.inline.} = + ## Constructs a new `MultipartData` object. 
+ MultipartData() + +proc `$`*(data: MultipartData): string {.since: (1, 1).} = + ## convert MultipartData to string so it's human readable when echo + ## see https://github.com/nim-lang/Nim/issues/11863 + const sep = "-".repeat(30) + for pos, entry in data.content: + result.add(sep & center($pos, 3) & sep) + result.add("\nname=\"" & entry.name & "\"") + if entry.isFile: + result.add("; filename=\"" & entry.filename & "\"\n") + result.add("Content-Type: " & entry.contentType) + result.add("\n\n" & entry.content & "\n") + +proc add*(p: MultipartData, name, content: string, filename: string = "", + contentType: string = "", useStream = true) = + ## Add a value to the multipart data. + ## + ## When `useStream` is `false`, the file will be read into memory. + ## + ## Raises a `ValueError` exception if ## `name`, `filename` or `contentType` contain newline characters. - - if {'\c','\L'} in name: + if {'\c', '\L'} in name: raise newException(ValueError, "name contains a newline character") - if filename != nil and {'\c','\L'} in filename: + if {'\c', '\L'} in filename: raise newException(ValueError, "filename contains a newline character") - if contentType != nil and {'\c','\L'} in contentType: + if {'\c', '\L'} in contentType: raise newException(ValueError, "contentType contains a newline character") - var str = "Content-Disposition: form-data; name=\"" & name & "\"" - if filename != nil: - str.add("; filename=\"" & filename & "\"") - str.add("\c\L") - if contentType != nil: - str.add("Content-Type: " & contentType & "\c\L") - str.add("\c\L" & content & "\c\L") + var entry = MultipartEntry( + name: name, + content: content, + isFile: filename.len > 0 + ) + + if entry.isFile: + entry.isStream = useStream + entry.filename = filename + entry.contentType = contentType - p.content.add(str) + p.content.add(entry) -proc add*(p: var MultipartData, xs: MultipartEntries): MultipartData +proc add*(p: MultipartData, xs: MultipartEntries): MultipartData {.discardable.} = ## Add a 
list of multipart entries to the multipart data `p`. All values are ## added without a filename and without a content type. ## - ## .. code-block:: Nim + ## ```Nim ## data.add({"action": "login", "format": "json"}) + ## ``` for name, content in xs.items: p.add(name, content) result = p @@ -318,330 +453,320 @@ proc newMultipartData*(xs: MultipartEntries): MultipartData = ## Create a new multipart data object and fill it with the entries `xs` ## directly. ## - ## .. code-block:: Nim + ## ```Nim ## var data = newMultipartData({"action": "login", "format": "json"}) - result = MultipartData(content: @[]) - result.add(xs) - -proc addFiles*(p: var MultipartData, xs: openarray[tuple[name, file: string]]): - MultipartData {.discardable.} = - ## Add files to a multipart data object. The file will be opened from your - ## disk, read and sent with the automatically determined MIME type. Raises an - ## `IOError` if the file cannot be opened or reading fails. To manually - ## specify file content, filename and MIME type, use `[]=` instead. + ## ``` + result = MultipartData() + for entry in xs: + result.add(entry.name, entry.content) + +proc addFiles*(p: MultipartData, xs: openArray[tuple[name, file: string]], + mimeDb = newMimetypes(), useStream = true): + MultipartData {.discardable.} = + ## Add files to a multipart data object. The files will be streamed from disk + ## when the request is being made. When `stream` is `false`, the files are + ## instead read into memory, but beware this is very memory ineffecient even + ## for small files. The MIME types will automatically be determined. + ## Raises an `IOError` if the file cannot be opened or reading fails. To + ## manually specify file content, filename and MIME type, use `[]=` instead. ## - ## .. 
code-block:: Nim + ## ```Nim ## data.addFiles({"uploaded_file": "public/test.html"}) - var m = newMimetypes() + ## ``` for name, file in xs.items: var contentType: string - let (dir, fName, ext) = splitFile(file) + let (_, fName, ext) = splitFile(file) if ext.len > 0: - contentType = m.getMimetype(ext[1..ext.high], nil) - p.add(name, readFile(file), fName & ext, contentType) + contentType = mimeDb.getMimetype(ext[1..ext.high], "") + let content = if useStream: file else: readFile(file) + p.add(name, content, fName & ext, contentType, useStream = useStream) result = p -proc `[]=`*(p: var MultipartData, name, content: string) = +proc `[]=`*(p: MultipartData, name, content: string) {.inline.} = ## Add a multipart entry to the multipart data `p`. The value is added ## without a filename and without a content type. ## - ## .. code-block:: Nim + ## ```Nim ## data["username"] = "NimUser" + ## ``` p.add(name, content) -proc `[]=`*(p: var MultipartData, name: string, - file: tuple[name, contentType, content: string]) = - ## Add a file to the multipart data `p`, specifying filename, contentType and - ## content manually. +proc `[]=`*(p: MultipartData, name: string, + file: tuple[name, contentType, content: string]) {.inline.} = + ## Add a file to the multipart data `p`, specifying filename, contentType + ## and content manually. ## - ## .. 
code-block:: Nim + ## ```Nim ## data["uploaded_file"] = ("test.html", "text/html", ## "<html><head></head><body><p>test</p></body></html>") - p.add(name, file.content, file.name, file.contentType) - -proc format(p: MultipartData): tuple[header, body: string] = - if p == nil or p.content == nil or p.content.len == 0: - return ("", "") + ## ``` + p.add(name, file.content, file.name, file.contentType, useStream = false) - # Create boundary that is not in the data to be formatted - var bound: string +proc getBoundary(p: MultipartData): string = + if p == nil or p.content.len == 0: return while true: - bound = $random(int.high) - var found = false - for s in p.content: - if bound in s: - found = true - if not found: - break - - result.header = "Content-Type: multipart/form-data; boundary=" & bound & "\c\L" - result.body = "" - for s in p.content: - result.body.add("--" & bound & "\c\L" & s) - result.body.add("--" & bound & "--\c\L") - -proc request*(url: string, httpMethod: string, extraHeaders = "", - body = "", sslContext = defaultSSLContext, timeout = -1, - userAgent = defUserAgent, proxy: Proxy = nil): Response = - ## | Requests ``url`` with the custom method string specified by the - ## | ``httpMethod`` parameter. - ## | Extra headers can be specified and must be separated by ``\c\L`` - ## | An optional timeout can be specified in miliseconds, if reading from the - ## server takes longer than specified an ETimeout exception will be raised. - var r = if proxy == nil: parseUri(url) else: proxy.url - var headers = substr(httpMethod, len("http")) - # TODO: Use generateHeaders further down once it supports proxies. - if proxy == nil: - headers.add ' ' - if r.path[0] != '/': headers.add '/' - headers.add(r.path) - if r.query.len > 0: - headers.add("?" 
& r.query) + result = $rand(int.high) + for i, entry in p.content: + if result in entry.content: break + elif i == p.content.high: return + +proc sendFile(socket: Socket | AsyncSocket, + entry: MultipartEntry) {.multisync.} = + const chunkSize = 2^18 + let file = + when socket is AsyncSocket: openAsync(entry.content) + else: newFileStream(entry.content, fmRead) + + var buffer: string + while true: + buffer = + when socket is AsyncSocket: (await read(file, chunkSize)) + else: readStr(file, chunkSize) + if buffer.len == 0: break + await socket.send(buffer) + file.close() + +proc getNewLocation(lastURL: Uri, headers: HttpHeaders): Uri = + let newLocation = headers.getOrDefault"Location" + if newLocation == "": httpError("location header expected") + # Relative URLs. (Not part of the spec, but soon will be.) + let parsedLocation = parseUri(newLocation) + if parsedLocation.hostname == "" and parsedLocation.path != "": + result = lastURL + result.path = parsedLocation.path + result.query = parsedLocation.query + result.anchor = parsedLocation.anchor else: - headers.add(" " & url) - - headers.add(" HTTP/1.1\c\L") - - add(headers, "Host: " & r.hostname & "\c\L") - if userAgent != "": - add(headers, "User-Agent: " & userAgent & "\c\L") - if proxy != nil and proxy.auth != "": - let auth = base64.encode(proxy.auth, newline = "") - add(headers, "Proxy-Authorization: basic " & auth & "\c\L") - add(headers, extraHeaders) - add(headers, "\c\L") - - var s = newSocket() - if s == nil: raiseOSError(osLastError()) - var port = net.Port(80) - if r.scheme == "https": - when defined(ssl): - sslContext.wrapSocket(s) - port = net.Port(443) - else: - raise newException(HttpRequestError, - "SSL support is not available. 
Cannot connect over SSL.") - if r.port != "": - port = net.Port(r.port.parseInt) + result = parsedLocation - if timeout == -1: - s.connect(r.hostname, port) - else: - s.connect(r.hostname, port, timeout) - s.send(headers) - if body != "": - s.send(body) - - result = parseResponse(s, httpMethod != "httpHEAD", timeout) - s.close() - -proc request*(url: string, httpMethod = httpGET, extraHeaders = "", - body = "", sslContext = defaultSSLContext, timeout = -1, - userAgent = defUserAgent, proxy: Proxy = nil): Response = - ## | Requests ``url`` with the specified ``httpMethod``. - ## | Extra headers can be specified and must be separated by ``\c\L`` - ## | An optional timeout can be specified in miliseconds, if reading from the - ## server takes longer than specified an ETimeout exception will be raised. - result = request(url, $httpMethod, extraHeaders, body, sslContext, timeout, - userAgent, proxy) - -proc redirection(status: string): bool = - const redirectionNRs = ["301", "302", "303", "307"] - for i in items(redirectionNRs): - if status.startsWith(i): - return true - -proc getNewLocation(lastUrl: string, headers: StringTableRef): string = - result = headers["Location"] - if result == "": httpError("location header expected") - # Relative URLs. (Not part of the spec, but soon will be.) - let r = parseUri(result) - if r.hostname == "" and r.path != "": - let origParsed = parseUri(lastUrl) - result = origParsed.hostname & "/" & r.path - -proc get*(url: string, extraHeaders = "", maxRedirects = 5, - sslContext: SSLContext = defaultSSLContext, - timeout = -1, userAgent = defUserAgent, - proxy: Proxy = nil): Response = - ## | GETs the ``url`` and returns a ``Response`` object - ## | This proc also handles redirection - ## | Extra headers can be specified and must be separated by ``\c\L``. - ## | An optional timeout can be specified in miliseconds, if reading from the - ## server takes longer than specified an ETimeout exception will be raised. 
- result = request(url, httpGET, extraHeaders, "", sslContext, timeout, - userAgent, proxy) - var lastURL = url - for i in 1..maxRedirects: - if result.status.redirection(): - let redirectTo = getNewLocation(lastURL, result.headers) - result = request(redirectTo, httpGET, extraHeaders, "", sslContext, - timeout, userAgent, proxy) - lastUrl = redirectTo - -proc getContent*(url: string, extraHeaders = "", maxRedirects = 5, - sslContext: SSLContext = defaultSSLContext, - timeout = -1, userAgent = defUserAgent, - proxy: Proxy = nil): string = - ## | GETs the body and returns it as a string. - ## | Raises exceptions for the status codes ``4xx`` and ``5xx`` - ## | Extra headers can be specified and must be separated by ``\c\L``. - ## | An optional timeout can be specified in miliseconds, if reading from the - ## server takes longer than specified an ETimeout exception will be raised. - var r = get(url, extraHeaders, maxRedirects, sslContext, timeout, userAgent, - proxy) - if r.status[0] in {'4','5'}: - raise newException(HttpRequestError, r.status) +proc generateHeaders(requestUrl: Uri, httpMethod: HttpMethod, headers: HttpHeaders, + proxy: Proxy): string = + # GET + result = $httpMethod + result.add ' ' + + if proxy.isNil or requestUrl.scheme == "https": + # /path?query + if not requestUrl.path.startsWith("/"): result.add '/' + result.add(requestUrl.path) + if requestUrl.query.len > 0: + result.add("?" & requestUrl.query) else: - return r.body - -proc post*(url: string, extraHeaders = "", body = "", - maxRedirects = 5, - sslContext: SSLContext = defaultSSLContext, - timeout = -1, userAgent = defUserAgent, - proxy: Proxy = nil, - multipart: MultipartData = nil): Response = - ## | POSTs ``body`` to the ``url`` and returns a ``Response`` object. - ## | This proc adds the necessary Content-Length header. - ## | This proc also handles redirection. - ## | Extra headers can be specified and must be separated by ``\c\L``. 
- ## | An optional timeout can be specified in miliseconds, if reading from the - ## server takes longer than specified an ETimeout exception will be raised. - ## | The optional ``multipart`` parameter can be used to create - ## ``multipart/form-data`` POSTs comfortably. - let (mpHeaders, mpBody) = format(multipart) - - template withNewLine(x): expr = - if x.len > 0 and not x.endsWith("\c\L"): - x & "\c\L" + # Remove the 'http://' from the URL for CONNECT requests for TLS connections. + var modifiedUrl = requestUrl + if requestUrl.scheme == "https": modifiedUrl.scheme = "" + result.add($modifiedUrl) + + # HTTP/1.1\c\l + result.add(" HTTP/1.1" & httpNewLine) + + # Host header. + if not headers.hasKey("Host"): + if requestUrl.port == "": + add(result, "Host: " & requestUrl.hostname & httpNewLine) else: - x - - var xb = mpBody.withNewLine() & body - - var xh = extraHeaders.withNewLine() & mpHeaders.withNewLine() & - withNewLine("Content-Length: " & $len(xb)) - - result = request(url, httpPOST, xh, xb, sslContext, timeout, userAgent, - proxy) - var lastUrl = "" - for i in 1..maxRedirects: - if result.status.redirection(): - let redirectTo = getNewLocation(lastURL, result.headers) - var meth = if result.status != "307": httpGet else: httpPost - result = request(redirectTo, meth, xh, xb, sslContext, timeout, - userAgent, proxy) - lastUrl = redirectTo - -proc postContent*(url: string, extraHeaders = "", body = "", - maxRedirects = 5, - sslContext: SSLContext = defaultSSLContext, - timeout = -1, userAgent = defUserAgent, - proxy: Proxy = nil, - multipart: MultipartData = nil): string = - ## | POSTs ``body`` to ``url`` and returns the response's body as a string - ## | Raises exceptions for the status codes ``4xx`` and ``5xx`` - ## | Extra headers can be specified and must be separated by ``\c\L``. - ## | An optional timeout can be specified in miliseconds, if reading from the - ## server takes longer than specified an ETimeout exception will be raised. 
- ## | The optional ``multipart`` parameter can be used to create - ## ``multipart/form-data`` POSTs comfortably. - var r = post(url, extraHeaders, body, maxRedirects, sslContext, timeout, - userAgent, proxy, multipart) - if r.status[0] in {'4','5'}: - raise newException(HttpRequestError, r.status) - else: - return r.body - -proc downloadFile*(url: string, outputFilename: string, - sslContext: SSLContext = defaultSSLContext, - timeout = -1, userAgent = defUserAgent, - proxy: Proxy = nil) = - ## | Downloads ``url`` and saves it to ``outputFilename`` - ## | An optional timeout can be specified in miliseconds, if reading from the - ## server takes longer than specified an ETimeout exception will be raised. - var f: File - if open(f, outputFilename, fmWrite): - f.write(getContent(url, sslContext = sslContext, timeout = timeout, - userAgent = userAgent, proxy = proxy)) - f.close() - else: - fileError("Unable to open file") + add(result, "Host: " & requestUrl.hostname & ":" & requestUrl.port & httpNewLine) + + # Connection header. + if not headers.hasKey("Connection"): + add(result, "Connection: Keep-Alive" & httpNewLine) + + # Proxy auth header. + if not proxy.isNil and proxy.auth != "": + let auth = base64.encode(proxy.auth) + add(result, "Proxy-Authorization: Basic " & auth & httpNewLine) -proc generateHeaders(r: Uri, httpMethod: string, - headers: StringTableRef): string = - # TODO: Use this in the blocking HttpClient once it supports proxies. - result = substr(httpMethod, len("http")) - # TODO: Proxies - result.add ' ' - if r.path[0] != '/': result.add '/' - result.add(r.path) - if r.query.len > 0: - result.add("?" 
& r.query) - result.add(" HTTP/1.1\c\L") - - add(result, "Host: " & r.hostname & "\c\L") - add(result, "Connection: Keep-Alive\c\L") for key, val in headers: - add(result, key & ": " & val & "\c\L") + add(result, key & ": " & val & httpNewLine) - add(result, "\c\L") + add(result, httpNewLine) type - AsyncHttpClient* = ref object - socket: AsyncSocket + ProgressChangedProc*[ReturnType] = + proc (total, progress, speed: BiggestInt): + ReturnType {.closure, gcsafe.} + + HttpClientBase*[SocketType] = ref object + socket: SocketType connected: bool - currentURL: Uri ## Where we are currently connected. - headers*: StringTableRef - maxRedirects: int + currentURL: Uri ## Where we are currently connected. + headers*: HttpHeaders ## Headers to send in requests. + maxRedirects: Natural ## Maximum redirects, set to `0` to disable. userAgent: string + timeout*: int ## Only used for blocking HttpClient for now. + proxy: Proxy + ## `nil` or the callback to call when request progress changes. + when SocketType is Socket: + onProgressChanged*: ProgressChangedProc[void] + else: + onProgressChanged*: ProgressChangedProc[Future[void]] when defined(ssl): sslContext: net.SslContext + contentTotal: BiggestInt + contentProgress: BiggestInt + oneSecondProgress: BiggestInt + lastProgressReport: MonoTime + when SocketType is AsyncSocket: + bodyStream: FutureStream[string] + parseBodyFut: Future[void] + else: + bodyStream: Stream + getBody: bool ## When `false`, the body is never read in requestAux. -{.deprecated: [PAsyncHttpClient: AsyncHttpClient].} +type + HttpClient* = HttpClientBase[Socket] -proc newAsyncHttpClient*(userAgent = defUserAgent, - maxRedirects = 5, sslContext = defaultSslContext): AsyncHttpClient = +proc newHttpClient*(userAgent = defUserAgent, maxRedirects = 5, + sslContext = getDefaultSSL(), proxy: Proxy = nil, + timeout = -1, headers = newHttpHeaders()): HttpClient = + ## Creates a new HttpClient instance. 
+ ## + ## `userAgent` specifies the user agent that will be used when making + ## requests. + ## + ## `maxRedirects` specifies the maximum amount of redirects to follow, + ## default is 5. + ## + ## `sslContext` specifies the SSL context to use for HTTPS requests. + ## See `SSL/TLS support <#sslslashtls-support>`_ + ## + ## `proxy` specifies an HTTP proxy to use for this HTTP client's + ## connections. + ## + ## `timeout` specifies the number of milliseconds to allow before a + ## `TimeoutError` is raised. + ## + ## `headers` specifies the HTTP Headers. + runnableExamples: + import std/strutils + + let exampleHtml = newHttpClient().getContent("http://example.com") + assert "Example Domain" in exampleHtml + assert "Pizza" notin exampleHtml + + new result + result.headers = headers + result.userAgent = userAgent + result.maxRedirects = maxRedirects + result.proxy = proxy + result.timeout = timeout + result.onProgressChanged = nil + result.bodyStream = newStringStream() + result.getBody = true + when defined(ssl): + result.sslContext = sslContext + +type + AsyncHttpClient* = HttpClientBase[AsyncSocket] + +proc newAsyncHttpClient*(userAgent = defUserAgent, maxRedirects = 5, + sslContext = getDefaultSSL(), proxy: Proxy = nil, + headers = newHttpHeaders()): AsyncHttpClient = ## Creates a new AsyncHttpClient instance. ## - ## ``userAgent`` specifies the user agent that will be used when making + ## `userAgent` specifies the user agent that will be used when making ## requests. ## - ## ``maxRedirects`` specifies the maximum amount of redirects to follow, + ## `maxRedirects` specifies the maximum amount of redirects to follow, ## default is 5. ## - ## ``sslContext`` specifies the SSL context to use for HTTPS requests. + ## `sslContext` specifies the SSL context to use for HTTPS requests. + ## + ## `proxy` specifies an HTTP proxy to use for this HTTP client's + ## connections. + ## + ## `headers` specifies the HTTP Headers. 
+ runnableExamples: + import std/[asyncdispatch, strutils] + + proc asyncProc(): Future[string] {.async.} = + let client = newAsyncHttpClient() + result = await client.getContent("http://example.com") + + let exampleHtml = waitFor asyncProc() + assert "Example Domain" in exampleHtml + assert "Pizza" notin exampleHtml + new result - result.headers = newStringTable(modeCaseInsensitive) - result.userAgent = defUserAgent + result.headers = headers + result.userAgent = userAgent result.maxRedirects = maxRedirects + result.proxy = proxy + result.timeout = -1 # TODO + result.onProgressChanged = nil + result.bodyStream = newFutureStream[string]("newAsyncHttpClient") + result.getBody = true when defined(ssl): - result.sslContext = net.SslContext(sslContext) + result.sslContext = sslContext -proc close*(client: AsyncHttpClient) = +proc close*(client: HttpClient | AsyncHttpClient) = ## Closes any connections held by the HTTP client. if client.connected: client.socket.close() client.connected = false -proc recvFull(socket: AsyncSocket, size: int): Future[string] {.async.} = +proc getSocket*(client: HttpClient): Socket {.inline.} = + ## Get network socket, useful if you want to find out more details about the connection. 
+ ## + ## This example shows info about local and remote endpoints: + ## + ## ```Nim + ## if client.connected: + ## echo client.getSocket.getLocalAddr + ## echo client.getSocket.getPeerAddr + ## ``` + return client.socket + +proc getSocket*(client: AsyncHttpClient): AsyncSocket {.inline.} = + return client.socket + +proc reportProgress(client: HttpClient | AsyncHttpClient, + progress: BiggestInt) {.multisync.} = + client.contentProgress += progress + client.oneSecondProgress += progress + if (getMonoTime() - client.lastProgressReport).inSeconds >= 1: + if not client.onProgressChanged.isNil: + await client.onProgressChanged(client.contentTotal, + client.contentProgress, + client.oneSecondProgress) + client.oneSecondProgress = 0 + client.lastProgressReport = getMonoTime() + +proc recvFull(client: HttpClient | AsyncHttpClient, size: int, timeout: int, + keep: bool): Future[int] {.multisync.} = ## Ensures that all the data requested is read and returned. - result = "" + var readLen = 0 while true: - if size == result.len: break - let data = await socket.recv(size - result.len) - if data == "": break # We've been disconnected. - result.add data + if size == readLen: break + + let remainingSize = size - readLen + let sizeToRecv = min(remainingSize, net.BufferSize) -proc parseChunks(client: AsyncHttpClient): Future[string] {.async.} = - result = "" + when client.socket is Socket: + let data = client.socket.recv(sizeToRecv, timeout) + else: + let data = await client.socket.recv(sizeToRecv) + if data == "": + client.close() + break # We've been disconnected. 
+ + readLen.inc(data.len) + if keep: + await client.bodyStream.write(data) + + await reportProgress(client, data.len) + + return readLen + +proc parseChunks(client: HttpClient | AsyncHttpClient): Future[void] + {.multisync.} = while true: var chunkSize = 0 var chunkSizeStr = await client.socket.recvLine() var i = 0 if chunkSizeStr == "": httpError("Server terminated connection prematurely") - while true: + while i < chunkSizeStr.len: case chunkSizeStr[i] of '0'..'9': chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('0')) @@ -649,8 +774,6 @@ proc parseChunks(client: AsyncHttpClient): Future[string] {.async.} = chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('a') + 10) of 'A'..'F': chunkSize = chunkSize shl 4 or (ord(chunkSizeStr[i]) - ord('A') + 10) - of '\0': - break of ';': # http://tools.ietf.org/html/rfc2616#section-3.6.1 # We don't care about chunk-extensions. @@ -659,62 +782,101 @@ proc parseChunks(client: AsyncHttpClient): Future[string] {.async.} = httpError("Invalid chunk size: " & chunkSizeStr) inc(i) if chunkSize <= 0: - discard await recvFull(client.socket, 2) # Skip \c\L + discard await recvFull(client, 2, client.timeout, false) # Skip \c\L break - result.add await recvFull(client.socket, chunkSize) - discard await recvFull(client.socket, 2) # Skip \c\L + var bytesRead = await recvFull(client, chunkSize, client.timeout, true) + if bytesRead != chunkSize: + httpError("Server terminated connection prematurely") + + bytesRead = await recvFull(client, 2, client.timeout, false) # Skip \c\L + if bytesRead != 2: + httpError("Server terminated connection prematurely") + # Trailer headers will only be sent if the request specifies that we want # them: http://tools.ietf.org/html/rfc2616#section-3.6.1 -proc parseBody(client: AsyncHttpClient, - headers: StringTableRef): Future[string] {.async.} = - result = "" - if headers["Transfer-Encoding"] == "chunked": - result = await parseChunks(client) +proc parseBody(client: HttpClient | 
AsyncHttpClient, headers: HttpHeaders, + httpVersion: string): Future[void] {.multisync.} = + # Reset progress from previous requests. + client.contentTotal = 0 + client.contentProgress = 0 + client.oneSecondProgress = 0 + client.lastProgressReport = MonoTime() + + when client is AsyncHttpClient: + assert(not client.bodyStream.finished) + + if headers.getOrDefault"Transfer-Encoding" == "chunked": + await parseChunks(client) else: # -REGION- Content-Length # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.3 - var contentLengthHeader = headers["Content-Length"] + var contentLengthHeader = headers.getOrDefault"Content-Length" if contentLengthHeader != "": - var length = contentLengthHeader.parseint() + var length = contentLengthHeader.parseInt() + client.contentTotal = length if length > 0: - result = await client.socket.recvFull(length) - if result == "": + let recvLen = await client.recvFull(length, client.timeout, true) + if recvLen == 0: + client.close() httpError("Got disconnected while trying to read body.") - if result.len != length: + if recvLen != length: httpError("Received length doesn't match expected length. Wanted " & - $length & " got " & $result.len) + $length & " got: " & $recvLen) else: # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.4 TODO # -REGION- Connection: Close # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.5 - if headers["Connection"] == "close": - var buf = "" + let implicitConnectionClose = + httpVersion == "1.0" or + # This doesn't match the HTTP spec, but it fixes issues for non-conforming servers. 
+ (httpVersion == "1.1" and headers.getOrDefault"Connection" == "") + if headers.getOrDefault"Connection" == "close" or implicitConnectionClose: while true: - buf = await client.socket.recvFull(4000) - if buf == "": break - result.add(buf) + let recvLen = await client.recvFull(4000, client.timeout, true) + if recvLen != 4000: + client.close() + break -proc parseResponse(client: AsyncHttpClient, - getBody: bool): Future[Response] {.async.} = + when client is AsyncHttpClient: + client.bodyStream.complete() + else: + client.bodyStream.setPosition(0) + + # If the server will close our connection, then no matter the method of + # reading the body, we need to close our socket. + if headers.getOrDefault"Connection" == "close": + client.close() + +proc parseResponse(client: HttpClient | AsyncHttpClient, + getBody: bool): Future[Response | AsyncResponse] + {.multisync.} = + new result var parsedStatus = false var linei = 0 var fullyRead = false + var lastHeaderName = "" var line = "" - result.headers = newStringTable(modeCaseInsensitive) + result.headers = newHttpHeaders() while true: linei = 0 - line = await client.socket.recvLine() - if line == "": break # We've been disconnected. - if line == "\c\L": + when client is HttpClient: + line = await client.socket.recvLine(client.timeout) + else: + line = await client.socket.recvLine() + if line == "": + # We've been disconnected. + client.close() + break + if line == httpNewLine: fullyRead = true break if not parsedStatus: # Parse HTTP version info and status code. 
var le = skipIgnoreCase(line, "HTTP/", linei) if le <= 0: - httpError("invalid http version, " & line.repr) + httpError("invalid http version, `" & line & "`") inc(linei, le) le = skipIgnoreCase(line, "1.1", linei) if le > 0: result.version = "1.1" @@ -729,133 +891,470 @@ proc parseResponse(client: AsyncHttpClient, parsedStatus = true else: # Parse headers - var name = "" - var le = parseUntil(line, name, ':', linei) - if le <= 0: httpError("invalid headers") - inc(linei, le) - if line[linei] != ':': httpError("invalid headers") - inc(linei) # Skip : + # There's at least one char because empty lines are handled above (with client.close) + if line[0] in {' ', '\t'}: + # Check if it's a multiline header value, if so, append to the header we're currently parsing + # This works because a line with a header must start with the header name without any leading space + # See https://datatracker.ietf.org/doc/html/rfc7230, section 3.2 and 3.2.4 + # Multiline headers are deprecated in the spec, but it's better to parse them than crash + if lastHeaderName == "": + # Some extra unparsable lines in the HTTP output - we ignore them + discard + else: + result.headers.table[result.headers.toCaseInsensitive(lastHeaderName)][^1].add "\n" & line + else: + var name = "" + var le = parseUntil(line, name, ':', linei) + if le <= 0: httpError("Invalid headers - received empty header name") + if line.len == le: httpError("Invalid headers - no colon after header name") + inc(linei, le) # Skip the parsed header name + inc(linei) # Skip : + # If we want to be HTTP spec compliant later, error on linei == line.len (for empty header value) + lastHeaderName = name # Remember the header name for the possible multi-line header + result.headers.add(name, line[linei .. ^1].strip()) + if result.headers.len > headerLimit: + httpError("too many headers") - result.headers[name] = line[linei.. 
^1].strip() if not fullyRead: httpError("Connection was closed before full request has been made") - if getBody: - result.body = await parseBody(client, result.headers) + + when client is HttpClient: + result.bodyStream = newStringStream() + else: + result.bodyStream = newFutureStream[string]("parseResponse") + + if getBody and result.code != Http204: + client.bodyStream = result.bodyStream + when client is HttpClient: + parseBody(client, result.headers, result.version) + else: + assert(client.parseBodyFut.isNil or client.parseBodyFut.finished) + # do not wait here for the body request to complete + client.parseBodyFut = parseBody(client, result.headers, result.version) + client.parseBodyFut.addCallback do(): + if client.parseBodyFut.failed: + client.bodyStream.fail(client.parseBodyFut.error) else: - result.body = "" + when client is AsyncHttpClient: + result.bodyStream.complete() -proc newConnection(client: AsyncHttpClient, url: Uri) {.async.} = +proc newConnection(client: HttpClient | AsyncHttpClient, + url: Uri) {.multisync.} = if client.currentURL.hostname != url.hostname or - client.currentURL.scheme != url.scheme: - if client.connected: client.close() - client.socket = newAsyncSocket() + client.currentURL.scheme != url.scheme or + client.currentURL.port != url.port or + (not client.connected): + # Connect to proxy if specified + let connectionUrl = + if client.proxy.isNil: url else: client.proxy.url + + let isSsl = connectionUrl.scheme.toLowerAscii() == "https" + + if isSsl and not defined(ssl): + raise newException(HttpRequestError, + "SSL support is not available. Cannot connect over SSL. Compile with -d:ssl to enable.") + + if client.connected: + client.close() + client.connected = false # TODO: I should be able to write 'net.Port' here... 
let port = - if url.port == "": - if url.scheme.toLower() == "https": - rawsockets.Port(443) + if connectionUrl.port == "": + if isSsl: + nativesockets.Port(443) else: - rawsockets.Port(80) - else: rawsockets.Port(url.port.parseInt) + nativesockets.Port(80) + else: nativesockets.Port(connectionUrl.port.parseInt) - if url.scheme.toLower() == "https": + when client is HttpClient: + client.socket = await net.dial(connectionUrl.hostname, port) + elif client is AsyncHttpClient: + client.socket = await asyncnet.dial(connectionUrl.hostname, port) + else: {.fatal: "Unsupported client type".} + + when defined(ssl): + if isSsl: + try: + client.sslContext.wrapConnectedSocket( + client.socket, handshakeAsClient, connectionUrl.hostname) + except: + client.socket.close() + raise getCurrentException() + + # If need to CONNECT through proxy + if url.scheme == "https" and not client.proxy.isNil: when defined(ssl): - client.sslContext.wrapSocket(client.socket) + # Pass only host:port for CONNECT + var connectUrl = initUri() + connectUrl.hostname = url.hostname + connectUrl.port = if url.port != "": url.port else: "443" + + let proxyHeaderString = generateHeaders(connectUrl, HttpConnect, + newHttpHeaders(), client.proxy) + await client.socket.send(proxyHeaderString) + let proxyResp = await parseResponse(client, false) + + if not proxyResp.status.startsWith("200"): + raise newException(HttpRequestError, + "The proxy server rejected a CONNECT request, " & + "so a secure connection could not be established.") + client.sslContext.wrapConnectedSocket( + client.socket, handshakeAsClient, url.hostname) else: raise newException(HttpRequestError, - "SSL support is not available. Cannot connect over SSL.") + "SSL support is not available. Cannot connect over SSL. 
Compile with -d:ssl to enable.") - await client.socket.connect(url.hostname, port) + # May be connected through proxy but remember actual URL being accessed client.currentURL = url client.connected = true -proc request*(client: AsyncHttpClient, url: string, httpMethod: string, - body = ""): Future[Response] {.async.} = - ## Connects to the hostname specified by the URL and performs a request - ## using the custom method string specified by ``httpMethod``. - ## - ## Connection will kept alive. Further requests on the same ``client`` to - ## the same hostname will not require a new connection to be made. The - ## connection can be closed by using the ``close`` procedure. - ## - ## The returned future will complete once the request is completed. - let r = parseUri(url) - await newConnection(client, r) +proc readFileSizes(client: HttpClient | AsyncHttpClient, + multipart: MultipartData) {.multisync.} = + for entry in multipart.content.mitems(): + if not entry.isFile: continue + if not entry.isStream: + entry.fileSize = entry.content.len + continue + + # TODO: look into making getFileSize work with async + let fileSize = getFileSize(entry.content) + entry.fileSize = fileSize + +proc format(entry: MultipartEntry, boundary: string): string = + result = "--" & boundary & httpNewLine + result.add("Content-Disposition: form-data; name=\"" & entry.name & "\"") + if entry.isFile: + result.add("; filename=\"" & entry.filename & "\"" & httpNewLine) + result.add("Content-Type: " & entry.contentType & httpNewLine) + else: + result.add(httpNewLine & httpNewLine & entry.content) - if not client.headers.hasKey("user-agent") and client.userAgent != "": - client.headers["User-Agent"] = client.userAgent +proc format(client: HttpClient | AsyncHttpClient, + multipart: MultipartData): Future[seq[string]] {.multisync.} = + let bound = getBoundary(multipart) + client.headers["Content-Type"] = "multipart/form-data; boundary=" & bound - var headers = generateHeaders(r, $httpMethod, 
client.headers) + await client.readFileSizes(multipart) - await client.socket.send(headers) - if body != "": - await client.socket.send(body) + var length: int64 + for entry in multipart.content: + result.add(format(entry, bound) & httpNewLine) + if entry.isFile: + length += entry.fileSize + httpNewLine.len - result = await parseResponse(client, httpMethod != "httpHEAD") + result.add "--" & bound & "--" & httpNewLine -proc request*(client: AsyncHttpClient, url: string, httpMethod = httpGET, - body = ""): Future[Response] = - ## Connects to the hostname specified by the URL and performs a request - ## using the method specified. - ## - ## Connection will kept alive. Further requests on the same ``client`` to - ## the same hostname will not require a new connection to be made. The - ## connection can be closed by using the ``close`` procedure. - ## - ## The returned future will complete once the request is completed. - result = request(client, url, $httpMethod, body) + for s in result: length += s.len + client.headers["Content-Length"] = $length -proc get*(client: AsyncHttpClient, url: string): Future[Response] {.async.} = - ## Connects to the hostname specified by the URL and performs a GET request. - ## - ## This procedure will follow redirects up to a maximum number of redirects - ## specified in ``newAsyncHttpClient``. 
- result = await client.request(url, httpGET) - var lastURL = url - for i in 1..client.maxRedirects: - if result.status.redirection(): - let redirectTo = getNewLocation(lastURL, result.headers) - result = await client.request(redirectTo, httpGET) - lastUrl = redirectTo +proc override(fallback, override: HttpHeaders): HttpHeaders = + # Right-biased map union for `HttpHeaders` -when isMainModule: - when true: - # Async - proc main() {.async.} = - var client = newAsyncHttpClient() - var resp = await client.request("http://picheta.me") + result = newHttpHeaders() + # Copy by value + result.table[] = fallback.table[] - echo("Got response: ", resp.status) - echo("Body:\n") - echo(resp.body) + if override.isNil: + # Return the copy of fallback so it does not get modified + return result - resp = await client.request("http://picheta.me/asfas.html") - echo("Got response: ", resp.status) + for k, vs in override.table: + result[k] = vs - resp = await client.request("http://picheta.me/aboutme.html") - echo("Got response: ", resp.status) +proc requestAux(client: HttpClient | AsyncHttpClient, url: Uri, + httpMethod: HttpMethod, body = "", headers: HttpHeaders = nil, + multipart: MultipartData = nil): Future[Response | AsyncResponse] + {.multisync.} = + # Helper that actually makes the request. Does not handle redirects. 
+ if url.scheme == "": + raise newException(ValueError, "No uri scheme supplied.") - resp = await client.request("http://nim-lang.org/") - echo("Got response: ", resp.status) + when client is AsyncHttpClient: + if not client.parseBodyFut.isNil: + # let the current operation finish before making another request + await client.parseBodyFut + client.parseBodyFut = nil - resp = await client.request("http://nim-lang.org/download.html") - echo("Got response: ", resp.status) + await newConnection(client, url) - waitFor main() + var newHeaders: HttpHeaders + var data: seq[string] + if multipart != nil and multipart.content.len > 0: + # `format` modifies `client.headers`, see + # https://github.com/nim-lang/Nim/pull/18208#discussion_r647036979 + data = await client.format(multipart) + newHeaders = client.headers.override(headers) else: - #downloadFile("http://force7.de/nim/index.html", "nimindex.html") - #downloadFile("http://www.httpwatch.com/", "ChunkTest.html") - #downloadFile("http://validator.w3.org/check?uri=http%3A%2F%2Fgoogle.com", - # "validator.html") + newHeaders = client.headers.override(headers) + # Only change headers if they have not been specified already + if not newHeaders.hasKey("Content-Length"): + if body.len != 0: + newHeaders["Content-Length"] = $body.len + elif httpMethod notin {HttpGet, HttpHead}: + newHeaders["Content-Length"] = "0" + + if not newHeaders.hasKey("user-agent") and client.userAgent.len > 0: + newHeaders["User-Agent"] = client.userAgent + + let headerString = generateHeaders(url, httpMethod, newHeaders, + client.proxy) + await client.socket.send(headerString) + + if data.len > 0: + var buffer: string + for i, entry in multipart.content: + buffer.add data[i] + if not entry.isFile: continue + if buffer.len > 0: + await client.socket.send(buffer) + buffer.setLen(0) + if entry.isStream: + await client.socket.sendFile(entry) + else: + await client.socket.send(entry.content) + buffer.add httpNewLine + # send the rest and the last boundary + 
await client.socket.send(buffer & data[^1]) + elif body.len > 0: + await client.socket.send(body) + + let getBody = httpMethod notin {HttpHead, HttpConnect} and + client.getBody + result = await parseResponse(client, getBody) + +proc request*(client: HttpClient | AsyncHttpClient, url: Uri | string, + httpMethod: HttpMethod | string = HttpGet, body = "", + headers: HttpHeaders = nil, + multipart: MultipartData = nil): Future[Response | AsyncResponse] + {.multisync.} = + ## Connects to the hostname specified by the URL and performs a request + ## using the custom method string specified by `httpMethod`. + ## + ## Connection will be kept alive. Further requests on the same `client` to + ## the same hostname will not require a new connection to be made. The + ## connection can be closed by using the `close` procedure. + ## + ## This procedure will follow redirects up to a maximum number of redirects + ## specified in `client.maxRedirects`. + ## + ## You need to make sure that the `url` doesn't contain any newline + ## characters. Failing to do so will raise `AssertionDefect`. + ## + ## `headers` are HTTP headers that override the `client.headers` for + ## this specific request only and will not be persisted. 
+ ## + ## **Deprecated since v1.5**: use HttpMethod enum instead; string parameter httpMethod is deprecated + when url is string: + doAssert(not url.contains({'\c', '\L'}), "url shouldn't contain any newline characters") + let url = parseUri(url) + + when httpMethod is string: + {.warning: + "Deprecated since v1.5; use HttpMethod enum instead; string parameter httpMethod is deprecated".} + let httpMethod = case httpMethod + of "HEAD": + HttpHead + of "GET": + HttpGet + of "POST": + HttpPost + of "PUT": + HttpPut + of "DELETE": + HttpDelete + of "TRACE": + HttpTrace + of "OPTIONS": + HttpOptions + of "CONNECT": + HttpConnect + of "PATCH": + HttpPatch + else: + raise newException(ValueError, "Invalid HTTP method name: " & httpMethod) + + result = await client.requestAux(url, httpMethod, body, headers, multipart) + + var lastURL = url + for i in 1..client.maxRedirects: + let statusCode = result.code + + if statusCode notin {Http301, Http302, Http303, Http307, Http308}: + break - #var r = get("http://validator.w3.org/check?uri=http%3A%2F%2Fgoogle.com& - # charset=%28detect+automatically%29&doctype=Inline&group=0") + let redirectTo = getNewLocation(lastURL, result.headers) + var redirectMethod: HttpMethod + var redirectBody: string + # For more informations about the redirect methods see: + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Redirections + case statusCode + of Http301, Http302, Http303: + # The method is changed to GET unless it is GET or HEAD (RFC2616) + if httpMethod notin {HttpGet, HttpHead}: + redirectMethod = HttpGet + else: + redirectMethod = httpMethod + # The body is stripped away + redirectBody = "" + # Delete any header value associated with the body + if not headers.isNil(): + headers.del("Content-Length") + headers.del("Content-Type") + headers.del("Transfer-Encoding") + of Http307, Http308: + # The method and the body are unchanged + redirectMethod = httpMethod + redirectBody = body + else: + # Unreachable + doAssert(false) + + # Check if 
the redirection is to the same domain or a sub-domain (foo.com + # -> sub.foo.com) + if redirectTo.hostname != lastURL.hostname and + not redirectTo.hostname.endsWith("." & lastURL.hostname): + # Perform some cleanup of the header values + if headers != nil: + # Delete the Host header + headers.del("Host") + # Do not send any sensitive info to a unknown host + headers.del("Authorization") + + result = await client.requestAux(redirectTo, redirectMethod, redirectBody, + headers, multipart) + lastURL = redirectTo + +proc responseContent(resp: Response | AsyncResponse): Future[string] {.multisync.} = + ## Returns the content of a response as a string. + ## + ## A `HttpRequestError` will be raised if the server responds with a + ## client error (status code 4xx) or a server error (status code 5xx). + if resp.code.is4xx or resp.code.is5xx: + raise newException(HttpRequestError, resp.status.move) + else: + return await resp.bodyStream.readAll() - var data = newMultipartData() - data["output"] = "soap12" - data["uploaded_file"] = ("test.html", "text/html", - "<html><head></head><body><p>test</p></body></html>") +proc head*(client: HttpClient | AsyncHttpClient, + url: Uri | string): Future[Response | AsyncResponse] {.multisync.} = + ## Connects to the hostname specified by the URL and performs a HEAD request. + ## + ## This procedure uses httpClient values such as `client.maxRedirects`. + result = await client.request(url, HttpHead) - echo postContent("http://validator.w3.org/check", multipart=data) +proc get*(client: HttpClient | AsyncHttpClient, + url: Uri | string): Future[Response | AsyncResponse] {.multisync.} = + ## Connects to the hostname specified by the URL and performs a GET request. + ## + ## This procedure uses httpClient values such as `client.maxRedirects`. 
+ result = await client.request(url, HttpGet) + +proc getContent*(client: HttpClient | AsyncHttpClient, + url: Uri | string): Future[string] {.multisync.} = + ## Connects to the hostname specified by the URL and returns the content of a GET request. + let resp = await get(client, url) + return await responseContent(resp) + +proc delete*(client: HttpClient | AsyncHttpClient, + url: Uri | string): Future[Response | AsyncResponse] {.multisync.} = + ## Connects to the hostname specified by the URL and performs a DELETE request. + ## This procedure uses httpClient values such as `client.maxRedirects`. + result = await client.request(url, HttpDelete) + +proc deleteContent*(client: HttpClient | AsyncHttpClient, + url: Uri | string): Future[string] {.multisync.} = + ## Connects to the hostname specified by the URL and returns the content of a DELETE request. + let resp = await delete(client, url) + return await responseContent(resp) + +proc post*(client: HttpClient | AsyncHttpClient, url: Uri | string, body = "", + multipart: MultipartData = nil): Future[Response | AsyncResponse] + {.multisync.} = + ## Connects to the hostname specified by the URL and performs a POST request. + ## This procedure uses httpClient values such as `client.maxRedirects`. + result = await client.request(url, HttpPost, body, multipart=multipart) + +proc postContent*(client: HttpClient | AsyncHttpClient, url: Uri | string, body = "", + multipart: MultipartData = nil): Future[string] + {.multisync.} = + ## Connects to the hostname specified by the URL and returns the content of a POST request. + let resp = await post(client, url, body, multipart) + return await responseContent(resp) + +proc put*(client: HttpClient | AsyncHttpClient, url: Uri | string, body = "", + multipart: MultipartData = nil): Future[Response | AsyncResponse] + {.multisync.} = + ## Connects to the hostname specified by the URL and performs a PUT request. + ## This procedure uses httpClient values such as `client.maxRedirects`. 
+ result = await client.request(url, HttpPut, body, multipart=multipart) + +proc putContent*(client: HttpClient | AsyncHttpClient, url: Uri | string, body = "", + multipart: MultipartData = nil): Future[string] {.multisync.} = + ## Connects to the hostname specified by the URL andreturns the content of a PUT request. + let resp = await put(client, url, body, multipart) + return await responseContent(resp) + +proc patch*(client: HttpClient | AsyncHttpClient, url: Uri | string, body = "", + multipart: MultipartData = nil): Future[Response | AsyncResponse] + {.multisync.} = + ## Connects to the hostname specified by the URL and performs a PATCH request. + ## This procedure uses httpClient values such as `client.maxRedirects`. + result = await client.request(url, HttpPatch, body, multipart=multipart) + +proc patchContent*(client: HttpClient | AsyncHttpClient, url: Uri | string, body = "", + multipart: MultipartData = nil): Future[string] + {.multisync.} = + ## Connects to the hostname specified by the URL and returns the content of a PATCH request. + let resp = await patch(client, url, body, multipart) + return await responseContent(resp) + +proc downloadFile*(client: HttpClient, url: Uri | string, filename: string) = + ## Downloads `url` and saves it to `filename`. + client.getBody = false + defer: + client.getBody = true + let resp = client.get(url) + + if resp.code.is4xx or resp.code.is5xx: + raise newException(HttpRequestError, resp.status) + + client.bodyStream = newFileStream(filename, fmWrite) + if client.bodyStream.isNil: + fileError("Unable to open file") + parseBody(client, resp.headers, resp.version) + client.bodyStream.close() + +proc downloadFileEx(client: AsyncHttpClient, + url: Uri | string, filename: string): Future[void] {.async.} = + ## Downloads `url` and saves it to `filename`. 
+ client.getBody = false + let resp = await client.get(url) + + if resp.code.is4xx or resp.code.is5xx: + raise newException(HttpRequestError, resp.status) + + client.bodyStream = newFutureStream[string]("downloadFile") + var file = openAsync(filename, fmWrite) + defer: file.close() + # Let `parseBody` write response data into client.bodyStream in the + # background. + let parseBodyFut = parseBody(client, resp.headers, resp.version) + parseBodyFut.addCallback do(): + if parseBodyFut.failed: + client.bodyStream.fail(parseBodyFut.error) + # The `writeFromStream` proc will complete once all the data in the + # `bodyStream` has been written to the file. + await file.writeFromStream(client.bodyStream) + +proc downloadFile*(client: AsyncHttpClient, url: Uri | string, + filename: string): Future[void] = + result = newFuture[void]("downloadFile") + try: + result = downloadFileEx(client, url, filename) + except Exception as exc: + result.fail(exc) + finally: + result.addCallback( + proc () = client.getBody = true + ) diff --git a/lib/pure/httpcore.nim b/lib/pure/httpcore.nim new file mode 100644 index 000000000..5ccab379c --- /dev/null +++ b/lib/pure/httpcore.nim @@ -0,0 +1,368 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Dominik Picheta +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Contains functionality shared between the `httpclient` and +## `asynchttpserver` modules. +## +## Unstable API. +import std/private/since +import std/[tables, strutils, parseutils] + +type + HttpHeaders* = ref object + table*: TableRef[string, seq[string]] + isTitleCase: bool + + HttpHeaderValues* = distinct seq[string] + + # The range starts at '0' so that we don't have to explicitly initialise + # it. See: http://irclogs.nim-lang.org/19-09-2016.html#19:48:27 for context. + HttpCode* = distinct range[0 .. 
599] + + HttpVersion* = enum + HttpVer11, + HttpVer10 + + HttpMethod* = enum ## the requested HttpMethod + HttpHead = "HEAD" ## Asks for the response identical to the one that + ## would correspond to a GET request, but without + ## the response body. + HttpGet = "GET" ## Retrieves the specified resource. + HttpPost = "POST" ## Submits data to be processed to the identified + ## resource. The data is included in the body of + ## the request. + HttpPut = "PUT" ## Uploads a representation of the specified + ## resource. + HttpDelete = "DELETE" ## Deletes the specified resource. + HttpTrace = "TRACE" ## Echoes back the received request, so that a + ## client + ## can see what intermediate servers are adding or + ## changing in the request. + HttpOptions = "OPTIONS" ## Returns the HTTP methods that the server + ## supports for specified address. + HttpConnect = "CONNECT" ## Converts the request connection to a transparent + ## TCP/IP tunnel, usually used for proxies. + HttpPatch = "PATCH" ## Applies partial modifications to a resource. 
+ + +const + Http100* = HttpCode(100) + Http101* = HttpCode(101) + Http102* = HttpCode(102) ## https://tools.ietf.org/html/rfc2518.html WebDAV + Http103* = HttpCode(103) ## https://tools.ietf.org/html/rfc8297.html Early hints + Http200* = HttpCode(200) + Http201* = HttpCode(201) + Http202* = HttpCode(202) + Http203* = HttpCode(203) + Http204* = HttpCode(204) + Http205* = HttpCode(205) + Http206* = HttpCode(206) + Http207* = HttpCode(207) ## https://tools.ietf.org/html/rfc4918.html WebDAV + Http208* = HttpCode(208) ## https://tools.ietf.org/html/rfc5842.html WebDAV, Section 7.1 + Http226* = HttpCode(226) ## https://tools.ietf.org/html/rfc3229.html Delta encoding, Section 10.4.1 + Http300* = HttpCode(300) + Http301* = HttpCode(301) + Http302* = HttpCode(302) + Http303* = HttpCode(303) + Http304* = HttpCode(304) + Http305* = HttpCode(305) + Http307* = HttpCode(307) + Http308* = HttpCode(308) + Http400* = HttpCode(400) + Http401* = HttpCode(401) + Http402* = HttpCode(402) ## https://tools.ietf.org/html/rfc7231.html Payment required, Section 6.5.2 + Http403* = HttpCode(403) + Http404* = HttpCode(404) + Http405* = HttpCode(405) + Http406* = HttpCode(406) + Http407* = HttpCode(407) + Http408* = HttpCode(408) + Http409* = HttpCode(409) + Http410* = HttpCode(410) + Http411* = HttpCode(411) + Http412* = HttpCode(412) + Http413* = HttpCode(413) + Http414* = HttpCode(414) + Http415* = HttpCode(415) + Http416* = HttpCode(416) + Http417* = HttpCode(417) + Http418* = HttpCode(418) + Http421* = HttpCode(421) + Http422* = HttpCode(422) + Http423* = HttpCode(423) ## https://tools.ietf.org/html/rfc4918.html WebDAV, Section 11.3 + Http424* = HttpCode(424) ## https://tools.ietf.org/html/rfc4918.html WebDAV, Section 11.3 + Http425* = HttpCode(425) ## https://tools.ietf.org/html/rfc8470.html Early data + Http426* = HttpCode(426) + Http428* = HttpCode(428) + Http429* = HttpCode(429) + Http431* = HttpCode(431) + Http451* = HttpCode(451) + Http500* = HttpCode(500) + Http501* = HttpCode(501) 
+ Http502* = HttpCode(502) + Http503* = HttpCode(503) + Http504* = HttpCode(504) + Http505* = HttpCode(505) + Http506* = HttpCode(506) ## https://tools.ietf.org/html/rfc2295.html Content negotiation, Section 8.1 + Http507* = HttpCode(507) ## https://tools.ietf.org/html/rfc4918.html WebDAV, Section 11.5 + Http508* = HttpCode(508) ## https://tools.ietf.org/html/rfc5842.html WebDAV, Section 7.2 + Http510* = HttpCode(510) ## https://tools.ietf.org/html/rfc2774.html Extension framework, Section 7 + Http511* = HttpCode(511) ## https://tools.ietf.org/html/rfc6585.html Additional status code, Section 6 + + +const httpNewLine* = "\c\L" +const headerLimit* = 10_000 + +func toTitleCase(s: string): string = + result = newString(len(s)) + var upper = true + for i in 0..len(s) - 1: + result[i] = if upper: toUpperAscii(s[i]) else: toLowerAscii(s[i]) + upper = s[i] == '-' + +func toCaseInsensitive*(headers: HttpHeaders, s: string): string {.inline.} = + ## For internal usage only. Do not use. + return if headers.isTitleCase: toTitleCase(s) else: toLowerAscii(s) + +func newHttpHeaders*(titleCase=false): HttpHeaders = + ## Returns a new `HttpHeaders` object. if `titleCase` is set to true, + ## headers are passed to the server in title case (e.g. "Content-Length") + result = HttpHeaders(table: newTable[string, seq[string]](), isTitleCase: titleCase) + +func newHttpHeaders*(keyValuePairs: + openArray[tuple[key: string, val: string]], titleCase=false): HttpHeaders = + ## Returns a new `HttpHeaders` object from an array. if `titleCase` is set to true, + ## headers are passed to the server in title case (e.g. 
"Content-Length") + result = HttpHeaders(table: newTable[string, seq[string]](), isTitleCase: titleCase) + + for pair in keyValuePairs: + let key = result.toCaseInsensitive(pair.key) + {.cast(noSideEffect).}: + if key in result.table: + result.table[key].add(pair.val) + else: + result.table[key] = @[pair.val] + +func `$`*(headers: HttpHeaders): string {.inline.} = + $headers.table + +proc clear*(headers: HttpHeaders) {.inline.} = + headers.table.clear() + +func `[]`*(headers: HttpHeaders, key: string): HttpHeaderValues = + ## Returns the values associated with the given `key`. If the returned + ## values are passed to a procedure expecting a `string`, the first + ## value is automatically picked. If there are + ## no values associated with the key, an exception is raised. + ## + ## To access multiple values of a key, use the overloaded `[]` below or + ## to get all of them access the `table` field directly. + {.cast(noSideEffect).}: + let tmp = headers.table[headers.toCaseInsensitive(key)] + return HttpHeaderValues(tmp) + +converter toString*(values: HttpHeaderValues): string = + return seq[string](values)[0] + +func `[]`*(headers: HttpHeaders, key: string, i: int): string = + ## Returns the `i`'th value associated with the given key. If there are + ## no values associated with the key or the `i`'th value doesn't exist, + ## an exception is raised. + {.cast(noSideEffect).}: + return headers.table[headers.toCaseInsensitive(key)][i] + +proc `[]=`*(headers: HttpHeaders, key, value: string) = + ## Sets the header entries associated with `key` to the specified value. + ## Replaces any existing values. + headers.table[headers.toCaseInsensitive(key)] = @[value] + +proc `[]=`*(headers: HttpHeaders, key: string, value: seq[string]) = + ## Sets the header entries associated with `key` to the specified list of + ## values. Replaces any existing values. If `value` is empty, + ## deletes the header entries associated with `key`. 
+ if value.len > 0: + headers.table[headers.toCaseInsensitive(key)] = value + else: + headers.table.del(headers.toCaseInsensitive(key)) + +proc add*(headers: HttpHeaders, key, value: string) = + ## Adds the specified value to the specified key. Appends to any existing + ## values associated with the key. + if not headers.table.hasKey(headers.toCaseInsensitive(key)): + headers.table[headers.toCaseInsensitive(key)] = @[value] + else: + headers.table[headers.toCaseInsensitive(key)].add(value) + +proc del*(headers: HttpHeaders, key: string) = + ## Deletes the header entries associated with `key` + headers.table.del(headers.toCaseInsensitive(key)) + +iterator pairs*(headers: HttpHeaders): tuple[key, value: string] = + ## Yields each key, value pair. + for k, v in headers.table: + for value in v: + yield (k, value) + +func contains*(values: HttpHeaderValues, value: string): bool = + ## Determines if `value` is one of the values inside `values`. Comparison + ## is performed without case sensitivity. + for val in seq[string](values): + if val.toLowerAscii == value.toLowerAscii: return true + +func hasKey*(headers: HttpHeaders, key: string): bool = + return headers.table.hasKey(headers.toCaseInsensitive(key)) + +func getOrDefault*(headers: HttpHeaders, key: string, + default = @[""].HttpHeaderValues): HttpHeaderValues = + ## Returns the values associated with the given `key`. If there are no + ## values associated with the key, then `default` is returned. 
+ if headers.hasKey(key): + return headers[key] + else: + return default + +func len*(headers: HttpHeaders): int {.inline.} = headers.table.len + +func parseList(line: string, list: var seq[string], start: int): int = + var i = 0 + var current = "" + while start+i < line.len and line[start + i] notin {'\c', '\l'}: + i += line.skipWhitespace(start + i) + i += line.parseUntil(current, {'\c', '\l', ','}, start + i) + list.add(move current) # implicit current.setLen(0) + if start+i < line.len and line[start + i] == ',': + i.inc # Skip , + +func parseHeader*(line: string): tuple[key: string, value: seq[string]] = + ## Parses a single raw header HTTP line into key value pairs. + ## + ## Used by `asynchttpserver` and `httpclient` internally and should not + ## be used by you. + result.value = @[] + var i = 0 + i = line.parseUntil(result.key, ':') + inc(i) # skip : + if i < len(line): + if cmpIgnoreCase(result.key, "cookie") == 0: + i += line.skipWhitespace(i) + result.value.add line.substr(i) + else: + i += parseList(line, result.value, i) + elif result.key.len > 0: + result.value = @[""] + else: + result.value = @[] + +func `==`*(protocol: tuple[orig: string, major, minor: int], + ver: HttpVersion): bool = + let major = + case ver + of HttpVer11, HttpVer10: 1 + let minor = + case ver + of HttpVer11: 1 + of HttpVer10: 0 + result = protocol.major == major and protocol.minor == minor + +func contains*(methods: set[HttpMethod], x: string): bool = + return parseEnum[HttpMethod](x) in methods + +func `$`*(code: HttpCode): string = + ## Converts the specified `HttpCode` into a HTTP status. 
+ runnableExamples: + doAssert($Http404 == "404 Not Found") + case code.int + of 100: "100 Continue" + of 101: "101 Switching Protocols" + of 102: "102 Processing" + of 103: "103 Early Hints" + of 200: "200 OK" + of 201: "201 Created" + of 202: "202 Accepted" + of 203: "203 Non-Authoritative Information" + of 204: "204 No Content" + of 205: "205 Reset Content" + of 206: "206 Partial Content" + of 207: "207 Multi-Status" + of 208: "208 Already Reported" + of 226: "226 IM Used" + of 300: "300 Multiple Choices" + of 301: "301 Moved Permanently" + of 302: "302 Found" + of 303: "303 See Other" + of 304: "304 Not Modified" + of 305: "305 Use Proxy" + of 307: "307 Temporary Redirect" + of 308: "308 Permanent Redirect" + of 400: "400 Bad Request" + of 401: "401 Unauthorized" + of 402: "402 Payment Required" + of 403: "403 Forbidden" + of 404: "404 Not Found" + of 405: "405 Method Not Allowed" + of 406: "406 Not Acceptable" + of 407: "407 Proxy Authentication Required" + of 408: "408 Request Timeout" + of 409: "409 Conflict" + of 410: "410 Gone" + of 411: "411 Length Required" + of 412: "412 Precondition Failed" + of 413: "413 Request Entity Too Large" + of 414: "414 Request-URI Too Long" + of 415: "415 Unsupported Media Type" + of 416: "416 Requested Range Not Satisfiable" + of 417: "417 Expectation Failed" + of 418: "418 I'm a teapot" + of 421: "421 Misdirected Request" + of 422: "422 Unprocessable Entity" + of 423: "423 Locked" + of 424: "424 Failed Dependency" + of 425: "425 Too Early" + of 426: "426 Upgrade Required" + of 428: "428 Precondition Required" + of 429: "429 Too Many Requests" + of 431: "431 Request Header Fields Too Large" + of 451: "451 Unavailable For Legal Reasons" + of 500: "500 Internal Server Error" + of 501: "501 Not Implemented" + of 502: "502 Bad Gateway" + of 503: "503 Service Unavailable" + of 504: "504 Gateway Timeout" + of 505: "505 HTTP Version Not Supported" + of 506: "506 Variant Also Negotiates" + of 507: "507 Insufficient Storage" + of 
508: "508 Loop Detected" + of 510: "510 Not Extended" + of 511: "511 Network Authentication Required" + else: $(int(code)) + +func `==`*(a, b: HttpCode): bool {.borrow.} + +func is1xx*(code: HttpCode): bool {.inline, since: (1, 5).} = + ## Determines whether `code` is a 1xx HTTP status code. + runnableExamples: + doAssert is1xx(HttpCode(103)) + + code.int in 100 .. 199 + +func is2xx*(code: HttpCode): bool {.inline.} = + ## Determines whether `code` is a 2xx HTTP status code. + code.int in 200 .. 299 + +func is3xx*(code: HttpCode): bool {.inline.} = + ## Determines whether `code` is a 3xx HTTP status code. + code.int in 300 .. 399 + +func is4xx*(code: HttpCode): bool {.inline.} = + ## Determines whether `code` is a 4xx HTTP status code. + code.int in 400 .. 499 + +func is5xx*(code: HttpCode): bool {.inline.} = + ## Determines whether `code` is a 5xx HTTP status code. + code.int in 500 .. 599 diff --git a/lib/pure/httpserver.nim b/lib/pure/httpserver.nim deleted file mode 100644 index 5efdbe297..000000000 --- a/lib/pure/httpserver.nim +++ /dev/null @@ -1,532 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf, Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements a simple HTTP-Server. -## -## Example: -## -## .. code-block:: nim -## import strutils, sockets, httpserver -## -## var counter = 0 -## proc handleRequest(client: Socket, path, query: string): bool {.procvar.} = -## inc(counter) -## client.send("Hello for the $#th time." 
% $counter & wwwNL) -## return false # do not stop processing -## -## run(handleRequest, Port(80)) -## - -import parseutils, strutils, os, osproc, strtabs, streams, sockets, asyncio - -const - wwwNL* = "\r\L" - ServerSig = "Server: httpserver.nim/1.0.0" & wwwNL - -# --------------- output messages -------------------------------------------- - -proc sendTextContentType(client: Socket) = - send(client, "Content-type: text/html" & wwwNL) - send(client, wwwNL) - -proc sendStatus(client: Socket, status: string) = - send(client, "HTTP/1.1 " & status & wwwNL) - -proc badRequest(client: Socket) = - # Inform the client that a request it has made has a problem. - send(client, "HTTP/1.1 400 Bad Request" & wwwNL) - sendTextContentType(client) - send(client, "<p>Your browser sent a bad request, " & - "such as a POST without a Content-Length.</p>" & wwwNL) - -when false: - proc cannotExec(client: Socket) = - send(client, "HTTP/1.1 500 Internal Server Error" & wwwNL) - sendTextContentType(client) - send(client, "<P>Error prohibited CGI execution." 
& wwwNL) - -proc headers(client: Socket, filename: string) = - # XXX could use filename to determine file type - send(client, "HTTP/1.1 200 OK" & wwwNL) - send(client, ServerSig) - sendTextContentType(client) - -proc notFound(client: Socket) = - send(client, "HTTP/1.1 404 NOT FOUND" & wwwNL) - send(client, ServerSig) - sendTextContentType(client) - send(client, "<html><title>Not Found</title>" & wwwNL) - send(client, "<body><p>The server could not fulfill" & wwwNL) - send(client, "your request because the resource specified" & wwwNL) - send(client, "is unavailable or nonexistent.</p>" & wwwNL) - send(client, "</body></html>" & wwwNL) - -proc unimplemented(client: Socket) = - send(client, "HTTP/1.1 501 Method Not Implemented" & wwwNL) - send(client, ServerSig) - sendTextContentType(client) - send(client, "<html><head><title>Method Not Implemented" & - "</title></head>" & - "<body><p>HTTP request method not supported.</p>" & - "</body></HTML>" & wwwNL) - -# ----------------- file serving --------------------------------------------- - -when false: - proc discardHeaders(client: Socket) = skip(client) - -proc serveFile*(client: Socket, filename: string) = - ## serves a file to the client. - var f: File - if open(f, filename): - headers(client, filename) - const bufSize = 8000 # != 8K might be good for memory manager - var buf = alloc(bufsize) - while true: - var bytesread = readBuffer(f, buf, bufsize) - if bytesread > 0: - var byteswritten = send(client, buf, bytesread) - if bytesread != bytesWritten: - dealloc(buf) - close(f) - raiseOSError(osLastError()) - if bytesread != bufSize: break - dealloc(buf) - close(f) - else: - notFound(client) - -# ------------------ CGI execution ------------------------------------------- -when false: - # TODO: Fix this, or get rid of it. 
- type - TRequestMethod = enum reqGet, reqPost - - proc executeCgi(client: Socket, path, query: string, meth: TRequestMethod) = - var env = newStringTable(modeCaseInsensitive) - var contentLength = -1 - case meth - of reqGet: - discardHeaders(client) - - env["REQUEST_METHOD"] = "GET" - env["QUERY_STRING"] = query - of reqPost: - var buf = TaintedString"" - var dataAvail = false - while dataAvail: - dataAvail = recvLine(client, buf) # TODO: This is incorrect. - var L = toLower(buf.string) - if L.startsWith("content-length:"): - var i = len("content-length:") - while L[i] in Whitespace: inc(i) - contentLength = parseInt(substr(L, i)) - - if contentLength < 0: - badRequest(client) - return - - env["REQUEST_METHOD"] = "POST" - env["CONTENT_LENGTH"] = $contentLength - - send(client, "HTTP/1.0 200 OK" & wwwNL) - - var process = startProcess(command=path, env=env) - if meth == reqPost: - # get from client and post to CGI program: - var buf = alloc(contentLength) - if recv(client, buf, contentLength) != contentLength: - dealloc(buf) - raiseOSError() - var inp = process.inputStream - inp.writeData(buf, contentLength) - dealloc(buf) - - var outp = process.outputStream - var line = newStringOfCap(120).TaintedString - while true: - if outp.readLine(line): - send(client, line.string) - send(client, wwwNL) - elif not running(process): break - - # --------------- Server Setup ----------------------------------------------- - - proc acceptRequest(client: Socket) = - var cgi = false - var query = "" - var buf = TaintedString"" - discard recvLine(client, buf) - var path = "" - var data = buf.string.split() - var meth = reqGet - - var q = find(data[1], '?') - - # extract path - if q >= 0: - # strip "?..." from path, this may be found in both POST and GET - path = "." & data[1].substr(0, q-1) - else: - path = "." & data[1] - # path starts with "/", by adding "." 
in front of it we serve files from cwd - - if cmpIgnoreCase(data[0], "GET") == 0: - if q >= 0: - cgi = true - query = data[1].substr(q+1) - elif cmpIgnoreCase(data[0], "POST") == 0: - cgi = true - meth = reqPost - else: - unimplemented(client) - - if path[path.len-1] == '/' or existsDir(path): - path = path / "index.html" - - if not existsFile(path): - discardHeaders(client) - notFound(client) - else: - when defined(Windows): - var ext = splitFile(path).ext.toLower - if ext == ".exe" or ext == ".cgi": - # XXX: extract interpreter information here? - cgi = true - else: - if {fpUserExec, fpGroupExec, fpOthersExec} * path.getFilePermissions != {}: - cgi = true - if not cgi: - serveFile(client, path) - else: - executeCgi(client, path, query, meth) - -type - TServer* = object of RootObj ## contains the current server state - socket: Socket - port: Port - client*: Socket ## the socket to write the file data to - reqMethod*: string ## Request method. GET or POST. - path*, query*: string ## path and query the client requested - headers*: StringTableRef ## headers with which the client made the request - body*: string ## only set with POST requests - ip*: string ## ip address of the requesting client - - PAsyncHTTPServer* = ref TAsyncHTTPServer - TAsyncHTTPServer = object of TServer - asyncSocket: AsyncSocket - -proc open*(s: var TServer, port = Port(80), reuseAddr = false) = - ## creates a new server at port `port`. If ``port == 0`` a free port is - ## acquired that can be accessed later by the ``port`` proc. 
- s.socket = socket(AF_INET) - if s.socket == invalidSocket: raiseOSError(osLastError()) - if reuseAddr: - s.socket.setSockOpt(OptReuseAddr, true) - bindAddr(s.socket, port) - listen(s.socket) - - if port == Port(0): - s.port = getSockName(s.socket) - else: - s.port = port - s.client = invalidSocket - s.reqMethod = "" - s.body = "" - s.path = "" - s.query = "" - s.headers = {:}.newStringTable() - -proc port*(s: var TServer): Port = - ## get the port number the server has acquired. - result = s.port - -proc next*(s: var TServer) = - ## proceed to the first/next request. - var client: Socket - new(client) - var ip: string - acceptAddr(s.socket, client, ip) - s.client = client - s.ip = ip - s.headers = newStringTable(modeCaseInsensitive) - #headers(s.client, "") - var data = "" - s.client.readLine(data) - if data == "": - # Socket disconnected - s.client.close() - next(s) - return - var header = "" - while true: - s.client.readLine(header) - if header == "\c\L": break - if header != "": - var i = 0 - var key = "" - var value = "" - i = header.parseUntil(key, ':') - inc(i) # skip : - i += header.skipWhiteSpace(i) - i += header.parseUntil(value, {'\c', '\L'}, i) - s.headers[key] = value - else: - s.client.close() - next(s) - return - - var i = skipWhitespace(data) - if skipIgnoreCase(data, "GET") > 0: - s.reqMethod = "GET" - inc(i, 3) - elif skipIgnoreCase(data, "POST") > 0: - s.reqMethod = "POST" - inc(i, 4) - else: - unimplemented(s.client) - s.client.close() - next(s) - return - - if s.reqMethod == "POST": - # Check for Expect header - if s.headers.hasKey("Expect"): - if s.headers["Expect"].toLower == "100-continue": - s.client.sendStatus("100 Continue") - else: - s.client.sendStatus("417 Expectation Failed") - - # Read the body - # - Check for Content-length header - if s.headers.hasKey("Content-Length"): - var contentLength = 0 - if parseInt(s.headers["Content-Length"], contentLength) == 0: - badRequest(s.client) - s.client.close() - next(s) - return - else: - var 
totalRead = 0 - var totalBody = "" - while totalRead < contentLength: - var chunkSize = 8000 - if (contentLength - totalRead) < 8000: - chunkSize = (contentLength - totalRead) - var bodyData = newString(chunkSize) - var octetsRead = s.client.recv(cstring(bodyData), chunkSize) - if octetsRead <= 0: - s.client.close() - next(s) - return - totalRead += octetsRead - totalBody.add(bodyData) - if totalBody.len != contentLength: - s.client.close() - next(s) - return - - s.body = totalBody - else: - badRequest(s.client) - s.client.close() - next(s) - return - - var L = skipWhitespace(data, i) - inc(i, L) - # XXX we ignore "HTTP/1.1" etc. for now here - var query = 0 - var last = i - while last < data.len and data[last] notin Whitespace: - if data[last] == '?' and query == 0: query = last - inc(last) - if query > 0: - s.query = data.substr(query+1, last-1) - s.path = data.substr(i, query-1) - else: - s.query = "" - s.path = data.substr(i, last-1) - -proc close*(s: TServer) = - ## closes the server (and the socket the server uses). - close(s.socket) - -proc run*(handleRequest: proc (client: Socket, - path, query: string): bool {.closure.}, - port = Port(80)) = - ## encapsulates the server object and main loop - var s: TServer - open(s, port, reuseAddr = true) - #echo("httpserver running on port ", s.port) - while true: - next(s) - if handleRequest(s.client, s.path, s.query): break - close(s.client) - close(s) - -# -- AsyncIO begin - -proc nextAsync(s: PAsyncHTTPServer) = - ## proceed to the first/next request. - var client: Socket - new(client) - var ip: string - acceptAddr(getSocket(s.asyncSocket), client, ip) - s.client = client - s.ip = ip - s.headers = newStringTable(modeCaseInsensitive) - #headers(s.client, "") - var data = "" - s.client.readLine(data) - if data == "": - # Socket disconnected - s.client.close() - return - var header = "" - while true: - s.client.readLine(header) # TODO: Very inefficient here. Prone to DOS. 
- if header == "\c\L": break - if header != "": - var i = 0 - var key = "" - var value = "" - i = header.parseUntil(key, ':') - inc(i) # skip : - if i < header.len: - i += header.skipWhiteSpace(i) - i += header.parseUntil(value, {'\c', '\L'}, i) - s.headers[key] = value - else: - s.client.close() - return - - var i = skipWhitespace(data) - if skipIgnoreCase(data, "GET") > 0: - s.reqMethod = "GET" - inc(i, 3) - elif skipIgnoreCase(data, "POST") > 0: - s.reqMethod = "POST" - inc(i, 4) - else: - unimplemented(s.client) - s.client.close() - return - - if s.reqMethod == "POST": - # Check for Expect header - if s.headers.hasKey("Expect"): - if s.headers["Expect"].toLower == "100-continue": - s.client.sendStatus("100 Continue") - else: - s.client.sendStatus("417 Expectation Failed") - - # Read the body - # - Check for Content-length header - if s.headers.hasKey("Content-Length"): - var contentLength = 0 - if parseInt(s.headers["Content-Length"], contentLength) == 0: - badRequest(s.client) - s.client.close() - return - else: - var totalRead = 0 - var totalBody = "" - while totalRead < contentLength: - var chunkSize = 8000 - if (contentLength - totalRead) < 8000: - chunkSize = (contentLength - totalRead) - var bodyData = newString(chunkSize) - var octetsRead = s.client.recv(cstring(bodyData), chunkSize) - if octetsRead <= 0: - s.client.close() - return - totalRead += octetsRead - totalBody.add(bodyData) - if totalBody.len != contentLength: - s.client.close() - return - - s.body = totalBody - else: - badRequest(s.client) - s.client.close() - return - - var L = skipWhitespace(data, i) - inc(i, L) - # XXX we ignore "HTTP/1.1" etc. for now here - var query = 0 - var last = i - while last < data.len and data[last] notin Whitespace: - if data[last] == '?' 
and query == 0: query = last - inc(last) - if query > 0: - s.query = data.substr(query+1, last-1) - s.path = data.substr(i, query-1) - else: - s.query = "" - s.path = data.substr(i, last-1) - -proc asyncHTTPServer*(handleRequest: proc (server: PAsyncHTTPServer, client: Socket, - path, query: string): bool {.closure, gcsafe.}, - port = Port(80), address = "", - reuseAddr = false): PAsyncHTTPServer = - ## Creates an Asynchronous HTTP server at ``port``. - var capturedRet: PAsyncHTTPServer - new(capturedRet) - capturedRet.asyncSocket = asyncSocket() - capturedRet.asyncSocket.handleAccept = - proc (s: AsyncSocket) = - nextAsync(capturedRet) - let quit = handleRequest(capturedRet, capturedRet.client, capturedRet.path, - capturedRet.query) - if quit: capturedRet.asyncSocket.close() - if reuseAddr: - capturedRet.asyncSocket.setSockOpt(OptReuseAddr, true) - - capturedRet.asyncSocket.bindAddr(port, address) - capturedRet.asyncSocket.listen() - if port == Port(0): - capturedRet.port = getSockName(capturedRet.asyncSocket) - else: - capturedRet.port = port - - capturedRet.client = invalidSocket - capturedRet.reqMethod = "" - capturedRet.body = "" - capturedRet.path = "" - capturedRet.query = "" - capturedRet.headers = {:}.newStringTable() - result = capturedRet - -proc register*(d: Dispatcher, s: PAsyncHTTPServer) = - ## Registers a ``PAsyncHTTPServer`` with a ``Dispatcher``. - d.register(s.asyncSocket) - -proc close*(h: PAsyncHTTPServer) = - ## Closes the ``PAsyncHTTPServer``. - h.asyncSocket.close() - -when isMainModule: - var counter = 0 - - var s: TServer - open(s, Port(0)) - echo("httpserver running on port ", s.port) - while true: - next(s) - - inc(counter) - s.client.send("Hello, Andreas, for the $#th time. $# ? 
$#" % [ - $counter, s.path, s.query] & wwwNL) - - close(s.client) - close(s) - diff --git a/lib/pure/includes/unicode_ranges.nim b/lib/pure/includes/unicode_ranges.nim new file mode 100644 index 000000000..04ccfb747 --- /dev/null +++ b/lib/pure/includes/unicode_ranges.nim @@ -0,0 +1,2007 @@ +# This file was created from a script. + +const + toLowerRanges = [ + 0x00041'i32, 0x0005A'i32, 532, + 0x000C0'i32, 0x000D6'i32, 532, + 0x000D8'i32, 0x000DE'i32, 532, + 0x00189'i32, 0x0018A'i32, 705, + 0x001B1'i32, 0x001B2'i32, 717, + 0x00388'i32, 0x0038A'i32, 537, + 0x0038E'i32, 0x0038F'i32, 563, + 0x00391'i32, 0x003A1'i32, 532, + 0x003A3'i32, 0x003AB'i32, 532, + 0x003FD'i32, 0x003FF'i32, 370, + 0x00400'i32, 0x0040F'i32, 580, + 0x00410'i32, 0x0042F'i32, 532, + 0x00531'i32, 0x00556'i32, 548, + 0x010A0'i32, 0x010C5'i32, 7764, + 0x013A0'i32, 0x013EF'i32, 39364, + 0x013F0'i32, 0x013F5'i32, 508, + 0x01C90'i32, 0x01CBA'i32, -2508, + 0x01CBD'i32, 0x01CBF'i32, -2508, + 0x01F08'i32, 0x01F0F'i32, 492, + 0x01F18'i32, 0x01F1D'i32, 492, + 0x01F28'i32, 0x01F2F'i32, 492, + 0x01F38'i32, 0x01F3F'i32, 492, + 0x01F48'i32, 0x01F4D'i32, 492, + 0x01F68'i32, 0x01F6F'i32, 492, + 0x01F88'i32, 0x01F8F'i32, 492, + 0x01F98'i32, 0x01F9F'i32, 492, + 0x01FA8'i32, 0x01FAF'i32, 492, + 0x01FB8'i32, 0x01FB9'i32, 492, + 0x01FBA'i32, 0x01FBB'i32, 426, + 0x01FC8'i32, 0x01FCB'i32, 414, + 0x01FD8'i32, 0x01FD9'i32, 492, + 0x01FDA'i32, 0x01FDB'i32, 400, + 0x01FE8'i32, 0x01FE9'i32, 492, + 0x01FEA'i32, 0x01FEB'i32, 388, + 0x01FF8'i32, 0x01FF9'i32, 372, + 0x01FFA'i32, 0x01FFB'i32, 374, + 0x02C00'i32, 0x02C2E'i32, 548, + 0x02C7E'i32, 0x02C7F'i32, -10315, + 0x0FF21'i32, 0x0FF3A'i32, 532, + 0x10400'i32, 0x10427'i32, 540, + 0x104B0'i32, 0x104D3'i32, 540, + 0x10C80'i32, 0x10CB2'i32, 564, + 0x118A0'i32, 0x118BF'i32, 532, + 0x16E40'i32, 0x16E5F'i32, 532, + 0x1E900'i32, 0x1E921'i32, 534, + ] + + toLowerSinglets = [ + 0x00100'i32, 501, + 0x00102'i32, 501, + 0x00104'i32, 501, + 0x00106'i32, 501, + 0x00108'i32, 501, + 0x0010A'i32, 
501, + 0x0010C'i32, 501, + 0x0010E'i32, 501, + 0x00110'i32, 501, + 0x00112'i32, 501, + 0x00114'i32, 501, + 0x00116'i32, 501, + 0x00118'i32, 501, + 0x0011A'i32, 501, + 0x0011C'i32, 501, + 0x0011E'i32, 501, + 0x00120'i32, 501, + 0x00122'i32, 501, + 0x00124'i32, 501, + 0x00126'i32, 501, + 0x00128'i32, 501, + 0x0012A'i32, 501, + 0x0012C'i32, 501, + 0x0012E'i32, 501, + 0x00130'i32, 301, + 0x00132'i32, 501, + 0x00134'i32, 501, + 0x00136'i32, 501, + 0x00139'i32, 501, + 0x0013B'i32, 501, + 0x0013D'i32, 501, + 0x0013F'i32, 501, + 0x00141'i32, 501, + 0x00143'i32, 501, + 0x00145'i32, 501, + 0x00147'i32, 501, + 0x0014A'i32, 501, + 0x0014C'i32, 501, + 0x0014E'i32, 501, + 0x00150'i32, 501, + 0x00152'i32, 501, + 0x00154'i32, 501, + 0x00156'i32, 501, + 0x00158'i32, 501, + 0x0015A'i32, 501, + 0x0015C'i32, 501, + 0x0015E'i32, 501, + 0x00160'i32, 501, + 0x00162'i32, 501, + 0x00164'i32, 501, + 0x00166'i32, 501, + 0x00168'i32, 501, + 0x0016A'i32, 501, + 0x0016C'i32, 501, + 0x0016E'i32, 501, + 0x00170'i32, 501, + 0x00172'i32, 501, + 0x00174'i32, 501, + 0x00176'i32, 501, + 0x00178'i32, 379, + 0x00179'i32, 501, + 0x0017B'i32, 501, + 0x0017D'i32, 501, + 0x00181'i32, 710, + 0x00182'i32, 501, + 0x00184'i32, 501, + 0x00186'i32, 706, + 0x00187'i32, 501, + 0x0018B'i32, 501, + 0x0018E'i32, 579, + 0x0018F'i32, 702, + 0x00190'i32, 703, + 0x00191'i32, 501, + 0x00193'i32, 705, + 0x00194'i32, 707, + 0x00196'i32, 711, + 0x00197'i32, 709, + 0x00198'i32, 501, + 0x0019C'i32, 711, + 0x0019D'i32, 713, + 0x0019F'i32, 714, + 0x001A0'i32, 501, + 0x001A2'i32, 501, + 0x001A4'i32, 501, + 0x001A6'i32, 718, + 0x001A7'i32, 501, + 0x001A9'i32, 718, + 0x001AC'i32, 501, + 0x001AE'i32, 718, + 0x001AF'i32, 501, + 0x001B3'i32, 501, + 0x001B5'i32, 501, + 0x001B7'i32, 719, + 0x001B8'i32, 501, + 0x001BC'i32, 501, + 0x001C4'i32, 502, + 0x001C5'i32, 501, + 0x001C7'i32, 502, + 0x001C8'i32, 501, + 0x001CA'i32, 502, + 0x001CB'i32, 501, + 0x001CD'i32, 501, + 0x001CF'i32, 501, + 0x001D1'i32, 501, + 0x001D3'i32, 501, + 0x001D5'i32, 
501, + 0x001D7'i32, 501, + 0x001D9'i32, 501, + 0x001DB'i32, 501, + 0x001DE'i32, 501, + 0x001E0'i32, 501, + 0x001E2'i32, 501, + 0x001E4'i32, 501, + 0x001E6'i32, 501, + 0x001E8'i32, 501, + 0x001EA'i32, 501, + 0x001EC'i32, 501, + 0x001EE'i32, 501, + 0x001F1'i32, 502, + 0x001F2'i32, 501, + 0x001F4'i32, 501, + 0x001F6'i32, 403, + 0x001F7'i32, 444, + 0x001F8'i32, 501, + 0x001FA'i32, 501, + 0x001FC'i32, 501, + 0x001FE'i32, 501, + 0x00200'i32, 501, + 0x00202'i32, 501, + 0x00204'i32, 501, + 0x00206'i32, 501, + 0x00208'i32, 501, + 0x0020A'i32, 501, + 0x0020C'i32, 501, + 0x0020E'i32, 501, + 0x00210'i32, 501, + 0x00212'i32, 501, + 0x00214'i32, 501, + 0x00216'i32, 501, + 0x00218'i32, 501, + 0x0021A'i32, 501, + 0x0021C'i32, 501, + 0x0021E'i32, 501, + 0x00220'i32, 370, + 0x00222'i32, 501, + 0x00224'i32, 501, + 0x00226'i32, 501, + 0x00228'i32, 501, + 0x0022A'i32, 501, + 0x0022C'i32, 501, + 0x0022E'i32, 501, + 0x00230'i32, 501, + 0x00232'i32, 501, + 0x0023A'i32, 11295, + 0x0023B'i32, 501, + 0x0023D'i32, 337, + 0x0023E'i32, 11292, + 0x00241'i32, 501, + 0x00243'i32, 305, + 0x00244'i32, 569, + 0x00245'i32, 571, + 0x00246'i32, 501, + 0x00248'i32, 501, + 0x0024A'i32, 501, + 0x0024C'i32, 501, + 0x0024E'i32, 501, + 0x00370'i32, 501, + 0x00372'i32, 501, + 0x00376'i32, 501, + 0x0037F'i32, 616, + 0x00386'i32, 538, + 0x0038C'i32, 564, + 0x003CF'i32, 508, + 0x003D8'i32, 501, + 0x003DA'i32, 501, + 0x003DC'i32, 501, + 0x003DE'i32, 501, + 0x003E0'i32, 501, + 0x003E2'i32, 501, + 0x003E4'i32, 501, + 0x003E6'i32, 501, + 0x003E8'i32, 501, + 0x003EA'i32, 501, + 0x003EC'i32, 501, + 0x003EE'i32, 501, + 0x003F4'i32, 440, + 0x003F7'i32, 501, + 0x003F9'i32, 493, + 0x003FA'i32, 501, + 0x00460'i32, 501, + 0x00462'i32, 501, + 0x00464'i32, 501, + 0x00466'i32, 501, + 0x00468'i32, 501, + 0x0046A'i32, 501, + 0x0046C'i32, 501, + 0x0046E'i32, 501, + 0x00470'i32, 501, + 0x00472'i32, 501, + 0x00474'i32, 501, + 0x00476'i32, 501, + 0x00478'i32, 501, + 0x0047A'i32, 501, + 0x0047C'i32, 501, + 0x0047E'i32, 501, + 
0x00480'i32, 501, + 0x0048A'i32, 501, + 0x0048C'i32, 501, + 0x0048E'i32, 501, + 0x00490'i32, 501, + 0x00492'i32, 501, + 0x00494'i32, 501, + 0x00496'i32, 501, + 0x00498'i32, 501, + 0x0049A'i32, 501, + 0x0049C'i32, 501, + 0x0049E'i32, 501, + 0x004A0'i32, 501, + 0x004A2'i32, 501, + 0x004A4'i32, 501, + 0x004A6'i32, 501, + 0x004A8'i32, 501, + 0x004AA'i32, 501, + 0x004AC'i32, 501, + 0x004AE'i32, 501, + 0x004B0'i32, 501, + 0x004B2'i32, 501, + 0x004B4'i32, 501, + 0x004B6'i32, 501, + 0x004B8'i32, 501, + 0x004BA'i32, 501, + 0x004BC'i32, 501, + 0x004BE'i32, 501, + 0x004C0'i32, 515, + 0x004C1'i32, 501, + 0x004C3'i32, 501, + 0x004C5'i32, 501, + 0x004C7'i32, 501, + 0x004C9'i32, 501, + 0x004CB'i32, 501, + 0x004CD'i32, 501, + 0x004D0'i32, 501, + 0x004D2'i32, 501, + 0x004D4'i32, 501, + 0x004D6'i32, 501, + 0x004D8'i32, 501, + 0x004DA'i32, 501, + 0x004DC'i32, 501, + 0x004DE'i32, 501, + 0x004E0'i32, 501, + 0x004E2'i32, 501, + 0x004E4'i32, 501, + 0x004E6'i32, 501, + 0x004E8'i32, 501, + 0x004EA'i32, 501, + 0x004EC'i32, 501, + 0x004EE'i32, 501, + 0x004F0'i32, 501, + 0x004F2'i32, 501, + 0x004F4'i32, 501, + 0x004F6'i32, 501, + 0x004F8'i32, 501, + 0x004FA'i32, 501, + 0x004FC'i32, 501, + 0x004FE'i32, 501, + 0x00500'i32, 501, + 0x00502'i32, 501, + 0x00504'i32, 501, + 0x00506'i32, 501, + 0x00508'i32, 501, + 0x0050A'i32, 501, + 0x0050C'i32, 501, + 0x0050E'i32, 501, + 0x00510'i32, 501, + 0x00512'i32, 501, + 0x00514'i32, 501, + 0x00516'i32, 501, + 0x00518'i32, 501, + 0x0051A'i32, 501, + 0x0051C'i32, 501, + 0x0051E'i32, 501, + 0x00520'i32, 501, + 0x00522'i32, 501, + 0x00524'i32, 501, + 0x00526'i32, 501, + 0x00528'i32, 501, + 0x0052A'i32, 501, + 0x0052C'i32, 501, + 0x0052E'i32, 501, + 0x010C7'i32, 7764, + 0x010CD'i32, 7764, + 0x01E00'i32, 501, + 0x01E02'i32, 501, + 0x01E04'i32, 501, + 0x01E06'i32, 501, + 0x01E08'i32, 501, + 0x01E0A'i32, 501, + 0x01E0C'i32, 501, + 0x01E0E'i32, 501, + 0x01E10'i32, 501, + 0x01E12'i32, 501, + 0x01E14'i32, 501, + 0x01E16'i32, 501, + 0x01E18'i32, 501, + 0x01E1A'i32, 501, 
+ 0x01E1C'i32, 501, + 0x01E1E'i32, 501, + 0x01E20'i32, 501, + 0x01E22'i32, 501, + 0x01E24'i32, 501, + 0x01E26'i32, 501, + 0x01E28'i32, 501, + 0x01E2A'i32, 501, + 0x01E2C'i32, 501, + 0x01E2E'i32, 501, + 0x01E30'i32, 501, + 0x01E32'i32, 501, + 0x01E34'i32, 501, + 0x01E36'i32, 501, + 0x01E38'i32, 501, + 0x01E3A'i32, 501, + 0x01E3C'i32, 501, + 0x01E3E'i32, 501, + 0x01E40'i32, 501, + 0x01E42'i32, 501, + 0x01E44'i32, 501, + 0x01E46'i32, 501, + 0x01E48'i32, 501, + 0x01E4A'i32, 501, + 0x01E4C'i32, 501, + 0x01E4E'i32, 501, + 0x01E50'i32, 501, + 0x01E52'i32, 501, + 0x01E54'i32, 501, + 0x01E56'i32, 501, + 0x01E58'i32, 501, + 0x01E5A'i32, 501, + 0x01E5C'i32, 501, + 0x01E5E'i32, 501, + 0x01E60'i32, 501, + 0x01E62'i32, 501, + 0x01E64'i32, 501, + 0x01E66'i32, 501, + 0x01E68'i32, 501, + 0x01E6A'i32, 501, + 0x01E6C'i32, 501, + 0x01E6E'i32, 501, + 0x01E70'i32, 501, + 0x01E72'i32, 501, + 0x01E74'i32, 501, + 0x01E76'i32, 501, + 0x01E78'i32, 501, + 0x01E7A'i32, 501, + 0x01E7C'i32, 501, + 0x01E7E'i32, 501, + 0x01E80'i32, 501, + 0x01E82'i32, 501, + 0x01E84'i32, 501, + 0x01E86'i32, 501, + 0x01E88'i32, 501, + 0x01E8A'i32, 501, + 0x01E8C'i32, 501, + 0x01E8E'i32, 501, + 0x01E90'i32, 501, + 0x01E92'i32, 501, + 0x01E94'i32, 501, + 0x01E9E'i32, -7115, + 0x01EA0'i32, 501, + 0x01EA2'i32, 501, + 0x01EA4'i32, 501, + 0x01EA6'i32, 501, + 0x01EA8'i32, 501, + 0x01EAA'i32, 501, + 0x01EAC'i32, 501, + 0x01EAE'i32, 501, + 0x01EB0'i32, 501, + 0x01EB2'i32, 501, + 0x01EB4'i32, 501, + 0x01EB6'i32, 501, + 0x01EB8'i32, 501, + 0x01EBA'i32, 501, + 0x01EBC'i32, 501, + 0x01EBE'i32, 501, + 0x01EC0'i32, 501, + 0x01EC2'i32, 501, + 0x01EC4'i32, 501, + 0x01EC6'i32, 501, + 0x01EC8'i32, 501, + 0x01ECA'i32, 501, + 0x01ECC'i32, 501, + 0x01ECE'i32, 501, + 0x01ED0'i32, 501, + 0x01ED2'i32, 501, + 0x01ED4'i32, 501, + 0x01ED6'i32, 501, + 0x01ED8'i32, 501, + 0x01EDA'i32, 501, + 0x01EDC'i32, 501, + 0x01EDE'i32, 501, + 0x01EE0'i32, 501, + 0x01EE2'i32, 501, + 0x01EE4'i32, 501, + 0x01EE6'i32, 501, + 0x01EE8'i32, 501, + 0x01EEA'i32, 
501, + 0x01EEC'i32, 501, + 0x01EEE'i32, 501, + 0x01EF0'i32, 501, + 0x01EF2'i32, 501, + 0x01EF4'i32, 501, + 0x01EF6'i32, 501, + 0x01EF8'i32, 501, + 0x01EFA'i32, 501, + 0x01EFC'i32, 501, + 0x01EFE'i32, 501, + 0x01F59'i32, 492, + 0x01F5B'i32, 492, + 0x01F5D'i32, 492, + 0x01F5F'i32, 492, + 0x01FBC'i32, 491, + 0x01FCC'i32, 491, + 0x01FEC'i32, 493, + 0x01FFC'i32, 491, + 0x02126'i32, -7017, + 0x0212A'i32, -7883, + 0x0212B'i32, -7762, + 0x02132'i32, 528, + 0x02183'i32, 501, + 0x02C60'i32, 501, + 0x02C62'i32, -10243, + 0x02C63'i32, -3314, + 0x02C64'i32, -10227, + 0x02C67'i32, 501, + 0x02C69'i32, 501, + 0x02C6B'i32, 501, + 0x02C6D'i32, -10280, + 0x02C6E'i32, -10249, + 0x02C6F'i32, -10283, + 0x02C70'i32, -10282, + 0x02C72'i32, 501, + 0x02C75'i32, 501, + 0x02C80'i32, 501, + 0x02C82'i32, 501, + 0x02C84'i32, 501, + 0x02C86'i32, 501, + 0x02C88'i32, 501, + 0x02C8A'i32, 501, + 0x02C8C'i32, 501, + 0x02C8E'i32, 501, + 0x02C90'i32, 501, + 0x02C92'i32, 501, + 0x02C94'i32, 501, + 0x02C96'i32, 501, + 0x02C98'i32, 501, + 0x02C9A'i32, 501, + 0x02C9C'i32, 501, + 0x02C9E'i32, 501, + 0x02CA0'i32, 501, + 0x02CA2'i32, 501, + 0x02CA4'i32, 501, + 0x02CA6'i32, 501, + 0x02CA8'i32, 501, + 0x02CAA'i32, 501, + 0x02CAC'i32, 501, + 0x02CAE'i32, 501, + 0x02CB0'i32, 501, + 0x02CB2'i32, 501, + 0x02CB4'i32, 501, + 0x02CB6'i32, 501, + 0x02CB8'i32, 501, + 0x02CBA'i32, 501, + 0x02CBC'i32, 501, + 0x02CBE'i32, 501, + 0x02CC0'i32, 501, + 0x02CC2'i32, 501, + 0x02CC4'i32, 501, + 0x02CC6'i32, 501, + 0x02CC8'i32, 501, + 0x02CCA'i32, 501, + 0x02CCC'i32, 501, + 0x02CCE'i32, 501, + 0x02CD0'i32, 501, + 0x02CD2'i32, 501, + 0x02CD4'i32, 501, + 0x02CD6'i32, 501, + 0x02CD8'i32, 501, + 0x02CDA'i32, 501, + 0x02CDC'i32, 501, + 0x02CDE'i32, 501, + 0x02CE0'i32, 501, + 0x02CE2'i32, 501, + 0x02CEB'i32, 501, + 0x02CED'i32, 501, + 0x02CF2'i32, 501, + 0x0A640'i32, 501, + 0x0A642'i32, 501, + 0x0A644'i32, 501, + 0x0A646'i32, 501, + 0x0A648'i32, 501, + 0x0A64A'i32, 501, + 0x0A64C'i32, 501, + 0x0A64E'i32, 501, + 0x0A650'i32, 501, + 
0x0A652'i32, 501, + 0x0A654'i32, 501, + 0x0A656'i32, 501, + 0x0A658'i32, 501, + 0x0A65A'i32, 501, + 0x0A65C'i32, 501, + 0x0A65E'i32, 501, + 0x0A660'i32, 501, + 0x0A662'i32, 501, + 0x0A664'i32, 501, + 0x0A666'i32, 501, + 0x0A668'i32, 501, + 0x0A66A'i32, 501, + 0x0A66C'i32, 501, + 0x0A680'i32, 501, + 0x0A682'i32, 501, + 0x0A684'i32, 501, + 0x0A686'i32, 501, + 0x0A688'i32, 501, + 0x0A68A'i32, 501, + 0x0A68C'i32, 501, + 0x0A68E'i32, 501, + 0x0A690'i32, 501, + 0x0A692'i32, 501, + 0x0A694'i32, 501, + 0x0A696'i32, 501, + 0x0A698'i32, 501, + 0x0A69A'i32, 501, + 0x0A722'i32, 501, + 0x0A724'i32, 501, + 0x0A726'i32, 501, + 0x0A728'i32, 501, + 0x0A72A'i32, 501, + 0x0A72C'i32, 501, + 0x0A72E'i32, 501, + 0x0A732'i32, 501, + 0x0A734'i32, 501, + 0x0A736'i32, 501, + 0x0A738'i32, 501, + 0x0A73A'i32, 501, + 0x0A73C'i32, 501, + 0x0A73E'i32, 501, + 0x0A740'i32, 501, + 0x0A742'i32, 501, + 0x0A744'i32, 501, + 0x0A746'i32, 501, + 0x0A748'i32, 501, + 0x0A74A'i32, 501, + 0x0A74C'i32, 501, + 0x0A74E'i32, 501, + 0x0A750'i32, 501, + 0x0A752'i32, 501, + 0x0A754'i32, 501, + 0x0A756'i32, 501, + 0x0A758'i32, 501, + 0x0A75A'i32, 501, + 0x0A75C'i32, 501, + 0x0A75E'i32, 501, + 0x0A760'i32, 501, + 0x0A762'i32, 501, + 0x0A764'i32, 501, + 0x0A766'i32, 501, + 0x0A768'i32, 501, + 0x0A76A'i32, 501, + 0x0A76C'i32, 501, + 0x0A76E'i32, 501, + 0x0A779'i32, 501, + 0x0A77B'i32, 501, + 0x0A77D'i32, -34832, + 0x0A77E'i32, 501, + 0x0A780'i32, 501, + 0x0A782'i32, 501, + 0x0A784'i32, 501, + 0x0A786'i32, 501, + 0x0A78B'i32, 501, + 0x0A78D'i32, -41780, + 0x0A790'i32, 501, + 0x0A792'i32, 501, + 0x0A796'i32, 501, + 0x0A798'i32, 501, + 0x0A79A'i32, 501, + 0x0A79C'i32, 501, + 0x0A79E'i32, 501, + 0x0A7A0'i32, 501, + 0x0A7A2'i32, 501, + 0x0A7A4'i32, 501, + 0x0A7A6'i32, 501, + 0x0A7A8'i32, 501, + 0x0A7AA'i32, -41808, + 0x0A7AB'i32, -41819, + 0x0A7AC'i32, -41815, + 0x0A7AD'i32, -41805, + 0x0A7AE'i32, -41808, + 0x0A7B0'i32, -41758, + 0x0A7B1'i32, -41782, + 0x0A7B2'i32, -41761, + 0x0A7B3'i32, 1428, + 0x0A7B4'i32, 501, + 
0x0A7B6'i32, 501, + 0x0A7B8'i32, 501, + 0x0A7BA'i32, 501, + 0x0A7BC'i32, 501, + 0x0A7BE'i32, 501, + 0x0A7C2'i32, 501, + 0x0A7C4'i32, 452, + 0x0A7C5'i32, -41807, + 0x0A7C6'i32, -34884, + ] + + toUpperRanges = [ + 0x00061'i32, 0x0007A'i32, 468, + 0x000E0'i32, 0x000F6'i32, 468, + 0x000F8'i32, 0x000FE'i32, 468, + 0x0023F'i32, 0x00240'i32, 11315, + 0x00256'i32, 0x00257'i32, 295, + 0x0028A'i32, 0x0028B'i32, 283, + 0x0037B'i32, 0x0037D'i32, 630, + 0x003AD'i32, 0x003AF'i32, 463, + 0x003B1'i32, 0x003C1'i32, 468, + 0x003C3'i32, 0x003CB'i32, 468, + 0x003CD'i32, 0x003CE'i32, 437, + 0x00430'i32, 0x0044F'i32, 468, + 0x00450'i32, 0x0045F'i32, 420, + 0x00561'i32, 0x00586'i32, 452, + 0x010D0'i32, 0x010FA'i32, 3508, + 0x010FD'i32, 0x010FF'i32, 3508, + 0x013F8'i32, 0x013FD'i32, 492, + 0x01C83'i32, 0x01C84'i32, -5742, + 0x01F00'i32, 0x01F07'i32, 508, + 0x01F10'i32, 0x01F15'i32, 508, + 0x01F20'i32, 0x01F27'i32, 508, + 0x01F30'i32, 0x01F37'i32, 508, + 0x01F40'i32, 0x01F45'i32, 508, + 0x01F60'i32, 0x01F67'i32, 508, + 0x01F70'i32, 0x01F71'i32, 574, + 0x01F72'i32, 0x01F75'i32, 586, + 0x01F76'i32, 0x01F77'i32, 600, + 0x01F78'i32, 0x01F79'i32, 628, + 0x01F7A'i32, 0x01F7B'i32, 612, + 0x01F7C'i32, 0x01F7D'i32, 626, + 0x01F80'i32, 0x01F87'i32, 508, + 0x01F90'i32, 0x01F97'i32, 508, + 0x01FA0'i32, 0x01FA7'i32, 508, + 0x01FB0'i32, 0x01FB1'i32, 508, + 0x01FD0'i32, 0x01FD1'i32, 508, + 0x01FE0'i32, 0x01FE1'i32, 508, + 0x02C30'i32, 0x02C5E'i32, 452, + 0x02D00'i32, 0x02D25'i32, -6764, + 0x0AB70'i32, 0x0ABBF'i32, -38364, + 0x0FF41'i32, 0x0FF5A'i32, 468, + 0x10428'i32, 0x1044F'i32, 460, + 0x104D8'i32, 0x104FB'i32, 460, + 0x10CC0'i32, 0x10CF2'i32, 436, + 0x118C0'i32, 0x118DF'i32, 468, + 0x16E60'i32, 0x16E7F'i32, 468, + 0x1E922'i32, 0x1E943'i32, 466, + ] + + toUpperSinglets = [ + 0x000B5'i32, 1243, + 0x000FF'i32, 621, + 0x00101'i32, 499, + 0x00103'i32, 499, + 0x00105'i32, 499, + 0x00107'i32, 499, + 0x00109'i32, 499, + 0x0010B'i32, 499, + 0x0010D'i32, 499, + 0x0010F'i32, 499, + 0x00111'i32, 499, + 
0x00113'i32, 499, + 0x00115'i32, 499, + 0x00117'i32, 499, + 0x00119'i32, 499, + 0x0011B'i32, 499, + 0x0011D'i32, 499, + 0x0011F'i32, 499, + 0x00121'i32, 499, + 0x00123'i32, 499, + 0x00125'i32, 499, + 0x00127'i32, 499, + 0x00129'i32, 499, + 0x0012B'i32, 499, + 0x0012D'i32, 499, + 0x0012F'i32, 499, + 0x00131'i32, 268, + 0x00133'i32, 499, + 0x00135'i32, 499, + 0x00137'i32, 499, + 0x0013A'i32, 499, + 0x0013C'i32, 499, + 0x0013E'i32, 499, + 0x00140'i32, 499, + 0x00142'i32, 499, + 0x00144'i32, 499, + 0x00146'i32, 499, + 0x00148'i32, 499, + 0x0014B'i32, 499, + 0x0014D'i32, 499, + 0x0014F'i32, 499, + 0x00151'i32, 499, + 0x00153'i32, 499, + 0x00155'i32, 499, + 0x00157'i32, 499, + 0x00159'i32, 499, + 0x0015B'i32, 499, + 0x0015D'i32, 499, + 0x0015F'i32, 499, + 0x00161'i32, 499, + 0x00163'i32, 499, + 0x00165'i32, 499, + 0x00167'i32, 499, + 0x00169'i32, 499, + 0x0016B'i32, 499, + 0x0016D'i32, 499, + 0x0016F'i32, 499, + 0x00171'i32, 499, + 0x00173'i32, 499, + 0x00175'i32, 499, + 0x00177'i32, 499, + 0x0017A'i32, 499, + 0x0017C'i32, 499, + 0x0017E'i32, 499, + 0x0017F'i32, 200, + 0x00180'i32, 695, + 0x00183'i32, 499, + 0x00185'i32, 499, + 0x00188'i32, 499, + 0x0018C'i32, 499, + 0x00192'i32, 499, + 0x00195'i32, 597, + 0x00199'i32, 499, + 0x0019A'i32, 663, + 0x0019E'i32, 630, + 0x001A1'i32, 499, + 0x001A3'i32, 499, + 0x001A5'i32, 499, + 0x001A8'i32, 499, + 0x001AD'i32, 499, + 0x001B0'i32, 499, + 0x001B4'i32, 499, + 0x001B6'i32, 499, + 0x001B9'i32, 499, + 0x001BD'i32, 499, + 0x001BF'i32, 556, + 0x001C5'i32, 499, + 0x001C6'i32, 498, + 0x001C8'i32, 499, + 0x001C9'i32, 498, + 0x001CB'i32, 499, + 0x001CC'i32, 498, + 0x001CE'i32, 499, + 0x001D0'i32, 499, + 0x001D2'i32, 499, + 0x001D4'i32, 499, + 0x001D6'i32, 499, + 0x001D8'i32, 499, + 0x001DA'i32, 499, + 0x001DC'i32, 499, + 0x001DD'i32, 421, + 0x001DF'i32, 499, + 0x001E1'i32, 499, + 0x001E3'i32, 499, + 0x001E5'i32, 499, + 0x001E7'i32, 499, + 0x001E9'i32, 499, + 0x001EB'i32, 499, + 0x001ED'i32, 499, + 0x001EF'i32, 499, + 0x001F2'i32, 499, + 
0x001F3'i32, 498, + 0x001F5'i32, 499, + 0x001F9'i32, 499, + 0x001FB'i32, 499, + 0x001FD'i32, 499, + 0x001FF'i32, 499, + 0x00201'i32, 499, + 0x00203'i32, 499, + 0x00205'i32, 499, + 0x00207'i32, 499, + 0x00209'i32, 499, + 0x0020B'i32, 499, + 0x0020D'i32, 499, + 0x0020F'i32, 499, + 0x00211'i32, 499, + 0x00213'i32, 499, + 0x00215'i32, 499, + 0x00217'i32, 499, + 0x00219'i32, 499, + 0x0021B'i32, 499, + 0x0021D'i32, 499, + 0x0021F'i32, 499, + 0x00223'i32, 499, + 0x00225'i32, 499, + 0x00227'i32, 499, + 0x00229'i32, 499, + 0x0022B'i32, 499, + 0x0022D'i32, 499, + 0x0022F'i32, 499, + 0x00231'i32, 499, + 0x00233'i32, 499, + 0x0023C'i32, 499, + 0x00242'i32, 499, + 0x00247'i32, 499, + 0x00249'i32, 499, + 0x0024B'i32, 499, + 0x0024D'i32, 499, + 0x0024F'i32, 499, + 0x00250'i32, 11283, + 0x00251'i32, 11280, + 0x00252'i32, 11282, + 0x00253'i32, 290, + 0x00254'i32, 294, + 0x00259'i32, 298, + 0x0025B'i32, 297, + 0x0025C'i32, 42819, + 0x00260'i32, 295, + 0x00261'i32, 42815, + 0x00263'i32, 293, + 0x00265'i32, 42780, + 0x00266'i32, 42808, + 0x00268'i32, 291, + 0x00269'i32, 289, + 0x0026A'i32, 42808, + 0x0026B'i32, 11243, + 0x0026C'i32, 42805, + 0x0026F'i32, 289, + 0x00271'i32, 11249, + 0x00272'i32, 287, + 0x00275'i32, 286, + 0x0027D'i32, 11227, + 0x00280'i32, 282, + 0x00282'i32, 42807, + 0x00283'i32, 282, + 0x00287'i32, 42782, + 0x00288'i32, 282, + 0x00289'i32, 431, + 0x0028C'i32, 429, + 0x00292'i32, 281, + 0x0029D'i32, 42761, + 0x0029E'i32, 42758, + 0x00371'i32, 499, + 0x00373'i32, 499, + 0x00377'i32, 499, + 0x003AC'i32, 462, + 0x003C2'i32, 469, + 0x003CC'i32, 436, + 0x003D0'i32, 438, + 0x003D1'i32, 443, + 0x003D5'i32, 453, + 0x003D6'i32, 446, + 0x003D7'i32, 492, + 0x003D9'i32, 499, + 0x003DB'i32, 499, + 0x003DD'i32, 499, + 0x003DF'i32, 499, + 0x003E1'i32, 499, + 0x003E3'i32, 499, + 0x003E5'i32, 499, + 0x003E7'i32, 499, + 0x003E9'i32, 499, + 0x003EB'i32, 499, + 0x003ED'i32, 499, + 0x003EF'i32, 499, + 0x003F0'i32, 414, + 0x003F1'i32, 420, + 0x003F2'i32, 507, + 0x003F3'i32, 384, + 
0x003F5'i32, 404, + 0x003F8'i32, 499, + 0x003FB'i32, 499, + 0x00461'i32, 499, + 0x00463'i32, 499, + 0x00465'i32, 499, + 0x00467'i32, 499, + 0x00469'i32, 499, + 0x0046B'i32, 499, + 0x0046D'i32, 499, + 0x0046F'i32, 499, + 0x00471'i32, 499, + 0x00473'i32, 499, + 0x00475'i32, 499, + 0x00477'i32, 499, + 0x00479'i32, 499, + 0x0047B'i32, 499, + 0x0047D'i32, 499, + 0x0047F'i32, 499, + 0x00481'i32, 499, + 0x0048B'i32, 499, + 0x0048D'i32, 499, + 0x0048F'i32, 499, + 0x00491'i32, 499, + 0x00493'i32, 499, + 0x00495'i32, 499, + 0x00497'i32, 499, + 0x00499'i32, 499, + 0x0049B'i32, 499, + 0x0049D'i32, 499, + 0x0049F'i32, 499, + 0x004A1'i32, 499, + 0x004A3'i32, 499, + 0x004A5'i32, 499, + 0x004A7'i32, 499, + 0x004A9'i32, 499, + 0x004AB'i32, 499, + 0x004AD'i32, 499, + 0x004AF'i32, 499, + 0x004B1'i32, 499, + 0x004B3'i32, 499, + 0x004B5'i32, 499, + 0x004B7'i32, 499, + 0x004B9'i32, 499, + 0x004BB'i32, 499, + 0x004BD'i32, 499, + 0x004BF'i32, 499, + 0x004C2'i32, 499, + 0x004C4'i32, 499, + 0x004C6'i32, 499, + 0x004C8'i32, 499, + 0x004CA'i32, 499, + 0x004CC'i32, 499, + 0x004CE'i32, 499, + 0x004CF'i32, 485, + 0x004D1'i32, 499, + 0x004D3'i32, 499, + 0x004D5'i32, 499, + 0x004D7'i32, 499, + 0x004D9'i32, 499, + 0x004DB'i32, 499, + 0x004DD'i32, 499, + 0x004DF'i32, 499, + 0x004E1'i32, 499, + 0x004E3'i32, 499, + 0x004E5'i32, 499, + 0x004E7'i32, 499, + 0x004E9'i32, 499, + 0x004EB'i32, 499, + 0x004ED'i32, 499, + 0x004EF'i32, 499, + 0x004F1'i32, 499, + 0x004F3'i32, 499, + 0x004F5'i32, 499, + 0x004F7'i32, 499, + 0x004F9'i32, 499, + 0x004FB'i32, 499, + 0x004FD'i32, 499, + 0x004FF'i32, 499, + 0x00501'i32, 499, + 0x00503'i32, 499, + 0x00505'i32, 499, + 0x00507'i32, 499, + 0x00509'i32, 499, + 0x0050B'i32, 499, + 0x0050D'i32, 499, + 0x0050F'i32, 499, + 0x00511'i32, 499, + 0x00513'i32, 499, + 0x00515'i32, 499, + 0x00517'i32, 499, + 0x00519'i32, 499, + 0x0051B'i32, 499, + 0x0051D'i32, 499, + 0x0051F'i32, 499, + 0x00521'i32, 499, + 0x00523'i32, 499, + 0x00525'i32, 499, + 0x00527'i32, 499, + 0x00529'i32, 499, + 
0x0052B'i32, 499, + 0x0052D'i32, 499, + 0x0052F'i32, 499, + 0x01C80'i32, -5754, + 0x01C81'i32, -5753, + 0x01C82'i32, -5744, + 0x01C85'i32, -5743, + 0x01C86'i32, -5736, + 0x01C87'i32, -5681, + 0x01C88'i32, 35766, + 0x01D79'i32, 35832, + 0x01D7D'i32, 4314, + 0x01D8E'i32, 35884, + 0x01E01'i32, 499, + 0x01E03'i32, 499, + 0x01E05'i32, 499, + 0x01E07'i32, 499, + 0x01E09'i32, 499, + 0x01E0B'i32, 499, + 0x01E0D'i32, 499, + 0x01E0F'i32, 499, + 0x01E11'i32, 499, + 0x01E13'i32, 499, + 0x01E15'i32, 499, + 0x01E17'i32, 499, + 0x01E19'i32, 499, + 0x01E1B'i32, 499, + 0x01E1D'i32, 499, + 0x01E1F'i32, 499, + 0x01E21'i32, 499, + 0x01E23'i32, 499, + 0x01E25'i32, 499, + 0x01E27'i32, 499, + 0x01E29'i32, 499, + 0x01E2B'i32, 499, + 0x01E2D'i32, 499, + 0x01E2F'i32, 499, + 0x01E31'i32, 499, + 0x01E33'i32, 499, + 0x01E35'i32, 499, + 0x01E37'i32, 499, + 0x01E39'i32, 499, + 0x01E3B'i32, 499, + 0x01E3D'i32, 499, + 0x01E3F'i32, 499, + 0x01E41'i32, 499, + 0x01E43'i32, 499, + 0x01E45'i32, 499, + 0x01E47'i32, 499, + 0x01E49'i32, 499, + 0x01E4B'i32, 499, + 0x01E4D'i32, 499, + 0x01E4F'i32, 499, + 0x01E51'i32, 499, + 0x01E53'i32, 499, + 0x01E55'i32, 499, + 0x01E57'i32, 499, + 0x01E59'i32, 499, + 0x01E5B'i32, 499, + 0x01E5D'i32, 499, + 0x01E5F'i32, 499, + 0x01E61'i32, 499, + 0x01E63'i32, 499, + 0x01E65'i32, 499, + 0x01E67'i32, 499, + 0x01E69'i32, 499, + 0x01E6B'i32, 499, + 0x01E6D'i32, 499, + 0x01E6F'i32, 499, + 0x01E71'i32, 499, + 0x01E73'i32, 499, + 0x01E75'i32, 499, + 0x01E77'i32, 499, + 0x01E79'i32, 499, + 0x01E7B'i32, 499, + 0x01E7D'i32, 499, + 0x01E7F'i32, 499, + 0x01E81'i32, 499, + 0x01E83'i32, 499, + 0x01E85'i32, 499, + 0x01E87'i32, 499, + 0x01E89'i32, 499, + 0x01E8B'i32, 499, + 0x01E8D'i32, 499, + 0x01E8F'i32, 499, + 0x01E91'i32, 499, + 0x01E93'i32, 499, + 0x01E95'i32, 499, + 0x01E9B'i32, 441, + 0x01EA1'i32, 499, + 0x01EA3'i32, 499, + 0x01EA5'i32, 499, + 0x01EA7'i32, 499, + 0x01EA9'i32, 499, + 0x01EAB'i32, 499, + 0x01EAD'i32, 499, + 0x01EAF'i32, 499, + 0x01EB1'i32, 499, + 0x01EB3'i32, 499, + 
0x01EB5'i32, 499, + 0x01EB7'i32, 499, + 0x01EB9'i32, 499, + 0x01EBB'i32, 499, + 0x01EBD'i32, 499, + 0x01EBF'i32, 499, + 0x01EC1'i32, 499, + 0x01EC3'i32, 499, + 0x01EC5'i32, 499, + 0x01EC7'i32, 499, + 0x01EC9'i32, 499, + 0x01ECB'i32, 499, + 0x01ECD'i32, 499, + 0x01ECF'i32, 499, + 0x01ED1'i32, 499, + 0x01ED3'i32, 499, + 0x01ED5'i32, 499, + 0x01ED7'i32, 499, + 0x01ED9'i32, 499, + 0x01EDB'i32, 499, + 0x01EDD'i32, 499, + 0x01EDF'i32, 499, + 0x01EE1'i32, 499, + 0x01EE3'i32, 499, + 0x01EE5'i32, 499, + 0x01EE7'i32, 499, + 0x01EE9'i32, 499, + 0x01EEB'i32, 499, + 0x01EED'i32, 499, + 0x01EEF'i32, 499, + 0x01EF1'i32, 499, + 0x01EF3'i32, 499, + 0x01EF5'i32, 499, + 0x01EF7'i32, 499, + 0x01EF9'i32, 499, + 0x01EFB'i32, 499, + 0x01EFD'i32, 499, + 0x01EFF'i32, 499, + 0x01F51'i32, 508, + 0x01F53'i32, 508, + 0x01F55'i32, 508, + 0x01F57'i32, 508, + 0x01FB3'i32, 509, + 0x01FBE'i32, -6705, + 0x01FC3'i32, 509, + 0x01FE5'i32, 507, + 0x01FF3'i32, 509, + 0x0214E'i32, 472, + 0x02184'i32, 499, + 0x02C61'i32, 499, + 0x02C65'i32, -10295, + 0x02C66'i32, -10292, + 0x02C68'i32, 499, + 0x02C6A'i32, 499, + 0x02C6C'i32, 499, + 0x02C73'i32, 499, + 0x02C76'i32, 499, + 0x02C81'i32, 499, + 0x02C83'i32, 499, + 0x02C85'i32, 499, + 0x02C87'i32, 499, + 0x02C89'i32, 499, + 0x02C8B'i32, 499, + 0x02C8D'i32, 499, + 0x02C8F'i32, 499, + 0x02C91'i32, 499, + 0x02C93'i32, 499, + 0x02C95'i32, 499, + 0x02C97'i32, 499, + 0x02C99'i32, 499, + 0x02C9B'i32, 499, + 0x02C9D'i32, 499, + 0x02C9F'i32, 499, + 0x02CA1'i32, 499, + 0x02CA3'i32, 499, + 0x02CA5'i32, 499, + 0x02CA7'i32, 499, + 0x02CA9'i32, 499, + 0x02CAB'i32, 499, + 0x02CAD'i32, 499, + 0x02CAF'i32, 499, + 0x02CB1'i32, 499, + 0x02CB3'i32, 499, + 0x02CB5'i32, 499, + 0x02CB7'i32, 499, + 0x02CB9'i32, 499, + 0x02CBB'i32, 499, + 0x02CBD'i32, 499, + 0x02CBF'i32, 499, + 0x02CC1'i32, 499, + 0x02CC3'i32, 499, + 0x02CC5'i32, 499, + 0x02CC7'i32, 499, + 0x02CC9'i32, 499, + 0x02CCB'i32, 499, + 0x02CCD'i32, 499, + 0x02CCF'i32, 499, + 0x02CD1'i32, 499, + 0x02CD3'i32, 499, + 
0x02CD5'i32, 499, + 0x02CD7'i32, 499, + 0x02CD9'i32, 499, + 0x02CDB'i32, 499, + 0x02CDD'i32, 499, + 0x02CDF'i32, 499, + 0x02CE1'i32, 499, + 0x02CE3'i32, 499, + 0x02CEC'i32, 499, + 0x02CEE'i32, 499, + 0x02CF3'i32, 499, + 0x02D27'i32, -6764, + 0x02D2D'i32, -6764, + 0x0A641'i32, 499, + 0x0A643'i32, 499, + 0x0A645'i32, 499, + 0x0A647'i32, 499, + 0x0A649'i32, 499, + 0x0A64B'i32, 499, + 0x0A64D'i32, 499, + 0x0A64F'i32, 499, + 0x0A651'i32, 499, + 0x0A653'i32, 499, + 0x0A655'i32, 499, + 0x0A657'i32, 499, + 0x0A659'i32, 499, + 0x0A65B'i32, 499, + 0x0A65D'i32, 499, + 0x0A65F'i32, 499, + 0x0A661'i32, 499, + 0x0A663'i32, 499, + 0x0A665'i32, 499, + 0x0A667'i32, 499, + 0x0A669'i32, 499, + 0x0A66B'i32, 499, + 0x0A66D'i32, 499, + 0x0A681'i32, 499, + 0x0A683'i32, 499, + 0x0A685'i32, 499, + 0x0A687'i32, 499, + 0x0A689'i32, 499, + 0x0A68B'i32, 499, + 0x0A68D'i32, 499, + 0x0A68F'i32, 499, + 0x0A691'i32, 499, + 0x0A693'i32, 499, + 0x0A695'i32, 499, + 0x0A697'i32, 499, + 0x0A699'i32, 499, + 0x0A69B'i32, 499, + 0x0A723'i32, 499, + 0x0A725'i32, 499, + 0x0A727'i32, 499, + 0x0A729'i32, 499, + 0x0A72B'i32, 499, + 0x0A72D'i32, 499, + 0x0A72F'i32, 499, + 0x0A733'i32, 499, + 0x0A735'i32, 499, + 0x0A737'i32, 499, + 0x0A739'i32, 499, + 0x0A73B'i32, 499, + 0x0A73D'i32, 499, + 0x0A73F'i32, 499, + 0x0A741'i32, 499, + 0x0A743'i32, 499, + 0x0A745'i32, 499, + 0x0A747'i32, 499, + 0x0A749'i32, 499, + 0x0A74B'i32, 499, + 0x0A74D'i32, 499, + 0x0A74F'i32, 499, + 0x0A751'i32, 499, + 0x0A753'i32, 499, + 0x0A755'i32, 499, + 0x0A757'i32, 499, + 0x0A759'i32, 499, + 0x0A75B'i32, 499, + 0x0A75D'i32, 499, + 0x0A75F'i32, 499, + 0x0A761'i32, 499, + 0x0A763'i32, 499, + 0x0A765'i32, 499, + 0x0A767'i32, 499, + 0x0A769'i32, 499, + 0x0A76B'i32, 499, + 0x0A76D'i32, 499, + 0x0A76F'i32, 499, + 0x0A77A'i32, 499, + 0x0A77C'i32, 499, + 0x0A77F'i32, 499, + 0x0A781'i32, 499, + 0x0A783'i32, 499, + 0x0A785'i32, 499, + 0x0A787'i32, 499, + 0x0A78C'i32, 499, + 0x0A791'i32, 499, + 0x0A793'i32, 499, + 0x0A794'i32, 548, + 0x0A797'i32, 
499, + 0x0A799'i32, 499, + 0x0A79B'i32, 499, + 0x0A79D'i32, 499, + 0x0A79F'i32, 499, + 0x0A7A1'i32, 499, + 0x0A7A3'i32, 499, + 0x0A7A5'i32, 499, + 0x0A7A7'i32, 499, + 0x0A7A9'i32, 499, + 0x0A7B5'i32, 499, + 0x0A7B7'i32, 499, + 0x0A7B9'i32, 499, + 0x0A7BB'i32, 499, + 0x0A7BD'i32, 499, + 0x0A7BF'i32, 499, + 0x0A7C3'i32, 499, + 0x0AB53'i32, -428, + ] + + toTitleSinglets = [ + 0x001C4'i32, 501, + 0x001C6'i32, 499, + 0x001C7'i32, 501, + 0x001C9'i32, 499, + 0x001CA'i32, 501, + 0x001CC'i32, 499, + 0x001F1'i32, 501, + 0x001F3'i32, 499, + ] + + alphaRanges = [ + 0x00041'i32, 0x0005A'i32, + 0x00061'i32, 0x0007A'i32, + 0x000C0'i32, 0x000D6'i32, + 0x000D8'i32, 0x000F6'i32, + 0x000F8'i32, 0x002C1'i32, + 0x002C6'i32, 0x002D1'i32, + 0x002E0'i32, 0x002E4'i32, + 0x00370'i32, 0x00374'i32, + 0x00376'i32, 0x00377'i32, + 0x0037A'i32, 0x0037D'i32, + 0x00388'i32, 0x0038A'i32, + 0x0038E'i32, 0x003A1'i32, + 0x003A3'i32, 0x003F5'i32, + 0x003F7'i32, 0x00481'i32, + 0x0048A'i32, 0x0052F'i32, + 0x00531'i32, 0x00556'i32, + 0x00560'i32, 0x00588'i32, + 0x005D0'i32, 0x005EA'i32, + 0x005EF'i32, 0x005F2'i32, + 0x00620'i32, 0x0064A'i32, + 0x0066E'i32, 0x0066F'i32, + 0x00671'i32, 0x006D3'i32, + 0x006E5'i32, 0x006E6'i32, + 0x006EE'i32, 0x006EF'i32, + 0x006FA'i32, 0x006FC'i32, + 0x00712'i32, 0x0072F'i32, + 0x0074D'i32, 0x007A5'i32, + 0x007CA'i32, 0x007EA'i32, + 0x007F4'i32, 0x007F5'i32, + 0x00800'i32, 0x00815'i32, + 0x00840'i32, 0x00858'i32, + 0x00860'i32, 0x0086A'i32, + 0x008A0'i32, 0x008B4'i32, + 0x008B6'i32, 0x008BD'i32, + 0x00904'i32, 0x00939'i32, + 0x00958'i32, 0x00961'i32, + 0x00971'i32, 0x00980'i32, + 0x00985'i32, 0x0098C'i32, + 0x0098F'i32, 0x00990'i32, + 0x00993'i32, 0x009A8'i32, + 0x009AA'i32, 0x009B0'i32, + 0x009B6'i32, 0x009B9'i32, + 0x009DC'i32, 0x009DD'i32, + 0x009DF'i32, 0x009E1'i32, + 0x009F0'i32, 0x009F1'i32, + 0x00A05'i32, 0x00A0A'i32, + 0x00A0F'i32, 0x00A10'i32, + 0x00A13'i32, 0x00A28'i32, + 0x00A2A'i32, 0x00A30'i32, + 0x00A32'i32, 0x00A33'i32, + 0x00A35'i32, 0x00A36'i32, + 
0x00A38'i32, 0x00A39'i32, + 0x00A59'i32, 0x00A5C'i32, + 0x00A72'i32, 0x00A74'i32, + 0x00A85'i32, 0x00A8D'i32, + 0x00A8F'i32, 0x00A91'i32, + 0x00A93'i32, 0x00AA8'i32, + 0x00AAA'i32, 0x00AB0'i32, + 0x00AB2'i32, 0x00AB3'i32, + 0x00AB5'i32, 0x00AB9'i32, + 0x00AE0'i32, 0x00AE1'i32, + 0x00B05'i32, 0x00B0C'i32, + 0x00B0F'i32, 0x00B10'i32, + 0x00B13'i32, 0x00B28'i32, + 0x00B2A'i32, 0x00B30'i32, + 0x00B32'i32, 0x00B33'i32, + 0x00B35'i32, 0x00B39'i32, + 0x00B5C'i32, 0x00B5D'i32, + 0x00B5F'i32, 0x00B61'i32, + 0x00B85'i32, 0x00B8A'i32, + 0x00B8E'i32, 0x00B90'i32, + 0x00B92'i32, 0x00B95'i32, + 0x00B99'i32, 0x00B9A'i32, + 0x00B9E'i32, 0x00B9F'i32, + 0x00BA3'i32, 0x00BA4'i32, + 0x00BA8'i32, 0x00BAA'i32, + 0x00BAE'i32, 0x00BB9'i32, + 0x00C05'i32, 0x00C0C'i32, + 0x00C0E'i32, 0x00C10'i32, + 0x00C12'i32, 0x00C28'i32, + 0x00C2A'i32, 0x00C39'i32, + 0x00C58'i32, 0x00C5A'i32, + 0x00C60'i32, 0x00C61'i32, + 0x00C85'i32, 0x00C8C'i32, + 0x00C8E'i32, 0x00C90'i32, + 0x00C92'i32, 0x00CA8'i32, + 0x00CAA'i32, 0x00CB3'i32, + 0x00CB5'i32, 0x00CB9'i32, + 0x00CE0'i32, 0x00CE1'i32, + 0x00CF1'i32, 0x00CF2'i32, + 0x00D05'i32, 0x00D0C'i32, + 0x00D0E'i32, 0x00D10'i32, + 0x00D12'i32, 0x00D3A'i32, + 0x00D54'i32, 0x00D56'i32, + 0x00D5F'i32, 0x00D61'i32, + 0x00D7A'i32, 0x00D7F'i32, + 0x00D85'i32, 0x00D96'i32, + 0x00D9A'i32, 0x00DB1'i32, + 0x00DB3'i32, 0x00DBB'i32, + 0x00DC0'i32, 0x00DC6'i32, + 0x00E01'i32, 0x00E30'i32, + 0x00E32'i32, 0x00E33'i32, + 0x00E40'i32, 0x00E46'i32, + 0x00E81'i32, 0x00E82'i32, + 0x00E86'i32, 0x00E8A'i32, + 0x00E8C'i32, 0x00EA3'i32, + 0x00EA7'i32, 0x00EB0'i32, + 0x00EB2'i32, 0x00EB3'i32, + 0x00EC0'i32, 0x00EC4'i32, + 0x00EDC'i32, 0x00EDF'i32, + 0x00F40'i32, 0x00F47'i32, + 0x00F49'i32, 0x00F6C'i32, + 0x00F88'i32, 0x00F8C'i32, + 0x01000'i32, 0x0102A'i32, + 0x01050'i32, 0x01055'i32, + 0x0105A'i32, 0x0105D'i32, + 0x01065'i32, 0x01066'i32, + 0x0106E'i32, 0x01070'i32, + 0x01075'i32, 0x01081'i32, + 0x010A0'i32, 0x010C5'i32, + 0x010D0'i32, 0x010FA'i32, + 0x010FC'i32, 0x01248'i32, + 
0x0124A'i32, 0x0124D'i32, + 0x01250'i32, 0x01256'i32, + 0x0125A'i32, 0x0125D'i32, + 0x01260'i32, 0x01288'i32, + 0x0128A'i32, 0x0128D'i32, + 0x01290'i32, 0x012B0'i32, + 0x012B2'i32, 0x012B5'i32, + 0x012B8'i32, 0x012BE'i32, + 0x012C2'i32, 0x012C5'i32, + 0x012C8'i32, 0x012D6'i32, + 0x012D8'i32, 0x01310'i32, + 0x01312'i32, 0x01315'i32, + 0x01318'i32, 0x0135A'i32, + 0x01380'i32, 0x0138F'i32, + 0x013A0'i32, 0x013F5'i32, + 0x013F8'i32, 0x013FD'i32, + 0x01401'i32, 0x0166C'i32, + 0x0166F'i32, 0x0167F'i32, + 0x01681'i32, 0x0169A'i32, + 0x016A0'i32, 0x016EA'i32, + 0x016F1'i32, 0x016F8'i32, + 0x01700'i32, 0x0170C'i32, + 0x0170E'i32, 0x01711'i32, + 0x01720'i32, 0x01731'i32, + 0x01740'i32, 0x01751'i32, + 0x01760'i32, 0x0176C'i32, + 0x0176E'i32, 0x01770'i32, + 0x01780'i32, 0x017B3'i32, + 0x01820'i32, 0x01878'i32, + 0x01880'i32, 0x01884'i32, + 0x01887'i32, 0x018A8'i32, + 0x018B0'i32, 0x018F5'i32, + 0x01900'i32, 0x0191E'i32, + 0x01950'i32, 0x0196D'i32, + 0x01970'i32, 0x01974'i32, + 0x01980'i32, 0x019AB'i32, + 0x019B0'i32, 0x019C9'i32, + 0x01A00'i32, 0x01A16'i32, + 0x01A20'i32, 0x01A54'i32, + 0x01B05'i32, 0x01B33'i32, + 0x01B45'i32, 0x01B4B'i32, + 0x01B83'i32, 0x01BA0'i32, + 0x01BAE'i32, 0x01BAF'i32, + 0x01BBA'i32, 0x01BE5'i32, + 0x01C00'i32, 0x01C23'i32, + 0x01C4D'i32, 0x01C4F'i32, + 0x01C5A'i32, 0x01C7D'i32, + 0x01C80'i32, 0x01C88'i32, + 0x01C90'i32, 0x01CBA'i32, + 0x01CBD'i32, 0x01CBF'i32, + 0x01CE9'i32, 0x01CEC'i32, + 0x01CEE'i32, 0x01CF3'i32, + 0x01CF5'i32, 0x01CF6'i32, + 0x01D00'i32, 0x01DBF'i32, + 0x01E00'i32, 0x01F15'i32, + 0x01F18'i32, 0x01F1D'i32, + 0x01F20'i32, 0x01F45'i32, + 0x01F48'i32, 0x01F4D'i32, + 0x01F50'i32, 0x01F57'i32, + 0x01F5F'i32, 0x01F7D'i32, + 0x01F80'i32, 0x01FB4'i32, + 0x01FB6'i32, 0x01FBC'i32, + 0x01FC2'i32, 0x01FC4'i32, + 0x01FC6'i32, 0x01FCC'i32, + 0x01FD0'i32, 0x01FD3'i32, + 0x01FD6'i32, 0x01FDB'i32, + 0x01FE0'i32, 0x01FEC'i32, + 0x01FF2'i32, 0x01FF4'i32, + 0x01FF6'i32, 0x01FFC'i32, + 0x02090'i32, 0x0209C'i32, + 0x0210A'i32, 0x02113'i32, + 
0x02119'i32, 0x0211D'i32, + 0x0212A'i32, 0x0212D'i32, + 0x0212F'i32, 0x02139'i32, + 0x0213C'i32, 0x0213F'i32, + 0x02145'i32, 0x02149'i32, + 0x02183'i32, 0x02184'i32, + 0x02C00'i32, 0x02C2E'i32, + 0x02C30'i32, 0x02C5E'i32, + 0x02C60'i32, 0x02CE4'i32, + 0x02CEB'i32, 0x02CEE'i32, + 0x02CF2'i32, 0x02CF3'i32, + 0x02D00'i32, 0x02D25'i32, + 0x02D30'i32, 0x02D67'i32, + 0x02D80'i32, 0x02D96'i32, + 0x02DA0'i32, 0x02DA6'i32, + 0x02DA8'i32, 0x02DAE'i32, + 0x02DB0'i32, 0x02DB6'i32, + 0x02DB8'i32, 0x02DBE'i32, + 0x02DC0'i32, 0x02DC6'i32, + 0x02DC8'i32, 0x02DCE'i32, + 0x02DD0'i32, 0x02DD6'i32, + 0x02DD8'i32, 0x02DDE'i32, + 0x03005'i32, 0x03006'i32, + 0x03031'i32, 0x03035'i32, + 0x0303B'i32, 0x0303C'i32, + 0x03041'i32, 0x03096'i32, + 0x0309D'i32, 0x0309F'i32, + 0x030A1'i32, 0x030FA'i32, + 0x030FC'i32, 0x030FF'i32, + 0x03105'i32, 0x0312F'i32, + 0x03131'i32, 0x0318E'i32, + 0x031A0'i32, 0x031BA'i32, + 0x031F0'i32, 0x031FF'i32, + 0x03400'i32, 0x04DB5'i32, + 0x04E00'i32, 0x09FEF'i32, + 0x0A000'i32, 0x0A48C'i32, + 0x0A4D0'i32, 0x0A4FD'i32, + 0x0A500'i32, 0x0A60C'i32, + 0x0A610'i32, 0x0A61F'i32, + 0x0A62A'i32, 0x0A62B'i32, + 0x0A640'i32, 0x0A66E'i32, + 0x0A67F'i32, 0x0A69D'i32, + 0x0A6A0'i32, 0x0A6E5'i32, + 0x0A717'i32, 0x0A71F'i32, + 0x0A722'i32, 0x0A788'i32, + 0x0A78B'i32, 0x0A7BF'i32, + 0x0A7C2'i32, 0x0A7C6'i32, + 0x0A7F7'i32, 0x0A801'i32, + 0x0A803'i32, 0x0A805'i32, + 0x0A807'i32, 0x0A80A'i32, + 0x0A80C'i32, 0x0A822'i32, + 0x0A840'i32, 0x0A873'i32, + 0x0A882'i32, 0x0A8B3'i32, + 0x0A8F2'i32, 0x0A8F7'i32, + 0x0A8FD'i32, 0x0A8FE'i32, + 0x0A90A'i32, 0x0A925'i32, + 0x0A930'i32, 0x0A946'i32, + 0x0A960'i32, 0x0A97C'i32, + 0x0A984'i32, 0x0A9B2'i32, + 0x0A9E0'i32, 0x0A9E4'i32, + 0x0A9E6'i32, 0x0A9EF'i32, + 0x0A9FA'i32, 0x0A9FE'i32, + 0x0AA00'i32, 0x0AA28'i32, + 0x0AA40'i32, 0x0AA42'i32, + 0x0AA44'i32, 0x0AA4B'i32, + 0x0AA60'i32, 0x0AA76'i32, + 0x0AA7E'i32, 0x0AAAF'i32, + 0x0AAB5'i32, 0x0AAB6'i32, + 0x0AAB9'i32, 0x0AABD'i32, + 0x0AADB'i32, 0x0AADD'i32, + 0x0AAE0'i32, 0x0AAEA'i32, + 
0x0AAF2'i32, 0x0AAF4'i32, + 0x0AB01'i32, 0x0AB06'i32, + 0x0AB09'i32, 0x0AB0E'i32, + 0x0AB11'i32, 0x0AB16'i32, + 0x0AB20'i32, 0x0AB26'i32, + 0x0AB28'i32, 0x0AB2E'i32, + 0x0AB30'i32, 0x0AB5A'i32, + 0x0AB5C'i32, 0x0AB67'i32, + 0x0AB70'i32, 0x0ABE2'i32, + 0x0AC00'i32, 0x0D7A3'i32, + 0x0D7B0'i32, 0x0D7C6'i32, + 0x0D7CB'i32, 0x0D7FB'i32, + 0x0F900'i32, 0x0FA6D'i32, + 0x0FA70'i32, 0x0FAD9'i32, + 0x0FB00'i32, 0x0FB06'i32, + 0x0FB13'i32, 0x0FB17'i32, + 0x0FB1F'i32, 0x0FB28'i32, + 0x0FB2A'i32, 0x0FB36'i32, + 0x0FB38'i32, 0x0FB3C'i32, + 0x0FB40'i32, 0x0FB41'i32, + 0x0FB43'i32, 0x0FB44'i32, + 0x0FB46'i32, 0x0FBB1'i32, + 0x0FBD3'i32, 0x0FD3D'i32, + 0x0FD50'i32, 0x0FD8F'i32, + 0x0FD92'i32, 0x0FDC7'i32, + 0x0FDF0'i32, 0x0FDFB'i32, + 0x0FE70'i32, 0x0FE74'i32, + 0x0FE76'i32, 0x0FEFC'i32, + 0x0FF21'i32, 0x0FF3A'i32, + 0x0FF41'i32, 0x0FF5A'i32, + 0x0FF66'i32, 0x0FFBE'i32, + 0x0FFC2'i32, 0x0FFC7'i32, + 0x0FFCA'i32, 0x0FFCF'i32, + 0x0FFD2'i32, 0x0FFD7'i32, + 0x0FFDA'i32, 0x0FFDC'i32, + 0x10000'i32, 0x1000B'i32, + 0x1000D'i32, 0x10026'i32, + 0x10028'i32, 0x1003A'i32, + 0x1003C'i32, 0x1003D'i32, + 0x1003F'i32, 0x1004D'i32, + 0x10050'i32, 0x1005D'i32, + 0x10080'i32, 0x100FA'i32, + 0x10280'i32, 0x1029C'i32, + 0x102A0'i32, 0x102D0'i32, + 0x10300'i32, 0x1031F'i32, + 0x1032D'i32, 0x10340'i32, + 0x10342'i32, 0x10349'i32, + 0x10350'i32, 0x10375'i32, + 0x10380'i32, 0x1039D'i32, + 0x103A0'i32, 0x103C3'i32, + 0x103C8'i32, 0x103CF'i32, + 0x10400'i32, 0x1049D'i32, + 0x104B0'i32, 0x104D3'i32, + 0x104D8'i32, 0x104FB'i32, + 0x10500'i32, 0x10527'i32, + 0x10530'i32, 0x10563'i32, + 0x10600'i32, 0x10736'i32, + 0x10740'i32, 0x10755'i32, + 0x10760'i32, 0x10767'i32, + 0x10800'i32, 0x10805'i32, + 0x1080A'i32, 0x10835'i32, + 0x10837'i32, 0x10838'i32, + 0x1083F'i32, 0x10855'i32, + 0x10860'i32, 0x10876'i32, + 0x10880'i32, 0x1089E'i32, + 0x108E0'i32, 0x108F2'i32, + 0x108F4'i32, 0x108F5'i32, + 0x10900'i32, 0x10915'i32, + 0x10920'i32, 0x10939'i32, + 0x10980'i32, 0x109B7'i32, + 0x109BE'i32, 0x109BF'i32, + 
0x10A10'i32, 0x10A13'i32, + 0x10A15'i32, 0x10A17'i32, + 0x10A19'i32, 0x10A35'i32, + 0x10A60'i32, 0x10A7C'i32, + 0x10A80'i32, 0x10A9C'i32, + 0x10AC0'i32, 0x10AC7'i32, + 0x10AC9'i32, 0x10AE4'i32, + 0x10B00'i32, 0x10B35'i32, + 0x10B40'i32, 0x10B55'i32, + 0x10B60'i32, 0x10B72'i32, + 0x10B80'i32, 0x10B91'i32, + 0x10C00'i32, 0x10C48'i32, + 0x10C80'i32, 0x10CB2'i32, + 0x10CC0'i32, 0x10CF2'i32, + 0x10D00'i32, 0x10D23'i32, + 0x10F00'i32, 0x10F1C'i32, + 0x10F30'i32, 0x10F45'i32, + 0x10FE0'i32, 0x10FF6'i32, + 0x11003'i32, 0x11037'i32, + 0x11083'i32, 0x110AF'i32, + 0x110D0'i32, 0x110E8'i32, + 0x11103'i32, 0x11126'i32, + 0x11150'i32, 0x11172'i32, + 0x11183'i32, 0x111B2'i32, + 0x111C1'i32, 0x111C4'i32, + 0x11200'i32, 0x11211'i32, + 0x11213'i32, 0x1122B'i32, + 0x11280'i32, 0x11286'i32, + 0x1128A'i32, 0x1128D'i32, + 0x1128F'i32, 0x1129D'i32, + 0x1129F'i32, 0x112A8'i32, + 0x112B0'i32, 0x112DE'i32, + 0x11305'i32, 0x1130C'i32, + 0x1130F'i32, 0x11310'i32, + 0x11313'i32, 0x11328'i32, + 0x1132A'i32, 0x11330'i32, + 0x11332'i32, 0x11333'i32, + 0x11335'i32, 0x11339'i32, + 0x1135D'i32, 0x11361'i32, + 0x11400'i32, 0x11434'i32, + 0x11447'i32, 0x1144A'i32, + 0x11480'i32, 0x114AF'i32, + 0x114C4'i32, 0x114C5'i32, + 0x11580'i32, 0x115AE'i32, + 0x115D8'i32, 0x115DB'i32, + 0x11600'i32, 0x1162F'i32, + 0x11680'i32, 0x116AA'i32, + 0x11700'i32, 0x1171A'i32, + 0x11800'i32, 0x1182B'i32, + 0x118A0'i32, 0x118DF'i32, + 0x119A0'i32, 0x119A7'i32, + 0x119AA'i32, 0x119D0'i32, + 0x11A0B'i32, 0x11A32'i32, + 0x11A5C'i32, 0x11A89'i32, + 0x11AC0'i32, 0x11AF8'i32, + 0x11C00'i32, 0x11C08'i32, + 0x11C0A'i32, 0x11C2E'i32, + 0x11C72'i32, 0x11C8F'i32, + 0x11D00'i32, 0x11D06'i32, + 0x11D08'i32, 0x11D09'i32, + 0x11D0B'i32, 0x11D30'i32, + 0x11D60'i32, 0x11D65'i32, + 0x11D67'i32, 0x11D68'i32, + 0x11D6A'i32, 0x11D89'i32, + 0x11EE0'i32, 0x11EF2'i32, + 0x12000'i32, 0x12399'i32, + 0x12480'i32, 0x12543'i32, + 0x13000'i32, 0x1342E'i32, + 0x14400'i32, 0x14646'i32, + 0x16800'i32, 0x16A38'i32, + 0x16A40'i32, 0x16A5E'i32, + 
0x16AD0'i32, 0x16AED'i32, + 0x16B00'i32, 0x16B2F'i32, + 0x16B40'i32, 0x16B43'i32, + 0x16B63'i32, 0x16B77'i32, + 0x16B7D'i32, 0x16B8F'i32, + 0x16E40'i32, 0x16E7F'i32, + 0x16F00'i32, 0x16F4A'i32, + 0x16F93'i32, 0x16F9F'i32, + 0x16FE0'i32, 0x16FE1'i32, + 0x17000'i32, 0x187F7'i32, + 0x18800'i32, 0x18AF2'i32, + 0x1B000'i32, 0x1B11E'i32, + 0x1B150'i32, 0x1B152'i32, + 0x1B164'i32, 0x1B167'i32, + 0x1B170'i32, 0x1B2FB'i32, + 0x1BC00'i32, 0x1BC6A'i32, + 0x1BC70'i32, 0x1BC7C'i32, + 0x1BC80'i32, 0x1BC88'i32, + 0x1BC90'i32, 0x1BC99'i32, + 0x1D400'i32, 0x1D454'i32, + 0x1D456'i32, 0x1D49C'i32, + 0x1D49E'i32, 0x1D49F'i32, + 0x1D4A5'i32, 0x1D4A6'i32, + 0x1D4A9'i32, 0x1D4AC'i32, + 0x1D4AE'i32, 0x1D4B9'i32, + 0x1D4BD'i32, 0x1D4C3'i32, + 0x1D4C5'i32, 0x1D505'i32, + 0x1D507'i32, 0x1D50A'i32, + 0x1D50D'i32, 0x1D514'i32, + 0x1D516'i32, 0x1D51C'i32, + 0x1D51E'i32, 0x1D539'i32, + 0x1D53B'i32, 0x1D53E'i32, + 0x1D540'i32, 0x1D544'i32, + 0x1D54A'i32, 0x1D550'i32, + 0x1D552'i32, 0x1D6A5'i32, + 0x1D6A8'i32, 0x1D6C0'i32, + 0x1D6C2'i32, 0x1D6DA'i32, + 0x1D6DC'i32, 0x1D6FA'i32, + 0x1D6FC'i32, 0x1D714'i32, + 0x1D716'i32, 0x1D734'i32, + 0x1D736'i32, 0x1D74E'i32, + 0x1D750'i32, 0x1D76E'i32, + 0x1D770'i32, 0x1D788'i32, + 0x1D78A'i32, 0x1D7A8'i32, + 0x1D7AA'i32, 0x1D7C2'i32, + 0x1D7C4'i32, 0x1D7CB'i32, + 0x1E100'i32, 0x1E12C'i32, + 0x1E137'i32, 0x1E13D'i32, + 0x1E2C0'i32, 0x1E2EB'i32, + 0x1E800'i32, 0x1E8C4'i32, + 0x1E900'i32, 0x1E943'i32, + 0x1EE00'i32, 0x1EE03'i32, + 0x1EE05'i32, 0x1EE1F'i32, + 0x1EE21'i32, 0x1EE22'i32, + 0x1EE29'i32, 0x1EE32'i32, + 0x1EE34'i32, 0x1EE37'i32, + 0x1EE4D'i32, 0x1EE4F'i32, + 0x1EE51'i32, 0x1EE52'i32, + 0x1EE61'i32, 0x1EE62'i32, + 0x1EE67'i32, 0x1EE6A'i32, + 0x1EE6C'i32, 0x1EE72'i32, + 0x1EE74'i32, 0x1EE77'i32, + 0x1EE79'i32, 0x1EE7C'i32, + 0x1EE80'i32, 0x1EE89'i32, + 0x1EE8B'i32, 0x1EE9B'i32, + 0x1EEA1'i32, 0x1EEA3'i32, + 0x1EEA5'i32, 0x1EEA9'i32, + 0x1EEAB'i32, 0x1EEBB'i32, + 0x20000'i32, 0x2A6D6'i32, + 0x2A700'i32, 0x2B734'i32, + 0x2B740'i32, 0x2B81D'i32, + 
0x2B820'i32, 0x2CEA1'i32, + 0x2CEB0'i32, 0x2EBE0'i32, + 0x2F800'i32, 0x2FA1D'i32, + ] + + alphaSinglets = [ + 0x000AA'i32, + 0x000B5'i32, + 0x000BA'i32, + 0x002EC'i32, + 0x002EE'i32, + 0x0037F'i32, + 0x00386'i32, + 0x0038C'i32, + 0x00559'i32, + 0x006D5'i32, + 0x006FF'i32, + 0x00710'i32, + 0x007B1'i32, + 0x007FA'i32, + 0x0081A'i32, + 0x00824'i32, + 0x00828'i32, + 0x0093D'i32, + 0x00950'i32, + 0x009B2'i32, + 0x009BD'i32, + 0x009CE'i32, + 0x009FC'i32, + 0x00A5E'i32, + 0x00ABD'i32, + 0x00AD0'i32, + 0x00AF9'i32, + 0x00B3D'i32, + 0x00B71'i32, + 0x00B83'i32, + 0x00B9C'i32, + 0x00BD0'i32, + 0x00C3D'i32, + 0x00C80'i32, + 0x00CBD'i32, + 0x00CDE'i32, + 0x00D3D'i32, + 0x00D4E'i32, + 0x00DBD'i32, + 0x00E84'i32, + 0x00EA5'i32, + 0x00EBD'i32, + 0x00EC6'i32, + 0x00F00'i32, + 0x0103F'i32, + 0x01061'i32, + 0x0108E'i32, + 0x010C7'i32, + 0x010CD'i32, + 0x01258'i32, + 0x012C0'i32, + 0x017D7'i32, + 0x017DC'i32, + 0x018AA'i32, + 0x01AA7'i32, + 0x01CFA'i32, + 0x01F59'i32, + 0x01F5B'i32, + 0x01F5D'i32, + 0x01FBE'i32, + 0x02071'i32, + 0x0207F'i32, + 0x02102'i32, + 0x02107'i32, + 0x02115'i32, + 0x02124'i32, + 0x02126'i32, + 0x02128'i32, + 0x0214E'i32, + 0x02D27'i32, + 0x02D2D'i32, + 0x02D6F'i32, + 0x02E2F'i32, + 0x0A8FB'i32, + 0x0A9CF'i32, + 0x0AA7A'i32, + 0x0AAB1'i32, + 0x0AAC0'i32, + 0x0AAC2'i32, + 0x0FB1D'i32, + 0x0FB3E'i32, + 0x10808'i32, + 0x1083C'i32, + 0x10A00'i32, + 0x10F27'i32, + 0x11144'i32, + 0x11176'i32, + 0x111DA'i32, + 0x111DC'i32, + 0x11288'i32, + 0x1133D'i32, + 0x11350'i32, + 0x1145F'i32, + 0x114C7'i32, + 0x11644'i32, + 0x116B8'i32, + 0x118FF'i32, + 0x119E1'i32, + 0x119E3'i32, + 0x11A00'i32, + 0x11A3A'i32, + 0x11A50'i32, + 0x11A9D'i32, + 0x11C40'i32, + 0x11D46'i32, + 0x11D98'i32, + 0x16F50'i32, + 0x16FE3'i32, + 0x1D4A2'i32, + 0x1D4BB'i32, + 0x1D546'i32, + 0x1E14E'i32, + 0x1E94B'i32, + 0x1EE24'i32, + 0x1EE27'i32, + 0x1EE39'i32, + 0x1EE3B'i32, + 0x1EE42'i32, + 0x1EE47'i32, + 0x1EE49'i32, + 0x1EE4B'i32, + 0x1EE54'i32, + 0x1EE57'i32, + 0x1EE59'i32, + 0x1EE5B'i32, + 0x1EE5D'i32, + 
0x1EE5F'i32, + 0x1EE64'i32, + 0x1EE7E'i32, + ] + + spaceRanges = [ + 0x00009'i32, 0x0000D'i32, + 0x00020'i32, 0x00020'i32, + 0x00085'i32, 0x00085'i32, + 0x000A0'i32, 0x000A0'i32, + 0x01680'i32, 0x01680'i32, + 0x02000'i32, 0x0200A'i32, + 0x02028'i32, 0x02029'i32, + 0x0202F'i32, 0x0202F'i32, + 0x0205F'i32, 0x0205F'i32, + 0x03000'i32, 0x03000'i32, + ] + + unicodeSpaces = [ + Rune 0x00009, + Rune 0x0000A, + Rune 0x0000B, + Rune 0x0000C, + Rune 0x0000D, + Rune 0x00020, + Rune 0x00085, + Rune 0x000A0, + Rune 0x01680, + Rune 0x02000, + Rune 0x02001, + Rune 0x02002, + Rune 0x02003, + Rune 0x02004, + Rune 0x02005, + Rune 0x02006, + Rune 0x02007, + Rune 0x02008, + Rune 0x02009, + Rune 0x0200A, + Rune 0x02028, + Rune 0x02029, + Rune 0x0202F, + Rune 0x0205F, + Rune 0x03000, + ] + diff --git a/lib/pure/ioselects/ioselectors_epoll.nim b/lib/pure/ioselects/ioselectors_epoll.nim new file mode 100644 index 000000000..10658b78e --- /dev/null +++ b/lib/pure/ioselects/ioselectors_epoll.nim @@ -0,0 +1,534 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Eugene Kabanov +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# This module implements Linux epoll(). 
+ +import std/[posix, times, epoll] + +# Maximum number of events that can be returned +const MAX_EPOLL_EVENTS = 64 + +when not defined(android): + type + SignalFdInfo* {.importc: "struct signalfd_siginfo", + header: "<sys/signalfd.h>", pure, final.} = object + ssi_signo*: uint32 + ssi_errno*: int32 + ssi_code*: int32 + ssi_pid*: uint32 + ssi_uid*: uint32 + ssi_fd*: int32 + ssi_tid*: uint32 + ssi_band*: uint32 + ssi_overrun*: uint32 + ssi_trapno*: uint32 + ssi_status*: int32 + ssi_int*: int32 + ssi_ptr*: uint64 + ssi_utime*: uint64 + ssi_stime*: uint64 + ssi_addr*: uint64 + pad* {.importc: "__pad".}: array[0..47, uint8] + +proc timerfd_create(clock_id: ClockId, flags: cint): cint + {.cdecl, importc: "timerfd_create", header: "<sys/timerfd.h>".} +proc timerfd_settime(ufd: cint, flags: cint, + utmr: var Itimerspec, otmr: var Itimerspec): cint + {.cdecl, importc: "timerfd_settime", header: "<sys/timerfd.h>".} +proc eventfd(count: cuint, flags: cint): cint + {.cdecl, importc: "eventfd", header: "<sys/eventfd.h>".} + +when not defined(android): + proc signalfd(fd: cint, mask: var Sigset, flags: cint): cint + {.cdecl, importc: "signalfd", header: "<sys/signalfd.h>".} + +when hasThreadSupport: + type + SelectorImpl[T] = object + epollFD: cint + maxFD: int + numFD: int + fds: ptr SharedArray[SelectorKey[T]] + count*: int + Selector*[T] = ptr SelectorImpl[T] +else: + type + SelectorImpl[T] = object + epollFD: cint + maxFD: int + numFD: int + fds: seq[SelectorKey[T]] + count*: int + Selector*[T] = ref SelectorImpl[T] +type + SelectEventImpl = object + efd: cint + SelectEvent* = ptr SelectEventImpl + +proc newSelector*[T](): Selector[T] = + proc initialNumFD(): int {.inline.} = + when defined(nuttx): + result = NEPOLL_MAX + else: + result = 1024 + # Retrieve the maximum fd count (for current OS) via getrlimit() + var maxFD = maxDescriptors() + doAssert(maxFD > 0) + # Start with a reasonable size, checkFd() will grow this on demand + let numFD = initialNumFD() + + var epollFD 
= epoll_create1(O_CLOEXEC) + if epollFD < 0: + raiseOSError(osLastError()) + + when hasThreadSupport: + result = cast[Selector[T]](allocShared0(sizeof(SelectorImpl[T]))) + result.epollFD = epollFD + result.maxFD = maxFD + result.numFD = numFD + result.fds = allocSharedArray[SelectorKey[T]](numFD) + else: + result = Selector[T]() + result.epollFD = epollFD + result.maxFD = maxFD + result.numFD = numFD + result.fds = newSeq[SelectorKey[T]](numFD) + + for i in 0 ..< numFD: + result.fds[i].ident = InvalidIdent + +proc close*[T](s: Selector[T]) = + let res = posix.close(s.epollFD) + when hasThreadSupport: + deallocSharedArray(s.fds) + deallocShared(cast[pointer](s)) + if res != 0: + raiseIOSelectorsError(osLastError()) + +proc newSelectEvent*(): SelectEvent = + let fdci = eventfd(0, O_CLOEXEC or O_NONBLOCK) + if fdci == -1: + raiseIOSelectorsError(osLastError()) + result = cast[SelectEvent](allocShared0(sizeof(SelectEventImpl))) + result.efd = fdci + +proc trigger*(ev: SelectEvent) = + var data: uint64 = 1 + if posix.write(ev.efd, addr data, sizeof(uint64)) == -1: + raiseIOSelectorsError(osLastError()) + +proc close*(ev: SelectEvent) = + let res = posix.close(ev.efd) + deallocShared(cast[pointer](ev)) + if res != 0: + raiseIOSelectorsError(osLastError()) + +template checkFd(s, f) = + # TODO: I don't see how this can ever happen. You won't be able to create an + # FD if there is too many. 
-- DP + if f >= s.maxFD: + raiseIOSelectorsError("Maximum number of descriptors is exhausted!") + if f >= s.numFD: + var numFD = s.numFD + while numFD <= f: numFD *= 2 + when hasThreadSupport: + s.fds = reallocSharedArray(s.fds, s.numFD, numFD) + else: + s.fds.setLen(numFD) + for i in s.numFD ..< numFD: + s.fds[i].ident = InvalidIdent + s.numFD = numFD + +proc registerHandle*[T](s: Selector[T], fd: int | SocketHandle, + events: set[Event], data: T) = + let fdi = int(fd) + s.checkFd(fdi) + doAssert(s.fds[fdi].ident == InvalidIdent, "Descriptor $# already registered" % $fdi) + s.setKey(fdi, events, 0, data) + if events != {}: + var epv = EpollEvent(events: EPOLLRDHUP) + epv.data.u64 = fdi.uint + if Event.Read in events: epv.events = epv.events or EPOLLIN + if Event.Write in events: epv.events = epv.events or EPOLLOUT + if epoll_ctl(s.epollFD, EPOLL_CTL_ADD, fdi.cint, addr epv) != 0: + raiseIOSelectorsError(osLastError()) + inc(s.count) + +proc updateHandle*[T](s: Selector[T], fd: int | SocketHandle, events: set[Event]) = + let maskEvents = {Event.Timer, Event.Signal, Event.Process, Event.Vnode, + Event.User, Event.Oneshot, Event.Error} + let fdi = int(fd) + s.checkFd(fdi) + var pkey = addr(s.fds[fdi]) + doAssert(pkey.ident != InvalidIdent, + "Descriptor $# is not registered in the selector!" 
% $fdi) + doAssert(pkey.events * maskEvents == {}) + if pkey.events != events: + var epv = EpollEvent(events: EPOLLRDHUP) + epv.data.u64 = fdi.uint + + if Event.Read in events: epv.events = epv.events or EPOLLIN + if Event.Write in events: epv.events = epv.events or EPOLLOUT + + if pkey.events == {}: + if epoll_ctl(s.epollFD, EPOLL_CTL_ADD, fdi.cint, addr epv) != 0: + raiseIOSelectorsError(osLastError()) + inc(s.count) + else: + if events != {}: + if epoll_ctl(s.epollFD, EPOLL_CTL_MOD, fdi.cint, addr epv) != 0: + raiseIOSelectorsError(osLastError()) + else: + if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, fdi.cint, addr epv) != 0: + raiseIOSelectorsError(osLastError()) + dec(s.count) + pkey.events = events + +proc unregister*[T](s: Selector[T], fd: int|SocketHandle) = + let fdi = int(fd) + s.checkFd(fdi) + var pkey = addr(s.fds[fdi]) + doAssert(pkey.ident != InvalidIdent, + "Descriptor $# is not registered in the selector!" % $fdi) + if pkey.events != {}: + when not defined(android): + if Event.Read in pkey.events or Event.Write in pkey.events or Event.User in pkey.events: + var epv = EpollEvent() + # TODO: Refactor all these EPOLL_CTL_DEL + dec(s.count) into a proc. 
+ if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, fdi.cint, addr epv) != 0: + raiseIOSelectorsError(osLastError()) + dec(s.count) + elif Event.Timer in pkey.events: + if Event.Finished notin pkey.events: + var epv = EpollEvent() + if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, fdi.cint, addr epv) != 0: + raiseIOSelectorsError(osLastError()) + dec(s.count) + if posix.close(cint(fdi)) != 0: + raiseIOSelectorsError(osLastError()) + elif Event.Signal in pkey.events: + var epv = EpollEvent() + if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, fdi.cint, addr epv) != 0: + raiseIOSelectorsError(osLastError()) + var nmask, omask: Sigset + discard sigemptyset(nmask) + discard sigemptyset(omask) + discard sigaddset(nmask, cint(s.fds[fdi].param)) + unblockSignals(nmask, omask) + dec(s.count) + if posix.close(cint(fdi)) != 0: + raiseIOSelectorsError(osLastError()) + elif Event.Process in pkey.events: + if Event.Finished notin pkey.events: + var epv = EpollEvent() + if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, fdi.cint, addr epv) != 0: + raiseIOSelectorsError(osLastError()) + var nmask, omask: Sigset + discard sigemptyset(nmask) + discard sigemptyset(omask) + discard sigaddset(nmask, SIGCHLD) + unblockSignals(nmask, omask) + dec(s.count) + if posix.close(cint(fdi)) != 0: + raiseIOSelectorsError(osLastError()) + else: + if Event.Read in pkey.events or Event.Write in pkey.events or Event.User in pkey.events: + var epv = EpollEvent() + if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, fdi.cint, addr epv) != 0: + raiseIOSelectorsError(osLastError()) + dec(s.count) + elif Event.Timer in pkey.events: + if Event.Finished notin pkey.events: + var epv = EpollEvent() + if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, fdi.cint, addr epv) != 0: + raiseIOSelectorsError(osLastError()) + dec(s.count) + if posix.close(cint(fdi)) != 0: + raiseIOSelectorsError(osLastError()) + clearKey(pkey) + +proc unregister*[T](s: Selector[T], ev: SelectEvent) = + let fdi = int(ev.efd) + s.checkFd(fdi) + var pkey = addr(s.fds[fdi]) + doAssert(pkey.ident != 
proc registerTimer*[T](s: Selector[T], timeout: int, oneshot: bool,
                       data: T): int {.discardable.} =
  ## Creates a timerfd that fires after `timeout` milliseconds, registers it
  ## with the selector and returns its descriptor.
  ##
  ## With `oneshot` the timer fires once and the key is flagged
  ## `Event.Oneshot`; otherwise it repeats every `timeout` milliseconds.
  ## `data` is stored in the key.
  ##
  ## Raises an IOSelectorsError when a system call fails. The freshly
  ## created descriptor is closed again if registration does not complete,
  ## so a failure no longer leaks it.
  let fdi = timerfd_create(CLOCK_MONOTONIC, O_CLOEXEC or O_NONBLOCK).int
  if fdi == -1:
    raiseIOSelectorsError(osLastError())

  var registered = false
  try:
    s.checkFd(fdi)
    doAssert(s.fds[fdi].ident == InvalidIdent)

    var
      newTs: Itimerspec
      oldTs: Itimerspec
      events = {Event.Timer}
      epv = EpollEvent(events: EPOLLIN or EPOLLRDHUP)
    epv.data.u64 = fdi.uint

    if oneshot:
      # A zero interval disables periodic expiration.
      newTs.it_interval.tv_sec = posix.Time(0)
      newTs.it_interval.tv_nsec = 0
      newTs.it_value.tv_sec = posix.Time(timeout div 1_000)
      newTs.it_value.tv_nsec = (timeout %% 1_000) * 1_000_000
      incl(events, Event.Oneshot)
      epv.events = epv.events or EPOLLONESHOT
    else:
      newTs.it_interval.tv_sec = posix.Time(timeout div 1000)
      newTs.it_interval.tv_nsec = (timeout %% 1_000) * 1_000_000
      newTs.it_value.tv_sec = newTs.it_interval.tv_sec
      newTs.it_value.tv_nsec = newTs.it_interval.tv_nsec

    # The error code is read before the `finally` section runs, so closing
    # the descriptor there cannot disturb the reported error.
    if timerfd_settime(fdi.cint, cint(0), newTs, oldTs) != 0:
      raiseIOSelectorsError(osLastError())
    if epoll_ctl(s.epollFD, EPOLL_CTL_ADD, fdi.cint, addr epv) != 0:
      raiseIOSelectorsError(osLastError())
    s.setKey(fdi, events, 0, data)
    inc(s.count)
    registered = true
  finally:
    if not registered:
      discard posix.close(fdi.cint)
  result = fdi
proc selectInto*[T](s: Selector[T], timeout: int,
                    results: var openArray[ReadyKey]): int =
  ## Waits up to `timeout` milliseconds for events and writes one `ReadyKey`
  ## per reported descriptor into `results`. Returns the number of entries
  ## written; an interrupted wait (`EINTR`) yields 0. Any other failure of a
  ## system call raises an IOSelectorsError.
  var resTable: array[MAX_EPOLL_EVENTS, EpollEvent]
  let maxres = min(MAX_EPOLL_EVENTS, len(results))

  verifySelectParams(timeout)

  let count = epoll_wait(s.epollFD, addr(resTable[0]), maxres.cint,
                         timeout.cint)
  if count < 0:
    let err = osLastError()
    if cint(err) != EINTR:
      raiseIOSelectorsError(err)
    return 0
  if count == 0:
    return 0

  var filled = 0
  for idx in 0 ..< int(count):
    let fdi = int(resTable[idx].data.u64)
    let mask = resTable[idx].events
    let key = addr(s.fds[fdi])
    doAssert(key.ident != InvalidIdent)
    var ready = ReadyKey(fd: fdi, events: {})

    let hangup = (mask and EPOLLHUP) != 0
    if (mask and EPOLLERR) != 0 or hangup:
      if hangup:
        ready.errorCode = OSErrorCode(ECONNRESET)
      else:
        # Try reading SO_ERROR from fd.
        var soError: cint
        var optLen = SockLen(sizeof(soError))
        if getsockopt(SocketHandle(fdi), SOL_SOCKET, SO_ERROR, addr(soError),
                      addr(optLen)) == 0'i32:
          ready.errorCode = OSErrorCode(soError)
      ready.events.incl(Event.Error)

    if (mask and EPOLLOUT) != 0:
      ready.events.incl(Event.Write)

    if (mask and EPOLLIN) != 0:
      if Event.Read in key.events:
        ready.events.incl(Event.Read)
      elif Event.Timer in key.events:
        # The 8-byte counter must be read from the timer descriptor.
        var expirations: uint64 = 0
        if posix.read(cint(fdi), addr expirations,
                      sizeof(uint64)) != sizeof(uint64):
          raiseIOSelectorsError(osLastError())
        ready.events.incl(Event.Timer)
      else:
        when not defined(android):
          if Event.Signal in key.events:
            var info = SignalFdInfo()
            if posix.read(cint(fdi), addr info,
                          sizeof(SignalFdInfo)) != sizeof(SignalFdInfo):
              raiseIOSelectorsError(osLastError())
            ready.events.incl(Event.Signal)
          elif Event.Process in key.events:
            var info = SignalFdInfo()
            if posix.read(cint(fdi), addr info,
                          sizeof(SignalFdInfo)) != sizeof(SignalFdInfo):
              raiseIOSelectorsError(osLastError())
            if cast[int](info.ssi_pid) != key.param:
              # Notification is for a different pid than the watched one.
              continue
            ready.events.incl(Event.Process)
          elif Event.User in key.events:
            var counter: uint64 = 0
            if posix.read(cint(fdi), addr counter,
                          sizeof(uint64)) != sizeof(uint64):
              let err = osLastError()
              if err == OSErrorCode(EAGAIN):
                # Nothing left to read: skip this entry.
                continue
              raiseIOSelectorsError(err)
            ready.events.incl(Event.User)
        else:
          if Event.User in key.events:
            var counter: uint64 = 0
            if posix.read(cint(fdi), addr counter,
                          sizeof(uint64)) != sizeof(uint64):
              let err = osLastError()
              if err == OSErrorCode(EAGAIN):
                # Nothing left to read: skip this entry.
                continue
              raiseIOSelectorsError(err)
            ready.events.incl(Event.User)

    if Event.Oneshot in key.events:
      var epv = EpollEvent()
      if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, cint(fdi), addr epv) != 0:
        raiseIOSelectorsError(osLastError())
      # The key itself stays until it is unregistered so the application can
      # still obtain its data, but epoll no longer holds the descriptor, so
      # the counter is decreased here.
      dec(s.count)
      # `Finished` marks the key so `unregister` does not decrease again.
      key.events.incl(Event.Finished)

    results[filled] = ready
    inc(filled)
  result = filled
s.epollFD.int diff --git a/lib/pure/ioselects/ioselectors_kqueue.nim b/lib/pure/ioselects/ioselectors_kqueue.nim new file mode 100644 index 000000000..513578eda --- /dev/null +++ b/lib/pure/ioselects/ioselectors_kqueue.nim @@ -0,0 +1,639 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Eugene Kabanov +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# This module implements BSD kqueue(). + +import std/[posix, times, kqueue, nativesockets] + +const + # Maximum number of events that can be returned. + MAX_KQUEUE_EVENTS = 64 + # SIG_IGN and SIG_DFL declared in posix.nim as variables, but we need them + # to be constants and GC-safe. + SIG_DFL = cast[proc(x: cint) {.noconv,gcsafe.}](0) + SIG_IGN = cast[proc(x: cint) {.noconv,gcsafe.}](1) + +when defined(kqcache): + const CACHE_EVENTS = true + +when defined(macosx) or defined(freebsd) or defined(dragonfly): + when defined(macosx): + const MAX_DESCRIPTORS_ID = 29 # KERN_MAXFILESPERPROC (MacOS) + else: + const MAX_DESCRIPTORS_ID = 27 # KERN_MAXFILESPERPROC (FreeBSD) + proc sysctl(name: ptr cint, namelen: cuint, oldp: pointer, oldplen: ptr csize_t, + newp: pointer, newplen: csize_t): cint + {.importc: "sysctl",header: """#include <sys/types.h> + #include <sys/sysctl.h>""".} +elif defined(netbsd) or defined(openbsd): + # OpenBSD and NetBSD don't have KERN_MAXFILESPERPROC, so we are using + # KERN_MAXFILES, because KERN_MAXFILES is always bigger, + # than KERN_MAXFILESPERPROC. 
proc getUnique[T](s: Selector[T]): int {.inline.} =
  ## Duplicates the selector's placeholder socket and returns the new
  ## descriptor, which serves as a unique index into the `fds` array.
  ## Raises an IOSelectorsError when the duplication fails.
  let dupFd = posix.fcntl(s.sock, F_DUPFD_CLOEXEC, s.sock)
  if dupFd == -1:
    raiseIOSelectorsError(osLastError())
  result = dupFd
proc close*[T](s: Selector[T]) =
  ## Closes the kqueue descriptor and the placeholder socket and, in
  ## threaded builds, releases the lock and all shared memory owned by the
  ## selector, including the pending-changes buffer.
  ##
  ## Raises an IOSelectorsError carrying the error of the first `close` call
  ## that failed; resources are released before the error is raised.
  var firstErr: OSErrorCode
  var failed = false
  if posix.close(s.kqFD) != 0:
    # Read errno right away, before any later call can overwrite it.
    firstErr = osLastError()
    failed = true
  if posix.close(s.sock) != 0:
    if not failed:
      firstErr = osLastError()
    failed = true
  when hasThreadSupport:
    deinitLock(s.changesLock)
    deallocSharedArray(s.fds)
    # `changes` is allocated with allocSharedArray as well and must be
    # freed, otherwise every closed selector leaks it.
    deallocSharedArray(s.changes)
    deallocShared(cast[pointer](s))
  if failed:
    raiseIOSelectorsError(firstErr)
when hasThreadSupport:
  template modifyKQueue[T](s: Selector[T], nident: uint, nfilter: cshort,
                           nflags: cushort, nfflags: cuint, ndata: int,
                           nudata: pointer) =
    ## Appends one change record to the selector's pending-changes buffer
    ## while holding the changes lock. The buffer doubles in size when full.
    mixin withChangeLock
    s.withChangeLock():
      if s.changesLength == s.changesSize:
        # Buffer is full: move the records into one twice as large.
        let newSize = s.changesSize shl 1
        let rdata = allocSharedArray[KEvent](newSize)
        copyMem(rdata, s.changes, s.changesSize * sizeof(KEvent))
        # Install the new buffer and free the old one. Without this the
        # write below would land past the end of the old allocation.
        deallocSharedArray(s.changes)
        s.changes = rdata
        s.changesSize = newSize
      s.changes[s.changesLength] = KEvent(ident: nident,
                                          filter: nfilter, flags: nflags,
                                          fflags: nfflags, data: ndata,
                                          udata: nudata)
      inc(s.changesLength)

  when not declared(CACHE_EVENTS):
    template flushKQueue[T](s: Selector[T]) =
      ## Submits all pending change records to the kernel and empties the
      ## buffer. `ENOENT` is ignored; other errors raise IOSelectorsError.
      mixin withChangeLock
      s.withChangeLock():
        if s.changesLength > 0:
          if kevent(s.kqFD, addr(s.changes[0]), cint(s.changesLength),
                    nil, 0, nil) == -1:
            let res = osLastError()
            if cint(res) != ENOENT: # ignore pipes whose read end is closed
              raiseIOSelectorsError(res)
          s.changesLength = 0
else:
  template modifyKQueue[T](s: Selector[T], nident: uint, nfilter: cshort,
                           nflags: cushort, nfflags: cuint, ndata: int,
                           nudata: pointer) =
    ## Appends one change record to the selector's pending-changes sequence.
    s.changes.add(KEvent(ident: nident,
                         filter: nfilter, flags: nflags,
                         fflags: nfflags, data: ndata,
                         udata: nudata))

  when not declared(CACHE_EVENTS):
    template flushKQueue[T](s: Selector[T]) =
      ## Submits all pending change records to the kernel and empties the
      ## sequence. `ENOENT` is ignored; other errors raise IOSelectorsError.
      let length = cint(len(s.changes))
      if length > 0:
        if kevent(s.kqFD, addr(s.changes[0]), length,
                  nil, 0, nil) == -1:
          let res = osLastError()
          if cint(res) != ENOENT: # ignore pipes whose read end is closed
            raiseIOSelectorsError(res)
        s.changes.setLen(0)
proc updateHandle*[T](s: Selector[T], fd: int | SocketHandle,
                      events: set[Event]) =
  ## Changes the read/write interest of the registered descriptor `fd` to
  ## `events`. Filters that are no longer wanted are deleted first, then
  ## newly wanted filters are added; the counter follows each change.
  ##
  ## Asserts that `fd` is registered and that its key carries none of the
  ## timer/signal/process/vnode/user/oneshot/error events.
  const specialEvents = {Event.Timer, Event.Signal, Event.Process,
                         Event.Vnode, Event.User, Event.Oneshot, Event.Error}
  let fdi = int(fd)
  s.checkFd(fdi)
  let key = addr(s.fds[fdi])
  doAssert(key.ident != InvalidIdent,
           "Descriptor $# is not registered in the queue!" % $fdi)
  doAssert(key.events * specialEvents == {})

  let previous = key.events
  if previous == events:
    return

  let dropped = previous - events # registered before, not wanted any more
  let added = events - previous   # wanted now, not registered before

  if Event.Read in dropped:
    modifyKQueue(s, fdi.uint, EVFILT_READ, EV_DELETE, 0, 0, nil)
    dec(s.count)
  if Event.Write in dropped:
    modifyKQueue(s, fdi.uint, EVFILT_WRITE, EV_DELETE, 0, 0, nil)
    dec(s.count)
  if Event.Read in added:
    modifyKQueue(s, fdi.uint, EVFILT_READ, EV_ADD, 0, 0, nil)
    inc(s.count)
  if Event.Write in added:
    modifyKQueue(s, fdi.uint, EVFILT_WRITE, EV_ADD, 0, 0, nil)
    inc(s.count)

  when not declared(CACHE_EVENTS):
    flushKQueue(s)

  key.events = events
proc registerEvent*[T](s: Selector[T], ev: SelectEvent, data: T) =
  ## Registers the user event `ev` with the selector by watching the read
  ## end of its pipe, and stores `data` in the key.
  ##
  ## Raises an IOSelectorsError if the descriptor does not fit into the
  ## selector's table; asserts that the event is not registered yet.
  let fdi = ev.rfd.int
  # Validate the index before touching `fds`, as the other register procs
  # and `unregister` do.
  s.checkFd(fdi)
  doAssert(s.fds[fdi].ident == InvalidIdent, "Event is already registered in the queue!")
  setKey(s, fdi, {Event.User}, 0, data)

  modifyKQueue(s, fdi.uint, EVFILT_READ, EV_ADD, 0, 0, nil)

  when not declared(CACHE_EVENTS):
    flushKQueue(s)

  inc(s.count)
proc registerVnode*[T](s: Selector[T], fd: cint, events: set[Event], data: T) =
  ## Registers the file descriptor `fd` for the vnode notifications listed
  ## in `events` and stores `data` in the key. The key's event set is
  ## `events` plus `Event.Vnode`.
  ##
  ## Raises an IOSelectorsError if `fd` does not fit into the selector's
  ## table.
  let fdi = fd.int
  # Validate the index before writing the key, as the other register procs
  # do.
  s.checkFd(fdi)
  setKey(s, fdi, {Event.Vnode} + events, 0, data)
  let fflags = processVnodeEvents(events)

  modifyKQueue(s, fdi.uint, EVFILT_VNODE, EV_ADD or EV_CLEAR, fflags, 0, nil)

  when not declared(CACHE_EVENTS):
    flushKQueue(s)

  inc(s.count)
proc selectInto*[T](s: Selector[T], timeout: int,
                    results: var openArray[ReadyKey]): int =
  ## Waits up to `timeout` milliseconds (`-1` waits without a timeout) for
  ## kqueue events and writes one `ReadyKey` per reported event into
  ## `results`. Returns the number of entries written; an interrupted wait
  ## (`EINTR`) yields 0. Other system call failures raise IOSelectorsError.
  ##
  ## When `CACHE_EVENTS` is declared, pending change records are submitted
  ## in the same `kevent` call.
  var
    tv: Timespec
    resTable: array[MAX_KQUEUE_EVENTS, KEvent]
    ptv: ptr Timespec = nil
    maxres = MAX_KQUEUE_EVENTS

  verifySelectParams(timeout)

  if timeout != -1:
    if timeout >= 1000:
      tv.tv_sec = posix.Time(timeout div 1_000)
      tv.tv_nsec = (timeout %% 1_000) * 1_000_000
    else:
      tv.tv_sec = posix.Time(0)
      tv.tv_nsec = timeout * 1_000_000
    ptv = addr tv

  if maxres > len(results):
    maxres = len(results)

  var count = 0
  when not declared(CACHE_EVENTS):
    count = kevent(s.kqFD, nil, cint(0), addr(resTable[0]), cint(maxres), ptv)
  else:
    when hasThreadSupport:
      s.withChangeLock():
        if s.changesLength > 0:
          count = kevent(s.kqFD, addr(s.changes[0]), cint(s.changesLength),
                         addr(resTable[0]), cint(maxres), ptv)
          s.changesLength = 0
        else:
          count = kevent(s.kqFD, nil, cint(0), addr(resTable[0]), cint(maxres),
                         ptv)
    else:
      let length = cint(len(s.changes))
      if length > 0:
        count = kevent(s.kqFD, addr(s.changes[0]), length,
                       addr(resTable[0]), cint(maxres), ptv)
        s.changes.setLen(0)
      else:
        count = kevent(s.kqFD, nil, cint(0), addr(resTable[0]), cint(maxres),
                       ptv)

  if count < 0:
    let err = osLastError()
    if cint(err) != EINTR:
      raiseIOSelectorsError(err)
    return 0
  if count == 0:
    return 0

  # `filled` is separate from the loop index because skipped entries
  # (`continue`) must not leave holes in `results`.
  var filled = 0
  for idx in 0 ..< count:
    let ev = addr(resTable[idx])
    var rkey = ReadyKey(fd: int(ev.ident), events: {})

    if (ev.flags and EV_ERROR) != 0:
      rkey.events = {Event.Error}
      rkey.errorCode = OSErrorCode(ev.data)

    case ev.filter:
    of EVFILT_READ:
      let pkey = addr(s.fds[int(ev.ident)])
      rkey.events.incl(Event.Read)
      if Event.User in pkey.events:
        var data: uint64 = 0
        if posix.read(cint(ev.ident), addr data,
                      sizeof(uint64)) != sizeof(uint64):
          let err = osLastError()
          if err == OSErrorCode(EAGAIN):
            # someone already consumed event data
            continue
          else:
            raiseIOSelectorsError(err)
        rkey.events = {Event.User}
    of EVFILT_WRITE:
      # NOTE(review): this replaces the set, so an `Event.Error` recorded
      # for EV_ERROR above is dropped here, unlike in the read branch.
      # Behavior kept as is; confirm whether that is intended.
      rkey.events = {Event.Write}
    of EVFILT_TIMER:
      let pkey = addr(s.fds[int(ev.ident)])
      if Event.Oneshot in pkey.events:
        # The key is not cleared until it is unregistered, so the
        # application can still obtain its data, but the counter is
        # decreased because the timer is gone from the kqueue.
        dec(s.count)
        # `Finished` marks the key to avoid a second decrease on unregister.
        pkey.events.incl(Event.Finished)
      rkey.events.incl(Event.Timer)
    of EVFILT_VNODE:
      rkey.events.incl(Event.Vnode)
      if (ev.fflags and NOTE_DELETE) != 0:
        rkey.events.incl(Event.VnodeDelete)
      if (ev.fflags and NOTE_WRITE) != 0:
        rkey.events.incl(Event.VnodeWrite)
      if (ev.fflags and NOTE_EXTEND) != 0:
        rkey.events.incl(Event.VnodeExtend)
      if (ev.fflags and NOTE_ATTRIB) != 0:
        rkey.events.incl(Event.VnodeAttrib)
      if (ev.fflags and NOTE_LINK) != 0:
        rkey.events.incl(Event.VnodeLink)
      if (ev.fflags and NOTE_RENAME) != 0:
        rkey.events.incl(Event.VnodeRename)
      if (ev.fflags and NOTE_REVOKE) != 0:
        rkey.events.incl(Event.VnodeRevoke)
    of EVFILT_SIGNAL:
      # For signals `ident` is the signal number; the key index is
      # carried in `udata`.
      rkey.fd = cast[int](ev.udata)
      rkey.events.incl(Event.Signal)
    of EVFILT_PROC:
      # For processes `ident` is the pid; the key index is in `udata`.
      rkey.fd = cast[int](ev.udata)
      let pkey = addr(s.fds[cast[int](ev.udata)])
      # The key is not cleared until it is unregistered, so the
      # application can still obtain its data, but the counter is
      # decreased because the one-shot filter is gone from the kqueue.
      dec(s.count)
      # `Finished` marks the key to avoid a second decrease on unregister.
      pkey.events.incl(Event.Finished)
      rkey.events.incl(Event.Process)
    else:
      # Was `doAssert(true, ...)`, which can never fail.
      doAssert(false, "Unsupported kqueue filter in the queue!")

    if (ev.flags and EV_EOF) != 0:
      # TODO this error handling needs to be rethought.
      # `fflags` can sometimes be `0x80000000` and thus we use 'cast'
      # here:
      if ev.fflags != 0:
        rkey.errorCode = cast[OSErrorCode](ev.fflags)
      else:
        # This assumes we are dealing with sockets.
        # TODO: For future-proofing it might be a good idea to give the
        # user access to the raw `kevent`.
        rkey.errorCode = OSErrorCode(ECONNRESET)
      rkey.events.incl(Event.Error)

    results[filled] = rkey
    inc(filled)
  result = filled
proc newSelector*[T](): Selector[T] =
  ## Creates a poll-based selector whose key table and pollfd table both
  ## hold `maxDescriptors()` entries; every key starts out unregistered.
  let capacity = maxDescriptors()

  when hasThreadSupport:
    # Threaded builds keep the selector and its tables in shared memory.
    result = cast[Selector[T]](allocShared0(sizeof(SelectorImpl[T])))
    result.fds = allocSharedArray[SelectorKey[T]](capacity)
    result.pollfds = allocSharedArray[TPollFd](capacity)
    initLock(result.lock)
  else:
    result = Selector[T]()
    result.fds = newSeq[SelectorKey[T]](capacity)
    result.pollfds = newSeq[TPollFd](capacity)
  result.maxFD = capacity

  var slot = 0
  while slot < capacity:
    result.fds[slot].ident = InvalidIdent
    inc(slot)
template pollRemove[T](s: Selector[T], sock: cint) =
  ## Removes the entry for `sock` from the pollfd table while holding the
  ## poll lock. Later entries move down by one slot and the vacated last
  ## slot is zeroed. The counters are only decremented when an entry was
  ## actually found, so a stray call cannot corrupt them.
  withPollLock(s):
    var found = -1
    for idx in 0 ..< s.pollcnt:
      if s.pollfds[idx].fd == sock:
        found = idx
        break
    if found >= 0:
      let last = s.pollcnt - 1
      # Copy whole entries so `revents` moves together with `fd`/`events`.
      for idx in found ..< last:
        s.pollfds[idx] = s.pollfds[idx + 1]
      s.pollfds[last].fd = 0
      s.pollfds[last].events = 0
      s.pollfds[last].revents = 0
      dec(s.pollcnt)
      dec(s.count)
fdi = int(ev.rfd) + # reject descriptors that do not fit in the key table before indexing s.fds + s.checkFd(fdi) + doAssert(s.fds[fdi].ident == InvalidIdent, "Event is already registered in the queue!") + var events = {Event.User} + setKey(s, fdi, events, 0, data) + events.incl(Event.Read) + s.pollAdd(fdi.cint, events) + +proc unregister*[T](s: Selector[T], fd: int|SocketHandle) = + let fdi = int(fd) + s.checkFd(fdi) + var pkey = addr(s.fds[fdi]) + doAssert(pkey.ident != InvalidIdent, + "Descriptor [" & $fdi & "] is not registered in the queue!") + pkey.ident = InvalidIdent + if pkey.events != {}: + pkey.events = {} + s.pollRemove(fdi.cint) + +proc unregister*[T](s: Selector[T], ev: SelectEvent) = + let fdi = int(ev.rfd) + s.checkFd(fdi) + var pkey = addr(s.fds[fdi]) + doAssert(pkey.ident != InvalidIdent, "Event is not registered in the queue!") + doAssert(Event.User in pkey.events) + pkey.ident = InvalidIdent + pkey.events = {} + s.pollRemove(fdi.cint) + +proc newSelectEvent*(): SelectEvent = + when not hasEventFds: + var fds: array[2, cint] + if posix.pipe(fds) != 0: + raiseIOSelectorsError(osLastError()) + setNonBlocking(fds[0]) + setNonBlocking(fds[1]) + result = cast[SelectEvent](allocShared0(sizeof(SelectEventImpl))) + result.rfd = fds[0] + result.wfd = fds[1] + else: + let fdci = eventfd(0, posix.O_NONBLOCK) + if fdci == -1: + raiseIOSelectorsError(osLastError()) + result = cast[SelectEvent](allocShared0(sizeof(SelectEventImpl))) + result.rfd = fdci + result.wfd = fdci + +proc trigger*(ev: SelectEvent) = + var data: uint64 = 1 + if posix.write(ev.wfd, addr data, sizeof(uint64)) != sizeof(uint64): + raiseIOSelectorsError(osLastError()) + +proc close*(ev: SelectEvent) = + let res1 = posix.close(ev.rfd) + let res2 = + when hasEventFds: 0 + else: posix.close(ev.wfd) + + deallocShared(cast[pointer](ev)) + if res1 != 0 or res2 != 0: + raiseIOSelectorsError(osLastError()) + +proc selectInto*[T](s: Selector[T], timeout: int, + results: var openArray[ReadyKey]): int = + var maxres = MAX_POLL_EVENTS + if maxres > len(results): + maxres = len(results) + + 
verifySelectParams(timeout) + + s.withPollLock(): + let count = posix.poll(addr(s.pollfds[0]), Tnfds(s.pollcnt), timeout) + if count < 0: + result = 0 + let err = osLastError() + if cint(err) != EINTR: + raiseIOSelectorsError(err) + elif count == 0: + result = 0 + else: + var i = 0 + var k = 0 + var rindex = 0 + while (i < s.pollcnt) and (k < count) and (rindex < maxres): + let revents = s.pollfds[i].revents + if revents != 0: + let fd = s.pollfds[i].fd + var pkey = addr(s.fds[fd]) + var rkey = ReadyKey(fd: int(fd), events: {}) + + if (revents and POLLIN) != 0: + rkey.events.incl(Event.Read) + if Event.User in pkey.events: + var data: uint64 = 0 + if posix.read(fd, addr data, sizeof(uint64)) != sizeof(uint64): + let err = osLastError() + if err != OSErrorCode(EAGAIN): + raiseIOSelectorsError(err) + else: + # someone already consumed event data + inc(i) + continue + rkey.events = {Event.User} + if (revents and POLLOUT) != 0: + rkey.events.incl(Event.Write) + if (revents and POLLERR) != 0 or (revents and POLLHUP) != 0 or + (revents and POLLNVAL) != 0: + rkey.events.incl(Event.Error) + results[rindex] = rkey + s.pollfds[i].revents = 0 + inc(rindex) + inc(k) + inc(i) + result = k + +proc select*[T](s: Selector[T], timeout: int): seq[ReadyKey] = + result = newSeq[ReadyKey](MAX_POLL_EVENTS) + let count = selectInto(s, timeout, result) + result.setLen(count) + +template isEmpty*[T](s: Selector[T]): bool = + (s.count == 0) + +proc contains*[T](s: Selector[T], fd: SocketHandle|int): bool {.inline.} = + return s.fds[fd.int].ident != InvalidIdent + +proc getData*[T](s: Selector[T], fd: SocketHandle|int): var T = + let fdi = int(fd) + s.checkFd(fdi) + if fdi in s: + result = s.fds[fdi].data + +proc setData*[T](s: Selector[T], fd: SocketHandle|int, data: T): bool = + let fdi = int(fd) + s.checkFd(fdi) + if fdi in s: + s.fds[fdi].data = data + result = true + +template withData*[T](s: Selector[T], fd: SocketHandle|int, value, + body: untyped) = + mixin checkFd + let fdi = 
int(fd) + s.checkFd(fdi) + if fdi in s: + var value = addr(s.getData(fdi)) + body + +template withData*[T](s: Selector[T], fd: SocketHandle|int, value, body1, + body2: untyped) = + mixin checkFd + let fdi = int(fd) + s.checkFd(fdi) + if fdi in s: + var value = addr(s.getData(fdi)) + body1 + else: + body2 + + +proc getFd*[T](s: Selector[T]): int = + return -1 diff --git a/lib/pure/ioselects/ioselectors_select.nim b/lib/pure/ioselects/ioselectors_select.nim new file mode 100644 index 000000000..6c516395b --- /dev/null +++ b/lib/pure/ioselects/ioselectors_select.nim @@ -0,0 +1,454 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Eugene Kabanov +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# This module implements Posix and Windows select(). + +import std/[times, nativesockets] + +when defined(windows): + import std/winlean + when defined(gcc): + {.passl: "-lws2_32".} + elif defined(vcc): + {.passl: "ws2_32.lib".} + const platformHeaders = """#include <winsock2.h> + #include <windows.h>""" + const EAGAIN = WSAEWOULDBLOCK +else: + const platformHeaders = """#include <sys/select.h> + #include <sys/time.h> + #include <sys/types.h> + #include <unistd.h>""" +type + FdSet {.importc: "fd_set", header: platformHeaders, pure, final.} = object +var + FD_SETSIZE {.importc: "FD_SETSIZE", header: platformHeaders.}: cint + +proc IOFD_SET(fd: SocketHandle, fdset: ptr FdSet) + {.cdecl, importc: "FD_SET", header: platformHeaders, inline.} +proc IOFD_CLR(fd: SocketHandle, fdset: ptr FdSet) + {.cdecl, importc: "FD_CLR", header: platformHeaders, inline.} +proc IOFD_ZERO(fdset: ptr FdSet) + {.cdecl, importc: "FD_ZERO", header: platformHeaders, inline.} + +when defined(windows): + proc IOFD_ISSET(fd: SocketHandle, fdset: ptr FdSet): cint + {.stdcall, importc: "FD_ISSET", header: platformHeaders, inline.} + proc ioselect(nfds: cint, readFds, writeFds, exceptFds: ptr FdSet, + timeout: ptr Timeval): cint + {.stdcall, importc: 
"select", header: platformHeaders.} +else: + proc IOFD_ISSET(fd: SocketHandle, fdset: ptr FdSet): cint + {.cdecl, importc: "FD_ISSET", header: platformHeaders, inline.} + proc ioselect(nfds: cint, readFds, writeFds, exceptFds: ptr FdSet, + timeout: ptr Timeval): cint + {.cdecl, importc: "select", header: platformHeaders.} + +when hasThreadSupport: + type + SelectorImpl[T] = object + rSet: FdSet + wSet: FdSet + eSet: FdSet + maxFD: int + fds: ptr SharedArray[SelectorKey[T]] + count*: int + lock: Lock + Selector*[T] = ptr SelectorImpl[T] +else: + type + SelectorImpl[T] = object + rSet: FdSet + wSet: FdSet + eSet: FdSet + maxFD: int + fds: seq[SelectorKey[T]] + count*: int + Selector*[T] = ref SelectorImpl[T] + +type + SelectEventImpl = object + rsock: SocketHandle + wsock: SocketHandle + SelectEvent* = ptr SelectEventImpl + +when hasThreadSupport: + template withSelectLock[T](s: Selector[T], body: untyped) = + acquire(s.lock) + {.locks: [s.lock].}: + try: + body + finally: + release(s.lock) +else: + template withSelectLock[T](s: Selector[T], body: untyped) = + body + +proc newSelector*[T](): Selector[T] = + when hasThreadSupport: + result = cast[Selector[T]](allocShared0(sizeof(SelectorImpl[T]))) + result.fds = allocSharedArray[SelectorKey[T]](FD_SETSIZE) + initLock result.lock + else: + result = Selector[T]() + result.fds = newSeq[SelectorKey[T]](FD_SETSIZE) + + for i in 0 ..< FD_SETSIZE: + result.fds[i].ident = InvalidIdent + + IOFD_ZERO(addr result.rSet) + IOFD_ZERO(addr result.wSet) + IOFD_ZERO(addr result.eSet) + +proc close*[T](s: Selector[T]) = + when hasThreadSupport: + # the lock is a field of the shared block, so destroy it before that block is freed + deinitLock(s.lock) + deallocSharedArray(s.fds) + deallocShared(cast[pointer](s)) + +when defined(windows): + proc newSelectEvent*(): SelectEvent = + var ssock = createNativeSocket() + var wsock = createNativeSocket() + var rsock: SocketHandle = INVALID_SOCKET + var saddr = Sockaddr_in() + + saddr.sin_family = winlean.AF_INET + saddr.sin_port = 0 + saddr.sin_addr.s_addr = INADDR_ANY + if 
bindAddr(ssock, cast[ptr SockAddr](addr(saddr)), + sizeof(saddr).SockLen) < 0'i32: + raiseIOSelectorsError(osLastError()) + + if winlean.listen(ssock, 1) != 0: + raiseIOSelectorsError(osLastError()) + + var namelen = sizeof(saddr).SockLen + if getsockname(ssock, cast[ptr SockAddr](addr(saddr)), + addr(namelen)) != 0'i32: + raiseIOSelectorsError(osLastError()) + + saddr.sin_addr.s_addr = 0x0100007F + if winlean.connect(wsock, cast[ptr SockAddr](addr(saddr)), + sizeof(saddr).SockLen) != 0: + raiseIOSelectorsError(osLastError()) + namelen = sizeof(saddr).SockLen + rsock = winlean.accept(ssock, cast[ptr SockAddr](addr(saddr)), + cast[ptr SockLen](addr(namelen))) + if rsock == SocketHandle(-1): + raiseIOSelectorsError(osLastError()) + + if winlean.closesocket(ssock) != 0: + raiseIOSelectorsError(osLastError()) + + var mode = clong(1) + if ioctlsocket(rsock, FIONBIO, addr(mode)) != 0: + raiseIOSelectorsError(osLastError()) + mode = clong(1) + if ioctlsocket(wsock, FIONBIO, addr(mode)) != 0: + raiseIOSelectorsError(osLastError()) + + result = cast[SelectEvent](allocShared0(sizeof(SelectEventImpl))) + result.rsock = rsock + result.wsock = wsock + + proc trigger*(ev: SelectEvent) = + var data: uint64 = 1 + if winlean.send(ev.wsock, cast[pointer](addr data), + cint(sizeof(uint64)), 0) != sizeof(uint64): + raiseIOSelectorsError(osLastError()) + + proc close*(ev: SelectEvent) = + let res1 = winlean.closesocket(ev.rsock) + let res2 = winlean.closesocket(ev.wsock) + deallocShared(cast[pointer](ev)) + if res1 != 0 or res2 != 0: + raiseIOSelectorsError(osLastError()) + +else: + proc newSelectEvent*(): SelectEvent = + var fds: array[2, cint] + if posix.pipe(fds) != 0: + raiseIOSelectorsError(osLastError()) + setNonBlocking(fds[0]) + setNonBlocking(fds[1]) + result = cast[SelectEvent](allocShared0(sizeof(SelectEventImpl))) + result.rsock = SocketHandle(fds[0]) + result.wsock = SocketHandle(fds[1]) + + proc trigger*(ev: SelectEvent) = + var data: uint64 = 1 + if 
posix.write(cint(ev.wsock), addr data, sizeof(uint64)) != sizeof(uint64): + raiseIOSelectorsError(osLastError()) + + proc close*(ev: SelectEvent) = + let res1 = posix.close(cint(ev.rsock)) + let res2 = posix.close(cint(ev.wsock)) + deallocShared(cast[pointer](ev)) + if res1 != 0 or res2 != 0: + raiseIOSelectorsError(osLastError()) + +proc setSelectKey[T](s: Selector[T], fd: SocketHandle, events: set[Event], + data: T) = + var i = 0 + let fdi = int(fd) + while i < FD_SETSIZE: + if s.fds[i].ident == InvalidIdent: + var pkey = addr(s.fds[i]) + pkey.ident = fdi + pkey.events = events + pkey.data = data + break + inc(i) + if i >= FD_SETSIZE: + raiseIOSelectorsError("Maximum number of descriptors is exhausted!") + +proc getKey[T](s: Selector[T], fd: SocketHandle): ptr SelectorKey[T] = + var i = 0 + let fdi = int(fd) + while i < FD_SETSIZE: + if s.fds[i].ident == fdi: + result = addr(s.fds[i]) + break + inc(i) + doAssert(i < FD_SETSIZE, + "Descriptor [" & $int(fd) & "] is not registered in the queue!") + +proc delKey[T](s: Selector[T], fd: SocketHandle) = + var empty: T + var i = 0 + while i < FD_SETSIZE: + if s.fds[i].ident == fd.int: + s.fds[i].ident = InvalidIdent + s.fds[i].events = {} + s.fds[i].data = empty + break + inc(i) + doAssert(i < FD_SETSIZE, + "Descriptor [" & $int(fd) & "] is not registered in the queue!") + +proc registerHandle*[T](s: Selector[T], fd: int | SocketHandle, + events: set[Event], data: T) = + when not defined(windows): + let fdi = int(fd) + s.withSelectLock(): + s.setSelectKey(fd, events, data) + when not defined(windows): + if fdi > s.maxFD: s.maxFD = fdi + if Event.Read in events: + IOFD_SET(fd, addr s.rSet) + inc(s.count) + if Event.Write in events: + IOFD_SET(fd, addr s.wSet) + IOFD_SET(fd, addr s.eSet) + inc(s.count) + +proc registerEvent*[T](s: Selector[T], ev: SelectEvent, data: T) = + when not defined(windows): + let fdi = int(ev.rsock) + s.withSelectLock(): + s.setSelectKey(ev.rsock, {Event.User}, data) + when not defined(windows): + 
if fdi > s.maxFD: s.maxFD = fdi + IOFD_SET(ev.rsock, addr s.rSet) + inc(s.count) + +proc updateHandle*[T](s: Selector[T], fd: int | SocketHandle, + events: set[Event]) = + let maskEvents = {Event.Timer, Event.Signal, Event.Process, Event.Vnode, + Event.User, Event.Oneshot, Event.Error} + s.withSelectLock(): + var pkey = s.getKey(fd) + doAssert(pkey.events * maskEvents == {}) + if pkey.events != events: + if (Event.Read in pkey.events) and (Event.Read notin events): + IOFD_CLR(fd, addr s.rSet) + dec(s.count) + if (Event.Write in pkey.events) and (Event.Write notin events): + IOFD_CLR(fd, addr s.wSet) + IOFD_CLR(fd, addr s.eSet) + dec(s.count) + if (Event.Read notin pkey.events) and (Event.Read in events): + IOFD_SET(fd, addr s.rSet) + inc(s.count) + if (Event.Write notin pkey.events) and (Event.Write in events): + IOFD_SET(fd, addr s.wSet) + IOFD_SET(fd, addr s.eSet) + inc(s.count) + pkey.events = events + +proc unregister*[T](s: Selector[T], fd: SocketHandle|int) = + s.withSelectLock(): + let fd = fd.SocketHandle + var pkey = s.getKey(fd) + if Event.Read in pkey.events or Event.User in pkey.events: + IOFD_CLR(fd, addr s.rSet) + dec(s.count) + if Event.Write in pkey.events: + IOFD_CLR(fd, addr s.wSet) + IOFD_CLR(fd, addr s.eSet) + dec(s.count) + s.delKey(fd) + +proc unregister*[T](s: Selector[T], ev: SelectEvent) = + let fd = ev.rsock + s.withSelectLock(): + var pkey = s.getKey(fd) + IOFD_CLR(fd, addr s.rSet) + dec(s.count) + s.delKey(fd) + +proc selectInto*[T](s: Selector[T], timeout: int, + results: var openArray[ReadyKey]): int = + var tv = Timeval() + var ptv = addr tv + var rset, wset, eset: FdSet + + verifySelectParams(timeout) + + if timeout != -1: + when defined(genode) or defined(freertos) or defined(zephyr) or defined(nuttx): + tv.tv_sec = posix.Time(timeout div 1_000) + else: + tv.tv_sec = timeout.int32 div 1_000 + tv.tv_usec = (timeout.int32 %% 1_000) * 1_000 + else: + ptv = nil + + s.withSelectLock(): + rset = s.rSet + wset = s.wSet + eset = s.eSet + + 
var count = ioselect(cint(s.maxFD) + 1, addr(rset), addr(wset), + addr(eset), ptv) + if count < 0: + result = 0 + when defined(windows): + raiseIOSelectorsError(osLastError()) + else: + let err = osLastError() + if cint(err) != EINTR: + raiseIOSelectorsError(err) + elif count == 0: + result = 0 + else: + var rindex = 0 + var i = 0 + var k = 0 + + # never write past the end of the caller-supplied results buffer + while (i < FD_SETSIZE) and (k < count) and (rindex < len(results)): + if s.fds[i].ident != InvalidIdent: + var flag = false + var pkey = addr(s.fds[i]) + var rkey = ReadyKey(fd: int(pkey.ident), events: {}) + let fd = SocketHandle(pkey.ident) + if IOFD_ISSET(fd, addr rset) != 0: + if Event.User in pkey.events: + var data: uint64 = 0 + if recv(fd, cast[pointer](addr(data)), + sizeof(uint64).cint, 0) != sizeof(uint64): + let err = osLastError() + if cint(err) != EAGAIN: + raiseIOSelectorsError(err) + else: + inc(i) + inc(k) + continue + else: + flag = true + rkey.events = {Event.User} + else: + flag = true + rkey.events = {Event.Read} + if IOFD_ISSET(fd, addr wset) != 0: + rkey.events.incl(Event.Write) + if IOFD_ISSET(fd, addr eset) != 0: + rkey.events.incl(Event.Error) + flag = true + if flag: + results[rindex] = rkey + inc(rindex) + inc(k) + inc(i) + result = rindex + +proc select*[T](s: Selector[T], timeout: int): seq[ReadyKey] = + result = newSeq[ReadyKey](FD_SETSIZE) + var count = selectInto(s, timeout, result) + result.setLen(count) + +proc flush*[T](s: Selector[T]) = discard + +template isEmpty*[T](s: Selector[T]): bool = + (s.count == 0) + +proc contains*[T](s: Selector[T], fd: SocketHandle|int): bool {.inline.} = + s.withSelectLock(): + result = false + + let fdi = int(fd) + for i in 0..<FD_SETSIZE: + if s.fds[i].ident == fdi: + return true + +proc getData*[T](s: Selector[T], fd: SocketHandle|int): var T = + s.withSelectLock(): + let fdi = int(fd) + for i in 0..<FD_SETSIZE: + if s.fds[i].ident == fdi: + return s.fds[i].data + +proc setData*[T](s: Selector[T], fd: SocketHandle|int, data: T): bool = + s.withSelectLock(): + let fdi = int(fd) + 
var i = 0 + while i < FD_SETSIZE: + if s.fds[i].ident == fdi: + var pkey = addr(s.fds[i]) + pkey.data = data + result = true + break + # advance to the next slot; without this the scan never terminates when slot 0 does not match + inc(i) + +template withData*[T](s: Selector[T], fd: SocketHandle|int, value, + body: untyped) = + mixin withSelectLock + s.withSelectLock(): + var value: ptr T + let fdi = int(fd) + var i = 0 + while i < FD_SETSIZE: + if s.fds[i].ident == fdi: + value = addr(s.fds[i].data) + break + inc(i) + if i != FD_SETSIZE: + body + +template withData*[T](s: Selector[T], fd: SocketHandle|int, value, + body1, body2: untyped) = + mixin withSelectLock + s.withSelectLock(): + block: + var value: ptr T + let fdi = int(fd) + var i = 0 + while i < FD_SETSIZE: + if s.fds[i].ident == fdi: + value = addr(s.fds[i].data) + break + inc(i) + if i != FD_SETSIZE: + body1 + else: + body2 + + +proc getFd*[T](s: Selector[T]): int = + return -1 diff --git a/lib/pure/json.nim b/lib/pure/json.nim index c3db5bdf8..53fa7553a 100644 --- a/lib/pure/json.nim +++ b/lib/pure/json.nim @@ -14,514 +14,172 @@ ## JSON is based on a subset of the JavaScript Programming Language, ## Standard ECMA-262 3rd Edition - December 1999. ## -## Usage example: +## See also +## ======== +## * `std/parsejson <parsejson.html>`_ +## * `std/jsonutils <jsonutils.html>`_ +## * `std/marshal <marshal.html>`_ +## * `std/jscore <jscore.html>`_ ## -## .. code-block:: nim -## let -## small_json = """{"test": 1.3, "key2": true}""" -## jobj = parseJson(small_json) -## assert (jobj.kind == JObject) -## echo($jobj["test"].fnum) -## echo($jobj["key2"].bval) ## -## Results in: +## Overview +## ======== ## -## .. code-block:: nim +## Parsing JSON +## ------------ ## -## 1.3000000000000000e+00 -## true +## JSON often arrives into your program (via an API or a file) as a `string`. +## The first step is to change it from its serialized form into a nested object +## structure called a `JsonNode`. +## +## The `parseJson` procedure takes a string containing JSON and returns a +## `JsonNode` object. 
This is an object variant and it is either a +## `JObject`, `JArray`, `JString`, `JInt`, `JFloat`, `JBool` or +## `JNull`. You check the kind of this object variant by using the `kind` +## accessor. +## +## For a `JsonNode` whose kind is `JObject`, you can access its fields using +## the `[]` operator. The following example shows how to do this: +## +## ```Nim +## import std/json +## +## let jsonNode = parseJson("""{"key": 3.14}""") +## +## doAssert jsonNode.kind == JObject +## doAssert jsonNode["key"].kind == JFloat +## ``` +## +## Reading values +## -------------- +## +## Once you have a `JsonNode`, retrieving the values can then be achieved +## by using one of the helper procedures, which include: +## +## * `getInt` +## * `getFloat` +## * `getStr` +## * `getBool` +## +## To retrieve the value of `"key"` you can do the following: +## +## ```Nim +## import std/json +## +## let jsonNode = parseJson("""{"key": 3.14}""") +## +## doAssert jsonNode["key"].getFloat() == 3.14 +## ``` +## +## **Important:** The `[]` operator will raise an exception when the +## specified field does not exist. +## +## Handling optional keys +## ---------------------- +## +## If you use the `{}` operator instead of `[]`, you get `nil` back +## when the field is not found. The `get`-family of procedures will return a +## type's default value when called on `nil`. 
+## +## ```Nim +## import std/json +## +## let jsonNode = parseJson("{}") +## +## doAssert jsonNode{"nope"}.getInt() == 0 +## doAssert jsonNode{"nope"}.getFloat() == 0 +## doAssert jsonNode{"nope"}.getStr() == "" +## doAssert jsonNode{"nope"}.getBool() == false +## ``` +## +## Using default values +## -------------------- +## +## The `get`-family helpers also accept an additional parameter which allows +## you to fall back to a default value should the key's value be `null`: +## +## ```Nim +## import std/json +## +## let jsonNode = parseJson("""{"key": 3.14, "key2": null}""") +## +## doAssert jsonNode["key"].getFloat(6.28) == 3.14 +## doAssert jsonNode["key2"].getFloat(3.14) == 3.14 +## doAssert jsonNode{"nope"}.getFloat(3.14) == 3.14 # note the {} +## ``` +## +## Unmarshalling +## ------------- +## +## In addition to reading dynamic data, Nim can also unmarshal JSON directly +## into a type with the `to` macro. +## +## Note: Use `Option <options.html#Option>`_ for keys that are sometimes missing in JSON +## responses, and backticks around keys with a reserved keyword as name. +## +## ```Nim +## import std/json +## import std/options +## +## type +## User = object +## name: string +## age: int +## `type`: Option[string] +## +## let userJson = parseJson("""{ "name": "Nim", "age": 12 }""") +## let user = to(userJson, User) +## if user.`type`.isSome(): +## assert user.`type`.get() != "robot" +## ``` +## +## Creating JSON +## ============= ## ## This module can also be used to comfortably create JSON using the `%*` ## operator: ## -## .. 
code-block:: nim +## ```nim +## import std/json ## ## var hisName = "John" ## let herAge = 31 ## var j = %* ## [ -## { -## "name": hisName, -## "age": 30 -## }, -## { -## "name": "Susan", -## "age": herAge -## } +## { "name": hisName, "age": 30 }, +## { "name": "Susan", "age": herAge } ## ] +## +## var j2 = %* {"name": "Isaac", "books": ["Robot Dreams"]} +## j2["details"] = %* {"age":35, "pi":3.1415} +## echo j2 +## ``` +## +## See also: std/jsonutils for hookable json serialization/deserialization +## of arbitrary types. -import - hashes, strutils, lexbase, streams, unicode, macros +runnableExamples: + ## Note: for JObject, key ordering is preserved, unlike in some languages, + ## this is convenient for some use cases. Example: + type Foo = object + a1, a2, a0, a3, a4: int + doAssert $(%* Foo()) == """{"a1":0,"a2":0,"a0":0,"a3":0,"a4":0}""" -type - JsonEventKind* = enum ## enumeration of all events that may occur when parsing - jsonError, ## an error occurred during parsing - jsonEof, ## end of file reached - jsonString, ## a string literal - jsonInt, ## an integer literal - jsonFloat, ## a float literal - jsonTrue, ## the value ``true`` - jsonFalse, ## the value ``false`` - jsonNull, ## the value ``null`` - jsonObjectStart, ## start of an object: the ``{`` token - jsonObjectEnd, ## end of an object: the ``}`` token - jsonArrayStart, ## start of an array: the ``[`` token - jsonArrayEnd ## start of an array: the ``]`` token - - TTokKind = enum # must be synchronized with TJsonEventKind! 
- tkError, - tkEof, - tkString, - tkInt, - tkFloat, - tkTrue, - tkFalse, - tkNull, - tkCurlyLe, - tkCurlyRi, - tkBracketLe, - tkBracketRi, - tkColon, - tkComma - - JsonError* = enum ## enumeration that lists all errors that can occur - errNone, ## no error - errInvalidToken, ## invalid token - errStringExpected, ## string expected - errColonExpected, ## ``:`` expected - errCommaExpected, ## ``,`` expected - errBracketRiExpected, ## ``]`` expected - errCurlyRiExpected, ## ``}`` expected - errQuoteExpected, ## ``"`` or ``'`` expected - errEOC_Expected, ## ``*/`` expected - errEofExpected, ## EOF expected - errExprExpected ## expr expected - - ParserState = enum - stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma, - stateExpectObjectComma, stateExpectColon, stateExpectValue - - JsonParser* = object of BaseLexer ## the parser object. - a: string - tok: TTokKind - kind: JsonEventKind - err: JsonError - state: seq[ParserState] - filename: string - -{.deprecated: [TJsonEventKind: JsonEventKind, TJsonError: JsonError, - TJsonParser: JsonParser].} - -const - errorMessages: array [JsonError, string] = [ - "no error", - "invalid token", - "string expected", - "':' expected", - "',' expected", - "']' expected", - "'}' expected", - "'\"' or \"'\" expected", - "'*/' expected", - "EOF expected", - "expression expected" - ] - tokToStr: array [TTokKind, string] = [ - "invalid token", - "EOF", - "string literal", - "int literal", - "float literal", - "true", - "false", - "null", - "{", "}", "[", "]", ":", "," - ] - -proc open*(my: var JsonParser, input: Stream, filename: string) = - ## initializes the parser with an input stream. `Filename` is only used - ## for nice error messages. - lexbase.open(my, input) - my.filename = filename - my.state = @[stateStart] - my.kind = jsonError - my.a = "" - -proc close*(my: var JsonParser) {.inline.} = - ## closes the parser `my` and its associated input stream. 
- lexbase.close(my) - -proc str*(my: JsonParser): string {.inline.} = - ## returns the character data for the events: ``jsonInt``, ``jsonFloat``, - ## ``jsonString`` - assert(my.kind in {jsonInt, jsonFloat, jsonString}) - return my.a - -proc getInt*(my: JsonParser): BiggestInt {.inline.} = - ## returns the number for the event: ``jsonInt`` - assert(my.kind == jsonInt) - return parseBiggestInt(my.a) - -proc getFloat*(my: JsonParser): float {.inline.} = - ## returns the number for the event: ``jsonFloat`` - assert(my.kind == jsonFloat) - return parseFloat(my.a) - -proc kind*(my: JsonParser): JsonEventKind {.inline.} = - ## returns the current event type for the JSON parser - return my.kind - -proc getColumn*(my: JsonParser): int {.inline.} = - ## get the current column the parser has arrived at. - result = getColNumber(my, my.bufpos) - -proc getLine*(my: JsonParser): int {.inline.} = - ## get the current line the parser has arrived at. - result = my.lineNumber - -proc getFilename*(my: JsonParser): string {.inline.} = - ## get the filename of the file that the parser processes. 
- result = my.filename - -proc errorMsg*(my: JsonParser): string = - ## returns a helpful error message for the event ``jsonError`` - assert(my.kind == jsonError) - result = "$1($2, $3) Error: $4" % [ - my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]] - -proc errorMsgExpected*(my: JsonParser, e: string): string = - ## returns an error message "`e` expected" in the same format as the - ## other error messages - result = "$1($2, $3) Error: $4" % [ - my.filename, $getLine(my), $getColumn(my), e & " expected"] - -proc handleHexChar(c: char, x: var int): bool = - result = true # Success - case c - of '0'..'9': x = (x shl 4) or (ord(c) - ord('0')) - of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10) - of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10) - else: result = false # error - -proc parseString(my: var JsonParser): TTokKind = - result = tkString - var pos = my.bufpos + 1 - var buf = my.buf - while true: - case buf[pos] - of '\0': - my.err = errQuoteExpected - result = tkError - break - of '"': - inc(pos) - break - of '\\': - case buf[pos+1] - of '\\', '"', '\'', '/': - add(my.a, buf[pos+1]) - inc(pos, 2) - of 'b': - add(my.a, '\b') - inc(pos, 2) - of 'f': - add(my.a, '\f') - inc(pos, 2) - of 'n': - add(my.a, '\L') - inc(pos, 2) - of 'r': - add(my.a, '\C') - inc(pos, 2) - of 't': - add(my.a, '\t') - inc(pos, 2) - of 'u': - inc(pos, 2) - var r: int - if handleHexChar(buf[pos], r): inc(pos) - if handleHexChar(buf[pos], r): inc(pos) - if handleHexChar(buf[pos], r): inc(pos) - if handleHexChar(buf[pos], r): inc(pos) - add(my.a, toUTF8(Rune(r))) - else: - # don't bother with the error - add(my.a, buf[pos]) - inc(pos) - of '\c': - pos = lexbase.handleCR(my, pos) - buf = my.buf - add(my.a, '\c') - of '\L': - pos = lexbase.handleLF(my, pos) - buf = my.buf - add(my.a, '\L') - else: - add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos # store back +import std/[hashes, tables, strutils, lexbase, streams, macros, parsejson] -proc skip(my: var JsonParser) 
= - var pos = my.bufpos - var buf = my.buf - while true: - case buf[pos] - of '/': - if buf[pos+1] == '/': - # skip line comment: - inc(pos, 2) - while true: - case buf[pos] - of '\0': - break - of '\c': - pos = lexbase.handleCR(my, pos) - buf = my.buf - break - of '\L': - pos = lexbase.handleLF(my, pos) - buf = my.buf - break - else: - inc(pos) - elif buf[pos+1] == '*': - # skip long comment: - inc(pos, 2) - while true: - case buf[pos] - of '\0': - my.err = errEOC_Expected - break - of '\c': - pos = lexbase.handleCR(my, pos) - buf = my.buf - of '\L': - pos = lexbase.handleLF(my, pos) - buf = my.buf - of '*': - inc(pos) - if buf[pos] == '/': - inc(pos) - break - else: - inc(pos) - else: - break - of ' ', '\t': - inc(pos) - of '\c': - pos = lexbase.handleCR(my, pos) - buf = my.buf - of '\L': - pos = lexbase.handleLF(my, pos) - buf = my.buf - else: - break - my.bufpos = pos - -proc parseNumber(my: var JsonParser) = - var pos = my.bufpos - var buf = my.buf - if buf[pos] == '-': - add(my.a, '-') - inc(pos) - if buf[pos] == '.': - add(my.a, "0.") - inc(pos) - else: - while buf[pos] in Digits: - add(my.a, buf[pos]) - inc(pos) - if buf[pos] == '.': - add(my.a, '.') - inc(pos) - # digits after the dot: - while buf[pos] in Digits: - add(my.a, buf[pos]) - inc(pos) - if buf[pos] in {'E', 'e'}: - add(my.a, buf[pos]) - inc(pos) - if buf[pos] in {'+', '-'}: - add(my.a, buf[pos]) - inc(pos) - while buf[pos] in Digits: - add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos - -proc parseName(my: var JsonParser) = - var pos = my.bufpos - var buf = my.buf - if buf[pos] in IdentStartChars: - while buf[pos] in IdentChars: - add(my.a, buf[pos]) - inc(pos) - my.bufpos = pos - -proc getTok(my: var JsonParser): TTokKind = - setLen(my.a, 0) - skip(my) # skip whitespace, comments - case my.buf[my.bufpos] - of '-', '.', '0'..'9': - parseNumber(my) - if {'.', 'e', 'E'} in my.a: - result = tkFloat - else: - result = tkInt - of '"': - result = parseString(my) - of '[': - inc(my.bufpos) - result = 
tkBracketLe - of '{': - inc(my.bufpos) - result = tkCurlyLe - of ']': - inc(my.bufpos) - result = tkBracketRi - of '}': - inc(my.bufpos) - result = tkCurlyRi - of ',': - inc(my.bufpos) - result = tkComma - of ':': - inc(my.bufpos) - result = tkColon - of '\0': - result = tkEof - of 'a'..'z', 'A'..'Z', '_': - parseName(my) - case my.a - of "null": result = tkNull - of "true": result = tkTrue - of "false": result = tkFalse - else: result = tkError - else: - inc(my.bufpos) - result = tkError - my.tok = result - -proc next*(my: var JsonParser) = - ## retrieves the first/next event. This controls the parser. - var tk = getTok(my) - var i = my.state.len-1 - # the following code is a state machine. If we had proper coroutines, - # the code could be much simpler. - case my.state[i] - of stateEof: - if tk == tkEof: - my.kind = jsonEof - else: - my.kind = jsonError - my.err = errEofExpected - of stateStart: - # tokens allowed? - case tk - of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: - my.state[i] = stateEof # expect EOF next! - my.kind = JsonEventKind(ord(tk)) - of tkBracketLe: - my.state.add(stateArray) # we expect any - my.kind = jsonArrayStart - of tkCurlyLe: - my.state.add(stateObject) - my.kind = jsonObjectStart - of tkEof: - my.kind = jsonEof - else: - my.kind = jsonError - my.err = errEofExpected - of stateObject: - case tk - of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: - my.state.add(stateExpectColon) - my.kind = JsonEventKind(ord(tk)) - of tkBracketLe: - my.state.add(stateExpectColon) - my.state.add(stateArray) - my.kind = jsonArrayStart - of tkCurlyLe: - my.state.add(stateExpectColon) - my.state.add(stateObject) - my.kind = jsonObjectStart - of tkCurlyRi: - my.kind = jsonObjectEnd - discard my.state.pop() - else: - my.kind = jsonError - my.err = errCurlyRiExpected - of stateArray: - case tk - of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: - my.state.add(stateExpectArrayComma) # expect value next! 
- my.kind = JsonEventKind(ord(tk)) - of tkBracketLe: - my.state.add(stateExpectArrayComma) - my.state.add(stateArray) - my.kind = jsonArrayStart - of tkCurlyLe: - my.state.add(stateExpectArrayComma) - my.state.add(stateObject) - my.kind = jsonObjectStart - of tkBracketRi: - my.kind = jsonArrayEnd - discard my.state.pop() - else: - my.kind = jsonError - my.err = errBracketRiExpected - of stateExpectArrayComma: - case tk - of tkComma: - discard my.state.pop() - next(my) - of tkBracketRi: - my.kind = jsonArrayEnd - discard my.state.pop() # pop stateExpectArrayComma - discard my.state.pop() # pop stateArray - else: - my.kind = jsonError - my.err = errBracketRiExpected - of stateExpectObjectComma: - case tk - of tkComma: - discard my.state.pop() - next(my) - of tkCurlyRi: - my.kind = jsonObjectEnd - discard my.state.pop() # pop stateExpectObjectComma - discard my.state.pop() # pop stateObject - else: - my.kind = jsonError - my.err = errCurlyRiExpected - of stateExpectColon: - case tk - of tkColon: - my.state[i] = stateExpectValue - next(my) - else: - my.kind = jsonError - my.err = errColonExpected - of stateExpectValue: - case tk - of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: - my.state[i] = stateExpectObjectComma - my.kind = JsonEventKind(ord(tk)) - of tkBracketLe: - my.state[i] = stateExpectObjectComma - my.state.add(stateArray) - my.kind = jsonArrayStart - of tkCurlyLe: - my.state[i] = stateExpectObjectComma - my.state.add(stateObject) - my.kind = jsonObjectStart - else: - my.kind = jsonError - my.err = errExprExpected +import std/options # xxx remove this dependency using same approach as https://github.com/nim-lang/Nim/pull/14563 +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/[syncio, assertions, formatfloat] +export + tables.`$` -# ------------- higher level interface --------------------------------------- +export + parsejson.JsonEventKind, parsejson.JsonError, JsonParser, JsonKindError, + open, close, str, getInt, 
getFloat, kind, getColumn, getLine, getFilename, + errorMsg, errorMsgExpected, next, JsonParsingError, raiseParseErr, nimIdentNormalize type JsonNodeKind* = enum ## possible JSON node types @@ -535,6 +193,8 @@ type JsonNode* = ref JsonNodeObj ## JSON node JsonNodeObj* {.acyclic.} = object + isUnquoted: bool # the JString was a number-like token and + # so shouldn't be quoted case kind*: JsonNodeKind of JString: str*: string @@ -547,127 +207,244 @@ type of JNull: nil of JObject: - fields*: seq[tuple[key: string, val: JsonNode]] + fields*: OrderedTable[string, JsonNode] of JArray: elems*: seq[JsonNode] - JsonParsingError* = object of ValueError ## is raised for a JSON error - -{.deprecated: [EJsonParsingError: JsonParsingError, TJsonNode: JsonNodeObj, - PJsonNode: JsonNode, TJsonNodeKind: JsonNodeKind].} - -proc raiseParseErr*(p: JsonParser, msg: string) {.noinline, noreturn.} = - ## raises an `EJsonParsingError` exception. - raise newException(JsonParsingError, errorMsgExpected(p, msg)) +const DepthLimit = 1000 proc newJString*(s: string): JsonNode = ## Creates a new `JString JsonNode`. - new(result) - result.kind = JString - result.str = s + result = JsonNode(kind: JString, str: s) -proc newJStringMove(s: string): JsonNode = - new(result) - result.kind = JString - shallowCopy(result.str, s) +proc newJRawNumber(s: string): JsonNode = + ## Creates a "raw JS number", that is a number that does not + ## fit into Nim's `BiggestInt` field. This is really a `JString` + ## with the additional information that it should be converted back + ## to the string representation without the quotes. + result = JsonNode(kind: JString, str: s, isUnquoted: true) proc newJInt*(n: BiggestInt): JsonNode = ## Creates a new `JInt JsonNode`. - new(result) - result.kind = JInt - result.num = n + result = JsonNode(kind: JInt, num: n) proc newJFloat*(n: float): JsonNode = ## Creates a new `JFloat JsonNode`. 
- new(result) - result.kind = JFloat - result.fnum = n + result = JsonNode(kind: JFloat, fnum: n) proc newJBool*(b: bool): JsonNode = ## Creates a new `JBool JsonNode`. - new(result) - result.kind = JBool - result.bval = b + result = JsonNode(kind: JBool, bval: b) proc newJNull*(): JsonNode = ## Creates a new `JNull JsonNode`. - new(result) + result = JsonNode(kind: JNull) proc newJObject*(): JsonNode = ## Creates a new `JObject JsonNode` - new(result) - result.kind = JObject - result.fields = @[] + result = JsonNode(kind: JObject, fields: initOrderedTable[string, JsonNode](2)) proc newJArray*(): JsonNode = ## Creates a new `JArray JsonNode` - new(result) - result.kind = JArray - result.elems = @[] + result = JsonNode(kind: JArray, elems: @[]) + +proc getStr*(n: JsonNode, default: string = ""): string = + ## Retrieves the string value of a `JString JsonNode`. + ## + ## Returns `default` if `n` is not a `JString`, or if `n` is nil. + if n.isNil or n.kind != JString: return default + else: return n.str + +proc getInt*(n: JsonNode, default: int = 0): int = + ## Retrieves the int value of a `JInt JsonNode`. + ## + ## Returns `default` if `n` is not a `JInt`, or if `n` is nil. + if n.isNil or n.kind != JInt: return default + else: return int(n.num) + +proc getBiggestInt*(n: JsonNode, default: BiggestInt = 0): BiggestInt = + ## Retrieves the BiggestInt value of a `JInt JsonNode`. + ## + ## Returns `default` if `n` is not a `JInt`, or if `n` is nil. + if n.isNil or n.kind != JInt: return default + else: return n.num + +proc getFloat*(n: JsonNode, default: float = 0.0): float = + ## Retrieves the float value of a `JFloat JsonNode`. + ## + ## Returns `default` if `n` is not a `JFloat` or `JInt`, or if `n` is nil. + if n.isNil: return default + case n.kind + of JFloat: return n.fnum + of JInt: return float(n.num) + else: return default + +proc getBool*(n: JsonNode, default: bool = false): bool = + ## Retrieves the bool value of a `JBool JsonNode`. 
+ ## + ## Returns `default` if `n` is not a `JBool`, or if `n` is nil. + if n.isNil or n.kind != JBool: return default + else: return n.bval + +proc getFields*(n: JsonNode, + default = initOrderedTable[string, JsonNode](2)): + OrderedTable[string, JsonNode] = + ## Retrieves the key, value pairs of a `JObject JsonNode`. + ## + ## Returns `default` if `n` is not a `JObject`, or if `n` is nil. + if n.isNil or n.kind != JObject: return default + else: return n.fields + +proc getElems*(n: JsonNode, default: seq[JsonNode] = @[]): seq[JsonNode] = + ## Retrieves the array of a `JArray JsonNode`. + ## + ## Returns `default` if `n` is not a `JArray`, or if `n` is nil. + if n.isNil or n.kind != JArray: return default + else: return n.elems +proc add*(father, child: JsonNode) = + ## Adds `child` to a JArray node `father`. + assert father.kind == JArray + father.elems.add(child) + +proc add*(obj: JsonNode, key: string, val: JsonNode) = + ## Sets a field from a `JObject`. + assert obj.kind == JObject + obj.fields[key] = val proc `%`*(s: string): JsonNode = ## Generic constructor for JSON data. Creates a new `JString JsonNode`. - new(result) - result.kind = JString - result.str = s + result = JsonNode(kind: JString, str: s) + +proc `%`*(n: uint): JsonNode = + ## Generic constructor for JSON data. Creates a new `JInt JsonNode`. + if n > cast[uint](int.high): + result = newJRawNumber($n) + else: + result = JsonNode(kind: JInt, num: BiggestInt(n)) + +proc `%`*(n: int): JsonNode = + ## Generic constructor for JSON data. Creates a new `JInt JsonNode`. + result = JsonNode(kind: JInt, num: n) + +proc `%`*(n: BiggestUInt): JsonNode = + ## Generic constructor for JSON data. Creates a new `JInt JsonNode`. + if n > cast[BiggestUInt](BiggestInt.high): + result = newJRawNumber($n) + else: + result = JsonNode(kind: JInt, num: BiggestInt(n)) proc `%`*(n: BiggestInt): JsonNode = ## Generic constructor for JSON data. Creates a new `JInt JsonNode`. 
- new(result) - result.kind = JInt - result.num = n + result = JsonNode(kind: JInt, num: n) proc `%`*(n: float): JsonNode = ## Generic constructor for JSON data. Creates a new `JFloat JsonNode`. - new(result) - result.kind = JFloat - result.fnum = n + runnableExamples: + assert $(%[NaN, Inf, -Inf, 0.0, -0.0, 1.0, 1e-2]) == """["nan","inf","-inf",0.0,-0.0,1.0,0.01]""" + assert (%NaN).kind == JString + assert (%0.0).kind == JFloat + # for those special cases, we could also have used `newJRawNumber` but then + # it would've been inconsisten with the case of `parseJson` vs `%` for representing them. + if n != n: newJString("nan") + elif n == Inf: newJString("inf") + elif n == -Inf: newJString("-inf") + else: JsonNode(kind: JFloat, fnum: n) proc `%`*(b: bool): JsonNode = ## Generic constructor for JSON data. Creates a new `JBool JsonNode`. - new(result) - result.kind = JBool - result.bval = b + result = JsonNode(kind: JBool, bval: b) proc `%`*(keyVals: openArray[tuple[key: string, val: JsonNode]]): JsonNode = ## Generic constructor for JSON data. Creates a new `JObject JsonNode` - new(result) - result.kind = JObject - newSeq(result.fields, keyVals.len) - for i, p in pairs(keyVals): result.fields[i] = p + if keyVals.len == 0: return newJArray() + result = newJObject() + for key, val in items(keyVals): result.fields[key] = val + +template `%`*(j: JsonNode): JsonNode = j -proc `%`*(elements: openArray[JsonNode]): JsonNode = +proc `%`*[T](elements: openArray[T]): JsonNode = ## Generic constructor for JSON data. Creates a new `JArray JsonNode` - new(result) - result.kind = JArray - newSeq(result.elems, elements.len) - for i, p in pairs(elements): result.elems[i] = p + result = newJArray() + for elem in elements: result.add(%elem) -proc toJson(x: NimNode): NimNode {.compiletime.} = - case x.kind - of nnkBracket: - result = newNimNode(nnkBracket) - for i in 0 .. 
<x.len: - result.add(toJson(x[i])) +proc `%`*[T](table: Table[string, T]|OrderedTable[string, T]): JsonNode = + ## Generic constructor for JSON data. Creates a new `JObject JsonNode`. + result = newJObject() + for k, v in table: result[k] = %v - of nnkTableConstr: - result = newNimNode(nnkTableConstr) - for i in 0 .. <x.len: - assert x[i].kind == nnkExprColonExpr - result.add(newNimNode(nnkExprColonExpr).add(x[i][0]).add(toJson(x[i][1]))) +proc `%`*[T](opt: Option[T]): JsonNode = + ## Generic constructor for JSON data. Creates a new `JNull JsonNode` + ## if `opt` is empty, otherwise it delegates to the underlying value. + if opt.isSome: %opt.get else: newJNull() +when false: + # For 'consistency' we could do this, but that only pushes people further + # into that evil comfort zone where they can use Nim without understanding it + # causing problems later on. + proc `%`*(elements: set[bool]): JsonNode = + ## Generic constructor for JSON data. Creates a new `JObject JsonNode`. + ## This can only be used with the empty set `{}` and is supported + ## to prevent the gotcha `%*{}` which used to produce an empty + ## JSON array. + result = newJObject() + assert false notin elements, "usage error: only empty sets allowed" + assert true notin elements, "usage error: only empty sets allowed" + +proc `[]=`*(obj: JsonNode, key: string, val: JsonNode) {.inline.} = + ## Sets a field from a `JObject`. + assert(obj.kind == JObject) + obj.fields[key] = val + +proc `%`*[T: object](o: T): JsonNode = + ## Construct JsonNode from tuples and objects. + result = newJObject() + for k, v in o.fieldPairs: result[k] = %v + +proc `%`*(o: ref object): JsonNode = + ## Generic constructor for JSON data. Creates a new `JObject JsonNode` + if o.isNil: + result = newJNull() else: - result = x + result = %(o[]) + +proc `%`*(o: enum): JsonNode = + ## Construct a JsonNode that represents the specified enum value as a + ## string. Creates a new `JString JsonNode`. 
+ result = %($o) - result = prefix(result, "%") +proc toJsonImpl(x: NimNode): NimNode = + case x.kind + of nnkBracket: # array + if x.len == 0: return newCall(bindSym"newJArray") + result = newNimNode(nnkBracket) + for i in 0 ..< x.len: + result.add(toJsonImpl(x[i])) + result = newCall(bindSym("%", brOpen), result) + of nnkTableConstr: # object + if x.len == 0: return newCall(bindSym"newJObject") + result = newNimNode(nnkTableConstr) + for i in 0 ..< x.len: + x[i].expectKind nnkExprColonExpr + result.add newTree(nnkExprColonExpr, x[i][0], toJsonImpl(x[i][1])) + result = newCall(bindSym("%", brOpen), result) + of nnkCurly: # empty object + x.expectLen(0) + result = newCall(bindSym"newJObject") + of nnkNilLit: + result = newCall(bindSym"newJNull") + of nnkPar: + if x.len == 1: result = toJsonImpl(x[0]) + else: result = newCall(bindSym("%", brOpen), x) + else: + result = newCall(bindSym("%", brOpen), x) -macro `%*`*(x: expr): expr = +macro `%*`*(x: untyped): untyped = ## Convert an expression to a JsonNode directly, without having to specify ## `%` for every element. - result = toJson(x) + result = toJsonImpl(x) -proc `==`* (a,b: JsonNode): bool = +proc `==`*(a, b: JsonNode): bool {.noSideEffect, raises: [].} = ## Check two nodes for equality if a.isNil: if b.isNil: return true @@ -675,23 +452,37 @@ proc `==`* (a,b: JsonNode): bool = elif b.isNil or a.kind != b.kind: return false else: - return case a.kind + case a.kind of JString: - a.str == b.str + result = a.str == b.str of JInt: - a.num == b.num + result = a.num == b.num of JFloat: - a.fnum == b.fnum + result = a.fnum == b.fnum of JBool: - a.bval == b.bval + result = a.bval == b.bval of JNull: - true + result = true of JArray: - a.elems == b.elems + {.cast(raises: []).}: # bug #19303 + result = a.elems == b.elems of JObject: - a.fields == b.fields + # we cannot use OrderedTable's equality here as + # the order does not matter for equality here. 
+ if a.fields.len != b.fields.len: return false + for key, val in a.fields: + if not b.fields.hasKey(key): return false + {.cast(raises: []).}: + when defined(nimHasEffectsOf): + {.noSideEffect.}: + if b.fields[key] != val: return false + else: + if b.fields[key] != val: return false + result = true + +proc hash*(n: OrderedTable[string, JsonNode]): Hash {.noSideEffect.} -proc hash* (n:JsonNode): THash = +proc hash*(n: JsonNode): Hash {.noSideEffect.} = ## Compute the hash for a JSON node case n.kind of JArray: @@ -707,7 +498,12 @@ proc hash* (n:JsonNode): THash = of JString: result = hash(n.str) of JNull: - result = hash(0) + result = Hash(0) + +proc hash*(n: OrderedTable[string, JsonNode]): Hash = + for key, val in n: + result = result xor (hash(key) !& hash(val)) + result = !$result proc len*(n: JsonNode): int = ## If `n` is a `JArray`, it returns the number of elements. @@ -718,84 +514,131 @@ proc len*(n: JsonNode): int = of JObject: result = n.fields.len else: discard -proc `[]`*(node: JsonNode, name: string): JsonNode = +proc `[]`*(node: JsonNode, name: string): JsonNode {.inline.} = ## Gets a field from a `JObject`, which must not be nil. - ## If the value at `name` does not exist, returns nil + ## If the value at `name` does not exist, raises KeyError. assert(not isNil(node)) assert(node.kind == JObject) - for key, item in items(node.fields): - if key == name: - return item - return nil + when defined(nimJsonGet): + if not node.fields.hasKey(name): return nil + result = node.fields[name] -proc `[]`*(node: JsonNode, index: int): JsonNode = +proc `[]`*(node: JsonNode, index: int): JsonNode {.inline.} = ## Gets the node at `index` in an Array. Result is undefined if `index` - ## is out of bounds + ## is out of bounds, but as long as array bound checks are enabled it will + ## result in an exception. 
assert(not isNil(node)) assert(node.kind == JArray) return node.elems[index] +proc `[]`*(node: JsonNode, index: BackwardsIndex): JsonNode {.inline, since: (1, 5, 1).} = + ## Gets the node at `array.len-i` in an array through the `^` operator. + ## + ## i.e. `j[^i]` is a shortcut for `j[j.len-i]`. + runnableExamples: + let + j = parseJson("[1,2,3,4,5]") + + doAssert j[^1].getInt == 5 + doAssert j[^2].getInt == 4 + + `[]`(node, node.len - int(index)) + +proc `[]`*[U, V](a: JsonNode, x: HSlice[U, V]): JsonNode = + ## Slice operation for JArray. + ## + ## Returns the inclusive range `[a[x.a], a[x.b]]`: + runnableExamples: + import std/json + let arr = %[0,1,2,3,4,5] + doAssert arr[2..4] == %[2,3,4] + doAssert arr[2..^2] == %[2,3,4] + doAssert arr[^4..^2] == %[2,3,4] + + assert(a.kind == JArray) + result = newJArray() + let xa = (when x.a is BackwardsIndex: a.len - int(x.a) else: int(x.a)) + let L = (when x.b is BackwardsIndex: a.len - int(x.b) else: int(x.b)) - xa + 1 + for i in 0..<L: + result.add(a[i + xa]) + proc hasKey*(node: JsonNode, key: string): bool = ## Checks if `key` exists in `node`. assert(node.kind == JObject) - for k, item in items(node.fields): - if k == key: return true + result = node.fields.hasKey(key) -proc existsKey*(node: JsonNode, key: string): bool {.deprecated.} = node.hasKey(key) - ## Deprecated for `hasKey` +proc contains*(node: JsonNode, key: string): bool = + ## Checks if `key` exists in `node`. + assert(node.kind == JObject) + node.fields.hasKey(key) -proc add*(father, child: JsonNode) = - ## Adds `child` to a JArray node `father`. - assert father.kind == JArray - father.elems.add(child) +proc contains*(node: JsonNode, val: JsonNode): bool = + ## Checks if `val` exists in array `node`. + assert(node.kind == JArray) + find(node.elems, val) >= 0 + +proc `{}`*(node: JsonNode, keys: varargs[string]): JsonNode = + ## Traverses the node and gets the given value. If any of the + ## keys do not exist, returns `nil`. 
Also returns `nil` if one of the + ## intermediate data structures is not an object. + ## + ## This proc can be used to create tree structures on the + ## fly (sometimes called `autovivification`:idx:): + ## + runnableExamples: + var myjson = %* {"parent": {"child": {"grandchild": 1}}} + doAssert myjson{"parent", "child", "grandchild"} == newJInt(1) -proc add*(obj: JsonNode, key: string, val: JsonNode) = - ## Adds ``(key, val)`` pair to the JObject node `obj`. For speed - ## reasons no check for duplicate keys is performed! - ## But ``[]=`` performs the check. - assert obj.kind == JObject - obj.fields.add((key, val)) + result = node + for key in keys: + if isNil(result) or result.kind != JObject: + return nil + result = result.fields.getOrDefault(key) + +proc `{}`*(node: JsonNode, index: varargs[int]): JsonNode = + ## Traverses the node and gets the given value. If any of the + ## indexes do not exist, returns `nil`. Also returns `nil` if one of the + ## intermediate data structures is not an array. + result = node + for i in index: + if isNil(result) or result.kind != JArray or i >= node.len: + return nil + result = result.elems[i] -proc `[]=`*(obj: JsonNode, key: string, val: JsonNode) = - ## Sets a field from a `JObject`. Performs a check for duplicate keys. - assert(obj.kind == JObject) - for i in 0..obj.fields.len-1: - if obj.fields[i].key == key: - obj.fields[i].val = val - return - obj.fields.add((key, val)) +proc getOrDefault*(node: JsonNode, key: string): JsonNode = + ## Gets a field from a `node`. If `node` is nil or not an object or + ## value at `key` does not exist, returns nil + if not isNil(node) and node.kind == JObject: + result = node.fields.getOrDefault(key) proc `{}`*(node: JsonNode, key: string): JsonNode = - ## Transverses the node and gets the given value. If any of the - ## names does not exist, returns nil - result = node - if isNil(node): return nil - result = result[key] + ## Gets a field from a `node`. 
If `node` is nil or not an object or + ## value at `key` does not exist, returns nil + node.getOrDefault(key) -proc `{}=`*(node: JsonNode, names: varargs[string], value: JsonNode) = - ## Transverses the node and tries to set the value at the given location - ## to `value` If any of the names are missing, they are added +proc `{}=`*(node: JsonNode, keys: varargs[string], value: JsonNode) = + ## Traverses the node and tries to set the value at the given location + ## to `value`. If any of the keys are missing, they are added. var node = node - for i in 0..(names.len-2): - if isNil(node[names[i]]): - node[names[i]] = newJObject() - node = node[names[i]] - node[names[names.len-1]] = value + for i in 0..(keys.len-2): + if not node.hasKey(keys[i]): + node[keys[i]] = newJObject() + node = node[keys[i]] + node[keys[keys.len-1]] = value proc delete*(obj: JsonNode, key: string) = - ## Deletes ``obj[key]`` preserving the order of the other (key, value)-pairs. + ## Deletes `obj[key]`. assert(obj.kind == JObject) - for i in 0..obj.fields.len-1: - if obj.fields[i].key == key: - obj.fields.delete(i) - return - raise newException(IndexError, "key not in object") + if not obj.fields.hasKey(key): + raise newException(KeyError, "key not in object") + obj.fields.del(key) proc copy*(p: JsonNode): JsonNode = - ## Performs a deep copy of `a`. + ## Performs a deep copy of `p`. 
case p.kind of JString: result = newJString(p.str) + result.isUnquoted = p.isUnquoted of JInt: result = newJInt(p.num) of JFloat: @@ -806,8 +649,8 @@ proc copy*(p: JsonNode): JsonNode = result = newJNull() of JObject: result = newJObject() - for key, field in items(p.fields): - result.fields.add((key, copy(field))) + for key, val in pairs(p.fields): + result.fields[key] = copy(val) of JArray: result = newJArray() for i in items(p.elems): @@ -823,43 +666,102 @@ proc newIndent(curr, indent: int, ml: bool): int = else: return indent proc nl(s: var string, ml: bool) = - if ml: s.add("\n") + s.add(if ml: "\n" else: " ") + +proc escapeJsonUnquoted*(s: string; result: var string) = + ## Converts a string `s` to its JSON representation without quotes. + ## Appends to `result`. + for c in s: + case c + of '\L': result.add("\\n") + of '\b': result.add("\\b") + of '\f': result.add("\\f") + of '\t': result.add("\\t") + of '\v': result.add("\\u000b") + of '\r': result.add("\\r") + of '"': result.add("\\\"") + of '\0'..'\7': result.add("\\u000" & $ord(c)) + of '\14'..'\31': result.add("\\u00" & toHex(ord(c), 2)) + of '\\': result.add("\\\\") + else: result.add(c) + +proc escapeJsonUnquoted*(s: string): string = + ## Converts a string `s` to its JSON representation without quotes. + result = newStringOfCap(s.len + s.len shr 3) + escapeJsonUnquoted(s, result) + +proc escapeJson*(s: string; result: var string) = + ## Converts a string `s` to its JSON representation with quotes. + ## Appends to `result`. + result.add("\"") + escapeJsonUnquoted(s, result) + result.add("\"") proc escapeJson*(s: string): string = - ## Converts a string `s` to its JSON representation. + ## Converts a string `s` to its JSON representation with quotes. 
result = newStringOfCap(s.len + s.len shr 3) - result.add("\"") - for x in runes(s): - var r = int(x) - if r >= 32 and r <= 127: - var c = chr(r) - case c - of '"': result.add("\\\"") - of '\\': result.add("\\\\") - else: result.add(c) + escapeJson(s, result) + +proc toUgly*(result: var string, node: JsonNode) = + ## Converts `node` to its JSON Representation, without + ## regard for human readability. Meant to improve `$` string + ## conversion performance. + ## + ## JSON representation is stored in the passed `result` + ## + ## This provides higher efficiency than the `pretty` procedure as it + ## does **not** attempt to format the resulting JSON to make it human readable. + var comma = false + case node.kind: + of JArray: + result.add "[" + for child in node.elems: + if comma: result.add "," + else: comma = true + result.toUgly child + result.add "]" + of JObject: + result.add "{" + for key, value in pairs(node.fields): + if comma: result.add "," + else: comma = true + key.escapeJson(result) + result.add ":" + result.toUgly value + result.add "}" + of JString: + if node.isUnquoted: + result.add node.str else: - result.add("\\u") - result.add(toHex(r, 4)) - result.add("\"") + escapeJson(node.str, result) + of JInt: + result.addInt(node.num) + of JFloat: + result.addFloat(node.fnum) + of JBool: + result.add(if node.bval: "true" else: "false") + of JNull: + result.add "null" proc toPretty(result: var string, node: JsonNode, indent = 2, ml = true, lstArr = false, currIndent = 0) = case node.kind of JObject: - if currIndent != 0 and not lstArr: result.nl(ml) - result.indent(currIndent) # Indentation + if lstArr: result.indent(currIndent) # Indentation if node.fields.len > 0: result.add("{") result.nl(ml) # New line - for i in 0..len(node.fields)-1: + var i = 0 + for key, val in pairs(node.fields): if i > 0: - result.add(", ") + result.add(",") result.nl(ml) # New Line + inc i # Need to indent more than { result.indent(newIndent(currIndent, indent, ml)) - 
result.add(escapeJson(node.fields[i].key)) + escapeJson(key, result) result.add(": ") - toPretty(result, node.fields[i].val, indent, ml, false, + toPretty(result, val, indent, ml, false, newIndent(currIndent, indent, ml)) result.nl(ml) result.indent(currIndent) # indent the same as { @@ -868,16 +770,16 @@ proc toPretty(result: var string, node: JsonNode, indent = 2, ml = true, result.add("{}") of JString: if lstArr: result.indent(currIndent) - result.add(escapeJson(node.str)) + toUgly(result, node) of JInt: if lstArr: result.indent(currIndent) - result.add($node.num) + result.addInt(node.num) of JFloat: if lstArr: result.indent(currIndent) - result.add($node.fnum) + result.addFloat(node.fnum) of JBool: if lstArr: result.indent(currIndent) - result.add($node.bval) + result.add(if node.bval: "true" else: "false") of JArray: if lstArr: result.indent(currIndent) if len(node.elems) != 0: @@ -885,7 +787,7 @@ proc toPretty(result: var string, node: JsonNode, indent = 2, ml = true, result.nl(ml) for i in 0..len(node.elems)-1: if i > 0: - result.add(", ") + result.add(",") result.nl(ml) # New Line toPretty(result, node.elems[i], indent, ml, true, newIndent(currIndent, indent, ml)) @@ -898,59 +800,93 @@ proc toPretty(result: var string, node: JsonNode, indent = 2, ml = true, result.add("null") proc pretty*(node: JsonNode, indent = 2): string = - ## Converts `node` to its JSON Representation, with indentation and + ## Returns a JSON Representation of `node`, with indentation and ## on multiple lines. + ## + ## Similar to prettyprint in Python. + runnableExamples: + let j = %* {"name": "Isaac", "books": ["Robot Dreams"], + "details": {"age": 35, "pi": 3.1415}} + doAssert pretty(j) == """ +{ + "name": "Isaac", + "books": [ + "Robot Dreams" + ], + "details": { + "age": 35, + "pi": 3.1415 + } +}""" result = "" toPretty(result, node, indent) proc `$`*(node: JsonNode): string = ## Converts `node` to its JSON Representation on one line. 
- result = "" - toPretty(result, node, 0, false) + result = newStringOfCap(node.len shl 1) + toUgly(result, node) iterator items*(node: JsonNode): JsonNode = ## Iterator for the items of `node`. `node` has to be a JArray. - assert node.kind == JArray + assert node.kind == JArray, ": items() can not iterate a JsonNode of kind " & $node.kind for i in items(node.elems): yield i iterator mitems*(node: var JsonNode): var JsonNode = ## Iterator for the items of `node`. `node` has to be a JArray. Items can be ## modified. - assert node.kind == JArray + assert node.kind == JArray, ": mitems() can not iterate a JsonNode of kind " & $node.kind for i in mitems(node.elems): yield i iterator pairs*(node: JsonNode): tuple[key: string, val: JsonNode] = ## Iterator for the child elements of `node`. `node` has to be a JObject. - assert node.kind == JObject - for key, val in items(node.fields): + assert node.kind == JObject, ": pairs() can not iterate a JsonNode of kind " & $node.kind + for key, val in pairs(node.fields): yield (key, val) -iterator mpairs*(node: var JsonNode): var tuple[key: string, val: JsonNode] = - ## Iterator for the child elements of `node`. `node` has to be a JObject. - ## Items can be modified - assert node.kind == JObject - for keyVal in mitems(node.fields): - yield keyVal +iterator keys*(node: JsonNode): string = + ## Iterator for the keys in `node`. `node` has to be a JObject. + assert node.kind == JObject, ": keys() can not iterate a JsonNode of kind " & $node.kind + for key in node.fields.keys: + yield key -proc eat(p: var JsonParser, tok: TTokKind) = - if p.tok == tok: discard getTok(p) - else: raiseParseErr(p, tokToStr[tok]) +iterator mpairs*(node: var JsonNode): tuple[key: string, val: var JsonNode] = + ## Iterator for the child elements of `node`. `node` has to be a JObject. 
+ ## Values can be modified + assert node.kind == JObject, ": mpairs() can not iterate a JsonNode of kind " & $node.kind + for key, val in mpairs(node.fields): + yield (key, val) -proc parseJson(p: var JsonParser): JsonNode = +proc parseJson(p: var JsonParser; rawIntegers, rawFloats: bool, depth = 0): JsonNode = ## Parses JSON from a JSON Parser `p`. case p.tok of tkString: # we capture 'p.a' here, so we need to give it a fresh buffer afterwards: - result = newJStringMove(p.a) - p.a = "" + when defined(gcArc) or defined(gcOrc) or defined(gcAtomicArc): + result = JsonNode(kind: JString, str: move p.a) + else: + result = JsonNode(kind: JString) + shallowCopy(result.str, p.a) + p.a = "" discard getTok(p) of tkInt: - result = newJInt(parseBiggestInt(p.a)) + if rawIntegers: + result = newJRawNumber(p.a) + else: + try: + result = newJInt(parseBiggestInt(p.a)) + except ValueError: + result = newJRawNumber(p.a) discard getTok(p) of tkFloat: - result = newJFloat(parseFloat(p.a)) + if rawFloats: + result = newJRawNumber(p.a) + else: + try: + result = newJFloat(parseFloat(p.a)) + except ValueError: + result = newJRawNumber(p.a) discard getTok(p) of tkTrue: result = newJBool(true) @@ -962,66 +898,91 @@ proc parseJson(p: var JsonParser): JsonNode = result = newJNull() discard getTok(p) of tkCurlyLe: + if depth > DepthLimit: + raiseParseErr(p, "}") result = newJObject() discard getTok(p) while p.tok != tkCurlyRi: if p.tok != tkString: - raiseParseErr(p, "string literal as key expected") + raiseParseErr(p, "string literal as key") var key = p.a discard getTok(p) eat(p, tkColon) - var val = parseJson(p) + var val = parseJson(p, rawIntegers, rawFloats, depth+1) result[key] = val if p.tok != tkComma: break discard getTok(p) eat(p, tkCurlyRi) of tkBracketLe: + if depth > DepthLimit: + raiseParseErr(p, "]") result = newJArray() discard getTok(p) while p.tok != tkBracketRi: - result.add(parseJson(p)) + result.add(parseJson(p, rawIntegers, rawFloats, depth+1)) if p.tok != tkComma: break 
discard getTok(p) eat(p, tkBracketRi) of tkError, tkCurlyRi, tkBracketRi, tkColon, tkComma, tkEof: raiseParseErr(p, "{") -when not defined(js): - proc parseJson*(s: Stream, filename: string): JsonNode = - ## Parses from a stream `s` into a `JsonNode`. `filename` is only needed - ## for nice error messages. - var p: JsonParser - p.open(s, filename) +iterator parseJsonFragments*(s: Stream, filename: string = ""; rawIntegers = false, rawFloats = false): JsonNode = + ## Parses from a stream `s` into `JsonNodes`. `filename` is only needed + ## for nice error messages. + ## The JSON fragments are separated by whitespace. This can be substantially + ## faster than the comparable loop + ## `for x in splitWhitespace(s): yield parseJson(x)`. + ## This closes the stream `s` after it's done. + ## If `rawIntegers` is true, integer literals will not be converted to a `JInt` + ## field but kept as raw numbers via `JString`. + ## If `rawFloats` is true, floating point literals will not be converted to a `JFloat` + ## field but kept as raw numbers via `JString`. + var p: JsonParser + p.open(s, filename) + try: + discard getTok(p) # read first token + while p.tok != tkEof: + yield p.parseJson(rawIntegers, rawFloats) + finally: + p.close() + +proc parseJson*(s: Stream, filename: string = ""; rawIntegers = false, rawFloats = false): JsonNode = + ## Parses from a stream `s` into a `JsonNode`. `filename` is only needed + ## for nice error messages. + ## If `s` contains extra data, it will raise `JsonParsingError`. + ## This closes the stream `s` after it's done. + ## If `rawIntegers` is true, integer literals will not be converted to a `JInt` + ## field but kept as raw numbers via `JString`. + ## If `rawFloats` is true, floating point literals will not be converted to a `JFloat` + ## field but kept as raw numbers via `JString`. 
+ var p: JsonParser + p.open(s, filename) + try: discard getTok(p) # read first token - result = p.parseJson() + result = p.parseJson(rawIntegers, rawFloats) + eat(p, tkEof) # check if there is no extra data + finally: p.close() - proc parseJson*(buffer: string): JsonNode = - ## Parses JSON from `buffer`. - result = parseJson(newStringStream(buffer), "input") +when defined(js): + from std/math import `mod` + from std/jsffi import JsObject, `[]`, to + from std/private/jsutils import getProtoName, isInteger, isSafeInteger - proc parseFile*(filename: string): JsonNode = - ## Parses `file` into a `JsonNode`. - var stream = newFileStream(filename, fmRead) - if stream == nil: - raise newException(IOError, "cannot read from file: " & filename) - result = parseJson(stream, filename) -else: - from math import `mod` - type - TJSObject = object - proc parseNativeJson(x: cstring): TJSObject {.importc: "JSON.parse".} + proc parseNativeJson(x: cstring): JsObject {.importjs: "JSON.parse(#)".} - proc getVarType(x): JsonNodeKind = + proc getVarType(x: JsObject, isRawNumber: var bool): JsonNodeKind = result = JNull - proc getProtoName(y): cstring - {.importc: "Object.prototype.toString.call".} case $getProtoName(x) # TODO: Implicit returns fail here. 
of "[object Array]": return JArray of "[object Object]": return JObject of "[object Number]": - if cast[float](x) mod 1.0 == 0: - return JInt + if isInteger(x) and 1.0 / cast[float](x) != -Inf: # preserve -0.0 as float + if isSafeInteger(x): + return JInt + else: + isRawNumber = true + return JString else: return JFloat of "[object Boolean]": return JBool @@ -1029,56 +990,384 @@ else: of "[object String]": return JString else: assert false - proc len(x: TJSObject): int = - assert x.getVarType == JArray - asm """ - return `x`.length; - """ - - proc `[]`(x: TJSObject, y: string): TJSObject = - assert x.getVarType == JObject - asm """ - return `x`[`y`]; - """ - - proc `[]`(x: TJSObject, y: int): TJSObject = - assert x.getVarType == JArray - asm """ - return `x`[`y`]; - """ - - proc convertObject(x: TJSObject): JsonNode = - case getVarType(x) + proc len(x: JsObject): int = + {.emit: """ + `result` = `x`.length; + """.} + + proc convertObject(x: JsObject): JsonNode = + var isRawNumber = false + case getVarType(x, isRawNumber) of JArray: result = newJArray() - for i in 0 .. 
<x.len: + for i in 0 ..< x.len: result.add(x[i].convertObject()) of JObject: result = newJObject() - asm """for (property in `x`) { + {.emit: """for (var property in `x`) { if (`x`.hasOwnProperty(property)) { - """ + """.} + var nimProperty: cstring - var nimValue: TJSObject - asm "`nimProperty` = property; `nimValue` = `x`[property];" + var nimValue: JsObject + {.emit: "`nimProperty` = property; `nimValue` = `x`[property];".} result[$nimProperty] = nimValue.convertObject() - asm "}}" + {.emit: "}}".} of JInt: - result = newJInt(cast[int](x)) + result = newJInt(x.to(int)) of JFloat: - result = newJFloat(cast[float](x)) + result = newJFloat(x.to(float)) of JString: - result = newJString($cast[cstring](x)) + # Dunno what to do with isUnquoted here + if isRawNumber: + var value: cstring + {.emit: "`value` = `x`.toString();".} + result = newJRawNumber($value) + else: + result = newJString($x.to(cstring)) of JBool: - result = newJBool(cast[bool](x)) + result = newJBool(x.to(bool)) of JNull: result = newJNull() proc parseJson*(buffer: string): JsonNode = - return parseNativeJson(buffer).convertObject() + when nimvm: + return parseJson(newStringStream(buffer), "input") + else: + return parseNativeJson(buffer).convertObject() + +else: + proc parseJson*(buffer: string; rawIntegers = false, rawFloats = false): JsonNode = + ## Parses JSON from `buffer`. + ## If `buffer` contains extra data, it will raise `JsonParsingError`. + ## If `rawIntegers` is true, integer literals will not be converted to a `JInt` + ## field but kept as raw numbers via `JString`. + ## If `rawFloats` is true, floating point literals will not be converted to a `JFloat` + ## field but kept as raw numbers via `JString`. + result = parseJson(newStringStream(buffer), "input", rawIntegers, rawFloats) + + proc parseFile*(filename: string): JsonNode = + ## Parses `file` into a `JsonNode`. + ## If `file` contains extra data, it will raise `JsonParsingError`. 
+ var stream = newFileStream(filename, fmRead) + if stream == nil: + raise newException(IOError, "cannot read from file: " & filename) + result = parseJson(stream, filename, rawIntegers=false, rawFloats=false) + +# -- Json deserialiser. -- + +template verifyJsonKind(node: JsonNode, kinds: set[JsonNodeKind], + ast: string) = + if node == nil: + raise newException(KeyError, "key not found: " & ast) + elif node.kind notin kinds: + let msg = "Incorrect JSON kind. Wanted '$1' in '$2' but got '$3'." % [ + $kinds, + ast, + $node.kind + ] + raise newException(JsonKindError, msg) + +macro isRefSkipDistinct*(arg: typed): untyped = + ## internal only, do not use + var impl = getTypeImpl(arg) + if impl.kind == nnkBracketExpr and impl[0].eqIdent("typeDesc"): + impl = getTypeImpl(impl[1]) + while impl.kind == nnkDistinctTy: + impl = getTypeImpl(impl[0]) + result = newLit(impl.kind == nnkRefTy) + +# The following forward declarations don't work in older versions of Nim + +# forward declare all initFromJson + +proc initFromJson(dst: var string; jsonNode: JsonNode; jsonPath: var string) +proc initFromJson(dst: var bool; jsonNode: JsonNode; jsonPath: var string) +proc initFromJson(dst: var JsonNode; jsonNode: JsonNode; jsonPath: var string) +proc initFromJson[T: SomeInteger](dst: var T; jsonNode: JsonNode, jsonPath: var string) +proc initFromJson[T: SomeFloat](dst: var T; jsonNode: JsonNode; jsonPath: var string) +proc initFromJson[T: enum](dst: var T; jsonNode: JsonNode; jsonPath: var string) +proc initFromJson[T](dst: var seq[T]; jsonNode: JsonNode; jsonPath: var string) +proc initFromJson[S, T](dst: var array[S, T]; jsonNode: JsonNode; jsonPath: var string) +proc initFromJson[T](dst: var Table[string, T]; jsonNode: JsonNode; jsonPath: var string) +proc initFromJson[T](dst: var OrderedTable[string, T]; jsonNode: JsonNode; jsonPath: var string) +proc initFromJson[T](dst: var ref T; jsonNode: JsonNode; jsonPath: var string) +proc initFromJson[T](dst: var Option[T]; jsonNode: 
JsonNode; jsonPath: var string) +proc initFromJson[T: distinct](dst: var T; jsonNode: JsonNode; jsonPath: var string) +proc initFromJson[T: object|tuple](dst: var T; jsonNode: JsonNode; jsonPath: var string) + +# initFromJson definitions + +proc initFromJson(dst: var string; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JString, JNull}, jsonPath) + # since strings don't have a nil state anymore, this mapping of + # JNull to the default string is questionable. `none(string)` and + # `some("")` have the same potential json value `JNull`. + if jsonNode.kind == JNull: + dst = "" + else: + dst = jsonNode.str + +proc initFromJson(dst: var bool; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JBool}, jsonPath) + dst = jsonNode.bval + +proc initFromJson(dst: var JsonNode; jsonNode: JsonNode; jsonPath: var string) = + if jsonNode == nil: + raise newException(KeyError, "key not found: " & jsonPath) + dst = jsonNode.copy + +proc initFromJson[T: SomeInteger](dst: var T; jsonNode: JsonNode, jsonPath: var string) = + when T is uint|uint64 or int.sizeof == 4: + verifyJsonKind(jsonNode, {JInt, JString}, jsonPath) + case jsonNode.kind + of JString: + let x = parseBiggestUInt(jsonNode.str) + dst = cast[T](x) + else: + dst = T(jsonNode.num) + else: + verifyJsonKind(jsonNode, {JInt}, jsonPath) + dst = cast[T](jsonNode.num) + +proc initFromJson[T: SomeFloat](dst: var T; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JInt, JFloat, JString}, jsonPath) + if jsonNode.kind == JString: + case jsonNode.str + of "nan": + let b = NaN + dst = T(b) + # dst = NaN # would fail some tests because range conversions would cause CT error + # in some cases; but this is not a hot-spot inside this branch and backend can optimize this. 
+ of "inf": + let b = Inf + dst = T(b) + of "-inf": + let b = -Inf + dst = T(b) + else: raise newException(JsonKindError, "expected 'nan|inf|-inf', got " & jsonNode.str) + else: + if jsonNode.kind == JFloat: + dst = T(jsonNode.fnum) + else: + dst = T(jsonNode.num) + +proc initFromJson[T: enum](dst: var T; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JString}, jsonPath) + dst = parseEnum[T](jsonNode.getStr) + +proc initFromJson[T](dst: var seq[T]; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JArray}, jsonPath) + dst.setLen jsonNode.len + let orignalJsonPathLen = jsonPath.len + for i in 0 ..< jsonNode.len: + jsonPath.add '[' + jsonPath.addInt i + jsonPath.add ']' + initFromJson(dst[i], jsonNode[i], jsonPath) + jsonPath.setLen orignalJsonPathLen + +proc initFromJson[S,T](dst: var array[S,T]; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JArray}, jsonPath) + let originalJsonPathLen = jsonPath.len + for i in 0 ..< jsonNode.len: + jsonPath.add '[' + jsonPath.addInt i + jsonPath.add ']' + initFromJson(dst[i.S], jsonNode[i], jsonPath) # `.S` for enum indexed arrays + jsonPath.setLen originalJsonPathLen + +proc initFromJson[T](dst: var Table[string,T]; jsonNode: JsonNode; jsonPath: var string) = + dst = initTable[string, T]() + verifyJsonKind(jsonNode, {JObject}, jsonPath) + let originalJsonPathLen = jsonPath.len + for key in keys(jsonNode.fields): + jsonPath.add '.' + jsonPath.add key + initFromJson(mgetOrPut(dst, key, default(T)), jsonNode[key], jsonPath) + jsonPath.setLen originalJsonPathLen + +proc initFromJson[T](dst: var OrderedTable[string,T]; jsonNode: JsonNode; jsonPath: var string) = + dst = initOrderedTable[string,T]() + verifyJsonKind(jsonNode, {JObject}, jsonPath) + let originalJsonPathLen = jsonPath.len + for key in keys(jsonNode.fields): + jsonPath.add '.' 
+ jsonPath.add key + initFromJson(mgetOrPut(dst, key, default(T)), jsonNode[key], jsonPath) + jsonPath.setLen originalJsonPathLen + +proc initFromJson[T](dst: var ref T; jsonNode: JsonNode; jsonPath: var string) = + verifyJsonKind(jsonNode, {JObject, JNull}, jsonPath) + if jsonNode.kind == JNull: + dst = nil + else: + dst = new(T) + initFromJson(dst[], jsonNode, jsonPath) + +proc initFromJson[T](dst: var Option[T]; jsonNode: JsonNode; jsonPath: var string) = + if jsonNode != nil and jsonNode.kind != JNull: + when T is ref: + dst = some(new(T)) + else: + dst = some(default(T)) + initFromJson(dst.get, jsonNode, jsonPath) + +macro assignDistinctImpl[T: distinct](dst: var T;jsonNode: JsonNode; jsonPath: var string) = + let typInst = getTypeInst(dst) + let typImpl = getTypeImpl(dst) + let baseTyp = typImpl[0] + + result = quote do: + initFromJson(`baseTyp`(`dst`), `jsonNode`, `jsonPath`) + +proc initFromJson[T: distinct](dst: var T; jsonNode: JsonNode; jsonPath: var string) = + assignDistinctImpl(dst, jsonNode, jsonPath) + +proc detectIncompatibleType(typeExpr, lineinfoNode: NimNode) = + if typeExpr.kind == nnkTupleConstr: + error("Use a named tuple instead of: " & typeExpr.repr, lineinfoNode) + +proc foldObjectBody(dst, typeNode, tmpSym, jsonNode, jsonPath, originalJsonPathLen: NimNode) = + case typeNode.kind + of nnkEmpty: + discard + of nnkRecList, nnkTupleTy: + for it in typeNode: + foldObjectBody(dst, it, tmpSym, jsonNode, jsonPath, originalJsonPathLen) + + of nnkIdentDefs: + typeNode.expectLen 3 + let fieldSym = typeNode[0] + let fieldNameLit = newLit(fieldSym.strVal) + let fieldPathLit = newLit("." & fieldSym.strVal) + let fieldType = typeNode[1] + + # Detecting incompatible tuple types in `assignObjectImpl` only + # would be much cleaner, but the ast for tuple types does not + # contain usable type information. 
+ detectIncompatibleType(fieldType, fieldSym) + + dst.add quote do: + jsonPath.add `fieldPathLit` + when nimvm: + when isRefSkipDistinct(`tmpSym`.`fieldSym`): + # workaround #12489 + var tmp: `fieldType` + initFromJson(tmp, getOrDefault(`jsonNode`,`fieldNameLit`), `jsonPath`) + `tmpSym`.`fieldSym` = tmp + else: + initFromJson(`tmpSym`.`fieldSym`, getOrDefault(`jsonNode`,`fieldNameLit`), `jsonPath`) + else: + initFromJson(`tmpSym`.`fieldSym`, getOrDefault(`jsonNode`,`fieldNameLit`), `jsonPath`) + jsonPath.setLen `originalJsonPathLen` + + of nnkRecCase: + let kindSym = typeNode[0][0] + let kindNameLit = newLit(kindSym.strVal) + let kindPathLit = newLit("." & kindSym.strVal) + let kindType = typeNode[0][1] + let kindOffsetLit = newLit(uint(getOffset(kindSym))) + dst.add quote do: + var kindTmp: `kindType` + jsonPath.add `kindPathLit` + initFromJson(kindTmp, `jsonNode`[`kindNameLit`], `jsonPath`) + jsonPath.setLen `originalJsonPathLen` + when defined js: + `tmpSym`.`kindSym` = kindTmp + else: + when nimvm: + `tmpSym`.`kindSym` = kindTmp + else: + # fuck it, assign kind field anyway + ((cast[ptr `kindType`](cast[uint](`tmpSym`.addr) + `kindOffsetLit`))[]) = kindTmp + dst.add nnkCaseStmt.newTree(nnkDotExpr.newTree(tmpSym, kindSym)) + for i in 1 ..< typeNode.len: + foldObjectBody(dst, typeNode[i], tmpSym, jsonNode, jsonPath, originalJsonPathLen) + + of nnkOfBranch, nnkElse: + let ofBranch = newNimNode(typeNode.kind) + for i in 0 ..< typeNode.len-1: + ofBranch.add copyNimTree(typeNode[i]) + let dstInner = newNimNode(nnkStmtListExpr) + foldObjectBody(dstInner, typeNode[^1], tmpSym, jsonNode, jsonPath, originalJsonPathLen) + # resOuter now contains the inner stmtList + ofBranch.add dstInner + dst[^1].expectKind nnkCaseStmt + dst[^1].add ofBranch + + of nnkObjectTy: + typeNode[0].expectKind nnkEmpty + typeNode[1].expectKind {nnkEmpty, nnkOfInherit} + if typeNode[1].kind == nnkOfInherit: + let base = typeNode[1][0] + var impl = getTypeImpl(base) + while impl.kind in {nnkRefTy, 
nnkPtrTy}: + impl = getTypeImpl(impl[0]) + foldObjectBody(dst, impl, tmpSym, jsonNode, jsonPath, originalJsonPathLen) + let body = typeNode[2] + foldObjectBody(dst, body, tmpSym, jsonNode, jsonPath, originalJsonPathLen) + + else: + error("unhandled kind: " & $typeNode.kind, typeNode) + +macro assignObjectImpl[T](dst: var T; jsonNode: JsonNode; jsonPath: var string) = + let typeSym = getTypeInst(dst) + let originalJsonPathLen = genSym(nskLet, "originalJsonPathLen") + result = newStmtList() + result.add quote do: + let `originalJsonPathLen` = len(`jsonPath`) + if typeSym.kind in {nnkTupleTy, nnkTupleConstr}: + # both, `dst` and `typeSym` don't have good lineinfo. But nothing + # else is available here. + detectIncompatibleType(typeSym, dst) + foldObjectBody(result, typeSym, dst, jsonNode, jsonPath, originalJsonPathLen) + else: + foldObjectBody(result, typeSym.getTypeImpl, dst, jsonNode, jsonPath, originalJsonPathLen) + +proc initFromJson[T: object|tuple](dst: var T; jsonNode: JsonNode; jsonPath: var string) = + assignObjectImpl(dst, jsonNode, jsonPath) + +proc to*[T](node: JsonNode, t: typedesc[T]): T = + ## `Unmarshals`:idx: the specified node into the object type specified. + ## + ## Known limitations: + ## + ## * Heterogeneous arrays are not supported. + ## * Sets in object variants are not supported. + ## * Not nil annotations are not supported. 
+ ## + runnableExamples: + let jsonNode = parseJson(""" + { + "person": { + "name": "Nimmer", + "age": 21 + }, + "list": [1, 2, 3, 4] + } + """) + + type + Person = object + name: string + age: int + + Data = object + person: Person + list: seq[int] + + var data = to(jsonNode, Data) + doAssert data.person.name == "Nimmer" + doAssert data.person.age == 21 + doAssert data.list == @[1, 2, 3, 4] + + var jsonPath = "" + result = default(T) + initFromJson(result, node, jsonPath) when false: - import os + import std/os var s = newFileStream(paramStr(1), fmRead) if s == nil: quit("cannot open the file" & paramStr(1)) var x: JsonParser @@ -1103,82 +1392,3 @@ when false: # { "json": 5 } # To get that we shall use, obj["json"] - -when isMainModule: - #var node = parse("{ \"test\": null }") - #echo(node.existsKey("test56")) - var parsed = parseFile("tests/testdata/jsontest.json") - var parsed2 = parseFile("tests/testdata/jsontest2.json") - echo(parsed) - echo() - echo(pretty(parsed, 2)) - echo() - echo(parsed["keyÄÖöoßß"]) - echo() - echo(pretty(parsed2)) - try: - echo(parsed["key2"][12123]) - raise newException(ValueError, "That line was expected to fail") - except IndexError: echo() - - let testJson = parseJson"""{ "a": [1, 2, 3, 4], "b": "asd" }""" - # nil passthrough - assert(testJson{"doesnt_exist"}{"anything"}.isNil) - testJson{["c", "d"]} = %true - assert(testJson["c"]["d"].bval) - - # Bounds checking - try: - let a = testJson["a"][9] - assert(false, "EInvalidIndex not thrown") - except IndexError: - discard - try: - let a = testJson["a"][-1] - assert(false, "EInvalidIndex not thrown") - except IndexError: - discard - try: - assert(testJson["a"][0].num == 1, "Index doesn't correspond to its value") - except: - assert(false, "EInvalidIndex thrown for valid index") - - # Generator: - var j = %* [{"name": "John", "age": 30}, {"name": "Susan", "age": 31}] - assert j == %[%{"name": %"John", "age": %30}, %{"name": %"Susan", "age": %31}] - - var j2 = %* - [ - { - "name": 
"John", - "age": 30 - }, - { - "name": "Susan", - "age": 31 - } - ] - assert j2 == %[%{"name": %"John", "age": %30}, %{"name": %"Susan", "age": %31}] - - var name = "John" - let herAge = 30 - const hisAge = 31 - - var j3 = %* - [ { "name": "John" - , "age": herAge - } - , { "name": "Susan" - , "age": hisAge - } - ] - assert j3 == %[%{"name": %"John", "age": %30}, %{"name": %"Susan", "age": %31}] - - discard """ - while true: - var json = stdin.readLine() - var node = parse(json) - echo(node) - echo() - echo() - """ diff --git a/lib/pure/lenientops.nim b/lib/pure/lenientops.nim new file mode 100644 index 000000000..a8fc78e39 --- /dev/null +++ b/lib/pure/lenientops.nim @@ -0,0 +1,58 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2017 Nim contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module offers implementations of common binary operations +## like `+`, `-`, `*`, `/` and comparison operations, +## which work for mixed float/int operands. +## All operations convert the integer operand into the +## type of the float operand. For numerical expressions, the return +## type is always the type of the float involved in the expression, +## i.e., there is no auto conversion from float32 to float64. +## +## **Note:** In general, auto-converting from int to float loses +## information, which is why these operators live in a separate +## module. Use with care. +## +## Regarding binary comparison, this module only provides unequal operators. +## The equality operator `==` is omitted, because depending on the use case +## either casting to float or rounding to int might be preferred, and users +## should make an explicit choice. 
+ +func `+`*[I: SomeInteger, F: SomeFloat](i: I, f: F): F {.inline.} = + F(i) + f +func `+`*[I: SomeInteger, F: SomeFloat](f: F, i: I): F {.inline.} = + f + F(i) + +func `-`*[I: SomeInteger, F: SomeFloat](i: I, f: F): F {.inline.} = + F(i) - f +func `-`*[I: SomeInteger, F: SomeFloat](f: F, i: I): F {.inline.} = + f - F(i) + +func `*`*[I: SomeInteger, F: SomeFloat](i: I, f: F): F {.inline.} = + F(i) * f +func `*`*[I: SomeInteger, F: SomeFloat](f: F, i: I): F {.inline.} = + f * F(i) + +func `/`*[I: SomeInteger, F: SomeFloat](i: I, f: F): F {.inline.} = + F(i) / f +func `/`*[I: SomeInteger, F: SomeFloat](f: F, i: I): F {.inline.} = + f / F(i) + +func `<`*[I: SomeInteger, F: SomeFloat](i: I, f: F): bool {.inline.} = + F(i) < f +func `<`*[I: SomeInteger, F: SomeFloat](f: F, i: I): bool {.inline.} = + f < F(i) +func `<=`*[I: SomeInteger, F: SomeFloat](i: I, f: F): bool {.inline.} = + F(i) <= f +func `<=`*[I: SomeInteger, F: SomeFloat](f: F, i: I): bool {.inline.} = + f <= F(i) + +# Note that we must not define `>=` and `>`, because system.nim already has a +# template with signature (x, y: untyped): untyped, which would lead to +# ambiguous calls. diff --git a/lib/pure/lexbase.nim b/lib/pure/lexbase.nim index 23a87d9f8..1efd97b24 100644 --- a/lib/pure/lexbase.nim +++ b/lib/pure/lexbase.nim @@ -1,6 +1,6 @@ # # -# The Nim Compiler +# Nim's Runtime Library # (c) Copyright 2009 Andreas Rumpf # # See the file "copying.txt", included in this @@ -12,10 +12,13 @@ ## needs refilling. import - strutils, streams + std/[strutils, streams] + +when defined(nimPreviewSlimSystem): + import std/assertions const - EndOfFile* = '\0' ## end of file marker + EndOfFile* = '\0' ## end of file marker NewLines* = {'\c', '\L'} # Buffer handling: @@ -27,45 +30,17 @@ const type BaseLexer* = object of RootObj ## the base lexer. Inherit your lexer from ## this object. 
- bufpos*: int ## the current position within the buffer - buf*: cstring ## the buffer itself - bufLen*: int ## length of buffer in characters - input: Stream ## the input stream - lineNumber*: int ## the current line number + bufpos*: int ## the current position within the buffer + buf*: string ## the buffer itself + input: Stream ## the input stream + lineNumber*: int ## the current line number sentinel: int - lineStart: int # index of last line start in buffer - fileOpened: bool - -{.deprecated: [TBaseLexer: BaseLexer].} + lineStart: int # index of last line start in buffer + offsetBase*: int # use `offsetBase + bufpos` to get the offset + refillChars: set[char] -proc open*(L: var BaseLexer, input: Stream, bufLen: int = 8192) - ## inits the TBaseLexer with a stream to read from - -proc close*(L: var BaseLexer) +proc close*(L: var BaseLexer) = ## closes the base lexer. This closes `L`'s associated stream too. - -proc getCurrentLine*(L: BaseLexer, marker: bool = true): string - ## retrieves the current line. - -proc getColNumber*(L: BaseLexer, pos: int): int - ## retrieves the current column. - -proc handleCR*(L: var BaseLexer, pos: int): int - ## Call this if you scanned over '\c' in the buffer; it returns the the - ## position to continue the scanning from. `pos` must be the position - ## of the '\c'. -proc handleLF*(L: var BaseLexer, pos: int): int - ## Call this if you scanned over '\L' in the buffer; it returns the the - ## position to continue the scanning from. `pos` must be the position - ## of the '\L'. 
- -# implementation - -const - chrSize = sizeof(char) - -proc close(L: var BaseLexer) = - dealloc(L.buf) close(L.input) proc fillBuffer(L: var BaseLexer) = @@ -76,24 +51,32 @@ proc fillBuffer(L: var BaseLexer) = oldBufLen: int # we know here that pos == L.sentinel, but not if this proc # is called the first time by initBaseLexer() - assert(L.sentinel < L.bufLen) - toCopy = L.bufLen - L.sentinel - 1 + assert(L.sentinel + 1 <= L.buf.len) + toCopy = L.buf.len - (L.sentinel + 1) assert(toCopy >= 0) if toCopy > 0: - moveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize) - # "moveMem" handles overlapping regions - charsRead = readData(L.input, addr(L.buf[toCopy]), - (L.sentinel + 1) * chrSize) div chrSize + when defined(js) or defined(nimscript): + # nimscript has to be here to avoid compiling other branch (moveMem) + for i in 0 ..< toCopy: + L.buf[i] = L.buf[L.sentinel + 1 + i] + else: + when nimvm: + for i in 0 ..< toCopy: + L.buf[i] = L.buf[L.sentinel + 1 + i] + else: + # "moveMem" handles overlapping regions + moveMem(addr L.buf[0], addr L.buf[L.sentinel + 1], toCopy) + charsRead = L.input.readDataStr(L.buf, toCopy ..< toCopy + L.sentinel + 1) s = toCopy + charsRead if charsRead < L.sentinel + 1: - L.buf[s] = EndOfFile # set end marker + L.buf[s] = EndOfFile # set end marker L.sentinel = s else: # compute sentinel: - dec(s) # BUGFIX (valgrind) + dec(s) # BUGFIX (valgrind) while true: - assert(s < L.bufLen) - while (s >= 0) and not (L.buf[s] in NewLines): dec(s) + assert(s < L.buf.len) + while s >= 0 and L.buf[s] notin L.refillChars: dec(s) if s >= 0: # we found an appropriate character for a sentinel: L.sentinel = s @@ -101,62 +84,78 @@ proc fillBuffer(L: var BaseLexer) = else: # rather than to give up here because the line is too long, # double the buffer's size and try again: - oldBufLen = L.bufLen - L.bufLen = L.bufLen * 2 - L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize)) - assert(L.bufLen - oldBufLen == oldBufLen) - charsRead = readData(L.input, 
addr(L.buf[oldBufLen]), - oldBufLen * chrSize) div chrSize + oldBufLen = L.buf.len + L.buf.setLen(L.buf.len * 2) + charsRead = readDataStr(L.input, L.buf, oldBufLen ..< L.buf.len) if charsRead < oldBufLen: L.buf[oldBufLen + charsRead] = EndOfFile L.sentinel = oldBufLen + charsRead break - s = L.bufLen - 1 + s = L.buf.len - 1 proc fillBaseLexer(L: var BaseLexer, pos: int): int = assert(pos <= L.sentinel) if pos < L.sentinel: - result = pos + 1 # nothing to do + result = pos + 1 # nothing to do else: fillBuffer(L) - L.bufpos = 0 # XXX: is this really correct? + L.offsetBase += pos + L.bufpos = 0 result = 0 - L.lineStart = result -proc handleCR(L: var BaseLexer, pos: int): int = +proc handleCR*(L: var BaseLexer, pos: int): int = + ## Call this if you scanned over `'\c'` in the buffer; it returns the + ## position to continue the scanning from. `pos` must be the position + ## of the `'\c'`. assert(L.buf[pos] == '\c') inc(L.lineNumber) result = fillBaseLexer(L, pos) if L.buf[result] == '\L': result = fillBaseLexer(L, result) + L.lineStart = result -proc handleLF(L: var BaseLexer, pos: int): int = +proc handleLF*(L: var BaseLexer, pos: int): int = + ## Call this if you scanned over `'\L'` in the buffer; it returns the + ## position to continue the scanning from. `pos` must be the position + ## of the `'\L'`. assert(L.buf[pos] == '\L') inc(L.lineNumber) result = fillBaseLexer(L, pos) #L.lastNL := result-1; // BUGFIX: was: result; + L.lineStart = result + +proc handleRefillChar*(L: var BaseLexer, pos: int): int = + ## Call this if a terminator character other than a new line is scanned + ## at `pos`; it returns the position to continue the scanning from. 
+ assert(L.buf[pos] in L.refillChars) + result = fillBaseLexer(L, pos) #L.lastNL := result-1; // BUGFIX: was: result; proc skipUtf8Bom(L: var BaseLexer) = if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'): inc(L.bufpos, 3) inc(L.lineStart, 3) -proc open(L: var BaseLexer, input: Stream, bufLen: int = 8192) = +proc open*(L: var BaseLexer, input: Stream, bufLen: int = 8192; + refillChars: set[char] = NewLines) = + ## inits the BaseLexer with a stream to read from. assert(bufLen > 0) assert(input != nil) L.input = input L.bufpos = 0 - L.bufLen = bufLen - L.buf = cast[cstring](alloc(bufLen * chrSize)) + L.offsetBase = 0 + L.refillChars = refillChars + L.buf = newString(bufLen) L.sentinel = bufLen - 1 L.lineStart = 0 - L.lineNumber = 1 # lines start at 1 + L.lineNumber = 1 # lines start at 1 fillBuffer(L) skipUtf8Bom(L) -proc getColNumber(L: BaseLexer, pos: int): int = +proc getColNumber*(L: BaseLexer, pos: int): int = + ## retrieves the current column. result = abs(pos - L.lineStart) -proc getCurrentLine(L: BaseLexer, marker: bool = true): string = +proc getCurrentLine*(L: BaseLexer, marker: bool = true): string = + ## retrieves the current line. var i: int result = "" i = L.lineStart @@ -166,4 +165,3 @@ proc getCurrentLine(L: BaseLexer, marker: bool = true): string = add(result, "\n") if marker: add(result, spaces(getColNumber(L, L.bufpos)) & "^\n") - diff --git a/lib/pure/logging.nim b/lib/pure/logging.nim index ca674af4b..c30f68af8 100644 --- a/lib/pure/logging.nim +++ b/lib/pure/logging.nim @@ -7,91 +7,311 @@ # distribution, for details about the copyright. # -## This module implements a simple logger. It has been designed to be as simple -## as possible to avoid bloat, if this library does not fulfill your needs, -## write your own. +## This module implements a simple logger. 
## -## Format strings support the following variables which must be prefixed with -## the dollar operator (``$``): +## It has been designed to be as simple as possible to avoid bloat. +## If this library does not fulfill your needs, write your own. +## +## Basic usage +## =========== +## +## To get started, first create a logger: +## +## ```Nim +## import std/logging +## +## var logger = newConsoleLogger() +## ``` +## +## The logger that was created above logs to the console, but this module +## also provides loggers that log to files, such as the +## `FileLogger<#FileLogger>`_. Creating custom loggers is also possible by +## inheriting from the `Logger<#Logger>`_ type. +## +## Once a logger has been created, call its `log proc +## <#log.e,ConsoleLogger,Level,varargs[string,]>`_ to log a message: +## +## ```Nim +## logger.log(lvlInfo, "a log message") +## # Output: INFO a log message +## ``` +## +## The ``INFO`` within the output is the result of a format string being +## prepended to the message, and it will differ depending on the message's +## level. Format strings are `explained in more detail +## here<#basic-usage-format-strings>`_. +## +## There are six logging levels: debug, info, notice, warn, error, and fatal. +## They are described in more detail within the `Level enum's documentation +## <#Level>`_. A message is logged if its level is at or above both the logger's +## ``levelThreshold`` field and the global log filter. The latter can be changed +## with the `setLogFilter proc<#setLogFilter,Level>`_. +## +## .. warning:: +## For loggers that log to a console or to files, only error and fatal +## messages will cause their output buffers to be flushed immediately by default. +## set ``flushThreshold`` when creating the logger to change this. +## +## Handlers +## -------- +## +## When using multiple loggers, calling the log proc for each logger can +## become repetitive. 
Instead of doing that, register each logger that will be +## used with the `addHandler proc<#addHandler,Logger>`_, which is demonstrated +## in the following example: +## +## ```Nim +## import std/logging +## +## var consoleLog = newConsoleLogger() +## var fileLog = newFileLogger("errors.log", levelThreshold=lvlError) +## var rollingLog = newRollingFileLogger("rolling.log") +## +## addHandler(consoleLog) +## addHandler(fileLog) +## addHandler(rollingLog) +## ``` +## +## After doing this, use either the `log template +## <#log.t,Level,varargs[string,]>`_ or one of the level-specific templates, +## such as the `error template<#error.t,varargs[string,]>`_, to log messages +## to all registered handlers at once. +## +## ```Nim +## # This example uses the loggers created above +## log(lvlError, "an error occurred") +## error("an error occurred") # Equivalent to the above line +## info("something normal happened") # Will not be written to errors.log +## ``` +## +## Note that a message's level is still checked against each handler's +## ``levelThreshold`` and the global log filter. +## +## Format strings +## -------------- +## +## Log messages are prefixed with format strings. These strings contain +## placeholders for variables, such as ``$time``, that are replaced with their +## corresponding values, such as the current time, before they are prepended to +## a log message. Characters that are not part of variables are unaffected. +## +## The format string used by a logger can be specified by providing the `fmtStr` +## argument when creating the logger or by setting its `fmtStr` field afterward. +## If not specified, the `default format string<#defaultFmtStr>`_ is used. 
+## +## The following variables, which must be prefixed with a dollar sign (``$``), +## are available: ## ## ============ ======================= -## Operator Output +## Variable Output ## ============ ======================= ## $date Current date ## $time Current time -## $app ``os.getAppFilename()`` +## $datetime $dateT$time +## $app `os.getAppFilename()<os.html#getAppFilename>`_ +## $appname Base name of ``$app`` +## $appdir Directory name of ``$app`` +## $levelid First letter of log level +## $levelname Log level name ## ============ ======================= ## +## Note that ``$app``, ``$appname``, and ``$appdir`` are not supported when +## using the JavaScript backend. +## +## The following example illustrates how to use format strings: +## +## ```Nim +## import std/logging ## -## The following example demonstrates logging to three different handlers -## simultaneously: +## var logger = newConsoleLogger(fmtStr="[$time] - $levelname: ") +## logger.log(lvlInfo, "this is a message") +## # Output: [19:50:13] - INFO: this is a message +## ``` ## -## .. code-block:: nim +## Notes when using multiple threads +## --------------------------------- ## -## var L = newConsoleLogger() -## var fL = newFileLogger("test.log", fmtStr = verboseFmtStr) -## var rL = newRollingFileLogger("rolling.log", fmtStr = verboseFmtStr) -## addHandler(L) -## addHandler(fL) -## addHandler(rL) -## info("920410:52 accepted") -## warn("4 8 15 16 23 4-- Error") -## error("922044:16 SYSTEM FAILURE") -## fatal("SYSTEM FAILURE SYSTEM FAILURE") +## There are a few details to keep in mind when using this module within +## multiple threads: +## * The global log filter is actually a thread-local variable, so it needs to +## be set in each thread that uses this module. +## * The list of registered handlers is also a thread-local variable. If a +## handler will be used in multiple threads, it needs to be registered in +## each of those threads. 
## -## **Warning:** The global list of handlers is a thread var, this means that -## the handlers must be re-added in each thread. +## See also +## ======== +## * `strutils module<strutils.html>`_ for common string functions +## * `strformat module<strformat.html>`_ for string interpolation and formatting +## * `strscans module<strscans.html>`_ for ``scanf`` and ``scanp`` macros, which +## offer easier substring extraction than regular expressions -import strutils, os, times +import std/[strutils, times] +when not defined(js): + import std/os + +when defined(nimPreviewSlimSystem): + import std/syncio type - Level* = enum ## logging level - lvlAll, ## all levels active - lvlDebug, ## debug level (and any above) active - lvlInfo, ## info level (and any above) active - lvlWarn, ## warn level (and any above) active - lvlError, ## error level (and any above) active - lvlFatal, ## fatal level (and any above) active - lvlNone ## no levels active + Level* = enum ## \ + ## Enumeration of logging levels. + ## + ## Debug messages represent the lowest logging level, and fatal error + ## messages represent the highest logging level. ``lvlAll`` can be used + ## to enable all messages, while ``lvlNone`` can be used to disable all + ## messages. + ## + ## Typical usage for each logging level, from lowest to highest, is + ## described below: + ## + ## * **Debug** - debugging information helpful only to developers + ## * **Info** - anything associated with normal operation and without + ## any particular importance + ## * **Notice** - more important information that users should be + ## notified about + ## * **Warn** - impending problems that require some attention + ## * **Error** - error conditions that the application can recover from + ## * **Fatal** - fatal errors that prevent the application from continuing + ## + ## It is completely up to the application how to utilize each level. 
+ ## + ## Individual loggers have a ``levelThreshold`` field that filters out + ## any messages with a level lower than the threshold. There is also + ## a global filter that applies to all log messages, and it can be changed + ## using the `setLogFilter proc<#setLogFilter,Level>`_. + lvlAll, ## All levels active + lvlDebug, ## Debug level and above are active + lvlInfo, ## Info level and above are active + lvlNotice, ## Notice level and above are active + lvlWarn, ## Warn level and above are active + lvlError, ## Error level and above are active + lvlFatal, ## Fatal level and above are active + lvlNone ## No levels active; nothing is logged const - LevelNames*: array [Level, string] = [ - "DEBUG", "DEBUG", "INFO", "WARN", "ERROR", "FATAL", "NONE" - ] - - defaultFmtStr* = "" ## default string between log level and message per logger - verboseFmtStr* = "$date $time " + LevelNames*: array[Level, string] = [ + "DEBUG", "DEBUG", "INFO", "NOTICE", "WARN", "ERROR", "FATAL", "NONE" + ] ## Array of strings representing each logging level. + + defaultFmtStr* = "$levelname " ## The default format string. + verboseFmtStr* = "$levelid, [$datetime] -- $appname: " ## \ + ## A more verbose format string. + ## + ## This string can be passed as the ``fmtStr`` argument to procs that create + ## new loggers, such as the `newConsoleLogger proc<#newConsoleLogger>`_. + ## + ## If a different format string is preferred, refer to the + ## `documentation about format strings<#basic-usage-format-strings>`_ + ## for more information, including a list of available variables. + defaultFlushThreshold = when NimMajor >= 2: + when defined(nimV1LogFlushBehavior): lvlError else: lvlAll + else: + when defined(nimFlushAllLogs): lvlAll else: lvlError + ## The threshold above which log messages to file-like loggers + ## are automatically flushed. 
+ ## + ## By default, only error and fatal messages are flushed, + ## but defining ``-d:nimFlushAllLogs`` will make all levels be flushed. + ## (On Nim 2 and later the defaults are reversed: all levels are flushed + ## unless ``-d:nimV1LogFlushBehavior`` is defined.) type - Logger* = ref object of RootObj ## abstract logger; the base type of all loggers - levelThreshold*: Level ## only messages of level >= levelThreshold - ## should be processed - fmtStr: string ## = defaultFmtStr by default, see substituteLog for $date etc. - - ConsoleLogger* = ref object of Logger ## logger that writes the messages to the - ## console - - FileLogger* = ref object of Logger ## logger that writes the messages to a file - f: File - - RollingFileLogger* = ref object of FileLogger ## logger that writes the - ## messages to a file and - ## performs log rotation - maxLines: int # maximum number of lines - curLine : int - baseName: string # initial filename - baseMode: FileMode # initial file mode - logFiles: int # how many log files already created, e.g. basename.1, basename.2... - -{.deprecated: [TLevel: Level, PLogger: Logger, PConsoleLogger: ConsoleLogger, - PFileLogger: FileLogger, PRollingFileLogger: RollingFileLogger].} - -proc substituteLog(frmt: string): string = - ## converts $date to the current date - ## converts $time to the current time - ## converts $app to getAppFilename() - ## converts - result = newStringOfCap(frmt.len + 20) + Logger* = ref object of RootObj + ## The abstract base type of all loggers. + ## + ## Custom loggers should inherit from this type. They should also provide + ## their own implementation of the + ## `log method<#log.e,Logger,Level,varargs[string,]>`_. 
+ ## + ## See also: + ## * `ConsoleLogger<#ConsoleLogger>`_ + ## * `FileLogger<#FileLogger>`_ + ## * `RollingFileLogger<#RollingFileLogger>`_ + levelThreshold*: Level ## Only messages that are at or above this + ## threshold will be logged + fmtStr*: string ## Format string to prepend to each log message; + ## defaultFmtStr is the default + + ConsoleLogger* = ref object of Logger + ## A logger that writes log messages to the console. + ## + ## Create a new ``ConsoleLogger`` with the `newConsoleLogger proc + ## <#newConsoleLogger>`_. + ## + ## See also: + ## * `FileLogger<#FileLogger>`_ + ## * `RollingFileLogger<#RollingFileLogger>`_ + useStderr*: bool ## If true, writes to stderr; otherwise, writes to stdout + flushThreshold*: Level ## Only messages that are at or above this + ## threshold will be flushed immediately + +when not defined(js): + type + FileLogger* = ref object of Logger + ## A logger that writes log messages to a file. + ## + ## Create a new ``FileLogger`` with the `newFileLogger proc + ## <#newFileLogger,File>`_. + ## + ## **Note:** This logger is not available for the JavaScript backend. + ## + ## See also: + ## * `ConsoleLogger<#ConsoleLogger>`_ + ## * `RollingFileLogger<#RollingFileLogger>`_ + file*: File ## The wrapped file + flushThreshold*: Level ## Only messages that are at or above this + ## threshold will be flushed immediately + + RollingFileLogger* = ref object of FileLogger + ## A logger that writes log messages to a file while performing log + ## rotation. + ## + ## Create a new ``RollingFileLogger`` with the `newRollingFileLogger proc + ## <#newRollingFileLogger,FileMode,Positive,int>`_. + ## + ## **Note:** This logger is not available for the JavaScript backend. 
+ ## + ## See also: + ## * `ConsoleLogger<#ConsoleLogger>`_ + ## * `FileLogger<#FileLogger>`_ + maxLines: int # maximum number of lines + curLine: int + baseName: string # initial filename + baseMode: FileMode # initial file mode + logFiles: int # how many log files already created, e.g. basename.1, basename.2... + bufSize: int # size of output buffer (-1: use system defaults, 0: unbuffered, >0: fixed buffer size) + +var + level {.threadvar.}: Level ## global log filter + handlers {.threadvar.}: seq[Logger] ## handlers with their own log levels + +proc substituteLog*(frmt: string, level: Level, + args: varargs[string, `$`]): string = + ## Formats a log message at the specified level with the given format string. + ## + ## The `format variables<#basic-usage-format-strings>`_ present within + ## ``frmt`` will be replaced with the corresponding values before being + ## prepended to ``args`` and returned. + ## + ## Unless you are implementing a custom logger, there is little need to call + ## this directly. Use either a logger's log method or one of the logging + ## templates. 
+ ## + ## See also: + ## * `log method<#log.e,ConsoleLogger,Level,varargs[string,]>`_ + ## for the ConsoleLogger + ## * `log method<#log.e,FileLogger,Level,varargs[string,]>`_ + ## for the FileLogger + ## * `log method<#log.e,RollingFileLogger,Level,varargs[string,]>`_ + ## for the RollingFileLogger + ## * `log template<#log.t,Level,varargs[string,]>`_ + runnableExamples: + doAssert substituteLog(defaultFmtStr, lvlInfo, "a message") == "INFO a message" + doAssert substituteLog("$levelid - ", lvlError, "an error") == "E - an error" + doAssert substituteLog("$levelid", lvlDebug, "error") == "Derror" + var msgLen = 0 + for arg in args: + msgLen += arg.len + result = newStringOfCap(frmt.len + msgLen + 20) var i = 0 while i < frmt.len: if frmt[i] != '$': @@ -100,192 +320,588 @@ proc substituteLog(frmt: string): string = else: inc(i) var v = "" - var app = getAppFilename() - while frmt[i] in IdentChars: - v.add(toLower(frmt[i])) + let app = when defined(js): "" else: getAppFilename() + while i < frmt.len and frmt[i] in IdentChars: + v.add(toLowerAscii(frmt[i])) inc(i) case v of "date": result.add(getDateStr()) of "time": result.add(getClockStr()) - of "app": result.add(app) - of "appdir": result.add(app.splitFile.dir) - of "appname": result.add(app.splitFile.name) + of "datetime": result.add(getDateStr() & "T" & getClockStr()) + of "app": result.add(app) + of "appdir": + when not defined(js): result.add(app.splitFile.dir) + of "appname": + when not defined(js): result.add(app.splitFile.name) + of "levelid": result.add(LevelNames[level][0]) + of "levelname": result.add(LevelNames[level]) else: discard + for arg in args: + result.add(arg) -method log*(logger: Logger, level: Level, - frmt: string, args: varargs[string, `$`]) {. - raises: [Exception], - tags: [TimeEffect, WriteIOEffect, ReadIOEffect].} = - ## Override this method in custom loggers. Default implementation does +method log*(logger: Logger, level: Level, args: varargs[string, `$`]) {. 
+ raises: [Exception], gcsafe, + tags: [RootEffect], base.} = + ## Override this method in custom loggers. The default implementation does ## nothing. + ## + ## See also: + ## * `log method<#log.e,ConsoleLogger,Level,varargs[string,]>`_ + ## for the ConsoleLogger + ## * `log method<#log.e,FileLogger,Level,varargs[string,]>`_ + ## for the FileLogger + ## * `log method<#log.e,RollingFileLogger,Level,varargs[string,]>`_ + ## for the RollingFileLogger + ## * `log template<#log.t,Level,varargs[string,]>`_ discard -method log*(logger: ConsoleLogger, level: Level, - frmt: string, args: varargs[string, `$`]) = - ## Logs to the console using ``logger`` only. - if level >= logger.levelThreshold: - writeln(stdout, LevelNames[level], " ", substituteLog(logger.fmtStr), - frmt % args) - -method log*(logger: FileLogger, level: Level, - frmt: string, args: varargs[string, `$`]) = - ## Logs to a file using ``logger`` only. - if level >= logger.levelThreshold: - writeln(logger.f, LevelNames[level], " ", - substituteLog(logger.fmtStr), frmt % args) - -proc defaultFilename*(): string = - ## Returns the default filename for a logger. - var (path, name, ext) = splitFile(getAppFilename()) - result = changeFileExt(path / name, "log") - -proc newConsoleLogger*(levelThreshold = lvlAll, fmtStr = defaultFmtStr): ConsoleLogger = - ## Creates a new console logger. This logger logs to the console. +method log*(logger: ConsoleLogger, level: Level, args: varargs[string, `$`]) = + ## Logs to the console with the given `ConsoleLogger<#ConsoleLogger>`_ only. + ## + ## This method ignores the list of registered handlers. + ## + ## Whether the message is logged depends on both the ConsoleLogger's + ## ``levelThreshold`` field and the global log filter set using the + ## `setLogFilter proc<#setLogFilter,Level>`_. + ## + ## **Note:** Only error and fatal messages will cause the output buffer + ## to be flushed immediately by default. Set ``flushThreshold`` when creating + ## the logger to change this. 
+ ## + ## See also: + ## * `log method<#log.e,FileLogger,Level,varargs[string,]>`_ + ## for the FileLogger + ## * `log method<#log.e,RollingFileLogger,Level,varargs[string,]>`_ + ## for the RollingFileLogger + ## * `log template<#log.t,Level,varargs[string,]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## var consoleLog = newConsoleLogger() + ## consoleLog.log(lvlInfo, "this is a message") + ## consoleLog.log(lvlError, "error code is: ", 404) + ## ``` + if level >= logging.level and level >= logger.levelThreshold: + let ln = substituteLog(logger.fmtStr, level, args) + when defined(js): + let cln = ln.cstring + case level + of lvlDebug: {.emit: "console.debug(`cln`);".} + of lvlInfo: {.emit: "console.info(`cln`);".} + of lvlWarn: {.emit: "console.warn(`cln`);".} + of lvlError: {.emit: "console.error(`cln`);".} + else: {.emit: "console.log(`cln`);".} + else: + try: + var handle = stdout + if logger.useStderr: + handle = stderr + writeLine(handle, ln) + if level >= logger.flushThreshold: flushFile(handle) + except IOError: + discard + +proc newConsoleLogger*(levelThreshold = lvlAll, fmtStr = defaultFmtStr, + useStderr = false, flushThreshold = defaultFlushThreshold): ConsoleLogger = + ## Creates a new `ConsoleLogger<#ConsoleLogger>`_. + ## + ## By default, log messages are written to ``stdout``. If ``useStderr`` is + ## true, they are written to ``stderr`` instead. + ## + ## For the JavaScript backend, log messages are written to the console, + ## and ``useStderr`` is ignored. 
+ ## + ## See also: + ## * `newFileLogger proc<#newFileLogger,File>`_ that uses a file handle + ## * `newFileLogger proc<#newFileLogger,FileMode,int>`_ + ## that accepts a filename + ## * `newRollingFileLogger proc<#newRollingFileLogger,FileMode,Positive,int>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## var normalLog = newConsoleLogger() + ## var formatLog = newConsoleLogger(fmtStr=verboseFmtStr) + ## var errorLog = newConsoleLogger(levelThreshold=lvlError, useStderr=true) + ## ``` new result result.fmtStr = fmtStr result.levelThreshold = levelThreshold - -proc newFileLogger*(filename = defaultFilename(), - mode: FileMode = fmAppend, - levelThreshold = lvlAll, - fmtStr = defaultFmtStr): FileLogger = - ## Creates a new file logger. This logger logs to a file. - new(result) - result.levelThreshold = levelThreshold - result.f = open(filename, mode) - result.fmtStr = fmtStr - -# ------ - -proc countLogLines(logger: RollingFileLogger): int = - result = 0 - for line in logger.f.lines(): - result.inc() - -proc countFiles(filename: string): int = - # Example: file.log.1 - result = 0 - let (dir, name, ext) = splitFile(filename) - for kind, path in walkDir(dir): - if kind == pcFile: - let llfn = name & ext & ExtSep - if path.extractFilename.startsWith(llfn): - let numS = path.extractFilename[llfn.len .. ^1] - try: - let num = parseInt(numS) - if num > result: - result = num - except ValueError: discard - -proc newRollingFileLogger*(filename = defaultFilename(), - mode: FileMode = fmReadWrite, - levelThreshold = lvlAll, - fmtStr = defaultFmtStr, - maxLines = 1000): RollingFileLogger = - ## Creates a new rolling file logger. Once a file reaches ``maxLines`` lines - ## a new log file will be started and the old will be renamed. 
- new(result) - result.levelThreshold = levelThreshold - result.fmtStr = fmtStr - result.maxLines = maxLines - result.f = open(filename, mode) - result.curLine = 0 - result.baseName = filename - result.baseMode = mode - - result.logFiles = countFiles(filename) - - if mode == fmAppend: - # We need to get a line count because we will be appending to the file. - result.curLine = countLogLines(result) - -proc rotate(logger: RollingFileLogger) = - let (dir, name, ext) = splitFile(logger.baseName) - for i in countdown(logger.logFiles, 0): - let srcSuff = if i != 0: ExtSep & $i else: "" - moveFile(dir / (name & ext & srcSuff), - dir / (name & ext & ExtSep & $(i+1))) - -method log*(logger: RollingFileLogger, level: Level, - frmt: string, args: varargs[string, `$`]) = - ## Logs to a file using rolling ``logger`` only. - if level >= logger.levelThreshold: - if logger.curLine >= logger.maxLines: - logger.f.close() - rotate(logger) - logger.logFiles.inc - logger.curLine = 0 - logger.f = open(logger.baseName, logger.baseMode) - - writeln(logger.f, LevelNames[level], " ",substituteLog(logger.fmtStr), frmt % args) - logger.curLine.inc + result.flushThreshold = flushThreshold + result.useStderr = useStderr + +when not defined(js): + method log*(logger: FileLogger, level: Level, args: varargs[string, `$`]) = + ## Logs a message at the specified level using the given + ## `FileLogger<#FileLogger>`_ only. + ## + ## This method ignores the list of registered handlers. + ## + ## Whether the message is logged depends on both the FileLogger's + ## ``levelThreshold`` field and the global log filter set using the + ## `setLogFilter proc<#setLogFilter,Level>`_. + ## + ## **Notes:** + ## * Only error and fatal messages will cause the output buffer + ## to be flushed immediately by default. Set ``flushThreshold`` when creating + ## the logger to change this. + ## * This method is not available for the JavaScript backend. 
+ ## + ## See also: + ## * `log method<#log.e,ConsoleLogger,Level,varargs[string,]>`_ + ## for the ConsoleLogger + ## * `log method<#log.e,RollingFileLogger,Level,varargs[string,]>`_ + ## for the RollingFileLogger + ## * `log template<#log.t,Level,varargs[string,]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## var fileLog = newFileLogger("messages.log") + ## fileLog.log(lvlInfo, "this is a message") + ## fileLog.log(lvlError, "error code is: ", 404) + ## ``` + if level >= logging.level and level >= logger.levelThreshold: + writeLine(logger.file, substituteLog(logger.fmtStr, level, args)) + if level >= logger.flushThreshold: flushFile(logger.file) + + proc defaultFilename*(): string = + ## Returns the filename that is used by default when naming log files. + ## + ## **Note:** This proc is not available for the JavaScript backend. + var (path, name, _) = splitFile(getAppFilename()) + result = changeFileExt(path / name, "log") + + proc newFileLogger*(file: File, + levelThreshold = lvlAll, + fmtStr = defaultFmtStr, + flushThreshold = defaultFlushThreshold): FileLogger = + ## Creates a new `FileLogger<#FileLogger>`_ that uses the given file handle. + ## + ## **Note:** This proc is not available for the JavaScript backend. 
+ ## + ## See also: + ## * `newConsoleLogger proc<#newConsoleLogger>`_ + ## * `newFileLogger proc<#newFileLogger,FileMode,int>`_ + ## that accepts a filename + ## * `newRollingFileLogger proc<#newRollingFileLogger,FileMode,Positive,int>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## var messages = open("messages.log", fmWrite) + ## var formatted = open("formatted.log", fmWrite) + ## var errors = open("errors.log", fmWrite) + ## + ## var normalLog = newFileLogger(messages) + ## var formatLog = newFileLogger(formatted, fmtStr=verboseFmtStr) + ## var errorLog = newFileLogger(errors, levelThreshold=lvlError) + ## ``` + new(result) + result.file = file + result.levelThreshold = levelThreshold + result.flushThreshold = flushThreshold + result.fmtStr = fmtStr + + proc newFileLogger*(filename = defaultFilename(), + mode: FileMode = fmAppend, + levelThreshold = lvlAll, + fmtStr = defaultFmtStr, + bufSize: int = -1, + flushThreshold = defaultFlushThreshold): FileLogger = + ## Creates a new `FileLogger<#FileLogger>`_ that logs to a file with the + ## given filename. + ## + ## ``bufSize`` controls the size of the output buffer that is used when + ## writing to the log file. The following values can be provided: + ## * ``-1`` - use system defaults + ## * ``0`` - unbuffered + ## * ``> 0`` - fixed buffer size + ## + ## **Note:** This proc is not available for the JavaScript backend. 
+ ## + ## See also: + ## * `newConsoleLogger proc<#newConsoleLogger>`_ + ## * `newFileLogger proc<#newFileLogger,File>`_ that uses a file handle + ## * `newRollingFileLogger proc<#newRollingFileLogger,FileMode,Positive,int>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## var normalLog = newFileLogger("messages.log") + ## var formatLog = newFileLogger("formatted.log", fmtStr=verboseFmtStr) + ## var errorLog = newFileLogger("errors.log", levelThreshold=lvlError) + ## ``` + let file = open(filename, mode, bufSize = bufSize) + newFileLogger(file, levelThreshold, fmtStr, flushThreshold) + + # ------ + + proc countLogLines(logger: RollingFileLogger): int = + let fp = open(logger.baseName, fmRead) + for line in fp.lines(): + result.inc() + fp.close() + + proc countFiles(filename: string): int = + # Example: file.log.1 + result = 0 + var (dir, name, ext) = splitFile(filename) + if dir == "": + dir = "." + for kind, path in walkDir(dir): + if kind == pcFile: + let llfn = name & ext & ExtSep + if path.extractFilename.startsWith(llfn): + let numS = path.extractFilename[llfn.len .. ^1] + try: + let num = parseInt(numS) + if num > result: + result = num + except ValueError: discard + + proc newRollingFileLogger*(filename = defaultFilename(), + mode: FileMode = fmReadWrite, + levelThreshold = lvlAll, + fmtStr = defaultFmtStr, + maxLines: Positive = 1000, + bufSize: int = -1, + flushThreshold = defaultFlushThreshold): RollingFileLogger = + ## Creates a new `RollingFileLogger<#RollingFileLogger>`_. + ## + ## Once the current log file being written to contains ``maxLines`` lines, + ## a new log file will be created, and the old log file will be renamed. + ## + ## ``bufSize`` controls the size of the output buffer that is used when + ## writing to the log file. The following values can be provided: + ## * ``-1`` - use system defaults + ## * ``0`` - unbuffered + ## * ``> 0`` - fixed buffer size + ## + ## **Note:** This proc is not available in the JavaScript backend. 
+ ## + ## See also: + ## * `newConsoleLogger proc<#newConsoleLogger>`_ + ## * `newFileLogger proc<#newFileLogger,File>`_ that uses a file handle + ## * `newFileLogger proc<#newFileLogger,FileMode,int>`_ + ## that accepts a filename + ## + ## **Examples:** + ## + ## ```Nim + ## var normalLog = newRollingFileLogger("messages.log") + ## var formatLog = newRollingFileLogger("formatted.log", fmtStr=verboseFmtStr) + ## var shortLog = newRollingFileLogger("short.log", maxLines=200) + ## var errorLog = newRollingFileLogger("errors.log", levelThreshold=lvlError) + ## ``` + new(result) + result.levelThreshold = levelThreshold + result.fmtStr = fmtStr + result.maxLines = maxLines + result.bufSize = bufSize + result.file = open(filename, mode, bufSize = result.bufSize) + result.curLine = 0 + result.baseName = filename + result.baseMode = mode + result.flushThreshold = flushThreshold + + result.logFiles = countFiles(filename) + + if mode == fmAppend: + # We need to get a line count because we will be appending to the file. + result.curLine = countLogLines(result) + + proc rotate(logger: RollingFileLogger) = + let (dir, name, ext) = splitFile(logger.baseName) + for i in countdown(logger.logFiles, 0): + let srcSuff = if i != 0: ExtSep & $i else: "" + moveFile(dir / (name & ext & srcSuff), + dir / (name & ext & ExtSep & $(i+1))) + + method log*(logger: RollingFileLogger, level: Level, args: varargs[string, `$`]) = + ## Logs a message at the specified level using the given + ## `RollingFileLogger<#RollingFileLogger>`_ only. + ## + ## This method ignores the list of registered handlers. + ## + ## Whether the message is logged depends on both the RollingFileLogger's + ## ``levelThreshold`` field and the global log filter set using the + ## `setLogFilter proc<#setLogFilter,Level>`_. + ## + ## **Notes:** + ## * Only error and fatal messages will cause the output buffer + ## to be flushed immediately by default. Set ``flushThreshold`` when creating + ## the logger to change this. 
+ ## * This method is not available for the JavaScript backend. + ## + ## See also: + ## * `log method<#log.e,ConsoleLogger,Level,varargs[string,]>`_ + ## for the ConsoleLogger + ## * `log method<#log.e,FileLogger,Level,varargs[string,]>`_ + ## for the FileLogger + ## * `log template<#log.t,Level,varargs[string,]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## var rollingLog = newRollingFileLogger("messages.log") + ## rollingLog.log(lvlInfo, "this is a message") + ## rollingLog.log(lvlError, "error code is: ", 404) + ## ``` + if level >= logging.level and level >= logger.levelThreshold: + if logger.curLine >= logger.maxLines: + logger.file.close() + rotate(logger) + logger.logFiles.inc + logger.curLine = 0 + logger.file = open(logger.baseName, logger.baseMode, + bufSize = logger.bufSize) + + writeLine(logger.file, substituteLog(logger.fmtStr, level, args)) + if level >= logger.flushThreshold: flushFile(logger.file) + logger.curLine.inc # -------- -var level {.threadvar.}: Level ## global log filter -var handlers {.threadvar.}: seq[Logger] ## handlers with their own log levels - -proc logLoop(level: Level, frmt: string, args: varargs[string, `$`]) = +proc logLoop(level: Level, args: varargs[string, `$`]) = for logger in items(handlers): if level >= logger.levelThreshold: - log(logger, level, frmt, args) - -template log*(level: Level, frmt: string, args: varargs[string, `$`]) = - ## Logs a message to all registered handlers at the given level. + log(logger, level, args) + +template log*(level: Level, args: varargs[string, `$`]) = + ## Logs a message at the specified level to all registered handlers. + ## + ## Whether the message is logged depends on both each handler's + ## `levelThreshold` field and the global log filter set using the + ## `setLogFilter proc<#setLogFilter,Level>`_. 
+ ## + ## **Examples:** + ## + ## ```Nim + ## var logger = newConsoleLogger() + ## addHandler(logger) + ## + ## log(lvlInfo, "This is an example.") + ## ``` + ## + ## See also: + ## * `debug template<#debug.t,varargs[string,]>`_ + ## * `info template<#info.t,varargs[string,]>`_ + ## * `notice template<#notice.t,varargs[string,]>`_ + ## * `warn template<#warn.t,varargs[string,]>`_ + ## * `error template<#error.t,varargs[string,]>`_ + ## * `fatal template<#fatal.t,varargs[string,]>`_ bind logLoop bind `%` bind logging.level if level >= logging.level: - logLoop(level, frmt, args) + logLoop(level, args) -template debug*(frmt: string, args: varargs[string, `$`]) = +template debug*(args: varargs[string, `$`]) = ## Logs a debug message to all registered handlers. - log(lvlDebug, frmt, args) - -template info*(frmt: string, args: varargs[string, `$`]) = + ## + ## Debug messages are typically useful to the application developer only, + ## and they are usually disabled in release builds, although this template + ## does not make that distinction. + ## + ## **Examples:** + ## + ## ```Nim + ## var logger = newConsoleLogger() + ## addHandler(logger) + ## + ## debug("myProc called with arguments: foo, 5") + ## ``` + ## + ## See also: + ## * `log template<#log.t,Level,varargs[string,]>`_ + ## * `info template<#info.t,varargs[string,]>`_ + ## * `notice template<#notice.t,varargs[string,]>`_ + log(lvlDebug, args) + +template info*(args: varargs[string, `$`]) = ## Logs an info message to all registered handlers. - log(lvlInfo, frmt, args) - -template warn*(frmt: string, args: varargs[string, `$`]) = + ## + ## Info messages are typically generated during the normal operation + ## of an application and are of no particular importance. It can be useful to + ## aggregate these messages for later analysis. 
+ ## + ## **Examples:** + ## + ## ```Nim + ## var logger = newConsoleLogger() + ## addHandler(logger) + ## + ## info("Application started successfully.") + ## ``` + ## + ## See also: + ## * `log template<#log.t,Level,varargs[string,]>`_ + ## * `debug template<#debug.t,varargs[string,]>`_ + ## * `notice template<#notice.t,varargs[string,]>`_ + log(lvlInfo, args) + +template notice*(args: varargs[string, `$`]) = + ## Logs a notice to all registered handlers. + ## + ## Notices are semantically very similar to info messages, but they are meant + ## to be messages that the user should be actively notified about, depending + ## on the application. + ## + ## **Examples:** + ## + ## ```Nim + ## var logger = newConsoleLogger() + ## addHandler(logger) + ## + ## notice("An important operation has completed.") + ## ``` + ## + ## See also: + ## * `log template<#log.t,Level,varargs[string,]>`_ + ## * `debug template<#debug.t,varargs[string,]>`_ + ## * `info template<#info.t,varargs[string,]>`_ + log(lvlNotice, args) + +template warn*(args: varargs[string, `$`]) = ## Logs a warning message to all registered handlers. - log(lvlWarn, frmt, args) - -template error*(frmt: string, args: varargs[string, `$`]) = + ## + ## A warning is a non-error message that may indicate impending problems or + ## degraded performance. + ## + ## **Examples:** + ## + ## ```Nim + ## var logger = newConsoleLogger() + ## addHandler(logger) + ## + ## warn("The previous operation took too long to process.") + ## ``` + ## + ## See also: + ## * `log template<#log.t,Level,varargs[string,]>`_ + ## * `error template<#error.t,varargs[string,]>`_ + ## * `fatal template<#fatal.t,varargs[string,]>`_ + log(lvlWarn, args) + +template error*(args: varargs[string, `$`]) = ## Logs an error message to all registered handlers. 
- log(lvlError, frmt, args) - -template fatal*(frmt: string, args: varargs[string, `$`]) = + ## + ## Error messages are for application-level error conditions, such as when + ## some user input generated an exception. Typically, the application will + ## continue to run, but with degraded functionality or loss of data, and + ## these effects might be visible to users. + ## + ## **Examples:** + ## + ## ```Nim + ## var logger = newConsoleLogger() + ## addHandler(logger) + ## + ## error("An exception occurred while processing the form.") + ## ``` + ## + ## See also: + ## * `log template<#log.t,Level,varargs[string,]>`_ + ## * `warn template<#warn.t,varargs[string,]>`_ + ## * `fatal template<#fatal.t,varargs[string,]>`_ + log(lvlError, args) + +template fatal*(args: varargs[string, `$`]) = ## Logs a fatal error message to all registered handlers. - log(lvlFatal, frmt, args) + ## + ## Fatal error messages usually indicate that the application cannot continue + ## to run and will exit due to a fatal condition. This template only logs the + ## message, and it is the application's responsibility to exit properly. + ## + ## **Examples:** + ## + ## ```Nim + ## var logger = newConsoleLogger() + ## addHandler(logger) + ## + ## fatal("Can't open database -- exiting.") + ## ``` + ## + ## See also: + ## * `log template<#log.t,Level,varargs[string,]>`_ + ## * `warn template<#warn.t,varargs[string,]>`_ + ## * `error template<#error.t,varargs[string,]>`_ + log(lvlFatal, args) proc addHandler*(handler: Logger) = - ## Adds ``handler`` to the list of handlers. - if handlers.isNil: handlers = @[] + ## Adds a logger to the list of registered handlers. + ## + ## .. warning:: The list of handlers is a thread-local variable. If the given + ## handler will be used in multiple threads, this proc should be called in + ## each of those threads. 
+ ## + ## See also: + ## * `removeHandler proc`_ + ## * `getHandlers proc<#getHandlers>`_ + runnableExamples: + var logger = newConsoleLogger() + addHandler(logger) + doAssert logger in getHandlers() handlers.add(handler) +proc removeHandler*(handler: Logger) = + ## Removes a logger from the list of registered handlers. + ## + ## Note that for n times a logger is registered, n calls to this proc + ## are required to remove that logger. + for i, hnd in handlers: + if hnd == handler: + handlers.delete(i) + return + proc getHandlers*(): seq[Logger] = ## Returns a list of all the registered handlers. + ## + ## See also: + ## * `addHandler proc<#addHandler,Logger>`_ return handlers proc setLogFilter*(lvl: Level) = ## Sets the global log filter. + ## + ## Messages below the provided level will not be logged regardless of an + ## individual logger's ``levelThreshold``. By default, all messages are + ## logged. + ## + ## .. warning:: The global log filter is a thread-local variable. If logging + ## is being performed in multiple threads, this proc should be called in each + ## thread unless it is intended that different threads should log at different + ## logging levels. + ## + ## See also: + ## * `getLogFilter proc<#getLogFilter>`_ + runnableExamples: + setLogFilter(lvlError) + doAssert getLogFilter() == lvlError level = lvl proc getLogFilter*(): Level = ## Gets the global log filter. + ## + ## See also: + ## * `setLogFilter proc<#setLogFilter,Level>`_ return level # -------------- -when isMainModule: +when not defined(testing) and isMainModule: var L = newConsoleLogger() - var fL = newFileLogger("test.log", fmtStr = verboseFmtStr) - var rL = newRollingFileLogger("rolling.log", fmtStr = verboseFmtStr) + when not defined(js): + var fL = newFileLogger("test.log", fmtStr = verboseFmtStr) + var rL = newRollingFileLogger("rolling.log", fmtStr = verboseFmtStr) + addHandler(fL) + addHandler(rL) addHandler(L) - addHandler(fL) - addHandler(rL) for i in 0 .. 
25: - info("hello" & $i, []) - + info("hello", i) + var nilString: string + info "hello ", nilString diff --git a/lib/pure/marshal.nim b/lib/pure/marshal.nim index b63c334ff..f9b3d3e4c 100644 --- a/lib/pure/marshal.nim +++ b/lib/pure/marshal.nim @@ -7,46 +7,77 @@ # distribution, for details about the copyright. # -## This module contains procs for `serialization`:idx: and `deseralization`:idx: +## This module contains procs for `serialization`:idx: and `deserialization`:idx: ## of arbitrary Nim data structures. The serialization format uses `JSON`:idx:. ## -## **Restriction**: For objects their type is **not** serialized. This means +## **Restriction:** For objects, their type is **not** serialized. This means ## essentially that it does not work if the object has some other runtime -## type than its compiletime type: +## type than its compiletime type. ## -## .. code-block:: nim -## -## type -## TA = object -## TB = object of TA -## f: int ## -## var -## a: ref TA -## b: ref TB +## Basic usage +## =========== ## -## new(b) -## a = b -## echo($$a[]) # produces "{}", not "{f: 0}" +runnableExamples: + type + A = object of RootObj + B = object of A + f: int + + let a: ref A = new(B) + assert $$a[] == "{}" # not "{f: 0}" + + # unmarshal + let c = to[B]("""{"f": 2}""") + assert typeof(c) is B + assert c.f == 2 + + # marshal + assert $$c == """{"f": 2}""" + +## **Note:** The `to` and `$$` operations are available at compile-time! +## +## +## See also +## ======== +## * `streams module <streams.html>`_ +## * `json module <json.html>`_ -import streams, typeinfo, json, intsets, tables +const unsupportedPlatform = + when defined(js): "javascript" + elif defined(nimscript): "nimscript" + else: "" + +when unsupportedPlatform != "": + {.error: "marshal module is not supported in " & unsupportedPlatform & """. +Please use alternative packages for serialization. +It is possible to reimplement this module using generics and type traits. 
+Please contribute a new implementation.""".} + +import std/[streams, typeinfo, json, intsets, tables, unicode] + +when defined(nimPreviewSlimSystem): + import std/[assertions, formatfloat] proc ptrToInt(x: pointer): int {.inline.} = result = cast[int](x) # don't skip alignment -proc storeAny(s: Stream, a: TAny, stored: var IntSet) = +proc storeAny(s: Stream, a: Any, stored: var IntSet) = case a.kind of akNone: assert false of akBool: s.write($getBool(a)) - of akChar: s.write(escapeJson($getChar(a))) - of akArray, akSequence: - if a.kind == akSequence and isNil(a): s.write("null") + of akChar: + let ch = getChar(a) + if ch < '\128': + s.write(escapeJson($ch)) else: - s.write("[") - for i in 0 .. a.len-1: - if i > 0: s.write(", ") - storeAny(s, a[i], stored) - s.write("]") + s.write($int(ch)) + of akArray, akSequence: + s.write("[") + for i in 0 .. a.len-1: + if i > 0: s.write(", ") + storeAny(s, a[i], stored) + s.write("]") of akObject, akTuple: s.write("{") var i = 0 @@ -84,15 +115,22 @@ proc storeAny(s: Stream, a: TAny, stored: var IntSet) = of akProc, akPointer, akCString: s.write($a.getPointer.ptrToInt) of akString: var x = getString(a) - if isNil(x): s.write("null") - else: s.write(escapeJson(x)) + if x.validateUtf8() == -1: s.write(escapeJson(x)) + else: + s.write("[") + var i = 0 + for c in x: + if i > 0: s.write(", ") + s.write($ord(c)) + inc(i) + s.write("]") of akInt..akInt64, akUInt..akUInt64: s.write($getBiggestInt(a)) of akFloat..akFloat128: s.write($getBiggestFloat(a)) -proc loadAny(p: var JsonParser, a: TAny, t: var Table[BiggestInt, pointer]) = +proc loadAny(p: var JsonParser, a: Any, t: var Table[BiggestInt, pointer]) = case a.kind of akNone: assert false - of akBool: + of akBool: case p.kind of jsonFalse: setBiggestInt(a, 0) of jsonTrue: setBiggestInt(a, 1) @@ -105,8 +143,12 @@ proc loadAny(p: var JsonParser, a: TAny, t: var Table[BiggestInt, pointer]) = setBiggestInt(a, ord(x[0])) next(p) return + elif p.kind == jsonInt: + setBiggestInt(a, 
getInt(p)) + next(p) + return raiseParseErr(p, "string of length 1 expected for a char") - of akEnum: + of akEnum: if p.kind == jsonString: setBiggestInt(a, getEnumOrdinal(a, p.str)) next(p) @@ -122,9 +164,12 @@ proc loadAny(p: var JsonParser, a: TAny, t: var Table[BiggestInt, pointer]) = if p.kind == jsonArrayEnd: next(p) else: raiseParseErr(p, "']' end of array expected") of akSequence: - case p.kind + case p.kind of jsonNull: - setPointer(a, nil) + when defined(nimSeqsV2): + invokeNewSeq(a, 0) + else: + setPointer(a, nil) next(p) of jsonArrayStart: next(p) @@ -143,7 +188,7 @@ proc loadAny(p: var JsonParser, a: TAny, t: var Table[BiggestInt, pointer]) = if p.kind != jsonObjectStart: raiseParseErr(p, "'{' expected for an object") next(p) while p.kind != jsonObjectEnd and p.kind != jsonEof: - if p.kind != jsonString: + if p.kind != jsonString: raiseParseErr(p, "string expected for a field name") var fieldName = p.str next(p) @@ -160,17 +205,18 @@ proc loadAny(p: var JsonParser, a: TAny, t: var Table[BiggestInt, pointer]) = if p.kind == jsonArrayEnd: next(p) else: raiseParseErr(p, "']' end of array expected") of akPtr, akRef: - case p.kind + case p.kind of jsonNull: setPointer(a, nil) next(p) of jsonInt: - setPointer(a, t[p.getInt]) + var raw = t.getOrDefault(p.getInt) + setPointer(a, addr raw) next(p) of jsonArrayStart: next(p) if a.kind == akRef: invokeNew(a) - else: setPointer(a, alloc0(a.baseTypeSize)) + else: setPointer(a, alloc0(a.baseTypeSize)) if p.kind == jsonInt: t[p.getInt] = getPointer(a) next(p) @@ -179,8 +225,8 @@ proc loadAny(p: var JsonParser, a: TAny, t: var Table[BiggestInt, pointer]) = if p.kind == jsonArrayEnd: next(p) else: raiseParseErr(p, "']' end of ref-address pair expected") else: raiseParseErr(p, "int for pointer type expected") - of akProc, akPointer, akCString: - case p.kind + of akProc, akPointer, akCString: + case p.kind of jsonNull: setPointer(a, nil) next(p) @@ -189,15 +235,30 @@ proc loadAny(p: var JsonParser, a: TAny, t: var 
Table[BiggestInt, pointer]) = next(p) else: raiseParseErr(p, "int for pointer type expected") of akString: - case p.kind + case p.kind of jsonNull: - setPointer(a, nil) + when defined(nimSeqsV2): + setString(a, "") + else: + setPointer(a, nil) next(p) of jsonString: setString(a, p.str) next(p) + of jsonArrayStart: + next(p) + var str = "" + while p.kind == jsonInt: + let code = p.getInt() + if code < 0 or code > 255: + raiseParseErr(p, "invalid charcode: " & $code) + str.add(chr(code)) + next(p) + if p.kind == jsonArrayEnd: next(p) + else: raiseParseErr(p, "an array of charcodes expected for string") + setString(a, str) else: raiseParseErr(p, "string expected") - of akInt..akInt64, akUInt..akUInt64: + of akInt..akInt64, akUInt..akUInt64: if p.kind == jsonInt: setBiggestInt(a, getInt(p)) next(p) @@ -211,7 +272,7 @@ proc loadAny(p: var JsonParser, a: TAny, t: var Table[BiggestInt, pointer]) = raiseParseErr(p, "float expected") of akRange: loadAny(p, a.skipRange, t) -proc loadAny(s: Stream, a: TAny, t: var Table[BiggestInt, pointer]) = +proc loadAny(s: Stream, a: Any, t: var Table[BiggestInt, pointer]) = var p: JsonParser open(p, s, "unknown file") next(p) @@ -219,101 +280,88 @@ proc loadAny(s: Stream, a: TAny, t: var Table[BiggestInt, pointer]) = close(p) proc load*[T](s: Stream, data: var T) = - ## loads `data` from the stream `s`. Raises `EIO` in case of an error. + ## Loads `data` from the stream `s`. Raises `IOError` in case of an error. + runnableExamples: + import std/streams + + var s = newStringStream("[1, 3, 5]") + var a: array[3, int] + load(s, a) + assert a == [1, 3, 5] + var tab = initTable[BiggestInt, pointer]() loadAny(s, toAny(data), tab) -proc store*[T](s: Stream, data: T) = - ## stores `data` into the stream `s`. Raises `EIO` in case of an error. - var stored = initIntSet() - var d: T - shallowCopy(d, data) - storeAny(s, toAny(d), stored) +proc store*[T](s: Stream, data: sink T) = + ## Stores `data` into the stream `s`. 
Raises `IOError` in case of an error. + runnableExamples: + import std/streams + + var s = newStringStream("") + var a = [1, 3, 5] + store(s, a) + s.setPosition(0) + assert s.readAll() == "[1, 3, 5]" -proc `$$`*[T](x: T): string = - ## returns a string representation of `x`. var stored = initIntSet() var d: T - shallowCopy(d, x) - var s = newStringStream() + when defined(gcArc) or defined(gcOrc)or defined(gcAtomicArc): + d = data + else: + shallowCopy(d, data) storeAny(s, toAny(d), stored) - result = s.data - -proc to*[T](data: string): T = - ## reads data and transforms it to a ``T``. - var tab = initTable[BiggestInt, pointer]() - loadAny(newStringStream(data), toAny(result), tab) - -when isMainModule: - template testit(x: expr) = echo($$to[type(x)]($$x)) - - var x: array[0..4, array[0..4, string]] = [ - ["test", "1", "2", "3", "4"], ["test", "1", "2", "3", "4"], - ["test", "1", "2", "3", "4"], ["test", "1", "2", "3", "4"], - ["test", "1", "2", "3", "4"]] - testit(x) - var test2: tuple[name: string, s: uint] = ("tuple test", 56u) - testit(test2) - - type - TE = enum - blah, blah2 - - TestObj = object - test, asd: int - case test2: TE - of blah: - help: string - else: - nil - - PNode = ref TNode - TNode = object - next, prev: PNode - data: string - - proc buildList(): PNode = - new(result) - new(result.next) - new(result.prev) - result.data = "middle" - result.next.data = "next" - result.prev.data = "prev" - result.next.next = result.prev - result.next.prev = result - result.prev.next = result - result.prev.prev = result.next - - var test3: TestObj - test3.test = 42 - test3.test2 = blah - testit(test3) - var test4: ref tuple[a, b: string] - new(test4) - test4.a = "ref string test: A" - test4.b = "ref string test: B" - testit(test4) - - var test5 = @[(0,1),(2,3),(4,5)] - testit(test5) +proc loadVM[T](typ: typedesc[T], x: T): string = + discard "the implementation is in the compiler/vmops" - var test6: set[char] = {'A'..'Z', '_'} - testit(test6) +proc `$$`*[T](x: 
sink T): string = + ## Returns a string representation of `x` (serialization, marshalling). + ## + ## **Note:** to serialize `x` to JSON use `%x` from the `json` module + ## or `jsonutils.toJson(x)`. + runnableExamples: + type + Foo = object + id: int + bar: string + let x = Foo(id: 1, bar: "baz") + ## serialize: + let y = $$x + assert y == """{"id": 1, "bar": "baz"}""" - var test7 = buildList() - echo($$test7) - testit(test7) + when nimvm: + result = loadVM(T, x) + else: + var stored = initIntSet() + var d: T + when defined(gcArc) or defined(gcOrc) or defined(gcAtomicArc): + d = x + else: + shallowCopy(d, x) + var s = newStringStream() + storeAny(s, toAny(d), stored) + result = s.data - type - TA {.inheritable.} = object - TB = object of TA - f: int +proc toVM[T](typ: typedesc[T], data: string): T = + discard "the implementation is in the compiler/vmops" - var - a: ref TA - b: ref TB - new(b) - a = b - echo($$a[]) # produces "{}", not "{f: 0}" +proc to*[T](data: string): T = + ## Reads data and transforms it to a type `T` (deserialization, unmarshalling). + runnableExamples: + type + Foo = object + id: int + bar: string + let y = """{"id": 1, "bar": "baz"}""" + assert typeof(y) is string + ## deserialize to type 'Foo': + let z = y.to[:Foo] + assert typeof(z) is Foo + assert z.id == 1 + assert z.bar == "baz" + when nimvm: + result = toVM(T, data) + else: + var tab = initTable[BiggestInt, pointer]() + loadAny(newStringStream(data), toAny(result), tab) diff --git a/lib/pure/matchers.nim b/lib/pure/matchers.nim deleted file mode 100644 index d55963c15..000000000 --- a/lib/pure/matchers.nim +++ /dev/null @@ -1,64 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2015 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module contains various string matchers for email addresses, etc. 
-{.deadCodeElim: on.} - -{.push debugger:off .} # the user does not want to trace a part - # of the standard library! - -include "system/inclrtl" - -import parseutils, strutils - -proc validEmailAddress*(s: string): bool {.noSideEffect, - rtl, extern: "nsuValidEmailAddress".} = - ## returns true if `s` seems to be a valid e-mail address. - ## The checking also uses a domain list. - const - chars = Letters + Digits + {'!','#','$','%','&', - '\'','*','+','/','=','?','^','_','`','{','}','|','~','-','.'} - var i = 0 - if s[i] notin chars or s[i] == '.': return false - while s[i] in chars: - if s[i] == '.' and s[i+1] == '.': return false - inc(i) - if s[i] != '@': return false - var j = len(s)-1 - if s[j] notin Letters: return false - while j >= i and s[j] in Letters: dec(j) - inc(i) # skip '@' - while s[i] in {'0'..'9', 'a'..'z', '-', '.'}: inc(i) - if s[i] != '\0': return false - - var x = substr(s, j+1) - if len(x) == 2 and x[0] in Letters and x[1] in Letters: return true - case toLower(x) - of "com", "org", "net", "gov", "mil", "biz", "info", "mobi", "name", - "aero", "jobs", "museum": return true - else: return false - -proc parseInt*(s: string, value: var int, validRange: Slice[int]) {. - noSideEffect, rtl, extern: "nmatchParseInt".} = - ## parses `s` into an integer in the range `validRange`. If successful, - ## `value` is modified to contain the result. Otherwise no exception is - ## raised and `value` is not touched; this way a reasonable default value - ## won't be overwritten. - var x = value - try: - discard parseutils.parseInt(s, x, 0) - except OverflowError: - discard - if x in validRange: value = x - -when isMainModule: - doAssert "wuseldusel@codehome.com".validEmailAddress - -{.pop.} - diff --git a/lib/pure/math.nim b/lib/pure/math.nim index c902af381..ed7d2382f 100644 --- a/lib/pure/math.nim +++ b/lib/pure/math.nim @@ -7,90 +7,349 @@ # distribution, for details about the copyright. # -## Constructive mathematics is naturally typed. 
-- Simon Thompson -## +## *Constructive mathematics is naturally typed.* -- Simon Thompson +## ## Basic math routines for Nim. +## +## Note that the trigonometric functions naturally operate on radians. +## The helper functions `degToRad <#degToRad,T>`_ and `radToDeg <#radToDeg,T>`_ +## provide conversion between radians and degrees. + +runnableExamples: + from std/fenv import epsilon + from std/random import rand + + proc generateGaussianNoise(mu: float = 0.0, sigma: float = 1.0): (float, float) = + # Generates values from a normal distribution. + # Translated from https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Implementation. + var u1: float + var u2: float + while true: + u1 = rand(1.0) + u2 = rand(1.0) + if u1 > epsilon(float): break + let mag = sigma * sqrt(-2 * ln(u1)) + let z0 = mag * cos(2 * PI * u2) + mu + let z1 = mag * sin(2 * PI * u2) + mu + (z0, z1) + + echo generateGaussianNoise() + ## This module is available for the `JavaScript target -## <backends.html#the-javascript-target>`_. +## <backends.html#backends-the-javascript-target>`_. +## +## See also +## ======== +## * `complex module <complex.html>`_ for complex numbers and their +## mathematical operations +## * `rationals module <rationals.html>`_ for rational numbers and their +## mathematical operations +## * `fenv module <fenv.html>`_ for handling of floating-point rounding +## and exceptions (overflow, zero-divide, etc.) +## * `random module <random.html>`_ for a fast and tiny random number generator +## * `stats module <stats.html>`_ for statistical analysis +## * `strformat module <strformat.html>`_ for formatting floats for printing +## * `system module <system.html>`_ for some very basic and trivial math operators +## (`shr`, `shl`, `xor`, `clamp`, etc.) + -include "system/inclrtl" -{.push debugger:off .} # the user does not want to trace a part +import std/private/since +{.push debugger: off.} # the user does not want to trace a part # of the standard library! 
-{.push checks:off, line_dir:off, stack_trace:off.} +import std/[bitops, fenv] +import system/countbits_impl + +when defined(nimPreviewSlimSystem): + import std/assertions + + +when not defined(js) and not defined(nimscript): # C + proc c_isnan(x: float): bool {.importc: "isnan", header: "<math.h>".} + # a generic like `x: SomeFloat` might work too if this is implemented via a C macro. + + proc c_copysign(x, y: cfloat): cfloat {.importc: "copysignf", header: "<math.h>".} + proc c_copysign(x, y: cdouble): cdouble {.importc: "copysign", header: "<math.h>".} + + proc c_signbit(x: SomeFloat): cint {.importc: "signbit", header: "<math.h>".} + + # don't export `c_frexp` in the future and remove `c_frexp2`. + func c_frexp2(x: cfloat, exponent: var cint): cfloat {. + importc: "frexpf", header: "<math.h>".} + func c_frexp2(x: cdouble, exponent: var cint): cdouble {. + importc: "frexp", header: "<math.h>".} + + type + div_t {.importc, header: "<stdlib.h>".} = object + quot: cint + rem: cint + ldiv_t {.importc, header: "<stdlib.h>".} = object + quot: clong + rem: clong + lldiv_t {.importc, header: "<stdlib.h>".} = object + quot: clonglong + rem: clonglong + + when cint isnot clong: + func divmod_c(x, y: cint): div_t {.importc: "div", header: "<stdlib.h>".} + when clong isnot clonglong: + func divmod_c(x, y: clonglong): lldiv_t {.importc: "lldiv", header: "<stdlib.h>".} + func divmod_c(x, y: clong): ldiv_t {.importc: "ldiv", header: "<stdlib.h>".} + func divmod*[T: SomeInteger](x, y: T): (T, T) {.inline.} = + ## Specialized instructions for computing both division and modulus. 
+ ## Return structure is: (quotient, remainder) + runnableExamples: + doAssert divmod(5, 2) == (2, 1) + doAssert divmod(5, -3) == (-1, 2) + when T is cint | clong | clonglong: + when compileOption("overflowChecks"): + if y == 0: + raise new(DivByZeroDefect) + elif (x == T.low and y == -1.T): + raise new(OverflowDefect) + let res = divmod_c(x, y) + result[0] = res.quot + result[1] = res.rem + else: + result[0] = x div y + result[1] = x mod y + +func binom*(n, k: int): int = + ## Computes the [binomial coefficient](https://en.wikipedia.org/wiki/Binomial_coefficient). + runnableExamples: + doAssert binom(6, 2) == 15 + doAssert binom(-6, 2) == 1 + doAssert binom(6, 0) == 1 + + if k <= 0: return 1 + if 2 * k > n: return binom(n, n - k) + result = n + for i in countup(2, k): + result = (result * (n + 1 - i)) div i + +func createFactTable[N: static[int]]: array[N, int] = + result[0] = 1 + for i in 1 ..< N: + result[i] = result[i - 1] * i -when defined(Posix) and not defined(haiku): +func fac*(n: int): int = + ## Computes the [factorial](https://en.wikipedia.org/wiki/Factorial) of + ## a non-negative integer `n`. 
+ ## + ## **See also:** + ## * `prod func <#prod,openArray[T]>`_ + runnableExamples: + doAssert fac(0) == 1 + doAssert fac(4) == 24 + doAssert fac(10) == 3628800 + + const factTable = + when sizeof(int) == 2: + createFactTable[5]() + elif sizeof(int) == 4: + createFactTable[13]() + else: + createFactTable[21]() + assert(n >= 0, $n & " must not be negative.") + assert(n < factTable.len, $n & " is too large to look up in the table") + factTable[n] + +{.push checks: off, line_dir: off, stack_trace: off.} + +when defined(posix) and not defined(genode) and not defined(macosx): {.passl: "-lm".} -when not defined(js): - import times const - PI* = 3.1415926535897932384626433 ## the circle constant PI (Ludolph's number) - E* = 2.71828182845904523536028747 ## Euler's number - - MaxFloat64Precision* = 16 ## maximum number of meaningful digits - ## after the decimal point for Nim's - ## ``float64`` type. - MaxFloat32Precision* = 8 ## maximum number of meaningful digits - ## after the decimal point for Nim's - ## ``float32`` type. - MaxFloatPrecision* = MaxFloat64Precision ## maximum number of - ## meaningful digits - ## after the decimal point - ## for Nim's ``float`` type. + PI* = 3.1415926535897932384626433 ## The circle constant PI (Ludolph's number). + TAU* = 2.0 * PI ## The circle constant TAU (= 2 * PI). + E* = 2.71828182845904523536028747 ## Euler's number. + + MaxFloat64Precision* = 16 ## Maximum number of meaningful digits + ## after the decimal point for Nim's + ## `float64` type. + MaxFloat32Precision* = 8 ## Maximum number of meaningful digits + ## after the decimal point for Nim's + ## `float32` type. + MaxFloatPrecision* = MaxFloat64Precision ## Maximum number of + ## meaningful digits + ## after the decimal point + ## for Nim's `float` type. + MinFloatNormal* = 2.225073858507201e-308 ## Smallest normal number for Nim's + ## `float` type (= 2^-1022). + RadPerDeg = PI / 180.0 ## Number of radians per degree. 
type - FloatClass* = enum ## describes the class a floating point value belongs to. - ## This is the type that is returned by `classify`. - fcNormal, ## value is an ordinary nonzero floating point value - fcSubnormal, ## value is a subnormal (a very small) floating point value - fcZero, ## value is zero - fcNegZero, ## value is the negative zero - fcNan, ## value is Not-A-Number (NAN) - fcInf, ## value is positive infinity - fcNegInf ## value is negative infinity - -proc classify*(x: float): FloatClass = - ## classifies a floating point value. Returns `x`'s class as specified by - ## `FloatClass`. - + FloatClass* = enum ## Describes the class a floating point value belongs to. + ## This is the type that is returned by the + ## `classify func <#classify,float>`_. + fcNormal, ## value is an ordinary nonzero floating point value + fcSubnormal, ## value is a subnormal (a very small) floating point value + fcZero, ## value is zero + fcNegZero, ## value is the negative zero + fcNan, ## value is Not a Number (NaN) + fcInf, ## value is positive infinity + fcNegInf ## value is negative infinity + +func isNaN*(x: SomeFloat): bool {.inline, since: (1,5,1).} = + ## Returns whether `x` is a `NaN`, more efficiently than via `classify(x) == fcNan`. + ## Works even with `--passc:-ffast-math`. 
+ runnableExamples: + doAssert NaN.isNaN + doAssert not Inf.isNaN + doAssert not isNaN(3.1415926) + + template fn: untyped = result = x != x + when nimvm: fn() + else: + when defined(js) or defined(nimscript): fn() + else: result = c_isnan(x) + +when defined(js): + import std/private/jsutils + + proc toBitsImpl(x: float): array[2, uint32] = + let buffer = newArrayBuffer(8) + let a = newFloat64Array(buffer) + let b = newUint32Array(buffer) + a[0] = x + {.emit: "`result` = `b`;".} + # result = cast[array[2, uint32]](b) + + proc jsSetSign(x: float, sgn: bool): float = + let buffer = newArrayBuffer(8) + let a = newFloat64Array(buffer) + let b = newUint32Array(buffer) + a[0] = x + {.emit: """ + function updateBit(num, bitPos, bitVal) { + return (num & ~(1 << bitPos)) | (bitVal << bitPos); + } + `b`[1] = updateBit(`b`[1], 31, `sgn`); + `result` = `a`[0]; + """.} + +proc signbit*(x: SomeFloat): bool {.inline, since: (1, 5, 1).} = + ## Returns true if `x` is negative, false otherwise. + runnableExamples: + doAssert not signbit(0.0) + doAssert signbit(-0.0) + doAssert signbit(-0.1) + doAssert not signbit(0.1) + + when defined(js): + let uintBuffer = toBitsImpl(x) + result = (uintBuffer[1] shr 31) != 0 + else: + result = c_signbit(x) != 0 + +func copySign*[T: SomeFloat](x, y: T): T {.inline, since: (1, 5, 1).} = + ## Returns a value with the magnitude of `x` and the sign of `y`; + ## this works even if x or y are NaN, infinity or zero, all of which can carry a sign. 
+ runnableExamples: + doAssert copySign(10.0, 1.0) == 10.0 + doAssert copySign(10.0, -1.0) == -10.0 + doAssert copySign(-Inf, -0.0) == -Inf + doAssert copySign(NaN, 1.0).isNaN + doAssert copySign(1.0, copySign(NaN, -1.0)) == -1.0 + + # TODO: use signbit for examples + when defined(js): + let uintBuffer = toBitsImpl(y) + let sgn = (uintBuffer[1] shr 31) != 0 + result = jsSetSign(x, sgn) + else: + when nimvm: # not exact but we have a vmops for recent enough nim + if y > 0.0 or (y == 0.0 and 1.0 / y > 0.0): + result = abs(x) + elif y <= 0.0: + result = -abs(x) + else: # must be NaN + result = abs(x) + else: result = c_copysign(x, y) + +func classify*(x: float): FloatClass = + ## Classifies a floating point value. + ## + ## Returns `x`'s class as specified by the `FloatClass enum<#FloatClass>`_. + runnableExamples: + doAssert classify(0.3) == fcNormal + doAssert classify(0.0) == fcZero + doAssert classify(0.3 / 0.0) == fcInf + doAssert classify(-0.3 / 0.0) == fcNegInf + doAssert classify(5.0e-324) == fcSubnormal + # JavaScript and most C compilers have no classify: + if isNan(x): return fcNan if x == 0.0: - if 1.0/x == Inf: + if 1.0 / x == Inf: return fcZero else: return fcNegZero - if x*0.5 == x: + if x * 0.5 == x: if x > 0.0: return fcInf else: return fcNegInf - if x != x: return fcNan + if abs(x) < MinFloatNormal: + return fcSubnormal return fcNormal - # XXX: fcSubnormal is not detected! +func almostEqual*[T: SomeFloat](x, y: T; unitsInLastPlace: Natural = 4): bool {. + since: (1, 5), inline.} = + ## Checks if two float values are almost equal, using the + ## [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon). + ## + ## `unitsInLastPlace` is the max number of + ## [units in the last place](https://en.wikipedia.org/wiki/Unit_in_the_last_place) + ## difference tolerated when comparing two numbers. The larger the value, the + ## more error is allowed. A `0` value means that two numbers must be exactly the + ## same to be considered equal. 
+ ## + ## The machine epsilon has to be scaled to the magnitude of the values used + ## and multiplied by the desired precision in ULPs unless the difference is + ## subnormal. + ## + # taken from: https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon + runnableExamples: + doAssert almostEqual(PI, 3.14159265358979) + doAssert almostEqual(Inf, Inf) + doAssert not almostEqual(NaN, NaN) -proc binom*(n, k: int): int {.noSideEffect.} = - ## computes the binomial coefficient - if k <= 0: return 1 - if 2*k > n: return binom(n, n-k) - result = n - for i in countup(2, k): - result = (result * (n + 1 - i)) div i - -proc fac*(n: int): int {.noSideEffect.} = - ## computes the faculty/factorial function. - result = 1 - for i in countup(2, n): - result = result * i - -proc isPowerOfTwo*(x: int): bool {.noSideEffect.} = - ## returns true, if `x` is a power of two, false otherwise. + if x == y: + # short circuit exact equality -- needed to catch two infinities of + # the same sign. And perhaps speeds things up a bit sometimes. + return true + let diff = abs(x - y) + result = diff <= epsilon(T) * abs(x + y) * T(unitsInLastPlace) or + diff < minimumPositiveValue(T) + +func isPowerOfTwo*(x: int): bool = + ## Returns `true`, if `x` is a power of two, `false` otherwise. + ## ## Zero and negative numbers are not a power of two. - return (x != 0) and ((x and (x - 1)) == 0) + ## + ## **See also:** + ## * `nextPowerOfTwo func <#nextPowerOfTwo,int>`_ + runnableExamples: + doAssert isPowerOfTwo(16) + doAssert not isPowerOfTwo(5) + doAssert not isPowerOfTwo(0) + doAssert not isPowerOfTwo(-16) + + return (x > 0) and ((x and (x - 1)) == 0) -proc nextPowerOfTwo*(x: int): int {.noSideEffect.} = - ## returns `x` rounded up to the nearest power of two. +func nextPowerOfTwo*(x: int): int = + ## Returns `x` rounded up to the nearest power of two. + ## ## Zero and negative numbers get rounded up to 1. 
- result = x - 1 + ## + ## **See also:** + ## * `isPowerOfTwo func <#isPowerOfTwo,int>`_ + runnableExamples: + doAssert nextPowerOfTwo(16) == 16 + doAssert nextPowerOfTwo(5) == 8 + doAssert nextPowerOfTwo(0) == 1 + doAssert nextPowerOfTwo(-16) == 1 + + result = x - 1 when defined(cpu64): result = result or (result shr 32) when sizeof(int) > 2: @@ -100,273 +359,956 @@ proc nextPowerOfTwo*(x: int): int {.noSideEffect.} = result = result or (result shr 4) result = result or (result shr 2) result = result or (result shr 1) - result += 1 + ord(x<=0) + result += 1 + ord(x <= 0) -proc countBits32*(n: int32): int {.noSideEffect.} = - ## counts the set bits in `n`. - var v = n - v = v -% ((v shr 1'i32) and 0x55555555'i32) - v = (v and 0x33333333'i32) +% ((v shr 2'i32) and 0x33333333'i32) - result = ((v +% (v shr 4'i32) and 0xF0F0F0F'i32) *% 0x1010101'i32) shr 24'i32 -proc sum*[T](x: openArray[T]): T {.noSideEffect.} = - ## computes the sum of the elements in `x`. - ## If `x` is empty, 0 is returned. - for i in items(x): result = result + i -proc mean*(x: openArray[float]): float {.noSideEffect.} = - ## computes the mean of the elements in `x`. - ## If `x` is empty, NaN is returned. - result = sum(x) / toFloat(len(x)) - -proc variance*(x: openArray[float]): float {.noSideEffect.} = - ## computes the variance of the elements in `x`. - ## If `x` is empty, NaN is returned. - result = 0.0 - var m = mean(x) - for i in 0 .. high(x): - var diff = x[i] - m - result = result + diff*diff - result = result / toFloat(len(x)) - -proc random*(max: int): int {.benign.} - ## returns a random number in the range 0..max-1. The sequence of - ## random number is always the same, unless `randomize` is called - ## which initializes the random number generator with a "random" - ## number, i.e. a tickcount. - -proc random*(max: float): float {.benign.} - ## returns a random number in the range 0..<max. 
The sequence of - ## random number is always the same, unless `randomize` is called - ## which initializes the random number generator with a "random" - ## number, i.e. a tickcount. This has a 16-bit resolution on windows - ## and a 48-bit resolution on other platforms. - -proc randomize*() {.benign.} - ## initializes the random number generator with a "random" - ## number, i.e. a tickcount. Note: Does nothing for the JavaScript target, - ## as JavaScript does not support this. - -proc randomize*(seed: int) {.benign.} - ## initializes the random number generator with a specific seed. - ## Note: Does nothing for the JavaScript target, - ## as JavaScript does not support this. - -when not defined(JS): - proc sqrt*(x: float): float {.importc: "sqrt", header: "<math.h>".} - ## computes the square root of `x`. - - proc ln*(x: float): float {.importc: "log", header: "<math.h>".} - ## computes ln(x). - proc log10*(x: float): float {.importc: "log10", header: "<math.h>".} - proc log2*(x: float): float = return ln(x) / ln(2.0) - proc exp*(x: float): float {.importc: "exp", header: "<math.h>".} - ## computes e**x. - - proc frexp*(x: float, exponent: var int): float {. - importc: "frexp", header: "<math.h>".} - ## Split a number into mantissa and exponent. - ## `frexp` calculates the mantissa m (a float greater than or equal to 0.5 - ## and less than 1) and the integer value n such that `x` (the original - ## float value) equals m * 2**n. frexp stores n in `exponent` and returns - ## m. - - proc round*(x: float): int {.importc: "lrint", header: "<math.h>".} - ## converts a float to an int by rounding. - - proc arccos*(x: float): float {.importc: "acos", header: "<math.h>".} - proc arcsin*(x: float): float {.importc: "asin", header: "<math.h>".} - proc arctan*(x: float): float {.importc: "atan", header: "<math.h>".} - proc arctan2*(y, x: float): float {.importc: "atan2", header: "<math.h>".} - ## Calculate the arc tangent of `y` / `x`. 
- ## `atan2` returns the arc tangent of `y` / `x`; it produces correct - ## results even when the resulting angle is near pi/2 or -pi/2 - ## (`x` near 0). - - proc cos*(x: float): float {.importc: "cos", header: "<math.h>".} - proc cosh*(x: float): float {.importc: "cosh", header: "<math.h>".} - proc hypot*(x, y: float): float {.importc: "hypot", header: "<math.h>".} - ## same as ``sqrt(x*x + y*y)``. - - proc sinh*(x: float): float {.importc: "sinh", header: "<math.h>".} - proc sin*(x: float): float {.importc: "sin", header: "<math.h>".} - proc tan*(x: float): float {.importc: "tan", header: "<math.h>".} - proc tanh*(x: float): float {.importc: "tanh", header: "<math.h>".} - proc pow*(x, y: float): float {.importc: "pow", header: "<math.h>".} - ## computes x to power raised of y. - - # C procs: - proc srand(seed: cint) {.importc: "srand", header: "<stdlib.h>".} - proc rand(): cint {.importc: "rand", header: "<stdlib.h>".} - - when not defined(windows): - proc srand48(seed: clong) {.importc: "srand48", header: "<stdlib.h>".} - proc drand48(): float {.importc: "drand48", header: "<stdlib.h>".} - proc random(max: float): float = - result = drand48() * max - when defined(windows): - proc random(max: float): float = - # we are hardcodeing this because - # importcing macros is extremely problematic - # and because the value is publicly documented - # on MSDN and very unlikely to change - const rand_max = 32767 - result = (float(rand()) / float(rand_max)) * max - proc randomize() = - randomize(cast[int](epochTime())) - - proc randomize(seed: int) = - srand(cint(seed)) - when declared(srand48): srand48(seed) - proc random(max: int): int = - result = int(rand()) mod max - - proc trunc*(x: float): float {.importc: "trunc", header: "<math.h>".} - proc floor*(x: float): float {.importc: "floor", header: "<math.h>".} - proc ceil*(x: float): float {.importc: "ceil", header: "<math.h>".} - - proc fmod*(x, y: float): float {.importc: "fmod", header: "<math.h>".} - -else: - proc 
mathrandom(): float {.importc: "Math.random", nodecl.} - proc floor*(x: float): float {.importc: "Math.floor", nodecl.} - proc ceil*(x: float): float {.importc: "Math.ceil", nodecl.} - proc random(max: int): int = - result = int(floor(mathrandom() * float(max))) - proc random(max: float): float = - result = float(mathrandom() * float(max)) - proc randomize() = discard - proc randomize(seed: int) = discard + +when not defined(js): # C + func sqrt*(x: float32): float32 {.importc: "sqrtf", header: "<math.h>".} + func sqrt*(x: float64): float64 {.importc: "sqrt", header: "<math.h>".} = + ## Computes the square root of `x`. + ## + ## **See also:** + ## * `cbrt func <#cbrt,float64>`_ for the cube root + runnableExamples: + doAssert almostEqual(sqrt(4.0), 2.0) + doAssert almostEqual(sqrt(1.44), 1.2) + func cbrt*(x: float32): float32 {.importc: "cbrtf", header: "<math.h>".} + func cbrt*(x: float64): float64 {.importc: "cbrt", header: "<math.h>".} = + ## Computes the cube root of `x`. + ## + ## **See also:** + ## * `sqrt func <#sqrt,float64>`_ for the square root + runnableExamples: + doAssert almostEqual(cbrt(8.0), 2.0) + doAssert almostEqual(cbrt(2.197), 1.3) + doAssert almostEqual(cbrt(-27.0), -3.0) + func ln*(x: float32): float32 {.importc: "logf", header: "<math.h>".} + func ln*(x: float64): float64 {.importc: "log", header: "<math.h>".} = + ## Computes the [natural logarithm](https://en.wikipedia.org/wiki/Natural_logarithm) + ## of `x`. 
+ ## + ## **See also:** + ## * `log func <#log,T,T>`_ + ## * `log10 func <#log10,float64>`_ + ## * `log2 func <#log2,float64>`_ + ## * `exp func <#exp,float64>`_ + runnableExamples: + doAssert almostEqual(ln(exp(4.0)), 4.0) + doAssert almostEqual(ln(1.0), 0.0) + doAssert almostEqual(ln(0.0), -Inf) + doAssert ln(-7.0).isNaN +else: # JS + func sqrt*(x: float32): float32 {.importc: "Math.sqrt", nodecl.} + func sqrt*(x: float64): float64 {.importc: "Math.sqrt", nodecl.} + + func cbrt*(x: float32): float32 {.importc: "Math.cbrt", nodecl.} + func cbrt*(x: float64): float64 {.importc: "Math.cbrt", nodecl.} + + func ln*(x: float32): float32 {.importc: "Math.log", nodecl.} + func ln*(x: float64): float64 {.importc: "Math.log", nodecl.} + +func log*[T: SomeFloat](x, base: T): T = + ## Computes the logarithm of `x` to base `base`. + ## + ## **See also:** + ## * `ln func <#ln,float64>`_ + ## * `log10 func <#log10,float64>`_ + ## * `log2 func <#log2,float64>`_ + runnableExamples: + doAssert almostEqual(log(9.0, 3.0), 2.0) + doAssert almostEqual(log(0.0, 2.0), -Inf) + doAssert log(-7.0, 4.0).isNaN + doAssert log(8.0, -2.0).isNaN + + ln(x) / ln(base) + +when not defined(js): # C + func log10*(x: float32): float32 {.importc: "log10f", header: "<math.h>".} + func log10*(x: float64): float64 {.importc: "log10", header: "<math.h>".} = + ## Computes the common logarithm (base 10) of `x`. + ## + ## **See also:** + ## * `ln func <#ln,float64>`_ + ## * `log func <#log,T,T>`_ + ## * `log2 func <#log2,float64>`_ + runnableExamples: + doAssert almostEqual(log10(100.0) , 2.0) + doAssert almostEqual(log10(0.0), -Inf) + doAssert log10(-100.0).isNaN + func exp*(x: float32): float32 {.importc: "expf", header: "<math.h>".} + func exp*(x: float64): float64 {.importc: "exp", header: "<math.h>".} = + ## Computes the exponential function of `x` (`e^x`). 
+ ## + ## **See also:** + ## * `ln func <#ln,float64>`_ + runnableExamples: + doAssert almostEqual(exp(1.0), E) + doAssert almostEqual(ln(exp(4.0)), 4.0) + doAssert almostEqual(exp(0.0), 1.0) + func sin*(x: float32): float32 {.importc: "sinf", header: "<math.h>".} + func sin*(x: float64): float64 {.importc: "sin", header: "<math.h>".} = + ## Computes the sine of `x`. + ## + ## **See also:** + ## * `arcsin func <#arcsin,float64>`_ + runnableExamples: + doAssert almostEqual(sin(PI / 6), 0.5) + doAssert almostEqual(sin(degToRad(90.0)), 1.0) + func cos*(x: float32): float32 {.importc: "cosf", header: "<math.h>".} + func cos*(x: float64): float64 {.importc: "cos", header: "<math.h>".} = + ## Computes the cosine of `x`. + ## + ## **See also:** + ## * `arccos func <#arccos,float64>`_ + runnableExamples: + doAssert almostEqual(cos(2 * PI), 1.0) + doAssert almostEqual(cos(degToRad(60.0)), 0.5) + func tan*(x: float32): float32 {.importc: "tanf", header: "<math.h>".} + func tan*(x: float64): float64 {.importc: "tan", header: "<math.h>".} = + ## Computes the tangent of `x`. + ## + ## **See also:** + ## * `arctan func <#arctan,float64>`_ + runnableExamples: + doAssert almostEqual(tan(degToRad(45.0)), 1.0) + doAssert almostEqual(tan(PI / 4), 1.0) + func sinh*(x: float32): float32 {.importc: "sinhf", header: "<math.h>".} + func sinh*(x: float64): float64 {.importc: "sinh", header: "<math.h>".} = + ## Computes the [hyperbolic sine](https://en.wikipedia.org/wiki/Hyperbolic_function#Definitions) of `x`. + ## + ## **See also:** + ## * `arcsinh func <#arcsinh,float64>`_ + runnableExamples: + doAssert almostEqual(sinh(0.0), 0.0) + doAssert almostEqual(sinh(1.0), 1.175201193643801) + func cosh*(x: float32): float32 {.importc: "coshf", header: "<math.h>".} + func cosh*(x: float64): float64 {.importc: "cosh", header: "<math.h>".} = + ## Computes the [hyperbolic cosine](https://en.wikipedia.org/wiki/Hyperbolic_function#Definitions) of `x`. 
+ ## + ## **See also:** + ## * `arccosh func <#arccosh,float64>`_ + runnableExamples: + doAssert almostEqual(cosh(0.0), 1.0) + doAssert almostEqual(cosh(1.0), 1.543080634815244) + func tanh*(x: float32): float32 {.importc: "tanhf", header: "<math.h>".} + func tanh*(x: float64): float64 {.importc: "tanh", header: "<math.h>".} = + ## Computes the [hyperbolic tangent](https://en.wikipedia.org/wiki/Hyperbolic_function#Definitions) of `x`. + ## + ## **See also:** + ## * `arctanh func <#arctanh,float64>`_ + runnableExamples: + doAssert almostEqual(tanh(0.0), 0.0) + doAssert almostEqual(tanh(1.0), 0.7615941559557649) + func arcsin*(x: float32): float32 {.importc: "asinf", header: "<math.h>".} + func arcsin*(x: float64): float64 {.importc: "asin", header: "<math.h>".} = + ## Computes the arc sine of `x`. + ## + ## **See also:** + ## * `sin func <#sin,float64>`_ + runnableExamples: + doAssert almostEqual(radToDeg(arcsin(0.0)), 0.0) + doAssert almostEqual(radToDeg(arcsin(1.0)), 90.0) + func arccos*(x: float32): float32 {.importc: "acosf", header: "<math.h>".} + func arccos*(x: float64): float64 {.importc: "acos", header: "<math.h>".} = + ## Computes the arc cosine of `x`. + ## + ## **See also:** + ## * `cos func <#cos,float64>`_ + runnableExamples: + doAssert almostEqual(radToDeg(arccos(0.0)), 90.0) + doAssert almostEqual(radToDeg(arccos(1.0)), 0.0) + func arctan*(x: float32): float32 {.importc: "atanf", header: "<math.h>".} + func arctan*(x: float64): float64 {.importc: "atan", header: "<math.h>".} = + ## Calculate the arc tangent of `x`. + ## + ## **See also:** + ## * `arctan2 func <#arctan2,float64,float64>`_ + ## * `tan func <#tan,float64>`_ + runnableExamples: + doAssert almostEqual(arctan(1.0), 0.7853981633974483) + doAssert almostEqual(radToDeg(arctan(1.0)), 45.0) + func arctan2*(y, x: float32): float32 {.importc: "atan2f", header: "<math.h>".} + func arctan2*(y, x: float64): float64 {.importc: "atan2", header: "<math.h>".} = + ## Calculate the arc tangent of `y/x`. 
+ ## + ## It produces correct results even when the resulting angle is near + ## `PI/2` or `-PI/2` (`x` near 0). + ## + ## **See also:** + ## * `arctan func <#arctan,float64>`_ + runnableExamples: + doAssert almostEqual(arctan2(1.0, 0.0), PI / 2.0) + doAssert almostEqual(radToDeg(arctan2(1.0, 0.0)), 90.0) + func arcsinh*(x: float32): float32 {.importc: "asinhf", header: "<math.h>".} + func arcsinh*(x: float64): float64 {.importc: "asinh", header: "<math.h>".} + ## Computes the inverse hyperbolic sine of `x`. + ## + ## **See also:** + ## * `sinh func <#sinh,float64>`_ + func arccosh*(x: float32): float32 {.importc: "acoshf", header: "<math.h>".} + func arccosh*(x: float64): float64 {.importc: "acosh", header: "<math.h>".} + ## Computes the inverse hyperbolic cosine of `x`. + ## + ## **See also:** + ## * `cosh func <#cosh,float64>`_ + func arctanh*(x: float32): float32 {.importc: "atanhf", header: "<math.h>".} + func arctanh*(x: float64): float64 {.importc: "atanh", header: "<math.h>".} + ## Computes the inverse hyperbolic tangent of `x`. 
+ ## + ## **See also:** + ## * `tanh func <#tanh,float64>`_ + +else: # JS + func log10*(x: float32): float32 {.importc: "Math.log10", nodecl.} + func log10*(x: float64): float64 {.importc: "Math.log10", nodecl.} + func log2*(x: float32): float32 {.importc: "Math.log2", nodecl.} + func log2*(x: float64): float64 {.importc: "Math.log2", nodecl.} + func exp*(x: float32): float32 {.importc: "Math.exp", nodecl.} + func exp*(x: float64): float64 {.importc: "Math.exp", nodecl.} + + func sin*[T: float32|float64](x: T): T {.importc: "Math.sin", nodecl.} + func cos*[T: float32|float64](x: T): T {.importc: "Math.cos", nodecl.} + func tan*[T: float32|float64](x: T): T {.importc: "Math.tan", nodecl.} + + func sinh*[T: float32|float64](x: T): T {.importc: "Math.sinh", nodecl.} + func cosh*[T: float32|float64](x: T): T {.importc: "Math.cosh", nodecl.} + func tanh*[T: float32|float64](x: T): T {.importc: "Math.tanh", nodecl.} + + func arcsin*[T: float32|float64](x: T): T {.importc: "Math.asin", nodecl.} + # keep this as generic or update test in `tvmops.nim` to make sure we + # keep testing that generic importc procs work + func arccos*[T: float32|float64](x: T): T {.importc: "Math.acos", nodecl.} + func arctan*[T: float32|float64](x: T): T {.importc: "Math.atan", nodecl.} + func arctan2*[T: float32|float64](y, x: T): T {.importc: "Math.atan2", nodecl.} + + func arcsinh*[T: float32|float64](x: T): T {.importc: "Math.asinh", nodecl.} + func arccosh*[T: float32|float64](x: T): T {.importc: "Math.acosh", nodecl.} + func arctanh*[T: float32|float64](x: T): T {.importc: "Math.atanh", nodecl.} + +func cot*[T: float32|float64](x: T): T = 1.0 / tan(x) + ## Computes the cotangent of `x` (`1/tan(x)`). +func sec*[T: float32|float64](x: T): T = 1.0 / cos(x) + ## Computes the secant of `x` (`1/cos(x)`). +func csc*[T: float32|float64](x: T): T = 1.0 / sin(x) + ## Computes the cosecant of `x` (`1/sin(x)`). 
+ +func coth*[T: float32|float64](x: T): T = 1.0 / tanh(x) + ## Computes the hyperbolic cotangent of `x` (`1/tanh(x)`). +func sech*[T: float32|float64](x: T): T = 1.0 / cosh(x) + ## Computes the hyperbolic secant of `x` (`1/cosh(x)`). +func csch*[T: float32|float64](x: T): T = 1.0 / sinh(x) + ## Computes the hyperbolic cosecant of `x` (`1/sinh(x)`). + +func arccot*[T: float32|float64](x: T): T = arctan(1.0 / x) + ## Computes the inverse cotangent of `x` (`arctan(1/x)`). +func arcsec*[T: float32|float64](x: T): T = arccos(1.0 / x) + ## Computes the inverse secant of `x` (`arccos(1/x)`). +func arccsc*[T: float32|float64](x: T): T = arcsin(1.0 / x) + ## Computes the inverse cosecant of `x` (`arcsin(1/x)`). + +func arccoth*[T: float32|float64](x: T): T = arctanh(1.0 / x) + ## Computes the inverse hyperbolic cotangent of `x` (`arctanh(1/x)`). +func arcsech*[T: float32|float64](x: T): T = arccosh(1.0 / x) + ## Computes the inverse hyperbolic secant of `x` (`arccosh(1/x)`). +func arccsch*[T: float32|float64](x: T): T = arcsinh(1.0 / x) + ## Computes the inverse hyperbolic cosecant of `x` (`arcsinh(1/x)`). + +const windowsCC89 = defined(windows) and defined(bcc) + +when not defined(js): # C + func hypot*(x, y: float32): float32 {.importc: "hypotf", header: "<math.h>".} + func hypot*(x, y: float64): float64 {.importc: "hypot", header: "<math.h>".} = + ## Computes the length of the hypotenuse of a right-angle triangle with + ## `x` as its base and `y` as its height. Equivalent to `sqrt(x*x + y*y)`. + runnableExamples: + doAssert almostEqual(hypot(3.0, 4.0), 5.0) + func pow*(x, y: float32): float32 {.importc: "powf", header: "<math.h>".} + func pow*(x, y: float64): float64 {.importc: "pow", header: "<math.h>".} = + ## Computes `x` raised to the power of `y`. + ## + ## To compute the power between integers (e.g. 2^6), + ## use the `^ func <#^,T,Natural>`_. 
+ ## + ## **See also:** + ## * `^ func <#^,T,Natural>`_ + ## * `sqrt func <#sqrt,float64>`_ + ## * `cbrt func <#cbrt,float64>`_ + runnableExamples: + doAssert almostEqual(pow(100, 1.5), 1000.0) + doAssert almostEqual(pow(16.0, 0.5), 4.0) + + # TODO: add C89 version on windows + when not windowsCC89: + func erf*(x: float32): float32 {.importc: "erff", header: "<math.h>".} + func erf*(x: float64): float64 {.importc: "erf", header: "<math.h>".} + ## Computes the [error function](https://en.wikipedia.org/wiki/Error_function) for `x`. + ## + ## **Note:** Not available for the JS backend. + func erfc*(x: float32): float32 {.importc: "erfcf", header: "<math.h>".} + func erfc*(x: float64): float64 {.importc: "erfc", header: "<math.h>".} + ## Computes the [complementary error function](https://en.wikipedia.org/wiki/Error_function#Complementary_error_function) for `x`. + ## + ## **Note:** Not available for the JS backend. + func gamma*(x: float32): float32 {.importc: "tgammaf", header: "<math.h>".} + func gamma*(x: float64): float64 {.importc: "tgamma", header: "<math.h>".} = + ## Computes the [gamma function](https://en.wikipedia.org/wiki/Gamma_function) for `x`. + ## + ## **Note:** Not available for the JS backend. + ## + ## **See also:** + ## * `lgamma func <#lgamma,float64>`_ for the natural logarithm of the gamma function + runnableExamples: + doAssert almostEqual(gamma(1.0), 1.0) + doAssert almostEqual(gamma(4.0), 6.0) + doAssert almostEqual(gamma(11.0), 3628800.0) + func lgamma*(x: float32): float32 {.importc: "lgammaf", header: "<math.h>".} + func lgamma*(x: float64): float64 {.importc: "lgamma", header: "<math.h>".} = + ## Computes the natural logarithm of the gamma function for `x`. + ## + ## **Note:** Not available for the JS backend. 
+ ## + ## **See also:** + ## * `gamma func <#gamma,float64>`_ for gamma function + + func floor*(x: float32): float32 {.importc: "floorf", header: "<math.h>".} + func floor*(x: float64): float64 {.importc: "floor", header: "<math.h>".} = + ## Computes the floor function (i.e. the largest integer not greater than `x`). + ## + ## **See also:** + ## * `ceil func <#ceil,float64>`_ + ## * `round func <#round,float64>`_ + ## * `trunc func <#trunc,float64>`_ + runnableExamples: + doAssert floor(2.1) == 2.0 + doAssert floor(2.9) == 2.0 + doAssert floor(-3.5) == -4.0 + + func ceil*(x: float32): float32 {.importc: "ceilf", header: "<math.h>".} + func ceil*(x: float64): float64 {.importc: "ceil", header: "<math.h>".} = + ## Computes the ceiling function (i.e. the smallest integer not smaller + ## than `x`). + ## + ## **See also:** + ## * `floor func <#floor,float64>`_ + ## * `round func <#round,float64>`_ + ## * `trunc func <#trunc,float64>`_ + runnableExamples: + doAssert ceil(2.1) == 3.0 + doAssert ceil(2.9) == 3.0 + doAssert ceil(-2.1) == -2.0 + + when windowsCC89: + # MSVC 2010 don't have trunc/truncf + # this implementation was inspired by Go-lang Math.Trunc + func truncImpl(f: float64): float64 = + const + mask: uint64 = 0x7FF + shift: uint64 = 64 - 12 + bias: uint64 = 0x3FF + + if f < 1: + if f < 0: return -truncImpl(-f) + elif f == 0: return f # Return -0 when f == -0 + else: return 0 + + var x = cast[uint64](f) + let e = (x shr shift) and mask - bias + + # Keep the top 12+e bits, the integer part; clear the rest. 
+ if e < 64 - 12: + x = x and (not (1'u64 shl (64'u64 - 12'u64 - e) - 1'u64)) + + result = cast[float64](x) + + func truncImpl(f: float32): float32 = + const + mask: uint32 = 0xFF + shift: uint32 = 32 - 9 + bias: uint32 = 0x7F + + if f < 1: + if f < 0: return -truncImpl(-f) + elif f == 0: return f # Return -0 when f == -0 + else: return 0 + + var x = cast[uint32](f) + let e = (x shr shift) and mask - bias + + # Keep the top 9+e bits, the integer part; clear the rest. + if e < 32 - 9: + x = x and (not (1'u32 shl (32'u32 - 9'u32 - e) - 1'u32)) + + result = cast[float32](x) + + func trunc*(x: float64): float64 = + if classify(x) in {fcZero, fcNegZero, fcNan, fcInf, fcNegInf}: return x + result = truncImpl(x) + + func trunc*(x: float32): float32 = + if classify(x) in {fcZero, fcNegZero, fcNan, fcInf, fcNegInf}: return x + result = truncImpl(x) + + func round*[T: float32|float64](x: T): T = + ## Windows compilers prior to MSVC 2012 do not implement 'round', + ## 'roundl' or 'roundf'. + result = if x < 0.0: ceil(x - T(0.5)) else: floor(x + T(0.5)) + else: + func round*(x: float32): float32 {.importc: "roundf", header: "<math.h>".} + func round*(x: float64): float64 {.importc: "round", header: "<math.h>".} = + ## Rounds a float to zero decimal places. + ## + ## Used internally by the `round func <#round,T,int>`_ + ## when the specified number of places is 0. + ## + ## **See also:** + ## * `round func <#round,T,int>`_ for rounding to the specific + ## number of decimal places + ## * `floor func <#floor,float64>`_ + ## * `ceil func <#ceil,float64>`_ + ## * `trunc func <#trunc,float64>`_ + runnableExamples: + doAssert round(3.4) == 3.0 + doAssert round(3.5) == 4.0 + doAssert round(4.5) == 5.0 + + func trunc*(x: float32): float32 {.importc: "truncf", header: "<math.h>".} + func trunc*(x: float64): float64 {.importc: "trunc", header: "<math.h>".} = + ## Truncates `x` to the decimal point. 
+ ## + ## **See also:** + ## * `floor func <#floor,float64>`_ + ## * `ceil func <#ceil,float64>`_ + ## * `round func <#round,float64>`_ + runnableExamples: + doAssert trunc(PI) == 3.0 + doAssert trunc(-1.85) == -1.0 + + func `mod`*(x, y: float32): float32 {.importc: "fmodf", header: "<math.h>".} + func `mod`*(x, y: float64): float64 {.importc: "fmod", header: "<math.h>".} = + ## Computes the modulo operation for float values (the remainder of `x` divided by `y`). + ## + ## **See also:** + ## * `floorMod func <#floorMod,T,T>`_ for Python-like (`%` operator) behavior + runnableExamples: + doAssert 6.5 mod 2.5 == 1.5 + doAssert -6.5 mod 2.5 == -1.5 + doAssert 6.5 mod -2.5 == 1.5 + doAssert -6.5 mod -2.5 == -1.5 + +else: # JS + func hypot*(x, y: float32): float32 {.importc: "Math.hypot", varargs, nodecl.} + func hypot*(x, y: float64): float64 {.importc: "Math.hypot", varargs, nodecl.} + func pow*(x, y: float32): float32 {.importc: "Math.pow", nodecl.} + func pow*(x, y: float64): float64 {.importc: "Math.pow", nodecl.} + func floor*(x: float32): float32 {.importc: "Math.floor", nodecl.} + func floor*(x: float64): float64 {.importc: "Math.floor", nodecl.} + func ceil*(x: float32): float32 {.importc: "Math.ceil", nodecl.} + func ceil*(x: float64): float64 {.importc: "Math.ceil", nodecl.} + + when (NimMajor, NimMinor) < (1, 5) or defined(nimLegacyJsRound): + func round*(x: float): float {.importc: "Math.round", nodecl.} + else: + func jsRound(x: float): float {.importc: "Math.round", nodecl.} + func round*[T: float64 | float32](x: T): T = + if x >= 0: result = jsRound(x) + else: + result = ceil(x) + if result - x >= T(0.5): + result -= T(1.0) + func trunc*(x: float32): float32 {.importc: "Math.trunc", nodecl.} + func trunc*(x: float64): float64 {.importc: "Math.trunc", nodecl.} + + func `mod`*(x, y: float32): float32 {.importjs: "(# % #)".} + func `mod`*(x, y: float64): float64 {.importjs: "(# % #)".} = + ## Computes the modulo operation for float values (the remainder of 
`x` divided by `y`). + runnableExamples: + doAssert 6.5 mod 2.5 == 1.5 + doAssert -6.5 mod 2.5 == -1.5 + doAssert 6.5 mod -2.5 == 1.5 + doAssert -6.5 mod -2.5 == -1.5 - proc sqrt*(x: float): float {.importc: "Math.sqrt", nodecl.} - proc ln*(x: float): float {.importc: "Math.log", nodecl.} - proc log10*(x: float): float = return ln(x) / ln(10.0) - proc log2*(x: float): float = return ln(x) / ln(2.0) - - proc exp*(x: float): float {.importc: "Math.exp", nodecl.} - proc round*(x: float): int {.importc: "Math.round", nodecl.} - proc pow*(x, y: float): float {.importc: "Math.pow", nodecl.} + func divmod*[T:SomeInteger](num, denom: T): (T, T) = + runnableExamples: + doAssert divmod(5, 2) == (2, 1) + doAssert divmod(5, -3) == (-1, 2) + result[0] = num div denom + result[1] = num mod denom - proc frexp*(x: float, exponent: var int): float = + +func round*[T: float32|float64](x: T, places: int): T = + ## Decimal rounding on a binary floating point number. + ## + ## This function is NOT reliable. Floating point numbers cannot hold + ## non integer decimals precisely. If `places` is 0 (or omitted), + ## round to the nearest integral value following normal mathematical + ## rounding rules (e.g. `round(54.5) -> 55.0`). If `places` is + ## greater than 0, round to the given number of decimal places, + ## e.g. `round(54.346, 2) -> 54.350000000000001421…`. If `places` is negative, round + ## to the left of the decimal place, e.g. `round(537.345, -1) -> 540.0`. + runnableExamples: + doAssert round(PI, 2) == 3.14 + doAssert round(PI, 4) == 3.1416 + + if places == 0: + result = round(x) + else: + var mult = pow(10.0, T(places)) + result = round(x * mult) / mult + +func floorDiv*[T: SomeInteger](x, y: T): T = + ## Floor division is conceptually defined as `floor(x / y)`. + ## + ## This is different from the `system.div <system.html#div,int,int>`_ + ## operator, which is defined as `trunc(x / y)`. + ## That is, `div` rounds towards `0` and `floorDiv` rounds down. 
+ ## + ## **See also:** + ## * `system.div proc <system.html#div,int,int>`_ for integer division + ## * `floorMod func <#floorMod,T,T>`_ for Python-like (`%` operator) behavior + runnableExamples: + doAssert floorDiv( 13, 3) == 4 + doAssert floorDiv(-13, 3) == -5 + doAssert floorDiv( 13, -3) == -5 + doAssert floorDiv(-13, -3) == 4 + + result = x div y + let r = x mod y + if (r > 0 and y < 0) or (r < 0 and y > 0): result.dec 1 + +func floorMod*[T: SomeNumber](x, y: T): T = + ## Floor modulo is conceptually defined as `x - (floorDiv(x, y) * y)`. + ## + ## This func behaves the same as the `%` operator in Python. + ## + ## **See also:** + ## * `mod func <#mod,float64,float64>`_ + ## * `floorDiv func <#floorDiv,T,T>`_ + runnableExamples: + doAssert floorMod( 13, 3) == 1 + doAssert floorMod(-13, 3) == 2 + doAssert floorMod( 13, -3) == -2 + doAssert floorMod(-13, -3) == -1 + + result = x mod y + if (result > 0 and y < 0) or (result < 0 and y > 0): result += y + +func euclDiv*[T: SomeInteger](x, y: T): T {.since: (1, 5, 1).} = + ## Returns euclidean division of `x` by `y`. + runnableExamples: + doAssert euclDiv(13, 3) == 4 + doAssert euclDiv(-13, 3) == -5 + doAssert euclDiv(13, -3) == -4 + doAssert euclDiv(-13, -3) == 5 + + result = x div y + if x mod y < 0: + if y > 0: + dec result + else: + inc result + +func euclMod*[T: SomeNumber](x, y: T): T {.since: (1, 5, 1).} = + ## Returns euclidean modulo of `x` by `y`. + ## `euclMod(x, y)` is non-negative. + runnableExamples: + doAssert euclMod(13, 3) == 1 + doAssert euclMod(-13, 3) == 2 + doAssert euclMod(13, -3) == 1 + doAssert euclMod(-13, -3) == 2 + + result = x mod y + if result < 0: + result += abs(y) + +func ceilDiv*[T: SomeInteger](x, y: T): T {.inline, since: (1, 5, 1).} = + ## Ceil division is conceptually defined as `ceil(x / y)`. + ## + ## Assumes `x >= 0` and `y > 0` (and `x + y - 1 <= high(T)` if T is SomeUnsignedInt). 
+ ## + ## This is different from the `system.div <system.html#div,int,int>`_ + ## operator, which works like `trunc(x / y)`. + ## That is, `div` rounds towards `0` and `ceilDiv` rounds up. + ## + ## This function has the above input limitation, because that allows the + ## compiler to generate faster code and it is rarely used with + ## negative values or unsigned integers close to `high(T)/2`. + ## If you need a `ceilDiv` that works with any input, see: + ## https://github.com/demotomohiro/divmath. + ## + ## **See also:** + ## * `system.div proc <system.html#div,int,int>`_ for integer division + ## * `floorDiv func <#floorDiv,T,T>`_ for integer division which rounds down. + runnableExamples: + assert ceilDiv(12, 3) == 4 + assert ceilDiv(13, 3) == 5 + + when sizeof(T) == 8: + type UT = uint64 + elif sizeof(T) == 4: + type UT = uint32 + elif sizeof(T) == 2: + type UT = uint16 + elif sizeof(T) == 1: + type UT = uint8 + else: + {.fatal: "Unsupported int type".} + + assert x >= 0 and y > 0 + when T is SomeUnsignedInt: + assert x + y - 1 >= x + + # If the divisor is const, the backend C/C++ compiler generates code without a `div` + # instruction, as it is slow on most CPUs. + # If the divisor is a power of 2 and a const unsigned integer type, the + # compiler generates faster code. + # If the divisor is const and a signed integer, generated code becomes slower + # than the code with unsigned integers, because division with signed integers + # need to works for both positive and negative value without `idiv`/`sdiv`. + # That is why this code convert parameters to unsigned. + # This post contains a comparison of the performance of signed/unsigned integers: + # https://github.com/nim-lang/Nim/pull/18596#issuecomment-894420984. + # If signed integer arguments were not converted to unsigned integers, + # `ceilDiv` wouldn't work for any positive signed integer value, because + # `x + (y - 1)` can overflow. 
+ ((x.UT + (y.UT - 1.UT)) div y.UT).T + +func frexp*[T: float32|float64](x: T): tuple[frac: T, exp: int] {.inline.} = + ## Splits `x` into a normalized fraction `frac` and an integral power of 2 `exp`, + ## such that `abs(frac) in 0.5..<1` and `x == frac * 2 ^ exp`, except for special + ## cases shown below. + runnableExamples: + doAssert frexp(8.0) == (0.5, 4) + doAssert frexp(-8.0) == (-0.5, 4) + doAssert frexp(0.0) == (0.0, 0) + + # special cases: + when sizeof(int) == 8: + doAssert frexp(-0.0).frac.signbit # signbit preserved for +-0 + doAssert frexp(Inf).frac == Inf # +- Inf preserved + doAssert frexp(NaN).frac.isNaN + + when not defined(js): + var exp: cint + result.frac = c_frexp2(x, exp) + result.exp = exp + else: if x == 0.0: - exponent = 0 - result = 0.0 + # reuse signbit implementation + let uintBuffer = toBitsImpl(x) + if (uintBuffer[1] shr 31) != 0: + # x is -0.0 + result = (-0.0, 0) + else: + result = (0.0, 0) elif x < 0.0: - result = -frexp(-x, exponent) + result = frexp(-x) + result.frac = -result.frac else: - var ex = floor(log2(x)) - exponent = round(ex) - result = x / pow(2.0, ex) - - proc arccos*(x: float): float {.importc: "Math.acos", nodecl.} - proc arcsin*(x: float): float {.importc: "Math.asin", nodecl.} - proc arctan*(x: float): float {.importc: "Math.atan", nodecl.} - proc arctan2*(y, x: float): float {.importc: "Math.atan2", nodecl.} - - proc cos*(x: float): float {.importc: "Math.cos", nodecl.} - proc cosh*(x: float): float = return (exp(x)+exp(-x))*0.5 - proc hypot*(x, y: float): float = return sqrt(x*x + y*y) - proc sinh*(x: float): float = return (exp(x)-exp(-x))*0.5 - proc sin*(x: float): float {.importc: "Math.sin", nodecl.} - proc tan*(x: float): float {.importc: "Math.tan", nodecl.} - proc tanh*(x: float): float = - var y = exp(2.0*x) - return (y-1.0)/(y+1.0) - -proc `mod`*(x, y: float): float = - result = if y == 0.0: x else: x - y * (x/y).floor - -proc random*[T](x: Slice[T]): T = - ## For a slice `a .. 
b` returns a value in the range `a .. b-1`. - result = random(x.b - x.a) + x.a - -proc random*[T](a: openArray[T]): T = - ## returns a random element from the openarray `a`. - result = a[random(a.low..a.len)] + var ex = trunc(log2(x)) + result.exp = int(ex) + result.frac = x / pow(2.0, ex) + if abs(result.frac) >= 1: + inc(result.exp) + result.frac = result.frac / 2 + if result.exp == 1024 and result.frac == 0.0: + result.frac = 0.99999999999999988898 + +func frexp*[T: float32|float64](x: T, exponent: var int): T {.inline.} = + ## Overload of `frexp` that calls `(result, exponent) = frexp(x)`. + runnableExamples: + var x: int + doAssert frexp(5.0, x) == 0.625 + doAssert x == 3 + + (result, exponent) = frexp(x) + + +when not defined(js): + when windowsCC89: + # taken from Go-lang Math.Log2 + const ln2 = 0.693147180559945309417232121458176568075500134360255254120680009 + template log2Impl[T](x: T): T = + var exp: int + var frac = frexp(x, exp) + # Make sure exact powers of two give an exact answer. + # Don't depend on Log(0.5)*(1/Ln2)+exp being exactly exp-1. + if frac == 0.5: return T(exp - 1) + log10(frac) * (1 / ln2) + T(exp) + + func log2*(x: float32): float32 = log2Impl(x) + func log2*(x: float64): float64 = log2Impl(x) + ## Log2 returns the binary logarithm of x. + ## The special cases are the same as for Log. 
-type - RunningStat* = object ## an accumulator for statistical data - n*: int ## number of pushed data - sum*, min*, max*, mean*: float ## self-explaining - oldM, oldS, newS: float - -{.deprecated: [TFloatClass: FloatClass, TRunningStat: RunningStat].} - -proc push*(s: var RunningStat, x: float) = - ## pushes a value `x` for processing - inc(s.n) - # See Knuth TAOCP vol 2, 3rd edition, page 232 - if s.n == 1: - s.min = x - s.max = x - s.oldM = x - s.mean = x - s.oldS = 0.0 else: - if s.min > x: s.min = x - if s.max < x: s.max = x - s.mean = s.oldM + (x - s.oldM)/toFloat(s.n) - s.newS = s.oldS + (x - s.oldM)*(x - s.mean) - - # set up for next iteration: - s.oldM = s.mean - s.oldS = s.newS - s.sum = s.sum + x - -proc push*(s: var RunningStat, x: int) = - ## pushes a value `x` for processing. `x` is simply converted to ``float`` - ## and the other push operation is called. - push(s, toFloat(x)) - -proc variance*(s: RunningStat): float = - ## computes the current variance of `s` - if s.n > 1: result = s.newS / (toFloat(s.n - 1)) + func log2*(x: float32): float32 {.importc: "log2f", header: "<math.h>".} + func log2*(x: float64): float64 {.importc: "log2", header: "<math.h>".} = + ## Computes the binary logarithm (base 2) of `x`. + ## + ## **See also:** + ## * `log func <#log,T,T>`_ + ## * `log10 func <#log10,float64>`_ + ## * `ln func <#ln,float64>`_ + runnableExamples: + doAssert almostEqual(log2(8.0), 3.0) + doAssert almostEqual(log2(1.0), 0.0) + doAssert almostEqual(log2(0.0), -Inf) + doAssert log2(-2.0).isNaN + +func splitDecimal*[T: float32|float64](x: T): tuple[intpart: T, floatpart: T] = + ## Breaks `x` into an integer and a fractional part. + ## + ## Returns a tuple containing `intpart` and `floatpart`, representing + ## the integer part and the fractional part, respectively. + ## + ## Both parts have the same sign as `x`. Analogous to the `modf` + ## function in C. 
+ runnableExamples: + doAssert splitDecimal(5.25) == (intpart: 5.0, floatpart: 0.25) + doAssert splitDecimal(-2.73) == (intpart: -2.0, floatpart: -0.73) + + var + absolute: T + absolute = abs(x) + result.intpart = floor(absolute) + result.floatpart = absolute - result.intpart + if x < 0: + result.intpart = -result.intpart + result.floatpart = -result.floatpart + + +func degToRad*[T: float32|float64](d: T): T {.inline.} = + ## Converts from degrees to radians. + ## + ## **See also:** + ## * `radToDeg func <#radToDeg,T>`_ + runnableExamples: + doAssert almostEqual(degToRad(180.0), PI) + + result = d * T(RadPerDeg) -proc standardDeviation*(s: RunningStat): float = - ## computes the current standard deviation of `s` - result = sqrt(variance(s)) +func radToDeg*[T: float32|float64](d: T): T {.inline.} = + ## Converts from radians to degrees. + ## + ## **See also:** + ## * `degToRad func <#degToRad,T>`_ + runnableExamples: + doAssert almostEqual(radToDeg(2 * PI), 360.0) + + result = d / T(RadPerDeg) + +func sgn*[T: SomeNumber](x: T): int {.inline.} = + ## Sign function. + ## + ## Returns: + ## * `-1` for negative numbers and `NegInf`, + ## * `1` for positive numbers and `Inf`, + ## * `0` for positive zero, negative zero and `NaN` + runnableExamples: + doAssert sgn(5) == 1 + doAssert sgn(0) == 0 + doAssert sgn(-4.1) == -1 + + ord(T(0) < x) - ord(x < T(0)) {.pop.} {.pop.} -proc `^`*[T](x, y: T): T = - ## Computes ``x`` to the power ``y`. ``x`` must be non-negative, use - ## `pow <#pow,float,float>` for negative exponents. - assert y >= 0 - var (x, y) = (x, y) - result = 1 +func sum*[T](x: openArray[T]): T = + ## Computes the sum of the elements in `x`. + ## + ## If `x` is empty, 0 is returned. 
+ ## + ## **See also:** + ## * `prod func <#prod,openArray[T]>`_ + ## * `cumsum func <#cumsum,openArray[T]>`_ + ## * `cumsummed func <#cumsummed,openArray[T]>`_ + runnableExamples: + doAssert sum([1, 2, 3, 4]) == 10 + doAssert sum([-4, 3, 5]) == 4 - while y != 0: - if (y and 1) != 0: - result *= x - y = y shr 1 - x *= x - -proc gcd*[T](x, y: T): T = - ## Computes the greatest common divisor of ``x`` and ``y``. - var (x,y) = (x,y) + for i in items(x): result = result + i + +func prod*[T](x: openArray[T]): T = + ## Computes the product of the elements in `x`. + ## + ## If `x` is empty, 1 is returned. + ## + ## **See also:** + ## * `sum func <#sum,openArray[T]>`_ + ## * `fac func <#fac,int>`_ + runnableExamples: + doAssert prod([1, 2, 3, 4]) == 24 + doAssert prod([-4, 3, 5]) == -60 + + result = T(1) + for i in items(x): result = result * i + +func cumsummed*[T](x: openArray[T]): seq[T] = + ## Returns the cumulative (aka prefix) summation of `x`. + ## + ## If `x` is empty, `@[]` is returned. + ## + ## **See also:** + ## * `sum func <#sum,openArray[T]>`_ + ## * `cumsum func <#cumsum,openArray[T]>`_ for the in-place version + runnableExamples: + doAssert cumsummed([1, 2, 3, 4]) == @[1, 3, 6, 10] + + let xLen = x.len + if xLen == 0: + return @[] + result.setLen(xLen) + result[0] = x[0] + for i in 1 ..< xLen: result[i] = result[i - 1] + x[i] + +func cumsum*[T](x: var openArray[T]) = + ## Transforms `x` in-place (must be declared as `var`) into its + ## cumulative (aka prefix) summation. + ## + ## **See also:** + ## * `sum func <#sum,openArray[T]>`_ + ## * `cumsummed func <#cumsummed,openArray[T]>`_ for a version which + ## returns a cumsummed sequence + runnableExamples: + var a = [1, 2, 3, 4] + cumsum(a) + doAssert a == @[1, 3, 6, 10] + + for i in 1 ..< x.len: x[i] = x[i - 1] + x[i] + +func `^`*[T: SomeNumber](x: T, y: Natural): T = + ## Computes `x` to the power of `y`. 
+ ## + ## The exponent `y` must be non-negative, use + ## `pow <#pow,float64,float64>`_ for negative exponents. + ## + ## **See also:** + ## * `pow func <#pow,float64,float64>`_ for negative exponent or + ## floats + ## * `sqrt func <#sqrt,float64>`_ + ## * `cbrt func <#cbrt,float64>`_ + runnableExamples: + doAssert -3 ^ 0 == 1 + doAssert -3 ^ 1 == -3 + doAssert -3 ^ 2 == 9 + + case y + of 0: result = 1 + of 1: result = x + of 2: result = x * x + of 3: result = x * x * x + else: + var (x, y) = (x, y) + result = 1 + while true: + if (y and 1) != 0: + result *= x + y = y shr 1 + if y == 0: + break + x *= x + +func gcd*[T](x, y: T): T = + ## Computes the greatest common (positive) divisor of `x` and `y`. + ## + ## Note that for floats, the result cannot always be interpreted as + ## "greatest decimal `z` such that `z*N == x and z*M == y` + ## where N and M are positive integers". + ## + ## **See also:** + ## * `gcd func <#gcd,SomeInteger,SomeInteger>`_ for an integer version + ## * `lcm func <#lcm,T,T>`_ + runnableExamples: + doAssert gcd(13.5, 9.0) == 4.5 + + var (x, y) = (x, y) while y != 0: x = x mod y swap x, y abs x -proc lcm*[T](x, y: T): T = - ## Computes the least common multiple of ``x`` and ``y``. +when useBuiltins: + ## this func uses bitwise comparisons from C compilers, which are not always available. + func gcd*(x, y: SomeInteger): SomeInteger = + ## Computes the greatest common (positive) divisor of `x` and `y`, + ## using the binary GCD (aka Stein's) algorithm. 
+ ## + ## **See also:** + ## * `gcd func <#gcd,T,T>`_ for a float version + ## * `lcm func <#lcm,T,T>`_ + runnableExamples: + doAssert gcd(12, 8) == 4 + doAssert gcd(17, 63) == 1 + + when x is SomeSignedInt: + var x = abs(x) + else: + var x = x + when y is SomeSignedInt: + var y = abs(y) + else: + var y = y + + if x == 0: + return y + if y == 0: + return x + + let shift = countTrailingZeroBits(x or y) + y = y shr countTrailingZeroBits(y) + while x != 0: + x = x shr countTrailingZeroBits(x) + if y > x: + swap y, x + x -= y + y shl shift + +func gcd*[T](x: openArray[T]): T {.since: (1, 1).} = + ## Computes the greatest common (positive) divisor of the elements of `x`. + ## + ## **See also:** + ## * `gcd func <#gcd,T,T>`_ for a version with two arguments + runnableExamples: + doAssert gcd(@[13.5, 9.0]) == 4.5 + + result = x[0] + for i in 1 ..< x.len: + result = gcd(result, x[i]) + +func lcm*[T](x, y: T): T = + ## Computes the least common multiple of `x` and `y`. + ## + ## **See also:** + ## * `gcd func <#gcd,T,T>`_ + runnableExamples: + doAssert lcm(24, 30) == 120 + doAssert lcm(13, 39) == 39 + x div gcd(x, y) * y -when isMainModule and not defined(JS): - proc gettime(dummy: ptr cint): cint {.importc: "time", header: "<time.h>".} - - # Verifies random seed initialization. - let seed = gettime(nil) - randomize(seed) - const SIZE = 10 - var buf : array[0..SIZE, int] - # Fill the buffer with random values - for i in 0..SIZE-1: - buf[i] = random(high(int)) - # Check that the second random calls are the same for each position. - randomize(seed) - for i in 0..SIZE-1: - assert buf[i] == random(high(int)), "non deterministic random seeding" - echo "random values equal after reseeding" +func clamp*[T](val: T, bounds: Slice[T]): T {.since: (1, 5), inline.} = + ## Like `system.clamp`, but takes a slice, so you can easily clamp within a range. + runnableExamples: + assert clamp(10, 1 .. 5) == 5 + assert clamp(1, 1 .. 
3) == 1 + type A = enum a0, a1, a2, a3, a4, a5 + assert a1.clamp(a2..a4) == a2 + assert clamp((3, 0), (1, 0) .. (2, 9)) == (2, 9) + doAssertRaises(AssertionDefect): discard clamp(1, 3..2) # invalid bounds + assert bounds.a <= bounds.b, $(bounds.a, bounds.b) + clamp(val, bounds.a, bounds.b) + +func lcm*[T](x: openArray[T]): T {.since: (1, 1).} = + ## Computes the least common multiple of the elements of `x`. + ## + ## **See also:** + ## * `lcm func <#lcm,T,T>`_ for a version with two arguments + runnableExamples: + doAssert lcm(@[24, 30]) == 120 + + result = x[0] + for i in 1 ..< x.len: + result = lcm(result, x[i]) diff --git a/lib/pure/md5.nim b/lib/pure/md5.nim index 5ee301b15..9c3f6d51b 100644 --- a/lib/pure/md5.nim +++ b/lib/pure/md5.nim @@ -7,15 +7,28 @@ # distribution, for details about the copyright. # -## Module for computing MD5 checksums. +## Module for computing [MD5 checksums](https://en.wikipedia.org/wiki/MD5). +## +## This module also works at compile time and in JavaScript. +## +## See also +## ======== +## * `base64 module<base64.html>`_ for a Base64 encoder and decoder +## * `sha1 module <sha1.html>`_ for the SHA-1 checksum algorithm +## * `hashes module<hashes.html>`_ for efficient computations of hash values +## for diverse Nim types -import unsigned +{.deprecated: "use command `nimble install checksums` and import `checksums/md5` instead".} + +when defined(nimHasStyleChecks): + {.push styleChecks: off.} type MD5State = array[0..3, uint32] MD5Block = array[0..15, uint32] MD5CBits = array[0..7, uint8] MD5Digest* = array[0..15, uint8] + ## MD5 checksum of a string, obtained with the `toMD5 proc <#toMD5,string>`_. 
MD5Buffer = array[0..63, uint8] MD5Context* {.final.} = object state: MD5State @@ -23,15 +36,16 @@ type buffer: MD5Buffer const - padding: cstring = "\x80\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0\0\0\0\0" & - "\0\0\0\0" + padding: array[0..63, uint8] = [ + 0x80'u8, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + ] proc F(x, y, z: uint32): uint32 {.inline.} = result = (x and y) or ((not x) and z) @@ -68,7 +82,7 @@ proc II(a: var uint32, b, c, d, x: uint32, s: uint8, ac: uint32) = rot(a, s) a = a + b -proc encode(dest: var MD5Block, src: cstring) = +proc encode(dest: var MD5Block, src: openArray[uint8]) = var j = 0 for i in 0..high(dest): dest[i] = uint32(ord(src[j])) or @@ -80,16 +94,41 @@ proc encode(dest: var MD5Block, src: cstring) = proc decode(dest: var openArray[uint8], src: openArray[uint32]) = var i = 0 for j in 0..high(src): - dest[i] = src[j] and 0xff'u32 - dest[i+1] = src[j] shr 8 and 0xff'u32 - dest[i+2] = src[j] shr 16 and 0xff'u32 - dest[i+3] = src[j] shr 24 and 0xff'u32 + dest[i] = uint8(src[j] and 0xff'u32) + dest[i+1] = uint8(src[j] shr 8 and 0xff'u32) + dest[i+2] = uint8(src[j] shr 16 and 0xff'u32) + dest[i+3] = uint8(src[j] shr 24 and 0xff'u32) inc(i, 4) -proc transform(buffer: pointer, state: var MD5State) = +template slice(s: cstring, a, b): openArray[uint8] = + when nimvm: + # toOpenArray is not implemented in VM + toOpenArrayByte($s, a, b) + else: + when defined(js): + # toOpenArrayByte for cstring is not implemented in JS + toOpenArrayByte($s, a, b) + else: + s.toOpenArrayByte(a, b) + +template slice(s: openArray[uint8], a, b): openArray[uint8] = + s.toOpenArray(a, b) + +const useMem = declared(copyMem) + +template memOrNot(withMem, withoutMem): untyped = + when nimvm: + 
withoutMem + else: + when useMem: + withMem + else: + withoutMem + +proc transform(buffer: openArray[uint8], state: var MD5State) = var myBlock: MD5Block - encode(myBlock, cast[cstring](buffer)) + encode(myBlock, buffer) var a = state[0] var b = state[1] var c = state[2] @@ -163,83 +202,134 @@ proc transform(buffer: pointer, state: var MD5State) = state[2] = state[2] + c state[3] = state[3] + d -proc md5Init*(c: var MD5Context) = - ## initializes a MD5Context - c.state[0] = 0x67452301'u32 - c.state[1] = 0xEFCDAB89'u32 - c.state[2] = 0x98BADCFE'u32 - c.state[3] = 0x10325476'u32 - c.count[0] = 0'u32 - c.count[1] = 0'u32 - zeroMem(addr(c.buffer), sizeof(MD5buffer)) +proc md5Init*(c: var MD5Context) {.raises: [], tags: [], gcsafe.} +proc md5Update*(c: var MD5Context, input: openArray[uint8]) {.raises: [], + tags: [], gcsafe.} +proc md5Final*(c: var MD5Context, digest: var MD5Digest) {.raises: [], tags: [], gcsafe.} -proc md5Update*(c: var MD5Context, input: cstring, len: int) = - ## updates the MD5Context with the `input` data of length `len` - var input = input - var Index = int((c.count[0] shr 3) and 0x3F) - c.count[0] = c.count[0] + (uint32(len) shl 3) - if c.count[0] < (uint32(len) shl 3): c.count[1] = c.count[1] + 1'u32 - c.count[1] = c.count[1] + (uint32(len) shr 29) - var PartLen = 64 - Index - if len >= PartLen: - copyMem(addr(c.buffer[Index]), input, PartLen) - transform(addr(c.buffer), c.state) - var i = PartLen - while i + 63 < len: - transform(addr(input[i]), c.state) - inc(i, 64) - copyMem(addr(c.buffer[0]), addr(input[i]), len-i) - else: - copyMem(addr(c.buffer[Index]), addr(input[0]), len) +proc md5Update*(c: var MD5Context, input: cstring, len: int) {.raises: [], + tags: [], gcsafe.} = + ## Updates the `MD5Context` with the `input` data of length `len`. + ## + ## If you use the `toMD5 proc <#toMD5,string>`_, there's no need to call this + ## function explicitly. 
+ md5Update(c, input.slice(0, len - 1)) -proc md5Final*(c: var MD5Context, digest: var MD5Digest) = - ## finishes the MD5Context and stores the result in `digest` - var - Bits: MD5CBits - PadLen: int - decode(Bits, c.count) - var Index = int((c.count[0] shr 3) and 0x3F) - if Index < 56: PadLen = 56 - Index - else: PadLen = 120 - Index - md5Update(c, padding, PadLen) - md5Update(c, cast[cstring](addr(Bits)), 8) - decode(digest, c.state) - zeroMem(addr(c), sizeof(MD5Context)) proc toMD5*(s: string): MD5Digest = - ## computes the MD5Digest value for a string `s` + ## Computes the `MD5Digest` value for a string `s`. + ## + ## **See also:** + ## * `getMD5 proc <#getMD5,string>`_ which returns a string representation + ## of the `MD5Digest` + ## * `$ proc <#$,MD5Digest>`_ for converting MD5Digest to string + runnableExamples: + assert $toMD5("abc") == "900150983cd24fb0d6963f7d28e17f72" + var c: MD5Context md5Init(c) - md5Update(c, cstring(s), len(s)) + md5Update(c, s.slice(0, s.len - 1)) md5Final(c, result) proc `$`*(d: MD5Digest): string = - ## converts a MD5Digest value into its string representation + ## Converts a `MD5Digest` value into its string representation. const digits = "0123456789abcdef" result = "" for i in 0..15: - add(result, digits[(d[i] shr 4) and 0xF]) - add(result, digits[d[i] and 0xF]) + add(result, digits[(d[i].int shr 4) and 0xF]) + add(result, digits[d[i].int and 0xF]) proc getMD5*(s: string): string = - ## computes an MD5 value of `s` and returns its string representation + ## Computes an MD5 value of `s` and returns its string representation. 
+ ## + ## **See also:** + ## * `toMD5 proc <#toMD5,string>`_ which returns the `MD5Digest` of a string + runnableExamples: + assert getMD5("abc") == "900150983cd24fb0d6963f7d28e17f72" + var c: MD5Context d: MD5Digest md5Init(c) - md5Update(c, cstring(s), len(s)) + md5Update(c, s.slice(0, s.len - 1)) md5Final(c, d) result = $d proc `==`*(D1, D2: MD5Digest): bool = - ## checks if two MD5Digest values are identical + ## Checks if two `MD5Digest` values are identical. for i in 0..15: if D1[i] != D2[i]: return false return true -when isMainModule: - assert(getMD5("Franz jagt im komplett verwahrlosten Taxi quer durch Bayern") == - "a3cca2b2aa1e3b5b3b5aad99a8529074") - assert(getMD5("Frank jagt im komplett verwahrlosten Taxi quer durch Bayern") == - "7e716d0e702df0505fc72e2b89467910") - assert($toMD5("") == "d41d8cd98f00b204e9800998ecf8427e") + +proc clearBuffer(c: var MD5Context) {.inline.} = + memOrNot: + zeroMem(addr(c.buffer), sizeof(MD5Buffer)) + do: + reset(c.buffer) + +proc md5Init*(c: var MD5Context) = + ## Initializes an `MD5Context`. + ## + ## If you use the `toMD5 proc <#toMD5,string>`_, there's no need to call this + ## function explicitly. + c.state[0] = 0x67452301'u32 + c.state[1] = 0xEFCDAB89'u32 + c.state[2] = 0x98BADCFE'u32 + c.state[3] = 0x10325476'u32 + c.count[0] = 0'u32 + c.count[1] = 0'u32 + clearBuffer(c) + +proc writeBuffer(c: var MD5Context, index: int, + input: openArray[uint8], inputIndex, len: int) {.inline.} = + memOrNot: + copyMem(addr(c.buffer[index]), unsafeAddr(input[inputIndex]), len) + do: + # cannot use system.`[]=` for arrays and openarrays as + # it can raise RangeDefect which gets tracked + for i in 0..<len: + c.buffer[index + i] = input[inputIndex + i] + +proc md5Update*(c: var MD5Context, input: openArray[uint8]) = + ## Updates the `MD5Context` with the `input` data. + ## + ## If you use the `toMD5 proc <#toMD5,string>`_, there's no need to call this + ## function explicitly. 
+ var Index = int((c.count[0] shr 3) and 0x3F) + c.count[0] = c.count[0] + (uint32(input.len) shl 3) + if c.count[0] < (uint32(input.len) shl 3): c.count[1] = c.count[1] + 1'u32 + c.count[1] = c.count[1] + (uint32(input.len) shr 29) + var PartLen = 64 - Index + if input.len >= PartLen: + writeBuffer(c, Index, input, 0, PartLen) + transform(c.buffer, c.state) + var i = PartLen + while i + 63 < input.len: + transform(input.slice(i, i + 63), c.state) + inc(i, 64) + if i < input.len: + writeBuffer(c, 0, input, i, input.len - i) + elif input.len > 0: + writeBuffer(c, Index, input, 0, input.len) + +proc md5Final*(c: var MD5Context, digest: var MD5Digest) = + ## Finishes the `MD5Context` and stores the result in `digest`. + ## + ## If you use the `toMD5 proc <#toMD5,string>`_, there's no need to call this + ## function explicitly. + var + Bits: MD5CBits + PadLen: int + decode(Bits, c.count) + var Index = int((c.count[0] shr 3) and 0x3F) + if Index < 56: PadLen = 56 - Index + else: PadLen = 120 - Index + md5Update(c, padding.slice(0, PadLen - 1)) + md5Update(c, Bits) + decode(digest, c.state) + clearBuffer(c) + + +when defined(nimHasStyleChecks): + {.pop.} #{.push styleChecks: off.} \ No newline at end of file diff --git a/lib/pure/memfiles.nim b/lib/pure/memfiles.nim index d49dfae9f..8eec551c4 100644 --- a/lib/pure/memfiles.nim +++ b/lib/pure/memfiles.nim @@ -11,61 +11,120 @@ ## ## This module provides support for `memory mapped files`:idx: ## (Posix's `mmap`:idx:) on the different operating systems. +## +## It also provides some fast iterators over lines in text files (or +## other "line-like", variable length, delimited records). 
when defined(windows): - import winlean + import std/winlean + when defined(nimPreviewSlimSystem): + import std/widestrs elif defined(posix): - import posix + import std/posix else: {.error: "the memfiles module is not supported on your operating system!".} -import os +import std/streams +import std/oserrors + +when defined(nimPreviewSlimSystem): + import std/[syncio, assertions] + + +proc newEIO(msg: string): ref IOError = + new(result) + result.msg = msg + +proc setFileSize(fh: FileHandle, newFileSize = -1, oldSize = -1): OSErrorCode = + ## Set the size of open file pointed to by `fh` to `newFileSize` if != -1, + ## allocating | freeing space from the file system. This routine returns the + ## last OSErrorCode found rather than raising to support old rollback/clean-up + ## code style. [ Should maybe move to std/osfiles. ] + if newFileSize < 0 or newFileSize == oldSize: + return + when defined(windows): + var sizeHigh = int32(newFileSize shr 32) + let sizeLow = int32(newFileSize and 0xffffffff) + let status = setFilePointer(fh, sizeLow, addr(sizeHigh), FILE_BEGIN) + let lastErr = osLastError() + if (status == INVALID_SET_FILE_POINTER and lastErr.int32 != NO_ERROR) or + setEndOfFile(fh) == 0: + result = lastErr + else: + if newFileSize > oldSize: # grow the file + var e: cint # posix_fallocate truncates up when needed. + when declared(posix_fallocate): + while (e = posix_fallocate(fh, 0, newFileSize); e == EINTR): + discard + if e in [EINVAL, EOPNOTSUPP] and ftruncate(fh, newFileSize) == -1: + result = osLastError() # fallback arguable; Most portable BUT allows SEGV + elif e != 0: + result = osLastError() + else: # shrink the file + if ftruncate(fh.cint, newFileSize) == -1: + result = osLastError() type - MemFile* = object ## represents a memory mapped file - mem*: pointer ## a pointer to the memory mapped file. The pointer - ## can be used directly to change the contents of the - ## file, if it was opened with write access. 
- size*: int ## size of the memory mapped file + MemFile* = object ## represents a memory mapped file + mem*: pointer ## a pointer to the memory mapped file. The pointer + ## can be used directly to change the contents of the + ## file, if it was opened with write access. + size*: int ## size of the memory mapped file when defined(windows): - fHandle: int - mapHandle: int + fHandle*: Handle ## **Caution**: Windows specific public field to allow + ## even more low level trickery. + mapHandle*: Handle ## **Caution**: Windows specific public field. + wasOpened*: bool ## **Caution**: Windows specific public field. else: - handle: cint - -{.deprecated: [TMemFile: MemFile].} + handle*: cint ## **Caution**: Posix specific public field. + flags: cint ## **Caution**: Platform specific private field. proc mapMem*(m: var MemFile, mode: FileMode = fmRead, - mappedSize = -1, offset = 0): pointer = + mappedSize = -1, offset = 0, mapFlags = cint(-1)): pointer = + ## returns a pointer to a mapped portion of MemFile `m` + ## + ## `mappedSize` of `-1` maps to the whole file, and + ## `offset` must be multiples of the PAGE SIZE of your OS + if mode == fmAppend: + raise newEIO("The append mode is not supported.") + var readonly = mode == fmRead when defined(windows): result = mapViewOfFileEx( m.mapHandle, - if readonly: FILE_MAP_READ else: FILE_MAP_WRITE, + if readonly: FILE_MAP_READ else: FILE_MAP_READ or FILE_MAP_WRITE, int32(offset shr 32), int32(offset and 0xffffffff), - if mappedSize == -1: 0 else: mappedSize, + WinSizeT(if mappedSize == -1: 0 else: mappedSize), nil) if result == nil: raiseOSError(osLastError()) else: assert mappedSize > 0 + + m.flags = if mapFlags == cint(-1): MAP_SHARED else: mapFlags + #Ensure exactly one of MAP_PRIVATE cr MAP_SHARED is set + if int(m.flags and MAP_PRIVATE) == 0: + m.flags = m.flags or MAP_SHARED + result = mmap( nil, mappedSize, if readonly: PROT_READ else: PROT_READ or PROT_WRITE, - if readonly: (MAP_PRIVATE or MAP_POPULATE) else: 
(MAP_SHARED or MAP_POPULATE), + m.flags, m.handle, offset) if result == cast[pointer](MAP_FAILED): raiseOSError(osLastError()) proc unmapMem*(f: var MemFile, p: pointer, size: int) = - ## unmaps the memory region ``(p, <p+size)`` of the mapped file `f`. + ## unmaps the memory region `(p, <p+size)` of the mapped file `f`. ## All changes are written back to the file system, if `f` was opened - ## with write access. ``size`` must be of exactly the size that was requested - ## via ``mapMem``. + ## with write access. + ## + ## `size` must be of exactly the size that was requested + ## via `mapMem`. when defined(windows): if unmapViewOfFile(p) == 0: raiseOSError(osLastError()) else: @@ -73,13 +132,29 @@ proc unmapMem*(f: var MemFile, p: pointer, size: int) = proc open*(filename: string, mode: FileMode = fmRead, - mappedSize = -1, offset = 0, newFileSize = -1): MemFile = - ## opens a memory mapped file. If this fails, ``EOS`` is raised. + mappedSize = -1, offset = 0, newFileSize = -1, + allowRemap = false, mapFlags = cint(-1)): MemFile = + ## opens a memory mapped file. If this fails, `OSError` is raised. + ## ## `newFileSize` can only be set if the file does not exist and is opened - ## with write access (e.g., with fmReadWrite). `mappedSize` and `offset` - ## can be used to map only a slice of the file. Example: + ## with write access (e.g., with fmReadWrite). + ## + ##`mappedSize` and `offset` + ## can be used to map only a slice of the file. ## - ## .. code-block:: nim + ## `offset` must be multiples of the PAGE SIZE of your OS + ## (usually 4K or 8K but is unique to your OS) + ## + ## `allowRemap` only needs to be true if you want to call `mapMem` on + ## the resulting MemFile; else file handles are not kept open. + ## + ## `mapFlags` allows callers to override default choices for memory mapping + ## flags with a bitwise mask of a variety of likely platform-specific flags + ## which may be ignored or even cause `open` to fail if misspecified. 
+ ## + ## Example: + ## + ## ```nim ## var ## mm, mm_full, mm_half: MemFile ## @@ -91,8 +166,12 @@ proc open*(filename: string, mode: FileMode = fmRead, ## ## # Read the first 512 bytes ## mm_half = memfiles.open("/tmp/test.mmap", mode = fmReadWrite, mappedSize = 512) + ## ``` # The file can be resized only when write mode is used: + if mode == fmAppend: + raise newEIO("The append mode is not supported.") + assert newFileSize == -1 or mode != fmRead var readonly = mode == fmRead @@ -101,43 +180,37 @@ proc open*(filename: string, mode: FileMode = fmRead, result.size = 0 when defined(windows): - template fail(errCode: OSErrorCode, msg: expr) = + let desiredAccess = GENERIC_READ + let shareMode = FILE_SHARE_READ + let flags = FILE_FLAG_RANDOM_ACCESS + + template fail(errCode: OSErrorCode, msg: untyped) = rollback() if result.fHandle != 0: discard closeHandle(result.fHandle) if result.mapHandle != 0: discard closeHandle(result.mapHandle) raiseOSError(errCode) # return false - #raise newException(EIO, msg) + #raise newException(IOError, msg) - template callCreateFile(winApiProc, filename: expr): expr = + template callCreateFile(winApiProc, filename): untyped = winApiProc( filename, - if readonly: GENERIC_READ else: GENERIC_ALL, - FILE_SHARE_READ, + # GENERIC_ALL != (GENERIC_READ or GENERIC_WRITE) + if readonly: desiredAccess else: desiredAccess or GENERIC_WRITE, + if readonly: shareMode else: shareMode or FILE_SHARE_WRITE, nil, if newFileSize != -1: CREATE_ALWAYS else: OPEN_EXISTING, - if readonly: FILE_ATTRIBUTE_READONLY else: FILE_ATTRIBUTE_TEMPORARY, + if readonly: FILE_ATTRIBUTE_READONLY or flags + else: FILE_ATTRIBUTE_NORMAL or flags, 0) - when useWinUnicode: - result.fHandle = callCreateFile(createFileW, newWideCString(filename)) - else: - result.fHandle = callCreateFile(createFileA, filename) + result.fHandle = callCreateFile(createFileW, newWideCString(filename)) if result.fHandle == INVALID_HANDLE_VALUE: fail(osLastError(), "error opening file") - if 
newFileSize != -1: - var - sizeHigh = int32(newFileSize shr 32) - sizeLow = int32(newFileSize and 0xffffffff) - - var status = setFilePointer(result.fHandle, sizeLow, addr(sizeHigh), - FILE_BEGIN) - let lastErr = osLastError() - if (status == INVALID_SET_FILE_POINTER and lastErr.int32 != NO_ERROR) or - (setEndOfFile(result.fHandle) == 0): - fail(lastErr, "error setting file size") + if (let e = setFileSize(result.fHandle.FileHandle, newFileSize); + e != 0.OSErrorCode): fail(e, "error setting file size") # since the strings are always 'nil', we simply always call # CreateFileMappingW which should be slightly faster anyway: @@ -151,7 +224,7 @@ proc open*(filename: string, mode: FileMode = fmRead, result.mem = mapViewOfFileEx( result.mapHandle, - if readonly: FILE_MAP_READ else: FILE_MAP_WRITE, + if readonly: FILE_MAP_READ else: FILE_MAP_READ or FILE_MAP_WRITE, int32(offset shr 32), int32(offset and 0xffffffff), if mappedSize == -1: 0 else: mappedSize, @@ -165,73 +238,155 @@ proc open*(filename: string, mode: FileMode = fmRead, if low == INVALID_FILE_SIZE: fail(osLastError(), "error getting file size") else: - var fileSize = (int64(hi) shr 32) or low + var fileSize = (int64(hi) shl 32) or int64(uint32(low)) if mappedSize != -1: result.size = min(fileSize, mappedSize).int else: result.size = fileSize.int + result.wasOpened = true + if not allowRemap and result.fHandle != INVALID_HANDLE_VALUE: + if closeHandle(result.fHandle) != 0: + result.fHandle = INVALID_HANDLE_VALUE + else: - template fail(errCode: OSErrorCode, msg: expr) = + template fail(errCode: OSErrorCode, msg: string) = rollback() - if result.handle != 0: discard close(result.handle) + if result.handle != -1: discard close(result.handle) raiseOSError(errCode) - - var flags = if readonly: O_RDONLY else: O_RDWR + + var flags = (if readonly: O_RDONLY else: O_RDWR) or O_CLOEXEC if newFileSize != -1: flags = flags or O_CREAT or O_TRUNC - var permissions_mode = S_IRUSR or S_IWUSR - result.handle = open(filename, 
flags, permissions_mode) + var permissionsMode = S_IRUSR or S_IWUSR + result.handle = open(filename, flags, permissionsMode) + if result.handle != -1: + if (let e = setFileSize(result.handle.FileHandle, newFileSize); + e != 0.OSErrorCode): fail(e, "error setting file size") else: result.handle = open(filename, flags) if result.handle == -1: - # XXX: errno is supposed to be set here - # Is there an exception that wraps it? fail(osLastError(), "error opening file") - if newFileSize != -1: - if ftruncate(result.handle, newFileSize) == -1: - fail(osLastError(), "error setting file size") - - if mappedSize != -1: - result.size = mappedSize - else: - var stat: TStat + if mappedSize != -1: #XXX Logic here differs from `when windows` branch .. + result.size = mappedSize #.. which always fstats&Uses min(mappedSize, st). + else: # if newFileSize!=-1: result.size=newFileSize # if trust setFileSize + var stat: Stat #^^.. BUT some FSes (eg. Linux HugeTLBfs) round to 2MiB. if fstat(result.handle, stat) != -1: - # XXX: Hmm, this could be unsafe - # Why is mmap taking int anyway? 
- result.size = int(stat.st_size) + result.size = stat.st_size.int # int may be 32-bit-unsafe for 2..<4 GiB else: fail(osLastError(), "error getting file size") - result.mem = mmap( - nil, - result.size, - if readonly: PROT_READ else: PROT_READ or PROT_WRITE, - if readonly: (MAP_PRIVATE or MAP_POPULATE) else: (MAP_SHARED or MAP_POPULATE), - result.handle, - offset) + result.flags = if mapFlags == cint(-1): MAP_SHARED else: mapFlags + # Ensure exactly one of MAP_PRIVATE cr MAP_SHARED is set + if int(result.flags and MAP_PRIVATE) == 0: + result.flags = result.flags or MAP_SHARED + let pr = if readonly: PROT_READ else: PROT_READ or PROT_WRITE + result.mem = mmap(nil, result.size, pr, result.flags, result.handle, offset) if result.mem == cast[pointer](MAP_FAILED): fail(osLastError(), "file mapping failed") + if not allowRemap and result.handle != -1: + if close(result.handle) == 0: + result.handle = -1 + +proc flush*(f: var MemFile; attempts: Natural = 3) = + ## Flushes `f`'s buffer for the number of attempts equal to `attempts`. + ## If were errors an exception `OSError` will be raised. + var res = false + var lastErr: OSErrorCode + when defined(windows): + for i in 1..attempts: + res = flushViewOfFile(f.mem, 0) != 0 + if res: + break + lastErr = osLastError() + if lastErr != ERROR_LOCK_VIOLATION.OSErrorCode: + raiseOSError(lastErr) + else: + for i in 1..attempts: + res = msync(f.mem, f.size, MS_SYNC or MS_INVALIDATE) == 0 + if res: + break + lastErr = osLastError() + if lastErr != EBUSY.OSErrorCode: + raiseOSError(lastErr, "error flushing mapping") + +proc resize*(f: var MemFile, newFileSize: int) {.raises: [IOError, OSError].} = + ## Resize & re-map the file underlying an `allowRemap MemFile`. If the OS/FS + ## supports it, file space is reserved to ensure room for new virtual pages. + ## Caller should wait often enough for `flush` to finish to limit use of + ## system RAM for write buffering, perhaps just prior to this call. 
+ ## **Note**: this assumes the entire file is mapped read-write at offset 0. + ## Also, the value of `.mem` will probably change. + if newFileSize < 1: # Q: include system/bitmasks & use PageSize ? + raise newException(IOError, "Cannot resize MemFile to < 1 byte") + when defined(windows): + if not f.wasOpened: + raise newException(IOError, "Cannot resize unopened MemFile") + if f.fHandle == INVALID_HANDLE_VALUE: + raise newException(IOError, + "Cannot resize MemFile opened with allowRemap=false") + if unmapViewOfFile(f.mem) == 0 or closeHandle(f.mapHandle) == 0: # Un-do map + raiseOSError(osLastError()) + if newFileSize != f.size: # Seek to size & `setEndOfFile` => allocated. + if (let e = setFileSize(f.fHandle.FileHandle, newFileSize); + e != 0.OSErrorCode): raiseOSError(e) + f.mapHandle = createFileMappingW(f.fHandle, nil, PAGE_READWRITE, 0,0,nil) + if f.mapHandle == 0: # Re-do map + raiseOSError(osLastError()) + if (let m = mapViewOfFileEx(f.mapHandle, FILE_MAP_READ or FILE_MAP_WRITE, + 0, 0, WinSizeT(newFileSize), nil); m != nil): + f.mem = m + f.size = newFileSize + else: + raiseOSError(osLastError()) + elif defined(posix): + if f.handle == -1: + raise newException(IOError, + "Cannot resize MemFile opened with allowRemap=false") + if newFileSize != f.size: + if (let e = setFileSize(f.handle.FileHandle, newFileSize, f.size); + e != 0.OSErrorCode): raiseOSError(e) + when defined(linux): #Maybe NetBSD, too? + # On Linux this can be over 100 times faster than a munmap,mmap cycle. 
+ proc mremap(old: pointer; oldSize, newSize: csize_t; flags: cint): + pointer {.importc: "mremap", header: "<sys/mman.h>".} + let newAddr = mremap(f.mem, csize_t(f.size), csize_t(newFileSize), 1.cint) + if newAddr == cast[pointer](MAP_FAILED): + raiseOSError(osLastError()) + else: + if munmap(f.mem, f.size) != 0: + raiseOSError(osLastError()) + let newAddr = mmap(nil, newFileSize, PROT_READ or PROT_WRITE, + f.flags, f.handle, 0) + if newAddr == cast[pointer](MAP_FAILED): + raiseOSError(osLastError()) + f.mem = newAddr + f.size = newFileSize + proc close*(f: var MemFile) = ## closes the memory mapped file `f`. All changes are written back to the ## file system, if `f` was opened with write access. - + var error = false var lastErr: OSErrorCode when defined(windows): - if f.fHandle != INVALID_HANDLE_VALUE: + if f.wasOpened: error = unmapViewOfFile(f.mem) == 0 - lastErr = osLastError() - error = (closeHandle(f.mapHandle) == 0) or error - error = (closeHandle(f.fHandle) == 0) or error + if not error: + error = closeHandle(f.mapHandle) == 0 + if not error and f.fHandle != INVALID_HANDLE_VALUE: + discard closeHandle(f.fHandle) + f.fHandle = INVALID_HANDLE_VALUE + if error: + lastErr = osLastError() else: - if f.handle != 0: - error = munmap(f.mem, f.size) != 0 - lastErr = osLastError() + error = munmap(f.mem, f.size) != 0 + lastErr = osLastError() + if f.handle != -1: error = (close(f.handle) != 0) or error f.size = 0 @@ -240,8 +395,174 @@ proc close*(f: var MemFile) = when defined(windows): f.fHandle = 0 f.mapHandle = 0 + f.wasOpened = false else: - f.handle = 0 - + f.handle = -1 + if error: raiseOSError(lastErr) +type MemSlice* = object ## represent slice of a MemFile for iteration over delimited lines/records + data*: pointer + size*: int + +proc `==`*(x, y: MemSlice): bool = + ## Compare a pair of MemSlice for strict equality. 
+ result = (x.size == y.size and equalMem(x.data, y.data, x.size)) + +proc `$`*(ms: MemSlice): string {.inline.} = + ## Return a Nim string built from a MemSlice. + result.setLen(ms.size) + copyMem(result.cstring, ms.data, ms.size) + +iterator memSlices*(mfile: MemFile, delim = '\l', eat = '\r'): MemSlice {.inline.} = + ## Iterates over \[optional `eat`] `delim`-delimited slices in MemFile `mfile`. + ## + ## Default parameters parse lines ending in either Unix(\\l) or Windows(\\r\\l) + ## style on on a line-by-line basis. I.e., not every line needs the same ending. + ## Unlike readLine(File) & lines(File), archaic MacOS9 \\r-delimited lines + ## are not supported as a third option for each line. Such archaic MacOS9 + ## files can be handled by passing delim='\\r', eat='\\0', though. + ## + ## Delimiters are not part of the returned slice. A final, unterminated line + ## or record is returned just like any other. + ## + ## Non-default delimiters can be passed to allow iteration over other sorts + ## of "line-like" variable length records. Pass eat='\\0' to be strictly + ## `delim`-delimited. (Eating an optional prefix equal to '\\0' is not + ## supported.) + ## + ## This zero copy, memchr-limited interface is probably the fastest way to + ## iterate over line-like records in a file. However, returned (data,size) + ## objects are not Nim strings, bounds checked Nim arrays, or even terminated + ## C strings. So, care is required to access the data (e.g., think C mem* + ## functions, not str* functions). + ## + ## Example: + ## ```nim + ## var count = 0 + ## for slice in memSlices(memfiles.open("foo")): + ## if slice.size > 0 and cast[cstring](slice.data)[0] != '#': + ## inc(count) + ## echo count + ## ``` + + proc c_memchr(cstr: pointer, c: char, n: csize_t): pointer {. 
+ importc: "memchr", header: "<string.h>".} + proc `-!`(p, q: pointer): int {.inline.} = return cast[int](p) -% cast[int](q) + var ms: MemSlice + var ending: pointer + ms.data = mfile.mem + var remaining = mfile.size + while remaining > 0: + ending = c_memchr(ms.data, delim, csize_t(remaining)) + if ending == nil: # unterminated final slice + ms.size = remaining # Weird case..check eat? + yield ms + break + ms.size = ending -! ms.data # delim is NOT included + if eat != '\0' and ms.size > 0 and cast[cstring](ms.data)[ms.size - 1] == eat: + dec(ms.size) # trim pre-delim char + yield ms + ms.data = cast[pointer](cast[int](ending) +% 1) # skip delim + remaining = mfile.size - (ms.data -! mfile.mem) + +iterator lines*(mfile: MemFile, buf: var string, delim = '\l', + eat = '\r'): string {.inline.} = + ## Replace contents of passed buffer with each new line, like + ## `readLine(File) <syncio.html#readLine,File,string>`_. + ## `delim`, `eat`, and delimiting logic is exactly as for `memSlices + ## <#memSlices.i,MemFile,char,char>`_, but Nim strings are returned. + ## + ## Example: + ## ```nim + ## var buffer: string = "" + ## for line in lines(memfiles.open("foo"), buffer): + ## echo line + ## ``` + + for ms in memSlices(mfile, delim, eat): + setLen(buf, ms.size) + if ms.size > 0: + copyMem(addr buf[0], ms.data, ms.size) + yield buf + +iterator lines*(mfile: MemFile, delim = '\l', eat = '\r'): string {.inline.} = + ## Return each line in a file as a Nim string, like + ## `lines(File) <syncio.html#lines.i,File>`_. + ## `delim`, `eat`, and delimiting logic is exactly as for `memSlices + ## <#memSlices.i,MemFile,char,char>`_, but Nim strings are returned. 
+ ## + ## Example: + ## ```nim + ## for line in lines(memfiles.open("foo")): + ## echo line + ## ``` + + var buf = newStringOfCap(80) + for line in lines(mfile, buf, delim, eat): + yield buf + +type + MemMapFileStream* = ref MemMapFileStreamObj ## a stream that encapsulates a `MemFile` + MemMapFileStreamObj* = object of Stream + mf: MemFile + mode: FileMode + pos: ByteAddress + +proc mmsClose(s: Stream) = + MemMapFileStream(s).pos = -1 + close(MemMapFileStream(s).mf) + +proc mmsFlush(s: Stream) = flush(MemMapFileStream(s).mf) + +proc mmsAtEnd(s: Stream): bool = (MemMapFileStream(s).pos >= MemMapFileStream(s).mf.size) or + (MemMapFileStream(s).pos < 0) + +proc mmsSetPosition(s: Stream, pos: int) = + if pos > MemMapFileStream(s).mf.size or pos < 0: + raise newEIO("cannot set pos in stream") + MemMapFileStream(s).pos = pos + +proc mmsGetPosition(s: Stream): int = MemMapFileStream(s).pos + +proc mmsPeekData(s: Stream, buffer: pointer, bufLen: int): int = + let startAddress = cast[int](MemMapFileStream(s).mf.mem) + let p = cast[int](MemMapFileStream(s).pos) + let l = min(bufLen, MemMapFileStream(s).mf.size - p) + moveMem(buffer, cast[pointer](startAddress + p), l) + result = l + +proc mmsReadData(s: Stream, buffer: pointer, bufLen: int): int = + result = mmsPeekData(s, buffer, bufLen) + inc(MemMapFileStream(s).pos, result) + +proc mmsWriteData(s: Stream, buffer: pointer, bufLen: int) = + if MemMapFileStream(s).mode == fmRead: + raise newEIO("cannot write to read-only stream") + let size = MemMapFileStream(s).mf.size + if MemMapFileStream(s).pos + bufLen > size: + raise newEIO("cannot write to stream") + let p = cast[int](MemMapFileStream(s).mf.mem) + + cast[int](MemMapFileStream(s).pos) + moveMem(cast[pointer](p), buffer, bufLen) + inc(MemMapFileStream(s).pos, bufLen) + +proc newMemMapFileStream*(filename: string, mode: FileMode = fmRead, + fileSize: int = -1): MemMapFileStream = + ## creates a new stream from the file named `filename` with the mode `mode`. 
+ ## Raises ## `OSError` if the file cannot be opened. See the `system + ## <system.html>`_ module for a list of available FileMode enums. + ## `fileSize` can only be set if the file does not exist and is opened + ## with write access (e.g., with fmReadWrite). + var mf: MemFile = open(filename, mode, newFileSize = fileSize) + new(result) + result.mode = mode + result.mf = mf + result.closeImpl = mmsClose + result.atEndImpl = mmsAtEnd + result.setPositionImpl = mmsSetPosition + result.getPositionImpl = mmsGetPosition + result.readDataImpl = mmsReadData + result.peekDataImpl = mmsPeekData + result.writeDataImpl = mmsWriteData + result.flushImpl = mmsFlush diff --git a/lib/pure/mersenne.nim b/lib/pure/mersenne.nim deleted file mode 100644 index a6a781cb8..000000000 --- a/lib/pure/mersenne.nim +++ /dev/null @@ -1,39 +0,0 @@ -import unsigned - -type - MersenneTwister* = object - mt: array[0..623, uint32] - index: int - -{.deprecated: [TMersenneTwister: MersenneTwister].} - -proc newMersenneTwister*(seed: int): MersenneTwister = - result.index = 0 - result.mt[0]= uint32(seed) - for i in 1..623'u32: - result.mt[i]= (0x6c078965'u32 * (result.mt[i-1] xor (result.mt[i-1] shr 30'u32)) + i) - -proc generateNumbers(m: var MersenneTwister) = - for i in 0..623: - var y = (m.mt[i] and 0x80000000'u32) + (m.mt[(i+1) mod 624] and 0x7fffffff'u32) - m.mt[i] = m.mt[(i+397) mod 624] xor uint32(y shr 1'u32) - if (y mod 2'u32) != 0: - m.mt[i] = m.mt[i] xor 0x9908b0df'u32 - -proc getNum*(m: var MersenneTwister): int = - if m.index == 0: - generateNumbers(m) - var y = m.mt[m.index] - y = y xor (y shr 11'u32) - y = y xor ((7'u32 shl y) and 0x9d2c5680'u32) - y = y xor ((15'u32 shl y) and 0xefc60000'u32) - y = y xor (y shr 18'u32) - m.index = (m.index+1) mod 624 - return int(y) - -# Test -when isMainModule: - var mt = newMersenneTwister(2525) - - for i in 0..99: - echo mt.getNum diff --git a/lib/pure/mimetypes.nim b/lib/pure/mimetypes.nim index a52ba4ebe..ff639e8e5 100644 --- 
a/lib/pure/mimetypes.nim +++ b/lib/pure/mimetypes.nim @@ -8,515 +8,1064 @@ # ## This module implements a mimetypes database -import strtabs + +runnableExamples: + var m = newMimetypes() + doAssert m.getMimetype("mp4") == "video/mp4" + doAssert m.getExt("text/html") == "html" + ## Values can be uppercase too. + doAssert m.getMimetype("MP4") == "video/mp4" + doAssert m.getExt("TEXT/HTML") == "html" + ## If values are invalid then `default` is returned. + doAssert m.getMimetype("INVALID") == "text/plain" + doAssert m.getExt("INVALID/NONEXISTENT") == "txt" + doAssert m.getMimetype("") == "text/plain" + doAssert m.getExt("") == "txt" + ## Register new Mimetypes. + m.register(ext = "fakext", mimetype = "text/fakelang") + doAssert m.getMimetype("fakext") == "text/fakelang" + doAssert m.getMimetype("FaKeXT") == "text/fakelang" + +import std/tables +from std/strutils import startsWith, toLowerAscii, strip + +when defined(nimPreviewSlimSystem): + import std/assertions + + type MimeDB* = object - mimes: StringTableRef - -{.deprecated: [TMimeDB: MimeDB].} + mimes: OrderedTable[string, string] const mimes* = { - "ez": "application/andrew-inset", - "anx": "application/annodex", - "atom": "application/atom+xml", - "atomcat": "application/atomcat+xml", - "atomsrv": "application/atomserv+xml", - "lin": "application/bbolin", - "cap": "application/cap", - "pcap": "application/cap", - "cu": "application/cu-seeme", - "davmount": "application/davmount+xml", - "tsp": "application/dsptype", - "es": "application/ecmascript", - "spl": "application/futuresplash", - "hta": "application/hta", - "jar": "application/java-archive", - "ser": "application/java-serialized-object", - "class": "application/java-vm", - "js": "application/javascript", - "m3g": "application/m3g", - "hqx": "application/mac-binhex40", - "cpt": "application/mac-compactpro", - "nb": "application/mathematica", - "nbp": "application/mathematica", - "mdb": "application/msaccess", - "doc": "application/msword", - "dot": 
"application/msword", - "mxf": "application/mxf", - "bin": "application/octet-stream", - "oda": "application/oda", - "ogx": "application/ogg", - "pdf": "application/pdf", - "key": "application/pgp-keys", - "pgp": "application/pgp-signature", - "prf": "application/pics-rules", - "ps": "application/postscript", - "ai": "application/postscript", - "eps": "application/postscript", - "epsi": "application/postscript", - "epsf": "application/postscript", - "eps2": "application/postscript", - "eps3": "application/postscript", - "rar": "application/rar", - "rdf": "application/rdf+xml", - "rss": "application/rss+xml", - "rtf": "application/rtf", - "smi": "application/smil", - "smil": "application/smil", - "xhtml": "application/xhtml+xml", - "xht": "application/xhtml+xml", - "xml": "application/xml", - "xsl": "application/xml", - "xsd": "application/xml", - "xspf": "application/xspf+xml", - "zip": "application/zip", - "apk": "application/vnd.android.package-archive", - "cdy": "application/vnd.cinderella", - "kml": "application/vnd.google-earth.kml+xml", - "kmz": "application/vnd.google-earth.kmz", - "xul": "application/vnd.mozilla.xul+xml", - "xls": "application/vnd.ms-excel", - "xlb": "application/vnd.ms-excel", - "xlt": "application/vnd.ms-excel", - "cat": "application/vnd.ms-pki.seccat", - "stl": "application/vnd.ms-pki.stl", - "ppt": "application/vnd.ms-powerpoint", - "pps": "application/vnd.ms-powerpoint", - "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "xltx": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", - "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation", - "ppsx": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", - "potx": "application/vnd.openxmlformats-officedocument.presentationml.template", - "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "dotx": 
"application/vnd.openxmlformats-officedocument.wordprocessingml.template", - "cod": "application/vnd.rim.cod", - "mmf": "application/vnd.smaf", - "sis": "application/vnd.symbian.install", - "vsd": "application/vnd.visio", - "wbxml": "application/vnd.wap.wbxml", - "wmlc": "application/vnd.wap.wmlc", - "wmlsc": "application/vnd.wap.wmlscriptc", - "wpd": "application/vnd.wordperfect", - "wp5": "application/vnd.wordperfect5.1", - "wk": "application/x-123", - "7z": "application/x-7z-compressed", - "abw": "application/x-abiword", - "dmg": "application/x-apple-diskimage", - "bcpio": "application/x-bcpio", - "torrent": "application/x-bittorrent", - "cab": "application/x-cab", - "cbr": "application/x-cbr", - "cbz": "application/x-cbz", - "cdf": "application/x-cdf", - "cda": "application/x-cdf", - "vcd": "application/x-cdlink", - "pgn": "application/x-chess-pgn", - "cpio": "application/x-cpio", - "csh": "application/x-csh", - "deb": "application/x-debian-package", - "udeb": "application/x-debian-package", - "dcr": "application/x-director", - "dir": "application/x-director", - "dxr": "application/x-director", - "dms": "application/x-dms", - "wad": "application/x-doom", - "dvi": "application/x-dvi", - "rhtml": "application/x-httpd-eruby", - "pfa": "application/x-font", - "pfb": "application/x-font", - "gsf": "application/x-font", - "pcf": "application/x-font", - "pcf.Z": "application/x-font", - "mm": "application/x-freemind", - "spl": "application/x-futuresplash", - "gnumeric": "application/x-gnumeric", - "sgf": "application/x-go-sgf", - "gcf": "application/x-graphing-calculator", - "gtar": "application/x-gtar", - "tgz": "application/x-gtar", - "taz": "application/x-gtar", - "hdf": "application/x-hdf", - "phtml": "application/x-httpd-php", - "pht": "application/x-httpd-php", - "php": "application/x-httpd-php", - "phps": "application/x-httpd-php-source", - "php3": "application/x-httpd-php3", - "php3p": "application/x-httpd-php3-preprocessed", - "php4": 
"application/x-httpd-php4", - "php5": "application/x-httpd-php5", - "ica": "application/x-ica", - "info": "application/x-info", - "ins": "application/x-internet-signup", - "isp": "application/x-internet-signup", - "iii": "application/x-iphone", - "iso": "application/x-iso9660-image", - "jam": "application/x-jam", - "jnlp": "application/x-java-jnlp-file", - "jmz": "application/x-jmol", - "chrt": "application/x-kchart", - "kil": "application/x-killustrator", - "skp": "application/x-koan", - "skd": "application/x-koan", - "skt": "application/x-koan", - "skm": "application/x-koan", - "kpr": "application/x-kpresenter", - "kpt": "application/x-kpresenter", - "ksp": "application/x-kspread", - "kwd": "application/x-kword", - "kwt": "application/x-kword", - "latex": "application/x-latex", - "lha": "application/x-lha", - "lyx": "application/x-lyx", - "lzh": "application/x-lzh", - "lzx": "application/x-lzx", - "frm": "application/x-maker", - "maker": "application/x-maker", - "frame": "application/x-maker", - "fm": "application/x-maker", - "fb": "application/x-maker", - "book": "application/x-maker", - "fbdoc": "application/x-maker", - "mif": "application/x-mif", - "wmd": "application/x-ms-wmd", - "wmz": "application/x-ms-wmz", - "com": "application/x-msdos-program", - "exe": "application/x-msdos-program", - "bat": "application/x-msdos-program", - "dll": "application/x-msdos-program", - "msi": "application/x-msi", - "nc": "application/x-netcdf", - "pac": "application/x-ns-proxy-autoconfig", - "dat": "application/x-ns-proxy-autoconfig", - "nwc": "application/x-nwc", - "o": "application/x-object", - "oza": "application/x-oz-application", - "p7r": "application/x-pkcs7-certreqresp", - "crl": "application/x-pkcs7-crl", - "pyc": "application/x-python-code", - "pyo": "application/x-python-code", - "qgs": "application/x-qgis", - "shp": "application/x-qgis", - "shx": "application/x-qgis", - "qtl": "application/x-quicktimeplayer", - "rpm": "application/x-redhat-package-manager", - "rb": 
"application/x-ruby", - "sh": "application/x-sh", - "shar": "application/x-shar", - "swf": "application/x-shockwave-flash", - "swfl": "application/x-shockwave-flash", - "scr": "application/x-silverlight", - "sit": "application/x-stuffit", - "sitx": "application/x-stuffit", - "sv4cpio": "application/x-sv4cpio", - "sv4crc": "application/x-sv4crc", - "tar": "application/x-tar", - "tcl": "application/x-tcl", - "gf": "application/x-tex-gf", - "pk": "application/x-tex-pk", - "texinfo": "application/x-texinfo", - "texi": "application/x-texinfo", - "~": "application/x-trash", - "%": "application/x-trash", - "bak": "application/x-trash", - "old": "application/x-trash", - "sik": "application/x-trash", - "t": "application/x-troff", - "tr": "application/x-troff", - "roff": "application/x-troff", - "man": "application/x-troff-man", - "me": "application/x-troff-me", - "ms": "application/x-troff-ms", - "ustar": "application/x-ustar", - "src": "application/x-wais-source", - "wz": "application/x-wingz", - "crt": "application/x-x509-ca-cert", - "xcf": "application/x-xcf", - "fig": "application/x-xfig", - "xpi": "application/x-xpinstall", - "amr": "audio/amr", - "awb": "audio/amr-wb", - "amr": "audio/amr", - "awb": "audio/amr-wb", - "axa": "audio/annodex", - "au": "audio/basic", - "snd": "audio/basic", - "flac": "audio/flac", - "mid": "audio/midi", - "midi": "audio/midi", - "kar": "audio/midi", - "mpga": "audio/mpeg", - "mpega": "audio/mpeg", - "mp2": "audio/mpeg", - "mp3": "audio/mpeg", - "m4a": "audio/mpeg", - "m3u": "audio/mpegurl", - "oga": "audio/ogg", - "ogg": "audio/ogg", - "spx": "audio/ogg", - "sid": "audio/prs.sid", - "aif": "audio/x-aiff", - "aiff": "audio/x-aiff", - "aifc": "audio/x-aiff", - "gsm": "audio/x-gsm", - "m3u": "audio/x-mpegurl", - "wma": "audio/x-ms-wma", - "wax": "audio/x-ms-wax", - "ra": "audio/x-pn-realaudio", - "rm": "audio/x-pn-realaudio", - "ram": "audio/x-pn-realaudio", - "ra": "audio/x-realaudio", - "pls": "audio/x-scpls", - "sd2": "audio/x-sd2", - 
"wav": "audio/x-wav", - "alc": "chemical/x-alchemy", - "cac": "chemical/x-cache", - "cache": "chemical/x-cache", - "csf": "chemical/x-cache-csf", - "cbin": "chemical/x-cactvs-binary", - "cascii": "chemical/x-cactvs-binary", - "ctab": "chemical/x-cactvs-binary", - "cdx": "chemical/x-cdx", - "cer": "chemical/x-cerius", - "c3d": "chemical/x-chem3d", - "chm": "chemical/x-chemdraw", - "cif": "chemical/x-cif", - "cmdf": "chemical/x-cmdf", - "cml": "chemical/x-cml", - "cpa": "chemical/x-compass", - "bsd": "chemical/x-crossfire", - "csml": "chemical/x-csml", - "csm": "chemical/x-csml", - "ctx": "chemical/x-ctx", - "cxf": "chemical/x-cxf", - "cef": "chemical/x-cxf", - "smi": "#chemical/x-daylight-smiles", - "emb": "chemical/x-embl-dl-nucleotide", - "embl": "chemical/x-embl-dl-nucleotide", - "spc": "chemical/x-galactic-spc", - "inp": "chemical/x-gamess-input", - "gam": "chemical/x-gamess-input", - "gamin": "chemical/x-gamess-input", - "fch": "chemical/x-gaussian-checkpoint", - "fchk": "chemical/x-gaussian-checkpoint", - "cub": "chemical/x-gaussian-cube", - "gau": "chemical/x-gaussian-input", - "gjc": "chemical/x-gaussian-input", - "gjf": "chemical/x-gaussian-input", - "gal": "chemical/x-gaussian-log", - "gcg": "chemical/x-gcg8-sequence", - "gen": "chemical/x-genbank", - "hin": "chemical/x-hin", - "istr": "chemical/x-isostar", - "ist": "chemical/x-isostar", - "jdx": "chemical/x-jcamp-dx", - "dx": "chemical/x-jcamp-dx", - "kin": "chemical/x-kinemage", - "mcm": "chemical/x-macmolecule", - "mmd": "chemical/x-macromodel-input", - "mmod": "chemical/x-macromodel-input", - "mol": "chemical/x-mdl-molfile", - "rd": "chemical/x-mdl-rdfile", - "rxn": "chemical/x-mdl-rxnfile", - "sd": "chemical/x-mdl-sdfile", - "sdf": "chemical/x-mdl-sdfile", - "tgf": "chemical/x-mdl-tgf", - "mif": "#chemical/x-mif", - "mcif": "chemical/x-mmcif", - "mol2": "chemical/x-mol2", - "b": "chemical/x-molconn-Z", - "gpt": "chemical/x-mopac-graph", - "mop": "chemical/x-mopac-input", - "mopcrt": 
"chemical/x-mopac-input", - "mpc": "chemical/x-mopac-input", - "zmt": "chemical/x-mopac-input", - "moo": "chemical/x-mopac-out", - "mvb": "chemical/x-mopac-vib", - "asn": "chemical/x-ncbi-asn1", - "prt": "chemical/x-ncbi-asn1-ascii", - "ent": "chemical/x-ncbi-asn1-ascii", - "val": "chemical/x-ncbi-asn1-binary", - "aso": "chemical/x-ncbi-asn1-binary", - "asn": "chemical/x-ncbi-asn1-spec", - "pdb": "chemical/x-pdb", - "ent": "chemical/x-pdb", - "ros": "chemical/x-rosdal", - "sw": "chemical/x-swissprot", - "vms": "chemical/x-vamas-iso14976", - "vmd": "chemical/x-vmd", - "xtel": "chemical/x-xtel", - "xyz": "chemical/x-xyz", - "gif": "image/gif", - "ief": "image/ief", - "jpeg": "image/jpeg", - "jpg": "image/jpeg", - "jpe": "image/jpeg", - "pcx": "image/pcx", - "png": "image/png", - "svg": "image/svg+xml", - "svgz": "image/svg+xml", - "tiff": "image/tiff", - "tif": "image/tiff", - "djvu": "image/vnd.djvu", - "djv": "image/vnd.djvu", - "wbmp": "image/vnd.wap.wbmp", - "cr2": "image/x-canon-cr2", - "crw": "image/x-canon-crw", - "ras": "image/x-cmu-raster", - "cdr": "image/x-coreldraw", - "pat": "image/x-coreldrawpattern", - "cdt": "image/x-coreldrawtemplate", - "cpt": "image/x-corelphotopaint", - "erf": "image/x-epson-erf", - "ico": "image/x-icon", - "art": "image/x-jg", - "jng": "image/x-jng", - "bmp": "image/x-ms-bmp", - "nef": "image/x-nikon-nef", - "orf": "image/x-olympus-orf", - "psd": "image/x-photoshop", - "pnm": "image/x-portable-anymap", - "pbm": "image/x-portable-bitmap", - "pgm": "image/x-portable-graymap", - "ppm": "image/x-portable-pixmap", - "rgb": "image/x-rgb", - "xbm": "image/x-xbitmap", - "xpm": "image/x-xpixmap", - "xwd": "image/x-xwindowdump", - "eml": "message/rfc822", - "igs": "model/iges", - "iges": "model/iges", - "msh": "model/mesh", - "mesh": "model/mesh", - "silo": "model/mesh", - "wrl": "model/vrml", - "vrml": "model/vrml", - "x3dv": "model/x3d+vrml", - "x3d": "model/x3d+xml", - "x3db": "model/x3d+binary", - "manifest": "text/cache-manifest", - 
"ics": "text/calendar", - "icz": "text/calendar", - "css": "text/css", - "csv": "text/csv", - "323": "text/h323", - "html": "text/html", - "htm": "text/html", - "shtml": "text/html", - "uls": "text/iuls", - "mml": "text/mathml", - "asc": "text/plain", - "txt": "text/plain", - "text": "text/plain", - "pot": "text/plain", - "brf": "text/plain", - "rtx": "text/richtext", - "sct": "text/scriptlet", - "wsc": "text/scriptlet", - "tm": "text/texmacs", - "ts": "text/texmacs", - "tsv": "text/tab-separated-values", - "jad": "text/vnd.sun.j2me.app-descriptor", - "wml": "text/vnd.wap.wml", - "wmls": "text/vnd.wap.wmlscript", - "bib": "text/x-bibtex", - "boo": "text/x-boo", - "h++": "text/x-c++hdr", - "hpp": "text/x-c++hdr", - "hxx": "text/x-c++hdr", - "hh": "text/x-c++hdr", - "c++": "text/x-c++src", - "cpp": "text/x-c++src", - "cxx": "text/x-c++src", - "cc": "text/x-c++src", - "h": "text/x-chdr", - "htc": "text/x-component", - "csh": "text/x-csh", - "c": "text/x-csrc", - "d": "text/x-dsrc", - "diff": "text/x-diff", - "patch": "text/x-diff", - "hs": "text/x-haskell", - "java": "text/x-java", - "lhs": "text/x-literate-haskell", - "moc": "text/x-moc", - "p": "text/x-pascal", - "pas": "text/x-pascal", - "gcd": "text/x-pcs-gcd", - "pl": "text/x-perl", - "pm": "text/x-perl", - "py": "text/x-python", - "scala": "text/x-scala", - "etx": "text/x-setext", - "sh": "text/x-sh", - "tcl": "text/x-tcl", - "tk": "text/x-tcl", - "tex": "text/x-tex", - "ltx": "text/x-tex", - "sty": "text/x-tex", - "cls": "text/x-tex", - "vcs": "text/x-vcalendar", - "vcf": "text/x-vcard", - "3gp": "video/3gpp", - "axv": "video/annodex", - "dl": "video/dl", - "dif": "video/dv", - "dv": "video/dv", - "fli": "video/fli", - "gl": "video/gl", - "mpeg": "video/mpeg", - "mpg": "video/mpeg", - "mpe": "video/mpeg", - "mp4": "video/mp4", - "qt": "video/quicktime", - "mov": "video/quicktime", - "ogv": "video/ogg", - "mxu": "video/vnd.mpegurl", - "flv": "video/x-flv", - "lsf": "video/x-la-asf", - "lsx": "video/x-la-asf", - 
"mng": "video/x-mng", - "asf": "video/x-ms-asf", - "asx": "video/x-ms-asf", - "wm": "video/x-ms-wm", - "wmv": "video/x-ms-wmv", - "wmx": "video/x-ms-wmx", - "wvx": "video/x-ms-wvx", - "avi": "video/x-msvideo", - "movie": "video/x-sgi-movie", - "mpv": "video/x-matroska", - "mkv": "video/x-matroska", - "ice": "x-conference/x-cooltalk", - "sisx": "x-epoc/x-sisx-app", - "vrm": "x-world/x-vrml", - "vrml": "x-world/x-vrml", - "wrl": "x-world/x-vrml"} + "ez": "application/andrew-inset", + "aw": "application/applixware", + "atom": "application/atom+xml", + "atomcat": "application/atomcat+xml", + "atomsvc": "application/atomsvc+xml", + "ccxml": "application/ccxml+xml", + "cdmia": "application/cdmi-capability", + "cdmic": "application/cdmi-container", + "cdmid": "application/cdmi-domain", + "cdmio": "application/cdmi-object", + "cdmiq": "application/cdmi-queue", + "cu": "application/cu-seeme", + "davmount": "application/davmount+xml", + "dbk": "application/docbook+xml", + "dssc": "application/dssc+der", + "xdssc": "application/dssc+xml", + "ecma": "application/ecmascript", + "emma": "application/emma+xml", + "epub": "application/epub+zip", + "exi": "application/exi", + "pfr": "application/font-tdpfr", + "gml": "application/gml+xml", + "gpx": "application/gpx+xml", + "gxf": "application/gxf", + "stk": "application/hyperstudio", + "ink": "application/inkml+xml", + "inkml": "application/inkml+xml", + "ipfix": "application/ipfix", + "jar": "application/java-archive", + "ser": "application/java-serialized-object", + "class": "application/java-vm", + "json": "application/json", + "jsonml": "application/jsonml+json", + "lostxml": "application/lost+xml", + "hqx": "application/mac-binhex40", + "cpt": "application/mac-compactpro", + "mads": "application/mads+xml", + "mrc": "application/marc", + "mrcx": "application/marcxml+xml", + "ma": "application/mathematica", + "nb": "application/mathematica", + "mb": "application/mathematica", + "mathml": "application/mathml+xml", + "mbox": 
"application/mbox", + "mscml": "application/mediaservercontrol+xml", + "metalink": "application/metalink+xml", + "meta4": "application/metalink4+xml", + "mets": "application/mets+xml", + "mods": "application/mods+xml", + "m21": "application/mp21", + "mp21": "application/mp21", + "mp4s": "application/mp4", + "doc": "application/msword", + "dot": "application/msword", + "mxf": "application/mxf", + "bin": "application/octet-stream", + "dms": "application/octet-stream", + "lrf": "application/octet-stream", + "mar": "application/octet-stream", + "so": "application/octet-stream", + "dist": "application/octet-stream", + "distz": "application/octet-stream", + "pkg": "application/octet-stream", + "bpk": "application/octet-stream", + "dump": "application/octet-stream", + "elc": "application/octet-stream", + "deploy": "application/octet-stream", + "oda": "application/oda", + "opf": "application/oebps-package+xml", + "ogx": "application/ogg", + "omdoc": "application/omdoc+xml", + "onetoc": "application/onenote", + "onetoc2": "application/onenote", + "onetmp": "application/onenote", + "onepkg": "application/onenote", + "oxps": "application/oxps", + "xer": "application/patch-ops-error+xml", + "pdf": "application/pdf", + "pgp": "application/pgp-encrypted", + "asc": "application/pgp-signature", + "sig": "application/pgp-signature", + "prf": "application/pics-rules", + "p10": "application/pkcs10", + "p7m": "application/pkcs7-mime", + "p7c": "application/pkcs7-mime", + "p7s": "application/pkcs7-signature", + "p8": "application/pkcs8", + "ac": "application/pkix-attr-cert", + "cer": "application/pkix-cert", + "crl": "application/pkix-crl", + "pkipath": "application/pkix-pkipath", + "pki": "application/pkixcmp", + "pls": "application/pls+xml", + "ai": "application/postscript", + "eps": "application/postscript", + "ps": "application/postscript", + "cww": "application/prs.cww", + "pskcxml": "application/pskc+xml", + "rdf": "application/rdf+xml", + "rif": "application/reginfo+xml", + 
"rnc": "application/relax-ng-compact-syntax", + "rl": "application/resource-lists+xml", + "rld": "application/resource-lists-diff+xml", + "rs": "application/rls-services+xml", + "gbr": "application/rpki-ghostbusters", + "mft": "application/rpki-manifest", + "roa": "application/rpki-roa", + "rsd": "application/rsd+xml", + "rss": "application/rss+xml", + "rtf": "application/rtf", + "sbml": "application/sbml+xml", + "scq": "application/scvp-cv-request", + "scs": "application/scvp-cv-response", + "spq": "application/scvp-vp-request", + "spp": "application/scvp-vp-response", + "sdp": "application/sdp", + "setpay": "application/set-payment-initiation", + "setreg": "application/set-registration-initiation", + "shf": "application/shf+xml", + "smi": "application/smil+xml", + "smil": "application/smil+xml", + "rq": "application/sparql-query", + "srx": "application/sparql-results+xml", + "gram": "application/srgs", + "grxml": "application/srgs+xml", + "sru": "application/sru+xml", + "ssdl": "application/ssdl+xml", + "ssml": "application/ssml+xml", + "tei": "application/tei+xml", + "teicorpus": "application/tei+xml", + "tfi": "application/thraud+xml", + "tsd": "application/timestamped-data", + "plb": "application/vnd.3gpp.pic-bw-large", + "psb": "application/vnd.3gpp.pic-bw-small", + "pvb": "application/vnd.3gpp.pic-bw-var", + "tcap": "application/vnd.3gpp2.tcap", + "pwn": "application/vnd.3m.post-it-notes", + "aso": "application/vnd.accpac.simply.aso", + "imp": "application/vnd.accpac.simply.imp", + "acu": "application/vnd.acucobol", + "atc": "application/vnd.acucorp", + "acutc": "application/vnd.acucorp", + "air": "application/vnd.adobe.air-application-installer-package+zip", + "fcdt": "application/vnd.adobe.formscentral.fcdt", + "fxp": "application/vnd.adobe.fxp", + "fxpl": "application/vnd.adobe.fxp", + "xdp": "application/vnd.adobe.xdp+xml", + "xfdf": "application/vnd.adobe.xfdf", + "ahead": "application/vnd.ahead.space", + "azf": "application/vnd.airzip.filesecure.azf", 
+ "azs": "application/vnd.airzip.filesecure.azs", + "azw": "application/vnd.amazon.ebook", + "acc": "application/vnd.americandynamics.acc", + "ami": "application/vnd.amiga.ami", + "apk": "application/vnd.android.package-archive", + "cii": "application/vnd.anser-web-certificate-issue-initiation", + "fti": "application/vnd.anser-web-funds-transfer-initiation", + "atx": "application/vnd.antix.game-component", + "mpkg": "application/vnd.apple.installer+xml", + "m3u8": "application/vnd.apple.mpegurl", + "swi": "application/vnd.aristanetworks.swi", + "iota": "application/vnd.astraea-software.iota", + "aep": "application/vnd.audiograph", + "mpm": "application/vnd.blueice.multipass", + "bmi": "application/vnd.bmi", + "rep": "application/vnd.businessobjects", + "cdxml": "application/vnd.chemdraw+xml", + "mmd": "application/vnd.chipnuts.karaoke-mmd", + "cdy": "application/vnd.cinderella", + "cla": "application/vnd.claymore", + "rp9": "application/vnd.cloanto.rp9", + "c4g": "application/vnd.clonk.c4group", + "c4d": "application/vnd.clonk.c4group", + "c4f": "application/vnd.clonk.c4group", + "c4p": "application/vnd.clonk.c4group", + "c4u": "application/vnd.clonk.c4group", + "c11amc": "application/vnd.cluetrust.cartomobile-config", + "c11amz": "application/vnd.cluetrust.cartomobile-config-pkg", + "csp": "application/vnd.commonspace", + "cdbcmsg": "application/vnd.contact.cmsg", + "cmc": "application/vnd.cosmocaller", + "clkx": "application/vnd.crick.clicker", + "clkk": "application/vnd.crick.clicker.keyboard", + "clkp": "application/vnd.crick.clicker.palette", + "clkt": "application/vnd.crick.clicker.template", + "clkw": "application/vnd.crick.clicker.wordbank", + "wbs": "application/vnd.criticaltools.wbs+xml", + "pml": "application/vnd.ctc-posml", + "ppd": "application/vnd.cups-ppd", + "car": "application/vnd.curl.car", + "pcurl": "application/vnd.curl.pcurl", + "dart": "application/vnd.dart", + "rdz": "application/vnd.data-vision.rdz", + "uvf": "application/vnd.dece.data", + 
"uvvf": "application/vnd.dece.data", + "uvd": "application/vnd.dece.data", + "uvvd": "application/vnd.dece.data", + "uvt": "application/vnd.dece.ttml+xml", + "uvvt": "application/vnd.dece.ttml+xml", + "uvx": "application/vnd.dece.unspecified", + "uvvx": "application/vnd.dece.unspecified", + "uvz": "application/vnd.dece.zip", + "uvvz": "application/vnd.dece.zip", + "fe_launch": "application/vnd.denovo.fcselayout-link", + "dna": "application/vnd.dna", + "mlp": "application/vnd.dolby.mlp", + "dpg": "application/vnd.dpgraph", + "dfac": "application/vnd.dreamfactory", + "kpxx": "application/vnd.ds-keypoint", + "ait": "application/vnd.dvb.ait", + "svc": "application/vnd.dvb.service", + "geo": "application/vnd.dynageo", + "mag": "application/vnd.ecowin.chart", + "nml": "application/vnd.enliven", + "esf": "application/vnd.epson.esf", + "msf": "application/vnd.epson.msf", + "qam": "application/vnd.epson.quickanime", + "slt": "application/vnd.epson.salt", + "ssf": "application/vnd.epson.ssf", + "es3": "application/vnd.eszigno3+xml", + "et3": "application/vnd.eszigno3+xml", + "ez2": "application/vnd.ezpix-album", + "ez3": "application/vnd.ezpix-package", + "fdf": "application/vnd.fdf", + "mseed": "application/vnd.fdsn.mseed", + "seed": "application/vnd.fdsn.seed", + "dataless": "application/vnd.fdsn.seed", + "gph": "application/vnd.flographit", + "ftc": "application/vnd.fluxtime.clip", + "fm": "application/vnd.framemaker", + "frame": "application/vnd.framemaker", + "maker": "application/vnd.framemaker", + "book": "application/vnd.framemaker", + "fnc": "application/vnd.frogans.fnc", + "ltf": "application/vnd.frogans.ltf", + "fsc": "application/vnd.fsc.weblaunch", + "oas": "application/vnd.fujitsu.oasys", + "oa2": "application/vnd.fujitsu.oasys2", + "oa3": "application/vnd.fujitsu.oasys3", + "fg5": "application/vnd.fujitsu.oasysgp", + "bh2": "application/vnd.fujitsu.oasysprs", + "ddd": "application/vnd.fujixerox.ddd", + "xdw": "application/vnd.fujixerox.docuworks", + "xbd": 
"application/vnd.fujixerox.docuworks.binder", + "fzs": "application/vnd.fuzzysheet", + "txd": "application/vnd.genomatix.tuxedo", + "ggb": "application/vnd.geogebra.file", + "ggs": "application/vnd.geogebra.slides", + "ggt": "application/vnd.geogebra.tool", + "gex": "application/vnd.geometry-explorer", + "gre": "application/vnd.geometry-explorer", + "gxt": "application/vnd.geonext", + "g2w": "application/vnd.geoplan", + "g3w": "application/vnd.geospace", + "gmx": "application/vnd.gmx", + "kml": "application/vnd.google-earth.kml+xml", + "kmz": "application/vnd.google-earth.kmz", + "gqf": "application/vnd.grafeq", + "gqs": "application/vnd.grafeq", + "gac": "application/vnd.groove-account", + "ghf": "application/vnd.groove-help", + "gim": "application/vnd.groove-identity-message", + "grv": "application/vnd.groove-injector", + "gtm": "application/vnd.groove-tool-message", + "tpl": "application/vnd.groove-tool-template", + "vcg": "application/vnd.groove-vcard", + "hal": "application/vnd.hal+xml", + "zmm": "application/vnd.handheld-entertainment+xml", + "hbci": "application/vnd.hbci", + "les": "application/vnd.hhe.lesson-player", + "hpgl": "application/vnd.hp-hpgl", + "hpid": "application/vnd.hp-hpid", + "hps": "application/vnd.hp-hps", + "jlt": "application/vnd.hp-jlyt", + "pcl": "application/vnd.hp-pcl", + "pclxl": "application/vnd.hp-pclxl", + "sfd-hdstx": "application/vnd.hydrostatix.sof-data", + "mpy": "application/vnd.ibm.minipay", + "afp": "application/vnd.ibm.modcap", + "listafp": "application/vnd.ibm.modcap", + "list3820": "application/vnd.ibm.modcap", + "irm": "application/vnd.ibm.rights-management", + "sc": "application/vnd.ibm.secure-container", + "icc": "application/vnd.iccprofile", + "icm": "application/vnd.iccprofile", + "igl": "application/vnd.igloader", + "ivp": "application/vnd.immervision-ivp", + "ivu": "application/vnd.immervision-ivu", + "igm": "application/vnd.insors.igm", + "xpw": "application/vnd.intercon.formnet", + "xpx": 
"application/vnd.intercon.formnet", + "i2g": "application/vnd.intergeo", + "qbo": "application/vnd.intu.qbo", + "qfx": "application/vnd.intu.qfx", + "rcprofile": "application/vnd.ipunplugged.rcprofile", + "irp": "application/vnd.irepository.package+xml", + "xpr": "application/vnd.is-xpr", + "fcs": "application/vnd.isac.fcs", + "jam": "application/vnd.jam", + "rms": "application/vnd.jcp.javame.midlet-rms", + "jisp": "application/vnd.jisp", + "joda": "application/vnd.joost.joda-archive", + "ktz": "application/vnd.kahootz", + "ktr": "application/vnd.kahootz", + "karbon": "application/vnd.kde.karbon", + "chrt": "application/vnd.kde.kchart", + "kfo": "application/vnd.kde.kformula", + "flw": "application/vnd.kde.kivio", + "kon": "application/vnd.kde.kontour", + "kpr": "application/vnd.kde.kpresenter", + "kpt": "application/vnd.kde.kpresenter", + "ksp": "application/vnd.kde.kspread", + "kwd": "application/vnd.kde.kword", + "kwt": "application/vnd.kde.kword", + "htke": "application/vnd.kenameaapp", + "kia": "application/vnd.kidspiration", + "kne": "application/vnd.kinar", + "knp": "application/vnd.kinar", + "skp": "application/vnd.koan", + "skd": "application/vnd.koan", + "skt": "application/vnd.koan", + "skm": "application/vnd.koan", + "sse": "application/vnd.kodak-descriptor", + "lasxml": "application/vnd.las.las+xml", + "lbd": "application/vnd.llamagraphics.life-balance.desktop", + "lbe": "application/vnd.llamagraphics.life-balance.exchange+xml", + "123": "application/vnd.lotus-1-2-3", + "apr": "application/vnd.lotus-approach", + "pre": "application/vnd.lotus-freelance", + "nsf": "application/vnd.lotus-notes", + "org": "application/vnd.lotus-organizer", + "scm": "application/vnd.lotus-screencam", + "lwp": "application/vnd.lotus-wordpro", + "portpkg": "application/vnd.macports.portpkg", + "mcd": "application/vnd.mcd", + "mc1": "application/vnd.medcalcdata", + "cdkey": "application/vnd.mediastation.cdkey", + "mwf": "application/vnd.mfer", + "mfm": "application/vnd.mfmp", 
+ "flo": "application/vnd.micrografx.flo", + "igx": "application/vnd.micrografx.igx", + "mif": "application/vnd.mif", + "daf": "application/vnd.mobius.daf", + "dis": "application/vnd.mobius.dis", + "mbk": "application/vnd.mobius.mbk", + "mqy": "application/vnd.mobius.mqy", + "msl": "application/vnd.mobius.msl", + "plc": "application/vnd.mobius.plc", + "txf": "application/vnd.mobius.txf", + "mpn": "application/vnd.mophun.application", + "mpc": "application/vnd.mophun.certificate", + "xul": "application/vnd.mozilla.xul+xml", + "cil": "application/vnd.ms-artgalry", + "cab": "application/vnd.ms-cab-compressed", + "xls": "application/vnd.ms-excel", + "xlm": "application/vnd.ms-excel", + "xla": "application/vnd.ms-excel", + "xlc": "application/vnd.ms-excel", + "xlt": "application/vnd.ms-excel", + "xlw": "application/vnd.ms-excel", + "xlam": "application/vnd.ms-excel.addin.macroenabled.12", + "xlsb": "application/vnd.ms-excel.sheet.binary.macroenabled.12", + "xlsm": "application/vnd.ms-excel.sheet.macroenabled.12", + "xltm": "application/vnd.ms-excel.template.macroenabled.12", + "eot": "application/vnd.ms-fontobject", + "chm": "application/vnd.ms-htmlhelp", + "ims": "application/vnd.ms-ims", + "lrm": "application/vnd.ms-lrm", + "thmx": "application/vnd.ms-officetheme", + "cat": "application/vnd.ms-pki.seccat", + "stl": "application/vnd.ms-pki.stl", + "ppt": "application/vnd.ms-powerpoint", + "pps": "application/vnd.ms-powerpoint", + "pot": "application/vnd.ms-powerpoint", + "ppam": "application/vnd.ms-powerpoint.addin.macroenabled.12", + "pptm": "application/vnd.ms-powerpoint.presentation.macroenabled.12", + "sldm": "application/vnd.ms-powerpoint.slide.macroenabled.12", + "ppsm": "application/vnd.ms-powerpoint.slideshow.macroenabled.12", + "potm": "application/vnd.ms-powerpoint.template.macroenabled.12", + "mpp": "application/vnd.ms-project", + "mpt": "application/vnd.ms-project", + "docm": "application/vnd.ms-word.document.macroenabled.12", + "dotm": 
"application/vnd.ms-word.template.macroenabled.12", + "wps": "application/vnd.ms-works", + "wks": "application/vnd.ms-works", + "wcm": "application/vnd.ms-works", + "wdb": "application/vnd.ms-works", + "wpl": "application/vnd.ms-wpl", + "xps": "application/vnd.ms-xpsdocument", + "mseq": "application/vnd.mseq", + "mus": "application/vnd.musician", + "msty": "application/vnd.muvee.style", + "taglet": "application/vnd.mynfc", + "nlu": "application/vnd.neurolanguage.nlu", + "nim": "text/nim", + "nimble": "text/nimble", + "nimf": "text/nim", + "nims": "text/nim", + "ntf": "application/vnd.nitf", + "nitf": "application/vnd.nitf", + "nnd": "application/vnd.noblenet-directory", + "nns": "application/vnd.noblenet-sealer", + "nnw": "application/vnd.noblenet-web", + "ngdat": "application/vnd.nokia.n-gage.data", + "n-gage": "application/vnd.nokia.n-gage.symbian.install", + "rpst": "application/vnd.nokia.radio-preset", + "rpss": "application/vnd.nokia.radio-presets", + "edm": "application/vnd.novadigm.edm", + "edx": "application/vnd.novadigm.edx", + "ext": "application/vnd.novadigm.ext", + "odc": "application/vnd.oasis.opendocument.chart", + "otc": "application/vnd.oasis.opendocument.chart-template", + "odb": "application/vnd.oasis.opendocument.database", + "odf": "application/vnd.oasis.opendocument.formula", + "odft": "application/vnd.oasis.opendocument.formula-template", + "odg": "application/vnd.oasis.opendocument.graphics", + "otg": "application/vnd.oasis.opendocument.graphics-template", + "odi": "application/vnd.oasis.opendocument.image", + "oti": "application/vnd.oasis.opendocument.image-template", + "odp": "application/vnd.oasis.opendocument.presentation", + "otp": "application/vnd.oasis.opendocument.presentation-template", + "ods": "application/vnd.oasis.opendocument.spreadsheet", + "ots": "application/vnd.oasis.opendocument.spreadsheet-template", + "odt": "application/vnd.oasis.opendocument.text", + "odm": "application/vnd.oasis.opendocument.text-master", + "ott": 
"application/vnd.oasis.opendocument.text-template", + "oth": "application/vnd.oasis.opendocument.text-web", + "xo": "application/vnd.olpc-sugar", + "dd2": "application/vnd.oma.dd2+xml", + "oxt": "application/vnd.openofficeorg.extension", + "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "sldx": "application/vnd.openxmlformats-officedocument.presentationml.slide", + "ppsx": "application/vnd.openxmlformats-officedocument.presentationml.slideshow", + "potx": "application/vnd.openxmlformats-officedocument.presentationml.template", + "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "xltx": "application/vnd.openxmlformats-officedocument.spreadsheetml.template", + "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "dotx": "application/vnd.openxmlformats-officedocument.wordprocessingml.template", + "mgp": "application/vnd.osgeo.mapguide.package", + "dp": "application/vnd.osgi.dp", + "esa": "application/vnd.osgi.subsystem", + "pdb": "application/vnd.palm", + "pqa": "application/vnd.palm", + "oprc": "application/vnd.palm", + "paw": "application/vnd.pawaafile", + "str": "application/vnd.pg.format", + "ei6": "application/vnd.pg.osasli", + "efif": "application/vnd.picsel", + "wg": "application/vnd.pmi.widget", + "plf": "application/vnd.pocketlearn", + "pbd": "application/vnd.powerbuilder6", + "box": "application/vnd.previewsystems.box", + "mgz": "application/vnd.proteus.magazine", + "qps": "application/vnd.publishare-delta-tree", + "ptid": "application/vnd.pvi.ptid1", + "qxd": "application/vnd.quark.quarkxpress", + "qxt": "application/vnd.quark.quarkxpress", + "qwd": "application/vnd.quark.quarkxpress", + "qwt": "application/vnd.quark.quarkxpress", + "qxl": "application/vnd.quark.quarkxpress", + "qxb": "application/vnd.quark.quarkxpress", + "bed": "application/vnd.realvnc.bed", + "mxl": "application/vnd.recordare.musicxml", + "musicxml": 
"application/vnd.recordare.musicxml+xml", + "cryptonote": "application/vnd.rig.cryptonote", + "cod": "application/vnd.rim.cod", + "rm": "application/vnd.rn-realmedia", + "rmvb": "application/vnd.rn-realmedia-vbr", + "link66": "application/vnd.route66.link66+xml", + "st": "application/vnd.sailingtracker.track", + "see": "application/vnd.seemail", + "sema": "application/vnd.sema", + "semd": "application/vnd.semd", + "semf": "application/vnd.semf", + "ifm": "application/vnd.shana.informed.formdata", + "itp": "application/vnd.shana.informed.formtemplate", + "iif": "application/vnd.shana.informed.interchange", + "ipk": "application/vnd.shana.informed.package", + "twd": "application/vnd.simtech-mindmapper", + "twds": "application/vnd.simtech-mindmapper", + "mmf": "application/vnd.smaf", + "teacher": "application/vnd.smart.teacher", + "sdkm": "application/vnd.solent.sdkm+xml", + "sdkd": "application/vnd.solent.sdkm+xml", + "dxp": "application/vnd.spotfire.dxp", + "sfs": "application/vnd.spotfire.sfs", + "sdc": "application/vnd.stardivision.calc", + "sda": "application/vnd.stardivision.draw", + "sdd": "application/vnd.stardivision.impress", + "smf": "application/vnd.stardivision.math", + "sdw": "application/vnd.stardivision.writer", + "vor": "application/vnd.stardivision.writer", + "sgl": "application/vnd.stardivision.writer-global", + "smzip": "application/vnd.stepmania.package", + "sm": "application/vnd.stepmania.stepchart", + "sxc": "application/vnd.sun.xml.calc", + "stc": "application/vnd.sun.xml.calc.template", + "sxd": "application/vnd.sun.xml.draw", + "std": "application/vnd.sun.xml.draw.template", + "sxi": "application/vnd.sun.xml.impress", + "sti": "application/vnd.sun.xml.impress.template", + "sxm": "application/vnd.sun.xml.math", + "sxw": "application/vnd.sun.xml.writer", + "sxg": "application/vnd.sun.xml.writer.global", + "stw": "application/vnd.sun.xml.writer.template", + "sus": "application/vnd.sus-calendar", + "susp": "application/vnd.sus-calendar", + "svd": 
"application/vnd.svd", + "sis": "application/vnd.symbian.install", + "sisx": "application/vnd.symbian.install", + "xsm": "application/vnd.syncml+xml", + "bdm": "application/vnd.syncml.dm+wbxml", + "xdm": "application/vnd.syncml.dm+xml", + "tao": "application/vnd.tao.intent-module-archive", + "pcap": "application/vnd.tcpdump.pcap", + "cap": "application/vnd.tcpdump.pcap", + "dmp": "application/vnd.tcpdump.pcap", + "tmo": "application/vnd.tmobile-livetv", + "tpt": "application/vnd.trid.tpt", + "mxs": "application/vnd.triscape.mxs", + "tra": "application/vnd.trueapp", + "ufd": "application/vnd.ufdl", + "ufdl": "application/vnd.ufdl", + "utz": "application/vnd.uiq.theme", + "umj": "application/vnd.umajin", + "unityweb": "application/vnd.unity", + "uoml": "application/vnd.uoml+xml", + "vcx": "application/vnd.vcx", + "vsd": "application/vnd.visio", + "vst": "application/vnd.visio", + "vss": "application/vnd.visio", + "vsw": "application/vnd.visio", + "vis": "application/vnd.visionary", + "vsf": "application/vnd.vsf", + "wbxml": "application/vnd.wap.wbxml", + "wmlc": "application/vnd.wap.wmlc", + "wmlsc": "application/vnd.wap.wmlscriptc", + "wtb": "application/vnd.webturbo", + "nbp": "application/vnd.wolfram.player", + "wpd": "application/vnd.wordperfect", + "wqd": "application/vnd.wqd", + "stf": "application/vnd.wt.stf", + "xar": "application/vnd.xara", + "xfdl": "application/vnd.xfdl", + "hvd": "application/vnd.yamaha.hv-dic", + "hvs": "application/vnd.yamaha.hv-script", + "hvp": "application/vnd.yamaha.hv-voice", + "osf": "application/vnd.yamaha.openscoreformat", + "osfpvg": "application/vnd.yamaha.openscoreformat.osfpvg+xml", + "saf": "application/vnd.yamaha.smaf-audio", + "spf": "application/vnd.yamaha.smaf-phrase", + "cmp": "application/vnd.yellowriver-custom-menu", + "zir": "application/vnd.zul", + "zirz": "application/vnd.zul", + "zaz": "application/vnd.zzazz.deck+xml", + "vxml": "application/voicexml+xml", + "wasm": "application/wasm", + "wgt": 
"application/widget", + "hlp": "application/winhlp", + "wsdl": "application/wsdl+xml", + "wspolicy": "application/wspolicy+xml", + "7z": "application/x-7z-compressed", + "abw": "application/x-abiword", + "ace": "application/x-ace-compressed", + "dmg": "application/x-apple-diskimage", + "aab": "application/x-authorware-bin", + "x32": "application/x-authorware-bin", + "u32": "application/x-authorware-bin", + "vox": "application/x-authorware-bin", + "aam": "application/x-authorware-map", + "aas": "application/x-authorware-seg", + "bcpio": "application/x-bcpio", + "torrent": "application/x-bittorrent", + "blb": "application/x-blorb", + "blorb": "application/x-blorb", + "bz": "application/x-bzip", + "bz2": "application/x-bzip2", + "boz": "application/x-bzip2", + "cbr": "application/x-cbr", + "cba": "application/x-cbr", + "cbt": "application/x-cbr", + "cbz": "application/x-cbr", + "cb7": "application/x-cbr", + "vcd": "application/x-cdlink", + "cfs": "application/x-cfs-compressed", + "chat": "application/x-chat", + "pgn": "application/x-chess-pgn", + "nsc": "application/x-conference", + "cpio": "application/x-cpio", + "csh": "application/x-csh", + "deb": "application/x-debian-package", + "udeb": "application/x-debian-package", + "dgc": "application/x-dgc-compressed", + "dir": "application/x-director", + "dcr": "application/x-director", + "dxr": "application/x-director", + "cst": "application/x-director", + "cct": "application/x-director", + "cxt": "application/x-director", + "w3d": "application/x-director", + "fgd": "application/x-director", + "swa": "application/x-director", + "wad": "application/x-doom", + "ncx": "application/x-dtbncx+xml", + "dtb": "application/x-dtbook+xml", + "res": "application/x-dtbresource+xml", + "dvi": "application/x-dvi", + "evy": "application/x-envoy", + "eva": "application/x-eva", + "bdf": "application/x-font-bdf", + "gsf": "application/x-font-ghostscript", + "psf": "application/x-font-linux-psf", + "pcf": "application/x-font-pcf", + "snf": 
"application/x-font-snf", + "pfa": "application/x-font-type1", + "pfb": "application/x-font-type1", + "pfm": "application/x-font-type1", + "afm": "application/x-font-type1", + "arc": "application/x-freearc", + "spl": "application/x-futuresplash", + "gca": "application/x-gca-compressed", + "ulx": "application/x-glulx", + "gnumeric": "application/x-gnumeric", + "gramps": "application/x-gramps-xml", + "gtar": "application/x-gtar", + "hdf": "application/x-hdf", + "install": "application/x-install-instructions", + "iso": "application/x-iso9660-image", + "jnlp": "application/x-java-jnlp-file", + "latex": "application/x-latex", + "lzh": "application/x-lzh-compressed", + "lha": "application/x-lzh-compressed", + "mie": "application/x-mie", + "prc": "application/x-mobipocket-ebook", + "mobi": "application/x-mobipocket-ebook", + "application": "application/x-ms-application", + "lnk": "application/x-ms-shortcut", + "wmd": "application/x-ms-wmd", + "wmz": "application/x-ms-wmz", + "xbap": "application/x-ms-xbap", + "mdb": "application/x-msaccess", + "obd": "application/x-msbinder", + "crd": "application/x-mscardfile", + "clp": "application/x-msclip", + "exe": "application/x-msdownload", + "dll": "application/x-msdownload", + "com": "application/x-msdownload", + "bat": "application/x-msdownload", + "msi": "application/x-msdownload", + "mvb": "application/x-msmediaview", + "m13": "application/x-msmediaview", + "m14": "application/x-msmediaview", + "wmf": "application/x-msmetafile", + "wmz": "application/x-msmetafile", + "emf": "application/x-msmetafile", + "emz": "application/x-msmetafile", + "mny": "application/x-msmoney", + "pub": "application/x-mspublisher", + "scd": "application/x-msschedule", + "trm": "application/x-msterminal", + "wri": "application/x-mswrite", + "nc": "application/x-netcdf", + "cdf": "application/x-netcdf", + "nzb": "application/x-nzb", + "p12": "application/x-pkcs12", + "pfx": "application/x-pkcs12", + "p7b": "application/x-pkcs7-certificates", + "spc": 
"application/x-pkcs7-certificates", + "p7r": "application/x-pkcs7-certreqresp", + "rar": "application/x-rar-compressed", + "ris": "application/x-research-info-systems", + "sh": "application/x-sh", + "shar": "application/x-shar", + "swf": "application/x-shockwave-flash", + "xap": "application/x-silverlight-app", + "sql": "application/x-sql", + "sit": "application/x-stuffit", + "sitx": "application/x-stuffitx", + "srt": "application/x-subrip", + "sv4cpio": "application/x-sv4cpio", + "sv4crc": "application/x-sv4crc", + "t3": "application/x-t3vm-image", + "gam": "application/x-tads", + "tar": "application/x-tar", + "tcl": "application/x-tcl", + "tex": "application/x-tex", + "tfm": "application/x-tex-tfm", + "texinfo": "application/x-texinfo", + "texi": "application/x-texinfo", + "obj": "application/x-tgif", + "ustar": "application/x-ustar", + "src": "application/x-wais-source", + "der": "application/x-x509-ca-cert", + "crt": "application/x-x509-ca-cert", + "fig": "application/x-xfig", + "xlf": "application/x-xliff+xml", + "xpi": "application/x-xpinstall", + "xz": "application/x-xz", + "z1": "application/x-zmachine", + "z2": "application/x-zmachine", + "z3": "application/x-zmachine", + "z4": "application/x-zmachine", + "z5": "application/x-zmachine", + "z6": "application/x-zmachine", + "z7": "application/x-zmachine", + "z8": "application/x-zmachine", + "xaml": "application/xaml+xml", + "xdf": "application/xcap-diff+xml", + "xenc": "application/xenc+xml", + "xhtml": "application/xhtml+xml", + "xht": "application/xhtml+xml", + "xml": "application/xml", + "xsl": "application/xml", + "dtd": "application/xml-dtd", + "xop": "application/xop+xml", + "xpl": "application/xproc+xml", + "xslt": "application/xslt+xml", + "xspf": "application/xspf+xml", + "mxml": "application/xv+xml", + "xhvml": "application/xv+xml", + "xvml": "application/xv+xml", + "xvm": "application/xv+xml", + "yang": "application/yang", + "yin": "application/yin+xml", + "zip": "application/zip", + "adp": 
"audio/adpcm", + "au": "audio/basic", + "snd": "audio/basic", + "mid": "audio/midi", + "midi": "audio/midi", + "kar": "audio/midi", + "rmi": "audio/midi", + "m4a": "audio/mp4", + "mp4a": "audio/mp4", + "mpga": "audio/mpeg", + "mp2": "audio/mpeg", + "mp2a": "audio/mpeg", + "mp3": "audio/mpeg", + "m2a": "audio/mpeg", + "m3a": "audio/mpeg", + "oga": "audio/ogg", + "ogg": "audio/ogg", + "spx": "audio/ogg", + "opus": "audio/ogg", + "s3m": "audio/s3m", + "sil": "audio/silk", + "uva": "audio/vnd.dece.audio", + "uvva": "audio/vnd.dece.audio", + "eol": "audio/vnd.digital-winds", + "dra": "audio/vnd.dra", + "dts": "audio/vnd.dts", + "dtshd": "audio/vnd.dts.hd", + "lvp": "audio/vnd.lucent.voice", + "pya": "audio/vnd.ms-playready.media.pya", + "ecelp4800": "audio/vnd.nuera.ecelp4800", + "ecelp7470": "audio/vnd.nuera.ecelp7470", + "ecelp9600": "audio/vnd.nuera.ecelp9600", + "rip": "audio/vnd.rip", + "weba": "audio/webm", + "aac": "audio/x-aac", + "aif": "audio/x-aiff", + "aiff": "audio/x-aiff", + "aifc": "audio/x-aiff", + "caf": "audio/x-caf", + "flac": "audio/x-flac", + "mka": "audio/x-matroska", + "m3u": "audio/x-mpegurl", + "wax": "audio/x-ms-wax", + "wma": "audio/x-ms-wma", + "ram": "audio/x-pn-realaudio", + "ra": "audio/x-pn-realaudio", + "rmp": "audio/x-pn-realaudio-plugin", + "wav": "audio/x-wav", + "xm": "audio/xm", + "cdx": "chemical/x-cdx", + "cif": "chemical/x-cif", + "cmdf": "chemical/x-cmdf", + "cml": "chemical/x-cml", + "csml": "chemical/x-csml", + "xyz": "chemical/x-xyz", + "ttc": "font/collection", + "otf": "font/otf", + "ttf": "font/ttf", + "woff": "font/woff", + "woff2": "font/woff2", + "bmp": "image/bmp", + "cgm": "image/cgm", + "g3": "image/g3fax", + "gif": "image/gif", + "ief": "image/ief", + "jpeg": "image/jpeg", + "jpg": "image/jpeg", + "jpe": "image/jpeg", + "ktx": "image/ktx", + "png": "image/png", + "btif": "image/prs.btif", + "sgi": "image/sgi", + "svg": "image/svg+xml", + "svgz": "image/svg+xml", + "tiff": "image/tiff", + "tif": "image/tiff", + 
"psd": "image/vnd.adobe.photoshop", + "uvi": "image/vnd.dece.graphic", + "uvvi": "image/vnd.dece.graphic", + "uvg": "image/vnd.dece.graphic", + "uvvg": "image/vnd.dece.graphic", + "djvu": "image/vnd.djvu", + "djv": "image/vnd.djvu", + "sub": "image/vnd.dvb.subtitle", + "dwg": "image/vnd.dwg", + "dxf": "image/vnd.dxf", + "fbs": "image/vnd.fastbidsheet", + "fpx": "image/vnd.fpx", + "fst": "image/vnd.fst", + "mmr": "image/vnd.fujixerox.edmics-mmr", + "rlc": "image/vnd.fujixerox.edmics-rlc", + "mdi": "image/vnd.ms-modi", + "wdp": "image/vnd.ms-photo", + "npx": "image/vnd.net-fpx", + "wbmp": "image/vnd.wap.wbmp", + "xif": "image/vnd.xiff", + "webp": "image/webp", + "3ds": "image/x-3ds", + "ras": "image/x-cmu-raster", + "cmx": "image/x-cmx", + "fh": "image/x-freehand", + "fhc": "image/x-freehand", + "fh4": "image/x-freehand", + "fh5": "image/x-freehand", + "fh7": "image/x-freehand", + "ico": "image/x-icon", + "sid": "image/x-mrsid-image", + "pcx": "image/x-pcx", + "pic": "image/x-pict", + "pct": "image/x-pict", + "pnm": "image/x-portable-anymap", + "pbm": "image/x-portable-bitmap", + "pgm": "image/x-portable-graymap", + "ppm": "image/x-portable-pixmap", + "rgb": "image/x-rgb", + "tga": "image/x-tga", + "xbm": "image/x-xbitmap", + "xpm": "image/x-xpixmap", + "xwd": "image/x-xwindowdump", + "eml": "message/rfc822", + "mime": "message/rfc822", + "igs": "model/iges", + "iges": "model/iges", + "msh": "model/mesh", + "mesh": "model/mesh", + "silo": "model/mesh", + "dae": "model/vnd.collada+xml", + "dwf": "model/vnd.dwf", + "gdl": "model/vnd.gdl", + "gtw": "model/vnd.gtw", + "mts": "model/vnd.mts", + "vtu": "model/vnd.vtu", + "wrl": "model/vrml", + "vrml": "model/vrml", + "x3db": "model/x3d+binary", + "x3dbz": "model/x3d+binary", + "x3dv": "model/x3d+vrml", + "x3dvz": "model/x3d+vrml", + "x3d": "model/x3d+xml", + "x3dz": "model/x3d+xml", + "appcache": "text/cache-manifest", + "ics": "text/calendar", + "ifb": "text/calendar", + "css": "text/css", + "csv": "text/csv", + "html": 
"text/html", + "htm": "text/html", + "js": "text/javascript", + "mjs": "text/javascript", + "n3": "text/n3", + "txt": "text/plain", + "text": "text/plain", + "conf": "text/plain", + "def": "text/plain", + "list": "text/plain", + "log": "text/plain", + "in": "text/plain", + "dsc": "text/prs.lines.tag", + "rtx": "text/richtext", + "sgml": "text/sgml", + "sgm": "text/sgml", + "tsv": "text/tab-separated-values", + "t": "text/troff", + "tr": "text/troff", + "roff": "text/troff", + "man": "text/troff", + "me": "text/troff", + "ms": "text/troff", + "ttl": "text/turtle", + "uri": "text/uri-list", + "uris": "text/uri-list", + "urls": "text/uri-list", + "vcard": "text/vcard", + "curl": "text/vnd.curl", + "dcurl": "text/vnd.curl.dcurl", + "mcurl": "text/vnd.curl.mcurl", + "scurl": "text/vnd.curl.scurl", + "sub": "text/vnd.dvb.subtitle", + "fly": "text/vnd.fly", + "flx": "text/vnd.fmi.flexstor", + "gv": "text/vnd.graphviz", + "3dml": "text/vnd.in3d.3dml", + "spot": "text/vnd.in3d.spot", + "jad": "text/vnd.sun.j2me.app-descriptor", + "wml": "text/vnd.wap.wml", + "wmls": "text/vnd.wap.wmlscript", + "s": "text/x-asm", + "asm": "text/x-asm", + "c": "text/x-c", + "cc": "text/x-c", + "cxx": "text/x-c", + "cpp": "text/x-c", + "h": "text/x-c", + "hh": "text/x-c", + "dic": "text/x-c", + "f": "text/x-fortran", + "for": "text/x-fortran", + "f77": "text/x-fortran", + "f90": "text/x-fortran", + "java": "text/x-java-source", + "nfo": "text/x-nfo", + "opml": "text/x-opml", + "p": "text/x-pascal", + "pas": "text/x-pascal", + "etx": "text/x-setext", + "sfv": "text/x-sfv", + "uu": "text/x-uuencode", + "vcs": "text/x-vcalendar", + "vcf": "text/x-vcard", + "3gp": "video/3gpp", + "3g2": "video/3gpp2", + "h261": "video/h261", + "h263": "video/h263", + "h264": "video/h264", + "jpgv": "video/jpeg", + "jpm": "video/jpm", + "jpgm": "video/jpm", + "mj2": "video/mj2", + "mjp2": "video/mj2", + "mp4": "video/mp4", + "mp4v": "video/mp4", + "mpg4": "video/mp4", + "mpeg": "video/mpeg", + "mpg": "video/mpeg", 
+ "mpe": "video/mpeg", + "m1v": "video/mpeg", + "m2v": "video/mpeg", + "ogv": "video/ogg", + "qt": "video/quicktime", + "mov": "video/quicktime", + "uvh": "video/vnd.dece.hd", + "uvvh": "video/vnd.dece.hd", + "uvm": "video/vnd.dece.mobile", + "uvvm": "video/vnd.dece.mobile", + "uvp": "video/vnd.dece.pd", + "uvvp": "video/vnd.dece.pd", + "uvs": "video/vnd.dece.sd", + "uvvs": "video/vnd.dece.sd", + "uvv": "video/vnd.dece.video", + "uvvv": "video/vnd.dece.video", + "dvb": "video/vnd.dvb.file", + "fvt": "video/vnd.fvt", + "mxu": "video/vnd.mpegurl", + "m4u": "video/vnd.mpegurl", + "pyv": "video/vnd.ms-playready.media.pyv", + "uvu": "video/vnd.uvvu.mp4", + "uvvu": "video/vnd.uvvu.mp4", + "viv": "video/vnd.vivo", + "webm": "video/webm", + "f4v": "video/x-f4v", + "fli": "video/x-fli", + "flv": "video/x-flv", + "m4v": "video/x-m4v", + "mkv": "video/x-matroska", + "mk3d": "video/x-matroska", + "mks": "video/x-matroska", + "mng": "video/x-mng", + "asf": "video/x-ms-asf", + "asx": "video/x-ms-asf", + "vob": "video/x-ms-vob", + "wm": "video/x-ms-wm", + "wmv": "video/x-ms-wmv", + "wmx": "video/x-ms-wmx", + "wvx": "video/x-ms-wvx", + "avi": "video/x-msvideo", + "movie": "video/x-sgi-movie", + "smv": "video/x-smv", + "ice": "x-conference/x-cooltalk", +} + -proc newMimetypes*(): MimeDB = +func newMimetypes*(): MimeDB = ## Creates a new Mimetypes database. The database will contain the most ## common mimetypes. - result.mimes = mimes.newStringTable() + {.cast(noSideEffect).}: + result.mimes = mimes.toOrderedTable() -proc getMimetype*(mimedb: MimeDB, ext: string, default = "text/plain"): string = - ## Gets mimetype which corresponds to ``ext``. Returns ``default`` if ``ext`` - ## could not be found. - result = mimedb.mimes[ext] +func getMimetype*(mimedb: MimeDB, ext: string, default = "text/plain"): string = + ## Gets mimetype which corresponds to `ext`. Returns `default` if `ext` + ## could not be found. `ext` can start with an optional dot which is ignored. 
+ ## `ext` is lowercased before querying `mimedb`. + if ext.startsWith("."): + result = mimedb.mimes.getOrDefault(ext.toLowerAscii.substr(1)) + else: + result = mimedb.mimes.getOrDefault(ext.toLowerAscii()) if result == "": return default -proc getExt*(mimedb: MimeDB, mimetype: string, default = "txt"): string = - ## Gets extension which corresponds to ``mimetype``. Returns ``default`` if - ## ``mimetype`` could not be found. Extensions are returned without the - ## leading dot. +func getExt*(mimedb: MimeDB, mimetype: string, default = "txt"): string = + ## Gets extension which corresponds to `mimetype`. Returns `default` if + ## `mimetype` could not be found. Extensions are returned without the + ## leading dot. `mimetype` is lowercased before querying `mimedb`. result = default + let mimeLowered = mimetype.toLowerAscii() for e, m in mimedb.mimes: - if m == mimetype: + if m == mimeLowered: result = e + break -proc register*(mimedb: var MimeDB, ext: string, mimetype: string) = - ## Adds ``mimetype`` to the ``mimedb``. - mimedb.mimes[ext] = mimetype - -when isMainModule: - var m = newMimetypes() - echo m.getMimetype("mp4") - echo m.getExt("text/html") +func register*(mimedb: var MimeDB, ext: string, mimetype: string) = + ## Adds `mimetype` to the `mimedb`. + ## `mimetype` and `ext` are lowercased before registering on `mimedb`. + assert ext.strip.len > 0, "ext argument can not be empty string" + assert mimetype.strip.len > 0, "mimetype argument can not be empty string" + {.noSideEffect.}: + mimedb.mimes[ext.toLowerAscii()] = mimetype.toLowerAscii() diff --git a/lib/pure/nativesockets.nim b/lib/pure/nativesockets.nim new file mode 100644 index 000000000..656c98a20 --- /dev/null +++ b/lib/pure/nativesockets.nim @@ -0,0 +1,870 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Dominik Picheta +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. 
+# + +## This module implements a low-level cross-platform sockets interface. Look +## at the `net` module for the higher-level version. + +# TODO: Clean up the exports a bit and everything else in general. + +import std/[os, options] +import std/private/since +import std/strbasics + +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] + +when hostOS == "solaris": + {.passl: "-lsocket -lnsl".} + +const useWinVersion = defined(windows) or defined(nimdoc) +const useNimNetLite = defined(nimNetLite) or defined(freertos) or defined(zephyr) or + defined(nuttx) + +when useWinVersion: + import std/winlean + export WSAEWOULDBLOCK, WSAECONNRESET, WSAECONNABORTED, WSAENETRESET, + WSANOTINITIALISED, WSAENOTSOCK, WSAEINPROGRESS, WSAEINTR, + WSAEDISCON, ERROR_NETNAME_DELETED +else: + import std/posix + export fcntl, F_GETFL, O_NONBLOCK, F_SETFL, EAGAIN, EWOULDBLOCK, MSG_NOSIGNAL, + EINTR, EINPROGRESS, ECONNRESET, EPIPE, ENETRESET, EBADF + export Sockaddr_storage, Sockaddr_un, Sockaddr_un_path_length + +export SocketHandle, Sockaddr_in, Addrinfo, INADDR_ANY, SockAddr, SockLen, + Sockaddr_in6, Sockaddr_storage, + recv, `==`, connect, send, accept, recvfrom, sendto, + freeAddrInfo + +when not useNimNetLite: + export inet_ntoa + +export + SO_ERROR, + SOL_SOCKET, + SOMAXCONN, + SO_ACCEPTCONN, SO_BROADCAST, SO_DEBUG, SO_DONTROUTE, + SO_KEEPALIVE, SO_OOBINLINE, SO_REUSEADDR, SO_REUSEPORT, + MSG_PEEK + +when defined(macosx) and not defined(nimdoc): + export SO_NOSIGPIPE + +type + Port* = distinct uint16 ## port type + + Domain* = enum ## \ + ## domain, which specifies the protocol family of the + ## created socket. Other domains than those that are listed + ## here are unsupported. + AF_UNSPEC = 0, ## unspecified domain (can be detected automatically by + ## some procedures, such as getaddrinfo) + AF_UNIX = 1, ## for local socket (using a file). Unsupported on Windows. 
+ AF_INET = 2, ## for network protocol IPv4 or + AF_INET6 = when defined(macosx): 30 elif defined(windows): 23 else: 10 ## for network protocol IPv6. + + SockType* = enum ## second argument to `socket` proc + SOCK_STREAM = 1, ## reliable stream-oriented service or Stream Sockets + SOCK_DGRAM = 2, ## datagram service or Datagram Sockets + SOCK_RAW = 3, ## raw protocols atop the network layer. + SOCK_SEQPACKET = 5 ## reliable sequenced packet service + + Protocol* = enum ## third argument to `socket` proc + IPPROTO_TCP = 6, ## Transmission control protocol. + IPPROTO_UDP = 17, ## User datagram protocol. + IPPROTO_IP, ## Internet protocol. + IPPROTO_IPV6, ## Internet Protocol Version 6. + IPPROTO_RAW, ## Raw IP Packets Protocol. Unsupported on Windows. + IPPROTO_ICMP ## Internet Control message protocol. + IPPROTO_ICMPV6 ## Internet Control message protocol for IPv6. + + Servent* = object ## information about a service + name*: string + aliases*: seq[string] + port*: Port + proto*: string + + Hostent* = object ## information about a given host + name*: string + aliases*: seq[string] + addrtype*: Domain + length*: int + addrList*: seq[string] + +const IPPROTO_NONE* = IPPROTO_IP ## Use this if your socket type requires a protocol value of zero (e.g. Unix sockets). + +when useWinVersion: + let + osInvalidSocket* = winlean.INVALID_SOCKET + + const + IOCPARM_MASK* = 127 + IOC_IN* = int(-2147483648) + FIONBIO* = IOC_IN.int32 or ((sizeof(int32) and IOCPARM_MASK) shl 16) or + (102 shl 8) or 126 + nativeAfInet = winlean.AF_INET + nativeAfInet6 = winlean.AF_INET6 + + proc ioctlsocket*(s: SocketHandle, cmd: clong, + argptr: ptr clong): cint {. + stdcall, importc: "ioctlsocket", dynlib: "ws2_32.dll".} +else: + let + osInvalidSocket* = posix.INVALID_SOCKET + nativeAfInet = posix.AF_INET + nativeAfInet6 = posix.AF_INET6 + nativeAfUnix = posix.AF_UNIX + +proc `==`*(a, b: Port): bool {.borrow.} + ## `==` for ports. 
+ +proc `$`*(p: Port): string {.borrow.} + ## Returns the port number as a string + +proc toInt*(domain: Domain): cint + ## Converts the Domain enum to a platform-dependent `cint`. + +proc toInt*(typ: SockType): cint + ## Converts the SockType enum to a platform-dependent `cint`. + +proc toInt*(p: Protocol): cint + ## Converts the Protocol enum to a platform-dependent `cint`. + +when not useWinVersion: + proc toInt(domain: Domain): cint = + case domain + of AF_UNSPEC: result = posix.AF_UNSPEC.cint + of AF_UNIX: result = posix.AF_UNIX.cint + of AF_INET: result = posix.AF_INET.cint + of AF_INET6: result = posix.AF_INET6.cint + + proc toKnownDomain*(family: cint): Option[Domain] = + ## Converts the platform-dependent `cint` to the Domain or none(), + ## if the `cint` is not known. + result = if family == posix.AF_UNSPEC: some(Domain.AF_UNSPEC) + elif family == posix.AF_UNIX: some(Domain.AF_UNIX) + elif family == posix.AF_INET: some(Domain.AF_INET) + elif family == posix.AF_INET6: some(Domain.AF_INET6) + else: none(Domain) + + proc toInt(typ: SockType): cint = + case typ + of SOCK_STREAM: result = posix.SOCK_STREAM + of SOCK_DGRAM: result = posix.SOCK_DGRAM + of SOCK_SEQPACKET: result = posix.SOCK_SEQPACKET + of SOCK_RAW: result = posix.SOCK_RAW + + proc toInt(p: Protocol): cint = + case p + of IPPROTO_TCP: result = posix.IPPROTO_TCP + of IPPROTO_UDP: result = posix.IPPROTO_UDP + of IPPROTO_IP: result = posix.IPPROTO_IP + of IPPROTO_IPV6: result = posix.IPPROTO_IPV6 + of IPPROTO_RAW: result = posix.IPPROTO_RAW + of IPPROTO_ICMP: result = posix.IPPROTO_ICMP + of IPPROTO_ICMPV6: result = posix.IPPROTO_ICMPV6 + +else: + proc toInt(domain: Domain): cint = + result = cast[cint](uint32(ord(domain))) + + proc toKnownDomain*(family: cint): Option[Domain] = + ## Converts the platform-dependent `cint` to the Domain or none(), + ## if the `cint` is not known. 
+ result = if family == winlean.AF_UNSPEC: some(Domain.AF_UNSPEC) + elif family == winlean.AF_INET: some(Domain.AF_INET) + elif family == winlean.AF_INET6: some(Domain.AF_INET6) + else: none(Domain) + + proc toInt(typ: SockType): cint = + result = cint(ord(typ)) + + proc toInt(p: Protocol): cint = + case p + of IPPROTO_IP: + result = 0.cint + of IPPROTO_ICMP: + result = 1.cint + of IPPROTO_TCP: + result = 6.cint + of IPPROTO_UDP: + result = 17.cint + of IPPROTO_IPV6: + result = 41.cint + of IPPROTO_ICMPV6: + result = 58.cint + else: + result = cint(ord(p)) + +proc toSockType*(protocol: Protocol): SockType = + result = case protocol + of IPPROTO_TCP: + SOCK_STREAM + of IPPROTO_UDP: + SOCK_DGRAM + of IPPROTO_IP, IPPROTO_IPV6, IPPROTO_RAW, IPPROTO_ICMP, IPPROTO_ICMPV6: + SOCK_RAW + +proc getProtoByName*(name: string): int {.since: (1, 3, 5).} = + ## Returns a protocol code from the database that matches the protocol `name`. + when useWinVersion: + let protoent = winlean.getprotobyname(name.cstring) + else: + let protoent = posix.getprotobyname(name.cstring) + + if protoent == nil: + raise newException(OSError, "protocol not found: " & name) + + result = protoent.p_proto.int + +proc close*(socket: SocketHandle) = + ## Closes a socket. + when useWinVersion: + discard winlean.closesocket(socket) + else: + discard posix.close(socket) + # TODO: These values should not be discarded. An OSError should be raised. + # http://stackoverflow.com/questions/12463473/what-happens-if-you-call-close-on-a-bsd-socket-multiple-times + +when declared(setInheritable) or defined(nimdoc): + proc setInheritable*(s: SocketHandle, inheritable: bool): bool {.inline.} = + ## Set whether a socket is inheritable by child processes. Returns `true` + ## on success. + ## + ## This function is not implemented on all platform, test for availability + ## with `declared() <system.html#declared,untyped>`. 
+ setInheritable(FileHandle s, inheritable) + +proc createNativeSocket*(domain: cint, sockType: cint, protocol: cint, + inheritable: bool = defined(nimInheritHandles)): SocketHandle = + ## Creates a new socket; returns `osInvalidSocket` if an error occurs. + ## + ## `inheritable` decides if the resulting SocketHandle can be inherited + ## by child processes. + ## + ## Use this overload if one of the enums specified above does + ## not contain what you need. + let sockType = + when (defined(linux) or defined(bsd)) and not defined(nimdoc): + if inheritable: sockType and not SOCK_CLOEXEC else: sockType or SOCK_CLOEXEC + else: + sockType + result = socket(domain, sockType, protocol) + when declared(setInheritable) and not (defined(linux) or defined(bsd)): + if not setInheritable(result, inheritable): + close result + return osInvalidSocket + +proc createNativeSocket*(domain: Domain = AF_INET, + sockType: SockType = SOCK_STREAM, + protocol: Protocol = IPPROTO_TCP, + inheritable: bool = defined(nimInheritHandles)): SocketHandle = + ## Creates a new socket; returns `osInvalidSocket` if an error occurs. + ## + ## `inheritable` decides if the resulting SocketHandle can be inherited + ## by child processes. + createNativeSocket(toInt(domain), toInt(sockType), toInt(protocol), inheritable) + +proc bindAddr*(socket: SocketHandle, name: ptr SockAddr, + namelen: SockLen): cint = + result = bindSocket(socket, name, namelen) + +proc listen*(socket: SocketHandle, backlog = SOMAXCONN): cint {.tags: [ + ReadIOEffect].} = + ## Marks `socket` as accepting connections. + ## `Backlog` specifies the maximum length of the + ## queue of pending connections. + when useWinVersion: + result = winlean.listen(socket, cint(backlog)) + else: + result = posix.listen(socket, cint(backlog)) + +proc getAddrInfo*(address: string, port: Port, domain: Domain = AF_INET, + sockType: SockType = SOCK_STREAM, + protocol: Protocol = IPPROTO_TCP): ptr AddrInfo = + ## + ## + ## .. 
warning:: The resulting `ptr AddrInfo` must be freed using `freeAddrInfo`! + var hints: AddrInfo + result = nil + hints.ai_family = toInt(domain) + hints.ai_socktype = toInt(sockType) + hints.ai_protocol = toInt(protocol) + # OpenBSD doesn't support AI_V4MAPPED and doesn't define the macro AI_V4MAPPED. + # FreeBSD, Haiku don't support AI_V4MAPPED but defines the macro. + # https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=198092 + # https://dev.haiku-os.org/ticket/14323 + when not defined(freebsd) and not defined(openbsd) and not defined(netbsd) and + not defined(android) and not defined(haiku): + if domain == AF_INET6: + hints.ai_flags = AI_V4MAPPED + let socketPort = if sockType == SOCK_RAW: "" else: $port + var gaiResult = getaddrinfo(address, socketPort.cstring, addr(hints), result) + if gaiResult != 0'i32: + when useWinVersion or defined(freertos) or defined(nuttx): + raiseOSError(osLastError()) + else: + raiseOSError(osLastError(), $gai_strerror(gaiResult)) + +proc ntohl*(x: uint32): uint32 = + ## Converts 32-bit unsigned integers from network to host byte order. + ## On machines where the host byte order is the same as network byte order, + ## this is a no-op; otherwise, it performs a 4-byte swap operation. + when cpuEndian == bigEndian: result = x + else: result = (x shr 24'u32) or + (x shr 8'u32 and 0xff00'u32) or + (x shl 8'u32 and 0xff0000'u32) or + (x shl 24'u32) + +proc ntohs*(x: uint16): uint16 = + ## Converts 16-bit unsigned integers from network to host byte order. On + ## machines where the host byte order is the same as network byte order, + ## this is a no-op; otherwise, it performs a 2-byte swap operation. + when cpuEndian == bigEndian: result = x + else: result = (x shr 8'u16) or (x shl 8'u16) + +template htonl*(x: uint32): untyped = + ## Converts 32-bit unsigned integers from host to network byte order. 
On + ## machines where the host byte order is the same as network byte order, + ## this is a no-op; otherwise, it performs a 4-byte swap operation. + nativesockets.ntohl(x) + +template htons*(x: uint16): untyped = + ## Converts 16-bit unsigned integers from host to network byte order. + ## On machines where the host byte order is the same as network byte + ## order, this is a no-op; otherwise, it performs a 2-byte swap operation. + nativesockets.ntohs(x) + +proc getSockDomain*(socket: SocketHandle): Domain = + ## Returns the socket's domain (AF_INET or AF_INET6). + var name: Sockaddr_in6 + var namelen = sizeof(name).SockLen + if getsockname(socket, cast[ptr SockAddr](addr(name)), + addr(namelen)) == -1'i32: + raiseOSError(osLastError()) + let knownDomain = toKnownDomain(name.sin6_family.cint) + if knownDomain.isSome: + result = knownDomain.get() + else: + raise newException(IOError, "Unknown socket family in getSockDomain") + +when not useNimNetLite: + proc getServByName*(name, proto: string): Servent {.tags: [ReadIOEffect].} = + ## Searches the database from the beginning and finds the first entry for + ## which the service name specified by `name` matches the s_name member + ## and the protocol name specified by `proto` matches the s_proto member. + ## + ## On posix this will search through the `/etc/services` file. + when useWinVersion: + var s = winlean.getservbyname(name, proto) + else: + var s = posix.getservbyname(name, proto) + if s == nil: raiseOSError(osLastError(), "Service not found.") + result.name = $s.s_name + result.aliases = cstringArrayToSeq(s.s_aliases) + result.port = Port(s.s_port) + result.proto = $s.s_proto + + proc getServByPort*(port: Port, proto: string): Servent {.tags: [ReadIOEffect].} = + ## Searches the database from the beginning and finds the first entry for + ## which the port specified by `port` matches the s_port member and the + ## protocol name specified by `proto` matches the s_proto member. 
+ ## + ## On posix this will search through the `/etc/services` file. + when useWinVersion: + var s = winlean.getservbyport(uint16(port).cint, proto) + else: + var s = posix.getservbyport(uint16(port).cint, proto) + if s == nil: raiseOSError(osLastError(), "Service not found.") + result.name = $s.s_name + result.aliases = cstringArrayToSeq(s.s_aliases) + result.port = Port(s.s_port) + result.proto = $s.s_proto + + proc getHostByAddr*(ip: string): Hostent {.tags: [ReadIOEffect].} = + ## This function will lookup the hostname of an IP Address. + var + addrInfo = getAddrInfo(ip, Port(0), AF_UNSPEC) + myAddr: pointer + addrLen = 0 + family = 0 + + defer: freeAddrInfo(addrInfo) + + if addrInfo.ai_addr.sa_family.cint == nativeAfInet: + family = nativeAfInet + myAddr = addr cast[ptr Sockaddr_in](addrInfo.ai_addr).sin_addr + addrLen = 4 + elif addrInfo.ai_addr.sa_family.cint == nativeAfInet6: + family = nativeAfInet6 + myAddr = addr cast[ptr Sockaddr_in6](addrInfo.ai_addr).sin6_addr + addrLen = 16 + else: + raise newException(IOError, "Unknown socket family in `getHostByAddr()`") + + when useWinVersion: + var s = winlean.gethostbyaddr(cast[ptr InAddr](myAddr), addrLen.cuint, + cint(family)) + if s == nil: raiseOSError(osLastError()) + else: + var s = + when defined(android4): + posix.gethostbyaddr(cast[cstring](myAddr), addrLen.cint, + cint(family)) + else: + posix.gethostbyaddr(myAddr, addrLen.SockLen, + cint(family)) + if s == nil: + raiseOSError(osLastError(), $hstrerror(h_errno)) + + result.name = $s.h_name + result.aliases = cstringArrayToSeq(s.h_aliases) + when useWinVersion: + result.addrtype = Domain(s.h_addrtype) + else: + if s.h_addrtype == posix.AF_INET: + result.addrtype = AF_INET + elif s.h_addrtype == posix.AF_INET6: + result.addrtype = AF_INET6 + else: + raiseOSError(osLastError(), "unknown h_addrtype") + if result.addrtype == AF_INET: + result.addrList = @[] + var i = 0 + while not isNil(s.h_addr_list[i]): + var inaddrPtr = cast[ptr 
InAddr](s.h_addr_list[i]) + result.addrList.add($inet_ntoa(inaddrPtr[])) + inc(i) + else: + let strAddrLen = when not useWinVersion: posix.INET6_ADDRSTRLEN.int + else: 46 + var i = 0 + while not isNil(s.h_addr_list[i]): + var ipStr = newString(strAddrLen) + if inet_ntop(nativeAfInet6, cast[pointer](s.h_addr_list[i]), + cstring(ipStr), len(ipStr).int32) == nil: + raiseOSError(osLastError()) + when not useWinVersion: + if posix.IN6_IS_ADDR_V4MAPPED(cast[ptr In6Addr](s.h_addr_list[i])) != 0: + ipStr.setSlice("::ffff:".len..<strAddrLen) + setLen(ipStr, len(cstring(ipStr))) + result.addrList.add(ipStr) + inc(i) + result.length = int(s.h_length) + + proc getHostByName*(name: string): Hostent {.tags: [ReadIOEffect].} = + ## This function will lookup the IP address of a hostname. + when useWinVersion: + var s = winlean.gethostbyname(name) + else: + var s = posix.gethostbyname(name) + if s == nil: raiseOSError(osLastError()) + result.name = $s.h_name + result.aliases = cstringArrayToSeq(s.h_aliases) + when useWinVersion: + result.addrtype = Domain(s.h_addrtype) + else: + if s.h_addrtype == posix.AF_INET: + result.addrtype = AF_INET + elif s.h_addrtype == posix.AF_INET6: + result.addrtype = AF_INET6 + else: + raiseOSError(osLastError(), "unknown h_addrtype") + if result.addrtype == AF_INET: + result.addrList = @[] + var i = 0 + while not isNil(s.h_addr_list[i]): + var inaddrPtr = cast[ptr InAddr](s.h_addr_list[i]) + result.addrList.add($inet_ntoa(inaddrPtr[])) + inc(i) + else: + result.addrList = cstringArrayToSeq(s.h_addr_list) + result.length = int(s.h_length) + + proc getHostname*(): string {.tags: [ReadIOEffect].} = + ## Returns the local hostname (not the FQDN) + # https://tools.ietf.org/html/rfc1035#section-2.3.1 + # https://tools.ietf.org/html/rfc2181#section-11 + const size = 256 + result = newString(size) + when useWinVersion: + let success = winlean.gethostname(result.cstring, size) + else: + # Posix + let success = posix.gethostname(result.cstring, size) + if 
success != 0.cint: + raiseOSError(osLastError()) + let x = len(cstring(result)) + result.setLen(x) + + proc getAddrString*(sockAddr: ptr SockAddr): string = + ## Returns the string representation of address within sockAddr + if sockAddr.sa_family.cint == nativeAfInet: + result = $inet_ntoa(cast[ptr Sockaddr_in](sockAddr).sin_addr) + elif sockAddr.sa_family.cint == nativeAfInet6: + let addrLen = when not useWinVersion: posix.INET6_ADDRSTRLEN.int + else: 46 # it's actually 46 in both cases + result = newString(addrLen) + let addr6 = addr cast[ptr Sockaddr_in6](sockAddr).sin6_addr + when not useWinVersion: + if posix.inet_ntop(posix.AF_INET6, addr6, cast[cstring](addr result[0]), + result.len.int32) == nil: + raiseOSError(osLastError()) + if posix.IN6_IS_ADDR_V4MAPPED(addr6) != 0: + result.setSlice("::ffff:".len..<addrLen) + else: + if winlean.inet_ntop(winlean.AF_INET6, addr6, cast[cstring](addr result[0]), + result.len.int32) == nil: + raiseOSError(osLastError()) + setLen(result, len(cstring(result))) + else: + when defined(posix) and not defined(nimdoc): + if sockAddr.sa_family.cint == nativeAfUnix: + return "unix" + raise newException(IOError, "Unknown socket family in getAddrString") + + proc getAddrString*(sockAddr: ptr SockAddr, strAddress: var string) = + ## Stores in `strAddress` the string representation of the address inside + ## `sockAddr` + ## + ## **Note** + ## * `strAddress` must be initialized to 46 in length. + const length = 46 + assert(length == len(strAddress), + "`strAddress` was not initialized correctly. 
46 != `len(strAddress)`") + if sockAddr.sa_family.cint == nativeAfInet: + let addr4 = addr cast[ptr Sockaddr_in](sockAddr).sin_addr + when not useWinVersion: + if posix.inet_ntop(posix.AF_INET, addr4, cast[cstring](addr strAddress[0]), + strAddress.len.int32) == nil: + raiseOSError(osLastError()) + else: + if winlean.inet_ntop(winlean.AF_INET, addr4, cast[cstring](addr strAddress[0]), + strAddress.len.int32) == nil: + raiseOSError(osLastError()) + elif sockAddr.sa_family.cint == nativeAfInet6: + let addr6 = addr cast[ptr Sockaddr_in6](sockAddr).sin6_addr + when not useWinVersion: + if posix.inet_ntop(posix.AF_INET6, addr6, cast[cstring](addr strAddress[0]), + strAddress.len.int32) == nil: + raiseOSError(osLastError()) + if posix.IN6_IS_ADDR_V4MAPPED(addr6) != 0: + strAddress.setSlice("::ffff:".len..<length) + else: + if winlean.inet_ntop(winlean.AF_INET6, addr6, cast[cstring](addr strAddress[0]), + strAddress.len.int32) == nil: + raiseOSError(osLastError()) + else: + raise newException(IOError, "Unknown socket family in getAddrString") + setLen(strAddress, len(cstring(strAddress))) + + when defined(posix) and not defined(nimdoc): + proc makeUnixAddr*(path: string): Sockaddr_un = + result.sun_family = AF_UNIX.TSa_Family + if path.len >= Sockaddr_un_path_length: + raise newException(ValueError, "socket path too long") + copyMem(addr result.sun_path, path.cstring, path.len + 1) + + proc getSockName*(socket: SocketHandle): Port = + ## Returns the socket's associated port number. 
+ var name: Sockaddr_in + when useWinVersion: + name.sin_family = uint16(ord(AF_INET)) + else: + name.sin_family = TSa_Family(posix.AF_INET) + #name.sin_port = htons(cint16(port)) + #name.sin_addr.s_addr = htonl(INADDR_ANY) + var namelen = sizeof(name).SockLen + if getsockname(socket, cast[ptr SockAddr](addr(name)), + addr(namelen)) == -1'i32: + raiseOSError(osLastError()) + result = Port(nativesockets.ntohs(name.sin_port)) + + proc getLocalAddr*(socket: SocketHandle, domain: Domain): (string, Port) = + ## Returns the socket's local address and port number. + ## + ## Similar to POSIX's `getsockname`:idx:. + case domain + of AF_INET: + var name: Sockaddr_in + when useWinVersion: + name.sin_family = uint16(ord(AF_INET)) + else: + name.sin_family = TSa_Family(posix.AF_INET) + var namelen = sizeof(name).SockLen + if getsockname(socket, cast[ptr SockAddr](addr(name)), + addr(namelen)) == -1'i32: + raiseOSError(osLastError()) + result = ($inet_ntoa(name.sin_addr), + Port(nativesockets.ntohs(name.sin_port))) + of AF_INET6: + var name: Sockaddr_in6 + when useWinVersion: + name.sin6_family = uint16(ord(AF_INET6)) + else: + name.sin6_family = TSa_Family(posix.AF_INET6) + var namelen = sizeof(name).SockLen + if getsockname(socket, cast[ptr SockAddr](addr(name)), + addr(namelen)) == -1'i32: + raiseOSError(osLastError()) + # Cannot use INET6_ADDRSTRLEN here, because it's a C define. + result[0] = newString(64) + if inet_ntop(name.sin6_family.cint, + addr name.sin6_addr, cast[cstring](addr result[0][0]), (result[0].len+1).int32).isNil: + raiseOSError(osLastError()) + setLen(result[0], result[0].cstring.len) + result[1] = Port(nativesockets.ntohs(name.sin6_port)) + else: + raiseOSError(OSErrorCode(-1), "invalid socket family in getLocalAddr") + + proc getPeerAddr*(socket: SocketHandle, domain: Domain): (string, Port) = + ## Returns the socket's peer address and port number. 
+ ## + ## Similar to POSIX's `getpeername`:idx: + case domain + of AF_INET: + var name: Sockaddr_in + when useWinVersion: + name.sin_family = uint16(ord(AF_INET)) + else: + name.sin_family = TSa_Family(posix.AF_INET) + var namelen = sizeof(name).SockLen + if getpeername(socket, cast[ptr SockAddr](addr(name)), + addr(namelen)) == -1'i32: + raiseOSError(osLastError()) + result = ($inet_ntoa(name.sin_addr), + Port(nativesockets.ntohs(name.sin_port))) + of AF_INET6: + var name: Sockaddr_in6 + when useWinVersion: + name.sin6_family = uint16(ord(AF_INET6)) + else: + name.sin6_family = TSa_Family(posix.AF_INET6) + var namelen = sizeof(name).SockLen + if getpeername(socket, cast[ptr SockAddr](addr(name)), + addr(namelen)) == -1'i32: + raiseOSError(osLastError()) + # Cannot use INET6_ADDRSTRLEN here, because it's a C define. + result[0] = newString(64) + if inet_ntop(name.sin6_family.cint, + addr name.sin6_addr, cast[cstring](addr result[0][0]), (result[0].len+1).int32).isNil: + raiseOSError(osLastError()) + setLen(result[0], result[0].cstring.len) + result[1] = Port(nativesockets.ntohs(name.sin6_port)) + else: + raiseOSError(OSErrorCode(-1), "invalid socket family in getPeerAddr") + +when useNimNetLite: + + when useWinVersion: + const + INET_ADDRSTRLEN = 16 + INET6_ADDRSTRLEN = 46 # it's actually 46 in both cases + + proc sockAddrToStr(sa: ptr SockAddr): string {.noinit.} = + let af_family = sa.sa_family + var nl, v4Slice: cint + var si_addr: ptr InAddr + + if af_family == AF_INET.TSa_Family: + nl = INET_ADDRSTRLEN + si_addr = cast[ptr Sockaddr_in](sa).sin_addr.addr() + elif af_family == AF_INET6.TSa_Family: + nl = INET6_ADDRSTRLEN + let si6_addr = cast[ptr Sockaddr_in6](sa).sin6_addr.addr() + si_addr = cast[ptr InAddr](si6_addr) # lets us reuse logic below + when defined(posix) and not defined(nimdoc) and not defined(zephyr): + if posix.IN6_IS_ADDR_V4MAPPED(si6_addr) != 0: + v4Slice = "::ffff:".len() + else: + when defined(posix) and not defined(nimdoc): + if 
af_family.cint == nativeAfUnix: + return "unix" + return "" + + result = newString(nl) + let namePtr = result.cstring() + if namePtr == inet_ntop(af_family.cint, si_addr, namePtr, nl): + result.setLen(len(namePtr)) + if v4Slice > 0: result.setSlice(v4Slice.int ..< nl.int) + else: + return "" + + proc sockAddrToStr(sa: var Sockaddr_in | var Sockaddr_in6): string = + result = sockAddrToStr(cast[ptr SockAddr](unsafeAddr(sa))) + + proc getAddrString*(sockAddr: ptr SockAddr): string = + result = sockAddrToStr(sockAddr) + if result.len() == 0: + raiseOSError(osLastError()) + + proc getAddrString*(sockAddr: ptr SockAddr, strAddress: var string) {.noinit.} = + strAddress = getAddrString(sockAddr) + + proc getLocalAddr*(socket: SocketHandle, domain: Domain): (string, Port) = + ## Returns the socket's local address and port number. + ## + ## Similar to POSIX's `getsockname`:idx:. + template sockGetNameOrRaiseError(socket: untyped, name: untyped) = + # `namelen` must be the size of the address buffer `name`, not of the socket handle. + var namelen = sizeof(name).SockLen + if getsockname(socket, cast[ptr SockAddr](addr(name)), + addr(namelen)) == -1'i32: + raiseOSError(osLastError()) + + case domain + of AF_INET: + var name = Sockaddr_in(sin_family: TSa_Family(posix.AF_INET)) + sockGetNameOrRaiseError(socket, name) + result = (sockAddrToStr(name), + Port(nativesockets.ntohs(name.sin_port))) + of AF_INET6: + var name = Sockaddr_in6(sin6_family: TSa_Family(posix.AF_INET6)) + sockGetNameOrRaiseError(socket, name) + result = (sockAddrToStr(name), + Port(nativesockets.ntohs(name.sin6_port))) + else: + raiseOSError(OSErrorCode(-1), "invalid socket family in getLocalAddr") + + +proc getSockOptInt*(socket: SocketHandle, level, optname: int): int {. + tags: [ReadIOEffect].} = + ## getsockopt for integer options. + var res: cint + var size = sizeof(res).SockLen + if getsockopt(socket, cint(level), cint(optname), + addr(res), addr(size)) < 0'i32: + raiseOSError(osLastError()) + result = int(res) + +proc setSockOptInt*(socket: SocketHandle, level, optname, optval: int) {. 
+ tags: [WriteIOEffect].} = + ## setsockopt for integer options. + var value = cint(optval) + if setsockopt(socket, cint(level), cint(optname), addr(value), + sizeof(value).SockLen) < 0'i32: + raiseOSError(osLastError()) + +proc setBlocking*(s: SocketHandle, blocking: bool) = + ## Sets blocking mode on socket. + ## + ## Raises OSError on error. + when useWinVersion: + var mode = clong(ord(not blocking)) # 1 for non-blocking, 0 for blocking + if ioctlsocket(s, FIONBIO, addr(mode)) == -1: + raiseOSError(osLastError()) + else: # BSD sockets + var x: int = fcntl(s, F_GETFL, 0) + if x == -1: + raiseOSError(osLastError()) + else: + var mode = if blocking: x and not O_NONBLOCK else: x or O_NONBLOCK + if fcntl(s, F_SETFL, mode) == -1: + raiseOSError(osLastError()) + +proc timeValFromMilliseconds(timeout = 500): Timeval = + if timeout != -1: + var seconds = timeout div 1000 + when useWinVersion: + result.tv_sec = seconds.int32 + result.tv_usec = ((timeout - seconds * 1000) * 1000).int32 + else: + result.tv_sec = seconds.Time + result.tv_usec = ((timeout - seconds * 1000) * 1000).Suseconds + +proc createFdSet(fd: var TFdSet, s: seq[SocketHandle], m: var int) = + FD_ZERO(fd) + for i in items(s): + m = max(m, int(i)) + FD_SET(i, fd) + +proc pruneSocketSet(s: var seq[SocketHandle], fd: var TFdSet) = + var i = 0 + var L = s.len + while i < L: + if FD_ISSET(s[i], fd) == 0'i32: + s[i] = s[L-1] + dec(L) + else: + inc(i) + setLen(s, L) + +proc selectRead*(readfds: var seq[SocketHandle], timeout = 500): int = + ## When a socket in `readfds` is ready to be read from then a non-zero + ## value will be returned specifying the count of the sockets which can be + ## read from. The sockets which cannot be read from will also be removed + ## from `readfds`. + ## + ## `timeout` is specified in milliseconds and `-1` can be specified for + ## an unlimited time. 
+ var tv {.noinit.}: Timeval = timeValFromMilliseconds(timeout) + + var rd: TFdSet + var m = 0 + createFdSet((rd), readfds, m) + + if timeout != -1: + result = int(select(cint(m+1), addr(rd), nil, nil, addr(tv))) + else: + result = int(select(cint(m+1), addr(rd), nil, nil, nil)) + + pruneSocketSet(readfds, (rd)) + +proc selectWrite*(writefds: var seq[SocketHandle], + timeout = 500): int {.tags: [ReadIOEffect].} = + ## When a socket in `writefds` is ready to be written to then a non-zero + ## value will be returned specifying the count of the sockets which can be + ## written to. The sockets which cannot be written to will also be removed + ## from `writefds`. + ## + ## `timeout` is specified in milliseconds and `-1` can be specified for + ## an unlimited time. + var tv {.noinit.}: Timeval = timeValFromMilliseconds(timeout) + + var wr: TFdSet + var m = 0 + createFdSet((wr), writefds, m) + + if timeout != -1: + result = int(select(cint(m+1), nil, addr(wr), nil, addr(tv))) + else: + result = int(select(cint(m+1), nil, addr(wr), nil, nil)) + + pruneSocketSet(writefds, (wr)) + +proc accept*(fd: SocketHandle, inheritable = defined(nimInheritHandles)): (SocketHandle, string) = + ## Accepts a new client connection. + ## + ## `inheritable` decides if the resulting SocketHandle can be inherited by + ## child processes. + ## + ## Returns (osInvalidSocket, "") if an error occurred. 
+ var sockAddress: SockAddr + var addrLen = sizeof(sockAddress).SockLen + var sock = + when (defined(linux) or defined(bsd)) and not defined(nimdoc): + accept4(fd, addr(sockAddress), addr(addrLen), + if inheritable: 0 else: SOCK_CLOEXEC) + else: + accept(fd, addr(sockAddress), addr(addrLen)) + when declared(setInheritable) and not (defined(linux) or defined(bsd)): + if not setInheritable(sock, inheritable): + close sock + sock = osInvalidSocket + if sock == osInvalidSocket: + return (osInvalidSocket, "") + else: + when useNimNetLite: + var name = sockAddrToStr(addr sockAddress) + return (sock, name) + else: + return (sock, $inet_ntoa(cast[Sockaddr_in](sockAddress).sin_addr)) + +when defined(windows): + var wsa: WSAData + if wsaStartup(0x0101'i16, addr wsa) != 0: raiseOSError(osLastError()) diff --git a/lib/pure/net.nim b/lib/pure/net.nim index ffbc6e320..24c94b651 100644 --- a/lib/pure/net.nim +++ b/lib/pure/net.nim @@ -8,107 +8,250 @@ # ## This module implements a high-level cross-platform sockets interface. +## The procedures implemented in this module are primarily for blocking sockets. +## For asynchronous non-blocking sockets use the `asyncnet` module together +## with the `asyncdispatch` module. +## +## The first thing you will always need to do in order to start using sockets, +## is to create a new instance of the `Socket` type using the `newSocket` +## procedure. +## +## SSL +## ==== +## +## In order to use the SSL procedures defined in this module, you will need to +## compile your application with the `-d:ssl` flag. See the +## `newContext<net.html#newContext%2Cstring%2Cstring%2Cstring%2Cstring>`_ +## procedure for additional details. +## +## +## SSL on Windows +## ============== +## +## On Windows the SSL library checks for valid certificates. +## It uses the `cacert.pem` file for this purpose which was extracted +## from `https://curl.se/ca/cacert.pem`. Besides +## the OpenSSL DLLs (e.g. 
libssl-1_1-x64.dll, libcrypto-1_1-x64.dll) you +## also need to ship `cacert.pem` with your `.exe` file. +## +## +## Examples +## ======== +## +## Connecting to a server +## ---------------------- +## +## After you create a socket with the `newSocket` procedure, you can easily +## connect it to a server running at a known hostname (or IP address) and port. +## To do so over TCP, use the example below. + +runnableExamples("-r:off"): + let socket = newSocket() + socket.connect("google.com", Port(80)) + +## For SSL, use the following example: + +runnableExamples("-r:off -d:ssl"): + let socket = newSocket() + let ctx = newContext() + wrapSocket(ctx, socket) + socket.connect("google.com", Port(443)) + +## UDP is a connectionless protocol, so UDP sockets don't have to explicitly +## call the `connect <net.html#connect%2CSocket%2Cstring>`_ procedure. They can +## simply start sending data immediately. + +runnableExamples("-r:off"): + let socket = newSocket(AF_INET, SOCK_DGRAM, IPPROTO_UDP) + socket.sendTo("192.168.0.1", Port(27960), "status\n") + +runnableExamples("-r:off"): + let socket = newSocket(AF_INET, SOCK_DGRAM, IPPROTO_UDP) + let ip = parseIpAddress("192.168.0.1") + doAssert socket.sendTo(ip, Port(27960), "status\c\l") == 8 + +## Creating a server +## ----------------- +## +## After you create a socket with the `newSocket` procedure, you can create a +## TCP server by calling the `bindAddr` and `listen` procedures. + +runnableExamples("-r:off"): + let socket = newSocket() + socket.bindAddr(Port(1234)) + socket.listen() + + # You can then begin accepting connections using the `accept` procedure. 
+ var client: Socket + var address = "" + while true: + socket.acceptAddr(client, address) + echo "Client connected from: ", address + +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/assertions -{.deadCodeElim: on.} -import rawsockets, os, strutils, unsigned, parseutils, times -export Port, `$`, `==` +import std/nativesockets +import std/[os, strutils, times, sets, options, monotimes] +import std/ssl_config +export nativesockets.Port, nativesockets.`$`, nativesockets.`==` +export Domain, SockType, Protocol, IPPROTO_NONE -const useWinVersion = defined(Windows) or defined(nimdoc) +const useWinVersion = defined(windows) or defined(nimdoc) +const useNimNetLite = defined(nimNetLite) or defined(freertos) or defined(zephyr) or + defined(nuttx) +const defineSsl = defined(ssl) or defined(nimdoc) -when defined(ssl): - import openssl +when useWinVersion: + from std/winlean import WSAESHUTDOWN + +when defineSsl: + import std/openssl + when not defined(nimDisableCertificateValidation): + from std/ssl_certs import scanSSLCertificates # Note: The enumerations are mapped to Window's constants. 
-when defined(ssl): +when defineSsl: type - SslError* = object of Exception + Certificate* = string ## DER encoded certificate + + SslError* = object of CatchableError SslCVerifyMode* = enum - CVerifyNone, CVerifyPeer - + CVerifyNone, CVerifyPeer, CVerifyPeerUseEnvVars + SslProtVersion* = enum protSSLv2, protSSLv3, protTLSv1, protSSLv23 - - SslContext* = distinct SslCtx + + SslContext* = ref object + context*: SslCtx + referencedData: HashSet[int] + extraInternal: SslContextExtraInternal SslAcceptResult* = enum AcceptNoClient = 0, AcceptNoHandshake, AcceptSuccess - {.deprecated: [ESSL: SSLError, TSSLCVerifyMode: SSLCVerifyMode, - TSSLProtVersion: SSLProtVersion, PSSLContext: SSLContext, - TSSLAcceptResult: SSLAcceptResult].} + SslHandshakeType* = enum + handshakeAsClient, handshakeAsServer + + SslClientGetPskFunc* = proc(hint: string): tuple[identity: string, psk: string] + + SslServerGetPskFunc* = proc(identity: string): string + + SslContextExtraInternal = ref object of RootRef + serverGetPskFunc: SslServerGetPskFunc + clientGetPskFunc: SslClientGetPskFunc + +else: + type + SslContext* = ref object # TODO: Workaround #4797. const BufferSize*: int = 4000 ## size of a buffered socket's buffer + MaxLineLength* = 1_000_000 type - SocketImpl* = object ## socket type + SocketImpl* = object ## socket type fd: SocketHandle - case isBuffered: bool # determines whether this socket is buffered. - of true: - buffer: array[0..BufferSize, char] - currPos: int # current index in buffer - bufLen: int # current length of buffer - of false: nil - when defined(ssl): - case isSsl: bool - of true: - sslHandle: SSLPtr - sslContext: SSLContext - sslNoHandshake: bool # True if needs handshake. - sslHasPeekChar: bool - sslPeekChar: char - of false: nil + isBuffered: bool # determines whether this socket is buffered. 
+ buffer: array[0..BufferSize, char] + currPos: int # current index in buffer + bufLen: int # current length of buffer + when defineSsl: + isSsl: bool + sslHandle: SslPtr + sslContext: SslContext + sslNoHandshake: bool # True if needs handshake. + sslHasPeekChar: bool + sslPeekChar: char + sslNoShutdown: bool # True if shutdown shouldn't be done. lastError: OSErrorCode ## stores the last error on this socket + domain: Domain + sockType: SockType + protocol: Protocol Socket* = ref SocketImpl SOBool* = enum ## Boolean socket options. OptAcceptConn, OptBroadcast, OptDebug, OptDontRoute, OptKeepAlive, - OptOOBInline, OptReuseAddr + OptOOBInline, OptReuseAddr, OptReusePort, OptNoDelay ReadLineResult* = enum ## result for readLineAsync ReadFullLine, ReadPartialLine, ReadDisconnected, ReadNone - TimeoutError* = object of Exception + TimeoutError* = object of CatchableError SocketFlag* {.pure.} = enum Peek, SafeDisconn ## Ensures disconnection exceptions (ECONNRESET, EPIPE etc) are not thrown. -{.deprecated: [TSocketFlags: SocketFlag, ETimeout: TimeoutError, - TReadLineResult: ReadLineResult, TSOBool: SOBool, PSocket: Socket, - TSocketImpl: SocketImpl].} +when defined(nimHasStyleChecks): + {.push styleChecks: off.} type IpAddressFamily* {.pure.} = enum ## Describes the type of an IP address - IPv6, ## IPv6 address - IPv4 ## IPv4 address + IPv6, ## IPv6 address + IPv4 ## IPv4 address - TIpAddress* = object ## stores an arbitrary IP address - case family*: IpAddressFamily ## the type of the IP address (IPv4 or IPv6) + IpAddress* = object ## stores an arbitrary IP address + case family*: IpAddressFamily ## the type of the IP address (IPv4 or IPv6) of IpAddressFamily.IPv6: address_v6*: array[0..15, uint8] ## Contains the IP address in bytes in ## case of IPv6 of IpAddressFamily.IPv4: - address_v4*: array[0..3, uint8] ## Contains the IP address in bytes in - ## case of IPv4 + address_v4*: array[0..3, uint8] ## Contains the IP address in bytes in + ## case of IPv4 +when 
defined(nimHasStyleChecks): + {.pop.} + + +when defined(posix) and not defined(lwip): + from std/posix import TPollfd, POLLIN, POLLPRI, POLLOUT, POLLWRBAND, Tnfds -proc isIpAddress*(address_str: string): bool {.tags: [].} -proc parseIpAddress*(address_str: string): TIpAddress + template monitorPollEvent(x: var SocketHandle, y: cint, timeout: int): int = + var tpollfd: TPollfd + tpollfd.fd = cast[cint](x) + tpollfd.events = y + posix.poll(addr(tpollfd), Tnfds(1), timeout) + +proc timeoutRead(fd: var SocketHandle, timeout = 500): int = + when defined(windows) or defined(lwip): + var fds = @[fd] + selectRead(fds, timeout) + else: + monitorPollEvent(fd, POLLIN or POLLPRI, timeout) + +proc timeoutWrite(fd: var SocketHandle, timeout = 500): int = + when defined(windows) or defined(lwip): + var fds = @[fd] + selectWrite(fds, timeout) + else: + monitorPollEvent(fd, POLLOUT or POLLWRBAND, timeout) + +proc socketError*(socket: Socket, err: int = -1, async = false, + lastError = (-1).OSErrorCode, + flags: set[SocketFlag] = {}) {.gcsafe.} proc isDisconnectionError*(flags: set[SocketFlag], lastError: OSErrorCode): bool = - ## Determines whether ``lastError`` is a disconnection error. Only does this - ## if flags contains ``SafeDisconn``. + ## Determines whether `lastError` is a disconnection error. Only does this + ## if flags contains `SafeDisconn`. 
when useWinVersion: SocketFlag.SafeDisconn in flags and - lastError.int32 in {WSAECONNRESET, WSAECONNABORTED, WSAENETRESET, - WSAEDISCON, ERROR_NETNAME_DELETED} + (lastError.int32 == WSAECONNRESET or + lastError.int32 == WSAECONNABORTED or + lastError.int32 == WSAENETRESET or + lastError.int32 == WSAEDISCON or + lastError.int32 == WSAESHUTDOWN or + lastError.int32 == ERROR_NETNAME_DELETED) else: SocketFlag.SafeDisconn in flags and - lastError.int32 in {ECONNRESET, EPIPE, ENETRESET} + (lastError.int32 == ECONNRESET or + lastError.int32 == EPIPE or + lastError.int32 == ENETRESET) proc toOSFlags*(socketFlags: set[SocketFlag]): cint = ## Converts the flags into the underlying OS representation. @@ -118,159 +261,702 @@ proc toOSFlags*(socketFlags: set[SocketFlag]): cint = result = result or MSG_PEEK of SocketFlag.SafeDisconn: continue -proc newSocket(fd: SocketHandle, isBuff: bool): Socket = +proc newSocket*(fd: SocketHandle, domain: Domain = AF_INET, + sockType: SockType = SOCK_STREAM, + protocol: Protocol = IPPROTO_TCP, buffered = true): owned(Socket) = ## Creates a new socket as specified by the params. assert fd != osInvalidSocket - new(result) - result.fd = fd - result.isBuffered = isBuff - if isBuff: + result = Socket( + fd: fd, + isBuffered: buffered, + domain: domain, + sockType: sockType, + protocol: protocol) + if buffered: result.currPos = 0 -proc newSocket*(domain, typ, protocol: cint, buffered = true): Socket = + # Set SO_NOSIGPIPE on OS X. + when defined(macosx) and not defined(nimdoc): + setSockOptInt(fd, SOL_SOCKET, SO_NOSIGPIPE, 1) + +proc newSocket*(domain, sockType, protocol: cint, buffered = true, + inheritable = defined(nimInheritHandles)): owned(Socket) = ## Creates a new socket. ## - ## If an error occurs EOS will be raised. - let fd = newRawSocket(domain, typ, protocol) + ## The SocketHandle associated with the resulting Socket will not be + ## inheritable by child processes by default. This can be changed via + ## the `inheritable` parameter. 
+ ## + ## If an error occurs OSError will be raised. + let fd = createNativeSocket(domain, sockType, protocol, inheritable) if fd == osInvalidSocket: raiseOSError(osLastError()) - result = newSocket(fd, buffered) + result = newSocket(fd, domain.Domain, sockType.SockType, protocol.Protocol, + buffered) -proc newSocket*(domain: Domain = AF_INET, typ: SockType = SOCK_STREAM, - protocol: Protocol = IPPROTO_TCP, buffered = true): Socket = +proc newSocket*(domain: Domain = AF_INET, sockType: SockType = SOCK_STREAM, + protocol: Protocol = IPPROTO_TCP, buffered = true, + inheritable = defined(nimInheritHandles)): owned(Socket) = ## Creates a new socket. ## - ## If an error occurs EOS will be raised. - let fd = newRawSocket(domain, typ, protocol) + ## The SocketHandle associated with the resulting Socket will not be + ## inheritable by child processes by default. This can be changed via + ## the `inheritable` parameter. + ## + ## If an error occurs OSError will be raised. + let fd = createNativeSocket(domain, sockType, protocol, inheritable) if fd == osInvalidSocket: raiseOSError(osLastError()) - result = newSocket(fd, buffered) + result = newSocket(fd, domain, sockType, protocol, buffered) + +proc parseIPv4Address(addressStr: string): IpAddress = + ## Parses IPv4 addresses + ## Raises ValueError on errors + var + byteCount = 0 + currentByte: uint16 = 0 + separatorValid = false + leadingZero = false + + result = IpAddress(family: IpAddressFamily.IPv4) + + for i in 0 .. high(addressStr): + if addressStr[i] in strutils.Digits: # Character is a number + if leadingZero: + raise newException(ValueError, + "Invalid IP address. Octal numbers are not allowed") + currentByte = currentByte * 10 + + cast[uint16](ord(addressStr[i]) - ord('0')) + if currentByte == 0'u16: + leadingZero = true + elif currentByte > 255'u16: + raise newException(ValueError, + "Invalid IP Address. 
Value is out of range") + separatorValid = true + elif addressStr[i] == '.': # IPv4 address separator + if not separatorValid or byteCount >= 3: + raise newException(ValueError, + "Invalid IP Address. The address consists of too many groups") + result.address_v4[byteCount] = cast[uint8](currentByte) + currentByte = 0 + byteCount.inc + separatorValid = false + leadingZero = false + else: + raise newException(ValueError, + "Invalid IP Address. Address contains an invalid character") + + if byteCount != 3 or not separatorValid: + raise newException(ValueError, "Invalid IP Address") + result.address_v4[byteCount] = cast[uint8](currentByte) + +proc parseIPv6Address(addressStr: string): IpAddress = + ## Parses IPv6 addresses + ## Raises ValueError on errors + result = IpAddress(family: IpAddressFamily.IPv6) + if addressStr.len < 2: + raise newException(ValueError, "Invalid IP Address") + + var + groupCount = 0 + currentGroupStart = 0 + currentShort: uint32 = 0 + separatorValid = true + dualColonGroup = -1 + lastWasColon = false + v4StartPos = -1 + byteCount = 0 + + for i, c in addressStr: + if c == ':': + if not separatorValid: + raise newException(ValueError, + "Invalid IP Address. Address contains an invalid separator") + if lastWasColon: + if dualColonGroup != -1: + raise newException(ValueError, + "Invalid IP Address. Address contains more than one \"::\" separator") + dualColonGroup = groupCount + separatorValid = false + elif i != 0 and i != high(addressStr): + if groupCount >= 8: + raise newException(ValueError, + "Invalid IP Address. The address consists of too many groups") + result.address_v6[groupCount*2] = cast[uint8](currentShort shr 8) + result.address_v6[groupCount*2+1] = cast[uint8](currentShort and 0xFF) + currentShort = 0 + groupCount.inc() + if dualColonGroup != -1: separatorValid = false + elif i == 0: # only valid if address starts with :: + if addressStr[1] != ':': + raise newException(ValueError, + "Invalid IP Address. 
Address may not start with \":\"") + else: # i == high(addressStr) - only valid if address ends with :: + if addressStr[high(addressStr)-1] != ':': + raise newException(ValueError, + "Invalid IP Address. Address may not end with \":\"") + lastWasColon = true + currentGroupStart = i + 1 + elif c == '.': # Switch to parse IPv4 mode + if i < 3 or not separatorValid or groupCount >= 7: + raise newException(ValueError, "Invalid IP Address") + v4StartPos = currentGroupStart + currentShort = 0 + separatorValid = false + break + elif c in strutils.HexDigits: + if c in strutils.Digits: # Normal digit + currentShort = (currentShort shl 4) + cast[uint32](ord(c) - ord('0')) + elif c >= 'a' and c <= 'f': # Lower case hex + currentShort = (currentShort shl 4) + cast[uint32](ord(c) - ord('a')) + 10 + else: # Upper case hex + currentShort = (currentShort shl 4) + cast[uint32](ord(c) - ord('A')) + 10 + if currentShort > 65535'u32: + raise newException(ValueError, + "Invalid IP Address. Value is out of range") + lastWasColon = false + separatorValid = true + else: + raise newException(ValueError, + "Invalid IP Address. Address contains an invalid character") + + + if v4StartPos == -1: # Don't parse v4. Copy the remaining v6 stuff + if separatorValid: # Copy remaining data + if groupCount >= 8: + raise newException(ValueError, + "Invalid IP Address. The address consists of too many groups") + result.address_v6[groupCount*2] = cast[uint8](currentShort shr 8) + result.address_v6[groupCount*2+1] = cast[uint8](currentShort and 0xFF) + groupCount.inc() + else: # Must parse IPv4 address + var leadingZero = false + for i, c in addressStr[v4StartPos..high(addressStr)]: + if c in strutils.Digits: # Character is a number + if leadingZero: + raise newException(ValueError, + "Invalid IP address. 
Octal numbers not allowed") + currentShort = currentShort * 10 + cast[uint32](ord(c) - ord('0')) + if currentShort == 0'u32: + leadingZero = true + elif currentShort > 255'u32: + raise newException(ValueError, + "Invalid IP Address. Value is out of range") + separatorValid = true + elif c == '.': # IPv4 address separator + if not separatorValid or byteCount >= 3: + raise newException(ValueError, "Invalid IP Address") + result.address_v6[groupCount*2 + byteCount] = cast[uint8](currentShort) + currentShort = 0 + byteCount.inc() + separatorValid = false + leadingZero = false + else: # Invalid character + raise newException(ValueError, + "Invalid IP Address. Address contains an invalid character") + + if byteCount != 3 or not separatorValid: + raise newException(ValueError, "Invalid IP Address") + result.address_v6[groupCount*2 + byteCount] = cast[uint8](currentShort) + groupCount += 2 + + # Shift and fill zeros in case of :: + if groupCount > 8: + raise newException(ValueError, + "Invalid IP Address. The address consists of too many groups") + elif groupCount < 8: # must fill + if dualColonGroup == -1: + raise newException(ValueError, + "Invalid IP Address. The address consists of too few groups") + var toFill = 8 - groupCount # The number of groups to fill + var toShift = groupCount - dualColonGroup # Nr of known groups after :: + for i in 0..2*toShift-1: # shift + result.address_v6[15-i] = result.address_v6[groupCount*2-i-1] + for i in 0..2*toFill-1: # fill with 0s + result.address_v6[dualColonGroup*2+i] = 0 + elif dualColonGroup != -1: + raise newException(ValueError, + "Invalid IP Address. The address consists of too many groups") + +proc parseIpAddress*(addressStr: string): IpAddress = + ## Parses an IP address + ## + ## Raises ValueError on error. + ## + ## For IPv4 addresses, only the strict form as + ## defined in RFC 6943 is considered valid, see + ## https://datatracker.ietf.org/doc/html/rfc6943#section-3.1.1. 
+ if addressStr.len == 0: + raise newException(ValueError, "IP Address string is empty") + if addressStr.contains(':'): + return parseIPv6Address(addressStr) + else: + return parseIPv4Address(addressStr) -when defined(ssl): - CRYPTO_malloc_init() - SslLibraryInit() - SslLoadErrorStrings() - ErrLoadBioStrings() - OpenSSL_add_all_algorithms() +proc isIpAddress*(addressStr: string): bool {.tags: [].} = + ## Checks if a string is an IP address + ## Returns true if it is, false otherwise + try: + discard parseIpAddress(addressStr) + except ValueError: + return false + return true - proc raiseSSLError*(s = "") = +proc toSockAddr*(address: IpAddress, port: Port, sa: var Sockaddr_storage, + sl: var SockLen) = + ## Converts `IpAddress` and `Port` to `SockAddr` and `SockLen` + let port = htons(uint16(port)) + case address.family + of IpAddressFamily.IPv4: + sl = sizeof(Sockaddr_in).SockLen + let s = cast[ptr Sockaddr_in](addr sa) + s.sin_family = typeof(s.sin_family)(toInt(AF_INET)) + s.sin_port = port + copyMem(addr s.sin_addr, unsafeAddr address.address_v4[0], + sizeof(s.sin_addr)) + of IpAddressFamily.IPv6: + sl = sizeof(Sockaddr_in6).SockLen + let s = cast[ptr Sockaddr_in6](addr sa) + s.sin6_family = typeof(s.sin6_family)(toInt(AF_INET6)) + s.sin6_port = port + copyMem(addr s.sin6_addr, unsafeAddr address.address_v6[0], + sizeof(s.sin6_addr)) + +proc fromSockAddrAux(sa: ptr Sockaddr_storage, sl: SockLen, + address: var IpAddress, port: var Port) = + if sa.ss_family.cint == toInt(AF_INET) and sl == sizeof(Sockaddr_in).SockLen: + address = IpAddress(family: IpAddressFamily.IPv4) + let s = cast[ptr Sockaddr_in](sa) + copyMem(addr address.address_v4[0], addr s.sin_addr, + sizeof(address.address_v4)) + port = ntohs(s.sin_port).Port + elif sa.ss_family.cint == toInt(AF_INET6) and + sl == sizeof(Sockaddr_in6).SockLen: + address = IpAddress(family: IpAddressFamily.IPv6) + let s = cast[ptr Sockaddr_in6](sa) + copyMem(addr address.address_v6[0], addr s.sin6_addr, + 
sizeof(address.address_v6)) + port = ntohs(s.sin6_port).Port + else: + raise newException(ValueError, "Neither IPv4 nor IPv6") + +proc fromSockAddr*(sa: Sockaddr_storage | SockAddr | Sockaddr_in | Sockaddr_in6, + sl: SockLen, address: var IpAddress, port: var Port) {.inline.} = + ## Converts `SockAddr` and `SockLen` to `IpAddress` and `Port`. Raises + ## `ObjectConversionDefect` in case of invalid `sa` and `sl` arguments. + fromSockAddrAux(cast[ptr Sockaddr_storage](unsafeAddr sa), sl, address, port) + +when defineSsl: + # OpenSSL >= 1.1.0 does not need explicit init. + when not useOpenssl3: + CRYPTO_malloc_init() + doAssert SslLibraryInit() == 1 + SSL_load_error_strings() + ERR_load_BIO_strings() + OpenSSL_add_all_algorithms() + + proc sslHandle*(self: Socket): SslPtr = + ## Retrieve the ssl pointer of `socket`. + ## Useful for interfacing with `openssl`. + self.sslHandle + + proc raiseSSLError*(s = "") {.raises: [SslError].}= ## Raises a new SSL error. if s != "": - raise newException(SSLError, s) - let err = ErrPeekLastError() + raise newException(SslError, s) + let err = ERR_peek_last_error() if err == 0: - raise newException(SSLError, "No error reported.") - if err == -1: - raiseOSError(osLastError()) - var errStr = ErrErrorString(err, nil) - raise newException(SSLError, $errStr) + raise newException(SslError, "No error reported.") + var errStr = $ERR_error_string(err, nil) + case err + of 336032814, 336032784: + errStr = "Please upgrade your OpenSSL library, it does not support the " & + "necessary protocols. OpenSSL error is: " & errStr + else: + discard + raise newException(SslError, errStr) + + proc getExtraData*(ctx: SslContext, index: int): RootRef = + ## Retrieves arbitrary data stored inside SslContext. 
+ if index notin ctx.referencedData: + raise newException(IndexDefect, "No data with that index.") + let res = ctx.context.SSL_CTX_get_ex_data(index.cint) + if cast[int](res) == 0: + raiseSSLError() + return cast[RootRef](res) + + proc setExtraData*(ctx: SslContext, index: int, data: RootRef) = + ## Stores arbitrary data inside SslContext. The unique `index` + ## should be retrieved using getSslContextExtraDataIndex. + if index in ctx.referencedData: + GC_unref(getExtraData(ctx, index)) + + if ctx.context.SSL_CTX_set_ex_data(index.cint, cast[pointer](data)) == -1: + raiseSSLError() + + if index notin ctx.referencedData: + ctx.referencedData.incl(index) + GC_ref(data) # http://simplestcodings.blogspot.co.uk/2010/08/secure-server-client-using-openssl-in-c.html - proc loadCertificates(ctx: SSL_CTX, certFile, keyFile: string) = - if certFile != "" and not existsFile(certFile): - raise newException(system.IOError, "Certificate file could not be found: " & certFile) - if keyFile != "" and not existsFile(keyFile): + proc loadCertificates(ctx: SslCtx, certFile, keyFile: string) = + if certFile != "" and not fileExists(certFile): + raise newException(system.IOError, + "Certificate file could not be found: " & certFile) + if keyFile != "" and not fileExists(keyFile): raise newException(system.IOError, "Key file could not be found: " & keyFile) - + if certFile != "": - var ret = SSLCTXUseCertificateChainFile(ctx, certFile) + var ret = SSL_CTX_use_certificate_chain_file(ctx, certFile) if ret != 1: raiseSSLError() - + # TODO: Password? 
www.rtfm.com/openssl-examples/part1.pdf if keyFile != "": if SSL_CTX_use_PrivateKey_file(ctx, keyFile, SSL_FILETYPE_PEM) != 1: raiseSSLError() - + if SSL_CTX_check_private_key(ctx) != 1: raiseSSLError("Verification of private key file failed.") proc newContext*(protVersion = protSSLv23, verifyMode = CVerifyPeer, - certFile = "", keyFile = ""): SSLContext = + certFile = "", keyFile = "", cipherList = CiphersIntermediate, + caDir = "", caFile = "", ciphersuites = CiphersModern): SslContext = ## Creates an SSL context. - ## - ## Protocol version specifies the protocol to use. SSLv2, SSLv3, TLSv1 - ## are available with the addition of ``protSSLv23`` which allows for - ## compatibility with all of them. ## - ## There are currently only two options for verify mode; - ## one is ``CVerifyNone`` and with it certificates will not be verified - ## the other is ``CVerifyPeer`` and certificates will be verified for - ## it, ``CVerifyPeer`` is the safest choice. + ## Protocol version is currently ignored by default and TLS is used. + ## With `-d:openssl10`, only SSLv23 and TLSv1 may be used. + ## + ## There are three options for verify mode: + ## `CVerifyNone`: certificates are not verified; + ## `CVerifyPeer`: certificates are verified; + ## `CVerifyPeerUseEnvVars`: certificates are verified and the optional + ## environment variables SSL_CERT_FILE and SSL_CERT_DIR are also used to + ## locate certificates + ## + ## The `nimDisableCertificateValidation` define overrides verifyMode and + ## disables certificate verification globally! + ## + ## CA certificates will be loaded, in the following order, from: + ## + ## - caFile, caDir, parameters, if set + ## - if `verifyMode` is set to `CVerifyPeerUseEnvVars`, + ## the SSL_CERT_FILE and SSL_CERT_DIR environment variables are used + ## - a set of files and directories from the `ssl_certs <ssl_certs.html>`_ file. 
## ## The last two parameters specify the certificate file path and the key file ## path, a server socket will most likely not work without these. + ## ## Certificates can be generated using the following command: - ## ``openssl req -x509 -nodes -days 365 -newkey rsa:1024 -keyout mycert.pem -out mycert.pem``. - var newCTX: SSL_CTX - case protVersion - of protSSLv23: - newCTX = SSL_CTX_new(SSLv23_method()) # SSlv2,3 and TLS1 support. - of protSSLv2: - when not defined(linux): - newCTX = SSL_CTX_new(SSLv2_method()) - else: - raiseSslError() - of protSSLv3: - newCTX = SSL_CTX_new(SSLv3_method()) - of protTLSv1: - newCTX = SSL_CTX_new(TLSv1_method()) - - if newCTX.SSLCTXSetCipherList("ALL") != 1: + ## - `openssl req -x509 -nodes -days 365 -newkey rsa:4096 -keyout mykey.pem -out mycert.pem` + ## or using ECDSA: + ## - `openssl ecparam -out mykey.pem -name secp256k1 -genkey` + ## - `openssl req -new -key mykey.pem -x509 -nodes -days 365 -out mycert.pem` + var mtd: PSSL_METHOD + when defined(openssl10): + case protVersion + of protSSLv23: + mtd = SSLv23_method() + of protSSLv2: + raiseSSLError("SSLv2 is no longer secure and has been deprecated, use protSSLv23") + of protSSLv3: + raiseSSLError("SSLv3 is no longer secure and has been deprecated, use protSSLv23") + of protTLSv1: + mtd = TLSv1_method() + else: + mtd = TLS_method() + if mtd == nil: + raiseSSLError("Failed to create TLS context") + var newCTX = SSL_CTX_new(mtd) + if newCTX == nil: + raiseSSLError("Failed to create TLS context") + + if newCTX.SSL_CTX_set_cipher_list(cipherList) != 1: + raiseSSLError() + when not defined(openssl10) and not defined(libressl): + let sslVersion = getOpenSSLVersion() + if sslVersion >= 0x010101000 and sslVersion != 0x020000000: + # In OpenSSL >= 1.1.1, TLSv1.3 cipher suites can only be configured via + # this API. + if newCTX.SSL_CTX_set_ciphersuites(ciphersuites) != 1: + raiseSSLError() + # Automatically the best ECDH curve for client exchange. 
Without this, ECDH + # ciphers will be ignored by the server. + # + # From OpenSSL >= 1.1.0, this setting is set by default and can't be + # overridden. + if newCTX.SSL_CTX_set_ecdh_auto(1) != 1: raiseSSLError() - case verifyMode - of CVerifyPeer: - newCTX.SSLCTXSetVerify(SSLVerifyPeer, nil) - of CVerifyNone: - newCTX.SSLCTXSetVerify(SSLVerifyNone, nil) + + when defined(nimDisableCertificateValidation): + newCTX.SSL_CTX_set_verify(SSL_VERIFY_NONE, nil) + else: + case verifyMode + of CVerifyPeer, CVerifyPeerUseEnvVars: + newCTX.SSL_CTX_set_verify(SSL_VERIFY_PEER, nil) + of CVerifyNone: + newCTX.SSL_CTX_set_verify(SSL_VERIFY_NONE, nil) + if newCTX == nil: raiseSSLError() discard newCTX.SSLCTXSetMode(SSL_MODE_AUTO_RETRY) newCTX.loadCertificates(certFile, keyFile) - return SSLContext(newCTX) - proc wrapSocket*(ctx: SSLContext, socket: Socket) = + const VerifySuccess = 1 # SSL_CTX_load_verify_locations returns 1 on success. + + when not defined(nimDisableCertificateValidation): + if verifyMode != CVerifyNone: + # Use the caDir and caFile parameters if set + if caDir != "" or caFile != "": + if newCTX.SSL_CTX_load_verify_locations(if caFile == "": nil else: caFile.cstring, if caDir == "": nil else: caDir.cstring) != VerifySuccess: + raise newException(IOError, "Failed to load SSL/TLS CA certificate(s).") + + else: + # Scan for certs in known locations. 
For CVerifyPeerUseEnvVars also scan + # the SSL_CERT_FILE and SSL_CERT_DIR env vars + var found = false + let useEnvVars = (if verifyMode == CVerifyPeerUseEnvVars: true else: false) + for fn in scanSSLCertificates(useEnvVars = useEnvVars): + if fn.extractFilename == "": + if newCTX.SSL_CTX_load_verify_locations(nil, cstring(fn.normalizePathEnd(false))) == VerifySuccess: + found = true + break + elif newCTX.SSL_CTX_load_verify_locations(cstring(fn), nil) == VerifySuccess: + found = true + break + if not found: + raise newException(IOError, "No SSL/TLS CA certificates found.") + + result = SslContext(context: newCTX, referencedData: initHashSet[int](), + extraInternal: new(SslContextExtraInternal)) + + proc getExtraInternal(ctx: SslContext): SslContextExtraInternal = + return ctx.extraInternal + + proc destroyContext*(ctx: SslContext) = + ## Free memory referenced by SslContext. + + # We assume here that OpenSSL's internal indexes increase by 1 each time. + # That means we can assume that the next internal index is the length of + # extra data indexes. + for i in ctx.referencedData: + GC_unref(getExtraData(ctx, i)) + ctx.context.SSL_CTX_free() + + proc `pskIdentityHint=`*(ctx: SslContext, hint: string) = + ## Sets the identity hint passed to server. + ## + ## Only used in PSK ciphersuites. 
+ if ctx.context.SSL_CTX_use_psk_identity_hint(hint) <= 0: + raiseSSLError() + + proc clientGetPskFunc*(ctx: SslContext): SslClientGetPskFunc = + return ctx.getExtraInternal().clientGetPskFunc + + proc pskClientCallback(ssl: SslPtr; hint: cstring; identity: cstring; + max_identity_len: cuint; psk: ptr uint8; + max_psk_len: cuint): cuint {.cdecl.} = + let ctx = SslContext(context: ssl.SSL_get_SSL_CTX) + let hintString = if hint == nil: "" else: $hint + let (identityString, pskString) = (ctx.clientGetPskFunc)(hintString) + if pskString.len.cuint > max_psk_len: + return 0 + if identityString.len.cuint >= max_identity_len: + return 0 + copyMem(identity, identityString.cstring, identityString.len + 1) # with the last zero byte + copyMem(psk, pskString.cstring, pskString.len) + + return pskString.len.cuint + + proc `clientGetPskFunc=`*(ctx: SslContext, fun: SslClientGetPskFunc) = + ## Sets function that returns the client identity and the PSK based on identity + ## hint from the server. + ## + ## Only used in PSK ciphersuites. + ctx.getExtraInternal().clientGetPskFunc = fun + ctx.context.SSL_CTX_set_psk_client_callback( + if fun == nil: nil else: pskClientCallback) + + proc serverGetPskFunc*(ctx: SslContext): SslServerGetPskFunc = + return ctx.getExtraInternal().serverGetPskFunc + + proc pskServerCallback(ssl: SslCtx; identity: cstring; psk: ptr uint8; + max_psk_len: cint): cuint {.cdecl.} = + let ctx = SslContext(context: ssl.SSL_get_SSL_CTX) + let pskString = (ctx.serverGetPskFunc)($identity) + if pskString.len.cint > max_psk_len: + return 0 + copyMem(psk, pskString.cstring, pskString.len) + + return pskString.len.cuint + + proc `serverGetPskFunc=`*(ctx: SslContext, fun: SslServerGetPskFunc) = + ## Sets function that returns PSK based on the client identity. + ## + ## Only used in PSK ciphersuites. 
+ ctx.getExtraInternal().serverGetPskFunc = fun + ctx.context.SSL_CTX_set_psk_server_callback(if fun == nil: nil + else: pskServerCallback) + + proc getPskIdentity*(socket: Socket): string = + ## Gets the PSK identity provided by the client. + assert socket.isSsl + return $(socket.sslHandle.SSL_get_psk_identity) + + proc wrapSocket*(ctx: SslContext, socket: Socket) = ## Wraps a socket in an SSL context. This function effectively turns - ## ``socket`` into an SSL socket. + ## `socket` into an SSL socket. + ## + ## This must be called on an unconnected socket; an SSL session will + ## be started when the socket is connected. ## + ## FIXME: ## **Disclaimer**: This code is not well tested, may be very unsafe and ## prone to security vulnerabilities. - - socket.isSSL = true + + assert(not socket.isSsl) + socket.isSsl = true socket.sslContext = ctx - socket.sslHandle = SSLNew(SSLCTX(socket.sslContext)) + socket.sslHandle = SSL_new(socket.sslContext.context) socket.sslNoHandshake = false socket.sslHasPeekChar = false + socket.sslNoShutdown = false if socket.sslHandle == nil: raiseSSLError() - - if SSLSetFd(socket.sslHandle, socket.fd) != 1: + + if SSL_set_fd(socket.sslHandle, socket.fd) != 1: raiseSSLError() + proc checkCertName(socket: Socket, hostname: string) {.raises: [SslError], tags:[RootEffect].} = + ## Check if the certificate Subject Alternative Name (SAN) or Subject CommonName (CN) matches hostname. + ## Wildcards match only in the left-most label. 
+ ## When name starts with a dot it will be matched by a certificate valid for any subdomain + when not defined(nimDisableCertificateValidation) and not defined(windows): + assert socket.isSsl + try: + let certificate = socket.sslHandle.SSL_get_peer_certificate() + if certificate.isNil: + raiseSSLError("No SSL certificate found.") + + const X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT = 0x1.cuint + # https://www.openssl.org/docs/man1.1.1/man3/X509_check_host.html + let match = certificate.X509_check_host(hostname.cstring, hostname.len.cint, + X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT, nil) + # https://www.openssl.org/docs/man1.1.1/man3/SSL_get_peer_certificate.html + X509_free(certificate) + if match != 1: + raiseSSLError("SSL Certificate check failed.") + + except LibraryError: + raiseSSLError("SSL import failed") + + proc wrapConnectedSocket*(ctx: SslContext, socket: Socket, + handshake: SslHandshakeType, + hostname: string = "") = + ## Wraps a connected socket in an SSL context. This function effectively + ## turns `socket` into an SSL socket. + ## `hostname` should be specified so that the client knows which hostname + ## the server certificate should be validated against. + ## + ## This should be called on a connected socket, and will perform + ## an SSL handshake immediately. + ## + ## FIXME: + ## **Disclaimer**: This code is not well tested, may be very unsafe and + ## prone to security vulnerabilities. 
+ wrapSocket(ctx, socket) + case handshake + of handshakeAsClient: + if hostname.len > 0 and not isIpAddress(hostname): + # Discard result in case OpenSSL version doesn't support SNI, or we're + # not using TLSv1+ + discard SSL_set_tlsext_host_name(socket.sslHandle, hostname) + ErrClearError() + let ret = SSL_connect(socket.sslHandle) + socketError(socket, ret) + when not defined(nimDisableCertificateValidation) and not defined(windows): + # FIXME: this should be skipped on CVerifyNone + if hostname.len > 0 and not isIpAddress(hostname): + socket.checkCertName(hostname) + of handshakeAsServer: + ErrClearError() + let ret = SSL_accept(socket.sslHandle) + socketError(socket, ret) + + proc getPeerCertificates*(sslHandle: SslPtr): seq[Certificate] {.since: (1, 1).} = + ## Returns the certificate chain received by the peer we are connected to + ## through the OpenSSL connection represented by `sslHandle`. + ## The handshake must have been completed and the certificate chain must + ## have been verified successfully or else an empty sequence is returned. + ## The chain is ordered from leaf certificate to root certificate. + result = newSeq[Certificate]() + if SSL_get_verify_result(sslHandle) != X509_V_OK: + return + let stack = SSL_get0_verified_chain(sslHandle) + if stack == nil: + return + let length = OPENSSL_sk_num(stack) + if length == 0: + return + for i in 0 .. length - 1: + let x509 = cast[PX509](OPENSSL_sk_value(stack, i)) + result.add(i2d_X509(x509)) + + proc getPeerCertificates*(socket: Socket): seq[Certificate] {.since: (1, 1).} = + ## Returns the certificate chain received by the peer we are connected to + ## through the given socket. + ## The handshake must have been completed and the certificate chain must + ## have been verified successfully or else an empty sequence is returned. + ## The chain is ordered from leaf certificate to root certificate. 
+ if not socket.isSsl: + result = newSeq[Certificate]() + else: + result = getPeerCertificates(socket.sslHandle) + + proc `sessionIdContext=`*(ctx: SslContext, sidCtx: string) = + ## Sets the session id context in which a session can be reused. + ## Used for permitting clients to reuse a session id instead of + ## doing a new handshake. + ## + ## TLS clients might attempt to resume a session using the session id context, + ## thus it must be set if verifyMode is set to CVerifyPeer or CVerifyPeerUseEnvVars, + ## otherwise the connection will fail and SslError will be raised if resumption occurs. + ## + ## - Only useful if set server-side. + ## - Should be unique per-application to prevent clients from malfunctioning. + ## - sidCtx must be at most 32 characters in length. + if sidCtx.len > 32: + raiseSSLError("sessionIdContext must be shorter than 32 characters") + SSL_CTX_set_session_id_context(ctx.context, sidCtx, sidCtx.len) + proc getSocketError*(socket: Socket): OSErrorCode = - ## Checks ``osLastError`` for a valid error. If it has been reset it uses + ## Checks `osLastError` for a valid error. If it has been reset it uses ## the last error stored in the socket object. result = osLastError() if result == 0.OSErrorCode: result = socket.lastError if result == 0.OSErrorCode: - raise newException(OSError, "No valid socket error code available") + raiseOSError(result, "No valid socket error code available") proc socketError*(socket: Socket, err: int = -1, async = false, - lastError = (-1).OSErrorCode) = - ## Raises an OSError based on the error code returned by ``SSLGetError`` - ## (for SSL sockets) and ``osLastError`` otherwise. + lastError = (-1).OSErrorCode, + flags: set[SocketFlag] = {}) = + ## Raises an OSError based on the error code returned by `SSL_get_error` + ## (for SSL sockets) and `osLastError` otherwise. 
## - ## If ``async`` is ``true`` no error will be thrown in the case when the + ## If `async` is `true` no error will be thrown in the case when the ## error was caused by no data being available to be read. ## - ## If ``err`` is not lower than 0 no exception will be raised. - when defined(ssl): - if socket.isSSL: + ## If `err` is not lower than 0 no exception will be raised. + ## + ## If `flags` contains `SafeDisconn`, no exception will be raised + ## when the error was caused by a peer disconnection. + when defineSsl: + if socket.isSsl: if err <= 0: - var ret = SSLGetError(socket.sslHandle, err.cint) + var ret = SSL_get_error(socket.sslHandle, err.cint) case ret of SSL_ERROR_ZERO_RETURN: raiseSSLError("TLS/SSL connection failed to initiate, socket closed prematurely.") @@ -285,143 +971,147 @@ proc socketError*(socket: Socket, err: int = -1, async = false, of SSL_ERROR_WANT_X509_LOOKUP: raiseSSLError("Function for x509 lookup has been called.") of SSL_ERROR_SYSCALL: - var errStr = "IO error has occurred " - let sslErr = ErrPeekLastError() - if sslErr == 0 and err == 0: - errStr.add "because an EOF was observed that violates the protocol" - elif sslErr == 0 and err == -1: - errStr.add "in the BIO layer" - else: - let errStr = $ErrErrorString(sslErr, nil) - raiseSSLError(errStr & ": " & errStr) - let osMsg = osErrorMsg osLastError() - if osMsg != "": - errStr.add ". The OS reports: " & osMsg - raise newException(OSError, errStr) + # SSL shutdown must not be done if a fatal error occurred. 
+ socket.sslNoShutdown = true + let osErr = osLastError() + if not flags.isDisconnectionError(osErr): + var errStr = "IO error has occurred " + let sslErr = ERR_peek_last_error() + if sslErr == 0 and err == 0: + errStr.add "because an EOF was observed that violates the protocol" + elif sslErr == 0 and err == -1: + errStr.add "in the BIO layer" + else: + let errStr = $ERR_error_string(sslErr, nil) + raiseSSLError(errStr & ": " & errStr) + raiseOSError(osErr, errStr) of SSL_ERROR_SSL: + # SSL shutdown must not be done if a fatal error occurred. + socket.sslNoShutdown = true raiseSSLError() else: raiseSSLError("Unknown Error") - - if err == -1 and not (when defined(ssl): socket.isSSL else: false): + + if err == -1 and not (when defineSsl: socket.isSsl else: false): var lastE = if lastError.int == -1: getSocketError(socket) else: lastError - if async: - when useWinVersion: - if lastE.int32 == WSAEWOULDBLOCK: - return - else: raiseOSError(lastE) - else: - if lastE.int32 == EAGAIN or lastE.int32 == EWOULDBLOCK: - return - else: raiseOSError(lastE) - else: raiseOSError(lastE) + if not flags.isDisconnectionError(lastE): + if async: + when useWinVersion: + if lastE.int32 == WSAEWOULDBLOCK: + return + else: raiseOSError(lastE) + else: + if lastE.int32 == EAGAIN or lastE.int32 == EWOULDBLOCK: + return + else: raiseOSError(lastE) + else: raiseOSError(lastE) proc listen*(socket: Socket, backlog = SOMAXCONN) {.tags: [ReadIOEffect].} = - ## Marks ``socket`` as accepting connections. - ## ``Backlog`` specifies the maximum length of the + ## Marks `socket` as accepting connections. + ## `Backlog` specifies the maximum length of the ## queue of pending connections. ## - ## Raises an EOS error upon failure. - if rawsockets.listen(socket.fd, backlog) < 0'i32: + ## Raises an OSError error upon failure. + if nativesockets.listen(socket.fd, backlog) < 0'i32: raiseOSError(osLastError()) proc bindAddr*(socket: Socket, port = Port(0), address = "") {. 
tags: [ReadIOEffect].} = - ## Binds ``address``:``port`` to the socket. + ## Binds `address`:`port` to the socket. ## - ## If ``address`` is "" then ADDR_ANY will be bound. - - if address == "": - var name: Sockaddr_in - when useWinVersion: - name.sin_family = toInt(AF_INET).int16 + ## If `address` is "" then ADDR_ANY will be bound. + var realaddr = address + if realaddr == "": + case socket.domain + of AF_INET6: realaddr = "::" + of AF_INET: realaddr = "0.0.0.0" else: - name.sin_family = toInt(AF_INET) - name.sin_port = htons(int16(port)) - name.sin_addr.s_addr = htonl(INADDR_ANY) - if bindAddr(socket.fd, cast[ptr SockAddr](addr(name)), - sizeof(name).SockLen) < 0'i32: - raiseOSError(osLastError()) - else: - var aiList = getAddrInfo(address, port, AF_INET) - if bindAddr(socket.fd, aiList.ai_addr, aiList.ai_addrlen.SockLen) < 0'i32: - dealloc(aiList) - raiseOSError(osLastError()) - dealloc(aiList) - -proc acceptAddr*(server: Socket, client: var Socket, address: var string, - flags = {SocketFlag.SafeDisconn}) {. - tags: [ReadIOEffect], gcsafe, locks: 0.} = + raise newException(ValueError, + "Unknown socket address family and no address specified to bindAddr") + + var aiList = getAddrInfo(realaddr, port, socket.domain) + if bindAddr(socket.fd, aiList.ai_addr, aiList.ai_addrlen.SockLen) < 0'i32: + freeAddrInfo(aiList) + var address2: string + address2.addQuoted address + raiseOSError(osLastError(), "address: $# port: $#" % [address2, $port]) + freeAddrInfo(aiList) + +proc acceptAddr*(server: Socket, client: var owned(Socket), address: var string, + flags = {SocketFlag.SafeDisconn}, + inheritable = defined(nimInheritHandles)) {. + tags: [ReadIOEffect], gcsafe.} = ## Blocks until a connection is being made from a client. When a connection - ## is made sets ``client`` to the client socket and ``address`` to the address + ## is made sets `client` to the client socket and `address` to the address ## of the connecting client. 
- ## This function will raise EOS if an error occurs. + ## This function will raise OSError if an error occurs. ## ## The resulting client will inherit any properties of the server socket. For ## example: whether the socket is buffered or not. ## - ## **Note**: ``client`` must be initialised (with ``new``), this function - ## makes no effort to initialise the ``client`` variable. + ## The SocketHandle associated with the resulting client will not be + ## inheritable by child processes by default. This can be changed via + ## the `inheritable` parameter. ## - ## The ``accept`` call may result in an error if the connecting socket - ## disconnects during the duration of the ``accept``. If the ``SafeDisconn`` + ## The `accept` call may result in an error if the connecting socket + ## disconnects during the duration of the `accept`. If the `SafeDisconn` ## flag is specified then this error will not be raised and instead ## accept will be called again. - assert(client != nil) - var sockAddress: Sockaddr_in - var addrLen = sizeof(sockAddress).SockLen - var sock = accept(server.fd, cast[ptr SockAddr](addr(sockAddress)), - addr(addrLen)) - + if client.isNil: + new(client) + let ret = accept(server.fd, inheritable) + let sock = ret[0] + if sock == osInvalidSocket: let err = osLastError() if flags.isDisconnectionError(err): - acceptAddr(server, client, address, flags) + acceptAddr(server, client, address, flags, inheritable) raiseOSError(err) else: + address = ret[1] client.fd = sock + client.domain = getSockDomain(sock) client.isBuffered = server.isBuffered # Handle SSL. - when defined(ssl): - if server.isSSL: + when defineSsl: + if server.isSsl: # We must wrap the client sock in a ssl context. - + server.sslContext.wrapSocket(client) - let ret = SSLAccept(client.sslHandle) + ErrClearError() + let ret = SSL_accept(client.sslHandle) socketError(client, ret, false) - - # Client socket is set above. 
- address = $inet_ntoa(sockAddress.sin_addr) -when false: #defined(ssl): +when false: #defineSsl: proc acceptAddrSSL*(server: Socket, client: var Socket, - address: var string): TSSLAcceptResult {. + address: var string): SSL_acceptResult {. tags: [ReadIOEffect].} = - ## This procedure should only be used for non-blocking **SSL** sockets. + ## This procedure should only be used for non-blocking **SSL** sockets. ## It will immediately return with one of the following values: - ## - ## ``AcceptSuccess`` will be returned when a client has been successfully + ## + ## `AcceptSuccess` will be returned when a client has been successfully ## accepted and the handshake has been successfully performed between - ## ``server`` and the newly connected client. + ## `server` and the newly connected client. ## - ## ``AcceptNoHandshake`` will be returned when a client has been accepted + ## `AcceptNoHandshake` will be returned when a client has been accepted ## but no handshake could be performed. This can happen when the client ## connects but does not yet initiate a handshake. In this case - ## ``acceptAddrSSL`` should be called again with the same parameters. + ## `acceptAddrSSL` should be called again with the same parameters. ## - ## ``AcceptNoClient`` will be returned when no client is currently attempting + ## `AcceptNoClient` will be returned when no client is currently attempting ## to connect. - template doHandshake(): stmt = - when defined(ssl): - if server.isSSL: + template doHandshake(): untyped = + when defineSsl: + if server.isSsl: client.setBlocking(false) # We must wrap the client sock in a ssl context. 
- - if not client.isSSL or client.sslHandle == nil: + + if not client.isSsl or client.sslHandle == nil: server.sslContext.wrapSocket(client) - let ret = SSLAccept(client.sslHandle) + ErrClearError() + let ret = SSL_accept(client.sslHandle) while ret <= 0: - let err = SSLGetError(client.sslHandle, ret) + let err = SSL_get_error(client.sslHandle, ret) if err != SSL_ERROR_WANT_ACCEPT: case err of SSL_ERROR_ZERO_RETURN: @@ -438,50 +1128,152 @@ when false: #defined(ssl): raiseSSLError("Unknown error") client.sslNoHandshake = false - if client.isSSL and client.sslNoHandshake: + if client.isSsl and client.sslNoHandshake: doHandshake() return AcceptSuccess else: acceptAddrPlain(AcceptNoClient, AcceptSuccess): doHandshake() -proc accept*(server: Socket, client: var Socket, - flags = {SocketFlag.SafeDisconn}) {.tags: [ReadIOEffect].} = - ## Equivalent to ``acceptAddr`` but doesn't return the address, only the +proc accept*(server: Socket, client: var owned(Socket), + flags = {SocketFlag.SafeDisconn}, + inheritable = defined(nimInheritHandles)) + {.tags: [ReadIOEffect].} = + ## Equivalent to `acceptAddr` but doesn't return the address, only the ## socket. - ## - ## **Note**: ``client`` must be initialised (with ``new``), this function - ## makes no effort to initialise the ``client`` variable. ## - ## The ``accept`` call may result in an error if the connecting socket - ## disconnects during the duration of the ``accept``. If the ``SafeDisconn`` + ## The SocketHandle associated with the resulting client will not be + ## inheritable by child processes by default. This can be changed via + ## the `inheritable` parameter. + ## + ## The `accept` call may result in an error if the connecting socket + ## disconnects during the duration of the `accept`. If the `SafeDisconn` ## flag is specified then this error will not be raised and instead ## accept will be called again. 
var addrDummy = "" acceptAddr(server, client, addrDummy, flags) -proc close*(socket: Socket) = +when defined(posix) and not defined(lwip): + from std/posix import Sigset, sigwait, sigismember, sigemptyset, sigaddset, + sigprocmask, pthread_sigmask, SIGPIPE, SIG_BLOCK, SIG_UNBLOCK + +template blockSigpipe(body: untyped): untyped = + ## Temporary block SIGPIPE within the provided code block. If SIGPIPE is + ## raised for the duration of the code block, it will be queued and will be + ## raised once the block ends. + ## + ## Within the block a `selectSigpipe()` template is provided which can be + ## used to remove SIGPIPE from the queue. Note that if SIGPIPE is **not** + ## raised at the time of call, it will block until SIGPIPE is raised. + ## + ## If SIGPIPE has already been blocked at the time of execution, the + ## signal mask is left as-is and `selectSigpipe()` will become a no-op. + ## + ## For convenience, this template is also available for non-POSIX system, + ## where `body` will be executed as-is. 
+ when not defined(posix) or defined(lwip): + body + else: + template sigmask(how: cint, set, oset: var Sigset): untyped {.gensym.} = + ## Alias for pthread_sigmask or sigprocmask depending on the status + ## of --threads + when compileOption("threads"): + pthread_sigmask(how, set, oset) + else: + sigprocmask(how, set, oset) + + var oldSet, watchSet: Sigset + if sigemptyset(oldSet) == -1: + raiseOSError(osLastError()) + if sigemptyset(watchSet) == -1: + raiseOSError(osLastError()) + + if sigaddset(watchSet, SIGPIPE) == -1: + raiseOSError(osLastError(), "Couldn't add SIGPIPE to Sigset") + + if sigmask(SIG_BLOCK, watchSet, oldSet) == -1: + raiseOSError(osLastError(), "Couldn't block SIGPIPE") + + let alreadyBlocked = sigismember(oldSet, SIGPIPE) == 1 + + template selectSigpipe(): untyped {.used.} = + if not alreadyBlocked: + var signal: cint + let err = sigwait(watchSet, signal) + if err != 0: + raiseOSError(err.OSErrorCode, "Couldn't select SIGPIPE") + assert signal == SIGPIPE + + try: + body + finally: + if not alreadyBlocked: + if sigmask(SIG_UNBLOCK, watchSet, oldSet) == -1: + raiseOSError(osLastError(), "Couldn't unblock SIGPIPE") + +proc close*(socket: Socket, flags = {SocketFlag.SafeDisconn}) = ## Closes a socket. + ## + ## If `socket` is an SSL/TLS socket, this proc will also send a closure + ## notification to the peer. If `SafeDisconn` is in `flags`, failure to do so + ## due to disconnections will be ignored. This is generally safe in + ## practice. See + ## `here <https://security.stackexchange.com/a/82044>`_ for more details. 
try: - when defined(ssl): - if socket.isSSL: - ErrClearError() - # As we are closing the underlying socket immediately afterwards, - # it is valid, under the TLS standard, to perform a unidirectional - # shutdown i.e not wait for the peers "close notify" alert with a second - # call to SSLShutdown - let res = SSLShutdown(socket.sslHandle) - SSLFree(socket.sslHandle) - socket.sslHandle = nil - if res == 0: - discard - elif res != 1: - socketError(socket, res) + when defineSsl: + if socket.isSsl and socket.sslHandle != nil: + # Don't call SSL_shutdown if the connection has not been fully + # established, see: + # https://github.com/openssl/openssl/issues/710#issuecomment-253897666 + if not socket.sslNoShutdown and SSL_in_init(socket.sslHandle) == 0: + # As we are closing the underlying socket immediately afterwards, + # it is valid, under the TLS standard, to perform a unidirectional + # shutdown i.e not wait for the peers "close notify" alert with a second + # call to SSL_shutdown + blockSigpipe: + ErrClearError() + let res = SSL_shutdown(socket.sslHandle) + if res == 0: + discard + elif res != 1: + let + err = osLastError() + sslError = SSL_get_error(socket.sslHandle, res) + + # If a close notification is received, failures outside of the + # protocol will be returned as SSL_ERROR_ZERO_RETURN instead + # of SSL_ERROR_SYSCALL. This fact is deduced by digging into + # SSL_get_error() source code. + if sslError == SSL_ERROR_ZERO_RETURN or + sslError == SSL_ERROR_SYSCALL: + when defined(posix) and not defined(macosx) and + not defined(nimdoc): + if err == EPIPE.OSErrorCode: + # Clear the SIGPIPE that's been raised due to + # the disconnection. 
+ selectSigpipe() + else: + discard + if not flags.isDisconnectionError(err): + socketError(socket, res, lastError = err, flags = flags) + else: + socketError(socket, res, lastError = err, flags = flags) finally: + when defineSsl: + if socket.isSsl and socket.sslHandle != nil: + SSL_free(socket.sslHandle) + socket.sslHandle = nil + socket.fd.close() + socket.fd = osInvalidSocket + +when defined(posix): + from std/posix import TCP_NODELAY +else: + from std/winlean import TCP_NODELAY proc toCInt*(opt: SOBool): cint = - ## Converts a ``SOBool`` into its Socket Option cint representation. + ## Converts a `SOBool` into its Socket Option cint representation. case opt of OptAcceptConn: SO_ACCEPTCONN of OptBroadcast: SO_BROADCAST @@ -490,90 +1282,64 @@ proc toCInt*(opt: SOBool): cint = of OptKeepAlive: SO_KEEPALIVE of OptOOBInline: SO_OOBINLINE of OptReuseAddr: SO_REUSEADDR + of OptReusePort: SO_REUSEPORT + of OptNoDelay: TCP_NODELAY proc getSockOpt*(socket: Socket, opt: SOBool, level = SOL_SOCKET): bool {. tags: [ReadIOEffect].} = - ## Retrieves option ``opt`` as a boolean value. + ## Retrieves option `opt` as a boolean value. var res = getSockOptInt(socket.fd, cint(level), toCInt(opt)) result = res != 0 -proc setSockOpt*(socket: Socket, opt: SOBool, value: bool, level = SOL_SOCKET) {. - tags: [WriteIOEffect].} = - ## Sets option ``opt`` to a boolean value specified by ``value``. - var valuei = cint(if value: 1 else: 0) - setSockOptInt(socket.fd, cint(level), toCInt(opt), valuei) - -proc connect*(socket: Socket, address: string, port = Port(0), - af: Domain = AF_INET) {.tags: [ReadIOEffect].} = - ## Connects socket to ``address``:``port``. ``Address`` can be an IP address or a - ## host name. If ``address`` is a host name, this function will try each IP - ## of that host name. ``htons`` is already performed on ``port`` so you must - ## not do it. +proc getLocalAddr*(socket: Socket): (string, Port) = + ## Get the socket's local address and port number. 
## - ## If ``socket`` is an SSL socket a handshake will be automatically performed. - var aiList = getAddrInfo(address, port, af) - # try all possibilities: - var success = false - var lastError: OSErrorCode - var it = aiList - while it != nil: - if connect(socket.fd, it.ai_addr, it.ai_addrlen.SockLen) == 0'i32: - success = true - break - else: lastError = osLastError() - it = it.ai_next + ## This is high-level interface for `getsockname`:idx:. + getLocalAddr(socket.fd, socket.domain) - dealloc(aiList) - if not success: raiseOSError(lastError) - - when defined(ssl): - if socket.isSSL: - # RFC3546 for SNI specifies that IP addresses are not allowed. - if not isIpAddress(address): - # Discard result in case OpenSSL version doesn't support SNI, or we're - # not using TLSv1+ - discard SSL_set_tlsext_host_name(socket.sslHandle, address) - - let ret = SSLConnect(socket.sslHandle) - socketError(socket, ret) - -when defined(ssl): - proc handshake*(socket: Socket): bool {.tags: [ReadIOEffect, WriteIOEffect].} = - ## This proc needs to be called on a socket after it connects. This is - ## only applicable when using ``connectAsync``. - ## This proc performs the SSL handshake. - ## - ## Returns ``False`` whenever the socket is not yet ready for a handshake, - ## ``True`` whenever handshake completed successfully. +when not useNimNetLite: + proc getPeerAddr*(socket: Socket): (string, Port) = + ## Get the socket's peer address and port number. ## - ## A ESSL error is raised on any other errors. 
- result = true - if socket.isSSL: - var ret = SSLConnect(socket.sslHandle) - if ret <= 0: - var errret = SSLGetError(socket.sslHandle, ret) - case errret - of SSL_ERROR_ZERO_RETURN: - raiseSSLError("TLS/SSL connection failed to initiate, socket closed prematurely.") - of SSL_ERROR_WANT_CONNECT, SSL_ERROR_WANT_ACCEPT, - SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE: - return false - of SSL_ERROR_WANT_X509_LOOKUP: - raiseSSLError("Function for x509 lookup has been called.") - of SSL_ERROR_SYSCALL, SSL_ERROR_SSL: - raiseSSLError() - else: - raiseSSLError("Unknown Error") - socket.sslNoHandshake = false - else: - raiseSSLError("Socket is not an SSL socket.") + ## This is high-level interface for `getpeername`:idx:. + getPeerAddr(socket.fd, socket.domain) + +proc setSockOpt*(socket: Socket, opt: SOBool, value: bool, + level = SOL_SOCKET) {.tags: [WriteIOEffect].} = + ## Sets option `opt` to a boolean value specified by `value`. + runnableExamples("-r:off"): + let socket = newSocket() + socket.setSockOpt(OptReusePort, true) + socket.setSockOpt(OptNoDelay, true, level = IPPROTO_TCP.cint) + var valuei = cint(if value: 1 else: 0) + setSockOptInt(socket.fd, cint(level), toCInt(opt), valuei) +when defined(nimdoc) or (defined(posix) and not useNimNetLite): + proc connectUnix*(socket: Socket, path: string) = + ## Connects to Unix socket on `path`. + ## This only works on Unix-style systems: Mac OS X, BSD and Linux + when not defined(nimdoc): + var socketAddr = makeUnixAddr(path) + if socket.fd.connect(cast[ptr SockAddr](addr socketAddr), + (offsetOf(socketAddr, sun_path) + path.len + 1).SockLen) != 0'i32: + raiseOSError(osLastError()) + + proc bindUnix*(socket: Socket, path: string) = + ## Binds Unix socket to `path`. 
+ ## This only works on Unix-style systems: Mac OS X, BSD and Linux + when not defined(nimdoc): + var socketAddr = makeUnixAddr(path) + if socket.fd.bindAddr(cast[ptr SockAddr](addr socketAddr), + (offsetOf(socketAddr, sun_path) + path.len + 1).SockLen) != 0'i32: + raiseOSError(osLastError()) + +when defineSsl: proc gotHandshake*(socket: Socket): bool = - ## Determines whether a handshake has occurred between a client (``socket``) - ## and the server that ``socket`` is connected to. + ## Determines whether a handshake has occurred between a client (`socket`) + ## and the server that `socket` is connected to. ## - ## Throws ESSL if ``socket`` is not an SSL socket. - if socket.isSSL: + ## Throws SslError if `socket` is not an SSL socket. + if socket.isSsl: return not socket.sslNoHandshake else: raiseSSLError("Socket is not an SSL socket.") @@ -584,29 +1350,32 @@ proc hasDataBuffered*(s: Socket): bool = if s.isBuffered: result = s.bufLen > 0 and s.currPos != s.bufLen - when defined(ssl): - if s.isSSL and not result: + when defineSsl: + if s.isSsl and not result: result = s.sslHasPeekChar -proc select(readfd: Socket, timeout = 500): int = - ## Used for socket operation timeouts. - if readfd.hasDataBuffered: - return 1 +proc isClosed(socket: Socket): bool = + socket.fd == osInvalidSocket + +proc uniRecv(socket: Socket, buffer: pointer, size, flags: cint): int = + ## Handles SSL and non-ssl recv in a nice package. + ## + ## In particular handles the case where socket has been closed properly + ## for both SSL and non-ssl. 
+ result = 0 + assert(not socket.isClosed, "Cannot `recv` on a closed socket") + when defineSsl: + if socket.isSsl: + ErrClearError() + return SSL_read(socket.sslHandle, buffer, size) - var fds = @[readfd.fd] - result = select(fds, timeout) + return recv(socket.fd, buffer, size, flags) proc readIntoBuf(socket: Socket, flags: int32): int = result = 0 - when defined(ssl): - if socket.isSSL: - result = SSLRead(socket.sslHandle, addr(socket.buffer), int(socket.buffer.high)) - else: - result = recv(socket.fd, addr(socket.buffer), cint(socket.buffer.high), flags) - else: - result = recv(socket.fd, addr(socket.buffer), cint(socket.buffer.high), flags) + result = uniRecv(socket, addr(socket.buffer), socket.buffer.high, flags) if result < 0: - # Save it in case it gets reset (the Nim codegen occassionally may call + # Save it in case it gets reset (the Nim codegen occasionally may call # Win API functions which reset it). socket.lastError = osLastError() if result <= 0: @@ -624,21 +1393,22 @@ template retRead(flags, readBytes: int) {.dirty.} = else: return res -proc recv*(socket: Socket, data: pointer, size: int): int {.tags: [ReadIOEffect].} = +proc recv*(socket: Socket, data: pointer, size: int): int {.tags: [ + ReadIOEffect].} = ## Receives data from a socket. ## ## **Note**: This is a low-level function, you may be interested in the higher - ## level versions of this function which are also named ``recv``. + ## level versions of this function which are also named `recv`. 
if size == 0: return if socket.isBuffered: if socket.bufLen == 0: retRead(0'i32, 0) - + var read = 0 while read < size: if socket.currPos >= socket.bufLen: retRead(0'i32, read) - + let chunk = min(socket.bufLen-socket.currPos, size-read) var d = cast[cstring](data) assert size-read >= chunk @@ -648,18 +1418,18 @@ proc recv*(socket: Socket, data: pointer, size: int): int {.tags: [ReadIOEffect] result = read else: - when defined(ssl): - if socket.isSSL: - if socket.sslHasPeekChar: + when defineSsl: + if socket.isSsl: + if socket.sslHasPeekChar: # TODO: Merge this peek char mess into uniRecv copyMem(data, addr(socket.sslPeekChar), 1) socket.sslHasPeekChar = false if size-1 > 0: var d = cast[cstring](data) - result = SSLRead(socket.sslHandle, addr(d[1]), size-1) + 1 + result = uniRecv(socket, addr(d[1]), cint(size-1), 0'i32) + 1 else: result = 1 else: - result = SSLRead(socket.sslHandle, data, size) + result = uniRecv(socket, data, size.cint, 0'i32) else: result = recv(socket.fd, data, size.cint, 0'i32) else: @@ -668,45 +1438,48 @@ proc recv*(socket: Socket, data: pointer, size: int): int {.tags: [ReadIOEffect] # Save the error in case it gets reset. socket.lastError = osLastError() -proc waitFor(socket: Socket, waited: var float, timeout, size: int, +proc waitFor(socket: Socket, waited: var Duration, timeout, size: int, funcName: string): int {.tags: [TimeEffect].} = ## determines the amount of characters that can be read. Result will never - ## be larger than ``size``. For unbuffered sockets this will be ``1``. - ## For buffered sockets it can be as big as ``BufferSize``. + ## be larger than `size`. For unbuffered sockets this will be `1`. + ## For buffered sockets it can be as big as `BufferSize`. ## ## If this function does not determine that there is data on the socket - ## within ``timeout`` ms, an ETimeout error will be raised. + ## within `timeout` ms, a TimeoutError error will be raised. 
result = 1 if size <= 0: assert false if timeout == -1: return size - if socket.isBuffered and socket.bufLen != 0 and socket.bufLen != socket.currPos: + if socket.isBuffered and socket.bufLen != 0 and + socket.bufLen != socket.currPos: result = socket.bufLen - socket.currPos result = min(result, size) else: - if timeout - int(waited * 1000.0) < 1: + if timeout - waited.inMilliseconds < 1: raise newException(TimeoutError, "Call to '" & funcName & "' timed out.") - - when defined(ssl): - if socket.isSSL: + + when defineSsl: + if socket.isSsl: if socket.hasDataBuffered: # sslPeekChar is present. return 1 - let sslPending = SSLPending(socket.sslHandle) + let sslPending = SSL_pending(socket.sslHandle) if sslPending != 0: - return sslPending - - var startTime = epochTime() - let selRet = select(socket, timeout - int(waited * 1000.0)) + return min(sslPending, size) + + var startTime = getMonoTime() + let selRet = if socket.hasDataBuffered: 1 + else: + timeoutRead(socket.fd, (timeout - waited.inMilliseconds).int) if selRet < 0: raiseOSError(osLastError()) if selRet != 1: raise newException(TimeoutError, "Call to '" & funcName & "' timed out.") - waited += (epochTime() - startTime) + waited += (getMonoTime() - startTime) proc recv*(socket: Socket, data: pointer, size: int, timeout: int): int {. tags: [ReadIOEffect, TimeEffect].} = - ## overload with a ``timeout`` parameter in miliseconds. - var waited = 0.0 # number of seconds already waited - + ## overload with a `timeout` parameter in milliseconds. + var waited: Duration # duration already waited + var read = 0 while read < size: let avail = waitFor(socket, waited, timeout, size-read, "recv") @@ -717,32 +1490,68 @@ proc recv*(socket: Socket, data: pointer, size: int, timeout: int): int {. if result < 0: return result inc(read, result) - + result = read proc recv*(socket: Socket, data: var string, size: int, timeout = -1, flags = {SocketFlag.SafeDisconn}): int = - ## Higher-level version of ``recv``. 
+ ## Higher-level version of `recv`. + ## + ## Reads **up to** `size` bytes from `socket` into `data`. + ## + ## For buffered sockets this function will attempt to read all the requested + ## data. It will read this data in `BufferSize` chunks. + ## + ## For unbuffered sockets this function makes no effort to read + ## all the data requested. It will return as much data as the operating system + ## gives it. ## ## When 0 is returned the socket's connection has been closed. ## - ## This function will throw an EOS exception when an error occurs. A value + ## This function will throw an OSError exception when an error occurs. A value ## lower than 0 is never returned. ## - ## A timeout may be specified in miliseconds, if enough data is not received - ## within the time specified an ETimeout exception will be raised. - ## - ## **Note**: ``data`` must be initialised. + ## A timeout may be specified in milliseconds, if enough data is not received + ## within the time specified a TimeoutError exception will be raised. ## - ## **Warning**: Only the ``SafeDisconn`` flag is currently supported. + ## .. warning:: Only the `SafeDisconn` flag is currently supported. data.setLen(size) - result = recv(socket, cstring(data), size, timeout) + result = + if timeout == -1: + recv(socket, cstring(data), size) + else: + recv(socket, cstring(data), size, timeout) if result < 0: data.setLen(0) let lastError = getSocketError(socket) - if flags.isDisconnectionError(lastError): return - socket.socketError(result, lastError = lastError) - data.setLen(result) + socket.socketError(result, lastError = lastError, flags = flags) + else: + data.setLen(result) + +proc recv*(socket: Socket, size: int, timeout = -1, + flags = {SocketFlag.SafeDisconn}): string {.inline.} = + ## Higher-level version of `recv` which returns a string. + ## + ## Reads **up to** `size` bytes from `socket` into the result. + ## + ## For buffered sockets this function will attempt to read all the requested + ## data. 
It will read this data in `BufferSize` chunks. + ## + ## For unbuffered sockets this function makes no effort to read + ## all the data requested. It will return as much data as the operating system + ## gives it. + ## + ## When `""` is returned the socket's connection has been closed. + ## + ## This function will throw an OSError exception when an error occurs. + ## + ## A timeout may be specified in milliseconds, if enough data is not received + ## within the time specified a TimeoutError exception will be raised. + ## + ## + ## .. warning:: Only the `SafeDisconn` flag is currently supported. + result = newString(size) + discard recv(socket, result, size, timeout, flags) proc peekChar(socket: Socket, c: var char): int {.tags: [ReadIOEffect].} = if socket.isBuffered: @@ -751,55 +1560,60 @@ proc peekChar(socket: Socket, c: var char): int {.tags: [ReadIOEffect].} = var res = socket.readIntoBuf(0'i32) if res <= 0: result = res - + c = socket.buffer[socket.currPos] else: - when defined(ssl): - if socket.isSSL: + when defineSsl: + if socket.isSsl: if not socket.sslHasPeekChar: - result = SSLRead(socket.sslHandle, addr(socket.sslPeekChar), 1) + result = uniRecv(socket, addr(socket.sslPeekChar), 1, 0'i32) socket.sslHasPeekChar = true - + c = socket.sslPeekChar return result = recv(socket.fd, addr(c), 1, MSG_PEEK) -proc readLine*(socket: Socket, line: var TaintedString, timeout = -1, - flags = {SocketFlag.SafeDisconn}) {. +proc readLine*(socket: Socket, line: var string, timeout = -1, + flags = {SocketFlag.SafeDisconn}, maxLength = MaxLineLength) {. tags: [ReadIOEffect, TimeEffect].} = - ## Reads a line of data from ``socket``. + ## Reads a line of data from `socket`. ## - ## If a full line is read ``\r\L`` is not - ## added to ``line``, however if solely ``\r\L`` is read then ``line`` + ## If a full line is read `\r\L` is not + ## added to `line`, however if solely `\r\L` is read then `line` ## will be set to it. 
- ## - ## If the socket is disconnected, ``line`` will be set to ``""``. ## - ## An EOS exception will be raised in the case of a socket error. + ## If the socket is disconnected, `line` will be set to `""`. + ## + ## An OSError exception will be raised in the case of a socket error. ## - ## A timeout can be specified in miliseconds, if data is not received within - ## the specified time an ETimeout exception will be raised. + ## A timeout can be specified in milliseconds, if data is not received within + ## the specified time a TimeoutError exception will be raised. ## - ## **Warning**: Only the ``SafeDisconn`` flag is currently supported. - - template addNLIfEmpty(): stmt = + ## The `maxLength` parameter determines the maximum amount of characters + ## that can be read. The result is truncated after that. + ## + ## .. warning:: Only the `SafeDisconn` flag is currently supported. + + template addNLIfEmpty() = if line.len == 0: line.add("\c\L") - template raiseSockError(): stmt {.dirty, immediate.} = + template raiseSockError() {.dirty.} = let lastError = getSocketError(socket) - if flags.isDisconnectionError(lastError): setLen(line.string, 0); return - socket.socketError(n, lastError = lastError) + if flags.isDisconnectionError(lastError): + setLen(line, 0) + socket.socketError(n, lastError = lastError, flags = flags) + return - var waited = 0.0 + var waited: Duration - setLen(line.string, 0) + setLen(line, 0) while true: var c: char discard waitFor(socket, waited, timeout, 1, "readLine") var n = recv(socket, addr(c), 1) if n < 0: raiseSockError() - elif n == 0: setLen(line.string, 0); return + elif n == 0: setLen(line, 0); return if c == '\r': discard waitFor(socket, waited, timeout, 1, "readLine") n = peekChar(socket, c) @@ -808,47 +1622,93 @@ proc readLine*(socket: Socket, line: var TaintedString, timeout = -1, elif n <= 0: raiseSockError() addNLIfEmpty() return - elif c == '\L': + elif c == '\L': addNLIfEmpty() return - add(line.string, c) + add(line, c) -proc 
recvFrom*(socket: Socket, data: var string, length: int, - address: var string, port: var Port, flags = 0'i32): int {. + # Verify that this isn't a DOS attack: #3847. + if line.len > maxLength: break + +proc recvLine*(socket: Socket, timeout = -1, + flags = {SocketFlag.SafeDisconn}, + maxLength = MaxLineLength): string = + ## Reads a line of data from `socket`. + ## + ## If a full line is read `\r\L` is not + ## added to the result, however if solely `\r\L` is read then the result + ## will be set to it. + ## + ## If the socket is disconnected, the result will be set to `""`. + ## + ## An OSError exception will be raised in the case of a socket error. + ## + ## A timeout can be specified in milliseconds, if data is not received within + ## the specified time a TimeoutError exception will be raised. + ## + ## The `maxLength` parameter determines the maximum amount of characters + ## that can be read. The result is truncated after that. + ## + ## .. warning:: Only the `SafeDisconn` flag is currently supported. + result = "" + readLine(socket, result, timeout, flags, maxLength) + +proc recvFrom*[T: string | IpAddress](socket: Socket, data: var string, length: int, + address: var T, port: var Port, flags = 0'i32): int {. tags: [ReadIOEffect].} = - ## Receives data from ``socket``. This function should normally be used with - ## connection-less sockets (UDP sockets). + ## Receives data from `socket`. This function should normally be used with + ## connection-less sockets (UDP sockets). The source address of the data + ## packet is stored in the `address` argument as either a string or an IpAddress. ## - ## If an error occurs an EOS exception will be raised. Otherwise the return + ## If an error occurs an OSError exception will be raised. Otherwise the return ## value will be the length of data received. ## - ## **Warning:** This function does not yet have a buffered implementation, - ## so when ``socket`` is buffered the non-buffered implementation will be - ## used. 
Therefore if ``socket`` contains something in its buffer this - ## function will make no effort to return it. - + ## .. warning:: This function does not yet have a buffered implementation, + ## so when `socket` is buffered the non-buffered implementation will be + ## used. Therefore if `socket` contains something in its buffer this + ## function will make no effort to return it. + template adaptRecvFromToDomain(sockAddress: untyped, domain: Domain) = + var addrLen = SockLen(sizeof(sockAddress)) + result = recvfrom(socket.fd, cstring(data), length.cint, flags.cint, + cast[ptr SockAddr](addr(sockAddress)), addr(addrLen)) + + if result != -1: + data.setLen(result) + + when typeof(address) is string: + address = getAddrString(cast[ptr SockAddr](addr(sockAddress))) + when domain == AF_INET6: + port = ntohs(sockAddress.sin6_port).Port + else: + port = ntohs(sockAddress.sin_port).Port + else: + data.setLen(result) + sockAddress.fromSockAddr(addrLen, address, port) + else: + raiseOSError(osLastError()) + + assert(socket.protocol != IPPROTO_TCP, "Cannot `recvFrom` on a TCP socket") # TODO: Buffered sockets data.setLen(length) - var sockAddress: Sockaddr_in - var addrLen = sizeof(sockAddress).SockLen - result = recvfrom(socket.fd, cstring(data), length.cint, flags.cint, - cast[ptr SockAddr](addr(sockAddress)), addr(addrLen)) - if result != -1: - data.setLen(result) - address = $inet_ntoa(sockAddress.sin_addr) - port = ntohs(sockAddress.sin_port).Port + case socket.domain + of AF_INET6: + var sockAddress: Sockaddr_in6 + adaptRecvFromToDomain(sockAddress, AF_INET6) + of AF_INET: + var sockAddress: Sockaddr_in + adaptRecvFromToDomain(sockAddress, AF_INET) else: - raiseOSError(osLastError()) + raise newException(ValueError, "Unknown socket address family") proc skip*(socket: Socket, size: int, timeout = -1) = - ## Skips ``size`` amount of bytes. + ## Skips `size` amount of bytes. 
## - ## An optional timeout can be specified in miliseconds, if skipping the - ## bytes takes longer than specified an ETimeout exception will be raised. + ## An optional timeout can be specified in milliseconds, if skipping the + ## bytes takes longer than specified a TimeoutError exception will be raised. ## ## Returns the number of skipped bytes. - var waited = 0.0 + var waited: Duration var dummy = alloc(size) var bytesSkipped = 0 while bytesSkipped != size: @@ -860,53 +1720,83 @@ proc send*(socket: Socket, data: pointer, size: int): int {. tags: [WriteIOEffect].} = ## Sends data to a socket. ## - ## **Note**: This is a low-level version of ``send``. You likely should use + ## **Note**: This is a low-level version of `send`. You likely should use ## the version below. - when defined(ssl): - if socket.isSSL: - return SSLWrite(socket.sslHandle, cast[cstring](data), size) - + assert(not socket.isClosed, "Cannot `send` on a closed socket") + when defineSsl: + if socket.isSsl: + ErrClearError() + return SSL_write(socket.sslHandle, cast[cstring](data), size) + when useWinVersion or defined(macosx): result = send(socket.fd, data, size.cint, 0'i32) else: - when defined(solaris): + when defined(solaris): const MSG_NOSIGNAL = 0 result = send(socket.fd, data, size, int32(MSG_NOSIGNAL)) proc send*(socket: Socket, data: string, - flags = {SocketFlag.SafeDisconn}) {.tags: [WriteIOEffect].} = - ## sends data to a socket. - let sent = send(socket, cstring(data), data.len) - if sent < 0: - let lastError = osLastError() - if flags.isDisconnectionError(lastError): return - socketError(socket, lastError = lastError) + flags = {SocketFlag.SafeDisconn}, maxRetries = 100) {.tags: [WriteIOEffect].} = + ## Sends data to a socket. Will try to send all the data by handling interrupts + ## and incomplete writes up to `maxRetries`. 
+ var written = 0 + var attempts = 0 + while data.len - written > 0: + let sent = send(socket, cstring(data), data.len) + + if sent < 0: + let lastError = osLastError() + let isBlockingErr = + when defined(nimdoc): + false + elif useWinVersion: + lastError.int32 == WSAEINTR or + lastError.int32 == WSAEWOULDBLOCK + else: + lastError.int32 == EINTR or + lastError.int32 == EWOULDBLOCK or + lastError.int32 == EAGAIN - if sent != data.len: - raise newException(OSError, "Could not send all data.") + if not isBlockingErr: + let lastError = osLastError() + socketError(socket, lastError = lastError, flags = flags) + else: + attempts.inc() + if attempts > maxRetries: + raiseOSError(osLastError(), "Could not send all data.") + else: + written.inc(sent) + +template `&=`*(socket: Socket; data: typed) = + ## an alias for 'send'. + send(socket, data) proc trySend*(socket: Socket, data: string): bool {.tags: [WriteIOEffect].} = - ## Safe alternative to ``send``. Does not raise an EOS when an error occurs, - ## and instead returns ``false`` on failure. + ## Safe alternative to `send`. Does not raise an OSError when an error occurs, + ## and instead returns `false` on failure. result = send(socket, cstring(data), data.len) == data.len proc sendTo*(socket: Socket, address: string, port: Port, data: pointer, - size: int, af: Domain = AF_INET, flags = 0'i32): int {. + size: int, af: Domain = AF_INET, flags = 0'i32) {. tags: [WriteIOEffect].} = - ## This proc sends ``data`` to the specified ``address``, - ## which may be an IP address or a hostname, if a hostname is specified - ## this function will try each IP of that hostname. + ## This proc sends `data` to the specified `address`, + ## which may be an IP address or a hostname, if a hostname is specified + ## this function will try each IP of that hostname. This function + ## should normally be used with connection-less sockets (UDP sockets). ## + ## If an error occurs an OSError exception will be raised. 
## ## **Note:** You may wish to use the high-level version of this function ## which is defined below. ## ## **Note:** This proc is not available for SSL sockets. - var aiList = getAddrInfo(address, port, af) - + assert(socket.protocol != IPPROTO_TCP, "Cannot `sendTo` on a TCP socket") + assert(not socket.isClosed, "Cannot `sendTo` on a closed socket") + var aiList = getAddrInfo(address, port, af, socket.sockType, socket.protocol) # try all possibilities: var success = false var it = aiList + var result = 0 while it != nil: result = sendto(socket.fd, data, size.cint, flags.cint, it.ai_addr, it.ai_addrlen.SockLen) @@ -915,117 +1805,110 @@ proc sendTo*(socket: Socket, address: string, port: Port, data: pointer, break it = it.ai_next - dealloc(aiList) + let osError = osLastError() + freeAddrInfo(aiList) -proc sendTo*(socket: Socket, address: string, port: Port, - data: string): int {.tags: [WriteIOEffect].} = - ## This proc sends ``data`` to the specified ``address``, - ## which may be an IP address or a hostname, if a hostname is specified + if not success: + raiseOSError(osError) + +proc sendTo*(socket: Socket, address: string, port: Port, + data: string) {.tags: [WriteIOEffect].} = + ## This proc sends `data` to the specified `address`, + ## which may be an IP address or a hostname, if a hostname is specified ## this function will try each IP of that hostname. ## - ## This is the high-level version of the above ``sendTo`` function. - result = socket.sendTo(address, port, cstring(data), data.len) - -proc connectAsync(socket: Socket, name: string, port = Port(0), - af: Domain = AF_INET) {.tags: [ReadIOEffect].} = - ## A variant of ``connect`` for non-blocking sockets. + ## Generally for use with connection-less (UDP) sockets. ## - ## This procedure will immediately return, it will not block until a connection - ## is made. It is up to the caller to make sure the connection has been established - ## by checking (using ``select``) whether the socket is writeable. 
+ ## If an error occurs an OSError exception will be raised. ## - ## **Note**: For SSL sockets, the ``handshake`` procedure must be called - ## whenever the socket successfully connects to a server. - var aiList = getAddrInfo(name, port, af) - # try all possibilities: - var success = false - var lastError: OSErrorCode - var it = aiList - while it != nil: - var ret = connect(socket.fd, it.ai_addr, it.ai_addrlen.SockLen) - if ret == 0'i32: - success = true - break - else: - lastError = osLastError() - when useWinVersion: - # Windows EINTR doesn't behave same as POSIX. - if lastError.int32 == WSAEWOULDBLOCK: - success = true - break - else: - if lastError.int32 == EINTR or lastError.int32 == EINPROGRESS: - success = true - break - - it = it.ai_next + ## This is the high-level version of the above `sendTo` function. + socket.sendTo(address, port, cstring(data), data.len, socket.domain) + +proc sendTo*(socket: Socket, address: IpAddress, port: Port, + data: string, flags = 0'i32): int {. + discardable, tags: [WriteIOEffect].} = + ## This proc sends `data` to the specified `IpAddress` and returns + ## the number of bytes written. + ## + ## Generally for use with connection-less (UDP) sockets. + ## + ## If an error occurs an OSError exception will be raised. + ## + ## This is the high-level version of the above `sendTo` function. 
+ assert(socket.protocol != IPPROTO_TCP, "Cannot `sendTo` on a TCP socket") + assert(not socket.isClosed, "Cannot `sendTo` on a closed socket") - dealloc(aiList) - if not success: raiseOSError(lastError) + var sa: Sockaddr_storage + var sl: SockLen + toSockAddr(address, port, sa, sl) + result = sendto(socket.fd, cstring(data), data.len().cint, flags.cint, + cast[ptr SockAddr](addr sa), sl) + + if result == -1'i32: + let osError = osLastError() + raiseOSError(osError) -proc connect*(socket: Socket, address: string, port = Port(0), timeout: int, - af: Domain = AF_INET) {.tags: [ReadIOEffect, WriteIOEffect].} = - ## Connects to server as specified by ``address`` on port specified by ``port``. - ## - ## The ``timeout`` paremeter specifies the time in miliseconds to allow for - ## the connection to the server to be made. - socket.fd.setBlocking(false) - - socket.connectAsync(address, port, af) - var s = @[socket.fd] - if selectWrite(s, timeout) != 1: - raise newException(TimeoutError, "Call to 'connect' timed out.") - else: - when defined(ssl): - if socket.isSSL: - socket.fd.setBlocking(true) - doAssert socket.handshake() - socket.fd.setBlocking(true) -proc isSsl*(socket: Socket): bool = - ## Determines whether ``socket`` is a SSL socket. - when defined(ssl): - result = socket.isSSL +proc isSsl*(socket: Socket): bool = + ## Determines whether `socket` is a SSL socket. 
+ when defineSsl: + result = socket.isSsl else: result = false proc getFd*(socket: Socket): SocketHandle = return socket.fd ## Returns the socket's file descriptor -proc IPv4_any*(): TIpAddress = +when defined(zephyr) or defined(nimNetSocketExtras): # Remove in future + proc getDomain*(socket: Socket): Domain = return socket.domain + ## Returns the socket's domain + + proc getType*(socket: Socket): SockType = return socket.sockType + ## Returns the socket's type + + proc getProtocol*(socket: Socket): Protocol = return socket.protocol + ## Returns the socket's protocol + +when defined(nimHasStyleChecks): + {.push styleChecks: off.} + +proc IPv4_any*(): IpAddress = ## Returns the IPv4 any address, which can be used to listen on all available ## network adapters - result = TIpAddress( + result = IpAddress( family: IpAddressFamily.IPv4, address_v4: [0'u8, 0, 0, 0]) -proc IPv4_loopback*(): TIpAddress = +proc IPv4_loopback*(): IpAddress = ## Returns the IPv4 loopback address (127.0.0.1) - result = TIpAddress( + result = IpAddress( family: IpAddressFamily.IPv4, address_v4: [127'u8, 0, 0, 1]) -proc IPv4_broadcast*(): TIpAddress = +proc IPv4_broadcast*(): IpAddress = ## Returns the IPv4 broadcast address (255.255.255.255) - result = TIpAddress( + result = IpAddress( family: IpAddressFamily.IPv4, address_v4: [255'u8, 255, 255, 255]) -proc IPv6_any*(): TIpAddress = +proc IPv6_any*(): IpAddress = ## Returns the IPv6 any address (::0), which can be used - ## to listen on all available network adapters - result = TIpAddress( + ## to listen on all available network adapters + result = IpAddress( family: IpAddressFamily.IPv6, address_v6: [0'u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) -proc IPv6_loopback*(): TIpAddress = +proc IPv6_loopback*(): IpAddress = ## Returns the IPv6 loopback address (::1) - result = TIpAddress( + result = IpAddress( family: IpAddressFamily.IPv6, address_v6: [0'u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]) -proc `==`*(lhs, rhs: TIpAddress): bool 
= - ## Compares two IpAddresses for Equality. Returns two if the addresses are equal +when defined(nimHasStyleChecks): + {.pop.} + +proc `==`*(lhs, rhs: IpAddress): bool = + ## Compares two IpAddresses for Equality. Returns true if the addresses are equal if lhs.family != rhs.family: return false if lhs.family == IpAddressFamily.IPv4: for i in low(lhs.address_v4) .. high(lhs.address_v4): @@ -1035,16 +1918,20 @@ proc `==`*(lhs, rhs: TIpAddress): bool = if lhs.address_v6[i] != rhs.address_v6[i]: return false return true -proc `$`*(address: TIpAddress): string = - ## Converts an TIpAddress into the textual representation - result = "" +proc `$`*(address: IpAddress): string = + ## Converts an IpAddress into the textual representation case address.family of IpAddressFamily.IPv4: - for i in 0 .. 3: - if i != 0: - result.add('.') - result.add($address.address_v4[i]) + result = newStringOfCap(15) + result.addInt address.address_v4[0] + result.add '.' + result.addInt address.address_v4[1] + result.add '.' + result.addInt address.address_v4[2] + result.add '.' 
+ result.addInt address.address_v4[3] of IpAddressFamily.IPv6: + result = newStringOfCap(39) var currentZeroStart = -1 currentZeroCount = 0 @@ -1070,7 +1957,7 @@ proc `$`*(address: TIpAddress): string = else: # Print address var printedLastGroup = false for i in 0..7: - var word:uint16 = (cast[uint16](address.address_v6[i*2])) shl 8 + var word: uint16 = (cast[uint16](address.address_v6[i*2])) shl 8 word = word or cast[uint16](address.address_v6[i*2+1]) if biggestZeroCount != 0 and # Check if group is in skip group @@ -1093,179 +1980,196 @@ proc `$`*(address: TIpAddress): string = result.add(chr(uint16(ord('a'))+val-0xA)) afterLeadingZeros = true mask = mask shr 4 - printedLastGroup = true -proc parseIPv4Address(address_str: string): TIpAddress = - ## Parses IPv4 adresses - ## Raises EInvalidValue on errors - var - byteCount = 0 - currentByte:uint16 = 0 - seperatorValid = false + if not afterLeadingZeros: + result.add '0' - result.family = IpAddressFamily.IPv4 + printedLastGroup = true - for i in 0 .. high(address_str): - if address_str[i] in strutils.Digits: # Character is a number - currentByte = currentByte * 10 + - cast[uint16](ord(address_str[i]) - ord('0')) - if currentByte > 255'u16: - raise newException(ValueError, - "Invalid IP Address. Value is out of range") - seperatorValid = true - elif address_str[i] == '.': # IPv4 address separator - if not seperatorValid or byteCount >= 3: - raise newException(ValueError, - "Invalid IP Address. The address consists of too many groups") - result.address_v4[byteCount] = cast[uint8](currentByte) - currentByte = 0 - byteCount.inc - seperatorValid = false - else: - raise newException(ValueError, - "Invalid IP Address. Address contains an invalid character") +proc dial*(address: string, port: Port, + protocol = IPPROTO_TCP, buffered = true): owned(Socket) + {.tags: [ReadIOEffect, WriteIOEffect].} = + ## Establishes connection to the specified `address`:`port` pair via the + ## specified protocol. 
The procedure iterates through possible + ## resolutions of the `address` until it succeeds, meaning that it + ## seamlessly works with both IPv4 and IPv6. + ## Returns Socket ready to send or receive data. + let sockType = protocol.toSockType() + + let aiList = getAddrInfo(address, port, AF_UNSPEC, sockType, protocol) + + var fdPerDomain: array[low(Domain).ord..high(Domain).ord, SocketHandle] + for i in low(fdPerDomain)..high(fdPerDomain): + fdPerDomain[i] = osInvalidSocket + template closeUnusedFds(domainToKeep = -1) {.dirty.} = + for i, fd in fdPerDomain: + if fd != osInvalidSocket and i != domainToKeep: + fd.close() - if byteCount != 3 or not seperatorValid: - raise newException(ValueError, "Invalid IP Address") - result.address_v4[byteCount] = cast[uint8](currentByte) + var success = false + var lastError: OSErrorCode + var it = aiList + var domain: Domain + var lastFd: SocketHandle + while it != nil: + let domainOpt = it.ai_family.toKnownDomain() + if domainOpt.isNone: + it = it.ai_next + continue + domain = domainOpt.unsafeGet() + lastFd = fdPerDomain[ord(domain)] + if lastFd == osInvalidSocket: + lastFd = createNativeSocket(domain, sockType, protocol) + if lastFd == osInvalidSocket: + # we always raise if socket creation failed, because it means a + # network system problem (e.g. not enough FDs), and not an unreachable + # address. 
+ let err = osLastError() + freeAddrInfo(aiList) + closeUnusedFds() + raiseOSError(err) + fdPerDomain[ord(domain)] = lastFd + if connect(lastFd, it.ai_addr, it.ai_addrlen.SockLen) == 0'i32: + success = true + break + lastError = osLastError() + it = it.ai_next + freeAddrInfo(aiList) + closeUnusedFds(ord(domain)) + + if success: + result = newSocket(lastFd, domain, sockType, protocol, buffered) + elif lastError != 0.OSErrorCode: + lastFd.close() + raiseOSError(lastError) + else: + lastFd.close() + raise newException(IOError, "Couldn't resolve address: " & address) + +proc connect*(socket: Socket, address: string, + port = Port(0)) {.tags: [ReadIOEffect, RootEffect].} = + ## Connects socket to `address`:`port`. `Address` can be an IP address or a + ## host name. If `address` is a host name, this function will try each IP + ## of that host name. `htons` is already performed on `port` so you must + ## not do it. + ## + ## If `socket` is an SSL socket a handshake will be automatically performed. + var aiList = getAddrInfo(address, port, socket.domain) + # try all possibilities: + var success = false + var lastError: OSErrorCode + var it = aiList + while it != nil: + if connect(socket.fd, it.ai_addr, it.ai_addrlen.SockLen) == 0'i32: + success = true + break + else: lastError = osLastError() + it = it.ai_next -proc parseIPv6Address(address_str: string): TIpAddress = - ## Parses IPv6 adresses - ## Raises EInvalidValue on errors - result.family = IpAddressFamily.IPv6 - if address_str.len < 2: - raise newException(ValueError, "Invalid IP Address") + freeAddrInfo(aiList) + if not success: raiseOSError(lastError) - var - groupCount = 0 - currentGroupStart = 0 - currentShort:uint32 = 0 - seperatorValid = true - dualColonGroup = -1 - lastWasColon = false - v4StartPos = -1 - byteCount = 0 + when defineSsl: + if socket.isSsl: + # RFC3546 for SNI specifies that IP addresses are not allowed. 
+ if not isIpAddress(address): + # Discard result in case OpenSSL version doesn't support SNI, or we're + # not using TLSv1+ + discard SSL_set_tlsext_host_name(socket.sslHandle, address) - for i,c in address_str: - if c == ':': - if not seperatorValid: - raise newException(ValueError, - "Invalid IP Address. Address contains an invalid seperator") - if lastWasColon: - if dualColonGroup != -1: - raise newException(ValueError, - "Invalid IP Address. Address contains more than one \"::\" seperator") - dualColonGroup = groupCount - seperatorValid = false - elif i != 0 and i != high(address_str): - if groupCount >= 8: - raise newException(ValueError, - "Invalid IP Address. The address consists of too many groups") - result.address_v6[groupCount*2] = cast[uint8](currentShort shr 8) - result.address_v6[groupCount*2+1] = cast[uint8](currentShort and 0xFF) - currentShort = 0 - groupCount.inc() - if dualColonGroup != -1: seperatorValid = false - elif i == 0: # only valid if address starts with :: - if address_str[1] != ':': - raise newException(ValueError, - "Invalid IP Address. Address may not start with \":\"") - else: # i == high(address_str) - only valid if address ends with :: - if address_str[high(address_str)-1] != ':': - raise newException(ValueError, - "Invalid IP Address. 
Address may not end with \":\"") - lastWasColon = true - currentGroupStart = i + 1 - elif c == '.': # Switch to parse IPv4 mode - if i < 3 or not seperatorValid or groupCount >= 7: - raise newException(ValueError, "Invalid IP Address") - v4StartPos = currentGroupStart - currentShort = 0 - seperatorValid = false + ErrClearError() + let ret = SSL_connect(socket.sslHandle) + socketError(socket, ret) + when not defined(nimDisableCertificateValidation) and not defined(windows): + if not isIpAddress(address): + socket.checkCertName(address) + +proc connectAsync(socket: Socket, name: string, port = Port(0), + af: Domain = AF_INET) {.tags: [ReadIOEffect].} = + ## A variant of `connect` for non-blocking sockets. + ## + ## This procedure will immediately return, it will not block until a connection + ## is made. It is up to the caller to make sure the connection has been established + ## by checking (using `select`) whether the socket is writeable. + ## + ## **Note**: For SSL sockets, the `handshake` procedure must be called + ## whenever the socket successfully connects to a server. + var aiList = getAddrInfo(name, port, af) + # try all possibilities: + var success = false + var lastError: OSErrorCode + var it = aiList + while it != nil: + var ret = connect(socket.fd, it.ai_addr, it.ai_addrlen.SockLen) + if ret == 0'i32: + success = true break - elif c in strutils.HexDigits: - if c in strutils.Digits: # Normal digit - currentShort = (currentShort shl 4) + cast[uint32](ord(c) - ord('0')) - elif c >= 'a' and c <= 'f': # Lower case hex - currentShort = (currentShort shl 4) + cast[uint32](ord(c) - ord('a')) + 10 - else: # Upper case hex - currentShort = (currentShort shl 4) + cast[uint32](ord(c) - ord('A')) + 10 - if currentShort > 65535'u32: - raise newException(ValueError, - "Invalid IP Address. Value is out of range") - lastWasColon = false - seperatorValid = true else: - raise newException(ValueError, - "Invalid IP Address. 
Address contains an invalid character") - + lastError = osLastError() + when useWinVersion: + # Windows EINTR doesn't behave same as POSIX. + if lastError.int32 == WSAEWOULDBLOCK: + success = true + break + else: + if lastError.int32 == EINTR or lastError.int32 == EINPROGRESS: + success = true + break - if v4StartPos == -1: # Don't parse v4. Copy the remaining v6 stuff - if seperatorValid: # Copy remaining data - if groupCount >= 8: - raise newException(ValueError, - "Invalid IP Address. The address consists of too many groups") - result.address_v6[groupCount*2] = cast[uint8](currentShort shr 8) - result.address_v6[groupCount*2+1] = cast[uint8](currentShort and 0xFF) - groupCount.inc() - else: # Must parse IPv4 address - for i,c in address_str[v4StartPos..high(address_str)]: - if c in strutils.Digits: # Character is a number - currentShort = currentShort * 10 + cast[uint32](ord(c) - ord('0')) - if currentShort > 255'u32: - raise newException(ValueError, - "Invalid IP Address. Value is out of range") - seperatorValid = true - elif c == '.': # IPv4 address separator - if not seperatorValid or byteCount >= 3: - raise newException(ValueError, "Invalid IP Address") - result.address_v6[groupCount*2 + byteCount] = cast[uint8](currentShort) - currentShort = 0 - byteCount.inc() - seperatorValid = false - else: # Invalid character - raise newException(ValueError, - "Invalid IP Address. Address contains an invalid character") + it = it.ai_next - if byteCount != 3 or not seperatorValid: - raise newException(ValueError, "Invalid IP Address") - result.address_v6[groupCount*2 + byteCount] = cast[uint8](currentShort) - groupCount += 2 + freeAddrInfo(aiList) + if not success: raiseOSError(lastError) - # Shift and fill zeros in case of :: - if groupCount > 8: - raise newException(ValueError, - "Invalid IP Address. The address consists of too many groups") - elif groupCount < 8: # must fill - if dualColonGroup == -1: - raise newException(ValueError, - "Invalid IP Address. 
The address consists of too few groups") - var toFill = 8 - groupCount # The number of groups to fill - var toShift = groupCount - dualColonGroup # Nr of known groups after :: - for i in 0..2*toShift-1: # shift - result.address_v6[15-i] = result.address_v6[groupCount*2-i-1] - for i in 0..2*toFill-1: # fill with 0s - result.address_v6[dualColonGroup*2+i] = 0 - elif dualColonGroup != -1: - raise newException(ValueError, - "Invalid IP Address. The address consists of too many groups") +proc connect*(socket: Socket, address: string, port = Port(0), + timeout: int) {.tags: [ReadIOEffect, WriteIOEffect, RootEffect].} = + ## Connects to server as specified by `address` on port specified by `port`. + ## + ## The `timeout` parameter specifies the time in milliseconds to allow for + ## the connection to the server to be made. + socket.fd.setBlocking(false) -proc parseIpAddress(address_str: string): TIpAddress = - ## Parses an IP address - ## Raises EInvalidValue on error - if address_str == nil: - raise newException(ValueError, "IP Address string is nil") - if address_str.contains(':'): - return parseIPv6Address(address_str) + socket.connectAsync(address, port, socket.domain) + if timeoutWrite(socket.fd, timeout) != 1: + raise newException(TimeoutError, "Call to 'connect' timed out.") else: - return parseIPv4Address(address_str) + let res = getSockOptInt(socket.fd, SOL_SOCKET, SO_ERROR) + if res != 0: + raiseOSError(OSErrorCode(res)) + when defineSsl and not defined(nimdoc): + if socket.isSsl: + socket.fd.setBlocking(true) + # RFC3546 for SNI specifies that IP addresses are not allowed. 
+ if not isIpAddress(address): + # Discard result in case OpenSSL version doesn't support SNI, or we're + # not using TLSv1+ + discard SSL_set_tlsext_host_name(socket.sslHandle, address) + ErrClearError() + let ret = SSL_connect(socket.sslHandle) + socketError(socket, ret) + when not defined(nimDisableCertificateValidation): + if not isIpAddress(address): + socket.checkCertName(address) + socket.fd.setBlocking(true) -proc isIpAddress(address_str: string): bool = - ## Checks if a string is an IP address - ## Returns true if it is, false otherwise +proc getPrimaryIPAddr*(dest = parseIpAddress("8.8.8.8")): IpAddress = + ## Finds the local IP address, usually assigned to eth0 on LAN or wlan0 on WiFi, + ## used to reach an external address. Useful to run local services. + ## + ## No traffic is sent. + ## + ## Supports IPv4 and v6. + ## Raises OSError if external networking is not set up. + runnableExamples("-r:off"): + echo getPrimaryIPAddr() # "192.168.1.2" + let socket = + if dest.family == IpAddressFamily.IPv4: + newSocket(AF_INET, SOCK_DGRAM, IPPROTO_UDP) + else: + newSocket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP) try: - discard parseIpAddress(address_str) - except ValueError: - return false - return true + socket.connect($dest, 80.Port) + result = socket.getLocalAddr()[0].parseIpAddress() + finally: + socket.close() diff --git a/lib/pure/nimprof.nim b/lib/pure/nimprof.nim index cce2a20ae..bf8367d1d 100644 --- a/lib/pure/nimprof.nim +++ b/lib/pure/nimprof.nim @@ -1,23 +1,29 @@ # # # Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2015 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## Profiling support for Nim. This is an embedded profiler that requires -## ``--profiler:on``. You only need to import this module to get a profiling -## report at program exit. +## `--profiler:on`. You only need to import this module to get a profiling +## report at program exit. 
See `Embedded Stack Trace Profiler <estp.html>`_ +## for usage. when not defined(profiler) and not defined(memProfiler): - {.warning: "Profiling support is turned off!".} + {.error: "Profiling support is turned off! Enable profiling by passing `--profiler:on --stackTrace:on` to the compiler (see the Nim Compiler User Guide for more options).".} + +{.used.} # We don't want to profile the profiling code ... {.push profiler: off.} -import hashes, algorithm, strutils, tables, sets +import std/[hashes, algorithm, strutils, tables, sets] + +when defined(nimPreviewSlimSystem): + import std/[syncio, sysatomics] when not defined(memProfiler): include "system/timers" @@ -26,31 +32,34 @@ const withThreads = compileOption("threads") tickCountCorrection = 50_000 -when not declared(system.TStackTrace): - type TStackTrace = array [0..20, cstring] +when not declared(system.StackTrace): + type StackTrace = object + lines: array[0..20, cstring] + files: array[0..20, cstring] + proc `[]`*(st: StackTrace, i: int): cstring = st.lines[i] # We use a simple hash table of bounded size to keep track of the stack traces: type - TProfileEntry = object + ProfileEntry = object total: int - st: TStackTrace - TProfileData = array [0..64*1024-1, ptr TProfileEntry] + st: StackTrace + ProfileData = array[0..64*1024-1, ptr ProfileEntry] -proc `==`(a, b: TStackTrace): bool = - for i in 0 .. high(a): +proc `==`(a, b: StackTrace): bool = + for i in 0 .. high(a.lines): if a[i] != b[i]: return false result = true # XXX extract this data structure; it is generally useful ;-) # However a chain length of over 3000 is suspicious... var - profileData: TProfileData + profileData: ProfileData emptySlots = profileData.len * 3 div 2 maxChainLen = 0 totalCalls = 0 when not defined(memProfiler): - var interval: TNanos = 5_000_000 - tickCountCorrection # 5ms + var interval: Nanos = 5_000_000 - tickCountCorrection # 5ms proc setSamplingFrequency*(intervalInUs: int) = ## set this to change the sampling frequency. 
Default value is 5ms. @@ -60,17 +69,17 @@ when not defined(memProfiler): else: interval = intervalInUs * 1000 - tickCountCorrection when withThreads: - import locks + import std/locks var - profilingLock: TLock + profilingLock: Lock initLock profilingLock -proc hookAux(st: TStackTrace, costs: int) = +proc hookAux(st: StackTrace, costs: int) = # this is quite performance sensitive! when withThreads: acquire profilingLock inc totalCalls - var last = high(st) + var last = high(st.lines) while last > 0 and isNil(st[last]): dec last var h = hash(pointer(st[last])) and high(profileData) @@ -94,8 +103,8 @@ proc hookAux(st: TStackTrace, costs: int) = var chain = 0 while true: if profileData[h] == nil: - profileData[h] = cast[ptr TProfileEntry]( - allocShared0(sizeof(TProfileEntry))) + profileData[h] = cast[ptr ProfileEntry]( + allocShared0(sizeof(ProfileEntry))) profileData[h].total = costs profileData[h].st = st dec emptySlots @@ -115,53 +124,68 @@ when defined(memProfiler): var gTicker {.threadvar.}: int - proc hook(st: TStackTrace, size: int) {.nimcall.} = + proc requestedHook(): bool {.nimcall.} = if gTicker == 0: - gTicker = -1 - when defined(ignoreAllocationSize): - hookAux(st, 1) - else: - hookAux(st, size) gTicker = SamplingInterval + result = true dec gTicker + proc hook(st: StackTrace, size: int) {.nimcall.} = + when defined(ignoreAllocationSize): + hookAux(st, 1) + else: + hookAux(st, size) + else: var - t0 {.threadvar.}: TTicks - - proc hook(st: TStackTrace) {.nimcall.} = + t0 {.threadvar.}: Ticks + gTicker: int # we use an additional counter to + # avoid calling 'getTicks' too frequently + + proc requestedHook(): bool {.nimcall.} = + if interval == 0: result = true + elif gTicker == 0: + gTicker = 500 + if getTicks() - t0 > interval: + result = true + else: + dec gTicker + + proc hook(st: StackTrace) {.nimcall.} = + #echo "profiling! 
", interval if interval == 0: hookAux(st, 1) - elif int64(t0) == 0 or getTicks() - t0 > interval: + else: hookAux(st, 1) t0 = getTicks() -proc getTotal(x: ptr TProfileEntry): int = +proc getTotal(x: ptr ProfileEntry): int = result = if isNil(x): 0 else: x.total -proc cmpEntries(a, b: ptr TProfileEntry): int = +proc cmpEntries(a, b: ptr ProfileEntry): int = result = b.getTotal - a.getTotal proc `//`(a, b: int): string = - result = format("$1/$2 = $3%", a, b, formatFloat(a / b * 100.0, ffDefault, 2)) + result = format("$1/$2 = $3%", a, b, formatFloat(a / b * 100.0, ffDecimal, 2)) proc writeProfile() {.noconv.} = - when declared(system.TStackTrace): + system.profilingRequestedHook = nil + when declared(system.StackTrace): system.profilerHook = nil const filename = "profile_results.txt" echo "writing " & filename & "..." var f: File if open(f, filename, fmWrite): sort(profileData, cmpEntries) - writeln(f, "total executions of each stack trace:") + writeLine(f, "total executions of each stack trace:") var entries = 0 for i in 0..high(profileData): if profileData[i] != nil: inc entries var perProc = initCountTable[string]() for i in 0..entries-1: - var dups = initSet[string]() - for ii in 0..high(TStackTrace): + var dups = initHashSet[string]() + for ii in 0..high(StackTrace.lines): let procname = profileData[i].st[ii] if isNil(procname): break let p = $procname @@ -173,13 +197,15 @@ proc writeProfile() {.noconv.} = for i in 0..min(100, entries-1): if profileData[i].total > 1: inc sum, profileData[i].total - writeln(f, "Entry: ", i+1, "/", entries, " Calls: ", + writeLine(f, "Entry: ", i+1, "/", entries, " Calls: ", profileData[i].total // totalCalls, " [sum: ", sum, "; ", sum // totalCalls, "]") - for ii in 0..high(TStackTrace): + for ii in 0..high(StackTrace.lines): let procname = profileData[i].st[ii] + let filename = profileData[i].st.files[ii] if isNil(procname): break - writeln(f, " ", procname, " ", perProc[$procname] // totalCalls) + writeLine(f, " ", $filename & 
": " & $procname, " ", + perProc[$procname] // totalCalls) close(f) echo "... done" else: @@ -189,17 +215,19 @@ var disabled: int proc disableProfiling*() = - when declared(system.TStackTrace): + when declared(system.StackTrace): atomicDec disabled - system.profilerHook = nil + system.profilingRequestedHook = nil proc enableProfiling*() = - when declared(system.TStackTrace): + when declared(system.StackTrace): if atomicInc(disabled) >= 0: - system.profilerHook = hook + system.profilingRequestedHook = requestedHook -when declared(system.TStackTrace): +when declared(system.StackTrace): + import std/exitprocs + system.profilingRequestedHook = requestedHook system.profilerHook = hook - addQuitProc(writeProfile) + addExitProc(writeProfile) {.pop.} diff --git a/lib/pure/numeric.nim b/lib/pure/numeric.nim deleted file mode 100644 index 9b298c0a0..000000000 --- a/lib/pure/numeric.nim +++ /dev/null @@ -1,84 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2013 Robert Persson -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -type OneVarFunction* = proc (x: float): float - -{.deprecated: [TOneVarFunction: OneVarFunction].} - -proc brent*(xmin,xmax:float, function:OneVarFunction, tol:float,maxiter=1000): - tuple[rootx, rooty: float, success: bool]= - ## Searches `function` for a root between `xmin` and `xmax` - ## using brents method. If the function value at `xmin`and `xmax` has the - ## same sign, `rootx`/`rooty` is set too the extrema value closest to x-axis - ## and succes is set to false. - ## Otherwise there exists at least one root and success is set to true. - ## This root is searched for at most `maxiter` iterations. - ## If `tol` tolerance is reached within `maxiter` iterations - ## the root refinement stops and success=true. 
- - # see http://en.wikipedia.org/wiki/Brent%27s_method - var - a=xmin - b=xmax - c=a - d=1.0e308 - fa=function(a) - fb=function(b) - fc=fa - s=0.0 - fs=0.0 - mflag:bool - i=0 - tmp2:float - - if fa*fb>=0: - if abs(fa)<abs(fb): - return (a,fa,false) - else: - return (b,fb,false) - - if abs(fa)<abs(fb): - swap(fa,fb) - swap(a,b) - - while fb!=0.0 and abs(a-b)>tol: - if fa!=fc and fb!=fc: # inverse quadratic interpolation - s = a * fb * fc / (fa - fb) / (fa - fc) + b * fa * fc / (fb - fa) / (fb - fc) + c * fa * fb / (fc - fa) / (fc - fb) - else: #secant rule - s = b - fb * (b - a) / (fb - fa) - tmp2 = (3.0 * a + b) / 4.0 - if not((s > tmp2 and s < b) or (s < tmp2 and s > b)) or - (mflag and abs(s - b) >= (abs(b - c) / 2.0)) or - (not mflag and abs(s - b) >= abs(c - d) / 2.0): - s=(a+b)/2.0 - mflag=true - else: - if (mflag and (abs(b - c) < tol)) or (not mflag and (abs(c - d) < tol)): - s=(a+b)/2.0 - mflag=true - else: - mflag=false - fs = function(s) - d = c - c = b - fc = fb - if fa * fs<0.0: - b=s - fb=fs - else: - a=s - fa=fs - if abs(fa)<abs(fb): - swap(a,b) - swap(fa,fb) - inc i - if i>maxiter: - break - - return (b,fb,true) diff --git a/lib/pure/oids.nim b/lib/pure/oids.nim index 0dc8e3c15..4d6ceefd7 100644 --- a/lib/pure/oids.nim +++ b/lib/pure/oids.nim @@ -8,86 +8,92 @@ # ## Nim OID support. An OID is a global ID that consists of a timestamp, -## a unique counter and a random value. This combination should suffice to -## produce a globally distributed unique ID. This implementation was extracted -## from the Mongodb interface and it thus binary compatible with a Mongo OID. +## a unique counter and a random value. This combination should suffice to +## produce a globally distributed unique ID. ## -## This implementation calls ``math.randomize()`` for the first call of -## ``genOid``. +## This implementation calls `initRand()` for the first call of +## `genOid`. 
-import times, endians +import std/[hashes, times, endians, random] +from std/private/decode_helpers import handleHexChar + +when defined(nimPreviewSlimSystem): + import std/sysatomics type - Oid* = object ## an OID - time: int32 ## - fuzz: int32 ## - count: int32 ## + Oid* = object ## An OID. + time: int64 + fuzz: int32 + count: int32 + +proc `==`*(oid1: Oid, oid2: Oid): bool {.inline.} = + ## Compares two OIDs for equality. + result = (oid1.time == oid2.time) and (oid1.fuzz == oid2.fuzz) and + (oid1.count == oid2.count) -{.deprecated: [Toid: Oid].} +proc hash*(oid: Oid): Hash = + ## Generates the hash of an OID for use in hashtables. + var h: Hash = 0 + h = h !& hash(oid.time) + h = h !& hash(oid.fuzz) + h = h !& hash(oid.count) + result = !$h -proc hexbyte*(hex: char): int = - case hex - of '0'..'9': result = (ord(hex) - ord('0')) - of 'a'..'f': result = (ord(hex) - ord('a') + 10) - of 'A'..'F': result = (ord(hex) - ord('A') + 10) - else: discard +proc hexbyte*(hex: char): int {.inline.} = + result = handleHexChar(hex) proc parseOid*(str: cstring): Oid = - ## parses an OID. - var bytes = cast[cstring](addr(result.time)) + ## Parses an OID. + var bytes = cast[cstring](cast[pointer](cast[int](addr(result.time)) + 4)) var i = 0 while i < 12: bytes[i] = chr((hexbyte(str[2 * i]) shl 4) or hexbyte(str[2 * i + 1])) inc(i) -proc oidToString*(oid: Oid, str: cstring) = +proc `$`*(oid: Oid): string = + ## Converts an OID to a string. 
const hex = "0123456789abcdef" - # work around a compiler bug: - var str = str + + result.setLen 24 + var o = oid - var bytes = cast[cstring](addr(o)) + var bytes = cast[cstring](cast[pointer](cast[int](addr(o)) + 4)) var i = 0 while i < 12: let b = bytes[i].ord - str[2 * i] = hex[(b and 0xF0) shr 4] - str[2 * i + 1] = hex[b and 0xF] + result[2 * i] = hex[(b and 0xF0) shr 4] + result[2 * i + 1] = hex[b and 0xF] inc(i) - str[24] = '\0' -proc `$`*(oid: Oid): string = - result = newString(24) - oidToString(oid, result) +let + t = getTime().toUnix var - incr: int - fuzz: int32 + seed = initRand(t) + incr: int = seed.rand(int.high) -proc genOid*(): Oid = - ## generates a new OID. - proc rand(): cint {.importc: "rand", header: "<stdlib.h>", nodecl.} - proc gettime(dummy: ptr cint): cint {.importc: "time", header: "<time.h>".} - proc srand(seed: cint) {.importc: "srand", header: "<stdlib.h>", nodecl.} - - var t = gettime(nil) - - var i = int32(incr) - atomicInc(incr) - - if fuzz == 0: - # racy, but fine semantically: - srand(t) - fuzz = rand() - bigEndian32(addr result.time, addr(t)) +let fuzz = cast[int32](seed.rand(high(int))) + + +template genOid(result: var Oid, incr: var int, fuzz: int32) = + var time = getTime().toUnix + var i = cast[int32](atomicInc(incr)) + + bigEndian64(addr result.time, addr(time)) result.fuzz = fuzz bigEndian32(addr result.count, addr(i)) +proc genOid*(): Oid = + ## Generates a new OID. + runnableExamples: + doAssert ($genOid()).len == 24 + runnableExamples("-r:off"): + echo $genOid() # for example, "5fc7f546ddbbc84800006aaf" + genOid(result, incr, fuzz) + proc generatedTime*(oid: Oid): Time = - ## returns the generated timestamp of the OID. - var tmp: int32 + ## Returns the generated timestamp of the OID. 
+ var tmp: int64 var dummy = oid.time - bigEndian32(addr(tmp), addr(dummy)) - result = Time(tmp) - -when isMainModule: - let xo = genOid() - echo xo.generatedTime + bigEndian64(addr(tmp), addr(dummy)) + result = fromUnix(tmp) diff --git a/lib/pure/options.nim b/lib/pure/options.nim new file mode 100644 index 000000000..b34ff72c0 --- /dev/null +++ b/lib/pure/options.nim @@ -0,0 +1,381 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Nim Contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +##[ +This module implements types which encapsulate an optional value. + +A value of type `Option[T]` either contains a value `x` (represented as +`some(x)`) or is empty (`none(T)`). + +This can be useful when you have a value that can be present or not. The +absence of a value is often represented by `nil`, but that is not always +available, nor is it always a good solution. + + +Basic usage +=========== + +Let's start with an example: a procedure that finds the index of a character +in a string. +]## + +runnableExamples: + proc find(haystack: string, needle: char): Option[int] = + for i, c in haystack: + if c == needle: + return some(i) + return none(int) # This line is actually optional, + # because the default is empty + + let found = "abc".find('c') + assert found.isSome and found.get() == 2 + +##[ +The `get` operation demonstrated above returns the underlying value, or +raises `UnpackDefect` if there is no value. Note that `UnpackDefect` +inherits from `system.Defect` and should therefore never be caught. +Instead, rely on checking if the option contains a value with the +`isSome <#isSome,Option[T]>`_ and `isNone <#isNone,Option[T]>`_ procs. + + +Pattern matching +================ + +.. note:: This requires the [fusion](https://github.com/nim-lang/fusion) package. 
+ +[fusion/matching](https://nim-lang.github.io/fusion/src/fusion/matching.html) +supports pattern matching on `Option`s, with the `Some(<pattern>)` and +`None()` patterns. + + ```nim + {.experimental: "caseStmtMacros".} + + import fusion/matching + + case some(42) + of Some(@a): + assert a == 42 + of None(): + assert false + + assertMatch(some(some(none(int))), Some(Some(None()))) + ``` +]## +# xxx pending https://github.com/timotheecour/Nim/issues/376 use `runnableExamples` and `whichModule` + +when defined(nimHasEffectsOf): + {.experimental: "strictEffects".} +else: + {.pragma: effectsOf.} + +import std/typetraits + +when defined(nimPreviewSlimSystem): + import std/assertions + + +when (NimMajor, NimMinor) >= (1, 1): + type + SomePointer = ref | ptr | pointer | proc | iterator {.closure.} +else: + type + SomePointer = ref | ptr | pointer + +type + Option*[T] = object + ## An optional type that may or may not contain a value of type `T`. + ## When `T` is a a pointer type (`ptr`, `pointer`, `ref`, `proc` or `iterator {.closure.}`), + ## `none(T)` is represented as `nil`. + when T is SomePointer: + val: T + else: + val: T + has: bool + + UnpackDefect* = object of Defect + UnpackError* {.deprecated: "See corresponding Defect".} = UnpackDefect + +proc option*[T](val: sink T): Option[T] {.inline.} = + ## Can be used to convert a pointer type (`ptr`, `pointer`, `ref` or `proc`) to an option type. + ## It converts `nil` to `none(T)`. When `T` is no pointer type, this is equivalent to `some(val)`. + ## + ## **See also:** + ## * `some proc <#some,T>`_ + ## * `none proc <#none,typedesc>`_ + runnableExamples: + type + Foo = ref object + a: int + b: string + + assert option[Foo](nil).isNone + assert option(42).isSome + + when T is SomePointer: + result = Option[T](val: val) + else: + result = Option[T](has: true, val: val) + +proc some*[T](val: sink T): Option[T] {.inline.} = + ## Returns an `Option` that has the value `val`. 
+ ## + ## **See also:** + ## * `option proc <#option,T>`_ + ## * `none proc <#none,typedesc>`_ + ## * `isSome proc <#isSome,Option[T]>`_ + runnableExamples: + let a = some("abc") + + assert a.isSome + assert a.get == "abc" + + when T is SomePointer: + assert not val.isNil + result = Option[T](val: val) + else: + result = Option[T](has: true, val: val) + +proc none*(T: typedesc): Option[T] {.inline.} = + ## Returns an `Option` for this type that has no value. + ## + ## **See also:** + ## * `option proc <#option,T>`_ + ## * `some proc <#some,T>`_ + ## * `isNone proc <#isNone,Option[T]>`_ + runnableExamples: + assert none(int).isNone + + # the default is the none type + result = Option[T]() + +proc none*[T]: Option[T] {.inline.} = + ## Alias for `none(T) <#none,typedesc>`_. + none(T) + +proc isSome*[T](self: Option[T]): bool {.inline.} = + ## Checks if an `Option` contains a value. + ## + ## **See also:** + ## * `isNone proc <#isNone,Option[T]>`_ + ## * `some proc <#some,T>`_ + runnableExamples: + assert some(42).isSome + assert not none(string).isSome + + when T is SomePointer: + not self.val.isNil + else: + self.has + +proc isNone*[T](self: Option[T]): bool {.inline.} = + ## Checks if an `Option` is empty. + ## + ## **See also:** + ## * `isSome proc <#isSome,Option[T]>`_ + ## * `none proc <#none,typedesc>`_ + runnableExamples: + assert not some(42).isNone + assert none(string).isNone + + when T is SomePointer: + self.val.isNil + else: + not self.has + +proc get*[T](self: Option[T]): lent T {.inline.} = + ## Returns the content of an `Option`. If it has no value, + ## an `UnpackDefect` exception is raised. 
+ ## + ## **See also:** + ## * `get proc <#get,Option[T],T>`_ with a default return value + runnableExamples: + assert some(42).get == 42 + doAssertRaises(UnpackDefect): + echo none(string).get + + if self.isNone: + raise newException(UnpackDefect, "Can't obtain a value from a `none`") + result = self.val + +proc get*[T](self: Option[T], otherwise: T): T {.inline.} = + ## Returns the content of the `Option` or `otherwise` if + ## the `Option` has no value. + runnableExamples: + assert some(42).get(9999) == 42 + assert none(int).get(9999) == 9999 + + if self.isSome: + self.val + else: + otherwise + +proc get*[T](self: var Option[T]): var T {.inline.} = + ## Returns the content of the `var Option` mutably. If it has no value, + ## an `UnpackDefect` exception is raised. + runnableExamples: + var + a = some(42) + b = none(string) + inc(a.get) + assert a.get == 43 + doAssertRaises(UnpackDefect): + echo b.get + + if self.isNone: + raise newException(UnpackDefect, "Can't obtain a value from a `none`") + return self.val + +proc map*[T](self: Option[T], callback: proc (input: T)) {.inline, effectsOf: callback.} = + ## Applies a `callback` function to the value of the `Option`, if it has one. + ## + ## **See also:** + ## * `map proc <#map,Option[T],proc(T)_2>`_ for a version with a callback + ## which returns a value + runnableExamples: + var d = 0 + proc saveDouble(x: int) = + d = 2 * x + + none(int).map(saveDouble) + assert d == 0 + some(42).map(saveDouble) + assert d == 84 + + if self.isSome: + callback(self.val) + +proc map*[T, R](self: Option[T], callback: proc (input: T): R): Option[R] {.inline, effectsOf: callback.} = + ## Applies a `callback` function to the value of the `Option` and returns an + ## `Option` containing the new value. + ## + ## If the `Option` has no value, `none(R)` will be returned. 
+ ## + ## **See also:** + ## * `map proc <#map,Option[T],proc(T)>`_ + ## * `flatMap proc <#flatMap,Option[T],proc(T)>`_ for a version with a + ## callback that returns an `Option` + runnableExamples: + proc isEven(x: int): bool = + x mod 2 == 0 + + assert some(42).map(isEven) == some(true) + assert none(int).map(isEven) == none(bool) + + if self.isSome: + some[R](callback(self.val)) + else: + none(R) + +proc flatten*[T](self: Option[Option[T]]): Option[T] {.inline.} = + ## Remove one level of structure in a nested `Option`. + ## + ## **See also:** + ## * `flatMap proc <#flatMap,Option[T],proc(T)>`_ + runnableExamples: + assert flatten(some(some(42))) == some(42) + assert flatten(none(Option[int])) == none(int) + + if self.isSome: + self.val + else: + none(T) + +proc flatMap*[T, R](self: Option[T], + callback: proc (input: T): Option[R]): Option[R] {.inline, effectsOf: callback.} = + ## Applies a `callback` function to the value of the `Option` and returns the new value. + ## + ## If the `Option` has no value, `none(R)` will be returned. + ## + ## This is similar to `map`, with the difference that the `callback` returns an + ## `Option`, not a raw value. This allows multiple procs with a + ## signature of `A -> Option[B]` to be chained together. + ## + ## See also: + ## * `flatten proc <#flatten,Option[Option[A]]>`_ + ## * `filter proc <#filter,Option[T],proc(T)>`_ + runnableExamples: + proc doublePositives(x: int): Option[int] = + if x > 0: + some(2 * x) + else: + none(int) + + assert some(42).flatMap(doublePositives) == some(84) + assert none(int).flatMap(doublePositives) == none(int) + assert some(-11).flatMap(doublePositives) == none(int) + + map(self, callback).flatten() + +proc filter*[T](self: Option[T], callback: proc (input: T): bool): Option[T] {.inline, effectsOf: callback.} = + ## Applies a `callback` to the value of the `Option`. + ## + ## If the `callback` returns `true`, the option is returned as `some`. 
+ ## If it returns `false`, it is returned as `none`. + ## + ## **See also:** + ## * `flatMap proc <#flatMap,Option[A],proc(A)>`_ + runnableExamples: + proc isEven(x: int): bool = + x mod 2 == 0 + + assert some(42).filter(isEven) == some(42) + assert none(int).filter(isEven) == none(int) + assert some(-11).filter(isEven) == none(int) + + if self.isSome and not callback(self.val): + none(T) + else: + self + +proc `==`*[T](a, b: Option[T]): bool {.inline.} = + ## Returns `true` if both `Option`s are `none`, + ## or if they are both `some` and have equal values. + runnableExamples: + let + a = some(42) + b = none(int) + c = some(42) + d = none(int) + + assert a == c + assert b == d + assert not (a == b) + + when T is SomePointer: + a.val == b.val + else: + (a.isSome and b.isSome and a.val == b.val) or (a.isNone and b.isNone) + +proc `$`*[T](self: Option[T]): string = + ## Get the string representation of the `Option`. + runnableExamples: + assert $some(42) == "some(42)" + assert $none(int) == "none(int)" + + if self.isSome: + when defined(nimLagacyOptionsDollar): + result = "Some(" + else: + result = "some(" + result.addQuoted self.val + result.add ")" + else: + when defined(nimLagacyOptionsDollar): + result = "None[" & name(T) & "]" + else: + result = "none(" & name(T) & ")" + +proc unsafeGet*[T](self: Option[T]): lent T {.inline.}= + ## Returns the value of a `some`. The behavior is undefined for `none`. + ## + ## **Note:** Use this only when you are **absolutely sure** the value is present + ## (e.g. after checking with `isSome <#isSome,Option[T]>`_). + ## Generally, using the `get proc <#get,Option[T]>`_ is preferred. 
+ assert self.isSome + result = self.val diff --git a/lib/pure/os.nim b/lib/pure/os.nim index f53abe81d..78ebb1c88 100644 --- a/lib/pure/os.nim +++ b/lib/pure/os.nim @@ -8,1759 +8,589 @@ # ## This module contains basic operating system facilities like -## retrieving environment variables, reading command line arguments, -## working with directories, running shell commands, etc. -{.deadCodeElim: on.} +## retrieving environment variables, working with directories, +## running shell commands, etc. -{.push debugger: off.} +## .. importdoc:: symlinks.nim, appdirs.nim, dirs.nim, ospaths2.nim -include "system/inclrtl" +runnableExamples: + let myFile = "/path/to/my/file.nim" + assert splitPath(myFile) == (head: "/path/to/my", tail: "file.nim") + when defined(posix): + assert parentDir(myFile) == "/path/to/my" + assert splitFile(myFile) == (dir: "/path/to/my", name: "file", ext: ".nim") + assert myFile.changeFileExt("c") == "/path/to/my/file.c" + +## **See also:** +## * `paths <paths.html>`_ and `files <files.html>`_ modules for high-level file manipulation +## * `osproc module <osproc.html>`_ for process communication beyond +## `execShellCmd proc`_ +## * `uri module <uri.html>`_ +## * `distros module <distros.html>`_ +## * `dynlib module <dynlib.html>`_ +## * `streams module <streams.html>`_ +import std/private/ospaths2 +export ospaths2 + +import std/private/osfiles +export osfiles + +import std/private/osdirs +export osdirs + +import std/private/ossymlinks +export ossymlinks + +import std/private/osappdirs +export osappdirs -import - strutils, times +import std/private/oscommon -when defined(windows): - import winlean +include system/inclrtl +import std/private/since + +import std/cmdline +export cmdline + +import std/[strutils, pathnorm] + +when defined(nimPreviewSlimSystem): + import std/[syncio, assertions, widestrs] + +const weirdTarget = defined(nimscript) or defined(js) + +since (1, 1): + const + invalidFilenameChars* = {'/', '\\', ':', '*', '?', '"', '<', '>', 
'|', '^', '\0'} ## \ + ## Characters that may produce invalid filenames across Linux, Windows and Mac. + ## You can check if your filename contains any of these chars and strip them for safety. + ## Mac bans ``':'``, Linux bans ``'/'``, Windows bans all others. + invalidFilenames* = [ + "CON", "PRN", "AUX", "NUL", + "COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", + "LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"] ## \ + ## Filenames that may be invalid across Linux, Windows, Mac, etc. + ## You can check if your filename match these and rename it for safety + ## (Currently all invalid filenames are from Windows only). + +when weirdTarget: + discard +elif defined(windows): + import std/[winlean, times] elif defined(posix): - import posix + import std/[posix, times] + + proc toTime(ts: Timespec): times.Time {.inline.} = + result = initTime(ts.tv_sec.int64, ts.tv_nsec.int) else: {.error: "OS module not ported to your operating system!".} -include "system/ansi_c" +when weirdTarget: + {.pragma: noWeirdTarget, error: "this proc is not available on the NimScript/js target".} +else: + {.pragma: noWeirdTarget.} -type - ReadEnvEffect* = object of ReadIOEffect ## effect that denotes a read - ## from an environment variable - WriteEnvEffect* = object of WriteIOEffect ## effect that denotes a write - ## to an environment variable - - ReadDirEffect* = object of ReadIOEffect ## effect that denotes a write - ## operation to the directory structure - WriteDirEffect* = object of WriteIOEffect ## effect that denotes a write operation to - ## the directory structure - - OSErrorCode* = distinct int32 ## Specifies an OS Error Code. 
- -{.deprecated: [FReadEnv: ReadEnvEffect, FWriteEnv: WriteEnvEffect, - FReadDir: ReadDirEffect, - FWriteDir: WriteDirEffect, - TOSErrorCode: OSErrorCode -].} -const - doslike = defined(windows) or defined(OS2) or defined(DOS) - # DOS-like filesystem +when defined(nimscript): + # for procs already defined in scriptconfig.nim + template noNimJs(body): untyped = discard +elif defined(js): + {.pragma: noNimJs, error: "this proc is not available on the js target".} +else: + {.pragma: noNimJs.} -when defined(Nimdoc): # only for proper documentation: - const - CurDir* = '.' - ## The constant string used by the operating system to refer to the - ## current directory. - ## - ## For example: '.' for POSIX or ':' for the classic Macintosh. - - ParDir* = ".." - ## The constant string used by the operating system to refer to the - ## parent directory. - ## - ## For example: ".." for POSIX or "::" for the classic Macintosh. - - DirSep* = '/' - ## The character used by the operating system to separate pathname - ## components, for example, '/' for POSIX or ':' for the classic - ## Macintosh. - - AltSep* = '/' - ## An alternative character used by the operating system to separate - ## pathname components, or the same as `DirSep` if only one separator - ## character exists. This is set to '/' on Windows systems where `DirSep` - ## is a backslash. - - PathSep* = ':' - ## The character conventionally used by the operating system to separate - ## search patch components (as in PATH), such as ':' for POSIX or ';' for - ## Windows. - - FileSystemCaseSensitive* = true - ## True if the file system is case sensitive, false otherwise. Used by - ## `cmpPaths` to compare filenames properly. - - ExeExt* = "" - ## The file extension of native executables. For example: - ## "" for POSIX, "exe" on Windows. - - ScriptExt* = "" - ## The file extension of a script file. For example: "" for POSIX, - ## "bat" on Windows. 
- - DynlibFormat* = "lib$1.so" - ## The format string to turn a filename into a `DLL`:idx: file (also - ## called `shared object`:idx: on some operating systems). - -elif defined(macos): - const - CurDir* = ':' - ParDir* = "::" - DirSep* = ':' - AltSep* = Dirsep - PathSep* = ',' - FileSystemCaseSensitive* = false - ExeExt* = "" - ScriptExt* = "" - DynlibFormat* = "$1.dylib" - - # MacOS paths - # =========== - # MacOS directory separator is a colon ":" which is the only character not - # allowed in filenames. - # - # A path containing no colon or which begins with a colon is a partial path. - # E.g. ":kalle:petter" ":kalle" "kalle" - # - # All other paths are full (absolute) paths. E.g. "HD:kalle:" "HD:" - # When generating paths, one is safe if one ensures that all partial paths - # begin with a colon, and all full paths end with a colon. - # In full paths the first name (e g HD above) is the name of a mounted - # volume. - # These names are not unique, because, for instance, two diskettes with the - # same names could be inserted. This means that paths on MacOS are not - # waterproof. In case of equal names the first volume found will do. - # Two colons "::" are the relative path to the parent. Three is to the - # grandparent etc. -elif doslike: - const - CurDir* = '.' - ParDir* = ".." - DirSep* = '\\' # seperator within paths - AltSep* = '/' - PathSep* = ';' # seperator between paths - FileSystemCaseSensitive* = false - ExeExt* = "exe" - ScriptExt* = "bat" - DynlibFormat* = "$1.dll" -elif defined(PalmOS) or defined(MorphOS): - const - DirSep* = '/' - AltSep* = Dirsep - PathSep* = ';' - ParDir* = ".." - FileSystemCaseSensitive* = false - ExeExt* = "" - ScriptExt* = "" - DynlibFormat* = "$1.prc" -elif defined(RISCOS): - const - DirSep* = '.' - AltSep* = '.' - ParDir* = ".." # is this correct? - PathSep* = ',' - FileSystemCaseSensitive* = true - ExeExt* = "" - ScriptExt* = "" - DynlibFormat* = "lib$1.so" -else: # UNIX-like operating system - const - CurDir* = '.' 
- ParDir* = ".." - DirSep* = '/' - AltSep* = DirSep - PathSep* = ':' - FileSystemCaseSensitive* = true - ExeExt* = "" - ScriptExt* = "" - DynlibFormat* = when defined(macosx): "lib$1.dylib" else: "lib$1.so" - -when defined(posix): - when NoFakeVars: - const pathMax = 5000 # doesn't matter really. The concept of PATH_MAX - # doesn't work anymore on modern OSes. - else: - var - pathMax {.importc: "PATH_MAX", header: "<stdlib.h>".}: cint -const - ExtSep* = '.' - ## The character which separates the base filename from the extension; - ## for example, the '.' in ``os.nim``. - -proc osErrorMsg*(): string {.rtl, extern: "nos$1", deprecated.} = - ## Retrieves the operating system's error flag, ``errno``. - ## On Windows ``GetLastError`` is checked before ``errno``. - ## Returns "" if no error occurred. - ## - ## **Deprecated since version 0.9.4**: use the other ``osErrorMsg`` proc. +import std/oserrors +export oserrors +import std/envvars +export envvars - result = "" - when defined(Windows): - var err = getLastError() - if err != 0'i32: - when useWinUnicode: - var msgbuf: WideCString - if formatMessageW(0x00000100 or 0x00001000 or 0x00000200, - nil, err, 0, addr(msgbuf), 0, nil) != 0'i32: - result = $msgbuf - if msgbuf != nil: localFree(cast[pointer](msgbuf)) - else: - var msgbuf: cstring - if formatMessageA(0x00000100 or 0x00001000 or 0x00000200, - nil, err, 0, addr(msgbuf), 0, nil) != 0'i32: - result = $msgbuf - if msgbuf != nil: localFree(msgbuf) - if errno != 0'i32: - result = $os.strerror(errno) - -{.push warning[deprecated]: off.} -proc raiseOSError*(msg: string = "") {.noinline, rtl, extern: "nos$1", - deprecated.} = - ## raises an OSError exception with the given message ``msg``. - ## If ``msg == ""``, the operating system's error flag - ## (``errno``) is converted to a readable error message. On Windows - ## ``GetLastError`` is checked before ``errno``. - ## If no error flag is set, the message ``unknown OS error`` is used. 
- ## - ## **Deprecated since version 0.9.4**: use the other ``raiseOSError`` proc. - if len(msg) == 0: - var m = osErrorMsg() - raise newException(OSError, if m.len > 0: m else: "unknown OS error") - else: - raise newException(OSError, msg) -{.pop.} +import std/private/osseps +export osseps -when not defined(nimfix): - {.deprecated: [osError: raiseOSError].} -proc `==`*(err1, err2: OSErrorCode): bool {.borrow.} -proc `$`*(err: OSErrorCode): string {.borrow.} -proc osErrorMsg*(errorCode: OSErrorCode): string = - ## Converts an OS error code into a human readable string. +proc expandTilde*(path: string): string {. + tags: [ReadEnvEffect, ReadIOEffect].} = + ## Expands ``~`` or a path starting with ``~/`` to a full path, replacing + ## ``~`` with `getHomeDir()`_ (otherwise returns ``path`` unmodified). ## - ## The error code can be retrieved using the ``osLastError`` proc. + ## Windows: this is still supported despite the Windows platform not having this + ## convention; also, both ``~/`` and ``~\`` are handled. ## - ## If conversion fails, or ``errorCode`` is ``0`` then ``""`` will be - ## returned. + ## See also: + ## * `getHomeDir proc`_ + ## * `getConfigDir proc`_ + ## * `getTempDir proc`_ + ## * `getCurrentDir proc`_ + ## * `setCurrentDir proc`_ + runnableExamples: + assert expandTilde("~" / "appname.cfg") == getHomeDir() / "appname.cfg" + assert expandTilde("~/foo/bar") == getHomeDir() / "foo/bar" + assert expandTilde("/foo/bar") == "/foo/bar" + + if len(path) == 0 or path[0] != '~': + result = path + elif len(path) == 1: + result = getHomeDir() + elif (path[1] in {DirSep, AltSep}): + result = getHomeDir() / path.substr(2) + else: + # TODO: handle `~bob` and `~bob/` which means home of bob + result = path + +proc quoteShellWindows*(s: string): string {.noSideEffect, rtl, extern: "nosp$1".} = + ## Quote `s`, so it can be safely passed to Windows API. 
## - ## On Windows, the ``-d:useWinAnsi`` compilation flag can be used to - ## make this procedure use the non-unicode Win API calls to retrieve the - ## message. + ## Based on Python's `subprocess.list2cmdline`. + ## See `this link <http://msdn.microsoft.com/en-us/library/17w5ykft.aspx>`_ + ## for more details. + let needQuote = {' ', '\t'} in s or s.len == 0 result = "" - when defined(Windows): - if errorCode != OSErrorCode(0'i32): - when useWinUnicode: - var msgbuf: WideCString - if formatMessageW(0x00000100 or 0x00001000 or 0x00000200, - nil, errorCode.int32, 0, addr(msgbuf), 0, nil) != 0'i32: - result = $msgbuf - if msgbuf != nil: localFree(cast[pointer](msgbuf)) - else: - var msgbuf: cstring - if formatMessageA(0x00000100 or 0x00001000 or 0x00000200, - nil, errorCode.int32, 0, addr(msgbuf), 0, nil) != 0'i32: - result = $msgbuf - if msgbuf != nil: localFree(msgbuf) + var backslashBuff = "" + if needQuote: + result.add("\"") + + for c in s: + if c == '\\': + backslashBuff.add(c) + elif c == '\"': + for i in 0..<backslashBuff.len*2: + result.add('\\') + backslashBuff.setLen(0) + result.add("\\\"") + else: + if backslashBuff.len != 0: + result.add(backslashBuff) + backslashBuff.setLen(0) + result.add(c) + + if backslashBuff.len > 0: + result.add(backslashBuff) + if needQuote: + result.add(backslashBuff) + result.add("\"") + + +proc quoteShellPosix*(s: string): string {.noSideEffect, rtl, extern: "nosp$1".} = + ## Quote ``s``, so it can be safely passed to POSIX shell. + const safeUnixChars = {'%', '+', '-', '.', '/', '_', ':', '=', '@', + '0'..'9', 'A'..'Z', 'a'..'z'} + if s.len == 0: + result = "''" + elif s.allCharsInSet(safeUnixChars): + result = s else: - if errorCode != OSErrorCode(0'i32): - result = $os.strerror(errorCode.int32) + result = "'" & s.replace("'", "'\"'\"'") & "'" -proc raiseOSError*(errorCode: OSErrorCode) = - ## Raises an ``OSError`` exception. The ``errorCode`` will determine the - ## message, ``osErrorMsg`` will be used to get this message. 
- ## - ## The error code can be retrieved using the ``osLastError`` proc. - ## - ## If the error code is ``0`` or an error message could not be retrieved, - ## the message ``unknown OS error`` will be used. - var e: ref OSError; new(e) - e.errorCode = errorCode.int32 - e.msg = osErrorMsg(errorCode) - if e.msg == "": - e.msg = "unknown OS error" - raise e - -{.push stackTrace:off.} -proc osLastError*(): OSErrorCode = - ## Retrieves the last operating system error code. - ## - ## This procedure is useful in the event when an OS call fails. In that case - ## this procedure will return the error code describing the reason why the - ## OS call failed. The ``OSErrorMsg`` procedure can then be used to convert - ## this code into a string. - ## - ## **Warning**: - ## The behaviour of this procedure varies between Windows and POSIX systems. - ## On Windows some OS calls can reset the error code to ``0`` causing this - ## procedure to return ``0``. It is therefore advised to call this procedure - ## immediately after an OS call fails. On POSIX systems this is not a problem. +when defined(windows) or defined(posix) or defined(nintendoswitch): + proc quoteShell*(s: string): string {.noSideEffect, rtl, extern: "nosp$1".} = + ## Quote ``s``, so it can be safely passed to shell. + ## + ## When on Windows, it calls `quoteShellWindows proc`_. + ## Otherwise, calls `quoteShellPosix proc`_. + when defined(windows): + result = quoteShellWindows(s) + else: + result = quoteShellPosix(s) - when defined(windows): - result = OSErrorCode(getLastError()) - else: - result = OSErrorCode(errno) -{.pop.} + proc quoteShellCommand*(args: openArray[string]): string = + ## Concatenates and quotes shell arguments `args`. + runnableExamples: + when defined(posix): + assert quoteShellCommand(["aaa", "", "c d"]) == "aaa '' 'c d'" + when defined(windows): + assert quoteShellCommand(["aaa", "", "c d"]) == "aaa \"\" \"c d\"" -proc unixToNativePath*(path: string, drive=""): string {. 
- noSideEffect, rtl, extern: "nos$1".} = - ## Converts an UNIX-like path to a native one. - ## - ## On an UNIX system this does nothing. Else it converts - ## '/', '.', '..' to the appropriate things. - ## - ## On systems with a concept of "drives", `drive` is used to determine - ## which drive label to use during absolute path conversion. - ## `drive` defaults to the drive of the current working directory, and is - ## ignored on systems that do not have a concept of "drives". + # can't use `map` pending https://github.com/nim-lang/Nim/issues/8303 + result = "" + for i in 0..<args.len: + if i > 0: result.add " " + result.add quoteShell(args[i]) - when defined(unix): - result = path - else: - var start: int - if path[0] == '/': - # an absolute path - when doslike: - if drive != "": - result = drive & ":" & DirSep - else: - result = $DirSep - elif defined(macos): - result = "" # must not start with ':' - else: - result = $DirSep - start = 1 - elif path[0] == '.' and path[1] == '/': - # current directory - result = $CurDir - start = 2 - else: - result = "" - start = 0 - - var i = start - while i < len(path): # ../../../ --> :::: - if path[i] == '.' and path[i+1] == '.' 
and path[i+2] == '/': - # parent directory - when defined(macos): - if result[high(result)] == ':': - add result, ':' - else: - add result, ParDir - else: - add result, ParDir & DirSep - inc(i, 3) - elif path[i] == '/': - add result, DirSep - inc(i) - else: - add result, path[i] - inc(i) - -when defined(windows): - when useWinUnicode: - template wrapUnary(varname, winApiProc, arg: expr) {.immediate.} = - var varname = winApiProc(newWideCString(arg)) - - template wrapBinary(varname, winApiProc, arg, arg2: expr) {.immediate.} = - var varname = winApiProc(newWideCString(arg), arg2) - proc findFirstFile(a: string, b: var TWIN32_FIND_DATA): THandle = - result = findFirstFileW(newWideCString(a), b) - template findNextFile(a, b: expr): expr = findNextFileW(a, b) - template getCommandLine(): expr = getCommandLineW() - - template getFilename(f: expr): expr = - $cast[WideCString](addr(f.cFilename[0])) - else: - template findFirstFile(a, b: expr): expr = findFirstFileA(a, b) - template findNextFile(a, b: expr): expr = findNextFileA(a, b) - template getCommandLine(): expr = getCommandLineA() +when not weirdTarget: + proc c_system(cmd: cstring): cint {. + importc: "system", header: "<stdlib.h>".} - template getFilename(f: expr): expr = $f.cFilename + when not defined(windows): + proc c_free(p: pointer) {. + importc: "free", header: "<stdlib.h>".} - proc skipFindData(f: TWIN32_FIND_DATA): bool {.inline.} = - # Note - takes advantage of null delimiter in the cstring - const dot = ord('.') - result = f.cFileName[0].int == dot and (f.cFileName[1].int == 0 or - f.cFileName[1].int == dot and f.cFileName[2].int == 0) -proc existsFile*(filename: string): bool {.rtl, extern: "nos$1", - tags: [ReadDirEffect].} = - ## Returns true if the file exists, false otherwise. 
- when defined(windows): - when useWinUnicode: - wrapUnary(a, getFileAttributesW, filename) - else: - var a = getFileAttributesA(filename) - if a != -1'i32: - result = (a and FILE_ATTRIBUTE_DIRECTORY) == 0'i32 - else: - var res: TStat - return stat(filename, res) >= 0'i32 and S_ISREG(res.st_mode) +const + ExeExts* = ## Platform specific file extension for executables. + ## On Windows ``["exe", "cmd", "bat"]``, on Posix ``[""]``. + when defined(windows): ["exe", "cmd", "bat"] else: [""] -proc existsDir*(dir: string): bool {.rtl, extern: "nos$1", tags: [ReadDirEffect].} = - ## Returns true iff the directory `dir` exists. If `dir` is a file, false - ## is returned. - when defined(windows): - when useWinUnicode: - wrapUnary(a, getFileAttributesW, dir) - else: - var a = getFileAttributesA(dir) - if a != -1'i32: - result = (a and FILE_ATTRIBUTE_DIRECTORY) != 0'i32 +proc findExe*(exe: string, followSymlinks: bool = true; + extensions: openArray[string]=ExeExts): string {. + tags: [ReadDirEffect, ReadEnvEffect, ReadIOEffect], noNimJs.} = + ## Searches for `exe` in the current working directory and then + ## in directories listed in the ``PATH`` environment variable. + ## + ## Returns `""` if the `exe` cannot be found. `exe` + ## is added the `ExeExts`_ file extensions if it has none. + ## + ## If the system supports symlinks it also resolves them until it + ## meets the actual file. This behavior can be disabled if desired + ## by setting `followSymlinks = false`. + + if exe.len == 0: return + template checkCurrentDir() = + for ext in extensions: + result = addFileExt(exe, ext) + if fileExists(result): return + when defined(posix): + if '/' in exe: checkCurrentDir() else: - var res: TStat - return stat(dir, res) >= 0'i32 and S_ISDIR(res.st_mode) - -proc symlinkExists*(link: string): bool {.rtl, extern: "nos$1", - tags: [ReadDirEffect].} = - ## Returns true iff the symlink `link` exists. Will return true - ## regardless of whether the link points to a directory or file. 
- when defined(windows): - when useWinUnicode: - wrapUnary(a, getFileAttributesW, link) + checkCurrentDir() + let path = getEnv("PATH") + for candidate in split(path, PathSep): + if candidate.len == 0: continue + when defined(windows): + var x = (if candidate[0] == '"' and candidate[^1] == '"': + substr(candidate, 1, candidate.len-2) else: candidate) / + exe else: - var a = getFileAttributesA(link) - if a != -1'i32: - result = (a and FILE_ATTRIBUTE_REPARSE_POINT) != 0'i32 - else: - var res: TStat - return lstat(link, res) >= 0'i32 and S_ISLNK(res.st_mode) - -proc fileExists*(filename: string): bool {.inline.} = - ## Synonym for existsFile - existsFile(filename) + var x = expandTilde(candidate) / exe + for ext in extensions: + var x = addFileExt(x, ext) + if fileExists(x): + when not (defined(windows) or defined(nintendoswitch)): + while followSymlinks: # doubles as if here + if x.symlinkExists: + var r = newString(maxSymlinkLen) + var len = readlink(x.cstring, r.cstring, maxSymlinkLen) + if len < 0: + raiseOSError(osLastError(), exe) + if len > maxSymlinkLen: + r = newString(len+1) + len = readlink(x.cstring, r.cstring, len) + setLen(r, len) + if isAbsolute(r): + x = r + else: + x = parentDir(x) / r + else: + break + return x + result = "" -proc dirExists*(dir: string): bool {.inline.} = - ## Synonym for existsDir - existsDir(dir) +when weirdTarget: + const times = "fake const" + template Time(x: untyped): untyped = string -proc getLastModificationTime*(file: string): Time {.rtl, extern: "nos$1".} = +proc getLastModificationTime*(file: string): times.Time {.rtl, extern: "nos$1", noWeirdTarget.} = ## Returns the `file`'s last modification time. 
+ ## + ## See also: + ## * `getLastAccessTime proc`_ + ## * `getCreationTime proc`_ + ## * `fileNewer proc`_ when defined(posix): - var res: TStat - if stat(file, res) < 0'i32: raiseOSError(osLastError()) - return res.st_mtime + var res: Stat + if stat(file, res) < 0'i32: raiseOSError(osLastError(), file) + result = res.st_mtim.toTime else: - var f: TWIN32_FIND_DATA + var f: WIN32_FIND_DATA var h = findFirstFile(file, f) - if h == -1'i32: raiseOSError(osLastError()) - result = winTimeToUnixTime(rdFileTime(f.ftLastWriteTime)) + if h == -1'i32: raiseOSError(osLastError(), file) + result = fromWinTime(rdFileTime(f.ftLastWriteTime)) findClose(h) -proc getLastAccessTime*(file: string): Time {.rtl, extern: "nos$1".} = +proc getLastAccessTime*(file: string): times.Time {.rtl, extern: "nos$1", noWeirdTarget.} = ## Returns the `file`'s last read or write access time. + ## + ## See also: + ## * `getLastModificationTime proc`_ + ## * `getCreationTime proc`_ + ## * `fileNewer proc`_ when defined(posix): - var res: TStat - if stat(file, res) < 0'i32: raiseOSError(osLastError()) - return res.st_atime + var res: Stat + if stat(file, res) < 0'i32: raiseOSError(osLastError(), file) + result = res.st_atim.toTime else: - var f: TWIN32_FIND_DATA + var f: WIN32_FIND_DATA var h = findFirstFile(file, f) - if h == -1'i32: raiseOSError(osLastError()) - result = winTimeToUnixTime(rdFileTime(f.ftLastAccessTime)) + if h == -1'i32: raiseOSError(osLastError(), file) + result = fromWinTime(rdFileTime(f.ftLastAccessTime)) findClose(h) -proc getCreationTime*(file: string): Time {.rtl, extern: "nos$1".} = +proc getCreationTime*(file: string): times.Time {.rtl, extern: "nos$1", noWeirdTarget.} = ## Returns the `file`'s creation time. - ## Note that under posix OS's, the returned time may actually be the time at - ## which the file's attribute's were last modified. + ## + ## **Note:** Under POSIX OS's, the returned time may actually be the time at + ## which the file's attribute's were last modified. 
See + ## `here <https://github.com/nim-lang/Nim/issues/1058>`_ for details. + ## + ## See also: + ## * `getLastModificationTime proc`_ + ## * `getLastAccessTime proc`_ + ## * `fileNewer proc`_ when defined(posix): - var res: TStat - if stat(file, res) < 0'i32: raiseOSError(osLastError()) - return res.st_ctime + var res: Stat + if stat(file, res) < 0'i32: raiseOSError(osLastError(), file) + result = res.st_ctim.toTime else: - var f: TWIN32_FIND_DATA + var f: WIN32_FIND_DATA var h = findFirstFile(file, f) - if h == -1'i32: raiseOSError(osLastError()) - result = winTimeToUnixTime(rdFileTime(f.ftCreationTime)) + if h == -1'i32: raiseOSError(osLastError(), file) + result = fromWinTime(rdFileTime(f.ftCreationTime)) findClose(h) -proc fileNewer*(a, b: string): bool {.rtl, extern: "nos$1".} = +proc fileNewer*(a, b: string): bool {.rtl, extern: "nos$1", noWeirdTarget.} = ## Returns true if the file `a` is newer than file `b`, i.e. if `a`'s ## modification time is later than `b`'s. - when defined(posix): - result = getLastModificationTime(a) - getLastModificationTime(b) >= 0 - # Posix's resolution sucks so, we use '>=' for posix. - else: - result = getLastModificationTime(a) - getLastModificationTime(b) > 0 - -proc getCurrentDir*(): string {.rtl, extern: "nos$1", tags: [].} = - ## Returns the `current working directory`:idx:. 
- const bufsize = 512 # should be enough - when defined(windows): - when useWinUnicode: - var res = newWideCString("", bufsize) - var L = getCurrentDirectoryW(bufsize, res) - if L == 0'i32: raiseOSError(osLastError()) - result = res$L - else: - result = newString(bufsize) - var L = getCurrentDirectoryA(bufsize, result) - if L == 0'i32: raiseOSError(osLastError()) - setLen(result, L) - else: - result = newString(bufsize) - if getcwd(result, bufsize) != nil: - setLen(result, c_strlen(result)) - else: - raiseOSError(osLastError()) - -proc setCurrentDir*(newDir: string) {.inline, tags: [].} = - ## Sets the `current working directory`:idx:; `OSError` is raised if - ## `newDir` cannot been set. - when defined(Windows): - when useWinUnicode: - if setCurrentDirectoryW(newWideCString(newDir)) == 0'i32: - raiseOSError(osLastError()) - else: - if setCurrentDirectoryA(newDir) == 0'i32: raiseOSError(osLastError()) - else: - if chdir(newDir) != 0'i32: raiseOSError(osLastError()) - -proc joinPath*(head, tail: string): string {. - noSideEffect, rtl, extern: "nos$1".} = - ## Joins two directory names to one. - ## - ## For example on Unix: - ## - ## .. code-block:: nim - ## joinPath("usr", "lib") - ## - ## results in: - ## - ## .. code-block:: nim - ## "usr/lib" ## - ## If head is the empty string, tail is returned. If tail is the empty - ## string, head is returned with a trailing path separator. If tail starts - ## with a path separator it will be removed when concatenated to head. Other - ## path separators not located on boundaries won't be modified. More - ## examples on Unix: - ## - ## .. 
code-block:: nim - ## assert joinPath("usr", "") == "usr/" - ## assert joinPath("", "lib") == "lib" - ## assert joinPath("", "/lib") == "/lib" - ## assert joinPath("usr/", "/lib") == "usr/lib" - if len(head) == 0: - result = tail - elif head[len(head)-1] in {DirSep, AltSep}: - if tail[0] in {DirSep, AltSep}: - result = head & substr(tail, 1) - else: - result = head & tail - else: - if tail[0] in {DirSep, AltSep}: - result = head & tail + ## See also: + ## * `getLastModificationTime proc`_ + ## * `getLastAccessTime proc`_ + ## * `getCreationTime proc`_ + when defined(posix): + # If we don't have access to nanosecond resolution, use '>=' + when not StatHasNanoseconds: + result = getLastModificationTime(a) >= getLastModificationTime(b) else: - result = head & DirSep & tail - -proc joinPath*(parts: varargs[string]): string {.noSideEffect, - rtl, extern: "nos$1OpenArray".} = - ## The same as `joinPath(head, tail)`, but works with any number of directory - ## parts. You need to pass at least one element or the proc will assert in - ## debug builds and crash on release builds. - result = parts[0] - for i in 1..high(parts): - result = joinPath(result, parts[i]) - -proc `/` * (head, tail: string): string {.noSideEffect.} = - ## The same as ``joinPath(head, tail)`` - ## - ## Here are some examples for Unix: - ## - ## .. code-block:: nim - ## assert "usr" / "" == "usr/" - ## assert "" / "lib" == "lib" - ## assert "" / "/lib" == "/lib" - ## assert "usr/" / "/lib" == "usr/lib" - return joinPath(head, tail) - -proc splitPath*(path: string): tuple[head, tail: string] {. - noSideEffect, rtl, extern: "nos$1".} = - ## Splits a directory into (head, tail), so that - ## ``head / tail == path`` (except for edge cases like "/usr"). - ## - ## Examples: - ## - ## .. 
code-block:: nim - ## splitPath("usr/local/bin") -> ("usr/local", "bin") - ## splitPath("usr/local/bin/") -> ("usr/local/bin", "") - ## splitPath("bin") -> ("", "bin") - ## splitPath("/bin") -> ("", "bin") - ## splitPath("") -> ("", "") - var sepPos = -1 - for i in countdown(len(path)-1, 0): - if path[i] in {DirSep, AltSep}: - sepPos = i - break - if sepPos >= 0: - result.head = substr(path, 0, sepPos-1) - result.tail = substr(path, sepPos+1) + result = getLastModificationTime(a) > getLastModificationTime(b) else: - result.head = "" - result.tail = path - -proc parentDirPos(path: string): int = - var q = 1 - if len(path) >= 1 and path[len(path)-1] in {DirSep, AltSep}: q = 2 - for i in countdown(len(path)-q, 0): - if path[i] in {DirSep, AltSep}: return i - result = -1 - -proc parentDir*(path: string): string {. - noSideEffect, rtl, extern: "nos$1".} = - ## Returns the parent directory of `path`. - ## - ## This is often the same as the ``head`` result of ``splitPath``. - ## If there is no parent, "" is returned. - ## | Example: ``parentDir("/usr/local/bin") == "/usr/local"``. - ## | Example: ``parentDir("/usr/local/bin/") == "/usr/local"``. - let sepPos = parentDirPos(path) - if sepPos >= 0: - result = substr(path, 0, sepPos-1) - else: - result = "" + result = getLastModificationTime(a) > getLastModificationTime(b) -proc isRootDir*(path: string): bool {. - noSideEffect, rtl, extern: "nos$1".} = - ## Checks whether a given `path` is a root directory - result = parentDirPos(path) < 0 -iterator parentDirs*(path: string, fromRoot=false, inclusive=true): string = - ## Walks over all parent directories of a given `path` - ## - ## If `fromRoot` is set, the traversal will start from the file system root - ## diretory. If `inclusive` is set, the original argument will be included - ## in the traversal. - ## - ## Relative paths won't be expanded by this proc. Instead, it will traverse - ## only the directories appearing in the relative path. 
- if not fromRoot: - var current = path - if inclusive: yield path - while true: - if current.isRootDir: break - current = current.parentDir - yield current - else: - for i in countup(0, path.len - 2): # ignore the last / - # deal with non-normalized paths such as /foo//bar//baz - if path[i] in {DirSep, AltSep} and - (i == 0 or path[i-1] notin {DirSep, AltSep}): - yield path.substr(0, i) - - if inclusive: yield path - -proc `/../` * (head, tail: string): string {.noSideEffect.} = - ## The same as ``parentDir(head) / tail`` unless there is no parent directory. - ## Then ``head / tail`` is performed instead. - let sepPos = parentDirPos(head) - if sepPos >= 0: - result = substr(head, 0, sepPos-1) / tail - else: - result = head / tail - -proc normExt(ext: string): string = - if ext == "" or ext[0] == ExtSep: result = ext # no copy needed here - else: result = ExtSep & ext - -proc searchExtPos(s: string): int = - # BUGFIX: do not search until 0! .DS_Store is no file extension! - result = -1 - for i in countdown(len(s)-1, 1): - if s[i] == ExtSep: - result = i - break - elif s[i] in {DirSep, AltSep}: - break # do not skip over path - -proc splitFile*(path: string): tuple[dir, name, ext: string] {. - noSideEffect, rtl, extern: "nos$1".} = - ## Splits a filename into (dir, filename, extension). - ## `dir` does not end in `DirSep`. - ## `extension` includes the leading dot. - ## - ## Example: - ## - ## .. code-block:: nim - ## var (dir, name, ext) = splitFile("usr/local/nimc.html") - ## assert dir == "usr/local" - ## assert name == "nimc" - ## assert ext == ".html" - ## - ## If `path` has no extension, `ext` is the empty string. - ## If `path` has no directory component, `dir` is the empty string. - ## If `path` has no filename component, `name` and `ext` are empty strings. 
- if path.len == 0 or path[path.len-1] in {DirSep, AltSep}: - result = (path, "", "") - else: - var sepPos = -1 - var dotPos = path.len - for i in countdown(len(path)-1, 0): - if path[i] == ExtSep: - if dotPos == path.len and i > 0 and - path[i-1] notin {DirSep, AltSep}: dotPos = i - elif path[i] in {DirSep, AltSep}: - sepPos = i - break - result.dir = substr(path, 0, sepPos-1) - result.name = substr(path, sepPos+1, dotPos-1) - result.ext = substr(path, dotPos) - -proc extractFilename*(path: string): string {. - noSideEffect, rtl, extern: "nos$1".} = - ## Extracts the filename of a given `path`. This is the same as - ## ``name & ext`` from ``splitFile(path)``. - if path.len == 0 or path[path.len-1] in {DirSep, AltSep}: - result = "" - else: - result = splitPath(path).tail - -proc expandFilename*(filename: string): string {.rtl, extern: "nos$1", - tags: [ReadDirEffect].} = - ## Returns the full path of `filename`, raises OSError in case of an error. +proc isAdmin*: bool {.noWeirdTarget.} = + ## Returns whether the caller's process is a member of the Administrators local + ## group (on Windows) or a root (on POSIX), via `geteuid() == 0`. when defined(windows): - const bufsize = 3072'i32 - when useWinUnicode: - var unused: WideCString - var res = newWideCString("", bufsize div 2) - var L = getFullPathNameW(newWideCString(filename), bufsize, res, unused) - if L <= 0'i32 or L >= bufsize: - raiseOSError(osLastError()) - result = res$L - else: - var unused: cstring - result = newString(bufsize) - var L = getFullPathNameA(filename, bufsize, result, unused) - if L <= 0'i32 or L >= bufsize: raiseOSError(osLastError()) - setLen(result, L) - else: - # careful, realpath needs to take an allocated buffer according to Posix: - result = newString(pathMax) - var r = realpath(filename, result) - if r.isNil: raiseOSError(osLastError()) - setLen(result, c_strlen(result)) - -proc changeFileExt*(filename, ext: string): string {. 
- noSideEffect, rtl, extern: "nos$1".} = - ## Changes the file extension to `ext`. - ## - ## If the `filename` has no extension, `ext` will be added. - ## If `ext` == "" then any extension is removed. - ## `Ext` should be given without the leading '.', because some - ## filesystems may use a different character. (Although I know - ## of none such beast.) - var extPos = searchExtPos(filename) - if extPos < 0: result = filename & normExt(ext) - else: result = substr(filename, 0, extPos-1) & normExt(ext) - -proc addFileExt*(filename, ext: string): string {. - noSideEffect, rtl, extern: "nos$1".} = - ## Adds the file extension `ext` to `filename`, unless - ## `filename` already has an extension. - ## - ## `Ext` should be given without the leading '.', because some - ## filesystems may use a different character. - ## (Although I know of none such beast.) - var extPos = searchExtPos(filename) - if extPos < 0: result = filename & normExt(ext) - else: result = filename - -proc cmpPaths*(pathA, pathB: string): int {. - noSideEffect, rtl, extern: "nos$1".} = - ## Compares two paths. - ## - ## On a case-sensitive filesystem this is done - ## case-sensitively otherwise case-insensitively. 
Returns: - ## - ## | 0 iff pathA == pathB - ## | < 0 iff pathA < pathB - ## | > 0 iff pathA > pathB - if FileSystemCaseSensitive: - result = cmp(pathA, pathB) - else: - result = cmpIgnoreCase(pathA, pathB) + # Rewrite of the example from Microsoft Docs: + # https://docs.microsoft.com/en-us/windows/win32/api/securitybaseapi/nf-securitybaseapi-checktokenmembership#examples + # and corresponding PostgreSQL function: + # https://doxygen.postgresql.org/win32security_8c.html#ae6b61e106fa5d6c5d077a9d14ee80569 + var ntAuthority = SID_IDENTIFIER_AUTHORITY(value: SECURITY_NT_AUTHORITY) + var administratorsGroup: PSID + if not isSuccess(allocateAndInitializeSid(addr ntAuthority, + BYTE(2), + SECURITY_BUILTIN_DOMAIN_RID, + DOMAIN_ALIAS_RID_ADMINS, + 0, 0, 0, 0, 0, 0, + addr administratorsGroup)): + raiseOSError(osLastError(), "could not get SID for Administrators group") -proc isAbsolute*(path: string): bool {.rtl, noSideEffect, extern: "nos$1".} = - ## Checks whether a given `path` is absolute. - ## - ## On Windows, network paths are considered absolute too. 
- when doslike: - var len = len(path) - result = (len > 1 and path[0] in {'/', '\\'}) or - (len > 2 and path[0] in Letters and path[1] == ':') - elif defined(macos): - result = path.len > 0 and path[0] != ':' - elif defined(RISCOS): - result = path[0] == '$' - elif defined(posix): - result = path[0] == '/' - -when defined(Windows): - proc openHandle(path: string, followSymlink=true): THandle = - var flags = FILE_FLAG_BACKUP_SEMANTICS or FILE_ATTRIBUTE_NORMAL - if not followSymlink: - flags = flags or FILE_FLAG_OPEN_REPARSE_POINT - - when useWinUnicode: - result = createFileW( - newWideCString(path), 0'i32, - FILE_SHARE_DELETE or FILE_SHARE_READ or FILE_SHARE_WRITE, - nil, OPEN_EXISTING, flags, 0 - ) - else: - result = createFileA( - path, 0'i32, - FILE_SHARE_DELETE or FILE_SHARE_READ or FILE_SHARE_WRITE, - nil, OPEN_EXISTING, flags, 0 - ) - -proc sameFile*(path1, path2: string): bool {.rtl, extern: "nos$1", - tags: [ReadDirEffect].} = - ## Returns True if both pathname arguments refer to the same physical - ## file or directory. Raises an exception if any of the files does not - ## exist or information about it can not be obtained. - ## - ## This proc will return true if given two alternative hard-linked or - ## sym-linked paths to the same file or directory. 
- when defined(Windows): - var success = true - var f1 = openHandle(path1) - var f2 = openHandle(path2) - - var lastErr: OSErrorCode - if f1 != INVALID_HANDLE_VALUE and f2 != INVALID_HANDLE_VALUE: - var fi1, fi2: TBY_HANDLE_FILE_INFORMATION - - if getFileInformationByHandle(f1, addr(fi1)) != 0 and - getFileInformationByHandle(f2, addr(fi2)) != 0: - result = fi1.dwVolumeSerialNumber == fi2.dwVolumeSerialNumber and - fi1.nFileIndexHigh == fi2.nFileIndexHigh and - fi1.nFileIndexLow == fi2.nFileIndexLow - else: - lastErr = osLastError() - success = false - else: - lastErr = osLastError() - success = false - - discard closeHandle(f1) - discard closeHandle(f2) - - if not success: raiseOSError(lastErr) - else: - var a, b: TStat - if stat(path1, a) < 0'i32 or stat(path2, b) < 0'i32: - raiseOSError(osLastError()) - else: - result = a.st_dev == b.st_dev and a.st_ino == b.st_ino - -proc sameFileContent*(path1, path2: string): bool {.rtl, extern: "nos$1", - tags: [ReadIOEffect].} = - ## Returns True if both pathname arguments refer to files with identical - ## binary content. 
- const - bufSize = 8192 # 8K buffer - var - a, b: File - if not open(a, path1): return false - if not open(b, path2): - close(a) - return false - var bufA = alloc(bufSize) - var bufB = alloc(bufSize) - while true: - var readA = readBuffer(a, bufA, bufSize) - var readB = readBuffer(b, bufB, bufSize) - if readA != readB: - result = false - break - if readA == 0: - result = true - break - result = equalMem(bufA, bufB, readA) - if not result: break - if readA != bufSize: break # end of file - dealloc(bufA) - dealloc(bufB) - close(a) - close(b) + try: + var b: WINBOOL + if not isSuccess(checkTokenMembership(0, administratorsGroup, addr b)): + raiseOSError(osLastError(), "could not check access token membership") -type - FilePermission* = enum ## file access permission; modelled after UNIX - fpUserExec, ## execute access for the file owner - fpUserWrite, ## write access for the file owner - fpUserRead, ## read access for the file owner - fpGroupExec, ## execute access for the group - fpGroupWrite, ## write access for the group - fpGroupRead, ## read access for the group - fpOthersExec, ## execute access for others - fpOthersWrite, ## write access for others - fpOthersRead ## read access for others - -{.deprecated: [TFilePermission: FilePermission].} - -proc getFilePermissions*(filename: string): set[FilePermission] {. - rtl, extern: "nos$1", tags: [ReadDirEffect].} = - ## retrieves file permissions for `filename`. `OSError` is raised in case of - ## an error. On Windows, only the ``readonly`` flag is checked, every other - ## permission is available in any case. 
- when defined(posix): - var a: TStat - if stat(filename, a) < 0'i32: raiseOSError(osLastError()) - result = {} - if (a.st_mode and S_IRUSR) != 0'i32: result.incl(fpUserRead) - if (a.st_mode and S_IWUSR) != 0'i32: result.incl(fpUserWrite) - if (a.st_mode and S_IXUSR) != 0'i32: result.incl(fpUserExec) - - if (a.st_mode and S_IRGRP) != 0'i32: result.incl(fpGroupRead) - if (a.st_mode and S_IWGRP) != 0'i32: result.incl(fpGroupWrite) - if (a.st_mode and S_IXGRP) != 0'i32: result.incl(fpGroupExec) - - if (a.st_mode and S_IROTH) != 0'i32: result.incl(fpOthersRead) - if (a.st_mode and S_IWOTH) != 0'i32: result.incl(fpOthersWrite) - if (a.st_mode and S_IXOTH) != 0'i32: result.incl(fpOthersExec) - else: - when useWinUnicode: - wrapUnary(res, getFileAttributesW, filename) - else: - var res = getFileAttributesA(filename) - if res == -1'i32: raiseOSError(osLastError()) - if (res and FILE_ATTRIBUTE_READONLY) != 0'i32: - result = {fpUserExec, fpUserRead, fpGroupExec, fpGroupRead, - fpOthersExec, fpOthersRead} - else: - result = {fpUserExec..fpOthersRead} - -proc setFilePermissions*(filename: string, permissions: set[FilePermission]) {. - rtl, extern: "nos$1", tags: [WriteDirEffect].} = - ## sets the file permissions for `filename`. `OSError` is raised in case of - ## an error. On Windows, only the ``readonly`` flag is changed, depending on - ## ``fpUserWrite``. 
- when defined(posix): - var p = 0'i32 - if fpUserRead in permissions: p = p or S_IRUSR - if fpUserWrite in permissions: p = p or S_IWUSR - if fpUserExec in permissions: p = p or S_IXUSR - - if fpGroupRead in permissions: p = p or S_IRGRP - if fpGroupWrite in permissions: p = p or S_IWGRP - if fpGroupExec in permissions: p = p or S_IXGRP - - if fpOthersRead in permissions: p = p or S_IROTH - if fpOthersWrite in permissions: p = p or S_IWOTH - if fpOthersExec in permissions: p = p or S_IXOTH - - if chmod(filename, p) != 0: raiseOSError(osLastError()) - else: - when useWinUnicode: - wrapUnary(res, getFileAttributesW, filename) - else: - var res = getFileAttributesA(filename) - if res == -1'i32: raiseOSError(osLastError()) - if fpUserWrite in permissions: - res = res and not FILE_ATTRIBUTE_READONLY - else: - res = res or FILE_ATTRIBUTE_READONLY - when useWinUnicode: - wrapBinary(res2, setFileAttributesW, filename, res) - else: - var res2 = setFileAttributesA(filename, res) - if res2 == - 1'i32: raiseOSError(osLastError()) + result = isSuccess(b) + finally: + if freeSid(administratorsGroup) != nil: + raiseOSError(osLastError(), "failed to free SID for Administrators group") -proc copyFile*(source, dest: string) {.rtl, extern: "nos$1", - tags: [ReadIOEffect, WriteIOEffect].} = - ## Copies a file from `source` to `dest`. - ## - ## If this fails, `OSError` is raised. On the Windows platform this proc will - ## copy the source file's attributes into dest. On other platforms you need - ## to use `getFilePermissions() <#getFilePermissions>`_ and - ## `setFilePermissions() <#setFilePermissions>`_ to copy them by hand (or use - ## the convenience `copyFileWithPermissions() <#copyFileWithPermissions>`_ - ## proc), otherwise `dest` will inherit the default permissions of a newly - ## created file for the user. If `dest` already exists, the file attributes - ## will be preserved and the content overwritten. 
- when defined(Windows): - when useWinUnicode: - let s = newWideCString(source) - let d = newWideCString(dest) - if copyFileW(s, d, 0'i32) == 0'i32: raiseOSError(osLastError()) - else: - if copyFileA(source, dest, 0'i32) == 0'i32: raiseOSError(osLastError()) - else: - # generic version of copyFile which works for any platform: - const bufSize = 8000 # better for memory manager - var d, s: File - if not open(s, source): raiseOSError(osLastError()) - if not open(d, dest, fmWrite): - close(s) - raiseOSError(osLastError()) - var buf = alloc(bufSize) - while true: - var bytesread = readBuffer(s, buf, bufSize) - if bytesread > 0: - var byteswritten = writeBuffer(d, buf, bytesread) - if bytesread != byteswritten: - dealloc(buf) - close(s) - close(d) - raiseOSError(osLastError()) - if bytesread != bufSize: break - dealloc(buf) - close(s) - close(d) - -proc moveFile*(source, dest: string) {.rtl, extern: "nos$1", - tags: [ReadIOEffect, WriteIOEffect].} = - ## Moves a file from `source` to `dest`. If this fails, `OSError` is raised. 
- when defined(Windows): - when useWinUnicode: - let s = newWideCString(source) - let d = newWideCString(dest) - if moveFileW(s, d, 0'i32) == 0'i32: raiseOSError(osLastError()) - else: - if moveFileA(source, dest, 0'i32) == 0'i32: raiseOSError(osLastError()) else: - if c_rename(source, dest) != 0'i32: - raise newException(OSError, $strerror(errno)) + result = geteuid() == 0 -when not declared(ENOENT) and not defined(Windows): - when NoFakeVars: - const ENOENT = cint(2) # 2 on most systems including Solaris - else: - var ENOENT {.importc, header: "<errno.h>".}: cint -when defined(Windows): - when useWinUnicode: - template deleteFile(file: expr): expr {.immediate.} = deleteFileW(file) - template setFileAttributes(file, attrs: expr): expr {.immediate.} = - setFileAttributesW(file, attrs) - else: - template deleteFile(file: expr): expr {.immediate.} = deleteFileA(file) - template setFileAttributes(file, attrs: expr): expr {.immediate.} = - setFileAttributesA(file, attrs) - -proc removeFile*(file: string) {.rtl, extern: "nos$1", tags: [WriteDirEffect].} = - ## Removes the `file`. If this fails, `OSError` is raised. This does not fail - ## if the file never existed in the first place. - ## On Windows, ignores the read-only attribute. - when defined(Windows): - when useWinUnicode: - let f = newWideCString(file) +proc exitStatusLikeShell*(status: cint): cint = + ## Converts exit code from `c_system` into a shell exit code. + when defined(posix) and not weirdTarget: + if WIFSIGNALED(status): + # like the shell! 
+ 128 + WTERMSIG(status) else: - let f = file - if deleteFile(f) == 0: - if getLastError() == ERROR_ACCESS_DENIED: - if setFileAttributes(f, FILE_ATTRIBUTE_NORMAL) == 0: - raiseOSError(osLastError()) - if deleteFile(f) == 0: - raiseOSError(osLastError()) + WEXITSTATUS(status) else: - if c_remove(file) != 0'i32 and errno != ENOENT: - raise newException(OSError, $strerror(errno)) + status proc execShellCmd*(command: string): int {.rtl, extern: "nos$1", - tags: [ExecIOEffect].} = + tags: [ExecIOEffect], noWeirdTarget.} = ## Executes a `shell command`:idx:. ## ## Command has the form 'program args' where args are the command ## line arguments given to program. The proc returns the error code - ## of the shell when it has finished. The proc does not return until - ## the process has finished. To execute a program without having a - ## shell involved, use the `execProcess` proc of the `osproc` - ## module. - when defined(linux): - result = c_system(command) shr 8 - else: - result = c_system(command) - -# Environment handling cannot be put into RTL, because the ``envPairs`` -# iterator depends on ``environment``. - -var - envComputed {.threadvar.}: bool - environment {.threadvar.}: seq[string] - -when defined(windows): - # because we support Windows GUI applications, things get really - # messy here... - when useWinUnicode: - when defined(cpp): - proc strEnd(cstr: WideCString, c = 0'i32): WideCString {. - importcpp: "(NI16*)wcschr((const wchar_t *)#, #)", header: "<string.h>".} - else: - proc strEnd(cstr: WideCString, c = 0'i32): WideCString {. - importc: "wcschr", header: "<string.h>".} - else: - proc strEnd(cstr: cstring, c = 0'i32): cstring {. 
- importc: "strchr", header: "<string.h>".} - - proc getEnvVarsC() = - if not envComputed: - environment = @[] - when useWinUnicode: - var - env = getEnvironmentStringsW() - e = env - if e == nil: return # an error occurred - while true: - var eend = strEnd(e) - add(environment, $e) - e = cast[WideCString](cast[ByteAddress](eend)+2) - if eend[1].int == 0: break - discard freeEnvironmentStringsW(env) - else: - var - env = getEnvironmentStringsA() - e = env - if e == nil: return # an error occurred - while true: - var eend = strEnd(e) - add(environment, $e) - e = cast[cstring](cast[ByteAddress](eend)+1) - if eend[1] == '\0': break - discard freeEnvironmentStringsA(env) - envComputed = true - -else: - const - useNSGetEnviron = defined(macosx) - - when useNSGetEnviron: - # From the manual: - # Shared libraries and bundles don't have direct access to environ, - # which is only available to the loader ld(1) when a complete program - # is being linked. - # The environment routines can still be used, but if direct access to - # environ is needed, the _NSGetEnviron() routine, defined in - # <crt_externs.h>, can be used to retrieve the address of environ - # at runtime. - proc NSGetEnviron(): ptr cstringArray {. - importc: "_NSGetEnviron", header: "<crt_externs.h>".} - else: - var gEnv {.importc: "environ".}: cstringArray - - proc getEnvVarsC() = - # retrieves the variables of char** env of C's main proc - if not envComputed: - environment = @[] - when useNSGetEnviron: - var gEnv = NSGetEnviron()[] - var i = 0 - while true: - if gEnv[i] == nil: break - add environment, $gEnv[i] - inc(i) - envComputed = true - -proc findEnvVar(key: string): int = - getEnvVarsC() - var temp = key & '=' - for i in 0..high(environment): - if startsWith(environment[i], temp): return i - return -1 - -proc getEnv*(key: string): TaintedString {.tags: [ReadEnvEffect].} = - ## Returns the value of the `environment variable`:idx: named `key`. 
+ ## of the shell when it has finished (zero if there is no error). + ## The proc does not return until the process has finished. ## - ## If the variable does not exist, "" is returned. To distinguish - ## whether a variable exists or it's value is just "", call - ## `existsEnv(key)`. - var i = findEnvVar(key) - if i >= 0: - return TaintedString(substr(environment[i], find(environment[i], '=')+1)) - else: - var env = c_getenv(key) - if env == nil: return TaintedString("") - result = TaintedString($env) - -proc existsEnv*(key: string): bool {.tags: [ReadEnvEffect].} = - ## Checks whether the environment variable named `key` exists. - ## Returns true if it exists, false otherwise. - if c_getenv(key) != nil: return true - else: return findEnvVar(key) >= 0 - -proc putEnv*(key, val: string) {.tags: [WriteEnvEffect].} = - ## Sets the value of the `environment variable`:idx: named `key` to `val`. - ## If an error occurs, `EInvalidEnvVar` is raised. - - # Note: by storing the string in the environment sequence, - # we guarantee that we don't free the memory before the program - # ends (this is needed for POSIX compliance). It is also needed so that - # the process itself may access its modified environment variables! - var indx = findEnvVar(key) - if indx >= 0: - environment[indx] = key & '=' & val - else: - add environment, (key & '=' & val) - indx = high(environment) - when defined(unix): - if c_putenv(environment[indx]) != 0'i32: - raiseOSError(osLastError()) - else: - when useWinUnicode: - var k = newWideCString(key) - var v = newWideCString(val) - if setEnvironmentVariableW(k, v) == 0'i32: raiseOSError(osLastError()) - else: - if setEnvironmentVariableA(key, val) == 0'i32: raiseOSError(osLastError()) - -iterator envPairs*(): tuple[key, value: TaintedString] {.tags: [ReadEnvEffect].} = - ## Iterate over all `environments variables`:idx:. In the first component - ## of the tuple is the name of the current variable stored, in the second - ## its value. 
- getEnvVarsC() - for i in 0..high(environment): - var p = find(environment[i], '=') - yield (TaintedString(substr(environment[i], 0, p-1)), - TaintedString(substr(environment[i], p+1))) - -iterator walkFiles*(pattern: string): string {.tags: [ReadDirEffect].} = - ## Iterate over all the files that match the `pattern`. On POSIX this uses - ## the `glob`:idx: call. + ## To execute a program without having a shell involved, use `osproc.execProcess proc + ## <osproc.html#execProcess,string,string,openArray[string],StringTableRef,set[ProcessOption]>`_. ## - ## `pattern` is OS dependent, but at least the "\*.ext" - ## notation is supported. - when defined(windows): - var - f: TWIN32_FIND_DATA - res: int - res = findFirstFile(pattern, f) - if res != -1: - while true: - if not skipFindData(f) and - (f.dwFileAttributes and FILE_ATTRIBUTE_DIRECTORY) == 0'i32: - yield splitFile(pattern).dir / extractFilename(getFilename(f)) - if findNextFile(res, f) == 0'i32: break - findClose(res) - else: # here we use glob - var - f: TGlob - res: int - f.gl_offs = 0 - f.gl_pathc = 0 - f.gl_pathv = nil - res = glob(pattern, 0, nil, addr(f)) - if res == 0: - for i in 0.. f.gl_pathc - 1: - assert(f.gl_pathv[i] != nil) - yield $f.gl_pathv[i] - globfree(addr(f)) + ## **Examples:** + ## ```Nim + ## discard execShellCmd("ls -la") + ## ``` + result = exitStatusLikeShell(c_system(command)) -type - PathComponent* = enum ## Enumeration specifying a path component. - pcFile, ## path refers to a file - pcLinkToFile, ## path refers to a symbolic link to a file - pcDir, ## path refers to a directory - pcLinkToDir ## path refers to a symbolic link to a directory - -{.deprecated: [TPathComponent: PathComponent].} - -iterator walkDir*(dir: string): tuple[kind: PathComponent, path: string] {. - tags: [ReadDirEffect].} = - ## walks over the directory `dir` and yields for each directory or file in - ## `dir`. The component type and full path for each item is returned. - ## Walking is not recursive. 
- ## Example: This directory structure:: - ## dirA / dirB / fileB1.txt - ## / dirC - ## / fileA1.txt - ## / fileA2.txt - ## - ## and this code: - ## - ## .. code-block:: Nim - ## for kind, path in walkDir("dirA"): - ## echo(path) - ## - ## produces this output (but not necessarily in this order!):: - ## dirA/dirB - ## dirA/dirC - ## dirA/fileA1.txt - ## dirA/fileA2.txt - when defined(windows): - var f: TWIN32_FIND_DATA - var h = findFirstFile(dir / "*", f) - if h != -1: - while true: - var k = pcFile - if not skipFindData(f): - if (f.dwFileAttributes and FILE_ATTRIBUTE_DIRECTORY) != 0'i32: - k = pcDir - if (f.dwFileAttributes and FILE_ATTRIBUTE_REPARSE_POINT) != 0'i32: - k = succ(k) - yield (k, dir / extractFilename(getFilename(f))) - if findNextFile(h, f) == 0'i32: break - findClose(h) - else: - var d = opendir(dir) - if d != nil: - while true: - var x = readdir(d) - if x == nil: break - var y = $x.d_name - if y != "." and y != "..": - var s: TStat - y = dir / y - var k = pcFile - - when defined(linux) or defined(macosx) or defined(bsd): - if x.d_type != DT_UNKNOWN: - if x.d_type == DT_DIR: k = pcDir - if x.d_type == DT_LNK: k = succ(k) - yield (k, y) - continue - - if lstat(y, s) < 0'i32: break - if S_ISDIR(s.st_mode): k = pcDir - if S_ISLNK(s.st_mode): k = succ(k) - yield (k, y) - discard closedir(d) - -iterator walkDirRec*(dir: string, filter={pcFile, pcDir}): string {. - tags: [ReadDirEffect].} = - ## walks over the directory `dir` and yields for each file in `dir`. The - ## full path for each file is returned. - ## **Warning**: - ## Modifying the directory structure while the iterator - ## is traversing may result in undefined behavior! - ## - ## Walking is recursive. 
`filter` controls the behaviour of the iterator: - ## - ## --------------------- --------------------------------------------- - ## filter meaning - ## --------------------- --------------------------------------------- - ## ``pcFile`` yield real files - ## ``pcLinkToFile`` yield symbolic links to files - ## ``pcDir`` follow real directories - ## ``pcLinkToDir`` follow symbolic links to directories - ## --------------------- --------------------------------------------- +proc expandFilename*(filename: string): string {.rtl, extern: "nos$1", + tags: [ReadDirEffect], noWeirdTarget.} = + ## Returns the full (`absolute`:idx:) path of an existing file `filename`. ## - var stack = @[dir] - while stack.len > 0: - for k,p in walkDir(stack.pop()): - if k in filter: - case k - of pcFile, pcLinkToFile: yield p - of pcDir, pcLinkToDir: stack.add(p) - -proc rawRemoveDir(dir: string) = + ## Raises `OSError` in case of an error. Follows symlinks. when defined(windows): - when useWinUnicode: - wrapUnary(res, removeDirectoryW, dir) - else: - var res = removeDirectoryA(dir) - let lastError = osLastError() - if res == 0'i32 and lastError.int32 != 3'i32 and - lastError.int32 != 18'i32 and lastError.int32 != 2'i32: - raiseOSError(lastError) - else: - if rmdir(dir) != 0'i32 and errno != ENOENT: raiseOSError(osLastError()) - -proc removeDir*(dir: string) {.rtl, extern: "nos$1", tags: [ - WriteDirEffect, ReadDirEffect], benign.} = - ## Removes the directory `dir` including all subdirectories and files - ## in `dir` (recursively). - ## - ## If this fails, `OSError` is raised. This does not fail if the directory never - ## existed in the first place. 
- for kind, path in walkDir(dir): - case kind - of pcFile, pcLinkToFile, pcLinkToDir: removeFile(path) - of pcDir: removeDir(path) - rawRemoveDir(dir) - -proc rawCreateDir(dir: string) = - when defined(solaris): - if mkdir(dir, 0o777) != 0'i32 and errno != EEXIST and errno != ENOSYS: - raiseOSError(osLastError()) - elif defined(unix): - if mkdir(dir, 0o777) != 0'i32 and errno != EEXIST: - raiseOSError(osLastError()) + var bufsize = MAX_PATH.int32 + var unused: WideCString = nil + var res = newWideCString(bufsize) + while true: + var L = getFullPathNameW(newWideCString(filename), bufsize, res, unused) + if L == 0'i32: + raiseOSError(osLastError(), filename) + elif L > bufsize: + res = newWideCString(L) + bufsize = L + else: + result = res$L + break + # getFullPathName doesn't do case corrections, so we have to use this convoluted + # way of retrieving the true filename + for x in walkFiles(result): + result = x + if not fileExists(result) and not dirExists(result): + # consider using: `raiseOSError(osLastError(), result)` + raise newException(OSError, "file '" & result & "' does not exist") else: - when useWinUnicode: - wrapUnary(res, createDirectoryW, dir) + # according to Posix we don't need to allocate space for result pathname. + # But we need to free return value with free(3). + var r = realpath(filename, nil) + if r.isNil: + raiseOSError(osLastError(), filename) else: - var res = createDirectoryA(dir) - if res == 0'i32 and getLastError() != 183'i32: - raiseOSError(osLastError()) - -proc createDir*(dir: string) {.rtl, extern: "nos$1", tags: [WriteDirEffect].} = - ## Creates the `directory`:idx: `dir`. - ## - ## The directory may contain several subdirectories that do not exist yet. - ## The full path is created. If this fails, `OSError` is raised. It does **not** - ## fail if the path already exists because for most usages this does not - ## indicate an error. - var omitNext = false - when doslike: - omitNext = isAbsolute(dir) - for i in 1.. 
dir.len-1: - if dir[i] in {DirSep, AltSep}: - if omitNext: - omitNext = false - else: - rawCreateDir(substr(dir, 0, i-1)) - rawCreateDir(dir) + result = $r + c_free(cast[pointer](r)) -proc copyDir*(source, dest: string) {.rtl, extern: "nos$1", - tags: [WriteIOEffect, ReadIOEffect], benign.} = - ## Copies a directory from `source` to `dest`. +proc getCurrentCompilerExe*(): string {.compileTime.} = discard + ## Returns the path of the currently running Nim compiler or nimble executable. ## - ## If this fails, `OSError` is raised. On the Windows platform this proc will - ## copy the attributes from `source` into `dest`. On other platforms created - ## files and directories will inherit the default permissions of a newly - ## created file/directory for the user. To preserve attributes recursively on - ## these platforms use `copyDirWithPermissions() <#copyDirWithPermissions>`_. - createDir(dest) - for kind, path in walkDir(source): - var noSource = path.substr(source.len()+1) - case kind - of pcFile: - copyFile(path, dest / noSource) - of pcDir: - copyDir(path, dest / noSource) - else: discard - -proc createSymlink*(src, dest: string) = - ## Create a symbolic link at `dest` which points to the item specified - ## by `src`. On most operating systems, will fail if a lonk - ## - ## **Warning**: - ## Some OS's (such as Microsoft Windows) restrict the creation - ## of symlinks to root users (administrators). 
- when defined(Windows): - let flag = dirExists(src).int32 - when useWinUnicode: - var wSrc = newWideCString(src) - var wDst = newWideCString(dest) - if createSymbolicLinkW(wDst, wSrc, flag) == 0 or getLastError() != 0: - raiseOSError(osLastError()) - else: - if createSymbolicLinkA(dest, src, flag) == 0 or getLastError() != 0: - raiseOSError(osLastError()) - else: - if symlink(src, dest) != 0: - raiseOSError(osLastError()) + ## Can be used to retrieve the currently executing + ## Nim compiler from a Nim or nimscript program, or the nimble binary + ## inside a nimble program (likewise with other binaries built from + ## compiler API). -proc createHardlink*(src, dest: string) = +proc createHardlink*(src, dest: string) {.noWeirdTarget.} = ## Create a hard link at `dest` which points to the item specified ## by `src`. ## - ## **Warning**: Most OS's restrict the creation of hard links to - ## root users (administrators) . - when defined(Windows): - when useWinUnicode: - var wSrc = newWideCString(src) - var wDst = newWideCString(dest) - if createHardLinkW(wDst, wSrc, nil) == 0: - raiseOSError(osLastError()) - else: - if createHardLinkA(dest, src, nil) == 0: - raiseOSError(osLastError()) + ## .. warning:: Some OS's restrict the creation of hard links to + ## root users (administrators). + ## + ## See also: + ## * `createSymlink proc`_ + when defined(windows): + var wSrc = newWideCString(src) + var wDst = newWideCString(dest) + if createHardLinkW(wDst, wSrc, nil) == 0: + raiseOSError(osLastError(), $(src, dest)) else: if link(src, dest) != 0: - raiseOSError(osLastError()) - -proc parseCmdLine*(c: string): seq[string] {. - noSideEffect, rtl, extern: "nos$1".} = - ## Splits a command line into several components; - ## This proc is only occasionally useful, better use the `parseopt` module. 
- ## - ## On Windows, it uses the following parsing rules - ## (see http://msdn.microsoft.com/en-us/library/17w5ykft.aspx ): - ## - ## * Arguments are delimited by white space, which is either a space or a tab. - ## * The caret character (^) is not recognized as an escape character or - ## delimiter. The character is handled completely by the command-line parser - ## in the operating system before being passed to the argv array in the - ## program. - ## * A string surrounded by double quotation marks ("string") is interpreted - ## as a single argument, regardless of white space contained within. A - ## quoted string can be embedded in an argument. - ## * A double quotation mark preceded by a backslash (\") is interpreted as a - ## literal double quotation mark character ("). - ## * Backslashes are interpreted literally, unless they immediately precede - ## a double quotation mark. - ## * If an even number of backslashes is followed by a double quotation mark, - ## one backslash is placed in the argv array for every pair of backslashes, - ## and the double quotation mark is interpreted as a string delimiter. - ## * If an odd number of backslashes is followed by a double quotation mark, - ## one backslash is placed in the argv array for every pair of backslashes, - ## and the double quotation mark is "escaped" by the remaining backslash, - ## causing a literal double quotation mark (") to be placed in argv. - ## - ## On Posix systems, it uses the following parsing rules: - ## Components are separated by whitespace unless the whitespace - ## occurs within ``"`` or ``'`` quotes. 
- result = @[] - var i = 0 - var a = "" - while true: - setLen(a, 0) - # eat all delimiting whitespace - while c[i] == ' ' or c[i] == '\t' or c [i] == '\l' or c [i] == '\r' : inc(i) - when defined(windows): - # parse a single argument according to the above rules: - if c[i] == '\0': break - var inQuote = false - while true: - case c[i] - of '\0': break - of '\\': - var j = i - while c[j] == '\\': inc(j) - if c[j] == '"': - for k in 1..(j-i) div 2: a.add('\\') - if (j-i) mod 2 == 0: - i = j - else: - a.add('"') - i = j+1 - else: - a.add(c[i]) - inc(i) - of '"': - inc(i) - if not inQuote: inQuote = true - elif c[i] == '"': - a.add(c[i]) - inc(i) - else: - inQuote = false - break - of ' ', '\t': - if not inQuote: break - a.add(c[i]) - inc(i) - else: - a.add(c[i]) - inc(i) - else: - case c[i] - of '\'', '\"': - var delim = c[i] - inc(i) # skip ' or " - while c[i] != '\0' and c[i] != delim: - add a, c[i] - inc(i) - if c[i] != '\0': inc(i) - of '\0': break - else: - while c[i] > ' ': - add(a, c[i]) - inc(i) - add(result, a) - -proc copyFileWithPermissions*(source, dest: string, - ignorePermissionErrors = true) = - ## Copies a file from `source` to `dest` preserving file permissions. - ## - ## This is a wrapper proc around `copyFile() <#copyFile>`_, - ## `getFilePermissions() <#getFilePermissions>`_ and `setFilePermissions() - ## <#setFilePermissions>`_ on non Windows platform. On Windows this proc is - ## just a wrapper for `copyFile() <#copyFile>`_ since that proc already - ## copies attributes. - ## - ## On non Windows systems permissions are copied after the file itself has - ## been copied, which won't happen atomically and could lead to a race - ## condition. If `ignorePermissionErrors` is true, errors while - ## reading/setting file attributes will be ignored, otherwise will raise - ## `OSError`. 
- copyFile(source, dest) - when not defined(Windows): - try: - setFilePermissions(dest, getFilePermissions(source)) - except: - if not ignorePermissionErrors: - raise - -proc copyDirWithPermissions*(source, dest: string, - ignorePermissionErrors = true) {.rtl, extern: "nos$1", - tags: [WriteIOEffect, ReadIOEffect], benign.} = - ## Copies a directory from `source` to `dest` preserving file permissions. - ## - ## If this fails, `OSError` is raised. This is a wrapper proc around `copyDir() - ## <#copyDir>`_ and `copyFileWithPermissions() <#copyFileWithPermissions>`_ - ## on non Windows platforms. On Windows this proc is just a wrapper for - ## `copyDir() <#copyDir>`_ since that proc already copies attributes. - ## - ## On non Windows systems permissions are copied after the file or directory - ## itself has been copied, which won't happen atomically and could lead to a - ## race condition. If `ignorePermissionErrors` is true, errors while - ## reading/setting file attributes will be ignored, otherwise will raise - ## `OSError`. - createDir(dest) - when not defined(Windows): - try: - setFilePermissions(dest, getFilePermissions(source)) - except: - if not ignorePermissionErrors: - raise - for kind, path in walkDir(source): - var noSource = path.substr(source.len()+1) - case kind - of pcFile: - copyFileWithPermissions(path, dest / noSource, ignorePermissionErrors) - of pcDir: - copyDirWithPermissions(path, dest / noSource, ignorePermissionErrors) - else: discard + raiseOSError(osLastError(), $(src, dest)) proc inclFilePermissions*(filename: string, permissions: set[FilePermission]) {. - rtl, extern: "nos$1", tags: [ReadDirEffect, WriteDirEffect].} = - ## a convenience procedure for: - ## - ## .. 
code-block:: nim + rtl, extern: "nos$1", tags: [ReadDirEffect, WriteDirEffect], noWeirdTarget.} = + ## A convenience proc for: + ## ```nim ## setFilePermissions(filename, getFilePermissions(filename)+permissions) + ## ``` setFilePermissions(filename, getFilePermissions(filename)+permissions) proc exclFilePermissions*(filename: string, permissions: set[FilePermission]) {. - rtl, extern: "nos$1", tags: [ReadDirEffect, WriteDirEffect].} = - ## a convenience procedure for: - ## - ## .. code-block:: nim + rtl, extern: "nos$1", tags: [ReadDirEffect, WriteDirEffect], noWeirdTarget.} = + ## A convenience proc for: + ## ```nim ## setFilePermissions(filename, getFilePermissions(filename)-permissions) + ## ``` setFilePermissions(filename, getFilePermissions(filename)-permissions) -proc getHomeDir*(): string {.rtl, extern: "nos$1", tags: [ReadEnvEffect].} = - ## Returns the home directory of the current user. - ## - ## This proc is wrapped by the expandTilde proc for the convenience of - ## processing paths coming from user configuration files. - when defined(windows): return string(getEnv("USERPROFILE")) & "\\" - else: return string(getEnv("HOME")) & "/" - -proc getConfigDir*(): string {.rtl, extern: "nos$1", tags: [ReadEnvEffect].} = - ## Returns the config directory of the current user for applications. - when defined(windows): return string(getEnv("APPDATA")) & "\\" - else: return string(getEnv("HOME")) & "/.config/" - -proc getTempDir*(): string {.rtl, extern: "nos$1", tags: [ReadEnvEffect].} = - ## Returns the temporary directory of the current user for applications to - ## save temporary files in. - when defined(windows): return string(getEnv("TEMP")) & "\\" - else: return "/tmp/" - -when defined(nimdoc): - # Common forward declaration docstring block for parameter retrieval procs. - proc paramCount*(): int {.tags: [ReadIOEffect].} = - ## Returns the number of `command line arguments`:idx: given to the - ## application. 
- ## - ## If your binary was called without parameters this will return zero. You - ## can later query each individual paramater with `paramStr() <#paramStr>`_ - ## or retrieve all of them in one go with `commandLineParams() - ## <#commandLineParams>`_. - ## - ## **Availability**: On Posix there is no portable way to get the command - ## line from a DLL and thus the proc isn't defined in this environment. You - ## can test for its availability with `declared() <system.html#declared>`_. - ## Example: - ## - ## .. code-block:: nim - ## when declared(paramCount): - ## # Use paramCount() here - ## else: - ## # Do something else! - - proc paramStr*(i: int): TaintedString {.tags: [ReadIOEffect].} = - ## Returns the `i`-th `command line argument`:idx: given to the application. - ## - ## `i` should be in the range `1..paramCount()`, the `EInvalidIndex` - ## exception will be raised for invalid values. Instead of iterating over - ## `paramCount() <#paramCount>`_ with this proc you can call the - ## convenience `commandLineParams() <#commandLineParams>`_. - ## - ## It is possible to call ``paramStr(0)`` but this will return OS specific - ## contents (usually the name of the invoked executable). You should avoid - ## this and call `getAppFilename() <#getAppFilename>`_ instead. - ## - ## **Availability**: On Posix there is no portable way to get the command - ## line from a DLL and thus the proc isn't defined in this environment. You - ## can test for its availability with `declared() <system.html#declared>`_. - ## Example: - ## - ## .. code-block:: nim - ## when declared(paramStr): - ## # Use paramStr() here - ## else: - ## # Do something else! 
+when not weirdTarget and (defined(freebsd) or defined(dragonfly) or defined(netbsd)): + proc sysctl(name: ptr cint, namelen: cuint, oldp: pointer, oldplen: ptr csize_t, + newp: pointer, newplen: csize_t): cint + {.importc: "sysctl",header: """#include <sys/types.h> + #include <sys/sysctl.h>""".} + const + CTL_KERN = 1 + KERN_PROC = 14 + MAX_PATH = 1024 + + when defined(freebsd): + const KERN_PROC_PATHNAME = 12 + elif defined(netbsd): + const KERN_PROC_ARGS = 48 + const KERN_PROC_PATHNAME = 5 + else: + const KERN_PROC_PATHNAME = 9 -elif defined(windows): - # Since we support GUI applications with Nim, we sometimes generate - # a WinMain entry proc. But a WinMain proc has no access to the parsed - # command line arguments. The way to get them differs. Thus we parse them - # ourselves. This has the additional benefit that the program's behaviour - # is always the same -- independent of the used C compiler. - var - ownArgv {.threadvar.}: seq[string] + proc getApplFreebsd(): string = + var pathLength = csize_t(0) - proc paramCount*(): int {.rtl, extern: "nos$1", tags: [ReadIOEffect].} = - # Docstring in nimdoc block. - if isNil(ownArgv): ownArgv = parseCmdLine($getCommandLine()) - result = ownArgv.len-1 + when defined(netbsd): + var req = [CTL_KERN.cint, KERN_PROC_ARGS.cint, -1.cint, KERN_PROC_PATHNAME.cint] + else: + var req = [CTL_KERN.cint, KERN_PROC.cint, KERN_PROC_PATHNAME.cint, -1.cint] - proc paramStr*(i: int): TaintedString {.rtl, extern: "nos$1", - tags: [ReadIOEffect].} = - # Docstring in nimdoc block. - if isNil(ownArgv): ownArgv = parseCmdLine($getCommandLine()) - return TaintedString(ownArgv[i]) + # first call to get the required length + var res = sysctl(addr req[0], 4, nil, addr pathLength, nil, 0) -elif not defined(createNimRtl): - # On Posix, there is no portable way to get the command line from a DLL. 
- var - cmdCount {.importc: "cmdCount".}: cint - cmdLine {.importc: "cmdLine".}: cstringArray + if res < 0: + return "" - proc paramStr*(i: int): TaintedString {.tags: [ReadIOEffect].} = - # Docstring in nimdoc block. - if i < cmdCount and i >= 0: return TaintedString($cmdLine[i]) - raise newException(IndexError, "invalid index") + result.setLen(pathLength) + res = sysctl(addr req[0], 4, addr result[0], addr pathLength, nil, 0) - proc paramCount*(): int {.tags: [ReadIOEffect].} = - # Docstring in nimdoc block. - result = cmdCount-1 + if res < 0: + return "" -when declared(paramCount) or defined(nimdoc): - proc commandLineParams*(): seq[TaintedString] = - ## Convenience proc which returns the command line parameters. - ## - ## This returns **only** the parameters. If you want to get the application - ## executable filename, call `getAppFilename() <#getAppFilename>`_. - ## - ## **Availability**: On Posix there is no portable way to get the command - ## line from a DLL and thus the proc isn't defined in this environment. You - ## can test for its availability with `declared() <system.html#declared>`_. - ## Example: - ## - ## .. code-block:: nim - ## when declared(commandLineParams): - ## # Use commandLineParams() here - ## else: - ## # Do something else! 
- result = @[] - for i in 1..paramCount(): - result.add(paramStr(i)) - -when defined(linux) or defined(solaris) or defined(bsd) or defined(aix): + let realLen = len(cstring(result)) + setLen(result, realLen) + +when not weirdTarget and (defined(linux) or defined(solaris) or defined(bsd) or defined(aix)): proc getApplAux(procPath: string): string = - result = newString(256) - var len = readlink(procPath, result, 256) - if len > 256: + result = newString(maxSymlinkLen) + var len = readlink(procPath, result.cstring, maxSymlinkLen) + if len > maxSymlinkLen: result = newString(len+1) - len = readlink(procPath, result, len) + len = readlink(procPath, result.cstring, len) setLen(result, len) -when not (defined(windows) or defined(macosx)): +when not weirdTarget and defined(openbsd): + proc getApplOpenBsd(): string = + # similar to getApplHeuristic, but checks current working directory + when declared(paramStr): + result = "" + + # POSIX guaranties that this contains the executable + # as it has been executed by the calling process + let exePath = paramStr(0) + + if len(exePath) == 0: + return "" + + if exePath[0] == DirSep: + # path is absolute + result = exePath + else: + # not an absolute path, check if it's relative to the current working directory + for i in 1..<len(exePath): + if exePath[i] == DirSep: + result = joinPath(getCurrentDir(), exePath) + break + + if len(result) > 0: + return expandFilename(result) + + # search in path + for p in split(getEnv("PATH"), {PathSep}): + var x = joinPath(p, exePath) + if fileExists(x): + return expandFilename(x) + else: + result = "" + +when not (defined(windows) or defined(macosx) or weirdTarget): proc getApplHeuristic(): string = when declared(paramStr): - result = string(paramStr(0)) + result = paramStr(0) # POSIX guaranties that this contains the executable # as it has been executed by the calling process if len(result) > 0 and result[0] != DirSep: # not an absolute path? 
# iterate over any path in the $PATH environment variable - for p in split(string(getEnv("PATH")), {PathSep}): + for p in split(getEnv("PATH"), {PathSep}): var x = joinPath(p, result) - if existsFile(x): return x + if fileExists(x): return x else: result = "" @@ -1776,194 +606,199 @@ when defined(macosx): proc getExecPath2(c: cstring, size: var cuint32): bool {. importc: "_NSGetExecutablePath", header: "<mach-o/dyld.h>".} -proc getAppFilename*(): string {.rtl, extern: "nos$1", tags: [ReadIOEffect].} = +when defined(haiku): + const + PATH_MAX = 1024 + B_FIND_PATH_IMAGE_PATH = 1000 + + proc find_path(codePointer: pointer, baseDirectory: cint, subPath: cstring, + pathBuffer: cstring, bufferSize: csize_t): int32 + {.importc, header: "<FindDirectory.h>".} + + proc getApplHaiku(): string = + result = newString(PATH_MAX) + + if find_path(nil, B_FIND_PATH_IMAGE_PATH, nil, result, PATH_MAX) == 0: + let realLen = len(cstring(result)) + setLen(result, realLen) + else: + result = "" + +proc getAppFilename*(): string {.rtl, extern: "nos$1", tags: [ReadIOEffect], noWeirdTarget, raises: [].} = ## Returns the filename of the application's executable. + ## This proc will resolve symlinks. ## - ## This procedure will resolve symlinks. + ## Returns empty string when name is unavailable ## - ## **Note**: This does not work reliably on BSD. 
+ ## See also: + ## * `getAppDir proc`_ + ## * `getCurrentCompilerExe proc`_ # Linux: /proc/<pid>/exe # Solaris: # /proc/<pid>/object/a.out (filename only) # /proc/<pid>/path/a.out (complete pathname) - # *BSD (and maybe Darwin too): - # /proc/<pid>/file when defined(windows): - when useWinUnicode: - var buf = cast[WideCString](alloc(256*2)) - var len = getModuleFileNameW(0, buf, 256) - result = buf$len - else: - result = newString(256) - var len = getModuleFileNameA(0, result, 256) - setlen(result, int(len)) - elif defined(linux) or defined(aix): - result = getApplAux("/proc/self/exe") - if result.len == 0: result = getApplHeuristic() - elif defined(solaris): - result = getApplAux("/proc/" & $getpid() & "/path/a.out") - if result.len == 0: result = getApplHeuristic() - elif defined(freebsd): - result = getApplAux("/proc/" & $getpid() & "/file") - if result.len == 0: result = getApplHeuristic() + var bufsize = int32(MAX_PATH) + var buf = newWideCString(bufsize) + while true: + var L = getModuleFileNameW(0, buf, bufsize) + if L == 0'i32: + result = "" # error! + break + elif L > bufsize: + buf = newWideCString(L) + bufsize = L + else: + result = buf$L + break elif defined(macosx): - var size: cuint32 + var size = cuint32(0) getExecPath1(nil, size) result = newString(int(size)) - if getExecPath2(result, size): + if getExecPath2(result.cstring, size): result = "" # error! if result.len > 0: - result = result.expandFilename + try: + result = result.expandFilename + except OSError: + result = "" else: - # little heuristic that may work on other POSIX-like systems: - result = string(getEnv("_")) - if result.len == 0: result = getApplHeuristic() - -proc getApplicationFilename*(): string {.rtl, extern: "nos$1", deprecated.} = - ## Returns the filename of the application's executable. - ## **Deprecated since version 0.8.12**: use ``getAppFilename`` - ## instead. 
- result = getAppFilename() + when defined(linux) or defined(aix): + result = getApplAux("/proc/self/exe") + elif defined(solaris): + result = getApplAux("/proc/" & $getpid() & "/path/a.out") + elif defined(genode): + result = "" # Not supported + elif defined(freebsd) or defined(dragonfly) or defined(netbsd): + result = getApplFreebsd() + elif defined(haiku): + result = getApplHaiku() + elif defined(openbsd): + result = try: getApplOpenBsd() except OSError: "" + elif defined(nintendoswitch): + result = "" -proc getApplicationDir*(): string {.rtl, extern: "nos$1", deprecated.} = - ## Returns the directory of the application's executable. - ## **Deprecated since version 0.8.12**: use ``getAppDir`` - ## instead. - result = splitFile(getAppFilename()).dir + # little heuristic that may work on other POSIX-like systems: + if result.len == 0: + result = try: getApplHeuristic() except OSError: "" -proc getAppDir*(): string {.rtl, extern: "nos$1", tags: [ReadIOEffect].} = +proc getAppDir*(): string {.rtl, extern: "nos$1", tags: [ReadIOEffect], noWeirdTarget.} = ## Returns the directory of the application's executable. - ## **Note**: This does not work reliably on BSD. + ## + ## See also: + ## * `getAppFilename proc`_ result = splitFile(getAppFilename()).dir -proc sleep*(milsecs: int) {.rtl, extern: "nos$1", tags: [TimeEffect].} = - ## sleeps `milsecs` milliseconds. +proc sleep*(milsecs: int) {.rtl, extern: "nos$1", tags: [TimeEffect], noWeirdTarget.} = + ## Sleeps `milsecs` milliseconds. + ## A negative `milsecs` causes sleep to return immediately. when defined(windows): + if milsecs < 0: + return # fixes #23732 winlean.sleep(int32(milsecs)) else: - var a, b: Ttimespec - a.tv_sec = Time(milsecs div 1000) + var a, b: Timespec + a.tv_sec = posix.Time(milsecs div 1000) a.tv_nsec = (milsecs mod 1000) * 1000 * 1000 discard posix.nanosleep(a, b) proc getFileSize*(file: string): BiggestInt {.rtl, extern: "nos$1", - tags: [ReadIOEffect].} = - ## returns the file size of `file`. 
Can raise ``OSError``. + tags: [ReadIOEffect], noWeirdTarget.} = + ## Returns the file size of `file` (in bytes). ``OSError`` is + ## raised in case of an error. when defined(windows): - var a: TWIN32_FIND_DATA + var a: WIN32_FIND_DATA var resA = findFirstFile(file, a) - if resA == -1: raiseOSError(osLastError()) + if resA == -1: raiseOSError(osLastError(), file) result = rdFileSize(a) findClose(resA) else: - var f: File - if open(f, file): - result = getFileSize(f) - close(f) - else: raiseOSError(osLastError()) + var rawInfo: Stat + if stat(file, rawInfo) < 0'i32: + raiseOSError(osLastError(), file) + rawInfo.st_size -proc expandTilde*(path: string): string {.tags: [ReadEnvEffect].} - -proc findExe*(exe: string): string {.tags: [ReadDirEffect, ReadEnvEffect].} = - ## Searches for `exe` in the current working directory and then - ## in directories listed in the ``PATH`` environment variable. - ## Returns "" if the `exe` cannot be found. On DOS-like platforms, `exe` - ## is added the `ExeExt <#ExeExt>`_ file extension if it has none. - result = addFileExt(exe, os.ExeExt) - if existsFile(result): return - var path = string(os.getEnv("PATH")) - for candidate in split(path, PathSep): - when defined(windows): - var x = candidate / result - else: - var x = expandTilde(candidate) / result - if existsFile(x): return x - result = "" - -proc expandTilde*(path: string): string = - ## Expands a path starting with ``~/`` to a full path. - ## - ## If `path` starts with the tilde character and is followed by `/` or `\\` - ## this proc will return the reminder of the path appended to the result of - ## the getHomeDir() proc, otherwise the input path will be returned without - ## modification. - ## - ## The behaviour of this proc is the same on the Windows platform despite not - ## having this convention. Example: - ## - ## .. 
code-block:: nim - ## let configFile = expandTilde("~" / "appname.cfg") - ## echo configFile - ## # --> C:\Users\amber\appname.cfg - if len(path) > 1 and path[0] == '~' and (path[1] == '/' or path[1] == '\\'): - result = getHomeDir() / path[2..len(path)-1] - else: - result = path - -when defined(Windows): +when defined(windows) or weirdTarget: type DeviceId* = int32 FileId* = int64 else: type - DeviceId* = TDev - FileId* = Tino + DeviceId* = Dev + FileId* = Ino type FileInfo* = object ## Contains information associated with a file object. - id*: tuple[device: DeviceId, file: FileId] # Device and file id. - kind*: PathComponent # Kind of file object - directory, symlink, etc. - size*: BiggestInt # Size of file. - permissions*: set[FilePermission] # File permissions - linkCount*: BiggestInt # Number of hard links the file object has. - lastAccessTime*: Time # Time file was last accessed. - lastWriteTime*: Time # Time file was last modified/written to. - creationTime*: Time # Time file was created. Not supported on all systems! - -template rawToFormalFileInfo(rawInfo, formalInfo): expr = + ## + ## See also: + ## * `getFileInfo(handle) proc`_ + ## * `getFileInfo(file) proc`_ + ## * `getFileInfo(path, followSymlink) proc`_ + id*: tuple[device: DeviceId, file: FileId] ## Device and file id. + kind*: PathComponent ## Kind of file object - directory, symlink, etc. + size*: BiggestInt ## Size of file. + permissions*: set[FilePermission] ## File permissions + linkCount*: BiggestInt ## Number of hard links the file object has. + lastAccessTime*: times.Time ## Time file was last accessed. + lastWriteTime*: times.Time ## Time file was last modified/written to. + creationTime*: times.Time ## Time file was created. Not supported on all systems! + blockSize*: int ## Preferred I/O block size for this object. + ## In some filesystems, this may vary from file to file. + isSpecial*: bool ## Is file special? 
(on Unix some "files" + ## can be special=non-regular like FIFOs, + ## devices); for directories `isSpecial` + ## is always `false`, for symlinks it is + ## the same as for the link's target. + +template rawToFormalFileInfo(rawInfo, path, formalInfo): untyped = ## Transforms the native file info structure into the one nim uses. - ## 'rawInfo' is either a 'TBY_HANDLE_FILE_INFORMATION' structure on Windows, - ## or a 'TStat' structure on posix - when defined(Windows): - template toTime(e): expr = winTimeToUnixTime(rdFileTime(e)) - template merge(a, b): expr = a or (b shl 32) + ## 'rawInfo' is either a 'BY_HANDLE_FILE_INFORMATION' structure on Windows, + ## or a 'Stat' structure on posix + when defined(windows): + template merge[T](a, b): untyped = + cast[T]( + (uint64(cast[uint32](a))) or + (uint64(cast[uint32](b)) shl 32) + ) formalInfo.id.device = rawInfo.dwVolumeSerialNumber - formalInfo.id.file = merge(rawInfo.nFileIndexLow, rawInfo.nFileIndexHigh) - formalInfo.size = merge(rawInfo.nFileSizeLow, rawInfo.nFileSizeHigh) + formalInfo.id.file = merge[FileId](rawInfo.nFileIndexLow, rawInfo.nFileIndexHigh) + formalInfo.size = merge[BiggestInt](rawInfo.nFileSizeLow, rawInfo.nFileSizeHigh) formalInfo.linkCount = rawInfo.nNumberOfLinks - formalInfo.lastAccessTime = toTime(rawInfo.ftLastAccessTime) - formalInfo.lastWriteTime = toTime(rawInfo.ftLastWriteTime) - formalInfo.creationTime = toTime(rawInfo.ftCreationTime) - + formalInfo.lastAccessTime = fromWinTime(rdFileTime(rawInfo.ftLastAccessTime)) + formalInfo.lastWriteTime = fromWinTime(rdFileTime(rawInfo.ftLastWriteTime)) + formalInfo.creationTime = fromWinTime(rdFileTime(rawInfo.ftCreationTime)) + formalInfo.blockSize = 8192 # xxx use Windows API instead of hardcoding + # Retrieve basic permissions if (rawInfo.dwFileAttributes and FILE_ATTRIBUTE_READONLY) != 0'i32: - formalInfo.permissions = {fpUserExec, fpUserRead, fpGroupExec, + formalInfo.permissions = {fpUserExec, fpUserRead, fpGroupExec, fpGroupRead, fpOthersExec, 
fpOthersRead} else: - result.permissions = {fpUserExec..fpOthersRead} + formalInfo.permissions = {fpUserExec..fpOthersRead} # Retrieve basic file kind - result.kind = pcFile if (rawInfo.dwFileAttributes and FILE_ATTRIBUTE_DIRECTORY) != 0'i32: formalInfo.kind = pcDir + else: + formalInfo.kind = pcFile if (rawInfo.dwFileAttributes and FILE_ATTRIBUTE_REPARSE_POINT) != 0'i32: - formalInfo.kind = succ(result.kind) - + formalInfo.kind = succ(formalInfo.kind) else: - template checkAndIncludeMode(rawMode, formalMode: expr) = - if (rawInfo.st_mode and rawMode) != 0'i32: + template checkAndIncludeMode(rawMode, formalMode: untyped) = + if (rawInfo.st_mode and rawMode.Mode) != 0.Mode: formalInfo.permissions.incl(formalMode) formalInfo.id = (rawInfo.st_dev, rawInfo.st_ino) formalInfo.size = rawInfo.st_size - formalInfo.linkCount = rawInfo.st_Nlink - formalInfo.lastAccessTime = rawInfo.st_atime - formalInfo.lastWriteTime = rawInfo.st_mtime - formalInfo.creationTime = rawInfo.st_ctime + formalInfo.linkCount = rawInfo.st_nlink.BiggestInt + formalInfo.lastAccessTime = rawInfo.st_atim.toTime + formalInfo.lastWriteTime = rawInfo.st_mtim.toTime + formalInfo.creationTime = rawInfo.st_ctim.toTime + formalInfo.blockSize = rawInfo.st_blksize - result.permissions = {} + formalInfo.permissions = {} checkAndIncludeMode(S_IRUSR, fpUserRead) checkAndIncludeMode(S_IWUSR, fpUserWrite) checkAndIncludeMode(S_IXUSR, fpUserExec) @@ -1976,93 +811,222 @@ template rawToFormalFileInfo(rawInfo, formalInfo): expr = checkAndIncludeMode(S_IWOTH, fpOthersWrite) checkAndIncludeMode(S_IXOTH, fpOthersExec) - formalInfo.kind = pcFile - if S_ISDIR(rawInfo.st_mode): formalInfo.kind = pcDir - if S_ISLNK(rawInfo.st_mode): formalInfo.kind.inc() + (formalInfo.kind, formalInfo.isSpecial) = + if S_ISDIR(rawInfo.st_mode): + (pcDir, false) + elif S_ISLNK(rawInfo.st_mode): + assert(path != "") # symlinks can't occur for file handles + getSymlinkFileKind(path) + else: + (pcFile, not S_ISREG(rawInfo.st_mode)) + +when 
defined(js): + when not declared(FileHandle): + type FileHandle = distinct int32 + when not declared(File): + type File = object -proc getFileInfo*(handle: FileHandle): FileInfo = +proc getFileInfo*(handle: FileHandle): FileInfo {.noWeirdTarget.} = ## Retrieves file information for the file object represented by the given ## handle. ## ## If the information cannot be retrieved, such as when the file handle - ## is invalid, an error will be thrown. + ## is invalid, `OSError` is raised. + ## + ## See also: + ## * `getFileInfo(file) proc`_ + ## * `getFileInfo(path, followSymlink) proc`_ + # Done: ID, Kind, Size, Permissions, Link Count - when defined(Windows): - var rawInfo: TBY_HANDLE_FILE_INFORMATION + when defined(windows): + var rawInfo: BY_HANDLE_FILE_INFORMATION # We have to use the super special '_get_osfhandle' call (wrapped above) - # To transform the C file descripter to a native file handle. + # To transform the C file descriptor to a native file handle. var realHandle = get_osfhandle(handle) if getFileInformationByHandle(realHandle, addr rawInfo) == 0: - raiseOSError(osLastError()) - rawToFormalFileInfo(rawInfo, result) + raiseOSError(osLastError(), $handle) + rawToFormalFileInfo(rawInfo, "", result) else: - var rawInfo: TStat + var rawInfo: Stat if fstat(handle, rawInfo) < 0'i32: - raiseOSError(osLastError()) - rawToFormalFileInfo(rawInfo, result) + raiseOSError(osLastError(), $handle) + rawToFormalFileInfo(rawInfo, "", result) -proc getFileInfo*(file: File): FileInfo = +proc getFileInfo*(file: File): FileInfo {.noWeirdTarget.} = + ## Retrieves file information for the file object. 
+ ## + ## See also: + ## * `getFileInfo(handle) proc`_ + ## * `getFileInfo(path, followSymlink) proc`_ if file.isNil: raise newException(IOError, "File is nil") result = getFileInfo(file.getFileHandle()) -proc getFileInfo*(path: string, followSymlink = true): FileInfo = +proc getFileInfo*(path: string, followSymlink = true): FileInfo {.noWeirdTarget.} = ## Retrieves file information for the file object pointed to by `path`. - ## + ## ## Due to intrinsic differences between operating systems, the information - ## contained by the returned `FileInfo` structure will be slightly different - ## across platforms, and in some cases, incomplete or inaccurate. - ## - ## When `followSymlink` is true, symlinks are followed and the information - ## retrieved is information related to the symlink's target. Otherwise, - ## information on the symlink itself is retrieved. - ## + ## contained by the returned `FileInfo object`_ will be slightly + ## different across platforms, and in some cases, incomplete or inaccurate. + ## + ## When `followSymlink` is true (default), symlinks are followed and the + ## information retrieved is information related to the symlink's target. + ## Otherwise, information on the symlink itself is retrieved (however, + ## field `isSpecial` is still determined from the target on Unix). + ## ## If the information cannot be retrieved, such as when the path doesn't ## exist, or when permission restrictions prevent the program from retrieving - ## file information, an error will be thrown. - when defined(Windows): - var + ## file information, `OSError` is raised. 
+ ## + ## See also: + ## * `getFileInfo(handle) proc`_ + ## * `getFileInfo(file) proc`_ + when defined(windows): + var handle = openHandle(path, followSymlink) - rawInfo: TBY_HANDLE_FILE_INFORMATION + rawInfo: BY_HANDLE_FILE_INFORMATION if handle == INVALID_HANDLE_VALUE: - raiseOSError(osLastError()) + raiseOSError(osLastError(), path) if getFileInformationByHandle(handle, addr rawInfo) == 0: - raiseOSError(osLastError()) - rawToFormalFileInfo(rawInfo, result) + raiseOSError(osLastError(), path) + rawToFormalFileInfo(rawInfo, path, result) discard closeHandle(handle) else: - var rawInfo: TStat + var rawInfo: Stat if followSymlink: - if lstat(path, rawInfo) < 0'i32: - raiseOSError(osLastError()) - else: if stat(path, rawInfo) < 0'i32: - raiseOSError(osLastError()) - rawToFormalFileInfo(rawInfo, result) - -proc isHidden*(path: string): bool = - ## Determines whether a given path is hidden or not. Returns false if the - ## file doesn't exist. The given path must be accessible from the current - ## working directory of the program. - ## - ## On Windows, a file is hidden if the file's 'hidden' attribute is set. - ## On Unix-like systems, a file is hidden if it starts with a '.' (period) - ## and is not *just* '.' or '..' ' ." - when defined(Windows): - when useWinUnicode: - wrapUnary(attributes, getFileAttributesW, path) + raiseOSError(osLastError(), path) else: - var attributes = getFileAttributesA(path) + if lstat(path, rawInfo) < 0'i32: + raiseOSError(osLastError(), path) + rawToFormalFileInfo(rawInfo, path, result) + +proc sameFileContent*(path1, path2: string): bool {.rtl, extern: "nos$1", + tags: [ReadIOEffect], noWeirdTarget.} = + ## Returns true if both pathname arguments refer to files with identical + ## binary content. 
+ ## + ## See also: + ## * `sameFile proc`_ + var + a, b: File + if not open(a, path1): return false + if not open(b, path2): + close(a) + return false + let bufSize = getFileInfo(a).blockSize + var bufA = alloc(bufSize) + var bufB = alloc(bufSize) + while true: + var readA = readBuffer(a, bufA, bufSize) + var readB = readBuffer(b, bufB, bufSize) + if readA != readB: + result = false + break + if readA == 0: + result = true + break + result = equalMem(bufA, bufB, readA) + if not result: break + if readA != bufSize: break # end of file + dealloc(bufA) + dealloc(bufB) + close(a) + close(b) + +proc isHidden*(path: string): bool {.noWeirdTarget.} = + ## Determines whether ``path`` is hidden or not, using `this + ## reference <https://en.wikipedia.org/wiki/Hidden_file_and_hidden_directory>`_. + ## + ## On Windows: returns true if it exists and its "hidden" attribute is set. + ## + ## On posix: returns true if ``lastPathPart(path)`` starts with ``.`` and is + ## not ``.`` or ``..``. + ## + ## **Note**: paths are not normalized to determine `isHidden`. + runnableExamples: + when defined(posix): + assert ".foo".isHidden + assert not ".foo/bar".isHidden + assert not ".".isHidden + assert not "..".isHidden + assert not "".isHidden + assert ".foo/".isHidden + + when defined(windows): + wrapUnary(attributes, getFileAttributesW, path) if attributes != -1'i32: result = (attributes and FILE_ATTRIBUTE_HIDDEN) != 0'i32 else: - if fileExists(path): - let - fileName = extractFilename(path) - nameLen = len(fileName) - if nameLen == 2: - result = (fileName[0] == '.') and (fileName[1] != '.') - elif nameLen > 2: - result = (fileName[0] == '.') and (fileName[3] != '.') - -{.pop.} + let fileName = lastPathPart(path) + result = len(fileName) >= 2 and fileName[0] == '.' and fileName != ".." + +proc getCurrentProcessId*(): int {.noWeirdTarget.} = + ## Return current process ID. 
+ ## + ## See also: + ## * `osproc.processID(p: Process) <osproc.html#processID,Process>`_ + when defined(windows): + proc GetCurrentProcessId(): DWORD {.stdcall, dynlib: "kernel32", + importc: "GetCurrentProcessId".} + result = GetCurrentProcessId().int + else: + result = getpid() + +proc setLastModificationTime*(file: string, t: times.Time) {.noWeirdTarget.} = + ## Sets the `file`'s last modification time. `OSError` is raised in case of + ## an error. + when defined(posix): + let unixt = posix.Time(t.toUnix) + let micro = convert(Nanoseconds, Microseconds, t.nanosecond) + var timevals = [Timeval(tv_sec: unixt, tv_usec: micro), + Timeval(tv_sec: unixt, tv_usec: micro)] # [last access, last modification] + if utimes(file, timevals.addr) != 0: raiseOSError(osLastError(), file) + else: + let h = openHandle(path = file, writeAccess = true) + if h == INVALID_HANDLE_VALUE: raiseOSError(osLastError(), file) + var ft = t.toWinTime.toFILETIME + let res = setFileTime(h, nil, nil, ft.addr) + discard h.closeHandle + if res == 0'i32: raiseOSError(osLastError(), file) + + +func isValidFilename*(filename: string, maxLen = 259.Positive): bool {.since: (1, 1).} = + ## Returns `true` if `filename` is valid for crossplatform use. + ## + ## This is useful if you want to copy or save files across Windows, Linux, Mac, etc. + ## It uses `invalidFilenameChars`, `invalidFilenames` and `maxLen` to verify the specified `filename`. + ## + ## See also: + ## + ## * https://docs.microsoft.com/en-us/dotnet/api/system.io.pathtoolongexception + ## * https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file + ## * https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247%28v=vs.85%29.aspx + ## + ## .. warning:: This only checks filenames, not whole paths + ## (because basically you can mount anything as a path on Linux). 
+ runnableExamples: + assert not isValidFilename(" foo") # Leading white space + assert not isValidFilename("foo ") # Trailing white space + assert not isValidFilename("foo.") # Ends with dot + assert not isValidFilename("con.txt") # "CON" is invalid (Windows) + assert not isValidFilename("OwO:UwU") # ":" is invalid (Mac) + assert not isValidFilename("aux.bat") # "AUX" is invalid (Windows) + assert not isValidFilename("") # Empty string + assert not isValidFilename("foo/") # Filename is empty + + result = true + let f = filename.splitFile() + if unlikely(f.name.len + f.ext.len > maxLen or f.name.len == 0 or + f.name[0] == ' ' or f.name[^1] == ' ' or f.name[^1] == '.' or + find(f.name, invalidFilenameChars) != -1): return false + for invalid in invalidFilenames: + if cmpIgnoreCase(f.name, invalid) == 0: return false + + +# deprecated declarations +when not weirdTarget: + template existsFile*(args: varargs[untyped]): untyped {.deprecated: "use fileExists".} = + fileExists(args) + template existsDir*(args: varargs[untyped]): untyped {.deprecated: "use dirExists".} = + dirExists(args) diff --git a/lib/pure/osproc.nim b/lib/pure/osproc.nim index cd3700019..c304ecca6 100644 --- a/lib/pure/osproc.nim +++ b/lib/pure/osproc.nim @@ -9,136 +9,131 @@ ## This module implements an advanced facility for executing OS processes ## and process communication. 
+## +## **See also:** +## * `os module <os.html>`_ +## * `streams module <streams.html>`_ +## * `memfiles module <memfiles.html>`_ include "system/inclrtl" import - strutils, os, strtabs, streams, cpuinfo + std/[strutils, os, strtabs, streams, cpuinfo, streamwrapper, + private/since] + +export quoteShell, quoteShellWindows, quoteShellPosix when defined(windows): - import winlean + import std/winlean else: - import posix + import std/posix + +when defined(linux) and defined(useClone): + import std/linux + +when defined(nimPreviewSlimSystem): + import std/[syncio, assertions] + when defined(windows): + import std/widestrs -when defined(linux): - import linux type + ProcessOption* = enum ## Options that can be passed to `startProcess proc + ## <#startProcess,string,string,openArray[string],StringTableRef,set[ProcessOption]>`_. + poEchoCmd, ## Echo the command before execution. + poUsePath, ## Asks system to search for executable using PATH environment + ## variable. + ## On Windows, this is the default. + poEvalCommand, ## Pass `command` directly to the shell, without quoting. + ## Use it only if `command` comes from trusted source. + poStdErrToStdOut, ## Merge stdout and stderr to the stdout stream. + poParentStreams, ## Use the parent's streams. + poInteractive, ## Optimize the buffer handling for responsiveness for + ## UI applications. Currently this only affects + ## Windows: Named pipes are used so that you can peek + ## at the process' output streams. + poDaemon ## Windows: The program creates no Window. + ## Unix: Start the program as a daemon. This is still + ## work in progress! 
+ ProcessObj = object of RootObj when defined(windows): - fProcessHandle: THandle + fProcessHandle: Handle + fThreadHandle: Handle inHandle, outHandle, errHandle: FileHandle - id: THandle + id: Handle else: inHandle, outHandle, errHandle: FileHandle - inStream, outStream, errStream: Stream - id: TPid - exitCode: cint - - Process* = ref ProcessObj ## represents an operating system process - - ProcessOption* = enum ## options that can be passed `startProcess` - poEchoCmd, ## echo the command before execution - poUsePath, ## Asks system to search for executable using PATH environment - ## variable. - ## On Windows, this is the default. - poEvalCommand, ## Pass `command` directly to the shell, without quoting. - ## Use it only if `command` comes from trused source. - poStdErrToStdOut, ## merge stdout and stderr to the stdout stream - poParentStreams ## use the parent's streams - -{.deprecated: [TProcess: ProcessObj, PProcess: Process, - TProcessOption: ProcessOption].} - -const poUseShell* {.deprecated.} = poUsePath - ## Deprecated alias for poUsePath. - -proc quoteShellWindows*(s: string): string {.noSideEffect, rtl, extern: "nosp$1".} = - ## Quote s, so it can be safely passed to Windows API. - ## Based on Python's subprocess.list2cmdline - ## See http://msdn.microsoft.com/en-us/library/17w5ykft.aspx - let needQuote = {' ', '\t'} in s or s.len == 0 - - result = "" - var backslashBuff = "" - if needQuote: - result.add("\"") - - for c in s: - if c == '\\': - backslashBuff.add(c) - elif c == '\"': - result.add(backslashBuff) - result.add(backslashBuff) - backslashBuff.setLen(0) - result.add("\\\"") - else: - if backslashBuff.len != 0: - result.add(backslashBuff) - backslashBuff.setLen(0) - result.add(c) - - if needQuote: - result.add("\"") - -proc quoteShellPosix*(s: string): string {.noSideEffect, rtl, extern: "nosp$1".} = - ## Quote s, so it can be safely passed to POSIX shell. 
- ## Based on Python's pipes.quote - const safeUnixChars = {'%', '+', '-', '.', '/', '_', ':', '=', '@', - '0'..'9', 'A'..'Z', 'a'..'z'} - if s.len == 0: - return "''" - - let safe = s.allCharsInSet(safeUnixChars) - - if safe: - return s - else: - return "'" & s.replace("'", "'\"'\"'") & "'" - -proc quoteShell*(s: string): string {.noSideEffect, rtl, extern: "nosp$1".} = - ## Quote s, so it can be safely passed to shell. - when defined(Windows): - return quoteShellWindows(s) - elif defined(posix): - return quoteShellPosix(s) - else: - {.error:"quoteShell is not supported on your system".} - -proc execProcess*(command: string, - args: openArray[string] = [], - env: StringTableRef = nil, - options: set[ProcessOption] = {poStdErrToStdOut, - poUsePath, - poEvalCommand}): TaintedString {. - rtl, extern: "nosp$1", - tags: [ExecIOEffect, ReadIOEffect].} + id: Pid + inStream, outStream, errStream: owned(Stream) + exitStatus: cint + exitFlag: bool + options: set[ProcessOption] + + Process* = ref ProcessObj ## Represents an operating system process. + + +proc execProcess*(command: string, workingDir: string = "", + args: openArray[string] = [], env: StringTableRef = nil, + options: set[ProcessOption] = {poStdErrToStdOut, poUsePath, poEvalCommand}): + string {.rtl, extern: "nosp$1", raises: [OSError, IOError], + tags: [ExecIOEffect, ReadIOEffect, RootEffect].} ## A convenience procedure that executes ``command`` with ``startProcess`` ## and returns its output as a string. - ## WARNING: this function uses poEvalCommand by default for backward compatibility. - ## Make sure to pass options explicitly. - -proc execCmd*(command: string): int {.rtl, extern: "nosp$1", tags: [ExecIOEffect].} - ## Executes ``command`` and returns its error code. Standard input, output, - ## error streams are inherited from the calling process. This operation - ## is also often called `system`:idx:. 
- -proc startProcess*(command: string, - workingDir: string = "", - args: openArray[string] = [], - env: StringTableRef = nil, - options: set[ProcessOption] = {poStdErrToStdOut}): - Process {.rtl, extern: "nosp$1", tags: [ExecIOEffect, ReadEnvEffect].} + ## + ## .. warning:: This function uses `poEvalCommand` by default for backwards + ## compatibility. Make sure to pass options explicitly. + ## + ## See also: + ## * `startProcess proc + ## <#startProcess,string,string,openArray[string],StringTableRef,set[ProcessOption]>`_ + ## * `execProcesses proc <#execProcesses,openArray[string],proc(int),proc(int,Process)>`_ + ## * `execCmd proc <#execCmd,string>`_ + ## + ## Example: + ## ```Nim + ## let outp = execProcess("nim", args=["c", "-r", "mytestfile.nim"], options={poUsePath}) + ## let outp_shell = execProcess("nim c -r mytestfile.nim") + ## # Note: outp may have an interleave of text from the nim compile + ## # and any output from mytestfile when it runs + ## ``` + +proc execCmd*(command: string): int {.rtl, extern: "nosp$1", + tags: [ExecIOEffect, ReadIOEffect, RootEffect].} + ## Executes ``command`` and returns its error code. + ## + ## Standard input, output, error streams are inherited from the calling process. + ## This operation is also often called `system`:idx:. 
+ ## + ## See also: + ## * `execCmdEx proc <#execCmdEx,string,set[ProcessOption],StringTableRef,string,string>`_ + ## * `startProcess proc + ## <#startProcess,string,string,openArray[string],StringTableRef,set[ProcessOption]>`_ + ## * `execProcess proc + ## <#execProcess,string,string,openArray[string],StringTableRef,set[ProcessOption]>`_ + ## + ## Example: + ## ```Nim + ## let errC = execCmd("nim c -r mytestfile.nim") + ## ``` + +proc startProcess*(command: string, workingDir: string = "", + args: openArray[string] = [], env: StringTableRef = nil, + options: set[ProcessOption] = {poStdErrToStdOut}): + owned(Process) {.rtl, extern: "nosp$1", raises: [OSError, IOError], + tags: [ExecIOEffect, ReadEnvEffect, RootEffect].} ## Starts a process. `Command` is the executable file, `workingDir` is the ## process's working directory. If ``workingDir == ""`` the current directory - ## is used. `args` are the command line arguments that are passed to the + ## is used (default). `args` are the command line arguments that are passed to the ## process. On many operating systems, the first command line argument is the - ## name of the executable. `args` should not contain this argument! + ## name of the executable. `args` should *not* contain this argument! ## `env` is the environment that will be passed to the process. - ## If ``env == nil`` the environment is inherited of + ## If ``env == nil`` (default) the environment is inherited of ## the parent process. `options` are additional flags that may be passed - ## to `startProcess`. See the documentation of ``TProcessOption`` for the - ## meaning of these flags. You need to `close` the process when done. + ## to `startProcess`. See the documentation of `ProcessOption<#ProcessOption>`_ + ## for the meaning of these flags. + ## + ## You need to `close <#close,Process>`_ the process when done. 
## ## Note that you can't pass any `args` if you use the option ## ``poEvalCommand``, which invokes the system shell to run the specified @@ -149,201 +144,418 @@ proc startProcess*(command: string, ## invocation if possible as it leads to non portable software. ## ## Return value: The newly created process object. Nil is never returned, - ## but ``EOS`` is raised in case of an error. - -proc startCmd*(command: string, options: set[ProcessOption] = { - poStdErrToStdOut, poUsePath}): Process {. - tags: [ExecIOEffect, ReadEnvEffect], deprecated.} = - ## Deprecated - use `startProcess` directly. - result = startProcess(command=command, options=options + {poEvalCommand}) - -proc close*(p: Process) {.rtl, extern: "nosp$1", tags: [].} - ## When the process has finished executing, cleanup related handles + ## but ``OSError`` is raised in case of an error. + ## + ## See also: + ## * `execProcesses proc <#execProcesses,openArray[string],proc(int),proc(int,Process)>`_ + ## * `execProcess proc + ## <#execProcess,string,string,openArray[string],StringTableRef,set[ProcessOption]>`_ + ## * `execCmd proc <#execCmd,string>`_ + +proc close*(p: Process) {.rtl, extern: "nosp$1", raises: [IOError, OSError], tags: [WriteIOEffect].} + ## When the process has finished executing, cleanup related handles. + ## + ## .. warning:: If the process has not finished executing, this will forcibly + ## terminate the process. Doing so may result in zombie processes and + ## `pty leaks <http://stackoverflow.com/questions/27021641/how-to-fix-request-failed-on-channel-0>`_. proc suspend*(p: Process) {.rtl, extern: "nosp$1", tags: [].} ## Suspends the process `p`. + ## + ## See also: + ## * `resume proc <#resume,Process>`_ + ## * `terminate proc <#terminate,Process>`_ + ## * `kill proc <#kill,Process>`_ + proc resume*(p: Process) {.rtl, extern: "nosp$1", tags: [].} ## Resumes the process `p`. 
+ ## + ## See also: + ## * `suspend proc <#suspend,Process>`_ + ## * `terminate proc <#terminate,Process>`_ + ## * `kill proc <#kill,Process>`_ proc terminate*(p: Process) {.rtl, extern: "nosp$1", tags: [].} - ## Stop the process `p`. On Posix OSes the procedure sends ``SIGTERM`` - ## to the process. On Windows the Win32 API function ``TerminateProcess()`` + ## Stop the process `p`. + ## + ## On Posix OSes the procedure sends ``SIGTERM`` to the process. + ## On Windows the Win32 API function ``TerminateProcess()`` ## is called to stop the process. + ## + ## See also: + ## * `suspend proc <#suspend,Process>`_ + ## * `resume proc <#resume,Process>`_ + ## * `kill proc <#kill,Process>`_ + ## * `posix_utils.sendSignal(pid: Pid, signal: int) <posix_utils.html#sendSignal,Pid,int>`_ proc kill*(p: Process) {.rtl, extern: "nosp$1", tags: [].} - ## Kill the process `p`. On Posix OSes the procedure sends ``SIGKILL`` to - ## the process. On Windows ``kill()`` is simply an alias for ``terminate()``. - -proc running*(p: Process): bool {.rtl, extern: "nosp$1", tags: [].} - ## Returns true iff the process `p` is still running. Returns immediately. + ## Kill the process `p`. + ## + ## On Posix OSes the procedure sends ``SIGKILL`` to the process. + ## On Windows ``kill`` is simply an alias for `terminate() <#terminate,Process>`_. + ## + ## See also: + ## * `suspend proc <#suspend,Process>`_ + ## * `resume proc <#resume,Process>`_ + ## * `terminate proc <#terminate,Process>`_ + ## * `posix_utils.sendSignal(pid: Pid, signal: int) <posix_utils.html#sendSignal,Pid,int>`_ + +proc running*(p: Process): bool {.rtl, extern: "nosp$1", raises: [OSError], tags: [].} + ## Returns true if the process `p` is still running. Returns immediately. proc processID*(p: Process): int {.rtl, extern: "nosp$1".} = - ## returns `p`'s process ID. + ## Returns `p`'s process ID. 
+ ## + ## See also: + ## * `os.getCurrentProcessId proc <os.html#getCurrentProcessId>`_ return p.id proc waitForExit*(p: Process, timeout: int = -1): int {.rtl, - extern: "nosp$1", tags: [].} - ## waits for the process to finish and returns `p`'s error code. + extern: "nosp$1", raises: [OSError, ValueError], tags: [TimeEffect].} + ## Waits for the process to finish and returns `p`'s error code. + ## + ## .. warning:: Be careful when using `waitForExit` for processes created without + ## `poParentStreams` because they may fill output buffers, causing deadlock. + ## + ## On posix, if the process has exited because of a signal, 128 + signal + ## number will be returned. ## - ## **Warning**: Be careful when using waitForExit for processes created without - ## poParentStreams because they may fill output buffers, causing deadlock. + ## .. warning:: When working with `timeout` parameters, remember that the value is + ## typically expressed in milliseconds, and ensure that the correct unit of time + ## is used to avoid unexpected behavior. -proc peekExitCode*(p: Process): int {.tags: [].} - ## return -1 if the process is still running. Otherwise the process' exit code +proc peekExitCode*(p: Process): int {.rtl, extern: "nosp$1", raises: [OSError], tags: [].} + ## Return `-1` if the process is still running. Otherwise the process' exit code. + ## + ## On posix, if the process has exited because of a signal, 128 + signal + ## number will be returned. proc inputStream*(p: Process): Stream {.rtl, extern: "nosp$1", tags: [].} - ## returns ``p``'s input stream for writing to. + ## Returns ``p``'s input stream for writing to. + ## + ## .. warning:: The returned `Stream` should not be closed manually as it + ## is closed when closing the Process ``p``. ## - ## **Warning**: The returned `PStream` should not be closed manually as it - ## is closed when closing the PProcess ``p``. 
+ ## See also: + ## * `outputStream proc <#outputStream,Process>`_ + ## * `errorStream proc <#errorStream,Process>`_ -proc outputStream*(p: Process): Stream {.rtl, extern: "nosp$1", tags: [].} - ## returns ``p``'s output stream for reading from. +proc outputStream*(p: Process): Stream {.rtl, extern: "nosp$1", raises: [IOError, OSError], tags: [].} + ## Returns ``p``'s output stream for reading from. + ## + ## You cannot perform peek/write/setOption operations to this stream. + ## Use `peekableOutputStream proc <#peekableOutputStream,Process>`_ + ## if you need to peek stream. ## - ## **Warning**: The returned `PStream` should not be closed manually as it - ## is closed when closing the PProcess ``p``. + ## .. warning:: The returned `Stream` should not be closed manually as it + ## is closed when closing the Process ``p``. + ## + ## See also: + ## * `inputStream proc <#inputStream,Process>`_ + ## * `errorStream proc <#errorStream,Process>`_ proc errorStream*(p: Process): Stream {.rtl, extern: "nosp$1", tags: [].} - ## returns ``p``'s error stream for reading from. + ## Returns ``p``'s error stream for reading from. + ## + ## You cannot perform peek/write/setOption operations to this stream. + ## Use `peekableErrorStream proc <#peekableErrorStream,Process>`_ + ## if you need to peek stream. + ## + ## .. warning:: The returned `Stream` should not be closed manually as it + ## is closed when closing the Process ``p``. + ## + ## See also: + ## * `inputStream proc <#inputStream,Process>`_ + ## * `outputStream proc <#outputStream,Process>`_ + +proc peekableOutputStream*(p: Process): Stream {.rtl, extern: "nosp$1", tags: [], since: (1, 3).} + ## Returns ``p``'s output stream for reading from. + ## + ## You can peek returned stream. + ## + ## .. warning:: The returned `Stream` should not be closed manually as it + ## is closed when closing the Process ``p``. 
## - ## **Warning**: The returned `PStream` should not be closed manually as it - ## is closed when closing the PProcess ``p``. + ## See also: + ## * `outputStream proc <#outputStream,Process>`_ + ## * `peekableErrorStream proc <#peekableErrorStream,Process>`_ -proc inputHandle*(p: Process): FileHandle {.rtl, extern: "nosp$1", +proc peekableErrorStream*(p: Process): Stream {.rtl, extern: "nosp$1", tags: [], since: (1, 3).} + ## Returns ``p``'s error stream for reading from. + ## + ## You can run peek operation to returned stream. + ## + ## .. warning:: The returned `Stream` should not be closed manually as it + ## is closed when closing the Process ``p``. + ## + ## See also: + ## * `errorStream proc <#errorStream,Process>`_ + ## * `peekableOutputStream proc <#peekableOutputStream,Process>`_ + +proc inputHandle*(p: Process): FileHandle {.rtl, raises: [], extern: "nosp$1", tags: [].} = - ## returns ``p``'s input file handle for writing to. + ## Returns ``p``'s input file handle for writing to. + ## + ## .. warning:: The returned `FileHandle` should not be closed manually as + ## it is closed when closing the Process ``p``. ## - ## **Warning**: The returned `TFileHandle` should not be closed manually as - ## it is closed when closing the PProcess ``p``. + ## See also: + ## * `outputHandle proc <#outputHandle,Process>`_ + ## * `errorHandle proc <#errorHandle,Process>`_ result = p.inHandle proc outputHandle*(p: Process): FileHandle {.rtl, extern: "nosp$1", - tags: [].} = - ## returns ``p``'s output file handle for reading from. + raises: [], tags: [].} = + ## Returns ``p``'s output file handle for reading from. ## - ## **Warning**: The returned `TFileHandle` should not be closed manually as - ## it is closed when closing the PProcess ``p``. + ## .. warning:: The returned `FileHandle` should not be closed manually as + ## it is closed when closing the Process ``p``. 
+ ## + ## See also: + ## * `inputHandle proc <#inputHandle,Process>`_ + ## * `errorHandle proc <#errorHandle,Process>`_ result = p.outHandle proc errorHandle*(p: Process): FileHandle {.rtl, extern: "nosp$1", - tags: [].} = - ## returns ``p``'s error file handle for reading from. + raises: [], tags: [].} = + ## Returns ``p``'s error file handle for reading from. + ## + ## .. warning:: The returned `FileHandle` should not be closed manually as + ## it is closed when closing the Process ``p``. ## - ## **Warning**: The returned `TFileHandle` should not be closed manually as - ## it is closed when closing the PProcess ``p``. + ## See also: + ## * `inputHandle proc <#inputHandle,Process>`_ + ## * `outputHandle proc <#outputHandle,Process>`_ result = p.errHandle -proc countProcessors*(): int {.rtl, extern: "nosp$1".} = - ## returns the numer of the processors/cores the machine has. +proc countProcessors*(): int {.rtl, extern: "nosp$1", raises: [].} = + ## Returns the number of the processors/cores the machine has. ## Returns 0 if it cannot be detected. + ## It is implemented just calling `cpuinfo.countProcessors`. result = cpuinfo.countProcessors() +when not defined(nimHasEffectsOf): + {.pragma: effectsOf.} + proc execProcesses*(cmds: openArray[string], - options = {poStdErrToStdOut, poParentStreams}, - n = countProcessors(), - beforeRunEvent: proc(idx: int) = nil): int - {.rtl, extern: "nosp$1", - tags: [ExecIOEffect, TimeEffect, ReadEnvEffect, RootEffect]} = - ## executes the commands `cmds` in parallel. Creates `n` processes - ## that execute in parallel. The highest return value of all processes - ## is returned. Runs `beforeRunEvent` before running each command. 
- when defined(posix): - # poParentStreams causes problems on Posix, so we simply disable it: - var options = options - {poParentStreams} + options = {poStdErrToStdOut, poParentStreams}, n = countProcessors(), + beforeRunEvent: proc(idx: int) = nil, + afterRunEvent: proc(idx: int, p: Process) = nil): + int {.rtl, extern: "nosp$1", + raises: [ValueError, OSError, IOError], + tags: [ExecIOEffect, TimeEffect, ReadEnvEffect, RootEffect], + effectsOf: [beforeRunEvent, afterRunEvent].} = + ## Executes the commands `cmds` in parallel. + ## Creates `n` processes that execute in parallel. + ## + ## The highest (absolute) return value of all processes is returned. + ## Runs `beforeRunEvent` before running each command. assert n > 0 if n > 1: - var q: seq[Process] - newSeq(q, n) - var m = min(n, cmds.len) - for i in 0..m-1: + var i = 0 + var q = newSeq[Process](n) + var idxs = newSeq[int](n) # map process index to cmds index + + when defined(windows): + var w: WOHandleArray + var m = min(min(n, MAXIMUM_WAIT_OBJECTS), cmds.len) + var wcount = m + else: + var m = min(n, cmds.len) + + while i < m: if beforeRunEvent != nil: beforeRunEvent(i) - q[i] = startProcess(cmds[i], options=options + {poEvalCommand}) - when defined(noBusyWaiting): - var r = 0 - for i in m..high(cmds): - when defined(debugExecProcesses): - var err = "" - var outp = outputStream(q[r]) - while running(q[r]) or not atEnd(outp): - err.add(outp.readLine()) - err.add("\n") - echo(err) - result = max(waitForExit(q[r]), result) - if q[r] != nil: close(q[r]) - if beforeRunEvent != nil: - beforeRunEvent(i) - q[r] = startProcess(cmds[i], options=options + {poEvalCommand}) - r = (r + 1) mod n - else: - var i = m - while i <= high(cmds): - sleep(50) - for r in 0..n-1: - if not running(q[r]): - #echo(outputStream(q[r]).readLine()) - result = max(waitForExit(q[r]), result) - if q[r] != nil: close(q[r]) - if beforeRunEvent != nil: - beforeRunEvent(i) - q[r] = startProcess(cmds[i], options=options + {poEvalCommand}) - inc(i) 
- if i > high(cmds): break - for j in 0..m-1: - result = max(waitForExit(q[j]), result) - if q[j] != nil: close(q[j]) + q[i] = startProcess(cmds[i], options = options + {poEvalCommand}) + idxs[i] = i + when defined(windows): + w[i] = q[i].fProcessHandle + inc(i) + + var ecount = len(cmds) + while ecount > 0: + var rexit = -1 + when defined(windows): + # waiting for all children, get result if any child exits + var ret = waitForMultipleObjects(int32(wcount), addr(w), 0'i32, + INFINITE) + if ret == WAIT_TIMEOUT: + # must not be happen + discard + elif ret == WAIT_FAILED: + raiseOSError(osLastError()) + else: + var status: int32 + for r in 0..m-1: + if not isNil(q[r]) and q[r].fProcessHandle == w[ret]: + discard getExitCodeProcess(q[r].fProcessHandle, status) + q[r].exitFlag = true + q[r].exitStatus = status + rexit = r + break + else: + var status: cint = 1 + # waiting for all children, get result if any child exits + let res = waitpid(-1, status, 0) + if res > 0: + for r in 0..m-1: + if not isNil(q[r]) and q[r].id == res: + if WIFEXITED(status) or WIFSIGNALED(status): + q[r].exitFlag = true + q[r].exitStatus = status + rexit = r + break + else: + let err = osLastError() + if err == OSErrorCode(ECHILD): + # some child exits, we need to check our childs exit codes + for r in 0..m-1: + if (not isNil(q[r])) and (not running(q[r])): + q[r].exitFlag = true + q[r].exitStatus = status + rexit = r + break + elif err == OSErrorCode(EINTR): + # signal interrupted our syscall, lets repeat it + continue + else: + # all other errors are exceptions + raiseOSError(err) + + if rexit >= 0: + when defined(windows): + let processHandle = q[rexit].fProcessHandle + result = max(result, abs(q[rexit].peekExitCode())) + if afterRunEvent != nil: afterRunEvent(idxs[rexit], q[rexit]) + close(q[rexit]) + if i < len(cmds): + if beforeRunEvent != nil: beforeRunEvent(i) + q[rexit] = startProcess(cmds[i], + options = options + {poEvalCommand}) + idxs[rexit] = i + when defined(windows): + w[rexit] = 
q[rexit].fProcessHandle + inc(i) + else: + when defined(windows): + for k in 0..wcount - 1: + if w[k] == processHandle: + w[k] = w[wcount - 1] + w[wcount - 1] = 0 + dec(wcount) + break + q[rexit] = nil + dec(ecount) else: for i in 0..high(cmds): if beforeRunEvent != nil: beforeRunEvent(i) - var p = startProcess(cmds[i], options=options + {poEvalCommand}) - result = max(waitForExit(p), result) + var p = startProcess(cmds[i], options = options + {poEvalCommand}) + result = max(abs(waitForExit(p)), result) + if afterRunEvent != nil: afterRunEvent(i, p) close(p) -proc select*(readfds: var seq[Process], timeout = 500): int - ## `select` with a sensible Nim interface. `timeout` is in miliseconds. - ## Specify -1 for no timeout. Returns the number of processes that are - ## ready to read from. The processes that are ready to be read from are - ## removed from `readfds`. +iterator lines*(p: Process, keepNewLines = false): string {.since: (1, 3), raises: [OSError, IOError, ValueError], tags: [ReadIOEffect, TimeEffect].} = + ## Convenience iterator for working with `startProcess` to read data from a + ## background process. + ## + ## See also: + ## * `readLines proc <#readLines,Process>`_ ## - ## **Warning**: This function may give unexpected or completely wrong - ## results on Windows. 
+ ## Example: + ## ```Nim + ## const opts = {poUsePath, poDaemon, poStdErrToStdOut} + ## var ps: seq[Process] + ## for prog in ["a", "b"]: # run 2 progs in parallel + ## ps.add startProcess("nim", "", ["r", prog], nil, opts) + ## for p in ps: + ## var i = 0 + ## for line in p.lines: + ## echo line + ## i.inc + ## if i > 100: break + ## p.close + ## ``` + var outp = p.outputStream + var line = newStringOfCap(120) + while outp.readLine(line): + if keepNewLines: + line.add("\n") + yield line + discard waitForExit(p) + +proc readLines*(p: Process): (seq[string], int) {.since: (1, 3), + raises: [OSError, IOError, ValueError], tags: [ReadIOEffect, TimeEffect].} = + ## Convenience function for working with `startProcess` to read data from a + ## background process. + ## + ## See also: + ## * `lines iterator <#lines.i,Process>`_ + ## + ## Example: + ## ```Nim + ## const opts = {poUsePath, poDaemon, poStdErrToStdOut} + ## var ps: seq[Process] + ## for prog in ["a", "b"]: # run 2 progs in parallel + ## ps.add startProcess("nim", "", ["r", prog], nil, opts) + ## for p in ps: + ## let (lines, exCode) = p.readLines + ## if exCode != 0: + ## for line in lines: echo line + ## p.close + ## ``` + for line in p.lines: result[0].add(line) + result[1] = p.peekExitCode when not defined(useNimRtl): - proc execProcess(command: string, - args: openArray[string] = [], - env: StringTableRef = nil, - options: set[ProcessOption] = {poStdErrToStdOut, - poUsePath, - poEvalCommand}): TaintedString = - var p = startProcess(command, args=args, env=env, options=options) + proc execProcess(command: string, workingDir: string = "", + args: openArray[string] = [], env: StringTableRef = nil, + options: set[ProcessOption] = {poStdErrToStdOut, poUsePath, + poEvalCommand}): + string = + + var p = startProcess(command, workingDir = workingDir, args = args, + env = env, options = options) var outp = outputStream(p) - result = TaintedString"" - var line = newStringOfCap(120).TaintedString + result = "" + var 
line = newStringOfCap(120) + # consider `p.lines(keepNewLines=true)` to circumvent `running` busy-wait while true: # FIXME: converts CR-LF to LF. if outp.readLine(line): - result.string.add(line.string) - result.string.add("\n") + result.add(line) + result.add("\n") elif not running(p): break close(p) +template streamAccess(p) = + assert poParentStreams notin p.options, "API usage error: stream access not allowed when you use poParentStreams" -when defined(Windows) and not defined(useNimRtl): +when defined(windows) and not defined(useNimRtl): # We need to implement a handle stream for Windows: type - PFileHandleStream = ref TFileHandleStream - TFileHandleStream = object of StreamObj - handle: THandle + FileHandleStream = ref object of StreamObj + handle: Handle atTheEnd: bool - proc hsClose(s: Stream) = discard # nothing to do here - proc hsAtEnd(s: Stream): bool = return PFileHandleStream(s).atTheEnd + proc closeHandleCheck(handle: Handle) {.inline.} = + if handle.closeHandle() == 0: + raiseOSError(osLastError()) + + proc fileClose[T: Handle | FileHandle](h: var T) {.inline.} = + if h > 4: + closeHandleCheck(h) + h = INVALID_HANDLE_VALUE.T + + proc hsClose(s: Stream) = + FileHandleStream(s).handle.fileClose() + + proc hsAtEnd(s: Stream): bool = return FileHandleStream(s).atTheEnd proc hsReadData(s: Stream, buffer: pointer, bufLen: int): int = - var s = PFileHandleStream(s) + var s = FileHandleStream(s) if s.atTheEnd: return 0 var br: int32 var a = winlean.readFile(s.handle, buffer, bufLen.cint, addr br, nil) @@ -351,81 +563,137 @@ when defined(Windows) and not defined(useNimRtl): # TRUE and n (>0) bytes returned (good data). # FALSE and bytes returned undefined (system error). 
if a == 0 and br != 0: raiseOSError(osLastError()) - s.atTheEnd = br < bufLen + s.atTheEnd = br == 0 #< bufLen result = br proc hsWriteData(s: Stream, buffer: pointer, bufLen: int) = - var s = PFileHandleStream(s) + var s = FileHandleStream(s) var bytesWritten: int32 var a = winlean.writeFile(s.handle, buffer, bufLen.cint, addr bytesWritten, nil) if a == 0: raiseOSError(osLastError()) - proc newFileHandleStream(handle: THandle): PFileHandleStream = - new(result) - result.handle = handle - result.closeImpl = hsClose - result.atEndImpl = hsAtEnd - result.readDataImpl = hsReadData - result.writeDataImpl = hsWriteData - - proc buildCommandLine(a: string, args: openArray[string]): cstring = - var res = quoteShell(a) + proc newFileHandleStream(handle: Handle): owned FileHandleStream = + result = FileHandleStream(handle: handle, closeImpl: hsClose, atEndImpl: hsAtEnd, + readDataImpl: hsReadData, writeDataImpl: hsWriteData) + + proc buildCommandLine(a: string, args: openArray[string]): string = + result = quoteShell(a) for i in 0..high(args): - res.add(' ') - res.add(quoteShell(args[i])) - result = cast[cstring](alloc0(res.len+1)) - copyMem(result, cstring(res), res.len) + result.add(' ') + result.add(quoteShell(args[i])) - proc buildEnv(env: StringTableRef): cstring = + proc buildEnv(env: StringTableRef): tuple[str: cstring, len: int] = var L = 0 for key, val in pairs(env): inc(L, key.len + val.len + 2) - result = cast[cstring](alloc0(L+2)) + var str = cast[cstring](alloc0(L+2)) L = 0 for key, val in pairs(env): var x = key & "=" & val - copyMem(addr(result[L]), cstring(x), x.len+1) # copy \0 + copyMem(addr(str[L]), cstring(x), x.len+1) # copy \0 inc(L, x.len+1) + (str, L) - #proc open_osfhandle(osh: THandle, mode: int): int {. + #proc open_osfhandle(osh: Handle, mode: int): int {. 
# importc: "_open_osfhandle", header: "<fcntl.h>".} #var # O_WRONLY {.importc: "_O_WRONLY", header: "<fcntl.h>".}: int # O_RDONLY {.importc: "_O_RDONLY", header: "<fcntl.h>".}: int + proc myDup(h: Handle; inherit: WINBOOL = 1): Handle = + let thisProc = getCurrentProcess() + if duplicateHandle(thisProc, h, thisProc, addr result, 0, inherit, + DUPLICATE_SAME_ACCESS) == 0: + raiseOSError(osLastError()) - proc createPipeHandles(rdHandle, wrHandle: var THandle) = - var piInheritablePipe: TSECURITY_ATTRIBUTES - piInheritablePipe.nLength = sizeof(TSECURITY_ATTRIBUTES).cint - piInheritablePipe.lpSecurityDescriptor = nil - piInheritablePipe.bInheritHandle = 1 - if createPipe(rdHandle, wrHandle, piInheritablePipe, 1024) == 0'i32: + proc createAllPipeHandles(si: var STARTUPINFO; + stdin, stdout, stderr: var Handle; hash: int) = + var sa: SECURITY_ATTRIBUTES + sa.nLength = sizeof(SECURITY_ATTRIBUTES).cint + sa.lpSecurityDescriptor = nil + sa.bInheritHandle = 1 + let pipeOutName = newWideCString(r"\\.\pipe\stdout" & $hash) + let pipeInName = newWideCString(r"\\.\pipe\stdin" & $hash) + let pipeOut = createNamedPipe(pipeOutName, + dwOpenMode = PIPE_ACCESS_INBOUND or FILE_FLAG_WRITE_THROUGH, + dwPipeMode = PIPE_NOWAIT, + nMaxInstances = 1, + nOutBufferSize = 1024, nInBufferSize = 1024, + nDefaultTimeOut = 0, addr sa) + if pipeOut == INVALID_HANDLE_VALUE: + raiseOSError(osLastError()) + let pipeIn = createNamedPipe(pipeInName, + dwOpenMode = PIPE_ACCESS_OUTBOUND or FILE_FLAG_WRITE_THROUGH, + dwPipeMode = PIPE_NOWAIT, + nMaxInstances = 1, + nOutBufferSize = 1024, nInBufferSize = 1024, + nDefaultTimeOut = 0, addr sa) + if pipeIn == INVALID_HANDLE_VALUE: + raiseOSError(osLastError()) + + si.hStdOutput = createFileW(pipeOutName, + FILE_WRITE_DATA or SYNCHRONIZE, 0, addr sa, + OPEN_EXISTING, # very important flag! 
+ FILE_ATTRIBUTE_NORMAL, + 0 # no template file for OPEN_EXISTING + ) + if si.hStdOutput == INVALID_HANDLE_VALUE: + raiseOSError(osLastError()) + si.hStdError = myDup(si.hStdOutput) + si.hStdInput = createFileW(pipeInName, + FILE_READ_DATA or SYNCHRONIZE, 0, addr sa, + OPEN_EXISTING, # very important flag! + FILE_ATTRIBUTE_NORMAL, + 0 # no template file for OPEN_EXISTING + ) + if si.hStdInput == INVALID_HANDLE_VALUE: raiseOSError(osLastError()) - proc fileClose(h: THandle) {.inline.} = - if h > 4: discard closeHandle(h) + stdin = myDup(pipeIn, 0) + stdout = myDup(pipeOut, 0) + closeHandleCheck(pipeIn) + closeHandleCheck(pipeOut) + stderr = stdout + + proc createPipeHandles(rdHandle, wrHandle: var Handle) = + var sa: SECURITY_ATTRIBUTES + sa.nLength = sizeof(SECURITY_ATTRIBUTES).cint + sa.lpSecurityDescriptor = nil + sa.bInheritHandle = 1 + if createPipe(rdHandle, wrHandle, sa, 0) == 0'i32: + raiseOSError(osLastError()) - proc startProcess(command: string, - workingDir: string = "", - args: openArray[string] = [], - env: StringTableRef = nil, - options: set[ProcessOption] = {poStdErrToStdOut}): Process = + proc startProcess(command: string, workingDir: string = "", + args: openArray[string] = [], env: StringTableRef = nil, + options: set[ProcessOption] = {poStdErrToStdOut}): + owned Process = var - si: TSTARTUPINFO - procInfo: TPROCESS_INFORMATION + si: STARTUPINFO + procInfo: PROCESS_INFORMATION success: int - hi, ho, he: THandle + hi, ho, he: Handle new(result) + result.options = options + result.exitFlag = true si.cb = sizeof(si).cint if poParentStreams notin options: si.dwFlags = STARTF_USESTDHANDLES # STARTF_USESHOWWINDOW or - createPipeHandles(si.hStdInput, hi) - createPipeHandles(ho, si.hStdOutput) - if poStdErrToStdOut in options: - si.hStdError = si.hStdOutput - he = ho + if poInteractive notin options: + createPipeHandles(si.hStdInput, hi) + createPipeHandles(ho, si.hStdOutput) + if poStdErrToStdOut in options: + si.hStdError = si.hStdOutput + he = ho + 
else: + createPipeHandles(he, si.hStdError) + if setHandleInformation(he, DWORD(1), DWORD(0)) == 0'i32: + raiseOSError(osLastError()) + if setHandleInformation(hi, DWORD(1), DWORD(0)) == 0'i32: + raiseOSError(osLastError()) + if setHandleInformation(ho, DWORD(1), DWORD(0)) == 0'i32: + raiseOSError(osLastError()) else: - createPipeHandles(he, si.hStdError) + createAllPipeHandles(si, hi, ho, he, cast[int](result)) result.inHandle = FileHandle(hi) result.outHandle = FileHandle(ho) result.errHandle = FileHandle(he) @@ -438,26 +706,27 @@ when defined(Windows) and not defined(useNimRtl): result.errHandle = FileHandle(si.hStdError) var cmdl: cstring + var cmdRoot: string if poEvalCommand in options: cmdl = command assert args.len == 0 else: - cmdl = buildCommandLine(command, args) + cmdRoot = buildCommandLine(command, args) + cmdl = cstring(cmdRoot) var wd: cstring = nil - var e: cstring = nil + var e = (str: nil.cstring, len: -1) if len(workingDir) > 0: wd = workingDir if env != nil: e = buildEnv(env) if poEchoCmd in options: echo($cmdl) - when useWinUnicode: - var tmp = newWideCString(cmdl) - var ee = newWideCString(e) - var wwd = newWideCString(wd) - success = winlean.createProcessW(nil, - tmp, nil, nil, 1, NORMAL_PRIORITY_CLASS or CREATE_UNICODE_ENVIRONMENT, - ee, wwd, si, procInfo) - else: - success = winlean.createProcessA(nil, - cmdl, nil, nil, 1, NORMAL_PRIORITY_CLASS, e, wd, si, procInfo) + var tmp = newWideCString(cmdl) + var ee = + if e.str.isNil: newWideCString(cstring(nil)) + else: newWideCString(e.str, e.len) + var wwd = newWideCString(wd) + var flags = NORMAL_PRIORITY_CLASS or CREATE_UNICODE_ENVIRONMENT + if poDaemon in options: flags = flags or CREATE_NO_WINDOW + success = winlean.createProcessW(nil, tmp, nil, nil, 1, flags, + ee, wwd, si, procInfo) let lastError = osLastError() if poParentStreams notin options: @@ -466,30 +735,59 @@ when defined(Windows) and not defined(useNimRtl): if poStdErrToStdOut notin options: fileClose(si.hStdError) - if e != nil: 
dealloc(e) - if success == 0: raiseOSError(lastError) - # Close the handle now so anyone waiting is woken: - discard closeHandle(procInfo.hThread) + if e.str != nil: dealloc(e.str) + if success == 0: + if poInteractive in result.options: close(result) + const errInvalidParameter = 87.int + const errFileNotFound = 2.int + if lastError.int in {errInvalidParameter, errFileNotFound}: + raiseOSError(lastError, + "Requested command not found: '" & command & "'. OS error:") + else: + raiseOSError(lastError, command) result.fProcessHandle = procInfo.hProcess + result.fThreadHandle = procInfo.hThread result.id = procInfo.dwProcessId + result.exitFlag = false + + proc closeThreadAndProcessHandle(p: Process) = + if p.fThreadHandle != 0: + closeHandleCheck(p.fThreadHandle) + p.fThreadHandle = 0 + + if p.fProcessHandle != 0: + closeHandleCheck(p.fProcessHandle) + p.fProcessHandle = 0 proc close(p: Process) = - when false: - # somehow this does not work on Windows: - discard closeHandle(p.inHandle) - discard closeHandle(p.outHandle) - discard closeHandle(p.errHandle) - discard closeHandle(p.FProcessHandle) + if poParentStreams notin p.options: + if p.inStream == nil: + p.inHandle.fileClose() + else: + # p.inHandle can be already closed via inputStream. + p.inStream.close + + # You may NOT close outputStream and errorStream. 
+ assert p.outStream == nil or FileHandleStream(p.outStream).handle != INVALID_HANDLE_VALUE + assert p.errStream == nil or FileHandleStream(p.errStream).handle != INVALID_HANDLE_VALUE + + if p.outHandle != p.errHandle: + p.errHandle.fileClose() + p.outHandle.fileClose() + p.closeThreadAndProcessHandle() proc suspend(p: Process) = - discard suspendThread(p.fProcessHandle) + discard suspendThread(p.fThreadHandle) proc resume(p: Process) = - discard resumeThread(p.fProcessHandle) + discard resumeThread(p.fThreadHandle) proc running(p: Process): bool = - var x = waitForSingleObject(p.fProcessHandle, 50) - return x == WAIT_TIMEOUT + if p.exitFlag: + return false + else: + var x = waitForSingleObject(p.fProcessHandle, 0) + return x == WAIT_TIMEOUT proc terminate(p: Process) = if running(p): @@ -499,47 +797,79 @@ when defined(Windows) and not defined(useNimRtl): terminate(p) proc waitForExit(p: Process, timeout: int = -1): int = - discard waitForSingleObject(p.fProcessHandle, timeout.int32) - - var res: int32 - discard getExitCodeProcess(p.fProcessHandle, res) - result = res - discard closeHandle(p.fProcessHandle) + if p.exitFlag: + return p.exitStatus + + let res = waitForSingleObject(p.fProcessHandle, timeout.int32) + if res == WAIT_TIMEOUT: + terminate(p) + var status: int32 + discard getExitCodeProcess(p.fProcessHandle, status) + if status != STILL_ACTIVE: + p.exitFlag = true + p.exitStatus = status + p.closeThreadAndProcessHandle() + result = status + else: + result = -1 proc peekExitCode(p: Process): int = - var b = waitForSingleObject(p.fProcessHandle, 50) == WAIT_TIMEOUT - if b: result = -1 - else: - var res: int32 - discard getExitCodeProcess(p.fProcessHandle, res) - return res + if p.exitFlag: + return p.exitStatus + + result = -1 + var b = waitForSingleObject(p.fProcessHandle, 0) == WAIT_TIMEOUT + if not b: + var status: int32 + discard getExitCodeProcess(p.fProcessHandle, status) + p.exitFlag = true + p.exitStatus = status + p.closeThreadAndProcessHandle() + 
result = status proc inputStream(p: Process): Stream = - result = newFileHandleStream(p.inHandle) + streamAccess(p) + if p.inStream == nil: + p.inStream = newFileHandleStream(p.inHandle) + result = p.inStream proc outputStream(p: Process): Stream = - result = newFileHandleStream(p.outHandle) + streamAccess(p) + if p.outStream == nil: + p.outStream = newFileHandleStream(p.outHandle) + result = p.outStream proc errorStream(p: Process): Stream = - result = newFileHandleStream(p.errHandle) + streamAccess(p) + if p.errStream == nil: + p.errStream = newFileHandleStream(p.errHandle) + result = p.errStream + + proc peekableOutputStream(p: Process): Stream = + streamAccess(p) + if p.outStream == nil: + p.outStream = newFileHandleStream(p.outHandle).newPipeOutStream + result = p.outStream + + proc peekableErrorStream(p: Process): Stream = + streamAccess(p) + if p.errStream == nil: + p.errStream = newFileHandleStream(p.errHandle).newPipeOutStream + result = p.errStream proc execCmd(command: string): int = var - si: TSTARTUPINFO - procInfo: TPROCESS_INFORMATION - process: THandle + si: STARTUPINFO + procInfo: PROCESS_INFORMATION + process: Handle L: int32 si.cb = sizeof(si).cint si.hStdError = getStdHandle(STD_ERROR_HANDLE) si.hStdInput = getStdHandle(STD_INPUT_HANDLE) si.hStdOutput = getStdHandle(STD_OUTPUT_HANDLE) - when useWinUnicode: - var c = newWideCString(command) - var res = winlean.createProcessW(nil, c, nil, nil, 0, - NORMAL_PRIORITY_CLASS, nil, nil, si, procInfo) - else: - var res = winlean.createProcessA(nil, command, nil, nil, 0, - NORMAL_PRIORITY_CLASS, nil, nil, si, procInfo) + var c = newWideCString(command) + var res = winlean.createProcessW(nil, c, nil, nil, 0, + NORMAL_PRIORITY_CLASS, nil, nil, si, procInfo) if res == 0: raiseOSError(osLastError()) else: @@ -554,9 +884,9 @@ when defined(Windows) and not defined(useNimRtl): proc select(readfds: var seq[Process], timeout = 500): int = assert readfds.len <= MAXIMUM_WAIT_OBJECTS - var rfds: TWOHandleArray + var 
rfds: WOHandleArray for i in 0..readfds.len()-1: - rfds[i] = readfds[i].fProcessHandle + rfds[i] = readfds[i].outHandle #fProcessHandle var ret = waitForMultipleObjects(readfds.len.int32, addr(rfds), 0'i32, timeout.int32) @@ -570,11 +900,19 @@ when defined(Windows) and not defined(useNimRtl): readfds.del(i) return 1 + proc hasData*(p: Process): bool = + var x: int32 + if peekNamedPipe(p.outHandle, lpTotalBytesAvail = addr x): + result = x > 0 + elif not defined(useNimRtl): const readIdx = 0 writeIdx = 1 + proc isExitStatus(status: cint): bool = + WIFEXITED(status) or WIFSIGNALED(status) + proc envToCStringArray(t: StringTableRef): cstringArray = result = cast[cstringArray](alloc0((t.len + 1) * sizeof(cstring))) var i = 0 @@ -590,58 +928,64 @@ elif not defined(useNimRtl): result = cast[cstringArray](alloc0((counter + 1) * sizeof(cstring))) var i = 0 for key, val in envPairs(): - var x = key.string & "=" & val.string + var x = key & "=" & val result[i] = cast[cstring](alloc(x.len+1)) copyMem(result[i], addr(x[0]), x.len+1) inc(i) - type TStartProcessData = object - sysCommand: cstring - sysArgs: cstringArray - sysEnv: cstringArray - workingDir: cstring - pStdin, pStdout, pStderr, pErrorPipe: array[0..1, cint] - optionPoUsePath: bool - optionPoParentStreams: bool - optionPoStdErrToStdOut: bool - - when not defined(useFork): - proc startProcessAuxSpawn(data: TStartProcessData): TPid {. - tags: [ExecIOEffect, ReadEnvEffect], gcsafe.} - proc startProcessAuxFork(data: TStartProcessData): TPid {. - tags: [ExecIOEffect, ReadEnvEffect], gcsafe.} - {.push stacktrace: off, profiler: off.} - proc startProcessAfterFork(data: ptr TStartProcessData) {. 
- tags: [ExecIOEffect, ReadEnvEffect], cdecl, gcsafe.} - {.pop.} - - proc startProcess(command: string, - workingDir: string = "", - args: openArray[string] = [], - env: StringTableRef = nil, - options: set[ProcessOption] = {poStdErrToStdOut}): Process = + type + StartProcessData = object + sysCommand: string + sysArgs: cstringArray + sysEnv: cstringArray + workingDir: cstring + pStdin, pStdout, pStderr, pErrorPipe: array[0..1, cint] + options: set[ProcessOption] + + const useProcessAuxSpawn = declared(posix_spawn) and not defined(useFork) and + not defined(useClone) and not defined(linux) + when useProcessAuxSpawn: + proc startProcessAuxSpawn(data: StartProcessData): Pid {. + raises: [OSError], tags: [ExecIOEffect, ReadEnvEffect, ReadDirEffect, RootEffect], gcsafe.} + else: + proc startProcessAuxFork(data: StartProcessData): Pid {. + raises: [OSError], tags: [ExecIOEffect, ReadEnvEffect, ReadDirEffect, RootEffect], gcsafe.} + {.push stacktrace: off, profiler: off.} + proc startProcessAfterFork(data: ptr StartProcessData) {. 
+ raises: [OSError], tags: [ExecIOEffect, ReadEnvEffect, ReadDirEffect, RootEffect], cdecl, gcsafe.} + {.pop.} + + proc startProcess(command: string, workingDir: string = "", + args: openArray[string] = [], env: StringTableRef = nil, + options: set[ProcessOption] = {poStdErrToStdOut}): + owned Process = var - pStdin, pStdout, pStderr: array [0..1, cint] + pStdin, pStdout, pStderr: array[0..1, cint] new(result) - result.exitCode = -3 # for ``waitForExit`` + result.options = options + result.exitFlag = true + if poParentStreams notin options: if pipe(pStdin) != 0'i32 or pipe(pStdout) != 0'i32 or pipe(pStderr) != 0'i32: raiseOSError(osLastError()) - var sysCommand: string + var data: StartProcessData var sysArgsRaw: seq[string] if poEvalCommand in options: - sysCommand = "/bin/sh" - sysArgsRaw = @[sysCommand, "-c", command] + const useShPath {.strdefine.} = + when not defined(android): "/bin/sh" + else: "/system/bin/sh" + data.sysCommand = useShPath + sysArgsRaw = @[useShPath, "-c", command] assert args.len == 0, "`args` has to be empty when using poEvalCommand." 
else: - sysCommand = command + data.sysCommand = command sysArgsRaw = @[command] for arg in args.items: sysArgsRaw.add arg - var pid: TPid + var pid: Pid var sysArgs = allocCStringArray(sysArgsRaw) defer: deallocCStringArray(sysArgs) @@ -653,22 +997,19 @@ elif not defined(useNimRtl): defer: deallocCStringArray(sysEnv) - var data: TStartProcessData - data.sysCommand = sysCommand data.sysArgs = sysArgs data.sysEnv = sysEnv data.pStdin = pStdin data.pStdout = pStdout data.pStderr = pStderr - data.optionPoParentStreams = poParentStreams in options - data.optionPoUsePath = poUsePath in options - data.optionPoStdErrToStdOut = poStdErrToStdOut in options data.workingDir = workingDir + data.options = options - - when declared(posix_spawn) and not defined(useFork) and - not defined(useClone) and not defined(linux): + when useProcessAuxSpawn: + var currentDir = getCurrentDir() pid = startProcessAuxSpawn(data) + if workingDir.len > 0: + setCurrentDir(currentDir) else: pid = startProcessAuxFork(data) @@ -676,6 +1017,7 @@ elif not defined(useNimRtl): if poEchoCmd in options: echo(command, " ", join(args, " ")) result.id = pid + result.exitFlag = false if poParentStreams in options: # does not make much sense, but better than nothing: @@ -697,222 +1039,439 @@ elif not defined(useNimRtl): discard close(pStdin[readIdx]) discard close(pStdout[writeIdx]) - when not defined(useFork): - proc startProcessAuxSpawn(data: TStartProcessData): TPid = + when useProcessAuxSpawn: + proc startProcessAuxSpawn(data: StartProcessData): Pid = var attr: Tposix_spawnattr var fops: Tposix_spawn_file_actions - template chck(e: expr) = + template chck(e: untyped) = if e != 0'i32: raiseOSError(osLastError()) chck posix_spawn_file_actions_init(fops) chck posix_spawnattr_init(attr) - var mask: Tsigset + var mask: Sigset chck sigemptyset(mask) chck posix_spawnattr_setsigmask(attr, mask) - chck posix_spawnattr_setpgroup(attr, 0'i32) - - chck posix_spawnattr_setflags(attr, POSIX_SPAWN_USEVFORK or - 
POSIX_SPAWN_SETSIGMASK or - POSIX_SPAWN_SETPGROUP) - - if not data.optionPoParentStreams: + when not defined(nuttx): + if poDaemon in data.options: + chck posix_spawnattr_setpgroup(attr, 0'i32) + + var flags = POSIX_SPAWN_USEVFORK or + POSIX_SPAWN_SETSIGMASK + when not defined(nuttx): + if poDaemon in data.options: + flags = flags or POSIX_SPAWN_SETPGROUP + chck posix_spawnattr_setflags(attr, flags) + + if not (poParentStreams in data.options): chck posix_spawn_file_actions_addclose(fops, data.pStdin[writeIdx]) chck posix_spawn_file_actions_adddup2(fops, data.pStdin[readIdx], readIdx) chck posix_spawn_file_actions_addclose(fops, data.pStdout[readIdx]) chck posix_spawn_file_actions_adddup2(fops, data.pStdout[writeIdx], writeIdx) chck posix_spawn_file_actions_addclose(fops, data.pStderr[readIdx]) - if data.optionPoStdErrToStdOut: + if poStdErrToStdOut in data.options: chck posix_spawn_file_actions_adddup2(fops, data.pStdout[writeIdx], 2) else: chck posix_spawn_file_actions_adddup2(fops, data.pStderr[writeIdx], 2) var res: cint - # FIXME: chdir is global to process if data.workingDir.len > 0: setCurrentDir($data.workingDir) - var pid: TPid + var pid: Pid - if data.optionPoUsePath: - res = posix_spawnp(pid, data.sysCommand, fops, attr, data.sysArgs, data.sysEnv) + if (poUsePath in data.options): + res = posix_spawnp(pid, data.sysCommand.cstring, fops, attr, data.sysArgs, data.sysEnv) else: - res = posix_spawn(pid, data.sysCommand, fops, attr, data.sysArgs, data.sysEnv) + res = posix_spawn(pid, data.sysCommand.cstring, fops, attr, data.sysArgs, data.sysEnv) discard posix_spawn_file_actions_destroy(fops) discard posix_spawnattr_destroy(attr) - chck res - return pid + if res != 0'i32: raiseOSError(OSErrorCode(res), data.sysCommand) - proc startProcessAuxFork(data: TStartProcessData): TPid = - if pipe(data.pErrorPipe) != 0: - raiseOSError(osLastError()) + return pid + else: + proc startProcessAuxFork(data: StartProcessData): Pid = + if pipe(data.pErrorPipe) != 0: + 
raiseOSError(osLastError()) - defer: - discard close(data.pErrorPipe[readIdx]) + defer: + discard close(data.pErrorPipe[readIdx]) + + var pid: Pid + var dataCopy = data + + when defined(useClone): + const stackSize = 65536 + let stackEnd = cast[clong](alloc(stackSize)) + let stack = cast[pointer](stackEnd + stackSize) + let fn: pointer = startProcessAfterFork + pid = clone(fn, stack, + cint(CLONE_VM or CLONE_VFORK or SIGCHLD), + pointer(addr dataCopy), nil, nil, nil) + discard close(data.pErrorPipe[writeIdx]) + dealloc(stack) + else: + pid = fork() + if pid == 0: + startProcessAfterFork(addr(dataCopy)) + exitnow(1) - var pid: TPid - var dataCopy = data - - when defined(useClone): - const stackSize = 65536 - let stackEnd = cast[clong](alloc(stackSize)) - let stack = cast[pointer](stackEnd + stackSize) - let fn: pointer = startProcessAfterFork - pid = clone(fn, stack, - cint(CLONE_VM or CLONE_VFORK or SIGCHLD), - pointer(addr dataCopy), nil, nil, nil) discard close(data.pErrorPipe[writeIdx]) - dealloc(stack) - else: - pid = fork() - if pid == 0: - startProcessAfterFork(addr(dataCopy)) - exitnow(1) - - discard close(data.pErrorPipe[writeIdx]) - if pid < 0: raiseOSError(osLastError()) - - var error: cint - let sizeRead = read(data.pErrorPipe[readIdx], addr error, sizeof(error)) - if sizeRead == sizeof(error): - raiseOSError($strerror(error)) - - return pid - - {.push stacktrace: off, profiler: off.} - proc startProcessFail(data: ptr TStartProcessData) = - var error: cint = errno - discard write(data.pErrorPipe[writeIdx], addr error, sizeof(error)) - exitnow(1) - - when defined(macosx) or defined(freebsd): - var environ {.importc.}: cstringArray - - proc startProcessAfterFork(data: ptr TStartProcessData) = - # Warning: no GC here! - # Or anything that touches global structures - all called nim procs - # must be marked with stackTrace:off. Inspect C code after making changes. 
- if not data.optionPoParentStreams: - discard close(data.pStdin[writeIdx]) - if dup2(data.pStdin[readIdx], readIdx) < 0: - startProcessFail(data) - discard close(data.pStdout[readIdx]) - if dup2(data.pStdout[writeIdx], writeIdx) < 0: - startProcessFail(data) - discard close(data.pStderr[readIdx]) - if data.optionPoStdErrToStdOut: - if dup2(data.pStdout[writeIdx], 2) < 0: + if pid < 0: raiseOSError(osLastError()) + + var error: cint + let sizeRead = read(data.pErrorPipe[readIdx], addr error, sizeof(error)) + if sizeRead == sizeof(error): + raiseOSError(OSErrorCode(error), + "Could not find command: '" & $data.sysCommand & "'. OS error: " & $strerror(error)) + + return pid + + {.push stacktrace: off, profiler: off.} + proc startProcessFail(data: ptr StartProcessData, error: cint = errno) = + discard write(data.pErrorPipe[writeIdx], addr error, sizeof(error)) + exitnow(1) + + when not defined(uClibc) and (not defined(linux) or defined(android)) and + not defined(haiku): + var environ {.importc.}: cstringArray + + proc startProcessAfterFork(data: ptr StartProcessData) = + # Warning: no GC here! + # Or anything that touches global structures - all called nim procs + # must be marked with stackTrace:off. Inspect C code after making changes. 
+ if not (poParentStreams in data.options): + discard close(data.pStdin[writeIdx]) + if dup2(data.pStdin[readIdx], readIdx) < 0: startProcessFail(data) - else: - if dup2(data.pStderr[writeIdx], 2) < 0: + discard close(data.pStdout[readIdx]) + if dup2(data.pStdout[writeIdx], writeIdx) < 0: startProcessFail(data) + discard close(data.pStderr[readIdx]) + if (poStdErrToStdOut in data.options): + if dup2(data.pStdout[writeIdx], 2) < 0: + startProcessFail(data) + else: + if dup2(data.pStderr[writeIdx], 2) < 0: + startProcessFail(data) - if data.workingDir.len > 0: - if chdir(data.workingDir) < 0: - startProcessFail(data) - - discard close(data.pErrorPipe[readIdx]) - discard fcntl(data.pErrorPipe[writeIdx], F_SETFD, FD_CLOEXEC) + if data.workingDir.len > 0: + if chdir(data.workingDir) < 0: + startProcessFail(data) - if data.optionPoUsePath: - when defined(macosx) or defined(freebsd): - # MacOSX doesn't have execvpe, so we need workaround. - # On MacOSX we can arrive here only from fork, so this is safe: - environ = data.sysEnv - discard execvp(data.sysCommand, data.sysArgs) - else: - when defined(uClibc): - # uClibc environment (OpenWrt included) doesn't have the full execvpe - discard execve(data.sysCommand, data.sysArgs, data.sysEnv) + discard close(data.pErrorPipe[readIdx]) + discard fcntl(data.pErrorPipe[writeIdx], F_SETFD, FD_CLOEXEC) + + if (poUsePath in data.options): + when defined(uClibc) or defined(linux) or defined(haiku): + # uClibc environment (OpenWrt included) doesn't have the full execvpe + var exe: string + try: + exe = findExe(data.sysCommand) + except OSError as e: + startProcessFail(data, e.errorCode) + discard execve(exe.cstring, data.sysArgs, data.sysEnv) else: - discard execvpe(data.sysCommand, data.sysArgs, data.sysEnv) - else: - discard execve(data.sysCommand, data.sysArgs, data.sysEnv) + # MacOSX doesn't have execvpe, so we need workaround. 
+ # On MacOSX we can arrive here only from fork, so this is safe: + environ = data.sysEnv + discard execvp(data.sysCommand.cstring, data.sysArgs) + else: + discard execve(data.sysCommand.cstring, data.sysArgs, data.sysEnv) - startProcessFail(data) - {.pop} + startProcessFail(data) + {.pop.} proc close(p: Process) = - if p.inStream != nil: close(p.inStream) - if p.outStream != nil: close(p.outStream) - if p.errStream != nil: close(p.errStream) - discard close(p.inHandle) - discard close(p.outHandle) - discard close(p.errHandle) + if poParentStreams notin p.options: + if p.inStream != nil: + close(p.inStream) + else: + discard close(p.inHandle) + + if p.outStream != nil: + close(p.outStream) + else: + discard close(p.outHandle) + + if p.errStream != nil: + close(p.errStream) + else: + discard close(p.errHandle) proc suspend(p: Process) = - if kill(p.id, SIGSTOP) != 0'i32: raiseOsError(osLastError()) + if kill(p.id, SIGSTOP) != 0'i32: raiseOSError(osLastError()) proc resume(p: Process) = - if kill(p.id, SIGCONT) != 0'i32: raiseOsError(osLastError()) + if kill(p.id, SIGCONT) != 0'i32: raiseOSError(osLastError()) proc running(p: Process): bool = - var ret : int - when not defined(freebsd): - ret = waitpid(p.id, p.exitCode, WNOHANG) + if p.exitFlag: + return false else: - var status : cint = 1 - ret = waitpid(p.id, status, WNOHANG) - if WIFEXITED(status): - p.exitCode = status - if ret == 0: return true # Can't establish status. Assume running. - result = ret == int(p.id) + var status: cint = 1 + let ret = waitpid(p.id, status, WNOHANG) + if ret == int(p.id): + if isExitStatus(status): + p.exitFlag = true + p.exitStatus = status + return false + else: + return true + elif ret == 0: + return true # Can't establish status. Assume running. 
+ else: + raiseOSError(osLastError()) proc terminate(p: Process) = if kill(p.id, SIGTERM) != 0'i32: - raiseOsError(osLastError()) + raiseOSError(osLastError()) proc kill(p: Process) = - if kill(p.id, SIGKILL) != 0'i32: - raiseOsError(osLastError()) - - proc waitForExit(p: Process, timeout: int = -1): int = - #if waitPid(p.id, p.exitCode, 0) == int(p.id): - # ``waitPid`` fails if the process is not running anymore. But then - # ``running`` probably set ``p.exitCode`` for us. Since ``p.exitCode`` is - # initialized with -3, wrong success exit codes are prevented. - if p.exitCode != -3: return p.exitCode - if waitpid(p.id, p.exitCode, 0) < 0: - p.exitCode = -3 + if kill(p.id, SIGKILL) != 0'i32: raiseOSError(osLastError()) - result = int(p.exitCode) shr 8 + + when defined(macosx) or defined(freebsd) or defined(netbsd) or + defined(openbsd) or defined(dragonfly): + import std/kqueue + + proc waitForExit(p: Process, timeout: int = -1): int = + if p.exitFlag: + return exitStatusLikeShell(p.exitStatus) + + if timeout == -1: + var status: cint = 1 + if waitpid(p.id, status, 0) < 0: + raiseOSError(osLastError()) + p.exitFlag = true + p.exitStatus = status + else: + var kqFD = kqueue() + if kqFD == -1: + raiseOSError(osLastError()) + + var kevIn = KEvent(ident: p.id.uint, filter: EVFILT_PROC, + flags: EV_ADD, fflags: NOTE_EXIT) + var kevOut: KEvent + var tmspec: Timespec + + if timeout >= 1000: + tmspec.tv_sec = posix.Time(timeout div 1_000) + tmspec.tv_nsec = (timeout %% 1_000) * 1_000_000 + else: + tmspec.tv_sec = posix.Time(0) + tmspec.tv_nsec = (timeout * 1_000_000) + + try: + while true: + var status: cint = 1 + var count = kevent(kqFD, addr(kevIn), 1, addr(kevOut), 1, + addr(tmspec)) + if count < 0: + let err = osLastError() + if err.cint != EINTR: + raiseOSError(osLastError()) + elif count == 0: + # timeout expired, so we trying to kill process + if posix.kill(p.id, SIGKILL) == -1: + raiseOSError(osLastError()) + if waitpid(p.id, status, 0) < 0: + 
raiseOSError(osLastError()) + p.exitFlag = true + p.exitStatus = status + break + else: + if kevOut.ident == p.id.uint and kevOut.filter == EVFILT_PROC: + if waitpid(p.id, status, 0) < 0: + raiseOSError(osLastError()) + p.exitFlag = true + p.exitStatus = status + break + else: + raiseOSError(osLastError()) + finally: + discard posix.close(kqFD) + + result = exitStatusLikeShell(p.exitStatus) + elif defined(haiku): + const + B_OBJECT_TYPE_THREAD = 3 + B_EVENT_INVALID = 0x1000 + B_RELATIVE_TIMEOUT = 0x8 + + type + ObjectWaitInfo {.importc: "object_wait_info", header: "OS.h".} = object + obj {.importc: "object".}: int32 + typ {.importc: "type".}: uint16 + events: uint16 + + proc waitForObjects(infos: ptr ObjectWaitInfo, numInfos: cint, flags: uint32, + timeout: int64): clong + {.importc: "wait_for_objects_etc", header: "OS.h".} + + proc waitForExit(p: Process, timeout: int = -1): int = + if p.exitFlag: + return exitStatusLikeShell(p.exitStatus) + + if timeout == -1: + var status: cint = 1 + if waitpid(p.id, status, 0) < 0: + raiseOSError(osLastError()) + p.exitFlag = true + p.exitStatus = status + else: + var info = ObjectWaitInfo( + obj: p.id, # Haiku's PID is actually the main thread ID. + typ: B_OBJECT_TYPE_THREAD, + events: B_EVENT_INVALID # notify when the thread die. + ) + + while true: + var status: cint = 1 + let count = waitForObjects(addr info, 1, B_RELATIVE_TIMEOUT, timeout) + + if count < 0: + let err = count.cint + if err == ETIMEDOUT: + # timeout expired, so we try to kill the process + if posix.kill(p.id, SIGKILL) == -1: + raiseOSError(osLastError()) + if waitpid(p.id, status, 0) < 0: + raiseOSError(osLastError()) + p.exitFlag = true + p.exitStatus = status + break + elif err != EINTR: + raiseOSError(err.OSErrorCode) + elif count > 0: + if waitpid(p.id, status, 0) < 0: + raiseOSError(osLastError()) + p.exitFlag = true + p.exitStatus = status + break + else: + raiseAssert "unreachable!" 
+ + result = exitStatusLikeShell(p.exitStatus) + + else: + import std/times except getTime + import std/monotimes + + proc waitForExit(p: Process, timeout: int = -1): int = + if p.exitFlag: + return exitStatusLikeShell(p.exitStatus) + + if timeout < 0: + # Backwards compatibility with previous verison to + # handle cases where timeout == -1, but extend + # to handle cases where timeout < 0 + var status: cint + if waitpid(p.id, status, 0) < 0: + raiseOSError(osLastError()) + p.exitFlag = true + p.exitStatus = status + else: + # Max 50ms delay + const maxWait = initDuration(milliseconds = 50) + let wait = initDuration(milliseconds = timeout) + let deadline = getMonoTime() + wait + # starting 50μs delay + var delay = initDuration(microseconds = 50) + + while true: + var status: cint + let pid = waitpid(p.id, status, WNOHANG) + if p.id == pid : + p.exitFlag = true + p.exitStatus = status + break + elif pid.int == -1: + raiseOsError(osLastError()) + else: + # Continue waiting if needed + if getMonoTime() >= deadline: + # Previous version of `waitForExit` + # foricibly killed the process. 
+ # We keep this so we don't break programs + # that depend on this behavior + if posix.kill(p.id, SIGKILL) < 0: + raiseOSError(osLastError()) + else: + const max = 1_000_000_000 + let + newWait = getMonoTime() + delay + ticks = newWait.ticks() + ns = ticks mod max + secs = ticks div max + var + waitSpec: TimeSpec + unused: Timespec + waitSpec.tv_sec = posix.Time(secs) + waitSpec.tv_nsec = clong ns + discard posix.clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, waitSpec, unused) + let remaining = deadline - getMonoTime() + delay = min([delay * 2, remaining, maxWait]) + + result = exitStatusLikeShell(p.exitStatus) proc peekExitCode(p: Process): int = - if p.exitCode != -3: return p.exitCode - var ret = waitpid(p.id, p.exitCode, WNOHANG) - var b = ret == int(p.id) - if b: result = -1 - if p.exitCode == -3: result = -1 - else: result = p.exitCode.int shr 8 - - proc createStream(stream: var Stream, handle: var FileHandle, - fileMode: FileMode) = + var status = cint(0) + result = -1 + if p.exitFlag: + return exitStatusLikeShell(p.exitStatus) + + var ret = waitpid(p.id, status, WNOHANG) + if ret > 0: + if isExitStatus(status): + p.exitFlag = true + p.exitStatus = status + result = exitStatusLikeShell(status) + + proc createStream(handle: var FileHandle, + fileMode: FileMode): owned FileStream = var f: File if not open(f, handle, fileMode): raiseOSError(osLastError()) - stream = newFileStream(f) + return newFileStream(f) proc inputStream(p: Process): Stream = + streamAccess(p) if p.inStream == nil: - createStream(p.inStream, p.inHandle, fmWrite) + p.inStream = createStream(p.inHandle, fmWrite) return p.inStream proc outputStream(p: Process): Stream = + streamAccess(p) if p.outStream == nil: - createStream(p.outStream, p.outHandle, fmRead) + p.outStream = createStream(p.outHandle, fmRead) return p.outStream proc errorStream(p: Process): Stream = + streamAccess(p) + if p.errStream == nil: + p.errStream = createStream(p.errHandle, fmRead) + return p.errStream + + proc 
peekableOutputStream(p: Process): Stream = + streamAccess(p) + if p.outStream == nil: + p.outStream = createStream(p.outHandle, fmRead).newPipeOutStream + return p.outStream + + proc peekableErrorStream(p: Process): Stream = + streamAccess(p) if p.errStream == nil: - createStream(p.errStream, p.errHandle, fmRead) + p.errStream = createStream(p.errHandle, fmRead).newPipeOutStream return p.errStream - proc csystem(cmd: cstring): cint {.nodecl, importc: "system", + proc csystem(cmd: cstring): cint {.nodecl, importc: "system", header: "<stdlib.h>".} proc execCmd(command: string): int = - when defined(linux): - result = csystem(command) shr 8 + when defined(posix): + let tmp = csystem(command) + result = if tmp == -1: tmp else: exitStatusLikeShell(tmp) else: result = csystem(command) @@ -935,8 +1494,8 @@ elif not defined(useNimRtl): proc select(readfds: var seq[Process], timeout = 500): int = var tv: Timeval - tv.tv_sec = 0 - tv.tv_usec = timeout * 1000 + tv.tv_sec = posix.Time(0) + tv.tv_usec = Suseconds(timeout * 1000) var rd: TFdSet var m = 0 @@ -949,35 +1508,74 @@ elif not defined(useNimRtl): pruneProcessSet(readfds, (rd)) + proc hasData*(p: Process): bool = + var rd: TFdSet + + FD_ZERO(rd) + let m = max(0, int(p.outHandle)) + FD_SET(cint(p.outHandle), rd) + + result = int(select(cint(m+1), addr(rd), nil, nil, nil)) == 1 + proc execCmdEx*(command: string, options: set[ProcessOption] = { - poStdErrToStdOut, poUsePath}): tuple[ - output: TaintedString, - exitCode: int] {.tags: [ExecIOEffect, ReadIOEffect], gcsafe.} = - ## a convenience proc that runs the `command`, grabs all its output and - ## exit code and returns both. 
- var p = startProcess(command, options=options + {poEvalCommand}) + poStdErrToStdOut, poUsePath}, env: StringTableRef = nil, + workingDir = "", input = ""): tuple[ + output: string, + exitCode: int] {.raises: [OSError, IOError], tags: + [ExecIOEffect, ReadIOEffect, RootEffect], gcsafe.} = + ## A convenience proc that runs the `command`, and returns its `output` and + ## `exitCode`. `env` and `workingDir` params behave as for `startProcess`. + ## If `input.len > 0`, it is passed as stdin. + ## + ## Note: this could block if `input.len` is greater than your OS's maximum + ## pipe buffer size. + ## + ## See also: + ## * `execCmd proc <#execCmd,string>`_ + ## * `startProcess proc + ## <#startProcess,string,string,openArray[string],StringTableRef,set[ProcessOption]>`_ + ## * `execProcess proc + ## <#execProcess,string,string,openArray[string],StringTableRef,set[ProcessOption]>`_ + ## + ## Example: + ## ```Nim + ## var result = execCmdEx("nim r --hints:off -", options = {}, input = "echo 3*4") + ## import std/[strutils, strtabs] + ## stripLineEnd(result[0]) ## portable way to remove trailing newline, if any + ## doAssert result == ("12", 0) + ## doAssert execCmdEx("ls --nonexistent").exitCode != 0 + ## when defined(posix): + ## assert execCmdEx("echo $FO", env = newStringTable({"FO": "B"})) == ("B\n", 0) + ## assert execCmdEx("echo $PWD", workingDir = "/") == ("/\n", 0) + ## ``` + + when (NimMajor, NimMinor, NimPatch) < (1, 3, 5): + doAssert input.len == 0 + doAssert workingDir.len == 0 + doAssert env == nil + + var p = startProcess(command, options = options + {poEvalCommand}, + workingDir = workingDir, env = env) var outp = outputStream(p) - result = (TaintedString"", -1) - var line = newStringOfCap(120).TaintedString + + if input.len > 0: + # There is no way to provide input for the child process + # anymore. Closing it will create EOF on stdin instead of eternal + # blocking. 
+ # Writing in chunks would require a selectors (eg kqueue/epoll) to avoid + # blocking on io. + inputStream(p).write(input) + close inputStream(p) + + # consider `p.lines(keepNewLines=true)` to avoid exit code test + result = ("", -1) + var line = newStringOfCap(120) while true: if outp.readLine(line): - result[0].string.add(line.string) - result[0].string.add("\n") + result[0].add(line) + result[0].add("\n") else: result[1] = peekExitCode(p) if result[1] != -1: break close(p) - -when isMainModule: - assert quoteShellWindows("aaa") == "aaa" - assert quoteShellWindows("aaa\"") == "aaa\\\"" - assert quoteShellWindows("") == "\"\"" - - assert quoteShellPosix("aaa") == "aaa" - assert quoteShellPosix("aaa a") == "'aaa a'" - assert quoteShellPosix("") == "''" - assert quoteShellPosix("a'a") == "'a'\"'\"'a'" - - when defined(posix): - assert quoteShell("") == "''" diff --git a/lib/pure/parsecfg.nim b/lib/pure/parsecfg.nim index bb64c8134..8a43daf54 100644 --- a/lib/pure/parsecfg.nim +++ b/lib/pure/parsecfg.nim @@ -7,76 +7,224 @@ # distribution, for details about the copyright. # -## The ``parsecfg`` module implements a high performance configuration file -## parser. The configuration file's syntax is similar to the Windows ``.ini`` -## format, but much more powerful, as it is not a line based parser. String -## literals, raw string literals and triple quoted string literals are supported +## The `parsecfg` module implements a high performance configuration file +## parser. The configuration file's syntax is similar to the Windows `.ini` +## format, but much more powerful, as it is not a line based parser. String +## literals, raw string literals and triple quoted string literals are supported ## as in the Nim programming language. - -## This is an example of how a configuration file may look like: ## -## .. include:: doc/mytest.cfg +## Example of how a configuration file may look like: +## +## .. 
include:: ../../doc/mytest.cfg ## :literal: -## The file ``examples/parsecfgex.nim`` demonstrates how to use the -## configuration file parser: ## -## .. code-block:: nim -## :file: examples/parsecfgex.nim +## Here is an example of how to use the configuration file parser: +runnableExamples("-r:off"): + import std/[strutils, streams] + + let configFile = "example.ini" + var f = newFileStream(configFile, fmRead) + assert f != nil, "cannot open " & configFile + var p: CfgParser + open(p, f, configFile) + while true: + var e = next(p) + case e.kind + of cfgEof: break + of cfgSectionStart: ## a `[section]` has been parsed + echo "new section: " & e.section + of cfgKeyValuePair: + echo "key-value-pair: " & e.key & ": " & e.value + of cfgOption: + echo "command: " & e.key & ": " & e.value + of cfgError: + echo e.msg + close(p) + +##[ +## Configuration file example +]## + +## ```none +## charset = "utf-8" +## [Package] +## name = "hello" +## --threads:on +## [Author] +## name = "nim-lang" +## website = "nim-lang.org" +## ``` + +##[ +## Creating a configuration file +]## + +runnableExamples: + var dict = newConfig() + dict.setSectionKey("","charset", "utf-8") + dict.setSectionKey("Package", "name", "hello") + dict.setSectionKey("Package", "--threads", "on") + dict.setSectionKey("Author", "name", "nim-lang") + dict.setSectionKey("Author", "website", "nim-lang.org") + assert $dict == """ +charset=utf-8 +[Package] +name=hello +--threads:on +[Author] +name=nim-lang +website=nim-lang.org +""" + +##[ +## Reading a configuration file +]## + +runnableExamples("-r:off"): + let dict = loadConfig("config.ini") + let charset = dict.getSectionValue("","charset") + let threads = dict.getSectionValue("Package","--threads") + let pname = dict.getSectionValue("Package","name") + let name = dict.getSectionValue("Author","name") + let website = dict.getSectionValue("Author","website") + echo pname & "\n" & name & "\n" & website + +##[ +## Modifying a configuration file +]## + 
+runnableExamples("-r:off"): + var dict = loadConfig("config.ini") + dict.setSectionKey("Author", "name", "nim-lang") + dict.writeConfig("config.ini") + +##[ +## Deleting a section key in a configuration file +]## + +runnableExamples("-r:off"): + var dict = loadConfig("config.ini") + dict.delSectionKey("Author", "website") + dict.writeConfig("config.ini") + +##[ +## Supported INI File structure +]## + +# taken from https://docs.python.org/3/library/configparser.html#supported-ini-file-structure +runnableExamples: + import std/streams + + var dict = loadConfig(newStringStream("""[Simple Values] + key=value + spaces in keys=allowed + spaces in values=allowed as well + spaces around the delimiter = obviously + you can also use : to delimit keys from values + [All Values Are Strings] + values like this: 19990429 + or this: 3.14159265359 + are they treated as numbers : no + integers floats and booleans are held as: strings + can use the API to get converted values directly: true + [No Values] + key_without_value + # empty string value is not allowed = + [ Seletion A ] + space around section name will be ignored + [You can use comments] + # like this + ; or this + # By default only in an empty line. + # Inline comments can be harmful because they prevent users + # from using the delimiting characters as parts of values. + # That being said, this can be customized. + [Sections Can Be Indented] + can_values_be_as_well = True + does_that_mean_anything_special = False + purpose = formatting for readability + # Did I mention we can indent comments, too? 
+ """) + ) + let section1 = "Simple Values" + assert dict.getSectionValue(section1, "key") == "value" + assert dict.getSectionValue(section1, "spaces in keys") == "allowed" + assert dict.getSectionValue(section1, "spaces in values") == "allowed as well" + assert dict.getSectionValue(section1, "spaces around the delimiter") == "obviously" + assert dict.getSectionValue(section1, "you can also use") == "to delimit keys from values" -import - hashes, strutils, lexbase, streams + let section2 = "All Values Are Strings" + assert dict.getSectionValue(section2, "values like this") == "19990429" + assert dict.getSectionValue(section2, "or this") == "3.14159265359" + assert dict.getSectionValue(section2, "are they treated as numbers") == "no" + assert dict.getSectionValue(section2, "integers floats and booleans are held as") == "strings" + assert dict.getSectionValue(section2, "can use the API to get converted values directly") == "true" + + let section3 = "Seletion A" + assert dict.getSectionValue(section3, + "space around section name will be ignored", "not an empty value") == "" + + let section4 = "Sections Can Be Indented" + assert dict.getSectionValue(section4, "can_values_be_as_well") == "True" + assert dict.getSectionValue(section4, "does_that_mean_anything_special") == "False" + assert dict.getSectionValue(section4, "purpose") == "formatting for readability" + +import std/[strutils, lexbase, streams, tables] +import std/private/decode_helpers +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/syncio include "system/inclrtl" + type CfgEventKind* = enum ## enumeration of all events that may occur when parsing - cfgEof, ## end of file reached - cfgSectionStart, ## a ``[section]`` has been parsed - cfgKeyValuePair, ## a ``key=value`` pair has been detected - cfgOption, ## a ``--key=value`` command line option - cfgError ## an error occurred during parsing - + cfgEof, ## end of file reached + cfgSectionStart, ## a `[section]` has been parsed + 
cfgKeyValuePair, ## a `key=value` pair has been detected + cfgOption, ## a `--key=value` command line option + cfgError ## an error occurred during parsing + CfgEvent* = object of RootObj ## describes a parsing event case kind*: CfgEventKind ## the kind of the event of cfgEof: nil - of cfgSectionStart: - section*: string ## `section` contains the name of the - ## parsed section start (syntax: ``[section]``) - of cfgKeyValuePair, cfgOption: - key*, value*: string ## contains the (key, value) pair if an option - ## of the form ``--key: value`` or an ordinary - ## ``key= value`` pair has been parsed. - ## ``value==""`` if it was not specified in the - ## configuration file. - of cfgError: ## the parser encountered an error: `msg` - msg*: string ## contains the error message. No exceptions - ## are thrown if a parse error occurs. - - TokKind = enum - tkInvalid, tkEof, + of cfgSectionStart: + section*: string ## `section` contains the name of the + ## parsed section start (syntax: `[section]`) + of cfgKeyValuePair, cfgOption: + key*, value*: string ## contains the (key, value) pair if an option + ## of the form `--key: value` or an ordinary + ## `key= value` pair has been parsed. + ## `value==""` if it was not specified in the + ## configuration file. + of cfgError: ## the parser encountered an error: `msg` + msg*: string ## contains the error message. No exceptions + ## are thrown if a parse error occurs. + + TokKind = enum + tkInvalid, tkEof, tkSymbol, tkEquals, tkColon, tkBracketLe, tkBracketRi, tkDashDash - Token = object # a token - kind: TokKind # the type of the token - literal: string # the parsed (string) literal - + Token = object # a token + kind: TokKind # the type of the token + literal: string # the parsed (string) literal + CfgParser* = object of BaseLexer ## the parser object. 
tok: Token filename: string -{.deprecated: [TCfgEventKind: CfgEventKind, TCfgEvent: CfgEvent, - TTokKind: TokKind, TToken: Token, TCfgParser: CfgParser].} - # implementation -const - SymChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF', '.', '/', '\\'} - +const + SymChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', ' ', '\x80'..'\xFF', '.', '/', '\\', '-'} + proc rawGetTok(c: var CfgParser, tok: var Token) {.gcsafe.} -proc open*(c: var CfgParser, input: Stream, filename: string, +proc open*(c: var CfgParser, input: Stream, filename: string, lineOffset = 0) {.rtl, extern: "npc$1".} = - ## initializes the parser with an input stream. `Filename` is only used + ## Initializes the parser with an input stream. `Filename` is only used ## for nice error messages. `lineOffset` can be used to influence the line ## number information in the generated error messages. lexbase.open(c, input) @@ -85,277 +233,427 @@ proc open*(c: var CfgParser, input: Stream, filename: string, c.tok.literal = "" inc(c.lineNumber, lineOffset) rawGetTok(c, c.tok) - + proc close*(c: var CfgParser) {.rtl, extern: "npc$1".} = - ## closes the parser `c` and its associated input stream. + ## Closes the parser `c` and its associated input stream. lexbase.close(c) proc getColumn*(c: CfgParser): int {.rtl, extern: "npc$1".} = - ## get the current column the parser has arrived at. + ## Gets the current column the parser has arrived at. result = getColNumber(c, c.bufpos) proc getLine*(c: CfgParser): int {.rtl, extern: "npc$1".} = - ## get the current line the parser has arrived at. + ## Gets the current line the parser has arrived at. result = c.lineNumber proc getFilename*(c: CfgParser): string {.rtl, extern: "npc$1".} = - ## get the filename of the file that the parser processes. + ## Gets the filename of the file that the parser processes. 
result = c.filename -proc handleHexChar(c: var CfgParser, xi: var int) = - case c.buf[c.bufpos] - of '0'..'9': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - of 'a'..'f': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) - inc(c.bufpos) - of 'A'..'F': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) - inc(c.bufpos) - else: - discard - -proc handleDecChars(c: var CfgParser, xi: var int) = - while c.buf[c.bufpos] in {'0'..'9'}: +proc handleDecChars(c: var CfgParser, xi: var int) = + while c.buf[c.bufpos] in {'0'..'9'}: xi = (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0')) inc(c.bufpos) -proc getEscapedChar(c: var CfgParser, tok: var Token) = - inc(c.bufpos) # skip '\' +proc getEscapedChar(c: var CfgParser, tok: var Token) = + inc(c.bufpos) # skip '\' case c.buf[c.bufpos] - of 'n', 'N': + of 'n', 'N': add(tok.literal, "\n") inc(c.bufpos) - of 'r', 'R', 'c', 'C': + of 'r', 'R', 'c', 'C': add(tok.literal, '\c') inc(c.bufpos) - of 'l', 'L': + of 'l', 'L': add(tok.literal, '\L') inc(c.bufpos) - of 'f', 'F': + of 'f', 'F': add(tok.literal, '\f') inc(c.bufpos) - of 'e', 'E': + of 'e', 'E': add(tok.literal, '\e') inc(c.bufpos) - of 'a', 'A': + of 'a', 'A': add(tok.literal, '\a') inc(c.bufpos) - of 'b', 'B': + of 'b', 'B': add(tok.literal, '\b') inc(c.bufpos) - of 'v', 'V': + of 'v', 'V': add(tok.literal, '\v') inc(c.bufpos) - of 't', 'T': + of 't', 'T': add(tok.literal, '\t') inc(c.bufpos) - of '\'', '"': + of '\'', '"': add(tok.literal, c.buf[c.bufpos]) inc(c.bufpos) - of '\\': + of '\\': add(tok.literal, '\\') inc(c.bufpos) - of 'x', 'X': + of 'x', 'X': inc(c.bufpos) var xi = 0 - handleHexChar(c, xi) - handleHexChar(c, xi) + if handleHexChar(c.buf[c.bufpos], xi): + inc(c.bufpos) + if handleHexChar(c.buf[c.bufpos], xi): + inc(c.bufpos) add(tok.literal, chr(xi)) - of '0'..'9': + of '0'..'9': var xi = 0 handleDecChars(c, xi) if (xi <= 255): add(tok.literal, chr(xi)) else: tok.kind = tkInvalid else: tok.kind = tkInvalid - -proc 
handleCRLF(c: var CfgParser, pos: int): int = + +proc handleCRLF(c: var CfgParser, pos: int): int = case c.buf[pos] of '\c': result = lexbase.handleCR(c, pos) of '\L': result = lexbase.handleLF(c, pos) else: result = pos - -proc getString(c: var CfgParser, tok: var Token, rawMode: bool) = - var pos = c.bufpos + 1 # skip " - var buf = c.buf # put `buf` in a register + +proc getString(c: var CfgParser, tok: var Token, rawMode: bool) = + var pos = c.bufpos + 1 # skip " tok.kind = tkSymbol - if (buf[pos] == '"') and (buf[pos + 1] == '"'): + if (c.buf[pos] == '"') and (c.buf[pos + 1] == '"'): # long string literal: - inc(pos, 2) # skip "" + inc(pos, 2) # skip "" # skip leading newline: pos = handleCRLF(c, pos) - buf = c.buf - while true: - case buf[pos] - of '"': - if (buf[pos + 1] == '"') and (buf[pos + 2] == '"'): break + while true: + case c.buf[pos] + of '"': + if (c.buf[pos + 1] == '"') and (c.buf[pos + 2] == '"'): break add(tok.literal, '"') inc(pos) - of '\c', '\L': + of '\c', '\L': pos = handleCRLF(c, pos) - buf = c.buf add(tok.literal, "\n") - of lexbase.EndOfFile: + of lexbase.EndOfFile: tok.kind = tkInvalid - break - else: - add(tok.literal, buf[pos]) + break + else: + add(tok.literal, c.buf[pos]) inc(pos) - c.bufpos = pos + 3 # skip the three """ - else: + c.bufpos = pos + 3 # skip the three """ + else: # ordinary string literal - while true: - var ch = buf[pos] - if ch == '"': - inc(pos) # skip '"' - break - if ch in {'\c', '\L', lexbase.EndOfFile}: + while true: + var ch = c.buf[pos] + if ch == '"': + inc(pos) # skip '"' + break + if ch in {'\c', '\L', lexbase.EndOfFile}: tok.kind = tkInvalid - break - if (ch == '\\') and not rawMode: + break + if (ch == '\\') and not rawMode: c.bufpos = pos getEscapedChar(c, tok) pos = c.bufpos - else: + else: add(tok.literal, ch) inc(pos) c.bufpos = pos -proc getSymbol(c: var CfgParser, tok: var Token) = +proc getSymbol(c: var CfgParser, tok: var Token) = var pos = c.bufpos - var buf = c.buf - while true: - 
add(tok.literal, buf[pos]) + while true: + add(tok.literal, c.buf[pos]) inc(pos) - if not (buf[pos] in SymChars): break + if not (c.buf[pos] in SymChars): break + + while tok.literal.len > 0 and tok.literal[^1] == ' ': + tok.literal.setLen(tok.literal.len - 1) + c.bufpos = pos tok.kind = tkSymbol -proc skip(c: var CfgParser) = +proc skip(c: var CfgParser) = var pos = c.bufpos - var buf = c.buf - while true: - case buf[pos] - of ' ', '\t': + while true: + case c.buf[pos] + of ' ', '\t': inc(pos) - of '#', ';': - while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) - of '\c', '\L': + of '#', ';': + while not (c.buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) + of '\c', '\L': pos = handleCRLF(c, pos) - buf = c.buf - else: - break # EndOfFile also leaves the loop + else: + break # EndOfFile also leaves the loop c.bufpos = pos -proc rawGetTok(c: var CfgParser, tok: var Token) = +proc rawGetTok(c: var CfgParser, tok: var Token) = tok.kind = tkInvalid setLen(tok.literal, 0) skip(c) case c.buf[c.bufpos] - of '=': + of '=': tok.kind = tkEquals inc(c.bufpos) tok.literal = "=" - of '-': + of '-': inc(c.bufpos) - if c.buf[c.bufpos] == '-': inc(c.bufpos) - tok.kind = tkDashDash - tok.literal = "--" - of ':': + if c.buf[c.bufpos] == '-': + inc(c.bufpos) + tok.kind = tkDashDash + tok.literal = "--" + else: + dec(c.bufpos) + getSymbol(c, tok) + of ':': tok.kind = tkColon inc(c.bufpos) tok.literal = ":" - of 'r', 'R': - if c.buf[c.bufpos + 1] == '\"': + of 'r', 'R': + if c.buf[c.bufpos + 1] == '\"': inc(c.bufpos) getString(c, tok, true) - else: + else: getSymbol(c, tok) - of '[': + of '[': tok.kind = tkBracketLe inc(c.bufpos) - tok.literal = "]" - of ']': + tok.literal = "[" + of ']': tok.kind = tkBracketRi inc(c.bufpos) tok.literal = "]" - of '"': + of '"': getString(c, tok, false) - of lexbase.EndOfFile: + of lexbase.EndOfFile: tok.kind = tkEof tok.literal = "[EOF]" else: getSymbol(c, tok) - + proc errorStr*(c: CfgParser, msg: string): string {.rtl, extern: 
"npc$1".} = - ## returns a properly formated error message containing current line and + ## Returns a properly formatted error message containing current line and ## column information. - result = `%`("$1($2, $3) Error: $4", - [c.filename, $getLine(c), $getColumn(c), msg]) - + result = `%`("$1($2, $3) Error: $4", + [c.filename, $getLine(c), $getColumn(c), msg]) + proc warningStr*(c: CfgParser, msg: string): string {.rtl, extern: "npc$1".} = - ## returns a properly formated warning message containing current line and + ## Returns a properly formatted warning message containing current line and ## column information. - result = `%`("$1($2, $3) Warning: $4", - [c.filename, $getLine(c), $getColumn(c), msg]) + result = `%`("$1($2, $3) Warning: $4", + [c.filename, $getLine(c), $getColumn(c), msg]) proc ignoreMsg*(c: CfgParser, e: CfgEvent): string {.rtl, extern: "npc$1".} = - ## returns a properly formated warning message containing that + ## Returns a properly formatted warning message containing that ## an entry is ignored. 
- case e.kind + case e.kind of cfgSectionStart: result = c.warningStr("section ignored: " & e.section) of cfgKeyValuePair: result = c.warningStr("key ignored: " & e.key) - of cfgOption: + of cfgOption: result = c.warningStr("command ignored: " & e.key & ": " & e.value) of cfgError: result = e.msg of cfgEof: result = "" -proc getKeyValPair(c: var CfgParser, kind: CfgEventKind): CfgEvent = - if c.tok.kind == tkSymbol: - result.kind = kind - result.key = c.tok.literal - result.value = "" +proc getKeyValPair(c: var CfgParser, kind: CfgEventKind): CfgEvent = + if c.tok.kind == tkSymbol: + case kind + of cfgOption, cfgKeyValuePair: + result = CfgEvent(kind: kind, key: c.tok.literal.move, value: "") + else: discard rawGetTok(c, c.tok) - if c.tok.kind in {tkEquals, tkColon}: + if c.tok.kind in {tkEquals, tkColon}: rawGetTok(c, c.tok) - if c.tok.kind == tkSymbol: + if c.tok.kind == tkSymbol: result.value = c.tok.literal - else: - reset result - result.kind = cfgError - result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) + else: + result = CfgEvent(kind: cfgError, + msg: errorStr(c, "symbol expected, but found: " & c.tok.literal)) rawGetTok(c, c.tok) - else: - result.kind = cfgError - result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) + else: + result = CfgEvent(kind: cfgError, + msg: errorStr(c, "symbol expected, but found: " & c.tok.literal)) rawGetTok(c, c.tok) proc next*(c: var CfgParser): CfgEvent {.rtl, extern: "npc$1".} = - ## retrieves the first/next event. This controls the parser. - case c.tok.kind - of tkEof: - result.kind = cfgEof - of tkDashDash: + ## Retrieves the first/next event. This controls the parser. 
+ case c.tok.kind + of tkEof: + result = CfgEvent(kind: cfgEof) + of tkDashDash: rawGetTok(c, c.tok) result = getKeyValPair(c, cfgOption) - of tkSymbol: + of tkSymbol: result = getKeyValPair(c, cfgKeyValuePair) - of tkBracketLe: + of tkBracketLe: rawGetTok(c, c.tok) - if c.tok.kind == tkSymbol: - result.kind = cfgSectionStart - result.section = c.tok.literal - else: - result.kind = cfgError - result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) + if c.tok.kind == tkSymbol: + result = CfgEvent(kind: cfgSectionStart, section: c.tok.literal.move) + else: + result = CfgEvent(kind: cfgError, + msg: errorStr(c, "symbol expected, but found: " & c.tok.literal)) rawGetTok(c, c.tok) - if c.tok.kind == tkBracketRi: + if c.tok.kind == tkBracketRi: rawGetTok(c, c.tok) else: - reset(result) - result.kind = cfgError - result.msg = errorStr(c, "']' expected, but found: " & c.tok.literal) - of tkInvalid, tkEquals, tkColon, tkBracketRi: - result.kind = cfgError - result.msg = errorStr(c, "invalid token: " & c.tok.literal) + result = CfgEvent(kind: cfgError, + msg: errorStr(c, "']' expected, but found: " & c.tok.literal)) + of tkInvalid, tkEquals, tkColon, tkBracketRi: + result = CfgEvent(kind: cfgError, + msg: errorStr(c, "invalid token: " & c.tok.literal)) rawGetTok(c, c.tok) + +# ---------------- Configuration file related operations ---------------- +type + Config* = OrderedTableRef[string, OrderedTableRef[string, string]] + +proc newConfig*(): Config = + ## Creates a new configuration table. + ## Useful when wanting to create a configuration file. + result = newOrderedTable[string, OrderedTableRef[string, string]]() + +proc loadConfig*(stream: Stream, filename: string = "[stream]"): Config = + ## Loads the specified configuration from stream into a new Config instance. + ## `filename` parameter is only used for nicer error messages. 
+ var dict = newOrderedTable[string, OrderedTableRef[string, string]]() + var curSection = "" ## Current section, + ## the default value of the current section is "", + ## which means that the current section is a common + var p: CfgParser + open(p, stream, filename) + while true: + var e = next(p) + case e.kind + of cfgEof: + break + of cfgSectionStart: # Only look for the first time the Section + curSection = e.section + of cfgKeyValuePair: + var t = newOrderedTable[string, string]() + if dict.hasKey(curSection): + t = dict[curSection] + t[e.key] = e.value + dict[curSection] = t + of cfgOption: + var c = newOrderedTable[string, string]() + if dict.hasKey(curSection): + c = dict[curSection] + c["--" & e.key] = e.value + dict[curSection] = c + of cfgError: + break + close(p) + result = dict + +proc loadConfig*(filename: string): Config = + ## Loads the specified configuration file into a new Config instance. + let file = open(filename, fmRead) + let fileStream = newFileStream(file) + defer: fileStream.close() + result = fileStream.loadConfig(filename) + +proc replace(s: string): string = + var d = "" + var i = 0 + while i < s.len(): + if s[i] == '\\': + d.add(r"\\") + elif s[i] == '\c' and s[i+1] == '\l': + d.add(r"\c\l") + inc(i) + elif s[i] == '\c': + d.add(r"\n") + elif s[i] == '\l': + d.add(r"\n") + else: + d.add(s[i]) + inc(i) + result = d + +proc writeConfig*(dict: Config, stream: Stream) = + ## Writes the contents of the table to the specified stream. + ## + ## .. note:: Comment statement will be ignored. 
+ for section, sectionData in dict.pairs(): + if section != "": ## Not general section + if not allCharsInSet(section, SymChars): ## Non system character + stream.writeLine("[\"" & section & "\"]") + else: + stream.writeLine("[" & section & "]") + for key, value in sectionData.pairs(): + var kv, segmentChar: string + if key.len > 1 and key[0] == '-' and key[1] == '-': ## If it is a command key + segmentChar = ":" + if not allCharsInSet(key[2..key.len()-1], SymChars): + kv.add("--\"") + kv.add(key[2..key.len()-1]) + kv.add("\"") + else: + kv = key + else: + segmentChar = "=" + kv = key + if value != "": ## If the key is not empty + if not allCharsInSet(value, SymChars): + if find(value, '"') == -1: + kv.add(segmentChar) + kv.add("\"") + kv.add(replace(value)) + kv.add("\"") + else: + kv.add(segmentChar) + kv.add("\"\"\"") + kv.add(replace(value)) + kv.add("\"\"\"") + else: + kv.add(segmentChar) + kv.add(value) + stream.writeLine(kv) + +proc `$`*(dict: Config): string = + ## Writes the contents of the table to string. + ## + ## .. note:: Comment statement will be ignored. + let stream = newStringStream() + defer: stream.close() + dict.writeConfig(stream) + result = stream.data + +proc writeConfig*(dict: Config, filename: string) = + ## Writes the contents of the table to the specified configuration file. + ## + ## .. note:: Comment statement will be ignored. + let file = open(filename, fmWrite) + defer: file.close() + let fileStream = newFileStream(file) + dict.writeConfig(fileStream) + +proc getSectionValue*(dict: Config, section, key: string, defaultVal = ""): string = + ## Gets the key value of the specified Section. + ## Returns the specified default value if the specified key does not exist. + if dict.hasKey(section): + if dict[section].hasKey(key): + result = dict[section][key] + else: + result = defaultVal + else: + result = defaultVal + +proc setSectionKey*(dict: var Config, section, key, value: string) = + ## Sets the Key value of the specified Section. 
+ var t = newOrderedTable[string, string]() + if dict.hasKey(section): + t = dict[section] + t[key] = value + dict[section] = t + +proc delSection*(dict: var Config, section: string) = + ## Deletes the specified section and all of its sub keys. + dict.del(section) + +proc delSectionKey*(dict: var Config, section, key: string) = + ## Deletes the key of the specified section. + if dict.hasKey(section): + if dict[section].hasKey(key): + if dict[section].len == 1: + dict.del(section) + else: + dict[section].del(key) + +iterator sections*(dict: Config): lent string {.since: (1, 5).} = + ## Iterates through the sections in the `dict`. + for section in dict.keys: + yield section diff --git a/lib/pure/parsecsv.nim b/lib/pure/parsecsv.nim index f4943ed89..c7bf0c9c1 100644 --- a/lib/pure/parsecsv.nim +++ b/lib/pure/parsecsv.nim @@ -8,15 +8,20 @@ # ## This module implements a simple high performance `CSV`:idx: -## (`comma separated value`:idx:) parser. +## (`comma separated value`:idx:) parser. ## -## Example: How to use the parser -## ============================== +## Basic usage +## =========== +## +## ```nim +## import std/parsecsv +## from std/os import paramStr +## from std/streams import newFileStream ## -## .. 
code-block:: nim -## import os, parsecsv, streams ## var s = newFileStream(paramStr(1), fmRead) -## if s == nil: quit("cannot open the file" & paramStr(1)) +## if s == nil: +## quit("cannot open the file" & paramStr(1)) +## ## var x: CsvParser ## open(x, s, paramStr(1)) ## while readRow(x): @@ -24,150 +29,327 @@ ## for val in items(x.row): ## echo "##", val, "##" ## close(x) +## ``` +## +## For CSV files with a header row, the header can be read and then used as a +## reference for item access with `rowEntry <#rowEntry,CsvParser,string>`_: +## +## ```nim +## import std/parsecsv +## +## # Prepare a file +## let content = """One,Two,Three,Four +## 1,2,3,4 +## 10,20,30,40 +## 100,200,300,400 +## """ +## writeFile("temp.csv", content) ## +## var p: CsvParser +## p.open("temp.csv") +## p.readHeaderRow() +## while p.readRow(): +## echo "new row: " +## for col in items(p.headers): +## echo "##", col, ":", p.rowEntry(col), "##" +## p.close() +## ``` +## +## See also +## ======== +## +## * `streams module <streams.html>`_ for using +## `open proc <#open,CsvParser,Stream,string,char,char,char>`_ +## and other stream processing (like `close proc <streams.html#close,Stream>`_) +## * `parseopt module <parseopt.html>`_ for a command line parser +## * `parsecfg module <parsecfg.html>`_ for a configuration file parser +## * `parsexml module <parsexml.html>`_ for a XML / HTML parser +## * `parsesql module <parsesql.html>`_ for a SQL parser +## * `other parsers <lib.html#pure-libraries-parsers>`_ for other parsers + +import std/[lexbase, streams] -import - lexbase, streams +when defined(nimPreviewSlimSystem): + import std/syncio type - CsvRow* = seq[string] ## a row in a CSV file - CsvParser* = object of BaseLexer ## the parser object. - row*: CsvRow ## the current row + CsvRow* = seq[string] ## A row in a CSV file. + CsvParser* = object of BaseLexer ## The parser object. 
+ ## + ## It consists of two public fields: + ## * `row` is the current row + ## * `headers` are the columns that are defined in the csv file + ## (read using `readHeaderRow <#readHeaderRow,CsvParser>`_). + ## Used with `rowEntry <#rowEntry,CsvParser,string>`_). + row*: CsvRow filename: string sep, quote, esc: char skipWhite: bool currRow: int + headers*: seq[string] - CsvError* = object of IOError ## exception that is raised if - ## a parsing error occurs + CsvError* = object of IOError ## An exception that is raised if + ## a parsing error occurs. -{.deprecated: [TCsvRow: CsvRow, TCsvParser: CsvParser, EInvalidCsv: CsvError].} - -proc raiseEInvalidCsv(filename: string, line, col: int, +proc raiseEInvalidCsv(filename: string, line, col: int, msg: string) {.noreturn.} = var e: ref CsvError new(e) - e.msg = filename & "(" & $line & ", " & $col & ") Error: " & msg + if filename.len == 0: + e.msg = "Error: " & msg + else: + e.msg = filename & "(" & $line & ", " & $col & ") Error: " & msg raise e -proc error(my: CsvParser, pos: int, msg: string) = - raiseEInvalidCsv(my.filename, my.lineNumber, getColNumber(my, pos), msg) +proc error(self: CsvParser, pos: int, msg: string) = + raiseEInvalidCsv(self.filename, self.lineNumber, getColNumber(self, pos), msg) -proc open*(my: var CsvParser, input: Stream, filename: string, +proc open*(self: var CsvParser, input: Stream, filename: string, separator = ',', quote = '"', escape = '\0', skipInitialSpace = false) = - ## initializes the parser with an input stream. `Filename` is only used + ## Initializes the parser with an input stream. `Filename` is only used ## for nice error messages. The parser's behaviour can be controlled by ## the diverse optional parameters: ## - `separator`: character used to separate fields - ## - `quote`: Used to quote fields containing special characters like - ## `separator`, `quote` or new-line characters. 
'\0' disables the parsing + ## - `quote`: Used to quote fields containing special characters like + ## `separator`, `quote` or new-line characters. '\\0' disables the parsing ## of quotes. - ## - `escape`: removes any special meaning from the following character; - ## '\0' disables escaping; if escaping is disabled and `quote` is not '\0', + ## - `escape`: removes any special meaning from the following character; + ## '\\0' disables escaping; if escaping is disabled and `quote` is not '\\0', ## two `quote` characters are parsed one literal `quote` character. - ## - `skipInitialSpace`: If true, whitespace immediately following the + ## - `skipInitialSpace`: If true, whitespace immediately following the ## `separator` is ignored. - lexbase.open(my, input) - my.filename = filename - my.sep = separator - my.quote = quote - my.esc = escape - my.skipWhite = skipInitialSpace - my.row = @[] - my.currRow = 0 - -proc parseField(my: var CsvParser, a: var string) = - var pos = my.bufpos - var buf = my.buf - if my.skipWhite: - while buf[pos] in {' ', '\t'}: inc(pos) + ## + ## See also: + ## * `open proc <#open,CsvParser,string,char,char,char>`_ which creates the + ## file stream for you + runnableExamples: + import std/streams + var strm = newStringStream("One,Two,Three\n1,2,3\n10,20,30") + var parser: CsvParser + parser.open(strm, "tmp.csv") + parser.close() + strm.close() + + lexbase.open(self, input) + self.filename = filename + self.sep = separator + self.quote = quote + self.esc = escape + self.skipWhite = skipInitialSpace + +proc open*(self: var CsvParser, filename: string, + separator = ',', quote = '"', escape = '\0', + skipInitialSpace = false) = + ## Similar to the `other open proc<#open,CsvParser,Stream,string,char,char,char>`_, + ## but creates the file stream for you. 
+ runnableExamples: + from std/os import removeFile + writeFile("tmp.csv", "One,Two,Three\n1,2,3\n10,20,300") + var parser: CsvParser + parser.open("tmp.csv") + parser.close() + removeFile("tmp.csv") + + var s = newFileStream(filename, fmRead) + if s == nil: self.error(0, "cannot open: " & filename) + open(self, s, filename, separator, + quote, escape, skipInitialSpace) + +proc parseField(self: var CsvParser, a: var string) = + var pos = self.bufpos + if self.skipWhite: + while self.buf[pos] in {' ', '\t'}: inc(pos) setLen(a, 0) # reuse memory - if buf[pos] == my.quote and my.quote != '\0': + if self.buf[pos] == self.quote and self.quote != '\0': inc(pos) while true: - var c = buf[pos] + let c = self.buf[pos] if c == '\0': - my.bufpos = pos # can continue after exception? - error(my, pos, my.quote & " expected") + self.bufpos = pos # can continue after exception? + error(self, pos, self.quote & " expected") break - elif c == my.quote: - if my.esc == '\0' and buf[pos+1] == my.quote: - add(a, my.quote) + elif c == self.quote: + if self.esc == '\0' and self.buf[pos + 1] == self.quote: + add(a, self.quote) inc(pos, 2) else: inc(pos) break - elif c == my.esc: - add(a, buf[pos+1]) + elif c == self.esc: + add(a, self.buf[pos + 1]) inc(pos, 2) else: case c - of '\c': - pos = handleCR(my, pos) - buf = my.buf + of '\c': + pos = handleCR(self, pos) add(a, "\n") - of '\l': - pos = handleLF(my, pos) - buf = my.buf + of '\l': + pos = handleLF(self, pos) add(a, "\n") else: add(a, c) inc(pos) else: while true: - var c = buf[pos] - if c == my.sep: break + let c = self.buf[pos] + if c == self.sep: break if c in {'\c', '\l', '\0'}: break add(a, c) inc(pos) - my.bufpos = pos + self.bufpos = pos + +proc processedRows*(self: var CsvParser): int {.inline.} = + ## Returns number of the processed rows. + ## + ## But even if `readRow <#readRow,CsvParser,int>`_ arrived at EOF then + ## processed rows counter is incremented. 
+ runnableExamples: + import std/streams -proc processedRows*(my: var CsvParser): int = - ## returns number of the processed rows - return my.currRow + var strm = newStringStream("One,Two,Three\n1,2,3") + var parser: CsvParser + parser.open(strm, "tmp.csv") + doAssert parser.readRow() + doAssert parser.processedRows() == 1 + doAssert parser.readRow() + doAssert parser.processedRows() == 2 + ## Even if `readRow` arrived at EOF then `processedRows` is incremented. + doAssert parser.readRow() == false + doAssert parser.processedRows() == 3 + doAssert parser.readRow() == false + doAssert parser.processedRows() == 4 + parser.close() + strm.close() -proc readRow*(my: var CsvParser, columns = 0): bool = - ## reads the next row; if `columns` > 0, it expects the row to have + self.currRow + +proc readRow*(self: var CsvParser, columns = 0): bool = + ## Reads the next row; if `columns` > 0, it expects the row to have ## exactly this many columns. Returns false if the end of the file ## has been encountered else true. + ## + ## Blank lines are skipped. + runnableExamples: + import std/streams + var strm = newStringStream("One,Two,Three\n1,2,3\n\n10,20,30") + var parser: CsvParser + parser.open(strm, "tmp.csv") + doAssert parser.readRow() + doAssert parser.row == @["One", "Two", "Three"] + doAssert parser.readRow() + doAssert parser.row == @["1", "2", "3"] + ## Blank lines are skipped. 
+ doAssert parser.readRow() + doAssert parser.row == @["10", "20", "30"] + + var emptySeq: seq[string] + doAssert parser.readRow() == false + doAssert parser.row == emptySeq + doAssert parser.readRow() == false + doAssert parser.row == emptySeq + + parser.close() + strm.close() + var col = 0 # current column - var oldpos = my.bufpos - while my.buf[my.bufpos] != '\0': - var oldlen = my.row.len - if oldlen < col+1: - setLen(my.row, col+1) - my.row[col] = "" - parseField(my, my.row[col]) + let oldpos = self.bufpos + # skip initial empty lines #8365 + while true: + case self.buf[self.bufpos] + of '\c': self.bufpos = handleCR(self, self.bufpos) + of '\l': self.bufpos = handleLF(self, self.bufpos) + else: break + while self.buf[self.bufpos] != '\0': + let oldlen = self.row.len + if oldlen < col + 1: + setLen(self.row, col + 1) + self.row[col] = "" + parseField(self, self.row[col]) inc(col) - if my.buf[my.bufpos] == my.sep: - inc(my.bufpos) + if self.buf[self.bufpos] == self.sep: + inc(self.bufpos) else: - case my.buf[my.bufpos] - of '\c', '\l': + case self.buf[self.bufpos] + of '\c', '\l': # skip empty lines: - while true: - case my.buf[my.bufpos] - of '\c': my.bufpos = handleCR(my, my.bufpos) - of '\l': my.bufpos = handleLF(my, my.bufpos) + while true: + case self.buf[self.bufpos] + of '\c': self.bufpos = handleCR(self, self.bufpos) + of '\l': self.bufpos = handleLF(self, self.bufpos) else: break of '\0': discard - else: error(my, my.bufpos, my.sep & " expected") + else: error(self, self.bufpos, self.sep & " expected") break - - setLen(my.row, col) + + setLen(self.row, col) result = col > 0 - if result and col != columns and columns > 0: - error(my, oldpos+1, $columns & " columns expected, but found " & + if result and col != columns and columns > 0: + error(self, oldpos + 1, $columns & " columns expected, but found " & $col & " columns") - inc(my.currRow) - -proc close*(my: var CsvParser) {.inline.} = - ## closes the parser `my` and its associated input stream. 
- lexbase.close(my) - -when isMainModule: - import os + inc(self.currRow) + +proc close*(self: var CsvParser) {.inline.} = + ## Closes the parser `self` and its associated input stream. + lexbase.close(self) + +proc readHeaderRow*(self: var CsvParser) = + ## Reads the first row and creates a look-up table for column numbers + ## See also: + ## * `rowEntry proc <#rowEntry,CsvParser,string>`_ + runnableExamples: + import std/streams + + var strm = newStringStream("One,Two,Three\n1,2,3") + var parser: CsvParser + parser.open(strm, "tmp.csv") + + parser.readHeaderRow() + doAssert parser.headers == @["One", "Two", "Three"] + doAssert parser.row == @["One", "Two", "Three"] + + doAssert parser.readRow() + doAssert parser.headers == @["One", "Two", "Three"] + doAssert parser.row == @["1", "2", "3"] + + parser.close() + strm.close() + + let present = self.readRow() + if present: + self.headers = self.row + +proc rowEntry*(self: var CsvParser, entry: string): var string = + ## Accesses a specified `entry` from the current row. + ## + ## Assumes that `readHeaderRow <#readHeaderRow,CsvParser>`_ has already been + ## called. + ## + ## If specified `entry` does not exist, raises KeyError. + runnableExamples: + import std/streams + var strm = newStringStream("One,Two,Three\n1,2,3\n\n10,20,30") + var parser: CsvParser + parser.open(strm, "tmp.csv") + ## Requires calling `readHeaderRow`. 
+ parser.readHeaderRow() + doAssert parser.readRow() + doAssert parser.rowEntry("One") == "1" + doAssert parser.rowEntry("Two") == "2" + doAssert parser.rowEntry("Three") == "3" + doAssertRaises(KeyError): + discard parser.rowEntry("NonexistentEntry") + parser.close() + strm.close() + + let index = self.headers.find(entry) + if index >= 0: + result = self.row[index] + else: + raise newException(KeyError, "Entry `" & entry & "` doesn't exist") + +when not defined(testing) and isMainModule: + import std/os var s = newFileStream(paramStr(1), fmRead) if s == nil: quit("cannot open the file" & paramStr(1)) var x: CsvParser @@ -177,4 +359,3 @@ when isMainModule: for val in items(x.row): echo "##", val, "##" close(x) - diff --git a/lib/pure/parsejson.nim b/lib/pure/parsejson.nim new file mode 100644 index 000000000..9292a8596 --- /dev/null +++ b/lib/pure/parsejson.nim @@ -0,0 +1,522 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2018 Nim contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a json parser. It is used +## and exported by the `json` standard library +## module, but can also be used in its own right. 
+ +import std/[strutils, lexbase, streams, unicode] +import std/private/decode_helpers + +when defined(nimPreviewSlimSystem): + import std/assertions + +type + JsonEventKind* = enum ## enumeration of all events that may occur when parsing + jsonError, ## an error occurred during parsing + jsonEof, ## end of file reached + jsonString, ## a string literal + jsonInt, ## an integer literal + jsonFloat, ## a float literal + jsonTrue, ## the value `true` + jsonFalse, ## the value `false` + jsonNull, ## the value `null` + jsonObjectStart, ## start of an object: the `{` token + jsonObjectEnd, ## end of an object: the `}` token + jsonArrayStart, ## start of an array: the `[` token + jsonArrayEnd ## end of an array: the `]` token + + TokKind* = enum # must be synchronized with TJsonEventKind! + tkError, + tkEof, + tkString, + tkInt, + tkFloat, + tkTrue, + tkFalse, + tkNull, + tkCurlyLe, + tkCurlyRi, + tkBracketLe, + tkBracketRi, + tkColon, + tkComma + + JsonError* = enum ## enumeration that lists all errors that can occur + errNone, ## no error + errInvalidToken, ## invalid token + errStringExpected, ## string expected + errColonExpected, ## `:` expected + errCommaExpected, ## `,` expected + errBracketRiExpected, ## `]` expected + errCurlyRiExpected, ## `}` expected + errQuoteExpected, ## `"` or `'` expected + errEOC_Expected, ## `*/` expected + errEofExpected, ## EOF expected + errExprExpected ## expr expected + + ParserState = enum + stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma, + stateExpectObjectComma, stateExpectColon, stateExpectValue + + JsonParser* = object of BaseLexer ## the parser object. + a*: string + tok*: TokKind + kind: JsonEventKind + err: JsonError + state: seq[ParserState] + filename: string + rawStringLiterals: bool + + JsonKindError* = object of ValueError ## raised by the `to` macro if the + ## JSON kind is incorrect. 
+ JsonParsingError* = object of ValueError ## is raised for a JSON error + +const + errorMessages*: array[JsonError, string] = [ + "no error", + "invalid token", + "string expected", + "':' expected", + "',' expected", + "']' expected", + "'}' expected", + "'\"' or \"'\" expected", + "'*/' expected", + "EOF expected", + "expression expected" + ] + tokToStr: array[TokKind, string] = [ + "invalid token", + "EOF", + "string literal", + "int literal", + "float literal", + "true", + "false", + "null", + "{", "}", "[", "]", ":", "," + ] + +proc open*(my: var JsonParser, input: Stream, filename: string; + rawStringLiterals = false) = + ## initializes the parser with an input stream. `Filename` is only used + ## for nice error messages. If `rawStringLiterals` is true, string literals + ## are kept with their surrounding quotes and escape sequences in them are + ## left untouched too. + lexbase.open(my, input) + my.filename = filename + my.state = @[stateStart] + my.kind = jsonError + my.a = "" + my.rawStringLiterals = rawStringLiterals + +proc close*(my: var JsonParser) {.inline.} = + ## closes the parser `my` and its associated input stream. + lexbase.close(my) + +proc str*(my: JsonParser): string {.inline.} = + ## returns the character data for the events: `jsonInt`, `jsonFloat`, + ## `jsonString` + assert(my.kind in {jsonInt, jsonFloat, jsonString}) + return my.a + +proc getInt*(my: JsonParser): BiggestInt {.inline.} = + ## returns the number for the event: `jsonInt` + assert(my.kind == jsonInt) + return parseBiggestInt(my.a) + +proc getFloat*(my: JsonParser): float {.inline.} = + ## returns the number for the event: `jsonFloat` + assert(my.kind == jsonFloat) + return parseFloat(my.a) + +proc kind*(my: JsonParser): JsonEventKind {.inline.} = + ## returns the current event type for the JSON parser + return my.kind + +proc getColumn*(my: JsonParser): int {.inline.} = + ## get the current column the parser has arrived at. 
+ result = getColNumber(my, my.bufpos) + +proc getLine*(my: JsonParser): int {.inline.} = + ## get the current line the parser has arrived at. + result = my.lineNumber + +proc getFilename*(my: JsonParser): string {.inline.} = + ## get the filename of the file that the parser processes. + result = my.filename + +proc errorMsg*(my: JsonParser): string = + ## returns a helpful error message for the event `jsonError` + assert(my.kind == jsonError) + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]] + +proc errorMsgExpected*(my: JsonParser, e: string): string = + ## returns an error message "`e` expected" in the same format as the + ## other error messages + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), e & " expected"] + +proc parseEscapedUTF16*(buf: cstring, pos: var int): int = + result = 0 + #UTF-16 escape is always 4 bytes. + for _ in 0..3: + # if char in '0' .. '9', 'a' .. 'f', 'A' .. 'F' + if handleHexChar(buf[pos], result): + inc(pos) + else: + return -1 + +proc parseString(my: var JsonParser): TokKind = + result = tkString + var pos = my.bufpos + 1 + if my.rawStringLiterals: + add(my.a, '"') + while true: + case my.buf[pos] + of '\0': + my.err = errQuoteExpected + result = tkError + break + of '"': + if my.rawStringLiterals: + add(my.a, '"') + inc(pos) + break + of '\\': + if my.rawStringLiterals: + add(my.a, '\\') + case my.buf[pos+1] + of '\\', '"', '\'', '/': + add(my.a, my.buf[pos+1]) + inc(pos, 2) + of 'b': + add(my.a, '\b') + inc(pos, 2) + of 'f': + add(my.a, '\f') + inc(pos, 2) + of 'n': + add(my.a, '\L') + inc(pos, 2) + of 'r': + add(my.a, '\C') + inc(pos, 2) + of 't': + add(my.a, '\t') + inc(pos, 2) + of 'v': + add(my.a, '\v') + inc(pos, 2) + of 'u': + if my.rawStringLiterals: + add(my.a, 'u') + inc(pos, 2) + var pos2 = pos + var r = parseEscapedUTF16(cstring(my.buf), pos) + if r < 0: + my.err = errInvalidToken + break + # Deal with surrogates + if (r and 0xfc00) 
== 0xd800: + if my.buf[pos] != '\\' or my.buf[pos+1] != 'u': + my.err = errInvalidToken + break + inc(pos, 2) + var s = parseEscapedUTF16(cstring(my.buf), pos) + if (s and 0xfc00) == 0xdc00 and s > 0: + r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00)) + else: + my.err = errInvalidToken + break + if my.rawStringLiterals: + let length = pos - pos2 + for i in 1 .. length: + if my.buf[pos2] in {'0'..'9', 'A'..'F', 'a'..'f'}: + add(my.a, my.buf[pos2]) + inc pos2 + else: + break + else: + add(my.a, toUTF8(Rune(r))) + else: + # don't bother with the error + add(my.a, my.buf[pos]) + inc(pos) + of '\c': + pos = lexbase.handleCR(my, pos) + add(my.a, '\c') + of '\L': + pos = lexbase.handleLF(my, pos) + add(my.a, '\L') + else: + add(my.a, my.buf[pos]) + inc(pos) + my.bufpos = pos # store back + +proc skip(my: var JsonParser) = + var pos = my.bufpos + while true: + case my.buf[pos] + of '/': + if my.buf[pos+1] == '/': + # skip line comment: + inc(pos, 2) + while true: + case my.buf[pos] + of '\0': + break + of '\c': + pos = lexbase.handleCR(my, pos) + break + of '\L': + pos = lexbase.handleLF(my, pos) + break + else: + inc(pos) + elif my.buf[pos+1] == '*': + # skip long comment: + inc(pos, 2) + while true: + case my.buf[pos] + of '\0': + my.err = errEOC_Expected + break + of '\c': + pos = lexbase.handleCR(my, pos) + of '\L': + pos = lexbase.handleLF(my, pos) + of '*': + inc(pos) + if my.buf[pos] == '/': + inc(pos) + break + else: + inc(pos) + else: + break + of ' ', '\t': + inc(pos) + of '\c': + pos = lexbase.handleCR(my, pos) + of '\L': + pos = lexbase.handleLF(my, pos) + else: + break + my.bufpos = pos + +proc parseNumber(my: var JsonParser) = + var pos = my.bufpos + if my.buf[pos] == '-': + add(my.a, '-') + inc(pos) + if my.buf[pos] == '.': + add(my.a, "0.") + inc(pos) + else: + while my.buf[pos] in Digits: + add(my.a, my.buf[pos]) + inc(pos) + if my.buf[pos] == '.': + add(my.a, '.') + inc(pos) + # digits after the dot: + while my.buf[pos] in Digits: + add(my.a, 
my.buf[pos]) + inc(pos) + if my.buf[pos] in {'E', 'e'}: + add(my.a, my.buf[pos]) + inc(pos) + if my.buf[pos] in {'+', '-'}: + add(my.a, my.buf[pos]) + inc(pos) + while my.buf[pos] in Digits: + add(my.a, my.buf[pos]) + inc(pos) + my.bufpos = pos + +proc parseName(my: var JsonParser) = + var pos = my.bufpos + if my.buf[pos] in IdentStartChars: + while my.buf[pos] in IdentChars: + add(my.a, my.buf[pos]) + inc(pos) + my.bufpos = pos + +proc getTok*(my: var JsonParser): TokKind = + setLen(my.a, 0) + skip(my) # skip whitespace, comments + case my.buf[my.bufpos] + of '-', '.', '0'..'9': + parseNumber(my) + if {'.', 'e', 'E'} in my.a: + result = tkFloat + else: + result = tkInt + of '"': + result = parseString(my) + of '[': + inc(my.bufpos) + result = tkBracketLe + of '{': + inc(my.bufpos) + result = tkCurlyLe + of ']': + inc(my.bufpos) + result = tkBracketRi + of '}': + inc(my.bufpos) + result = tkCurlyRi + of ',': + inc(my.bufpos) + result = tkComma + of ':': + inc(my.bufpos) + result = tkColon + of '\0': + result = tkEof + of 'a'..'z', 'A'..'Z', '_': + parseName(my) + case my.a + of "null": result = tkNull + of "true": result = tkTrue + of "false": result = tkFalse + else: result = tkError + else: + inc(my.bufpos) + result = tkError + my.tok = result + + +proc next*(my: var JsonParser) = + ## retrieves the first/next event. This controls the parser. + var tk = getTok(my) + var i = my.state.len-1 + # the following code is a state machine. If we had proper coroutines, + # the code could be much simpler. + case my.state[i] + of stateEof: + if tk == tkEof: + my.kind = jsonEof + else: + my.kind = jsonError + my.err = errEofExpected + of stateStart: + # tokens allowed? + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state[i] = stateEof # expect EOF next! 
+ my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateArray) # we expect any + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkEof: + my.kind = jsonEof + else: + my.kind = jsonError + my.err = errEofExpected + of stateObject: + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state.add(stateExpectColon) + my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateExpectColon) + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateExpectColon) + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkCurlyRi: + my.kind = jsonObjectEnd + discard my.state.pop() + else: + my.kind = jsonError + my.err = errCurlyRiExpected + of stateArray: + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state.add(stateExpectArrayComma) # expect value next! + my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateExpectArrayComma) + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateExpectArrayComma) + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkBracketRi: + my.kind = jsonArrayEnd + discard my.state.pop() + else: + my.kind = jsonError + my.err = errBracketRiExpected + of stateExpectArrayComma: + case tk + of tkComma: + discard my.state.pop() + next(my) + of tkBracketRi: + my.kind = jsonArrayEnd + discard my.state.pop() # pop stateExpectArrayComma + discard my.state.pop() # pop stateArray + else: + my.kind = jsonError + my.err = errBracketRiExpected + of stateExpectObjectComma: + case tk + of tkComma: + discard my.state.pop() + next(my) + of tkCurlyRi: + my.kind = jsonObjectEnd + discard my.state.pop() # pop stateExpectObjectComma + discard my.state.pop() # pop stateObject + else: + my.kind = jsonError + my.err = errCurlyRiExpected + of stateExpectColon: + case tk + of tkColon: + my.state[i] = stateExpectValue + next(my) + else: + my.kind = jsonError + 
my.err = errColonExpected + of stateExpectValue: + case tk + of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull: + my.state[i] = stateExpectObjectComma + my.kind = JsonEventKind(ord(tk)) + of tkBracketLe: + my.state[i] = stateExpectObjectComma + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state[i] = stateExpectObjectComma + my.state.add(stateObject) + my.kind = jsonObjectStart + else: + my.kind = jsonError + my.err = errExprExpected + +proc raiseParseErr*(p: JsonParser, msg: string) {.noinline, noreturn.} = + ## raises an `EJsonParsingError` exception. + raise newException(JsonParsingError, errorMsgExpected(p, msg)) + +proc eat*(p: var JsonParser, tok: TokKind) = + if p.tok == tok: discard getTok(p) + else: raiseParseErr(p, tokToStr[tok]) diff --git a/lib/pure/parseopt.nim b/lib/pure/parseopt.nim index 4c92a7cdf..03f151b66 100644 --- a/lib/pure/parseopt.nim +++ b/lib/pure/parseopt.nim @@ -11,149 +11,515 @@ ## It supports one convenience iterator over all command line options and some ## lower-level features. ## -## Supported syntax: +## Supported Syntax +## ================ ## -## 1. short options - ``-abcd``, where a, b, c, d are names -## 2. long option - ``--foo:bar``, ``--foo=bar`` or ``--foo`` -## 3. argument - everything else +## The following syntax is supported when arguments for the `shortNoVal` and +## `longNoVal` parameters, which are +## `described later<#nimshortnoval-and-nimlongnoval>`_, are not provided: +## +## 1. Short options: `-abcd`, `-e:5`, `-e=5` +## 2. Long options: `--foo:bar`, `--foo=bar`, `--foo` +## 3. Arguments: everything that does not start with a `-` +## +## These three kinds of tokens are enumerated in the +## `CmdLineKind enum<#CmdLineKind>`_. +## +## When option values begin with ':' or '=', they need to be doubled up (as in +## `--delim::`) or alternated (as in `--delim=:`). 
+## +## The `--` option, commonly used to denote that every token that follows is +## an argument, is interpreted as a long option, and its name is the empty +## string. +## +## Parsing +## ======= +## +## Use an `OptParser<#OptParser>`_ to parse command line options. It can be +## created with `initOptParser<#initOptParser,string,set[char],seq[string]>`_, +## and `next<#next,OptParser>`_ advances the parser by one token. +## +## For each token, the parser's `kind`, `key`, and `val` fields give +## information about that token. If the token is a long or short option, `key` +## is the option's name, and `val` is either the option's value, if provided, +## or the empty string. For arguments, the `key` field contains the argument +## itself, and `val` is unused. To check if the end of the command line has +## been reached, check if `kind` is equal to `cmdEnd`. +## +## Here is an example: +## +## ```Nim +## import std/parseopt +## +## var p = initOptParser("-ab -e:5 --foo --bar=20 file.txt") +## while true: +## p.next() +## case p.kind +## of cmdEnd: break +## of cmdShortOption, cmdLongOption: +## if p.val == "": +## echo "Option: ", p.key +## else: +## echo "Option and value: ", p.key, ", ", p.val +## of cmdArgument: +## echo "Argument: ", p.key +## +## # Output: +## # Option: a +## # Option: b +## # Option and value: e, 5 +## # Option: foo +## # Option and value: bar, 20 +## # Argument: file.txt +## ``` +## +## The `getopt iterator<#getopt.i,OptParser>`_, which is provided for +## convenience, can be used to iterate through all command line options as well. +## +## To set a default value for a variable assigned through `getopt` and accept arguments from the cmd line. +## Assign the default value to a variable before parsing. +## Then set the variable to the new value while parsing. 
+## +## Here is an example: +## +## ```Nim +## import std/parseopt +## +## var varName: string = "defaultValue" +## +## for kind, key, val in getopt(): +## case kind +## of cmdArgument: +## discard +## of cmdLongOption, cmdShortOption: +## case key: +## of "varName": # --varName:<value> in the console when executing +## varName = val # do input sanitization in production systems +## of cmdEnd: +## discard +## ``` +## +## `shortNoVal` and `longNoVal` +## ============================ +## +## The optional `shortNoVal` and `longNoVal` parameters present in +## `initOptParser<#initOptParser,string,set[char],seq[string]>`_ are for +## specifying which short and long options do not accept values. +## +## When `shortNoVal` is non-empty, users are not required to separate short +## options and their values with a ':' or '=' since the parser knows which +## options accept values and which ones do not. This behavior also applies for +## long options if `longNoVal` is non-empty. For short options, `-j4` +## becomes supported syntax, and for long options, `--foo bar` becomes +## supported. This is in addition to the `previously mentioned +## syntax<#supported-syntax>`_. Users can still separate options and their +## values with ':' or '=', but that becomes optional. +## +## As more options which do not accept values are added to your program, +## remember to amend `shortNoVal` and `longNoVal` accordingly. 
+## +## The following example illustrates the difference between having an empty +## `shortNoVal` and `longNoVal`, which is the default, and providing +## arguments for those two parameters: +## +## ```Nim +## import std/parseopt +## +## proc printToken(kind: CmdLineKind, key: string, val: string) = +## case kind +## of cmdEnd: doAssert(false) # Doesn't happen with getopt() +## of cmdShortOption, cmdLongOption: +## if val == "": +## echo "Option: ", key +## else: +## echo "Option and value: ", key, ", ", val +## of cmdArgument: +## echo "Argument: ", key +## +## let cmdLine = "-j4 --first bar" +## +## var emptyNoVal = initOptParser(cmdLine) +## for kind, key, val in emptyNoVal.getopt(): +## printToken(kind, key, val) +## +## # Output: +## # Option: j +## # Option: 4 +## # Option: first +## # Argument: bar +## +## var withNoVal = initOptParser(cmdLine, shortNoVal = {'c'}, +## longNoVal = @["second"]) +## for kind, key, val in withNoVal.getopt(): +## printToken(kind, key, val) +## +## # Output: +## # Option and value: j, 4 +## # Option and value: first, bar +## ``` +## +## See also +## ======== +## +## * `os module<os.html>`_ for lower-level command line parsing procs +## * `parseutils module<parseutils.html>`_ for helpers that parse tokens, +## numbers, identifiers, etc. 
+## * `strutils module<strutils.html>`_ for common string handling operations +## * `json module<json.html>`_ for a JSON parser +## * `parsecfg module<parsecfg.html>`_ for a configuration file parser +## * `parsecsv module<parsecsv.html>`_ for a simple CSV (comma separated value) +## parser +## * `parsexml module<parsexml.html>`_ for a XML / HTML parser +## * `other parsers<lib.html#pure-libraries-parsers>`_ for more parsers {.push debugger: off.} include "system/inclrtl" -import - os, strutils - -type - CmdLineKind* = enum ## the detected command line token - cmdEnd, ## end of command line reached - cmdArgument, ## argument detected - cmdLongOption, ## a long option ``--option`` detected - cmdShortOption ## a short option ``-c`` detected - OptParser* = - object of RootObj ## this object implements the command line parser - cmd: string +import std/strutils +import std/os + +type + CmdLineKind* = enum ## The detected command line token. + cmdEnd, ## End of command line reached + cmdArgument, ## An argument such as a filename + cmdLongOption, ## A long option such as --option + cmdShortOption ## A short option such as -c + OptParser* = object of RootObj ## \ + ## Implementation of the command line parser. + ## + ## To initialize it, use the + ## `initOptParser proc<#initOptParser,string,set[char],seq[string]>`_. pos: int inShortState: bool - kind*: CmdLineKind ## the dected command line token - key*, val*: TaintedString ## key and value pair; ``key`` is the option - ## or the argument, ``value`` is not "" if - ## the option was given a value - -{.deprecated: [TCmdLineKind: CmdLineKind, TOptParser: OptParser].} - -when declared(os.paramCount): - # we cannot provide this for NimRtl creation on Posix, because we can't - # access the command line arguments then! - - proc initOptParser*(cmdline = ""): OptParser = - ## inits the option parser. If ``cmdline == ""``, the real command line - ## (as provided by the ``OS`` module) is taken. 
- result.pos = 0 - result.inShortState = false - if cmdline != "": - result.cmd = cmdline - else: - result.cmd = "" - for i in countup(1, paramCount()): - result.cmd = result.cmd & quoteIfContainsWhite(paramStr(i).string) & ' ' - result.kind = cmdEnd - result.key = TaintedString"" - result.val = TaintedString"" - -proc parseWord(s: string, i: int, w: var string, - delim: set[char] = {'\x09', ' ', '\0'}): int = + allowWhitespaceAfterColon: bool + shortNoVal: set[char] + longNoVal: seq[string] + cmds: seq[string] + idx: int + kind*: CmdLineKind ## The detected command line token + key*, val*: string ## Key and value pair; the key is the option + ## or the argument, and the value is not "" if + ## the option was given a value + +proc parseWord(s: string, i: int, w: var string, + delim: set[char] = {'\t', ' '}): int = result = i - if s[result] == '\"': + if result < s.len and s[result] == '\"': inc(result) - while not (s[result] in {'\0', '\"'}): + while result < s.len: + if s[result] == '"': + inc result + break add(w, s[result]) inc(result) - if s[result] == '\"': inc(result) - else: - while not (s[result] in delim): + else: + while result < s.len and s[result] notin delim: add(w, s[result]) inc(result) -proc handleShortOption(p: var OptParser) = +proc initOptParser*(cmdline: seq[string], shortNoVal: set[char] = {}, + longNoVal: seq[string] = @[]; + allowWhitespaceAfterColon = true): OptParser = + ## Initializes the command line parser. + ## + ## If `cmdline.len == 0`, the real command line as provided by the + ## `os` module is retrieved instead if it is available. If the + ## command line is not available, a `ValueError` will be raised. + ## Behavior of the other parameters remains the same as in + ## `initOptParser(string, ...) + ## <#initOptParser,string,set[char],seq[string]>`_. 
+ ## + ## See also: + ## * `getopt iterator<#getopt.i,seq[string],set[char],seq[string]>`_ + runnableExamples: + var p = initOptParser() + p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"]) + p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"], + shortNoVal = {'l'}, longNoVal = @["left"]) + + result.pos = 0 + result.idx = 0 + result.inShortState = false + result.shortNoVal = shortNoVal + result.longNoVal = longNoVal + result.allowWhitespaceAfterColon = allowWhitespaceAfterColon + if cmdline.len != 0: + result.cmds = newSeq[string](cmdline.len) + for i in 0..<cmdline.len: + result.cmds[i] = cmdline[i] + else: + when declared(paramCount): + when defined(nimscript): + var ctr = 0 + var firstNimsFound = false + for i in countup(0, paramCount()): + if firstNimsFound: + result.cmds[ctr] = paramStr(i) + inc ctr, 1 + if paramStr(i).endsWith(".nims") and not firstNimsFound: + firstNimsFound = true + result.cmds = newSeq[string](paramCount()-i) + else: + result.cmds = newSeq[string](paramCount()) + for i in countup(1, paramCount()): + result.cmds[i-1] = paramStr(i) + else: + # we cannot provide this for NimRtl creation on Posix, because we can't + # access the command line arguments then! + raiseAssert "empty command line given but" & + " real command line is not accessible" + result.kind = cmdEnd + result.key = "" + result.val = "" + +proc initOptParser*(cmdline = "", shortNoVal: set[char] = {}, + longNoVal: seq[string] = @[]; + allowWhitespaceAfterColon = true): OptParser = + ## Initializes the command line parser. + ## + ## If `cmdline == ""`, the real command line as provided by the + ## `os` module is retrieved instead if it is available. If the + ## command line is not available, a `ValueError` will be raised. + ## + ## `shortNoVal` and `longNoVal` are used to specify which options + ## do not take values. See the `documentation about these + ## parameters<#nimshortnoval-and-nimlongnoval>`_ for more information on + ## how this affects parsing. 
+ ## + ## This does not provide a way of passing default values to arguments. + ## + ## See also: + ## * `getopt iterator<#getopt.i,OptParser>`_ + runnableExamples: + var p = initOptParser() + p = initOptParser("--left --debug:3 -l -r:2") + p = initOptParser("--left --debug:3 -l -r:2", + shortNoVal = {'l'}, longNoVal = @["left"]) + + initOptParser(parseCmdLine(cmdline), shortNoVal, longNoVal, allowWhitespaceAfterColon) + +proc handleShortOption(p: var OptParser; cmd: string) = var i = p.pos p.kind = cmdShortOption - add(p.key.string, p.cmd[i]) - inc(i) + if i < cmd.len: + add(p.key, cmd[i]) + inc(i) p.inShortState = true - while p.cmd[i] in {'\x09', ' '}: + while i < cmd.len and cmd[i] in {'\t', ' '}: inc(i) p.inShortState = false - if p.cmd[i] in {':', '='}: - inc(i) + if i < cmd.len and (cmd[i] in {':', '='} or + card(p.shortNoVal) > 0 and p.key[0] notin p.shortNoVal): + if i < cmd.len and cmd[i] in {':', '='}: + inc(i) p.inShortState = false - while p.cmd[i] in {'\x09', ' '}: inc(i) - i = parseWord(p.cmd, i, p.val.string) - if p.cmd[i] == '\0': p.inShortState = false - p.pos = i + while i < cmd.len and cmd[i] in {'\t', ' '}: inc(i) + p.val = substr(cmd, i) + p.pos = 0 + inc p.idx + else: + p.pos = i + if i >= cmd.len: + p.inShortState = false + p.pos = 0 + inc p.idx + +proc next*(p: var OptParser) {.rtl, extern: "npo$1".} = + ## Parses the next token. + ## + ## `p.kind` describes what kind of token has been parsed. `p.key` and + ## `p.val` are set accordingly. 
+ runnableExamples: + var p = initOptParser("--left -r:2 file.txt") + p.next() + doAssert p.kind == cmdLongOption and p.key == "left" + p.next() + doAssert p.kind == cmdShortOption and p.key == "r" and p.val == "2" + p.next() + doAssert p.kind == cmdArgument and p.key == "file.txt" + p.next() + doAssert p.kind == cmdEnd + + if p.idx >= p.cmds.len: + p.kind = cmdEnd + return -proc next*(p: var OptParser) {.rtl, extern: "npo$1".} = - ## parses the first or next option; ``p.kind`` describes what token has been - ## parsed. ``p.key`` and ``p.val`` are set accordingly. var i = p.pos - while p.cmd[i] in {'\x09', ' '}: inc(i) + while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i) p.pos = i - setLen(p.key.string, 0) - setLen(p.val.string, 0) - if p.inShortState: - handleShortOption(p) - return - case p.cmd[i] - of '\0': - p.kind = cmdEnd - of '-': + setLen(p.key, 0) + setLen(p.val, 0) + if p.inShortState: + p.inShortState = false + if i >= p.cmds[p.idx].len: + inc(p.idx) + p.pos = 0 + if p.idx >= p.cmds.len: + p.kind = cmdEnd + return + else: + handleShortOption(p, p.cmds[p.idx]) + return + + if i < p.cmds[p.idx].len and p.cmds[p.idx][i] == '-': inc(i) - if p.cmd[i] == '-': - p.kind = cmdLongoption + if i < p.cmds[p.idx].len and p.cmds[p.idx][i] == '-': + p.kind = cmdLongOption inc(i) - i = parseWord(p.cmd, i, p.key.string, {'\0', ' ', '\x09', ':', '='}) - while p.cmd[i] in {'\x09', ' '}: inc(i) - if p.cmd[i] in {':', '='}: + i = parseWord(p.cmds[p.idx], i, p.key, {' ', '\t', ':', '='}) + while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i) + if i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {':', '='}: inc(i) - while p.cmd[i] in {'\x09', ' '}: inc(i) - p.pos = parseWord(p.cmd, i, p.val.string) - else: - p.pos = i - else: + while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i) + # if we're at the end, use the next command line option: + if i >= p.cmds[p.idx].len and p.idx < p.cmds.len and + p.allowWhitespaceAfterColon: 
+ inc p.idx + i = 0 + if p.idx < p.cmds.len: + p.val = p.cmds[p.idx].substr(i) + elif len(p.longNoVal) > 0 and p.key notin p.longNoVal and p.idx+1 < p.cmds.len: + p.val = p.cmds[p.idx+1] + inc p.idx + else: + p.val = "" + inc p.idx + p.pos = 0 + else: p.pos = i - handleShortOption(p) + handleShortOption(p, p.cmds[p.idx]) else: p.kind = cmdArgument - p.pos = parseWord(p.cmd, i, p.key.string) - -proc cmdLineRest*(p: OptParser): TaintedString {.rtl, extern: "npo$1".} = - ## retrieves the rest of the command line that has not been parsed yet. - result = strip(substr(p.cmd, p.pos, len(p.cmd) - 1)).TaintedString + p.key = p.cmds[p.idx] + inc p.idx + p.pos = 0 -when declared(initOptParser): - iterator getopt*(): tuple[kind: CmdLineKind, key, val: TaintedString] = - ## This is an convenience iterator for iterating over the command line. - ## This uses the TOptParser object. Example: +when declared(quoteShellCommand): + proc cmdLineRest*(p: OptParser): string {.rtl, extern: "npo$1".} = + ## Retrieves the rest of the command line that has not been parsed yet. ## - ## .. code-block:: nim - ## var - ## filename = "" - ## for kind, key, val in getopt(): - ## case kind - ## of cmdArgument: - ## filename = key - ## of cmdLongOption, cmdShortOption: - ## case key - ## of "help", "h": writeHelp() - ## of "version", "v": writeVersion() - ## of cmdEnd: assert(false) # cannot happen - ## if filename == "": - ## # no filename has been given, so we show the help: - ## writeHelp() - var p = initOptParser() - while true: - next(p) - if p.kind == cmdEnd: break - yield (p.kind, p.key, p.val) + ## See also: + ## * `remainingArgs proc<#remainingArgs,OptParser>`_ + ## + ## **Examples:** + ## ```Nim + ## var p = initOptParser("--left -r:2 -- foo.txt bar.txt") + ## while true: + ## p.next() + ## if p.kind == cmdLongOption and p.key == "": # Look for "--" + ## break + ## doAssert p.cmdLineRest == "foo.txt bar.txt" + ## ``` + result = p.cmds[p.idx .. 
^1].quoteShellCommand + +proc remainingArgs*(p: OptParser): seq[string] {.rtl, extern: "npo$1".} = + ## Retrieves a sequence of the arguments that have not been parsed yet. + ## + ## See also: + ## * `cmdLineRest proc<#cmdLineRest,OptParser>`_ + ## + ## **Examples:** + ## ```Nim + ## var p = initOptParser("--left -r:2 -- foo.txt bar.txt") + ## while true: + ## p.next() + ## if p.kind == cmdLongOption and p.key == "": # Look for "--" + ## break + ## doAssert p.remainingArgs == @["foo.txt", "bar.txt"] + ## ``` + result = @[] + for i in p.idx..<p.cmds.len: result.add p.cmds[i] + +iterator getopt*(p: var OptParser): tuple[kind: CmdLineKind, key, + val: string] = + ## Convenience iterator for iterating over the given + ## `OptParser<#OptParser>`_. + ## + ## There is no need to check for `cmdEnd` while iterating. If using `getopt` + ## with case switching, checking for `cmdEnd` is required. + ## + ## See also: + ## * `initOptParser proc<#initOptParser,string,set[char],seq[string]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## # these are placeholders, of course + ## proc writeHelp() = discard + ## proc writeVersion() = discard + ## + ## var filename: string + ## var p = initOptParser("--left --debug:3 -l -r:2") + ## + ## for kind, key, val in p.getopt(): + ## case kind + ## of cmdArgument: + ## filename = key + ## of cmdLongOption, cmdShortOption: + ## case key + ## of "help", "h": writeHelp() + ## of "version", "v": writeVersion() + ## of cmdEnd: assert(false) # cannot happen + ## if filename == "": + ## # no filename has been given, so we show the help + ## writeHelp() + ## ``` + p.pos = 0 + p.idx = 0 + while true: + next(p) + if p.kind == cmdEnd: break + yield (p.kind, p.key, p.val) + +iterator getopt*(cmdline: seq[string] = @[], + shortNoVal: set[char] = {}, longNoVal: seq[string] = @[]): + tuple[kind: CmdLineKind, key, val: string] = + ## Convenience iterator for iterating over command line arguments. + ## + ## This creates a new `OptParser<#OptParser>`_. 
If no command line + ## arguments are provided, the real command line as provided by the + ## `os` module is retrieved instead. + ## + ## `shortNoVal` and `longNoVal` are used to specify which options + ## do not take values. See the `documentation about these + ## parameters<#nimshortnoval-and-nimlongnoval>`_ for more information on + ## how this affects parsing. + ## + ## There is no need to check for `cmdEnd` while iterating. If using `getopt` + ## with case switching, checking for `cmdEnd` is required. + ## + ## See also: + ## * `initOptParser proc<#initOptParser,seq[string],set[char],seq[string]>`_ + ## + ## **Examples:** + ## + ## ```Nim + ## # these are placeholders, of course + ## proc writeHelp() = discard + ## proc writeVersion() = discard + ## + ## var filename: string + ## let params = @["--left", "--debug:3", "-l", "-r:2"] + ## + ## for kind, key, val in getopt(params): + ## case kind + ## of cmdArgument: + ## filename = key + ## of cmdLongOption, cmdShortOption: + ## case key + ## of "help", "h": writeHelp() + ## of "version", "v": writeVersion() + ## of cmdEnd: assert(false) # cannot happen + ## if filename == "": + ## # no filename has been written, so we show the help + ## writeHelp() + ## ``` + var p = initOptParser(cmdline, shortNoVal = shortNoVal, + longNoVal = longNoVal) + while true: + next(p) + if p.kind == cmdEnd: break + yield (p.kind, p.key, p.val) {.pop.} diff --git a/lib/pure/parseopt2.nim b/lib/pure/parseopt2.nim deleted file mode 100644 index 73b498fe0..000000000 --- a/lib/pure/parseopt2.nim +++ /dev/null @@ -1,152 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2015 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module provides the standard Nim command line parser. -## It supports one convenience iterator over all command line options and some -## lower-level features. -## -## Supported syntax: -## -## 1. 
short options - ``-abcd``, where a, b, c, d are names -## 2. long option - ``--foo:bar``, ``--foo=bar`` or ``--foo`` -## 3. argument - everything else - -{.push debugger: off.} - -include "system/inclrtl" - -import - os, strutils - -type - CmdLineKind* = enum ## the detected command line token - cmdEnd, ## end of command line reached - cmdArgument, ## argument detected - cmdLongOption, ## a long option ``--option`` detected - cmdShortOption ## a short option ``-c`` detected - OptParser* = - object of RootObj ## this object implements the command line parser - cmd: seq[string] - pos: int - remainingShortOptions: string - kind*: CmdLineKind ## the dected command line token - key*, val*: TaintedString ## key and value pair; ``key`` is the option - ## or the argument, ``value`` is not "" if - ## the option was given a value - -{.deprecated: [TCmdLineKind: CmdLineKind, TOptParser: OptParser].} - -proc initOptParser*(cmdline: seq[string]): OptParser {.rtl.} = - ## Initalizes option parses with cmdline. cmdline should not contain - ## argument 0 - program name. - ## If cmdline == nil default to current command line arguments. - result.remainingShortOptions = "" - when not defined(createNimRtl): - if cmdline == nil: - result.cmd = commandLineParams() - return - else: - assert cmdline != nil, "Cannot determine command line arguments." - - result.cmd = @cmdline - -proc initOptParser*(cmdline: string): OptParser {.rtl, deprecated.} = - ## Initalizes option parses with cmdline. Splits cmdline in on spaces - ## and calls initOptParser(openarray[string]) - ## Do not use. - if cmdline == "": # backward compatibility - return initOptParser(seq[string](nil)) - else: - return initOptParser(cmdline.split) - -when not defined(createNimRtl): - proc initOptParser*(): OptParser = - ## Initializes option parser from current command line arguments. 
- return initOptParser(commandLineParams()) - -proc next*(p: var OptParser) {.rtl, extern: "npo$1".} - -proc nextOption(p: var OptParser, token: string, allowEmpty: bool) = - for splitchar in [':', '=']: - if splitchar in token: - let pos = token.find(splitchar) - p.key = token[0..pos-1] - p.val = token[pos+1..token.len-1] - return - - p.key = token - if allowEmpty: - p.val = "" - else: - p.remainingShortOptions = token[0..token.len-1] - p.next() - -proc next(p: var OptParser) = - if p.remainingShortOptions.len != 0: - p.kind = cmdShortOption - p.key = TaintedString(p.remainingShortOptions[0..0]) - p.val = "" - p.remainingShortOptions = p.remainingShortOptions[1..p.remainingShortOptions.len-1] - return - - if p.pos >= p.cmd.len: - p.kind = cmdEnd - return - - let token = p.cmd[p.pos] - p.pos += 1 - - if token.startsWith("--"): - p.kind = cmdLongOption - nextOption(p, token[2..token.len-1], allowEmpty=true) - elif token.startsWith("-"): - p.kind = cmdShortOption - nextOption(p, token[1..token.len-1], allowEmpty=true) - else: - p.kind = cmdArgument - p.key = token - p.val = "" - -proc cmdLineRest*(p: OptParser): TaintedString {.rtl, extern: "npo$1", deprecated.} = - ## Returns part of command line string that has not been parsed yet. - ## Do not use - does not correctly handle whitespace. - return p.cmd[p.pos..p.cmd.len-1].join(" ") - -type - GetoptResult* = tuple[kind: CmdLineKind, key, val: TaintedString] - -{.deprecated: [TGetoptResult: GetoptResult].} - -when declared(paramCount): - iterator getopt*(): GetoptResult = - ## This is an convenience iterator for iterating over the command line. - ## This uses the OptParser object. Example: - ## - ## .. 
code-block:: nim - ## var - ## filename = "" - ## for kind, key, val in getopt(): - ## case kind - ## of cmdArgument: - ## filename = key - ## of cmdLongOption, cmdShortOption: - ## case key - ## of "help", "h": writeHelp() - ## of "version", "v": writeVersion() - ## of cmdEnd: assert(false) # cannot happen - ## if filename == "": - ## # no filename has been given, so we show the help: - ## writeHelp() - var p = initOptParser() - while true: - next(p) - if p.kind == cmdEnd: break - yield (p.kind, p.key, p.val) - -{.pop.} diff --git a/lib/pure/parsesql.nim b/lib/pure/parsesql.nim index bb4ede779..a7c938d01 100644 --- a/lib/pure/parsesql.nim +++ b/lib/pure/parsesql.nim @@ -7,21 +7,26 @@ # distribution, for details about the copyright. # -## The ``parsesql`` module implements a high performance SQL file +## The `parsesql` module implements a high performance SQL file ## parser. It parses PostgreSQL syntax and the SQL ANSI standard. +## +## Unstable API. -import - hashes, strutils, lexbase, streams +import std/[strutils, lexbase] +import std/private/decode_helpers + +when defined(nimPreviewSlimSystem): + import std/assertions # ------------------- scanner ------------------------------------------------- type - TokKind = enum ## enumeration of all SQL tokens - tkInvalid, ## invalid token - tkEof, ## end of file reached - tkIdentifier, ## abc - tkQuotedIdentifier, ## "abc" - tkStringConstant, ## 'abc' + TokKind = enum ## enumeration of all SQL tokens + tkInvalid, ## invalid token + tkEof, ## end of file reached + tkIdentifier, ## abc + tkQuotedIdentifier, ## "abc" + tkStringConstant, ## 'abc' tkEscapeConstant, ## e'abc' tkDollarQuotedConstant, ## $tag$abc$tag$ tkBitStringConstant, ## B'00011' @@ -37,16 +42,14 @@ type tkBracketLe, ## '[' tkBracketRi, ## ']' tkDot ## '.' 
- - Token = object # a token - kind: TokKind # the type of the token - literal: string # the parsed (string) literal - + + Token = object # a token + kind: TokKind # the type of the token + literal: string # the parsed (string) literal + SqlLexer* = object of BaseLexer ## the parser object. filename: string -{.deprecated: [TToken: Token, TSqlLexer: SqlLexer].} - const tokKindToStr: array[TokKind, string] = [ "invalid", "[EOF]", "identifier", "quoted identifier", "string constant", @@ -55,82 +58,73 @@ const ";", ":", ",", "(", ")", "[", "]", "." ] -proc open(L: var SqlLexer, input: Stream, filename: string) = - lexbase.open(L, input) - L.filename = filename - -proc close(L: var SqlLexer) = + reservedKeywords = @[ + # statements + "select", "from", "where", "group", "limit", "offset", "having", + # functions + "count", + ] + +proc close(L: var SqlLexer) = lexbase.close(L) -proc getColumn(L: SqlLexer): int = +proc getColumn(L: SqlLexer): int = ## get the current column the parser has arrived at. 
result = getColNumber(L, L.bufpos) -proc getLine(L: SqlLexer): int = +proc getLine(L: SqlLexer): int = result = L.lineNumber -proc handleHexChar(c: var SqlLexer, xi: var int) = - case c.buf[c.bufpos] - of '0'..'9': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - of 'a'..'f': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) - inc(c.bufpos) - of 'A'..'F': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) - inc(c.bufpos) - else: - discard - -proc handleOctChar(c: var SqlLexer, xi: var int) = +proc handleOctChar(c: var SqlLexer, xi: var int) = if c.buf[c.bufpos] in {'0'..'7'}: xi = (xi shl 3) or (ord(c.buf[c.bufpos]) - ord('0')) inc(c.bufpos) -proc getEscapedChar(c: var SqlLexer, tok: var Token) = +proc getEscapedChar(c: var SqlLexer, tok: var Token) = inc(c.bufpos) case c.buf[c.bufpos] - of 'n', 'N': + of 'n', 'N': add(tok.literal, '\L') inc(c.bufpos) - of 'r', 'R', 'c', 'C': + of 'r', 'R', 'c', 'C': add(tok.literal, '\c') inc(c.bufpos) - of 'l', 'L': + of 'l', 'L': add(tok.literal, '\L') inc(c.bufpos) - of 'f', 'F': + of 'f', 'F': add(tok.literal, '\f') inc(c.bufpos) - of 'e', 'E': + of 'e', 'E': add(tok.literal, '\e') inc(c.bufpos) - of 'a', 'A': + of 'a', 'A': add(tok.literal, '\a') inc(c.bufpos) - of 'b', 'B': + of 'b', 'B': add(tok.literal, '\b') inc(c.bufpos) - of 'v', 'V': + of 'v', 'V': add(tok.literal, '\v') inc(c.bufpos) - of 't', 'T': + of 't', 'T': add(tok.literal, '\t') inc(c.bufpos) - of '\'', '\"': + of '\'', '\"': add(tok.literal, c.buf[c.bufpos]) inc(c.bufpos) - of '\\': + of '\\': add(tok.literal, '\\') inc(c.bufpos) - of 'x', 'X': + of 'x', 'X': inc(c.bufpos) var xi = 0 - handleHexChar(c, xi) - handleHexChar(c, xi) + if handleHexChar(c.buf[c.bufpos], xi): + inc(c.bufpos) + if handleHexChar(c.buf[c.bufpos], xi): + inc(c.bufpos) add(tok.literal, chr(xi)) - of '0'..'7': + of '0'..'7': var xi = 0 handleOctChar(c, xi) handleOctChar(c, xi) @@ -138,80 +132,76 @@ proc getEscapedChar(c: var SqlLexer, tok: var 
Token) = if (xi <= 255): add(tok.literal, chr(xi)) else: tok.kind = tkInvalid else: tok.kind = tkInvalid - -proc handleCRLF(c: var SqlLexer, pos: int): int = + +proc handleCRLF(c: var SqlLexer, pos: int): int = case c.buf[pos] of '\c': result = lexbase.handleCR(c, pos) of '\L': result = lexbase.handleLF(c, pos) else: result = pos -proc skip(c: var SqlLexer) = +proc skip(c: var SqlLexer) = var pos = c.bufpos - var buf = c.buf var nested = 0 - while true: - case buf[pos] - of ' ', '\t': + while true: + case c.buf[pos] + of ' ', '\t': inc(pos) of '-': - if buf[pos+1] == '-': - while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) + if c.buf[pos+1] == '-': + while not (c.buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos) else: break of '/': - if buf[pos+1] == '*': - inc(pos,2) + if c.buf[pos+1] == '*': + inc(pos, 2) while true: - case buf[pos] + case c.buf[pos] of '\0': break - of '\c', '\L': + of '\c', '\L': pos = handleCRLF(c, pos) - buf = c.buf of '*': - if buf[pos+1] == '/': + if c.buf[pos+1] == '/': inc(pos, 2) if nested <= 0: break dec(nested) else: inc(pos) of '/': - if buf[pos+1] == '*': + if c.buf[pos+1] == '*': inc(pos, 2) inc(nested) else: inc(pos) else: inc(pos) else: break - of '\c', '\L': + of '\c', '\L': pos = handleCRLF(c, pos) - buf = c.buf - else: - break # EndOfFile also leaves the loop + else: + break # EndOfFile also leaves the loop c.bufpos = pos - -proc getString(c: var SqlLexer, tok: var Token, kind: TokKind) = + +proc getString(c: var SqlLexer, tok: var Token, kind: TokKind) = var pos = c.bufpos + 1 - var buf = c.buf tok.kind = kind block parseLoop: while true: - while true: - var ch = buf[pos] + while true: + var ch = c.buf[pos] if ch == '\'': - if buf[pos+1] == '\'': + if c.buf[pos+1] == '\'': inc(pos, 2) add(tok.literal, '\'') else: inc(pos) - break - elif ch in {'\c', '\L', lexbase.EndOfFile}: + break + elif ch in {'\c', '\L', lexbase.EndOfFile}: tok.kind = tkInvalid break parseLoop - elif (ch == '\\') and kind == 
tkEscapeConstant: + elif (ch == '\\') and kind == tkEscapeConstant: c.bufpos = pos getEscapedChar(c, tok) pos = c.bufpos - else: + else: add(tok.literal, ch) inc(pos) c.bufpos = pos @@ -220,30 +210,27 @@ proc getString(c: var SqlLexer, tok: var Token, kind: TokKind) = if c.lineNumber > line: # a new line whitespace has been parsed, so we check if the string # continues after the whitespace: - buf = c.buf # may have been reallocated pos = c.bufpos - if buf[pos] == '\'': inc(pos) + if c.buf[pos] == '\'': inc(pos) else: break parseLoop else: break parseLoop c.bufpos = pos -proc getDollarString(c: var SqlLexer, tok: var Token) = +proc getDollarString(c: var SqlLexer, tok: var Token) = var pos = c.bufpos + 1 - var buf = c.buf tok.kind = tkDollarQuotedConstant var tag = "$" - while buf[pos] in IdentChars: - add(tag, buf[pos]) + while c.buf[pos] in IdentChars: + add(tag, c.buf[pos]) inc(pos) - if buf[pos] == '$': inc(pos) + if c.buf[pos] == '$': inc(pos) else: tok.kind = tkInvalid return while true: - case buf[pos] - of '\c', '\L': + case c.buf[pos] + of '\c', '\L': pos = handleCRLF(c, pos) - buf = c.buf add(tok.literal, "\L") of '\0': tok.kind = tkInvalid @@ -251,43 +238,42 @@ proc getDollarString(c: var SqlLexer, tok: var Token) = of '$': inc(pos) var tag2 = "$" - while buf[pos] in IdentChars: - add(tag2, buf[pos]) + while c.buf[pos] in IdentChars: + add(tag2, c.buf[pos]) inc(pos) - if buf[pos] == '$': inc(pos) + if c.buf[pos] == '$': inc(pos) if tag2 == tag: break add(tok.literal, tag2) add(tok.literal, '$') else: - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) c.bufpos = pos -proc getSymbol(c: var SqlLexer, tok: var Token) = +proc getSymbol(c: var SqlLexer, tok: var Token) = var pos = c.bufpos - var buf = c.buf - while true: - add(tok.literal, buf[pos]) + while true: + add(tok.literal, c.buf[pos]) inc(pos) - if buf[pos] notin {'a'..'z','A'..'Z','0'..'9','_','$', '\128'..'\255'}: + if c.buf[pos] notin {'a'..'z', 'A'..'Z', '0'..'9', '_', '$', + 
'\128'..'\255'}: break c.bufpos = pos tok.kind = tkIdentifier -proc getQuotedIdentifier(c: var SqlLexer, tok: var Token) = +proc getQuotedIdentifier(c: var SqlLexer, tok: var Token, quote = '\"') = var pos = c.bufpos + 1 - var buf = c.buf tok.kind = tkQuotedIdentifier while true: - var ch = buf[pos] - if ch == '\"': - if buf[pos+1] == '\"': + var ch = c.buf[pos] + if ch == quote: + if c.buf[pos+1] == quote: inc(pos, 2) - add(tok.literal, '\"') + add(tok.literal, quote) else: inc(pos) break - elif ch in {'\c', '\L', lexbase.EndOfFile}: + elif ch in {'\c', '\L', lexbase.EndOfFile}: tok.kind = tkInvalid break else: @@ -297,18 +283,17 @@ proc getQuotedIdentifier(c: var SqlLexer, tok: var Token) = proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) = var pos = c.bufpos + 1 - var buf = c.buf block parseLoop: while true: - while true: - var ch = buf[pos] + while true: + var ch = c.buf[pos] if ch in validChars: add(tok.literal, ch) - inc(pos) + inc(pos) elif ch == '\'': inc(pos) break - else: + else: tok.kind = tkInvalid break parseLoop c.bufpos = pos @@ -317,9 +302,8 @@ proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) = if c.lineNumber > line: # a new line whitespace has been parsed, so we check if the string # continues after the whitespace: - buf = c.buf # may have been reallocated pos = c.bufpos - if buf[pos] == '\'': inc(pos) + if c.buf[pos] == '\'': inc(pos) else: break parseLoop else: break parseLoop c.bufpos = pos @@ -327,66 +311,64 @@ proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) = proc getNumeric(c: var SqlLexer, tok: var Token) = tok.kind = tkInteger var pos = c.bufpos - var buf = c.buf - while buf[pos] in Digits: - add(tok.literal, buf[pos]) + while c.buf[pos] in Digits: + add(tok.literal, c.buf[pos]) inc(pos) - if buf[pos] == '.': + if c.buf[pos] == '.': tok.kind = tkNumeric - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) - while buf[pos] in Digits: - 
add(tok.literal, buf[pos]) + while c.buf[pos] in Digits: + add(tok.literal, c.buf[pos]) inc(pos) - if buf[pos] in {'E', 'e'}: + if c.buf[pos] in {'E', 'e'}: tok.kind = tkNumeric - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) - if buf[pos] == '+': + if c.buf[pos] == '+': inc(pos) - elif buf[pos] == '-': - add(tok.literal, buf[pos]) + elif c.buf[pos] == '-': + add(tok.literal, c.buf[pos]) inc(pos) - if buf[pos] in Digits: - while buf[pos] in Digits: - add(tok.literal, buf[pos]) + if c.buf[pos] in Digits: + while c.buf[pos] in Digits: + add(tok.literal, c.buf[pos]) inc(pos) else: tok.kind = tkInvalid - c.bufpos = pos + c.bufpos = pos proc getOperator(c: var SqlLexer, tok: var Token) = const operators = {'+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%', '^', '&', '|', '`', '?'} tok.kind = tkOperator var pos = c.bufpos - var buf = c.buf var trailingPlusMinus = false while true: - case buf[pos] + case c.buf[pos] of '-': - if buf[pos] == '-': break - if not trailingPlusMinus and buf[pos+1] notin operators and + if c.buf[pos] == '-': break + if not trailingPlusMinus and c.buf[pos+1] notin operators and tok.literal.len > 0: break of '/': - if buf[pos] == '*': break + if c.buf[pos] == '*': break of '~', '!', '@', '#', '%', '^', '&', '|', '`', '?': trailingPlusMinus = true of '+': - if not trailingPlusMinus and buf[pos+1] notin operators and + if not trailingPlusMinus and c.buf[pos+1] notin operators and tok.literal.len > 0: break of '*', '<', '>', '=': discard else: break - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) c.bufpos = pos -proc getTok(c: var SqlLexer, tok: var Token) = +proc getTok(c: var SqlLexer, tok: var Token) = tok.kind = tkInvalid setLen(tok.literal, 0) skip(c) case c.buf[c.bufpos] - of ';': + of ';': tok.kind = tkSemicolon inc(c.bufpos) add(tok.literal, ';') @@ -394,15 +376,15 @@ proc getTok(c: var SqlLexer, tok: var Token) = tok.kind = tkComma inc(c.bufpos) add(tok.literal, ',') - of ':': + of ':': 
tok.kind = tkColon inc(c.bufpos) add(tok.literal, ':') - of 'e', 'E': - if c.buf[c.bufpos + 1] == '\'': + of 'e', 'E': + if c.buf[c.bufpos + 1] == '\'': inc(c.bufpos) getString(c, tok, tkEscapeConstant) - else: + else: getSymbol(c, tok) of 'b', 'B': if c.buf[c.bufpos + 1] == '\'': @@ -413,15 +395,15 @@ proc getTok(c: var SqlLexer, tok: var Token) = of 'x', 'X': if c.buf[c.bufpos + 1] == '\'': tok.kind = tkHexStringConstant - getBitHexString(c, tok, {'a'..'f','A'..'F','0'..'9'}) + getBitHexString(c, tok, {'a'..'f', 'A'..'F', '0'..'9'}) else: getSymbol(c, tok) of '$': getDollarString(c, tok) - of '[': + of '[': tok.kind = tkBracketLe inc(c.bufpos) add(tok.literal, '[') - of ']': + of ']': tok.kind = tkBracketRi inc(c.bufpos) add(tok.literal, ']') @@ -433,7 +415,7 @@ proc getTok(c: var SqlLexer, tok: var Token) = tok.kind = tkParRi inc(c.bufpos) add(tok.literal, ')') - of '.': + of '.': if c.buf[c.bufpos + 1] in Digits: getNumeric(c, tok) else: @@ -442,52 +424,54 @@ proc getTok(c: var SqlLexer, tok: var Token) = add(tok.literal, '.') of '0'..'9': getNumeric(c, tok) of '\'': getString(c, tok, tkStringConstant) - of '"': getQuotedIdentifier(c, tok) - of lexbase.EndOfFile: + of '"': getQuotedIdentifier(c, tok, '"') + of '`': getQuotedIdentifier(c, tok, '`') + of lexbase.EndOfFile: tok.kind = tkEof tok.literal = "[EOF]" of 'a', 'c', 'd', 'f'..'w', 'y', 'z', 'A', 'C', 'D', 'F'..'W', 'Y', 'Z', '_', '\128'..'\255': getSymbol(c, tok) of '+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%', - '^', '&', '|', '`', '?': + '^', '&', '|', '?': getOperator(c, tok) else: add(tok.literal, c.buf[c.bufpos]) inc(c.bufpos) - -proc errorStr(L: SqlLexer, msg: string): string = + +proc errorStr(L: SqlLexer, msg: string): string = result = "$1($2, $3) Error: $4" % [L.filename, $getLine(L), $getColumn(L), msg] # ----------------------------- parser ---------------------------------------- -# Operator/Element Associativity Description -# . 
left table/column name separator -# :: left PostgreSQL-style typecast -# [ ] left array element selection -# - right unary minus -# ^ left exponentiation -# * / % left multiplication, division, modulo -# + - left addition, subtraction -# IS IS TRUE, IS FALSE, IS UNKNOWN, IS NULL -# ISNULL test for null -# NOTNULL test for not null -# (any other) left all other native and user-defined oprs -# IN set membership -# BETWEEN range containment -# OVERLAPS time interval overlap -# LIKE ILIKE SIMILAR string pattern matching -# < > less than, greater than -# = right equality, assignment -# NOT right logical negation -# AND left logical conjunction -# OR left logical disjunction +# Operator/Element Associativity Description +# . left table/column name separator +# :: left PostgreSQL-style typecast +# [ ] left array element selection +# - right unary minus +# ^ left exponentiation +# * / % left multiplication, division, modulo +# + - left addition, subtraction +# IS IS TRUE, IS FALSE, IS UNKNOWN, IS NULL +# ISNULL test for null +# NOTNULL test for not null +# (any other) left all other native and user-defined oprs +# IN set membership +# BETWEEN range containment +# OVERLAPS time interval overlap +# LIKE ILIKE SIMILAR string pattern matching +# < > less than, greater than +# = right equality, assignment +# NOT right logical negation +# AND left logical conjunction +# OR left logical disjunction type SqlNodeKind* = enum ## kind of SQL abstract syntax tree nkNone, nkIdent, + nkQuotedIdent, nkStringLit, nkBitStringLit, nkHexStringLit, @@ -496,13 +480,15 @@ type nkPrimaryKey, nkForeignKey, nkNotNull, - + nkNull, + nkStmtList, nkDot, nkDotDot, nkPrefix, nkInfix, nkCall, + nkPrGroup, nkColumnReference, nkReferences, nkDefault, @@ -510,18 +496,23 @@ type nkConstraint, nkUnique, nkIdentity, - nkColumnDef, ## name, datatype, constraints + nkColumnDef, ## name, datatype, constraints nkInsert, nkUpdate, nkDelete, nkSelect, nkSelectDistinct, nkSelectColumns, + nkSelectPair, nkAsgn, 
nkFrom, + nkFromItemPair, nkGroup, + nkLimit, + nkOffset, nkHaving, nkOrder, + nkJoin, nkDesc, nkUnion, nkIntersect, @@ -530,46 +521,61 @@ type nkValueList, nkWhere, nkCreateTable, - nkCreateTableIfNotExists, + nkCreateTableIfNotExists, nkCreateType, nkCreateTypeIfNotExists, nkCreateIndex, nkCreateIndexIfNotExists, nkEnumDef - + +const + LiteralNodes = { + nkIdent, nkQuotedIdent, nkStringLit, nkBitStringLit, nkHexStringLit, + nkIntegerLit, nkNumericLit + } + type SqlParseError* = object of ValueError ## Invalid SQL encountered - SqlNode* = ref SqlNodeObj ## an SQL abstract syntax tree node - SqlNodeObj* = object ## an SQL abstract syntax tree node - case kind*: SqlNodeKind ## kind of syntax tree - of nkIdent, nkStringLit, nkBitStringLit, nkHexStringLit, - nkIntegerLit, nkNumericLit: - strVal*: string ## AST leaf: the identifier, numeric literal - ## string literal, etc. + SqlNode* = ref SqlNodeObj ## an SQL abstract syntax tree node + SqlNodeObj* = object ## an SQL abstract syntax tree node + case kind*: SqlNodeKind ## kind of syntax tree + of LiteralNodes: + strVal*: string ## AST leaf: the identifier, numeric literal + ## string literal, etc. 
else: - sons*: seq[SqlNode] ## the node's children + sons*: seq[SqlNode] ## the node's children SqlParser* = object of SqlLexer ## SQL parser object tok: Token -{.deprecated: [EInvalidSql: SqlParseError, PSqlNode: SqlNode, - TSqlNode: SqlNodeObj, TSqlParser: SqlParser, TSqlNodeKind: SqlNodeKind].} - -proc newNode(k: SqlNodeKind): SqlNode = - new(result) - result.kind = k +proc newNode*(k: SqlNodeKind): SqlNode = + when defined(js): # bug #14117 + case k + of LiteralNodes: + result = SqlNode(kind: k, strVal: "") + else: + result = SqlNode(kind: k, sons: @[]) + else: + result = SqlNode(kind: k) -proc newNode(k: SqlNodeKind, s: string): SqlNode = - new(result) - result.kind = k +proc newNode*(k: SqlNodeKind, s: string): SqlNode = + result = SqlNode(kind: k) result.strVal = s - + +proc newNode*(k: SqlNodeKind, sons: seq[SqlNode]): SqlNode = + result = SqlNode(kind: k) + result.sons = sons + proc len*(n: SqlNode): int = - if isNil(n.sons): result = 0 - else: result = n.sons.len - + if n.kind in LiteralNodes: + result = 0 + else: + result = n.sons.len + +proc `[]`*(n: SqlNode; i: int): SqlNode = n.sons[i] +proc `[]`*(n: SqlNode; i: BackwardsIndex): SqlNode = n.sons[n.len - int(i)] + proc add*(father, n: SqlNode) = - if isNil(father.sons): father.sons = @[] add(father.sons, n) proc getTok(p: var SqlParser) = @@ -611,7 +617,10 @@ proc eat(p: var SqlParser, keyw: string) = if isKeyw(p, keyw): getTok(p) else: - sqlError(p, keyw.toUpper() & " expected") + sqlError(p, keyw.toUpperAscii() & " expected") + +proc opt(p: var SqlParser, kind: TokKind) = + if p.tok.kind == kind: getTok(p) proc parseDataType(p: var SqlParser): SqlNode = if isKeyw(p, "enum"): @@ -641,18 +650,20 @@ proc parseDataType(p: var SqlParser): SqlNode = getTok(p) eat(p, tkParRi) -proc getPrecedence(p: SqlParser): int = +proc getPrecedence(p: SqlParser): int = if isOpr(p, "*") or isOpr(p, "/") or isOpr(p, "%"): result = 6 elif isOpr(p, "+") or isOpr(p, "-"): - result = 5 + result = 5 elif isOpr(p, "=") or 
isOpr(p, "<") or isOpr(p, ">") or isOpr(p, ">=") or isOpr(p, "<=") or isOpr(p, "<>") or isOpr(p, "!=") or isKeyw(p, "is") or - isKeyw(p, "like"): - result = 3 + isKeyw(p, "like") or isKeyw(p, "in"): + result = 4 elif isKeyw(p, "and"): - result = 2 + result = 3 elif isKeyw(p, "or"): + result = 2 + elif isKeyw(p, "between"): result = 1 elif p.tok.kind == tkOperator: # user-defined operator: @@ -660,11 +671,15 @@ proc getPrecedence(p: SqlParser): int = else: result = - 1 -proc parseExpr(p: var SqlParser): SqlNode +proc parseExpr(p: var SqlParser): SqlNode {.gcsafe.} +proc parseSelect(p: var SqlParser): SqlNode {.gcsafe.} proc identOrLiteral(p: var SqlParser): SqlNode = case p.tok.kind - of tkIdentifier, tkQuotedIdentifier: + of tkQuotedIdentifier: + result = newNode(nkQuotedIdent, p.tok.literal) + getTok(p) + of tkIdentifier: result = newNode(nkIdent, p.tok.literal) getTok(p) of tkStringConstant, tkEscapeConstant, tkDollarQuotedConstant: @@ -684,33 +699,42 @@ proc identOrLiteral(p: var SqlParser): SqlNode = getTok(p) of tkParLe: getTok(p) - result = parseExpr(p) + result = newNode(nkPrGroup) + while true: + result.add(parseExpr(p)) + if p.tok.kind != tkComma: break + getTok(p) eat(p, tkParRi) - else: - sqlError(p, "expression expected") - getTok(p) # we must consume a token here to prevend endless loops! + else: + if p.tok.literal == "*": + result = newNode(nkIdent, p.tok.literal) + getTok(p) + else: + sqlError(p, "expression expected") + getTok(p) # we must consume a token here to prevent endless loops! 
-proc primary(p: var SqlParser): SqlNode = - if p.tok.kind == tkOperator or isKeyw(p, "not"): +proc primary(p: var SqlParser): SqlNode = + if (p.tok.kind == tkOperator and (p.tok.literal == "+" or p.tok.literal == + "-")) or isKeyw(p, "not"): result = newNode(nkPrefix) result.add(newNode(nkIdent, p.tok.literal)) getTok(p) result.add(primary(p)) return result = identOrLiteral(p) - while true: + while true: case p.tok.kind - of tkParLe: + of tkParLe: var a = result result = newNode(nkCall) result.add(a) getTok(p) - while true: + while p.tok.kind != tkParRi: result.add(parseExpr(p)) if p.tok.kind == tkComma: getTok(p) else: break eat(p, tkParRi) - of tkDot: + of tkDot: getTok(p) var a = result if p.tok.kind == tkDot: @@ -727,16 +751,16 @@ proc primary(p: var SqlParser): SqlNode = sqlError(p, "identifier expected") getTok(p) else: break - -proc lowestExprAux(p: var SqlParser, v: var SqlNode, limit: int): int = + +proc lowestExprAux(p: var SqlParser, v: var SqlNode, limit: int): int = var v2, node, opNode: SqlNode v = primary(p) # expand while operators have priorities higher than 'limit' var opPred = getPrecedence(p) result = opPred - while opPred > limit: + while opPred > limit: node = newNode(nkInfix) - opNode = newNode(nkIdent, p.tok.literal) + opNode = newNode(nkIdent, p.tok.literal.toLowerAscii()) getTok(p) result = lowestExprAux(p, v2, opPred) node.add(opNode) @@ -744,8 +768,8 @@ proc lowestExprAux(p: var SqlParser, v: var SqlNode, limit: int): int = node.add(v2) v = node opPred = getPrecedence(p) - -proc parseExpr(p: var SqlParser): SqlNode = + +proc parseExpr(p: var SqlParser): SqlNode = discard lowestExprAux(p, result, - 1) proc parseTableName(p: var SqlParser): SqlNode = @@ -765,7 +789,7 @@ proc parseColumnReference(p: var SqlParser): SqlNode = result.add(parseTableName(p)) eat(p, tkParRi) -proc parseCheck(p: var SqlParser): SqlNode = +proc parseCheck(p: var SqlParser): SqlNode = getTok(p) result = newNode(nkCheck) result.add(parseExpr(p)) @@ -776,9 +800,19 
@@ proc parseConstraint(p: var SqlParser): SqlNode = expectIdent(p) result.add(newNode(nkIdent, p.tok.literal)) getTok(p) - eat(p, "check") + optKeyw(p, "check") result.add(parseExpr(p)) +proc parseParIdentList(p: var SqlParser, father: SqlNode) = + eat(p, tkParLe) + while true: + expectIdent(p) + father.add(newNode(nkIdent, p.tok.literal)) + getTok(p) + if p.tok.kind != tkComma: break + getTok(p) + eat(p, tkParRi) + proc parseColumnConstraints(p: var SqlParser, result: SqlNode) = while true: if isKeyw(p, "default"): @@ -795,6 +829,9 @@ proc parseColumnConstraints(p: var SqlParser, result: SqlNode) = getTok(p) eat(p, "null") result.add(newNode(nkNotNull)) + elif isKeyw(p, "null"): + getTok(p) + result.add(newNode(nkNull)) elif isKeyw(p, "identity"): getTok(p) result.add(newNode(nkIdentity)) @@ -807,6 +844,7 @@ proc parseColumnConstraints(p: var SqlParser, result: SqlNode) = elif isKeyw(p, "constraint"): result.add(parseConstraint(p)) elif isKeyw(p, "unique"): + getTok(p) result.add(newNode(nkUnique)) else: break @@ -817,9 +855,9 @@ proc parseColumnDef(p: var SqlParser): SqlNode = result.add(newNode(nkIdent, p.tok.literal)) getTok(p) result.add(parseDataType(p)) - parseColumnConstraints(p, result) + parseColumnConstraints(p, result) -proc parseIfNotExists(p: var SqlParser, k: SqlNodeKind): SqlNode = +proc parseIfNotExists(p: var SqlParser, k: SqlNodeKind): SqlNode = getTok(p) if isKeyw(p, "if"): getTok(p) @@ -829,16 +867,6 @@ proc parseIfNotExists(p: var SqlParser, k: SqlNodeKind): SqlNode = else: result = newNode(k) -proc parseParIdentList(p: var SqlParser, father: SqlNode) = - eat(p, tkParLe) - while true: - expectIdent(p) - father.add(newNode(nkIdent, p.tok.literal)) - getTok(p) - if p.tok.kind != tkComma: break - getTok(p) - eat(p, tkParRi) - proc parseTableConstraint(p: var SqlParser): SqlNode = if isKeyw(p, "primary"): getTok(p) @@ -866,21 +894,35 @@ proc parseTableConstraint(p: var SqlParser): SqlNode = else: sqlError(p, "column definition expected") +proc 
parseUnique(p: var SqlParser): SqlNode = + result = parseExpr(p) + if result.kind == nkCall: result.kind = nkUnique + proc parseTableDef(p: var SqlParser): SqlNode = result = parseIfNotExists(p, nkCreateTable) expectIdent(p) result.add(newNode(nkIdent, p.tok.literal)) getTok(p) if p.tok.kind == tkParLe: - while true: - getTok(p) - if p.tok.kind == tkIdentifier or p.tok.kind == tkQuotedIdentifier: + getTok(p) + while p.tok.kind != tkParRi: + if isKeyw(p, "constraint"): + result.add parseConstraint(p) + elif isKeyw(p, "primary") or isKeyw(p, "foreign"): + result.add parseTableConstraint(p) + elif isKeyw(p, "unique"): + result.add parseUnique(p) + elif p.tok.kind == tkIdentifier or p.tok.kind == tkQuotedIdentifier: result.add(parseColumnDef(p)) else: result.add(parseTableConstraint(p)) if p.tok.kind != tkComma: break + getTok(p) eat(p, tkParRi) - + # skip additional crap after 'create table (...) crap;' + while p.tok.kind notin {tkSemicolon, tkEof}: + getTok(p) + proc parseTypeDef(p: var SqlParser): SqlNode = result = parseIfNotExists(p, nkCreateType) expectIdent(p) @@ -894,6 +936,19 @@ proc parseWhere(p: var SqlParser): SqlNode = result = newNode(nkWhere) result.add(parseExpr(p)) +proc parseFromItem(p: var SqlParser): SqlNode = + result = newNode(nkFromItemPair) + if p.tok.kind == tkParLe: + getTok(p) + var select = parseSelect(p) + result.add(select) + eat(p, tkParRi) + else: + result.add(parseExpr(p)) + if isKeyw(p, "as"): + getTok(p) + result.add(parseExpr(p)) + proc parseIndexDef(p: var SqlParser): SqlNode = result = parseIfNotExists(p, nkCreateIndex) if isKeyw(p, "primary"): @@ -929,8 +984,9 @@ proc parseInsert(p: var SqlParser): SqlNode = if p.tok.kind == tkParLe: var n = newNode(nkColumnList) parseParIdentList(p, n) + result.add n else: - result.add(nil) + result.add(newNode(nkNone)) if isKeyw(p, "default"): getTok(p) eat(p, "values") @@ -965,17 +1021,19 @@ proc parseUpdate(p: var SqlParser): SqlNode = if isKeyw(p, "where"): result.add(parseWhere(p)) else: - 
result.add(nil) - + result.add(newNode(nkNone)) + proc parseDelete(p: var SqlParser): SqlNode = getTok(p) + if isOpr(p, "*"): + getTok(p) result = newNode(nkDelete) eat(p, "from") result.add(primary(p)) if isKeyw(p, "where"): result.add(parseWhere(p)) else: - result.add(nil) + result.add(newNode(nkNone)) proc parseSelect(p: var SqlParser): SqlNode = getTok(p) @@ -991,7 +1049,12 @@ proc parseSelect(p: var SqlParser): SqlNode = a.add(newNode(nkIdent, "*")) getTok(p) else: - a.add(parseExpr(p)) + var pair = newNode(nkSelectPair) + pair.add(parseExpr(p)) + a.add(pair) + if isKeyw(p, "as"): + getTok(p) + pair.add(parseExpr(p)) if p.tok.kind != tkComma: break getTok(p) result.add(a) @@ -999,7 +1062,7 @@ proc parseSelect(p: var SqlParser): SqlNode = var f = newNode(nkFrom) while true: getTok(p) - f.add(parseExpr(p)) + f.add(parseFromItem(p)) if p.tok.kind != tkComma: break result.add(f) if isKeyw(p, "where"): @@ -1013,29 +1076,14 @@ proc parseSelect(p: var SqlParser): SqlNode = if p.tok.kind != tkComma: break getTok(p) result.add(g) - if isKeyw(p, "having"): - var h = newNode(nkHaving) - while true: - getTok(p) - h.add(parseExpr(p)) - if p.tok.kind != tkComma: break - result.add(h) - if isKeyw(p, "union"): - result.add(newNode(nkUnion)) - getTok(p) - elif isKeyw(p, "intersect"): - result.add(newNode(nkIntersect)) - getTok(p) - elif isKeyw(p, "except"): - result.add(newNode(nkExcept)) - getTok(p) if isKeyw(p, "order"): getTok(p) eat(p, "by") var n = newNode(nkOrder) while true: var e = parseExpr(p) - if isKeyw(p, "asc"): getTok(p) # is default + if isKeyw(p, "asc"): + getTok(p) # is default elif isKeyw(p, "desc"): getTok(p) var x = newNode(nkDesc) @@ -1045,8 +1093,47 @@ proc parseSelect(p: var SqlParser): SqlNode = if p.tok.kind != tkComma: break getTok(p) result.add(n) + if isKeyw(p, "having"): + var h = newNode(nkHaving) + while true: + getTok(p) + h.add(parseExpr(p)) + if p.tok.kind != tkComma: break + result.add(h) + if isKeyw(p, "union"): + 
result.add(newNode(nkUnion)) + getTok(p) + elif isKeyw(p, "intersect"): + result.add(newNode(nkIntersect)) + getTok(p) + elif isKeyw(p, "except"): + result.add(newNode(nkExcept)) + getTok(p) + if isKeyw(p, "join") or isKeyw(p, "inner") or isKeyw(p, "outer") or isKeyw(p, "cross"): + var join = newNode(nkJoin) + result.add(join) + if isKeyw(p, "join"): + join.add(newNode(nkIdent, "")) + getTok(p) + else: + join.add(newNode(nkIdent, p.tok.literal.toLowerAscii())) + getTok(p) + eat(p, "join") + join.add(parseFromItem(p)) + eat(p, "on") + join.add(parseExpr(p)) + if isKeyw(p, "limit"): + getTok(p) + var l = newNode(nkLimit) + l.add(parseExpr(p)) + result.add(l) + if isKeyw(p, "offset"): + getTok(p) + var o = newNode(nkOffset) + o.add(parseExpr(p)) + result.add(o) -proc parseStmt(p: var SqlParser): SqlNode = +proc parseStmt(p: var SqlParser; parent: SqlNode) = if isKeyw(p, "create"): getTok(p) optKeyw(p, "cached") @@ -1058,71 +1145,108 @@ proc parseStmt(p: var SqlParser): SqlNode = optKeyw(p, "unique") optKeyw(p, "hash") if isKeyw(p, "table"): - result = parseTableDef(p) + parent.add parseTableDef(p) elif isKeyw(p, "type"): - result = parseTypeDef(p) + parent.add parseTypeDef(p) elif isKeyw(p, "index"): - result = parseIndexDef(p) + parent.add parseIndexDef(p) else: sqlError(p, "TABLE expected") elif isKeyw(p, "insert"): - result = parseInsert(p) + parent.add parseInsert(p) elif isKeyw(p, "update"): - result = parseUpdate(p) + parent.add parseUpdate(p) elif isKeyw(p, "delete"): - result = parseDelete(p) + parent.add parseDelete(p) elif isKeyw(p, "select"): - result = parseSelect(p) + parent.add parseSelect(p) + elif isKeyw(p, "begin"): + getTok(p) else: - sqlError(p, "CREATE expected") + sqlError(p, "SELECT, CREATE, UPDATE or DELETE expected") -proc open(p: var SqlParser, input: Stream, filename: string) = - ## opens the parser `p` and assigns the input stream `input` to it. - ## `filename` is only used for error messages. 
- open(SqlLexer(p), input, filename) - p.tok.kind = tkInvalid - p.tok.literal = "" - getTok(p) - proc parse(p: var SqlParser): SqlNode = ## parses the content of `p`'s input stream and returns the SQL AST. - ## Syntax errors raise an `EInvalidSql` exception. + ## Syntax errors raise an `SqlParseError` exception. result = newNode(nkStmtList) while p.tok.kind != tkEof: - var s = parseStmt(p) + parseStmt(p, result) + if p.tok.kind == tkEof: + break eat(p, tkSemicolon) - result.add(s) - if result.len == 1: - result = result.sons[0] - + proc close(p: var SqlParser) = ## closes the parser `p`. The associated input stream is closed too. close(SqlLexer(p)) -proc parseSQL*(input: Stream, filename: string): SqlNode = - ## parses the SQL from `input` into an AST and returns the AST. - ## `filename` is only used for error messages. - ## Syntax errors raise an `EInvalidSql` exception. - var p: SqlParser - open(p, input, filename) - try: - result = parse(p) - finally: - close(p) +type + SqlWriter = object + indent: int + upperCase: bool + buffer: string + +proc add(s: var SqlWriter, thing: char) = + s.buffer.add(thing) -proc ra(n: SqlNode, s: var string, indent: int) +proc prepareAdd(s: var SqlWriter) {.inline.} = + if s.buffer.len > 0 and s.buffer[^1] notin {' ', '\L', '(', '.'}: + s.buffer.add(" ") -proc rs(n: SqlNode, s: var string, indent: int, - prefix = "(", suffix = ")", - sep = ", ") = +proc add(s: var SqlWriter, thing: string) = + s.prepareAdd + s.buffer.add(thing) + +proc addKeyw(s: var SqlWriter, thing: string) = + var keyw = thing + if s.upperCase: + keyw = keyw.toUpperAscii() + s.add(keyw) + +proc addIden(s: var SqlWriter, thing: string) = + var iden = thing + if iden.toLowerAscii() in reservedKeywords: + iden = '"' & iden & '"' + s.add(iden) + +proc ra(n: SqlNode, s: var SqlWriter) {.gcsafe.} + +proc rs(n: SqlNode, s: var SqlWriter, prefix = "(", suffix = ")", sep = ", ") = if n.len > 0: s.add(prefix) for i in 0 .. 
n.len-1: if i > 0: s.add(sep) - ra(n.sons[i], s, indent) + ra(n.sons[i], s) s.add(suffix) -proc ra(n: SqlNode, s: var string, indent: int) = +proc addMulti(s: var SqlWriter, n: SqlNode, sep = ',') = + if n.len > 0: + for i in 0 .. n.len-1: + if i > 0: s.add(sep) + ra(n.sons[i], s) + +proc addMulti(s: var SqlWriter, n: SqlNode, sep = ',', prefix, suffix: char) = + if n.len > 0: + s.add(prefix) + for i in 0 .. n.len-1: + if i > 0: s.add(sep) + ra(n.sons[i], s) + s.add(suffix) + +proc quoted(s: string): string = + "\"" & replace(s, "\"", "\"\"") & "\"" + +func escape(result: var string; s: string) = + result.add('\'') + for c in items(s): + case c + of '\0'..'\31': + result.add("\\x") + result.add(toHex(ord(c), 2)) + of '\'': result.add("''") + else: result.add(c) + result.add('\'') + +proc ra(n: SqlNode, s: var SqlWriter) = if n == nil: return case n.kind of nkNone: discard @@ -1130,9 +1254,12 @@ proc ra(n: SqlNode, s: var string, indent: int) = if allCharsInSet(n.strVal, {'\33'..'\127'}): s.add(n.strVal) else: - s.add("\"" & replace(n.strVal, "\"", "\"\"") & "\"") + s.add(quoted(n.strVal)) + of nkQuotedIdent: + s.add(quoted(n.strVal)) of nkStringLit: - s.add(escape(n.strVal, "e'", "'")) + s.prepareAdd + s.buffer.escape(n.strVal) of nkBitStringLit: s.add("b'" & n.strVal & "'") of nkHexStringLit: @@ -1140,211 +1267,257 @@ proc ra(n: SqlNode, s: var string, indent: int) = of nkIntegerLit, nkNumericLit: s.add(n.strVal) of nkPrimaryKey: - s.add(" primary key") - rs(n, s, indent) + s.addKeyw("primary key") + rs(n, s) of nkForeignKey: - s.add(" foreign key") - rs(n, s, indent) + s.addKeyw("foreign key") + rs(n, s) of nkNotNull: - s.add(" not null") + s.addKeyw("not null") + of nkNull: + s.addKeyw("null") of nkDot: - ra(n.sons[0], s, indent) - s.add(".") - ra(n.sons[1], s, indent) + ra(n.sons[0], s) + s.add('.') + ra(n.sons[1], s) of nkDotDot: - ra(n.sons[0], s, indent) + ra(n.sons[0], s) s.add(". 
.") - ra(n.sons[1], s, indent) + ra(n.sons[1], s) of nkPrefix: - s.add('(') - ra(n.sons[0], s, indent) + ra(n.sons[0], s) s.add(' ') - ra(n.sons[1], s, indent) - s.add(')') + ra(n.sons[1], s) of nkInfix: - s.add('(') - ra(n.sons[1], s, indent) + ra(n.sons[1], s) s.add(' ') - ra(n.sons[0], s, indent) + ra(n.sons[0], s) s.add(' ') - ra(n.sons[2], s, indent) - s.add(')') + ra(n.sons[2], s) of nkCall, nkColumnReference: - ra(n.sons[0], s, indent) + ra(n.sons[0], s) s.add('(') for i in 1..n.len-1: - if i > 1: s.add(", ") - ra(n.sons[i], s, indent) + if i > 1: s.add(',') + ra(n.sons[i], s) + s.add(')') + of nkPrGroup: + s.add('(') + s.addMulti(n) s.add(')') of nkReferences: - s.add(" references ") - ra(n.sons[0], s, indent) + s.addKeyw("references") + ra(n.sons[0], s) of nkDefault: - s.add(" default ") - ra(n.sons[0], s, indent) + s.addKeyw("default") + ra(n.sons[0], s) of nkCheck: - s.add(" check ") - ra(n.sons[0], s, indent) + s.addKeyw("check") + ra(n.sons[0], s) of nkConstraint: - s.add(" constraint ") - ra(n.sons[0], s, indent) - s.add(" check ") - ra(n.sons[1], s, indent) + s.addKeyw("constraint") + ra(n.sons[0], s) + s.addKeyw("check") + ra(n.sons[1], s) of nkUnique: - s.add(" unique") - rs(n, s, indent) + s.addKeyw("unique") + rs(n, s) of nkIdentity: - s.add(" identity") + s.addKeyw("identity") of nkColumnDef: - s.add("\n ") - rs(n, s, indent, "", "", " ") + rs(n, s, "", "", " ") of nkStmtList: for i in 0..n.len-1: - ra(n.sons[i], s, indent) - s.add("\n") + ra(n.sons[i], s) + s.add(';') of nkInsert: assert n.len == 3 - s.add("insert into ") - ra(n.sons[0], s, indent) - ra(n.sons[1], s, indent) - if n.sons[2].kind == nkDefault: - s.add("default values") + s.addKeyw("insert into") + ra(n.sons[0], s) + s.add(' ') + ra(n.sons[1], s) + if n.sons[2].kind == nkDefault: + s.addKeyw("default values") else: - s.add("\nvalues ") - ra(n.sons[2], s, indent) - s.add(';') - of nkUpdate: - s.add("update ") - ra(n.sons[0], s, indent) - s.add(" set ") + ra(n.sons[2], s) + of 
nkUpdate: + s.addKeyw("update") + ra(n.sons[0], s) + s.addKeyw("set") var L = n.len for i in 1 .. L-2: if i > 1: s.add(", ") var it = n.sons[i] assert it.kind == nkAsgn - ra(it, s, indent) - ra(n.sons[L-1], s, indent) - s.add(';') - of nkDelete: - s.add("delete from ") - ra(n.sons[0], s, indent) - ra(n.sons[1], s, indent) - s.add(';') + ra(it, s) + ra(n.sons[L-1], s) + of nkDelete: + s.addKeyw("delete from") + ra(n.sons[0], s) + ra(n.sons[1], s) of nkSelect, nkSelectDistinct: - s.add("select ") + s.addKeyw("select") if n.kind == nkSelectDistinct: - s.add("distinct ") - rs(n.sons[0], s, indent, "", "", ", ") - for i in 1 .. n.len-1: ra(n.sons[i], s, indent) - s.add(';') - of nkSelectColumns: - assert(false) + s.addKeyw("distinct") + for i in 0 ..< n.len: + ra(n.sons[i], s) + of nkSelectColumns: + for i, column in n.sons: + if i > 0: s.add(',') + ra(column, s) + of nkSelectPair: + ra(n.sons[0], s) + if n.sons.len == 2: + s.addKeyw("as") + ra(n.sons[1], s) + of nkFromItemPair: + if n.sons[0].kind in {nkIdent, nkQuotedIdent}: + ra(n.sons[0], s) + else: + assert n.sons[0].kind == nkSelect + s.add('(') + ra(n.sons[0], s) + s.add(')') + if n.sons.len == 2: + s.addKeyw("as") + ra(n.sons[1], s) of nkAsgn: - ra(n.sons[0], s, indent) + ra(n.sons[0], s) s.add(" = ") - ra(n.sons[1], s, indent) + ra(n.sons[1], s) of nkFrom: - s.add("\nfrom ") - rs(n, s, indent, "", "", ", ") + s.addKeyw("from") + s.addMulti(n) of nkGroup: - s.add("\ngroup by") - rs(n, s, indent, "", "", ", ") + s.addKeyw("group by") + s.addMulti(n) + of nkLimit: + s.addKeyw("limit") + s.addMulti(n) + of nkOffset: + s.addKeyw("offset") + s.addMulti(n) of nkHaving: - s.add("\nhaving") - rs(n, s, indent, "", "", ", ") + s.addKeyw("having") + s.addMulti(n) of nkOrder: - s.add("\norder by ") - rs(n, s, indent, "", "", ", ") + s.addKeyw("order by") + s.addMulti(n) + of nkJoin: + var joinType = n.sons[0].strVal + if joinType == "": + joinType = "join" + else: + joinType &= " " & "join" + s.addKeyw(joinType) + 
ra(n.sons[1], s) + s.addKeyw("on") + ra(n.sons[2], s) of nkDesc: - ra(n.sons[0], s, indent) - s.add(" desc") + ra(n.sons[0], s) + s.addKeyw("desc") of nkUnion: - s.add(" union") + s.addKeyw("union") of nkIntersect: - s.add(" intersect") + s.addKeyw("intersect") of nkExcept: - s.add(" except") + s.addKeyw("except") of nkColumnList: - rs(n, s, indent) + rs(n, s) of nkValueList: - s.add("values ") - rs(n, s, indent) + s.addKeyw("values") + rs(n, s) of nkWhere: - s.add("\nwhere ") - ra(n.sons[0], s, indent) + s.addKeyw("where") + ra(n.sons[0], s) of nkCreateTable, nkCreateTableIfNotExists: - s.add("create table ") + s.addKeyw("create table") if n.kind == nkCreateTableIfNotExists: - s.add("if not exists ") - ra(n.sons[0], s, indent) + s.addKeyw("if not exists") + ra(n.sons[0], s) s.add('(') for i in 1..n.len-1: - if i > 1: s.add(", ") - ra(n.sons[i], s, indent) + if i > 1: s.add(',') + ra(n.sons[i], s) s.add(");") of nkCreateType, nkCreateTypeIfNotExists: - s.add("create type ") + s.addKeyw("create type") if n.kind == nkCreateTypeIfNotExists: - s.add("if not exists ") - ra(n.sons[0], s, indent) - s.add(" as ") - ra(n.sons[1], s, indent) - s.add(';') + s.addKeyw("if not exists") + ra(n.sons[0], s) + s.addKeyw("as") + ra(n.sons[1], s) of nkCreateIndex, nkCreateIndexIfNotExists: - s.add("create index ") + s.addKeyw("create index") if n.kind == nkCreateIndexIfNotExists: - s.add("if not exists ") - ra(n.sons[0], s, indent) - s.add(" on ") - ra(n.sons[1], s, indent) + s.addKeyw("if not exists") + ra(n.sons[0], s) + s.addKeyw("on") + ra(n.sons[1], s) s.add('(') for i in 2..n.len-1: if i > 2: s.add(", ") - ra(n.sons[i], s, indent) + ra(n.sons[i], s) s.add(");") of nkEnumDef: - s.add("enum ") - rs(n, s, indent) + s.addKeyw("enum") + rs(n, s) -# What I want: -# -#select(columns = [T1.all, T2.name], -# fromm = [T1, T2], -# where = T1.name ==. 
T2.name, -# orderby = [name]): -# -#for row in dbQuery(db, """select x, y, z -# from a, b -# where a.name = b.name"""): -# - -#select x, y, z: -# fromm: Table1, Table2 -# where: x.name == y.name -#db.select(fromm = [t1, t2], where = t1.name == t2.name): -#for x, y, z in db.select(fromm = a, b where = a.name == b.name): -# writeln x, y, z - -proc renderSQL*(n: SqlNode): string = +proc renderSql*(n: SqlNode, upperCase = false): string = ## Converts an SQL abstract syntax tree to its string representation. - result = "" - ra(n, result, 0) - -when isMainModule: - echo(renderSQL(parseSQL(newStringStream(""" - CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic'); - CREATE TABLE holidays ( - num_weeks int, - happiness happiness - ); - CREATE INDEX table1_attr1 ON table1(attr1); - - SELECT * FROM myTab WHERE col1 = 'happy'; - """), "stdin"))) - -# CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic'); -# CREATE TABLE holidays ( -# num_weeks int, -# happiness happiness -# ); -# CREATE INDEX table1_attr1 ON table1(attr1) + var s: SqlWriter + s.buffer = "" + s.upperCase = upperCase + ra(n, s) + return s.buffer + +proc `$`*(n: SqlNode): string = + ## an alias for `renderSql`. + renderSql(n) + +proc treeReprAux(s: SqlNode, level: int, result: var string) = + result.add('\n') + for i in 0 ..< level: result.add(" ") + + result.add($s.kind) + if s.kind in LiteralNodes: + result.add(' ') + result.add(s.strVal) + else: + for son in s.sons: + treeReprAux(son, level + 1, result) + +proc treeRepr*(s: SqlNode): string = + result = newStringOfCap(128) + treeReprAux(s, 0, result) + +import std/streams + +proc open(L: var SqlLexer, input: Stream, filename: string) = + lexbase.open(L, input) + L.filename = filename + +proc open(p: var SqlParser, input: Stream, filename: string) = + ## opens the parser `p` and assigns the input stream `input` to it. + ## `filename` is only used for error messages. 
+ open(SqlLexer(p), input, filename) + p.tok.kind = tkInvalid + p.tok.literal = "" + getTok(p) + +proc parseSql*(input: Stream, filename: string): SqlNode = + ## parses the SQL from `input` into an AST and returns the AST. + ## `filename` is only used for error messages. + ## Syntax errors raise an `SqlParseError` exception. + var p: SqlParser + open(p, input, filename) + try: + result = parse(p) + finally: + close(p) + +proc parseSql*(input: string, filename = ""): SqlNode = + ## parses the SQL from `input` into an AST and returns the AST. + ## `filename` is only used for error messages. + ## Syntax errors raise an `SqlParseError` exception. + parseSql(newStringStream(input), "") diff --git a/lib/pure/parseurl.nim b/lib/pure/parseurl.nim deleted file mode 100644 index 56bf10768..000000000 --- a/lib/pure/parseurl.nim +++ /dev/null @@ -1,114 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2015 Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## **Warnings:** This module is deprecated since version 0.10.2. -## Use the `uri <uri.html>`_ module instead. -## -## Parses & constructs URLs. 
- -{.deprecated.} - -import strutils - -type - Url* = tuple[ ## represents a *Uniform Resource Locator* (URL) - ## any optional component is "" if it does not exist - scheme, username, password, - hostname, port, path, query, anchor: string] - -{.deprecated: [TUrl: Url].} - -proc parseUrl*(url: string): Url {.deprecated.} = - var i = 0 - - var scheme, username, password: string = "" - var hostname, port, path, query, anchor: string = "" - - var temp = "" - - if url[i] != '/': # url isn't a relative path - while true: - # Scheme - if url[i] == ':': - if url[i+1] == '/' and url[i+2] == '/': - scheme = temp - temp.setLen(0) - inc(i, 3) # Skip the // - # Authority(username, password) - if url[i] == '@': - username = temp - let colon = username.find(':') - if colon >= 0: - password = username.substr(colon+1) - username = username.substr(0, colon-1) - temp.setLen(0) - inc(i) #Skip the @ - # hostname(subdomain, domain, port) - if url[i] == '/' or url[i] == '\0': - hostname = temp - let colon = hostname.find(':') - if colon >= 0: - port = hostname.substr(colon+1) - hostname = hostname.substr(0, colon-1) - - temp.setLen(0) - break - - temp.add(url[i]) - inc(i) - - if url[i] == '/': inc(i) # Skip the '/' - # Path - while true: - if url[i] == '?': - path = temp - temp.setLen(0) - if url[i] == '#': - if temp[0] == '?': - query = temp - else: - path = temp - temp.setLen(0) - - if url[i] == '\0': - if temp[0] == '?': - query = temp - elif temp[0] == '#': - anchor = temp - else: - path = temp - break - - temp.add(url[i]) - inc(i) - - return (scheme, username, password, hostname, port, path, query, anchor) - -proc `$`*(u: Url): string {.deprecated.} = - ## turns the URL `u` into its string representation. 
- result = "" - if u.scheme.len > 0: - result.add(u.scheme) - result.add("://") - if u.username.len > 0: - result.add(u.username) - if u.password.len > 0: - result.add(":") - result.add(u.password) - result.add("@") - result.add(u.hostname) - if u.port.len > 0: - result.add(":") - result.add(u.port) - if u.path.len > 0: - result.add("/") - result.add(u.path) - result.add(u.query) - result.add(u.anchor) - diff --git a/lib/pure/parseutils.nim b/lib/pure/parseutils.nim index eb649a878..2ca255fa0 100644 --- a/lib/pure/parseutils.nim +++ b/lib/pure/parseutils.nim @@ -7,15 +7,50 @@ # distribution, for details about the copyright. # -## This module contains helpers for parsing tokens, numbers, identifiers, etc. +## This module contains helpers for parsing tokens, numbers, integers, floats, +## identifiers, etc. +## +## To unpack raw bytes look at the `streams <streams.html>`_ module. +## +## ```nim test +## let logs = @["2019-01-10: OK_", "2019-01-11: FAIL_", "2019-01: aaaa"] +## var outp: seq[string] +## +## for log in logs: +## var res: string +## if parseUntil(log, res, ':') == 10: # YYYY-MM-DD == 10 +## outp.add(res & " - " & captureBetween(log, ' ', '_')) +## doAssert outp == @["2019-01-10 - OK", "2019-01-11 - FAIL"] +## ``` +## +## ```nim test +## from std/strutils import Digits, parseInt +## +## let +## input1 = "2019 school start" +## input2 = "3 years back" +## startYear = input1[0 .. skipWhile(input1, Digits)-1] # 2019 +## yearsBack = input2[0 .. 
skipWhile(input2, Digits)-1] # 3 +## examYear = parseInt(startYear) + parseInt(yearsBack) +## doAssert "Examination is in " & $examYear == "Examination is in 2022" +## ``` +## +## **See also:** +## * `strutils module<strutils.html>`_ for combined and identical parsing proc's +## * `json module<json.html>`_ for a JSON parser +## * `parsecfg module<parsecfg.html>`_ for a configuration file parser +## * `parsecsv module<parsecsv.html>`_ for a simple CSV (comma separated value) parser +## * `parseopt module<parseopt.html>`_ for a command line parser +## * `parsexml module<parsexml.html>`_ for a XML / HTML parser +## * `other parsers<lib.html#pure-libraries-parsers>`_ for other parsers -{.deadCodeElim: on.} - -{.push debugger:off .} # the user does not want to trace a part +{.push debugger: off.} # the user does not want to trace a part # of the standard library! include "system/inclrtl" +template toOa(s: string): openArray[char] = openArray[char](s) + const Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'} IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'} @@ -25,314 +60,1028 @@ const proc toLower(c: char): char {.inline.} = result = if c in {'A'..'Z'}: chr(ord(c)-ord('A')+ord('a')) else: c -proc parseHex*(s: string, number: var int, start = 0): int {. - rtl, extern: "npuParseHex", noSideEffect.} = - ## Parses a hexadecimal number and stores its value in ``number``. +proc parseBin*[T: SomeInteger](s: openArray[char], number: var T, maxLen = 0): int {.noSideEffect.} = + ## Parses a binary number and stores its value in ``number``. ## - ## Returns the number of the parsed characters or 0 in case of an error. This - ## proc is sensitive to the already existing value of ``number`` and will - ## likely not do what you want unless you make sure ``number`` is zero. You - ## can use this feature to *chain* calls, though the result int will quickly - ## overflow. Example: + ## Returns the number of the parsed characters or 0 in case of an error. 
+ ## If error, the value of ``number`` is not changed. ## - ## .. code-block:: nim - ## var value = 0 - ## discard parseHex("0x38", value) - ## assert value == 56 - ## discard parseHex("0x34", value) - ## assert value == 56 * 256 + 52 - ## value = -1 - ## discard parseHex("0x38", value) - ## assert value == -200 + ## If ``maxLen == 0``, the parsing continues until the first non-bin character + ## or to the end of the string. Otherwise, no more than ``maxLen`` characters + ## are parsed starting from the ``start`` position. ## - var i = start + ## It does not check for overflow. If the value represented by the string is + ## too big to fit into ``number``, only the value of last fitting characters + ## will be stored in ``number`` without producing an error. + runnableExamples: + var num: int + doAssert parseBin("0100_1110_0110_1001_1110_1101", num) == 29 + doAssert num == 5138925 + doAssert parseBin("3", num) == 0 + var num8: int8 + doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8) == 32 + doAssert num8 == 0b1110_1101'i8 + doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8, 3, 9) == 9 + doAssert num8 == 0b0100_1110'i8 + var num8u: uint8 + doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8u) == 32 + doAssert num8u == 237 + var num64: int64 + doAssert parseBin("0100111001101001111011010100111001101001", num64) == 40 + doAssert num64 == 336784608873 + var i = 0 + var output = T(0) var foundDigit = false - if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2) - elif s[i] == '#': inc(i) - while true: + let last = min(s.len, if maxLen == 0: s.len else: i + maxLen) + if i + 1 < last and s[i] == '0' and (s[i+1] in {'b', 'B'}): inc(i, 2) + while i < last: case s[i] of '_': discard - of '0'..'9': - number = number shl 4 or (ord(s[i]) - ord('0')) - foundDigit = true - of 'a'..'f': - number = number shl 4 or (ord(s[i]) - ord('a') + 10) - foundDigit = true - of 'A'..'F': - number = number shl 4 or (ord(s[i]) - ord('A') + 10) + of '0'..'1': + 
output = output shl 1 or T(ord(s[i]) - ord('0')) foundDigit = true else: break inc(i) - if foundDigit: result = i-start + if foundDigit: + number = output + result = i -proc parseOct*(s: string, number: var int, start = 0): int {. - rtl, extern: "npuParseOct", noSideEffect.} = - ## parses an octal number and stores its value in ``number``. Returns - ## the number of the parsed characters or 0 in case of an error. - var i = start +proc parseOct*[T: SomeInteger](s: openArray[char], number: var T, maxLen = 0): int {.noSideEffect.} = + ## Parses an octal number and stores its value in ``number``. + ## + ## Returns the number of the parsed characters or 0 in case of an error. + ## If error, the value of ``number`` is not changed. + ## + ## If ``maxLen == 0``, the parsing continues until the first non-oct character + ## or to the end of the string. Otherwise, no more than ``maxLen`` characters + ## are parsed starting from the ``start`` position. + ## + ## It does not check for overflow. If the value represented by the string is + ## too big to fit into ``number``, only the value of last fitting characters + ## will be stored in ``number`` without producing an error. 
+ runnableExamples: + var num: int + doAssert parseOct("0o23464755", num) == 10 + doAssert num == 5138925 + doAssert parseOct("8", num) == 0 + var num8: int8 + doAssert parseOct("0o_1464_755", num8) == 11 + doAssert num8 == -19 + doAssert parseOct("0o_1464_755", num8, 3, 3) == 3 + doAssert num8 == 102 + var num8u: uint8 + doAssert parseOct("1464755", num8u) == 7 + doAssert num8u == 237 + var num64: int64 + doAssert parseOct("2346475523464755", num64) == 16 + doAssert num64 == 86216859871725 + var i = 0 + var output = T(0) var foundDigit = false - if s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2) - while true: + let last = min(s.len, if maxLen == 0: s.len else: i + maxLen) + if i + 1 < last and s[i] == '0' and (s[i+1] in {'o', 'O'}): inc(i, 2) + while i < last: case s[i] of '_': discard of '0'..'7': - number = number shl 3 or (ord(s[i]) - ord('0')) + output = output shl 3 or T(ord(s[i]) - ord('0')) foundDigit = true else: break inc(i) - if foundDigit: result = i-start + if foundDigit: + number = output + result = i -proc parseIdent*(s: string, ident: var string, start = 0): int = - ## parses an identifier and stores it in ``ident``. Returns +proc parseHex*[T: SomeInteger](s: openArray[char], number: var T, maxLen = 0): int {.noSideEffect.} = + ## Parses a hexadecimal number and stores its value in ``number``. + ## + ## Returns the number of the parsed characters or 0 in case of an error. + ## If error, the value of ``number`` is not changed. + ## + ## If ``maxLen == 0``, the parsing continues until the first non-hex character + ## or to the end of the string. Otherwise, no more than ``maxLen`` characters + ## are parsed starting from the ``start`` position. + ## + ## It does not check for overflow. If the value represented by the string is + ## too big to fit into ``number``, only the value of last fitting characters + ## will be stored in ``number`` without producing an error. 
+ runnableExamples: + var num: int + doAssert parseHex("4E_69_ED", num) == 8 + doAssert num == 5138925 + doAssert parseHex("X", num) == 0 + doAssert parseHex("#ABC", num) == 4 + var num8: int8 + doAssert parseHex("0x_4E_69_ED", num8) == 11 + doAssert num8 == 0xED'i8 + doAssert parseHex("0x_4E_69_ED", num8, 3, 2) == 2 + doAssert num8 == 0x4E'i8 + var num8u: uint8 + doAssert parseHex("0x_4E_69_ED", num8u) == 11 + doAssert num8u == 237 + var num64: int64 + doAssert parseHex("4E69ED4E69ED", num64) == 12 + doAssert num64 == 86216859871725 + var i = 0 + var output = T(0) + var foundDigit = false + let last = min(s.len, if maxLen == 0: s.len else: i + maxLen) + if i + 1 < last and s[i] == '0' and (s[i+1] in {'x', 'X'}): inc(i, 2) + elif i < last and s[i] == '#': inc(i) + while i < last: + case s[i] + of '_': discard + of '0'..'9': + output = output shl 4 or T(ord(s[i]) - ord('0')) + foundDigit = true + of 'a'..'f': + output = output shl 4 or T(ord(s[i]) - ord('a') + 10) + foundDigit = true + of 'A'..'F': + output = output shl 4 or T(ord(s[i]) - ord('A') + 10) + foundDigit = true + else: break + inc(i) + if foundDigit: + number = output + result = i + +proc parseIdent*(s: openArray[char], ident: var string): int = + ## Parses an identifier and stores it in ``ident``. Returns ## the number of the parsed characters or 0 in case of an error. - var i = start - if s[i] in IdentStartChars: + ## If error, the value of `ident` is not changed. 
+ runnableExamples: + var res: string + doAssert parseIdent("Hello World", res, 0) == 5 + doAssert res == "Hello" + doAssert parseIdent("Hello World", res, 1) == 4 + doAssert res == "ello" + doAssert parseIdent("Hello World", res, 6) == 5 + doAssert res == "World" + var i = 0 + if i < s.len and s[i] in IdentStartChars: inc(i) - while s[i] in IdentChars: inc(i) - ident = substr(s, start, i-1) - result = i-start + while i < s.len and s[i] in IdentChars: inc(i) + ident = substr(s.toOpenArray(0, i-1)) + result = i -proc parseIdent*(s: string, start = 0): string = - ## parses an identifier and stores it in ``ident``. - ## Returns the parsed identifier or an empty string in case of an error. +proc parseIdent*(s: openArray[char]): string = + ## Parses an identifier and returns it or an empty string in + ## case of an error. + runnableExamples: + doAssert parseIdent("Hello World", 0) == "Hello" + doAssert parseIdent("Hello World", 1) == "ello" + doAssert parseIdent("Hello World", 5) == "" + doAssert parseIdent("Hello World", 6) == "World" result = "" - var i = start - - if s[i] in IdentStartChars: + var i = 0 + if i < s.len and s[i] in IdentStartChars: inc(i) - while s[i] in IdentChars: inc(i) - - result = substr(s, start, i-1) + while i < s.len and s[i] in IdentChars: inc(i) + result = substr(s.toOpenArray(0, i - 1)) -proc parseToken*(s: string, token: var string, validChars: set[char], - start = 0): int {.inline, deprecated.} = - ## parses a token and stores it in ``token``. Returns - ## the number of the parsed characters or 0 in case of an error. A token - ## consists of the characters in `validChars`. - ## - ## **Deprecated since version 0.8.12**: Use ``parseWhile`` instead. - var i = start - while s[i] in validChars: inc(i) - result = i-start - token = substr(s, start, i-1) +proc parseChar*(s: openArray[char], c: var char): int = + ## Parses a single character, stores it in `c` and returns 1. 
+ ## In case of error (if start >= s.len) it returns 0 + ## and the value of `c` is unchanged. + runnableExamples: + var c: char + doAssert "nim".parseChar(c, 3) == 0 + doAssert c == '\0' + doAssert "nim".parseChar(c, 0) == 1 + doAssert c == 'n' + if s.len > 0: + c = s[0] + result = 1 -proc skipWhitespace*(s: string, start = 0): int {.inline.} = - ## skips the whitespace starting at ``s[start]``. Returns the number of +proc skipWhitespace*(s: openArray[char]): int {.inline.} = + ## Skips the whitespace starting at ``s[start]``. Returns the number of ## skipped characters. - while s[start+result] in Whitespace: inc(result) + runnableExamples: + doAssert skipWhitespace("Hello World", 0) == 0 + doAssert skipWhitespace(" Hello World", 0) == 1 + doAssert skipWhitespace("Hello World", 5) == 1 + doAssert skipWhitespace("Hello World", 5) == 2 + result = 0 + while result < s.len and s[result] in Whitespace: inc(result) -proc skip*(s, token: string, start = 0): int {.inline.} = - ## skips the `token` starting at ``s[start]``. Returns the length of `token` +proc skip*(s, token: openArray[char]): int {.inline.} = + ## Skips the `token` starting at ``s[start]``. Returns the length of `token` ## or 0 if there was no `token` at ``s[start]``. - while result < token.len and s[result+start] == token[result]: inc(result) + runnableExamples: + doAssert skip("2019-01-22", "2019", 0) == 4 + doAssert skip("2019-01-22", "19", 0) == 0 + doAssert skip("2019-01-22", "19", 2) == 2 + doAssert skip("CAPlow", "CAP", 0) == 3 + doAssert skip("CAPlow", "cap", 0) == 0 + result = 0 + while result < s.len and result < token.len and + s[result] == token[result]: + inc(result) if result != token.len: result = 0 - -proc skipIgnoreCase*(s, token: string, start = 0): int = - ## same as `skip` but case is ignored for token matching. 
- while result < token.len and - toLower(s[result+start]) == toLower(token[result]): inc(result) + +proc skipIgnoreCase*(s, token: openArray[char]): int = + ## Same as `skip` but case is ignored for token matching. + runnableExamples: + doAssert skipIgnoreCase("CAPlow", "CAP", 0) == 3 + doAssert skipIgnoreCase("CAPlow", "cap", 0) == 3 + result = 0 + while result < s.len and result < token.len and + toLower(s[result]) == toLower(token[result]): inc(result) if result != token.len: result = 0 - -proc skipUntil*(s: string, until: set[char], start = 0): int {.inline.} = + +proc skipUntil*(s: openArray[char], until: set[char]): int {.inline.} = ## Skips all characters until one char from the set `until` is found ## or the end is reached. ## Returns number of characters skipped. - while s[result+start] notin until and s[result+start] != '\0': inc(result) + runnableExamples: + doAssert skipUntil("Hello World", {'W', 'e'}, 0) == 1 + doAssert skipUntil("Hello World", {'W'}, 0) == 6 + doAssert skipUntil("Hello World", {'W', 'd'}, 0) == 6 + result = 0 + while result < s.len and s[result] notin until: inc(result) -proc skipUntil*(s: string, until: char, start = 0): int {.inline.} = +proc skipUntil*(s: openArray[char], until: char): int {.inline.} = ## Skips all characters until the char `until` is found ## or the end is reached. ## Returns number of characters skipped. - while s[result+start] != until and s[result+start] != '\0': inc(result) + runnableExamples: + doAssert skipUntil("Hello World", 'o', 0) == 4 + doAssert skipUntil("Hello World", 'o', 4) == 0 + doAssert skipUntil("Hello World", 'W', 0) == 6 + doAssert skipUntil("Hello World", 'w', 0) == 11 + result = 0 + while result < s.len and s[result] != until: inc(result) -proc skipWhile*(s: string, toSkip: set[char], start = 0): int {.inline.} = - ## Skips all characters while one char from the set `token` is found. 
+proc skipWhile*(s: openArray[char], toSkip: set[char]): int {.inline.} = + ## Skips all characters while one char from the set `toSkip` is found. ## Returns number of characters skipped. - while s[result+start] in toSkip and s[result+start] != '\0': inc(result) + runnableExamples: + doAssert skipWhile("Hello World", {'H', 'e'}) == 2 + doAssert skipWhile("Hello World", {'e'}) == 0 + doAssert skipWhile("Hello World", {'W', 'o', 'r'}, 6) == 3 + result = 0 + while result < s.len and s[result] in toSkip: inc(result) -proc parseUntil*(s: string, token: var string, until: set[char], - start = 0): int {.inline.} = - ## parses a token and stores it in ``token``. Returns +proc fastSubstr(s: openArray[char]; token: var string; length: int) = + token.setLen length + for i in 0 ..< length: token[i] = s[i] + +proc parseUntil*(s: openArray[char], token: var string, until: set[char]): int {.inline.} = + ## Parses a token and stores it in ``token``. Returns ## the number of the parsed characters or 0 in case of an error. A token - ## consists of the characters notin `until`. - var i = start + ## consists of the characters notin `until`. + runnableExamples: + var myToken: string + doAssert parseUntil("Hello World", myToken, {'W', 'o', 'r'}) == 4 + doAssert myToken == "Hell" + doAssert parseUntil("Hello World", myToken, {'W', 'r'}) == 6 + doAssert myToken == "Hello " + doAssert parseUntil("Hello World", myToken, {'W', 'r'}, 3) == 3 + doAssert myToken == "lo " + var i = 0 while i < s.len and s[i] notin until: inc(i) - result = i-start - token = substr(s, start, i-1) + result = i + fastSubstr(s, token, result) + #token = substr(s, start, i-1) -proc parseUntil*(s: string, token: var string, until: char, - start = 0): int {.inline.} = - ## parses a token and stores it in ``token``. Returns +proc parseUntil*(s: openArray[char], token: var string, until: char): int {.inline.} = + ## Parses a token and stores it in ``token``. 
Returns ## the number of the parsed characters or 0 in case of an error. A token ## consists of any character that is not the `until` character. - var i = start + runnableExamples: + var myToken: string + doAssert parseUntil("Hello World", myToken, 'W') == 6 + doAssert myToken == "Hello " + doAssert parseUntil("Hello World", myToken, 'o') == 4 + doAssert myToken == "Hell" + doAssert parseUntil("Hello World", myToken, 'o', 2) == 2 + doAssert myToken == "ll" + var i = 0 while i < s.len and s[i] != until: inc(i) - result = i-start - token = substr(s, start, i-1) + result = i + fastSubstr(s, token, result) + #token = substr(s, start, i-1) -proc parseWhile*(s: string, token: var string, validChars: set[char], - start = 0): int {.inline.} = - ## parses a token and stores it in ``token``. Returns +proc parseUntil*(s: openArray[char], token: var string, until: string): int {.inline.} = + ## Parses a token and stores it in ``token``. Returns + ## the number of the parsed characters or 0 in case of an error. A token + ## consists of any character that comes before the `until` token. + runnableExamples: + var myToken: string + doAssert parseUntil("Hello World", myToken, "Wor") == 6 + doAssert myToken == "Hello " + doAssert parseUntil("Hello World", myToken, "Wor", 2) == 4 + doAssert myToken == "llo " + when (NimMajor, NimMinor) <= (1, 0): + if until.len == 0: + token.setLen(0) + return 0 + var i = 0 + while i < s.len: + if until.len > 0 and s[i] == until[0]: + var u = 1 + while i+u < s.len and u < until.len and s[i+u] == until[u]: + inc u + if u >= until.len: break + inc(i) + result = i + fastSubstr(s, token, result) + #token = substr(s, start, i-1) + +proc parseWhile*(s: openArray[char], token: var string, validChars: set[char]): int {.inline.} = + ## Parses a token and stores it in ``token``. Returns ## the number of the parsed characters or 0 in case of an error. A token - ## consists of the characters in `validChars`. 
- var i = start - while s[i] in validChars: inc(i) - result = i-start - token = substr(s, start, i-1) + ## consists of the characters in `validChars`. + runnableExamples: + var myToken: string + doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 0) == 0 + doAssert myToken.len() == 0 + doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 6) == 3 + doAssert myToken == "Wor" + var i = 0 + while i < s.len and s[i] in validChars: inc(i) + result = i + fastSubstr(s, token, result) + #token = substr(s, start, i-1) -proc captureBetween*(s: string, first: char, second = '\0', start = 0): string = +proc captureBetween*(s: openArray[char], first: char, second = '\0'): string = ## Finds the first occurrence of ``first``, then returns everything from there - ## up to ``second``(if ``second`` is '\0', then ``first`` is used). - var i = skipUntil(s, first, start)+1+start + ## up to ``second`` (if ``second`` is '\0', then ``first`` is used). + runnableExamples: + doAssert captureBetween("Hello World", 'e') == "llo World" + doAssert captureBetween("Hello World", 'e', 'r') == "llo Wo" + doAssert captureBetween("Hello World".toOpenArray(6, "Hello World".high), 'l') == "d" + var i = skipUntil(s, first) + 1 result = "" - discard s.parseUntil(result, if second == '\0': first else: second, i) + discard parseUntil(s.toOpenArray(i, s.high), result, if second == '\0': first else: second) + +proc integerOutOfRangeError() {.noinline.} = + raise newException(ValueError, "Parsed integer outside of valid range") -{.push overflowChecks: on.} -# this must be compiled with overflow checking turned on: -proc rawParseInt(s: string, b: var BiggestInt, start = 0): int = +# See #6752 +when defined(js): + {.push overflowChecks: off.} + +proc rawParseInt(s: openArray[char], b: var BiggestInt): int = var sign: BiggestInt = -1 - i = start - if s[i] == '+': inc(i) - elif s[i] == '-': - inc(i) - sign = 1 - if s[i] in {'0'..'9'}: + i = 0 + if i < s.len: + if s[i] == '+': inc(i) + elif s[i] == 
'-': + inc(i) + sign = 1 + if i < s.len and s[i] in {'0'..'9'}: b = 0 - while s[i] in {'0'..'9'}: - b = b * 10 - (ord(s[i]) - ord('0')) + while i < s.len and s[i] in {'0'..'9'}: + let c = ord(s[i]) - ord('0') + if b >= (low(BiggestInt) + c) div 10: + b = b * 10 - c + else: + integerOutOfRangeError() inc(i) - while s[i] == '_': inc(i) # underscores are allowed and ignored - b = b * sign - result = i - start -{.pop.} # overflowChecks - -proc parseBiggestInt*(s: string, number: var BiggestInt, start = 0): int {. - rtl, extern: "npuParseBiggestInt", noSideEffect.} = - ## parses an integer starting at `start` and stores the value into `number`. + while i < s.len and s[i] == '_': inc(i) # underscores are allowed and ignored + if sign == -1 and b == low(BiggestInt): + integerOutOfRangeError() + else: + b = b * sign + result = i + +when defined(js): + {.pop.} # overflowChecks: off + +proc parseBiggestInt*(s: openArray[char], number: var BiggestInt): int {. + rtl, extern: "npuParseBiggestInt", noSideEffect, raises: [ValueError].} = + ## Parses an integer and stores the value into `number`. ## Result is the number of processed chars or 0 if there is no integer. - ## `EOverflow` is raised if an overflow occurs. - var res: BiggestInt + ## `ValueError` is raised if the parsed integer is out of the valid range. + runnableExamples: + var res: BiggestInt + doAssert parseBiggestInt("9223372036854775807", res) == 19 + doAssert res == 9223372036854775807 + doAssert parseBiggestInt("-2024_05_09", res) == 11 + doAssert res == -20240509 + var res = BiggestInt(0) # use 'res' for exception safety (don't write to 'number' in case of an - # overflow exception: - result = rawParseInt(s, res, start) - number = res + # overflow exception): + result = rawParseInt(s, res) + if result != 0: + number = res -proc parseInt*(s: string, number: var int, start = 0): int {. - rtl, extern: "npuParseInt", noSideEffect.} = - ## parses an integer starting at `start` and stores the value into `number`. 
+proc parseInt*(s: openArray[char], number: var int): int {. + rtl, extern: "npuParseInt", noSideEffect, raises: [ValueError].} = + ## Parses an integer and stores the value into `number`. ## Result is the number of processed chars or 0 if there is no integer. - ## `EOverflow` is raised if an overflow occurs. - var res: BiggestInt - result = parseBiggestInt(s, res, start) - if (sizeof(int) <= 4) and - ((res < low(int)) or (res > high(int))): - raise newException(OverflowError, "overflow") - elif result != 0: + ## `ValueError` is raised if the parsed integer is out of the valid range. + runnableExamples: + var res: int + doAssert parseInt("-2024_05_02", res) == 11 + doAssert res == -20240502 + var res = BiggestInt(0) + result = parseBiggestInt(s, res) + when sizeof(int) <= 4: + if res < low(int) or res > high(int): + integerOutOfRangeError() + if result != 0: number = int(res) -proc parseBiggestFloat*(s: string, number: var BiggestFloat, start = 0): int {. +proc parseSaturatedNatural*(s: openArray[char], b: var int): int {. + raises: [].} = + ## Parses a natural number into ``b``. This cannot raise an overflow + ## error. ``high(int)`` is returned for an overflow. + ## The number of processed character is returned. + ## This is usually what you really want to use instead of `parseInt`:idx:. 
+ runnableExamples: + var res = 0 + discard parseSaturatedNatural("848", res) + doAssert res == 848 + var i = 0 + if i < s.len and s[i] == '+': inc(i) + if i < s.len and s[i] in {'0'..'9'}: + b = 0 + while i < s.len and s[i] in {'0'..'9'}: + let c = ord(s[i]) - ord('0') + if b <= (high(int) - c) div 10: + b = b * 10 + c + else: + b = high(int) + inc(i) + while i < s.len and s[i] == '_': inc(i) # underscores are allowed and ignored + result = i + +proc rawParseUInt(s: openArray[char], b: var BiggestUInt): int = + var + res = 0.BiggestUInt + prev = 0.BiggestUInt + i = 0 + if i < s.len - 1 and s[i] == '-' and s[i + 1] in {'0'..'9'}: + integerOutOfRangeError() + if i < s.len and s[i] == '+': inc(i) # Allow + if i < s.len and s[i] in {'0'..'9'}: + b = 0 + while i < s.len and s[i] in {'0'..'9'}: + prev = res + res = res * 10 + (ord(s[i]) - ord('0')).BiggestUInt + if prev > res: + integerOutOfRangeError() + inc(i) + while i < s.len and s[i] == '_': inc(i) # underscores are allowed and ignored + b = res + result = i + +proc parseBiggestUInt*(s: openArray[char], number: var BiggestUInt): int {. + rtl, extern: "npuParseBiggestUInt", noSideEffect, raises: [ValueError].} = + ## Parses an unsigned integer and stores the value + ## into `number`. + ## `ValueError` is raised if the parsed integer is out of the valid range. + runnableExamples: + var res: BiggestUInt + doAssert parseBiggestUInt("12", res, 0) == 2 + doAssert res == 12 + doAssert parseBiggestUInt("1111111111111111111", res, 0) == 19 + doAssert res == 1111111111111111111'u64 + var res = BiggestUInt(0) + # use 'res' for exception safety (don't write to 'number' in case of an + # overflow exception): + result = rawParseUInt(s, res) + if result != 0: + number = res + +proc parseUInt*(s: openArray[char], number: var uint): int {. + rtl, extern: "npuParseUInt", noSideEffect, raises: [ValueError].} = + ## Parses an unsigned integer and stores the value + ## into `number`. 
+ ## `ValueError` is raised if the parsed integer is out of the valid range. + runnableExamples: + var res: uint + doAssert parseUInt("3450", res) == 4 + doAssert res == 3450 + doAssert parseUInt("3450", res, 2) == 2 + doAssert res == 50 + var res = BiggestUInt(0) + result = parseBiggestUInt(s, res) + when sizeof(BiggestUInt) > sizeof(uint) and sizeof(uint) <= 4: + if res > 0xFFFF_FFFF'u64: + integerOutOfRangeError() + if result != 0: + number = uint(res) + +proc parseBiggestFloat*(s: openArray[char], number: var BiggestFloat): int {. magic: "ParseBiggestFloat", importc: "nimParseBiggestFloat", noSideEffect.} - ## parses a float starting at `start` and stores the value into `number`. + ## Parses a float and stores the value into `number`. ## Result is the number of processed chars or 0 if a parsing error ## occurred. -proc parseFloat*(s: string, number: var float, start = 0): int {. +proc parseFloat*(s: openArray[char], number: var float): int {. rtl, extern: "npuParseFloat", noSideEffect.} = - ## parses a float starting at `start` and stores the value into `number`. + ## Parses a float and stores the value into `number`. ## Result is the number of processed chars or 0 if there occurred a parsing ## error. 
- var bf: BiggestFloat - result = parseBiggestFloat(s, bf, start) + runnableExamples: + var res: float + doAssert parseFloat("32", res, 0) == 2 + doAssert res == 32.0 + doAssert parseFloat("32.57", res, 0) == 5 + doAssert res == 32.57 + doAssert parseFloat("32.57", res, 3) == 2 + doAssert res == 57.00 + var bf = BiggestFloat(0.0) + result = parseBiggestFloat(s, bf) if result != 0: number = bf - -type - InterpolatedKind* = enum ## describes for `interpolatedFragments` - ## which part of the interpolated string is - ## yielded; for example in "str$$$var${expr}" - ikStr, ## ``str`` part of the interpolated string - ikDollar, ## escaped ``$`` part of the interpolated string - ikVar, ## ``var`` part of the interpolated string - ikExpr ## ``expr`` part of the interpolated string -{.deprecated: [TInterpolatedKind: InterpolatedKind].} +func toLowerAscii(c: char): char = + if c in {'A'..'Z'}: char(uint8(c) xor 0b0010_0000'u8) else: c -iterator interpolatedFragments*(s: string): tuple[kind: InterpolatedKind, - value: string] = - ## Tokenizes the string `s` into substrings for interpolation purposes. +func parseSize*(s: openArray[char], size: var int64, alwaysBin=false): int = + ## Parse a size qualified by binary or metric units into `size`. This format + ## is often called "human readable". Result is the number of processed chars + ## or 0 on parse errors and size is rounded to the nearest integer. Trailing + ## garbage like "/s" in "1k/s" is allowed and detected by `result < s.len`. ## - ## Example: + ## To simplify use, following non-rare wild conventions, and since fractional + ## data like milli-bytes is so rare, unit matching is case-insensitive but for + ## the 'i' distinguishing binary-metric from metric (which cannot be 'I'). ## - ## .. code-block:: nim - ## for k, v in interpolatedFragments(" $this is ${an example} $$"): - ## echo "(", k, ", \"", v, "\")" + ## An optional trailing 'B|b' is ignored but processed. 
I.e., you must still + ## know if units are bytes | bits or infer this fact via the case of s[^1] (if + ## users can even be relied upon to use 'B' for byte and 'b' for bit or have + ## that be s[^1]). ## - ## Results in: + ## If `alwaysBin==true` then scales are always binary-metric, but e.g. "KiB" + ## is still accepted for clarity. If the value would exceed the range of + ## `int64`, `size` saturates to `int64.high`. Supported metric prefix chars + ## include k, m, g, t, p, e, z, y (but z & y saturate unless the number is a + ## small fraction). ## - ## .. code-block:: nim - ## (ikString, " ") - ## (ikExpr, "this") - ## (ikString, " is ") - ## (ikExpr, "an example") - ## (ikString, " ") - ## (ikDollar, "$") + ## **See also:** + ## * https://en.wikipedia.org/wiki/Binary_prefix + ## * `formatSize module<strutils.html>`_ for formatting + runnableExamples: + var res: int64 # caller must still know if 'b' refers to bytes|bits + doAssert parseSize("10.5 MB", res) == 7 + doAssert res == 10_500_000 # decimal metric Mega prefix + doAssert parseSize("64 mib", res) == 6 + doAssert res == 67108864 # 64 shl 20 + doAssert parseSize("1G/h", res, true) == 2 # '/' stops parse + doAssert res == 1073741824 # 1 shl 30, forced binary metric + const prefix = "b" & "kmgtpezy" # byte|bit & lowCase metric-ish prefixes + const scaleM = [1.0, 1e3, 1e6, 1e9, 1e12, 1e15, 1e18, 1e21, 1e24] # 10^(3*idx) + const scaleB = [1.0, 1024, 1048576, 1073741824, 1099511627776.0, # 2^(10*idx) + 1125899906842624.0, 1152921504606846976.0, # ldexp? + 1.180591620717411303424e21, 1.208925819614629174706176e24] + var number: float + var scale = 1.0 + result = parseFloat(s, number) + if number < 0: # While parseFloat accepts negatives .. + result = 0 #.. 
we do not since sizes cannot be < 0 + if result > 0: + let start = result # Save spot to maybe unwind white to EOS + while result < s.len and s[result] in Whitespace: + inc result + if result < s.len: # Illegal starting char => unity + if (let si = prefix.find(s[result].toLowerAscii); si >= 0): + inc result # Now parse the scale + scale = if alwaysBin: scaleB[si] else: scaleM[si] + if result < s.len and s[result] == 'i': + scale = scaleB[si] # Switch from default to binary-metric + inc result + if result < s.len and s[result].toLowerAscii == 'b': + inc result # Skip optional '[bB]' + else: # Unwind result advancement when there.. + result = start #..is no unit to the end of `s`. + var sizeF = number * scale + 0.5 # Saturate to int64.high when too big + size = if sizeF > 9223372036854774784.0: int64.high else: sizeF.int64 +# Above constant=2^63-1024 avoids C UB; github.com/nim-lang/Nim/issues/20102 or +# stackoverflow.com/questions/20923556/math-pow2-63-1-math-pow2-63-512-is-true + +type + InterpolatedKind* = enum ## Describes for `interpolatedFragments` + ## which part of the interpolated string is + ## yielded; for example in "str$$$var${expr}" + ikStr, ## ``str`` part of the interpolated string + ikDollar, ## escaped ``$`` part of the interpolated string + ikVar, ## ``var`` part of the interpolated string + ikExpr ## ``expr`` part of the interpolated string + +iterator interpolatedFragments*(s: openArray[char]): tuple[kind: InterpolatedKind, + value: string] = + ## Tokenizes the string `s` into substrings for interpolation purposes. 
+ ## + runnableExamples: + var outp: seq[tuple[kind: InterpolatedKind, value: string]] + for k, v in interpolatedFragments(" $this is ${an example} $$"): + outp.add (k, v) + doAssert outp == @[(ikStr, " "), + (ikVar, "this"), + (ikStr, " is "), + (ikExpr, "an example"), + (ikStr, " "), + (ikDollar, "$")] + var i = 0 var kind: InterpolatedKind while true: var j = i - if s[j] == '$': - if s[j+1] == '{': + if j < s.len and s[j] == '$': + if j+1 < s.len and s[j+1] == '{': inc j, 2 var nesting = 0 - while true: - case s[j] - of '{': inc nesting - of '}': - if nesting == 0: - inc j - break - dec nesting - of '\0': - raise newException(ValueError, - "Expected closing '}': " & substr(s, i, s.high)) - else: discard - inc j + block curlies: + while j < s.len: + case s[j] + of '{': inc nesting + of '}': + if nesting == 0: + inc j + break curlies + dec nesting + else: discard + inc j + raise newException(ValueError, + "Expected closing '}': " & substr(s.toOpenArray(i, s.high))) inc i, 2 # skip ${ kind = ikExpr - elif s[j+1] in IdentStartChars: + elif j+1 < s.len and s[j+1] in IdentStartChars: inc j, 2 - while s[j] in IdentChars: inc(j) + while j < s.len and s[j] in IdentChars: inc(j) inc i # skip $ kind = ikVar - elif s[j+1] == '$': + elif j+1 < s.len and s[j+1] == '$': inc j, 2 inc i # skip $ kind = ikDollar else: - raise newException(ValueError, - "Unable to parse a varible name at " & substr(s, i, s.high)) + raise newException(ValueError, + "Unable to parse a variable name at " & substr(s.toOpenArray(i, s.high))) else: while j < s.len and s[j] != '$': inc j kind = ikStr if j > i: # do not copy the trailing } for ikExpr: - yield (kind, substr(s, i, j-1-ord(kind == ikExpr))) + yield (kind, substr(s.toOpenArray(i, j-1-ord(kind == ikExpr)))) else: break i = j -when isMainModule: - for k, v in interpolatedFragments("$test{} $this is ${an{ example}} "): - echo "(", k, ", \"", v, "\")" - var value = 0 - discard parseHex("0x38", value) - assert value == 56 - discard 
parseHex("0x34", value) - assert value == 56 * 256 + 52 - value = -1 - discard parseHex("0x38", value) - assert value == -200 +{.pop.} -{.pop.} +proc parseBin*[T: SomeInteger](s: string, number: var T, start = 0, + maxLen = 0): int {.noSideEffect.} = + ## Parses a binary number and stores its value in ``number``. + ## + ## Returns the number of the parsed characters or 0 in case of an error. + ## If error, the value of ``number`` is not changed. + ## + ## If ``maxLen == 0``, the parsing continues until the first non-bin character + ## or to the end of the string. Otherwise, no more than ``maxLen`` characters + ## are parsed starting from the ``start`` position. + ## + ## It does not check for overflow. If the value represented by the string is + ## too big to fit into ``number``, only the value of last fitting characters + ## will be stored in ``number`` without producing an error. + runnableExamples: + var num: int + doAssert parseBin("0100_1110_0110_1001_1110_1101", num) == 29 + doAssert num == 5138925 + doAssert parseBin("3", num) == 0 + var num8: int8 + doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8) == 32 + doAssert num8 == 0b1110_1101'i8 + doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8, 3, 9) == 9 + doAssert num8 == 0b0100_1110'i8 + var num8u: uint8 + doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8u) == 32 + doAssert num8u == 237 + var num64: int64 + doAssert parseBin("0100111001101001111011010100111001101001", num64) == 40 + doAssert num64 == 336784608873 + parseBin(s.toOpenArray(start, s.high), number, maxLen) + +proc parseOct*[T: SomeInteger](s: string, number: var T, start = 0, + maxLen = 0): int {.noSideEffect.} = + ## Parses an octal number and stores its value in ``number``. + ## + ## Returns the number of the parsed characters or 0 in case of an error. + ## If error, the value of ``number`` is not changed. 
+ ## + ## If ``maxLen == 0``, the parsing continues until the first non-oct character + ## or to the end of the string. Otherwise, no more than ``maxLen`` characters + ## are parsed starting from the ``start`` position. + ## + ## It does not check for overflow. If the value represented by the string is + ## too big to fit into ``number``, only the value of last fitting characters + ## will be stored in ``number`` without producing an error. + runnableExamples: + var num: int + doAssert parseOct("0o23464755", num) == 10 + doAssert num == 5138925 + doAssert parseOct("8", num) == 0 + var num8: int8 + doAssert parseOct("0o_1464_755", num8) == 11 + doAssert num8 == -19 + doAssert parseOct("0o_1464_755", num8, 3, 3) == 3 + doAssert num8 == 102 + var num8u: uint8 + doAssert parseOct("1464755", num8u) == 7 + doAssert num8u == 237 + var num64: int64 + doAssert parseOct("2346475523464755", num64) == 16 + doAssert num64 == 86216859871725 + parseOct(s.toOpenArray(start, s.high), number, maxLen) + +proc parseHex*[T: SomeInteger](s: string, number: var T, start = 0, + maxLen = 0): int {.noSideEffect.} = + ## Parses a hexadecimal number and stores its value in ``number``. + ## + ## Returns the number of the parsed characters or 0 in case of an error. + ## If error, the value of ``number`` is not changed. + ## + ## If ``maxLen == 0``, the parsing continues until the first non-hex character + ## or to the end of the string. Otherwise, no more than ``maxLen`` characters + ## are parsed starting from the ``start`` position. + ## + ## It does not check for overflow. If the value represented by the string is + ## too big to fit into ``number``, only the value of last fitting characters + ## will be stored in ``number`` without producing an error. 
+ runnableExamples: + var num: int + doAssert parseHex("4E_69_ED", num) == 8 + doAssert num == 5138925 + doAssert parseHex("X", num) == 0 + doAssert parseHex("#ABC", num) == 4 + var num8: int8 + doAssert parseHex("0x_4E_69_ED", num8) == 11 + doAssert num8 == 0xED'i8 + doAssert parseHex("0x_4E_69_ED", num8, 3, 2) == 2 + doAssert num8 == 0x4E'i8 + var num8u: uint8 + doAssert parseHex("0x_4E_69_ED", num8u) == 11 + doAssert num8u == 237 + var num64: int64 + doAssert parseHex("4E69ED4E69ED", num64) == 12 + doAssert num64 == 86216859871725 + parseHex(s.toOpenArray(start, s.high), number, maxLen) + +proc parseIdent*(s: string, ident: var string, start = 0): int = + ## Parses an identifier and stores it in ``ident``. Returns + ## the number of the parsed characters or 0 in case of an error. + ## If error, the value of `ident` is not changed. + runnableExamples: + var res: string + doAssert parseIdent("Hello World", res, 0) == 5 + doAssert res == "Hello" + doAssert parseIdent("Hello World", res, 1) == 4 + doAssert res == "ello" + doAssert parseIdent("Hello World", res, 6) == 5 + doAssert res == "World" + parseIdent(s.toOpenArray(start, s.high), ident) + +proc parseIdent*(s: string, start = 0): string = + ## Parses an identifier and returns it or an empty string in + ## case of an error. + runnableExamples: + doAssert parseIdent("Hello World", 0) == "Hello" + doAssert parseIdent("Hello World", 1) == "ello" + doAssert parseIdent("Hello World", 5) == "" + doAssert parseIdent("Hello World", 6) == "World" + parseIdent(s.toOpenArray(start, s.high)) + +proc parseChar*(s: string, c: var char, start = 0): int = + ## Parses a single character, stores it in `c` and returns 1. + ## In case of error (if start >= s.len) it returns 0 + ## and the value of `c` is unchanged. 
+ runnableExamples: + var c: char + doAssert "nim".parseChar(c, 3) == 0 + doAssert c == '\0' + doAssert "nim".parseChar(c, 0) == 1 + doAssert c == 'n' + parseChar(s.toOpenArray(start, s.high), c) + +proc skipWhitespace*(s: string, start = 0): int {.inline.} = + ## Skips the whitespace starting at ``s[start]``. Returns the number of + ## skipped characters. + runnableExamples: + doAssert skipWhitespace("Hello World", 0) == 0 + doAssert skipWhitespace(" Hello World", 0) == 1 + doAssert skipWhitespace("Hello World", 5) == 1 + doAssert skipWhitespace("Hello World", 5) == 2 + skipWhitespace(s.toOpenArray(start, s.high)) + +proc skip*(s, token: string, start = 0): int {.inline.} = + ## Skips the `token` starting at ``s[start]``. Returns the length of `token` + ## or 0 if there was no `token` at ``s[start]``. + runnableExamples: + doAssert skip("2019-01-22", "2019", 0) == 4 + doAssert skip("2019-01-22", "19", 0) == 0 + doAssert skip("2019-01-22", "19", 2) == 2 + doAssert skip("CAPlow", "CAP", 0) == 3 + doAssert skip("CAPlow", "cap", 0) == 0 + skip(s.toOpenArray(start, s.high), token) + +proc skipIgnoreCase*(s, token: string, start = 0): int = + ## Same as `skip` but case is ignored for token matching. + runnableExamples: + doAssert skipIgnoreCase("CAPlow", "CAP", 0) == 3 + doAssert skipIgnoreCase("CAPlow", "cap", 0) == 3 + skipIgnoreCase(s.toOpenArray(start, s.high), token) + +proc skipUntil*(s: string, until: set[char], start = 0): int {.inline.} = + ## Skips all characters until one char from the set `until` is found + ## or the end is reached. + ## Returns number of characters skipped. + runnableExamples: + doAssert skipUntil("Hello World", {'W', 'e'}, 0) == 1 + doAssert skipUntil("Hello World", {'W'}, 0) == 6 + doAssert skipUntil("Hello World", {'W', 'd'}, 0) == 6 + skipUntil(s.toOpenArray(start, s.high), until) + +proc skipUntil*(s: string, until: char, start = 0): int {.inline.} = + ## Skips all characters until the char `until` is found + ## or the end is reached. 
+ ## Returns number of characters skipped. + runnableExamples: + doAssert skipUntil("Hello World", 'o', 0) == 4 + doAssert skipUntil("Hello World", 'o', 4) == 0 + doAssert skipUntil("Hello World", 'W', 0) == 6 + doAssert skipUntil("Hello World", 'w', 0) == 11 + skipUntil(s.toOpenArray(start, s.high), until) + +proc skipWhile*(s: string, toSkip: set[char], start = 0): int {.inline.} = + ## Skips all characters while one char from the set `toSkip` is found. + ## Returns number of characters skipped. + runnableExamples: + doAssert skipWhile("Hello World", {'H', 'e'}) == 2 + doAssert skipWhile("Hello World", {'e'}) == 0 + doAssert skipWhile("Hello World", {'W', 'o', 'r'}, 6) == 3 + skipWhile(s.toOpenArray(start, s.high), toSkip) + +proc parseUntil*(s: string, token: var string, until: set[char], + start = 0): int {.inline.} = + ## Parses a token and stores it in ``token``. Returns + ## the number of the parsed characters or 0 in case of an error. A token + ## consists of the characters notin `until`. + runnableExamples: + var myToken: string + doAssert parseUntil("Hello World", myToken, {'W', 'o', 'r'}) == 4 + doAssert myToken == "Hell" + doAssert parseUntil("Hello World", myToken, {'W', 'r'}) == 6 + doAssert myToken == "Hello " + doAssert parseUntil("Hello World", myToken, {'W', 'r'}, 3) == 3 + doAssert myToken == "lo " + parseUntil(s.toOpenArray(start, s.high), token, until) + +proc parseUntil*(s: string, token: var string, until: char, + start = 0): int {.inline.} = + ## Parses a token and stores it in ``token``. Returns + ## the number of the parsed characters or 0 in case of an error. A token + ## consists of any character that is not the `until` character. 
+ runnableExamples: + var myToken: string + doAssert parseUntil("Hello World", myToken, 'W') == 6 + doAssert myToken == "Hello " + doAssert parseUntil("Hello World", myToken, 'o') == 4 + doAssert myToken == "Hell" + doAssert parseUntil("Hello World", myToken, 'o', 2) == 2 + doAssert myToken == "ll" + parseUntil(s.toOpenArray(start, s.high), token, until) + +proc parseUntil*(s: string, token: var string, until: string, + start = 0): int {.inline.} = + ## Parses a token and stores it in ``token``. Returns + ## the number of the parsed characters or 0 in case of an error. A token + ## consists of any character that comes before the `until` token. + runnableExamples: + var myToken: string + doAssert parseUntil("Hello World", myToken, "Wor") == 6 + doAssert myToken == "Hello " + doAssert parseUntil("Hello World", myToken, "Wor", 2) == 4 + doAssert myToken == "llo " + parseUntil(s.toOpenArray(start, s.high), token, until) + +proc parseWhile*(s: string, token: var string, validChars: set[char], + start = 0): int {.inline.} = + ## Parses a token and stores it in ``token``. Returns + ## the number of the parsed characters or 0 in case of an error. A token + ## consists of the characters in `validChars`. + runnableExamples: + var myToken: string + doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 0) == 0 + doAssert myToken.len() == 0 + doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 6) == 3 + doAssert myToken == "Wor" + parseWhile(s.toOpenArray(start, s.high), token, validChars) + +proc captureBetween*(s: string, first: char, second = '\0', start = 0): string = + ## Finds the first occurrence of ``first``, then returns everything from there + ## up to ``second`` (if ``second`` is '\0', then ``first`` is used). 
+ runnableExamples: + doAssert captureBetween("Hello World", 'e') == "llo World" + doAssert captureBetween("Hello World", 'e', 'r') == "llo Wo" + doAssert captureBetween("Hello World", 'l', start = 6) == "d" + captureBetween(s.toOpenArray(start, s.high), first, second) + +proc parseBiggestInt*(s: string, number: var BiggestInt, start = 0): int {.noSideEffect, raises: [ValueError].} = + ## Parses an integer starting at `start` and stores the value into `number`. + ## Result is the number of processed chars or 0 if there is no integer. + ## `ValueError` is raised if the parsed integer is out of the valid range. + runnableExamples: + var res: BiggestInt + doAssert parseBiggestInt("9223372036854775807", res, 0) == 19 + doAssert res == 9223372036854775807 + doAssert parseBiggestInt("-2024_05_09", res) == 11 + doAssert res == -20240509 + doAssert parseBiggestInt("-2024_05_02", res, 7) == 4 + doAssert res == 502 + parseBiggestInt(s.toOpenArray(start, s.high), number) + +proc parseInt*(s: string, number: var int, start = 0): int {.noSideEffect, raises: [ValueError].} = + ## Parses an integer starting at `start` and stores the value into `number`. + ## Result is the number of processed chars or 0 if there is no integer. + ## `ValueError` is raised if the parsed integer is out of the valid range. + runnableExamples: + var res: int + doAssert parseInt("-2024_05_02", res) == 11 + doAssert res == -20240502 + doAssert parseInt("-2024_05_02", res, 7) == 4 + doAssert res == 502 + parseInt(s.toOpenArray(start, s.high), number) + + +proc parseSaturatedNatural*(s: string, b: var int, start = 0): int {. + raises: [].} = + ## Parses a natural number into ``b``. This cannot raise an overflow + ## error. ``high(int)`` is returned for an overflow. + ## The number of processed character is returned. + ## This is usually what you really want to use instead of `parseInt`:idx:. 
+ runnableExamples: + var res = 0 + discard parseSaturatedNatural("848", res) + doAssert res == 848 + parseSaturatedNatural(s.toOpenArray(start, s.high), b) + + +proc parseBiggestUInt*(s: string, number: var BiggestUInt, start = 0): int {.noSideEffect, raises: [ValueError].} = + ## Parses an unsigned integer starting at `start` and stores the value + ## into `number`. + ## `ValueError` is raised if the parsed integer is out of the valid range. + runnableExamples: + var res: BiggestUInt + doAssert parseBiggestUInt("12", res, 0) == 2 + doAssert res == 12 + doAssert parseBiggestUInt("1111111111111111111", res, 0) == 19 + doAssert res == 1111111111111111111'u64 + parseBiggestUInt(s.toOpenArray(start, s.high), number) + +proc parseUInt*(s: string, number: var uint, start = 0): int {.noSideEffect, raises: [ValueError].} = + ## Parses an unsigned integer starting at `start` and stores the value + ## into `number`. + ## `ValueError` is raised if the parsed integer is out of the valid range. + runnableExamples: + var res: uint + doAssert parseUInt("3450", res) == 4 + doAssert res == 3450 + doAssert parseUInt("3450", res, 2) == 2 + doAssert res == 50 + parseUInt(s.toOpenArray(start, s.high), number) + +proc parseBiggestFloat*(s: string, number: var BiggestFloat, start = 0): int {.noSideEffect.} = + ## Parses a float starting at `start` and stores the value into `number`. + ## Result is the number of processed chars or 0 if a parsing error + ## occurred. + parseFloat(s.toOpenArray(start, s.high), number) + +proc parseFloat*(s: string, number: var float, start = 0): int {.noSideEffect.} = + ## Parses a float starting at `start` and stores the value into `number`. + ## Result is the number of processed chars or 0 if there occurred a parsing + ## error. 
+ runnableExamples: + var res: float + doAssert parseFloat("32", res, 0) == 2 + doAssert res == 32.0 + doAssert parseFloat("32.57", res, 0) == 5 + doAssert res == 32.57 + doAssert parseFloat("32.57", res, 3) == 2 + doAssert res == 57.00 + parseFloat(s.toOpenArray(start, s.high), number) + +iterator interpolatedFragments*(s: string): tuple[kind: InterpolatedKind, + value: string] = + ## Tokenizes the string `s` into substrings for interpolation purposes. + ## + runnableExamples: + var outp: seq[tuple[kind: InterpolatedKind, value: string]] + for k, v in interpolatedFragments(" $this is ${an example} $$"): + outp.add (k, v) + doAssert outp == @[(ikStr, " "), + (ikVar, "this"), + (ikStr, " is "), + (ikExpr, "an example"), + (ikStr, " "), + (ikDollar, "$")] + for x in s.toOa.interpolatedFragments: + yield x + diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim index 2663c5b2f..c760799a2 100644 --- a/lib/pure/parsexml.nim +++ b/lib/pure/parsexml.nim @@ -8,99 +8,201 @@ # ## This module implements a simple high performance `XML`:idx: / `HTML`:idx: -## parser. +## parser. ## The only encoding that is supported is UTF-8. The parser has been designed -## to be somewhat error correcting, so that even most "wild HTML" found on the +## to be somewhat error correcting, so that even most "wild HTML" found on the ## web can be parsed with it. **Note:** This parser does not check that each -## ``<tag>`` has a corresponding ``</tag>``! These checks have do be -## implemented by the client code for various reasons: +## ``<tag>`` has a corresponding ``</tag>``! These checks have do be +## implemented by the client code for various reasons: ## ## * Old HTML contains tags that have no end tag: ``<br>`` for example. ## * HTML tags are case insensitive, XML tags are case sensitive. Since this ## library can parse both, only the client knows which comparison is to be ## used. 
## * Thus the checks would have been very difficult to implement properly with -## little benefit, especially since they are simple to implement in the +## little benefit, especially since they are simple to implement in the ## client. The client should use the `errorMsgExpected` proc to generate ## a nice error message that fits the other error messages this library ## creates. ## ## -## Example 1: Retrieve HTML title -## ============================== -## -## The file ``examples/htmltitle.nim`` demonstrates how to use the -## XML parser to accomplish a simple task: To determine the title of an HTML -## document. -## -## .. code-block:: nim -## :file: examples/htmltitle.nim -## -## -## Example 2: Retrieve all HTML links -## ================================== -## -## The file ``examples/htmlrefs.nim`` demonstrates how to use the -## XML parser to accomplish another simple task: To determine all the links -## an HTML document contains. -## -## .. code-block:: nim -## :file: examples/htmlrefs.nim -## -import - hashes, strutils, lexbase, streams, unicode +##[ + +Example 1: Retrieve HTML title +============================== + +The file ``examples/htmltitle.nim`` demonstrates how to use the +XML parser to accomplish a simple task: To determine the title of an HTML +document. + + ```nim + # Example program to show the parsexml module + # This program reads an HTML file and writes its title to stdout. + # Errors and whitespace are ignored. 
+ + import std/[os, streams, parsexml, strutils] + + if paramCount() < 1: + quit("Usage: htmltitle filename[.html]") + + var filename = addFileExt(paramStr(1), "html") + var s = newFileStream(filename, fmRead) + if s == nil: quit("cannot open the file " & filename) + var x: XmlParser + open(x, s, filename) + while true: + x.next() + case x.kind + of xmlElementStart: + if cmpIgnoreCase(x.elementName, "title") == 0: + var title = "" + x.next() # skip "<title>" + while x.kind == xmlCharData: + title.add(x.charData) + x.next() + if x.kind == xmlElementEnd and cmpIgnoreCase(x.elementName, "title") == 0: + echo("Title: " & title) + quit(0) # Success! + else: + echo(x.errorMsgExpected("/title")) + + of xmlEof: break # end of file reached + else: discard # ignore other events + + x.close() + quit("Could not determine title!") + ``` + +]## + +##[ + +Example 2: Retrieve all HTML links +================================== + +The file ``examples/htmlrefs.nim`` demonstrates how to use the +XML parser to accomplish another simple task: To determine all the links +an HTML document contains. + + ```nim + # Example program to show the new parsexml module + # This program reads an HTML file and writes all its used links to stdout. + # Errors and whitespace are ignored. 
+ + import std/[os, streams, parsexml, strutils] + + proc `=?=` (a, b: string): bool = + # little trick: define our own comparator that ignores case + return cmpIgnoreCase(a, b) == 0 + + if paramCount() < 1: + quit("Usage: htmlrefs filename[.html]") + + var links = 0 # count the number of links + var filename = addFileExt(paramStr(1), "html") + var s = newFileStream(filename, fmRead) + if s == nil: quit("cannot open the file " & filename) + var x: XmlParser + open(x, s, filename) + next(x) # get first event + block mainLoop: + while true: + case x.kind + of xmlElementOpen: + # the <a href = "xyz"> tag we are interested in always has an attribute, + # thus we search for ``xmlElementOpen`` and not for ``xmlElementStart`` + if x.elementName =?= "a": + x.next() + if x.kind == xmlAttribute: + if x.attrKey =?= "href": + var link = x.attrValue + inc(links) + # skip until we have an ``xmlElementClose`` event + while true: + x.next() + case x.kind + of xmlEof: break mainLoop + of xmlElementClose: break + else: discard + x.next() # skip ``xmlElementClose`` + # now we have the description for the ``a`` element + var desc = "" + while x.kind == xmlCharData: + desc.add(x.charData) + x.next() + echo(desc & ": " & link) + else: + x.next() + of xmlEof: break # end of file reached + of xmlError: + echo(errorMsg(x)) + x.next() + else: x.next() # skip other events + + echo($links & " link(s) found!") + x.close() + ``` + +]## + +import + std/[strutils, lexbase, streams, unicode] + +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio] # the parser treats ``<br />`` as ``<br></br>`` -# xmlElementCloseEnd, ## ``/>`` - -type - XmlEventKind* = enum ## enumation of all events that may occur when parsing - xmlError, ## an error occurred during parsing - xmlEof, ## end of file reached - xmlCharData, ## character data - xmlWhitespace, ## whitespace has been parsed - xmlComment, ## a comment has been parsed - xmlPI, ## processing instruction (``<?name something ?>``) - 
xmlElementStart, ## ``<elem>`` - xmlElementEnd, ## ``</elem>`` - xmlElementOpen, ## ``<elem - xmlAttribute, ## ``key = "value"`` pair - xmlElementClose, ## ``>`` - xmlCData, ## ``<![CDATA[`` ... data ... ``]]>`` - xmlEntity, ## &entity; - xmlSpecial ## ``<! ... data ... >`` - - XmlErrorKind* = enum ## enumeration that lists all errors that can occur - errNone, ## no error - errEndOfCDataExpected, ## ``]]>`` expected - errNameExpected, ## name expected - errSemicolonExpected, ## ``;`` expected - errQmGtExpected, ## ``?>`` expected - errGtExpected, ## ``>`` expected - errEqExpected, ## ``=`` expected - errQuoteExpected, ## ``"`` or ``'`` expected - errEndOfCommentExpected ## ``-->`` expected - - ParserState = enum +# xmlElementCloseEnd, ## ``/>`` + +type + XmlEventKind* = enum ## enumeration of all events that may occur when parsing + xmlError, ## an error occurred during parsing + xmlEof, ## end of file reached + xmlCharData, ## character data + xmlWhitespace, ## whitespace has been parsed + xmlComment, ## a comment has been parsed + xmlPI, ## processing instruction (``<?name something ?>``) + xmlElementStart, ## ``<elem>`` + xmlElementEnd, ## ``</elem>`` + xmlElementOpen, ## ``<elem + xmlAttribute, ## ``key = "value"`` pair + xmlElementClose, ## ``>`` + xmlCData, ## ``<![CDATA[`` ... data ... ``]]>`` + xmlEntity, ## &entity; + xmlSpecial ## ``<! ... data ... 
>`` + + XmlErrorKind* = enum ## enumeration that lists all errors that can occur + errNone, ## no error + errEndOfCDataExpected, ## ``]]>`` expected + errNameExpected, ## name expected + errSemicolonExpected, ## ``;`` expected + errQmGtExpected, ## ``?>`` expected + errGtExpected, ## ``>`` expected + errEqExpected, ## ``=`` expected + errQuoteExpected, ## ``"`` or ``'`` expected + errEndOfCommentExpected ## ``-->`` expected + errAttributeValueExpected ## non-empty attribute value expected + + ParserState = enum stateStart, stateNormal, stateAttr, stateEmptyElementTag, stateError - XmlParseOption* = enum ## options for the XML parser - reportWhitespace, ## report whitespace - reportComments ## report comments + XmlParseOption* = enum ## options for the XML parser + reportWhitespace, ## report whitespace + reportComments ## report comments + allowUnquotedAttribs ## allow unquoted attribute values (for HTML) + allowEmptyAttribs ## allow empty attributes (without explicit value) XmlParser* = object of BaseLexer ## the parser object. a, b, c: string kind: XmlEventKind err: XmlErrorKind state: ParserState + cIsEmpty: bool filename: string options: set[XmlParseOption] -{.deprecated: [TXmlParser: XmlParser, TXmlParseOptions: XmlParseOption, - TXmlError: XmlErrorKind, TXmlEventKind: XmlEventKind].} - const errorMessages: array[XmlErrorKind, string] = [ "no error", @@ -111,7 +213,8 @@ const "'>' expected", "'=' expected", "'\"' or \"'\" expected", - "'-->' expected" + "'-->' expected", + "attribute value expected" ] proc open*(my: var XmlParser, input: Stream, filename: string, @@ -121,178 +224,197 @@ proc open*(my: var XmlParser, input: Stream, filename: string, ## the `options` parameter: If `options` contains ``reportWhitespace`` ## a whitespace token is reported as an ``xmlWhitespace`` event. ## If `options` contains ``reportComments`` a comment token is reported as an - ## ``xmlComment`` event. - lexbase.open(my, input) + ## ``xmlComment`` event. 
+ lexbase.open(my, input, 8192, {'\c', '\L', '/'}) my.filename = filename my.state = stateStart my.kind = xmlError my.a = "" my.b = "" - my.c = nil + my.c = "" + my.cIsEmpty = true my.options = options - -proc close*(my: var XmlParser) {.inline.} = + +proc close*(my: var XmlParser) {.inline.} = ## closes the parser `my` and its associated input stream. lexbase.close(my) -proc kind*(my: XmlParser): XmlEventKind {.inline.} = +proc kind*(my: XmlParser): XmlEventKind {.inline.} = ## returns the current event type for the XML parser return my.kind template charData*(my: XmlParser): string = - ## returns the character data for the events: ``xmlCharData``, + ## returns the character data for the events: ``xmlCharData``, ## ``xmlWhitespace``, ``xmlComment``, ``xmlCData``, ``xmlSpecial`` - assert(my.kind in {xmlCharData, xmlWhitespace, xmlComment, xmlCData, + ## Raises an assertion in debug mode if ``my.kind`` is not one + ## of those events. In release mode, this will not trigger an error + ## but the value returned will not be valid. + assert(my.kind in {xmlCharData, xmlWhitespace, xmlComment, xmlCData, xmlSpecial}) my.a template elementName*(my: XmlParser): string = - ## returns the element name for the events: ``xmlElementStart``, + ## returns the element name for the events: ``xmlElementStart``, ## ``xmlElementEnd``, ``xmlElementOpen`` + ## Raises an assertion in debug mode if ``my.kind`` is not one + ## of those events. In release mode, this will not trigger an error + ## but the value returned will not be valid. assert(my.kind in {xmlElementStart, xmlElementEnd, xmlElementOpen}) my.a template entityName*(my: XmlParser): string = ## returns the entity name for the event: ``xmlEntity`` + ## Raises an assertion in debug mode if ``my.kind`` is not + ## ``xmlEntity``. In release mode, this will not trigger an error + ## but the value returned will not be valid. 
assert(my.kind == xmlEntity) my.a - + template attrKey*(my: XmlParser): string = ## returns the attribute key for the event ``xmlAttribute`` + ## Raises an assertion in debug mode if ``my.kind`` is not + ## ``xmlAttribute``. In release mode, this will not trigger an error + ## but the value returned will not be valid. assert(my.kind == xmlAttribute) my.a - + template attrValue*(my: XmlParser): string = ## returns the attribute value for the event ``xmlAttribute`` + ## Raises an assertion in debug mode if ``my.kind`` is not + ## ``xmlAttribute``. In release mode, this will not trigger an error + ## but the value returned will not be valid. assert(my.kind == xmlAttribute) my.b template piName*(my: XmlParser): string = ## returns the processing instruction name for the event ``xmlPI`` + ## Raises an assertion in debug mode if ``my.kind`` is not + ## ``xmlPI``. In release mode, this will not trigger an error + ## but the value returned will not be valid. assert(my.kind == xmlPI) my.a template piRest*(my: XmlParser): string = ## returns the rest of the processing instruction for the event ``xmlPI`` + ## Raises an assertion in debug mode if ``my.kind`` is not + ## ``xmlPI``. In release mode, this will not trigger an error + ## but the value returned will not be valid. assert(my.kind == xmlPI) my.b -proc rawData*(my: XmlParser): string {.inline.} = +proc rawData*(my: var XmlParser): lent string {.inline.} = ## returns the underlying 'data' string by reference. ## This is only used for speed hacks. - shallowCopy(result, my.a) + result = my.a -proc rawData2*(my: XmlParser): string {.inline.} = +proc rawData2*(my: var XmlParser): lent string {.inline.} = ## returns the underlying second 'data' string by reference. ## This is only used for speed hacks. - shallowCopy(result, my.b) + result = my.b -proc getColumn*(my: XmlParser): int {.inline.} = +proc getColumn*(my: XmlParser): int {.inline.} = ## get the current column the parser has arrived at. 
result = getColNumber(my, my.bufpos) -proc getLine*(my: XmlParser): int {.inline.} = +proc getLine*(my: XmlParser): int {.inline.} = ## get the current line the parser has arrived at. result = my.lineNumber -proc getFilename*(my: XmlParser): string {.inline.} = +proc getFilename*(my: XmlParser): string {.inline.} = ## get the filename of the file that the parser processes. result = my.filename - -proc errorMsg*(my: XmlParser): string = + +proc errorMsg*(my: XmlParser): string = ## returns a helpful error message for the event ``xmlError`` assert(my.kind == xmlError) result = "$1($2, $3) Error: $4" % [ my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]] -proc errorMsgExpected*(my: XmlParser, tag: string): string = +proc errorMsgExpected*(my: XmlParser, tag: string): string = ## returns an error message "<tag> expected" in the same format as the - ## other error messages + ## other error messages result = "$1($2, $3) Error: $4" % [ my.filename, $getLine(my), $getColumn(my), "<$1> expected" % tag] -proc errorMsg*(my: XmlParser, msg: string): string = +proc errorMsg*(my: XmlParser, msg: string): string = ## returns an error message with text `msg` in the same format as the - ## other error messages + ## other error messages result = "$1($2, $3) Error: $4" % [ my.filename, $getLine(my), $getColumn(my), msg] - -proc markError(my: var XmlParser, kind: XmlErrorKind) {.inline.} = + +proc markError(my: var XmlParser, kind: XmlErrorKind) {.inline.} = my.err = kind my.state = stateError -proc parseCDATA(my: var XmlParser) = +proc parseCDATA(my: var XmlParser) = var pos = my.bufpos + len("<![CDATA[") - var buf = my.buf while true: - case buf[pos] + case my.buf[pos] of ']': - if buf[pos+1] == ']' and buf[pos+2] == '>': + if my.buf[pos+1] == ']' and my.buf[pos+2] == '>': inc(pos, 3) break add(my.a, ']') inc(pos) - of '\0': + of '\0': markError(my, errEndOfCDataExpected) break - of '\c': + of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf add(my.a, '\L') - of 
'\L': + of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf add(my.a, '\L') + of '/': + pos = lexbase.handleRefillChar(my, pos) + add(my.a, '/') else: - add(my.a, buf[pos]) - inc(pos) + add(my.a, my.buf[pos]) + inc(pos) my.bufpos = pos # store back my.kind = xmlCData -proc parseComment(my: var XmlParser) = +proc parseComment(my: var XmlParser) = var pos = my.bufpos + len("<!--") - var buf = my.buf while true: - case buf[pos] + case my.buf[pos] of '-': - if buf[pos+1] == '-' and buf[pos+2] == '>': + if my.buf[pos+1] == '-' and my.buf[pos+2] == '>': inc(pos, 3) break if my.options.contains(reportComments): add(my.a, '-') inc(pos) - of '\0': + of '\0': markError(my, errEndOfCommentExpected) break - of '\c': + of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf if my.options.contains(reportComments): add(my.a, '\L') - of '\L': + of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf if my.options.contains(reportComments): add(my.a, '\L') + of '/': + pos = lexbase.handleRefillChar(my, pos) + if my.options.contains(reportComments): add(my.a, '/') else: - if my.options.contains(reportComments): add(my.a, buf[pos]) + if my.options.contains(reportComments): add(my.a, my.buf[pos]) inc(pos) my.bufpos = pos my.kind = xmlComment -proc parseWhitespace(my: var XmlParser, skip=false) = +proc parseWhitespace(my: var XmlParser, skip = false) = var pos = my.bufpos - var buf = my.buf - while true: - case buf[pos] - of ' ', '\t': - if not skip: add(my.a, buf[pos]) + while true: + case my.buf[pos] + of ' ', '\t': + if not skip: add(my.a, my.buf[pos]) inc(pos) - of '\c': + of '\c': # the specification says that CR-LF, CR are to be transformed to LF pos = lexbase.handleCR(my, pos) - buf = my.buf if not skip: add(my.a, '\L') - of '\L': + of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf if not skip: add(my.a, '\L') else: break @@ -302,84 +424,84 @@ const NameStartChar = {'A'..'Z', 'a'..'z', '_', ':', '\128'..'\255'} NameChar = {'A'..'Z', 'a'..'z', '0'..'9', '.', '-', '_', 
':', '\128'..'\255'} -proc parseName(my: var XmlParser, dest: var string) = +proc parseName(my: var XmlParser, dest: var string) = var pos = my.bufpos - var buf = my.buf - if buf[pos] in NameStartChar: + if my.buf[pos] in NameStartChar: while true: - add(dest, buf[pos]) + add(dest, my.buf[pos]) inc(pos) - if buf[pos] notin NameChar: break + if my.buf[pos] notin NameChar: break my.bufpos = pos else: markError(my, errNameExpected) -proc parseEntity(my: var XmlParser, dest: var string) = +proc parseEntity(my: var XmlParser, dest: var string) = var pos = my.bufpos+1 - var buf = my.buf my.kind = xmlCharData - if buf[pos] == '#': + if my.buf[pos] == '#': var r: int inc(pos) - if buf[pos] == 'x': + if my.buf[pos] == 'x': inc(pos) while true: - case buf[pos] - of '0'..'9': r = (r shl 4) or (ord(buf[pos]) - ord('0')) - of 'a'..'f': r = (r shl 4) or (ord(buf[pos]) - ord('a') + 10) - of 'A'..'F': r = (r shl 4) or (ord(buf[pos]) - ord('A') + 10) + case my.buf[pos] + of '0'..'9': r = (r shl 4) or (ord(my.buf[pos]) - ord('0')) + of 'a'..'f': r = (r shl 4) or (ord(my.buf[pos]) - ord('a') + 10) + of 'A'..'F': r = (r shl 4) or (ord(my.buf[pos]) - ord('A') + 10) else: break inc(pos) else: - while buf[pos] in {'0'..'9'}: - r = r * 10 + (ord(buf[pos]) - ord('0')) + while my.buf[pos] in {'0'..'9'}: + r = r * 10 + (ord(my.buf[pos]) - ord('0')) inc(pos) add(dest, toUTF8(Rune(r))) - elif buf[pos] == 'l' and buf[pos+1] == 't' and buf[pos+2] == ';': + elif my.buf[pos] == 'l' and my.buf[pos+1] == 't' and my.buf[pos+2] == ';': add(dest, '<') inc(pos, 2) - elif buf[pos] == 'g' and buf[pos+1] == 't' and buf[pos+2] == ';': + elif my.buf[pos] == 'g' and my.buf[pos+1] == 't' and my.buf[pos+2] == ';': add(dest, '>') inc(pos, 2) - elif buf[pos] == 'a' and buf[pos+1] == 'm' and buf[pos+2] == 'p' and - buf[pos+3] == ';': + elif my.buf[pos] == 'a' and my.buf[pos+1] == 'm' and my.buf[pos+2] == 'p' and + my.buf[pos+3] == ';': add(dest, '&') inc(pos, 3) - elif buf[pos] == 'a' and buf[pos+1] == 'p' and 
buf[pos+2] == 'o' and - buf[pos+3] == 's' and buf[pos+4] == ';': + elif my.buf[pos] == 'a' and my.buf[pos+1] == 'p' and my.buf[pos+2] == 'o' and + my.buf[pos+3] == 's' and my.buf[pos+4] == ';': add(dest, '\'') inc(pos, 4) - elif buf[pos] == 'q' and buf[pos+1] == 'u' and buf[pos+2] == 'o' and - buf[pos+3] == 't' and buf[pos+4] == ';': + elif my.buf[pos] == 'q' and my.buf[pos+1] == 'u' and my.buf[pos+2] == 'o' and + my.buf[pos+3] == 't' and my.buf[pos+4] == ';': add(dest, '"') inc(pos, 4) else: my.bufpos = pos - parseName(my, dest) + var name = "" + parseName(my, name) pos = my.bufpos - if my.err != errNameExpected: + if my.err != errNameExpected and my.buf[pos] == ';': my.kind = xmlEntity else: add(dest, '&') - if buf[pos] == ';': + add(dest, name) + if my.buf[pos] == ';': inc(pos) else: - markError(my, errSemicolonExpected) + my.err = errSemicolonExpected + # do not overwrite 'my.state' here, it's a benign error my.bufpos = pos -proc parsePI(my: var XmlParser) = +proc parsePI(my: var XmlParser) = inc(my.bufpos, "<?".len) parseName(my, my.a) var pos = my.bufpos - var buf = my.buf setLen(my.b, 0) - while true: - case buf[pos] + while true: + case my.buf[pos] of '\0': markError(my, errQmGtExpected) break of '?': - if buf[pos+1] == '>': + if my.buf[pos+1] == '>': inc(pos, 2) break add(my.b, '?') @@ -387,29 +509,29 @@ proc parsePI(my: var XmlParser) = of '\c': # the specification says that CR-LF, CR are to be transformed to LF pos = lexbase.handleCR(my, pos) - buf = my.buf add(my.b, '\L') - of '\L': + of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf add(my.b, '\L') + of '/': + pos = lexbase.handleRefillChar(my, pos) + add(my.b, '/') else: - add(my.b, buf[pos]) + add(my.b, my.buf[pos]) inc(pos) my.bufpos = pos my.kind = xmlPI -proc parseSpecial(my: var XmlParser) = +proc parseSpecial(my: var XmlParser) = # things that start with <! 
var pos = my.bufpos + 2 - var buf = my.buf var opentags = 0 - while true: - case buf[pos] + while true: + case my.buf[pos] of '\0': markError(my, errGtExpected) break - of '<': + of '<': inc(opentags) inc(pos) add(my.a, '<') @@ -420,190 +542,227 @@ proc parseSpecial(my: var XmlParser) = dec(opentags) inc(pos) add(my.a, '>') - of '\c': + of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf add(my.a, '\L') - of '\L': + of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf add(my.a, '\L') + of '/': + pos = lexbase.handleRefillChar(my, pos) + add(my.b, '/') else: - add(my.a, buf[pos]) + add(my.a, my.buf[pos]) inc(pos) my.bufpos = pos my.kind = xmlSpecial -proc parseTag(my: var XmlParser) = +proc parseTag(my: var XmlParser) = inc(my.bufpos) parseName(my, my.a) # if we have no name, do not interpret the '<': - if my.a.len == 0: + if my.a.len == 0: my.kind = xmlCharData add(my.a, '<') return - parseWhitespace(my, skip=true) - if my.buf[my.bufpos] in NameStartChar: + parseWhitespace(my, skip = true) + if my.buf[my.bufpos] in NameStartChar: # an attribute follows: my.kind = xmlElementOpen my.state = stateAttr my.c = my.a # save for later + my.cIsEmpty = false else: my.kind = xmlElementStart - if my.buf[my.bufpos] == '/' and my.buf[my.bufpos+1] == '>': - inc(my.bufpos, 2) + let slash = my.buf[my.bufpos] == '/' + if slash: + my.bufpos = lexbase.handleRefillChar(my, my.bufpos) + if slash and my.buf[my.bufpos] == '>': + inc(my.bufpos) my.state = stateEmptyElementTag - my.c = nil + my.c = "" + my.cIsEmpty = true elif my.buf[my.bufpos] == '>': - inc(my.bufpos) + inc(my.bufpos) else: markError(my, errGtExpected) - -proc parseEndTag(my: var XmlParser) = - inc(my.bufpos, 2) + +proc parseEndTag(my: var XmlParser) = + my.bufpos = lexbase.handleRefillChar(my, my.bufpos+1) + #inc(my.bufpos, 2) parseName(my, my.a) - parseWhitespace(my, skip=true) + parseWhitespace(my, skip = true) if my.buf[my.bufpos] == '>': inc(my.bufpos) else: markError(my, errGtExpected) my.kind = xmlElementEnd 
-proc parseAttribute(my: var XmlParser) = +proc parseAttribute(my: var XmlParser) = my.kind = xmlAttribute setLen(my.a, 0) setLen(my.b, 0) parseName(my, my.a) # if we have no name, we have '<tag attr= key %&$$%': - if my.a.len == 0: + if my.a.len == 0: markError(my, errGtExpected) return - parseWhitespace(my, skip=true) + + let startPos = my.bufpos + parseWhitespace(my, skip = true) if my.buf[my.bufpos] != '=': - markError(my, errEqExpected) + if allowEmptyAttribs notin my.options or + (my.buf[my.bufpos] != '>' and my.bufpos == startPos): + markError(my, errEqExpected) return + inc(my.bufpos) - parseWhitespace(my, skip=true) + parseWhitespace(my, skip = true) var pos = my.bufpos - var buf = my.buf - if buf[pos] in {'\'', '"'}: - var quote = buf[pos] + if my.buf[pos] in {'\'', '"'}: + var quote = my.buf[pos] var pendingSpace = false inc(pos) - while true: - case buf[pos] + while true: + case my.buf[pos] of '\0': markError(my, errQuoteExpected) break - of '&': - if pendingSpace: + of '&': + if pendingSpace: add(my.b, ' ') pendingSpace = false my.bufpos = pos parseEntity(my, my.b) my.kind = xmlAttribute # parseEntity overwrites my.kind! 
pos = my.bufpos - of ' ', '\t': + of ' ', '\t': pendingSpace = true inc(pos) - of '\c': + of '\c': pos = lexbase.handleCR(my, pos) - buf = my.buf pendingSpace = true - of '\L': + of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf pendingSpace = true + of '/': + pos = lexbase.handleRefillChar(my, pos) + add(my.b, '/') else: - if buf[pos] == quote: + if my.buf[pos] == quote: inc(pos) break else: - if pendingSpace: + if pendingSpace: add(my.b, ' ') pendingSpace = false - add(my.b, buf[pos]) + add(my.b, my.buf[pos]) inc(pos) + elif allowUnquotedAttribs in my.options: + const disallowedChars = {'"', '\'', '`', '=', '<', '>', ' ', + '\0', '\t', '\L', '\F', '\f'} + let startPos = pos + while (let c = my.buf[pos]; c notin disallowedChars): + if c == '&': + my.bufpos = pos + parseEntity(my, my.b) + my.kind = xmlAttribute # parseEntity overwrites my.kind! + pos = my.bufpos + elif c == '/': + pos = lexbase.handleRefillChar(my, pos) + add(my.b, '/') + else: + add(my.b, c) + inc(pos) + if pos == startPos: + markError(my, errAttributeValueExpected) else: - markError(my, errQuoteExpected) + markError(my, errQuoteExpected) + # error corrections: guess what was meant + while my.buf[pos] != '>' and my.buf[pos] > ' ': + add(my.b, my.buf[pos]) + inc pos my.bufpos = pos - parseWhitespace(my, skip=true) - -proc parseCharData(my: var XmlParser) = + parseWhitespace(my, skip = true) + +proc parseCharData(my: var XmlParser) = var pos = my.bufpos - var buf = my.buf - while true: - case buf[pos] + while true: + case my.buf[pos] of '\0', '<', '&': break - of '\c': + of '\c': # the specification says that CR-LF, CR are to be transformed to LF pos = lexbase.handleCR(my, pos) - buf = my.buf add(my.a, '\L') - of '\L': + of '\L': pos = lexbase.handleLF(my, pos) - buf = my.buf add(my.a, '\L') + of '/': + pos = lexbase.handleRefillChar(my, pos) + add(my.a, '/') else: - add(my.a, buf[pos]) + add(my.a, my.buf[pos]) inc(pos) my.bufpos = pos my.kind = xmlCharData -proc rawGetTok(my: var XmlParser) = 
+proc rawGetTok(my: var XmlParser) = my.kind = xmlError setLen(my.a, 0) var pos = my.bufpos - var buf = my.buf - case buf[pos] - of '<': - case buf[pos+1] + case my.buf[pos] + of '<': + case my.buf[pos+1] of '/': parseEndTag(my) of '!': - if buf[pos+2] == '[' and buf[pos+3] == 'C' and buf[pos+4] == 'D' and - buf[pos+5] == 'A' and buf[pos+6] == 'T' and buf[pos+7] == 'A' and - buf[pos+8] == '[': + if my.buf[pos+2] == '[' and my.buf[pos+3] == 'C' and + my.buf[pos+4] == 'D' and my.buf[pos+5] == 'A' and + my.buf[pos+6] == 'T' and my.buf[pos+7] == 'A' and + my.buf[pos+8] == '[': parseCDATA(my) - elif buf[pos+2] == '-' and buf[pos+3] == '-': + elif my.buf[pos+2] == '-' and my.buf[pos+3] == '-': parseComment(my) - else: + else: parseSpecial(my) of '?': parsePI(my) - else: + else: parseTag(my) - of ' ', '\t', '\c', '\l': + of ' ', '\t', '\c', '\l': parseWhitespace(my) my.kind = xmlWhitespace - of '\0': + of '\0': my.kind = xmlEof of '&': parseEntity(my, my.a) - else: + else: parseCharData(my) assert my.kind != xmlError - -proc getTok(my: var XmlParser) = + +proc getTok(my: var XmlParser) = while true: + let lastKind = my.kind rawGetTok(my) case my.kind - of xmlComment: + of xmlComment: if my.options.contains(reportComments): break - of xmlWhitespace: - if my.options.contains(reportWhitespace): break + of xmlWhitespace: + if my.options.contains(reportWhitespace) or lastKind in {xmlCharData, + xmlComment, xmlEntity}: + break else: break - -proc next*(my: var XmlParser) = + +proc next*(my: var XmlParser) = ## retrieves the first/next event. This controls the parser. 
case my.state of stateNormal: - getTok(my) + getTok(my) of stateStart: my.state = stateNormal getTok(my) - if my.kind == xmlPI and my.a == "xml": + if my.kind == xmlPI and my.a == "xml": # just skip the first ``<?xml >`` processing instruction getTok(my) of stateAttr: @@ -612,24 +771,28 @@ proc next*(my: var XmlParser) = my.kind = xmlElementClose inc(my.bufpos) my.state = stateNormal - elif my.buf[my.bufpos] == '/' and my.buf[my.bufpos+1] == '>': - my.kind = xmlElementClose - inc(my.bufpos, 2) - my.state = stateEmptyElementTag + elif my.buf[my.bufpos] == '/': + my.bufpos = lexbase.handleRefillChar(my, my.bufpos) + if my.buf[my.bufpos] == '>': + my.kind = xmlElementClose + inc(my.bufpos) + my.state = stateEmptyElementTag + else: + markError(my, errGtExpected) else: parseAttribute(my) # state remains the same of stateEmptyElementTag: my.state = stateNormal my.kind = xmlElementEnd - if not my.c.isNil: + if not my.cIsEmpty: my.a = my.c - of stateError: + of stateError: my.kind = xmlError my.state = stateNormal - -when isMainModule: - import os + +when not defined(testing) and isMainModule: + import std/os var s = newFileStream(paramStr(1), fmRead) if s == nil: quit("cannot open the file" & paramStr(1)) var x: XmlParser @@ -645,13 +808,13 @@ when isMainModule: of xmlPI: echo("<? 
$1 ## $2 ?>" % [x.piName, x.piRest]) of xmlElementStart: echo("<$1>" % x.elementName) of xmlElementEnd: echo("</$1>" % x.elementName) - - of xmlElementOpen: echo("<$1" % x.elementName) - of xmlAttribute: + + of xmlElementOpen: echo("<$1" % x.elementName) + of xmlAttribute: echo("Key: " & x.attrKey) echo("Value: " & x.attrValue) - - of xmlElementClose: echo(">") + + of xmlElementClose: echo(">") of xmlCData: echo("<![CDATA[$1]]>" % x.charData) of xmlEntity: @@ -659,4 +822,3 @@ when isMainModule: of xmlSpecial: echo("SPECIAL: " & x.charData) close(x) - diff --git a/lib/pure/pathnorm.nim b/lib/pure/pathnorm.nim new file mode 100644 index 000000000..4cdc02303 --- /dev/null +++ b/lib/pure/pathnorm.nim @@ -0,0 +1,121 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2018 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## OS-Path normalization. Used by `os.nim` but also +## generally useful for dealing with paths. +## +## Unstable API. + +# Yes, this uses import here, not include so that +# we don't end up exporting these symbols from pathnorm and os: +import std/private/osseps + +type + PathIter* = object + i, prev: int + notFirst: bool + +proc hasNext*(it: PathIter; x: string): bool = + it.i < x.len + +proc next*(it: var PathIter; x: string): (int, int) = + it.prev = it.i + if not it.notFirst and x[it.i] in {DirSep, AltSep}: + # absolute path: + inc it.i + else: + while it.i < x.len and x[it.i] notin {DirSep, AltSep}: inc it.i + if it.i > it.prev: + result = (it.prev, it.i-1) + elif hasNext(it, x): + result = next(it, x) + # skip all separators: + while it.i < x.len and x[it.i] in {DirSep, AltSep}: inc it.i + it.notFirst = true + +iterator dirs(x: string): (int, int) = + var it = default PathIter + while hasNext(it, x): yield next(it, x) + +proc isDot(x: string; bounds: (int, int)): bool = + bounds[1] == bounds[0] and x[bounds[0]] == '.' 
+ +proc isDotDot(x: string; bounds: (int, int)): bool = + bounds[1] == bounds[0] + 1 and x[bounds[0]] == '.' and x[bounds[0]+1] == '.' + +proc isSlash(x: string; bounds: (int, int)): bool = + bounds[1] == bounds[0] and x[bounds[0]] in {DirSep, AltSep} + +when doslikeFileSystem: + import std/private/ntpath + +proc addNormalizePath*(x: string; result: var string; state: var int; + dirSep = DirSep) = + ## Low level proc. Undocumented. + + when doslikeFileSystem: # Add Windows drive at start without normalization + var x = x + if result == "": + let (drive, file) = splitDrive(x) + x = file + result.add drive + for c in result.mitems: + if c in {DirSep, AltSep}: + c = dirSep + + # state: 0th bit set if isAbsolute path. Other bits count + # the number of path components. + var it: PathIter + it.notFirst = (state shr 1) > 0 + if it.notFirst: + while it.i < x.len and x[it.i] in {DirSep, AltSep}: inc it.i + while hasNext(it, x): + let b = next(it, x) + if (state shr 1 == 0) and isSlash(x, b): + if result.len == 0 or result[result.len - 1] notin {DirSep, AltSep}: + result.add dirSep + state = state or 1 + elif isDotDot(x, b): + if (state shr 1) >= 1: + var d = result.len + # f/.. + # We could handle stripping trailing sep here: foo// => foo like this: + # while (d-1) > (state and 1) and result[d-1] in {DirSep, AltSep}: dec d + # but right now we instead handle it inside os.joinPath + + # strip path component: foo/bar => foo + while (d-1) > (state and 1) and result[d-1] notin {DirSep, AltSep}: + dec d + if d > 0: + setLen(result, d-1) + dec state, 2 + else: + if result.len > 0 and result[result.len - 1] notin {DirSep, AltSep}: + result.add dirSep + result.add substr(x, b[0], b[1]) + elif isDot(x, b): + discard "discard the dot" + elif b[1] >= b[0]: + if result.len > 0 and result[result.len - 1] notin {DirSep, AltSep}: + result.add dirSep + result.add substr(x, b[0], b[1]) + inc state, 2 + if result == "" and x != "": result = "." 
+ +proc normalizePath*(path: string; dirSep = DirSep): string = + runnableExamples: + when defined(posix): + doAssert normalizePath("./foo//bar/../baz") == "foo/baz" + + ## - Turns multiple slashes into single slashes. + ## - Resolves `'/foo/../bar'` to `'/bar'`. + ## - Removes `'./'` from the path, but `"foo/.."` becomes `"."`. + result = newStringOfCap(path.len) + var state = 0 + addNormalizePath(path, result, state, dirSep) diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim index 39f0bfa95..2969fd6d7 100644 --- a/lib/pure/pegs.nim +++ b/lib/pure/pegs.nim @@ -12,19 +12,22 @@ ## Matching performance is hopefully competitive with optimized regular ## expression engines. ## -## .. include:: ../doc/pegdocs.txt +## .. include:: ../../doc/pegdocs.txt ## include "system/inclrtl" +when defined(nimPreviewSlimSystem): + import std/[syncio, assertions] const useUnicode = true ## change this to deactivate proper UTF-8 support -import - strutils +import std/[strutils, macros] +import std/private/decode_helpers when useUnicode: - import unicode + import std/unicode + export unicode.`==` const InlineThreshold = 5 ## number of leaves; -1 to disable inlining @@ -32,102 +35,147 @@ const ## can be captured. More subpatterns cannot be captured! type - PegKind = enum + PegKind* = enum pkEmpty, - pkAny, ## any character (.) - pkAnyRune, ## any Unicode character (_) - pkNewLine, ## CR-LF, LF, CR - pkLetter, ## Unicode letter - pkLower, ## Unicode lower case letter - pkUpper, ## Unicode upper case letter - pkTitle, ## Unicode title character - pkWhitespace, ## Unicode whitespace character + pkAny, ## any character (.) 
+ pkAnyRune, ## any Unicode character (_) + pkNewLine, ## CR-LF, LF, CR + pkLetter, ## Unicode letter + pkLower, ## Unicode lower case letter + pkUpper, ## Unicode upper case letter + pkTitle, ## Unicode title character + pkWhitespace, ## Unicode whitespace character pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, - pkChar, ## single character to match + pkChar, ## single character to match pkCharChoice, pkNonTerminal, - pkSequence, ## a b c ... --> Internal DSL: peg(a, b, c) - pkOrderedChoice, ## a / b / ... --> Internal DSL: a / b or /[a, b, c] - pkGreedyRep, ## a* --> Internal DSL: *a - ## a+ --> (a a*) - pkGreedyRepChar, ## x* where x is a single character (superop) - pkGreedyRepSet, ## [set]* (superop) - pkGreedyAny, ## .* or _* (superop) - pkOption, ## a? --> Internal DSL: ?a - pkAndPredicate, ## &a --> Internal DSL: &a - pkNotPredicate, ## !a --> Internal DSL: !a - pkCapture, ## {a} --> Internal DSL: capture(a) - pkBackRef, ## $i --> Internal DSL: backref(i) + pkSequence, ## a b c ... --> Internal DSL: peg(a, b, c) + pkOrderedChoice, ## a / b / ... --> Internal DSL: a / b or /[a, b, c] + pkGreedyRep, ## a* --> Internal DSL: *a + ## a+ --> (a a*) + pkGreedyRepChar, ## x* where x is a single character (superop) + pkGreedyRepSet, ## [set]* (superop) + pkGreedyAny, ## .* or _* (superop) + pkOption, ## a? 
--> Internal DSL: ?a + pkAndPredicate, ## &a --> Internal DSL: &a + pkNotPredicate, ## !a --> Internal DSL: !a + pkCapture, ## {a} --> Internal DSL: capture(a) + pkBackRef, ## $i --> Internal DSL: backref(i) pkBackRefIgnoreCase, pkBackRefIgnoreStyle, - pkSearch, ## @a --> Internal DSL: !*a - pkCapturedSearch, ## {@} a --> Internal DSL: !*\a - pkRule, ## a <- b - pkList, ## a, b - pkStartAnchor ## ^ --> Internal DSL: startAnchor() - NonTerminalFlag = enum + pkSearch, ## @a --> Internal DSL: !*a + pkCapturedSearch, ## {@} a --> Internal DSL: !*\a + pkRule, ## a <- b + pkList, ## a, b + pkStartAnchor ## ^ --> Internal DSL: startAnchor() + NonTerminalFlag* = enum ntDeclared, ntUsed - NonTerminalObj = object ## represents a non terminal symbol - name: string ## the name of the symbol - line: int ## line the symbol has been declared/used in - col: int ## column the symbol has been declared/used in - flags: set[NonTerminalFlag] ## the nonterminal's flags - rule: TNode ## the rule that the symbol refers to - TNode {.shallow.} = object + NonTerminalObj = object ## represents a non terminal symbol + name: string ## the name of the symbol + line: int ## line the symbol has been declared/used in + col: int ## column the symbol has been declared/used in + flags: set[NonTerminalFlag] ## the nonterminal's flags + rule: Peg ## the rule that the symbol refers to + Peg* {.shallow.} = object ## type that represents a PEG case kind: PegKind of pkEmpty..pkWhitespace: nil of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle: term: string of pkChar, pkGreedyRepChar: ch: char of pkCharChoice, pkGreedyRepSet: charChoice: ref set[char] of pkNonTerminal: nt: NonTerminal - of pkBackRef..pkBackRefIgnoreStyle: index: range[0..MaxSubpatterns] - else: sons: seq[TNode] + of pkBackRef..pkBackRefIgnoreStyle: index: range[-MaxSubpatterns..MaxSubpatterns-1] + else: sons: seq[Peg] NonTerminal* = ref NonTerminalObj - Peg* = TNode ## type that represents a PEG +func kind*(p: Peg): PegKind = p.kind 
+ ## Returns the *PegKind* of a given *Peg* object. -{.deprecated: [TPeg: Peg].} +func term*(p: Peg): string = p.term + ## Returns the *string* representation of a given *Peg* variant object + ## where present. -proc term*(t: string): Peg {.nosideEffect, rtl, extern: "npegs$1Str".} = +func ch*(p: Peg): char = p.ch + ## Returns the *char* representation of a given *Peg* variant object + ## where present. + +func charChoice*(p: Peg): ref set[char] = p.charChoice + ## Returns the *charChoice* field of a given *Peg* variant object + ## where present. + +func nt*(p: Peg): NonTerminal = p.nt + ## Returns the *NonTerminal* object of a given *Peg* variant object + ## where present. + +func index*(p: Peg): range[-MaxSubpatterns..MaxSubpatterns-1] = p.index + ## Returns the back-reference index of a captured sub-pattern in the + ## *Captures* object for a given *Peg* variant object where present. + +iterator items*(p: Peg): Peg {.inline.} = + ## Yields the child nodes of a *Peg* variant object where present. + for s in p.sons: + yield s + +iterator pairs*(p: Peg): (int, Peg) {.inline.} = + ## Yields the indices and child nodes of a *Peg* variant object where present. + for i in 0 ..< p.sons.len: + yield (i, p.sons[i]) + +func name*(nt: NonTerminal): string = nt.name + ## Gets the name of the symbol represented by the parent *Peg* object variant + ## of a given *NonTerminal*. + +func line*(nt: NonTerminal): int = nt.line + ## Gets the line number of the definition of the parent *Peg* object variant + ## of a given *NonTerminal*. + +func col*(nt: NonTerminal): int = nt.col + ## Gets the column number of the definition of the parent *Peg* object variant + ## of a given *NonTerminal*. + +func flags*(nt: NonTerminal): set[NonTerminalFlag] = nt.flags + ## Gets the *NonTerminalFlag*-typed flags field of the parent *Peg* variant + ## object of a given *NonTerminal*. 
+ +func rule*(nt: NonTerminal): Peg = nt.rule + ## Gets the *Peg* object representing the rule definition of the parent *Peg* + ## object variant of a given *NonTerminal*. + +func term*(t: string): Peg {.rtl, extern: "npegs$1Str".} = ## constructs a PEG from a terminal string if t.len != 1: - result.kind = pkTerminal - result.term = t + result = Peg(kind: pkTerminal, term: t) else: - result.kind = pkChar - result.ch = t[0] + result = Peg(kind: pkChar, ch: t[0]) -proc termIgnoreCase*(t: string): Peg {. - nosideEffect, rtl, extern: "npegs$1".} = +func termIgnoreCase*(t: string): Peg {. + rtl, extern: "npegs$1".} = ## constructs a PEG from a terminal string; ignore case for matching - result.kind = pkTerminalIgnoreCase - result.term = t + result = Peg(kind: pkTerminalIgnoreCase, term: t) -proc termIgnoreStyle*(t: string): Peg {. - nosideEffect, rtl, extern: "npegs$1".} = +func termIgnoreStyle*(t: string): Peg {. + rtl, extern: "npegs$1".} = ## constructs a PEG from a terminal string; ignore style for matching - result.kind = pkTerminalIgnoreStyle - result.term = t + result = Peg(kind: pkTerminalIgnoreStyle, term: t) -proc term*(t: char): Peg {.nosideEffect, rtl, extern: "npegs$1Char".} = +func term*(t: char): Peg {.rtl, extern: "npegs$1Char".} = ## constructs a PEG from a terminal char assert t != '\0' - result.kind = pkChar - result.ch = t + result = Peg(kind: pkChar, ch: t) -proc charSet*(s: set[char]): Peg {.nosideEffect, rtl, extern: "npegs$1".} = +func charSet*(s: set[char]): Peg {.rtl, extern: "npegs$1".} = ## constructs a PEG from a character set `s` assert '\0' notin s - result.kind = pkCharChoice - new(result.charChoice) - result.charChoice[] = s + result = Peg(kind: pkCharChoice) + {.cast(noSideEffect).}: + new(result.charChoice) + result.charChoice[] = s -proc len(a: Peg): int {.inline.} = return a.sons.len -proc add(d: var Peg, s: Peg) {.inline.} = add(d.sons, s) +func len(a: Peg): int {.inline.} = return a.sons.len +func add(d: var Peg, s: Peg) {.inline.} 
= add(d.sons, s) -proc addChoice(dest: var Peg, elem: Peg) = +func addChoice(dest: var Peg, elem: Peg) = var L = dest.len-1 if L >= 0 and dest.sons[L].kind == pkCharChoice: # caution! Do not introduce false aliasing here! @@ -139,9 +187,8 @@ proc addChoice(dest: var Peg, elem: Peg) = else: add(dest, elem) else: add(dest, elem) -template multipleOp(k: PegKind, localOpt: expr) = - result.kind = k - result.sons = @[] +template multipleOp(k: PegKind, localOpt: untyped) = + result = Peg(kind: k, sons: @[]) for x in items(a): if x.kind == k: for y in items(x.sons): @@ -151,12 +198,12 @@ template multipleOp(k: PegKind, localOpt: expr) = if result.len == 1: result = result.sons[0] -proc `/`*(a: varargs[Peg]): Peg {. - nosideEffect, rtl, extern: "npegsOrderedChoice".} = +func `/`*(a: varargs[Peg]): Peg {. + rtl, extern: "npegsOrderedChoice".} = ## constructs an ordered choice with the PEGs in `a` multipleOp(pkOrderedChoice, addChoice) -proc addSequence(dest: var Peg, elem: Peg) = +func addSequence(dest: var Peg, elem: Peg) = var L = dest.len-1 if L >= 0 and dest.sons[L].kind == pkTerminal: # caution! Do not introduce false aliasing here! @@ -168,12 +215,12 @@ proc addSequence(dest: var Peg, elem: Peg) = else: add(dest, elem) else: add(dest, elem) -proc sequence*(a: varargs[Peg]): Peg {. - nosideEffect, rtl, extern: "npegs$1".} = +func sequence*(a: varargs[Peg]): Peg {. + rtl, extern: "npegs$1".} = ## constructs a sequence with all the PEGs from `a` multipleOp(pkSequence, addSequence) -proc `?`*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsOptional".} = +func `?`*(a: Peg): Peg {.rtl, extern: "npegsOptional".} = ## constructs an optional for the PEG `a` if a.kind in {pkOption, pkGreedyRep, pkGreedyAny, pkGreedyRepChar, pkGreedyRepSet}: @@ -181,125 +228,116 @@ proc `?`*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsOptional".} = # a? ? --> a? 
result = a else: - result.kind = pkOption - result.sons = @[a] + result = Peg(kind: pkOption, sons: @[a]) -proc `*`*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsGreedyRep".} = +func `*`*(a: Peg): Peg {.rtl, extern: "npegsGreedyRep".} = ## constructs a "greedy repetition" for the PEG `a` case a.kind of pkGreedyRep, pkGreedyRepChar, pkGreedyRepSet, pkGreedyAny, pkOption: assert false # produces endless loop! of pkChar: - result.kind = pkGreedyRepChar - result.ch = a.ch + result = Peg(kind: pkGreedyRepChar, ch: a.ch) of pkCharChoice: - result.kind = pkGreedyRepSet - result.charChoice = a.charChoice # copying a reference suffices! + result = Peg(kind: pkGreedyRepSet, charChoice: a.charChoice) of pkAny, pkAnyRune: - result.kind = pkGreedyAny + result = Peg(kind: pkGreedyAny) else: - result.kind = pkGreedyRep - result.sons = @[a] + result = Peg(kind: pkGreedyRep, sons: @[a]) -proc `!*`*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsSearch".} = +func `!*`*(a: Peg): Peg {.rtl, extern: "npegsSearch".} = ## constructs a "search" for the PEG `a` - result.kind = pkSearch - result.sons = @[a] + result = Peg(kind: pkSearch, sons: @[a]) -proc `!*\`*(a: Peg): Peg {.noSideEffect, rtl, +func `!*\`*(a: Peg): Peg {.rtl, extern: "npgegsCapturedSearch".} = ## constructs a "captured search" for the PEG `a` - result.kind = pkCapturedSearch - result.sons = @[a] + result = Peg(kind: pkCapturedSearch, sons: @[a]) -proc `+`*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsGreedyPosRep".} = +func `+`*(a: Peg): Peg {.rtl, extern: "npegsGreedyPosRep".} = ## constructs a "greedy positive repetition" with the PEG `a` return sequence(a, *a) -proc `&`*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsAndPredicate".} = +func `&`*(a: Peg): Peg {.rtl, extern: "npegsAndPredicate".} = ## constructs an "and predicate" with the PEG `a` - result.kind = pkAndPredicate - result.sons = @[a] + result = Peg(kind: pkAndPredicate, sons: @[a]) -proc `!`*(a: Peg): Peg {.nosideEffect, rtl, extern: 
"npegsNotPredicate".} = +func `!`*(a: Peg): Peg {.rtl, extern: "npegsNotPredicate".} = ## constructs a "not predicate" with the PEG `a` - result.kind = pkNotPredicate - result.sons = @[a] + result = Peg(kind: pkNotPredicate, sons: @[a]) -proc any*: Peg {.inline.} = +func any*: Peg {.inline.} = ## constructs the PEG `any character`:idx: (``.``) - result.kind = pkAny + result = Peg(kind: pkAny) -proc anyRune*: Peg {.inline.} = +func anyRune*: Peg {.inline.} = ## constructs the PEG `any rune`:idx: (``_``) - result.kind = pkAnyRune + result = Peg(kind: pkAnyRune) -proc newLine*: Peg {.inline.} = +func newLine*: Peg {.inline.} = ## constructs the PEG `newline`:idx: (``\n``) - result.kind = pkNewLine + result = Peg(kind: pkNewLine) -proc unicodeLetter*: Peg {.inline.} = +func unicodeLetter*: Peg {.inline.} = ## constructs the PEG ``\letter`` which matches any Unicode letter. - result.kind = pkLetter + result = Peg(kind: pkLetter) -proc unicodeLower*: Peg {.inline.} = +func unicodeLower*: Peg {.inline.} = ## constructs the PEG ``\lower`` which matches any Unicode lowercase letter. - result.kind = pkLower + result = Peg(kind: pkLower) -proc unicodeUpper*: Peg {.inline.} = +func unicodeUpper*: Peg {.inline.} = ## constructs the PEG ``\upper`` which matches any Unicode uppercase letter. - result.kind = pkUpper + result = Peg(kind: pkUpper) -proc unicodeTitle*: Peg {.inline.} = +func unicodeTitle*: Peg {.inline.} = ## constructs the PEG ``\title`` which matches any Unicode title letter. - result.kind = pkTitle + result = Peg(kind: pkTitle) -proc unicodeWhitespace*: Peg {.inline.} = +func unicodeWhitespace*: Peg {.inline.} = ## constructs the PEG ``\white`` which matches any Unicode ## whitespace character. - result.kind = pkWhitespace + result = Peg(kind: pkWhitespace) -proc startAnchor*: Peg {.inline.} = +func startAnchor*: Peg {.inline.} = ## constructs the PEG ``^`` which matches the start of the input. 
- result.kind = pkStartAnchor + result = Peg(kind: pkStartAnchor) -proc endAnchor*: Peg {.inline.} = +func endAnchor*: Peg {.inline.} = ## constructs the PEG ``$`` which matches the end of the input. result = !any() -proc capture*(a: Peg): Peg {.nosideEffect, rtl, extern: "npegsCapture".} = +func capture*(a: Peg = Peg(kind: pkEmpty)): Peg {.rtl, extern: "npegsCapture".} = ## constructs a capture with the PEG `a` - result.kind = pkCapture - result.sons = @[a] + result = Peg(kind: pkCapture, sons: @[a]) -proc backref*(index: range[1..MaxSubpatterns]): Peg {. - nosideEffect, rtl, extern: "npegs$1".} = +func backref*(index: range[1..MaxSubpatterns], reverse: bool = false): Peg {. + rtl, extern: "npegs$1".} = ## constructs a back reference of the given `index`. `index` starts counting - ## from 1. - result.kind = pkBackRef - result.index = index-1 + ## from 1. `reverse` specifies whether indexing starts from the end of the + ## capture list. + result = Peg(kind: pkBackRef, index: (if reverse: -index else: index - 1)) -proc backrefIgnoreCase*(index: range[1..MaxSubpatterns]): Peg {. - nosideEffect, rtl, extern: "npegs$1".} = +func backrefIgnoreCase*(index: range[1..MaxSubpatterns], reverse: bool = false): Peg {. + rtl, extern: "npegs$1".} = ## constructs a back reference of the given `index`. `index` starts counting - ## from 1. Ignores case for matching. - result.kind = pkBackRefIgnoreCase - result.index = index-1 + ## from 1. `reverse` specifies whether indexing starts from the end of the + ## capture list. Ignores case for matching. + result = Peg(kind: pkBackRefIgnoreCase, index: (if reverse: -index else: index - 1)) -proc backrefIgnoreStyle*(index: range[1..MaxSubpatterns]): Peg {. - nosideEffect, rtl, extern: "npegs$1".}= +func backrefIgnoreStyle*(index: range[1..MaxSubpatterns], reverse: bool = false): Peg {. + rtl, extern: "npegs$1".} = ## constructs a back reference of the given `index`. `index` starts counting - ## from 1. Ignores style for matching. 
- result.kind = pkBackRefIgnoreStyle - result.index = index-1 + ## from 1. `reverse` specifies whether indexing starts from the end of the + ## capture list. Ignores style for matching. + result = Peg(kind: pkBackRefIgnoreStyle, index: (if reverse: -index else: index - 1)) -proc spaceCost(n: Peg): int = +func spaceCost(n: Peg): int = case n.kind of pkEmpty: discard of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, pkGreedyRepChar, pkCharChoice, pkGreedyRepSet, - pkAny..pkWhitespace, pkGreedyAny: + pkAny..pkWhitespace, pkGreedyAny, pkBackRef..pkBackRefIgnoreStyle: result = 1 of pkNonTerminal: # we cannot inline a rule with a non-terminal @@ -309,57 +347,53 @@ proc spaceCost(n: Peg): int = inc(result, spaceCost(n.sons[i])) if result >= InlineThreshold: break -proc nonterminal*(n: NonTerminal): Peg {. - nosideEffect, rtl, extern: "npegs$1".} = +func nonterminal*(n: NonTerminal): Peg {. + rtl, extern: "npegs$1".} = ## constructs a PEG that consists of the nonterminal symbol assert n != nil if ntDeclared in n.flags and spaceCost(n.rule) < InlineThreshold: when false: echo "inlining symbol: ", n.name result = n.rule # inlining of rule enables better optimizations else: - result.kind = pkNonTerminal - result.nt = n + result = Peg(kind: pkNonTerminal, nt: n) -proc newNonTerminal*(name: string, line, column: int): NonTerminal {. - nosideEffect, rtl, extern: "npegs$1".} = +func newNonTerminal*(name: string, line, column: int): NonTerminal {. 
+ rtl, extern: "npegs$1".} = ## constructs a nonterminal symbol - new(result) - result.name = name - result.line = line - result.col = column + result = NonTerminal(name: name, line: line, col: column) -template letters*: expr = +template letters*: Peg = ## expands to ``charset({'A'..'Z', 'a'..'z'})`` charSet({'A'..'Z', 'a'..'z'}) -template digits*: expr = +template digits*: Peg = ## expands to ``charset({'0'..'9'})`` charSet({'0'..'9'}) -template whitespace*: expr = +template whitespace*: Peg = ## expands to ``charset({' ', '\9'..'\13'})`` charSet({' ', '\9'..'\13'}) -template identChars*: expr = +template identChars*: Peg = ## expands to ``charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})`` charSet({'a'..'z', 'A'..'Z', '0'..'9', '_'}) -template identStartChars*: expr = +template identStartChars*: Peg = ## expands to ``charset({'A'..'Z', 'a'..'z', '_'})`` charSet({'a'..'z', 'A'..'Z', '_'}) -template ident*: expr = +template ident*: Peg = ## same as ``[a-zA-Z_][a-zA-z_0-9]*``; standard identifier sequence(charSet({'a'..'z', 'A'..'Z', '_'}), *charSet({'a'..'z', 'A'..'Z', '0'..'9', '_'})) -template natural*: expr = +template natural*: Peg = ## same as ``\d+`` +digits # ------------------------- debugging ----------------------------------------- -proc esc(c: char, reserved = {'\0'..'\255'}): string = +func esc(c: char, reserved = {'\0'..'\255'}): string = case c of '\b': result = "\\b" of '\t': result = "\\t" @@ -371,18 +405,18 @@ proc esc(c: char, reserved = {'\0'..'\255'}): string = of '\a': result = "\\a" of '\\': result = "\\\\" of 'a'..'z', 'A'..'Z', '0'..'9', '_': result = $c - elif c < ' ' or c >= '\128': result = '\\' & $ord(c) + elif c < ' ' or c >= '\127': result = '\\' & $ord(c) elif c in reserved: result = '\\' & c else: result = $c -proc singleQuoteEsc(c: char): string = return "'" & esc(c, {'\''}) & "'" +func singleQuoteEsc(c: char): string = return "'" & esc(c, {'\''}) & "'" -proc singleQuoteEsc(str: string): string = +func singleQuoteEsc(str: string): 
string = result = "'" for c in items(str): add result, esc(c, {'\''}) add result, '\'' -proc charSetEscAux(cc: set[char]): string = +func charSetEscAux(cc: set[char]): string = const reserved = {'^', '-', ']'} result = "" var c1 = 0 @@ -399,13 +433,13 @@ proc charSetEscAux(cc: set[char]): string = c1 = c2 inc(c1) -proc charSetEsc(cc: set[char]): string = +func charSetEsc(cc: set[char]): string = if card(cc) >= 128+64: result = "[^" & charSetEscAux({'\1'..'\xFF'} - cc) & ']' else: result = '[' & charSetEscAux(cc) & ']' -proc toStrAux(r: Peg, res: var string) = +func toStrAux(r: Peg, res: var string) = case r.kind of pkEmpty: add(res, "()") of pkAny: add(res, '.') @@ -491,7 +525,7 @@ proc toStrAux(r: Peg, res: var string) = of pkStartAnchor: add(res, '^') -proc `$` *(r: Peg): string {.nosideEffect, rtl, extern: "npegsToString".} = +func `$` *(r: Peg): string {.rtl, extern: "npegsToString".} = ## converts a PEG to its string representation result = "" toStrAux(r, result) @@ -504,9 +538,7 @@ type ml: int origStart: int -{.deprecated: [TCaptures: Captures].} - -proc bounds*(c: Captures, +func bounds*(c: Captures, i: range[0..MaxSubpatterns-1]): tuple[first, last: int] = ## returns the bounds ``[first..last]`` of the `i`'th capture. result = c.matches[i] @@ -514,238 +546,543 @@ proc bounds*(c: Captures, when not useUnicode: type Rune = char - template fastRuneAt(s, i, ch: expr) = + template fastRuneAt(s, i, ch) = ch = s[i] inc(i) - template runeLenAt(s, i: expr): expr = 1 - - proc isAlpha(a: char): bool {.inline.} = return a in {'a'..'z','A'..'Z'} - proc isUpper(a: char): bool {.inline.} = return a in {'A'..'Z'} - proc isLower(a: char): bool {.inline.} = return a in {'a'..'z'} - proc isTitle(a: char): bool {.inline.} = return false - proc isWhiteSpace(a: char): bool {.inline.} = return a in {' ', '\9'..'\13'} - -proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {. 
- nosideEffect, rtl, extern: "npegs$1".} = - ## low-level matching proc that implements the PEG interpreter. Use this - ## for maximum efficiency (every other PEG operation ends up calling this - ## proc). - ## Returns -1 if it does not match, else the length of the match - case p.kind - of pkEmpty: result = 0 # match of length 0 - of pkAny: - if s[start] != '\0': result = 1 - else: result = -1 - of pkAnyRune: - if s[start] != '\0': - result = runeLenAt(s, start) - else: - result = -1 - of pkLetter: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isAlpha(a): dec(result, start) + template runeLenAt(s, i): untyped = 1 + + func isAlpha(a: char): bool {.inline.} = return a in {'a'..'z', 'A'..'Z'} + func isUpper(a: char): bool {.inline.} = return a in {'A'..'Z'} + func isLower(a: char): bool {.inline.} = return a in {'a'..'z'} + func isTitle(a: char): bool {.inline.} = return false + func isWhiteSpace(a: char): bool {.inline.} = return a in {' ', '\9'..'\13'} + +template matchOrParse(mopProc: untyped) = + # Used to make the main matcher proc *rawMatch* as well as event parser + # procs. For the former, *enter* and *leave* event handler code generators + # are provided which just return *discard*. + + proc mopProc(s: string, p: Peg, start: int, c: var Captures): int {.gcsafe, raises: [].} = + proc matchBackRef(s: string, p: Peg, start: int, c: var Captures): int = + # Parse handler code must run in an *of* clause of its own for each + # *PegKind*, so we encapsulate the identical clause body for + # *pkBackRef..pkBackRefIgnoreStyle* here. 
+ var index = p.index + if index < 0: index.inc(c.ml) + if index < 0 or index >= c.ml: return -1 + var (a, b) = c.matches[index] + var n: Peg + case p.kind + of pkBackRef: + n = Peg(kind: pkTerminal, term: s.substr(a, b)) + of pkBackRefIgnoreStyle: + n = Peg(kind: pkTerminalIgnoreStyle, term: s.substr(a, b)) + of pkBackRefIgnoreCase: + n = Peg(kind: pkTerminalIgnoreCase, term: s.substr(a, b)) + else: assert(false, "impossible case") + mopProc(s, n, start, c) + + case p.kind + of pkEmpty: + enter(pkEmpty, s, p, start) + result = 0 # match of length 0 + leave(pkEmpty, s, p, start, result) + of pkAny: + enter(pkAny, s, p, start) + if start < s.len: result = 1 else: result = -1 - else: - result = -1 - of pkLower: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isLower(a): dec(result, start) + leave(pkAny, s, p, start, result) + of pkAnyRune: + enter(pkAnyRune, s, p, start) + if start < s.len: + result = runeLenAt(s, start) + else: + result = -1 + leave(pkAnyRune, s, p, start, result) + of pkLetter: + enter(pkLetter, s, p, start) + if start < s.len: + var a: Rune + result = start + fastRuneAt(s, result, a) + if isAlpha(a): dec(result, start) + else: result = -1 + else: + result = -1 + leave(pkLetter, s, p, start, result) + of pkLower: + enter(pkLower, s, p, start) + if start < s.len: + var a: Rune + result = start + fastRuneAt(s, result, a) + if isLower(a): dec(result, start) + else: result = -1 + else: + result = -1 + leave(pkLower, s, p, start, result) + of pkUpper: + enter(pkUpper, s, p, start) + if start < s.len: + var a: Rune + result = start + fastRuneAt(s, result, a) + if isUpper(a): dec(result, start) + else: result = -1 + else: + result = -1 + leave(pkUpper, s, p, start, result) + of pkTitle: + enter(pkTitle, s, p, start) + if start < s.len: + var a: Rune + result = start + fastRuneAt(s, result, a) + if isTitle(a): dec(result, start) + else: result = -1 + else: + result = -1 + leave(pkTitle, s, p, start, result) + of 
pkWhitespace: + enter(pkWhitespace, s, p, start) + if start < s.len: + var a: Rune + result = start + fastRuneAt(s, result, a) + if isWhiteSpace(a): dec(result, start) + else: result = -1 + else: + result = -1 + leave(pkWhitespace, s, p, start, result) + of pkGreedyAny: + enter(pkGreedyAny, s, p, start) + result = len(s) - start + leave(pkGreedyAny, s, p, start, result) + of pkNewLine: + enter(pkNewLine, s, p, start) + if start < s.len and s[start] == '\L': result = 1 + elif start < s.len and s[start] == '\C': + if start+1 < s.len and s[start+1] == '\L': result = 2 + else: result = 1 else: result = -1 - else: - result = -1 - of pkUpper: - if s[start] != '\0': - var a: Rune + leave(pkNewLine, s, p, start, result) + of pkTerminal: + enter(pkTerminal, s, p, start) + result = len(p.term) + for i in 0..result-1: + if start+i >= s.len or p.term[i] != s[start+i]: + result = -1 + break + leave(pkTerminal, s, p, start, result) + of pkTerminalIgnoreCase: + enter(pkTerminalIgnoreCase, s, p, start) + var + i = 0 + a, b: Rune result = start - fastRuneAt(s, result, a) - if isUpper(a): dec(result, start) - else: result = -1 - else: - result = -1 - of pkTitle: - if s[start] != '\0': - var a: Rune + while i < len(p.term): + if result >= s.len: + result = -1 + break + fastRuneAt(p.term, i, a) + fastRuneAt(s, result, b) + if toLower(a) != toLower(b): + result = -1 + break + dec(result, start) + leave(pkTerminalIgnoreCase, s, p, start, result) + of pkTerminalIgnoreStyle: + enter(pkTerminalIgnoreStyle, s, p, start) + var + i = 0 + a, b: Rune result = start - fastRuneAt(s, result, a) - if isTitle(a): dec(result, start) + while i < len(p.term): + while i < len(p.term): + fastRuneAt(p.term, i, a) + if a != Rune('_'): break + while result < s.len: + fastRuneAt(s, result, b) + if b != Rune('_'): break + if result >= s.len: + if i >= p.term.len: break + else: + result = -1 + break + elif toLower(a) != toLower(b): + result = -1 + break + dec(result, start) + leave(pkTerminalIgnoreStyle, s, p, 
start, result) + of pkChar: + enter(pkChar, s, p, start) + if start < s.len and p.ch == s[start]: result = 1 else: result = -1 - else: - result = -1 - of pkWhitespace: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isWhiteSpace(a): dec(result, start) + leave(pkChar, s, p, start, result) + of pkCharChoice: + enter(pkCharChoice, s, p, start) + if start < s.len and contains(p.charChoice[], s[start]): result = 1 else: result = -1 - else: - result = -1 - of pkGreedyAny: - result = len(s) - start - of pkNewLine: - if s[start] == '\L': result = 1 - elif s[start] == '\C': - if s[start+1] == '\L': result = 2 - else: result = 1 - else: result = -1 - of pkTerminal: - result = len(p.term) - for i in 0..result-1: - if p.term[i] != s[start+i]: - result = -1 - break - of pkTerminalIgnoreCase: - var - i = 0 - a, b: Rune - result = start - while i < len(p.term): - fastRuneAt(p.term, i, a) - fastRuneAt(s, result, b) - if toLower(a) != toLower(b): - result = -1 - break - dec(result, start) - of pkTerminalIgnoreStyle: - var - i = 0 - a, b: Rune - result = start - while i < len(p.term): - while true: - fastRuneAt(p.term, i, a) - if a != Rune('_'): break - while true: - fastRuneAt(s, result, b) - if b != Rune('_'): break - if toLower(a) != toLower(b): - result = -1 - break - dec(result, start) - of pkChar: - if p.ch == s[start]: result = 1 - else: result = -1 - of pkCharChoice: - if contains(p.charChoice[], s[start]): result = 1 - else: result = -1 - of pkNonTerminal: - var oldMl = c.ml - when false: echo "enter: ", p.nt.name - result = rawMatch(s, p.nt.rule, start, c) - when false: echo "leave: ", p.nt.name - if result < 0: c.ml = oldMl - of pkSequence: - var oldMl = c.ml - result = 0 - for i in 0..high(p.sons): - var x = rawMatch(s, p.sons[i], start+result, c) - if x < 0: + leave(pkCharChoice, s, p, start, result) + of pkNonTerminal: + enter(pkNonTerminal, s, p, start) + var oldMl = c.ml + when false: echo "enter: ", p.nt.name + result = 
mopProc(s, p.nt.rule, start, c) + when false: echo "leave: ", p.nt.name + if result < 0: c.ml = oldMl + leave(pkNonTerminal, s, p, start, result) + of pkSequence: + enter(pkSequence, s, p, start) + var oldMl = c.ml + result = 0 + for i in 0..high(p.sons): + var x = mopProc(s, p.sons[i], start+result, c) + if x < 0: + c.ml = oldMl + result = -1 + break + else: inc(result, x) + leave(pkSequence, s, p, start, result) + of pkOrderedChoice: + enter(pkOrderedChoice, s, p, start) + var oldMl = c.ml + for i in 0..high(p.sons): + result = mopProc(s, p.sons[i], start, c) + if result >= 0: break c.ml = oldMl - result = -1 - break - else: inc(result, x) - of pkOrderedChoice: - var oldMl = c.ml - for i in 0..high(p.sons): - result = rawMatch(s, p.sons[i], start, c) - if result >= 0: break - c.ml = oldMl - of pkSearch: - var oldMl = c.ml - result = 0 - while start+result < s.len: - var x = rawMatch(s, p.sons[0], start+result, c) - if x >= 0: - inc(result, x) - return - inc(result) - result = -1 - c.ml = oldMl - of pkCapturedSearch: - var idx = c.ml # reserve a slot for the subpattern - inc(c.ml) - result = 0 - while start+result < s.len: - var x = rawMatch(s, p.sons[0], start+result, c) - if x >= 0: - if idx < MaxSubpatterns: - c.matches[idx] = (start, start+result-1) - #else: silently ignore the capture - inc(result, x) - return - inc(result) - result = -1 - c.ml = idx - of pkGreedyRep: - result = 0 - while true: - var x = rawMatch(s, p.sons[0], start+result, c) - # if x == 0, we have an endless loop; so the correct behaviour would be - # not to break. But endless loops can be easily introduced: - # ``(comment / \w*)*`` is such an example. Breaking for x == 0 does the - # expected thing in this case. 
- if x <= 0: break - inc(result, x) - of pkGreedyRepChar: - result = 0 - var ch = p.ch - while ch == s[start+result]: inc(result) - of pkGreedyRepSet: - result = 0 - while contains(p.charChoice[], s[start+result]): inc(result) - of pkOption: - result = max(0, rawMatch(s, p.sons[0], start, c)) - of pkAndPredicate: - var oldMl = c.ml - result = rawMatch(s, p.sons[0], start, c) - if result >= 0: result = 0 # do not consume anything - else: c.ml = oldMl - of pkNotPredicate: - var oldMl = c.ml - result = rawMatch(s, p.sons[0], start, c) - if result < 0: result = 0 - else: + leave(pkOrderedChoice, s, p, start, result) + of pkSearch: + enter(pkSearch, s, p, start) + var oldMl = c.ml + result = 0 + while start+result <= s.len: + var x = mopProc(s, p.sons[0], start+result, c) + if x >= 0: + inc(result, x) + leave(pkSearch, s, p, start, result) + return + inc(result) + result = -1 c.ml = oldMl + leave(pkSearch, s, p, start, result) + of pkCapturedSearch: + enter(pkCapturedSearch, s, p, start) + var idx = c.ml # reserve a slot for the subpattern + inc(c.ml) + result = 0 + while start+result <= s.len: + var x = mopProc(s, p.sons[0], start+result, c) + if x >= 0: + if idx < MaxSubpatterns: + c.matches[idx] = (start, start+result-1) + #else: silently ignore the capture + inc(result, x) + leave(pkCapturedSearch, s, p, start, result) + return + inc(result) result = -1 - of pkCapture: - var idx = c.ml # reserve a slot for the subpattern - inc(c.ml) - result = rawMatch(s, p.sons[0], start, c) - if result >= 0: - if idx < MaxSubpatterns: - c.matches[idx] = (start, start+result-1) - #else: silently ignore the capture - else: c.ml = idx - of pkBackRef..pkBackRefIgnoreStyle: - if p.index >= c.ml: return -1 - var (a, b) = c.matches[p.index] - var n: Peg - n.kind = succ(pkTerminal, ord(p.kind)-ord(pkBackRef)) - n.term = s.substr(a, b) - result = rawMatch(s, n, start, c) - of pkStartAnchor: - if c.origStart == start: result = 0 - else: result = -1 - of pkRule, pkList: assert false + 
leave(pkCapturedSearch, s, p, start, result) + of pkGreedyRep: + enter(pkGreedyRep, s, p, start) + result = 0 + while true: + var x = mopProc(s, p.sons[0], start+result, c) + # if x == 0, we have an endless loop; so the correct behaviour would be + # not to break. But endless loops can be easily introduced: + # ``(comment / \w*)*`` is such an example. Breaking for x == 0 does the + # expected thing in this case. + if x <= 0: break + inc(result, x) + leave(pkGreedyRep, s, p, start, result) + of pkGreedyRepChar: + enter(pkGreedyRepChar, s, p, start) + result = 0 + var ch = p.ch + while start+result < s.len and ch == s[start+result]: inc(result) + leave(pkGreedyRepChar, s, p, start, result) + of pkGreedyRepSet: + enter(pkGreedyRepSet, s, p, start) + result = 0 + while start+result < s.len and contains(p.charChoice[], s[start+result]): + inc(result) + leave(pkGreedyRepSet, s, p, start, result) + of pkOption: + enter(pkOption, s, p, start) + result = max(0, mopProc(s, p.sons[0], start, c)) + leave(pkOption, s, p, start, result) + of pkAndPredicate: + enter(pkAndPredicate, s, p, start) + var oldMl = c.ml + result = mopProc(s, p.sons[0], start, c) + if result >= 0: result = 0 # do not consume anything + else: c.ml = oldMl + leave(pkAndPredicate, s, p, start, result) + of pkNotPredicate: + enter(pkNotPredicate, s, p, start) + var oldMl = c.ml + result = mopProc(s, p.sons[0], start, c) + if result < 0: result = 0 + else: + c.ml = oldMl + result = -1 + leave(pkNotPredicate, s, p, start, result) + of pkCapture: + enter(pkCapture, s, p, start) + if p.sons.len == 0 or p.sons[0].kind == pkEmpty: + # empty capture removes last match + dec(c.ml) + c.matches[c.ml] = (0, 0) + result = 0 # match of length 0 + else: + var idx = c.ml # reserve a slot for the subpattern + result = mopProc(s, p.sons[0], start, c) + if result >= 0: + if idx < MaxSubpatterns: + if idx != c.ml: + for i in countdown(c.ml, idx): + c.matches[i+1] = c.matches[i] + c.matches[idx] = (start, start+result-1) + 
#else: silently ignore the capture + inc(c.ml) + leave(pkCapture, s, p, start, result) + of pkBackRef: + enter(pkBackRef, s, p, start) + result = matchBackRef(s, p, start, c) + leave(pkBackRef, s, p, start, result) + of pkBackRefIgnoreCase: + enter(pkBackRefIgnoreCase, s, p, start) + result = matchBackRef(s, p, start, c) + leave(pkBackRefIgnoreCase, s, p, start, result) + of pkBackRefIgnoreStyle: + enter(pkBackRefIgnoreStyle, s, p, start) + result = matchBackRef(s, p, start, c) + leave(pkBackRefIgnoreStyle, s, p, start, result) + of pkStartAnchor: + enter(pkStartAnchor, s, p, start) + if c.origStart == start: result = 0 + else: result = -1 + leave(pkStartAnchor, s, p, start, result) + of pkRule, pkList: assert false + +func rawMatch*(s: string, p: Peg, start: int, c: var Captures): int + {.rtl, extern: "npegs$1".} = + ## low-level matching proc that implements the PEG interpreter. Use this + ## for maximum efficiency (every other PEG operation ends up calling this + ## proc). + ## Returns -1 if it does not match, else the length of the match + + # Set the handler generators to produce do-nothing handlers. + template enter(pk, s, p, start) = + discard + template leave(pk, s, p, start, length) = + discard + matchOrParse(matchIt) + {.cast(noSideEffect).}: + # This cast is allowed because the `matchOrParse` template is used for + # both matching and parsing, but side effects are only possible when it's + # used by `eventParser`. + result = matchIt(s, p, start, c) + +macro mkHandlerTplts(handlers: untyped): untyped = + # Transforms the handler spec in *handlers* into handler templates. + # The AST structure of *handlers[0]*: + # + # ``` + # StmtList + # Call + # Ident "pkNonTerminal" + # StmtList + # Call + # Ident "enter" + # StmtList + # <handler code block> + # Call + # Ident "leave" + # StmtList + # <handler code block> + # Call + # Ident "pkChar" + # StmtList + # Call + # Ident "leave" + # StmtList + # <handler code block> + # ... 
+ # ``` + func mkEnter(hdName, body: NimNode): NimNode = + template helper(hdName, body) {.dirty.} = + template hdName(s, p, start) = + let s {.inject.} = s + let p {.inject.} = p + let start {.inject.} = start + body + result = getAst(helper(hdName, body)) + + template mkLeave(hdPostf, body) {.dirty.} = + # this has to be dirty to be able to capture *result* as *length* in + # *leaveXX* calls. + template `leave hdPostf`(s, p, start, length) = + body + + result = newStmtList() + for topCall in handlers[0]: + if topCall.kind notin nnkCallKinds: + error("Call syntax expected.", topCall) + let pegKind = topCall[0] + if pegKind.kind notin {nnkIdent, nnkSym}: + error("PegKind expected.", pegKind) + if 2 == topCall.len: + for hdDef in topCall[1]: + if hdDef.kind notin nnkCallKinds: + error("Call syntax expected.", hdDef) + if hdDef[0].kind notin {nnkIdent, nnkSym}: + error("Handler identifier expected.", hdDef[0]) + if 2 == hdDef.len: + let hdPostf = substr(pegKind.strVal, 2) + case hdDef[0].strVal + of "enter": + result.add mkEnter(newIdentNode("enter" & hdPostf), hdDef[1]) + of "leave": + result.add getAst(mkLeave(ident(hdPostf), hdDef[1])) + else: + error( + "Unsupported handler identifier, expected 'enter' or 'leave'.", + hdDef[0] + ) + +template eventParser*(pegAst, handlers: untyped): (proc(s: string): int) = + ## Generates an interpreting event parser *proc* according to the specified + ## PEG AST and handler code blocks. The *proc* can be called with a string + ## to be parsed and will execute the handler code blocks whenever their + ## associated grammar element is matched. It returns -1 if the string does not + ## match, else the length of the total match. 
The following example code + ## evaluates an arithmetic expression defined by a simple PEG: + ## + ## ```nim + ## import std/[strutils, pegs] + ## + ## let + ## pegAst = """ + ## Expr <- Sum + ## Sum <- Product (('+' / '-')Product)* + ## Product <- Value (('*' / '/')Value)* + ## Value <- [0-9]+ / '(' Expr ')' + ## """.peg + ## txt = "(5+3)/2-7*22" + ## + ## var + ## pStack: seq[string] = @[] + ## valStack: seq[float] = @[] + ## opStack = "" + ## let + ## parseArithExpr = pegAst.eventParser: + ## pkNonTerminal: + ## enter: + ## pStack.add p.nt.name + ## leave: + ## pStack.setLen pStack.high + ## if length > 0: + ## let matchStr = s.substr(start, start+length-1) + ## case p.nt.name + ## of "Value": + ## try: + ## valStack.add matchStr.parseFloat + ## echo valStack + ## except ValueError: + ## discard + ## of "Sum", "Product": + ## try: + ## let val = matchStr.parseFloat + ## except ValueError: + ## if valStack.len > 1 and opStack.len > 0: + ## valStack[^2] = case opStack[^1] + ## of '+': valStack[^2] + valStack[^1] + ## of '-': valStack[^2] - valStack[^1] + ## of '*': valStack[^2] * valStack[^1] + ## else: valStack[^2] / valStack[^1] + ## valStack.setLen valStack.high + ## echo valStack + ## opStack.setLen opStack.high + ## echo opStack + ## pkChar: + ## leave: + ## if length == 1 and "Value" != pStack[^1]: + ## let matchChar = s[start] + ## opStack.add matchChar + ## echo opStack + ## + ## let pLen = parseArithExpr(txt) + ## ``` + ## + ## The *handlers* parameter consists of code blocks for *PegKinds*, + ## which define the grammar elements of interest. Each block can contain + ## handler code to be executed when the parser enters and leaves text + ## matching the grammar element. An *enter* handler can access the specific + ## PEG AST node being matched as *p*, the entire parsed string as *s* + ## and the position of the matched text segment in *s* as *start*. 
A *leave* + ## handler can access *p*, *s*, *start* and also the length of the matched + ## text segment as *length*. For an unsuccessful match, the *enter* and + ## *leave* handlers will be executed, with *length* set to -1. + ## + ## Symbols declared in an *enter* handler can be made visible in the + ## corresponding *leave* handler by annotating them with an *inject* pragma. + proc rawParse(s: string, p: Peg, start: int, c: var Captures): int + {.gensym.} = + + # binding from *macros* + bind strVal + + mkHandlerTplts: + handlers + + macro enter(pegKind, s, pegNode, start: untyped): untyped = + # This is called by the matcher code in *matchOrParse* at the + # start of the code for a grammar element of kind *pegKind*. + # Expands to a call to the handler template if one was generated + # by *mkHandlerTplts*. + template mkDoEnter(hdPostf, s, pegNode, start) = + when declared(`enter hdPostf`): + `enter hdPostf`(s, pegNode, start) + else: + discard + let hdPostf = ident(substr($pegKind, 2)) + getAst(mkDoEnter(hdPostf, s, pegNode, start)) + + macro leave(pegKind, s, pegNode, start, length: untyped): untyped = + # Like *enter*, but called at the end of the matcher code for + # a grammar element of kind *pegKind*. 
+ template mkDoLeave(hdPostf, s, pegNode, start, length) = + when declared(`leave hdPostf`): + `leave hdPostf`(s, pegNode, start, length) + else: + discard + let hdPostf = ident(substr($pegKind, 2)) + getAst(mkDoLeave(hdPostf, s, pegNode, start, length)) + + matchOrParse(parseIt) + parseIt(s, p, start, c) -template fillMatches(s, caps, c: expr) = + proc parser(s: string): int {.gensym.} = + # the proc to be returned + var + ms: array[MaxSubpatterns, (int, int)] + cs = Captures(matches: ms, ml: 0, origStart: 0) + rawParse(s, pegAst, 0, cs) + parser + +template fillMatches(s, caps, c) = for k in 0..c.ml-1: let startIdx = c.matches[k][0] let endIdx = c.matches[k][1] if startIdx != -1: caps[k] = substr(s, startIdx, endIdx) else: - caps[k] = nil + caps[k] = "" -proc matchLen*(s: string, pattern: Peg, matches: var openArray[string], - start = 0): int {.nosideEffect, rtl, extern: "npegs$1Capture".} = +func matchLen*(s: string, pattern: Peg, matches: var openArray[string], + start = 0): int {.rtl, extern: "npegs$1Capture".} = ## the same as ``match``, but it returns the length of the match, ## if there is no match, -1 is returned. Note that a match length ## of zero can happen. It's possible that a suffix of `s` remains @@ -755,8 +1092,8 @@ proc matchLen*(s: string, pattern: Peg, matches: var openArray[string], result = rawMatch(s, pattern, start, c) if result >= 0: fillMatches(s, matches, c) -proc matchLen*(s: string, pattern: Peg, - start = 0): int {.nosideEffect, rtl, extern: "npegs$1".} = +func matchLen*(s: string, pattern: Peg, + start = 0): int {.rtl, extern: "npegs$1".} = ## the same as ``match``, but it returns the length of the match, ## if there is no match, -1 is returned. Note that a match length ## of zero can happen. 
It's possible that a suffix of `s` remains @@ -765,22 +1102,22 @@ proc matchLen*(s: string, pattern: Peg, c.origStart = start result = rawMatch(s, pattern, start, c) -proc match*(s: string, pattern: Peg, matches: var openArray[string], - start = 0): bool {.nosideEffect, rtl, extern: "npegs$1Capture".} = +func match*(s: string, pattern: Peg, matches: var openArray[string], + start = 0): bool {.rtl, extern: "npegs$1Capture".} = ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and ## the captured substrings in the array ``matches``. If it does not ## match, nothing is written into ``matches`` and ``false`` is ## returned. result = matchLen(s, pattern, matches, start) != -1 -proc match*(s: string, pattern: Peg, - start = 0): bool {.nosideEffect, rtl, extern: "npegs$1".} = +func match*(s: string, pattern: Peg, + start = 0): bool {.rtl, extern: "npegs$1".} = ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``. result = matchLen(s, pattern, start) != -1 -proc find*(s: string, pattern: Peg, matches: var openArray[string], - start = 0): int {.nosideEffect, rtl, extern: "npegs$1Capture".} = +func find*(s: string, pattern: Peg, matches: var openArray[string], + start = 0): int {.rtl, extern: "npegs$1Capture".} = ## returns the starting position of ``pattern`` in ``s`` and the captured ## substrings in the array ``matches``. If it does not match, nothing ## is written into ``matches`` and -1 is returned. @@ -794,9 +1131,9 @@ proc find*(s: string, pattern: Peg, matches: var openArray[string], return -1 # could also use the pattern here: (!P .)* P -proc findBounds*(s: string, pattern: Peg, matches: var openArray[string], +func findBounds*(s: string, pattern: Peg, matches: var openArray[string], start = 0): tuple[first, last: int] {. 
- nosideEffect, rtl, extern: "npegs$1Capture".} = + rtl, extern: "npegs$1Capture".} = ## returns the starting position and end position of ``pattern`` in ``s`` ## and the captured ## substrings in the array ``matches``. If it does not match, nothing @@ -811,8 +1148,8 @@ proc findBounds*(s: string, pattern: Peg, matches: var openArray[string], return (i, i+L-1) return (-1, 0) -proc find*(s: string, pattern: Peg, - start = 0): int {.nosideEffect, rtl, extern: "npegs$1".} = +func find*(s: string, pattern: Peg, + start = 0): int {.rtl, extern: "npegs$1".} = ## returns the starting position of ``pattern`` in ``s``. If it does not ## match, -1 is returned. var c: Captures @@ -835,21 +1172,18 @@ iterator findAll*(s: string, pattern: Peg, start = 0): string = yield substr(s, i, i+L-1) inc(i, L) -proc findAll*(s: string, pattern: Peg, start = 0): seq[string] {. - nosideEffect, rtl, extern: "npegs$1".} = +func findAll*(s: string, pattern: Peg, start = 0): seq[string] {. + rtl, extern: "npegs$1".} = ## returns all matching *substrings* of `s` that match `pattern`. - ## If it does not match, @[] is returned. - accumulateResult(findAll(s, pattern, start)) - -when not defined(nimhygiene): - {.pragma: inject.} + ## If it does not match, `@[]` is returned. + result = @[] + for it in findAll(s, pattern, start): result.add it template `=~`*(s: string, pattern: Peg): bool = ## This calls ``match`` with an implicit declared ``matches`` array that ## can be used in the scope of the ``=~`` call: ## - ## .. 
code-block:: nim - ## + ## ```nim ## if line =~ peg"\s* {\w+} \s* '=' \s* {\w+}": ## # matches a key=value pair: ## echo("Key: ", matches[0]) @@ -861,50 +1195,51 @@ template `=~`*(s: string, pattern: Peg): bool = ## echo("comment: ", matches[0]) ## else: ## echo("syntax error") - ## + ## ``` bind MaxSubpatterns when not declaredInScope(matches): - var matches {.inject.}: array[0..MaxSubpatterns-1, string] + var matches {.inject.} = default(array[0..MaxSubpatterns-1, string]) match(s, pattern, matches) # ------------------------- more string handling ------------------------------ -proc contains*(s: string, pattern: Peg, start = 0): bool {. - nosideEffect, rtl, extern: "npegs$1".} = +func contains*(s: string, pattern: Peg, start = 0): bool {. + rtl, extern: "npegs$1".} = ## same as ``find(s, pattern, start) >= 0`` return find(s, pattern, start) >= 0 -proc contains*(s: string, pattern: Peg, matches: var openArray[string], - start = 0): bool {.nosideEffect, rtl, extern: "npegs$1Capture".} = +func contains*(s: string, pattern: Peg, matches: var openArray[string], + start = 0): bool {.rtl, extern: "npegs$1Capture".} = ## same as ``find(s, pattern, matches, start) >= 0`` return find(s, pattern, matches, start) >= 0 -proc startsWith*(s: string, prefix: Peg, start = 0): bool {. - nosideEffect, rtl, extern: "npegs$1".} = +func startsWith*(s: string, prefix: Peg, start = 0): bool {. + rtl, extern: "npegs$1".} = ## returns true if `s` starts with the pattern `prefix` result = matchLen(s, prefix, start) >= 0 -proc endsWith*(s: string, suffix: Peg, start = 0): bool {. - nosideEffect, rtl, extern: "npegs$1".} = - ## returns true if `s` ends with the pattern `prefix` +func endsWith*(s: string, suffix: Peg, start = 0): bool {. + rtl, extern: "npegs$1".} = + ## returns true if `s` ends with the pattern `suffix` var c: Captures c.origStart = start for i in start .. 
s.len-1: if rawMatch(s, suffix, i, c) == s.len - i: return true -proc replacef*(s: string, sub: Peg, by: string): string {. - nosideEffect, rtl, extern: "npegs$1".} = +func replacef*(s: string, sub: Peg, by: string): string {. + rtl, extern: "npegs$1".} = ## Replaces `sub` in `s` by the string `by`. Captures can be accessed in `by` ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples: ## - ## .. code-block:: nim + ## ```nim ## "var1=key; var2=key2".replacef(peg"{\ident}'='{\ident}", "$1<-$2$2") + ## ``` ## ## Results in: ## - ## .. code-block:: nim - ## + ## ```nim ## "var1<-keykey; val2<-key2key2" + ## ``` result = "" var i = 0 var caps: array[0..MaxSubpatterns-1, string] @@ -921,8 +1256,8 @@ proc replacef*(s: string, sub: Peg, by: string): string {. inc(i, x) add(result, substr(s, i)) -proc replace*(s: string, sub: Peg, by = ""): string {. - nosideEffect, rtl, extern: "npegs$1".} = +func replace*(s: string, sub: Peg, by = ""): string {. + rtl, extern: "npegs$1".} = ## Replaces `sub` in `s` by the string `by`. Captures cannot be accessed ## in `by`. result = "" @@ -938,9 +1273,9 @@ proc replace*(s: string, sub: Peg, by = ""): string {. inc(i, x) add(result, substr(s, i)) -proc parallelReplace*(s: string, subs: varargs[ +func parallelReplace*(s: string, subs: varargs[ tuple[pattern: Peg, repl: string]]): string {. - nosideEffect, rtl, extern: "npegs$1".} = + rtl, extern: "npegs$1".} = ## Returns a modified copy of `s` with the substitutions in `subs` ## applied in parallel. result = "" @@ -962,14 +1297,65 @@ proc parallelReplace*(s: string, subs: varargs[ # copy the rest: add(result, substr(s, i)) -proc transformFile*(infile, outfile: string, - subs: varargs[tuple[pattern: Peg, repl: string]]) {. - rtl, extern: "npegs$1".} = - ## reads in the file `infile`, performs a parallel replacement (calls - ## `parallelReplace`) and writes back to `outfile`. Raises ``EIO`` if an - ## error occurs. This is supposed to be used for quick scripting. 
- var x = readFile(infile).string - writeFile(outfile, x.parallelReplace(subs)) +when not defined(nimHasEffectsOf): + {.pragma: effectsOf.} + +func replace*(s: string, sub: Peg, cb: proc( + match: int, cnt: int, caps: openArray[string]): string): string {. + rtl, extern: "npegs$1cb", effectsOf: cb.} = + ## Replaces `sub` in `s` by the resulting strings from the callback. + ## The callback proc receives the index of the current match (starting with 0), + ## the count of captures and an open array with the captures of each match. Examples: + ## + ## ```nim + ## func handleMatches*(m: int, n: int, c: openArray[string]): string = + ## result = "" + ## if m > 0: + ## result.add ", " + ## result.add case n: + ## of 2: c[0].toLower & ": '" & c[1] & "'" + ## of 1: c[0].toLower & ": ''" + ## else: "" + ## + ## let s = "Var1=key1;var2=Key2; VAR3" + ## echo s.replace(peg"{\ident}('='{\ident})* ';'* \s*", handleMatches) + ## ``` + ## + ## Results in: + ## + ## ```nim + ## "var1: 'key1', var2: 'Key2', var3: ''" + ## ``` + result = "" + var i = 0 + var caps: array[0..MaxSubpatterns-1, string] + var c: Captures + var m = 0 + while i < s.len: + c.ml = 0 + var x = rawMatch(s, sub, i, c) + if x <= 0: + add(result, s[i]) + inc(i) + else: + fillMatches(s, caps, c) + add(result, cb(m, c.ml, caps)) + inc(i, x) + inc(m) + add(result, substr(s, i)) + +when not defined(js): + proc transformFile*(infile, outfile: string, + subs: varargs[tuple[pattern: Peg, repl: string]]) {. + rtl, extern: "npegs$1".} = + ## reads in the file `infile`, performs a parallel replacement (calls + ## `parallelReplace`) and writes back to `outfile`. Raises ``IOError`` if an + ## error occurs. This is supposed to be used for quick scripting. + ## + ## **Note**: this proc does not exist while using the JS backend. + var x = readFile(infile) + writeFile(outfile, x.parallelReplace(subs)) + iterator split*(s: string, sep: Peg): string = ## Splits the string `s` into substrings. 
@@ -977,18 +1363,19 @@ iterator split*(s: string, sep: Peg): string = ## Substrings are separated by the PEG `sep`. ## Examples: ## - ## .. code-block:: nim + ## ```nim ## for word in split("00232this02939is39an22example111", peg"\d+"): - ## writeln(stdout, word) + ## writeLine(stdout, word) + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ```nim ## "this" ## "is" ## "an" ## "example" - ## + ## ``` var c: Captures var first = 0 @@ -1006,83 +1393,85 @@ iterator split*(s: string, sep: Peg): string = if first < last: yield substr(s, first, last-1) -proc split*(s: string, sep: Peg): seq[string] {. - nosideEffect, rtl, extern: "npegs$1".} = +func split*(s: string, sep: Peg): seq[string] {. + rtl, extern: "npegs$1".} = ## Splits the string `s` into substrings. - accumulateResult(split(s, sep)) + result = @[] + for it in split(s, sep): result.add it # ------------------- scanner ------------------------------------------------- type - TModifier = enum + Modifier = enum modNone, modVerbatim, modIgnoreCase, modIgnoreStyle - TTokKind = enum ## enumeration of all tokens - tkInvalid, ## invalid token - tkEof, ## end of file reached - tkAny, ## . - tkAnyRune, ## _ - tkIdentifier, ## abc - tkStringLit, ## "abc" or 'abc' - tkCharSet, ## [^A-Z] - tkParLe, ## '(' - tkParRi, ## ')' - tkCurlyLe, ## '{' - tkCurlyRi, ## '}' - tkCurlyAt, ## '{@}' - tkArrow, ## '<-' - tkBar, ## '/' - tkStar, ## '*' - tkPlus, ## '+' - tkAmp, ## '&' - tkNot, ## '!' - tkOption, ## '?' - tkAt, ## '@' - tkBuiltin, ## \identifier - tkEscaped, ## \\ - tkBackref, ## '$' - tkDollar, ## '$' - tkHat ## '^' - - TToken {.final.} = object ## a token - kind: TTokKind ## the type of the token - modifier: TModifier - literal: string ## the parsed (string) literal - charset: set[char] ## if kind == tkCharSet - index: int ## if kind == tkBackref - - PegLexer {.inheritable.} = object ## the lexer object. 
- bufpos: int ## the current position within the buffer - buf: cstring ## the buffer itself - lineNumber: int ## the current line number - lineStart: int ## index of last line start in buffer - colOffset: int ## column to add + TokKind = enum ## enumeration of all tokens + tkInvalid, ## invalid token + tkEof, ## end of file reached + tkAny, ## . + tkAnyRune, ## _ + tkIdentifier, ## abc + tkStringLit, ## "abc" or 'abc' + tkCharSet, ## [^A-Z] + tkParLe, ## '(' + tkParRi, ## ')' + tkCurlyLe, ## '{' + tkCurlyRi, ## '}' + tkCurlyAt, ## '{@}' + tkEmptyCurl, ## '{}' + tkArrow, ## '<-' + tkBar, ## '/' + tkStar, ## '*' + tkPlus, ## '+' + tkAmp, ## '&' + tkNot, ## '!' + tkOption, ## '?' + tkAt, ## '@' + tkBuiltin, ## \identifier + tkEscaped, ## \\ + tkBackref, ## '$' + tkDollar, ## '$' + tkHat ## '^' + + Token {.final.} = object ## a token + kind: TokKind ## the type of the token + modifier: Modifier + literal: string ## the parsed (string) literal + charset: set[char] ## if kind == tkCharSet + index: int ## if kind == tkBackref + + PegLexer {.inheritable.} = object ## the lexer object. 
+ bufpos: int ## the current position within the buffer + buf: string ## the buffer itself + lineNumber: int ## the current line number + lineStart: int ## index of last line start in buffer + colOffset: int ## column to add filename: string const - tokKindToStr: array[TTokKind, string] = [ + tokKindToStr: array[TokKind, string] = [ "invalid", "[EOF]", ".", "_", "identifier", "string literal", - "character set", "(", ")", "{", "}", "{@}", + "character set", "(", ")", "{", "}", "{@}", "{}", "<-", "/", "*", "+", "&", "!", "?", "@", "built-in", "escaped", "$", "$", "^" ] -proc handleCR(L: var PegLexer, pos: int): int = +func handleCR(L: var PegLexer, pos: int): int = assert(L.buf[pos] == '\c') inc(L.lineNumber) result = pos+1 - if L.buf[result] == '\L': inc(result) + if result < L.buf.len and L.buf[result] == '\L': inc(result) L.lineStart = result -proc handleLF(L: var PegLexer, pos: int): int = +func handleLF(L: var PegLexer, pos: int): int = assert(L.buf[pos] == '\L') inc(L.lineNumber) result = pos+1 L.lineStart = result -proc init(L: var PegLexer, input, filename: string, line = 1, col = 0) = +func init(L: var PegLexer, input, filename: string, line = 1, col = 0) = L.buf = input L.bufpos = 0 L.lineNumber = line @@ -1090,32 +1479,22 @@ proc init(L: var PegLexer, input, filename: string, line = 1, col = 0) = L.lineStart = 0 L.filename = filename -proc getColumn(L: PegLexer): int {.inline.} = +func getColumn(L: PegLexer): int {.inline.} = result = abs(L.bufpos - L.lineStart) + L.colOffset -proc getLine(L: PegLexer): int {.inline.} = +func getLine(L: PegLexer): int {.inline.} = result = L.lineNumber -proc errorStr(L: PegLexer, msg: string, line = -1, col = -1): string = +func errorStr(L: PegLexer, msg: string, line = -1, col = -1): string = var line = if line < 0: getLine(L) else: line var col = if col < 0: getColumn(L) else: col result = "$1($2, $3) Error: $4" % [L.filename, $line, $col, msg] -proc handleHexChar(c: var PegLexer, xi: var int) = - case c.buf[c.bufpos] - 
of '0'..'9': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - of 'a'..'f': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) - inc(c.bufpos) - of 'A'..'F': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) - inc(c.bufpos) - else: discard - -proc getEscapedChar(c: var PegLexer, tok: var TToken) = +func getEscapedChar(c: var PegLexer, tok: var Token) = inc(c.bufpos) + if c.bufpos >= len(c.buf): + tok.kind = tkInvalid + return case c.buf[c.bufpos] of 'r', 'R', 'c', 'C': add(tok.literal, '\c') @@ -1143,16 +1522,21 @@ proc getEscapedChar(c: var PegLexer, tok: var TToken) = inc(c.bufpos) of 'x', 'X': inc(c.bufpos) + if c.bufpos >= len(c.buf): + tok.kind = tkInvalid + return var xi = 0 - handleHexChar(c, xi) - handleHexChar(c, xi) + if handleHexChar(c.buf[c.bufpos], xi): + inc(c.bufpos) + if handleHexChar(c.buf[c.bufpos], xi): + inc(c.bufpos) if xi == 0: tok.kind = tkInvalid else: add(tok.literal, chr(xi)) of '0'..'9': var val = ord(c.buf[c.bufpos]) - ord('0') inc(c.bufpos) var i = 1 - while (i <= 3) and (c.buf[c.bufpos] in {'0'..'9'}): + while (c.bufpos < len(c.buf)) and (i <= 3) and (c.buf[c.bufpos] in {'0'..'9'}): val = val * 10 + ord(c.buf[c.bufpos]) - ord('0') inc(c.bufpos) inc(i) @@ -1166,32 +1550,29 @@ proc getEscapedChar(c: var PegLexer, tok: var TToken) = add(tok.literal, c.buf[c.bufpos]) inc(c.bufpos) -proc skip(c: var PegLexer) = +func skip(c: var PegLexer) = var pos = c.bufpos - var buf = c.buf - while true: - case buf[pos] + while pos < c.buf.len: + case c.buf[pos] of ' ', '\t': inc(pos) of '#': - while not (buf[pos] in {'\c', '\L', '\0'}): inc(pos) + while (pos < c.buf.len) and + not (c.buf[pos] in {'\c', '\L', '\0'}): inc(pos) of '\c': pos = handleCR(c, pos) - buf = c.buf of '\L': pos = handleLF(c, pos) - buf = c.buf else: - break # EndOfFile also leaves the loop + break # EndOfFile also leaves the loop c.bufpos = pos -proc getString(c: var PegLexer, tok: var TToken) = +func getString(c: var PegLexer, tok: var 
Token) = tok.kind = tkStringLit var pos = c.bufpos + 1 - var buf = c.buf - var quote = buf[pos-1] - while true: - case buf[pos] + var quote = c.buf[pos-1] + while pos < c.buf.len: + case c.buf[pos] of '\\': c.bufpos = pos getEscapedChar(c, tok) @@ -1199,90 +1580,102 @@ proc getString(c: var PegLexer, tok: var TToken) = of '\c', '\L', '\0': tok.kind = tkInvalid break - elif buf[pos] == quote: + elif c.buf[pos] == quote: inc(pos) break else: - add(tok.literal, buf[pos]) + add(tok.literal, c.buf[pos]) inc(pos) c.bufpos = pos -proc getDollar(c: var PegLexer, tok: var TToken) = +func getDollar(c: var PegLexer, tok: var Token) = var pos = c.bufpos + 1 - var buf = c.buf - if buf[pos] in {'0'..'9'}: + var neg = false + if pos < c.buf.len and c.buf[pos] == '^': + neg = true + inc(pos) + if pos < c.buf.len and c.buf[pos] in {'0'..'9'}: tok.kind = tkBackref tok.index = 0 - while buf[pos] in {'0'..'9'}: - tok.index = tok.index * 10 + ord(buf[pos]) - ord('0') + while pos < c.buf.len and c.buf[pos] in {'0'..'9'}: + tok.index = tok.index * 10 + ord(c.buf[pos]) - ord('0') inc(pos) + if neg: + tok.index = -tok.index else: + if neg: + dec(pos) tok.kind = tkDollar c.bufpos = pos -proc getCharSet(c: var PegLexer, tok: var TToken) = +func getCharSet(c: var PegLexer, tok: var Token) = tok.kind = tkCharSet tok.charset = {} var pos = c.bufpos + 1 - var buf = c.buf var caret = false - if buf[pos] == '^': - inc(pos) - caret = true - while true: - var ch: char - case buf[pos] - of ']': + if pos < c.buf.len: + if c.buf[pos] == '^': inc(pos) - break - of '\\': - c.bufpos = pos - getEscapedChar(c, tok) - pos = c.bufpos - ch = tok.literal[tok.literal.len-1] - of '\C', '\L', '\0': - tok.kind = tkInvalid - break - else: - ch = buf[pos] - inc(pos) - incl(tok.charset, ch) - if buf[pos] == '-': - if buf[pos+1] == ']': - incl(tok.charset, '-') - inc(pos) + caret = true + while pos < c.buf.len: + var ch: char + case c.buf[pos] + of ']': + if pos < c.buf.len: inc(pos) + break + of '\\': + c.bufpos = pos 
+ getEscapedChar(c, tok) + pos = c.bufpos + ch = tok.literal[tok.literal.len-1] + of '\C', '\L', '\0': + tok.kind = tkInvalid + break else: + ch = c.buf[pos] inc(pos) - var ch2: char - case buf[pos] - of '\\': - c.bufpos = pos - getEscapedChar(c, tok) - pos = c.bufpos - ch2 = tok.literal[tok.literal.len-1] - of '\C', '\L', '\0': - tok.kind = tkInvalid - break - else: - ch2 = buf[pos] + incl(tok.charset, ch) + if c.buf[pos] == '-': + if pos+1 < c.buf.len and c.buf[pos+1] == ']': + incl(tok.charset, '-') inc(pos) - for i in ord(ch)+1 .. ord(ch2): - incl(tok.charset, chr(i)) + else: + if pos+1 < c.buf.len: + inc(pos) + else: + break + var ch2: char + case c.buf[pos] + of '\\': + c.bufpos = pos + getEscapedChar(c, tok) + pos = c.bufpos + ch2 = tok.literal[tok.literal.len-1] + of '\C', '\L', '\0': + tok.kind = tkInvalid + break + else: + if pos+1 < c.buf.len: + ch2 = c.buf[pos] + inc(pos) + else: + break + for i in ord(ch)+1 .. ord(ch2): + incl(tok.charset, chr(i)) c.bufpos = pos if caret: tok.charset = {'\1'..'\xFF'} - tok.charset -proc getSymbol(c: var PegLexer, tok: var TToken) = +func getSymbol(c: var PegLexer, tok: var Token) = var pos = c.bufpos - var buf = c.buf - while true: - add(tok.literal, buf[pos]) + while pos < c.buf.len: + add(tok.literal, c.buf[pos]) inc(pos) - if buf[pos] notin strutils.IdentChars: break + if pos < c.buf.len and c.buf[pos] notin strutils.IdentChars: break c.bufpos = pos tok.kind = tkIdentifier -proc getBuiltin(c: var PegLexer, tok: var TToken) = - if c.buf[c.bufpos+1] in strutils.Letters: +func getBuiltin(c: var PegLexer, tok: var Token) = + if c.bufpos+1 < c.buf.len and c.buf[c.bufpos+1] in strutils.Letters: inc(c.bufpos) getSymbol(c, tok) tok.kind = tkBuiltin @@ -1290,18 +1683,31 @@ proc getBuiltin(c: var PegLexer, tok: var TToken) = tok.kind = tkEscaped getEscapedChar(c, tok) # may set tok.kind to tkInvalid -proc getTok(c: var PegLexer, tok: var TToken) = +func getTok(c: var PegLexer, tok: var Token) = tok.kind = tkInvalid 
tok.modifier = modNone setLen(tok.literal, 0) skip(c) + + if c.bufpos >= c.buf.len: + tok.kind = tkEof + tok.literal = "[EOF]" + add(tok.literal, '\0') + inc(c.bufpos) + return + case c.buf[c.bufpos] of '{': inc(c.bufpos) - if c.buf[c.bufpos] == '@' and c.buf[c.bufpos+1] == '}': + if c.buf[c.bufpos] == '@' and c.bufpos+2 < c.buf.len and + c.buf[c.bufpos+1] == '}': tok.kind = tkCurlyAt inc(c.bufpos, 2) add(tok.literal, "{@}") + elif c.buf[c.bufpos] == '}' and c.bufpos < c.buf.len: + tok.kind = tkEmptyCurl + inc(c.bufpos) + add(tok.literal, "{}") else: tok.kind = tkCurlyLe add(tok.literal, '{') @@ -1331,13 +1737,13 @@ proc getTok(c: var PegLexer, tok: var TToken) = getBuiltin(c, tok) of '\'', '"': getString(c, tok) of '$': getDollar(c, tok) - of '\0': - tok.kind = tkEof - tok.literal = "[EOF]" of 'a'..'z', 'A'..'Z', '\128'..'\255': getSymbol(c, tok) + if c.bufpos >= c.buf.len: + return if c.buf[c.bufpos] in {'\'', '"'} or - c.buf[c.bufpos] == '$' and c.buf[c.bufpos+1] in {'0'..'9'}: + c.buf[c.bufpos] == '$' and c.bufpos+1 < c.buf.len and + c.buf[c.bufpos+1] in {'^', '0'..'9'}: case tok.literal of "i": tok.modifier = modIgnoreCase of "y": tok.modifier = modIgnoreStyle @@ -1358,7 +1764,7 @@ proc getTok(c: var PegLexer, tok: var TToken) = inc(c.bufpos) add(tok.literal, '+') of '<': - if c.buf[c.bufpos+1] == '-': + if c.bufpos+2 < c.buf.len and c.buf[c.bufpos+1] == '-': inc(c.bufpos, 2) tok.kind = tkArrow add(tok.literal, "<-") @@ -1393,45 +1799,48 @@ proc getTok(c: var PegLexer, tok: var TToken) = inc(c.bufpos) add(tok.literal, '^') else: + if c.bufpos >= c.buf.len: + tok.kind = tkEof + tok.literal = "[EOF]" add(tok.literal, c.buf[c.bufpos]) inc(c.bufpos) -proc arrowIsNextTok(c: PegLexer): bool = +func arrowIsNextTok(c: PegLexer): bool = # the only look ahead we need var pos = c.bufpos - while c.buf[pos] in {'\t', ' '}: inc(pos) + while pos < c.buf.len and c.buf[pos] in {'\t', ' '}: inc(pos) + if pos+1 >= c.buf.len: + return result = c.buf[pos] == '<' and c.buf[pos+1] 
== '-' # ----------------------------- parser ---------------------------------------- type EInvalidPeg* = object of ValueError ## raised if an invalid - ## PEG has been detected + ## PEG has been detected PegParser = object of PegLexer ## the PEG parser object - tok: TToken + tok: Token nonterms: seq[NonTerminal] - modifier: TModifier + modifier: Modifier captures: int identIsVerbatim: bool skip: Peg -proc pegError(p: PegParser, msg: string, line = -1, col = -1) = - var e: ref EInvalidPeg - new(e) - e.msg = errorStr(p, msg, line, col) +func pegError(p: PegParser, msg: string, line = -1, col = -1) = + var e = (ref EInvalidPeg)(msg: errorStr(p, msg, line, col)) raise e -proc getTok(p: var PegParser) = +func getTok(p: var PegParser) = getTok(p, p.tok) - if p.tok.kind == tkInvalid: pegError(p, "invalid token") + if p.tok.kind == tkInvalid: pegError(p, "'" & p.tok.literal & "' is invalid token") -proc eat(p: var PegParser, kind: TTokKind) = +func eat(p: var PegParser, kind: TokKind) = if p.tok.kind == kind: getTok(p) else: pegError(p, tokKindToStr[kind] & " expected") -proc parseExpr(p: var PegParser): Peg {.gcsafe.} +func parseExpr(p: var PegParser): Peg {.gcsafe.} -proc getNonTerminal(p: var PegParser, name: string): NonTerminal = +func getNonTerminal(p: var PegParser, name: string): NonTerminal = for i in 0..high(p.nonterms): result = p.nonterms[i] if cmpIgnoreStyle(result.name, name) == 0: return @@ -1439,19 +1848,22 @@ proc getNonTerminal(p: var PegParser, name: string): NonTerminal = result = newNonTerminal(name, getLine(p), getColumn(p)) add(p.nonterms, result) -proc modifiedTerm(s: string, m: TModifier): Peg = +func modifiedTerm(s: string, m: Modifier): Peg = case m of modNone, modVerbatim: result = term(s) of modIgnoreCase: result = termIgnoreCase(s) of modIgnoreStyle: result = termIgnoreStyle(s) -proc modifiedBackref(s: int, m: TModifier): Peg = +func modifiedBackref(s: int, m: Modifier): Peg = + var + reverse = s < 0 + index = if reverse: -s else: s case m - 
of modNone, modVerbatim: result = backref(s) - of modIgnoreCase: result = backrefIgnoreCase(s) - of modIgnoreStyle: result = backrefIgnoreStyle(s) + of modNone, modVerbatim: result = backref(index, reverse) + of modIgnoreCase: result = backrefIgnoreCase(index, reverse) + of modIgnoreStyle: result = backrefIgnoreStyle(index, reverse) -proc builtin(p: var PegParser): Peg = +func builtin(p: var PegParser): Peg = # do not use "y", "skip" or "i" as these would be ambiguous case p.tok.literal of "n": result = newLine() @@ -1460,7 +1872,7 @@ proc builtin(p: var PegParser): Peg = of "s": result = charSet({' ', '\9'..'\13'}) of "S": result = charSet({'\1'..'\xff'} - {' ', '\9'..'\13'}) of "w": result = charSet({'a'..'z', 'A'..'Z', '_', '0'..'9'}) - of "W": result = charSet({'\1'..'\xff'} - {'a'..'z','A'..'Z','_','0'..'9'}) + of "W": result = charSet({'\1'..'\xff'} - {'a'..'z', 'A'..'Z', '_', '0'..'9'}) of "a": result = charSet({'a'..'z', 'A'..'Z'}) of "A": result = charSet({'\1'..'\xff'} - {'a'..'z', 'A'..'Z'}) of "ident": result = pegs.ident @@ -1471,11 +1883,11 @@ proc builtin(p: var PegParser): Peg = of "white": result = unicodeWhitespace() else: pegError(p, "unknown built-in: " & p.tok.literal) -proc token(terminal: Peg, p: PegParser): Peg = +func token(terminal: Peg, p: PegParser): Peg = if p.skip.kind == pkEmpty: result = terminal else: result = sequence(p.skip, terminal) -proc primary(p: var PegParser): Peg = +func primary(p: var PegParser): Peg = case p.tok.kind of tkAmp: getTok(p) @@ -1499,7 +1911,8 @@ proc primary(p: var PegParser): Peg = getTok(p) elif not arrowIsNextTok(p): var nt = getNonTerminal(p, p.tok.literal) - incl(nt.flags, ntUsed) + {.cast(noSideEffect).}: + incl(nt.flags, ntUsed) result = nonterminal(nt).token(p) getTok(p) else: @@ -1523,6 +1936,9 @@ proc primary(p: var PegParser): Peg = result = capture(parseExpr(p)).token(p) eat(p, tkCurlyRi) inc(p.captures) + of tkEmptyCurl: + result = capture() + getTok(p) of tkAny: result = any().token(p) 
getTok(p) @@ -1542,11 +1958,11 @@ proc primary(p: var PegParser): Peg = result = startAnchor() getTok(p) of tkBackref: + if abs(p.tok.index) > p.captures or p.tok.index == 0: + pegError(p, "invalid back reference index: " & $p.tok.index) var m = p.tok.modifier if m == modNone: m = p.modifier result = modifiedBackref(p.tok.index, m).token(p) - if p.tok.index < 0 or p.tok.index > p.captures: - pegError(p, "invalid back reference index: " & $p.tok.index) getTok(p) else: pegError(p, "expression expected, but found: " & p.tok.literal) @@ -1564,13 +1980,13 @@ proc primary(p: var PegParser): Peg = getTok(p) else: break -proc seqExpr(p: var PegParser): Peg = +func seqExpr(p: var PegParser): Peg = result = primary(p) while true: case p.tok.kind of tkAmp, tkNot, tkAt, tkStringLit, tkCharSet, tkParLe, tkCurlyLe, tkAny, tkAnyRune, tkBuiltin, tkEscaped, tkDollar, tkBackref, - tkHat, tkCurlyAt: + tkHat, tkCurlyAt, tkEmptyCurl: result = sequence(result, primary(p)) of tkIdentifier: if not arrowIsNextTok(p): @@ -1578,27 +1994,29 @@ proc seqExpr(p: var PegParser): Peg = else: break else: break -proc parseExpr(p: var PegParser): Peg = +func parseExpr(p: var PegParser): Peg = result = seqExpr(p) while p.tok.kind == tkBar: getTok(p) result = result / seqExpr(p) -proc parseRule(p: var PegParser): NonTerminal = +func parseRule(p: var PegParser): NonTerminal = if p.tok.kind == tkIdentifier and arrowIsNextTok(p): result = getNonTerminal(p, p.tok.literal) if ntDeclared in result.flags: pegError(p, "attempt to redefine: " & result.name) - result.line = getLine(p) - result.col = getColumn(p) + {.cast(noSideEffect).}: + result.line = getLine(p) + result.col = getColumn(p) getTok(p) eat(p, tkArrow) - result.rule = parseExpr(p) - incl(result.flags, ntDeclared) # NOW inlining may be attempted + {.cast(noSideEffect).}: + result.rule = parseExpr(p) + incl(result.flags, ntDeclared) # NOW inlining may be attempted else: pegError(p, "rule expected, but found: " & p.tok.literal) -proc rawParse(p: var 
PegParser): Peg = +func rawParse(p: var PegParser): Peg = ## parses a rule or a PEG expression while p.tok.kind == tkBuiltin: case p.tok.literal @@ -1628,7 +2046,7 @@ proc rawParse(p: var PegParser): Peg = elif ntUsed notin nt.flags and i > 0: pegError(p, "unused rule: " & nt.name, nt.line, nt.col) -proc parsePeg*(pattern: string, filename = "pattern", line = 1, col = 0): Peg = +func parsePeg*(pattern: string, filename = "pattern", line = 1, col = 0): Peg = ## constructs a Peg object from `pattern`. `filename`, `line`, `col` are ## used for error messages, but they only provide start offsets. `parsePeg` ## keeps track of line and column numbers within `pattern`. @@ -1643,14 +2061,14 @@ proc parsePeg*(pattern: string, filename = "pattern", line = 1, col = 0): Peg = getTok(p) result = rawParse(p) -proc peg*(pattern: string): Peg = +func peg*(pattern: string): Peg = ## constructs a Peg object from the `pattern`. The short name has been - ## chosen to encourage its use as a raw string modifier:: + ## chosen to encourage its use as a raw string modifier: ## - ## peg"{\ident} \s* '=' \s* {.*}" + ## peg"{\ident} \s* '=' \s* {.*}" result = parsePeg(pattern, "pattern") -proc escapePeg*(s: string): string = +func escapePeg*(s: string): string = ## escapes `s` so that it is matched verbatim when used as a peg. 
result = "" var inQuote = false @@ -1668,124 +2086,3 @@ proc escapePeg*(s: string): string = inQuote = true result.add(c) if inQuote: result.add('\'') - -when isMainModule: - assert escapePeg("abc''def'") == r"'abc'\x27\x27'def'\x27" - assert match("(a b c)", peg"'(' @ ')'") - assert match("W_HI_Le", peg"\y 'while'") - assert(not match("W_HI_L", peg"\y 'while'")) - assert(not match("W_HI_Le", peg"\y v'while'")) - assert match("W_HI_Le", peg"y'while'") - - assert($ +digits == $peg"\d+") - assert "0158787".match(peg"\d+") - assert "ABC 0232".match(peg"\w+\s+\d+") - assert "ABC".match(peg"\d+ / \w+") - - var accum: seq[string] = @[] - for word in split("00232this02939is39an22example111", peg"\d+"): - accum.add(word) - assert(accum == @["this", "is", "an", "example"]) - - assert matchLen("key", ident) == 3 - - var pattern = sequence(ident, *whitespace, term('='), *whitespace, ident) - assert matchLen("key1= cal9", pattern) == 11 - - var ws = newNonTerminal("ws", 1, 1) - ws.rule = *whitespace - - var expr = newNonTerminal("expr", 1, 1) - expr.rule = sequence(capture(ident), *sequence( - nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr))) - - var c: Captures - var s = "a+b + c +d+e+f" - assert rawMatch(s, expr.rule, 0, c) == len(s) - var a = "" - for i in 0..c.ml-1: - a.add(substr(s, c.matches[i][0], c.matches[i][1])) - assert a == "abcdef" - #echo expr.rule - - #const filename = "lib/devel/peg/grammar.txt" - #var grammar = parsePeg(newFileStream(filename, fmRead), filename) - #echo "a <- [abc]*?".match(grammar) - assert find("_____abc_______", term("abc"), 2) == 5 - assert match("_______ana", peg"A <- 'ana' / . 
A") - assert match("abcs%%%", peg"A <- ..A / .A / '%'") - - var matches: array[0..MaxSubpatterns-1, string] - if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}": - assert matches[0] == "abc" - else: - assert false - - var g2 = peg"""S <- A B / C D - A <- 'a'+ - B <- 'b'+ - C <- 'c'+ - D <- 'd'+ - """ - assert($g2 == "((A B) / (C D))") - assert match("cccccdddddd", g2) - assert("var1=key; var2=key2".replacef(peg"{\ident}'='{\ident}", "$1<-$2$2") == - "var1<-keykey; var2<-key2key2") - assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") == - "$1<-$2$2; $1<-$2$2") - assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") - - if "aaaaaa" =~ peg"'aa' !. / ({'a'})+": - assert matches[0] == "a" - else: - assert false - - if match("abcdefg", peg"c {d} ef {g}", matches, 2): - assert matches[0] == "d" - assert matches[1] == "g" - else: - assert false - - accum = @[] - for x in findAll("abcdef", peg".", 3): - accum.add(x) - assert(accum == @["d", "e", "f"]) - - for x in findAll("abcdef", peg"^{.}", 3): - assert x == "d" - - if "f(a, b)" =~ peg"{[0-9]+} / ({\ident} '(' {@} ')')": - assert matches[0] == "f" - assert matches[1] == "a, b" - else: - assert false - - assert match("eine übersicht und außerdem", peg"(\letter \white*)+") - # ß is not a lower cased letter?! 
- assert match("eine übersicht und auerdem", peg"(\lower \white*)+") - assert match("EINE ÜBERSICHT UND AUSSERDEM", peg"(\upper \white*)+") - assert(not match("456678", peg"(\letter)+")) - - assert("var1 = key; var2 = key2".replacef( - peg"\skip(\s*) {\ident}'='{\ident}", "$1<-$2$2") == - "var1<-keykey;var2<-key2key2") - - assert match("prefix/start", peg"^start$", 7) - - if "foo" =~ peg"{'a'}?.*": - assert matches[0] == nil - else: assert false - - if "foo" =~ peg"{''}.*": - assert matches[0] == "" - else: assert false - - if "foo" =~ peg"{'foo'}": - assert matches[0] == "foo" - else: assert false - - let empty_test = peg"^\d*" - let str = "XYZ" - - assert(str.find(empty_test) == 0) - assert(str.match(empty_test)) diff --git a/lib/pure/pegs.nimfix b/lib/pure/pegs.nimfix deleted file mode 100644 index 15bc95351..000000000 --- a/lib/pure/pegs.nimfix +++ /dev/null @@ -1,1770 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Simple PEG (Parsing expression grammar) matching. Uses no memorization, but -## uses superoperators and symbol inlining to improve performance. Note: -## Matching performance is hopefully competitive with optimized regular -## expression engines. -## -## .. include:: ../doc/pegdocs.txt -## - -include "system/inclrtl" - -const - useUnicode = true ## change this to deactivate proper UTF-8 support - -import - strutils - -when useUnicode: - import unicode - -const - InlineThreshold = 5 ## number of leaves; -1 to disable inlining - MaxSubpatterns* = 10 ## defines the maximum number of subpatterns that - ## can be captured. More subpatterns cannot be captured! - -type - PegKind = enum - pkEmpty, - pkAny, ## any character (.) 
- pkAnyRune, ## any Unicode character (_) - pkNewLine, ## CR-LF, LF, CR - pkLetter, ## Unicode letter - pkLower, ## Unicode lower case letter - pkUpper, ## Unicode upper case letter - pkTitle, ## Unicode title character - pkWhitespace, ## Unicode whitespace character - pkTerminal, - pkTerminalIgnoreCase, - pkTerminalIgnoreStyle, - pkChar, ## single character to match - pkCharChoice, - pkNonTerminal, - pkSequence, ## a b c ... --> Internal DSL: peg(a, b, c) - pkOrderedChoice, ## a / b / ... --> Internal DSL: a / b or /[a, b, c] - pkGreedyRep, ## a* --> Internal DSL: *a - ## a+ --> (a a*) - pkGreedyRepChar, ## x* where x is a single character (superop) - pkGreedyRepSet, ## [set]* (superop) - pkGreedyAny, ## .* or _* (superop) - pkOption, ## a? --> Internal DSL: ?a - pkAndPredicate, ## &a --> Internal DSL: &a - pkNotPredicate, ## !a --> Internal DSL: !a - pkCapture, ## {a} --> Internal DSL: capture(a) - pkBackRef, ## $i --> Internal DSL: backref(i) - pkBackRefIgnoreCase, - pkBackRefIgnoreStyle, - pkSearch, ## @a --> Internal DSL: !*a - pkCapturedSearch, ## {@} a --> Internal DSL: !*\a - pkRule, ## a <- b - pkList, ## a, b - pkStartAnchor ## ^ --> Internal DSL: startAnchor() - NonTerminalFlag = enum - ntDeclared, ntUsed - NonTerminalObj = object ## represents a non terminal symbol - name: string ## the name of the symbol - line: int ## line the symbol has been declared/used in - col: int ## column the symbol has been declared/used in - flags: set[NonTerminalFlag] ## the nonterminal's flags - rule: TNode ## the rule that the symbol refers to - TNode {.shallow.} = object - case kind: PegKind - of pkEmpty..pkWhitespace: nil - of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle: term: string - of pkChar, pkGreedyRepChar: ch: char - of pkCharChoice, pkGreedyRepSet: charChoice: ref set[char] - of pkNonTerminal: nt: PNonTerminal - of pkBackRef..pkBackRefIgnoreStyle: index: range[0..MaxSubpatterns] - else: sons: seq[TNode] - PNonTerminal* = ref NonTerminalObj - TPeg* = 
TNode - -block: - type - Peg = TNode - NonTerminal = PNonTerminal - {.deprecated: [TPeg: Peg, PNonTerminal: NonTerminal].} - -proc term*(t: string): TPeg {.nosideEffect, rtl, extern: "npegs$1Str".} = - ## constructs a PEG from a terminal string - if t.len != 1: - result.kind = pkTerminal - result.term = t - else: - result.kind = pkChar - result.ch = t[0] - -proc termIgnoreCase*(t: string): TPeg {.nosideEffect, rtl, extern: "npegs$1".} = - ## constructs a PEG from a terminal string; ignore case for matching - result.kind = pkTerminalIgnoreCase - result.term = t - -proc termIgnoreStyle*(t: string): TPeg {.nosideEffect, rtl, extern: "npegs$1".} = - ## constructs a PEG from a terminal string; ignore style for matching - result.kind = pkTerminalIgnoreStyle - result.term = t - -proc term*(t: char): TPeg {.nosideEffect, rtl, extern: "npegs$1Char".} = - ## constructs a PEG from a terminal char - assert t != '\0' - result.kind = pkChar - result.ch = t - -proc charSet*(s: set[char]): TPeg {.nosideEffect, rtl, extern: "npegs$1".} = - ## constructs a PEG from a character set `s` - assert '\0' notin s - result.kind = pkCharChoice - new(result.charChoice) - result.charChoice[] = s - -proc len(a: TPeg): int {.inline.} = return a.sons.len -proc add(d: var TPeg, s: TPeg) {.inline.} = add(d.sons, s) - -proc addChoice(dest: var TPeg, elem: TPeg) = - var L = dest.len-1 - if L >= 0 and dest.sons[L].kind == pkCharChoice: - # caution! Do not introduce false aliasing here! 
- case elem.kind - of pkCharChoice: - dest.sons[L] = charSet(dest.sons[L].charChoice[] + elem.charChoice[]) - of pkChar: - dest.sons[L] = charSet(dest.sons[L].charChoice[] + {elem.ch}) - else: add(dest, elem) - else: add(dest, elem) - -template multipleOp(k: PegKind, localOpt: expr) = - result.kind = k - result.sons = @[] - for x in items(a): - if x.kind == k: - for y in items(x.sons): - localOpt(result, y) - else: - localOpt(result, x) - if result.len == 1: - result = result.sons[0] - -proc `/`*(a: varargs[TPeg]): TPeg {. - nosideEffect, rtl, extern: "npegsOrderedChoice".} = - ## constructs an ordered choice with the PEGs in `a` - multipleOp(pkOrderedChoice, addChoice) - -proc addSequence(dest: var TPeg, elem: TPeg) = - var L = dest.len-1 - if L >= 0 and dest.sons[L].kind == pkTerminal: - # caution! Do not introduce false aliasing here! - case elem.kind - of pkTerminal: - dest.sons[L] = term(dest.sons[L].term & elem.term) - of pkChar: - dest.sons[L] = term(dest.sons[L].term & elem.ch) - else: add(dest, elem) - else: add(dest, elem) - -proc sequence*(a: varargs[TPeg]): TPeg {. - nosideEffect, rtl, extern: "npegs$1".} = - ## constructs a sequence with all the PEGs from `a` - multipleOp(pkSequence, addSequence) - -proc `?`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsOptional".} = - ## constructs an optional for the PEG `a` - if a.kind in {pkOption, pkGreedyRep, pkGreedyAny, pkGreedyRepChar, - pkGreedyRepSet}: - # a* ? --> a* - # a? ? --> a? - result = a - else: - result.kind = pkOption - result.sons = @[a] - -proc `*`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsGreedyRep".} = - ## constructs a "greedy repetition" for the PEG `a` - case a.kind - of pkGreedyRep, pkGreedyRepChar, pkGreedyRepSet, pkGreedyAny, pkOption: - assert false - # produces endless loop! - of pkChar: - result.kind = pkGreedyRepChar - result.ch = a.ch - of pkCharChoice: - result.kind = pkGreedyRepSet - result.charChoice = a.charChoice # copying a reference suffices! 
- of pkAny, pkAnyRune: - result.kind = pkGreedyAny - else: - result.kind = pkGreedyRep - result.sons = @[a] - -proc `!*`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsSearch".} = - ## constructs a "search" for the PEG `a` - result.kind = pkSearch - result.sons = @[a] - -proc `!*\`*(a: TPeg): TPeg {.noSideEffect, rtl, - extern: "npgegsCapturedSearch".} = - ## constructs a "captured search" for the PEG `a` - result.kind = pkCapturedSearch - result.sons = @[a] - -proc `+`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsGreedyPosRep".} = - ## constructs a "greedy positive repetition" with the PEG `a` - return sequence(a, *a) - -proc `&`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsAndPredicate".} = - ## constructs an "and predicate" with the PEG `a` - result.kind = pkAndPredicate - result.sons = @[a] - -proc `!`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsNotPredicate".} = - ## constructs a "not predicate" with the PEG `a` - result.kind = pkNotPredicate - result.sons = @[a] - -proc any*: TPeg {.inline.} = - ## constructs the PEG `any character`:idx: (``.``) - result.kind = pkAny - -proc anyRune*: TPeg {.inline.} = - ## constructs the PEG `any rune`:idx: (``_``) - result.kind = pkAnyRune - -proc newLine*: TPeg {.inline.} = - ## constructs the PEG `newline`:idx: (``\n``) - result.kind = pkNewLine - -proc unicodeLetter*: TPeg {.inline.} = - ## constructs the PEG ``\letter`` which matches any Unicode letter. - result.kind = pkLetter - -proc unicodeLower*: TPeg {.inline.} = - ## constructs the PEG ``\lower`` which matches any Unicode lowercase letter. - result.kind = pkLower - -proc unicodeUpper*: TPeg {.inline.} = - ## constructs the PEG ``\upper`` which matches any Unicode uppercase letter. - result.kind = pkUpper - -proc unicodeTitle*: TPeg {.inline.} = - ## constructs the PEG ``\title`` which matches any Unicode title letter. 
- result.kind = pkTitle - -proc unicodeWhitespace*: TPeg {.inline.} = - ## constructs the PEG ``\white`` which matches any Unicode - ## whitespace character. - result.kind = pkWhitespace - -proc startAnchor*: TPeg {.inline.} = - ## constructs the PEG ``^`` which matches the start of the input. - result.kind = pkStartAnchor - -proc endAnchor*: TPeg {.inline.} = - ## constructs the PEG ``$`` which matches the end of the input. - result = !any() - -proc capture*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsCapture".} = - ## constructs a capture with the PEG `a` - result.kind = pkCapture - result.sons = @[a] - -proc backref*(index: range[1..MaxSubpatterns]): TPeg {. - nosideEffect, rtl, extern: "npegs$1".} = - ## constructs a back reference of the given `index`. `index` starts counting - ## from 1. - result.kind = pkBackRef - result.index = index-1 - -proc backrefIgnoreCase*(index: range[1..MaxSubpatterns]): TPeg {. - nosideEffect, rtl, extern: "npegs$1".} = - ## constructs a back reference of the given `index`. `index` starts counting - ## from 1. Ignores case for matching. - result.kind = pkBackRefIgnoreCase - result.index = index-1 - -proc backrefIgnoreStyle*(index: range[1..MaxSubpatterns]): TPeg {. - nosideEffect, rtl, extern: "npegs$1".}= - ## constructs a back reference of the given `index`. `index` starts counting - ## from 1. Ignores style for matching. - result.kind = pkBackRefIgnoreStyle - result.index = index-1 - -proc spaceCost(n: TPeg): int = - case n.kind - of pkEmpty: discard - of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle, pkChar, - pkGreedyRepChar, pkCharChoice, pkGreedyRepSet, - pkAny..pkWhitespace, pkGreedyAny: - result = 1 - of pkNonTerminal: - # we cannot inline a rule with a non-terminal - result = InlineThreshold+1 - else: - for i in 0..n.len-1: - inc(result, spaceCost(n.sons[i])) - if result >= InlineThreshold: break - -proc nonterminal*(n: PNonTerminal): TPeg {. 
- nosideEffect, rtl, extern: "npegs$1".} = - ## constructs a PEG that consists of the nonterminal symbol - assert n != nil - if ntDeclared in n.flags and spaceCost(n.rule) < InlineThreshold: - when false: echo "inlining symbol: ", n.name - result = n.rule # inlining of rule enables better optimizations - else: - result.kind = pkNonTerminal - result.nt = n - -proc newNonTerminal*(name: string, line, column: int): PNonTerminal {. - nosideEffect, rtl, extern: "npegs$1".} = - ## constructs a nonterminal symbol - new(result) - result.name = name - result.line = line - result.col = column - -template letters*: expr = - ## expands to ``charset({'A'..'Z', 'a'..'z'})`` - charSet({'A'..'Z', 'a'..'z'}) - -template digits*: expr = - ## expands to ``charset({'0'..'9'})`` - charSet({'0'..'9'}) - -template whitespace*: expr = - ## expands to ``charset({' ', '\9'..'\13'})`` - charSet({' ', '\9'..'\13'}) - -template identChars*: expr = - ## expands to ``charset({'a'..'z', 'A'..'Z', '0'..'9', '_'})`` - charSet({'a'..'z', 'A'..'Z', '0'..'9', '_'}) - -template identStartChars*: expr = - ## expands to ``charset({'A'..'Z', 'a'..'z', '_'})`` - charSet({'a'..'z', 'A'..'Z', '_'}) - -template ident*: expr = - ## same as ``[a-zA-Z_][a-zA-z_0-9]*``; standard identifier - sequence(charSet({'a'..'z', 'A'..'Z', '_'}), - *charSet({'a'..'z', 'A'..'Z', '0'..'9', '_'})) - -template natural*: expr = - ## same as ``\d+`` - +digits - -# ------------------------- debugging ----------------------------------------- - -proc esc(c: char, reserved = {'\0'..'\255'}): string = - case c - of '\b': result = "\\b" - of '\t': result = "\\t" - of '\c': result = "\\c" - of '\L': result = "\\l" - of '\v': result = "\\v" - of '\f': result = "\\f" - of '\e': result = "\\e" - of '\a': result = "\\a" - of '\\': result = "\\\\" - of 'a'..'z', 'A'..'Z', '0'..'9', '_': result = $c - elif c < ' ' or c >= '\128': result = '\\' & $ord(c) - elif c in reserved: result = '\\' & c - else: result = $c - -proc singleQuoteEsc(c: 
char): string = return "'" & esc(c, {'\''}) & "'" - -proc singleQuoteEsc(str: string): string = - result = "'" - for c in items(str): add result, esc(c, {'\''}) - add result, '\'' - -proc charSetEscAux(cc: set[char]): string = - const reserved = {'^', '-', ']'} - result = "" - var c1 = 0 - while c1 <= 0xff: - if chr(c1) in cc: - var c2 = c1 - while c2 < 0xff and chr(succ(c2)) in cc: inc(c2) - if c1 == c2: - add result, esc(chr(c1), reserved) - elif c2 == succ(c1): - add result, esc(chr(c1), reserved) & esc(chr(c2), reserved) - else: - add result, esc(chr(c1), reserved) & '-' & esc(chr(c2), reserved) - c1 = c2 - inc(c1) - -proc charSetEsc(cc: set[char]): string = - if card(cc) >= 128+64: - result = "[^" & charSetEscAux({'\1'..'\xFF'} - cc) & ']' - else: - result = '[' & charSetEscAux(cc) & ']' - -proc toStrAux(r: TPeg, res: var string) = - case r.kind - of pkEmpty: add(res, "()") - of pkAny: add(res, '.') - of pkAnyRune: add(res, '_') - of pkLetter: add(res, "\\letter") - of pkLower: add(res, "\\lower") - of pkUpper: add(res, "\\upper") - of pkTitle: add(res, "\\title") - of pkWhitespace: add(res, "\\white") - - of pkNewLine: add(res, "\\n") - of pkTerminal: add(res, singleQuoteEsc(r.term)) - of pkTerminalIgnoreCase: - add(res, 'i') - add(res, singleQuoteEsc(r.term)) - of pkTerminalIgnoreStyle: - add(res, 'y') - add(res, singleQuoteEsc(r.term)) - of pkChar: add(res, singleQuoteEsc(r.ch)) - of pkCharChoice: add(res, charSetEsc(r.charChoice[])) - of pkNonTerminal: add(res, r.nt.name) - of pkSequence: - add(res, '(') - toStrAux(r.sons[0], res) - for i in 1 .. high(r.sons): - add(res, ' ') - toStrAux(r.sons[i], res) - add(res, ')') - of pkOrderedChoice: - add(res, '(') - toStrAux(r.sons[0], res) - for i in 1 .. 
high(r.sons): - add(res, " / ") - toStrAux(r.sons[i], res) - add(res, ')') - of pkGreedyRep: - toStrAux(r.sons[0], res) - add(res, '*') - of pkGreedyRepChar: - add(res, singleQuoteEsc(r.ch)) - add(res, '*') - of pkGreedyRepSet: - add(res, charSetEsc(r.charChoice[])) - add(res, '*') - of pkGreedyAny: - add(res, ".*") - of pkOption: - toStrAux(r.sons[0], res) - add(res, '?') - of pkAndPredicate: - add(res, '&') - toStrAux(r.sons[0], res) - of pkNotPredicate: - add(res, '!') - toStrAux(r.sons[0], res) - of pkSearch: - add(res, '@') - toStrAux(r.sons[0], res) - of pkCapturedSearch: - add(res, "{@}") - toStrAux(r.sons[0], res) - of pkCapture: - add(res, '{') - toStrAux(r.sons[0], res) - add(res, '}') - of pkBackRef: - add(res, '$') - add(res, $r.index) - of pkBackRefIgnoreCase: - add(res, "i$") - add(res, $r.index) - of pkBackRefIgnoreStyle: - add(res, "y$") - add(res, $r.index) - of pkRule: - toStrAux(r.sons[0], res) - add(res, " <- ") - toStrAux(r.sons[1], res) - of pkList: - for i in 0 .. high(r.sons): - toStrAux(r.sons[i], res) - add(res, "\n") - of pkStartAnchor: - add(res, '^') - -proc `$` *(r: TPeg): string {.nosideEffect, rtl, extern: "npegsToString".} = - ## converts a PEG to its string representation - result = "" - toStrAux(r, result) - -# --------------------- core engine ------------------------------------------- - -type - Captures* = object ## contains the captured substrings. - matches: array[0..MaxSubpatterns-1, tuple[first, last: int]] - ml: int - origStart: int - -{.deprecated: [TCaptures: Captures].} - -proc bounds*(c: Captures, - i: range[0..MaxSubpatterns-1]): tuple[first, last: int] = - ## returns the bounds ``[first..last]`` of the `i`'th capture. 
- result = c.matches[i] - -when not useUnicode: - type - Rune = char - template fastRuneAt(s, i, ch: expr) = - ch = s[i] - inc(i) - template runeLenAt(s, i: expr): expr = 1 - - proc isAlpha(a: char): bool {.inline.} = return a in {'a'..'z','A'..'Z'} - proc isUpper(a: char): bool {.inline.} = return a in {'A'..'Z'} - proc isLower(a: char): bool {.inline.} = return a in {'a'..'z'} - proc isTitle(a: char): bool {.inline.} = return false - proc isWhiteSpace(a: char): bool {.inline.} = return a in {' ', '\9'..'\13'} - -proc rawMatch*(s: string, p: TPeg, start: int, c: var Captures): int {. - nosideEffect, rtl, extern: "npegs$1".} = - ## low-level matching proc that implements the PEG interpreter. Use this - ## for maximum efficiency (every other PEG operation ends up calling this - ## proc). - ## Returns -1 if it does not match, else the length of the match - case p.kind - of pkEmpty: result = 0 # match of length 0 - of pkAny: - if s[start] != '\0': result = 1 - else: result = -1 - of pkAnyRune: - if s[start] != '\0': - result = runeLenAt(s, start) - else: - result = -1 - of pkLetter: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isAlpha(a): dec(result, start) - else: result = -1 - else: - result = -1 - of pkLower: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isLower(a): dec(result, start) - else: result = -1 - else: - result = -1 - of pkUpper: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isUpper(a): dec(result, start) - else: result = -1 - else: - result = -1 - of pkTitle: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isTitle(a): dec(result, start) - else: result = -1 - else: - result = -1 - of pkWhitespace: - if s[start] != '\0': - var a: Rune - result = start - fastRuneAt(s, result, a) - if isWhiteSpace(a): dec(result, start) - else: result = -1 - else: - result = -1 - of pkGreedyAny: - result = len(s) - 
start - of pkNewLine: - if s[start] == '\L': result = 1 - elif s[start] == '\C': - if s[start+1] == '\L': result = 2 - else: result = 1 - else: result = -1 - of pkTerminal: - result = len(p.term) - for i in 0..result-1: - if p.term[i] != s[start+i]: - result = -1 - break - of pkTerminalIgnoreCase: - var - i = 0 - a, b: Rune - result = start - while i < len(p.term): - fastRuneAt(p.term, i, a) - fastRuneAt(s, result, b) - if toLower(a) != toLower(b): - result = -1 - break - dec(result, start) - of pkTerminalIgnoreStyle: - var - i = 0 - a, b: Rune - result = start - while i < len(p.term): - while true: - fastRuneAt(p.term, i, a) - if a != Rune('_'): break - while true: - fastRuneAt(s, result, b) - if b != Rune('_'): break - if toLower(a) != toLower(b): - result = -1 - break - dec(result, start) - of pkChar: - if p.ch == s[start]: result = 1 - else: result = -1 - of pkCharChoice: - if contains(p.charChoice[], s[start]): result = 1 - else: result = -1 - of pkNonTerminal: - var oldMl = c.ml - when false: echo "enter: ", p.nt.name - result = rawMatch(s, p.nt.rule, start, c) - when false: echo "leave: ", p.nt.name - if result < 0: c.ml = oldMl - of pkSequence: - var oldMl = c.ml - result = 0 - for i in 0..high(p.sons): - var x = rawMatch(s, p.sons[i], start+result, c) - if x < 0: - c.ml = oldMl - result = -1 - break - else: inc(result, x) - of pkOrderedChoice: - var oldMl = c.ml - for i in 0..high(p.sons): - result = rawMatch(s, p.sons[i], start, c) - if result >= 0: break - c.ml = oldMl - of pkSearch: - var oldMl = c.ml - result = 0 - while start+result < s.len: - var x = rawMatch(s, p.sons[0], start+result, c) - if x >= 0: - inc(result, x) - return - inc(result) - result = -1 - c.ml = oldMl - of pkCapturedSearch: - var idx = c.ml # reserve a slot for the subpattern - inc(c.ml) - result = 0 - while start+result < s.len: - var x = rawMatch(s, p.sons[0], start+result, c) - if x >= 0: - if idx < MaxSubpatterns: - c.matches[idx] = (start, start+result-1) - #else: silently 
ignore the capture - inc(result, x) - return - inc(result) - result = -1 - c.ml = idx - of pkGreedyRep: - result = 0 - while true: - var x = rawMatch(s, p.sons[0], start+result, c) - # if x == 0, we have an endless loop; so the correct behaviour would be - # not to break. But endless loops can be easily introduced: - # ``(comment / \w*)*`` is such an example. Breaking for x == 0 does the - # expected thing in this case. - if x <= 0: break - inc(result, x) - of pkGreedyRepChar: - result = 0 - var ch = p.ch - while ch == s[start+result]: inc(result) - of pkGreedyRepSet: - result = 0 - while contains(p.charChoice[], s[start+result]): inc(result) - of pkOption: - result = max(0, rawMatch(s, p.sons[0], start, c)) - of pkAndPredicate: - var oldMl = c.ml - result = rawMatch(s, p.sons[0], start, c) - if result >= 0: result = 0 # do not consume anything - else: c.ml = oldMl - of pkNotPredicate: - var oldMl = c.ml - result = rawMatch(s, p.sons[0], start, c) - if result < 0: result = 0 - else: - c.ml = oldMl - result = -1 - of pkCapture: - var idx = c.ml # reserve a slot for the subpattern - inc(c.ml) - result = rawMatch(s, p.sons[0], start, c) - if result >= 0: - if idx < MaxSubpatterns: - c.matches[idx] = (start, start+result-1) - #else: silently ignore the capture - else: - c.ml = idx - of pkBackRef..pkBackRefIgnoreStyle: - if p.index >= c.ml: return -1 - var (a, b) = c.matches[p.index] - var n: TPeg - n.kind = succ(pkTerminal, ord(p.kind)-ord(pkBackRef)) - n.term = s.substr(a, b) - result = rawMatch(s, n, start, c) - of pkStartAnchor: - if c.origStart == start: result = 0 - else: result = -1 - of pkRule, pkList: assert false - -template fillMatches(s, caps, c: expr) = - for k in 0..c.ml-1: - caps[k] = substr(s, c.matches[k][0], c.matches[k][1]) - -proc match*(s: string, pattern: TPeg, matches: var openArray[string], - start = 0): bool {.nosideEffect, rtl, extern: "npegs$1Capture".} = - ## returns ``true`` if ``s[start..]`` matches the ``pattern`` and - ## the captured 
substrings in the array ``matches``. If it does not - ## match, nothing is written into ``matches`` and ``false`` is - ## returned. - var c: Captures - c.origStart = start - result = rawMatch(s, pattern, start, c) == len(s) - start - if result: fillMatches(s, matches, c) - -proc match*(s: string, pattern: TPeg, - start = 0): bool {.nosideEffect, rtl, extern: "npegs$1".} = - ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``. - var c: Captures - c.origStart = start - result = rawMatch(s, pattern, start, c) == len(s)-start - -proc matchLen*(s: string, pattern: TPeg, matches: var openArray[string], - start = 0): int {.nosideEffect, rtl, extern: "npegs$1Capture".} = - ## the same as ``match``, but it returns the length of the match, - ## if there is no match, -1 is returned. Note that a match length - ## of zero can happen. It's possible that a suffix of `s` remains - ## that does not belong to the match. - var c: Captures - c.origStart = start - result = rawMatch(s, pattern, start, c) - if result >= 0: fillMatches(s, matches, c) - -proc matchLen*(s: string, pattern: TPeg, - start = 0): int {.nosideEffect, rtl, extern: "npegs$1".} = - ## the same as ``match``, but it returns the length of the match, - ## if there is no match, -1 is returned. Note that a match length - ## of zero can happen. It's possible that a suffix of `s` remains - ## that does not belong to the match. - var c: Captures - c.origStart = start - result = rawMatch(s, pattern, start, c) - -proc find*(s: string, pattern: TPeg, matches: var openArray[string], - start = 0): int {.nosideEffect, rtl, extern: "npegs$1Capture".} = - ## returns the starting position of ``pattern`` in ``s`` and the captured - ## substrings in the array ``matches``. If it does not match, nothing - ## is written into ``matches`` and -1 is returned. - var c: Captures - c.origStart = start - for i in start .. 
s.len-1: - c.ml = 0 - if rawMatch(s, pattern, i, c) >= 0: - fillMatches(s, matches, c) - return i - return -1 - # could also use the pattern here: (!P .)* P - -proc findBounds*(s: string, pattern: TPeg, matches: var openArray[string], - start = 0): tuple[first, last: int] {. - nosideEffect, rtl, extern: "npegs$1Capture".} = - ## returns the starting position and end position of ``pattern`` in ``s`` - ## and the captured - ## substrings in the array ``matches``. If it does not match, nothing - ## is written into ``matches`` and (-1,0) is returned. - var c: Captures - c.origStart = start - for i in start .. s.len-1: - c.ml = 0 - var L = rawMatch(s, pattern, i, c) - if L >= 0: - fillMatches(s, matches, c) - return (i, i+L-1) - return (-1, 0) - -proc find*(s: string, pattern: TPeg, - start = 0): int {.nosideEffect, rtl, extern: "npegs$1".} = - ## returns the starting position of ``pattern`` in ``s``. If it does not - ## match, -1 is returned. - var c: Captures - c.origStart = start - for i in start .. s.len-1: - if rawMatch(s, pattern, i, c) >= 0: return i - return -1 - -iterator findAll*(s: string, pattern: TPeg, start = 0): string = - ## yields all matching *substrings* of `s` that match `pattern`. - var c: Captures - c.origStart = start - var i = start - while i < s.len: - c.ml = 0 - var L = rawMatch(s, pattern, i, c) - if L < 0: - inc(i, 1) - else: - yield substr(s, i, i+L-1) - inc(i, L) - -proc findAll*(s: string, pattern: TPeg, start = 0): seq[string] {. - nosideEffect, rtl, extern: "npegs$1".} = - ## returns all matching *substrings* of `s` that match `pattern`. - ## If it does not match, @[] is returned. - accumulateResult(findAll(s, pattern, start)) - -when not defined(nimhygiene): - {.pragma: inject.} - -template `=~`*(s: string, pattern: TPeg): bool = - ## This calls ``match`` with an implicit declared ``matches`` array that - ## can be used in the scope of the ``=~`` call: - ## - ## .. 
code-block:: nim - ## - ## if line =~ peg"\s* {\w+} \s* '=' \s* {\w+}": - ## # matches a key=value pair: - ## echo("Key: ", matches[0]) - ## echo("Value: ", matches[1]) - ## elif line =~ peg"\s*{'#'.*}": - ## # matches a comment - ## # note that the implicit ``matches`` array is different from the - ## # ``matches`` array of the first branch - ## echo("comment: ", matches[0]) - ## else: - ## echo("syntax error") - ## - bind MaxSubpatterns - when not declaredInScope(matches): - var matches {.inject.}: array[0..MaxSubpatterns-1, string] - match(s, pattern, matches) - -# ------------------------- more string handling ------------------------------ - -proc contains*(s: string, pattern: TPeg, start = 0): bool {. - nosideEffect, rtl, extern: "npegs$1".} = - ## same as ``find(s, pattern, start) >= 0`` - return find(s, pattern, start) >= 0 - -proc contains*(s: string, pattern: TPeg, matches: var openArray[string], - start = 0): bool {.nosideEffect, rtl, extern: "npegs$1Capture".} = - ## same as ``find(s, pattern, matches, start) >= 0`` - return find(s, pattern, matches, start) >= 0 - -proc startsWith*(s: string, prefix: TPeg, start = 0): bool {. - nosideEffect, rtl, extern: "npegs$1".} = - ## returns true if `s` starts with the pattern `prefix` - result = matchLen(s, prefix, start) >= 0 - -proc endsWith*(s: string, suffix: TPeg, start = 0): bool {. - nosideEffect, rtl, extern: "npegs$1".} = - ## returns true if `s` ends with the pattern `prefix` - var c: Captures - c.origStart = start - for i in start .. s.len-1: - if rawMatch(s, suffix, i, c) == s.len - i: return true - -proc replacef*(s: string, sub: TPeg, by: string): string {. - nosideEffect, rtl, extern: "npegs$1".} = - ## Replaces `sub` in `s` by the string `by`. Captures can be accessed in `by` - ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples: - ## - ## .. code-block:: nim - ## "var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") - ## - ## Results in: - ## - ## .. 
code-block:: nim - ## - ## "var1<-keykey; val2<-key2key2" - result = "" - var i = 0 - var caps: array[0..MaxSubpatterns-1, string] - var c: Captures - while i < s.len: - c.ml = 0 - var x = rawMatch(s, sub, i, c) - if x <= 0: - add(result, s[i]) - inc(i) - else: - fillMatches(s, caps, c) - addf(result, by, caps) - inc(i, x) - add(result, substr(s, i)) - -proc replace*(s: string, sub: TPeg, by = ""): string {. - nosideEffect, rtl, extern: "npegs$1".} = - ## Replaces `sub` in `s` by the string `by`. Captures cannot be accessed - ## in `by`. - result = "" - var i = 0 - var c: Captures - while i < s.len: - var x = rawMatch(s, sub, i, c) - if x <= 0: - add(result, s[i]) - inc(i) - else: - add(result, by) - inc(i, x) - add(result, substr(s, i)) - -proc parallelReplace*(s: string, subs: varargs[ - tuple[pattern: TPeg, repl: string]]): string {. - nosideEffect, rtl, extern: "npegs$1".} = - ## Returns a modified copy of `s` with the substitutions in `subs` - ## applied in parallel. - result = "" - var i = 0 - var c: Captures - var caps: array[0..MaxSubpatterns-1, string] - while i < s.len: - block searchSubs: - for j in 0..high(subs): - c.ml = 0 - var x = rawMatch(s, subs[j][0], i, c) - if x > 0: - fillMatches(s, caps, c) - addf(result, subs[j][1], caps) - inc(i, x) - break searchSubs - add(result, s[i]) - inc(i) - # copy the rest: - add(result, substr(s, i)) - -proc transformFile*(infile, outfile: string, - subs: varargs[tuple[pattern: TPeg, repl: string]]) {. - rtl, extern: "npegs$1".} = - ## reads in the file `infile`, performs a parallel replacement (calls - ## `parallelReplace`) and writes back to `outfile`. Raises ``EIO`` if an - ## error occurs. This is supposed to be used for quick scripting. - var x = readFile(infile).string - writeFile(outfile, x.parallelReplace(subs)) - -iterator split*(s: string, sep: TPeg): string = - ## Splits the string `s` into substrings. - ## - ## Substrings are separated by the PEG `sep`. - ## Examples: - ## - ## .. 
code-block:: nim - ## for word in split("00232this02939is39an22example111", peg"\d+"): - ## writeln(stdout, word) - ## - ## Results in: - ## - ## .. code-block:: nim - ## "this" - ## "is" - ## "an" - ## "example" - ## - var c: Captures - var - first = 0 - last = 0 - while last < len(s): - c.ml = 0 - var x = rawMatch(s, sep, last, c) - if x > 0: inc(last, x) - first = last - while last < len(s): - inc(last) - c.ml = 0 - x = rawMatch(s, sep, last, c) - if x > 0: break - if first < last: - yield substr(s, first, last-1) - -proc split*(s: string, sep: TPeg): seq[string] {. - nosideEffect, rtl, extern: "npegs$1".} = - ## Splits the string `s` into substrings. - accumulateResult(split(s, sep)) - -# ------------------- scanner ------------------------------------------------- - -type - TModifier = enum - modNone, - modVerbatim, - modIgnoreCase, - modIgnoreStyle - TTokKind = enum ## enumeration of all tokens - tkInvalid, ## invalid token - tkEof, ## end of file reached - tkAny, ## . - tkAnyRune, ## _ - tkIdentifier, ## abc - tkStringLit, ## "abc" or 'abc' - tkCharSet, ## [^A-Z] - tkParLe, ## '(' - tkParRi, ## ')' - tkCurlyLe, ## '{' - tkCurlyRi, ## '}' - tkCurlyAt, ## '{@}' - tkArrow, ## '<-' - tkBar, ## '/' - tkStar, ## '*' - tkPlus, ## '+' - tkAmp, ## '&' - tkNot, ## '!' - tkOption, ## '?' - tkAt, ## '@' - tkBuiltin, ## \identifier - tkEscaped, ## \\ - tkBackref, ## '$' - tkDollar, ## '$' - tkHat ## '^' - - TToken {.final.} = object ## a token - kind: TTokKind ## the type of the token - modifier: TModifier - literal: string ## the parsed (string) literal - charset: set[char] ## if kind == tkCharSet - index: int ## if kind == tkBackref - - PegLexer {.inheritable.} = object ## the lexer object. 
- bufpos: int ## the current position within the buffer - buf: cstring ## the buffer itself - lineNumber: int ## the current line number - lineStart: int ## index of last line start in buffer - colOffset: int ## column to add - filename: string - -const - tokKindToStr: array[TTokKind, string] = [ - "invalid", "[EOF]", ".", "_", "identifier", "string literal", - "character set", "(", ")", "{", "}", "{@}", - "<-", "/", "*", "+", "&", "!", "?", - "@", "built-in", "escaped", "$", "$", "^" - ] - -proc handleCR(L: var PegLexer, pos: int): int = - assert(L.buf[pos] == '\c') - inc(L.lineNumber) - result = pos+1 - if L.buf[result] == '\L': inc(result) - L.lineStart = result - -proc handleLF(L: var PegLexer, pos: int): int = - assert(L.buf[pos] == '\L') - inc(L.lineNumber) - result = pos+1 - L.lineStart = result - -proc init(L: var PegLexer, input, filename: string, line = 1, col = 0) = - L.buf = input - L.bufpos = 0 - L.lineNumber = line - L.colOffset = col - L.lineStart = 0 - L.filename = filename - -proc getColumn(L: PegLexer): int {.inline.} = - result = abs(L.bufpos - L.lineStart) + L.colOffset - -proc getLine(L: PegLexer): int {.inline.} = - result = L.lineNumber - -proc errorStr(L: PegLexer, msg: string, line = -1, col = -1): string = - var line = if line < 0: getLine(L) else: line - var col = if col < 0: getColumn(L) else: col - result = "$1($2, $3) Error: $4" % [L.filename, $line, $col, msg] - -proc handleHexChar(c: var PegLexer, xi: var int) = - case c.buf[c.bufpos] - of '0'..'9': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - of 'a'..'f': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) - inc(c.bufpos) - of 'A'..'F': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) - inc(c.bufpos) - else: discard - -proc getEscapedChar(c: var PegLexer, tok: var TToken) = - inc(c.bufpos) - case c.buf[c.bufpos] - of 'r', 'R', 'c', 'C': - add(tok.literal, '\c') - inc(c.bufpos) - of 'l', 'L': - add(tok.literal, '\L') - inc(c.bufpos) - of 
'f', 'F': - add(tok.literal, '\f') - inc(c.bufpos) - of 'e', 'E': - add(tok.literal, '\e') - inc(c.bufpos) - of 'a', 'A': - add(tok.literal, '\a') - inc(c.bufpos) - of 'b', 'B': - add(tok.literal, '\b') - inc(c.bufpos) - of 'v', 'V': - add(tok.literal, '\v') - inc(c.bufpos) - of 't', 'T': - add(tok.literal, '\t') - inc(c.bufpos) - of 'x', 'X': - inc(c.bufpos) - var xi = 0 - handleHexChar(c, xi) - handleHexChar(c, xi) - if xi == 0: tok.kind = tkInvalid - else: add(tok.literal, chr(xi)) - of '0'..'9': - var val = ord(c.buf[c.bufpos]) - ord('0') - inc(c.bufpos) - var i = 1 - while (i <= 3) and (c.buf[c.bufpos] in {'0'..'9'}): - val = val * 10 + ord(c.buf[c.bufpos]) - ord('0') - inc(c.bufpos) - inc(i) - if val > 0 and val <= 255: add(tok.literal, chr(val)) - else: tok.kind = tkInvalid - of '\0'..'\31': - tok.kind = tkInvalid - elif c.buf[c.bufpos] in strutils.Letters: - tok.kind = tkInvalid - else: - add(tok.literal, c.buf[c.bufpos]) - inc(c.bufpos) - -proc skip(c: var PegLexer) = - var pos = c.bufpos - var buf = c.buf - while true: - case buf[pos] - of ' ', '\t': - inc(pos) - of '#': - while not (buf[pos] in {'\c', '\L', '\0'}): inc(pos) - of '\c': - pos = handleCR(c, pos) - buf = c.buf - of '\L': - pos = handleLF(c, pos) - buf = c.buf - else: - break # EndOfFile also leaves the loop - c.bufpos = pos - -proc getString(c: var PegLexer, tok: var TToken) = - tok.kind = tkStringLit - var pos = c.bufpos + 1 - var buf = c.buf - var quote = buf[pos-1] - while true: - case buf[pos] - of '\\': - c.bufpos = pos - getEscapedChar(c, tok) - pos = c.bufpos - of '\c', '\L', '\0': - tok.kind = tkInvalid - break - elif buf[pos] == quote: - inc(pos) - break - else: - add(tok.literal, buf[pos]) - inc(pos) - c.bufpos = pos - -proc getDollar(c: var PegLexer, tok: var TToken) = - var pos = c.bufpos + 1 - var buf = c.buf - if buf[pos] in {'0'..'9'}: - tok.kind = tkBackref - tok.index = 0 - while buf[pos] in {'0'..'9'}: - tok.index = tok.index * 10 + ord(buf[pos]) - ord('0') - inc(pos) - 
else: - tok.kind = tkDollar - c.bufpos = pos - -proc getCharSet(c: var PegLexer, tok: var TToken) = - tok.kind = tkCharSet - tok.charset = {} - var pos = c.bufpos + 1 - var buf = c.buf - var caret = false - if buf[pos] == '^': - inc(pos) - caret = true - while true: - var ch: char - case buf[pos] - of ']': - inc(pos) - break - of '\\': - c.bufpos = pos - getEscapedChar(c, tok) - pos = c.bufpos - ch = tok.literal[tok.literal.len-1] - of '\C', '\L', '\0': - tok.kind = tkInvalid - break - else: - ch = buf[pos] - inc(pos) - incl(tok.charset, ch) - if buf[pos] == '-': - if buf[pos+1] == ']': - incl(tok.charset, '-') - inc(pos) - else: - inc(pos) - var ch2: char - case buf[pos] - of '\\': - c.bufpos = pos - getEscapedChar(c, tok) - pos = c.bufpos - ch2 = tok.literal[tok.literal.len-1] - of '\C', '\L', '\0': - tok.kind = tkInvalid - break - else: - ch2 = buf[pos] - inc(pos) - for i in ord(ch)+1 .. ord(ch2): - incl(tok.charset, chr(i)) - c.bufpos = pos - if caret: tok.charset = {'\1'..'\xFF'} - tok.charset - -proc getSymbol(c: var PegLexer, tok: var TToken) = - var pos = c.bufpos - var buf = c.buf - while true: - add(tok.literal, buf[pos]) - inc(pos) - if buf[pos] notin strutils.IdentChars: break - c.bufpos = pos - tok.kind = tkIdentifier - -proc getBuiltin(c: var PegLexer, tok: var TToken) = - if c.buf[c.bufpos+1] in strutils.Letters: - inc(c.bufpos) - getSymbol(c, tok) - tok.kind = tkBuiltin - else: - tok.kind = tkEscaped - getEscapedChar(c, tok) # may set tok.kind to tkInvalid - -proc getTok(c: var PegLexer, tok: var TToken) = - tok.kind = tkInvalid - tok.modifier = modNone - setLen(tok.literal, 0) - skip(c) - case c.buf[c.bufpos] - of '{': - inc(c.bufpos) - if c.buf[c.bufpos] == '@' and c.buf[c.bufpos+1] == '}': - tok.kind = tkCurlyAt - inc(c.bufpos, 2) - add(tok.literal, "{@}") - else: - tok.kind = tkCurlyLe - add(tok.literal, '{') - of '}': - tok.kind = tkCurlyRi - inc(c.bufpos) - add(tok.literal, '}') - of '[': - getCharSet(c, tok) - of '(': - tok.kind = tkParLe - 
inc(c.bufpos) - add(tok.literal, '(') - of ')': - tok.kind = tkParRi - inc(c.bufpos) - add(tok.literal, ')') - of '.': - tok.kind = tkAny - inc(c.bufpos) - add(tok.literal, '.') - of '_': - tok.kind = tkAnyRune - inc(c.bufpos) - add(tok.literal, '_') - of '\\': - getBuiltin(c, tok) - of '\'', '"': getString(c, tok) - of '$': getDollar(c, tok) - of '\0': - tok.kind = tkEof - tok.literal = "[EOF]" - of 'a'..'z', 'A'..'Z', '\128'..'\255': - getSymbol(c, tok) - if c.buf[c.bufpos] in {'\'', '"'} or - c.buf[c.bufpos] == '$' and c.buf[c.bufpos+1] in {'0'..'9'}: - case tok.literal - of "i": tok.modifier = modIgnoreCase - of "y": tok.modifier = modIgnoreStyle - of "v": tok.modifier = modVerbatim - else: discard - setLen(tok.literal, 0) - if c.buf[c.bufpos] == '$': - getDollar(c, tok) - else: - getString(c, tok) - if tok.modifier == modNone: tok.kind = tkInvalid - of '+': - tok.kind = tkPlus - inc(c.bufpos) - add(tok.literal, '+') - of '*': - tok.kind = tkStar - inc(c.bufpos) - add(tok.literal, '+') - of '<': - if c.buf[c.bufpos+1] == '-': - inc(c.bufpos, 2) - tok.kind = tkArrow - add(tok.literal, "<-") - else: - add(tok.literal, '<') - of '/': - tok.kind = tkBar - inc(c.bufpos) - add(tok.literal, '/') - of '?': - tok.kind = tkOption - inc(c.bufpos) - add(tok.literal, '?') - of '!': - tok.kind = tkNot - inc(c.bufpos) - add(tok.literal, '!') - of '&': - tok.kind = tkAmp - inc(c.bufpos) - add(tok.literal, '!') - of '@': - tok.kind = tkAt - inc(c.bufpos) - add(tok.literal, '@') - if c.buf[c.bufpos] == '@': - tok.kind = tkCurlyAt - inc(c.bufpos) - add(tok.literal, '@') - of '^': - tok.kind = tkHat - inc(c.bufpos) - add(tok.literal, '^') - else: - add(tok.literal, c.buf[c.bufpos]) - inc(c.bufpos) - -proc arrowIsNextTok(c: PegLexer): bool = - # the only look ahead we need - var pos = c.bufpos - while c.buf[pos] in {'\t', ' '}: inc(pos) - result = c.buf[pos] == '<' and c.buf[pos+1] == '-' - -# ----------------------------- parser ---------------------------------------- - -type - 
EInvalidPeg* = object of ValueError ## raised if an invalid - ## PEG has been detected - PegParser = object of PegLexer ## the PEG parser object - tok: TToken - nonterms: seq[PNonTerminal] - modifier: TModifier - captures: int - identIsVerbatim: bool - skip: TPeg - -proc pegError(p: PegParser, msg: string, line = -1, col = -1) = - var e: ref EInvalidPeg - new(e) - e.msg = errorStr(p, msg, line, col) - raise e - -proc getTok(p: var PegParser) = - getTok(p, p.tok) - if p.tok.kind == tkInvalid: pegError(p, "invalid token") - -proc eat(p: var PegParser, kind: TTokKind) = - if p.tok.kind == kind: getTok(p) - else: pegError(p, tokKindToStr[kind] & " expected") - -proc parseExpr(p: var PegParser): TPeg - -proc getNonTerminal(p: var PegParser, name: string): PNonTerminal = - for i in 0..high(p.nonterms): - result = p.nonterms[i] - if cmpIgnoreStyle(result.name, name) == 0: return - # forward reference: - result = newNonTerminal(name, getLine(p), getColumn(p)) - add(p.nonterms, result) - -proc modifiedTerm(s: string, m: TModifier): TPeg = - case m - of modNone, modVerbatim: result = term(s) - of modIgnoreCase: result = termIgnoreCase(s) - of modIgnoreStyle: result = termIgnoreStyle(s) - -proc modifiedBackref(s: int, m: TModifier): TPeg = - case m - of modNone, modVerbatim: result = backref(s) - of modIgnoreCase: result = backrefIgnoreCase(s) - of modIgnoreStyle: result = backrefIgnoreStyle(s) - -proc builtin(p: var PegParser): TPeg = - # do not use "y", "skip" or "i" as these would be ambiguous - case p.tok.literal - of "n": result = newLine() - of "d": result = charSet({'0'..'9'}) - of "D": result = charSet({'\1'..'\xff'} - {'0'..'9'}) - of "s": result = charSet({' ', '\9'..'\13'}) - of "S": result = charSet({'\1'..'\xff'} - {' ', '\9'..'\13'}) - of "w": result = charSet({'a'..'z', 'A'..'Z', '_', '0'..'9'}) - of "W": result = charSet({'\1'..'\xff'} - {'a'..'z','A'..'Z','_','0'..'9'}) - of "a": result = charSet({'a'..'z', 'A'..'Z'}) - of "A": result = charSet({'\1'..'\xff'} 
- {'a'..'z', 'A'..'Z'}) - of "ident": result = pegs.ident - of "letter": result = unicodeLetter() - of "upper": result = unicodeUpper() - of "lower": result = unicodeLower() - of "title": result = unicodeTitle() - of "white": result = unicodeWhitespace() - else: pegError(p, "unknown built-in: " & p.tok.literal) - -proc token(terminal: TPeg, p: PegParser): TPeg = - if p.skip.kind == pkEmpty: result = terminal - else: result = sequence(p.skip, terminal) - -proc primary(p: var PegParser): TPeg = - case p.tok.kind - of tkAmp: - getTok(p) - return &primary(p) - of tkNot: - getTok(p) - return !primary(p) - of tkAt: - getTok(p) - return !*primary(p) - of tkCurlyAt: - getTok(p) - return !*\primary(p).token(p) - else: discard - case p.tok.kind - of tkIdentifier: - if p.identIsVerbatim: - var m = p.tok.modifier - if m == modNone: m = p.modifier - result = modifiedTerm(p.tok.literal, m).token(p) - getTok(p) - elif not arrowIsNextTok(p): - var nt = getNonTerminal(p, p.tok.literal) - incl(nt.flags, ntUsed) - result = nonterminal(nt).token(p) - getTok(p) - else: - pegError(p, "expression expected, but found: " & p.tok.literal) - of tkStringLit: - var m = p.tok.modifier - if m == modNone: m = p.modifier - result = modifiedTerm(p.tok.literal, m).token(p) - getTok(p) - of tkCharSet: - if '\0' in p.tok.charset: - pegError(p, "binary zero ('\\0') not allowed in character class") - result = charSet(p.tok.charset).token(p) - getTok(p) - of tkParLe: - getTok(p) - result = parseExpr(p) - eat(p, tkParRi) - of tkCurlyLe: - getTok(p) - result = capture(parseExpr(p)).token(p) - eat(p, tkCurlyRi) - inc(p.captures) - of tkAny: - result = any().token(p) - getTok(p) - of tkAnyRune: - result = anyRune().token(p) - getTok(p) - of tkBuiltin: - result = builtin(p).token(p) - getTok(p) - of tkEscaped: - result = term(p.tok.literal[0]).token(p) - getTok(p) - of tkDollar: - result = endAnchor() - getTok(p) - of tkHat: - result = startAnchor() - getTok(p) - of tkBackref: - var m = p.tok.modifier - if m 
== modNone: m = p.modifier - result = modifiedBackref(p.tok.index, m).token(p) - if p.tok.index < 0 or p.tok.index > p.captures: - pegError(p, "invalid back reference index: " & $p.tok.index) - getTok(p) - else: - pegError(p, "expression expected, but found: " & p.tok.literal) - getTok(p) # we must consume a token here to prevent endless loops! - while true: - case p.tok.kind - of tkOption: - result = ?result - getTok(p) - of tkStar: - result = *result - getTok(p) - of tkPlus: - result = +result - getTok(p) - else: break - -proc seqExpr(p: var PegParser): TPeg = - result = primary(p) - while true: - case p.tok.kind - of tkAmp, tkNot, tkAt, tkStringLit, tkCharSet, tkParLe, tkCurlyLe, - tkAny, tkAnyRune, tkBuiltin, tkEscaped, tkDollar, tkBackref, - tkHat, tkCurlyAt: - result = sequence(result, primary(p)) - of tkIdentifier: - if not arrowIsNextTok(p): - result = sequence(result, primary(p)) - else: break - else: break - -proc parseExpr(p: var PegParser): TPeg = - result = seqExpr(p) - while p.tok.kind == tkBar: - getTok(p) - result = result / seqExpr(p) - -proc parseRule(p: var PegParser): PNonTerminal = - if p.tok.kind == tkIdentifier and arrowIsNextTok(p): - result = getNonTerminal(p, p.tok.literal) - if ntDeclared in result.flags: - pegError(p, "attempt to redefine: " & result.name) - result.line = getLine(p) - result.col = getColumn(p) - getTok(p) - eat(p, tkArrow) - result.rule = parseExpr(p) - incl(result.flags, ntDeclared) # NOW inlining may be attempted - else: - pegError(p, "rule expected, but found: " & p.tok.literal) - -proc rawParse(p: var PegParser): TPeg = - ## parses a rule or a PEG expression - while p.tok.kind == tkBuiltin: - case p.tok.literal - of "i": - p.modifier = modIgnoreCase - getTok(p) - of "y": - p.modifier = modIgnoreStyle - getTok(p) - of "skip": - getTok(p) - p.skip = ?primary(p) - else: break - if p.tok.kind == tkIdentifier and arrowIsNextTok(p): - result = parseRule(p).rule - while p.tok.kind != tkEof: - discard parseRule(p) - else: - 
p.identIsVerbatim = true - result = parseExpr(p) - if p.tok.kind != tkEof: - pegError(p, "EOF expected, but found: " & p.tok.literal) - for i in 0..high(p.nonterms): - var nt = p.nonterms[i] - if ntDeclared notin nt.flags: - pegError(p, "undeclared identifier: " & nt.name, nt.line, nt.col) - elif ntUsed notin nt.flags and i > 0: - pegError(p, "unused rule: " & nt.name, nt.line, nt.col) - -proc parsePeg*(pattern: string, filename = "pattern", line = 1, col = 0): TPeg = - ## constructs a Peg object from `pattern`. `filename`, `line`, `col` are - ## used for error messages, but they only provide start offsets. `parsePeg` - ## keeps track of line and column numbers within `pattern`. - var p: PegParser - init(PegLexer(p), pattern, filename, line, col) - p.tok.kind = tkInvalid - p.tok.modifier = modNone - p.tok.literal = "" - p.tok.charset = {} - p.nonterms = @[] - p.identIsVerbatim = false - getTok(p) - result = rawParse(p) - -proc peg*(pattern: string): TPeg = - ## constructs a Peg object from the `pattern`. The short name has been - ## chosen to encourage its use as a raw string modifier:: - ## - ## peg"{\ident} \s* '=' \s* {.*}" - result = parsePeg(pattern, "pattern") - -proc escapePeg*(s: string): string = - ## escapes `s` so that it is matched verbatim when used as a peg. 
- result = "" - var inQuote = false - for c in items(s): - case c - of '\0'..'\31', '\'', '"', '\\': - if inQuote: - result.add('\'') - inQuote = false - result.add("\\x") - result.add(toHex(ord(c), 2)) - else: - if not inQuote: - result.add('\'') - inQuote = true - result.add(c) - if inQuote: result.add('\'') - -when isMainModule: - assert escapePeg("abc''def'") == r"'abc'\x27\x27'def'\x27" - assert match("(a b c)", peg"'(' @ ')'") - assert match("W_HI_Le", peg"\y 'while'") - assert(not match("W_HI_L", peg"\y 'while'")) - assert(not match("W_HI_Le", peg"\y v'while'")) - assert match("W_HI_Le", peg"y'while'") - - assert($ +digits == $peg"\d+") - assert "0158787".match(peg"\d+") - assert "ABC 0232".match(peg"\w+\s+\d+") - assert "ABC".match(peg"\d+ / \w+") - - for word in split("00232this02939is39an22example111", peg"\d+"): - writeln(stdout, word) - - assert matchLen("key", ident) == 3 - - var pattern = sequence(ident, *whitespace, term('='), *whitespace, ident) - assert matchLen("key1= cal9", pattern) == 11 - - var ws = newNonTerminal("ws", 1, 1) - ws.rule = *whitespace - - var expr = newNonTerminal("expr", 1, 1) - expr.rule = sequence(capture(ident), *sequence( - nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr))) - - var c: Captures - var s = "a+b + c +d+e+f" - assert rawMatch(s, expr.rule, 0, c) == len(s) - var a = "" - for i in 0..c.ml-1: - a.add(substr(s, c.matches[i][0], c.matches[i][1])) - assert a == "abcdef" - #echo expr.rule - - #const filename = "lib/devel/peg/grammar.txt" - #var grammar = parsePeg(newFileStream(filename, fmRead), filename) - #echo "a <- [abc]*?".match(grammar) - assert find("_____abc_______", term("abc"), 2) == 5 - assert match("_______ana", peg"A <- 'ana' / . 
A") - assert match("abcs%%%", peg"A <- ..A / .A / '%'") - - var matches: array[0..MaxSubpatterns-1, string] - if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}": - assert matches[0] == "abc" - else: - assert false - - var g2 = peg"""S <- A B / C D - A <- 'a'+ - B <- 'b'+ - C <- 'c'+ - D <- 'd'+ - """ - assert($g2 == "((A B) / (C D))") - assert match("cccccdddddd", g2) - assert("var1=key; var2=key2".replacef(peg"{\ident}'='{\ident}", "$1<-$2$2") == - "var1<-keykey; var2<-key2key2") - assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") == - "$1<-$2$2; $1<-$2$2") - assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") - - if "aaaaaa" =~ peg"'aa' !. / ({'a'})+": - assert matches[0] == "a" - else: - assert false - - if match("abcdefg", peg"c {d} ef {g}", matches, 2): - assert matches[0] == "d" - assert matches[1] == "g" - else: - assert false - - for x in findAll("abcdef", peg".", 3): - echo x - - for x in findAll("abcdef", peg"^{.}", 3): - assert x == "d" - - if "f(a, b)" =~ peg"{[0-9]+} / ({\ident} '(' {@} ')')": - assert matches[0] == "f" - assert matches[1] == "a, b" - else: - assert false - - assert match("eine übersicht und außerdem", peg"(\letter \white*)+") - # ß is not a lower cased letter?! - assert match("eine übersicht und auerdem", peg"(\lower \white*)+") - assert match("EINE ÜBERSICHT UND AUSSERDEM", peg"(\upper \white*)+") - assert(not match("456678", peg"(\letter)+")) - - assert("var1 = key; var2 = key2".replacef( - peg"\skip(\s*) {\ident}'='{\ident}", "$1<-$2$2") == - "var1<-keykey;var2<-key2key2") - - assert match("prefix/start", peg"^start$", 7) - diff --git a/lib/pure/poly.nim b/lib/pure/poly.nim deleted file mode 100644 index 58dcdc1ad..000000000 --- a/lib/pure/poly.nim +++ /dev/null @@ -1,368 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2013 Robert Persson -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. 
-# - -import math -import strutils -import numeric - -type - Poly* = object - cofs:seq[float] - -{.deprecated: [TPoly: Poly].} - -proc degree*(p:Poly):int= - ## Returns the degree of the polynomial, - ## that is the number of coefficients-1 - return p.cofs.len-1 - - -proc eval*(p:Poly,x:float):float= - ## Evaluates a polynomial function value for `x` - ## quickly using Horners method - var n=p.degree - result=p.cofs[n] - dec n - while n>=0: - result = result*x+p.cofs[n] - dec n - -proc `[]` *(p:Poly;idx:int):float= - ## Gets a coefficient of the polynomial. - ## p[2] will returns the quadric term, p[3] the cubic etc. - ## Out of bounds index will return 0.0. - if idx<0 or idx>p.degree: - return 0.0 - return p.cofs[idx] - -proc `[]=` *(p:var Poly;idx:int,v:float)= - ## Sets an coefficient of the polynomial by index. - ## p[2] set the quadric term, p[3] the cubic etc. - ## If index is out of range for the coefficients, - ## the polynomial grows to the smallest needed degree. - assert(idx>=0) - - if idx>p.degree: #polynomial must grow - var oldlen=p.cofs.len - p.cofs.setLen(idx+1) - for q in oldlen.. <high(p.cofs): - p.cofs[q]=0.0 #new-grown coefficients set to zero - - p.cofs[idx]=v - - -iterator items*(p:Poly):float= - ## Iterates through the coefficients of the polynomial. - var i=p.degree - while i>=0: - yield p[i] - dec i - -proc clean*(p:var Poly;zerotol=0.0)= - ## Removes leading zero coefficients of the polynomial. - ## An optional tolerance can be given for what's considered zero. 
- var n=p.degree - var relen=false - - while n>0 and abs(p[n])<=zerotol: # >0 => keep at least one coefficient - dec n - relen=true - - if relen: p.cofs.setLen(n+1) - - -proc `$` *(p:Poly):string = - ## Gets a somewhat reasonable string representation of the polynomial - ## The format should be compatible with most online function plotters, - ## for example directly in google search - result="" - var first=true #might skip + sign if first coefficient - - for idx in countdown(p.degree,0): - let a=p[idx] - - if a==0.0: - continue - - if a>= 0.0 and not first: - result.add('+') - first=false - - if a!=1.0 or idx==0: - result.add(formatFloat(a,ffDefault,0)) - if idx>=2: - result.add("x^" & $idx) - elif idx==1: - result.add("x") - - if result=="": - result="0" - - -proc derivative*(p: Poly): Poly= - ## Returns a new polynomial, which is the derivative of `p` - newSeq[float](result.cofs,p.degree) - for idx in 0..high(result.cofs): - result.cofs[idx]=p.cofs[idx+1]*float(idx+1) - -proc diff*(p:Poly,x:float):float= - ## Evaluates the differentiation of a polynomial with - ## respect to `x` quickly using a modifed Horners method - var n=p.degree - result=p[n]*float(n) - dec n - while n>=1: - result = result*x+p[n]*float(n) - dec n - -proc integral*(p:Poly):Poly= - ## Returns a new polynomial which is the indefinite - ## integral of `p`. The constant term is set to 0.0 - newSeq(result.cofs,p.cofs.len+1) - result.cofs[0]=0.0 #constant arbitrary term, use 0.0 - for i in 1..high(result.cofs): - result.cofs[i]=p.cofs[i-1]/float(i) - - -proc integrate*(p:Poly;xmin,xmax:float):float= - ## Computes the definite integral of `p` between `xmin` and `xmax` - ## quickly using a modified version of Horners method - var - n=p.degree - s1=p[n]/float(n+1) - s2=s1 - fac:float - - dec n - while n>=0: - fac=p[n]/float(n+1) - s1 = s1*xmin+fac - s2 = s2*xmax+fac - dec n - - result=s2*xmax-s1*xmin - -proc initPoly*(cofs:varargs[float]):Poly= - ## Initializes a polynomial with given coefficients. 
- ## The most significant coefficient is first, so to create x^2-2x+3: - ## intiPoly(1.0,-2.0,3.0) - if len(cofs)<=0: - result.cofs= @[0.0] #need at least one coefficient - else: - # reverse order of coefficients so indexing matches degree of - # coefficient... - result.cofs= @[] - for idx in countdown(cofs.len-1,0): - result.cofs.add(cofs[idx]) - - result.clean #remove leading zero terms - - -proc divMod*(p,d:Poly;q,r:var Poly)= - ## Divides `p` with `d`, and stores the quotinent in `q` and - ## the remainder in `d` - var - pdeg=p.degree - ddeg=d.degree - power=p.degree-d.degree - ratio:float - - r.cofs = p.cofs #initial remainder=numerator - if power<0: #denominator is larger than numerator - q.cofs= @ [0.0] #quotinent is 0.0 - return # keep remainder as numerator - - q.cofs=newSeq[float](power+1) - - for i in countdown(pdeg,ddeg): - ratio=r.cofs[i]/d.cofs[ddeg] - - q.cofs[i-ddeg]=ratio - r.cofs[i]=0.0 - - for j in countup(0,<ddeg): - var idx=i-ddeg+j - r.cofs[idx] = r.cofs[idx] - d.cofs[j]*ratio - - r.clean # drop zero coefficients in remainder - -proc `+` *(p1:Poly,p2:Poly):Poly= - ## Adds two polynomials - var n=max(p1.cofs.len,p2.cofs.len) - newSeq(result.cofs,n) - - for idx in countup(0,n-1): - result[idx]=p1[idx]+p2[idx] - - result.clean # drop zero coefficients in remainder - -proc `*` *(p1:Poly,p2:Poly):Poly= - ## Multiplies the polynomial `p1` with `p2` - var - d1=p1.degree - d2=p2.degree - n=d1+d2 - idx:int - - newSeq(result.cofs,n) - - for i1 in countup(0,d1): - for i2 in countup(0,d2): - idx=i1+i2 - result[idx]=result[idx]+p1[i1]*p2[i2] - - result.clean - -proc `*` *(p:Poly,f:float):Poly= - ## Multiplies the polynomial `p` with a real number - newSeq(result.cofs,p.cofs.len) - for i in 0..high(p.cofs): - result[i]=p.cofs[i]*f - result.clean - -proc `*` *(f:float,p:Poly):Poly= - ## Multiplies a real number with a polynomial - return p*f - -proc `-`*(p:Poly):Poly= - ## Negates a polynomial - result=p - for i in countup(0,<result.cofs.len): - 
result.cofs[i]= -result.cofs[i] - -proc `-` *(p1:Poly,p2:Poly):Poly= - ## Subtract `p1` with `p2` - var n=max(p1.cofs.len,p2.cofs.len) - newSeq(result.cofs,n) - - for idx in countup(0,n-1): - result[idx]=p1[idx]-p2[idx] - - result.clean # drop zero coefficients in remainder - -proc `/`*(p:Poly,f:float):Poly= - ## Divides polynomial `p` with a real number `f` - newSeq(result.cofs,p.cofs.len) - for i in 0..high(p.cofs): - result[i]=p.cofs[i]/f - result.clean - -proc `/` *(p,q:Poly):Poly= - ## Divides polynomial `p` with polynomial `q` - var dummy:Poly - p.divMod(q,result,dummy) - -proc `mod` *(p,q:Poly):Poly= - ## Computes the polynomial modulo operation, - ## that is the remainder of `p`/`q` - var dummy:Poly - p.divMod(q,dummy,result) - - -proc normalize*(p:var Poly)= - ## Multiplies the polynomial inplace by a term so that - ## the leading term is 1.0. - ## This might lead to an unstable polynomial - ## if the leading term is zero. - p=p/p[p.degree] - - -proc solveQuadric*(a,b,c:float;zerotol=0.0):seq[float]= - ## Solves the quadric equation `ax^2+bx+c`, with a possible - ## tolerance `zerotol` to find roots of curves just 'touching' - ## the x axis. Returns sequence with 0,1 or 2 solutions. - - var p,q,d:float - - p=b/(2.0*a) - - if p==Inf or p==NegInf: #linear equation.. - var linrt= -c/b - if linrt==Inf or linrt==NegInf: #constant only - return @[] - return @[linrt] - - q=c/a - d=p*p-q - - if d<0.0: - #check for inside zerotol range for neg. 
roots - var err=a*p*p-b*p+c #evaluate error at parabola center axis - if(err<=zerotol): return @[-p] - return @[] - else: - var sr=sqrt(d) - result= @[-sr-p,sr-p] - -proc getRangeForRoots(p:Poly):tuple[xmin,xmax:float]= - ## helper function for `roots` function - ## quickly computes a range, guaranteed to contain - ## all the real roots of the polynomial - # see http://www.mathsisfun.com/algebra/polynomials-bounds-zeros.html - - var deg=p.degree - var d=p[deg] - var bound1,bound2:float - - for i in countup(0,deg): - var c=abs(p.cofs[i]/d) - bound1=max(bound1,c+1.0) - bound2=bound2+c - - bound2=max(1.0,bound2) - result.xmax=min(bound1,bound2) - result.xmin= -result.xmax - - -proc addRoot(p:Poly,res:var seq[float],xp0,xp1,tol,zerotol,mergetol:float,maxiter:int)= - ## helper function for `roots` function - ## try to do a numeric search for a single root in range xp0-xp1, - ## adding it to `res` (allocating `res` if nil) - var br=brent(xp0,xp1, proc(x:float):float=p.eval(x),tol) - if br.success: - if res.len==0 or br.rootx>=res[high(res)]+mergetol: #dont add equal roots. - res.add(br.rootx) - else: - #this might be a 'touching' case, check function value against - #zero tolerance - if abs(br.rooty)<=zerotol: - if res.len==0 or br.rootx>=res[high(res)]+mergetol: #dont add equal roots. - res.add(br.rootx) - - -proc roots*(p:Poly,tol=1.0e-9,zerotol=1.0e-6,mergetol=1.0e-12,maxiter=1000):seq[float]= - ## Computes the real roots of the polynomial `p` - ## `tol` is the tolerance used to break searching for each root when reached. - ## `zerotol` is the tolerance, which is 'close enough' to zero to be considered a root - ## and is used to find roots for curves that only 'touch' the x-axis. - ## `mergetol` is the tolerance, of which two x-values are considered beeing the same root. - ## `maxiter` can be used to limit the number of iterations for each root. - ## Returns a (possibly empty) sorted sequence with the solutions. 
- var deg=p.degree - if deg<=0: #constant only => no roots - return @[] - elif p.degree==1: #linear - var linrt= -p.cofs[0]/p.cofs[1] - if linrt==Inf or linrt==NegInf: - return @[] #constant only => no roots - return @[linrt] - elif p.degree==2: - return solveQuadric(p.cofs[2],p.cofs[1],p.cofs[0],zerotol) - else: - # degree >=3 , find min/max points of polynomial with recursive - # derivative and do a numerical search for root between each min/max - var range=p.getRangeForRoots() - var minmax=p.derivative.roots(tol,zerotol,mergetol) - result= @[] - if minmax!=nil: #ie. we have minimas/maximas in this function - for x in minmax.items: - addRoot(p,result,range.xmin,x,tol,zerotol,mergetol,maxiter) - range.xmin=x - addRoot(p,result,range.xmin,range.xmax,tol,zerotol,mergetol,maxiter) - diff --git a/lib/pure/prelude.nim b/lib/pure/prelude.nim new file mode 100644 index 000000000..9428f29eb --- /dev/null +++ b/lib/pure/prelude.nim @@ -0,0 +1,28 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2012 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +when defined(nimdoc) and isMainModule: + from std/compileSettings import nil + when compileSettings.querySetting(compileSettings.SingleValueSetting.projectFull) == currentSourcePath: + ## This is an include file that simply imports common modules for your convenience. + runnableExamples: + include std/prelude + # same as: + # import std/[os, strutils, times, parseutils, hashes, tables, sets, sequtils, parseopt, strformat] + let x = 1 + assert "foo $# $#" % [$x, "bar"] == "foo 1 bar" + assert toSeq(1..3) == @[1, 2, 3] + when not defined(js) or defined(nodejs): + assert getCurrentDir().len > 0 + assert ($now()).startsWith "20" + + # xxx `nim doc -b:js -d:nodejs --doccmd:-d:nodejs lib/pure/prelude.nim` fails for some reason + # specific to `nim doc`, but the code otherwise works with nodejs. 
+ +import std/[os, strutils, times, parseutils, hashes, tables, sets, sequtils, parseopt, strformat] diff --git a/lib/pure/random.nim b/lib/pure/random.nim new file mode 100644 index 000000000..3ec77d37e --- /dev/null +++ b/lib/pure/random.nim @@ -0,0 +1,766 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2017 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Nim's standard random number generator (RNG). +## +## Its implementation is based on the `xoroshiro128+` +## (xor/rotate/shift/rotate) library. +## * More information: http://xoroshiro.di.unimi.it +## * C implementation: http://xoroshiro.di.unimi.it/xoroshiro128plus.c +## +## **Do not use this module for cryptographic purposes!** +## +## Basic usage +## =========== +## +runnableExamples: + # Call randomize() once to initialize the default random number generator. + # If this is not called, the same results will occur every time these + # examples are run. + randomize() + + # Pick a number in 0..100. + let num = rand(100) + doAssert num in 0..100 + + # Roll a six-sided die. + let roll = rand(1..6) + doAssert roll in 1..6 + + # Pick a marble from a bag. + let marbles = ["red", "blue", "green", "yellow", "purple"] + let pick = sample(marbles) + doAssert pick in marbles + + # Shuffle some cards. + var cards = ["Ace", "King", "Queen", "Jack", "Ten"] + shuffle(cards) + doAssert cards.len == 5 + +## These examples all use the default RNG. The +## `Rand type <#Rand>`_ represents the state of an RNG. +## For convenience, this module contains a default Rand state that corresponds +## to the default RNG. Most procs in this module which do +## not take in a Rand parameter, including those called in the above examples, +## use the default generator. Those procs are **not** thread-safe. +## +## Note that the default generator always starts in the same state. 
+## The `randomize proc <#randomize>`_ can be called to initialize the default +## generator with a seed based on the current time, and it only needs to be +## called once before the first usage of procs from this module. If +## `randomize` is not called, the default generator will always produce +## the same results. +## +## RNGs that are independent of the default one can be created with the +## `initRand proc <#initRand,int64>`_. +## +## Again, it is important to remember that this module must **not** be used for +## cryptographic applications. +## +## See also +## ======== +## * `std/sysrand module <sysrand.html>`_ for a cryptographically secure pseudorandom number generator +## * `math module <math.html>`_ for basic math routines +## * `stats module <stats.html>`_ for statistical analysis +## * `list of cryptographic and hashing modules <lib.html#pure-libraries-hashing>`_ +## in the standard library + +import std/[algorithm, math] +import std/private/[since, jsutils] + +when defined(nimPreviewSlimSystem): + import std/[assertions] + +include system/inclrtl +{.push debugger: off.} +template whenHasBigInt64(yes64, no64): untyped = + when defined(js): + when compiles(compileOption("jsbigint64")): + when compileOption("jsbigint64"): + yes64 + else: + no64 + else: + no64 + else: + yes64 + + +whenHasBigInt64: + type Ui = uint64 + + const randMax = 18_446_744_073_709_551_615u64 +do: + type Ui = uint32 + + const randMax = 4_294_967_295u32 + + +type + Rand* = object ## State of a random number generator. + ## + ## Create a new Rand state using the `initRand proc <#initRand,int64>`_. + ## + ## The module contains a default Rand state for convenience. + ## It corresponds to the default RNG's state. + ## The default Rand state always starts with the same values, but the + ## `randomize proc <#randomize>`_ can be used to seed the default generator + ## with a value based on the current time. 
+ ## + ## Many procs have two variations: one that takes in a Rand parameter and + ## another that uses the default generator. The procs that use the default + ## generator are **not** thread-safe! + a0, a1: Ui + +whenHasBigInt64: + const DefaultRandSeed = Rand( + a0: 0x69B4C98CB8530805u64, + a1: 0xFED1DD3004688D67CAu64) + + # racy for multi-threading but good enough for now: + var state = DefaultRandSeed # global for backwards compatibility +do: + var state = Rand( + a0: 0x69B4C98Cu32, + a1: 0xFED1DD30u32) # global for backwards compatibility + +func isValid(r: Rand): bool {.inline.} = + ## Check whether state of `r` is valid. + ## + ## In `xoroshiro128+`, if all bits of `a0` and `a1` are zero, + ## they are always zero after calling `next(r: var Rand)`. + not (r.a0 == 0 and r.a1 == 0) + +since (1, 5): + template randState*(): untyped = + ## Makes the default Rand state accessible from other modules. + ## Useful for module authors. + state + +proc rotl(x, k: Ui): Ui = + result = (x shl k) or (x shr (Ui(64) - k)) + +proc next*(r: var Rand): uint64 = + ## Computes a random `uint64` number using the given state. 
+ ## + ## **See also:** + ## * `rand proc<#rand,Rand,Natural>`_ that returns an integer between zero and + ## a given upper bound + ## * `rand proc<#rand,Rand,range[]>`_ that returns a float + ## * `rand proc<#rand,Rand,HSlice[T: Ordinal or float or float32 or float64,T: Ordinal or float or float32 or float64]>`_ + ## that accepts a slice + ## * `rand proc<#rand,typedesc[T]>`_ that accepts an integer or range type + ## * `skipRandomNumbers proc<#skipRandomNumbers,Rand>`_ + runnableExamples("-r:off"): + var r = initRand(2019) + assert r.next() == 13223559681708962501'u64 # implementation defined + assert r.next() == 7229677234260823147'u64 # ditto + + let s0 = r.a0 + var s1 = r.a1 + result = s0 + s1 + s1 = s1 xor s0 + r.a0 = rotl(s0, 55) xor s1 xor (s1 shl 14) # a, b + r.a1 = rotl(s1, 36) # c + +proc skipRandomNumbers*(s: var Rand) = + ## The jump function for the generator. + ## + ## This proc is equivalent to `2^64` calls to `next <#next,Rand>`_, and it can + ## be used to generate `2^64` non-overlapping subsequences for parallel + ## computations. + ## + ## When multiple threads are generating random numbers, each thread must + ## own the `Rand <#Rand>`_ state it is using so that the thread can safely + ## obtain random numbers. However, if each thread creates its own Rand state, + ## the subsequences of random numbers that each thread generates may overlap, + ## even if the provided seeds are unique. This is more likely to happen as the + ## number of threads and amount of random numbers generated increases. + ## + ## If many threads will generate random numbers concurrently, it is better to + ## create a single Rand state and pass it to each thread. After passing the + ## Rand state to a thread, call this proc before passing it to the next one. + ## By using the Rand state this way, the subsequences of random numbers + ## generated in each thread will never overlap as long as no thread generates + ## more than `2^64` random numbers. 
+ ## + ## **See also:** + ## * `next proc<#next,Rand>`_ + runnableExamples("--threads:on"): + import std/random + + const numbers = 100000 + + var + thr: array[0..3, Thread[(Rand, int)]] + vals: array[0..3, int] + + proc randomSum(params: tuple[r: Rand, index: int]) {.thread.} = + var r = params.r + var s = 0 # avoid cache thrashing + for i in 1..numbers: + s += r.rand(0..10) + vals[params.index] = s + + var r = initRand(2019) + for i in 0..<thr.len: + createThread(thr[i], randomSum, (r, i)) + r.skipRandomNumbers() + + joinThreads(thr) + + for val in vals: + doAssert abs(val - numbers * 5) / numbers < 0.1 + + doAssert vals == [501737, 497901, 500683, 500157] + + + whenHasBigInt64: + const helper = [0xbeac0467eba5facbu64, 0xd86b048b86aa9922u64] + do: + const helper = [0xbeac0467u32, 0xd86b048bu32] + var + s0 = Ui 0 + s1 = Ui 0 + for i in 0..high(helper): + for b in 0 ..< 64: + if (helper[i] and (Ui(1) shl Ui(b))) != 0: + s0 = s0 xor s.a0 + s1 = s1 xor s.a1 + discard next(s) + s.a0 = s0 + s.a1 = s1 + +proc rand[T: uint | uint64](r: var Rand; max: T): T = + # xxx export in future work + if max == 0: return + else: + let max = uint64(max) + when T.high.uint64 == uint64.high: + if max == uint64.high: return T(next(r)) + var iters = 0 + while true: + let x = next(r) + # avoid `mod` bias + if x <= randMax - (randMax mod max) or iters > 20: + return T(x mod (max + 1)) + else: + inc iters + +proc rand*(r: var Rand; max: Natural): int {.benign.} = + ## Returns a random integer in the range `0..max` using the given state. 
+ ## + ## **See also:** + ## * `rand proc<#rand,int>`_ that returns an integer using the default RNG + ## * `rand proc<#rand,Rand,range[]>`_ that returns a float + ## * `rand proc<#rand,Rand,HSlice[T: Ordinal or float or float32 or float64,T: Ordinal or float or float32 or float64]>`_ + ## that accepts a slice + ## * `rand proc<#rand,typedesc[T]>`_ that accepts an integer or range type + runnableExamples: + var r = initRand(123) + if false: + assert r.rand(100) == 96 # implementation defined + # bootstrap: can't use `runnableExamples("-r:off")` + cast[int](rand(r, uint64(max))) + # xxx toUnsigned pending https://github.com/nim-lang/Nim/pull/18445 + +proc rand*(max: int): int {.benign.} = + ## Returns a random integer in the range `0..max`. + ## + ## If `randomize <#randomize>`_ has not been called, the sequence of random + ## numbers returned from this proc will always be the same. + ## + ## This proc uses the default RNG. Thus, it is **not** thread-safe. + ## + ## **See also:** + ## * `rand proc<#rand,Rand,Natural>`_ that returns an integer using a + ## provided state + ## * `rand proc<#rand,float>`_ that returns a float + ## * `rand proc<#rand,HSlice[T: Ordinal or float or float32 or float64,T: Ordinal or float or float32 or float64]>`_ + ## that accepts a slice + ## * `rand proc<#rand,typedesc[T]>`_ that accepts an integer or range type + runnableExamples("-r:off"): + randomize(123) + assert [rand(100), rand(100)] == [96, 63] # implementation defined + + rand(state, max) + +proc rand*(r: var Rand; max: range[0.0 .. high(float)]): float {.benign.} = + ## Returns a random floating point number in the range `0.0..max` + ## using the given state. 
+ ## + ## **See also:** + ## * `rand proc<#rand,float>`_ that returns a float using the default RNG + ## * `rand proc<#rand,Rand,Natural>`_ that returns an integer + ## * `rand proc<#rand,Rand,HSlice[T: Ordinal or float or float32 or float64,T: Ordinal or float or float32 or float64]>`_ + ## that accepts a slice + ## * `rand proc<#rand,typedesc[T]>`_ that accepts an integer or range type + runnableExamples: + var r = initRand(234) + let f = r.rand(1.0) # 8.717181376738381e-07 + + let x = next(r) + when defined(js): + when compiles(compileOption("jsbigint64")): + when compileOption("jsbigint64"): + result = (float(x) / float(high(uint64))) * max + else: + result = (float(x) / float(high(uint32))) * max + else: + result = (float(x) / float(high(uint32))) * max + else: + let u = (0x3FFu64 shl 52u64) or (x shr 12u64) + result = (cast[float](u) - 1.0) * max + +proc rand*(max: float): float {.benign.} = + ## Returns a random floating point number in the range `0.0..max`. + ## + ## If `randomize <#randomize>`_ has not been called, the sequence of random + ## numbers returned from this proc will always be the same. + ## + ## This proc uses the default RNG. Thus, it is **not** thread-safe. + ## + ## **See also:** + ## * `rand proc<#rand,Rand,range[]>`_ that returns a float using a + ## provided state + ## * `rand proc<#rand,int>`_ that returns an integer + ## * `rand proc<#rand,HSlice[T: Ordinal or float or float32 or float64,T: Ordinal or float or float32 or float64]>`_ + ## that accepts a slice + ## * `rand proc<#rand,typedesc[T]>`_ that accepts an integer or range type + runnableExamples: + randomize(234) + let f = rand(1.0) # 8.717181376738381e-07 + + rand(state, max) + +proc rand*[T: Ordinal or SomeFloat](r: var Rand; x: HSlice[T, T]): T = + ## For a slice `a..b`, returns a value in the range `a..b` using the given + ## state. + ## + ## Allowed types for `T` are integers, floats, and enums without holes. 
+ ## + ## **See also:** + ## * `rand proc<#rand,HSlice[T: Ordinal or float or float32 or float64,T: Ordinal or float or float32 or float64]>`_ + ## that accepts a slice and uses the default RNG + ## * `rand proc<#rand,Rand,Natural>`_ that returns an integer + ## * `rand proc<#rand,Rand,range[]>`_ that returns a float + ## * `rand proc<#rand,typedesc[T]>`_ that accepts an integer or range type + runnableExamples: + var r = initRand(345) + assert r.rand(1..5) <= 5 + assert r.rand(-1.1 .. 1.2) >= -1.1 + assert x.a <= x.b + when T is SomeFloat: + result = rand(r, x.b - x.a) + x.a + else: # Integers and Enum types + whenJsNoBigInt64: + result = cast[T](rand(r, cast[uint](x.b) - cast[uint](x.a)) + cast[uint](x.a)) + do: + result = cast[T](rand(r, cast[uint64](x.b) - cast[uint64](x.a)) + cast[uint64](x.a)) + +proc rand*[T: Ordinal or SomeFloat](x: HSlice[T, T]): T = + ## For a slice `a..b`, returns a value in the range `a..b`. + ## + ## Allowed types for `T` are integers, floats, and enums without holes. + ## + ## If `randomize <#randomize>`_ has not been called, the sequence of random + ## numbers returned from this proc will always be the same. + ## + ## This proc uses the default RNG. Thus, it is **not** thread-safe. + ## + ## **See also:** + ## * `rand proc<#rand,Rand,HSlice[T: Ordinal or float or float32 or float64,T: Ordinal or float or float32 or float64]>`_ + ## that accepts a slice and uses a provided state + ## * `rand proc<#rand,int>`_ that returns an integer + ## * `rand proc<#rand,float>`_ that returns a floating point number + ## * `rand proc<#rand,typedesc[T]>`_ that accepts an integer or range type + runnableExamples: + randomize(345) + assert rand(1..6) <= 6 + + result = rand(state, x) + +proc rand*[T: Ordinal](r: var Rand; t: typedesc[T]): T {.since: (1, 7, 1).} = + ## Returns a random Ordinal in the range `low(T)..high(T)`. 
+ ## + ## If `randomize <#randomize>`_ has not been called, the sequence of random + ## numbers returned from this proc will always be the same. + ## + ## **See also:** + ## * `rand proc<#rand,int>`_ that returns an integer + ## * `rand proc<#rand,float>`_ that returns a floating point number + ## * `rand proc<#rand,HSlice[T: Ordinal or float or float32 or float64,T: Ordinal or float or float32 or float64]>`_ + ## that accepts a slice + when T is range or T is enum: + result = rand(r, low(T)..high(T)) + elif T is bool: + result = r.next < randMax div 2 + else: + whenJsNoBigInt64: + result = cast[T](r.next shr (sizeof(uint)*8 - sizeof(T)*8)) + do: + result = cast[T](r.next shr (sizeof(uint64)*8 - sizeof(T)*8)) + +proc rand*[T: Ordinal](t: typedesc[T]): T = + ## Returns a random Ordinal in the range `low(T)..high(T)`. + ## + ## If `randomize <#randomize>`_ has not been called, the sequence of random + ## numbers returned from this proc will always be the same. + ## + ## This proc uses the default RNG. Thus, it is **not** thread-safe. + ## + ## **See also:** + ## * `rand proc<#rand,int>`_ that returns an integer + ## * `rand proc<#rand,float>`_ that returns a floating point number + ## * `rand proc<#rand,HSlice[T: Ordinal or float or float32 or float64,T: Ordinal or float or float32 or float64]>`_ + ## that accepts a slice + runnableExamples: + randomize(567) + type E = enum a, b, c, d + + assert rand(E) in a..d + assert rand(char) in low(char)..high(char) + assert rand(int8) in low(int8)..high(int8) + assert rand(uint32) in low(uint32)..high(uint32) + assert rand(range[1..16]) in 1..16 + + result = rand(state, t) + +proc sample*[T](r: var Rand; s: set[T]): T = + ## Returns a random element from the set `s` using the given state. 
+ ## + ## **See also:** + ## * `sample proc<#sample,set[T]>`_ that uses the default RNG + ## * `sample proc<#sample,Rand,openArray[T]>`_ for `openArray`s + ## * `sample proc<#sample,Rand,openArray[T],openArray[U]>`_ that uses a + ## cumulative distribution function + runnableExamples: + var r = initRand(987) + let s = {1, 3, 5, 7, 9} + assert r.sample(s) in s + + assert card(s) != 0 + var i = rand(r, card(s) - 1) + for e in s: + if i == 0: return e + dec(i) + +proc sample*[T](s: set[T]): T = + ## Returns a random element from the set `s`. + ## + ## If `randomize <#randomize>`_ has not been called, the order of outcomes + ## from this proc will always be the same. + ## + ## This proc uses the default RNG. Thus, it is **not** thread-safe. + ## + ## **See also:** + ## * `sample proc<#sample,Rand,set[T]>`_ that uses a provided state + ## * `sample proc<#sample,openArray[T]>`_ for `openArray`s + ## * `sample proc<#sample,openArray[T],openArray[U]>`_ that uses a + ## cumulative distribution function + runnableExamples: + randomize(987) + let s = {1, 3, 5, 7, 9} + assert sample(s) in s + + sample(state, s) + +proc sample*[T](r: var Rand; a: openArray[T]): T = + ## Returns a random element from `a` using the given state. + ## + ## **See also:** + ## * `sample proc<#sample,openArray[T]>`_ that uses the default RNG + ## * `sample proc<#sample,Rand,openArray[T],openArray[U]>`_ that uses a + ## cumulative distribution function + ## * `sample proc<#sample,Rand,set[T]>`_ for sets + runnableExamples: + let marbles = ["red", "blue", "green", "yellow", "purple"] + var r = initRand(456) + assert r.sample(marbles) in marbles + + result = a[r.rand(a.low..a.high)] + +proc sample*[T](a: openArray[T]): lent T = + ## Returns a random element from `a`. + ## + ## If `randomize <#randomize>`_ has not been called, the order of outcomes + ## from this proc will always be the same. + ## + ## This proc uses the default RNG. Thus, it is **not** thread-safe. 
+ ## + ## **See also:** + ## * `sample proc<#sample,Rand,openArray[T]>`_ that uses a provided state + ## * `sample proc<#sample,openArray[T],openArray[U]>`_ that uses a + ## cumulative distribution function + ## * `sample proc<#sample,set[T]>`_ for sets + runnableExamples: + let marbles = ["red", "blue", "green", "yellow", "purple"] + randomize(456) + assert sample(marbles) in marbles + + result = a[rand(a.low..a.high)] + +proc sample*[T, U](r: var Rand; a: openArray[T]; cdf: openArray[U]): T = + ## Returns an element from `a` using a cumulative distribution function + ## (CDF) and the given state. + ## + ## The `cdf` argument does not have to be normalized, and it could contain + ## any type of elements that can be converted to a `float`. It must be + ## the same length as `a`. Each element in `cdf` should be greater than + ## or equal to the previous element. + ## + ## The outcome of the `cumsum<math.html#cumsum,openArray[T]>`_ proc and the + ## return value of the `cumsummed<math.html#cumsummed,openArray[T]>`_ proc, + ## which are both in the math module, can be used as the `cdf` argument. + ## + ## **See also:** + ## * `sample proc<#sample,openArray[T],openArray[U]>`_ that also utilizes + ## a CDF but uses the default RNG + ## * `sample proc<#sample,Rand,openArray[T]>`_ that does not use a CDF + ## * `sample proc<#sample,Rand,set[T]>`_ for sets + runnableExamples: + from std/math import cumsummed + + let marbles = ["red", "blue", "green", "yellow", "purple"] + let count = [1, 6, 8, 3, 4] + let cdf = count.cumsummed + var r = initRand(789) + assert r.sample(marbles, cdf) in marbles + + assert(cdf.len == a.len) # Two basic sanity checks. + assert(float(cdf[^1]) > 0.0) + # While we could check cdf[i-1] <= cdf[i] for i in 1..cdf.len, that could get + # awfully expensive even in debugging modes. 
+ let u = r.rand(float(cdf[^1])) + a[cdf.upperBound(U(u))] + +proc sample*[T, U](a: openArray[T]; cdf: openArray[U]): T = + ## Returns an element from `a` using a cumulative distribution function + ## (CDF). + ## + ## This proc works similarly to + ## `sample <#sample,Rand,openArray[T],openArray[U]>`_. + ## See that proc's documentation for more details. + ## + ## If `randomize <#randomize>`_ has not been called, the order of outcomes + ## from this proc will always be the same. + ## + ## This proc uses the default RNG. Thus, it is **not** thread-safe. + ## + ## **See also:** + ## * `sample proc<#sample,Rand,openArray[T],openArray[U]>`_ that also utilizes + ## a CDF but uses a provided state + ## * `sample proc<#sample,openArray[T]>`_ that does not use a CDF + ## * `sample proc<#sample,set[T]>`_ for sets + runnableExamples: + from std/math import cumsummed + + let marbles = ["red", "blue", "green", "yellow", "purple"] + let count = [1, 6, 8, 3, 4] + let cdf = count.cumsummed + randomize(789) + assert sample(marbles, cdf) in marbles + + state.sample(a, cdf) + +proc gauss*(r: var Rand; mu = 0.0; sigma = 1.0): float {.since: (1, 3).} = + ## Returns a Gaussian random variate, + ## with mean `mu` and standard deviation `sigma` + ## using the given state. + # Ratio of uniforms method for normal + # https://www2.econ.osaka-u.ac.jp/~tanizaki/class/2013/econome3/13.pdf + const K = sqrt(2 / E) + var + a = 0.0 + b = 0.0 + while true: + a = rand(r, 1.0) + b = (2.0 * rand(r, 1.0) - 1.0) * K + if b * b <= -4.0 * a * a * ln(a): break + result = mu + sigma * (b / a) + +proc gauss*(mu = 0.0, sigma = 1.0): float {.since: (1, 3).} = + ## Returns a Gaussian random variate, + ## with mean `mu` and standard deviation `sigma`. + ## + ## If `randomize <#randomize>`_ has not been called, the order of outcomes + ## from this proc will always be the same. + ## + ## This proc uses the default RNG. Thus, it is **not** thread-safe. 
+ result = gauss(state, mu, sigma) + +proc initRand*(seed: int64): Rand = + ## Initializes a new `Rand <#Rand>`_ state using the given seed. + ## + ## Providing a specific seed will produce the same results for that seed each time. + ## + ## The resulting state is independent of the default RNG's state. When `seed == 0`, + ## we internally set the seed to an implementation defined non-zero value. + ## + ## **See also:** + ## * `initRand proc<#initRand>`_ that uses the current time + ## * `randomize proc<#randomize,int64>`_ that accepts a seed for the default RNG + ## * `randomize proc<#randomize>`_ that initializes the default RNG using the current time + runnableExamples: + from std/times import getTime, toUnix, nanosecond + + var r1 = initRand(123) + let now = getTime() + var r2 = initRand(now.toUnix * 1_000_000_000 + now.nanosecond) + const seedFallback0 = int32.high # arbitrary + let seed = if seed != 0: seed else: seedFallback0 # because 0 is a fixed point + result.a0 = Ui(seed shr 16) + result.a1 = Ui(seed and 0xffff) + when not defined(nimLegacyRandomInitRand): + # calling `discard next(result)` (even a few times) would still produce + # skewed numbers for the 1st call to `rand()`. + skipRandomNumbers(result) + discard next(result) + +proc randomize*(seed: int64) {.benign.} = + ## Initializes the default random number generator with the given seed. + ## + ## Providing a specific seed will produce the same results for that seed each time. 
+ ## + ## **See also:** + ## * `initRand proc<#initRand,int64>`_ that initializes a Rand state + ## with a given seed + ## * `randomize proc<#randomize>`_ that uses the current time instead + ## * `initRand proc<#initRand>`_ that initializes a Rand state using + ## the current time + runnableExamples: + from std/times import getTime, toUnix, nanosecond + + randomize(123) + + let now = getTime() + randomize(now.toUnix * 1_000_000_000 + now.nanosecond) + + state = initRand(seed) + +proc shuffle*[T](r: var Rand; x: var openArray[T]) = + ## Shuffles a sequence of elements in-place using the given state. + ## + ## **See also:** + ## * `shuffle proc<#shuffle,openArray[T]>`_ that uses the default RNG + runnableExamples: + var cards = ["Ace", "King", "Queen", "Jack", "Ten"] + var r = initRand(678) + r.shuffle(cards) + import std/algorithm + assert cards.sorted == @["Ace", "Jack", "King", "Queen", "Ten"] + + for i in countdown(x.high, 1): + let j = r.rand(i) + swap(x[i], x[j]) + +proc shuffle*[T](x: var openArray[T]) = + ## Shuffles a sequence of elements in-place. + ## + ## If `randomize <#randomize>`_ has not been called, the order of outcomes + ## from this proc will always be the same. + ## + ## This proc uses the default RNG. Thus, it is **not** thread-safe. + ## + ## **See also:** + ## * `shuffle proc<#shuffle,Rand,openArray[T]>`_ that uses a provided state + runnableExamples: + var cards = ["Ace", "King", "Queen", "Jack", "Ten"] + randomize(678) + shuffle(cards) + import std/algorithm + assert cards.sorted == @["Ace", "Jack", "King", "Queen", "Ten"] + + shuffle(state, x) + +when not defined(standalone): + when defined(js): + import std/times + else: + when defined(nimscript): + import std/hashes + else: + import std/[hashes, os, sysrand, monotimes] + + when compileOption("threads"): + import std/locks + var baseSeedLock: Lock + baseSeedLock.initLock + + var baseState: Rand + + proc initRand(): Rand = + ## Initializes a new Rand state. 
+ ## + ## The resulting state is independent of the default RNG's state. + ## + ## **Note:** Does not work for the compile-time VM. + ## + ## See also: + ## * `initRand proc<#initRand,int64>`_ that accepts a seed for a new Rand state + ## * `randomize proc<#randomize>`_ that initializes the default RNG using the current time + ## * `randomize proc<#randomize,int64>`_ that accepts a seed for the default RNG + when defined(js): + let time = int64(times.epochTime() * 1000) and 0x7fff_ffff + result = initRand(time) + else: + proc getRandomState(): Rand = + when defined(nimscript): + result = Rand( + a0: CompileTime.hash.Ui, + a1: CompileDate.hash.Ui) + if not result.isValid: + result = DefaultRandSeed + else: + var urand: array[sizeof(Rand), byte] + + for i in 0 .. 7: + if sysrand.urandom(urand): + copyMem(result.addr, urand[0].addr, sizeof(Rand)) + if result.isValid: + break + + if not result.isValid: + # Don't try to get alternative random values from other source like time or process/thread id, + # because such code would be never tested and is a liability for security. + quit("Failed to initializes baseState in random module as sysrand.urandom doesn't work.") + + when compileOption("threads"): + baseSeedLock.withLock: + if not baseState.isValid: + baseState = getRandomState() + result = baseState + baseState.skipRandomNumbers + else: + if not baseState.isValid: + baseState = getRandomState() + result = baseState + baseState.skipRandomNumbers + + since (1, 5, 1): + export initRand + + proc randomize*() {.benign.} = + ## Initializes the default random number generator with a seed based on + ## random number source. + ## + ## This proc only needs to be called once, and it should be called before + ## the first usage of procs from this module that use the default RNG. + ## + ## **Note:** Does not work for the compile-time VM. 
+ ## + ## **See also:** + ## * `randomize proc<#randomize,int64>`_ that accepts a seed + ## * `initRand proc<#initRand>`_ that initializes a Rand state using + ## the current time + ## * `initRand proc<#initRand,int64>`_ that initializes a Rand state + ## with a given seed + state = initRand() + +{.pop.} diff --git a/lib/pure/rationals.nim b/lib/pure/rationals.nim index 04aa8316a..5f806bd70 100644 --- a/lib/pure/rationals.nim +++ b/lib/pure/rationals.nim @@ -8,148 +8,213 @@ # -## This module implements rational numbers, consisting of a numerator `num` and -## a denominator `den`, both of type int. The denominator can not be 0. +## This module implements rational numbers, consisting of a numerator and +## a denominator. The denominator can not be 0. -import math +runnableExamples: + let + r1 = 1 // 2 + r2 = -3 // 4 + + doAssert r1 + r2 == -1 // 4 + doAssert r1 - r2 == 5 // 4 + doAssert r1 * r2 == -3 // 8 + doAssert r1 / r2 == -2 // 3 + +import std/[math, hashes] +when defined(nimPreviewSlimSystem): + import std/assertions type Rational*[T] = object - ## a rational number, consisting of a numerator and denominator + ## A rational number, consisting of a numerator `num` and a denominator `den`. num*, den*: T -proc initRational*[T](num, den: T): Rational[T] = - ## Create a new rational number. +func reduce*[T: SomeInteger](x: var Rational[T]) = + ## Reduces the rational number `x`, so that the numerator and denominator + ## have no common divisors other than 1 (and -1). + ## If the denominator of `x` is 0, raises `DivByZeroDefect`. + ## + ## **Note:** This is called automatically by the various operations on rationals.
+ runnableExamples: + var r = Rational[int](num: 2, den: 4) # 1/2 + reduce(r) + doAssert r.num == 1 + doAssert r.den == 2 + if x.den == 0: + raise newException(DivByZeroDefect, "division by zero") + let common = gcd(x.num, x.den) + if x.den > 0: + x.num = x.num div common + x.den = x.den div common + when T isnot SomeUnsignedInt: + if x.den < 0: + x.num = -x.num div common + x.den = -x.den div common + +func initRational*[T: SomeInteger](num, den: T): Rational[T] = + ## Creates a new rational number with numerator `num` and denominator `den`. + ## `den` must not be 0. + ## + ## **Note:** `den != 0` is not checked when assertions are turned off. + assert(den != 0, "a denominator of zero is invalid") result.num = num result.den = den + reduce(result) -proc `//`*[T](num, den: T): Rational[T] = initRational[T](num, den) - ## A friendlier version of `initRational`. Example usage: - ## - ## .. code-block:: nim - ## var x = 1//3 + 1//5 +func `//`*[T](num, den: T): Rational[T] = + ## A friendlier version of `initRational <#initRational,T,T>`_. + runnableExamples: + let x = 1 // 3 + 1 // 5 + doAssert x == 8 // 15 + + initRational[T](num, den) + +func `$`*[T](x: Rational[T]): string = + ## Turns a rational number into a string. + runnableExamples: + doAssert $(1 // 2) == "1/2" -proc `$`*[T](x: Rational[T]): string = - ## Turn a rational number into a string. result = $x.num & "/" & $x.den -proc toRational*[T](x: SomeInteger): Rational[T] = - ## Convert some integer `x` to a rational number. +func toRational*[T: SomeInteger](x: T): Rational[T] = + ## Converts some integer `x` to a rational number. + runnableExamples: + doAssert toRational(42) == 42 // 1 + result.num = x result.den = 1 -proc toFloat*[T](x: Rational[T]): float = - ## Convert a rational number `x` to a float. 
+func toRational*(x: float, + n: int = high(int) shr (sizeof(int) div 2 * 8)): Rational[int] = + ## Calculates the best rational approximation of `x`, + ## where the denominator is smaller than `n` + ## (default is the largest possible `int` for maximal resolution). + ## + ## The algorithm is based on the theory of continued fractions. + # David Eppstein / UC Irvine / 8 Aug 1993 + # With corrections from Arno Formella, May 2008 + runnableExamples: + let x = 1.2 + doAssert x.toRational.toFloat == x + + var + m11, m22 = 1 + m12, m21 = 0 + ai = int(x) + x = x + while m21 * ai + m22 <= n: + swap m12, m11 + swap m22, m21 + m11 = m12 * ai + m11 + m21 = m22 * ai + m21 + if x == float(ai): break # division by zero + x = 1 / (x - float(ai)) + if x > float(high(int32)): break # representation failure + ai = int(x) + result = m11 // m21 + +func toFloat*[T](x: Rational[T]): float = + ## Converts a rational number `x` to a `float`. x.num / x.den -proc toInt*[T](x: Rational[T]): int = - ## Convert a rational number `x` to an int. Conversion rounds towards 0 if +func toInt*[T](x: Rational[T]): int = + ## Converts a rational number `x` to an `int`. Conversion rounds towards 0 if ## `x` does not contain an integer value. x.num div x.den -proc reduce*[T](x: var Rational[T]) = - ## Reduce rational `x`. - let common = gcd(x.num, x.den) - if x.den > 0: - x.num = x.num div common - x.den = x.den div common - elif x.den < 0: - x.num = -x.num div common - x.den = -x.den div common - else: - raise newException(DivByZeroError, "division by zero") - -proc `+` *[T](x, y: Rational[T]): Rational[T] = - ## Add two rational numbers. +func `+`*[T](x, y: Rational[T]): Rational[T] = + ## Adds two rational numbers. let common = lcm(x.den, y.den) result.num = common div x.den * x.num + common div y.den * y.num result.den = common reduce(result) -proc `+` *[T](x: Rational[T], y: T): Rational[T] = - ## Add rational `x` to int `y`. 
+func `+`*[T](x: Rational[T], y: T): Rational[T] = + ## Adds the rational `x` to the int `y`. result.num = x.num + y * x.den result.den = x.den -proc `+` *[T](x: T, y: Rational[T]): Rational[T] = - ## Add int `x` to rational `y`. +func `+`*[T](x: T, y: Rational[T]): Rational[T] = + ## Adds the int `x` to the rational `y`. result.num = x * y.den + y.num result.den = y.den -proc `+=` *[T](x: var Rational[T], y: Rational[T]) = - ## Add rational `y` to rational `x`. +func `+=`*[T](x: var Rational[T], y: Rational[T]) = + ## Adds the rational `y` to the rational `x` in-place. let common = lcm(x.den, y.den) x.num = common div x.den * x.num + common div y.den * y.num x.den = common reduce(x) -proc `+=` *[T](x: var Rational[T], y: T) = - ## Add int `y` to rational `x`. +func `+=`*[T](x: var Rational[T], y: T) = + ## Adds the int `y` to the rational `x` in-place. x.num += y * x.den -proc `-` *[T](x: Rational[T]): Rational[T] = +func `-`*[T](x: Rational[T]): Rational[T] = ## Unary minus for rational numbers. result.num = -x.num result.den = x.den -proc `-` *[T](x, y: Rational[T]): Rational[T] = - ## Subtract two rational numbers. +func `-`*[T](x, y: Rational[T]): Rational[T] = + ## Subtracts two rational numbers. let common = lcm(x.den, y.den) result.num = common div x.den * x.num - common div y.den * y.num result.den = common reduce(result) -proc `-` *[T](x: Rational[T], y: T): Rational[T] = - ## Subtract int `y` from rational `x`. +func `-`*[T](x: Rational[T], y: T): Rational[T] = + ## Subtracts the int `y` from the rational `x`. result.num = x.num - y * x.den result.den = x.den -proc `-` *[T](x: T, y: Rational[T]): Rational[T] = - ## Subtract rational `y` from int `x`. - result.num = - x * y.den + y.num +func `-`*[T](x: T, y: Rational[T]): Rational[T] = + ## Subtracts the rational `y` from the int `x`. + result.num = x * y.den - y.num result.den = y.den -proc `-=` *[T](x: var Rational[T], y: Rational[T]) = - ## Subtract rational `y` from rational `x`. 
+func `-=`*[T](x: var Rational[T], y: Rational[T]) = + ## Subtracts the rational `y` from the rational `x` in-place. let common = lcm(x.den, y.den) x.num = common div x.den * x.num - common div y.den * y.num x.den = common reduce(x) -proc `-=` *[T](x: var Rational[T], y: T) = - ## Subtract int `y` from rational `x`. +func `-=`*[T](x: var Rational[T], y: T) = + ## Subtracts the int `y` from the rational `x` in-place. x.num -= y * x.den -proc `*` *[T](x, y: Rational[T]): Rational[T] = - ## Multiply two rational numbers. +func `*`*[T](x, y: Rational[T]): Rational[T] = + ## Multiplies two rational numbers. result.num = x.num * y.num result.den = x.den * y.den reduce(result) -proc `*` *[T](x: Rational[T], y: T): Rational[T] = - ## Multiply rational `x` with int `y`. +func `*`*[T](x: Rational[T], y: T): Rational[T] = + ## Multiplies the rational `x` with the int `y`. result.num = x.num * y result.den = x.den reduce(result) -proc `*` *[T](x: T, y: Rational[T]): Rational[T] = - ## Multiply int `x` with rational `y`. +func `*`*[T](x: T, y: Rational[T]): Rational[T] = + ## Multiplies the int `x` with the rational `y`. result.num = x * y.num result.den = y.den reduce(result) -proc `*=` *[T](x: var Rational[T], y: Rational[T]) = - ## Multiply rationals `y` to `x`. +func `*=`*[T](x: var Rational[T], y: Rational[T]) = + ## Multiplies the rational `x` by `y` in-place. x.num *= y.num x.den *= y.den reduce(x) -proc `*=` *[T](x: var Rational[T], y: T) = - ## Multiply int `y` to rational `x`. +func `*=`*[T](x: var Rational[T], y: T) = + ## Multiplies the rational `x` by the int `y` in-place. x.num *= y reduce(x) -proc reciprocal*[T](x: Rational[T]): Rational[T] = - ## Calculate the reciprocal of `x`. (1/x) +func reciprocal*[T](x: Rational[T]): Rational[T] = + ## Calculates the reciprocal of `x` (`1/x`). + ## If `x` is 0, raises `DivByZeroDefect`. 
if x.num > 0: result.num = x.den result.den = x.num @@ -157,119 +222,119 @@ proc reciprocal*[T](x: Rational[T]): Rational[T] = result.num = -x.den result.den = -x.num else: - raise newException(DivByZeroError, "division by zero") + raise newException(DivByZeroDefect, "division by zero") -proc `/`*[T](x, y: Rational[T]): Rational[T] = - ## Divide rationals `x` by `y`. +func `/`*[T](x, y: Rational[T]): Rational[T] = + ## Divides the rational `x` by the rational `y`. result.num = x.num * y.den result.den = x.den * y.num reduce(result) -proc `/`*[T](x: Rational[T], y: T): Rational[T] = - ## Divide rational `x` by int `y`. +func `/`*[T](x: Rational[T], y: T): Rational[T] = + ## Divides the rational `x` by the int `y`. result.num = x.num result.den = x.den * y reduce(result) -proc `/`*[T](x: T, y: Rational[T]): Rational[T] = - ## Divide int `x` by Rational `y`. +func `/`*[T](x: T, y: Rational[T]): Rational[T] = + ## Divides the int `x` by the rational `y`. result.num = x * y.den result.den = y.num reduce(result) -proc `/=`*[T](x: var Rational[T], y: Rational[T]) = - ## Divide rationals `x` by `y` in place. +func `/=`*[T](x: var Rational[T], y: Rational[T]) = + ## Divides the rational `x` by the rational `y` in-place. x.num *= y.den x.den *= y.num reduce(x) -proc `/=`*[T](x: var Rational[T], y: T) = - ## Divide rational `x` by int `y` in place. +func `/=`*[T](x: var Rational[T], y: T) = + ## Divides the rational `x` by the int `y` in-place. x.den *= y reduce(x) -proc cmp*(x, y: Rational): int = - ## Compares two rationals. +func cmp*(x, y: Rational): int = + ## Compares two rationals. Returns + ## * a value less than zero, if `x < y` + ## * a value greater than zero, if `x > y` + ## * zero, if `x == y` (x - y).num -proc `<` *(x, y: Rational): bool = +func `<`*(x, y: Rational): bool = + ## Returns true if `x` is less than `y`. (x - y).num < 0 -proc `<=` *(x, y: Rational): bool = +func `<=`*(x, y: Rational): bool = + ## Returns true if `x` is less than or equal to `y`.
(x - y).num <= 0 -proc `==` *(x, y: Rational): bool = +func `==`*(x, y: Rational): bool = + ## Compares two rationals for equality. (x - y).num == 0 -proc abs*[T](x: Rational[T]): Rational[T] = +func abs*[T](x: Rational[T]): Rational[T] = + ## Returns the absolute value of `x`. + runnableExamples: + doAssert abs(1 // 2) == 1 // 2 + doAssert abs(-1 // 2) == 1 // 2 + result.num = abs x.num result.den = abs x.den -when isMainModule: - var - z = Rational[int](num: 0, den: 1) - o = initRational(num=1, den=1) - a = initRational(1, 2) - b = -1 // -2 - m1 = -1 // 1 - tt = 10 // 2 - - assert( a == a ) - assert( (a-a) == z ) - assert( (a+b) == o ) - assert( (a/b) == o ) - assert( (a*b) == 1 // 4 ) - assert( (3/a) == 6 // 1 ) - assert( (a/3) == 1 // 6 ) - assert( a*b == 1 // 4 ) - assert( tt*z == z ) - assert( 10*a == tt ) - assert( a*10 == tt ) - assert( tt/10 == a ) - assert( a-m1 == 3 // 2 ) - assert( a+m1 == -1 // 2 ) - assert( m1+tt == 16 // 4 ) - assert( m1-tt == 6 // -1 ) - - assert( z < o ) - assert( z <= o ) - assert( z == z ) - assert( cmp(z, o) < 0 ) - assert( cmp(o, z) > 0 ) - - assert( o == o ) - assert( o >= o ) - assert( not(o > o) ) - assert( cmp(o, o) == 0 ) - assert( cmp(z, z) == 0 ) - - assert( a == b ) - assert( a >= b ) - assert( not(b > a) ) - assert( cmp(a, b) == 0 ) - - var x = 1//3 - - x *= 5//1 - assert( x == 5//3 ) - x += 2 // 9 - assert( x == 17//9 ) - x -= 9//18 - assert( x == 25//18 ) - x /= 1//2 - assert( x == 50//18 ) - - var y = 1//3 - - y *= 4 - assert( y == 4//3 ) - y += 5 - assert( y == 19//3 ) - y -= 2 - assert( y == 13//3 ) - y /= 9 - assert( y == 13//27 ) - - assert toRational[int, int](5) == 5//1 - assert abs(toFloat(y) - 0.4814814814814815) < 1.0e-7 - assert toInt(z) == 0 +func `div`*[T: SomeInteger](x, y: Rational[T]): T = + ## Computes the rational truncated division. 
+ (x.num * y.den) div (y.num * x.den) + +func `mod`*[T: SomeInteger](x, y: Rational[T]): Rational[T] = + ## Computes the rational modulo by truncated division (remainder). + ## This is same as `x - (x div y) * y`. + result = ((x.num * y.den) mod (y.num * x.den)) // (x.den * y.den) + reduce(result) + +func floorDiv*[T: SomeInteger](x, y: Rational[T]): T = + ## Computes the rational floor division. + ## + ## Floor division is conceptually defined as `floor(x / y)`. + ## This is different from the `div` operator, which is defined + ## as `trunc(x / y)`. That is, `div` rounds towards 0 and `floorDiv` + ## rounds down. + floorDiv(x.num * y.den, y.num * x.den) + +func floorMod*[T: SomeInteger](x, y: Rational[T]): Rational[T] = + ## Computes the rational modulo by floor division (modulo). + ## + ## This is same as `x - floorDiv(x, y) * y`. + ## This func behaves the same as the `%` operator in Python. + result = floorMod(x.num * y.den, y.num * x.den) // (x.den * y.den) + reduce(result) + +func hash*[T](x: Rational[T]): Hash = + ## Computes the hash for the rational `x`. + # reduce first so that hash(x) == hash(y) for x == y + var copy = x + reduce(copy) + + var h: Hash = 0 + h = h !& hash(copy.num) + h = h !& hash(copy.den) + result = !$h + +func `^`*[T: SomeInteger](x: Rational[T], y: T): Rational[T] = + ## Computes `x` to the power of `y`. + ## + ## The exponent `y` must be an integer. Negative exponents are supported + ## but floating point exponents are not. 
+ runnableExamples: + doAssert (-3 // 5) ^ 0 == (1 // 1) + doAssert (-3 // 5) ^ 1 == (-3 // 5) + doAssert (-3 // 5) ^ 2 == (9 // 25) + doAssert (-3 // 5) ^ -2 == (25 // 9) + + if y >= 0: + result.num = x.num ^ y + result.den = x.den ^ y + else: + result.num = x.den ^ -y + result.den = x.num ^ -y + # Note that all powers of reduced rationals are already reduced, + # so we don't need to call reduce() here diff --git a/lib/pure/rawsockets.nim b/lib/pure/rawsockets.nim deleted file mode 100644 index ac348eb1b..000000000 --- a/lib/pure/rawsockets.nim +++ /dev/null @@ -1,437 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2015 Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements a low-level cross-platform sockets interface. Look -## at the ``net`` module for the higher-level version. - -# TODO: Clean up the exports a bit and everything else in general. - -import unsigned, os - -when hostOS == "solaris": - {.passl: "-lsocket -lnsl".} - -const useWinVersion = defined(Windows) or defined(nimdoc) - -when useWinVersion: - import winlean - export WSAEWOULDBLOCK, WSAECONNRESET, WSAECONNABORTED, WSAENETRESET, - WSAEDISCON, ERROR_NETNAME_DELETED -else: - import posix - export fcntl, F_GETFL, O_NONBLOCK, F_SETFL, EAGAIN, EWOULDBLOCK, MSG_NOSIGNAL, - EINTR, EINPROGRESS, ECONNRESET, EPIPE, ENETRESET - -export SocketHandle, Sockaddr_in, Addrinfo, INADDR_ANY, SockAddr, SockLen, - inet_ntoa, recv, `==`, connect, send, accept, recvfrom, sendto - -export - SO_ERROR, - SOL_SOCKET, - SOMAXCONN, - SO_ACCEPTCONN, SO_BROADCAST, SO_DEBUG, SO_DONTROUTE, - SO_KEEPALIVE, SO_OOBINLINE, SO_REUSEADDR, - MSG_PEEK - -type - Port* = distinct uint16 ## port type - - Domain* = enum ## domain, which specifies the protocol family of the - ## created socket. Other domains than those that are listed - ## here are unsupported. - AF_UNIX, ## for local socket (using a file). Unsupported on Windows. 
- AF_INET = 2, ## for network protocol IPv4 or - AF_INET6 = 23 ## for network protocol IPv6. - - SockType* = enum ## second argument to `socket` proc - SOCK_STREAM = 1, ## reliable stream-oriented service or Stream Sockets - SOCK_DGRAM = 2, ## datagram service or Datagram Sockets - SOCK_RAW = 3, ## raw protocols atop the network layer. - SOCK_SEQPACKET = 5 ## reliable sequenced packet service - - Protocol* = enum ## third argument to `socket` proc - IPPROTO_TCP = 6, ## Transmission control protocol. - IPPROTO_UDP = 17, ## User datagram protocol. - IPPROTO_IP, ## Internet protocol. Unsupported on Windows. - IPPROTO_IPV6, ## Internet Protocol Version 6. Unsupported on Windows. - IPPROTO_RAW, ## Raw IP Packets Protocol. Unsupported on Windows. - IPPROTO_ICMP ## Control message protocol. Unsupported on Windows. - - Servent* = object ## information about a service - name*: string - aliases*: seq[string] - port*: Port - proto*: string - - Hostent* = object ## information about a given host - name*: string - aliases*: seq[string] - addrtype*: Domain - length*: int - addrList*: seq[string] - -{.deprecated: [TPort: Port, TDomain: Domain, TType: SockType, - TProtocol: Protocol, TServent: Servent, THostent: Hostent].} - -when useWinVersion: - let - osInvalidSocket* = winlean.INVALID_SOCKET - - const - IOCPARM_MASK* = 127 - IOC_IN* = int(-2147483648) - FIONBIO* = IOC_IN.int32 or ((sizeof(int32) and IOCPARM_MASK) shl 16) or - (102 shl 8) or 126 - - proc ioctlsocket*(s: SocketHandle, cmd: clong, - argptr: ptr clong): cint {. - stdcall, importc: "ioctlsocket", dynlib: "ws2_32.dll".} -else: - let - osInvalidSocket* = posix.INVALID_SOCKET - -proc `==`*(a, b: Port): bool {.borrow.} - ## ``==`` for ports. - -proc `$`*(p: Port): string {.borrow.} - ## returns the port number as a string - -proc toInt*(domain: Domain): cint - ## Converts the TDomain enum to a platform-dependent ``cint``. - -proc toInt*(typ: SockType): cint - ## Converts the TType enum to a platform-dependent ``cint``. 
- -proc toInt*(p: Protocol): cint - ## Converts the TProtocol enum to a platform-dependent ``cint``. - -when not useWinVersion: - proc toInt(domain: Domain): cint = - case domain - of AF_UNIX: result = posix.AF_UNIX - of AF_INET: result = posix.AF_INET - of AF_INET6: result = posix.AF_INET6 - else: discard - - proc toInt(typ: SockType): cint = - case typ - of SOCK_STREAM: result = posix.SOCK_STREAM - of SOCK_DGRAM: result = posix.SOCK_DGRAM - of SOCK_SEQPACKET: result = posix.SOCK_SEQPACKET - of SOCK_RAW: result = posix.SOCK_RAW - else: discard - - proc toInt(p: Protocol): cint = - case p - of IPPROTO_TCP: result = posix.IPPROTO_TCP - of IPPROTO_UDP: result = posix.IPPROTO_UDP - of IPPROTO_IP: result = posix.IPPROTO_IP - of IPPROTO_IPV6: result = posix.IPPROTO_IPV6 - of IPPROTO_RAW: result = posix.IPPROTO_RAW - of IPPROTO_ICMP: result = posix.IPPROTO_ICMP - else: discard - -else: - proc toInt(domain: Domain): cint = - result = toU16(ord(domain)) - - proc toInt(typ: SockType): cint = - result = cint(ord(typ)) - - proc toInt(p: Protocol): cint = - result = cint(ord(p)) - - -proc newRawSocket*(domain: Domain = AF_INET, typ: SockType = SOCK_STREAM, - protocol: Protocol = IPPROTO_TCP): SocketHandle = - ## Creates a new socket; returns `InvalidSocket` if an error occurs. - socket(toInt(domain), toInt(typ), toInt(protocol)) - -proc newRawSocket*(domain: cint, typ: cint, protocol: cint): SocketHandle = - ## Creates a new socket; returns `InvalidSocket` if an error occurs. - ## - ## Use this overload if one of the enums specified above does - ## not contain what you need. - socket(domain, typ, protocol) - -proc close*(socket: SocketHandle) = - ## closes a socket. - when useWinVersion: - discard winlean.closesocket(socket) - else: - discard posix.close(socket) - # TODO: These values should not be discarded. An EOS should be raised. 
- # http://stackoverflow.com/questions/12463473/what-happens-if-you-call-close-on-a-bsd-socket-multiple-times - -proc bindAddr*(socket: SocketHandle, name: ptr SockAddr, namelen: SockLen): cint = - result = bindSocket(socket, name, namelen) - -proc listen*(socket: SocketHandle, backlog = SOMAXCONN): cint {.tags: [ReadIOEffect].} = - ## Marks ``socket`` as accepting connections. - ## ``Backlog`` specifies the maximum length of the - ## queue of pending connections. - when useWinVersion: - result = winlean.listen(socket, cint(backlog)) - else: - result = posix.listen(socket, cint(backlog)) - -proc getAddrInfo*(address: string, port: Port, af: Domain = AF_INET, typ: SockType = SOCK_STREAM, - prot: Protocol = IPPROTO_TCP): ptr AddrInfo = - ## - ## - ## **Warning**: The resulting ``ptr TAddrInfo`` must be freed using ``dealloc``! - var hints: AddrInfo - result = nil - hints.ai_family = toInt(af) - hints.ai_socktype = toInt(typ) - hints.ai_protocol = toInt(prot) - var gaiResult = getaddrinfo(address, $port, addr(hints), result) - if gaiResult != 0'i32: - when useWinVersion: - raiseOSError(osLastError()) - else: - raise newException(OSError, $gai_strerror(gaiResult)) - -proc dealloc*(ai: ptr AddrInfo) = - freeaddrinfo(ai) - -proc ntohl*(x: int32): int32 = - ## Converts 32-bit integers from network to host byte order. - ## On machines where the host byte order is the same as network byte order, - ## this is a no-op; otherwise, it performs a 4-byte swap operation. - when cpuEndian == bigEndian: result = x - else: result = (x shr 24'i32) or - (x shr 8'i32 and 0xff00'i32) or - (x shl 8'i32 and 0xff0000'i32) or - (x shl 24'i32) - -proc ntohs*(x: int16): int16 = - ## Converts 16-bit integers from network to host byte order. On machines - ## where the host byte order is the same as network byte order, this is - ## a no-op; otherwise, it performs a 2-byte swap operation. 
- when cpuEndian == bigEndian: result = x - else: result = (x shr 8'i16) or (x shl 8'i16) - -proc htonl*(x: int32): int32 = - ## Converts 32-bit integers from host to network byte order. On machines - ## where the host byte order is the same as network byte order, this is - ## a no-op; otherwise, it performs a 4-byte swap operation. - result = rawsockets.ntohl(x) - -proc htons*(x: int16): int16 = - ## Converts 16-bit positive integers from host to network byte order. - ## On machines where the host byte order is the same as network byte - ## order, this is a no-op; otherwise, it performs a 2-byte swap operation. - result = rawsockets.ntohs(x) - -proc getServByName*(name, proto: string): Servent {.tags: [ReadIOEffect].} = - ## Searches the database from the beginning and finds the first entry for - ## which the service name specified by ``name`` matches the s_name member - ## and the protocol name specified by ``proto`` matches the s_proto member. - ## - ## On posix this will search through the ``/etc/services`` file. - when useWinVersion: - var s = winlean.getservbyname(name, proto) - else: - var s = posix.getservbyname(name, proto) - if s == nil: raise newException(OSError, "Service not found.") - result.name = $s.s_name - result.aliases = cstringArrayToSeq(s.s_aliases) - result.port = Port(s.s_port) - result.proto = $s.s_proto - -proc getServByPort*(port: Port, proto: string): Servent {.tags: [ReadIOEffect].} = - ## Searches the database from the beginning and finds the first entry for - ## which the port specified by ``port`` matches the s_port member and the - ## protocol name specified by ``proto`` matches the s_proto member. - ## - ## On posix this will search through the ``/etc/services`` file. 
- when useWinVersion: - var s = winlean.getservbyport(ze(int16(port)).cint, proto) - else: - var s = posix.getservbyport(ze(int16(port)).cint, proto) - if s == nil: raise newException(OSError, "Service not found.") - result.name = $s.s_name - result.aliases = cstringArrayToSeq(s.s_aliases) - result.port = Port(s.s_port) - result.proto = $s.s_proto - -proc getHostByAddr*(ip: string): Hostent {.tags: [ReadIOEffect].} = - ## This function will lookup the hostname of an IP Address. - var myaddr: InAddr - myaddr.s_addr = inet_addr(ip) - - when useWinVersion: - var s = winlean.gethostbyaddr(addr(myaddr), sizeof(myaddr).cuint, - cint(rawsockets.AF_INET)) - if s == nil: raiseOSError(osLastError()) - else: - var s = posix.gethostbyaddr(addr(myaddr), sizeof(myaddr).Socklen, - cint(posix.AF_INET)) - if s == nil: - raise newException(OSError, $hstrerror(h_errno)) - - result.name = $s.h_name - result.aliases = cstringArrayToSeq(s.h_aliases) - when useWinVersion: - result.addrtype = Domain(s.h_addrtype) - else: - if s.h_addrtype == posix.AF_INET: - result.addrtype = AF_INET - elif s.h_addrtype == posix.AF_INET6: - result.addrtype = AF_INET6 - else: - raise newException(OSError, "unknown h_addrtype") - result.addrList = cstringArrayToSeq(s.h_addr_list) - result.length = int(s.h_length) - -proc getHostByName*(name: string): Hostent {.tags: [ReadIOEffect].} = - ## This function will lookup the IP address of a hostname. 
- when useWinVersion: - var s = winlean.gethostbyname(name) - else: - var s = posix.gethostbyname(name) - if s == nil: raiseOSError(osLastError()) - result.name = $s.h_name - result.aliases = cstringArrayToSeq(s.h_aliases) - when useWinVersion: - result.addrtype = Domain(s.h_addrtype) - else: - if s.h_addrtype == posix.AF_INET: - result.addrtype = AF_INET - elif s.h_addrtype == posix.AF_INET6: - result.addrtype = AF_INET6 - else: - raise newException(OSError, "unknown h_addrtype") - result.addrList = cstringArrayToSeq(s.h_addr_list) - result.length = int(s.h_length) - -proc getSockName*(socket: SocketHandle): Port = - ## returns the socket's associated port number. - var name: Sockaddr_in - when useWinVersion: - name.sin_family = int16(ord(AF_INET)) - else: - name.sin_family = posix.AF_INET - #name.sin_port = htons(cint16(port)) - #name.sin_addr.s_addr = htonl(INADDR_ANY) - var namelen = sizeof(name).SockLen - if getsockname(socket, cast[ptr SockAddr](addr(name)), - addr(namelen)) == -1'i32: - raiseOSError(osLastError()) - result = Port(rawsockets.ntohs(name.sin_port)) - -proc getSockOptInt*(socket: SocketHandle, level, optname: int): int {. - tags: [ReadIOEffect].} = - ## getsockopt for integer options. - var res: cint - var size = sizeof(res).SockLen - if getsockopt(socket, cint(level), cint(optname), - addr(res), addr(size)) < 0'i32: - raiseOSError(osLastError()) - result = int(res) - -proc setSockOptInt*(socket: SocketHandle, level, optname, optval: int) {. - tags: [WriteIOEffect].} = - ## setsockopt for integer options. - var value = cint(optval) - if setsockopt(socket, cint(level), cint(optname), addr(value), - sizeof(value).SockLen) < 0'i32: - raiseOSError(osLastError()) - -proc setBlocking*(s: SocketHandle, blocking: bool) = - ## Sets blocking mode on socket. - ## - ## Raises EOS on error. 
- when useWinVersion: - var mode = clong(ord(not blocking)) # 1 for non-blocking, 0 for blocking - if ioctlsocket(s, FIONBIO, addr(mode)) == -1: - raiseOSError(osLastError()) - else: # BSD sockets - var x: int = fcntl(s, F_GETFL, 0) - if x == -1: - raiseOSError(osLastError()) - else: - var mode = if blocking: x and not O_NONBLOCK else: x or O_NONBLOCK - if fcntl(s, F_SETFL, mode) == -1: - raiseOSError(osLastError()) - -proc timeValFromMilliseconds(timeout = 500): Timeval = - if timeout != -1: - var seconds = timeout div 1000 - result.tv_sec = seconds.int32 - result.tv_usec = ((timeout - seconds * 1000) * 1000).int32 - -proc createFdSet(fd: var TFdSet, s: seq[SocketHandle], m: var int) = - FD_ZERO(fd) - for i in items(s): - m = max(m, int(i)) - FD_SET(i, fd) - -proc pruneSocketSet(s: var seq[SocketHandle], fd: var TFdSet) = - var i = 0 - var L = s.len - while i < L: - if FD_ISSET(s[i], fd) == 0'i32: - s[i] = s[L-1] - dec(L) - else: - inc(i) - setLen(s, L) - -proc select*(readfds: var seq[SocketHandle], timeout = 500): int = - ## Traditional select function. This function will return the number of - ## sockets that are ready to be read from, written to, or which have errors. - ## If there are none; 0 is returned. - ## ``Timeout`` is in miliseconds and -1 can be specified for no timeout. - ## - ## A socket is removed from the specific ``seq`` when it has data waiting to - ## be read/written to or has errors (``exceptfds``). 
- var tv {.noInit.}: Timeval = timeValFromMilliseconds(timeout) - - var rd: TFdSet - var m = 0 - createFdSet((rd), readfds, m) - - if timeout != -1: - result = int(select(cint(m+1), addr(rd), nil, nil, addr(tv))) - else: - result = int(select(cint(m+1), addr(rd), nil, nil, nil)) - - pruneSocketSet(readfds, (rd)) - -proc selectWrite*(writefds: var seq[SocketHandle], - timeout = 500): int {.tags: [ReadIOEffect].} = - ## When a socket in ``writefds`` is ready to be written to then a non-zero - ## value will be returned specifying the count of the sockets which can be - ## written to. The sockets which can be written to will also be removed - ## from ``writefds``. - ## - ## ``timeout`` is specified in miliseconds and ``-1`` can be specified for - ## an unlimited time. - var tv {.noInit.}: Timeval = timeValFromMilliseconds(timeout) - - var wr: TFdSet - var m = 0 - createFdSet((wr), writefds, m) - - if timeout != -1: - result = int(select(cint(m+1), nil, addr(wr), nil, addr(tv))) - else: - result = int(select(cint(m+1), nil, addr(wr), nil, nil)) - - pruneSocketSet(writefds, (wr)) - -# We ignore signal SIGPIPE on Darwin -when defined(macosx): - signal(SIGPIPE, SIG_IGN) - -when defined(Windows): - var wsa: WSAData - if wsaStartup(0x0101'i16, addr wsa) != 0: raiseOSError(osLastError()) diff --git a/lib/pure/redis.nim b/lib/pure/redis.nim deleted file mode 100644 index 9177ddee5..000000000 --- a/lib/pure/redis.nim +++ /dev/null @@ -1,1096 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2012 Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements a redis client. It allows you to connect to a -## redis-server instance, send commands and receive replies. -## -## **Beware**: Most (if not all) functions that return a ``TRedisString`` may -## return ``redisNil``, and functions which return a ``TRedisList`` -## may return ``nil``. 
- -import sockets, os, strutils, parseutils - -const - redisNil* = "\0\0" - -type - Pipeline = ref object - enabled: bool - buffer: string - expected: int ## number of replies expected if pipelined - -type - SendMode = enum - normal, pipelined, multiple - -type - Redis* = object - socket: Socket - connected: bool - pipeline: Pipeline - - RedisStatus* = string - RedisInteger* = BiggestInt - RedisString* = string ## Bulk reply - RedisList* = seq[RedisString] ## Multi-bulk reply - - ReplyError* = object of IOError ## Invalid reply from redis - RedisError* = object of IOError ## Error in redis - -{.deprecated: [TSendMode: SendMode, TRedis: Redis, TRedisStatus: RedisStatus, - TRedisInteger: RedisInteger, TRedisString: RedisString, - TRedisList: RedisList, EInvalidReply: ReplyError, ERedis: RedisError].} - -proc newPipeline(): Pipeline = - new(result) - result.buffer = "" - result.enabled = false - result.expected = 0 - -proc open*(host = "localhost", port = 6379.Port): Redis = - ## Opens a connection to the redis server. 
- result.socket = socket(buffered = false) - if result.socket == invalidSocket: - raiseOSError(osLastError()) - result.socket.connect(host, port) - result.pipeline = newPipeline() - -proc raiseInvalidReply(expected, got: char) = - raise newException(ReplyError, - "Expected '$1' at the beginning of a status reply got '$2'" % - [$expected, $got]) - -proc raiseNoOK(status: string, pipelineEnabled: bool) = - if pipelineEnabled and not (status == "QUEUED" or status == "PIPELINED"): - raise newException(ReplyError, "Expected \"QUEUED\" or \"PIPELINED\" got \"$1\"" % status) - elif not pipelineEnabled and status != "OK": - raise newException(ReplyError, "Expected \"OK\" got \"$1\"" % status) - -template readSocket(r: Redis, dummyVal:expr): stmt = - var line {.inject.}: TaintedString = "" - if r.pipeline.enabled: - return dummyVal - else: - readLine(r.socket, line) - -proc parseStatus(r: Redis, line: string = ""): RedisStatus = - if r.pipeline.enabled: - return "PIPELINED" - - if line == "": - raise newException(RedisError, "Server closed connection prematurely") - - if line[0] == '-': - raise newException(RedisError, strip(line)) - if line[0] != '+': - raiseInvalidReply('+', line[0]) - - return line.substr(1) # Strip '+' - -proc readStatus(r:Redis): RedisStatus = - r.readSocket("PIPELINED") - return r.parseStatus(line) - -proc parseInteger(r: Redis, line: string = ""): RedisInteger = - if r.pipeline.enabled: return -1 - - #if line == "+QUEUED": # inside of multi - # return -1 - - if line == "": - raise newException(RedisError, "Server closed connection prematurely") - - if line[0] == '-': - raise newException(RedisError, strip(line)) - if line[0] != ':': - raiseInvalidReply(':', line[0]) - - # Strip ':' - if parseBiggestInt(line, result, 1) == 0: - raise newException(ReplyError, "Unable to parse integer.") - -proc readInteger(r: Redis): RedisInteger = - r.readSocket(-1) - return r.parseInteger(line) - -proc recv(sock: Socket, size: int): TaintedString = - result = 
newString(size).TaintedString - if sock.recv(cstring(result), size) != size: - raise newException(ReplyError, "recv failed") - -proc parseSingleString(r: Redis, line:string, allowMBNil = false): RedisString = - if r.pipeline.enabled: return "" - - # Error. - if line[0] == '-': - raise newException(RedisError, strip(line)) - - # Some commands return a /bulk/ value or a /multi-bulk/ nil. Odd. - if allowMBNil: - if line == "*-1": - return redisNil - - if line[0] != '$': - raiseInvalidReply('$', line[0]) - - var numBytes = parseInt(line.substr(1)) - if numBytes == -1: - return redisNil - - var s = r.socket.recv(numBytes+2) - result = strip(s.string) - -proc readSingleString(r: Redis): RedisString = - r.readSocket("") - return r.parseSingleString(line) - -proc readNext(r: Redis): RedisList - -proc parseArrayLines(r: Redis, countLine:string): RedisList = - if countLine.string[0] != '*': - raiseInvalidReply('*', countLine.string[0]) - - var numElems = parseInt(countLine.string.substr(1)) - if numElems == -1: return nil - result = @[] - - for i in 1..numElems: - var parsed = r.readNext() - if not isNil(parsed): - for item in parsed: - result.add(item) - -proc readArrayLines(r: Redis): RedisList = - r.readSocket(nil) - return r.parseArrayLines(line) - -proc parseBulkString(r: Redis, allowMBNil = false, line:string = ""): RedisString = - if r.pipeline.enabled: return "" - - return r.parseSingleString(line, allowMBNil) - -proc readBulkString(r: Redis, allowMBNil = false): RedisString = - r.readSocket("") - return r.parseBulkString(allowMBNil, line) - -proc readArray(r: Redis): RedisList = - r.readSocket(@[]) - return r.parseArrayLines(line) - -proc readNext(r: Redis): RedisList = - r.readSocket(@[]) - - var res = case line[0] - of '+', '-': @[r.parseStatus(line)] - of ':': @[$(r.parseInteger(line))] - of '$': @[r.parseBulkString(true,line)] - of '*': r.parseArrayLines(line) - else: - raise newException(ReplyError, "readNext failed on line: " & line) - nil - 
r.pipeline.expected -= 1 - return res - -proc flushPipeline*(r: Redis, wasMulti = false): RedisList = - ## Send buffered commands, clear buffer, return results - if r.pipeline.buffer.len > 0: - r.socket.send(r.pipeline.buffer) - r.pipeline.buffer = "" - - r.pipeline.enabled = false - result = @[] - - var tot = r.pipeline.expected - - for i in 0..tot-1: - var ret = r.readNext() - for item in ret: - if not (item.contains("OK") or item.contains("QUEUED")): - result.add(item) - - r.pipeline.expected = 0 - -proc startPipelining*(r: Redis) = - ## Enable command pipelining (reduces network roundtrips). - ## Note that when enabled, you must call flushPipeline to actually send commands, except - ## for multi/exec() which enable and flush the pipeline automatically. - ## Commands return immediately with dummy values; actual results returned from - ## flushPipeline() or exec() - r.pipeline.expected = 0 - r.pipeline.enabled = true - -proc sendCommand(r: Redis, cmd: string, args: varargs[string]) = - var request = "*" & $(1 + args.len()) & "\c\L" - request.add("$" & $cmd.len() & "\c\L") - request.add(cmd & "\c\L") - for i in items(args): - request.add("$" & $i.len() & "\c\L") - request.add(i & "\c\L") - - if r.pipeline.enabled: - r.pipeline.buffer.add(request) - r.pipeline.expected += 1 - else: - r.socket.send(request) - -proc sendCommand(r: Redis, cmd: string, arg1: string, - args: varargs[string]) = - var request = "*" & $(2 + args.len()) & "\c\L" - request.add("$" & $cmd.len() & "\c\L") - request.add(cmd & "\c\L") - request.add("$" & $arg1.len() & "\c\L") - request.add(arg1 & "\c\L") - for i in items(args): - request.add("$" & $i.len() & "\c\L") - request.add(i & "\c\L") - - if r.pipeline.enabled: - r.pipeline.expected += 1 - r.pipeline.buffer.add(request) - else: - r.socket.send(request) - -# Keys - -proc del*(r: Redis, keys: varargs[string]): RedisInteger = - ## Delete a key or multiple keys - r.sendCommand("DEL", keys) - return r.readInteger() - -proc exists*(r: Redis, 
key: string): bool = - ## Determine if a key exists - r.sendCommand("EXISTS", key) - return r.readInteger() == 1 - -proc expire*(r: Redis, key: string, seconds: int): bool = - ## Set a key's time to live in seconds. Returns `false` if the key could - ## not be found or the timeout could not be set. - r.sendCommand("EXPIRE", key, $seconds) - return r.readInteger() == 1 - -proc expireAt*(r: Redis, key: string, timestamp: int): bool = - ## Set the expiration for a key as a UNIX timestamp. Returns `false` - ## if the key could not be found or the timeout could not be set. - r.sendCommand("EXPIREAT", key, $timestamp) - return r.readInteger() == 1 - -proc keys*(r: Redis, pattern: string): RedisList = - ## Find all keys matching the given pattern - r.sendCommand("KEYS", pattern) - return r.readArray() - -proc scan*(r: Redis, cursor: var BiggestInt): RedisList = - ## Find all keys matching the given pattern and yield it to client in portions - ## using default Redis values for MATCH and COUNT parameters - r.sendCommand("SCAN", $cursor) - let reply = r.readArray() - cursor = strutils.parseBiggestInt(reply[0]) - return reply[1..high(reply)] - -proc scan*(r: Redis, cursor: var BiggestInt, pattern: string): RedisList = - ## Find all keys matching the given pattern and yield it to client in portions - ## using cursor as a client query identifier. Using default Redis value for COUNT argument - r.sendCommand("SCAN", $cursor, ["MATCH", pattern]) - let reply = r.readArray() - cursor = strutils.parseBiggestInt(reply[0]) - return reply[1..high(reply)] - -proc scan*(r: Redis, cursor: var BiggestInt, pattern: string, count: int): RedisList = - ## Find all keys matching the given pattern and yield it to client in portions - ## using cursor as a client query identifier. 
- r.sendCommand("SCAN", $cursor, ["MATCH", pattern, "COUNT", $count]) - let reply = r.readArray() - cursor = strutils.parseBiggestInt(reply[0]) - return reply[1..high(reply)] - -proc move*(r: Redis, key: string, db: int): bool = - ## Move a key to another database. Returns `true` on a successful move. - r.sendCommand("MOVE", key, $db) - return r.readInteger() == 1 - -proc persist*(r: Redis, key: string): bool = - ## Remove the expiration from a key. - ## Returns `true` when the timeout was removed. - r.sendCommand("PERSIST", key) - return r.readInteger() == 1 - -proc randomKey*(r: Redis): RedisString = - ## Return a random key from the keyspace - r.sendCommand("RANDOMKEY") - return r.readBulkString() - -proc rename*(r: Redis, key, newkey: string): RedisStatus = - ## Rename a key. - ## - ## **WARNING:** Overwrites `newkey` if it exists! - r.sendCommand("RENAME", key, newkey) - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc renameNX*(r: Redis, key, newkey: string): bool = - ## Same as ``rename`` but doesn't continue if `newkey` exists. - ## Returns `true` if key was renamed. 
- r.sendCommand("RENAMENX", key, newkey) - return r.readInteger() == 1 - -proc ttl*(r: Redis, key: string): RedisInteger = - ## Get the time to live for a key - r.sendCommand("TTL", key) - return r.readInteger() - -proc keyType*(r: Redis, key: string): RedisStatus = - ## Determine the type stored at key - r.sendCommand("TYPE", key) - return r.readStatus() - - -# Strings - -proc append*(r: Redis, key, value: string): RedisInteger = - ## Append a value to a key - r.sendCommand("APPEND", key, value) - return r.readInteger() - -proc decr*(r: Redis, key: string): RedisInteger = - ## Decrement the integer value of a key by one - r.sendCommand("DECR", key) - return r.readInteger() - -proc decrBy*(r: Redis, key: string, decrement: int): RedisInteger = - ## Decrement the integer value of a key by the given number - r.sendCommand("DECRBY", key, $decrement) - return r.readInteger() - -proc get*(r: Redis, key: string): RedisString = - ## Get the value of a key. Returns `redisNil` when `key` doesn't exist. - r.sendCommand("GET", key) - return r.readBulkString() - -proc getBit*(r: Redis, key: string, offset: int): RedisInteger = - ## Returns the bit value at offset in the string value stored at key - r.sendCommand("GETBIT", key, $offset) - return r.readInteger() - -proc getRange*(r: Redis, key: string, start, stop: int): RedisString = - ## Get a substring of the string stored at a key - r.sendCommand("GETRANGE", key, $start, $stop) - return r.readBulkString() - -proc getSet*(r: Redis, key: string, value: string): RedisString = - ## Set the string value of a key and return its old value. Returns `redisNil` - ## when key doesn't exist. - r.sendCommand("GETSET", key, value) - return r.readBulkString() - -proc incr*(r: Redis, key: string): RedisInteger = - ## Increment the integer value of a key by one. 
- r.sendCommand("INCR", key) - return r.readInteger() - -proc incrBy*(r: Redis, key: string, increment: int): RedisInteger = - ## Increment the integer value of a key by the given number - r.sendCommand("INCRBY", key, $increment) - return r.readInteger() - -proc setk*(r: Redis, key, value: string) = - ## Set the string value of a key. - ## - ## NOTE: This function had to be renamed due to a clash with the `set` type. - r.sendCommand("SET", key, value) - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc setNX*(r: Redis, key, value: string): bool = - ## Set the value of a key, only if the key does not exist. Returns `true` - ## if the key was set. - r.sendCommand("SETNX", key, value) - return r.readInteger() == 1 - -proc setBit*(r: Redis, key: string, offset: int, - value: string): RedisInteger = - ## Sets or clears the bit at offset in the string value stored at key - r.sendCommand("SETBIT", key, $offset, value) - return r.readInteger() - -proc setEx*(r: Redis, key: string, seconds: int, value: string): RedisStatus = - ## Set the value and expiration of a key - r.sendCommand("SETEX", key, $seconds, value) - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc setRange*(r: Redis, key: string, offset: int, - value: string): RedisInteger = - ## Overwrite part of a string at key starting at the specified offset - r.sendCommand("SETRANGE", key, $offset, value) - return r.readInteger() - -proc strlen*(r: Redis, key: string): RedisInteger = - ## Get the length of the value stored in a key. Returns 0 when key doesn't - ## exist. - r.sendCommand("STRLEN", key) - return r.readInteger() - -# Hashes -proc hDel*(r: Redis, key, field: string): bool = - ## Delete a hash field at `key`. Returns `true` if the field was removed. - r.sendCommand("HDEL", key, field) - return r.readInteger() == 1 - -proc hExists*(r: Redis, key, field: string): bool = - ## Determine if a hash field exists. 
- r.sendCommand("HEXISTS", key, field) - return r.readInteger() == 1 - -proc hGet*(r: Redis, key, field: string): RedisString = - ## Get the value of a hash field - r.sendCommand("HGET", key, field) - return r.readBulkString() - -proc hGetAll*(r: Redis, key: string): RedisList = - ## Get all the fields and values in a hash - r.sendCommand("HGETALL", key) - return r.readArray() - -proc hIncrBy*(r: Redis, key, field: string, incr: int): RedisInteger = - ## Increment the integer value of a hash field by the given number - r.sendCommand("HINCRBY", key, field, $incr) - return r.readInteger() - -proc hKeys*(r: Redis, key: string): RedisList = - ## Get all the fields in a hash - r.sendCommand("HKEYS", key) - return r.readArray() - -proc hLen*(r: Redis, key: string): RedisInteger = - ## Get the number of fields in a hash - r.sendCommand("HLEN", key) - return r.readInteger() - -proc hMGet*(r: Redis, key: string, fields: varargs[string]): RedisList = - ## Get the values of all the given hash fields - r.sendCommand("HMGET", key, fields) - return r.readArray() - -proc hMSet*(r: Redis, key: string, - fieldValues: openArray[tuple[field, value: string]]) = - ## Set multiple hash fields to multiple values - var args = @[key] - for field, value in items(fieldValues): - args.add(field) - args.add(value) - r.sendCommand("HMSET", args) - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc hSet*(r: Redis, key, field, value: string): RedisInteger = - ## Set the string value of a hash field - r.sendCommand("HSET", key, field, value) - return r.readInteger() - -proc hSetNX*(r: Redis, key, field, value: string): RedisInteger = - ## Set the value of a hash field, only if the field does **not** exist - r.sendCommand("HSETNX", key, field, value) - return r.readInteger() - -proc hVals*(r: Redis, key: string): RedisList = - ## Get all the values in a hash - r.sendCommand("HVALS", key) - return r.readArray() - -# Lists - -proc bLPop*(r: Redis, keys: varargs[string], timeout: int): RedisList = 
- ## Remove and get the *first* element in a list, or block until - ## one is available - var args: seq[string] = @[] - for i in items(keys): args.add(i) - args.add($timeout) - r.sendCommand("BLPOP", args) - return r.readArray() - -proc bRPop*(r: Redis, keys: varargs[string], timeout: int): RedisList = - ## Remove and get the *last* element in a list, or block until one - ## is available. - var args: seq[string] = @[] - for i in items(keys): args.add(i) - args.add($timeout) - r.sendCommand("BRPOP", args) - return r.readArray() - -proc bRPopLPush*(r: Redis, source, destination: string, - timeout: int): RedisString = - ## Pop a value from a list, push it to another list and return it; or - ## block until one is available. - ## - ## http://redis.io/commands/brpoplpush - r.sendCommand("BRPOPLPUSH", source, destination, $timeout) - return r.readBulkString(true) # Multi-Bulk nil allowed. - -proc lIndex*(r: Redis, key: string, index: int): RedisString = - ## Get an element from a list by its index - r.sendCommand("LINDEX", key, $index) - return r.readBulkString() - -proc lInsert*(r: Redis, key: string, before: bool, pivot, value: string): - RedisInteger = - ## Insert an element before or after another element in a list - var pos = if before: "BEFORE" else: "AFTER" - r.sendCommand("LINSERT", key, pos, pivot, value) - return r.readInteger() - -proc lLen*(r: Redis, key: string): RedisInteger = - ## Get the length of a list - r.sendCommand("LLEN", key) - return r.readInteger() - -proc lPop*(r: Redis, key: string): RedisString = - ## Remove and get the first element in a list - r.sendCommand("LPOP", key) - return r.readBulkString() - -proc lPush*(r: Redis, key, value: string, create: bool = true): RedisInteger = - ## Prepend a value to a list. Returns the length of the list after the push. - ## The ``create`` param specifies whether a list should be created if it - ## doesn't exist at ``key``. 
More specifically if ``create`` is true, `LPUSH` - ## will be used, otherwise `LPUSHX`. - if create: - r.sendCommand("LPUSH", key, value) - else: - r.sendCommand("LPUSHX", key, value) - return r.readInteger() - -proc lRange*(r: Redis, key: string, start, stop: int): RedisList = - ## Get a range of elements from a list. Returns `nil` when `key` - ## doesn't exist. - r.sendCommand("LRANGE", key, $start, $stop) - return r.readArray() - -proc lRem*(r: Redis, key: string, value: string, count: int = 0): RedisInteger = - ## Remove elements from a list. Returns the number of elements that have been - ## removed. - r.sendCommand("LREM", key, $count, value) - return r.readInteger() - -proc lSet*(r: Redis, key: string, index: int, value: string) = - ## Set the value of an element in a list by its index - r.sendCommand("LSET", key, $index, value) - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc lTrim*(r: Redis, key: string, start, stop: int) = - ## Trim a list to the specified range - r.sendCommand("LTRIM", key, $start, $stop) - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc rPop*(r: Redis, key: string): RedisString = - ## Remove and get the last element in a list - r.sendCommand("RPOP", key) - return r.readBulkString() - -proc rPopLPush*(r: Redis, source, destination: string): RedisString = - ## Remove the last element in a list, append it to another list and return it - r.sendCommand("RPOPLPUSH", source, destination) - return r.readBulkString() - -proc rPush*(r: Redis, key, value: string, create: bool = true): RedisInteger = - ## Append a value to a list. Returns the length of the list after the push. - ## The ``create`` param specifies whether a list should be created if it - ## doesn't exist at ``key``. More specifically if ``create`` is true, `RPUSH` - ## will be used, otherwise `RPUSHX`. 
- if create: - r.sendCommand("RPUSH", key, value) - else: - r.sendCommand("RPUSHX", key, value) - return r.readInteger() - -# Sets - -proc sadd*(r: Redis, key: string, member: string): RedisInteger = - ## Add a member to a set - r.sendCommand("SADD", key, member) - return r.readInteger() - -proc scard*(r: Redis, key: string): RedisInteger = - ## Get the number of members in a set - r.sendCommand("SCARD", key) - return r.readInteger() - -proc sdiff*(r: Redis, keys: varargs[string]): RedisList = - ## Subtract multiple sets - r.sendCommand("SDIFF", keys) - return r.readArray() - -proc sdiffstore*(r: Redis, destination: string, - keys: varargs[string]): RedisInteger = - ## Subtract multiple sets and store the resulting set in a key - r.sendCommand("SDIFFSTORE", destination, keys) - return r.readInteger() - -proc sinter*(r: Redis, keys: varargs[string]): RedisList = - ## Intersect multiple sets - r.sendCommand("SINTER", keys) - return r.readArray() - -proc sinterstore*(r: Redis, destination: string, - keys: varargs[string]): RedisInteger = - ## Intersect multiple sets and store the resulting set in a key - r.sendCommand("SINTERSTORE", destination, keys) - return r.readInteger() - -proc sismember*(r: Redis, key: string, member: string): RedisInteger = - ## Determine if a given value is a member of a set - r.sendCommand("SISMEMBER", key, member) - return r.readInteger() - -proc smembers*(r: Redis, key: string): RedisList = - ## Get all the members in a set - r.sendCommand("SMEMBERS", key) - return r.readArray() - -proc smove*(r: Redis, source: string, destination: string, - member: string): RedisInteger = - ## Move a member from one set to another - r.sendCommand("SMOVE", source, destination, member) - return r.readInteger() - -proc spop*(r: Redis, key: string): RedisString = - ## Remove and return a random member from a set - r.sendCommand("SPOP", key) - return r.readBulkString() - -proc srandmember*(r: Redis, key: string): RedisString = - ## Get a random member from a 
set - r.sendCommand("SRANDMEMBER", key) - return r.readBulkString() - -proc srem*(r: Redis, key: string, member: string): RedisInteger = - ## Remove a member from a set - r.sendCommand("SREM", key, member) - return r.readInteger() - -proc sunion*(r: Redis, keys: varargs[string]): RedisList = - ## Add multiple sets - r.sendCommand("SUNION", keys) - return r.readArray() - -proc sunionstore*(r: Redis, destination: string, - key: varargs[string]): RedisInteger = - ## Add multiple sets and store the resulting set in a key - r.sendCommand("SUNIONSTORE", destination, key) - return r.readInteger() - -# Sorted sets - -proc zadd*(r: Redis, key: string, score: int, member: string): RedisInteger = - ## Add a member to a sorted set, or update its score if it already exists - r.sendCommand("ZADD", key, $score, member) - return r.readInteger() - -proc zcard*(r: Redis, key: string): RedisInteger = - ## Get the number of members in a sorted set - r.sendCommand("ZCARD", key) - return r.readInteger() - -proc zcount*(r: Redis, key: string, min: string, max: string): RedisInteger = - ## Count the members in a sorted set with scores within the given values - r.sendCommand("ZCOUNT", key, min, max) - return r.readInteger() - -proc zincrby*(r: Redis, key: string, increment: string, - member: string): RedisString = - ## Increment the score of a member in a sorted set - r.sendCommand("ZINCRBY", key, increment, member) - return r.readBulkString() - -proc zinterstore*(r: Redis, destination: string, numkeys: string, - keys: openArray[string], weights: openArray[string] = [], - aggregate: string = ""): RedisInteger = - ## Intersect multiple sorted sets and store the resulting sorted set in - ## a new key - var args = @[destination, numkeys] - for i in items(keys): args.add(i) - - if weights.len != 0: - args.add("WITHSCORE") - for i in items(weights): args.add(i) - if aggregate.len != 0: - args.add("AGGREGATE") - args.add(aggregate) - - r.sendCommand("ZINTERSTORE", args) - - return 
r.readInteger() - -proc zrange*(r: Redis, key: string, start: string, stop: string, - withScores: bool): RedisList = - ## Return a range of members in a sorted set, by index - if not withScores: - r.sendCommand("ZRANGE", key, start, stop) - else: - r.sendCommand("ZRANGE", "WITHSCORES", key, start, stop) - return r.readArray() - -proc zrangebyscore*(r: Redis, key: string, min: string, max: string, - withScore: bool = false, limit: bool = false, - limitOffset: int = 0, limitCount: int = 0): RedisList = - ## Return a range of members in a sorted set, by score - var args = @[key, min, max] - - if withScore: args.add("WITHSCORE") - if limit: - args.add("LIMIT") - args.add($limitOffset) - args.add($limitCount) - - r.sendCommand("ZRANGEBYSCORE", args) - return r.readArray() - -proc zrank*(r: Redis, key: string, member: string): RedisString = - ## Determine the index of a member in a sorted set - r.sendCommand("ZRANK", key, member) - return r.readBulkString() - -proc zrem*(r: Redis, key: string, member: string): RedisInteger = - ## Remove a member from a sorted set - r.sendCommand("ZREM", key, member) - return r.readInteger() - -proc zremrangebyrank*(r: Redis, key: string, start: string, - stop: string): RedisInteger = - ## Remove all members in a sorted set within the given indexes - r.sendCommand("ZREMRANGEBYRANK", key, start, stop) - return r.readInteger() - -proc zremrangebyscore*(r: Redis, key: string, min: string, - max: string): RedisInteger = - ## Remove all members in a sorted set within the given scores - r.sendCommand("ZREMRANGEBYSCORE", key, min, max) - return r.readInteger() - -proc zrevrange*(r: Redis, key: string, start: string, stop: string, - withScore: bool): RedisList = - ## Return a range of members in a sorted set, by index, - ## with scores ordered from high to low - if withScore: - r.sendCommand("ZREVRANGE", "WITHSCORE", key, start, stop) - else: r.sendCommand("ZREVRANGE", key, start, stop) - return r.readArray() - -proc zrevrangebyscore*(r: Redis, 
key: string, min: string, max: string, - withScore: bool = false, limit: bool = false, - limitOffset: int = 0, limitCount: int = 0): RedisList = - ## Return a range of members in a sorted set, by score, with - ## scores ordered from high to low - var args = @[key, min, max] - - if withScore: args.add("WITHSCORE") - if limit: - args.add("LIMIT") - args.add($limitOffset) - args.add($limitCount) - - r.sendCommand("ZREVRANGEBYSCORE", args) - return r.readArray() - -proc zrevrank*(r: Redis, key: string, member: string): RedisString = - ## Determine the index of a member in a sorted set, with - ## scores ordered from high to low - r.sendCommand("ZREVRANK", key, member) - return r.readBulkString() - -proc zscore*(r: Redis, key: string, member: string): RedisString = - ## Get the score associated with the given member in a sorted set - r.sendCommand("ZSCORE", key, member) - return r.readBulkString() - -proc zunionstore*(r: Redis, destination: string, numkeys: string, - keys: openArray[string], weights: openArray[string] = [], - aggregate: string = ""): RedisInteger = - ## Add multiple sorted sets and store the resulting sorted set in a new key - var args = @[destination, numkeys] - for i in items(keys): args.add(i) - - if weights.len != 0: - args.add("WEIGHTS") - for i in items(weights): args.add(i) - if aggregate.len != 0: - args.add("AGGREGATE") - args.add(aggregate) - - r.sendCommand("ZUNIONSTORE", args) - - return r.readInteger() - -# HyperLogLog - -proc pfadd*(r: Redis, key: string, elements: varargs[string]): RedisInteger = - ## Add variable number of elements into special 'HyperLogLog' set type - r.sendCommand("PFADD", key, elements) - return r.readInteger() - -proc pfcount*(r: Redis, key: string): RedisInteger = - ## Count approximate number of elements in 'HyperLogLog' - r.sendCommand("PFCOUNT", key) - return r.readInteger() - -proc pfmerge*(r: Redis, destination: string, sources: varargs[string]) = - ## Merge several source HyperLogLog's into one specified by 
destKey - r.sendCommand("PFMERGE", destination, sources) - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -# Pub/Sub - -# TODO: pub/sub -- I don't think this will work synchronously. -discard """ -proc psubscribe*(r: TRedis, pattern: openarray[string]): ???? = - ## Listen for messages published to channels matching the given patterns - r.socket.send("PSUBSCRIBE $#\c\L" % pattern) - return ??? - -proc publish*(r: TRedis, channel: string, message: string): TRedisInteger = - ## Post a message to a channel - r.socket.send("PUBLISH $# $#\c\L" % [channel, message]) - return r.readInteger() - -proc punsubscribe*(r: TRedis, [pattern: openarray[string], : string): ???? = - ## Stop listening for messages posted to channels matching the given patterns - r.socket.send("PUNSUBSCRIBE $# $#\c\L" % [[pattern.join(), ]) - return ??? - -proc subscribe*(r: TRedis, channel: openarray[string]): ???? = - ## Listen for messages published to the given channels - r.socket.send("SUBSCRIBE $#\c\L" % channel.join) - return ??? - -proc unsubscribe*(r: TRedis, [channel: openarray[string], : string): ???? = - ## Stop listening for messages posted to the given channels - r.socket.send("UNSUBSCRIBE $# $#\c\L" % [[channel.join(), ]) - return ??? 
- -""" - -# Transactions - -proc discardMulti*(r: Redis) = - ## Discard all commands issued after MULTI - r.sendCommand("DISCARD") - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc exec*(r: Redis): RedisList = - ## Execute all commands issued after MULTI - r.sendCommand("EXEC") - r.pipeline.enabled = false - # Will reply with +OK for MULTI/EXEC and +QUEUED for every command - # between, then with the results - return r.flushPipeline(true) - - -proc multi*(r: Redis) = - ## Mark the start of a transaction block - r.startPipelining() - r.sendCommand("MULTI") - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc unwatch*(r: Redis) = - ## Forget about all watched keys - r.sendCommand("UNWATCH") - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc watch*(r: Redis, key: varargs[string]) = - ## Watch the given keys to determine execution of the MULTI/EXEC block - r.sendCommand("WATCH", key) - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -# Connection - -proc auth*(r: Redis, password: string) = - ## Authenticate to the server - r.sendCommand("AUTH", password) - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc echoServ*(r: Redis, message: string): RedisString = - ## Echo the given string - r.sendCommand("ECHO", message) - return r.readBulkString() - -proc ping*(r: Redis): RedisStatus = - ## Ping the server - r.sendCommand("PING") - return r.readStatus() - -proc quit*(r: Redis) = - ## Close the connection - r.sendCommand("QUIT") - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc select*(r: Redis, index: int): RedisStatus = - ## Change the selected database for the current connection - r.sendCommand("SELECT", $index) - return r.readStatus() - -# Server - -proc bgrewriteaof*(r: Redis) = - ## Asynchronously rewrite the append-only file - r.sendCommand("BGREWRITEAOF") - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc bgsave*(r: Redis) = - ## Asynchronously save the dataset to disk - r.sendCommand("BGSAVE") - raiseNoOK(r.readStatus(), 
r.pipeline.enabled) - -proc configGet*(r: Redis, parameter: string): RedisList = - ## Get the value of a configuration parameter - r.sendCommand("CONFIG", "GET", parameter) - return r.readArray() - -proc configSet*(r: Redis, parameter: string, value: string) = - ## Set a configuration parameter to the given value - r.sendCommand("CONFIG", "SET", parameter, value) - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc configResetStat*(r: Redis) = - ## Reset the stats returned by INFO - r.sendCommand("CONFIG", "RESETSTAT") - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc dbsize*(r: Redis): RedisInteger = - ## Return the number of keys in the selected database - r.sendCommand("DBSIZE") - return r.readInteger() - -proc debugObject*(r: Redis, key: string): RedisStatus = - ## Get debugging information about a key - r.sendCommand("DEBUG", "OBJECT", key) - return r.readStatus() - -proc debugSegfault*(r: Redis) = - ## Make the server crash - r.sendCommand("DEBUG", "SEGFAULT") - -proc flushall*(r: Redis): RedisStatus = - ## Remove all keys from all databases - r.sendCommand("FLUSHALL") - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc flushdb*(r: Redis): RedisStatus = - ## Remove all keys from the current database - r.sendCommand("FLUSHDB") - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc info*(r: Redis): RedisString = - ## Get information and statistics about the server - r.sendCommand("INFO") - return r.readBulkString() - -proc lastsave*(r: Redis): RedisInteger = - ## Get the UNIX time stamp of the last successful save to disk - r.sendCommand("LASTSAVE") - return r.readInteger() - -discard """ -proc monitor*(r: TRedis) = - ## Listen for all requests received by the server in real time - r.socket.send("MONITOR\c\L") - raiseNoOK(r.readStatus(), r.pipeline.enabled) -""" - -proc save*(r: Redis) = - ## Synchronously save the dataset to disk - r.sendCommand("SAVE") - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -proc shutdown*(r: Redis) = - ## 
Synchronously save the dataset to disk and then shut down the server - r.sendCommand("SHUTDOWN") - var s = "".TaintedString - r.socket.readLine(s) - if s.string.len != 0: raise newException(RedisError, s.string) - -proc slaveof*(r: Redis, host: string, port: string) = - ## Make the server a slave of another instance, or promote it as master - r.sendCommand("SLAVEOF", host, port) - raiseNoOK(r.readStatus(), r.pipeline.enabled) - -iterator hPairs*(r: Redis, key: string): tuple[key, value: string] = - ## Iterator for keys and values in a hash. - var - contents = r.hGetAll(key) - k = "" - for i in items(contents): - if k == "": - k = i - else: - yield (k, i) - k = "" - -proc someTests(r: Redis, how: SendMode):seq[string] = - var list:seq[string] = @[] - - if how == pipelined: - r.startPipelining() - elif how == multiple: - r.multi() - - r.setk("nim:test", "Testing something.") - r.setk("nim:utf8", "こんにちは") - r.setk("nim:esc", "\\ths ągt\\") - r.setk("nim:int", "1") - list.add(r.get("nim:esc")) - list.add($(r.incr("nim:int"))) - list.add(r.get("nim:int")) - list.add(r.get("nim:utf8")) - list.add($(r.hSet("test1", "name", "A Test"))) - var res = r.hGetAll("test1") - for r in res: - list.add(r) - list.add(r.get("invalid_key")) - list.add($(r.lPush("mylist","itema"))) - list.add($(r.lPush("mylist","itemb"))) - r.lTrim("mylist",0,1) - var p = r.lRange("mylist", 0, -1) - - for i in items(p): - if not isNil(i): - list.add(i) - - list.add(r.debugObject("mylist")) - - r.configSet("timeout", "299") - var g = r.configGet("timeout") - for i in items(g): - list.add(i) - - list.add(r.echoServ("BLAH")) - - case how - of normal: - return list - of pipelined: - return r.flushPipeline() - of multiple: - return r.exec() - -proc assertListsIdentical(listA, listB: seq[string]) = - assert(listA.len == listB.len) - var i = 0 - for item in listA: - assert(item == listB[i]) - i = i + 1 - -when isMainModule: - when false: - var r = open() - - # Test with no pipelining - var listNormal = 
r.someTests(normal) - - # Test with pipelining enabled - var listPipelined = r.someTests(pipelined) - assertListsIdentical(listNormal, listPipelined) - - # Test with multi/exec() (automatic pipelining) - var listMulti = r.someTests(multiple) - assertListsIdentical(listNormal, listMulti) diff --git a/lib/pure/reservedmem.nim b/lib/pure/reservedmem.nim new file mode 100644 index 000000000..ffa0128dc --- /dev/null +++ b/lib/pure/reservedmem.nim @@ -0,0 +1,229 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Nim Contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## :Authors: Zahary Karadjov +## +## This module provides utilities for reserving portions of the +## address space of a program without consuming physical memory. +## It can be used to implement a dynamically resizable buffer that +## is guaranteed to remain in the same memory location. The buffer +## will be able to grow up to the size of the initially reserved +## portion of the address space. +## +## Unstable API. 
+ +from std/oserrors import raiseOSError, osLastError + +when defined(nimPreviewSlimSystem): + import std/assertions + +template distance*(lhs, rhs: pointer): int = + cast[int](rhs) - cast[int](lhs) + +template shift*(p: pointer, distance: int): pointer = + cast[pointer](cast[int](p) + distance) + +type + MemAccessFlags* = int + + ReservedMem* = object + memStart: pointer + usedMemEnd: pointer + committedMemEnd: pointer + memEnd: pointer + maxCommittedAndUnusedPages: int + accessFlags: MemAccessFlags + + ReservedMemSeq*[T] = object + mem: ReservedMem + +when defined(windows): + import std/winlean + import std/private/win_getsysteminfo + + proc getAllocationGranularity: uint = + var sysInfo: SystemInfo + getSystemInfo(addr sysInfo) + return uint(sysInfo.dwAllocationGranularity) + + let allocationGranularity = getAllocationGranularity().int + + const + memNoAccess = MemAccessFlags(PAGE_NOACCESS) + memExec* = MemAccessFlags(PAGE_EXECUTE) + memExecRead* = MemAccessFlags(PAGE_EXECUTE_READ) + memExecReadWrite* = MemAccessFlags(PAGE_EXECUTE_READWRITE) + memRead* = MemAccessFlags(PAGE_READONLY) + memReadWrite* = MemAccessFlags(PAGE_READWRITE) + + template check(expr) = + let r = expr + if r == cast[typeof(r)](0): + raiseOSError(osLastError()) + +else: + import std/posix + + let allocationGranularity = sysconf(SC_PAGESIZE) + + let + memNoAccess = MemAccessFlags(PROT_NONE) + memExec* = MemAccessFlags(PROT_EXEC) + memExecRead* = MemAccessFlags(PROT_EXEC or PROT_READ) + memExecReadWrite* = MemAccessFlags(PROT_EXEC or PROT_READ or PROT_WRITE) + memRead* = MemAccessFlags(PROT_READ) + memReadWrite* = MemAccessFlags(PROT_READ or PROT_WRITE) + + template check(expr) = + if not expr: + raiseOSError(osLastError()) + +func nextAlignedOffset(n, alignment: int): int = + result = n + let m = n mod alignment + if m != 0: result += alignment - m + + +when defined(windows): + const + MEM_DECOMMIT = 0x4000 + MEM_RESERVE = 0x2000 + MEM_COMMIT = 0x1000 + proc virtualFree(lpAddress: pointer, 
dwSize: int, + dwFreeType: int32): cint {.header: "<windows.h>", stdcall, + importc: "VirtualFree".} + proc virtualAlloc(lpAddress: pointer, dwSize: int, flAllocationType, + flProtect: int32): pointer {. + header: "<windows.h>", stdcall, importc: "VirtualAlloc".} + +proc init*(T: type ReservedMem, + maxLen: Natural, + initLen: Natural = 0, + initCommitLen = initLen, + memStart = pointer(nil), + accessFlags = memReadWrite, + maxCommittedAndUnusedPages = 3): ReservedMem = + + assert initLen <= initCommitLen + let commitSize = nextAlignedOffset(initCommitLen, allocationGranularity) + + when defined(windows): + result.memStart = virtualAlloc(memStart, maxLen, MEM_RESERVE, + accessFlags.cint) + check result.memStart + if commitSize > 0: + check virtualAlloc(result.memStart, commitSize, MEM_COMMIT, + accessFlags.cint) + else: + var allocFlags = MAP_PRIVATE or MAP_ANONYMOUS # or MAP_NORESERVE + # if memStart != nil: + # allocFlags = allocFlags or MAP_FIXED_NOREPLACE + result.memStart = mmap(memStart, maxLen, PROT_NONE, allocFlags, -1, 0) + check result.memStart != MAP_FAILED + if commitSize > 0: + check mprotect(result.memStart, commitSize, cint(accessFlags)) == 0 + + result.usedMemEnd = result.memStart.shift(initLen) + result.committedMemEnd = result.memStart.shift(commitSize) + result.memEnd = result.memStart.shift(maxLen) + result.accessFlags = accessFlags + result.maxCommittedAndUnusedPages = maxCommittedAndUnusedPages + +func len*(m: ReservedMem): int = + distance(m.memStart, m.usedMemEnd) + +func commitedLen*(m: ReservedMem): int = + distance(m.memStart, m.committedMemEnd) + +func maxLen*(m: ReservedMem): int = + distance(m.memStart, m.memEnd) + +proc setLen*(m: var ReservedMem, newLen: int) = + let len = m.len + m.usedMemEnd = m.memStart.shift(newLen) + if newLen > len: + let d = distance(m.committedMemEnd, m.usedMemEnd) + if d > 0: + let commitExtensionSize = nextAlignedOffset(d, allocationGranularity) + when defined(windows): + check 
virtualAlloc(m.committedMemEnd, commitExtensionSize, + MEM_COMMIT, m.accessFlags.cint) + else: + check mprotect(m.committedMemEnd, commitExtensionSize, + m.accessFlags.cint) == 0 + else: + let d = distance(m.usedMemEnd, m.committedMemEnd) - + m.maxCommittedAndUnusedPages * allocationGranularity + if d > 0: + let commitSizeShrinkage = nextAlignedOffset(d, allocationGranularity) + let newCommitEnd = m.committedMemEnd.shift(-commitSizeShrinkage) + + when defined(windows): + check virtualFree(newCommitEnd, commitSizeShrinkage, MEM_DECOMMIT) + else: + check posix_madvise(newCommitEnd, commitSizeShrinkage, + POSIX_MADV_DONTNEED) == 0 + + m.committedMemEnd = newCommitEnd + +proc init*(SeqType: type ReservedMemSeq, + maxLen: Natural, + initLen: Natural = 0, + initCommitLen: Natural = 0, + memStart = pointer(nil), + accessFlags = memReadWrite, + maxCommittedAndUnusedPages = 3): SeqType = + + let elemSize = sizeof(SeqType.T) + result.mem = ReservedMem.init(maxLen * elemSize, + initLen * elemSize, + initCommitLen * elemSize, + memStart, accessFlags, + maxCommittedAndUnusedPages) + +func `[]`*[T](s: ReservedMemSeq[T], pos: Natural): lent T = + let elemAddr = s.mem.memStart.shift(pos * sizeof(T)) + rangeCheck elemAddr < s.mem.usedMemEnd + result = (cast[ptr T](elemAddr))[] + +func `[]`*[T](s: var ReservedMemSeq[T], pos: Natural): var T = + let elemAddr = s.mem.memStart.shift(pos * sizeof(T)) + rangeCheck elemAddr < s.mem.usedMemEnd + result = (cast[ptr T](elemAddr))[] + +func `[]`*[T](s: ReservedMemSeq[T], rpos: BackwardsIndex): lent T = + return s[int(s.len) - int(rpos)] + +func `[]`*[T](s: var ReservedMemSeq[T], rpos: BackwardsIndex): var T = + return s[int(s.len) - int(rpos)] + +func len*[T](s: ReservedMemSeq[T]): int = + s.mem.len div sizeof(T) + +proc setLen*[T](s: var ReservedMemSeq[T], newLen: int) = + # TODO call destructors + s.mem.setLen(newLen * sizeof(T)) + +proc add*[T](s: var ReservedMemSeq[T], val: T) = + let len = s.len + s.setLen(len + 1) + s[len] = val + +proc 
pop*[T](s: var ReservedMemSeq[T]): T = + assert s.usedMemEnd != s.memStart + let lastIdx = s.len - 1 + result = s[lastIdx] + s.setLen(lastIdx) + +func commitedLen*[T](s: ReservedMemSeq[T]): int = + s.mem.commitedLen div sizeof(T) + +func maxLen*[T](s: ReservedMemSeq[T]): int = + s.mem.maxLen div sizeof(T) + diff --git a/lib/pure/romans.nim b/lib/pure/romans.nim deleted file mode 100644 index 79fb75526..000000000 --- a/lib/pure/romans.nim +++ /dev/null @@ -1,59 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2011 Philippe Lhoste -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Module for converting an integer to a Roman numeral. -## See http://en.wikipedia.org/wiki/Roman_numerals for reference. - -const - RomanNumeralDigits* = {'I', 'i', 'V', 'v', 'X', 'x', 'L', 'l', 'C', 'c', - 'D', 'd', 'M', 'm'} ## set of all characters a Roman numeral may consist of - -proc romanToDecimal*(romanVal: string): int = - ## Converts a Roman numeral to its int representation. - result = 0 - var prevVal = 0 - for i in countdown(romanVal.len - 1, 0): - var val = 0 - case romanVal[i] - of 'I', 'i': val = 1 - of 'V', 'v': val = 5 - of 'X', 'x': val = 10 - of 'L', 'l': val = 50 - of 'C', 'c': val = 100 - of 'D', 'd': val = 500 - of 'M', 'm': val = 1000 - else: - raise newException(EInvalidValue, "invalid roman numeral: " & $romanVal) - if val >= prevVal: - inc(result, val) - else: - dec(result, val) - prevVal = val - -proc decimalToRoman*(number: range[1..3_999]): string = - ## Converts a number to a Roman numeral. - const romanComposites = [ - ("M", 1000), ("CM", 900), - ("D", 500), ("CD", 400), ("C", 100), - ("XC", 90), ("L", 50), ("XL", 40), ("X", 10), ("IX", 9), - ("V", 5), ("IV", 4), ("I", 1)] - result = "" - var decVal = number - for key, val in items(romanComposites): - while decVal >= val: - dec(decVal, val) - result.add(key) - -when isMainModule: - import math - randomize() - for i in 1 .. 
100: - var rnd = 1 + random(3990) - assert rnd == rnd.decimalToRoman.romanToDecimal - diff --git a/lib/pure/ropes.nim b/lib/pure/ropes.nim index 4cc64a154..8750aca87 100644 --- a/lib/pure/ropes.nim +++ b/lib/pure/ropes.nim @@ -8,19 +8,21 @@ # ## This module contains support for a `rope`:idx: data type. -## Ropes can represent very long strings efficiently; especially concatenation +## Ropes can represent very long strings efficiently; in particular, concatenation ## is done in O(1) instead of O(n). They are essentially concatenation ## trees that are only flattened when converting to a native Nim -## string. The empty string is represented by ``nil``. Ropes are immutable and +## string. The empty string is represented by `nil`. Ropes are immutable and ## subtrees can be shared without copying. ## Leaves can be cached for better memory efficiency at the cost of ## runtime efficiency. -include "system/inclrtl" +include system/inclrtl +import std/streams -{.deadCodeElim: on.} +when defined(nimPreviewSlimSystem): + import std/[syncio, formatfloat, assertions] -{.push debugger:off .} # the user does not want to trace a part +{.push debugger: off.} # the user does not want to trace a part # of the standard library! const @@ -29,16 +31,12 @@ const var cacheEnabled = false -type - Rope* = ref RopeObj ## empty rope is represented by nil - RopeObj {.acyclic.} = object +type + Rope* {.acyclic.} = ref object + ## A rope data type. The empty rope is represented by `nil`. left, right: Rope length: int - data: string # != nil if a leaf - -{.deprecated: [PRope: Rope].} - -proc isConc(r: Rope): bool {.inline.} = return isNil(r.data) + data: string # not empty if a leaf # Note that the left and right pointers are not needed for leafs. # Leaves have relatively high memory overhead (~30 bytes on a 32 @@ -49,54 +47,53 @@ proc isConc(r: Rope): bool {.inline.} = return isNil(r.data) # pointers. 
proc len*(a: Rope): int {.rtl, extern: "nro$1".} = - ## the rope's length - if a == nil: result = 0 - else: result = a.length - + ## The rope's length. + if a == nil: 0 else: a.length + proc newRope(): Rope = new(result) -proc newRope(data: string): Rope = +proc newRope(data: string): Rope = new(result) result.length = len(data) result.data = data -var - cache {.threadvar.}: Rope # the root of the cache tree - N {.threadvar.}: Rope # dummy rope needed for splay algorithm +var + cache {.threadvar.}: Rope # the root of the cache tree + N {.threadvar.}: Rope # dummy rope needed for splay algorithm when countCacheMisses: var misses, hits: int - -proc splay(s: string, tree: Rope, cmpres: var int): Rope = + +proc splay(s: string, tree: Rope, cmpres: var int): Rope = var c: int var t = tree N.left = nil - N.right = nil # reset to nil + N.right = nil # reset to nil var le = N var r = N - while true: + while true: c = cmp(s, t.data) - if c < 0: - if (t.left != nil) and (s < t.left.data): + if c < 0: + if (t.left != nil) and (s < t.left.data): var y = t.left t.left = y.right y.right = t t = y - if t.left == nil: break + if t.left == nil: break r.left = t r = t t = t.left - elif c > 0: - if (t.right != nil) and (s > t.right.data): + elif c > 0: + if (t.right != nil) and (s > t.right.data): var y = t.right t.right = y.left y.left = t t = y - if t.right == nil: break + if t.right == nil: break le.right = t le = t t = t.right - else: - break + else: + break cmpres = c le.right = t.left r.left = t.right @@ -104,218 +101,232 @@ proc splay(s: string, tree: Rope, cmpres: var int): Rope = t.right = N.left result = t -proc insertInCache(s: string, tree: Rope): Rope = +proc insertInCache(s: string, tree: Rope): Rope = var t = tree - if t == nil: + if t == nil: result = newRope(s) when countCacheMisses: inc(misses) - return + return var cmp: int t = splay(s, t, cmp) - if cmp == 0: + if cmp == 0: # We get here if it's already in the Tree # Don't add it again result = t when 
countCacheMisses: inc(hits) - else: + else: when countCacheMisses: inc(misses) result = newRope(s) - if cmp < 0: + if cmp < 0: result.left = t.left result.right = t t.left = nil - else: + else: # i > t.item: result.right = t.right result.left = t t.right = nil -proc rope*(s: string): Rope {.rtl, extern: "nro$1Str".} = - ## Converts a string to a rope. - if s.len == 0: +proc rope*(s: string = ""): Rope {.rtl, extern: "nro$1Str".} = + ## Converts a string to a rope. + runnableExamples: + let r = rope("I'm a rope") + doAssert $r == "I'm a rope" + + if s.len == 0: result = nil - elif cacheEnabled: - result = insertInCache(s, cache) - cache = result - else: - result = newRope(s) - -proc rope*(i: BiggestInt): Rope {.rtl, extern: "nro$1BiggestInt".} = - ## Converts an int to a rope. + else: + when nimvm: + # No caching in VM context + result = newRope(s) + else: + if cacheEnabled: + result = insertInCache(s, cache) + cache = result + else: + result = newRope(s) + +proc rope*(i: BiggestInt): Rope {.rtl, extern: "nro$1BiggestInt".} = + ## Converts an int to a rope. + runnableExamples: + let r = rope(429) + doAssert $r == "429" + result = rope($i) proc rope*(f: BiggestFloat): Rope {.rtl, extern: "nro$1BiggestFloat".} = - ## Converts a float to a rope. + ## Converts a float to a rope. + runnableExamples: + let r = rope(4.29) + doAssert $r == "4.29" + result = rope($f) - + proc enableCache*() {.rtl, extern: "nro$1".} = ## Enables the caching of leaves. This reduces the memory footprint at ## the cost of runtime efficiency. cacheEnabled = true proc disableCache*() {.rtl, extern: "nro$1".} = - ## the cache is discarded and disabled. The GC will reuse its used memory. + ## The cache is discarded and disabled. The GC will reuse its used memory. cache = nil cacheEnabled = false proc `&`*(a, b: Rope): Rope {.rtl, extern: "nroConcRopeRope".} = - ## the concatenation operator for ropes. - if a == nil: + ## The concatenation operator for ropes. 
+ runnableExamples: + let r = rope("Hello, ") & rope("Nim!") + doAssert $r == "Hello, Nim!" + + if a == nil: result = b - elif b == nil: + elif b == nil: result = a else: result = newRope() result.length = a.length + b.length - when false: - # XXX rebalancing would be nice, but is too expensive. - result.left = a.left - var x = newRope() - x.left = a.right - x.right = b - result.right = x - else: - result.left = a - result.right = b - -proc `&`*(a: Rope, b: string): Rope {.rtl, extern: "nroConcRopeStr".} = - ## the concatenation operator for ropes. + result.left = a + result.right = b + +proc `&`*(a: Rope, b: string): Rope {.rtl, extern: "nroConcRopeStr".} = + ## The concatenation operator for ropes. + runnableExamples: + let r = rope("Hello, ") & "Nim!" + doAssert $r == "Hello, Nim!" + result = a & rope(b) - -proc `&`*(a: string, b: Rope): Rope {.rtl, extern: "nroConcStrRope".} = - ## the concatenation operator for ropes. + +proc `&`*(a: string, b: Rope): Rope {.rtl, extern: "nroConcStrRope".} = + ## The concatenation operator for ropes. + runnableExamples: + let r = "Hello, " & rope("Nim!") + doAssert $r == "Hello, Nim!" + result = rope(a) & b - -proc `&`*(a: openArray[Rope]): Rope {.rtl, extern: "nroConcOpenArray".} = - ## the concatenation operator for an openarray of ropes. - for i in countup(0, high(a)): result = result & a[i] + +proc `&`*(a: openArray[Rope]): Rope {.rtl, extern: "nroConcOpenArray".} = + ## The concatenation operator for an `openArray` of ropes. + runnableExamples: + let r = &[rope("Hello, "), rope("Nim"), rope("!")] + doAssert $r == "Hello, Nim!" + + for item in a: result = result & item proc add*(a: var Rope, b: Rope) {.rtl, extern: "nro$1Rope".} = - ## adds `b` to the rope `a`. + ## Adds `b` to the rope `a`. + runnableExamples: + var r = rope("Hello, ") + r.add(rope("Nim!")) + doAssert $r == "Hello, Nim!" + a = a & b proc add*(a: var Rope, b: string) {.rtl, extern: "nro$1Str".} = - ## adds `b` to the rope `a`. 
+ ## Adds `b` to the rope `a`. + runnableExamples: + var r = rope("Hello, ") + r.add("Nim!") + doAssert $r == "Hello, Nim!" + a = a & b - + proc `[]`*(r: Rope, i: int): char {.rtl, extern: "nroCharAt".} = - ## returns the character at position `i` in the rope `r`. This is quite - ## expensive! Worst-case: O(n). If ``i >= r.len``, ``\0`` is returned. + ## Returns the character at position `i` in the rope `r`. This is quite + ## expensive! Worst-case: O(n). If `i >= r.len or i < 0`, `\0` is returned. + runnableExamples: + let r = rope("Hello, Nim!") + + doAssert r[0] == 'H' + doAssert r[7] == 'N' + doAssert r[22] == '\0' + var x = r var j = i - if x == nil: return + if x == nil or i < 0 or i >= r.len: return while true: - if not isConc(x): - if x.data.len <% j: return x.data[j] - return '\0' + if x != nil and x.data.len > 0: + # leaf + return x.data[j] else: - if x.left.len >% j: + if x.left.length > j: x = x.left else: + dec(j, x.left.length) x = x.right - dec(j, x.len) iterator leaves*(r: Rope): string = - ## iterates over any leaf string in the rope `r`. + ## Iterates over any leaf string in the rope `r`. + runnableExamples: + let r = rope("Hello") & rope(", Nim!") + let s = ["Hello", ", Nim!"] + var index = 0 + for leave in r.leaves: + doAssert leave == s[index] + inc(index) + if r != nil: var stack = @[r] - while stack.len > 0: + while stack.len > 0: var it = stack.pop - while isConc(it): + while it.left != nil: + assert(it.right != nil) stack.add(it.right) it = it.left assert(it != nil) - assert(it.data != nil) yield it.data - + iterator items*(r: Rope): char = - ## iterates over any character in the rope `r`. + ## Iterates over any character in the rope `r`. for s in leaves(r): for c in items(s): yield c proc write*(f: File, r: Rope) {.rtl, extern: "nro$1".} = - ## writes a rope to a file. + ## Writes a rope to a file. for s in leaves(r): write(f, s) -proc `$`*(r: Rope): string {.rtl, extern: "nroToString".}= - ## converts a rope back to a string. 
- result = newString(r.len) - setLen(result, 0) +proc write*(s: Stream, r: Rope) {.rtl, extern: "nroWriteStream".} = + ## Writes a rope to a stream. + for rs in leaves(r): write(s, rs) + +proc `$`*(r: Rope): string {.rtl, extern: "nroToString".} = + ## Converts a rope back to a string. + result = newStringOfCap(r.len) for s in leaves(r): add(result, s) -when false: - # Format string caching seems reasonable: All leaves can be shared and format - # string parsing has to be done only once. A compiled format string is stored - # as a rope. A negative length is used for the index into the args array. - proc compiledArg(idx: int): Rope = - new(result) - result.length = -idx - - proc compileFrmt(frmt: string): Rope = - var i = 0 - var length = len(frmt) - result = nil - var num = 0 - while i < length: - if frmt[i] == '$': - inc(i) - case frmt[i] - of '$': - add(result, "$") - inc(i) - of '#': - inc(i) - add(result, compiledArg(num+1)) - inc(num) - of '0'..'9': - var j = 0 - while true: - j = j * 10 + ord(frmt[i]) - ord('0') - inc(i) - if frmt[i] notin {'0'..'9'}: break - add(s, compiledArg(j)) - of '{': - inc(i) - var j = 0 - while frmt[i] in {'0'..'9'}: - j = j * 10 + ord(frmt[i]) - ord('0') - inc(i) - if frmt[i] == '}': inc(i) - else: raise newException(EInvalidValue, "invalid format string") - add(s, compiledArg(j)) - else: raise newException(EInvalidValue, "invalid format string") - var start = i - while i < length: - if frmt[i] != '$': inc(i) - else: break - if i - 1 >= start: - add(result, substr(frmt, start, i-1)) - -proc `%`*(frmt: string, args: openArray[Rope]): Rope {. - rtl, extern: "nroFormat".} = - ## `%` substitution operator for ropes. Does not support the ``$identifier`` - ## nor ``${identifier}`` notations. +proc `%`*(frmt: string, args: openArray[Rope]): Rope {.rtl, extern: "nroFormat".} = + ## `%` substitution operator for ropes. Does not support the `$identifier` + ## nor `${identifier}` notations. 
+ runnableExamples: + let r1 = "$1 $2 $3" % [rope("Nim"), rope("is"), rope("a great language")] + doAssert $r1 == "Nim is a great language" + + let r2 = "$# $# $#" % [rope("Nim"), rope("is"), rope("a great language")] + doAssert $r2 == "Nim is a great language" + + let r3 = "${1} ${2} ${3}" % [rope("Nim"), rope("is"), rope("a great language")] + doAssert $r3 == "Nim is a great language" + var i = 0 var length = len(frmt) result = nil var num = 0 - while i < length: - if frmt[i] == '$': + while i < length: + if frmt[i] == '$': inc(i) case frmt[i] - of '$': + of '$': add(result, "$") inc(i) - of '#': + of '#': inc(i) add(result, args[num]) inc(num) - of '0'..'9': + of '0'..'9': var j = 0 - while true: + while true: j = j * 10 + ord(frmt[i]) - ord('0') inc(i) - if frmt[i] notin {'0'..'9'}: break + if i >= frmt.len or frmt[i] notin {'0'..'9'}: break add(result, args[j-1]) of '{': inc(i) @@ -325,43 +336,64 @@ proc `%`*(frmt: string, args: openArray[Rope]): Rope {. inc(i) if frmt[i] == '}': inc(i) else: raise newException(ValueError, "invalid format string") + add(result, args[j-1]) else: raise newException(ValueError, "invalid format string") var start = i - while i < length: + while i < length: if frmt[i] != '$': inc(i) - else: break - if i - 1 >= start: + else: break + if i - 1 >= start: add(result, substr(frmt, start, i - 1)) -proc addf*(c: var Rope, frmt: string, args: openArray[Rope]) {. - rtl, extern: "nro$1".} = - ## shortcut for ``add(c, frmt % args)``. +proc addf*(c: var Rope, frmt: string, args: openArray[Rope]) {.rtl, extern: "nro$1".} = + ## Shortcut for `add(c, frmt % args)`. + runnableExamples: + var r = rope("Dash: ") + r.addf "$1 $2 $3", [rope("Nim"), rope("is"), rope("a great language")] + doAssert $r == "Dash: Nim is a great language" + add(c, frmt % args) -proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} = - ## returns true if the contents of the file `f` equal `r`. 
- var bufSize = 1024 # reasonable start value - var buf = alloc(bufSize) - for s in leaves(r): - if s.len > bufSize: - bufSize = max(bufSize * 2, s.len) - buf = realloc(buf, bufSize) - var readBytes = readBuffer(f, buf, s.len) - result = readBytes == s.len and equalMem(buf, cstring(s), s.len) - if not result: break - if result: - result = readBuffer(f, buf, 1) == 0 # really at the end of file? - dealloc(buf) - -proc equalsFile*(r: Rope, f: string): bool {.rtl, extern: "nro$1Str".} = - ## returns true if the contents of the file `f` equal `r`. If `f` does not - ## exist, false is returned. - var bin: File - result = open(bin, f) - if result: - result = equalsFile(r, bin) - close(bin) +when not defined(js) and not defined(nimscript): + const + bufSize = 1024 # 1 KB is reasonable + + proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} = + ## Returns true if the contents of the file `f` equal `r`. + var + buf: array[bufSize, char] + bpos = buf.len + blen = buf.len + + for s in leaves(r): + var spos = 0 + let slen = s.len + while spos < slen: + if bpos == blen: + # Read more data + bpos = 0 + blen = readBuffer(f, addr(buf[0]), buf.len) + if blen == 0: # no more data in file + return false + let n = min(blen - bpos, slen - spos) + # TODO: There's gotta be a better way of comparing here... + if not equalMem(addr(buf[bpos]), + cast[pointer](cast[int](cstring(s)) + spos), n): + return false + spos += n + bpos += n + + result = readBuffer(f, addr(buf[0]), 1) == 0 # check that we've read all + + proc equalsFile*(r: Rope, filename: string): bool {.rtl, extern: "nro$1Str".} = + ## Returns true if the contents of the file `f` equal `r`. If `f` does not + ## exist, false is returned. 
+ var f: File + result = open(f, filename) + if result: + result = equalsFile(r, f) + close(f) new(N) # init dummy node for splay algorithm diff --git a/lib/pure/scgi.nim b/lib/pure/scgi.nim deleted file mode 100644 index f3e2b583c..000000000 --- a/lib/pure/scgi.nim +++ /dev/null @@ -1,297 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2013 Andreas Rumpf, Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements helper procs for SCGI applications. Example: -## -## .. code-block:: Nim -## -## import strtabs, sockets, scgi -## -## var counter = 0 -## proc handleRequest(client: Socket, input: string, -## headers: StringTableRef): bool {.procvar.} = -## inc(counter) -## client.writeStatusOkTextContent() -## client.send("Hello for the $#th time." % $counter & "\c\L") -## return false # do not stop processing -## -## run(handleRequest) -## -## **Warning:** The API of this module is unstable, and therefore is subject -## to change. -## -## **Warning:** This module only supports the old asynchronous interface. -## You may wish to use the `asynchttpserver <asynchttpserver.html>`_ -## instead for web applications. - -include "system/inclrtl" - -import sockets, strutils, os, strtabs, asyncio - -type - ScgiError* = object of IOError ## the exception that is raised, if a SCGI error occurs - -proc raiseScgiError*(msg: string) {.noreturn.} = - ## raises an ScgiError exception with message `msg`. 
- var e: ref ScgiError - new(e) - e.msg = msg - raise e - -proc parseWord(inp: string, outp: var string, start: int): int = - result = start - while inp[result] != '\0': inc(result) - outp = substr(inp, start, result-1) - -proc parseHeaders(s: string, L: int): StringTableRef = - result = newStringTable() - var i = 0 - while i < L: - var key, val: string - i = parseWord(s, key, i)+1 - i = parseWord(s, val, i)+1 - result[key] = val - if s[i] == ',': inc(i) - else: raiseScgiError("',' after netstring expected") - -proc recvChar(s: Socket): char = - var c: char - if recv(s, addr(c), sizeof(c)) == sizeof(c): - result = c - -type - ScgiState* = object of RootObj ## SCGI state object - server: Socket - bufLen: int - client*: Socket ## the client socket to send data to - headers*: StringTableRef ## the parsed headers - input*: string ## the input buffer - - - # Async - - ClientMode = enum - ClientReadChar, ClientReadHeaders, ClientReadContent - - AsyncClient = ref object - c: AsyncSocket - mode: ClientMode - dataLen: int - headers: StringTableRef ## the parsed headers - input: string ## the input buffer - - AsyncScgiStateObj = object - handleRequest: proc (client: AsyncSocket, - input: string, - headers: StringTableRef) {.closure, gcsafe.} - asyncServer: AsyncSocket - disp: Dispatcher - AsyncScgiState* = ref AsyncScgiStateObj - -{.deprecated: [EScgi: ScgiError, TScgiState: ScgiState, - PAsyncScgiState: AsyncScgiState, scgiError: raiseScgiError].} - -proc recvBuffer(s: var ScgiState, L: int) = - if L > s.bufLen: - s.bufLen = L - s.input = newString(L) - if L > 0 and recv(s.client, cstring(s.input), L) != L: - raiseScgiError("could not read all data") - setLen(s.input, L) - -proc open*(s: var ScgiState, port = Port(4000), address = "127.0.0.1", - reuseAddr = false) = - ## opens a connection. 
- s.bufLen = 4000 - s.input = newString(s.bufLen) # will be reused - - s.server = socket() - if s.server == invalidSocket: raiseOSError(osLastError()) - new(s.client) # Initialise s.client for `next` - if s.server == invalidSocket: raiseScgiError("could not open socket") - #s.server.connect(connectionName, port) - if reuseAddr: - s.server.setSockOpt(OptReuseAddr, true) - bindAddr(s.server, port, address) - listen(s.server) - -proc close*(s: var ScgiState) = - ## closes the connection. - s.server.close() - -proc next*(s: var ScgiState, timeout: int = -1): bool = - ## proceed to the first/next request. Waits ``timeout`` miliseconds for a - ## request, if ``timeout`` is `-1` then this function will never time out. - ## Returns `true` if a new request has been processed. - var rsocks = @[s.server] - if select(rsocks, timeout) == 1 and rsocks.len == 1: - new(s.client) - accept(s.server, s.client) - var L = 0 - while true: - var d = s.client.recvChar() - if d == '\0': - s.client.close() - return false - if d notin strutils.Digits: - if d != ':': raiseScgiError("':' after length expected") - break - L = L * 10 + ord(d) - ord('0') - recvBuffer(s, L+1) - s.headers = parseHeaders(s.input, L) - if s.headers["SCGI"] != "1": raiseScgiError("SCGI Version 1 expected") - L = parseInt(s.headers["CONTENT_LENGTH"]) - recvBuffer(s, L) - return true - -proc writeStatusOkTextContent*(c: Socket, contentType = "text/html") = - ## sends the following string to the socket `c`:: - ## - ## Status: 200 OK\r\LContent-Type: text/html\r\L\r\L - ## - ## You should send this before sending your HTML page, for example. - c.send("Status: 200 OK\r\L" & - "Content-Type: $1\r\L\r\L" % contentType) - -proc run*(handleRequest: proc (client: Socket, input: string, - headers: StringTableRef): bool {.nimcall,gcsafe.}, - port = Port(4000)) = - ## encapsulates the SCGI object and main loop. 
- var s: ScgiState - s.open(port) - var stop = false - while not stop: - if next(s): - stop = handleRequest(s.client, s.input, s.headers) - s.client.close() - s.close() - -# -- AsyncIO start - -proc recvBufferAsync(client: AsyncClient, L: int): ReadLineResult = - result = ReadPartialLine - var data = "" - if L < 1: - raiseScgiError("Cannot read negative or zero length: " & $L) - let ret = recvAsync(client.c, data, L) - if ret == 0 and data == "": - client.c.close() - return ReadDisconnected - if ret == -1: - return ReadNone # No more data available - client.input.add(data) - if ret == L: - return ReadFullLine - -proc checkCloseSocket(client: AsyncClient) = - if not client.c.isClosed: - if client.c.isSendDataBuffered: - client.c.setHandleWrite do (s: AsyncSocket): - if not s.isClosed and not s.isSendDataBuffered: - s.close() - s.delHandleWrite() - else: client.c.close() - -proc handleClientRead(client: AsyncClient, s: AsyncScgiState) = - case client.mode - of ClientReadChar: - while true: - var d = "" - let ret = client.c.recvAsync(d, 1) - if d == "" and ret == 0: - # Disconnected - client.c.close() - return - if ret == -1: - return # No more data available - if d[0] notin strutils.Digits: - if d[0] != ':': raiseScgiError("':' after length expected") - break - client.dataLen = client.dataLen * 10 + ord(d[0]) - ord('0') - client.mode = ClientReadHeaders - handleClientRead(client, s) # Allow progression - of ClientReadHeaders: - let ret = recvBufferAsync(client, (client.dataLen+1)-client.input.len) - case ret - of ReadFullLine: - client.headers = parseHeaders(client.input, client.input.len-1) - if client.headers["SCGI"] != "1": raiseScgiError("SCGI Version 1 expected") - client.input = "" # For next part - - let contentLen = parseInt(client.headers["CONTENT_LENGTH"]) - if contentLen > 0: - client.mode = ClientReadContent - else: - s.handleRequest(client.c, client.input, client.headers) - checkCloseSocket(client) - of ReadPartialLine, ReadDisconnected, ReadNone: return 
- of ClientReadContent: - let L = parseInt(client.headers["CONTENT_LENGTH"])-client.input.len - if L > 0: - let ret = recvBufferAsync(client, L) - case ret - of ReadFullLine: - s.handleRequest(client.c, client.input, client.headers) - checkCloseSocket(client) - of ReadPartialLine, ReadDisconnected, ReadNone: return - else: - s.handleRequest(client.c, client.input, client.headers) - checkCloseSocket(client) - -proc handleAccept(sock: AsyncSocket, s: AsyncScgiState) = - var client: AsyncSocket - new(client) - accept(s.asyncServer, client) - var asyncClient = AsyncClient(c: client, mode: ClientReadChar, dataLen: 0, - headers: newStringTable(), input: "") - client.handleRead = - proc (sock: AsyncSocket) = - handleClientRead(asyncClient, s) - s.disp.register(client) - -proc open*(handleRequest: proc (client: AsyncSocket, - input: string, headers: StringTableRef) {. - closure, gcsafe.}, - port = Port(4000), address = "127.0.0.1", - reuseAddr = false): AsyncScgiState = - ## Creates an ``AsyncScgiState`` object which serves as a SCGI server. - ## - ## After the execution of ``handleRequest`` the client socket will be closed - ## automatically unless it has already been closed. - var cres: AsyncScgiState - new(cres) - cres.asyncServer = asyncSocket() - cres.asyncServer.handleAccept = proc (s: AsyncSocket) = handleAccept(s, cres) - if reuseAddr: - cres.asyncServer.setSockOpt(OptReuseAddr, true) - bindAddr(cres.asyncServer, port, address) - listen(cres.asyncServer) - cres.handleRequest = handleRequest - result = cres - -proc register*(d: Dispatcher, s: AsyncScgiState): Delegate {.discardable.} = - ## Registers ``s`` with dispatcher ``d``. - result = d.register(s.asyncServer) - s.disp = d - -proc close*(s: AsyncScgiState) = - ## Closes the ``AsyncScgiState``. 
- s.asyncServer.close() - -when false: - var counter = 0 - proc handleRequest(client: Socket, input: string, - headers: StringTableRef): bool {.procvar.} = - inc(counter) - client.writeStatusOkTextContent() - client.send("Hello for the $#th time." % $counter & "\c\L") - return false # do not stop processing - - run(handleRequest) - diff --git a/lib/pure/segfaults.nim b/lib/pure/segfaults.nim new file mode 100644 index 000000000..65b059e86 --- /dev/null +++ b/lib/pure/segfaults.nim @@ -0,0 +1,88 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2017 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This modules registers a signal handler that turns access violations / +## segfaults into a ``NilAccessDefect`` exception. To be able to catch +## a NilAccessDefect all you have to do is to import this module. +## +## Tested on these OSes: Linux, Windows, OSX + +# xxx possibly broken on arm64, see bug #17178 + +{.used.} + +# do allocate memory upfront: +var se: ref NilAccessDefect +new(se) +se.name = "NilAccessDefect" +se.msg = "Could not access value because it is nil." + +when defined(windows): + include "../system/ansi_c" + + import std/winlean + + const + EXCEPTION_ACCESS_VIOLATION = DWORD(0xc0000005'i32) + EXCEPTION_CONTINUE_SEARCH = LONG(0) + + type + PEXCEPTION_RECORD = ptr object + exceptionCode: DWORD # other fields left out + + PEXCEPTION_POINTERS = ptr object + exceptionRecord: PEXCEPTION_RECORD + contextRecord: pointer + + VectoredHandler = proc (p: PEXCEPTION_POINTERS): LONG {.stdcall.} + proc addVectoredExceptionHandler(firstHandler: ULONG, + handler: VectoredHandler): pointer {. 
+ importc: "AddVectoredExceptionHandler", stdcall, dynlib: "kernel32.dll".} + + {.push stackTrace: off.} + proc segfaultHandler(p: PEXCEPTION_POINTERS): LONG {.stdcall.} = + if p.exceptionRecord.exceptionCode == EXCEPTION_ACCESS_VIOLATION: + {.gcsafe.}: + raise se + else: + result = EXCEPTION_CONTINUE_SEARCH + {.pop.} + + discard addVectoredExceptionHandler(0, segfaultHandler) + + when false: + {.push stackTrace: off.} + proc segfaultHandler(sig: cint) {.noconv.} = + {.gcsafe.}: + rawRaise se + {.pop.} + c_signal(SIGSEGV, segfaultHandler) + +else: + import std/posix + + var sa: Sigaction + + var SEGV_MAPERR {.importc, header: "<signal.h>".}: cint + + {.push stackTrace: off.} + proc segfaultHandler(sig: cint, y: ptr SigInfo, z: pointer) {.noconv.} = + if y.si_code == SEGV_MAPERR: + {.gcsafe.}: + raise se + else: + quit(1) + {.pop.} + + discard sigemptyset(sa.sa_mask) + + sa.sa_sigaction = segfaultHandler + sa.sa_flags = SA_SIGINFO or SA_NODEFER + + discard sigaction(SIGSEGV, sa) diff --git a/lib/pure/selectors.nim b/lib/pure/selectors.nim index b6bc9dd3a..ac180e2bd 100644 --- a/lib/pure/selectors.nim +++ b/lib/pure/selectors.nim @@ -1,323 +1,375 @@ # # # Nim's Runtime Library -# (c) Copyright 2015 Dominik Picheta +# (c) Copyright 2016 Eugene Kabanov # # See the file "copying.txt", included in this # distribution, for details about the copyright. # -# TODO: Docs. +## This module allows high-level and efficient I/O multiplexing. +## +## Supported OS primitives: `epoll`, `kqueue`, `poll` and +## Windows `select`. +## +## To use threadsafe version of this module, it needs to be compiled +## with both `-d:threadsafe` and `--threads:on` options. +## +## Supported features: files, sockets, pipes, timers, processes, signals +## and user events. +## +## Fully supported OS: MacOSX, FreeBSD, OpenBSD, NetBSD, Linux (except +## for Android). +## +## Partially supported OS: Windows (only sockets and user events), +## Solaris (files, sockets, handles and user events). 
+## Android (files, sockets, handles and user events). +## +## TODO: `/dev/poll`, `event ports` and filesystem events. + +import std/nativesockets +import std/oserrors + +when defined(nimPreviewSlimSystem): + import std/assertions + +const hasThreadSupport = compileOption("threads") and defined(threadsafe) + +const ioselSupportedPlatform* = defined(macosx) or defined(freebsd) or + defined(netbsd) or defined(openbsd) or + defined(dragonfly) or defined(nuttx) or + (defined(linux) and not defined(android) and not defined(emscripten)) + ## This constant is used to determine whether the destination platform is + ## fully supported by `ioselectors` module. + +const bsdPlatform = defined(macosx) or defined(freebsd) or + defined(netbsd) or defined(openbsd) or + defined(dragonfly) -import tables, os, unsigned, hashes +when defined(nimdoc): + type + Selector*[T] = ref object + ## An object which holds descriptors to be checked for read/write status + + IOSelectorsException* = object of CatchableError + ## Exception that is raised if an IOSelectors error occurs. 
+ + Event* {.pure.} = enum + ## An enum which hold event types + Read, ## Descriptor is available for read + Write, ## Descriptor is available for write + Timer, ## Timer descriptor is completed + Signal, ## Signal is raised + Process, ## Process is finished + Vnode, ## BSD specific file change + User, ## User event is raised + Error, ## Error occurred while waiting for descriptor + VnodeWrite, ## NOTE_WRITE (BSD specific, write to file occurred) + VnodeDelete, ## NOTE_DELETE (BSD specific, unlink of file occurred) + VnodeExtend, ## NOTE_EXTEND (BSD specific, file extended) + VnodeAttrib, ## NOTE_ATTRIB (BSD specific, file attributes changed) + VnodeLink, ## NOTE_LINK (BSD specific, file link count changed) + VnodeRename, ## NOTE_RENAME (BSD specific, file renamed) + VnodeRevoke ## NOTE_REVOKE (BSD specific, file revoke occurred) + + ReadyKey* = object + ## An object which holds result for descriptor + fd* : int ## file/socket descriptor + events*: set[Event] ## set of events + errorCode*: OSErrorCode ## additional error code information for + ## Error events + + SelectEvent* = object + ## An object which holds user defined event + + proc newSelector*[T](): Selector[T] = + ## Creates a new selector -when defined(linux): - import posix, epoll -elif defined(windows): - import winlean -else: - import posix + proc close*[T](s: Selector[T]) = + ## Closes the selector. -proc hash*(x: SocketHandle): THash {.borrow.} -proc `$`*(x: SocketHandle): string {.borrow.} + proc registerHandle*[T](s: Selector[T], fd: int | SocketHandle, + events: set[Event], data: T) = + ## Registers file/socket descriptor `fd` to selector `s` + ## with events set in `events`. The `data` is application-defined + ## data, which will be passed when an event is triggered. 
-type - Event* = enum - EvRead, EvWrite, EvError + proc updateHandle*[T](s: Selector[T], fd: int | SocketHandle, + events: set[Event]) = + ## Update file/socket descriptor `fd`, registered in selector + ## `s` with new events set `event`. - SelectorKey* = ref object - fd*: SocketHandle - events*: set[Event] ## The events which ``fd`` listens for. - data*: RootRef ## User object. + proc registerTimer*[T](s: Selector[T], timeout: int, oneshot: bool, + data: T): int {.discardable.} = + ## Registers timer notification with `timeout` (in milliseconds) + ## to selector `s`. + ## + ## If `oneshot` is `true`, timer will be notified only once. + ## + ## Set `oneshot` to `false` if you want periodic notifications. + ## + ## The `data` is application-defined data, which will be passed, when + ## the timer is triggered. + ## + ## Returns the file descriptor for the registered timer. - ReadyInfo* = tuple[key: SelectorKey, events: set[Event]] + proc registerSignal*[T](s: Selector[T], signal: int, + data: T): int {.discardable.} = + ## Registers Unix signal notification with `signal` to selector + ## `s`. + ## + ## The `data` is application-defined data, which will be + ## passed when signal raises. + ## + ## Returns the file descriptor for the registered signal. + ## + ## **Note:** This function is not supported on `Windows`. -when defined(nimdoc): - type - Selector* = ref object - ## An object which holds file descriptors to be checked for read/write - ## status. - fds: Table[SocketHandle, SelectorKey] + proc registerProcess*[T](s: Selector[T], pid: int, + data: T): int {.discardable.} = + ## Registers a process id (pid) notification (when process has + ## exited) in selector `s`. + ## + ## The `data` is application-defined data, which will be passed when + ## process with `pid` has exited. + ## + ## Returns the file descriptor for the registered signal. 
- proc register*(s: Selector, fd: SocketHandle, events: set[Event], - data: RootRef): SelectorKey {.discardable.} = - ## Registers file descriptor ``fd`` to selector ``s`` with a set of TEvent - ## ``events``. + proc registerEvent*[T](s: Selector[T], ev: SelectEvent, data: T) = + ## Registers selector event `ev` in selector `s`. + ## + ## The `data` is application-defined data, which will be passed when + ## `ev` happens. + + proc registerVnode*[T](s: Selector[T], fd: cint, events: set[Event], + data: T) = + ## Registers selector BSD/MacOSX specific vnode events for file + ## descriptor `fd` and events `events`. + ## `data` application-defined data, which to be passed, when + ## vnode event happens. + ## + ## **Note:** This function is supported only by BSD and MacOSX. - proc update*(s: Selector, fd: SocketHandle, - events: set[Event]): SelectorKey {.discardable.} = - ## Updates the events which ``fd`` wants notifications for. + proc newSelectEvent*(): SelectEvent = + ## Creates a new user-defined event. - proc unregister*(s: Selector, fd: SocketHandle): SelectorKey {.discardable.} = - ## Unregisters file descriptor ``fd`` from selector ``s``. + proc trigger*(ev: SelectEvent) = + ## Trigger event `ev`. - proc close*(s: Selector) = - ## Closes the selector + proc close*(ev: SelectEvent) = + ## Closes user-defined event `ev`. - proc select*(s: Selector, timeout: int): seq[ReadyInfo] = - ## The ``events`` field of the returned ``key`` contains the original events - ## for which the ``fd`` was bound. This is contrary to the ``events`` field - ## of the ``TReadyInfo`` tuple which determines which events are ready - ## on the ``fd``. + proc unregister*[T](s: Selector[T], ev: SelectEvent) = + ## Unregisters user-defined event `ev` from selector `s`. - proc newSelector*(): Selector = - ## Creates a new selector + proc unregister*[T](s: Selector[T], fd: int|SocketHandle|cint) = + ## Unregisters file/socket descriptor `fd` from selector `s`. 
- proc contains*(s: Selector, fd: SocketHandle): bool = - ## Determines whether selector contains a file descriptor. + proc selectInto*[T](s: Selector[T], timeout: int, + results: var openArray[ReadyKey]): int = + ## Waits for events registered in selector `s`. + ## + ## The `timeout` argument specifies the maximum number of milliseconds + ## the function will be blocked for if no events are ready. Specifying a + ## timeout of `-1` causes the function to block indefinitely. + ## All available events will be stored in `results` array. + ## + ## Returns number of triggered events. - proc `[]`*(s: Selector, fd: SocketHandle): SelectorKey = - ## Retrieves the selector key for ``fd``. + proc select*[T](s: Selector[T], timeout: int): seq[ReadyKey] = + ## Waits for events registered in selector `s`. + ## + ## The `timeout` argument specifies the maximum number of milliseconds + ## the function will be blocked for if no events are ready. Specifying a + ## timeout of `-1` causes the function to block indefinitely. + ## + ## Returns a list of triggered events. + proc getData*[T](s: Selector[T], fd: SocketHandle|int): var T = + ## Retrieves application-defined `data` associated with descriptor `fd`. + ## If specified descriptor `fd` is not registered, empty/default value + ## will be returned. 
-elif defined(linux): - type - Selector* = ref object - epollFD: cint - events: array[64, epoll_event] - fds: Table[SocketHandle, SelectorKey] - - proc createEventStruct(events: set[Event], fd: SocketHandle): epoll_event = - if EvRead in events: - result.events = EPOLLIN - if EvWrite in events: - result.events = result.events or EPOLLOUT - result.events = result.events or EPOLLRDHUP - result.data.fd = fd.cint - - proc register*(s: Selector, fd: SocketHandle, events: set[Event], - data: RootRef): SelectorKey {.discardable.} = - var event = createEventStruct(events, fd) - if events != {}: - if epoll_ctl(s.epollFD, EPOLL_CTL_ADD, fd, addr(event)) != 0: - raiseOSError(osLastError()) - - var key = SelectorKey(fd: fd, events: events, data: data) - - s.fds[fd] = key - result = key - - proc update*(s: Selector, fd: SocketHandle, - events: set[Event]): SelectorKey {.discardable.} = - if s.fds[fd].events != events: - if events == {}: - # This fd is idle -- it should not be registered to epoll. - # But it should remain a part of this selector instance. - # This is to prevent epoll_wait from returning immediately - # because its got fds which are waiting for no events and - # are therefore constantly ready. (leading to 100% CPU usage). - if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, fd, nil) != 0: - raiseOSError(osLastError()) - s.fds[fd].events = events - else: - var event = createEventStruct(events, fd) - if s.fds[fd].events == {}: - # This fd is idle. It's not a member of this epoll instance and must - # be re-registered. 
- if epoll_ctl(s.epollFD, EPOLL_CTL_ADD, fd, addr(event)) != 0: - raiseOSError(osLastError()) - else: - if epoll_ctl(s.epollFD, EPOLL_CTL_MOD, fd, addr(event)) != 0: - raiseOSError(osLastError()) - s.fds[fd].events = events - - result = s.fds[fd] - - proc unregister*(s: Selector, fd: SocketHandle): SelectorKey {.discardable.} = - if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, fd, nil) != 0: - let err = osLastError() - if err.cint notin {ENOENT, EBADF}: # TODO: Why do we sometimes get an EBADF? Is this normal? - raiseOSError(err) - result = s.fds[fd] - s.fds.del(fd) - - proc close*(s: Selector) = - if s.epollFD.close() != 0: raiseOSError(osLastError()) - dealloc(addr s.events) # TODO: Test this - - proc epollHasFd(s: Selector, fd: SocketHandle): bool = - result = true - var event = createEventStruct(s.fds[fd].events, fd) - if epoll_ctl(s.epollFD, EPOLL_CTL_MOD, fd, addr(event)) != 0: - let err = osLastError() - if err.cint in {ENOENT, EBADF}: - return false - raiseOSError(osLastError()) - - proc select*(s: Selector, timeout: int): seq[ReadyInfo] = + proc setData*[T](s: Selector[T], fd: SocketHandle|int, data: var T): bool = + ## Associate application-defined `data` with descriptor `fd`. ## - ## The ``events`` field of the returned ``key`` contains the original events - ## for which the ``fd`` was bound. This is contrary to the ``events`` field - ## of the ``TReadyInfo`` tuple which determines which events are ready - ## on the ``fd``. - result = @[] - let evNum = epoll_wait(s.epollFD, addr s.events[0], 64.cint, timeout.cint) - if evNum < 0: - let err = osLastError() - if err.cint == EINTR: - return @[] - raiseOSError(osLastError()) - if evNum == 0: return @[] - for i in 0 .. 
<evNum: - let fd = s.events[i].data.fd.SocketHandle - - var evSet: set[Event] = {} - if (s.events[i].events and EPOLLERR) != 0 or (s.events[i].events and EPOLLHUP) != 0: evSet = evSet + {EvError} - if (s.events[i].events and EPOLLIN) != 0: evSet = evSet + {EvRead} - if (s.events[i].events and EPOLLOUT) != 0: evSet = evSet + {EvWrite} - let selectorKey = s.fds[fd] - assert selectorKey != nil - result.add((selectorKey, evSet)) - - #echo("Epoll: ", result[i].key.fd, " ", result[i].events, " ", result[i].key.events) - - proc newSelector*(): Selector = - new result - result.epollFD = epoll_create(64) - #result.events = cast[array[64, epoll_event]](alloc0(sizeof(epoll_event)*64)) - result.fds = initTable[SocketHandle, SelectorKey]() - if result.epollFD < 0: - raiseOSError(osLastError()) - - proc contains*(s: Selector, fd: SocketHandle): bool = - ## Determines whether selector contains a file descriptor. - if s.fds.hasKey(fd): - # Ensure the underlying epoll instance still contains this fd. - if s.fds[fd].events != {}: - result = epollHasFd(s, fd) - else: - result = true - else: - return false + ## Returns `true`, if data was successfully updated, `false` otherwise. - proc `[]`*(s: Selector, fd: SocketHandle): SelectorKey = - ## Retrieves the selector key for ``fd``. - return s.fds[fd] + template isEmpty*[T](s: Selector[T]): bool = # TODO: Why is this a template? + ## Returns `true`, if there are no registered events or descriptors + ## in selector. -elif not defined(nimdoc): - # TODO: kqueue for bsd/mac os x. 
- type - Selector* = ref object - fds: Table[SocketHandle, SelectorKey] - - proc register*(s: Selector, fd: SocketHandle, events: set[Event], - data: RootRef): SelectorKey {.discardable.} = - if s.fds.hasKey(fd): - raise newException(ValueError, "File descriptor already exists.") - var sk = SelectorKey(fd: fd, events: events, data: data) - s.fds[fd] = sk - result = sk - - proc update*(s: Selector, fd: SocketHandle, - events: set[Event]): SelectorKey {.discardable.} = - if not s.fds.hasKey(fd): - raise newException(ValueError, "File descriptor not found.") - - s.fds[fd].events = events - result = s.fds[fd] - - proc unregister*(s: Selector, fd: SocketHandle): SelectorKey {.discardable.} = - result = s.fds[fd] - s.fds.del(fd) - - proc close*(s: Selector) = discard - - proc timeValFromMilliseconds(timeout: int): TimeVal = - if timeout != -1: - var seconds = timeout div 1000 - result.tv_sec = seconds.int32 - result.tv_usec = ((timeout - seconds * 1000) * 1000).int32 - - proc createFdSet(rd, wr: var TFdSet, fds: Table[SocketHandle, SelectorKey], - m: var int) = - FD_ZERO(rd); FD_ZERO(wr) - for k, v in pairs(fds): - if EvRead in v.events: - m = max(m, int(k)) - FD_SET(k, rd) - if EvWrite in v.events: - m = max(m, int(k)) - FD_SET(k, wr) - - proc getReadyFDs(rd, wr: var TFdSet, fds: Table[SocketHandle, SelectorKey]): - seq[ReadyInfo] = - result = @[] - for k, v in pairs(fds): - var events: set[Event] = {} - if FD_ISSET(k, rd) != 0'i32: - events = events + {EvRead} - if FD_ISSET(k, wr) != 0'i32: - events = events + {EvWrite} - result.add((v, events)) - - proc select(fds: Table[SocketHandle, SelectorKey], timeout = 500): - seq[ReadyInfo] = - var tv {.noInit.}: TimeVal = timeValFromMilliseconds(timeout) - - var rd, wr: TFdSet - var m = 0 - createFdSet(rd, wr, fds, m) - - var retCode = 0 - if timeout != -1: - retCode = int(select(cint(m+1), addr(rd), addr(wr), nil, addr(tv))) - else: - retCode = int(select(cint(m+1), addr(rd), addr(wr), nil, nil)) - - if retCode < 0: - 
raiseOSError(osLastError()) - elif retCode == 0: - return @[] - else: - return getReadyFDs(rd, wr, fds) - - proc select*(s: Selector, timeout: int): seq[ReadyInfo] = - result = select(s.fds, timeout) + template withData*[T](s: Selector[T], fd: SocketHandle|int, value, + body: untyped) = + ## Retrieves the application-data assigned with descriptor `fd` + ## to `value`. This `value` can be modified in the scope of + ## the `withData` call. + ## + ## ```nim + ## s.withData(fd, value) do: + ## # block is executed only if `fd` registered in selector `s` + ## value.uid = 1000 + ## ``` + + template withData*[T](s: Selector[T], fd: SocketHandle|int, value, + body1, body2: untyped) = + ## Retrieves the application-data assigned with descriptor `fd` + ## to `value`. This `value` can be modified in the scope of + ## the `withData` call. + ## + ## ```nim + ## s.withData(fd, value) do: + ## # block is executed only if `fd` registered in selector `s`. + ## value.uid = 1000 + ## do: + ## # block is executed if `fd` not registered in selector `s`. + ## raise + ## ``` + + proc contains*[T](s: Selector[T], fd: SocketHandle|int): bool {.inline.} = + ## Determines whether selector contains a file descriptor. - proc newSelector*(): Selector = - new result - result.fds = initTable[SocketHandle, SelectorKey]() + proc getFd*[T](s: Selector[T]): int = + ## Retrieves the underlying selector's file descriptor. + ## + ## For *poll* and *select* selectors `-1` is returned. - proc contains*(s: Selector, fd: SocketHandle): bool = - return s.fds.hasKey(fd) +else: + import std/strutils + when hasThreadSupport: + import std/locks - proc `[]`*(s: Selector, fd: SocketHandle): SelectorKey = - return s.fds[fd] + type + SharedArray[T] = UncheckedArray[T] -proc contains*(s: Selector, key: SelectorKey): bool = - ## Determines whether selector contains this selector key. More accurate - ## than checking if the file descriptor is in the selector because it - ## ensures that the keys are equal. 
File descriptors may not always be - ## unique especially when an fd is closed and then a new one is opened, - ## the new one may have the same value. - return key.fd in s and s.fds[key.fd] == key + proc allocSharedArray[T](nsize: int): ptr SharedArray[T] = + result = cast[ptr SharedArray[T]](allocShared0(sizeof(T) * nsize)) -{.deprecated: [TEvent: Event, PSelectorKey: SelectorKey, - TReadyInfo: ReadyInfo, PSelector: Selector].} + proc reallocSharedArray[T](sa: ptr SharedArray[T], oldsize, nsize: int): ptr SharedArray[T] = + result = cast[ptr SharedArray[T]](reallocShared0(sa, oldsize * sizeof(T), sizeof(T) * nsize)) + proc deallocSharedArray[T](sa: ptr SharedArray[T]) = + deallocShared(cast[pointer](sa)) + type + Event* {.pure.} = enum + Read, Write, Timer, Signal, Process, Vnode, User, Error, Oneshot, + Finished, VnodeWrite, VnodeDelete, VnodeExtend, VnodeAttrib, VnodeLink, + VnodeRename, VnodeRevoke -when isMainModule and not defined(nimdoc): - # Select() - import sockets type - SockWrapper = ref object of RootObj - sock: Socket - - var sock = socket() - if sock == sockets.invalidSocket: raiseOSError(osLastError()) - #sock.setBlocking(false) - sock.connect("irc.freenode.net", Port(6667)) - - var selector = newSelector() - var data = SockWrapper(sock: sock) - let key = selector.register(sock.getFD, {EvWrite}, data) - var i = 0 - while true: - let ready = selector.select(1000) - echo ready.len - if ready.len > 0: echo ready[0].events - i.inc - if i == 6: - assert selector.unregister(sock.getFD).fd == sock.getFD - selector.close() - break + IOSelectorsException* = object of CatchableError + + ReadyKey* = object + fd*: int + events*: set[Event] + errorCode*: OSErrorCode + + SelectorKey[T] = object + ident: int + events: set[Event] + param: int + data: T + + const + InvalidIdent = -1 + + proc raiseIOSelectorsError[T](message: T) = + var msg = "" + when T is string: + msg.add(message) + elif T is OSErrorCode: + msg.add(osErrorMsg(message) & " (code: " & $int(message) 
& ")") + else: + msg.add("Internal Error\n") + var err = newException(IOSelectorsException, msg) + raise err + + proc setNonBlocking(fd: cint) {.inline.} = + setBlocking(fd.SocketHandle, false) + + when not defined(windows): + import std/posix + + template setKey(s, pident, pevents, pparam, pdata: untyped) = + var skey = addr(s.fds[pident]) + skey.ident = pident + skey.events = pevents + skey.param = pparam + skey.data = pdata + + when ioselSupportedPlatform: + template blockSignals(newmask: var Sigset, oldmask: var Sigset) = + when hasThreadSupport: + if posix.pthread_sigmask(SIG_BLOCK, newmask, oldmask) == -1: + raiseIOSelectorsError(osLastError()) + else: + if posix.sigprocmask(SIG_BLOCK, newmask, oldmask) == -1: + raiseIOSelectorsError(osLastError()) + + template unblockSignals(newmask: var Sigset, oldmask: var Sigset) = + when hasThreadSupport: + if posix.pthread_sigmask(SIG_UNBLOCK, newmask, oldmask) == -1: + raiseIOSelectorsError(osLastError()) + else: + if posix.sigprocmask(SIG_UNBLOCK, newmask, oldmask) == -1: + raiseIOSelectorsError(osLastError()) + + template clearKey[T](key: ptr SelectorKey[T]) = + var empty: T + key.ident = InvalidIdent + key.events = {} + key.data = empty + + proc verifySelectParams(timeout: int) = + # Timeout of -1 means: wait forever + # Anything higher is the time to wait in milliseconds. + doAssert(timeout >= -1, "Cannot select with a negative value, got: " & $timeout) + + when defined(linux) or defined(windows) or defined(macosx) or defined(bsd) or + defined(solaris) or defined(zephyr) or defined(freertos) or defined(nuttx) or defined(haiku): + template maxDescriptors*(): int = + ## Returns the maximum number of active file descriptors for the current + ## process. This involves a system call. For now `maxDescriptors` is + ## supported on the following OSes: Windows, Linux, OSX, BSD, Solaris. 
+ when defined(windows): + 16_700_000 + elif defined(zephyr) or defined(freertos): + FD_MAX + else: + var fdLim: RLimit + var res = int(getrlimit(RLIMIT_NOFILE, fdLim)) + if res >= 0: + res = int(fdLim.rlim_cur) - 1 + res + + when defined(nimIoselector): + when nimIoselector == "epoll": + include ioselects/ioselectors_epoll + elif nimIoselector == "kqueue": + include ioselects/ioselectors_kqueue + elif nimIoselector == "poll": + include ioselects/ioselectors_poll + elif nimIoselector == "select": + include ioselects/ioselectors_select + else: + {.fatal: "Unknown nimIoselector specified by define.".} + elif defined(linux) and not defined(emscripten): + include ioselects/ioselectors_epoll + elif bsdPlatform: + include ioselects/ioselectors_kqueue + elif defined(windows): + include ioselects/ioselectors_select + elif defined(solaris): + include ioselects/ioselectors_poll # need to replace it with event ports + elif defined(genode): + include ioselects/ioselectors_select # TODO: use the native VFS layer + elif defined(nintendoswitch): + include ioselects/ioselectors_select + elif defined(freertos) or defined(lwip): + include ioselects/ioselectors_select + elif defined(zephyr): + include ioselects/ioselectors_poll + elif defined(nuttx): + include ioselects/ioselectors_epoll + else: + include ioselects/ioselectors_poll diff --git a/lib/pure/smtp.nim b/lib/pure/smtp.nim deleted file mode 100644 index 81198f9e1..000000000 --- a/lib/pure/smtp.nim +++ /dev/null @@ -1,277 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2012 Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## This module implements the SMTP client protocol as specified by RFC 5321, -## this can be used to send mail to any SMTP Server. -## -## This module also implements the protocol used to format messages, -## as specified by RFC 2822. -## -## Example gmail use: -## -## -## .. 
code-block:: Nim -## var msg = createMessage("Hello from Nim's SMTP", -## "Hello!.\n Is this awesome or what?", -## @["foo@gmail.com"]) -## var smtp = connect("smtp.gmail.com", 465, true, true) -## smtp.auth("username", "password") -## smtp.sendmail("username@gmail.com", @["foo@gmail.com"], $msg) -## -## -## For SSL support this module relies on OpenSSL. If you want to -## enable SSL, compile with ``-d:ssl``. - -import net, strutils, strtabs, base64, os -import asyncnet, asyncdispatch - -type - Smtp* = object - sock: Socket - debug: bool - - Message* = object - msgTo: seq[string] - msgCc: seq[string] - msgSubject: string - msgOtherHeaders: StringTableRef - msgBody: string - - ReplyError* = object of IOError - - AsyncSmtp* = ref object - sock: AsyncSocket - address: string - port: Port - useSsl: bool - debug: bool - -{.deprecated: [EInvalidReply: ReplyError, TMessage: Message, TSMTP: Smtp].} - -proc debugSend(smtp: Smtp, cmd: string) = - if smtp.debug: - echo("C:" & cmd) - smtp.sock.send(cmd) - -proc debugRecv(smtp: var Smtp): TaintedString = - var line = TaintedString"" - smtp.sock.readLine(line) - - if smtp.debug: - echo("S:" & line.string) - return line - -proc quitExcpt(smtp: Smtp, msg: string) = - smtp.debugSend("QUIT") - raise newException(ReplyError, msg) - -proc checkReply(smtp: var Smtp, reply: string) = - var line = smtp.debugRecv() - if not line.string.startswith(reply): - quitExcpt(smtp, "Expected " & reply & " reply, got: " & line.string) - -const compiledWithSsl = defined(ssl) - -when not defined(ssl): - type PSSLContext = ref object - let defaultSSLContext: PSSLContext = nil -else: - let defaultSSLContext = newContext(verifyMode = CVerifyNone) - -proc connect*(address: string, port = Port(25), - ssl = false, debug = false, - sslContext = defaultSSLContext): Smtp = - ## Establishes a connection with a SMTP server. - ## May fail with ReplyError or with a socket error. 
- result.sock = newSocket() - if ssl: - when compiledWithSsl: - sslContext.wrapSocket(result.sock) - else: - raise newException(ESystem, - "SMTP module compiled without SSL support") - result.sock.connect(address, port) - result.debug = debug - - result.checkReply("220") - result.debugSend("HELO " & address & "\c\L") - result.checkReply("250") - -proc auth*(smtp: var Smtp, username, password: string) = - ## Sends an AUTH command to the server to login as the `username` - ## using `password`. - ## May fail with ReplyError. - - smtp.debugSend("AUTH LOGIN\c\L") - smtp.checkReply("334") # TODO: Check whether it's asking for the "Username:" - # i.e "334 VXNlcm5hbWU6" - smtp.debugSend(encode(username) & "\c\L") - smtp.checkReply("334") # TODO: Same as above, only "Password:" (I think?) - - smtp.debugSend(encode(password) & "\c\L") - smtp.checkReply("235") # Check whether the authentification was successful. - -proc sendmail*(smtp: var Smtp, fromaddr: string, - toaddrs: seq[string], msg: string) = - ## Sends `msg` from `fromaddr` to `toaddr`. - ## Messages may be formed using ``createMessage`` by converting the - ## Message into a string. - - smtp.debugSend("MAIL FROM:<" & fromaddr & ">\c\L") - smtp.checkReply("250") - for address in items(toaddrs): - smtp.debugSend("RCPT TO:<" & address & ">\c\L") - smtp.checkReply("250") - - # Send the message - smtp.debugSend("DATA " & "\c\L") - smtp.checkReply("354") - smtp.debugSend(msg & "\c\L") - smtp.debugSend(".\c\L") - smtp.checkReply("250") - -proc close*(smtp: Smtp) = - ## Disconnects from the SMTP server and closes the socket. - smtp.debugSend("QUIT\c\L") - smtp.sock.close() - -proc createMessage*(mSubject, mBody: string, mTo, mCc: seq[string], - otherHeaders: openarray[tuple[name, value: string]]): Message = - ## Creates a new MIME compliant message. 
- result.msgTo = mTo - result.msgCc = mCc - result.msgSubject = mSubject - result.msgBody = mBody - result.msgOtherHeaders = newStringTable() - for n, v in items(otherHeaders): - result.msgOtherHeaders[n] = v - -proc createMessage*(mSubject, mBody: string, mTo, - mCc: seq[string] = @[]): Message = - ## Alternate version of the above. - result.msgTo = mTo - result.msgCc = mCc - result.msgSubject = mSubject - result.msgBody = mBody - result.msgOtherHeaders = newStringTable() - -proc `$`*(msg: Message): string = - ## stringify for ``Message``. - result = "" - if msg.msgTo.len() > 0: - result = "TO: " & msg.msgTo.join(", ") & "\c\L" - if msg.msgCc.len() > 0: - result.add("CC: " & msg.msgCc.join(", ") & "\c\L") - # TODO: Folding? i.e when a line is too long, shorten it... - result.add("Subject: " & msg.msgSubject & "\c\L") - for key, value in pairs(msg.msgOtherHeaders): - result.add(key & ": " & value & "\c\L") - - result.add("\c\L") - result.add(msg.msgBody) - -proc newAsyncSmtp*(address: string, port: Port, useSsl = false, - sslContext = defaultSslContext): AsyncSmtp = - ## Creates a new ``AsyncSmtp`` instance. - new result - result.address = address - result.port = port - result.useSsl = useSsl - - result.sock = newAsyncSocket() - if useSsl: - when compiledWithSsl: - sslContext.wrapSocket(result.sock) - else: - raise newException(ESystem, - "SMTP module compiled without SSL support") - -proc quitExcpt(smtp: AsyncSmtp, msg: string): Future[void] = - var retFuture = newFuture[void]() - var sendFut = smtp.sock.send("QUIT") - sendFut.callback = - proc () = - # TODO: Fix this in async procs. - raise newException(ReplyError, msg) - return retFuture - -proc checkReply(smtp: AsyncSmtp, reply: string) {.async.} = - var line = await smtp.sock.recvLine() - if not line.string.startswith(reply): - await quitExcpt(smtp, "Expected " & reply & " reply, got: " & line.string) - -proc connect*(smtp: AsyncSmtp) {.async.} = - ## Establishes a connection with a SMTP server. 
- ## May fail with ReplyError or with a socket error. - await smtp.sock.connect(smtp.address, smtp.port) - - await smtp.checkReply("220") - await smtp.sock.send("HELO " & smtp.address & "\c\L") - await smtp.checkReply("250") - -proc auth*(smtp: AsyncSmtp, username, password: string) {.async.} = - ## Sends an AUTH command to the server to login as the `username` - ## using `password`. - ## May fail with ReplyError. - - await smtp.sock.send("AUTH LOGIN\c\L") - await smtp.checkReply("334") # TODO: Check whether it's asking for the "Username:" - # i.e "334 VXNlcm5hbWU6" - await smtp.sock.send(encode(username) & "\c\L") - await smtp.checkReply("334") # TODO: Same as above, only "Password:" (I think?) - - await smtp.sock.send(encode(password) & "\c\L") - await smtp.checkReply("235") # Check whether the authentification was successful. - -proc sendMail*(smtp: AsyncSmtp, fromAddr: string, - toAddrs: seq[string], msg: string) {.async.} = - ## Sends ``msg`` from ``fromAddr`` to the addresses specified in ``toAddrs``. - ## Messages may be formed using ``createMessage`` by converting the - ## Message into a string. - - await smtp.sock.send("MAIL FROM:<" & fromAddr & ">\c\L") - await smtp.checkReply("250") - for address in items(toAddrs): - await smtp.sock.send("RCPT TO:<" & address & ">\c\L") - await smtp.checkReply("250") - - # Send the message - await smtp.sock.send("DATA " & "\c\L") - await smtp.checkReply("354") - await smtp.sock.send(msg & "\c\L") - await smtp.sock.send(".\c\L") - await smtp.checkReply("250") - -proc close*(smtp: AsyncSmtp) {.async.} = - ## Disconnects from the SMTP server and closes the socket. 
- await smtp.sock.send("QUIT\c\L") - smtp.sock.close() - -when isMainModule: - #var msg = createMessage("Test subject!", - # "Hello, my name is dom96.\n What\'s yours?", @["dominik@localhost"]) - #echo(msg) - - #var smtp = connect("localhost", 25, False, True) - #smtp.sendmail("root@localhost", @["dominik@localhost"], $msg) - - #echo(decode("a17sm3701420wbe.12")) - proc main() {.async.} = - var client = newAsyncSmtp("smtp.gmail.com", Port(465), true) - await client.connect() - await client.auth("johndoe", "foo") - var msg = createMessage("Hello from Nim's SMTP!", - "Hello!!!!.\n Is this awesome or what?", - @["blah@gmail.com"]) - echo(msg) - await client.sendMail("blah@gmail.com", @["blah@gmail.com"], $msg) - - await client.close() - - waitFor main() diff --git a/lib/pure/smtp.nim.cfg b/lib/pure/smtp.nim.cfg deleted file mode 100644 index 521e21de4..000000000 --- a/lib/pure/smtp.nim.cfg +++ /dev/null @@ -1 +0,0 @@ --d:ssl diff --git a/lib/pure/sockets.nim b/lib/pure/sockets.nim deleted file mode 100644 index 3afb545c8..000000000 --- a/lib/pure/sockets.nim +++ /dev/null @@ -1,1740 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2013 Andreas Rumpf, Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## **Warning:** Since version 0.10.2 this module is deprecated. -## Use the `net <net.html>`_ or the -## `rawsockets <rawsockets.html>`_ module instead. -## -## This module implements portable sockets, it supports a mix of different types -## of sockets. Sockets are buffered by default meaning that data will be -## received in ``BufferSize`` (4000) sized chunks, buffering -## behaviour can be disabled by setting the ``buffered`` parameter when calling -## the ``socket`` function to `false`. Be aware that some functions may not yet -## support buffered sockets (mainly the recvFrom function). -## -## Most procedures raise OSError on error, but some may return ``-1`` or a -## boolean ``false``. 
-## -## SSL is supported through the OpenSSL library. This support can be activated -## by compiling with the ``-d:ssl`` switch. When an SSL socket is used it will -## raise ESSL exceptions when SSL errors occur. -## -## Asynchronous sockets are supported, however a better alternative is to use -## the `asyncio <asyncio.html>`_ module. - -{.deprecated.} - -include "system/inclrtl" - -{.deadCodeElim: on.} - -when hostOS == "solaris": - {.passl: "-lsocket -lnsl".} - -import os, parseutils -from times import epochTime -import unsigned - -when defined(ssl): - import openssl - -when defined(Windows): - import winlean -else: - import posix - -# Note: The enumerations are mapped to Window's constants. - -when defined(ssl): - - type - SSLError* = object of Exception - - SSLCVerifyMode* = enum - CVerifyNone, CVerifyPeer - - SSLProtVersion* = enum - protSSLv2, protSSLv3, protTLSv1, protSSLv23 - - SSLContext* = distinct SSLCTX - - SSLAcceptResult* = enum - AcceptNoClient = 0, AcceptNoHandshake, AcceptSuccess - - {.deprecated: [ESSL: SSLError, TSSLCVerifyMode: SSLCVerifyMode, - TSSLProtVersion: SSLProtVersion, PSSLContext: SSLContext, - TSSLAcceptResult: SSLAcceptResult].} - -const - BufferSize*: int = 4000 ## size of a buffered socket's buffer - -type - TSocketImpl = object ## socket type - fd: SocketHandle - case isBuffered: bool # determines whether this socket is buffered. - of true: - buffer: array[0..BufferSize, char] - currPos: int # current index in buffer - bufLen: int # current length of buffer - of false: nil - when defined(ssl): - case isSsl: bool - of true: - sslHandle: SSLPtr - sslContext: SSLContext - sslNoHandshake: bool # True if needs handshake. - sslHasPeekChar: bool - sslPeekChar: char - of false: nil - nonblocking: bool - - Socket* = ref TSocketImpl - - Port* = distinct uint16 ## port type - - Domain* = enum ## domain, which specifies the protocol family of the - ## created socket. Other domains than those that are listed - ## here are unsupported. 
- AF_UNIX, ## for local socket (using a file). Unsupported on Windows. - AF_INET = 2, ## for network protocol IPv4 or - AF_INET6 = 23 ## for network protocol IPv6. - - SockType* = enum ## second argument to `socket` proc - SOCK_STREAM = 1, ## reliable stream-oriented service or Stream Sockets - SOCK_DGRAM = 2, ## datagram service or Datagram Sockets - SOCK_RAW = 3, ## raw protocols atop the network layer. - SOCK_SEQPACKET = 5 ## reliable sequenced packet service - - Protocol* = enum ## third argument to `socket` proc - IPPROTO_TCP = 6, ## Transmission control protocol. - IPPROTO_UDP = 17, ## User datagram protocol. - IPPROTO_IP, ## Internet protocol. Unsupported on Windows. - IPPROTO_IPV6, ## Internet Protocol Version 6. Unsupported on Windows. - IPPROTO_RAW, ## Raw IP Packets Protocol. Unsupported on Windows. - IPPROTO_ICMP ## Control message protocol. Unsupported on Windows. - - Servent* = object ## information about a service - name*: string - aliases*: seq[string] - port*: Port - proto*: string - - Hostent* = object ## information about a given host - name*: string - aliases*: seq[string] - addrtype*: Domain - length*: int - addrList*: seq[string] - - SOBool* = enum ## Boolean socket options. 
- OptAcceptConn, OptBroadcast, OptDebug, OptDontRoute, OptKeepAlive, - OptOOBInline, OptReuseAddr - - RecvLineResult* = enum ## result for recvLineAsync - RecvFullLine, RecvPartialLine, RecvDisconnected, RecvFail - - ReadLineResult* = enum ## result for readLineAsync - ReadFullLine, ReadPartialLine, ReadDisconnected, ReadNone - - TimeoutError* = object of Exception - -{.deprecated: [TSocket: Socket, TType: SockType, TPort: Port, TDomain: Domain, - TProtocol: Protocol, TServent: Servent, THostent: Hostent, - TSOBool: SOBool, TRecvLineResult: RecvLineResult, - TReadLineResult: ReadLineResult, ETimeout: TimeoutError].} - -when defined(booting): - let invalidSocket*: Socket = nil ## invalid socket -else: - const invalidSocket*: Socket = nil ## invalid socket - -when defined(windows): - let - osInvalidSocket = winlean.INVALID_SOCKET -else: - let - osInvalidSocket = posix.INVALID_SOCKET - -proc newTSocket(fd: SocketHandle, isBuff: bool): Socket = - if fd == osInvalidSocket: - return nil - new(result) - result.fd = fd - result.isBuffered = isBuff - if isBuff: - result.currPos = 0 - result.nonblocking = false - -proc `==`*(a, b: Port): bool {.borrow.} - ## ``==`` for ports. - -proc `$`*(p: Port): string {.borrow.} - ## returns the port number as a string - -proc ntohl*(x: int32): int32 = - ## Converts 32-bit integers from network to host byte order. - ## On machines where the host byte order is the same as network byte order, - ## this is a no-op; otherwise, it performs a 4-byte swap operation. - when cpuEndian == bigEndian: result = x - else: result = (x shr 24'i32) or - (x shr 8'i32 and 0xff00'i32) or - (x shl 8'i32 and 0xff0000'i32) or - (x shl 24'i32) - -proc ntohs*(x: int16): int16 = - ## Converts 16-bit integers from network to host byte order. On machines - ## where the host byte order is the same as network byte order, this is - ## a no-op; otherwise, it performs a 2-byte swap operation. 
- when cpuEndian == bigEndian: result = x - else: result = (x shr 8'i16) or (x shl 8'i16) - -proc htonl*(x: int32): int32 = - ## Converts 32-bit integers from host to network byte order. On machines - ## where the host byte order is the same as network byte order, this is - ## a no-op; otherwise, it performs a 4-byte swap operation. - result = sockets.ntohl(x) - -proc htons*(x: int16): int16 = - ## Converts 16-bit positive integers from host to network byte order. - ## On machines where the host byte order is the same as network byte - ## order, this is a no-op; otherwise, it performs a 2-byte swap operation. - result = sockets.ntohs(x) - -when defined(Posix): - proc toInt(domain: Domain): cint = - case domain - of AF_UNIX: result = posix.AF_UNIX - of AF_INET: result = posix.AF_INET - of AF_INET6: result = posix.AF_INET6 - else: discard - - proc toInt(typ: SockType): cint = - case typ - of SOCK_STREAM: result = posix.SOCK_STREAM - of SOCK_DGRAM: result = posix.SOCK_DGRAM - of SOCK_SEQPACKET: result = posix.SOCK_SEQPACKET - of SOCK_RAW: result = posix.SOCK_RAW - else: discard - - proc toInt(p: Protocol): cint = - case p - of IPPROTO_TCP: result = posix.IPPROTO_TCP - of IPPROTO_UDP: result = posix.IPPROTO_UDP - of IPPROTO_IP: result = posix.IPPROTO_IP - of IPPROTO_IPV6: result = posix.IPPROTO_IPV6 - of IPPROTO_RAW: result = posix.IPPROTO_RAW - of IPPROTO_ICMP: result = posix.IPPROTO_ICMP - else: discard - -else: - proc toInt(domain: Domain): cint = - result = toU16(ord(domain)) - - proc toInt(typ: SockType): cint = - result = cint(ord(typ)) - - proc toInt(p: Protocol): cint = - result = cint(ord(p)) - -proc socket*(domain: Domain = AF_INET, typ: SockType = SOCK_STREAM, - protocol: Protocol = IPPROTO_TCP, buffered = true): Socket = - ## Creates a new socket; returns `InvalidSocket` if an error occurs. - - # TODO: Perhaps this should just raise EOS when an error occurs. 
- when defined(Windows): - result = newTSocket(winlean.socket(ord(domain), ord(typ), ord(protocol)), buffered) - else: - result = newTSocket(posix.socket(toInt(domain), toInt(typ), toInt(protocol)), buffered) - -when defined(ssl): - CRYPTO_malloc_init() - SslLibraryInit() - SslLoadErrorStrings() - ErrLoadBioStrings() - OpenSSL_add_all_algorithms() - - proc raiseSSLError(s = "") = - if s != "": - raise newException(SSLError, s) - let err = ErrPeekLastError() - if err == 0: - raise newException(SSLError, "No error reported.") - if err == -1: - raiseOSError(osLastError()) - var errStr = ErrErrorString(err, nil) - raise newException(SSLError, $errStr) - - # http://simplestcodings.blogspot.co.uk/2010/08/secure-server-client-using-openssl-in-c.html - proc loadCertificates(ctx: SSL_CTX, certFile, keyFile: string) = - if certFile != "" and not existsFile(certFile): - raise newException(system.IOError, "Certificate file could not be found: " & certFile) - if keyFile != "" and not existsFile(keyFile): - raise newException(system.IOError, "Key file could not be found: " & keyFile) - - if certFile != "": - var ret = SSLCTXUseCertificateChainFile(ctx, certFile) - if ret != 1: - raiseSslError() - - # TODO: Password? www.rtfm.com/openssl-examples/part1.pdf - if keyFile != "": - if SSL_CTX_use_PrivateKey_file(ctx, keyFile, - SSL_FILETYPE_PEM) != 1: - raiseSslError() - - if SSL_CTX_check_private_key(ctx) != 1: - raiseSslError("Verification of private key file failed.") - - proc newContext*(protVersion = protSSLv23, verifyMode = CVerifyPeer, - certFile = "", keyFile = ""): SSLContext = - ## Creates an SSL context. - ## - ## Protocol version specifies the protocol to use. SSLv2, SSLv3, TLSv1 are - ## are available with the addition of ``ProtSSLv23`` which allows for - ## compatibility with all of them. 
- ## - ## There are currently only two options for verify mode; - ## one is ``CVerifyNone`` and with it certificates will not be verified - ## the other is ``CVerifyPeer`` and certificates will be verified for - ## it, ``CVerifyPeer`` is the safest choice. - ## - ## The last two parameters specify the certificate file path and the key file - ## path, a server socket will most likely not work without these. - ## Certificates can be generated using the following command: - ## ``openssl req -x509 -nodes -days 365 -newkey rsa:1024 -keyout mycert.pem -out mycert.pem``. - var newCTX: SSL_CTX - case protVersion - of protSSLv23: - newCTX = SSL_CTX_new(SSLv23_method()) # SSlv2,3 and TLS1 support. - of protSSLv2: - when not defined(linux) and not defined(OpenBSD): - newCTX = SSL_CTX_new(SSLv2_method()) - else: - raiseSslError() - of protSSLv3: - newCTX = SSL_CTX_new(SSLv3_method()) - of protTLSv1: - newCTX = SSL_CTX_new(TLSv1_method()) - - if newCTX.SSLCTXSetCipherList("ALL") != 1: - raiseSslError() - case verifyMode - of CVerifyPeer: - newCTX.SSLCTXSetVerify(SSLVerifyPeer, nil) - of CVerifyNone: - newCTX.SSLCTXSetVerify(SSLVerifyNone, nil) - if newCTX == nil: - raiseSslError() - - discard newCTX.SSLCTXSetMode(SSL_MODE_AUTO_RETRY) - newCTX.loadCertificates(certFile, keyFile) - return SSLContext(newCTX) - - proc wrapSocket*(ctx: SSLContext, socket: Socket) = - ## Wraps a socket in an SSL context. This function effectively turns - ## ``socket`` into an SSL socket. - ## - ## **Disclaimer**: This code is not well tested, may be very unsafe and - ## prone to security vulnerabilities. 
- - socket.isSSL = true - socket.sslContext = ctx - socket.sslHandle = SSLNew(SSLCTX(socket.sslContext)) - socket.sslNoHandshake = false - socket.sslHasPeekChar = false - if socket.sslHandle == nil: - raiseSslError() - - if SSLSetFd(socket.sslHandle, socket.fd) != 1: - raiseSslError() - -proc raiseSocketError*(socket: Socket, err: int = -1, async = false) = - ## Raises proper errors based on return values of ``recv`` functions. - ## - ## If ``async`` is ``True`` no error will be thrown in the case when the - ## error was caused by no data being available to be read. - ## - ## If ``err`` is not lower than 0 no exception will be raised. - when defined(ssl): - if socket.isSSL: - if err <= 0: - var ret = SSLGetError(socket.sslHandle, err.cint) - case ret - of SSL_ERROR_ZERO_RETURN: - raiseSslError("TLS/SSL connection failed to initiate, socket closed prematurely.") - of SSL_ERROR_WANT_CONNECT, SSL_ERROR_WANT_ACCEPT: - if async: - return - else: raiseSslError("Not enough data on socket.") - of SSL_ERROR_WANT_WRITE, SSL_ERROR_WANT_READ: - if async: - return - else: raiseSslError("Not enough data on socket.") - of SSL_ERROR_WANT_X509_LOOKUP: - raiseSslError("Function for x509 lookup has been called.") - of SSL_ERROR_SYSCALL, SSL_ERROR_SSL: - raiseSslError() - else: raiseSslError("Unknown Error") - - if err == -1 and not (when defined(ssl): socket.isSSL else: false): - let lastError = osLastError() - if async: - when defined(windows): - if lastError.int32 == WSAEWOULDBLOCK: - return - else: raiseOSError(lastError) - else: - if lastError.int32 == EAGAIN or lastError.int32 == EWOULDBLOCK: - return - else: raiseOSError(lastError) - else: raiseOSError(lastError) - -proc listen*(socket: Socket, backlog = SOMAXCONN) {.tags: [ReadIOEffect].} = - ## Marks ``socket`` as accepting connections. - ## ``Backlog`` specifies the maximum length of the - ## queue of pending connections. 
- if listen(socket.fd, cint(backlog)) < 0'i32: raiseOSError(osLastError()) - -proc invalidIp4(s: string) {.noreturn, noinline.} = - raise newException(ValueError, "invalid ip4 address: " & s) - -proc parseIp4*(s: string): BiggestInt = - ## parses an IP version 4 in dotted decimal form like "a.b.c.d". - ## - ## This is equivalent to `inet_ntoa`:idx:. - ## - ## Raises EInvalidValue in case of an error. - var a, b, c, d: int - var i = 0 - var j = parseInt(s, a, i) - if j <= 0: invalidIp4(s) - inc(i, j) - if s[i] == '.': inc(i) - else: invalidIp4(s) - j = parseInt(s, b, i) - if j <= 0: invalidIp4(s) - inc(i, j) - if s[i] == '.': inc(i) - else: invalidIp4(s) - j = parseInt(s, c, i) - if j <= 0: invalidIp4(s) - inc(i, j) - if s[i] == '.': inc(i) - else: invalidIp4(s) - j = parseInt(s, d, i) - if j <= 0: invalidIp4(s) - inc(i, j) - if s[i] != '\0': invalidIp4(s) - result = BiggestInt(a shl 24 or b shl 16 or c shl 8 or d) - -template gaiNim(a, p, h, list: expr): stmt = - block: - var gaiResult = getaddrinfo(a, $p, addr(h), list) - if gaiResult != 0'i32: - when defined(windows): - raiseOSError(osLastError()) - else: - raise newException(OSError, $gai_strerror(gaiResult)) - -proc bindAddr*(socket: Socket, port = Port(0), address = "") {. - tags: [ReadIOEffect].} = - ## binds an address/port number to a socket. - ## Use address string in dotted decimal form like "a.b.c.d" - ## or leave "" for any address. 
- - if address == "": - var name: Sockaddr_in - when defined(Windows): - name.sin_family = int16(ord(AF_INET)) - else: - name.sin_family = posix.AF_INET - name.sin_port = sockets.htons(int16(port)) - name.sin_addr.s_addr = sockets.htonl(INADDR_ANY) - if bindSocket(socket.fd, cast[ptr SockAddr](addr(name)), - sizeof(name).SockLen) < 0'i32: - raiseOSError(osLastError()) - else: - var hints: AddrInfo - var aiList: ptr AddrInfo = nil - hints.ai_family = toInt(AF_INET) - hints.ai_socktype = toInt(SOCK_STREAM) - hints.ai_protocol = toInt(IPPROTO_TCP) - gaiNim(address, port, hints, aiList) - if bindSocket(socket.fd, aiList.ai_addr, aiList.ai_addrlen.SockLen) < 0'i32: - raiseOSError(osLastError()) - -proc getSockName*(socket: Socket): Port = - ## returns the socket's associated port number. - var name: Sockaddr_in - when defined(Windows): - name.sin_family = int16(ord(AF_INET)) - else: - name.sin_family = posix.AF_INET - #name.sin_port = htons(cint16(port)) - #name.sin_addr.s_addr = htonl(INADDR_ANY) - var namelen = sizeof(name).SockLen - if getsockname(socket.fd, cast[ptr SockAddr](addr(name)), - addr(namelen)) == -1'i32: - raiseOSError(osLastError()) - result = Port(sockets.ntohs(name.sin_port)) - -template acceptAddrPlain(noClientRet, successRet: expr, - sslImplementation: stmt): stmt {.immediate.} = - assert(client != nil) - var sockAddress: Sockaddr_in - var addrLen = sizeof(sockAddress).SockLen - var sock = accept(server.fd, cast[ptr SockAddr](addr(sockAddress)), - addr(addrLen)) - - if sock == osInvalidSocket: - let err = osLastError() - when defined(windows): - if err.int32 == WSAEINPROGRESS: - client = invalidSocket - address = "" - when noClientRet.int == -1: - return - else: - return noClientRet - else: raiseOSError(err) - else: - if err.int32 == EAGAIN or err.int32 == EWOULDBLOCK: - client = invalidSocket - address = "" - when noClientRet.int == -1: - return - else: - return noClientRet - else: raiseOSError(err) - else: - client.fd = sock - client.isBuffered = 
server.isBuffered - sslImplementation - # Client socket is set above. - address = $inet_ntoa(sockAddress.sin_addr) - when successRet.int == -1: - return - else: - return successRet - -proc acceptAddr*(server: Socket, client: var Socket, address: var string) {. - tags: [ReadIOEffect].} = - ## Blocks until a connection is being made from a client. When a connection - ## is made sets ``client`` to the client socket and ``address`` to the address - ## of the connecting client. - ## If ``server`` is non-blocking then this function returns immediately, and - ## if there are no connections queued the returned socket will be - ## ``InvalidSocket``. - ## This function will raise EOS if an error occurs. - ## - ## The resulting client will inherit any properties of the server socket. For - ## example: whether the socket is buffered or not. - ## - ## **Note**: ``client`` must be initialised (with ``new``), this function - ## makes no effort to initialise the ``client`` variable. - ## - ## **Warning:** When using SSL with non-blocking sockets, it is best to use - ## the acceptAddrSSL procedure as this procedure will most likely block. - acceptAddrPlain(-1, -1): - when defined(ssl): - if server.isSSL: - # We must wrap the client sock in a ssl context. 
- - server.sslContext.wrapSocket(client) - let ret = SSLAccept(client.sslHandle) - while ret <= 0: - let err = SSLGetError(client.sslHandle, ret) - if err != SSL_ERROR_WANT_ACCEPT: - case err - of SSL_ERROR_ZERO_RETURN: - raiseSslError("TLS/SSL connection failed to initiate, socket closed prematurely.") - of SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE, - SSL_ERROR_WANT_CONNECT, SSL_ERROR_WANT_ACCEPT: - raiseSslError("acceptAddrSSL should be used for non-blocking SSL sockets.") - of SSL_ERROR_WANT_X509_LOOKUP: - raiseSslError("Function for x509 lookup has been called.") - of SSL_ERROR_SYSCALL, SSL_ERROR_SSL: - raiseSslError() - else: - raiseSslError("Unknown error") - -proc setBlocking*(s: Socket, blocking: bool) {.tags: [], gcsafe.} - ## Sets blocking mode on socket - -when defined(ssl): - proc acceptAddrSSL*(server: Socket, client: var Socket, - address: var string): SSLAcceptResult {. - tags: [ReadIOEffect].} = - ## This procedure should only be used for non-blocking **SSL** sockets. - ## It will immediately return with one of the following values: - ## - ## ``AcceptSuccess`` will be returned when a client has been successfully - ## accepted and the handshake has been successfully performed between - ## ``server`` and the newly connected client. - ## - ## ``AcceptNoHandshake`` will be returned when a client has been accepted - ## but no handshake could be performed. This can happen when the client - ## connects but does not yet initiate a handshake. In this case - ## ``acceptAddrSSL`` should be called again with the same parameters. - ## - ## ``AcceptNoClient`` will be returned when no client is currently attempting - ## to connect. - template doHandshake(): stmt = - when defined(ssl): - if server.isSSL: - client.setBlocking(false) - # We must wrap the client sock in a ssl context. 
- - if not client.isSSL or client.sslHandle == nil: - server.sslContext.wrapSocket(client) - let ret = SSLAccept(client.sslHandle) - while ret <= 0: - let err = SSLGetError(client.sslHandle, ret) - if err != SSL_ERROR_WANT_ACCEPT: - case err - of SSL_ERROR_ZERO_RETURN: - raiseSslError("TLS/SSL connection failed to initiate, socket closed prematurely.") - of SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE, - SSL_ERROR_WANT_CONNECT, SSL_ERROR_WANT_ACCEPT: - client.sslNoHandshake = true - return AcceptNoHandshake - of SSL_ERROR_WANT_X509_LOOKUP: - raiseSslError("Function for x509 lookup has been called.") - of SSL_ERROR_SYSCALL, SSL_ERROR_SSL: - raiseSslError() - else: - raiseSslError("Unknown error") - client.sslNoHandshake = false - - if client.isSSL and client.sslNoHandshake: - doHandshake() - return AcceptSuccess - else: - acceptAddrPlain(AcceptNoClient, AcceptSuccess): - doHandshake() - -proc accept*(server: Socket, client: var Socket) {.tags: [ReadIOEffect].} = - ## Equivalent to ``acceptAddr`` but doesn't return the address, only the - ## socket. - ## - ## **Note**: ``client`` must be initialised (with ``new``), this function - ## makes no effort to initialise the ``client`` variable. - - var addrDummy = "" - acceptAddr(server, client, addrDummy) - -proc acceptAddr*(server: Socket): tuple[client: Socket, address: string] {. - deprecated, tags: [ReadIOEffect].} = - ## Slightly different version of ``acceptAddr``. - ## - ## **Deprecated since version 0.9.0:** Please use the function above. - var client: Socket - new(client) - var address = "" - acceptAddr(server, client, address) - return (client, address) - -proc accept*(server: Socket): Socket {.deprecated, tags: [ReadIOEffect].} = - ## **Deprecated since version 0.9.0:** Please use the function above. - new(result) - var address = "" - acceptAddr(server, result, address) - -proc close*(socket: Socket) = - ## closes a socket. 
- when defined(windows): - discard winlean.closesocket(socket.fd) - else: - discard posix.close(socket.fd) - # TODO: These values should not be discarded. An EOS should be raised. - # http://stackoverflow.com/questions/12463473/what-happens-if-you-call-close-on-a-bsd-socket-multiple-times - when defined(ssl): - if socket.isSSL: - discard SSLShutdown(socket.sslHandle) - SSLFree(socket.sslHandle) - socket.sslHandle = nil - -proc getServByName*(name, proto: string): Servent {.tags: [ReadIOEffect].} = - ## Searches the database from the beginning and finds the first entry for - ## which the service name specified by ``name`` matches the s_name member - ## and the protocol name specified by ``proto`` matches the s_proto member. - ## - ## On posix this will search through the ``/etc/services`` file. - when defined(Windows): - var s = winlean.getservbyname(name, proto) - else: - var s = posix.getservbyname(name, proto) - if s == nil: raise newException(OSError, "Service not found.") - result.name = $s.s_name - result.aliases = cstringArrayToSeq(s.s_aliases) - result.port = Port(s.s_port) - result.proto = $s.s_proto - -proc getServByPort*(port: Port, proto: string): Servent {.tags: [ReadIOEffect].} = - ## Searches the database from the beginning and finds the first entry for - ## which the port specified by ``port`` matches the s_port member and the - ## protocol name specified by ``proto`` matches the s_proto member. - ## - ## On posix this will search through the ``/etc/services`` file. - when defined(Windows): - var s = winlean.getservbyport(ze(int16(port)).cint, proto) - else: - var s = posix.getservbyport(ze(int16(port)).cint, proto) - if s == nil: raise newException(OSError, "Service not found.") - result.name = $s.s_name - result.aliases = cstringArrayToSeq(s.s_aliases) - result.port = Port(s.s_port) - result.proto = $s.s_proto - -proc getHostByAddr*(ip: string): Hostent {.tags: [ReadIOEffect].} = - ## This function will lookup the hostname of an IP Address. 
- var myaddr: InAddr - myaddr.s_addr = inet_addr(ip) - - when defined(windows): - var s = winlean.gethostbyaddr(addr(myaddr), sizeof(myaddr).cuint, - cint(sockets.AF_INET)) - if s == nil: raiseOSError(osLastError()) - else: - var s = posix.gethostbyaddr(addr(myaddr), sizeof(myaddr).Socklen, - cint(posix.AF_INET)) - if s == nil: - raise newException(OSError, $hstrerror(h_errno)) - - result.name = $s.h_name - result.aliases = cstringArrayToSeq(s.h_aliases) - when defined(windows): - result.addrtype = Domain(s.h_addrtype) - else: - if s.h_addrtype == posix.AF_INET: - result.addrtype = AF_INET - elif s.h_addrtype == posix.AF_INET6: - result.addrtype = AF_INET6 - else: - raise newException(OSError, "unknown h_addrtype") - result.addrList = cstringArrayToSeq(s.h_addr_list) - result.length = int(s.h_length) - -proc getHostByName*(name: string): Hostent {.tags: [ReadIOEffect].} = - ## This function will lookup the IP address of a hostname. - when defined(Windows): - var s = winlean.gethostbyname(name) - else: - var s = posix.gethostbyname(name) - if s == nil: raiseOSError(osLastError()) - result.name = $s.h_name - result.aliases = cstringArrayToSeq(s.h_aliases) - when defined(windows): - result.addrtype = Domain(s.h_addrtype) - else: - if s.h_addrtype == posix.AF_INET: - result.addrtype = AF_INET - elif s.h_addrtype == posix.AF_INET6: - result.addrtype = AF_INET6 - else: - raise newException(OSError, "unknown h_addrtype") - result.addrList = cstringArrayToSeq(s.h_addr_list) - result.length = int(s.h_length) - -proc getSockOptInt*(socket: Socket, level, optname: int): int {. - tags: [ReadIOEffect].} = - ## getsockopt for integer options. - var res: cint - var size = sizeof(res).SockLen - if getsockopt(socket.fd, cint(level), cint(optname), - addr(res), addr(size)) < 0'i32: - raiseOSError(osLastError()) - result = int(res) - -proc setSockOptInt*(socket: Socket, level, optname, optval: int) {. - tags: [WriteIOEffect].} = - ## setsockopt for integer options. 
- var value = cint(optval) - if setsockopt(socket.fd, cint(level), cint(optname), addr(value), - sizeof(value).SockLen) < 0'i32: - raiseOSError(osLastError()) - -proc toCInt(opt: SOBool): cint = - case opt - of OptAcceptConn: SO_ACCEPTCONN - of OptBroadcast: SO_BROADCAST - of OptDebug: SO_DEBUG - of OptDontRoute: SO_DONTROUTE - of OptKeepAlive: SO_KEEPALIVE - of OptOOBInline: SO_OOBINLINE - of OptReuseAddr: SO_REUSEADDR - -proc getSockOpt*(socket: Socket, opt: SOBool, level = SOL_SOCKET): bool {. - tags: [ReadIOEffect].} = - ## Retrieves option ``opt`` as a boolean value. - var res: cint - var size = sizeof(res).SockLen - if getsockopt(socket.fd, cint(level), toCInt(opt), - addr(res), addr(size)) < 0'i32: - raiseOSError(osLastError()) - result = res != 0 - -proc setSockOpt*(socket: Socket, opt: SOBool, value: bool, level = SOL_SOCKET) {. - tags: [WriteIOEffect].} = - ## Sets option ``opt`` to a boolean value specified by ``value``. - var valuei = cint(if value: 1 else: 0) - if setsockopt(socket.fd, cint(level), toCInt(opt), addr(valuei), - sizeof(valuei).SockLen) < 0'i32: - raiseOSError(osLastError()) - -proc connect*(socket: Socket, address: string, port = Port(0), - af: Domain = AF_INET) {.tags: [ReadIOEffect].} = - ## Connects socket to ``address``:``port``. ``Address`` can be an IP address or a - ## host name. If ``address`` is a host name, this function will try each IP - ## of that host name. ``htons`` is already performed on ``port`` so you must - ## not do it. - ## - ## If ``socket`` is an SSL socket a handshake will be automatically performed. 
- var hints: AddrInfo - var aiList: ptr AddrInfo = nil - hints.ai_family = toInt(af) - hints.ai_socktype = toInt(SOCK_STREAM) - hints.ai_protocol = toInt(IPPROTO_TCP) - gaiNim(address, port, hints, aiList) - # try all possibilities: - var success = false - var lastError: OSErrorCode - var it = aiList - while it != nil: - if connect(socket.fd, it.ai_addr, it.ai_addrlen.SockLen) == 0'i32: - success = true - break - else: lastError = osLastError() - it = it.ai_next - - freeaddrinfo(aiList) - if not success: raiseOSError(lastError) - - when defined(ssl): - if socket.isSSL: - let ret = SSLConnect(socket.sslHandle) - if ret <= 0: - let err = SSLGetError(socket.sslHandle, ret) - case err - of SSL_ERROR_ZERO_RETURN: - raiseSslError("TLS/SSL connection failed to initiate, socket closed prematurely.") - of SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE, SSL_ERROR_WANT_CONNECT, - SSL_ERROR_WANT_ACCEPT: - raiseSslError("The operation did not complete. Perhaps you should use connectAsync?") - of SSL_ERROR_WANT_X509_LOOKUP: - raiseSslError("Function for x509 lookup has been called.") - of SSL_ERROR_SYSCALL, SSL_ERROR_SSL: - raiseSslError() - else: - raiseSslError("Unknown error") - - when false: - var s: TSockAddrIn - s.sin_addr.s_addr = inet_addr(address) - s.sin_port = sockets.htons(int16(port)) - when defined(windows): - s.sin_family = toU16(ord(af)) - else: - case af - of AF_UNIX: s.sin_family = posix.AF_UNIX - of AF_INET: s.sin_family = posix.AF_INET - of AF_INET6: s.sin_family = posix.AF_INET6 - else: nil - if connect(socket.fd, cast[ptr TSockAddr](addr(s)), sizeof(s).cint) < 0'i32: - OSError() - -proc connectAsync*(socket: Socket, name: string, port = Port(0), - af: Domain = AF_INET) {.tags: [ReadIOEffect].} = - ## A variant of ``connect`` for non-blocking sockets. - ## - ## This procedure will immediately return, it will not block until a connection - ## is made. 
It is up to the caller to make sure the connection has been established - ## by checking (using ``select``) whether the socket is writeable. - ## - ## **Note**: For SSL sockets, the ``handshake`` procedure must be called - ## whenever the socket successfully connects to a server. - var hints: AddrInfo - var aiList: ptr AddrInfo = nil - hints.ai_family = toInt(af) - hints.ai_socktype = toInt(SOCK_STREAM) - hints.ai_protocol = toInt(IPPROTO_TCP) - gaiNim(name, port, hints, aiList) - # try all possibilities: - var success = false - var lastError: OSErrorCode - var it = aiList - while it != nil: - var ret = connect(socket.fd, it.ai_addr, it.ai_addrlen.SockLen) - if ret == 0'i32: - success = true - break - else: - lastError = osLastError() - when defined(windows): - # Windows EINTR doesn't behave same as POSIX. - if lastError.int32 == WSAEWOULDBLOCK: - success = true - break - else: - if lastError.int32 == EINTR or lastError.int32 == EINPROGRESS: - success = true - break - - it = it.ai_next - - freeaddrinfo(aiList) - if not success: raiseOSError(lastError) - when defined(ssl): - if socket.isSSL: - socket.sslNoHandshake = true - -when defined(ssl): - proc handshake*(socket: Socket): bool {.tags: [ReadIOEffect, WriteIOEffect].} = - ## This proc needs to be called on a socket after it connects. This is - ## only applicable when using ``connectAsync``. - ## This proc performs the SSL handshake. - ## - ## Returns ``False`` whenever the socket is not yet ready for a handshake, - ## ``True`` whenever handshake completed successfully. - ## - ## A ESSL error is raised on any other errors. 
- result = true - if socket.isSSL: - var ret = SSLConnect(socket.sslHandle) - if ret <= 0: - var errret = SSLGetError(socket.sslHandle, ret) - case errret - of SSL_ERROR_ZERO_RETURN: - raiseSslError("TLS/SSL connection failed to initiate, socket closed prematurely.") - of SSL_ERROR_WANT_CONNECT, SSL_ERROR_WANT_ACCEPT, - SSL_ERROR_WANT_READ, SSL_ERROR_WANT_WRITE: - return false - of SSL_ERROR_WANT_X509_LOOKUP: - raiseSslError("Function for x509 lookup has been called.") - of SSL_ERROR_SYSCALL, SSL_ERROR_SSL: - raiseSslError() - else: - raiseSslError("Unknown Error") - socket.sslNoHandshake = false - else: - raiseSslError("Socket is not an SSL socket.") - - proc gotHandshake*(socket: Socket): bool = - ## Determines whether a handshake has occurred between a client (``socket``) - ## and the server that ``socket`` is connected to. - ## - ## Throws ESSL if ``socket`` is not an SSL socket. - if socket.isSSL: - return not socket.sslNoHandshake - else: - raiseSslError("Socket is not an SSL socket.") - -proc timeValFromMilliseconds(timeout = 500): Timeval = - if timeout != -1: - var seconds = timeout div 1000 - result.tv_sec = seconds.int32 - result.tv_usec = ((timeout - seconds * 1000) * 1000).int32 - -proc createFdSet(fd: var TFdSet, s: seq[Socket], m: var int) = - FD_ZERO(fd) - for i in items(s): - m = max(m, int(i.fd)) - FD_SET(i.fd, fd) - -proc pruneSocketSet(s: var seq[Socket], fd: var TFdSet) = - var i = 0 - var L = s.len - while i < L: - if FD_ISSET(s[i].fd, fd) == 0'i32: - # not set. - s[i] = s[L-1] - dec(L) - else: - inc(i) - setLen(s, L) - -proc hasDataBuffered*(s: Socket): bool = - ## Determines whether a socket has data buffered. - result = false - if s.isBuffered: - result = s.bufLen > 0 and s.currPos != s.bufLen - - when defined(ssl): - if s.isSSL and not result: - result = s.sslHasPeekChar - -proc checkBuffer(readfds: var seq[Socket]): int = - ## Checks the buffer of each socket in ``readfds`` to see whether there is data. 
- ## Removes the sockets from ``readfds`` and returns the count of removed sockets. - var res: seq[Socket] = @[] - result = 0 - for s in readfds: - if hasDataBuffered(s): - inc(result) - res.add(s) - if result > 0: - readfds = res - -proc select*(readfds, writefds, exceptfds: var seq[Socket], - timeout = 500): int {.tags: [ReadIOEffect].} = - ## Traditional select function. This function will return the number of - ## sockets that are ready to be read from, written to, or which have errors. - ## If there are none; 0 is returned. - ## ``Timeout`` is in miliseconds and -1 can be specified for no timeout. - ## - ## Sockets which are **not** ready for reading, writing or which don't have - ## errors waiting on them are removed from the ``readfds``, ``writefds``, - ## ``exceptfds`` sequences respectively. - let buffersFilled = checkBuffer(readfds) - if buffersFilled > 0: - return buffersFilled - - var tv {.noInit.}: Timeval = timeValFromMilliseconds(timeout) - - var rd, wr, ex: TFdSet - var m = 0 - createFdSet((rd), readfds, m) - createFdSet((wr), writefds, m) - createFdSet((ex), exceptfds, m) - - if timeout != -1: - result = int(select(cint(m+1), addr(rd), addr(wr), addr(ex), addr(tv))) - else: - result = int(select(cint(m+1), addr(rd), addr(wr), addr(ex), nil)) - - pruneSocketSet(readfds, (rd)) - pruneSocketSet(writefds, (wr)) - pruneSocketSet(exceptfds, (ex)) - -proc select*(readfds, writefds: var seq[Socket], - timeout = 500): int {.tags: [ReadIOEffect].} = - ## Variant of select with only a read and write list. 
- let buffersFilled = checkBuffer(readfds) - if buffersFilled > 0: - return buffersFilled - var tv {.noInit.}: Timeval = timeValFromMilliseconds(timeout) - - var rd, wr: TFdSet - var m = 0 - createFdSet((rd), readfds, m) - createFdSet((wr), writefds, m) - - if timeout != -1: - result = int(select(cint(m+1), addr(rd), addr(wr), nil, addr(tv))) - else: - result = int(select(cint(m+1), addr(rd), addr(wr), nil, nil)) - - pruneSocketSet(readfds, (rd)) - pruneSocketSet(writefds, (wr)) - -proc selectWrite*(writefds: var seq[Socket], - timeout = 500): int {.tags: [ReadIOEffect].} = - ## When a socket in ``writefds`` is ready to be written to then a non-zero - ## value will be returned specifying the count of the sockets which can be - ## written to. The sockets which **cannot** be written to will also be removed - ## from ``writefds``. - ## - ## ``timeout`` is specified in miliseconds and ``-1`` can be specified for - ## an unlimited time. - var tv {.noInit.}: Timeval = timeValFromMilliseconds(timeout) - - var wr: TFdSet - var m = 0 - createFdSet((wr), writefds, m) - - if timeout != -1: - result = int(select(cint(m+1), nil, addr(wr), nil, addr(tv))) - else: - result = int(select(cint(m+1), nil, addr(wr), nil, nil)) - - pruneSocketSet(writefds, (wr)) - -proc select*(readfds: var seq[Socket], timeout = 500): int = - ## variant of select with a read list only - let buffersFilled = checkBuffer(readfds) - if buffersFilled > 0: - return buffersFilled - var tv {.noInit.}: Timeval = timeValFromMilliseconds(timeout) - - var rd: TFdSet - var m = 0 - createFdSet((rd), readfds, m) - - if timeout != -1: - result = int(select(cint(m+1), addr(rd), nil, nil, addr(tv))) - else: - result = int(select(cint(m+1), addr(rd), nil, nil, nil)) - - pruneSocketSet(readfds, (rd)) - -proc readIntoBuf(socket: Socket, flags: int32): int = - result = 0 - when defined(ssl): - if socket.isSSL: - result = SSLRead(socket.sslHandle, addr(socket.buffer), int(socket.buffer.high)) - else: - result = 
recv(socket.fd, addr(socket.buffer), cint(socket.buffer.high), flags) - else: - result = recv(socket.fd, addr(socket.buffer), cint(socket.buffer.high), flags) - if result <= 0: - socket.bufLen = 0 - socket.currPos = 0 - return result - socket.bufLen = result - socket.currPos = 0 - -template retRead(flags, readBytes: int) {.dirty.} = - let res = socket.readIntoBuf(flags.int32) - if res <= 0: - if readBytes > 0: - return readBytes - else: - return res - -proc recv*(socket: Socket, data: pointer, size: int): int {.tags: [ReadIOEffect].} = - ## Receives data from a socket. - ## - ## **Note**: This is a low-level function, you may be interested in the higher - ## level versions of this function which are also named ``recv``. - if size == 0: return - if socket.isBuffered: - if socket.bufLen == 0: - retRead(0'i32, 0) - - var read = 0 - while read < size: - if socket.currPos >= socket.bufLen: - retRead(0'i32, read) - - let chunk = min(socket.bufLen-socket.currPos, size-read) - var d = cast[cstring](data) - copyMem(addr(d[read]), addr(socket.buffer[socket.currPos]), chunk) - read.inc(chunk) - socket.currPos.inc(chunk) - - result = read - else: - when defined(ssl): - if socket.isSSL: - if socket.sslHasPeekChar: - copyMem(data, addr(socket.sslPeekChar), 1) - socket.sslHasPeekChar = false - if size-1 > 0: - var d = cast[cstring](data) - result = SSLRead(socket.sslHandle, addr(d[1]), size-1) + 1 - else: - result = 1 - else: - result = SSLRead(socket.sslHandle, data, size) - else: - result = recv(socket.fd, data, size.cint, 0'i32) - else: - result = recv(socket.fd, data, size.cint, 0'i32) - -proc waitFor(socket: Socket, waited: var float, timeout, size: int, - funcName: string): int {.tags: [TimeEffect].} = - ## determines the amount of characters that can be read. Result will never - ## be larger than ``size``. For unbuffered sockets this will be ``1``. - ## For buffered sockets it can be as big as ``BufferSize``. 
- ## - ## If this function does not determine that there is data on the socket - ## within ``timeout`` ms, an ETimeout error will be raised. - result = 1 - if size <= 0: assert false - if timeout == -1: return size - if socket.isBuffered and socket.bufLen != 0 and socket.bufLen != socket.currPos: - result = socket.bufLen - socket.currPos - result = min(result, size) - else: - if timeout - int(waited * 1000.0) < 1: - raise newException(TimeoutError, "Call to '" & funcName & "' timed out.") - - when defined(ssl): - if socket.isSSL: - if socket.hasDataBuffered: - # sslPeekChar is present. - return 1 - let sslPending = SSLPending(socket.sslHandle) - if sslPending != 0: - return sslPending - - var s = @[socket] - var startTime = epochTime() - let selRet = select(s, timeout - int(waited * 1000.0)) - if selRet < 0: raiseOSError(osLastError()) - if selRet != 1: - raise newException(TimeoutError, "Call to '" & funcName & "' timed out.") - waited += (epochTime() - startTime) - -proc recv*(socket: Socket, data: pointer, size: int, timeout: int): int {. - tags: [ReadIOEffect, TimeEffect].} = - ## overload with a ``timeout`` parameter in miliseconds. - var waited = 0.0 # number of seconds already waited - - var read = 0 - while read < size: - let avail = waitFor(socket, waited, timeout, size-read, "recv") - var d = cast[cstring](data) - result = recv(socket, addr(d[read]), avail) - if result == 0: break - if result < 0: - return result - inc(read, result) - - result = read - -proc recv*(socket: Socket, data: var string, size: int, timeout = -1): int = - ## Higher-level version of ``recv``. - ## - ## When 0 is returned the socket's connection has been closed. - ## - ## This function will throw an EOS exception when an error occurs. A value - ## lower than 0 is never returned. - ## - ## A timeout may be specified in miliseconds, if enough data is not received - ## within the time specified an ETimeout exception will be raised. - ## - ## **Note**: ``data`` must be initialised. 
- data.setLen(size) - result = recv(socket, cstring(data), size, timeout) - if result < 0: - data.setLen(0) - socket.raiseSocketError(result) - data.setLen(result) - -proc recvAsync*(socket: Socket, data: var string, size: int): int = - ## Async version of ``recv``. - ## - ## When socket is non-blocking and no data is available on the socket, - ## ``-1`` will be returned and ``data`` will be ``""``. - ## - ## **Note**: ``data`` must be initialised. - data.setLen(size) - result = recv(socket, cstring(data), size) - if result < 0: - data.setLen(0) - socket.raiseSocketError(async = true) - result = -1 - data.setLen(result) - -proc peekChar(socket: Socket, c: var char): int {.tags: [ReadIOEffect].} = - if socket.isBuffered: - result = 1 - if socket.bufLen == 0 or socket.currPos > socket.bufLen-1: - var res = socket.readIntoBuf(0'i32) - if res <= 0: - result = res - - c = socket.buffer[socket.currPos] - else: - when defined(ssl): - if socket.isSSL: - if not socket.sslHasPeekChar: - result = SSLRead(socket.sslHandle, addr(socket.sslPeekChar), 1) - socket.sslHasPeekChar = true - - c = socket.sslPeekChar - return - result = recv(socket.fd, addr(c), 1, MSG_PEEK) - -proc recvLine*(socket: Socket, line: var TaintedString, timeout = -1): bool {. - tags: [ReadIOEffect, TimeEffect], deprecated.} = - ## Receive a line of data from ``socket``. - ## - ## If a full line is received ``\r\L`` is not - ## added to ``line``, however if solely ``\r\L`` is received then ``line`` - ## will be set to it. - ## - ## ``True`` is returned if data is available. ``False`` suggests an - ## error, EOS exceptions are not raised and ``False`` is simply returned - ## instead. - ## - ## If the socket is disconnected, ``line`` will be set to ``""`` and ``True`` - ## will be returned. - ## - ## A timeout can be specified in miliseconds, if data is not received within - ## the specified time an ETimeout exception will be raised. 
- ## - ## **Deprecated since version 0.9.2**: This function has been deprecated in - ## favour of readLine. - - template addNLIfEmpty(): stmt = - if line.len == 0: - line.add("\c\L") - - var waited = 0.0 - - setLen(line.string, 0) - while true: - var c: char - discard waitFor(socket, waited, timeout, 1, "recvLine") - var n = recv(socket, addr(c), 1) - if n < 0: return - elif n == 0: return true - if c == '\r': - discard waitFor(socket, waited, timeout, 1, "recvLine") - n = peekChar(socket, c) - if n > 0 and c == '\L': - discard recv(socket, addr(c), 1) - elif n <= 0: return false - addNLIfEmpty() - return true - elif c == '\L': - addNLIfEmpty() - return true - add(line.string, c) - -proc readLine*(socket: Socket, line: var TaintedString, timeout = -1) {. - tags: [ReadIOEffect, TimeEffect].} = - ## Reads a line of data from ``socket``. - ## - ## If a full line is read ``\r\L`` is not - ## added to ``line``, however if solely ``\r\L`` is read then ``line`` - ## will be set to it. - ## - ## If the socket is disconnected, ``line`` will be set to ``""``. - ## - ## An EOS exception will be raised in the case of a socket error. - ## - ## A timeout can be specified in miliseconds, if data is not received within - ## the specified time an ETimeout exception will be raised. 
- - template addNLIfEmpty(): stmt = - if line.len == 0: - line.add("\c\L") - - var waited = 0.0 - - setLen(line.string, 0) - while true: - var c: char - discard waitFor(socket, waited, timeout, 1, "readLine") - var n = recv(socket, addr(c), 1) - if n < 0: socket.raiseSocketError() - elif n == 0: return - if c == '\r': - discard waitFor(socket, waited, timeout, 1, "readLine") - n = peekChar(socket, c) - if n > 0 and c == '\L': - discard recv(socket, addr(c), 1) - elif n <= 0: socket.raiseSocketError() - addNLIfEmpty() - return - elif c == '\L': - addNLIfEmpty() - return - add(line.string, c) - -proc recvLineAsync*(socket: Socket, - line: var TaintedString): RecvLineResult {.tags: [ReadIOEffect], deprecated.} = - ## Similar to ``recvLine`` but designed for non-blocking sockets. - ## - ## The values of the returned enum should be pretty self explanatory: - ## - ## * If a full line has been retrieved; ``RecvFullLine`` is returned. - ## * If some data has been retrieved; ``RecvPartialLine`` is returned. - ## * If the socket has been disconnected; ``RecvDisconnected`` is returned. - ## * If call to ``recv`` failed; ``RecvFail`` is returned. - ## - ## **Deprecated since version 0.9.2**: This function has been deprecated in - ## favour of readLineAsync. - - setLen(line.string, 0) - while true: - var c: char - var n = recv(socket, addr(c), 1) - if n < 0: - return (if line.len == 0: RecvFail else: RecvPartialLine) - elif n == 0: - return (if line.len == 0: RecvDisconnected else: RecvPartialLine) - if c == '\r': - n = peekChar(socket, c) - if n > 0 and c == '\L': - discard recv(socket, addr(c), 1) - elif n <= 0: - return (if line.len == 0: RecvFail else: RecvPartialLine) - return RecvFullLine - elif c == '\L': return RecvFullLine - add(line.string, c) - -proc readLineAsync*(socket: Socket, - line: var TaintedString): ReadLineResult {.tags: [ReadIOEffect].} = - ## Similar to ``recvLine`` but designed for non-blocking sockets. 
- ## - ## The values of the returned enum should be pretty self explanatory: - ## - ## * If a full line has been retrieved; ``ReadFullLine`` is returned. - ## * If some data has been retrieved; ``ReadPartialLine`` is returned. - ## * If the socket has been disconnected; ``ReadDisconnected`` is returned. - ## * If no data could be retrieved; ``ReadNone`` is returned. - ## * If call to ``recv`` failed; **an EOS exception is raised.** - setLen(line.string, 0) - - template errorOrNone = - socket.raiseSocketError(async = true) - return ReadNone - - while true: - var c: char - var n = recv(socket, addr(c), 1) - #echo(n) - if n < 0: - if line.len == 0: errorOrNone else: return ReadPartialLine - elif n == 0: - return (if line.len == 0: ReadDisconnected else: ReadPartialLine) - if c == '\r': - n = peekChar(socket, c) - if n > 0 and c == '\L': - discard recv(socket, addr(c), 1) - elif n <= 0: - if line.len == 0: errorOrNone else: return ReadPartialLine - return ReadFullLine - elif c == '\L': return ReadFullLine - add(line.string, c) - -proc recv*(socket: Socket): TaintedString {.tags: [ReadIOEffect], deprecated.} = - ## receives all the available data from the socket. - ## Socket errors will result in an ``EOS`` error. - ## If socket is not a connectionless socket and socket is not connected - ## ``""`` will be returned. - ## - ## **Deprecated since version 0.9.2**: This function is not safe for use. 
- const bufSize = 4000 - result = newStringOfCap(bufSize).TaintedString - var pos = 0 - while true: - var bytesRead = recv(socket, addr(string(result)[pos]), bufSize-1) - if bytesRead == -1: raiseOSError(osLastError()) - setLen(result.string, pos + bytesRead) - if bytesRead != bufSize-1: break - # increase capacity: - setLen(result.string, result.string.len + bufSize) - inc(pos, bytesRead) - when false: - var buf = newString(bufSize) - result = TaintedString"" - while true: - var bytesRead = recv(socket, cstring(buf), bufSize-1) - # Error - if bytesRead == -1: OSError(osLastError()) - - buf[bytesRead] = '\0' # might not be necessary - setLen(buf, bytesRead) - add(result.string, buf) - if bytesRead != bufSize-1: break - -{.push warning[deprecated]: off.} -proc recvTimeout*(socket: Socket, timeout: int): TaintedString {. - tags: [ReadIOEffect], deprecated.} = - ## overloaded variant to support a ``timeout`` parameter, the ``timeout`` - ## parameter specifies the amount of miliseconds to wait for data on the - ## socket. - ## - ## **Deprecated since version 0.9.2**: This function is not safe for use. - if socket.bufLen == 0: - var s = @[socket] - if s.select(timeout) != 1: - raise newException(TimeoutError, "Call to recv() timed out.") - - return socket.recv -{.pop.} - -proc recvAsync*(socket: Socket, s: var TaintedString): bool {. - tags: [ReadIOEffect], deprecated.} = - ## receives all the data from a non-blocking socket. If socket is non-blocking - ## and there are no messages available, `False` will be returned. - ## Other socket errors will result in an ``EOS`` error. - ## If socket is not a connectionless socket and socket is not connected - ## ``s`` will be set to ``""``. - ## - ## **Deprecated since version 0.9.2**: This function is not safe for use. 
- const bufSize = 1000 - # ensure bufSize capacity: - setLen(s.string, bufSize) - setLen(s.string, 0) - var pos = 0 - while true: - var bytesRead = recv(socket, addr(string(s)[pos]), bufSize-1) - when defined(ssl): - if socket.isSSL: - if bytesRead <= 0: - var ret = SSLGetError(socket.sslHandle, bytesRead.cint) - case ret - of SSL_ERROR_ZERO_RETURN: - raiseSslError("TLS/SSL connection failed to initiate, socket closed prematurely.") - of SSL_ERROR_WANT_CONNECT, SSL_ERROR_WANT_ACCEPT: - raiseSslError("Unexpected error occurred.") # This should just not happen. - of SSL_ERROR_WANT_WRITE, SSL_ERROR_WANT_READ: - return false - of SSL_ERROR_WANT_X509_LOOKUP: - raiseSslError("Function for x509 lookup has been called.") - of SSL_ERROR_SYSCALL, SSL_ERROR_SSL: - raiseSslError() - else: raiseSslError("Unknown Error") - - if bytesRead == -1 and not (when defined(ssl): socket.isSSL else: false): - let err = osLastError() - when defined(windows): - if err.int32 == WSAEWOULDBLOCK: - return false - else: raiseOSError(err) - else: - if err.int32 == EAGAIN or err.int32 == EWOULDBLOCK: - return false - else: raiseOSError(err) - - setLen(s.string, pos + bytesRead) - if bytesRead != bufSize-1: break - # increase capacity: - setLen(s.string, s.string.len + bufSize) - inc(pos, bytesRead) - result = true - -proc recvFrom*(socket: Socket, data: var string, length: int, - address: var string, port: var Port, flags = 0'i32): int {. - tags: [ReadIOEffect].} = - ## Receives data from ``socket``. This function should normally be used with - ## connection-less sockets (UDP sockets). - ## - ## If an error occurs the return value will be ``-1``. Otherwise the return - ## value will be the length of data received. - ## - ## **Warning:** This function does not yet have a buffered implementation, - ## so when ``socket`` is buffered the non-buffered implementation will be - ## used. Therefore if ``socket`` contains something in its buffer this - ## function will make no effort to return it. 
- - # TODO: Buffered sockets - data.setLen(length) - var sockAddress: Sockaddr_in - var addrLen = sizeof(sockAddress).SockLen - result = recvfrom(socket.fd, cstring(data), length.cint, flags.cint, - cast[ptr SockAddr](addr(sockAddress)), addr(addrLen)) - - if result != -1: - data.setLen(result) - address = $inet_ntoa(sockAddress.sin_addr) - port = ntohs(sockAddress.sin_port).Port - -proc recvFromAsync*(socket: Socket, data: var string, length: int, - address: var string, port: var Port, - flags = 0'i32): bool {.tags: [ReadIOEffect].} = - ## Variant of ``recvFrom`` for non-blocking sockets. Unlike ``recvFrom``, - ## this function will raise an EOS error whenever a socket error occurs. - ## - ## If there is no data to be read from the socket ``False`` will be returned. - result = true - var callRes = recvFrom(socket, data, length, address, port, flags) - if callRes < 0: - let err = osLastError() - when defined(windows): - if err.int32 == WSAEWOULDBLOCK: - return false - else: raiseOSError(err) - else: - if err.int32 == EAGAIN or err.int32 == EWOULDBLOCK: - return false - else: raiseOSError(err) - -proc skip*(socket: Socket) {.tags: [ReadIOEffect], deprecated.} = - ## skips all the data that is pending for the socket - ## - ## **Deprecated since version 0.9.2**: This function is not safe for use. - const bufSize = 1000 - var buf = alloc(bufSize) - while recv(socket, buf, bufSize) == bufSize: discard - dealloc(buf) - -proc skip*(socket: Socket, size: int, timeout = -1) = - ## Skips ``size`` amount of bytes. - ## - ## An optional timeout can be specified in miliseconds, if skipping the - ## bytes takes longer than specified an ETimeout exception will be raised. - ## - ## Returns the number of skipped bytes. 
- var waited = 0.0 - var dummy = alloc(size) - var bytesSkipped = 0 - while bytesSkipped != size: - let avail = waitFor(socket, waited, timeout, size-bytesSkipped, "skip") - bytesSkipped += recv(socket, dummy, avail) - dealloc(dummy) - -proc send*(socket: Socket, data: pointer, size: int): int {. - tags: [WriteIOEffect].} = - ## sends data to a socket. - when defined(ssl): - if socket.isSSL: - return SSLWrite(socket.sslHandle, cast[cstring](data), size) - - when defined(windows) or defined(macosx): - result = send(socket.fd, data, size.cint, 0'i32) - else: - when defined(solaris): - const MSG_NOSIGNAL = 0 - result = send(socket.fd, data, size, int32(MSG_NOSIGNAL)) - -proc send*(socket: Socket, data: string) {.tags: [WriteIOEffect].} = - ## sends data to a socket. - if socket.nonblocking: - raise newException(ValueError, "This function cannot be used on non-blocking sockets.") - let sent = send(socket, cstring(data), data.len) - if sent < 0: - when defined(ssl): - if socket.isSSL: - raiseSslError() - - raiseOSError(osLastError()) - - if sent != data.len: - raise newException(OSError, "Could not send all data.") - -proc sendAsync*(socket: Socket, data: string): int {.tags: [WriteIOEffect].} = - ## sends data to a non-blocking socket. - ## Returns ``0`` if no data could be sent, if data has been sent - ## returns the amount of bytes of ``data`` that was successfully sent. This - ## number may not always be the length of ``data`` but typically is. - ## - ## An EOS (or ESSL if socket is an SSL socket) exception is raised if an error - ## occurs. - result = send(socket, cstring(data), data.len) - when defined(ssl): - if socket.isSSL: - if result <= 0: - let ret = SSLGetError(socket.sslHandle, result.cint) - case ret - of SSL_ERROR_ZERO_RETURN: - raiseSslError("TLS/SSL connection failed to initiate, socket closed prematurely.") - of SSL_ERROR_WANT_CONNECT, SSL_ERROR_WANT_ACCEPT: - raiseSslError("Unexpected error occurred.") # This should just not happen. 
- of SSL_ERROR_WANT_WRITE, SSL_ERROR_WANT_READ: - return 0 - of SSL_ERROR_WANT_X509_LOOKUP: - raiseSslError("Function for x509 lookup has been called.") - of SSL_ERROR_SYSCALL, SSL_ERROR_SSL: - raiseSslError() - else: raiseSslError("Unknown Error") - else: - return - if result == -1: - let err = osLastError() - when defined(windows): - if err.int32 == WSAEINPROGRESS: - return 0 - else: raiseOSError(err) - else: - if err.int32 == EAGAIN or err.int32 == EWOULDBLOCK: - return 0 - else: raiseOSError(err) - - -proc trySend*(socket: Socket, data: string): bool {.tags: [WriteIOEffect].} = - ## safe alternative to ``send``. Does not raise an EOS when an error occurs, - ## and instead returns ``false`` on failure. - result = send(socket, cstring(data), data.len) == data.len - -proc sendTo*(socket: Socket, address: string, port: Port, data: pointer, - size: int, af: Domain = AF_INET, flags = 0'i32): int {. - tags: [WriteIOEffect].} = - ## low-level sendTo proc. This proc sends ``data`` to the specified ``address``, - ## which may be an IP address or a hostname, if a hostname is specified - ## this function will try each IP of that hostname. - ## - ## **Note:** This proc is not available for SSL sockets. - var hints: AddrInfo - var aiList: ptr AddrInfo = nil - hints.ai_family = toInt(af) - hints.ai_socktype = toInt(SOCK_STREAM) - hints.ai_protocol = toInt(IPPROTO_TCP) - gaiNim(address, port, hints, aiList) - - # try all possibilities: - var success = false - var it = aiList - while it != nil: - result = sendto(socket.fd, data, size.cint, flags.cint, it.ai_addr, - it.ai_addrlen.SockLen) - if result != -1'i32: - success = true - break - it = it.ai_next - - freeaddrinfo(aiList) - -proc sendTo*(socket: Socket, address: string, port: Port, - data: string): int {.tags: [WriteIOEffect].} = - ## Friendlier version of the low-level ``sendTo``. 
- result = socket.sendTo(address, port, cstring(data), data.len) - -when defined(Windows): - const - IOCPARM_MASK = 127 - IOC_IN = int(-2147483648) - FIONBIO = IOC_IN.int32 or ((sizeof(int32) and IOCPARM_MASK) shl 16) or - (102 shl 8) or 126 - - proc ioctlsocket(s: SocketHandle, cmd: clong, - argptr: ptr clong): cint {. - stdcall, importc:"ioctlsocket", dynlib: "ws2_32.dll".} - -proc setBlocking(s: Socket, blocking: bool) = - when defined(Windows): - var mode = clong(ord(not blocking)) # 1 for non-blocking, 0 for blocking - if ioctlsocket(s.fd, FIONBIO, addr(mode)) == -1: - raiseOSError(osLastError()) - else: # BSD sockets - var x: int = fcntl(s.fd, F_GETFL, 0) - if x == -1: - raiseOSError(osLastError()) - else: - var mode = if blocking: x and not O_NONBLOCK else: x or O_NONBLOCK - if fcntl(s.fd, F_SETFL, mode) == -1: - raiseOSError(osLastError()) - s.nonblocking = not blocking - -discard """ proc setReuseAddr*(s: Socket) = - var blah: int = 1 - var mode = SO_REUSEADDR - if setsockopt(s.fd, SOL_SOCKET, mode, addr blah, TSOcklen(sizeof(int))) == -1: - raiseOSError(osLastError()) """ - -proc connect*(socket: Socket, address: string, port = Port(0), timeout: int, - af: Domain = AF_INET) {.tags: [ReadIOEffect, WriteIOEffect].} = - ## Connects to server as specified by ``address`` on port specified by ``port``. - ## - ## The ``timeout`` paremeter specifies the time in miliseconds to allow for - ## the connection to the server to be made. - let originalStatus = not socket.nonblocking - socket.setBlocking(false) - - socket.connectAsync(address, port, af) - var s: seq[Socket] = @[socket] - if selectWrite(s, timeout) != 1: - raise newException(TimeoutError, "Call to 'connect' timed out.") - else: - when defined(ssl): - if socket.isSSL: - socket.setBlocking(true) - doAssert socket.handshake() - socket.setBlocking(originalStatus) - -proc isSSL*(socket: Socket): bool = return socket.isSSL - ## Determines whether ``socket`` is a SSL socket. 
- -proc getFD*(socket: Socket): SocketHandle = return socket.fd - ## Returns the socket's file descriptor - -proc isBlocking*(socket: Socket): bool = not socket.nonblocking - ## Determines whether ``socket`` is blocking. - -when defined(Windows): - var wsa: WSAData - if wsaStartup(0x0101'i16, addr wsa) != 0: raiseOSError(osLastError()) - - diff --git a/lib/pure/ssl_certs.nim b/lib/pure/ssl_certs.nim new file mode 100644 index 000000000..d60cd22eb --- /dev/null +++ b/lib/pure/ssl_certs.nim @@ -0,0 +1,172 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2017 Nim contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# +## Scan for SSL/TLS CA certificates on disk +## The default locations can be overridden using the SSL_CERT_FILE and +## SSL_CERT_DIR environment variables. + +import std/[os, strutils] + +# FWIW look for files before scanning entire dirs. + +when defined(macosx): + const certificatePaths = [ + "/etc/ssl/cert.pem", + "/System/Library/OpenSSL/certs/cert.pem" + ] +elif defined(linux): + const certificatePaths = [ + # Debian, Ubuntu, Arch: maintained by update-ca-certificates, SUSE, Gentoo + # NetBSD (security/mozilla-rootcerts) + # SLES10/SLES11, https://golang.org/issue/12139 + "/etc/ssl/certs/ca-certificates.crt", + # OpenSUSE + "/etc/ssl/ca-bundle.pem", + # Red Hat 5+, Fedora, Centos + "/etc/pki/tls/certs/ca-bundle.crt", + # Red Hat 4 + "/usr/share/ssl/certs/ca-bundle.crt", + # Fedora/RHEL + "/etc/pki/tls/certs", + # Android + "/data/data/com.termux/files/usr/etc/tls/cert.pem", + "/system/etc/security/cacerts", + ] +elif defined(bsd): + const certificatePaths = [ + # Debian, Ubuntu, Arch: maintained by update-ca-certificates, SUSE, Gentoo + # NetBSD (security/mozilla-rootcerts) + # SLES10/SLES11, https://golang.org/issue/12139 + "/etc/ssl/certs/ca-certificates.crt", + # FreeBSD (security/ca-root-nss package) + "/usr/local/share/certs/ca-root-nss.crt", + # OpenBSD, FreeBSD (optional symlink) 
+ "/etc/ssl/cert.pem", + # FreeBSD + "/usr/local/share/certs", + # NetBSD + "/etc/openssl/certs", + ] +else: + const certificatePaths = [ + # Debian, Ubuntu, Arch: maintained by update-ca-certificates, SUSE, Gentoo + # NetBSD (security/mozilla-rootcerts) + # SLES10/SLES11, https://golang.org/issue/12139 + "/etc/ssl/certs/ca-certificates.crt", + # OpenSUSE + "/etc/ssl/ca-bundle.pem", + # Red Hat 5+, Fedora, Centos + "/etc/pki/tls/certs/ca-bundle.crt", + # Red Hat 4 + "/usr/share/ssl/certs/ca-bundle.crt", + # FreeBSD (security/ca-root-nss package) + "/usr/local/share/certs/ca-root-nss.crt", + # CentOS/RHEL 7 + "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", + # OpenBSD, FreeBSD (optional symlink) + "/etc/ssl/cert.pem", + # Fedora/RHEL + "/etc/pki/tls/certs", + # Android + "/system/etc/security/cacerts", + # FreeBSD + "/usr/local/share/certs", + # NetBSD + "/etc/openssl/certs", + ] + +when defined(haiku): + const + B_FIND_PATH_EXISTING_ONLY = 0x4 + B_FIND_PATH_DATA_DIRECTORY = 6 + + proc find_paths_etc(architecture: cstring, baseDirectory: cint, + subPath: cstring, flags: uint32, + paths: var ptr UncheckedArray[cstring], + pathCount: var csize_t): int32 + {.importc, header: "<FindDirectory.h>".} + proc free(p: pointer) {.importc, header: "<stdlib.h>".} + +iterator scanSSLCertificates*(useEnvVars = false): string = + ## Scan for SSL/TLS CA certificates on disk. + ## + ## if `useEnvVars` is true, the SSL_CERT_FILE and SSL_CERT_DIR + ## environment variables can be used to override the certificate + ## directories to scan or specify a CA certificate file. 
+ if useEnvVars and existsEnv("SSL_CERT_FILE"): + yield getEnv("SSL_CERT_FILE") + + elif useEnvVars and existsEnv("SSL_CERT_DIR"): + let p = getEnv("SSL_CERT_DIR") + for fn in joinPath(p, "*").walkFiles(): + yield fn + + else: + when defined(windows): + const cacert = "cacert.pem" + let pem = getAppDir() / cacert + if fileExists(pem): + yield pem + else: + let path = getEnv("PATH") + for candidate in split(path, PathSep): + if candidate.len != 0: + let x = (if candidate[0] == '"' and candidate[^1] == '"': + substr(candidate, 1, candidate.len-2) else: candidate) / cacert + if fileExists(x): + yield x + elif not defined(haiku): + for p in certificatePaths: + if p.endsWith(".pem") or p.endsWith(".crt"): + if fileExists(p): + yield p + elif dirExists(p): + # check if it's a dir where each cert is one file + # named by it's hasg + for fn in joinPath(p, "*.0").walkFiles: + yield p.normalizePathEnd(true) + break + for fn in joinPath(p, "*").walkFiles(): + + yield fn + else: + var + paths: ptr UncheckedArray[cstring] + size: csize_t + let err = find_paths_etc( + nil, B_FIND_PATH_DATA_DIRECTORY, "ssl/CARootCertificates.pem", + B_FIND_PATH_EXISTING_ONLY, paths, size + ) + if err == 0: + defer: free(paths) + for i in 0 ..< size: + yield $paths[i] + +# Certificates management on windows +# when defined(windows) or defined(nimdoc): +# +# import std/openssl +# +# type +# PCCertContext {.final, pure.} = pointer +# X509 {.final, pure.} = pointer +# CertStore {.final, pure.} = pointer +# +# # OpenSSL cert store +# +# {.push stdcall, dynlib: "kernel32", importc.} +# +# proc CertOpenSystemStore*(hprov: pointer=nil, szSubsystemProtocol: cstring): CertStore +# +# proc CertEnumCertificatesInStore*(hCertStore: CertStore, pPrevCertContext: PCCertContext): pointer +# +# proc CertFreeCertificateContext*(pContext: PCCertContext): bool +# +# proc CertCloseStore*(hCertStore:CertStore, flags:cint): bool +# +# {.pop.} diff --git a/lib/pure/ssl_config.nim b/lib/pure/ssl_config.nim new file mode 
100644 index 000000000..14f66ede4 --- /dev/null +++ b/lib/pure/ssl_config.nim @@ -0,0 +1,51 @@ +# This file was automatically generated by tools/ssl_config_parser on 2020-06-03T22:02:05Z. DO NOT EDIT. + +## This module contains SSL configuration parameters obtained from +## `Mozilla OpSec <https://wiki.mozilla.org/Security/Server_Side_TLS>`_. +## +## The configuration file used to generate this module: https://ssl-config.mozilla.org/guidelines/5.4.json + +const CiphersModern* = "TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256" + ## An OpenSSL-compatible list of secure ciphers for ``modern`` compatibility + ## per Mozilla's recommendations. + ## + ## Oldest clients supported by this list: + ## * Firefox 63 + ## * Android 10.0 + ## * Chrome 70 + ## * Edge 75 + ## * Java 11 + ## * OpenSSL 1.1.1 + ## * Opera 57 + ## * Safari 12.1 + +const CiphersIntermediate* = "TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384" + ## An OpenSSL-compatible list of secure ciphers for ``intermediate`` compatibility + ## per Mozilla's recommendations. 
+ ## + ## Oldest clients supported by this list: + ## * Firefox 27 + ## * Android 4.4.2 + ## * Chrome 31 + ## * Edge + ## * IE 11 on Windows 7 + ## * Java 8u31 + ## * OpenSSL 1.0.1 + ## * Opera 20 + ## * Safari 9 + +const CiphersOld* = "TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES128-SHA:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA:ECDHE-RSA-AES256-SHA:DHE-RSA-AES128-SHA256:DHE-RSA-AES256-SHA256:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA256:AES256-SHA256:AES128-SHA:AES256-SHA:DES-CBC3-SHA" + ## An OpenSSL-compatible list of secure ciphers for ``old`` compatibility + ## per Mozilla's recommendations. + ## + ## Oldest clients supported by this list: + ## * Firefox 1 + ## * Android 2.3 + ## * Chrome 1 + ## * Edge 12 + ## * IE8 on Windows XP + ## * Java 6 + ## * OpenSSL 0.9.8 + ## * Opera 5 + ## * Safari 1 + diff --git a/lib/pure/stats.nim b/lib/pure/stats.nim new file mode 100644 index 000000000..6a4fd8f01 --- /dev/null +++ b/lib/pure/stats.nim @@ -0,0 +1,335 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Nim contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## Statistical analysis framework for performing +## basic statistical analysis of data. +## The data is analysed in a single pass, when it +## is pushed to a `RunningStat` or `RunningRegress` object. 
##
## `RunningStat` calculates for a single data set
## - n (data count)
## - min (smallest value)
## - max (largest value)
## - sum
## - mean
## - variance
## - varianceS (sample variance)
## - standardDeviation
## - standardDeviationS (sample standard deviation)
## - skewness (the third statistical moment)
## - kurtosis (the fourth statistical moment)
##
## `RunningRegress` calculates for two sets of data
## - n (data count)
## - slope
## - intercept
## - correlation
##
## Procs are provided to calculate statistics on `openArray`s.
##
## However, if more than a single statistical calculation is required, it is more
## efficient to push the data once to a `RunningStat` object and then
## call the numerous statistical procs for the `RunningStat` object:

runnableExamples:
  from std/math import almostEqual

  template `~=`(a, b: float): bool = almostEqual(a, b)

  var statistics: RunningStat # must be var
  statistics.push(@[1.0, 2.0, 1.0, 4.0, 1.0, 4.0, 1.0, 2.0])
  doAssert statistics.n == 8
  doAssert statistics.mean() ~= 2.0
  doAssert statistics.variance() ~= 1.5
  doAssert statistics.varianceS() ~= 1.714285714285715
  doAssert statistics.skewness() ~= 0.8164965809277261
  doAssert statistics.skewnessS() ~= 1.018350154434631
  doAssert statistics.kurtosis() ~= -1.0
  doAssert statistics.kurtosisS() ~= -0.7000000000000008

from std/math import FloatClass, sqrt, pow, round

when defined(nimPreviewSlimSystem):
  import std/[assertions, formatfloat]

{.push debugger: off.} # the user does not want to trace a part
                       # of the standard library!
{.push checks: off, line_dir: off, stack_trace: off.}

type
  RunningStat* = object ## An accumulator for statistical data.
    n*: int ## amount of pushed data
    min*, max*, sum*: float ## self-explaining
    mom1, mom2, mom3, mom4: float ## statistical moments, mom1 is mean

  RunningRegress* = object ## An accumulator for regression calculations.
    n*: int ## amount of pushed data
    x_stats*: RunningStat ## stats for the first set of data
    y_stats*: RunningStat ## stats for the second set of data
    s_xy: float ## accumulated data for combined xy

# ----------- RunningStat --------------------------

proc clear*(s: var RunningStat) =
  ## Resets `s`: the count, `min`, `max`, `sum` and all four moments
  ## are set back to zero.
  s.n = 0
  s.min = 0.0
  s.max = 0.0
  s.sum = 0.0
  s.mom1 = 0.0
  s.mom2 = 0.0
  s.mom3 = 0.0
  s.mom4 = 0.0

proc push*(s: var RunningStat, x: float) =
  ## Pushes a value `x` for processing.
  ##
  ## Updates `min`, `max`, `sum`, the count and the four running moments
  ## in a single step.
  if s.n == 0:
    # first value: it is both the minimum and the maximum
    s.min = x
    s.max = x
  else:
    if s.min > x: s.min = x
    if s.max < x: s.max = x
  inc(s.n)
  # See Knuth TAOCP vol 2, 3rd edition, page 232
  s.sum += x
  let n = toFloat(s.n)
  let delta = x - s.mom1
  let delta_n = delta / n
  let delta_n2 = delta_n * delta_n
  let term1 = delta * delta_n * toFloat(s.n - 1)
  # Higher moments first: each update reads the not-yet-updated lower moments.
  s.mom4 += term1 * delta_n2 * (n*n - 3*n + 3) +
            6*delta_n2*s.mom2 - 4*delta_n*s.mom3
  s.mom3 += term1 * delta_n * (n - 2) - 3*delta_n*s.mom2
  s.mom2 += term1
  s.mom1 += delta_n

proc push*(s: var RunningStat, x: int) =
  ## Pushes a value `x` for processing.
  ##
  ## `x` is simply converted to `float`
  ## and the other push operation is called.
  s.push(toFloat(x))

proc push*(s: var RunningStat, x: openArray[float|int]) =
  ## Pushes all values of `x` for processing.
  ##
  ## Int values of `x` are simply converted to `float` and
  ## the other push operation is called.
  for val in x:
    s.push(val)

proc mean*(s: RunningStat): float =
  ## Computes the current mean of `s`.
  result = s.mom1

proc variance*(s: RunningStat): float =
  ## Computes the current population variance of `s`.
  ##
  ## The second moment is divided by `n`, so the result is `NaN` (0/0)
  ## while no data has been pushed.
  result = s.mom2 / toFloat(s.n)

proc varianceS*(s: RunningStat): float =
  ## Computes the current sample variance of `s`.
  ##
  ## Returns `0.0` unless more than one value has been pushed.
  if s.n > 1: result = s.mom2 / toFloat(s.n - 1)

proc standardDeviation*(s: RunningStat): float =
  ## Computes the current population standard deviation of `s`.
  result = sqrt(variance(s))

proc standardDeviationS*(s: RunningStat): float =
  ## Computes the current sample standard deviation of `s`.
  result = sqrt(varianceS(s))

proc skewness*(s: RunningStat): float =
  ## Computes the current population skewness of `s`.
  result = sqrt(toFloat(s.n)) * s.mom3 / pow(s.mom2, 1.5)

proc skewnessS*(s: RunningStat): float =
  ## Computes the current sample skewness of `s`.
  ##
  ## The correction factor divides by `n - 2`, so more than two values
  ## are needed for a finite result.
  let s2 = skewness(s)
  result = sqrt(toFloat(s.n*(s.n-1)))*s2 / toFloat(s.n-2)

proc kurtosis*(s: RunningStat): float =
  ## Computes the current population kurtosis of `s`.
  result = toFloat(s.n) * s.mom4 / (s.mom2 * s.mom2) - 3.0

proc kurtosisS*(s: RunningStat): float =
  ## Computes the current sample kurtosis of `s`.
  ##
  ## The correction factor divides by `(n - 2) * (n - 3)`, so more than
  ## three values are needed for a finite result.
  result = toFloat(s.n-1) / toFloat((s.n-2)*(s.n-3)) *
              (toFloat(s.n+1)*kurtosis(s) + 6)

proc `+`*(a, b: RunningStat): RunningStat =
  ## Combines two `RunningStat`s.
  ##
  ## Useful when performing parallel analysis of data series
  ## and needing to re-combine parallel result sets.
  ##
  ## If one operand holds no data the other one is returned unchanged, so
  ## the zero-initialised `min`/`max` of an empty accumulator cannot leak
  ## into the result and no division by a zero count takes place.
  if a.n == 0: return b
  if b.n == 0: return a

  result.clear()
  result.n = a.n + b.n
  result.sum = a.sum + b.sum

  let delta = b.mom1 - a.mom1
  let delta2 = delta*delta
  let delta3 = delta*delta2
  let delta4 = delta2*delta2
  # Counts as floats: keeps the weight terms below free of `int` overflow.
  let na = a.n.float
  let nb = b.n.float
  let n = toFloat(result.n)

  result.mom1 = (na*a.mom1 + nb*b.mom1) / n
  result.mom2 = a.mom2 + b.mom2 + delta2 * na * nb / n
  result.mom3 = a.mom3 + b.mom3 +
                delta3 * na * nb * (na - nb)/(n*n)
  result.mom3 += 3.0*delta * (na*b.mom2 - nb*a.mom2) / n
  result.mom4 = a.mom4 + b.mom4 +
                delta4*na*nb * (na*na - na*nb + nb*nb) /
                (n*n*n)
  result.mom4 += 6.0*delta2 * (na*na*b.mom2 + nb*nb*a.mom2) /
                 (n*n) +
                 4.0*delta*(na*b.mom3 - nb*a.mom3) / n
  result.max = max(a.max, b.max)
  result.min = min(a.min, b.min)

proc `+=`*(a: var RunningStat, b: RunningStat) {.inline.} =
  ## Adds the `RunningStat` `b` to `a`.
  a = a + b

proc `$`*(a: RunningStat): string =
  ## Produces a string representation of the `RunningStat`. The exact
  ## format is currently unspecified and subject to change. Currently
  ## it contains:
  ##
  ## - the number of probes
  ## - min, max values
  ## - sum, mean and standard deviation.
  result = "RunningStat(\n"
  result.add " number of probes: " & $a.n & "\n"
  result.add " max: " & $a.max & "\n"
  result.add " min: " & $a.min & "\n"
  result.add " sum: " & $a.sum & "\n"
  result.add " mean: " & $a.mean & "\n"
  result.add " std deviation: " & $a.standardDeviation & "\n"
  result.add ")"

# ---------------------- standalone array/seq stats ---------------------

proc mean*[T](x: openArray[T]): float =
  ## Computes the mean of `x`.
  var rs: RunningStat
  rs.push(x)
  result = rs.mean()

proc variance*[T](x: openArray[T]): float =
  ## Computes the population variance of `x`.
  var rs: RunningStat
  rs.push(x)
  result = rs.variance()

proc varianceS*[T](x: openArray[T]): float =
  ## Computes the sample variance of `x`.
  var rs: RunningStat
  rs.push(x)
  result = rs.varianceS()

proc standardDeviation*[T](x: openArray[T]): float =
  ## Computes the population standard deviation of `x`.
  var rs: RunningStat
  rs.push(x)
  result = rs.standardDeviation()

proc standardDeviationS*[T](x: openArray[T]): float =
  ## Computes the sample standard deviation of `x`.
  var rs: RunningStat
  rs.push(x)
  result = rs.standardDeviationS()

proc skewness*[T](x: openArray[T]): float =
  ## Computes the population skewness of `x`.
  var rs: RunningStat
  rs.push(x)
  result = rs.skewness()

proc skewnessS*[T](x: openArray[T]): float =
  ## Computes the sample skewness of `x`.
  var rs: RunningStat
  rs.push(x)
  result = rs.skewnessS()

proc kurtosis*[T](x: openArray[T]): float =
  ## Computes the population kurtosis of `x`.
  var rs: RunningStat
  rs.push(x)
  result = rs.kurtosis()

proc kurtosisS*[T](x: openArray[T]): float =
  ## Computes the sample kurtosis of `x`.
  var rs: RunningStat
  rs.push(x)
  result = rs.kurtosisS()

# ---------------------- Running Regression -----------------------------

proc clear*(r: var RunningRegress) =
  ## Resets `r`.
  r.x_stats.clear()
  r.y_stats.clear()
  r.s_xy = 0.0
  r.n = 0

proc push*(r: var RunningRegress, x, y: float) =
  ## Pushes two values `x` and `y` for processing.
  # The co-moment must be updated with the means *before* x and y are pushed.
  r.s_xy += (r.x_stats.mean() - x)*(r.y_stats.mean() - y) *
                toFloat(r.n) / toFloat(r.n + 1)
  r.x_stats.push(x)
  r.y_stats.push(y)
  inc(r.n)

proc push*(r: var RunningRegress, x, y: int) {.inline.} =
  ## Pushes two values `x` and `y` for processing.
  ##
  ## `x` and `y` are converted to `float`
  ## and the other push operation is called.
  r.push(toFloat(x), toFloat(y))

proc push*(r: var RunningRegress, x, y: openArray[float|int]) =
  ## Pushes two sets of values `x` and `y` for processing.
  ##
  ## Both arrays are expected to have the same length.
  assert(x.len == y.len)
  for i in 0..<x.len:
    r.push(x[i], y[i])

proc slope*(r: RunningRegress): float =
  ## Computes the current slope of `r`.
  let s_xx = r.x_stats.varianceS()*toFloat(r.n - 1)
  result = r.s_xy / s_xx

proc intercept*(r: RunningRegress): float =
  ## Computes the current intercept of `r`.
  result = r.y_stats.mean() - r.slope()*r.x_stats.mean()

proc correlation*(r: RunningRegress): float =
  ## Computes the current correlation of the two data
  ## sets pushed into `r`.
  let t = r.x_stats.standardDeviation() * r.y_stats.standardDeviation()
  result = r.s_xy / (toFloat(r.n) * t)

proc `+`*(a, b: RunningRegress): RunningRegress =
  ## Combines two `RunningRegress` objects.
+ ## + ## Useful when performing parallel analysis of data series + ## and needing to re-combine parallel result sets + result.clear() + result.x_stats = a.x_stats + b.x_stats + result.y_stats = a.y_stats + b.y_stats + result.n = a.n + b.n + + let delta_x = b.x_stats.mean() - a.x_stats.mean() + let delta_y = b.y_stats.mean() - a.y_stats.mean() + result.s_xy = a.s_xy + b.s_xy + + toFloat(a.n*b.n)*delta_x*delta_y/toFloat(result.n) + +proc `+=`*(a: var RunningRegress, b: RunningRegress) = + ## Adds the `RunningRegress` `b` to `a`. + a = a + b + +{.pop.} +{.pop.} diff --git a/lib/pure/streams.nim b/lib/pure/streams.nim index e706f2016..56f49d7b1 100644 --- a/lib/pure/streams.nim +++ b/lib/pure/streams.nim @@ -1,341 +1,1521 @@ # # # Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf +# (c) Copyright 2015 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## This module provides a stream interface and two implementations thereof: -## the `FileStream` and the `StringStream` which implement the stream -## interface for Nim file objects (`File`) and strings. Other modules -## may provide other implementations for this standard stream interface. +## the `FileStream <#FileStream>`_ and the `StringStream <#StringStream>`_ +## which implement the stream interface for Nim file objects (`File`) and +## strings. +## +## Other modules may provide other implementations for this standard +## stream interface. +## +## .. warning:: Due to the use of `pointer`, the `readData`, `peekData` and +## `writeData` interfaces are not available on the compile-time VM, and must +## be cast from a `ptr string` on the JS backend. However, `readDataStr` is +## available generally in place of `readData`. +## +## Basic usage +## =========== +## +## The basic flow of using this module is: +## +## 1. Open input stream +## 2. Read or write stream +## 3. 
Close stream +## +## StringStream example +## -------------------- +## +## ```Nim +## import std/streams +## +## var strm = newStringStream("""The first line +## the second line +## the third line""") +## +## var line = "" +## +## while strm.readLine(line): +## echo line +## +## # Output: +## # The first line +## # the second line +## # the third line +## +## strm.close() +## ``` +## +## FileStream example +## ------------------ +## +## Write file stream example: +## +## ```Nim +## import std/streams +## +## var strm = newFileStream("somefile.txt", fmWrite) +## var line = "" +## +## if not isNil(strm): +## strm.writeLine("The first line") +## strm.writeLine("the second line") +## strm.writeLine("the third line") +## strm.close() +## +## # Output (somefile.txt): +## # The first line +## # the second line +## # the third line +## ``` +## +## Read file stream example: +## +## ```Nim +## import std/streams +## +## var strm = newFileStream("somefile.txt", fmRead) +## var line = "" +## +## if not isNil(strm): +## while strm.readLine(line): +## echo line +## strm.close() +## +## # Output: +## # The first line +## # the second line +## # the third line +## ``` +## +## See also +## ======== +## * `asyncstreams module <asyncstreams.html>`_ +## * `io module <syncio.html>`_ for `FileMode enum <syncio.html#FileMode>`_ -include "system/inclrtl" +import std/private/since -proc newEIO(msg: string): ref IOError = +when defined(nimPreviewSlimSystem): + import std/syncio + export FileMode + +proc newEIO(msg: string): owned(ref IOError) = new(result) result.msg = msg type Stream* = ref StreamObj - StreamObj* = object of RootObj ## Stream interface that supports - ## writing or reading. Note that these fields - ## here shouldn't be used directly. They are - ## accessible so that a stream implementation - ## can override them. 
- closeImpl*: proc (s: Stream) {.nimcall, tags: [], gcsafe.} - atEndImpl*: proc (s: Stream): bool {.nimcall, tags: [], gcsafe.} - setPositionImpl*: proc (s: Stream, pos: int) {.nimcall, tags: [], gcsafe.} - getPositionImpl*: proc (s: Stream): int {.nimcall, tags: [], gcsafe.} - readDataImpl*: proc (s: Stream, buffer: pointer, - bufLen: int): int {.nimcall, tags: [ReadIOEffect], gcsafe.} - writeDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int) {.nimcall, - tags: [WriteIOEffect], gcsafe.} - flushImpl*: proc (s: Stream) {.nimcall, tags: [WriteIOEffect], gcsafe.} - -{.deprecated: [PStream: Stream, TStream: StreamObj].} + ## All procedures of this module use this type. + ## Procedures don't directly use `StreamObj <#StreamObj>`_. + StreamObj* = object of RootObj + ## Stream interface that supports writing or reading. + ## + ## **Note:** + ## * That these fields here shouldn't be used directly. + ## They are accessible so that a stream implementation can override them. + closeImpl*: proc (s: Stream) + {.nimcall, raises: [IOError, OSError], tags: [WriteIOEffect], gcsafe.} + atEndImpl*: proc (s: Stream): bool + {.nimcall, raises: [Defect, IOError, OSError], tags: [], gcsafe.} + setPositionImpl*: proc (s: Stream, pos: int) + {.nimcall, raises: [Defect, IOError, OSError], tags: [], gcsafe.} + getPositionImpl*: proc (s: Stream): int + {.nimcall, raises: [Defect, IOError, OSError], tags: [], gcsafe.} + + readDataStrImpl*: proc (s: Stream, buffer: var string, slice: Slice[int]): int + {.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.} + + readLineImpl*: proc(s: Stream, line: var string): bool + {.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.} + + readDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int): int + {.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.} + peekDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int): int + {.nimcall, raises: [Defect, IOError, OSError], tags: 
[ReadIOEffect], gcsafe.} + writeDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int) + {.nimcall, raises: [Defect, IOError, OSError], tags: [WriteIOEffect], gcsafe.} + + flushImpl*: proc (s: Stream) + {.nimcall, raises: [Defect, IOError, OSError], tags: [WriteIOEffect], gcsafe.} proc flush*(s: Stream) = - ## flushes the buffers that the stream `s` might use. + ## Flushes the buffers that the stream `s` might use. + ## + ## This procedure causes any unwritten data for that stream to be delivered + ## to the host environment to be written to the file. + ## + ## See also: + ## * `close proc <#close,Stream>`_ + runnableExamples: + from std/os import removeFile + + var strm = newFileStream("somefile.txt", fmWrite) + + doAssert "Before write:" & readFile("somefile.txt") == "Before write:" + strm.write("hello") + doAssert "After write:" & readFile("somefile.txt") == "After write:" + + strm.flush() + doAssert "After flush:" & readFile("somefile.txt") == "After flush:hello" + strm.write("HELLO") + strm.flush() + doAssert "After flush:" & readFile("somefile.txt") == "After flush:helloHELLO" + + strm.close() + doAssert "After close:" & readFile("somefile.txt") == "After close:helloHELLO" + removeFile("somefile.txt") + if not isNil(s.flushImpl): s.flushImpl(s) proc close*(s: Stream) = - ## closes the stream `s`. - if not isNil(s.closeImpl): s.closeImpl(s) + ## Closes the stream `s`. + ## + ## See also: + ## * `flush proc <#flush,Stream>`_ + runnableExamples: + block: + let strm = newStringStream("The first line\nthe second line\nthe third line") + ## do something... + strm.close() + + block: + let strm = newFileStream("amissingfile.txt") + # deferring works even if newFileStream fails + defer: strm.close() + if not isNil(strm): + ## do something... -proc close*(s, unused: Stream) {.deprecated.} = - ## closes the stream `s`. 
- s.closeImpl(s) + if not isNil(s) and not isNil(s.closeImpl): + s.closeImpl(s) proc atEnd*(s: Stream): bool = - ## checks if more data can be read from `f`. Returns true if all data has + ## Checks if more data can be read from `s`. Returns ``true`` if all data has ## been read. - result = s.atEndImpl(s) + runnableExamples: + var strm = newStringStream("The first line\nthe second line\nthe third line") + var line = "" + doAssert strm.atEnd() == false + while strm.readLine(line): + discard + doAssert strm.atEnd() == true + strm.close() -proc atEnd*(s, unused: Stream): bool {.deprecated.} = - ## checks if more data can be read from `f`. Returns true if all data has - ## been read. result = s.atEndImpl(s) proc setPosition*(s: Stream, pos: int) = - ## sets the position `pos` of the stream `s`. - s.setPositionImpl(s, pos) + ## Sets the position `pos` of the stream `s`. + runnableExamples: + var strm = newStringStream("The first line\nthe second line\nthe third line") + strm.setPosition(4) + doAssert strm.readLine() == "first line" + strm.setPosition(0) + doAssert strm.readLine() == "The first line" + strm.close() -proc setPosition*(s, unused: Stream, pos: int) {.deprecated.} = - ## sets the position `pos` of the stream `s`. s.setPositionImpl(s, pos) proc getPosition*(s: Stream): int = - ## retrieves the current position in the stream `s`. - result = s.getPositionImpl(s) + ## Retrieves the current position in the stream `s`. + runnableExamples: + var strm = newStringStream("The first line\nthe second line\nthe third line") + doAssert strm.getPosition() == 0 + discard strm.readLine() + doAssert strm.getPosition() == 15 + strm.close() -proc getPosition*(s, unused: Stream): int {.deprecated.} = - ## retrieves the current position in the stream `s`. result = s.getPositionImpl(s) proc readData*(s: Stream, buffer: pointer, bufLen: int): int = - ## low level proc that reads data into an untyped `buffer` of `bufLen` size. 
- result = s.readDataImpl(s, buffer, bufLen) + ## Low level proc that reads data into an untyped `buffer` of `bufLen` size. + ## + ## **JS note:** `buffer` is treated as a ``ptr string`` and written to between + ## ``0..<bufLen``. + runnableExamples: + var strm = newStringStream("abcde") + var buffer: array[6, char] + doAssert strm.readData(addr(buffer), 1024) == 5 + doAssert buffer == ['a', 'b', 'c', 'd', 'e', '\x00'] + doAssert strm.atEnd() == true + strm.close() -proc readData*(s, unused: Stream, buffer: pointer, - bufLen: int): int {.deprecated.} = - ## low level proc that reads data into an untyped `buffer` of `bufLen` size. result = s.readDataImpl(s, buffer, bufLen) +proc readDataStr*(s: Stream, buffer: var string, slice: Slice[int]): int = + ## Low level proc that reads data into a string ``buffer`` at ``slice``. + runnableExamples: + var strm = newStringStream("abcde") + var buffer = "12345" + doAssert strm.readDataStr(buffer, 0..3) == 4 + doAssert buffer == "abcd5" + strm.close() + + if s.readDataStrImpl != nil: + result = s.readDataStrImpl(s, buffer, slice) + else: + # fallback + when declared(prepareMutation): + # buffer might potentially be a CoW literal with ARC + prepareMutation(buffer) + result = s.readData(addr buffer[slice.a], slice.b + 1 - slice.a) + +template jsOrVmBlock(caseJsOrVm, caseElse: untyped): untyped = + when nimvm: + block: + caseJsOrVm + else: + block: + when defined(js) or defined(nimscript): + # nimscript has to be here to avoid semantic checking of caseElse + caseJsOrVm + else: + caseElse + +when (NimMajor, NimMinor) >= (1, 3) or not defined(js): + proc readAll*(s: Stream): string = + ## Reads all available data. 
+ runnableExamples: + var strm = newStringStream("The first line\nthe second line\nthe third line") + doAssert strm.readAll() == "The first line\nthe second line\nthe third line" + doAssert strm.atEnd() == true + strm.close() + + const bufferSize = 1024 + jsOrVmBlock: + var buffer2: string + buffer2.setLen(bufferSize) + while true: + let readBytes = readDataStr(s, buffer2, 0..<bufferSize) + if readBytes == 0: + break + let prevLen = result.len + result.setLen(prevLen + readBytes) + result[prevLen..<prevLen+readBytes] = buffer2[0..<readBytes] + if readBytes < bufferSize: + break + do: # not JS or VM + var buffer {.noinit.}: array[bufferSize, char] + while true: + let readBytes = readData(s, addr(buffer[0]), bufferSize) + if readBytes == 0: + break + let prevLen = result.len + result.setLen(prevLen + readBytes) + copyMem(addr(result[prevLen]), addr(buffer[0]), readBytes) + if readBytes < bufferSize: + break + +proc peekData*(s: Stream, buffer: pointer, bufLen: int): int = + ## Low level proc that reads data into an untyped `buffer` of `bufLen` size + ## without moving stream position. + ## + ## **JS note:** `buffer` is treated as a ``ptr string`` and written to between + ## ``0..<bufLen``. + runnableExamples: + var strm = newStringStream("abcde") + var buffer: array[6, char] + doAssert strm.peekData(addr(buffer), 1024) == 5 + doAssert buffer == ['a', 'b', 'c', 'd', 'e', '\x00'] + doAssert strm.atEnd() == false + strm.close() + + result = s.peekDataImpl(s, buffer, bufLen) + proc writeData*(s: Stream, buffer: pointer, bufLen: int) = - ## low level proc that writes an untyped `buffer` of `bufLen` size + ## Low level proc that writes an untyped `buffer` of `bufLen` size ## to the stream `s`. - s.writeDataImpl(s, buffer, bufLen) + ## + ## **JS note:** `buffer` is treated as a ``ptr string`` and read between + ## ``0..<bufLen``. 
+ runnableExamples: + ## writeData + var strm = newStringStream("") + var buffer = ['a', 'b', 'c', 'd', 'e'] + strm.writeData(addr(buffer), sizeof(buffer)) + doAssert strm.atEnd() == true + ## readData + strm.setPosition(0) + var buffer2: array[6, char] + doAssert strm.readData(addr(buffer2), sizeof(buffer2)) == 5 + doAssert buffer2 == ['a', 'b', 'c', 'd', 'e', '\x00'] + strm.close() -proc writeData*(s, unused: Stream, buffer: pointer, - bufLen: int) {.deprecated.} = - ## low level proc that writes an untyped `buffer` of `bufLen` size - ## to the stream `s`. s.writeDataImpl(s, buffer, bufLen) -proc write*[T](s: Stream, x: T) = - ## generic write procedure. Writes `x` to the stream `s`. Implementation: +proc write*[T](s: Stream, x: T) = + ## Generic write procedure. Writes `x` to the stream `s`. Implementation: ## - ## .. code-block:: Nim + ## **Note:** Not available for JS backend. Use `write(Stream, string) + ## <#write,Stream,string>`_ for now. ## - ## s.writeData(s, addr(x), sizeof(x)) - var y: T - shallowCopy(y, x) - writeData(s, addr(y), sizeof(y)) + ## ```Nim + ## s.writeData(s, unsafeAddr(x), sizeof(x)) + ## ``` + runnableExamples: + var strm = newStringStream("") + strm.write("abcde") + strm.setPosition(0) + doAssert strm.readAll() == "abcde" + strm.close() + + writeData(s, unsafeAddr(x), sizeof(x)) -proc write*(s: Stream, x: string) = - ## writes the string `x` to the the stream `s`. No length field or +proc write*(s: Stream, x: string) = + ## Writes the string `x` to the stream `s`. No length field or ## terminating zero is written. 
- writeData(s, cstring(x), x.len) + runnableExamples: + var strm = newStringStream("") + strm.write("THE FIRST LINE") + strm.setPosition(0) + doAssert strm.readLine() == "THE FIRST LINE" + strm.close() -proc writeln*(s: Stream, args: varargs[string, `$`]) = - ## writes one or more strings to the the stream `s` followed + when nimvm: + writeData(s, cstring(x), x.len) + else: + if x.len > 0: + when defined(js): + var x = x + writeData(s, addr(x), x.len) + else: + writeData(s, cstring(x), x.len) + +proc write*(s: Stream, args: varargs[string, `$`]) = + ## Writes one or more strings to the stream. No length fields or + ## terminating zeros are written. + runnableExamples: + var strm = newStringStream("") + strm.write(1, 2, 3, 4) + strm.setPosition(0) + doAssert strm.readLine() == "1234" + strm.close() + + for str in args: write(s, str) + +proc writeLine*(s: Stream, args: varargs[string, `$`]) = + ## Writes one or more strings to the stream `s` followed ## by a new line. No length field or terminating zero is written. + runnableExamples: + var strm = newStringStream("") + strm.writeLine(1, 2) + strm.writeLine(3, 4) + strm.setPosition(0) + doAssert strm.readAll() == "12\n34\n" + strm.close() + for str in args: write(s, str) write(s, "\n") -proc read[T](s: Stream, result: var T) = - ## generic read procedure. Reads `result` from the stream `s`. +proc read*[T](s: Stream, result: var T) = + ## Generic read procedure. Reads `result` from the stream `s`. + ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream("012") + ## readInt + var i: int8 + strm.read(i) + doAssert i == 48 + ## readData + var buffer: array[2, char] + strm.read(buffer) + doAssert buffer == ['1', '2'] + strm.close() + if readData(s, addr(result), sizeof(T)) != sizeof(T): raise newEIO("cannot read from stream") +proc peek*[T](s: Stream, result: var T) = + ## Generic peek procedure. 
Peeks `result` from the stream `s`. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream("012") + ## peekInt + var i: int8 + strm.peek(i) + doAssert i == 48 + ## peekData + var buffer: array[2, char] + strm.peek(buffer) + doAssert buffer == ['0', '1'] + strm.close() + + if peekData(s, addr(result), sizeof(T)) != sizeof(T): + raise newEIO("cannot read from stream") + proc readChar*(s: Stream): char = - ## reads a char from the stream `s`. Raises `EIO` if an error occurred. - ## Returns '\0' as an EOF marker. - if readData(s, addr(result), sizeof(result)) != 1: result = '\0' + ## Reads a char from the stream `s`. + ## + ## Raises `IOError` if an error occurred. + ## Returns '\\0' as an EOF marker. + runnableExamples: + var strm = newStringStream("12\n3") + doAssert strm.readChar() == '1' + doAssert strm.readChar() == '2' + doAssert strm.readChar() == '\n' + doAssert strm.readChar() == '3' + doAssert strm.readChar() == '\x00' + strm.close() + + jsOrVmBlock: + var str = " " + if readDataStr(s, str, 0..0) != 1: result = '\0' + else: result = str[0] + do: + if readData(s, addr(result), sizeof(result)) != 1: result = '\0' + +proc peekChar*(s: Stream): char = + ## Peeks a char from the stream `s`. Raises `IOError` if an error occurred. + ## Returns '\\0' as an EOF marker. + runnableExamples: + var strm = newStringStream("12\n3") + doAssert strm.peekChar() == '1' + doAssert strm.peekChar() == '1' + discard strm.readAll() + doAssert strm.peekChar() == '\x00' + strm.close() + + when defined(js): + var str = " " + if peekData(s, addr(str), sizeof(result)) != 1: result = '\0' + else: result = str[0] + else: + if peekData(s, addr(result), sizeof(result)) != 1: result = '\0' + +proc readBool*(s: Stream): bool = + ## Reads a bool from the stream `s`. + ## + ## A bool is one byte long and it is `true` for every non-zero + ## (`0000_0000`) value. + ## Raises `IOError` if an error occurred. 
+ ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(true) + strm.write(false) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.readBool() == true + doAssert strm.readBool() == false + doAssertRaises(IOError): discard strm.readBool() + strm.close() + + var t: byte + read(s, t) + result = t != 0.byte + +proc peekBool*(s: Stream): bool = + ## Peeks a bool from the stream `s`. + ## + ## A bool is one byte long and it is `true` for every non-zero + ## (`0000_0000`) value. + ## Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(true) + strm.write(false) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.peekBool() == true + ## not false + doAssert strm.peekBool() == true + doAssert strm.readBool() == true + doAssert strm.peekBool() == false + strm.close() + + var t: byte + peek(s, t) + result = t != 0.byte + +proc readInt8*(s: Stream): int8 = + ## Reads an int8 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'i8) + strm.write(2'i8) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.readInt8() == 1'i8 + doAssert strm.readInt8() == 2'i8 + doAssertRaises(IOError): discard strm.readInt8() + strm.close() -proc readBool*(s: Stream): bool = - ## reads a bool from the stream `s`. Raises `EIO` if an error occurred. read(s, result) -proc readInt8*(s: Stream): int8 = - ## reads an int8 from the stream `s`. Raises `EIO` if an error occurred. +proc peekInt8*(s: Stream): int8 = + ## Peeks an int8 from the stream `s`. 
Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'i8) + strm.write(2'i8) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.peekInt8() == 1'i8 + ## not 2'i8 + doAssert strm.peekInt8() == 1'i8 + doAssert strm.readInt8() == 1'i8 + doAssert strm.peekInt8() == 2'i8 + strm.close() + + peek(s, result) + +proc readInt16*(s: Stream): int16 = + ## Reads an int16 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'i16) + strm.write(2'i16) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.readInt16() == 1'i16 + doAssert strm.readInt16() == 2'i16 + doAssertRaises(IOError): discard strm.readInt16() + strm.close() + read(s, result) -proc readInt16*(s: Stream): int16 = - ## reads an int16 from the stream `s`. Raises `EIO` if an error occurred. +proc peekInt16*(s: Stream): int16 = + ## Peeks an int16 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'i16) + strm.write(2'i16) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.peekInt16() == 1'i16 + ## not 2'i16 + doAssert strm.peekInt16() == 1'i16 + doAssert strm.readInt16() == 1'i16 + doAssert strm.peekInt16() == 2'i16 + strm.close() + + peek(s, result) + +proc readInt32*(s: Stream): int32 = + ## Reads an int32 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. 
+ runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'i32) + strm.write(2'i32) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.readInt32() == 1'i32 + doAssert strm.readInt32() == 2'i32 + doAssertRaises(IOError): discard strm.readInt32() + strm.close() + + read(s, result) + +proc peekInt32*(s: Stream): int32 = + ## Peeks an int32 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'i32) + strm.write(2'i32) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.peekInt32() == 1'i32 + ## not 2'i32 + doAssert strm.peekInt32() == 1'i32 + doAssert strm.readInt32() == 1'i32 + doAssert strm.peekInt32() == 2'i32 + strm.close() + + peek(s, result) + +proc readInt64*(s: Stream): int64 = + ## Reads an int64 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'i64) + strm.write(2'i64) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.readInt64() == 1'i64 + doAssert strm.readInt64() == 2'i64 + doAssertRaises(IOError): discard strm.readInt64() + strm.close() + + read(s, result) + +proc peekInt64*(s: Stream): int64 = + ## Peeks an int64 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. 
+ runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'i64) + strm.write(2'i64) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.peekInt64() == 1'i64 + ## not 2'i64 + doAssert strm.peekInt64() == 1'i64 + doAssert strm.readInt64() == 1'i64 + doAssert strm.peekInt64() == 2'i64 + strm.close() + + peek(s, result) + +proc readUint8*(s: Stream): uint8 = + ## Reads an uint8 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'u8) + strm.write(2'u8) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.readUint8() == 1'u8 + doAssert strm.readUint8() == 2'u8 + doAssertRaises(IOError): discard strm.readUint8() + strm.close() + read(s, result) -proc readInt32*(s: Stream): int32 = - ## reads an int32 from the stream `s`. Raises `EIO` if an error occurred. +proc peekUint8*(s: Stream): uint8 = + ## Peeks an uint8 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'u8) + strm.write(2'u8) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.peekUint8() == 1'u8 + ## not 2'u8 + doAssert strm.peekUint8() == 1'u8 + doAssert strm.readUint8() == 1'u8 + doAssert strm.peekUint8() == 2'u8 + strm.close() + + peek(s, result) + +proc readUint16*(s: Stream): uint16 = + ## Reads an uint16 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. 
+ runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'u16) + strm.write(2'u16) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.readUint16() == 1'u16 + doAssert strm.readUint16() == 2'u16 + doAssertRaises(IOError): discard strm.readUint16() + strm.close() + read(s, result) -proc readInt64*(s: Stream): int64 = - ## reads an int64 from the stream `s`. Raises `EIO` if an error occurred. +proc peekUint16*(s: Stream): uint16 = + ## Peeks an uint16 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'u16) + strm.write(2'u16) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.peekUint16() == 1'u16 + ## not 2'u16 + doAssert strm.peekUint16() == 1'u16 + doAssert strm.readUint16() == 1'u16 + doAssert strm.peekUint16() == 2'u16 + strm.close() + + peek(s, result) + +proc readUint32*(s: Stream): uint32 = + ## Reads an uint32 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'u32) + strm.write(2'u32) + strm.flush() + strm.setPosition(0) + + ## get data + doAssert strm.readUint32() == 1'u32 + doAssert strm.readUint32() == 2'u32 + doAssertRaises(IOError): discard strm.readUint32() + strm.close() + read(s, result) -proc readFloat32*(s: Stream): float32 = - ## reads a float32 from the stream `s`. Raises `EIO` if an error occurred. +proc peekUint32*(s: Stream): uint32 = + ## Peeks an uint32 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. 
+ runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'u32) + strm.write(2'u32) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.peekUint32() == 1'u32 + ## not 2'u32 + doAssert strm.peekUint32() == 1'u32 + doAssert strm.readUint32() == 1'u32 + doAssert strm.peekUint32() == 2'u32 + strm.close() + + peek(s, result) + +proc readUint64*(s: Stream): uint64 = + ## Reads an uint64 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'u64) + strm.write(2'u64) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.readUint64() == 1'u64 + doAssert strm.readUint64() == 2'u64 + doAssertRaises(IOError): discard strm.readUint64() + strm.close() + read(s, result) -proc readFloat64*(s: Stream): float64 = - ## reads a float64 from the stream `s`. Raises `EIO` if an error occurred. +proc peekUint64*(s: Stream): uint64 = + ## Peeks an uint64 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'u64) + strm.write(2'u64) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.peekUint64() == 1'u64 + ## not 2'u64 + doAssert strm.peekUint64() == 1'u64 + doAssert strm.readUint64() == 1'u64 + doAssert strm.peekUint64() == 2'u64 + strm.close() + + peek(s, result) + +proc readFloat32*(s: Stream): float32 = + ## Reads a float32 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. 
+ runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'f32) + strm.write(2'f32) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.readFloat32() == 1'f32 + doAssert strm.readFloat32() == 2'f32 + doAssertRaises(IOError): discard strm.readFloat32() + strm.close() + read(s, result) -proc readStr*(s: Stream, length: int): TaintedString = - ## reads a string of length `length` from the stream `s`. Raises `EIO` if +proc peekFloat32*(s: Stream): float32 = + ## Peeks a float32 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'f32) + strm.write(2'f32) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.peekFloat32() == 1'f32 + ## not 2'f32 + doAssert strm.peekFloat32() == 1'f32 + doAssert strm.readFloat32() == 1'f32 + doAssert strm.peekFloat32() == 2'f32 + strm.close() + + peek(s, result) + +proc readFloat64*(s: Stream): float64 = + ## Reads a float64 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `readStr <#readStr,Stream,int>`_ for now. + runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'f64) + strm.write(2'f64) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.readFloat64() == 1'f64 + doAssert strm.readFloat64() == 2'f64 + doAssertRaises(IOError): discard strm.readFloat64() + strm.close() + + read(s, result) + +proc peekFloat64*(s: Stream): float64 = + ## Peeks a float64 from the stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** Not available for JS backend. Use `peekStr <#peekStr,Stream,int>`_ for now. 
+ runnableExamples: + var strm = newStringStream() + ## setup for reading data + strm.write(1'f64) + strm.write(2'f64) + strm.flush() + strm.setPosition(0) + ## get data + doAssert strm.peekFloat64() == 1'f64 + ## not 2'f64 + doAssert strm.peekFloat64() == 1'f64 + doAssert strm.readFloat64() == 1'f64 + doAssert strm.peekFloat64() == 2'f64 + strm.close() + + peek(s, result) + +proc readStrPrivate(s: Stream, length: int, str: var string) = + if length > len(str): setLen(str, length) + var L: int + when nimvm: + L = readDataStr(s, str, 0..length-1) + else: + when defined(js): + L = readData(s, addr(str), length) + else: + L = readData(s, cstring(str), length) + if L != len(str): setLen(str, L) + +proc readStr*(s: Stream, length: int, str: var string) {.since: (1, 3).} = + ## Reads a string of length `length` from the stream `s`. Raises `IOError` if ## an error occurred. - result = newString(length).TaintedString - var L = readData(s, addr(string(result)[0]), length) - if L != length: setLen(result.string, L) + readStrPrivate(s, length, str) + +proc readStr*(s: Stream, length: int): string = + ## Reads a string of length `length` from the stream `s`. Raises `IOError` if + ## an error occurred. + runnableExamples: + var strm = newStringStream("abcde") + doAssert strm.readStr(2) == "ab" + doAssert strm.readStr(2) == "cd" + doAssert strm.readStr(2) == "e" + doAssert strm.readStr(2) == "" + strm.close() + result = newString(length) + readStrPrivate(s, length, result) + +proc peekStrPrivate(s: Stream, length: int, str: var string) = + if length > len(str): setLen(str, length) + when defined(js): + let L = peekData(s, addr(str), length) + else: + let L = peekData(s, cstring(str), length) + if L != len(str): setLen(str, L) + +proc peekStr*(s: Stream, length: int, str: var string) {.since: (1, 3).} = + ## Peeks a string of length `length` from the stream `s`. Raises `IOError` if + ## an error occurred. 
+ peekStrPrivate(s, length, str) + +proc peekStr*(s: Stream, length: int): string = + ## Peeks a string of length `length` from the stream `s`. Raises `IOError` if + ## an error occurred. + runnableExamples: + var strm = newStringStream("abcde") + doAssert strm.peekStr(2) == "ab" + ## not "cd" + doAssert strm.peekStr(2) == "ab" + doAssert strm.readStr(2) == "ab" + doAssert strm.peekStr(2) == "cd" + strm.close() + result = newString(length) + peekStrPrivate(s, length, result) + +proc readLine*(s: Stream, line: var string): bool = + ## Reads a line of text from the stream `s` into `line`. `line` must not be + ## ``nil``! May throw an IO exception. + ## + ## A line of text may be delimited by ``LF`` or ``CRLF``. + ## The newline character(s) are not part of the returned string. + ## Returns ``false`` if the end of the file has been reached, ``true`` + ## otherwise. If ``false`` is returned `line` contains no new data. + ## + ## See also: + ## * `readLine(Stream) proc <#readLine,Stream>`_ + ## * `peekLine(Stream) proc <#peekLine,Stream>`_ + ## * `peekLine(Stream, string) proc <#peekLine,Stream,string>`_ + runnableExamples: + var strm = newStringStream("The first line\nthe second line\nthe third line") + var line = "" + doAssert strm.readLine(line) == true + doAssert line == "The first line" + doAssert strm.readLine(line) == true + doAssert line == "the second line" + doAssert strm.readLine(line) == true + doAssert line == "the third line" + doAssert strm.readLine(line) == false + doAssert line == "" + strm.close() + + if s.readLineImpl != nil: + result = s.readLineImpl(s, line) + else: + # fallback + line.setLen(0) + while true: + var c = readChar(s) + if c == '\c': + c = readChar(s) + break + elif c == '\L': break + elif c == '\0': + if line.len > 0: break + else: return false + line.add(c) + result = true -proc readLine*(s: Stream, line: var TaintedString): bool = - ## reads a line of text from the stream `s` into `line`. 
`line` must not be +proc peekLine*(s: Stream, line: var string): bool = + ## Peeks a line of text from the stream `s` into `line`. `line` must not be ## ``nil``! May throw an IO exception. + ## ## A line of text may be delimited by ``CR``, ``LF`` or ## ``CRLF``. The newline character(s) are not part of the returned string. ## Returns ``false`` if the end of the file has been reached, ``true`` ## otherwise. If ``false`` is returned `line` contains no new data. - line.string.setLen(0) - while true: - var c = readChar(s) - if c == '\c': - c = readChar(s) - break - elif c == '\L': break - elif c == '\0': - if line.len > 0: break - else: return false - line.string.add(c) - result = true - -proc readLine*(s: Stream): TaintedString = - ## Reads a line from a stream `s`. Note: This is not very efficient. Raises - ## `EIO` if an error occurred. - result = TaintedString"" + ## + ## See also: + ## * `readLine(Stream) proc <#readLine,Stream>`_ + ## * `readLine(Stream, string) proc <#readLine,Stream,string>`_ + ## * `peekLine(Stream) proc <#peekLine,Stream>`_ + runnableExamples: + var strm = newStringStream("The first line\nthe second line\nthe third line") + var line = "" + doAssert strm.peekLine(line) == true + doAssert line == "The first line" + doAssert strm.peekLine(line) == true + ## not "the second line" + doAssert line == "The first line" + doAssert strm.readLine(line) == true + doAssert line == "The first line" + doAssert strm.peekLine(line) == true + doAssert line == "the second line" + strm.close() + + let pos = getPosition(s) + defer: setPosition(s, pos) + result = readLine(s, line) + +proc readLine*(s: Stream): string = + ## Reads a line from a stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** This is not very efficient. 
+ ## + ## See also: + ## * `readLine(Stream, string) proc <#readLine,Stream,string>`_ + ## * `peekLine(Stream) proc <#peekLine,Stream>`_ + ## * `peekLine(Stream, string) proc <#peekLine,Stream,string>`_ + runnableExamples: + var strm = newStringStream("The first line\nthe second line\nthe third line") + doAssert strm.readLine() == "The first line" + doAssert strm.readLine() == "the second line" + doAssert strm.readLine() == "the third line" + doAssertRaises(IOError): discard strm.readLine() + strm.close() + + result = "" + if s.atEnd: + raise newEIO("cannot read from stream") while true: var c = readChar(s) - if c == '\c': + if c == '\c': c = readChar(s) break if c == '\L' or c == '\0': break else: - result.string.add(c) + result.add(c) + +proc peekLine*(s: Stream): string = + ## Peeks a line from a stream `s`. Raises `IOError` if an error occurred. + ## + ## **Note:** This is not very efficient. + ## + ## See also: + ## * `readLine(Stream) proc <#readLine,Stream>`_ + ## * `readLine(Stream, string) proc <#readLine,Stream,string>`_ + ## * `peekLine(Stream, string) proc <#peekLine,Stream,string>`_ + runnableExamples: + var strm = newStringStream("The first line\nthe second line\nthe third line") + doAssert strm.peekLine() == "The first line" + ## not "the second line" + doAssert strm.peekLine() == "The first line" + doAssert strm.readLine() == "The first line" + doAssert strm.peekLine() == "the second line" + strm.close() + + let pos = getPosition(s) + defer: setPosition(s, pos) + result = readLine(s) + +iterator lines*(s: Stream): string = + ## Iterates over every line in the stream. + ## The iteration is based on ``readLine``. 
+ ## + ## See also: + ## * `readLine(Stream) proc <#readLine,Stream>`_ + ## * `readLine(Stream, string) proc <#readLine,Stream,string>`_ + runnableExamples: + var strm = newStringStream("The first line\nthe second line\nthe third line") + var lines: seq[string] + for line in strm.lines(): + lines.add line + doAssert lines == @["The first line", "the second line", "the third line"] + strm.close() + + var line: string + while s.readLine(line): + yield line type - StringStream* = ref StringStreamObj ## a stream that encapsulates a string + StringStream* = ref StringStreamObj + ## A stream that encapsulates a string. StringStreamObj* = object of StreamObj - data*: string + ## A string stream object. + data*: string ## A string data. + ## This is updated when called `writeLine` etc. pos: int -{.deprecated: [PStringStream: StringStream, TStringStream: StringStreamObj].} - -proc ssAtEnd(s: Stream): bool = - var s = StringStream(s) - return s.pos >= s.data.len - -proc ssSetPosition(s: Stream, pos: int) = - var s = StringStream(s) - s.pos = clamp(pos, 0, s.data.high) - -proc ssGetPosition(s: Stream): int = - var s = StringStream(s) - return s.pos - -proc ssReadData(s: Stream, buffer: pointer, bufLen: int): int = - var s = StringStream(s) - result = min(bufLen, s.data.len - s.pos) - if result > 0: - copyMem(buffer, addr(s.data[s.pos]), result) - inc(s.pos, result) - -proc ssWriteData(s: Stream, buffer: pointer, bufLen: int) = - var s = StringStream(s) - if bufLen <= 0: - return - if s.pos + bufLen > s.data.len: - setLen(s.data, s.pos + bufLen) - copyMem(addr(s.data[s.pos]), buffer, bufLen) - inc(s.pos, bufLen) - -proc ssClose(s: Stream) = - var s = StringStream(s) - s.data = nil - -proc newStringStream*(s: string = ""): StringStream = - ## creates a new stream from the string `s`. 
- new(result) - result.data = s - result.pos = 0 - result.closeImpl = ssClose - result.atEndImpl = ssAtEnd - result.setPositionImpl = ssSetPosition - result.getPositionImpl = ssGetPosition - result.readDataImpl = ssReadData - result.writeDataImpl = ssWriteData +when (NimMajor, NimMinor) < (1, 3) and defined(js): + proc ssAtEnd(s: Stream): bool {.compileTime.} = + var s = StringStream(s) + return s.pos >= s.data.len -when not defined(js): + proc ssSetPosition(s: Stream, pos: int) {.compileTime.} = + var s = StringStream(s) + s.pos = clamp(pos, 0, s.data.len) - type - FileStream* = ref FileStreamObj ## a stream that encapsulates a `TFile` - FileStreamObj* = object of Stream - f: File - {.deprecated: [PFileStream: FileStream, TFileStream: FileStreamObj].} - - proc fsClose(s: Stream) = - if FileStream(s).f != nil: - close(FileStream(s).f) - FileStream(s).f = nil - proc fsFlush(s: Stream) = flushFile(FileStream(s).f) - proc fsAtEnd(s: Stream): bool = return endOfFile(FileStream(s).f) - proc fsSetPosition(s: Stream, pos: int) = setFilePos(FileStream(s).f, pos) - proc fsGetPosition(s: Stream): int = return int(getFilePos(FileStream(s).f)) - - proc fsReadData(s: Stream, buffer: pointer, bufLen: int): int = - result = readBuffer(FileStream(s).f, buffer, bufLen) - - proc fsWriteData(s: Stream, buffer: pointer, bufLen: int) = - if writeBuffer(FileStream(s).f, buffer, bufLen) != bufLen: - raise newEIO("cannot write to stream") - - proc newFileStream*(f: File): FileStream = - ## creates a new stream from the file `f`. 
+ proc ssGetPosition(s: Stream): int {.compileTime.} = + var s = StringStream(s) + return s.pos + + proc ssReadDataStr(s: Stream, buffer: var string, slice: Slice[int]): int {.compileTime.} = + var s = StringStream(s) + result = min(slice.b + 1 - slice.a, s.data.len - s.pos) + if result > 0: + buffer[slice.a..<slice.a+result] = s.data[s.pos..<s.pos+result] + inc(s.pos, result) + else: + result = 0 + + proc ssClose(s: Stream) {.compileTime.} = + var s = StringStream(s) + s.data = "" + + proc newStringStream*(s: string = ""): owned StringStream {.compileTime.} = new(result) - result.f = f - result.closeImpl = fsClose - result.atEndImpl = fsAtEnd - result.setPositionImpl = fsSetPosition - result.getPositionImpl = fsGetPosition - result.readDataImpl = fsReadData - result.writeDataImpl = fsWriteData - result.flushImpl = fsFlush - - proc newFileStream*(filename: string, mode: FileMode): FileStream = - ## creates a new stream from the file named `filename` with the mode `mode`. - ## If the file cannot be opened, nil is returned. See the `system - ## <system.html>`_ module for a list of available FileMode enums. 
+ result.data = s + result.pos = 0 + result.closeImpl = ssClose + result.atEndImpl = ssAtEnd + result.setPositionImpl = ssSetPosition + result.getPositionImpl = ssGetPosition + result.readDataStrImpl = ssReadDataStr + + proc readAll*(s: Stream): string {.compileTime.} = + const bufferSize = 1024 + var bufferr: string + bufferr.setLen(bufferSize) + while true: + let readBytes = readDataStr(s, bufferr, 0..<bufferSize) + if readBytes == 0: + break + let prevLen = result.len + result.setLen(prevLen + readBytes) + result[prevLen..<prevLen+readBytes] = bufferr[0..<readBytes] + if readBytes < bufferSize: + break + +else: # after 1.3 or JS not defined + proc ssAtEnd(s: Stream): bool = + var s = StringStream(s) + return s.pos >= s.data.len + + proc ssSetPosition(s: Stream, pos: int) = + var s = StringStream(s) + s.pos = clamp(pos, 0, s.data.len) + + proc ssGetPosition(s: Stream): int = + var s = StringStream(s) + return s.pos + + proc ssReadDataStr(s: Stream, buffer: var string, slice: Slice[int]): int = + var s = StringStream(s) + when nimvm: + discard + else: + when declared(prepareMutation): + prepareMutation(buffer) # buffer might potentially be a CoW literal with ARC + result = min(slice.b + 1 - slice.a, s.data.len - s.pos) + if result > 0: + jsOrVmBlock: + buffer[slice.a..<slice.a+result] = s.data[s.pos..<s.pos+result] + do: + copyMem(unsafeAddr buffer[slice.a], addr s.data[s.pos], result) + inc(s.pos, result) + else: + result = 0 + + proc ssReadData(s: Stream, buffer: pointer, bufLen: int): int = + var s = StringStream(s) + result = min(bufLen, s.data.len - s.pos) + if result > 0: + when defined(js): + try: + cast[ptr string](buffer)[][0..<result] = s.data[s.pos..<s.pos+result] + except: + raise newException(Defect, "could not read string stream, " & + "did you use a non-string buffer pointer?", getCurrentException()) + elif not defined(nimscript): + copyMem(buffer, addr(s.data[s.pos]), result) + inc(s.pos, result) + else: + result = 0 + + proc ssPeekData(s: Stream, 
buffer: pointer, bufLen: int): int = + var s = StringStream(s) + result = min(bufLen, s.data.len - s.pos) + if result > 0: + when defined(js): + try: + cast[ptr string](buffer)[][0..<result] = s.data[s.pos..<s.pos+result] + except: + raise newException(Defect, "could not peek string stream, " & + "did you use a non-string buffer pointer?", getCurrentException()) + elif not defined(nimscript): + copyMem(buffer, addr(s.data[s.pos]), result) + else: + result = 0 + + proc ssWriteData(s: Stream, buffer: pointer, bufLen: int) = + var s = StringStream(s) + if bufLen <= 0: + return + if s.pos + bufLen > s.data.len: + setLen(s.data, s.pos + bufLen) + when defined(js): + try: + s.data[s.pos..<s.pos+bufLen] = cast[ptr string](buffer)[][0..<bufLen] + except: + raise newException(Defect, "could not write to string stream, " & + "did you use a non-string buffer pointer?", getCurrentException()) + elif not defined(nimscript): + copyMem(addr(s.data[s.pos]), buffer, bufLen) + inc(s.pos, bufLen) + + proc ssClose(s: Stream) = + var s = StringStream(s) + s.data = "" + + proc newStringStream*(s: sink string = ""): owned StringStream = + ## Creates a new stream from the string `s`. + ## + ## See also: + ## * `newFileStream proc <#newFileStream,File>`_ creates a file stream from + ## opened File. + ## * `newFileStream proc <#newFileStream,string,FileMode,int>`_ creates a + ## file stream from the file name and the mode. + ## * `openFileStream proc <#openFileStream,string,FileMode,int>`_ creates a + ## file stream from the file name and the mode. 
+ runnableExamples: + var strm = newStringStream("The first line\nthe second line\nthe third line") + doAssert strm.readLine() == "The first line" + doAssert strm.readLine() == "the second line" + doAssert strm.readLine() == "the third line" + strm.close() + + new(result) + result.data = s + when nimvm: + discard + else: + when declared(prepareMutation): + prepareMutation(result.data) # Allows us to mutate using `addr` logic like `copyMem`, otherwise it errors. + result.pos = 0 + result.closeImpl = ssClose + result.atEndImpl = ssAtEnd + result.setPositionImpl = ssSetPosition + result.getPositionImpl = ssGetPosition + result.readDataStrImpl = ssReadDataStr + when nimvm: + discard + else: + result.readDataImpl = ssReadData + result.peekDataImpl = ssPeekData + result.writeDataImpl = ssWriteData + +type + FileStream* = ref FileStreamObj + ## A stream that encapsulates a `File`. + ## + ## **Note:** Not available for JS backend. + FileStreamObj* = object of Stream + ## A file stream object. + ## + ## **Note:** Not available for JS backend. 
+ f: File + +proc fsClose(s: Stream) = + if FileStream(s).f != nil: + close(FileStream(s).f) + FileStream(s).f = nil +proc fsFlush(s: Stream) = flushFile(FileStream(s).f) +proc fsAtEnd(s: Stream): bool = return endOfFile(FileStream(s).f) +proc fsSetPosition(s: Stream, pos: int) = setFilePos(FileStream(s).f, pos) +proc fsGetPosition(s: Stream): int = return int(getFilePos(FileStream(s).f)) + +proc fsReadData(s: Stream, buffer: pointer, bufLen: int): int = + result = readBuffer(FileStream(s).f, buffer, bufLen) + +proc fsReadDataStr(s: Stream, buffer: var string, slice: Slice[int]): int = + result = readBuffer(FileStream(s).f, addr buffer[slice.a], slice.b + 1 - slice.a) + +proc fsPeekData(s: Stream, buffer: pointer, bufLen: int): int = + let pos = fsGetPosition(s) + defer: fsSetPosition(s, pos) + result = readBuffer(FileStream(s).f, buffer, bufLen) + +proc fsWriteData(s: Stream, buffer: pointer, bufLen: int) = + if writeBuffer(FileStream(s).f, buffer, bufLen) != bufLen: + raise newEIO("cannot write to stream") + +proc fsReadLine(s: Stream, line: var string): bool = + result = readLine(FileStream(s).f, line) + +proc newFileStream*(f: File): owned FileStream = + ## Creates a new stream from the file `f`. + ## + ## **Note:** Not available for JS backend. + ## + ## See also: + ## * `newStringStream proc <#newStringStream,string>`_ creates a new stream + ## from string. + ## * `newFileStream proc <#newFileStream,string,FileMode,int>`_ is the same + ## as using `open proc <syncio.html#open,File,string,FileMode,int>`_ + ## on Examples. + ## * `openFileStream proc <#openFileStream,string,FileMode,int>`_ creates a + ## file stream from the file name and the mode. 
+ runnableExamples: + ## Input (somefile.txt): + ## The first line + ## the second line + ## the third line var f: File - if open(f, filename, mode): result = newFileStream(f) + if open(f, "somefile.txt", fmRead, -1): + var strm = newFileStream(f) + var line = "" + while strm.readLine(line): + echo line + ## Output: + ## The first line + ## the second line + ## the third line + strm.close() + new(result) + result.f = f + result.closeImpl = fsClose + result.atEndImpl = fsAtEnd + result.setPositionImpl = fsSetPosition + result.getPositionImpl = fsGetPosition + result.readDataStrImpl = fsReadDataStr + result.readDataImpl = fsReadData + result.readLineImpl = fsReadLine + result.peekDataImpl = fsPeekData + result.writeDataImpl = fsWriteData + result.flushImpl = fsFlush -when true: - discard -else: +proc newFileStream*(filename: string, mode: FileMode = fmRead, + bufSize: int = -1): owned FileStream = + ## Creates a new stream from the file named `filename` with the mode `mode`. + ## + ## If the file cannot be opened, `nil` is returned. See the `io module + ## <syncio.html>`_ for a list of available `FileMode enums <syncio.html#FileMode>`_. + ## + ## **Note:** + ## * **This function returns nil in case of failure.** + ## To prevent unexpected behavior and ensure proper error handling, + ## use `openFileStream proc <#openFileStream,string,FileMode,int>`_ + ## instead. + ## * Not available for JS backend. + ## + ## See also: + ## * `newStringStream proc <#newStringStream,string>`_ creates a new stream + ## from string. + ## * `newFileStream proc <#newFileStream,File>`_ creates a file stream from + ## opened File. + ## * `openFileStream proc <#openFileStream,string,FileMode,int>`_ creates a + ## file stream from the file name and the mode. 
+ runnableExamples: + from std/os import removeFile + var strm = newFileStream("somefile.txt", fmWrite) + if not isNil(strm): + strm.writeLine("The first line") + strm.writeLine("the second line") + strm.writeLine("the third line") + strm.close() + ## Output (somefile.txt) + ## The first line + ## the second line + ## the third line + removeFile("somefile.txt") + + var f: File + if open(f, filename, mode, bufSize): result = newFileStream(f) + +proc openFileStream*(filename: string, mode: FileMode = fmRead, + bufSize: int = -1): owned FileStream = + ## Creates a new stream from the file named `filename` with the mode `mode`. + ## If the file cannot be opened, an IO exception is raised. + ## + ## **Note:** Not available for JS backend. + ## + ## See also: + ## * `newStringStream proc <#newStringStream,string>`_ creates a new stream + ## from string. + ## * `newFileStream proc <#newFileStream,File>`_ creates a file stream from + ## opened File. + ## * `newFileStream proc <#newFileStream,string,FileMode,int>`_ creates a + ## file stream from the file name and the mode. 
+ runnableExamples: + try: + ## Input (somefile.txt): + ## The first line + ## the second line + ## the third line + var strm = openFileStream("somefile.txt") + echo strm.readLine() + ## Output: + ## The first line + strm.close() + except: + stderr.write getCurrentExceptionMsg() + + var f: File + if open(f, filename, mode, bufSize): + return newFileStream(f) + else: + raise newEIO("cannot open file stream: " & filename) + +when false: type FileHandleStream* = ref FileHandleStreamObj FileHandleStreamObj* = object of Stream handle*: FileHandle pos: int - {.deprecated: [PFileHandleStream: FileHandleStream, - TFileHandleStream: FileHandleStreamObj].} - proc newEOS(msg: string): ref OSError = new(result) result.msg = msg - proc hsGetPosition(s: FileHandleStream): int = + proc hsGetPosition(s: FileHandleStream): int = return s.pos when defined(windows): # do not import windows as this increases compile times: discard else: - import posix - - proc hsSetPosition(s: FileHandleStream, pos: int) = + import std/posix + + proc hsSetPosition(s: FileHandleStream, pos: int) = discard lseek(s.handle, pos, SEEK_SET) proc hsClose(s: FileHandleStream) = discard close(s.handle) - proc hsAtEnd(s: FileHandleStream): bool = + proc hsAtEnd(s: FileHandleStream): bool = var pos = hsGetPosition(s) var theEnd = lseek(s.handle, 0, SEEK_END) result = pos >= theEnd hsSetPosition(s, pos) # set position back - proc hsReadData(s: FileHandleStream, buffer: pointer, bufLen: int): int = + proc hsReadData(s: FileHandleStream, buffer: pointer, bufLen: int): int = result = posix.read(s.handle, buffer, bufLen) inc(s.pos, result) - - proc hsWriteData(s: FileHandleStream, buffer: pointer, bufLen: int) = - if posix.write(s.handle, buffer, bufLen) != bufLen: + + proc hsPeekData(s: FileHandleStream, buffer: pointer, bufLen: int): int = + result = posix.read(s.handle, buffer, bufLen) + + proc hsWriteData(s: FileHandleStream, buffer: pointer, bufLen: int) = + if posix.write(s.handle, buffer, bufLen) != bufLen: 
raise newEIO("cannot write to stream") inc(s.pos, bufLen) - proc newFileHandleStream*(handle: FileHandle): FileHandleStream = + proc newFileHandleStream*(handle: FileHandle): owned FileHandleStream = new(result) result.handle = handle result.pos = 0 @@ -344,20 +1524,22 @@ else: result.setPosition = hsSetPosition result.getPosition = hsGetPosition result.readData = hsReadData + result.peekData = hsPeekData result.writeData = hsWriteData - proc newFileHandleStream*(filename: string, - mode: FileMode): FileHandleStream = + proc newFileHandleStream*(filename: string, + mode: FileMode): owned FileHandleStream = when defined(windows): discard else: var flags: cint case mode - of fmRead: flags = posix.O_RDONLY - of fmWrite: flags = O_WRONLY or int(O_CREAT) - of fmReadWrite: flags = O_RDWR or int(O_CREAT) + of fmRead: flags = posix.O_RDONLY + of fmWrite: flags = O_WRONLY or int(O_CREAT) + of fmReadWrite: flags = O_RDWR or int(O_CREAT) of fmReadWriteExisting: flags = O_RDWR - of fmAppend: flags = O_WRONLY or int(O_CREAT) or O_APPEND + of fmAppend: flags = O_WRONLY or int(O_CREAT) or O_APPEND + static: raiseAssert "unreachable" # handle bug #17888 var handle = open(filename, flags) if handle < 0: raise newEOS("posix.open() call failed") result = newFileHandleStream(handle) diff --git a/lib/pure/streamwrapper.nim b/lib/pure/streamwrapper.nim new file mode 100644 index 000000000..99752a9ab --- /dev/null +++ b/lib/pure/streamwrapper.nim @@ -0,0 +1,121 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2020 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements stream wrapper. +## +## **Since** version 1.2. + +import std/[deques, streams] + +when defined(nimPreviewSlimSystem): + import std/assertions + + +type + PipeOutStream*[T] = ref object of T + # When stream peek operation is called, it reads from base stream + # type using `baseReadDataImpl` and stores the content to this buffer. 
    # Next stream read operation returns data in the buffer so that the previous
    # peek operation looks like it didn't change the read position.
    # When a stream read operation that returns N bytes of data is called and
    # the size is smaller than the buffer size,
    # the first N elements are removed from the buffer.
    # Deque type can do such operation more efficiently than seq type.
    buffer: Deque[char]
    baseReadLineImpl: typeof(StreamObj.readLineImpl)
    baseReadDataImpl: typeof(StreamObj.readDataImpl)

proc posReadLine[T](s: Stream, line: var string): bool =
  ## `readLineImpl` replacement: consumes previously peeked characters from
  ## `buffer` first and only then delegates to the wrapped stream's
  ## `baseReadLineImpl`, appending whatever it returns to `line`.
  var s = PipeOutStream[T](s)
  assert s.baseReadLineImpl != nil

  let n = s.buffer.len
  line.setLen(0)
  for i in 0..<n:
    var c = s.buffer.popFirst
    if c == '\c':
      # Reads one more char after the CR and discards it.
      # NOTE(review): presumably the LF of a CRLF pair -- confirm.
      c = readChar(s)
      return true
    elif c == '\L': return true
    elif c == '\0':
      return line.len > 0
    line.add(c)

  # Buffer drained without hitting a line terminator: the rest of the line
  # comes from the base implementation.
  var line2: string
  result = s.baseReadLineImpl(s, line2)
  line.add line2

proc posReadData[T](s: Stream, buffer: pointer, bufLen: int): int =
  ## `readDataImpl` replacement: pops up to `bufLen` buffered chars into
  ## `buffer`, then reads any remainder through `baseReadDataImpl`.
  ## Returns the total number of bytes stored.
  var s = PipeOutStream[T](s)
  assert s.baseReadDataImpl != nil

  let
    dest = cast[ptr UncheckedArray[char]](buffer)
    n = min(s.buffer.len, bufLen)
  result = n
  for i in 0..<n:
    dest[i] = s.buffer.popFirst
  if bufLen > n:
    result += s.baseReadDataImpl(s, addr dest[n], bufLen - n)

proc posReadDataStr[T](s: Stream, buffer: var string, slice: Slice[int]): int =
  ## `readDataStrImpl` replacement: forwards to `posReadData` with the address
  ## of `buffer[slice.a]` and `slice.len` as the length.
  posReadData[T](s, addr buffer[slice.a], slice.len)

proc posPeekData[T](s: Stream, buffer: pointer, bufLen: int): int =
  ## `peekDataImpl` replacement: copies buffered chars without removing them;
  ## any missing bytes are read via `baseReadDataImpl` and appended to the
  ## internal buffer so that later reads return them again.
  var s = PipeOutStream[T](s)
  assert s.baseReadDataImpl != nil

  let
    dest = cast[ptr UncheckedArray[char]](buffer)
    n = min(s.buffer.len, bufLen)

  result = n
  for i in 0..<n:
    dest[i] = s.buffer[i]

  if bufLen > n:
    let
      newDataNeeded = bufLen - n
      numRead = s.baseReadDataImpl(s, addr dest[n], newDataNeeded)
    result += numRead
    # Remember the freshly read bytes; the peek must not consume them.
    for i in 0..<numRead:
      s.buffer.addLast dest[n + i]

proc newPipeOutStream*[T](s: sink (ref T)): owned PipeOutStream[T] =
  ## Wrap pipe for reading with PipeOutStream so
that you can use peek* procs and generate runtime error + ## when setPosition/getPosition is called or write operation is performed. + ## + ## Example: + ## ```Nim + ## import std/[osproc, streamwrapper] + ## var + ## p = startProcess(exePath) + ## outStream = p.outputStream().newPipeOutStream() + ## echo outStream.peekChar + ## p.close() + ## ``` + + assert s.readDataImpl != nil + + new(result) + for dest, src in fields((ref T)(result)[], s[]): + dest = src + wasMoved(s[]) + if result.readLineImpl != nil: + result.baseReadLineImpl = result.readLineImpl + result.readLineImpl = posReadLine[T] + result.baseReadDataImpl = result.readDataImpl + result.readDataImpl = posReadData[T] + result.readDataStrImpl = posReadDataStr[T] + result.peekDataImpl = posPeekData[T] + + # Set nil to anything you may not call. + result.setPositionImpl = nil + result.getPositionImpl = nil + result.writeDataImpl = nil + result.flushImpl = nil diff --git a/lib/pure/strformat.nim b/lib/pure/strformat.nim new file mode 100644 index 000000000..7d093ebb3 --- /dev/null +++ b/lib/pure/strformat.nim @@ -0,0 +1,790 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2017 Nim contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +##[ +String `interpolation`:idx: / `format`:idx: inspired by +Python's f-strings. + +# `fmt` vs. `&` + +You can use either `fmt` or the unary `&` operator for formatting. The +difference between them is subtle but important. + +The `fmt"{expr}"` syntax is more aesthetically pleasing, but it hides a small +gotcha. The string is a +`generalized raw string literal <manual.html#lexical-analysis-generalized-raw-string-literals>`_. +This has some surprising effects: +]## + +runnableExamples: + let msg = "hello" + assert fmt"{msg}\n" == "hello\\n" + +##[ +Because the literal is a raw string literal, the `\n` is not interpreted as +an escape sequence. 
+ +There are multiple ways to get around this, including the use of the `&` operator: +]## + +runnableExamples: + let msg = "hello" + + assert &"{msg}\n" == "hello\n" + + assert fmt"{msg}{'\n'}" == "hello\n" + assert fmt("{msg}\n") == "hello\n" + assert "{msg}\n".fmt == "hello\n" + +##[ +The choice of style is up to you. + +# Formatting strings +]## + +runnableExamples: + assert &"""{"abc":>4}""" == " abc" + assert &"""{"abc":<4}""" == "abc " + +##[ +# Formatting floats +]## + +runnableExamples: + assert fmt"{-12345:08}" == "-0012345" + assert fmt"{-1:3}" == " -1" + assert fmt"{-1:03}" == "-01" + assert fmt"{16:#X}" == "0x10" + + assert fmt"{123.456}" == "123.456" + assert fmt"{123.456:>9.3f}" == " 123.456" + assert fmt"{123.456:9.3f}" == " 123.456" + assert fmt"{123.456:9.4f}" == " 123.4560" + assert fmt"{123.456:>9.0f}" == " 123." + assert fmt"{123.456:<9.4f}" == "123.4560 " + + assert fmt"{123.456:e}" == "1.234560e+02" + assert fmt"{123.456:>13e}" == " 1.234560e+02" + assert fmt"{123.456:13e}" == " 1.234560e+02" + +##[ +# Expressions +]## +runnableExamples: + let x = 3.14 + assert fmt"{(if x!=0: 1.0/x else: 0):.5}" == "0.31847" + assert fmt"""{(block: + var res: string + for i in 1..15: + res.add (if i mod 15 == 0: "FizzBuzz" + elif i mod 5 == 0: "Buzz" + elif i mod 3 == 0: "Fizz" + else: $i) & " " + res)}""" == "1 2 Fizz 4 Buzz Fizz 7 8 Fizz Buzz 11 Fizz 13 14 FizzBuzz " +##[ +# Debugging strings + +`fmt"{expr=}"` expands to `fmt"expr={expr}"` namely the text of the expression, +an equal sign and the results of evaluated expression. 
+]## + +runnableExamples: + assert fmt"{123.456=}" == "123.456=123.456" + assert fmt"{123.456=:>9.3f}" == "123.456= 123.456" + + let x = "hello" + assert fmt"{x=}" == "x=hello" + assert fmt"{x =}" == "x =hello" + + let y = 3.1415926 + assert fmt"{y=:.2f}" == fmt"y={y:.2f}" + assert fmt"{y=}" == fmt"y={y}" + assert fmt"{y = : <8}" == fmt"y = 3.14159 " + + proc hello(a: string, b: float): int = 12 + assert fmt"{hello(x, y) = }" == "hello(x, y) = 12" + assert fmt"{x.hello(y) = }" == "x.hello(y) = 12" + assert fmt"{hello x, y = }" == "hello x, y = 12" + +##[ +Note that it is space sensitive: +]## + +runnableExamples: + let x = "12" + assert fmt"{x=}" == "x=12" + assert fmt"{x =:}" == "x =12" + assert fmt"{x =}" == "x =12" + assert fmt"{x= :}" == "x= 12" + assert fmt"{x= }" == "x= 12" + assert fmt"{x = :}" == "x = 12" + assert fmt"{x = }" == "x = 12" + assert fmt"{x = :}" == "x = 12" + assert fmt"{x = }" == "x = 12" + +##[ +# Implementation details + +An expression like `&"{key} is {value:arg} {{z}}"` is transformed into: + + ```nim + var temp = newStringOfCap(educatedCapGuess) + temp.formatValue(key, "") + temp.add(" is ") + temp.formatValue(value, arg) + temp.add(" {z}") + temp + ``` + +Parts of the string that are enclosed in the curly braces are interpreted +as Nim code. To escape a `{` or `}`, double it. + +Within a curly expression, however, `{`, `}`, must be escaped with a backslash. + +To enable evaluating Nim expressions within curlies, colons inside parentheses +do not need to be escaped. +]## + +runnableExamples: + let x = "hello" + assert fmt"""{ "\{(" & x & ")\}" }""" == "{(hello)}" + assert fmt"""{{({ x })}}""" == "{(hello)}" + assert fmt"""{ $(\{x:1,"world":2\}) }""" == """[("hello", 1), ("world", 2)]""" + +##[ +`&` delegates most of the work to an open overloaded set +of `formatValue` procs. The required signature for a type `T` that supports +formatting is usually `proc formatValue(result: var string; x: T; specifier: string)`. 
+ +The subexpression after the colon +(`arg` in `&"{key} is {value:arg} {{z}}"`) is optional. It will be passed as +the last argument to `formatValue`. When the colon and the subexpression are +left out, an empty string will be taken instead. + +For strings and numeric types the optional argument is a so-called +"standard format specifier". + +# Standard format specifiers for strings, integers and floats + +The general form of a standard format specifier is: + + [[fill]align][sign][#][0][minimumwidth][.precision][type] + +The square brackets `[]` indicate an optional element. + +The optional `align` flag can be one of the following: + +`<` +: Forces the field to be left-aligned within the available + space. (This is the default for strings.) + +`>` +: Forces the field to be right-aligned within the available space. + (This is the default for numbers.) + +`^` +: Forces the field to be centered within the available space. + +Note that unless a minimum field width is defined, the field width +will always be the same size as the data to fill it, so that the alignment +option has no meaning in this case. + +The optional `fill` character defines the character to be used to pad +the field to the minimum width. The fill character, if present, must be +followed by an alignment flag. + +The `sign` option is only valid for numeric types, and can be one of the following: + +================= ==================================================== + Sign Meaning +================= ==================================================== +`+` Indicates that a sign should be used for both + positive as well as negative numbers. +`-` Indicates that a sign should be used only for + negative numbers (this is the default behavior). +(space) Indicates that a leading space should be used on + positive numbers. +================= ==================================================== + +If the `#` character is present, integers use the 'alternate form' for formatting.
+This means that binary, octal and hexadecimal output will be prefixed +with `0b`, `0o` and `0x`, respectively. + +`width` is a decimal integer defining the minimum field width. If not specified, +then the field width will be determined by the content. + +If the width field is preceded by a zero (`0`) character, this enables +zero-padding. + +The `precision` is a decimal number indicating how many digits should be displayed +after the decimal point in a floating point conversion. For non-numeric types the +field indicates the maximum field size - in other words, how many characters will +be used from the field content. The precision is ignored for integer conversions. + +Finally, the `type` determines how the data should be presented. + +The available integer presentation types are: + +================= ==================================================== + Type Result +================= ==================================================== +`b` Binary. Outputs the number in base 2. +`d` Decimal Integer. Outputs the number in base 10. +`o` Octal format. Outputs the number in base 8. +`x` Hex format. Outputs the number in base 16, using + lower-case letters for the digits above 9. +`X` Hex format. Outputs the number in base 16, using + uppercase letters for the digits above 9. +(None) The same as `d`. +================= ==================================================== + +The available floating point presentation types are: + +================= ==================================================== + Type Result +================= ==================================================== +`e` Exponent notation. Prints the number in scientific + notation using the letter `e` to indicate the + exponent. +`E` Exponent notation. Same as `e` except it converts + the number to uppercase. +`f` Fixed point. Displays the number as a fixed-point + number. +`F` Fixed point. Same as `f` except it converts the + number to uppercase. +`g` General format. 
This prints the number as a + fixed-point number, unless the number is too + large, in which case it switches to `e` + exponent notation. +`G` General format. Same as `g` except it switches to `E` + if the number gets too large. +`j` Complex General format. This is only supported for + complex numbers, which it prints using the mathematical + (RE+IMj) format. The real and imaginary parts are printed + using the general format `g` by default, but it is + possible to combine this format with one of the other + formats (e.g. `jf`). +(None) Similar to `g`, except that it prints at least one + digit after the decimal point. +================= ==================================================== + +# Limitations + +Because of the well-defined order in which templates and macros are +expanded, strformat cannot expand template arguments: + + ```nim + template myTemplate(arg: untyped): untyped = + echo "arg is: ", arg + echo &"--- {arg} ---" + + let x = "abc" + myTemplate(x) + ``` + +First the template `myTemplate` is expanded, where every identifier +`arg` is substituted with its argument. The `arg` inside the +format string is not seen by this process, because it is part of a +quoted string literal. It is not an identifier yet. Then the strformat +macro creates the `arg` identifier from the string literal, an +identifier that cannot be resolved anymore. + +The workaround for this is to bind the template argument to a new local variable. + + ```nim + template myTemplate(arg: untyped): untyped = + block: + let arg1 {.inject.} = arg + echo "arg is: ", arg1 + echo &"--- {arg1} ---" + ``` + +The use of `{.inject.}` here is necessary again because of template +expansion order and hygienic templates. But since we generally want to +keep the hygiene of `myTemplate`, and we do not want `arg1` +to be injected into the context where `myTemplate` is expanded, +everything is wrapped in a `block`.
+ + +# Future directions + +A curly expression with commas in it like `{x, argA, argB}` could be +transformed to `formatValue(result, x, argA, argB)` in order to support +formatters that do not need to parse a custom language within a custom +language but instead prefer to use Nim's existing syntax. This would also +help with readability, since there is only so much you can cram into +single letter DSLs. +]## + +import std/[macros, parseutils, unicode] +import std/strutils except format + +when defined(nimPreviewSlimSystem): + import std/assertions + + +proc mkDigit(v: int, typ: char): string {.inline.} = + assert(v < 26) + if v < 10: + result = $chr(ord('0') + v) + else: + result = $chr(ord(if typ == 'x': 'a' else: 'A') + v - 10) + +proc alignString*(s: string, minimumWidth: int; align = '\0'; fill = ' '): string = + ## Aligns `s` using the `fill` char. + ## This is only of interest if you want to write a custom `format` proc that + ## should support the standard format specifiers. + if minimumWidth == 0: + result = s + else: + let sRuneLen = if s.validateUtf8 == -1: s.runeLen else: s.len + let toFill = minimumWidth - sRuneLen + if toFill <= 0: + result = s + elif align == '<' or align == '\0': + result = s & repeat(fill, toFill) + elif align == '^': + let half = toFill div 2 + result = repeat(fill, half) & s & repeat(fill, toFill - half) + else: + result = repeat(fill, toFill) & s + +type + StandardFormatSpecifier* = object ## Type that describes "standard format specifiers". + fill*, align*: char ## Desired fill and alignment. + sign*: char ## Desired sign. + alternateForm*: bool ## Whether to prefix binary, octal and hex numbers + ## with `0b`, `0o`, `0x`. + padWithZero*: bool ## Whether to pad with zeros rather than spaces. + minimumWidth*, precision*: int ## Desired minimum width and precision. + typ*: char ## Type like 'f', 'g' or 'd'. + endPosition*: int ## End position in the format specifier after + ## `parseStandardFormatSpecifier` returned. 

proc formatInt(n: SomeNumber; radix: int; spec: StandardFormatSpecifier): string =
  ## Converts `n` to a string. If `n` is not an unsigned integer it is first
  ## converted to `int64`.
  ## Conversion is done using `radix`. If the result's length is less than
  ## `spec.minimumWidth`, it is padded with `spec.fill` on the left, on the
  ## right or on both sides, depending on `spec.align`. A result that is
  ## already at least `spec.minimumWidth` long is returned unpadded.
  when n is SomeUnsignedInt:
    var v = n.uint64
    let negative = false
  else:
    let n = n.int64
    let negative = n < 0
    var v =
      if negative:
        # `uint64(-n)`, but accounts for `n == low(int64)`
        uint64(not n) + 1
      else:
        uint64(n)

  # Radix prefix, only emitted for the alternate form (`#`).
  var xx = ""
  if spec.alternateForm:
    case spec.typ
    of 'X': xx = "0x"
    of 'x': xx = "0x"
    of 'b': xx = "0b"
    of 'o': xx = "0o"
    else: discard

  if v == 0:
    result = "0"
  else:
    result = ""
    # Digits are produced least-significant first and reversed afterwards.
    while v > typeof(v)(0):
      let d = v mod typeof(v)(radix)
      v = v div typeof(v)(radix)
      result.add(mkDigit(d.int, spec.typ))
    for idx in 0..<(result.len div 2):
      swap result[idx], result[result.len - idx - 1]
  if spec.padWithZero:
    # Zeros go between the sign/prefix and the digits, so reserve room for
    # the prefix and for a sign character if one will be written.
    let sign = negative or spec.sign != '-'
    let toFill = spec.minimumWidth - result.len - xx.len - ord(sign)
    if toFill > 0:
      result = repeat('0', toFill) & result

  if negative:
    result = "-" & xx & result
  elif spec.sign != '-':
    result = spec.sign & xx & result
  else:
    result = xx & result

  # Pad only when the text is narrower than the requested width. Without this
  # guard the centered case handed a negative count to `repeat`.
  let toFill = spec.minimumWidth - result.len
  if toFill > 0:
    case spec.align
    of '<':
      result.add repeat(spec.fill, toFill)
    of '^':
      let half = toFill div 2
      result = repeat(spec.fill, half) & result & repeat(spec.fill, toFill - half)
    else:
      result = repeat(spec.fill, toFill) & result

proc parseStandardFormatSpecifier*(s: string; start = 0;
                                   ignoreUnknownSuffix = false): StandardFormatSpecifier =
  ## An exported helper proc that parses the "standard format specifiers",
  ## as specified by the grammar:
  ##
  ##     [[fill]align][sign][#][0][minimumwidth][.precision][type]
  ##
  ## This is only of interest if you want to write a custom `format` proc that
  ## should support the standard format specifiers. If `ignoreUnknownSuffix` is true,
  ## an unknown suffix after the `type` field is not an error.
  ##
  ## Parsing starts at index `start` of `s`. Raises `ValueError` when input
  ## remains after the `type` field and `ignoreUnknownSuffix` is false.
  const alignChars = {'<', '>', '^'}
  result.fill = ' '
  result.align = '\0'
  result.sign = '-'
  var i = start
  # A fill char is only recognised when it is followed by an alignment char.
  if i + 1 < s.len and s[i+1] in alignChars:
    result.fill = s[i]
    result.align = s[i+1]
    inc i, 2
  elif i < s.len and s[i] in alignChars:
    result.align = s[i]
    inc i

  if i < s.len and s[i] in {'-', '+', ' '}:
    result.sign = s[i]
    inc i

  if i < s.len and s[i] == '#':
    result.alternateForm = true
    inc i

  # A leading '0' enables zero padding only when a width digit follows it.
  if i + 1 < s.len and s[i] == '0' and s[i+1] in {'0'..'9'}:
    result.padWithZero = true
    inc i

  let parsedLength = parseSaturatedNatural(s, result.minimumWidth, i)
  inc i, parsedLength
  if i < s.len and s[i] == '.':
    inc i
    let parsedLengthB = parseSaturatedNatural(s, result.precision, i)
    inc i, parsedLengthB
  else:
    # -1 marks "no precision given".
    result.precision = -1

  if i < s.len and s[i] in {'A'..'Z', 'a'..'z'}:
    result.typ = s[i]
    inc i
  result.endPosition = i
  if i != s.len and not ignoreUnknownSuffix:
    raise newException(ValueError,
      "invalid format string, cannot parse: " & s[i..^1])

proc toRadix(typ: char): int =
  ## Maps an integer presentation type to its radix; `'\0'` (no type given)
  ## means decimal. Raises `ValueError` for any other character.
  case typ
  of 'x', 'X': 16
  of 'd', '\0': 10
  of 'o': 8
  of 'b': 2
  else:
    raise newException(ValueError,
      "invalid type in format string for number, expected one " &
      " of 'x', 'X', 'b', 'd', 'o' but got: " & typ)

proc formatValue*[T: SomeInteger](result: var string; value: T;
                                  specifier: static string) =
  ## Standard format implementation for `SomeInteger`. It makes little
  ## sense to call this directly, but it is required to exist
  ## by the `&` macro.
  # Empty specifier: plain `$` conversion, no parsing needed.
  when specifier.len == 0:
    result.add $value
  else:
    # The specifier is static here, so it is parsed at compile time.
    const
      spec = parseStandardFormatSpecifier(specifier)
      radix = toRadix(spec.typ)

    result.add formatInt(value, radix, spec)

proc formatValue*[T: SomeInteger](result: var string; value: T;
                                  specifier: string) =
  ## Standard format implementation for `SomeInteger`. It makes little
  ## sense to call this directly, but it is required to exist
  ## by the `&` macro.
  if specifier.len == 0:
    result.add $value
  else:
    let
      spec = parseStandardFormatSpecifier(specifier)
      radix = toRadix(spec.typ)

    result.add formatInt(value, radix, spec)

proc formatFloat(
    result: var string, value: SomeFloat, fmode: FloatFormatMode,
    spec: StandardFormatSpecifier) =
  ## Appends `value`, formatted with `formatBiggestFloat` in mode `fmode` and
  ## then signed, zero-padded and aligned according to `spec`, to `result`.
  var f = formatBiggestFloat(value, fmode, spec.precision)
  # `sign` records whether `f` starts with a sign character.
  var sign = false
  if value >= 0.0:
    if spec.sign != '-':
      sign = true
      if value == 0.0:
        if 1.0 / value == Inf:
          # only insert the sign if value != negZero
          f.insert($spec.sign, 0)
      else:
        f.insert($spec.sign, 0)
  else:
    # NOTE(review): every value for which `value >= 0.0` is false lands here,
    # which includes NaN -- confirm the zero-padding below is intended for it.
    sign = true

  if spec.padWithZero:
    # Temporarily strip the sign so the zeros are inserted after it.
    var signStr = ""
    if sign:
      signStr = $f[0]
      f = f[1..^1]

    let toFill = spec.minimumWidth - f.len - ord(sign)
    if toFill > 0:
      f = repeat('0', toFill) & f
    if sign:
      f = signStr & f

  # the default for numbers is right-alignment:
  let align = if spec.align == '\0': '>' else: spec.align
  let res = alignString(f, spec.minimumWidth, align, spec.fill)
  # An upper-case type letter upper-cases the whole formatted text.
  if spec.typ in {'A'..'Z'}:
    result.add toUpperAscii(res)
  else:
    result.add res

proc toFloatFormatMode(typ: char): FloatFormatMode =
  ## Maps a float presentation type to a `FloatFormatMode`; `'\0'` (no type
  ## given) behaves like `g`. Raises `ValueError` for any other character.
  case typ
  of 'e', 'E': ffScientific
  of 'f', 'F': ffDecimal
  of 'g', 'G': ffDefault
  of '\0': ffDefault
  else:
    raise newException(ValueError,
      "invalid type in format string for number, expected one " &
      " of 'e', 'E', 'f', 'F', 'g', 'G' but got: " & typ)

proc formatValue*(result: var string; value: SomeFloat; specifier: static string) =
  ## Standard format implementation for `SomeFloat`. It makes little
  ## sense to call this directly, but it is required to exist
  ## by the `&` macro.
  when specifier.len == 0:
    result.add $value
  else:
    # The specifier is static here, so it is parsed at compile time.
    const
      spec = parseStandardFormatSpecifier(specifier)
      fmode = toFloatFormatMode(spec.typ)

    formatFloat(result, value, fmode, spec)

proc formatValue*(result: var string; value: SomeFloat; specifier: string) =
  ## Standard format implementation for `SomeFloat`. It makes little
  ## sense to call this directly, but it is required to exist
  ## by the `&` macro.
  if specifier.len == 0:
    result.add $value
  else:
    let
      spec = parseStandardFormatSpecifier(specifier)
      fmode = toFloatFormatMode(spec.typ)

    formatFloat(result, value, fmode, spec)

proc formatValue*(result: var string; value: string; specifier: static string) =
  ## Standard format implementation for `string`. It makes little
  ## sense to call this directly, but it is required to exist
  ## by the `&` macro.
  const spec = parseStandardFormatSpecifier(specifier)
  var value =
    when spec.typ in {'s', '\0'}: value
    else: static:
      raise newException(ValueError,
        "invalid type in format string for string, expected 's', but got " &
        spec.typ)
  when spec.precision != -1:
    # Precision truncates the string, counted in runes rather than bytes.
    if spec.precision < runeLen(value):
      const precision = cast[Natural](spec.precision)
      setLen(value, Natural(runeOffset(value, precision)))

  result.add alignString(value, spec.minimumWidth, spec.align, spec.fill)

proc formatValue*(result: var string; value: string; specifier: string) =
  ## Standard format implementation for `string`. It makes little
  ## sense to call this directly, but it is required to exist
  ## by the `&` macro.
+ let spec = parseStandardFormatSpecifier(specifier) + var value = + if spec.typ in {'s', '\0'}: value + else: + raise newException(ValueError, + "invalid type in format string for string, expected 's', but got " & + spec.typ) + if spec.precision != -1: + if spec.precision < runeLen(value): + let precision = cast[Natural](spec.precision) + setLen(value, Natural(runeOffset(value, precision))) + + result.add alignString(value, spec.minimumWidth, spec.align, spec.fill) + +proc formatValue[T: not SomeInteger](result: var string; value: T; specifier: static string) = + mixin `$` + formatValue(result, $value, specifier) + +proc formatValue[T: not SomeInteger](result: var string; value: T; specifier: string) = + mixin `$` + formatValue(result, $value, specifier) + +template formatValue(result: var string; value: char; specifier: string) = + result.add value + +template formatValue(result: var string; value: cstring; specifier: string) = + result.add value + +proc strformatImpl(f: string; openChar, closeChar: char, + lineInfoNode: NimNode = nil): NimNode = + template missingCloseChar = + error("invalid format string: missing closing character '" & closeChar & "'") + + if openChar == ':' or closeChar == ':': + error "openChar and closeChar must not be ':'" + var i = 0 + let res = genSym(nskVar, "fmtRes") + result = newNimNode(nnkStmtListExpr, lineInfoNode) + # XXX: https://github.com/nim-lang/Nim/issues/8405 + # When compiling with -d:useNimRtl, certain procs such as `count` from the strutils + # module are not accessible at compile-time: + let expectedGrowth = when defined(useNimRtl): 0 else: count(f, openChar) * 10 + result.add newVarStmt(res, newCall(bindSym"newStringOfCap", + newLit(f.len + expectedGrowth))) + var strlit = "" + while i < f.len: + if f[i] == openChar: + inc i + if f[i] == openChar: + inc i + strlit.add openChar + else: + if strlit.len > 0: + result.add newCall(bindSym"add", res, newLit(strlit)) + strlit = "" + + var subexpr = "" + var inParens = 0 + var 
inSingleQuotes = false + var inDoubleQuotes = false + template notEscaped:bool = f[i-1]!='\\' + while i < f.len and f[i] != closeChar and (f[i] != ':' or inParens != 0): + case f[i] + of '\\': + if i < f.len-1 and f[i+1] in {openChar,closeChar,':'}: inc i + of '\'': + if not inDoubleQuotes and notEscaped: inSingleQuotes = not inSingleQuotes + of '\"': + if notEscaped: inDoubleQuotes = not inDoubleQuotes + of '(': + if not (inSingleQuotes or inDoubleQuotes): inc inParens + of ')': + if not (inSingleQuotes or inDoubleQuotes): dec inParens + of '=': + let start = i + inc i + i += f.skipWhitespace(i) + if i == f.len: + missingCloseChar + if f[i] == closeChar or f[i] == ':': + result.add newCall(bindSym"add", res, newLit(subexpr & f[start ..< i])) + else: + subexpr.add f[start ..< i] + continue + else: discard + subexpr.add f[i] + inc i + + if i == f.len: + missingCloseChar + + var x: NimNode + try: + x = parseExpr(subexpr) + except ValueError as e: + error("could not parse `$#` in `$#`.\n$#" % [subexpr, f, e.msg]) + x.copyLineInfo(lineInfoNode) + let formatSym = bindSym("formatValue", brOpen) + var options = "" + if f[i] == ':': + inc i + while i < f.len and f[i] != closeChar: + options.add f[i] + inc i + if i == f.len: + missingCloseChar + if f[i] == closeChar: + inc i + result.add newCall(formatSym, res, x, newLit(options)) + elif f[i] == closeChar: + if i<f.len-1 and f[i+1] == closeChar: + strlit.add closeChar + inc i, 2 + else: + raiseAssert "invalid format string: '$1' instead of '$1$1'" % $closeChar + else: + strlit.add f[i] + inc i + if strlit.len > 0: + result.add newCall(bindSym"add", res, newLit(strlit)) + result.add res + # workaround for #20381 + var blockExpr = newNimNode(nnkBlockExpr, lineInfoNode) + blockExpr.add(newEmptyNode()) + blockExpr.add(result) + result = blockExpr + when defined(debugFmtDsl): + echo repr result + +macro fmt(pattern: static string; openChar: static char, closeChar: static char, lineInfoNode: untyped): string = + ## version of 
`fmt` with dummy untyped param for line info + strformatImpl(pattern, openChar, closeChar, lineInfoNode) + +when not defined(nimHasCallsitePragma): + {.pragma: callsite.} + +template fmt*(pattern: static string; openChar: static char, closeChar: static char): string {.callsite.} = + ## Interpolates `pattern` using symbols in scope. + runnableExamples: + let x = 7 + assert "var is {x * 2}".fmt == "var is 14" + assert "var is {{x}}".fmt == "var is {x}" # escape via doubling + const s = "foo: {x}" + assert s.fmt == "foo: 7" # also works with const strings + + assert fmt"\n" == r"\n" # raw string literal + assert "\n".fmt == "\n" # regular literal (likewise with `fmt("\n")` or `fmt "\n"`) + runnableExamples: + # custom `openChar`, `closeChar` + let x = 7 + assert "<x>".fmt('<', '>') == "7" + assert "<<<x>>>".fmt('<', '>') == "<7>" + assert "`x`".fmt('`', '`') == "7" + fmt(pattern, openChar, closeChar, dummyForLineInfo) + +template fmt*(pattern: static string): untyped {.callsite.} = + ## Alias for `fmt(pattern, '{', '}')`. + fmt(pattern, '{', '}', dummyForLineInfo) + +template `&`*(pattern: string{lit}): string {.callsite.} = + ## `&pattern` is the same as `pattern.fmt`. + ## For a specification of the `&` macro, see the module level documentation. + # pending bug #18275, bug #18278, use `pattern: static string` + # consider deprecating this, it's redundant with `fmt` and `fmt` is strictly + # more flexible, readable (no confusion with the binary `&`), self-documenting, + # not to mention #18275, bug #18278. 
+ runnableExamples: + let x = 7 + assert &"{x}\n" == "7\n" # regular string literal + assert &"{x}\n" == "{x}\n".fmt # `fmt` can be used instead + assert &"{x}\n" != fmt"{x}\n" # see `fmt` docs, this would use a raw string literal + fmt(pattern, '{', '}', dummyForLineInfo) diff --git a/lib/pure/strmisc.nim b/lib/pure/strmisc.nim new file mode 100644 index 000000000..a3e539e7e --- /dev/null +++ b/lib/pure/strmisc.nim @@ -0,0 +1,83 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Joey Payne +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module contains various string utility routines that are uncommonly +## used in comparison to the ones in `strutils <strutils.html>`_. + +import std/strutils + +func expandTabs*(s: string, tabSize: int = 8): string = + ## Expands tab characters in `s`, replacing them by spaces. + ## + ## The amount of inserted spaces for each tab character is the difference + ## between the current column number and the next tab position. Tab positions + ## occur every `tabSize` characters. + ## The column number starts at 0 and is increased with every single character + ## and inserted space, except for newline, which resets the column number + ## back to 0. 
+ runnableExamples: + doAssert expandTabs("\t", 4) == " " + doAssert expandTabs("\tfoo\t", 4) == " foo " + doAssert expandTabs("a\tb\n\txy\t", 3) == "a b\n xy " + + result = newStringOfCap(s.len + s.len shr 2) + + template addSpaces(n) = + for _ in 1..n: + result.add(' ') + pos += n + + var pos = 0 + let denominator = if tabSize > 0: tabSize else: 1 + for c in s: + if c == '\t': + let numSpaces = tabSize - pos mod denominator + addSpaces(numSpaces) + else: + result.add(c) + pos += 1 + if c == '\l': + pos = 0 + +func partition*(s: string, sep: string, + right: bool = false): (string, string, string) = + ## Splits the string at the first (if `right` is false) + ## or last (if `right` is true) occurrence of `sep` into a 3-tuple. + ## + ## Returns a 3-tuple of strings, `(beforeSep, sep, afterSep)` or + ## `(s, "", "")` if `sep` is not found and `right` is false or + ## `("", "", s)` if `sep` is not found and `right` is true. + ## + ## **See also:** + ## * `rpartition proc <#rpartition,string,string>`_ + runnableExamples: + doAssert partition("foo:bar:baz", ":") == ("foo", ":", "bar:baz") + doAssert partition("foo:bar:baz", ":", right = true) == ("foo:bar", ":", "baz") + doAssert partition("foobar", ":") == ("foobar", "", "") + doAssert partition("foobar", ":", right = true) == ("", "", "foobar") + + let position = if right: s.rfind(sep) else: s.find(sep) + if position != -1: + return (s[0 ..< position], sep, s[position + sep.len ..< s.len]) + return if right: ("", "", s) else: (s, "", "") + +func rpartition*(s: string, sep: string): (string, string, string) = + ## Splits the string at the last occurrence of `sep` into a 3-tuple. + ## + ## Returns a 3-tuple of strings, `(beforeSep, sep, afterSep)` or + ## `("", "", s)` if `sep` is not found. This is the same as + ## `partition(s, sep, right = true)`. 
+ ## + ## **See also:** + ## * `partition proc <#partition,string,string,bool>`_ + runnableExamples: + doAssert rpartition("foo:bar:baz", ":") == ("foo:bar", ":", "baz") + doAssert rpartition("foobar", ":") == ("", "", "foobar") + + partition(s, sep, right = true) diff --git a/lib/pure/strscans.nim b/lib/pure/strscans.nim new file mode 100644 index 000000000..16ef9e642 --- /dev/null +++ b/lib/pure/strscans.nim @@ -0,0 +1,696 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +##[ +This module contains a `scanf`:idx: macro that can be used for extracting +substrings from an input string. This is often easier than regular expressions. +Some examples as an appetizer: + + ```nim + # check if input string matches a triple of integers: + const input = "(1,2,4)" + var x, y, z: int + if scanf(input, "($i,$i,$i)", x, y, z): + echo "matches and x is ", x, " y is ", y, " z is ", z + + # check if input string matches an ISO date followed by an identifier followed + # by whitespace and a floating point number: + var year, month, day: int + var identifier: string + var myfloat: float + if scanf(input, "$i-$i-$i $w$s$f", year, month, day, identifier, myfloat): + echo "yes, we have a match!" + ``` + +As can be seen from the examples, strings are matched verbatim except for +substrings starting with ``$``. These constructions are available: + +================= ======================================================== +``$b`` Matches a binary integer. This uses ``parseutils.parseBin``. +``$o`` Matches an octal integer. This uses ``parseutils.parseOct``. +``$i`` Matches a decimal integer. This uses ``parseutils.parseInt``. +``$h`` Matches a hex integer. This uses ``parseutils.parseHex``. +``$f`` Matches a floating-point number. Uses ``parseFloat``. +``$w`` Matches an ASCII identifier: ``[A-Za-z_][A-Za-z_0-9]*``. +``$c`` Matches a single ASCII character. 
+``$s`` Skips optional whitespace. +``$$`` Matches a single dollar sign. +``$.`` Matches if the end of the input string has been reached. +``$*`` Matches until the token following the ``$*`` was found. + The match is allowed to be of 0 length. +``$+`` Matches until the token following the ``$+`` was found. + The match must consist of at least one char. +``${foo}`` User defined matcher. Uses the proc ``foo`` to perform + the match. See below for more details. +``$[foo]`` Call user defined proc ``foo`` to **skip** some optional + parts in the input string. See below for more details. +================= ======================================================== + +Even though ``$*`` and ``$+`` look similar to the regular expressions ``.*`` +and ``.+``, they work quite differently. There is no non-deterministic +state machine involved and the matches are non-greedy. ``[$*]`` +matches ``[xyz]`` via ``parseutils.parseUntil``. + +Furthermore no backtracking is performed, if parsing fails after a value +has already been bound to a matched subexpression this value is not restored +to its original value. This rarely causes problems in practice and if it does +for you, it's easy enough to bind to a temporary variable first. + + +Startswith vs full match +======================== + +``scanf`` returns true if the input string **starts with** the specified +pattern. If instead it should only return true if there is also nothing +left in the input, append ``$.`` to your pattern. + + +User definable matchers +======================= + +One very nice advantage over regular expressions is that ``scanf`` is +extensible with ordinary Nim procs. The proc is either enclosed in ``${}`` +or in ``$[]``. ``${}`` matches and binds the result +to a variable (that was passed to the ``scanf`` macro) while ``$[]`` merely +matches optional tokens without any result binding. 
+ + +In this example, we define a helper proc ``someSep`` that skips some separators +which we then use in our scanf pattern to help us in the matching process: + + ```nim + proc someSep(input: string; start: int; seps: set[char] = {':','-','.'}): int = + # Note: The parameters and return value must match to what ``scanf`` requires + result = 0 + while start+result < input.len and input[start+result] in seps: inc result + + if scanf(input, "$w$[someSep]$w", key, value): + ... + ``` + +It is also possible to pass arguments to a user definable matcher: + + ```nim + proc ndigits(input: string; intVal: var int; start: int; n: int): int = + # matches exactly ``n`` digits. Matchers need to return 0 if nothing + # matched or otherwise the number of processed chars. + var x = 0 + var i = 0 + while i < n and i+start < input.len and input[i+start] in {'0'..'9'}: + x = x * 10 + input[i+start].ord - '0'.ord + inc i + # only overwrite if we had a match + if i == n: + result = n + intVal = x + + # match an ISO date extracting year, month, day at the same time. + # Also ensure the input ends after the ISO date: + var year, month, day: int + if scanf("2013-01-03", "${ndigits(4)}-${ndigits(2)}-${ndigits(2)}$.", year, month, day): + ... + ``` + + +The scanp macro +=============== + +This module also implements a ``scanp`` macro, which syntax somewhat resembles +an EBNF or PEG grammar, except that it uses Nim's expression syntax and so has +to use prefix instead of postfix operators. + +============== =============================================================== +``(E)`` Grouping +``*E`` Zero or more +``+E`` One or more +``?E`` Zero or One +``E{n,m}`` From ``n`` up to ``m`` times ``E`` +``~E`` Not predicate +``a ^* b`` Shortcut for ``?(a *(b a))``. Usually used for separators. +``a ^+ b`` Shortcut for ``?(a +(b a))``. Usually used for separators. 
+``'a'`` Matches a single character +``{'a'..'b'}`` Matches a character set +``"s"`` Matches a string +``E -> a`` Bind matching to some action +``$_`` Access the currently matched character +============== =============================================================== + +Note that unordered or ordered choice operators (``/``, ``|``) are +not implemented. + +Simple example that parses the ``/etc/passwd`` file line by line: + + ```nim + const + etc_passwd = """root:x:0:0:root:/root:/bin/bash + daemon:x:1:1:daemon:/usr/sbin:/bin/sh + bin:x:2:2:bin:/bin:/bin/sh + sys:x:3:3:sys:/dev:/bin/sh + nobody:x:65534:65534:nobody:/nonexistent:/bin/sh + messagebus:x:103:107::/var/run/dbus:/bin/false + """ + + proc parsePasswd(content: string): seq[string] = + result = @[] + var idx = 0 + while true: + var entry = "" + if scanp(content, idx, +(~{'\L', '\0'} -> entry.add($_)), '\L'): + result.add entry + else: + break + ``` + +The ``scanp`` maps the grammar code into Nim code that performs the parsing. +The parsing is performed with the help of 3 helper templates that can be +implemented for a custom type. + +These templates need to be named ``atom`` and ``nxt``. ``atom`` should be +overloaded to handle both `char` and `set[char]`. + + ```nim + import std/streams + + template atom(input: Stream; idx: int; c: char): bool = + ## Used in scanp for the matching of atoms (usually chars). 
+ peekChar(input) == c + + template atom(input: Stream; idx: int; s: set[char]): bool = + peekChar(input) in s + + template nxt(input: Stream; idx, step: int = 1) = + inc(idx, step) + setPosition(input, idx) + + if scanp(content, idx, +( ~{'\L', '\0'} -> entry.add(peekChar($input))), '\L'): + result.add entry + ``` + +Calling ordinary Nim procs inside the macro is possible: + + ```nim + proc digits(s: string; intVal: var int; start: int): int = + var x = 0 + while result+start < s.len and s[result+start] in {'0'..'9'} and s[result+start] != ':': + x = x * 10 + s[result+start].ord - '0'.ord + inc result + intVal = x + + proc extractUsers(content: string): seq[string] = + # Extracts the username and home directory + # of each entry (with UID greater than 1000) + const + digits = {'0'..'9'} + result = @[] + var idx = 0 + while true: + var login = "" + var uid = 0 + var homedir = "" + if scanp(content, idx, *(~ {':', '\0'}) -> login.add($_), ':', * ~ ':', ':', + digits($input, uid, $index), ':', *`digits`, ':', * ~ ':', ':', + *('/', * ~{':', '/'}) -> homedir.add($_), ':', *('/', * ~{'\L', '/'}), '\L'): + if uid >= 1000: + result.add login & " " & homedir + else: + break + ``` + +When used for matching, keep in mind that, as with scanf, no backtracking +is performed. + + ```nim + proc skipUntil(s: string; until: string; unless = '\0'; start: int): int = + # Skips all characters until the string `until` is found. Returns 0 + # if the char `unless` is found first or the end is reached. 
+ var i = start + var u = 0 + while true: + if i >= s.len or s[i] == unless: + return 0 + elif s[i] == until[0]: + u = 1 + while i+u < s.len and u < until.len and s[i+u] == until[u]: + inc u + if u >= until.len: break + inc(i) + result = i+u-start + + iterator collectLinks(s: string): string = + const quote = {'\'', '"'} + var idx, old = 0 + var res = "" + while idx < s.len: + old = idx + if scanp(s, idx, "<a", skipUntil($input, "href=", '>', $index), + `quote`, *( ~`quote`) -> res.add($_)): + yield res + res = "" + idx = old + 1 + + for r in collectLinks(body): + echo r + ``` + +In this example both macros are combined seamlessly in order to maximise +efficiency and perform different checks. + + ```nim + iterator parseIps*(soup: string): string = + ## ipv4 only! + const digits = {'0'..'9'} + var a, b, c, d: int + var buf = "" + var idx = 0 + while idx < soup.len: + if scanp(soup, idx, (`digits`{1,3}, '.', `digits`{1,3}, '.', + `digits`{1,3}, '.', `digits`{1,3}) -> buf.add($_)): + discard buf.scanf("$i.$i.$i.$i", a, b, c, d) + if (a >= 0 and a <= 254) and + (b >= 0 and b <= 254) and + (c >= 0 and c <= 254) and + (d >= 0 and d <= 254): + yield buf + buf.setLen(0) # need to clear `buf` each time, cause it might contain garbage + idx.inc + ``` +]## + + +import std/[macros, parseutils] +import std/private/since + +when defined(nimPreviewSlimSystem): + import std/assertions + + +proc conditionsToIfChain(n, idx, res: NimNode; start: int): NimNode = + assert n.kind == nnkStmtList + if start >= n.len: return newAssignment(res, newLit true) + var ifs: NimNode = nil + if n[start+1].kind == nnkEmpty: + ifs = conditionsToIfChain(n, idx, res, start+3) + else: + ifs = newIfStmt((n[start+1], + newTree(nnkStmtList, newCall(bindSym"inc", idx, n[start+2]), + conditionsToIfChain(n, idx, res, start+3)))) + result = newTree(nnkStmtList, n[start], ifs) + +proc notZero(x: NimNode): NimNode = newCall(bindSym"!=", x, newLit 0) + +proc buildUserCall(x: string; args: varargs[NimNode]): 
NimNode = + let y = parseExpr(x) + result = newTree(nnkCall) + if y.kind in nnkCallKinds: result.add y[0] + else: result.add y + for a in args: result.add a + if y.kind in nnkCallKinds: + for i in 1..<y.len: result.add y[i] + +macro scanf*(input: string; pattern: static[string]; results: varargs[typed]): bool = + ## See top level documentation of this module about how ``scanf`` works. + template matchBind(parser) {.dirty.} = + var resLen = genSym(nskLet, "resLen") + conds.add newLetStmt(resLen, newCall(bindSym(parser), inp, results[i], idx)) + conds.add resLen.notZero + conds.add resLen + + template at(s: string; i: int): char = (if i < s.len: s[i] else: '\0') + template matchError() = + error("type mismatch between pattern '$" & pattern[p] & "' (position: " & $p & + ") and " & $getTypeInst(results[i]) & " var '" & repr(results[i]) & "'") + + var i = 0 + var p = 0 + var idx = genSym(nskVar, "idx") + var res = genSym(nskVar, "res") + let inp = genSym(nskLet, "inp") + result = newTree(nnkStmtListExpr, newLetStmt(inp, input), + newVarStmt(idx, newLit 0), newVarStmt(res, newLit false)) + var conds = newTree(nnkStmtList) + var fullMatch = false + while p < pattern.len: + if pattern[p] == '$': + inc p + case pattern[p] + of '$': + var resLen = genSym(nskLet, "resLen") + conds.add newLetStmt(resLen, newCall(bindSym"skip", inp, + newLit($pattern[p]), idx)) + conds.add resLen.notZero + conds.add resLen + of 'w': + if i < results.len and getType(results[i]).typeKind == ntyString: + matchBind "parseIdent" + else: + matchError + inc i + of 'c': + if i < results.len and getType(results[i]).typeKind == ntyChar: + matchBind "parseChar" + else: + matchError + inc i + of 'b': + if i < results.len and getType(results[i]).typeKind == ntyInt: + matchBind "parseBin" + else: + matchError + inc i + of 'o': + if i < results.len and getType(results[i]).typeKind == ntyInt: + matchBind "parseOct" + else: + matchError + inc i + of 'i': + if i < results.len and getType(results[i]).typeKind == 
ntyInt: + matchBind "parseInt" + else: + matchError + inc i + of 'h': + if i < results.len and getType(results[i]).typeKind == ntyInt: + matchBind "parseHex" + else: + matchError + inc i + of 'f': + if i < results.len and getType(results[i]).typeKind == ntyFloat: + matchBind "parseFloat" + else: + matchError + inc i + of 's': + conds.add newCall(bindSym"inc", idx, + newCall(bindSym"skipWhitespace", inp, idx)) + conds.add newEmptyNode() + conds.add newEmptyNode() + of '.': + if p == pattern.len-1: + fullMatch = true + else: + error("invalid format string") + of '*', '+': + if i < results.len and getType(results[i]).typeKind == ntyString: + var min = ord(pattern[p] == '+') + var q = p+1 + var token = "" + while q < pattern.len and pattern[q] != '$': + token.add pattern[q] + inc q + var resLen = genSym(nskLet, "resLen") + conds.add newLetStmt(resLen, newCall(bindSym"parseUntil", inp, + results[i], newLit(token), idx)) + conds.add newCall(bindSym">=", resLen, newLit min) + conds.add resLen + else: + matchError + inc i + of '{': + inc p + var nesting = 0 + let start = p + while true: + case pattern.at(p) + of '{': inc nesting + of '}': + if nesting == 0: break + dec nesting + of '\0': error("expected closing '}'") + else: discard + inc p + let expr = pattern.substr(start, p-1) + if i < results.len: + var resLen = genSym(nskLet, "resLen") + conds.add newLetStmt(resLen, buildUserCall(expr, inp, results[i], idx)) + conds.add newCall(bindSym"!=", resLen, newLit 0) + conds.add resLen + else: + error("no var given for $" & expr & " (position: " & $p & ")") + inc i + of '[': + inc p + var nesting = 0 + let start = p + while true: + case pattern.at(p) + of '[': inc nesting + of ']': + if nesting == 0: break + dec nesting + of '\0': error("expected closing ']'") + else: discard + inc p + let expr = pattern.substr(start, p-1) + conds.add newCall(bindSym"inc", idx, buildUserCall(expr, inp, idx)) + conds.add newEmptyNode() + conds.add newEmptyNode() + else: error("invalid format 
string") + inc p + else: + var token = "" + while p < pattern.len and pattern[p] != '$': + token.add pattern[p] + inc p + var resLen = genSym(nskLet, "resLen") + conds.add newLetStmt(resLen, newCall(bindSym"skip", inp, newLit(token), idx)) + conds.add resLen.notZero + conds.add resLen + result.add conditionsToIfChain(conds, idx, res, 0) + if fullMatch: + result.add newCall(bindSym"and", res, + newCall(bindSym">=", idx, newCall(bindSym"len", inp))) + else: + result.add res + +macro scanTuple*(input: untyped; pattern: static[string]; matcherTypes: varargs[untyped]): untyped {.since: (1, 5).}= + ## Works identically as scanf, but instead of predeclaring variables it returns a tuple. + ## Tuple is started with a bool which indicates if the scan was successful + ## followed by the requested data. + ## If using a user defined matcher, provide the types in order they appear after pattern: + ## `line.scanTuple("${yourMatcher()}", int)` + runnableExamples: + let (success, year, month, day, time) = scanTuple("1000-01-01 00:00:00", "$i-$i-$i$s$+") + if success: + assert year == 1000 + assert month == 1 + assert day == 1 + assert time == "00:00:00" + var + p = 0 + userMatches = 0 + arguments: seq[NimNode] + result = newStmtList() + template addVar(typ: string) = + let varIdent = ident("temp" & $arguments.len) + result.add(newNimNode(nnkVarSection).add(newIdentDefs(varIdent, ident(typ), newEmptyNode()))) + arguments.add(varIdent) + while p < pattern.len: + if pattern[p] == '$': + inc p + case pattern[p] + of 'w', '*', '+': + addVar("string") + of 'c': + addVar("char") + of 'b', 'o', 'i', 'h': + addVar("int") + of 'f': + addVar("float") + of '{': + if userMatches < matcherTypes.len: + let varIdent = ident("temp" & $arguments.len) + result.add(newNimNode(nnkVarSection).add(newIdentDefs(varIdent, matcherTypes[userMatches], newEmptyNode()))) + arguments.add(varIdent) + inc userMatches + else: discard + inc p + result.add nnkTupleConstr.newTree(newCall(ident("scanf"), input, 
newStrLitNode(pattern))) + for arg in arguments: + result[^1][0].add arg + result[^1].add arg + result = newBlockStmt(result) + +template atom*(input: string; idx: int; c: char): bool = + ## Used in scanp for the matching of atoms (usually chars). + ## EOF is matched as ``'\0'``. + (idx < input.len and input[idx] == c) or (idx == input.len and c == '\0') + +template atom*(input: string; idx: int; s: set[char]): bool = + (idx < input.len and input[idx] in s) or (idx == input.len and '\0' in s) + +template hasNxt*(input: string; idx: int): bool = idx < input.len + +#template prepare*(input: string): int = 0 +template success*(x: int): bool = x != 0 + +template nxt*(input: string; idx, step: int = 1) = inc(idx, step) + +macro scanp*(input, idx: typed; pattern: varargs[untyped]): bool = + ## See top level documentation of this module about how ``scanp`` works. + type StmtTriple = tuple[init, cond, action: NimNode] + + template interf(x): untyped = bindSym(x, brForceOpen) + + proc toIfChain(n: seq[StmtTriple]; idx, res: NimNode; start: int): NimNode = + if start >= n.len: return newAssignment(res, newLit true) + var ifs: NimNode = nil + if n[start].cond.kind == nnkEmpty: + ifs = toIfChain(n, idx, res, start+1) + else: + ifs = newIfStmt((n[start].cond, + newTree(nnkStmtList, n[start].action, + toIfChain(n, idx, res, start+1)))) + result = newTree(nnkStmtList, n[start].init, ifs) + + proc attach(x, attached: NimNode): NimNode = + if attached == nil: x + else: newStmtList(attached, x) + + proc placeholder(n, x, j: NimNode): NimNode = + if n.kind == nnkPrefix and n[0].eqIdent("$"): + let n1 = n[1] + if n1.eqIdent"_" or n1.eqIdent"current": + result = newTree(nnkBracketExpr, x, j) + elif n1.eqIdent"input": + result = x + elif n1.eqIdent"i" or n1.eqIdent"index": + result = j + else: + error("unknown pattern " & repr(n)) + else: + result = copyNimNode(n) + for i in 0 ..< n.len: + result.add placeholder(n[i], x, j) + + proc atm(it, input, idx, attached: NimNode): StmtTriple = + 
template `!!`(x): untyped = attach(x, attached) + case it.kind + of nnkIdent: + var resLen = genSym(nskLet, "resLen") + result = (newLetStmt(resLen, newCall(it, input, idx)), + newCall(interf"success", resLen), + !!newCall(interf"nxt", input, idx, resLen)) + of nnkCallKinds: + # *{'A'..'Z'} !! s.add(!_) + template buildWhile(input, idx, init, cond, action): untyped = + mixin hasNxt + while hasNxt(input, idx): + init + if not cond: break + action + + # (x) a # bind action a to (x) + if it[0].kind in {nnkPar, nnkTupleConstr} and it.len == 2: + result = atm(it[0], input, idx, placeholder(it[1], input, idx)) + elif it.kind == nnkInfix and it[0].eqIdent"->": + # bind matching to some action: + result = atm(it[1], input, idx, placeholder(it[2], input, idx)) + elif it.kind == nnkInfix and it[0].eqIdent"as": + let cond = if it[1].kind in nnkCallKinds: placeholder(it[1], input, idx) + else: newCall(it[1], input, idx) + result = (newLetStmt(it[2], cond), + newCall(interf"success", it[2]), + !!newCall(interf"nxt", input, idx, it[2])) + elif it.kind == nnkPrefix and it[0].eqIdent"*": + let (init, cond, action) = atm(it[1], input, idx, attached) + result = (getAst(buildWhile(input, idx, init, cond, action)), + newEmptyNode(), newEmptyNode()) + elif it.kind == nnkPrefix and it[0].eqIdent"+": + # x+ is the same as xx* + result = atm(newTree(nnkTupleConstr, it[1], newTree(nnkPrefix, ident"*", it[1])), + input, idx, attached) + elif it.kind == nnkPrefix and it[0].eqIdent"?": + # optional. + let (init, cond, action) = atm(it[1], input, idx, attached) + if cond.kind == nnkEmpty: + error("'?' 
operator applied to a non-condition") + else: + result = (newTree(nnkStmtList, init, newIfStmt((cond, action))), + newEmptyNode(), newEmptyNode()) + elif it.kind == nnkPrefix and it[0].eqIdent"~": + # not operator + let (init, cond, action) = atm(it[1], input, idx, attached) + if cond.kind == nnkEmpty: + error("'~' operator applied to a non-condition") + else: + result = (init, newCall(bindSym"not", cond), action) + elif it.kind == nnkInfix and it[0].eqIdent"|": + let a = atm(it[1], input, idx, attached) + let b = atm(it[2], input, idx, attached) + if a.cond.kind == nnkEmpty or b.cond.kind == nnkEmpty: + error("'|' operator applied to a non-condition") + else: + result = (newStmtList(a.init, newIfStmt((a.cond, a.action), + (newTree(nnkStmtListExpr, b.init, b.cond), b.action))), + newEmptyNode(), newEmptyNode()) + elif it.kind == nnkInfix and it[0].eqIdent"^*": + # a ^* b is rewritten to: (a *(b a))? + #exprList = expr ^+ comma + template tmp(a, b): untyped = ?(a, *(b, a)) + result = atm(getAst(tmp(it[1], it[2])), input, idx, attached) + + elif it.kind == nnkInfix and it[0].eqIdent"^+": + # a ^* b is rewritten to: (a +(b a))? 
+ template tmp(a, b): untyped = (a, *(b, a)) + result = atm(getAst(tmp(it[1], it[2])), input, idx, attached) + elif it.kind == nnkCommand and it.len == 2 and it[0].eqIdent"pred": + # enforce that the wrapped call is interpreted as a predicate, not a non-terminal: + result = (newEmptyNode(), placeholder(it[1], input, idx), newEmptyNode()) + else: + var resLen = genSym(nskLet, "resLen") + result = (newLetStmt(resLen, placeholder(it, input, idx)), + newCall(interf"success", resLen), + !!newCall(interf"nxt", input, idx, resLen)) + of nnkStrLit..nnkTripleStrLit: + var resLen = genSym(nskLet, "resLen") + result = (newLetStmt(resLen, newCall(interf"skip", input, it, idx)), + newCall(interf"success", resLen), + !!newCall(interf"nxt", input, idx, resLen)) + of nnkCurly, nnkAccQuoted, nnkCharLit: + result = (newEmptyNode(), newCall(interf"atom", input, idx, it), + !!newCall(interf"nxt", input, idx)) + of nnkCurlyExpr: + if it.len == 3 and it[1].kind == nnkIntLit and it[2].kind == nnkIntLit: + var h = newTree(nnkTupleConstr, it[0]) + for count in 2i64 .. it[1].intVal: h.add(it[0]) + for count in it[1].intVal .. it[2].intVal-1: + h.add(newTree(nnkPrefix, ident"?", it[0])) + result = atm(h, input, idx, attached) + elif it.len == 2 and it[1].kind == nnkIntLit: + var h = newTree(nnkTupleConstr, it[0]) + for count in 2i64 .. 
it[1].intVal: h.add(it[0]) + result = atm(h, input, idx, attached) + else: + error("invalid pattern") + of nnkPar, nnkTupleConstr: + if it.len == 1 and it.kind == nnkPar: + result = atm(it[0], input, idx, attached) + else: + # concatenation: + var conds: seq[StmtTriple] = @[] + for x in it: conds.add atm(x, input, idx, attached) + var res = genSym(nskVar, "res") + result = (newStmtList(newVarStmt(res, newLit false), + toIfChain(conds, idx, res, 0)), res, newEmptyNode()) + else: + error("invalid pattern") + + #var idx = genSym(nskVar, "idx") + var res = genSym(nskVar, "res") + result = newTree(nnkStmtListExpr, #newVarStmt(idx, newCall(interf"prepare", input)), + newVarStmt(res, newLit false)) + var conds: seq[StmtTriple] = @[] + for it in pattern: + conds.add atm(it, input, idx, nil) + result.add toIfChain(conds, idx, res, 0) + result.add res + when defined(debugScanp): + echo repr result diff --git a/lib/pure/strtabs.nim b/lib/pure/strtabs.nim index 727d5a386..4b07aca5a 100644 --- a/lib/pure/strtabs.nim +++ b/lib/pure/strtabs.nim @@ -9,72 +9,112 @@ ## The ``strtabs`` module implements an efficient hash table that is a mapping ## from strings to strings. Supports a case-sensitive, case-insensitive and -## style-insensitive mode. An efficient string substitution operator ``%`` -## for the string table is also provided. +## style-insensitive mode. + +runnableExamples: + var t = newStringTable() + t["name"] = "John" + t["city"] = "Monaco" + doAssert t.len == 2 + doAssert t.hasKey "name" + doAssert "name" in t + +## String tables can be created from a table constructor: +runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + +## When using the style insensitive mode (``modeStyleInsensitive``), +## all letters are compared case insensitively within the ASCII range +## and underscores are ignored. 
+runnableExamples: + var x = newStringTable(modeStyleInsensitive) + x["first_name"] = "John" + x["LastName"] = "Doe" + + doAssert x["firstName"] == "John" + doAssert x["last_name"] == "Doe" + +## An efficient string substitution operator +## `% <#%25,string,StringTableRef,set[FormatFlag]>`_ for the string table +## is also provided. +runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert "${name} lives in ${city}" % t == "John lives in Monaco" + +## **See also:** +## * `tables module <tables.html>`_ for general hash tables +## * `sharedtables module<sharedtables.html>`_ for shared hash table support +## * `strutils module<strutils.html>`_ for common string functions +## * `json module<json.html>`_ for table-like structure which allows +## heterogeneous members + +import std/private/since import - os, hashes, strutils + std/[hashes, strutils] + +when defined(nimPreviewSlimSystem): + import std/assertions + + +when defined(js) or defined(nimscript) or defined(Standalone): + {.pragma: rtlFunc.} +else: + {.pragma: rtlFunc, rtl.} + import std/envvars include "system/inclrtl" type - StringTableMode* = enum ## describes the tables operation mode - modeCaseSensitive, ## the table is case sensitive - modeCaseInsensitive, ## the table is case insensitive - modeStyleInsensitive ## the table is style insensitive - KeyValuePair = tuple[key, val: string] + StringTableMode* = enum ## Describes the tables operation mode. 
+ modeCaseSensitive, ## the table is case sensitive + modeCaseInsensitive, ## the table is case insensitive + modeStyleInsensitive ## the table is style insensitive + KeyValuePair = tuple[key, val: string, hasValue: bool] KeyValuePairSeq = seq[KeyValuePair] StringTableObj* = object of RootObj counter: int data: KeyValuePairSeq mode: StringTableMode - StringTableRef* = ref StringTableObj ## use this type to declare string tables + StringTableRef* = ref StringTableObj -{.deprecated: [TStringTableMode: StringTableMode, - TStringTable: StringTableObj, PStringTable: StringTableRef].} + FormatFlag* = enum ## Flags for the `%` operator. + useEnvironment, ## Use environment variable if the ``$key`` + ## is not found in the table. + ## Does nothing when using `js` target. + useEmpty, ## Use the empty string as a default, thus it + ## won't throw an exception if ``$key`` is not + ## in the table. + useKey ## Do not replace ``$key`` if it is not found + ## in the table (or in the environment). -proc len*(t: StringTableRef): int {.rtl, extern: "nst$1".} = - ## returns the number of keys in `t`. - result = t.counter +const + growthFactor = 2 + startSize = 64 + +proc mode*(t: StringTableRef): StringTableMode {.inline.} = t.mode iterator pairs*(t: StringTableRef): tuple[key, value: string] = - ## iterates over every (key, value) pair in the table `t`. + ## Iterates over every `(key, value)` pair in the table `t`. for h in 0..high(t.data): - if not isNil(t.data[h].key): + if t.data[h].hasValue: yield (t.data[h].key, t.data[h].val) iterator keys*(t: StringTableRef): string = - ## iterates over every key in the table `t`. + ## Iterates over every key in the table `t`. for h in 0..high(t.data): - if not isNil(t.data[h].key): + if t.data[h].hasValue: yield t.data[h].key iterator values*(t: StringTableRef): string = - ## iterates over every value in the table `t`. + ## Iterates over every value in the table `t`. 
for h in 0..high(t.data): - if not isNil(t.data[h].key): + if t.data[h].hasValue: yield t.data[h].val -type - FormatFlag* = enum ## flags for the `%` operator - useEnvironment, ## use environment variable if the ``$key`` - ## is not found in the table - useEmpty, ## use the empty string as a default, thus it - ## won't throw an exception if ``$key`` is not - ## in the table - useKey ## do not replace ``$key`` if it is not found - ## in the table (or in the environment) - -{.deprecated: [TFormatFlag: FormatFlag].} - -# implementation - -const - growthFactor = 2 - startSize = 64 -proc myhash(t: StringTableRef, key: string): THash = +proc myhash(t: StringTableRef, key: string): Hash = case t.mode of modeCaseSensitive: result = hashes.hash(key) of modeCaseInsensitive: result = hashes.hashIgnoreCase(key) @@ -90,53 +130,122 @@ proc mustRehash(length, counter: int): bool = assert(length > counter) result = (length * 2 < counter * 3) or (length - counter < 4) -proc nextTry(h, maxHash: THash): THash {.inline.} = - result = ((5 * h) + 1) and maxHash +proc nextTry(h, maxHash: Hash): Hash {.inline.} = + result = (h + 1) and maxHash proc rawGet(t: StringTableRef, key: string): int = - var h: THash = myhash(t, key) and high(t.data) # start with real hash value - while not isNil(t.data[h].key): + var h: Hash = myhash(t, key) and high(t.data) # start with real hash value + while t.data[h].hasValue: if myCmp(t, t.data[h].key, key): return h h = nextTry(h, high(t.data)) result = - 1 -proc `[]`*(t: StringTableRef, key: string): string {.rtl, extern: "nstGet".} = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, "" is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. 
+template get(t: StringTableRef, key: string) = var index = rawGet(t, key) if index >= 0: result = t.data[index].val - else: result = "" + else: + raise newException(KeyError, "key not found: " & key) + + +proc len*(t: StringTableRef): int {.rtlFunc, extern: "nst$1".} = + ## Returns the number of keys in `t`. + result = t.counter + +proc `[]`*(t: StringTableRef, key: string): var string {. + rtlFunc, extern: "nstTake".} = + ## Retrieves the location at ``t[key]``. + ## + ## If `key` is not in `t`, the ``KeyError`` exception is raised. + ## One can check with `hasKey proc <#hasKey,StringTableRef,string>`_ + ## whether the key exists. + ## + ## See also: + ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_ + ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key + ## is in the table + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert t["name"] == "John" + doAssertRaises(KeyError): + echo t["occupation"] + get(t, key) + +proc getOrDefault*(t: StringTableRef; key: string, + default: string = ""): string = + ## Retrieves the location at ``t[key]``. + ## + ## If `key` is not in `t`, the default value is returned (if not specified, + ## it is an empty string (`""`)). 
+ ## + ## See also: + ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key + ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key + ## is in the table + ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new + ## (key, value) pair in the table + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert t.getOrDefault("name") == "John" + doAssert t.getOrDefault("occupation") == "" + doAssert t.getOrDefault("occupation", "teacher") == "teacher" + doAssert t.getOrDefault("name", "Paul") == "John" -proc mget*(t: StringTableRef, key: string): var string {. - rtl, extern: "nstTake".} = - ## retrieves the location at ``t[key]``. If `key` is not in `t`, the - ## ``KeyError`` exception is raised. var index = rawGet(t, key) if index >= 0: result = t.data[index].val - else: raise newException(KeyError, "key does not exist: " & key) - -proc hasKey*(t: StringTableRef, key: string): bool {.rtl, extern: "nst$1".} = - ## returns true iff `key` is in the table `t`. + else: result = default + +proc hasKey*(t: StringTableRef, key: string): bool {.rtlFunc, + extern: "nst$1".} = + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_ + ## * `contains proc <#contains,StringTableRef,string>`_ + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert t.hasKey("name") + doAssert not t.hasKey("occupation") result = rawGet(t, key) >= 0 +proc contains*(t: StringTableRef, key: string): bool = + ## Alias of `hasKey proc <#hasKey,StringTableRef,string>`_ for use with + ## the `in` operator. 
+ runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert "name" in t + doAssert "occupation" notin t + return hasKey(t, key) + proc rawInsert(t: StringTableRef, data: var KeyValuePairSeq, key, val: string) = - var h: THash = myhash(t, key) and high(data) - while not isNil(data[h].key): + var h: Hash = myhash(t, key) and high(data) + while data[h].hasValue: h = nextTry(h, high(data)) data[h].key = key data[h].val = val + data[h].hasValue = true proc enlarge(t: StringTableRef) = var n: KeyValuePairSeq newSeq(n, len(t.data) * growthFactor) for i in countup(0, high(t.data)): - if not isNil(t.data[i].key): rawInsert(t, n, t.data[i].key, t.data[i].val) + if t.data[i].hasValue: rawInsert(t, n, move t.data[i].key, move t.data[i].val) swap(t.data, n) -proc `[]=`*(t: StringTableRef, key, val: string) {.rtl, extern: "nstPut".} = - ## puts a (key, value)-pair into `t`. +proc `[]=`*(t: StringTableRef, key, val: string) {. + rtlFunc, extern: "nstPut".} = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key + ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + t["occupation"] = "teacher" + doAssert t.hasKey("occupation") + var index = rawGet(t, key) if index >= 0: t.data[index].val = val @@ -145,55 +254,146 @@ proc `[]=`*(t: StringTableRef, key, val: string) {.rtl, extern: "nstPut".} = rawInsert(t, t.data, key, val) inc(t.counter) -proc raiseFormatException(s: string) = - var e: ref ValueError - new(e) - e.msg = "format string: key not found: " & s - raise e - -proc getValue(t: StringTableRef, flags: set[FormatFlag], key: string): string = - if hasKey(t, key): return t[key] - # hm difficult: assume safety in taint mode here. XXX This is dangerous! 
- if useEnvironment in flags: result = os.getEnv(key).string - else: result = "" - if result.len == 0: - if useKey in flags: result = '$' & key - elif useEmpty notin flags: raiseFormatException(key) - -proc newStringTable*(mode: StringTableMode): StringTableRef {. - rtl, extern: "nst$1".} = - ## creates a new string table that is empty. - new(result) - result.mode = mode - result.counter = 0 - newSeq(result.data, startSize) +proc newStringTable*(mode: StringTableMode): owned(StringTableRef) {. + rtlFunc, extern: "nst$1", noSideEffect.} = + ## Creates a new empty string table. + ## + ## See also: + ## * `newStringTable(keyValuePairs) proc + ## <#newStringTable,varargs[tuple[string,string]],StringTableMode>`_ + result = StringTableRef(mode: mode, counter: 0, data: newSeq[KeyValuePair](startSize)) proc newStringTable*(keyValuePairs: varargs[string], - mode: StringTableMode): StringTableRef {. - rtl, extern: "nst$1WithPairs".} = - ## creates a new string table with given key value pairs. - ## Example:: - ## var mytab = newStringTable("key1", "val1", "key2", "val2", - ## modeCaseInsensitive) + mode: StringTableMode): owned(StringTableRef) {. + rtlFunc, extern: "nst$1WithPairs", noSideEffect.} = + ## Creates a new string table with given `key, value` string pairs. + ## + ## `StringTableMode` must be specified. + runnableExamples: + var mytab = newStringTable("key1", "val1", "key2", "val2", + modeCaseInsensitive) + result = newStringTable(mode) var i = 0 while i < high(keyValuePairs): - result[keyValuePairs[i]] = keyValuePairs[i + 1] + {.noSideEffect.}: + result[keyValuePairs[i]] = keyValuePairs[i + 1] inc(i, 2) proc newStringTable*(keyValuePairs: varargs[tuple[key, val: string]], - mode: StringTableMode = modeCaseSensitive): StringTableRef {. - rtl, extern: "nst$1WithTableConstr".} = - ## creates a new string table with given key value pairs. 
- ## Example:: - ## var mytab = newStringTable({"key1": "val1", "key2": "val2"}, - ## modeCaseInsensitive) + mode: StringTableMode = modeCaseSensitive): owned(StringTableRef) {. + rtlFunc, extern: "nst$1WithTableConstr", noSideEffect.} = + ## Creates a new string table with given `(key, value)` tuple pairs. + ## + ## The default mode is case sensitive. + runnableExamples: + var + mytab1 = newStringTable({"key1": "val1", "key2": "val2"}, modeCaseInsensitive) + mytab2 = newStringTable([("key3", "val3"), ("key4", "val4")]) + result = newStringTable(mode) - for key, val in items(keyValuePairs): result[key] = val + for key, val in items(keyValuePairs): + {.noSideEffect.}: + result[key] = val + +proc raiseFormatException(s: string) = + raise newException(ValueError, "format string: key not found: " & s) + +proc getValue(t: StringTableRef, flags: set[FormatFlag], key: string): string = + if hasKey(t, key): return t.getOrDefault(key) + when defined(js) or defined(nimscript) or defined(Standalone): + result = "" + else: + if useEnvironment in flags: result = getEnv(key) + else: result = "" + if result.len == 0: + if useKey in flags: result = '$' & key + elif useEmpty notin flags: raiseFormatException(key) + +proc clear*(s: StringTableRef, mode: StringTableMode) {. + rtlFunc, extern: "nst$1".} = + ## Resets a string table to be empty again, perhaps altering the mode. + ## + ## See also: + ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + clear(t, modeCaseSensitive) + doAssert len(t) == 0 + doAssert "name" notin t + doAssert "city" notin t + s.mode = mode + s.counter = 0 + s.data.setLen(startSize) + for i in 0..<s.data.len: + s.data[i].hasValue = false + +proc clear*(s: StringTableRef) {.since: (1, 1).} = + ## Resets a string table to be empty again without changing the mode. 
+ s.clear(s.mode) + +proc del*(t: StringTableRef, key: string) = + ## Removes `key` from `t`. + ## + ## See also: + ## * `clear proc <#clear,StringTableRef,StringTableMode>`_ for resetting a + ## table to be empty + ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new + ## (key, value) pair in the table + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + t.del("name") + doAssert len(t) == 1 + doAssert "name" notin t + doAssert "city" in t + + # Impl adapted from `tableimpl.delImplIdx` + var i = rawGet(t, key) + let msk = high(t.data) + if i >= 0: + dec(t.counter) + block outer: + while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 + var j = i # The correctness of this depends on (h+1) in nextTry, + var r = j # though may be adaptable to other simple sequences. + t.data[i].hasValue = false # mark current EMPTY + t.data[i].key = "" + t.data[i].val = "" + while true: + i = (i + 1) and msk # increment mod table size + if not t.data[i].hasValue: # end of collision cluster; So all done + break outer + r = t.myhash(t.data[i].key) and msk # "home" location of key@i + if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): + break + when defined(js): + t.data[j] = t.data[i] + elif defined(gcDestructors): + t.data[j] = move t.data[i] + else: + shallowCopy(t.data[j], t.data[i]) # data[j] will be marked EMPTY next loop + +proc `$`*(t: StringTableRef): string {.rtlFunc, extern: "nstDollar".} = + ## The `$` operator for string tables. Used internally when calling + ## `echo` on a table. + if t.len == 0: + result = "{:}" + else: + result = "{" + for key, val in pairs(t): + if result.len > 1: result.add(", ") + result.add(key) + result.add(": ") + result.add(val) + result.add("}") proc `%`*(f: string, t: StringTableRef, flags: set[FormatFlag] = {}): string {. - rtl, extern: "nstFormat".} = + rtlFunc, extern: "nstFormat".} = ## The `%` operator for string tables. 
+ runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert "${name} lives in ${city}" % t == "John lives in Monaco" + const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} result = "" @@ -220,25 +420,3 @@ proc `%`*(f: string, t: StringTableRef, flags: set[FormatFlag] = {}): string {. else: add(result, f[i]) inc(i) - -proc `$`*(t: StringTableRef): string {.rtl, extern: "nstDollar".} = - ## The `$` operator for string tables. - if t.len == 0: - result = "{:}" - else: - result = "{" - for key, val in pairs(t): - if result.len > 1: result.add(", ") - result.add(key) - result.add(": ") - result.add(val) - result.add("}") - -when isMainModule: - var x = {"k": "v", "11": "22", "565": "67"}.newStringTable - assert x["k"] == "v" - assert x["11"] == "22" - assert x["565"] == "67" - x.mget("11") = "23" - assert x["11"] == "23" - diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 655203cda..81be7db17 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -7,121 +7,323 @@ # distribution, for details about the copyright. # -## This module contains various string utility routines. -## See the module `re <re.html>`_ for regular expression support. -## See the module `pegs <pegs.html>`_ for PEG support. -## This module is available for the `JavaScript target -## <backends.html#the-javascript-target>`_. - -import parseutils +## The system module defines several common functions for working with strings, +## such as: +## * `$` for converting other data-types to strings +## * `&` for string concatenation +## * `add` for adding a new character or a string to the existing one +## * `in` (alias for `contains`) and `notin` for checking if a character +## is in a string +## +## This module builds upon that, providing additional functionality in form of +## procedures, iterators and templates for strings. 
+ +runnableExamples: + let + numbers = @[867, 5309] + multiLineString = "first line\nsecond line\nthird line" + + let jenny = numbers.join("-") + assert jenny == "867-5309" + + assert splitLines(multiLineString) == + @["first line", "second line", "third line"] + assert split(multiLineString) == @["first", "line", "second", + "line", "third", "line"] + assert indent(multiLineString, 4) == + " first line\n second line\n third line" + assert 'z'.repeat(5) == "zzzzz" + +## The chaining of functions is possible thanks to the +## `method call syntax<manual.html#procedures-method-call-syntax>`_: + +runnableExamples: + from std/sequtils import map + + let jenny = "867-5309" + assert jenny.split('-').map(parseInt) == @[867, 5309] + + assert "Beetlejuice".indent(1).repeat(3).strip == + "Beetlejuice Beetlejuice Beetlejuice" -{.deadCodeElim: on.} - -{.push debugger:off .} # the user does not want to trace a part - # of the standard library! +## This module is available for the `JavaScript target +## <backends.html#backends-the-javascript-target>`_. +## +## ---- +## +## **See also:** +## * `strformat module<strformat.html>`_ for string interpolation and formatting +## * `unicode module<unicode.html>`_ for Unicode UTF-8 handling +## * `sequtils module<sequtils.html>`_ for operations on container +## types (including strings) +## * `parsecsv module<parsecsv.html>`_ for a high-performance CSV parser +## * `parseutils module<parseutils.html>`_ for lower-level parsing of tokens, +## numbers, identifiers, etc. 
+## * `parseopt module<parseopt.html>`_ for command-line parsing +## * `pegs module<pegs.html>`_ for PEG (Parsing Expression Grammar) support +## * `strtabs module<strtabs.html>`_ for efficient hash tables +## (dictionaries, in some programming languages) mapping from strings to strings +## * `ropes module<ropes.html>`_ for rope data type, which can represent very +## long strings efficiently +## * `re module<re.html>`_ for regular expression (regex) support +## * `strscans<strscans.html>`_ for `scanf` and `scanp` macros, which offer +## easier substring extraction than regular expressions + + +import std/parseutils +from std/math import pow, floor, log10 +from std/algorithm import fill, reverse +import std/enumutils + +from std/unicode import toLower, toUpper +export toLower, toUpper include "system/inclrtl" +import std/private/[since, jsutils] +from std/private/strimpl import cmpIgnoreStyleImpl, cmpIgnoreCaseImpl, + startsWithImpl, endsWithImpl + +when defined(nimPreviewSlimSystem): + import std/assertions -type - TCharSet* {.deprecated.} = set[char] # for compatibility with Nim const Whitespace* = {' ', '\t', '\v', '\r', '\l', '\f'} - ## All the characters that count as whitespace. + ## All the characters that count as whitespace (space, tab, vertical tab, + ## carriage return, new line, form feed). Letters* = {'A'..'Z', 'a'..'z'} - ## the set of letters + ## The set of letters. + + UppercaseLetters* = {'A'..'Z'} + ## The set of uppercase ASCII letters. + + LowercaseLetters* = {'a'..'z'} + ## The set of lowercase ASCII letters. + + PunctuationChars* = {'!'..'/', ':'..'@', '['..'`', '{'..'~'} + ## The set of all ASCII punctuation characters. Digits* = {'0'..'9'} - ## the set of digits + ## The set of digits. HexDigits* = {'0'..'9', 'A'..'F', 'a'..'f'} - ## the set of hexadecimal digits + ## The set of hexadecimal digits. 
IdentChars* = {'a'..'z', 'A'..'Z', '0'..'9', '_'} - ## the set of characters an identifier can consist of + ## The set of characters an identifier can consist of. IdentStartChars* = {'a'..'z', 'A'..'Z', '_'} - ## the set of characters an identifier can start with + ## The set of characters an identifier can start with. + + Newlines* = {'\13', '\10'} + ## The set of characters a newline terminator can start with (carriage + ## return, line feed). - NewLines* = {'\13', '\10'} - ## the set of characters a newline terminator can start with + PrintableChars* = Letters + Digits + PunctuationChars + Whitespace + ## The set of all printable ASCII characters (letters, digits, whitespace, and punctuation characters). AllChars* = {'\x00'..'\xFF'} ## A set with all the possible characters. ## ## Not very useful by its own, you can use it to create *inverted* sets to - ## make the `find() proc <#find,string,set[char],int>`_ find **invalid** - ## characters in strings. Example: - ## - ## .. code-block:: nim + ## make the `find func<#find,string,set[char],Natural,int>`_ + ## find **invalid** characters in strings. Example: + ## ```nim ## let invalid = AllChars - Digits ## doAssert "01234".find(invalid) == -1 ## doAssert "01A34".find(invalid) == 2 - -proc toLower*(c: char): char {.noSideEffect, procvar, - rtl, extern: "nsuToLowerChar".} = - ## Converts `c` into lower case. - ## - ## This works only for the letters ``A-Z``. See `unicode.toLower - ## <unicode.html#toLower>`_ for a version that works for any Unicode + ## ``` + +func isAlphaAscii*(c: char): bool {.rtl, extern: "nsuIsAlphaAsciiChar".} = + ## Checks whether or not character `c` is alphabetical. + ## + ## This checks a-z, A-Z ASCII characters only. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. 
+ runnableExamples: + doAssert isAlphaAscii('e') == true + doAssert isAlphaAscii('E') == true + doAssert isAlphaAscii('8') == false + return c in Letters + +func isAlphaNumeric*(c: char): bool {.rtl, extern: "nsuIsAlphaNumericChar".} = + ## Checks whether or not `c` is alphanumeric. + ## + ## This checks a-z, A-Z, 0-9 ASCII characters only. + runnableExamples: + doAssert isAlphaNumeric('n') == true + doAssert isAlphaNumeric('8') == true + doAssert isAlphaNumeric(' ') == false + return c in Letters+Digits + +func isDigit*(c: char): bool {.rtl, extern: "nsuIsDigitChar".} = + ## Checks whether or not `c` is a number. + ## + ## This checks 0-9 ASCII characters only. + runnableExamples: + doAssert isDigit('n') == false + doAssert isDigit('8') == true + return c in Digits + +func isSpaceAscii*(c: char): bool {.rtl, extern: "nsuIsSpaceAsciiChar".} = + ## Checks whether or not `c` is a whitespace character. + runnableExamples: + doAssert isSpaceAscii('n') == false + doAssert isSpaceAscii(' ') == true + doAssert isSpaceAscii('\t') == true + return c in Whitespace + +func isLowerAscii*(c: char): bool {.rtl, extern: "nsuIsLowerAsciiChar".} = + ## Checks whether or not `c` is a lower case character. + ## + ## This checks ASCII characters only. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. + ## + ## See also: + ## * `toLowerAscii func<#toLowerAscii,char>`_ + runnableExamples: + doAssert isLowerAscii('e') == true + doAssert isLowerAscii('E') == false + doAssert isLowerAscii('7') == false + return c in LowercaseLetters + +func isUpperAscii*(c: char): bool {.rtl, extern: "nsuIsUpperAsciiChar".} = + ## Checks whether or not `c` is an upper case character. + ## + ## This checks ASCII characters only. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. 
+ ## + ## See also: + ## * `toUpperAscii func<#toUpperAscii,char>`_ + runnableExamples: + doAssert isUpperAscii('e') == false + doAssert isUpperAscii('E') == true + doAssert isUpperAscii('7') == false + return c in UppercaseLetters + +func toLowerAscii*(c: char): char {.rtl, extern: "nsuToLowerAsciiChar".} = + ## Returns the lower case version of character `c`. + ## + ## This works only for the letters `A-Z`. See `unicode.toLower + ## <unicode.html#toLower,Rune>`_ for a version that works for any Unicode ## character. - if c in {'A'..'Z'}: - result = chr(ord(c) + (ord('a') - ord('A'))) + ## + ## See also: + ## * `isLowerAscii func<#isLowerAscii,char>`_ + ## * `toLowerAscii func<#toLowerAscii,string>`_ for converting a string + runnableExamples: + doAssert toLowerAscii('A') == 'a' + doAssert toLowerAscii('e') == 'e' + if c in UppercaseLetters: + result = char(uint8(c) xor 0b0010_0000'u8) else: result = c -proc toLower*(s: string): string {.noSideEffect, procvar, - rtl, extern: "nsuToLowerStr".} = - ## Converts `s` into lower case. - ## - ## This works only for the letters ``A-Z``. See `unicode.toLower - ## <unicode.html#toLower>`_ for a version that works for any Unicode - ## character. +template toImpl(call) = result = newString(len(s)) for i in 0..len(s) - 1: - result[i] = toLower(s[i]) + result[i] = call(s[i]) -proc toUpper*(c: char): char {.noSideEffect, procvar, - rtl, extern: "nsuToUpperChar".} = - ## Converts `c` into upper case. +func toLowerAscii*(s: string): string {.rtl, extern: "nsuToLowerAsciiStr".} = + ## Converts string `s` into lower case. ## - ## This works only for the letters ``A-Z``. See `unicode.toUpper - ## <unicode.html#toUpper>`_ for a version that works for any Unicode + ## This works only for the letters `A-Z`. See `unicode.toLower + ## <unicode.html#toLower,string>`_ for a version that works for any Unicode ## character. 
- if c in {'a'..'z'}: - result = chr(ord(c) - (ord('a') - ord('A'))) + ## + ## See also: + ## * `normalize func<#normalize,string>`_ + runnableExamples: + doAssert toLowerAscii("FooBar!") == "foobar!" + toImpl toLowerAscii + +func toUpperAscii*(c: char): char {.rtl, extern: "nsuToUpperAsciiChar".} = + ## Converts character `c` into upper case. + ## + ## This works only for the letters `A-Z`. See `unicode.toUpper + ## <unicode.html#toUpper,Rune>`_ for a version that works for any Unicode + ## character. + ## + ## See also: + ## * `isUpperAscii func<#isUpperAscii,char>`_ + ## * `toUpperAscii func<#toUpperAscii,string>`_ for converting a string + ## * `capitalizeAscii func<#capitalizeAscii,string>`_ + runnableExamples: + doAssert toUpperAscii('a') == 'A' + doAssert toUpperAscii('E') == 'E' + if c in LowercaseLetters: + result = char(uint8(c) xor 0b0010_0000'u8) else: result = c -proc toUpper*(s: string): string {.noSideEffect, procvar, - rtl, extern: "nsuToUpperStr".} = - ## Converts `s` into upper case. +func toUpperAscii*(s: string): string {.rtl, extern: "nsuToUpperAsciiStr".} = + ## Converts string `s` into upper case. ## - ## This works only for the letters ``A-Z``. See `unicode.toUpper - ## <unicode.html#toUpper>`_ for a version that works for any Unicode + ## This works only for the letters `A-Z`. See `unicode.toUpper + ## <unicode.html#toUpper,string>`_ for a version that works for any Unicode ## character. - result = newString(len(s)) - for i in 0..len(s) - 1: - result[i] = toUpper(s[i]) - -proc capitalize*(s: string): string {.noSideEffect, procvar, - rtl, extern: "nsuCapitalize".} = - ## Converts the first character of `s` into upper case. ## - ## This works only for the letters ``A-Z``. - result = toUpper(s[0]) & substr(s, 1) + ## See also: + ## * `capitalizeAscii func<#capitalizeAscii,string>`_ + runnableExamples: + doAssert toUpperAscii("FooBar!") == "FOOBAR!" 
+ toImpl toUpperAscii + +func capitalizeAscii*(s: string): string {.rtl, extern: "nsuCapitalizeAscii".} = + ## Converts the first character of string `s` into upper case. + ## + ## This works only for the letters `A-Z`. + ## Use `Unicode module<unicode.html>`_ for UTF-8 support. + ## + ## See also: + ## * `toUpperAscii func<#toUpperAscii,char>`_ + runnableExamples: + doAssert capitalizeAscii("foo") == "Foo" + doAssert capitalizeAscii("-bar") == "-bar" + if s.len == 0: result = "" + else: result = toUpperAscii(s[0]) & substr(s, 1) + +func nimIdentNormalize*(s: string): string = + ## Normalizes the string `s` as a Nim identifier. + ## + ## That means to convert to lower case and remove any '_' on all characters + ## except first one. + ## + ## .. Warning:: Backticks (`) are not handled: they remain *as is* and + ## spaces are preserved. See `nimIdentBackticksNormalize + ## <dochelpers.html#nimIdentBackticksNormalize,string>`_ for + ## an alternative approach. + runnableExamples: + doAssert nimIdentNormalize("Foo_bar") == "Foobar" + result = newString(s.len) + if s.len == 0: + return + result[0] = s[0] + var j = 1 + for i in 1..len(s) - 1: + if s[i] in UppercaseLetters: + result[j] = chr(ord(s[i]) + (ord('a') - ord('A'))) + inc j + elif s[i] != '_': + result[j] = s[i] + inc j + if j != s.len: setLen(result, j) -proc normalize*(s: string): string {.noSideEffect, procvar, - rtl, extern: "nsuNormalize".} = +func normalize*(s: string): string {.rtl, extern: "nsuNormalize".} = ## Normalizes the string `s`. ## - ## That means to convert it to lower case and remove any '_'. This is needed - ## for Nim identifiers for example. + ## That means to convert it to lower case and remove any '_'. This + ## should NOT be used to normalize Nim identifier names. 
+ ## + ## See also: + ## * `toLowerAscii func<#toLowerAscii,string>`_ + runnableExamples: + doAssert normalize("Foo_bar") == "foobar" + doAssert normalize("Foo Bar") == "foo bar" result = newString(s.len) var j = 0 for i in 0..len(s) - 1: - if s[i] in {'A'..'Z'}: + if s[i] in UppercaseLetters: result[j] = chr(ord(s[i]) + (ord('a') - ord('A'))) inc j elif s[i] != '_': @@ -129,192 +331,343 @@ proc normalize*(s: string): string {.noSideEffect, procvar, inc j if j != s.len: setLen(result, j) -proc cmpIgnoreCase*(a, b: string): int {.noSideEffect, - rtl, extern: "nsuCmpIgnoreCase", procvar.} = +func cmpIgnoreCase*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreCase".} = ## Compares two strings in a case insensitive manner. Returns: ## - ## | 0 iff a == b - ## | < 0 iff a < b - ## | > 0 iff a > b - var i = 0 - var m = min(a.len, b.len) - while i < m: - result = ord(toLower(a[i])) - ord(toLower(b[i])) - if result != 0: return - inc(i) - result = a.len - b.len + ## | `0` if a == b + ## | `< 0` if a < b + ## | `> 0` if a > b + runnableExamples: + doAssert cmpIgnoreCase("FooBar", "foobar") == 0 + doAssert cmpIgnoreCase("bar", "Foo") < 0 + doAssert cmpIgnoreCase("Foo5", "foo4") > 0 + cmpIgnoreCaseImpl(a, b) + +{.push checks: off, line_trace: off.} # this is a hot-spot in the compiler! + # thus we compile without checks here + +func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} = + ## Semantically the same as `cmp(normalize(a), normalize(b))`. It + ## is just optimized to not allocate temporary strings. This should + ## NOT be used to compare Nim identifier names. + ## Use `macros.eqIdent<macros.html#eqIdent,string,string>`_ for that. 
+ ## + ## Returns: + ## + ## | `0` if a == b + ## | `< 0` if a < b + ## | `> 0` if a > b + runnableExamples: + doAssert cmpIgnoreStyle("foo_bar", "FooBar") == 0 + doAssert cmpIgnoreStyle("foo_bar_5", "FooBar4") > 0 + cmpIgnoreStyleImpl(a, b) +{.pop.} -{.push checks: off, line_trace: off .} # this is a hot-spot in the compiler! - # thus we compile without checks here +# --------- Private templates for different split separators ----------- -proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect, - rtl, extern: "nsuCmpIgnoreStyle", procvar.} = - ## Compares two strings normalized (i.e. case and - ## underscores do not matter). Returns: - ## - ## | 0 iff a == b - ## | < 0 iff a < b - ## | > 0 iff a > b - var i = 0 - var j = 0 - while true: - while a[i] == '_': inc(i) - while b[j] == '_': inc(j) # BUGFIX: typo - var aa = toLower(a[i]) - var bb = toLower(b[j]) - result = ord(aa) - ord(bb) - if result != 0 or aa == '\0': break - inc(i) - inc(j) +func substrEq(s: string, pos: int, substr: string): bool = + # Always returns false for empty `substr` + var length = substr.len + if length > 0: + var i = 0 + while i < length and pos+i < s.len and s[pos+i] == substr[i]: + inc i + i == length + else: false -{.pop.} +template stringHasSep(s: string, index: int, seps: set[char]): bool = + s[index] in seps -proc strip*(s: string, leading = true, trailing = true): string {.noSideEffect, - rtl, extern: "nsuStrip".} = - ## Strips whitespace from `s` and returns the resulting string. - ## - ## If `leading` is true, leading whitespace is stripped. - ## If `trailing` is true, trailing whitespace is stripped. 
- const - chars: set[char] = Whitespace - var - first = 0 - last = len(s)-1 - if leading: - while s[first] in chars: inc(first) - if trailing: - while last >= 0 and s[last] in chars: dec(last) - result = substr(s, first, last) +template stringHasSep(s: string, index: int, sep: char): bool = + s[index] == sep -proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} = - ## Converts a character `c` to its octal representation. +template stringHasSep(s: string, index: int, sep: string): bool = + s.substrEq(index, sep) + +template splitCommon(s, sep, maxsplit, sepLen) = + ## Common code for split procs + var last = 0 + var splits = maxsplit + + while last <= len(s): + var first = last + while last < len(s) and not stringHasSep(s, last, sep): + inc(last) + if splits == 0: last = len(s) + yield substr(s, first, last-1) + if splits == 0: break + dec(splits) + inc(last, sepLen) + +template oldSplit(s, seps, maxsplit) = + var last = 0 + var splits = maxsplit + assert(not ('\0' in seps)) + while last < len(s): + while last < len(s) and s[last] in seps: inc(last) + var first = last + while last < len(s) and s[last] notin seps: inc(last) + if first <= last-1: + if splits == 0: last = len(s) + yield substr(s, first, last-1) + if splits == 0: break + dec(splits) + +template accResult(iter: untyped) = + result = @[] + for x in iter: add(result, x) + + +iterator split*(s: string, sep: char, maxsplit: int = -1): string = + ## Splits the string `s` into substrings using a single separator. ## - ## The resulting string may not have a leading zero. Its length is always - ## exactly 3. - result = newString(3) - var val = ord(c) - for i in countdown(2, 0): - result[i] = chr(val mod 8 + ord('0')) - val = val div 8 + ## Substrings are separated by the character `sep`. 
+ ## The code: + ## ```nim + ## for word in split(";;this;is;an;;example;;;", ';'): + ## writeLine(stdout, word) + ## ``` + ## Results in: + ## ``` + ## "" + ## "" + ## "this" + ## "is" + ## "an" + ## "" + ## "example" + ## "" + ## "" + ## "" + ## ``` + ## + ## See also: + ## * `rsplit iterator<#rsplit.i,string,char,int>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `split func<#split,string,char,int>`_ + splitCommon(s, sep, maxsplit, 1) -iterator split*(s: string, seps: set[char] = Whitespace): string = +iterator split*(s: string, seps: set[char] = Whitespace, + maxsplit: int = -1): string = ## Splits the string `s` into substrings using a group of separators. ## - ## Substrings are separated by a substring containing only `seps`. Note - ## that whole sequences of characters found in ``seps`` will be counted as - ## a single split point and leading/trailing separators will be ignored. - ## The following example: + ## Substrings are separated by a substring containing only `seps`. ## - ## .. code-block:: nim - ## for word in split(" this is an example "): - ## writeln(stdout, word) + ## ```nim + ## for word in split("this\lis an\texample"): + ## writeLine(stdout, word) + ## ``` ## ## ...generates this output: ## - ## .. code-block:: + ## ``` ## "this" ## "is" ## "an" ## "example" + ## ``` ## ## And the following code: ## - ## .. code-block:: nim - ## for word in split(";;this;is;an;;example;;;", {';'}): - ## writeln(stdout, word) + ## ```nim + ## for word in split("this:is;an$example", {';', ':', '$'}): + ## writeLine(stdout, word) + ## ``` ## ## ...produces the same output as the first example. The code: ## - ## .. code-block:: nim + ## ```nim ## let date = "2012-11-20T22:08:08.398990" ## let separators = {' ', '-', ':', 'T'} ## for number in split(date, separators): - ## writeln(stdout, number) + ## writeLine(stdout, number) + ## ``` ## ## ...results in: ## - ## .. 
code-block:: + ## ``` ## "2012" ## "11" ## "20" ## "22" ## "08" ## "08.398990" + ## ``` ## - var last = 0 - assert(not ('\0' in seps)) - while last < len(s): - while s[last] in seps: inc(last) - var first = last - while last < len(s) and s[last] notin seps: inc(last) # BUGFIX! - if first <= last-1: - yield substr(s, first, last-1) + ## .. note:: Empty separator set results in returning an original string, + ## following the interpretation "split by no element". + ## + ## See also: + ## * `rsplit iterator<#rsplit.i,string,set[char],int>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `split func<#split,string,set[char],int>`_ + splitCommon(s, seps, maxsplit, 1) -iterator split*(s: string, sep: char): string = - ## Splits the string `s` into substrings using a single separator. +iterator split*(s: string, sep: string, maxsplit: int = -1): string = + ## Splits the string `s` into substrings using a string separator. ## - ## Substrings are separated by the character `sep`. - ## Unlike the version of the iterator which accepts a set of separator - ## characters, this proc will not coalesce groups of the - ## separator, returning a string for each found character. The code: + ## Substrings are separated by the string `sep`. + ## The code: ## - ## .. code-block:: nim - ## for word in split(";;this;is;an;;example;;;", ';'): - ## writeln(stdout, word) + ## ```nim + ## for word in split("thisDATAisDATAcorrupted", "DATA"): + ## writeLine(stdout, word) + ## ``` ## ## Results in: ## - ## .. code-block:: - ## "" - ## "" + ## ``` ## "this" ## "is" - ## "an" - ## "" - ## "example" - ## "" - ## "" - ## "" + ## "corrupted" + ## ``` ## - var last = 0 - assert('\0' != sep) - if len(s) > 0: - # `<=` is correct here for the edge cases! - while last <= len(s): - var first = last - while last < len(s) and s[last] != sep: inc(last) - yield substr(s, first, last-1) - inc(last) + ## .. 
note:: Empty separator string results in returning an original string, + ## following the interpretation "split by no element". + ## + ## See also: + ## * `rsplit iterator<#rsplit.i,string,string,int,bool>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `split func<#split,string,string,int>`_ + let sepLen = if sep.len == 0: 1 # prevents infinite loop + else: sep.len + splitCommon(s, sep, maxsplit, sepLen) -iterator split*(s: string, sep: string): string = - ## Splits the string `s` into substrings using a string separator. + +template rsplitCommon(s, sep, maxsplit, sepLen) = + ## Common code for rsplit functions + var + last = s.len - 1 + first = last + splits = maxsplit + startPos = 0 + # go to -1 in order to get separators at the beginning + while first >= -1: + while first >= 0 and not stringHasSep(s, first, sep): + dec(first) + if splits == 0: + # No more splits means set first to the beginning + first = -1 + if first == -1: + startPos = 0 + else: + startPos = first + sepLen + yield substr(s, startPos, last) + if splits == 0: break + dec(splits) + dec(first) + last = first + +iterator rsplit*(s: string, sep: char, + maxsplit: int = -1): string = + ## Splits the string `s` into substrings from the right using a + ## string separator. Works exactly the same as `split iterator + ## <#split.i,string,char,int>`_ except in **reverse** order. + ## + ## ```nim + ## for piece in "foo:bar".rsplit(':'): + ## echo piece + ## ``` ## - ## Substrings are separated by the string `sep`. - var last = 0 - if len(s) > 0: - while last <= len(s): - var first = last - while last < len(s) and s.substr(last, last + <sep.len) != sep: - inc(last) - yield substr(s, first, last-1) - inc(last, sep.len) + ## Results in: + ## + ## ``` + ## "bar" + ## "foo" + ## ``` + ## + ## Substrings are separated from the right by the char `sep`. 
+ ## + ## See also: + ## * `split iterator<#split.i,string,char,int>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `rsplit func<#rsplit,string,char,int>`_ + rsplitCommon(s, sep, maxsplit, 1) -iterator splitLines*(s: string): string = +iterator rsplit*(s: string, seps: set[char] = Whitespace, + maxsplit: int = -1): string = + ## Splits the string `s` into substrings from the right using a + ## string separator. Works exactly the same as `split iterator + ## <#split.i,string,char,int>`_ except in **reverse** order. + ## + ## ```nim + ## for piece in "foo bar".rsplit(WhiteSpace): + ## echo piece + ## ``` + ## + ## Results in: + ## + ## ``` + ## "bar" + ## "foo" + ## ``` + ## + ## Substrings are separated from the right by the set of chars `seps` + ## + ## .. note:: Empty separator set results in returning an original string, + ## following the interpretation "split by no element". + ## + ## See also: + ## * `split iterator<#split.i,string,set[char],int>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `rsplit func<#rsplit,string,set[char],int>`_ + rsplitCommon(s, seps, maxsplit, 1) + +iterator rsplit*(s: string, sep: string, maxsplit: int = -1, + keepSeparators: bool = false): string = + ## Splits the string `s` into substrings from the right using a + ## string separator. Works exactly the same as `split iterator + ## <#split.i,string,string,int>`_ except in **reverse** order. + ## + ## ```nim + ## for piece in "foothebar".rsplit("the"): + ## echo piece + ## ``` + ## + ## Results in: + ## + ## ``` + ## "bar" + ## "foo" + ## ``` + ## + ## Substrings are separated from the right by the string `sep` + ## + ## .. note:: Empty separator string results in returning an original string, + ## following the interpretation "split by no element". 
+ ## + ## See also: + ## * `split iterator<#split.i,string,string,int>`_ + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `rsplit func<#rsplit,string,string,int>`_ + let sepLen = if sep.len == 0: 1 # prevents infinite loop + else: sep.len + rsplitCommon(s, sep, maxsplit, sepLen) + +iterator splitLines*(s: string, keepEol = false): string = ## Splits the string `s` into its containing lines. ## - ## Every `character literal <manual.html#character-literals>`_ newline - ## combination (CR, LF, CR-LF) is supported. The result strings contain no - ## trailing ``\n``. + ## Every `character literal <manual.html#lexical-analysis-character-literals>`_ + ## newline combination (CR, LF, CR-LF) is supported. The result strings + ## contain no trailing end of line characters unless the parameter `keepEol` + ## is set to `true`. ## ## Example: ## - ## .. code-block:: nim + ## ```nim ## for line in splitLines("\nthis\nis\nan\n\nexample\n"): - ## writeln(stdout, line) + ## writeLine(stdout, line) + ## ``` ## ## Results in: ## - ## .. 
code-block:: nim + ## ```nim ## "" ## "this" ## "is" @@ -322,239 +675,728 @@ iterator splitLines*(s: string): string = ## "" ## "example" ## "" + ## ``` + ## + ## See also: + ## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_ + ## * `splitLines func<#splitLines,string>`_ var first = 0 var last = 0 + var eolpos = 0 while true: - while s[last] notin {'\0', '\c', '\l'}: inc(last) - yield substr(s, first, last-1) - # skip newlines: - if s[last] == '\l': inc(last) - elif s[last] == '\c': - inc(last) + while last < s.len and s[last] notin {'\c', '\l'}: inc(last) + + eolpos = last + if last < s.len: if s[last] == '\l': inc(last) - else: break # was '\0' - first = last + elif s[last] == '\c': + inc(last) + if last < s.len and s[last] == '\l': inc(last) -proc splitLines*(s: string): seq[string] {.noSideEffect, - rtl, extern: "nsuSplitLines".} = - ## The same as the `splitLines <#splitLines.i,string>`_ iterator, but is a - ## proc that returns a sequence of substrings. - accumulateResult(splitLines(s)) - -proc countLines*(s: string): int {.noSideEffect, - rtl, extern: "nsuCountLines".} = - ## Returns the number of new line separators in the string `s`. - ## - ## This is the same as ``len(splitLines(s))``, but much more efficient - ## because it doesn't modify the string creating temporal objects. Every - ## `character literal <manual.html#character-literals>`_ newline combination - ## (CR, LF, CR-LF) is supported. - ## - ## Despite its name this proc might not actually return the *number of lines* - ## in `s` because the concept of what a line is can vary. For example, a - ## string like ``Hello world`` is a line of text, but the proc will return a - ## value of zero because there are no newline separators. Also, text editors - ## usually don't count trailing newline characters in a text file as a new - ## empty line, but this proc will. 
- var i = 0 - while i < s.len: - case s[i] - of '\c': - if s[i+1] == '\l': inc i - inc result - of '\l': inc result - else: discard - inc i + yield substr(s, first, if keepEol: last-1 else: eolpos-1) -proc split*(s: string, seps: set[char] = Whitespace): seq[string] {. - noSideEffect, rtl, extern: "nsuSplitCharSet".} = - ## The same as the `split iterator <#split.i,string,set[char]>`_, but is a - ## proc that returns a sequence of substrings. - accumulateResult(split(s, seps)) + # no eol characters consumed means that the string is over + if eolpos == last: + break -proc split*(s: string, sep: char): seq[string] {.noSideEffect, - rtl, extern: "nsuSplitChar".} = - ## The same as the `split iterator <#split.i,string,char>`_, but is a proc - ## that returns a sequence of substrings. - accumulateResult(split(s, sep)) + first = last -proc split*(s: string, sep: string): seq[string] {.noSideEffect, - rtl, extern: "nsuSplitString".} = +iterator splitWhitespace*(s: string, maxsplit: int = -1): string = + ## Splits the string `s` at whitespace stripping leading and trailing + ## whitespace if necessary. If `maxsplit` is specified and is positive, + ## no more than `maxsplit` splits is made. 
+ ## + ## The following code: + ## + ## ```nim + ## let s = " foo \t bar baz " + ## for ms in [-1, 1, 2, 3]: + ## echo "------ maxsplit = ", ms, ":" + ## for item in s.splitWhitespace(maxsplit=ms): + ## echo '"', item, '"' + ## ``` + ## + ## ...results in: + ## + ## ``` + ## ------ maxsplit = -1: + ## "foo" + ## "bar" + ## "baz" + ## ------ maxsplit = 1: + ## "foo" + ## "bar baz " + ## ------ maxsplit = 2: + ## "foo" + ## "bar" + ## "baz " + ## ------ maxsplit = 3: + ## "foo" + ## "bar" + ## "baz" + ## ``` + ## + ## See also: + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace func<#splitWhitespace,string,int>`_ + oldSplit(s, Whitespace, maxsplit) + + + +func split*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl, + extern: "nsuSplitChar".} = + ## The same as the `split iterator <#split.i,string,char,int>`_ (see its + ## documentation), but is a func that returns a sequence of substrings. + ## + ## See also: + ## * `split iterator <#split.i,string,char,int>`_ + ## * `rsplit func<#rsplit,string,char,int>`_ + ## * `splitLines func<#splitLines,string>`_ + ## * `splitWhitespace func<#splitWhitespace,string,int>`_ + runnableExamples: + doAssert "a,b,c".split(',') == @["a", "b", "c"] + doAssert "".split(' ') == @[""] + accResult(split(s, sep, maxsplit)) + +func split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[ + string] {.rtl, extern: "nsuSplitCharSet".} = + ## The same as the `split iterator <#split.i,string,set[char],int>`_ (see its + ## documentation), but is a func that returns a sequence of substrings. + ## + ## .. note:: Empty separator set results in returning an original string, + ## following the interpretation "split by no element". 
+ ## + ## See also: + ## * `split iterator <#split.i,string,set[char],int>`_ + ## * `rsplit func<#rsplit,string,set[char],int>`_ + ## * `splitLines func<#splitLines,string>`_ + ## * `splitWhitespace func<#splitWhitespace,string,int>`_ + runnableExamples: + doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"] + doAssert "".split({' '}) == @[""] + doAssert "empty seps return unsplit s".split({}) == @["empty seps return unsplit s"] + accResult(split(s, seps, maxsplit)) + +func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl, + extern: "nsuSplitString".} = ## Splits the string `s` into substrings using a string separator. ## ## Substrings are separated by the string `sep`. This is a wrapper around the - ## `split iterator <#split.i,string,string>`_. - accumulateResult(split(s, sep)) + ## `split iterator <#split.i,string,string,int>`_. + ## + ## .. note:: Empty separator string results in returning an original string, + ## following the interpretation "split by no element". 
+ ## + ## See also: + ## * `split iterator <#split.i,string,string,int>`_ + ## * `rsplit func<#rsplit,string,string,int>`_ + ## * `splitLines func<#splitLines,string>`_ + ## * `splitWhitespace func<#splitWhitespace,string,int>`_ + runnableExamples: + doAssert "a,b,c".split(",") == @["a", "b", "c"] + doAssert "a man a plan a canal panama".split("a ") == @["", "man ", "plan ", "canal panama"] + doAssert "".split("Elon Musk") == @[""] + doAssert "a largely spaced sentence".split(" ") == @["a", "", "largely", + "", "", "", "spaced", "sentence"] + doAssert "a largely spaced sentence".split(" ", maxsplit = 1) == @["a", " largely spaced sentence"] + doAssert "empty sep returns unsplit s".split("") == @["empty sep returns unsplit s"] + accResult(split(s, sep, maxsplit)) + +func rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl, + extern: "nsuRSplitChar".} = + ## The same as the `rsplit iterator <#rsplit.i,string,char,int>`_, but is a func + ## that returns a sequence of substrings in original order. + ## + ## A possible common use case for `rsplit` is path manipulation, + ## particularly on systems that don't use a common delimiter. 
+ ## + ## For example, if a system had `#` as a delimiter, you could + ## do the following to get the tail of the path: + ## + ## ```nim + ## var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1) + ## ``` + ## + ## Results in `tailSplit` containing: + ## + ## ```nim + ## @["Root#Object#Method", "Index"] + ## ``` + ## + ## See also: + ## * `rsplit iterator <#rsplit.i,string,char,int>`_ + ## * `split func<#split,string,char,int>`_ + ## * `splitLines func<#splitLines,string>`_ + ## * `splitWhitespace func<#splitWhitespace,string,int>`_ + accResult(rsplit(s, sep, maxsplit)) + result.reverse() + +func rsplit*(s: string, seps: set[char] = Whitespace, + maxsplit: int = -1): seq[string] + {.rtl, extern: "nsuRSplitCharSet".} = + ## The same as the `rsplit iterator <#rsplit.i,string,set[char],int>`_, but is a + ## func that returns a sequence of substrings in original order. + ## + ## A possible common use case for `rsplit` is path manipulation, + ## particularly on systems that don't use a common delimiter. + ## + ## For example, if a system had `#` as a delimiter, you could + ## do the following to get the tail of the path: + ## + ## ```nim + ## var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1) + ## ``` + ## + ## Results in `tailSplit` containing: + ## + ## ```nim + ## @["Root#Object#Method", "Index"] + ## ``` + ## + ## .. note:: Empty separator set results in returning an original string, + ## following the interpretation "split by no element". 
+ ## + ## See also: + ## * `rsplit iterator <#rsplit.i,string,set[char],int>`_ + ## * `split func<#split,string,set[char],int>`_ + ## * `splitLines func<#splitLines,string>`_ + ## * `splitWhitespace func<#splitWhitespace,string,int>`_ + accResult(rsplit(s, seps, maxsplit)) + result.reverse() + +func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl, + extern: "nsuRSplitString".} = + ## The same as the `rsplit iterator <#rsplit.i,string,string,int,bool>`_, but is a func + ## that returns a sequence of substrings in original order. + ## + ## A possible common use case for `rsplit` is path manipulation, + ## particularly on systems that don't use a common delimiter. + ## + ## For example, if a system had `#` as a delimiter, you could + ## do the following to get the tail of the path: + ## + ## ```nim + ## var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1) + ## ``` + ## + ## Results in `tailSplit` containing: + ## + ## ```nim + ## @["Root#Object#Method", "Index"] + ## ``` + ## + ## .. note:: Empty separator string results in returning an original string, + ## following the interpretation "split by no element". 
+ ## + ## See also: + ## * `rsplit iterator <#rsplit.i,string,string,int,bool>`_ + ## * `split func<#split,string,string,int>`_ + ## * `splitLines func<#splitLines,string>`_ + ## * `splitWhitespace func<#splitWhitespace,string,int>`_ + runnableExamples: + doAssert "a largely spaced sentence".rsplit(" ", maxsplit = 1) == @[ + "a largely spaced", "sentence"] + doAssert "a,b,c".rsplit(",") == @["a", "b", "c"] + doAssert "a man a plan a canal panama".rsplit("a ") == @["", "man ", + "plan ", "canal panama"] + doAssert "".rsplit("Elon Musk") == @[""] + doAssert "a largely spaced sentence".rsplit(" ") == @["a", "", + "largely", "", "", "", "spaced", "sentence"] + doAssert "empty sep returns unsplit s".rsplit("") == @["empty sep returns unsplit s"] + accResult(rsplit(s, sep, maxsplit)) + result.reverse() + +func splitLines*(s: string, keepEol = false): seq[string] {.rtl, + extern: "nsuSplitLines".} = + ## The same as the `splitLines iterator<#splitLines.i,string>`_ (see its + ## documentation), but is a func that returns a sequence of substrings. + ## + ## See also: + ## * `splitLines iterator<#splitLines.i,string>`_ + ## * `splitWhitespace func<#splitWhitespace,string,int>`_ + ## * `countLines func<#countLines,string>`_ + accResult(splitLines(s, keepEol = keepEol)) + +func splitWhitespace*(s: string, maxsplit: int = -1): seq[string] {.rtl, + extern: "nsuSplitWhitespace".} = + ## The same as the `splitWhitespace iterator <#splitWhitespace.i,string,int>`_ + ## (see its documentation), but is a func that returns a sequence of substrings. + ## + ## See also: + ## * `splitWhitespace iterator <#splitWhitespace.i,string,int>`_ + ## * `splitLines func<#splitLines,string>`_ + accResult(splitWhitespace(s, maxsplit)) + +func toBin*(x: BiggestInt, len: Positive): string {.rtl, extern: "nsuToBin".} = + ## Converts `x` into its binary representation. + ## + ## The resulting string is always `len` characters long. No leading `0b` + ## prefix is generated. 
+ runnableExamples: + let + a = 29 + b = 257 + doAssert a.toBin(8) == "00011101" + doAssert b.toBin(8) == "00000001" + doAssert b.toBin(9) == "100000001" + var + mask = BiggestUInt 1 + shift = BiggestUInt 0 + assert(len > 0) + result = newString(len) + for j in countdown(len-1, 0): + result[j] = chr(int((BiggestUInt(x) and mask) shr shift) + ord('0')) + inc shift + mask = mask shl BiggestUInt(1) -proc toHex*(x: BiggestInt, len: int): string {.noSideEffect, - rtl, extern: "nsuToHex".} = - ## Converts `x` to its hexadecimal representation. +func toOct*(x: BiggestInt, len: Positive): string {.rtl, extern: "nsuToOct".} = + ## Converts `x` into its octal representation. ## - ## The resulting string will be exactly `len` characters long. No prefix like - ## ``0x`` is generated. `x` is treated as an unsigned value. + ## The resulting string is always `len` characters long. No leading `0o` + ## prefix is generated. + ## + ## Do not confuse it with `toOctal func<#toOctal,char>`_. + runnableExamples: + let + a = 62 + b = 513 + doAssert a.toOct(3) == "076" + doAssert b.toOct(3) == "001" + doAssert b.toOct(5) == "01001" + var + mask = BiggestUInt 7 + shift = BiggestUInt 0 + assert(len > 0) + result = newString(len) + for j in countdown(len-1, 0): + result[j] = chr(int((BiggestUInt(x) and mask) shr shift) + ord('0')) + inc shift, 3 + mask = mask shl BiggestUInt(3) + +func toHexImpl(x: BiggestUInt, len: Positive, handleNegative: bool): string = const HexChars = "0123456789ABCDEF" - var - n = x + var n = x result = newString(len) for j in countdown(len-1, 0): - result[j] = HexChars[n and 0xF] + result[j] = HexChars[int(n and 0xF)] n = n shr 4 # handle negative overflow - if n == 0 and x < 0: n = -1 + if n == 0 and handleNegative: n = not(BiggestUInt 0) + +func toHex*[T: SomeInteger](x: T, len: Positive): string = + ## Converts `x` to its hexadecimal representation. + ## + ## The resulting string will be exactly `len` characters long. No prefix like + ## `0x` is generated. 
`x` is treated as an unsigned value. + runnableExamples: + let + a = 62'u64 + b = 4097'u64 + doAssert a.toHex(3) == "03E" + doAssert b.toHex(3) == "001" + doAssert b.toHex(4) == "1001" + doAssert toHex(62, 3) == "03E" + doAssert toHex(-8, 6) == "FFFFF8" + whenJsNoBigInt64: + toHexImpl(cast[BiggestUInt](x), len, x < 0) + do: + when T is SomeSignedInt: + toHexImpl(cast[BiggestUInt](BiggestInt(x)), len, x < 0) + else: + toHexImpl(BiggestUInt(x), len, x < 0) + +func toHex*[T: SomeInteger](x: T): string = + ## Shortcut for `toHex(x, T.sizeof * 2)` + runnableExamples: + doAssert toHex(1984'i64) == "00000000000007C0" + doAssert toHex(1984'i16) == "07C0" + whenJsNoBigInt64: + toHexImpl(cast[BiggestUInt](x), 2*sizeof(T), x < 0) + do: + when T is SomeSignedInt: + toHexImpl(cast[BiggestUInt](BiggestInt(x)), 2*sizeof(T), x < 0) + else: + toHexImpl(BiggestUInt(x), 2*sizeof(T), x < 0) + +func toHex*(s: string): string {.rtl.} = + ## Converts a bytes string to its hexadecimal representation. + ## + ## The output is twice the input long. No prefix like + ## `0x` is generated. + ## + ## See also: + ## * `parseHexStr func<#parseHexStr,string>`_ for the reverse operation + runnableExamples: + let + a = "1" + b = "A" + c = "\0\255" + doAssert a.toHex() == "31" + doAssert b.toHex() == "41" + doAssert c.toHex() == "00FF" + + const HexChars = "0123456789ABCDEF" + result = newString(s.len * 2) + for pos, c in s: + var n = ord(c) + result[pos * 2 + 1] = HexChars[n and 0xF] + n = n shr 4 + result[pos * 2] = HexChars[n] -proc intToStr*(x: int, minchars: int = 1): string {.noSideEffect, - rtl, extern: "nsuIntToStr".} = +func toOctal*(c: char): string {.rtl, extern: "nsuToOctal".} = + ## Converts a character `c` to its octal representation. + ## + ## The resulting string may not have a leading zero. Its length is always + ## exactly 3. + ## + ## Do not confuse it with `toOct func<#toOct,BiggestInt,Positive>`_. 
+ runnableExamples: + doAssert toOctal('1') == "061" + doAssert toOctal('A') == "101" + doAssert toOctal('a') == "141" + doAssert toOctal('!') == "041" + + result = newString(3) + var val = ord(c) + for i in countdown(2, 0): + result[i] = chr(val mod 8 + ord('0')) + val = val div 8 + +func fromBin*[T: SomeInteger](s: string): T = + ## Parses a binary integer value from a string `s`. + ## + ## If `s` is not a valid binary integer, `ValueError` is raised. `s` can have + ## one of the following optional prefixes: `0b`, `0B`. Underscores within + ## `s` are ignored. + ## + ## Does not check for overflow. If the value represented by `s` + ## is too big to fit into a return type, only the value of the rightmost + ## binary digits of `s` is returned without producing an error. + runnableExamples: + let s = "0b_0100_1000_1000_1000_1110_1110_1001_1001" + doAssert fromBin[int](s) == 1216933529 + doAssert fromBin[int8](s) == 0b1001_1001'i8 + doAssert fromBin[int8](s) == -103'i8 + doAssert fromBin[uint8](s) == 153 + doAssert s.fromBin[:int16] == 0b1110_1110_1001_1001'i16 + doAssert s.fromBin[:uint64] == 1216933529'u64 + + let p = parseutils.parseBin(s, result) + if p != s.len or p == 0: + raise newException(ValueError, "invalid binary integer: " & s) + +func fromOct*[T: SomeInteger](s: string): T = + ## Parses an octal integer value from a string `s`. + ## + ## If `s` is not a valid octal integer, `ValueError` is raised. `s` can have + ## one of the following optional prefixes: `0o`, `0O`. Underscores within + ## `s` are ignored. + ## + ## Does not check for overflow. If the value represented by `s` + ## is too big to fit into a return type, only the value of the rightmost + ## octal digits of `s` is returned without producing an error. 
+ runnableExamples: + let s = "0o_123_456_777" + doAssert fromOct[int](s) == 21913087 + doAssert fromOct[int8](s) == 0o377'i8 + doAssert fromOct[int8](s) == -1'i8 + doAssert fromOct[uint8](s) == 255'u8 + doAssert s.fromOct[:int16] == 24063'i16 + doAssert s.fromOct[:uint64] == 21913087'u64 + + let p = parseutils.parseOct(s, result) + if p != s.len or p == 0: + raise newException(ValueError, "invalid oct integer: " & s) + +func fromHex*[T: SomeInteger](s: string): T = + ## Parses a hex integer value from a string `s`. + ## + ## If `s` is not a valid hex integer, `ValueError` is raised. `s` can have + ## one of the following optional prefixes: `0x`, `0X`, `#`. Underscores within + ## `s` are ignored. + ## + ## Does not check for overflow. If the value represented by `s` + ## is too big to fit into a return type, only the value of the rightmost + ## hex digits of `s` is returned without producing an error. + runnableExamples: + let s = "0x_1235_8df6" + doAssert fromHex[int](s) == 305499638 + doAssert fromHex[int8](s) == 0xf6'i8 + doAssert fromHex[int8](s) == -10'i8 + doAssert fromHex[uint8](s) == 246'u8 + doAssert s.fromHex[:int16] == -29194'i16 + doAssert s.fromHex[:uint64] == 305499638'u64 + + let p = parseutils.parseHex(s, result) + if p != s.len or p == 0: + raise newException(ValueError, "invalid hex integer: " & s) + +func intToStr*(x: int, minchars: Positive = 1): string {.rtl, + extern: "nsuIntToStr".} = ## Converts `x` to its decimal representation. ## ## The resulting string will be minimally `minchars` characters long. This is ## achieved by adding leading zeros. + runnableExamples: + doAssert intToStr(1984) == "1984" + doAssert intToStr(1984, 6) == "001984" result = $abs(x) for i in 1 .. 
minchars - len(result): result = '0' & result if x < 0: result = '-' & result -proc parseInt*(s: string): int {.noSideEffect, procvar, - rtl, extern: "nsuParseInt".} = +func parseInt*(s: string): int {.rtl, extern: "nsuParseInt".} = ## Parses a decimal integer value contained in `s`. ## ## If `s` is not a valid integer, `ValueError` is raised. - var L = parseutils.parseInt(s, result, 0) + runnableExamples: + doAssert parseInt("-0042") == -42 + result = 0 + let L = parseutils.parseInt(s, result, 0) if L != s.len or L == 0: raise newException(ValueError, "invalid integer: " & s) -proc parseBiggestInt*(s: string): BiggestInt {.noSideEffect, procvar, - rtl, extern: "nsuParseBiggestInt".} = +func parseBiggestInt*(s: string): BiggestInt {.rtl, + extern: "nsuParseBiggestInt".} = ## Parses a decimal integer value contained in `s`. ## ## If `s` is not a valid integer, `ValueError` is raised. - var L = parseutils.parseBiggestInt(s, result, 0) + result = BiggestInt(0) + let L = parseutils.parseBiggestInt(s, result, 0) if L != s.len or L == 0: raise newException(ValueError, "invalid integer: " & s) -proc parseFloat*(s: string): float {.noSideEffect, procvar, - rtl, extern: "nsuParseFloat".} = - ## Parses a decimal floating point value contained in `s`. If `s` is not - ## a valid floating point number, `ValueError` is raised. ``NAN``, - ## ``INF``, ``-INF`` are also supported (case insensitive comparison). - var L = parseutils.parseFloat(s, result, 0) +func parseUInt*(s: string): uint {.rtl, extern: "nsuParseUInt".} = + ## Parses a decimal unsigned integer value contained in `s`. + ## + ## If `s` is not a valid integer, `ValueError` is raised. + result = uint(0) + let L = parseutils.parseUInt(s, result, 0) + if L != s.len or L == 0: + raise newException(ValueError, "invalid unsigned integer: " & s) + +func parseBiggestUInt*(s: string): BiggestUInt {.rtl, + extern: "nsuParseBiggestUInt".} = + ## Parses a decimal unsigned integer value contained in `s`. 
+ ## + ## If `s` is not a valid integer, `ValueError` is raised. + result = BiggestUInt(0) + let L = parseutils.parseBiggestUInt(s, result, 0) + if L != s.len or L == 0: + raise newException(ValueError, "invalid unsigned integer: " & s) + +func parseFloat*(s: string): float {.rtl, extern: "nsuParseFloat".} = + ## Parses a decimal floating point value contained in `s`. + ## + ## If `s` is not a valid floating point number, `ValueError` is raised. + ##`NAN`, `INF`, `-INF` are also supported (case insensitive comparison). + runnableExamples: + doAssert parseFloat("3.14") == 3.14 + doAssert parseFloat("inf") == 1.0/0 + result = 0.0 + let L = parseutils.parseFloat(s, result, 0) if L != s.len or L == 0: raise newException(ValueError, "invalid float: " & s) -proc parseHexInt*(s: string): int {.noSideEffect, procvar, - rtl, extern: "nsuParseHexInt".} = +func parseBinInt*(s: string): int {.rtl, extern: "nsuParseBinInt".} = + ## Parses a binary integer value contained in `s`. + ## + ## If `s` is not a valid binary integer, `ValueError` is raised. `s` can have + ## one of the following optional prefixes: `0b`, `0B`. Underscores within + ## `s` are ignored. + runnableExamples: + let + a = "0b11_0101" + b = "111" + doAssert a.parseBinInt() == 53 + doAssert b.parseBinInt() == 7 + + result = 0 + let L = parseutils.parseBin(s, result, 0) + if L != s.len or L == 0: + raise newException(ValueError, "invalid binary integer: " & s) + +func parseOctInt*(s: string): int {.rtl, extern: "nsuParseOctInt".} = + ## Parses an octal integer value contained in `s`. + ## + ## If `s` is not a valid oct integer, `ValueError` is raised. `s` can have one + ## of the following optional prefixes: `0o`, `0O`. Underscores within + ## `s` are ignored. 
+ result = 0 + let L = parseutils.parseOct(s, result, 0) + if L != s.len or L == 0: + raise newException(ValueError, "invalid oct integer: " & s) + +func parseHexInt*(s: string): int {.rtl, extern: "nsuParseHexInt".} = ## Parses a hexadecimal integer value contained in `s`. ## - ## If `s` is not a valid integer, `ValueError` is raised. `s` can have one - ## of the following optional prefixes: ``0x``, ``0X``, ``#``. Underscores + ## If `s` is not a valid hex integer, `ValueError` is raised. `s` can have one + ## of the following optional prefixes: `0x`, `0X`, `#`. Underscores ## within `s` are ignored. - var i = 0 - if s[i] == '0' and (s[i+1] == 'x' or s[i+1] == 'X'): inc(i, 2) - elif s[i] == '#': inc(i) - while true: - case s[i] - of '_': inc(i) - of '0'..'9': - result = result shl 4 or (ord(s[i]) - ord('0')) - inc(i) - of 'a'..'f': - result = result shl 4 or (ord(s[i]) - ord('a') + 10) - inc(i) - of 'A'..'F': - result = result shl 4 or (ord(s[i]) - ord('A') + 10) - inc(i) - of '\0': break - else: raise newException(ValueError, "invalid integer: " & s) + result = 0 + let L = parseutils.parseHex(s, result, 0) + if L != s.len or L == 0: + raise newException(ValueError, "invalid hex integer: " & s) -proc parseBool*(s: string): bool = +func generateHexCharToValueMap(): string = + ## Generates a string to map a hex digit to uint value. + result = "" + for inp in 0..255: + let ch = chr(inp) + let o = + case ch + of '0'..'9': inp - ord('0') + of 'a'..'f': inp - ord('a') + 10 + of 'A'..'F': inp - ord('A') + 10 + else: 17 # indicates an invalid hex char + result.add chr(o) + +const hexCharToValueMap = generateHexCharToValueMap() + +func parseHexStr*(s: string): string {.rtl, extern: "nsuParseHexStr".} = + ## Converts hex-encoded string to byte string, e.g.: + ## + ## Raises `ValueError` for an invalid hex values. The comparison is + ## case-insensitive. 
+ ## + ## See also: + ## * `toHex func<#toHex,string>`_ for the reverse operation + runnableExamples: + let + a = "41" + b = "3161" + c = "00ff" + doAssert parseHexStr(a) == "A" + doAssert parseHexStr(b) == "1a" + doAssert parseHexStr(c) == "\0\255" + + if s.len mod 2 != 0: + raise newException(ValueError, "Incorrect hex string len") + result = newString(s.len div 2) + var buf = 0 + for pos, c in s: + let val = hexCharToValueMap[ord(c)].ord + if val == 17: + raise newException(ValueError, "Invalid hex char `" & + c & "` (ord " & $c.ord & ")") + if pos mod 2 == 0: + buf = val + else: + result[pos div 2] = chr(val + buf shl 4) + +func parseBool*(s: string): bool = ## Parses a value into a `bool`. ## - ## If ``s`` is one of the following values: ``y, yes, true, 1, on``, then - ## returns `true`. If ``s`` is one of the following values: ``n, no, false, - ## 0, off``, then returns `false`. If ``s`` is something else a - ## ``ValueError`` exception is raised. + ## If `s` is one of the following values: `y, yes, true, 1, on`, then + ## returns `true`. If `s` is one of the following values: `n, no, false, + ## 0, off`, then returns `false`. If `s` is something else a + ## `ValueError` exception is raised. + runnableExamples: + let a = "n" + doAssert parseBool(a) == false + case normalize(s) of "y", "yes", "true", "1", "on": result = true of "n", "no", "false", "0", "off": result = false else: raise newException(ValueError, "cannot interpret as a bool: " & s) -proc parseEnum*[T: enum](s: string): T = - ## Parses an enum ``T``. +func parseEnum*[T: enum](s: string): T = + ## Parses an enum `T`. This errors at compile time, if the given enum + ## type contains multiple fields with the same string value. ## - ## Raises ``ValueError`` for an invalid value in `s`. The comparison is - ## done in a style insensitive way. 
- for e in low(T)..high(T): - if cmpIgnoreStyle(s, $e) == 0: - return e - raise newException(ValueError, "invalid enum value: " & s) + ## Raises `ValueError` for an invalid value in `s`. The comparison is + ## done in a style insensitive way (first letter is still case-sensitive). + runnableExamples: + type + MyEnum = enum + first = "1st", + second, + third = "3rd" -proc parseEnum*[T: enum](s: string, default: T): T = - ## Parses an enum ``T``. + doAssert parseEnum[MyEnum]("1_st") == first + doAssert parseEnum[MyEnum]("second") == second + doAssertRaises(ValueError): + echo parseEnum[MyEnum]("third") + + genEnumCaseStmt(T, s, default = nil, ord(low(T)), ord(high(T)), nimIdentNormalize) + +func parseEnum*[T: enum](s: string, default: T): T = + ## Parses an enum `T`. This errors at compile time, if the given enum + ## type contains multiple fields with the same string value. ## ## Uses `default` for an invalid value in `s`. The comparison is done in a - ## style insensitive way. - for e in low(T)..high(T): - if cmpIgnoreStyle(s, $e) == 0: - return e - result = default - -proc repeat*(c: char, count: int): string {.noSideEffect, - rtl, extern: "nsuRepeatChar".} = + ## style insensitive way (first letter is still case-sensitive). + runnableExamples: + type + MyEnum = enum + first = "1st", + second, + third = "3rd" + + doAssert parseEnum[MyEnum]("1_st") == first + doAssert parseEnum[MyEnum]("second") == second + doAssert parseEnum[MyEnum]("last", third) == third + + genEnumCaseStmt(T, s, default, ord(low(T)), ord(high(T)), nimIdentNormalize) + +func repeat*(c: char, count: Natural): string {.rtl, extern: "nsuRepeatChar".} = ## Returns a string of length `count` consisting only of - ## the character `c`. You can use this proc to left align strings. Example: - ## - ## .. 
code-block:: nim - ## proc tabexpand(indent: int, text: string, tabsize: int = 4) = - ## echo '\t'.repeat(indent div tabsize), ' '.repeat(indent mod tabsize), text - ## - ## tabexpand(4, "At four") - ## tabexpand(5, "At five") - ## tabexpand(6, "At six") + ## the character `c`. + runnableExamples: + let a = 'z' + doAssert a.repeat(5) == "zzzzz" result = newString(count) for i in 0..count-1: result[i] = c -proc repeat*(s: string, n: int): string {.noSideEffect, - rtl, extern: "nsuRepeatStr".} = - ## Returns String `s` concatenated `n` times. Example: - ## - ## .. code-block:: nim - ## echo "+++ STOP ".repeat(4), "+++" +func repeat*(s: string, n: Natural): string {.rtl, extern: "nsuRepeatStr".} = + ## Returns string `s` concatenated `n` times. + runnableExamples: + doAssert "+ foo +".repeat(3) == "+ foo ++ foo ++ foo +" + result = newStringOfCap(n * s.len) for i in 1..n: result.add(s) -template spaces*(n: int): string = repeat(' ',n) - ## Returns a String with `n` space characters. You can use this proc - ## to left align strings. Example: - ## - ## .. code-block:: nim - ## let - ## width = 15 - ## text1 = "Hello user!" - ## text2 = "This is a very long string" - ## echo text1 & spaces(max(0, width - text1.len)) & "|" - ## echo text2 & spaces(max(0, width - text2.len)) & "|" - -proc repeatChar*(count: int, c: char = ' '): string {.deprecated.} = repeat(c, count) - ## deprecated: use repeat() or spaces() - -proc repeatStr*(count: int, s: string): string {.deprecated.} = repeat(s, count) - ## deprecated: use repeat(string, count) or string.repeat(count) - -proc align*(s: string, count: int, padding = ' '): string {. - noSideEffect, rtl, extern: "nsuAlignString".} = - ## Aligns a string `s` with `padding`, so that is of length `count`. +func spaces*(n: Natural): string {.inline.} = + ## Returns a string with `n` space characters. You can use this func + ## to left align strings. 
+ ## + ## See also: + ## * `align func<#align,string,Natural,char>`_ + ## * `alignLeft func<#alignLeft,string,Natural,char>`_ + ## * `indent func<#indent,string,Natural,string>`_ + ## * `center func<#center,string,int,char>`_ + runnableExamples: + let + width = 15 + text1 = "Hello user!" + text2 = "This is a very long string" + doAssert text1 & spaces(max(0, width - text1.len)) & "|" == + "Hello user! |" + doAssert text2 & spaces(max(0, width - text2.len)) & "|" == + "This is a very long string|" + repeat(' ', n) + +func align*(s: string, count: Natural, padding = ' '): string {.rtl, + extern: "nsuAlignString".} = + ## Aligns a string `s` with `padding`, so that it is of length `count`. ## ## `padding` characters (by default spaces) are added before `s` resulting in - ## right alignment. If ``s.len >= count``, no spaces are added and `s` is - ## returned unchanged. If you need to left align a string use the `repeatChar - ## proc <#repeatChar>`_. Example: - ## - ## .. code-block:: nim - ## assert align("abc", 4) == " abc" - ## assert align("a", 0) == "a" - ## assert align("1232", 6) == " 1232" - ## assert align("1232", 6, '#') == "##1232" + ## right alignment. If `s.len >= count`, no spaces are added and `s` is + ## returned unchanged. If you need to left align a string use the `alignLeft + ## func<#alignLeft,string,Natural,char>`_. + ## + ## See also: + ## * `alignLeft func<#alignLeft,string,Natural,char>`_ + ## * `spaces func<#spaces,Natural>`_ + ## * `indent func<#indent,string,Natural,string>`_ + ## * `center func<#center,string,int,char>`_ + runnableExamples: + assert align("abc", 4) == " abc" + assert align("a", 0) == "a" + assert align("1232", 6) == " 1232" + assert align("1232", 6, '#') == "##1232" if s.len < count: result = newString(count) let spaces = count - s.len @@ -563,168 +1405,448 @@ proc align*(s: string, count: int, padding = ' '): string {. 
else: result = s -iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[ - token: string, isSep: bool] = - ## Tokenizes the string `s` into substrings. - ## - ## Substrings are separated by a substring containing only `seps`. - ## Examples: - ## - ## .. code-block:: nim - ## for word in tokenize(" this is an example "): - ## writeln(stdout, word) - ## - ## Results in: - ## - ## .. code-block:: nim - ## (" ", true) - ## ("this", false) - ## (" ", true) - ## ("is", false) - ## (" ", true) - ## ("an", false) - ## (" ", true) - ## ("example", false) - ## (" ", true) - var i = 0 - while true: - var j = i - var isSep = s[j] in seps - while j < s.len and (s[j] in seps) == isSep: inc(j) - if j > i: - yield (substr(s, i, j-1), isSep) - else: - break - i = j +func alignLeft*(s: string, count: Natural, padding = ' '): string = + ## Left-Aligns a string `s` with `padding`, so that it is of length `count`. + ## + ## `padding` characters (by default spaces) are added after `s` resulting in + ## left alignment. If `s.len >= count`, no spaces are added and `s` is + ## returned unchanged. If you need to right align a string use the `align + ## func<#align,string,Natural,char>`_. + ## + ## See also: + ## * `align func<#align,string,Natural,char>`_ + ## * `spaces func<#spaces,Natural>`_ + ## * `indent func<#indent,string,Natural,string>`_ + ## * `center func<#center,string,int,char>`_ + runnableExamples: + assert alignLeft("abc", 4) == "abc " + assert alignLeft("a", 0) == "a" + assert alignLeft("1232", 6) == "1232 " + assert alignLeft("1232", 6, '#') == "1232##" + if s.len < count: + result = newString(count) + if s.len > 0: + result[0 .. (s.len - 1)] = s + for i in s.len ..< count: + result[i] = padding + else: + result = s -proc wordWrap*(s: string, maxLineWidth = 80, - splitLongWords = true, - seps: set[char] = Whitespace, - newLine = "\n"): string {. - noSideEffect, rtl, extern: "nsuWordWrap".} = - ## Word wraps `s`. 
- result = newStringOfCap(s.len + s.len shr 6) - var spaceLeft = maxLineWidth - var lastSep = "" - for word, isSep in tokenize(s, seps): - if isSep: - lastSep = word - spaceLeft = spaceLeft - len(word) - continue - if len(word) > spaceLeft: - if splitLongWords and len(word) > maxLineWidth: - result.add(substr(word, 0, spaceLeft-1)) - var w = spaceLeft+1 - var wordLeft = len(word) - spaceLeft - while wordLeft > 0: - result.add(newLine) - var L = min(maxLineWidth, wordLeft) - spaceLeft = maxLineWidth - L - result.add(substr(word, w, w+L-1)) - inc(w, L) - dec(wordLeft, L) - else: - spaceLeft = maxLineWidth - len(word) - result.add(newLine) - result.add(word) +func center*(s: string, width: int, fillChar: char = ' '): string {.rtl, + extern: "nsuCenterString".} = + ## Return the contents of `s` centered in a string `width` long using + ## `fillChar` (default: space) as padding. + ## + ## The original string is returned if `width` is less than or equal + ## to `s.len`. + ## + ## See also: + ## * `align func<#align,string,Natural,char>`_ + ## * `alignLeft func<#alignLeft,string,Natural,char>`_ + ## * `spaces func<#spaces,Natural>`_ + ## * `indent func<#indent,string,Natural,string>`_ + runnableExamples: + let a = "foo" + doAssert a.center(2) == "foo" + doAssert a.center(5) == " foo " + doAssert a.center(6) == " foo " + if width <= s.len: return s + result = newString(width) + # Left padding will be one fillChar + # smaller if there are an odd number + # of characters + let + charsLeft = (width - s.len) + leftPadding = charsLeft div 2 + for i in 0 ..< width: + if i >= leftPadding and i < leftPadding + s.len: + # we are where the string should be located + result[i] = s[i-leftPadding] else: - spaceLeft = spaceLeft - len(word) - result.add(lastSep & word) - lastSep.setLen(0) - -proc unindent*(s: string, eatAllIndent = false): string {. - noSideEffect, rtl, extern: "nsuUnindent".} = - ## Unindents `s`. 
- result = newStringOfCap(s.len) + # we are either before or after where + # the string s should go + result[i] = fillChar + +func indent*(s: string, count: Natural, padding: string = " "): string {.rtl, + extern: "nsuIndent".} = + ## Indents each line in `s` by `count` amount of `padding`. + ## + ## **Note:** This does not preserve the new line characters used in `s`. + ## + ## See also: + ## * `align func<#align,string,Natural,char>`_ + ## * `alignLeft func<#alignLeft,string,Natural,char>`_ + ## * `spaces func<#spaces,Natural>`_ + ## * `unindent func<#unindent,string,Natural,string>`_ + ## * `dedent func<#dedent,string,Natural>`_ + runnableExamples: + doAssert indent("First line\c\l and second line.", 2) == + " First line\l and second line." + result = "" var i = 0 - var pattern = true - var indent = 0 - while s[i] == ' ': inc i - var level = if i == 0: -1 else: i - while i < s.len: - if s[i] == ' ': - if i > 0 and s[i-1] in {'\l', '\c'}: - pattern = true - indent = 0 - if pattern: - inc(indent) - if indent > level and not eatAllIndent: - result.add(s[i]) - if level < 0: level = indent - else: - # a space somewhere: do not delete - result.add(s[i]) - else: - pattern = false - result.add(s[i]) - inc i - -proc startsWith*(s, prefix: string): bool {.noSideEffect, - rtl, extern: "nsuStartsWith".} = - ## Returns true iff ``s`` starts with ``prefix``. - ## - ## If ``prefix == ""`` true is returned. + for line in s.splitLines(): + if i != 0: + result.add("\n") + for j in 1..count: + result.add(padding) + result.add(line) + i.inc + +func unindent*(s: string, count: Natural = int.high, + padding: string = " "): string {.rtl, extern: "nsuUnindent".} = + ## Unindents each line in `s` by `count` amount of `padding`. + ## + ## **Note:** This does not preserve the new line characters used in `s`. 
+ ## + ## See also: + ## * `dedent func<#dedent,string,Natural>`_ + ## * `align func<#align,string,Natural,char>`_ + ## * `alignLeft func<#alignLeft,string,Natural,char>`_ + ## * `spaces func<#spaces,Natural>`_ + ## * `indent func<#indent,string,Natural,string>`_ + runnableExamples: + let x = """ + Hello + There + """.unindent() + + doAssert x == "Hello\nThere\n" + result = "" var i = 0 - while true: - if prefix[i] == '\0': return true - if s[i] != prefix[i]: return false - inc(i) + for line in s.splitLines(): + if i != 0: + result.add("\n") + var indentCount = 0 + for j in 0..<count.int: + indentCount.inc + if j + padding.len-1 >= line.len or line[j .. j + padding.len-1] != padding: + indentCount = j + break + result.add(line[indentCount*padding.len .. ^1]) + i.inc + +func indentation*(s: string): Natural {.since: (1, 3).} = + ## Returns the amount of indentation all lines of `s` have in common, + ## ignoring lines that consist only of whitespace. + result = int.high + for line in s.splitLines: + for i, c in line: + if i >= result: break + elif c != ' ': + result = i + break + if result == int.high: + result = 0 + +func dedent*(s: string, count: Natural = indentation(s)): string {.rtl, + extern: "nsuDedent", since: (1, 3).} = + ## Unindents each line in `s` by `count` amount of `padding`. + ## The only difference between this and the + ## `unindent func<#unindent,string,Natural,string>`_ is that this by default + ## only cuts off the amount of indentation that all lines of `s` share as + ## opposed to all indentation. It only supports spaces as padding. + ## + ## **Note:** This does not preserve the new line characters used in `s`. 
+ ## + ## See also: + ## * `unindent func<#unindent,string,Natural,string>`_ + ## * `align func<#align,string,Natural,char>`_ + ## * `alignLeft func<#alignLeft,string,Natural,char>`_ + ## * `spaces func<#spaces,Natural>`_ + ## * `indent func<#indent,string,Natural,string>`_ + runnableExamples: + let x = """ + Hello + There + """.dedent() + + doAssert x == "Hello\n There\n" + unindent(s, count, " ") + +func delete*(s: var string, slice: Slice[int]) = + ## Deletes the items `s[slice]`, raising `IndexDefect` if the slice contains + ## elements out of range. + ## + ## This operation moves all elements after `s[slice]` in linear time, and + ## is the string analog to `sequtils.delete`. + runnableExamples: + var a = "abcde" + doAssertRaises(IndexDefect): a.delete(4..5) + assert a == "abcde" + a.delete(4..4) + assert a == "abcd" + a.delete(1..2) + assert a == "ad" + a.delete(1..<1) # empty slice + assert a == "ad" + when compileOption("boundChecks"): + if not (slice.a < s.len and slice.a >= 0 and slice.b < s.len): + raise newException(IndexDefect, $(slice: slice, len: s.len)) + if slice.b >= slice.a: + var i = slice.a + var j = slice.b + 1 + var newLen = s.len - j + i + # if j < s.len: moveMem(addr s[i], addr s[j], s.len - j) # pending benchmark + while i < newLen: + s[i] = s[j] + inc(i) + inc(j) + setLen(s, newLen) -proc endsWith*(s, suffix: string): bool {.noSideEffect, - rtl, extern: "nsuEndsWith".} = - ## Returns true iff ``s`` ends with ``suffix``. - ## - ## If ``suffix == ""`` true is returned. - var i = 0 - var j = len(s) - len(suffix) - while i+j <% s.len: - if s[i+j] != suffix[i]: return false +func delete*(s: var string, first, last: int) {.rtl, extern: "nsuDelete", + deprecated: "use `delete(s, first..last)`".} = + ## Deletes in `s` the characters at positions `first .. last` (both ends included). 
+ runnableExamples("--warning:deprecated:off"): + var a = "abracadabra" + + a.delete(4, 5) + doAssert a == "abradabra" + + a.delete(1, 6) + doAssert a == "ara" + + a.delete(2, 999) + doAssert a == "ar" + + var i = first + var j = min(len(s), last+1) + var newLen = len(s)-j+i + while i < newLen: + s[i] = s[j] inc(i) - if suffix[i] == '\0': return true + inc(j) + setLen(s, newLen) -proc continuesWith*(s, substr: string, start: int): bool {.noSideEffect, - rtl, extern: "nsuContinuesWith".} = - ## Returns true iff ``s`` continues with ``substr`` at position ``start``. - ## - ## If ``substr == ""`` true is returned. +func startsWith*(s: string, prefix: char): bool {.inline.} = + ## Returns true if `s` starts with character `prefix`. + ## + ## See also: + ## * `endsWith func<#endsWith,string,char>`_ + ## * `continuesWith func<#continuesWith,string,string,Natural>`_ + ## * `removePrefix func<#removePrefix,string,char>`_ + runnableExamples: + let a = "abracadabra" + doAssert a.startsWith('a') == true + doAssert a.startsWith('b') == false + result = s.len > 0 and s[0] == prefix + +func startsWith*(s, prefix: string): bool {.rtl, extern: "nsuStartsWith".} = + ## Returns true if `s` starts with string `prefix`. + ## + ## If `prefix == ""` true is returned. + ## + ## See also: + ## * `endsWith func<#endsWith,string,string>`_ + ## * `continuesWith func<#continuesWith,string,string,Natural>`_ + ## * `removePrefix func<#removePrefix,string,string>`_ + runnableExamples: + let a = "abracadabra" + doAssert a.startsWith("abra") == true + doAssert a.startsWith("bra") == false + startsWithImpl(s, prefix) + +func endsWith*(s: string, suffix: char): bool {.inline.} = + ## Returns true if `s` ends with `suffix`. 
+ ## + ## See also: + ## * `startsWith func<#startsWith,string,char>`_ + ## * `continuesWith func<#continuesWith,string,string,Natural>`_ + ## * `removeSuffix func<#removeSuffix,string,char>`_ + runnableExamples: + let a = "abracadabra" + doAssert a.endsWith('a') == true + doAssert a.endsWith('b') == false + result = s.len > 0 and s[s.high] == suffix + +func endsWith*(s, suffix: string): bool {.rtl, extern: "nsuEndsWith".} = + ## Returns true if `s` ends with `suffix`. + ## + ## If `suffix == ""` true is returned. + ## + ## See also: + ## * `startsWith func<#startsWith,string,string>`_ + ## * `continuesWith func<#continuesWith,string,string,Natural>`_ + ## * `removeSuffix func<#removeSuffix,string,string>`_ + runnableExamples: + let a = "abracadabra" + doAssert a.endsWith("abra") == true + doAssert a.endsWith("dab") == false + endsWithImpl(s, suffix) + +func continuesWith*(s, substr: string, start: Natural): bool {.rtl, + extern: "nsuContinuesWith".} = + ## Returns true if `s` continues with `substr` at position `start`. + ## + ## If `substr == ""` true is returned. + ## + ## See also: + ## * `startsWith func<#startsWith,string,string>`_ + ## * `endsWith func<#endsWith,string,string>`_ + runnableExamples: + let a = "abracadabra" + doAssert a.continuesWith("ca", 4) == true + doAssert a.continuesWith("ca", 5) == false + doAssert a.continuesWith("dab", 6) == true var i = 0 while true: - if substr[i] == '\0': return true - if s[i+start] != substr[i]: return false + if i >= substr.len: return true + if i+start >= s.len or s[i+start] != substr[i]: return false inc(i) -proc addSep*(dest: var string, sep = ", ", startLen = 0) {.noSideEffect, - inline.} = + +func removePrefix*(s: var string, chars: set[char] = Newlines) {.rtl, + extern: "nsuRemovePrefixCharSet".} = + ## Removes all characters from `chars` from the start of the string `s` + ## (in-place). 
+ ## + ## See also: + ## * `removeSuffix func<#removeSuffix,string,set[char]>`_ + runnableExamples: + var userInput = "\r\n*~Hello World!" + userInput.removePrefix + doAssert userInput == "*~Hello World!" + userInput.removePrefix({'~', '*'}) + doAssert userInput == "Hello World!" + + var otherInput = "?!?Hello!?!" + otherInput.removePrefix({'!', '?'}) + doAssert otherInput == "Hello!?!" + + var start = 0 + while start < s.len and s[start] in chars: start += 1 + if start > 0: s.delete(0..start - 1) + +func removePrefix*(s: var string, c: char) {.rtl, + extern: "nsuRemovePrefixChar".} = + ## Removes all occurrences of a single character (in-place) from the start + ## of a string. + ## + ## See also: + ## * `removeSuffix func<#removeSuffix,string,char>`_ + ## * `startsWith func<#startsWith,string,char>`_ + runnableExamples: + var ident = "pControl" + ident.removePrefix('p') + doAssert ident == "Control" + removePrefix(s, chars = {c}) + +func removePrefix*(s: var string, prefix: string) {.rtl, + extern: "nsuRemovePrefixString".} = + ## Remove the first matching prefix (in-place) from a string. + ## + ## See also: + ## * `removeSuffix func<#removeSuffix,string,string>`_ + ## * `startsWith func<#startsWith,string,string>`_ + runnableExamples: + var answers = "yesyes" + answers.removePrefix("yes") + doAssert answers == "yes" + if s.startsWith(prefix) and prefix.len > 0: + s.delete(0..prefix.len - 1) + +func removeSuffix*(s: var string, chars: set[char] = Newlines) {.rtl, + extern: "nsuRemoveSuffixCharSet".} = + ## Removes all characters from `chars` from the end of the string `s` + ## (in-place). + ## + ## See also: + ## * `removePrefix func<#removePrefix,string,set[char]>`_ + runnableExamples: + var userInput = "Hello World!*~\r\n" + userInput.removeSuffix + doAssert userInput == "Hello World!*~" + userInput.removeSuffix({'~', '*'}) + doAssert userInput == "Hello World!" + + var otherInput = "Hello!?!" 
+ otherInput.removeSuffix({'!', '?'}) + doAssert otherInput == "Hello" + + if s.len == 0: return + var last = s.high + while last > -1 and s[last] in chars: last -= 1 + s.setLen(last + 1) + +func removeSuffix*(s: var string, c: char) {.rtl, + extern: "nsuRemoveSuffixChar".} = + ## Removes all occurrences of a single character (in-place) from the end + ## of a string. + ## + ## See also: + ## * `removePrefix func<#removePrefix,string,char>`_ + ## * `endsWith func<#endsWith,string,char>`_ + runnableExamples: + var table = "users" + table.removeSuffix('s') + doAssert table == "user" + + var dots = "Trailing dots......." + dots.removeSuffix('.') + doAssert dots == "Trailing dots" + + removeSuffix(s, chars = {c}) + +func removeSuffix*(s: var string, suffix: string) {.rtl, + extern: "nsuRemoveSuffixString".} = + ## Remove the first matching suffix (in-place) from a string. + ## + ## See also: + ## * `removePrefix func<#removePrefix,string,string>`_ + ## * `endsWith func<#endsWith,string,string>`_ + runnableExamples: + var answers = "yeses" + answers.removeSuffix("es") + doAssert answers == "yes" + var newLen = s.len + if s.endsWith(suffix): + newLen -= len(suffix) + s.setLen(newLen) + + +func addSep*(dest: var string, sep = ", ", startLen: Natural = 0) {.inline.} = ## Adds a separator to `dest` only if its length is bigger than `startLen`. ## ## A shorthand for: ## - ## .. code-block:: nim + ## ```nim ## if dest.len > startLen: add(dest, sep) + ## ``` ## ## This is often useful for generating some code where the items need to ## be *separated* by `sep`. `sep` is only added if `dest` is longer than ## `startLen`. The following example creates a string describing - ## an array of integers: - ## - ## .. code-block:: nim - ## var arr = "[" - ## for x in items([2, 3, 5, 7, 11]): - ## addSep(arr, startLen=len("[")) - ## add(arr, $x) - ## add(arr, "]") + ## an array of integers. 
+ runnableExamples: + var arr = "[" + for x in items([2, 3, 5, 7, 11]): + addSep(arr, startLen = len("[")) + add(arr, $x) + add(arr, "]") + doAssert arr == "[2, 3, 5, 7, 11]" + if dest.len > startLen: add(dest, sep) -proc allCharsInSet*(s: string, theSet: set[char]): bool = - ## Returns true iff each character of `s` is in the set `theSet`. +func allCharsInSet*(s: string, theSet: set[char]): bool = + ## Returns true if every character of `s` is in the set `theSet`. + runnableExamples: + doAssert allCharsInSet("aeea", {'a', 'e'}) == true + doAssert allCharsInSet("", {'a', 'e'}) == true + for c in items(s): if c notin theSet: return false return true -proc abbrev*(s: string, possibilities: openArray[string]): int = - ## Returns the index of the first item in `possibilities` if not ambiguous. +func abbrev*(s: string, possibilities: openArray[string]): int = + ## Returns the index of the first item in `possibilities` which starts + ## with `s`, if not ambiguous. ## ## Returns -1 if no item has been found and -2 if multiple items match. + runnableExamples: + doAssert abbrev("fac", ["college", "faculty", "industry"]) == 1 + doAssert abbrev("foo", ["college", "faculty", "industry"]) == -1 # Not found + doAssert abbrev("fac", ["college", "faculty", "faculties"]) == -2 # Ambiguous + doAssert abbrev("college", ["college", "colleges", "industry"]) == 0 + result = -1 # none found for i in 0..possibilities.len-1: if possibilities[i].startsWith(s): @@ -736,9 +1858,12 @@ proc abbrev*(s: string, possibilities: openArray[string]): int = # --------------------------------------------------------------------------- -proc join*(a: openArray[string], sep: string): string {. - noSideEffect, rtl, extern: "nsuJoinSep".} = - ## Concatenates all strings in `a` separating them with `sep`. +func join*(a: openArray[string], sep: string = ""): string {.rtl, + extern: "nsuJoinSep".} = + ## Concatenates all strings in the container `a`, separating them with `sep`. 
+ runnableExamples: + doAssert join(["A", "B", "Conclusion"], " -> ") == "A -> B -> Conclusion" + if len(a) > 0: var L = sep.len * (a.len-1) for i in 0..high(a): inc(L, a[i].len) @@ -750,74 +1875,233 @@ proc join*(a: openArray[string], sep: string): string {. else: result = "" -proc join*(a: openArray[string]): string {. - noSideEffect, rtl, extern: "nsuJoin".} = - ## Concatenates all strings in `a`. - if len(a) > 0: - var L = 0 - for i in 0..high(a): inc(L, a[i].len) - result = newStringOfCap(L) - for i in 0..high(a): add(result, a[i]) - else: - result = "" +proc join*[T: not string](a: openArray[T], sep: string = ""): string = + ## Converts all elements in the container `a` to strings using `$`, + ## and concatenates them with `sep`. + runnableExamples: + doAssert join([1, 2, 3], " -> ") == "1 -> 2 -> 3" + + result = "" + for i, x in a: + if i > 0: + add(result, sep) + add(result, $x) type - SkipTable = array[char, int] + SkipTable* = array[char, int] ## Character table for efficient substring search. -proc preprocessSub(sub: string, a: var SkipTable) = - var m = len(sub) - for i in 0..0xff: a[chr(i)] = m+1 - for i in 0..m-1: a[sub[i]] = m-i +func initSkipTable*(a: var SkipTable, sub: string) {.rtl, + extern: "nsuInitSkipTable".} = + ## Initializes table `a` for efficient search of substring `sub`. + ## + ## See also: + ## * `initSkipTable func<#initSkipTable,string>`_ + ## * `find func<#find,SkipTable,string,string,Natural,int>`_ + # TODO: this should be the `default()` initializer for the type. 
+ let m = len(sub) + fill(a, m) -proc findAux(s, sub: string, start: int, a: SkipTable): int = - # Fast "quick search" algorithm: - var - m = len(sub) - n = len(s) - # search: - var j = start - while j <= n - m: - block match: - for k in 0..m-1: - if sub[k] != s[k+j]: break match - return j - inc(j, a[s[j+m]]) - return -1 + for i in 0 ..< m - 1: + a[sub[i]] = m - 1 - i + +func initSkipTable*(sub: string): SkipTable {.noinit, rtl, + extern: "nsuInitNewSkipTable".} = + ## Returns a new table initialized for `sub`. + ## + ## See also: + ## * `initSkipTable func<#initSkipTable,SkipTable,string>`_ + ## * `find func<#find,SkipTable,string,string,Natural,int>`_ + initSkipTable(result, sub) + +func find*(a: SkipTable, s, sub: string, start: Natural = 0, last = -1): int {. + rtl, extern: "nsuFindStrA".} = + ## Searches for `sub` in `s` inside range `start..last` using preprocessed + ## table `a`. If `last` is unspecified, it defaults to `s.high` (the last + ## element). + ## + ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. + ## + ## See also: + ## * `initSkipTable func<#initSkipTable,string>`_ + ## * `initSkipTable func<#initSkipTable,SkipTable,string>`_ + let + last = if last < 0: s.high else: last + subLast = sub.len - 1 + + if subLast == -1: + # this was an empty needle string, + # we count this as match in the first possible position: + return start + + # This is an implementation of the Boyer-Moore Horspool algorithms + # https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm + result = -1 + var skip = start + + while last - skip >= subLast: + var i = subLast + while s[skip + i] == sub[i]: + if i == 0: + return skip + dec i + inc skip, a[s[skip + subLast]] + +when not (defined(js) or defined(nimdoc) or defined(nimscript)): + func c_memchr(cstr: pointer, c: char, n: csize_t): pointer {. 
+ importc: "memchr", header: "<string.h>".} + const hasCStringBuiltin = true +else: + const hasCStringBuiltin = false + +func find*(s: string, sub: char, start: Natural = 0, last = -1): int {.rtl, + extern: "nsuFindChar".} = + ## Searches for `sub` in `s` inside range `start..last` (both ends included). + ## If `last` is unspecified or negative, it defaults to `s.high` (the last element). + ## + ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. + ## Otherwise the index returned is relative to `s[0]`, not `start`. + ## Subtract `start` from the result for a `start`-origin index. + ## + ## See also: + ## * `rfind func<#rfind,string,char,Natural,int>`_ + ## * `replace func<#replace,string,char,char>`_ + result = -1 + let last = if last < 0: s.high else: last + + template findImpl = + for i in int(start)..last: + if s[i] == sub: + return i + + when nimvm: + findImpl() + else: + when hasCStringBuiltin: + let length = last-start+1 + if length > 0: + let found = c_memchr(s[start].unsafeAddr, sub, cast[csize_t](length)) + if not found.isNil: + return cast[int](found) -% cast[int](s.cstring) + else: + findImpl() -proc find*(s, sub: string, start: int = 0): int {.noSideEffect, - rtl, extern: "nsuFindStr".} = - ## Searches for `sub` in `s` starting at position `start`. +func find*(s: string, chars: set[char], start: Natural = 0, last = -1): int {. + rtl, extern: "nsuFindCharSet".} = + ## Searches for `chars` in `s` inside range `start..last` (both ends included). + ## If `last` is unspecified or negative, it defaults to `s.high` (the last element). + ## + ## If `s` contains none of the characters in `chars`, -1 is returned. + ## Otherwise the index returned is relative to `s[0]`, not `start`. + ## Subtract `start` from the result for a `start`-origin index. 
+ ## + ## See also: + ## * `rfind func<#rfind,string,set[char],Natural,int>`_ + ## * `multiReplace func<#multiReplace,string,varargs[]>`_ + result = -1 + let last = if last < 0: s.high else: last + for i in int(start)..last: + if s[i] in chars: + return i + +when defined(linux): + proc memmem(haystack: pointer, haystacklen: csize_t, + needle: pointer, needlelen: csize_t): pointer {.importc, header: """#define _GNU_SOURCE +#include <string.h>""".} +elif defined(bsd) or (defined(macosx) and not defined(ios)): + proc memmem(haystack: pointer, haystacklen: csize_t, + needle: pointer, needlelen: csize_t): pointer {.importc, header: "#include <string.h>".} + +func find*(s, sub: string, start: Natural = 0, last = -1): int {.rtl, + extern: "nsuFindStr".} = + ## Searches for `sub` in `s` inside range `start..last` (both ends included). + ## If `last` is unspecified or negative, it defaults to `s.high` (the last element). ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. - var a {.noinit.}: SkipTable - preprocessSub(sub, a) - result = findAux(s, sub, start, a) + ## Otherwise the index returned is relative to `s[0]`, not `start`. + ## Subtract `start` from the result for a `start`-origin index. + ## + ## See also: + ## * `rfind func<#rfind,string,string,Natural,int>`_ + ## * `replace func<#replace,string,string,string>`_ + if sub.len > s.len - start: return -1 + if sub.len == 1: return find(s, sub[0], start, last) + + template useSkipTable = + result = find(initSkipTable(sub), s, sub, start, last) -proc find*(s: string, sub: char, start: int = 0): int {.noSideEffect, - rtl, extern: "nsuFindChar".} = - ## Searches for `sub` in `s` starting at position `start`. 
+ when nimvm: + useSkipTable() + else: + when declared(memmem): + let subLen = sub.len + if last < 0 and start < s.len and subLen != 0: + let found = memmem(s[start].unsafeAddr, csize_t(s.len - start), sub.cstring, csize_t(subLen)) + result = if not found.isNil: + cast[int](found) -% cast[int](s.cstring) + else: + -1 + else: + useSkipTable() + else: + useSkipTable() + +func rfind*(s: string, sub: char, start: Natural = 0, last = -1): int {.rtl, + extern: "nsuRFindChar".} = + ## Searches for `sub` in `s` inside range `start..last` (both ends included) + ## in reverse -- starting at high indexes and moving lower to the first + ## character or `start`. If `last` is unspecified, it defaults to `s.high` + ## (the last element). ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. - for i in start..len(s)-1: + ## Otherwise the index returned is relative to `s[0]`, not `start`. + ## Subtract `start` from the result for a `start`-origin index. + ## + ## See also: + ## * `find func<#find,string,char,Natural,int>`_ + let last = if last == -1: s.high else: last + for i in countdown(last, start): if sub == s[i]: return i return -1 -proc find*(s: string, chars: set[char], start: int = 0): int {.noSideEffect, - rtl, extern: "nsuFindCharSet".} = - ## Searches for `chars` in `s` starting at position `start`. +func rfind*(s: string, chars: set[char], start: Natural = 0, last = -1): int {. + rtl, extern: "nsuRFindCharSet".} = + ## Searches for `chars` in `s` inside range `start..last` (both ends + ## included) in reverse -- starting at high indexes and moving lower to the + ## first character or `start`. If `last` is unspecified, it defaults to + ## `s.high` (the last element). ## ## If `s` contains none of the characters in `chars`, -1 is returned. - for i in start..s.len-1: + ## Otherwise the index returned is relative to `s[0]`, not `start`. + ## Subtract `start` from the result for a `start`-origin index. 
+ ## + ## See also: + ## * `find func<#find,string,set[char],Natural,int>`_ + let last = if last == -1: s.high else: last + for i in countdown(last, start): if s[i] in chars: return i return -1 -proc rfind*(s, sub: string, start: int = -1): int {.noSideEffect.} = - ## Searches for `sub` in `s` in reverse, starting at `start` and going - ## backwards to 0. +func rfind*(s, sub: string, start: Natural = 0, last = -1): int {.rtl, + extern: "nsuRFindStr".} = + ## Searches for `sub` in `s` inside range `start..last` (both ends included) + ## included) in reverse -- starting at high indexes and moving lower to the + ## first character or `start`. If `last` is unspecified, it defaults to + ## `s.high` (the last element). ## ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. - let realStart = if start == -1: s.len else: start - for i in countdown(realStart-sub.len, 0): + ## Otherwise the index returned is relative to `s[0]`, not `start`. + ## Subtract `start` from the result for a `start`-origin index. + ## + ## See also: + ## * `find func<#find,string,string,Natural,int>`_ + if sub.len == 0: + let rightIndex: Natural = if last < 0: s.len else: last + return max(start, rightIndex) + if sub.len > s.len - start: + return -1 + let last = if last == -1: s.high else: last + result = 0 + for i in countdown(last - sub.len + 1, start): for j in 0..sub.len-1: result = i if sub[j] != s[i+j]: @@ -826,90 +2110,139 @@ proc rfind*(s, sub: string, start: int = -1): int {.noSideEffect.} = if result != -1: return return -1 -proc rfind*(s: string, sub: char, start: int = -1): int {.noSideEffect, - rtl.} = - ## Searches for `sub` in `s` in reverse starting at position `start`. + +func count*(s: string, sub: char): int {.rtl, extern: "nsuCountChar".} = + ## Counts the occurrences of the character `sub` in the string `s`. ## - ## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned. 
- let realStart = if start == -1: s.len-1 else: start - for i in countdown(realStart, 0): - if sub == s[i]: return i - return -1 + ## See also: + ## * `countLines func<#countLines,string>`_ + result = 0 + for c in s: + if c == sub: inc result + +func count*(s: string, subs: set[char]): int {.rtl, + extern: "nsuCountCharSet".} = + ## Counts the occurrences of the group of character `subs` in the string `s`. + ## + ## See also: + ## * `countLines func<#countLines,string>`_ + doAssert card(subs) > 0 + result = 0 + for c in s: + if c in subs: inc result -proc count*(s: string, sub: string, overlapping: bool = false): int {.noSideEffect, - rtl, extern: "nsuCountString".} = - ## Count the occurrences of a substring `sub` in the string `s`. +func count*(s: string, sub: string, overlapping: bool = false): int {.rtl, + extern: "nsuCountString".} = + ## Counts the occurrences of a substring `sub` in the string `s`. ## Overlapping occurrences of `sub` only count when `overlapping` - ## is set to true. + ## is set to true (default: false). + ## + ## See also: + ## * `countLines func<#countLines,string>`_ + doAssert sub.len > 0 + result = 0 var i = 0 while true: i = s.find(sub, i) - if i < 0: - break - if overlapping: - inc i - else: - i += sub.len + if i < 0: break + if overlapping: inc i + else: i += sub.len inc result -proc count*(s: string, sub: char): int {.noSideEffect, - rtl, extern: "nsuCountChar".} = - ## Count the occurrences of the character `sub` in the string `s`. - for c in s: - if c == sub: +func countLines*(s: string): int {.rtl, extern: "nsuCountLines".} = + ## Returns the number of lines in the string `s`. + ## + ## This is the same as `len(splitLines(s))`, but much more efficient + ## because it doesn't modify the string creating temporary objects. Every + ## `character literal <manual.html#lexical-analysis-character-literals>`_ + ## newline combination (CR, LF, CR-LF) is supported. 
+ ## + ## In this context, a line is any string separated by a newline combination. + ## A line can be an empty string. + ## + ## See also: + ## * `splitLines func<#splitLines,string>`_ + runnableExamples: + doAssert countLines("First line\l and second line.") == 2 + result = 1 + var i = 0 + while i < s.len: + case s[i] + of '\c': + if i+1 < s.len and s[i+1] == '\l': inc i inc result + of '\l': inc result + else: discard + inc i -proc count*(s: string, subs: set[char]): int {.noSideEffect, - rtl, extern: "nsuCountCharSet".} = - ## Count the occurrences of the group of character `subs` in the string `s`. - for c in s: - if c in subs: - inc result -proc quoteIfContainsWhite*(s: string): string {.deprecated.} = - ## Returns ``'"' & s & '"'`` if `s` contains a space and does not - ## start with a quote, else returns `s`. +func contains*(s, sub: string): bool = + ## Same as `find(s, sub) >= 0`. ## - ## **DEPRECATED** as it was confused for shell quoting function. For this - ## application use `osproc.quoteShell <osproc.html#quoteShell>`_. - if find(s, {' ', '\t'}) >= 0 and s[0] != '"': - result = '"' & s & '"' - else: - result = s - -proc contains*(s: string, c: char): bool {.noSideEffect.} = - ## Same as ``find(s, c) >= 0``. - return find(s, c) >= 0 - -proc contains*(s, sub: string): bool {.noSideEffect.} = - ## Same as ``find(s, sub) >= 0``. + ## See also: + ## * `find func<#find,string,string,Natural,int>`_ return find(s, sub) >= 0 -proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} = - ## Same as ``find(s, chars) >= 0``. +func contains*(s: string, chars: set[char]): bool = + ## Same as `find(s, chars) >= 0`. + ## + ## See also: + ## * `find func<#find,string,set[char],Natural,int>`_ return find(s, chars) >= 0 -proc replace*(s, sub: string, by = ""): string {.noSideEffect, - rtl, extern: "nsuReplaceStr".} = - ## Replaces `sub` in `s` by the string `by`. 
- var a {.noinit.}: SkipTable +func replace*(s, sub: string, by = ""): string {.rtl, + extern: "nsuReplaceStr".} = + ## Replaces every occurrence of the string `sub` in `s` with the string `by`. + ## + ## See also: + ## * `find func<#find,string,string,Natural,int>`_ + ## * `replace func<#replace,string,char,char>`_ for replacing + ## single characters + ## * `replaceWord func<#replaceWord,string,string,string>`_ + ## * `multiReplace func<#multiReplace,string,varargs[]>`_ result = "" - preprocessSub(sub, a) - var i = 0 - while true: - var j = findAux(s, sub, i, a) - if j < 0: break - add result, substr(s, i, j - 1) - add result, by - i = j + len(sub) - # copy the rest: - add result, substr(s, i) - -proc replace*(s: string, sub, by: char): string {.noSideEffect, - rtl, extern: "nsuReplaceChar".} = - ## Replaces `sub` in `s` by the character `by`. - ## - ## Optimized version of `replace <#replace,string,string>`_ for characters. + let subLen = sub.len + if subLen == 0: + result = s + elif subLen == 1: + # when the pattern is a single char, we use a faster + # char-based search that doesn't need a skip table: + let c = sub[0] + let last = s.high + var i = 0 + while true: + let j = find(s, c, i, last) + if j < 0: break + add result, substr(s, i, j - 1) + add result, by + i = j + subLen + # copy the rest: + add result, substr(s, i) + else: + var a = initSkipTable(sub) + let last = s.high + var i = 0 + while true: + let j = find(a, s, sub, i, last) + if j < 0: break + add result, substr(s, i, j - 1) + add result, by + i = j + subLen + # copy the rest: + add result, substr(s, i) + +func replace*(s: string, sub, by: char): string {.rtl, + extern: "nsuReplaceChar".} = + ## Replaces every occurrence of the character `sub` in `s` with the character + ## `by`. + ## + ## Optimized version of `replace <#replace,string,string,string>`_ for + ## characters. 
+ ## + ## See also: + ## * `find func<#find,string,char,Natural,int>`_ + ## * `replaceWord func<#replaceWord,string,string,string>`_ + ## * `multiReplace func<#multiReplace,string,varargs[]>`_ result = newString(s.len) var i = 0 while i < s.len: @@ -917,135 +2250,143 @@ proc replace*(s: string, sub, by: char): string {.noSideEffect, else: result[i] = s[i] inc(i) -proc replaceWord*(s, sub: string, by = ""): string {.noSideEffect, - rtl, extern: "nsuReplaceWord".} = - ## Replaces `sub` in `s` by the string `by`. +func replaceWord*(s, sub: string, by = ""): string {.rtl, + extern: "nsuReplaceWord".} = + ## Replaces every occurrence of the string `sub` in `s` with the string `by`. ## ## Each occurrence of `sub` has to be surrounded by word boundaries - ## (comparable to ``\\w`` in regular expressions), otherwise it is not + ## (comparable to `\b` in regular expressions), otherwise it is not ## replaced. + if sub.len == 0: return s const wordChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\128'..'\255'} - var a {.noinit.}: SkipTable result = "" - preprocessSub(sub, a) + var a = initSkipTable(sub) var i = 0 - while true: - var j = findAux(s, sub, i, a) - if j < 0: break - # word boundary? - if (j == 0 or s[j-1] notin wordChars) and - (j+sub.len >= s.len or s[j+sub.len] notin wordChars): - add result, substr(s, i, j - 1) - add result, by - i = j + len(sub) - else: - add result, substr(s, i, j) - i = j + 1 - # copy the rest: - add result, substr(s, i) - -proc delete*(s: var string, first, last: int) {.noSideEffect, - rtl, extern: "nsuDelete".} = - ## Deletes in `s` the characters at position `first` .. `last`. - ## - ## This modifies `s` itself, it does not return a copy. - var i = first - var j = last+1 - var newLen = len(s)-j+i - while i < newLen: - s[i] = s[j] - inc(i) - inc(j) - setLen(s, newLen) - -proc parseOctInt*(s: string): int {.noSideEffect, - rtl, extern: "nsuParseOctInt".} = - ## Parses an octal integer value contained in `s`. 
- ## - ## If `s` is not a valid integer, `ValueError` is raised. `s` can have one - ## of the following optional prefixes: ``0o``, ``0O``. Underscores within - ## `s` are ignored. + let last = s.high + let sublen = sub.len + if sublen > 0: + while true: + var j = find(a, s, sub, i, last) + if j < 0: break + # word boundary? + if (j == 0 or s[j-1] notin wordChars) and + (j+sub.len >= s.len or s[j+sub.len] notin wordChars): + add result, substr(s, i, j - 1) + add result, by + i = j + sublen + else: + add result, substr(s, i, j) + i = j + 1 + # copy the rest: + add result, substr(s, i) + +func multiReplace*(s: string, replacements: varargs[(string, string)]): string = + ## Same as `replace<#replace,string,string,string>`_, but specialized for + ## doing multiple replacements in a single pass through the input string. + ## + ## `multiReplace` scans the input string from left to right and replaces the + ## matching substrings in the same order as passed in the argument list. + ## + ## The implications of the order of scanning the string and matching the + ## replacements: + ## - In case of multiple matches at a given position, the earliest + ## replacement is applied. + ## - Overlaps are not handled. After performing a replacement, the scan + ## continues from the character after the matched substring. If the + ## resulting string then contains a possible match starting in a newly + ## placed substring, the additional replacement is not performed. + ## + ## If the resulting string is not longer than the original input string, + ## only a single memory allocation is required. + ## + runnableExamples: + # Swapping occurrences of 'a' and 'b': + doAssert multireplace("abba", [("a", "b"), ("b", "a")]) == "baab" + + # The second replacement ("ab") is matched and performed first, the scan then + # continues from 'c', so the "bc" replacement is never matched and thus skipped. 
+ doAssert multireplace("abc", [("bc", "x"), ("ab", "_b")]) == "_bc" + result = newStringOfCap(s.len) var i = 0 - if s[i] == '0' and (s[i+1] == 'o' or s[i+1] == 'O'): inc(i, 2) - while true: - case s[i] - of '_': inc(i) - of '0'..'7': - result = result shl 3 or (ord(s[i]) - ord('0')) + var fastChk: set[char] = {} + for sub, by in replacements.items: + if sub.len > 0: + # Include first character of all replacements + fastChk.incl sub[0] + while i < s.len: + block sIteration: + # Assume most chars in s are not candidates for any replacement operation + if s[i] in fastChk: + for sub, by in replacements.items: + if sub.len > 0 and s.continuesWith(sub, i): + add result, by + inc(i, sub.len) + break sIteration + # No matching replacement found + # copy current character from s + add result, s[i] inc(i) - of '\0': break - else: raise newException(ValueError, "invalid integer: " & s) -proc toOct*(x: BiggestInt, len: int): string {.noSideEffect, - rtl, extern: "nsuToOct".} = - ## Converts `x` into its octal representation. - ## - ## The resulting string is always `len` characters long. No leading ``0o`` - ## prefix is generated. - var - mask: BiggestInt = 7 - shift: BiggestInt = 0 - assert(len > 0) - result = newString(len) - for j in countdown(len-1, 0): - result[j] = chr(int((x and mask) shr shift) + ord('0')) - shift = shift + 3 - mask = mask shl 3 -proc toBin*(x: BiggestInt, len: int): string {.noSideEffect, - rtl, extern: "nsuToBin".} = - ## Converts `x` into its binary representation. - ## - ## The resulting string is always `len` characters long. No leading ``0b`` - ## prefix is generated. 
- var - mask: BiggestInt = 1 - shift: BiggestInt = 0 - assert(len > 0) - result = newString(len) - for j in countdown(len-1, 0): - result[j] = chr(int((x and mask) shr shift) + ord('0')) - shift = shift + 1 - mask = mask shl 1 -proc insertSep*(s: string, sep = '_', digits = 3): string {.noSideEffect, - rtl, extern: "nsuInsertSep".} = - ## Inserts the separator `sep` after `digits` digits from right to left. +func insertSep*(s: string, sep = '_', digits = 3): string {.rtl, + extern: "nsuInsertSep".} = + ## Inserts the separator `sep` after `digits` characters (default: 3) + ## from right to left. ## ## Even though the algorithm works with any string `s`, it is only useful ## if `s` contains a number. - ## Example: ``insertSep("1000000") == "1_000_000"`` - var L = (s.len-1) div digits + s.len - result = newString(L) + runnableExamples: + doAssert insertSep("1000000") == "1_000_000" + result = newStringOfCap(s.len) + let hasPrefix = isDigit(s[s.low]) == false + var idx: int + if hasPrefix: + result.add s[s.low] + for i in (s.low + 1)..s.high: + idx = i + if not isDigit(s[i]): + result.add s[i] + else: + break + let partsLen = s.len - idx + var L = (partsLen-1) div digits + partsLen + result.setLen(L + idx) var j = 0 dec(L) - for i in countdown(len(s)-1, 0): + for i in countdown(partsLen-1, 0): if j == digits: - result[L] = sep + result[L + idx] = sep dec(L) j = 0 - result[L] = s[i] + result[L + idx] = s[i + idx] inc(j) dec(L) -proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, - rtl, extern: "nsuEscape".} = +func escape*(s: string, prefix = "\"", suffix = "\""): string {.rtl, + extern: "nsuEscape".} = ## Escapes a string `s`. ## - ## This does these operations (at the same time): - ## * replaces any ``\`` by ``\\`` - ## * replaces any ``'`` by ``\'`` - ## * replaces any ``"`` by ``\"`` - ## * replaces any other character in the set ``{'\0'..'\31', '\128'..'\255'}`` - ## by ``\xHH`` where ``HH`` is its hexadecimal value. 
- ## The procedure has been designed so that its output is usable for many - ## different common syntaxes. The resulting string is prefixed with - ## `prefix` and suffixed with `suffix`. Both may be empty strings. + ## .. note:: The escaping scheme is different from + ## `system.addEscapedChar`. + ## + ## * replaces `'\0'..'\31'` and `'\127'..'\255'` by `\xHH` where `HH` is its hexadecimal value + ## * replaces ``\`` by `\\` + ## * replaces `'` by `\'` + ## * replaces `"` by `\"` + ## + ## The resulting string is prefixed with `prefix` and suffixed with `suffix`. + ## Both may be empty strings. + ## + ## See also: + ## * `addEscapedChar proc<system.html#addEscapedChar,string,char>`_ + ## * `unescape func<#unescape,string,string,string>`_ for the opposite + ## operation result = newStringOfCap(s.len + s.len shr 2) result.add(prefix) for c in items(s): case c - of '\0'..'\31', '\128'..'\255': + of '\0'..'\31', '\127'..'\255': add(result, "\\x") add(result, toHex(ord(c), 2)) of '\\': add(result, "\\\\") @@ -1054,230 +2395,394 @@ proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, else: add(result, c) add(result, suffix) -proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, - rtl, extern: "nsuUnescape".} = +func unescape*(s: string, prefix = "\"", suffix = "\""): string {.rtl, + extern: "nsuUnescape".} = ## Unescapes a string `s`. ## - ## This complements `escape <#escape>`_ as it performs the opposite - ## operations. + ## This complements `escape func<#escape,string,string,string>`_ + ## as it performs the opposite operations. ## - ## If `s` does not begin with ``prefix`` and end with ``suffix`` a + ## If `s` does not begin with `prefix` and end with `suffix` a ## ValueError exception will be raised. 
result = newStringOfCap(s.len) - var i = 0 + var i = prefix.len if not s.startsWith(prefix): raise newException(ValueError, - "String does not start with a prefix of: " & prefix) - inc(i) + "String does not start with: " & prefix) while true: - if i == s.len-suffix.len: break - case s[i] - of '\\': + if i >= s.len-suffix.len: break + if s[i] == '\\': + if i+1 >= s.len: + result.add('\\') + break case s[i+1]: of 'x': - inc i - var c: int - i += parseutils.parseHex(s, c, i) + inc i, 2 + var c = 0 + i += parseutils.parseHex(s, c, i, maxLen = 2) result.add(chr(c)) - inc(i, 2) + dec i, 2 of '\\': result.add('\\') of '\'': result.add('\'') of '\"': result.add('\"') - else: result.add("\\" & s[i+1]) - inc(i) - of '\0': break + else: + result.add("\\" & s[i+1]) + inc(i, 2) else: result.add(s[i]) - inc(i) + inc(i) if not s.endsWith(suffix): raise newException(ValueError, - "String does not end with a suffix of: " & suffix) + "String does not end in: " & suffix) -proc validIdentifier*(s: string): bool {.noSideEffect, - rtl, extern: "nsuValidIdentifier".} = +func validIdentifier*(s: string): bool {.rtl, extern: "nsuValidIdentifier".} = ## Returns true if `s` is a valid identifier. ## ## A valid identifier starts with a character of the set `IdentStartChars` ## and is followed by any number of characters of the set `IdentChars`. - if s[0] in IdentStartChars: + runnableExamples: + doAssert "abc_def08".validIdentifier + + if s.len > 0 and s[0] in IdentStartChars: for i in 1..s.len-1: if s[i] notin IdentChars: return false return true -proc editDistance*(a, b: string): int {.noSideEffect, - rtl, extern: "nsuEditDistance".} = - ## Returns the edit distance between `a` and `b`. - ## - ## This uses the `Levenshtein`:idx: distance algorithm with only a linear - ## memory overhead. This implementation is highly optimized! 
- var len1 = a.len - var len2 = b.len - if len1 > len2: - # make `b` the longer string - return editDistance(b, a) - - # strip common prefix: - var s = 0 - while a[s] == b[s] and a[s] != '\0': - inc(s) - dec(len1) - dec(len2) - # strip common suffix: - while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]: - dec(len1) - dec(len2) - # trivial cases: - if len1 == 0: return len2 - if len2 == 0: return len1 - - # another special case: - if len1 == 1: - for j in s..len2-1: - if a[s] == b[j]: return len2 - 1 - return len2 - - inc(len1) - inc(len2) - var half = len1 shr 1 - # initalize first row: - #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2*sizeof(int))) - var row: seq[int] - newSeq(row, len2) - var e = s + len2 - 1 # end marker - for i in 1..len2 - half - 1: row[i] = i - row[0] = len1 - half - 1 - for i in 1 .. len1 - 1: - var char1 = a[i + s - 1] - var char2p: int - var D, x: int - var p: int - if i >= len1 - half: - # skip the upper triangle: - var offset = i - len1 + half - char2p = offset - p = offset - var c3 = row[p] + ord(char1 != b[s + char2p]) - inc(p) - inc(char2p) - x = row[p] + 1 - D = x - if x > c3: x = c3 - row[p] = x - inc(p) - else: - p = 1 - char2p = 0 - D = i - x = i - if i <= half + 1: - # skip the lower triangle: - e = len2 + i - half - 2 - # main: - while p <= e: - dec(D) - var c3 = D + ord(char1 != b[char2p + s]) - inc(char2p) - inc(x) - if x > c3: x = c3 - D = row[p] + 1 - if x > D: x = D - row[p] = x - inc(p) - # lower triangle sentinel: - if i <= half: - dec(D) - var c3 = D + ord(char1 != b[char2p + s]) - inc(x) - if x > c3: x = c3 - row[p] = x - result = row[e] - #dealloc(row) - - -# floating point formating: - -proc c_sprintf(buf, frmt: cstring) {.header: "<stdio.h>", importc: "sprintf", - varargs, noSideEffect.} - -type - FloatFormatMode* = enum ## the different modes of floating point formating - ffDefault, ## use the shorter floating point notation - ffDecimal, ## use decimal floating point notation - ffScientific ## 
use scientific notation (using ``e`` character) -{.deprecated: [TFloatFormat: FloatFormatMode].} +# floating point formatting: +when not defined(js): + func c_snprintf(buf: cstring, n: csize_t, frmt: cstring): cint {.header: "<stdio.h>", + importc: "snprintf", varargs.} -proc formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault, - precision: range[0..32] = 16): string {. - noSideEffect, rtl, extern: "nsu$1".} = +type + FloatFormatMode* = enum + ## The different modes of floating point formatting. + ffDefault, ## use the shorter floating point notation + ffDecimal, ## use decimal floating point notation + ffScientific ## use scientific notation (using `e` character) + +func formatBiggestFloat*(f: BiggestFloat, format: FloatFormatMode = ffDefault, + precision: range[-1..32] = 16; + decimalSep = '.'): string {.rtl, extern: "nsu$1".} = ## Converts a floating point value `f` to a string. ## - ## If ``format == ffDecimal`` then precision is the number of digits to + ## If `format == ffDecimal` then precision is the number of digits to ## be printed after the decimal point. - ## If ``format == ffScientific`` then precision is the maximum number + ## If `format == ffScientific` then precision is the maximum number ## of significant digits to be printed. ## `precision`'s default value is the maximum number of meaningful digits - ## after the decimal point for Nim's ``biggestFloat`` type. - ## - ## If ``precision == 0``, it tries to format it nicely. - const floatFormatToChar: array[FloatFormatMode, char] = ['g', 'f', 'e'] - var - frmtstr {.noinit.}: array[0..5, char] - buf {.noinit.}: array[0..2500, char] - frmtstr[0] = '%' - if precision > 0: - frmtstr[1] = '#' - frmtstr[2] = '.' - frmtstr[3] = '*' - frmtstr[4] = floatFormatToChar[format] - frmtstr[5] = '\0' - c_sprintf(buf, frmtstr, precision, f) + ## after the decimal point for Nim's `biggestFloat` type. + ## + ## If `precision == -1`, it tries to format it nicely. 
+ runnableExamples: + let x = 123.456 + doAssert x.formatBiggestFloat() == "123.4560000000000" + doAssert x.formatBiggestFloat(ffDecimal, 4) == "123.4560" + doAssert x.formatBiggestFloat(ffScientific, 2) == "1.23e+02" + when nimvm: + discard "implemented in the vmops" else: - frmtstr[1] = floatFormatToChar[format] - frmtstr[2] = '\0' - c_sprintf(buf, frmtstr, f) - result = $buf - -proc formatFloat*(f: float, format: FloatFormatMode = ffDefault, - precision: range[0..32] = 16): string {. - noSideEffect, rtl, extern: "nsu$1".} = + when defined(js): + var precision = precision + if precision == -1: + # use the same default precision as c_snprintf + precision = 6 + var res: cstring + case format + of ffDefault: + {.emit: "`res` = `f`.toString();".} + of ffDecimal: + {.emit: "`res` = `f`.toFixed(`precision`);".} + of ffScientific: + {.emit: "`res` = `f`.toExponential(`precision`);".} + result = $res + if 1.0 / f == -Inf: + # JavaScript removes the "-" from negative Zero, add it back here + result = "-" & $res + for i in 0 ..< result.len: + # Depending on the locale either dot or comma is produced, + # but nothing else is possible: + if result[i] in {'.', ','}: result[i] = decimalSep + else: + const floatFormatToChar: array[FloatFormatMode, char] = ['g', 'f', 'e'] + var + frmtstr {.noinit.}: array[0..5, char] + buf {.noinit.}: array[0..2500, char] + L: cint + frmtstr[0] = '%' + if precision >= 0: + frmtstr[1] = '#' + frmtstr[2] = '.' 
+ frmtstr[3] = '*' + frmtstr[4] = floatFormatToChar[format] + frmtstr[5] = '\0' + L = c_snprintf(cast[cstring](addr buf), csize_t(2501), cast[cstring](addr frmtstr), precision, f) + else: + frmtstr[1] = floatFormatToChar[format] + frmtstr[2] = '\0' + L = c_snprintf(cast[cstring](addr buf), csize_t(2501), cast[cstring](addr frmtstr), f) + result = newString(L) + for i in 0 ..< L: + # Depending on the locale either dot or comma is produced, + # but nothing else is possible: + if buf[i] in {'.', ','}: result[i] = decimalSep + else: result[i] = buf[i] + when defined(windows): + # VS pre 2015 violates the C standard: "The exponent always contains at + # least two digits, and only as many more digits as necessary to + # represent the exponent." [C11 §7.21.6.1] + # The following post-processing fixes this behavior. + if result.len > 4 and result[^4] == '+' and result[^3] == '0': + result[^3] = result[^2] + result[^2] = result[^1] + result.setLen(result.len - 1) + +func formatFloat*(f: float, format: FloatFormatMode = ffDefault, + precision: range[-1..32] = 16; decimalSep = '.'): string {. + rtl, extern: "nsu$1".} = ## Converts a floating point value `f` to a string. ## - ## If ``format == ffDecimal`` then precision is the number of digits to + ## If `format == ffDecimal` then precision is the number of digits to ## be printed after the decimal point. - ## If ``format == ffScientific`` then precision is the maximum number + ## If `format == ffScientific` then precision is the maximum number ## of significant digits to be printed. ## `precision`'s default value is the maximum number of meaningful digits - ## after the decimal point for Nim's ``float`` type. - result = formatBiggestFloat(f, format, precision) - -proc formatSize*(bytes: BiggestInt, decimalSep = '.'): string = - ## Rounds and formats `bytes`. Examples: - ## - ## .. 
code-block:: nim - ## - ## formatSize(1'i64 shl 31 + 300'i64) == "2.204GB" - ## formatSize(4096) == "4KB" - ## - template frmt(a, b, c: expr): expr = - let bs = $b - insertSep($a) & decimalSep & bs.substr(0, 2) & c - let gigabytes = bytes shr 30 - let megabytes = bytes shr 20 - let kilobytes = bytes shr 10 - if gigabytes != 0: - result = frmt(gigabytes, megabytes, "GB") - elif megabytes != 0: - result = frmt(megabytes, kilobytes, "MB") - elif kilobytes != 0: - result = frmt(kilobytes, bytes, "KB") - else: - result = insertSep($bytes) & "B" + ## after the decimal point for Nim's `float` type. + ## + ## If `precision == -1`, it tries to format it nicely. + runnableExamples: + let x = 123.456 + doAssert x.formatFloat() == "123.4560000000000" + doAssert x.formatFloat(ffDecimal, 4) == "123.4560" + doAssert x.formatFloat(ffScientific, 2) == "1.23e+02" + + result = formatBiggestFloat(f, format, precision, decimalSep) + +func trimZeros*(x: var string; decimalSep = '.') = + ## Trim trailing zeros from a formatted floating point + ## value `x` (must be declared as `var`). + ## + ## This modifies `x` itself, it does not return a copy. + runnableExamples: + var x = "123.456000000" + x.trimZeros() + doAssert x == "123.456" + + let sPos = find(x, decimalSep) + if sPos >= 0: + var last = find(x, 'e', start = sPos) + last = if last >= 0: last - 1 else: high(x) + var pos = last + while pos >= 0 and x[pos] == '0': dec(pos) + if pos > sPos: inc(pos) + if last >= pos: + x.delete(pos..last) -proc findNormalized(x: string, inArray: openArray[string]): int = +type + BinaryPrefixMode* = enum ## The different names for binary prefixes. + bpIEC, # use the IEC/ISO standard prefixes such as kibi + bpColloquial # use the colloquial kilo, mega etc + +func formatSize*(bytes: int64, + decimalSep = '.', + prefix = bpIEC, + includeSpace = false): string = + ## Rounds and formats `bytes`. + ## + ## By default, uses the IEC/ISO standard binary prefixes, so 1024 will be + ## formatted as 1KiB. 
Set prefix to `bpColloquial` to use the colloquial + ## names from the SI standard (e.g. k for 1000 being reused as 1024). + ## + ## `includeSpace` can be set to true to include the (SI preferred) space + ## between the number and the unit (e.g. 1 KiB). + ## + ## See also: + ## * `strformat module<strformat.html>`_ for string interpolation and formatting + runnableExamples: + doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB" + doAssert formatSize((2.234*1024*1024).int) == "2.234MiB" + doAssert formatSize(4096, includeSpace = true) == "4 KiB" + doAssert formatSize(4096, prefix = bpColloquial, includeSpace = true) == "4 kB" + doAssert formatSize(4096) == "4KiB" + doAssert formatSize(5_378_934, prefix = bpColloquial, decimalSep = ',') == "5,13MB" + + const iecPrefixes = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"] + const collPrefixes = ["", "k", "M", "G", "T", "P", "E", "Z", "Y"] + var + xb: int64 = bytes + fbytes: float + lastXb: int64 = bytes + matchedIndex = 0 + prefixes: array[9, string] + if prefix == bpColloquial: + prefixes = collPrefixes + else: + prefixes = iecPrefixes + + # Iterate through prefixes seeing if value will be greater than + # 0 in each case + for index in 1..<prefixes.len: + lastXb = xb + xb = bytes div (1'i64 shl (index*10)) + matchedIndex = index + if xb == 0: + xb = lastXb + matchedIndex = index - 1 + break + # xb has the integer number for the latest value; index should be correct + fbytes = bytes.float / (1'i64 shl (matchedIndex*10)).float + result = formatFloat(fbytes, format = ffDecimal, precision = 3, + decimalSep = decimalSep) + result.trimZeros(decimalSep) + if includeSpace: + result &= " " + result &= prefixes[matchedIndex] + result &= "B" + +func formatEng*(f: BiggestFloat, + precision: range[0..32] = 10, + trim: bool = true, + siPrefix: bool = false, + unit: string = "", + decimalSep = '.', + useUnitSpace = false): string = + ## Converts a floating point value `f` to a string using engineering notation. 
+ ## + ## Numbers in of the range -1000.0<f<1000.0 will be formatted without an + ## exponent. Numbers outside of this range will be formatted as a + ## significand in the range -1000.0<f<1000.0 and an exponent that will always + ## be an integer multiple of 3, corresponding with the SI prefix scale k, M, + ## G, T etc for numbers with an absolute value greater than 1 and m, μ, n, p + ## etc for numbers with an absolute value less than 1. + ## + ## The default configuration (`trim=true` and `precision=10`) shows the + ## **shortest** form that precisely (up to a maximum of 10 decimal places) + ## displays the value. For example, 4.100000 will be displayed as 4.1 (which + ## is mathematically identical) whereas 4.1000003 will be displayed as + ## 4.1000003. + ## + ## If `trim` is set to true, trailing zeros will be removed; if false, the + ## number of digits specified by `precision` will always be shown. + ## + ## `precision` can be used to set the number of digits to be shown after the + ## decimal point or (if `trim` is true) the maximum number of digits to be + ## shown. + ## + ## ```nim + ## formatEng(0, 2, trim=false) == "0.00" + ## formatEng(0, 2) == "0" + ## formatEng(0.053, 0) == "53e-3" + ## formatEng(52731234, 2) == "52.73e6" + ## formatEng(-52731234, 2) == "-52.73e6" + ## ``` + ## + ## If `siPrefix` is set to true, the number will be displayed with the SI + ## prefix corresponding to the exponent. For example 4100 will be displayed + ## as "4.1 k" instead of "4.1e3". Note that `u` is used for micro- in place + ## of the greek letter mu (μ) as per ISO 2955. Numbers with an absolute + ## value outside of the range 1e-18<f<1000e18 (1a<f<1000E) will be displayed + ## with an exponent rather than an SI prefix, regardless of whether + ## `siPrefix` is true. + ## + ## If `useUnitSpace` is true, the provided unit will be appended to the string + ## (with a space as required by the SI standard). 
This behaviour is slightly + ## different to appending the unit to the result as the location of the space + ## is altered depending on whether there is an exponent. + ## + ## ```nim + ## formatEng(4100, siPrefix=true, unit="V") == "4.1 kV" + ## formatEng(4.1, siPrefix=true, unit="V") == "4.1 V" + ## formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space + ## formatEng(4100, siPrefix=true) == "4.1 k" + ## formatEng(4.1, siPrefix=true, unit="") == "4.1 " # Space with unit="" + ## formatEng(4100, siPrefix=true, unit="") == "4.1 k" + ## formatEng(4100) == "4.1e3" + ## formatEng(4100, unit="V") == "4.1e3 V" + ## formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 " # Space with useUnitSpace=true + ## ``` + ## + ## `decimalSep` is used as the decimal separator. + ## + ## See also: + ## * `strformat module<strformat.html>`_ for string interpolation and formatting + var + absolute: BiggestFloat + significand: BiggestFloat + fexponent: BiggestFloat + exponent: int + splitResult: seq[string] + suffix: string = "" + func getPrefix(exp: int): char = + ## Get the SI prefix for a given exponent + ## + ## Assumes exponent is a multiple of 3; returns ' ' if no prefix found + const siPrefixes = ['a', 'f', 'p', 'n', 'u', 'm', ' ', 'k', 'M', 'G', 'T', + 'P', 'E'] + var index: int = (exp div 3) + 6 + result = ' ' + if index in low(siPrefixes)..high(siPrefixes): + result = siPrefixes[index] + + # Most of the work is done with the sign ignored, so get the absolute value + absolute = abs(f) + significand = f + + if absolute == 0.0: + # Simple case: just format it and force the exponent to 0 + exponent = 0 + result = significand.formatBiggestFloat(ffDecimal, precision, + decimalSep = '.') + else: + # Find the best exponent that's a multiple of 3 + fexponent = floor(log10(absolute)) + fexponent = 3.0 * floor(fexponent / 3.0) + # Adjust the significand for the new exponent + significand /= pow(10.0, fexponent) + + # Adjust the significand and check whether it has affected + # the 
exponent + absolute = abs(significand) + if absolute >= 1000.0: + significand *= 0.001 + fexponent += 3 + # Components of the result: + result = significand.formatBiggestFloat(ffDecimal, precision, + decimalSep = '.') + exponent = fexponent.int() + + splitResult = result.split('.') + result = splitResult[0] + # result should have at most one decimal character + if splitResult.len() > 1: + # If trim is set, we get rid of trailing zeros. Don't use trimZeros here as + # we can be a bit more efficient through knowledge that there will never be + # an exponent in this part. + if trim: + while splitResult[1].endsWith("0"): + # Trim last character + splitResult[1].setLen(splitResult[1].len-1) + if splitResult[1].len() > 0: + result &= decimalSep & splitResult[1] + else: + result &= decimalSep & splitResult[1] + + # Combine the results accordingly + if siPrefix and exponent != 0: + var p = getPrefix(exponent) + if p != ' ': + suffix = " " & p + exponent = 0 # Exponent replaced by SI prefix + if suffix == "" and useUnitSpace: + suffix = " " + suffix &= unit + if exponent != 0: + result &= "e" & $exponent + result &= suffix + +func findNormalized(x: string, inArray: openArray[string]): int = var i = 0 while i < high(inArray): if cmpIgnoreStyle(x, inArray[i]) == 0: return i @@ -1285,21 +2790,20 @@ proc findNormalized(x: string, inArray: openArray[string]): int = # security hole... return -1 -proc invalidFormatString() {.noinline.} = - raise newException(ValueError, "invalid format string") +func invalidFormatString(formatstr: string) {.noinline.} = + raise newException(ValueError, "invalid format string: " & formatstr) -proc addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {. - noSideEffect, rtl, extern: "nsuAddf".} = - ## The same as ``add(s, formatstr % a)``, but more efficient. +func addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {.rtl, + extern: "nsuAddf".} = + ## The same as `add(s, formatstr % a)`, but more efficient. 
const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'} var i = 0 var num = 0 while i < len(formatstr): - if formatstr[i] == '$': - case formatstr[i+1] # again we use the fact that strings - # are zero-terminated here + if formatstr[i] == '$' and i+1 < len(formatstr): + case formatstr[i+1] of '#': - if num >% a.high: invalidFormatString() + if num > a.high: invalidFormatString(formatstr) add s, a[num] inc i, 2 inc num @@ -1311,34 +2815,49 @@ proc addf*(s: var string, formatstr: string, a: varargs[string, `$`]) {. inc(i) # skip $ var negative = formatstr[i] == '-' if negative: inc i - while formatstr[i] in Digits: + while i < formatstr.len and formatstr[i] in Digits: j = j * 10 + ord(formatstr[i]) - ord('0') inc(i) let idx = if not negative: j-1 else: a.len-j - if idx >% a.high: invalidFormatString() + if idx < 0 or idx > a.high: invalidFormatString(formatstr) add s, a[idx] of '{': - var j = i+1 - while formatstr[j] notin {'\0', '}'}: inc(j) - var x = findNormalized(substr(formatstr, i+2, j-1), a) - if x >= 0 and x < high(a): add s, a[x+1] - else: invalidFormatString() + var j = i+2 + var k = 0 + var negative = formatstr[j] == '-' + if negative: inc j + var isNumber = 0 + while j < formatstr.len and formatstr[j] notin {'\0', '}'}: + if formatstr[j] in Digits: + k = k * 10 + ord(formatstr[j]) - ord('0') + if isNumber == 0: isNumber = 1 + else: + isNumber = -1 + inc(j) + if isNumber == 1: + let idx = if not negative: k-1 else: a.len-k + if idx < 0 or idx > a.high: invalidFormatString(formatstr) + add s, a[idx] + else: + var x = findNormalized(substr(formatstr, i+2, j-1), a) + if x >= 0 and x < high(a): add s, a[x+1] + else: invalidFormatString(formatstr) i = j+1 of 'a'..'z', 'A'..'Z', '\128'..'\255', '_': var j = i+1 - while formatstr[j] in PatternChars: inc(j) + while j < formatstr.len and formatstr[j] in PatternChars: inc(j) var x = findNormalized(substr(formatstr, i+1, j-1), a) if x >= 0 and x < high(a): add s, a[x+1] - else: invalidFormatString() 
+ else: invalidFormatString(formatstr) i = j else: - invalidFormatString() + invalidFormatString(formatstr) else: add s, formatstr[i] inc(i) -proc `%` *(formatstr: string, a: openArray[string]): string {.noSideEffect, - rtl, extern: "nsuFormatOpenArray".} = +func `%`*(formatstr: string, a: openArray[string]): string {.rtl, + extern: "nsuFormatOpenArray".} = ## Interpolates a format string with the values from `a`. ## ## The `substitution`:idx: operator performs string substitutions in @@ -1347,82 +2866,167 @@ proc `%` *(formatstr: string, a: openArray[string]): string {.noSideEffect, ## ## This is best explained by an example: ## - ## .. code-block:: nim + ## ```nim ## "$1 eats $2." % ["The cat", "fish"] + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ```nim ## "The cat eats fish." + ## ``` ## - ## The substitution variables (the thing after the ``$``) are enumerated - ## from 1 to ``a.len``. - ## To produce a verbatim ``$``, use ``$$``. - ## The notation ``$#`` can be used to refer to the next substitution + ## The substitution variables (the thing after the `$`) are enumerated + ## from 1 to `a.len`. + ## To produce a verbatim `$`, use `$$`. + ## The notation `$#` can be used to refer to the next substitution ## variable: ## - ## .. code-block:: nim + ## ```nim ## "$# eats $#." % ["The cat", "fish"] + ## ``` ## ## Substitution variables can also be words (that is - ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even + ## `[A-Za-z_]+[A-Za-z0-9_]*`) in which case the arguments in `a` with even ## indices are keys and with odd indices are the corresponding values. ## An example: ## - ## .. code-block:: nim + ## ```nim ## "$animal eats $food." % ["animal", "The cat", "food", "fish"] + ## ``` ## ## Results in: ## - ## .. code-block:: nim + ## ```nim ## "The cat eats fish." + ## ``` ## ## The variables are compared with `cmpIgnoreStyle`. `ValueError` is ## raised if an ill-formed format string has been passed to the `%` operator. 
+ ## + ## See also: + ## * `strformat module<strformat.html>`_ for string interpolation and formatting result = newStringOfCap(formatstr.len + a.len shl 4) addf(result, formatstr, a) -proc `%` *(formatstr, a: string): string {.noSideEffect, - rtl, extern: "nsuFormatSingleElem".} = - ## This is the same as ``formatstr % [a]``. +func `%`*(formatstr, a: string): string {.rtl, + extern: "nsuFormatSingleElem".} = + ## This is the same as `formatstr % [a]` (see + ## `% func<#%25,string,openArray[string]>`_). result = newStringOfCap(formatstr.len + a.len) addf(result, formatstr, [a]) -proc format*(formatstr: string, a: varargs[string, `$`]): string {.noSideEffect, - rtl, extern: "nsuFormatVarargs".} = - ## This is the same as ``formatstr % a`` except that it supports +func format*(formatstr: string, a: varargs[string, `$`]): string {.rtl, + extern: "nsuFormatVarargs".} = + ## This is the same as `formatstr % a` (see + ## `% func<#%25,string,openArray[string]>`_) except that it supports ## auto stringification. + ## + ## See also: + ## * `strformat module<strformat.html>`_ for string interpolation and formatting result = newStringOfCap(formatstr.len + a.len) addf(result, formatstr, a) -{.pop.} -when isMainModule: - doAssert align("abc", 4) == " abc" - doAssert align("a", 0) == "a" - doAssert align("1232", 6) == " 1232" - doAssert align("1232", 6, '#') == "##1232" - echo wordWrap(""" this is a long text -- muchlongerthan10chars and here - it goes""", 10, false) - doAssert formatBiggestFloat(0.00000000001, ffDecimal, 11) == "0.00000000001" - doAssert formatBiggestFloat(0.00000000001, ffScientific, 1) == "1.0e-11" - - doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c" - echo formatSize(1'i64 shl 31 + 300'i64) # == "4,GB" - echo formatSize(1'i64 shl 31) - - doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] == - "The cat eats fish." 
- - doAssert "-ld a-ldz -ld".replaceWord("-ld") == " a-ldz " - doAssert "-lda-ldz -ld abc".replaceWord("-ld") == "-lda-ldz abc" - - type MyEnum = enum enA, enB, enC, enuD, enE - doAssert parseEnum[MyEnum]("enu_D") == enuD - - doAssert parseEnum("invalid enum value", enC) == enC - - doAssert count("foofoofoo", "foofoo") == 1 - doAssert count("foofoofoo", "foofoo", overlapping = true) == 2 - doAssert count("foofoofoo", 'f') == 3 - doAssert count("foofoofoobar", {'f','b'}) == 4 +func strip*(s: string, leading = true, trailing = true, + chars: set[char] = Whitespace): string {.rtl, extern: "nsuStrip".} = + ## Strips leading or trailing `chars` (default: whitespace characters) + ## from `s` and returns the resulting string. + ## + ## If `leading` is true (default), leading `chars` are stripped. + ## If `trailing` is true (default), trailing `chars` are stripped. + ## If both are false, the string is returned unchanged. + ## + ## See also: + ## * `strip proc<strbasics.html#strip,string,set[char]>`_ Inplace version. + ## * `stripLineEnd func<#stripLineEnd,string>`_ + runnableExamples: + let a = " vhellov " + let b = strip(a) + doAssert b == "vhellov" + + doAssert a.strip(leading = false) == " vhellov" + doAssert a.strip(trailing = false) == "vhellov " + + doAssert b.strip(chars = {'v'}) == "hello" + doAssert b.strip(leading = false, chars = {'v'}) == "vhello" + + let c = "blaXbla" + doAssert c.strip(chars = {'b', 'a'}) == "laXbl" + doAssert c.strip(chars = {'b', 'a', 'l'}) == "X" + + var + first = 0 + last = len(s)-1 + if leading: + while first <= last and s[first] in chars: inc(first) + if trailing: + while last >= first and s[last] in chars: dec(last) + result = substr(s, first, last) + +func stripLineEnd*(s: var string) = + ## Strips one of these suffixes from `s` in-place: + ## `\r, \n, \r\n, \f, \v` (at most once instance). + ## For example, can be useful in conjunction with `osproc.execCmdEx`. 
+ ## aka: `chomp`:idx: + runnableExamples: + var s = "foo\n\n" + s.stripLineEnd + doAssert s == "foo\n" + s = "foo\r\n" + s.stripLineEnd + doAssert s == "foo" + + if s.len > 0: + case s[^1] + of '\n': + if s.len > 1 and s[^2] == '\r': + s.setLen s.len-2 + else: + s.setLen s.len-1 + of '\r', '\v', '\f': + s.setLen s.len-1 + else: + discard + + +iterator tokenize*(s: string, seps: set[char] = Whitespace): tuple[ + token: string, isSep: bool] = + ## Tokenizes the string `s` into substrings. + ## + ## Substrings are separated by a substring containing only `seps`. + ## Example: + ## + ## ```nim + ## for word in tokenize(" this is an example "): + ## writeLine(stdout, word) + ## ``` + ## + ## Results in: + ## + ## ```nim + ## (" ", true) + ## ("this", false) + ## (" ", true) + ## ("is", false) + ## (" ", true) + ## ("an", false) + ## (" ", true) + ## ("example", false) + ## (" ", true) + ## ``` + var i = 0 + while true: + var j = i + var isSep = j < s.len and s[j] in seps + while j < s.len and (s[j] in seps) == isSep: inc(j) + if j > i: + yield (substr(s, i, j-1), isSep) + else: + break + i = j + +func isEmptyOrWhitespace*(s: string): bool {.rtl, + extern: "nsuIsEmptyOrWhitespace".} = + ## Checks if `s` is empty or consists entirely of whitespace characters. + result = s.allCharsInSet(Whitespace) diff --git a/lib/pure/subexes.nim b/lib/pure/subexes.nim deleted file mode 100644 index d701b85b1..000000000 --- a/lib/pure/subexes.nim +++ /dev/null @@ -1,401 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -## Nim support for `substitution expressions`:idx: (`subex`:idx:). -## -## .. include:: ../doc/subexes.txt -## - -{.push debugger:off .} # the user does not want to trace a part - # of the standard library! 
- -from strutils import parseInt, cmpIgnoreStyle, Digits -include "system/inclrtl" - - -proc findNormalized(x: string, inArray: openarray[string]): int = - var i = 0 - while i < high(inArray): - if cmpIgnoreStyle(x, inArray[i]) == 0: return i - inc(i, 2) # incrementing by 1 would probably lead to a - # security hole... - return -1 - -type - SubexError* = object of ValueError ## exception that is raised for - ## an invalid subex - -{.deprecated: [EInvalidSubex: SubexError].} - -proc raiseInvalidFormat(msg: string) {.noinline.} = - raise newException(SubexError, "invalid format string: " & msg) - -type - TFormatParser = object {.pure, final.} - when defined(js): - f: string # we rely on the '\0' terminator - # which JS's native string doesn't have - else: - f: cstring - num, i, lineLen: int - -template call(x: stmt) {.immediate.} = - p.i = i - x - i = p.i - -template callNoLineLenTracking(x: stmt) {.immediate.} = - let oldLineLen = p.lineLen - p.i = i - x - i = p.i - p.lineLen = oldLineLen - -proc getFormatArg(p: var TFormatParser, a: openArray[string]): int = - const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'} - var i = p.i - var f = p.f - case f[i] - of '#': - result = p.num - inc i - inc p.num - of '1'..'9', '-': - var j = 0 - var negative = f[i] == '-' - if negative: inc i - while f[i] in Digits: - j = j * 10 + ord(f[i]) - ord('0') - inc i - result = if not negative: j-1 else: a.len-j - of 'a'..'z', 'A'..'Z', '\128'..'\255', '_': - var name = "" - while f[i] in PatternChars: - name.add(f[i]) - inc(i) - result = findNormalized(name, a)+1 - of '$': - inc(i) - call: - result = getFormatArg(p, a) - result = parseInt(a[result])-1 - else: - raiseInvalidFormat("'#', '$', number or identifier expected") - if result >=% a.len: raiseInvalidFormat("index out of bounds: " & $result) - p.i = i - -proc scanDollar(p: var TFormatParser, a: openarray[string], s: var string) {. 
- noSideEffect.} - -proc emitChar(p: var TFormatParser, x: var string, ch: char) {.inline.} = - x.add(ch) - if ch == '\L': p.lineLen = 0 - else: inc p.lineLen - -proc emitStrLinear(p: var TFormatParser, x: var string, y: string) {.inline.} = - for ch in items(y): emitChar(p, x, ch) - -proc emitStr(p: var TFormatParser, x: var string, y: string) {.inline.} = - x.add(y) - inc p.lineLen, y.len - -proc scanQuote(p: var TFormatParser, x: var string, toAdd: bool) = - var i = p.i+1 - var f = p.f - while true: - if f[i] == '\'': - inc i - if f[i] != '\'': break - inc i - if toAdd: emitChar(p, x, '\'') - elif f[i] == '\0': raiseInvalidFormat("closing \"'\" expected") - else: - if toAdd: emitChar(p, x, f[i]) - inc i - p.i = i - -proc scanBranch(p: var TFormatParser, a: openArray[string], - x: var string, choice: int) = - var i = p.i - var f = p.f - var c = 0 - var elsePart = i - var toAdd = choice == 0 - while true: - case f[i] - of ']': break - of '|': - inc i - elsePart = i - inc c - if toAdd: break - toAdd = choice == c - of '\'': - call: scanQuote(p, x, toAdd) - of '\0': raiseInvalidFormat("closing ']' expected") - else: - if toAdd: - if f[i] == '$': - inc i - call: scanDollar(p, a, x) - else: - emitChar(p, x, f[i]) - inc i - else: - inc i - if not toAdd and choice >= 0: - # evaluate 'else' part: - var last = i - i = elsePart - while true: - case f[i] - of '|', ']': break - of '\'': - call: scanQuote(p, x, true) - of '$': - inc i - call: scanDollar(p, a, x) - else: - emitChar(p, x, f[i]) - inc i - i = last - p.i = i+1 - -proc scanSlice(p: var TFormatParser, a: openarray[string]): tuple[x, y: int] = - var slice = false - var i = p.i - var f = p.f - - if f[i] == '{': inc i - else: raiseInvalidFormat("'{' expected") - if f[i] == '.' and f[i+1] == '.': - inc i, 2 - slice = true - else: - call: result.x = getFormatArg(p, a) - if f[i] == '.' 
and f[i+1] == '.': - inc i, 2 - slice = true - if slice: - if f[i] != '}': - call: result.y = getFormatArg(p, a) - else: - result.y = high(a) - else: - result.y = result.x - if f[i] != '}': raiseInvalidFormat("'}' expected") - inc i - p.i = i - -proc scanDollar(p: var TFormatParser, a: openarray[string], s: var string) = - var i = p.i - var f = p.f - case f[i] - of '$': - emitChar p, s, '$' - inc i - of '*': - for j in 0..a.high: emitStr p, s, a[j] - inc i - of '{': - call: - let (x, y) = scanSlice(p, a) - for j in x..y: emitStr p, s, a[j] - of '[': - inc i - var start = i - call: scanBranch(p, a, s, -1) - var x: int - if f[i] == '{': - inc i - call: x = getFormatArg(p, a) - if f[i] != '}': raiseInvalidFormat("'}' expected") - inc i - else: - call: x = getFormatArg(p, a) - var last = i - let choice = parseInt(a[x]) - i = start - call: scanBranch(p, a, s, choice) - i = last - of '\'': - var sep = "" - callNoLineLenTracking: scanQuote(p, sep, true) - if f[i] == '~': - # $' '~{1..3} - # insert space followed by 1..3 if not empty - inc i - call: - let (x, y) = scanSlice(p, a) - var L = 0 - for j in x..y: inc L, a[j].len - if L > 0: - emitStrLinear p, s, sep - for j in x..y: emitStr p, s, a[j] - else: - block StringJoin: - block OptionalLineLengthSpecifier: - var maxLen = 0 - case f[i] - of '0'..'9': - while f[i] in Digits: - maxLen = maxLen * 10 + ord(f[i]) - ord('0') - inc i - of '$': - # do not skip the '$' here for `getFormatArg`! 
- call: - maxLen = getFormatArg(p, a) - else: break OptionalLineLengthSpecifier - var indent = "" - case f[i] - of 'i': - inc i - callNoLineLenTracking: scanQuote(p, indent, true) - - call: - let (x, y) = scanSlice(p, a) - if maxLen < 1: emitStrLinear(p, s, indent) - var items = 1 - emitStr p, s, a[x] - for j in x+1..y: - emitStr p, s, sep - if items >= maxLen: - emitStrLinear p, s, indent - items = 0 - emitStr p, s, a[j] - inc items - of 'c': - inc i - callNoLineLenTracking: scanQuote(p, indent, true) - - call: - let (x, y) = scanSlice(p, a) - if p.lineLen + a[x].len > maxLen: emitStrLinear(p, s, indent) - emitStr p, s, a[x] - for j in x+1..y: - emitStr p, s, sep - if p.lineLen + a[j].len > maxLen: emitStrLinear(p, s, indent) - emitStr p, s, a[j] - - else: raiseInvalidFormat("unit 'c' (chars) or 'i' (items) expected") - break StringJoin - - call: - let (x, y) = scanSlice(p, a) - emitStr p, s, a[x] - for j in x+1..y: - emitStr p, s, sep - emitStr p, s, a[j] - else: - call: - var x = getFormatArg(p, a) - emitStr p, s, a[x] - p.i = i - - -type - Subex* = distinct string ## string that contains a substitution expression - -{.deprecated: [TSubex: Subex].} - -proc subex*(s: string): Subex = - ## constructs a *substitution expression* from `s`. Currently this performs - ## no syntax checking but this may change in later versions. - result = Subex(s) - -proc addf*(s: var string, formatstr: Subex, a: varargs[string, `$`]) {. - noSideEffect, rtl, extern: "nfrmtAddf".} = - ## The same as ``add(s, formatstr % a)``, but more efficient. - var p: TFormatParser - p.f = formatstr.string - var i = 0 - while i < len(formatstr.string): - if p.f[i] == '$': - inc i - call: scanDollar(p, a, s) - else: - emitChar(p, s, p.f[i]) - inc(i) - -proc `%` *(formatstr: Subex, a: openarray[string]): string {.noSideEffect, - rtl, extern: "nfrmtFormatOpenArray".} = - ## The `substitution`:idx: operator performs string substitutions in - ## `formatstr` and returns a modified `formatstr`. 
This is often called - ## `string interpolation`:idx:. - ## - result = newStringOfCap(formatstr.string.len + a.len shl 4) - addf(result, formatstr, a) - -proc `%` *(formatstr: Subex, a: string): string {.noSideEffect, - rtl, extern: "nfrmtFormatSingleElem".} = - ## This is the same as ``formatstr % [a]``. - result = newStringOfCap(formatstr.string.len + a.len) - addf(result, formatstr, [a]) - -proc format*(formatstr: Subex, a: varargs[string, `$`]): string {.noSideEffect, - rtl, extern: "nfrmtFormatVarargs".} = - ## The `substitution`:idx: operator performs string substitutions in - ## `formatstr` and returns a modified `formatstr`. This is often called - ## `string interpolation`:idx:. - ## - result = newStringOfCap(formatstr.string.len + a.len shl 4) - addf(result, formatstr, a) - -{.pop.} - -when isMainModule: - - proc `%`(formatstr: string, a: openarray[string]): string = - result = newStringOfCap(formatstr.len + a.len shl 4) - addf(result, formatstr.Subex, a) - - proc `%`(formatstr: string, a: string): string = - result = newStringOfCap(formatstr.len + a.len) - addf(result, formatstr.Subex, [a]) - - - doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c" - doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] == - "The cat eats fish." 
- - - doAssert "$[abc|def]# $3 $# $#" % ["17", "b", "c"] == "def c b c" - doAssert "$[abc|def]# $3 $# $#" % ["1", "b", "c"] == "def c b c" - doAssert "$[abc|def]# $3 $# $#" % ["0", "b", "c"] == "abc c b c" - doAssert "$[abc|def|]# $3 $# $#" % ["17", "b", "c"] == " c b c" - - doAssert "$[abc|def|]# $3 $# $#" % ["-9", "b", "c"] == " c b c" - doAssert "$1($', '{2..})" % ["f", "a", "b"] == "f(a, b)" - - doAssert "$[$1($', '{2..})|''''|fg'$3']1" % ["7", "a", "b"] == "fg$3" - - doAssert "$[$#($', '{#..})|''''|$3]1" % ["0", "a", "b"] == "0(a, b)" - doAssert "$' '~{..}" % "" == "" - doAssert "$' '~{..}" % "P0" == " P0" - doAssert "${$1}" % "1" == "1" - doAssert "${$$-1} $$1" % "1" == "1 $1" - - doAssert "$#($', '10c'\n '{#..})" % ["doAssert", "longishA", "longish"] == - """doAssert( - longishA, - longish)""" - - echo "type TMyEnum* = enum\n $', '2i'\n '{..}" % ["fieldA", - "fieldB", "FiledClkad", "fieldD", "fieldE", "longishFieldName"] - - doAssert subex"$1($', '{2..})" % ["f", "a", "b", "c"] == "f(a, b, c)" - - doAssert subex"$1 $[files|file|files]{1} copied" % ["1"] == "1 file copied" - - doAssert subex"$['''|'|''''|']']#" % "0" == "'|" - - echo subex("type\n TEnum = enum\n $', '40c'\n '{..}") % [ - "fieldNameA", "fieldNameB", "fieldNameC", "fieldNameD"] - - diff --git a/lib/pure/sugar.nim b/lib/pure/sugar.nim new file mode 100644 index 000000000..90ba20c13 --- /dev/null +++ b/lib/pure/sugar.nim @@ -0,0 +1,429 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2015 Dominik Picheta +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements nice syntactic sugar based on Nim's +## macro system. 
+ +import std/private/since +import std/macros + +proc checkPragma(ex, prag: var NimNode) = + since (1, 3): + if ex.kind == nnkPragmaExpr: + prag = ex[1] + ex = ex[0] + +proc createProcType(p, b: NimNode): NimNode = + result = newNimNode(nnkProcTy) + var + formalParams = newNimNode(nnkFormalParams).add(b) + p = p + prag = newEmptyNode() + + checkPragma(p, prag) + + case p.kind + of nnkPar, nnkTupleConstr: + for i in 0 ..< p.len: + let ident = p[i] + var identDefs = newNimNode(nnkIdentDefs) + case ident.kind + of nnkExprColonExpr: + identDefs.add ident[0] + identDefs.add ident[1] + else: + identDefs.add newIdentNode("i" & $i) + identDefs.add(ident) + identDefs.add newEmptyNode() + formalParams.add identDefs + else: + var identDefs = newNimNode(nnkIdentDefs) + identDefs.add newIdentNode("i0") + identDefs.add(p) + identDefs.add newEmptyNode() + formalParams.add identDefs + + result.add formalParams + result.add prag + +macro `=>`*(p, b: untyped): untyped = + ## Syntax sugar for anonymous procedures. It also supports pragmas. + ## + ## .. warning:: Semicolons can not be used to separate procedure arguments. + runnableExamples: + proc passTwoAndTwo(f: (int, int) -> int): int = f(2, 2) + + assert passTwoAndTwo((x, y) => x + y) == 4 + + type + Bot = object + call: (string {.noSideEffect.} -> string) + + var myBot = Bot() + + myBot.call = (name: string) {.noSideEffect.} => "Hello " & name & ", I'm a bot." + assert myBot.call("John") == "Hello John, I'm a bot." 
+ + let f = () => (discard) # simplest proc that returns void + f() + + var + params = @[ident"auto"] + name = newEmptyNode() + kind = nnkLambda + pragma = newEmptyNode() + p = p + + checkPragma(p, pragma) + + if p.kind == nnkInfix and p[0].kind == nnkIdent and p[0].eqIdent"->": + params[0] = p[2] + p = p[1] + + checkPragma(p, pragma) # check again after -> transform + + case p.kind + of nnkPar, nnkTupleConstr: + var untypedBeforeColon = 0 + for i, c in p: + var identDefs = newNimNode(nnkIdentDefs) + case c.kind + of nnkExprColonExpr: + let t = c[1] + since (1, 3): + # + 1 here because of return type in params + for j in (i - untypedBeforeColon + 1) .. i: + params[j][1] = t + untypedBeforeColon = 0 + identDefs.add(c[0]) + identDefs.add(t) + identDefs.add(newEmptyNode()) + of nnkIdent: + identDefs.add(c) + identDefs.add(newIdentNode("auto")) + identDefs.add(newEmptyNode()) + inc untypedBeforeColon + of nnkInfix: + if c[0].kind == nnkIdent and c[0].eqIdent"->": + var procTy = createProcType(c[1], c[2]) + params[0] = procTy[0][0] + for i in 1 ..< procTy[0].len: + params.add(procTy[0][i]) + else: + error("Expected proc type (->) got (" & c[0].strVal & ").", c) + break + else: + error("Incorrect procedure parameter.", c) + params.add(identDefs) + of nnkIdent, nnkOpenSymChoice, nnkClosedSymChoice, nnkSym: + var identDefs = newNimNode(nnkIdentDefs) + identDefs.add(ident $p) + identDefs.add(ident"auto") + identDefs.add(newEmptyNode()) + params.add(identDefs) + else: + error("Incorrect procedure parameter list.", p) + result = newProc(body = b, params = params, + pragmas = pragma, name = name, + procType = kind) + +macro `->`*(p, b: untyped): untyped = + ## Syntax sugar for procedure types. It also supports pragmas. + ## + ## .. warning:: Semicolons can not be used to separate procedure arguments. 
+ runnableExamples: + proc passTwoAndTwo(f: (int, int) -> int): int = f(2, 2) + # is the same as: + # proc passTwoAndTwo(f: proc (x, y: int): int): int = f(2, 2) + + assert passTwoAndTwo((x, y) => x + y) == 4 + + proc passOne(f: (int {.noSideEffect.} -> int)): int = f(1) + # is the same as: + # proc passOne(f: proc (x: int): int {.noSideEffect.}): int = f(1) + + assert passOne(x {.noSideEffect.} => x + 1) == 2 + + result = createProcType(p, b) + +macro dump*(x: untyped): untyped = + ## Dumps the content of an expression, useful for debugging. + ## It accepts any expression and prints a textual representation + ## of the tree representing the expression - as it would appear in + ## source code - together with the value of the expression. + ## + ## See also: `dumpToString` which is more convenient and useful since + ## it expands intermediate templates/macros, returns a string instead of + ## calling `echo`, and works with statements and expressions. + runnableExamples("-r:off"): + let + x = 10 + y = 20 + dump(x + y) # prints: `x + y = 30` + + let s = x.toStrLit + result = quote do: + debugEcho `s`, " = ", `x` + +macro dumpToStringImpl(s: static string, x: typed): string = + let s2 = x.toStrLit + if x.typeKind == ntyVoid: + result = quote do: + `s` & ": " & `s2` + else: + result = quote do: + `s` & ": " & `s2` & " = " & $`x` + +macro dumpToString*(x: untyped): string = + ## Returns the content of a statement or expression `x` after semantic analysis, + ## useful for debugging. 
+ runnableExamples: + const a = 1 + let x = 10 + assert dumpToString(a + 2) == "a + 2: 3 = 3" + assert dumpToString(a + x) == "a + x: 1 + x = 11" + template square(x): untyped = x * x + assert dumpToString(square(x)) == "square(x): x * x = 100" + assert not compiles dumpToString(1 + nonexistent) + import std/strutils + assert "failedAssertImpl" in dumpToString(assert true) # example with a statement + result = newCall(bindSym"dumpToStringImpl") + result.add newLit repr(x) + result.add x + +# TODO: consider exporting this in macros.nim +proc freshIdentNodes(ast: NimNode): NimNode = + # Replace NimIdent and NimSym by a fresh ident node + # see also https://github.com/nim-lang/Nim/pull/8531#issuecomment-410436458 + proc inspect(node: NimNode): NimNode = + case node.kind: + of nnkIdent, nnkSym, nnkOpenSymChoice, nnkClosedSymChoice, nnkOpenSym: + result = ident($node) + of nnkEmpty, nnkLiterals: + result = node + else: + result = node.kind.newTree() + for child in node: + result.add inspect(child) + result = inspect(ast) + +macro capture*(locals: varargs[typed], body: untyped): untyped {.since: (1, 1).} = + ## Useful when creating a closure in a loop to capture some local loop variables + ## by their current iteration values. + runnableExamples: + import std/strformat + + var myClosure: () -> string + for i in 5..7: + for j in 7..9: + if i * j == 42: + capture i, j: + myClosure = () => fmt"{i} * {j} = 42" + assert myClosure() == "6 * 7 = 42" + + var params = @[newIdentNode("auto")] + let locals = if locals.len == 1 and locals[0].kind == nnkBracket: locals[0] + else: locals + for arg in locals: + proc getIdent(n: NimNode): NimNode = + case n.kind + of nnkIdent, nnkSym: + let nStr = n.strVal + if nStr == "result": + error("The variable name cannot be `result`!", n) + result = ident(nStr) + of nnkHiddenDeref: result = n[0].getIdent() + else: + error("The argument to be captured `" & n.repr & "` is not a pure identifier. 
" & + "It is an unsupported `" & $n.kind & "` node.", n) + let argName = getIdent(arg) + params.add(newIdentDefs(argName, freshIdentNodes getTypeInst arg)) + result = newNimNode(nnkCall) + result.add(newProc(newEmptyNode(), params, body, nnkLambda)) + for arg in locals: result.add(arg) + +since (1, 1): + import std/private/underscored_calls + + macro dup*[T](arg: T, calls: varargs[untyped]): T = + ## Turns an `in-place`:idx: algorithm into one that works on + ## a copy and returns this copy, without modifying its input. + ## + ## This macro also allows for (otherwise in-place) function chaining. + ## + ## **Since:** Version 1.2. + runnableExamples: + import std/algorithm + + let a = @[1, 2, 3, 4, 5, 6, 7, 8, 9] + assert a.dup(sort) == sorted(a) + + # Chaining: + var aCopy = a + aCopy.insert(10) + assert a.dup(insert(10), sort) == sorted(aCopy) + + let s1 = "abc" + let s2 = "xyz" + assert s1 & s2 == s1.dup(&= s2) + + # An underscore (_) can be used to denote the place of the argument you're passing: + assert "".dup(addQuoted(_, "foo")) == "\"foo\"" + # but `_` is optional here since the substitution is in 1st position: + assert "".dup(addQuoted("foo")) == "\"foo\"" + + proc makePalindrome(s: var string) = + for i in countdown(s.len-2, 0): + s.add(s[i]) + + let c = "xyz" + + # chaining: + let d = dup c: + makePalindrome # xyzyx + sort(_, SortOrder.Descending) # zyyxx + makePalindrome # zyyxxxyyz + assert d == "zyyxxxyyz" + + result = newNimNode(nnkStmtListExpr, arg) + let tmp = genSym(nskVar, "dupResult") + result.add newVarStmt(tmp, arg) + underscoredCalls(result, calls, tmp) + result.add tmp + +proc trans(n, res, bracketExpr: NimNode): (NimNode, NimNode, NimNode) {.since: (1, 1).} = + # Looks for the last statement of the last statement, etc... 
+ case n.kind + of nnkIfExpr, nnkIfStmt, nnkTryStmt, nnkCaseStmt, nnkWhenStmt: + result[0] = copyNimTree(n) + result[1] = copyNimTree(n) + result[2] = copyNimTree(n) + for i in ord(n.kind == nnkCaseStmt) ..< n.len: + (result[0][i], result[1][^1], result[2][^1]) = trans(n[i], res, bracketExpr) + of nnkStmtList, nnkStmtListExpr, nnkBlockStmt, nnkBlockExpr, nnkWhileStmt, + nnkForStmt, nnkElifBranch, nnkElse, nnkElifExpr, nnkOfBranch, nnkExceptBranch: + result[0] = copyNimTree(n) + result[1] = copyNimTree(n) + result[2] = copyNimTree(n) + if n.len >= 1: + (result[0][^1], result[1][^1], result[2][^1]) = trans(n[^1], + res, bracketExpr) + of nnkTableConstr: + result[1] = n[0][0] + result[2] = n[0][1] + if bracketExpr.len == 0: + bracketExpr.add(ident"initTable") # don't import tables + if bracketExpr.len == 1: + bracketExpr.add([newCall(bindSym"typeof", + newEmptyNode()), newCall(bindSym"typeof", newEmptyNode())]) + template adder(res, k, v) = res[k] = v + result[0] = getAst(adder(res, n[0][0], n[0][1])) + of nnkCurly: + result[2] = n[0] + if bracketExpr.len == 0: + bracketExpr.add(ident"initHashSet") + if bracketExpr.len == 1: + bracketExpr.add(newCall(bindSym"typeof", newEmptyNode())) + template adder(res, v) = res.incl(v) + result[0] = getAst(adder(res, n[0])) + else: + result[2] = n + if bracketExpr.len == 0: + bracketExpr.add(bindSym"newSeq") + if bracketExpr.len == 1: + bracketExpr.add(newCall(bindSym"typeof", newEmptyNode())) + template adder(res, v) = res.add(v) + result[0] = getAst(adder(res, n)) + +proc collectImpl(init, body: NimNode): NimNode {.since: (1, 1).} = + let res = genSym(nskVar, "collectResult") + var bracketExpr: NimNode + if init != nil: + expectKind init, {nnkCall, nnkIdent, nnkSym, nnkClosedSymChoice, nnkOpenSymChoice, nnkOpenSym} + bracketExpr = newTree(nnkBracketExpr, + if init.kind in {nnkCall, nnkClosedSymChoice, nnkOpenSymChoice, nnkOpenSym}: + freshIdentNodes(init[0]) else: freshIdentNodes(init)) + else: + bracketExpr = 
newTree(nnkBracketExpr) + let (resBody, keyType, valueType) = trans(body, res, bracketExpr) + if bracketExpr.len == 3: + bracketExpr[1][1] = keyType + bracketExpr[2][1] = valueType + else: + bracketExpr[1][1] = valueType + let call = newTree(nnkCall, bracketExpr) + if init != nil and init.kind == nnkCall: + for i in 1 ..< init.len: + call.add init[i] + result = newTree(nnkStmtListExpr, newVarStmt(res, call), resBody, res) + +macro collect*(init, body: untyped): untyped {.since: (1, 1).} = + ## Comprehension for seqs/sets/tables. + ## + ## The last expression of `body` has special syntax that specifies + ## the collection's add operation. Use `{e}` for set's `incl`, + ## `{k: v}` for table's `[]=` and `e` for seq's `add`. + # analyse the body, find the deepest expression 'it' and replace it via + # 'result.add it' + runnableExamples: + import std/[sets, tables] + + let data = @["bird", "word"] + + ## seq: + let k = collect(newSeq): + for i, d in data.pairs: + if i mod 2 == 0: d + assert k == @["bird"] + + ## seq with initialSize: + let x = collect(newSeqOfCap(4)): + for i, d in data.pairs: + if i mod 2 == 0: d + assert x == @["bird"] + + ## HashSet: + let y = collect(initHashSet()): + for d in data.items: {d} + assert y == data.toHashSet + + ## Table: + let z = collect(initTable(2)): + for i, d in data.pairs: {i: d} + assert z == {0: "bird", 1: "word"}.toTable + + result = collectImpl(init, body) + +macro collect*(body: untyped): untyped {.since: (1, 5).} = + ## Same as `collect` but without an `init` parameter. 
+ ## + ## **See also:** + ## * `sequtils.toSeq proc<sequtils.html#toSeq.t%2Cuntyped>`_ + ## * `sequtils.mapIt template<sequtils.html#mapIt.t%2Ctyped%2Cuntyped>`_ + runnableExamples: + import std/[sets, tables] + let data = @["bird", "word"] + + # seq: + let k = collect: + for i, d in data.pairs: + if i mod 2 == 0: d + assert k == @["bird"] + + ## HashSet: + let n = collect: + for d in data.items: {d} + assert n == data.toHashSet + + ## Table: + let m = collect: + for i, d in data.pairs: {i: d} + assert m == {0: "bird", 1: "word"}.toTable + + result = collectImpl(nil, body) diff --git a/lib/pure/terminal.nim b/lib/pure/terminal.nim index df637dcb6..53b3d61da 100644 --- a/lib/pure/terminal.nim +++ b/lib/pure/terminal.nim @@ -12,331 +12,796 @@ ## sequences and does not depend on any other module, on Windows it uses the ## Windows API. ## Changing the style is permanent even after program termination! Use the -## code ``system.addQuitProc(resetAttributes)`` to restore the defaults. +## code `exitprocs.addExitProc(resetAttributes)` to restore the defaults. +## Similarly, if you hide the cursor, make sure to unhide it with +## `showCursor` before quitting. +## +## Progress bar +## ============ +## +## Basic progress bar example: +runnableExamples("-r:off"): + import std/[os, strutils] -import macros + for i in 0..100: + stdout.styledWriteLine(fgRed, "0% ", fgWhite, '#'.repeat i, if i > 50: fgGreen else: fgYellow, "\t", $i , "%") + sleep 42 + cursorUp 1 + eraseLine() + + stdout.resetAttributes() + +##[ +## Playing with colorful and styled text +]## + +## Procs like `styledWriteLine`, `styledEcho` etc. have a temporary effect on +## text parameters. Style parameters only affect the text parameter right after them. +## After being called, these procs will reset the default style of the terminal. +## While `setBackGroundColor`, `setForeGroundColor` etc. have a lasting +## influence on the terminal, you can use `resetAttributes` to +## reset the default style of the terminal. 
+runnableExamples("-r:off"): + stdout.styledWriteLine({styleBright, styleBlink, styleUnderscore}, "styled text ") + stdout.styledWriteLine(fgRed, "red text ") + stdout.styledWriteLine(fgWhite, bgRed, "white text in red background") + stdout.styledWriteLine(" ordinary text without style ") + + stdout.setBackGroundColor(bgCyan, true) + stdout.setForeGroundColor(fgBlue) + stdout.write("blue text in cyan background") + stdout.resetAttributes() + + # You can specify multiple text parameters. Style parameters + # only affect the text parameter right after them. + styledEcho styleBright, fgGreen, "[PASS]", resetStyle, fgGreen, " Yay!" + + stdout.styledWriteLine(fgRed, "red text ", styleBright, "bold red", fgDefault, " bold text") + +import std/macros +import std/strformat +from std/strutils import toLowerAscii, `%`, parseInt +import std/colors when defined(windows): - import windows, os + import std/winlean - var - conHandle: THandle - # = createFile("CONOUT$", GENERIC_WRITE, 0, nil, OPEN_ALWAYS, 0, 0) +when defined(nimPreviewSlimSystem): + import std/[syncio, assertions] + +type + PTerminal = ref object + trueColorIsSupported: bool + trueColorIsEnabled: bool + fgSetColor: bool + when defined(windows): + hStdout: Handle + hStderr: Handle + oldStdoutAttr: int16 + oldStderrAttr: int16 + +var gTerm {.threadvar.}: owned(PTerminal) + +when defined(windows) and defined(consoleapp): + proc newTerminal(): owned(PTerminal) {.gcsafe, raises: [OSError].} +else: + proc newTerminal(): owned(PTerminal) {.gcsafe, raises: [].} + +proc getTerminal(): PTerminal {.inline.} = + if isNil(gTerm): + gTerm = newTerminal() + result = gTerm + +const + fgPrefix = "\e[38;2;" + bgPrefix = "\e[48;2;" + ansiResetCode* = "\e[0m" + getPos = "\e[6n" + stylePrefix = "\e[" + +when defined(windows): + import std/[winlean, os] + + const + DUPLICATE_SAME_ACCESS = 2 + FOREGROUND_BLUE = 1 + FOREGROUND_GREEN = 2 + FOREGROUND_RED = 4 + FOREGROUND_INTENSITY = 8 + BACKGROUND_BLUE = 16 + BACKGROUND_GREEN = 32 + 
BACKGROUND_RED = 64 + BACKGROUND_INTENSITY = 128 + FOREGROUND_RGB = FOREGROUND_RED or FOREGROUND_GREEN or FOREGROUND_BLUE + BACKGROUND_RGB = BACKGROUND_RED or BACKGROUND_GREEN or BACKGROUND_BLUE + + ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004 + + type + SHORT = int16 + COORD = object + x: SHORT + y: SHORT + + SMALL_RECT = object + left: SHORT + top: SHORT + right: SHORT + bottom: SHORT + + CONSOLE_SCREEN_BUFFER_INFO = object + dwSize: COORD + dwCursorPosition: COORD + wAttributes: int16 + srWindow: SMALL_RECT + dwMaximumWindowSize: COORD + + CONSOLE_CURSOR_INFO = object + dwSize: DWORD + bVisible: WINBOOL + + proc duplicateHandle(hSourceProcessHandle: Handle, hSourceHandle: Handle, + hTargetProcessHandle: Handle, lpTargetHandle: ptr Handle, + dwDesiredAccess: DWORD, bInheritHandle: WINBOOL, + dwOptions: DWORD): WINBOOL{.stdcall, dynlib: "kernel32", + importc: "DuplicateHandle".} + proc getCurrentProcess(): Handle{.stdcall, dynlib: "kernel32", + importc: "GetCurrentProcess".} + proc getConsoleScreenBufferInfo(hConsoleOutput: Handle, + lpConsoleScreenBufferInfo: ptr CONSOLE_SCREEN_BUFFER_INFO): WINBOOL{.stdcall, + dynlib: "kernel32", importc: "GetConsoleScreenBufferInfo".} + + proc getConsoleCursorInfo(hConsoleOutput: Handle, + lpConsoleCursorInfo: ptr CONSOLE_CURSOR_INFO): WINBOOL{. + stdcall, dynlib: "kernel32", importc: "GetConsoleCursorInfo".} + + proc setConsoleCursorInfo(hConsoleOutput: Handle, + lpConsoleCursorInfo: ptr CONSOLE_CURSOR_INFO): WINBOOL{. 
+ stdcall, dynlib: "kernel32", importc: "SetConsoleCursorInfo".} + + proc terminalWidthIoctl*(handles: openArray[Handle]): int = + var csbi: CONSOLE_SCREEN_BUFFER_INFO + for h in handles: + if getConsoleScreenBufferInfo(h, addr csbi) != 0: + return int(csbi.srWindow.right - csbi.srWindow.left + 1) + return 0 + + proc terminalHeightIoctl*(handles: openArray[Handle]): int = + var csbi: CONSOLE_SCREEN_BUFFER_INFO + for h in handles: + if getConsoleScreenBufferInfo(h, addr csbi) != 0: + return int(csbi.srWindow.bottom - csbi.srWindow.top + 1) + return 0 + + proc terminalWidth*(): int = + ## Returns the terminal width in columns. + var w: int = 0 + w = terminalWidthIoctl([getStdHandle(STD_INPUT_HANDLE), + getStdHandle(STD_OUTPUT_HANDLE), + getStdHandle(STD_ERROR_HANDLE)]) + if w > 0: return w + return 80 + + proc terminalHeight*(): int = + ## Returns the terminal height in rows. + var h: int = 0 + h = terminalHeightIoctl([getStdHandle(STD_INPUT_HANDLE), + getStdHandle(STD_OUTPUT_HANDLE), + getStdHandle(STD_ERROR_HANDLE)]) + if h > 0: return h + return 0 + + proc setConsoleCursorPosition(hConsoleOutput: Handle, + dwCursorPosition: COORD): WINBOOL{. + stdcall, dynlib: "kernel32", importc: "SetConsoleCursorPosition".} - block: - var hTemp = GetStdHandle(STD_OUTPUT_HANDLE) - if DuplicateHandle(GetCurrentProcess(), hTemp, GetCurrentProcess(), - addr(conHandle), 0, 1, DUPLICATE_SAME_ACCESS) == 0: + proc fillConsoleOutputCharacter(hConsoleOutput: Handle, cCharacter: char, + nLength: DWORD, dwWriteCoord: COORD, + lpNumberOfCharsWritten: ptr DWORD): WINBOOL{. + stdcall, dynlib: "kernel32", importc: "FillConsoleOutputCharacterA".} + + proc fillConsoleOutputAttribute(hConsoleOutput: Handle, wAttribute: int16, + nLength: DWORD, dwWriteCoord: COORD, + lpNumberOfAttrsWritten: ptr DWORD): WINBOOL{. + stdcall, dynlib: "kernel32", importc: "FillConsoleOutputAttribute".} + + proc setConsoleTextAttribute(hConsoleOutput: Handle, + wAttributes: int16): WINBOOL{. 
+ stdcall, dynlib: "kernel32", importc: "SetConsoleTextAttribute".} + + proc getConsoleMode(hConsoleHandle: Handle, dwMode: ptr DWORD): WINBOOL{. + stdcall, dynlib: "kernel32", importc: "GetConsoleMode".} + + proc setConsoleMode(hConsoleHandle: Handle, dwMode: DWORD): WINBOOL{. + stdcall, dynlib: "kernel32", importc: "SetConsoleMode".} + + proc getCursorPos(h: Handle): tuple [x, y: int] = + var c: CONSOLE_SCREEN_BUFFER_INFO + if getConsoleScreenBufferInfo(h, addr(c)) == 0: raiseOSError(osLastError()) + return (int(c.dwCursorPosition.x), int(c.dwCursorPosition.y)) + + proc getCursorPos*(): tuple [x, y: int] {.raises: [ValueError, IOError, OSError].} = + return getCursorPos(getStdHandle(STD_OUTPUT_HANDLE)) - proc getCursorPos(): tuple [x,y: int] = - var c: TCONSOLESCREENBUFFERINFO - if GetConsoleScreenBufferInfo(conHandle, addr(c)) == 0: + proc setCursorPos(h: Handle, x, y: int) = + var c: COORD + c.x = int16(x) + c.y = int16(y) + if setConsoleCursorPosition(h, c) == 0: raiseOSError(osLastError()) - return (int(c.dwCursorPosition.X), int(c.dwCursorPosition.Y)) - proc getAttributes(): int16 = - var c: TCONSOLESCREENBUFFERINFO + proc getAttributes(h: Handle): int16 = + var c: CONSOLE_SCREEN_BUFFER_INFO # workaround Windows bugs: try several times - if GetConsoleScreenBufferInfo(conHandle, addr(c)) != 0: + if getConsoleScreenBufferInfo(h, addr(c)) != 0: return c.wAttributes return 0x70'i16 # ERROR: return white background, black text - var - oldAttr = getAttributes() + proc initTerminal(term: PTerminal) = + var hStdoutTemp = getStdHandle(STD_OUTPUT_HANDLE) + if duplicateHandle(getCurrentProcess(), hStdoutTemp, getCurrentProcess(), + addr(term.hStdout), 0, 1, DUPLICATE_SAME_ACCESS) == 0: + when defined(consoleapp): + raiseOSError(osLastError()) + var hStderrTemp = getStdHandle(STD_ERROR_HANDLE) + if duplicateHandle(getCurrentProcess(), hStderrTemp, getCurrentProcess(), + addr(term.hStderr), 0, 1, DUPLICATE_SAME_ACCESS) == 0: + when defined(consoleapp): + 
raiseOSError(osLastError()) + term.oldStdoutAttr = getAttributes(term.hStdout) + term.oldStderrAttr = getAttributes(term.hStderr) + + template conHandle(f: File): Handle = + let term = getTerminal() + if f == stderr: term.hStderr else: term.hStdout else: - import termios, unsigned + import std/[termios, posix, os, parseutils] proc setRaw(fd: FileHandle, time: cint = TCSAFLUSH) = var mode: Termios - discard fd.tcgetattr(addr mode) - mode.iflag = mode.iflag and not Tcflag(BRKINT or ICRNL or INPCK or ISTRIP or IXON) - mode.oflag = mode.oflag and not Tcflag(OPOST) - mode.cflag = (mode.cflag and not Tcflag(CSIZE or PARENB)) or CS8 - mode.lflag = mode.lflag and not Tcflag(ECHO or ICANON or IEXTEN or ISIG) - mode.cc[VMIN] = 1.cuchar - mode.cc[VTIME] = 0.cuchar - discard fd.tcsetattr(time, addr mode) - -proc setCursorPos*(x, y: int) = - ## sets the terminal's cursor to the (x,y) position. (0,0) is the - ## upper left of the screen. + discard fd.tcGetAttr(addr mode) + mode.c_iflag = mode.c_iflag and not Cflag(BRKINT or ICRNL or INPCK or + ISTRIP or IXON) + mode.c_oflag = mode.c_oflag and not Cflag(OPOST) + mode.c_cflag = (mode.c_cflag and not Cflag(CSIZE or PARENB)) or CS8 + mode.c_lflag = mode.c_lflag and not Cflag(ECHO or ICANON or IEXTEN or ISIG) + mode.c_cc[VMIN] = 1.cuchar + mode.c_cc[VTIME] = 0.cuchar + discard fd.tcSetAttr(time, addr mode) + + proc getCursorPos*(): tuple [x, y: int] {.raises: [ValueError, IOError].} = + ## Returns cursor position (x, y) + ## writes to stdout and expects the terminal to respond via stdin + var + xStr = "" + yStr = "" + ch: char + ct: int + readX = false + + # use raw mode to ask terminal for cursor position + let fd = getFileHandle(stdin) + var oldMode: Termios + discard fd.tcGetAttr(addr oldMode) + fd.setRaw() + stdout.write(getPos) + flushFile(stdout) + + try: + # parse response format: [yyy;xxxR + while true: + let n = readBuffer(stdin, addr ch, 1) + if n == 0 or ch == 'R': + if xStr == "" or yStr == "": + raise 
newException(ValueError, "Got character position message that was missing data") + break + ct += 1 + if ct > 16: + raise newException(ValueError, "Got unterminated character position message from terminal") + if ch == ';': + readX = true + elif ch in {'0'..'9'}: + if readX: + xStr.add(ch) + else: + yStr.add(ch) + finally: + # restore previous terminal mode + discard fd.tcSetAttr(TCSADRAIN, addr oldMode) + + return (parseInt(xStr), parseInt(yStr)) + + proc terminalWidthIoctl*(fds: openArray[int]): int = + ## Returns terminal width from first fd that supports the ioctl. + + var win: IOctl_WinSize + for fd in fds: + if ioctl(cint(fd), TIOCGWINSZ, addr win) != -1: + return int(win.ws_col) + return 0 + + proc terminalHeightIoctl*(fds: openArray[int]): int = + ## Returns terminal height from first fd that supports the ioctl. + + var win: IOctl_WinSize + for fd in fds: + if ioctl(cint(fd), TIOCGWINSZ, addr win) != -1: + return int(win.ws_row) + return 0 + + var L_ctermid{.importc, header: "<stdio.h>".}: cint + + proc terminalWidth*(): int = + ## Returns some reasonable terminal width from either standard file + ## descriptors, controlling terminal, environment variables or tradition. + + # POSIX environment variable takes precendence. + # _COLUMNS_: This variable shall represent a decimal integer >0 used + # to indicate the user's preferred width in column positions for + # the terminal screen or window. If this variable is unset or null, + # the implementation determines the number of columns, appropriate + # for the terminal or window, in an unspecified manner. + # When COLUMNS is set, any terminal-width information implied by TERM + # is overridden. Users and conforming applications should not set COLUMNS + # unless they wish to override the system selection and produce output + # unrelated to the terminal characteristics. 
+ # See POSIX Base Definitions Section 8.1 Environment Variable Definition + + var w: int + var s = getEnv("COLUMNS") # Try standard env var + if len(s) > 0 and parseSaturatedNatural(s, w) > 0 and w > 0: + return w + w = terminalWidthIoctl([0, 1, 2]) # Try standard file descriptors + if w > 0: return w + var cterm = newString(L_ctermid) # Try controlling tty + var fd = open(ctermid(cstring(cterm)), O_RDONLY) + if fd != -1: + w = terminalWidthIoctl([int(fd)]) + discard close(fd) + if w > 0: return w + return 80 # Finally default to venerable value + + proc terminalHeight*(): int = + ## Returns some reasonable terminal height from either standard file + ## descriptors, controlling terminal, environment variables or tradition. + ## Zero is returned if the height could not be determined. + + # POSIX environment variable takes precendence. + # _LINES_: This variable shall represent a decimal integer >0 used + # to indicate the user's preferred number of lines on a page or + # the vertical screen or window size in lines. A line in this case + # is a vertical measure large enough to hold the tallest character + # in the character set being displayed. If this variable is unset or null, + # the implementation determines the number of lines, appropriate + # for the terminal or window (size, terminal baud rate, and so on), + # in an unspecified manner. + # When LINES is set, any terminal-height information implied by TERM + # is overridden. Users and conforming applications should not set LINES + # unless they wish to override the system selection and produce output + # unrelated to the terminal characteristics. 
+ # See POSIX Base Definitions Section 8.1 Environment Variable Definition + + var h: int + var s = getEnv("LINES") # Try standard env var + if len(s) > 0 and parseSaturatedNatural(s, h) > 0 and h > 0: + return h + h = terminalHeightIoctl([0, 1, 2]) # Try standard file descriptors + if h > 0: return h + var cterm = newString(L_ctermid) # Try controlling tty + var fd = open(ctermid(cstring(cterm)), O_RDONLY) + if fd != -1: + h = terminalHeightIoctl([int(fd)]) + discard close(fd) + if h > 0: return h + return 0 # Could not determine height + +proc terminalSize*(): tuple[w, h: int] = + ## Returns the terminal width and height as a tuple. Internally calls + ## `terminalWidth` and `terminalHeight`, so the same assumptions apply. + result = (terminalWidth(), terminalHeight()) + +when defined(windows): + proc setCursorVisibility(f: File, visible: bool) = + var ccsi: CONSOLE_CURSOR_INFO + let h = conHandle(f) + if getConsoleCursorInfo(h, addr(ccsi)) == 0: + raiseOSError(osLastError()) + ccsi.bVisible = if visible: 1 else: 0 + if setConsoleCursorInfo(h, addr(ccsi)) == 0: + raiseOSError(osLastError()) + +proc hideCursor*(f: File) = + ## Hides the cursor. + when defined(windows): + setCursorVisibility(f, false) + else: + f.write("\e[?25l") + +proc showCursor*(f: File) = + ## Shows the cursor. + when defined(windows): + setCursorVisibility(f, true) + else: + f.write("\e[?25h") + +proc setCursorPos*(f: File, x, y: int) = + ## Sets the terminal's cursor to the (x,y) position. + ## (0,0) is the upper left of the screen. when defined(windows): - var c: TCOORD - c.X = int16(x) - c.Y = int16(y) - if SetConsoleCursorPosition(conHandle, c) == 0: raiseOSError(osLastError()) + let h = conHandle(f) + setCursorPos(h, x, y) else: - stdout.write("\e[" & $y & ';' & $x & 'f') + f.write(fmt"{stylePrefix}{y+1};{x+1}f") -proc setCursorXPos*(x: int) = - ## sets the terminal's cursor to the x position. The y position is - ## not changed. 
+proc setCursorXPos*(f: File, x: int) = + ## Sets the terminal's cursor to the x position. + ## The y position is not changed. when defined(windows): - var scrbuf: TCONSOLESCREENBUFFERINFO - var hStdout = conHandle - if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: + let h = conHandle(f) + var scrbuf: CONSOLE_SCREEN_BUFFER_INFO + if getConsoleScreenBufferInfo(h, addr(scrbuf)) == 0: raiseOSError(osLastError()) var origin = scrbuf.dwCursorPosition - origin.X = int16(x) - if SetConsoleCursorPosition(conHandle, origin) == 0: + origin.x = int16(x) + if setConsoleCursorPosition(h, origin) == 0: raiseOSError(osLastError()) else: - stdout.write("\e[" & $x & 'G') + f.write(fmt"{stylePrefix}{x+1}G") when defined(windows): - proc setCursorYPos*(y: int) = - ## sets the terminal's cursor to the y position. The x position is - ## not changed. **Warning**: This is not supported on UNIX! + proc setCursorYPos*(f: File, y: int) = + ## Sets the terminal's cursor to the y position. + ## The x position is not changed. + ## .. warning:: This is not supported on UNIX! when defined(windows): - var scrbuf: TCONSOLESCREENBUFFERINFO - var hStdout = conHandle - if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: + let h = conHandle(f) + var scrbuf: CONSOLE_SCREEN_BUFFER_INFO + if getConsoleScreenBufferInfo(h, addr(scrbuf)) == 0: raiseOSError(osLastError()) var origin = scrbuf.dwCursorPosition - origin.Y = int16(y) - if SetConsoleCursorPosition(conHandle, origin) == 0: + origin.y = int16(y) + if setConsoleCursorPosition(h, origin) == 0: raiseOSError(osLastError()) else: discard -proc cursorUp*(count=1) = +proc cursorUp*(f: File, count = 1) = ## Moves the cursor up by `count` rows. 
+ runnableExamples("-r:off"): + stdout.cursorUp(2) + write(stdout, "Hello World!") # anything written at that location will be erased/replaced with this when defined(windows): - var p = getCursorPos() + let h = conHandle(f) + var p = getCursorPos(h) dec(p.y, count) - setCursorPos(p.x, p.y) + setCursorPos(h, p.x, p.y) else: - stdout.write("\e[" & $count & 'A') + f.write("\e[" & $count & 'A') -proc cursorDown*(count=1) = +proc cursorDown*(f: File, count = 1) = ## Moves the cursor down by `count` rows. + runnableExamples("-r:off"): + stdout.cursorDown(2) + write(stdout, "Hello World!") # anything written at that location will be erased/replaced with this when defined(windows): - var p = getCursorPos() + let h = conHandle(f) + var p = getCursorPos(h) inc(p.y, count) - setCursorPos(p.x, p.y) + setCursorPos(h, p.x, p.y) else: - stdout.write("\e[" & $count & 'B') + f.write(fmt"{stylePrefix}{count}B") -proc cursorForward*(count=1) = +proc cursorForward*(f: File, count = 1) = ## Moves the cursor forward by `count` columns. + runnableExamples("-r:off"): + stdout.cursorForward(2) + write(stdout, "Hello World!") # anything written at that location will be erased/replaced with this when defined(windows): - var p = getCursorPos() + let h = conHandle(f) + var p = getCursorPos(h) inc(p.x, count) - setCursorPos(p.x, p.y) + setCursorPos(h, p.x, p.y) else: - stdout.write("\e[" & $count & 'C') + f.write(fmt"{stylePrefix}{count}C") -proc cursorBackward*(count=1) = +proc cursorBackward*(f: File, count = 1) = ## Moves the cursor backward by `count` columns. 
+ runnableExamples("-r:off"): + stdout.cursorBackward(2) + write(stdout, "Hello World!") # anything written at that location will be erased/replaced with this when defined(windows): - var p = getCursorPos() + let h = conHandle(f) + var p = getCursorPos(h) dec(p.x, count) - setCursorPos(p.x, p.y) + setCursorPos(h, p.x, p.y) else: - stdout.write("\e[" & $count & 'D') + f.write(fmt"{stylePrefix}{count}D") when true: discard else: - proc eraseLineEnd* = + proc eraseLineEnd*(f: File) = ## Erases from the current cursor position to the end of the current line. when defined(windows): discard else: - stdout.write("\e[K") + f.write("\e[K") - proc eraseLineStart* = + proc eraseLineStart*(f: File) = ## Erases from the current cursor position to the start of the current line. when defined(windows): discard else: - stdout.write("\e[1K") + f.write("\e[1K") - proc eraseDown* = + proc eraseDown*(f: File) = ## Erases the screen from the current line down to the bottom of the screen. when defined(windows): discard else: - stdout.write("\e[J") + f.write("\e[J") - proc eraseUp* = + proc eraseUp*(f: File) = ## Erases the screen from the current line up to the top of the screen. when defined(windows): discard else: - stdout.write("\e[1J") + f.write("\e[1J") -proc eraseLine* = +proc eraseLine*(f: File) = ## Erases the entire current line. 
+ runnableExamples("-r:off"): + write(stdout, "never mind") + stdout.eraseLine() # nothing will be printed on the screen when defined(windows): - var scrbuf: TCONSOLESCREENBUFFERINFO + let h = conHandle(f) + var scrbuf: CONSOLE_SCREEN_BUFFER_INFO var numwrote: DWORD - var hStdout = conHandle - if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: + if getConsoleScreenBufferInfo(h, addr(scrbuf)) == 0: raiseOSError(osLastError()) var origin = scrbuf.dwCursorPosition - origin.X = 0'i16 - if SetConsoleCursorPosition(conHandle, origin) == 0: + origin.x = 0'i16 + if setConsoleCursorPosition(h, origin) == 0: raiseOSError(osLastError()) - var ht = scrbuf.dwSize.Y - origin.Y - var wt = scrbuf.dwSize.X - origin.X - if FillConsoleOutputCharacter(hStdout,' ', ht*wt, + var wt: DWORD = scrbuf.dwSize.x - origin.x + if fillConsoleOutputCharacter(h, ' ', wt, origin, addr(numwrote)) == 0: raiseOSError(osLastError()) - if FillConsoleOutputAttribute(hStdout, scrbuf.wAttributes, ht * wt, + if fillConsoleOutputAttribute(h, scrbuf.wAttributes, wt, scrbuf.dwCursorPosition, addr(numwrote)) == 0: raiseOSError(osLastError()) else: - stdout.write("\e[2K") - setCursorXPos(0) + f.write("\e[2K") + setCursorXPos(f, 0) -proc eraseScreen* = +proc eraseScreen*(f: File) = ## Erases the screen with the background colour and moves the cursor to home. 
when defined(windows): - var scrbuf: TCONSOLESCREENBUFFERINFO + let h = conHandle(f) + var scrbuf: CONSOLE_SCREEN_BUFFER_INFO var numwrote: DWORD - var origin: TCOORD # is inititalized to 0, 0 - var hStdout = conHandle + var origin: COORD # is inititalized to 0, 0 - if GetConsoleScreenBufferInfo(hStdout, addr(scrbuf)) == 0: + if getConsoleScreenBufferInfo(h, addr(scrbuf)) == 0: raiseOSError(osLastError()) - let numChars = int32(scrbuf.dwSize.X)*int32(scrbuf.dwSize.Y) + let numChars = int32(scrbuf.dwSize.x)*int32(scrbuf.dwSize.y) - if FillConsoleOutputCharacter(hStdout, ' ', numChars, + if fillConsoleOutputCharacter(h, ' ', numChars, origin, addr(numwrote)) == 0: raiseOSError(osLastError()) - if FillConsoleOutputAttribute(hStdout, scrbuf.wAttributes, numChars, + if fillConsoleOutputAttribute(h, scrbuf.wAttributes, numChars, origin, addr(numwrote)) == 0: raiseOSError(osLastError()) - setCursorXPos(0) + setCursorXPos(f, 0) else: - stdout.write("\e[2J") + f.write("\e[2J") -proc resetAttributes* {.noconv.} = - ## resets all attributes; it is advisable to register this as a quit proc - ## with ``system.addQuitProc(resetAttributes)``. +when not defined(windows): + var + gFG {.threadvar.}: int + gBG {.threadvar.}: int + +proc resetAttributes*(f: File) = + ## Resets all attributes. when defined(windows): - discard SetConsoleTextAttribute(conHandle, oldAttr) + let term = getTerminal() + if f == stderr: + discard setConsoleTextAttribute(term.hStderr, term.oldStderrAttr) + else: + discard setConsoleTextAttribute(term.hStdout, term.oldStdoutAttr) else: - stdout.write("\e[0m") + f.write(ansiResetCode) + gFG = 0 + gBG = 0 type - Style* = enum ## different styles for text output - styleBright = 1, ## bright text - styleDim, ## dim text - styleUnknown, ## unknown - styleUnderscore = 4, ## underscored text - styleBlink, ## blinking/bold text - styleReverse = 7, ## unknown - styleHidden ## hidden text + Style* = enum ## Different styles for text output. 
+ styleBright = 1, ## bright text + styleDim, ## dim text + styleItalic, ## italic (or reverse on terminals not supporting) + styleUnderscore, ## underscored text + styleBlink, ## blinking/bold text + styleBlinkRapid, ## rapid blinking/bold text (not widely supported) + styleReverse, ## reverse + styleHidden, ## hidden text + styleStrikethrough ## strikethrough -{.deprecated: [TStyle: Style].} +proc ansiStyleCode*(style: int): string = + result = fmt"{stylePrefix}{style}m" -when not defined(windows): - var - # XXX: These better be thread-local - gFG = 0 - gBG = 0 +template ansiStyleCode*(style: Style): string = + ansiStyleCode(style.int) -proc setStyle*(style: set[Style]) = - ## sets the terminal style +# The styleCache can be skipped when `style` is known at compile-time +template ansiStyleCode*(style: static[Style]): string = + (static(stylePrefix & $style.int & "m")) + +proc setStyle*(f: File, style: set[Style]) = + ## Sets the terminal style. when defined(windows): + let h = conHandle(f) + var old = getAttributes(h) and (FOREGROUND_RGB or BACKGROUND_RGB) var a = 0'i16 if styleBright in style: a = a or int16(FOREGROUND_INTENSITY) if styleBlink in style: a = a or int16(BACKGROUND_INTENSITY) if styleReverse in style: a = a or 0x4000'i16 # COMMON_LVB_REVERSE_VIDEO if styleUnderscore in style: a = a or 0x8000'i16 # COMMON_LVB_UNDERSCORE - discard SetConsoleTextAttribute(conHandle, a) + discard setConsoleTextAttribute(h, old or a) else: for s in items(style): - stdout.write("\e[" & $ord(s) & 'm') + f.write(ansiStyleCode(s)) proc writeStyled*(txt: string, style: set[Style] = {styleBright}) = - ## writes the text `txt` in a given `style`. + ## Writes the text `txt` in a given `style` to stdout. 
when defined(windows): - var old = getAttributes() - setStyle(style) + let term = getTerminal() + var old = getAttributes(term.hStdout) + stdout.setStyle(style) stdout.write(txt) - discard SetConsoleTextAttribute(conHandle, old) + discard setConsoleTextAttribute(term.hStdout, old) else: - setStyle(style) + stdout.setStyle(style) stdout.write(txt) - resetAttributes() + stdout.resetAttributes() if gFG != 0: - stdout.write("\e[" & $ord(gFG) & 'm') + stdout.write(ansiStyleCode(gFG)) if gBG != 0: - stdout.write("\e[" & $ord(gBG) & 'm') + stdout.write(ansiStyleCode(gBG)) type - ForegroundColor* = enum ## terminal's foreground colors - fgBlack = 30, ## black - fgRed, ## red - fgGreen, ## green - fgYellow, ## yellow - fgBlue, ## blue - fgMagenta, ## magenta - fgCyan, ## cyan - fgWhite ## white - - BackgroundColor* = enum ## terminal's background colors - bgBlack = 40, ## black - bgRed, ## red - bgGreen, ## green - bgYellow, ## yellow - bgBlue, ## blue - bgMagenta, ## magenta - bgCyan, ## cyan - bgWhite ## white - -{.deprecated: [TForegroundColor: ForegroundColor, - TBackgroundColor: BackgroundColor].} - -proc setForegroundColor*(fg: ForegroundColor, bright=false) = - ## sets the terminal's foreground color + ForegroundColor* = enum ## Terminal's foreground colors. + fgBlack = 30, ## black + fgRed, ## red + fgGreen, ## green + fgYellow, ## yellow + fgBlue, ## blue + fgMagenta, ## magenta + fgCyan, ## cyan + fgWhite, ## white + fg8Bit, ## 256-color (not supported, see `enableTrueColors` instead.) + fgDefault ## default terminal foreground color + + BackgroundColor* = enum ## Terminal's background colors. + bgBlack = 40, ## black + bgRed, ## red + bgGreen, ## green + bgYellow, ## yellow + bgBlue, ## blue + bgMagenta, ## magenta + bgCyan, ## cyan + bgWhite, ## white + bg8Bit, ## 256-color (not supported, see `enableTrueColors` instead.) 
+ bgDefault ## default terminal background color + +when defined(windows): + var defaultForegroundColor, defaultBackgroundColor: int16 = 0xFFFF'i16 # Default to an invalid value 0xFFFF + +proc setForegroundColor*(f: File, fg: ForegroundColor, bright = false) = + ## Sets the terminal's foreground color. when defined(windows): - var old = getAttributes() and not 0x0007 - if bright: - old = old or FOREGROUND_INTENSITY - const lookup: array [ForegroundColor, int] = [ - 0, + let h = conHandle(f) + var old = getAttributes(h) and not FOREGROUND_RGB + if defaultForegroundColor == 0xFFFF'i16: + defaultForegroundColor = old + old = if bright: old or FOREGROUND_INTENSITY + else: old and not(FOREGROUND_INTENSITY) + const lookup: array[ForegroundColor, int] = [ + 0, # ForegroundColor enum with ordinal 30 (FOREGROUND_RED), (FOREGROUND_GREEN), (FOREGROUND_RED or FOREGROUND_GREEN), (FOREGROUND_BLUE), (FOREGROUND_RED or FOREGROUND_BLUE), (FOREGROUND_BLUE or FOREGROUND_GREEN), - (FOREGROUND_BLUE or FOREGROUND_GREEN or FOREGROUND_RED)] - discard SetConsoleTextAttribute(conHandle, toU16(old or lookup[fg])) + (FOREGROUND_BLUE or FOREGROUND_GREEN or FOREGROUND_RED), + 0, # fg8Bit not supported, see `enableTrueColors` instead. + 0] # unused + if fg == fgDefault: + discard setConsoleTextAttribute(h, cast[int16](cast[uint16](old) or cast[uint16](defaultForegroundColor))) + else: + discard setConsoleTextAttribute(h, cast[int16](cast[uint16](old) or cast[uint16](lookup[fg]))) else: gFG = ord(fg) if bright: inc(gFG, 60) - stdout.write("\e[" & $gFG & 'm') + f.write(ansiStyleCode(gFG)) -proc setBackgroundColor*(bg: BackgroundColor, bright=false) = - ## sets the terminal's background color +proc setBackgroundColor*(f: File, bg: BackgroundColor, bright = false) = + ## Sets the terminal's background color. 
when defined(windows): - var old = getAttributes() and not 0x0070 - if bright: - old = old or BACKGROUND_INTENSITY - const lookup: array [BackgroundColor, int] = [ - 0, + let h = conHandle(f) + var old = getAttributes(h) and not BACKGROUND_RGB + if defaultBackgroundColor == 0xFFFF'i16: + defaultBackgroundColor = old + old = if bright: old or BACKGROUND_INTENSITY + else: old and not(BACKGROUND_INTENSITY) + const lookup: array[BackgroundColor, int] = [ + 0, # BackgroundColor enum with ordinal 40 (BACKGROUND_RED), (BACKGROUND_GREEN), (BACKGROUND_RED or BACKGROUND_GREEN), (BACKGROUND_BLUE), (BACKGROUND_RED or BACKGROUND_BLUE), (BACKGROUND_BLUE or BACKGROUND_GREEN), - (BACKGROUND_BLUE or BACKGROUND_GREEN or BACKGROUND_RED)] - discard SetConsoleTextAttribute(conHandle, toU16(old or lookup[bg])) + (BACKGROUND_BLUE or BACKGROUND_GREEN or BACKGROUND_RED), + 0, # bg8Bit not supported, see `enableTrueColors` instead. + 0] # unused + if bg == bgDefault: + discard setConsoleTextAttribute(h, cast[int16](cast[uint16](old) or cast[uint16](defaultBackgroundColor))) + else: + discard setConsoleTextAttribute(h, cast[int16](cast[uint16](old) or cast[uint16](lookup[bg]))) else: gBG = ord(bg) if bright: inc(gBG, 60) - stdout.write("\e[" & $gBG & 'm') + f.write(ansiStyleCode(gBG)) + +proc ansiForegroundColorCode*(fg: ForegroundColor, bright = false): string = + var style = ord(fg) + if bright: inc(style, 60) + return ansiStyleCode(style) + +template ansiForegroundColorCode*(fg: static[ForegroundColor], + bright: static[bool] = false): string = + ansiStyleCode(fg.int + bright.int * 60) + +proc ansiForegroundColorCode*(color: Color): string = + let rgb = extractRGB(color) + result = fmt"{fgPrefix}{rgb.r};{rgb.g};{rgb.b}m" + +template ansiForegroundColorCode*(color: static[Color]): string = + const rgb = extractRGB(color) + # no usage of `fmt`, see issue #7632 + (static("$1$2;$3;$4m" % [$fgPrefix, $(rgb.r), $(rgb.g), $(rgb.b)])) + +proc ansiBackgroundColorCode*(color: Color): string = + let 
rgb = extractRGB(color) + result = fmt"{bgPrefix}{rgb.r};{rgb.g};{rgb.b}m" + +template ansiBackgroundColorCode*(color: static[Color]): string = + const rgb = extractRGB(color) + # no usage of `fmt`, see issue #7632 + (static("$1$2;$3;$4m" % [$bgPrefix, $(rgb.r), $(rgb.g), $(rgb.b)])) + +proc setForegroundColor*(f: File, color: Color) = + ## Sets the terminal's foreground true color. + if getTerminal().trueColorIsEnabled: + f.write(ansiForegroundColorCode(color)) + +proc setBackgroundColor*(f: File, color: Color) = + ## Sets the terminal's background true color. + if getTerminal().trueColorIsEnabled: + f.write(ansiBackgroundColorCode(color)) + +proc setTrueColor(f: File, color: Color) = + let term = getTerminal() + if term.fgSetColor: + setForegroundColor(f, color) + else: + setBackgroundColor(f, color) proc isatty*(f: File): bool = - ## returns true if `f` is associated with a terminal device. + ## Returns true if `f` is associated with a terminal device. when defined(posix): proc isatty(fildes: FileHandle): cint {. 
importc: "isatty", header: "<unistd.h>".} @@ -346,44 +811,223 @@ proc isatty*(f: File): bool = result = isatty(getFileHandle(f)) != 0'i32 -proc styledEchoProcessArg(s: string) = write stdout, s -proc styledEchoProcessArg(style: Style) = setStyle({style}) -proc styledEchoProcessArg(style: set[Style]) = setStyle style -proc styledEchoProcessArg(color: ForegroundColor) = setForegroundColor color -proc styledEchoProcessArg(color: BackgroundColor) = setBackgroundColor color +type + TerminalCmd* = enum ## commands that can be expressed as arguments + resetStyle, ## reset attributes + fgColor, ## set foreground's true color + bgColor ## set background's true color + +template styledEchoProcessArg(f: File, s: string) = write f, s +template styledEchoProcessArg(f: File, style: Style) = setStyle(f, {style}) +template styledEchoProcessArg(f: File, style: set[Style]) = setStyle f, style +template styledEchoProcessArg(f: File, color: ForegroundColor) = + setForegroundColor f, color +template styledEchoProcessArg(f: File, color: BackgroundColor) = + setBackgroundColor f, color +template styledEchoProcessArg(f: File, color: Color) = + setTrueColor f, color +template styledEchoProcessArg(f: File, cmd: TerminalCmd) = + when cmd == resetStyle: + resetAttributes(f) + elif cmd in {fgColor, bgColor}: + let term = getTerminal() + term.fgSetColor = cmd == fgColor + +macro styledWrite*(f: File, m: varargs[typed]): untyped = + ## Similar to `write`, but treating terminal style arguments specially. + ## When some argument is `Style`, `set[Style]`, `ForegroundColor`, + ## `BackgroundColor` or `TerminalCmd` then it is not sent directly to + ## `f`, but instead corresponding terminal style proc is called. + runnableExamples("-r:off"): + stdout.styledWrite(fgRed, "red text ") + stdout.styledWrite(fgGreen, "green text") -macro styledEcho*(m: varargs[expr]): stmt = - ## to be documented. 
- let m = callsite() + var reset = false result = newNimNode(nnkStmtList) - for i in countup(1, m.len - 1): - result.add(newCall(bindSym"styledEchoProcessArg", m[i])) + for i in countup(0, m.len - 1): + let item = m[i] + case item.kind + of nnkStrLit..nnkTripleStrLit: + if i == m.len - 1: + # optimize if string literal is last, just call write + result.add(newCall(bindSym"write", f, item)) + if reset: result.add(newCall(bindSym"resetAttributes", f)) + return + else: + # if it is string literal just call write, do not enable reset + result.add(newCall(bindSym"write", f, item)) + else: + result.add(newCall(bindSym"styledEchoProcessArg", f, item)) + reset = true + if reset: result.add(newCall(bindSym"resetAttributes", f)) - result.add(newCall(bindSym"write", bindSym"stdout", newStrLitNode("\n"))) - result.add(newCall(bindSym"resetAttributes")) +template styledWriteLine*(f: File, args: varargs[untyped]) = + ## Calls `styledWrite` and appends a newline at the end. + runnableExamples: + proc error(msg: string) = + styledWriteLine(stderr, fgRed, "Error: ", resetStyle, msg) -when not defined(windows): - proc getch*(): char = - ## Read a single character from the terminal, blocking until it is entered. - ## The character is not printed to the terminal. This is not available for - ## Windows. + styledWrite(f, args) + write(f, "\n") + +template styledEcho*(args: varargs[untyped]) = + ## Echoes styles arguments to stdout using `styledWriteLine`. + stdout.styledWriteLine(args) + +proc getch*(): char = + ## Reads a single character from the terminal, blocking until it is entered. + ## The character is not printed to the terminal. 
+ when defined(windows): + let fd = getStdHandle(STD_INPUT_HANDLE) + var keyEvent = KEY_EVENT_RECORD() + var numRead: cint + while true: + # Block until character is entered + doAssert(waitForSingleObject(fd, INFINITE) == WAIT_OBJECT_0) + doAssert(readConsoleInput(fd, addr(keyEvent), 1, addr(numRead)) != 0) + if numRead == 0 or keyEvent.eventType != 1 or keyEvent.bKeyDown == 0: + continue + return char(keyEvent.uChar) + else: let fd = getFileHandle(stdin) var oldMode: Termios - discard fd.tcgetattr(addr oldMode) + discard fd.tcGetAttr(addr oldMode) fd.setRaw() result = stdin.readChar() - discard fd.tcsetattr(TCSADRAIN, addr oldMode) + discard fd.tcSetAttr(TCSADRAIN, addr oldMode) + +when defined(windows): + proc readPasswordFromStdin*(prompt: string, password: var string): + bool {.tags: [ReadIOEffect, WriteIOEffect].} = + ## Reads a `password` from stdin without printing it. `password` must not + ## be `nil`! Returns `false` if the end of the file has been reached, + ## `true` otherwise. 
+ password.setLen(0) + stdout.write(prompt) + let hi = createFileA("CONIN$", + GENERIC_READ or GENERIC_WRITE, 0, nil, OPEN_EXISTING, 0, 0) + var mode = DWORD 0 + discard getConsoleMode(hi, addr mode) + let origMode = mode + const + ENABLE_PROCESSED_INPUT = 1 + ENABLE_ECHO_INPUT = 4 + mode = (mode or ENABLE_PROCESSED_INPUT) and not ENABLE_ECHO_INPUT + + discard setConsoleMode(hi, mode) + result = readLine(stdin, password) + discard setConsoleMode(hi, origMode) + discard closeHandle(hi) + stdout.write "\n" + +else: + import std/termios + + proc readPasswordFromStdin*(prompt: string, password: var string): + bool {.tags: [ReadIOEffect, WriteIOEffect].} = + password.setLen(0) + let fd = stdin.getFileHandle() + var cur, old: Termios + discard fd.tcGetAttr(cur.addr) + old = cur + cur.c_lflag = cur.c_lflag and not Cflag(ECHO) + discard fd.tcSetAttr(TCSADRAIN, cur.addr) + stdout.write prompt + result = stdin.readLine(password) + stdout.write "\n" + discard fd.tcSetAttr(TCSADRAIN, old.addr) + +proc readPasswordFromStdin*(prompt = "password: "): string = + ## Reads a password from stdin without printing it. 
+ result = "" + discard readPasswordFromStdin(prompt, result) + + +# Wrappers assuming output to stdout: +template hideCursor*() = hideCursor(stdout) +template showCursor*() = showCursor(stdout) +template setCursorPos*(x, y: int) = setCursorPos(stdout, x, y) +template setCursorXPos*(x: int) = setCursorXPos(stdout, x) +when defined(windows): + template setCursorYPos*(x: int) = setCursorYPos(stdout, x) +template cursorUp*(count = 1) = cursorUp(stdout, count) +template cursorDown*(count = 1) = cursorDown(stdout, count) +template cursorForward*(count = 1) = cursorForward(stdout, count) +template cursorBackward*(count = 1) = cursorBackward(stdout, count) +template eraseLine*() = eraseLine(stdout) +template eraseScreen*() = eraseScreen(stdout) +template setStyle*(style: set[Style]) = + setStyle(stdout, style) +template setForegroundColor*(fg: ForegroundColor, bright = false) = + setForegroundColor(stdout, fg, bright) +template setBackgroundColor*(bg: BackgroundColor, bright = false) = + setBackgroundColor(stdout, bg, bright) +template setForegroundColor*(color: Color) = + setForegroundColor(stdout, color) +template setBackgroundColor*(color: Color) = + setBackgroundColor(stdout, color) +proc resetAttributes*() {.noconv.} = + ## Resets all attributes on stdout. + ## It is advisable to register this as a quit proc with + ## `exitprocs.addExitProc(resetAttributes)`. + resetAttributes(stdout) + +proc isTrueColorSupported*(): bool = + ## Returns true if a terminal supports true color. + return getTerminal().trueColorIsSupported + +when defined(windows): + import std/os + +proc enableTrueColors*() = + ## Enables true color. 
+ var term = getTerminal() + when defined(windows): + var + ver: OSVERSIONINFO + ver.dwOSVersionInfoSize = sizeof(ver).DWORD + let res = getVersionExW(addr ver) + if res == 0: + term.trueColorIsSupported = false + else: + term.trueColorIsSupported = ver.dwMajorVersion > 10 or + (ver.dwMajorVersion == 10 and (ver.dwMinorVersion > 0 or + (ver.dwMinorVersion == 0 and ver.dwBuildNumber >= 10586))) + if not term.trueColorIsSupported: + term.trueColorIsSupported = getEnv("ANSICON_DEF").len > 0 -when isMainModule: - system.addQuitProc(resetAttributes) - write(stdout, "never mind") - eraseLine() - #setCursorPos(2, 2) - writeStyled("styled text ", {styleBright, styleBlink, styleUnderscore}) - setBackGroundColor(bgCyan, true) - setForeGroundColor(fgBlue) - writeln(stdout, "ordinary text") + if term.trueColorIsSupported: + if getEnv("ANSICON_DEF").len == 0: + var mode: DWORD = 0 + if getConsoleMode(getStdHandle(STD_OUTPUT_HANDLE), addr(mode)) != 0: + mode = mode or ENABLE_VIRTUAL_TERMINAL_PROCESSING + if setConsoleMode(getStdHandle(STD_OUTPUT_HANDLE), mode) != 0: + term.trueColorIsEnabled = true + else: + term.trueColorIsEnabled = false + else: + term.trueColorIsEnabled = true + else: + term.trueColorIsSupported = getEnv("COLORTERM").toLowerAscii() in [ + "truecolor", "24bit"] + term.trueColorIsEnabled = term.trueColorIsSupported - styledEcho("styled text ", {styleBright, styleBlink, styleUnderscore}) +proc disableTrueColors*() = + ## Disables true color. 
+ var term = getTerminal() + when defined(windows): + if term.trueColorIsSupported: + if getEnv("ANSICON_DEF").len == 0: + var mode: DWORD = 0 + if getConsoleMode(getStdHandle(STD_OUTPUT_HANDLE), addr(mode)) != 0: + mode = mode and not ENABLE_VIRTUAL_TERMINAL_PROCESSING + discard setConsoleMode(getStdHandle(STD_OUTPUT_HANDLE), mode) + term.trueColorIsEnabled = false + else: + term.trueColorIsEnabled = false +proc newTerminal(): owned(PTerminal) = + new result + when defined(windows): + initTerminal(result) diff --git a/lib/pure/times.nim b/lib/pure/times.nim index 5cc9b4993..e59153455 100644 --- a/lib/pure/times.nim +++ b/lib/pure/times.nim @@ -1,1115 +1,2889 @@ # # # Nim's Runtime Library -# (c) Copyright 2013 Andreas Rumpf +# (c) Copyright 2018 Nim contributors # # See the file "copying.txt", included in this # distribution, for details about the copyright. # - -## This module contains routines and types for dealing with time. -## This module is available for the `JavaScript target -## <backends.html#the-javascript-target>`_. - -{.push debugger:off.} # the user does not want to trace a part - # of the standard library! - -import - strutils, parseutils - +##[ + The `times` module contains routines and types for dealing with time using + the `proleptic Gregorian calendar<https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar>`_. + It's also available for the + `JavaScript target <backends.html#backends-the-javascript-target>`_. + + Although the `times` module supports nanosecond time resolution, the + resolution used by `getTime()` depends on the platform and backend + (JS is limited to millisecond precision). 
+ + Examples + ======== + + ```nim + import std/[times, os] + # Simple benchmarking + let time = cpuTime() + sleep(100) # Replace this with something to be timed + echo "Time taken: ", cpuTime() - time + + # Current date & time + let now1 = now() # Current timestamp as a DateTime in local time + let now2 = now().utc # Current timestamp as a DateTime in UTC + let now3 = getTime() # Current timestamp as a Time + + # Arithmetic using Duration + echo "One hour from now : ", now() + initDuration(hours = 1) + # Arithmetic using TimeInterval + echo "One year from now : ", now() + 1.years + echo "One month from now : ", now() + 1.months + ``` + + Parsing and Formatting Dates + ============================ + + The `DateTime` type can be parsed and formatted using the different + `parse` and `format` procedures. + + ```nim + let dt = parse("2000-01-01", "yyyy-MM-dd") + echo dt.format("yyyy-MM-dd") + ``` + + The different format patterns that are supported are documented below. + + =========== ================================================================================= ============================================== + Pattern Description Example + =========== ================================================================================= ============================================== + `d` Numeric value representing the day of the month, | `1/04/2012 -> 1` + it will be either one or two digits long. | `21/04/2012 -> 21` + `dd` Same as above, but is always two digits. | `1/04/2012 -> 01` + | `21/04/2012 -> 21` + `ddd` Three letter string which indicates the day of the week. | `Saturday -> Sat` + | `Monday -> Mon` + `dddd` Full string for the day of the week. | `Saturday -> Saturday` + | `Monday -> Monday` + `GG` The last two digits of the Iso Week-Year | `30/12/2012 -> 13` + `GGGG` The Iso week-calendar year padded to four digits | `30/12/2012 -> 2013` + `h` The hours in one digit if possible. Ranging from 1-12. 
| `5pm -> 5` + | `2am -> 2` + `hh` The hours in two digits always. If the hour is one digit, 0 is prepended. | `5pm -> 05` + | `11am -> 11` + `H` The hours in one digit if possible, ranging from 0-23. | `5pm -> 17` + | `2am -> 2` + `HH` The hours in two digits always. 0 is prepended if the hour is one digit. | `5pm -> 17` + | `2am -> 02` + `m` The minutes in one digit if possible. | `5:30 -> 30` + | `2:01 -> 1` + `mm` Same as above but always two digits, 0 is prepended if the minute is one digit. | `5:30 -> 30` + | `2:01 -> 01` + `M` The month in one digit if possible. | `September -> 9` + | `December -> 12` + `MM` The month in two digits always. 0 is prepended if the month value is one digit. | `September -> 09` + | `December -> 12` + `MMM` Abbreviated three-letter form of the month. | `September -> Sep` + | `December -> Dec` + `MMMM` Full month string, properly capitalized. | `September -> September` + `s` Seconds as one digit if possible. | `00:00:06 -> 6` + `ss` Same as above but always two digits. 0 is prepended if the second is one digit. | `00:00:06 -> 06` + `t` `A` when time is in the AM. `P` when time is in the PM. | `5pm -> P` + | `2am -> A` + `tt` Same as above, but `AM` and `PM` instead of `A` and `P` respectively. | `5pm -> PM` + | `2am -> AM` + `yy` The last two digits of the year. When parsing, the current century is assumed. | `2012 AD -> 12` + `yyyy` The year, padded to at least four digits. | `2012 AD -> 2012` + Is always positive, even when the year is BC. | `24 AD -> 0024` + When the year is more than four digits, '+' is prepended. | `24 BC -> 00024` + | `12345 AD -> +12345` + `YYYY` The year without any padding. | `2012 AD -> 2012` + Is always positive, even when the year is BC. | `24 AD -> 24` + | `24 BC -> 24` + | `12345 AD -> 12345` + `uuuu` The year, padded to at least four digits. Will be negative when the year is BC. | `2012 AD -> 2012` + When the year is more than four digits, '+' is prepended unless the year is BC. 
| `24 AD -> 0024` + | `24 BC -> -0023` + | `12345 AD -> +12345` + `UUUU` The year without any padding. Will be negative when the year is BC. | `2012 AD -> 2012` + | `24 AD -> 24` + | `24 BC -> -23` + | `12345 AD -> 12345` + `V` The Iso Week-Number as one or two digits | `3/2/2012 -> 5` + | `1/4/2012 -> 13` + `VV` The Iso Week-Number as two digits always. 0 is prepended if one digit. | `3/2/2012 -> 05` + | `1/4/2012 -> 13` + `z` Displays the timezone offset from UTC. | `UTC+7 -> +7` + | `UTC-5 -> -5` + `zz` Same as above but with leading 0. | `UTC+7 -> +07` + | `UTC-5 -> -05` + `zzz` Same as above but with `:mm` where *mm* represents minutes. | `UTC+7 -> +07:00` + | `UTC-5 -> -05:00` + `ZZZ` Same as above but with `mm` where *mm* represents minutes. | `UTC+7 -> +0700` + | `UTC-5 -> -0500` + `zzzz` Same as above but with `:ss` where *ss* represents seconds. | `UTC+7 -> +07:00:00` + | `UTC-5 -> -05:00:00` + `ZZZZ` Same as above but with `ss` where *ss* represents seconds. | `UTC+7 -> +070000` + | `UTC-5 -> -050000` + `g` Era: AD or BC | `300 AD -> AD` + | `300 BC -> BC` + `fff` Milliseconds display | `1000000 nanoseconds -> 1` + `ffffff` Microseconds display | `1000000 nanoseconds -> 1000` + `fffffffff` Nanoseconds display | `1000000 nanoseconds -> 1000000` + =========== ================================================================================= ============================================== + + Other strings can be inserted by putting them in `''`. For example + `hh'->'mm` will give `01->56`. In addition to spaces, + the following characters can be inserted without quoting them: + `:` `-` `,` `.` `(` `)` `/` `[` `]`. + A literal `'` can be specified with `''`. + + However you don't need to necessarily separate format patterns, as an + unambiguous format string like `yyyyMMddhhmmss` is also valid (although + only for years in the range 1..9999). 
+ + Duration vs TimeInterval + ============================ + The `times` module exports two similar types that are both used to + represent some amount of time: `Duration <#Duration>`_ and + `TimeInterval <#TimeInterval>`_. + This section explains how they differ and when one should be preferred over the + other (short answer: use `Duration` unless support for months and years is + needed). + + Duration + ---------------------------- + A `Duration` represents a duration of time stored as seconds and + nanoseconds. A `Duration` is always fully normalized, so + `initDuration(hours = 1)` and `initDuration(minutes = 60)` are equivalent. + + Arithmetic with a `Duration` is very fast, especially when used with the + `Time` type, since it only involves basic arithmetic. Because `Duration` + is more performant and easier to understand, it should generally be preferred. + + TimeInterval + ---------------------------- + A `TimeInterval` represents an amount of time expressed in calendar + units, for example "1 year and 2 days". Since some units cannot be + normalized (the length of a year is different for leap years for example), + the `TimeInterval` type uses separate fields for every unit. The + `TimeInterval`'s returned from this module generally don't normalize + **anything**, so even units that could be normalized (like seconds, + milliseconds and so on) are left untouched. + + Arithmetic with a `TimeInterval` can be very slow, because it requires + timezone information. + + Since it's slower and more complex, the `TimeInterval` type should be + avoided unless the program explicitly needs the features it offers that + `Duration` doesn't have. + + How long is a day? + ---------------------------- + It should be especially noted that the handling of days differs between + `TimeInterval` and `Duration`. The `Duration` type always treats a day + as exactly 86400 seconds. For `TimeInterval`, it's more complex. 
+ + As an example, consider the amount of time between these two timestamps, both + in the same timezone: + + - 2018-03-25T12:00+02:00 + - 2018-03-26T12:00+01:00 + + If only the date & time is considered, it appears that exactly one day has + passed. However, the UTC offsets are different, which means that the + UTC offset was changed somewhere in between. This happens twice each year for + timezones that use daylight savings time. Because of this change, the amount + of time that has passed is actually 25 hours. + + The `TimeInterval` type uses calendar units, and will say that exactly one + day has passed. The `Duration` type on the other hand normalizes everything + to seconds, and will therefore say that 90000 seconds have passed, which is + the same as 25 hours. + + See also + ======== + * `monotimes module <monotimes.html>`_ +]## + +import std/[strutils, math, options] + +import std/private/since include "system/inclrtl" -type - Month* = enum ## represents a month - mJan, mFeb, mMar, mApr, mMay, mJun, mJul, mAug, mSep, mOct, mNov, mDec - WeekDay* = enum ## represents a weekday - dMon, dTue, dWed, dThu, dFri, dSat, dSun +when defined(nimPreviewSlimSystem): + import std/assertions + + +when defined(js): + import std/jscore + + # This is really bad, but overflow checks are broken badly for + # ints on the JS backend. See #6752. 
+ {.push overflowChecks: off.} + proc `*`(a, b: int64): int64 = + system.`*`(a, b) + proc `*`(a, b: int): int = + system.`*`(a, b) + proc `+`(a, b: int64): int64 = + system.`+`(a, b) + proc `+`(a, b: int): int = + system.`+`(a, b) + proc `-`(a, b: int64): int64 = + system.`-`(a, b) + proc `-`(a, b: int): int = + system.`-`(a, b) + proc inc(a: var int, b: int) = + system.inc(a, b) + proc inc(a: var int64, b: int) = + system.inc(a, b) + {.pop.} + +elif defined(posix): + import std/posix + + type CTime = posix.Time + + when defined(macosx): + proc gettimeofday(tp: var Timeval, unused: pointer = nil) + {.importc: "gettimeofday", header: "<sys/time.h>", sideEffect.} -when not defined(JS): - var - timezone {.importc, header: "<time.h>".}: int - tzname {.importc, header: "<time.h>" .}: array[0..1, cstring] +elif defined(windows): + import std/winlean, std/time_t -when defined(posix) and not defined(JS): type - TimeImpl {.importc: "time_t", header: "<time.h>".} = int - Time* = distinct TimeImpl ## distinct type that represents a time - ## measured as number of seconds since the epoch + CTime = time_t.Time + Tm {.importc: "struct tm", header: "<time.h>", final, pure.} = object + tm_sec*: cint ## Seconds [0,60]. + tm_min*: cint ## Minutes [0,59]. + tm_hour*: cint ## Hour [0,23]. + tm_mday*: cint ## Day of month [1,31]. + tm_mon*: cint ## Month of year [0,11]. + tm_year*: cint ## Years since 1900. + tm_wday*: cint ## Day of week [0,6] (Sunday =0). + tm_yday*: cint ## Day of year [0,365]. + tm_isdst*: cint ## Daylight Savings flag. + + proc localtime(a1: var CTime): ptr Tm {.importc, header: "<time.h>", sideEffect.} - Timeval {.importc: "struct timeval", - header: "<sys/select.h>".} = object ## struct timeval - tv_sec: int ## Seconds. - tv_usec: int ## Microseconds. - - # we cannot import posix.nim here, because posix.nim depends on times.nim. - # Ok, we could, but I don't want circular dependencies. - # And gettimeofday() is not defined in the posix module anyway. Sigh. 
+type + Month* = enum ## Represents a month. Note that the enum starts at `1`, + ## so `ord(month)` will give the month number in the + ## range `1..12`. + mJan = (1, "January") + mFeb = "February" + mMar = "March" + mApr = "April" + mMay = "May" + mJun = "June" + mJul = "July" + mAug = "August" + mSep = "September" + mOct = "October" + mNov = "November" + mDec = "December" + + WeekDay* = enum ## Represents a weekday. + dMon = "Monday" + dTue = "Tuesday" + dWed = "Wednesday" + dThu = "Thursday" + dFri = "Friday" + dSat = "Saturday" + dSun = "Sunday" - proc posix_gettimeofday(tp: var Timeval, unused: pointer = nil) {. - importc: "gettimeofday", header: "<sys/time.h>".} +type + MonthdayRange* = range[1..31] + HourRange* = range[0..23] + MinuteRange* = range[0..59] + SecondRange* = range[0..60] ## \ + ## Includes the value 60 to allow for a leap second. Note however + ## that the `second` of a `DateTime` will never be a leap second. + YeardayRange* = range[0..365] + NanosecondRange* = range[0..999_999_999] + + IsoWeekRange* = range[1 .. 53] + ## An ISO 8601 calendar week number. + IsoYear* = distinct int + ## An ISO 8601 calendar year number. + ## + ## .. warning:: The ISO week-based year can correspond to the following or previous year from 29 December to January 3. + + Time* = object ## Represents a point in time. + seconds: int64 + nanosecond: NanosecondRange + + DateTime* = object of RootObj ## \ + ## Represents a time in different parts. Although this type can represent + ## leap seconds, they are generally not supported in this module. They are + ## not ignored, but the `DateTime`'s returned by procedures in this + ## module will never have a leap second. 
+ nanosecond: NanosecondRange + second: SecondRange + minute: MinuteRange + hour: HourRange + monthdayZero: int + monthZero: int + year: int + weekday: WeekDay + yearday: YeardayRange + isDst: bool + timezone: Timezone + utcOffset: int + + Duration* = object ## Represents a fixed duration of time, meaning a duration + ## that has constant length independent of the context. + ## + ## To create a new `Duration`, use `initDuration + ## <#initDuration,int64,int64,int64,int64,int64,int64,int64,int64>`_. + ## Instead of trying to access the private attributes, use + ## `inSeconds <#inSeconds,Duration>`_ for converting to seconds and + ## `inNanoseconds <#inNanoseconds,Duration>`_ for converting to nanoseconds. + seconds: int64 + nanosecond: NanosecondRange + + TimeUnit* = enum ## Different units of time. + Nanoseconds, Microseconds, Milliseconds, Seconds, Minutes, Hours, Days, + Weeks, Months, Years + + FixedTimeUnit* = range[Nanoseconds..Weeks] ## \ + ## Subrange of `TimeUnit` that only includes units of fixed duration. + ## These are the units that can be represented by a `Duration`. + + TimeInterval* = object ## \ + ## Represents a non-fixed duration of time. Can be used to add and + ## subtract non-fixed time units from a `DateTime <#DateTime>`_ or + ## `Time <#Time>`_. + ## + ## Create a new `TimeInterval` with `initTimeInterval proc + ## <#initTimeInterval,int,int,int,int,int,int,int,int,int,int>`_. + ## + ## Note that `TimeInterval` doesn't represent a fixed duration of time, + ## since the duration of some units depend on the context (e.g a year + ## can be either 365 or 366 days long). The non-fixed time units are + ## years, months, days and week. + ## + ## Note that `TimeInterval`'s returned from the `times` module are + ## never normalized. If you want to normalize a time unit, + ## `Duration <#Duration>`_ should be used instead. 
+ nanoseconds*: int ## The number of nanoseconds + microseconds*: int ## The number of microseconds + milliseconds*: int ## The number of milliseconds + seconds*: int ## The number of seconds + minutes*: int ## The number of minutes + hours*: int ## The number of hours + days*: int ## The number of days + weeks*: int ## The number of weeks + months*: int ## The number of months + years*: int ## The number of years + + Timezone* = ref object ## \ + ## Timezone interface for supporting `DateTime <#DateTime>`_\s of arbitrary + ## timezones. The `times` module only supplies implementations for the + ## system's local time and UTC. + zonedTimeFromTimeImpl: proc (x: Time): ZonedTime + {.tags: [], raises: [], benign.} + zonedTimeFromAdjTimeImpl: proc (x: Time): ZonedTime + {.tags: [], raises: [], benign.} + name: string + + ZonedTime* = object ## Represents a point in time with an associated + ## UTC offset and DST flag. This type is only used for + ## implementing timezones. + time*: Time ## The point in time being represented. + utcOffset*: int ## The offset in seconds west of UTC, + ## including any offset due to DST. + isDst*: bool ## Determines whether DST is in effect. + + DurationParts* = array[FixedTimeUnit, int64] # Array of Duration parts starts + TimeIntervalParts* = array[TimeUnit, int] # Array of Duration parts starts + +const + secondsInMin = 60 + secondsInHour = 60*60 + secondsInDay = 60*60*24 + rateDiff = 10000000'i64 # 100 nsecs + # The number of hectonanoseconds between 1601/01/01 (windows epoch) + # and 1970/01/01 (unix epoch). 
+ epochDiff = 116444736000000000'i64 + +const unitWeights: array[FixedTimeUnit, int64] = [ + 1'i64, + 1000, + 1_000_000, + 1e9.int64, + secondsInMin * 1e9.int64, + secondsInHour * 1e9.int64, + secondsInDay * 1e9.int64, + 7 * secondsInDay * 1e9.int64, +] + +when (NimMajor, NimMinor) >= (1, 4): + # Newer versions of Nim don't track defects + {.pragma: parseFormatRaises, raises: [TimeParseError, TimeFormatParseError].} + {.pragma: parseRaises, raises: [TimeParseError].} +else: + # Still track when using older versions + {.pragma: parseFormatRaises, raises: [TimeParseError, TimeFormatParseError, Defect].} + {.pragma: parseRaises, raises: [TimeParseError, Defect].} - # we also need tzset() to make sure that tzname is initialized - proc tzset() {.importc, header: "<time.h>".} - # calling tzset() implicitly to initialize tzname data. - tzset() -elif defined(windows): - import winlean +# +# Helper procs +# - when defined(vcc): - # newest version of Visual C++ defines time_t to be of 64 bits - type TimeImpl {.importc: "time_t", header: "<time.h>".} = int64 +{.pragma: operator, rtl, noSideEffect, benign.} + +proc convert*[T: SomeInteger](unitFrom, unitTo: FixedTimeUnit, quantity: T): T + {.inline.} = + ## Convert a quantity of some duration unit to another duration unit. + ## This proc only deals with integers, so the result might be truncated. 
+ runnableExamples: + doAssert convert(Days, Hours, 2) == 48 + doAssert convert(Days, Weeks, 13) == 1 # Truncated + doAssert convert(Seconds, Milliseconds, -1) == -1000 + if unitFrom < unitTo: + (quantity div (unitWeights[unitTo] div unitWeights[unitFrom])).T else: - type TimeImpl {.importc: "time_t", header: "<time.h>".} = int32 - - type - Time* = distinct TimeImpl - -elif defined(JS): - type - Time* {.importc.} = object - getDay: proc (): int {.tags: [], raises: [], benign.} - getFullYear: proc (): int {.tags: [], raises: [], benign.} - getHours: proc (): int {.tags: [], raises: [], benign.} - getMilliseconds: proc (): int {.tags: [], raises: [], benign.} - getMinutes: proc (): int {.tags: [], raises: [], benign.} - getMonth: proc (): int {.tags: [], raises: [], benign.} - getSeconds: proc (): int {.tags: [], raises: [], benign.} - getTime: proc (): int {.tags: [], raises: [], benign.} - getTimezoneOffset: proc (): int {.tags: [], raises: [], benign.} - getDate: proc (): int {.tags: [], raises: [], benign.} - getUTCDate: proc (): int {.tags: [], raises: [], benign.} - getUTCFullYear: proc (): int {.tags: [], raises: [], benign.} - getUTCHours: proc (): int {.tags: [], raises: [], benign.} - getUTCMilliseconds: proc (): int {.tags: [], raises: [], benign.} - getUTCMinutes: proc (): int {.tags: [], raises: [], benign.} - getUTCMonth: proc (): int {.tags: [], raises: [], benign.} - getUTCSeconds: proc (): int {.tags: [], raises: [], benign.} - getUTCDay: proc (): int {.tags: [], raises: [], benign.} - getYear: proc (): int {.tags: [], raises: [], benign.} - parse: proc (s: cstring): Time {.tags: [], raises: [], benign.} - setDate: proc (x: int) {.tags: [], raises: [], benign.} - setFullYear: proc (x: int) {.tags: [], raises: [], benign.} - setHours: proc (x: int) {.tags: [], raises: [], benign.} - setMilliseconds: proc (x: int) {.tags: [], raises: [], benign.} - setMinutes: proc (x: int) {.tags: [], raises: [], benign.} - setMonth: proc (x: int) {.tags: [], raises: 
[], benign.} - setSeconds: proc (x: int) {.tags: [], raises: [], benign.} - setTime: proc (x: int) {.tags: [], raises: [], benign.} - setUTCDate: proc (x: int) {.tags: [], raises: [], benign.} - setUTCFullYear: proc (x: int) {.tags: [], raises: [], benign.} - setUTCHours: proc (x: int) {.tags: [], raises: [], benign.} - setUTCMilliseconds: proc (x: int) {.tags: [], raises: [], benign.} - setUTCMinutes: proc (x: int) {.tags: [], raises: [], benign.} - setUTCMonth: proc (x: int) {.tags: [], raises: [], benign.} - setUTCSeconds: proc (x: int) {.tags: [], raises: [], benign.} - setYear: proc (x: int) {.tags: [], raises: [], benign.} - toGMTString: proc (): cstring {.tags: [], raises: [], benign.} - toLocaleString: proc (): cstring {.tags: [], raises: [], benign.} - -type - TimeInfo* = object of RootObj ## represents a time in different parts - second*: range[0..61] ## The number of seconds after the minute, - ## normally in the range 0 to 59, but can - ## be up to 61 to allow for leap seconds. - minute*: range[0..59] ## The number of minutes after the hour, - ## in the range 0 to 59. - hour*: range[0..23] ## The number of hours past midnight, - ## in the range 0 to 23. - monthday*: range[1..31] ## The day of the month, in the range 1 to 31. - month*: Month ## The current month. - year*: range[-10_000..10_000] ## The current year. - weekday*: WeekDay ## The current day of the week. - yearday*: range[0..365] ## The number of days since January 1, - ## in the range 0 to 365. - ## Always 0 if the target is JS. - isDST*: bool ## Determines whether DST is in effect. Always - ## ``False`` if time is UTC. - tzname*: string ## The timezone this time is in. E.g. GMT - timezone*: int ## The offset of the (non-DST) timezone in seconds - ## west of UTC. - - ## I make some assumptions about the data in here. Either - ## everything should be positive or everything negative. Zero is - ## fine too. Mixed signs will lead to unexpected results. 
- TimeInterval* = object ## a time interval - miliseconds*: int ## The number of miliseconds - seconds*: int ## The number of seconds - minutes*: int ## The number of minutes - hours*: int ## The number of hours - days*: int ## The number of days - months*: int ## The number of months - years*: int ## The number of years - -{.deprecated: [TMonth: Month, TWeekDay: WeekDay, TTime: Time, - TTimeInterval: TimeInterval, TTimeInfo: TimeInfo].} - -proc getTime*(): Time {.tags: [TimeEffect], benign.} - ## gets the current calendar time as a UNIX epoch value (number of seconds - ## elapsed since 1970) with integer precission. Use epochTime for higher - ## resolution. -proc getLocalTime*(t: Time): TimeInfo {.tags: [TimeEffect], raises: [], benign.} - ## converts the calendar time `t` to broken-time representation, - ## expressed relative to the user's specified time zone. -proc getGMTime*(t: Time): TimeInfo {.tags: [TimeEffect], raises: [], benign.} - ## converts the calendar time `t` to broken-down time representation, - ## expressed in Coordinated Universal Time (UTC). - -proc timeInfoToTime*(timeInfo: TimeInfo): Time {.tags: [], benign.} - ## converts a broken-down time structure to - ## calendar time representation. The function ignores the specified - ## contents of the structure members `weekday` and `yearday` and recomputes - ## them from the other information in the broken-down time structure. - -proc fromSeconds*(since1970: float): Time {.tags: [], raises: [], benign.} - ## Takes a float which contains the number of seconds since the unix epoch and - ## returns a time object. - -proc fromSeconds*(since1970: int64): Time {.tags: [], raises: [], benign.} = - ## Takes an int which contains the number of seconds since the unix epoch and - ## returns a time object. - fromSeconds(float(since1970)) - -proc toSeconds*(time: Time): float {.tags: [], raises: [], benign.} - ## Returns the time in seconds since the unix epoch. 
- -proc `$` *(timeInfo: TimeInfo): string {.tags: [], raises: [], benign.} - ## converts a `TimeInfo` object to a string representation. -proc `$` *(time: Time): string {.tags: [], raises: [], benign.} - ## converts a calendar time to a string representation. - -proc `-`*(a, b: Time): int64 {. - rtl, extern: "ntDiffTime", tags: [], raises: [], benign.} - ## computes the difference of two calendar times. Result is in seconds. - -proc `<`*(a, b: Time): bool {. - rtl, extern: "ntLtTime", tags: [], raises: [].} = - ## returns true iff ``a < b``, that is iff a happened before b. - result = a - b < 0 - -proc `<=` * (a, b: Time): bool {. - rtl, extern: "ntLeTime", tags: [], raises: [].}= - ## returns true iff ``a <= b``. - result = a - b <= 0 - -proc `==`*(a, b: Time): bool {. - rtl, extern: "ntEqTime", tags: [], raises: [].} = - ## returns true if ``a == b``, that is if both times represent the same value - result = a - b == 0 - -when not defined(JS): - proc getTzname*(): tuple[nonDST, DST: string] {.tags: [TimeEffect], raises: [], - benign.} - ## returns the local timezone; ``nonDST`` is the name of the local non-DST - ## timezone, ``DST`` is the name of the local DST timezone. - -proc getTimezone*(): int {.tags: [TimeEffect], raises: [], benign.} - ## returns the offset of the local (non-DST) timezone in seconds west of UTC. - -proc getStartMilsecs*(): int {.deprecated, tags: [TimeEffect], benign.} - ## get the miliseconds from the start of the program. **Deprecated since - ## version 0.8.10.** Use ``epochTime`` or ``cpuTime`` instead. - -proc initInterval*(miliseconds, seconds, minutes, hours, days, months, - years: int = 0): TimeInterval = - ## creates a new ``TimeInterval``. 
- result.miliseconds = miliseconds - result.seconds = seconds - result.minutes = minutes - result.hours = hours - result.days = days - result.months = months - result.years = years + ((unitWeights[unitFrom] div unitWeights[unitTo]) * quantity).T + +proc normalize[T: Duration|Time](seconds, nanoseconds: int64): T = + ## Normalize a (seconds, nanoseconds) pair and return it as either + ## a `Duration` or `Time`. A normalized `Duration|Time` has a + ## positive nanosecond part in the range `NanosecondRange`. + result.seconds = seconds + convert(Nanoseconds, Seconds, nanoseconds) + var nanosecond = nanoseconds mod convert(Seconds, Nanoseconds, 1) + if nanosecond < 0: + nanosecond += convert(Seconds, Nanoseconds, 1) + result.seconds -= 1 + result.nanosecond = nanosecond.int proc isLeapYear*(year: int): bool = - ## returns true if ``year`` is a leap year - - if year mod 400 == 0: - return true - elif year mod 100 == 0: - return false - elif year mod 4 == 0: - return true - else: - return false + ## Returns true if `year` is a leap year. + runnableExamples: + doAssert isLeapYear(2000) + doAssert not isLeapYear(1900) + year mod 4 == 0 and (year mod 100 != 0 or year mod 400 == 0) proc getDaysInMonth*(month: Month, year: int): int = - ## gets the amount of days in a ``month`` of a ``year`` - + ## Get the number of days in `month` of `year`. # http://www.dispersiondesign.com/articles/time/number_of_days_in_a_month + runnableExamples: + doAssert getDaysInMonth(mFeb, 2000) == 29 + doAssert getDaysInMonth(mFeb, 2001) == 28 case month of mFeb: result = if isLeapYear(year): 29 else: 28 of mApr, mJun, mSep, mNov: result = 30 else: result = 31 -proc toSeconds(a: TimeInfo, interval: TimeInterval): float = - ## Calculates how many seconds the interval is worth by adding up - ## all the fields - - var anew = a - var newinterv = interval - result = 0 - - newinterv.months += interval.years * 12 - var curMonth = anew.month - for mth in 1 .. 
newinterv.months: - result += float(getDaysInMonth(curMonth, anew.year) * 24 * 60 * 60) - if curMonth == mDec: - curMonth = mJan - anew.year.inc() - else: - curMonth.inc() - result += float(newinterv.days * 24 * 60 * 60) - result += float(newinterv.hours * 60 * 60) - result += float(newinterv.minutes * 60) - result += float(newinterv.seconds) - result += newinterv.miliseconds / 1000 - -proc `+`*(a: TimeInfo, interval: TimeInterval): TimeInfo = - ## adds ``interval`` time. +proc assertValidDate(monthday: MonthdayRange, month: Month, year: int) + {.inline.} = + assert monthday <= getDaysInMonth(month, year), + $year & "-" & intToStr(ord(month), 2) & "-" & $monthday & + " is not a valid date" + +proc toEpochDay(monthday: MonthdayRange, month: Month, year: int): int64 = + ## Get the epoch day from a year/month/day date. + ## The epoch day is the number of days since 1970/01/01 + ## (it might be negative). + # Based on http://howardhinnant.github.io/date_algorithms.html + assertValidDate monthday, month, year + var (y, m, d) = (year, ord(month), monthday.int) + if m <= 2: + y.dec + + let era = (if y >= 0: y else: y-399) div 400 + let yoe = y - era * 400 + let doy = (153 * (m + (if m > 2: -3 else: 9)) + 2) div 5 + d-1 + let doe = yoe * 365 + yoe div 4 - yoe div 100 + doy + return era * 146097 + doe - 719468 + +proc fromEpochDay(epochday: int64): + tuple[monthday: MonthdayRange, month: Month, year: int] = + ## Get the year/month/day date from a epoch day. + ## The epoch day is the number of days since 1970/01/01 + ## (it might be negative). 
+ # Based on http://howardhinnant.github.io/date_algorithms.html + var z = epochday + z.inc 719468 + let era = (if z >= 0: z else: z - 146096) div 146097 + let doe = z - era * 146097 + let yoe = (doe - doe div 1460 + doe div 36524 - doe div 146096) div 365 + let y = yoe + era * 400; + let doy = doe - (365 * yoe + yoe div 4 - yoe div 100) + let mp = (5 * doy + 2) div 153 + let d = doy - (153 * mp + 2) div 5 + 1 + let m = mp + (if mp < 10: 3 else: -9) + return (d.MonthdayRange, m.Month, (y + ord(m <= 2)).int) + +proc getDayOfYear*(monthday: MonthdayRange, month: Month, year: int): + YeardayRange {.tags: [], raises: [], benign.} = + ## Returns the day of the year. + ## Equivalent with `dateTime(year, month, monthday, 0, 0, 0, 0).yearday`. + runnableExamples: + doAssert getDayOfYear(1, mJan, 2000) == 0 + doAssert getDayOfYear(10, mJan, 2000) == 9 + doAssert getDayOfYear(10, mFeb, 2000) == 40 + + assertValidDate monthday, month, year + const daysUntilMonth: array[Month, int] = + [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334] + const daysUntilMonthLeap: array[Month, int] = + [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335] + + if isLeapYear(year): + result = daysUntilMonthLeap[month] + monthday - 1 + else: + result = daysUntilMonth[month] + monthday - 1 + +proc getDayOfWeek*(monthday: MonthdayRange, month: Month, year: int): WeekDay + {.tags: [], raises: [], benign.} = + ## Returns the day of the week enum from day, month and year. + ## Equivalent with `dateTime(year, month, monthday, 0, 0, 0, 0).weekday`. + runnableExamples: + doAssert getDayOfWeek(13, mJun, 1990) == dWed + doAssert $getDayOfWeek(13, mJun, 1990) == "Wednesday" + + assertValidDate monthday, month, year + # 1970-01-01 is a Thursday, we adjust to the previous Monday + let days = toEpochDay(monthday, month, year) - 3 + let weeks = floorDiv(days, 7'i64) + let wd = days - weeks * 7 + # The value of d is 0 for a Sunday, 1 for a Monday, 2 for a Tuesday, etc. 
+ # so we must correct for the WeekDay type. + result = if wd == 0: dSun else: WeekDay(wd - 1) + +proc getDaysInYear*(year: int): int = + ## Get the number of days in a `year` + runnableExamples: + doAssert getDaysInYear(2000) == 366 + doAssert getDaysInYear(2001) == 365 + result = 365 + (if isLeapYear(year): 1 else: 0) + +proc `==`*(a, b: IsoYear): bool {.borrow.} +proc `$`*(p: IsoYear): string {.borrow.} + +proc getWeeksInIsoYear*(y: IsoYear): IsoWeekRange {.since: (1, 5).} = + ## Returns the number of weeks in the specified ISO 8601 week-based year, which can be + ## either 53 or 52. + runnableExamples: + assert getWeeksInIsoYear(IsoYear(2019)) == 52 + assert getWeeksInIsoYear(IsoYear(2020)) == 53 + + var y = int(y) + + # support negative years + y = if y < 0: 400 + y mod 400 else: y + + # source: https://webspace.science.uu.nl/~gent0113/calendar/isocalendar.htm + let p = (y + (y div 4) - (y div 100) + (y div 400)) mod 7 + let y1 = y - 1 + let p1 = (y1 + (y1 div 4) - (y1 div 100) + (y1 div 400)) mod 7 + if p == 4 or p1 == 3: 53 else: 52 + +proc getIsoWeekAndYear*(dt: DateTime): + tuple[isoweek: IsoWeekRange, isoyear: IsoYear] {.since: (1, 5).} = + ## Returns the ISO 8601 week and year. ## - ## **Note:** This has been only briefly tested and it may not be - ## very accurate. - let t = toSeconds(timeInfoToTime(a)) - let secs = toSeconds(a, interval) - if a.tzname == "UTC": - result = getGMTime(fromSeconds(t + secs)) + ## .. warning:: The ISO week-based year can correspond to the following or previous year from 29 December to January 3. 
+ runnableExamples: + assert getIsoWeekAndYear(initDateTime(21, mApr, 2018, 00, 00, 00)) == (isoweek: 16.IsoWeekRange, isoyear: 2018.IsoYear) + block: + let (w, y) = getIsoWeekAndYear(initDateTime(30, mDec, 2019, 00, 00, 00)) + assert w == 01.IsoWeekRange + assert y == 2020.IsoYear + assert getIsoWeekAndYear(initDateTime(13, mSep, 2020, 00, 00, 00)) == (isoweek: 37.IsoWeekRange, isoyear: 2020.IsoYear) + block: + let (w, y) = getIsoWeekAndYear(initDateTime(2, mJan, 2021, 00, 00, 00)) + assert w.int > 52 + assert w.int < 54 + assert y.int mod 100 == 20 + + # source: https://webspace.science.uu.nl/~gent0113/calendar/isocalendar.htm + var w = (dt.yearday.int - dt.weekday.int + 10) div 7 + if w < 1: + (isoweek: getWeeksInIsoYear(IsoYear(dt.year - 1)), isoyear: IsoYear(dt.year - 1)) + elif (w > getWeeksInIsoYear(IsoYear(dt.year))): + (isoweek: IsoWeekRange(1), isoyear: IsoYear(dt.year + 1)) else: - result = getLocalTime(fromSeconds(t + secs)) + (isoweek: IsoWeekRange(w), isoyear: IsoYear(dt.year)) -proc `-`*(a: TimeInfo, interval: TimeInterval): TimeInfo = - ## subtracts ``interval`` time. - ## - ## **Note:** This has been only briefly tested, it is inaccurate especially - ## when you subtract so much that you reach the Julian calendar. - let t = toSeconds(timeInfoToTime(a)) - let secs = toSeconds(a, interval) - if a.tzname == "UTC": - result = getGMTime(fromSeconds(t - secs)) +proc stringifyUnit(value: int | int64, unit: TimeUnit): string = + ## Stringify time unit with it's name, lowercased + let strUnit = $unit + result = "" + result.addInt value + result.add ' ' + if abs(value) != 1: + result.add(strUnit.toLowerAscii()) else: - result = getLocalTime(fromSeconds(t - secs)) + result.add(strUnit[0..^2].toLowerAscii()) -when not defined(JS): - proc epochTime*(): float {.rtl, extern: "nt$1", tags: [TimeEffect].} - ## gets time after the UNIX epoch (1970) in seconds. 
It is a float - ## because sub-second resolution is likely to be supported (depending - ## on the hardware/OS). +proc humanizeParts(parts: seq[string]): string = + ## Make date string parts human-readable + result = "" + if parts.len == 0: + result.add "0 nanoseconds" + elif parts.len == 1: + result = parts[0] + elif parts.len == 2: + result = parts[0] & " and " & parts[1] + else: + for i in 0..high(parts)-1: + result.add parts[i] & ", " + result.add "and " & parts[high(parts)] - proc cpuTime*(): float {.rtl, extern: "nt$1", tags: [TimeEffect].} - ## gets time spent that the CPU spent to run the current process in - ## seconds. This may be more useful for benchmarking than ``epochTime``. - ## However, it may measure the real time instead (depending on the OS). - ## The value of the result has no meaning. - ## To generate useful timing values, take the difference between - ## the results of two ``cpuTime`` calls: - ## - ## .. code-block:: nim - ## var t0 = cpuTime() - ## doWork() - ## echo "CPU time [s] ", cpuTime() - t0 +template subImpl[T: Duration|Time](a: Duration|Time, b: Duration|Time): T = + normalize[T](a.seconds - b.seconds, a.nanosecond - b.nanosecond) -when not defined(JS): - # C wrapper: - type - StructTM {.importc: "struct tm", final.} = object - second {.importc: "tm_sec".}, - minute {.importc: "tm_min".}, - hour {.importc: "tm_hour".}, - monthday {.importc: "tm_mday".}, - month {.importc: "tm_mon".}, - year {.importc: "tm_year".}, - weekday {.importc: "tm_wday".}, - yearday {.importc: "tm_yday".}, - isdst {.importc: "tm_isdst".}: cint - - TimeInfoPtr = ptr StructTM - Clock {.importc: "clock_t".} = distinct int +template addImpl[T: Duration|Time](a: Duration|Time, b: Duration|Time): T = + normalize[T](a.seconds + b.seconds, a.nanosecond + b.nanosecond) - proc localtime(timer: ptr Time): TimeInfoPtr {. - importc: "localtime", header: "<time.h>", tags: [].} - proc gmtime(timer: ptr Time): TimeInfoPtr {. 
- importc: "gmtime", header: "<time.h>", tags: [].} - proc timec(timer: ptr Time): Time {. - importc: "time", header: "<time.h>", tags: [].} - proc mktime(t: StructTM): Time {. - importc: "mktime", header: "<time.h>", tags: [].} - proc asctime(tblock: StructTM): cstring {. - importc: "asctime", header: "<time.h>", tags: [].} - proc ctime(time: ptr Time): cstring {. - importc: "ctime", header: "<time.h>", tags: [].} - # strftime(s: CString, maxsize: int, fmt: CString, t: tm): int {. - # importc: "strftime", header: "<time.h>".} - proc getClock(): Clock {.importc: "clock", header: "<time.h>", tags: [TimeEffect].} - proc difftime(a, b: Time): float {.importc: "difftime", header: "<time.h>", - tags: [].} +template ltImpl(a: Duration|Time, b: Duration|Time): bool = + a.seconds < b.seconds or ( + a.seconds == b.seconds and a.nanosecond < b.nanosecond) - var - clocksPerSec {.importc: "CLOCKS_PER_SEC", nodecl.}: int - - # our own procs on top of that: - proc tmToTimeInfo(tm: StructTM, local: bool): TimeInfo = - const - weekDays: array [0..6, WeekDay] = [ - dSun, dMon, dTue, dWed, dThu, dFri, dSat] - TimeInfo(second: int(tm.second), - minute: int(tm.minute), - hour: int(tm.hour), - monthday: int(tm.monthday), - month: Month(tm.month), - year: tm.year + 1900'i32, - weekday: weekDays[int(tm.weekday)], - yearday: int(tm.yearday), - isDST: tm.isdst > 0, - tzname: if local: - if tm.isdst > 0: - getTzname().DST - else: - getTzname().nonDST - else: - "UTC", - timezone: if local: getTimezone() else: 0 - ) +template lqImpl(a: Duration|Time, b: Duration|Time): bool = + a.seconds < b.seconds or ( + a.seconds == b.seconds and a.nanosecond <= b.nanosecond) - proc timeInfoToTM(t: TimeInfo): StructTM = - const - weekDays: array [WeekDay, int8] = [1'i8,2'i8,3'i8,4'i8,5'i8,6'i8,0'i8] - result.second = t.second - result.minute = t.minute - result.hour = t.hour - result.monthday = t.monthday - result.month = ord(t.month) - result.year = t.year - 1900 - result.weekday = weekDays[t.weekday] - 
result.yearday = t.yearday - result.isdst = if t.isDST: 1 else: 0 - - when not defined(useNimRtl): - proc `-` (a, b: Time): int64 = - return toBiggestInt(difftime(a, b)) - - proc getStartMilsecs(): int = - #echo "clocks per sec: ", clocksPerSec, "clock: ", int(getClock()) - #return getClock() div (clocksPerSec div 1000) - when defined(macosx): - result = toInt(toFloat(int(getClock())) / (toFloat(clocksPerSec) / 1000.0)) - else: - result = int(getClock()) div (clocksPerSec div 1000) - when false: - var a: Timeval - posix_gettimeofday(a) - result = a.tv_sec * 1000'i64 + a.tv_usec div 1000'i64 - #echo "result: ", result - - proc getTime(): Time = return timec(nil) - proc getLocalTime(t: Time): TimeInfo = - var a = t - result = tmToTimeInfo(localtime(addr(a))[], true) - # copying is needed anyway to provide reentrancity; thus - # the conversion is not expensive - - proc getGMTime(t: Time): TimeInfo = - var a = t - result = tmToTimeInfo(gmtime(addr(a))[], false) - # copying is needed anyway to provide reentrancity; thus - # the conversion is not expensive - - proc timeInfoToTime(timeInfo: TimeInfo): Time = - var cTimeInfo = timeInfo # for C++ we have to make a copy, - # because the header of mktime is broken in my version of libc - return mktime(timeInfoToTM(cTimeInfo)) - - proc toStringTillNL(p: cstring): string = - result = "" - var i = 0 - while p[i] != '\0' and p[i] != '\10' and p[i] != '\13': - add(result, p[i]) - inc(i) +template eqImpl(a: Duration|Time, b: Duration|Time): bool = + a.seconds == b.seconds and a.nanosecond == b.nanosecond - proc `$`(timeInfo: TimeInfo): string = - # BUGFIX: asctime returns a newline at the end! - var p = asctime(timeInfoToTM(timeInfo)) - result = toStringTillNL(p) - - proc `$`(time: Time): string = - # BUGFIX: ctime returns a newline at the end! 
- var a = time - return toStringTillNL(ctime(addr(a))) - - const - epochDiff = 116444736000000000'i64 - rateDiff = 10000000'i64 # 100 nsecs - - proc unixTimeToWinTime*(t: Time): int64 = - ## converts a UNIX `Time` (``time_t``) to a Windows file time - result = int64(t) * rateDiff + epochDiff - - proc winTimeToUnixTime*(t: int64): Time = - ## converts a Windows time to a UNIX `Time` (``time_t``) - result = Time((t - epochDiff) div rateDiff) - - proc getTzname(): tuple[nonDST, DST: string] = - return ($tzname[0], $tzname[1]) - - proc getTimezone(): int = - return timezone - - proc fromSeconds(since1970: float): Time = Time(since1970) - - proc toSeconds(time: Time): float = float(time) - - when not defined(useNimRtl): - proc epochTime(): float = - when defined(posix): - var a: Timeval - posix_gettimeofday(a) - result = toFloat(a.tv_sec) + toFloat(a.tv_usec)*0.00_0001 - elif defined(windows): - var f: winlean.TFILETIME - getSystemTimeAsFileTime(f) - var i64 = rdFileTime(f) - epochDiff - var secs = i64 div rateDiff - var subsecs = i64 mod rateDiff - result = toFloat(int(secs)) + toFloat(int(subsecs)) * 0.0000001 - else: - {.error: "unknown OS".} +# +# Duration +# - proc cpuTime(): float = - result = toFloat(int(getClock())) / toFloat(clocksPerSec) +const DurationZero* = Duration() ## \ + ## Zero value for durations. Useful for comparisons. + ## ```nim + ## doAssert initDuration(seconds = 1) > DurationZero + ## doAssert initDuration(seconds = 0) == DurationZero + ## ``` + +proc initDuration*(nanoseconds, microseconds, milliseconds, + seconds, minutes, hours, days, weeks: int64 = 0): Duration = + ## Create a new `Duration <#Duration>`_. 
+ runnableExamples: + let dur = initDuration(seconds = 1, milliseconds = 1) + doAssert dur.inMilliseconds == 1001 + doAssert dur.inSeconds == 1 + + let seconds = convert(Weeks, Seconds, weeks) + + convert(Days, Seconds, days) + + convert(Minutes, Seconds, minutes) + + convert(Hours, Seconds, hours) + + convert(Seconds, Seconds, seconds) + + convert(Milliseconds, Seconds, milliseconds) + + convert(Microseconds, Seconds, microseconds) + + convert(Nanoseconds, Seconds, nanoseconds) + let nanoseconds = (convert(Milliseconds, Nanoseconds, milliseconds mod 1000) + + convert(Microseconds, Nanoseconds, microseconds mod 1_000_000) + + nanoseconds mod 1_000_000_000).int + # Nanoseconds might be negative so we must normalize. + result = normalize[Duration](seconds, nanoseconds) + +template convert(dur: Duration, unit: static[FixedTimeUnit]): int64 = + # The correction is required due to how durations are normalized. + # For example,` initDuration(nanoseconds = -1)` is stored as + # { seconds = -1, nanoseconds = 999999999 }. + when unit == Nanoseconds: + dur.seconds * 1_000_000_000 + dur.nanosecond + else: + let correction = dur.seconds < 0 and dur.nanosecond > 0 + when unit >= Seconds: + convert(Seconds, unit, dur.seconds + ord(correction)) + else: + if correction: + convert(Seconds, unit, dur.seconds + 1) - + convert(Nanoseconds, unit, + convert(Seconds, Nanoseconds, 1) - dur.nanosecond) + else: + convert(Seconds, unit, dur.seconds) + + convert(Nanoseconds, unit, dur.nanosecond) + +proc inWeeks*(dur: Duration): int64 = + ## Converts the duration to the number of whole weeks. + runnableExamples: + let dur = initDuration(days = 8) + doAssert dur.inWeeks == 1 + dur.convert(Weeks) + +proc inDays*(dur: Duration): int64 = + ## Converts the duration to the number of whole days. + runnableExamples: + let dur = initDuration(hours = -50) + doAssert dur.inDays == -2 + dur.convert(Days) + +proc inHours*(dur: Duration): int64 = + ## Converts the duration to the number of whole hours. 
+ runnableExamples: + let dur = initDuration(minutes = 60, days = 2) + doAssert dur.inHours == 49 + dur.convert(Hours) + +proc inMinutes*(dur: Duration): int64 = + ## Converts the duration to the number of whole minutes. + runnableExamples: + let dur = initDuration(hours = 2, seconds = 10) + doAssert dur.inMinutes == 120 + dur.convert(Minutes) + +proc inSeconds*(dur: Duration): int64 = + ## Converts the duration to the number of whole seconds. + runnableExamples: + let dur = initDuration(hours = 2, milliseconds = 10) + doAssert dur.inSeconds == 2 * 60 * 60 + dur.convert(Seconds) + +proc inMilliseconds*(dur: Duration): int64 = + ## Converts the duration to the number of whole milliseconds. + runnableExamples: + let dur = initDuration(seconds = -2) + doAssert dur.inMilliseconds == -2000 + dur.convert(Milliseconds) + +proc inMicroseconds*(dur: Duration): int64 = + ## Converts the duration to the number of whole microseconds. + runnableExamples: + let dur = initDuration(seconds = -2) + doAssert dur.inMicroseconds == -2000000 + dur.convert(Microseconds) + +proc inNanoseconds*(dur: Duration): int64 = + ## Converts the duration to the number of whole nanoseconds. + runnableExamples: + let dur = initDuration(seconds = -2) + doAssert dur.inNanoseconds == -2000000000 + dur.convert(Nanoseconds) + +proc toParts*(dur: Duration): DurationParts = + ## Converts a duration into an array consisting of fixed time units. + ## + ## Each value in the array gives information about a specific unit of + ## time, for example `result[Days]` gives a count of days. + ## + ## This procedure is useful for converting `Duration` values to strings. 
+ runnableExamples: + var dp = toParts(initDuration(weeks = 2, days = 1)) + doAssert dp[Days] == 1 + doAssert dp[Weeks] == 2 + doAssert dp[Minutes] == 0 + dp = toParts(initDuration(days = -1)) + doAssert dp[Days] == -1 + + var remS = dur.seconds + var remNs = dur.nanosecond.int + + # Ensure the same sign for seconds and nanoseconds + if remS < 0 and remNs != 0: + remNs -= convert(Seconds, Nanoseconds, 1) + remS.inc 1 + + for unit in countdown(Weeks, Seconds): + let quantity = convert(Seconds, unit, remS) + remS = remS mod convert(unit, Seconds, 1) + + result[unit] = quantity + + for unit in countdown(Milliseconds, Nanoseconds): + let quantity = convert(Nanoseconds, unit, remNs) + remNs = remNs mod convert(unit, Nanoseconds, 1) + + result[unit] = quantity + +proc `$`*(dur: Duration): string = + ## Human friendly string representation of a `Duration`. + runnableExamples: + doAssert $initDuration(seconds = 2) == "2 seconds" + doAssert $initDuration(weeks = 1, days = 2) == "1 week and 2 days" + doAssert $initDuration(hours = 1, minutes = 2, seconds = 3) == + "1 hour, 2 minutes, and 3 seconds" + doAssert $initDuration(milliseconds = -1500) == + "-1 second and -500 milliseconds" + var parts = newSeq[string]() + var numParts = toParts(dur) + + for unit in countdown(Weeks, Nanoseconds): + let quantity = numParts[unit] + if quantity != 0.int64: + parts.add(stringifyUnit(quantity, unit)) + + result = humanizeParts(parts) + +proc `+`*(a, b: Duration): Duration {.operator, extern: "ntAddDuration".} = + ## Add two durations together. + runnableExamples: + doAssert initDuration(seconds = 1) + initDuration(days = 1) == + initDuration(seconds = 1, days = 1) + addImpl[Duration](a, b) + +proc `-`*(a, b: Duration): Duration {.operator, extern: "ntSubDuration".} = + ## Subtract a duration from another. 
+ runnableExamples: + doAssert initDuration(seconds = 1, days = 1) - initDuration(seconds = 1) == + initDuration(days = 1) + subImpl[Duration](a, b) + +proc `-`*(a: Duration): Duration {.operator, extern: "ntReverseDuration".} = + ## Reverse a duration. + runnableExamples: + doAssert -initDuration(seconds = 1) == initDuration(seconds = -1) + normalize[Duration](-a.seconds, -a.nanosecond) + +proc `<`*(a, b: Duration): bool {.operator, extern: "ntLtDuration".} = + ## Note that a duration can be negative, + ## so even if `a < b` is true `a` might + ## represent a larger absolute duration. + ## Use `abs(a) < abs(b)` to compare the absolute + ## duration. + runnableExamples: + doAssert initDuration(seconds = 1) < initDuration(seconds = 2) + doAssert initDuration(seconds = -2) < initDuration(seconds = 1) + doAssert initDuration(seconds = -2).abs < initDuration(seconds = 1).abs == false + ltImpl(a, b) + +proc `<=`*(a, b: Duration): bool {.operator, extern: "ntLeDuration".} = + lqImpl(a, b) + +proc `==`*(a, b: Duration): bool {.operator, extern: "ntEqDuration".} = + runnableExamples: + let + d1 = initDuration(weeks = 1) + d2 = initDuration(days = 7) + doAssert d1 == d2 + eqImpl(a, b) + +proc `*`*(a: int64, b: Duration): Duration {.operator, + extern: "ntMulInt64Duration".} = + ## Multiply a duration by some scalar. + runnableExamples: + doAssert 5 * initDuration(seconds = 1) == initDuration(seconds = 5) + doAssert 3 * initDuration(minutes = 45) == initDuration(hours = 2, minutes = 15) + normalize[Duration](a * b.seconds, a * b.nanosecond) + +proc `*`*(a: Duration, b: int64): Duration {.operator, + extern: "ntMulDuration".} = + ## Multiply a duration by some scalar. 
+ runnableExamples: + doAssert initDuration(seconds = 1) * 5 == initDuration(seconds = 5) + doAssert initDuration(minutes = 45) * 3 == initDuration(hours = 2, minutes = 15) + b * a + +proc `+=`*(d1: var Duration, d2: Duration) = + d1 = d1 + d2 + +proc `-=`*(dt: var Duration, ti: Duration) = + dt = dt - ti + +proc `*=`*(a: var Duration, b: int) = + a = a * b + +proc `div`*(a: Duration, b: int64): Duration {.operator, + extern: "ntDivDuration".} = + ## Integer division for durations. + runnableExamples: + doAssert initDuration(seconds = 3) div 2 == + initDuration(milliseconds = 1500) + doAssert initDuration(minutes = 45) div 30 == + initDuration(minutes = 1, seconds = 30) + doAssert initDuration(nanoseconds = 3) div 2 == + initDuration(nanoseconds = 1) + let carryOver = convert(Seconds, Nanoseconds, a.seconds mod b) + normalize[Duration](a.seconds div b, (a.nanosecond + carryOver) div b) + +proc high*(typ: typedesc[Duration]): Duration = + ## Get the longest representable duration. + initDuration(seconds = high(int64), nanoseconds = high(NanosecondRange)) + +proc low*(typ: typedesc[Duration]): Duration = + ## Get the longest representable duration of negative direction. + initDuration(seconds = low(int64)) + +proc abs*(a: Duration): Duration = + runnableExamples: + doAssert initDuration(milliseconds = -1500).abs == + initDuration(milliseconds = 1500) + initDuration(seconds = abs(a.seconds), nanoseconds = -a.nanosecond) -elif defined(JS): - proc newDate(): Time {.importc: "new Date".} - proc internGetTime(): Time {.importc: "new Date", tags: [].} - - proc newDate(value: float): Time {.importc: "new Date".} - proc newDate(value: string): Time {.importc: "new Date".} - proc getTime(): Time = - # Warning: This is something different in JS. 
- return newDate() - - const - weekDays: array [0..6, WeekDay] = [ - dSun, dMon, dTue, dWed, dThu, dFri, dSat] - - proc getLocalTime(t: Time): TimeInfo = - result.second = t.getSeconds() - result.minute = t.getMinutes() - result.hour = t.getHours() - result.monthday = t.getDate() - result.month = Month(t.getMonth()) - result.year = t.getFullYear() - result.weekday = weekDays[t.getDay()] - result.yearday = 0 - - proc getGMTime(t: Time): TimeInfo = - result.second = t.getUTCSeconds() - result.minute = t.getUTCMinutes() - result.hour = t.getUTCHours() - result.monthday = t.getUTCDate() - result.month = Month(t.getUTCMonth()) - result.year = t.getUTCFullYear() - result.weekday = weekDays[t.getUTCDay()] - result.yearday = 0 - - proc timeInfoToTime*(timeInfo: TimeInfo): Time = - result = internGetTime() - result.setSeconds(timeInfo.second) - result.setMinutes(timeInfo.minute) - result.setHours(timeInfo.hour) - result.setMonth(ord(timeInfo.month)) - result.setFullYear(timeInfo.year) - result.setDate(timeInfo.monthday) - - proc `$`(timeInfo: TimeInfo): string = return $(timeInfoToTime(timeInfo)) - proc `$`(time: Time): string = return $time.toLocaleString() - - proc `-` (a, b: Time): int64 = - return a.getTime() - b.getTime() +# +# Time +# - var - startMilsecs = getTime() +proc initTime*(unix: int64, nanosecond: NanosecondRange): Time = + ## Create a `Time <#Time>`_ from a unix timestamp and a nanosecond part. + result.seconds = unix + result.nanosecond = nanosecond + +proc nanosecond*(time: Time): NanosecondRange = + ## Get the fractional part of a `Time` as the number + ## of nanoseconds of the second. + time.nanosecond + +proc fromUnix*(unix: int64): Time + {.benign, tags: [], raises: [], noSideEffect.} = + ## Convert a unix timestamp (seconds since `1970-01-01T00:00:00Z`) + ## to a `Time`. 
+ runnableExamples: + doAssert $fromUnix(0).utc == "1970-01-01T00:00:00Z" + initTime(unix, 0) + +proc toUnix*(t: Time): int64 {.benign, tags: [], raises: [], noSideEffect.} = + ## Convert `t` to a unix timestamp (seconds since `1970-01-01T00:00:00Z`). + ## See also `toUnixFloat` for subsecond resolution. + runnableExamples: + doAssert fromUnix(0).toUnix() == 0 + t.seconds + +proc fromUnixFloat(seconds: float): Time {.benign, tags: [], raises: [], noSideEffect.} = + ## Convert a unix timestamp in seconds to a `Time`; same as `fromUnix` + ## but with subsecond resolution. + runnableExamples: + doAssert fromUnixFloat(123456.0) == fromUnixFloat(123456) + doAssert fromUnixFloat(-123456.0) == fromUnixFloat(-123456) + let secs = seconds.floor + let nsecs = (seconds - secs) * 1e9 + initTime(secs.int64, nsecs.NanosecondRange) + +proc toUnixFloat(t: Time): float {.benign, tags: [], raises: [].} = + ## Same as `toUnix` but using subsecond resolution. + runnableExamples: + let t = getTime() + # `<` because of rounding errors + doAssert abs(t.toUnixFloat().fromUnixFloat - t) < initDuration(nanoseconds = 1000) + t.seconds.float + t.nanosecond / convert(Seconds, Nanoseconds, 1) + +since((1, 1)): + export fromUnixFloat + export toUnixFloat + + +proc fromWinTime*(win: int64): Time = + ## Convert a Windows file time (100-nanosecond intervals since + ## `1601-01-01T00:00:00Z`) to a `Time`. + const hnsecsPerSec = convert(Seconds, Nanoseconds, 1) div 100 + let nanos = floorMod(win, hnsecsPerSec) * 100 + let seconds = floorDiv(win - epochDiff, hnsecsPerSec) + result = initTime(seconds, nanos) + +proc toWinTime*(t: Time): int64 = + ## Convert `t` to a Windows file time (100-nanosecond intervals + ## since `1601-01-01T00:00:00Z`). 
+ result = t.seconds * rateDiff + epochDiff + t.nanosecond div 100 + +proc getTimeImpl(typ: typedesc[Time]): Time = + discard "implemented in the vm" + +proc getTime*(): Time {.tags: [TimeEffect], benign.} = + ## Gets the current time as a `Time` with up to nanosecond resolution. + when nimvm: + result = getTimeImpl(Time) + else: + when defined(js): + let millis = newDate().getTime() + let seconds = convert(Milliseconds, Seconds, millis) + let nanos = convert(Milliseconds, Nanoseconds, + millis mod convert(Seconds, Milliseconds, 1).int) + result = initTime(seconds, nanos) + elif defined(macosx): + var a {.noinit.}: Timeval + gettimeofday(a) + result = initTime(a.tv_sec.int64, + convert(Microseconds, Nanoseconds, a.tv_usec.int)) + elif defined(posix): + var ts {.noinit.}: Timespec + discard clock_gettime(CLOCK_REALTIME, ts) + result = initTime(ts.tv_sec.int64, ts.tv_nsec.int) + elif defined(windows): + var f {.noinit.}: FILETIME + getSystemTimeAsFileTime(f) + result = fromWinTime(rdFileTime(f)) + +proc `-`*(a, b: Time): Duration {.operator, extern: "ntDiffTime".} = + ## Computes the duration between two points in time. + runnableExamples: + doAssert initTime(1000, 100) - initTime(500, 20) == + initDuration(minutes = 8, seconds = 20, nanoseconds = 80) + subImpl[Duration](a, b) + +proc `+`*(a: Time, b: Duration): Time {.operator, extern: "ntAddTime".} = + ## Add a duration of time to a `Time`. + runnableExamples: + doAssert (fromUnix(0) + initDuration(seconds = 1)) == fromUnix(1) + addImpl[Time](a, b) + +proc `-`*(a: Time, b: Duration): Time {.operator, extern: "ntSubTime".} = + ## Subtracts a duration of time from a `Time`. + runnableExamples: + doAssert (fromUnix(0) - initDuration(seconds = 1)) == fromUnix(-1) + subImpl[Time](a, b) + +proc `<`*(a, b: Time): bool {.operator, extern: "ntLtTime".} = + ## Returns true if `a < b`, that is if `a` happened before `b`. 
+ runnableExamples: + doAssert initTime(50, 0) < initTime(99, 0) + ltImpl(a, b) + +proc `<=`*(a, b: Time): bool {.operator, extern: "ntLeTime".} = + ## Returns true if `a <= b`. + lqImpl(a, b) + +proc `==`*(a, b: Time): bool {.operator, extern: "ntEqTime".} = + ## Returns true if `a == b`, that is if both times represent the same point in time. + eqImpl(a, b) + +proc `+=`*(t: var Time, b: Duration) = + t = t + b + +proc `-=`*(t: var Time, b: Duration) = + t = t - b + +proc high*(typ: typedesc[Time]): Time = + initTime(high(int64), high(NanosecondRange)) + +proc low*(typ: typedesc[Time]): Time = + initTime(0, 0) - proc getStartMilsecs(): int = - ## get the miliseconds from the start of the program - return int(getTime() - startMilsecs) +# +# DateTime & Timezone +# - proc valueOf(time: Time): float {.importcpp: "getTime", tags:[]} +template assertDateTimeInitialized(dt: DateTime) = + assert dt.monthdayZero != 0, "Uninitialized datetime" + +proc nanosecond*(dt: DateTime): NanosecondRange {.inline.} = + ## The number of nanoseconds after the second, + ## in the range 0 to 999_999_999. + assertDateTimeInitialized(dt) + dt.nanosecond + +proc second*(dt: DateTime): SecondRange {.inline.} = + ## The number of seconds after the minute, + ## in the range 0 to 59. + assertDateTimeInitialized(dt) + dt.second + +proc minute*(dt: DateTime): MinuteRange {.inline.} = + ## The number of minutes after the hour, + ## in the range 0 to 59. + assertDateTimeInitialized(dt) + dt.minute + +proc hour*(dt: DateTime): HourRange {.inline.} = + ## The number of hours past midnight, + ## in the range 0 to 23. + assertDateTimeInitialized(dt) + dt.hour + +proc monthday*(dt: DateTime): MonthdayRange {.inline.} = + ## The day of the month, in the range 1 to 31. + assertDateTimeInitialized(dt) + # 'cast' to avoid extra range check + cast[MonthdayRange](dt.monthdayZero) + +proc month*(dt: DateTime): Month = + ## The month as an enum, the ordinal value + ## is in the range 1 to 12. 
+ assertDateTimeInitialized(dt) + # 'cast' to avoid extra range check + cast[Month](dt.monthZero) + +proc year*(dt: DateTime): int {.inline.} = + ## The year, using astronomical year numbering + ## (meaning that before year 1 is year 0, + ## then year -1 and so on). + assertDateTimeInitialized(dt) + dt.year + +proc weekday*(dt: DateTime): WeekDay {.inline.} = + ## The day of the week as an enum, the ordinal + ## value is in the range 0 (monday) to 6 (sunday). + assertDateTimeInitialized(dt) + dt.weekday + +proc yearday*(dt: DateTime): YeardayRange {.inline.} = + ## The number of days since January 1, + ## in the range 0 to 365. + assertDateTimeInitialized(dt) + dt.yearday + +proc isDst*(dt: DateTime): bool {.inline.} = + ## Determines whether DST is in effect. + ## Always false for the JavaScript backend. + assertDateTimeInitialized(dt) + dt.isDst + +proc timezone*(dt: DateTime): Timezone {.inline.} = + ## The timezone represented as an implementation + ## of `Timezone`. + assertDateTimeInitialized(dt) + dt.timezone + +proc utcOffset*(dt: DateTime): int {.inline.} = + ## The offset in seconds west of UTC, including + ## any offset due to DST. Note that the sign of + ## this number is the opposite of the one in a + ## formatted offset string like `+01:00` (which + ## would be equivalent to the UTC offset + ## `-3600`). + assertDateTimeInitialized(dt) + dt.utcOffset + +proc isInitialized(dt: DateTime): bool = + # Returns true if `dt` is not the (invalid) default value for `DateTime`. + runnableExamples: + doAssert now().isInitialized + doAssert not default(DateTime).isInitialized + dt.monthZero != 0 + +since((1, 3)): + export isInitialized + +proc isLeapDay*(dt: DateTime): bool {.since: (1, 1).} = + ## Returns whether `t` is a leap day, i.e. Feb 29 in a leap year. This matters + ## as it affects time offset calculations. 
+ runnableExamples: + let dt = dateTime(2020, mFeb, 29, 00, 00, 00, 00, utc()) + doAssert dt.isLeapDay + doAssert dt+1.years-1.years != dt + let dt2 = dateTime(2020, mFeb, 28, 00, 00, 00, 00, utc()) + doAssert not dt2.isLeapDay + doAssert dt2+1.years-1.years == dt2 + doAssertRaises(Exception): discard dateTime(2021, mFeb, 29, 00, 00, 00, 00, utc()) + assertDateTimeInitialized dt + dt.year.isLeapYear and dt.month == mFeb and dt.monthday == 29 + +proc toTime*(dt: DateTime): Time {.tags: [], raises: [], benign.} = + ## Converts a `DateTime` to a `Time` representing the same point in time. + assertDateTimeInitialized dt + let epochDay = toEpochDay(dt.monthday, dt.month, dt.year) + var seconds = epochDay * secondsInDay + seconds.inc dt.hour * secondsInHour + seconds.inc dt.minute * 60 + seconds.inc dt.second + seconds.inc dt.utcOffset + result = initTime(seconds, dt.nanosecond) + +proc initDateTime(zt: ZonedTime, zone: Timezone): DateTime = + ## Create a new `DateTime` using `ZonedTime` in the specified timezone. + let adjTime = zt.time - initDuration(seconds = zt.utcOffset) + let s = adjTime.seconds + let epochday = floorDiv(s, secondsInDay) + var rem = s - epochday * secondsInDay + let hour = rem div secondsInHour + rem = rem - hour * secondsInHour + let minute = rem div secondsInMin + rem = rem - minute * secondsInMin + let second = rem + + let (d, m, y) = fromEpochDay(epochday) + + DateTime( + year: y, + monthZero: m.int, + monthdayZero: d, + hour: hour, + minute: minute, + second: second, + nanosecond: zt.time.nanosecond, + weekday: getDayOfWeek(d, m, y), + yearday: getDayOfYear(d, m, y), + isDst: zt.isDst, + timezone: zone, + utcOffset: zt.utcOffset + ) + +proc newTimezone*( + name: string, + zonedTimeFromTimeImpl: proc (time: Time): ZonedTime + {.tags: [], raises: [], benign.}, + zonedTimeFromAdjTimeImpl: proc (adjTime: Time): ZonedTime + {.tags: [], raises: [], benign.} + ): owned Timezone = + ## Create a new `Timezone`. 
+ ## + ## `zonedTimeFromTimeImpl` and `zonedTimeFromAdjTimeImpl` is used + ## as the underlying implementations for `zonedTimeFromTime` and + ## `zonedTimeFromAdjTime`. + ## + ## If possible, the name parameter should match the name used in the + ## tz database. If the timezone doesn't exist in the tz database, or if the + ## timezone name is unknown, then any string that describes the timezone + ## unambiguously can be used. Note that the timezones name is used for + ## checking equality! + runnableExamples: + proc utcTzInfo(time: Time): ZonedTime = + ZonedTime(utcOffset: 0, isDst: false, time: time) + let utc = newTimezone("Etc/UTC", utcTzInfo, utcTzInfo) + Timezone( + name: name, + zonedTimeFromTimeImpl: zonedTimeFromTimeImpl, + zonedTimeFromAdjTimeImpl: zonedTimeFromAdjTimeImpl + ) + +proc name*(zone: Timezone): string = + ## The name of the timezone. + ## + ## If possible, the name will be the name used in the tz database. + ## If the timezone doesn't exist in the tz database, or if the timezone + ## name is unknown, then any string that describes the timezone + ## unambiguously might be used. For example, the string "LOCAL" is used + ## for the system's local timezone. + ## + ## See also: https://en.wikipedia.org/wiki/Tz_database + zone.name - proc fromSeconds(since1970: float): Time = result = newDate(since1970) +proc zonedTimeFromTime*(zone: Timezone, time: Time): ZonedTime = + ## Returns the `ZonedTime` for some point in time. + zone.zonedTimeFromTimeImpl(time) - proc toSeconds(time: Time): float = result = time.valueOf() / 1000 +proc zonedTimeFromAdjTime*(zone: Timezone, adjTime: Time): ZonedTime = + ## Returns the `ZonedTime` for some local time. + ## + ## Note that the `Time` argument does not represent a point in time, it + ## represent a local time! E.g if `adjTime` is `fromUnix(0)`, it should be + ## interpreted as 1970-01-01T00:00:00 in the `zone` timezone, not in UTC. 
+ zone.zonedTimeFromAdjTimeImpl(adjTime) + +proc `$`*(zone: Timezone): string = + ## Returns the name of the timezone. + if zone != nil: result = zone.name + +proc `==`*(zone1, zone2: Timezone): bool = + ## Two `Timezone`'s are considered equal if their name is equal. + runnableExamples: + doAssert local() == local() + doAssert local() != utc() + if system.`==`(zone1, zone2): + return true + if zone1.isNil or zone2.isNil: + return false + zone1.name == zone2.name + +proc inZone*(time: Time, zone: Timezone): DateTime + {.tags: [], raises: [], benign.} = + ## Convert `time` into a `DateTime` using `zone` as the timezone. + result = initDateTime(zone.zonedTimeFromTime(time), zone) + +proc inZone*(dt: DateTime, zone: Timezone): DateTime + {.tags: [], raises: [], benign.} = + ## Returns a `DateTime` representing the same point in time as `dt` but + ## using `zone` as the timezone. + assertDateTimeInitialized dt + dt.toTime.inZone(zone) + +proc toAdjTime(dt: DateTime): Time = + let epochDay = toEpochDay(dt.monthday, dt.month, dt.year) + var seconds = epochDay * secondsInDay + seconds.inc dt.hour * secondsInHour + seconds.inc dt.minute * secondsInMin + seconds.inc dt.second + result = initTime(seconds, dt.nanosecond) + +when defined(js): + proc localZonedTimeFromTime(time: Time): ZonedTime {.benign.} = + let jsDate = newDate(time.seconds * 1000) + let offset = jsDate.getTimezoneOffset() * secondsInMin + result.time = time + result.utcOffset = offset + result.isDst = false + + proc localZonedTimeFromAdjTime(adjTime: Time): ZonedTime {.benign.} = + let utcDate = newDate(adjTime.seconds * 1000) + let localDate = newDate(utcDate.getUTCFullYear(), utcDate.getUTCMonth(), + utcDate.getUTCDate(), utcDate.getUTCHours(), utcDate.getUTCMinutes(), + utcDate.getUTCSeconds(), 0) + + # This is as dumb as it looks - JS doesn't support years in the range + # 0-99 in the constructor because they are assumed to be 19xx... 
+ # Because JS doesn't support timezone history, + # it doesn't really matter in practice. + if utcDate.getUTCFullYear() in 0 .. 99: + localDate.setFullYear(utcDate.getUTCFullYear()) + + result.utcOffset = localDate.getTimezoneOffset() * secondsInMin + result.time = adjTime + initDuration(seconds = result.utcOffset) + result.isDst = false + +else: + proc toAdjUnix(tm: Tm): int64 = + let epochDay = toEpochDay(tm.tm_mday, (tm.tm_mon + 1).Month, + tm.tm_year.int + 1900) + result = epochDay * secondsInDay + result.inc tm.tm_hour * secondsInHour + result.inc tm.tm_min * 60 + result.inc tm.tm_sec + + proc getLocalOffsetAndDst(unix: int64): tuple[offset: int, dst: bool] = + # Windows can't handle unix < 0, so we fall back to unix = 0. + # FIXME: This should be improved by falling back to the WinAPI instead. + when defined(windows): + if unix < 0: + var a = 0.CTime + let tmPtr = localtime(a) + if not tmPtr.isNil: + let tm = tmPtr[] + return ((0 - tm.toAdjUnix).int, false) + return (0, false) + + # In case of a 32-bit time_t, we fallback to the closest available + # timezone information. 
+ var a = clamp(unix, low(CTime).int64, high(CTime).int64).CTime + let tmPtr = localtime(a) + if not tmPtr.isNil: + let tm = tmPtr[] + return ((a.int64 - tm.toAdjUnix).int, tm.tm_isdst > 0) + return (0, false) + + proc localZonedTimeFromTime(time: Time): ZonedTime {.benign.} = + let (offset, dst) = getLocalOffsetAndDst(time.seconds) + result.time = time + result.utcOffset = offset + result.isDst = dst + + proc localZonedTimeFromAdjTime(adjTime: Time): ZonedTime {.benign.} = + var adjUnix = adjTime.seconds + let past = adjUnix - secondsInDay + let (pastOffset, _) = getLocalOffsetAndDst(past) + + let future = adjUnix + secondsInDay + let (futureOffset, _) = getLocalOffsetAndDst(future) + + var utcOffset: int + if pastOffset == futureOffset: + utcOffset = pastOffset.int + else: + if pastOffset > futureOffset: + adjUnix -= secondsInHour + + adjUnix += pastOffset + utcOffset = getLocalOffsetAndDst(adjUnix).offset + + # This extra roundtrip is needed to normalize any impossible datetimes + # as a result of offset changes (normally due to dst) + let utcUnix = adjTime.seconds + utcOffset + let (finalOffset, dst) = getLocalOffsetAndDst(utcUnix) + result.time = initTime(utcUnix, adjTime.nanosecond) + result.utcOffset = finalOffset + result.isDst = dst + +proc utcTzInfo(time: Time): ZonedTime = + ZonedTime(utcOffset: 0, isDst: false, time: time) + +var utcInstance {.threadvar.}: Timezone +var localInstance {.threadvar.}: Timezone + +proc utc*(): Timezone = + ## Get the `Timezone` implementation for the UTC timezone. + runnableExamples: + doAssert now().utc.timezone == utc() + doAssert utc().name == "Etc/UTC" + if utcInstance.isNil: + utcInstance = newTimezone("Etc/UTC", utcTzInfo, utcTzInfo) + result = utcInstance + +proc local*(): Timezone = + ## Get the `Timezone` implementation for the local timezone. 
+ runnableExamples: + doAssert now().timezone == local() + doAssert local().name == "LOCAL" + if localInstance.isNil: + localInstance = newTimezone("LOCAL", localZonedTimeFromTime, + localZonedTimeFromAdjTime) + result = localInstance + +proc utc*(dt: DateTime): DateTime = + ## Shorthand for `dt.inZone(utc())`. + dt.inZone(utc()) + +proc local*(dt: DateTime): DateTime = + ## Shorthand for `dt.inZone(local())`. + dt.inZone(local()) + +proc utc*(t: Time): DateTime = + ## Shorthand for `t.inZone(utc())`. + t.inZone(utc()) + +proc local*(t: Time): DateTime = + ## Shorthand for `t.inZone(local())`. + t.inZone(local()) + +proc now*(): DateTime {.tags: [TimeEffect], benign.} = + ## Get the current time as a `DateTime` in the local timezone. + ## Shorthand for `getTime().local`. + ## + ## .. warning:: Unsuitable for benchmarking, use `monotimes.getMonoTime` or + ## `cpuTime` instead, depending on the use case. + getTime().local + +proc dateTime*(year: int, month: Month, monthday: MonthdayRange, + hour: HourRange = 0, minute: MinuteRange = 0, second: SecondRange = 0, + nanosecond: NanosecondRange = 0, + zone: Timezone = local()): DateTime = + ## Create a new `DateTime <#DateTime>`_ in the specified timezone. + runnableExamples: + assert $dateTime(2017, mMar, 30, zone = utc()) == "2017-03-30T00:00:00Z" + + assertValidDate monthday, month, year + let dt = DateTime( + monthdayZero: monthday, + year: year, + monthZero: month.int, + hour: hour, + minute: minute, + second: second, + nanosecond: nanosecond + ) + result = initDateTime(zone.zonedTimeFromAdjTime(dt.toAdjTime), zone) + +proc initDateTime*(monthday: MonthdayRange, month: Month, year: int, + hour: HourRange, minute: MinuteRange, second: SecondRange, + nanosecond: NanosecondRange, + zone: Timezone = local()): DateTime {.deprecated: "use `dateTime`".} = + ## Create a new `DateTime <#DateTime>`_ in the specified timezone. 
+ runnableExamples("--warning:deprecated:off"): + assert $initDateTime(30, mMar, 2017, 00, 00, 00, 00, utc()) == "2017-03-30T00:00:00Z" + dateTime(year, month, monthday, hour, minute, second, nanosecond, zone) + +proc initDateTime*(monthday: MonthdayRange, month: Month, year: int, + hour: HourRange, minute: MinuteRange, second: SecondRange, + zone: Timezone = local()): DateTime {.deprecated: "use `dateTime`".} = + ## Create a new `DateTime <#DateTime>`_ in the specified timezone. + runnableExamples("--warning:deprecated:off"): + assert $initDateTime(30, mMar, 2017, 00, 00, 00, utc()) == "2017-03-30T00:00:00Z" + dateTime(year, month, monthday, hour, minute, second, 0, zone) + +proc `+`*(dt: DateTime, dur: Duration): DateTime = + runnableExamples: + let dt = dateTime(2017, mMar, 30, 00, 00, 00, 00, utc()) + let dur = initDuration(hours = 5) + doAssert $(dt + dur) == "2017-03-30T05:00:00Z" + + (dt.toTime + dur).inZone(dt.timezone) + +proc `-`*(dt: DateTime, dur: Duration): DateTime = + runnableExamples: + let dt = dateTime(2017, mMar, 30, 00, 00, 00, 00, utc()) + let dur = initDuration(days = 5) + doAssert $(dt - dur) == "2017-03-25T00:00:00Z" + + (dt.toTime - dur).inZone(dt.timezone) + +proc `-`*(dt1, dt2: DateTime): Duration = + ## Compute the duration between `dt1` and `dt2`. + runnableExamples: + let dt1 = dateTime(2017, mMar, 30, 00, 00, 00, 00, utc()) + let dt2 = dateTime(2017, mMar, 25, 00, 00, 00, 00, utc()) + + doAssert dt1 - dt2 == initDuration(days = 5) + + dt1.toTime - dt2.toTime + +proc `<`*(a, b: DateTime): bool = + ## Returns true if `a` happened before `b`. + return a.toTime < b.toTime + +proc `<=`*(a, b: DateTime): bool = + ## Returns true if `a` happened before or at the same time as `b`. + return a.toTime <= b.toTime + +proc `==`*(a, b: DateTime): bool = + ## Returns true if `a` and `b` represent the same point in time. 
+ if not a.isInitialized: not b.isInitialized + elif not b.isInitialized: false + else: a.toTime == b.toTime + +proc `+=`*(a: var DateTime, b: Duration) = + a = a + b + +proc `-=`*(a: var DateTime, b: Duration) = + a = a - b + +proc getDateStr*(dt = now()): string {.rtl, extern: "nt$1", tags: [TimeEffect].} = + ## Gets the current local date as a string of the format `YYYY-MM-dd`. + runnableExamples: + echo getDateStr(now() - 1.months) + assertDateTimeInitialized dt + result = newStringOfCap(10) # len("YYYY-MM-dd") == 10 + result.addInt dt.year + result.add '-' + result.add intToStr(dt.monthZero, 2) + result.add '-' + result.add intToStr(dt.monthday, 2) + +proc getClockStr*(dt = now()): string {.rtl, extern: "nt$1", tags: [TimeEffect].} = + ## Gets the current local clock time as a string of the format `HH:mm:ss`. + runnableExamples: + echo getClockStr(now() - 1.hours) + assertDateTimeInitialized dt + result = newStringOfCap(8) # len("HH:mm:ss") == 8 + result.add intToStr(dt.hour, 2) + result.add ':' + result.add intToStr(dt.minute, 2) + result.add ':' + result.add intToStr(dt.second, 2) - proc getTimezone(): int = result = newDate().getTimezoneOffset() - proc epochTime*(): float {.tags: [TimeEffect].} = newDate().toSeconds() +# +# Iso week forward declarations +# -proc getDateStr*(): string {.rtl, extern: "nt$1", tags: [TimeEffect].} = - ## gets the current date as a string of the format ``YYYY-MM-DD``. - var ti = getLocalTime(getTime()) - result = $ti.year & '-' & intToStr(ord(ti.month)+1, 2) & - '-' & intToStr(ti.monthday, 2) +proc initDateTime*(weekday: WeekDay, isoweek: IsoWeekRange, isoyear: IsoYear, + hour: HourRange, minute: MinuteRange, second: SecondRange, + nanosecond: NanosecondRange, + zone: Timezone = local()): DateTime {.gcsafe, raises: [], tags: [], since: (1, 5).} -proc getClockStr*(): string {.rtl, extern: "nt$1", tags: [TimeEffect].} = - ## gets the current clock time as a string of the format ``HH:MM:SS``. 
- var ti = getLocalTime(getTime()) - result = intToStr(ti.hour, 2) & ':' & intToStr(ti.minute, 2) & - ':' & intToStr(ti.second, 2) +proc initDateTime*(weekday: WeekDay, isoweek: IsoWeekRange, isoyear: IsoYear, + hour: HourRange, minute: MinuteRange, second: SecondRange, + zone: Timezone = local()): DateTime {.gcsafe, raises: [], tags: [], since: (1, 5).} -proc `$`*(day: WeekDay): string = - ## stingify operator for ``WeekDay``. - const lookup: array[WeekDay, string] = ["Monday", "Tuesday", "Wednesday", - "Thursday", "Friday", "Saturday", "Sunday"] - return lookup[day] +# +# TimeFormat +# -proc `$`*(m: Month): string = - ## stingify operator for ``Month``. - const lookup: array[Month, string] = ["January", "February", "March", - "April", "May", "June", "July", "August", "September", "October", - "November", "December"] - return lookup[m] +when defined(nimHasStyleChecks): + {.push styleChecks: off.} -proc formatToken(info: TimeInfo, token: string, buf: var string) = - ## Helper of the format proc to parse individual tokens. - ## - ## Pass the found token in the user input string, and the buffer where the - ## final string is being built. This has to be a var value because certain - ## formatting tokens require modifying the previous characters. - case token - of "d": - buf.add($info.monthday) - of "dd": - if info.monthday < 10: - buf.add("0") - buf.add($info.monthday) - of "ddd": - buf.add(($info.weekday)[0 .. 
2]) - of "dddd": - buf.add($info.weekday) - of "h": - buf.add($(if info.hour > 12: info.hour - 12 else: info.hour)) - of "hh": - let amerHour = if info.hour > 12: info.hour - 12 else: info.hour - if amerHour < 10: - buf.add('0') - buf.add($amerHour) - of "H": - buf.add($info.hour) - of "HH": - if info.hour < 10: - buf.add('0') - buf.add($info.hour) - of "m": - buf.add($info.minute) - of "mm": - if info.minute < 10: - buf.add('0') - buf.add($info.minute) - of "M": - buf.add($(int(info.month)+1)) - of "MM": - if info.month < mOct: - buf.add('0') - buf.add($(int(info.month)+1)) - of "MMM": - buf.add(($info.month)[0..2]) - of "MMMM": - buf.add($info.month) - of "s": - buf.add($info.second) - of "ss": - if info.second < 10: - buf.add('0') - buf.add($info.second) - of "t": - if info.hour >= 12: - buf.add('P') - else: buf.add('A') - of "tt": - if info.hour >= 12: - buf.add("PM") - else: buf.add("AM") - of "y": - var fr = ($info.year).len()-1 - if fr < 0: fr = 0 - buf.add(($info.year)[fr .. ($info.year).len()-1]) - of "yy": - var fr = ($info.year).len()-2 - if fr < 0: fr = 0 - var fyear = ($info.year)[fr .. ($info.year).len()-1] - if fyear.len != 2: fyear = repeat('0', 2-fyear.len()) & fyear - buf.add(fyear) - of "yyy": - var fr = ($info.year).len()-3 - if fr < 0: fr = 0 - var fyear = ($info.year)[fr .. ($info.year).len()-1] - if fyear.len != 3: fyear = repeat('0', 3-fyear.len()) & fyear - buf.add(fyear) - of "yyyy": - var fr = ($info.year).len()-4 - if fr < 0: fr = 0 - var fyear = ($info.year)[fr .. ($info.year).len()-1] - if fyear.len != 4: fyear = repeat('0', 4-fyear.len()) & fyear - buf.add(fyear) - of "yyyyy": - var fr = ($info.year).len()-5 - if fr < 0: fr = 0 - var fyear = ($info.year)[fr .. 
($info.year).len()-1] - if fyear.len != 5: fyear = repeat('0', 5-fyear.len()) & fyear - buf.add(fyear) - of "z": - let hrs = (info.timezone div 60) div 60 - buf.add($hrs) - of "zz": - let hrs = (info.timezone div 60) div 60 - - buf.add($hrs) - if hrs.abs < 10: - var atIndex = buf.len-(($hrs).len-(if hrs < 0: 1 else: 0)) - buf.insert("0", atIndex) - of "zzz": - let hrs = (info.timezone div 60) div 60 - - buf.add($hrs & ":00") - if hrs.abs < 10: - var atIndex = buf.len-(($hrs & ":00").len-(if hrs < 0: 1 else: 0)) - buf.insert("0", atIndex) - of "ZZZ": - buf.add(info.tzname) - of "": - discard - else: - raise newException(ValueError, "Invalid format string: " & token) +type + DateTimeLocale* = object + MMM*: array[mJan..mDec, string] + MMMM*: array[mJan..mDec, string] + ddd*: array[dMon..dSun, string] + dddd*: array[dMon..dSun, string] +when defined(nimHasStyleChecks): + {.pop.} -proc format*(info: TimeInfo, f: string): string = - ## This function formats `info` as specified by `f`. The following format - ## specifiers are available: - ## - ## ========== ================================================================================= ================================================ - ## Specifier Description Example - ## ========== ================================================================================= ================================================ - ## d Numeric value of the day of the month, it will be one or two digits long. ``1/04/2012 -> 1``, ``21/04/2012 -> 21`` - ## dd Same as above, but always two digits. ``1/04/2012 -> 01``, ``21/04/2012 -> 21`` - ## ddd Three letter string which indicates the day of the week. ``Saturday -> Sat``, ``Monday -> Mon`` - ## dddd Full string for the day of the week. ``Saturday -> Saturday``, ``Monday -> Monday`` - ## h The hours in one digit if possible. Ranging from 0-12. ``5pm -> 5``, ``2am -> 2`` - ## hh The hours in two digits always. If the hour is one digit 0 is prepended. 
``5pm -> 05``, ``11am -> 11`` - ## H The hours in one digit if possible, randing from 0-24. ``5pm -> 17``, ``2am -> 2`` - ## HH The hours in two digits always. 0 is prepended if the hour is one digit. ``5pm -> 17``, ``2am -> 02`` - ## m The minutes in 1 digit if possible. ``5:30 -> 30``, ``2:01 -> 1`` - ## mm Same as above but always 2 digits, 0 is prepended if the minute is one digit. ``5:30 -> 30``, ``2:01 -> 01`` - ## M The month in one digit if possible. ``September -> 9``, ``December -> 12`` - ## MM The month in two digits always. 0 is prepended. ``September -> 09``, ``December -> 12`` - ## MMM Abbreviated three-letter form of the month. ``September -> Sep``, ``December -> Dec`` - ## MMMM Full month string, properly capitalized. ``September -> September`` - ## s Seconds as one digit if possible. ``00:00:06 -> 6`` - ## ss Same as above but always two digits. 0 is prepended. ``00:00:06 -> 06`` - ## t ``A`` when time is in the AM. ``P`` when time is in the PM. - ## tt Same as above, but ``AM`` and ``PM`` instead of ``A`` and ``P`` respectively. - ## y(yyyy) This displays the year to different digits. You most likely only want 2 or 4 'y's - ## yy Displays the year to two digits. ``2012 -> 12`` - ## yyyy Displays the year to four digits. ``2012 -> 2012`` - ## z Displays the timezone offset from UTC. ``GMT+7 -> +7``, ``GMT-5 -> -5`` - ## zz Same as above but with leading 0. ``GMT+7 -> +07``, ``GMT-5 -> -05`` - ## zzz Same as above but with ``:00``. ``GMT+7 -> +07:00``, ``GMT-5 -> -05:00`` - ## ZZZ Displays the name of the timezone. ``GMT -> GMT``, ``EST -> EST`` - ## ========== ================================================================================= ================================================ - ## - ## Other strings can be inserted by putting them in ``''``. For example - ## ``hh'->'mm`` will give ``01->56``. The following characters can be - ## inserted without quoting them: ``:`` ``-`` ``(`` ``)`` ``/`` ``[`` ``]`` - ## ``,``. 
However you don't need to necessarily separate format specifiers, a - ## unambiguous format string like ``yyyyMMddhhmmss`` is valid too. +type + AmPm = enum + apUnknown, apAm, apPm + + Era = enum + eraUnknown, eraAd, eraBc + + ParsedTime = object + amPm: AmPm + era: Era + year: Option[int] + month: Option[int] + monthday: Option[int] + isoyear: Option[int] + yearweek: Option[int] + weekday: Option[WeekDay] + utcOffset: Option[int] + + # '0' as default for these work fine + # so no need for `Option`. + hour: int + minute: int + second: int + nanosecond: int + + FormatTokenKind = enum + tkPattern, tkLiteral + + FormatPattern {.pure.} = enum + d, dd, ddd, dddd + GG, GGGG + h, hh, H, HH + m, mm, M, MM, MMM, MMMM + s, ss + fff, ffffff, fffffffff + t, tt + yy, yyyy + YYYY + uuuu + UUUU + V, VV + z, zz, zzz, zzzz + ZZZ, ZZZZ + g + + # This is a special value used to mark literal format values. + # See the doc comment for `TimeFormat.patterns`. + Lit + + TimeFormat* = object ## Represents a format for parsing and printing + ## time types. + ## + ## To create a new `TimeFormat` use `initTimeFormat proc + ## <#initTimeFormat,string>`_. + patterns: seq[byte] ## \ + ## Contains the patterns encoded as bytes. + ## Literal values are encoded in a special way. + ## They start with `Lit.byte`, then the length of the literal, then the + ## raw char values of the literal. For example, the literal `foo` would + ## be encoded as `@[Lit.byte, 3.byte, 'f'.byte, 'o'.byte, 'o'.byte]`. + formatStr: string + + TimeParseError* = object of ValueError ## \ + ## Raised when parsing input using a `TimeFormat` fails. + + TimeFormatParseError* = object of ValueError ## \ + ## Raised when parsing a `TimeFormat` string fails. 
+ +const + DefaultLocale* = DateTimeLocale( + MMM: ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", + "Nov", "Dec"], + MMMM: ["January", "February", "March", "April", "May", "June", "July", + "August", "September", "October", "November", "December"], + ddd: ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"], + dddd: ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", + "Sunday"], + ) + + FormatLiterals = {' ', '-', '/', ':', '(', ')', '[', ']', ',', '.'} + +proc `$`*(f: TimeFormat): string = + ## Returns the format string that was used to construct `f`. + runnableExamples: + let f = initTimeFormat("yyyy-MM-dd") + doAssert $f == "yyyy-MM-dd" + f.formatStr + +proc raiseParseException(f: TimeFormat, input: string, msg: string) = + raise newException(TimeParseError, + "Failed to parse '" & input & "' with format '" & $f & + "'. " & msg) + +proc parseInt(s: string, b: var int, start = 0, maxLen = int.high, + allowSign = false): int = + var sign = -1 + var i = start + let stop = start + min(s.high - start + 1, maxLen) - 1 + if allowSign and i <= stop: + if s[i] == '+': + inc(i) + elif s[i] == '-': + inc(i) + sign = 1 + if i <= stop and s[i] in {'0'..'9'}: + b = 0 + while i <= stop and s[i] in {'0'..'9'}: + let c = ord(s[i]) - ord('0') + if b >= (low(int) + c) div 10: + b = b * 10 - c + else: + return 0 + inc(i) + if sign == -1 and b == low(int): + return 0 + b = b * sign + result = i - start - result = "" +iterator tokens(f: string): tuple[kind: FormatTokenKind, token: string] = var i = 0 - var currentF = "" - while true: - case f[i] - of ' ', '-', '/', ':', '\'', '\0', '(', ')', '[', ']', ',': - formatToken(info, currentF, result) + var currToken = "" - currentF = "" - if f[i] == '\0': break + template yieldCurrToken() = + if currToken.len != 0: + yield (tkPattern, currToken) + currToken = "" - if f[i] == '\'': + while i < f.len: + case f[i] + of '\'': + yieldCurrToken() + if i.succ < f.len and f[i.succ] == '\'': + yield (tkLiteral, 
"'") + i.inc 2 + else: + var token = "" inc(i) # Skip ' - while f[i] != '\'' and f.len-1 > i: - result.add(f[i]) - inc(i) - else: result.add(f[i]) - + while i < f.len and f[i] != '\'': + token.add f[i] + i.inc + + if i > f.high: + raise newException(TimeFormatParseError, + "Unclosed ' in time format string. " & + "For a literal ', use ''.") + i.inc + yield (tkLiteral, token) + of FormatLiterals: + yieldCurrToken() + yield (tkLiteral, $f[i]) + i.inc else: # Check if the letter being added matches previous accumulated buffer. - if currentF.len < 1 or currentF[high(currentF)] == f[i]: - currentF.add(f[i]) + if currToken.len == 0 or currToken[0] == f[i]: + currToken.add(f[i]) + i.inc + else: + yield (tkPattern, currToken) + currToken = $f[i] + i.inc + + yieldCurrToken() + +proc stringToPattern(str: string): FormatPattern = + case str + of "d": result = d + of "dd": result = dd + of "ddd": result = ddd + of "dddd": result = dddd + of "GG": result = GG + of "GGGG": result = GGGG + of "h": result = h + of "hh": result = hh + of "H": result = H + of "HH": result = HH + of "m": result = m + of "mm": result = mm + of "M": result = M + of "MM": result = MM + of "MMM": result = MMM + of "MMMM": result = MMMM + of "s": result = s + of "ss": result = ss + of "fff": result = fff + of "ffffff": result = ffffff + of "fffffffff": result = fffffffff + of "t": result = t + of "tt": result = tt + of "yy": result = yy + of "yyyy": result = yyyy + of "YYYY": result = YYYY + of "uuuu": result = uuuu + of "UUUU": result = UUUU + of "V": result = V + of "VV": result = VV + of "z": result = z + of "zz": result = zz + of "zzz": result = zzz + of "zzzz": result = zzzz + of "ZZZ": result = ZZZ + of "ZZZZ": result = ZZZZ + of "g": result = g + else: raise newException(TimeFormatParseError, + "'" & str & "' is not a valid pattern") + +proc initTimeFormat*(format: string): TimeFormat = + ## Construct a new time format for parsing & formatting time types. 
+ ## + ## See `Parsing and formatting dates`_ for documentation of the + ## `format` argument. + runnableExamples: + let f = initTimeFormat("yyyy-MM-dd") + doAssert "2000-01-01" == "2000-01-01".parse(f).format(f) + result.formatStr = format + result.patterns = @[] + for kind, token in format.tokens: + case kind + of tkLiteral: + case token else: - formatToken(info, currentF, result) - dec(i) # Move position back to re-process the character separately. - currentF = "" - - inc(i) - -{.pop.} - -proc parseToken(info: var TimeInfo; token, value: string; j: var int) = - ## Helper of the parse proc to parse individual tokens. - var sv: int - case token - of "d": - var pd = parseInt(value[j..j+1], sv) - info.monthday = sv - j += pd - of "dd": - info.monthday = value[j..j+1].parseInt() - j += 2 - of "ddd": - case value[j..j+2].toLower(): - of "sun": - info.weekday = dSun - of "mon": - info.weekday = dMon - of "tue": - info.weekday = dTue - of "wed": - info.weekday = dWed - of "thu": - info.weekday = dThu - of "fri": - info.weekday = dFri - of "sat": - info.weekday = dSat + result.patterns.add(FormatPattern.Lit.byte) + if token.len > 255: + raise newException(TimeFormatParseError, + "Format literal is to long:" & token) + result.patterns.add(token.len.byte) + for c in token: + result.patterns.add(c.byte) + of tkPattern: + result.patterns.add(stringToPattern(token).byte) + +proc formatPattern(dt: DateTime, pattern: FormatPattern, result: var string, + loc: DateTimeLocale) = + template yearOfEra(dt: DateTime): int = + if dt.year <= 0: abs(dt.year) + 1 else: dt.year + + case pattern + of d: + result.add $dt.monthday + of dd: + result.add dt.monthday.intToStr(2) + of ddd: + result.add loc.ddd[dt.weekday] + of dddd: + result.add loc.dddd[dt.weekday] + of GG: + result.add (dt.getIsoWeekAndYear.isoyear.int mod 100).intToStr(2) + of GGGG: + result.add $dt.getIsoWeekAndYear.isoyear + of h: + result.add( + if dt.hour == 0: "12" + elif dt.hour > 12: $(dt.hour - 12) + else: $dt.hour + ) 
+ of hh: + result.add( + if dt.hour == 0: "12" + elif dt.hour > 12: (dt.hour - 12).intToStr(2) + else: dt.hour.intToStr(2) + ) + of H: + result.add $dt.hour + of HH: + result.add dt.hour.intToStr(2) + of m: + result.add $dt.minute + of mm: + result.add dt.minute.intToStr(2) + of M: + result.add $ord(dt.month) + of MM: + result.add ord(dt.month).intToStr(2) + of MMM: + result.add loc.MMM[dt.month] + of MMMM: + result.add loc.MMMM[dt.month] + of s: + result.add $dt.second + of ss: + result.add dt.second.intToStr(2) + of fff: + result.add(intToStr(convert(Nanoseconds, Milliseconds, dt.nanosecond), 3)) + of ffffff: + result.add(intToStr(convert(Nanoseconds, Microseconds, dt.nanosecond), 6)) + of fffffffff: + result.add(intToStr(dt.nanosecond, 9)) + of t: + result.add if dt.hour >= 12: "P" else: "A" + of tt: + result.add if dt.hour >= 12: "PM" else: "AM" + of yy: + result.add (dt.yearOfEra mod 100).intToStr(2) + of yyyy: + let year = dt.yearOfEra + if year < 10000: + result.add year.intToStr(4) else: - raise newException(ValueError, "invalid day of week ") - j += 3 - of "dddd": - if value.len >= j+6 and value[j..j+5].cmpIgnoreCase("sunday") == 0: - info.weekday = dSun - j += 6 - elif value.len >= j+6 and value[j..j+5].cmpIgnoreCase("monday") == 0: - info.weekday = dMon - j += 6 - elif value.len >= j+7 and value[j..j+6].cmpIgnoreCase("tuesday") == 0: - info.weekday = dTue - j += 7 - elif value.len >= j+9 and value[j..j+8].cmpIgnoreCase("wednesday") == 0: - info.weekday = dWed - j += 9 - elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("thursday") == 0: - info.weekday = dThu - j += 8 - elif value.len >= j+6 and value[j..j+5].cmpIgnoreCase("friday") == 0: - info.weekday = dFri - j += 6 - elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("saturday") == 0: - info.weekday = dSat - j += 8 + result.add '+' & $year + of YYYY: + if dt.year < 1: + result.add $(abs(dt.year) + 1) else: - raise newException(ValueError, "invalid day of week ") - of "h", "H": - var pd = 
parseInt(value[j..j+1], sv) - info.hour = sv - j += pd - of "hh", "HH": - info.hour = value[j..j+1].parseInt() - j += 2 - of "m": - var pd = parseInt(value[j..j+1], sv) - info.minute = sv - j += pd - of "mm": - info.minute = value[j..j+1].parseInt() - j += 2 - of "M": - var pd = parseInt(value[j..j+1], sv) - info.month = Month(sv-1) - info.monthday = sv - j += pd - of "MM": - var month = value[j..j+1].parseInt() - j += 2 - info.month = Month(month-1) - of "MMM": - case value[j..j+2].toLower(): - of "jan": - info.month = mJan - of "feb": - info.month = mFeb - of "mar": - info.month = mMar - of "apr": - info.month = mApr - of "may": - info.month = mMay - of "jun": - info.month = mJun - of "jul": - info.month = mJul - of "aug": - info.month = mAug - of "sep": - info.month = mSep - of "oct": - info.month = mOct - of "nov": - info.month = mNov - of "dec": - info.month = mDec + result.add $dt.year + of uuuu: + let year = dt.year + if year < 10000 or year < 0: + result.add year.intToStr(4) else: - raise newException(ValueError, "invalid month") - j += 3 - of "MMMM": - if value.len >= j+7 and value[j..j+6].cmpIgnoreCase("january") == 0: - info.month = mJan - j += 7 - elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("february") == 0: - info.month = mFeb - j += 8 - elif value.len >= j+5 and value[j..j+4].cmpIgnoreCase("march") == 0: - info.month = mMar - j += 5 - elif value.len >= j+5 and value[j..j+4].cmpIgnoreCase("april") == 0: - info.month = mApr - j += 5 - elif value.len >= j+3 and value[j..j+2].cmpIgnoreCase("may") == 0: - info.month = mMay - j += 3 - elif value.len >= j+4 and value[j..j+3].cmpIgnoreCase("june") == 0: - info.month = mJun - j += 4 - elif value.len >= j+4 and value[j..j+3].cmpIgnoreCase("july") == 0: - info.month = mJul - j += 4 - elif value.len >= j+6 and value[j..j+5].cmpIgnoreCase("august") == 0: - info.month = mAug - j += 6 - elif value.len >= j+9 and value[j..j+8].cmpIgnoreCase("september") == 0: - info.month = mSep - j += 9 - elif value.len >= 
j+7 and value[j..j+6].cmpIgnoreCase("october") == 0: - info.month = mOct - j += 7 - elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("november") == 0: - info.month = mNov - j += 8 - elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("december") == 0: - info.month = mDec - j += 8 + result.add '+' & $year + of UUUU: + result.add $dt.year + of V: + result.add $dt.getIsoWeekAndYear.isoweek + of VV: + result.add dt.getIsoWeekAndYear.isoweek.intToStr(2) + of z, zz, zzz, zzzz, ZZZ, ZZZZ: + if dt.timezone != nil and dt.timezone.name == "Etc/UTC": + result.add 'Z' else: - raise newException(ValueError, "invalid month") - of "s": - var pd = parseInt(value[j..j+1], sv) - info.second = sv - j += pd - of "ss": - info.second = value[j..j+1].parseInt() - j += 2 - of "t": - if value[j] == 'P' and info.hour > 0 and info.hour < 12: - info.hour += 12 - j += 1 - of "tt": - if value[j..j+1] == "PM" and info.hour > 0 and info.hour < 12: - info.hour += 12 - j += 2 - of "yy": + result.add if -dt.utcOffset >= 0: '+' else: '-' + let absOffset = abs(dt.utcOffset) + case pattern: + of z: + result.add $(absOffset div 3600) + of zz: + result.add (absOffset div 3600).intToStr(2) + of zzz, ZZZ: + let h = (absOffset div 3600).intToStr(2) + let m = ((absOffset div 60) mod 60).intToStr(2) + let sep = if pattern == zzz: ":" else: "" + result.add h & sep & m + of zzzz, ZZZZ: + let absOffset = abs(dt.utcOffset) + let h = (absOffset div 3600).intToStr(2) + let m = ((absOffset div 60) mod 60).intToStr(2) + let s = (absOffset mod 60).intToStr(2) + let sep = if pattern == zzzz: ":" else: "" + result.add h & sep & m & sep & s + else: assert false + of g: + result.add if dt.year < 1: "BC" else: "AD" + of Lit: assert false # Can't happen + +proc parsePattern(input: string, pattern: FormatPattern, i: var int, + parsed: var ParsedTime, loc: DateTimeLocale): bool = + template takeInt(allowedWidth: Slice[int], allowSign = false): int = + var sv = 0 + var pd = parseInt(input, sv, i, allowedWidth.b, 
allowSign) + if pd < allowedWidth.a: + return false + i.inc pd + sv + + template contains[T](t: typedesc[T], i: int): bool = + i in low(t)..high(t) + + result = true + + case pattern + of d: + let monthday = takeInt(1..2) + parsed.monthday = some(monthday) + result = monthday in MonthdayRange + of dd: + let monthday = takeInt(2..2) + parsed.monthday = some(monthday) + result = monthday in MonthdayRange + of ddd: + result = false + for d, v in loc.ddd: + if input.substr(i, i+v.len-1).cmpIgnoreCase(v) == 0: + parsed.weekday = some(d.WeekDay) + result = true + i.inc v.len + break + of dddd: + result = false + for d, v in loc.dddd: + if input.substr(i, i+v.len-1).cmpIgnoreCase(v) == 0: + parsed.weekday = some(d.WeekDay) + result = true + i.inc v.len + break + of GG: # Assumes current century - var year = value[j..j+1].parseInt() - var thisCen = getLocalTime(getTime()).year div 100 - info.year = thisCen*100 + year - j += 2 - of "yyyy": - info.year = value[j..j+3].parseInt() - j += 4 - of "z": - if value[j] == '+': - info.timezone = parseInt($value[j+1]) - elif value[j] == '-': - info.timezone = 0-parseInt($value[j+1]) + var isoyear = takeInt(2..2) + var thisCen = now().year div 100 + parsed.isoyear = some(thisCen*100 + isoyear) + result = isoyear > 0 + of GGGG: + let isoyear = takeInt(1..high(int)) + parsed.isoyear = some(isoyear) + result = isoyear > 0 + of h, H: + parsed.hour = takeInt(1..2) + result = parsed.hour in HourRange + of hh, HH: + parsed.hour = takeInt(2..2) + result = parsed.hour in HourRange + of m: + parsed.minute = takeInt(1..2) + result = parsed.hour in MinuteRange + of mm: + parsed.minute = takeInt(2..2) + result = parsed.hour in MinuteRange + of M: + let month = takeInt(1..2) + result = month in 1..12 + parsed.month = some(month) + of MM: + let month = takeInt(2..2) + result = month in 1..12 + parsed.month = some(month) + of MMM: + result = false + for n, v in loc.MMM: + if input.substr(i, i+v.len-1).cmpIgnoreCase(v) == 0: + result = true + i.inc 
v.len + parsed.month = some(n.int) + break + of MMMM: + result = false + for n, v in loc.MMMM: + if input.substr(i, i+v.len-1).cmpIgnoreCase(v) == 0: + result = true + i.inc v.len + parsed.month = some(n.int) + break + of s: + parsed.second = takeInt(1..2) + of ss: + parsed.second = takeInt(2..2) + of fff, ffffff, fffffffff: + let len = ($pattern).len + let v = takeInt(len..len) + parsed.nanosecond = v * 10^(9 - len) + result = parsed.nanosecond in NanosecondRange + of t: + case input[i]: + of 'P': + parsed.amPm = apPm + of 'A': + parsed.amPm = apAm + else: + result = false + i.inc 1 + of tt: + if input.substr(i, i+1).cmpIgnoreCase("AM") == 0: + parsed.amPm = apAm + i.inc 2 + elif input.substr(i, i+1).cmpIgnoreCase("PM") == 0: + parsed.amPm = apPm + i.inc 2 + else: + result = false + of yy: + # Assumes current century + var year = takeInt(2..2) + var thisCen = now().year div 100 + parsed.year = some(thisCen*100 + year) + result = year > 0 + of yyyy: + let year = + if input[i] in {'+', '-'}: + takeInt(4..high(int), allowSign = true) + else: + takeInt(4..4) + result = year > 0 + parsed.year = some(year) + of YYYY: + let year = takeInt(1..high(int)) + parsed.year = some(year) + result = year > 0 + of uuuu: + let year = + if input[i] in {'+', '-'}: + takeInt(4..high(int), allowSign = true) + else: + takeInt(4..4) + parsed.year = some(year) + of UUUU: + parsed.year = some(takeInt(1..high(int), allowSign = true)) + of V: + let yearweek = takeInt(1..2) + parsed.yearweek = some(yearweek) + result = yearweek in IsoWeekRange + of VV: + let yearweek = takeInt(2..2) + parsed.yearweek = some(yearweek) + result = yearweek in IsoWeekRange + of z, zz, zzz, zzzz, ZZZ, ZZZZ: + case input[i] + of '+', '-': + let sign = if input[i] == '-': 1 else: -1 + i.inc + var offset = 0 + case pattern + of z: + offset = takeInt(1..2) * 3600 + of zz: + offset = takeInt(2..2) * 3600 + of zzz, ZZZ: + offset.inc takeInt(2..2) * 3600 + if pattern == zzz: + if input[i] != ':': + return false + i.inc + 
offset.inc takeInt(2..2) * 60 + of zzzz, ZZZZ: + offset.inc takeInt(2..2) * 3600 + if pattern == zzzz: + if input[i] != ':': + return false + i.inc + offset.inc takeInt(2..2) * 60 + if pattern == zzzz: + if input[i] != ':': + return false + i.inc + offset.inc takeInt(2..2) + else: assert false + parsed.utcOffset = some(offset * sign) + of 'Z': + parsed.utcOffset = some(0) + i.inc + else: + result = false + of g: + if input.substr(i, i+1).cmpIgnoreCase("BC") == 0: + parsed.era = eraBc + i.inc 2 + elif input.substr(i, i+1).cmpIgnoreCase("AD") == 0: + parsed.era = eraAd + i.inc 2 else: - raise newException(ValueError, "Sign for timezone " & value[j]) - j += 2 - of "zz": - if value[j] == '+': - info.timezone = value[j+1..j+2].parseInt() - elif value[j] == '-': - info.timezone = 0-value[j+1..j+2].parseInt() + result = false + of Lit: raiseAssert "Can't happen" + +proc toDateTime(p: ParsedTime, zone: Timezone, f: TimeFormat, + input: string): DateTime = + var year = p.year.get(0) + var month = p.month.get(1).Month + var monthday = p.monthday.get(1) + year = + case p.era + of eraUnknown: + year + of eraBc: + if year < 1: + raiseParseException(f, input, + "Expected year to be positive " & + "(use 'UUUU' or 'uuuu' for negative years).") + -year + 1 + of eraAd: + if year < 1: + raiseParseException(f, input, + "Expected year to be positive " & + "(use 'UUUU' or 'uuuu' for negative years).") + year + + let hour = + case p.amPm + of apUnknown: + p.hour + of apAm: + if p.hour notin 1..12: + raiseParseException(f, input, + "AM/PM time must be in the interval 1..12") + if p.hour == 12: 0 else: p.hour + of apPm: + if p.hour notin 1..12: + raiseParseException(f, input, + "AM/PM time must be in the interval 1..12") + if p.hour == 12: p.hour else: p.hour + 12 + let minute = p.minute + let second = p.second + let nanosecond = p.nanosecond + + if monthday > getDaysInMonth(month, year): + raiseParseException(f, input, + $year & "-" & ord(month).intToStr(2) & + "-" & $monthday & " is not 
a valid date") + + if p.utcOffset.isNone: + # No timezone parsed - assume timezone is `zone` + result = dateTime(year, month, monthday, hour, minute, second, nanosecond, zone) + else: + # Otherwise convert to `zone` + result = (dateTime(year, month, monthday, hour, minute, second, nanosecond, utc()).toTime + + initDuration(seconds = p.utcOffset.get())).inZone(zone) + +proc toDateTimeByWeek(p: ParsedTime, zone: Timezone, f: TimeFormat, + input: string): DateTime = + var isoyear = p.isoyear.get(0) + var yearweek = p.yearweek.get(1) + var weekday = p.weekday.get(dMon) + + if p.amPm != apUnknown: + raiseParseException(f, input, "Parsing iso weekyear dates does not support am/pm") + + if p.year.isSome: + raiseParseException(f, input, "Use iso-year GG or GGGG as year with iso week number") + + if p.month.isSome: + raiseParseException(f, input, "Use either iso week number V or VV or month") + + if p.monthday.isSome: + raiseParseException(f, input, "Use weekday ddd or dddd as day with with iso week number") + + if p.isoyear.isNone: + raiseParseException(f, input, "Need iso-year with week number") + + let hour = p.hour + let minute = p.minute + let second = p.second + let nanosecond = p.nanosecond + + if p.utcOffset.isNone: + result = initDateTime(weekday, yearweek.IsoWeekRange, isoyear.IsoYear, hour, minute, second, nanosecond, zone) + else: + result = (initDateTime(weekday, yearweek.IsoWeekRange, isoyear.IsoYear, hour, minute, second, nanosecond, zone).toTime + + initDuration(seconds = p.utcOffset.get())).inZone(zone) + +proc format*(dt: DateTime, f: TimeFormat, + loc: DateTimeLocale = DefaultLocale): string {.raises: [].} = + ## Format `dt` using the format specified by `f`. 
+ runnableExamples: + let f = initTimeFormat("yyyy-MM-dd") + let dt = dateTime(2000, mJan, 01, 00, 00, 00, 00, utc()) + doAssert "2000-01-01" == dt.format(f) + assertDateTimeInitialized dt + result = "" + var idx = 0 + while idx <= f.patterns.high: + case f.patterns[idx].FormatPattern + of Lit: + idx.inc + let len = f.patterns[idx] + for i in 1'u8..len: + idx.inc + result.add f.patterns[idx].char + idx.inc else: - raise newException(ValueError, "Sign for timezone " & value[j]) - j += 3 - of "zzz": - if value[j] == '+': - info.timezone = value[j+1..j+2].parseInt() - elif value[j] == '-': - info.timezone = 0-value[j+1..j+2].parseInt() + formatPattern(dt, f.patterns[idx].FormatPattern, result = result, loc = loc) + idx.inc + +proc format*(dt: DateTime, f: string, loc: DateTimeLocale = DefaultLocale): string + {.raises: [TimeFormatParseError].} = + ## Shorthand for constructing a `TimeFormat` and using it to format `dt`. + ## + ## See `Parsing and formatting dates`_ for documentation of the + ## `format` argument. + runnableExamples: + let dt = dateTime(2000, mJan, 01, 00, 00, 00, 00, utc()) + doAssert "2000-01-01" == format(dt, "yyyy-MM-dd") + let dtFormat = initTimeFormat(f) + result = dt.format(dtFormat, loc) + +proc format*(dt: DateTime, f: static[string]): string {.raises: [].} = + ## Overload that validates `format` at compile time. + const f2 = initTimeFormat(f) + result = dt.format(f2) + +proc formatValue*(result: var string; value: DateTime | Time, specifier: string) = + ## adapter for strformat. Not intended to be called directly. + result.add format(value, + if specifier.len == 0: "yyyy-MM-dd'T'HH:mm:sszzz" else: specifier) + +proc format*(time: Time, f: string, zone: Timezone = local()): string + {.raises: [TimeFormatParseError].} = + ## Shorthand for constructing a `TimeFormat` and using it to format + ## `time`. Will use the timezone specified by `zone`. + ## + ## See `Parsing and formatting dates`_ for documentation of the + ## `f` argument. 
+ runnableExamples: + var dt = dateTime(1970, mJan, 01, 00, 00, 00, 00, utc()) + var tm = dt.toTime() + doAssert format(tm, "yyyy-MM-dd'T'HH:mm:ss", utc()) == "1970-01-01T00:00:00" + time.inZone(zone).format(f) + +proc format*(time: Time, f: static[string], zone: Timezone = local()): string + {.raises: [].} = + ## Overload that validates `f` at compile time. + const f2 = initTimeFormat(f) + result = time.inZone(zone).format(f2) + +proc parse*(input: string, f: TimeFormat, zone: Timezone = local(), + loc: DateTimeLocale = DefaultLocale): DateTime {.parseRaises.} = + ## Parses `input` as a `DateTime` using the format specified by `f`. + ## If no UTC offset was parsed, then `input` is assumed to be specified in + ## the `zone` timezone. If a UTC offset was parsed, the result will be + ## converted to the `zone` timezone. + ## + ## Month and day names from the passed in `loc` are used. + runnableExamples: + let f = initTimeFormat("yyyy-MM-dd") + let dt = dateTime(2000, mJan, 01, 00, 00, 00, 00, utc()) + doAssert dt == "2000-01-01".parse(f, utc()) + var inpIdx = 0 # Input index + var patIdx = 0 # Pattern index + var parsed: ParsedTime + while inpIdx <= input.high and patIdx <= f.patterns.high: + let pattern = f.patterns[patIdx].FormatPattern + case pattern + of Lit: + patIdx.inc + let len = f.patterns[patIdx] + patIdx.inc + for _ in 1'u8..len: + if input[inpIdx] != f.patterns[patIdx].char: + raiseParseException(f, input, + "Unexpected character: " & input[inpIdx]) + inpIdx.inc + patIdx.inc else: - raise newException(ValueError, "Sign for timezone " & value[j]) - j += 6 - of "ZZZ": - info.tzname = value[j..j+2].toUpper() - j += 3 + if not parsePattern(input, pattern, inpIdx, parsed, loc): + raiseParseException(f, input, "Failed on pattern '" & $pattern & "'") + patIdx.inc + + if inpIdx <= input.high: + raiseParseException(f, input, + "Parsing ended but there was still input remaining") + + if patIdx <= f.patterns.high: + raiseParseException(f, input, + "Parsing ended but 
there was still patterns remaining") + + if parsed.yearweek.isSome: + result = toDateTimeByWeek(parsed, zone, f, input) + elif parsed.isoyear.isSome: + raiseParseException(f, input, "Iso year GG or GGGG require iso week V or VV") else: - # Ignore the token and move forward in the value string by the same length - j += token.len + result = toDateTime(parsed, zone, f, input) -proc parse*(value, layout: string): TimeInfo = - ## This function parses a date/time string using the standard format identifiers (below) - ## The function defaults information not provided in the format string from the running program (timezone, month, year, etc) +proc parse*(input, f: string, tz: Timezone = local(), + loc: DateTimeLocale = DefaultLocale): DateTime {.parseFormatRaises.} = + ## Shorthand for constructing a `TimeFormat` and using it to parse + ## `input` as a `DateTime`. ## - ## ========== ================================================================================= ================================================ - ## Specifier Description Example - ## ========== ================================================================================= ================================================ - ## d Numeric value of the day of the month, it will be one or two digits long. ``1/04/2012 -> 1``, ``21/04/2012 -> 21`` - ## dd Same as above, but always two digits. ``1/04/2012 -> 01``, ``21/04/2012 -> 21`` - ## ddd Three letter string which indicates the day of the week. ``Saturday -> Sat``, ``Monday -> Mon`` - ## dddd Full string for the day of the week. ``Saturday -> Saturday``, ``Monday -> Monday`` - ## h The hours in one digit if possible. Ranging from 0-12. ``5pm -> 5``, ``2am -> 2`` - ## hh The hours in two digits always. If the hour is one digit 0 is prepended. ``5pm -> 05``, ``11am -> 11`` - ## H The hours in one digit if possible, randing from 0-24. ``5pm -> 17``, ``2am -> 2`` - ## HH The hours in two digits always. 0 is prepended if the hour is one digit. 
``5pm -> 17``, ``2am -> 02`` - ## m The minutes in 1 digit if possible. ``5:30 -> 30``, ``2:01 -> 1`` - ## mm Same as above but always 2 digits, 0 is prepended if the minute is one digit. ``5:30 -> 30``, ``2:01 -> 01`` - ## M The month in one digit if possible. ``September -> 9``, ``December -> 12`` - ## MM The month in two digits always. 0 is prepended. ``September -> 09``, ``December -> 12`` - ## MMM Abbreviated three-letter form of the month. ``September -> Sep``, ``December -> Dec`` - ## MMMM Full month string, properly capitalized. ``September -> September`` - ## s Seconds as one digit if possible. ``00:00:06 -> 6`` - ## ss Same as above but always two digits. 0 is prepended. ``00:00:06 -> 06`` - ## t ``A`` when time is in the AM. ``P`` when time is in the PM. - ## tt Same as above, but ``AM`` and ``PM`` instead of ``A`` and ``P`` respectively. - ## yy Displays the year to two digits. ``2012 -> 12`` - ## yyyy Displays the year to four digits. ``2012 -> 2012`` - ## z Displays the timezone offset from UTC. ``GMT+7 -> +7``, ``GMT-5 -> -5`` - ## zz Same as above but with leading 0. ``GMT+7 -> +07``, ``GMT-5 -> -05`` - ## zzz Same as above but with ``:00``. ``GMT+7 -> +07:00``, ``GMT-5 -> -05:00`` - ## ZZZ Displays the name of the timezone. ``GMT -> GMT``, ``EST -> EST`` - ## ========== ================================================================================= ================================================ + ## See `Parsing and formatting dates`_ for documentation of the + ## `f` argument. + runnableExamples: + let dt = dateTime(2000, mJan, 01, 00, 00, 00, 00, utc()) + doAssert dt == parse("2000-01-01", "yyyy-MM-dd", utc()) + let dtFormat = initTimeFormat(f) + result = input.parse(dtFormat, tz, loc = loc) + +proc parse*(input: string, f: static[string], zone: Timezone = local(), + loc: DateTimeLocale = DefaultLocale): DateTime {.parseRaises.} = + ## Overload that validates `f` at compile time. 
+ const f2 = initTimeFormat(f) + result = input.parse(f2, zone, loc = loc) + +proc parseTime*(input, f: string, zone: Timezone): Time {.parseFormatRaises.} = + ## Shorthand for constructing a `TimeFormat` and using it to parse + ## `input` as a `DateTime`, then converting it a `Time`. ## - ## Other strings can be inserted by putting them in ``''``. For example - ## ``hh'->'mm`` will give ``01->56``. The following characters can be - ## inserted without quoting them: ``:`` ``-`` ``(`` ``)`` ``/`` ``[`` ``]`` - ## ``,``. However you don't need to necessarily separate format specifiers, a - ## unambiguous format string like ``yyyyMMddhhmmss`` is valid too. - var i = 0 # pointer for format string - var j = 0 # pointer for value string - var token = "" - # Assumes current day of month, month and year, but time is reset to 00:00:00. Weekday will be reset after parsing. - var info = getLocalTime(getTime()) - info.hour = 0 - info.minute = 0 - info.second = 0 - while true: - case layout[i] - of ' ', '-', '/', ':', '\'', '\0', '(', ')', '[', ']', ',': - if token.len > 0: - parseToken(info, token, value, j) - # Reset token - token = "" - # Break if at end of line - if layout[i] == '\0': break - # Skip separator and everything between single quotes - # These are literals in both the layout and the value string - if layout[i] == '\'': - inc(i) - inc(j) - while layout[i] != '\'' and layout.len-1 > i: - inc(i) - inc(j) - else: - inc(i) - inc(j) + ## See `Parsing and formatting dates`_ for documentation of the + ## `format` argument. + runnableExamples: + let tStr = "1970-01-01T00:00:00+00:00" + doAssert parseTime(tStr, "yyyy-MM-dd'T'HH:mm:sszzz", utc()) == fromUnix(0) + parse(input, f, zone).toTime() + +proc parseTime*(input: string, f: static[string], zone: Timezone): Time + {.parseRaises.} = + ## Overload that validates `format` at compile time. 
+ const f2 = initTimeFormat(f) + result = input.parse(f2, zone).toTime() + +proc `$`*(dt: DateTime): string {.tags: [], raises: [], benign.} = + ## Converts a `DateTime` object to a string representation. + ## It uses the format `yyyy-MM-dd'T'HH:mm:sszzz`. + runnableExamples: + let dt = dateTime(2000, mJan, 01, 12, 00, 00, 00, utc()) + doAssert $dt == "2000-01-01T12:00:00Z" + doAssert $default(DateTime) == "Uninitialized DateTime" + if not dt.isInitialized: + result = "Uninitialized DateTime" + else: + result = format(dt, "yyyy-MM-dd'T'HH:mm:sszzz") + +proc `$`*(time: Time): string {.tags: [], raises: [], benign.} = + ## Converts a `Time` value to a string representation. It will use the local + ## time zone and use the format `yyyy-MM-dd'T'HH:mm:sszzz`. + runnableExamples: + let dt = dateTime(1970, mJan, 01, 00, 00, 00, 00, local()) + let tm = dt.toTime() + doAssert $tm == "1970-01-01T00:00:00" & format(dt, "zzz") + $time.local + +# +# TimeInterval +# + +proc initTimeInterval*(nanoseconds, microseconds, milliseconds, + seconds, minutes, hours, + days, weeks, months, years: int = 0): TimeInterval = + ## Creates a new `TimeInterval <#TimeInterval>`_. + ## + ## This proc doesn't perform any normalization! For example, + ## `initTimeInterval(hours = 24)` and `initTimeInterval(days = 1)` are + ## not equal. + ## + ## You can also use the convenience procedures called `milliseconds`, + ## `seconds`, `minutes`, `hours`, `days`, `months`, and `years`. 
+ runnableExamples: + let day = initTimeInterval(hours = 24) + let dt = dateTime(2000, mJan, 01, 12, 00, 00, 00, utc()) + doAssert $(dt + day) == "2000-01-02T12:00:00Z" + doAssert initTimeInterval(hours = 24) != initTimeInterval(days = 1) + result.nanoseconds = nanoseconds + result.microseconds = microseconds + result.milliseconds = milliseconds + result.seconds = seconds + result.minutes = minutes + result.hours = hours + result.days = days + result.weeks = weeks + result.months = months + result.years = years + +proc `+`*(ti1, ti2: TimeInterval): TimeInterval = + ## Adds two `TimeInterval` objects together. + result.nanoseconds = ti1.nanoseconds + ti2.nanoseconds + result.microseconds = ti1.microseconds + ti2.microseconds + result.milliseconds = ti1.milliseconds + ti2.milliseconds + result.seconds = ti1.seconds + ti2.seconds + result.minutes = ti1.minutes + ti2.minutes + result.hours = ti1.hours + ti2.hours + result.days = ti1.days + ti2.days + result.weeks = ti1.weeks + ti2.weeks + result.months = ti1.months + ti2.months + result.years = ti1.years + ti2.years + +proc `-`*(ti: TimeInterval): TimeInterval = + ## Reverses a time interval + runnableExamples: + let day = -initTimeInterval(hours = 24) + doAssert day.hours == -24 + + result = TimeInterval( + nanoseconds: -ti.nanoseconds, + microseconds: -ti.microseconds, + milliseconds: -ti.milliseconds, + seconds: -ti.seconds, + minutes: -ti.minutes, + hours: -ti.hours, + days: -ti.days, + weeks: -ti.weeks, + months: -ti.months, + years: -ti.years + ) + +proc `-`*(ti1, ti2: TimeInterval): TimeInterval = + ## Subtracts TimeInterval `ti1` from `ti2`. 
+ ## + ## Time components are subtracted one-by-one, see output: + runnableExamples: + let ti1 = initTimeInterval(hours = 24) + let ti2 = initTimeInterval(hours = 4) + doAssert (ti1 - ti2) == initTimeInterval(hours = 20) + + result = ti1 + (-ti2) + +proc `+=`*(a: var TimeInterval, b: TimeInterval) = + a = a + b + +proc `-=`*(a: var TimeInterval, b: TimeInterval) = + a = a - b + +proc isStaticInterval(interval: TimeInterval): bool = + interval.years == 0 and interval.months == 0 and + interval.days == 0 and interval.weeks == 0 + +proc evaluateStaticInterval(interval: TimeInterval): Duration = + assert interval.isStaticInterval + initDuration(nanoseconds = interval.nanoseconds, + microseconds = interval.microseconds, + milliseconds = interval.milliseconds, + seconds = interval.seconds, + minutes = interval.minutes, + hours = interval.hours) + +proc between*(startDt, endDt: DateTime): TimeInterval = + ## Gives the difference between `startDt` and `endDt` as a + ## `TimeInterval`. The following guarantees about the result is given: + ## + ## - All fields will have the same sign. + ## - If `startDt.timezone == endDt.timezone`, it is guaranteed that + ## `startDt + between(startDt, endDt) == endDt`. + ## - If `startDt.timezone != endDt.timezone`, then the result will be + ## equivalent to `between(startDt.utc, endDt.utc)`. 
+ runnableExamples: + var a = dateTime(2015, mMar, 25, 12, 0, 0, 00, utc()) + var b = dateTime(2017, mApr, 1, 15, 0, 15, 00, utc()) + var ti = initTimeInterval(years = 2, weeks = 1, hours = 3, seconds = 15) + doAssert between(a, b) == ti + doAssert between(a, b) == -between(b, a) + + if startDt.timezone != endDt.timezone: + return between(startDt.utc, endDt.utc) + elif endDt < startDt: + return -between(endDt, startDt) + + type Date = tuple[year, month, monthday: int] + var startDate: Date = (startDt.year, startDt.month.ord, startDt.monthday) + var endDate: Date = (endDt.year, endDt.month.ord, endDt.monthday) + + # Subtract one day from endDate if time of day is earlier than startDay + # The subtracted day will be counted by fixed units (hour and lower) + # at the end of this proc + if (endDt.hour, endDt.minute, endDt.second, endDt.nanosecond) < + (startDt.hour, startDt.minute, startDt.second, startDt.nanosecond): + if endDate.month == 1 and endDate.monthday == 1: + endDate.year.dec + endDate.monthday = 31 + endDate.month = 12 + elif endDate.monthday == 1: + endDate.month.dec + endDate.monthday = getDaysInMonth(endDate.month.Month, endDate.year) else: - # Check if the letter being added matches previous accumulated buffer. 
- if token.len < 1 or token[high(token)] == layout[i]: - token.add(layout[i]) - inc(i) + endDate.monthday.dec + + # Years + result.years = endDate.year - startDate.year - 1 + if (startDate.month, startDate.monthday) <= (endDate.month, endDate.monthday): + result.years.inc + startDate.year.inc result.years + + # Months + if startDate.year < endDate.year: + result.months.inc 12 - startDate.month # Move to dec + if endDate.month != 1 or (startDate.monthday <= endDate.monthday): + result.months.inc + startDate.year = endDate.year + startDate.month = 1 + else: + startDate.month = 12 + if startDate.year == endDate.year: + if (startDate.monthday <= endDate.monthday): + result.months.inc endDate.month - startDate.month + startDate.month = endDate.month + elif endDate.month != 1: + let month = endDate.month - 1 + let daysInMonth = getDaysInMonth(month.Month, startDate.year) + if daysInMonth < startDate.monthday: + if startDate.monthday - daysInMonth < endDate.monthday: + result.months.inc endDate.month - startDate.month - 1 + startDate.month = endDate.month + startDate.monthday = startDate.monthday - daysInMonth + else: + result.months.inc endDate.month - startDate.month - 2 + startDate.month = endDate.month - 2 + else: + result.months.inc endDate.month - startDate.month - 1 + startDate.month = endDate.month - 1 + + # Days + # This means that start = dec and end = jan + if startDate.year < endDate.year: + result.days.inc 31 - startDate.monthday + endDate.monthday + startDate = endDate + else: + while startDate.month < endDate.month: + let daysInMonth = getDaysInMonth(startDate.month.Month, startDate.year) + result.days.inc daysInMonth - startDate.monthday + 1 + startDate.month.inc + startDate.monthday = 1 + result.days.inc endDate.monthday - startDate.monthday + result.weeks = result.days div 7 + result.days = result.days mod 7 + startDate = endDate + + # Handle hours, minutes, seconds, milliseconds, microseconds and nanoseconds + let newStartDt = dateTime(startDate.year, 
startDate.month.Month, + startDate.monthday, startDt.hour, startDt.minute, startDt.second, + startDt.nanosecond, startDt.timezone) + let dur = endDt - newStartDt + let parts = toParts(dur) + # There can still be a full day in `parts` since `Duration` and `TimeInterval` + # models days differently. + result.hours = parts[Hours].int + parts[Days].int * 24 + result.minutes = parts[Minutes].int + result.seconds = parts[Seconds].int + result.milliseconds = parts[Milliseconds].int + result.microseconds = parts[Microseconds].int + result.nanoseconds = parts[Nanoseconds].int + +proc toParts*(ti: TimeInterval): TimeIntervalParts = + ## Converts a `TimeInterval` into an array consisting of its time units, + ## starting with nanoseconds and ending with years. + ## + ## This procedure is useful for converting `TimeInterval` values to strings. + ## E.g. then you need to implement custom interval printing + runnableExamples: + var tp = toParts(initTimeInterval(years = 1, nanoseconds = 123)) + doAssert tp[Years] == 1 + doAssert tp[Nanoseconds] == 123 + + var index = 0 + for name, value in fieldPairs(ti): + result[index.TimeUnit()] = value + index += 1 + +proc `$`*(ti: TimeInterval): string = + ## Get string representation of `TimeInterval`. + runnableExamples: + doAssert $initTimeInterval(years = 1, nanoseconds = 123) == + "1 year and 123 nanoseconds" + doAssert $initTimeInterval() == "0 nanoseconds" + + var parts: seq[string] = @[] + var tiParts = toParts(ti) + for unit in countdown(Years, Nanoseconds): + if tiParts[unit] != 0: + parts.add(stringifyUnit(tiParts[unit], unit)) + + result = humanizeParts(parts) + +proc nanoseconds*(nanos: int): TimeInterval {.inline.} = + ## TimeInterval of `nanos` nanoseconds. + initTimeInterval(nanoseconds = nanos) + +proc microseconds*(micros: int): TimeInterval {.inline.} = + ## TimeInterval of `micros` microseconds. 
+ initTimeInterval(microseconds = micros) + +proc milliseconds*(ms: int): TimeInterval {.inline.} = + ## TimeInterval of `ms` milliseconds. + initTimeInterval(milliseconds = ms) + +proc seconds*(s: int): TimeInterval {.inline.} = + ## TimeInterval of `s` seconds. + ## + ## `echo getTime() + 5.seconds` + initTimeInterval(seconds = s) + +proc minutes*(m: int): TimeInterval {.inline.} = + ## TimeInterval of `m` minutes. + ## + ## `echo getTime() + 5.minutes` + initTimeInterval(minutes = m) + +proc hours*(h: int): TimeInterval {.inline.} = + ## TimeInterval of `h` hours. + ## + ## `echo getTime() + 2.hours` + initTimeInterval(hours = h) + +proc days*(d: int): TimeInterval {.inline.} = + ## TimeInterval of `d` days. + ## + ## `echo getTime() + 2.days` + initTimeInterval(days = d) + +proc weeks*(w: int): TimeInterval {.inline.} = + ## TimeInterval of `w` weeks. + ## + ## `echo getTime() + 2.weeks` + initTimeInterval(weeks = w) + +proc months*(m: int): TimeInterval {.inline.} = + ## TimeInterval of `m` months. + ## + ## `echo getTime() + 2.months` + initTimeInterval(months = m) + +proc years*(y: int): TimeInterval {.inline.} = + ## TimeInterval of `y` years. + ## + ## `echo getTime() + 2.years` + initTimeInterval(years = y) + +proc evaluateInterval(dt: DateTime, interval: TimeInterval): + tuple[adjDur, absDur: Duration] = + ## Evaluates how many nanoseconds the interval is worth + ## in the context of `dt`. + ## The result in split into an adjusted diff and an absolute diff. 
+ var months = interval.years * 12 + interval.months + var curYear = dt.year + var curMonth = dt.month + result = default(tuple[adjDur, absDur: Duration]) + # Subtracting + if months < 0: + for mth in countdown(-1 * months, 1): + if curMonth == mJan: + curMonth = mDec + curYear.dec else: - parseToken(info, token, value, j) - token = "" - # Reset weekday as it might not have been provided and the default may be wrong - info.weekday = getLocalTime(timeInfoToTime(info)).weekday - return info - - -when isMainModule: - # $ date --date='@2147483647' - # Tue 19 Jan 03:14:07 GMT 2038 - - var t = getGMTime(fromSeconds(2147483647)) - echo t.format("ddd dd MMM hh:mm:ss ZZZ yyyy") - echo t.format("ddd ddMMMhhmmssZZZyyyy") - assert t.format("ddd dd MMM hh:mm:ss ZZZ yyyy") == "Tue 19 Jan 03:14:07 UTC 2038" - assert t.format("ddd ddMMMhh:mm:ssZZZyyyy") == "Tue 19Jan03:14:07UTC2038" - - assert t.format("d dd ddd dddd h hh H HH m mm M MM MMM MMMM s" & - " ss t tt y yy yyy yyyy yyyyy z zz zzz ZZZ") == - "19 19 Tue Tuesday 3 03 3 03 14 14 1 01 Jan January 7 07 A AM 8 38 038 2038 02038 0 00 00:00 UTC" - - assert t.format("yyyyMMddhhmmss") == "20380119031407" - - var t2 = getGMTime(fromSeconds(160070789)) # Mon 27 Jan 16:06:29 GMT 1975 - assert t2.format("d dd ddd dddd h hh H HH m mm M MM MMM MMMM s" & - " ss t tt y yy yyy yyyy yyyyy z zz zzz ZZZ") == - "27 27 Mon Monday 4 04 16 16 6 06 1 01 Jan January 29 29 P PM 5 75 975 1975 01975 0 00 00:00 UTC" - - when not defined(JS) and sizeof(Time) == 8: - var t3 = getGMTime(fromSeconds(889067643645)) # Fri 7 Jun 19:20:45 BST 30143 - assert t3.format("d dd ddd dddd h hh H HH m mm M MM MMM MMMM s" & - " ss t tt y yy yyy yyyy yyyyy z zz zzz ZZZ") == - "7 07 Fri Friday 6 06 18 18 20 20 6 06 Jun June 45 45 P PM 3 43 143 0143 30143 0 00 00:00 UTC" - assert t3.format(":,[]()-/") == ":,[]()-/" - - var t4 = getGMTime(fromSeconds(876124714)) # Mon 6 Oct 08:58:34 BST 1997 - assert t4.format("M MM MMM MMMM") == "10 10 Oct October" - - # Interval tests - 
assert((t4 - initInterval(years = 2)).format("yyyy") == "1995") - assert((t4 - initInterval(years = 7, minutes = 34, seconds = 24)).format("yyyy mm ss") == "1990 24 10") - - var s = "Tuesday at 09:04am on Dec 15, 2015" - var f = "dddd at hh:mmtt on MMM d, yyyy" - assert($s.parse(f) == "Tue Dec 15 09:04:00 2015") - # ANSIC = "Mon Jan _2 15:04:05 2006" - s = "Mon Jan 2 15:04:05 2006" - f = "ddd MMM d HH:mm:ss yyyy" - assert($s.parse(f) == "Mon Jan 2 15:04:05 2006") - # UnixDate = "Mon Jan _2 15:04:05 MST 2006" - s = "Mon Jan 2 15:04:05 MST 2006" - f = "ddd MMM d HH:mm:ss ZZZ yyyy" - assert($s.parse(f) == "Mon Jan 2 15:04:05 2006") - # RubyDate = "Mon Jan 02 15:04:05 -0700 2006" - s = "Mon Jan 02 15:04:05 -07:00 2006" - f = "ddd MMM dd HH:mm:ss zzz yyyy" - assert($s.parse(f) == "Mon Jan 2 15:04:05 2006") - # RFC822 = "02 Jan 06 15:04 MST" - s = "02 Jan 06 15:04 MST" - f = "dd MMM yy HH:mm ZZZ" - assert($s.parse(f) == "Mon Jan 2 15:04:00 2006") - # RFC822Z = "02 Jan 06 15:04 -0700" # RFC822 with numeric zone - s = "02 Jan 06 15:04 -07:00" - f = "dd MMM yy HH:mm zzz" - assert($s.parse(f) == "Mon Jan 2 15:04:00 2006") - # RFC850 = "Monday, 02-Jan-06 15:04:05 MST" - s = "Monday, 02-Jan-06 15:04:05 MST" - f = "dddd, dd-MMM-yy HH:mm:ss ZZZ" - assert($s.parse(f) == "Mon Jan 2 15:04:05 2006") - # RFC1123 = "Mon, 02 Jan 2006 15:04:05 MST" - s = "Mon, 02 Jan 2006 15:04:05 MST" - f = "ddd, dd MMM yyyy HH:mm:ss ZZZ" - assert($s.parse(f) == "Mon Jan 2 15:04:05 2006") - # RFC1123Z = "Mon, 02 Jan 2006 15:04:05 -0700" # RFC1123 with numeric zone - s = "Mon, 02 Jan 2006 15:04:05 -07:00" - f = "ddd, dd MMM yyyy HH:mm:ss zzz" - assert($s.parse(f) == "Mon Jan 2 15:04:05 2006") - # RFC3339 = "2006-01-02T15:04:05Z07:00" - s = "2006-01-02T15:04:05Z-07:00" - f = "yyyy-MM-ddTHH:mm:ssZzzz" - assert($s.parse(f) == "Mon Jan 2 15:04:05 2006") - # RFC3339Nano = "2006-01-02T15:04:05.999999999Z07:00" - s = "2006-01-02T15:04:05.999999999Z-07:00" - f = "yyyy-MM-ddTHH:mm:ss.999999999Zzzz" - 
assert($s.parse(f) == "Mon Jan 2 15:04:05 2006") - # Kitchen = "3:04PM" - s = "3:04PM" - f = "h:mmtt" - echo "Kitchen: " & $s.parse(f) + curMonth.dec() + let days = getDaysInMonth(curMonth, curYear) + result.adjDur = result.adjDur - initDuration(days = days) + # Adding + else: + for mth in 1 .. months: + let days = getDaysInMonth(curMonth, curYear) + result.adjDur = result.adjDur + initDuration(days = days) + if curMonth == mDec: + curMonth = mJan + curYear.inc + else: + curMonth.inc() + + result.adjDur = result.adjDur + initDuration( + days = interval.days, + weeks = interval.weeks) + result.absDur = initDuration( + nanoseconds = interval.nanoseconds, + microseconds = interval.microseconds, + milliseconds = interval.milliseconds, + seconds = interval.seconds, + minutes = interval.minutes, + hours = interval.hours) + +proc `+`*(dt: DateTime, interval: TimeInterval): DateTime = + ## Adds `interval` to `dt`. Components from `interval` are added + ## in the order of their size, i.e. first the `years` component, then the + ## `months` component and so on. The returned `DateTime` will have the + ## same timezone as the input. + ## + ## Note that when adding months, monthday overflow is allowed. This means that + ## if the resulting month doesn't have enough days it, the month will be + ## incremented and the monthday will be set to the number of days overflowed. + ## So adding one month to `31 October` will result in `31 November`, which + ## will overflow and result in `1 December`. + runnableExamples: + let dt = dateTime(2017, mMar, 30, 00, 00, 00, 00, utc()) + doAssert $(dt + 1.months) == "2017-04-30T00:00:00Z" + # This is correct and happens due to monthday overflow. 
+ doAssert $(dt - 1.months) == "2017-03-02T00:00:00Z" + let (adjDur, absDur) = evaluateInterval(dt, interval) + + if adjDur != DurationZero: + var zt = dt.timezone.zonedTimeFromAdjTime(dt.toAdjTime + adjDur) + if absDur != DurationZero: + zt = dt.timezone.zonedTimeFromTime(zt.time + absDur) + result = initDateTime(zt, dt.timezone) + else: + result = initDateTime(zt, dt.timezone) + else: + var zt = dt.timezone.zonedTimeFromTime(dt.toTime + absDur) + result = initDateTime(zt, dt.timezone) + +proc `-`*(dt: DateTime, interval: TimeInterval): DateTime = + ## Subtract `interval` from `dt`. Components from `interval` are + ## subtracted in the order of their size, i.e. first the `years` component, + ## then the `months` component and so on. The returned `DateTime` will + ## have the same timezone as the input. + runnableExamples: + let dt = dateTime(2017, mMar, 30, 00, 00, 00, 00, utc()) + doAssert $(dt - 5.days) == "2017-03-25T00:00:00Z" + + dt + (-interval) + +proc `+`*(time: Time, interval: TimeInterval): Time = + ## Adds `interval` to `time`. + ## If `interval` contains any years, months, weeks or days the operation + ## is performed in the local timezone. + runnableExamples: + let tm = fromUnix(0) + doAssert tm + 5.seconds == fromUnix(5) + + if interval.isStaticInterval: + time + evaluateStaticInterval(interval) + else: + toTime(time.local + interval) + +proc `-`*(time: Time, interval: TimeInterval): Time = + ## Subtracts `interval` from Time `time`. + ## If `interval` contains any years, months, weeks or days the operation + ## is performed in the local timezone. 
+ runnableExamples: + let tm = fromUnix(5) + doAssert tm - 5.seconds == fromUnix(0) + + if interval.isStaticInterval: + time - evaluateStaticInterval(interval) + else: + toTime(time.local - interval) + +proc `+=`*(a: var DateTime, b: TimeInterval) = + a = a + b + +proc `-=`*(a: var DateTime, b: TimeInterval) = + a = a - b + +proc `+=`*(t: var Time, b: TimeInterval) = + t = t + b + +proc `-=`*(t: var Time, b: TimeInterval) = + t = t - b + +# +# Iso week +# + +proc initDateTime*(weekday: WeekDay, isoweek: IsoWeekRange, isoyear: IsoYear, + hour: HourRange, minute: MinuteRange, second: SecondRange, + nanosecond: NanosecondRange, + zone: Timezone = local()): DateTime {.raises: [], tags: [], since: (1, 5).} = + ## Create a new `DateTime <#DateTime>`_ from a weekday and an ISO 8601 week number and year + ## in the specified timezone. + ## + ## .. warning:: The ISO week-based year can correspond to the following or previous year from 29 December to January 3. + runnableExamples: + assert initDateTime(21, mApr, 2018, 00, 00, 00) == initDateTime(dSat, 16, 2018.IsoYear, 00, 00, 00) + assert initDateTime(30, mDec, 2019, 00, 00, 00) == initDateTime(dMon, 01, 2020.IsoYear, 00, 00, 00) + assert initDateTime(13, mSep, 2020, 00, 00, 00) == initDateTime(dSun, 37, 2020.IsoYear, 00, 00, 00) + assert initDateTime(2, mJan, 2021, 00, 00, 00) == initDateTime(dSat, 53, 2020.IsoYear, 00, 00, 00) + + # source https://webspace.science.uu.nl/~gent0113/calendar/isocalendar.htm + let d = isoweek * 7 + weekday.int - initDateTime(4, mJan, isoyear.int, 00, 00, 00, zone).weekday.int - 4 + initDateTime(1, mJan, isoyear.int, hour, minute, second, nanosecond, zone) + initTimeInterval(days=d) + +proc initDateTime*(weekday: WeekDay, isoweek: IsoWeekRange, isoyear: IsoYear, + hour: HourRange, minute: MinuteRange, second: SecondRange, + zone: Timezone = local()): DateTime {.raises: [], tags: [], since: (1, 5).} = + initDateTime(weekday, isoweek, isoyear, hour, minute, second, 0, zone) + +# +# Other +# + 
+proc epochTime*(): float {.tags: [TimeEffect].} = + ## Gets time after the UNIX epoch (1970) in seconds. It is a float + ## because sub-second resolution is likely to be supported (depending + ## on the hardware/OS). + ## + ## `getTime` should generally be preferred over this proc. + ## + ## .. warning:: Unsuitable for benchmarking (but still better than `now`), + ## use `monotimes.getMonoTime` or `cpuTime` instead, depending on the use case. + when defined(js): + result = newDate().getTime() / 1000 + elif defined(macosx): + var a {.noinit.}: Timeval + gettimeofday(a) + result = toBiggestFloat(a.tv_sec.int64) + toBiggestFloat( + a.tv_usec)*0.00_0001 + elif defined(posix): + var ts {.noinit.}: Timespec + discard clock_gettime(CLOCK_REALTIME, ts) + result = toBiggestFloat(ts.tv_sec.int64) + + toBiggestFloat(ts.tv_nsec.int64) / 1_000_000_000 + elif defined(windows): + var f {.noinit.}: winlean.FILETIME + getSystemTimeAsFileTime(f) + var i64 = rdFileTime(f) - epochDiff + var secs = i64 div rateDiff + var subsecs = i64 mod rateDiff + result = toFloat(int(secs)) + toFloat(int(subsecs)) * 0.0000001 + else: + {.error: "unknown OS".} + +when not defined(js): + type + Clock {.importc: "clock_t".} = distinct int + + proc getClock(): Clock + {.importc: "clock", header: "<time.h>", tags: [TimeEffect], used, sideEffect.} + + var + clocksPerSec {.importc: "CLOCKS_PER_SEC", nodecl, used.}: int + + proc cpuTime*(): float {.tags: [TimeEffect].} = + ## Gets time spent that the CPU spent to run the current process in + ## seconds. This may be more useful for benchmarking than `epochTime`. + ## However, it may measure the real time instead (depending on the OS). + ## The value of the result has no meaning. 
+ ## To generate useful timing values, take the difference between + ## the results of two `cpuTime` calls: + runnableExamples: + var t0 = cpuTime() + # some useless work here (calculate fibonacci) + var fib = @[0, 1, 1] + for i in 1..10: + fib.add(fib[^1] + fib[^2]) + echo "CPU time [s] ", cpuTime() - t0 + echo "Fib is [s] ", fib + ## When the flag `--benchmarkVM` is passed to the compiler, this proc is + ## also available at compile time + when defined(posix) and not defined(osx) and declared(CLOCK_THREAD_CPUTIME_ID): + # 'clocksPerSec' is a compile-time constant, possibly a + # rather awful one, so use clock_gettime instead + var ts: Timespec + discard clock_gettime(CLOCK_THREAD_CPUTIME_ID, ts) + result = toFloat(ts.tv_sec.int) + + toFloat(ts.tv_nsec.int) / 1_000_000_000 + else: + result = toFloat(int(getClock())) / toFloat(clocksPerSec) + + +# +# Deprecations +# + +proc `nanosecond=`*(dt: var DateTime, value: NanosecondRange) {.deprecated: "Deprecated since v1.3.1".} = + dt.nanosecond = value + +proc `second=`*(dt: var DateTime, value: SecondRange) {.deprecated: "Deprecated since v1.3.1".} = + dt.second = value + +proc `minute=`*(dt: var DateTime, value: MinuteRange) {.deprecated: "Deprecated since v1.3.1".} = + dt.minute = value + +proc `hour=`*(dt: var DateTime, value: HourRange) {.deprecated: "Deprecated since v1.3.1".} = + dt.hour = value + +proc `monthdayZero=`*(dt: var DateTime, value: int) {.deprecated: "Deprecated since v1.3.1".} = + dt.monthdayZero = value + +proc `monthZero=`*(dt: var DateTime, value: int) {.deprecated: "Deprecated since v1.3.1".} = + dt.monthZero = value + +proc `year=`*(dt: var DateTime, value: int) {.deprecated: "Deprecated since v1.3.1".} = + dt.year = value + +proc `weekday=`*(dt: var DateTime, value: WeekDay) {.deprecated: "Deprecated since v1.3.1".} = + dt.weekday = value + +proc `yearday=`*(dt: var DateTime, value: YeardayRange) {.deprecated: "Deprecated since v1.3.1".} = + dt.yearday = value + +proc `isDst=`*(dt: var 
DateTime, value: bool) {.deprecated: "Deprecated since v1.3.1".} = + dt.isDst = value + +proc `timezone=`*(dt: var DateTime, value: Timezone) {.deprecated: "Deprecated since v1.3.1".} = + dt.timezone = value + +proc `utcOffset=`*(dt: var DateTime, value: int) {.deprecated: "Deprecated since v1.3.1".} = + dt.utcOffset = value diff --git a/lib/pure/typetraits.nim b/lib/pure/typetraits.nim index 2c3d872df..78af84fdd 100644 --- a/lib/pure/typetraits.nim +++ b/lib/pure/typetraits.nim @@ -8,29 +8,369 @@ # ## This module defines compile-time reflection procs for -## working with types +## working with types. +## +## Unstable API. -proc name*(t: typedesc): string {.magic: "TypeTrait".} - ## Returns the name of the given type. - ## - ## Example: +import std/private/since +export system.`$` # for backward compatibility + +when defined(nimPreviewSlimSystem): + import std/assertions + + +type HoleyEnum* = (not Ordinal) and enum ## Enum with holes. +type OrdinalEnum* = Ordinal and enum ## Enum without holes. + +runnableExamples: + type A = enum a0 = 2, a1 = 4, a2 + type B = enum b0 = 2, b1, b2 + assert A is enum + assert A is HoleyEnum + assert A isnot OrdinalEnum + assert B isnot HoleyEnum + assert B is OrdinalEnum + assert int isnot HoleyEnum + type C[T] = enum h0 = 2, h1 = 4 + assert C[float] is HoleyEnum + +proc name*(t: typedesc): string {.magic: "TypeTrait".} = + ## Returns the name of `t`. ## - ## .. code-block:: + ## Alias for `system.\`$\`(t) <dollars.html#$,typedesc>`_ since Nim v0.20. + runnableExamples: + doAssert name(int) == "int" + doAssert name(seq[string]) == "seq[string]" + +proc arity*(t: typedesc): int {.magic: "TypeTrait".} = + ## Returns the arity of `t`. This is the number of "type" + ## components or the number of generic parameters a given type `t` has. 
+ runnableExamples: + doAssert arity(int) == 0 + doAssert arity(seq[string]) == 1 + doAssert arity(array[3, int]) == 2 + doAssert arity((int, int, float, string)) == 4 + +proc genericHead*(t: typedesc): typedesc {.magic: "TypeTrait".} = + ## Accepts an instantiated generic type and returns its + ## uninstantiated form. + ## A compile-time error will be produced if the supplied type + ## is not generic. ## - ## import typetraits + ## **See also:** + ## * `stripGenericParams proc <#stripGenericParams,typedesc>`_ + runnableExamples: + type + Foo[T] = object + FooInst = Foo[int] + Foo2 = genericHead(FooInst) + + doAssert Foo2 is Foo and Foo is Foo2 + doAssert genericHead(Foo[seq[string]]) is Foo + doAssert not compiles(genericHead(int)) + + type Generic = concept f + type _ = genericHead(typeof(f)) + + proc bar(a: Generic): typeof(a) = a + + doAssert bar(Foo[string].default) == Foo[string]() + doAssert not compiles bar(string.default) + + when false: # these don't work yet + doAssert genericHead(Foo[int])[float] is Foo[float] + doAssert seq[int].genericHead is seq + +proc stripGenericParams*(t: typedesc): typedesc {.magic: "TypeTrait".} = + ## This trait is similar to `genericHead <#genericHead,typedesc>`_, but + ## instead of producing an error for non-generic types, it will just return + ## them unmodified. + runnableExamples: + type Foo[T] = object + + doAssert stripGenericParams(Foo[string]) is Foo + doAssert stripGenericParams(int) is int + +proc supportsCopyMem*(t: typedesc): bool {.magic: "TypeTrait".} + ## Returns true if `t` is safe to use for `copyMem`:idx:. ## - ## proc `$`*[T](some:typedesc[T]): string = name(T) + ## Other languages name a type like these `blob`:idx:. + +proc hasDefaultValue*(t: typedesc): bool {.magic: "TypeTrait".} = + ## Returns true if `t` has a valid default value. 
+ runnableExamples: + {.experimental: "strictNotNil".} + type + NilableObject = ref object + a: int + Object = NilableObject not nil + RequiresInit[T] = object + a {.requiresInit.}: T + + assert hasDefaultValue(NilableObject) + assert not hasDefaultValue(Object) + assert hasDefaultValue(string) + assert not hasDefaultValue(var string) + assert not hasDefaultValue(RequiresInit[int]) + +proc isNamedTuple*(T: typedesc): bool {.magic: "TypeTrait".} = + ## Returns true for named tuples, false for any other type. + runnableExamples: + doAssert not isNamedTuple(int) + doAssert not isNamedTuple((string, int)) + doAssert isNamedTuple(tuple[name: string, age: int]) + +template pointerBase*[T](_: typedesc[ptr T | ref T]): typedesc = + ## Returns `T` for `ref T | ptr T`. + runnableExamples: + assert (ref int).pointerBase is int + type A = ptr seq[float] + assert A.pointerBase is seq[float] + assert (ref A).pointerBase is A # not seq[float] + assert (var s = "abc"; s[0].addr).typeof.pointerBase is char + T + +proc rangeBase*(T: typedesc[range]): typedesc {.magic: "TypeTrait".} = + ## Returns the base type for range types, or the type itself otherwise. ## - ## template test(x): stmt = - ## echo "type: ", type(x), ", value: ", x + ## **See also:** + ## * `rangeBase template <#rangeBase.t,T>`_ + runnableExamples: + type MyRange = range[0..5] + type MyEnum = enum a, b, c + type MyEnumRange = range[b..c] + doAssert rangeBase(MyRange) is int + doAssert rangeBase(MyEnumRange) is MyEnum + doAssert rangeBase(range['a'..'z']) is char + +template rangeBase*[T: range](a: T): untyped = + ## Overload of `rangeBase <#rangeBase,typedesc,static[bool]>`_ for values. 
+ runnableExamples: + type MyRange = range[0..5] + type MyEnum = enum a, b, c + type MyEnumRange = range[b..c] + let x = MyRange(3) + doAssert rangeBase(x) is int + doAssert $typeof(rangeBase(x)) == "int" + doAssert rangeBase(x) == 3 + let y: set[MyEnumRange] = {c} + for e in y: + doAssert rangeBase(e) is MyEnum + doAssert $typeof(rangeBase(e)) == "MyEnum" + doAssert rangeBase(e) == c + let z: seq[range['a'..'z']] = @['c'] + doAssert rangeBase(z[0]) is char + doAssert $typeof(rangeBase(z[0])) == "char" + doAssert rangeBase(z[0]) == 'c' + rangeBase(typeof(T))(a) + +proc distinctBase*(T: typedesc, recursive: static bool = true): typedesc {.magic: "TypeTrait".} = + ## Returns the base type for distinct types, or the type itself otherwise. + ## If `recursive` is false, only the immediate distinct base will be returned. ## - ## test 42 - ## # --> type: int, value: 42 - ## test "Foo" - ## # --> type: string, value: Foo - ## test(@['A','B']) - ## # --> type: seq[char], value: @[A, B] + ## **See also:** + ## * `distinctBase template <#distinctBase.t,T,static[bool]>`_ + runnableExamples: + type MyInt = distinct int + type MyOtherInt = distinct MyInt + doAssert distinctBase(MyInt) is int + doAssert distinctBase(MyOtherInt) is int + doAssert distinctBase(MyOtherInt, false) is MyInt + doAssert distinctBase(int) is int + +since (1, 1): + template distinctBase*[T](a: T, recursive: static bool = true): untyped = + ## Overload of `distinctBase <#distinctBase,typedesc,static[bool]>`_ for values. + runnableExamples: + type MyInt = distinct int + type MyOtherInt = distinct MyInt + doAssert 12.MyInt.distinctBase == 12 + doAssert 12.MyOtherInt.distinctBase == 12 + doAssert 12.MyOtherInt.distinctBase(false) is MyInt + doAssert 12.distinctBase == 12 + when T is distinct: + distinctBase(typeof(a), recursive)(a) + else: # avoids hint ConvFromXtoItselfNotNeeded + a + + proc tupleLen*(T: typedesc[tuple]): int {.magic: "TypeTrait".} = + ## Returns the number of elements of the tuple type `T`. 
+ ## + ## **See also:** + ## * `tupleLen template <#tupleLen.t>`_ + runnableExamples: + doAssert tupleLen((int, int, float, string)) == 4 + doAssert tupleLen(tuple[name: string, age: int]) == 2 + + template tupleLen*(t: tuple): int = + ## Returns the number of elements of the tuple `t`. + ## + ## **See also:** + ## * `tupleLen proc <#tupleLen,typedesc>`_ + runnableExamples: + doAssert tupleLen((1, 2)) == 2 + + tupleLen(typeof(t)) + + template get*(T: typedesc[tuple], i: static int): untyped = + ## Returns the `i`-th element of `T`. + # Note: `[]` currently gives: `Error: no generic parameters allowed for ...` + runnableExamples: + doAssert get((int, int, float, string), 2) is float + + typeof(default(T)[i]) + + type StaticParam*[value: static type] = object + ## Used to wrap a static value in `genericParams <#genericParams.t,typedesc>`_. + +since (1, 3, 5): + template elementType*(a: untyped): typedesc = + ## Returns the element type of `a`, which can be any iterable (over which you + ## can iterate). + runnableExamples: + iterator myiter(n: int): auto = + for i in 0 ..< n: + yield i + + doAssert elementType(@[1,2]) is int + doAssert elementType("asdf") is char + doAssert elementType(myiter(3)) is int + + typeof(block: (for ai in a: ai)) + +import std/macros + +macro enumLen*(T: typedesc[enum]): int = + ## Returns the number of items in the enum `T`. 
+ runnableExamples: + type Foo = enum + fooItem1 + fooItem2 + + doAssert Foo.enumLen == 2 + + let bracketExpr = getType(T) + expectKind(bracketExpr, nnkBracketExpr) + let enumTy = bracketExpr[1] + expectKind(enumTy, nnkEnumTy) + result = newLit(enumTy.len - 1) + +macro genericParamsImpl(T: typedesc): untyped = + # auxiliary macro needed, can't do it directly in `genericParams` + result = newNimNode(nnkTupleConstr) + var impl = getTypeImpl(T) + expectKind(impl, nnkBracketExpr) + impl = impl[1] + while true: + case impl.kind + of nnkSym: + impl = impl.getImpl + of nnkTypeDef: + impl = impl[2] + of nnkTypeOfExpr: + impl = getTypeInst(impl[0]) + of nnkBracketExpr: + for i in 1..<impl.len: + let ai = impl[i] + var ret: NimNode = nil + case ai.typeKind + of ntyTypeDesc: + ret = ai + of ntyStatic: raiseAssert "unreachable" + else: + # getType from a resolved symbol might return a typedesc symbol. + # If so, use it directly instead of wrapping it in StaticParam. + if (ai.kind == nnkSym and ai.symKind == nskType) or + (ai.kind == nnkBracketExpr and ai[0].kind == nnkSym and + ai[0].symKind == nskType) or ai.kind in {nnkRefTy, nnkVarTy, nnkPtrTy, nnkProcTy}: + ret = ai + elif ai.kind == nnkInfix and ai[0].kind == nnkIdent and + ai[0].strVal == "..": + # For built-in array types, the "2" is translated to "0..1" then + # automagically translated to "range[0..1]". However this is not + # reflected in the AST, thus requiring manual transformation here. + # + # We will also be losing some context here: + # var a: array[10, int] + # will be translated to: + # var a: array[0..9, int] + # after typecheck. 
This means that we can't get the exact + # definition as typed by the user, which will cause confusion for + # users expecting: + # genericParams(typeof(a)) is (StaticParam(10), int) + # to be true while in fact the result will be: + # genericParams(typeof(a)) is (range[0..9], int) + ret = newTree(nnkBracketExpr, @[bindSym"range", ai]) + else: + since (1, 1): + ret = newTree(nnkBracketExpr, @[bindSym"StaticParam", ai]) + result.add ret + break + else: + error "wrong kind: " & $impl.kind, impl + +since (1, 1): + template genericParams*(T: typedesc): untyped = + ## Returns the tuple of generic parameters for the generic type `T`. + ## + ## **Note:** For the builtin array type, the index generic parameter will + ## **always** become a range type after it's bound to a variable. + runnableExamples: + type Foo[T1, T2] = object + + doAssert genericParams(Foo[float, string]) is (float, string) + + type Bar[N: static float, T] = object + + doAssert genericParams(Bar[1.0, string]) is (StaticParam[1.0], string) + doAssert genericParams(Bar[1.0, string]).get(0).value == 1.0 + doAssert genericParams(seq[Bar[2.0, string]]).get(0) is Bar[2.0, string] + var s: seq[Bar[3.0, string]] + doAssert genericParams(typeof(s)) is (Bar[3.0, string],) + + doAssert genericParams(array[10, int]) is (StaticParam[10], int) + var a: array[10, int] + doAssert genericParams(typeof(a)) is (range[0..9], int) + + type T2 = T + genericParamsImpl(T2) + + +proc hasClosureImpl(n: NimNode): bool = discard "see compiler/vmops.nim" + +proc hasClosure*(fn: NimNode): bool {.since: (1, 5, 1).} = + ## Returns true if the func/proc/etc `fn` has `closure`. + ## `fn` has to be a resolved symbol of kind `nnkSym`. This + ## implies that the macro that calls this proc should accept `typed` + ## arguments and not `untyped` arguments. + expectKind fn, nnkSym + result = hasClosureImpl(fn) +template toUnsigned*(T: typedesc[SomeInteger and not range]): untyped = + ## Returns an unsigned type with same bit size as `T`. 
+ runnableExamples: + assert int8.toUnsigned is uint8 + assert uint.toUnsigned is uint + assert int.toUnsigned is uint + # range types are currently unsupported: + assert not compiles(toUnsigned(range[0..7])) + when T is int8: uint8 + elif T is int16: uint16 + elif T is int32: uint32 + elif T is int64: uint64 + elif T is int: uint + else: T -proc arity*(t: typedesc): int {.magic: "TypeTrait".} - ## Returns the arity of the given type +template toSigned*(T: typedesc[SomeInteger and not range]): untyped = + ## Returns a signed type with same bit size as `T`. + runnableExamples: + assert int8.toSigned is int8 + assert uint16.toSigned is int16 + # range types are currently unsupported: + assert not compiles(toSigned(range[0..7])) + when T is uint8: int8 + elif T is uint16: int16 + elif T is uint32: int32 + elif T is uint64: int64 + elif T is uint: int + else: T diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim index a6f8f916b..8cbe117bb 100644 --- a/lib/pure/unicode.nim +++ b/lib/pure/unicode.nim @@ -8,1118 +8,466 @@ # ## This module provides support to handle the Unicode UTF-8 encoding. - -{.deadCodeElim: on.} +## +## There are no specialized ``insert``, ``delete``, ``add`` and ``contains`` +## procedures for ``seq[Rune]`` in this module because the generic variants +## of these procedures in the system module already work with it. +## +## The current version is compatible with Unicode v12.0.0. +## +## **See also:** +## * `strutils module <strutils.html>`_ +## * `unidecode module <unidecode.html>`_ +## * `encodings module <encodings.html>`_ include "system/inclrtl" +import std/strbasics +template toOa(s: string): auto = s.toOpenArray(0, s.high) + +proc substr(s: openArray[char] , first, last: int): string = + # Copied substr from system + let first = max(first, 0) + let L = max(min(last, high(s)) - first + 1, 0) + result = newString(L) + for i in 0 .. 
L-1: + result[i] = s[i+first] type - RuneImpl = int # underlying type of Rune - Rune* = distinct RuneImpl ## type that can hold any Unicode character - Rune16* = distinct int16 ## 16 bit Unicode character + RuneImpl = int32 # underlying type of Rune + Rune* = distinct RuneImpl ## \ + ## Type that can hold a single Unicode code point. + ## + ## A Rune may be composed with other Runes to a character on the screen. + ## `RuneImpl` is the underlying type used to store Runes, currently `int32`. -{.deprecated: [TRune: Rune, TRune16: Rune16].} - -proc `<=%`*(a, b: Rune): bool = return int(a) <=% int(b) -proc `<%`*(a, b: Rune): bool = return int(a) <% int(b) -proc `==`*(a, b: Rune): bool = return int(a) == int(b) +template ones(n: untyped): untyped = ((1 shl n)-1) -template ones(n: expr): expr = ((1 shl n)-1) +proc runeLen*(s: openArray[char]): int {.rtl, extern: "nuc$1".} = + ## Returns the number of runes of the string ``s``. + runnableExamples: + let a = "añyóng" + doAssert a.runeLen == 6 + ## note: a.len == 8 -proc runeLen*(s: string): int {.rtl, extern: "nuc$1".} = - ## returns the number of Unicode characters of the string `s`. + result = 0 var i = 0 while i < len(s): - if ord(s[i]) <=% 127: inc(i) - elif ord(s[i]) shr 5 == 0b110: inc(i, 2) - elif ord(s[i]) shr 4 == 0b1110: inc(i, 3) - elif ord(s[i]) shr 3 == 0b11110: inc(i, 4) - elif ord(s[i]) shr 2 == 0b111110: inc(i, 5) - elif ord(s[i]) shr 1 == 0b1111110: inc(i, 6) + if uint(s[i]) <= 127: inc(i) + elif uint(s[i]) shr 5 == 0b110: inc(i, 2) + elif uint(s[i]) shr 4 == 0b1110: inc(i, 3) + elif uint(s[i]) shr 3 == 0b11110: inc(i, 4) + elif uint(s[i]) shr 2 == 0b111110: inc(i, 5) + elif uint(s[i]) shr 1 == 0b1111110: inc(i, 6) else: inc i inc(result) -proc runeLenAt*(s: string, i: int): int = - ## returns the number of bytes the rune starting at ``s[i]`` takes. 
- if ord(s[i]) <=% 127: result = 1 - elif ord(s[i]) shr 5 == 0b110: result = 2 - elif ord(s[i]) shr 4 == 0b1110: result = 3 - elif ord(s[i]) shr 3 == 0b11110: result = 4 - elif ord(s[i]) shr 2 == 0b111110: result = 5 - elif ord(s[i]) shr 1 == 0b1111110: result = 6 +proc runeLenAt*(s: openArray[char], i: Natural): int = + ## Returns the number of bytes the rune starting at ``s[i]`` takes. + ## + ## See also: + ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ + runnableExamples: + let a = "añyóng" + doAssert a.runeLenAt(0) == 1 + doAssert a.runeLenAt(1) == 2 + + if uint(s[i]) <= 127: result = 1 + elif uint(s[i]) shr 5 == 0b110: result = 2 + elif uint(s[i]) shr 4 == 0b1110: result = 3 + elif uint(s[i]) shr 3 == 0b11110: result = 4 + elif uint(s[i]) shr 2 == 0b111110: result = 5 + elif uint(s[i]) shr 1 == 0b1111110: result = 6 else: result = 1 -template fastRuneAt*(s: string, i: int, result: expr, doInc = true) = - ## Returns the unicode character ``s[i]`` in `result`. If ``doInc == true`` - ## `i` is incremented by the number of bytes that have been processed. +const replRune = Rune(0xFFFD) + +template fastRuneAt*(s: openArray[char] or string, i: int, result: untyped, doInc = true) = + ## Returns the rune ``s[i]`` in ``result``. + ## + ## If ``doInc == true`` (default), ``i`` is incremented by the number + ## of bytes that have been processed. 
bind ones - if ord(s[i]) <=% 127: - result = Rune(ord(s[i])) + if uint(s[i]) <= 127: + result = Rune(uint(s[i])) when doInc: inc(i) - elif ord(s[i]) shr 5 == 0b110: - # assert(ord(s[i+1]) shr 6 == 0b10) - result = Rune((ord(s[i]) and (ones(5))) shl 6 or - (ord(s[i+1]) and ones(6))) - when doInc: inc(i, 2) - elif ord(s[i]) shr 4 == 0b1110: - # assert(ord(s[i+1]) shr 6 == 0b10) - # assert(ord(s[i+2]) shr 6 == 0b10) - result = Rune((ord(s[i]) and ones(4)) shl 12 or - (ord(s[i+1]) and ones(6)) shl 6 or - (ord(s[i+2]) and ones(6))) - when doInc: inc(i, 3) - elif ord(s[i]) shr 3 == 0b11110: - # assert(ord(s[i+1]) shr 6 == 0b10) - # assert(ord(s[i+2]) shr 6 == 0b10) - # assert(ord(s[i+3]) shr 6 == 0b10) - result = Rune((ord(s[i]) and ones(3)) shl 18 or - (ord(s[i+1]) and ones(6)) shl 12 or - (ord(s[i+2]) and ones(6)) shl 6 or - (ord(s[i+3]) and ones(6))) - when doInc: inc(i, 4) - elif ord(s[i]) shr 2 == 0b111110: - # assert(ord(s[i+1]) shr 6 == 0b10) - # assert(ord(s[i+2]) shr 6 == 0b10) - # assert(ord(s[i+3]) shr 6 == 0b10) - # assert(ord(s[i+4]) shr 6 == 0b10) - result = Rune((ord(s[i]) and ones(2)) shl 24 or - (ord(s[i+1]) and ones(6)) shl 18 or - (ord(s[i+2]) and ones(6)) shl 12 or - (ord(s[i+3]) and ones(6)) shl 6 or - (ord(s[i+4]) and ones(6))) - when doInc: inc(i, 5) - elif ord(s[i]) shr 1 == 0b1111110: - # assert(ord(s[i+1]) shr 6 == 0b10) - # assert(ord(s[i+2]) shr 6 == 0b10) - # assert(ord(s[i+3]) shr 6 == 0b10) - # assert(ord(s[i+4]) shr 6 == 0b10) - # assert(ord(s[i+5]) shr 6 == 0b10) - result = Rune((ord(s[i]) and ones(1)) shl 30 or - (ord(s[i+1]) and ones(6)) shl 24 or - (ord(s[i+2]) and ones(6)) shl 18 or - (ord(s[i+3]) and ones(6)) shl 12 or - (ord(s[i+4]) and ones(6)) shl 6 or - (ord(s[i+5]) and ones(6))) - when doInc: inc(i, 6) + elif uint(s[i]) shr 5 == 0b110: + # assert(uint(s[i+1]) shr 6 == 0b10) + if i <= s.len - 2: + result = Rune((uint(s[i]) and (ones(5))) shl 6 or + (uint(s[i+1]) and ones(6))) + when doInc: inc(i, 2) + else: + result = replRune + 
when doInc: inc(i) + elif uint(s[i]) shr 4 == 0b1110: + # assert(uint(s[i+1]) shr 6 == 0b10) + # assert(uint(s[i+2]) shr 6 == 0b10) + if i <= s.len - 3: + result = Rune((uint(s[i]) and ones(4)) shl 12 or + (uint(s[i+1]) and ones(6)) shl 6 or + (uint(s[i+2]) and ones(6))) + when doInc: inc(i, 3) + else: + result = replRune + when doInc: inc(i) + elif uint(s[i]) shr 3 == 0b11110: + # assert(uint(s[i+1]) shr 6 == 0b10) + # assert(uint(s[i+2]) shr 6 == 0b10) + # assert(uint(s[i+3]) shr 6 == 0b10) + if i <= s.len - 4: + result = Rune((uint(s[i]) and ones(3)) shl 18 or + (uint(s[i+1]) and ones(6)) shl 12 or + (uint(s[i+2]) and ones(6)) shl 6 or + (uint(s[i+3]) and ones(6))) + when doInc: inc(i, 4) + else: + result = replRune + when doInc: inc(i) + elif uint(s[i]) shr 2 == 0b111110: + # assert(uint(s[i+1]) shr 6 == 0b10) + # assert(uint(s[i+2]) shr 6 == 0b10) + # assert(uint(s[i+3]) shr 6 == 0b10) + # assert(uint(s[i+4]) shr 6 == 0b10) + if i <= s.len - 5: + result = Rune((uint(s[i]) and ones(2)) shl 24 or + (uint(s[i+1]) and ones(6)) shl 18 or + (uint(s[i+2]) and ones(6)) shl 12 or + (uint(s[i+3]) and ones(6)) shl 6 or + (uint(s[i+4]) and ones(6))) + when doInc: inc(i, 5) + else: + result = replRune + when doInc: inc(i) + elif uint(s[i]) shr 1 == 0b1111110: + # assert(uint(s[i+1]) shr 6 == 0b10) + # assert(uint(s[i+2]) shr 6 == 0b10) + # assert(uint(s[i+3]) shr 6 == 0b10) + # assert(uint(s[i+4]) shr 6 == 0b10) + # assert(uint(s[i+5]) shr 6 == 0b10) + if i <= s.len - 6: + result = Rune((uint(s[i]) and ones(1)) shl 30 or + (uint(s[i+1]) and ones(6)) shl 24 or + (uint(s[i+2]) and ones(6)) shl 18 or + (uint(s[i+3]) and ones(6)) shl 12 or + (uint(s[i+4]) and ones(6)) shl 6 or + (uint(s[i+5]) and ones(6))) + when doInc: inc(i, 6) + else: + result = replRune + when doInc: inc(i) else: - result = Rune(ord(s[i])) + result = Rune(uint(s[i])) when doInc: inc(i) -proc runeAt*(s: string, i: int): Rune = - ## returns the unicode character in `s` at byte index `i` +proc runeAt*(s: 
openArray[char], i: Natural): Rune = + ## Returns the rune in ``s`` at **byte index** ``i``. + ## + ## See also: + ## * `runeAtPos proc <#runeAtPos,string,int>`_ + ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_ + ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ + runnableExamples: + let a = "añyóng" + doAssert a.runeAt(1) == "ñ".runeAt(0) + doAssert a.runeAt(2) == "ñ".runeAt(1) + doAssert a.runeAt(3) == "y".runeAt(0) fastRuneAt(s, i, result, false) -proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} = - ## converts a rune into its UTF8 representation +proc validateUtf8*(s: openArray[char]): int = + ## Returns the position of the invalid byte in ``s`` if the string ``s`` does + ## not hold valid UTF-8 data. Otherwise ``-1`` is returned. + ## + ## See also: + ## * `toUTF8 proc <#toUTF8,Rune>`_ + ## * `$ proc <#$,Rune>`_ alias for `toUTF8` + ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_ + var i = 0 + let L = s.len + while i < L: + if uint(s[i]) <= 127: + inc(i) + elif uint(s[i]) shr 5 == 0b110: + if uint(s[i]) < 0xc2: return i # Catch overlong ascii representations. + if i+1 < L and uint(s[i+1]) shr 6 == 0b10: inc(i, 2) + else: return i + elif uint(s[i]) shr 4 == 0b1110: + if i+2 < L and uint(s[i+1]) shr 6 == 0b10 and uint(s[i+2]) shr 6 == 0b10: + inc i, 3 + else: return i + elif uint(s[i]) shr 3 == 0b11110: + if i+3 < L and uint(s[i+1]) shr 6 == 0b10 and + uint(s[i+2]) shr 6 == 0b10 and + uint(s[i+3]) shr 6 == 0b10: + inc i, 4 + else: return i + else: + return i + return -1 + +template fastToUTF8Copy*(c: Rune, s: var string, pos: int, doInc = true) = + ## Copies UTF-8 representation of ``c`` into the preallocated string ``s`` + ## starting at position ``pos``. + ## + ## If ``doInc == true`` (default), ``pos`` is incremented + ## by the number of bytes that have been processed. + ## + ## To be the most efficient, make sure ``s`` is preallocated + ## with an additional amount equal to the byte length of ``c``. 
+ ## + ## See also: + ## * `validateUtf8 proc <#validateUtf8,string>`_ + ## * `toUTF8 proc <#toUTF8,Rune>`_ + ## * `$ proc <#$,Rune>`_ alias for `toUTF8` var i = RuneImpl(c) if i <=% 127: - result = newString(1) - result[0] = chr(i) + s.setLen(pos+1) + s[pos+0] = chr(i) + when doInc: inc(pos) elif i <=% 0x07FF: - result = newString(2) - result[0] = chr((i shr 6) or 0b110_00000) - result[1] = chr((i and ones(6)) or 0b10_0000_00) + s.setLen(pos+2) + s[pos+0] = chr((i shr 6) or 0b110_00000) + s[pos+1] = chr((i and ones(6)) or 0b10_0000_00) + when doInc: inc(pos, 2) elif i <=% 0xFFFF: - result = newString(3) - result[0] = chr(i shr 12 or 0b1110_0000) - result[1] = chr(i shr 6 and ones(6) or 0b10_0000_00) - result[2] = chr(i and ones(6) or 0b10_0000_00) + s.setLen(pos+3) + s[pos+0] = chr(i shr 12 or 0b1110_0000) + s[pos+1] = chr(i shr 6 and ones(6) or 0b10_0000_00) + s[pos+2] = chr(i and ones(6) or 0b10_0000_00) + when doInc: inc(pos, 3) elif i <=% 0x001FFFFF: - result = newString(4) - result[0] = chr(i shr 18 or 0b1111_0000) - result[1] = chr(i shr 12 and ones(6) or 0b10_0000_00) - result[2] = chr(i shr 6 and ones(6) or 0b10_0000_00) - result[3] = chr(i and ones(6) or 0b10_0000_00) + s.setLen(pos+4) + s[pos+0] = chr(i shr 18 or 0b1111_0000) + s[pos+1] = chr(i shr 12 and ones(6) or 0b10_0000_00) + s[pos+2] = chr(i shr 6 and ones(6) or 0b10_0000_00) + s[pos+3] = chr(i and ones(6) or 0b10_0000_00) + when doInc: inc(pos, 4) elif i <=% 0x03FFFFFF: - result = newString(5) - result[0] = chr(i shr 24 or 0b111110_00) - result[1] = chr(i shr 18 and ones(6) or 0b10_0000_00) - result[2] = chr(i shr 12 and ones(6) or 0b10_0000_00) - result[3] = chr(i shr 6 and ones(6) or 0b10_0000_00) - result[4] = chr(i and ones(6) or 0b10_0000_00) + s.setLen(pos+5) + s[pos+0] = chr(i shr 24 or 0b111110_00) + s[pos+1] = chr(i shr 18 and ones(6) or 0b10_0000_00) + s[pos+2] = chr(i shr 12 and ones(6) or 0b10_0000_00) + s[pos+3] = chr(i shr 6 and ones(6) or 0b10_0000_00) + s[pos+4] = chr(i and 
ones(6) or 0b10_0000_00) + when doInc: inc(pos, 5) elif i <=% 0x7FFFFFFF: - result = newString(6) - result[0] = chr(i shr 30 or 0b1111110_0) - result[1] = chr(i shr 24 and ones(6) or 0b10_0000_00) - result[2] = chr(i shr 18 and ones(6) or 0b10_0000_00) - result[3] = chr(i shr 12 and ones(6) or 0b10_0000_00) - result[4] = chr(i shr 6 and ones(6) or 0b10_0000_00) - result[5] = chr(i and ones(6) or 0b10_0000_00) + s.setLen(pos+6) + s[pos+0] = chr(i shr 30 or 0b1111110_0) + s[pos+1] = chr(i shr 24 and ones(6) or 0b10_0000_00) + s[pos+2] = chr(i shr 18 and ones(6) or 0b10_0000_00) + s[pos+3] = chr(i shr 12 and ones(6) or 0b10_0000_00) + s[pos+4] = chr(i shr 6 and ones(6) or 0b10_0000_00) + s[pos+5] = chr(i and ones(6) or 0b10_0000_00) + when doInc: inc(pos, 6) else: discard # error, exception? +proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} = + ## Converts a rune into its UTF-8 representation. + ## + ## See also: + ## * `validateUtf8 proc <#validateUtf8,string>`_ + ## * `$ proc <#$,Rune>`_ alias for `toUTF8` + ## * `utf8 iterator <#utf8.i,string>`_ + ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_ + runnableExamples: + let a = "añyóng" + doAssert a.runeAt(1).toUTF8 == "ñ" + + result = "" + fastToUTF8Copy(c, result, 0, false) + +proc add*(s: var string; c: Rune) = + ## Adds a rune ``c`` to a string ``s``. + runnableExamples: + var s = "abc" + let c = "ä".runeAt(0) + s.add(c) + doAssert s == "abcä" + + let pos = s.len + fastToUTF8Copy(c, s, pos, false) + proc `$`*(rune: Rune): string = - ## converts a rune to a string + ## An alias for `toUTF8 <#toUTF8,Rune>`_. + ## + ## See also: + ## * `validateUtf8 proc <#validateUtf8,string>`_ + ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_ rune.toUTF8 proc `$`*(runes: seq[Rune]): string = - ## converts a sequence of runes to a string + ## Converts a sequence of Runes to a string. 
+ ## + ## See also: + ## * `toRunes <#toRunes,string>`_ for a reverse operation + runnableExamples: + let + someString = "öÑ" + someRunes = toRunes(someString) + doAssert $someRunes == someString + result = "" - for rune in runes: result.add(rune.toUTF8) - -const - alphaRanges = [ - 0x00d8, 0x00f6, # - - 0x00f8, 0x01f5, # - - 0x0250, 0x02a8, # - - 0x038e, 0x03a1, # - - 0x03a3, 0x03ce, # - - 0x03d0, 0x03d6, # - - 0x03e2, 0x03f3, # - - 0x0490, 0x04c4, # - - 0x0561, 0x0587, # - - 0x05d0, 0x05ea, # - - 0x05f0, 0x05f2, # - - 0x0621, 0x063a, # - - 0x0640, 0x064a, # - - 0x0671, 0x06b7, # - - 0x06ba, 0x06be, # - - 0x06c0, 0x06ce, # - - 0x06d0, 0x06d3, # - - 0x0905, 0x0939, # - - 0x0958, 0x0961, # - - 0x0985, 0x098c, # - - 0x098f, 0x0990, # - - 0x0993, 0x09a8, # - - 0x09aa, 0x09b0, # - - 0x09b6, 0x09b9, # - - 0x09dc, 0x09dd, # - - 0x09df, 0x09e1, # - - 0x09f0, 0x09f1, # - - 0x0a05, 0x0a0a, # - - 0x0a0f, 0x0a10, # - - 0x0a13, 0x0a28, # - - 0x0a2a, 0x0a30, # - - 0x0a32, 0x0a33, # - - 0x0a35, 0x0a36, # - - 0x0a38, 0x0a39, # - - 0x0a59, 0x0a5c, # - - 0x0a85, 0x0a8b, # - - 0x0a8f, 0x0a91, # - - 0x0a93, 0x0aa8, # - - 0x0aaa, 0x0ab0, # - - 0x0ab2, 0x0ab3, # - - 0x0ab5, 0x0ab9, # - - 0x0b05, 0x0b0c, # - - 0x0b0f, 0x0b10, # - - 0x0b13, 0x0b28, # - - 0x0b2a, 0x0b30, # - - 0x0b32, 0x0b33, # - - 0x0b36, 0x0b39, # - - 0x0b5c, 0x0b5d, # - - 0x0b5f, 0x0b61, # - - 0x0b85, 0x0b8a, # - - 0x0b8e, 0x0b90, # - - 0x0b92, 0x0b95, # - - 0x0b99, 0x0b9a, # - - 0x0b9e, 0x0b9f, # - - 0x0ba3, 0x0ba4, # - - 0x0ba8, 0x0baa, # - - 0x0bae, 0x0bb5, # - - 0x0bb7, 0x0bb9, # - - 0x0c05, 0x0c0c, # - - 0x0c0e, 0x0c10, # - - 0x0c12, 0x0c28, # - - 0x0c2a, 0x0c33, # - - 0x0c35, 0x0c39, # - - 0x0c60, 0x0c61, # - - 0x0c85, 0x0c8c, # - - 0x0c8e, 0x0c90, # - - 0x0c92, 0x0ca8, # - - 0x0caa, 0x0cb3, # - - 0x0cb5, 0x0cb9, # - - 0x0ce0, 0x0ce1, # - - 0x0d05, 0x0d0c, # - - 0x0d0e, 0x0d10, # - - 0x0d12, 0x0d28, # - - 0x0d2a, 0x0d39, # - - 0x0d60, 0x0d61, # - - 0x0e01, 0x0e30, # - - 0x0e32, 0x0e33, # - - 0x0e40, 0x0e46, # - - 
0x0e5a, 0x0e5b, # - - 0x0e81, 0x0e82, # - - 0x0e87, 0x0e88, # - - 0x0e94, 0x0e97, # - - 0x0e99, 0x0e9f, # - - 0x0ea1, 0x0ea3, # - - 0x0eaa, 0x0eab, # - - 0x0ead, 0x0eae, # - - 0x0eb2, 0x0eb3, # - - 0x0ec0, 0x0ec4, # - - 0x0edc, 0x0edd, # - - 0x0f18, 0x0f19, # - - 0x0f40, 0x0f47, # - - 0x0f49, 0x0f69, # - - 0x10d0, 0x10f6, # - - 0x1100, 0x1159, # - - 0x115f, 0x11a2, # - - 0x11a8, 0x11f9, # - - 0x1e00, 0x1e9b, # - - 0x1f50, 0x1f57, # - - 0x1f80, 0x1fb4, # - - 0x1fb6, 0x1fbc, # - - 0x1fc2, 0x1fc4, # - - 0x1fc6, 0x1fcc, # - - 0x1fd0, 0x1fd3, # - - 0x1fd6, 0x1fdb, # - - 0x1fe0, 0x1fec, # - - 0x1ff2, 0x1ff4, # - - 0x1ff6, 0x1ffc, # - - 0x210a, 0x2113, # - - 0x2115, 0x211d, # - - 0x2120, 0x2122, # - - 0x212a, 0x2131, # - - 0x2133, 0x2138, # - - 0x3041, 0x3094, # - - 0x30a1, 0x30fa, # - - 0x3105, 0x312c, # - - 0x3131, 0x318e, # - - 0x3192, 0x319f, # - - 0x3260, 0x327b, # - - 0x328a, 0x32b0, # - - 0x32d0, 0x32fe, # - - 0x3300, 0x3357, # - - 0x3371, 0x3376, # - - 0x337b, 0x3394, # - - 0x3399, 0x339e, # - - 0x33a9, 0x33ad, # - - 0x33b0, 0x33c1, # - - 0x33c3, 0x33c5, # - - 0x33c7, 0x33d7, # - - 0x33d9, 0x33dd, # - - 0x4e00, 0x9fff, # - - 0xac00, 0xd7a3, # - - 0xf900, 0xfb06, # - - 0xfb13, 0xfb17, # - - 0xfb1f, 0xfb28, # - - 0xfb2a, 0xfb36, # - - 0xfb38, 0xfb3c, # - - 0xfb40, 0xfb41, # - - 0xfb43, 0xfb44, # - - 0xfb46, 0xfbb1, # - - 0xfbd3, 0xfd3d, # - - 0xfd50, 0xfd8f, # - - 0xfd92, 0xfdc7, # - - 0xfdf0, 0xfdf9, # - - 0xfe70, 0xfe72, # - - 0xfe76, 0xfefc, # - - 0xff66, 0xff6f, # - - 0xff71, 0xff9d, # - - 0xffa0, 0xffbe, # - - 0xffc2, 0xffc7, # - - 0xffca, 0xffcf, # - - 0xffd2, 0xffd7, # - - 0xffda, 0xffdc] # - - - alphaSinglets = [ - 0x00aa, # - 0x00b5, # - 0x00ba, # - 0x03da, # - 0x03dc, # - 0x03de, # - 0x03e0, # - 0x06d5, # - 0x09b2, # - 0x0a5e, # - 0x0a8d, # - 0x0ae0, # - 0x0b9c, # - 0x0cde, # - 0x0e4f, # - 0x0e84, # - 0x0e8a, # - 0x0e8d, # - 0x0ea5, # - 0x0ea7, # - 0x0eb0, # - 0x0ebd, # - 0x1fbe, # - 0x207f, # - 0x20a8, # - 0x2102, # - 0x2107, # - 0x2124, # - 0x2126, # - 
0x2128, # - 0xfb3e, # - 0xfe74] # - - spaceRanges = [ - 0x0009, 0x000a, # tab and newline - 0x0020, 0x0020, # space - 0x00a0, 0x00a0, # - 0x2000, 0x200b, # - - 0x2028, 0x2029, # - 0x3000, 0x3000, # - 0xfeff, 0xfeff] # - - toupperRanges = [ - 0x0061, 0x007a, 468, # a-z A-Z - 0x00e0, 0x00f6, 468, # - - - 0x00f8, 0x00fe, 468, # - - - 0x0256, 0x0257, 295, # - - - 0x0258, 0x0259, 298, # - - - 0x028a, 0x028b, 283, # - - - 0x03ad, 0x03af, 463, # - - - 0x03b1, 0x03c1, 468, # - - - 0x03c3, 0x03cb, 468, # - - - 0x03cd, 0x03ce, 437, # - - - 0x0430, 0x044f, 468, # - - - 0x0451, 0x045c, 420, # - - - 0x045e, 0x045f, 420, # - - - 0x0561, 0x0586, 452, # - - - 0x1f00, 0x1f07, 508, # - - - 0x1f10, 0x1f15, 508, # - - - 0x1f20, 0x1f27, 508, # - - - 0x1f30, 0x1f37, 508, # - - - 0x1f40, 0x1f45, 508, # - - - 0x1f60, 0x1f67, 508, # - - - 0x1f70, 0x1f71, 574, # - - - 0x1f72, 0x1f75, 586, # - - - 0x1f76, 0x1f77, 600, # - - - 0x1f78, 0x1f79, 628, # - - - 0x1f7a, 0x1f7b, 612, # - - - 0x1f7c, 0x1f7d, 626, # - - - 0x1f80, 0x1f87, 508, # - - - 0x1f90, 0x1f97, 508, # - - - 0x1fa0, 0x1fa7, 508, # - - - 0x1fb0, 0x1fb1, 508, # - - - 0x1fd0, 0x1fd1, 508, # - - - 0x1fe0, 0x1fe1, 508, # - - - 0x2170, 0x217f, 484, # - - - 0x24d0, 0x24e9, 474, # - - - 0xff41, 0xff5a, 468] # - - - - toupperSinglets = [ - 0x00ff, 621, # - 0x0101, 499, # - 0x0103, 499, # - 0x0105, 499, # - 0x0107, 499, # - 0x0109, 499, # - 0x010b, 499, # - 0x010d, 499, # - 0x010f, 499, # - 0x0111, 499, # - 0x0113, 499, # - 0x0115, 499, # - 0x0117, 499, # - 0x0119, 499, # - 0x011b, 499, # - 0x011d, 499, # - 0x011f, 499, # - 0x0121, 499, # - 0x0123, 499, # - 0x0125, 499, # - 0x0127, 499, # - 0x0129, 499, # - 0x012b, 499, # - 0x012d, 499, # - 0x012f, 499, # - 0x0131, 268, # I - 0x0133, 499, # - 0x0135, 499, # - 0x0137, 499, # - 0x013a, 499, # - 0x013c, 499, # - 0x013e, 499, # - 0x0140, 499, # - 0x0142, 499, # - 0x0144, 499, # - 0x0146, 499, # - 0x0148, 499, # - 0x014b, 499, # - 0x014d, 499, # - 0x014f, 499, # - 0x0151, 499, # - 0x0153, 499, # 
- 0x0155, 499, # - 0x0157, 499, # - 0x0159, 499, # - 0x015b, 499, # - 0x015d, 499, # - 0x015f, 499, # - 0x0161, 499, # - 0x0163, 499, # - 0x0165, 499, # - 0x0167, 499, # - 0x0169, 499, # - 0x016b, 499, # - 0x016d, 499, # - 0x016f, 499, # - 0x0171, 499, # - 0x0173, 499, # - 0x0175, 499, # - 0x0177, 499, # - 0x017a, 499, # - 0x017c, 499, # - 0x017e, 499, # - 0x017f, 200, # S - 0x0183, 499, # - 0x0185, 499, # - 0x0188, 499, # - 0x018c, 499, # - 0x0192, 499, # - 0x0199, 499, # - 0x01a1, 499, # - 0x01a3, 499, # - 0x01a5, 499, # - 0x01a8, 499, # - 0x01ad, 499, # - 0x01b0, 499, # - 0x01b4, 499, # - 0x01b6, 499, # - 0x01b9, 499, # - 0x01bd, 499, # - 0x01c5, 499, # - 0x01c6, 498, # - 0x01c8, 499, # - 0x01c9, 498, # - 0x01cb, 499, # - 0x01cc, 498, # - 0x01ce, 499, # - 0x01d0, 499, # - 0x01d2, 499, # - 0x01d4, 499, # - 0x01d6, 499, # - 0x01d8, 499, # - 0x01da, 499, # - 0x01dc, 499, # - 0x01df, 499, # - 0x01e1, 499, # - 0x01e3, 499, # - 0x01e5, 499, # - 0x01e7, 499, # - 0x01e9, 499, # - 0x01eb, 499, # - 0x01ed, 499, # - 0x01ef, 499, # - 0x01f2, 499, # - 0x01f3, 498, # - 0x01f5, 499, # - 0x01fb, 499, # - 0x01fd, 499, # - 0x01ff, 499, # - 0x0201, 499, # - 0x0203, 499, # - 0x0205, 499, # - 0x0207, 499, # - 0x0209, 499, # - 0x020b, 499, # - 0x020d, 499, # - 0x020f, 499, # - 0x0211, 499, # - 0x0213, 499, # - 0x0215, 499, # - 0x0217, 499, # - 0x0253, 290, # - 0x0254, 294, # - 0x025b, 297, # - 0x0260, 295, # - 0x0263, 293, # - 0x0268, 291, # - 0x0269, 289, # - 0x026f, 289, # - 0x0272, 287, # - 0x0283, 282, # - 0x0288, 282, # - 0x0292, 281, # - 0x03ac, 462, # - 0x03cc, 436, # - 0x03d0, 438, # - 0x03d1, 443, # - 0x03d5, 453, # - 0x03d6, 446, # - 0x03e3, 499, # - 0x03e5, 499, # - 0x03e7, 499, # - 0x03e9, 499, # - 0x03eb, 499, # - 0x03ed, 499, # - 0x03ef, 499, # - 0x03f0, 414, # - 0x03f1, 420, # - 0x0461, 499, # - 0x0463, 499, # - 0x0465, 499, # - 0x0467, 499, # - 0x0469, 499, # - 0x046b, 499, # - 0x046d, 499, # - 0x046f, 499, # - 0x0471, 499, # - 0x0473, 499, # - 0x0475, 499, # - 
0x0477, 499, # - 0x0479, 499, # - 0x047b, 499, # - 0x047d, 499, # - 0x047f, 499, # - 0x0481, 499, # - 0x0491, 499, # - 0x0493, 499, # - 0x0495, 499, # - 0x0497, 499, # - 0x0499, 499, # - 0x049b, 499, # - 0x049d, 499, # - 0x049f, 499, # - 0x04a1, 499, # - 0x04a3, 499, # - 0x04a5, 499, # - 0x04a7, 499, # - 0x04a9, 499, # - 0x04ab, 499, # - 0x04ad, 499, # - 0x04af, 499, # - 0x04b1, 499, # - 0x04b3, 499, # - 0x04b5, 499, # - 0x04b7, 499, # - 0x04b9, 499, # - 0x04bb, 499, # - 0x04bd, 499, # - 0x04bf, 499, # - 0x04c2, 499, # - 0x04c4, 499, # - 0x04c8, 499, # - 0x04cc, 499, # - 0x04d1, 499, # - 0x04d3, 499, # - 0x04d5, 499, # - 0x04d7, 499, # - 0x04d9, 499, # - 0x04db, 499, # - 0x04dd, 499, # - 0x04df, 499, # - 0x04e1, 499, # - 0x04e3, 499, # - 0x04e5, 499, # - 0x04e7, 499, # - 0x04e9, 499, # - 0x04eb, 499, # - 0x04ef, 499, # - 0x04f1, 499, # - 0x04f3, 499, # - 0x04f5, 499, # - 0x04f9, 499, # - 0x1e01, 499, # - 0x1e03, 499, # - 0x1e05, 499, # - 0x1e07, 499, # - 0x1e09, 499, # - 0x1e0b, 499, # - 0x1e0d, 499, # - 0x1e0f, 499, # - 0x1e11, 499, # - 0x1e13, 499, # - 0x1e15, 499, # - 0x1e17, 499, # - 0x1e19, 499, # - 0x1e1b, 499, # - 0x1e1d, 499, # - 0x1e1f, 499, # - 0x1e21, 499, # - 0x1e23, 499, # - 0x1e25, 499, # - 0x1e27, 499, # - 0x1e29, 499, # - 0x1e2b, 499, # - 0x1e2d, 499, # - 0x1e2f, 499, # - 0x1e31, 499, # - 0x1e33, 499, # - 0x1e35, 499, # - 0x1e37, 499, # - 0x1e39, 499, # - 0x1e3b, 499, # - 0x1e3d, 499, # - 0x1e3f, 499, # - 0x1e41, 499, # - 0x1e43, 499, # - 0x1e45, 499, # - 0x1e47, 499, # - 0x1e49, 499, # - 0x1e4b, 499, # - 0x1e4d, 499, # - 0x1e4f, 499, # - 0x1e51, 499, # - 0x1e53, 499, # - 0x1e55, 499, # - 0x1e57, 499, # - 0x1e59, 499, # - 0x1e5b, 499, # - 0x1e5d, 499, # - 0x1e5f, 499, # - 0x1e61, 499, # - 0x1e63, 499, # - 0x1e65, 499, # - 0x1e67, 499, # - 0x1e69, 499, # - 0x1e6b, 499, # - 0x1e6d, 499, # - 0x1e6f, 499, # - 0x1e71, 499, # - 0x1e73, 499, # - 0x1e75, 499, # - 0x1e77, 499, # - 0x1e79, 499, # - 0x1e7b, 499, # - 0x1e7d, 499, # - 0x1e7f, 499, # - 0x1e81, 
499, # - 0x1e83, 499, # - 0x1e85, 499, # - 0x1e87, 499, # - 0x1e89, 499, # - 0x1e8b, 499, # - 0x1e8d, 499, # - 0x1e8f, 499, # - 0x1e91, 499, # - 0x1e93, 499, # - 0x1e95, 499, # - 0x1ea1, 499, # - 0x1ea3, 499, # - 0x1ea5, 499, # - 0x1ea7, 499, # - 0x1ea9, 499, # - 0x1eab, 499, # - 0x1ead, 499, # - 0x1eaf, 499, # - 0x1eb1, 499, # - 0x1eb3, 499, # - 0x1eb5, 499, # - 0x1eb7, 499, # - 0x1eb9, 499, # - 0x1ebb, 499, # - 0x1ebd, 499, # - 0x1ebf, 499, # - 0x1ec1, 499, # - 0x1ec3, 499, # - 0x1ec5, 499, # - 0x1ec7, 499, # - 0x1ec9, 499, # - 0x1ecb, 499, # - 0x1ecd, 499, # - 0x1ecf, 499, # - 0x1ed1, 499, # - 0x1ed3, 499, # - 0x1ed5, 499, # - 0x1ed7, 499, # - 0x1ed9, 499, # - 0x1edb, 499, # - 0x1edd, 499, # - 0x1edf, 499, # - 0x1ee1, 499, # - 0x1ee3, 499, # - 0x1ee5, 499, # - 0x1ee7, 499, # - 0x1ee9, 499, # - 0x1eeb, 499, # - 0x1eed, 499, # - 0x1eef, 499, # - 0x1ef1, 499, # - 0x1ef3, 499, # - 0x1ef5, 499, # - 0x1ef7, 499, # - 0x1ef9, 499, # - 0x1f51, 508, # - 0x1f53, 508, # - 0x1f55, 508, # - 0x1f57, 508, # - 0x1fb3, 509, # - 0x1fc3, 509, # - 0x1fe5, 507, # - 0x1ff3, 509] # - - tolowerRanges = [ - 0x0041, 0x005a, 532, # A-Z a-z - 0x00c0, 0x00d6, 532, # - - - 0x00d8, 0x00de, 532, # - - - 0x0189, 0x018a, 705, # - - - 0x018e, 0x018f, 702, # - - - 0x01b1, 0x01b2, 717, # - - - 0x0388, 0x038a, 537, # - - - 0x038e, 0x038f, 563, # - - - 0x0391, 0x03a1, 532, # - - - 0x03a3, 0x03ab, 532, # - - - 0x0401, 0x040c, 580, # - - - 0x040e, 0x040f, 580, # - - - 0x0410, 0x042f, 532, # - - - 0x0531, 0x0556, 548, # - - - 0x10a0, 0x10c5, 548, # - - - 0x1f08, 0x1f0f, 492, # - - - 0x1f18, 0x1f1d, 492, # - - - 0x1f28, 0x1f2f, 492, # - - - 0x1f38, 0x1f3f, 492, # - - - 0x1f48, 0x1f4d, 492, # - - - 0x1f68, 0x1f6f, 492, # - - - 0x1f88, 0x1f8f, 492, # - - - 0x1f98, 0x1f9f, 492, # - - - 0x1fa8, 0x1faf, 492, # - - - 0x1fb8, 0x1fb9, 492, # - - - 0x1fba, 0x1fbb, 426, # - - - 0x1fc8, 0x1fcb, 414, # - - - 0x1fd8, 0x1fd9, 492, # - - - 0x1fda, 0x1fdb, 400, # - - - 0x1fe8, 0x1fe9, 492, # - - - 0x1fea, 0x1feb, 388, # 
- - - 0x1ff8, 0x1ff9, 372, # - - - 0x1ffa, 0x1ffb, 374, # - - - 0x2160, 0x216f, 516, # - - - 0x24b6, 0x24cf, 526, # - - - 0xff21, 0xff3a, 532] # - - - - tolowerSinglets = [ - 0x0100, 501, # - 0x0102, 501, # - 0x0104, 501, # - 0x0106, 501, # - 0x0108, 501, # - 0x010a, 501, # - 0x010c, 501, # - 0x010e, 501, # - 0x0110, 501, # - 0x0112, 501, # - 0x0114, 501, # - 0x0116, 501, # - 0x0118, 501, # - 0x011a, 501, # - 0x011c, 501, # - 0x011e, 501, # - 0x0120, 501, # - 0x0122, 501, # - 0x0124, 501, # - 0x0126, 501, # - 0x0128, 501, # - 0x012a, 501, # - 0x012c, 501, # - 0x012e, 501, # - 0x0130, 301, # i - 0x0132, 501, # - 0x0134, 501, # - 0x0136, 501, # - 0x0139, 501, # - 0x013b, 501, # - 0x013d, 501, # - 0x013f, 501, # - 0x0141, 501, # - 0x0143, 501, # - 0x0145, 501, # - 0x0147, 501, # - 0x014a, 501, # - 0x014c, 501, # - 0x014e, 501, # - 0x0150, 501, # - 0x0152, 501, # - 0x0154, 501, # - 0x0156, 501, # - 0x0158, 501, # - 0x015a, 501, # - 0x015c, 501, # - 0x015e, 501, # - 0x0160, 501, # - 0x0162, 501, # - 0x0164, 501, # - 0x0166, 501, # - 0x0168, 501, # - 0x016a, 501, # - 0x016c, 501, # - 0x016e, 501, # - 0x0170, 501, # - 0x0172, 501, # - 0x0174, 501, # - 0x0176, 501, # - 0x0178, 379, # - 0x0179, 501, # - 0x017b, 501, # - 0x017d, 501, # - 0x0181, 710, # - 0x0182, 501, # - 0x0184, 501, # - 0x0186, 706, # - 0x0187, 501, # - 0x018b, 501, # - 0x0190, 703, # - 0x0191, 501, # - 0x0193, 705, # - 0x0194, 707, # - 0x0196, 711, # - 0x0197, 709, # - 0x0198, 501, # - 0x019c, 711, # - 0x019d, 713, # - 0x01a0, 501, # - 0x01a2, 501, # - 0x01a4, 501, # - 0x01a7, 501, # - 0x01a9, 718, # - 0x01ac, 501, # - 0x01ae, 718, # - 0x01af, 501, # - 0x01b3, 501, # - 0x01b5, 501, # - 0x01b7, 719, # - 0x01b8, 501, # - 0x01bc, 501, # - 0x01c4, 502, # - 0x01c5, 501, # - 0x01c7, 502, # - 0x01c8, 501, # - 0x01ca, 502, # - 0x01cb, 501, # - 0x01cd, 501, # - 0x01cf, 501, # - 0x01d1, 501, # - 0x01d3, 501, # - 0x01d5, 501, # - 0x01d7, 501, # - 0x01d9, 501, # - 0x01db, 501, # - 0x01de, 501, # - 0x01e0, 501, # - 
0x01e2, 501, # - 0x01e4, 501, # - 0x01e6, 501, # - 0x01e8, 501, # - 0x01ea, 501, # - 0x01ec, 501, # - 0x01ee, 501, # - 0x01f1, 502, # - 0x01f2, 501, # - 0x01f4, 501, # - 0x01fa, 501, # - 0x01fc, 501, # - 0x01fe, 501, # - 0x0200, 501, # - 0x0202, 501, # - 0x0204, 501, # - 0x0206, 501, # - 0x0208, 501, # - 0x020a, 501, # - 0x020c, 501, # - 0x020e, 501, # - 0x0210, 501, # - 0x0212, 501, # - 0x0214, 501, # - 0x0216, 501, # - 0x0386, 538, # - 0x038c, 564, # - 0x03e2, 501, # - 0x03e4, 501, # - 0x03e6, 501, # - 0x03e8, 501, # - 0x03ea, 501, # - 0x03ec, 501, # - 0x03ee, 501, # - 0x0460, 501, # - 0x0462, 501, # - 0x0464, 501, # - 0x0466, 501, # - 0x0468, 501, # - 0x046a, 501, # - 0x046c, 501, # - 0x046e, 501, # - 0x0470, 501, # - 0x0472, 501, # - 0x0474, 501, # - 0x0476, 501, # - 0x0478, 501, # - 0x047a, 501, # - 0x047c, 501, # - 0x047e, 501, # - 0x0480, 501, # - 0x0490, 501, # - 0x0492, 501, # - 0x0494, 501, # - 0x0496, 501, # - 0x0498, 501, # - 0x049a, 501, # - 0x049c, 501, # - 0x049e, 501, # - 0x04a0, 501, # - 0x04a2, 501, # - 0x04a4, 501, # - 0x04a6, 501, # - 0x04a8, 501, # - 0x04aa, 501, # - 0x04ac, 501, # - 0x04ae, 501, # - 0x04b0, 501, # - 0x04b2, 501, # - 0x04b4, 501, # - 0x04b6, 501, # - 0x04b8, 501, # - 0x04ba, 501, # - 0x04bc, 501, # - 0x04be, 501, # - 0x04c1, 501, # - 0x04c3, 501, # - 0x04c7, 501, # - 0x04cb, 501, # - 0x04d0, 501, # - 0x04d2, 501, # - 0x04d4, 501, # - 0x04d6, 501, # - 0x04d8, 501, # - 0x04da, 501, # - 0x04dc, 501, # - 0x04de, 501, # - 0x04e0, 501, # - 0x04e2, 501, # - 0x04e4, 501, # - 0x04e6, 501, # - 0x04e8, 501, # - 0x04ea, 501, # - 0x04ee, 501, # - 0x04f0, 501, # - 0x04f2, 501, # - 0x04f4, 501, # - 0x04f8, 501, # - 0x1e00, 501, # - 0x1e02, 501, # - 0x1e04, 501, # - 0x1e06, 501, # - 0x1e08, 501, # - 0x1e0a, 501, # - 0x1e0c, 501, # - 0x1e0e, 501, # - 0x1e10, 501, # - 0x1e12, 501, # - 0x1e14, 501, # - 0x1e16, 501, # - 0x1e18, 501, # - 0x1e1a, 501, # - 0x1e1c, 501, # - 0x1e1e, 501, # - 0x1e20, 501, # - 0x1e22, 501, # - 0x1e24, 501, # - 0x1e26, 
501, # - 0x1e28, 501, # - 0x1e2a, 501, # - 0x1e2c, 501, # - 0x1e2e, 501, # - 0x1e30, 501, # - 0x1e32, 501, # - 0x1e34, 501, # - 0x1e36, 501, # - 0x1e38, 501, # - 0x1e3a, 501, # - 0x1e3c, 501, # - 0x1e3e, 501, # - 0x1e40, 501, # - 0x1e42, 501, # - 0x1e44, 501, # - 0x1e46, 501, # - 0x1e48, 501, # - 0x1e4a, 501, # - 0x1e4c, 501, # - 0x1e4e, 501, # - 0x1e50, 501, # - 0x1e52, 501, # - 0x1e54, 501, # - 0x1e56, 501, # - 0x1e58, 501, # - 0x1e5a, 501, # - 0x1e5c, 501, # - 0x1e5e, 501, # - 0x1e60, 501, # - 0x1e62, 501, # - 0x1e64, 501, # - 0x1e66, 501, # - 0x1e68, 501, # - 0x1e6a, 501, # - 0x1e6c, 501, # - 0x1e6e, 501, # - 0x1e70, 501, # - 0x1e72, 501, # - 0x1e74, 501, # - 0x1e76, 501, # - 0x1e78, 501, # - 0x1e7a, 501, # - 0x1e7c, 501, # - 0x1e7e, 501, # - 0x1e80, 501, # - 0x1e82, 501, # - 0x1e84, 501, # - 0x1e86, 501, # - 0x1e88, 501, # - 0x1e8a, 501, # - 0x1e8c, 501, # - 0x1e8e, 501, # - 0x1e90, 501, # - 0x1e92, 501, # - 0x1e94, 501, # - 0x1ea0, 501, # - 0x1ea2, 501, # - 0x1ea4, 501, # - 0x1ea6, 501, # - 0x1ea8, 501, # - 0x1eaa, 501, # - 0x1eac, 501, # - 0x1eae, 501, # - 0x1eb0, 501, # - 0x1eb2, 501, # - 0x1eb4, 501, # - 0x1eb6, 501, # - 0x1eb8, 501, # - 0x1eba, 501, # - 0x1ebc, 501, # - 0x1ebe, 501, # - 0x1ec0, 501, # - 0x1ec2, 501, # - 0x1ec4, 501, # - 0x1ec6, 501, # - 0x1ec8, 501, # - 0x1eca, 501, # - 0x1ecc, 501, # - 0x1ece, 501, # - 0x1ed0, 501, # - 0x1ed2, 501, # - 0x1ed4, 501, # - 0x1ed6, 501, # - 0x1ed8, 501, # - 0x1eda, 501, # - 0x1edc, 501, # - 0x1ede, 501, # - 0x1ee0, 501, # - 0x1ee2, 501, # - 0x1ee4, 501, # - 0x1ee6, 501, # - 0x1ee8, 501, # - 0x1eea, 501, # - 0x1eec, 501, # - 0x1eee, 501, # - 0x1ef0, 501, # - 0x1ef2, 501, # - 0x1ef4, 501, # - 0x1ef6, 501, # - 0x1ef8, 501, # - 0x1f59, 492, # - 0x1f5b, 492, # - 0x1f5d, 492, # - 0x1f5f, 492, # - 0x1fbc, 491, # - 0x1fcc, 491, # - 0x1fec, 493, # - 0x1ffc, 491] # - - toTitleSinglets = [ - 0x01c4, 501, # - 0x01c6, 499, # - 0x01c7, 501, # - 0x01c9, 499, # - 0x01ca, 501, # - 0x01cc, 499, # - 0x01f1, 501, # - 0x01f3, 
499] # - -proc binarySearch(c: RuneImpl, tab: openArray[RuneImpl], len, stride: int): int = + for rune in runes: + result.add rune + +proc runeOffset*(s: openArray[char], pos: Natural, start: Natural = 0): int = + ## Returns the byte position of rune + ## at position ``pos`` in ``s`` with an optional start byte position. + ## Returns the special value -1 if it runs out of the string. + ## + ## **Beware:** This can lead to unoptimized code and slow execution! + ## Most problems can be solved more efficiently by using an iterator + ## or conversion to a seq of Rune. + ## + ## See also: + ## * `runeReverseOffset proc <#runeReverseOffset,string,Positive>`_ + runnableExamples: + let a = "añyóng" + doAssert a.runeOffset(1) == 1 + doAssert a.runeOffset(3) == 4 + doAssert a.runeOffset(4) == 6 + + var + i = 0 + o = start + while i < pos: + o += runeLenAt(s, o) + if o >= s.len: + return -1 + inc i + return o + +proc runeReverseOffset*(s: openArray[char], rev: Positive): (int, int) = + ## Returns a tuple with the byte offset of the + ## rune at position ``rev`` in ``s``, counting + ## from the end (starting with 1) and the total + ## number of runes in the string. + ## + ## Returns a negative value for offset if there are too few runes in + ## the string to satisfy the request. + ## + ## **Beware:** This can lead to unoptimized code and slow execution! + ## Most problems can be solved more efficiently by using an iterator + ## or conversion to a seq of Rune. + ## + ## See also: + ## * `runeOffset proc <#runeOffset,string,Natural,Natural>`_ + var + a = rev.int + o = 0 + x = 0 + let times = 2*rev.int-s.runeLen # transformed from rev.int - a < s.runeLen - rev.int + while o < s.len: + let r = runeLenAt(s, o) + o += r + if a > times: + x += r + dec a + result = if a > 0: (-a, rev.int-a) else: (x, -a+rev.int) + +proc runeAtPos*(s: openArray[char], pos: int): Rune = + ## Returns the rune at position ``pos``. + ## + ## **Beware:** This can lead to unoptimized code and slow execution! 
+ ## Most problems can be solved more efficiently by using an iterator + ## or conversion to a seq of Rune. + ## + ## See also: + ## * `runeAt proc <#runeAt,string,Natural>`_ + ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_ + ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ + fastRuneAt(s, runeOffset(s, pos), result, false) + +proc runeStrAtPos*(s: openArray[char], pos: Natural): string = + ## Returns the rune at position ``pos`` as UTF8 String. + ## + ## **Beware:** This can lead to unoptimized code and slow execution! + ## Most problems can be solved more efficiently by using an iterator + ## or conversion to a seq of Rune. + ## + ## See also: + ## * `runeAt proc <#runeAt,string,Natural>`_ + ## * `runeAtPos proc <#runeAtPos,string,int>`_ + ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ + let o = runeOffset(s, pos) + substr(s.toOpenArray(o, (o+runeLenAt(s, o)-1))) + +proc runeSubStr*(s: openArray[char], pos: int, len: int = int.high): string = + ## Returns the UTF-8 substring starting at code point ``pos`` + ## with ``len`` code points. + ## + ## If ``pos`` or ``len`` is negative they count from + ## the end of the string. If ``len`` is not given it means the longest + ## possible string. 
+ runnableExamples: + let s = "Hänsel ««: 10,00€" + doAssert(runeSubStr(s, 0, 2) == "Hä") + doAssert(runeSubStr(s, 10, 1) == ":") + doAssert(runeSubStr(s, -6) == "10,00€") + doAssert(runeSubStr(s, 10) == ": 10,00€") + doAssert(runeSubStr(s, 12, 5) == "10,00") + doAssert(runeSubStr(s, -6, 3) == "10,") + + if pos < 0: + let (o, rl) = runeReverseOffset(s, -pos) + if len >= rl: + result = s.substr(o, s.high) + elif len < 0: + let e = rl + len + if e < 0: + result = "" + else: + result = s.substr(o, runeOffset(s, e-(rl+pos), o)-1) + else: + result = s.substr(o, runeOffset(s, len, o)-1) + else: + let o = runeOffset(s, pos) + if o < 0: + result = "" + elif len == int.high: + result = s.substr(o, s.len-1) + elif len < 0: + let (e, rl) = runeReverseOffset(s, -len) + discard rl + if e <= 0: + result = "" + else: + result = s.substr(o, e-1) + else: + var e = runeOffset(s, len, o) + if e < 0: + e = s.len + result = s.substr(o, e-1) + +proc `<=%`*(a, b: Rune): bool = + ## Checks if code point of `a` is smaller or equal to code point of `b`. + runnableExamples: + let + a = "ú".runeAt(0) + b = "ü".runeAt(0) + doAssert a <=% b + return int(a) <=% int(b) + +proc `<%`*(a, b: Rune): bool = + ## Checks if code point of `a` is smaller than code point of `b`. + runnableExamples: + let + a = "ú".runeAt(0) + b = "ü".runeAt(0) + doAssert a <% b + return int(a) <% int(b) + +proc `==`*(a, b: Rune): bool = + ## Checks if two runes are equal. + return int(a) == int(b) + + +include "includes/unicode_ranges" + +proc binarySearch(c: RuneImpl, tab: openArray[int32], len, stride: int): int = var n = len var t = 0 - while n > 1: + while n > 1: var m = n div 2 var p = t + m*stride if c >= tab[p]: @@ -1131,64 +479,103 @@ proc binarySearch(c: RuneImpl, tab: openArray[RuneImpl], len, stride: int): int return t return -1 -proc toLower*(c: Rune): Rune {.rtl, extern: "nuc$1", procvar.} = - ## Converts `c` into lower case. This works for any Unicode character. 
- ## If possible, prefer `toLower` over `toUpper`. +proc toLower*(c: Rune): Rune {.rtl, extern: "nuc$1".} = + ## Converts ``c`` into lower case. This works for any rune. + ## + ## If possible, prefer ``toLower`` over ``toUpper``. + ## + ## See also: + ## * `toUpper proc <#toUpper,Rune>`_ + ## * `toTitle proc <#toTitle,Rune>`_ + ## * `isLower proc <#isLower,Rune>`_ var c = RuneImpl(c) - var p = binarySearch(c, tolowerRanges, len(tolowerRanges) div 3, 3) - if p >= 0 and c >= tolowerRanges[p] and c <= tolowerRanges[p+1]: - return Rune(c + tolowerRanges[p+2] - 500) - p = binarySearch(c, tolowerSinglets, len(tolowerSinglets) div 2, 2) - if p >= 0 and c == tolowerSinglets[p]: - return Rune(c + tolowerSinglets[p+1] - 500) + var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3) + if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]: + return Rune(c + toLowerRanges[p+2] - 500) + p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2) + if p >= 0 and c == toLowerSinglets[p]: + return Rune(c + toLowerSinglets[p+1] - 500) return Rune(c) -proc toUpper*(c: Rune): Rune {.rtl, extern: "nuc$1", procvar.} = - ## Converts `c` into upper case. This works for any Unicode character. - ## If possible, prefer `toLower` over `toUpper`. +proc toUpper*(c: Rune): Rune {.rtl, extern: "nuc$1".} = + ## Converts ``c`` into upper case. This works for any rune. + ## + ## If possible, prefer ``toLower`` over ``toUpper``. 
+ ## + ## See also: + ## * `toLower proc <#toLower,Rune>`_ + ## * `toTitle proc <#toTitle,Rune>`_ + ## * `isUpper proc <#isUpper,Rune>`_ var c = RuneImpl(c) - var p = binarySearch(c, toupperRanges, len(toupperRanges) div 3, 3) - if p >= 0 and c >= toupperRanges[p] and c <= toupperRanges[p+1]: - return Rune(c + toupperRanges[p+2] - 500) - p = binarySearch(c, toupperSinglets, len(toupperSinglets) div 2, 2) - if p >= 0 and c == toupperSinglets[p]: - return Rune(c + toupperSinglets[p+1] - 500) + var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3) + if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]: + return Rune(c + toUpperRanges[p+2] - 500) + p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2) + if p >= 0 and c == toUpperSinglets[p]: + return Rune(c + toUpperSinglets[p+1] - 500) return Rune(c) -proc toTitle*(c: Rune): Rune {.rtl, extern: "nuc$1", procvar.} = +proc toTitle*(c: Rune): Rune {.rtl, extern: "nuc$1".} = + ## Converts ``c`` to title case. + ## + ## See also: + ## * `toLower proc <#toLower,Rune>`_ + ## * `toUpper proc <#toUpper,Rune>`_ + ## * `isTitle proc <#isTitle,Rune>`_ var c = RuneImpl(c) var p = binarySearch(c, toTitleSinglets, len(toTitleSinglets) div 2, 2) if p >= 0 and c == toTitleSinglets[p]: return Rune(c + toTitleSinglets[p+1] - 500) return Rune(c) -proc isLower*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = - ## returns true iff `c` is a lower case Unicode character - ## If possible, prefer `isLower` over `isUpper`. +proc isLower*(c: Rune): bool {.rtl, extern: "nuc$1".} = + ## Returns true if ``c`` is a lower case rune. + ## + ## If possible, prefer ``isLower`` over ``isUpper``. + ## + ## See also: + ## * `toLower proc <#toLower,Rune>`_ + ## * `isUpper proc <#isUpper,Rune>`_ + ## * `isTitle proc <#isTitle,Rune>`_ var c = RuneImpl(c) # Note: toUpperRanges is correct here! 
- var p = binarySearch(c, toupperRanges, len(toupperRanges) div 3, 3) - if p >= 0 and c >= toupperRanges[p] and c <= toupperRanges[p+1]: + var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3) + if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]: return true - p = binarySearch(c, toupperSinglets, len(toupperSinglets) div 2, 2) - if p >= 0 and c == toupperSinglets[p]: + p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2) + if p >= 0 and c == toUpperSinglets[p]: return true -proc isUpper*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = - ## returns true iff `c` is a upper case Unicode character - ## If possible, prefer `isLower` over `isUpper`. +proc isUpper*(c: Rune): bool {.rtl, extern: "nuc$1".} = + ## Returns true if ``c`` is a upper case rune. + ## + ## If possible, prefer ``isLower`` over ``isUpper``. + ## + ## See also: + ## * `toUpper proc <#toUpper,Rune>`_ + ## * `isLower proc <#isLower,Rune>`_ + ## * `isTitle proc <#isTitle,Rune>`_ + ## * `isAlpha proc <#isAlpha,Rune>`_ + ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_ var c = RuneImpl(c) # Note: toLowerRanges is correct here! - var p = binarySearch(c, tolowerRanges, len(tolowerRanges) div 3, 3) - if p >= 0 and c >= tolowerRanges[p] and c <= tolowerRanges[p+1]: + var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3) + if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]: return true - p = binarySearch(c, tolowerSinglets, len(tolowerSinglets) div 2, 2) - if p >= 0 and c == tolowerSinglets[p]: + p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2) + if p >= 0 and c == toLowerSinglets[p]: return true -proc isAlpha*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = - ## returns true iff `c` is an *alpha* Unicode character (i.e. a letter) - if isUpper(c) or isLower(c): +proc isAlpha*(c: Rune): bool {.rtl, extern: "nuc$1".} = + ## Returns true if ``c`` is an *alpha* rune (i.e., a letter). 
+ ## + ## See also: + ## * `isLower proc <#isLower,Rune>`_ + ## * `isTitle proc <#isTitle,Rune>`_ + ## * `isAlpha proc <#isAlpha,Rune>`_ + ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_ + ## * `isCombining proc <#isCombining,Rune>`_ + if isUpper(c) or isLower(c): return true var c = RuneImpl(c) var p = binarySearch(c, alphaRanges, len(alphaRanges) div 2, 2) @@ -1197,19 +584,39 @@ proc isAlpha*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = p = binarySearch(c, alphaSinglets, len(alphaSinglets), 1) if p >= 0 and c == alphaSinglets[p]: return true - -proc isTitle*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = + +proc isTitle*(c: Rune): bool {.rtl, extern: "nuc$1".} = + ## Returns true if ``c`` is a Unicode titlecase code point. + ## + ## See also: + ## * `toTitle proc <#toTitle,Rune>`_ + ## * `isLower proc <#isLower,Rune>`_ + ## * `isUpper proc <#isUpper,Rune>`_ + ## * `isAlpha proc <#isAlpha,Rune>`_ + ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_ return isUpper(c) and isLower(c) -proc isWhiteSpace*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = - ## returns true iff `c` is a Unicode whitespace character +proc isWhiteSpace*(c: Rune): bool {.rtl, extern: "nuc$1".} = + ## Returns true if ``c`` is a Unicode whitespace code point. + ## + ## See also: + ## * `isLower proc <#isLower,Rune>`_ + ## * `isUpper proc <#isUpper,Rune>`_ + ## * `isTitle proc <#isTitle,Rune>`_ + ## * `isAlpha proc <#isAlpha,Rune>`_ var c = RuneImpl(c) var p = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2) if p >= 0 and c >= spaceRanges[p] and c <= spaceRanges[p+1]: return true -proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = - ## returns true iff `c` is a Unicode combining character +proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1".} = + ## Returns true if ``c`` is a Unicode combining code unit. 
+ ## + ## See also: + ## * `isLower proc <#isLower,Rune>`_ + ## * `isUpper proc <#isUpper,Rune>`_ + ## * `isTitle proc <#isTitle,Rune>`_ + ## * `isAlpha proc <#isAlpha,Rune>`_ var c = RuneImpl(c) # Optimized to return false immediately for ASCII @@ -1219,8 +626,179 @@ proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = (c >= 0x20d0 and c <= 0x20ff) or (c >= 0xfe20 and c <= 0xfe2f)) -iterator runes*(s: string): Rune = - ## iterates over any unicode character of the string `s`. +template runeCheck(s, runeProc) = + ## Common code for isAlpha and isSpace. + result = if len(s) == 0: false else: true + var + i = 0 + rune: Rune + while i < len(s) and result: + fastRuneAt(s, i, rune, doInc = true) + result = runeProc(rune) and result + +proc isAlpha*(s: openArray[char]): bool {.noSideEffect, + rtl, extern: "nuc$1Str".} = + ## Returns true if ``s`` contains all alphabetic runes. + runnableExamples: + let a = "añyóng" + doAssert a.isAlpha + runeCheck(s, isAlpha) + +proc isSpace*(s: openArray[char]): bool {.noSideEffect, + rtl, extern: "nuc$1Str".} = + ## Returns true if ``s`` contains all whitespace runes. + runnableExamples: + let a = "\t\l \v\r\f" + doAssert a.isSpace + runeCheck(s, isWhiteSpace) + + +template convertRune(s, runeProc) = + ## Convert runes in ``s`` using ``runeProc`` as the converter. + result = newString(len(s)) + var + i = 0 + resultIndex = 0 + rune: Rune + while i < len(s): + fastRuneAt(s, i, rune, doInc = true) + rune = runeProc(rune) + fastToUTF8Copy(rune, result, resultIndex, doInc = true) + +proc toUpper*(s: openArray[char]): string {.noSideEffect, + rtl, extern: "nuc$1Str".} = + ## Converts ``s`` into upper-case runes. + runnableExamples: + doAssert toUpper("abγ") == "ABΓ" + convertRune(s, toUpper) + +proc toLower*(s: openArray[char]): string {.noSideEffect, + rtl, extern: "nuc$1Str".} = + ## Converts ``s`` into lower-case runes. 
+ runnableExamples: + doAssert toLower("ABΓ") == "abγ" + convertRune(s, toLower) + +proc swapCase*(s: openArray[char]): string {.noSideEffect, + rtl, extern: "nuc$1".} = + ## Swaps the case of runes in ``s``. + ## + ## Returns a new string such that the cases of all runes + ## are swapped if possible. + runnableExamples: + doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA" + + var + i = 0 + resultIndex = 0 + rune: Rune + result = newString(len(s)) + while i < len(s): + fastRuneAt(s, i, rune) + if rune.isUpper(): + rune = rune.toLower() + elif rune.isLower(): + rune = rune.toUpper() + fastToUTF8Copy(rune, result, resultIndex, doInc = true) + +proc capitalize*(s: openArray[char]): string {.noSideEffect, + rtl, extern: "nuc$1".} = + ## Converts the first character of ``s`` into an upper-case rune. + runnableExamples: + doAssert capitalize("βeta") == "Βeta" + + if len(s) == 0: + return "" + var + rune: Rune + i = 0 + fastRuneAt(s, i, rune, doInc = true) + result = $toUpper(rune) & substr(s.toOpenArray(i, s.high)) + +when not defined(nimHasEffectsOf): + {.pragma: effectsOf.} + +proc translate*(s: openArray[char], replacements: proc(key: string): string): string {. + rtl, extern: "nuc$1", effectsOf: replacements.} = + ## Translates words in a string using the ``replacements`` proc to substitute + ## words inside ``s`` with their replacements. + ## + ## ``replacements`` is any proc that takes a word and returns + ## a new word to fill it's place. + runnableExamples: + proc wordToNumber(s: string): string = + case s + of "one": "1" + of "two": "2" + else: s + let a = "one two three four" + doAssert a.translate(wordToNumber) == "1 2 three four" + + # Allocate memory for the new string based on the old one. + # If the new string length is less than the old, no allocations + # will be needed. 
If the new string length is greater than the + # old, then maybe only one allocation is needed + result = newStringOfCap(s.len) + var + index = 0 + lastIndex = 0 + wordStart = 0 + inWord = false + rune: Rune + + while index < len(s): + lastIndex = index + fastRuneAt(s, index, rune) + let whiteSpace = rune.isWhiteSpace() + + if whiteSpace and inWord: + # If we've reached the end of a word + let word = substr(s.toOpenArray(wordStart, lastIndex - 1)) + result.add(replacements(word)) + result.add($rune) + inWord = false + elif not whiteSpace and not inWord: + # If we've hit a non space character and + # are not currently in a word, track + # the starting index of the word + inWord = true + wordStart = lastIndex + elif whiteSpace: + result.add($rune) + + if wordStart < len(s) and inWord: + # Get the trailing word at the end + let word = substr(s.toOpenArray(wordStart, s.high)) + result.add(replacements(word)) + +proc title*(s: openArray[char]): string {.noSideEffect, + rtl, extern: "nuc$1".} = + ## Converts ``s`` to a unicode title. + ## + ## Returns a new string such that the first character + ## in each word inside ``s`` is capitalized. + runnableExamples: + doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma" + + var + i = 0 + resultIndex = 0 + rune: Rune + result = newString(len(s)) + var firstRune = true + + while i < len(s): + fastRuneAt(s, i, rune) + if not rune.isWhiteSpace() and firstRune: + rune = rune.toUpper() + firstRune = false + elif rune.isWhiteSpace(): + firstRune = true + fastToUTF8Copy(rune, result, resultIndex, doInc = true) + + +iterator runes*(s: openArray[char]): Rune = + ## Iterates over any rune of the string ``s`` returning runes. var i = 0 result: Rune @@ -1228,12 +806,39 @@ iterator runes*(s: string): Rune = fastRuneAt(s, i, result, true) yield result -proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1", procvar.} = - ## compares two UTF8 strings and ignores the case. 
Returns: +iterator utf8*(s: openArray[char]): string = + ## Iterates over any rune of the string ``s`` returning utf8 values. + ## + ## See also: + ## * `validateUtf8 proc <#validateUtf8,string>`_ + ## * `toUTF8 proc <#toUTF8,Rune>`_ + ## * `$ proc <#$,Rune>`_ alias for `toUTF8` + ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_ + var o = 0 + while o < s.len: + let n = runeLenAt(s, o) + yield substr(s.toOpenArray(o, (o+n-1))) + o += n + +proc toRunes*(s: openArray[char]): seq[Rune] = + ## Obtains a sequence containing the Runes in ``s``. + ## + ## See also: + ## * `$ proc <#$,Rune>`_ for a reverse operation + runnableExamples: + let a = toRunes("aáä") + doAssert a == @["a".runeAt(0), "á".runeAt(0), "ä".runeAt(0)] + + result = newSeq[Rune]() + for r in s.runes: + result.add(r) + +proc cmpRunesIgnoreCase*(a, b: openArray[char]): int {.rtl, extern: "nuc$1".} = + ## Compares two UTF-8 strings and ignores the case. Returns: ## - ## | 0 iff a == b - ## | < 0 iff a < b - ## | > 0 iff a > b + ## | `0` if a == b + ## | `< 0` if a < b + ## | `> 0` if a > b var i = 0 var j = 0 var ar, br: Rune @@ -1241,20 +846,25 @@ proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1", procvar.} = # slow path: fastRuneAt(a, i, ar) fastRuneAt(b, j, br) - result = RuneImpl(toLower(ar)) - RuneImpl(toLower(br)) + when sizeof(int) < 4: + const lo = low(int).int32 + const hi = high(int).int32 + result = clamp(RuneImpl(toLower(ar)) - RuneImpl(toLower(br)), lo, hi).int + else: + result = RuneImpl(toLower(ar)) - RuneImpl(toLower(br)) if result != 0: return result = a.len - b.len -proc reversed*(s: string): string = - ## returns the reverse of `s`, interpreting it as unicode characters. Unicode - ## combining characters are correctly interpreted as well: +proc reversed*(s: openArray[char]): string = + ## Returns the reverse of ``s``, interpreting it as runes. ## - ## .. 
code-block:: nim - ## - ## assert reversed("Reverse this!") == "!siht esreveR" - ## assert reversed("先秦兩漢") == "漢兩秦先" - ## assert reversed("as⃝df̅") == "f̅ds⃝a" - ## assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞" + ## Unicode combining characters are correctly interpreted as well. + runnableExamples: + assert reversed("Reverse this!") == "!siht esreveR" + assert reversed("先秦兩漢") == "漢兩秦先" + assert reversed("as⃝df̅") == "f̅ds⃝a" + assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞" + var i = 0 lastI = 0 @@ -1262,7 +872,7 @@ proc reversed*(s: string): string = blockPos = 0 r: Rune - template reverseUntil(pos): stmt = + template reverseUntil(pos) = var j = pos - 1 while j > blockPos: result[newPos] = s[j] @@ -1280,14 +890,626 @@ proc reversed*(s: string): string = reverseUntil(len(s)) -when isMainModule: +proc graphemeLen*(s: openArray[char]; i: Natural): Natural = + ## The number of bytes belonging to byte index ``s[i]``, + ## including following combining code units. + runnableExamples: + let a = "añyóng" + doAssert a.graphemeLen(1) == 2 ## ñ + doAssert a.graphemeLen(2) == 1 + doAssert a.graphemeLen(4) == 2 ## ó + + var j = i.int + var r, r2: Rune + if j < s.len: + fastRuneAt(s, j, r, true) + result = j-i + while j < s.len: + fastRuneAt(s, j, r2, true) + if not isCombining(r2): break + result = j-i + +proc lastRune*(s: openArray[char]; last: int): (Rune, int) = + ## Length of the last rune in ``s[0..last]``. Returns the rune and its length + ## in bytes. + if s[last] <= chr(127): + result = (Rune(s[last]), 1) + else: + var L = 0 + while last-L >= 0 and uint(s[last-L]) shr 6 == 0b10: inc(L) + var r: Rune + fastRuneAt(s, last-L, r, false) + result = (r, L+1) + +proc size*(r: Rune): int {.noSideEffect.} = + ## Returns the number of bytes the rune ``r`` takes. 
+ runnableExamples: + let a = toRunes "aá" + doAssert size(a[0]) == 1 + doAssert size(a[1]) == 2 + + let v = r.uint32 + if v <= 0x007F'u32: result = 1 + elif v <= 0x07FF'u32: result = 2 + elif v <= 0xFFFF'u32: result = 3 + elif v <= 0x1FFFFF'u32: result = 4 + elif v <= 0x3FFFFFF'u32: result = 5 + elif v <= 0x7FFFFFFF'u32: result = 6 + else: result = 1 + +# --------- Private templates for different split separators ----------- +proc stringHasSep(s: openArray[char], index: int, seps: openArray[Rune]): bool = + var rune: Rune + fastRuneAt(s, index, rune, false) + return seps.contains(rune) + +proc stringHasSep(s: openArray[char], index: int, sep: Rune): bool = + var rune: Rune + fastRuneAt(s, index, rune, false) + return sep == rune + +template splitCommon(s, sep, maxsplit: untyped) = + ## Common code for split procedures. let - someString = "öÑ" - someRunes = @[runeAt(someString, 0), runeAt(someString, 2)] - compared = (someString == $someRunes) - assert compared == true - - assert reversed("Reverse this!") == "!siht esreveR" - assert reversed("先秦兩漢") == "漢兩秦先" - assert reversed("as⃝df̅") == "f̅ds⃝a" - assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞" + sLen = len(s) + var + last = 0 + splits = maxsplit + if sLen > 0: + while last <= sLen: + var first = last + while last < sLen and not stringHasSep(s, last, sep): + inc(last, runeLenAt(s, last)) + if splits == 0: last = sLen + yield substr(s.toOpenArray(first, (last - 1))) + if splits == 0: break + dec(splits) + inc(last, if last < sLen: runeLenAt(s, last) else: 1) + +iterator split*(s: openArray[char], seps: openArray[Rune] = unicodeSpaces, + maxsplit: int = -1): string = + ## Splits the unicode string ``s`` into substrings using a group of separators. + ## + ## Substrings are separated by a substring containing only ``seps``. 
+ runnableExamples: + import std/sequtils + + assert toSeq("hÃllo\lthis\lis an\texample\l是".split) == + @["hÃllo", "this", "is", "an", "example", "是"] + + # And the following code splits the same string using a sequence of Runes. + assert toSeq(split("añyóng:hÃllo;是$example", ";:$".toRunes)) == + @["añyóng", "hÃllo", "是", "example"] + + # example with a `Rune` separator and unused one `;`: + assert toSeq(split("ab是de:f:", ";:是".toRunes)) == @["ab", "de", "f", ""] + + # Another example that splits a string containing a date. + let date = "2012-11-20T22:08:08.398990" + + assert toSeq(split(date, " -:T".toRunes)) == + @["2012", "11", "20", "22", "08", "08.398990"] + + splitCommon(s, seps, maxsplit) + +iterator splitWhitespace*(s: openArray[char]): string = + ## Splits a unicode string at whitespace runes. + splitCommon(s, unicodeSpaces, -1) + +template accResult(iter: untyped) = + result = @[] + for x in iter: add(result, x) + +proc splitWhitespace*(s: openArray[char]): seq[string] {.noSideEffect, + rtl, extern: "ncuSplitWhitespace".} = + ## The same as the `splitWhitespace <#splitWhitespace.i,string>`_ + ## iterator, but is a proc that returns a sequence of substrings. + accResult(splitWhitespace(s)) + +iterator split*(s: openArray[char], sep: Rune, maxsplit: int = -1): string = + ## Splits the unicode string ``s`` into substrings using a single separator. + ## Substrings are separated by the rune ``sep``. + runnableExamples: + import std/sequtils + + assert toSeq(split(";;hÃllo;this;is;an;;example;;;是", ";".runeAt(0))) == + @["", "", "hÃllo", "this", "is", "an", "", "example", "", "", "是"] + + splitCommon(s, sep, maxsplit) + +proc split*(s: openArray[char], seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1): + seq[string] {.noSideEffect, rtl, extern: "nucSplitRunes".} = + ## The same as the `split iterator <#split.i,string,openArray[Rune],int>`_, + ## but is a proc that returns a sequence of substrings. 
+ accResult(split(s, seps, maxsplit)) + +proc split*(s: openArray[char], sep: Rune, maxsplit: int = -1): seq[string] {.noSideEffect, + rtl, extern: "nucSplitRune".} = + ## The same as the `split iterator <#split.i,string,Rune,int>`_, but is a proc + ## that returns a sequence of substrings. + accResult(split(s, sep, maxsplit)) + +proc strip*(s: openArray[char], leading = true, trailing = true, + runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect, + rtl, extern: "nucStrip".} = + ## Strips leading or trailing ``runes`` from ``s`` and returns + ## the resulting string. + ## + ## If ``leading`` is true (default), leading ``runes`` are stripped. + ## If ``trailing`` is true (default), trailing ``runes`` are stripped. + ## If both are false, the string is returned unchanged. + runnableExamples: + let a = "\táñyóng " + doAssert a.strip == "áñyóng" + doAssert a.strip(leading = false) == "\táñyóng" + doAssert a.strip(trailing = false) == "áñyóng " + + var + sI = 0 ## starting index into string ``s`` + eI = len(s) - 1 ## ending index into ``s``, where the last ``Rune`` starts + if leading: + var + i = 0 + xI: int ## value of ``sI`` at the beginning of the iteration + rune: Rune + while i < len(s): + xI = i + fastRuneAt(s, i, rune) + sI = i # Assume to start from next rune + if not runes.contains(rune): + sI = xI # Go back to where the current rune starts + break + if trailing: + var + i = eI + xI: int + rune: Rune + while i >= 0: + xI = i + fastRuneAt(s, xI, rune) + var yI = i - 1 + while yI >= 0: + var + yIend = yI + pRune: Rune + fastRuneAt(s, yIend, pRune) + if yIend < xI: break + i = yI + rune = pRune + dec(yI) + if not runes.contains(rune): + eI = xI - 1 + break + dec(i) + let newLen = eI - sI + 1 + result = newStringOfCap(newLen) + if newLen > 0: + result.add substr(s.toOpenArray(sI, eI)) + +proc repeat*(c: Rune, count: Natural): string {.noSideEffect, + rtl, extern: "nucRepeatRune".} = + ## Returns a string of ``count`` Runes ``c``. 
+ ## + ## The returned string will have a rune-length of ``count``. + runnableExamples: + let a = "ñ".runeAt(0) + doAssert a.repeat(5) == "ñññññ" + + let s = $c + result = newStringOfCap(count * s.len) + for i in 0 ..< count: + result.add s + +proc align*(s: openArray[char], count: Natural, padding = ' '.Rune): string {. + noSideEffect, rtl, extern: "nucAlignString".} = + ## Aligns a unicode string ``s`` with ``padding``, so that it has a rune-length + ## of ``count``. + ## + ## ``padding`` characters (by default spaces) are added before ``s`` resulting in + ## right alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is + ## returned unchanged. If you need to left align a string use the `alignLeft + ## proc <#alignLeft,string,Natural>`_. + runnableExamples: + assert align("abc", 4) == " abc" + assert align("a", 0) == "a" + assert align("1232", 6) == " 1232" + assert align("1232", 6, '#'.Rune) == "##1232" + assert align("Åge", 5) == " Åge" + assert align("×", 4, '_'.Rune) == "___×" + + let sLen = s.runeLen + if sLen < count: + let padStr = $padding + result = newStringOfCap(padStr.len * count) + let spaces = count - sLen + for i in 0 ..< spaces: result.add padStr + result.add s + else: + result = s.substr + +proc alignLeft*(s: openArray[char], count: Natural, padding = ' '.Rune): string {. + noSideEffect.} = + ## Left-aligns a unicode string ``s`` with ``padding``, so that it has a + ## rune-length of ``count``. + ## + ## ``padding`` characters (by default spaces) are added after ``s`` resulting in + ## left alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is + ## returned unchanged. If you need to right align a string use the `align + ## proc <#align,string,Natural>`_. 
+ runnableExamples: + assert alignLeft("abc", 4) == "abc " + assert alignLeft("a", 0) == "a" + assert alignLeft("1232", 6) == "1232 " + assert alignLeft("1232", 6, '#'.Rune) == "1232##" + assert alignLeft("Åge", 5) == "Åge " + assert alignLeft("×", 4, '_'.Rune) == "×___" + let sLen = s.runeLen + if sLen < count: + let padStr = $padding + result = newStringOfCap(s.len + (count - sLen) * padStr.len) + result.add s + for i in sLen ..< count: + result.add padStr + else: + result = s.substr + + +proc runeLen*(s: string): int {.inline.} = + ## Returns the number of runes of the string ``s``. + runnableExamples: + let a = "añyóng" + doAssert a.runeLen == 6 + ## note: a.len == 8 + runeLen(toOa(s)) + +proc runeLenAt*(s: string, i: Natural): int {.inline.} = + ## Returns the number of bytes the rune starting at ``s[i]`` takes. + ## + ## See also: + ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ + runnableExamples: + let a = "añyóng" + doAssert a.runeLenAt(0) == 1 + doAssert a.runeLenAt(1) == 2 + runeLenAt(toOa(s), i) + +proc runeAt*(s: string, i: Natural): Rune {.inline.} = + ## Returns the rune in ``s`` at **byte index** ``i``. + ## + ## See also: + ## * `runeAtPos proc <#runeAtPos,string,int>`_ + ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_ + ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ + runnableExamples: + let a = "añyóng" + doAssert a.runeAt(1) == "ñ".runeAt(0) + doAssert a.runeAt(2) == "ñ".runeAt(1) + doAssert a.runeAt(3) == "y".runeAt(0) + fastRuneAt(s, i, result, false) + +proc validateUtf8*(s: string): int {.inline.} = + ## Returns the position of the invalid byte in ``s`` if the string ``s`` does + ## not hold valid UTF-8 data. Otherwise ``-1`` is returned. 
+ ## + ## See also: + ## * `toUTF8 proc <#toUTF8,Rune>`_ + ## * `$ proc <#$,Rune>`_ alias for `toUTF8` + ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_ + validateUtf8(toOa(s)) + +proc runeOffset*(s: string, pos: Natural, start: Natural = 0): int {.inline.} = + ## Returns the byte position of rune + ## at position ``pos`` in ``s`` with an optional start byte position. + ## Returns the special value -1 if it runs out of the string. + ## + ## **Beware:** This can lead to unoptimized code and slow execution! + ## Most problems can be solved more efficiently by using an iterator + ## or conversion to a seq of Rune. + ## + ## See also: + ## * `runeReverseOffset proc <#runeReverseOffset,string,Positive>`_ + runnableExamples: + let a = "añyóng" + doAssert a.runeOffset(1) == 1 + doAssert a.runeOffset(3) == 4 + doAssert a.runeOffset(4) == 6 + runeOffset(toOa(s), pos, start) + +proc runeReverseOffset*(s: string, rev: Positive): (int, int) {.inline.} = + ## Returns a tuple with the byte offset of the + ## rune at position ``rev`` in ``s``, counting + ## from the end (starting with 1) and the total + ## number of runes in the string. + ## + ## Returns a negative value for offset if there are too few runes in + ## the string to satisfy the request. + ## + ## **Beware:** This can lead to unoptimized code and slow execution! + ## Most problems can be solved more efficiently by using an iterator + ## or conversion to a seq of Rune. + ## + ## See also: + ## * `runeOffset proc <#runeOffset,string,Natural,Natural>`_ + runeReverseOffset(toOa(s), rev) + +proc runeAtPos*(s: string, pos: int): Rune {.inline.} = + ## Returns the rune at position ``pos``. + ## + ## **Beware:** This can lead to unoptimized code and slow execution! + ## Most problems can be solved more efficiently by using an iterator + ## or conversion to a seq of Rune. 
+ ## + ## See also: + ## * `runeAt proc <#runeAt,string,Natural>`_ + ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_ + ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ + fastRuneAt(toOa(s), runeOffset(s, pos), result, false) + +proc runeStrAtPos*(s: string, pos: Natural): string {.inline.} = + ## Returns the rune at position ``pos`` as UTF8 String. + ## + ## **Beware:** This can lead to unoptimized code and slow execution! + ## Most problems can be solved more efficiently by using an iterator + ## or conversion to a seq of Rune. + ## + ## See also: + ## * `runeAt proc <#runeAt,string,Natural>`_ + ## * `runeAtPos proc <#runeAtPos,string,int>`_ + ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ + let o = runeOffset(s, pos) + substr(s.toOpenArray(o, (o+runeLenAt(s, o)-1))) + +proc runeSubStr*(s: string, pos: int, len: int = int.high): string {.inline.} = + ## Returns the UTF-8 substring starting at code point ``pos`` + ## with ``len`` code points. + ## + ## If ``pos`` or ``len`` is negative they count from + ## the end of the string. If ``len`` is not given it means the longest + ## possible string. + runnableExamples: + let s = "Hänsel ««: 10,00€" + doAssert(runeSubStr(s, 0, 2) == "Hä") + doAssert(runeSubStr(s, 10, 1) == ":") + doAssert(runeSubStr(s, -6) == "10,00€") + doAssert(runeSubStr(s, 10) == ": 10,00€") + doAssert(runeSubStr(s, 12, 5) == "10,00") + doAssert(runeSubStr(s, -6, 3) == "10,") + runeSubStr(toOa(s), pos, len) + + +proc isAlpha*(s: string): bool {.noSideEffect, inline.} = + ## Returns true if ``s`` contains all alphabetic runes. + runnableExamples: + let a = "añyóng" + doAssert a.isAlpha + isAlpha(toOa(s)) + +proc isSpace*(s: string): bool {.noSideEffect, inline.} = + ## Returns true if ``s`` contains all whitespace runes. + runnableExamples: + let a = "\t\l \v\r\f" + doAssert a.isSpace + isSpace(toOa(s)) + + +proc toUpper*(s: string): string {.noSideEffect, inline.} = + ## Converts ``s`` into upper-case runes. 
+ runnableExamples: + doAssert toUpper("abγ") == "ABΓ" + toUpper(toOa(s)) + +proc toLower*(s: string): string {.noSideEffect, inline.} = + ## Converts ``s`` into lower-case runes. + runnableExamples: + doAssert toLower("ABΓ") == "abγ" + toLower(toOa(s)) + +proc swapCase*(s: string): string {.noSideEffect, inline.} = + ## Swaps the case of runes in ``s``. + ## + ## Returns a new string such that the cases of all runes + ## are swapped if possible. + runnableExamples: + doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA" + swapCase(toOa(s)) + +proc capitalize*(s: string): string {.noSideEffect.} = + ## Converts the first character of ``s`` into an upper-case rune. + runnableExamples: + doAssert capitalize("βeta") == "Βeta" + capitalize(toOa(s)) + + +proc translate*(s: string, replacements: proc(key: string): string): string {.effectsOf: replacements, inline.} = + ## Translates words in a string using the ``replacements`` proc to substitute + ## words inside ``s`` with their replacements. + ## + ## ``replacements`` is any proc that takes a word and returns + ## a new word to fill it's place. + runnableExamples: + proc wordToNumber(s: string): string = + case s + of "one": "1" + of "two": "2" + else: s + let a = "one two three four" + doAssert a.translate(wordToNumber) == "1 2 three four" + translate(toOa(s), replacements) + +proc title*(s: string): string {.noSideEffect, inline.} = + ## Converts ``s`` to a unicode title. + ## + ## Returns a new string such that the first character + ## in each word inside ``s`` is capitalized. + runnableExamples: + doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma" + title(toOa(s)) + + +iterator runes*(s: string): Rune = + ## Iterates over any rune of the string ``s`` returning runes. + for rune in runes(toOa(s)): + yield rune + +iterator utf8*(s: string): string = + ## Iterates over any rune of the string ``s`` returning utf8 values. 
+ ## + ## See also: + ## * `validateUtf8 proc <#validateUtf8,string>`_ + ## * `toUTF8 proc <#toUTF8,Rune>`_ + ## * `$ proc <#$,Rune>`_ alias for `toUTF8` + ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_ + for str in utf8(toOa(s)): + yield str + +proc toRunes*(s: string): seq[Rune] {.inline.} = + ## Obtains a sequence containing the Runes in ``s``. + ## + ## See also: + ## * `$ proc <#$,Rune>`_ for a reverse operation + runnableExamples: + let a = toRunes("aáä") + doAssert a == @["a".runeAt(0), "á".runeAt(0), "ä".runeAt(0)] + toRunes(toOa(s)) + +proc cmpRunesIgnoreCase*(a, b: string): int {.inline.} = + ## Compares two UTF-8 strings and ignores the case. Returns: + ## + ## | `0` if a == b + ## | `< 0` if a < b + ## | `> 0` if a > b + cmpRunesIgnoreCase(a.toOa(), b.toOa()) + +proc reversed*(s: string): string {.inline.} = + ## Returns the reverse of ``s``, interpreting it as runes. + ## + ## Unicode combining characters are correctly interpreted as well. + runnableExamples: + assert reversed("Reverse this!") == "!siht esreveR" + assert reversed("先秦兩漢") == "漢兩秦先" + assert reversed("as⃝df̅") == "f̅ds⃝a" + assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞" + reversed(toOa(s)) + +proc graphemeLen*(s: string; i: Natural): Natural {.inline.} = + ## The number of bytes belonging to byte index ``s[i]``, + ## including following combining code unit. + runnableExamples: + let a = "añyóng" + doAssert a.graphemeLen(1) == 2 ## ñ + doAssert a.graphemeLen(2) == 1 + doAssert a.graphemeLen(4) == 2 ## ó + graphemeLen(toOa(s), i) + +proc lastRune*(s: string; last: int): (Rune, int) {.inline.} = + ## Length of the last rune in ``s[0..last]``. Returns the rune and its length + ## in bytes. + lastRune(toOa(s), last) + +iterator split*(s: string, seps: openArray[Rune] = unicodeSpaces, + maxsplit: int = -1): string = + ## Splits the unicode string ``s`` into substrings using a group of separators. + ## + ## Substrings are separated by a substring containing only ``seps``. 
+ runnableExamples: + import std/sequtils + + assert toSeq("hÃllo\lthis\lis an\texample\l是".split) == + @["hÃllo", "this", "is", "an", "example", "是"] + + # And the following code splits the same string using a sequence of Runes. + assert toSeq(split("añyóng:hÃllo;是$example", ";:$".toRunes)) == + @["añyóng", "hÃllo", "是", "example"] + + # example with a `Rune` separator and unused one `;`: + assert toSeq(split("ab是de:f:", ";:是".toRunes)) == @["ab", "de", "f", ""] + + # Another example that splits a string containing a date. + let date = "2012-11-20T22:08:08.398990" + + assert toSeq(split(date, " -:T".toRunes)) == + @["2012", "11", "20", "22", "08", "08.398990"] + + splitCommon(toOa(s), seps, maxsplit) + +iterator splitWhitespace*(s: string): string = + ## Splits a unicode string at whitespace runes. + splitCommon(s.toOa(), unicodeSpaces, -1) + + +proc splitWhitespace*(s: string): seq[string] {.noSideEffect, inline.}= + ## The same as the `splitWhitespace <#splitWhitespace.i,string>`_ + ## iterator, but is a proc that returns a sequence of substrings. + accResult(splitWhitespace(toOa(s))) + +iterator split*(s: string, sep: Rune, maxsplit: int = -1): string = + ## Splits the unicode string ``s`` into substrings using a single separator. + ## Substrings are separated by the rune ``sep``. + runnableExamples: + import std/sequtils + + assert toSeq(split(";;hÃllo;this;is;an;;example;;;是", ";".runeAt(0))) == + @["", "", "hÃllo", "this", "is", "an", "", "example", "", "", "是"] + + splitCommon(toOa(s), sep, maxsplit) + +proc split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1): + seq[string] {.noSideEffect, inline.} = + ## The same as the `split iterator <#split.i,string,openArray[Rune],int>`_, + ## but is a proc that returns a sequence of substrings. 
+ accResult(split(toOa(s), seps, maxsplit)) + +proc split*(s: string, sep: Rune, maxsplit: int = -1): seq[string] {.noSideEffect, inline.} = + ## The same as the `split iterator <#split.i,string,Rune,int>`_, but is a proc + ## that returns a sequence of substrings. + accResult(split(toOa(s), sep, maxsplit)) + +proc strip*(s: string, leading = true, trailing = true, + runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect, inline.} = + ## Strips leading or trailing ``runes`` from ``s`` and returns + ## the resulting string. + ## + ## If ``leading`` is true (default), leading ``runes`` are stripped. + ## If ``trailing`` is true (default), trailing ``runes`` are stripped. + ## If both are false, the string is returned unchanged. + runnableExamples: + let a = "\táñyóng " + doAssert a.strip == "áñyóng" + doAssert a.strip(leading = false) == "\táñyóng" + doAssert a.strip(trailing = false) == "áñyóng " + strip(toOa(s), leading, trailing, runes) + + +proc align*(s: string, count: Natural, padding = ' '.Rune): string {.noSideEffect, inline.} = + ## Aligns a unicode string ``s`` with ``padding``, so that it has a rune-length + ## of ``count``. + ## + ## ``padding`` characters (by default spaces) are added before ``s`` resulting in + ## right alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is + ## returned unchanged. If you need to left align a string use the `alignLeft + ## proc <#alignLeft,string,Natural>`_. + runnableExamples: + assert align("abc", 4) == " abc" + assert align("a", 0) == "a" + assert align("1232", 6) == " 1232" + assert align("1232", 6, '#'.Rune) == "##1232" + assert align("Åge", 5) == " Åge" + assert align("×", 4, '_'.Rune) == "___×" + align(toOa(s), count, padding) + +proc alignLeft*(s: string, count: Natural, padding = ' '.Rune): string {.noSideEffect, inline.} = + ## Left-aligns a unicode string ``s`` with ``padding``, so that it has a + ## rune-length of ``count``. 
+ ## + ## ``padding`` characters (by default spaces) are added after ``s`` resulting in + ## left alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is + ## returned unchanged. If you need to right align a string use the `align + ## proc <#align,string,Natural>`_. + runnableExamples: + assert alignLeft("abc", 4) == "abc " + assert alignLeft("a", 0) == "a" + assert alignLeft("1232", 6) == "1232 " + assert alignLeft("1232", 6, '#'.Rune) == "1232##" + assert alignLeft("Åge", 5) == "Åge " + assert alignLeft("×", 4, '_'.Rune) == "×___" + alignLeft(toOa(s), count, padding) diff --git a/lib/pure/unidecode/gen.py b/lib/pure/unidecode/gen.py index 8da0136ff..2fb69f7b2 100644 --- a/lib/pure/unidecode/gen.py +++ b/lib/pure/unidecode/gen.py @@ -1,26 +1,30 @@ -#! usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- # Generates the unidecode.dat module # (c) 2010 Andreas Rumpf from unidecode import unidecode +try: + import warnings + warnings.simplefilter("ignore") +except ImportError: + pass -def main2(): - data = [] - for x in xrange(128, 0xffff + 1): - u = eval("u'\u%04x'" % x) - - val = unidecode(u) - data.append(val) - - - f = open("unidecode.dat", "wb+") - for d in data: - f.write("%s\n" % d) - f.close() +def main(): + f = open("unidecode.dat", "wb+") + for x in range(128, 0xffff + 1): + u = eval("u'\\u%04x'" % x) + val = unidecode(u) -main2() + # f.write("%x | " % x) + if x == 0x2028: # U+2028 = LINE SEPARATOR + val = "" + elif x == 0x2029: # U+2029 = PARAGRAPH SEPARATOR + val = "" + f.write("%s\n" % val) + f.close() +main() diff --git a/lib/pure/unidecode/unidecode.dat b/lib/pure/unidecode/unidecode.dat index 9dff0a4a9..5f4c075d8 100644 --- a/lib/pure/unidecode/unidecode.dat +++ b/lib/pure/unidecode/unidecode.dat @@ -58,9 +58,9 @@ P 1 o >> -1/4 -1/2 -3/4 + 1/4 + 1/2 + 3/4 ? A A @@ -91,7 +91,7 @@ U U U U -U +Y Th ss a @@ -177,7 +177,7 @@ i I i IJ - +ij J j K @@ -368,7 +368,7 @@ ZH zh j DZ -D +Dz dz G g @@ -414,8 +414,8 @@ Y y H h -[?] -[?] 
+N +d OU ou Z @@ -434,34 +434,34 @@ O o Y y +l +n +t +j +db +qp +A +C +c +L +T +s +z [?] [?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +B +U +^ +E +e +J +j +q +q +R +r +Y +y a a a @@ -503,13 +503,13 @@ o OE O F -R -R -R -R r r -R +r +r +r +r +r R R s @@ -519,12 +519,12 @@ S S t t -U +u U v ^ -W -Y +w +y Y z z @@ -556,9 +556,9 @@ ls lz WW ]] -[?] -[?] -k +h +h +h h j r @@ -737,19 +737,19 @@ V -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +a +e +i +o +u +c +d +h +m +r +t +v +x [?] [?] [?] @@ -1287,7 +1287,7 @@ o f ew [?] -. +: - [?] [?] @@ -1340,9 +1340,9 @@ o u ' +- - - +| : @@ -7402,41 +7402,41 @@ bh +b +d +f +m +n +p +r +r +s +t +z +g +p +b +d +f +g +k +l +m +n +p +r +s - - - - - - - - - - - - - - - - - - - - - - - - - - - +v +x +z @@ -7708,7 +7708,7 @@ a S [?] [?] -[?] +Ss [?] A a @@ -8109,9 +8109,6 @@ _ - - - %0 %00 @@ -8136,19 +8133,23 @@ _ / -[ ]- -[?] +?? ?! !? 7 PP (] [) +* [?] [?] [?] +% +~ [?] [?] [?] +'''' [?] [?] [?] @@ -8156,12 +8157,8 @@ PP [?] [?] [?] -[?] -[?] -[?] -[?] -[?] -[?] + + [?] [?] [?] @@ -8178,7 +8175,7 @@ PP 0 - +i 4 @@ -8209,19 +8206,19 @@ n ( ) [?] +a +e +o +x [?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +h +k +l +m +n +p +s +t [?] [?] [?] @@ -8237,26 +8234,26 @@ Rs W NS D -EU +EUR K T Dr -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +Pf +P +G +A +UAH +C| +L +Sm +T +Rs +L +M +m +R +l +BTC [?] [?] [?] @@ -8294,6 +8291,7 @@ Dr [?] + [?] [?] [?] @@ -8319,63 +8317,67 @@ Dr [?] [?] [?] -[?] - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + a/c + a/s +C + c/o + c/u +g +H +H +H +h +I +I +L +l +N +No. +P +Q +R +R +R +(sm) +TEL +(tm) +Z +Z +K +A +B +C +e +e +E +F +F +M +o +i +FAX @@ -8385,25 +8387,20 @@ Dr [?] [?] [?] +D +d +e +i +j [?] [?] [?] [?] +F [?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] 
+ 1/7 + 1/9 + 1/10 1/3 2/3 1/5 @@ -8458,7 +8455,7 @@ D) [?] [?] [?] -[?] + 0/3 [?] [?] [?] @@ -8595,8 +8592,12 @@ V [?] [?] [?] +- [?] [?] +/ +\ +* [?] [?] [?] @@ -8608,6 +8609,7 @@ V [?] [?] [?] +| [?] [?] [?] @@ -8626,11 +8628,13 @@ V [?] [?] [?] +: [?] [?] [?] [?] [?] +~ [?] [?] [?] @@ -8670,17 +8674,10 @@ V [?] [?] [?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +<= +>= +<= +>= [?] [?] [?] @@ -8836,6 +8833,7 @@ V [?] [?] [?] +^ [?] [?] [?] @@ -8873,9 +8871,8 @@ V [?] [?] [?] -[?] -[?] -[?] +< +> [?] [?] [?] @@ -9185,166 +9182,166 @@ V [?] [?] [?] - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] - +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +(1) +(2) +(3) +(4) +(5) +(6) +(7) +(8) +(9) +(10) +(11) +(12) +(13) +(14) +(15) +(16) +(17) +(18) +(19) +(20) +1. +2. +3. +4. +5. +6. +7. +8. +9. +10. +11. +12. +13. +14. +15. +16. +17. +18. +19. +20. +(a) +(b) +(c) +(d) +(e) +(f) +(g) +(h) +(i) +(j) +(k) +(l) +(m) +(n) +(o) +(p) +(q) +(r) +(s) +(t) +(u) +(v) +(w) +(x) +(y) +(z) +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +0 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +0 - - | @@ -9712,7 +9709,7 @@ O - +# [?] @@ -9906,6 +9903,7 @@ O +* @@ -9944,8 +9942,7 @@ O - - +| @@ -9955,7 +9952,7 @@ O [?] [?] - +! @@ -10087,10 +10084,10 @@ O [?] [?] [?] +[ [?] -[?] -[?] -[?] +< +> [?] [?] [?] 
@@ -10500,6 +10497,8 @@ y +{ +} @@ -10739,6 +10738,9 @@ y +::= +== +=== @@ -11228,27 +11230,22 @@ y +L +l +L +P +R +a +t +H +h +K +k +Z +z - - - - - - - - - - - - - - - - - - - - +M +A @@ -12754,21 +12751,21 @@ H [?] [?] [?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 (g) (n) (d) @@ -12850,21 +12847,21 @@ KIS (Zi) (Xie) (Ye) -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] -[?] +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 1M 2M 3M @@ -12877,10 +12874,10 @@ KIS 10M 11M 12M -[?] -[?] -[?] -[?] +Hg +erg +eV +LTD a i u @@ -13042,16 +13039,16 @@ watt 22h 23h 24h -HPA +hPa da AU bar oV pc -[?] -[?] -[?] -[?] +dm +dm^2 +dm^3 +IU Heisei Syouwa Taisyou @@ -13092,7 +13089,7 @@ mm^2 cm^2 m^2 km^2 -mm^4 +mm^3 cm^3 m^3 km^3 @@ -13184,7 +13181,7 @@ Wb 29d 30d 31d - +gal @@ -19841,7 +19838,7 @@ Wb [?] [?] -[?] +Yi Ding Kao Qi diff --git a/lib/pure/unidecode/unidecode.nim b/lib/pure/unidecode/unidecode.nim index 798eef5d0..9affc53f6 100644 --- a/lib/pure/unidecode/unidecode.nim +++ b/lib/pure/unidecode/unidecode.nim @@ -7,68 +7,58 @@ # distribution, for details about the copyright. # -## This module is based on Python's Unidecode module by Tomaz Solc, -## which in turn is based on the ``Text::Unidecode`` Perl module by -## Sean M. Burke -## (http://search.cpan.org/~sburke/Text-Unidecode-0.04/lib/Text/Unidecode.pm ). +## This module is based on Python's [Unidecode](https://pypi.org/project/Unidecode/) +## module by Tomaz Solc, which in turn is based on the +## [Text::Unidecode](https://metacpan.org/pod/Text::Unidecode) +## Perl module by Sean M. Burke. ## -## It provides a single proc that does Unicode to ASCII transliterations: -## It finds the sequence of ASCII characters that is the closest approximation -## to the Unicode string. 
+## It provides a `unidecode proc <#unidecode,string>`_ that does +## Unicode to ASCII transliterations: It finds the sequence of ASCII characters +## that is the closest approximation to the Unicode string. ## -## For example, the closest to string "Äußerst" in ASCII is "Ausserst". Some -## information is lost in this transformation, of course, since several Unicode -## strings can be transformed in the same ASCII representation. So this is a -## strictly one-way transformation. However a human reader will probably -## still be able to guess what original string was meant from the context. +## For example, the closest to string "Äußerst" in ASCII is "Ausserst". Some +## information is lost in this transformation, of course, since several Unicode +## strings can be transformed to the same ASCII representation. So this is a +## strictly one-way transformation. However, a human reader will probably +## still be able to guess from the context, what the original string was. ## -## This module needs the data file "unidecode.dat" to work: You can either -## ship this file with your application and initialize this module with the -## `loadUnidecodeTable` proc or you can define the ``embedUnidecodeTable`` -## symbol to embed the file as a resource into your application. +## This module needs the data file `unidecode.dat` to work: This file is +## embedded as a resource into your application by default. You can also +## define the symbol `--define:noUnidecodeTable` during compile time and +## use the `loadUnidecodeTable proc <#loadUnidecodeTable>`_ to initialize +## this module. -import unicode +import std/unicode + +when not defined(noUnidecodeTable): + import std/strutils -when defined(embedUnidecodeTable): - import strutils - const translationTable = splitLines(slurp"unidecode/unidecode.dat") else: # shared is fine for threading: var translationTable: seq[string] proc loadUnidecodeTable*(datafile = "unidecode.dat") = - ## loads the datafile that `unidecode` to work. 
Unless this module is - ## compiled with the ``embedUnidecodeTable`` symbol defined, this needs - ## to be called by the main thread before any thread can make a call - ## to `unidecode`. - when not defined(embedUnidecodeTable): + ## Loads the datafile that `unidecode <#unidecode,string>`_ needs to work. + ## This is only required if the module was compiled with the + ## `--define:noUnidecodeTable` switch. This needs to be called by the + ## main thread before any thread can make a call to `unidecode`. + when defined(noUnidecodeTable): newSeq(translationTable, 0xffff) var i = 0 for line in lines(datafile): - translationTable[i] = line.string + translationTable[i] = line inc(i) -proc unidecode*(s: string): string = +proc unidecode*(s: string): string = ## Finds the sequence of ASCII characters that is the closest approximation ## to the UTF-8 string `s`. - ## - ## Example: - ## - ## ..code-block:: nim - ## - ## unidecode("\x53\x17\x4E\xB0") - ## - ## Results in: "Bei Jing" - ## - assert(not isNil(translationTable)) + runnableExamples: + doAssert unidecode("北京") == "Bei Jing " + doAssert unidecode("Äußerst") == "Ausserst" + result = "" - for r in runes(s): + for r in runes(s): var c = int(r) if c <=% 127: add(result, chr(c)) - elif c <% translationTable.len: add(result, translationTable[c-128]) - -when isMainModule: - loadUnidecodeTable("lib/pure/unidecode/unidecode.dat") - echo unidecode("Äußerst") - + elif c <% translationTable.len: add(result, translationTable[c - 128]) diff --git a/lib/pure/unittest.nim b/lib/pure/unittest.nim index 3bf4724b9..cfb762258 100644 --- a/lib/pure/unittest.nim +++ b/lib/pure/unittest.nim @@ -9,13 +9,76 @@ ## :Author: Zahary Karadjov ## -## This module implements boilerplate to make testing easy. +## This module implements boilerplate to make unit testing easy. ## -## Example: +## The test status and name is printed after any output or traceback. ## -## .. 
code:: nim +## Tests can be nested, however failure of a nested test will not mark the +## parent test as failed. Setup and teardown are inherited. Setup can be +## overridden locally. ## +## Compiled test files as well as `nim c -r <testfile.nim>` +## exit with 0 for success (no failed tests) or 1 for failure. +## +## Testament +## ========= +## +## Instead of `unittest`, please consider using +## `the Testament tool <testament.html>`_ which offers process isolation for your tests. +## +## Alternatively using `when isMainModule: doAssert conditionHere` is usually a +## much simpler solution for testing purposes. +## +## Running a single test +## ===================== +## +## Specify the test name as a command line argument. +## +## ```cmd +## nim c -r test "my test name" "another test" +## ``` +## +## Multiple arguments can be used. +## +## Running a single test suite +## =========================== +## +## Specify the suite name delimited by `"::"`. +## +## ```cmd +## nim c -r test "my test name::" +## ``` +## +## Selecting tests by pattern +## ========================== +## +## A single ``"*"`` can be used for globbing. +## +## Delimit the end of a suite name with `"::"`. +## +## Tests matching **any** of the arguments are executed. 
+## +## ```cmd +## nim c -r test fast_suite::mytest1 fast_suite::mytest2 +## nim c -r test "fast_suite::mytest*" +## nim c -r test "auth*::" "crypto::hashing*" +## # Run suites starting with 'bug #' and standalone tests starting with '#' +## nim c -r test 'bug #*::' '::#*' +## ``` +## +## Examples +## ======== +## +## ```nim ## suite "description for this stuff": +## echo "suite setup: run once before the tests" +## +## setup: +## echo "run before each test" +## +## teardown: +## echo "run after each test" +## ## test "essential truths": ## # give up and stop if this fails ## require(true) @@ -28,210 +91,699 @@ ## ## test "out of bounds error is thrown on bad access": ## let v = @[1, 2, 3] # you can do initialization here -## expect(IndexError): +## expect(IndexDefect): ## discard v[4] +## +## echo "suite teardown: run once after the tests" +## ``` +## +## Limitations/Bugs +## ================ +## Since `check` will rewrite some expressions for supporting checkpoints +## (namely assigns expressions to variables), some type conversions are not supported. +## For example `check 4.0 == 2 + 2` won't work. But `doAssert 4.0 == 2 + 2` works. +## Make sure both sides of the operator (such as `==`, `>=` and so on) have the same type. +## + +import std/private/since +import std/exitprocs -import - macros +when defined(nimPreviewSlimSystem): + import std/assertions + +import std/[macros, strutils, streams, times, sets, sequtils] when declared(stdout): - import os + import std/os -when not defined(ECMAScript): - import terminal - system.addQuitProc(resetAttributes) +const useTerminal = not defined(js) -type - TestStatus* = enum OK, FAILED - OutputLevel* = enum PRINT_ALL, PRINT_FAILURES, PRINT_NONE +when useTerminal: + import std/terminal -{.deprecated: [TTestStatus: TestStatus, TOutputLevel: OutputLevel]} +type + TestStatus* = enum ## The status of a test when it is done. + OK, + FAILED, + SKIPPED + + OutputLevel* = enum ## The output verbosity of the tests. 
+ PRINT_ALL, ## Print as much as possible. + PRINT_FAILURES, ## Print only the failed tests. + PRINT_NONE ## Print nothing. + + TestResult* = object + suiteName*: string + ## Name of the test suite that contains this test case. + ## Can be ``nil`` if the test case is not in a suite. + testName*: string + ## Name of the test case + status*: TestStatus + + OutputFormatter* = ref object of RootObj + + ConsoleOutputFormatter* = ref object of OutputFormatter + colorOutput: bool + ## Have test results printed in color. + ## Default is `auto` depending on `isatty(stdout)`, or override it with + ## `-d:nimUnittestColor:auto|on|off`. + ## + ## Deprecated: Setting the environment variable `NIMTEST_COLOR` to `always` + ## or `never` changes the default for the non-js target to true or false respectively. + ## Deprecated: the environment variable `NIMTEST_NO_COLOR`, when set, changes the + ## default to true, if `NIMTEST_COLOR` is undefined. + outputLevel: OutputLevel + ## Set the verbosity of test results. + ## Default is `PRINT_ALL`, or override with: + ## `-d:nimUnittestOutputLevel:PRINT_ALL|PRINT_FAILURES|PRINT_NONE`. + ## + ## Deprecated: the `NIMTEST_OUTPUT_LVL` environment variable is set for the non-js target. + isInSuite: bool + isInTest: bool + + JUnitOutputFormatter* = ref object of OutputFormatter + stream: Stream + testErrors: seq[string] + testStartTime: float + testStackTrace: string var - abortOnError* {.threadvar.}: bool - outputLevel* {.threadvar.}: OutputLevel - colorOutput* {.threadvar.}: bool + abortOnError* {.threadvar.}: bool ## Set to true in order to quit + ## immediately on fail. Default is false, + ## or override with `-d:nimUnittestAbortOnError:on|off`. + ## + ## Deprecated: can also override depending on whether + ## `NIMTEST_ABORT_ON_ERROR` environment variable is set. 
checkpoints {.threadvar.}: seq[string] + formatters {.threadvar.}: seq[OutputFormatter] + testsFilters {.threadvar.}: HashSet[string] + disabledParamFiltering {.threadvar.}: bool -checkpoints = @[] - -template testSetupIMPL*: stmt {.immediate, dirty.} = discard -template testTeardownIMPL*: stmt {.immediate, dirty.} = discard - -proc shouldRun(testName: string): bool = - result = true - -template suite*(name: expr, body: stmt): stmt {.immediate, dirty.} = - block: - template setup*(setupBody: stmt): stmt {.immediate, dirty.} = - template testSetupIMPL: stmt {.immediate, dirty.} = setupBody - - template teardown*(teardownBody: stmt): stmt {.immediate, dirty.} = - template testTeardownIMPL: stmt {.immediate, dirty.} = teardownBody +const + outputLevelDefault = PRINT_ALL + nimUnittestOutputLevel {.strdefine.} = $outputLevelDefault + nimUnittestColor {.strdefine.} = "auto" ## auto|on|off + nimUnittestAbortOnError {.booldefine.} = false - body +template deprecateEnvVarHere() = + # xxx issue a runtime warning to deprecate this envvar. 
+ discard -proc testDone(name: string, s: TestStatus) = - if s == FAILED: - programResult += 1 - - if outputLevel != PRINT_NONE and (outputLevel == PRINT_ALL or s == FAILED): - template rawPrint() = echo("[", $s, "] ", name) - when not defined(ECMAScript): - if colorOutput and not defined(ECMAScript): - var color = (if s == OK: fgGreen else: fgRed) - styledEcho styleBright, color, "[", $s, "] ", fgWhite, name +abortOnError = nimUnittestAbortOnError +when declared(stdout): + if existsEnv("NIMTEST_ABORT_ON_ERROR"): + deprecateEnvVarHere() + abortOnError = true + +method suiteStarted*(formatter: OutputFormatter, suiteName: string) {.base, gcsafe.} = + discard +method testStarted*(formatter: OutputFormatter, testName: string) {.base, gcsafe.} = + discard +method failureOccurred*(formatter: OutputFormatter, checkpoints: seq[string], + stackTrace: string) {.base, gcsafe.} = + ## ``stackTrace`` is provided only if the failure occurred due to an exception. + ## ``checkpoints`` is never ``nil``. 
+ discard +method testEnded*(formatter: OutputFormatter, testResult: TestResult) {.base, gcsafe.} = + discard +method suiteEnded*(formatter: OutputFormatter) {.base, gcsafe.} = + discard + +proc addOutputFormatter*(formatter: OutputFormatter) = + formatters.add(formatter) + +proc delOutputFormatter*(formatter: OutputFormatter) = + keepIf(formatters, proc (x: OutputFormatter): bool = + x != formatter) + +proc resetOutputFormatters* {.since: (1, 1).} = + formatters = @[] + +proc newConsoleOutputFormatter*(outputLevel: OutputLevel = outputLevelDefault, + colorOutput = true): ConsoleOutputFormatter = + ConsoleOutputFormatter( + outputLevel: outputLevel, + colorOutput: colorOutput + ) + +proc colorOutput(): bool = + let color = nimUnittestColor + case color + of "auto": + when declared(stdout): result = isatty(stdout) + else: result = false + of "on": result = true + of "off": result = false + else: raiseAssert $color + + when declared(stdout): + if existsEnv("NIMTEST_COLOR"): + deprecateEnvVarHere() + let colorEnv = getEnv("NIMTEST_COLOR") + if colorEnv == "never": + result = false + elif colorEnv == "always": + result = true + elif existsEnv("NIMTEST_NO_COLOR"): + deprecateEnvVarHere() + result = false + +proc defaultConsoleFormatter*(): ConsoleOutputFormatter = + var colorOutput = colorOutput() + var outputLevel = nimUnittestOutputLevel.parseEnum[:OutputLevel] + when declared(stdout): + const a = "NIMTEST_OUTPUT_LVL" + if existsEnv(a): + # xxx issue a warning to deprecate this envvar. 
+ outputLevel = getEnv(a).parseEnum[:OutputLevel] + result = newConsoleOutputFormatter(outputLevel, colorOutput) + +method suiteStarted*(formatter: ConsoleOutputFormatter, suiteName: string) = + template rawPrint() = echo("\n[Suite] ", suiteName) + when useTerminal: + if formatter.colorOutput: + styledEcho styleBright, fgBlue, "\n[Suite] ", resetStyle, suiteName + else: rawPrint() + else: rawPrint() + formatter.isInSuite = true + +method testStarted*(formatter: ConsoleOutputFormatter, testName: string) = + formatter.isInTest = true + +method failureOccurred*(formatter: ConsoleOutputFormatter, + checkpoints: seq[string], stackTrace: string) = + if stackTrace.len > 0: + echo stackTrace + let prefix = if formatter.isInSuite: " " else: "" + for msg in items(checkpoints): + echo prefix, msg + +method testEnded*(formatter: ConsoleOutputFormatter, testResult: TestResult) = + formatter.isInTest = false + + if formatter.outputLevel != OutputLevel.PRINT_NONE and + (formatter.outputLevel == OutputLevel.PRINT_ALL or testResult.status == TestStatus.FAILED): + let prefix = if testResult.suiteName.len > 0: " " else: "" + template rawPrint() = echo(prefix, "[", $testResult.status, "] ", + testResult.testName) + when useTerminal: + if formatter.colorOutput: + var color = case testResult.status + of TestStatus.OK: fgGreen + of TestStatus.FAILED: fgRed + of TestStatus.SKIPPED: fgYellow + styledEcho styleBright, color, prefix, "[", $testResult.status, "] ", + resetStyle, testResult.testName else: rawPrint() else: rawPrint() -template test*(name: expr, body: stmt): stmt {.immediate, dirty.} = - bind shouldRun, checkpoints, testDone +method suiteEnded*(formatter: ConsoleOutputFormatter) = + formatter.isInSuite = false + +proc xmlEscape(s: string): string = + result = newStringOfCap(s.len) + for c in items(s): + case c: + of '<': result.add("<") + of '>': result.add(">") + of '&': result.add("&") + of '"': result.add(""") + of '\'': result.add("'") + else: + if ord(c) < 32: + 
result.add("&#" & $ord(c) & ';') + else: + result.add(c) + +proc newJUnitOutputFormatter*(stream: Stream): JUnitOutputFormatter = + ## Creates a formatter that writes report to the specified stream in + ## JUnit format. + ## The ``stream`` is NOT closed automatically when the test are finished, + ## because the formatter has no way to know when all tests are finished. + ## You should invoke formatter.close() to finalize the report. + result = JUnitOutputFormatter( + stream: stream, + testErrors: @[], + testStackTrace: "", + testStartTime: 0.0 + ) + stream.writeLine("<?xml version=\"1.0\" encoding=\"UTF-8\"?>") + stream.writeLine("<testsuites>") + +proc close*(formatter: JUnitOutputFormatter) = + ## Completes the report and closes the underlying stream. + formatter.stream.writeLine("</testsuites>") + formatter.stream.close() + +method suiteStarted*(formatter: JUnitOutputFormatter, suiteName: string) = + formatter.stream.writeLine("\t<testsuite name=\"$1\">" % xmlEscape(suiteName)) + +method testStarted*(formatter: JUnitOutputFormatter, testName: string) = + formatter.testErrors.setLen(0) + formatter.testStackTrace.setLen(0) + formatter.testStartTime = epochTime() + +method failureOccurred*(formatter: JUnitOutputFormatter, + checkpoints: seq[string], stackTrace: string) = + ## ``stackTrace`` is provided only if the failure occurred due to an exception. + ## ``checkpoints`` is never ``nil``. 
+ formatter.testErrors.add(checkpoints) + if stackTrace.len > 0: + formatter.testStackTrace = stackTrace + +method testEnded*(formatter: JUnitOutputFormatter, testResult: TestResult) = + let time = epochTime() - formatter.testStartTime + let timeStr = time.formatFloat(ffDecimal, precision = 8) + formatter.stream.writeLine("\t\t<testcase name=\"$#\" time=\"$#\">" % [ + xmlEscape(testResult.testName), timeStr]) + case testResult.status + of TestStatus.OK: + discard + of TestStatus.SKIPPED: + formatter.stream.writeLine("<skipped />") + of TestStatus.FAILED: + let failureMsg = if formatter.testStackTrace.len > 0 and + formatter.testErrors.len > 0: + xmlEscape(formatter.testErrors[^1]) + elif formatter.testErrors.len > 0: + xmlEscape(formatter.testErrors[0]) + else: "The test failed without outputting an error" + + var errs = "" + if formatter.testErrors.len > 1: + var startIdx = if formatter.testStackTrace.len > 0: 0 else: 1 + var endIdx = if formatter.testStackTrace.len > 0: + formatter.testErrors.len - 2 + else: formatter.testErrors.len - 1 + + for errIdx in startIdx..endIdx: + if errs.len > 0: + errs.add("\n") + errs.add(xmlEscape(formatter.testErrors[errIdx])) + + if formatter.testStackTrace.len > 0: + formatter.stream.writeLine("\t\t\t<error message=\"$#\">$#</error>" % [ + failureMsg, xmlEscape(formatter.testStackTrace)]) + if errs.len > 0: + formatter.stream.writeLine("\t\t\t<system-err>$#</system-err>" % errs) + else: + formatter.stream.writeLine("\t\t\t<failure message=\"$#\">$#</failure>" % + [failureMsg, errs]) + + formatter.stream.writeLine("\t\t</testcase>") + +method suiteEnded*(formatter: JUnitOutputFormatter) = + formatter.stream.writeLine("\t</testsuite>") + +proc glob(matcher, filter: string): bool = + ## Globbing using a single `*`. Empty `filter` matches everything. 
+ if filter.len == 0: + return true + + if not filter.contains('*'): + return matcher == filter + + let beforeAndAfter = filter.split('*', maxsplit = 1) + if beforeAndAfter.len == 1: + # "foo*" + return matcher.startsWith(beforeAndAfter[0]) + + if matcher.len < filter.len - 1: + return false # "12345" should not match "123*345" + + return matcher.startsWith(beforeAndAfter[0]) and matcher.endsWith( + beforeAndAfter[1]) + +proc matchFilter(suiteName, testName, filter: string): bool = + if filter == "": + return true + if testName == filter: + # corner case for tests containing "::" in their name + return true + let suiteAndTestFilters = filter.split("::", maxsplit = 1) + + if suiteAndTestFilters.len == 1: + # no suite specified + let testFilter = suiteAndTestFilters[0] + return glob(testName, testFilter) + + return glob(suiteName, suiteAndTestFilters[0]) and + glob(testName, suiteAndTestFilters[1]) + +proc shouldRun(currentSuiteName, testName: string): bool = + ## Check if a test should be run by matching suiteName and testName against + ## test filters. + if testsFilters.len == 0: + return true + + for f in testsFilters: + if matchFilter(currentSuiteName, testName, f): + return true + + return false + +proc ensureInitialized() = + if formatters.len == 0: + formatters = @[OutputFormatter(defaultConsoleFormatter())] + + if not disabledParamFiltering: + when declared(paramCount): + # Read tests to run from the command line. + for i in 1 .. paramCount(): + testsFilters.incl(paramStr(i)) + +# These two procs are added as workarounds for +# https://github.com/nim-lang/Nim/issues/5549 +proc suiteEnded() = + for formatter in formatters: + formatter.suiteEnded() + +proc testEnded(testResult: TestResult) = + for formatter in formatters: + formatter.testEnded(testResult) + +template suite*(name, body) {.dirty.} = + ## Declare a test suite identified by `name` with optional ``setup`` + ## and/or ``teardown`` section. 
+ ## + ## A test suite is a series of one or more related tests sharing a + ## common fixture (``setup``, ``teardown``). The fixture is executed + ## for EACH test. + ## + ## ```nim + ## suite "test suite for addition": + ## setup: + ## let result = 4 + ## + ## test "2 + 2 = 4": + ## check(2+2 == result) + ## + ## test "(2 + -2) != 4": + ## check(2 + -2 != result) + ## + ## # No teardown needed + ## ``` + ## + ## The suite will run the individual test cases in the order in which + ## they were listed. With default global settings the above code prints: + ## + ## [Suite] test suite for addition + ## [OK] 2 + 2 = 4 + ## [OK] (2 + -2) != 4 + bind formatters, ensureInitialized, suiteEnded + + block: + template setup(setupBody: untyped) {.dirty, used.} = + var testSetupIMPLFlag {.used.} = true + template testSetupIMPL: untyped {.dirty.} = setupBody + + template teardown(teardownBody: untyped) {.dirty, used.} = + var testTeardownIMPLFlag {.used.} = true + template testTeardownIMPL: untyped {.dirty.} = teardownBody + + let testSuiteName {.used.} = name - if shouldRun(name): + ensureInitialized() + try: + for formatter in formatters: + formatter.suiteStarted(name) + body + finally: + suiteEnded() + +proc exceptionTypeName(e: ref Exception): string {.inline.} = + if e == nil: "<foreign exception>" + else: $e.name + +when not declared(setProgramResult): + {.warning: "setProgramResult not available on platform, unittest will not" & + " give failing exit code on test failure".} + template setProgramResult(a: int) = + discard + +template test*(name, body) {.dirty.} = + ## Define a single test case identified by `name`. 
+ ## + ## ```nim + ## test "roses are red": + ## let roses = "red" + ## check(roses == "red") + ## ``` + ## + ## The above code outputs: + ## + ## [OK] roses are red + bind shouldRun, checkpoints, formatters, ensureInitialized, testEnded, exceptionTypeName, setProgramResult + + ensureInitialized() + + if shouldRun(when declared(testSuiteName): testSuiteName else: "", name): checkpoints = @[] - var testStatusIMPL {.inject.} = OK + var testStatusIMPL {.inject.} = TestStatus.OK + for formatter in formatters: + formatter.testStarted(name) + + {.push warning[BareExcept]:off.} try: - testSetupIMPL() + when declared(testSetupIMPLFlag): testSetupIMPL() + when declared(testTeardownIMPLFlag): + defer: testTeardownIMPL() + {.push warning[BareExcept]:on.} body + {.pop.} except: - checkpoint("Unhandled exception: " & getCurrentExceptionMsg()) - echo getCurrentException().getStackTrace() - fail() + let e = getCurrentException() + let eTypeDesc = "[" & exceptionTypeName(e) & "]" + checkpoint("Unhandled exception: " & getCurrentExceptionMsg() & " " & eTypeDesc) + if e == nil: # foreign + fail() + else: + var stackTrace {.inject.} = e.getStackTrace() + fail() finally: - testTeardownIMPL() - testDone name, testStatusIMPL + if testStatusIMPL == TestStatus.FAILED: + setProgramResult 1 + let testResult = TestResult( + suiteName: when declared(testSuiteName): testSuiteName else: "", + testName: name, + status: testStatusIMPL + ) + testEnded(testResult) + checkpoints = @[] + {.pop.} proc checkpoint*(msg: string) = + ## Set a checkpoint identified by `msg`. Upon test failure all + ## checkpoints encountered so far are printed out. Example: + ## + ## ```nim + ## checkpoint("Checkpoint A") + ## check((42, "the Answer to life and everything") == (1, "a")) + ## checkpoint("Checkpoint B") + ## ``` + ## + ## outputs "Checkpoint A" once it fails. 
checkpoints.add(msg) # TODO: add support for something like SCOPED_TRACE from Google Test template fail* = - bind checkpoints - for msg in items(checkpoints): - # this used to be 'echo' which now breaks due to a bug. XXX will revisit - # this issue later. - stdout.writeln msg - - when not defined(ECMAScript): - if abortOnError: quit(1) - + ## Print out the checkpoints encountered so far and quit if ``abortOnError`` + ## is true. Otherwise, erase the checkpoints and indicate the test has + ## failed (change exit code and test status). This template is useful + ## for debugging, but is otherwise mostly used internally. Example: + ## + ## ```nim + ## checkpoint("Checkpoint A") + ## complicatedProcInThread() + ## fail() + ## ``` + ## + ## outputs "Checkpoint A" before quitting. + bind ensureInitialized, setProgramResult when declared(testStatusIMPL): - testStatusIMPL = FAILED + testStatusIMPL = TestStatus.FAILED else: - programResult += 1 + setProgramResult 1 + + ensureInitialized() + + # var stackTrace: string = nil + for formatter in formatters: + when declared(stackTrace): + formatter.failureOccurred(checkpoints, stackTrace) + else: + formatter.failureOccurred(checkpoints, "") + + if abortOnError: quit(1) checkpoints = @[] -macro check*(conditions: stmt): stmt {.immediate.} = - let checked = callsite()[1] +template skip* = + ## Mark the test as skipped. Should be used directly + ## in case when it is not possible to perform test + ## for reasons depending on outer environment, + ## or certain application logic conditions or configurations. + ## The test code is still executed. + ## ```nim + ## if not isGLContextCreated(): + ## skip() + ## ``` + bind checkpoints + + testStatusIMPL = TestStatus.SKIPPED + checkpoints = @[] + +macro check*(conditions: untyped): untyped = + ## Verify if a statement or a list of statements is true. + ## A helpful error message and set checkpoints are printed out on + ## failure (if ``outputLevel`` is not ``PRINT_NONE``). 
+ runnableExamples: + import std/strutils - var - argsAsgns = newNimNode(nnkStmtList) - argsPrintOuts = newNimNode(nnkStmtList) - counter = 0 + check("AKB48".toLowerAscii() == "akb48") - template asgn(a, value: expr): stmt = + let teams = {'A', 'K', 'B', '4', '8'} + + check: + "AKB48".toLowerAscii() == "akb48" + 'C' notin teams + + let checked = callsite()[1] + + template asgn(a: untyped, value: typed) = var a = value # XXX: we need "var: var" here in order to # preserve the semantics of var params - template print(name, value: expr): stmt = + template print(name: untyped, value: typed) = when compiles(string($value)): checkpoint(name & " was " & $value) - proc inspectArgs(exp: NimNode) = - for i in 1 .. <exp.len: - if exp[i].kind notin nnkLiterals: - inc counter - var arg = newIdentNode(":p" & $counter) - var argStr = exp[i].toStrLit - var paramAst = exp[i] - if exp[i].kind in nnkCallKinds: inspectArgs(exp[i]) - if exp[i].kind == nnkExprEqExpr: - # ExprEqExpr - # Ident !"v" - # IntLit 2 - paramAst = exp[i][1] - argsAsgns.add getAst(asgn(arg, paramAst)) - argsPrintOuts.add getAst(print(argStr, arg)) - if exp[i].kind != nnkExprEqExpr: - exp[i] = arg - else: - exp[i][1] = arg + proc inspectArgs(exp: NimNode): tuple[assigns, check, printOuts: NimNode] = + result.check = copyNimTree(exp) + result.assigns = newNimNode(nnkStmtList) + result.printOuts = newNimNode(nnkStmtList) + + var counter = 0 + + if exp[0].kind in {nnkIdent, nnkOpenSymChoice, nnkClosedSymChoice, nnkSym} and + $exp[0] in ["not", "in", "notin", "==", "<=", + ">=", "<", ">", "!=", "is", "isnot"]: + + for i in 1 ..< exp.len: + if exp[i].kind notin nnkLiterals: + inc counter + let argStr = exp[i].toStrLit + let paramAst = exp[i] + if exp[i].kind == nnkIdent: + result.printOuts.add getAst(print(argStr, paramAst)) + if exp[i].kind in nnkCallKinds + {nnkDotExpr, nnkBracketExpr, nnkPar} and + (exp[i].typeKind notin {ntyTypeDesc} or $exp[0] notin ["is", "isnot"]): + let callVar = newIdentNode(":c" & $counter) + 
result.assigns.add getAst(asgn(callVar, paramAst)) + result.check[i] = callVar + result.printOuts.add getAst(print(argStr, callVar)) + if exp[i].kind == nnkExprEqExpr: + # ExprEqExpr + # Ident "v" + # IntLit 2 + result.check[i] = exp[i][1] + if exp[i].typeKind notin {ntyTypeDesc}: + let arg = newIdentNode(":p" & $counter) + result.assigns.add getAst(asgn(arg, paramAst)) + result.printOuts.add getAst(print(argStr, arg)) + if exp[i].kind != nnkExprEqExpr: + result.check[i] = arg + else: + result.check[i][1] = arg case checked.kind of nnkCallKinds: - template rewrite(call, lineInfoLit: expr, callLit: string, - argAssgs, argPrintOuts: stmt): stmt = + + let (assigns, check, printOuts) = inspectArgs(checked) + let lineinfo = newStrLitNode(checked.lineInfo) + let callLit = checked.toStrLit + result = quote do: block: - argAssgs - if not call: - checkpoint(lineInfoLit & ": Check failed: " & callLit) - argPrintOuts + `assigns` + if `check`: + discard + else: + checkpoint(`lineinfo` & ": Check failed: " & `callLit`) + `printOuts` fail() - var checkedStr = checked.toStrLit - inspectArgs(checked) - result = getAst(rewrite(checked, checked.lineinfo, checkedStr, - argsAsgns, argsPrintOuts)) - of nnkStmtList: result = newNimNode(nnkStmtList) - for i in countup(0, checked.len - 1): - if checked[i].kind != nnkCommentStmt: - result.add(newCall(!"check", checked[i])) + for node in checked: + if node.kind != nnkCommentStmt: + result.add(newCall(newIdentNode("check"), node)) else: - template rewrite(Exp, lineInfoLit: expr, expLit: string): stmt = - if not Exp: - checkpoint(lineInfoLit & ": Check failed: " & expLit) - fail() + let lineinfo = newStrLitNode(checked.lineInfo) + let callLit = checked.toStrLit - result = getAst(rewrite(checked, checked.lineinfo, checked.toStrLit)) + result = quote do: + if `checked`: + discard + else: + checkpoint(`lineinfo` & ": Check failed: " & `callLit`) + fail() -template require*(conditions: stmt): stmt {.immediate, dirty.} = +template 
require*(conditions: untyped) = + ## Same as `check` except any failed test causes the program to quit + ## immediately. Any teardown statements are not executed and the failed + ## test output is not generated. + let savedAbortOnError = abortOnError block: - const AbortOnError {.inject.} = true + abortOnError = true check conditions - -macro expect*(exceptions: varargs[expr], body: stmt): stmt {.immediate.} = - let exp = callsite() - template expectBody(errorTypes, lineInfoLit: expr, - body: stmt): NimNode {.dirty.} = + abortOnError = savedAbortOnError + +macro expect*(exceptions: varargs[typed], body: untyped): untyped = + ## Test if `body` raises an exception found in the passed `exceptions`. + ## The test passes if the raised exception is part of the acceptable + ## exceptions. Otherwise, it fails. + runnableExamples: + import std/[math, random, strutils] + proc defectiveRobot() = + randomize() + case rand(1..4) + of 1: raise newException(OSError, "CANNOT COMPUTE!") + of 2: discard parseInt("Hello World!") + of 3: raise newException(IOError, "I can't do that Dave.") + else: assert 2 + 2 == 5 + + expect IOError, OSError, ValueError, AssertionDefect: + defectiveRobot() + + template expectBody(errorTypes, lineInfoLit, body): NimNode {.dirty.} = + {.push warning[BareExcept]:off.} try: + {.push warning[BareExcept]:on.} body + {.pop.} checkpoint(lineInfoLit & ": Expect Failed, no exception was thrown.") fail() except errorTypes: discard - - var body = exp[exp.len - 1] + except: + let err = getCurrentException() + checkpoint(lineInfoLit & ": Expect Failed, " & $err.name & " was thrown.") + fail() + {.pop.} var errorTypes = newNimNode(nnkBracket) - for i in countup(1, exp.len - 2): - errorTypes.add(exp[i]) - - result = getAst(expectBody(errorTypes, exp.lineinfo, body)) - - -when declared(stdout): - ## Reading settings - var envOutLvl = os.getEnv("NIMTEST_OUTPUT_LVL").string - - abortOnError = existsEnv("NIMTEST_ABORT_ON_ERROR") - colorOutput = not 
existsEnv("NIMTEST_NO_COLOR") + for exp in exceptions: + errorTypes.add(exp) -else: - var envOutLvl = "" # TODO - colorOutput = false + result = getAst(expectBody(errorTypes, errorTypes.lineInfo, body)) -if envOutLvl.len > 0: - for opt in countup(low(OutputLevel), high(OutputLevel)): - if $opt == envOutLvl: - outputLevel = opt - break +proc disableParamFiltering* = + ## disables filtering tests with the command line params + disabledParamFiltering = true diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim index b0afb75f9..725d5bbd9 100644 --- a/lib/pure/uri.nim +++ b/lib/pure/uri.nim @@ -8,59 +8,221 @@ # ## This module implements URI parsing as specified by RFC 3986. +## +## A Uniform Resource Identifier (URI) provides a simple and extensible +## means for identifying a resource. A URI can be further classified +## as a locator, a name, or both. The term "Uniform Resource Locator" +## (URL) refers to the subset of URIs. +## +## .. warning:: URI parsers in this module do not perform security validation. 
+## +## # Basic usage + + +## ## Combine URIs +runnableExamples: + let host = parseUri("https://nim-lang.org") + assert $host == "https://nim-lang.org" + assert $(host / "/blog.html") == "https://nim-lang.org/blog.html" + assert $(host / "blog2.html") == "https://nim-lang.org/blog2.html" + +## ## Access URI item +runnableExamples: + let res = parseUri("sftp://127.0.0.1:4343") + assert isAbsolute(res) + assert res.port == "4343" + +## ## Data URI Base64 +runnableExamples: + assert getDataUri("Hello World", "text/plain") == "data:text/plain;charset=utf-8;base64,SGVsbG8gV29ybGQ=" + assert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt" + + +import std/[strutils, parseutils, base64] +import std/private/[since, decode_helpers] + +when defined(nimPreviewSlimSystem): + import std/assertions + -import strutils, parseutils type Url* = distinct string Uri* = object - scheme*, username*, password*: string + scheme*, username*, password*: string hostname*, port*, path*, query*, anchor*: string opaque*: bool + isIpv6*: bool + + UriParseError* = object of ValueError -{.deprecated: [TUrl: Url, TUri: Uri].} -{.push warning[deprecated]: off.} -proc `$`*(url: Url): string {.deprecated.} = - ## **Deprecated since 0.9.6**: Use ``Uri`` instead. - return string(url) +proc uriParseError*(msg: string) {.noreturn.} = + ## Raises a `UriParseError` exception with message `msg`. + raise newException(UriParseError, msg) -proc `/`*(a, b: Url): Url {.deprecated.} = - ## Joins two URLs together, separating them with / if needed. +func encodeUrl*(s: string, usePlus = true): string = + ## Encodes a URL according to RFC3986. ## - ## **Deprecated since 0.9.6**: Use ``Uri`` instead. 
- var urlS = $a - var bS = $b - if urlS == "": return b - if urlS[urlS.len-1] != '/': - urlS.add('/') - if bS[0] == '/': - urlS.add(bS.substr(1)) - else: - urlS.add(bs) - result = Url(urlS) + ## This means that characters in the set + ## `{'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~'}` are + ## carried over to the result. + ## All other characters are encoded as `%xx` where `xx` + ## denotes its hexadecimal value. + ## + ## As a special rule, when the value of `usePlus` is true, + ## spaces are encoded as `+` instead of `%20`. + ## + ## **See also:** + ## * `decodeUrl func<#decodeUrl,string>`_ + runnableExamples: + assert encodeUrl("https://nim-lang.org") == "https%3A%2F%2Fnim-lang.org" + assert encodeUrl("https://nim-lang.org/this is a test") == "https%3A%2F%2Fnim-lang.org%2Fthis+is+a+test" + assert encodeUrl("https://nim-lang.org/this is a test", false) == "https%3A%2F%2Fnim-lang.org%2Fthis%20is%20a%20test" + result = newStringOfCap(s.len + s.len shr 2) # assume 12% non-alnum-chars + let fromSpace = if usePlus: "+" else: "%20" + for c in s: + case c + # https://tools.ietf.org/html/rfc3986#section-2.3 + of 'a'..'z', 'A'..'Z', '0'..'9', '-', '.', '_', '~': add(result, c) + of ' ': add(result, fromSpace) + else: + add(result, '%') + add(result, toHex(ord(c), 2)) -proc add*(url: var Url, a: Url) {.deprecated.} = - ## Appends url to url. +func decodeUrl*(s: string, decodePlus = true): string = + ## Decodes a URL according to RFC3986. ## - ## **Deprecated since 0.9.6**: Use ``Uri`` instead. - url = url / a -{.pop.} + ## This means that any `%xx` (where `xx` denotes a hexadecimal + ## value) are converted to the character with ordinal number `xx`, + ## and every other character is carried over. + ## If `xx` is not a valid hexadecimal value, it is left intact. + ## + ## As a special rule, when the value of `decodePlus` is true, `+` + ## characters are converted to a space. 
+ ## + ## **See also:** + ## * `encodeUrl func<#encodeUrl,string>`_ + runnableExamples: + assert decodeUrl("https%3A%2F%2Fnim-lang.org") == "https://nim-lang.org" + assert decodeUrl("https%3A%2F%2Fnim-lang.org%2Fthis+is+a+test") == "https://nim-lang.org/this is a test" + assert decodeUrl("https%3A%2F%2Fnim-lang.org%2Fthis%20is%20a%20test", + false) == "https://nim-lang.org/this is a test" + assert decodeUrl("abc%xyz") == "abc%xyz" + + result = newString(s.len) + var i = 0 + var j = 0 + while i < s.len: + case s[i] + of '%': + result[j] = decodePercent(s, i) + of '+': + if decodePlus: + result[j] = ' ' + else: + result[j] = s[i] + else: result[j] = s[i] + inc(i) + inc(j) + setLen(result, j) + +func encodeQuery*(query: openArray[(string, string)], usePlus = true, + omitEq = true, sep = '&'): string = + ## Encodes a set of (key, value) parameters into a URL query string. + ## + ## Every (key, value) pair is URL-encoded and written as `key=value`. If the + ## value is an empty string then the `=` is omitted, unless `omitEq` is + ## false. + ## The pairs are joined together by the `sep` character. + ## + ## The `usePlus` parameter is passed down to the `encodeUrl` function that + ## is used for the URL encoding of the string values. 
+ ## + ## **See also:** + ## * `encodeUrl func<#encodeUrl,string>`_ + runnableExamples: + assert encodeQuery({: }) == "" + assert encodeQuery({"a": "1", "b": "2"}) == "a=1&b=2" + assert encodeQuery({"a": "1", "b": ""}) == "a=1&b" + assert encodeQuery({"a": "1", "b": ""}, omitEq = false, sep = ';') == "a=1;b=" + for elem in query: + # Encode the `key = value` pairs and separate them with 'sep' + if result.len > 0: result.add(sep) + let (key, val) = elem + result.add(encodeUrl(key, usePlus)) + # Omit the '=' if the value string is empty + if not omitEq or val.len > 0: + result.add('=') + result.add(encodeUrl(val, usePlus)) + +iterator decodeQuery*(data: string, sep = '&'): tuple[key, value: string] = + ## Reads and decodes the query string `data` and yields the `(key, value)` pairs + ## the data consists of. If compiled with `-d:nimLegacyParseQueryStrict`, + ## a `UriParseError` is raised when there is an unencoded `=` character in a decoded + ## value, which was the behavior in Nim < 1.5.1. 
+ runnableExamples: + import std/sequtils + assert toSeq(decodeQuery("foo=1&bar=2=3")) == @[("foo", "1"), ("bar", "2=3")] + assert toSeq(decodeQuery("foo=1;bar=2=3", ';')) == @[("foo", "1"), ("bar", "2=3")] + assert toSeq(decodeQuery("&a&=b&=&&")) == @[("", ""), ("a", ""), ("", "b"), ("", ""), ("", "")] + + proc parseData(data: string, i: int, field: var string, sep: char): int = + result = i + while result < data.len: + let c = data[result] + case c + of '%': add(field, decodePercent(data, result)) + of '+': add(field, ' ') + of '&': break + else: + if c == sep: break + else: add(field, data[result]) + inc(result) -proc parseAuthority(authority: string, result: var Uri) = + var i = 0 + var name = "" + var value = "" + # decode everything in one pass: + while i < data.len: + setLen(name, 0) # reuse memory + i = parseData(data, i, name, '=') + setLen(value, 0) # reuse memory + if i < data.len and data[i] == '=': + inc(i) # skip '=' + when defined(nimLegacyParseQueryStrict): + i = parseData(data, i, value, '=') + else: + i = parseData(data, i, value, sep) + yield (name, value) + if i < data.len: + when defined(nimLegacyParseQueryStrict): + if data[i] != '&': + uriParseError("'&' expected at index '$#' for '$#'" % [$i, data]) + inc(i) + +func parseAuthority(authority: string, result: var Uri) = var i = 0 var inPort = false - while true: + var inIPv6 = false + while i < authority.len: case authority[i] of '@': - result.password = result.port - result.port = "" - result.username = result.hostname - result.hostname = "" + swap result.password, result.port + result.port.setLen(0) + swap result.username, result.hostname + result.hostname.setLen(0) inPort = false of ':': - inPort = true - of '\0': break + if inIPv6: + result.hostname.add(authority[i]) + else: + inPort = true + of '[': + inIPv6 = true + result.isIpv6 = true + of ']': + inIPv6 = false else: if inPort: result.port.add(authority[i]) @@ -68,94 +230,144 @@ proc parseAuthority(authority: string, result: var Uri) = 
result.hostname.add(authority[i]) i.inc -proc parsePath(uri: string, i: var int, result: var Uri) = - +func parsePath(uri: string, i: var int, result: var Uri) = i.inc parseUntil(uri, result.path, {'?', '#'}, i) # The 'mailto' scheme's PATH actually contains the hostname/username - if result.scheme.toLower == "mailto": + if cmpIgnoreCase(result.scheme, "mailto") == 0: parseAuthority(result.path, result) - result.path = "" + result.path.setLen(0) - if uri[i] == '?': + if i < uri.len and uri[i] == '?': i.inc # Skip '?' i.inc parseUntil(uri, result.query, {'#'}, i) - if uri[i] == '#': + if i < uri.len and uri[i] == '#': i.inc # Skip '#' i.inc parseUntil(uri, result.anchor, {}, i) -proc initUri(): Uri = +func initUri*(isIpv6 = false): Uri = + ## Initializes a URI with `scheme`, `username`, `password`, + ## `hostname`, `port`, `path`, `query`, `anchor` and `isIpv6`. + ## + ## **See also:** + ## * `Uri type <#Uri>`_ for available fields in the URI type + runnableExamples: + var uri2 = initUri(isIpv6 = true) + uri2.scheme = "tcp" + uri2.hostname = "2001:0db8:85a3:0000:0000:8a2e:0370:7334" + uri2.port = "8080" + assert $uri2 == "tcp://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080" result = Uri(scheme: "", username: "", password: "", hostname: "", port: "", - path: "", query: "", anchor: "") + path: "", query: "", anchor: "", isIpv6: isIpv6) -proc parseUri*(uri: string): Uri = - ## Parses a URI. - result = initUri() +func resetUri(uri: var Uri) = + for f in uri.fields: + when f is string: + f.setLen(0) + else: + f = false + +func parseUri*(uri: string, result: var Uri) = + ## Parses a URI. The `result` variable will be cleared before. 
+ ## + ## **See also:** + ## * `Uri type <#Uri>`_ for available fields in the URI type + ## * `initUri func <#initUri>`_ for initializing a URI + runnableExamples: + var res = initUri() + parseUri("https://nim-lang.org/docs/manual.html", res) + assert res.scheme == "https" + assert res.hostname == "nim-lang.org" + assert res.path == "/docs/manual.html" + resetUri(result) var i = 0 # Check if this is a reference URI (relative URI) - if uri[i] == '/': - parsePath(uri, i, result) - return + let doubleSlash = uri.len > 1 and uri[0] == '/' and uri[1] == '/' + if i < uri.len and uri[i] == '/': + # Make sure `uri` doesn't begin with '//'. + if not doubleSlash: + parsePath(uri, i, result) + return # Scheme i.inc parseWhile(uri, result.scheme, Letters + Digits + {'+', '-', '.'}, i) - if uri[i] != ':': + if (i >= uri.len or uri[i] != ':') and not doubleSlash: # Assume this is a reference URI (relative URI) i = 0 - result.scheme = "" + result.scheme.setLen(0) parsePath(uri, i, result) return - i.inc # Skip ':' + if not doubleSlash: + i.inc # Skip ':' # Authority - if uri[i] == '/' and uri[i+1] == '/': + if i+1 < uri.len and uri[i] == '/' and uri[i+1] == '/': i.inc(2) # Skip // var authority = "" i.inc parseUntil(uri, authority, {'/', '?', '#'}, i) - if authority == "": - raise newException(ValueError, "Expected authority got nothing.") - parseAuthority(authority, result) + if authority.len > 0: + parseAuthority(authority, result) else: result.opaque = true # Path parsePath(uri, i, result) -proc removeDotSegments(path: string): string = +func parseUri*(uri: string): Uri = + ## Parses a URI and returns it. 
+ ## + ## **See also:** + ## * `Uri type <#Uri>`_ for available fields in the URI type + runnableExamples: + let res = parseUri("ftp://Username:Password@Hostname") + assert res.username == "Username" + assert res.password == "Password" + assert res.scheme == "ftp" + result = initUri() + parseUri(uri, result) + +func removeDotSegments(path: string): string = + ## Collapses `..` and `.` in `path` in a similar way as done in `os.normalizedPath` + ## Caution: this is buggy. + runnableExamples: + assert removeDotSegments("a1/a2/../a3/a4/a5/./a6/a7/.//./") == "a1/a3/a4/a5/a6/a7/" + assert removeDotSegments("http://www.ai.") == "http://www.ai." + # xxx adapt or reuse `pathnorm.normalizePath(path, '/')` to make this more reliable, but + # taking into account url specificities such as not collapsing leading `//` in scheme + # `https://`. see `turi` for failing tests. + if path.len == 0: return "" var collection: seq[string] = @[] - let endsWithSlash = path[path.len-1] == '/' + let endsWithSlash = path.endsWith '/' var i = 0 var currentSegment = "" - while true: + while i < path.len: case path[i] of '/': collection.add(currentSegment) currentSegment = "" of '.': - if path[i+1] == '.' and path[i+2] == '/': + if i+2 < path.len and path[i+1] == '.' and path[i+2] == '/': if collection.len > 0: discard collection.pop() i.inc 3 continue - elif path[i+1] == '/': + elif i + 1 < path.len and path[i+1] == '/': i.inc 2 continue currentSegment.add path[i] - of '\0': - if currentSegment != "": - collection.add currentSegment - break else: currentSegment.add path[i] i.inc + if currentSegment != "": + collection.add currentSegment result = collection.join("/") if endsWithSlash: result.add '/' -proc merge(base, reference: Uri): string = +func merge(base, reference: Uri): string = # http://tools.ietf.org/html/rfc3986#section-5.2.3 if base.hostname != "" and base.path == "": '/' & reference.path @@ -166,30 +378,26 @@ proc merge(base, reference: Uri): string = else: base.path[0 .. 
lastSegment] & reference.path -proc combine*(base: Uri, reference: Uri): Uri = +func combine*(base: Uri, reference: Uri): Uri = ## Combines a base URI with a reference URI. ## ## This uses the algorithm specified in ## `section 5.2.2 of RFC 3986 <http://tools.ietf.org/html/rfc3986#section-5.2.2>`_. ## - ## This means that the slashes inside the base URI's path as well as reference - ## URI's path affect the resulting URI. - ## - ## For building URIs you may wish to use \`/\` instead. - ## - ## Examples: + ## This means that the slashes inside the base URIs path as well as reference + ## URIs path affect the resulting URI. ## - ## .. code-block:: - ## let foo = combine(parseUri("http://example.com/foo/bar"), parseUri("/baz")) - ## assert foo.path == "/baz" - ## - ## let bar = combine(parseUri("http://example.com/foo/bar"), parseUri("baz")) - ## assert bar.path == "/foo/baz" - ## - ## let bar = combine(parseUri("http://example.com/foo/bar/"), parseUri("baz")) - ## assert bar.path == "/foo/bar/baz" - - template setAuthority(dest, src: expr): stmt = + ## **See also:** + ## * `/ func <#/,Uri,string>`_ for building URIs + runnableExamples: + let foo = combine(parseUri("https://nim-lang.org/foo/bar"), parseUri("/baz")) + assert foo.path == "/baz" + let bar = combine(parseUri("https://nim-lang.org/foo/bar"), parseUri("baz")) + assert bar.path == "/foo/baz" + let qux = combine(parseUri("https://nim-lang.org/foo/bar/"), parseUri("baz")) + assert qux.path == "/foo/bar/baz" + + template setAuthority(dest, src): untyped = dest.hostname = src.hostname dest.username = src.username dest.port = src.port @@ -221,188 +429,144 @@ proc combine*(base: Uri, reference: Uri): Uri = result.scheme = base.scheme result.anchor = reference.anchor -proc combine*(uris: varargs[Uri]): Uri = +func combine*(uris: varargs[Uri]): Uri = ## Combines multiple URIs together. 
+ ## + ## **See also:** + ## * `/ func <#/,Uri,string>`_ for building URIs + runnableExamples: + let foo = combine(parseUri("https://nim-lang.org/"), parseUri("docs/"), + parseUri("manual.html")) + assert foo.hostname == "nim-lang.org" + assert foo.path == "/docs/manual.html" result = uris[0] - for i in 1 .. <uris.len: + for i in 1 ..< uris.len: result = combine(result, uris[i]) -proc `/`*(x: Uri, path: string): Uri = - ## Concatenates the path specified to the specified URI's path. +func isAbsolute*(uri: Uri): bool = + ## Returns true if URI is absolute, false otherwise. + runnableExamples: + assert parseUri("https://nim-lang.org").isAbsolute + assert not parseUri("nim-lang").isAbsolute + return uri.scheme != "" and (uri.hostname != "" or uri.path != "") + +func `/`*(x: Uri, path: string): Uri = + ## Concatenates the path specified to the specified URIs path. ## - ## Contrary to the ``combine`` procedure you do not have to worry about - ## the slashes at the beginning and end of the path and URI's path + ## Contrary to the `combine func <#combine,Uri,Uri>`_ you do not have to worry about + ## the slashes at the beginning and end of the path and URIs path ## respectively. ## - ## Examples: - ## - ## .. 
code-block:: - ## let foo = parseUri("http://example.com/foo/bar") / parseUri("/baz") - ## assert foo.path == "/foo/bar/baz" - ## - ## let bar = parseUri("http://example.com/foo/bar") / parseUri("baz") - ## assert bar.path == "/foo/bar/baz" - ## - ## let bar = parseUri("http://example.com/foo/bar/") / parseUri("baz") - ## assert bar.path == "/foo/bar/baz" + ## **See also:** + ## * `combine func <#combine,Uri,Uri>`_ + runnableExamples: + let foo = parseUri("https://nim-lang.org/foo/bar") / "/baz" + assert foo.path == "/foo/bar/baz" + let bar = parseUri("https://nim-lang.org/foo/bar") / "baz" + assert bar.path == "/foo/bar/baz" + let qux = parseUri("https://nim-lang.org/foo/bar/") / "baz" + assert qux.path == "/foo/bar/baz" result = x - if result.path[result.path.len-1] == '/': - if path[0] == '/': + + if result.path.len == 0: + if path.len == 0 or path[0] != '/': + result.path = "/" + result.path.add(path) + return + + if result.path.len > 0 and result.path[result.path.len-1] == '/': + if path.len > 0 and path[0] == '/': result.path.add(path[1 .. path.len-1]) else: result.path.add(path) else: - if path[0] != '/': + if path.len == 0 or path[0] != '/': result.path.add '/' result.path.add(path) -proc `$`*(u: Uri): string = +func `?`*(u: Uri, query: openArray[(string, string)]): Uri = + ## Concatenates the query parameters to the specified URI object. + runnableExamples: + let foo = parseUri("https://example.com") / "foo" ? {"bar": "qux"} + assert $foo == "https://example.com/foo?bar=qux" + result = u + result.query = encodeQuery(query) + +func `$`*(u: Uri): string = ## Returns the string representation of the specified URI object. - result = "" - if u.scheme.len > 0: - result.add(u.scheme) - if u.opaque: - result.add(":") + runnableExamples: + assert $parseUri("https://nim-lang.org") == "https://nim-lang.org" + # Get the len of all the parts. 
+ let schemeLen = u.scheme.len + let usernameLen = u.username.len + let passwordLen = u.password.len + let hostnameLen = u.hostname.len + let portLen = u.port.len + let pathLen = u.path.len + let queryLen = u.query.len + let anchorLen = u.anchor.len + # Prepare a string that fits all the parts and all punctuation chars. + # 12 is the max len required by all possible punctuation chars. + result = newStringOfCap( + schemeLen + usernameLen + passwordLen + hostnameLen + portLen + pathLen + queryLen + anchorLen + 12 + ) + # Insert to result. + if schemeLen > 0: + result.add u.scheme + result.add ':' + if not u.opaque: + result.add '/' + result.add '/' + if usernameLen > 0: + result.add u.username + if passwordLen > 0: + result.add ':' + result.add u.password + result.add '@' + if u.hostname.endsWith('/'): + if u.isIpv6: + result.add '[' + result.add u.hostname[0 .. ^2] + result.add ']' else: - result.add("://") - if u.username.len > 0: - result.add(u.username) - if u.password.len > 0: - result.add(":") - result.add(u.password) - result.add("@") - result.add(u.hostname) - if u.port.len > 0: - result.add(":") - result.add(u.port) - if u.path.len > 0: - result.add(u.path) - if u.query.len > 0: - result.add("?") - result.add(u.query) - if u.anchor.len > 0: - result.add("#") - result.add(u.anchor) - -when isMainModule: - block: - let str = "http://localhost" - let test = parseUri(str) - doAssert test.path == "" - - block: - let str = "http://localhost/" - let test = parseUri(str) - doAssert test.path == "/" - - block: - let str = "http://localhost:8080/test" - let test = parseUri(str) - doAssert test.scheme == "http" - doAssert test.port == "8080" - doAssert test.path == "/test" - doAssert test.hostname == "localhost" - doAssert($test == str) - - block: - let str = "foo://username:password@example.com:8042/over/there" & - "/index.dtb?type=animal&name=narwhal#nose" - let test = parseUri(str) - doAssert test.scheme == "foo" - doAssert test.username == "username" - doAssert 
test.password == "password" - doAssert test.hostname == "example.com" - doAssert test.port == "8042" - doAssert test.path == "/over/there/index.dtb" - doAssert test.query == "type=animal&name=narwhal" - doAssert test.anchor == "nose" - doAssert($test == str) - - block: - let str = "urn:example:animal:ferret:nose" - let test = parseUri(str) - doAssert test.scheme == "urn" - doAssert test.path == "example:animal:ferret:nose" - doAssert($test == str) - - block: - let str = "mailto:username@example.com?subject=Topic" - let test = parseUri(str) - doAssert test.scheme == "mailto" - doAssert test.username == "username" - doAssert test.hostname == "example.com" - doAssert test.query == "subject=Topic" - doAssert($test == str) - - block: - let str = "magnet:?xt=urn:sha1:72hsga62ba515sbd62&dn=foobar" - let test = parseUri(str) - doAssert test.scheme == "magnet" - doAssert test.query == "xt=urn:sha1:72hsga62ba515sbd62&dn=foobar" - doAssert($test == str) - - block: - let str = "/test/foo/bar?q=2#asdf" - let test = parseUri(str) - doAssert test.scheme == "" - doAssert test.path == "/test/foo/bar" - doAssert test.query == "q=2" - doAssert test.anchor == "asdf" - doAssert($test == str) - - block: - let str = "test/no/slash" - let test = parseUri(str) - doAssert test.path == "test/no/slash" - doAssert($test == str) - - # Remove dot segments tests - block: - doAssert removeDotSegments("/foo/bar/baz") == "/foo/bar/baz" - - # Combine tests - block: - let concat = combine(parseUri("http://google.com/foo/bar/"), parseUri("baz")) - doAssert concat.path == "/foo/bar/baz" - doAssert concat.hostname == "google.com" - doAssert concat.scheme == "http" - - block: - let concat = combine(parseUri("http://google.com/foo"), parseUri("/baz")) - doAssert concat.path == "/baz" - doAssert concat.hostname == "google.com" - doAssert concat.scheme == "http" - - block: - let concat = combine(parseUri("http://google.com/foo/test"), parseUri("bar")) - doAssert concat.path == "/foo/bar" - - block: - let 
concat = combine(parseUri("http://google.com/foo/test"), parseUri("/bar")) - doAssert concat.path == "/bar" - - block: - let concat = combine(parseUri("http://google.com/foo/test"), parseUri("bar")) - doAssert concat.path == "/foo/bar" - - block: - let concat = combine(parseUri("http://google.com/foo/test/"), parseUri("bar")) - doAssert concat.path == "/foo/test/bar" - - block: - let concat = combine(parseUri("http://google.com/foo/test/"), parseUri("bar/")) - doAssert concat.path == "/foo/test/bar/" - - block: - let concat = combine(parseUri("http://google.com/foo/test/"), parseUri("bar/"), - parseUri("baz")) - doAssert concat.path == "/foo/test/bar/baz" - - # `/` tests - block: - let test = parseUri("http://example.com/foo") / "bar/asd" - doAssert test.path == "/foo/bar/asd" - - block: - let test = parseUri("http://example.com/foo/") / "/bar/asd" - doAssert test.path == "/foo/bar/asd" + result.add u.hostname[0 .. ^2] + else: + if u.isIpv6: + result.add '[' + result.add u.hostname + result.add ']' + else: + result.add u.hostname + if portLen > 0: + result.add ':' + result.add u.port + if pathLen > 0: + if hostnameLen > 0 and u.path[0] != '/': + result.add '/' + result.add u.path + if queryLen > 0: + result.add '?' 
+ result.add u.query + if anchorLen > 0: + result.add '#' + result.add u.anchor + + +proc getDataUri*(data, mime: string, encoding = "utf-8"): string {.since: (1, 3).} = + ## Convenience proc for `base64.encode` returns a standard Base64 Data URI (RFC-2397) + ## + ## **See also:** + ## * `mimetypes <mimetypes.html>`_ for `mime` argument + ## * https://tools.ietf.org/html/rfc2397 + ## * https://en.wikipedia.org/wiki/Data_URI_scheme + runnableExamples: static: assert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt" + assert encoding.len > 0 and mime.len > 0 # Must *not* be URL-Safe, see RFC-2397 + let base64encoded: string = base64.encode(data) + # ("data:".len + ";charset=".len + ";base64,".len) == 22 + result = newStringOfCap(22 + mime.len + encoding.len + base64encoded.len) + result.add "data:" + result.add mime + result.add ";charset=" + result.add encoding + result.add ";base64," + result.add base64encoded diff --git a/lib/pure/volatile.nim b/lib/pure/volatile.nim new file mode 100644 index 000000000..a38247c7d --- /dev/null +++ b/lib/pure/volatile.nim @@ -0,0 +1,34 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2017 Jeff Ciesielski +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module contains code for generating volatile loads and stores, +## which are useful in embedded and systems programming. + +proc volatileLoad*[T](src: ptr T): T {.inline, noinit.} = + ## Generates a volatile load of the value stored in the container `src`. + ## Note that this only effects code generation on `C` like backends. + when nimvm: + result = src[] + else: + when defined(js): + result = src[] + else: + {.emit: [result, " = (*(", typeof(src[]), " volatile*)", src, ");"].} + +proc volatileStore*[T](dest: ptr T, val: T) {.inline.} = + ## Generates a volatile store into the container `dest` of the value + ## `val`. 
Note that this only effects code generation on `C` like + ## backends. + when nimvm: + dest[] = val + else: + when defined(js): + dest[] = val + else: + {.emit: ["*((", typeof(dest[]), " volatile*)(", dest, ")) = ", val, ";"].} diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim deleted file mode 100644 index 6cf837f25..000000000 --- a/lib/pure/xmldom.nim +++ /dev/null @@ -1,1120 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2010 Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - - -import strutils -## This module implements XML DOM Level 2 Core -## specification (http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html) - - -#http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html - -#Exceptions -type - EDOMException* = object of ValueError ## Base exception object for all DOM Exceptions - EDOMStringSizeErr* = object of EDOMException ## If the specified range of text does not fit into a DOMString - ## Currently not used(Since DOMString is just string) - EHierarchyRequestErr* = object of EDOMException ## If any node is inserted somewhere it doesn't belong - EIndexSizeErr* = object of EDOMException ## If index or size is negative, or greater than the allowed value - EInuseAttributeErr* = object of EDOMException ## If an attempt is made to add an attribute that is already in use elsewhere - EInvalidAccessErr* = object of EDOMException ## If a parameter or an operation is not supported by the underlying object. - EInvalidCharacterErr* = object of EDOMException ## This exception is raised when a string parameter contains an illegal character - EInvalidModificationErr* = object of EDOMException ## If an attempt is made to modify the type of the underlying object. - EInvalidStateErr* = object of EDOMException ## If an attempt is made to use an object that is not, or is no longer, usable. 
- ENamespaceErr* = object of EDOMException ## If an attempt is made to create or change an object in a way which is incorrect with regard to namespaces. - ENotFoundErr* = object of EDOMException ## If an attempt is made to reference a node in a context where it does not exist - ENotSupportedErr* = object of EDOMException ## If the implementation does not support the requested type of object or operation. - ENoDataAllowedErr* = object of EDOMException ## If data is specified for a node which does not support data - ENoModificationAllowedErr* = object of EDOMException ## If an attempt is made to modify an object where modifications are not allowed - ESyntaxErr* = object of EDOMException ## If an invalid or illegal string is specified. - EWrongDocumentErr* = object of EDOMException ## If a node is used in a different document than the one that created it (that doesn't support it) - -const - ElementNode* = 1 - AttributeNode* = 2 - TextNode* = 3 - CDataSectionNode* = 4 - ProcessingInstructionNode* = 7 - CommentNode* = 8 - DocumentNode* = 9 - DocumentFragmentNode* = 11 - - # Nodes which are childless - Not sure about AttributeNode - childlessObjects = {DocumentNode, AttributeNode, TextNode, - CDataSectionNode, ProcessingInstructionNode, CommentNode} - # Illegal characters - illegalChars = {'>', '<', '&', '"'} - - -type - Feature = tuple[name: string, version: string] - PDOMImplementation* = ref DOMImplementation - DOMImplementation = object - features: seq[Feature] # Read-Only - - PNode* = ref Node - Node = object of RootObj - attributes*: seq[PAttr] - childNodes*: seq[PNode] - fLocalName: string # Read-only - fNamespaceURI: string # Read-only - fNodeName: string # Read-only - nodeValue*: string - fNodeType: int # Read-only - fOwnerDocument: PDocument # Read-Only - fParentNode: PNode # Read-Only - prefix*: string # Setting this should change some values... TODO! 
- - PElement* = ref Element - Element = object of Node - fTagName: string # Read-only - - PCharacterData* = ref CharacterData - CharacterData = object of Node - data*: string - - PDocument* = ref Document - Document = object of Node - fImplementation: PDOMImplementation # Read-only - fDocumentElement: PElement # Read-only - - PAttr* = ref Attr - Attr = object of Node - fName: string # Read-only - fSpecified: bool # Read-only - value*: string - fOwnerElement: PElement # Read-only - - PDocumentFragment* = ref DocumentFragment - DocumentFragment = object of Node - - PText* = ref Text - Text = object of CharacterData - - PComment* = ref Comment - Comment = object of CharacterData - - PCDataSection* = ref CDataSection - CDataSection = object of Text - - PProcessingInstruction* = ref ProcessingInstruction - ProcessingInstruction = object of Node - data*: string - fTarget: string # Read-only - -# DOMImplementation -proc getDOM*(): PDOMImplementation = - ## Returns a DOMImplementation - new(result) - result.features = @[(name: "core", version: "2.0"), - (name: "core", version: "1.0"), - (name: "XML", version: "2.0")] - -proc createDocument*(dom: PDOMImplementation, namespaceURI: string, qualifiedName: string): PDocument = - ## Creates an XML Document object of the specified type with its document element. - var doc: PDocument - new(doc) - doc.fNamespaceURI = namespaceURI - doc.fImplementation = dom - - var elTag: PElement - new(elTag) - elTag.fTagName = qualifiedName - elTag.fNodeName = qualifiedName - doc.fDocumentElement = elTag - doc.fNodeType = DocumentNode - - return doc - -proc createDocument*(dom: PDOMImplementation, n: PElement): PDocument = - ## Creates an XML Document object of the specified type with its document element. - - # This procedure is not in the specification, it's provided for the parser. 
- var doc: PDocument - new(doc) - doc.fDocumentElement = n - doc.fImplementation = dom - doc.fNodeType = DocumentNode - - return doc - -proc hasFeature*(dom: PDOMImplementation, feature: string, version: string = ""): bool = - ## Returns ``true`` if this ``version`` of the DomImplementation implements ``feature``, otherwise ``false`` - for iName, iVersion in items(dom.features): - if iName == feature: - if version == "": - return true - else: - if iVersion == version: - return true - return false - - -# Document -# Attributes - -proc implementation*(doc: PDocument): PDOMImplementation = - return doc.fImplementation - -proc documentElement*(doc: PDocument): PElement = - return doc.fDocumentElement - -# Internal procedures -proc findNodes(nl: PNode, name: string): seq[PNode] = - # Made for getElementsByTagName - var r: seq[PNode] = @[] - if isNil(nl.childNodes): return @[] - if nl.childNodes.len() == 0: return @[] - - for i in items(nl.childNodes): - if i.fNodeType == ElementNode: - if i.fNodeName == name or name == "*": - r.add(i) - - if not isNil(i.childNodes): - if i.childNodes.len() != 0: - r.add(findNodes(i, name)) - - return r - -proc findNodesNS(nl: PNode, namespaceURI: string, localName: string): seq[PNode] = - # Made for getElementsByTagNameNS - var r: seq[PNode] = @[] - if isNil(nl.childNodes): return @[] - if nl.childNodes.len() == 0: return @[] - - for i in items(nl.childNodes): - if i.fNodeType == ElementNode: - if (i.fNamespaceURI == namespaceURI or namespaceURI == "*") and (i.fLocalName == localName or localName == "*"): - r.add(i) - - if not isNil(i.childNodes): - if i.childNodes.len() != 0: - r.add(findNodesNS(i, namespaceURI, localName)) - - return r - - -#Procedures -proc createAttribute*(doc: PDocument, name: string): PAttr = - ## Creates an Attr of the given name. Note that the Attr instance can then be set on an Element using the setAttributeNode method. 
- ## To create an attribute with a qualified name and namespace URI, use the createAttributeNS method. - - # Check if name contains illegal characters - if illegalChars in name: - raise newException(EInvalidCharacterErr, "Invalid character") - - var attrNode: PAttr - new(attrNode) - attrNode.fName = name - attrNode.fNodeName = name - attrNode.fLocalName = nil - attrNode.prefix = nil - attrNode.fNamespaceURI = nil - attrNode.value = "" - attrNode.fSpecified = false - return attrNode - -proc createAttributeNS*(doc: PDocument, namespaceURI: string, qualifiedName: string): PAttr = - ## Creates an attribute of the given qualified name and namespace URI - - # Check if name contains illegal characters - if illegalChars in namespaceURI or illegalChars in qualifiedName: - raise newException(EInvalidCharacterErr, "Invalid character") - # Exceptions - if qualifiedName.contains(':'): - if isNil(namespaceURI): - raise newException(ENamespaceErr, "When qualifiedName contains a prefix namespaceURI cannot be nil") - elif qualifiedName.split(':')[0].toLower() == "xml" and namespaceURI != "http://www.w3.org/XML/1998/namespace": - raise newException(ENamespaceErr, - "When the namespace prefix is \"xml\" namespaceURI has to be \"http://www.w3.org/XML/1998/namespace\"") - elif qualifiedName.split(':')[1].toLower() == "xmlns" and namespaceURI != "http://www.w3.org/2000/xmlns/": - raise newException(ENamespaceErr, - "When the namespace prefix is \"xmlns\" namespaceURI has to be \"http://www.w3.org/2000/xmlns/\"") - - var attrNode: PAttr - new(attrNode) - attrNode.fName = qualifiedName - attrNode.fNodeName = qualifiedName - attrNode.fSpecified = false - attrNode.fNamespaceURI = namespaceURI - if qualifiedName.contains(':'): - attrNode.prefix = qualifiedName.split(':')[0] - attrNode.fLocalName = qualifiedName.split(':')[1] - else: - attrNode.prefix = nil - attrNode.fLocalName = qualifiedName - attrNode.value = "" - - attrNode.fNodeType = AttributeNode - return attrNode - -proc 
createCDATASection*(doc: PDocument, data: string): PCDataSection = - ## Creates a CDATASection node whose value is the specified string. - var cData: PCDataSection - new(cData) - cData.data = data - cData.nodeValue = data - cData.fNodeName = "#text" # Not sure about this, but this is technically a TextNode - cData.fNodeType = CDataSectionNode - return cData - -proc createComment*(doc: PDocument, data: string): PComment = - ## Creates a Comment node given the specified string. - var comm: PComment - new(comm) - comm.data = data - comm.nodeValue = data - - comm.fNodeType = CommentNode - return comm - -proc createDocumentFragment*(doc: PDocument): PDocumentFragment = - ## Creates an empty DocumentFragment object. - var df: PDocumentFragment - new(df) - return df - -proc createElement*(doc: PDocument, tagName: string): PElement = - ## Creates an element of the type specified. - - # Check if name contains illegal characters - if illegalChars in tagName: - raise newException(EInvalidCharacterErr, "Invalid character") - - var elNode: PElement - new(elNode) - elNode.fTagName = tagName - elNode.fNodeName = tagName - elNode.fLocalName = nil - elNode.prefix = nil - elNode.fNamespaceURI = nil - elNode.childNodes = @[] - elNode.attributes = @[] - - elNode.fNodeType = ElementNode - - return elNode - -proc createElementNS*(doc: PDocument, namespaceURI: string, qualifiedName: string): PElement = - ## Creates an element of the given qualified name and namespace URI. 
- if qualifiedName.contains(':'): - if isNil(namespaceURI): - raise newException(ENamespaceErr, "When qualifiedName contains a prefix namespaceURI cannot be nil") - elif qualifiedName.split(':')[0].toLower() == "xml" and namespaceURI != "http://www.w3.org/XML/1998/namespace": - raise newException(ENamespaceErr, - "When the namespace prefix is \"xml\" namespaceURI has to be \"http://www.w3.org/XML/1998/namespace\"") - - # Check if name contains illegal characters - if illegalChars in namespaceURI or illegalChars in qualifiedName: - raise newException(EInvalidCharacterErr, "Invalid character") - - var elNode: PElement - new(elNode) - elNode.fTagName = qualifiedName - elNode.fNodeName = qualifiedName - if qualifiedName.contains(':'): - elNode.prefix = qualifiedName.split(':')[0] - elNode.fLocalName = qualifiedName.split(':')[1] - else: - elNode.prefix = nil - elNode.fLocalName = qualifiedName - elNode.fNamespaceURI = namespaceURI - elNode.childNodes = @[] - elNode.attributes = @[] - - elNode.fNodeType = ElementNode - - return elNode - -proc createProcessingInstruction*(doc: PDocument, target: string, data: string): PProcessingInstruction = - ## Creates a ProcessingInstruction node given the specified name and data strings. - - #Check if name contains illegal characters - if illegalChars in target: - raise newException(EInvalidCharacterErr, "Invalid character") - - var pi: PProcessingInstruction - new(pi) - pi.fTarget = target - pi.data = data - pi.fNodeType = ProcessingInstructionNode - return pi - -proc createTextNode*(doc: PDocument, data: string): PText = #Propably TextNode - ## Creates a Text node given the specified string. - var txtNode: PText - new(txtNode) - txtNode.data = data - txtNode.nodeValue = data - txtNode.fNodeName = "#text" - - txtNode.fNodeType = TextNode - return txtNode - -discard """proc getElementById*(doc: PDocument, elementId: string): PElement = - ##Returns the ``Element`` whose ID is given by ``elementId``. 
If no such element exists, returns ``nil`` - #TODO""" - -proc getElementsByTagName*(doc: PDocument, tagName: string): seq[PNode] = - ## Returns a NodeList of all the Elements with a given tag name in - ## the order in which they are encountered in a preorder traversal of the Document tree. - result = @[] - if doc.fDocumentElement.fNodeName == tagName or tagName == "*": - result.add(doc.fDocumentElement) - - result.add(doc.fDocumentElement.findNodes(tagName)) - -proc getElementsByTagNameNS*(doc: PDocument, namespaceURI: string, localName: string): seq[PNode] = - ## Returns a NodeList of all the Elements with a given localName and namespaceURI - ## in the order in which they are encountered in a preorder traversal of the Document tree. - result = @[] - if doc.fDocumentElement.fLocalName == localName or localName == "*": - if doc.fDocumentElement.fNamespaceURI == namespaceURI or namespaceURI == "*": - result.add(doc.fDocumentElement) - - result.add(doc.fDocumentElement.findNodesNS(namespaceURI, localName)) - -proc importNode*(doc: PDocument, importedNode: PNode, deep: bool): PNode = - ## Imports a node from another document to this document - case importedNode.fNodeType - of AttributeNode: - var nAttr: PAttr = PAttr(importedNode) - nAttr.fOwnerDocument = doc - nAttr.fParentNode = nil - nAttr.fOwnerElement = nil - nAttr.fSpecified = true - return nAttr - of DocumentFragmentNode: - var n: PNode - new(n) - n = importedNode - n.fOwnerDocument = doc - n.fParentNode = nil - - n.fOwnerDocument = doc - n.fParentNode = nil - var tmp: seq[PNode] = n.childNodes - n.childNodes = @[] - if deep: - for i in low(tmp.len())..high(tmp.len()): - n.childNodes.add(importNode(doc, tmp[i], deep)) - - return n - of ElementNode: - var n: PNode - new(n) - n = importedNode - n.fOwnerDocument = doc - n.fParentNode = nil - - var tmpA: seq[PAttr] = n.attributes - n.attributes = @[] - # Import the Element node's attributes - for i in low(tmpA.len())..high(tmpA.len()): - 
n.attributes.add(PAttr(importNode(doc, tmpA[i], deep))) - # Import the childNodes - var tmp: seq[PNode] = n.childNodes - n.childNodes = @[] - if deep: - for i in low(tmp.len())..high(tmp.len()): - n.childNodes.add(importNode(doc, tmp[i], deep)) - - return n - of ProcessingInstructionNode, TextNode, CDataSectionNode, CommentNode: - var n: PNode - new(n) - n = importedNode - n.fOwnerDocument = doc - n.fParentNode = nil - return n - else: - raise newException(ENotSupportedErr, "The type of node being imported is not supported") - - -# Node -# Attributes - -proc firstChild*(n: PNode): PNode = - ## Returns this node's first child - - if not isNil(n.childNodes) and n.childNodes.len() > 0: - return n.childNodes[0] - else: - return nil - -proc lastChild*(n: PNode): PNode = - ## Returns this node's last child - - if not isNil(n.childNodes) and n.childNodes.len() > 0: - return n.childNodes[n.childNodes.len() - 1] - else: - return nil - -proc localName*(n: PNode): string = - ## Returns this nodes local name - - return n.fLocalName - -proc namespaceURI*(n: PNode): string = - ## Returns this nodes namespace URI - - return n.fNamespaceURI - -proc `namespaceURI=`*(n: PNode, value: string) = - n.fNamespaceURI = value - -proc nextSibling*(n: PNode): PNode = - ## Returns the next sibling of this node - - if isNil(n.fParentNode) or isNil(n.fParentNode.childNodes): - return nil - var nLow: int = low(n.fParentNode.childNodes) - var nHigh: int = high(n.fParentNode.childNodes) - for i in nLow..nHigh: - if n.fParentNode.childNodes[i] == n: - return n.fParentNode.childNodes[i + 1] - return nil - -proc nodeName*(n: PNode): string = - ## Returns the name of this node - - return n.fNodeName - -proc nodeType*(n: PNode): int = - ## Returns the type of this node - - return n.fNodeType - -proc ownerDocument*(n: PNode): PDocument = - ## Returns the owner document of this node - - return n.fOwnerDocument - -proc parentNode*(n: PNode): PNode = - ## Returns the parent node of this node - - return 
n.fParentNode - -proc previousSibling*(n: PNode): PNode = - ## Returns the previous sibling of this node - - if isNil(n.fParentNode) or isNil(n.fParentNode.childNodes): - return nil - var nLow: int = low(n.fParentNode.childNodes) - var nHigh: int = high(n.fParentNode.childNodes) - for i in nLow..nHigh: - if n.fParentNode.childNodes[i] == n: - return n.fParentNode.childNodes[i - 1] - return nil - -proc `prefix=`*(n: PNode, value: string) = - ## Modifies the prefix of this node - - # Setter - # Check if name contains illegal characters - if illegalChars in value: - raise newException(EInvalidCharacterErr, "Invalid character") - - if isNil(n.fNamespaceURI): - raise newException(ENamespaceErr, "namespaceURI cannot be nil") - elif value.toLower() == "xml" and n.fNamespaceURI != "http://www.w3.org/XML/1998/namespace": - raise newException(ENamespaceErr, - "When the namespace prefix is \"xml\" namespaceURI has to be \"http://www.w3.org/XML/1998/namespace\"") - elif value.toLower() == "xmlns" and n.fNamespaceURI != "http://www.w3.org/2000/xmlns/": - raise newException(ENamespaceErr, - "When the namespace prefix is \"xmlns\" namespaceURI has to be \"http://www.w3.org/2000/xmlns/\"") - elif value.toLower() == "xmlns" and n.fNodeType == AttributeNode: - raise newException(ENamespaceErr, "An AttributeNode cannot have a prefix of \"xmlns\"") - - n.fNodeName = value & ":" & n.fLocalName - if n.nodeType == ElementNode: - var el: PElement = PElement(n) - el.fTagName = value & ":" & n.fLocalName - - elif n.nodeType == AttributeNode: - var attr: PAttr = PAttr(n) - attr.fName = value & ":" & n.fLocalName - -# Procedures -proc appendChild*(n: PNode, newChild: PNode) = - ## Adds the node newChild to the end of the list of children of this node. - ## If the newChild is already in the tree, it is first removed. 
- - # Check if n contains newChild - if not isNil(n.childNodes): - for i in low(n.childNodes)..high(n.childNodes): - if n.childNodes[i] == newChild: - raise newException(EHierarchyRequestErr, "The node to append is already in this nodes children.") - - # Check if newChild is from this nodes document - if n.fOwnerDocument != newChild.fOwnerDocument: - raise newException(EWrongDocumentErr, "This node belongs to a different document, use importNode.") - - if n == newChild: - raise newException(EHierarchyRequestErr, "You can't add a node into itself") - - if n.nodeType in childlessObjects: - raise newException(ENoModificationAllowedErr, "Cannot append children to a childless node") - - if isNil(n.childNodes): n.childNodes = @[] - - newChild.fParentNode = n - for i in low(n.childNodes)..high(n.childNodes): - if n.childNodes[i] == newChild: - n.childNodes[i] = newChild - - n.childNodes.add(newChild) - -proc cloneNode*(n: PNode, deep: bool): PNode = - ## Returns a duplicate of this node, if ``deep`` is `true`, Element node's children are copied - case n.fNodeType - of AttributeNode: - var newNode: PAttr - new(newNode) - newNode = PAttr(n) - newNode.fSpecified = true - newNode.fOwnerElement = nil - return newNode - of ElementNode: - var newNode: PElement - new(newNode) - newNode = PElement(n) - # Import the childNodes - var tmp: seq[PNode] = n.childNodes - n.childNodes = @[] - if deep and not isNil(tmp): - for i in low(tmp.len())..high(tmp.len()): - n.childNodes.add(cloneNode(tmp[i], deep)) - return newNode - else: - var newNode: PNode - new(newNode) - newNode = n - return newNode - -proc hasAttributes*(n: PNode): bool = - ## Returns whether this node (if it is an element) has any attributes. - return not isNil(n.attributes) and n.attributes.len() > 0 - -proc hasChildNodes*(n: PNode): bool = - ## Returns whether this node has any children. 
- return not isNil(n.childNodes) and n.childNodes.len() > 0 - -proc insertBefore*(n: PNode, newChild: PNode, refChild: PNode): PNode = - ## Inserts the node ``newChild`` before the existing child node ``refChild``. - ## If ``refChild`` is nil, insert ``newChild`` at the end of the list of children. - - # Check if newChild is from this nodes document - if n.fOwnerDocument != newChild.fOwnerDocument: - raise newException(EWrongDocumentErr, "This node belongs to a different document, use importNode.") - - if isNil(n.childNodes): - n.childNodes = @[] - - for i in low(n.childNodes)..high(n.childNodes): - if n.childNodes[i] == refChild: - n.childNodes.insert(newChild, i - 1) - return - - n.childNodes.add(newChild) - -proc isSupported*(n: PNode, feature: string, version: string): bool = - ## Tests whether the DOM implementation implements a specific - ## feature and that feature is supported by this node. - return n.fOwnerDocument.fImplementation.hasFeature(feature, version) - -proc isEmpty(s: string): bool = - - if isNil(s) or s == "": - return true - for i in items(s): - if i != ' ': - return false - return true - -proc normalize*(n: PNode) = - ## Merges all separated TextNodes together, and removes any empty TextNodes - var curTextNode: PNode = nil - var i: int = 0 - - var newChildNodes: seq[PNode] = @[] - while true: - if isNil(n.childNodes) or i >= n.childNodes.len: - break - if n.childNodes[i].nodeType == TextNode: - - #If the TextNode is empty, remove it - if PText(n.childNodes[i]).data.isEmpty(): - inc(i) - - if isNil(curTextNode): - curTextNode = n.childNodes[i] - else: - PText(curTextNode).data.add(PText(n.childNodes[i]).data) - curTextNode.nodeValue.add(PText(n.childNodes[i]).data) - inc(i) - else: - newChildNodes.add(curTextNode) - newChildNodes.add(n.childNodes[i]) - curTextNode = nil - - inc(i) - n.childNodes = newChildNodes - -proc removeChild*(n: PNode, oldChild: PNode): PNode = - ## Removes the child node indicated by ``oldChild`` from the list of 
children, and returns it. - if not isNil(n.childNodes): - for i in low(n.childNodes)..high(n.childNodes): - if n.childNodes[i] == oldChild: - result = n.childNodes[i] - n.childNodes.delete(i) - return - - raise newException(ENotFoundErr, "Node not found") - -proc replaceChild*(n: PNode, newChild: PNode, oldChild: PNode): PNode = - ## Replaces the child node ``oldChild`` with ``newChild`` in the list of children, and returns the ``oldChild`` node. - - # Check if newChild is from this nodes document - if n.fOwnerDocument != newChild.fOwnerDocument: - raise newException(EWrongDocumentErr, "This node belongs to a different document, use importNode.") - - if not isNil(n.childNodes): - for i in low(n.childNodes)..high(n.childNodes): - if n.childNodes[i] == oldChild: - result = n.childNodes[i] - n.childNodes[i] = newChild - return - - raise newException(ENotFoundErr, "Node not found") - -# NamedNodeMap - -proc getNamedItem*(nList: seq[PNode], name: string): PNode = - ## Retrieves a node specified by ``name``. If this node cannot be found returns ``nil`` - for i in items(nList): - if i.nodeName() == name: - return i - return nil - -proc getNamedItem*(nList: seq[PAttr], name: string): PAttr = - ## Retrieves a node specified by ``name``. If this node cannot be found returns ``nil`` - for i in items(nList): - if i.nodeName() == name: - return i - return nil - -proc getNamedItemNS*(nList: seq[PNode], namespaceURI: string, localName: string): PNode = - ## Retrieves a node specified by ``localName`` and ``namespaceURI``. If this node cannot be found returns ``nil`` - for i in items(nList): - if i.namespaceURI() == namespaceURI and i.localName() == localName: - return i - return nil - -proc getNamedItemNS*(nList: seq[PAttr], namespaceURI: string, localName: string): PAttr = - ## Retrieves a node specified by ``localName`` and ``namespaceURI``. 
If this node cannot be found returns ``nil`` - for i in items(nList): - if i.namespaceURI() == namespaceURI and i.localName() == localName: - return i - return nil - -proc item*(nList: seq[PNode], index: int): PNode = - ## Returns the ``index`` th item in the map. - ## If ``index`` is greater than or equal to the number of nodes in this map, this returns ``nil``. - if index >= nList.len(): return nil - else: return nList[index] - -proc removeNamedItem*(nList: var seq[PNode], name: string): PNode = - ## Removes a node specified by ``name`` - ## Raises the ``ENotFoundErr`` exception, if the node was not found - for i in low(nList)..high(nList): - if nList[i].fNodeName == name: - result = nList[i] - nList.delete(i) - return - - raise newException(ENotFoundErr, "Node not found") - -proc removeNamedItemNS*(nList: var seq[PNode], namespaceURI: string, localName: string): PNode = - ## Removes a node specified by local name and namespace URI - for i in low(nList)..high(nList): - if nList[i].fLocalName == localName and nList[i].fNamespaceURI == namespaceURI: - result = nList[i] - nList.delete(i) - return - - raise newException(ENotFoundErr, "Node not found") - -proc setNamedItem*(nList: var seq[PNode], arg: PNode): PNode = - ## Adds ``arg`` as a ``Node`` to the ``NList`` - ## If a node with the same name is already present in this map, it is replaced by the new one. 
- if not isNil(nList): - if nList.len() > 0: - #Check if newChild is from this nodes document - if nList[0].fOwnerDocument != arg.fOwnerDocument: - raise newException(EWrongDocumentErr, "This node belongs to a different document, use importNode.") - #Exceptions End - - var item: PNode = nList.getNamedItem(arg.nodeName()) - if isNil(item): - nList.add(arg) - return nil - else: - # Node with the same name exists - var index: int = 0 - for i in low(nList)..high(nList): - if nList[i] == item: - index = i - break - nList[index] = arg - return item # Return the replaced node - -proc setNamedItem*(nList: var seq[PAttr], arg: PAttr): PAttr = - ## Adds ``arg`` as a ``Node`` to the ``NList`` - ## If a node with the same name is already present in this map, it is replaced by the new one. - if not isNil(nList): - if nList.len() > 0: - # Check if newChild is from this nodes document - if nList[0].fOwnerDocument != arg.fOwnerDocument: - raise newException(EWrongDocumentErr, "This node belongs to a different document, use importNode.") - - if not isNil(arg.fOwnerElement): - raise newException(EInuseAttributeErr, "This attribute is in use by another element, use cloneNode") - - # Exceptions end - var item: PAttr = nList.getNamedItem(arg.nodeName()) - if isNil(item): - nList.add(arg) - return nil - else: - # Node with the same name exists - var index: int = 0 - for i in low(nList)..high(nList): - if nList[i] == item: - index = i - break - nList[index] = arg - return item # Return the replaced node - -proc setNamedItemNS*(nList: var seq[PNode], arg: PNode): PNode = - ## Adds a node using its ``namespaceURI`` and ``localName`` - if not isNil(nList): - if nList.len() > 0: - # Check if newChild is from this nodes document - if nList[0].fOwnerDocument != arg.fOwnerDocument: - raise newException(EWrongDocumentErr, "This node belongs to a different document, use importNode.") - #Exceptions end - - var item: PNode = nList.getNamedItemNS(arg.namespaceURI(), arg.localName()) - if 
isNil(item): - nList.add(arg) - return nil - else: - # Node with the same name exists - var index: int = 0 - for i in low(nList)..high(nList): - if nList[i] == item: - index = i - break - nList[index] = arg - return item # Return the replaced node - -proc setNamedItemNS*(nList: var seq[PAttr], arg: PAttr): PAttr = - ## Adds a node using its ``namespaceURI`` and ``localName`` - if not isNil(nList): - if nList.len() > 0: - # Check if newChild is from this nodes document - if nList[0].fOwnerDocument != arg.fOwnerDocument: - raise newException(EWrongDocumentErr, "This node belongs to a different document, use importNode.") - - if not isNil(arg.fOwnerElement): - raise newException(EInuseAttributeErr, "This attribute is in use by another element, use cloneNode") - - # Exceptions end - var item: PAttr = nList.getNamedItemNS(arg.namespaceURI(), arg.localName()) - if isNil(item): - nList.add(arg) - return nil - else: - # Node with the same name exists - var index: int = 0 - for i in low(nList)..high(nList): - if nList[i] == item: - index = i - break - nList[index] = arg - return item # Return the replaced node - -# CharacterData - Decided to implement this, -# Didn't add the procedures, because you can just edit .data - -# Attr -# Attributes -proc name*(a: PAttr): string = - ## Returns the name of the Attribute - - return a.fName - -proc specified*(a: PAttr): bool = - ## Specifies whether this attribute was specified in the original document - - return a.fSpecified - -proc ownerElement*(a: PAttr): PElement = - ## Returns this Attributes owner element - - return a.fOwnerElement - -# Element -# Attributes - -proc tagName*(el: PElement): string = - ## Returns the Element Tag Name - - return el.fTagName - -# Procedures -proc getAttribute*(el: PElement, name: string): string = - ## Retrieves an attribute value by ``name`` - if isNil(el.attributes): - return nil - var attribute = el.attributes.getNamedItem(name) - if not isNil(attribute): - return attribute.value - else: - return 
nil - -proc getAttributeNS*(el: PElement, namespaceURI: string, localName: string): string = - ## Retrieves an attribute value by ``localName`` and ``namespaceURI`` - if isNil(el.attributes): - return nil - var attribute = el.attributes.getNamedItemNS(namespaceURI, localName) - if not isNil(attribute): - return attribute.value - else: - return nil - -proc getAttributeNode*(el: PElement, name: string): PAttr = - ## Retrieves an attribute node by ``name`` - ## To retrieve an attribute node by qualified name and namespace URI, use the `getAttributeNodeNS` method - if isNil(el.attributes): - return nil - return el.attributes.getNamedItem(name) - -proc getAttributeNodeNS*(el: PElement, namespaceURI: string, localName: string): PAttr = - ## Retrieves an `Attr` node by ``localName`` and ``namespaceURI`` - if isNil(el.attributes): - return nil - return el.attributes.getNamedItemNS(namespaceURI, localName) - -proc getElementsByTagName*(el: PElement, name: string): seq[PNode] = - ## Returns a `NodeList` of all descendant `Elements` of ``el`` with a given tag ``name``, - ## in the order in which they are encountered in a preorder traversal of this `Element` tree - ## If ``name`` is `*`, returns all descendant of ``el`` - result = el.findNodes(name) - -proc getElementsByTagNameNS*(el: PElement, namespaceURI: string, localName: string): seq[PNode] = - ## Returns a `NodeList` of all the descendant Elements with a given - ## ``localName`` and ``namespaceURI`` in the order in which they are - ## encountered in a preorder traversal of this Element tree - result = el.findNodesNS(namespaceURI, localName) - -proc hasAttribute*(el: PElement, name: string): bool = - ## Returns ``true`` when an attribute with a given ``name`` is specified - ## on this element , ``false`` otherwise. 
- if isNil(el.attributes): - return false - return not isNil(el.attributes.getNamedItem(name)) - -proc hasAttributeNS*(el: PElement, namespaceURI: string, localName: string): bool = - ## Returns ``true`` when an attribute with a given ``localName`` and - ## ``namespaceURI`` is specified on this element , ``false`` otherwise - if isNil(el.attributes): - return false - return not isNil(el.attributes.getNamedItemNS(namespaceURI, localName)) - -proc removeAttribute*(el: PElement, name: string) = - ## Removes an attribute by ``name`` - if not isNil(el.attributes): - for i in low(el.attributes)..high(el.attributes): - if el.attributes[i].fName == name: - el.attributes.delete(i) - -proc removeAttributeNS*(el: PElement, namespaceURI: string, localName: string) = - ## Removes an attribute by ``localName`` and ``namespaceURI`` - if not isNil(el.attributes): - for i in low(el.attributes)..high(el.attributes): - if el.attributes[i].fNamespaceURI == namespaceURI and - el.attributes[i].fLocalName == localName: - el.attributes.delete(i) - -proc removeAttributeNode*(el: PElement, oldAttr: PAttr): PAttr = - ## Removes the specified attribute node - ## If the attribute node cannot be found raises ``ENotFoundErr`` - if not isNil(el.attributes): - for i in low(el.attributes)..high(el.attributes): - if el.attributes[i] == oldAttr: - result = el.attributes[i] - el.attributes.delete(i) - return - - raise newException(ENotFoundErr, "oldAttr is not a member of el's Attributes") - -proc setAttributeNode*(el: PElement, newAttr: PAttr): PAttr = - ## Adds a new attribute node, if an attribute with the same `nodeName` is - ## present, it is replaced by the new one and the replaced attribute is - ## returned, otherwise ``nil`` is returned. 
- - # Check if newAttr is from this nodes document - if el.fOwnerDocument != newAttr.fOwnerDocument: - raise newException(EWrongDocumentErr, - "This node belongs to a different document, use importNode.") - - if not isNil(newAttr.fOwnerElement): - raise newException(EInuseAttributeErr, - "This attribute is in use by another element, use cloneNode") - # Exceptions end - - if isNil(el.attributes): el.attributes = @[] - return el.attributes.setNamedItem(newAttr) - -proc setAttributeNodeNS*(el: PElement, newAttr: PAttr): PAttr = - ## Adds a new attribute node, if an attribute with the localName and - ## namespaceURI of ``newAttr`` is present, it is replaced by the new one - ## and the replaced attribute is returned, otherwise ``nil`` is returned. - - # Check if newAttr is from this nodes document - if el.fOwnerDocument != newAttr.fOwnerDocument: - raise newException(EWrongDocumentErr, - "This node belongs to a different document, use importNode.") - - if not isNil(newAttr.fOwnerElement): - raise newException(EInuseAttributeErr, - "This attribute is in use by another element, use cloneNode") - # Exceptions end - - if isNil(el.attributes): el.attributes = @[] - return el.attributes.setNamedItemNS(newAttr) - -proc setAttribute*(el: PElement, name: string, value: string) = - ## Adds a new attribute, as specified by ``name`` and ``value`` - ## If an attribute with that name is already present in the element, its - ## value is changed to be that of the value parameter - ## Raises the EInvalidCharacterErr if the specified ``name`` contains - ## illegal characters - var attrNode = el.fOwnerDocument.createAttribute(name) - # Check if name contains illegal characters - if illegalChars in name: - raise newException(EInvalidCharacterErr, "Invalid character") - - discard el.setAttributeNode(attrNode) - # Set the info later, the setAttributeNode checks - # if FOwnerElement is nil, and if it isn't it raises an exception - attrNode.fOwnerElement = el - attrNode.fSpecified = true - 
attrNode.value = value - -proc setAttributeNS*(el: PElement, namespaceURI, localName, value: string) = - ## Adds a new attribute, as specified by ``namespaceURI``, ``localName`` - ## and ``value``. - - # Check if name contains illegal characters - if illegalChars in namespaceURI or illegalChars in localName: - raise newException(EInvalidCharacterErr, "Invalid character") - - var attrNode = el.fOwnerDocument.createAttributeNS(namespaceURI, localName) - - discard el.setAttributeNodeNS(attrNode) - # Set the info later, the setAttributeNode checks - # if FOwnerElement is nil, and if it isn't it raises an exception - attrNode.fOwnerElement = el - attrNode.fSpecified = true - attrNode.value = value - -# Text -proc splitData*(textNode: PText, offset: int): PText = - ## Breaks this node into two nodes at the specified offset, - ## keeping both in the tree as siblings. - - if offset > textNode.data.len(): - raise newException(EIndexSizeErr, "Index out of bounds") - - var left: string = textNode.data.substr(0, offset) - textNode.data = left - var right: string = textNode.data.substr(offset, textNode.data.len()) - - if not isNil(textNode.fParentNode) and not isNil(textNode.fParentNode.childNodes): - for i in low(textNode.fParentNode.childNodes)..high(textNode.fParentNode.childNodes): - if textNode.fParentNode.childNodes[i] == textNode: - var newNode: PText = textNode.fOwnerDocument.createTextNode(right) - textNode.fParentNode.childNodes.insert(newNode, i) - return newNode - else: - var newNode: PText = textNode.fOwnerDocument.createTextNode(right) - return newNode - - -# ProcessingInstruction -proc target*(pi: PProcessingInstruction): string = - ## Returns the Processing Instructions target - - return pi.fTarget - - -# --Other stuff-- -# Writer -proc addEscaped(s: string): string = - result = "" - for c in items(s): - case c - of '<': result.add("<") - of '>': result.add(">") - of '&': result.add("&") - of '"': result.add(""") - else: result.add(c) - -proc nodeToXml(n: PNode, 
indent: int = 0): string = - result = spaces(indent) & "<" & n.nodeName - if not isNil(n.attributes): - for i in items(n.attributes): - result.add(" " & i.name & "=\"" & addEscaped(i.value) & "\"") - - if isNil(n.childNodes) or n.childNodes.len() == 0: - result.add("/>") # No idea why this doesn't need a \n :O - else: - # End the beginning of this tag - result.add(">\n") - for i in items(n.childNodes): - case i.nodeType - of ElementNode: - result.add(nodeToXml(i, indent + 2)) - of TextNode: - result.add(spaces(indent * 2)) - result.add(addEscaped(i.nodeValue)) - of CDataSectionNode: - result.add(spaces(indent * 2)) - result.add("<![CDATA[" & i.nodeValue & "]]>") - of ProcessingInstructionNode: - result.add(spaces(indent * 2)) - result.add("<?" & PProcessingInstruction(i).target & " " & - PProcessingInstruction(i).data & " ?>") - of CommentNode: - result.add(spaces(indent * 2)) - result.add("<!-- " & i.nodeValue & " -->") - else: - continue - result.add("\n") - # Add the ending tag - </tag> - result.add(spaces(indent) & "</" & n.nodeName & ">") - -proc `$`*(doc: PDocument): string = - ## Converts a PDocument object into a string representation of it's XML - result = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" - result.add(nodeToXml(doc.documentElement)) diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim deleted file mode 100644 index 7f34d72a8..000000000 --- a/lib/pure/xmldomparser.nim +++ /dev/null @@ -1,168 +0,0 @@ -# -# -# Nim's Runtime Library -# (c) Copyright 2010 Dominik Picheta -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -import xmldom, os, streams, parsexml, strutils - -## This module parses a XML Document into a XML DOM Document representation. 
- -#XMLDom's Parser - Turns XML into a Document - -type - # Parsing errors - EMismatchedTag* = object of ValueError ## Raised when a tag is not properly closed - EParserError* = object of ValueError ## Raised when an unexpected XML Parser event occurs - - # For namespaces - XmlnsAttr = tuple[name, value: string, ownerElement: PElement] - -var nsList: seq[XmlnsAttr] = @[] # Used for storing namespaces - -proc getNS(prefix: string): string = - var defaultNS: seq[string] = @[] - - for key, value, tag in items(nsList): - if ":" in key: - if key.split(':')[1] == prefix: - return value - - if key == "xmlns": - defaultNS.add(value) - - # Don't return the default namespaces - # in the loop, because then they would have a precedence - # over normal namespaces - if defaultNS.len() > 0: - return defaultNS[0] # Return the first found default namespace - # if none are specified for this prefix - - return "" - -proc parseText(x: var XmlParser, doc: var PDocument): PText = - result = doc.createTextNode(x.charData()) - -proc parseElement(x: var XmlParser, doc: var PDocument): PElement = - var n = doc.createElement("") - - while true: - case x.kind() - of xmlEof: - break - of xmlElementStart, xmlElementOpen: - if n.tagName() != "": - n.appendChild(parseElement(x, doc)) - else: - n = doc.createElementNS("", x.elementName) - - of xmlElementEnd: - if x.elementName == n.nodeName: - # n.normalize() # Remove any whitespace etc. - - var ns: string - if x.elementName.contains(':'): - ns = getNS(x.elementName.split(':')[0]) - else: - ns = getNS("") - - n.namespaceURI = ns - - # Remove any namespaces this element declared - var count = 0 # Variable which keeps the index - # We need to edit it.. 
- for i in low(nsList)..len(nsList)-1: - if nsList[count][2] == n: - nsList.delete(count) - dec(count) - inc(count) - - return n - else: #The wrong element is ended - raise newException(EMismatchedTag, "Mismatched tag at line " & - $x.getLine() & " column " & $x.getColumn) - - of xmlCharData: - n.appendChild(parseText(x, doc)) - of xmlAttribute: - if x.attrKey == "xmlns" or x.attrKey.startsWith("xmlns:"): - nsList.add((x.attrKey, x.attrValue, n)) - - if x.attrKey.contains(':'): - var ns = getNS(x.attrKey) - n.setAttributeNS(ns, x.attrKey, x.attrValue) - else: - n.setAttribute(x.attrKey, x.attrValue) - - of xmlCData: - n.appendChild(doc.createCDATASection(x.charData())) - of xmlComment: - n.appendChild(doc.createComment(x.charData())) - of xmlPI: - n.appendChild(doc.createProcessingInstruction(x.piName(), x.piRest())) - - of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial: - discard " Unused \'events\'" - - else: - raise newException(EParserError, "Unexpected XML Parser event") - x.next() - - raise newException(EMismatchedTag, - "Mismatched tag at line " & $x.getLine() & " column " & $x.getColumn) - -proc loadXMLStream*(stream: Stream): PDocument = - ## Loads and parses XML from a stream specified by ``stream``, and returns - ## a ``PDocument`` - - var x: XmlParser - open(x, stream, nil, {reportComments}) - - var xmlDoc: PDocument - var dom: PDOMImplementation = getDOM() - - while true: - x.next() - case x.kind() - of xmlEof: - break - of xmlElementStart, xmlElementOpen: - var el: PElement = parseElement(x, xmlDoc) - xmlDoc = dom.createDocument(el) - of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial: - discard " Unused \'events\'" - else: - raise newException(EParserError, "Unexpected XML Parser event") - - return xmlDoc - -proc loadXML*(xml: string): PDocument = - ## Loads and parses XML from a string specified by ``xml``, and returns - ## a ``PDocument`` - var s = newStringStream(xml) - return loadXMLStream(s) - - -proc loadXMLFile*(path: string): 
PDocument = - ## Loads and parses XML from a file specified by ``path``, and returns - ## a ``PDocument`` - - var s = newFileStream(path, fmRead) - if s == nil: raise newException(IOError, "Unable to read file " & path) - return loadXMLStream(s) - - -when isMainModule: - var xml = loadXMLFile("nim/xmldom/test.xml") - #echo(xml.getElementsByTagName("m:test2")[0].namespaceURI) - #echo(xml.getElementsByTagName("bla:test")[0].namespaceURI) - #echo(xml.getElementsByTagName("test")[0].namespaceURI) - for i in items(xml.getElementsByTagName("*")): - if i.namespaceURI != nil: - echo(i.nodeName, "=", i.namespaceURI) - - - echo($xml) diff --git a/lib/pure/xmlparser.nim b/lib/pure/xmlparser.nim index 755bfcdbc..2c1e4e37c 100644 --- a/lib/pure/xmlparser.nim +++ b/lib/pure/xmlparser.nim @@ -9,33 +9,34 @@ ## This module parses an XML document and creates its XML tree representation. -import streams, parsexml, strtabs, xmltree +import std/[streams, parsexml, strtabs, xmltree] -type - XmlError* = object of ValueError ## exception that is raised - ## for invalid XML - errors*: seq[string] ## all detected parsing errors +when defined(nimPreviewSlimSystem): + import std/syncio -{.deprecated: [EInvalidXml: XmlError].} +type + XmlError* = object of ValueError ## Exception that is raised + ## for invalid XML. + errors*: seq[string] ## All detected parsing errors. 
-proc raiseInvalidXml(errors: seq[string]) = +proc raiseInvalidXml(errors: seq[string]) = var e: ref XmlError new(e) e.msg = errors[0] e.errors = errors raise e -proc addNode(father, son: XmlNode) = +proc addNode(father, son: XmlNode) = if son != nil: add(father, son) -proc parse(x: var XmlParser, errors: var seq[string]): XmlNode +proc parse(x: var XmlParser, errors: var seq[string]): XmlNode {.gcsafe.} -proc untilElementEnd(x: var XmlParser, result: XmlNode, +proc untilElementEnd(x: var XmlParser, result: XmlNode, errors: var seq[string]) = while true: case x.kind - of xmlElementEnd: - if x.elementName == result.tag: + of xmlElementEnd: + if x.elementName == result.tag: next(x) else: errors.add(errorMsg(x, "</" & result.tag & "> expected")) @@ -49,7 +50,7 @@ proc untilElementEnd(x: var XmlParser, result: XmlNode, proc parse(x: var XmlParser, errors: var seq[string]): XmlNode = case x.kind - of xmlComment: + of xmlComment: result = newComment(x.charData) next(x) of xmlCharData, xmlWhitespace: @@ -61,17 +62,17 @@ proc parse(x: var XmlParser, errors: var seq[string]): XmlNode = of xmlError: errors.add(errorMsg(x)) next(x) - of xmlElementStart: ## ``<elem>`` + of xmlElementStart: ## ``<elem>`` result = newElement(x.elementName) next(x) untilElementEnd(x, result, errors) of xmlElementEnd: errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName)) - of xmlElementOpen: + of xmlElementOpen: result = newElement(x.elementName) next(x) result.attrs = newStringTable() - while true: + while true: case x.kind of xmlAttribute: result.attrs[x.attrKey] = x.attrValue @@ -91,25 +92,25 @@ proc parse(x: var XmlParser, errors: var seq[string]): XmlNode = of xmlAttribute, xmlElementClose: errors.add(errorMsg(x, "<some_tag> expected")) next(x) - of xmlCData: + of xmlCData: result = newCData(x.charData) next(x) of xmlEntity: ## &entity; - errors.add(errorMsg(x, "unknown entity: " & x.entityName)) + result = newEntity(x.entityName) next(x) of xmlEof: discard -proc parseXml*(s: 
Stream, filename: string, - errors: var seq[string]): XmlNode = - ## parses the XML from stream `s` and returns a ``PXmlNode``. Every - ## occurred parsing error is added to the `errors` sequence. +proc parseXml*(s: Stream, filename: string, + errors: var seq[string], options: set[XmlParseOption] = {reportComments}): XmlNode = + ## Parses the XML from stream ``s`` and returns a ``XmlNode``. Every + ## occurred parsing error is added to the ``errors`` sequence. var x: XmlParser - open(x, s, filename, {reportComments}) + open(x, s, filename, options) while true: x.next() case x.kind - of xmlElementOpen, xmlElementStart: + of xmlElementOpen, xmlElementStart: result = parse(x, errors) break of xmlComment, xmlWhitespace, xmlSpecial, xmlPI: discard # just skip it @@ -120,40 +121,56 @@ proc parseXml*(s: Stream, filename: string, break close(x) -proc parseXml*(s: Stream): XmlNode = - ## parses the XTML from stream `s` and returns a ``PXmlNode``. All parsing - ## errors are turned into an ``EInvalidXML`` exception. +proc parseXml*(s: Stream, options: set[XmlParseOption] = {reportComments}): XmlNode = + ## Parses the XML from stream ``s`` and returns a ``XmlNode``. All parsing + ## errors are turned into an ``XmlError`` exception. var errors: seq[string] = @[] - result = parseXml(s, "unknown_html_doc", errors) + result = parseXml(s, "unknown_xml_doc", errors, options) if errors.len > 0: raiseInvalidXml(errors) -proc loadXml*(path: string, errors: var seq[string]): XmlNode = - ## Loads and parses XML from file specified by ``path``, and returns - ## a ``PXmlNode``. Every occurred parsing error is added to the `errors` +proc parseXml*(str: string, options: set[XmlParseOption] = {reportComments}): XmlNode = + ## Parses the XML from string ``str`` and returns a ``XmlNode``. All parsing + ## errors are turned into an ``XmlError`` exception. 
+ parseXml(newStringStream(str), options) + +proc loadXml*(path: string, errors: var seq[string], options: set[XmlParseOption] = {reportComments}): XmlNode = + ## Loads and parses XML from file specified by ``path``, and returns + ## a ``XmlNode``. Every occurred parsing error is added to the ``errors`` ## sequence. var s = newFileStream(path, fmRead) if s == nil: raise newException(IOError, "Unable to read file: " & path) - result = parseXml(s, path, errors) + result = parseXml(s, path, errors, options) -proc loadXml*(path: string): XmlNode = - ## Loads and parses XML from file specified by ``path``, and returns - ## a ``PXmlNode``. All parsing errors are turned into an ``EInvalidXML`` - ## exception. +proc loadXml*(path: string, options: set[XmlParseOption] = {reportComments}): XmlNode = + ## Loads and parses XML from file specified by ``path``, and returns + ## a ``XmlNode``. All parsing errors are turned into an ``XmlError`` + ## exception. var errors: seq[string] = @[] - result = loadXml(path, errors) + result = loadXml(path, errors, options) if errors.len > 0: raiseInvalidXml(errors) when isMainModule: - import os - - var errors: seq[string] = @[] - var x = loadXml(paramStr(1), errors) - for e in items(errors): echo e - - var f: File - if open(f, "xmltest.txt", fmWrite): - f.write($x) - f.close() + when not defined(testing): + import std/os + + var errors: seq[string] = @[] + var x = loadXml(paramStr(1), errors) + for e in items(errors): echo e + + var f: File + if open(f, "xmltest.txt", fmWrite): + f.write($x) + f.close() + else: + quit("cannot write test.txt") else: - quit("cannot write test.txt") - + block: # correctly parse ../../tests/testdata/doc1.xml + let filePath = "tests/testdata/doc1.xml" + var errors: seq[string] = @[] + var xml = loadXml(filePath, errors) + assert(errors.len == 0, "The file tests/testdata/doc1.xml should be parsed without errors.") + + block bug1518: + var err: seq[string] = @[] + assert $parsexml(newStringStream"<tag>One & 
two</tag>", "temp.xml", + err) == "<tag>One & two</tag>" diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim index 0bf5b52a4..5c0cbc5e4 100644 --- a/lib/pure/xmltree.nim +++ b/lib/pure/xmltree.nim @@ -7,199 +7,760 @@ # distribution, for details about the copyright. # -## A simple XML tree. More efficient and simpler than the DOM. +## A simple XML tree generator. +## +runnableExamples: + var g = newElement("myTag") + g.add newText("some text") + g.add newComment("this is comment") -import macros, strtabs + var h = newElement("secondTag") + h.add newEntity("some entity") -type - XmlNode* = ref XmlNodeObj ## an XML tree consists of ``PXmlNode``'s. + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + let k = newXmlTree("treeTag", [g, h], att) + + doAssert $k == """<treeTag key1="first value" key2="second value"> + <myTag>some text<!-- this is comment --></myTag> + <secondTag>&some entity;</secondTag> +</treeTag>""" + +## **See also:** +## * `xmlparser module <xmlparser.html>`_ for high-level XML parsing +## * `parsexml module <parsexml.html>`_ for low-level XML parsing +## * `htmlgen module <htmlgen.html>`_ for html code generator - XmlNodeKind* = enum ## different kinds of ``PXmlNode``'s - xnText, ## a text element - xnElement, ## an element with 0 or more children - xnCData, ## a CDATA node - xnEntity, ## an entity (like ``&thing;``) - xnComment ## an XML comment +import std/private/since +import std/[macros, strtabs, strutils, sequtils] - XmlAttributes* = StringTableRef ## an alias for a string to string mapping +when defined(nimPreviewSlimSystem): + import std/assertions + + +type + XmlNode* = ref XmlNodeObj ## An XML tree consisting of XML nodes. + ## + ## Use `newXmlTree proc <#newXmlTree,string,openArray[XmlNode],XmlAttributes>`_ + ## for creating a new tree. + + XmlNodeKind* = enum ## Different kinds of XML nodes. 
+ xnText, ## a text element + xnVerbatimText, ## + xnElement, ## an element with 0 or more children + xnCData, ## a CDATA node + xnEntity, ## an entity (like ``&thing;``) + xnComment ## an XML comment + + XmlAttributes* = StringTableRef ## An alias for a string to string mapping. + ## + ## Use `toXmlAttributes proc <#toXmlAttributes,varargs[tuple[string,string]]>`_ + ## to create `XmlAttributes`. XmlNodeObj {.acyclic.} = object case k: XmlNodeKind # private, use the kind() proc to read this field. - of xnText, xnComment, xnCData, xnEntity: + of xnText, xnVerbatimText, xnComment, xnCData, xnEntity: fText: string of xnElement: fTag: string s: seq[XmlNode] fAttr: XmlAttributes - fClientData: int ## for other clients + fClientData: int ## for other clients -{.deprecated: [PXmlNode: XmlNode, TXmlNodeKind: XmlNodeKind, PXmlAttributes: - XmlAttributes, TXmlNode: XmlNodeObj].} +const + xmlHeader* = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" + ## Header to use for complete XML output. + +template expect(node: XmlNode, kind: set[XmlNodeKind]) = + ## Check the node's kind is within a set of values + assert node.k in kind, "Got " & $node.k + +template expect(node: XmlNode, kind: XmlNodeKind) = + ## Check the node's kind equals a value + assert node.k == kind, "Got " & $node.k proc newXmlNode(kind: XmlNodeKind): XmlNode = - ## creates a new ``XmlNode``. - new(result) - result.k = kind + ## Creates a new ``XmlNode``. + result = XmlNode(k: kind) + +proc newElement*(tag: sink string): XmlNode = + ## Creates a new ``XmlNode`` of kind ``xnElement`` with the given `tag`. 
+ ## + ## See also: + ## * `newXmlTree proc <#newXmlTree,string,openArray[XmlNode],XmlAttributes>`_ + ## * [<> macro](#<>.m,untyped) + runnableExamples: + var a = newElement("firstTag") + a.add newElement("childTag") + assert a.kind == xnElement + assert $a == """<firstTag> + <childTag /> +</firstTag>""" -proc newElement*(tag: string): XmlNode = - ## creates a new ``PXmlNode`` of kind ``xnText`` with the given `tag`. result = newXmlNode(xnElement) result.fTag = tag result.s = @[] - # init attributes lazily to safe memory + # init attributes lazily to save memory + +proc newText*(text: sink string): XmlNode = + ## Creates a new ``XmlNode`` of kind ``xnText`` with the text `text`. + runnableExamples: + var b = newText("my text") + assert b.kind == xnText + assert $b == "my text" -proc newText*(text: string): XmlNode = - ## creates a new ``PXmlNode`` of kind ``xnText`` with the text `text`. result = newXmlNode(xnText) result.fText = text -proc newComment*(comment: string): XmlNode = - ## creates a new ``PXmlNode`` of kind ``xnComment`` with the text `comment`. +proc newVerbatimText*(text: sink string): XmlNode {.since: (1, 3).} = + ## Creates a new ``XmlNode`` of kind ``xnVerbatimText`` with the text `text`. + ## **Since**: Version 1.3. + result = newXmlNode(xnVerbatimText) + result.fText = text + +proc newComment*(comment: sink string): XmlNode = + ## Creates a new ``XmlNode`` of kind ``xnComment`` with the text `comment`. + runnableExamples: + var c = newComment("my comment") + assert c.kind == xnComment + assert $c == "<!-- my comment -->" + result = newXmlNode(xnComment) result.fText = comment -proc newCData*(cdata: string): XmlNode = - ## creates a new ``PXmlNode`` of kind ``xnComment`` with the text `cdata`. +proc newCData*(cdata: sink string): XmlNode = + ## Creates a new ``XmlNode`` of kind ``xnCData`` with the text `cdata`. 
+ runnableExamples: + var d = newCData("my cdata") + assert d.kind == xnCData + assert $d == "<![CDATA[my cdata]]>" + result = newXmlNode(xnCData) result.fText = cdata proc newEntity*(entity: string): XmlNode = - ## creates a new ``PXmlNode`` of kind ``xnEntity`` with the text `entity`. - result = newXmlNode(xnCData) + ## Creates a new ``XmlNode`` of kind ``xnEntity`` with the text `entity`. + runnableExamples: + var e = newEntity("my entity") + assert e.kind == xnEntity + assert $e == "&my entity;" + + result = newXmlNode(xnEntity) result.fText = entity -proc text*(n: XmlNode): string {.inline.} = - ## gets the associated text with the node `n`. `n` can be a CDATA, Text, - ## comment, or entity node. - assert n.k in {xnText, xnComment, xnCData, xnEntity} +proc newXmlTree*(tag: sink string, children: openArray[XmlNode], + attributes: XmlAttributes = nil): XmlNode = + ## Creates a new XML tree with `tag`, `children` and `attributes`. + ## + ## See also: + ## * `newElement proc <#newElement,string>`_ + ## * [<> macro](#<>.m,untyped) + + runnableExamples: + var g = newElement("myTag") + g.add newText("some text") + g.add newComment("this is comment") + var h = newElement("secondTag") + h.add newEntity("some entity") + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + let k = newXmlTree("treeTag", [g, h], att) + + doAssert $k == """<treeTag key1="first value" key2="second value"> + <myTag>some text<!-- this is comment --></myTag> + <secondTag>&some entity;</secondTag> +</treeTag>""" + + result = newXmlNode(xnElement) + result.fTag = tag + newSeq(result.s, children.len) + for i in 0..children.len-1: result.s[i] = children[i] + result.fAttr = attributes + +proc text*(n: XmlNode): lent string {.inline.} = + ## Gets the associated text with the node `n`. + ## + ## `n` can be a CDATA, Text, comment, or entity node. 
+ ## + ## See also: + ## * `text= proc <#text=,XmlNode,string>`_ for text setter + ## * `tag proc <#tag,XmlNode>`_ for tag getter + ## * `tag= proc <#tag=,XmlNode,string>`_ for tag setter + ## * `innerText proc <#innerText,XmlNode>`_ + runnableExamples: + var c = newComment("my comment") + assert $c == "<!-- my comment -->" + assert c.text == "my comment" + + n.expect {xnText, xnVerbatimText, xnComment, xnCData, xnEntity} result = n.fText +proc `text=`*(n: XmlNode, text: sink string) {.inline.} = + ## Sets the associated text with the node `n`. + ## + ## `n` can be a CDATA, Text, comment, or entity node. + ## + ## See also: + ## * `text proc <#text,XmlNode>`_ for text getter + ## * `tag proc <#tag,XmlNode>`_ for tag getter + ## * `tag= proc <#tag=,XmlNode,string>`_ for tag setter + runnableExamples: + var e = newEntity("my entity") + assert $e == "&my entity;" + e.text = "a new entity text" + assert $e == "&a new entity text;" + + n.expect {xnText, xnVerbatimText, xnComment, xnCData, xnEntity} + n.fText = text + +proc tag*(n: XmlNode): lent string {.inline.} = + ## Gets the tag name of `n`. + ## + ## `n` has to be an ``xnElement`` node. + ## + ## See also: + ## * `text proc <#text,XmlNode>`_ for text getter + ## * `text= proc <#text=,XmlNode,string>`_ for text setter + ## * `tag= proc <#tag=,XmlNode,string>`_ for tag setter + ## * `innerText proc <#innerText,XmlNode>`_ + runnableExamples: + var a = newElement("firstTag") + a.add newElement("childTag") + assert $a == """<firstTag> + <childTag /> +</firstTag>""" + assert a.tag == "firstTag" + + n.expect xnElement + result = n.fTag + +proc `tag=`*(n: XmlNode, tag: sink string) {.inline.} = + ## Sets the tag name of `n`. + ## + ## `n` has to be an ``xnElement`` node. 
+ ## + ## See also: + ## * `text proc <#text,XmlNode>`_ for text getter + ## * `text= proc <#text=,XmlNode,string>`_ for text setter + ## * `tag proc <#tag,XmlNode>`_ for tag getter + runnableExamples: + var a = newElement("firstTag") + a.add newElement("childTag") + assert $a == """<firstTag> + <childTag /> +</firstTag>""" + a.tag = "newTag" + assert $a == """<newTag> + <childTag /> +</newTag>""" + + n.expect xnElement + n.fTag = tag + proc rawText*(n: XmlNode): string {.inline.} = - ## returns the underlying 'text' string by reference. + ## Returns the underlying 'text' string by reference. + ## ## This is only used for speed hacks. - shallowCopy(result, n.fText) + when defined(gcDestructors): + result = move(n.fText) + else: + shallowCopy(result, n.fText) proc rawTag*(n: XmlNode): string {.inline.} = - ## returns the underlying 'tag' string by reference. + ## Returns the underlying 'tag' string by reference. + ## ## This is only used for speed hacks. - shallowCopy(result, n.fTag) + when defined(gcDestructors): + result = move(n.fTag) + else: + shallowCopy(result, n.fTag) proc innerText*(n: XmlNode): string = - ## gets the inner text of `n`. `n` has to be an ``xnElement`` node. Only - ## ``xnText`` and ``xnEntity`` nodes are considered part of `n`'s inner text, - ## other child nodes are silently ignored. - result = "" - assert n.k == xnElement - for i in 0 .. n.s.len-1: - if n.s[i].k in {xnText, xnEntity}: result.add(n.s[i].fText) + ## Gets the inner text of `n`: + ## + ## - If `n` is `xnText` or `xnEntity`, returns its content. + ## - If `n` is `xnElement`, runs recursively on each child node and + ## concatenates the results. + ## - Otherwise returns an empty string. 
+ ## + ## See also: + ## * `text proc <#text,XmlNode>`_ + runnableExamples: + var f = newElement("myTag") + f.add newText("my text") + f.add newComment("my comment") + f.add newEntity("my entity") + assert $f == "<myTag>my text<!-- my comment -->&my entity;</myTag>" + assert innerText(f) == "my textmy entity" + + proc worker(res: var string, n: XmlNode) = + case n.k + of xnText, xnEntity: + res.add(n.fText) + of xnElement: + for sub in n.s: + worker(res, sub) + else: + discard -proc tag*(n: XmlNode): string {.inline.} = - ## gets the tag name of `n`. `n` has to be an ``xnElement`` node. - assert n.k == xnElement - result = n.fTag + result = "" + worker(result, n) proc add*(father, son: XmlNode) {.inline.} = - ## adds the child `son` to `father`. + ## Adds the child `son` to `father`. + ## `father` must be of `xnElement` type + ## + ## See also: + ## * `add proc <#add,XmlNode,openArray[XmlNode]>`_ + ## * `insert proc <#insert,XmlNode,XmlNode,int>`_ + ## * `insert proc <#insert,XmlNode,openArray[XmlNode],int>`_ + ## * `delete proc <#delete,XmlNode,Natural>`_ + ## * `delete proc <#delete.XmlNode,Slice[int]>`_ + ## * `replace proc <#replace.XmlNode,int,openArray[XmlNode]>`_ + ## * `replace proc <#replace.XmlNode,Slice[int],openArray[XmlNode]>`_ + runnableExamples: + var f = newElement("myTag") + f.add newText("my text") + f.add newElement("sonTag") + f.add newEntity("my entity") + assert $f == "<myTag>my text<sonTag />&my entity;</myTag>" + + father.expect xnElement add(father.s, son) +proc add*(father: XmlNode, sons: openArray[XmlNode]) {.inline.} = + ## Adds the children `sons` to `father`. 
+ ## `father` must be of `xnElement` type + ## + ## See also: + ## * `add proc <#add,XmlNode,XmlNode>`_ + ## * `insert proc <#insert,XmlNode,XmlNode,int>`_ + ## * `insert proc <#insert,XmlNode,openArray[XmlNode],int>`_ + ## * `delete proc <#delete,XmlNode,Natural>`_ + ## * `delete proc <#delete.XmlNode,Slice[int]>`_ + ## * `replace proc <#replace.XmlNode,int,openArray[XmlNode]>`_ + ## * `replace proc <#replace.XmlNode,Slice[int],openArray[XmlNode]>`_ + runnableExamples: + var f = newElement("myTag") + f.add(@[newText("my text"), newElement("sonTag"), newEntity("my entity")]) + assert $f == "<myTag>my text<sonTag />&my entity;</myTag>" + + father.expect xnElement + add(father.s, sons) + + +proc insert*(father, son: XmlNode, index: int) {.inline.} = + ## Inserts the child `son` to a given position in `father`. + ## + ## `father` must be of `xnElement` kind. + ## + ## See also: + ## * `insert proc <#insert,XmlNode,openArray[XmlNode],int>`_ + ## * `add proc <#add,XmlNode,XmlNode>`_ + ## * `add proc <#add,XmlNode,openArray[XmlNode]>`_ + ## * `delete proc <#delete,XmlNode,Natural>`_ + ## * `delete proc <#delete.XmlNode,Slice[int]>`_ + ## * `replace proc <#replace.XmlNode,int,openArray[XmlNode]>`_ + ## * `replace proc <#replace.XmlNode,Slice[int],openArray[XmlNode]>`_ + runnableExamples: + var f = newElement("myTag") + f.add newElement("first") + f.insert(newElement("second"), 0) + assert $f == """<myTag> + <second /> + <first /> +</myTag>""" + + father.expect xnElement + if len(father.s) > index: + insert(father.s, son, index) + else: + insert(father.s, son, len(father.s)) + +proc insert*(father: XmlNode, sons: openArray[XmlNode], index: int) {.inline.} = + ## Inserts the children openArray[`sons`] to a given position in `father`. + ## + ## `father` must be of `xnElement` kind. 
+ ## + ## See also: + ## * `insert proc <#insert,XmlNode,XmlNode,int>`_ + ## * `add proc <#add,XmlNode,XmlNode>`_ + ## * `add proc <#add,XmlNode,openArray[XmlNode]>`_ + ## * `delete proc <#delete,XmlNode,Natural>`_ + ## * `delete proc <#delete.XmlNode,Slice[int]>`_ + ## * `replace proc <#replace.XmlNode,int,openArray[XmlNode]>`_ + ## * `replace proc <#replace.XmlNode,Slice[int],openArray[XmlNode]>`_ + runnableExamples: + var f = newElement("myTag") + f.add newElement("first") + f.insert([newElement("second"), newElement("third")], 0) + assert $f == """<myTag> + <second /> + <third /> + <first /> +</myTag>""" + + father.expect xnElement + if len(father.s) > index: + insert(father.s, sons, index) + else: + insert(father.s, sons, len(father.s)) + +proc delete*(n: XmlNode, i: Natural) = + ## Deletes the `i`'th child of `n`. + ## + ## See also: + ## * `delete proc <#delete.XmlNode,Slice[int]>`_ + ## * `add proc <#add,XmlNode,XmlNode>`_ + ## * `add proc <#add,XmlNode,openArray[XmlNode]>`_ + ## * `insert proc <#insert,XmlNode,XmlNode,int>`_ + ## * `insert proc <#insert,XmlNode,openArray[XmlNode],int>`_ + ## * `replace proc <#replace.XmlNode,int,openArray[XmlNode]>`_ + ## * `replace proc <#replace.XmlNode,Slice[int],openArray[XmlNode]>`_ + runnableExamples: + var f = newElement("myTag") + f.add newElement("first") + f.insert(newElement("second"), 0) + f.delete(0) + assert $f == """<myTag> + <first /> +</myTag>""" + + n.expect xnElement + n.s.delete(i) + +proc delete*(n: XmlNode, slice: Slice[int]) = + ## Deletes the items `n[slice]` of `n`. 
+ ## + ## See also: + ## * `delete proc <#delete.XmlNode,int>`_ + ## * `add proc <#add,XmlNode,XmlNode>`_ + ## * `add proc <#add,XmlNode,openArray[XmlNode]>`_ + ## * `insert proc <#insert,XmlNode,XmlNode,int>`_ + ## * `insert proc <#insert,XmlNode,openArray[XmlNode],int>`_ + ## * `replace proc <#replace.XmlNode,int,openArray[XmlNode]>`_ + ## * `replace proc <#replace.XmlNode,Slice[int],openArray[XmlNode]>`_ + runnableExamples: + var f = newElement("myTag") + f.add newElement("first") + f.insert([newElement("second"), newElement("third")], 0) + f.delete(0..1) + assert $f == """<myTag> + <first /> +</myTag>""" + + n.expect xnElement + n.s.delete(slice) + +proc replace*(n: XmlNode, i: Natural, replacement: openArray[XmlNode]) = + ## Replaces the `i`'th child of `n` with `replacement` openArray. + ## + ## `n` must be of `xnElement` kind. + ## + ## See also: + ## * `replace proc <#replace.XmlNode,Slice[int],openArray[XmlNode]>`_ + ## * `add proc <#add,XmlNode,XmlNode>`_ + ## * `add proc <#add,XmlNode,openArray[XmlNode]>`_ + ## * `delete proc <#delete,XmlNode,Natural>`_ + ## * `delete proc <#delete.XmlNode,Slice[int]>`_ + ## * `insert proc <#insert,XmlNode,XmlNode,int>`_ + ## * `insert proc <#insert,XmlNode,openArray[XmlNode],int>`_ + runnableExamples: + var f = newElement("myTag") + f.add newElement("first") + f.insert(newElement("second"), 0) + f.replace(0, @[newElement("third"), newElement("fourth")]) + assert $f == """<myTag> + <third /> + <fourth /> + <first /> +</myTag>""" + + n.expect xnElement + n.s.delete(i) + n.s.insert(replacement, i) + +proc replace*(n: XmlNode, slice: Slice[int], replacement: openArray[XmlNode]) = + ## Deletes the items `n[slice]` of `n`. + ## + ## `n` must be of `xnElement` kind. 
+ ## + ## See also: + ## * `replace proc <#replace.XmlNode,int,openArray[XmlNode]>`_ + ## * `add proc <#add,XmlNode,XmlNode>`_ + ## * `add proc <#add,XmlNode,openArray[XmlNode]>`_ + ## * `delete proc <#delete,XmlNode,Natural>`_ + ## * `delete proc <#delete.XmlNode,Slice[int]>`_ + ## * `insert proc <#insert,XmlNode,XmlNode,int>`_ + ## * `insert proc <#insert,XmlNode,openArray[XmlNode],int>`_ + runnableExamples: + var f = newElement("myTag") + f.add newElement("first") + f.insert([newElement("second"), newElement("fifth")], 0) + f.replace(0..1, @[newElement("third"), newElement("fourth")]) + assert $f == """<myTag> + <third /> + <fourth /> + <first /> +</myTag>""" + + n.expect xnElement + n.s.delete(slice) + n.s.insert(replacement, slice.a) + proc len*(n: XmlNode): int {.inline.} = - ## returns the number `n`'s children. + ## Returns the number of `n`'s children. + runnableExamples: + var f = newElement("myTag") + f.add newElement("first") + f.insert(newElement("second"), 0) + assert len(f) == 2 if n.k == xnElement: result = len(n.s) proc kind*(n: XmlNode): XmlNodeKind {.inline.} = - ## returns `n`'s kind. + ## Returns `n`'s kind. + runnableExamples: + var a = newElement("firstTag") + assert a.kind == xnElement + var b = newText("my text") + assert b.kind == xnText result = n.k -proc `[]`* (n: XmlNode, i: int): XmlNode {.inline.} = - ## returns the `i`'th child of `n`. - assert n.k == xnElement +proc `[]`*(n: XmlNode, i: int): XmlNode {.inline.} = + ## Returns the `i`'th child of `n`. 
+ runnableExamples: + var f = newElement("myTag") + f.add newElement("first") + f.insert(newElement("second"), 0) + assert $f[1] == "<first />" + assert $f[0] == "<second />" + + n.expect xnElement result = n.s[i] -proc mget* (n: var XmlNode, i: int): var XmlNode {.inline.} = - ## returns the `i`'th child of `n` so that it can be modified - assert n.k == xnElement +proc `[]`*(n: var XmlNode, i: int): var XmlNode {.inline.} = + ## Returns the `i`'th child of `n` so that it can be modified. + n.expect xnElement result = n.s[i] +proc clear*(n: var XmlNode) = + ## Recursively clears all children of an XmlNode. + ## + runnableExamples: + var g = newElement("myTag") + g.add newText("some text") + g.add newComment("this is comment") + + var h = newElement("secondTag") + h.add newEntity("some entity") + + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + var k = newXmlTree("treeTag", [g, h], att) + + doAssert $k == """<treeTag key1="first value" key2="second value"> + <myTag>some text<!-- this is comment --></myTag> + <secondTag>&some entity;</secondTag> +</treeTag>""" + + clear(k) + doAssert $k == """<treeTag key1="first value" key2="second value" />""" + + for i in 0 ..< n.len: + clear(n[i]) + if n.k == xnElement: + n.s.setLen(0) + + iterator items*(n: XmlNode): XmlNode {.inline.} = - ## iterates over any child of `n`. - assert n.k == xnElement + ## Iterates over all direct children of `n`. + + runnableExamples: + var g = newElement("myTag") + g.add newText("some text") + g.add newComment("this is comment") + + var h = newElement("secondTag") + h.add newEntity("some entity") + g.add h + + assert $g == "<myTag>some text<!-- this is comment --><secondTag>&some entity;</secondTag></myTag>" + + # for x in g: # the same as `for x in items(g):` + # echo x + + # some text + # <!-- this is comment --> + # <secondTag>&some entity;<![CDATA[some cdata]]></secondTag> + + n.expect xnElement for i in 0 .. 
n.len-1: yield n[i] iterator mitems*(n: var XmlNode): var XmlNode {.inline.} = - ## iterates over any child of `n`. - assert n.k == xnElement - for i in 0 .. n.len-1: yield mget(n, i) + ## Iterates over all direct children of `n` so that they can be modified. + n.expect xnElement + for i in 0 .. n.len-1: yield n[i] + +proc toXmlAttributes*(keyValuePairs: varargs[tuple[key, + val: string]]): XmlAttributes = + ## Converts `{key: value}` pairs into `XmlAttributes`. + ## + runnableExamples: + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + var j = newElement("myTag") + j.attrs = att + + doAssert $j == """<myTag key1="first value" key2="second value" />""" + + newStringTable(keyValuePairs) proc attrs*(n: XmlNode): XmlAttributes {.inline.} = - ## gets the attributes belonging to `n`. + ## Gets the attributes belonging to `n`. + ## ## Returns `nil` if attributes have not been initialised for this node. - assert n.k == xnElement + ## + ## See also: + ## * `attrs= proc <#attrs=,XmlNode,XmlAttributes>`_ for XmlAttributes setter + ## * `attrsLen proc <#attrsLen,XmlNode>`_ for number of attributes + ## * `attr proc <#attr,XmlNode,string>`_ for finding an attribute + runnableExamples: + var j = newElement("myTag") + assert j.attrs == nil + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + j.attrs = att + assert j.attrs == att + + n.expect xnElement result = n.fAttr proc `attrs=`*(n: XmlNode, attr: XmlAttributes) {.inline.} = - ## sets the attributes belonging to `n`. - assert n.k == xnElement + ## Sets the attributes belonging to `n`. 
+ ## + ## See also: + ## * `attrs proc <#attrs,XmlNode>`_ for XmlAttributes getter + ## * `attrsLen proc <#attrsLen,XmlNode>`_ for number of attributes + ## * `attr proc <#attr,XmlNode,string>`_ for finding an attribute + runnableExamples: + var j = newElement("myTag") + assert j.attrs == nil + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + j.attrs = att + assert j.attrs == att + + n.expect xnElement n.fAttr = attr proc attrsLen*(n: XmlNode): int {.inline.} = - ## returns the number of `n`'s attributes. - assert n.k == xnElement + ## Returns the number of `n`'s attributes. + ## + ## See also: + ## * `attrs proc <#attrs,XmlNode>`_ for XmlAttributes getter + ## * `attrs= proc <#attrs=,XmlNode,XmlAttributes>`_ for XmlAttributes setter + ## * `attr proc <#attr,XmlNode,string>`_ for finding an attribute + runnableExamples: + var j = newElement("myTag") + assert j.attrsLen == 0 + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + j.attrs = att + assert j.attrsLen == 2 + + n.expect xnElement if not isNil(n.fAttr): result = len(n.fAttr) +proc attr*(n: XmlNode, name: string): string = + ## Finds the first attribute of `n` with a name of `name`. + ## Returns "" on failure. + ## + ## See also: + ## * `attrs proc <#attrs,XmlNode>`_ for XmlAttributes getter + ## * `attrs= proc <#attrs=,XmlNode,XmlAttributes>`_ for XmlAttributes setter + ## * `attrsLen proc <#attrsLen,XmlNode>`_ for number of attributes + runnableExamples: + var j = newElement("myTag") + let att = {"key1": "first value", "key2": "second value"}.toXmlAttributes + j.attrs = att + assert j.attr("key1") == "first value" + assert j.attr("key2") == "second value" + + n.expect xnElement + if n.attrs == nil: return "" + return n.attrs.getOrDefault(name) + proc clientData*(n: XmlNode): int {.inline.} = - ## gets the client data of `n`. The client data field is used by the HTML - ## parser and generator. + ## Gets the client data of `n`. 
+ ## + ## The client data field is used by the HTML parser and generator. result = n.fClientData proc `clientData=`*(n: XmlNode, data: int) {.inline.} = - ## sets the client data of `n`. The client data field is used by the HTML - ## parser and generator. + ## Sets the client data of `n`. + ## + ## The client data field is used by the HTML parser and generator. n.fClientData = data proc addEscaped*(result: var string, s: string) = - ## same as ``result.add(escape(s))``, but more efficient. + ## The same as `result.add(escape(s)) <#escape,string>`_, but more efficient. for c in items(s): case c of '<': result.add("<") of '>': result.add(">") of '&': result.add("&") of '"': result.add(""") - of '\'': result.add("'") - of '/': result.add("/") + of '\'': result.add("'") else: result.add(c) proc escape*(s: string): string = - ## escapes `s` for inclusion into an XML document. + ## Escapes `s` for inclusion into an XML document. + ## ## Escapes these characters: ## - ## ------------ ------------------- + ## ============ =================== ## char is converted to - ## ------------ ------------------- + ## ============ =================== ## ``<`` ``<`` ## ``>`` ``>`` ## ``&`` ``&`` ## ``"`` ``"`` - ## ``'`` ``'`` - ## ``/`` ``/`` - ## ------------ ------------------- + ## ``'`` ``'`` + ## ============ =================== + ## + ## You can also use `addEscaped proc <#addEscaped,string,string>`_. result = newStringOfCap(s.len) addEscaped(result, s) -proc addIndent(result: var string, indent: int) = - result.add("\n") - for i in 1..indent: result.add(' ') - -proc noWhitespace(n: XmlNode): bool = - #for i in 1..n.len-1: - # if n[i].kind != n[0].kind: return true - for i in 0..n.len-1: - if n[i].kind in {xnText, xnEntity}: return true +proc addIndent(result: var string, indent: int, addNewLines: bool) = + if addNewLines: + result.add("\n") + for i in 1 .. 
indent: + result.add(' ') + +proc addImpl(result: var string, n: XmlNode, indent = 0, indWidth = 2, + addNewLines = true, lastNodeIsText = false) = + proc noWhitespace(n: XmlNode): bool = + for i in 0 ..< n.len: + if n[i].kind in {xnText, xnVerbatimText, xnEntity}: return true + + proc addEscapedAttr(result: var string, s: string) = + # `addEscaped` alternative with less escaped characters. + # Only to be used for escaping attribute values enclosed in double quotes! + for c in items(s): + case c + of '<': result.add("<") + of '>': result.add(">") + of '&': result.add("&") + of '"': result.add(""") + else: result.add(c) -proc add*(result: var string, n: XmlNode, indent = 0, indWidth = 2) = - ## adds the textual representation of `n` to `result`. if n == nil: return + case n.k of xnElement: + if indent > 0 and not lastNodeIsText: + result.addIndent(indent, addNewLines) + + let + addNewLines = if n.noWhitespace(): + false + else: + addNewLines + result.add('<') result.add(n.fTag) if not isNil(n.fAttr): @@ -207,30 +768,34 @@ proc add*(result: var string, n: XmlNode, indent = 0, indWidth = 2) = result.add(' ') result.add(key) result.add("=\"") - result.addEscaped(val) + result.addEscapedAttr(val) result.add('"') - if n.len > 0: - result.add('>') - if n.len > 1: - if noWhitespace(n): - # for mixed leaves, we cannot output whitespace for readability, - # because this would be wrong. For example: ``a<b>b</b>`` is - # different from ``a <b>b</b>``. 
- for i in 0..n.len-1: result.add(n[i], indent+indWidth, indWidth) - else: - for i in 0..n.len-1: - result.addIndent(indent+indWidth) - result.add(n[i], indent+indWidth, indWidth) - result.addIndent(indent) - else: - result.add(n[0], indent+indWidth, indWidth) - result.add("</") - result.add(n.fTag) - result.add(">") - else: + + if n.len == 0: result.add(" />") + return + + let + indentNext = if n.noWhitespace(): + indent + else: + indent+indWidth + result.add('>') + var lastNodeIsText = false + for i in 0 ..< n.len: + result.addImpl(n[i], indentNext, indWidth, addNewLines, lastNodeIsText) + lastNodeIsText = (n[i].kind == xnText) or (n[i].kind == xnVerbatimText) + + if not n.noWhitespace(): + result.addIndent(indent, addNewLines) + + result.add("</") + result.add(n.fTag) + result.add(">") of xnText: result.addEscaped(n.fText) + of xnVerbatimText: + result.add(n.fText) of xnComment: result.add("<!-- ") result.addEscaped(n.fText) @@ -244,37 +809,118 @@ proc add*(result: var string, n: XmlNode, indent = 0, indWidth = 2) = result.add(n.fText) result.add(';') -const - xmlHeader* = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" - ## header to use for complete XML output +proc add*(result: var string, n: XmlNode, indent = 0, indWidth = 2, + addNewLines = true) {.inline.} = + ## Adds the textual representation of `n` to string `result`. + runnableExamples: + var + a = newElement("firstTag") + b = newText("my text") + c = newComment("my comment") + s = "" + s.add(c) + s.add(a) + s.add(b) + assert s == "<!-- my comment --><firstTag />my text" + result.addImpl(n, indent, indWidth, addNewLines) proc `$`*(n: XmlNode): string = - ## converts `n` into its string representation. No ``<$xml ...$>`` declaration - ## is produced, so that the produced XML fragments are composable. + ## Converts `n` into its string representation. + ## + ## No ``<$xml ...$>`` declaration is produced, so that the produced + ## XML fragments are composable. 
result = "" result.add(n) -proc newXmlTree*(tag: string, children: openArray[XmlNode], - attributes: XmlAttributes = nil): XmlNode = - ## creates a new XML tree with `tag`, `children` and `attributes` - result = newXmlNode(xnElement) - result.fTag = tag - newSeq(result.s, children.len) - for i in 0..children.len-1: result.s[i] = children[i] - result.fAttr = attributes +proc child*(n: XmlNode, name: string): XmlNode = + ## Finds the first child element of `n` with a name of `name`. + ## Returns `nil` on failure. + runnableExamples: + var f = newElement("myTag") + f.add newElement("firstSon") + f.add newElement("secondSon") + f.add newElement("thirdSon") + assert $(f.child("secondSon")) == "<secondSon />" + + n.expect xnElement + for i in items(n): + if i.kind == xnElement: + if i.tag == name: + return i + +proc findAll*(n: XmlNode, tag: string, result: var seq[XmlNode], + caseInsensitive = false) = + ## Iterates over all the children of `n` returning those matching `tag`. + ## + ## Found nodes satisfying the condition will be appended to the `result` + ## sequence. 
+ runnableExamples: + var + b = newElement("good") + c = newElement("bad") + d = newElement("BAD") + e = newElement("GOOD") + b.add newText("b text") + c.add newText("c text") + d.add newText("d text") + e.add newText("e text") + let a = newXmlTree("father", [b, c, d, e]) + var s = newSeq[XmlNode]() + a.findAll("good", s) + assert $s == "@[<good>b text</good>]" + s.setLen(0) + a.findAll("good", s, caseInsensitive = true) + assert $s == "@[<good>b text</good>, <GOOD>e text</GOOD>]" + s.setLen(0) + a.findAll("BAD", s) + assert $s == "@[<BAD>d text</BAD>]" + s.setLen(0) + a.findAll("BAD", s, caseInsensitive = true) + assert $s == "@[<bad>c text</bad>, <BAD>d text</BAD>]" + + n.expect xnElement + for child in n.items(): + if child.k != xnElement: + continue + if child.tag == tag or + (caseInsensitive and cmpIgnoreCase(child.tag, tag) == 0): + result.add(child) + child.findAll(tag, result) + +proc findAll*(n: XmlNode, tag: string, caseInsensitive = false): seq[XmlNode] = + ## A shortcut version to assign in let blocks. 
+ runnableExamples: + var + b = newElement("good") + c = newElement("bad") + d = newElement("BAD") + e = newElement("GOOD") + b.add newText("b text") + c.add newText("c text") + d.add newText("d text") + e.add newText("e text") + let a = newXmlTree("father", [b, c, d, e]) + assert $(a.findAll("good")) == "@[<good>b text</good>]" + assert $(a.findAll("BAD")) == "@[<BAD>d text</BAD>]" + assert $(a.findAll("good", caseInsensitive = true)) == "@[<good>b text</good>, <GOOD>e text</GOOD>]" + assert $(a.findAll("BAD", caseInsensitive = true)) == "@[<bad>c text</bad>, <BAD>d text</BAD>]" -proc xmlConstructor(e: NimNode): NimNode {.compileTime.} = - expectLen(e, 2) - var a = e[1] + newSeq(result, 0) + findAll(n, tag, result, caseInsensitive) + +proc xmlConstructor(a: NimNode): NimNode = if a.kind == nnkCall: result = newCall("newXmlTree", toStrLit(a[0])) var attrs = newNimNode(nnkBracket, a) - var newStringTabCall = newCall("newStringTable", attrs, - newIdentNode("modeCaseSensitive")) + var newStringTabCall = newCall(bindSym"newStringTable", attrs, + bindSym"modeCaseSensitive") var elements = newNimNode(nnkBracket, a) for i in 1..a.len-1: if a[i].kind == nnkExprEqExpr: - attrs.add(toStrLit(a[i][0])) + # In order to support attributes like `data-lang` we have to + # replace whitespace because `toStrLit` gives `data - lang`. + let attrName = toStrLit(a[i][0]).strVal.replace(" ", "") + attrs.add(newStrLitNode(attrName)) attrs.add(a[i][1]) #echo repr(attrs) else: @@ -286,72 +932,15 @@ proc xmlConstructor(e: NimNode): NimNode {.compileTime.} = else: result = newCall("newXmlTree", toStrLit(a)) -macro `<>`*(x: expr): expr {.immediate.} = +macro `<>`*(x: untyped): untyped = ## Constructor macro for XML. Example usage: ## - ## .. 
code-block:: nim + ## ```nim ## <>a(href="http://nim-lang.org", newText("Nim rules.")) + ## ``` ## - ## Produces an XML tree for:: + ## Produces an XML tree for: ## - ## <a href="http://nim-lang.org">Nim rules.</a> + ## <a href="http://nim-lang.org">Nim rules.</a> ## - let x = callsite() result = xmlConstructor(x) - -proc child*(n: XmlNode, name: string): XmlNode = - ## Finds the first child element of `n` with a name of `name`. - ## Returns `nil` on failure. - assert n.kind == xnElement - for i in items(n): - if i.kind == xnElement: - if i.tag == name: - return i - -proc attr*(n: XmlNode, name: string): string = - ## Finds the first attribute of `n` with a name of `name`. - ## Returns "" on failure. - assert n.kind == xnElement - if n.attrs == nil: return "" - return n.attrs[name] - -proc findAll*(n: XmlNode, tag: string, result: var seq[XmlNode]) = - ## Iterates over all the children of `n` returning those matching `tag`. - ## - ## Found nodes satisfying the condition will be appended to the `result` - ## sequence, which can't be nil or the proc will crash. Usage example: - ## - ## .. code-block:: - ## var - ## html: XmlNode - ## tags: seq[XmlNode] = @[] - ## - ## html = buildHtml() - ## findAll(html, "img", tags) - ## for imgTag in tags: - ## process(imgTag) - assert isNil(result) == false - assert n.k == xnElement - for child in n.items(): - if child.k != xnElement: - continue - if child.tag == tag: - result.add(child) - elif child.k == xnElement: - child.findAll(tag, result) - -proc findAll*(n: XmlNode, tag: string): seq[XmlNode] = - ## Shortcut version to assign in let blocks. Example: - ## - ## .. code-block:: - ## var html: XmlNode - ## - ## html = buildHtml(html) - ## for imgTag in html.findAll("img"): - ## process(imgTag) - newSeq(result, 0) - findAll(n, tag, result) - -when isMainModule: - assert """<a href="http://nim-lang.org">Nim rules.</a>""" == - $(<>a(href="http://nim-lang.org", newText("Nim rules."))) |