diff options
Diffstat (limited to 'lib/pure')
-rw-r--r-- | lib/pure/asyncdispatch.nim | 300 | ||||
-rw-r--r-- | lib/pure/collections/sets.nim | 4 | ||||
-rw-r--r-- | lib/pure/collections/tableimpl.nim | 8 | ||||
-rw-r--r-- | lib/pure/collections/tables.nim | 2 | ||||
-rw-r--r-- | lib/pure/httpclient.nim | 13 | ||||
-rw-r--r-- | lib/pure/json.nim | 13 | ||||
-rw-r--r-- | lib/pure/math.nim | 16 | ||||
-rw-r--r-- | lib/pure/memfiles.nim | 6 | ||||
-rw-r--r-- | lib/pure/os.nim | 27 | ||||
-rw-r--r-- | lib/pure/parsecsv.nim | 73 | ||||
-rw-r--r-- | lib/pure/punycode.nim | 166 | ||||
-rw-r--r-- | lib/pure/selectors.nim | 11 | ||||
-rw-r--r-- | lib/pure/streams.nim | 118 | ||||
-rw-r--r-- | lib/pure/strmisc.nim | 83 | ||||
-rw-r--r-- | lib/pure/strutils.nim | 242 | ||||
-rw-r--r-- | lib/pure/times.nim | 12 | ||||
-rw-r--r-- | lib/pure/unicode.nim | 238 | ||||
-rw-r--r-- | lib/pure/xmldom.nim | 15 |
18 files changed, 1216 insertions, 131 deletions
diff --git a/lib/pure/asyncdispatch.nim b/lib/pure/asyncdispatch.nim index 7d765ce75..bb19f87ef 100644 --- a/lib/pure/asyncdispatch.nim +++ b/lib/pure/asyncdispatch.nim @@ -434,6 +434,9 @@ when defined(windows) or defined(nimdoc): fd*: AsyncFD # TODO: Rename this. cb*: proc (fd: AsyncFD, bytesTransferred: Dword, errcode: OSErrorCode) {.closure,gcsafe.} + cell*: ForeignCell # we need this `cell` to protect our `cb` environment, + # when using RegisterWaitForSingleObject, because + # waiting is done in different thread. PDispatcher* = ref object of PDispatcherBase ioPort: Handle @@ -445,6 +448,15 @@ when defined(windows) or defined(nimdoc): PCustomOverlapped* = ref CustomOverlapped AsyncFD* = distinct int + + PostCallbackData = object + ioPort: Handle + handleFd: AsyncFD + waitFd: Handle + ovl: PCustomOverlapped + PostCallbackDataPtr = ptr PostCallbackData + + Callback = proc (fd: AsyncFD): bool {.closure,gcsafe.} {.deprecated: [TCompletionKey: CompletionKey, TAsyncFD: AsyncFD, TCustomOverlapped: CustomOverlapped, TCompletionData: CompletionData].} @@ -508,6 +520,13 @@ when defined(windows) or defined(nimdoc): customOverlapped.data.cb(customOverlapped.data.fd, lpNumberOfBytesTransferred, OSErrorCode(-1)) + + # If cell.data != nil, then system.protect(rawEnv(cb)) was called, + # so we need to dispose our `cb` environment, because it is not needed + # anymore. + if customOverlapped.data.cell.data != nil: + system.dispose(customOverlapped.data.cell) + GC_unref(customOverlapped) else: let errCode = osLastError() @@ -515,6 +534,8 @@ when defined(windows) or defined(nimdoc): assert customOverlapped.data.fd == lpCompletionKey.AsyncFD customOverlapped.data.cb(customOverlapped.data.fd, lpNumberOfBytesTransferred, errCode) + if customOverlapped.data.cell.data != nil: + system.dispose(customOverlapped.data.cell) GC_unref(customOverlapped) else: if errCode.int32 == WAIT_TIMEOUT: @@ -841,6 +862,101 @@ when defined(windows) or defined(nimdoc): # free ``ol``. 
return retFuture + proc sendTo*(socket: AsyncFD, data: pointer, size: int, saddr: ptr SockAddr, + saddrLen: Socklen, + flags = {SocketFlag.SafeDisconn}): Future[void] = + ## Sends ``data`` to specified destination ``saddr``, using + ## socket ``socket``. The returned future will complete once all data + ## has been sent. + verifyPresence(socket) + var retFuture = newFuture[void]("sendTo") + var dataBuf: TWSABuf + dataBuf.buf = cast[cstring](data) + dataBuf.len = size.ULONG + var bytesSent = 0.Dword + var lowFlags = 0.Dword + + # we will preserve address in our stack + var staddr: array[128, char] # SOCKADDR_STORAGE size is 128 bytes + var stalen: cint = cint(saddrLen) + zeroMem(addr(staddr[0]), 128) + copyMem(addr(staddr[0]), saddr, saddrLen) + + var ol = PCustomOverlapped() + GC_ref(ol) + ol.data = CompletionData(fd: socket, cb: + proc (fd: AsyncFD, bytesCount: Dword, errcode: OSErrorCode) = + if not retFuture.finished: + if errcode == OSErrorCode(-1): + retFuture.complete() + else: + retFuture.fail(newException(OSError, osErrorMsg(errcode))) + ) + + let ret = WSASendTo(socket.SocketHandle, addr dataBuf, 1, addr bytesSent, + lowFlags, cast[ptr SockAddr](addr(staddr[0])), + stalen, cast[POVERLAPPED](ol), nil) + if ret == -1: + let err = osLastError() + if err.int32 != ERROR_IO_PENDING: + GC_unref(ol) + retFuture.fail(newException(OSError, osErrorMsg(err))) + else: + retFuture.complete() + # We don't deallocate ``ol`` here because even though this completed + # immediately poll will still be notified about its completion and it will + # free ``ol``. + return retFuture + + proc recvFromInto*(socket: AsyncFD, data: pointer, size: int, + saddr: ptr SockAddr, saddrLen: ptr SockLen, + flags = {SocketFlag.SafeDisconn}): Future[int] = + ## Receives a datagram data from ``socket`` into ``buf``, which must + ## be at least of size ``size``, address of datagram's sender will be + ## stored into ``saddr`` and ``saddrLen``. 
Returned future will complete + ## once one datagram has been received, and will return size of packet + ## received. + verifyPresence(socket) + var retFuture = newFuture[int]("recvFromInto") + + var dataBuf = TWSABuf(buf: cast[cstring](data), len: size.ULONG) + + var bytesReceived = 0.Dword + var lowFlags = 0.Dword + + var ol = PCustomOverlapped() + GC_ref(ol) + ol.data = CompletionData(fd: socket, cb: + proc (fd: AsyncFD, bytesCount: Dword, errcode: OSErrorCode) = + if not retFuture.finished: + if errcode == OSErrorCode(-1): + assert bytesCount <= size + retFuture.complete(bytesCount) + else: + # datagram sockets don't have disconnection, + # so we can just raise an exception + retFuture.fail(newException(OSError, osErrorMsg(errcode))) + ) + + let res = WSARecvFrom(socket.SocketHandle, addr dataBuf, 1, + addr bytesReceived, addr lowFlags, + saddr, cast[ptr cint](saddrLen), + cast[POVERLAPPED](ol), nil) + if res == -1: + let err = osLastError() + if err.int32 != ERROR_IO_PENDING: + GC_unref(ol) + retFuture.fail(newException(OSError, osErrorMsg(err))) + else: + # Request completed immediately. + if bytesReceived != 0: + assert bytesReceived <= size + retFuture.complete(bytesReceived) + else: + if hasOverlappedIoCompleted(cast[POVERLAPPED](ol)): + retFuture.complete(bytesReceived) + return retFuture + proc acceptAddr*(socket: AsyncFD, flags = {SocketFlag.SafeDisconn}): Future[tuple[address: string, client: AsyncFD]] = ## Accepts a new connection. Returns a future containing the client socket @@ -953,6 +1069,126 @@ when defined(windows) or defined(nimdoc): ## Unregisters ``fd``. 
getGlobalDispatcher().handles.excl(fd) + {.push stackTrace:off.} + proc waitableCallback(param: pointer, + timerOrWaitFired: WINBOOL): void {.stdcall.} = + var p = cast[PostCallbackDataPtr](param) + discard postQueuedCompletionStatus(p.ioPort, timerOrWaitFired.Dword, + ULONG_PTR(p.handleFd), + cast[pointer](p.ovl)) + {.pop.} + + template registerWaitableEvent(mask) = + let p = getGlobalDispatcher() + var flags = (WT_EXECUTEINWAITTHREAD or WT_EXECUTEONLYONCE).Dword + var hEvent = wsaCreateEvent() + if hEvent == 0: + raiseOSError(osLastError()) + var pcd = cast[PostCallbackDataPtr](allocShared0(sizeof(PostCallbackData))) + pcd.ioPort = p.ioPort + pcd.handleFd = fd + var ol = PCustomOverlapped() + GC_ref(ol) + + ol.data = CompletionData(fd: fd, cb: + proc(fd: AsyncFD, bytesCount: Dword, errcode: OSErrorCode) = + # we excluding our `fd` because cb(fd) can register own handler + # for this `fd` + p.handles.excl(fd) + # unregisterWait() is called before callback, because appropriate + # winsockets function can re-enable event. + # https://msdn.microsoft.com/en-us/library/windows/desktop/ms741576(v=vs.85).aspx + if unregisterWait(pcd.waitFd) == 0: + let err = osLastError() + if err.int32 != ERROR_IO_PENDING: + raiseOSError(osLastError()) + if cb(fd): + # callback returned `true`, so we free all allocated resources + deallocShared(cast[pointer](pcd)) + if not wsaCloseEvent(hEvent): + raiseOSError(osLastError()) + # pcd.ovl will be unrefed in poll(). + else: + # callback returned `false` we need to continue + if p.handles.contains(fd): + # new callback was already registered with `fd`, so we free all + # allocated resources. This happens because in callback `cb` + # addRead/addWrite was called with same `fd`. 
+ deallocShared(cast[pointer](pcd)) + if not wsaCloseEvent(hEvent): + raiseOSError(osLastError()) + else: + # we need to include `fd` again + p.handles.incl(fd) + # and register WaitForSingleObject again + if not registerWaitForSingleObject(addr(pcd.waitFd), hEvent, + cast[WAITORTIMERCALLBACK](waitableCallback), + cast[pointer](pcd), INFINITE, flags): + # pcd.ovl will be unrefed in poll() + discard wsaCloseEvent(hEvent) + deallocShared(cast[pointer](pcd)) + raiseOSError(osLastError()) + else: + # we ref pcd.ovl one more time, because it will be unrefed in + # poll() + GC_ref(pcd.ovl) + ) + # We need to protect our callback environment value, so GC will not free it + # accidentally. + ol.data.cell = system.protect(rawEnv(ol.data.cb)) + + # This is main part of `hacky way` is using WSAEventSelect, so `hEvent` + # will be signaled when appropriate `mask` events will be triggered. + if wsaEventSelect(fd.SocketHandle, hEvent, mask) != 0: + GC_unref(ol) + deallocShared(cast[pointer](pcd)) + discard wsaCloseEvent(hEvent) + raiseOSError(osLastError()) + + pcd.ovl = ol + if not registerWaitForSingleObject(addr(pcd.waitFd), hEvent, + cast[WAITORTIMERCALLBACK](waitableCallback), + cast[pointer](pcd), INFINITE, flags): + GC_unref(ol) + deallocShared(cast[pointer](pcd)) + discard wsaCloseEvent(hEvent) + raiseOSError(osLastError()) + p.handles.incl(fd) + + proc addRead*(fd: AsyncFD, cb: Callback) = + ## Start watching the file descriptor for read availability and then call + ## the callback ``cb``. + ## + ## This is not ``pure`` mechanism for Windows Completion Ports (IOCP), + ## so if you can avoid it, please do it. Use `addRead` only if really + ## need it (main usecase is adaptation of `unix like` libraries to be + ## asynchronous on Windows). + ## If you use this function, you dont need to use asyncdispatch.recv() + ## or asyncdispatch.accept(), because they are using IOCP, please use + ## nativesockets.recv() and nativesockets.accept() instead. 
+ ## + ## Be sure your callback ``cb`` returns ``true``, if you want to remove + ## watch of `read` notifications, and ``false``, if you want to continue + ## receiving notifies. + registerWaitableEvent(FD_READ or FD_ACCEPT or FD_OOB or FD_CLOSE) + + proc addWrite*(fd: AsyncFD, cb: Callback) = + ## Start watching the file descriptor for write availability and then call + ## the callback ``cb``. + ## + ## This is not ``pure`` mechanism for Windows Completion Ports (IOCP), + ## so if you can avoid it, please do it. Use `addWrite` only if really + ## need it (main usecase is adaptation of `unix like` libraries to be + ## asynchronous on Windows). + ## If you use this function, you dont need to use asyncdispatch.send() + ## or asyncdispatch.connect(), because they are using IOCP, please use + ## nativesockets.send() and nativesockets.connect() instead. + ## + ## Be sure your callback ``cb`` returns ``true``, if you want to remove + ## watch of `write` notifications, and ``false``, if you want to continue + ## receiving notifies. + registerWaitableEvent(FD_WRITE or FD_CONNECT or FD_CLOSE) + initAll() else: import selectors @@ -1218,6 +1454,60 @@ else: addWrite(socket, cb) return retFuture + proc sendTo*(socket: AsyncFD, data: pointer, size: int, saddr: ptr SockAddr, + saddrLen: SockLen, + flags = {SocketFlag.SafeDisconn}): Future[void] = + ## Sends ``data`` of size ``size`` in bytes to specified destination + ## (``saddr`` of size ``saddrLen`` in bytes), using socket ``socket``. + ## The returned future will complete once all data has been sent. 
+ var retFuture = newFuture[void]("sendTo") + + # we will preserve address in our stack + var staddr: array[128, char] # SOCKADDR_STORAGE size is 128 bytes + var stalen = saddrLen + zeroMem(addr(staddr[0]), 128) + copyMem(addr(staddr[0]), saddr, saddrLen) + + proc cb(sock: AsyncFD): bool = + result = true + let res = sendto(sock.SocketHandle, data, size, MSG_NOSIGNAL, + cast[ptr SockAddr](addr(staddr[0])), stalen) + if res < 0: + let lastError = osLastError() + if lastError.int32 notin {EINTR, EWOULDBLOCK, EAGAIN}: + retFuture.fail(newException(OSError, osErrorMsg(lastError))) + else: + result = false # We still want this callback to be called. + else: + retFuture.complete() + + addWrite(socket, cb) + return retFuture + + proc recvFromInto*(socket: AsyncFD, data: pointer, size: int, + saddr: ptr SockAddr, saddrLen: ptr SockLen, + flags = {SocketFlag.SafeDisconn}): Future[int] = + ## Receives a datagram data from ``socket`` into ``data``, which must + ## be at least of size ``size`` in bytes, address of datagram's sender + ## will be stored into ``saddr`` and ``saddrLen``. Returned future will + ## complete once one datagram has been received, and will return size + ## of packet received. 
+ var retFuture = newFuture[int]("recvFromInto") + proc cb(sock: AsyncFD): bool = + result = true + let res = recvfrom(sock.SocketHandle, data, size.cint, flags.toOSFlags(), + saddr, saddrLen) + if res < 0: + let lastError = osLastError() + if lastError.int32 notin {EINTR, EWOULDBLOCK, EAGAIN}: + retFuture.fail(newException(OSError, osErrorMsg(lastError))) + else: + result = false + else: + retFuture.complete(res) + addRead(socket, cb) + return retFuture + proc acceptAddr*(socket: AsyncFD, flags = {SocketFlag.SafeDisconn}): Future[tuple[address: string, client: AsyncFD]] = var retFuture = newFuture[tuple[address: string, @@ -1401,13 +1691,11 @@ proc processBody(node, retFutureSym: NimNode, case node[1].kind of nnkIdent, nnkInfix: # await x - result = newNimNode(nnkStmtList, node) - var futureValue: NimNode - result.useVar(node[1], futureValue, futureValue, node) - # -> yield x - # -> x.read() + # await x or y + result = newNimNode(nnkYieldStmt, node).add(node[1]) # -> yield x of nnkCall, nnkCommand: # await foo(p, x) + # await foo p, x var futureValue: NimNode result.createVar("future" & $node[1][0].toStrLit, node[1], futureValue, futureValue, node) @@ -1613,7 +1901,7 @@ proc asyncSingleProc(prc: NimNode): NimNode {.compileTime.} = result[6] = outerProcBody #echo(treeRepr(result)) - #if prc[0].getName == "g": + #if prc[0].getName == "testInfix": # echo(toStrLit(result)) macro async*(prc: stmt): stmt {.immediate.} = diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index 20f73ded3..e2081e5bf 100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -261,6 +261,8 @@ template doWhile(a: expr, b: stmt): stmt = b if not a: break +proc default[T](t: typedesc[T]): T {.inline.} = discard + proc excl*[A](s: var HashSet[A], key: A) = ## Excludes `key` from the set `s`. 
## @@ -277,11 +279,13 @@ proc excl*[A](s: var HashSet[A], key: A) = var msk = high(s.data) if i >= 0: s.data[i].hcode = 0 + s.data[i].key = default(type(s.data[i].key)) dec(s.counter) while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 var j = i # The correctness of this depends on (h+1) in nextTry, var r = j # though may be adaptable to other simple sequences. s.data[i].hcode = 0 # mark current EMPTY + s.data[i].key = default(type(s.data[i].key)) doWhile ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): i = (i + 1) and msk # increment mod table size if isEmpty(s.data[i].hcode): # end of collision cluster; So all done diff --git a/lib/pure/collections/tableimpl.nim b/lib/pure/collections/tableimpl.nim index cc32fbedc..1bbf19ee9 100644 --- a/lib/pure/collections/tableimpl.nim +++ b/lib/pure/collections/tableimpl.nim @@ -110,18 +110,24 @@ template hasKeyOrPutImpl(enlarge) {.dirty, immediate.} = maybeRehashPutImpl(enlarge) else: result = true +proc default[T](t: typedesc[T]): T {.inline.} = discard + template delImpl() {.dirty, immediate.} = var hc: Hash var i = rawGet(t, key, hc) let msk = maxHash(t) if i >= 0: t.data[i].hcode = 0 + t.data[i].key = default(type(t.data[i].key)) + t.data[i].val = default(type(t.data[i].val)) dec(t.counter) block outer: while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 var j = i # The correctness of this depends on (h+1) in nextTry, var r = j # though may be adaptable to other simple sequences. t.data[i].hcode = 0 # mark current EMPTY + t.data[i].key = default(type(t.data[i].key)) + t.data[i].val = default(type(t.data[i].val)) while true: i = (i + 1) and msk # increment mod table size if isEmpty(t.data[i].hcode): # end of collision cluster; So all done @@ -137,4 +143,6 @@ template delImpl() {.dirty, immediate.} = template clearImpl() {.dirty, immediate.} = for i in 0 .. 
<t.data.len: t.data[i].hcode = 0 + t.data[i].key = default(type(t.data[i].key)) + t.data[i].val = default(type(t.data[i].val)) t.counter = 0 diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index da0b5422f..e454a43cb 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -754,7 +754,7 @@ proc len*[A](t: CountTable[A]): int = ## returns the number of keys in `t`. result = t.counter -proc clear*[A](t: CountTable[A] | CountTable[A]) = +proc clear*[A](t: CountTable[A] | CountTableRef[A]) = ## Resets the table so that it is empty. clearImpl() t.counter = 0 diff --git a/lib/pure/httpclient.nim b/lib/pure/httpclient.nim index b3a59551d..bc964861d 100644 --- a/lib/pure/httpclient.nim +++ b/lib/pure/httpclient.nim @@ -167,7 +167,7 @@ proc parseChunks(s: Socket, timeout: int): string = # Trailer headers will only be sent if the request specifies that we want # them: http://tools.ietf.org/html/rfc2616#section-3.6.1 -proc parseBody(s: Socket, headers: HttpHeaders, timeout: int): string = +proc parseBody(s: Socket, headers: HttpHeaders, httpVersion: string, timeout: int): string = result = "" if headers.getOrDefault"Transfer-Encoding" == "chunked": result = parseChunks(s, timeout) @@ -193,7 +193,7 @@ proc parseBody(s: Socket, headers: HttpHeaders, timeout: int): string = # -REGION- Connection: Close # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.5 - if headers.getOrDefault"Connection" == "close": + if headers.getOrDefault"Connection" == "close" or httpVersion == "1.0": var buf = "" while true: buf = newString(4000) @@ -249,7 +249,7 @@ proc parseResponse(s: Socket, getBody: bool, timeout: int): Response = if not fullyRead: httpError("Connection was closed before full request has been made") if getBody: - result.body = parseBody(s, result.headers, timeout) + result.body = parseBody(s, result.headers, result.version, timeout) else: result.body = "" @@ -685,7 +685,8 @@ proc parseChunks(client: AsyncHttpClient): 
Future[string] {.async.} = # them: http://tools.ietf.org/html/rfc2616#section-3.6.1 proc parseBody(client: AsyncHttpClient, - headers: HttpHeaders): Future[string] {.async.} = + headers: HttpHeaders, + httpVersion: string): Future[string] {.async.} = result = "" if headers.getOrDefault"Transfer-Encoding" == "chunked": result = await parseChunks(client) @@ -707,7 +708,7 @@ proc parseBody(client: AsyncHttpClient, # -REGION- Connection: Close # (http://tools.ietf.org/html/rfc2616#section-4.4) NR.5 - if headers.getOrDefault"Connection" == "close": + if headers.getOrDefault"Connection" == "close" or httpVersion == "1.0": var buf = "" while true: buf = await client.socket.recvFull(4000) @@ -761,7 +762,7 @@ proc parseResponse(client: AsyncHttpClient, if not fullyRead: httpError("Connection was closed before full request has been made") if getBody: - result.body = await parseBody(client, result.headers) + result.body = await parseBody(client, result.headers, result.version) else: result.body = "" diff --git a/lib/pure/json.nim b/lib/pure/json.nim index cea7a1e90..b4eecdf88 100644 --- a/lib/pure/json.nim +++ b/lib/pure/json.nim @@ -20,7 +20,8 @@ ## let ## small_json = """{"test": 1.3, "key2": true}""" ## jobj = parseJson(small_json) -## assert (jobj.kind == JObject) +## assert (jobj.kind == JObject) +## jobj["test"] = newJFloat(0.7) # create or update ## echo($jobj["test"].fnum) ## echo($jobj["key2"].bval) ## @@ -847,6 +848,16 @@ proc hasKey*(node: JsonNode, key: string): bool = assert(node.kind == JObject) result = node.fields.hasKey(key) +proc contains*(node: JsonNode, key: string): bool = + ## Checks if `key` exists in `node`. + assert(node.kind == JObject) + node.fields.hasKey(key) + +proc contains*(node: JsonNode, val: JsonNode): bool = + ## Checks if `val` exists in array `node`. 
+ assert(node.kind == JArray) + find(node.elems, val) >= 0 + proc existsKey*(node: JsonNode, key: string): bool {.deprecated.} = node.hasKey(key) ## Deprecated for `hasKey` diff --git a/lib/pure/math.nim b/lib/pure/math.nim index 2b903bedb..b4abf4dc8 100644 --- a/lib/pure/math.nim +++ b/lib/pure/math.nim @@ -138,11 +138,6 @@ when not defined(JS): proc exp*(x: float64): float64 {.importc: "exp", header: "<math.h>".} ## Computes the exponential function of `x` (pow(E, x)) - proc round0(x: float32): float32 {.importc: "roundf", header: "<math.h>".} - proc round0(x: float64): float64 {.importc: "round", header: "<math.h>".} - ## Converts a float to an int by rounding. Used internally by the round - ## function when the specified number of places is 0. - proc arccos*(x: float32): float32 {.importc: "acosf", header: "<math.h>".} proc arccos*(x: float64): float64 {.importc: "acos", header: "<math.h>".} ## Computes the arc cosine of `x` @@ -224,6 +219,17 @@ when not defined(JS): ## ## .. code-block:: nim ## echo ceil(-2.1) ## -2.0 + + when defined(windows) and defined(vcc): + proc round0[T: float32|float64](x: T): T = + ## Windows compilers prior to MSVC 2012 do not implement 'round', + ## 'roundl' or 'roundf'. + result = if x < 0.0: ceil(x - T(0.5)) else: floor(x + T(0.5)) + else: + proc round0(x: float32): float32 {.importc: "roundf", header: "<math.h>".} + proc round0(x: float64): float64 {.importc: "round", header: "<math.h>".} + ## Converts a float to an int by rounding. Used internally by the round + ## function when the specified number of places is 0. 
proc fmod*(x, y: float32): float32 {.importc: "fmodf", header: "<math.h>".} proc fmod*(x, y: float64): float64 {.importc: "fmod", header: "<math.h>".} diff --git a/lib/pure/memfiles.nim b/lib/pure/memfiles.nim index b9c574944..ff3e74d59 100644 --- a/lib/pure/memfiles.nim +++ b/lib/pure/memfiles.nim @@ -257,12 +257,10 @@ type MemSlice* = object ## represent slice of a MemFile for iteration over deli data*: pointer size*: int -proc c_memcpy(a, b: pointer, n: int) {.importc: "memcpy", header: "<string.h>".} - proc `$`*(ms: MemSlice): string {.inline.} = ## Return a Nim string built from a MemSlice. var buf = newString(ms.size) - c_memcpy(addr(buf[0]), ms.data, ms.size) + copyMem(addr(buf[0]), ms.data, ms.size) buf[ms.size] = '\0' result = buf @@ -329,7 +327,7 @@ iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l', eat='\r'): T for ms in memSlices(mfile, delim, eat): buf.setLen(ms.size) - c_memcpy(addr(buf[0]), ms.data, ms.size) + copyMem(addr(buf[0]), ms.data, ms.size) buf[ms.size] = '\0' yield buf diff --git a/lib/pure/os.nim b/lib/pure/os.nim index dee227c69..668793c20 100644 --- a/lib/pure/os.nim +++ b/lib/pure/os.nim @@ -26,7 +26,6 @@ elif defined(posix): else: {.error: "OS module not ported to your operating system!".} -include "system/ansi_c" include ospaths when defined(posix): @@ -37,6 +36,23 @@ when defined(posix): var pathMax {.importc: "PATH_MAX", header: "<stdlib.h>".}: cint +proc c_remove(filename: cstring): cint {. + importc: "remove", header: "<stdio.h>".} +proc c_rename(oldname, newname: cstring): cint {. + importc: "rename", header: "<stdio.h>".} +proc c_system(cmd: cstring): cint {. + importc: "system", header: "<stdlib.h>".} +proc c_strerror(errnum: cint): cstring {. + importc: "strerror", header: "<string.h>".} +proc c_strlen(a: cstring): cint {. + importc: "strlen", header: "<string.h>", noSideEffect.} +proc c_getenv(env: cstring): cstring {. + importc: "getenv", header: "<stdlib.h>".} +proc c_putenv(env: cstring): cint {. 
+ importc: "putenv", header: "<stdlib.h>".} + +var errno {.importc, header: "<errno.h>".}: cint + proc osErrorMsg*(): string {.rtl, extern: "nos$1", deprecated.} = ## Retrieves the operating system's error flag, ``errno``. ## On Windows ``GetLastError`` is checked before ``errno``. @@ -61,7 +77,7 @@ proc osErrorMsg*(): string {.rtl, extern: "nos$1", deprecated.} = result = $msgbuf if msgbuf != nil: localFree(msgbuf) if errno != 0'i32: - result = $os.strerror(errno) + result = $os.c_strerror(errno) {.push warning[deprecated]: off.} proc raiseOSError*(msg: string = "") {.noinline, rtl, extern: "nos$1", @@ -114,7 +130,7 @@ proc osErrorMsg*(errorCode: OSErrorCode): string = if msgbuf != nil: localFree(msgbuf) else: if errorCode != OSErrorCode(0'i32): - result = $os.strerror(errorCode.int32) + result = $os.c_strerror(errorCode.int32) proc raiseOSError*(errorCode: OSErrorCode; additionalInfo = "") {.noinline.} = ## Raises an ``OSError`` exception. The ``errorCode`` will determine the @@ -799,7 +815,10 @@ iterator walkFiles*(pattern: string): string {.tags: [ReadDirEffect].} = if res == 0: for i in 0.. f.gl_pathc - 1: assert(f.gl_pathv[i] != nil) - yield $f.gl_pathv[i] + let path = $f.gl_pathv[i] + # Make sure it's a file and not a directory + if fileExists(path): + yield path type PathComponent* = enum ## Enumeration specifying a path component. diff --git a/lib/pure/parsecsv.nim b/lib/pure/parsecsv.nim index bb291bcbc..77b145a73 100644 --- a/lib/pure/parsecsv.nim +++ b/lib/pure/parsecsv.nim @@ -25,6 +25,28 @@ ## echo "##", val, "##" ## close(x) ## +## For CSV files with a header row, the header can be read and then used as a +## reference for item access with `rowEntry <#rowEntry.CsvParser.string>`_: +## +## .. 
code-block:: nim +## import parsecsv +## import os +## # Prepare a file +## var csv_content = """One,Two,Three,Four +## 1,2,3,4 +## 10,20,30,40 +## 100,200,300,400 +## """ +## writeFile("temp.csv", csv_content) +## +## var p: CsvParser +## p.open("temp.csv") +## p.readHeaderRow() +## while p.readRow(): +## echo "new row: " +## for col in items(p.headers): +## echo "##", col, ":", p.rowEntry(col), "##" +## p.close() import lexbase, streams @@ -37,6 +59,9 @@ type sep, quote, esc: char skipWhite: bool currRow: int + headers*: seq[string] ## The columns that are defined in the csv file + ## (read using `readHeaderRow <#readHeaderRow.CsvParser>`_). + ## Used with `rowEntry <#rowEntry.CsvParser.string>`_. CsvError* = object of IOError ## exception that is raised if ## a parsing error occurs @@ -177,6 +202,22 @@ proc close*(my: var CsvParser) {.inline.} = ## closes the parser `my` and its associated input stream. lexbase.close(my) +proc readHeaderRow*(my: var CsvParser) = + ## Reads the first row and creates a look-up table for column numbers + ## See also `rowEntry <#rowEntry.CsvParser.string>`_. + var present = my.readRow() + if present: + my.headers = my.row + +proc rowEntry*(my: var CsvParser, entry: string): string = + ## Reads a specified `entry` from the current row. + ## + ## Assumes that `readHeaderRow <#readHeaderRow.CsvParser>`_ has already been + ## called. 
+ var index = my.headers.find(entry) + if index >= 0: + result = my.row[index] + when not defined(testing) and isMainModule: import os var s = newFileStream(paramStr(1), fmRead) @@ -189,3 +230,35 @@ when not defined(testing) and isMainModule: echo "##", val, "##" close(x) +when isMainModule: + import os + import strutils + block: # Tests for reading the header row + var content = "One,Two,Three,Four\n1,2,3,4\n10,20,30,40,\n100,200,300,400\n" + writeFile("temp.csv", content) + + var p: CsvParser + p.open("temp.csv") + p.readHeaderRow() + while p.readRow(): + var zeros = repeat('0', p.currRow-2) + doAssert p.rowEntry("One") == "1" & zeros + doAssert p.rowEntry("Two") == "2" & zeros + doAssert p.rowEntry("Three") == "3" & zeros + doAssert p.rowEntry("Four") == "4" & zeros + p.close() + + when not defined(testing): + var parser: CsvParser + parser.open("temp.csv") + parser.readHeaderRow() + while parser.readRow(): + echo "new row: " + for col in items(parser.headers): + echo "##", col, ":", parser.rowEntry(col), "##" + parser.close() + removeFile("temp.csv") + + # Tidy up + removeFile("temp.csv") + diff --git a/lib/pure/punycode.nim b/lib/pure/punycode.nim new file mode 100644 index 000000000..4f35de487 --- /dev/null +++ b/lib/pure/punycode.nim @@ -0,0 +1,166 @@ + +import strutils +import unicode + +# issue #3045 + +const + Base = 36 + TMin = 1 + TMax = 26 + Skew = 38 + Damp = 700 + InitialBias = 72 + InitialN = 128 + Delimiter = '-' + +type + PunyError* = object of Exception + +proc decodeDigit(x: char): int {.raises: [PunyError].} = + if '0' <= x and x <= '9': + result = ord(x) - (ord('0') - 26) + elif 'A' <= x and x <= 'Z': + result = ord(x) - ord('A') + elif 'a' <= x and x <= 'z': + result = ord(x) - ord('a') + else: + raise newException(PunyError, "Bad input") + +proc encodeDigit(digit: int): Rune {.raises: [PunyError].} = + if 0 <= digit and digit < 26: + result = Rune(digit + ord('a')) + elif 26 <= digit and digit < 36: + result = Rune(digit + (ord('0') - 26)) + 
else: + raise newException(PunyError, "internal error in punycode encoding") + +proc isBasic(c: char): bool = ord(c) < 0x80 +proc isBasic(r: Rune): bool = int(r) < 0x80 + +proc adapt(delta, numPoints: int, first: bool): int = + var d = if first: delta div Damp else: delta div 2 + d += d div numPoints + var k = 0 + while d > ((Base-TMin)*TMax) div 2: + d = d div (Base - TMin) + k += Base + result = k + (Base - TMin + 1) * d div (d + Skew) + +proc encode*(prefix, s: string): string {.raises: [PunyError].} = + ## Encode a string that may contain Unicode. + ## Prepend `prefix` to the result + result = prefix + var (d, n, bias) = (0, InitialN, InitialBias) + var (b, remaining) = (0, 0) + for r in s.runes: + if r.isBasic: + # basic Ascii character + inc b + result.add($r) + else: + # special character + inc remaining + + var h = b + if b > 0: + result.add(Delimiter) # we have some Ascii chars + while remaining != 0: + var m: int = high(int32) + for r in s.runes: + if m > int(r) and int(r) >= n: + m = int(r) + d += (m - n) * (h + 1) + if d < 0: + raise newException(PunyError, "invalid label " & s) + n = m + for r in s.runes: + if int(r) < n: + inc d + if d < 0: + raise newException(PunyError, "invalid label " & s) + continue + if int(r) > n: + continue + var q = d + var k = Base + while true: + var t = k - bias + if t < TMin: + t = TMin + elif t > TMax: + t = TMax + if q < t: + break + result.add($encodeDigit(t + (q - t) mod (Base - t))) + q = (q - t) div (Base - t) + k += Base + result.add($encodeDigit(q)) + bias = adapt(d, h + 1, h == b) + d = 0 + inc h + dec remaining + inc d + inc n + +proc encode*(s: string): string {.raises: [PunyError].} = + ## Encode a string that may contain Unicode. Prefix is empty. 
+ result = encode("", s) + +proc decode*(encoded: string): string {.raises: [PunyError].} = + ## Decode a Punycode-encoded string + var + n = InitialN + i = 0 + bias = InitialBias + var d = rfind(encoded, Delimiter) + result = "" + + if d > 0: + # found Delimiter + for j in 0..<d: + var c = encoded[j] # char + if not c.isBasic: + raise newException(PunyError, "Encoded contains a non-basic char") + result.add(c) # add the character + inc d + else: + d = 0 # set to first index + + while (d < len(encoded)): + var oldi = i + var w = 1 + var k = Base + while true: + if d == len(encoded): + raise newException(PunyError, "Bad input: " & encoded) + var c = encoded[d]; inc d + var digit = int(decodeDigit(c)) + if digit > (high(int32) - i) div w: + raise newException(PunyError, "Too large a value: " & $digit) + i += digit * w + var t: int + if k <= bias: + t = TMin + elif k >= bias + TMax: + t = TMax + else: + t = k - bias + if digit < t: + break + w *= Base - t + k += Base + bias = adapt(i - oldi, runelen(result) + 1, oldi == 0) + + if i div (runelen(result) + 1) > high(int32) - n: + raise newException(PunyError, "Value too large") + + n += i div (runelen(result) + 1) + i = i mod (runelen(result) + 1) + insert(result, $Rune(n), i) + inc i + +when isMainModule: + assert(decode(encode("", "bücher")) == "bücher") + assert(decode(encode("münchen")) == "münchen") + assert encode("xn--", "münchen") == "xn--mnchen-3ya" diff --git a/lib/pure/selectors.nim b/lib/pure/selectors.nim index 89e92c133..098b78c95 100644 --- a/lib/pure/selectors.nim +++ b/lib/pure/selectors.nim @@ -132,11 +132,12 @@ elif defined(linux): s.fds[fd].events = events proc unregister*(s: var Selector, fd: SocketHandle) = - if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, fd, nil) != 0: - let err = osLastError() - if err.cint notin {ENOENT, EBADF}: - # TODO: Why do we sometimes get an EBADF? Is this normal? 
- raiseOSError(err) + if s.fds[fd].events != {}: + if epoll_ctl(s.epollFD, EPOLL_CTL_DEL, fd, nil) != 0: + let err = osLastError() + if err.cint notin {ENOENT, EBADF}: + # TODO: Why do we sometimes get an EBADF? Is this normal? + raiseOSError(err) s.fds.del(fd) proc close*(s: var Selector) = diff --git a/lib/pure/streams.nim b/lib/pure/streams.nim index c606b4680..eea06f4ce 100644 --- a/lib/pure/streams.nim +++ b/lib/pure/streams.nim @@ -306,68 +306,68 @@ proc peekLine*(s: Stream): TaintedString = defer: setPosition(s, pos) result = readLine(s) -type - StringStream* = ref StringStreamObj ## a stream that encapsulates a string - StringStreamObj* = object of StreamObj - data*: string - pos: int - -{.deprecated: [PStringStream: StringStream, TStringStream: StringStreamObj].} - -proc ssAtEnd(s: Stream): bool = - var s = StringStream(s) - return s.pos >= s.data.len - -proc ssSetPosition(s: Stream, pos: int) = - var s = StringStream(s) - s.pos = clamp(pos, 0, s.data.len) - -proc ssGetPosition(s: Stream): int = - var s = StringStream(s) - return s.pos - -proc ssReadData(s: Stream, buffer: pointer, bufLen: int): int = - var s = StringStream(s) - result = min(bufLen, s.data.len - s.pos) - if result > 0: - copyMem(buffer, addr(s.data[s.pos]), result) - inc(s.pos, result) - -proc ssPeekData(s: Stream, buffer: pointer, bufLen: int): int = - var s = StringStream(s) - result = min(bufLen, s.data.len - s.pos) - if result > 0: - copyMem(buffer, addr(s.data[s.pos]), result) - -proc ssWriteData(s: Stream, buffer: pointer, bufLen: int) = - var s = StringStream(s) - if bufLen <= 0: - return - if s.pos + bufLen > s.data.len: - setLen(s.data, s.pos + bufLen) - copyMem(addr(s.data[s.pos]), buffer, bufLen) - inc(s.pos, bufLen) - -proc ssClose(s: Stream) = - var s = StringStream(s) - s.data = nil - -proc newStringStream*(s: string = ""): StringStream = - ## creates a new stream from the string `s`. 
- new(result) - result.data = s - result.pos = 0 - result.closeImpl = ssClose - result.atEndImpl = ssAtEnd - result.setPositionImpl = ssSetPosition - result.getPositionImpl = ssGetPosition - result.readDataImpl = ssReadData - result.peekDataImpl = ssPeekData - result.writeDataImpl = ssWriteData - when not defined(js): type + StringStream* = ref StringStreamObj ## a stream that encapsulates a string + StringStreamObj* = object of StreamObj + data*: string + pos: int + + {.deprecated: [PStringStream: StringStream, TStringStream: StringStreamObj].} + + proc ssAtEnd(s: Stream): bool = + var s = StringStream(s) + return s.pos >= s.data.len + + proc ssSetPosition(s: Stream, pos: int) = + var s = StringStream(s) + s.pos = clamp(pos, 0, s.data.len) + + proc ssGetPosition(s: Stream): int = + var s = StringStream(s) + return s.pos + + proc ssReadData(s: Stream, buffer: pointer, bufLen: int): int = + var s = StringStream(s) + result = min(bufLen, s.data.len - s.pos) + if result > 0: + copyMem(buffer, addr(s.data[s.pos]), result) + inc(s.pos, result) + + proc ssPeekData(s: Stream, buffer: pointer, bufLen: int): int = + var s = StringStream(s) + result = min(bufLen, s.data.len - s.pos) + if result > 0: + copyMem(buffer, addr(s.data[s.pos]), result) + + proc ssWriteData(s: Stream, buffer: pointer, bufLen: int) = + var s = StringStream(s) + if bufLen <= 0: + return + if s.pos + bufLen > s.data.len: + setLen(s.data, s.pos + bufLen) + copyMem(addr(s.data[s.pos]), buffer, bufLen) + inc(s.pos, bufLen) + + proc ssClose(s: Stream) = + var s = StringStream(s) + s.data = nil + + proc newStringStream*(s: string = ""): StringStream = + ## creates a new stream from the string `s`. 
+ new(result) + result.data = s + result.pos = 0 + result.closeImpl = ssClose + result.atEndImpl = ssAtEnd + result.setPositionImpl = ssSetPosition + result.getPositionImpl = ssGetPosition + result.readDataImpl = ssReadData + result.peekDataImpl = ssPeekData + result.writeDataImpl = ssWriteData + + type FileStream* = ref FileStreamObj ## a stream that encapsulates a `File` FileStreamObj* = object of Stream f: File diff --git a/lib/pure/strmisc.nim b/lib/pure/strmisc.nim new file mode 100644 index 000000000..89ef2fcd2 --- /dev/null +++ b/lib/pure/strmisc.nim @@ -0,0 +1,83 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2016 Joey Payne +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module contains various string utility routines that are uncommonly +## used in comparison to `strutils <strutils.html>`_. + +import strutils + +{.deadCodeElim: on.} + +proc expandTabs*(s: string, tabSize: int = 8): string {.noSideEffect, + procvar.} = + ## Expand tab characters in `s` by `tabSize` spaces + + result = newStringOfCap(s.len + s.len shr 2) + var pos = 0 + + template addSpaces(n) = + for j in 0 ..< n: + result.add(' ') + pos += 1 + + for i in 0 ..< len(s): + let c = s[i] + if c == '\t': + let + denominator = if tabSize > 0: tabSize else: 1 + numSpaces = tabSize - pos mod denominator + + addSpaces(numSpaces) + else: + result.add(c) + pos += 1 + if c == '\l': + pos = 0 + +proc partition*(s: string, sep: string, + right: bool = false): (string, string, string) + {.noSideEffect, procvar.} = + ## Split the string at the first or last occurrence of `sep` into a 3-tuple + ## + ## Returns a 3 string tuple of (beforeSep, `sep`, afterSep) or + ## (`s`, "", "") if `sep` is not found and `right` is false or + ## ("", "", `s`) if `sep` is not found and `right` is true + let position = if right: s.rfind(sep) else: s.find(sep) + if position != -1: + return (s[0 ..< position], sep, s[position + sep.len ..< s.len]) + return 
if right: ("", "", s) else: (s, "", "") + +proc rpartition*(s: string, sep: string): (string, string, string) + {.noSideEffect, procvar.} = + ## Split the string at the last occurrence of `sep` into a 3-tuple + ## + ## Returns a 3 string tuple of (beforeSep, `sep`, afterSep) or + ## ("", "", `s`) if `sep` is not found + return partition(s, sep, right = true) + +when isMainModule: + doAssert expandTabs("\t", 4) == " " + doAssert expandTabs("\tfoo\t", 4) == " foo " + doAssert expandTabs("\tfoo\tbar", 4) == " foo bar" + doAssert expandTabs("\tfoo\tbar\t", 4) == " foo bar " + doAssert expandTabs("", 4) == "" + doAssert expandTabs("", 0) == "" + doAssert expandTabs("\t\t\t", 0) == "" + + doAssert partition("foo:bar", ":") == ("foo", ":", "bar") + doAssert partition("foobarbar", "bar") == ("foo", "bar", "bar") + doAssert partition("foobarbar", "bank") == ("foobarbar", "", "") + doAssert partition("foobarbar", "foo") == ("", "foo", "barbar") + doAssert partition("foofoobar", "bar") == ("foofoo", "bar", "") + + doAssert rpartition("foo:bar", ":") == ("foo", ":", "bar") + doAssert rpartition("foobarbar", "bar") == ("foobar", "bar", "") + doAssert rpartition("foobarbar", "bank") == ("", "", "foobarbar") + doAssert rpartition("foobarbar", "foo") == ("", "foo", "barbar") + doAssert rpartition("foofoobar", "bar") == ("foofoo", "bar", "") diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index be80685ab..b6edb834c 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -15,6 +15,7 @@ import parseutils from math import pow, round, floor, log10 +from algorithm import reverse {.deadCodeElim: on.} @@ -325,7 +326,8 @@ proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} = result[i] = chr(val mod 8 + ord('0')) val = val div 8 -iterator split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): string = +iterator split*(s: string, seps: set[char] = Whitespace, + maxsplit: int = -1): string = ## Splits the string `s` into substrings 
using a group of separators. ## ## Substrings are separated by a substring containing only `seps`. Note @@ -422,10 +424,13 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string = dec(splits) inc(last) -proc substrEq(s: string, a, L: int, x: string): bool = +proc substrEq(s: string, pos: int, substr: string): bool = var i = 0 - while i < L and s[a+i] == x[i]: inc i - result = i == L + var length = substr.len + while i < length and s[pos+i] == substr[i]: + inc i + + return i == length iterator split*(s: string, sep: string, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a string separator. @@ -433,10 +438,11 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string = ## Substrings are separated by the string `sep`. var last = 0 var splits = maxsplit + if len(s) > 0: while last <= len(s): var first = last - while last < len(s) and not s.substrEq(last, sep.len, sep): + while last < len(s) and not s.substrEq(last, sep): inc(last) if splits == 0: last = len(s) yield substr(s, first, last-1) @@ -444,6 +450,108 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string = dec(splits) inc(last, sep.len) +# --------- Private templates for different rsplit separators ----------- + +template stringHasSep(s: string, index: int, seps: set[char]): bool = + s[index] in seps + +template stringHasSep(s: string, index: int, sep: char): bool = + s[index] == sep + +template stringHasSep(s: string, index: int, sep: string): bool = + s.substrEq(index, sep) + +template rsplitCommon(s, sep, maxsplit, sepLen) = + ## Common code for rsplit functions + var + last = s.len - 1 + first = last + splits = maxsplit + startPos = 0 + + if len(s) > 0: + # go to -1 in order to get separators at the beginning + while first >= -1: + while first >= 0 and not stringHasSep(s, first, sep): + dec(first) + + if splits == 0: + # No more splits means set first to the beginning + first = -1 + + if first == -1: + startPos = 0 + else: + startPos = 
first + sepLen + + yield substr(s, startPos, last) + + if splits == 0: + break + + dec(splits) + dec(first) + + last = first + +iterator rsplit*(s: string, seps: set[char] = Whitespace, + maxsplit: int = -1): string = + ## Splits the string `s` into substrings from the right using a + ## string separator. Works exactly the same as `split iterator + ## <#split.i,string,char>`_ except in reverse order. + ## + ## .. code-block:: nim + ## for piece in "foo bar".rsplit(WhiteSpace): + ## echo piece + ## + ## Results in: + ## + ## .. code-block:: nim + ## "bar" + ## "foo" + ## + ## Substrings are separated from the right by the set of chars `seps` + + rsplitCommon(s, seps, maxsplit, 1) + +iterator rsplit*(s: string, sep: char, + maxsplit: int = -1): string = + ## Splits the string `s` into substrings from the right using a + ## string separator. Works exactly the same as `split iterator + ## <#split.i,string,char>`_ except in reverse order. + ## + ## .. code-block:: nim + ## for piece in "foo:bar".rsplit(':'): + ## echo piece + ## + ## Results in: + ## + ## .. code-block:: nim + ## "bar" + ## "foo" + ## + ## Substrings are separated from the right by the char `sep` + rsplitCommon(s, sep, maxsplit, 1) + +iterator rsplit*(s: string, sep: string, maxsplit: int = -1, + keepSeparators: bool = false): string = + ## Splits the string `s` into substrings from the right using a + ## string separator. Works exactly the same as `split iterator + ## <#split.i,string,string>`_ except in reverse order. + ## + ## .. code-block:: nim + ## for piece in "foothebar".rsplit("the"): + ## echo piece + ## + ## Results in: + ## + ## .. code-block:: nim + ## "bar" + ## "foo" + ## + ## Substrings are separated from the right by the string `sep` + rsplitCommon(s, sep, maxsplit, sep.len) + iterator splitLines*(s: string): string = ## Splits the string `s` into its containing lines. 
## @@ -531,6 +639,73 @@ proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEff ## `split iterator <#split.i,string,string>`_. accumulateResult(split(s, sep, maxsplit)) +proc rsplit*(s: string, seps: set[char] = Whitespace, + maxsplit: int = -1): seq[string] + {.noSideEffect, rtl, extern: "nsuRSplitCharSet".} = + ## The same as the `rsplit iterator <#rsplit.i,string,set[char]>`_, but is a + ## proc that returns a sequence of substrings. + ## + ## A possible common use case for `rsplit` is path manipulation, + ## particularly on systems that don't use a common delimiter. + ## + ## For example, if a system had `#` as a delimiter, you could + ## do the following to get the tail of the path: + ## + ## .. code-block:: nim + ## var tailSplit = rsplit("Root#Object#Method#Index", {'#'}, maxsplit=1) + ## + ## Results in `tailSplit` containing: + ## + ## .. code-block:: nim + ## @["Root#Object#Method", "Index"] + ## + accumulateResult(rsplit(s, seps, maxsplit)) + result.reverse() + +proc rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] + {.noSideEffect, rtl, extern: "nsuRSplitChar".} = + ## The same as the `split iterator <#rsplit.i,string,char>`_, but is a proc + ## that returns a sequence of substrings. + ## + ## A possible common use case for `rsplit` is path manipulation, + ## particularly on systems that don't use a common delimiter. + ## + ## For example, if a system had `#` as a delimiter, you could + ## do the following to get the tail of the path: + ## + ## .. code-block:: nim + ## var tailSplit = rsplit("Root#Object#Method#Index", '#', maxsplit=1) + ## + ## Results in `tailSplit` containing: + ## + ## .. 
code-block:: nim + ## @["Root#Object#Method", "Index"] + ## + accumulateResult(rsplit(s, sep, maxsplit)) + result.reverse() + +proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] + {.noSideEffect, rtl, extern: "nsuRSplitString".} = + ## The same as the `split iterator <#rsplit.i,string,string>`_, but is a proc + ## that returns a sequence of substrings. + ## + ## A possible common use case for `rsplit` is path manipulation, + ## particularly on systems that don't use a common delimiter. + ## + ## For example, if a system had `#` as a delimiter, you could + ## do the following to get the tail of the path: + ## + ## .. code-block:: nim + ## var tailSplit = rsplit("Root#Object#Method#Index", "#", maxsplit=1) + ## + ## Results in `tailSplit` containing: + ## + ## .. code-block:: nim + ## @["Root#Object#Method", "Index"] + ## + accumulateResult(rsplit(s, sep, maxsplit)) + result.reverse() + proc toHex*(x: BiggestInt, len: Positive): string {.noSideEffect, rtl, extern: "nsuToHex".} = ## Converts `x` to its hexadecimal representation. @@ -862,6 +1037,10 @@ proc startsWith*(s, prefix: string): bool {.noSideEffect, if s[i] != prefix[i]: return false inc(i) +proc startsWith*(s: string, prefix: char): bool {.noSideEffect, inline.} = + ## Returns true iff ``s`` starts with ``prefix``. + result = s[0] == prefix + proc endsWith*(s, suffix: string): bool {.noSideEffect, rtl, extern: "nsuEndsWith".} = ## Returns true iff ``s`` ends with ``suffix``. @@ -874,6 +1053,10 @@ proc endsWith*(s, suffix: string): bool {.noSideEffect, inc(i) if suffix[i] == '\0': return true +proc endsWith*(s: string, suffix: char): bool {.noSideEffect, inline.} = + ## Returns true iff ``s`` ends with ``suffix``. + result = s[s.high] == suffix + proc continuesWith*(s, substr: string, start: Natural): bool {.noSideEffect, rtl, extern: "nsuContinuesWith".} = ## Returns true iff ``s`` continues with ``substr`` at position ``start``. 
@@ -1027,6 +1210,34 @@ proc rfind*(s: string, sub: char, start: int = -1): int {.noSideEffect, if sub == s[i]: return i return -1 +proc center*(s: string, width: int, fillChar: char = ' '): string {. + noSideEffect, rtl, extern: "nsuCenterString".} = + ## Return the contents of `s` centered in a string `width` long using + ## `fillChar` as padding. + ## + ## The original string is returned if `width` is less than or equal + ## to `s.len`. + if width <= s.len: + return s + + result = newString(width) + + # Left padding will be one fillChar + # smaller if there are an odd number + # of characters + let + charsLeft = (width - s.len) + leftPadding = charsLeft div 2 + + for i in 0 ..< width: + if i >= leftPadding and i < leftPadding + s.len: + # we are where the string should be located + result[i] = s[i-leftPadding] + else: + # we are either before or after where + # the string s should go + result[i] = fillChar + proc count*(s: string, sub: string, overlapping: bool = false): int {. noSideEffect, rtl, extern: "nsuCountString".} = ## Count the occurrences of a substring `sub` in the string `s`. 
@@ -1891,6 +2102,11 @@ when isMainModule: doAssert parseEnum("invalid enum value", enC) == enC + doAssert center("foo", 13) == " foo " + doAssert center("foo", 0) == "foo" + doAssert center("foo", 3, fillChar = 'a') == "foo" + doAssert center("foo", 10, fillChar = '\t') == "\t\t\tfoo\t\t\t\t" + doAssert count("foofoofoo", "foofoo") == 1 doAssert count("foofoofoo", "foofoo", overlapping = true) == 2 doAssert count("foofoofoo", 'f') == 3 @@ -1959,6 +2175,14 @@ when isMainModule: doAssert(not isUpper("AAcc")) doAssert(not isUpper("A#$")) + doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"] + doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"] + doAssert rsplit(" foo bar ", seps=Whitespace, maxsplit=1) == @[" foo bar", ""] + doAssert rsplit(":foo:bar", sep=':') == @["", "foo", "bar"] + doAssert rsplit(":foo:bar", sep=':', maxsplit=2) == @["", "foo", "bar"] + doAssert rsplit(":foo:bar", sep=':', maxsplit=3) == @["", "foo", "bar"] + doAssert rsplit("foothebar", sep="the") == @["foo", "bar"] + doAssert(unescape(r"\x013", "", "") == "\x013") doAssert join(["foo", "bar", "baz"]) == "foobarbaz" @@ -2029,4 +2253,12 @@ bar # Don't use SI prefix as number is too small doAssert formatEng(3.1e-25, siPrefix=true, unit="A") == "310e-27 A" + block: # startsWith / endsWith char tests + var s = "abcdef" + doAssert s.startsWith('a') + doAssert s.startsWith('b') == false + doAssert s.endsWith('f') + doAssert s.endsWith('a') == false + doAssert s.endsWith('\0') == false + #echo("strutils tests passed") diff --git a/lib/pure/times.nim b/lib/pure/times.nim index ac8dc93ad..c0a121518 100644 --- a/lib/pure/times.nim +++ b/lib/pure/times.nim @@ -66,7 +66,7 @@ when defined(posix) and not defined(JS): when not defined(freebsd) and not defined(netbsd) and not defined(openbsd): var timezone {.importc, header: "<time.h>".}: int - var + var tzname {.importc, header: "<time.h>" .}: array[0..1, cstring] # we also need tzset() to make sure that tzname is 
initialized proc tzset() {.importc, header: "<time.h>".} @@ -369,7 +369,10 @@ proc `+`*(a: TimeInfo, interval: TimeInterval): TimeInfo = ## very accurate. let t = toSeconds(toTime(a)) let secs = toSeconds(a, interval) - result = getLocalTime(fromSeconds(t + secs)) + if a.tzname == "UTC": + result = getGMTime(fromSeconds(t + secs)) + else: + result = getLocalTime(fromSeconds(t + secs)) proc `-`*(a: TimeInfo, interval: TimeInterval): TimeInfo = ## subtracts ``interval`` time from TimeInfo ``a``. @@ -386,7 +389,10 @@ proc `-`*(a: TimeInfo, interval: TimeInterval): TimeInfo = intval.months = - interval.months intval.years = - interval.years let secs = toSeconds(a, intval) - result = getLocalTime(fromSeconds(t + secs)) + if a.tzname == "UTC": + result = getGMTime(fromSeconds(t + secs)) + else: + result = getLocalTime(fromSeconds(t + secs)) proc miliseconds*(t: TimeInterval): int {.deprecated.} = t.milliseconds diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim index 5d302c9dc..ac25dccef 100644 --- a/lib/pure/unicode.nim +++ b/lib/pure/unicode.nim @@ -135,45 +135,62 @@ proc runeAt*(s: string, i: Natural): Rune = ## Returns the unicode character in ``s`` at byte index ``i`` fastRuneAt(s, i, result, false) -proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} = - ## Converts a rune into its UTF-8 representation +template fastToUTF8Copy*(c: Rune, s: var string, pos: int, doInc = true) = + ## Copies UTF-8 representation of `c` into the preallocated string `s` + ## starting at position `pos`. If `doInc == true`, `pos` is incremented + ## by the number of bytes that have been processed. + ## + ## To be the most efficient, make sure `s` is preallocated + ## with an additional amount equal to the byte length of + ## `c`. 
var i = RuneImpl(c) if i <=% 127: - result = newString(1) - result[0] = chr(i) + s.setLen(pos+1) + s[pos+0] = chr(i) + when doInc: inc(pos) elif i <=% 0x07FF: - result = newString(2) - result[0] = chr((i shr 6) or 0b110_00000) - result[1] = chr((i and ones(6)) or 0b10_0000_00) + s.setLen(pos+2) + s[pos+0] = chr((i shr 6) or 0b110_00000) + s[pos+1] = chr((i and ones(6)) or 0b10_0000_00) + when doInc: inc(pos, 2) elif i <=% 0xFFFF: - result = newString(3) - result[0] = chr(i shr 12 or 0b1110_0000) - result[1] = chr(i shr 6 and ones(6) or 0b10_0000_00) - result[2] = chr(i and ones(6) or 0b10_0000_00) + s.setLen(pos+3) + s[pos+0] = chr(i shr 12 or 0b1110_0000) + s[pos+1] = chr(i shr 6 and ones(6) or 0b10_0000_00) + s[pos+2] = chr(i and ones(6) or 0b10_0000_00) + when doInc: inc(pos, 3) elif i <=% 0x001FFFFF: - result = newString(4) - result[0] = chr(i shr 18 or 0b1111_0000) - result[1] = chr(i shr 12 and ones(6) or 0b10_0000_00) - result[2] = chr(i shr 6 and ones(6) or 0b10_0000_00) - result[3] = chr(i and ones(6) or 0b10_0000_00) + s.setLen(pos+4) + s[pos+0] = chr(i shr 18 or 0b1111_0000) + s[pos+1] = chr(i shr 12 and ones(6) or 0b10_0000_00) + s[pos+2] = chr(i shr 6 and ones(6) or 0b10_0000_00) + s[pos+3] = chr(i and ones(6) or 0b10_0000_00) + when doInc: inc(pos, 4) elif i <=% 0x03FFFFFF: - result = newString(5) - result[0] = chr(i shr 24 or 0b111110_00) - result[1] = chr(i shr 18 and ones(6) or 0b10_0000_00) - result[2] = chr(i shr 12 and ones(6) or 0b10_0000_00) - result[3] = chr(i shr 6 and ones(6) or 0b10_0000_00) - result[4] = chr(i and ones(6) or 0b10_0000_00) + s.setLen(pos+5) + s[pos+0] = chr(i shr 24 or 0b111110_00) + s[pos+1] = chr(i shr 18 and ones(6) or 0b10_0000_00) + s[pos+2] = chr(i shr 12 and ones(6) or 0b10_0000_00) + s[pos+3] = chr(i shr 6 and ones(6) or 0b10_0000_00) + s[pos+4] = chr(i and ones(6) or 0b10_0000_00) + when doInc: inc(pos, 5) elif i <=% 0x7FFFFFFF: - result = newString(6) - result[0] = chr(i shr 30 or 0b1111110_0) - result[1] = chr(i 
shr 24 and ones(6) or 0b10_0000_00) - result[2] = chr(i shr 18 and ones(6) or 0b10_0000_00) - result[3] = chr(i shr 12 and ones(6) or 0b10_0000_00) - result[4] = chr(i shr 6 and ones(6) or 0b10_0000_00) - result[5] = chr(i and ones(6) or 0b10_0000_00) + s.setLen(pos+6) + s[pos+0] = chr(i shr 30 or 0b1111110_0) + s[pos+1] = chr(i shr 24 and ones(6) or 0b10_0000_00) + s[pos+2] = chr(i shr 18 and ones(6) or 0b10_0000_00) + s[pos+3] = chr(i shr 12 and ones(6) or 0b10_0000_00) + s[pos+4] = chr(i shr 6 and ones(6) or 0b10_0000_00) + s[pos+5] = chr(i and ones(6) or 0b10_0000_00) + when doInc: inc(pos, 6) else: discard # error, exception? +proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} = + ## Converts a rune into its UTF-8 representation + result = "" + fastToUTF8Copy(c, result, 0, false) + proc `$`*(rune: Rune): string = ## Converts a Rune to a string rune.toUTF8 @@ -1352,6 +1369,136 @@ proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} = (c >= 0x20d0 and c <= 0x20ff) or (c >= 0xfe20 and c <= 0xfe2f)) +proc swapCase*(s: string): string {.noSideEffect, procvar, + rtl, extern: "nuc$1".} = + ## Swaps the case of unicode characters in `s` + ## + ## Returns a new string such that the cases of all unicode characters + ## are swapped if possible + + var + i = 0 + lastIndex = 0 + rune: Rune + + result = newString(len(s)) + + while i < len(s): + lastIndex = i + + fastRuneAt(s, i, rune) + + if rune.isUpper(): + rune = rune.toLower() + elif rune.isLower(): + rune = rune.toUpper() + + rune.fastToUTF8Copy(result, lastIndex) + +proc translate*(s: string, replacements: proc(key: string): string): string {. + rtl, extern: "nuc$1".} = + ## Translates words in a string using the `replacements` proc to substitute + ## words inside `s` with their replacements + ## + ## `replacements` is any proc that takes a word and returns + ## a new word to fill it's place. + + # Allocate memory for the new string based on the old one. 
+ # If the new string length is less than the old, no allocations + # will be needed. If the new string length is greater than the + # old, then maybe only one allocation is needed + result = newStringOfCap(s.len) + + var + index = 0 + lastIndex = 0 + wordStart = 0 + inWord = false + rune: Rune + + while index < len(s): + lastIndex = index + + fastRuneAt(s, index, rune) + + let whiteSpace = rune.isWhiteSpace() + + if whiteSpace and inWord: + # If we've reached the end of a word + let word = s[wordStart ..< lastIndex] + result.add(replacements(word)) + result.add($rune) + + inWord = false + elif not whiteSpace and not inWord: + # If we've hit a non space character and + # are not currently in a word, track + # the starting index of the word + inWord = true + wordStart = lastIndex + elif whiteSpace: + result.add($rune) + + if wordStart < len(s) and inWord: + # Get the trailing word at the end + let word = s[wordStart .. ^1] + result.add(replacements(word)) + +proc title*(s: string): string {.noSideEffect, procvar, + rtl, extern: "nuc$1".} = + ## Converts `s` to a unicode title. + ## + ## Returns a new string such that the first character + ## in each word inside `s` is capitalized + + var + i = 0 + lastIndex = 0 + rune: Rune + + result = newString(len(s)) + + var firstRune = true + + while i < len(s): + lastIndex = i + + fastRuneAt(s, i, rune) + + if not rune.isWhiteSpace() and firstRune: + rune = rune.toUpper() + firstRune = false + elif rune.isWhiteSpace(): + firstRune = true + + rune.fastToUTF8Copy(result, lastIndex) + +proc isTitle*(s: string): bool {.noSideEffect, procvar, + rtl, extern: "nuc$1Str".}= + ## Checks whether or not `s` is a unicode title. + ## + ## Returns true if the first character in each word inside `s` + ## are upper case and there is at least one character in `s`. 
+ if s.len() == 0: + return false + + result = true + + var + i = 0 + rune: Rune + + var firstRune = true + + while i < len(s) and result: + fastRuneAt(s, i, rune, doInc=true) + + if not rune.isWhiteSpace() and firstRune: + result = rune.isUpper() and result + firstRune = false + elif rune.isWhiteSpace(): + firstRune = true + iterator runes*(s: string): Rune = ## Iterates over any unicode character of the string ``s`` var @@ -1451,6 +1598,39 @@ when isMainModule: compared = (someString == $someRunes) doAssert compared == true + proc test_replacements(word: string): string = + case word + of "two": + return "2" + of "foo": + return "BAR" + of "βeta": + return "beta" + of "alpha": + return "αlpha" + else: + return "12345" + + doAssert translate("two not alpha foo βeta", test_replacements) == "2 12345 αlpha BAR beta" + doAssert translate(" two not foo βeta ", test_replacements) == " 2 12345 BAR beta " + + doAssert title("foo bar") == "Foo Bar" + doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma" + doAssert title("") == "" + + doAssert isTitle("Foo") + doAssert(not isTitle("Foo bar")) + doAssert(not isTitle("αlpha Βeta")) + doAssert(isTitle("Αlpha Βeta Γamma")) + doAssert(not isTitle("fFoo")) + + doAssert swapCase("FooBar") == "fOObAR" + doAssert swapCase(" ") == " " + doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA" + doAssert swapCase("a✓B") == "A✓b" + doAssert swapCase("") == "" + + doAssert reversed("Reverse this!") == "!siht esreveR" doAssert reversed("先秦兩漢") == "漢兩秦先" doAssert reversed("as⃝df̅") == "f̅ds⃝a" diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim index 6cf837f25..559f45348 100644 --- a/lib/pure/xmldom.nim +++ b/lib/pure/xmldom.nim @@ -51,6 +51,9 @@ const # Illegal characters illegalChars = {'>', '<', '&', '"'} + # standard xml: attribute names + # see https://www.w3.org/XML/1998/namespace + stdattrnames = ["lang", "space", "base", "id"] type Feature = tuple[name: string, version: string] @@ -229,12 +232,15 @@ proc 
createAttributeNS*(doc: PDocument, namespaceURI: string, qualifiedName: str raise newException(EInvalidCharacterErr, "Invalid character") # Exceptions if qualifiedName.contains(':'): + let qfnamespaces = qualifiedName.toLower().split(':') if isNil(namespaceURI): raise newException(ENamespaceErr, "When qualifiedName contains a prefix namespaceURI cannot be nil") - elif qualifiedName.split(':')[0].toLower() == "xml" and namespaceURI != "http://www.w3.org/XML/1998/namespace": + elif qfnamespaces[0] == "xml" and + namespaceURI != "http://www.w3.org/XML/1998/namespace" and + qfnamespaces[1] notin stdattrnames: raise newException(ENamespaceErr, "When the namespace prefix is \"xml\" namespaceURI has to be \"http://www.w3.org/XML/1998/namespace\"") - elif qualifiedName.split(':')[1].toLower() == "xmlns" and namespaceURI != "http://www.w3.org/2000/xmlns/": + elif qfnamespaces[1] == "xmlns" and namespaceURI != "http://www.w3.org/2000/xmlns/": raise newException(ENamespaceErr, "When the namespace prefix is \"xmlns\" namespaceURI has to be \"http://www.w3.org/2000/xmlns/\"") @@ -305,9 +311,12 @@ proc createElement*(doc: PDocument, tagName: string): PElement = proc createElementNS*(doc: PDocument, namespaceURI: string, qualifiedName: string): PElement = ## Creates an element of the given qualified name and namespace URI. if qualifiedName.contains(':'): + let qfnamespaces = qualifiedName.toLower().split(':') if isNil(namespaceURI): raise newException(ENamespaceErr, "When qualifiedName contains a prefix namespaceURI cannot be nil") - elif qualifiedName.split(':')[0].toLower() == "xml" and namespaceURI != "http://www.w3.org/XML/1998/namespace": + elif qfnamespaces[0] == "xml" and + namespaceURI != "http://www.w3.org/XML/1998/namespace" and + qfnamespaces[1] notin stdattrnames: raise newException(ENamespaceErr, "When the namespace prefix is \"xml\" namespaceURI has to be \"http://www.w3.org/XML/1998/namespace\"") |