1 files changed, 261 insertions, 103 deletions
diff --git a/lib/pure/base64.nim b/lib/pure/base64.nim
index 8c4883c11..591d22cc0 100755..100644
--- a/lib/pure/base64.nim
+++ b/lib/pure/base64.nim
@@ -1,6 +1,6 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2010 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
@@ -8,108 +8,266 @@
 #
 
 ## This module implements a base64 encoder and decoder.
+##
+## Unstable API.
+##
+## Base64 is an encoding and decoding technique used to convert binary
+## data to an ASCII string format.
+## Each Base64 digit represents exactly 6 bits of data. Three 8-bit
+## bytes (i.e., a total of 24 bits) can therefore be represented by
+## four 6-bit Base64 digits.
 
-const 
-  cb64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
-
-proc encode*(s: string, lineLen = 75, newLine="\13\10"): string = 
-  ## encodes `s` into base64 representation. After `lineLen` characters, a 
-  ## `newline` is added.
-  var total = ((len(s) + 2) div 3) * 4
-  var numLines = (total + (lineLen - 1)) div lineLen
-  if numLines > 0: inc(total, (numLines-1) * newLine.len)
-
-  result = newString(total)
-  var i = 0
-  var r = 0
-  var currLine = 0
-  while i < s.len - 2:
-    var a = ord(s[i])
-    var b = ord(s[i+1])
-    var c = ord(s[i+2])
-    result[r] = cb64[a shr 2]
-    result[r+1] = cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
-    result[r+2] = cb64[((b and 0x0F) shl 2) or ((c and 0xC0) shr 6)] 
-    result[r+3] = cb64[c and 0x3F] 
-    inc(r, 4)
-    inc(i, 3)
-    inc(currLine, 4)
-    if currLine >= lineLen and i != s.len-2: 
-      for x in items(newLine): 
-        result[r] = x
-        inc(r)
-      currLine = 0
-
-  if i < s.len-1:
-    var a = ord(s[i])
-    var b = ord(s[i+1])
-    result[r] = cb64[a shr 2]
-    result[r+1] = cb64[((a and 3) shl 4) or ((b and 0xF0) shr 4)]
-    result[r+2] = cb64[((b and 0x0F) shl 2)] 
-    result[r+3] = '='
-    assert(r+4 == result.len)
-  elif i < s.len:
-    var a = ord(s[i])
-    result[r] = cb64[a shr 2]
-    result[r+1] = cb64[(a and 3) shl 4]
-    result[r+2] = '='
-    result[r+3] = '='
-    assert(r+4 == result.len)
+##[
+# Basic usage
+## Encoding data
+]##
+
+runnableExamples:
+  let encoded = encode("Hello World")
+  assert encoded == "SGVsbG8gV29ybGQ="
+
+##
+## Apart from strings you can also encode lists of integers or characters:
+##
+
+runnableExamples:
+  let encodedInts = encode([1'u8,2,3])
+  assert encodedInts == "AQID"
+  let encodedChars = encode(['h','e','y'])
+  assert encodedChars == "aGV5"
+
+##[
+## Decoding data
+]##
+
+runnableExamples:
+  let decoded = decode("SGVsbG8gV29ybGQ=")
+  assert decoded == "Hello World"
+
+##[
+## URL Safe Base64
+]##
+
+runnableExamples:
+  assert encode("c\xf7>", safe = true) == "Y_c-"
+  assert encode("c\xf7>", safe = false) == "Y/c+"
+
+## See also
+## ========
+##
+## * `hashes module<hashes.html>`_ for efficient computations of hash values for diverse Nim types
+## * `md5 module<md5.html>`_ for the MD5 checksum algorithm
+## * `sha1 module<sha1.html>`_ for the SHA-1 checksum algorithm
+
+template cbBase(a, b): untyped = [
+  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', a, b]
+
+const
+  cb64 = cbBase('+', '/')
+  cb64safe = cbBase('-', '_')
+
+const
+  invalidChar = 255
+
+template encodeSize(size: int): int = (size * 4 div 3) + 6
+
+template encodeInternal(s, alphabet: typed): untyped =
+  ## encodes `s` into base64 representation.
+
+  result.setLen(encodeSize(s.len))
+
+  let
+    padding = s.len mod 3
+    inputEnds = s.len - padding
+
+  var
+    inputIndex = 0
+    outputIndex = 0
+    n: uint32
+    b: uint32
+
+  template inputByte(exp: untyped) =
+    b = uint32(s[inputIndex])
+    n = exp
+    inc inputIndex
+
+  template outputChar(x: typed) =
+    result[outputIndex] = alphabet[x and 63]
+    inc outputIndex
+
+  template outputChar(c: char) =
+    result[outputIndex] = c
+    inc outputIndex
+
+  while inputIndex != inputEnds:
+    inputByte(b shl 16)
+    inputByte(n or b shl 8)
+    inputByte(n or b shl 0)
+    outputChar(n shr 18)
+    outputChar(n shr 12)
+    outputChar(n shr 6)
+    outputChar(n shr 0)
+
+  if padding == 1:
+    inputByte(b shl 16)
+    outputChar(n shr 18)
+    outputChar(n shr 12)
+    outputChar('=')
+    outputChar('=')
+
+  elif padding == 2:
+    inputByte(b shl 16)
+    inputByte(n or b shl 8)
+    outputChar(n shr 18)
+    outputChar(n shr 12)
+    outputChar(n shr 6)
+    outputChar('=')
+
+  result.setLen(outputIndex)
+
+template encodeImpl() {.dirty.} =
+  if safe:
+    encodeInternal(s, cb64safe)
   else:
-    assert(r == result.len)
-
-proc decodeByte(b: char): int {.inline.} = 
-  case b
-  of '+': result = ord('>')
-  of '0'..'9': result = ord(b) + 4
-  of 'A'..'Z': result = ord(b) - ord('A')
-  of 'a'..'z': result = ord(b) - 71
-  else: result = 63
-
-proc decode*(s: string): string = 
-  ## decodes a string in base64 representation back into its original form.
-  ## Whitespace is skipped.
-  const Whitespace = {' ', '\t', '\v', '\r', '\l', '\f'}
-  var total = ((len(s) + 3) div 4) * 3
-  # total is an upper bound, as we will skip arbitrary whitespace:
-  result = newString(total)
-
-  var i = 0
-  var r = 0
-  while true:
-    while s[i] in Whitespace: inc(i)
-    if i < s.len-3:
-      var a = s[i].decodeByte
-      var b = s[i+1].decodeByte
-      var c = s[i+2].decodeByte
-      var d = s[i+3].decodeByte
-      
-      result[r] = chr((a shl 2) or ((b shr 4) and 0x03))
-      result[r+1] = chr((b shl 4) or ((c shr 2) and 0x0F))
-      result[r+2] = chr((c shl 6) or (d and 0x3F))
-      inc(r, 3)
-      inc(i, 4)
-    else: break
-  assert i == s.len
-  # adjust the length:
-  if i > 0 and s[i-1] == '=': 
-    dec(r)
-    if i > 1 and s[i-2] == '=': dec(r)
-  setLen(result, r)
-  
-when isMainModule:
-  assert encode("leasure.") == "bGVhc3VyZS4="
-  assert encode("easure.") == "ZWFzdXJlLg=="
-  assert encode("asure.") == "YXN1cmUu"
-  assert encode("sure.") == "c3VyZS4="
-  
-  const longText = """Man is distinguished, not only by his reason, but by this
-    singular passion from other animals, which is a lust of the mind, 
-    that by a perseverance of delight in the continued and indefatigable
-    generation of knowledge, exceeds the short vehemence of any carnal
-    pleasure."""
-  const tests = ["", "abc", "xyz", "man", "leasure.", "sure.", "easure.",
-                 "asure.", longText]
-  for t in items(tests):
-    assert decode(encode(t)) == t
+    encodeInternal(s, cb64)
+
+proc encode*[T: byte|char](s: openArray[T], safe = false): string =
+  ## Encodes `s` into base64 representation.
+  ##
+  ## If `safe` is `true` then it will encode using the
+  ## URL-Safe and Filesystem-safe standard alphabet characters,
+  ## which substitutes `-` instead of `+` and `_` instead of `/`.
+  ## * https://en.wikipedia.org/wiki/Base64#URL_applications
+  ## * https://tools.ietf.org/html/rfc4648#page-7
+  ##
+  ## **See also:**
+  ## * `decode proc<#decode,string>`_ for decoding a string
+  runnableExamples:
+    assert encode("Hello World") == "SGVsbG8gV29ybGQ="
+    assert encode(['n', 'i', 'm']) == "bmlt"
+    assert encode(@['n', 'i', 'm']) == "bmlt"
+    assert encode([1'u8, 2, 3, 4, 5]) == "AQIDBAU="
+  encodeImpl()
+
+proc encode*[T: SomeInteger and not byte](s: openArray[T], safe = false): string
+  {.deprecated: "use `byte` or `char` instead".} =
+  encodeImpl()
+
+proc encodeMime*(s: string, lineLen = 75.Positive, newLine = "\r\n",
+                 safe = false): string =
+  ## Encodes `s` into base64 representation as lines.
+  ## Used in email MIME format, use `lineLen` and `newline`.
+  ##
+  ## This procedure encodes a string according to MIME spec.
+  ##
+  ## If `safe` is `true` then it will encode using the
+  ## URL-Safe and Filesystem-safe standard alphabet characters,
+  ## which substitutes `-` instead of `+` and `_` instead of `/`.
+  ## * https://en.wikipedia.org/wiki/Base64#URL_applications
+  ## * https://tools.ietf.org/html/rfc4648#page-7
+  ##
+  ## **See also:**
+  ## * `encode proc<#encode,openArray[T]>`_ for encoding an openArray
+  ## * `decode proc<#decode,string>`_ for decoding a string
+  runnableExamples:
+    assert encodeMime("Hello World", 4, "\n") == "SGVs\nbG8g\nV29y\nbGQ="
+  template cpy(l, src, idx) =
+    b = l
+    while i < b:
+      result[i] = src[idx]
+      inc i
+      inc idx
+
+  if s.len == 0: return
+  let e = encode(s, safe)
+  if e.len <= lineLen or newLine.len == 0:
+    return e
+  result = newString(e.len + newLine.len * ((e.len div lineLen) - int(e.len mod lineLen == 0)))
+  var i, j, k, b: int
+  let nd = e.len - lineLen
+  while j < nd:
+    cpy(i + lineLen, e, j)
+    cpy(i + newLine.len, newLine, k)
+    k = 0
+  cpy(result.len, e, j)
+
+proc initDecodeTable*(): array[256, char] =
+  # computes a decode table at compile time
+  for i in 0 ..< 256:
+    let ch = char(i)
+    var code = invalidChar
+    if ch >= 'A' and ch <= 'Z': code = i - 0x00000041
+    if ch >= 'a' and ch <= 'z': code = i - 0x00000047
+    if ch >= '0' and ch <= '9': code = i + 0x00000004
+    if ch == '+' or ch == '-': code = 0x0000003E
+    if ch == '/' or ch == '_': code = 0x0000003F
+    result[i] = char(code)
+
+const
+  decodeTable = initDecodeTable()
+
+proc decode*(s: string): string =
+  ## Decodes string `s` in base64 representation back into its original form.
+  ## The initial whitespace is skipped.
+  ##
+  ## **See also:**
+  ## * `encode proc<#encode,openArray[T]>`_ for encoding an openarray
+  runnableExamples:
+    assert decode("SGVsbG8gV29ybGQ=") == "Hello World"
+    assert decode("  SGVsbG8gV29ybGQ=") == "Hello World"
+  if s.len == 0: return
+
+  proc decodeSize(size: int): int =
+    return (size * 3 div 4) + 6
+
+  template inputChar(x: untyped) =
+    let x = int decodeTable[ord(s[inputIndex])]
+    if x == invalidChar:
+      raise newException(ValueError,
+        "Invalid base64 format character `" & s[inputIndex] &
+        "` (ord " & $s[inputIndex].ord & ") at location " & $inputIndex & ".")
+    inc inputIndex
+
+  template outputChar(x: untyped) =
+    result[outputIndex] = char(x and 255)
+    inc outputIndex
 
+  # pre allocate output string once
+  result.setLen(decodeSize(s.len))
+  var
+    inputIndex = 0
+    outputIndex = 0
+    inputLen = s.len
+    inputEnds = 0
+  # strip trailing characters
+  while inputLen > 0 and s[inputLen - 1] in {'\n', '\r', ' ', '='}:
+    dec inputLen
+  # hot loop: read 4 characters at at time
+  inputEnds = inputLen - 4
+  while inputIndex <= inputEnds:
+    while s[inputIndex] in {'\n', '\r', ' '}:
+      inc inputIndex
+    inputChar(a)
+    inputChar(b)
+    inputChar(c)
+    inputChar(d)
+    outputChar(a shl 2 or b shr 4)
+    outputChar(b shl 4 or c shr 2)
+    outputChar(c shl 6 or d shr 0)
+  # do the last 2 or 3 characters
+  var leftLen = abs((inputIndex - inputLen) mod 4)
+  if leftLen == 2:
+    inputChar(a)
+    inputChar(b)
+    outputChar(a shl 2 or b shr 4)
+  elif leftLen == 3:
+    inputChar(a)
+    inputChar(b)
+    inputChar(c)
+    outputChar(a shl 2 or b shr 4)
+    outputChar(b shl 4 or c shr 2)
+  result.setLen(outputIndex)