summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
authorAndreas Rumpf <rumpf_a@web.de>2015-02-12 21:54:43 +0100
committerAndreas Rumpf <rumpf_a@web.de>2015-02-12 21:54:43 +0100
commitf039aad5e00d7da545f79f3a395ccc82bdd599ff (patch)
treedd6e614b6bed1992c5b40cab8148fc45e955ff9d /lib
parent7e7a448cf11a4dd7d293209082e6b2e2a7d0c261 (diff)
parent7251fbd76eac33c2fff76051c911380ed37b3a6f (diff)
downloadNim-f039aad5e00d7da545f79f3a395ccc82bdd599ff.tar.gz
Merge pull request #2097 from jferg2010/devel
Date/time parsing
Diffstat (limited to 'lib')
-rw-r--r--lib/pure/times.nim329
1 files changed, 328 insertions, 1 deletions
diff --git a/lib/pure/times.nim b/lib/pure/times.nim
index 0925e3471..e32ea786a 100644
--- a/lib/pure/times.nim
+++ b/lib/pure/times.nim
@@ -16,7 +16,7 @@
                       # of the standard library!
 
 import
-  strutils
+  strutils, parseutils
 
 include "system/inclrtl"
 
@@ -744,6 +744,285 @@ proc format*(info: TimeInfo, f: string): string =
 
 {.pop.}
 
+proc parseToken(info: var TimeInfo; token, value: string; j: var int) =
+  ## Helper of the parse proc to parse individual tokens.
+  ##
+  ## Reads the field described by the format specifier ``token`` from
+  ## ``value`` starting at index ``j``, stores it in ``info`` and advances
+  ## ``j`` past the characters that were consumed. Tokens that are not a
+  ## known specifier are not interpreted; ``j`` just moves forward by
+  ## ``token.len``.
+  ##
+  ## Raises ``ValueError`` for an unknown day or month name and for a
+  ## timezone offset that does not start with ``+`` or ``-``.
+  var sv: int
+  case token
+  of "d":
+    # One or two digits: parseutils.parseInt reports how many it consumed.
+    let pd = parseInt(value[j..j+1], sv)
+    info.monthday = sv
+    j += pd
+  of "dd":
+    info.monthday = value[j..j+1].parseInt()
+    j += 2
+  of "ddd":
+    case value[j..j+2].toLower():
+    of "sun":
+      info.weekday = dSun
+    of "mon":
+      info.weekday = dMon
+    of "tue":
+      info.weekday = dTue
+    of "wed":
+      info.weekday = dWed
+    of "thu":
+      info.weekday = dThu
+    of "fri":
+      info.weekday = dFri
+    of "sat":
+      info.weekday = dSat
+    else:
+      raise newException(ValueError, "invalid day of week ")
+    j += 3
+  of "dddd":
+    # Full names differ in length, so each candidate is length-checked
+    # before it is compared and j advances by the matched name's length.
+    if value.len >= j+6 and value[j..j+5].cmpIgnoreCase("sunday") == 0:
+      info.weekday = dSun
+      j += 6
+    elif value.len >= j+6 and value[j..j+5].cmpIgnoreCase("monday") == 0:
+      info.weekday = dMon
+      j += 6
+    elif value.len >= j+7 and value[j..j+6].cmpIgnoreCase("tuesday") == 0:
+      info.weekday = dTue
+      j += 7
+    elif value.len >= j+9 and value[j..j+8].cmpIgnoreCase("wednesday") == 0:
+      info.weekday = dWed
+      j += 9
+    elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("thursday") == 0:
+      info.weekday = dThu
+      j += 8
+    elif value.len >= j+6 and value[j..j+5].cmpIgnoreCase("friday") == 0:
+      info.weekday = dFri
+      j += 6
+    elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("saturday") == 0:
+      info.weekday = dSat
+      j += 8
+    else:
+      raise newException(ValueError, "invalid day of week ")
+  of "h", "H":
+    let pd = parseInt(value[j..j+1], sv)
+    info.hour = sv
+    j += pd
+  of "hh", "HH":
+    info.hour = value[j..j+1].parseInt()
+    j += 2
+  of "m":
+    let pd = parseInt(value[j..j+1], sv)
+    info.minute = sv
+    j += pd
+  of "mm":
+    info.minute = value[j..j+1].parseInt()
+    j += 2
+  of "M":
+    let pd = parseInt(value[j..j+1], sv)
+    # Only the month is set here; the day of the month belongs to d/dd and
+    # must not be overwritten with the month number.
+    info.month = Month(sv-1)
+    j += pd
+  of "MM":
+    var month = value[j..j+1].parseInt()
+    j += 2
+    info.month = Month(month-1)
+  of "MMM":
+    case value[j..j+2].toLower():
+    of "jan":
+      info.month =  mJan
+    of "feb":
+      info.month =  mFeb
+    of "mar":
+      info.month =  mMar
+    of "apr":
+      info.month =  mApr
+    of "may":
+      info.month =  mMay
+    of "jun":
+      info.month =  mJun
+    of "jul":
+      info.month =  mJul
+    of "aug":
+      info.month =  mAug
+    of "sep":
+      info.month =  mSep
+    of "oct":
+      info.month =  mOct
+    of "nov":
+      info.month =  mNov
+    of "dec":
+      info.month =  mDec
+    else:
+      raise newException(ValueError, "invalid month")
+    j += 3
+  of "MMMM":
+    # Same scheme as dddd: length check, case-insensitive compare, advance
+    # by the length of the name that matched.
+    if value.len >= j+7 and value[j..j+6].cmpIgnoreCase("january") == 0:
+      info.month =  mJan
+      j += 7
+    elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("february") == 0:
+      info.month =  mFeb
+      j += 8
+    elif value.len >= j+5 and value[j..j+4].cmpIgnoreCase("march") == 0:
+      info.month =  mMar
+      j += 5
+    elif value.len >= j+5 and value[j..j+4].cmpIgnoreCase("april") == 0:
+      info.month =  mApr
+      j += 5
+    elif value.len >= j+3 and value[j..j+2].cmpIgnoreCase("may") == 0:
+      info.month =  mMay
+      j += 3
+    elif value.len >= j+4 and value[j..j+3].cmpIgnoreCase("june") == 0:
+      info.month =  mJun
+      j += 4
+    elif value.len >= j+4 and value[j..j+3].cmpIgnoreCase("july") == 0:
+      info.month =  mJul
+      j += 4
+    elif value.len >= j+6 and value[j..j+5].cmpIgnoreCase("august") == 0:
+      info.month =  mAug
+      j += 6
+    elif value.len >= j+9 and value[j..j+8].cmpIgnoreCase("september") == 0:
+      info.month =  mSep
+      j += 9
+    elif value.len >= j+7 and value[j..j+6].cmpIgnoreCase("october") == 0:
+      info.month =  mOct
+      j += 7
+    elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("november") == 0:
+      info.month =  mNov
+      j += 8
+    elif value.len >= j+8 and value[j..j+7].cmpIgnoreCase("december") == 0:
+      info.month =  mDec
+      j += 8
+    else:
+      raise newException(ValueError, "invalid month")
+  of "s":
+    let pd = parseInt(value[j..j+1], sv)
+    info.second = sv
+    j += pd
+  of "ss":
+    info.second = value[j..j+1].parseInt()
+    j += 2
+  of "t":
+    # 12-hour clock marker, matched case-insensitively. It adjusts the hour
+    # already stored in info, so the hour token must come before it in the
+    # layout. "P" moves 1..11 into the afternoon, "A" turns 12 into midnight.
+    let marker = value[j].toUpper()
+    if marker == 'P' and info.hour > 0 and info.hour < 12:
+      info.hour += 12
+    elif marker == 'A' and info.hour == 12:
+      info.hour = 0
+    j += 1
+  of "tt":
+    # Two-letter form of the marker above, same rules.
+    let marker = value[j..j+1].toUpper()
+    if marker == "PM" and info.hour > 0 and info.hour < 12:
+      info.hour += 12
+    elif marker == "AM" and info.hour == 12:
+      info.hour = 0
+    j += 2
+  of "yy":
+    # Assumes current century
+    var year = value[j..j+1].parseInt()
+    var thisCen = getLocalTime(getTime()).year div 100
+    info.year = thisCen*100 + year
+    j += 2
+  of "yyyy":
+    info.year = value[j..j+3].parseInt()
+    j += 4
+  of "z":
+    # NOTE(review): the offset is stored as the signed number of hours
+    # exactly as written in the value; confirm that this matches the unit
+    # and sign convention TimeInfo.timezone expects.
+    if value[j] == '+':
+      info.timezone = parseInt($value[j+1])
+    elif value[j] == '-':
+      info.timezone = 0-parseInt($value[j+1])
+    else:
+      raise newException(ValueError, "Sign for timezone " & value[j])
+    j += 2
+  of "zz":
+    if value[j] == '+':
+      info.timezone = value[j+1..j+2].parseInt()
+    elif value[j] == '-':
+      info.timezone = 0-value[j+1..j+2].parseInt()
+    else:
+      raise newException(ValueError, "Sign for timezone " & value[j])
+    j += 3
+  of "zzz":
+    # Only the hour part is read; the ":mm" suffix is skipped by advancing
+    # six characters in total.
+    if value[j] == '+':
+      info.timezone = value[j+1..j+2].parseInt()
+    elif value[j] == '-':
+      info.timezone = 0-value[j+1..j+2].parseInt()
+    else:
+      raise newException(ValueError, "Sign for timezone " & value[j])
+    j += 6
+  of "ZZZ":
+    info.tzname = value[j..j+2].toUpper()
+    j += 3
+  else:
+    # Ignore the token and move forward in the value string by the same length
+    j += token.len
+
+    
+proc parse*(value, layout: string): TimeInfo =
+  ## This function parses a date/time string using the standard format identifiers (below).
+  ## The function defaults information not provided in the format string from the running program (timezone, month, year, etc)
+  ##
+  ## ==========  =================================================================================  ================================================
+  ## Specifier   Description                                                                        Example
+  ## ==========  =================================================================================  ================================================
+  ##    d        Numeric value of the day of the month, it will be one or two digits long.          ``1/04/2012 -> 1``, ``21/04/2012 -> 21``
+  ##    dd       Same as above, but always two digits.                                              ``1/04/2012 -> 01``, ``21/04/2012 -> 21``
+  ##    ddd      Three letter string which indicates the day of the week.                           ``Saturday -> Sat``, ``Monday -> Mon``
+  ##    dddd     Full string for the day of the week.                                               ``Saturday -> Saturday``, ``Monday -> Monday``
+  ##    h        The hours in one digit if possible. Ranging from 0-12.                             ``5pm -> 5``, ``2am -> 2``
+  ##    hh       The hours in two digits always. If the hour is one digit 0 is prepended.           ``5pm -> 05``, ``11am -> 11``
+  ##    H        The hours in one digit if possible, ranging from 0-24.                             ``5pm -> 17``, ``2am -> 2``
+  ##    HH       The hours in two digits always. 0 is prepended if the hour is one digit.           ``5pm -> 17``, ``2am -> 02``
+  ##    m        The minutes in 1 digit if possible.                                                ``5:30 -> 30``, ``2:01 -> 1``
+  ##    mm       Same as above but always 2 digits, 0 is prepended if the minute is one digit.      ``5:30 -> 30``, ``2:01 -> 01``
+  ##    M        The month in one digit if possible.                                                ``September -> 9``, ``December -> 12``
+  ##    MM       The month in two digits always. 0 is prepended.                                    ``September -> 09``, ``December -> 12``
+  ##    MMM      Abbreviated three-letter form of the month.                                        ``September -> Sep``, ``December -> Dec``
+  ##    MMMM     Full month string, properly capitalized.                                           ``September -> September``
+  ##    s        Seconds as one digit if possible.                                                  ``00:00:06 -> 6``
+  ##    ss       Same as above but always two digits. 0 is prepended.                               ``00:00:06 -> 06``
+  ##    t        ``A`` when time is in the AM. ``P`` when time is in the PM.
+  ##    tt       Same as above, but ``AM`` and ``PM`` instead of ``A`` and ``P`` respectively.
+  ##    yy       Displays the year to two digits.                                                   ``2012 -> 12``
+  ##    yyyy     Displays the year to four digits.                                                  ``2012 -> 2012``
+  ##    z        Displays the timezone offset from UTC.                                             ``GMT+7 -> +7``, ``GMT-5 -> -5``
+  ##    zz       Same as above but with leading 0.                                                  ``GMT+7 -> +07``, ``GMT-5 -> -05``
+  ##    zzz      Same as above but with ``:00``.                                                    ``GMT+7 -> +07:00``, ``GMT-5 -> -05:00``
+  ##    ZZZ      Displays the name of the timezone.                                                 ``GMT -> GMT``, ``EST -> EST``
+  ## ==========  =================================================================================  ================================================
+  ##
+  ## Other strings can be inserted by putting them in ``''``. For example
+  ## the layout ``hh'->'mm`` matches the value ``01->56``; the quotes appear
+  ## only in the layout.  The following characters can be
+  ## inserted without quoting them: ``:`` ``-`` ``(`` ``)`` ``/`` ``[`` ``]``
+  ## ``,``. However you don't need to necessarily separate format specifiers, an
+  ## unambiguous format string like ``yyyyMMddhhmmss`` is valid too.
+  var i = 0 # pointer for format string
+  var j = 0 # pointer for value string
+  var token = ""
+  # Assumes current day of month, month and year, but time is reset to 00:00:00. Weekday will be reset after parsing.
+  var info = getLocalTime(getTime())
+  info.hour = 0
+  info.minute = 0
+  info.second = 0
+  while true:
+    # '\0' stands for "end of layout": it flushes the pending token and ends
+    # the loop. The bounds check avoids indexing one past the last character.
+    let c = if i < layout.len: layout[i] else: '\0'
+    case c
+    of ' ', '-', '/', ':', '\'', '\0', '(', ')', '[', ']', ',':
+      # A separator terminates whatever specifier has been accumulated.
+      if token.len > 0:
+        parseToken(info, token, value, j)
+      # Reset token
+      token = ""
+      # Break if at end of line
+      if c == '\0': break
+      if c == '\'':
+        # The quotes exist only in the layout, so they advance i alone.
+        inc(i)
+        # The quoted text is a literal present in both strings.
+        while i < layout.len and layout[i] != '\'':
+          inc(i)
+          inc(j)
+        # Step over the closing quote so it is not taken for a new opening
+        # quote on the next iteration.
+        if i < layout.len: inc(i)
+      else:
+        # Plain separators are literals in both the layout and the value.
+        inc(i)
+        inc(j)
+    else:
+      # Check if the letter being added matches previous accumulated buffer.
+      if token.len < 1 or token[high(token)] == c:
+        token.add(c)
+        inc(i)
+      else:
+        # A different letter starts a new specifier: parse the finished one
+        # first and pick this character up on the next iteration.
+        parseToken(info, token, value, j)
+        token = ""
+  # Reset weekday as it might not have been provided and the default may be wrong
+  info.weekday = getLocalTime(timeInfoToTime(info)).weekday
+  return info
+
+
 when isMainModule:
   # $ date --date='@2147483647'
   # Tue 19 Jan 03:14:07 GMT 2038
@@ -778,3 +1057,51 @@ when isMainModule:
   # Interval tests
   assert((t4 - initInterval(years = 2)).format("yyyy") == "1995")
   assert((t4 - initInterval(years = 7, minutes = 34, seconds = 24)).format("yyyy mm ss") == "1990 24 10")
+
+  # Checks for parse: each case parses the sample string `s` with the layout
+  # `f` and compares the `$` rendering of the result with the expected text.
+  var s = "Tuesday at 09:04am on Dec 15, 2015"
+  var f = "dddd at hh:mmtt on MMM d, yyyy"
+  assert($s.parse(f) == "Tue Dec 15 09:04:00 2015")
+  # ANSIC       = "Mon Jan _2 15:04:05 2006"
+  s = "Mon Jan 2 15:04:05 2006"
+  f = "ddd MMM d HH:mm:ss yyyy"
+  assert($s.parse(f) == "Mon Jan  2 15:04:05 2006")
+  # UnixDate    = "Mon Jan _2 15:04:05 MST 2006"
+  s = "Mon Jan 2 15:04:05 MST 2006"
+  f = "ddd MMM d HH:mm:ss ZZZ yyyy"
+  assert($s.parse(f) == "Mon Jan  2 15:04:05 2006")
+  # RubyDate    = "Mon Jan 02 15:04:05 -0700 2006"
+  s = "Mon Jan 02 15:04:05 -07:00 2006"
+  f = "ddd MMM dd HH:mm:ss zzz yyyy"
+  assert($s.parse(f) == "Mon Jan  2 15:04:05 2006")
+  # RFC822      = "02 Jan 06 15:04 MST"
+  s = "02 Jan 06 15:04 MST"
+  f = "dd MMM yy HH:mm ZZZ"
+  assert($s.parse(f) == "Mon Jan  2 15:04:00 2006")
+  # RFC822Z     = "02 Jan 06 15:04 -0700" # RFC822 with numeric zone
+  s = "02 Jan 06 15:04 -07:00"
+  f = "dd MMM yy HH:mm zzz"
+  assert($s.parse(f) == "Mon Jan  2 15:04:00 2006")
+  # RFC850      = "Monday, 02-Jan-06 15:04:05 MST"
+  s = "Monday, 02-Jan-06 15:04:05 MST"
+  f = "dddd, dd-MMM-yy HH:mm:ss ZZZ"
+  assert($s.parse(f) == "Mon Jan  2 15:04:05 2006")
+  # RFC1123     = "Mon, 02 Jan 2006 15:04:05 MST"
+  s = "Mon, 02 Jan 2006 15:04:05 MST"
+  f = "ddd, dd MMM yyyy HH:mm:ss ZZZ"
+  assert($s.parse(f) == "Mon Jan  2 15:04:05 2006")
+  # RFC1123Z    = "Mon, 02 Jan 2006 15:04:05 -0700" # RFC1123 with numeric zone
+  s = "Mon, 02 Jan 2006 15:04:05 -07:00"
+  f = "ddd, dd MMM yyyy HH:mm:ss zzz"
+  assert($s.parse(f) == "Mon Jan  2 15:04:05 2006")
+  # RFC3339     = "2006-01-02T15:04:05Z07:00"
+  # The letters T and Z in the layout are not specifiers; parse skips one
+  # character of the value for each of them.
+  s = "2006-01-02T15:04:05Z-07:00"
+  f = "yyyy-MM-ddTHH:mm:ssZzzz"
+  assert($s.parse(f) == "Mon Jan  2 15:04:05 2006")
+  # RFC3339Nano = "2006-01-02T15:04:05.999999999Z07:00"
+  s = "2006-01-02T15:04:05.999999999Z-07:00"
+  f = "yyyy-MM-ddTHH:mm:ss.999999999Zzzz"
+  assert($s.parse(f) == "Mon Jan  2 15:04:05 2006")
+  # Kitchen     = "3:04PM"
+  # This layout has no date fields, so parse fills them from the current
+  # local date; the result is therefore printed instead of asserted.
+  s = "3:04PM"
+  f = "h:mmtt"
+  echo "Kitchen: " & $s.parse(f)
\ No newline at end of file