summary refs log tree commit diff stats
path: root/lib/std/editdistance.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/std/editdistance.nim')
-rw-r--r--lib/std/editdistance.nim69
1 files changed, 14 insertions, 55 deletions
diff --git a/lib/std/editdistance.nim b/lib/std/editdistance.nim
index f365d4ab2..40c0017ae 100644
--- a/lib/std/editdistance.nim
+++ b/lib/std/editdistance.nim
@@ -10,27 +10,27 @@
 ## This module implements an algorithm to compute the
 ## `edit distance`:idx: between two Unicode strings.
 
-import unicode
+import std/unicode
 
 proc editDistance*(a, b: string): int {.noSideEffect.} =
-  ## Returns the **unicode-rune** edit distance between ``a`` and ``b``.
+  ## Returns the **unicode-rune** edit distance between `a` and `b`.
   ##
   ## This uses the `Levenshtein`:idx: distance algorithm with only a linear
   ## memory overhead.
   runnableExamples: static: doAssert editdistance("Kitten", "Bitten") == 1
-  if len(a) > len(b):
-    # make ``b`` the longer string
+  if runeLen(a) > runeLen(b):
+    # make `b` the longer string
     return editDistance(b, a)
   # strip common prefix
   var
-    iStart = 0 ## The character starting index of the first rune in both strings ``a`` and ``b``
+    iStart = 0 ## The character starting index of the first rune in both strings `a` and `b`
     iNextA = 0
     iNextB = 0
     runeA, runeB: Rune
-    lenRunesA = 0 ## The number of relevant runes in string ``a``.
-    lenRunesB = 0 ## The number of relevant runes in string ``b``.
+    lenRunesA = 0 ## The number of relevant runes in string `a`.
+    lenRunesB = 0 ## The number of relevant runes in string `b`.
   block commonPrefix:
-    # ``a`` is the shorter string
+    # `a` is the shorter string
     while iStart < len(a):
       iNextA = iStart
       a.fastRuneAt(iNextA, runeA, doInc = true)
@@ -44,9 +44,9 @@ proc editDistance*(a, b: string): int {.noSideEffect.} =
   var
     # we know that we are either at the start of the strings
     # or that the current value of runeA is not equal to runeB
-    # => start search for common suffix after the current rune (``i_next_*``)
-    iEndA = iNextA ## The exclusive upper index bound of string ``a``.
-    iEndB = iNextB ## The exclusive upper index bound of string ``b``.
+    # => start search for common suffix after the current rune (`i_next_*`)
+    iEndA = iNextA ## The exclusive upper index bound of string `a`.
+    iEndB = iNextB ## The exclusive upper index bound of string `b`.
     iCurrentA = iNextA
     iCurrentB = iNextB
   block commonSuffix:
@@ -69,8 +69,8 @@ proc editDistance*(a, b: string): int {.noSideEffect.} =
         addRunesB = 0
       iCurrentA = iNextA
       iCurrentB = iNextB
-    if iCurrentA >= len(a): # ``a`` exhausted
-      if iCurrentB < len(b): # ``b`` not exhausted
+    if iCurrentA >= len(a): # `a` exhausted
+      if iCurrentB < len(b): # `b` not exhausted
         iEndA = iCurrentA
         iEndB = iCurrentB
         inc(lenRunesA, addRunesA)
@@ -79,7 +79,7 @@ proc editDistance*(a, b: string): int {.noSideEffect.} =
           b.fastRuneAt(iEndB, runeB)
           inc(lenRunesB)
           if iEndB >= len(b): break
-    elif iCurrentB >= len(b): # ``b`` exhausted and ``a`` not exhausted
+    elif iCurrentB >= len(b): # `b` exhausted and `a` not exhausted
       iEndA = iCurrentA
       iEndB = iCurrentB
       inc(lenRunesA, addRunesA)
@@ -264,44 +264,3 @@ proc editDistanceAscii*(a, b: string): int {.noSideEffect.} =
       if x > c3: x = c3
       row[p] = x
   result = row[e]
-
-
-when isMainModule:
-  doAssert editDistance("", "") == 0
-  doAssert editDistance("kitten", "sitting") == 3 # from Wikipedia
-  doAssert editDistance("flaw", "lawn") == 2 # from Wikipedia
-
-  doAssert editDistance("привет", "превет") == 1
-  doAssert editDistance("Åge", "Age") == 1
-  # editDistance, one string is longer in bytes, but shorter in rune length
-  # first string: 4 bytes, second: 6 bytes, but only 3 runes
-  doAssert editDistance("aaaa", "×××") == 4
-
-  block veryLongStringEditDistanceTest:
-    const cap = 256
-    var
-      s1 = newStringOfCap(cap)
-      s2 = newStringOfCap(cap)
-    while len(s1) < cap:
-      s1.add 'a'
-    while len(s2) < cap:
-      s2.add 'b'
-    doAssert editDistance(s1, s2) == cap
-
-  block combiningCodePointsEditDistanceTest:
-    const s = "A\xCC\x8Age"
-    doAssert editDistance(s, "Age") == 1
-
-  doAssert editDistanceAscii("", "") == 0
-  doAssert editDistanceAscii("kitten", "sitting") == 3 # from Wikipedia
-  doAssert editDistanceAscii("flaw", "lawn") == 2 # from Wikipedia
-
-
-  assert(editDistance("prefix__hallo_suffix", "prefix__hallo_suffix") == 0)
-  assert(editDistance("prefix__hallo_suffix", "prefix__hallo_suffi1") == 1)
-  assert(editDistance("prefix__hallo_suffix", "prefix__HALLO_suffix") == 5)
-  assert(editDistance("prefix__hallo_suffix", "prefix__ha_suffix") == 3)
-  assert(editDistance("prefix__hallo_suffix", "prefix") == 14)
-  assert(editDistance("prefix__hallo_suffix", "suffix") == 14)
-  assert(editDistance("prefix__hallo_suffix", "prefix__hao_suffix") == 2)
-  assert(editDistance("main", "malign") == 2)