summary refs log tree commit diff stats
path: root/lib/experimental/diff.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/experimental/diff.nim')
-rw-r--r--lib/experimental/diff.nim190
1 files changed, 58 insertions, 132 deletions
diff --git a/lib/experimental/diff.nim b/lib/experimental/diff.nim
index 5a69687a3..669e9f613 100644
--- a/lib/experimental/diff.nim
+++ b/lib/experimental/diff.nim
@@ -10,28 +10,31 @@
 ## This module implements an algorithm to compute the
 ## `diff`:idx: between two sequences of lines.
 ##
-## A basic example of ``diffInt`` on 2 arrays of integers:
-##
-## .. code::nim
-##
-##   import experimental/diff
-##   echo diffInt([0, 1, 2, 3, 4, 5, 6, 7, 8], [-1, 1, 2, 3, 4, 5, 666, 7, 42])
-##
-## Another short example of ``diffText`` to diff strings:
-##
-## .. code::nim
-##
-##   import experimental/diff
-##   # 2 samples of text for testing (from "The Call of Cthulhu" by Lovecraft)
-##   let txt0 = """I have looked upon all the universe has to hold of horror,
-##   even skies of spring and flowers of summer must ever be poison to me."""
-##   let txt1 = """I have looked upon all your code has to hold of bugs,
-##   even skies of spring and flowers of summer must ever be poison to me."""
-##
-##   echo diffText(txt0, txt1)
-##
 ## - To learn more see `Diff on Wikipedia. <http://wikipedia.org/wiki/Diff>`_
 
+runnableExamples:
+  assert diffInt(
+    [0, 1, 2, 3, 4, 5, 6, 7, 8],
+    [-1, 1, 2, 3, 4, 5, 666, 7, 42]) ==
+    @[Item(startA: 0, startB: 0, deletedA: 1, insertedB: 1),
+      Item(startA: 6, startB: 6, deletedA: 1, insertedB: 1),
+      Item(startA: 8, startB: 8, deletedA: 1, insertedB: 1)]
+
+runnableExamples:
+  # 2 samples of text (from "The Call of Cthulhu" by Lovecraft)
+  let txt0 = """
+abc
+def ghi
+jkl2"""
+  let txt1 = """
+bacx
+abc
+def ghi
+jkl"""
+  assert diffText(txt0, txt1) ==
+    @[Item(startA: 0, startB: 0, deletedA: 0, insertedB: 1),
+      Item(startA: 2, startB: 3, deletedA: 1, insertedB: 1)]
+
 # code owner: Arne Döring
 #
 # This is based on C# code written by Matthias Hertel, http://www.mathertel.de
@@ -40,7 +43,10 @@
 # "An O(ND) Difference Algorithm and its Variations" by Eugene Myers
 # Algorithmica Vol. 1 No. 2, 1986, p 251.
 
-import tables, strutils
+import std/[tables, strutils]
+
+when defined(nimPreviewSlimSystem):
+  import std/assertions
 
 type
   Item* = object    ## An Item in the list of differences.
@@ -59,7 +65,7 @@ type
   Smsrd = object
     x, y: int
 
-# template to avoid a seq copy. Required until ``sink`` parameters are ready.
+# template to avoid a seq copy. Required until `sink` parameters are ready.
 template newDiffData(initData: seq[int]; L: int): DiffData =
   DiffData(
     data: initData,
@@ -71,9 +77,9 @@ proc len(d: DiffData): int {.inline.} = d.data.len
 proc diffCodes(aText: string; h: var Table[string, int]): DiffData =
   ## This function converts all textlines of the text into unique numbers for every unique textline
   ## so further work can work only with simple numbers.
-  ## ``aText`` the input text
-  ## ``h`` This extern initialized hashtable is used for storing all ever used textlines.
-  ## ``trimSpace`` ignore leading and trailing space characters
+  ## `aText` the input text
+  ## `h` This extern initialized hashtable is used for storing all ever used textlines.
+  ## `trimSpace` ignore leading and trailing space characters
   ## Returns a array of integers.
   var lastUsedCode = h.len
   result.data = newSeq[int]()
@@ -108,14 +114,14 @@ proc optimize(data: var DiffData) =
 proc sms(dataA: var DiffData; lowerA, upperA: int; dataB: DiffData; lowerB, upperB: int;
          downVector, upVector: var openArray[int]): Smsrd =
   ## This is the algorithm to find the Shortest Middle Snake (sms).
-  ## ``dataA`` sequence A
-  ## ``lowerA`` lower bound of the actual range in dataA
-  ## ``upperA`` upper bound of the actual range in dataA (exclusive)
-  ## ``dataB`` sequence B
-  ## ``lowerB`` lower bound of the actual range in dataB
-  ## ``upperB`` upper bound of the actual range in dataB (exclusive)
-  ## ``downVector`` a vector for the (0,0) to (x,y) search. Passed as a parameter for speed reasons.
-  ## ``upVector`` a vector for the (u,v) to (N,M) search. Passed as a parameter for speed reasons.
+  ## `dataA` sequence A
+  ## `lowerA` lower bound of the actual range in dataA
+  ## `upperA` upper bound of the actual range in dataA (exclusive)
+  ## `dataB` sequence B
+  ## `lowerB` lower bound of the actual range in dataB
+  ## `upperB` upper bound of the actual range in dataB (exclusive)
+  ## `downVector` a vector for the (0,0) to (x,y) search. Passed as a parameter for speed reasons.
+  ## `upVector` a vector for the (u,v) to (N,M) search. Passed as a parameter for speed reasons.
   ## Returns a MiddleSnakeData record containing x,y and u,v.
 
   let max = dataA.len + dataB.len + 1
@@ -138,7 +144,7 @@ proc sms(dataA: var DiffData; lowerA, upperA: int; dataB: DiffData; lowerB, uppe
 
   for D in 0 .. maxD:
     # Extend the forward path.
-    for k in countUp(downK - D, downK + D, 2):
+    for k in countup(downK - D, downK + D, 2):
       # find the only or better starting point
       var x: int
       if k == downK - D:
@@ -164,7 +170,7 @@ proc sms(dataA: var DiffData; lowerA, upperA: int; dataB: DiffData; lowerB, uppe
                        y: downVector[downOffset + k] - k)
 
     # Extend the reverse path.
-    for k in countUp(upK - D, upK + D, 2):
+    for k in countup(upK - D, upK + D, 2):
       # find the only or better starting point
       var x: int
       if k == upK + D:
@@ -195,14 +201,14 @@ proc lcs(dataA: var DiffData; lowerA, upperA: int; dataB: var DiffData; lowerB,
   ## algorithm.
   ## The published algorithm passes recursively parts of the A and B sequences.
   ## To avoid copying these arrays the lower and upper bounds are passed while the sequences stay constant.
-  ## ``dataA`` sequence A
-  ## ``lowerA`` lower bound of the actual range in dataA
-  ## ``upperA`` upper bound of the actual range in dataA (exclusive)
-  ## ``dataB`` sequence B
-  ## ``lowerB`` lower bound of the actual range in dataB
-  ## ``upperB`` upper bound of the actual range in dataB (exclusive)
-  ## ``downVector`` a vector for the (0,0) to (x,y) search. Passed as a parameter for speed reasons.
-  ## ``upVector`` a vector for the (u,v) to (N,M) search. Passed as a parameter for speed reasons.
+  ## `dataA` sequence A
+  ## `lowerA` lower bound of the actual range in dataA
+  ## `upperA` upper bound of the actual range in dataA (exclusive)
+  ## `dataB` sequence B
+  ## `lowerB` lower bound of the actual range in dataB
+  ## `upperB` upper bound of the actual range in dataB (exclusive)
+  ## `downVector` a vector for the (0,0) to (x,y) search. Passed as a parameter for speed reasons.
+  ## `upVector` a vector for the (u,v) to (N,M) search. Passed as a parameter for speed reasons.
 
   # make mutable copy:
   var lowerA = lowerA
@@ -275,9 +281,9 @@ proc createDiffs(dataA, dataB: DiffData): seq[Item] =
 proc diffInt*(arrayA, arrayB: openArray[int]): seq[Item] =
   ## Find the difference in 2 arrays of integers.
   ##
-  ## ``arrayA`` A-version of the numbers (usually the old one)
+  ## `arrayA` A-version of the numbers (usually the old one)
   ##
-  ## ``arrayB`` B-version of the numbers (usually the new one)
+  ## `arrayB` B-version of the numbers (usually the new one)
   ##
   ## Returns a sequence of Items that describe the differences.
 
@@ -288,9 +294,9 @@ proc diffInt*(arrayA, arrayB: openArray[int]): seq[Item] =
   var dataB = newDiffData(@arrayB, arrayB.len)
 
   let max = dataA.len + dataB.len + 1
-  ## vector for the (0,0) to (x,y) search
+  # vector for the (0,0) to (x,y) search
   var downVector = newSeq[int](2 * max + 2)
-  ## vector for the (u,v) to (N,M) search
+  # vector for the (u,v) to (N,M) search
   var upVector = newSeq[int](2 * max + 2)
 
   lcs(dataA, 0, dataA.len, dataB, 0, dataB.len, downVector, upVector)
@@ -304,12 +310,12 @@ proc diffText*(textA, textB: string): seq[Item] =
   ## textlines into a common hashtable so i can find duplicates in there, and generating a
   ## new number each time a new textline is inserted.
   ##
-  ## ``textA`` A-version of the text (usually the old one)
+  ## `textA` A-version of the text (usually the old one)
   ##
-  ## ``textB`` B-version of the text (usually the new one)
+  ## `textB` B-version of the text (usually the new one)
   ##
   ## Returns a seq of Items that describe the differences.
-
+  # See also `gitutils.diffStrings`.
   # prepare the input-text and convert to comparable numbers.
   var h = initTable[string, int]()  # TextA.len + TextB.len  <- probably wrong initial size
   # The A-Version of the data (original data) to be compared.
@@ -321,9 +327,9 @@ proc diffText*(textA, textB: string): seq[Item] =
   h.clear # free up hashtable memory (maybe)
 
   let max = dataA.len + dataB.len + 1
-  ## vector for the (0,0) to (x,y) search
+  # vector for the (0,0) to (x,y) search
   var downVector = newSeq[int](2 * max + 2)
-  ## vector for the (u,v) to (N,M) search
+  # vector for the (u,v) to (N,M) search
   var upVector = newSeq[int](2 * max + 2)
 
   lcs(dataA, 0, dataA.len, dataB, 0, dataB.len, downVector, upVector)
@@ -331,83 +337,3 @@ proc diffText*(textA, textB: string): seq[Item] =
   optimize(dataA)
   optimize(dataB)
   result = createDiffs(dataA, dataB)
-
-when isMainModule:
-
-  proc testHelper(f: seq[Item]): string =
-    for it in f:
-      result.add(
-        $it.deletedA & "." & $it.insertedB & "." & $it.startA & "." & $it.startB & "*"
-      )
-
-  proc main() =
-    var a, b: string
-
-    stdout.writeLine("Diff Self Test...")
-
-    # test all changes
-    a = "a,b,c,d,e,f,g,h,i,j,k,l".replace(',', '\n')
-    b = "0,1,2,3,4,5,6,7,8,9".replace(',', '\n')
-    assert(testHelper(diffText(a, b)) ==
-      "12.10.0.0*",
-      "all-changes test failed.")
-    stdout.writeLine("all-changes test passed.")
-    # test all same
-    a = "a,b,c,d,e,f,g,h,i,j,k,l".replace(',', '\n')
-    b = a
-    assert(testHelper(diffText(a, b)) ==
-      "",
-      "all-same test failed.")
-    stdout.writeLine("all-same test passed.")
-
-    # test snake
-    a = "a,b,c,d,e,f".replace(',', '\n')
-    b = "b,c,d,e,f,x".replace(',', '\n')
-    assert(testHelper(diffText(a, b)) ==
-      "1.0.0.0*0.1.6.5*",
-      "snake test failed.")
-    stdout.writeLine("snake test passed.")
-
-    # 2002.09.20 - repro
-    a = "c1,a,c2,b,c,d,e,g,h,i,j,c3,k,l".replace(',', '\n')
-    b = "C1,a,C2,b,c,d,e,I1,e,g,h,i,j,C3,k,I2,l".replace(',', '\n')
-    assert(testHelper(diffText(a, b)) ==
-      "1.1.0.0*1.1.2.2*0.2.7.7*1.1.11.13*0.1.13.15*",
-      "repro20020920 test failed.")
-    stdout.writeLine("repro20020920 test passed.")
-
-    # 2003.02.07 - repro
-    a = "F".replace(',', '\n')
-    b = "0,F,1,2,3,4,5,6,7".replace(',', '\n')
-    assert(testHelper(diffText(a, b)) ==
-      "0.1.0.0*0.7.1.2*",
-      "repro20030207 test failed.")
-    stdout.writeLine("repro20030207 test passed.")
-
-    # Muegel - repro
-    a = "HELLO\nWORLD"
-    b = "\n\nhello\n\n\n\nworld\n"
-    assert(testHelper(diffText(a, b)) ==
-      "2.8.0.0*",
-      "repro20030409 test failed.")
-    stdout.writeLine("repro20030409 test passed.")
-
-    # test some differences
-    a = "a,b,-,c,d,e,f,f".replace(',', '\n')
-    b = "a,b,x,c,e,f".replace(',', '\n')
-    assert(testHelper(diffText(a, b)) ==
-      "1.1.2.2*1.0.4.4*1.0.7.6*",
-      "some-changes test failed.")
-    stdout.writeLine("some-changes test passed.")
-
-    # test one change within long chain of repeats
-    a = "a,a,a,a,a,a,a,a,a,a".replace(',', '\n')
-    b = "a,a,a,a,-,a,a,a,a,a".replace(',', '\n')
-    assert(testHelper(diffText(a, b)) ==
-      "0.1.4.4*1.0.9.10*",
-      "long chain of repeats test failed.")
-
-    stdout.writeLine("End.")
-    stdout.flushFile
-
-  main()