# # # The Nim Compiler # (c) Copyright 2013 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # # included from cgen.nim # -------------------------- constant expressions ------------------------ proc int64Literal(i: BiggestInt): Rope = if i > low(int64): result = "IL64($1)" % [rope(i)] else: result = ~"(IL64(-9223372036854775807) - IL64(1))" proc uint64Literal(i: uint64): Rope = rope($i & "ULL") proc intLiteral(i: BiggestInt): Rope = if i > low(int32) and i <= high(int32): result = rope(i) elif i == low(int32): # Nim has the same bug for the same reasons :-) result = ~"(-2147483647 -1)" elif i > low(int64): result = "IL64($1)" % [rope(i)] else: result = ~"(IL64(-9223372036854775807) - IL64(1))" proc intLiteral(i: Int128): Rope = intLiteral(toInt64(i)) proc genLiteral(p: BProc, n: PNode, ty: PType): Rope = case n.kind of nkCharLit..nkUInt64Lit: var k: TTypeKind if ty != nil: k = skipTypes(ty, abstractVarRange).kind else: case n.kind of nkCharLit: k = tyChar of nkUInt64Lit: k = tyUInt64 of nkInt64Lit: k = tyInt64 else: k = tyNil # don't go into the case variant that uses 'ty' case k of tyChar, tyNil: result = intLiteral(n.intVal) of tyBool: if n.intVal != 0: result = ~"NIM_TRUE" else: result = ~"NIM_FALSE" of tyInt64: result = int64Literal(n.intVal) of tyUInt64: result = uint64Literal(uint64(n.intVal)) else: result = "(($1) $2)" % [getTypeDesc(p.module, ty), intLiteral(n.intVal)] of nkNilLit: let k = if ty == nil: tyPointer else: skipTypes(ty, abstractVarRange).kind if k == tyProc and skipTypes(ty, abstractVarRange).callConv == ccClosure: let id = nodeTableTestOrSet(p.module.dataCache, n, p.module.labels) result = p.module.tmpBase & rope(id) if id == p.module.labels: # not found in cache: inc(p.module.labels) addf(p.module.s[cfsData], "static NIM_CONST $1 $2 = {NIM_NIL,NIM_NIL};$n", [getTypeDesc(p.module, ty), result]) else: result = rope("NIM_NIL") of nkStrLit..nkTripleStrLit: let k = if ty == nil: tyString else: skipTypes(ty, abstractVarRange + {tyStatic, tyUserTypeClass, tyUserTypeClassInst}).kind case k of tyNil: result = genNilStringLiteral(p.module, n.info) of tyString: # with the new semantics for 'nil' strings, we can map "" to nil and # save tons of allocations: if n.strVal.len == 0 and optNilSeqs notin p.options and p.config.selectedGC != gcDestructors: result = genNilStringLiteral(p.module, n.info) else: result = genStringLiteral(p.module, n) else: result = makeCString(n.strVal) of nkFloatLit, nkFloat64Lit: result = rope(n.floatVal.toStrMaxPrecision) of nkFloat32Lit: result = rope(n.floatVal.toStrMaxPrecision("f")) else: internalError(p.config, n.info, "genLiteral(" & $n.kind & ')') result = nil proc genLiteral(p: BProc, n: PNode): Rope = result = genLiteral(p, n, n.typ) proc bitSetToWord(s: TBitSet, size: int): BiggestUInt = result = 0 for j in 0 ..< size: if j < len(s): result = result or (BiggestUInt(s[j]) shl (j * 8)) proc genRawSetData(cs: TBitSet, size: int): Rope = if size > 8: var res = "{\n" for i in 0 ..< size: res.add "0x" res.add "0123456789abcdef"[cs[i] div 16] res.add "0123456789abcdef"[cs[i] mod 16] if i < size - 1: # not last iteration if i mod 8 == 7: res.add ",\n" else: res.add ", " else: res.add "}\n" result = rope(res) else: result = intLiteral(cast[BiggestInt](bitSetToWord(cs, size))) proc genSetNode(p: BProc, n: PNode): Rope = var cs: TBitSet var size = int(getSize(p.config, n.typ)) toBitSet(p.config, n, cs) if size > 8: let id = nodeTableTestOrSet(p.module.dataCache, n, p.module.labels) result = p.module.tmpBase & rope(id) if id == p.module.labels: # not found in cache: inc(p.module.labels) addf(p.module.s[cfsData], "static NIM_CONST $1 $2 = $3;$n", [getTypeDesc(p.module, n.typ), result, genRawSetData(cs, size)]) else: result = genRawSetData(cs, size) proc getStorageLoc(n: PNode): TStorageLoc = case n.kind of nkSym: case n.sym.kind of skParam, skTemp: result = OnStack of skVar, skForVar, skResult, skLet: if sfGlobal in n.sym.flags: result = OnHeap else: result = OnStack of skConst: if sfGlobal in n.sym.flags: result = OnHeap else: result = OnUnknown else: result = OnUnknown of nkDerefExpr, nkHiddenDeref: case n.sons[0].typ.kind of tyVar, tyLent: result = OnUnknown of tyPtr: result = OnStack of tyRef: result = OnHeap else: doAssert(false, "getStorageLoc") of nkBracketExpr, nkDotExpr, nkObjDownConv, nkObjUpConv: result = getStorageLoc(n.sons[0]) else: result = OnUnknown proc canMove(p: BProc, n: PNode; dest: TLoc): bool = # for now we're conservative here: if n.kind == nkBracket: # This needs to be kept consistent with 'const' seq code # generation! if not isDeepConstExpr(n) or n.len == 0: if skipTypes(n.typ, abstractVarRange).kind == tySequence: return true elif optNilSeqs notin p.options and n.kind in nkStrKinds and n.strVal.len == 0: # Empty strings are codegen'd as NIM_NIL so it's just a pointer copy return true result = n.kind in nkCallKinds #if not result and dest.k == locTemp: # return true #if result: # echo n.info, " optimized ", n # result = false proc genRefAssign(p: BProc, dest, src: TLoc) = if (dest.storage == OnStack and p.config.selectedGC != gcGo) or not usesWriteBarrier(p.config): linefmt(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)]) elif dest.storage == OnHeap: linefmt(p, cpsStmts, "#asgnRef((void**) $1, $2);$n", [addrLoc(p.config, dest), rdLoc(src)]) else: linefmt(p, cpsStmts, "#unsureAsgnRef((void**) $1, $2);$n", [addrLoc(p.config, dest), rdLoc(src)]) proc asgnComplexity(n: PNode): int = if n != nil: case n.kind of nkSym: result = 1 of nkRecCase: # 'case objects' are too difficult to inline their assignment operation: result = 100 of nkRecList: for t in items(n): result += asgnComplexity(t) else: discard proc optAsgnLoc(a: TLoc, t: PType, field: Rope): TLoc = assert field != nil result.k = locField result.storage = a.storage result.lode = lodeTyp t result.r = rdLoc(a) & "." & field proc genOptAsgnTuple(p: BProc, dest, src: TLoc, flags: TAssignmentFlags) = let newflags = if src.storage == OnStatic: flags + {needToCopy} elif tfShallow in dest.t.flags: flags - {needToCopy} else: flags let t = skipTypes(dest.t, abstractInst).getUniqueType() for i in 0 ..< t.len: let t = t.sons[i] let field = "Field$1" % [i.rope] genAssignment(p, optAsgnLoc(dest, t, field), optAsgnLoc(src, t, field), newflags) proc genOptAsgnObject(p: BProc, dest, src: TLoc, flags: TAssignmentFlags, t: PNode, typ: PType) = if t == nil: return let newflags = if src.storage == OnStatic: flags + {needToCopy} elif tfShallow in dest.t.flags: flags - {needToCopy} else: flags case t.kind of nkSym: let field = t.sym if field.loc.r == nil: fillObjectFields(p.module, typ) genAssignment(p, optAsgnLoc(dest, field.typ, field.loc.r), optAsgnLoc(src, field.typ, field.loc.r), newflags) of nkRecList: for child in items(t): genOptAsgnObject(p, dest, src, newflags, child, typ) else: discard proc genGenericAsgn(p: BProc, dest, src: TLoc, flags: TAssignmentFlags) = # Consider: # type TMyFastString {.shallow.} = string # Due to the implementation of pragmas this would end up to set the # tfShallow flag for the built-in string type too! So we check only # here for this flag, where it is reasonably safe to do so # (for objects, etc.): if p.config.selectedGC == gcDestructors: linefmt(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)]) elif needToCopy notin flags or tfShallow in skipTypes(dest.t, abstractVarRange).flags: if (dest.storage == OnStack and p.config.selectedGC != gcGo) or not usesWriteBarrier(p.config): linefmt(p, cpsStmts, "#nimCopyMem((void*)$1, (NIM_CONST void*)$2, sizeof($3));$n", [addrLoc(p.config, dest), addrLoc(p.config, src), rdLoc(dest)]) else: linefmt(p, cpsStmts, "#genericShallowAssign((void*)$1, (void*)$2, $3);$n", [addrLoc(p.config, dest), addrLoc(p.config, src), genTypeInfo(p.module, dest.t, dest.lode.info)]) else: linefmt(p, cpsStmts, "#genericAssign((void*)$1, (void*)$2, $3);$n", [addrLoc(p.config, dest), addrLoc(p.config, src), genTypeInfo(p.module, dest.t, dest.lode.info)]) proc genAssignment(p: BProc, dest, src: TLoc, flags: TAssignmentFlags) = # This function replaces all other methods for generating # the assignment operation in C. if src.t != nil and src.t.kind == tyPtr: # little HACK to support the new 'var T' as return type: linefmt(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)]) return let ty = skipTypes(dest.t, abstractRange + tyUserTypeClasses + {tyStatic}) case ty.kind of tyRef: genRefAssign(p, dest, src) of tySequence: if p.config.selectedGC == gcDestructors: genGenericAsgn(p, dest, src, flags) elif (needToCopy notin flags and src.storage != OnStatic) or canMove(p, src.lode, dest): genRefAssign(p, dest, src) else: linefmt(p, cpsStmts, "#genericSeqAssign($1, $2, $3);$n", [addrLoc(p.config, dest), rdLoc(src), genTypeInfo(p.module, dest.t, dest.lode.info)]) of tyString: if p.config.selectedGC == gcDestructors: genGenericAsgn(p, dest, src, flags) elif (needToCopy notin flags and src.storage != OnStatic) or canMove(p, src.lode, dest): genRefAssign(p, dest, src) else: if (dest.storage == OnStack and p.config.selectedGC != gcGo) or not usesWriteBarrier(p.config): linefmt(p, cpsStmts, "$1 = #copyString($2);$n", [dest.rdLoc, src.rdLoc]) elif dest.storage == OnHeap: # we use a temporary to care for the dreaded self assignment: var tmp: TLoc getTemp(p, ty, tmp) linefmt(p, cpsStmts, "$3 = $1; $1 = #copyStringRC1($2);$n", [dest.rdLoc, src.rdLoc, tmp.rdLoc]) linefmt(p, cpsStmts, "if ($1) #nimGCunrefNoCycle($1);$n", [tmp.rdLoc]) else: linefmt(p, cpsStmts, "#unsureAsgnRef((void**) $1, #copyString($2));$n", [addrLoc(p.config, dest), rdLoc(src)]) of tyProc: if containsGarbageCollectedRef(dest.t): # optimize closure assignment: let a = optAsgnLoc(dest, dest.t, "ClE_0".rope) let b = optAsgnLoc(src, dest.t, "ClE_0".rope) genRefAssign(p, a, b) linefmt(p, cpsStmts, "$1.ClP_0 = $2.ClP_0;$n", [rdLoc(dest), rdLoc(src)]) else: linefmt(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)]) of tyTuple: if containsGarbageCollectedRef(dest.t): if dest.t.len <= 4: genOptAsgnTuple(p, dest, src, flags) else: genGenericAsgn(p, dest, src, flags) else: linefmt(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)]) of tyObject: # XXX: check for subtyping? if ty.isImportedCppType: linefmt(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)]) elif not isObjLackingTypeField(ty): genGenericAsgn(p, dest, src, flags) elif containsGarbageCollectedRef(ty): if ty.sons[0].isNil and asgnComplexity(ty.n) <= 4: discard getTypeDesc(p.module, ty) internalAssert p.config, ty.n != nil genOptAsgnObject(p, dest, src, flags, ty.n, ty) else: genGenericAsgn(p, dest, src, flags) else: linefmt(p, cpsStmts, "$1 = $2;$n", [rdLoc(dest), rdLoc(src)]) of tyArray: if containsGarbageCollectedRef(dest.t): genGenericAsgn(p, dest, src, flags) else: linefmt(p, cpsStmts, "#nimCopyMem((void*)$1, (NIM_CONST void*)$2, sizeof($3));$n", [rdLoc(dest), rdLoc(src), getTypeDesc(p.module, dest.t)]) of tyOpenArray, tyVarargs: # open arrays are always on the stack - really? What if a sequence is # passed to an open array? if containsGarbageCollectedRef(dest.t): linefmt(p, cpsStmts, # XXX: is this correct for arrays? "#genericAssignOpenArray((void*)$1, (void*)$2, $1Len_0, $3);$n", [addrLoc(p.config, dest), addrLoc(p.config, src), genTypeInfo(p.module, dest.t, dest.lode.info)]) else: linefmt(p, cpsStmts,
#
#
# Nim's Runtime Library
# (c) Copyright 2018 Nim contributors
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements an algorithm to compute the
## `edit distance`:idx: between two Unicode strings.
import unicode
proc editDistance*(a, b: string): int {.noSideEffect.} =
## Returns the unicode-rune edit distance between ``a`` and ``b``.
##
## This uses the `Levenshtein`:idx: distance algorithm with only a linear
## memory overhead.
if len(a) > len(b):
# make ``b`` the longer string
return editDistance(b, a)
# strip common prefix
var
i_start = 0 ## The character starting index of the first rune in both strings ``a`` and ``b``
i_next_a = 0
i_next_b = 0
rune_a, rune_b: Rune
len_runes_a = 0 ## The number of relevant runes in string ``a``.
len_runes_b = 0 ## The number of relevant runes in string ``b``.
block commonPrefix:
# ``a`` is the shorter string
while i_start < len(a):
i_next_a = i_start
a.fastRuneAt(i_next_a, rune_a, doInc = true)
i_next_b = i_start
b.fastRuneAt(i_next_b, rune_b, doInc = true)
if rune_a != rune_b:
inc(len_runes_a)
inc(len_runes_b)
break
i_start = i_next_a
var
# we know that we are either at the start of the strings
# or that the current value of rune_a is not equal to rune_b
# => start search for common suffix after the current rune (``i_next_*``)
i_end_a = i_next_a ## The exclusive upper index bound of string ``a``.
i_end_b = i_next_b ## The exclusive upper index bound of string ``b``.
i_current_a = i_next_a
i_current_b = i_next_b
block commonSuffix:
var
add_runes_a = 0
add_runes_b = 0
while i_current_a < len(a) and i_current_b < len(b):
i_next_a = i_current_a
a.fastRuneAt(i_next_a, rune_a)
i_next_b = i_current_b
b.fastRuneAt(i_next_b, rune_b)
inc(add_runes_a)
inc(add_runes_b)
if rune_a != rune_b:
i_end_a = i_next_a
i_end_b = i_next_b
inc(len_runes_a, add_runes_a)
inc(len_runes_b, add_runes_b)
add_runes_a = 0
add_runes_b = 0
i_current_a = i_next_a
i_current_b = i_next_b
if i_current_a >= len(a): # ``a`` exhausted
if i_current_b < len(b): # ``b`` not exhausted
i_end_a = i_current_a
i_end_b = i_current_b
inc(len_runes_a, add_runes_a)
inc(len_runes_b, add_runes_b)
while true:
b.fastRuneAt(i_end_b, rune_b)
inc(len_runes_b)
if i_end_b >= len(b): break
elif i_current_b >= len(b): # ``b`` exhausted and ``a`` not exhausted
i_end_a = i_current_a
i_end_b = i_current_b
inc(len_runes_a, add_runes_a)
inc(len_runes_b, add_runes_b)
while true:
a.fastRuneAt(i_end_a, rune_a)
inc(len_runes_a)
if i_end_a >= len(a): break
block specialCases:
# trivial cases:
if len_runes_a == 0: return len_runes_b
if len_runes_b == 0: return len_runes_a
# another special case:
if len_runes_a == 1:
a.fastRuneAt(i_start, rune_a, doInc = false)
var i_current_b = i_start
while i_current_b < i_end_b:
b.fastRuneAt(i_current_b, rune_b, doInc = true)
if rune_a == rune_b: return len_runes_b - 1
return len_runes_b
# common case:
var
len1 = len_runes_a + 1
len2 = len_runes_b + 1
row: seq[int]
let half = len_runes_a div 2
newSeq(row, len2)
var e = i_start + len2 - 1 # end marker
# initialize first row:
for i in 1 .. (len2 - half - 1): row[i] = i
row[0] = len1 - half - 1
i_current_a = i_start
var
char2p_i = -1
char2p_prev: int
for i in 1 .. (len1 - 1):
i_next_a = i_current_a
a.fastRuneAt(i_next_a, rune_a)
var
char2p: int
D, x: int
p: int
if i >= (len1 - half):
# skip the upper triangle:
let offset = i + half - len1
if char2p_i == i:
b.fastRuneAt(char2p_prev, rune_b)
char2p = char2p_prev
char2p_i = i + 1
else:
char2p = i_start
for j in 0 ..< offset:
rune_b = b.runeAt(char2p)
inc(char2p, rune_b.size)
char2p_i = i + 1
char2p_prev = char2p
p = offset
rune_b = b.runeAt(char2p)
var c3 = row[p] + (if rune_a != rune_b: 1 else: 0)
inc(char2p, rune_b.size)
inc(p)
x = row[p] + 1
D = x
if x > c3: x = c3
row[p] = x
inc(p)
else:
p = 1
char2p = i_start
D = i
x = i
if i <= (half + 1):
# skip the lower triangle:
e = len2 + i - half - 2
# main:
while p <= e:
dec(D)
rune_b = b.runeAt(char2p)
var c3 = D + (if rune_a != rune_b: 1 else: 0)
inc(char2p, rune_b.size)
inc(x)
if x > c3: x = c3
D = row[p] + 1
if x > D: x = D
row[p] = x
inc(p)
# lower triangle sentinel:
if i <= half:
dec(D)
rune_b = b.runeAt(char2p)
var c3 = D + (if rune_a != rune_b: 1 else: 0)
inc(x)
if x > c3: x = c3
row[p] = x
i_current_a = i_next_a
result = row[e]
proc editDistanceAscii*(a, b: string): int {.noSideEffect.} =
## Returns the edit distance between `a` and `b`.
##
## This uses the `Levenshtein`:idx: distance algorithm with only a linear
## memory overhead.
var len1 = a.len
var len2 = b.len
if len1 > len2:
# make `b` the longer string
return editDistanceAscii(b, a)
# strip common prefix:
var s = 0
while s < len1 and a[s] == b[s]:
inc(s)
dec(len1)
dec(len2)
# strip common suffix:
while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]:
dec(len1)
dec(len2)
# trivial cases:
if len1 == 0: return len2
if len2 == 0: return len1
# another special case:
if len1 == 1:
for j in s..s+len2-1:
if a[s] == b[j]: return len2 - 1
return len2
inc(len1)
inc(len2)
var half = len1 shr 1
# initalize first row:
#var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2*sizeof(int)))
var row: seq[int]
newSeq(row, len2)
var e = s + len2 - 1 # end marker
for i in 1..len2 - half - 1: row[i] = i
row[0] = len1 - half - 1
for i in 1 .. len1 - 1:
var char1 = a[i + s - 1]
var char2p: int
var D, x: int
var p: int
if i >= len1 - half:
# skip the upper triangle:
var offset = i - len1 + half
char2p = offset
p = offset
var c3 = row[p] + ord(char1 != b[s + char2p])
inc(p)
inc(char2p)
x = row[p] + 1
D = x
if x > c3: x = c3
row[p] = x
inc(p)
else:
p = 1
char2p = 0
D = i
x = i
if i <= half + 1:
# skip the lower triangle:
e = len2 + i - half - 2
# main:
while p <= e:
dec(D)
var c3 = D + ord(char1 != b[char2p + s])
inc(char2p)
inc(x)
if x > c3: x = c3
D = row[p] + 1
if x > D: x = D
row[p] = x
inc(p)
# lower triangle sentinel:
if i <= half:
dec(D)
var c3 = D + ord(char1 != b[char2p + s])
inc(x)
if x > c3: x = c3
row[p] = x
result = row[e]
when isMainModule:
doAssert editDistance("", "") == 0
doAssert editDistance("kitten", "sitting") == 3 # from Wikipedia
doAssert editDistance("flaw", "lawn") == 2 # from Wikipedia
doAssert editDistance("привет", "превет") == 1
doAssert editDistance("Åge", "Age") == 1
# editDistance, one string is longer in bytes, but shorter in rune length
# first string: 4 bytes, second: 6 bytes, but only 3 runes
doAssert editDistance("aaaa", "×××") == 4
block veryLongStringEditDistanceTest:
const cap = 256
var
s1 = newStringOfCap(cap)
s2 = newStringOfCap(cap)
while len(s1) < cap:
s1.add 'a'
while len(s2) < cap:
s2.add 'b'
doAssert editDistance(s1, s2) == cap
block combiningCodePointsEditDistanceTest:
const s = "A\xCC\x8Age"
doAssert editDistance(s, "Age") == 1
doAssert editDistanceAscii("", "") == 0
doAssert editDistanceAscii("kitten", "sitting") == 3 # from Wikipedia
doAssert editDistanceAscii("flaw", "lawn") == 2 # from Wikipedia