1 files changed, 132 insertions, 0 deletions
diff --git a/rod/idents.nim b/rod/idents.nim
new file mode 100755
index 000000000..03d155169
--- /dev/null
+++ b/rod/idents.nim
@@ -0,0 +1,132 @@
+#
+#
+#           The Nimrod Compiler
+#        (c) Copyright 2009 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Identifier handling
+# An identifier is a shared non-modifiable string that can be compared by its
+# id. This module is essential for the compiler's performance.
+
+import 
+  nhashes, strutils
+
+type 
+  TIdObj* = object of TObject # an object with an id; TIdent derives from it
+    id*: int                  # unique id; use this for comparisons and not the pointers
+  
+  PIdObj* = ref TIdObj
+  PIdent* = ref TIdent
+  TIdent*{.acyclic.} = object of TIdObj # one entry of the identifier table
+    s*: string                # the spelling; getIdent copies it from its argument
+    next*: PIdent             # for hash-table chaining
+    h*: THash                 # hash value of s, as passed to getIdent
+  
+
+proc getIdent*(identifier: string): PIdent
+proc getIdent*(identifier: string, h: THash): PIdent
+proc getIdent*(identifier: cstring, length: int, h: THash): PIdent
+  # special version for the scanner; the scanner's buffering scheme makes
+  # this very efficient: only creating a new entry copies the characters
+proc IdentEq*(id: PIdent, name: string): bool
+# implementation
+
+proc IdentEq(id: PIdent, name: string): bool = 
+  return id.id == getIdent(name).id # compares ids; enters `name` if it is new
+
+var buckets: array[0..4096 * 2 - 1, PIdent] # size must be 2^n: indexed by `h and high(buckets)`
+
+proc cmpIgnoreStyle(a, b: cstring, blen: int): int = 
+  # Compares the NUL-terminated `a` with b[0..blen-1], ignoring '_' and ASCII
+  # letter case. Returns 0 on a match and non-zero otherwise. b[blen] is never
+  # read, and trailing '_' are skipped on both sides. Empty `b`: always 1.
+  var i = 0
+  var j = 0
+  result = ord(blen <= 0)
+  while true: 
+    while a[i] == '_': inc(i)
+    while (j < blen) and (b[j] == '_'): inc(j)
+    # `b` used up: result is 0 here (unless blen <= 0), `a` is checked below
+    if j >= blen: break 
+    var aa = a[i]             # tolower inlined:
+    var bb = b[j]
+    if (aa >= 'A') and (aa <= 'Z'): aa = chr(ord(aa) + (ord('a') - ord('A')))
+    if (bb >= 'A') and (bb <= 'Z'): bb = chr(ord(bb) + (ord('a') - ord('A')))
+    result = ord(aa) - ord(bb)
+    if (result != 0) or (aa == '\0'): break 
+    inc(i)
+    inc(j)
+  if (result == 0) and (a[i] != '\0'): result = 1
+  
+proc cmpExact(a, b: cstring, blen: int): int = 
+  # Compares the NUL-terminated `a` char by char with b[0..blen-1]; case and
+  # underscores count. Returns 0 if both are equal, else the difference of the
+  # first unequal chars, or 1 if `a` merely continues after b's blen chars.
+  # For blen <= 0 the result is always 1.
+  var pos = 0                 # both strings are always read at the same index
+  result = 1
+  while pos < blen: 
+    var ch = a[pos]
+    result = ord(ch) - ord(b[pos])
+    # done at the first difference, or when `a` ends
+    if (ch == '\0') or (result != 0): break 
+    inc(pos)
+  # everything compared equal, yet `a` has chars left over: report a mismatch
+  if (result == 0) and (a[pos] != '\0'): 
+    result = 1
+  
+proc getIdent(identifier: string): PIdent = 
+  # Looks up (or enters) `identifier`, hashing it with getNormalizedHash.
+  result = getIdent(identifier, getNormalizedHash(identifier))
+
+proc getIdent(identifier: string, h: THash): PIdent = 
+  return getIdent(cstring(identifier), len(identifier), h) # h is used as given
+
+var wordCounter: int = 1 # incremented, then negated, for each fresh id: the first one is -2
+
+proc getIdent(identifier: cstring, length: int, h: THash): PIdent = 
+  # Returns the table entry whose spelling is exactly identifier[0..length-1]
+  # and creates that entry if it is missing. `h` picks the bucket and is stored
+  # in a new entry. A new entry takes over the id of an entry of the same
+  # bucket that differs from it only in letter case or underscores (as judged
+  # by cmpIgnoreStyle); if there is none, it gets a fresh negative id.
+  var 
+    prev: PIdent = nil        # entry visited just before `result`
+    sharedId = 0              # id of a style-insensitive match; 0 means none
+  var slot: int = h and high(buckets)
+  result = buckets[slot]
+  while result != nil: 
+    if cmpExact(cstring(result.s), identifier, length) == 0: 
+      # exact hit; unless the entry already heads its chain, move it to the
+      # front to make access to the last looked up identifier faster
+      if prev != nil: 
+        prev.next = result.next
+        result.next = buckets[slot]
+        buckets[slot] = result
+      return 
+    if cmpIgnoreStyle(cstring(result.s), identifier, length) == 0: 
+      # another spelling of the same name: remember its id; every such entry
+      # in the chain is expected to carry the same one
+      assert((sharedId == 0) or (sharedId == result.id))
+      sharedId = result.id
+    prev = result
+    result = result.next
+  # no exact match: build a new entry that owns a copy of the characters ...
+  new(result)
+  result.h = h
+  result.s = newString(length)
+  for k in countup(0, length - 1): result.s[k] = identifier[k]
+  # ... and make it the head of its chain
+  result.next = buckets[slot]
+  buckets[slot] = result
+  # reuse the id of a known spelling, if there is one
+  if sharedId != 0: 
+    result.id = sharedId
+  else: 
+    # first spelling of this name: hand out the next id
+    inc(wordCounter)
+    result.id = - wordCounter
+  
\ No newline at end of file