#
#
# The Nimrod Compiler
# (c) Copyright 2009 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
# Identifier handling
# An identifier is a shared non-modifiable string that can be compared by its
# id. This module is essential for the compiler's performance.
import
hashes, strutils
type
TIdObj* = object of TObject
id*: int # unique id; use this for comparisons and not the pointers
PIdObj* = ref TIdObj
PIdent* = ref TIdent
TIdent*{.acyclic.} = object of TIdObj
s*: string
next*: PIdent # for hash-table chaining
h*: THash # hash value of s
proc getIdent*(identifier: string): PIdent
proc getIdent*(identifier: string, h: THash): PIdent
proc getIdent*(identifier: cstring, length: int, h: THash): PIdent
# special version for the scanner; the scanner's buffering scheme makes
# this horribly efficient. Most of the time no character copying is needed!
proc IdentEq*(id: PIdent, name: string): bool
# implementation
proc IdentEq(id: PIdent, name: string): bool =
result = id.id == getIdent(name).id
var buckets: array[0..4096 * 2 - 1, PIdent]
proc cmpIgnoreStyle(a, b: cstring, blen: int): int =
var
aa, bb: char
i, j: int
i = 0
j = 0
result = 1
while j < blen:
while a[i] == '_': inc(i)
while b[j] == '_': inc(j)
# tolower inlined:
aa = a[i]
bb = b[j]
if (aa >= 'A') and (aa <= 'Z'): aa = chr(ord(aa) + (ord('a') - ord('A')))
if (bb >= 'A') and (bb <= 'Z'): bb = chr(ord(bb) + (ord('a') - ord('A')))
result = ord(aa) - ord(bb)
if (result != 0) or (aa == '\0'): break
inc(i)
inc(j)
if result == 0:
if a[i] != '\0': result = 1
proc cmpExact(a, b: cstring, blen: int): int =
var
aa, bb: char
i, j: int
i = 0
j = 0
result = 1
while j < blen:
aa = a[i]
bb = b[j]
result = ord(aa) - ord(bb)
if (result != 0) or (aa == '\0'): break
inc(i)
inc(j)
if result == 0:
if a[i] != '\0': result = 1
proc getIdent(identifier: string): PIdent =
result = getIdent(cstring(identifier), len(identifier),
hashIgnoreStyle(identifier))
proc getIdent(identifier: string, h: THash): PIdent =
result = getIdent(cstring(identifier), len(identifier), h)
var wordCounter: int = 1
proc getIdent(identifier: cstring, length: int, h: THash): PIdent =
var
idx, id: int
last: PIdent
idx = h and high(buckets)
result = buckets[idx]
last = nil
id = 0
while result != nil:
if cmpExact(cstring(result.s), identifier, length) == 0:
if last != nil:
# make access to last looked up identifier faster:
last.next = result.next
result.next = buckets[idx]
buckets[idx] = result
return
elif cmpIgnoreStyle(cstring(result.s), identifier, length) == 0:
#if (id <> 0) and (id <> result.id) then begin
# result := buckets[idx];
# writeln('current id ', id);
# for i := 0 to len-1 do write(identifier[i]);
# writeln;
# while result <> nil do begin
# writeln(result.s, ' ', result.id);
# result := result.next
# end
# end;
assert((id == 0) or (id == result.id))
id = result.id
last = result
result = result.next
new(result)
result.h = h
result.s = newString(length)
for i in countup(0, length + 0 - 1): result.s[i] = identifier[i - 0]
result.next = buckets[idx]
buckets[idx] = result
if id == 0:
inc(wordCounter)
result.id = - wordCounter
else:
result.id = id # writeln('new word ', result.s);