path: root/compiler/idents.nim



#
#
#           The Nimrod Compiler
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

# Identifier handling
# An identifier is a shared non-modifiable string that can be compared by its
# id. This module is essential for the compiler's performance.

import 
  hashes, strutils

# Identifier objects: TIdObj carries the numeric id, TIdent adds the spelling,
# the hash and the link used to chain entries inside the `buckets` table.
type 
  TIdObj* = object of TObject
    id*: int # unique id; use this for comparisons and not the pointers
             # (getIdent gives spellings that differ only in style the same id)
  
  PIdObj* = ref TIdObj
  PIdent* = ref TIdent
  TIdent*{.acyclic.} = object of TIdObj
    s*: string                # spelling, copied from the text given to getIdent
    next*: PIdent             # for hash-table chaining
    h*: THash                 # hash value of s (as supplied to getIdent)
  

proc getIdent*(identifier: string): PIdent
  # returns the shared identifier object for `identifier`, creating it on
  # first use; the hash is computed with hashIgnoreStyle
proc getIdent*(identifier: string, h: THash): PIdent
  # same as above for callers that already have the hash `h`
proc getIdent*(identifier: cstring, length: int, h: THash): PIdent
  # special version for the scanner: only the first `length` characters of
  # `identifier` are considered, so the scanner can pass a pointer into its
  # buffer. This makes lookups very efficient; most of the time no character
  # copying is needed!
proc IdentEq*(id: PIdent, name: string): bool
  # true if `id` has the same id as the identifier obtained for `name`
# implementation

proc IdentEq(id: PIdent, name: string): bool = 
  # Tests whether `id` denotes the identifier spelled `name`. `name` is run
  # through getIdent first (which creates an entry if none exists yet) and
  # the numeric ids are compared rather than the strings.
  var interned: PIdent
  interned = getIdent(name)
  result = interned.id == id.id

# Hash table holding every identifier created by getIdent; each slot is the
# head of a chain linked through the `next` field. getIdent picks the slot
# with `h and high(buckets)`, so the slot count has to stay a power of two.
var buckets: array[0..4096 * 2 - 1, PIdent]

proc cmpIgnoreStyle(a, b: cstring, blen: int): int = 
  # Compares the NUL-terminated string `a` with the first `blen` characters
  # of `b`, ignoring underscores and the case of ASCII letters.
  # Returns 0 if both are equal under these rules and a non-zero value
  # otherwise (the difference of the first mismatching characters, or 1 if
  # `a` has characters left over). A `b` of length 0 never compares equal.
  #
  # `b` need not be NUL-terminated (the scanner passes a pointer into its
  # buffer), so no character at or beyond index `blen` is ever inspected;
  # the slice behaves as if it were terminated right after `blen` characters.
  var 
    aa, bb: char
    i, j: int
  i = 0
  j = 0
  result = 1
  while j < blen: 
    while a[i] == '_': inc(i)
    # the bound check keeps a trailing '_' from pulling in foreign characters
    while (j < blen) and (b[j] == '_'): inc(j)
    # tolower inlined:
    aa = a[i]
    if j < blen: bb = b[j]
    else: bb = '\0'            # slice exhausted: treat like a terminator
    if (aa >= 'A') and (aa <= 'Z'): aa = chr(ord(aa) + (ord('a') - ord('A')))
    if (bb >= 'A') and (bb <= 'Z'): bb = chr(ord(bb) + (ord('a') - ord('A')))
    result = ord(aa) - ord(bb)
    if (result != 0) or (aa == '\0'): break 
    inc(i)
    inc(j)
  if result == 0: 
    # underscores left over in `a` are insignificant as well; anything else
    # means that `a` is longer than `b`
    while a[i] == '_': inc(i)
    if a[i] != '\0': result = 1
  
proc cmpExact(a, b: cstring, blen: int): int = 
  # Character-exact comparison of the NUL-terminated string `a` with the
  # first `blen` characters of `b`. Yields 0 on equality, the difference of
  # the first mismatching characters otherwise, and 1 if `a` merely has
  # `b` as a proper prefix. A `b` of length 0 never compares equal.
  var 
    pos, diff: int
  pos = 0
  diff = 1                    # answer for blen <= 0
  while pos < blen: 
    diff = ord(a[pos]) - ord(b[pos])
    if diff != 0: break 
    if a[pos] == '\0': break  # both strings ended here
    inc(pos)
  # all of `b` matched, but `a` goes on: not equal
  if (diff == 0) and (a[pos] != '\0'): diff = 1
  result = diff
  
proc getIdent(identifier: string): PIdent = 
  # Convenience overload: hashes `identifier` with hashIgnoreStyle and hands
  # the work over to the cstring/length/hash variant.
  var h: THash
  h = hashIgnoreStyle(identifier)
  result = getIdent(cstring(identifier), len(identifier), h)

proc getIdent(identifier: string, h: THash): PIdent = 
  # Overload for callers that already hold the hash `h` of `identifier`;
  # forwards the whole string to the cstring/length/hash variant.
  return getIdent(cstring(identifier), len(identifier), h)

# Source of fresh identifier ids: getIdent increments this counter and then
# uses the negated value, so the first id handed out is -2.
var wordCounter: int = 1

proc getIdent(identifier: cstring, length: int, h: THash): PIdent = 
  # Returns the shared identifier object for the first `length` characters
  # of `identifier`; `h` is the hash the caller computed for them.
  #
  # The chain of the bucket selected by `h` is searched. An entry with
  # exactly the same spelling is moved to the front of its chain and
  # returned. Otherwise a new entry is created at the front of the chain; it
  # takes over the id of an entry that differs only in style (case,
  # underscores) if there is one, and gets a fresh negative id if not.
  var 
    slot, sharedId: int
    prev: PIdent
  slot = h and high(buckets)
  sharedId = 0                # id of a style-equivalent entry, 0 = none seen
  prev = nil
  result = buckets[slot]
  while result != nil: 
    if cmpExact(cstring(result.s), identifier, length) == 0: 
      if prev != nil: 
        # not yet at the head: unlink the entry and re-insert it in front
        # so that the most recently used identifier is found first
        prev.next = result.next
        result.next = buckets[slot]
        buckets[slot] = result
      return 
    if cmpIgnoreStyle(cstring(result.s), identifier, length) == 0: 
      # all style-equivalent spellings have to agree on one id
      assert((sharedId == 0) or (sharedId == result.id))
      sharedId = result.id
    prev = result
    result = result.next
  # unknown spelling: build a new entry holding a private copy of the text
  new(result)
  result.h = h
  result.s = newString(length)
  for k in countup(0, length - 1): result.s[k] = identifier[k]
  result.next = buckets[slot]
  buckets[slot] = result
  if sharedId != 0: 
    result.id = sharedId
  else: 
    inc(wordCounter)
    result.id = - wordCounter
  
#
#
#           The Nimrod Compiler
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

# Identifier handling
# An identifier is a shared non-modifiable string that can be compared by its
# id. This module is essential for the compiler's performance.

import 
  hashes, strutils

# Identifier objects: TIdObj carries the numeric id, TIdent adds the spelling,
# the hash and the link used to chain entries inside the `buckets` table.
type 
  TIdObj* = object of TObject
    id*: int # unique id; use this for comparisons and not the pointers
             # (getIdent gives spellings that differ only in style the same id)
  
  PIdObj* = ref TIdObj
  PIdent* = ref TIdent
  TIdent*{.acyclic.} = object of TIdObj
    s*: string                # spelling, copied from the text given to getIdent
    next*: PIdent             # for hash-table chaining
    h*: THash                 # hash value of s (as supplied to getIdent)
  

proc getIdent*(identifier: string): PIdent
  # returns the shared identifier object for `identifier`, creating it on
  # first use; the hash is computed with hashIgnoreStyle
proc getIdent*(identifier: string, h: THash): PIdent
  # same as above for callers that already have the hash `h`
proc getIdent*(identifier: cstring, length: int, h: THash): PIdent
  # special version for the scanner: only the first `length` characters of
  # `identifier` are considered, so the scanner can pass a pointer into its
  # buffer. This makes lookups very efficient; most of the time no character
  # copying is needed!
proc IdentEq*(id: PIdent, name: string): bool
  # true if `id` has the same id as the identifier obtained for `name`
# implementation

proc IdentEq(id: PIdent, name: string): bool = 
  # Tests whether `id` denotes the identifier spelled `name`. `name` is run
  # through getIdent first (which creates an entry if none exists yet) and
  # the numeric ids are compared rather than the strings.
  var interned: PIdent
  interned = getIdent(name)
  result = interned.id == id.id

# Hash table holding every identifier created by getIdent; each slot is the
# head of a chain linked through the `next` field. getIdent picks the slot
# with `h and high(buckets)`, so the slot count has to stay a power of two.
var buckets: array[0..4096 * 2 - 1, PIdent]

proc cmpIgnoreStyle(a, b: cstring, blen: int): int = 
  # Compares the NUL-terminated string `a` with the first `blen` characters
  # of `b`, ignoring underscores and the case of ASCII letters.
  # Returns 0 if both are equal under these rules and a non-zero value
  # otherwise (the difference of the first mismatching characters, or 1 if
  # `a` has characters left over). A `b` of length 0 never compares equal.
  #
  # `b` need not be NUL-terminated (the scanner passes a pointer into its
  # buffer), so no character at or beyond index `blen` is ever inspected;
  # the slice behaves as if it were terminated right after `blen` characters.
  var 
    aa, bb: char
    i, j: int
  i = 0
  j = 0
  result = 1
  while j < blen: 
    while a[i] == '_': inc(i)
    # the bound check keeps a trailing '_' from pulling in foreign characters
    while (j < blen) and (b[j] == '_'): inc(j)
    # tolower inlined:
    aa = a[i]
    if j < blen: bb = b[j]
    else: bb = '\0'            # slice exhausted: treat like a terminator
    if (aa >= 'A') and (aa <= 'Z'): aa = chr(ord(aa) + (ord('a') - ord('A')))
    if (bb >= 'A') and (bb <= 'Z'): bb = chr(ord(bb) + (ord('a') - ord('A')))
    result = ord(aa) - ord(bb)
    if (result != 0) or (aa == '\0'): break 
    inc(i)
    inc(j)
  if result == 0: 
    # underscores left over in `a` are insignificant as well; anything else
    # means that `a` is longer than `b`
    while a[i] == '_': inc(i)
    if a[i] != '\0': result = 1
  
proc cmpExact(a, b: cstring, blen: int): int = 
  # Character-exact comparison of the NUL-terminated string `a` with the
  # first `blen` characters of `b`. Yields 0 on equality, the difference of
  # the first mismatching characters otherwise, and 1 if `a` merely has
  # `b` as a proper prefix. A `b` of length 0 never compares equal.
  var 
    pos, diff: int
  pos = 0
  diff = 1                    # answer for blen <= 0
  while pos < blen: 
    diff = ord(a[pos]) - ord(b[pos])
    if diff != 0: break 
    if a[pos] == '\0': break  # both strings ended here
    inc(pos)
  # all of `b` matched, but `a` goes on: not equal
  if (diff == 0) and (a[pos] != '\0'): diff = 1
  result = diff
  
proc getIdent(identifier: string): PIdent = 
  # Convenience overload: hashes `identifier` with hashIgnoreStyle and hands
  # the work over to the cstring/length/hash variant.
  var h: THash
  h = hashIgnoreStyle(identifier)
  result = getIdent(cstring(identifier), len(identifier), h)

proc getIdent(identifier: string, h: THash): PIdent = 
  # Overload for callers that already hold the hash `h` of `identifier`;
  # forwards the whole string to the cstring/length/hash variant.
  return getIdent(cstring(identifier), len(identifier), h)

# Source of fresh identifier ids: getIdent increments this counter and then
# uses the negated value, so the first id handed out is -2.
var wordCounter: int = 1

proc getIdent(identifier: cstring, length: int, h: THash): PIdent = 
  # Returns the shared identifier object for the first `length` characters
  # of `identifier`; `h` is the hash the caller computed for them.
  #
  # The chain of the bucket selected by `h` is searched. An entry with
  # exactly the same spelling is moved to the front of its chain and
  # returned. Otherwise a new entry is created at the front of the chain; it
  # takes over the id of an entry that differs only in style (case,
  # underscores) if there is one, and gets a fresh negative id if not.
  var 
    slot, sharedId: int
    prev: PIdent
  slot = h and high(buckets)
  sharedId = 0                # id of a style-equivalent entry, 0 = none seen
  prev = nil
  result = buckets[slot]
  while result != nil: 
    if cmpExact(cstring(result.s), identifier, length) == 0: 
      if prev != nil: 
        # not yet at the head: unlink the entry and re-insert it in front
        # so that the most recently used identifier is found first
        prev.next = result.next
        result.next = buckets[slot]
        buckets[slot] = result
      return 
    if cmpIgnoreStyle(cstring(result.s), identifier, length) == 0: 
      # all style-equivalent spellings have to agree on one id
      assert((sharedId == 0) or (sharedId == result.id))
      sharedId = result.id
    prev = result
    result = result.next
  # unknown spelling: build a new entry holding a private copy of the text
  new(result)
  result.h = h
  result.s = newString(length)
  for k in countup(0, length - 1): result.s[k] = identifier[k]
  result.next = buckets[slot]
  buckets[slot] = result
  if sharedId != 0: 
    result.id = sharedId
  else: 
    inc(wordCounter)
    result.id = - wordCounter