compiler/idents.nim


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132

#
#
#           The Nimrod Compiler
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

# Identifier handling
# An identifier is a shared non-modifiable string that can be compared by its
# id. This module is essential for the compiler's performance.

import 
  hashes, strutils

type 
  TIdObj* = object of TObject
    id*: int # unique id; use this for comparisons and not the pointers
  
  PIdObj* = ref TIdObj
  PIdent* = ref TIdent
  TIdent*{.acyclic.} = object of TIdObj
    s*: string
    next*: PIdent             # for hash-table chaining
    h*: THash                 # hash value of s
  

proc getIdent*(identifier: string): PIdent
proc getIdent*(identifier: string, h: THash): PIdent
proc getIdent*(identifier: cstring, length: int, h: THash): PIdent
  # special version for the scanner; the scanner's buffering scheme makes
  # this horribly efficient. Most of the time no character copying is needed!
proc IdentEq*(id: PIdent, name: string): bool
# implementation

proc IdentEq(id: PIdent, name: string): bool = 
  result = id.id == getIdent(name).id

var buckets: array[0..4096 * 2 - 1, PIdent]

proc cmpIgnoreStyle(a, b: cstring, blen: int): int = 
  var 
    aa, bb: char
    i, j: int
  i = 0
  j = 0
  result = 1
  while j < blen: 
    while a[i] == '_': inc(i)
    while b[j] == '_': inc(j)
    # tolower inlined:
    aa = a[i]
    bb = b[j]
    if (aa >= 'A') and (aa <= 'Z'): aa = chr(ord(aa) + (ord('a') - ord('A')))
    if (bb >= 'A') and (bb <= 'Z'): bb = chr(ord(bb) + (ord('a') - ord('A')))
    result = ord(aa) - ord(bb)
    if (result != 0) or (aa == '\0'): break 
    inc(i)
    inc(j)
  if result == 0: 
    if a[i] != '\0': result = 1
  
proc cmpExact(a, b: cstring, blen: int): int = 
  var 
    aa, bb: char
    i, j: int
  i = 0
  j = 0
  result = 1
  while j < blen: 
    aa = a[i]
    bb = b[j]
    result = ord(aa) - ord(bb)
    if (result != 0) or (aa == '\0'): break 
    inc(i)
    inc(j)
  if result == 0: 
    if a[i] != '\0': result = 1
  
proc getIdent(identifier: string): PIdent = 
  result = getIdent(cstring(identifier), len(identifier), 
                    hashIgnoreStyle(identifier))

proc getIdent(identifier: string, h: THash): PIdent = 
  result = getIdent(cstring(identifier), len(identifier), h)

var wordCounter: int = 1

proc getIdent(identifier: cstring, length: int, h: THash): PIdent = 
  var 
    idx, id: int
    last: PIdent
  idx = h and high(buckets)
  result = buckets[idx]
  last = nil
  id = 0
  while result != nil: 
    if cmpExact(cstring(result.s), identifier, length) == 0: 
      if last != nil: 
        # make access to last looked up identifier faster:
        last.next = result.next
        result.next = buckets[idx]
        buckets[idx] = result
      return 
    elif cmpIgnoreStyle(cstring(result.s), identifier, length) == 0: 
      #if (id <> 0) and (id <> result.id) then begin
      #        result := buckets[idx];
      #        writeln('current id ', id);
      #        for i := 0 to len-1 do write(identifier[i]);
      #        writeln;
      #        while result <> nil do begin
      #          writeln(result.s, '  ', result.id);
      #          result := result.next
      #        end
      #      end;
      assert((id == 0) or (id == result.id))
      id = result.id
    last = result
    result = result.next
  new(result)
  result.h = h
  result.s = newString(length)
  for i in countup(0, length + 0 - 1): result.s[i] = identifier[i - 0]
  result.next = buckets[idx]
  buckets[idx] = result
  if id == 0: 
    inc(wordCounter)
    result.id = - wordCounter
  else: 
    result.id = id            #  writeln('new word ', result.s);