//
//
//           The Nimrod Compiler
//        (c) Copyright 2009 Andreas Rumpf
//
//    See the file "copying.txt", included in this
//    distribution, for details about the copyright.
//
unit idents;

{$include 'config.inc'}

// Identifier handling
// An identifier is a shared non-modifiable string that can be compared by its
// id. This module is essential for the compiler's performance.

interface

uses
  nhashes, nsystem, strutils;

type
  TIdObj = object(NObject)
    id: int; // unique id; use this for comparisons and not the pointers
  end;
  PIdObj = ^TIdObj;

  PIdent = ^TIdent;
  TIdent = object(TIdObj)
    s: string;
    next: PIdent;  // for hash-table chaining
    h: THash;      // hash value of s
  end {@acyclic};

function getIdent(const identifier: string): PIdent; overload;
function getIdent(const identifier: string; h: THash): PIdent; overload;
function getIdent(identifier: cstring; len: int; h: THash): PIdent; overload;
  // special version for the scanner; the scanner's buffering scheme makes
  // this horribly efficient. Most of the time no character copying is needed!

function IdentEq(id: PIdent; const name: string): bool;

implementation

function IdentEq(id: PIdent; const name: string): bool;
begin
  result := id.id = getIdent(name).id;
end;

var
  buckets: array [0..4096*2-1] of PIdent;

function cmpIgnoreStyle(a, b: cstring; blen: int): int;
var
  aa, bb: char;
  i, j: int;
begin
  i := 0;
  j := 0;
  result := 1;
  while j < blen do begin
    while a[i] = '_' do inc(i);
    while b[j] = '_' do inc(j);
    // tolower inlined:
    aa := a[i];
    bb := b[j];
    if (aa >= 'A') and (aa <= 'Z') then
      aa := chr(ord(aa) + (ord('a') - ord('A')));
    if (bb >= 'A') and (bb <= 'Z') then
      bb := chr(ord(bb) + (ord('a') - ord('A')));
    result := ord(aa) - ord(bb);
    if (result <> 0) or (aa = #0) then break;
    inc(i);
    inc(j)
  end;
  if result = 0 then 
    if a[i] <> #0 then result := 1
end;

function cmpExact(a, b: cstring; blen: int): int;
var
  aa, bb: char;
  i, j: int;
begin
  i := 0;
  j := 0;
  result := 1;
  while j < blen do begin
    aa := a[i];
    bb := b[j];
    result := ord(aa) - ord(bb);
    if (result <> 0) or (aa = #0) then break;
    inc(i);
    inc(j)
  end;
  if result = 0 then 
    if a[i] <> #0 then result := 1
end;

function getIdent(const identifier: string): PIdent;
begin
  result := getIdent(pchar(identifier), length(identifier),
                     getNormalizedHash(identifier))
end;

function getIdent(const identifier: string; h: THash): PIdent;
begin
  result := getIdent(pchar(identifier), length(identifier), h)
end;

var
  wordCounter: int = 1;

function getIdent(identifier: cstring; len: int; h: THash): PIdent;
var
  idx, i, id: int;
  last: PIdent;
begin
  idx := h and high(buckets);
  result := buckets[idx];
  last := nil;
  id := 0;
  while result <> nil do begin
    if cmpExact(pchar(result.s), identifier, len) = 0 then begin
      if last <> nil then begin
        // make access to last looked up identifier faster:
        last.next := result.next;
        result.next := buckets[idx];
        buckets[idx] := result
      end;
      exit
    end
    else if cmpIgnoreStyle(pchar(result.s), identifier, len) = 0 then begin
      (*if (id <> 0) and (id <> result.id) then begin
        result := buckets[idx];
        writeln('current id ', id);
        for i := 0 to len-1 do write(identifier[i]);
        writeln;
        while result <> nil do begin
          writeln(result.s, '  ', result.id);
          result := result.next
        end
      end;*)
      assert((id = 0) or (id = result.id));
      id := result.id
    end;
    last := result;
    result := result.next
  end;
  // new ident:
  new(result);
{@ignore}
  fillChar(result^, sizeof(result^), 0);
{@emit}
  result.h := h;
  result.s := newString(len);
  for i := strStart to len+StrStart-1 do
    result.s[i] := identifier[i-StrStart];
  result.next := buckets[idx];
  buckets[idx] := result;
  if id = 0 then begin
    inc(wordCounter);
    result.id := - wordCounter;
  end
  else
    result.id := id
//  writeln('new word ', result.s);
end;

end.