//
//
// Nimrod's Runtime Library
// (c) Copyright 2008 Andreas Rumpf
//
// See the file "copying.txt", included in this
// distribution, for details about the copyright.
//
unit nstrtabs;
// String tables.
interface
{$include 'config.inc'}
uses
nsystem, nos, nhashes, strutils;
type
TStringTableMode = (
modeCaseSensitive, // the table is case sensitive
modeCaseInsensitive, // the table is case insensitive
modeStyleInsensitive // the table is style insensitive
);
TKeyValuePair = record{@tuple}
key, val: string;
end;
TKeyValuePairSeq = array of TKeyValuePair;
TStringTable = object(NObject)
counter: int;
data: TKeyValuePairSeq;
mode: TStringTableMode;
end;
PStringTable = ^TStringTable;
function newStringTable(const keyValuePairs: array of string;
mode: TStringTableMode = modeCaseSensitive): PStringTable;
procedure put(t: PStringTable; const key, val: string);
function get(t: PStringTable; const key: string): string;
function hasKey(t: PStringTable; const key: string): bool;
function len(t: PStringTable): int;
type
TFormatFlag = (
useEnvironment, // use environment variable if the ``$key``
// is not found in the table
useEmpty, // use the empty string as a default, thus it
// won't throw an exception if ``$key`` is not
// in the table
useKey // do not replace ``$key`` if it is not found
// in the table (or in the environment)
);
TFormatFlags = set of TFormatFlag;
function format(const f: string; t: PStringTable;
flags: TFormatFlags = {@set}[]): string;
implementation
const
growthFactor = 2;
startSize = 64;
{@ignore}
function isNil(const s: string): bool;
begin
result := s = ''
end;
{@emit}
function newStringTable(const keyValuePairs: array of string;
mode: TStringTableMode = modeCaseSensitive): PStringTable;
var
i: int;
begin
new(result);
result.mode := mode;
result.counter := 0;
{@ignore}
setLength(result.data, startSize);
fillChar(result.data[0], length(result.data)*sizeof(result.data[0]), 0);
{@emit
newSeq(result.data, startSize); }
i := 0;
while i < high(keyValuePairs) do begin
put(result, keyValuePairs[i], keyValuePairs[i+1]);
inc(i, 2);
end
end;
function myhash(t: PStringTable; const key: string): THash;
begin
case t.mode of
modeCaseSensitive: result := nhashes.GetHashStr(key);
modeCaseInsensitive: result := nhashes.GetHashStrCI(key);
modeStyleInsensitive: result := nhashes.getNormalizedHash(key);
end
end;
function myCmp(t: PStringTable; const a, b: string): bool;
begin
case t.mode of
modeCaseSensitive: result := cmp(a, b) = 0;
modeCaseInsensitive: result := cmpIgnoreCase(a, b) = 0;
modeStyleInsensitive: result := cmpIgnoreStyle(a, b) = 0;
end
end;
function mustRehash(len, counter: int): bool;
begin
assert(len > counter);
result := (len * 2 < counter * 3) or (len-counter < 4);
end;
function len(t: PStringTable): int;
begin
result := t.counter
end;
{@ignore}
const
EmptySeq = nil;
{@emit
const
EmptySeq = [];
}
function nextTry(h, maxHash: THash): THash;
begin
result := ((5*h) + 1) and maxHash;
// For any initial h in range(maxHash), repeating that maxHash times
// generates each int in range(maxHash) exactly once (see any text on
// random-number generation for proof).
end;
function RawGet(t: PStringTable; const key: string): int;
var
h: THash;
begin
h := myhash(t, key) and high(t.data); // start with real hash value
while not isNil(t.data[h].key) do begin
if mycmp(t, t.data[h].key, key) then begin
result := h; exit
end;
h := nextTry(h, high(t.data))
end;
result := -1
end;
function get(t: PStringTable; const key: string): string;
var
index: int;
begin
index := RawGet(t, key);
if index >= 0 then result := t.data[index].val
else result := ''
end;
function hasKey(t: PStringTable; const key: string): bool;
begin
result := rawGet(t, key) >= 0
end;
procedure RawInsert(t: PStringTable;
var data: TKeyValuePairSeq;
const key, val: string);
var
h: THash;
begin
h := myhash(t, key) and high(data);
while not isNil(data[h].key) do begin
h := nextTry(h, high(data))
end;
data[h].key := key;
data[h].val := val;
end;
procedure Enlarge(t: PStringTable);
var
n: TKeyValuePairSeq;
i: int;
begin
{@ignore}
n := emptySeq;
setLength(n, length(t.data) * growthFactor);
fillChar(n[0], length(n)*sizeof(n[0]), 0);
{@emit
newSeq(n, length(t.data) * growthFactor); }
for i := 0 to high(t.data) do
if not isNil(t.data[i].key) then
RawInsert(t, n, t.data[i].key, t.data[i].val);
{@ignore}
t.data := n;
{@emit
swap(t.data, n);
}
end;
procedure Put(t: PStringTable; const key, val: string);
var
index: int;
begin
index := RawGet(t, key);
if index >= 0 then
t.data[index].val := val
else begin
if mustRehash(length(t.data), t.counter) then Enlarge(t);
RawInsert(t, t.data, key, val);
inc(t.counter)
end;
end;
{@ignore}
type
EInvalidValue = int; // dummy for the Pascal compiler
{@emit}
procedure RaiseFormatException(const s: string);
var
e: ^EInvalidValue;
begin
{@ignore}
raise EInvalidFormatStr.create(s);
{@emit
new(e);}
{@emit
e.msg := 'format string: key not found: ' + s;}
{@emit
raise e;}
end;
function getValue(t: PStringTable; flags: TFormatFlags;
const key: string): string;
begin
if hasKey(t, key) then begin
result := get(t, key); exit
end;
if useEnvironment in flags then
result := nos.getEnv(key)
else
result := '';
if (result = '') then begin
if useKey in flags then result := '$' + key
else if not (useEmpty in flags) then
raiseFormatException(key)
end
end;
function format(const f: string; t: PStringTable;
flags: TFormatFlags = {@set}[]): string;
const
PatternChars = ['a'..'z', 'A'..'Z', '0'..'9', '_', #128..#255];
var
i, j: int;
key: string;
begin
result := '';
i := strStart;
while i <= length(f)+strStart-1 do
if f[i] = '$' then begin
case f[i+1] of
'$': begin
addChar(result, '$');
inc(i, 2);
end;
'{': begin
j := i+1;
while (j <= length(f)+strStart-1) and (f[j] <> '}') do inc(j);
key := ncopy(f, i+2+strStart-1, j-1+strStart-1);
add(result, getValue(t, flags, key));
i := j+1
end;
'a'..'z', 'A'..'Z', #128..#255, '_': begin
j := i+1;
while (j <= length(f)+strStart-1) and (f[j] in PatternChars) do inc(j);
key := ncopy(f, i+1+strStart-1, j-1+strStart-1);
add(result, getValue(t, flags, key));
i := j
end
else begin
addChar(result, f[i]);
inc(i)
end
end
end
else begin
addChar(result, f[i]);
inc(i)
end
end;
end.