summary refs log tree commit diff stats
path: root/nim/strutils.pas
diff options
context:
space:
mode:
authorAndreas Rumpf <andreas@andi>2008-06-22 16:14:11 +0200
committerAndreas Rumpf <andreas@andi>2008-06-22 16:14:11 +0200
commit405b86068e6a3d39970b9129ceec0a9108464b28 (patch)
treec0449946f54baae6ea88baf453157ddd7faa8f86 /nim/strutils.pas
downloadNim-405b86068e6a3d39970b9129ceec0a9108464b28.tar.gz
Initial import
Diffstat (limited to 'nim/strutils.pas')
-rwxr-xr-xnim/strutils.pas687
1 files changed, 687 insertions, 0 deletions
diff --git a/nim/strutils.pas b/nim/strutils.pas
new file mode 100755
index 000000000..b654b7868
--- /dev/null
+++ b/nim/strutils.pas
@@ -0,0 +1,687 @@
+//
+//
+//           The Nimrod Compiler
+//        (c) Copyright 2008 Andreas Rumpf
+//
+//    See the file "copying.txt", included in this
+//    distribution, for details about the copyright.
+//
+unit strutils;
+
+interface
+
+{$include 'config.inc'}
+
+uses
+  sysutils, nsystem;
+
+type
+  EInvalidFormatStr = class(Exception)
+  end;
+
+const
+  StrStart = 1;
+
+function normalize(const s: string): string;
+function cmpIgnoreStyle(const x, y: string): int;
+function cmp(const x, y: string): int;
+function cmpIgnoreCase(const x, y: string): int;
+
+function format(const f: string; const args: array of string): string;
+
+function toHex(x: BiggestInt; len: int): string;
+function toOctal(value: Char): string;
+function toOct(x: BiggestInt; len: int): string;
+function toBin(x: BiggestInt; len: int): string;
+
+
+procedure addChar(var s: string; c: Char);
+function toInt(const s: string): int;
+function toBiggestInt(const s: string): BiggestInt;
+
+function toString(i: BiggestInt): string; overload;
+
+//function toString(i: int): string; overload;
+function ToStringF(const r: Real): string; overload;
+function ToString(b: Boolean): string; overload;
+
+function IntToStr(i: BiggestInt; minChars: int): string;
+
+function findSubStr(const sub, s: string; start: int = 1): int;
+function replaceStr(const s, search, by: string): string;
+procedure deleteStr(var s: string; first, last: int);
+
+function ToLower(const s: string): string;
+function toUpper(c: Char): Char; overload;
+function toUpper(s: string): string; overload;
+
+function parseInt(const s: string): int;
+function ParseFloat(const s: string; checkEnd: Boolean = True): Real;
+
+function repeatChar(count: int; c: Char = ' '): string;
+
+type
+  TStringSeq = array of string;
+  TCharSet = set of Char;
+function splitSeq(const s: string; const seps: TCharSet): TStringSeq;
+
+function startsWith(const s, prefix: string): bool;
+function endsWith(const s, postfix: string): bool;
+
+const
+  WhiteSpace = [' ', #9..#13];
+
+function strip(const s: string; const chars: TCharSet = WhiteSpace): string;
+function allCharsInSet(const s: string; const theSet: TCharSet): bool;
+
+implementation
+
+function allCharsInSet(const s: string; const theSet: TCharSet): bool;
+var
+  i: int;
+begin
+  for i := strStart to length(s)+strStart-1 do
+    if not (s[i] in theSet) then begin result := false; exit end;
+  result := true
+end;
+
+function strip(const s: string; const chars: TCharSet = WhiteSpace): string;
+var
+  a, b, last: int;
+begin
+  a := strStart;
+  last := length(s) + strStart - 1;
+  while (a <= last) and (s[a] in chars) do inc(a);
+  b := last;
+  while (b >= strStart) and (s[b] in chars) do dec(b);
+  if a <= b then
+    result := ncopy(s, a, b)
+  else
+    result := '';
+end;
+
+function startsWith(const s, prefix: string): bool;
+var
+  i, j: int;
+begin
+  result := false;
+  if length(s) >= length(prefix) then begin
+    i := 1;
+    j := 1;
+    while (i <= length(s)) and (j <= length(prefix)) do begin
+      if s[i] <> prefix[j] then exit;
+      inc(i);
+      inc(j);
+    end;
+    result := j > length(prefix);
+  end
+end;
+
+function endsWith(const s, postfix: string): bool;
+var
+  i, j: int;
+begin
+  result := false;
+  if length(s) >= length(postfix) then begin
+    i := length(s);
+    j := length(postfix);
+    while (i >= 1) and (j >= 1) do begin
+      if s[i] <> postfix[j] then exit;
+      dec(i);
+      dec(j);
+    end;
+    result := j = 0;
+  end
+end;
+
+function splitSeq(const s: string; const seps: TCharSet): TStringSeq;
+var
+  first, last, len: int;
+begin
+  first := 1;
+  last := 1;
+  setLength(result, 0);
+  while last <= length(s) do begin
+    while (last <= length(s)) and (s[last] in seps) do inc(last);
+    first := last;
+    while (last <= length(s)) and not (s[last] in seps) do inc(last);
+    len := length(result);
+    setLength(result, len+1);
+    result[len] := ncopy(s, first, last-1);
+  end
+end;
+
+function repeatChar(count: int; c: Char = ' '): string;
+var
+  i: int;
+begin
+  result := newString(count);
+  for i := strStart to count+strStart-1 do result[i] := c
+end;
+
+function cmp(const x, y: string): int;
+var
+  aa, bb: char;
+  a, b: PChar;
+  i, j: int;
+begin
+  i := 0;
+  j := 0;
+  a := PChar(x); // this is correct even for x = ''
+  b := PChar(y);
+  repeat
+    aa := a[i];
+    bb := b[j];
+    result := ord(aa) - ord(bb);
+    if (result <> 0) or (aa = #0) then break;
+    inc(i);
+    inc(j);
+  until false
+end;
+
+procedure deleteStr(var s: string; first, last: int);
+begin
+  delete(s, first, last - first + 1);
+end;
+
+function toUpper(c: Char): Char;
+begin
+  if (c >= 'a') and (c <= 'z') then
+    result := Chr(Ord(c) - Ord('a') + Ord('A'))
+  else
+    result := c
+end;
+
+function ToString(b: Boolean): string;
+begin
+  if b then result := 'true'
+  else result := 'false'
+end;
+
+function toOctal(value: Char): string;
+var
+  i: int;
+  val: int;
+begin
+  val := ord(value);
+  result := newString(3);
+  for i := strStart+2 downto strStart do begin
+    result[i] := Chr(val mod 8 + ord('0'));
+    val := val div 8
+  end;
+end;
+
+function ToLower(const s: string): string;
+var
+  i: int;
+begin
+  result := '';
+  for i := strStart to length(s)+StrStart-1 do
+    if s[i] in ['A'..'Z'] then
+      result := result + Chr(Ord(s[i]) + Ord('a') - Ord('A'))
+    else
+      result := result + s[i]
+end;
+
+function toUpper(s: string): string;
+var
+  i: int;
+begin
+  result := '';
+  for i := strStart to length(s)+StrStart-1 do
+    if s[i] in ['a'..'z'] then
+      result := result + Chr(Ord(s[i]) - Ord('a') + Ord('A'))
+    else
+      result := result + s[i]
+end;
+
+function findSubStr(const sub, s: string; start: int = 1): int;
+var
+  i, j, M, N: int;
+begin
+  M := length(sub); N := length(s);
+  i := start; j := 1;
+  if i > N then
+    result := 0
+  else begin
+    repeat
+      if s[i] = sub[j] then begin
+        Inc(i); Inc(j);
+      end
+      else begin
+        i := i - j + 2;
+        j := 1
+      end
+    until (j > M) or (i > N);
+    if j > M then result := i - M
+    else result := 0
+  end
+end;
+
+function replaceStr(const s, search, by: string): string;
+var
+  i, j: int;
+begin
+  result := '';
+  i := 1;
+  repeat
+    j := findSubStr(search, s, i);
+    if j = 0 then begin
+      // copy the rest:
+      result := result + copy(s, i, length(s) - i + 1);
+      break
+    end;
+    result := result + copy(s, i, j - i) + by;
+    i := j + length(search)
+  until false
+end;
+
+function ToStringF(const r: Real): string;
+var
+  i: int;
+begin
+  result := sysutils.format('%g', [r]);
+  i := pos(',', result);
+  if i > 0 then result[i] := '.' // long standing bug!
+  else if (cmpIgnoreStyle(result, 'nan') = 0) then // BUGFIX
+    result := 'NAN'
+  else if (cmpIgnoreStyle(result, 'inf') = 0) or
+          (cmpIgnoreStyle(result, '+inf') = 0) then
+      // FPC 2.1.1 seems to write +Inf ..., so here we go
+    result := 'INF'
+  else if (cmpIgnoreStyle(result, '-inf') = 0) then
+    result := '-INF' // another BUGFIX
+  else if pos('.', result) = 0 then
+    result := result + '.0'
+end;
+
+function toInt(const s: string): int;
+var
+  code: int;
+begin
+  Val(s, result, code)
+end;
+
+function toHex(x: BiggestInt; len: int): string;
+const
+  HexChars: array [0..$F] of Char = '0123456789ABCDEF';
+var
+  j: int;
+  mask, shift: BiggestInt;
+begin
+  assert(len > 0);
+  SetLength(result, len);
+  mask := $F;
+  shift := 0;
+  for j := len + strStart-1 downto strStart do begin
+    result[j] := HexChars[(x and mask) shr shift];
+    shift := shift + 4;
+    mask := mask shl 4;
+  end;
+end;
+
+function toOct(x: BiggestInt; len: int): string;
+var
+  j: int;
+  mask, shift: BiggestInt;
+begin
+  assert(len > 0);
+  result := newString(len);
+  mask := 7;
+  shift := 0;
+  for j := len + strStart-1 downto strStart do begin
+    result[j] := chr(((x and mask) shr shift) + ord('0'));
+    shift := shift + 3;
+    mask := mask shl 3;
+  end;
+end;
+
+function toBin(x: BiggestInt; len: int): string;
+var
+  j: int;
+  mask, shift: BiggestInt;
+begin
+  assert(len > 0);
+  result := newString(len);
+  mask := 1;
+  shift := 0;
+  for j := len + strStart-1 downto strStart do begin
+    result[j] := chr(((x and mask) shr shift) + ord('0'));
+    shift := shift + 1;
+    mask := mask shl 1;
+  end;
+end;
+
+procedure addChar(var s: string; c: Char);
+{@ignore}
+// delphi produces suboptimal code for "s := s + c"
+{$ifndef fpc}
+var
+  len: int;
+{$endif}
+{@emit}
+begin
+{@ignore}
+{$ifdef fpc}
+  s := s + c
+{$else}
+  {$ifopt H+}
+  len := length(s);
+  setLength(s, len + 1);
+  PChar(Pointer(s))[len] := c
+  {$else}
+  s := s + c
+  {$endif}
+{$endif}
+{@emit
+  s &= c
+}
+end;
+
+function IntToStr(i: BiggestInt; minChars: int): string;
+var
+  j: int;
+begin
+  result := sysutils.IntToStr(i);
+  for j := 1 to minChars - length(result) do
+    result := '0' + result;
+end;
+
+function toBiggestInt(const s: string): BiggestInt;
+begin
+{$ifdef dephi}
+  result := '';
+  str(i : 1, result);
+{$else}
+  result := StrToInt64(s);
+{$endif}
+end;
+
+function toString(i: BiggestInt): string; overload;
+begin
+  result := sysUtils.intToStr(i);
+end;
+
+function normalize(const s: string): string;
+var
+  i: int;
+begin
+  result := '';
+  for i := strStart to length(s)+StrStart-1 do
+    if s[i] in ['A'..'Z'] then
+      result := result + Chr(Ord(s[i]) + Ord('a') - Ord('A'))
+    else if s[i] <> '_' then
+      result := result + s[i]
+end;
+
+function cmpIgnoreCase(const x, y: string): int;
+var
+  aa, bb: char;
+  a, b: PChar;
+  i, j: int;
+begin
+  i := 0;
+  j := 0;
+  a := PChar(x); // this is correct even for x = ''
+  b := PChar(y);
+  repeat
+    aa := a[i];
+    bb := b[j];
+    if aa in ['A'..'Z'] then aa := Chr(Ord(aa) + Ord('a') - Ord('A'));
+    if bb in ['A'..'Z'] then bb := Chr(Ord(bb) + Ord('a') - Ord('A'));
+    result := ord(aa) - ord(bb);
+    if (result <> 0) or (a[i] = #0) then break;
+    inc(i);
+    inc(j);
+  until false
+end;
+
+function cmpIgnoreStyle(const x, y: string): int;
+// this is a hotspot in the compiler!
+// it took 14% of total runtime!
+// So we optimize the heck out of it!
+var
+  aa, bb: char;
+  a, b: PChar;
+  i, j: int;
+begin
+  i := 0;
+  j := 0;
+  a := PChar(x); // this is correct even for x = ''
+  b := PChar(y);
+  repeat
+    while a[i] = '_' do inc(i);
+    while b[j] = '_' do inc(j);
+    aa := a[i];
+    bb := b[j];
+    if aa in ['A'..'Z'] then aa := Chr(Ord(aa) + Ord('a') - Ord('A'));
+    if bb in ['A'..'Z'] then bb := Chr(Ord(bb) + Ord('a') - Ord('A'));
+    result := ord(aa) - ord(bb);
+    if (result <> 0) or (a[i] = #0) then break;
+    inc(i);
+    inc(j);
+  until false
+end;
+
+function find(const x: string; const inArray: array of string): int;
+var
+  i: int;
+  y: string;
+begin
+  y := normalize(x);
+  i := 0;
+  while i < high(inArray) do begin
+    if y = normalize(inArray[i]) then begin
+      result := i; exit
+    end;
+    inc(i, 2); // increment by 2, else a security whole!
+  end;
+  result := -1
+end;
+
+function format(const f: string; const args: array of string): string;
+const
+  PatternChars = ['a'..'z', 'A'..'Z', '0'..'9', '_', #128..#255];
+var
+  i, j, x: int;
+begin
+  result := '';
+  i := 1;
+  while i <= length(f) do
+    if f[i] = '$' then begin
+      case f[i+1] of
+        '$': begin
+          result := result + '$';
+          inc(i, 2);
+        end;
+        '1'..'9': begin
+          result := result + args[ord(f[i+1]) - ord('0') - 1];
+          inc(i, 2);
+        end;
+        '{': begin
+          j := i+1;
+          while (j <= length(f)) and (f[j] <> '}') do inc(j);
+          x := find(ncopy(f, i+2, j-1), args);
+          if (x >= 0) and (x < high(args)) then result := result + args[x+1]
+          else raise EInvalidFormatStr.create('');
+          i := j+1
+        end;
+        'a'..'z', 'A'..'Z', #128..#255, '_': begin
+          j := i+1;
+          while (j <= length(f)) and (f[j] in PatternChars) do inc(j);
+          x := find(ncopy(f, i+1, j-1), args);
+          if (x >= 0) and (x < high(args)) then result := result + args[x+1]
+          else raise EInvalidFormatStr.create('');
+          i := j
+        end
+        else raise EInvalidFormatStr.create('');
+      end
+    end
+    else begin
+      result := result + f[i];
+      inc(i)
+    end
+end;
+
+{@ignore}
+{$ifopt Q-} {$Q+}
+{$else}     {$define Q_off}
+{$endif}
+{@emit}
+// this must be compiled with overflow checking turned on:
+function rawParseInt(const a: string; var index: int): BiggestInt;
+// index contains the start position at proc entry; end position will be
+// in index before the proc returns; index = -1 on error (no number at all)
+var
+  i: int;
+  sign: BiggestInt;
+  s: string;
+begin
+  s := a + #0; // to avoid the sucking range check errors
+  i := index; // a local i is more efficient than accessing an in out parameter
+  sign := 1;
+  if s[i] = '+' then inc(i)
+  else if s[i] = '-' then begin
+    inc(i);
+    sign := -1
+  end;
+
+  if s[i] in ['0'..'9'] then begin
+    result := 0;
+    while s[i] in ['0'..'9'] do begin
+      result := result * 10 + ord(s[i]) - ord('0');
+      inc(i);
+      while s[i] = '_' do inc(i) // underscores are allowed and ignored
+    end;
+    result := result * sign;
+    index := i; // store index back
+  end
+  else begin
+    index := -1;
+    result := 0
+  end
+end;
+{@ignore}
+{$ifdef Q_off}
+{$Q-} // turn it off again!!!
+{$endif}
+{@emit}
+
+function parseInt(const s: string): int;
+var
+  index: int;
+  res: BiggestInt;
+begin
+  index := strStart;
+  res := rawParseInt(s, index);
+  if index = -1 then
+    raise EInvalidValue.create('')
+{$ifdef cpu32}
+  //else if (res < low(int)) or (res > high(int)) then
+  //  raise EOverflow.create('')
+{$endif}
+  else
+    result := int(res) // convert to smaller int type
+end;
+
+{@ignore}
+{$ifopt Q+} {$Q-}
+{$else}     {$define Q_on}
+{$endif}
+{@emit}
+// this function must be computed without overflow checking
+function parseNimInt(const a: string): biggestInt;
+var
+  i: int;
+begin
+  i := StrStart;
+  result := rawParseInt(a, i);
+  if i = -1 then raise EInvalidValue.create('');
+end;
+
+function ParseFloat(const s: string; checkEnd: Boolean = True): Real;
+var
+  hd, esign, sign: Real;
+  exponent, i, code: int;
+  flags: cardinal;
+begin
+  result := 0.0;
+  code := 1;
+  exponent := 0;
+  esign := 1;
+  flags := 0;
+  sign := 1;
+  case s[code] of
+    '+': inc(code);
+    '-': begin
+      sign := -1;
+      inc(code);
+    end;
+  end;
+  while (code <= Length(s)) and (s[code] in ['0'..'9']) do begin
+   { Read int part }
+    flags := flags or 1;
+    result := result * 10.0 + toFloat(ord(s[code])-ord('0'));
+    inc(code);
+    while (code <= length(s)) and (s[code] = '_') do inc(code);
+  end;
+  { Decimal ? }
+  if (length(s) >= code) and (s[code] = '.') then begin
+    hd := 1.0;
+    inc(code);
+    while (length(s)>=code) and (s[code] in ['0'..'9']) do begin
+      { Read fractional part. }
+      flags := flags or 2;
+      result := result * 10.0 + toFloat(ord(s[code])-ord('0'));
+      hd := hd * 10.0;
+      inc(code);
+      while (code <= length(s)) and (s[code] = '_') do inc(code);
+    end;
+    result := result / hd;
+  end;
+  { Again, read int and fractional part }
+  if flags = 0 then
+    raise EInvalidValue.create('');
+ { Exponent ? }
+  if (length(s) >= code) and (upcase(s[code]) = 'E') then begin
+    inc(code);
+    if Length(s) >= code then
+      if s[code] = '+' then
+        inc(code)
+      else
+        if s[code] = '-' then begin
+          esign := -1;
+          inc(code);
+        end;
+    if (length(s) < code) or not (s[code] in ['0'..'9']) then
+      raise EInvalidValue.create('');
+    while (length(s) >= code) and (s[code] in ['0'..'9']) do begin
+      exponent := exponent * 10;
+      exponent := exponent + ord(s[code])-ord('0');
+      inc(code);
+      while (code <= length(s)) and (s[code] = '_') do inc(code);
+    end;
+  end;
+  { Calculate Exponent }
+  hd := 1.0;
+  for i := 1 to exponent do hd := hd * 10.0;
+  if esign > 0 then
+    result := result * hd
+  else
+    result := result / hd;
+  { Not all characters are read ? }
+  if checkEnd and (length(s) >= code) then
+    raise EInvalidValue.create('');
+  { evaluate sign }
+  result := result * sign;
+end;
+
+{@ignore}
+{$ifdef Q_on}
+{$Q+} // turn it on again!
+{$endif}
+{@emit
+@pop # overflowChecks
+}
+
+end.