diff options
author | Andreas Rumpf <andreas@andi> | 2008-06-22 16:14:11 +0200 |
---|---|---|
committer | Andreas Rumpf <andreas@andi> | 2008-06-22 16:14:11 +0200 |
commit | 405b86068e6a3d39970b9129ceec0a9108464b28 (patch) | |
tree | c0449946f54baae6ea88baf453157ddd7faa8f86 /nim/strutils.pas | |
download | Nim-405b86068e6a3d39970b9129ceec0a9108464b28.tar.gz |
Initial import
Diffstat (limited to 'nim/strutils.pas')
-rwxr-xr-x | nim/strutils.pas | 687 |
1 files changed, 687 insertions, 0 deletions
diff --git a/nim/strutils.pas b/nim/strutils.pas new file mode 100755 index 000000000..b654b7868 --- /dev/null +++ b/nim/strutils.pas @@ -0,0 +1,687 @@ +// +// +// The Nimrod Compiler +// (c) Copyright 2008 Andreas Rumpf +// +// See the file "copying.txt", included in this +// distribution, for details about the copyright. +// +unit strutils; + +interface + +{$include 'config.inc'} + +uses + sysutils, nsystem; + +type + EInvalidFormatStr = class(Exception) + end; + +const + StrStart = 1; + +function normalize(const s: string): string; +function cmpIgnoreStyle(const x, y: string): int; +function cmp(const x, y: string): int; +function cmpIgnoreCase(const x, y: string): int; + +function format(const f: string; const args: array of string): string; + +function toHex(x: BiggestInt; len: int): string; +function toOctal(value: Char): string; +function toOct(x: BiggestInt; len: int): string; +function toBin(x: BiggestInt; len: int): string; + + +procedure addChar(var s: string; c: Char); +function toInt(const s: string): int; +function toBiggestInt(const s: string): BiggestInt; + +function toString(i: BiggestInt): string; overload; + +//function toString(i: int): string; overload; +function ToStringF(const r: Real): string; overload; +function ToString(b: Boolean): string; overload; + +function IntToStr(i: BiggestInt; minChars: int): string; + +function findSubStr(const sub, s: string; start: int = 1): int; +function replaceStr(const s, search, by: string): string; +procedure deleteStr(var s: string; first, last: int); + +function ToLower(const s: string): string; +function toUpper(c: Char): Char; overload; +function toUpper(s: string): string; overload; + +function parseInt(const s: string): int; +function ParseFloat(const s: string; checkEnd: Boolean = True): Real; + +function repeatChar(count: int; c: Char = ' '): string; + +type + TStringSeq = array of string; + TCharSet = set of Char; +function splitSeq(const s: string; const seps: TCharSet): TStringSeq; + +function startsWith(const s, prefix: string): bool; +function endsWith(const s, postfix: string): bool; + +const + WhiteSpace = [' ', #9..#13]; + +function strip(const s: string; const chars: TCharSet = WhiteSpace): string; +function allCharsInSet(const s: string; const theSet: TCharSet): bool; + +implementation + +function allCharsInSet(const s: string; const theSet: TCharSet): bool; +var + i: int; +begin + for i := strStart to length(s)+strStart-1 do + if not (s[i] in theSet) then begin result := false; exit end; + result := true +end; + +function strip(const s: string; const chars: TCharSet = WhiteSpace): string; +var + a, b, last: int; +begin + a := strStart; + last := length(s) + strStart - 1; + while (a <= last) and (s[a] in chars) do inc(a); + b := last; + while (b >= strStart) and (s[b] in chars) do dec(b); + if a <= b then + result := ncopy(s, a, b) + else + result := ''; +end; + +function startsWith(const s, prefix: string): bool; +var + i, j: int; +begin + result := false; + if length(s) >= length(prefix) then begin + i := 1; + j := 1; + while (i <= length(s)) and (j <= length(prefix)) do begin + if s[i] <> prefix[j] then exit; + inc(i); + inc(j); + end; + result := j > length(prefix); + end +end; + +function endsWith(const s, postfix: string): bool; +var + i, j: int; +begin + result := false; + if length(s) >= length(postfix) then begin + i := length(s); + j := length(postfix); + while (i >= 1) and (j >= 1) do begin + if s[i] <> postfix[j] then exit; + dec(i); + dec(j); + end; + result := j = 0; + end +end; + +function splitSeq(const s: string; const seps: TCharSet): TStringSeq; +var + first, last, len: int; +begin + first := 1; + last := 1; + setLength(result, 0); + while last <= length(s) do begin + while (last <= length(s)) and (s[last] in seps) do inc(last); + first := last; + while (last <= length(s)) and not (s[last] in seps) do inc(last); + len := length(result); + setLength(result, len+1); + result[len] := ncopy(s, first, last-1); + end +end; + +function repeatChar(count: int; c: Char = ' '): string; +var + i: int; +begin + result := newString(count); + for i := strStart to count+strStart-1 do result[i] := c +end; + +function cmp(const x, y: string): int; +var + aa, bb: char; + a, b: PChar; + i, j: int; +begin + i := 0; + j := 0; + a := PChar(x); // this is correct even for x = '' + b := PChar(y); + repeat + aa := a[i]; + bb := b[j]; + result := ord(aa) - ord(bb); + if (result <> 0) or (aa = #0) then break; + inc(i); + inc(j); + until false +end; + +procedure deleteStr(var s: string; first, last: int); +begin + delete(s, first, last - first + 1); +end; + +function toUpper(c: Char): Char; +begin + if (c >= 'a') and (c <= 'z') then + result := Chr(Ord(c) - Ord('a') + Ord('A')) + else + result := c +end; + +function ToString(b: Boolean): string; +begin + if b then result := 'true' + else result := 'false' +end; + +function toOctal(value: Char): string; +var + i: int; + val: int; +begin + val := ord(value); + result := newString(3); + for i := strStart+2 downto strStart do begin + result[i] := Chr(val mod 8 + ord('0')); + val := val div 8 + end; +end; + +function ToLower(const s: string): string; +var + i: int; +begin + result := ''; + for i := strStart to length(s)+StrStart-1 do + if s[i] in ['A'..'Z'] then + result := result + Chr(Ord(s[i]) + Ord('a') - Ord('A')) + else + result := result + s[i] +end; + +function toUpper(s: string): string; +var + i: int; +begin + result := ''; + for i := strStart to length(s)+StrStart-1 do + if s[i] in ['a'..'z'] then + result := result + Chr(Ord(s[i]) - Ord('a') + Ord('A')) + else + result := result + s[i] +end; + +function findSubStr(const sub, s: string; start: int = 1): int; +var + i, j, M, N: int; +begin + M := length(sub); N := length(s); + i := start; j := 1; + if i > N then + result := 0 + else begin + repeat + if s[i] = sub[j] then begin + Inc(i); Inc(j); + end + else begin + i := i - j + 2; + j := 1 + end + until (j > M) or (i > N); + if j > M then result := i - M + else result := 0 + end +end; + +function replaceStr(const s, search, by: string): string; +var + i, j: int; +begin + result := ''; + i := 1; + repeat + j := findSubStr(search, s, i); + if j = 0 then begin + // copy the rest: + result := result + copy(s, i, length(s) - i + 1); + break + end; + result := result + copy(s, i, j - i) + by; + i := j + length(search) + until false +end; + +function ToStringF(const r: Real): string; +var + i: int; +begin + result := sysutils.format('%g', [r]); + i := pos(',', result); + if i > 0 then result[i] := '.' // long standing bug! + else if (cmpIgnoreStyle(result, 'nan') = 0) then // BUGFIX + result := 'NAN' + else if (cmpIgnoreStyle(result, 'inf') = 0) or + (cmpIgnoreStyle(result, '+inf') = 0) then + // FPC 2.1.1 seems to write +Inf ..., so here we go + result := 'INF' + else if (cmpIgnoreStyle(result, '-inf') = 0) then + result := '-INF' // another BUGFIX + else if pos('.', result) = 0 then + result := result + '.0' +end; + +function toInt(const s: string): int; +var + code: int; +begin + Val(s, result, code) +end; + +function toHex(x: BiggestInt; len: int): string; +const + HexChars: array [0..$F] of Char = '0123456789ABCDEF'; +var + j: int; + mask, shift: BiggestInt; +begin + assert(len > 0); + SetLength(result, len); + mask := $F; + shift := 0; + for j := len + strStart-1 downto strStart do begin + result[j] := HexChars[(x and mask) shr shift]; + shift := shift + 4; + mask := mask shl 4; + end; +end; + +function toOct(x: BiggestInt; len: int): string; +var + j: int; + mask, shift: BiggestInt; +begin + assert(len > 0); + result := newString(len); + mask := 7; + shift := 0; + for j := len + strStart-1 downto strStart do begin + result[j] := chr(((x and mask) shr shift) + ord('0')); + shift := shift + 3; + mask := mask shl 3; + end; +end; + +function toBin(x: BiggestInt; len: int): string; +var + j: int; + mask, shift: BiggestInt; +begin + assert(len > 0); + result := newString(len); + mask := 1; + shift := 0; + for j := len + strStart-1 downto strStart do begin + result[j] := chr(((x and mask) shr shift) + ord('0')); + shift := shift + 1; + mask := mask shl 1; + end; +end; + +procedure addChar(var s: string; c: Char); +{@ignore} +// delphi produces suboptimal code for "s := s + c" +{$ifndef fpc} +var + len: int; +{$endif} +{@emit} +begin +{@ignore} +{$ifdef fpc} + s := s + c +{$else} + {$ifopt H+} + len := length(s); + setLength(s, len + 1); + PChar(Pointer(s))[len] := c + {$else} + s := s + c + {$endif} +{$endif} +{@emit + s &= c +} +end; + +function IntToStr(i: BiggestInt; minChars: int): string; +var + j: int; +begin + result := sysutils.IntToStr(i); + for j := 1 to minChars - length(result) do + result := '0' + result; +end; + +function toBiggestInt(const s: string): BiggestInt; +begin +{$ifdef dephi} + result := ''; + str(i : 1, result); +{$else} + result := StrToInt64(s); +{$endif} +end; + +function toString(i: BiggestInt): string; overload; +begin + result := sysUtils.intToStr(i); +end; + +function normalize(const s: string): string; +var + i: int; +begin + result := ''; + for i := strStart to length(s)+StrStart-1 do + if s[i] in ['A'..'Z'] then + result := result + Chr(Ord(s[i]) + Ord('a') - Ord('A')) + else if s[i] <> '_' then + result := result + s[i] +end; + +function cmpIgnoreCase(const x, y: string): int; +var + aa, bb: char; + a, b: PChar; + i, j: int; +begin + i := 0; + j := 0; + a := PChar(x); // this is correct even for x = '' + b := PChar(y); + repeat + aa := a[i]; + bb := b[j]; + if aa in ['A'..'Z'] then aa := Chr(Ord(aa) + Ord('a') - Ord('A')); + if bb in ['A'..'Z'] then bb := Chr(Ord(bb) + Ord('a') - Ord('A')); + result := ord(aa) - ord(bb); + if (result <> 0) or (a[i] = #0) then break; + inc(i); + inc(j); + until false +end; + +function cmpIgnoreStyle(const x, y: string): int; +// this is a hotspot in the compiler! +// it took 14% of total runtime! +// So we optimize the heck out of it! +var + aa, bb: char; + a, b: PChar; + i, j: int; +begin + i := 0; + j := 0; + a := PChar(x); // this is correct even for x = '' + b := PChar(y); + repeat + while a[i] = '_' do inc(i); + while b[j] = '_' do inc(j); + aa := a[i]; + bb := b[j]; + if aa in ['A'..'Z'] then aa := Chr(Ord(aa) + Ord('a') - Ord('A')); + if bb in ['A'..'Z'] then bb := Chr(Ord(bb) + Ord('a') - Ord('A')); + result := ord(aa) - ord(bb); + if (result <> 0) or (a[i] = #0) then break; + inc(i); + inc(j); + until false +end; + +function find(const x: string; const inArray: array of string): int; +var + i: int; + y: string; +begin + y := normalize(x); + i := 0; + while i < high(inArray) do begin + if y = normalize(inArray[i]) then begin + result := i; exit + end; + inc(i, 2); // increment by 2, else a security whole! + end; + result := -1 +end; + +function format(const f: string; const args: array of string): string; +const + PatternChars = ['a'..'z', 'A'..'Z', '0'..'9', '_', #128..#255]; +var + i, j, x: int; +begin + result := ''; + i := 1; + while i <= length(f) do + if f[i] = '$' then begin + case f[i+1] of + '$': begin + result := result + '$'; + inc(i, 2); + end; + '1'..'9': begin + result := result + args[ord(f[i+1]) - ord('0') - 1]; + inc(i, 2); + end; + '{': begin + j := i+1; + while (j <= length(f)) and (f[j] <> '}') do inc(j); + x := find(ncopy(f, i+2, j-1), args); + if (x >= 0) and (x < high(args)) then result := result + args[x+1] + else raise EInvalidFormatStr.create(''); + i := j+1 + end; + 'a'..'z', 'A'..'Z', #128..#255, '_': begin + j := i+1; + while (j <= length(f)) and (f[j] in PatternChars) do inc(j); + x := find(ncopy(f, i+1, j-1), args); + if (x >= 0) and (x < high(args)) then result := result + args[x+1] + else raise EInvalidFormatStr.create(''); + i := j + end + else raise EInvalidFormatStr.create(''); + end + end + else begin + result := result + f[i]; + inc(i) + end +end; + +{@ignore} +{$ifopt Q-} {$Q+} +{$else} {$define Q_off} +{$endif} +{@emit} +// this must be compiled with overflow checking turned on: +function rawParseInt(const a: string; var index: int): BiggestInt; +// index contains the start position at proc entry; end position will be +// in index before the proc returns; index = -1 on error (no number at all) +var + i: int; + sign: BiggestInt; + s: string; +begin + s := a + #0; // to avoid the sucking range check errors + i := index; // a local i is more efficient than accessing an in out parameter + sign := 1; + if s[i] = '+' then inc(i) + else if s[i] = '-' then begin + inc(i); + sign := -1 + end; + + if s[i] in ['0'..'9'] then begin + result := 0; + while s[i] in ['0'..'9'] do begin + result := result * 10 + ord(s[i]) - ord('0'); + inc(i); + while s[i] = '_' do inc(i) // underscores are allowed and ignored + end; + result := result * sign; + index := i; // store index back + end + else begin + index := -1; + result := 0 + end +end; +{@ignore} +{$ifdef Q_off} +{$Q-} // turn it off again!!! +{$endif} +{@emit} + +function parseInt(const s: string): int; +var + index: int; + res: BiggestInt; +begin + index := strStart; + res := rawParseInt(s, index); + if index = -1 then + raise EInvalidValue.create('') +{$ifdef cpu32} + //else if (res < low(int)) or (res > high(int)) then + // raise EOverflow.create('') +{$endif} + else + result := int(res) // convert to smaller int type +end; + +{@ignore} +{$ifopt Q+} {$Q-} +{$else} {$define Q_on} +{$endif} +{@emit} +// this function must be computed without overflow checking +function parseNimInt(const a: string): biggestInt; +var + i: int; +begin + i := StrStart; + result := rawParseInt(a, i); + if i = -1 then raise EInvalidValue.create(''); +end; + +function ParseFloat(const s: string; checkEnd: Boolean = True): Real; +var + hd, esign, sign: Real; + exponent, i, code: int; + flags: cardinal; +begin + result := 0.0; + code := 1; + exponent := 0; + esign := 1; + flags := 0; + sign := 1; + case s[code] of + '+': inc(code); + '-': begin + sign := -1; + inc(code); + end; + end; + while (code <= Length(s)) and (s[code] in ['0'..'9']) do begin + { Read int part } + flags := flags or 1; + result := result * 10.0 + toFloat(ord(s[code])-ord('0')); + inc(code); + while (code <= length(s)) and (s[code] = '_') do inc(code); + end; + { Decimal ? } + if (length(s) >= code) and (s[code] = '.') then begin + hd := 1.0; + inc(code); + while (length(s)>=code) and (s[code] in ['0'..'9']) do begin + { Read fractional part. } + flags := flags or 2; + result := result * 10.0 + toFloat(ord(s[code])-ord('0')); + hd := hd * 10.0; + inc(code); + while (code <= length(s)) and (s[code] = '_') do inc(code); + end; + result := result / hd; + end; + { Again, read int and fractional part } + if flags = 0 then + raise EInvalidValue.create(''); + { Exponent ? } + if (length(s) >= code) and (upcase(s[code]) = 'E') then begin + inc(code); + if Length(s) >= code then + if s[code] = '+' then + inc(code) + else + if s[code] = '-' then begin + esign := -1; + inc(code); + end; + if (length(s) < code) or not (s[code] in ['0'..'9']) then + raise EInvalidValue.create(''); + while (length(s) >= code) and (s[code] in ['0'..'9']) do begin + exponent := exponent * 10; + exponent := exponent + ord(s[code])-ord('0'); + inc(code); + while (code <= length(s)) and (s[code] = '_') do inc(code); + end; + end; + { Calculate Exponent } + hd := 1.0; + for i := 1 to exponent do hd := hd * 10.0; + if esign > 0 then + result := result * hd + else + result := result / hd; + { Not all characters are read ? } + if checkEnd and (length(s) >= code) then + raise EInvalidValue.create(''); + { evaluate sign } + result := result * sign; +end; + +{@ignore} +{$ifdef Q_on} +{$Q+} // turn it on again! +{$endif} +{@emit +@pop # overflowChecks +} + +end. |