summary refs log tree commit diff stats
path: root/nim/highlite.pas
diff options
context:
space:
mode:
Diffstat (limited to 'nim/highlite.pas')
-rwxr-xr-xnim/highlite.pas743
1 files changed, 0 insertions, 743 deletions
diff --git a/nim/highlite.pas b/nim/highlite.pas
deleted file mode 100755
index fa760d2a2..000000000
--- a/nim/highlite.pas
+++ /dev/null
@@ -1,743 +0,0 @@
-//
-//
-//           The Nimrod Compiler
-//        (c) Copyright 2008 Andreas Rumpf
-//
-//    See the file "copying.txt", included in this
-//    distribution, for details about the copyright.
-//
-unit highlite;
-
-// Source highlighter for programming or markup languages.
-// Currently only few languages are supported, other languages may be added.
-// The interface supports one language nested in another.
-
-interface
-
-{$include 'config.inc'}
-
-uses
-  charsets, nsystem, sysutils, nhashes, options, msgs, strutils, platform,
-  idents, lexbase, wordrecg, scanner;
-
-type
-  TTokenClass = (
-    gtEof,
-    gtNone,
-    gtWhitespace,
-    gtDecNumber,
-    gtBinNumber,
-    gtHexNumber,
-    gtOctNumber,
-    gtFloatNumber,
-    gtIdentifier,
-    gtKeyword,
-    gtStringLit,
-    gtLongStringLit,
-    gtCharLit,
-    gtEscapeSequence,         // escape sequence like \xff
-    gtOperator,
-    gtPunctation,
-    gtComment,
-    gtLongComment,
-    gtRegularExpression,
-    gtTagStart,
-    gtTagEnd,
-    gtKey,
-    gtValue,
-    gtRawData,
-    gtAssembler,
-    gtPreprocessor,
-    gtDirective,
-    gtCommand,
-    gtRule,
-    gtHyperlink,
-    gtLabel,
-    gtReference,
-    gtOther
-  );
-  TGeneralTokenizer = object(NObject)
-    kind: TTokenClass;
-    start, len: int;
-    // private:
-    buf: PChar;
-    pos: int;
-    state: TTokenClass;
-  end;
-  TSourceLanguage = (
-    langNone,
-    langNimrod,
-    langCpp,
-    langCsharp,
-    langC,
-    langJava
-  );
-const
-  sourceLanguageToStr: array [TSourceLanguage] of string = (
-    'none', 'Nimrod', 'C++', 'C#', 'C'+'', 'Java'
-  );
-  tokenClassToStr: array [TTokenClass] of string = (
-    'Eof',
-    'None',
-    'Whitespace',
-    'DecNumber',
-    'BinNumber',
-    'HexNumber',
-    'OctNumber',
-    'FloatNumber',
-    'Identifier',
-    'Keyword',
-    'StringLit',
-    'LongStringLit',
-    'CharLit',
-    'EscapeSequence',
-    'Operator',
-    'Punctation',
-    'Comment',
-    'LongComment',
-    'RegularExpression',
-    'TagStart',
-    'TagEnd',
-    'Key',
-    'Value',
-    'RawData',
-    'Assembler',
-    'Preprocessor',
-    'Directive',
-    'Command',
-    'Rule',
-    'Hyperlink',
-    'Label',
-    'Reference',
-    'Other'
-  );
-
-function getSourceLanguage(const name: string): TSourceLanguage;
-
-procedure initGeneralTokenizer(var g: TGeneralTokenizer;
-                               const buf: string);
-procedure deinitGeneralTokenizer(var g: TGeneralTokenizer);
-procedure getNextToken(var g: TGeneralTokenizer; lang: TSourceLanguage);
-
-implementation
-
-function getSourceLanguage(const name: string): TSourceLanguage;
-var
-  i: TSourceLanguage;
-begin
-  for i := succ(low(TSourceLanguage)) to high(TSourceLanguage) do
-    if cmpIgnoreStyle(name, sourceLanguageToStr[i]) = 0 then begin
-      result := i; exit
-    end;
-  result := langNone
-end;
-
-procedure initGeneralTokenizer(var g: TGeneralTokenizer;
-                               const buf: string);
-var
-  pos: int;
-begin
-{@ignore} fillChar(g, sizeof(g), 0); {@emit}
-  g.buf := PChar(buf);
-  g.kind := low(TTokenClass);
-  g.start := 0;
-  g.len := 0;
-  g.state := low(TTokenClass);
-  pos := 0;
-  // skip initial whitespace:
-  while g.buf[pos] in [' ', #9..#13] do inc(pos);
-  g.pos := pos;
-end;
-
-procedure deinitGeneralTokenizer(var g: TGeneralTokenizer);
-begin
-end;
-
-function nimGetKeyword(const id: string): TTokenClass;
-var
-  i: PIdent;
-begin
-  i := getIdent(id);
-  if (i.id >= ord(tokKeywordLow)-ord(tkSymbol)) and
-     (i.id <= ord(tokKeywordHigh)-ord(tkSymbol)) then
-    result := gtKeyword
-  else
-    result := gtIdentifier
-end;
-
-function nimNumberPostfix(var g: TGeneralTokenizer; position: int): int;
-var
-  pos: int;
-begin
-  pos := position;
-  if g.buf[pos] = '''' then begin
-    inc(pos);
-    case g.buf[pos] of
-      'f', 'F': begin
-        g.kind := gtFloatNumber;
-        inc(pos);
-        if g.buf[pos] in ['0'..'9'] then inc(pos);
-        if g.buf[pos] in ['0'..'9'] then inc(pos);
-      end;
-      'i', 'I': begin
-        inc(pos);
-        if g.buf[pos] in ['0'..'9'] then inc(pos);
-        if g.buf[pos] in ['0'..'9'] then inc(pos);
-      end;
-      else begin end
-    end
-  end;
-  result := pos;
-end;
-
-function nimNumber(var g: TGeneralTokenizer; position: int): int;
-const
-  decChars = ['0'..'9', '_'];
-var
-  pos: int;
-begin
-  pos := position;
-  g.kind := gtDecNumber;
-  while g.buf[pos] in decChars do inc(pos);
-  if g.buf[pos] = '.' then begin
-    g.kind := gtFloatNumber;
-    inc(pos);
-    while g.buf[pos] in decChars do inc(pos);
-  end;
-  if g.buf[pos] in ['e', 'E'] then begin
-    g.kind := gtFloatNumber;
-    inc(pos);
-    if g.buf[pos] in ['+', '-'] then inc(pos);
-    while g.buf[pos] in decChars do inc(pos);
-  end;
-  result := nimNumberPostfix(g, pos);
-end;
-
-procedure nimNextToken(var g: TGeneralTokenizer);
-const
-  hexChars = ['0'..'9', 'A'..'F', 'a'..'f', '_'];
-  octChars = ['0'..'7', '_'];
-  binChars = ['0'..'1', '_'];
-var
-  pos: int;
-  id: string;
-begin
-  pos := g.pos;
-  g.start := g.pos;
-  if g.state = gtStringLit then begin
-    g.kind := gtStringLit;
-    while true do begin
-      case g.buf[pos] of
-        '\': begin
-          g.kind := gtEscapeSequence;
-          inc(pos);
-          case g.buf[pos] of
-            'x', 'X': begin
-              inc(pos);
-              if g.buf[pos] in hexChars then inc(pos);
-              if g.buf[pos] in hexChars then inc(pos);
-            end;
-            '0'..'9': while g.buf[pos] in ['0'..'9'] do inc(pos);
-            #0: g.state := gtNone;
-            else inc(pos);
-          end;
-          break
-        end;
-        #0, #13, #10: begin g.state := gtNone; break end;
-        '"': begin
-          inc(pos);
-          g.state := gtNone;
-          break
-        end;
-        else inc(pos)
-      end
-    end
-  end
-  else begin
-    case g.buf[pos] of
-      ' ', #9..#13: begin
-        g.kind := gtWhitespace;
-        while g.buf[pos] in [' ', #9..#13] do inc(pos);
-      end;
-      '#': begin
-        g.kind := gtComment;
-        while not (g.buf[pos] in [#0, #10, #13]) do inc(pos);
-      end;
-      'a'..'z', 'A'..'Z', '_', #128..#255: begin
-        id := '';
-        while g.buf[pos] in scanner.SymChars+['_'] do begin
-          addChar(id, g.buf[pos]);
-          inc(pos)
-        end;
-        if (g.buf[pos] = '"') then begin
-          if (g.buf[pos+1] = '"') and (g.buf[pos+2] = '"') then begin
-            inc(pos, 3);
-            g.kind := gtLongStringLit;
-            while true do begin
-              case g.buf[pos] of
-                #0: break;
-                '"': begin
-                  inc(pos);
-                  if (g.buf[pos] = '"') and (g.buf[pos+1] = '"') then begin
-                    inc(pos, 2);
-                    break
-                  end
-                end;
-                else inc(pos);
-              end
-            end
-          end
-          else begin
-            g.kind := gtRawData;
-            inc(pos);
-            while not (g.buf[pos] in [#0, '"', #10, #13]) do inc(pos);
-            if g.buf[pos] = '"' then inc(pos);
-          end
-        end
-        else begin
-          g.kind := nimGetKeyword(id);
-        end
-      end;
-      '0': begin
-        inc(pos);
-        case g.buf[pos] of
-          'b', 'B': begin
-            inc(pos);
-            while g.buf[pos] in binChars do inc(pos);
-            pos := nimNumberPostfix(g, pos);
-          end;
-          'x', 'X': begin
-            inc(pos);
-            while g.buf[pos] in hexChars do inc(pos);
-            pos := nimNumberPostfix(g, pos);
-          end;
-          'o', 'O': begin
-            inc(pos);
-            while g.buf[pos] in octChars do inc(pos);
-            pos := nimNumberPostfix(g, pos);
-          end;
-          else
-            pos := nimNumber(g, pos);
-        end
-      end;
-      '1'..'9': begin
-        pos := nimNumber(g, pos);
-      end;
-      '''': begin
-        inc(pos);
-        g.kind := gtCharLit;
-        while true do begin
-          case g.buf[pos] of
-            #0, #13, #10: break;
-            '''': begin inc(pos); break end;
-            '\': begin inc(pos, 2); end;
-            else inc(pos);
-          end
-        end
-      end;
-      '"': begin
-        inc(pos);
-        if (g.buf[pos] = '"') and (g.buf[pos+1] = '"') then begin
-          inc(pos, 2);
-          g.kind := gtLongStringLit;
-          while true do begin
-            case g.buf[pos] of
-              #0: break;
-              '"': begin
-                inc(pos);
-                if (g.buf[pos] = '"') and (g.buf[pos+1] = '"') then begin
-                  inc(pos, 2);
-                  break
-                end
-              end;
-              else inc(pos);
-            end
-          end
-        end
-        else begin
-          g.kind := gtStringLit;
-          while true do begin
-            case g.buf[pos] of
-              #0, #13, #10: break;
-              '"': begin inc(pos); break end;
-              '\': begin g.state := g.kind; break end;
-              else inc(pos);
-            end
-          end
-        end
-      end;
-      '(', ')', '[', ']', '{', '}', '`', ':', ',', ';': begin
-        inc(pos);
-        g.kind := gtPunctation
-      end;
-      #0: g.kind := gtEof;
-      else if g.buf[pos] in scanner.OpChars then begin
-        g.kind := gtOperator;
-        while g.buf[pos] in scanner.OpChars do inc(pos);
-      end
-      else begin
-        inc(pos);
-        g.kind := gtNone
-      end;
-    end
-  end;
-  g.len := pos - g.pos;
-  if (g.kind <> gtEof) and (g.len <= 0) then 
-    InternalError('nimNextToken: ' + toString(g.buf));
-  g.pos := pos;
-end;
-
-// ------------------------------- helpers ------------------------------------
-
-function generalNumber(var g: TGeneralTokenizer; position: int): int;
-const
-  decChars = ['0'..'9'];
-var
-  pos: int;
-begin
-  pos := position;
-  g.kind := gtDecNumber;
-  while g.buf[pos] in decChars do inc(pos);
-  if g.buf[pos] = '.' then begin
-    g.kind := gtFloatNumber;
-    inc(pos);
-    while g.buf[pos] in decChars do inc(pos);
-  end;
-  if g.buf[pos] in ['e', 'E'] then begin
-    g.kind := gtFloatNumber;
-    inc(pos);
-    if g.buf[pos] in ['+', '-'] then inc(pos);
-    while g.buf[pos] in decChars do inc(pos);
-  end;
-  result := pos;
-end;
-
-function generalStrLit(var g: TGeneralTokenizer; position: int): int;
-const
-  decChars = ['0'..'9'];
-  hexChars = ['0'..'9', 'A'..'F', 'a'..'f'];
-var
-  pos: int;
-  c: Char;
-begin
-  pos := position;
-  g.kind := gtStringLit;
-  c := g.buf[pos];
-  inc(pos); // skip " or '
-  while true do begin
-    case g.buf[pos] of
-      #0: break;
-      '\': begin
-        inc(pos);
-        case g.buf[pos] of
-          #0: break;
-          '0'..'9': while g.buf[pos] in decChars do inc(pos);
-          'x', 'X': begin
-            inc(pos);
-            if g.buf[pos] in hexChars then inc(pos);
-            if g.buf[pos] in hexChars then inc(pos);
-          end;
-          else inc(pos, 2)
-        end
-      end;
-      else if g.buf[pos] = c then begin
-        inc(pos); break;
-      end
-      else
-        inc(pos);
-    end
-  end;
-  result := pos;
-end;
-
-function isKeyword(const x: array of string; const y: string): int;
-var
-  a, b, mid, c: int;
-begin
-  a := 0;
-  b := length(x)-1;
-  while a <= b do begin
-    mid := (a + b) div 2;
-    c := cmp(x[mid], y);
-    if c < 0 then
-      a := mid + 1
-    else if c > 0 then
-      b := mid - 1
-    else begin
-      result := mid;
-      exit
-    end
-  end;
-  result := -1
-end;
-
-function isKeywordIgnoreCase(const x: array of string; const y: string): int;
-var
-  a, b, mid, c: int;
-begin
-  a := 0;
-  b := length(x)-1;
-  while a <= b do begin
-    mid := (a + b) div 2;
-    c := cmpIgnoreCase(x[mid], y);
-    if c < 0 then
-      a := mid + 1
-    else if c > 0 then
-      b := mid - 1
-    else begin
-      result := mid;
-      exit
-    end
-  end;
-  result := -1
-end;
-
-// ---------------------------------------------------------------------------
-
-type
-  TTokenizerFlag = (hasPreprocessor, hasNestedComments);
-  TTokenizerFlags = set of TTokenizerFlag;
-
-procedure clikeNextToken(var g: TGeneralTokenizer;
-                         const keywords: array of string;
-                         flags: TTokenizerFlags);
-const
-  hexChars = ['0'..'9', 'A'..'F', 'a'..'f'];
-  octChars = ['0'..'7'];
-  binChars = ['0'..'1'];
-  symChars = ['A'..'Z', 'a'..'z', '0'..'9', '_', #128..#255];
-var
-  pos, nested: int;
-  id: string;
-begin
-  pos := g.pos;
-  g.start := g.pos;
-  if g.state = gtStringLit then begin
-    g.kind := gtStringLit;
-    while true do begin
-      case g.buf[pos] of
-        '\': begin
-          g.kind := gtEscapeSequence;
-          inc(pos);
-          case g.buf[pos] of
-            'x', 'X': begin
-              inc(pos);
-              if g.buf[pos] in hexChars then inc(pos);
-              if g.buf[pos] in hexChars then inc(pos);
-            end;
-            '0'..'9': while g.buf[pos] in ['0'..'9'] do inc(pos);
-            #0: g.state := gtNone;
-            else inc(pos);
-          end;
-          break
-        end;
-        #0, #13, #10: begin g.state := gtNone; break end;
-        '"': begin
-          inc(pos);
-          g.state := gtNone;
-          break
-        end;
-        else inc(pos)
-      end
-    end
-  end
-  else begin
-    case g.buf[pos] of
-      ' ', #9..#13: begin
-        g.kind := gtWhitespace;
-        while g.buf[pos] in [' ', #9..#13] do inc(pos);
-      end;
-      '/': begin
-        inc(pos);
-        if g.buf[pos] = '/' then begin
-          g.kind := gtComment;
-          while not (g.buf[pos] in [#0, #10, #13]) do inc(pos);
-        end
-        else if g.buf[pos] = '*' then begin
-          g.kind := gtLongComment;
-          nested := 0;
-          inc(pos);
-          while true do begin
-            case g.buf[pos] of
-              '*': begin
-                inc(pos);
-                if g.buf[pos] = '/' then begin
-                  inc(pos);
-                  if nested = 0 then break
-                end;
-              end;
-              '/': begin
-                inc(pos);
-                if g.buf[pos] = '*' then begin
-                  inc(pos);
-                  if hasNestedComments in flags then inc(nested);
-                end
-              end;
-              #0: break;
-              else inc(pos);
-            end
-          end
-        end
-      end;
-      '#': begin
-        inc(pos);
-        if hasPreprocessor in flags then begin
-          g.kind := gtPreprocessor;
-          while g.buf[pos] in [' ', Tabulator] do inc(pos);
-          while g.buf[pos] in symChars do inc(pos);
-        end
-        else
-          g.kind := gtOperator
-      end;
-      'a'..'z', 'A'..'Z', '_', #128..#255: begin
-        id := '';
-        while g.buf[pos] in SymChars do begin
-          addChar(id, g.buf[pos]);
-          inc(pos)
-        end;
-        if isKeyword(keywords, id) >= 0 then g.kind := gtKeyword
-        else g.kind := gtIdentifier;
-      end;
-      '0': begin
-        inc(pos);
-        case g.buf[pos] of
-          'b', 'B': begin
-            inc(pos);
-            while g.buf[pos] in binChars do inc(pos);
-            if g.buf[pos] in ['A'..'Z', 'a'..'z'] then inc(pos);
-          end;
-          'x', 'X': begin
-            inc(pos);
-            while g.buf[pos] in hexChars do inc(pos);
-            if g.buf[pos] in ['A'..'Z', 'a'..'z'] then inc(pos);
-          end;
-          '0'..'7': begin
-            inc(pos);
-            while g.buf[pos] in octChars do inc(pos);
-            if g.buf[pos] in ['A'..'Z', 'a'..'z'] then inc(pos);
-          end;
-          else begin
-            pos := generalNumber(g, pos);
-            if g.buf[pos] in ['A'..'Z', 'a'..'z'] then inc(pos);
-          end
-        end
-      end;
-      '1'..'9': begin
-        pos := generalNumber(g, pos);
-        if g.buf[pos] in ['A'..'Z', 'a'..'z'] then inc(pos);
-      end;
-      '''': begin
-        pos := generalStrLit(g, pos);
-        g.kind := gtCharLit;
-      end;
-      '"': begin
-        inc(pos);
-        g.kind := gtStringLit;
-        while true do begin
-          case g.buf[pos] of
-            #0: break;
-            '"': begin inc(pos); break end;
-            '\': begin g.state := g.kind; break end;
-            else inc(pos);
-          end
-        end
-      end;
-      '(', ')', '[', ']', '{', '}', ':', ',', ';', '.': begin
-        inc(pos);
-        g.kind := gtPunctation
-      end;
-      #0: g.kind := gtEof;
-      else if g.buf[pos] in scanner.OpChars then begin
-        g.kind := gtOperator;
-        while g.buf[pos] in scanner.OpChars do inc(pos);
-      end
-      else begin
-        inc(pos);
-        g.kind := gtNone
-      end;
-    end
-  end;
-  g.len := pos - g.pos;
-  if (g.kind <> gtEof) and (g.len <= 0) then InternalError('clikeNextToken');
-  g.pos := pos;
-end;
-
-// --------------------------------------------------------------------------
-
-procedure cNextToken(var g: TGeneralTokenizer);
-const
-  keywords: array [0..36] of string = (
-    '_Bool', '_Complex', '_Imaginary',
-    'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do',
-    'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if',
-    'inline', 'int', 'long', 'register', 'restrict', 'return', 'short',
-    'signed', 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
-    'unsigned', 'void', 'volatile', 'while'
-  );
-begin
-  clikeNextToken(g, keywords, {@set}[hasPreprocessor]);
-end;
-
-procedure cppNextToken(var g: TGeneralTokenizer);
-const
-  keywords: array [0..47] of string = (
-    'asm', 'auto', 'break', 'case', 'catch', 'char', 'class', 'const',
-    'continue', 'default', 'delete', 'do', 'double', 'else', 'enum', 'extern',
-    'float', 'for', 'friend', 'goto', 'if', 'inline', 'int', 'long', 'new',
-    'operator', 'private', 'protected', 'public', 'register', 'return',
-    'short', 'signed', 'sizeof', 'static', 'struct', 'switch', 'template',
-    'this', 'throw', 'try', 'typedef', 'union', 'unsigned', 'virtual', 'void',
-    'volatile', 'while'
-  );
-begin
-  clikeNextToken(g, keywords, {@set}[hasPreprocessor]);
-end;
-
-procedure csharpNextToken(var g: TGeneralTokenizer);
-const
-  keywords: array [0..76] of string = (
-    'abstract', 'as', 'base', 'bool', 'break', 'byte', 'case', 'catch',
-    'char', 'checked', 'class', 'const', 'continue', 'decimal', 'default',
-    'delegate', 'do', 'double', 'else', 'enum', 'event', 'explicit', 'extern',
-    'false', 'finally', 'fixed', 'float', 'for', 'foreach', 'goto', 'if',
-    'implicit', 'in', 'int', 'interface', 'internal', 'is', 'lock', 'long',
-    'namespace', 'new', 'null', 'object', 'operator', 'out', 'override',
-    'params', 'private', 'protected', 'public', 'readonly', 'ref', 'return',
-    'sbyte', 'sealed', 'short', 'sizeof', 'stackalloc', 'static', 'string',
-    'struct', 'switch', 'this', 'throw', 'true', 'try', 'typeof', 'uint',
-    'ulong', 'unchecked', 'unsafe', 'ushort', 'using', 'virtual', 'void',
-    'volatile', 'while'
-  );
-begin
-  clikeNextToken(g, keywords, {@set}[hasPreprocessor]);
-end;
-
-procedure javaNextToken(var g: TGeneralTokenizer);
-const
-  keywords: array [0..52] of string = (
-    'abstract', 'assert', 'boolean', 'break', 'byte', 'case', 'catch',
-    'char', 'class', 'const', 'continue', 'default', 'do', 'double', 'else',
-    'enum', 'extends', 'false', 'final', 'finally', 'float', 'for', 'goto',
-    'if', 'implements', 'import', 'instanceof', 'int', 'interface', 'long',
-    'native', 'new', 'null', 'package', 'private', 'protected', 'public',
-    'return', 'short', 'static', 'strictfp', 'super', 'switch',
-    'synchronized', 'this', 'throw', 'throws', 'transient', 'true', 'try',
-    'void', 'volatile', 'while'
-  );
-begin
-  clikeNextToken(g, keywords, {@set}[]);
-end;
-
-procedure getNextToken(var g: TGeneralTokenizer; lang: TSourceLanguage);
-begin
-  case lang of
-    langNimrod: nimNextToken(g);
-    langCpp: cppNextToken(g);
-    langCsharp: csharpNextToken(g);
-    langC: cNextToken(g);
-    langJava: javaNextToken(g);
-    else InternalError('getNextToken');
-  end
-end;
-
-end.