summary refs log tree commit diff stats
path: root/rod/highlite.nim
diff options
context:
space:
mode:
Diffstat (limited to 'rod/highlite.nim')
-rwxr-xr-xrod/highlite.nim531
1 files changed, 0 insertions, 531 deletions
diff --git a/rod/highlite.nim b/rod/highlite.nim
deleted file mode 100755
index c2fc95da8..000000000
--- a/rod/highlite.nim
+++ /dev/null
@@ -1,531 +0,0 @@
-#
-#
-#           The Nimrod Compiler
-#        (c) Copyright 2010 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# Source highlighter for programming or markup languages.
-# Currently only few languages are supported, other languages may be added.
-# The interface supports one language nested in another.
-
-import 
-  nhashes, options, msgs, strutils, platform, idents, lexbase, wordrecg, scanner
-
-type 
-  TTokenClass* = enum 
-    gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber, 
-    gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit, 
-    gtLongStringLit, gtCharLit, gtEscapeSequence, # escape sequence like \xff
-    gtOperator, gtPunctation, gtComment, gtLongComment, gtRegularExpression, 
-    gtTagStart, gtTagEnd, gtKey, gtValue, gtRawData, gtAssembler, 
-    gtPreprocessor, gtDirective, gtCommand, gtRule, gtHyperlink, gtLabel, 
-    gtReference, gtOther
-  TGeneralTokenizer* = object of TObject
-    kind*: TTokenClass
-    start*, length*: int      # private:
-    buf*: cstring
-    pos*: int
-    state*: TTokenClass
-
-  TSourceLanguage* = enum 
-    langNone, langNimrod, langCpp, langCsharp, langC, langJava
-
-const 
-  sourceLanguageToStr*: array[TSourceLanguage, string] = ["none", "Nimrod", 
-    "C++", "C#", "C", "Java"]
-  tokenClassToStr*: array[TTokenClass, string] = ["Eof", "None", "Whitespace", 
-    "DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber", 
-    "Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit", 
-    "EscapeSequence", "Operator", "Punctation", "Comment", "LongComment", 
-    "RegularExpression", "TagStart", "TagEnd", "Key", "Value", "RawData", 
-    "Assembler", "Preprocessor", "Directive", "Command", "Rule", "Hyperlink", 
-    "Label", "Reference", "Other"]
-
-proc getSourceLanguage*(name: string): TSourceLanguage
-proc initGeneralTokenizer*(g: var TGeneralTokenizer, buf: string)
-proc deinitGeneralTokenizer*(g: var TGeneralTokenizer)
-proc getNextToken*(g: var TGeneralTokenizer, lang: TSourceLanguage)
-# implementation
-
-proc getSourceLanguage(name: string): TSourceLanguage = 
-  for i in countup(succ(low(TSourceLanguage)), high(TSourceLanguage)): 
-    if cmpIgnoreStyle(name, sourceLanguageToStr[i]) == 0: 
-      return i
-  result = langNone
-
-proc initGeneralTokenizer(g: var TGeneralTokenizer, buf: string) = 
-  g.buf = cstring(buf)
-  g.kind = low(TTokenClass)
-  g.start = 0
-  g.length = 0
-  g.state = low(TTokenClass)
-  var pos = 0                     # skip initial whitespace:
-  while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
-  g.pos = pos
-
-proc deinitGeneralTokenizer(g: var TGeneralTokenizer) = 
-  nil
-
-proc nimGetKeyword(id: string): TTokenClass = 
-  var i = getIdent(id)
-  if (i.id >= ord(tokKeywordLow) - ord(tkSymbol)) and
-      (i.id <= ord(tokKeywordHigh) - ord(tkSymbol)): 
-    result = gtKeyword
-  else: 
-    result = gtIdentifier
-  
-proc nimNumberPostfix(g: var TGeneralTokenizer, position: int): int = 
-  var pos = position
-  if g.buf[pos] == '\'': 
-    inc(pos)
-    case g.buf[pos]
-    of 'f', 'F': 
-      g.kind = gtFloatNumber
-      inc(pos)
-      if g.buf[pos] in {'0'..'9'}: inc(pos)
-      if g.buf[pos] in {'0'..'9'}: inc(pos)
-    of 'i', 'I': 
-      inc(pos)
-      if g.buf[pos] in {'0'..'9'}: inc(pos)
-      if g.buf[pos] in {'0'..'9'}: inc(pos)
-    else: 
-      nil
-  result = pos
-
-proc nimNumber(g: var TGeneralTokenizer, position: int): int = 
-  const decChars = {'0'..'9', '_'}
-  var pos = position
-  g.kind = gtDecNumber
-  while g.buf[pos] in decChars: inc(pos)
-  if g.buf[pos] == '.': 
-    g.kind = gtFloatNumber
-    inc(pos)
-    while g.buf[pos] in decChars: inc(pos)
-  if g.buf[pos] in {'e', 'E'}: 
-    g.kind = gtFloatNumber
-    inc(pos)
-    if g.buf[pos] in {'+', '-'}: inc(pos)
-    while g.buf[pos] in decChars: inc(pos)
-  result = nimNumberPostfix(g, pos)
-
-proc nimNextToken(g: var TGeneralTokenizer) = 
-  const 
-    hexChars = {'0'..'9', 'A'..'F', 'a'..'f', '_'}
-    octChars = {'0'..'7', '_'}
-    binChars = {'0'..'1', '_'}
-  var pos = g.pos
-  g.start = g.pos
-  if g.state == gtStringLit: 
-    g.kind = gtStringLit
-    while true: 
-      case g.buf[pos]
-      of '\\': 
-        g.kind = gtEscapeSequence
-        inc(pos)
-        case g.buf[pos]
-        of 'x', 'X': 
-          inc(pos)
-          if g.buf[pos] in hexChars: inc(pos)
-          if g.buf[pos] in hexChars: inc(pos)
-        of '0'..'9': 
-          while g.buf[pos] in {'0'..'9'}: inc(pos)
-        of '\0': 
-          g.state = gtNone
-        else: inc(pos)
-        break 
-      of '\0', '\x0D', '\x0A': 
-        g.state = gtNone
-        break 
-      of '\"': 
-        inc(pos)
-        g.state = gtNone
-        break 
-      else: inc(pos)
-  else: 
-    case g.buf[pos]
-    of ' ', '\x09'..'\x0D': 
-      g.kind = gtWhitespace
-      while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
-    of '#': 
-      g.kind = gtComment
-      while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): inc(pos)
-    of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF': 
-      var id = ""
-      while g.buf[pos] in scanner.SymChars + {'_'}: 
-        add(id, g.buf[pos])
-        inc(pos)
-      if (g.buf[pos] == '\"'): 
-        if (g.buf[pos + 1] == '\"') and (g.buf[pos + 2] == '\"'): 
-          inc(pos, 3)
-          g.kind = gtLongStringLit
-          while true: 
-            case g.buf[pos]
-            of '\0': 
-              break 
-            of '\"': 
-              inc(pos)
-              if g.buf[pos] == '\"' and g.buf[pos+1] == '\"' and 
-                  g.buf[pos+2] != '\"': 
-                inc(pos, 2)
-                break 
-            else: inc(pos)
-        else: 
-          g.kind = gtRawData
-          inc(pos)
-          while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): 
-            if g.buf[pos] == '"' and g.buf[pos+1] != '"': break
-            inc(pos)
-          if g.buf[pos] == '\"': inc(pos)
-      else: 
-        g.kind = nimGetKeyword(id)
-    of '0': 
-      inc(pos)
-      case g.buf[pos]
-      of 'b', 'B': 
-        inc(pos)
-        while g.buf[pos] in binChars: inc(pos)
-        pos = nimNumberPostfix(g, pos)
-      of 'x', 'X': 
-        inc(pos)
-        while g.buf[pos] in hexChars: inc(pos)
-        pos = nimNumberPostfix(g, pos)
-      of 'o', 'O': 
-        inc(pos)
-        while g.buf[pos] in octChars: inc(pos)
-        pos = nimNumberPostfix(g, pos)
-      else: pos = nimNumber(g, pos)
-    of '1'..'9': 
-      pos = nimNumber(g, pos)
-    of '\'': 
-      inc(pos)
-      g.kind = gtCharLit
-      while true: 
-        case g.buf[pos]
-        of '\0', '\x0D', '\x0A': 
-          break 
-        of '\'': 
-          inc(pos)
-          break 
-        of '\\': 
-          inc(pos, 2)
-        else: inc(pos)
-    of '\"': 
-      inc(pos)
-      if (g.buf[pos] == '\"') and (g.buf[pos + 1] == '\"'): 
-        inc(pos, 2)
-        g.kind = gtLongStringLit
-        while true: 
-          case g.buf[pos]
-          of '\0': 
-            break 
-          of '\"': 
-            inc(pos)
-            if g.buf[pos] == '\"' and g.buf[pos+1] == '\"' and 
-                g.buf[pos+2] != '\"': 
-              inc(pos, 2)
-              break 
-          else: inc(pos)
-      else: 
-        g.kind = gtStringLit
-        while true: 
-          case g.buf[pos]
-          of '\0', '\x0D', '\x0A': 
-            break 
-          of '\"': 
-            inc(pos)
-            break 
-          of '\\': 
-            g.state = g.kind
-            break 
-          else: inc(pos)
-    of '(', ')', '[', ']', '{', '}', '`', ':', ',', ';': 
-      inc(pos)
-      g.kind = gtPunctation
-    of '\0': 
-      g.kind = gtEof
-    else: 
-      if g.buf[pos] in scanner.OpChars: 
-        g.kind = gtOperator
-        while g.buf[pos] in scanner.OpChars: inc(pos)
-      else: 
-        inc(pos)
-        g.kind = gtNone
-  g.length = pos - g.pos
-  if (g.kind != gtEof) and (g.length <= 0): 
-    InternalError("nimNextToken: " & $(g.buf))
-  g.pos = pos
-
-proc generalNumber(g: var TGeneralTokenizer, position: int): int = 
-  const decChars = {'0'..'9'}
-  var pos = position
-  g.kind = gtDecNumber
-  while g.buf[pos] in decChars: inc(pos)
-  if g.buf[pos] == '.': 
-    g.kind = gtFloatNumber
-    inc(pos)
-    while g.buf[pos] in decChars: inc(pos)
-  if g.buf[pos] in {'e', 'E'}: 
-    g.kind = gtFloatNumber
-    inc(pos)
-    if g.buf[pos] in {'+', '-'}: inc(pos)
-    while g.buf[pos] in decChars: inc(pos)
-  result = pos
-
-proc generalStrLit(g: var TGeneralTokenizer, position: int): int = 
-  const 
-    decChars = {'0'..'9'}
-    hexChars = {'0'..'9', 'A'..'F', 'a'..'f'}
-  var pos = position
-  g.kind = gtStringLit
-  var c = g.buf[pos]
-  inc(pos)                    # skip " or '
-  while true: 
-    case g.buf[pos]
-    of '\0': 
-      break 
-    of '\\': 
-      inc(pos)
-      case g.buf[pos]
-      of '\0': 
-        break 
-      of '0'..'9': 
-        while g.buf[pos] in decChars: inc(pos)
-      of 'x', 'X': 
-        inc(pos)
-        if g.buf[pos] in hexChars: inc(pos)
-        if g.buf[pos] in hexChars: inc(pos)
-      else: inc(pos, 2)
-    else: 
-      if g.buf[pos] == c: 
-        inc(pos)
-        break 
-      else: 
-        inc(pos)
-  result = pos
-
-proc isKeyword(x: openarray[string], y: string): int = 
-  var a = 0
-  var b = len(x) - 1
-  while a <= b: 
-    var mid = (a + b) div 2
-    var c = cmp(x[mid], y)
-    if c < 0: 
-      a = mid + 1
-    elif c > 0: 
-      b = mid - 1
-    else: 
-      return mid
-  result = - 1
-
-proc isKeywordIgnoreCase(x: openarray[string], y: string): int = 
-  var a = 0
-  var b = len(x) - 1
-  while a <= b: 
-    var mid = (a + b) div 2
-    var c = cmpIgnoreCase(x[mid], y)
-    if c < 0: 
-      a = mid + 1
-    elif c > 0: 
-      b = mid - 1
-    else: 
-      return mid
-  result = - 1
-
-type 
-  TTokenizerFlag = enum 
-    hasPreprocessor, hasNestedComments
-  TTokenizerFlags = set[TTokenizerFlag]
-
-proc clikeNextToken(g: var TGeneralTokenizer, keywords: openarray[string], 
-                    flags: TTokenizerFlags) = 
-  const 
-    hexChars = {'0'..'9', 'A'..'F', 'a'..'f'}
-    octChars = {'0'..'7'}
-    binChars = {'0'..'1'}
-    symChars = {'A'..'Z', 'a'..'z', '0'..'9', '_', '\x80'..'\xFF'}
-  var pos = g.pos
-  g.start = g.pos
-  if g.state == gtStringLit: 
-    g.kind = gtStringLit
-    while true: 
-      case g.buf[pos]
-      of '\\': 
-        g.kind = gtEscapeSequence
-        inc(pos)
-        case g.buf[pos]
-        of 'x', 'X': 
-          inc(pos)
-          if g.buf[pos] in hexChars: inc(pos)
-          if g.buf[pos] in hexChars: inc(pos)
-        of '0'..'9': 
-          while g.buf[pos] in {'0'..'9'}: inc(pos)
-        of '\0': 
-          g.state = gtNone
-        else: inc(pos)
-        break 
-      of '\0', '\x0D', '\x0A': 
-        g.state = gtNone
-        break 
-      of '\"': 
-        inc(pos)
-        g.state = gtNone
-        break 
-      else: inc(pos)
-  else: 
-    case g.buf[pos]
-    of ' ', '\x09'..'\x0D': 
-      g.kind = gtWhitespace
-      while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
-    of '/': 
-      inc(pos)
-      if g.buf[pos] == '/': 
-        g.kind = gtComment
-        while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): inc(pos)
-      elif g.buf[pos] == '*': 
-        g.kind = gtLongComment
-        var nested = 0
-        inc(pos)
-        while true: 
-          case g.buf[pos]
-          of '*': 
-            inc(pos)
-            if g.buf[pos] == '/': 
-              inc(pos)
-              if nested == 0: break 
-          of '/': 
-            inc(pos)
-            if g.buf[pos] == '*': 
-              inc(pos)
-              if hasNestedComments in flags: inc(nested)
-          of '\0': 
-            break 
-          else: inc(pos)
-    of '#': 
-      inc(pos)
-      if hasPreprocessor in flags: 
-        g.kind = gtPreprocessor
-        while g.buf[pos] in {' ', Tabulator}: inc(pos)
-        while g.buf[pos] in symChars: inc(pos)
-      else: 
-        g.kind = gtOperator
-    of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF': 
-      var id = ""
-      while g.buf[pos] in SymChars: 
-        add(id, g.buf[pos])
-        inc(pos)
-      if isKeyword(keywords, id) >= 0: g.kind = gtKeyword
-      else: g.kind = gtIdentifier
-    of '0': 
-      inc(pos)
-      case g.buf[pos]
-      of 'b', 'B': 
-        inc(pos)
-        while g.buf[pos] in binChars: inc(pos)
-        if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos)
-      of 'x', 'X': 
-        inc(pos)
-        while g.buf[pos] in hexChars: inc(pos)
-        if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos)
-      of '0'..'7': 
-        inc(pos)
-        while g.buf[pos] in octChars: inc(pos)
-        if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos)
-      else: 
-        pos = generalNumber(g, pos)
-        if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos)
-    of '1'..'9': 
-      pos = generalNumber(g, pos)
-      if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos)
-    of '\'': 
-      pos = generalStrLit(g, pos)
-      g.kind = gtCharLit
-    of '\"': 
-      inc(pos)
-      g.kind = gtStringLit
-      while true: 
-        case g.buf[pos]
-        of '\0': 
-          break 
-        of '\"': 
-          inc(pos)
-          break 
-        of '\\': 
-          g.state = g.kind
-          break 
-        else: inc(pos)
-    of '(', ')', '[', ']', '{', '}', ':', ',', ';', '.': 
-      inc(pos)
-      g.kind = gtPunctation
-    of '\0': 
-      g.kind = gtEof
-    else: 
-      if g.buf[pos] in scanner.OpChars: 
-        g.kind = gtOperator
-        while g.buf[pos] in scanner.OpChars: inc(pos)
-      else: 
-        inc(pos)
-        g.kind = gtNone
-  g.length = pos - g.pos
-  if (g.kind != gtEof) and (g.length <= 0): InternalError("clikeNextToken")
-  g.pos = pos
-
-proc cNextToken(g: var TGeneralTokenizer) = 
-  const 
-    keywords: array[0..36, string] = ["_Bool", "_Complex", "_Imaginary", "auto", 
-      "break", "case", "char", "const", "continue", "default", "do", "double", 
-      "else", "enum", "extern", "float", "for", "goto", "if", "inline", "int", 
-      "long", "register", "restrict", "return", "short", "signed", "sizeof", 
-      "static", "struct", "switch", "typedef", "union", "unsigned", "void", 
-      "volatile", "while"]
-  clikeNextToken(g, keywords, {hasPreprocessor})
-
-proc cppNextToken(g: var TGeneralTokenizer) = 
-  const 
-    keywords: array[0..47, string] = ["asm", "auto", "break", "case", "catch", 
-      "char", "class", "const", "continue", "default", "delete", "do", "double", 
-      "else", "enum", "extern", "float", "for", "friend", "goto", "if", 
-      "inline", "int", "long", "new", "operator", "private", "protected", 
-      "public", "register", "return", "short", "signed", "sizeof", "static", 
-      "struct", "switch", "template", "this", "throw", "try", "typedef", 
-      "union", "unsigned", "virtual", "void", "volatile", "while"]
-  clikeNextToken(g, keywords, {hasPreprocessor})
-
-proc csharpNextToken(g: var TGeneralTokenizer) = 
-  const 
-    keywords: array[0..76, string] = ["abstract", "as", "base", "bool", "break", 
-      "byte", "case", "catch", "char", "checked", "class", "const", "continue", 
-      "decimal", "default", "delegate", "do", "double", "else", "enum", "event", 
-      "explicit", "extern", "false", "finally", "fixed", "float", "for", 
-      "foreach", "goto", "if", "implicit", "in", "int", "interface", "internal", 
-      "is", "lock", "long", "namespace", "new", "null", "object", "operator", 
-      "out", "override", "params", "private", "protected", "public", "readonly", 
-      "ref", "return", "sbyte", "sealed", "short", "sizeof", "stackalloc", 
-      "static", "string", "struct", "switch", "this", "throw", "true", "try", 
-      "typeof", "uint", "ulong", "unchecked", "unsafe", "ushort", "using", 
-      "virtual", "void", "volatile", "while"]
-  clikeNextToken(g, keywords, {hasPreprocessor})
-
-proc javaNextToken(g: var TGeneralTokenizer) = 
-  const 
-    keywords: array[0..52, string] = ["abstract", "assert", "boolean", "break", 
-      "byte", "case", "catch", "char", "class", "const", "continue", "default", 
-      "do", "double", "else", "enum", "extends", "false", "final", "finally", 
-      "float", "for", "goto", "if", "implements", "import", "instanceof", "int", 
-      "interface", "long", "native", "new", "null", "package", "private", 
-      "protected", "public", "return", "short", "static", "strictfp", "super", 
-      "switch", "synchronized", "this", "throw", "throws", "transient", "true", 
-      "try", "void", "volatile", "while"]
-  clikeNextToken(g, keywords, {})
-
-proc getNextToken(g: var TGeneralTokenizer, lang: TSourceLanguage) = 
-  case lang
-  of langNimrod: nimNextToken(g)
-  of langCpp: cppNextToken(g)
-  of langCsharp: csharpNextToken(g)
-  of langC: cNextToken(g)
-  of langJava: javaNextToken(g)
-  else: InternalError("getNextToken")
-