diff options
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/charsets.nim | 49 | ||||
-rw-r--r-- | compiler/parsecfg.nim | 346 | ||||
-rw-r--r-- | compiler/pendx.nim | 18 | ||||
-rw-r--r-- | compiler/syntaxes.nim | 2 |
4 files changed, 0 insertions, 415 deletions
diff --git a/compiler/charsets.nim b/compiler/charsets.nim deleted file mode 100644 index d3d00b687..000000000 --- a/compiler/charsets.nim +++ /dev/null @@ -1,49 +0,0 @@ -# -# -# The Nimrod Compiler -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -const - CharSize* = SizeOf(Char) - Lrz* = ' ' - Apo* = '\'' - Tabulator* = '\x09' - ESC* = '\x1B' - CR* = '\x0D' - FF* = '\x0C' - LF* = '\x0A' - BEL* = '\x07' - BACKSPACE* = '\x08' - VT* = '\x0B' - -when defined(macos): - DirSep == ':' - "\n" == CR & "" - FirstNLchar == CR - PathSep == ';' # XXX: is this correct? -else: - when defined(unix): - DirSep == '/' - "\n" == LF & "" - FirstNLchar == LF - PathSep == ':' - else: - # windows, dos - DirSep == '\\' - "\n" == CR + LF - FirstNLchar == CR - DriveSeparator == ':' - PathSep == ';' -UpLetters == {'A'..'Z', '\xC0'..'\xDE'} -DownLetters == {'a'..'z', '\xDF'..'\xFF'} -Numbers == {'0'..'9'} -Letters == UpLetters + DownLetters -type - TCharSet* = set[Char] - PCharSet* = ref TCharSet - -# implementation diff --git a/compiler/parsecfg.nim b/compiler/parsecfg.nim deleted file mode 100644 index e0d1afff1..000000000 --- a/compiler/parsecfg.nim +++ /dev/null @@ -1,346 +0,0 @@ -# -# -# Nimrod's Runtime Library -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -# A HIGH-PERFORMANCE configuration file parser; -# the Nimrod version of this file is part of the -# standard library. - -import - llstream, nhashes, strutils, nimlexbase - -type - TCfgEventKind* = enum - cfgEof, # end of file reached - cfgSectionStart, # a ``[section]`` has been parsed - cfgKeyValuePair, # a ``key=value`` pair has been detected - cfgOption, # a ``--key=value`` command line option - cfgError # an error ocurred during parsing; msg contains the - # error message - TCfgEvent* = object of TObject - case kind*: TCfgEventKind - of cfgEof: - nil - - of cfgSectionStart: - section*: string - - of cfgKeyValuePair, cfgOption: - key*, value*: string - - of cfgError: - msg*: string - - - TTokKind* = enum - tkInvalid, tkEof, # order is important here! - tkSymbol, tkEquals, tkColon, tkBracketLe, tkBracketRi, tkDashDash - TToken*{.final.} = object # a token - kind*: TTokKind # the type of the token - literal*: string # the parsed (string) literal - - TParserState* = enum - startState, commaState - TCfgParser* = object of TBaseLexer - tok*: TToken - state*: TParserState - filename*: string - - -proc Open*(c: var TCfgParser, filename: string, inputStream: PLLStream) -proc Close*(c: var TCfgParser) -proc next*(c: var TCfgParser): TCfgEvent -proc getColumn*(c: TCfgParser): int -proc getLine*(c: TCfgParser): int -proc getFilename*(c: TCfgParser): string -proc errorStr*(c: TCfgParser, msg: string): string -# implementation - -const - SymChars: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} # - # ---------------------------------------------------------------------------- - -proc rawGetTok(c: var TCfgParser, tok: var TToken) -proc open(c: var TCfgParser, filename: string, inputStream: PLLStream) = - openBaseLexer(c, inputStream) - c.filename = filename - c.state = startState - c.tok.kind = tkInvalid - c.tok.literal = "" - rawGetTok(c, c.tok) - -proc close(c: var TCfgParser) = - closeBaseLexer(c) - -proc getColumn(c: TCfgParser): int = - result = getColNumber(c, c.bufPos) - -proc getLine(c: TCfgParser): int = - result = c.linenumber - -proc getFilename(c: TCfgParser): string = - result = c.filename - -proc handleHexChar(c: var TCfgParser, xi: var int) = - case c.buf[c.bufpos] - of '0'..'9': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - of 'a'..'f': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) - inc(c.bufpos) - of 'A'..'F': - xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) - inc(c.bufpos) - else: - nil - -proc handleDecChars(c: var TCfgParser, xi: var int) = - while c.buf[c.bufpos] in {'0'..'9'}: - xi = (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0')) - inc(c.bufpos) - -proc getEscapedChar(c: var TCfgParser, tok: var TToken) = - var xi: int - inc(c.bufpos) # skip '\' - case c.buf[c.bufpos] - of 'n', 'N': - tok.literal = tok.literal & "\n" - Inc(c.bufpos) - of 'r', 'R', 'c', 'C': - add(tok.literal, CR) - Inc(c.bufpos) - of 'l', 'L': - add(tok.literal, LF) - Inc(c.bufpos) - of 'f', 'F': - add(tok.literal, FF) - inc(c.bufpos) - of 'e', 'E': - add(tok.literal, ESC) - Inc(c.bufpos) - of 'a', 'A': - add(tok.literal, BEL) - Inc(c.bufpos) - of 'b', 'B': - add(tok.literal, BACKSPACE) - Inc(c.bufpos) - of 'v', 'V': - add(tok.literal, VT) - Inc(c.bufpos) - of 't', 'T': - add(tok.literal, Tabulator) - Inc(c.bufpos) - of '\'', '\"': - add(tok.literal, c.buf[c.bufpos]) - Inc(c.bufpos) - of '\\': - add(tok.literal, '\\') - Inc(c.bufpos) - of 'x', 'X': - inc(c.bufpos) - xi = 0 - handleHexChar(c, xi) - handleHexChar(c, xi) - add(tok.literal, Chr(xi)) - of '0'..'9': - xi = 0 - handleDecChars(c, xi) - if (xi <= 255): add(tok.literal, Chr(xi)) - else: tok.kind = tkInvalid - else: tok.kind = tkInvalid - -proc HandleCRLF(c: var TCfgParser, pos: int): int = - case c.buf[pos] - of CR: result = lexbase.HandleCR(c, pos) - of LF: result = lexbase.HandleLF(c, pos) - else: result = pos - -proc getString(c: var TCfgParser, tok: var TToken, rawMode: bool) = - var - pos: int - ch: Char - buf: cstring - pos = c.bufPos + 1 # skip " - buf = c.buf # put `buf` in a register - tok.kind = tkSymbol - if (buf[pos] == '\"') and (buf[pos + 1] == '\"'): - # long string literal: - inc(pos, 2) # skip "" - # skip leading newline: - pos = HandleCRLF(c, pos) - buf = c.buf - while true: - case buf[pos] - of '\"': - if (buf[pos + 1] == '\"') and (buf[pos + 2] == '\"'): break - add(tok.literal, '\"') - Inc(pos) - of CR, LF: - pos = HandleCRLF(c, pos) - buf = c.buf - tok.literal = tok.literal & "\n" - of lexbase.EndOfFile: - tok.kind = tkInvalid - break - else: - add(tok.literal, buf[pos]) - Inc(pos) - c.bufpos = pos + - 3 # skip the three """ - else: - # ordinary string literal - while true: - ch = buf[pos] - if ch == '\"': - inc(pos) # skip '"' - break - if ch in {CR, LF, lexbase.EndOfFile}: - tok.kind = tkInvalid - break - if (ch == '\\') and not rawMode: - c.bufPos = pos - getEscapedChar(c, tok) - pos = c.bufPos - else: - add(tok.literal, ch) - Inc(pos) - c.bufpos = pos - -proc getSymbol(c: var TCfgParser, tok: var TToken) = - var - pos: int - buf: cstring - pos = c.bufpos - buf = c.buf - while true: - add(tok.literal, buf[pos]) - Inc(pos) - if not (buf[pos] in SymChars): break - c.bufpos = pos - tok.kind = tkSymbol - -proc skip(c: var TCfgParser) = - var - buf: cstring - pos: int - pos = c.bufpos - buf = c.buf - while true: - case buf[pos] - of ' ': - Inc(pos) - of Tabulator: - inc(pos) - of '#', ';': - while not (buf[pos] in {CR, LF, lexbase.EndOfFile}): inc(pos) - of CR, LF: - pos = HandleCRLF(c, pos) - buf = c.buf - else: - break # EndOfFile also leaves the loop - c.bufpos = pos - -proc rawGetTok(c: var TCfgParser, tok: var TToken) = - tok.kind = tkInvalid - setlen(tok.literal, 0) - skip(c) - case c.buf[c.bufpos] - of '=': - tok.kind = tkEquals - inc(c.bufpos) - tok.literal = "=" - of '-': - inc(c.bufPos) - if c.buf[c.bufPos] == '-': inc(c.bufPos) - tok.kind = tkDashDash - tok.literal = "--" - of ':': - tok.kind = tkColon - inc(c.bufpos) - tok.literal = ":" - of 'r', 'R': - if c.buf[c.bufPos + 1] == '\"': - Inc(c.bufPos) - getString(c, tok, true) - else: - getSymbol(c, tok) - of '[': - tok.kind = tkBracketLe - inc(c.bufpos) - tok.literal = "[" - of ']': - tok.kind = tkBracketRi - Inc(c.bufpos) - tok.literal = "]" - of '\"': - getString(c, tok, false) - of lexbase.EndOfFile: - tok.kind = tkEof - else: getSymbol(c, tok) - -proc errorStr(c: TCfgParser, msg: string): string = - result = `%`("$1($2, $3) Error: $4", - [c.filename, $(getLine(c)), $(getColumn(c)), msg]) - -proc getKeyValPair(c: var TCfgParser, kind: TCfgEventKind): TCfgEvent = - if c.tok.kind == tkSymbol: - result.kind = kind - result.key = c.tok.literal - result.value = "" - rawGetTok(c, c.tok) - while c.tok.literal == ".": - add(result.key, '.') - rawGetTok(c, c.tok) - if c.tok.kind == tkSymbol: - add(result.key, c.tok.literal) - rawGetTok(c, c.tok) - else: - result.kind = cfgError - result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) - break - if c.tok.kind in {tkEquals, tkColon}: - rawGetTok(c, c.tok) - if c.tok.kind == tkSymbol: - result.value = c.tok.literal - else: - result.kind = cfgError - result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) - rawGetTok(c, c.tok) - else: - result.kind = cfgError - result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) - rawGetTok(c, c.tok) - -proc next(c: var TCfgParser): TCfgEvent = - case c.tok.kind - of tkEof: - result.kind = cfgEof - of tkDashDash: - rawGetTok(c, c.tok) - result = getKeyValPair(c, cfgOption) - of tkSymbol: - result = getKeyValPair(c, cfgKeyValuePair) - of tkBracketLe: - rawGetTok(c, c.tok) - if c.tok.kind == tkSymbol: - result.kind = cfgSectionStart - result.section = c.tok.literal - else: - result.kind = cfgError - result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) - rawGetTok(c, c.tok) - if c.tok.kind == tkBracketRi: - rawGetTok(c, c.tok) - else: - result.kind = cfgError - result.msg = errorStr(c, "\']\' expected, but found: " & c.tok.literal) - of tkInvalid, tkBracketRi, tkEquals, tkColon: - result.kind = cfgError - result.msg = errorStr(c, "invalid token: " & c.tok.literal) - rawGetTok(c, c.tok) diff --git a/compiler/pendx.nim b/compiler/pendx.nim deleted file mode 100644 index 2942968a0..000000000 --- a/compiler/pendx.nim +++ /dev/null @@ -1,18 +0,0 @@ -# -# -# The Nimrod Compiler -# (c) Copyright 2012 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -import - llstream, lexer, parser, idents, strutils, ast, msgs - -proc ParseAll*(p: var TParser): PNode = - result = nil - -proc parseTopLevelStmt*(p: var TParser): PNode = - result = nil - diff --git a/compiler/syntaxes.nim b/compiler/syntaxes.nim index 478c2a837..d5062544f 100644 --- a/compiler/syntaxes.nim +++ b/compiler/syntaxes.nim @@ -62,7 +62,6 @@ proc parseAll(p: var TParsers): PNode = of skinEndX: internalError("parser to implement") result = ast.emptyNode - # skinEndX: result := pendx.parseAll(p.parser); proc parseTopLevelStmt(p: var TParsers): PNode = case p.skin @@ -73,7 +72,6 @@ proc parseTopLevelStmt(p: var TParsers): PNode = of skinEndX: internalError("parser to implement") result = ast.emptyNode - #skinEndX: result := pendx.parseTopLevelStmt(p.parser); proc utf8Bom(s: string): int = if (s[0] == '\xEF') and (s[1] == '\xBB') and (s[2] == '\xBF'): |