diff options
author | Andreas Rumpf <andreas@andreas-laptop> | 2010-07-21 09:44:47 +0200 |
---|---|---|
committer | Andreas Rumpf <andreas@andreas-laptop> | 2010-07-21 09:44:47 +0200 |
commit | d10973adb00840631e5314ec902d502f15934801 (patch) | |
tree | a207854b0cf984815beb26bf2d71933ce566f6d7 /rod/c2nim | |
parent | c441cdb64ca5394f74faadf76563bcfafeda18f4 (diff) | |
download | Nim-d10973adb00840631e5314ec902d502f15934801.tar.gz |
c2nim tool added
Diffstat (limited to 'rod/c2nim')
-rwxr-xr-x | rod/c2nim/c2nim.nim | 74 | ||||
-rwxr-xr-x | rod/c2nim/clex.nim | 751 | ||||
-rwxr-xr-x | rod/c2nim/cparse.nim | 1469 | ||||
-rwxr-xr-x | rod/c2nim/cpp.nim | 231 | ||||
-rw-r--r-- | rod/c2nim/manual.txt | 235 |
5 files changed, 2760 insertions, 0 deletions
diff --git a/rod/c2nim/c2nim.nim b/rod/c2nim/c2nim.nim new file mode 100755 index 000000000..52d16ce05 --- /dev/null +++ b/rod/c2nim/c2nim.nim @@ -0,0 +1,74 @@ +# +# +# c2nim - C to Nimrod source converter +# (c) Copyright 2010 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +import + strutils, os, times, parseopt, llstream, ast, rnimsyn, options, msgs, + clex, cparse + +const + Version = "0.8.10" + Usage = """ +c2nim - C to Nimrod source converter + (c) 2010 Andreas Rumpf +Usage: c2nim [options] inputfile [options] +Options: + -o, --out:FILE set output filename + --dynlib:SYMBOL import from dynlib: SYMBOL will be used for the import + --header:HEADER_FILE import from a HEADER_FILE (discouraged!) + --cdecl annotate procs with ``{.cdecl.}`` + --stdcall annotate procs with ``{.stdcall.}`` + --ref convert typ* to ref typ (default: ptr typ) + --prefix:PREFIX strip prefix for the generated Nimrod identifiers + (multiple --prefix options are supported) + --suffix:SUFFIX strip suffix for the generated Nimrod identifiers + (multiple --suffix options are supported) + --skip:IDENT skip IDENT in the input file + -v, --version write c2nim's version + -h, --help show this help +""" + +proc main(infile, outfile: string, options: PParserOptions) = + var start = getTime() + var stream = LLStreamOpen(infile, fmRead) + if stream == nil: rawMessage(errCannotOpenFile, infile) + var p: TParser + openParser(p, infile, stream, options) + var module = parseUnit(p) + closeParser(p) + renderModule(module, outfile) + rawMessage(hintSuccessX, [$gLinesCompiled, $(getTime() - start)]) + +var + infile = "" + outfile = "" + parserOptions = newParserOptions() +for kind, key, val in getopt(): + case kind + of cmdArgument: infile = key + of cmdLongOption, cmdShortOption: + case key.toLower + of "help", "h": + stdout.write(Usage) + quit(0) + of "version", "v": + stdout.write(Version & "\n") + quit(0) + of "o", "out": outfile = 
val # the output filename is the option's value; key is only the switch name ("o"/"out") + else: + if not parserOptions.setOption(key, val): + stdout.write("[Error] unknown option: " & key) + of cmdEnd: assert(false) +if infile.len == 0: + # no filename has been given, so we show the help: + stdout.write(Usage) +else: + if outfile.len == 0: + outfile = changeFileExt(infile, "nim") + infile = addFileExt(infile, "h") + main(infile, outfile, parserOptions) diff --git a/rod/c2nim/clex.nim b/rod/c2nim/clex.nim new file mode 100755 index 000000000..ecf337dd9 --- /dev/null +++ b/rod/c2nim/clex.nim @@ -0,0 +1,751 @@ +# +# +# c2nim - C to Nimrod source converter +# (c) Copyright 2010 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# This module implements an Ansi C scanner. This is an adaption from +# the scanner module. Keywords are not handled here, but in the parser to make +# it more flexible. + + +import + options, msgs, strutils, platform, lexbase, llstream + +const + MaxLineLength* = 80 # lines longer than this lead to a warning + numChars*: TCharSet = {'0'..'9', 'a'..'z', 'A'..'Z'} + SymChars*: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} + SymStartChars*: TCharSet = {'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF'} + +type + TTokKind* = enum + pxInvalid, pxEof, + pxStarComment, # /* */ comment + pxLineComment, # // comment + pxDirective, # #define, etc. + pxDirectiveParLe, # #define m( with parle (yes, C is that ugly!) + pxDirConc, # ## + pxNewLine, # newline: end of directive + pxAmp, # & + pxAmpAmp, # && + pxAmpAsgn, # &= + pxAmpAmpAsgn, # &&= + pxBar, # | + pxBarBar, # || + pxBarAsgn, # |= + pxBarBarAsgn, # ||= + pxNot, # ! + pxPlusPlus, # ++ + pxMinusMinus, # -- + pxPlus, # + + pxPlusAsgn, # += + pxMinus, # - + pxMinusAsgn, # -= + pxMod, # % + pxModAsgn, # %= + pxSlash, # / + pxSlashAsgn, # /= + pxStar, # * + pxStarAsgn, # *= + pxHat, # ^ + pxHatAsgn, # ^= + pxAsgn, # = + pxEquals, # == + pxDot, # . + pxDotDotDot, # ... 
+ pxLe, # <= + pxLt, # < + pxGe, # >= + pxGt, # > + pxNeq, # != + pxConditional, # ? + pxShl, # << + pxShlAsgn, # <<= + pxShr, # >> + pxShrAsgn, # >>= + pxTilde, # ~ + pxTildeAsgn, # ~= + pxArrow, # -> + pxScope, # :: + + pxStrLit, + pxCharLit, + pxSymbol, # a symbol + pxIntLit, + pxInt64Lit, # long constant like 0x70fffffff or out of int range + pxFloatLit, + pxParLe, pxParRi, + pxBracketLe, pxBracketRi, + pxComma, pxSemiColon, pxColon, + pxCurlyLe, pxCurlyRi + TTokKinds* = set[TTokKind] + +type + TNumericalBase* = enum base10, base2, base8, base16 + TToken* = object + xkind*: TTokKind # the type of the token + s*: string # parsed symbol, char or string literal + iNumber*: BiggestInt # the parsed integer literal + fNumber*: BiggestFloat # the parsed floating point literal + base*: TNumericalBase # the numerical base; only valid for int + # or float literals + next*: ref TToken # for C we need arbitrary look-ahead :-( + + TLexer* = object of TBaseLexer + filename*: string + inDirective: bool + + +proc getTok*(L: var TLexer, tok: var TToken) +proc PrintTok*(tok: TToken) +proc `$`*(tok: TToken): string +# implementation + +var + gLinesCompiled*: int + +proc fillToken(L: var TToken) = + L.xkind = pxInvalid + L.iNumber = 0 + L.s = "" + L.fNumber = 0.0 + L.base = base10 + +proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream) = + openBaseLexer(lex, inputstream) + lex.filename = filename + +proc closeLexer*(lex: var TLexer) = + inc(gLinesCompiled, lex.LineNumber) + closeBaseLexer(lex) + +proc getColumn*(L: TLexer): int = + result = getColNumber(L, L.bufPos) + +proc getLineInfo*(L: TLexer): TLineInfo = + result = newLineInfo(L.filename, L.linenumber, getColNumber(L, L.bufpos)) + +proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "") = + msgs.liMessage(getLineInfo(L), msg, arg) + +proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") = + var info = newLineInfo(L.filename, L.linenumber, pos - L.lineStart) + msgs.liMessage(info, msg, arg) + 
+proc TokKindToStr*(k: TTokKind): string = + case k + of pxEof: result = "[EOF]" + of pxInvalid: result = "[invalid]" + of pxStarComment, pxLineComment: result = "[comment]" + of pxStrLit: result = "[string literal]" + of pxCharLit: result = "[char literal]" + + of pxDirective, pxDirectiveParLe: result = "#" # #define, etc. + of pxDirConc: result = "##" + of pxNewLine: result = "[NewLine]" + of pxAmp: result = "&" # & + of pxAmpAmp: result = "&&" # && + of pxAmpAsgn: result = "&=" # &= + of pxAmpAmpAsgn: result = "&&=" # &&= + of pxBar: result = "|" # | + of pxBarBar: result = "||" # || + of pxBarAsgn: result = "|=" # |= + of pxBarBarAsgn: result = "||=" # ||= + of pxNot: result = "!" # ! + of pxPlusPlus: result = "++" # ++ + of pxMinusMinus: result = "--" # -- + of pxPlus: result = "+" # + + of pxPlusAsgn: result = "+=" # += + of pxMinus: result = "-" # - + of pxMinusAsgn: result = "-=" # -= + of pxMod: result = "%" # % + of pxModAsgn: result = "%=" # %= + of pxSlash: result = "/" # / + of pxSlashAsgn: result = "/=" # /= + of pxStar: result = "*" # * + of pxStarAsgn: result = "*=" # *= + of pxHat: result = "^" # ^ + of pxHatAsgn: result = "^=" # ^= + of pxAsgn: result = "=" # = + of pxEquals: result = "==" # == + of pxDot: result = "." # . + of pxDotDotDot: result = "..." # ... + of pxLe: result = "<=" # <= + of pxLt: result = "<" # < + of pxGe: result = ">=" # >= + of pxGt: result = ">" # > + of pxNeq: result = "!=" # != + of pxConditional: result = "?" 
+ of pxShl: result = "<<" + of pxShlAsgn: result = "<<=" + of pxShr: result = ">>" + of pxShrAsgn: result = ">>=" + of pxTilde: result = "~" + of pxTildeAsgn: result = "~=" + of pxArrow: result = "->" + of pxScope: result = "::" + + of pxSymbol: result = "[identifier]" + of pxIntLit, pxInt64Lit: result = "[integer literal]" + of pxFloatLit: result = "[floating point literal]" + of pxParLe: result = "(" + of pxParRi: result = ")" + of pxBracketLe: result = "[" + of pxBracketRi: result = "]" + of pxComma: result = "," + of pxSemiColon: result = ";" + of pxColon: result = ":" + of pxCurlyLe: result = "{" + of pxCurlyRi: result = "}" + +proc `$`(tok: TToken): string = + case tok.xkind + of pxSymbol, pxInvalid, pxStarComment, pxLineComment, pxStrLit: result = tok.s + of pxIntLit, pxInt64Lit: result = $tok.iNumber + of pxFloatLit: result = $tok.fNumber + else: result = TokKindToStr(tok.xkind) + +proc PrintTok(tok: TToken) = + writeln(stdout, $tok) + +proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: TCharSet) = + # matches ([chars]_)* + var pos = L.bufpos # use registers for pos, buf + var buf = L.buf + while true: + if buf[pos] in chars: + add(tok.s, buf[pos]) + Inc(pos) + else: + break + if buf[pos] == '_': + add(tok.s, '_') + Inc(pos) + L.bufPos = pos + +proc isFloatLiteral(s: string): bool = + for i in countup(0, len(s)-1): + if s[i] in {'.', 'e', 'E'}: + return true + +proc getNumber2(L: var TLexer, tok: var TToken) = + var pos = L.bufpos + 2 # skip 0b + tok.base = base2 + var xi: biggestInt = 0 + var bits = 0 + while true: + case L.buf[pos] + of 'A'..'Z', 'a'..'z': + # ignore type suffix: + inc(pos) + of '2'..'9', '.': + lexMessage(L, errInvalidNumber) + inc(pos) + of '_': + inc(pos) + of '0', '1': + xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0')) + inc(pos) + inc(bits) + else: break + tok.iNumber = xi + if (bits > 32): tok.xkind = pxInt64Lit + else: tok.xkind = pxIntLit + L.bufpos = pos + +proc getNumber8(L: var TLexer, tok: var TToken) = + var 
pos = L.bufpos + 1 # skip only the leading 0: getTok already saw that the next char is the first octal digit + tok.base = base8 + var xi: biggestInt = 0 + var bits = 0 + while true: + case L.buf[pos] + of 'A'..'Z', 'a'..'z': + # ignore type suffix: + inc(pos) + of '8'..'9', '.': + lexMessage(L, errInvalidNumber) + inc(pos) + of '_': + inc(pos) + of '0'..'7': + xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0')) + inc(pos) + inc(bits) + else: break + tok.iNumber = xi + if (bits > 12): tok.xkind = pxInt64Lit + else: tok.xkind = pxIntLit + L.bufpos = pos + +proc getNumber16(L: var TLexer, tok: var TToken) = + var pos = L.bufpos + 2 # skip 0x + tok.base = base16 + var xi: biggestInt = 0 + var bits = 0 + while true: + case L.buf[pos] + of 'G'..'Z', 'g'..'z': + # ignore type suffix: + inc(pos) + of '_': inc(pos) + of '0'..'9': + xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0')) + inc(pos) + inc(bits, 4) + of 'a'..'f': + xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10) + inc(pos) + inc(bits, 4) + of 'A'..'F': + xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10) + inc(pos) + inc(bits, 4) + else: break + tok.iNumber = xi + if bits > 32: tok.xkind = pxInt64Lit + else: tok.xkind = pxIntLit + L.bufpos = pos + +proc getNumber(L: var TLexer, tok: var TToken) = + tok.base = base10 + matchUnderscoreChars(L, tok, {'0'..'9'}) + if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}): + add(tok.s, '.') + inc(L.bufpos) + matchUnderscoreChars(L, tok, {'e', 'E', '+', '-', '0'..'9'}) + try: + if isFloatLiteral(tok.s): + tok.fnumber = parseFloat(tok.s) + tok.xkind = pxFloatLit + else: + tok.iNumber = ParseInt(tok.s) + if (tok.iNumber < low(int32)) or (tok.iNumber > high(int32)): + tok.xkind = pxInt64Lit + else: + tok.xkind = pxIntLit + except EInvalidValue: + lexMessage(L, errInvalidNumber, tok.s) + except EOverflow: + lexMessage(L, errNumberOutOfRange, tok.s) + # ignore type suffix: + while L.buf[L.bufpos] in {'A'..'Z', 'a'..'z'}: inc(L.bufpos) + +proc HandleCRLF(L: var TLexer, pos: int): int = + case L.buf[pos] + of CR: result = 
lexbase.HandleCR(L, pos) + of LF: result = lexbase.HandleLF(L, pos) + else: result = pos + +proc escape(L: var TLexer, tok: var TToken, allowEmpty=false) = + inc(L.bufpos) # skip \ + case L.buf[L.bufpos] + of 'b', 'B': + add(tok.s, '\b') + inc(L.bufpos) + of 't', 'T': + add(tok.s, '\t') + inc(L.bufpos) + of 'n', 'N': + add(tok.s, '\L') + inc(L.bufpos) + of 'f', 'F': + add(tok.s, '\f') + inc(L.bufpos) + of 'r', 'R': + add(tok.s, '\r') + inc(L.bufpos) + of '\'': + add(tok.s, '\'') + inc(L.bufpos) + of '"': + add(tok.s, '"') + inc(L.bufpos) + of '\\': + add(tok.s, '\\') # an escaped backslash stands for one literal backslash, not a backspace + inc(L.bufpos) + of '0'..'7': + var xi = ord(L.buf[L.bufpos]) - ord('0') + inc(L.bufpos) + if L.buf[L.bufpos] in {'0'..'7'}: + xi = (xi shl 3) or (ord(L.buf[L.bufpos]) - ord('0')) + inc(L.bufpos) + if L.buf[L.bufpos] in {'0'..'7'}: + xi = (xi shl 3) or (ord(L.buf[L.bufpos]) - ord('0')) + inc(L.bufpos) + add(tok.s, chr(xi)) + elif not allowEmpty: + lexMessage(L, errInvalidCharacterConstant) + +proc getCharLit(L: var TLexer, tok: var TToken) = + inc(L.bufpos) # skip ' + if L.buf[L.bufpos] == '\\': + escape(L, tok) + else: + add(tok.s, L.buf[L.bufpos]) + inc(L.bufpos) + if L.buf[L.bufpos] == '\'': + inc(L.bufpos) + else: + lexMessage(L, errMissingFinalQuote) + tok.xkind = pxCharLit + +proc getString(L: var TLexer, tok: var TToken) = + var pos = L.bufPos + 1 # skip " + var buf = L.buf # put `buf` in a register + var line = L.linenumber # save linenumber for better error message + while true: + case buf[pos] + of '\"': + Inc(pos) + break + of CR: + pos = lexbase.HandleCR(L, pos) + buf = L.buf + of LF: + pos = lexbase.HandleLF(L, pos) + buf = L.buf + of lexbase.EndOfFile: + var line2 = L.linenumber + L.LineNumber = line + lexMessagePos(L, errClosingQuoteExpected, L.lineStart) + L.LineNumber = line2 + break + of '\\': + # we allow an empty \ for line concatenation, but we don't require it + # for line concatenation + L.bufpos = pos + escape(L, tok, allowEmpty=true) + pos = L.bufpos + else: + add(tok.s, 
buf[pos]) + Inc(pos) + L.bufpos = pos + tok.xkind = pxStrLit + +proc getSymbol(L: var TLexer, tok: var TToken) = + var pos = L.bufpos + var buf = L.buf + while true: + var c = buf[pos] + if c notin SymChars: break + add(tok.s, c) + Inc(pos) + L.bufpos = pos + tok.xkind = pxSymbol + +proc scanLineComment(L: var TLexer, tok: var TToken) = + var pos = L.bufpos + var buf = L.buf + # a comment ends if the next line does not start with the // on the same + # column after only whitespace + tok.xkind = pxLineComment + var col = getColNumber(L, pos) + while true: + inc(pos, 2) # skip // + add(tok.s, '#') + while not (buf[pos] in {CR, LF, lexbase.EndOfFile}): + add(tok.s, buf[pos]) + inc(pos) + pos = handleCRLF(L, pos) + buf = L.buf + var indent = 0 + while buf[pos] == ' ': + inc(pos) + inc(indent) + if (col == indent) and (buf[pos] == '/') and (buf[pos + 1] == '/'): + add(tok.s, "\n") + else: + break + L.bufpos = pos + +proc scanStarComment(L: var TLexer, tok: var TToken) = + var pos = L.bufpos + var buf = L.buf + tok.s = "#" + tok.xkind = pxStarComment + while true: + case buf[pos] + of CR, LF: + pos = HandleCRLF(L, pos) + buf = L.buf + add(tok.s, "\n#") + # skip annoying stars as line prefix: (eg. 
+ # /* + # * ugly comment <-- this star + # */ + while buf[pos] in {' ', '\t'}: + add(tok.s, ' ') + inc(pos) + if buf[pos] == '*' and buf[pos+1] != '/': inc(pos) + of '*': + inc(pos) + if buf[pos] == '/': + inc(pos) + break + else: + add(tok.s, '*') + of lexbase.EndOfFile: + lexMessage(L, errTokenExpected, "*/") + break # pos is not advanced at end of input, so leave the loop here, as getString does at EOF + else: + add(tok.s, buf[pos]) + inc(pos) + L.bufpos = pos + +proc skip(L: var TLexer, tok: var TToken) = + var pos = L.bufpos + var buf = L.buf + while true: + case buf[pos] + of '\\': + # Ignore \ line continuation characters when not inDirective + inc(pos) + if L.inDirective: + while buf[pos] in {' ', '\t'}: inc(pos) + if buf[pos] in {CR, LF}: + pos = HandleCRLF(L, pos) + buf = L.buf + of ' ', Tabulator: + Inc(pos) # newline is special: + of CR, LF: + pos = HandleCRLF(L, pos) + buf = L.buf + if L.inDirective: + tok.xkind = pxNewLine + L.inDirective = false + else: + break # EndOfFile also leaves the loop + L.bufpos = pos + +proc getDirective(L: var TLexer, tok: var TToken) = + var pos = L.bufpos + 1 + var buf = L.buf + while buf[pos] in {' ', '\t'}: inc(pos) + while buf[pos] in SymChars: + add(tok.s, buf[pos]) + inc(pos) + # a HACK: we need to distinguish + # #define x (...) + # from: + # #define x(...) 
+ # + L.bufpos = pos + # look ahead: + while buf[pos] in {' ', '\t'}: inc(pos) + while buf[pos] in SymChars: inc(pos) + if buf[pos] == '(': tok.xkind = pxDirectiveParLe + else: tok.xkind = pxDirective + L.inDirective = true + +proc getTok(L: var TLexer, tok: var TToken) = + tok.xkind = pxInvalid + fillToken(tok) + skip(L, tok) + if tok.xkind == pxNewLine: return + var c = L.buf[L.bufpos] + if c in SymStartChars: + getSymbol(L, tok) + elif c == '0': + case L.buf[L.bufpos+1] + of 'x', 'X': getNumber16(L, tok) + of 'b', 'B': getNumber2(L, tok) + of '1'..'7': getNumber8(L, tok) + else: getNumber(L, tok) + elif c in {'1'..'9'}: + getNumber(L, tok) + else: + case c + of ';': + tok.xkind = pxSemicolon + Inc(L.bufpos) + of '/': + if L.buf[L.bufpos + 1] == '/': + scanLineComment(L, tok) + elif L.buf[L.bufpos+1] == '*': + inc(L.bufpos, 2) + scanStarComment(L, tok) + elif L.buf[L.bufpos+1] == '=': + inc(L.bufpos, 2) + tok.xkind = pxSlashAsgn + else: + tok.xkind = pxSlash + inc(L.bufpos) + of ',': + tok.xkind = pxComma + Inc(L.bufpos) + of '(': + Inc(L.bufpos) + tok.xkind = pxParLe + of '*': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + inc(L.bufpos) + tok.xkind = pxStarAsgn + else: + tok.xkind = pxStar + of ')': + Inc(L.bufpos) + tok.xkind = pxParRi + of '[': + Inc(L.bufpos) + tok.xkind = pxBracketLe + of ']': + Inc(L.bufpos) + tok.xkind = pxBracketRi + of '.': + inc(L.bufpos) + if L.buf[L.bufpos] == '.' 
and L.buf[L.bufpos+1] == '.': + tok.xkind = pxDotDotDot + inc(L.bufpos, 2) + else: + tok.xkind = pxDot + of '{': + Inc(L.bufpos) + tok.xkind = pxCurlyLe + of '}': + Inc(L.bufpos) + tok.xkind = pxCurlyRi + of '+': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + tok.xkind = pxPlusAsgn + inc(L.bufpos) + elif L.buf[L.bufpos] == '+': + tok.xkind = pxPlusPlus + inc(L.bufpos) + else: + tok.xkind = pxPlus + of '-': + inc(L.bufpos) + case L.buf[L.bufpos] + of '>': + tok.xkind = pxArrow + inc(L.bufpos) + of '=': + tok.xkind = pxMinusAsgn + inc(L.bufpos) + of '-': + tok.xkind = pxMinusMinus + inc(L.bufpos) + else: + tok.xkind = pxMinus + of '?': + inc(L.bufpos) + tok.xkind = pxConditional + of ':': + inc(L.bufpos) + if L.buf[L.bufpos] == ':': + tok.xkind = pxScope + inc(L.bufpos) + else: + tok.xkind = pxColon + of '!': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + tok.xkind = pxNeq + inc(L.bufpos) + else: + tok.xkind = pxNot + of '<': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + inc(L.bufpos) + tok.xkind = pxLe + elif L.buf[L.bufpos] == '<': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + inc(L.bufpos) + tok.xkind = pxShlAsgn + else: + tok.xkind = pxShl + else: + tok.xkind = pxLt + of '>': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + inc(L.bufpos) + tok.xkind = pxGe + elif L.buf[L.bufpos] == '>': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + inc(L.bufpos) + tok.xkind = pxShrAsgn + else: + tok.xkind = pxShr + else: + tok.xkind = pxGt + of '=': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + tok.xkind = pxEquals + inc(L.bufpos) + else: + tok.xkind = pxAsgn + of '&': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + tok.xkind = pxAmpAsgn + inc(L.bufpos) + elif L.buf[L.bufpos] == '&': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + inc(L.bufpos) + tok.xkind = pxAmpAmpAsgn + else: + tok.xkind = pxAmpAmp + else: + tok.xkind = pxAmp + of '|': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + tok.xkind = pxBarAsgn + inc(L.bufpos) + elif L.buf[L.bufpos] == '|': + inc(L.bufpos) + if 
L.buf[L.bufpos] == '=': + inc(L.bufpos) + tok.xkind = pxBarBarAsgn + else: + tok.xkind = pxBarBar + else: + tok.xkind = pxBar + of '^': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + tok.xkind = pxHatAsgn + inc(L.bufpos) + else: + tok.xkind = pxHat + of '%': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + tok.xkind = pxModAsgn + inc(L.bufpos) + else: + tok.xkind = pxMod + of '~': + inc(L.bufpos) + if L.buf[L.bufpos] == '=': + tok.xkind = pxTildeAsgn + inc(L.bufpos) + else: + tok.xkind = pxTilde + of '#': + if L.buf[L.bufpos+1] == '#': + inc(L.bufpos, 2) + tok.xkind = pxDirConc + else: + getDirective(L, tok) + of '"': getString(L, tok) + of '\'': getCharLit(L, tok) + of lexbase.EndOfFile: + tok.xkind = pxEof + else: + tok.s = $c + tok.xkind = pxInvalid + lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')') + Inc(L.bufpos) diff --git a/rod/c2nim/cparse.nim b/rod/c2nim/cparse.nim new file mode 100755 index 000000000..b637bfa61 --- /dev/null +++ b/rod/c2nim/cparse.nim @@ -0,0 +1,1469 @@ +# +# +# c2nim - C to Nimrod source converter +# (c) Copyright 2010 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# This module implements an Ansi C parser. +# It transfers a C source file into a Nimrod AST. Then the renderer can be +# used to convert the AST to its text representation. + +# XXX standalone structs and unions! +# XXX header pragma for struct and union fields! +# XXX rewrite symbol export handling! 
+ +import + os, llstream, rnimsyn, clex, idents, strutils, pegs, ast, astalgo, msgs, + options, strtabs + +type + TParserFlag* = enum + pfRefs, ## use "ref" instead of "ptr" for C's typ* + pfCDecl, ## annotate procs with cdecl + pfStdCall ## annotate procs with stdcall + + TParserOptions {.final.} = object + flags: set[TParserFlag] + prefixes, suffixes, skipWords: seq[string] + mangleRules: seq[tuple[pattern: TPeg, frmt: string]] + dynlibSym, header: string + PParserOptions* = ref TParserOptions + + TParser* {.final.} = object + lex: TLexer + tok: ref TToken # current token + options: PParserOptions + backtrack: seq[ref TToken] + inTypeDef: int + scopeCounter: int + + TReplaceTuple* = array[0..1, string] + +proc newParserOptions*(): PParserOptions = + new(result) + result.prefixes = @[] + result.suffixes = @[] + result.skipWords = @[] + result.mangleRules = @[] + result.flags = {} + result.dynlibSym = "" + result.header = "" + +proc setOption*(parserOptions: PParserOptions, key: string, val=""): bool = + result = true + case key + of "ref": incl(parserOptions.flags, pfRefs) + of "dynlib": parserOptions.dynlibSym = val + of "header": parserOptions.header = val + of "cdecl": incl(parserOptions.flags, pfCdecl) + of "stdcall": incl(parserOptions.flags, pfStdCall) + of "prefix": parserOptions.prefixes.add(val) + of "suffix": parserOptions.suffixes.add(val) + of "skip": parserOptions.skipWords.add(val) + else: result = false + +proc ParseUnit*(p: var TParser): PNode +proc openParser*(p: var TParser, filename: string, inputStream: PLLStream, + options = newParserOptions()) +proc closeParser*(p: var TParser) +proc exSymbol*(n: var PNode) +proc fixRecordDef*(n: var PNode) + # XXX: move these two to an auxiliary module + +# implementation + +proc OpenParser(p: var TParser, filename: string, + inputStream: PLLStream, options = newParserOptions()) = + OpenLexer(p.lex, filename, inputStream) + p.options = options + p.backtrack = @[] + new(p.tok) + +proc CloseParser(p: var 
TParser) = CloseLexer(p.lex) +proc safeContext(p: var TParser) = p.backtrack.add(p.tok) +proc closeContext(p: var TParser) = discard p.backtrack.pop() +proc backtrackContext(p: var TParser) = p.tok = p.backtrack.pop() + +proc rawGetTok(p: var TParser) = + if p.tok.next != nil: + p.tok = p.tok.next + elif p.backtrack.len == 0: + p.tok.next = nil + getTok(p.lex, p.tok^) + else: + # We need the next token and must be able to backtrack. So we need to + # allocate a new token. + var t: ref TToken + new(t) + getTok(p.lex, t^) + p.tok.next = t + p.tok = t + +proc isSkipWord(p: TParser): bool = + for s in items(p.options.skipWords): + if p.tok.s == s: return true + +proc getTok(p: var TParser) = + while true: + rawGetTok(p) + if p.tok.xkind != pxSymbol or not isSkipWord(p): break + +proc parMessage(p: TParser, msg: TMsgKind, arg = "") = + #assert false + lexMessage(p.lex, msg, arg) + +proc parLineInfo(p: TParser): TLineInfo = + result = getLineInfo(p.lex) + +proc skipCom(p: var TParser, n: PNode) = + while p.tok.xkind in {pxLineComment, pxStarComment}: + if (n != nil): + if n.comment == nil: n.comment = p.tok.s + else: add(n.comment, "\n" & p.tok.s) + else: + parMessage(p, warnCommentXIgnored, p.tok.s) + getTok(p) + +proc skipStarCom(p: var TParser, n: PNode) = + while p.tok.xkind == pxStarComment: + if (n != nil): + if n.comment == nil: n.comment = p.tok.s + else: add(n.comment, "\n" & p.tok.s) + else: + parMessage(p, warnCommentXIgnored, p.tok.s) + getTok(p) + +proc getTok(p: var TParser, n: PNode) = + getTok(p) + skipCom(p, n) + +proc ExpectIdent(p: TParser) = + if p.tok.xkind != pxSymbol: + parMessage(p, errIdentifierExpected, $(p.tok^)) + +proc Eat(p: var TParser, xkind: TTokKind, n: PNode) = + if p.tok.xkind == xkind: getTok(p, n) + else: parMessage(p, errTokenExpected, TokKindToStr(xkind)) + +proc Eat(p: var TParser, xkind: TTokKind) = + if p.tok.xkind == xkind: getTok(p) + else: parMessage(p, errTokenExpected, TokKindToStr(xkind)) + +proc Eat(p: var TParser, tok: 
string, n: PNode) = + if p.tok.s == tok: getTok(p, n) + else: parMessage(p, errTokenExpected, tok) + +proc Opt(p: var TParser, xkind: TTokKind, n: PNode) = + if p.tok.xkind == xkind: getTok(p, n) + +proc addSon(father, a, b: PNode) = + addSon(father, a) + addSon(father, b) + +proc addSon(father, a, b, c: PNode) = + addSon(father, a) + addSon(father, b) + addSon(father, c) + +proc newNodeP(kind: TNodeKind, p: TParser): PNode = + result = newNodeI(kind, getLineInfo(p.lex)) + +proc newIntNodeP(kind: TNodeKind, intVal: BiggestInt, p: TParser): PNode = + result = newNodeP(kind, p) + result.intVal = intVal + +proc newFloatNodeP(kind: TNodeKind, floatVal: BiggestFloat, + p: TParser): PNode = + result = newNodeP(kind, p) + result.floatVal = floatVal + +proc newStrNodeP(kind: TNodeKind, strVal: string, p: TParser): PNode = + result = newNodeP(kind, p) + result.strVal = strVal + +proc newIdentNodeP(ident: PIdent, p: TParser): PNode = + result = newNodeP(nkIdent, p) + result.ident = ident + +proc newIdentNodeP(ident: string, p: TParser): PNode = + result = newIdentNodeP(getIdent(ident), p) + +proc mangleName(s: string, p: TParser): string = + for pattern, frmt in items(p.options.mangleRules): + if s.match(pattern): + return s.replace(pattern, frmt) + block prefixes: + for prefix in items(p.options.prefixes): + if s.startsWith(prefix): + result = s.copy(prefix.len) + break prefixes + result = s + for suffix in items(p.options.suffixes): + if result.endsWith(suffix): + setLen(result, result.len - suffix.len) + break + +proc mangledIdent(ident: string, p: TParser): PNode = + result = newNodeP(nkIdent, p) + result.ident = getIdent(mangleName(ident, p)) + +proc newIdentPair(a, b: string, p: TParser): PNode = + result = newNodeP(nkExprColonExpr, p) + addSon(result, newIdentNodeP(a, p)) + addSon(result, newIdentNodeP(b, p)) + +proc newIdentStrLitPair(a, b: string, p: TParser): PNode = + result = newNodeP(nkExprColonExpr, p) + addSon(result, newIdentNodeP(a, p)) + addSon(result, 
newStrNodeP(nkStrLit, b, p)) + +proc addImportToPragma(pragmas: PNode, ident: string, p: TParser) = + addSon(pragmas, newIdentStrLitPair("importc", ident, p)) + if p.options.dynlibSym.len > 0: + addSon(pragmas, newIdentPair("dynlib", p.options.dynlibSym, p)) + else: + addSon(pragmas, newIdentStrLitPair("header", p.options.header, p)) + +proc mangledIdentAndImport(ident: string, p: TParser): PNode = + result = mangledIdent(ident, p) + if p.scopeCounter > 0: return + if p.options.dynlibSym.len > 0 or p.options.header.len > 0: + var a = result + result = newNodeP(nkPragmaExpr, p) + var pragmas = newNodeP(nkPragma, p) + addSon(result, a) + addSon(result, pragmas) + addImportToPragma(pragmas, ident, p) + +proc DoImport(ident: string, pragmas: PNode, p: TParser) = + if p.options.dynlibSym.len > 0 or p.options.header.len > 0: + addImportToPragma(pragmas, ident, p) + +proc newBinary(opr: string, a, b: PNode, p: TParser): PNode = + result = newNodeP(nkInfix, p) + addSon(result, newIdentNodeP(getIdent(opr), p)) + addSon(result, a) + addSon(result, b) + +# --------------- symbol exporter -------------------------------------------- + +proc identVis(p: var TParser): PNode = + # identifier with visability + var a = mangledIdent(p.tok.s, p) + result = newNodeP(nkPostfix, p) + addSon(result, newIdentNodeP("*", p)) + addSon(result, a) + getTok(p) + +proc exSymbol(n: var PNode) = + case n.kind + of nkPostfix: + nil + of nkPragmaExpr: + exSymbol(n.sons[0]) + of nkIdent, nkAccQuoted: + var a = newNodeI(nkPostFix, n.info) + addSon(a, newIdentNode(getIdent("*"), n.info)) + addSon(a, n) + n = a + else: internalError(n.info, "exSymbol(): " & $n.kind) + +proc fixRecordDef(n: var PNode) = + if n == nil: return + case n.kind + of nkRecCase: + fixRecordDef(n.sons[0]) + for i in countup(1, sonsLen(n) - 1): + var length = sonsLen(n.sons[i]) + fixRecordDef(n.sons[i].sons[length - 1]) + of nkRecList, nkRecWhen, nkElse, nkOfBranch, nkElifBranch, nkObjectTy: + for i in countup(0, sonsLen(n) - 1): 
fixRecordDef(n.sons[i]) + of nkIdentDefs: + for i in countup(0, sonsLen(n) - 3): exSymbol(n.sons[i]) + of nkNilLit: nil + else: internalError(n.info, "fixRecordDef(): " & $n.kind) + +proc addPragmaToIdent(ident: var PNode, pragma: PNode) = + var pragmasNode: PNode + if ident.kind != nkPragmaExpr: + pragmasNode = newNodeI(nkPragma, ident.info) + var e = newNodeI(nkPragmaExpr, ident.info) + addSon(e, ident) + addSon(e, pragmasNode) + ident = e + else: + pragmasNode = ident.sons[1] + if pragmasNode.kind != nkPragma: + InternalError(ident.info, "addPragmaToIdent") + addSon(pragmasNode, pragma) + +proc exSymbols(n: PNode) = + if n == nil: return + case n.kind + of nkEmpty..nkNilLit: nil + of nkProcDef..nkIteratorDef: exSymbol(n.sons[namePos]) + of nkWhenStmt: + for i in countup(0, sonsLen(n) - 1): exSymbols(lastSon(n.sons[i])) + of nkStmtList: + for i in countup(0, sonsLen(n) - 1): exSymbols(n.sons[i]) + of nkVarSection, nkConstSection: + for i in countup(0, sonsLen(n) - 1): exSymbol(n.sons[i].sons[0]) + of nkTypeSection: + for i in countup(0, sonsLen(n) - 1): + exSymbol(n.sons[i].sons[0]) + if (n.sons[i].sons[2] != nil) and + (n.sons[i].sons[2].kind == nkObjectTy): + fixRecordDef(n.sons[i].sons[2]) + else: nil + +# --------------- parser ----------------------------------------------------- +# We use this parsing rule: If it looks like a declaration, it is one. This +# avoids to build a symbol table, which can't be done reliably anyway for our +# purposes. 
+ +proc expression(p: var TParser): PNode +proc constantExpression(p: var TParser): PNode +proc assignmentExpression(p: var TParser): PNode +proc compoundStatement(p: var TParser): PNode +proc statement(p: var TParser): PNode + +proc declKeyword(s: string): bool = + # returns true if it is a keyword that introduces a declaration + case s + of "extern", "static", "auto", "register", "const", "volatile", "restrict", + "inline", "__inline", "__cdecl", "__stdcall", "__syscall", "__fastcall", + "__safecall", "void", "struct", "union", "enum", "typedef", + "short", "int", "long", "float", "double", "signed", "unsigned", "char": + result = true + +proc stmtKeyword(s: string): bool = + case s + of "if", "for", "while", "do", "switch", "break", "continue", "return", + "goto": + result = true + +# ------------------- type desc ----------------------------------------------- + +proc skipIdent(p: var TParser): PNode = + expectIdent(p) + result = mangledIdent(p.tok.s, p) + getTok(p, result) + +proc isIntType(s: string): bool = + case s + of "short", "int", "long", "float", "double", "signed", "unsigned": + result = true + +proc skipConst(p: var TParser) = + while p.tok.xkind == pxSymbol and + (p.tok.s == "const" or p.tok.s == "volatile" or p.tok.s == "restrict"): + getTok(p, nil) + +proc typeAtom(p: var TParser): PNode = + if p.tok.xkind != pxSymbol: return nil + skipConst(p) + ExpectIdent(p) + case p.tok.s + of "void": + result = newNodeP(nkNilLit, p) # little hack + getTok(p, nil) + of "struct", "union", "enum": + getTok(p, nil) + result = skipIdent(p) + elif isIntType(p.tok.s): + var x = "c" & p.tok.s + getTok(p, nil) + while p.tok.xkind == pxSymbol and isIntType(p.tok.s): + add(x, p.tok.s) + getTok(p, nil) + result = newIdentNodeP(x, p) + else: + result = newIdentNodeP(p.tok.s, p) + getTok(p, result) + +proc newPointerTy(p: TParser, typ: PNode): PNode = + if pfRefs in p.options.flags: + result = newNodeP(nkRefTy, p) + else: + result = newNodeP(nkPtrTy, p) + 
result.addSon(typ) + +proc pointer(p: var TParser, a: PNode): PNode = + result = a + var i = 0 + skipConst(p) + while p.tok.xkind == pxStar: + inc(i) + getTok(p, result) + skipConst(p) + result = newPointerTy(p, result) + if a.kind == nkIdent and a.ident.s == "char": + if i >= 2: + result = newIdentNodeP("cstringArray", p) + for j in 1..i-2: result = newPointerTy(p, result) + elif i == 1: result = newIdentNodeP("cstring", p) + elif a.kind == nkNilLit and i > 0: + result = newIdentNodeP("pointer", p) + for j in 1..i-1: result = newPointerTy(p, result) + +proc parseTypeSuffix(p: var TParser, typ: PNode): PNode = + result = typ + while p.tok.xkind == pxBracketLe: + getTok(p, result) + skipConst(p) # POSIX contains: ``int [restrict]`` + if p.tok.xkind != pxBracketRi: + var tmp = result + var index = expression(p) + # array type: + result = newNodeP(nkBracketExpr, p) + addSon(result, newIdentNodeP("array", p)) + var r = newNodeP(nkRange, p) + addSon(r, newIntNodeP(nkIntLit, 0, p)) + addSon(r, newBinary("-", index, newIntNodeP(nkIntLit, 1, p), p)) + addSon(result, r) + addSon(result, tmp) + else: + # pointer type: + var tmp = result + if pfRefs in p.options.flags: + result = newNodeP(nkRefTy, p) + else: + result = newNodeP(nkPtrTy, p) + result.addSon(tmp) + eat(p, pxBracketRi, result) + +proc typeDesc(p: var TParser): PNode = + result = typeAtom(p) + if result != nil: + result = pointer(p, result) + +proc parseStructBody(p: var TParser): PNode = + result = newNodeP(nkRecList, p) + eat(p, pxCurlyLe, result) + while p.tok.xkind notin {pxEof, pxCurlyRi}: + var baseTyp = typeAtom(p) + while true: + var def = newNodeP(nkIdentDefs, p) + var t = pointer(p, baseTyp) + var i = skipIdent(p) + t = parseTypeSuffix(p, t) + addSon(def, i, t, nil) + addSon(result, def) + if p.tok.xkind != pxComma: break + getTok(p, def) + eat(p, pxSemicolon, lastSon(result)) + eat(p, pxCurlyRi, result) + +proc structPragmas(p: TParser, name: PNode): PNode = + result = newNodeP(nkPragmaExpr, p) + 
addson(result, name) + var pragmas = newNodep(nkPragma, p) + addSon(pragmas, newIdentNodeP("pure", p)) + addSon(pragmas, newIdentNodeP("final", p)) + addSon(result, pragmas) + +proc enumPragmas(p: TParser, name: PNode): PNode = + result = newNodeP(nkPragmaExpr, p) + addson(result, name) + var pragmas = newNodep(nkPragma, p) + var e = newNodeP(nkExprColonExpr, p) + addSon(e, newIdentNodeP("size", p)) + addSon(e, newIntNodeP(nkIntLit, 4, p)) + addSon(pragmas, e) + addSon(result, pragmas) + +proc parseStruct(p: var TParser): PNode = + result = newNodeP(nkObjectTy, p) + addSon(result, nil) # no pragmas + addSon(result, nil) # no inheritance + if p.tok.xkind == pxCurlyLe: + addSon(result, parseStructBody(p)) + else: + addSon(result, newNodeP(nkRecList, p)) + +proc parseParam(p: var TParser, params: PNode) = + var typ = typeDesc(p) + # support for ``(void)`` parameter list: + if typ.kind == nkNilLit and p.tok.xkind == pxParRi: return + var name: PNode + if p.tok.xkind == pxSymbol: + name = skipIdent(p) + else: + # generate a name for the formal parameter: + var idx = sonsLen(params)+1 + name = newIdentNodeP("a" & $idx, p) + typ = parseTypeSuffix(p, typ) + var x = newNodeP(nkIdentDefs, p) + addSon(x, name) + addSon(x, typ) + if p.tok.xkind == pxAsgn: + # we support default parameters for C++: + getTok(p, x) + addSon(x, assignmentExpression(p)) + else: + addSon(x, nil) + addSon(params, x) + +proc parseFormalParams(p: var TParser, params, pragmas: PNode) = + eat(p, pxParLe, params) + while p.tok.xkind notin {pxEof, pxParRi}: + if p.tok.xkind == pxDotDotDot: + addSon(pragmas, newIdentNodeP("varargs", p)) + getTok(p, pragmas) + break + parseParam(p, params) + if p.tok.xkind != pxComma: break + getTok(p, params) + eat(p, pxParRi, params) + +proc parseCallConv(p: var TParser, pragmas: PNode) = + while p.tok.xkind == pxSymbol: + case p.tok.s + of "inline", "__inline": addSon(pragmas, newIdentNodeP("inline", p)) + of "__cdecl": addSon(pragmas, newIdentNodeP("cdecl", p)) + of
"__stdcall": addSon(pragmas, newIdentNodeP("stdcall", p)) + of "__syscall": addSon(pragmas, newIdentNodeP("syscall", p)) + of "__fastcall": addSon(pragmas, newIdentNodeP("fastcall", p)) + of "__safecall": addSon(pragmas, newIdentNodeP("safecall", p)) + else: break + getTok(p, nil) + +proc parseFunctionPointerDecl(p: var TParser, rettyp: PNode): PNode = + var procType = newNodeP(nkProcTy, p) + var pragmas = newNodeP(nkPragma, p) + if pfCDecl in p.options.flags: + addSon(pragmas, newIdentNodeP("cdecl", p)) + elif pfStdCall in p.options.flags: + addSon(pragmas, newIdentNodeP("stdcall", p)) + var params = newNodeP(nkFormalParams, p) + eat(p, pxParLe, params) + addSon(params, rettyp) + parseCallConv(p, pragmas) + if p.tok.xkind == pxStar: getTok(p, params) + else: parMessage(p, errTokenExpected, "*") + var name = skipIdent(p) + eat(p, pxParRi, name) + parseFormalParams(p, params, pragmas) + addSon(procType, params) + addSon(procType, pragmas) + + if p.inTypeDef == 0: + result = newNodeP(nkVarSection, p) + var def = newNodeP(nkIdentDefs, p) + addSon(def, name) + addSon(def, procType) + addSon(def, nil) + addSon(result, def) + else: + result = newNodeP(nkTypeDef, p) + addSon(result, name) + addSon(result, nil) # no generics + addSon(result, procType) + +proc addTypeDef(section, name, t: PNode) = + var def = newNodeI(nkTypeDef, name.info) + addSon(def, name, nil, t) + addSon(section, def) + +proc otherTypeDef(p: var TParser, section, typ: PNode) = + # Parses the declarator that follows the base type ``typ`` in a typedef and + # appends the resulting type definition to ``section``. + var name, t: PNode + case p.tok.xkind + of pxParLe: + # function pointer: typedef typ (*name)(); + # parseFunctionPointerDecl() eats the opening parenthesis itself (compare + # its call in declaration()), so the token must not be skipped here. 
+ var x = parseFunctionPointerDecl(p, typ) + name = x[0] + t = x[2] + of pxStar: + # typedef typ *b; + t = pointer(p, typ) + name = skipIdent(p) + else: + # typedef typ name; + # the base type itself is the aliased type; ``t`` would stay nil otherwise: + t = typ + name = skipIdent(p) + t = parseTypeSuffix(p, t) + addTypeDef(section, name, t) + +proc parseTrailingDefinedTypes(p: var TParser, section, typ: PNode) = + while p.tok.xkind == pxComma: + getTok(p, nil) + var newTyp = pointer(p, typ) + var
newName = skipIdent(p) + newTyp = parseTypeSuffix(p, newTyp) + addTypeDef(section, newName, newTyp) + +proc enumFields(p: var TParser): PNode = + # Parses the enumerator list of an enum (the opening brace has already been + # consumed by the caller) into an nkEnumTy node. Stops in front of the + # closing brace, which the caller is expected to eat. + result = newNodeP(nkEnumTy, p) + addSon(result, nil) # enum does not inherit from anything + while true: + var e = skipIdent(p) + if p.tok.xkind == pxAsgn: + getTok(p, e) + var c = constantExpression(p) + var a = e + e = newNodeP(nkEnumFieldDef, p) + addSon(e, a) + addSon(e, c) + skipCom(p, e) + + addSon(result, e) + if p.tok.xkind != pxComma: break + getTok(p, e) + # allow a trailing comma before the closing brace (valid since C99): + if p.tok.xkind == pxCurlyRi: break + +proc parseTypeDef(p: var TParser): PNode = + result = newNodeP(nkTypeSection, p) + while p.tok.xkind == pxSymbol and p.tok.s == "typedef": + getTok(p, result) + inc(p.inTypeDef) + expectIdent(p) + case p.tok.s + of "struct", "union": + getTok(p, result) + if p.tok.xkind == pxCurlyLe: + var t = parseStruct(p) + var name = skipIdent(p) + addTypeDef(result, structPragmas(p, name), t) + parseTrailingDefinedTypes(p, result, name) + elif p.tok.xkind == pxSymbol: + # name to be defined or type "struct a", we don't know yet: + var nameOrType = skipIdent(p) + case p.tok.xkind + of pxCurlyLe: + var t = parseStruct(p) + if p.tok.xkind == pxSymbol: + # typedef struct tagABC {} abc, *pabc; + # --> abc is a better type name than tagABC! + var name = skipIdent(p) + addTypeDef(result, structPragmas(p, name), t) + parseTrailingDefinedTypes(p, result, name) + else: + addTypeDef(result, structPragmas(p, nameOrType), t) + of pxSymbol: + # typedef struct a a?
+ if mangleName(p.tok.s, p) == nameOrType.ident.s: + # ignore the declaration: + getTok(p, nil) + else: + # typedef struct a b; or typedef struct a b[45]; + otherTypeDef(p, result, nameOrType) + else: + otherTypeDef(p, result, nameOrType) + else: + expectIdent(p) + of "enum": + getTok(p, result) + if p.tok.xkind == pxCurlyLe: + getTok(p, result) + var t = enumFields(p) + eat(p, pxCurlyRi, t) + var name = skipIdent(p) + addTypeDef(result, enumPragmas(p, name), t) + parseTrailingDefinedTypes(p, result, name) + elif p.tok.xkind == pxSymbol: + # name to be defined or type "enum a", we don't know yet: + var nameOrType = skipIdent(p) + case p.tok.xkind + of pxCurlyLe: + getTok(p, result) + var t = enumFields(p) + eat(p, pxCurlyRi, t) + if p.tok.xkind == pxSymbol: + # typedef enum tagABC {} abc, *pabc; + # --> abc is a better type name than tagABC! + var name = skipIdent(p) + addTypeDef(result, enumPragmas(p, name), t) + parseTrailingDefinedTypes(p, result, name) + else: + addTypeDef(result, enumPragmas(p, nameOrType), t) + of pxSymbol: + # typedef enum a a? 
+ if mangleName(p.tok.s, p) == nameOrType.ident.s: + # ignore the declaration: + getTok(p, nil) + else: + # typedef enum a b; or typedef enum a b[45]; + otherTypeDef(p, result, nameOrType) + else: + otherTypeDef(p, result, nameOrType) + else: + expectIdent(p) + else: + var t = typeAtom(p) + otherTypeDef(p, result, t) + + eat(p, pxSemicolon) + dec(p.inTypeDef) + +proc skipDeclarationSpecifiers(p: var TParser) = + while p.tok.xkind == pxSymbol: + case p.tok.s + of "extern", "static", "auto", "register", "const", "volatile": + getTok(p, nil) + else: break + +proc parseInitializer(p: var TParser): PNode = + if p.tok.xkind == pxCurlyLe: + result = newNodeP(nkBracket, p) + getTok(p, result) + while p.tok.xkind notin {pxEof, pxCurlyRi}: + addSon(result, parseInitializer(p)) + opt(p, pxComma, nil) + eat(p, pxCurlyRi, result) + else: + result = assignmentExpression(p) + +proc addInitializer(p: var TParser, def: PNode) = + if p.tok.xkind == pxAsgn: + getTok(p, def) + addSon(def, parseInitializer(p)) + else: + addSon(def, nil) + +proc parseVarDecl(p: var TParser, baseTyp, typ: PNode, + origName: string): PNode = + result = newNodeP(nkVarSection, p) + var def = newNodeP(nkIdentDefs, p) + addSon(def, mangledIdentAndImport(origName, p)) + addSon(def, parseTypeSuffix(p, typ)) + addInitializer(p, def) + addSon(result, def) + + while p.tok.xkind == pxComma: + getTok(p, def) + var t = pointer(p, baseTyp) + expectIdent(p) + def = newNodeP(nkIdentDefs, p) + addSon(def, mangledIdentAndImport(p.tok.s, p)) + getTok(p, def) + addSon(def, parseTypeSuffix(p, t)) + addInitializer(p, def) + addSon(result, def) + eat(p, pxSemicolon, result) + +proc declaration(p: var TParser): PNode = + result = newNodeP(nkProcDef, p) + var pragmas = newNodeP(nkPragma, p) + + skipDeclarationSpecifiers(p) + parseCallConv(p, pragmas) + skipDeclarationSpecifiers(p) + expectIdent(p) + var baseTyp = typeAtom(p) + var rettyp = pointer(p, baseTyp) + if rettyp != nil and rettyp.kind == nkNilLit: rettyp = nil + 
skipDeclarationSpecifiers(p) + parseCallConv(p, pragmas) + skipDeclarationSpecifiers(p) + + if p.tok.xkind == pxParLe: + # Function pointer declaration: This is of course only a heuristic, but the + # best we can do here. + result = parseFunctionPointerDecl(p, rettyp) + eat(p, pxSemicolon, result) + return + ExpectIdent(p) + var origName = p.tok.s + getTok(p) # skip identifier + case p.tok.xkind + of pxParLe: + # really a function! + var name = mangledIdent(origName, p) + var params = newNodeP(nkFormalParams, p) + addSon(params, rettyp) + parseFormalParams(p, params, pragmas) + + if pfCDecl in p.options.flags: + addSon(pragmas, newIdentNodeP("cdecl", p)) + elif pfStdcall in p.options.flags: + addSon(pragmas, newIdentNodeP("stdcall", p)) + addSon(result, name) + addSon(result, nil) # no generics + addSon(result, params) + addSon(result, pragmas) + case p.tok.xkind + of pxSemicolon: + getTok(p) + addSon(result, nil) # nobody + if p.scopeCounter == 0: DoImport(origName, pragmas, p) + of pxCurlyLe: + addSon(result, compoundStatement(p)) + else: + parMessage(p, errTokenExpected, ";") + if sonsLen(result.sons[pragmasPos]) == 0: result.sons[pragmasPos] = nil + of pxAsgn, pxSemicolon, pxComma: + result = parseVarDecl(p, baseTyp, rettyp, origName) + else: + parMessage(p, errTokenExpected, ";") + +proc createConst(name, typ, val: PNode, p: TParser): PNode = + result = newNodeP(nkConstDef, p) + addSon(result, name, typ, val) + +proc enumSpecifier(p: var TParser): PNode = + getTok(p, nil) # skip "enum" + case p.tok.xkind + of pxCurlyLe: + # make a const section out of it: + result = newNodeP(nkConstSection, p) + getTok(p, result) + var i = 0 + while true: + var name = skipIdent(p) + var val: PNode + if p.tok.xkind == pxAsgn: + getTok(p, name) + val = constantExpression(p) + if val.kind == nkIntLit: i = int(val.intVal)+1 + else: parMessage(p, errXExpected, "int literal") + else: + val = newIntNodeP(nkIntLit, i, p) + inc(i) + var c = createConst(name, nil, val, p) + 
addSon(result, c) + if p.tok.xkind != pxComma: break + getTok(p, c) + # allow a trailing comma before the closing brace (valid since C99): + if p.tok.xkind == pxCurlyRi: break + eat(p, pxCurlyRi, result) + eat(p, pxSemicolon) + of pxSymbol: + result = skipIdent(p) + if p.tok.xkind == pxCurlyLe: + var name = result + # create a type section containing the enum + result = newNodeP(nkTypeSection, p) + var t = newNodeP(nkTypeDef, p) + getTok(p, t) + var e = enumFields(p) + addSon(t, name, nil, e) # nil for generic params + addSon(result, t) + # consume the closing brace and the terminating semicolon, exactly as the + # anonymous enum branch above does; otherwise the next statement would + # start at the stray ``}``: + eat(p, pxCurlyRi, result) + eat(p, pxSemicolon) + else: + parMessage(p, errTokenExpected, "{") + +# Expressions + +proc setBaseFlags(n: PNode, base: TNumericalBase) = + case base + of base10: nil + of base2: incl(n.flags, nfBase2) + of base8: incl(n.flags, nfBase8) + of base16: incl(n.flags, nfBase16) + +proc primaryExpression(p: var TParser): PNode = + case p.tok.xkind + of pxSymbol: + if p.tok.s == "NULL": + result = newNodeP(nkNilLit, p) + else: + result = mangledIdent(p.tok.s, p) + getTok(p, result) + of pxIntLit: + result = newIntNodeP(nkIntLit, p.tok.iNumber, p) + setBaseFlags(result, p.tok.base) + getTok(p, result) + of pxInt64Lit: + result = newIntNodeP(nkInt64Lit, p.tok.iNumber, p) + setBaseFlags(result, p.tok.base) + getTok(p, result) + of pxFloatLit: + result = newFloatNodeP(nkFloatLit, p.tok.fNumber, p) + setBaseFlags(result, p.tok.base) + getTok(p, result) + of pxStrLit: + # Ansi C allows implicit string literal concatenations: + result = newStrNodeP(nkStrLit, p.tok.s, p) + getTok(p, result) + while p.tok.xkind == pxStrLit: + add(result.strVal, p.tok.s) + getTok(p, result) + of pxCharLit: + result = newIntNodeP(nkCharLit, ord(p.tok.s[0]), p) + getTok(p, result) + of pxParLe: + result = newNodeP(nkPar, p) + getTok(p, result) + addSon(result, expression(p)) + eat(p, pxParRi, result) + else: + result = nil + +proc unaryExpression(p: var TParser): PNode +proc castExpression(p: var TParser): PNode = + if p.tok.xkind == pxParLe: + SafeContext(p) + result = newNodeP(nkCast, p) + getTok(p, result) + var a = typeDesc(p) + if a != nil and p.tok.xkind == pxParRi: + closeContext(p) +
eat(p, pxParRi, result) + addSon(result, a) + addSon(result, castExpression(p)) + else: + backtrackContext(p) + result = unaryExpression(p) + else: + result = unaryExpression(p) + +proc multiplicativeExpression(p: var TParser): PNode = + result = castExpression(p) + while true: + case p.tok.xkind + of pxStar: + var a = result + result = newNodeP(nkInfix, p) + addSon(result, newIdentNodeP("*", p), a) + getTok(p, result) + var b = castExpression(p) + addSon(result, b) + of pxSlash: + var a = result + result = newNodeP(nkInfix, p) + addSon(result, newIdentNodeP("div", p), a) + getTok(p, result) + var b = castExpression(p) + addSon(result, b) + of pxMod: + var a = result + result = newNodeP(nkInfix, p) + addSon(result, newIdentNodeP("mod", p), a) + getTok(p, result) + var b = castExpression(p) + addSon(result, b) + else: break + +proc additiveExpression(p: var TParser): PNode = + result = multiplicativeExpression(p) + while true: + case p.tok.xkind + of pxPlus: + var a = result + result = newNodeP(nkInfix, p) + addSon(result, newIdentNodeP("+", p), a) + getTok(p, result) + var b = multiplicativeExpression(p) + addSon(result, b) + of pxMinus: + var a = result + result = newNodeP(nkInfix, p) + addSon(result, newIdentNodeP("-", p), a) + getTok(p, result) + var b = multiplicativeExpression(p) + addSon(result, b) + else: break + +proc incdec(p: var TParser, opr: string): PNode = + result = newNodeP(nkCall, p) + addSon(result, newIdentNodeP(opr, p)) + gettok(p, result) + addSon(result, unaryExpression(p)) + +proc unaryOp(p: var TParser, kind: TNodeKind): PNode = + result = newNodeP(kind, p) + getTok(p, result) + addSon(result, castExpression(p)) + +proc prefixCall(p: var TParser, opr: string): PNode = + result = newNodeP(nkPrefix, p) + addSon(result, newIdentNodeP(opr, p)) + gettok(p, result) + addSon(result, castExpression(p)) + +proc postfixExpression(p: var TParser): PNode = + result = primaryExpression(p) + while true: + case p.tok.xkind + of pxBracketLe: + var a = 
result + result = newNodeP(nkBracketExpr, p) + addSon(result, a) + getTok(p, result) + var b = expression(p) + addSon(result, b) + eat(p, pxBracketRi, result) + of pxParLe: + var a = result + result = newNodeP(nkCall, p) + addSon(result, a) + getTok(p, result) + if p.tok.xkind != pxParRi: + a = assignmentExpression(p) + addSon(result, a) + while p.tok.xkind == pxComma: + getTok(p, a) + a = assignmentExpression(p) + addSon(result, a) + eat(p, pxParRi, result) + of pxDot, pxArrow: + var a = result + result = newNodeP(nkDotExpr, p) + addSon(result, a) + getTok(p, result) + addSon(result, skipIdent(p)) + of pxPlusPlus: + var a = result + result = newNodeP(nkCall, p) + addSon(result, newIdentNodeP("inc", p)) + gettok(p, result) + addSon(result, a) + of pxMinusMinus: + var a = result + result = newNodeP(nkCall, p) + addSon(result, newIdentNodeP("dec", p)) + gettok(p, result) + addSon(result, a) + else: break + +proc unaryExpression(p: var TParser): PNode = + case p.tok.xkind + of pxPlusPlus: result = incdec(p, "inc") + of pxMinusMinus: result = incdec(p, "dec") + of pxAmp: result = unaryOp(p, nkAddr) + of pxStar: result = unaryOp(p, nkDerefExpr) + of pxPlus: result = prefixCall(p, "+") + of pxMinus: result = prefixCall(p, "-") + of pxTilde: result = prefixCall(p, "not") + of pxNot: result = prefixCall(p, "not") + of pxSymbol: + if p.tok.s == "sizeof": + result = newNodeP(nkCall, p) + addSon(result, newIdentNodeP("sizeof", p)) + getTok(p, result) + if p.tok.xkind == pxParLe: + getTok(p, result) + addson(result, typeDesc(p)) + eat(p, pxParRi, result) + else: + addSon(result, unaryExpression(p)) + else: + result = postfixExpression(p) + else: result = postfixExpression(p) + +proc expression(p: var TParser): PNode = + # we cannot support C's ``,`` operator + result = assignmentExpression(p) + if p.tok.xkind == pxComma: + getTok(p, result) + parMessage(p, errOperatorExpected, ",") + +proc conditionalExpression(p: var TParser): PNode + +proc constantExpression(p: var TParser): 
PNode = + result = conditionalExpression(p) + +proc lvalue(p: var TParser): PNode = + result = unaryExpression(p) + +proc asgnExpr(p: var TParser, opr: string, a: PNode): PNode = + closeContext(p) + getTok(p, a) + var b = assignmentExpression(p) + result = newNodeP(nkAsgn, p) + addSon(result, a) + addSon(result, newBinary(opr, copyTree(a), b, p)) + +proc incdec(p: var TParser, opr: string, a: PNode): PNode = + closeContext(p) + getTok(p, a) + var b = assignmentExpression(p) + result = newNodeP(nkCall, p) + addSon(result, newIdentNodeP(getIdent(opr), p)) + addSon(result, a) + addSon(result, b) + +proc assignmentExpression(p: var TParser): PNode = + safeContext(p) + var a = lvalue(p) + case p.tok.xkind + of pxAsgn: + closeContext(p) + getTok(p, a) + var b = assignmentExpression(p) + result = newNodeP(nkAsgn, p) + addSon(result, a) + addSon(result, b) + of pxPlusAsgn: result = incDec(p, "inc", a) + of pxMinusAsgn: result = incDec(p, "dec", a) + of pxStarAsgn: result = asgnExpr(p, "*", a) + of pxSlashAsgn: result = asgnExpr(p, "/", a) + of pxModAsgn: result = asgnExpr(p, "mod", a) + of pxShlAsgn: result = asgnExpr(p, "shl", a) + of pxShrAsgn: result = asgnExpr(p, "shr", a) + of pxAmpAsgn: result = asgnExpr(p, "and", a) + of pxHatAsgn: result = asgnExpr(p, "xor", a) + of pxBarAsgn: result = asgnExpr(p, "or", a) + else: + backtrackContext(p) + result = conditionalExpression(p) + +proc shiftExpression(p: var TParser): PNode = + result = additiveExpression(p) + while p.tok.xkind in {pxShl, pxShr}: + var op = if p.tok.xkind == pxShl: "shl" else: "shr" + getTok(p, result) + var a = result + var b = additiveExpression(p) + result = newBinary(op, a, b, p) + +proc relationalExpression(p: var TParser): PNode = + result = shiftExpression(p) + # Nimrod uses ``<`` and ``<=``, etc. 
too: + while p.tok.xkind in {pxLt, pxLe, pxGt, pxGe}: + var op = TokKindToStr(p.tok.xkind) + getTok(p, result) + var a = result + var b = shiftExpression(p) + result = newBinary(op, a, b, p) + +proc equalityExpression(p: var TParser): PNode = + result = relationalExpression(p) + # Nimrod uses ``==`` and ``!=`` too: + while p.tok.xkind in {pxEquals, pxNeq}: + var op = TokKindToStr(p.tok.xkind) + getTok(p, result) + var a = result + var b = relationalExpression(p) + result = newBinary(op, a, b, p) + +proc andExpression(p: var TParser): PNode = + # C's bitwise ``&`` corresponds to Nimrod's ``and`` (``&`` is concatenation + # in Nimrod); this matches the mapping of ``&=`` in assignmentExpression. + result = equalityExpression(p) + while p.tok.xkind == pxAmp: + getTok(p, result) + var a = result + var b = equalityExpression(p) + result = newBinary("and", a, b, p) + +proc exclusiveOrExpression(p: var TParser): PNode = + # C's bitwise ``^`` corresponds to Nimrod's ``xor``; this matches the + # mapping of ``^=`` in assignmentExpression. + result = andExpression(p) + while p.tok.xkind == pxHat: + getTok(p, result) + var a = result + var b = andExpression(p) + result = newBinary("xor", a, b, p) + +proc inclusiveOrExpression(p: var TParser): PNode = + result = exclusiveOrExpression(p) + while p.tok.xkind == pxBar: + getTok(p, result) + var a = result + var b = exclusiveOrExpression(p) + result = newBinary("or", a, b, p) + +proc logicalAndExpression(p: var TParser): PNode = + result = inclusiveOrExpression(p) + while p.tok.xkind == pxAmpAmp: + getTok(p, result) + var a = result + var b = inclusiveOrExpression(p) + result = newBinary("and", a, b, p) + +proc logicalOrExpression(p: var TParser): PNode = + result = logicalAndExpression(p) + while p.tok.xkind == pxBarBar: + getTok(p, result) + var a = result + var b = logicalAndExpression(p) + result = newBinary("or", a, b, p) + +proc conditionalExpression(p: var TParser): PNode = + result = logicalOrExpression(p) + if p.tok.xkind == pxConditional: + getTok(p, result) # skip '?'
+ var a = result + var b = expression(p) + eat(p, pxColon, b) + var c = conditionalExpression(p) + result = newNodeP(nkIfExpr, p) + var branch = newNodeP(nkElifExpr, p) + addSon(branch, a) + addSon(branch, b) + addSon(result, branch) + branch = newNodeP(nkElseExpr, p) + addSon(branch, c) + addSon(result, branch) + +# Statements + +proc buildStmtList(a: PNode): PNode = + # returns ``a`` itself if it already is a statement list; otherwise wraps it + # in a new nkStmtList node. + if a.kind == nkStmtList: result = a + else: + result = newNodeI(nkStmtList, a.info) + addSon(result, a) + +proc nestedStatement(p: var TParser): PNode = + # careful: We need to translate: + # if (x) if (y) stmt; + # into: + # if x: + # if y: + # stmt + # + # Nimrod requires complex statements to be nested in whitespace! + const + complexStmt = {nkProcDef, nkMethodDef, nkConverterDef, nkMacroDef, + nkTemplateDef, nkIteratorDef, nkMacroStmt, nkIfStmt, + nkWhenStmt, nkForStmt, nkWhileStmt, nkCaseStmt, nkVarSection, + nkConstSection, nkTypeSection, nkTryStmt, nkBlockStmt, nkStmtList, + nkCommentStmt, nkStmtListExpr, nkBlockExpr, nkStmtListType, nkBlockType} + result = statement(p) + if result.kind in complexStmt: + result = buildStmtList(result) + +proc expressionStatement(p: var TParser): PNode = + # parses ``expression ;``; a lone ``;`` is skipped and leaves the result nil. + # do not skip the comment after a semicolon to make a new nkCommentStmt + if p.tok.xkind == pxSemicolon: + getTok(p) + else: + result = expression(p) + if p.tok.xkind == pxSemicolon: getTok(p) + else: parMessage(p, errTokenExpected, ";") + +proc parseIf(p: var TParser): PNode = + # we parse additional "else if"s too here for better Nimrod code + result = newNodeP(nkIfStmt, p) + while true: + getTok(p) # skip ``if`` + var branch = newNodeP(nkElifBranch, p) + skipCom(p, branch) + eat(p, pxParLe, branch) + addSon(branch, expression(p)) + eat(p, pxParRi, branch) + addSon(branch, nestedStatement(p)) + addSon(result, branch) + if p.tok.s == "else": + getTok(p, result) + if p.tok.s != "if": + # ordinary else part: + branch = newNodeP(nkElse, p) + addSon(branch, nestedStatement(p)) + addSon(result, branch) + break +
else: + break + +proc parseWhile(p: var TParser): PNode = + result = newNodeP(nkWhileStmt, p) + getTok(p, result) + eat(p, pxParLe, result) + addSon(result, expression(p)) + eat(p, pxParRi, result) + addSon(result, nestedStatement(p)) + +proc parseDoWhile(p: var TParser): PNode = + # we only support ``do stmt while (0)`` as an idiom for + # ``block: stmt`` + result = newNodeP(nkBlockStmt, p) + getTok(p, result) # skip "do" + addSon(result, nil, nestedStatement(p)) + eat(p, "while", result) + eat(p, pxParLe, result) + if p.tok.xkind == pxIntLit and p.tok.iNumber == 0: getTok(p, result) + else: parMessage(p, errTokenExpected, "0") + eat(p, pxParRi, result) + if p.tok.xkind == pxSemicolon: getTok(p) + +proc declarationOrStatement(p: var TParser): PNode = + if p.tok.xkind != pxSymbol: + result = expressionStatement(p) + elif declKeyword(p.tok.s): + result = declaration(p) + else: + # ordinary identifier: + safeContext(p) + getTok(p) # skip identifier to look ahead + case p.tok.xkind + of pxSymbol, pxStar: + # we parse + # a b + # a * b + # always as declarations! This is of course not correct, but good + # enough for most real world C code out there. + backtrackContext(p) + result = declaration(p) + of pxColon: + # it is only a label: + closeContext(p) + getTok(p) + result = statement(p) + else: + backtrackContext(p) + result = expressionStatement(p) + +proc parseFor(p: var TParser, result: PNode) = + # 'for' '(' expression_statement expression_statement expression? 
')' + # statement + getTok(p, result) + eat(p, pxParLe, result) + var initStmt = declarationOrStatement(p) + addSonIfNotNil(result, initStmt) + var w = newNodeP(nkWhileStmt, p) + var condition = expressionStatement(p) + if condition == nil: condition = newIdentNodeP("true", p) + addSon(w, condition) + var step = if p.tok.xkind != pxParRi: expression(p) else: nil + eat(p, pxParRi, step) + var loopBody = nestedStatement(p) + if step != nil: + loopBody = buildStmtList(loopBody) + addSon(loopBody, step) + addSon(w, loopBody) + addSon(result, w) + +proc switchStatement(p: var TParser): PNode = + result = newNodeP(nkStmtList, p) + while true: + if p.tok.xkind in {pxEof, pxCurlyRi}: break + case p.tok.s + of "break": + getTok(p, result) + eat(p, pxSemicolon, result) + break + of "return", "continue", "goto": + addSon(result, statement(p)) + break + of "case", "default": + break + else: nil + addSon(result, statement(p)) + if sonsLen(result) == 0: + # translate empty statement list to Nimrod's ``nil`` statement + result = newNodeP(nkNilLit, p) + +proc rangeExpression(p: var TParser): PNode = + # We support GCC's extension: ``case expr...expr:`` + result = constantExpression(p) + if p.tok.xkind == pxDotDotDot: + getTok(p, result) + var a = result + var b = constantExpression(p) + result = newNodeP(nkRange, p) + addSon(result, a) + addSon(result, b) + +proc parseSwitch(p: var TParser): PNode = + # We cannot support Duff's device or C's crazy switch syntax. We just support + # sane usages of switch. 
;-) + result = newNodeP(nkCaseStmt, p) + getTok(p, result) + eat(p, pxParLe, result) + addSon(result, expression(p)) + eat(p, pxParRi, result) + eat(p, pxCurlyLe, result) + var b: PNode + while (p.tok.xkind != pxCurlyRi) and (p.tok.xkind != pxEof): + case p.tok.s + of "default": + b = newNodeP(nkElse, p) + getTok(p, b) + eat(p, pxColon, b) + of "case": + b = newNodeP(nkOfBranch, p) + while p.tok.xkind == pxSymbol and p.tok.s == "case": + getTok(p, b) + addSon(b, rangeExpression(p)) + eat(p, pxColon, b) + else: + parMessage(p, errXExpected, "case") + addSon(b, switchStatement(p)) + addSon(result, b) + if b.kind == nkElse: break + eat(p, pxCurlyRi) + +proc embedStmts(sl, a: PNode) = + if a.kind != nkStmtList: + addSon(sl, a) + else: + for i in 0..sonsLen(a)-1: addSon(sl, a[i]) + +proc compoundStatement(p: var TParser): PNode = + result = newNodeP(nkStmtList, p) + eat(p, pxCurlyLe) + inc(p.scopeCounter) + while p.tok.xkind notin {pxEof, pxCurlyRi}: + var a = statement(p) + if a == nil: break + embedStmts(result, a) + if sonsLen(result) == 0: + # translate ``{}`` to Nimrod's ``nil`` statement + result = newNodeP(nkNilLit, p) + dec(p.scopeCounter) + eat(p, pxCurlyRi) + +include cpp + +proc statement(p: var TParser): PNode = + case p.tok.xkind + of pxSymbol: + case p.tok.s + of "if": result = parseIf(p) + of "switch": result = parseSwitch(p) + of "while": result = parseWhile(p) + of "do": result = parseDoWhile(p) + of "for": + result = newNodeP(nkStmtList, p) + parseFor(p, result) + of "goto": + # we cannot support "goto"; in hand-written C, "goto" is most often used + # to break a block, so we convert it to a break statement with label. 
+ result = newNodeP(nkBreakStmt, p) + getTok(p) + addSon(result, skipIdent(p)) + eat(p, pxSemicolon) + of "continue": + result = newNodeP(nkContinueStmt, p) + getTok(p) + eat(p, pxSemicolon) + addSon(result, nil) + of "break": + result = newNodeP(nkBreakStmt, p) + getTok(p) + eat(p, pxSemicolon) + addSon(result, nil) + of "return": + result = newNodeP(nkReturnStmt, p) + getTok(p) + # special case for ``return (expr)`` because I hate the redundant + # parenthesis ;-) + if p.tok.xkind == pxParLe: + getTok(p, result) + addSon(result, expression(p)) + eat(p, pxParRi, result) + elif p.tok.xkind != pxSemicolon: + addSon(result, expression(p)) + else: + addSon(result, nil) + eat(p, pxSemicolon) + of "enum": + result = enumSpecifier(p) + of "typedef": + result = parseTypeDef(p) + else: + result = declarationOrStatement(p) + of pxCurlyLe: + result = compoundStatement(p) + of pxDirective, pxDirectiveParLe: + result = parseDir(p) + of pxLineComment, pxStarComment: + result = newNodeP(nkCommentStmt, p) + skipCom(p, result) + of pxSemicolon: + # empty statement: + getTok(p) + if p.tok.xkind in {pxLineComment, pxStarComment}: + result = newNodeP(nkCommentStmt, p) + skipCom(p, result) + else: + result = newNodeP(nkNilLit, p) + else: + result = expressionStatement(p) + #parMessage(p, errStmtExpected) + +proc parseUnit(p: var TParser): PNode = + result = newNodeP(nkStmtList, p) + getTok(p) # read first token + while p.tok.xkind != pxEof: + var s = statement(p) + if s != nil: embedStmts(result, s) + exSymbols(result) + diff --git a/rod/c2nim/cpp.nim b/rod/c2nim/cpp.nim new file mode 100755 index 000000000..61873628e --- /dev/null +++ b/rod/c2nim/cpp.nim @@ -0,0 +1,231 @@ +# Preprocessor support + +const + c2nimSymbol = "C2NIM" + +proc eatNewLine(p: var TParser, n: PNode) = + if p.tok.xkind == pxLineComment: + skipCom(p, n) + if p.tok.xkind == pxNewLine: getTok(p) + else: + eat(p, pxNewLine) + +proc parseDefineBody(p: var TParser, tmplDef: PNode): string = + if p.tok.xkind == 
pxCurlyLe or + (p.tok.xkind == pxSymbol and (declKeyword(p.tok.s) or stmtKeyword(p.tok.s))): + addSon(tmplDef, statement(p)) + result = "stmt" + elif p.tok.xkind in {pxLineComment, pxNewLine}: + addSon(tmplDef, buildStmtList(newNodeP(nkNilLit, p))) + result = "stmt" + else: + addSon(tmplDef, buildStmtList(expression(p))) + result = "expr" + +proc parseDefine(p: var TParser): PNode = + if p.tok.xkind == pxDirectiveParLe: + # a macro with parameters: + result = newNodeP(nkTemplateDef, p) + getTok(p) + addSon(result, skipIdent(p)) + eat(p, pxParLe) + var params = newNodeP(nkFormalParams, p) + # return type; not known yet: + addSon(params, nil) + var identDefs = newNodeP(nkIdentDefs, p) + while p.tok.xkind != pxParRi: + addSon(identDefs, skipIdent(p)) + skipStarCom(p, nil) + if p.tok.xkind != pxComma: break + getTok(p) + addSon(identDefs, newIdentNodeP("expr", p)) + addSon(identDefs, nil) + addSon(params, identDefs) + eat(p, pxParRi) + + addSon(result, nil) # no generic parameters + addSon(result, params) + addSon(result, nil) # no pragmas + var kind = parseDefineBody(p, result) + params.sons[0] = newIdentNodeP(kind, p) + eatNewLine(p, result) + else: + # a macro without parameters: + result = newNodeP(nkConstSection, p) + while p.tok.xkind == pxDirective and p.tok.s == "define": + getTok(p) # skip #define + var c = newNodeP(nkConstDef, p) + addSon(c, skipIdent(p)) + addSon(c, nil) + skipStarCom(p, c) + if p.tok.xkind in {pxLineComment, pxNewLine, pxEof}: + addSon(c, newIdentNodeP("true", p)) + else: + addSon(c, expression(p)) + addSon(result, c) + eatNewLine(p, c) + +proc isDir(p: TParser, dir: string): bool = + result = p.tok.xkind in {pxDirectiveParLe, pxDirective} and p.tok.s == dir + +proc parseInclude(p: var TParser): PNode = + result = newNodeP(nkImportStmt, p) + while isDir(p, "include"): + getTok(p) # skip "include" + if p.tok.xkind == pxStrLit: + var file = newStrNodeP(nkStrLit, p.tok.s, p) + addSon(result, file) + getTok(p) + skipStarCom(p, file) + elif 
p.tok.xkind == pxLt: + while p.tok.xkind notin {pxEof, pxNewLine, pxLineComment}: getTok(p) + else: + parMessage(p, errXExpected, "string literal") + eatNewLine(p, nil) + if sonsLen(result) == 0: + # we only parsed includes that we chose to ignore: + result = nil + +proc definedExprAux(p: var TParser): PNode = + result = newNodeP(nkCall, p) + addSon(result, newIdentNodeP("defined", p)) + addSon(result, skipIdent(p)) + +proc parseStmtList(p: var TParser): PNode = + result = newNodeP(nkStmtList, p) + while true: + case p.tok.xkind + of pxEof: break + of pxDirectiveParLe, pxDirective: + case p.tok.s + of "else", "endif", "elif": break + else: nil + addSon(result, statement(p)) + +proc parseIfDirAux(p: var TParser, result: PNode) = + addSon(result.sons[0], parseStmtList(p)) + while isDir(p, "elif"): + var b = newNodeP(nkElifBranch, p) + getTok(p) + addSon(b, expression(p)) + eatNewLine(p, nil) + addSon(b, parseStmtList(p)) + addSon(result, b) + if isDir(p, "else"): + var s = newNodeP(nkElse, p) + while p.tok.xkind notin {pxEof, pxNewLine, pxLineComment}: getTok(p) + eatNewLine(p, nil) + addSon(s, parseStmtList(p)) + addSon(result, s) + if isDir(p, "endif"): + while p.tok.xkind notin {pxEof, pxNewLine, pxLineComment}: getTok(p) + eatNewLine(p, nil) + else: + parMessage(p, errXExpected, "#endif") + +proc specialIf(p: TParser): bool = + ExpectIdent(p) + result = p.tok.s == c2nimSymbol + +proc chooseBranch(whenStmt: PNode, branch: int): PNode = + var L = sonsLen(whenStmt) + if branch < L: + if L == 2 and whenStmt[1].kind == nkElse or branch == 0: + result = lastSon(whenStmt[branch]) + else: + var b = whenStmt[branch] + assert(b.kind == nkElifBranch) + result = newNodeI(nkWhenStmt, whenStmt.info) + for i in branch .. 
L-1: + addSon(result, whenStmt[i]) + +proc skipIfdefCPlusPlus(p: var TParser): PNode = + while p.tok.xkind != pxEof: + if isDir(p, "endif"): + while p.tok.xkind notin {pxEof, pxNewLine, pxLineComment}: getTok(p) + eatNewLine(p, nil) + return + getTok(p) + parMessage(p, errXExpected, "#endif") + +proc parseIfdefDir(p: var TParser): PNode = + result = newNodeP(nkWhenStmt, p) + addSon(result, newNodeP(nkElifBranch, p)) + getTok(p) + var special = specialIf(p) + if p.tok.s == "__cplusplus": + return skipIfdefCPlusPlus(p) + addSon(result.sons[0], definedExprAux(p)) + eatNewLine(p, nil) + parseIfDirAux(p, result) + if special: + result = chooseBranch(result, 0) + +proc parseIfndefDir(p: var TParser): PNode = + result = newNodeP(nkWhenStmt, p) + addSon(result, newNodeP(nkElifBranch, p)) + getTok(p) + var special = specialIf(p) + var e = newNodeP(nkCall, p) + addSon(e, newIdentNodeP("not", p)) + addSon(e, definedExprAux(p)) + eatNewLine(p, nil) + addSon(result.sons[0], e) + parseIfDirAux(p, result) + if special: + result = chooseBranch(result, 1) + +proc parseIfDir(p: var TParser): PNode = + result = newNodeP(nkWhenStmt, p) + addSon(result, newNodeP(nkElifBranch, p)) + getTok(p) + addSon(result.sons[0], expression(p)) + eatNewLine(p, nil) + parseIfDirAux(p, result) + +proc parseMangleDir(p: var TParser) = + var col = getColumn(p.lex) + 2 + getTok(p) + if p.tok.xkind != pxStrLit: ExpectIdent(p) + try: + var pattern = parsePeg( + input = p.tok.s, + filename = p.lex.filename, + line = p.lex.linenumber, + col = col) + getTok(p) + if p.tok.xkind != pxStrLit: ExpectIdent(p) + p.options.mangleRules.add((pattern, p.tok.s)) + getTok(p) + except EInvalidPeg: + parMessage(p, errUser, getCurrentExceptionMsg()) + eatNewLine(p, nil) + +proc parseDir(p: var TParser): PNode = + assert(p.tok.xkind in {pxDirective, pxDirectiveParLe}) + case p.tok.s + of "define": result = parseDefine(p) + of "include": result = parseInclude(p) + of "ifdef": result = parseIfdefDir(p) + of "ifndef": result = 
parseIfndefDir(p) + of "if": result = parseIfDir(p) + of "cdecl", "stdcall", "ref": + discard setOption(p.options, p.tok.s) + getTok(p) + eatNewLine(p, nil) + of "dynlib", "header", "prefix", "suffix", "skip": + var key = p.tok.s + getTok(p) + if p.tok.xkind != pxStrLit: ExpectIdent(p) + discard setOption(p.options, key, p.tok.s) + getTok(p) + eatNewLine(p, nil) + of "mangle": + parseMangleDir(p) + else: + # ignore unimportant/unknown directive ("undef", "pragma", "error") + while true: + getTok(p) + if p.tok.xkind in {pxEof, pxNewLine, pxLineComment}: break + eatNewLine(p, nil) + diff --git a/rod/c2nim/manual.txt b/rod/c2nim/manual.txt new file mode 100644 index 000000000..d0c45272b --- /dev/null +++ b/rod/c2nim/manual.txt @@ -0,0 +1,235 @@ +================================= + c2nim User's manual +================================= + +:Author: Andreas Rumpf +:Version: 0.8.10 + +Introduction +============ + +c2nim is a tool to translate ANSI C code to Nimrod. The output is +human-readable Nimrod code that is meant to be tweaked by hand after the +translation process. c2nim is no real compiler! + +c2nim is primarily meant to translate C header files. Because of this, the +preprocessor is part of the parser. For example: + +.. code-block:: C + #define abc 123 + #define xyz 789 + +Is translated into: + +.. code-block:: Nimrod + const + abc* = 123 + xyz* = 789 + + +c2nim is meant to translate fragments of C code and thus does not follow +include files. c2nim cannot parse all of ANSI C and many constructs cannot +be represented in Nimrod: for example `duff's device`:idx: cannot be translated +to Nimrod. + + +Preprocessor support +==================== + +Even though the translation process is not perfect, it is often the case that +the translated Nimrod code does not need any tweaking by hand. In other cases +it may be preferable to modify the input file instead of the generated Nimrod +code so that c2nim can parse it properly. 
c2nim's preprocessor defines the +symbol ``C2NIM`` that can be used to mark code sections: + +.. code-block:: C + #ifndef C2NIM + // C2NIM should ignore this prototype: + int fprintf(FILE* f, const char* frmt, ...); + #endif + +The ``C2NIM`` symbol is only recognized in ``#ifdef`` and ``#ifndef`` +constructs! ``#if defined(C2NIM)`` does **not** work. + +c2nim *processes* ``#ifdef C2NIM`` and ``#ifndef C2NIM`` directives, but other +``#if[def]`` directives are *translated* into Nimrod's ``when`` construct: + +.. code-block:: C + #ifdef DEBUG + # define OUT(x) printf("%s\n", x) + #else + # define OUT(x) + #endif + +Is translated into: + +.. code-block:: Nimrod + when defined(debug): + template OUT(x: expr): expr = + printf("%s\x0A", x) + else: + template OUT(x: expr): stmt = + nil + +As can be seen from the example, C's macros with parameters are mapped +to Nimrod's templates. This mapping is the best one can do, but it is of course +not accurate: Nimrod's templates operate on syntax trees whereas C's +macros work on the token level. c2nim cannot translate any macro that contains +the ``##`` token concatenation operator. + +c2nim's preprocessor supports special directives that affect how the output +is generated. They should be put into an ``#ifdef C2NIM`` section so that +ordinary C compilers ignore them. + + +``#stdcall`` and ``#cdecl`` directives +-------------------------------------- +**Note**: There are also ``--stdcall`` and ``--cdecl`` command line options +that can be used for the same purpose. + +These directives tell c2nim that it should annotate every proc (or proc type) +with the ``stdcall`` / ``cdecl`` calling convention. + + +``#dynlib`` directive +--------------------- +**Note**: There is also a ``--dynlib`` command line option that can be used for +the same purpose. + +This directive tells c2nim that it should annotate every proc that resulted +from a C function prototype with the ``dynlib`` pragma: + +.. 
code-block:: C + + #ifdef C2NIM + # dynlib iupdll + # cdecl + # if defined(windows) + # define iupdll "iup.dll" + # elif defined(macosx) + # define iupdll "libiup.dynlib" + # else + # define iupdll "libiup.so" + # endif + #endif + + int IupConvertXYToPos(PIhandle ih, int x, int y); + +Is translated to: + +.. code-block:: Nimrod + when defined(windows): + const iupdll* = "iup.dll" + elif defined(macosx): + const iupdll* = "libiup.dynlib" + else: + const iupdll* = "libiup.so" + + proc IupConvertXYToPos*(ih: PIhandle, x: cint, y: cint): cint {. + importc: "IupConvertXYToPos", cdecl, dynlib: iupdll.} + +Note how the example contains extra C code to declare the ``iupdll`` symbol +in the generated Nimrod code. + + +``#header`` directive +--------------------- +**Note**: There is also a ``--header`` command line option that can be used for +the same purpose. + +The ``#header`` directive tells c2nim that it should annotate every proc that +resulted from a C function prototype and every exported variable and type with +the ``header`` pragma: + +.. code-block:: C + + #ifdef C2NIM + # header "iup.h" + #endif + + int IupConvertXYToPos(PIhandle ih, int x, int y); + +Is translated to: + +.. code-block:: Nimrod + proc IupConvertXYToPos*(ih: PIhandle, x: cint, y: cint): cint {. + importc: "IupConvertXYToPos", header: "iup.h".} + +The ``#header`` and the ``#dynlib`` directives are mutually exclusive. +A binding that uses ``dynlib`` is much preferable to one that uses +``header``! The Nimrod compiler might drop support for the ``header`` pragma +in the future as it cannot work for backends that do not generate C code. + + +``#prefix`` and ``#suffix`` directives +-------------------------------------- + +**Note**: There are also ``--prefix`` and ``--suffix`` command line options +that can be used for the same purpose. + +c2nim does not do any name mangling by default. 
However, the + ``#prefix`` and ``#suffix`` directives can be used to strip prefixes and +suffixes from the identifiers in the C code: + +.. code-block:: C + + #ifdef C2NIM + # prefix Iup + # dynlib dllname + # cdecl + #endif + + int IupConvertXYToPos(PIhandle ih, int x, int y); + +Is translated to: + +.. code-block:: Nimrod + + proc ConvertXYToPos*(ih: PIhandle, x: cint, y: cint): cint {. + importc: "IupConvertXYToPos", cdecl, dynlib: dllname.} + + +``#mangle`` directive +--------------------- + +Even more sophisticated name mangling can be achieved by the ``#mangle`` +directive: It takes a PEG pattern and a format string that specify how the +identifier should be converted: + +.. code-block:: C + #mangle "'GTK_'{.*}" "TGtk$1" + + +``#skip`` directive +------------------- +**Note**: There is also a ``--skip`` command line option that can be used for the +same purpose. + +Often C code contains special macros that affect the declaration of a function +prototype but confuse c2nim's parser: + +.. code-block:: C + // does not parse! + EXPORT int f(void); + EXPORT int g(void); + +Instead of removing ``EXPORT`` from the input source file, one can tell c2nim +to skip special identifiers: + +.. code-block:: C + #skip EXPORT + // does parse now! + EXPORT int f(void); + EXPORT int g(void); + + +Limitations +=========== + +* C's ``,`` operator (comma operator) is not supported. +* C's ``union`` has no equivalent in Nimrod. +* Standalone ``struct x {}`` declarations are not implemented. Put them into + a ``typedef``. +* The condition in a ``do while(condition)`` statement must be ``0``. +* Lots of other small issues... + |