#
#
# Nimrod's Runtime Library
# (c) Copyright 2009 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## The ``parsesql`` module implements a high performance SQL file
## parser. It parses PostgreSQL syntax and the SQL ANSI standard.
import
hashes, strutils, lexbase, streams
# ------------------- scanner -------------------------------------------------
type
TTokKind = enum ## enumeration of all SQL tokens
tkInvalid, ## invalid token
tkEof, ## end of file reached
tkIdentifier, ## abc
tkQuotedIdentifier, ## "abc"
tkStringConstant, ## 'abc'
tkEscapeConstant, ## e'abc'
tkDollarQuotedConstant, ## $tag$abc$tag$
tkBitStringConstant, ## B'00011'
tkHexStringConstant, ## x'00011'
tkInteger,
tkNumeric,
tkOperator, ## + - * / < > = ~ ! @ # % ^ & | ` ?
tkSemicolon, ## ';'
tkColon, ## ':'
tkComma, ## ','
tkParLe, ## '('
tkParRi, ## ')'
tkBracketLe, ## '['
tkBracketRi, ## ']'
tkDot ## '.'
TToken {.final.} = object # a token
kind: TTokKind # the type of the token
literal: string # the parsed (string) literal
TSqlLexer* = object of TBaseLexer ## the parser object.
filename: string
const
tokKindToStr: array[TTokKind, string] = [
"invalid", "[EOF]", "identifier", "quoted identifier", "string constant",
"escape string constant", "dollar quoted constant", "bit string constant",
"hex string constant", "integer constant", "numeric constant", "operator",
";", ":", ",", "(", ")", "[", "]", "."
]
proc open(L: var TSqlLexer, input: PStream, filename: string) =
lexbase.open(L, input)
L.filename = filename
proc close(L: var TSqlLexer) =
lexbase.close(L)
proc getColumn(L: TSqlLexer): int =
## get the current column the parser has arrived at.
result = getColNumber(L, L.bufPos)
proc getLine(L: TSqlLexer): int =
result = L.linenumber
proc handleHexChar(c: var TSqlLexer, xi: var int) =
case c.buf[c.bufpos]
of '0'..'9':
xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0'))
inc(c.bufpos)
of 'a'..'f':
xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10)
inc(c.bufpos)
of 'A'..'F':
xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10)
inc(c.bufpos)
else:
nil
proc handleOctChar(c: var TSqlLexer, xi: var int) =
if c.buf[c.bufpos] in {'0'..'7'}:
xi = (xi shl 3) or (ord(c.buf[c.bufpos]) - ord('0'))
inc(c.bufpos)
proc getEscapedChar(c: var TSqlLexer, tok: var TToken) =
inc(c.bufpos)
case c.buf[c.bufpos]
of 'n', 'N':
add(tok.literal, '\L')
Inc(c.bufpos)
of 'r', 'R', 'c', 'C':
add(tok.literal, '\c')
Inc(c.bufpos)
of 'l', 'L':
add(tok.literal, '\L')
Inc(c.bufpos)
of 'f', 'F':
add(tok.literal, '\f')
inc(c.bufpos)
of 'e', 'E':
add(tok.literal, '\e')
Inc(c.bufpos)
of 'a', 'A':
add(tok.literal, '\a')
Inc(c.bufpos)
of 'b', 'B':
add(tok.literal, '\b')
Inc(c.bufpos)
of 'v', 'V':
add(tok.literal, '\v')
Inc(c.bufpos)
of 't', 'T':
add(tok.literal, '\t')
Inc(c.bufpos)
of '\'', '\"':
add(tok.literal, c.buf[c.bufpos])
Inc(c.bufpos)
of '\\':
add(tok.literal, '\\')
Inc(c.bufpos)
of 'x', 'X':
inc(c.bufpos)
var xi = 0
handleHexChar(c, xi)
handleHexChar(c, xi)
add(tok.literal, Chr(xi))
of '0'..'7':
var xi = 0
handleOctChar(c, xi)
handleOctChar(c, xi)
handleOctChar(c, xi)
if (xi <= 255): add(tok.literal, Chr(xi))
else: tok.kind = tkInvalid
else: tok.kind = tkInvalid
proc HandleCRLF(c: var TSqlLexer, pos: int): int =
case c.buf[pos]
of '\c': result = lexbase.HandleCR(c, pos)
of '\L': result = lexbase.HandleLF(c, pos)
else: result = pos
proc skip(c: var TSqlLexer) =
var pos = c.bufpos
var buf = c.buf
var nested = 0
while true:
case buf[pos]
of ' ', '\t':
Inc(pos)
of '-':
if buf[pos+1] == '-':
while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos)
else:
break
of '/':
if buf[pos+1] == '*':
inc(pos,2)
while true:
case buf[pos]
of '\0': break
of '\c', '\L':
pos = HandleCRLF(c, pos)
buf = c.buf
of '*':
if buf[pos+1] == '/':
inc(pos, 2)
if nested <= 0: break
dec(nested)
else:
inc(pos)
of '/':
if buf[pos+1] == '*':
inc(pos, 2)
inc(nested)
else:
inc(pos)
else: inc(pos)
else: break
of '\c', '\L':
pos = HandleCRLF(c, pos)
buf = c.buf
else:
break # EndOfFile also leaves the loop
c.bufpos = pos
proc getString(c: var TSqlLexer, tok: var TToken, kind: TTokKind) =
var pos = c.bufPos + 1
var buf = c.buf
tok.kind = kind
block parseLoop:
while true:
while true:
var ch = buf[pos]
if ch == '\'':
if buf[pos+1] == '\'':
inc(pos, 2)
add(tok.literal, '\'')
else:
inc(pos)
break
elif ch in {'\c', '\L', lexbase.EndOfFile}:
tok.kind = tkInvalid
break parseLoop
elif (ch == '\\') and kind == tkEscapeConstant:
c.bufPos = pos
getEscapedChar(c, tok)
pos = c.bufPos
else:
add(tok.literal, ch)
Inc(pos)
c.bufpos = pos
var line = c.linenumber
skip(c)
if c.linenumber > line:
# a new line whitespace has been parsed, so we check if the string
# continues after the whitespace:
buf = c.buf # may have been reallocated
pos = c.bufpos
if buf[pos] == '\'': inc(pos)
else: break parseLoop
else: break parseLoop
c.bufpos = pos
proc getDollarString(c: var TSqlLexer, tok: var TToken) =
var pos = c.bufPos + 1
var buf = c.buf
tok.kind = tkDollarQuotedConstant
var tag = "$"
while buf[pos] in IdentChars:
add(tag, buf[pos])
inc(pos)
if buf[pos] == '$': inc(pos)
else:
tok.kind = tkInvalid
return
while true:
case buf[pos]
of '\c', '\L':
pos = HandleCRLF(c, pos)
buf = c.buf
add(tok.literal, "\L")
of '\0':
tok.kind = tkInvalid
break
of '$':
inc(pos)
var tag2 = "$"
while buf[pos] in IdentChars:
add(tag2, buf[pos])
inc(pos)
if buf[pos] == '$': inc(pos)
if tag2 == tag: break
add(tok.literal, tag2)
add(tok.literal, '$')
else:
add(tok.literal, buf[pos])
inc(pos)
c.bufpos = pos
proc getSymbol(c: var TSqlLexer, tok: var TToken) =
var pos = c.bufpos
var buf = c.buf
while true:
add(tok.literal, buf[pos])
Inc(pos)
if not (buf[pos] in {'a'..'z','A'..'Z','0'..'9','_','$', '\128'..'\255'}):
break
c.bufpos = pos
tok.kind = tkIdentifier
proc getQuotedIdentifier(c: var TSqlLexer, tok: var TToken) =
var pos = c.bufPos + 1
var buf = c.buf
tok.kind = tkQuotedIdentifier
while true:
var ch = buf[pos]
if ch == '\"':
if buf[pos+1] == '\"':
inc(pos, 2)
add(tok.literal, '\"')
else:
inc(pos)
break
elif ch in {'\c', '\L', lexbase.EndOfFile}:
tok.kind = tkInvalid
break
else:
add(tok.literal, ch)
Inc(pos)
c.bufpos = pos
proc getBitHexString(c: var TSqlLexer, tok: var TToken, validChars: TCharSet) =
var pos = c.bufPos + 1
var buf = c.buf
block parseLoop:
while true:
while true:
var ch = buf[pos]
if ch in validChars:
add(tok.literal, ch)
Inc(pos)
elif ch == '\'':
inc(pos)
break
else:
tok.kind = tkInvalid
break parseLoop
c.bufpos = pos
var line = c.linenumber
skip(c)
if c.linenumber > line:
# a new line whitespace has been parsed, so we check if the string
# continues after the whitespace:
buf = c.buf # may have been reallocated
pos = c.bufpos
if buf[pos] == '\'': inc(pos)
else: break parseLoop
else: break parseLoop
c.bufpos = pos
proc getNumeric(c: var TSqlLexer, tok: var TToken) =
tok.kind = tkInteger
var pos = c.bufPos
var buf = c.buf
while buf[pos] in Digits:
add(tok.literal, buf[pos])
inc(pos)
if buf[pos] == '.':
tok.kind = tkNumeric
add(tok.literal, buf[pos])
inc(pos)
while buf[pos] in Digits:
add(tok.literal, buf[pos])
inc(pos)
if buf[pos] in {'E', 'e'}:
tok.kind = tkNumeric
add(tok.literal, buf[pos])
inc(pos)
if buf[pos] == '+':
inc(pos)
elif buf[pos] == '-':
add(tok.literal, buf[pos])
inc(pos)
if buf[pos] in Digits:
while buf[pos] in Digits:
add(tok.literal, buf[pos])
inc(pos)
else:
tok.kind = tkInvalid
c.bufpos = pos
proc getOperator(c: var TSqlLexer, tok: var TToken) =
const operators = {'+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%',
'^', '&', '|', '`', '?'}
tok.kind = tkOperator
var pos = c.bufPos
var buf = c.buf
var trailingPlusMinus = false
while true:
case buf[pos]
of '-':
if buf[pos] == '-': break
if not trailingPlusMinus and buf[pos+1] notin operators and
tok.literal.len > 0: break
of '/':
if buf[pos] == '*': break
of '~', '!', '@', '#', '%', '^', '&', '|', '`', '?':
trailingPlusMinus = true
of '+':
if not trailingPlusMinus and buf[pos+1] notin operators and
tok.literal.len > 0: break
of '*', '<', '>', '=': nil
else: break
add(tok.literal, buf[pos])
inc(pos)
c.bufpos = pos
proc getTok(c: var TSqlLexer, tok: var TToken) =
tok.kind = tkInvalid
setlen(tok.literal, 0)
skip(c)
case c.buf[c.bufpos]
of ';':
tok.kind = tkSemiColon
inc(c.bufPos)
add(tok.literal, ';')
of ',':
tok.kind = tkComma
inc(c.bufpos)
add(tok.literal, ',')
of ':':
tok.kind = tkColon
inc(c.bufpos)
add(tok.literal, ':')
of 'e', 'E':
if c.buf[c.bufPos + 1] == '\'':
Inc(c.bufPos)
getString(c, tok, tkEscapeConstant)
else:
getSymbol(c, tok)
of 'b', 'B':
if c.buf[c.bufPos + 1] == '\'':
tok.kind = tkBitStringConstant
getBitHexString(c, tok, {'0'..'1'})
else:
getSymbol(c, tok)
of 'x', 'X':
if c.buf[c.bufPos + 1] == '\'':
tok.kind = tkHexStringConstant
getBitHexString(c, tok, {'a'..'f','A'..'F','0'..'9'})
else:
getSymbol(c, tok)
of '$': getDollarString(c, tok)
of '[':
tok.kind = tkBracketLe
inc(c.bufpos)
add(tok.literal, '[')
of ']':
tok.kind = tkBracketRi
Inc(c.bufpos)
add(tok.literal, ']')
of '(':
tok.kind = tkParLe
Inc(c.bufpos)
add(tok.literal, '(')
of ')':
tok.kind = tkParRi
Inc(c.bufpos)
add(tok.literal, ')')
of '.':
if c.buf[c.bufPos + 1] in Digits:
getNumeric(c, tok)
else:
tok.kind = tkDot
inc(c.bufpos)
add(tok.literal, '.')
of '0'..'9': getNumeric(c, tok)
of '\'': getString(c, tok, tkStringConstant)
of '"': getQuotedIdentifier(c, tok)
of lexbase.EndOfFile:
tok.kind = tkEof
tok.literal = "[EOF]"
of 'a', 'c', 'd', 'f'..'w', 'y', 'z', 'A', 'C', 'D', 'F'..'W', 'Y', 'Z', '_',
'\128'..'\255':
getSymbol(c, tok)
of '+', '-', '*', '/', '<', '>', '=', '~', '!', '@', '#', '%',
'^', '&', '|', '`', '?':
getOperator(c, tok)
else:
add(tok.literal, c.buf[c.bufpos])
inc(c.bufpos)
proc errorStr(L: TSqlLexer, msg: string): string =
result = "$1($2, $3) Error: $4" % [L.filename, $getLine(L), $getColumn(L), msg]
# ----------------------------- parser ----------------------------------------
# Operator/Element Associativity Description
# . left table/column name separator
# :: left PostgreSQL-style typecast
# [ ] left array element selection
# - right unary minus
# ^ left exponentiation
# * / % left multiplication, division, modulo
# + - left addition, subtraction
# IS IS TRUE, IS FALSE, IS UNKNOWN, IS NULL
# ISNULL test for null
# NOTNULL test for not null
# (any other) left all other native and user-defined oprs
# IN set membership
# BETWEEN range containment
# OVERLAPS time interval overlap
# LIKE ILIKE SIMILAR string pattern matching
# < > less than, greater than
# = right equality, assignment
# NOT right logical negation
# AND left logical conjunction
# OR left logical disjunction
type
TSqlNodeKind* = enum ## kind of SQL abstract syntax tree
nkNone,
nkIdent,
nkStringLit,
nkBitStringLit,
nkHexStringLit,
nkIntegerLit,
nkNumericLit,
nkPrimaryKey,
nkForeignKey,
nkNotNull,
nkStmtList,
nkDot,
nkDotDot,
nkPrefix,
nkInfix,
nkCall,
nkColumnReference,
nkReferences,
nkDefault,
nkCheck,
nkConstraint,
nkUnique,
nkIdentity,
nkColumnDef, ## name, datatype, constraints
nkInsert,
nkUpdate,
nkDelete,
nkSelect,
nkSelectDistinct,
nkSelectColumns,
nkAsgn,
nkFrom,
nkGroup,
nkHaving,
nkOrder,
nkDesc,
nkUnion,
nkIntersect,
nkExcept,
nkColumnList,
nkValueList,
nkWhere,
nkCreateTable,
nkCreateTableIfNotExists,
nkCreateType,
nkCreateTypeIfNotExists,
nkCreateIndex,
nkCreateIndexIfNotExists,
nkEnumDef
type
EInvalidSql* = object of EInvalidValue ## Invalid SQL encountered
PSqlNode* = ref TSqlNode ## an SQL abstract syntax tree node
TSqlNode* = object ## an SQL abstract syntax tree node
case kind*: TSqlNodeKind ## kind of syntax tree
of nkIdent, nkStringLit, nkBitStringLit, nkHexStringLit,
nkIntegerLit, nkNumericLit:
strVal*: string ## AST leaf: the identifier, numeric literal
## string literal, etc.
else:
sons*: seq[PSqlNode] ## the node's children
TSqlParser* = object of TSqlLexer ## SQL parser object
tok: TToken
proc newNode(k: TSqlNodeKind): PSqlNode =
new(result)
result.kind = k
proc newNode(k: TSqlNodeKind, s: string): PSqlNode =
new(result)
result.kind = k
result.strVal = s
proc len*(n: PSqlNode): int =
if isNil(n.sons): result = 0
else: result = n.sons.len
proc add*(father, n: PSqlNode) =
if isNil(father.sons): father.sons = @[]
add(father.sons, n)
proc getTok(p: var TSqlParser) =
getTok(p, p.tok)
proc sqlError(p: TSqlParser, msg: string) =
var e: ref EInvalidSql
new(e)
e.msg = errorStr(p, msg)
raise e
proc isKeyw(p: TSqlParser, keyw: string): bool =
result = p.tok.kind == tkIdentifier and
cmpIgnoreCase(p.tok.literal, keyw) == 0
proc isOpr(p: TSqlParser, opr: string): bool =
result = p.tok.kind == tkOperator and
cmpIgnoreCase(p.tok.literal, opr) == 0
proc optKeyw(p: var TSqlParser, keyw: string) =
if p.tok.kind == tkIdentifier and cmpIgnoreCase(p.tok.literal, keyw) == 0:
getTok(p)
proc expectIdent(p: TSqlParser) =
if p.tok.kind != tkIdentifier and p.tok.kind != tkQuotedIdentifier:
sqlError(p, "identifier expected")
proc expect(p: TSqlParser, kind: TTokKind) =
if p.tok.kind != kind:
sqlError(p, tokKindToStr[kind] & " expected")
proc eat(p: var TSqlParser, kind: TTokKind) =
if p.tok.kind == kind:
getTok(p)
else:
sqlError(p, tokKindToStr[kind] & " expected")
proc eat(p: var TSqlParser, keyw: string) =
if isKeyw(p, keyw):
getTok(p)
else:
sqlError(p, keyw.toUpper() & " expected")
proc parseDataType(p: var TSqlParser): PSqlNode =
if isKeyw(p, "enum"):
result = newNode(nkEnumDef)
getTok(p)
if p.tok.kind == tkParLe:
getTok(p)
result.add(newNode(nkStringLit, p.tok.literal))
getTok(p)
while p.tok.kind == tkComma:
getTok(p)
result.add(newNode(nkStringLit, p.tok.literal))
getTok(p)
eat(p, tkParRi)
else:
expectIdent(p)
result = newNode(nkIdent, p.tok.literal)
getTok(p)
# ignore (12, 13) part:
if p.tok.kind == tkParLe:
getTok(p)
expect(p, tkInteger)
getTok(p)
while p.tok.kind == tkComma:
getTok(p)
expect(p, tkInteger)
getTok(p)
eat(p, tkParRi)
proc getPrecedence(p: TSqlParser): int =
if isOpr(p, "*") or isOpr(p, "/") or isOpr(p, "%"):
result = 6
elif isOpr(p, "+") or isOpr(p, "-"):
result = 5
elif isOpr(p, "=") or isOpr(p, "<") or isOpr(p, ">") or isOpr(p, ">=") or
isOpr(p, "<=") or isOpr(p, "<>") or isOpr(p, "!=") or isKeyw(p, "is") or
isKeyw(p, "like"):
result = 3
elif isKeyw(p, "and"):
result = 2
elif isKeyw(p, "or"):
result = 1
elif p.tok.kind == tkOperator:
# user-defined operator:
result = 0
else:
result = - 1
proc parseExpr(p: var TSqlParser): PSqlNode
proc identOrLiteral(p: var TSqlParser): PSqlNode =
case p.tok.kind
of tkIdentifier, tkQuotedIdentifier:
result = newNode(nkIdent, p.tok.literal)
getTok(p)
of tkStringConstant, tkEscapeConstant, tkDollarQuotedConstant:
result = newNode(nkStringLit, p.tok.literal)
getTok(p)
of tkBitStringConstant:
result = newNode(nkBitStringLit, p.tok.literal)
getTok(p)
of tkHexStringConstant:
result = newNode(nkHexStringLit, p.tok.literal)
getTok(p)
of tkInteger:
result = newNode(nkIntegerLit, p.tok.literal)
getTok(p)
of tkNumeric:
result = newNode(nkNumericLit, p.tok.literal)
getTok(p)
of tkParLe:
getTok(p)
result = parseExpr(p)
eat(p, tkParRi)
else:
sqlError(p, "expression expected")
getTok(p) # we must consume a token here to prevend endless loops!
proc primary(p: var TSqlParser): PSqlNode =
if p.tok.kind == tkOperator or isKeyw(p, "not"):
result = newNode(nkPrefix)
result.add(newNode(nkIdent, p.tok.literal))
getTok(p)
result.add(primary(p))
return
result = identOrLiteral(p)
while true:
case p.tok.kind
of tkParLe:
var a = result
result = newNode(nkCall)
result.add(a)
getTok(p)
while true:
result.add(parseExpr(p))
if p.tok.kind == tkComma: getTok(p)
else: break
eat(p, tkParRi)
of tkDot:
getTok(p)
var a = result
if p.tok.kind == tkDot:
getTok(p)
result = newNode(nkDotDot)
else:
result = newNode(nkDot)
result.add(a)
if isOpr(p, "*"):
result.add(newNode(nkIdent, "*"))
elif p.tok.kind in {tkIdentifier, tkQuotedIdentifier}:
result.add(newNode(nkIdent, p.tok.literal))
else:
sqlError(p, "identifier expected")
getTok(p)
else: break
proc lowestExprAux(p: var TSqlParser, v: var PSqlNode, limit: int): int =
var
v2, node, opNode: PSqlNode
v = primary(p) # expand while operators have priorities higher than 'limit'
var opPred = getPrecedence(p)
result = opPred
while opPred > limit:
node = newNode(nkInfix)
opNode = newNode(nkIdent, p.tok.literal)
getTok(p)
result = lowestExprAux(p, v2, opPred)
node.add(opNode)
node.add(v)
node.add(v2)
v = node
opPred = getPrecedence(p)
proc parseExpr(p: var TSqlParser): PSqlNode =
discard lowestExprAux(p, result, - 1)
proc parseTableName(p: var TSqlParser): PSqlNode =
expectIdent(p)
result = primary(p)
proc parseColumnReference(p: var TSqlParser): PSqlNode =
result = parseTableName(p)
if p.tok.kind == tkParLe:
getTok(p)
var a = result
result = newNode(nkColumnReference)
result.add(a)
result.add(parseTableName(p))
while p.tok.kind == tkComma:
getTok(p)
result.add(parseTableName(p))
eat(p, tkParRi)
proc parseCheck(p: var TSqlParser): PSqlNode =
getTok(p)
result = newNode(nkCheck)
result.add(parseExpr(p))
proc parseConstraint(p: var TSqlParser): PSqlNode =
getTok(p)
result = newNode(nkConstraint)
expectIdent(p)
result.add(newNode(nkIdent, p.tok.literal))
getTok(p)
eat(p, "check")
result.add(parseExpr(p))
proc parseColumnConstraints(p: var TSqlParser, result: PSqlNode) =
while true:
if isKeyw(p, "default"):
getTok(p)
var n = newNode(nkDefault)
n.add(parseExpr(p))
result.add(n)
elif isKeyw(p, "references"):
getTok(p)
var n = newNode(nkReferences)
n.add(parseColumnReference(p))
result.add(n)
elif isKeyw(p, "not"):
getTok(p)
eat(p, "null")
result.add(newNode(nkNotNull))
elif isKeyw(p, "identity"):
getTok(p)
result.add(newNode(nkIdentity))
elif isKeyw(p, "primary"):
getTok(p)
eat(p, "key")
result.add(newNode(nkPrimaryKey))
elif isKeyw(p, "check"):
result.add(parseCheck(p))
elif isKeyw(p, "constraint"):
result.add(parseConstraint(p))
elif isKeyw(p, "unique"):
result.add(newNode(nkUnique))
else:
break
proc parseColumnDef(p: var TSqlParser): PSqlNode =
expectIdent(p)
result = newNode(nkColumnDef)
result.add(newNode(nkIdent, p.tok.literal))
getTok(p)
result.add(parseDataType(p))
parseColumnConstraints(p, result)
proc parseIfNotExists(p: var TSqlParser, k: TSqlNodeKind): PSqlNode =
getTok(p)
if isKeyw(p, "if"):
getTok(p)
eat(p, "not")
eat(p, "exists")
result = newNode(succ(k))
else:
result = newNode(k)
proc parseParIdentList(p: var TSqlParser, father: PSqlNode) =
eat(p, tkParLe)
while true:
expectIdent(p)
father.add(newNode(nkIdent, p.tok.literal))
getTok(p)
if p.tok.kind != tkComma: break
getTok(p)
eat(p, tkParRi)
proc parseTableConstraint(p: var TSqlParser): PSqlNode =
if isKeyw(p, "primary"):
getTok(p)
eat(p, "key")
result = newNode(nkPrimaryKey)
parseParIdentList(p, result)
elif isKeyw(p, "foreign"):
getTok(p)
eat(p, "key")
result = newNode(nkForeignKey)
parseParIdentList(p, result)
eat(p, "references")
var m = newNode(nkReferences)
m.add(parseColumnReference(p))
result.add(m)
elif isKeyw(p, "unique"):
getTok(p)
eat(p, "key")
result = newNode(nkUnique)
parseParIdentList(p, result)
elif isKeyw(p, "check"):
result = parseCheck(p)
elif isKeyw(p, "constraint"):
result = parseConstraint(p)
else:
sqlError(p, "column definition expected")
proc parseTableDef(p: var TSqlParser): PSqlNode =
result = parseIfNotExists(p, nkCreateTable)
expectIdent(p)
result.add(newNode(nkIdent, p.tok.literal))
getTok(p)
if p.tok.kind == tkParLe:
while true:
getTok(p)
if p.tok.kind == tkIdentifier or p.tok.kind == tkQuotedIdentifier:
result.add(parseColumnDef(p))
else:
result.add(parseTableConstraint(p))
if p.tok.kind != tkComma: break
eat(p, tkParRi)
proc parseTypeDef(p: var TSqlParser): PSqlNode =
result = parseIfNotExists(p, nkCreateType)
expectIdent(p)
result.add(newNode(nkIdent, p.tok.literal))
getTok(p)
eat(p, "as")
result.add(parseDataType(p))
proc parseWhere(p: var TSqlParser): PSqlNode =
getTok(p)
result = newNode(nkWhere)
result.add(parseExpr(p))
proc parseIndexDef(p: var TSqlParser): PSqlNode =
result = parseIfNotExists(p, nkCreateIndex)
if isKeyw(p, "primary"):
getTok(p)
eat(p, "key")
result.add(newNode(nkPrimaryKey))
else:
expectIdent(p)
result.add(newNode(nkIdent, p.tok.literal))
getTok(p)
eat(p, "on")
expectIdent(p)
result.add(newNode(nkIdent, p.tok.literal))
getTok(p)
eat(p, tkParLe)
expectIdent(p)
result.add(newNode(nkIdent, p.tok.literal))
getTok(p)
while p.tok.kind == tkComma:
getTok(p)
expectIdent(p)
result.add(newNode(nkIdent, p.tok.literal))
getTok(p)
eat(p, tkParRi)
proc parseInsert(p: var TSqlParser): PSqlNode =
getTok(p)
eat(p, "into")
expectIdent(p)
result = newNode(nkInsert)
result.add(newNode(nkIdent, p.tok.literal))
getTok(p)
if p.tok.kind == tkParLe:
var n = newNode(nkColumnList)
parseParIdentList(p, n)
else:
result.add(nil)
if isKeyw(p, "default"):
getTok(p)
eat(p, "values")
result.add(newNode(nkDefault))
else:
eat(p, "values")
eat(p, tkParLe)
var n = newNode(nkValueList)
while true:
n.add(parseExpr(p))
if p.tok.kind != tkComma: break
getTok(p)
result.add(n)
eat(p, tkParRi)
proc parseUpdate(p: var TSqlParser): PSqlNode =
getTok(p)
result = newNode(nkUpdate)
result.add(primary(p))
eat(p, "set")
while true:
var a = newNode(nkAsgn)
expectIdent(p)
a.add(newNode(nkIdent, p.tok.literal))
getTok(p)
if isOpr(p, "="): getTok(p)
else: sqlError(p, "= expected")
a.add(parseExpr(p))
result.add(a)
if p.tok.kind != tkComma: break
getTok(p)
if isKeyw(p, "where"):
result.add(parseWhere(p))
else:
result.add(nil)
proc parseDelete(p: var TSqlParser): PSqlNode =
getTok(p)
result = newNode(nkDelete)
eat(p, "from")
result.add(primary(p))
if isKeyw(p, "where"):
result.add(parseWhere(p))
else:
result.add(nil)
proc parseSelect(p: var TSqlParser): PSqlNode =
getTok(p)
if isKeyw(p, "distinct"):
getTok(p)
result = newNode(nkSelectDistinct)
elif isKeyw(p, "all"):
getTok(p)
result = newNode(nkSelect)
var a = newNode(nkSelectColumns)
while true:
if isOpr(p, "*"):
a.add(newNode(nkIdent, "*"))
getTok(p)
else:
a.add(parseExpr(p))
if p.tok.kind != tkComma: break
getTok(p)
result.add(a)
if isKeyw(p, "from"):
var f = newNode(nkFrom)
while true:
getTok(p)
f.add(parseExpr(p))
if p.tok.kind != tkComma: break
result.add(f)
if isKeyw(p, "where"):
result.add(parseWhere(p))
if isKeyw(p, "group"):
getTok(p)
eat(p, "by")
var g = newNode(nkGroup)
while true:
g.add(parseExpr(p))
if p.tok.kind != tkComma: break
getTok(p)
result.add(g)
if isKeyw(p, "having"):
var h = newNode(nkHaving)
while true:
getTok(p)
h.add(parseExpr(p))
if p.tok.kind != tkComma: break
result.add(h)
if isKeyw(p, "union"):
result.add(newNode(nkUnion))
getTok(p)
elif isKeyw(p, "intersect"):
result.add(newNode(nkIntersect))
getTok(p)
elif isKeyw(p, "except"):
result.add(newNode(nkExcept))
getTok(p)
if isKeyw(p, "order"):
getTok(p)
eat(p, "by")
var n = newNode(nkOrder)
while true:
var e = parseExpr(p)
if isKeyw(p, "asc"): getTok(p) # is default
elif isKeyw(p, "desc"):
getTok(p)
var x = newNode(nkDesc)
x.add(e)
e = x
n.add(e)
if p.tok.kind != tkComma: break
getTok(p)
result.add(n)
proc parseStmt(p: var TSqlParser): PSqlNode =
if isKeyw(p, "create"):
getTok(p)
optKeyw(p, "cached")
optKeyw(p, "memory")
optKeyw(p, "temp")
optKeyw(p, "global")
optKeyw(p, "local")
optKeyw(p, "temporary")
optKeyw(p, "unique")
optKeyw(p, "hash")
if isKeyw(p, "table"):
result = parseTableDef(p)
elif isKeyw(p, "type"):
result = parseTypeDef(p)
elif isKeyw(p, "index"):
result = parseIndexDef(p)
else:
sqlError(p, "TABLE expected")
elif isKeyw(p, "insert"):
result = parseInsert(p)
elif isKeyw(p, "update"):
result = parseUpdate(p)
elif isKeyw(p, "delete"):
result = parseDelete(p)
elif isKeyw(p, "select"):
result = parseSelect(p)
else:
sqlError(p, "CREATE expected")
proc open(p: var TSqlParser, input: PStream, filename: string) =
## opens the parser `p` and assigns the input stream `input` to it.
## `filename` is only used for error messages.
open(TSqlLexer(p), input, filename)
p.tok.kind = tkInvalid
p.tok.literal = ""
getTok(p)
proc parse(p: var TSqlParser): PSqlNode =
## parses the content of `p`'s input stream and returns the SQL AST.
## Syntax errors raise an `EInvalidSql` exception.
result = newNode(nkStmtList)
while p.tok.kind != tkEof:
var s = parseStmt(p)
eat(p, tkSemiColon)
result.add(s)
if result.len == 1:
result = result.sons[0]
proc close(p: var TSqlParser) =
## closes the parser `p`. The associated input stream is closed too.
close(TSqlLexer(p))
proc parseSQL*(input: PStream, filename: string): PSqlNode =
## parses the SQL from `input` into an AST and returns the AST.
## `filename` is only used for error messages.
## Syntax errors raise an `EInvalidSql` exception.
var p: TSqlParser
open(p, input, filename)
try:
result = parse(p)
finally:
close(p)
proc ra(n: PSqlNode, s: var string, indent: int)
proc rs(n: PSqlNode, s: var string, indent: int,
prefix = "(", suffix = ")",
sep = ", ") =
if n.len > 0:
s.add(prefix)
for i in 0 .. n.len-1:
if i > 0: s.add(sep)
ra(n.sons[i], s, indent)
s.add(suffix)
proc ra(n: PSqlNode, s: var string, indent: int) =
if n == nil: return
case n.kind
of nkNone: nil
of nkIdent:
if allCharsInSet(n.strVal, {'\33'..'\127'}):
s.add(n.strVal)
else:
s.add("\"" & replace(n.strVal, "\"", "\"\"") & "\"")
of nkStringLit:
s.add(escape(n.strVal, "e'", "'"))
of nkBitStringLit:
s.add("b'" & n.strVal & "'")
of nkHexStringLit:
s.add("x'" & n.strVal & "'")
of nkIntegerLit, nkNumericLit:
s.add(n.strVal)
of nkPrimaryKey:
s.add(" primary key")
rs(n, s, indent)
of nkForeignKey:
s.add(" foreign key")
rs(n, s, indent)
of nkNotNull:
s.add(" not null")
of nkDot:
ra(n.sons[0], s, indent)
s.add(".")
ra(n.sons[1], s, indent)
of nkDotDot:
ra(n.sons[0], s, indent)
s.add(". .")
ra(n.sons[1], s, indent)
of nkPrefix:
s.add('(')
ra(n.sons[0], s, indent)
s.add(' ')
ra(n.sons[1], s, indent)
s.add(')')
of nkInfix:
s.add('(')
ra(n.sons[1], s, indent)
s.add(' ')
ra(n.sons[0], s, indent)
s.add(' ')
ra(n.sons[2], s, indent)
s.add(')')
of nkCall, nkColumnReference:
ra(n.sons[0], s, indent)
s.add('(')
for i in 1..n.len-1:
if i > 1: s.add(", ")
ra(n.sons[i], s, indent)
s.add(')')
of nkReferences:
s.add(" references ")
ra(n.sons[0], s, indent)
of nkDefault:
s.add(" default ")
ra(n.sons[0], s, indent)
of nkCheck:
s.add(" check ")
ra(n.sons[0], s, indent)
of nkConstraint:
s.add(" constraint ")
ra(n.sons[0], s, indent)
s.add(" check ")
ra(n.sons[1], s, indent)
of nkUnique:
s.add(" unique")
rs(n, s, indent)
of nkIdentity:
s.add(" identity")
of nkColumnDef:
s.add("\n ")
rs(n, s, indent, "", "", " ")
of nkStmtList:
for i in 0..n.len-1:
ra(n.sons[i], s, indent)
s.add("\n")
of nkInsert:
assert n.len == 3
s.add("insert into ")
ra(n.sons[0], s, indent)
ra(n.sons[1], s, indent)
if n.sons[2].kind == nkDefault:
s.add("default values")
else:
s.add("\nvalues ")
ra(n.sons[2], s, indent)
s.add(';')
of nkUpdate:
s.add("update ")
ra(n.sons[0], s, indent)
s.add(" set ")
var L = n.len
for i in 1 .. L-2:
if i > 1: s.add(", ")
var it = n.sons[i]
assert it.kind == nkAsgn
ra(it, s, indent)
ra(n.sons[L-1], s, indent)
s.add(';')
of nkDelete:
s.add("delete from ")
ra(n.sons[0], s, indent)
ra(n.sons[1], s, indent)
s.add(';')
of nkSelect, nkSelectDistinct:
s.add("select ")
if n.kind == nkSelectDistinct:
s.add("distinct ")
rs(n.sons[0], s, indent, "", "", ", ")
for i in 1 .. n.len-1: ra(n.sons[i], s, indent)
s.add(';')
of nkSelectColumns:
assert(false)
of nkAsgn:
ra(n.sons[0], s, indent)
s.add(" = ")
ra(n.sons[1], s, indent)
of nkFrom:
s.add("\nfrom ")
rs(n, s, indent, "", "", ", ")
of nkGroup:
s.add("\ngroup by")
rs(n, s, indent, "", "", ", ")
of nkHaving:
s.add("\nhaving")
rs(n, s, indent, "", "", ", ")
of nkOrder:
s.add("\norder by ")
rs(n, s, indent, "", "", ", ")
of nkDesc:
ra(n.sons[0], s, indent)
s.add(" desc")
of nkUnion:
s.add(" union")
of nkIntersect:
s.add(" intersect")
of nkExcept:
s.add(" except")
of nkColumnList:
rs(n, s, indent)
of nkValueList:
s.add("values ")
rs(n, s, indent)
of nkWhere:
s.add("\nwhere ")
ra(n.sons[0], s, indent)
of nkCreateTable, nkCreateTableIfNotExists:
s.add("create table ")
if n.kind == nkCreateTableIfNotExists:
s.add("if not exists ")
ra(n.sons[0], s, indent)
s.add('(')
for i in 1..n.len-1:
if i > 1: s.add(", ")
ra(n.sons[i], s, indent)
s.add(");")
of nkCreateType, nkCreateTypeIfNotExists:
s.add("create type ")
if n.kind == nkCreateTypeIfNotExists:
s.add("if not exists ")
ra(n.sons[0], s, indent)
s.add(" as ")
ra(n.sons[1], s, indent)
s.add(';')
of nkCreateIndex, nkCreateIndexIfNotExists:
s.add("create index ")
if n.kind == nkCreateIndexIfNotExists:
s.add("if not exists ")
ra(n.sons[0], s, indent)
s.add(" on ")
ra(n.sons[1], s, indent)
s.add('(')
for i in 2..n.len-1:
if i > 2: s.add(", ")
ra(n.sons[i], s, indent)
s.add(");")
of nkEnumDef:
s.add("enum ")
rs(n, s, indent)
# What I want:
#
#select(columns = [T1.all, T2.name],
# fromm = [T1, T2],
# where = T1.name ==. T2.name,
# orderby = [name]):
#
#for row in dbQuery(db, """select x, y, z
# from a, b
# where a.name = b.name"""):
#
#select x, y, z:
# fromm: Table1, Table2
# where: x.name == y.name
#db.select(fromm = [t1, t2], where = t1.name == t2.name):
#for x, y, z in db.select(fromm = a, b where = a.name == b.name):
# writeln x, y, z
proc renderSQL*(n: PSqlNode): string =
## Converts an SQL abstract syntax tree to its string representation.
result = ""
ra(n, result, 0)
when isMainModule:
echo(renderSQL(parseSQL(newStringStream("""
CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic');
CREATE TABLE holidays (
num_weeks int,
happiness happiness
);
CREATE INDEX table1_attr1 ON table1(attr1);
SELECT * FROM myTab WHERE col1 = 'happy';
"""), "stdin")))
# CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic');
# CREATE TABLE holidays (
# num_weeks int,
# happiness happiness
# );
# CREATE INDEX table1_attr1 ON table1(attr1)