https://github.com/akkartik/mu/blob/main/shell/parse.mu
  1 fn parse-input tokens: (addr stream token), out: (addr handle cell), trace: (addr trace) {
  2   rewind-stream tokens
  3   var empty?/eax: boolean <- stream-empty? tokens
  4   compare empty?, 0/false
  5   {
  6     break-if-=
  7     error trace, "nothing to parse"
  8     return
  9   }
 10   var close-paren?/eax: boolean <- copy 0/false
 11   var dot?/ecx: boolean <- copy 0/false
 12   close-paren?, dot? <- parse-sexpression tokens, out, trace
 13   
discard """
  output: '''true'''
"""

# Just check that we can parse 'somesql' and render it without crashes.

import parsesql, streams, os

# Parse the SQL fixture that sits next to this source file, then render the
# resulting tree. The rendered text is thrown away: per the note above, the
# point is only that neither step crashes.
let fixturePath = parentDir(currentSourcePath) / "somesql.sql"
let parsed = parseSql(newFileStream(fixturePath), "somesql")
discard renderSql(parsed)

# Printed last; the spec block at the top of the file lists this as the
# expected output.
echo "true"
e-higher trace 66 return close-paren?, dot? 67 } 68 # backquote quote -> parse as list with a special car 69 var backquote-token?/eax: boolean <- backquote-token? curr-token 70 compare backquote-token?, 0/false 71 { 72 break-if-= 73 var out/edi: (addr handle cell) <- copy _out 74 allocate-pair out 75 var out-addr/eax: (addr cell) <- lookup *out 76 var left-ah/edx: (addr handle cell) <- get out-addr, left 77 new-symbol left-ah, "`" 78 var right-ah/edx: (addr handle cell) <- get out-addr, right 79 var close-paren?/eax: boolean <- copy 0/false 80 var dot?/ecx: boolean <- copy 0/false 81 close-paren?, dot? <- parse-sexpression tokens, right-ah, trace 82 trace-higher trace 83 return close-paren?, dot? 84 } 85 # unquote -> parse as list with a special car 86 var unquote-token?/eax: boolean <- unquote-token? curr-token 87 compare unquote-token?, 0/false 88 { 89 break-if-= 90 var out/edi: (addr handle cell) <- copy _out 91 allocate-pair out 92 var out-addr/eax: (addr cell) <- lookup *out 93 var left-ah/edx: (addr handle cell) <- get out-addr, left 94 new-symbol left-ah, "," 95 var right-ah/edx: (addr handle cell) <- get out-addr, right 96 var close-paren?/eax: boolean <- copy 0/false 97 var dot?/ecx: boolean <- copy 0/false 98 close-paren?, dot? <- parse-sexpression tokens, right-ah, trace 99 trace-higher trace 100 return close-paren?, dot? 101 } 102 # unquote-splice -> parse as list with a special car 103 var unquote-splice-token?/eax: boolean <- unquote-splice-token? curr-token 104 compare unquote-splice-token?, 0/false 105 { 106 break-if-= 107 var out/edi: (addr handle cell) <- copy _out 108 allocate-pair out 109 var out-addr/eax: (addr cell) <- lookup *out 110 var left-ah/edx: (addr handle cell) <- get out-addr, left 111 new-symbol left-ah, ",@" 112 var right-ah/edx: (addr handle cell) <- get out-addr, right 113 var close-paren?/eax: boolean <- copy 0/false 114 var dot?/ecx: boolean <- copy 0/false 115 close-paren?, dot? 
<- parse-sexpression tokens, right-ah, trace 116 trace-higher trace 117 return close-paren?, dot? 118 } 119 # dot -> return 120 var dot?/eax: boolean <- dot-token? curr-token 121 compare dot?, 0/false 122 { 123 break-if-= 124 trace-higher trace 125 return 0/false, 1/true 126 } 127 # not bracket -> parse atom 128 var bracket-token?/eax: boolean <- bracket-token? curr-token 129 compare bracket-token?, 0/false 130 { 131 break-if-!= 132 parse-atom curr-token, _out, trace 133 break $parse-sexpression:type-check 134 } 135 # open paren -> parse list 136 var open-paren?/eax: boolean <- open-paren-token? curr-token 137 compare open-paren?, 0/false 138 { 139 break-if-= 140 var curr/esi: (addr handle cell) <- copy _out 141 allocate-pair curr 142 var curr-addr/eax: (addr cell) <- lookup *curr 143 var left/edx: (addr handle cell) <- get curr-addr, left 144 { 145 var close-paren?/eax: boolean <- copy 0/false 146 var dot?/ecx: boolean <- copy 0/false 147 close-paren?, dot? <- parse-sexpression tokens, left, trace 148 { 149 compare dot?, 0/false 150 break-if-= 151 error trace, "'.' cannot be at the start of a list" 152 return 1/true, dot? 153 } 154 compare close-paren?, 0/false 155 break-if-!= 156 var curr-addr/eax: (addr cell) <- lookup *curr 157 curr <- get curr-addr, right 158 var tmp-storage: (handle cell) 159 var tmp/edx: (addr handle cell) <- address tmp-storage 160 $parse-sexpression:list-loop: { 161 var close-paren?/eax: boolean <- copy 0/false 162 var dot?/ecx: boolean <- copy 0/false 163 close-paren?, dot? <- parse-sexpression tokens, tmp, trace 164 # '.' 
-> clean up right here and return 165 compare dot?, 0/false 166 { 167 break-if-= 168 parse-dot-tail tokens, curr, trace 169 return 0/false, 0/false 170 } 171 allocate-pair curr 172 # ')' -> return 173 compare close-paren?, 0/false 174 break-if-!= 175 var curr-addr/eax: (addr cell) <- lookup *curr 176 var left/ecx: (addr handle cell) <- get curr-addr, left 177 copy-object tmp, left 178 # 179 curr <- get curr-addr, right 180 loop 181 } 182 } 183 break $parse-sexpression:type-check 184 } 185 # close paren -> return 186 var close-paren?/eax: boolean <- close-paren-token? curr-token 187 compare close-paren?, 0/false 188 { 189 break-if-= 190 trace-higher trace 191 return 1/true, 0/false 192 } 193 # otherwise abort 194 var stream-storage: (stream byte 0x400) 195 var stream/edx: (addr stream byte) <- address stream-storage 196 write stream, "unexpected token " 197 var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data 198 var curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah 199 rewind-stream curr-token-data 200 write-stream stream, curr-token-data 201 error-stream trace, stream 202 } 203 trace-higher trace 204 return 0/false, 0/false 205 }

# Convert one token into an atom cell stored in _out.
#   _curr-token: the token to convert; its text-data is read
#   _out: handle that receives the freshly allocated cell
#   trace: receives "parse"-labelled trace lines
# Number tokens become number cells. Stream tokens become stream cells. Every
# other token becomes a symbol cell. In the stream and symbol cases the
# token's text-data handle is copied into the new cell.
# parse-sexpression calls this only for tokens that are not brackets.
fn parse-atom _curr-token: (addr token), _out: (addr handle cell), trace: (addr trace) {
  trace-text trace, "parse", "parse atom"
  var curr-token/ecx: (addr token) <- copy _curr-token
  var curr-token-data-ah/eax: (addr handle stream byte) <- get curr-token, text-data
  var _curr-token-data/eax: (addr stream byte) <- lookup *curr-token-data-ah
  # move the token text out of eax, which is reused for results below
  var curr-token-data/esi: (addr stream byte) <- copy _curr-token-data
  trace trace, "parse", curr-token-data
  # number
  var number-token?/eax: boolean <- number-token? curr-token
  compare number-token?, 0/false
  {
    break-if-=
    # the text is parsed as a decimal integer, then stored as a float
    rewind-stream curr-token-data
    var _val/eax: int <- parse-decimal-int-from-stream curr-token-data
    var val/ecx: int <- copy _val
    var val-float/xmm0: float <- convert val
    allocate-number _out
    var out/eax: (addr handle cell) <- copy _out
    var out-addr/eax: (addr cell) <- lookup *out
    var dest/edi: (addr float) <- get out-addr, number-data
    copy-to *dest, val-float
    # emit "=> number ..." to the trace, but only when tracing is wanted
    {
      {
        # NOTE(review): should-trace? is bound to eax in its own nested block
        # while out-addr (also eax) is still needed below; presumably the
        # outer binding is restored when the nested block ends -- confirm.
        var should-trace?/eax: boolean <- should-trace? trace
        compare should-trace?, 0/false
      }
      break-if-=
      var stream-storage: (stream byte 0x400)
      var stream/ecx: (addr stream byte) <- address stream-storage
      write stream, "=> number "
      # printing gets its own small trace rather than the caller's
      var nested-trace-storage: trace
      var nested-trace/edi: (addr trace) <- address nested-trace-storage
      initialize-trace nested-trace, 1/only-errors, 0x10/capacity, 0/visible
      print-number out-addr, stream, nested-trace
      trace trace, "parse", stream
    }
    return
  }
  # default: copy either to a symbol or a stream
  # stream token -> literal
  var stream-token?/eax: boolean <- stream-token? curr-token
  compare stream-token?, 0/false
  {
    break-if-=
    allocate-stream _out
  }
  # compare again: exactly one of the two allocations runs
  compare stream-token?, 0/false
  {
    break-if-!=
    allocate-symbol _out
  }
  # copy token data
  var out/eax: (addr handle cell) <- copy _out
  var out-addr/eax: (addr cell) <- lookup *out
  var curr-token-data-ah/ecx: (addr handle stream byte) <- get curr-token, text-data
  var dest-ah/edx: (addr handle stream byte) <- get out-addr, text-data
  copy-object curr-token-data-ah, dest-ah
  # emit "=> symbol ..." to the trace, but only when tracing is wanted
  # (this path is also taken for stream tokens)
  {
    {
      var should-trace?/eax: boolean <- should-trace? trace
      compare should-trace?, 0/false
    }
    break-if-=
    var stream-storage: (stream byte 0x40000)
    var stream/ecx: (addr stream byte) <- address stream-storage
    write stream, "=> symbol "
    var nested-trace-storage: trace
    var nested-trace/edi: (addr trace) <- address nested-trace-storage
    initialize-trace nested-trace, 1/only-errors, 0x10/capacity, 0/visible
    print-symbol out-addr, stream, nested-trace
    trace trace, "parse", stream
  }
}

# Parse what follows a '.' inside a list: exactly one expression, then ')'.
#   tokens: token stream, positioned just after the '.'
#   _out: handle that receives the expression after the '.'
#   trace: receives an error when the tail is malformed
# Returns nothing; problems are reported only through the trace.
fn parse-dot-tail tokens: (addr stream token), _out: (addr handle cell), trace: (addr trace) {
  var out/edi: (addr handle cell) <- copy _out
  var close-paren?/eax: boolean <- copy 0/false
  var dot?/ecx: boolean <- copy 0/false
  # the expression after the '.' is parsed straight into _out
  close-paren?, dot? <- parse-sexpression tokens, out, trace
  compare close-paren?, 0/false
  {
    break-if-=
    error trace, "'. )' makes no sense"
    return
  }
  compare dot?, 0/false
  {
    break-if-=
    error trace, "'. .' makes no sense"
    return
  }
  # whatever comes next must be the closing ')'; parse it into a throwaway cell
  var dummy: (handle cell)
  var dummy-ah/edi: (addr handle cell) <- address dummy
  close-paren?, dot? <- parse-sexpression tokens, dummy-ah, trace
  # Test for a second '.' before testing for ')'. A dot token comes back as
  # close-paren?=false, dot?=true, so testing close-paren? first would report
  # it as "multiple expressions" and never give the more specific message.
  compare dot?, 0/false
  {
    break-if-=
    error trace, "cannot have two dots in a single list"
    return
  }
  compare close-paren?, 0/false
  {
    break-if-!=
    error trace, "cannot have multiple expressions between '.' and ')'"
    return
  }
}