# Toy lisp interpreter
#
# To run:
#   $ ./ntranslate init.linux 0*.subx apps/subx-common.subx apps/mulisp.subx
#   $ ./a.elf
#   42
#   => 42
#   ^D
#   $

== code

# Program entry point.
# With no arguments, runs the REPL on Stdin/Stdout and exits with status 0.
# When the first argument is "test", runs the test suite instead and exits
# with the number of test failures as the status.
# On entry the stack holds argc at *esp and the argv pointers above it, so
# after the prolog *ebp is argc and *(ebp+8) is argv[1].
Entry:  # run tests if necessary, a REPL if not
    # . prolog
    89/<- %ebp 4/r32/esp
    # initialize heap
    (new-segment Heap-size Heap)
    # if (argc <= 1) goto run-main
    81 7/subop/compare *ebp 1/imm32
    7e/jump-if-lesser-or-equal $run-main/disp8
    # if (argv[1] != "test") goto run-main
    (kernel-string-equal? *(ebp+8) "test")  # => eax
    3d/compare-eax-and 0/imm32
    74/jump-if-equal $run-main/disp8
    #
    (run-tests)
    # syscall(exit, *Num-test-failures)
    8b/-> *Num-test-failures 3/r32/ebx
    eb/jump $main:end/disp8
$run-main:
    (repl Stdin Stdout)
    # syscall(exit, 0)
    bb/copy-to-ebx 0/imm32
$main:end:
    # both paths arrive here with the exit status already in ebx
    b8/copy-to-eax 1/imm32/exit
    cd/syscall 0x80/imm8

# Data structures
#
# Lisp is dynamically typed. Values always carry around knowledge of their
# type.
#
# There are several types of types in the description below, so we need a
# glossary and notational convention to disambiguate:
#   lisp type: what Lisp code can see. Looks how you type it at the prompt.
#     nil num char string symbol pair array
#   type tag: the numeric code for a lisp type. All caps.
#     NIL NUM CHAR STRING SYMBOL PAIR ARRAY
#   memory type: a type specifying memory layout at the SubX level. Starts
#   with a '$'.
#     $int $array $(address _)
#
# Lisp values are represented in memory by the _cell_ data structure. A cell
# is 12 bytes long:
#   tag: $int (4 bytes; we're not concerned about wasting space)
#   data: 8 bytes whose contents and meaning depend on tag
#
# What values of the different Lisp types look like in memory:
#   - nil: cell{ tag: 0/NIL, data: 0 0 }
#   - num: cell{ tag: 1/NUM, data: $int 0 }
#     data contains the number
#   - char: cell{ tag: 2/CHAR, data: $int 0 }
#     data contains the utf-8 code of the character (no compound glyphs, no
#     modifiers, etc., etc.)
#   - string: cell{ tag: 3/STRING, data: $(address stream byte) }
#     data contains an (address array byte) containing the string in utf-8
#     NOTE(review): the layout above says 'stream' but this sentence says
#     'array' -- confirm which one is intended
#   - symbol: cell{ tag: 4/SYMBOL, data: $(address array byte) 0 }
#     data contains an (address array byte) containing the name of the symbol in utf-8
#     alternatively, data could contain an index into the table of interned symbols
#   - pair: cell{ tag: 5/PAIR, data: $(address cell) $(address cell) }
#     data contains pointers to car and cdr
#   - array: cell{ tag: 6/ARRAY, data: $tag $(address stream data) }
#     data contains a pointer to an array of 8-byte data fields and the common
#     tag for them all

# Read-eval-print loop: keep reading one expression from 'in', evaluating it
# and printing the result to 'out' until lisp-read returns null.
# Preserves eax.
repl:  # in : (address buffered-file), out : (address buffered-file) -> <void>
    # . prolog
    55/push-ebp
    89/<- %ebp 4/r32/esp
    # . save registers
    50/push-eax
$repl:loop:
    # read from the 'in' argument rather than a hard-coded Stdin
    (lisp-read *(ebp+8))  # => eax : (address cell)
    # if (eax == 0) break
    3d/compare-eax-and 0/imm32
    74/jump-if-equal $repl:end/disp8
    #
    (lisp-eval %eax)  # => eax : (address cell)
    # print to the 'out' argument rather than a hard-coded Stdout
    (lisp-print *(ebp+0xc) %eax)
    # loop
    eb/jump $repl:loop/disp8
$repl:end:
    # . restore registers
    58/pop-to-eax
    # . epilog
    89/<- %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return

# numbers start with a digit and are always in hex
# characters start with a backslash
# pairs start with '('
# arrays start with '['
# symbols start with anything else but quote, backquote, unquote or splice
# only one s-expression per line

# Read one line from 'in' into a 512-byte stream allocated on the stack.
# Returns null (0) in eax if nothing was read, otherwise the address of the
# stream holding the line.
# For now the result is the raw line (a stream), not yet a parsed cell;
# lisp-print below prints it with write-stream-data.
# NOTE(review): the stream lives in this function's stack frame, which is
# reclaimed before returning, so the returned address points below esp.
# The caller has to consume it before deeper calls overwrite that memory.
lisp-read:  # in : (address buffered-file) -> eax : (address cell)
    # . prolog
    55/push-ebp
    89/<- %ebp 4/r32/esp
    # . save registers
    51/push-ecx
    # var s/ecx : (address stream) = new-stream(512)
    81 5/subop/subtract %esp 0x200/imm32
    68/push 0x200/imm32/size
    68/push 0/imm32/read
    68/push 0/imm32/write
    89/<- %ecx 4/r32/esp
    # read line into s
    (clear-stream %ecx)
    (read-line-buffered *(ebp+8) %ecx)
    # if (s->write != 0) return s
    # (previously this branch jumped back to re-read, which made 'return s'
    # unreachable and discarded every non-empty line)
    81 7/subop/compare *ecx 0/imm32
    75/jump-if-not-equal $lisp-read:return-line/disp8
    # otherwise return null
    b8/copy-to-eax 0/imm32/eof
    eb/jump $lisp-read:end/disp8
$lisp-read:return-line:
    # return s
    89/<- %eax 1/r32/ecx
$lisp-read:end:
    # . reclaim locals (0x200 bytes of data + 3 words of stream header)
    81 0/subop/add %esp 0x20c/imm32
    # . restore registers
    59/pop-to-ecx
    # . epilog
    89/<- %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return

# lisp-read:  in : (address buffered-file) -> (address cell)
#   token tmp = next-token(in)
#   if is-int(tmp) return cell(tmp)
#   if is-string(tmp) return cell(tmp)
#   if is-pair(tmp) ...
#   if is-array(tmp) ...

# Not implemented yet: only the prolog and epilog exist, so calling this
# returns immediately without touching 'result'.
next-token:  # in : (address buffered-file), line : (address stream), result : (address slice)
    # pseudocode:
    #   if (line->read >= line->write)
    #     read-line-buffered(in, line)
    #     recurse
    #   if (line->data[line->read] == ' ')
    #     skip-chars-matching-whitespace(line)
    #     recurse
    #   if (line->data[line->read] == '#')
    #     read-line-buffered(in, line)
    #     recurse
    #   eax = line->data[line->read]
    #   if (eax == '"')
    #     result->start = &line->data[line->read]
    #     skip-string(in)
    #     result->end = &line->data[line->read]
    #     return
    #   if (is-digit(eax))
    #     result->start = &line->data[line->read]
    #     skip-hex-int(in)
    #     result->end = &line->data[line->read]
    #     return
    #   if (eax in '(' ')' '[' ']')
    #     result->start = &line->data[line->read]
    #     ++line->read
    #     result->end = &line->data[line->read]
    #     return
    #   else
    #     result->start = &line->data[line->read]
    #     skip-lisp-word(line)
    #     result->end = &line->data[line->read]
    #     return
    #
    # . prolog
    55/push-ebp
    89/<- %ebp 4/r32/esp
    # . save registers
$next-token:end:
    # . reclaim locals
    # . restore registers
    # . epilog
    89/<- %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return

# NOTE(review): the next two labels have no body yet. As laid out here, a
# call to either one falls straight through into lisp-eval below.
new-int-cell:  # in : (address slice) -> eax : (address cell)

new-string-cell:  # in : (address slice) -> eax : (address cell)

# Evaluate 'in'. For now this is the identity function: it returns its
# argument unchanged in eax.
lisp-eval:  # in : (address cell) -> eax : (address cell)
    # . prolog
    55/push-ebp
    89/<- %ebp 4/r32/esp
    # . save registers
    8b/-> *(ebp+8) 0/r32/eax
$lisp-eval:end:
    # . restore registers
    # . epilog
    89/<- %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return

# Print "=> " followed by the contents of x to 'out', then flush 'out'.
# x is passed to write-stream-data, i.e. it is treated as a stream for now.
lisp-print:  # out : (address buffered-file), x : (address cell)
    # . prolog
    55/push-ebp
    89/<- %ebp 4/r32/esp
    # . save registers
    # write(x) to the 'out' argument rather than a hard-coded Stdout
    (write-buffered *(ebp+8) "=> ")
    (write-stream-data *(ebp+8) *(ebp+0xc))
    (flush *(ebp+8))
$lisp-print:end:
    # . restore registers
    # . epilog
    89/<- %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return

== data

# NOTE(review): the cell layout documented for this file is 12 bytes (tag + 2
# data words), but only one data word is visible here -- confirm whether a
# second 0/imm32/data is needed.
Nil:
    0/imm32/tag
    0/imm32/data