https://github.com/akkartik/mu/blob/master/apps/mulisp.subx
  1 # Toy lisp interpreter. Incomplete.
  2 #
  3 # To run:
  4 #   $ ./translate_subx init.linux 0*.subx apps/mulisp.subx
  5 #   $ ./a.elf
  6 #   42
  7 #   => 42
  8 #   ^D
  9 #   $
 10 
 11 == code
 12 
 13 Entry:  # run tests if necessary, a REPL if not
    #
    # Program entry point. Never returns: both paths below end in the call to
    # syscall_exit at $main:end, with the exit status in ebx.
    #
    # No caller frame is saved here (there is no push of ebp); ebp is simply
    # pointed at the stack as it was on entry, so that the code below can read
    #   *ebp      as argc
    #   *(ebp+8)  as argv[1]
 14     # . prologue
 15     89/<- %ebp 4/r32/esp
 16     # initialize heap
 17     (new-segment *Heap-size Heap)
 18     {
 19       # if (argc <= 1) break
 20       81 7/subop/compare *ebp 1/imm32
 21       7e/jump-if-<= break/disp8
 22       # if (argv[1] != "test") break
 23       (kernel-string-equal? *(ebp+8) "test")  # => eax
 24       3d/compare-eax-and 0/imm32/false
 25       74/jump-if-= break/disp8
 26       # test mode: run the test suite and exit with the failure count
 27       (run-tests)
 28       # syscall(exit, *Num-test-failures)
 29       8b/-> *Num-test-failures 3/r32/ebx
 30       eb/jump $main:end/disp8
 31     }
        # interactive mode: read from Stdin, print to Stdout until repl returns
 32     (repl Stdin Stdout)
 33     # syscall(exit, 0)
 34     bb/copy-to-ebx 0/imm32
 35 $main:end:
        # shared exit for both modes; ebx was set by whichever path got here
 36     e8/call  syscall_exit/disp32
 37 
 38 # Data structures
 39 #
 40 # Lisp is dynamically typed. Values always carry around knowledge of their
 41 # type.
 42 #
 43 # There are several kinds of types in the description below, so we need a
 44 # glossary and notational convention to disambiguate:
 45 #   lisp type: what Lisp code can see. Looks how you type it at the prompt.
 46 #     nil num char string symbol pair array
 47 #   type tag: the numeric code for a lisp type. All caps.
 48 #     NIL NUM CHAR STRING SYMBOL PAIR ARRAY
 49 #   memory type: a type specifying memory layout at the SubX level. Starts
 50 #   with a '$'.
 51 #     $int $array $(addr _)
 52 #
 53 # Lisp values are represented in memory by the _cell_ data structure. A cell
 54 # is 12 bytes long:
 55 #   tag: $int (4 bytes; we're not concerned about wasting space)
 56 #   data: 8 bytes whose contents and meaning depend on tag
 57 #
 58 # What values of the different Lisp types look like in memory:
 59 #   - nil: cell{ tag: 0/NIL, data: 0 0 }
 60 #   - num: cell{ tag: 1/NUM, data: $int 0 }
 61 #     data contains the number
 62 #   - char: cell{ tag: 2/CHAR, data: $int 0 }
 63 #     data contains the utf-8 code of the character (no compound glyphs, no
 64 #     modifiers, etc., etc.)
 65 #   - string: cell{ tag: 3/STRING, data: $(addr stream byte) 0 }
 66 #     data contains an (addr array byte) containing the string in utf-8
 67 #   - symbol: cell{ tag: 4/SYMBOL, data: $(addr array byte) 0 }
 68 #     data contains an (addr array byte) containing the name of the symbol in utf-8
 69 #     alternatively, data could contain an index into the table of interned symbols
 70 #   - pair: cell{ tag: 5/PAIR, data: $(addr cell) $(addr cell)  }
 71 #     data contains pointers to car and cdr
 72 #   - array: cell{ tag: 6/ARRAY, data: $tag $(addr stream data) }
 73 #     data contains a pointer to an array of 8-byte data fields and the common
 74 #     tag for them all
 75 
 76 repl:  # in: (addr buffered-file), out: (addr buffered-file)
    #
    # Read-eval-print loop. Repeatedly:
    #   - reads one s-expression from 'in' with lisp-read
    #   - stops if lisp-read returns 0 (eof)
    #   - evaluates it with lisp-eval
    #   - prints the result to 'out' with lisp-print
    #
    # Arguments:
    #   in:  *(ebp+8)    where input is read from
    #   out: *(ebp+0xc)  where results are printed
    # Registers: eax is saved and restored, so there is no result for callers.
    #
    # The loop reads from 'in' and prints to 'out' rather than naming Stdin and
    # Stdout directly, so the function honors its own signature. Callers that
    # pass Stdin and Stdout see no difference.
 77     # . prologue
 78     55/push-ebp
 79     89/<- %ebp 4/r32/esp
 80     # . save registers
 81     50/push-eax
 82     {
 83       (lisp-read *(ebp+8))  # => eax: (handle cell)
 84       # if (eax == 0) break
 85       3d/compare-eax-and 0/imm32
 86       74/jump-if-= break/disp8
 87       #
 88       (lisp-eval %eax)  # => eax: (handle cell)
 89       (lisp-print *(ebp+0xc) %eax)
 90       eb/jump loop/disp8
 91     }
 92 $repl:end:
 93     # . restore registers
 94     58/pop-to-eax
 95     # . epilogue
 96     89/<- %esp 5/r32/ebp
 97     5d/pop-to-ebp
 98     c3/return
 99 
100 # numbers start with a digit and are always in hex
101 # characters start with a backslash
102 # pairs start with '('
103 # arrays start with '['
104 # symbols start with anything else but quote, backquote, unquote or splice
105 # only one s-expression per line
106 lisp-read:  # in: (addr buffered-file) -> eax: (handle cell)
    #
    # Placeholder reader: reads a single line from 'in' into a local stream and
    # returns it unparsed.
    #
    # Arguments:
    #   in: *(ebp+8)
    # Result (eax):
    #   0 if the line read was empty (s->write == 0), treated as eof
    #   otherwise the address of the local stream 's' holding the line
    # Registers: ecx is saved and restored.
    #
    # NOTE(review): the signature promises a (handle cell), but what is
    # returned is the address of a stream, not a tagged cell.
    # NOTE(review): 's' lives in this function's stack frame, which is released
    # at $lisp-read:end before returning. The address in eax therefore points
    # at stack memory this function no longer owns -- confirm that callers
    # consume it before it can be overwritten, or move 's' elsewhere.
107     # . prologue
108     55/push-ebp
109     89/<- %ebp 4/r32/esp
110     # . save registers
111     51/push-ecx
112     # var s/ecx: (stream byte 512)
        # layout from ecx upwards: write, read, size, then 0x200 bytes of data
113     81 5/subop/subtract %esp 0x200/imm32
114     68/push 0x200/imm32/size
115     68/push 0/imm32/read
116     68/push 0/imm32/write
117     89/<- %ecx 4/r32/esp
        # this block is shaped like a loop, but the jump back to its top is
        # still commented out, so it runs exactly once
118     {
119       # read line into s
120       (clear-stream %ecx)
121       (read-line-buffered *(ebp+8) %ecx)
122       # if (s->write == 0) return null
123       {
            # *ecx is s->write, the first word of the stream
124         81 7/subop/compare *ecx 0/imm32
125         75/jump-if-!= break/disp8
126         b8/copy-to-eax 0/imm32/eof
127         eb/jump $lisp-read:end/disp8
128       }
129       # ...
130 #?       eb/jump loop/disp8
131     }
132     # return s
133     89/<- %eax 1/r32/ecx
134 $lisp-read:end:
135     # . reclaim locals
        # 0x20c = 0x200 bytes of data + 3 header words (write, read, size)
136     81 0/subop/add %esp 0x20c/imm32
137     # . restore registers
138     59/pop-to-ecx
139     # . epilogue
140     89/<- %esp 5/r32/ebp
141     5d/pop-to-ebp
142     c3/return
143 
144 # lisp-read:  in: (addr buffered-file) -> (handle cell)
145 #   token tmp = next-mulisp-token(in)
146 #   if is-int(tmp) return cell(tmp)
147 #   if is-string(tmp) return cell(tmp)
148 #   if is-pair(tmp) ...
149 #   if is-array(tmp) ...
150 
151 next-mulisp-token:  # in: (addr buffered-file), line: (addr stream byte), result: (addr slice)
    #
    # Intended to store the bounds of the next token in 'result', refilling
    # 'line' from 'in' as needed (see pseudocode).
    #
    # NOTE: not implemented yet. The body below is only a prologue and an
    # epilogue; none of the pseudocode exists as instructions, and 'result' is
    # never written.
    #
152     # pseudocode:
153     #   if (line->read >= line->write)
154     #     read-line-buffered(in, line)
155     #     recurse
156     #   if (line->data[line->read] == ' ')
157     #     skip-chars-matching-whitespace(line)
158     #     recurse
159     #   if (line->data[line->read] == '#')
160     #     read-line-buffered(in, line)
161     #     recurse
162     #   eax = line->data[line->read]
163     #   if (eax == '"')
164     #     result->start = &line->data[line->read]
165     #     skip-string(line)
166     #     result->end = &line->data[line->read]
167     #     return
168     #   if (is-digit(eax))
169     #     result->start = &line->data[line->read]
170     #     skip-hex-int(line)
171     #     result->end = &line->data[line->read]
172     #     return
173     #   if (eax in '(' ')' '[' ']')
174     #     result->start = &line->data[line->read]
175     #     ++line->read
176     #     result->end = &line->data[line->read]
177     #     return
178     #   else
179     #     result->start = &line->data[line->read]
180     #     skip-lisp-word(line)
181     #     result->end = &line->data[line->read]
182     #     return
183     #
184     # . prologue
185     55/push-ebp
186     89/<- %ebp 4/r32/esp
187     # . save registers
        # (none yet)
188 $next-mulisp-token:end:
189     # . reclaim locals
190     # . restore registers
191     # . epilogue
192     89/<- %esp 5/r32/ebp
193     5d/pop-to-ebp
194     c3/return
195 
196 new-int-cell:  # in: (addr slice) -> eax: (handle cell)
    # NOTE(review): not implemented yet. This label has no instructions of its
    # own; the next instructions in this segment belong to lisp-eval, so a call
    # here would presumably run that code instead -- give it a body before use.
197 
198 new-string-cell:  # in: (addr slice) -> eax: (handle cell)
    # NOTE(review): not implemented yet. This label has no instructions of its
    # own; the next instructions in this segment belong to lisp-eval, so a call
    # here would presumably run that code instead -- give it a body before use.
199 
200 lisp-eval:  # in: (addr cell) -> eax: (handle cell)
    #
    # Placeholder evaluator: returns its argument unchanged.
    #
    # Arguments:
    #   in: *(ebp+8)
    # Result (eax): the same value that was passed as 'in'.
    # No other registers are written, so nothing is saved or restored.
201     # . prologue
202     55/push-ebp
203     89/<- %ebp 4/r32/esp
204     # . save registers
        # (none)
        # result = in
205     8b/-> *(ebp+8) 0/r32/eax
206 $lisp-eval:end:
207     # . restore registers
208     # . epilogue
209     89/<- %esp 5/r32/ebp
210     5d/pop-to-ebp
211     c3/return
212 
213 lisp-print:  # out: (addr buffered-file), x: (addr cell)
    #
    # Prints "=> " followed by x to 'out', then flushes 'out'.
    #
    # Arguments:
    #   out: *(ebp+8)    destination file
    #   x:   *(ebp+0xc)  value to print
    #
    # All output goes to 'out' rather than to Stdout directly, so the function
    # honors its own signature. Callers that pass Stdout see no difference.
    #
    # NOTE(review): x is handed straight to write-stream-data, so for now it
    # has to be the address of a stream rather than a tagged cell -- revisit
    # once real cells exist.
214     # . prologue
215     55/push-ebp
216     89/<- %ebp 4/r32/esp
217     # . save registers
        # (none: this function writes no registers itself)
218     # write(x)
219     (write-buffered *(ebp+8) "=> ")
220     (write-stream-data *(ebp+8) *(ebp+0xc))
        # flush so that the result shows up before the next read
221     (flush *(ebp+8))
222 $lisp-print:end:
223     # . restore registers
224     # . epilogue
225     89/<- %esp 5/r32/ebp
226     5d/pop-to-ebp
227     c3/return
228 
229 == data
230 
231 Nil:
    # The nil cell. A cell is 12 bytes: a 4-byte tag followed by 8 bytes of
    # data, and nil is cell{ tag: 0/NIL, data: 0 0 }. Both data words are
    # spelled out so that Nil occupies a full cell.
232   0/imm32/tag
233   0/imm32/data
      0/imm32/data