https://github.com/akkartik/mu/blob/master/apps/mulisp.subx
  1 # Toy lisp interpreter. Incomplete.
  2 #
  3 # To run:
  4 #   $ ./translate_subx init.linux 0*.subx apps/mulisp.subx
  5 #   $ ./a.elf
  6 #   42
  7 #   => 42
  8 #   ^D
  9 #   $
 10 
 11 == code
 12 
 13 Entry:  # run tests if necessary, a REPL if not
 14     # . prologue
 15     89/<- %ebp 4/r32/esp
 16     # initialize heap
 17     (new-segment *Heap-size Heap)
 18     {
 19       # if (argc <= 1) break
 20       81 7/subop/compare *ebp 1/imm32
 21       7e/jump-if-<= break/disp8
 22       # if (argv[1] != "test")) break
 23       (kernel-string-equal? *(ebp+8) "test")  # => eax
 24       3d/compare-eax-and 0/imm32
 25       74/jump-if-= break/disp8
 26       #
 27       (run-tests)
 28       # syscall(exit, *Num-test-failures)
 29       8b/-> *Num-test-failures 3/r32/ebx
 30       eb/jump $main:end/disp8
 31     }
 32     (repl Stdin Stdout)
 33     # syscall(exit, 0)
 34     bb/copy-to-ebx 0/imm32
 35 $main:end:
 36     b8/copy-to-eax 1/imm32/exit
 37     cd/syscall 0x80/imm8
 38 
 39 # Data structures
 40 #
 41 # Lisp is dynamically typed. Values always carry around knowledge of their
 42 # type.
 43 #
 44 # There are several kinds of types in the description below, so we need a
 45 # glossary and notational convention to disambiguate:
 46 #   lisp type: what Lisp code can see. Looks how you type it at the prompt.
 47 #     nil num char string symbol pair array
 48 #   type tag: the numeric code for a lisp type. All caps.
 49 #     NIL NUM CHAR STRING SYMBOL PAIR ARRAY
 50 #   memory type: a type specifying memory layout at the SubX level. Starts
 51 #   with a '$'.
 52 #     $int $array $(addr _)
 53 #
 54 # Lisp values are represented in memory by the _cell_ data structure. A cell
 55 # is 12 bytes long:
 56 #   tag: $int (4 bytes; we're not concerned about wasting space)
 57 #   data: 8 bytes whose contents and meaning depend on tag
 58 #
 59 # What values of the different Lisp types look like in memory:
 60 #   - nil: cell{ tag: 0/NIL, data: 0 0 }
 61 #   - num: cell{ tag: 1/NUM, data: $int 0 }
 62 #     data contains the number
 63 #   - char: cell{ tag: 2/CHAR, data: $int 0 }
 64 #     data contains the utf-8 code of the character (no compound glyphs, no
 65 #     modifiers, etc., etc.)
 66 #   - string: cell{ tag: 3/STRING, data: $(addr stream byte) 0 }
 67 #     data contains an (addr array byte) containing the string in utf-8
 68 #   - symbol: cell{ tag: 4/SYMBOL, data: $(addr array byte) 0 }
 69 #     data contains an (addr array byte) containing the name of the symbol in utf-8
 70 #     alternatively, data could contain an index into the table of interned symbols
 71 #   - pair: cell{ tag: 5/PAIR, data: $(addr cell) $(addr cell)  }
 72 #     data contains pointers to car and cdr
 73 #   - array: cell{ tag: 6/ARRAY, data: $tag $(addr stream data) }
 74 #     data contains a pointer to an array of 8-byte data fields and the common
 75 #     tag for them all
 76 
 77 repl:  # in : (addr buffered-file), out : (addr buffered-file)
 78     # . prologue
 79     55/push-ebp
 80     89/<- %ebp 4/r32/esp
 81     # . save registers
 82     50/push-eax
 83     {
 84       (lisp-read Stdin)  # => eax : (handle cell)
 85       # if (eax == 0) break
 86       3d/compare-eax-and 0/imm32
 87       74/jump-if-= break/disp8
 88       #
 89       (lisp-eval %eax)  # => eax : (handle cell)
 90       (lisp-print Stdout %eax)
 91       eb/jump loop/disp8
 92     }
 93 $repl:end:
 94     # . restore registers
 95     58/pop-to-eax
 96     # . epilogue
 97     89/<- %esp 5/r32/ebp
 98     5d/pop-to-ebp
 99     c3/return
100 
# numbers start with a digit and are always in hex
# characters start with a backslash
# pairs start with '('
# arrays start with '['
# symbols start with anything else but quote, backquote, unquote or splice
# only one s-expression per line
#
# Current state: no parsing happens yet. One line is read from 'in' into a
# stream, and the address of that stream is returned (0 if nothing was read).
# The stream is allocated on Heap rather than in this function's stack frame,
# because a stack-allocated stream is reclaimed before 'return' and the caller
# would be handed a dangling address.
# NOTE(review): nothing here frees the stream; each call allocates a new one.
lisp-read:  # in : (addr buffered-file) -> eax : (handle cell)
    # . prologue
    55/push-ebp
    89/<- %ebp 4/r32/esp
    # . save registers
    51/push-ecx
    # var s/ecx : (addr stream byte 512) = new-stream(Heap, 512 elements, 1 byte each)
    (new-stream Heap 0x200 1)  # => eax
    89/<- %ecx 0/r32/eax
    # read line into s
    (clear-stream %ecx)
    (read-line-buffered *(ebp+8) %ecx)
    # result = s
    89/<- %eax 1/r32/ecx
    # if (s->write != 0) return s
    81 7/subop/compare *ecx 0/imm32
    75/jump-if-!= $lisp-read:end/disp8
    # otherwise nothing was read: return null
    b8/copy-to-eax 0/imm32/eof
    # TODO: parse s into a cell instead of returning the raw line
$lisp-read:end:
    # . restore registers
    59/pop-to-ecx
    # . epilogue
    89/<- %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return
144 
145 # lisp-read:  in : (addr buffered-file) -> (handle cell)
146 #   token tmp = next-mulisp-token(in)
147 #   if is-int(tmp) return cell(tmp)
148 #   if is-string(tmp) return cell(tmp)
149 #   if is-pair(tmp) ...
150 #   if is-array(tmp) ...
151 
152 next-mulisp-token:  # in : (addr buffered-file), line : (addr stream byte), result : (addr slice)
153     # pseudocode (not implemented yet; the code below is only a prologue and an epilogue):
154     #   if (line->read >= line->write)
155     #     read-line-buffered(in, line)
156     #     recurse
157     #   if (line->data[line->read] == ' ')
158     #     skip-chars-matching-whitespace(line)
159     #     recurse
160     #   if (line->data[line->read] == '#')
161     #     read-line-buffered(in, line)
162     #     recurse
163     #   eax = line->data[line->read]
164     #   if (eax == '"')
165     #     result->start = &line->data[line->read]
166     #     skip-string(line)
167     #     result->end = &line->data[line->read]
168     #     return
169     #   if (is-digit(eax))
170     #     result->start = &line->data[line->read]
171     #     skip-hex-int(line)
172     #     result->end = &line->data[line->read]
173     #     return
174     #   if (eax in '(' ')' '[' ']')
175     #     result->start = &line->data[line->read]
176     #     ++line->read
177     #     result->end = &line->data[line->read]
178     #     return
179     #   else
180     #     result->start = &line->data[line->read]
181     #     skip-lisp-word(line)
182     #     result->end = &line->data[line->read]
183     #     return
184     # NOTE: the current code never reads 'in' or 'line' and never writes 'result'.
185     # . prologue
186     55/push-ebp
187     89/<- %ebp 4/r32/esp
188     # . save registers (none: the body does not use any yet)
189 $next-mulisp-token:end:
190     # . reclaim locals (none)
191     # . restore registers (none)
192     # . epilogue
193     89/<- %esp 5/r32/ebp
194     5d/pop-to-ebp
195     c3/return
196 
197 new-int-cell:  # in : (addr slice) -> eax : (handle cell)  [placeholder: label only, no body yet; execution falls through to the code that follows]
198 
199 new-string-cell:  # in : (addr slice) -> eax : (handle cell)  [placeholder: label only, no body yet; execution falls through to the code that follows]
200 
lisp-eval:  # in : (addr cell) -> eax : (handle cell)
    # Placeholder evaluator: hands its argument back unchanged in eax.
    #
    # . prologue
    55/push-ebp
    89/copy %ebp 4/r32/esp
    # result/eax = in
    8b/copy *(ebp+8) 0/r32/eax
$lisp-eval:end:
    # . epilogue
    89/copy %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return
213 
lisp-print:  # out : (addr buffered-file), x : (addr cell)
    # Write "=> " followed by x to 'out', then flush 'out'.
    # x is passed straight to write-stream-data, so for now it is treated as
    # a stream of bytes rather than decoded as a cell.
    #
    # . prologue
    55/push-ebp
    89/<- %ebp 4/r32/esp
    # write(out, "=> ")
    # (use the 'out' argument rather than a hard-coded Stdout)
    (write-buffered *(ebp+8) "=> ")
    # write(out, x)
    (write-stream-data *(ebp+8) *(ebp+0xc))
    # make the output visible immediately
    (flush *(ebp+8))
$lisp-print:end:
    # . epilogue
    89/<- %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return
229 
230 == data
231 
# The nil value: cell{ tag: 0/NIL, data: 0 0 }
# A cell is 12 bytes: a 4-byte tag followed by 8 bytes of data, so both data
# words must be present.
Nil:
  0/imm32/tag
  0/imm32/data
  0/imm32/data