# Source: https://github.com/akkartik/mu/blob/master/apps/mulisp.subx
#
# Toy lisp interpreter. Incomplete.
#
# To run:
#   $ ./ntranslate init.linux 0*.subx apps/subx-params.subx apps/mulisp.subx
#   $ ./a.elf
#   42
#   => 42
#   ^D
#   $

 11 == code
 12 
 13 Entry:  # run tests if necessary, a REPL if not
 14     # . prolog
 15     89/<- %ebp 4/r32/esp
 16     # initialize heap
 17     (new-segment Heap-size Heap)
 18     # if (argc <= 1) goto run-main
 19     81 7/subop/compare *ebp 1/imm32
 20     7e/jump-if-lesser-or-equal $run-main/disp8
 21     # if (argv[1] != "test")) goto run-main
 22     (kernel-string-equal? *(ebp+8) "test")  # => eax
 23     3d/compare-eax-and 0/imm32
 24     74/jump-if-equal $run-main/disp8
 25     #
 26     (run-tests)
 27     # syscall(exit, *Num-test-failures)
 28     8b/-> *Num-test-failures 3/r32/ebx
 29     eb/jump $main:end/disp8
 30 $run-main:
 31     (repl Stdin Stdout)
 32     # syscall(exit, 0)
 33     bb/copy-to-ebx 0/imm32
 34 $main:end:
 35     b8/copy-to-eax 1/imm32/exit
 36     cd/syscall 0x80/imm8
 37 
# Data structures
#
# Lisp is dynamically typed. Values always carry around knowledge of their
# type.
#
# There are several kinds of types in the description below, so we need a
# glossary and notational convention to disambiguate:
#   lisp type: what Lisp code can see. Looks the way you type it at the prompt.
#     nil num char string symbol pair array
#   type tag: the numeric code for a lisp type. All caps.
#     NIL NUM CHAR STRING SYMBOL PAIR ARRAY
#   memory type: a type specifying memory layout at the SubX level. Starts
#   with a '$'.
#     $int $array $(address _)
#
# Lisp values are represented in memory by the _cell_ data structure. A cell
# is 12 bytes long:
#   tag: $int (4 bytes; we're not concerned about wasting space)
#   data: 8 bytes whose contents and meaning depend on tag
#
# What values of the different Lisp types look like in memory:
#   - nil: cell{ tag: 0/NIL, data: 0 0 }
#   - num: cell{ tag: 1/NUM, data: $int 0 }
#     data contains the number
#   - char: cell{ tag: 2/CHAR, data: $int 0 }
#     data contains the utf-8 code of the character (no compound glyphs, no
#     modifiers, etc., etc.)
#   - string: cell{ tag: 3/STRING, data: $(address stream byte) 0 }
#     data contains an (address array byte) containing the string in utf-8
#   - symbol: cell{ tag: 4/SYMBOL, data: $(address array byte) 0 }
#     data contains an (address array byte) containing the name of the symbol in utf-8
#     alternatively, data could contain an index into the table of interned symbols
#   - pair: cell{ tag: 5/PAIR, data: $(address cell) $(address cell) }
#     data contains pointers to car and cdr
#   - array: cell{ tag: 6/ARRAY, data: $tag $(address stream data) }
#     data contains a pointer to an array of 8-byte data fields and the common
#     tag for them all

 76 repl:  # in : (address buffered-file), out : (address buffered-file) -> <void>
 77     # . prolog
 78     55/push-ebp
 79     89/<- %ebp 4/r32/esp
 80     # . save registers
 81     50/push-eax
 82 $repl:loop:
 83     (lisp-read Stdin)  # => eax : (address cell)
 84     # if (eax == 0) break
 85     3d/compare-eax-and 0/imm32
 86     74/jump-if-equal $repl:end/disp8
 87     #
 88     (lisp-eval %eax)  # => eax : (address cell)
 89     (lisp-print Stdout %eax)
 90     # loop
 91     eb/jump $repl:loop/disp8
 92 $repl:end:
 93     # . restore registers
 94     58/pop-to-eax
 95     # . epilog
 96     89/<- %esp 5/r32/ebp
 97     5d/pop-to-ebp
 98     c3/return
 99 
# numbers start with a digit and are always in hex
# characters start with a backslash
# pairs start with '('
# arrays start with '['
# symbols start with anything else except quote, backquote, unquote or splice
# only one s-expression per line
lisp-read:  # in : (address buffered-file) -> eax : (address cell)
    # Read one line from 'in' into a 512-byte stream allocated in this frame.
    # Returns the address of that stream in eax, or 0 (null) if the read left
    # the stream empty (s->write == 0). ecx is preserved.
    #
    # NOTE(review): the stream is a local; its 0x20c bytes (3-word header +
    # 0x200 bytes of data) are reclaimed before returning, so the returned
    # address points below the caller's esp. It needs to move to longer-lived
    # storage once real cells are constructed.
    #
    # . prolog
    55/push-ebp
    89/<- %ebp 4/r32/esp
    # . save registers
    51/push-ecx
    # var s/ecx : (address stream) = new-stream(512)
    81 5/subop/subtract %esp 0x200/imm32
    68/push 0x200/imm32/size
    68/push 0/imm32/read
    68/push 0/imm32/write
    89/<- %ecx 4/r32/esp
    # read line into s
    (clear-stream %ecx)
    (read-line-buffered *(ebp+8) %ecx)
    # result = s
    89/<- %eax 1/r32/ecx
    # if (s->write != 0) return s
    # (the copy above does not touch flags, and this compare sets them afresh)
    81 7/subop/compare *ecx 0/imm32
    75/jump-if-not-equal $lisp-read:end/disp8
    # otherwise return null
    b8/copy-to-eax 0/imm32/eof
$lisp-read:end:
    # . reclaim locals
    81 0/subop/add %esp 0x20c/imm32
    # . restore registers
    59/pop-to-ecx
    # . epilog
    89/<- %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return

# Sketch of a token-based lisp-read (not implemented yet):
# lisp-read:  in : (address buffered-file) -> (address cell)
#   token tmp = next-token(in)
#   if is-int(tmp) return cell(tmp)
#   if is-string(tmp) return cell(tmp)
#   if is-pair(tmp) ...
#   if is-array(tmp) ...

next-token:  # in : (address buffered-file), line : (address stream), result : (address slice)
    # Not implemented yet: the instructions below only set up and tear down
    # the stack frame. No argument is read and 'result' is not written.
    #
    # Planned behavior, as pseudocode:
    #   loop:
    #     if (line->read >= line->write)          # nothing left in line
    #       read-line-buffered(in, line)
    #       continue
    #     eax = line->data[line->read]
    #     if (eax == ' ')
    #       skip-chars-matching-whitespace(line)
    #       continue
    #     if (eax == '#')                         # comment
    #       read-line-buffered(in, line)
    #       continue
    #     break
    #   result->start = &line->data[line->read]
    #   if (eax == '"')
    #     skip-string(in)
    #   else if (is-digit(eax))
    #     skip-hex-int(in)
    #   else if (eax in '(' ')' '[' ']')
    #     ++line->read
    #   else
    #     skip-lisp-word(line)
    #   result->end = &line->data[line->read]
    #   return
    #
    # NOTE(review): skip-string and skip-hex-int are written as taking 'in',
    # but result->end is computed from line->read; presumably they should
    # advance 'line' instead. Confirm when implementing.
    #
    # . prolog
    55/push-ebp
    89/<- %ebp 4/r32/esp
    # . save registers (none yet)
$next-token:end:
    # . reclaim locals (none yet)
    # . restore registers (none yet)
    # . epilog
    89/<- %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return

# new-int-cell and new-string-cell are placeholders with no instructions of
# their own. Until they get bodies, both labels resolve to the same address as
# lisp-eval below.
new-int-cell:  # in : (address slice) -> eax : (address cell)

new-string-cell:  # in : (address slice) -> eax : (address cell)

lisp-eval:  # in : (address cell) -> eax : (address cell)
    # Identity for now: hands back 'in' unchanged in eax.
    # No other register is written, so nothing is saved or restored.
    #
    # . prolog
    55/push-ebp
    89/<- %ebp 4/r32/esp
    # eax = in
    8b/-> *(ebp+8) 0/r32/eax
$lisp-eval:end:
    # . epilog
    89/<- %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return

lisp-print:  # out : (address buffered-file), x : (address cell)
    # Write "=> " and then the contents of x to 'out', and flush 'out'.
    # x is passed straight to write-stream-data, i.e. it is treated as a
    # stream rather than decoded as a tagged cell.
    #
    # . prolog
    55/push-ebp
    89/<- %ebp 4/r32/esp
    # write(out, "=> ")
    (write-buffered *(ebp+8) "=> ")
    # write(out, x)
    (write-stream-data *(ebp+8) *(ebp+0xc))
    # make the result visible right away
    (flush *(ebp+8))
$lisp-print:end:
    # . epilog
    89/<- %esp 5/r32/ebp
    5d/pop-to-ebp
    c3/return

== data

# The nil value, laid out as a full 12-byte cell: cell{ tag: 0/NIL, data: 0 0 }
Nil:
  0/imm32/tag
  # data: two unused words, so Nil is the same size as every other cell
  0/imm32/data
  0/imm32/data