https://github.com/akkartik/mu/blob/main/apps/crenshaw2-1b.subx
  1 # Port of https://github.com/akkartik/crenshaw/blob/master/tutor2.1.pas
  2 # which corresponds to the section "single digits" in https://compilers.iecc.com/crenshaw/tutor2.txt
  3 # except that we support hex numbers of multiple digits.
  4 #
  5 # To run:
  6 #   $ ./bootstrap translate init.linux 0*.subx apps/crenshaw2-1b.subx -o apps/crenshaw2-1b
  7 #   $ echo '1a'  |./bootstrap run apps/crenshaw2-1b
  8 # Expected output:
  9 #   # syscall(exit, 1a)
 10 #   bb/copy-to-ebx  3/imm32
 11 #   b8/copy-to-eax  1/imm32/exit
 12 #   cd/syscall  0x80/imm8
 13 #
 14 # To run the generated output:
 15 #   $ echo '1a'  |./bootstrap run apps/crenshaw2-1b > z1.subx
 16 #   $ ./bootstrap translate init.linux z1.subx -o z1
 17 #   $ ./bootstrap run z1
 18 #   $ echo $?
 19 #   26  # 0x1a in decimal
 20 #
 21 # Stdin must contain just a single hex digit. Other input will print an error:
 22 #   $ echo 'xyz'  |./bootstrap run apps/crenshaw2-1b
 23 #   Error: integer expected
 24 #
 25 # Names in this file sometimes follow Crenshaw's original rather than my usual
 26 # naming conventions.
 27 
 28 == code
 29 #   instruction                     effective address                                                   register    displacement    immediate
 30 # . op          subop               mod             rm32          base        index         scale       r32
 31 # . 1-3 bytes   3 bits              2 bits          3 bits        3 bits      3 bits        2 bits      2 bits      0/1/2/4 bytes   0/1/2/4 bytes
 32 
 33 Entry:  # run tests if necessary, call 'compile' if not
 34     # . prologue
 35     89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
 36 
 37     # initialize heap
 38     # . Heap = new-segment(Heap-size)
 39     # . . push args
 40     68/push  Heap/imm32
 41     ff          6/subop/push        0/mod/indirect  5/rm32/.disp32            .             .           .           Heap-size/disp32                  # push *Heap-size
 42     # . . call
 43     e8/call  new-segment/disp32
 44     # . . discard args
 45     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # add to esp
 46 
 47     # - if argc > 1 and argv[1] == "test", then return run_tests()
 48     # if (argc <= 1) goto run-main
 49     81          7/subop/compare     1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0/disp8         1/imm32           # compare *ebp
 50     7e/jump-if-<=  $run-main/disp8
 51     # if (!kernel-string-equal?(argv[1], "test")) goto run-main
 52     # . eax = kernel-string-equal?(argv[1], "test")
 53     # . . push args
 54     68/push  "test"/imm32
 55     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           8/disp8         .                 # push *(ebp+8)
 56     # . . call
 57     e8/call  kernel-string-equal?/disp32
 58     # . . discard args
 59     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # add to esp
 60     # . if (eax == false) goto run-main
 61     3d/compare-eax-and  0/imm32/false
 62     74/jump-if-=  $run-main/disp8
 63     # run-tests()
 64     e8/call  run-tests/disp32
 65     # syscall(exit, *Num-test-failures)
 66     8b/copy                         0/mod/indirect  5/rm32/.disp32            .             .           3/r32/ebx   Num-test-failures/disp32          # copy *Num-test-failures to ebx
 67     eb/jump  $main:end/disp8
 68 $run-main:
 69     # - otherwise read a program from stdin and emit its translation to stdout
 70     # . compile(Stdin, 1/stdout, 2/stderr, 0)
 71     # . . push args
 72     68/push  0/imm32/exit-descriptor
 73     68/push  2/imm32/stderr
 74     68/push  1/imm32/stdout
 75     68/push  Stdin/imm32
 76     # . . call
 77     e8/call  compile/disp32
 78     # . . discard args
 79     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0x10/imm32        # add to esp
 80     # syscall(exit, 0)
 81     bb/copy-to-ebx  0/imm32
 82 $main:end:
 83     e8/call  syscall_exit/disp32
 84 
 85 # the main entry point
 86 compile:  # in: (addr buffered-file), out: fd or (addr stream byte), err: fd or (addr stream byte), ed: (addr exit-descriptor)
 87     # . prologue
 88     55/push-ebp
 89     89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
 90     # . save registers
 91     50/push-eax
 92     51/push-ecx
 93     # prime the pump
 94     # . Look = get-char(in)
 95     # . . push args
 96     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           8/disp8      .                    # push *(ebp+8)
 97     # . . call
 98     e8/call  get-char/disp32
 99     # . . discard args
100     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
101     # var num/ecx: (stream byte 7)
102     # Numbers can be 32 bits or 8 hex bytes long. One of them will be in 'Look', so we need space for 7 bytes.
103     # Sizing the stream just right buys us overflow-handling for free inside 'get-num'.
104     # Add 12 bytes for 'read', 'write' and 'size' fields, for a total of 19 bytes, or 0x13 in hex.
105     # The stack pointer is no longer aligned, so dump_stack() can be misleading past this point.
106     81          5/subop/subtract    3/mod/direct    4/rm32/esp    .           .             .           .           .               0x13/imm32        # subtract from esp
107     89/copy                         3/mod/direct    1/rm32/ecx    .           .             .           4/r32/esp   .               .                 # copy esp to ecx
108     # initialize the stream
109     # . num->size = 7
110     c7          0/subop/copy        1/mod/*+disp8   1/rm32/ecx    .           .             .           .           8/disp8         7/imm32           # copy to *(ecx+8)
111     # . clear-stream(num)
112     # . . push args
113     51/push-ecx
114     # . . call
115     e8/call  clear-stream/disp32
116     # . . discard args
117     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
118     # read a digit from 'in' into 'num'
119     # . get-num(in, num, err, ed)
120     # . . push args
121     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0x14/disp8      .                 # push *(ebp+20)
122     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0x10/disp8      .                 # push *(ebp+16)
123     51/push-ecx/num
124     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           8/disp8      .                    # push *(ebp+8)
125     # . . call
126     e8/call  get-num/disp32
127     # . . discard args
128     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0x10/imm32        # add to esp
129     # render 'num' into the following template on 'out':
130     #   bb/copy-to-ebx  _num_
131     #   b8/copy-to-eax  1/imm32/exit
132     #   cd/syscall  0x80/imm8
133     #
134     # . write(out, "bb/copy-to-ebx  ")
135     # . . push args
136     68/push  "bb/copy-to-ebx  "/imm32
137     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0xc/disp8       .                 # push *(ebp+12)
138     # . . call
139     e8/call  write/disp32
140     # . . discard args
141     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # add to esp
142     # . write-stream(out, num)
143     # . . push args
144     51/push-ecx/num
145     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0xc/disp8       .                 # push *(ebp+12)
146     # . . call
147     e8/call  write-stream/disp32
148     # . . discard args
149     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # add to esp
150     # . write(out, Newline)
151     # . . push args
152     68/push  Newline/imm32
153     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0xc/disp8       .                 # push *(ebp+12)
154     # . . call
155     e8/call  write/disp32
156     # . . discard args
157     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # add to esp
158     # . write(out, "b8/copy-to-eax  1/imm32/exit\n")
159     # . . push args
160     68/push  "b8/copy-to-eax  1/imm32/exit\n"/imm32
161     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0xc/disp8       .                 # push *(ebp+12)
162     # . . call
163     e8/call  write/disp32
164     # . . discard args
165     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # add to esp
166     # . write(out, "cd/syscall  0x80/imm8\n")
167     # . . push args
168     68/push  "cd/syscall  0x80/imm8\n"/imm32
169     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0xc/disp8       .                 # push *(ebp+12)
170     # . . call
171     e8/call  write/disp32
172     # . . discard args
173     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # add to esp
174 $compile:end:
175     # . restore registers
176     59/pop-to-ecx
177     58/pop-to-eax
178     # . epilogue
179     89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
180     5d/pop-to-ebp
181     c3/return
182 
183 # Read a sequence of digits into 'out'. Abort if there are none, or if there is
184 # no space in 'out'.
185 # Input comes from the global variable 'Look' (first byte) and the argument
186 # 'in' (rest). We leave the next byte from 'in' into 'Look' on exit.
187 get-num:  # in: (addr buffered-file), out: (addr stream byte), err: fd or (addr stream byte), ed: (addr exit-descriptor)
188     # pseudocode:
189     #   if (!is-digit?(Look)) expected(ed, err, "integer")
190     #   do
191     #     if out->write >= out->size
192     #       write(err, "Error: too many digits in number\n")
193     #       stop(ed, 1)
194     #     out->data[out->write] = LSB(Look)
195     #     ++out->write
196     #     Look = get-char(in)
197     #   while is-digit?(Look)
198     # This is complicated because I don't want to hard-code the error strategy in
199     # a general helper like write-byte-buffered. Maybe I should just create a
200     # local helper.
201     #
202     # within the loop we'll try to keep things in registers:
203     #   in: esi
204     #   out: edi
205     #   out->write: ecx (cached copy; need to keep in sync)
206     #   out->size: edx
207     #   temporaries: eax, ebx
208     # We can't allocate Look to a register because it gets written implicitly in
209     # get-char in each iteration of the loop. (Thereby demonstrating that it's
210     # not the right interface for us. But we'll keep it just to follow Crenshaw.)
211     #
212     # . prologue
213     55/push-ebp
214     89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
215     # - if (is-digit?(Look)) expected(ed, err, "integer")
216     # . eax = is-digit?(Look)
217     # . . push args
218     ff          6/subop/push        0/mod/indirect  5/rm32/.disp32            .             .           .           Look/disp32     .                 # push *Look
219     # . . call
220     e8/call  is-digit?/disp32
221     # . . discard args
222     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
223     # . if (eax == false)
224     3d/compare-eax-and  0/imm32/false
225     75/jump-if-!=  $get-num:main/disp8
226     # . expected(ed, err, "integer")
227     # . . push args
228     68/push  "integer"/imm32
229     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0x10/disp8      .                 # push *(ebp+16)
230     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0x14/disp8      .                 # push *(ebp+20)
231     # . . call
232     e8/call  expected/disp32  # never returns
233     # . . discard args
234     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0xc/imm32         # add to esp
235 $get-num:main:
236     # - otherwise read a digit
237     # . save registers
238     50/push-eax
239     51/push-ecx
240     52/push-edx
241     53/push-ebx
242     56/push-esi
243     57/push-edi
244     # read necessary variables to registers
245     # esi = in
246     8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           6/r32/esi   8/disp8         .                 # copy *(ebp+8) to esi
247     # edi = out
248     8b/copy                         1/mod/*+disp8   5/rm32/ebp    .           .             .           7/r32/edi   0xc/disp8       .                 # copy *(ebp+12) to edi
249     # ecx = out->write
250     8b/copy                         0/mod/indirect  7/rm32/edi    .           .             .           1/r32/ecx   .               .                 # copy *edi to ecx
251     # edx = out->size
252     8b/copy                         1/mod/*+disp8   7/rm32/edi    .           .             .           2/r32/edx   8/disp8         .                 # copy *(edi+8) to edx
253 $get-num:loop:
254     # if (out->write >= out->size) error
255     39/compare                      3/mod/direct    2/rm32/edx    .           .             .           1/r32/ecx   .               .                 # compare edx with ecx
256     7d/jump-if-<  $get-num:loop-stage2/disp8
257     # . error(ed, err, msg)  # TODO: show full number
258     # . . push args
259     68/push  "get-num: too many digits in number"/imm32
260     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0x10/disp8      .                 # push *(ebp+16)
261     ff          6/subop/push        1/mod/*+disp8   5/rm32/ebp    .           .             .           .           0x14/disp8      .                 # push *(ebp+20)
262     # . . call
263     e8/call  error/disp32  # never returns
264     # . . discard args
265     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0xc/imm32         # add to esp
266 $get-num:loop-stage2:
267     # out->data[out->write] = LSB(Look)
268     8d/copy-address                 1/mod/*+disp8   4/rm32/sib    7/base/edi  1/index/ecx   .           3/r32/ebx   0xc/disp8       .                 # copy edi+ecx+12 to ebx
269     8b/copy                         0/mod/indirect  5/rm32/.disp32            .             .           0/r32/eax   Look/disp32     .                 # copy *Look to eax
270     88/copy-byte                    0/mod/indirect  3/rm32/ebx    .           .             .           0/r32/AL    .               .                 # copy byte at AL to *ebx
271     # ++out->write
272     41/increment-ecx
273     # Look = get-char(in)
274     # . . push args
275     56/push-esi
276     # . . call
277     e8/call  get-char/disp32
278     # . . discard args
279     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
280     # if (is-digit?(Look)) loop
281     # . eax = is-digit?(Look)
282     # . . push args
283     ff          6/subop/push        0/mod/indirect  5/rm32/.disp32            .             .           .           Look/disp32     .                 # push *Look
284     # . . call
285     e8/call  is-digit?/disp32
286     # . . discard args
287     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
288     # . if (eax != false) loop
289     3d/compare-eax-and  0/imm32/false
290     0f 85/jump-if-!=  $get-num:loop/disp32
291 $get-num:loop-end:
292     # persist necessary variables from registers
293     89/copy                         0/mod/indirect  7/rm32/edi    .           .             .           1/r32/ecx   .               .                 # copy ecx to *edi
294 $get-num:end:
295     # . restore registers
296     5f/pop-to-edi
297     5e/pop-to-esi
298     5b/pop-to-ebx
299     5a/pop-to-edx
300     59/pop-to-ecx
301     58/pop-to-eax
302     # . epilogue
303     89/copy                         3/mod/direct    4/rm32/esp    .           .             .           5/r32/ebp   .               .                 # copy ebp to esp
304     5d/pop-to-ebp
305     c3/return
306 
307 test-get-num-reads-single-digit:
308     # - check that get-num returns first character if it's a digit
309     # This test uses exit-descriptors. Use ebp for setting up local variables.
310     55/push-ebp
311     89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
312     # clear all streams
313     # . clear-stream(_test-stream)
314     # . . push args
315     68/push  _test-stream/imm32
316     # . . call
317     e8/call  clear-stream/disp32
318     # . . discard args
319     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
320     # . clear-stream($_test-buffered-file->buffer)
321     # . . push args
322     68/push  $_test-buffered-file->buffer/imm32
323     # . . call
324     e8/call  clear-stream/disp32
325     # . . discard args
326     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
327     # . clear-stream(_test-output-stream)
328     # . . push args
329     68/push  _test-output-stream/imm32
330     # . . call
331     e8/call  clear-stream/disp32
332     # . . discard args
333     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
334     # . clear-stream(_test-error-stream)
335     # . . push args
336     68/push  _test-error-stream/imm32
337     # . . call
338     e8/call  clear-stream/disp32
339     # . . discard args
340     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
341     # initialize 'in'
342     # . write(_test-stream, "3")
343     # . . push args
344     68/push  "3"/imm32
345     68/push  _test-stream/imm32
346     # . . call
347     e8/call  write/disp32
348     # . . discard args
349     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # add to esp
350     # initialize exit-descriptor 'ed' for the call to 'get-num' below
351     # . var ed/eax: exit-descriptor
352     81          5/subop/subtract    3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # subtract from esp
353     89/copy                         3/mod/direct    0/rm32/eax    .           .             .           4/r32/esp   .               .                 # copy esp to eax
354     # . tailor-exit-descriptor(ed, 16)
355     # . . push args
356     68/push  0x10/imm32/nbytes-of-args-for-get-num
357     50/push-eax/ed
358     # . . call
359     e8/call  tailor-exit-descriptor/disp32
360     # . . discard args
361     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # add to esp
362     # prime the pump
363     # . get-char(_test-buffered-file)
364     # . . push args
365     68/push  _test-buffered-file/imm32
366     # . . call
367     e8/call  get-char/disp32
368     # . . discard args
369     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
370     # get-num(in, out, err, ed)
371     # . . push args
372     50/push-eax/ed
373     68/push  _test-error-stream/imm32
374     68/push  _test-output-stream/imm32
375     68/push  _test-buffered-file/imm32
376     # . . call
377     e8/call  get-num/disp32
378     # registers except esp may be clobbered at this point
379     # . . discard args
380     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0x10/imm32        # add to esp
381     # check-ints-equal(*_test-output-stream->data, '3', msg)
382     # . . push args
383     68/push  "F - test-get-num-reads-single-digit"/imm32
384     68/push  0x33/imm32
385     b8/copy-to-eax  _test-output-stream/imm32
386     ff          6/subop/push        1/mod/*+disp8   0/rm32/eax    .           .             .           .           0xc/disp8       .                 # push *(eax+12)
387     # . . call
388     e8/call  check-ints-equal/disp32
389     # . . discard args
390     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               0xc/imm32         # add to esp
391     # . reclaim locals
392     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               8/imm32           # add to esp
393     5d/pop-to-ebp
394     c3/return
395 
396 test-get-num-aborts-on-non-digit-in-Look:
397     # - check that get-num returns first character if it's a digit
398     # This test uses exit-descriptors. Use ebp for setting up local variables.
399     55/push-ebp
400     89/copy                         3/mod/direct    5/rm32/ebp    .           .             .           4/r32/esp   .               .                 # copy esp to ebp
401     # clear all streams
402     # . clear-stream(_test-stream)
403     # . . push args
404     68/push  _test-stream/imm32
405     # . . call
406     e8/call  clear-stream/disp32
407     # . . discard args
408     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
409     # . clear-stream($_test-buffered-file->buffer)
410     # . . push args
411     68/push  $_test-buffered-file->buffer/imm32
412     # . . call
413     e8/call  clear-stream/disp32
414     # . . discard args
415     81          0/subop/add         3/mod/direct    4/rm32/esp    .           .             .           .           .               4/imm32           # add to esp
416     # . clear-stream(_test-output-stream)
417     # . . push args
418     68/push  _test-output-stream/imm32
419     # . . call
420     e8/call  clear-stream/disp32
421     # . . discard arg
//: Extend 'new' to handle a unicode string literal argument or 'text'.

//: A Mu text is an address to an array of characters.
:(before "End Mu Types Initialization")
put(Type_abbreviations, "text", new_type_tree("&:@:character"));

:(code)
void test_new_string() {
  run(
      "def main [\n"
      "  10:text <- new [abc def]\n"
      "  20:char <- index *10:text, 5\n"
      "]\n"
  );
  CHECK_TRACE_CONTENTS(
      // number code for 'e'
      "mem: storing 101 in location 20\n"
  );
}

void test_new_string_handles_unicode() {
  run(
      "def main [\n"
      "  10:text <- new [a«c]\n"
      "  20:num <- length *10:text\n"
      "  21:char <- index *10:text, 1\n"
      "]\n"
  );
  CHECK_TRACE_CONTENTS(
      "mem: storing 3 in location 20\n"
      // unicode for '«'
      "mem: storing 171 in location 21\n"
  );
}

:(before "End NEW Check Special-cases")
if (is_literal_text(inst.ingredients.at(0))) break;
:(before "Convert 'new' To 'allocate'")
if (inst.name == "new" && !inst.ingredients.empty() && is_literal_text(inst.ingredients.at(0))) continue;
:(after "case NEW" following "Primitive Recipe Implementations")
  if (is_literal_text(current_instruction().ingredients.at(0))) {
    products.resize(1);
    products.at(0).push_back(/*alloc id*/0);
    products.at(0).push_back(new_mu_text(current_instruction().ingredients.at(0).name));
    trace(Callstack_depth+1, "mem") << "new string alloc: " << products.at(0).at(0) << end();
    break;
  }

:(code)
int new_mu_text(const string& contents) {
  // allocate an array just large enough for it
  int string_length = unicode_length(contents);
//?   Total_alloc += string_length+1;
//?   ++Num_alloc;
  int result = allocate(/*array length*/1 + string_length);
  int curr_address = result;
  ++curr_address;  // skip alloc id
  trace(Callstack_depth+1, "mem") << "storing string length " << string_length << " in location " << curr_address << end();
  put(Memory, curr_address, string_length);
  ++curr_address;  // skip length
  int curr = 0;
  const char* raw_contents = contents.c_str();
  for (int i = 0;  i < string_length;  ++i) {
    uint32_t curr_character;
    assert(curr < SIZE(contents));
    tb_utf8_char_to_unicode(&curr_character, &raw_contents[curr]);
    trace(Callstack_depth+1, "mem") << "storing string character " << curr_character << " in location " << curr_address << end();
    put(Memory, curr_address, curr_character);
    curr += tb_utf8_char_length(raw_contents[curr]);
    ++curr_address;
  }
  // Mu strings are not null-terminated in memory.
  return result;
}

//: a new kind of typo

void test_literal_text_without_instruction() {
  Hide_errors = true;
  run(
      "def main [\n"
      "  [abc]\n"
      "]\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: main: instruction '[abc]' has no recipe in '[abc]'\n"
  );
}

//: stash recognizes texts

void test_stash_text() {
  run(
      "def main [\n"
      "  1:text <- new [abc]\n"
      "  stash [foo:], 1:text\n"
      "]\n"
  );
  CHECK_TRACE_CONTENTS(
      "app: foo: abc\n"
  );
}

:(before "End inspect Special-cases(r, data)")
if (is_mu_text(r)) {
  return read_mu_text(data.at(/*skip alloc id*/1));
}

:(before "End $print Special-cases")
else if (is_mu_text(current_instruction().ingredients.at(i))) {
  cout << read_mu_text(ingredients.at(i).at(/*skip alloc id*/1));
}

:(code)
void test_unicode_text() {
  run(
      "def main [\n"
      "  1:text <- new [♠]\n"
      "  stash [foo:], 1:text\n"
      "]\n"
  );
  CHECK_TRACE_CONTENTS(
      "app: foo: ♠\n"
  );
}

void test_stash_space_after_text() {
  run(
      "def main [\n"
      "  1:text <- new [abc]\n"
      "  stash 1:text, [foo]\n"
      "]\n"
  );
  CHECK_TRACE_CONTENTS(
      "app: abc foo\n"
  );
}

void test_stash_text_as_array() {
  run(
      "def main [\n"
      "  1:text <- new [abc]\n"
      "  stash *1:text\n"
      "]\n"
  );
  CHECK_TRACE_CONTENTS(
      "app: 3 97 98 99\n"
  );
}

//: fixes way more than just stash
:(before "End Preprocess is_mu_text(reagent x)")
if (!canonize_type(x)) return false;

//: Allocate more to routine when initializing a literal text
:(code)
void test_new_text_overflow() {
  Initial_memory_per_routine = 3;
  run(
      "def main [\n"
      "  10:&:num/raw <- new number:type\n"
      "  20:text/raw <- new [a]\n"  // not enough room in initial page, if you take the array length into account
      "]\n"
  );
  CHECK_TRACE_CONTENTS(
      "new: routine allocated memory from 1000 to 1003\n"
      "new: routine allocated memory from 1003 to 1006\n"
  );
}

//: helpers
:(code)
int unicode_length(const string& s) {
  const char* in = s.c_str();
  int result = 0;
  int curr = 0;
  while (curr < SIZE(s)) {  // carefully bounds-check on the string
    // before accessing its raw pointer
    ++result;
    curr += tb_utf8_char_length(in[curr]);
  }
  return result;
}

string read_mu_text(int address) {
  if (address == 0) return "";
  int length = get_or_insert(Memory, address+/*alloc id*/1);
  if (length == 0) return "";
  return read_mu_characters(address+/*alloc id*/1+/*length*/1, length);
}

string read_mu_characters(int start, int length) {
  ostringstream tmp;
  for (int curr = start;  curr < start+length;  ++curr)
    tmp << to_unicode(static_cast<uint32_t>(get_or_insert(Memory, curr)));
  return tmp.str();
}

//:: some miscellaneous helpers now that we have text

//: assert: perform sanity checks at runtime

void test_assert_literal() {
  Hide_errors = true;
  run(
      "def main [\n"
      "  assert 0, [this is an assert in Mu]\n"
      "]\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: this is an assert in Mu\n"
  );
}

void test_assert() {
  Hide_errors = true;
  run(
      "def main [\n"
      "  1:text <- new [this is an assert in Mu]\n"
      "  assert 0, 1:text\n"
      "]\n"
  );
  CHECK_TRACE_CONTENTS(
      "error: this is an assert in Mu\n"
  );
}

:(before "End Primitive Recipe Declarations")
ASSERT,
:(before "End Primitive Recipe Numbers")
put(Recipe_ordinal, "assert", ASSERT);
:(before "End Primitive Recipe Checks")
case ASSERT: {
  if (SIZE(inst.ingredients) != 2) {
    raise << maybe(get(Recipe, r).name) << "'assert' takes exactly two ingredients rather than '" << to_original_string(inst) << "'\n" << end();
    break;
  }
  if (!is_mu_address(inst.ingredients.at(0)) && !is_mu_scalar(inst.ingredients.at(0))) {
    raise << maybe(get(Recipe, r).name) << "'assert' requires a scalar or address for its first ingredient, but got '" << inst.ingredients.at(0).original_string << "'\n" << end();
    break;
  }
  if (!is_literal_text(inst.ingredients.at(1)) && !is_mu_text(inst.ingredients.at(1))) {
    raise << maybe(get(Recipe, r).name) << "'assert' requires a text as its second ingredient, but got '" << inst.ingredients.at(1).original_string << "'\n" << end();
    break;
  }
  break;
}
:(before "End Primitive Recipe Implementations")
case ASSERT: {
  if (!scalar_ingredient(ingredients, 0)) {
    if (is_literal_text(current_instruction().ingredients.at(1)))
      raise << current_instruction().ingredients.at(1).name << '\n' << end();
    else
      raise << read_mu_text(ingredients.at(1).at(/*skip alloc id*/1))