1 # Port of https://github.com/akkartik/crenshaw/blob/master/tutor2.1.pas 2 # which corresponds to the section "single digits" in https://compilers.iecc.com/crenshaw/tutor2.txt 3 # except that we support hex numbers of multiple digits. 4 # 5 # To run: 6 # $ ./bootstrap translate init.linux 0*.subx apps/crenshaw2-1b.subx -o apps/crenshaw2-1b 7 # $ echo '1a' |./bootstrap run apps/crenshaw2-1b 8 # Expected output: 9 # # syscall(exit, 1a) 10 # bb/copy-to-ebx 3/imm32 11 # b8/copy-to-eax 1/imm32/exit 12 # cd/syscall 0x80/imm8 13 # 14 # To run the generated output: 15 # $ echo '1a' |./bootstrap run apps/crenshaw2-1b > z1.subx 16 # $ ./bootstrap translate init.linux z1.subx -o z1 17 # $ ./bootstrap run z1 18 # $ echo $? 19 # 26 # 0x1a in decimal 20 # 21 # Stdin must contain just a single hex digit. Other input will print an error: 22 # $ echo 'xyz' |./bootstrap run apps/crenshaw2-1b 23 # Error: integer expected 24 # 25 # Names in this file sometimes follow Crenshaw's original rather than my usual 26 # naming conventions. 27 28 == code 29 # instruction effective address register displacement immediate 30 # . op subop mod rm32 base index scale r32 31 # . 1-3 bytes 3 bits 2 bits 3 bits 3 bits 3 bits 2 bits 2 bits 0/1/2/4 bytes 0/1/2/4 bytes 32 33 Entry: # run tests if necessary, call 'compile' if not 34 # . prologue 35 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp 36 37 # initialize heap 38 # . Heap = new-segment(Heap-size) 39 # . . push args 40 68/push Heap/imm32 41 ff 6/subop/push 0/mod/indirect 5/rm32/.disp32 . . . Heap-size/disp32 # push *Heap-size 42 # . . call 43 e8/call new-segment/disp32 44 # . . discard args 45 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp 46 47 # - if argc > 1 and argv[1] == "test", then return run_tests() 48 # if (argc <= 1) goto run-main 49 81 7/subop/compare 1/mod/*+disp8 5/rm32/ebp . . . . 0/disp8 1/imm32 # compare *ebp 50 7e/jump-if-<= $run-main/disp8 51 # if (!kernel-string-equal?(argv[1], "test")) goto run-main 52 # . eax = kernel-string-equal?(argv[1], "test") 53 # . . push args 54 68/push "test"/imm32 55 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 8/disp8 . # push *(ebp+8) 56 # . . call 57 e8/call kernel-string-equal?/disp32 58 # . . discard args 59 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp 60 # . if (eax == false) goto run-main 61 3d/compare-eax-and 0/imm32/false 62 74/jump-if-= $run-main/disp8 63 # run-tests() 64 e8/call run-tests/disp32 65 # syscall(exit, *Num-test-failures) 66 8b/copy 0/mod/indirect 5/rm32/.disp32 . . 3/r32/ebx Num-test-failures/disp32 # copy *Num-test-failures to ebx 67 eb/jump $main:end/disp8 68 $run-main: 69 # - otherwise read a program from stdin and emit its translation to stdout 70 # . compile(Stdin, 1/stdout, 2/stderr, 0) 71 # . . push args 72 68/push 0/imm32/exit-descriptor 73 68/push 2/imm32/stderr 74 68/push 1/imm32/stdout 75 68/push Stdin/imm32 76 # . . call 77 e8/call compile/disp32 78 # . . discard args 79 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0x10/imm32 # add to esp 80 # syscall(exit, 0) 81 bb/copy-to-ebx 0/imm32 82 $main:end: 83 e8/call syscall_exit/disp32 84 85 # the main entry point 86 compile: # in: (addr buffered-file), out: fd or (addr stream byte), err: fd or (addr stream byte), ed: (addr exit-descriptor) 87 # . prologue 88 55/push-ebp 89 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp 90 # . save registers 91 50/push-eax 92 51/push-ecx 93 # prime the pump 94 # . Look = get-char(in) 95 # . . push args 96 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 8/disp8 . # push *(ebp+8) 97 # . . call 98 e8/call get-char/disp32 99 # . . discard args 100 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 101 # var num/ecx: (stream byte 7) 102 # Numbers can be 32 bits or 8 hex bytes long. One of them will be in 'Look', so we need space for 7 bytes. 103 # Sizing the stream just right buys us overflow-handling for free inside 'get-num'. 104 # Add 12 bytes for 'read', 'write' and 'size' fields, for a total of 19 bytes, or 0x13 in hex. 105 # The stack pointer is no longer aligned, so dump_stack() can be misleading past this point. 106 81 5/subop/subtract 3/mod/direct 4/rm32/esp . . . . . 0x13/imm32 # subtract from esp 107 89/copy 3/mod/direct 1/rm32/ecx . . . 4/r32/esp . . # copy esp to ecx 108 # initialize the stream 109 # . num->size = 7 110 c7 0/subop/copy 1/mod/*+disp8 1/rm32/ecx . . . . 8/disp8 7/imm32 # copy to *(ecx+8) 111 # . clear-stream(num) 112 # . . push args 113 51/push-ecx 114 # . . call 115 e8/call clear-stream/disp32 116 # . . discard args 117 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 118 # read a digit from 'in' into 'num' 119 # . get-num(in, num, err, ed) 120 # . . push args 121 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0x14/disp8 . # push *(ebp+20) 122 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0x10/disp8 . # push *(ebp+16) 123 51/push-ecx/num 124 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 8/disp8 . # push *(ebp+8) 125 # . . call 126 e8/call get-num/disp32 127 # . . discard args 128 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0x10/imm32 # add to esp 129 # render 'num' into the following template on 'out': 130 # bb/copy-to-ebx _num_ 131 # b8/copy-to-eax 1/imm32/exit 132 # cd/syscall 0x80/imm8 133 # 134 # . write(out, "bb/copy-to-ebx ") 135 # . . push args 136 68/push "bb/copy-to-ebx "/imm32 137 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0xc/disp8 . # push *(ebp+12) 138 # . . call 139 e8/call write/disp32 140 # . . discard args 141 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp 142 # . write-stream(out, num) 143 # . . push args 144 51/push-ecx/num 145 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0xc/disp8 . # push *(ebp+12) 146 # . . call 147 e8/call write-stream/disp32 148 # . . discard args 149 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp 150 # . write(out, Newline) 151 # . . push args 152 68/push Newline/imm32 153 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0xc/disp8 . # push *(ebp+12) 154 # . . call 155 e8/call write/disp32 156 # . . discard args 157 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp 158 # . write(out, "b8/copy-to-eax 1/imm32/exit\n") 159 # . . push args 160 68/push "b8/copy-to-eax 1/imm32/exit\n"/imm32 161 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0xc/disp8 . # push *(ebp+12) 162 # . . call 163 e8/call write/disp32 164 # . . discard args 165 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp 166 # . write(out, "cd/syscall 0x80/imm8\n") 167 # . . push args 168 68/push "cd/syscall 0x80/imm8\n"/imm32 169 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0xc/disp8 . # push *(ebp+12) 170 # . . call 171 e8/call write/disp32 172 # . . discard args 173 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp 174 $compile:end: 175 # . restore registers 176 59/pop-to-ecx 177 58/pop-to-eax 178 # . epilogue 179 89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp 180 5d/pop-to-ebp 181 c3/return 182 183 # Read a sequence of digits into 'out'. Abort if there are none, or if there is 184 # no space in 'out'. 185 # Input comes from the global variable 'Look' (first byte) and the argument 186 # 'in' (rest). We leave the next byte from 'in' into 'Look' on exit. 187 get-num: # in: (addr buffered-file), out: (addr stream byte), err: fd or (addr stream byte), ed: (addr exit-descriptor) 188 # pseudocode: 189 # if (!is-digit?(Look)) expected(ed, err, "integer") 190 # do 191 # if out->write >= out->size 192 # write(err, "Error: too many digits in number\n") 193 # stop(ed, 1) 194 # out->data[out->write] = LSB(Look) 195 # ++out->write 196 # Look = get-char(in) 197 # while is-digit?(Look) 198 # This is complicated because I don't want to hard-code the error strategy in 199 # a general helper like write-byte-buffered. Maybe I should just create a 200 # local helper. 201 # 202 # within the loop we'll try to keep things in registers: 203 # in: esi 204 # out: edi 205 # out->write: ecx (cached copy; need to keep in sync) 206 # out->size: edx 207 # temporaries: eax, ebx 208 # We can't allocate Look to a register because it gets written implicitly in 209 # get-char in each iteration of the loop. (Thereby demonstrating that it's 210 # not the right interface for us. But we'll keep it just to follow Crenshaw.) 211 # 212 # . prologue 213 55/push-ebp 214 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp 215 # - if (is-digit?(Look)) expected(ed, err, "integer") 216 # . eax = is-digit?(Look) 217 # . . push args 218 ff 6/subop/push 0/mod/indirect 5/rm32/.disp32 . . . Look/disp32 . # push *Look 219 # . . call 220 e8/call is-digit?/disp32 221 # . . discard args 222 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 223 # . if (eax == false) 224 3d/compare-eax-and 0/imm32/false 225 75/jump-if-!= $get-num:main/disp8 226 # . expected(ed, err, "integer") 227 # . . push args 228 68/push "integer"/imm32 229 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0x10/disp8 . # push *(ebp+16) 230 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0x14/disp8 . # push *(ebp+20) 231 # . . call 232 e8/call expected/disp32 # never returns 233 # . . discard args 234 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp 235 $get-num:main: 236 # - otherwise read a digit 237 # . save registers 238 50/push-eax 239 51/push-ecx 240 52/push-edx 241 53/push-ebx 242 56/push-esi 243 57/push-edi 244 # read necessary variables to registers 245 # esi = in 246 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 6/r32/esi 8/disp8 . # copy *(ebp+8) to esi 247 # edi = out 248 8b/copy 1/mod/*+disp8 5/rm32/ebp . . . 7/r32/edi 0xc/disp8 . # copy *(ebp+12) to edi 249 # ecx = out->write 250 8b/copy 0/mod/indirect 7/rm32/edi . . . 1/r32/ecx . . # copy *edi to ecx 251 # edx = out->size 252 8b/copy 1/mod/*+disp8 7/rm32/edi . . . 2/r32/edx 8/disp8 . # copy *(edi+8) to edx 253 $get-num:loop: 254 # if (out->write >= out->size) error 255 39/compare 3/mod/direct 2/rm32/edx . . . 1/r32/ecx . . # compare edx with ecx 256 7d/jump-if-< $get-num:loop-stage2/disp8 257 # . error(ed, err, msg) # TODO: show full number 258 # . . push args 259 68/push "get-num: too many digits in number"/imm32 260 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0x10/disp8 . # push *(ebp+16) 261 ff 6/subop/push 1/mod/*+disp8 5/rm32/ebp . . . . 0x14/disp8 . # push *(ebp+20) 262 # . . call 263 e8/call error/disp32 # never returns 264 # . . discard args 265 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp 266 $get-num:loop-stage2: 267 # out->data[out->write] = LSB(Look) 268 8d/copy-address 1/mod/*+disp8 4/rm32/sib 7/base/edi 1/index/ecx . 3/r32/ebx 0xc/disp8 . # copy edi+ecx+12 to ebx 269 8b/copy 0/mod/indirect 5/rm32/.disp32 . . 0/r32/eax Look/disp32 . # copy *Look to eax 270 88/copy-byte 0/mod/indirect 3/rm32/ebx . . . 0/r32/AL . . # copy byte at AL to *ebx 271 # ++out->write 272 41/increment-ecx 273 # Look = get-char(in) 274 # . . push args 275 56/push-esi 276 # . . call 277 e8/call get-char/disp32 278 # . . discard args 279 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 280 # if (is-digit?(Look)) loop 281 # . eax = is-digit?(Look) 282 # . . push args 283 ff 6/subop/push 0/mod/indirect 5/rm32/.disp32 . . . Look/disp32 . # push *Look 284 # . . call 285 e8/call is-digit?/disp32 286 # . . discard args 287 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 288 # . if (eax != false) loop 289 3d/compare-eax-and 0/imm32/false 290 0f 85/jump-if-!= $get-num:loop/disp32 291 $get-num:loop-end: 292 # persist necessary variables from registers 293 89/copy 0/mod/indirect 7/rm32/edi . . . 1/r32/ecx . . # copy ecx to *edi 294 $get-num:end: 295 # . restore registers 296 5f/pop-to-edi 297 5e/pop-to-esi 298 5b/pop-to-ebx 299 5a/pop-to-edx 300 59/pop-to-ecx 301 58/pop-to-eax 302 # . epilogue 303 89/copy 3/mod/direct 4/rm32/esp . . . 5/r32/ebp . . # copy ebp to esp 304 5d/pop-to-ebp 305 c3/return 306 307 test-get-num-reads-single-digit: 308 # - check that get-num returns first character if it's a digit 309 # This test uses exit-descriptors. Use ebp for setting up local variables. 310 55/push-ebp 311 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp 312 # clear all streams 313 # . clear-stream(_test-stream) 314 # . . push args 315 68/push _test-stream/imm32 316 # . . call 317 e8/call clear-stream/disp32 318 # . . discard args 319 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 320 # . clear-stream($_test-buffered-file->buffer) 321 # . . push args 322 68/push $_test-buffered-file->buffer/imm32 323 # . . call 324 e8/call clear-stream/disp32 325 # . . discard args 326 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 327 # . clear-stream(_test-output-stream) 328 # . . push args 329 68/push _test-output-stream/imm32 330 # . . call 331 e8/call clear-stream/disp32 332 # . . discard args 333 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 334 # . clear-stream(_test-error-stream) 335 # . . push args 336 68/push _test-error-stream/imm32 337 # . . call 338 e8/call clear-stream/disp32 339 # . . discard args 340 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 341 # initialize 'in' 342 # . write(_test-stream, "3") 343 # . . push args 344 68/push "3"/imm32 345 68/push _test-stream/imm32 346 # . . call 347 e8/call write/disp32 348 # . . discard args 349 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp 350 # initialize exit-descriptor 'ed' for the call to 'get-num' below 351 # . var ed/eax: exit-descriptor 352 81 5/subop/subtract 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # subtract from esp 353 89/copy 3/mod/direct 0/rm32/eax . . . 4/r32/esp . . # copy esp to eax 354 # . tailor-exit-descriptor(ed, 16) 355 # . . push args 356 68/push 0x10/imm32/nbytes-of-args-for-get-num 357 50/push-eax/ed 358 # . . call 359 e8/call tailor-exit-descriptor/disp32 360 # . . discard args 361 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp 362 # prime the pump 363 # . get-char(_test-buffered-file) 364 # . . push args 365 68/push _test-buffered-file/imm32 366 # . . call 367 e8/call get-char/disp32 368 # . . discard args 369 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 370 # get-num(in, out, err, ed) 371 # . . push args 372 50/push-eax/ed 373 68/push _test-error-stream/imm32 374 68/push _test-output-stream/imm32 375 68/push _test-buffered-file/imm32 376 # . . call 377 e8/call get-num/disp32 378 # registers except esp may be clobbered at this point 379 # . . discard args 380 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0x10/imm32 # add to esp 381 # check-ints-equal(*_test-output-stream->data, '3', msg) 382 # . . push args 383 68/push "F - test-get-num-reads-single-digit"/imm32 384 68/push 0x33/imm32 385 b8/copy-to-eax _test-output-stream/imm32 386 ff 6/subop/push 1/mod/*+disp8 0/rm32/eax . . . . 0xc/disp8 . # push *(eax+12) 387 # . . call 388 e8/call check-ints-equal/disp32 389 # . . discard args 390 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 0xc/imm32 # add to esp 391 # . reclaim locals 392 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 8/imm32 # add to esp 393 5d/pop-to-ebp 394 c3/return 395 396 test-get-num-aborts-on-non-digit-in-Look: 397 # - check that get-num returns first character if it's a digit 398 # This test uses exit-descriptors. Use ebp for setting up local variables. 399 55/push-ebp 400 89/copy 3/mod/direct 5/rm32/ebp . . . 4/r32/esp . . # copy esp to ebp 401 # clear all streams 402 # . clear-stream(_test-stream) 403 # . . push args 404 68/push _test-stream/imm32 405 # . . call 406 e8/call clear-stream/disp32 407 # . . discard args 408 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 409 # . clear-stream($_test-buffered-file->buffer) 410 # . . push args 411 68/push $_test-buffered-file->buffer/imm32 412 # . . call 413 e8/call clear-stream/disp32 414 # . . discard args 415 81 0/subop/add 3/mod/direct 4/rm32/esp . . . . . 4/imm32 # add to esp 416 # . clear-stream(_test-output-stream) 417 # . . push args 418 68/push _test-output-stream/imm32 419 # . . call 420 e8/call clear-stream/disp32 421 # . . discard arg//: Extend 'new' to handle a unicode string literal argument or 'text'. //: A Mu text is an address to an array of characters. :(before "End Mu Types Initialization") put(Type_abbreviations, "text", new_type_tree("&:@:character")); :(code) void test_new_string() { run( "def main [\n" " 10:text <- new [abc def]\n" " 20:char <- index *10:text, 5\n" "]\n" ); CHECK_TRACE_CONTENTS( // number code for 'e' "mem: storing 101 in location 20\n" ); } void test_new_string_handles_unicode() { run( "def main [\n" " 10:text <- new [a«c]\n" " 20:num <- length *10:text\n" " 21:char <- index *10:text, 1\n" "]\n" ); CHECK_TRACE_CONTENTS( "mem: storing 3 in location 20\n" // unicode for '«' "mem: storing 171 in location 21\n" ); } :(before "End NEW Check Special-cases") if (is_literal_text(inst.ingredients.at(0))) break; :(before "Convert 'new' To 'allocate'") if (inst.name == "new" && !inst.ingredients.empty() && is_literal_text(inst.ingredients.at(0))) continue; :(after "case NEW" following "Primitive Recipe Implementations") if (is_literal_text(current_instruction().ingredients.at(0))) { products.resize(1); products.at(0).push_back(/*alloc id*/0); products.at(0).push_back(new_mu_text(current_instruction().ingredients.at(0).name)); trace(Callstack_depth+1, "mem") << "new string alloc: " << products.at(0).at(0) << end(); break; } :(code) int new_mu_text(const string& contents) { // allocate an array just large enough for it int string_length = unicode_length(contents); //? Total_alloc += string_length+1; //? ++Num_alloc; int result = allocate(/*array length*/1 + string_length); int curr_address = result; ++curr_address; // skip alloc id trace(Callstack_depth+1, "mem") << "storing string length " << string_length << " in location " << curr_address << end(); put(Memory, curr_address, string_length); ++curr_address; // skip length int curr = 0; const char* raw_contents = contents.c_str(); for (int i = 0; i < string_length; ++i) { uint32_t curr_character; assert(curr < SIZE(contents)); tb_utf8_char_to_unicode(&curr_character, &raw_contents[curr]); trace(Callstack_depth+1, "mem") << "storing string character " << curr_character << " in location " << curr_address << end(); put(Memory, curr_address, curr_character); curr += tb_utf8_char_length(raw_contents[curr]); ++curr_address; } // Mu strings are not null-terminated in memory. return result; } //: a new kind of typo void test_literal_text_without_instruction() { Hide_errors = true; run( "def main [\n" " [abc]\n" "]\n" ); CHECK_TRACE_CONTENTS( "error: main: instruction '[abc]' has no recipe in '[abc]'\n" ); } //: stash recognizes texts void test_stash_text() { run( "def main [\n" " 1:text <- new [abc]\n" " stash [foo:], 1:text\n" "]\n" ); CHECK_TRACE_CONTENTS( "app: foo: abc\n" ); } :(before "End inspect Special-cases(r, data)") if (is_mu_text(r)) { return read_mu_text(data.at(/*skip alloc id*/1)); } :(before "End $print Special-cases") else if (is_mu_text(current_instruction().ingredients.at(i))) { cout << read_mu_text(ingredients.at(i).at(/*skip alloc id*/1)); } :(code) void test_unicode_text() { run( "def main [\n" " 1:text <- new [♠]\n" " stash [foo:], 1:text\n" "]\n" ); CHECK_TRACE_CONTENTS( "app: foo: ♠\n" ); } void test_stash_space_after_text() { run( "def main [\n" " 1:text <- new [abc]\n" " stash 1:text, [foo]\n" "]\n" ); CHECK_TRACE_CONTENTS( "app: abc foo\n" ); } void test_stash_text_as_array() { run( "def main [\n" " 1:text <- new [abc]\n" " stash *1:text\n" "]\n" ); CHECK_TRACE_CONTENTS( "app: 3 97 98 99\n" ); } //: fixes way more than just stash :(before "End Preprocess is_mu_text(reagent x)") if (!canonize_type(x)) return false; //: Allocate more to routine when initializing a literal text :(code) void test_new_text_overflow() { Initial_memory_per_routine = 3; run( "def main [\n" " 10:&:num/raw <- new number:type\n" " 20:text/raw <- new [a]\n" // not enough room in initial page, if you take the array length into account "]\n" ); CHECK_TRACE_CONTENTS( "new: routine allocated memory from 1000 to 1003\n" "new: routine allocated memory from 1003 to 1006\n" ); } //: helpers :(code) int unicode_length(const string& s) { const char* in = s.c_str(); int result = 0; int curr = 0; while (curr < SIZE(s)) { // carefully bounds-check on the string // before accessing its raw pointer ++result; curr += tb_utf8_char_length(in[curr]); } return result; } string read_mu_text(int address) { if (address == 0) return ""; int length = get_or_insert(Memory, address+/*alloc id*/1); if (length == 0) return ""; return read_mu_characters(address+/*alloc id*/1+/*length*/1, length); } string read_mu_characters(int start, int length) { ostringstream tmp; for (int curr = start; curr < start+length; ++curr) tmp << to_unicode(static_cast<uint32_t>(get_or_insert(Memory, curr))); return tmp.str(); } //:: some miscellaneous helpers now that we have text //: assert: perform sanity checks at runtime void test_assert_literal() { Hide_errors = true; run( "def main [\n" " assert 0, [this is an assert in Mu]\n" "]\n" ); CHECK_TRACE_CONTENTS( "error: this is an assert in Mu\n" ); } void test_assert() { Hide_errors = true; run( "def main [\n" " 1:text <- new [this is an assert in Mu]\n" " assert 0, 1:text\n" "]\n" ); CHECK_TRACE_CONTENTS( "error: this is an assert in Mu\n" ); } :(before "End Primitive Recipe Declarations") ASSERT, :(before "End Primitive Recipe Numbers") put(Recipe_ordinal, "assert", ASSERT); :(before "End Primitive Recipe Checks") case ASSERT: { if (SIZE(inst.ingredients) != 2) { raise << maybe(get(Recipe, r).name) << "'assert' takes exactly two ingredients rather than '" << to_original_string(inst) << "'\n" << end(); break; } if (!is_mu_address(inst.ingredients.at(0)) && !is_mu_scalar(inst.ingredients.at(0))) { raise << maybe(get(Recipe, r).name) << "'assert' requires a scalar or address for its first ingredient, but got '" << inst.ingredients.at(0).original_string << "'\n" << end(); break; } if (!is_literal_text(inst.ingredients.at(1)) && !is_mu_text(inst.ingredients.at(1))) { raise << maybe(get(Recipe, r).name) << "'assert' requires a text as its second ingredient, but got '" << inst.ingredients.at(1).original_string << "'\n" << end(); break; } break; } :(before "End Primitive Recipe Implementations") case ASSERT: { if (!scalar_ingredient(ingredients, 0)) { if (is_literal_text(current_instruction().ingredients.at(1))) raise << current_instruction().ingredients.at(1).name << '\n' << end(); else raise << read_mu_text(ingredients.at(1).at(/*skip alloc id*/1))