https://github.com/akkartik/mu/blob/main/linux/tile/word.mu
  1 fn initialize-word _self: (addr word) {
      # Allocate a gap buffer into the word's 'scalar-data' handle, then
      # initialize that buffer with initialize-gap-buffer.
  2   var w/esi: (addr word) <- copy _self
  3   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
  4   allocate buf-ah
  5   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
  6   initialize-gap-buffer buf
  7 }
  8 
  9 ## some helpers for creating words. mostly for tests
 10 
 11 fn initialize-word-with _self: (addr word), s: (addr array byte) {
      # Allocate a gap buffer into the word's 'scalar-data' handle, then set it
      # up from 's' using initialize-gap-buffer-with.
 12   var w/esi: (addr word) <- copy _self
 13   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
 14   allocate buf-ah
 15   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
 16   initialize-gap-buffer-with buf, s
 17 }
 18 
 19 fn allocate-word-with _out: (addr handle word), s: (addr array byte) {
      # Allocate a new word into '_out' and initialize it from 's' using
      # initialize-word-with.
 20   var target-ah/eax: (addr handle word) <- copy _out
 21   allocate target-ah
 22   var target/eax: (addr word) <- lookup *target-ah
 23   initialize-word-with target, s
 24 }
 25 
 26 # just for tests for now
 27 # TODO: handle existing next
 28 # one implication of handles: append must take a handle
 29 fn append-word-with self-h: (handle word), s: (addr array byte) {
      # Allocate a new word built from 's' into self's 'next' slot, then store
      # 'self-h' in the new word's 'prev' slot.
 30   var curr/eax: (addr word) <- lookup self-h
 31   var succ-ah/eax: (addr handle word) <- get curr, next
 32   allocate-word-with succ-ah, s  # whatever 'next' held before is overwritten (see TODO)
 33   var succ/eax: (addr word) <- lookup *succ-ah
 34   var back-ah/eax: (addr handle word) <- get succ, prev
 35   copy-handle self-h, back-ah
 36 }
 37 
 38 # just for tests for now
 39 # TODO: handle existing prev
 40 fn prepend-word-with self-h: (handle word), s: (addr array byte) {
      # Allocate a new word built from 's' into self's 'prev' slot, then store
      # 'self-h' in the new word's 'next' slot.
 41   var curr/eax: (addr word) <- lookup self-h
 42   var pred-ah/eax: (addr handle word) <- get curr, prev
 43   allocate-word-with pred-ah, s  # whatever 'prev' held before is overwritten (see TODO)
 44   var pred/eax: (addr word) <- lookup *pred-ah
 45   var forward-ah/eax: (addr handle word) <- get pred, next
 46   copy-handle self-h, forward-ah
 47 }
 48 
 49 ## real primitives
 50 
 51 fn move-word-contents _src-ah: (addr handle word), _dest-ah: (addr handle word) {
      # Move the source word's cursor to the start, then repeatedly pop from the
      # 'right' stack of its gap buffer, adding each popped code point to the
      # destination word, until that stack is empty.
 52   var target-ah/eax: (addr handle word) <- copy _dest-ah
 53   var _target/eax: (addr word) <- lookup *target-ah
 54   var target/edi: (addr word) <- copy _target
 55   var origin-ah/eax: (addr handle word) <- copy _src-ah
 56   var _origin/eax: (addr word) <- lookup *origin-ah
 57   var origin/esi: (addr word) <- copy _origin
 58   cursor-to-start origin
 59   var origin-buf-ah/eax: (addr handle gap-buffer) <- get origin, scalar-data
 60   var origin-buf/eax: (addr gap-buffer) <- lookup *origin-buf-ah
 61   var pending/ecx: (addr grapheme-stack) <- get origin-buf, right
 62   {
 63     var drained?/eax: boolean <- grapheme-stack-empty? pending
 64     compare drained?, 0/false
 65     break-if-!=
 66     var cp/eax: code-point-utf8 <- pop-grapheme-stack pending
 67 #?     print-code-point-utf8 0, cp
 68 #?     print-string 0, "\n"
 69     add-code-point-utf8-to-word target, cp
 70     loop
 71   }
 72 }
 73 
 74 fn copy-word-contents-before-cursor _src-ah: (addr handle word), _dest-ah: (addr handle word) {
      # Add to the destination word every code point stored in the 'left' stack
      # of the source word's gap buffer, from index 0 up to (but excluding) the
      # stack's 'top'. Nothing is popped from the source stack; it is only indexed.
 75   var dest-ah/eax: (addr handle word) <- copy _dest-ah
 76   var _dest/eax: (addr word) <- lookup *dest-ah
 77   var dest/edi: (addr word) <- copy _dest
 78   var src-ah/eax: (addr handle word) <- copy _src-ah
 79   var src/eax: (addr word) <- lookup *src-ah
 80   var src-data-ah/eax: (addr handle gap-buffer) <- get src, scalar-data
 81   var src-data/eax: (addr gap-buffer) <- lookup *src-data-ah
 82   var src-stack/ecx: (addr grapheme-stack) <- get src-data, left
 83   var src-stack-data-ah/eax: (addr handle array code-point-utf8) <- get src-stack, data
 84   var _src-stack-data/eax: (addr array code-point-utf8) <- lookup *src-stack-data-ah
 85   var src-stack-data/edx: (addr array code-point-utf8) <- copy _src-stack-data
      # top-addr takes over ecx; src-stack is not referenced after this line
 86   var top-addr/ecx: (addr int) <- get src-stack, top
 87   var i/eax: int <- copy 0
 88   {
 89     compare i, *top-addr
 90     break-if->=
        # g is placed in edx, the same register as src-stack-data; the loop
        # relies on src-stack-data being usable again on the next iteration
 91     var g/edx: (addr code-point-utf8) <- index src-stack-data, i
 92     add-code-point-utf8-to-word dest, *g
 93     i <- increment
 94     loop
 95   }
 96 }
 97 
 98 fn word-equal? _self: (addr word), s: (addr array byte) -> _/eax: boolean {
      # Return whatever gap-buffer-equal? reports for the word's gap buffer and 's'.
 99   var w/esi: (addr word) <- copy _self
100   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
101   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
102   var same?/eax: boolean <- gap-buffer-equal? buf, s
103   return same?
104 }
105 
106 fn word-length _self: (addr word) -> _/eax: int {
      # Return the length of the word's gap buffer, as computed by gap-buffer-length.
107   var w/esi: (addr word) <- copy _self
108   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
109   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
110   var len/eax: int <- gap-buffer-length buf
111   return len
112 }
113 
114 fn final-word _in: (addr handle word), out: (addr handle word) {
      # Follow 'next' links starting from '_in' and store in 'out' a handle to
      # the last word reached (the one whose 'next' is null).
      # The first word is dereferenced unconditionally, so '_in' must not be null.
115   var cursor-h: (handle word)
116   var cursor-ah/esi: (addr handle word) <- address cursor-h
117   copy-object _in, cursor-ah
118   var w/eax: (addr word) <- copy 0
119   var succ-ah/edi: (addr handle word) <- copy 0
120   {
121     w <- lookup *cursor-ah
122     succ-ah <- get w, next
123     w <- lookup *succ-ah
124     compare w, 0
125     break-if-=
126     copy-object succ-ah, cursor-ah
127     loop
128   }
129   copy-object cursor-ah, out  # write 'out' only at the very end, in case it's the same as 'in'
130 }
131 
132 fn first-code-point-utf8 _self: (addr word) -> _/eax: code-point-utf8 {
      # Return the result of first-code-point-utf8-in-gap-buffer on the word's gap buffer.
133   var w/esi: (addr word) <- copy _self
134   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
135   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
136   var first/eax: code-point-utf8 <- first-code-point-utf8-in-gap-buffer buf
137   return first
138 }
139 
140 fn code-point-utf8-before-cursor _self: (addr word) -> _/eax: code-point-utf8 {
      # Return the result of code-point-utf8-before-cursor-in-gap-buffer on the
      # word's gap buffer.
141   var w/esi: (addr word) <- copy _self
142   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
143   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
144   var before/eax: code-point-utf8 <- code-point-utf8-before-cursor-in-gap-buffer buf
145   return before
146 }
147 
148 fn add-code-point-utf8-to-word _self: (addr word), c: code-point-utf8 {
      # Hand 'c' to add-code-point-utf8-at-gap for the word's gap buffer.
149   var w/esi: (addr word) <- copy _self
150   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
151   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
152   add-code-point-utf8-at-gap buf, c
153 }
154 
155 fn cursor-at-start? _self: (addr word) -> _/eax: boolean {
      # Return what gap-at-start? reports for the word's gap buffer.
156   var w/esi: (addr word) <- copy _self
157   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
158   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
159   var at-start?/eax: boolean <- gap-at-start? buf
160   return at-start?
161 }
162 
163 fn cursor-at-end? _self: (addr word) -> _/eax: boolean {
      # Return what gap-at-end? reports for the word's gap buffer.
164   var w/esi: (addr word) <- copy _self
165   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
166   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
167   var at-end?/eax: boolean <- gap-at-end? buf
168   return at-end?
169 }
170 
171 fn cursor-left _self: (addr word) {
      # Call gap-left on the word's gap buffer; the code point it returns is discarded.
172   var w/esi: (addr word) <- copy _self
173   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
174   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
175   var ignored/eax: code-point-utf8 <- gap-left buf
176 }
177 
178 fn cursor-right _self: (addr word) {
      # Call gap-right on the word's gap buffer; the code point it returns is discarded.
179   var w/esi: (addr word) <- copy _self
180   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
181   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
182   var ignored/eax: code-point-utf8 <- gap-right buf
183 }
184 
185 fn cursor-to-start _self: (addr word) {
      # Call gap-to-start on the word's gap buffer.
186   var w/esi: (addr word) <- copy _self
187   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
188   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
189   gap-to-start buf
190 }
191 
192 fn cursor-to-end _self: (addr word) {
      # Call gap-to-end on the word's gap buffer.
193   var w/esi: (addr word) <- copy _self
194   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
195   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
196   gap-to-end buf
197 }
198 
199 fn cursor-index _self: (addr word) -> _/eax: int {
      # Return the value of gap-index for the word's gap buffer.
200   var w/esi: (addr word) <- copy _self
201   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
202   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
203   var idx/eax: int <- gap-index buf
204   return idx
205 }
206 
207 fn delete-before-cursor _self: (addr word) {
      # Call delete-before-gap on the word's gap buffer.
208   var w/esi: (addr word) <- copy _self
209   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
210   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
211   delete-before-gap buf
212 }
213 
214 fn pop-after-cursor _self: (addr word) -> _/eax: code-point-utf8 {
      # Return the code point produced by pop-after-gap on the word's gap buffer.
215   var w/esi: (addr word) <- copy _self
216   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
217   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
218   var popped/eax: code-point-utf8 <- pop-after-gap buf
219   return popped
220 }
221 
222 fn delete-next _self: (addr word) {
      # Unlink the word that follows '_self':
      #   self->next = next->next
      #   if that new next is non-null, its prev = the unlinked word's prev
      # Does nothing when self has no next word.
223   var self/esi: (addr word) <- copy _self
224   var next-ah/edi: (addr handle word) <- get self, next
225   var next/eax: (addr word) <- lookup *next-ah
226   compare next, 0
227   break-if-=  # no next word: nothing to unlink
228   var next-next-ah/ecx: (addr handle word) <- get next, next
      # self-ah is the unlinked word's 'prev' slot (presumably a handle to self;
      # confirm against callers). It takes over esi; 'self' isn't referenced below.
229   var self-ah/esi: (addr handle word) <- get next, prev
230   copy-object next-next-ah, next-ah  # self->next = next->next
231   var new-next/eax: (addr word) <- lookup *next-next-ah
232   compare new-next, 0
233   break-if-=  # the unlinked word was last: no 'prev' to patch
234   var dest/eax: (addr handle word) <- get new-next, prev
235   copy-object self-ah, dest  # new-next->prev = unlinked word's prev
236 }
237 
238 fn print-word screen: (addr screen), _self: (addr word) {
      # Pass the word's gap buffer to render-gap-buffer along with 'screen'.
239   var w/esi: (addr word) <- copy _self
240   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
241   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
242   render-gap-buffer screen, buf
243 }
244 
245 fn print-words-in-reverse screen: (addr screen), _words-ah: (addr handle word) {
      # Recursively print the list starting at '_words-ah' from back to front,
      # emitting a single space after each word. A null list prints nothing.
246   var head-ah/eax: (addr handle word) <- copy _words-ah
247   var head/eax: (addr word) <- lookup *head-ah
248   compare head, 0
249   break-if-=
250   # first everything after this word
251   var rest-ah/ecx: (addr handle word) <- get head, next
252   print-words-in-reverse screen, rest-ah
253   # then this word itself
254   print-word screen, head
255   print-string screen, " "
256 }
257 
258 # Gotcha with some word operations: ensure dest-ah isn't in the middle of some
259 # existing chain of words. There are two pointers to patch, and you'll forget
260 # to do the other one.
261 fn copy-words _src-ah: (addr handle word), _dest-ah: (addr handle word) {
      # Recursively duplicate the list starting at '_src-ah' into '_dest-ah'.
      # A null source returns without touching '_dest-ah'.
262   var head-ah/eax: (addr handle word) <- copy _src-ah
263   var head/eax: (addr word) <- lookup *head-ah
264   compare head, 0
265   break-if-=
266   # duplicate the first word
267   var out-ah/edi: (addr handle word) <- copy _dest-ah
268   copy-word head, out-ah
269   # duplicate the remainder into a scratch handle, then link it after the new first word
270   var tail: (handle word)
271   var tail-ah/ecx: (addr handle word) <- address tail
272   var head-next-ah/esi: (addr handle word) <- get head, next
273   copy-words head-next-ah, tail-ah
274   chain-words out-ah, tail-ah
275 }
276 
277 fn copy-words-in-reverse _src-ah: (addr handle word), _dest-ah: (addr handle word) {
      # Recursively build in '_dest-ah' a copy of the list at '_src-ah' with the
      # words in the opposite order: the rest of the list is handled first, then
      # a copy of the current word is added at the end via copy-word-at-end.
278   var head-ah/eax: (addr handle word) <- copy _src-ah
279   var _head/eax: (addr word) <- lookup *head-ah
280   var head/esi: (addr word) <- copy _head
281   compare head, 0
282   break-if-=
283   # everything after this word goes first
284   var tail-ah/ecx: (addr handle word) <- get head, next
285   var out-ah/edi: (addr handle word) <- copy _dest-ah
286   copy-words-in-reverse tail-ah, out-ah
287   # then this word lands at the end of what's been built so far
288   copy-word-at-end head, out-ah
289 }
290 
291 fn copy-word-at-end src: (addr word), _dest-ah: (addr handle word) {
      # Duplicate 'src' and attach the duplicate after the last word of the list
      # at '_dest-ah'. If that list is null, the duplicate is written straight
      # into '_dest-ah'.
292   var head-ah/edi: (addr handle word) <- copy _dest-ah
293   # null destination: the duplicate becomes the whole list
294   var head/eax: (addr word) <- lookup *head-ah
295   compare head, 0
296   {
297     break-if-!=
298     copy-word src, head-ah
299     return
300   }
301   # duplicate src into a scratch handle
302   var fresh: (handle word)
303   var fresh-ah/ecx: (addr handle word) <- address fresh
304   copy-word src, fresh-ah
305   # walk to the last word of the destination list
306   var last-ah/edi: (addr handle word) <- copy head-ah
307   {
308     var last/eax: (addr word) <- lookup *last-ah  # guaranteed not to be null
309     var after-ah/ecx: (addr handle word) <- get last, next
310     var after/eax: (addr word) <- lookup *after-ah
311     compare after, 0
312     break-if-=
313     last-ah <- copy after-ah
314     loop
315   }
316   chain-words last-ah, fresh-ah
317 }
318 
319 fn append-word-at-end-with _dest-ah: (addr handle word), s: (addr array byte) {
      # Add a new word built from 's' after the last word of the list at
      # '_dest-ah'. If that list is null, the new word is allocated straight
      # into '_dest-ah'.
320   var head-ah/edi: (addr handle word) <- copy _dest-ah
321   # null destination: allocate the new word in place and return
322   var head/eax: (addr word) <- lookup *head-ah
323   compare head, 0
324   {
325     break-if-!=
326     allocate-word-with head-ah, s
327     return
328   }
329   # otherwise walk to the last word and append after it
330   var last-ah/edi: (addr handle word) <- copy head-ah
331   {
332     var last/eax: (addr word) <- lookup *last-ah  # guaranteed not to be null
333     var after-ah/ecx: (addr handle word) <- get last, next
334     var after/eax: (addr word) <- lookup *after-ah
335     compare after, 0
336     break-if-=
337     last-ah <- copy after-ah
338     loop
339   }
340   append-word-with *last-ah, s
341 }
342 
343 fn copy-word _src-a: (addr word), _dest-ah: (addr handle word) {
      # Allocate and initialize a new word in '_dest-ah', then call
      # copy-gap-buffer from the source word's 'scalar-data' to the new word's.
344   var out-ah/eax: (addr handle word) <- copy _dest-ah
345   allocate out-ah
346   var _out-word/eax: (addr word) <- lookup *out-ah
347   var out-word/eax: (addr word) <- copy _out-word
348   initialize-word out-word
349   var out-buf-ah/edi: (addr handle gap-buffer) <- get out-word, scalar-data
350   var in-word/eax: (addr word) <- copy _src-a
351   var in-buf-ah/eax: (addr handle gap-buffer) <- get in-word, scalar-data
352   copy-gap-buffer in-buf-ah, out-buf-ah
353 }
354 
355 # one implication of handles: append must take a handle
356 fn append-word _self-ah: (addr handle word) {
      # Allocate and initialize a new word and splice it in right after the word
      # at '_self-ah':
      #   new->next = self->next
      #   if self had a next word, that word's prev = new
      #   new->prev = (snapshot of) *_self-ah
      #   self->next = new
      # Lines starting with '#?' are commented-out debug prints.
      #
      # Snapshot the incoming handle before any links are rewritten. Presumably
      # this guards against '_self-ah' pointing at one of the slots modified
      # below -- TODO confirm.
357   var saved-self-storage: (handle word)
358   var saved-self/eax: (addr handle word) <- address saved-self-storage
359   copy-object _self-ah, saved-self
360 #?   {
361 #?     print-string 0, "self-ah is "
362 #?     var foo/eax: int <- copy _self-ah
363 #?     print-int32-hex 0, foo
364 #?     print-string 0, "\n"
365 #?   }
366   var self-ah/esi: (addr handle word) <- copy _self-ah
367   var _self/eax: (addr word) <- lookup *self-ah
368   var self/ebx: (addr word) <- copy _self
369 #?   {
370 #?     print-string 0, "0: self is "
371 #?     var self-ah/eax: (addr handle word) <- copy _self-ah
372 #?     var self/eax: (addr word) <- lookup *self-ah
373 #?     var foo/eax: int <- copy self
374 #?     print-int32-hex 0, foo
375 #?     print-string 0, "\n"
376 #?   }
377   # allocate new handle
378   var new: (handle word)
379   var new-ah/ecx: (addr handle word) <- address new
380   allocate new-ah
381   var new-addr/eax: (addr word) <- lookup new
382   initialize-word new-addr
383 #?   {
384 #?     print-string 0, "new is "
385 #?     var foo/eax: int <- copy new-addr
386 #?     print-int32-hex 0, foo
387 #?     print-string 0, "\n"
388 #?   }
389   # new->next = self->next
      # (src takes over esi; self-ah is not referenced by live code below)
390   var src/esi: (addr handle word) <- get self, next
391 #?   {
392 #?     print-string 0, "src is "
393 #?     var foo/eax: int <- copy src
394 #?     print-int32-hex 0, foo
395 #?     print-string 0, "\n"
396 #?   }
397   var dest/edi: (addr handle word) <- get new-addr, next
398   copy-object src, dest
399   # new->next->prev = new
      # (next-addr occupies eax, the same register as new-addr, inside this
      # block only; new-addr is used again once the block ends)
400   {
401     var next-addr/eax: (addr word) <- lookup *src
402     compare next-addr, 0
403     break-if-=
404 #?     {
405 #?       print-string 0, "next-addr is "
406 #?       var foo/eax: int <- copy next-addr
407 #?       print-int32-hex 0, foo
408 #?       print-string 0, "\n"
409 #?     }
410     dest <- get next-addr, prev
411 #? #?     {
412 #? #?       print-string 0, "self-ah is "
413 #? #?       var foo/eax: int <- copy _self-ah
414 #? #?       print-int32-hex 0, foo
415 #? #?       print-string 0, "\n"
416 #? #?       print-string 0, "2: self is "
417 #? #?       var self-ah/eax: (addr handle word) <- copy _self-ah
418 #? #?       var self/eax: (addr word) <- lookup *self-ah
419 #? #?       var foo/eax: int <- copy self
420 #? #?       print-int32-hex 0, foo
421 #? #?       print-string 0, "\n"
422 #? #?     }
423 #?     {
424 #?       print-string 0, "copying new to "
425 #?       var foo/eax: int <- copy dest
426 #?       print-int32-hex 0, foo
427 #?       print-string 0, "\n"
428 #?     }
429     copy-object new-ah, dest
430 #?     {
431 #?       print-string 0, "4: self is "
432 #?       var self-ah/eax: (addr handle word) <- copy _self-ah
433 #?       var self/eax: (addr word) <- lookup *self-ah
434 #?       var foo/eax: int <- copy self
435 #?       print-int32-hex 0, foo
436 #?       print-string 0, "\n"
437 #?     }
438   }
439   # new->prev = saved-self
440   dest <- get new-addr, prev
441 #?   {
442 #?     print-string 0, "copying "
443 #?     var self-ah/esi: (addr handle word) <- copy _self-ah
444 #?     var self/eax: (addr word) <- lookup *self-ah
445 #?     var foo/eax: int <- copy self
446 #?     print-int32-hex 0, foo
447 #?     print-string 0, " to "
448 #?     foo <- copy dest
449 #?     print-int32-hex 0, foo
450 #?     print-string 0, "\n"
451 #?   }
      # take the snapshot's address again; eax has held other values since line 358
452   var saved-self-ah/eax: (addr handle word) <- address saved-self-storage
453   copy-object saved-self-ah, dest
454   # self->next = new
455   dest <- get self, next
456   copy-object new-ah, dest
457 }
458 
459 fn chain-words _self-ah: (addr handle word), _next: (addr handle word) {
      # Link two words: self->next = *_next and, when *_next is non-null,
      # next->prev = *_self-ah.
460   var first-ah/esi: (addr handle word) <- copy _self-ah
461   var _first/eax: (addr word) <- lookup *first-ah
462   var first/ecx: (addr word) <- copy _first
463   var link/edx: (addr handle word) <- get first, next
464   var second-ah/edi: (addr handle word) <- copy _next
465   copy-object second-ah, link
466   var second/eax: (addr word) <- lookup *second-ah
467   compare second, 0
468   break-if-=  # nothing follows, so there's no back-link to set
469   link <- get second, prev
470   copy-object first-ah, link
471 }
472 
473 fn emit-word _self: (addr word), out: (addr stream byte) {
      # Pass the word's gap buffer and 'out' to emit-gap-buffer.
474   var w/esi: (addr word) <- copy _self
475   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
476   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
477   emit-gap-buffer buf, out
478 }
479 
480 fn word-to-string _self: (addr word), out: (addr handle array byte) {
      # Pass the word's gap buffer and 'out' to gap-buffer-to-string.
481   var w/esi: (addr word) <- copy _self
482   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
483   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
484   gap-buffer-to-string buf, out
485 }
486 
487 fn word-is-decimal-integer? _self: (addr word) -> _/eax: boolean {
      # Return what gap-buffer-is-decimal-integer? reports for the word's gap buffer.
488   var w/eax: (addr word) <- copy _self
489   var buf-ah/eax: (addr handle gap-buffer) <- get w, scalar-data
490   var buf/eax: (addr gap-buffer) <- lookup *buf-ah
491   var decimal?/eax: boolean <- gap-buffer-is-decimal-integer? buf
492   return decimal?
493 }
494 
495 # ABSOLUTELY GHASTLY
496 fn word-exists? _haystack-ah: (addr handle word), _needle: (addr word) -> _/ebx: boolean {
      # Return true if some word in the list starting at '_haystack-ah' has the
      # same string contents as '_needle' (compared with string-equal?), and
      # false otherwise, including when the list is null.
      #
      # The list is walked with a loop and the needle is converted to a string
      # just once per call. (Recursing re-converted the needle, and leaked the
      # resulting string, at every word visited.) Each word visited is still
      # converted to a fresh string that is never freed.
497   var needle-name-storage: (handle array byte)
498   var needle-name-ah/eax: (addr handle array byte) <- address needle-name-storage
499   word-to-string _needle, needle-name-ah  # leak, but only once per call
500   var _needle-name/eax: (addr array byte) <- lookup *needle-name-ah
501   var needle-name/edi: (addr array byte) <- copy _needle-name
      # scratch handle, overwritten for each word we compare against
502   var curr-name-storage: (handle array byte)
503   var curr-name-ah/ecx: (addr handle array byte) <- address curr-name-storage
504   var curr-ah/esi: (addr handle word) <- copy _haystack-ah
505   {
506     var curr/eax: (addr word) <- lookup *curr-ah
        # end of list: no match
507     compare curr, 0
508     break-if-=
        # check curr
509     word-to-string curr, curr-name-ah  # profligate leak
510     var curr-name/eax: (addr array byte) <- lookup *curr-name-ah
511     var found?/eax: boolean <- string-equal? needle-name, curr-name
512     compare found?, 0/false
513     {
514       break-if-=
515       return 1/true
516     }
        # advance; eax was reused above, so look the current word up again
517     var curr/eax: (addr word) <- lookup *curr-ah
518     curr-ah <- get curr, next
519     loop
520   }
521   return 0/false
522 }
527 
528 fn word-list-length words: (addr handle word) -> _/eax: int {
      # Return the sum, over every word in the list, of its word-length plus 1
      # (the inter-word margin).
529   var node-ah/esi: (addr handle word) <- copy words
530   var total/edi: int <- copy 0
531   {
532     var node/eax: (addr word) <- lookup *node-ah
533     compare node, 0
534     break-if-=
535     {
536       var len/eax: int <- word-length node
537       total <- add len
538       total <- add 1/inter-word-margin
539     }
540     node-ah <- get node, next
541     loop
542   }
543   return total
544 }
545 
546 # out-ah already has a word allocated and initialized
547 fn parse-words in: (addr array byte), out-ah: (addr handle word) {
      # Read 'in' one code point at a time. Anything other than a space (0x20)
      # is added to the current word; a space appends a new word after the
      # current one and makes that new word current.
      # 'in' is first written into a local stream declared with size 0x100.
548   var input-storage: (stream byte 0x100)
549   var input/esi: (addr stream byte) <- address input-storage
550   write input, in
551   var active-ah/ebx: (addr handle word) <- copy out-ah
552   $parse-words:loop: {
553     var exhausted?/eax: boolean <- stream-empty? input
554     compare exhausted?, 0/false
555     break-if-!=
556     var _cp/eax: code-point-utf8 <- read-code-point-utf8 input
557     var cp/ecx: code-point-utf8 <- copy _cp
558     # anything but a space goes into the active word
559     compare cp, 0x20/space
560     {
561       break-if-=
562       var active/eax: (addr word) <- lookup *active-ah
563       add-code-point-utf8-to-word active, cp
564       loop $parse-words:loop
565     }
566     # a space: append a word after the active one and make it the active word
567     append-word active-ah
568     var active/eax: (addr word) <- lookup *active-ah
569     cursor-to-start active  # reset cursor in each function
570     active-ah <- get active, next
571     loop
572   }
573 }