5644 - plan data structures for mulisp

author: Kartik Agaram <vc@akkartik.com> 2019-09-08 00:33:57 -0700
committer: Kartik Agaram <vc@akkartik.com> 2019-09-08 00:33:57 -0700
commit: 27decf77f22fffe44054c5ec90b07fd089888df5 (patch)
tree: 2874b02dd365bb25bff291fd86912e7a99a38686 /apps
parent: 4c131b38139eb989a25c7125502295ae1de2a1ea (diff)
download: mu-27decf77f22fffe44054c5ec90b07fd089888df5.tar.gz
1 files changed, 42 insertions, 7 deletions
diff --git a/apps/mulisp.subx b/apps/mulisp.subx
index 92803e97..6487e009 100644
--- a/apps/mulisp.subx
+++ b/apps/mulisp.subx
@@ -35,13 +35,43 @@ $main:end:
     b8/copy-to-eax 1/imm32/exit
     cd/syscall 0x80/imm8
 
-# type cell = nil | num | char | symbol | pair | array | stream
-#     tag:    0     1     2       3       4       5       6
-# numbers start with a digit and are always in hex
-# characters start with a backslash
-# pairs start with '('
-# arrays start with '['
-# symbols start with anything else but quote, backquote, unquote or splice
+# Data structures
+#
+# Lisp is dynamically typed. Values always carry around knowledge of their
+# type.
+#
+# There are several kinds of types in the description below, so we need a
+# glossary and notational convention to disambiguate:
+#   lisp type: what Lisp code can see. Written the way you type it at the prompt.
+#     nil num char string symbol pair array
+#   type tag: the code for a lisp type. All caps.
+#     NIL NUM CHAR STRING SYMBOL PAIR ARRAY
+#   memory type: a type specifying memory layout at the SubX level. Starts
+#   with a '$'.
+#     $int $array $(address _)
+#
+# Lisp values are represented in memory by the _cell_ data structure. A cell
+# is 12 bytes long:
+#   tag: $int (4 bytes; we're not concerned about wasting space)
+#   data: 8 bytes whose contents and meaning depend on tag
+#
+# What values of the different Lisp types look like in memory:
+#   - nil: cell{ tag: 0/NIL, data: 0 0 }
+#   - num: cell{ tag: 1/NUM, data: $int 0 }
+#     data contains the number
+#   - char: cell{ tag: 2/CHAR, data: $int 0 }
+#     data contains the utf-8 code of the character (no compound glyphs, no
+#     modifiers, etc., etc.)
+#   - string: cell{ tag: 3/STRING, data: $(address array byte) 0 }
+#     data contains an (address array byte) containing the string in utf-8
+#   - symbol: cell{ tag: 4/SYMBOL, data: $(address array byte) 0 }
+#     data contains an (address array byte) containing the name of the symbol in utf-8
+#     alternatively, data could contain an index into the table of interned symbols
+#   - pair: cell{ tag: 5/PAIR, data: $(address cell) $(address cell) }
+#     data contains pointers to car and cdr
+#   - array: cell{ tag: 6/ARRAY, data: $tag $(address array data) }
+#     data contains a pointer to an array of 8-byte data fields and the common
+#     tag for them all
 
 repl:  # in : (address buffered-file), out : (address buffered-file) -> <void>
     # . prolog
@@ -67,6 +97,11 @@ $repl:end:
     5d/pop-to-ebp
     c3/return
 
+# numbers start with a digit and are always in hex
+# characters start with a backslash
+# pairs start with '('
+# arrays start with '['
+# symbols start with anything else but quote, backquote, unquote or splice
 lisp-read:  # in : (address buffered-file) -> eax : (address cell)
     # . prolog
     55/push-ebp
author	Kartik Agaram <vc@akkartik.com>	2019-09-08 00:33:57 -0700
committer	Kartik Agaram <vc@akkartik.com>	2019-09-08 00:33:57 -0700
commit	27decf77f22fffe44054c5ec90b07fd089888df5 (patch)
tree	2874b02dd365bb25bff291fd86912e7a99a38686 /apps
parent	4c131b38139eb989a25c7125502295ae1de2a1ea (diff)
download	mu-27decf77f22fffe44054c5ec90b07fd089888df5.tar.gz