From 27decf77f22fffe44054c5ec90b07fd089888df5 Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Sun, 8 Sep 2019 00:33:57 -0700 Subject: 5644 - plan data structures for mulisp --- apps/mulisp.subx | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) (limited to 'apps/mulisp.subx') diff --git a/apps/mulisp.subx b/apps/mulisp.subx index 92803e97..6487e009 100644 --- a/apps/mulisp.subx +++ b/apps/mulisp.subx @@ -35,13 +35,43 @@ $main:end: b8/copy-to-eax 1/imm32/exit cd/syscall 0x80/imm8 -# type cell = nil | num | char | symbol | pair | array | stream -# tag: 0 1 2 3 4 5 6 -# numbers start with a digit and are always in hex -# characters start with a backslash -# pairs start with '(' -# arrays start with '[' -# symbols start with anything else but quote, backquote, unquote or splice +# Data structures +# +# Lisp is dynamically typed. Values always carry around knowledge of their +# type. +# +# There are several kinds of types in the description below, so we need a +# glossary and notational convention to disambiguate: +# lisp type: what Lisp code can see. Looks the way you type it at the prompt. +# nil num char string symbol pair array +# type tag: the code for a lisp type. All caps. +# NIL NUM CHAR STRING SYMBOL PAIR ARRAY +# memory type: a type specifying memory layout at the SubX level. Starts +# with a '$'. +# $int $array $(address _) +# +# Lisp values are represented in memory by the _cell_ data structure. A cell +# is 12 bytes long: +# tag: $int (4 bytes; we're not concerned about wasting space) +# data: 8 bytes whose contents and meaning depend on tag +# +# What values of the different Lisp types look like in memory: +# - nil: cell{ tag: 0/NIL, data: 0 0 } +# - num: cell{ tag: 1/NUM, data: $int 0 } +# data contains the number +# - char: cell{ tag: 2/CHAR, data: $int 0 } +# data contains the utf-8 code of the character (no compound glyphs, no +# modifiers, etc., etc.)
+# - string: cell{ tag: 3/STRING, data: $(address array byte) 0 } +# data contains an (address array byte) containing the string in utf-8 +# - symbol: cell{ tag: 4/SYMBOL, data: $(address array byte) 0 } +# data contains an (address array byte) containing the name of the symbol in utf-8 +# alternatively, data could contain an index into the table of interned symbols +# - pair: cell{ tag: 5/PAIR, data: $(address cell) $(address cell) } +# data contains pointers to car and cdr +# - array: cell{ tag: 6/ARRAY, data: $tag $(address array data) } +# data contains a pointer to an array of 8-byte data fields and the common +# tag for them all repl: # in : (address buffered-file), out : (address buffered-file) -> # . prolog @@ -67,6 +97,11 @@ $repl:end: 5d/pop-to-ebp c3/return +# numbers start with a digit and are always in hex +# characters start with a backslash +# pairs start with '(' +# arrays start with '[' +# symbols start with anything else but quote, backquote, unquote or splice lisp-read: # in : (address buffered-file) -> eax : (address cell) # . prolog 55/push-ebp -- cgit 1.4.1-2-gfad0