012transform.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102

//: Phase 2: Filter loaded recipes through an extensible list of 'transforms'.
//:
//:   The process of running Mu code:
//:     load -> transform -> run
//:
//: The hope is that this framework of transform tools will provide a
//: deconstructed alternative to conventional compilers.
//:
//: We're going to have many transforms in Mu, and getting their order right
//: (not the same as ordering of layers) is a well-known problem. Some tips:
//:   a) Design each layer to rely on as few previous layers as possible.
//:
//:   b) When positioning transforms, try to find the tightest constraint in
//:   each transform relative to previous layers.
//:
//:   c) Even so you'll periodically need to try adjusting each transform
//:   relative to those in previous layers to find a better arrangement.

//: Each recipe records the index of the last transform applied to it.
//: It starts at -1, i.e. no transform has been applied yet.
:(before "End recipe Fields")
int transformed_until;
:(before "End recipe Constructor")
transformed_until = -1;

//: A transform is a plain function that receives the ordinal of the recipe to
//: operate on and returns nothing.
:(before "End Types")
typedef void (*transform_fn)(const recipe_ordinal);

//: The list of transforms. transform_all() walks it by index, from 0 upward.
:(before "End Globals")
vector<transform_fn> Transform;

:(before "End One-time Setup")
initialize_transforms();
:(code)
// Called once during setup. In this layer the body contains no statements,
// only marker comments laid out in a fixed order: transforms that insert or
// delete instructions, then transforms that modify instructions, then checks.
// NOTE(review): presumably other layers register their transforms at these
// markers; keep the marker lines exactly as they are.
void initialize_transforms() {
  // Begin Transforms
    // Begin Instruction Inserting/Deleting Transforms
    // End Instruction Inserting/Deleting Transforms

    // Begin Instruction Modifying Transforms
    // End Instruction Modifying Transforms
  // End Transforms

  // Begin Checks
  // End Checks
}

// Apply the registered transforms to the loaded recipes.
// Transforms are visited by index; for each one, every recipe in Recipe is
// considered before moving on to the next transform. A recipe is only handed
// to transform t if the last transform applied to it was t-1, and afterwards
// it is marked as transformed up to t. Once the loops finish,
// parse_int_reagents() runs as a final pass.
void transform_all() {
  trace(9990, "transform") << "=== transform_all()" << end();
  // Begin transform_all
  for (int t = 0;  t < SIZE(Transform);  ++t) {
    for (map<recipe_ordinal, recipe>::iterator p = Recipe.begin();  p != Recipe.end();  ++p) {
      recipe& r = p->second;
      // skip recipes that are not exactly one step behind transform t
      if (r.transformed_until != t-1) continue;
      // End Transform Checks
      (*Transform.at(t))(/*recipe_ordinal*/p->first);
      // remember progress so a later call doesn't re-apply transform t to this recipe
      r.transformed_until = t;
    }
  }
  parse_int_reagents();  // do this after all other transforms have run
  // End transform_all
}

//: Even though a run will involve many calls to transform_all() for tests,
//: our logical model is to load all code, then transform all code, then run.
//: If you load new code that should cause already-transformed recipes to
//: change, that's not supported. To help detect such situations and raise
//: helpful errors we track a count of the number of calls made to
//: transform_all().
//: Counter of calls to transform_all(); starts at 0.
:(before "End Globals")
int Num_calls_to_transform_all = 0;
//: The increment below is attached to the header line of the function.
:(after "void transform_all()")
  ++Num_calls_to_transform_all;

:(code)
void parse_int_reagents() {
  trace(9991, "transform") << "--- parsing any uninitialized reagents as integers" << end();
  for (map<recipe_ordinal, recipe>::iterator p = Recipe.begin();  p != Recipe.end();  ++p) {
    recipe& r = p->second;
    if (r.steps.empty()) continue;
    for (int index = 0;  index < SIZE(r.steps);  ++index) {
      instruction& inst = r.steps.at(index);
      for (int i = 0;  i < SIZE(inst.ingredients);  ++i) {
        populate_value(inst.ingredients.at(i));
      }
      for (int i = 0;  i < SIZE(inst.products);  ++i) {
        populate_value(inst.products.at(i));
      }
    }
  }
}

// Set the value of reagent 'r' from its name when is_integer() accepts that name.
// Does nothing if 'r' is already marked initialized, or if the name is not an
// integer according to is_integer().
void populate_value(reagent& r) {
  // never overwrite a reagent that has already been initialized
  if (r.initialized) return;
  // End Reagent-parsing Exceptions
  if (!is_integer(r.name)) return;
  r.set_value(to_integer(r.name));
}

// helper for tests -- temporarily suppress run
// Loads the code in 'form' and then calls transform_all(); nothing here runs
// the loaded code afterwards.
void transform(string form) {
  load(form);
  transform_all();
}
## Mu's instructions and their table-driven translation

See http://akkartik.name/akkartik-convivial-20200607.pdf for the complete
story. In brief: Mu is a statement-oriented language. Blocks consist of flat
lists of instructions. Instructions can have inputs after the operation, and
outputs to the left of a '<-'. Inputs and outputs must be variables. They can't
include nested expressions. Variables can be literals ('n'), or live in a
register ('var/reg') or in memory ('var') at some 'stack-offset' from the 'ebp'
register. Outputs must be registers. To modify a variable in memory, pass it in
by reference as an input. (Inputs are more precisely called 'inouts'.)
Conversely, registers that are just read from must not be passed as outputs.

The following chart shows all the instruction forms supported by Mu, along with
the SubX instruction they're translated to.

var/eax <- increment              => "40/increment-eax"
var/ecx <- increment              => "41/increment-ecx"
var/edx <- increment              => "42/increment-edx"
var/ebx <- increment              => "43/increment-ebx"
var/esi <- increment              => "46/increment-esi"
var/edi <- increment              => "47/increment-edi"
increment var                     => "ff 0/subop/increment *(ebp+" var.stack-offset ")"
increment *var/reg                => "ff 0/subop/increment *" reg

var/eax <- decrement              => "48/decrement-eax"
var/ecx <- decrement              => "49/decrement-ecx"
var/edx <- decrement              => "4a/decrement-edx"
var/ebx <- decrement              => "4b/decrement-ebx"
var/esi <- decrement              => "4e/decrement-esi"
var/edi <- decrement              => "4f/decrement-edi"
decrement var                     => "ff 1/subop/decrement *(ebp+" var.stack-offset ")"
decrement *var/reg                => "ff 1/subop/decrement *" reg

var/reg <- add var2/reg2          => "01/add-to %" reg " " reg2 "/r32"
var/reg <- add var2               => "03/add *(ebp+" var2.stack-offset ") " reg "/r32"
var/reg <- add *var2/reg2         => "03/add *" reg2 " " reg "/r32"
add-to var1, var2/reg             => "01/add-to *(ebp+" var1.stack-offset ") " reg "/r32"
var/eax <- add n                  => "05/add-to-eax " n "/imm32"
var/reg <- add n                  => "81 0/subop/add %" reg " " n "/imm32"
add-to var, n                     => "81 0/subop/add *(ebp+" var.stack-offset ") " n "/imm32"
add-to *var/reg, n                => "81 0/subop/add *" reg " " n "/imm32"

var/reg <- subtract var2/reg2     => "29/subtract-from %" reg " " reg2 "/r32"
var/reg <- subtract var2          => "2b/subtract *(ebp+" var2.stack-offset ") " reg "/r32"
var/reg <- subtract *var2/reg2    => "2b/subtract *" reg2 " " reg "/r32"
subtract-from var1, var2/reg2     => "29/subtract-from *(ebp+" var1.stack-offset ") " reg2 "/r32"
var/eax <- subtract n             => "2d/subtract-from-eax " n "/imm32"
var/reg <- subtract n             => "81 5/subop/subtract %" reg " " n "/imm32"
subtract-from var, n              => "81 5/subop/subtract *(ebp+" var.stack-offset ") " n "/imm32"
subtract-from *var/reg, n         => "81 5/subop/subtract *" reg " " n "/imm32"

var/reg <- and var2/reg2          => "21/and-with %" reg " " reg2 "/r32"
var/reg <- and var2               => "23/and *(ebp+" var2.stack-offset ") " reg "/r32"
var/reg <- and *var2/reg2         => "23/and *" reg2 " " reg "/r32"
and-with var1, var2/reg           => "21/and-with *(ebp+" var1.stack-offset ") " reg "/r32"
var/eax <- and n                  => "25/and-with-eax " n "/imm32"
var/reg <- and n                  => "81 4/subop/and %" reg " " n "/imm32"
and-with var, n                   => "81 4/subop/and *(ebp+" var.stack-offset ") " n "/imm32"
and-with *var/reg, n              => "81 4/subop/and *" reg " " n "/imm32"

var/reg <- or var2/reg2           => "09/or-with %" reg " " reg2 "/r32"
var/reg <- or var2                => "0b/or *(ebp+" var2.stack-offset ") " reg "/r32"
var/reg <- or *var2/reg2          => "0b/or *" reg2 " " reg "/r32"
or-with var1, var2/reg2           => "09/or-with *(ebp+" var1.stack-offset ") " reg2 "/r32"
var/eax <- or n                   => "0d/or-with-eax " n "/imm32"
var/reg <- or n                   => "81 1/subop/or %" reg " " n "/imm32"
or-with var, n                    => "81 1/subop/or *(ebp+" var.stack-offset ") " n "/imm32"
or-with *var/reg, n               => "81 1/subop/or *" reg " " n "/imm32"

var/reg <- xor var2/reg2          => "31/xor-with %" reg " " reg2 "/r32"
var/reg <- xor var2               => "33/xor *(ebp+" var2.stack-offset ") " reg "/r32"
var/reg <- xor *var2/reg2         => "33/xor *" reg2 " " reg "/r32"
xor-with var1, var2/reg           => "31/xor-with *(ebp+" var1.stack-offset ") " reg "/r32"
var/eax <- xor n                  => "35/xor-with-eax " n "/imm32"
var/reg <- xor n                  => "81 6/subop/xor %" reg " " n "/imm32"
xor-with var, n                   => "81 6/subop/xor *(ebp+" var.stack-offset ") " n "/imm32"
xor-with *var/reg, n              => "81 6/subop/xor *" reg " " n "/imm32"

var/reg <- shift-left n
var/reg <- shift-right n
var/reg <- shift-right-signed n
shift-left var, n
shift-right var, n
shift-right-signed var, n

var/eax <- copy n                 => "b8/copy-to-eax " n "/imm32"
var/ecx <- copy n                 => "b9/copy-to-ecx " n "/imm32"
var/edx <- copy n                 => "ba/copy-to-edx " n "/imm32"
var/ebx <- copy n                 => "bb/copy-to-ebx " n "/imm32"
var/esi <- copy n                 => "be/copy-to-esi " n "/imm32"
var/edi <- copy n                 => "bf/copy-to-edi " n "/imm32"
var/reg <- copy var2/reg2         => "89/<- %" reg " " reg2 "/r32"
copy-to var1, var2/reg            => "89/<- *(ebp+" var1.stack-offset ") " reg "/r32"
var/reg <- copy var2              => "8b/-> *(ebp+" var2.stack-offset ") " reg "/r32"
var/reg <- copy *var2/reg2        => "8b/-> *" reg2 " " reg "/r32"
var/reg <- copy n                 => "c7 0/subop/copy %" reg " " n "/imm32"
copy-to var, n                    => "c7 0/subop/copy *(ebp+" var.stack-offset ") " n "/imm32"
copy-to *var/reg, n               => "c7 0/subop/copy *" reg " " n "/imm32"

var/reg <- copy-byte var2/reg2    => "8a/byte-> %" reg2 " " reg "/r32"
var/reg <- copy-byte *var2/reg2   => "8a/byte-> *" reg2 " " reg "/r32"
copy-byte-to *var1/reg1, var2/reg2  => "88/byte<- *" reg1 " " reg2 "/r32"

compare var1, var2/reg2           => "39/compare *(ebp+" var1.stack-offset ") " reg2 "/r32"
compare *var1/reg1, var2/reg2     => "39/compare *" reg1 " " reg2 "/r32"
compare var1/reg1, var2           => "3b/compare<- *(ebp+" var2.stack-offset ") " reg1 "/r32"
compare var/reg, *var2/reg2       => "3b/compare<- *" reg2 " " reg "/r32"
compare var/eax, n                => "3d/compare-eax-with " n "/imm32"
compare var/reg, n                => "81 7/subop/compare %" reg " " n "/imm32"
compare var, n                    => "81 7/subop/compare *(ebp+" var.stack-offset ") " n "/imm32"
compare *var/reg, n               => "81 7/subop/compare *" reg " " n "/imm32"

var/reg <- multiply var2          => "0f af/multiply *(ebp+" var2.stack-offset ") " reg "/r32"
var/reg <- multiply *var2/reg2    => "0f af/multiply *" reg2 " " reg "/r32"

break                             => "e9/jump break/disp32"
break label                       => "e9/jump " label ":break/disp32"
loop                              => "e9/jump loop/disp32"
loop label                        => "e9/jump " label ":loop/disp32"

break-if-=                        => "0f 84/jump-if-= break/disp32"
break-if-= label                  => "0f 84/jump-if-= " label ":break/disp32"
loop-if-=                         => "0f 84/jump-if-= loop/disp32"
loop-if-= label                   => "0f 84/jump-if-= " label ":loop/disp32"

break-if-!=                       => "0f 85/jump-if-!= break/disp32"
break-if-!= label                 => "0f 85/jump-if-!= " label ":break/disp32"
loop-if-!=                        => "0f 85/jump-if-!= loop/disp32"
loop-if-!= label                  => "0f 85/jump-if-!= " label ":loop/disp32"

break-if-<                        => "0f 8c/jump-if-< break/disp32"
break-if-< label                  => "0f 8c/jump-if-< " label ":break/disp32"
loop-if-<                         => "0f 8c/jump-if-< loop/disp32"
loop-if-< label                   => "0f 8c/jump-if-< " label ":loop/disp32"

break-if->                        => "0f 8f/jump-if-> break/disp32"
break-if-> label                  => "0f 8f/jump-if-> " label ":break/disp32"
loop-if->                         => "0f 8f/jump-if-> loop/disp32"
loop-if-> label                   => "0f 8f/jump-if-> " label ":loop/disp32"

break-if-<=                       => "0f 8e/jump-if-<= break/disp32"
break-if-<= label                 => "0f 8e/jump-if-<= " label ":break/disp32"
loop-if-<=                        => "0f 8e/jump-if-<= loop/disp32"
loop-if-<= label                  => "0f 8e/jump-if-<= " label ":loop/disp32"

break-if->=                       => "0f 8d/jump-if->= break/disp32"
break-if->= label                 => "0f 8d/jump-if->= " label ":break/disp32"
loop-if->=                        => "0f 8d/jump-if->= loop/disp32"
loop-if->= label                  => "0f 8d/jump-if->= " label ":loop/disp32"

break-if-addr<                    => "0f 82/jump-if-addr< break/disp32"
break-if-addr< label              => "0f 82/jump-if-addr< " label ":break/disp32"
loop-if-addr<                     => "0f 82/jump-if-addr< loop/disp32"
loop-if-addr< label               => "0f 82/jump-if-addr< " label ":loop/disp32"

break-if-addr>                    => "0f 87/jump-if-addr> break/disp32"
break-if-addr> label              => "0f 87/jump-if-addr> " label ":break/disp32"
loop-if-addr>                     => "0f 87/jump-if-addr> loop/disp32"
loop-if-addr> label               => "0f 87/jump-if-addr> " label ":loop/disp32"

break-if-addr<=                   => "0f 86/jump-if-addr<= break/disp32"
break-if-addr<= label             => "0f 86/jump-if-addr<= " label ":break/disp32"
loop-if-addr<=                    => "0f 86/jump-if-addr<= loop/disp32"
loop-if-addr<= label              => "0f 86/jump-if-addr<= " label ":loop/disp32"

break-if-addr>=                   => "0f 83/jump-if-addr>= break/disp32"
break-if-addr>= label             => "0f 83/jump-if-addr>= " label ":break/disp32"
loop-if-addr>=                    => "0f 83/jump-if-addr>= loop/disp32"
loop-if-addr>= label              => "0f 83/jump-if-addr>= " label ":loop/disp32"

In the following instructions, types are shown for clarity even though they
must actually be provided in an earlier 'var' declaration.

# Address operations

var/reg: (addr T) <- address var2: T
  => "8d/copy-address *(ebp+" var2.stack-offset ") " reg "/r32"

# Array operations
(TODO: bounds-checking)

var/reg <- index arr/rega: (addr array T), idx/regi: int
  | if size-of(T) is 4 or 8
      => "8d/copy-address *(" rega "+" regi "<<" log2(size-of(T)) "+4) " reg "/r32"
var/reg <- index arr: (array T sz), idx/regi: int
  => "8d/copy-address *(ebp+" regi "<<" log2(size-of(T)) "+" (arr.stack-offset + 4) ") " reg "/r32"
var/reg <- index arr/rega: (addr array T), n
  => "8d/copy-address *(" rega "+" (n*size-of(T)+4) ") " reg "/r32"
var/reg <- index arr: (array T sz), n
  => "8d/copy-address *(ebp+" (arr.stack-offset+4+n*size-of(T)) ") " reg "/r32"

var/reg: (offset T) <- compute-offset arr: (addr array T), idx/regi: int  # arr can be in reg or mem
  => "69/multiply %" regi " " size-of(T) "/imm32 " reg "/r32"
var/reg: (offset T) <- compute-offset arr: (addr array T), idx: int       # arr can be in reg or mem
  => "69/multiply *(ebp+" idx.stack-offset ") " size-of(T) "/imm32 " reg "/r32"
var/reg <- index arr/rega: (addr array T), o/rego: offset
  => "8d/copy-address *(" rega "+" rego "+4) " reg "/r32"

Computing the length of an array is complex.

var/reg <- length arr/reg2: (addr array T)
  | if T is byte (TODO)
      => "8b/-> *" reg2 " " reg "/r32"
  | if size-of(T) is 4 or 8 or 16 or 32 or 64 or 128
      => "8b/-> *" reg2 " " reg "/r32"
         "c1/shift 5/subop/logic-right %" reg " " log2(size-of(T)) "/imm8"
  | otherwise
      x86 has no instruction to divide by a literal, so
      we need up to 3 extra registers! eax/edx for division and say ecx
      => if reg is not eax
          "50/push-eax"
         if reg is not ecx
          "51/push-ecx"
         if reg is not edx
          "52/push-edx"
         "8b/-> *" reg2 " eax/r32"
         "31/xor %edx 2/r32/edx"  # sign-extend, but array size can't be negative
         "b9/copy-to-ecx " size-of(T) "/imm32"
         "f7 7/subop/idiv-eax-edx-by %ecx"
         if reg is not eax
           "89/<- %" reg " 0/r32/eax"
         if reg is not edx
          "5a/pop-to-edx"
         if reg is not ecx
          "59/pop-to-ecx"
         if reg is not eax
          "58/pop-to-eax"

# User-defined types

If a record (product) type T was defined to have elements a, b, c, ... of
types T_a, T_b, T_c, ..., then accessing one of those elements f of type T_f:

var/reg: (addr T_f) <- get var2/reg2: (addr T), f
  => "8d/copy-address *(" reg2 "+" offset(f) ") " reg "/r32"
var/reg: (addr T_f) <- get var2: T, f
  => "8d/copy-address *(ebp+" var2.stack-offset "+" offset(f) ") " reg "/r32"

# Allocating memory

allocate in: (addr handle T)
  => "(allocate Heap " size-of(T) " " in ")"

populate in: (addr handle array T), num  # can be literal or variable on stack or register
  => "(allocate-array2 Heap " size-of(T) " " num " " in ")"

populate-stream in: (addr handle stream T), num  # can be literal or variable on stack or register
  => "(new-stream Heap " size-of(T) " " num " " in ")"

read-from-stream s: (addr stream T), out: (addr T)
  => "(read-from-stream " s " " out " " size-of(T) ")"

write-to-stream s: (addr stream T), in: (addr T)
  => "(write-to-stream " s " " in " " size-of(T) ")"

vim:ft=mu:nowrap:textwidth=0