about summary refs log blame commit diff stats
path: root/tools/treeshake.cc
blob: 9bf5106eddb87fed76a41d4c88d1bc99c3ca50c2 (plain) (tree)




























                                                                                             
                










                                                        

        







































                                                                                                                                                 




















                                                                                                                          

 


















                                                                                         








                                                                                                             
            

                                          

                                         







                                                                  
// Read a set of lines on stdin of the following form:
//  definition:
//    ...
//    ...
//
// Delete all 'dead' definitions: a definition line plus the indented lines
// following it, whose name isn't used anywhere outside its own body.
//
// This can be transitive; deleting one definition may cause other definitions
// to become dead.
//
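// Also assorts segments as a side-effect: all 'code' segments are merged into
// one and all 'data' segments into another, each sorted by definition name.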
//
// Like linkify, treeshake is a hack.

#include<assert.h>

#include<map>
using std::map;
#include<vector>
using std::vector;
// SIZE(X): X.size() as a signed int. Asserts that the size is below
// 2^(bits in an int - 2), so the cast to int can't overflow.
#define SIZE(X) (assert((X).size() < (1LL<<(sizeof(int)*8-2))), static_cast<int>((X).size()))

#include<string>
using std::string;

#include<iostream>
using std::cin;
using std::cout;
using std::cerr;

#include<sstream>
using std::istringstream;

// Does 's' begin with the prefix 'pat'?
// An empty 'pat' is a prefix of every string, including the empty one.
bool starts_with(const string& s, const string& pat) {
  // a prefix can never be longer than the string it starts
  if (pat.size() > s.size()) return false;
  return s.compare(0, pat.size(), pat) == 0;
}

// input

// Store the definition called 'name' in 'segment': first its
// 'definition_line', then every following line of stdin that belongs to its
// body. Any earlier definition stored under the same name is replaced.
// Reading stops (without consuming) at the first line that isn't part of the
// body, or at end of input.
void read_body(string name, string definition_line, map<string, vector<string> >& segment) {
  // last definition wins; this only matters for the 'Entry' label in the code segment
  vector<string>& body = segment[name];
  body.assign(1, definition_line);
  for (;;) {
    if (cin.eof()) return;
    // assumes: no whitespace but spaces; internal labels start with '$'
    const int next = cin.peek();
    if (next != ' ' && next != '$') return;
    string body_line;
    getline(cin, body_line);
    body.push_back(body_line);
  }
}

// Read the definitions of one segment from stdin into 'segment', stopping
// (without consuming) at the next segment header or at end of input.
//
// 'segment_header' is the already-consumed header line of this segment. It is
// saved under the special key '==' only if 'segment' is still empty, so when
// several segments land in the same map the first header wins.
//
// Each definition must start with 'name:' in column 0; the name (without the
// colon) becomes the key and read_body() collects the lines that follow.
void read_lines(string segment_header, map<string, vector<string> >& segment) {
  // first segment header wins
  if (segment.empty())
    segment["=="].push_back(segment_header);  // '==' is a special key containing the segment header
  while (!cin.eof()) {
    const int next = cin.peek();
    // A header on the very last line leaves nothing to read; without this
    // check we'd go on to parse a definition out of a failed getline().
    if (next == std::char_traits<char>::eof()) break;
    if (next == '=') break;  // assumes: no line can start with '=' except a segment header
    assert(next != ' ');  // assumes: no whitespace but spaces
    string line;
    getline(cin, line);
    istringstream lstream(line);
    string name;
    getline(lstream, name, ' ');
    // check for emptiness first: a blank line must fail the assertion rather
    // than index one before the start of the string
    assert(!name.empty() && name[name.size()-1] == ':');
    name.erase(name.size()-1);  // drop the trailing ':'
    read_body(name, line, segment);
  }
}

// Read all of stdin, routing the definitions of each segment into either
// 'code' or 'data'.
//
// Every line seen at this level must be a segment header ('== <name>...').
// Empty input is fine and leaves both maps untouched.
void read_lines(map<string, vector<string> >& code, map<string, vector<string> >& data) {
  string line;
  // Loop on the read itself rather than on eof(): eof() only turns true after
  // a read has already failed, and that failed read must not be processed.
  while (getline(cin, line)) {
    assert(starts_with(line, "== "));
    map<string, vector<string> >& curr = (line.substr(3, 4) == "code") ? code : data;  // HACK: doesn't support segments except 'code' and 'data'
    read_lines(line, curr);
  }
}

// treeshake

// Does 'pat' occur as a substring of any of 'lines'?
// Plain substring search, so it errs on the side of reporting a match.
bool any_line_matches(string pat, const vector<string>& lines) {
  for (vector<string>::const_iterator line = lines.begin();  line != lines.end();  ++line) {
    // conservative: confused by word boundaries, comments and string literals
    if (line->find(pat) == string::npos) continue;
    return true;
  }
  return false;
}

// Helper for is_dead(): does any definition in 'segment' other than the one
// called 'name' mention 'name' somewhere in its lines?
static bool mentioned_by_others(const string& name, const map<string, vector<string> >& segment) {
  typedef map<string, vector<string> >::const_iterator definition_iterator;
  for (definition_iterator def = segment.begin();  def != segment.end();  ++def) {
    if (def->first != name && any_line_matches(name, def->second))
      return true;
  }
  return false;
}

// Can the definition called 'key' be deleted?
// It can unless it's the 'Entry' label or the segment-header pseudo-key '==',
// or some other definition in either segment mentions it.
bool is_dead(string key, const map<string, vector<string> >& code, const map<string, vector<string> >& data) {
  if (key == "Entry" || key == "==") return false;
  return !mentioned_by_others(key, code) && !mentioned_by_others(key, data);
}

// Helper for treeshake(): erase the first dead definition of 'segment' (which
// is either 'code' or 'data' itself). Returns whether anything was erased.
static bool erase_first_dead(map<string, vector<string> >& segment, const map<string, vector<string> >& code, const map<string, vector<string> >& data) {
  map<string, vector<string> >::iterator def = segment.begin();
  while (def != segment.end() && !is_dead(def->first, code, data))
    ++def;
  if (def == segment.end()) return false;
//?   cerr << "  erasing " << def->first << '\n';
  segment.erase(def);
  return true;
}

// Delete at most one dead definition: the first one found in 'code', or
// failing that the first one found in 'data'.
// Callers detect a fixpoint by checking whether either map shrank.
void treeshake(map<string, vector<string> >& code, map<string, vector<string> >& data) {
  if (erase_first_dead(code, code, data)) return;
  erase_first_dead(data, code, data);
}

// output

// Print every line of every definition in 'definitions' to stdout, one per
// line, visiting definitions in the map's key order.
//
// Takes the map by const reference: it's only read, and passing it by value
// would copy every line of every definition on each call.
void dump(const map<string, vector<string> >& definitions) {
  // nothing special needed for segment headers, since '=' precedes all alphabet in ASCII
  // (a name starting with a character that sorts below '=', such as a digit,
  // would come out ahead of the header)
  typedef map<string, vector<string> >::const_iterator definition_iterator;
  for (definition_iterator def = definitions.begin();  def != definitions.end();  ++def) {
    const vector<string>& lines = def->second;
    for (vector<string>::const_iterator line = lines.begin();  line != lines.end();  ++line)
      cout << *line << '\n';
  }
}

// Read segments from stdin, delete dead definitions until nothing more can be
// deleted, then print the surviving code segment followed by the data segment.
int main() {
  map<string, vector<string> > code, data;
  read_lines(code, data);
  // Keep shaking until a pass leaves both segments the same size; deleting
  // one definition may be what makes another one dead.
  bool shrank;
  do {
    const int code_before = SIZE(code);
    const int data_before = SIZE(data);
    treeshake(code, data);
    shrank = (SIZE(code) != code_before || SIZE(data) != data_before);
  } while (shrank);
  dump(code);
  dump(data);
  return 0;
}
/span>:bool <- new num:type ] +error: main: product of 'new' has incorrect type: '1:bool <- new num:type' :(scenario new_discerns_singleton_list_from_atom_container) % Hide_errors = true; def main [ 1:&:num <- new {(num): type} # should be '{num: type}' ] +error: main: product of 'new' has incorrect type: '1:&:num <- new {(num): type}' :(scenario new_with_type_abbreviation) def main [ 1:&:num <- new num:type ] $error: 0 :(scenario new_with_type_abbreviation_inside_compound) def main [ {1: (address address number), raw: ()} <- new {(& num): type} ] $error: 0 :(scenario equal_result_of_new_with_null) def main [ 1:&:num <- new num:type 10:bool <- equal 1:&:num, null ] +mem: storing 0 in location 10 //: To implement 'new', a Mu transform turns all 'new' instructions into //: 'allocate' instructions that precompute the amount of memory they want to //: allocate. //: Ensure that we never call 'allocate' directly, and that there's no 'new' //: instructions left after the transforms have run. :(before "End Primitive Recipe Checks") case ALLOCATE: { raise << "never call 'allocate' directly'; always use 'new'\n" << end(); break; } :(before "End Primitive Recipe Implementations") case NEW: { raise << "no implementation for 'new'; why wasn't it translated to 'allocate'? 
Please save a copy of your program and send it to Kartik.\n" << end(); break; } :(after "Transform.push_back(check_instruction)") // check_instruction will guard against direct 'allocate' instructions below Transform.push_back(transform_new_to_allocate); // idempotent :(code) void transform_new_to_allocate(const recipe_ordinal r) { trace(9991, "transform") << "--- convert 'new' to 'allocate' for recipe " << get(Recipe, r).name << end(); for (int i = 0; i < SIZE(get(Recipe, r).steps); ++i) { instruction& inst = get(Recipe, r).steps.at(i); // Convert 'new' To 'allocate' if (inst.name == "new") { if (inst.ingredients.empty()) return; // error raised elsewhere inst.operation = ALLOCATE; type_tree* type = new_type_tree(inst.ingredients.at(0).name); inst.ingredients.at(0).set_value(size_of(type)); trace(9992, "new") << "size of '" << inst.ingredients.at(0).name << "' is " << inst.ingredients.at(0).value << end(); delete type; } } } //: implement 'allocate' based on size :(before "End Globals") extern const int Reserved_for_tests = 1000; int Memory_allocated_until = Reserved_for_tests; int Initial_memory_per_routine = 100000; :(before "End Reset") Memory_allocated_until = Reserved_for_tests; Initial_memory_per_routine = 100000; :(before "End routine Fields") int alloc, alloc_max; :(before "End routine Constructor") alloc = Memory_allocated_until; Memory_allocated_until += Initial_memory_per_routine; alloc_max = Memory_allocated_until; trace("new") << "routine allocated memory from " << alloc << " to " << alloc_max << end(); :(before "End Primitive Recipe Declarations") ALLOCATE, :(before "End Primitive Recipe Numbers") put(Recipe_ordinal, "allocate", ALLOCATE); :(before "End Primitive Recipe Implementations") case ALLOCATE: { // compute the space we need int size = ingredients.at(0).at(0); int alloc_id = Next_alloc_id; Next_alloc_id++; if (SIZE(ingredients) > 1) { // array allocation trace("mem") << "array length is " << ingredients.at(1).at(0) << end(); size = /*space 
for length*/1 + size*ingredients.at(1).at(0); } int result = allocate(size); // initialize alloc-id in payload trace("mem") << "storing alloc-id " << alloc_id << " in location " << result << end(); put(Memory, result, alloc_id); if (SIZE(current_instruction().ingredients) > 1) { // initialize array length trace("mem") << "storing array length " << ingredients.at(1).at(0) << " in location " << result+/*skip alloc id*/1 << end(); put(Memory, result+/*skip alloc id*/1, ingredients.at(1).at(0)); } products.resize(1); products.at(0).push_back(alloc_id); products.at(0).push_back(result); break; } :(code) int allocate(int size) { // include space for alloc id ++size; trace("mem") << "allocating size " << size << end(); //? Total_alloc += size; //? ++Num_alloc; // Allocate Special-cases // compute the region of memory to return // really crappy at the moment ensure_space(size); const int result = Current_routine->alloc; trace("mem") << "new alloc: " << result << end(); // initialize allocated space for (int address = result; address < result+size; ++address) { trace("mem") << "storing 0 in location " << address << end(); put(Memory, address, 0); } Current_routine->alloc += size; // no support yet for reclaiming memory between routines assert(Current_routine->alloc <= Current_routine->alloc_max); return result; } //: statistics for debugging //? :(before "End Globals") //? int Total_alloc = 0; //? int Num_alloc = 0; //? int Total_free = 0; //? int Num_free = 0; //? :(before "End Reset") //? if (!Memory.empty()) { //? cerr << Total_alloc << "/" << Num_alloc //? << " vs " << Total_free << "/" << Num_free << '\n'; //? cerr << SIZE(Memory) << '\n'; //? } //? 
Total_alloc = Num_alloc = Total_free = Num_free = 0; :(code) void ensure_space(int size) { if (size > Initial_memory_per_routine) { cerr << "can't allocate " << size << " locations, that's too much compared to " << Initial_memory_per_routine << ".\n"; exit(1); } if (Current_routine->alloc + size > Current_routine->alloc_max) { // waste the remaining space and create a new chunk Current_routine->alloc = Memory_allocated_until; Memory_allocated_until += Initial_memory_per_routine; Current_routine->alloc_max = Memory_allocated_until; trace("new") << "routine allocated memory from " << Current_routine->alloc << " to " << Current_routine->alloc_max << end(); } } :(scenario new_initializes) % Memory_allocated_until = 10; % put(Memory, Memory_allocated_until, 1); def main [ 1:&:num <- new num:type ] +mem: storing 0 in location 10 +mem: storing 0 in location 11 +mem: storing 10 in location 2 :(scenario new_initializes_alloc_id) % Memory_allocated_until = 10; % put(Memory, Memory_allocated_until, 1); % Next_alloc_id = 23; def main [ 1:&:num <- new num:type ] # initialize memory +mem: storing 0 in location 10 +mem: storing 0 in location 11 # alloc-id in payload +mem: storing alloc-id 23 in location 10 # alloc-id in address +mem: storing 23 in location 1 :(scenario new_size) def main [ 10:&:num <- new num:type 12:&:num <- new num:type 20:num/alloc1, 21:num/alloc2 <- deaddress 10:&:num, 12:&:num 30:num <- subtract 21:num/alloc2, 20:num/alloc1 ] # size of number + alloc id +mem: storing 2 in location 30 :(scenario new_array_size) def main [ 10:&:@:num <- new num:type, 5 12:&:num <- new num:type 20:num/alloc1, 21:num/alloc2 <- deaddress 10:&:num, 12:&:num 30:num <- subtract 21:num/alloc2, 20:num/alloc1 ] # 5 locations for array contents + array length + alloc id +mem: storing 7 in location 30 :(scenario new_empty_array) def main [ 10:&:@:num <- new num:type, 0 12:&:num <- new num:type 20:num/alloc1, 21:num/alloc2 <- deaddress 10:&:@:num, 12:&:num 30:num <- subtract 
21:num/alloc2, 20:num/alloc1 ] +run: {10: ("address" "array" "number")} <- new {num: "type"}, {0: "literal"} +mem: array length is 0 # one location for array length +mem: storing 2 in location 30 //: If a routine runs out of its initial allocation, it should allocate more. :(scenario new_overflow) % Initial_memory_per_routine = 3; // barely enough room for point allocation below def main [ 10:&:num <- new num:type 12:&:point <- new point:type # not enough room in initial page ] +new: routine allocated memory from 1000 to 1003 +new: routine allocated memory from 1003 to 1006 :(scenario new_without_ingredient) % Hide_errors = true; def main [ 1:&:num <- new # missing ingredient ] +error: main: 'new' requires one or two ingredients, but got '1:&:num <- new' //: a little helper: convert address to number :(before "End Primitive Recipe Declarations") DEADDRESS, :(before "End Primitive Recipe Numbers") put(Recipe_ordinal, "deaddress", DEADDRESS); :(before "End Primitive Recipe Checks") case DEADDRESS: { // primary goal of these checks is to forbid address arithmetic for (int i = 0; i < SIZE(inst.ingredients); ++i) { if (!is_mu_address(inst.ingredients.at(i))) { raise << maybe(get(Recipe, r).name) << "'deaddress' requires address ingredients, but got '" << inst.ingredients.at(i).original_string << "'\n" << end(); goto finish_checking_instruction; } } if (SIZE(inst.products) > SIZE(inst.ingredients)) { raise << maybe(get(Recipe, r).name) << "too many products in '" << to_original_string(inst) << "'\n" << end(); break; } for (int i = 0; i < SIZE(inst.products); ++i) { if (!is_real_mu_number(inst.products.at(i))) { raise << maybe(get(Recipe, r).name) << "'deaddress' requires number products, but got '" << inst.products.at(i).original_string << "'\n" << end(); goto finish_checking_instruction; } } break; } :(before "End Primitive Recipe Implementations") case DEADDRESS: { products.resize(SIZE(ingredients)); for (int i = 0; i < SIZE(ingredients); ++i) { 
products.at(i).push_back(ingredients.at(i).at(/*skip alloc id*/1)); } break; }