about summary refs log tree commit diff stats
path: root/038literal_strings.cc
diff options
context:
space:
mode:
author Kartik K. Agaram <vc@akkartik.com> 2021-03-03 22:09:50 -0800
committer Kartik K. Agaram <vc@akkartik.com> 2021-03-03 22:21:03 -0800
commit 71e4f3812982dba2efb471283d310224e8db363e (patch)
tree ea111a1acb8b8845dbda39c0e1b4bac1d198143b /038literal_strings.cc
parent c6b928be29ac8cdb4e4d6e1eaa20420ff03e5a4c (diff)
download mu-71e4f3812982dba2efb471283d310224e8db363e.tar.gz
7842 - new directory organization
Baremetal is now the default build target and therefore has its sources
at the top-level. Baremetal programs build using the phase-2 Mu toolchain
that requires a Linux kernel. This phase-2 codebase which used to be at
the top-level is now under the linux/ directory. Finally, the phase-2 toolchain,
while self-hosting, has a way to bootstrap from a C implementation, which
is now stored in linux/bootstrap. The bootstrap C implementation uses some
literate programming tools that are now in linux/bootstrap/tools.

So the whole thing has gotten inverted. Each directory should build one
artifact and include the main sources (along with standard library). Tools
used for building it are relegated to sub-directories, even though those
tools are often useful in their own right, and have had lots of interesting
programs written using them.

A couple of things have gotten dropped in this process:
  - I had old ways to run on just a Linux kernel, or with a Soso kernel.
    No more.
  - I had some old tooling for running a single test at the cursor. I haven't
    used that lately. Maybe I'll bring it back one day.

The reorg isn't done yet. Still to do:
  - redo documentation everywhere. All the README files, all other markdown,
    particularly vocabulary.md.
  - clean up how-to-run comments at the start of programs everywhere
  - rethink what to do with the html/ directory. Do we even want to keep
    supporting it?

In spite of these shortcomings, all the scripts at the top-level, linux/
and linux/bootstrap are working. The names of the scripts also feel reasonable.
This is a good milestone to take stock at.
Diffstat (limited to '038literal_strings.cc')
-rw-r--r-- 038literal_strings.cc 362
1 files changed, 0 insertions, 362 deletions
diff --git a/038literal_strings.cc b/038literal_strings.cc
deleted file mode 100644
index b0b3c13f..00000000
--- a/038literal_strings.cc
+++ /dev/null
@@ -1,362 +0,0 @@
-//: Allow instructions to mention literals directly.
-//:
-//: This layer will transparently move them to the global segment (assumed to
-//: always be the second segment).
-
-void test_transform_literal_string() {
-  run(
-      "== code 0x1\n"
-      "b8/copy  \"test\"/imm32\n"
-      "== data 0x2000\n"  // need an empty segment
-  );
-  CHECK_TRACE_CONTENTS(
-      "transform: -- move literal strings to data segment\n"
-      "transform: adding global variable '__subx_global_1' containing \"test\"\n"
-      "transform: line after transform: 'b8 __subx_global_1'\n"
-  );
-}
-
-//: We don't rely on any transforms running in previous layers, but this layer
-//: knows about labels and global variables and will emit them for previous
-//: layers to transform.
-:(after "Begin Transforms")
-Transform.push_back(transform_literal_strings);
-
-:(before "End Globals")
-int Next_auto_global = 1;
-:(before "End Reset")
-Next_auto_global = 1;
-:(code)
-void transform_literal_strings(program& p) {
-  trace(3, "transform") << "-- move literal strings to data segment" << end();
-  if (p.segments.empty()) return;
-  vector<line> new_lines;
-  for (int s = 0;  s < SIZE(p.segments);  ++s) {
-    segment& seg = p.segments.at(s);
-    trace(99, "transform") << "segment '" << seg.name << "'" << end();
-    for (int i = 0;  i < SIZE(seg.lines);  ++i) {
-//?       cerr << seg.name << '/' << i << '\n';
-      line& line = seg.lines.at(i);
-      for (int j = 0;  j < SIZE(line.words);  ++j) {
-        word& curr = line.words.at(j);
-        if (curr.data.at(0) != '"') continue;
-        ostringstream global_name;
-        global_name << "__subx_global_" << Next_auto_global;
-        ++Next_auto_global;
-        add_global_to_data_segment(global_name.str(), curr, new_lines);
-        curr.data = global_name.str();
-      }
-      trace(99, "transform") << "line after transform: '" << data_to_string(line) << "'" << end();
-    }
-  }
-  segment* data = find(p, "data");
-  if (data)
-    data->lines.insert(data->lines.end(), new_lines.begin(), new_lines.end());
-}
-
-void add_global_to_data_segment(const string& name, const word& value, vector<line>& out) {
-  trace(99, "transform") << "adding global variable '" << name << "' containing " << value.data << end();
-  // emit label
-  out.push_back(label(name));
-  // emit size for size-prefixed array
-  out.push_back(line());
-  emit_hex_bytes(out.back(), SIZE(value.data)-/*skip quotes*/2, 4/*bytes*/);
-  // emit data byte by byte
-  out.push_back(line());
-  line& curr = out.back();
-  for (int i = /*skip start quote*/1;  i < SIZE(value.data)-/*skip end quote*/1;  ++i) {
-    char c = value.data.at(i);
-    curr.words.push_back(word());
-    curr.words.back().data = hex_byte_to_string(c);
-    curr.words.back().metadata.push_back(string(1, c));
-  }
-}
-
-//: Within strings, whitespace is significant. So we need to redo our instruction
-//: parsing.
-
-void test_instruction_with_string_literal() {
-  parse_instruction_character_by_character(
-      "a \"abc  def\" z\n"  // two spaces inside string
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: a\n"
-      "parse2: word: \"abc  def\"\n"
-      "parse2: word: z\n"
-  );
-  // no other words
-  CHECK_TRACE_COUNT("parse2", 3);
-}
-
-void test_string_literal_in_data_segment() {
-  run(
-      "== code 0x1\n"
-      "b8/copy  X/imm32\n"
-      "== data 0x2000\n"
-      "X:\n"
-      "\"test\"/imm32\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "transform: -- move literal strings to data segment\n"
-      "transform: adding global variable '__subx_global_1' containing \"test\"\n"
-      "transform: line after transform: '__subx_global_1'\n"
-  );
-}
-
-void test_string_literal_with_missing_quote() {
-  Hide_errors = true;
-  run(
-      "== code 0x1\n"
-      "b8/copy  \"test/imm32\n"
-      "== data 0x2000\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "error: unclosed string in: b8/copy  \"test/imm32"
-  );
-}
-
-:(before "End Line Parsing Special-cases(line_data -> l)")
-if (line_data.find('"') != string::npos) {  // can cause false-positives, but we can handle them
-  parse_instruction_character_by_character(line_data, l);
-  continue;
-}
-
-:(code)
-void parse_instruction_character_by_character(const string& line_data, vector<line>& out) {
-  if (line_data.find('\n') != string::npos  && line_data.find('\n') != line_data.size()-1) {
-    raise << "parse_instruction_character_by_character: should receive only a single line\n" << end();
-    return;
-  }
-  // parse literals
-  istringstream in(line_data);
-  in >> std::noskipws;
-  line result;
-  result.original = line_data;
-  // add tokens (words or strings) one by one
-  while (has_data(in)) {
-    skip_whitespace(in);
-    if (!has_data(in)) break;
-    char c = in.get();
-    if (c == '#') break;  // comment; drop rest of line
-    if (c == ':') break;  // line metadata; skip for now
-    if (c == '.') {
-      if (!has_data(in)) break;  // comment token at end of line
-      if (isspace(in.peek()))
-        continue;  // '.' followed by space is comment token; skip
-    }
-    result.words.push_back(word());
-    if (c == '"') {
-      // string literal; slurp everything between quotes into data
-      ostringstream d;
-      d << c;
-      while (true) {
-        if (!has_data(in)) {
-          raise << "unclosed string in: " << line_data << end();
-          return;
-        }
-        in >> c;
-        if (c == '\\') {
-          in >> c;
-          if (c == 'n') d << '\n';
-          else if (c == '"') d << '"';
-          else if (c == '\\') d << '\\';
-          else {
-            raise << "parse_instruction_character_by_character: unknown escape sequence '\\" << c << "'\n" << end();
-            return;
-          }
-          continue;
-        } else {
-          d << c;
-        }
-        if (c == '"') break;
-      }
-      result.words.back().data = d.str();
-      result.words.back().original = d.str();
-      // slurp metadata
-      ostringstream m;
-      while (!isspace(in.peek()) && has_data(in)) {  // peek can sometimes trigger eof(), so do it first
-        in >> c;
-        if (c == '/') {
-          if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
-          m.str("");
-        }
-        else {
-          m << c;
-        }
-      }
-      if (!m.str().empty()) result.words.back().metadata.push_back(m.str());
-    }
-    else {
-      // not a string literal; slurp all characters until whitespace
-      ostringstream w;
-      w << c;
-      while (!isspace(in.peek()) && has_data(in)) {  // peek can sometimes trigger eof(), so do it first
-        in >> c;
-        w << c;
-      }
-      parse_word(w.str(), result.words.back());
-    }
-    trace(99, "parse2") << "word: " << to_string(result.words.back()) << end();
-  }
-  if (!result.words.empty())
-    out.push_back(result);
-}
-
-void skip_whitespace(istream& in) {
-  while (has_data(in) && isspace(in.peek())) {
-    in.get();
-  }
-}
-
-void skip_comment(istream& in) {
-  if (has_data(in) && in.peek() == '#') {
-    in.get();
-    while (has_data(in) && in.peek() != '\n') in.get();
-  }
-}
-
-line label(string s) {
-  line result;
-  result.words.push_back(word());
-  result.words.back().data = (s+":");
-  return result;
-}
-
-// helper for tests
-void parse_instruction_character_by_character(const string& line_data) {
-  vector<line> out;
-  parse_instruction_character_by_character(line_data, out);
-}
-
-void test_parse2_comment_token_in_middle() {
-  parse_instruction_character_by_character(
-      "a . z\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: a\n"
-      "parse2: word: z\n"
-  );
-  CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
-  // no other words
-  CHECK_TRACE_COUNT("parse2", 2);
-}
-
-void test_parse2_word_starting_with_dot() {
-  parse_instruction_character_by_character(
-      "a .b c\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: a\n"
-      "parse2: word: .b\n"
-      "parse2: word: c\n"
-  );
-}
-
-void test_parse2_comment_token_at_start() {
-  parse_instruction_character_by_character(
-      ". a b\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: a\n"
-      "parse2: word: b\n"
-  );
-  CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
-}
-
-void test_parse2_comment_token_at_end() {
-  parse_instruction_character_by_character(
-      "a b .\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: a\n"
-      "parse2: word: b\n"
-  );
-  CHECK_TRACE_DOESNT_CONTAIN("parse2: word: .");
-}
-
-void test_parse2_word_starting_with_dot_at_start() {
-  parse_instruction_character_by_character(
-      ".a b c\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: .a\n"
-      "parse2: word: b\n"
-      "parse2: word: c\n"
-  );
-}
-
-void test_parse2_metadata() {
-  parse_instruction_character_by_character(
-      ".a b/c d\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: .a\n"
-      "parse2: word: b /c\n"
-      "parse2: word: d\n"
-  );
-}
-
-void test_parse2_string_with_metadata() {
-  parse_instruction_character_by_character(
-      "a \"bc  def\"/disp32 g\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: a\n"
-      "parse2: word: \"bc  def\" /disp32\n"
-      "parse2: word: g\n"
-  );
-}
-
-void test_parse2_string_with_metadata_at_end() {
-  parse_instruction_character_by_character(
-      "a \"bc  def\"/disp32\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: a\n"
-      "parse2: word: \"bc  def\" /disp32\n"
-  );
-}
-
-void test_parse2_string_with_metadata_at_end_of_line_without_newline() {
-  parse_instruction_character_by_character(
-      "68/push \"test\"/f"  // no newline, which is how calls from parse() will look
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: 68 /push\n"
-      "parse2: word: \"test\" /f\n"
-  );
-}
-
-//: Make sure slashes inside strings don't trigger adding stuff from inside the
-//: string to metadata.
-
-void test_parse2_string_containing_slashes() {
-  parse_instruction_character_by_character(
-      "a \"bc/def\"/disp32\n"
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: \"bc/def\" /disp32\n"
-  );
-}
-
-void test_instruction_with_string_literal_with_escaped_quote() {
-  parse_instruction_character_by_character(
-      "\"a\\\"b\"\n"  // escaped quote inside string
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: \"a\"b\"\n"
-  );
-  // no other words
-  CHECK_TRACE_COUNT("parse2", 1);
-}
-
-void test_instruction_with_string_literal_with_escaped_backslash() {
-  parse_instruction_character_by_character(
-      "\"a\\\\b\"\n"  // escaped backslash inside string
-  );
-  CHECK_TRACE_CONTENTS(
-      "parse2: word: \"a\\b\"\n"
-  );
-  // no other words
-  CHECK_TRACE_COUNT("parse2", 1);
-}