summary refs log tree commit diff stats
path: root/note/really.html
blob: 8585689dc88202684c1f917da51e3951ba7347db (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
	<head>
		<title>"Really" is a confusing word</title>
		<link rel="stylesheet" href="/style.css" />
		<meta name="viewport" content="width=device-width, initial-scale=1.0" />
		<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" />
		<meta charset="utf-8" />
	</head>
	<body>
		<h1>"Really" is a confusing word</h1>
		<p>
		Often times when the word "really" is used in as an adverb in an assertion or proposition, it may be useful to replace it with "in some sense", and seek to ask or clarify "in what sense". It shall be recognized that questions that arise may be legitimately hard to answer, but it may be still beneficial to consider them.
		</p>
		<p>The same applies for "actually" and similar words when used this way, though in my experience, people tend to use "really" this way. I do so too, and I'm trying to get rid of this habit.</p>
		<div id="footer">
			<hr />
			<p><a href="/">Runxi Yu's Website</a></p>
			<p>Unless otherwise specified with the "<span class="copyright">copyright</span>" HTML/CSS class, works hosted on this subdomain (<code>www.andrewyu.org</code>) served with the HTTP(S) protocol is available under <a href="https://www.andrewyu.org/note/pubdom.html">Runxi Yu's Public Domain Dedication</a>.</p>
		</div>
	</body>
</html>
n.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
//: The bedrock level 1 of abstraction is now done, and we're going to start
//: building levels above it that make programming in x86 machine code a
//: little more ergonomic.
//:
//: All levels will be "pass through by default". Whatever they don't
//: understand they will silently pass through to lower levels.
//:
//: Since raw hex bytes of machine code are always possible to inject, SubX is
//: not a language, and we aren't building a compiler. This is something
//: deliberately leakier. Levels are more for improving auditing, checks and
//: error messages rather than for hiding low-level details.

//: Translator workflow: read 'source' file. Run a series of transforms on it,
//: each passing through what it doesn't understand. The final program should
//: be just machine code, suitable to write to an ELF binary.
//:
//: Higher levels usually transform code on the basis of metadata.

:(before "End Main")
if (is_equal(argv[1], "translate")) {
  START_TRACING_UNTIL_END_OF_SCOPE;
  reset();
  program p;
  string output_filename;
  for (int i = /*skip 'subx translate'*/2;  i < argc;  ++i) {
    if (is_equal(argv[i], "-o")) {
      ++i;
      if (i >= argc) {
        print_translate_usage();
        cerr << "'-o' must be followed by a filename to write results to\n";
        exit(1);
      }
      output_filename = argv[i];
    }
    else {
      ifstream fin(argv[i]);
      if (!fin) {
        cerr << "could not open " << argv[i] << '\n';
        return 1;
      }
      parse(fin, p);
      if (trace_contains_errors()) return 1;
    }
  }
  if (p.segments.empty()) {
    print_translate_usage();
    cerr << "nothing to do; must provide at least one file to read\n";
    exit(1);
  }
  if (output_filename.empty()) {
    print_translate_usage();
    cerr << "must provide a filename to write to using '-o'\n";
    exit(1);
  }
  transform(p);
  if (trace_contains_errors()) return 1;
  save_elf(p, output_filename);
  if (trace_contains_errors()) {
    unlink(output_filename.c_str());
    return 1;
  }
  return 0;
}

:(code)
void print_translate_usage() {
  cerr << "Usage: subx translate file1 file2 ... -o output\n";
}

// write out a program to a bare-bones ELF file
void save_elf(const program& p, const string& filename) {
  ofstream out(filename.c_str(), ios::binary);
  write_elf_header(out, p);
  for (size_t i = 0;  i < p.segments.size();  ++i)
    write_segment(p.segments.at(i), out);
  out.close();
}

void write_elf_header(ostream& out, const program& p) {
  char c = '\0';
#define O(X)  c = (X); out.write(&c, sizeof(c))
// host is required to be little-endian
#define emit(X)  out.write(reinterpret_cast<const char*>(&X), sizeof(X))
  //// ehdr
  // e_ident
  O(0x7f); O(/*E*/0x45); O(/*L*/0x4c); O(/*F*/0x46);
    O(0x1);  // 32-bit format
    O(0x1);  // little-endian
    O(0x1); O(0x0);
  for (size_t i = 0;  i < 8;  ++i) { O(0x0); }
  // e_type
  O(0x02); O(0x00);
  // e_machine
  O(0x03); O(0x00);
  // e_version
  O(0x01); O(0x00); O(0x00); O(0x00);
  // e_entry
  int e_entry = p.segments.at(0).start;  // convention
  emit(e_entry);
  // e_phoff -- immediately after ELF header
  int e_phoff = 0x34;
  emit(e_phoff);
  // e_shoff; unused
  int dummy32 = 0;
  emit(dummy32);
  // e_flags; unused
  emit(dummy32);
  // e_ehsize
  uint16_t e_ehsize = 0x34;
  emit(e_ehsize);
  // e_phentsize
  uint16_t e_phentsize = 0x20;
  emit(e_phentsize);
  // e_phnum
  uint16_t e_phnum = SIZE(p.segments);
  emit(e_phnum);
  // e_shentsize
  uint16_t dummy16 = 0x0;
  emit(dummy16);
  // e_shnum
  emit(dummy16);
  // e_shstrndx
  emit(dummy16);

  uint32_t p_offset = /*size of ehdr*/0x34 + SIZE(p.segments)*0x20/*size of each phdr*/;
  for (int i = 0;  i < SIZE(p.segments);  ++i) {
    //// phdr
    // p_type
    uint32_t p_type = 0x1;
    emit(p_type);
    // p_offset
    emit(p_offset);
    // p_vaddr
    uint32_t p_start = p.segments.at(i).start;
    emit(p_start);
    // p_paddr
    emit(p_start);
    // p_filesz
    uint32_t size = num_words(p.segments.at(i));
    assert(p_offset + size < SEGMENT_SIZE);
    emit(size);
    // p_memsz
    emit(size);
    // p_flags
    uint32_t p_flags = (i == 0) ? /*r-x*/0x5 : /*rw-*/0x6;  // convention: only first segment is code
    emit(p_flags);

    // p_align
    // "As the system creates or augments a process image, it logically copies
    // a file's segment to a virtual memory segment.  When—and if— the system
    // physically reads the file depends on the program's execution behavior,
    // system load, and so on.  A process does not require a physical page
    // unless it references the logical page during execution, and processes
    // commonly leave many pages unreferenced. Therefore delaying physical
    // reads frequently obviates them, improving system performance. To obtain
    // this efficiency in practice, executable and shared object files must
    // have segment images whose file offsets and virtual addresses are
    // congruent, modulo the page size." -- http://refspecs.linuxbase.org/elf/elf.pdf (page 95)
    uint32_t p_align = 0x1000;  // default page size on linux
    emit(p_align);
    if (p_offset % p_align != p_start % p_align) {
      raise << "segment starting at 0x" << HEXWORD << p_start << " is improperly aligned; alignment for p_offset " << p_offset << " should be " << (p_offset % p_align) << " but is " << (p_start % p_align) << '\n' << end();
      return;
    }

    // prepare for next segment
    p_offset += size;
  }
#undef O
#undef emit
}

void write_segment(const segment& s, ostream& out) {
  for (int i = 0;  i < SIZE(s.lines);  ++i) {
    const vector<word>& w = s.lines.at(i).words;
    for (int j = 0;  j < SIZE(w);  ++j) {
      uint8_t x = hex_byte(w.at(j).data);  // we're done with metadata by this point
      out.write(reinterpret_cast<const char*>(&x), /*sizeof(byte)*/1);
    }
  }
}

uint32_t num_words(const segment& s) {
  uint32_t sum = 0;
  for (int i = 0;  i < SIZE(s.lines);  ++i)
    sum += SIZE(s.lines.at(i).words);
  return sum;
}

:(before "End Includes")
using std::ios;