https://github.com/akkartik/mu/blob/master/011run.cc
  1 //: Running SubX programs on the VM.
  2 
  3 //: (Not to be confused with the 'run' subcommand for running ELF binaries on
  4 //: the VM. That comes later.)
  5 
  6 :(before "End Help Texts")
    //: Registers the text shown for the 'syntax' help topic by storing it in
    //: Help under the key "syntax".
    //: NOTE(review): ':(before "...")' looks like a tangle directive that places
    //: the following lines ahead of the named waypoint -- confirm against the
    //: build tooling.
  7 put_new(Help, "syntax",
  8   "SubX programs consist of segments, each segment in turn consisting of lines.\n"
  9   "Line-endings are significant; each line should contain a single\n"
 10   "instruction, macro or directive.\n"
 11   "\n"
 12   "Comments start with the '#' character. It should be at the start of a word\n"
 13   "(start of line, or following a space).\n"
 14   "\n"
 15   "Each segment starts with a header line: a '==' delimiter followed by the name of\n"
 16   "the segment and a (sometimes approximate) starting address in memory.\n"
 17   "The name 'code' is special; instructions to execute should always go here.\n"
 18   "\n"
 19   "The resulting binary starts running code from a label called 'Entry'\n"
 20   "in the code segment.\n"
 21   "\n"
 22   "Segments with the same name get merged together. This rule helps keep functions and\n"
 23   "their data close together in .subx files.\n"
 24   "You don't have to specify the starting address after the first time.\n"
 25   "\n"
 26   "Lines consist of a series of words. Words can contain arbitrary metadata\n"
 27   "after a '/', but they can never contain whitespace. Metadata has no effect\n"
 28   "at runtime, but can be handy when rewriting macros.\n"
 29   "\n"
 30   "Check out the examples in the examples/ directory.\n"
 31 );
 32 :(before "End Help Contents")
    //: Prints the topic name "  syntax" to cerr so it shows up wherever the
    //: help contents are listed.
 33 cerr << "  syntax\n";
 34 
 35 :(code)
 36 void test_copy_imm32_to_EAX() {
 37   // At the lowest level, SubX programs are a series of hex bytes, each
 38   // (variable-length) instruction on one line.
 39   run(
 40       // Comments start with '#' and are ignored.
 41       "# comment\n"
 42       // Segment headers start with '==', a name and a starting hex address.
 43       // There's usually one code and one data segment. The code segment
 44       // always comes first.
 45       "== code 0x1\n"  // code segment
 46 
 47       // After the header, each segment consists of lines, and each line
 48       // consists of words separated by whitespace.
 49       //
 50       // All words can have metadata after a '/'. No spaces allowed in
 51       // metadata, of course.
 52       // Unrecognized metadata never causes errors, so you can use it for
 53       // documentation.
/*
 * Syntax-highlighting stylesheet: colours for line-number cells and for
 * token classes nested under `.highlight`. The trailing comment on each rule
 * names the token type the short class stands for.
 * NOTE(review): the class names and token-type comments look like output of a
 * Pygments-style highlighter, and this stylesheet sits between unrelated
 * C++ and Nim sources -- presumably page-scrape residue; confirm before
 * relying on it.
 */
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
#
#
#           The Nim Compiler
#        (c) Copyright 2018 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## exposes the Nim VM to clients.
import
  ast, modules, condsyms,
  options, llstream, lineinfos, vm,
  vmdef, modulegraphs, idents, os, pathutils,
  scriptconfig, std/[compilesettings, tables]

import pipelines


when defined(nimPreviewSlimSystem):
  import std/[assertions, syncio]

type
  Interpreter* = ref object ## Use Nim as an interpreter with this object
    # Symbol of the script's module; `evalScript` feeds it to the pipeline and
    # the symbol lookups iterate over it.
    mainModule: PSym
    # Module graph the interpreter works on; its `vm` field holds the VM
    # context used by the call/get/set helpers.
    graph: ModuleGraph
    # File name `evalScript` looks up and opens when no stream is passed in.
    scriptName: string
    # Id generator created from the main module; passed along to the pipeline.
    idgen: IdGenerator

iterator exportedSymbols*(i: Interpreter): PSym =
  ## Yields, one by one, every symbol that `modulegraphs.allSyms` reports for
  ## the interpreter's main module.
  assert i != nil
  assert i.mainModule != nil, "no main module selected"
  for sym in modulegraphs.allSyms(i.graph, i.mainModule): yield sym

proc selectUniqueSymbol*(i: Interpreter; name: string;
                         symKinds: set[TSymKind] = {skLet, skVar}): PSym =
  ## Looks up ``name`` in the main module and returns the single symbol whose
  ## kind is in ``symKinds``. Returns ``nil`` when nothing matches or when
  ## more than one symbol matches.
  assert i != nil
  assert i.mainModule != nil, "no main module selected"
  let ident = getIdent(i.graph.cache, name)
  var cursor: ModuleIter
  var candidate = initModuleIter(cursor, i.graph, i.mainModule, ident)
  result = nil
  while candidate != nil:
    if candidate.kind in symKinds:
      if result != nil:
        # a second match makes the name ambiguous
        return nil
      result = candidate
    candidate = nextModuleIter(cursor, i.graph)

proc selectRoutine*(i: Interpreter; name: string): PSym =
  ## Picks the routine (proc, func, method, converter, macro or template)
  ## called ``name`` from the main module. The routine needs to carry the
  ## export marker ``*``. ``nil`` is returned when the name is overloaded,
  ## i.e. when it does not identify exactly one routine.
  const routineKinds = {skProc, skFunc, skMethod, skConverter,
                        skMacro, skTemplate}
  return selectUniqueSymbol(i, name, routineKinds)

proc callRoutine*(i: Interpreter; routine: PSym; args: openArray[PNode]): PNode =
  ## Runs ``routine`` with ``args`` on the interpreter's VM context and
  ## returns whatever ``vm.execProc`` produces.
  assert i != nil
  let ctx = PCtx(i.graph.vm)
  return vm.execProc(ctx, routine, args)

proc getGlobalValue*(i: Interpreter; letOrVar: PSym): PNode =
  ## Returns the value the interpreter's VM currently holds for the global
  ## ``letOrVar``, as reported by ``vm.getGlobalValue``.
  # Same nil guard as the sibling procs, so a missing interpreter fails with
  # a clear assertion instead of a nil dereference.
  assert i != nil
  result = vm.getGlobalValue(PCtx i.graph.vm, letOrVar)

proc setGlobalValue*(i: Interpreter; letOrVar: PSym, val: PNode) =
  ## Sets a global value to a given PNode, does not do any type checking.
  # Same nil guard as the sibling procs, so a missing interpreter fails with
  # a clear assertion instead of a nil dereference.
  assert i != nil
  vm.setGlobalValue(PCtx i.graph.vm, letOrVar, val)

proc implementRoutine*(i: Interpreter; pkg, module, name: string;
                       impl: proc (a: VmArgs) {.closure, gcsafe.}) =
  ## Registers ``impl`` as a VM callback under the dotted key
  ## ``pkg.module.name``.
  assert i != nil
  let callbackKey = pkg & "." & module & "." & name
  PCtx(i.graph.vm).registerCallback(callbackKey, impl)

proc evalScript*(i: Interpreter; scriptStream: PLLStream = nil) =
  ## Evaluates the script in the main module; calling it again *reloads* the
  ## script. When ``scriptStream`` is ``nil`` the file named by the
  ## interpreter's ``scriptName`` is located via ``findFile`` and opened.
  assert i != nil
  assert i.mainModule != nil, "no main module selected"

  # Reset per-module state so a previous evaluation does not leak through.
  initStrTables(i.graph, i.mainModule)
  i.graph.cacheSeqs.clear()
  i.graph.cacheCounters.clear()
  i.graph.cacheTables.clear()
  i.mainModule.ast = nil

  var input = scriptStream
  if input == nil:
    input = llStreamOpen(findFile(i.graph.config, i.scriptName), fmRead)
  discard processPipelineModule(i.graph, i.mainModule, i.idgen, input)

proc findNimStdLib*(): string =
  ## Tries to find a path to a valid "system.nim" file.
  ## Returns "" on failure: no `nim` executable on the search path, no
  ## "system.nim" in the derived `lib` directory, or an OS/value error while
  ## probing.
  try:
    let nimexe = os.findExe("nim")
      # this can't work with choosenim shims, refs https://github.com/dom96/choosenim/issues/189
      # it'd need `nim dump --dump.format:json . | jq -r .libpath`
      # which we should simplify as `nim dump --key:libpath`
    if nimexe.len == 0: return ""
    # First guess: `lib` is a sibling of the directory holding the executable.
    result = nimexe.splitPath()[0] /../ "lib"
    if not fileExists(result / "system.nim"):
      when defined(unix):
        # The executable may be a symlink; retry relative to its target.
        result = nimexe.expandSymlink.splitPath()[0] /../ "lib"
        if not fileExists(result / "system.nim"): return ""
      else:
        # No fallback on other platforms: report failure instead of handing
        # back a directory that is known not to contain "system.nim".
        return ""
  except OSError, ValueError:
    return ""

proc findNimStdLibCompileTime*(): string =
  ## Same as `findNimStdLib` but uses source files used at compile time,
  ## and asserts on error.
  # `querySetting(libPath)` yields the library path the compiler itself was
  # configured with while building this module.
  result = querySetting(libPath)
  # `doAssert` stays active even when plain assertions are turned off; the
  # message carries the offending path.
  doAssert fileExists(result / "system.nim"), "result:" & result

proc createInterpreter*(scriptName: string;
                        searchPaths: openArray[string];
                        flags: TSandboxFlags = {},
                        defines = @[("nimscript", "true")],
                        registerOps = true): Interpreter =
  ## Builds an `Interpreter` for the script `scriptName`.
  ##
  ## * `searchPaths` are added to the config's search paths; the first one
  ##   also becomes the lib path while that is still empty.
  ## * `flags` become the VM's feature set.
  ## * `defines` are (symbol, value) pairs defined on top of the defaults.
  ## * `registerOps` controls whether `registerAdditionalOps` is called on
  ##   the VM.
  var config = newConfigRef()
  var identCache = newIdentCache()
  var moduleGraph = newModuleGraph(identCache, config)
  connectPipelineCallbacks(moduleGraph)

  initDefines(config.symbols)
  for (symbol, value) in defines:
    defineSymbol(config.symbols, symbol, value)

  for path in searchPaths:
    let dir = AbsoluteDir(path)
    config.searchPaths.add(dir)
    if config.libpath.isEmpty:
      config.libpath = dir

  var mainMod = moduleGraph.makeModule(scriptName)
  incl(mainMod.flags, sfMainModule)
  var ids = idGeneratorFromModule(mainMod)

  var ctx = newCtx(mainMod, identCache, moduleGraph, ids)
  ctx.mode = emRepl
  ctx.features = flags
  if registerOps:
    # Required to register parts of stdlib modules
    ctx.registerAdditionalOps()
  moduleGraph.vm = ctx

  setPipeLinePass(moduleGraph, EvalPass)
  moduleGraph.compilePipelineSystemModule()
  result = Interpreter(mainModule: mainMod, graph: moduleGraph,
                       scriptName: scriptName, idgen: ids)

proc destroyInterpreter*(i: Interpreter) =
  ## destructor.
  ## Currently a no-op: the body only discards a placeholder string and does
  ## not touch `i`.
  discard "currently nothing to do."

proc registerErrorHook*(i: Interpreter, hook:
                        proc (config: ConfigRef; info: TLineInfo; msg: string;
                              severity: Severity) {.gcsafe.}) =
  ## Installs `hook` as the `structuredErrorHook` of the interpreter's
  ## config, replacing whatever hook was set before.
  # Same nil guard as the sibling procs, so a missing interpreter fails with
  # a clear assertion instead of a nil dereference.
  assert i != nil
  i.graph.config.structuredErrorHook = hook

proc runRepl*(r: TLLRepl;
              searchPaths: openArray[string];
              supportNimscript: bool) =
  ## deadcode but please don't remove... might be revived
  ##
  ## Sets up a fresh config, ident cache and module graph, then runs the
  ## pipeline over a stdin stream driven by `r`.
  ## `searchPaths` are added to the config; `supportNimscript` additionally
  ## defines "nimconfig" and installs a VM on the graph.
  var conf = newConfigRef()
  var cache = newIdentCache()
  var graph = newModuleGraph(cache, conf)

  # Register every search path; the first one doubles as the lib path while
  # that is still empty.
  for p in searchPaths:
    conf.searchPaths.add(AbsoluteDir p)
    if conf.libpath.isEmpty: conf.libpath = AbsoluteDir p

  conf.cmd = cmdInteractive # see also `setCmd`
  conf.setErrorMaxHighMaybe
  initDefines(conf.symbols)
  defineSymbol(conf.symbols, "nimscript")
  if supportNimscript: defineSymbol(conf.symbols, "nimconfig")
  # "nimffi" is only defined when the compile-time constant `hasFFI` is true.
  when hasFFI: defineSymbol(graph.config.symbols, "nimffi")
  # The module fed from stdin is flagged as the main module.
  var m = graph.makeStdinModule()
  incl(m.flags, sfMainModule)
  var idgen = idGeneratorFromModule(m)

  # A VM is attached to the graph only in nimscript mode.
  if supportNimscript: graph.vm = setupVM(m, cache, "stdin", graph, idgen)
  setPipeLinePass(graph, InterpreterPass)
  graph.compilePipelineSystemModule()
  # The pipeline's result is deliberately ignored.
  discard processPipelineModule(graph, m, idgen, llStreamOpenStdIn(r))
51" class="LineNr">351 void test_negative_number_too_small() { 352 Hide_errors = true; 353 parse_and_load( 354 "== code 0x1\n" 355 "01 -12345\n" 356 ); 357 CHECK_TRACE_CONTENTS( 358 "error: token '-12345' is not a hex byte\n" 359 ); 360 } 361 362 void test_hex_prefix() { 363 parse_and_load( 364 "== code 0x1\n" 365 "0x01 -0x02\n" 366 ); 367 CHECK_TRACE_COUNT("error", 0); 368 } 369 370 void test_repeated_segment_merges_data() { 371 parse_and_load( 372 "== code 0x1\n" 373 "11 22\n" 374 "== code\n" // again 375 "33 44\n" 376 ); 377 CHECK_TRACE_CONTENTS( 378 "parse: new segment 'code'\n" 379 "parse: appending to segment 'code'\n" 380 // first segment 381 "load: 0x00000001 -> 11\n" 382 "load: 0x00000002 -> 22\n" 383 // second segment 384 "load: 0x00000003 -> 33\n" 385 "load: 0x00000004 -> 44\n" 386 ); 387 } 388 389 void test_error_on_missing_segment_header() { 390 Hide_errors = true; 391 parse_and_load( 392 "01 02\n" 393 ); 394 CHECK_TRACE_CONTENTS( 395 "error: input does not start with a '==' section header\n" 396 ); 397 } 398 399 void test_error_on_uppercase_hex() { 400 Hide_errors = true; 401 parse_and_load( 402 "== code\n" 403 "01 Ab\n" 404 ); 405 CHECK_TRACE_CONTENTS( 406 "error: uppercase hex not allowed: Ab\n" 407 ); 408 } 409 410 //: helper for tests 411 void parse_and_load(const string& text_bytes) { 412 program p; 413 istringstream in(text_bytes); 414 parse(in, p); 415 if (trace_contains_errors()) return; // if any stage raises errors, stop immediately 416 load(p); 417 } 418 419 //:: run 420 421 :(before "End Initialize Op Names") 422 put_new(Name, "b8", "copy imm32 to EAX (mov)"); 423 424 //: our first opcode 425 426 :(before "End Single-Byte Opcodes") 427 case 0xb8: { // copy imm32 to EAX 428 const int32_t src = next32(); 429 trace(Callstack_depth+1, "run") << "copy imm32 0x" << HEXWORD << src << " to EAX" << end(); 430 Reg[EAX].i = src; 431 break; 432 } 433 434 :(code) 435 void test_copy_imm32_to_EAX_again() { 436 run( 437 "== code 0x1\n" // code segment 438 
// op ModR/M SIB displacement immediate 439 " b8 0a 0b 0c 0d \n" // copy 0x0d0c0b0a to EAX 440 ); 441 CHECK_TRACE_CONTENTS( 442 "run: copy imm32 0x0d0c0b0a to EAX\n" 443 ); 444 } 445 446 // read a 32-bit int in little-endian order from the instruction stream 447 int32_t next32() { 448 int32_t result = read_mem_i32(EIP); 449 EIP+=4; 450 return result; 451 } 452 453 //:: helpers 454 455 string to_string(const word& w) { 456 ostringstream out; 457 out << w.data; 458 for (int i = 0; i < SIZE(w.metadata); ++i) 459 out << " /" << w.metadata.at(i); 460 return out.str(); 461 } 462 463 bool contains_uppercase(const string& s) { 464 for (int i = 0; i < SIZE(s); ++i) 465 if (isupper(s.at(i))) return true; 466 return false; 467 }