about summary refs log blame commit diff stats
path: root/index.html
blob: 98df9a9ee39e76f87d1bce8fec89263209dcc795 (plain) (tree)
1
2
3
4
5
6
7
8
9

                 
                                                                                                                
   

                                                               
   

      
 


                                                                                    
 
   


                                                                                                                  
 
                                                 


                                                                          
                                                                             

                                                                          
                                                                           

                                                                            
                                                                              
                                   


                                                                                                 
                                              
                                                                        
                                                                              
            


                                                                          

                                                                                                                    
                                                                          



                                                                           


                                                                             
                                                                                                                                                 

     



                                                                                                            
                                      
 



                                                                                                                           
                                                                                   
                                                                            
                                                                        
                                              
                                                                        

                                                                          

                                                                                                                          
                                                                       
 
                                                                             
                 
 
                                                                    
                                                       
                                                                              
                                                        
                                                                      
                                                                         
                                                                        
                                                                       
                                                                           
                                                                       
                                                                              
                                                              
                                                                               
                                                 




                                                                            










                                                                               
                                                                         



                                                                             

                                                                                      

                                                                                                    
 


                                                                               
                                                                            





                                                                              

                                                                                    

                                                                           
                                                                           
                                                                        


                                                                                                 


                                                                                      
 













                                                                                                           

                                                                              
 
                                                                                               

                            
                                                                                                                        

                                                                                                 


                                                                              
                                                                                        
                                     




                                                                            
                                                                            

                                                                  
                                                                              
                              
                                                                      

                                                                         
                                                                       

                                                                           
                                                                              
                            




                                                                            
                                                                               
                                                                   
                                           
                                                                             

                                                                                         
                                        












                                                                                                                          
                                                               
 
                                 
 
                                                                    
                                                                            

                                                                            
                                                                        
                           








                                                                                                                                         

                                                                         
 
                                                           
 

                                                                       
                                                                           
                                           
                                                                         
                                                   

                                                                          
                                                            
                                                                           
                                                     
                                                              











                                                                                 
 
                                                                      
                                              
                                                                       
                                                                       

                                                                    




                                                                                                              
 


                                                                         








                                                                                                     
                                                                            
                                     

                                                                               
                                                                                   




                                                                               

                                                                              
                                                                                 

                                                                              



                                                                              
                                                                  
                                                                             
                                      
 

    


              



                                                    

     


                      








                                                                                  
                                                                                        




                                                                             
 
                               
<title>Mu</title>

With apologies to <a href='http://en.wikipedia.org/wiki/Mu_%28negative%29#In_popular_culture'>Robert Pirsig</a>:
<p>
<div style='font-style: italic; margin-left:2em'>
Is it a language, or an operating system, or a virtual machine?
<p>
Mu.
</div>

Read these first: <b><a href='http://akkartik.name/about'>problem statement</a></b>,
<b><a href='http://github.com/akkartik/mu#readme'>trying out Mu</a></b>.
(Mu requires minimal dependencies.)

<p>
Mu's code looks quite alien, requiring editors to be specially configured to
colorize it in a sane manner. So this page provides links to the source files
showing how it currently looks in my <a href='https://github.com/akkartik/mu/blob/master/mu.vim'>custom setup</a>.

<p>Whetting your appetite, some example programs:

<ul>
<li><a href='html/x.mu.html'>x.mu</a>: a simple program to add two numbers
together. Shows that at bottom Mu is a simple VM bytecode designed to convert
directly to machine code.
<li><a href='html/factorial.mu.html'>factorial.mu</a>: everyone's favorite
example, showing how Mu supports conditionals and loops without any special
syntax, using the special labels '{' and '}'.
<li><a href='html/tangle.mu.html'>tangle.mu</a>: another (contrived) version
of factorial showing Mu's ability to 'tangle' code from multiple places into a
single function or <em>recipe</em>.
<li>simple examples showing off support for concurrency: <a href='html/fork.mu.html'>fork.mu</a>,
<a href='html/channel.mu.html'>channel.mu</a>
<li>simple examples showing off hardware control: <a href='html/display.mu.html'>display.mu</a>,
<a href='html/console.mu.html'>console.mu</a>.
<li><a href='html/screen.mu.html'>screen.mu</a>: example program showing
print primitives that inject a 'screen' <em>dependency</em> which can be faked
for testing.
<li><a href='html/filesystem.mu.html'>filesystem.mu</a>: example program
showing file primitives that inject a 'filesystem' dependency which can be
faked for testing.
<li><a href='html/http-client.mu.html'>http-client.mu</a> and <a href='html/http-server.mu.html'>http-server.mu</a>,
examples of Mu's testable high-level interfaces to the network.
<li><a href='html/static-dispatch.mu.html'>static-dispatch.mu</a>: example
program showing mu's ability to define recipes with headers, and thereby to
allow functions with the same name but arguments of different types to
coexist.
<li><a href='html/counters.mu.html'>counters.mu</a>: lexical scope
<li><a href='html/chessboard.mu.html'>chessboard.mu</a>: a little program for
2 people to play chess, with thorough tests of its behavior including both
screen and keyboard handling.
<li><a href='html/nqueens.mu.html'>nqueens.mu</a>: a solution to the <a href='http://rosettacode.org/wiki/N-queens_problem'>N queens problem</a>.
</ul>

Now a listing of every layer in mu. Recall that you can <a href='http://akkartik.name/post/wart-layers'>stop
loading at any layer and get a valid program to run with a subset of features,
that passes all its tests</a>.

<p><b>Part I</b>: basic infrastructure

<p/><a href='html/000organization.cc.html'>000organization.cc</a>: the basic
skeleton program. Compiles and runs but doesn't do much. Later <em>layers</em>
hook into this skeleton to add functionality. Mu's guarantee: you can <a href='http://youtube.com/watch?v=c8N72t7aScY'>load
features</a> up until any layer, and it will compile and pass all tests until
that point. <a href='http://akkartik.name/post/wart-layers'>More details &rarr;</a>
<br/><a href='html/001help.cc.html'>001help.cc</a>: just a simple test layer
to show how to hook into the skeleton. Also summarizes how to invoke Mu,
behaviors that later layers will be providing.
<br/><a href='html/002test.cc.html'>002test.cc</a>: Mu's minimalist test
harness, relying on a couple of one-liners in the <tt>build</tt> script to
auto-generate lists of tests to run.
<br/><a href='html/003trace.cc.html'>003trace.cc</a>: support for logging
facts about our program, and for <a href='http://akkartik.name/post/tracing-tests'>checking the facts logged in tests</a>.
(<a href='html/003trace.test.cc.html'>tests for the tracing system</a>)

<p><b>Part II</b>: the core Mu virtual machine, designed to compile easily to
machine language.

<p/><a href='html/010vm.cc.html'>010vm.cc</a>: core data structures:
recipes, instructions and <em>reagents</em> (operands).
<br/><a href='html/011load.cc.html'>011load.cc</a>: the textual representation
of recipes and how it's turned into the data structures.
<br/><a href='html/012transform.cc.html'>012transform.cc</a>: after Mu
programs are loaded but before they are run they can be transformed in an
extensible manner akin to lisp macros. Think of this as the core of Mu's
&lsquo;compiler&rsquo; for providing high-level features atop the core.
<br/><a href='html/013update_operation.cc.html'>013update_operation.cc</a>:
our first transform: check for unknown recipes before the program runs.
<br/><a href='html/014literal_string.cc.html'>014literal_string.cc</a>: extend
the loader to support literal strings in various instructions.
<br/><a href='html/015literal_noninteger.cc.html'>015literal_noninteger.cc</a>:
extend the loader to support non-integer numbers.
<br/><a href='html/016dilated_reagent.cc.html'>016dilated_reagent.cc</a>:
allowing whitespace in reagents.
<br/><a href='html/017parse_tree.cc.html'>017parse_tree.cc</a>: a new syntax
for representing complex types as trees using whitespace and parentheses
(s-expressions).
<br/><a href='html/018type_abbreviations.cc.html'>018type_abbreviations.cc</a>:
the core types of Mu are designed to be fully explicit and familiar to
non-programmers at the cost of some verbosity: <tt>number</tt>,
<tt>character</tt>, <tt>boolean</tt>, etc. Once learners get acclimatized,
we can teach them abbreviated forms that are familiar to veteran programmers:
<tt>num</tt>, <tt>char</tt>, <tt>bool</tt>. Mu's facility for type
abbreviations is extensible: learners can abbreviate <tt>number</tt> to
<tt>n</tt> if they so choose, thereby exploring such trade-offs. You can also
create abbreviations suitable for a specific program, like abbreviating
<tt>address:array:address:array:character</tt> to <tt>board</tt> for say a
tic-tac-toe or chess program. Think C's <tt>typedef</tt> statement.
<br/><a href='html/020run.cc.html'>020run.cc</a>: executing Mu recipes by
executing the list of instructions they contain. Future layers will define
more primitive operations that can be used in instructions.
<br/><a href='html/021check_instruction.cc.html'>021check_instruction.cc</a>:
harness for adding per-primitive checks to run before running a program.

<p/>Various primitive operations: on <a href='html/022arithmetic.cc.html'>numbers</a>,
<a href='html/023boolean.cc.html'>booleans</a>, for <a href='html/024jump.cc.html'>control flow</a>,
and <a href='html/025compare.cc.html'>comparing values</a>.

<p/>Support for <a href='html/026call.cc.html'>defining new recipes</a>. In
Mu calls to functions look just like primitive operations, with the ability to 
<a href='html/027call_ingredient.cc.html'>pass in ingredients</a>, and to 
<a href='html/028call_return.cc.html'>return products</a>. In particular, Mu
supports returning multiple values, and uses this ability far more pervasively
than high-level languages can.

<p/>Support for various data structures: heterogeneous compound types called
<a href='html/030container.cc.html'><em>containers</em></a>, akin to records
or structs, homogeneous <a href='html/032array.cc.html'>arrays</a> of a single
type of value (type <tt>array</tt> conventionally abbreviated as <tt>@</tt>),
and <a href='html/033exclusive_container.cc.html'><em>exclusive containers</em></a>,
akin to C unions but with a tag so each value knows its &lsquo;kind&rsquo;.
Out of these primitive types, Mu builds the usual and growing menagerie of
data structures: <a href='html/064list.mu.html'>linked lists</a> permitting
fast insertion and deletion and unidirectional scanning but slow search;
<a href='html/065duplex_list.mu.html'><em>duplex lists</em></a> that permit
bidirectional scanning; <a href='html/070table.mu.html'>associative arrays or <em>tables</em></a>
for fast insertion, deletion and search using <a href='html/069hash.cc.html'>hash</a>
functions; <a href='html/066stream.mu.html'><em>streams</em></a> for scanning
through strings incrementally; and <a href='html/061text.mu.html'><em>buffers</em></a>
for gradually constructing long strings in a piecemeal fashion.

<p/>Dynamic memory management: Mu supports <a href='html/034address.cc.html'>allocating</a>
space at run-time as pointers or <em>addresses</em>. All Mu instructions can
dereference or <a href='html/035lookup.cc.html'><em>lookup</em></a> addresses
of values in addition to operating on regular values. These addresses are
manually managed like C. However, all allocations are transparently
reference-counted or <a href='html/036refcount.cc.html'><em>refcounted</em></a>,
with every copy of a pointer updating refcounts appropriately. When the
refcount of an allocation drops to zero it is transparently <a href='html/037abandon.cc.html'>reclaimed</a>
and made available to future allocations. By construction it is impossible to
reclaim memory prematurely, while some other part of a program is still
pointing to it. This eliminates a whole class of undefined behavior and
security vulnerabilities that plague C. Compared to Rust, Mu pays some
additional runtime cost in exchange for C-like flexibility (you can copy
addresses around all you like, and write from any copy of an address) and
simpler implementation (no static analysis). Mu by convention abbreviates type
<tt>address</tt> to <tt>&amp;</tt>.

<p/>Support for higher-order recipes that can pass <a href='html/071recipe.cc.html'>recipes</a>
around like any other value.

<p/>Support for running multiple functions concurrently using <a href='html/072scheduler.cc.html'><em>routines</em></a>,
for communicating between routines using <a href='html/075channel.mu.html'><em>channels</em></a>,
and for <a href='html/073wait.cc.html'>synchronizing</a> between routines.
Channels are Mu's only synchronization primitive, queues that can cause the
routine reading or writing from them to stall without taking up CPU resources.
Mu provides safe concurrency by forbidding routines from sharing addresses;
writing to a channel always performs a <a href='html/074deep_copy.cc.html'>deep copy</a>
that preserves all internal aliasing.

<p><b>Part III</b>: transforms to make Mu a little more expressive, and give
it some of the benefits of a high-level language.

<p/><a href='html/040brace.cc.html'>040brace.cc</a> and
<a href='html/041jump_target.cc.html'>041jump_target.cc</a>: how Mu provides
structured goto-less programming without introducing the syntax of
conditionals and loops other languages require.
<br/><a href='html/042name.cc.html'>042name.cc</a>: how Mu transforms variable
names to raw memory addresses.
<br/><a href='html/043space.cc.html'>043space.cc</a>: how variables in
different routines are isolated from each other using <em>spaces</em>. Mu
&lsquo;local variables&rsquo; are allocated on the heap.
<br/><a href='html/044space_surround.cc.html'>044space_surround.cc</a>:
Chaining spaces together to accomodate variables with varying lifetimes and
ownership properties.
<br/><a href='html/045closure_name.cc.html'>045closure_name.cc</a>: how spaces
can implement lexical scope.
<br/><a href='html/046global.cc.html'>046global.cc</a>: experimental support
for 'global' variables that are always available inside a single routine. Mu
has no variables that are available transparently across routines, and this
might go away as well. Global variables are currently not checked as
rigorously as the rest of the type system.
<br/><a href='html/047check_type_by_name.cc.html'>047check_type_by_name.cc</a>:
a transform to deduce missing types in instructions on the basis of
previous instructions in the same function.
<br/><a href='html/050scenario.cc.html'>050scenario.cc</a>: Mu's first syntax
&mdash; not for code but for tests. (<a href='html/051scenario_test.mu.html'>example</a>)
<br/><a href='html/052tangle.cc.html'>052tangle.cc</a>: support for layers in
Mu programs. They've been so good to us.
<br/><a href='html/053recipe_header.cc.html'>053recipe_header.cc</a>:
a new syntax for summarizing the number and types of ingredients and products
a function expects at the top next to its name, in a <em>header</em>.
<br/><a href='html/054static_dispatch.cc.html'>054static_dispatch.cc</a>:
allowing multiple variants of a function to coexist as long as each has a
unique header.
<br/>Support for generic or <em>shape-shifting</em> <a href='html/055shape_shifting_container.cc.html'>data structures</a>
and <a href='html/056shape_shifting_recipe.cc.html'>recipes</a>, containing
wildcard type ingredients that start with an &lsquo;_&rsquo;. Everytime you
use a shape-shifting recipe with a new set of &lsquo;concrete&rsquo; types
for its type ingredients, it creates a new variant of the recipe for you
matching those types.
<br/><a href='html/057immutable.cc.html'>057immutable.cc</a>, a static analysis to
ensure that functions never modify anything but their products.

<p><b>Part IV</b>: Miscellaneous.

<p/><a href='html/061text.mu.html'>061text.mu</a>: strings in Mu are
bounds-checked rather than null-terminated. They're also unicode-aware (code
points only; no control characters, no combining characters, no
normalization). Mu recipes that take strings can take literal strings thanks
to a <a href='html/060rewrite_literal_string.cc.html'>transform</a> that
allocates them on the heap.
<br/><a href='html/062convert_ingredients_to_text.cc.html'>062convert_ingredients_to_text.cc</a>:
a convenience transform primarily intended to provide the illusion of dynamic
typing when adding to the trace. The <span style='font-family:courier,fixed'>stash</span>
command can print any number of ingredients of any type into the trace. Its
default output format is fairly simplistic, but it can be overridden for
arbitrary types by defining a variant of the <a href='html/058to_text.cc.html'><span style='font-family:courier,fixed'>to-text</span></a>
function with an ingredient of the appropriate type. For example, see
<a href='html/064list.mu.html'>064list.mu</a> which defines how we trace
lists.
<br/><a href='html/067random.cc.html'>067random.cc</a>: a random-number
generator with a <a href='html/068random.mu.html'>testable</a> interface.

<p><b>Part V</b>: Primitives for interfacing with hardware.

<p/><a href='html/080display.cc.html'>080display.cc</a>: primitives for
accessing the keyboard and screen.
<br/><a href='html/081print.mu.html'>081print.mu</a>: helpers that can swap
the real screen with fake ones for testing.
<br/><a href='html/082scenario_screen.cc.html'>082scenario_screen.cc</a>:
writing tests that check what is printed to screen.
(<a href='html/083scenario_screen_test.mu.html'>examples</a>)
<br/><a href='html/084console.mu.html'>084console.mu</a>: helpers that can
swap the real keyboard and mouse with fake ones for testing.
<br/><a href='html/085scenario_console.cc.html'>085scenario_console.cc</a>:
writing tests for keyboard and mouse using the fakes.
(<a href='html/086scenario_console_test.mu.html'>examples</a>)
<br/><a href='html/087file.cc.html'>087file.cc</a>: primitives for accessing
the file system.
<br/><a href='html/088file.mu.html'>088file.mu</a>: helpers that permit
swapping a fake filesystem or <tt>resources</tt> object for testing.
<br/><a href='html/089scenario_filesystem.cc.html'>089scenario_filesystem.cc</a>:
writing tests for filesystem using the fakes.
(<a href='html/090scenario_filesystem_test.mu.html'>examples</a>)
<br/><a href='html/091socket.cc.html'>091socket.cc</a>: primitives for
accessing the network.
<br/><a href='html/092socket.mu.html'>092socket.mu</a>: helpers for the
network. In Mu you create a fake network &lsquo;neighborhood&rsquo; the same
way you create a fake local file system.

<p/><a href='html/029tools.cc.html'>029tools.cc</a>: various primitive
operations to help with testing and debugging.
<br/><a href='html/100trace_browser.cc.html'>100trace_browser.cc</a>: a
zoomable UI for inspecting traces generated by Mu programs. Allows both
scanning a high-level view and drilling down into selective details.

<p><b>Part VI</b>: An environment that watches programmers as they
manually test their code, and turns these interactive sessions into reproducible
test scenarios. The <a href='https://github.com/akkartik/mu/blob/master/edit/Readme.md'>readme for the app</a>
contains instructions for running it. Stop loading after each of these layers
to get a working version with just fewer features. 

<p/><a href='html/edit/001-editor.mu.html'>edit/001-editor.mu</a>: data
structures for a text editor widget. Load just this layer to see just the
rendering and line-wrapping at work.
<br/><a href='html/edit/002-typing.mu.html'>edit/002-typing.mu</a>: support
for moving the cursor anywhere with the mouse and typing text in there.
<br/><a href='html/edit/003-shortcuts.mu.html'>edit/003-shortcuts.mu</a>:
support for various keyboard shortcuts for manipulating text you've typed in.
<br/><a href='html/edit/004-programming-environment.mu.html'>edit/004-programming-environment.mu</a>:
combining two text editor widgets, one on the left, one on the right.
<br/><a href='html/edit/005-sandbox.mu.html'>edit/005-sandbox.mu</a>: support
for running mu code in the right-hand widget using code from the left, and
displaying results in a <em>sandbox</em> below on the right. You can have
multiple sandboxes, and hit F4 to rerun them all at any time with the latest
version of the code on the left side.
<br/><a href='html/edit/006-sandbox-copy.mu.html'>edit/006-sandbox-edit.mu</a>:
click on the 'copy' button in each sandbox to duplicate its contents.
<br/><a href='html/edit/007-sandbox-delete.mu.html'>edit/007-sandbox-delete.mu</a>:
support for the 'delete' button in each sandbox.
<br/><a href='html/edit/008-sandbox-edit.mu.html'>edit/008-sandbox-edit.mu</a>:
click on the 'edit' button of each sandbox to pop its back into the sandbox
editor. Basically like copying and then deleting a sandbox.
<br/><a href='html/edit/009-sandbox-test.mu.html'>edit/009-sandbox-test.mu</a>:
click on the results of a sandbox to turn them green and save the output as
golden/expected. Any future changes to the output will then be flagged in red.
<br/><a href='html/edit/010-sandbox-trace.mu.html'>edit/010-sandbox-trace.mu</a>:
click on code in a sandbox to open up a drawer containing its trace. The trace
can be added to using the <span style='font-family:courier,fixed'>stash</span>
command, which can be extended to render arbitrary data structures by creating
new variants of the <span style='font-family:courier,fixed'>to-text</span>
recipe for the appropriate types.
<br/><a href='html/edit/011-errors.mu.html'>edit/011-errors.mu</a>:
support for rendering errors on both the left and in each sandbox.
<br/><a href='html/edit/012-editor-undo.mu.html'>edit/012-editor-undo.mu</a>:
support for undo in the editor widget.

<hr>

<p>
The zen of mu:
<ul>
<li>traces, not interfaces
<li>be rewrite-friendly, not backwards-compatible
<li>be easy to port rather than portable
<li>global structure matters more than local hygiene
</ul>

<p>
Mu's vision of utopia:
<ul>
<li>Run your devices in 1/1000th the code.
<li>1000x more forks for open source projects.
<li>Make simple changes to any project in an afternoon, no matter how large it is.
<li>Projects don't slow down with age, they continue to evolve just as fast as
when they were first started.
<li>All software rewards curiosity, allowing anyone to query its design
decisions, gradually learn how to tweak it, try out increasingly radical
redesign ideas in a sandbox. People learn programming as an imperceptible side
effect of tinkering with the projects they care about.
<li><a href='http://akkartik.name/post/habitability'>Habitable</a> digital environments.
<li>A <em>literate</em> digital society with widespread skills for
comprehending large-scale software structure and comparing-and-contrasting
similar solutions. (I don't think anybody is literate by this definition
today. All we can do easily is read our own programs that we wrote recently.)
</ul>

<p style='margin-bottom: 2em'/>
ss="p">, "rm32 = bitwise AND of r32 with rm32 (and)"); :(code) void test_and_r32_with_r32() { Reg[EAX].i = 0x0a0b0c0d; Reg[EBX].i = 0x000000ff; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 21 d8 \n" // and EBX with destination EAX // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) ); CHECK_TRACE_CONTENTS( "run: and EBX with r/m32\n" "run: r/m32 is EAX\n" "run: storing 0x0000000d\n" ); } :(before "End Single-Byte Opcodes") case 0x21: { // and r32 with r/m32 const uint8_t modrm = next(); const uint8_t arg2 = (modrm>>3)&0x7; trace(Callstack_depth+1, "run") << "and " << rname(arg2) << " with r/m32" << end(); // bitwise ops technically operate on unsigned numbers, but it makes no // difference int32_t* signed_arg1 = effective_address(modrm); *signed_arg1 &= Reg[arg2].i; trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *signed_arg1 << end(); SF = (*signed_arg1 >> 31); ZF = (*signed_arg1 == 0); CF = false; OF = false; trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end(); break; } //:: or :(before "End Initialize Op Names") put_new(Name, "09", "rm32 = bitwise OR of r32 with rm32 (or)"); :(code) void test_or_r32_with_r32() { Reg[EAX].i = 0x0a0b0c0d; Reg[EBX].i = 0xa0b0c0d0; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 09 d8 \n" // or EBX with destination EAX // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) ); CHECK_TRACE_CONTENTS( "run: or EBX with r/m32\n" "run: r/m32 is EAX\n" "run: storing 0xaabbccdd\n" ); } :(before "End Single-Byte Opcodes") case 0x09: { // or r32 with r/m32 const uint8_t modrm = next(); const uint8_t arg2 = (modrm>>3)&0x7; trace(Callstack_depth+1, "run") << "or " << rname(arg2) << " with r/m32" << end(); // bitwise ops technically operate on unsigned numbers, but it makes no // difference int32_t* signed_arg1 = effective_address(modrm); *signed_arg1 |= Reg[arg2].i; trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *signed_arg1 << end(); SF = (*signed_arg1 >> 31); ZF = (*signed_arg1 == 0); CF = false; OF = false; trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end(); break; } //:: xor :(before "End Initialize Op Names") put_new(Name, "31", "rm32 = bitwise XOR of r32 with rm32 (xor)"); :(code) void test_xor_r32_with_r32() { Reg[EAX].i = 0x0a0b0c0d; Reg[EBX].i = 0xaabbc0d0; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 31 d8 \n" // xor EBX with destination EAX // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) ); CHECK_TRACE_CONTENTS( "run: xor EBX with r/m32\n" "run: r/m32 is EAX\n" "run: storing 0xa0b0ccdd\n" ); } :(before "End Single-Byte Opcodes") case 0x31: { // xor r32 with r/m32 const uint8_t modrm = next(); const uint8_t arg2 = (modrm>>3)&0x7; trace(Callstack_depth+1, "run") << "xor " << rname(arg2) << " with r/m32" << end(); // bitwise ops technically operate on unsigned numbers, but it makes no // difference int32_t* signed_arg1 = effective_address(modrm); *signed_arg1 ^= Reg[arg2].i; trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *signed_arg1 << end(); SF = (*signed_arg1 >> 31); ZF = (*signed_arg1 == 0); CF = false; OF = false; trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end(); break; } //:: not :(code) void test_not_r32() { Reg[EBX].i = 0x0f0f00ff; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " f7 d3 \n" // not EBX // ModR/M in binary: 11 (direct mode) 010 (subop not) 011 (dest EBX) ); CHECK_TRACE_CONTENTS( "run: operate on r/m32\n" "run: r/m32 is EBX\n" "run: subop: not\n" "run: storing 0xf0f0ff00\n" ); } :(before "End Op f7 Subops") case 2: { // not r/m32 trace(Callstack_depth+1, "run") << "subop: not" << end(); *arg1 = ~(*arg1); trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *arg1 << end(); // no flags affected break; } //:: compare (cmp) :(before "End Initialize Op Names") put_new(Name, "39", "compare: set SF if rm32 < r32 (cmp)"); :(code) void test_compare_r32_with_r32_greater() { Reg[EAX].i = 0x0a0b0c0d; Reg[EBX].i = 0x0a0b0c07; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 39 d8 \n" // compare EAX with EBX // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) ); CHECK_TRACE_CONTENTS( "run: compare r/m32 with EBX\n" "run: r/m32 is EAX\n" "run: SF=0; ZF=0; CF=0; OF=0\n" ); } :(before "End Single-Byte Opcodes") case 0x39: { // set SF if r/m32 < r32 const uint8_t modrm = next(); const uint8_t reg2 = (modrm>>3)&0x7; trace(Callstack_depth+1, "run") << "compare r/m32 with " << rname(reg2) << end(); const int32_t* signed_arg1 = effective_address(modrm); const int32_t signed_difference = *signed_arg1 - Reg[reg2].i; SF = (signed_difference < 0); ZF = (signed_difference == 0); const int64_t signed_full_difference = static_cast<int64_t>(*signed_arg1) - Reg[reg2].i; OF = (signed_difference != signed_full_difference); // set CF const uint32_t unsigned_arg1 = static_cast<uint32_t>(*signed_arg1); const uint32_t unsigned_difference = unsigned_arg1 - Reg[reg2].u; const uint64_t unsigned_full_difference = static_cast<uint64_t>(unsigned_arg1) - Reg[reg2].u; CF = (unsigned_difference != unsigned_full_difference); trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end(); break; } :(code) void test_compare_r32_with_r32_lesser_unsigned_and_signed() { Reg[EAX].i = 0x0a0b0c07; Reg[EBX].i = 0x0a0b0c0d; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 39 d8 \n" // compare EAX with EBX // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) ); CHECK_TRACE_CONTENTS( "run: compare r/m32 with EBX\n" "run: r/m32 is EAX\n" "run: SF=1; ZF=0; CF=1; OF=0\n" ); } void test_compare_r32_with_r32_lesser_unsigned_and_signed_due_to_overflow() { Reg[EAX].i = 0x7fffffff; // largest positive signed integer Reg[EBX].i = 0x80000000; // smallest negative signed integer run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 39 d8 \n" // compare EAX with EBX // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) ); CHECK_TRACE_CONTENTS( "run: compare r/m32 with EBX\n" "run: r/m32 is EAX\n" "run: SF=1; ZF=0; CF=1; OF=1\n" ); } void test_compare_r32_with_r32_lesser_signed() { Reg[EAX].i = 0xffffffff; // -1 Reg[EBX].i = 0x00000001; // 1 run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 39 d8 \n" // compare EAX with EBX // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) ); CHECK_TRACE_CONTENTS( "run: compare r/m32 with EBX\n" "run: r/m32 is EAX\n" "run: SF=1; ZF=0; CF=0; OF=0\n" ); } void test_compare_r32_with_r32_lesser_unsigned() { Reg[EAX].i = 0x00000001; // 1 Reg[EBX].i = 0xffffffff; // -1 run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 39 d8 \n" // compare EAX with EBX // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) ); CHECK_TRACE_CONTENTS( "run: compare r/m32 with EBX\n" "run: r/m32 is EAX\n" "run: SF=0; ZF=0; CF=1; OF=0\n" ); } void test_compare_r32_with_r32_equal() { Reg[EAX].i = 0x0a0b0c0d; Reg[EBX].i = 0x0a0b0c0d; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 39 d8 \n" // compare EAX and EBX // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) ); CHECK_TRACE_CONTENTS( "run: compare r/m32 with EBX\n" "run: r/m32 is EAX\n" "run: SF=0; ZF=1; CF=0; OF=0\n" ); } //:: copy (mov) :(before "End Initialize Op Names") put_new(Name, "89", "copy r32 to rm32 (mov)"); :(code) void test_copy_r32_to_r32() { Reg[EBX].i = 0xaf; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 89 d8 \n" // copy EBX to EAX // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) ); CHECK_TRACE_CONTENTS( "run: copy EBX to r/m32\n" "run: r/m32 is EAX\n" "run: storing 0x000000af\n" ); } :(before "End Single-Byte Opcodes") case 0x89: { // copy r32 to r/m32 const uint8_t modrm = next(); const uint8_t rsrc = (modrm>>3)&0x7; trace(Callstack_depth+1, "run") << "copy " << rname(rsrc) << " to r/m32" << end(); int32_t* dest = effective_address(modrm); *dest = Reg[rsrc].i; // Write multiple elements of vector<uint8_t> at once. Assumes sizeof(int) == 4 on the host as well. trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *dest << end(); break; } //:: xchg :(before "End Initialize Op Names") put_new(Name, "87", "swap the contents of r32 and rm32 (xchg)"); :(code) void test_xchg_r32_with_r32() { Reg[EBX].i = 0xaf; Reg[EAX].i = 0x2e; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 87 d8 \n" // exchange EBX with EAX // ModR/M in binary: 11 (direct mode) 011 (src EBX) 000 (dest EAX) ); CHECK_TRACE_CONTENTS( "run: exchange EBX with r/m32\n" "run: r/m32 is EAX\n" "run: storing 0x000000af in r/m32\n" "run: storing 0x0000002e in EBX\n" ); } :(before "End Single-Byte Opcodes") case 0x87: { // exchange r32 with r/m32 const uint8_t modrm = next(); const uint8_t reg2 = (modrm>>3)&0x7; trace(Callstack_depth+1, "run") << "exchange " << rname(reg2) << " with r/m32" << end(); int32_t* arg1 = effective_address(modrm); const int32_t tmp = *arg1; *arg1 = Reg[reg2].i; Reg[reg2].i = tmp; trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << *arg1 << " in r/m32" << end(); trace(Callstack_depth+1, "run") << "storing 0x" << HEXWORD << Reg[reg2].i << " in " << rname(reg2) << end(); break; } //:: increment :(before "End Initialize Op Names") put_new(Name, "40", "increment EAX (inc)"); put_new(Name, "41", "increment ECX (inc)"); put_new(Name, "42", "increment EDX (inc)"); put_new(Name, "43", "increment EBX (inc)"); put_new(Name, "44", "increment ESP (inc)"); put_new(Name, "45", "increment EBP (inc)"); put_new(Name, "46", "increment ESI (inc)"); put_new(Name, "47", "increment EDI (inc)"); :(code) void test_increment_r32() { Reg[ECX].u = 0x1f; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 41 \n" // increment ECX ); CHECK_TRACE_CONTENTS( "run: increment ECX\n" "run: storing value 0x00000020\n" ); } :(before "End Single-Byte Opcodes") case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: { // increment r32 const uint8_t reg = op & 0x7; trace(Callstack_depth+1, "run") << "increment " << rname(reg) << end(); ++Reg[reg].u; trace(Callstack_depth+1, "run") << "storing value 0x" << HEXWORD << Reg[reg].u << end(); break; } :(before "End Initialize Op Names") put_new(Name, "ff", "increment/decrement/jump/push/call rm32 based on subop (inc/dec/jmp/push/call)"); :(code) void test_increment_rm32() { Reg[EAX].u = 0x20; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " ff c0 \n" // increment EAX // ModR/M in binary: 11 (direct mode) 000 (subop inc) 000 (EAX) ); CHECK_TRACE_CONTENTS( "run: increment r/m32\n" "run: r/m32 is EAX\n" "run: storing value 0x00000021\n" ); } :(before "End Single-Byte Opcodes") case 0xff: { const uint8_t modrm = next(); const uint8_t subop = (modrm>>3)&0x7; // middle 3 'reg opcode' bits switch (subop) { case 0: { // increment r/m32 trace(Callstack_depth+1, "run") << "increment r/m32" << end(); int32_t* arg = effective_address(modrm); ++*arg; trace(Callstack_depth+1, "run") << "storing value 0x" << HEXWORD << *arg << end(); break; } default: cerr << "unrecognized subop for ff: " << HEXBYTE << NUM(subop) << '\n'; exit(1); // End Op ff Subops } break; } //:: decrement :(before "End Initialize Op Names") put_new(Name, "48", "decrement EAX (dec)"); put_new(Name, "49", "decrement ECX (dec)"); put_new(Name, "4a", "decrement EDX (dec)"); put_new(Name, "4b", "decrement EBX (dec)"); put_new(Name, "4c", "decrement ESP (dec)"); put_new(Name, "4d", "decrement EBP (dec)"); put_new(Name, "4e", "decrement ESI (dec)"); put_new(Name, "4f", "decrement EDI (dec)"); :(code) void test_decrement_r32() { Reg[ECX].u = 0x1f; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 49 \n" // decrement ECX ); CHECK_TRACE_CONTENTS( "run: decrement ECX\n" "run: storing value 0x0000001e\n" ); } :(before "End Single-Byte Opcodes") case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f: { // decrement r32 const uint8_t reg = op & 0x7; trace(Callstack_depth+1, "run") << "decrement " << rname(reg) << end(); --Reg[reg].u; trace(Callstack_depth+1, "run") << "storing value 0x" << HEXWORD << Reg[reg].u << end(); break; } :(code) void test_decrement_rm32() { Reg[EAX].u = 0x20; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " ff c8 \n" // decrement EAX // ModR/M in binary: 11 (direct mode) 001 (subop inc) 000 (EAX) ); CHECK_TRACE_CONTENTS( "run: decrement r/m32\n" "run: r/m32 is EAX\n" "run: storing value 0x0000001f\n" ); } :(before "End Op ff Subops") case 1: { // decrement r/m32 trace(Callstack_depth+1, "run") << "decrement r/m32" << end(); int32_t* arg = effective_address(modrm); --*arg; trace(Callstack_depth+1, "run") << "storing value 0x" << HEXWORD << *arg << end(); break; } //:: push :(before "End Initialize Op Names") put_new(Name, "50", "push EAX to stack (push)"); put_new(Name, "51", "push ECX to stack (push)"); put_new(Name, "52", "push EDX to stack (push)"); put_new(Name, "53", "push EBX to stack (push)"); put_new(Name, "54", "push ESP to stack (push)"); put_new(Name, "55", "push EBP to stack (push)"); put_new(Name, "56", "push ESI to stack (push)"); put_new(Name, "57", "push EDI to stack (push)"); :(code) void test_push_r32() { Mem.push_back(vma(0xbd000000)); // manually allocate memory Reg[ESP].u = 0xbd000008; Reg[EBX].i = 0x0000000a; run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 53 \n" // push EBX to stack ); CHECK_TRACE_CONTENTS( "run: push EBX\n" "run: decrementing ESP to 0xbd000004\n" "run: pushing value 0x0000000a\n" ); } :(before "End Single-Byte Opcodes") case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: { // push r32 to stack uint8_t reg = op & 0x7; trace(Callstack_depth+1, "run") << "push " << rname(reg) << end(); //? cerr << "push: " << NUM(reg) << ": " << Reg[reg].u << " => " << Reg[ESP].u << '\n'; push(Reg[reg].u); break; } //:: pop :(before "End Initialize Op Names") put_new(Name, "58", "pop top of stack to EAX (pop)"); put_new(Name, "59", "pop top of stack to ECX (pop)"); put_new(Name, "5a", "pop top of stack to EDX (pop)"); put_new(Name, "5b", "pop top of stack to EBX (pop)"); put_new(Name, "5c", "pop top of stack to ESP (pop)"); put_new(Name, "5d", "pop top of stack to EBP (pop)"); put_new(Name, "5e", "pop top of stack to ESI (pop)"); put_new(Name, "5f", "pop top of stack to EDI (pop)"); :(code) void test_pop_r32() { Mem.push_back(vma(0xbd000000)); // manually allocate memory Reg[ESP].u = 0xbd000008; write_mem_i32(0xbd000008, 0x0000000a); // ..before this write run( "== code 0x1\n" // code segment // op ModR/M SIB displacement immediate " 5b \n" // pop stack to EBX "== data 0x2000\n" // data segment "0a 00 00 00\n" // 0x0000000a ); CHECK_TRACE_CONTENTS( "run: pop into EBX\n" "run: popping value 0x0000000a\n" "run: incrementing ESP to 0xbd00000c\n" ); } :(before "End Single-Byte Opcodes") case 0x58: case 0x59: case 0x5a: case 0x5b: case 0x5c: case 0x5d: case 0x5e: case 0x5f: { // pop stack into r32 const uint8_t reg = op & 0x7; trace(Callstack_depth+1, "run") << "pop into " << rname(reg) << end(); //? cerr << "pop from " << Reg[ESP].u << '\n'; Reg[reg].u = pop(); //? cerr << "=> " << NUM(reg) << ": " << Reg[reg].u << '\n'; break; } :(code) uint32_t pop() { const uint32_t result = read_mem_u32(Reg[ESP].u); trace(Callstack_depth+1, "run") << "popping value 0x" << HEXWORD << result << end(); Reg[ESP].u += 4; trace(Callstack_depth+1, "run") << "incrementing ESP to 0x" << HEXWORD << Reg[ESP].u << end(); assert(Reg[ESP].u < AFTER_STACK); return result; }