From 3350c34a74844e21ea69077e01efff3bae64bdcd Mon Sep 17 00:00:00 2001 From: Kartik Agaram Date: Tue, 23 Mar 2021 17:31:08 -0700 Subject: . --- html/linux/bootstrap/022float.cc.html | 582 ++++++++++++++++++++++++++++++++++ 1 file changed, 582 insertions(+) create mode 100644 html/linux/bootstrap/022float.cc.html (limited to 'html/linux/bootstrap/022float.cc.html') diff --git a/html/linux/bootstrap/022float.cc.html b/html/linux/bootstrap/022float.cc.html new file mode 100644 index 00000000..8365cc91 --- /dev/null +++ b/html/linux/bootstrap/022float.cc.html @@ -0,0 +1,582 @@ + + + + +Mu - linux/bootstrap/022float.cc + + + + + + + + + + +https://github.com/akkartik/mu/blob/main/linux/bootstrap/022float.cc +
+  1 //: floating-point operations
+  2 
+  3 //:: copy
+  4 
+  5 :(before "End Initialize Op Names")
+  6 put_new(Name_f3_0f, "10", "copy xm32 to x32 (movss)");  // f3 0f 10: load direction, destination is an XMM register
+  7 put_new(Name_f3_0f, "11", "copy x32 to xm32 (movss)");  // f3 0f 11: store direction, source is an XMM register
+  8 
+  9 :(code)
+ 10 void test_copy_x32_to_x32() {  // f3 0f 11 in register-direct mode: the value in XMM3 should be stored into XMM0
+ 11   Xmm[3] = 0.5;  // seed the source register
+ 12   run(
+ 13       "== code 0x1\n"  // code segment
+ 14       // op     ModR/M  SIB   displacement  immediate
+ 15       "f3 0f 11 d8                                    \n"  // copy XMM3 to XMM0
+ 16       // ModR/M in binary: 11 (direct mode) 011 (src XMM3) 000 (dest XMM0)
+ 17   );
+ 18   CHECK_TRACE_CONTENTS(
+ 19       "run: copy XMM3 to x/m32\n"
+ 20       "run: x/m32 is XMM0\n"
+ 21       "run: storing 0.5\n"
+ 22   );
+ 23 }
+ 24 
+ 25 :(before "End Three-Byte Opcodes Starting With f3 0f")
+ 26 case 0x10: {  // movss, load direction: x32 <- x/m32
+ 27   const uint8_t modrm_byte = next();
+ 28   const uint8_t xdest = (modrm_byte >> 3) & 0x7;  // middle 3 bits of ModR/M pick the destination XMM register
+ 29   trace(Callstack_depth+1, "run") << "copy x/m32 to " << Xname[xdest] << end();
+ 30   const float* from = effective_address_float(modrm_byte);
+ 31   Xmm[xdest] = *from;  // Reads the whole operand as one float. Assumes sizeof(float) == 4 on the host as well.
+ 32   trace(Callstack_depth+1, "run") << "storing " << Xmm[xdest] << end();
+ 33   break;
+ 34 }
+ 35 case 0x11: {  // movss, store direction: x/m32 <- x32
+ 36   const uint8_t modrm_byte = next();
+ 37   const uint8_t xsrc = (modrm_byte >> 3) & 0x7;  // middle 3 bits of ModR/M pick the source XMM register
+ 38   trace(Callstack_depth+1, "run") << "copy " << Xname[xsrc] << " to x/m32" << end();
+ 39   float* to = effective_address_float(modrm_byte);
+ 40   *to = Xmm[xsrc];  // Writes the whole operand as one float. Assumes sizeof(float) == 4 on the host as well.
+ 41   trace(Callstack_depth+1, "run") << "storing " << *to << end();
+ 42   break;
+ 43 }
+ 44 
+ 45 :(code)
+ 46 void test_copy_x32_to_mem_at_xm32() {  // f3 0f 11 in indirect mode: the value in XMM3 should be stored at the address held in EAX
+ 47   Xmm[3] = 0.5;  // value to store
+ 48   Reg[EAX].i = 0x60;  // destination address; the trace below expects it as the effective address
+ 49   run(
+ 50       "== code 0x1\n"
+ 51       // op     ModR/M  SIB   displacement  immediate
+ 52       "f3 0f 11 18                                    \n"  // copy XMM3 to *EAX
+ 53       // ModR/M in binary: 00 (indirect mode) 011 (src XMM3) 000 (dest EAX)
+ 54   );
+ 55   CHECK_TRACE_CONTENTS(
+ 56       "run: copy XMM3 to x/m32\n"
+ 57       "run: effective address is 0x00000060 (EAX)\n"
+ 58       "run: storing 0.5\n"
+ 59   );
+ 60 }
+ 61 
+ 62 void test_copy_mem_at_xm32_to_x32() {  // f3 0f 10 in indirect mode: the float at the address held in EAX should be loaded into XMM3
+ 63   Reg[EAX].i = 0x2000;  // points at the data segment declared in the program below
+ 64   run(
+ 65       "== code 0x1\n"
+ 66       // op     ModR/M  SIB   displacement  immediate
+ 67       "f3 0f 10 18                                    \n"  // copy *EAX to XMM3
+ 68       "== data 0x2000\n"
+ 69       "00 00 00 3f\n"  // 0x3f000000 = 0.5
+ 70   );
+ 71   CHECK_TRACE_CONTENTS(
+ 72       "run: copy x/m32 to XMM3\n"
+ 73       "run: effective address is 0x00002000 (EAX)\n"
+ 74       "run: storing 0.5\n"
+ 75   );
+ 76 }
+ 77 
+ 78 //:: convert to floating point
+ 79 
+ 80 :(before "End Initialize Op Names")
+ 81 put_new(Name_f3_0f, "2a", "convert integer to floating-point (cvtsi2ss)");  // f3 0f 2a: integer r/m32 source, XMM register destination
+ 82 
+ 83 :(code)
+ 84 void test_cvtsi2ss() {  // f3 0f 2a in register-direct mode: the integer in EAX should land in XMM0 as a float
+ 85   Reg[EAX].i = 10;  // integer input
+ 86   run(
+ 87       "== code 0x1\n"
+ 88       // op     ModR/M  SIB   displacement  immediate
+ 89       "f3 0f 2a c0                                    \n"
+ 90       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 000 (EAX)
+ 91   );
+ 92   CHECK_TRACE_CONTENTS(
+ 93       "run: convert r/m32 to XMM0\n"
+ 94       "run: r/m32 is EAX\n"
+ 95       "run: XMM0 is now 10\n"
+ 96   );
+ 97 }
+ 98 
+ 99 :(before "End Three-Byte Opcodes Starting With f3 0f")
+100 case 0x2a: {  // cvtsi2ss: x32 <- integer r/m32 converted to float
+101   const uint8_t modrm_byte = next();
+102   const uint8_t xdest = (modrm_byte >> 3) & 0x7;  // middle 3 bits of ModR/M pick the destination XMM register
+103   trace(Callstack_depth+1, "run") << "convert r/m32 to " << Xname[xdest] << end();
+104   const int32_t* int_operand = effective_address(modrm_byte);
+105   Xmm[xdest] = static_cast<float>(*int_operand);  // same int-to-float conversion, spelled out
+106   trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
+107   break;
+108 }
+109 
+110 //:: convert floating point to int
+111 
+112 :(before "End Initialize Op Names")
+113 put_new(Name_f3_0f, "2d", "convert floating-point to int (cvtss2si)");  // f3 0f 2d: rounds x/m32 into a 32-bit general-purpose register
+114 put_new(Name_f3_0f, "2c", "truncate floating-point to int (cvttss2si)");  // f3 0f 2c: same operands, but drops the fractional part
+115 
+116 :(code)
+117 void test_cvtss2si() {  // f3 0f 2d in register-direct mode: 9.8 in XMM0 should round to 10 (0xa) in EAX
+118   Xmm[0] = 9.8;  // float input
+119   run(
+120       "== code 0x1\n"
+121       // op     ModR/M  SIB   displacement  immediate
+122       "f3 0f 2d c0                                    \n"
+123       // ModR/M in binary: 11 (direct mode) 000 (EAX) 000 (XMM0)
+124   );
+125   CHECK_TRACE_CONTENTS(
+126       "run: convert x/m32 to EAX\n"
+127       "run: x/m32 is XMM0\n"
+128       "run: EAX is now 0x0000000a\n"
+129   );
+130 }
+131 
+132 :(before "End Three-Byte Opcodes Starting With f3 0f")
+133 case 0x2d: {  // cvtss2si: r32 <- x/m32 rounded to an integer
+134   const uint8_t modrm = next();
+135   const uint8_t dest = (modrm>>3)&0x7;  // middle 3 bits of ModR/M pick the destination general-purpose register
+136   trace(Callstack_depth+1, "run") << "convert x/m32 to " << rname(dest) << end();
+137   const float* src = effective_address_float(modrm);
+138   Reg[dest].i = nearbyintf(*src);  // rounds ties to even under the default rounding mode, as the instruction does; round() sends ties away from zero (2.5 -> 3 rather than 2)
+139   trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
+140   break;
+141 }
+142 
+143 :(code)
+144 void test_cvttss2si() {  // f3 0f 2c in register-direct mode: 9.8 in XMM0 should truncate to 9 in EAX
+145   Xmm[0] = 9.8;  // float input
+146   run(
+147       "== code 0x1\n"
+148       // op     ModR/M  SIB   displacement  immediate
+149       "f3 0f 2c c0                                    \n"
+150       // ModR/M in binary: 11 (direct mode) 000 (EAX) 000 (XMM0)
+151   );
+152   CHECK_TRACE_CONTENTS(
+153       "run: truncate x/m32 to EAX\n"
+154       "run: x/m32 is XMM0\n"
+155       "run: EAX is now 0x00000009\n"
+156   );
+157 }
+158 
+159 :(before "End Three-Byte Opcodes Starting With f3 0f")
+160 case 0x2c: {  // cvttss2si: r32 <- x/m32 with the fraction dropped
+161   const uint8_t modrm_byte = next();
+162   const uint8_t rdest = (modrm_byte >> 3) & 0x7;  // middle 3 bits of ModR/M pick the destination general-purpose register
+163   trace(Callstack_depth+1, "run") << "truncate x/m32 to " << rname(rdest) << end();
+164   const float* operand = effective_address_float(modrm_byte);
+165   Reg[rdest].i = trunc(*operand);
+166   trace(Callstack_depth+1, "run") << rname(rdest) << " is now 0x" << HEXWORD << Reg[rdest].i << end();
+167   break;
+168 }
+169 
+170 //:: add
+171 
+172 :(before "End Initialize Op Names")
+173 put_new(Name_f3_0f, "58", "add floats (addss)");  // f3 0f 58: XMM register += x/m32
+174 
+175 :(code)
+176 void test_addss() {  // f3 0f 58 in register-direct mode: XMM0 = 3 + 2
+177   Xmm[0] = 3.0;  // destination, and left operand
+178   Xmm[1] = 2.0;  // right operand
+179   run(
+180       "== code 0x1\n"
+181       // op     ModR/M  SIB   displacement  immediate
+182       "f3 0f 58 c1                                    \n"
+183       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+184   );
+185   CHECK_TRACE_CONTENTS(
+186       "run: add x/m32 to XMM0\n"
+187       "run: x/m32 is XMM1\n"
+188       "run: XMM0 is now 5\n"
+189   );
+190 }
+191 
+192 :(before "End Three-Byte Opcodes Starting With f3 0f")
+193 case 0x58: {  // addss: x32 += x/m32
+194   const uint8_t modrm_byte = next();
+195   const uint8_t xdest = (modrm_byte >> 3) & 0x7;  // middle 3 bits of ModR/M pick the XMM register that accumulates the sum
+196   trace(Callstack_depth+1, "run") << "add x/m32 to " << Xname[xdest] << end();
+197   const float* addend = effective_address_float(modrm_byte);
+198   Xmm[xdest] = Xmm[xdest] + *addend;
+199   trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
+200   break;
+201 }
+202 
+203 //:: subtract
+204 
+205 :(before "End Initialize Op Names")
+206 put_new(Name_f3_0f, "5c", "subtract floats (subss)");  // f3 0f 5c: XMM register -= x/m32
+207 
+208 :(code)
+209 void test_subss() {  // f3 0f 5c in register-direct mode: XMM0 = 3 - 2
+210   Xmm[0] = 3.0;  // destination, and left operand
+211   Xmm[1] = 2.0;  // amount subtracted
+212   run(
+213       "== code 0x1\n"
+214       // op     ModR/M  SIB   displacement  immediate
+215       "f3 0f 5c c1                                    \n"
+216       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+217   );
+218   CHECK_TRACE_CONTENTS(
+219       "run: subtract x/m32 from XMM0\n"
+220       "run: x/m32 is XMM1\n"
+221       "run: XMM0 is now 1\n"
+222   );
+223 }
+224 
+225 :(before "End Three-Byte Opcodes Starting With f3 0f")
+226 case 0x5c: {  // subss: x32 -= x/m32
+227   const uint8_t modrm_byte = next();
+228   const uint8_t xdest = (modrm_byte >> 3) & 0x7;  // middle 3 bits of ModR/M pick the XMM register being reduced
+229   trace(Callstack_depth+1, "run") << "subtract x/m32 from " << Xname[xdest] << end();
+230   const float* subtrahend = effective_address_float(modrm_byte);
+231   Xmm[xdest] = Xmm[xdest] - *subtrahend;
+232   trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
+233   break;
+234 }
+235 
+236 //:: multiply
+237 
+238 :(before "End Initialize Op Names")
+239 put_new(Name_f3_0f, "59", "multiply floats (mulss)");  // f3 0f 59: XMM register *= x/m32
+240 
+241 :(code)
+242 void test_mulss() {  // f3 0f 59 in register-direct mode: XMM0 = 3 * 2
+243   Xmm[0] = 3.0;  // destination, and left operand
+244   Xmm[1] = 2.0;  // multiplier
+245   run(
+246       "== code 0x1\n"
+247       // op     ModR/M  SIB   displacement  immediate
+248       "f3 0f 59 c1                                    \n"
+249       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+250   );
+251   CHECK_TRACE_CONTENTS(
+252       "run: multiply XMM0 by x/m32\n"
+253       "run: x/m32 is XMM1\n"
+254       "run: XMM0 is now 6\n"
+255   );
+256 }
+257 
+258 :(before "End Three-Byte Opcodes Starting With f3 0f")
+259 case 0x59: {  // mulss: x32 *= x/m32
+260   const uint8_t modrm_byte = next();
+261   const uint8_t xdest = (modrm_byte >> 3) & 0x7;  // middle 3 bits of ModR/M pick the XMM register holding the product
+262   trace(Callstack_depth+1, "run") << "multiply " << Xname[xdest] << " by x/m32" << end();
+263   const float* multiplier = effective_address_float(modrm_byte);
+264   Xmm[xdest] = Xmm[xdest] * *multiplier;
+265   trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
+266   break;
+267 }
+268 
+269 //:: divide
+270 
+271 :(before "End Initialize Op Names")
+272 put_new(Name_f3_0f, "5e", "divide floats (divss)");  // f3 0f 5e: XMM register /= x/m32
+273 
+274 :(code)
+275 void test_divss() {  // f3 0f 5e in register-direct mode: XMM0 = 3 / 2
+276   Xmm[0] = 3.0;  // destination, and dividend
+277   Xmm[1] = 2.0;  // divisor
+278   run(
+279       "== code 0x1\n"
+280       // op     ModR/M  SIB   displacement  immediate
+281       "f3 0f 5e c1                                    \n"
+282       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+283   );
+284   CHECK_TRACE_CONTENTS(
+285       "run: divide XMM0 by x/m32\n"
+286       "run: x/m32 is XMM1\n"
+287       "run: XMM0 is now 1.5\n"
+288   );
+289 }
+290 
+291 :(before "End Three-Byte Opcodes Starting With f3 0f")
+292 case 0x5e: {  // divss: x32 /= x/m32
+293   const uint8_t modrm_byte = next();
+294   const uint8_t xdest = (modrm_byte >> 3) & 0x7;  // middle 3 bits of ModR/M pick the XMM register holding the dividend
+295   trace(Callstack_depth+1, "run") << "divide " << Xname[xdest] << " by x/m32" << end();
+296   const float* divisor = effective_address_float(modrm_byte);
+297   Xmm[xdest] = Xmm[xdest] / *divisor;
+298   trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
+299   break;
+300 }
+301 
+302 //:: min
+303 
+304 :(before "End Initialize Op Names")
+305 put_new(Name_f3_0f, "5d", "minimum of two floats (minss)");  // f3 0f 5d: XMM register = smaller of itself and x/m32
+306 
+307 :(code)
+308 void test_minss() {  // f3 0f 5d in register-direct mode: XMM0 = smaller of 3 and 2
+309   Xmm[0] = 3.0;  // destination, and first operand
+310   Xmm[1] = 2.0;  // second operand; the expected winner
+311   run(
+312       "== code 0x1\n"
+313       // op     ModR/M  SIB   displacement  immediate
+314       "f3 0f 5d c1                                    \n"
+315       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+316   );
+317   CHECK_TRACE_CONTENTS(
+318       "run: minimum of XMM0 and x/m32\n"
+319       "run: x/m32 is XMM1\n"
+320       "run: XMM0 is now 2\n"
+321   );
+322 }
+323 
+324 :(before "End Three-Byte Opcodes Starting With f3 0f")
+325 case 0x5d: {  // minss: x32 = minimum of x32, x/m32
+326   const uint8_t modrm = next();
+327   const uint8_t dest = (modrm>>3)&0x7;  // middle 3 bits of ModR/M pick the XMM register that is both first operand and destination
+328   trace(Callstack_depth+1, "run") << "minimum of " << Xname[dest] << " and x/m32" << end();
+329   const float* src = effective_address_float(modrm);
+330   Xmm[dest] = (Xmm[dest] < *src) ? Xmm[dest] : *src;  // keep dest only when strictly smaller; on a NaN operand or equal values (including +0/-0) the second operand wins, as minss specifies
+331   trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+332   break;
+333 }
+334 
+335 //:: max
+336 
+337 :(before "End Initialize Op Names")
+338 put_new(Name_f3_0f, "5f", "maximum of two floats (maxss)");  // f3 0f 5f: XMM register = larger of itself and x/m32
+339 
+340 :(code)
+341 void test_maxss() {  // f3 0f 5f in register-direct mode: XMM0 = larger of 3 and 2
+342   Xmm[0] = 3.0;  // destination, and first operand; the expected winner
+343   Xmm[1] = 2.0;  // second operand
+344   run(
+345       "== code 0x1\n"
+346       // op     ModR/M  SIB   displacement  immediate
+347       "f3 0f 5f c1                                    \n"
+348       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+349   );
+350   CHECK_TRACE_CONTENTS(
+351       "run: maximum of XMM0 and x/m32\n"
+352       "run: x/m32 is XMM1\n"
+353       "run: XMM0 is now 3\n"
+354   );
+355 }
+356 
+357 :(before "End Three-Byte Opcodes Starting With f3 0f")
+358 case 0x5f: {  // maxss: x32 = maximum of x32, x/m32
+359   const uint8_t modrm = next();
+360   const uint8_t dest = (modrm>>3)&0x7;  // middle 3 bits of ModR/M pick the XMM register that is both first operand and destination
+361   trace(Callstack_depth+1, "run") << "maximum of " << Xname[dest] << " and x/m32" << end();
+362   const float* src = effective_address_float(modrm);
+363   Xmm[dest] = (Xmm[dest] > *src) ? Xmm[dest] : *src;  // keep dest only when strictly larger; on a NaN operand or equal values (including +0/-0) the second operand wins, as maxss specifies
+364   trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
+365   break;
+366 }
+367 
+368 //:: reciprocal
+369 
+370 :(before "End Initialize Op Names")
+371 put_new(Name_f3_0f, "53", "reciprocal of float (rcpss)");  // f3 0f 53: XMM register = 1 / (x/m32)
+372 
+373 :(code)
+374 void test_rcpss() {  // f3 0f 53 in register-direct mode: XMM0 = 1 / 2
+375   Xmm[1] = 2.0;  // input; XMM0 only receives the result
+376   run(
+377       "== code 0x1\n"
+378       // op     ModR/M  SIB   displacement  immediate
+379       "f3 0f 53 c1                                    \n"
+380       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+381   );
+382   CHECK_TRACE_CONTENTS(
+383       "run: reciprocal of x/m32 into XMM0\n"
+384       "run: x/m32 is XMM1\n"
+385       "run: XMM0 is now 0.5\n"
+386   );
+387 }
+388 
+389 :(before "End Three-Byte Opcodes Starting With f3 0f")
+390 case 0x53: {  // rcpss: x32 = 1 / (x/m32)
+391   const uint8_t modrm_byte = next();
+392   const uint8_t xdest = (modrm_byte >> 3) & 0x7;  // middle 3 bits of ModR/M pick the XMM register receiving the result
+393   trace(Callstack_depth+1, "run") << "reciprocal of x/m32 into " << Xname[xdest] << end();
+394   const float* denominator = effective_address_float(modrm_byte);
+395   Xmm[xdest] = 1.0 / *denominator;  // the 1.0 literal makes this a double division, narrowed on assignment
+396   trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
+397   break;
+398 }
+399 
+400 //:: square root
+401 
+402 :(before "End Initialize Op Names")
+403 put_new(Name_f3_0f, "51", "square root of float (sqrtss)");  // f3 0f 51: XMM register = sqrt(x/m32)
+404 
+405 :(code)
+406 void test_sqrtss() {  // f3 0f 51 in register-direct mode: XMM0 = sqrt(2), which the trace prints as 1.41421
+407   Xmm[1] = 2.0;  // input; XMM0 only receives the result
+408   run(
+409       "== code 0x1\n"
+410       // op     ModR/M  SIB   displacement  immediate
+411       "f3 0f 51 c1                                    \n"
+412       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+413   );
+414   CHECK_TRACE_CONTENTS(
+415       "run: square root of x/m32 into XMM0\n"
+416       "run: x/m32 is XMM1\n"
+417       "run: XMM0 is now 1.41421\n"
+418   );
+419 }
+420 
+421 :(before "End Three-Byte Opcodes Starting With f3 0f")
+422 case 0x51: {  // sqrtss: x32 = sqrt(x/m32)
+423   const uint8_t modrm_byte = next();
+424   const uint8_t xdest = (modrm_byte >> 3) & 0x7;  // middle 3 bits of ModR/M pick the XMM register receiving the result
+425   trace(Callstack_depth+1, "run") << "square root of x/m32 into " << Xname[xdest] << end();
+426   const float* radicand = effective_address_float(modrm_byte);
+427   Xmm[xdest] = sqrt(*radicand);
+428   trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
+429   break;
+430 }
+431 
+432 :(before "End Includes")
+433 #include <math.h>
+434 
+435 //:: inverse square root
+436 
+437 :(before "End Initialize Op Names")
+438 put_new(Name_f3_0f, "52", "inverse square root of float (rsqrtss)");  // f3 0f 52: XMM register = 1 / sqrt(x/m32)
+439 
+440 :(code)
+441 void test_rsqrtss() {  // f3 0f 52 in register-direct mode: XMM0 = 1 / sqrt(0.01), which the trace prints as 10
+442   Xmm[1] = 0.01;  // input; XMM0 only receives the result
+443   run(
+444       "== code 0x1\n"
+445       // op     ModR/M  SIB   displacement  immediate
+446       "f3 0f 52 c1                                    \n"
+447       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
+448   );
+449   CHECK_TRACE_CONTENTS(
+450       "run: inverse square root of x/m32 into XMM0\n"
+451       "run: x/m32 is XMM1\n"
+452       "run: XMM0 is now 10\n"
+453   );
+454 }
+455 
+456 :(before "End Three-Byte Opcodes Starting With f3 0f")
+457 case 0x52: {  // rsqrtss: x32 = 1 / sqrt(x/m32)
+458   const uint8_t modrm_byte = next();
+459   const uint8_t xdest = (modrm_byte >> 3) & 0x7;  // middle 3 bits of ModR/M pick the XMM register receiving the result
+460   trace(Callstack_depth+1, "run") << "inverse square root of x/m32 into " << Xname[xdest] << end();
+461   const float* radicand = effective_address_float(modrm_byte);
+462   Xmm[xdest] = 1.0 / sqrt(*radicand);  // the 1.0 literal makes this a double division, narrowed on assignment
+463   trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
+464   break;
+465 }
+466 
+467 :(code)
+468 float* effective_address_float(uint8_t modrm) {
+469   // Resolve the x/m32 operand of a ModR/M byte to a float pointer; the middle 3 'reg opcode' bits are ignored here.
+470   const bool register_direct = ((modrm>>6) == 3);
+471   if (!register_direct) {
+472     const uint32_t operand_addr = effective_address_number(modrm);
+473     trace(Callstack_depth+1, "run") << "effective address contains " << read_mem_f32(operand_addr) << end();
+474     return mem_addr_f32(operand_addr);
+475   }
+476   // mod 3 is just register direct addressing: the operand is the XMM register named by the low 3 bits
+477   const uint8_t xreg = modrm & 0x7;
+478   trace(Callstack_depth+1, "run") << "x/m32 is " << Xname[xreg] << end();
+479   return &Xmm[xreg];
+480 }
+481 
+482 //:: compare
+483 
+484 :(before "End Initialize Op Names")
+485 put_new(Name_0f, "2f", "compare: set CF if x32 < xm32 (comiss)");  // two-byte opcode 0f 2f: registered in Name_0f, not Name_f3_0f like the ops above
+486 
+487 :(code)
+488 void test_compare_x32_with_mem_at_rm32() {  // 0f 2f with a memory operand: 0.5 compared with 0.0 should leave all four traced flags clear
+489   Reg[EAX].i = 0x2000;  // points at the data segment declared in the program below
+490   Xmm[3] = 0.5;  // left-hand side of the comparison
+491   run(
+492       "== code 0x1\n"
+493       // op     ModR/M  SIB   displacement  immediate
+494       "  0f 2f  18                                    \n"  // compare XMM3 with *EAX
+495       // ModR/M in binary: 00 (indirect mode) 011 (lhs XMM3) 000 (rhs EAX)
+496       "== data 0x2000\n"
+497       "00 00 00 00\n"  // 0x00000000 = 0.0
+498   );
+499   CHECK_TRACE_CONTENTS(
+500       "run: compare XMM3 with x/m32\n"
+501       "run: effective address is 0x00002000 (EAX)\n"
+502       "run: SF=0; ZF=0; CF=0; OF=0\n"
+503   );
+504 }
+505 
+506 :(before "End Two-Byte Opcodes Starting With 0f")
+507 case 0x2f: {  // comiss: compare x32 with x/m32 and report the outcome in ZF and CF
+508   const uint8_t modrm = next();
+509   const uint8_t reg1 = (modrm>>3)&0x7;  // middle 3 bits of ModR/M pick the XMM register on the left-hand side
+510   trace(Callstack_depth+1, "run") << "compare " << Xname[reg1] << " with x/m32" << end();
+511   const float* arg2 = effective_address_float(modrm);
+512   // Flag settings carefully copied from the Intel manual: greater -> ZF=0 CF=0; less -> ZF=0 CF=1; equal -> ZF=1 CF=0; unordered (a NaN operand) -> ZF=1 CF=1.
+513   // See also https://stackoverflow.com/questions/7057501/x86-assembler-floating-point-compare/7057771#7057771
+514   SF = OF = false;
+515   ZF = !(Xmm[reg1] < *arg2 || Xmm[reg1] > *arg2);  // true when equal, and also when unordered since every comparison with NaN is false
+516   CF = !(Xmm[reg1] >= *arg2);  // true when less than, and also when unordered
+517   trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
+518   break;
+519 }
+
+ + + -- cgit 1.4.1-2-gfad0