https://github.com/akkartik/mu/blob/main/linux/bootstrap/022float.cc
  1 //: floating-point operations
  2 
  3 //:: copy
  4 
  5 :(before "End Initialize Op Names")
  6 put_new(Name_f3_0f, "10", "copy xm32 to x32 (movss)");  // opcode bytes f3 0f 10: XMM register <- register or memory
  7 put_new(Name_f3_0f, "11", "copy x32 to xm32 (movss)");  // opcode bytes f3 0f 11: register or memory <- XMM register
  8 
  9 :(code)
 10 void test_copy_x32_to_x32() {
      // Register-to-register form of opcode f3 0f 11: XMM3 is preloaded with 0.5 and
      // the trace must show it being stored into XMM0 (ModR/M 0xd8, direct mode).
 11   Xmm[3] = 0.5;
 12   run(
 13       "== code 0x1\n"  // code segment
 14       // op     ModR/M  SIB   displacement  immediate
 15       "f3 0f 11 d8                                    \n"  // copy XMM3 to XMM0
 16       // ModR/M in binary: 11 (direct mode) 011 (src XMM3) 000 (dest XMM0)
 17   );
 18   CHECK_TRACE_CONTENTS(
 19       "run: copy XMM3 to x/m32\n"
 20       "run: x/m32 is XMM0\n"
 21       "run: storing 0.5\n"
 22   );
 23 }
 24 
 25 :(before "End Three-Byte Opcodes Starting With f3 0f")
 26 case 0x10: {  // movss, load direction: x32 <- x/m32
 27   const uint8_t modrm_byte = next();
 28   const uint8_t xreg = (modrm_byte >> 3) & 0x7;  // ModR/M 'reg' field: the XMM register being loaded
 29   trace(Callstack_depth+1, "run") << "copy x/m32 to " << Xname[xreg] << end();
 30   const float* operand = effective_address_float(modrm_byte);  // also emits the operand's own trace line
 31   Xmm[xreg] = *operand;  // Read a whole float through the pointer at once. Assumes sizeof(float) == 4 on the host as well.
 32   trace(Callstack_depth+1, "run") << "storing " << Xmm[xreg] << end();
 33   break;
 34 }
 35 case 0x11: {  // movss, store direction: x/m32 <- x32
 36   const uint8_t modrm_byte = next();
 37   const uint8_t xreg = (modrm_byte >> 3) & 0x7;  // ModR/M 'reg' field: the XMM register being stored
 38   trace(Callstack_depth+1, "run") << "copy " << Xname[xreg] << " to x/m32" << end();
 39   float* target = effective_address_float(modrm_byte);  // also emits the operand's own trace line
 40   *target = Xmm[xreg];  // Write multiple elements of vector<uint8_t> at once. Assumes sizeof(float) == 4 on the host as well.
 41   trace(Callstack_depth+1, "run") << "storing " << *target << end();
 42   break;
 43 }
 44 
 45 :(code)
 46 void test_copy_x32_to_mem_at_xm32() {
      // Memory-destination form of opcode f3 0f 11: with EAX = 0x60 and an
      // indirect-mode ModR/M (0x18), XMM3's 0.5 should be stored at the address in EAX.
 47   Xmm[3] = 0.5;
 48   Reg[EAX].i = 0x60;
 49   run(
 50       "== code 0x1\n"
 51       // op     ModR/M  SIB   displacement  immediate
 52       "f3 0f 11 18                                    \n"  // copy XMM3 to *EAX
 53       // ModR/M in binary: 00 (indirect mode) 011 (src XMM3) 000 (dest EAX)
 54   );
 55   CHECK_TRACE_CONTENTS(
 56       "run: copy XMM3 to x/m32\n"
 57       "run: effective address is 0x00000060 (EAX)\n"
 58       "run: storing 0.5\n"
 59   );
 60 }
 61 
 62 void test_copy_mem_at_xm32_to_x32() {
      // Memory-source form of opcode f3 0f 10: the data segment at 0x2000 holds the
      // bytes of 0.5 (least significant byte first) and EAX points at it, so XMM3
      // should end up holding 0.5.
 63   Reg[EAX].i = 0x2000;
 64   run(
 65       "== code 0x1\n"
 66       // op     ModR/M  SIB   displacement  immediate
 67       "f3 0f 10 18                                    \n"  // copy *EAX to XMM3
          // ModR/M in binary: 00 (indirect mode) 011 (dest XMM3) 000 (src EAX)
 68       "== data 0x2000\n"
 69       "00 00 00 3f\n"  // 0x3f000000 = 0.5
 70   );
 71   CHECK_TRACE_CONTENTS(
 72       "run: copy x/m32 to XMM3\n"
 73       "run: effective address is 0x00002000 (EAX)\n"
 74       "run: storing 0.5\n"
 75   );
 76 }
 77 
 78 //:: convert to floating point
 79 
 80 :(before "End Initialize Op Names")
 81 put_new(Name_f3_0f, "2a", "convert integer to floating-point (cvtsi2ss)");  // opcode bytes f3 0f 2a: 32-bit integer r/m32 -> float in an XMM register
 82 
 83 :(code)
 84 void test_cvtsi2ss() {
      // Opcode f3 0f 2a with a register-direct ModR/M (0xc0): the integer 10 in EAX
      // should be converted and traced as the float 10 in XMM0.
 85   Reg[EAX].i = 10;
 86   run(
 87       "== code 0x1\n"
 88       // op     ModR/M  SIB   displacement  immediate
 89       "f3 0f 2a c0                                    \n"
 90       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 000 (EAX)
 91   );
 92   CHECK_TRACE_CONTENTS(
 93       "run: convert r/m32 to XMM0\n"
 94       "run: r/m32 is EAX\n"
 95       "run: XMM0 is now 10\n"
 96   );
 97 }
 98 
 99 :(before "End Three-Byte Opcodes Starting With f3 0f")
100 case 0x2a: {  // cvtsi2ss: x32 <- (float) r/m32
101   const uint8_t modrm_byte = next();
102   const uint8_t xreg = (modrm_byte >> 3) & 0x7;  // ModR/M 'reg' field: the XMM register receiving the result
103   trace(Callstack_depth+1, "run") << "convert r/m32 to " << Xname[xreg] << end();
104   const int32_t* operand = effective_address(modrm_byte);  // integer operand, so the integer address decoder is used
105   Xmm[xreg] = static_cast<float>(*operand);
106   trace(Callstack_depth+1, "run") << Xname[xreg] << " is now " << Xmm[xreg] << end();
107   break;
108 }
109 
110 //:: convert floating point to int
111 
112 :(before "End Initialize Op Names")
113 put_new(Name_f3_0f, "2d", "convert floating-point to int (cvtss2si)");  // opcode bytes f3 0f 2d: float x/m32 -> rounded integer in r32
114 put_new(Name_f3_0f, "2c", "truncate floating-point to int (cvttss2si)");  // opcode bytes f3 0f 2c: float x/m32 -> integer in r32, fraction discarded
115 
116 :(code)
117 void test_cvtss2si() {
      // Opcode f3 0f 2d with a register-direct ModR/M (0xc0): 9.8 in XMM0 should be
      // rounded to 10 and traced as 0x0000000a in EAX.
118   Xmm[0] = 9.8;
119   run(
120       "== code 0x1\n"
121       // op     ModR/M  SIB   displacement  immediate
122       "f3 0f 2d c0                                    \n"
123       // ModR/M in binary: 11 (direct mode) 000 (EAX) 000 (XMM0)
124   );
125   CHECK_TRACE_CONTENTS(
126       "run: convert x/m32 to EAX\n"
127       "run: x/m32 is XMM0\n"
128       "run: EAX is now 0x0000000a\n"
129   );
130 }
131 
132 :(before "End Three-Byte Opcodes Starting With f3 0f")
133 case 0x2d: {  // convert float to integer
      // cvtss2si: r32 <- x/m32 rounded to a signed 32-bit integer.
      // The instruction rounds according to the current rounding mode, which is
      // round-to-nearest-even by default, so ties go to the even neighbour
      // (2.5 -> 2, 3.5 -> 4). round() sends ties away from zero (2.5 -> 3);
      // rint() follows the host rounding mode, nearest-even unless it was changed.
134   const uint8_t modrm = next();
135   const uint8_t dest = (modrm>>3)&0x7;  // ModR/M 'reg' field: destination general-purpose register
136   trace(Callstack_depth+1, "run") << "convert x/m32 to " << rname(dest) << end();
137   const float* src = effective_address_float(modrm);
      const double rounded = rint(*src);
      // NaN and values outside the int32 range produce the 'integer indefinite'
      // value 0x80000000 on hardware; converting them directly is undefined in C++.
      // Every comparison with NaN is false, so NaN takes the fallback as well.
      const bool representable = (rounded >= -2147483648.0 && rounded < 2147483648.0);
138   Reg[dest].i = representable ? static_cast<int32_t>(rounded) : (-2147483647 - 1);
139   trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
140   break;
141 }
142 
143 :(code)
144 void test_cvttss2si() {
      // Opcode f3 0f 2c with a register-direct ModR/M (0xc0): 9.8 in XMM0 should be
      // truncated to 9 and traced as 0x00000009 in EAX.
145   Xmm[0] = 9.8;
146   run(
147       "== code 0x1\n"
148       // op     ModR/M  SIB   displacement  immediate
149       "f3 0f 2c c0                                    \n"
150       // ModR/M in binary: 11 (direct mode) 000 (EAX) 000 (XMM0)
151   );
152   CHECK_TRACE_CONTENTS(
153       "run: truncate x/m32 to EAX\n"
154       "run: x/m32 is XMM0\n"
155       "run: EAX is now 0x00000009\n"
156   );
157 }
158 
159 :(before "End Three-Byte Opcodes Starting With f3 0f")
160 case 0x2c: {  // truncate float to integer
      // cvttss2si: r32 <- x/m32 with the fractional part discarded (toward zero).
161   const uint8_t modrm = next();
162   const uint8_t dest = (modrm>>3)&0x7;  // ModR/M 'reg' field: destination general-purpose register
163   trace(Callstack_depth+1, "run") << "truncate x/m32 to " << rname(dest) << end();
164   const float* src = effective_address_float(modrm);
      const double truncated = trunc(*src);
      // NaN and values outside the int32 range produce the 'integer indefinite'
      // value 0x80000000 on hardware; converting them directly is undefined in C++.
      // Every comparison with NaN is false, so NaN takes the fallback as well.
      const bool representable = (truncated >= -2147483648.0 && truncated < 2147483648.0);
165   Reg[dest].i = representable ? static_cast<int32_t>(truncated) : (-2147483647 - 1);
166   trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
167   break;
168 }
169 
170 //:: add
171 
172 :(before "End Initialize Op Names")
173 put_new(Name_f3_0f, "58", "add floats (addss)");  // opcode bytes f3 0f 58: x32 += x/m32
174 
175 :(code)
176 void test_addss() {
      // Opcode f3 0f 58 with a register-direct ModR/M (0xc1): XMM0 (3.0) has
      // XMM1 (2.0) added to it, so the trace should report XMM0 as 5.
177   Xmm[0] = 3.0;
178   Xmm[1] = 2.0;
179   run(
180       "== code 0x1\n"
181       // op     ModR/M  SIB   displacement  immediate
182       "f3 0f 58 c1                                    \n"
183       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
184   );
185   CHECK_TRACE_CONTENTS(
186       "run: add x/m32 to XMM0\n"
187       "run: x/m32 is XMM1\n"
188       "run: XMM0 is now 5\n"
189   );
190 }
191 
192 :(before "End Three-Byte Opcodes Starting With f3 0f")
193 case 0x58: {  // addss: x32 += x/m32
194   const uint8_t modrm_byte = next();
195   const uint8_t xreg = (modrm_byte >> 3) & 0x7;  // ModR/M 'reg' field: the XMM register accumulating the sum
196   trace(Callstack_depth+1, "run") << "add x/m32 to " << Xname[xreg] << end();
197   const float* operand = effective_address_float(modrm_byte);  // also emits the operand's own trace line
198   Xmm[xreg] += *operand;
199   trace(Callstack_depth+1, "run") << Xname[xreg] << " is now " << Xmm[xreg] << end();
200   break;
201 }
202 
203 //:: subtract
204 
205 :(before "End Initialize Op Names")
206 put_new(Name_f3_0f, "5c", "subtract floats (subss)");  // opcode bytes f3 0f 5c: x32 -= x/m32
207 
208 :(code)
209 void test_subss() {
      // Opcode f3 0f 5c with a register-direct ModR/M (0xc1): XMM1 (2.0) is
      // subtracted from XMM0 (3.0), so the trace should report XMM0 as 1.
210   Xmm[0] = 3.0;
211   Xmm[1] = 2.0;
212   run(
213       "== code 0x1\n"
214       // op     ModR/M  SIB   displacement  immediate
215       "f3 0f 5c c1                                    \n"
216       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
217   );
218   CHECK_TRACE_CONTENTS(
219       "run: subtract x/m32 from XMM0\n"
220       "run: x/m32 is XMM1\n"
221       "run: XMM0 is now 1\n"
222   );
223 }
224 
225 :(before "End Three-Byte Opcodes Starting With f3 0f")
226 case 0x5c: {  // subss: x32 -= x/m32
227   const uint8_t modrm_byte = next();
228   const uint8_t xreg = (modrm_byte >> 3) & 0x7;  // ModR/M 'reg' field: the XMM register being reduced
229   trace(Callstack_depth+1, "run") << "subtract x/m32 from " << Xname[xreg] << end();
230   const float* operand = effective_address_float(modrm_byte);  // also emits the operand's own trace line
231   Xmm[xreg] -= *operand;
232   trace(Callstack_depth+1, "run") << Xname[xreg] << " is now " << Xmm[xreg] << end();
233   break;
234 }
235 
236 //:: multiply
237 
238 :(before "End Initialize Op Names")
239 put_new(Name_f3_0f, "59", "multiply floats (mulss)");  // opcode bytes f3 0f 59: x32 *= x/m32
240 
241 :(code)
242 void test_mulss() {
      // Opcode f3 0f 59 with a register-direct ModR/M (0xc1): XMM0 (3.0) is
      // multiplied by XMM1 (2.0), so the trace should report XMM0 as 6.
243   Xmm[0] = 3.0;
244   Xmm[1] = 2.0;
245   run(
246       "== code 0x1\n"
247       // op     ModR/M  SIB   displacement  immediate
248       "f3 0f 59 c1                                    \n"
249       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
250   );
251   CHECK_TRACE_CONTENTS(
252       "run: multiply XMM0 by x/m32\n"
253       "run: x/m32 is XMM1\n"
254       "run: XMM0 is now 6\n"
255   );
256 }
257 
258 :(before "End Three-Byte Opcodes Starting With f3 0f")
259 case 0x59: {  // mulss: x32 *= x/m32
260   const uint8_t modrm_byte = next();
261   const uint8_t xreg = (modrm_byte >> 3) & 0x7;  // ModR/M 'reg' field: the XMM register being scaled
262   trace(Callstack_depth+1, "run") << "multiply " << Xname[xreg] << " by x/m32" << end();
263   const float* operand = effective_address_float(modrm_byte);  // also emits the operand's own trace line
264   Xmm[xreg] *= *operand;
265   trace(Callstack_depth+1, "run") << Xname[xreg] << " is now " << Xmm[xreg] << end();
266   break;
267 }
268 
269 //:: divide
270 
271 :(before "End Initialize Op Names")
272 put_new(Name_f3_0f, "5e", "divide floats (divss)");  // opcode bytes f3 0f 5e: x32 /= x/m32
273 
274 :(code)
275 void test_divss() {
      // Opcode f3 0f 5e with a register-direct ModR/M (0xc1): XMM0 (3.0) is
      // divided by XMM1 (2.0), so the trace should report XMM0 as 1.5.
276   Xmm[0] = 3.0;
277   Xmm[1] = 2.0;
278   run(
279       "== code 0x1\n"
280       // op     ModR/M  SIB   displacement  immediate
281       "f3 0f 5e c1                                    \n"
282       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
283   );
284   CHECK_TRACE_CONTENTS(
285       "run: divide XMM0 by x/m32\n"
286       "run: x/m32 is XMM1\n"
287       "run: XMM0 is now 1.5\n"
288   );
289 }
290 
291 :(before "End Three-Byte Opcodes Starting With f3 0f")
292 case 0x5e: {  // divss: x32 /= x/m32
293   const uint8_t modrm_byte = next();
294   const uint8_t xreg = (modrm_byte >> 3) & 0x7;  // ModR/M 'reg' field: the XMM register holding the dividend
295   trace(Callstack_depth+1, "run") << "divide " << Xname[xreg] << " by x/m32" << end();
296   const float* operand = effective_address_float(modrm_byte);  // also emits the operand's own trace line
297   Xmm[xreg] /= *operand;
298   trace(Callstack_depth+1, "run") << Xname[xreg] << " is now " << Xmm[xreg] << end();
299   break;
300 }
301 
302 //:: min
303 
304 :(before "End Initialize Op Names")
305 put_new(Name_f3_0f, "5d", "minimum of two floats (minss)");  // opcode bytes f3 0f 5d: x32 = smaller of x32 and x/m32
306 
307 :(code)
308 void test_minss() {
      // Opcode f3 0f 5d with a register-direct ModR/M (0xc1): of XMM0 (3.0) and
      // XMM1 (2.0) the smaller value, 2, should end up in XMM0.
309   Xmm[0] = 3.0;
310   Xmm[1] = 2.0;
311   run(
312       "== code 0x1\n"
313       // op     ModR/M  SIB   displacement  immediate
314       "f3 0f 5d c1                                    \n"
315       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
316   );
317   CHECK_TRACE_CONTENTS(
318       "run: minimum of XMM0 and x/m32\n"
319       "run: x/m32 is XMM1\n"
320       "run: XMM0 is now 2\n"
321   );
322 }
323 
324 :(before "End Three-Byte Opcodes Starting With f3 0f")
325 case 0x5d: {  // minimum of x32, x/m32
      // minss: x32 <- the smaller of x32 and x/m32.
      // The instruction keeps the first operand only when it is strictly less
      // than the second. In every other case -- including a NaN in either
      // operand, or two zeros of any sign -- the second (x/m32) operand is the
      // result. Spelling out the comparison guarantees that; a generic min()
      // helper gives no such guarantee about which argument it returns.
326   const uint8_t modrm = next();
327   const uint8_t dest = (modrm>>3)&0x7;  // ModR/M 'reg' field: first operand and destination
328   trace(Callstack_depth+1, "run") << "minimum of " << Xname[dest] << " and x/m32" << end();
329   const float* src = effective_address_float(modrm);
330   Xmm[dest] = (Xmm[dest] < *src) ? Xmm[dest] : *src;
331   trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
332   break;
333 }
334 
335 //:: max
336 
337 :(before "End Initialize Op Names")
338 put_new(Name_f3_0f, "5f", "maximum of two floats (maxss)");  // opcode bytes f3 0f 5f: x32 = larger of x32 and x/m32
339 
340 :(code)
341 void test_maxss() {
      // Opcode f3 0f 5f with a register-direct ModR/M (0xc1): of XMM0 (3.0) and
      // XMM1 (2.0) the larger value, 3, should remain in XMM0.
342   Xmm[0] = 3.0;
343   Xmm[1] = 2.0;
344   run(
345       "== code 0x1\n"
346       // op     ModR/M  SIB   displacement  immediate
347       "f3 0f 5f c1                                    \n"
348       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
349   );
350   CHECK_TRACE_CONTENTS(
351       "run: maximum of XMM0 and x/m32\n"
352       "run: x/m32 is XMM1\n"
353       "run: XMM0 is now 3\n"
354   );
355 }
356 
357 :(before "End Three-Byte Opcodes Starting With f3 0f")
358 case 0x5f: {  // maximum of x32, x/m32
      // maxss: x32 <- the larger of x32 and x/m32.
      // The instruction keeps the first operand only when it is strictly greater
      // than the second. In every other case -- including a NaN in either
      // operand, or two zeros of any sign -- the second (x/m32) operand is the
      // result. Spelling out the comparison guarantees that; a generic max()
      // helper gives no such guarantee about which argument it returns.
359   const uint8_t modrm = next();
360   const uint8_t dest = (modrm>>3)&0x7;  // ModR/M 'reg' field: first operand and destination
361   trace(Callstack_depth+1, "run") << "maximum of " << Xname[dest] << " and x/m32" << end();
362   const float* src = effective_address_float(modrm);
363   Xmm[dest] = (Xmm[dest] > *src) ? Xmm[dest] : *src;
364   trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
365   break;
366 }
367 
368 //:: reciprocal
369 
370 :(before "End Initialize Op Names")
371 put_new(Name_f3_0f, "53", "reciprocal of float (rcpss)");  // opcode bytes f3 0f 53: x32 = 1.0 / (x/m32)
372 
373 :(code)
374 void test_rcpss() {
      // Opcode f3 0f 53 with a register-direct ModR/M (0xc1): XMM0 should receive
      // the reciprocal of XMM1 (2.0), traced as 0.5.
375   Xmm[1] = 2.0;
376   run(
377       "== code 0x1\n"
378       // op     ModR/M  SIB   displacement  immediate
379       "f3 0f 53 c1                                    \n"
380       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
381   );
382   CHECK_TRACE_CONTENTS(
383       "run: reciprocal of x/m32 into XMM0\n"
384       "run: x/m32 is XMM1\n"
385       "run: XMM0 is now 0.5\n"
386   );
387 }
388 
389 :(before "End Three-Byte Opcodes Starting With f3 0f")
390 case 0x53: {  // rcpss: x32 <- 1 / x/m32
391   const uint8_t modrm_byte = next();
392   const uint8_t xreg = (modrm_byte >> 3) & 0x7;  // ModR/M 'reg' field: the XMM register receiving the result
393   trace(Callstack_depth+1, "run") << "reciprocal of x/m32 into " << Xname[xreg] << end();
394   const float* operand = effective_address_float(modrm_byte);  // also emits the operand's own trace line
395   Xmm[xreg] = 1.0 / *operand;  // division happens in double, then narrows on assignment
396   trace(Callstack_depth+1, "run") << Xname[xreg] << " is now " << Xmm[xreg] << end();
397   break;
398 }
399 
400 //:: square root
401 
402 :(before "End Initialize Op Names")
403 put_new(Name_f3_0f, "51", "square root of float (sqrtss)");  // opcode bytes f3 0f 51: x32 = sqrt(x/m32)
404 
405 :(code)
406 void test_sqrtss() {
      // Opcode f3 0f 51 with a register-direct ModR/M (0xc1): XMM0 should receive
      // the square root of XMM1 (2.0), which the trace prints as 1.41421.
407   Xmm[1] = 2.0;
408   run(
409       "== code 0x1\n"
410       // op     ModR/M  SIB   displacement  immediate
411       "f3 0f 51 c1                                    \n"
412       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
413   );
414   CHECK_TRACE_CONTENTS(
415       "run: square root of x/m32 into XMM0\n"
416       "run: x/m32 is XMM1\n"
417       "run: XMM0 is now 1.41421\n"
418   );
419 }
420 
421 :(before "End Three-Byte Opcodes Starting With f3 0f")
422 case 0x51: {  // sqrtss: x32 <- sqrt(x/m32)
423   const uint8_t modrm_byte = next();
424   const uint8_t xreg = (modrm_byte >> 3) & 0x7;  // ModR/M 'reg' field: the XMM register receiving the result
425   trace(Callstack_depth+1, "run") << "square root of x/m32 into " << Xname[xreg] << end();
426   const float* operand = effective_address_float(modrm_byte);  // also emits the operand's own trace line
427   Xmm[xreg] = sqrt(*operand);
428   trace(Callstack_depth+1, "run") << Xname[xreg] << " is now " << Xmm[xreg] << end();
429   break;
430 }
431 
432 :(before "End Includes")
433 #include <math.h>
434 
435 //:: inverse square root
436 
437 :(before "End Initialize Op Names")
438 put_new(Name_f3_0f, "52", "inverse square root of float (rsqrtss)");  // opcode bytes f3 0f 52: x32 = 1.0 / sqrt(x/m32)
439 
440 :(code)
441 void test_rsqrtss() {
      // Opcode f3 0f 52 with a register-direct ModR/M (0xc1): XMM0 should receive
      // 1/sqrt of XMM1 (0.01), which the trace prints as 10.
442   Xmm[1] = 0.01;
443   run(
444       "== code 0x1\n"
445       // op     ModR/M  SIB   displacement  immediate
446       "f3 0f 52 c1                                    \n"
447       // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
448   );
449   CHECK_TRACE_CONTENTS(
450       "run: inverse square root of x/m32 into XMM0\n"
451       "run: x/m32 is XMM1\n"
452       "run: XMM0 is now 10\n"
453   );
454 }
455 
456 :(before "End Three-Byte Opcodes Starting With f3 0f")
457 case 0x52: {  // rsqrtss: x32 <- 1 / sqrt(x/m32)
458   const uint8_t modrm_byte = next();
459   const uint8_t xreg = (modrm_byte >> 3) & 0x7;  // ModR/M 'reg' field: the XMM register receiving the result
460   trace(Callstack_depth+1, "run") << "inverse square root of x/m32 into " << Xname[xreg] << end();
461   const float* operand = effective_address_float(modrm_byte);  // also emits the operand's own trace line
462   Xmm[xreg] = 1.0 / sqrt(*operand);
463   trace(Callstack_depth+1, "run") << Xname[xreg] << " is now " << Xmm[xreg] << end();
464   break;
465 }
466 
467 :(code)
468 float* effective_address_float(uint8_t modrm) {
      // Resolve the float operand named by the 'mod' and 'r/m' fields of a ModR/M
      // byte and return a pointer to it: either an XMM register (mod 3) or a
      // location in emulated memory. Emits one trace line describing the operand.
469   const uint8_t addressing_mode = (modrm >> 6);
470   // the middle 3 'reg opcode' bits are decoded by each caller, not here
471   const uint8_t low_bits = modrm & 0x7;
472   if (addressing_mode != 3) {
473     const uint32_t location = effective_address_number(modrm);
474     trace(Callstack_depth+1, "run") << "effective address contains " << read_mem_f32(location) << end();
475     return mem_addr_f32(location);
476   }
477   // mod 3 is register-direct addressing: the operand is an XMM register
478   trace(Callstack_depth+1, "run") << "x/m32 is " << Xname[low_bits] << end();
479   return &Xmm[low_bits];
480 }
481 
482 //:: compare
483 
484 :(before "End Initialize Op Names")
485 put_new(Name_0f, "2f", "compare: set CF if x32 < xm32 (comiss)");  // opcode bytes 0f 2f (no f3 prefix): also sets ZF when the operands are equal
486 
487 :(code)
488 void test_compare_x32_with_mem_at_rm32() {
      // Opcode 0f 2f with a memory operand: XMM3 (0.5) is compared with the float
      // 0.0 stored at 0x2000, which EAX points to. 0.5 is neither equal to nor less
      // than 0.0, so every traced flag should be 0.
489   Reg[EAX].i = 0x2000;
490   Xmm[3] = 0.5;
491   run(
492       "== code 0x1\n"
493       // op     ModR/M  SIB   displacement  immediate
494       "  0f 2f  18                                    \n"  // compare XMM3 with *EAX
495       // ModR/M in binary: 00 (indirect mode) 011 (lhs XMM3) 000 (rhs EAX)
496       "== data 0x2000\n"
497       "00 00 00 00\n"  // 0x00000000 = 0.0
498   );
499   CHECK_TRACE_CONTENTS(
500       "run: compare XMM3 with x/m32\n"
501       "run: effective address is 0x00002000 (EAX)\n"
502       "run: SF=0; ZF=0; CF=0; OF=0\n"
503   );
504 }
505 
506 :(before "End Two-Byte Opcodes Starting With 0f")
507 case 0x2f: {  // set CF if x32 < x/m32
      // comiss: compare x32 with x/m32 and report the outcome through the flags:
      //   greater than: ZF=0 CF=0      less than: ZF=0 CF=1
      //   equal:        ZF=1 CF=0      unordered: ZF=1 CF=1
      // SF and OF are always cleared.
508   const uint8_t modrm = next();
509   const uint8_t reg1 = (modrm>>3)&0x7;  // ModR/M 'reg' field: left-hand XMM register
510   trace(Callstack_depth+1, "run") << "compare " << Xname[reg1] << " with x/m32" << end();
511   const float* arg2 = effective_address_float(modrm);
512   // Flag settings carefully copied from the Intel manual.
513   // See also https://stackoverflow.com/questions/7057501/x86-assembler-floating-point-compare/7057771#7057771
514   SF = ZF = CF = OF = false;
515   if (Xmm[reg1] == *arg2) ZF = true;
516   if (Xmm[reg1] < *arg2) CF = true;
      // Unordered: at least one operand is NaN, so both comparisons above are
      // false and the result would otherwise read as 'greater than'. NaN is the
      // only value that compares unequal to itself. (Hardware also sets PF in
      // this case; no PF flag is visible in this fragment.)
      if (Xmm[reg1] != Xmm[reg1] || *arg2 != *arg2) ZF = CF = true;
517   trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
518   break;
519 }