https://github.com/akkartik/mu/blob/main/linux/bootstrap/022float.cc
1
2
3
4
5 :(before "End Initialize Op Names")
6 put_new(Name_f3_0f, "10", "copy xm32 to x32 (movss)");
7 put_new(Name_f3_0f, "11", "copy x32 to xm32 (movss)");
8
9 :(code)
10 void test_copy_x32_to_x32() {
11 Xmm[3] = 0.5;
12 run(
13 "== code 0x1\n"
14
15 "f3 0f 11 d8 \n"
16
17 );
18 CHECK_TRACE_CONTENTS(
19 "run: copy XMM3 to x/m32\n"
20 "run: x/m32 is XMM0\n"
21 "run: storing 0.5\n"
22 );
23 }
24
25 :(before "End Three-Byte Opcodes Starting With f3 0f")
26 case 0x10: {
27 const uint8_t modrm = next();
28 const uint8_t rdest = (modrm>>3)&0x7;
29 trace(Callstack_depth+1, "run") << "copy x/m32 to " << Xname[rdest] << end();
30 float* src = effective_address_float(modrm);
31 Xmm[rdest] = *src;
32 trace(Callstack_depth+1, "run") << "storing " << Xmm[rdest] << end();
33 break;
34 }
35 case 0x11: {
36 const uint8_t modrm = next();
37 const uint8_t rsrc = (modrm>>3)&0x7;
38 trace(Callstack_depth+1, "run") << "copy " << Xname[rsrc] << " to x/m32" << end();
39 float* dest = effective_address_float(modrm);
40 *dest = Xmm[rsrc];
41 trace(Callstack_depth+1, "run") << "storing " << *dest << end();
42 break;
43 }
44
45 :(code)
46 void test_copy_x32_to_mem_at_xm32() {
47 Xmm[3] = 0.5;
48 Reg[EAX].i = 0x60;
49 run(
50 "== code 0x1\n"
51
52 "f3 0f 11 18 \n"
53
54 );
55 CHECK_TRACE_CONTENTS(
56 "run: copy XMM3 to x/m32\n"
57 "run: effective address is 0x00000060 (EAX)\n"
58 "run: storing 0.5\n"
59 );
60 }
61
62 void test_copy_mem_at_xm32_to_x32() {
63 Reg[EAX].i = 0x2000;
64 run(
65 "== code 0x1\n"
66
67 "f3 0f 10 18 \n"
68 "== data 0x2000\n"
69 "00 00 00 3f\n"
70 );
71 CHECK_TRACE_CONTENTS(
72 "run: copy x/m32 to XMM3\n"
73 "run: effective address is 0x00002000 (EAX)\n"
74 "run: storing 0.5\n"
75 );
76 }
77
78
79
80 :(before "End Initialize Op Names")
81 put_new(Name_f3_0f, "2a", "convert integer to floating-point (cvtsi2ss)");
82
83 :(code)
84 void test_cvtsi2ss() {
85 Reg[EAX].i = 10;
86 run(
87 "== code 0x1\n"
88
89 "f3 0f 2a c0 \n"
90
91 );
92 CHECK_TRACE_CONTENTS(
93 "run: convert r/m32 to XMM0\n"
94 "run: r/m32 is EAX\n"
95 "run: XMM0 is now 10\n"
96 );
97 }
98
99 :(before "End Three-Byte Opcodes Starting With f3 0f")
100 case 0x2a: {
101 const uint8_t modrm = next();
102 const uint8_t dest = (modrm>>3)&0x7;
103 trace(Callstack_depth+1, "run") << "convert r/m32 to " << Xname[dest] << end();
104 const int32_t* src = effective_address(modrm);
105 Xmm[dest] = *src;
106 trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
107 break;
108 }
109
110
111
112 :(before "End Initialize Op Names")
113 put_new(Name_f3_0f, "2d", "convert floating-point to int (cvtss2si)");
114 put_new(Name_f3_0f, "2c", "truncate floating-point to int (cvttss2si)");
115
116 :(code)
117 void test_cvtss2si() {
118 Xmm[0] = 9.8;
119 run(
120 "== code 0x1\n"
121
122 "f3 0f 2d c0 \n"
123
124 );
125 CHECK_TRACE_CONTENTS(
126 "run: convert x/m32 to EAX\n"
127 "run: x/m32 is XMM0\n"
128 "run: EAX is now 0x0000000a\n"
129 );
130 }
131
132 :(before "End Three-Byte Opcodes Starting With f3 0f")
133 case 0x2d: {
134 const uint8_t modrm = next();
135 const uint8_t dest = (modrm>>3)&0x7;
136 trace(Callstack_depth+1, "run") << "convert x/m32 to " << rname(dest) << end();
137 const float* src = effective_address_float(modrm);
138 Reg[dest].i = round(*src);
139 trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
140 break;
141 }
142
143 :(code)
144 void test_cvttss2si() {
145 Xmm[0] = 9.8;
146 run(
147 "== code 0x1\n"
148
149 "f3 0f 2c c0 \n"
150
151 );
152 CHECK_TRACE_CONTENTS(
153 "run: truncate x/m32 to EAX\n"
154 "run: x/m32 is XMM0\n"
155 "run: EAX is now 0x00000009\n"
156 );
157 }
158
159 :(before "End Three-Byte Opcodes Starting With f3 0f")
160 case 0x2c: {
161 const uint8_t modrm = next();
162 const uint8_t dest = (modrm>>3)&0x7;
163 trace(Callstack_depth+1, "run") << "truncate x/m32 to " << rname(dest) << end();
164 const float* src = effective_address_float(modrm);
165 Reg[dest].i = trunc(*src);
166 trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
167 break;
168 }
169
170
171
172 :(before "End Initialize Op Names")
173 put_new(Name_f3_0f, "58", "add floats (addss)");
174
175 :(code)
176 void test_addss() {
177 Xmm[0] = 3.0;
178 Xmm[1] = 2.0;
179 run(
180 "== code 0x1\n"
181
182 "f3 0f 58 c1 \n"
183
184 );
185 CHECK_TRACE_CONTENTS(
186 "run: add x/m32 to XMM0\n"
187 "run: x/m32 is XMM1\n"
188 "run: XMM0 is now 5\n"
189 );
190 }
191
192 :(before "End Three-Byte Opcodes Starting With f3 0f")
193 case 0x58: {
194 const uint8_t modrm = next();
195 const uint8_t dest = (modrm>>3)&0x7;
196 trace(Callstack_depth+1, "run") << "add x/m32 to " << Xname[dest] << end();
197 const float* src = effective_address_float(modrm);
198 Xmm[dest] += *src;
199 trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
200 break;
201 }
202
203
204
205 :(before "End Initialize Op Names")
206 put_new(Name_f3_0f, "5c", "subtract floats (subss)");
207
208 :(code)
209 void test_subss() {
210 Xmm[0] = 3.0;
211 Xmm[1] = 2.0;
212 run(
213 "== code 0x1\n"
214
215 "f3 0f 5c c1 \n"
216
217 );
218 CHECK_TRACE_CONTENTS(
219 "run: subtract x/m32 from XMM0\n"
220 "run: x/m32 is XMM1\n"
221 "run: XMM0 is now 1\n"
222 );
223 }
224
225 :(before "End Three-Byte Opcodes Starting With f3 0f")
226 case 0x5c: {
227 const uint8_t modrm = next();
228 const uint8_t dest = (modrm>>3)&0x7;
229 trace(Callstack_depth+1, "run") << "subtract x/m32 from " << Xname[dest] << end();
230 const float* src = effective_address_float(modrm);
231 Xmm[dest] -= *src;
232 trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
233 break;
234 }
235
236
237
238 :(before "End Initialize Op Names")
239 put_new(Name_f3_0f, "59", "multiply floats (mulss)");
240
241 :(code)
242 void test_mulss() {
243 Xmm[0] = 3.0;
244 Xmm[1] = 2.0;
245 run(
246 "== code 0x1\n"
247
248 "f3 0f 59 c1 \n"
249
250 );
251 CHECK_TRACE_CONTENTS(
252 "run: multiply XMM0 by x/m32\n"
253 "run: x/m32 is XMM1\n"
254 "run: XMM0 is now 6\n"
255 );
256 }
257
258 :(before "End Three-Byte Opcodes Starting With f3 0f")
259 case 0x59: {
260 const uint8_t modrm = next();
261 const uint8_t dest = (modrm>>3)&0x7;
262 trace(Callstack_depth+1, "run") << "multiply " << Xname[dest] << " by x/m32" << end();
263 const float* src = effective_address_float(modrm);
264 Xmm[dest] *= *src;
265 trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
266 break;
267 }
268
269
270
271 :(before "End Initialize Op Names")
272 put_new(Name_f3_0f, "5e", "divide floats (divss)");
273
274 :(code)
275 void test_divss() {
276 Xmm[0] = 3.0;
277 Xmm[1] = 2.0;
278 run(
279 "== code 0x1\n"
280
281 "f3 0f 5e c1 \n"
282
283 );
284 CHECK_TRACE_CONTENTS(
285 "run: divide XMM0 by x/m32\n"
286 "run: x/m32 is XMM1\n"
287 "run: XMM0 is now 1.5\n"
288 );
289 }
290
291 :(before "End Three-Byte Opcodes Starting With f3 0f")
292 case 0x5e: {
293 const uint8_t modrm = next();
294 const uint8_t dest = (modrm>>3)&0x7;
295 trace(Callstack_depth+1, "run") << "divide " << Xname[dest] << " by x/m32" << end();
296 const float* src = effective_address_float(modrm);
297 Xmm[dest] /= *src;
298 trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
299 break;
300 }
301
302
303
304 :(before "End Initialize Op Names")
305 put_new(Name_f3_0f, "5d", "minimum of two floats (minss)");
306
307 :(code)
308 void test_minss() {
309 Xmm[0] = 3.0;
310 Xmm[1] = 2.0;
311 run(
312 "== code 0x1\n"
313
314 "f3 0f 5d c1 \n"
315
316 );
317 CHECK_TRACE_CONTENTS(
318 "run: minimum of XMM0 and x/m32\n"
319 "run: x/m32 is XMM1\n"
320 "run: XMM0 is now 2\n"
321 );
322 }
323
324 :(before "End Three-Byte Opcodes Starting With f3 0f")
325 case 0x5d: {
326 const uint8_t modrm = next();
327 const uint8_t dest = (modrm>>3)&0x7;
328 trace(Callstack_depth+1, "run") << "minimum of " << Xname[dest] << " and x/m32" << end();
329 const float* src = effective_address_float(modrm);
330 Xmm[dest] = min(Xmm[dest], *src);
331 trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
332 break;
333 }
334
335
336
337 :(before "End Initialize Op Names")
338 put_new(Name_f3_0f, "5f", "maximum of two floats (maxss)");
339
340 :(code)
341 void test_maxss() {
342 Xmm[0] = 3.0;
343 Xmm[1] = 2.0;
344 run(
345 "== code 0x1\n"
346
347 "f3 0f 5f c1 \n"
348
349 );
350 CHECK_TRACE_CONTENTS(
351 "run: maximum of XMM0 and x/m32\n"
352 "run: x/m32 is XMM1\n"
353 "run: XMM0 is now 3\n"
354 );
355 }
356
357 :(before "End Three-Byte Opcodes Starting With f3 0f")
358 case 0x5f: {
359 const uint8_t modrm = next();
360 const uint8_t dest = (modrm>>3)&0x7;
361 trace(Callstack_depth+1, "run") << "maximum of " << Xname[dest] << " and x/m32" << end();
362 const float* src = effective_address_float(modrm);
363 Xmm[dest] = max(Xmm[dest], *src);
364 trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
365 break;
366 }
367
368
369
370 :(before "End Initialize Op Names")
371 put_new(Name_f3_0f, "53", "reciprocal of float (rcpss)");
372
373 :(code)
374 void test_rcpss() {
375 Xmm[1] = 2.0;
376 run(
377 "== code 0x1\n"
378
379 "f3 0f 53 c1 \n"
380
381 );
382 CHECK_TRACE_CONTENTS(
383 "run: reciprocal of x/m32 into XMM0\n"
384 "run: x/m32 is XMM1\n"
385 "run: XMM0 is now 0.5\n"
386 );
387 }
388
389 :(before "End Three-Byte Opcodes Starting With f3 0f")
390 case 0x53: {
391 const uint8_t modrm = next();
392 const uint8_t dest = (modrm>>3)&0x7;
393 trace(Callstack_depth+1, "run") << "reciprocal of x/m32 into " << Xname[dest] << end();
394 const float* src = effective_address_float(modrm);
395 Xmm[dest] = 1.0 / *src;
396 trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
397 break;
398 }
399
400
401
402 :(before "End Initialize Op Names")
403 put_new(Name_f3_0f, "51", "square root of float (sqrtss)");
404
405 :(code)
406 void test_sqrtss() {
407 Xmm[1] = 2.0;
408 run(
409 "== code 0x1\n"
410
411 "f3 0f 51 c1 \n"
412
413 );
414 CHECK_TRACE_CONTENTS(
415 "run: square root of x/m32 into XMM0\n"
416 "run: x/m32 is XMM1\n"
417 "run: XMM0 is now 1.41421\n"
418 );
419 }
420
421 :(before "End Three-Byte Opcodes Starting With f3 0f")
422 case 0x51: {
423 const uint8_t modrm = next();
424 const uint8_t dest = (modrm>>3)&0x7;
425 trace(Callstack_depth+1, "run") << "square root of x/m32 into " << Xname[dest] << end();
426 const float* src = effective_address_float(modrm);
427 Xmm[dest] = sqrt(*src);
428 trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
429 break;
430 }
431
432 :(before "End Includes")
433 #include <math.h>
434
435
436
437 :(before "End Initialize Op Names")
438 put_new(Name_f3_0f, "52", "inverse square root of float (rsqrtss)");
439
440 :(code)
441 void test_rsqrtss() {
442 Xmm[1] = 0.01;
443 run(
444 "== code 0x1\n"
445
446 "f3 0f 52 c1 \n"
447
448 );
449 CHECK_TRACE_CONTENTS(
450 "run: inverse square root of x/m32 into XMM0\n"
451 "run: x/m32 is XMM1\n"
452 "run: XMM0 is now 10\n"
453 );
454 }
455
456 :(before "End Three-Byte Opcodes Starting With f3 0f")
457 case 0x52: {
458 const uint8_t modrm = next();
459 const uint8_t dest = (modrm>>3)&0x7;
460 trace(Callstack_depth+1, "run") << "inverse square root of x/m32 into " << Xname[dest] << end();
461 const float* src = effective_address_float(modrm);
462 Xmm[dest] = 1.0 / sqrt(*src);
463 trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
464 break;
465 }
466
467 :(code)
468 float* effective_address_float(uint8_t modrm) {
469 const uint8_t mod = (modrm>>6);
470
471 const uint8_t rm = modrm & 0x7;
472 if (mod == 3) {
473
474 trace(Callstack_depth+1, "run") << "x/m32 is " << Xname[rm] << end();
475 return &Xmm[rm];
476 }
477 uint32_t addr = effective_address_number(modrm);
478 trace(Callstack_depth+1, "run") << "effective address contains " << read_mem_f32(addr) << end();
479 return mem_addr_f32(addr);
480 }
481
482
483
484 :(before "End Initialize Op Names")
485 put_new(Name_0f, "2f", "compare: set CF if x32 < xm32 (comiss)");
486
487 :(code)
488 void test_compare_x32_with_mem_at_rm32() {
489 Reg[EAX].i = 0x2000;
490 Xmm[3] = 0.5;
491 run(
492 "== code 0x1\n"
493
494 " 0f 2f 18 \n"
495
496 "== data 0x2000\n"
497 "00 00 00 00\n"
498 );
499 CHECK_TRACE_CONTENTS(
500 "run: compare XMM3 with x/m32\n"
501 "run: effective address is 0x00002000 (EAX)\n"
502 "run: SF=0; ZF=0; CF=0; OF=0\n"
503 );
504 }
505
506 :(before "End Two-Byte Opcodes Starting With 0f")
507 case 0x2f: {
508 const uint8_t modrm = next();
509 const uint8_t reg1 = (modrm>>3)&0x7;
510 trace(Callstack_depth+1, "run") << "compare " << Xname[reg1] << " with x/m32" << end();
511 const float* arg2 = effective_address_float(modrm);
512
513
514 SF = ZF = CF = OF = false;
515 if (Xmm[reg1] == *arg2) ZF = true;
516 if (Xmm[reg1] < *arg2) CF = true;
517 trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
518 break;
519 }