about summary refs log tree commit diff stats
path: root/056recipe_header.cc
Commit message (Expand)AuthorAgeFilesLines
* 2473 - bad idea to use /raw with multiple intentionsKartik K. Agaram2015-11-221-1/+1
* 2460 - headers for remaining recipesKartik K. Agaram2015-11-181-3/+1
* 2419Kartik K. Agaram2015-11-101-48/+48
* 2417 - support mutable ingredients in headersKartik K. Agaram2015-11-101-31/+74
* 2413 - another backfilled test for 2391Kartik K. Agaram2015-11-091-0/+8
* 2407 - bugfix: parsing recipe headersKartik K. Agaram2015-11-091-2/+26
* 2406Kartik K. Agaram2015-11-081-4/+2
* 2404 - ah, finally a useful assertionKartik K. Agaram2015-11-081-7/+16
* 2400 - eliminate last few warningsKartik K. Agaram2015-11-081-4/+7
* 2399 - consistent debug_string vocabularyKartik K. Agaram2015-11-081-2/+2
* 2383 - new concern: idempotence of transformsKartik K. Agaram2015-11-061-2/+2
* 2382Kartik K. Agaram2015-11-061-0/+2
* 2377 - stop using operator[] in mapKartik K. Agaram2015-11-061-10/+10
* 2370 - layers 1-4 of edit are backKartik K. Agaram2015-11-051-4/+13
* 2360Kartik K. Agaram2015-11-041-4/+4
* 2358 - starting to tackle the phase ordering problemKartik K. Agaram2015-11-041-5/+5
* 2355Kartik K. Agaram2015-11-041-0/+14
* 2336Kartik K. Agaram2015-10-311-2/+3
* 2335Kartik K. Agaram2015-10-311-1/+1
* 2334Kartik K. Agaram2015-10-311-0/+26
* 2333Kartik K. Agaram2015-10-311-0/+8
* 2328Kartik K. Agaram2015-10-301-3/+19
* 2322 - deduce types from recipe headerKartik K. Agaram2015-10-291-3/+47
* 2321 - more preparations for static dispatchKartik K. Agaram2015-10-291-4/+4
* 2318Kartik K. Agaram2015-10-291-1/+1
* 2316 - preparing for static dispatchKartik K. Agaram2015-10-291-1/+6
* 2312Kartik K. Agaram2015-10-291-1/+1
* 2310 - add some more tracingKartik K. Agaram2015-10-291-0/+4
* 2308 - auto-reply on fall-throughKartik K. Agaram2015-10-281-0/+30
* 2306 - recipe headersKartik K. Agaram2015-10-281-0/+111
' href='#n341'>341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
//: floating-point operations

//:: copy

:(before "End Initialize Op Names")
// Descriptions for the two movss opcodes, keyed by the opcode byte that
// follows the f3 0f prefix: 10 loads into an XMM register, 11 stores from one.
put_new(Name_f3_0f, "10", "copy xm32 to x32 (movss)");
put_new(Name_f3_0f, "11", "copy x32 to xm32 (movss)");

:(code)
// movss in register-direct mode: with mod=11 the r/m field names an XMM
// register, so `f3 0f 11 d8` copies XMM3 into XMM0.
// The trace must show the decode line, the resolved operand and the value stored.
void test_copy_x32_to_x32() {
  Xmm[3] = 0.5;
  run(
      "== code 0x1\n"  // code segment
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 11 d8                                    \n"  // copy XMM3 to XMM0
      // ModR/M in binary: 11 (direct mode) 011 (src XMM3) 000 (dest XMM0)
  );
  CHECK_TRACE_CONTENTS(
      "run: copy XMM3 to x/m32\n"
      "run: x/m32 is XMM0\n"
      "run: storing 0.5\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x10: {  // movss: load x/m32 into x32
  // Bits 3-5 of the ModR/M byte name the destination XMM register.
  const uint8_t modrm_byte = next();
  const uint8_t xdest = (modrm_byte >> 3) & 0x7;
  trace(Callstack_depth+1, "run") << "copy x/m32 to " << Xname[xdest] << end();
  // Resolving the operand emits its own trace line before the copy happens.
  const float* operand = effective_address_float(modrm_byte);
  // Read a whole float through the pointer in one go. Per the original note,
  // this assumes sizeof(float) == 4 on the host as well.
  Xmm[xdest] = *operand;
  trace(Callstack_depth+1, "run") << "storing " << Xmm[xdest] << end();
  break;
}
case 0x11: {  // movss: store x32 into x/m32
  // Bits 3-5 of the ModR/M byte name the source XMM register.
  const uint8_t modrm_byte = next();
  const uint8_t xsrc = (modrm_byte >> 3) & 0x7;
  trace(Callstack_depth+1, "run") << "copy " << Xname[xsrc] << " to x/m32" << end();
  // The destination is either another XMM register or a location in memory.
  float* target = effective_address_float(modrm_byte);
  // Write a whole float through the pointer in one go. Per the original note,
  // this assumes sizeof(float) == 4 on the host as well.
  *target = Xmm[xsrc];
  trace(Callstack_depth+1, "run") << "storing " << *target << end();
  break;
}

:(code)
// movss store through a register-indirect operand: mod=00 with r/m=EAX means
// the destination is the memory EAX points at (0x60 here).
// The "effective address is ..." line is emitted by a helper outside this
// section -- presumably effective_address_number; confirm there.
void test_copy_x32_to_mem_at_xm32() {
  Xmm[3] = 0.5;
  Reg[EAX].i = 0x60;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 11 18                                    \n"  // copy XMM3 to *EAX
      // ModR/M in binary: 00 (indirect mode) 011 (src XMM3) 000 (dest EAX)
  );
  CHECK_TRACE_CONTENTS(
      "run: copy XMM3 to x/m32\n"
      "run: effective address is 0x00000060 (EAX)\n"
      "run: storing 0.5\n"
  );
}

// movss load through a register-indirect operand: EAX points at a data
// segment whose four bytes (least significant first) spell 0x3f000000, i.e.
// 0.5, and `f3 0f 10 18` loads that float into XMM3.
void test_copy_mem_at_xm32_to_x32() {
  Reg[EAX].i = 0x2000;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 10 18                                    \n"  // copy *EAX to XMM3
      "== data 0x2000\n"
      "00 00 00 3f\n"  // 0x3f000000 = 0.5
  );
  CHECK_TRACE_CONTENTS(
      "run: copy x/m32 to XMM3\n"
      "run: effective address is 0x00002000 (EAX)\n"
      "run: storing 0.5\n"
  );
}

//:: convert to floating point

:(before "End Initialize Op Names")
// Description for opcode f3 0f 2a (integer operand converted into an XMM register).
put_new(Name_f3_0f, "2a", "convert integer to floating-point (cvtsi2ss)");

:(code)
// cvtsi2ss with a register-direct operand: the integer 10 in EAX becomes the
// float 10 in XMM0. Here the r/m field names a general-purpose register, so
// the trace says "r/m32" rather than "x/m32".
void test_cvtsi2ss() {
  Reg[EAX].i = 10;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 2a c0                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 000 (EAX)
  );
  CHECK_TRACE_CONTENTS(
      "run: convert r/m32 to XMM0\n"
      "run: r/m32 is EAX\n"
      "run: XMM0 is now 10\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x2a: {  // cvtsi2ss: x32 = (float) r/m32
  // Bits 3-5 of the ModR/M byte name the destination XMM register.
  const uint8_t modrm_byte = next();
  const uint8_t xdest = (modrm_byte >> 3) & 0x7;
  trace(Callstack_depth+1, "run") << "convert r/m32 to " << Xname[xdest] << end();
  // The source is an integer operand, so it is resolved with the integer
  // effective_address() rather than the float variant.
  const int32_t* int_operand = effective_address(modrm_byte);
  Xmm[xdest] = static_cast<float>(*int_operand);
  trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
  break;
}

//:: convert floating point to int

:(before "End Initialize Op Names")
// Descriptions for the two float->int opcodes after the f3 0f prefix:
// 2d converts with rounding, 2c truncates.
put_new(Name_f3_0f, "2d", "convert floating-point to int (cvtss2si)");
put_new(Name_f3_0f, "2c", "truncate floating-point to int (cvttss2si)");

:(code)
// cvtss2si with a register-direct operand: 9.8 in XMM0 is rounded to the
// integer 10 (0xa) in EAX. Contrast with test_cvttss2si, which truncates the
// same input to 9.
void test_cvtss2si() {
  Xmm[0] = 9.8;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 2d c0                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (EAX) 000 (XMM0)
  );
  CHECK_TRACE_CONTENTS(
      "run: convert x/m32 to EAX\n"
      "run: x/m32 is XMM0\n"
      "run: EAX is now 0x0000000a\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x2d: {  // cvtss2si: convert float to integer, rounding
  // Bits 3-5 of the ModR/M byte name the destination general-purpose register.
  const uint8_t modrm = next();
  const uint8_t dest = (modrm>>3)&0x7;
  trace(Callstack_depth+1, "run") << "convert x/m32 to " << rname(dest) << end();
  const float* src = effective_address_float(modrm);
  // cvtss2si rounds according to the current rounding mode, which defaults to
  // round-to-nearest-even. lrintf() follows the host's current rounding mode
  // (also nearest-even by default), whereas round() always rounds halfway
  // cases away from zero and would turn 2.5 into 3 instead of 2.
  // NOTE(review): NaN and out-of-range inputs (0x80000000 on real hardware)
  // are still not modeled explicitly.
  Reg[dest].i = static_cast<int32_t>(lrintf(*src));
  trace(Callstack_depth+1, "run") << rname(dest) << " is now 0x" << HEXWORD << Reg[dest].i << end();
  break;
}

:(code)
// cvttss2si with a register-direct operand: 9.8 in XMM0 is truncated toward
// zero, leaving 9 in EAX.
void test_cvttss2si() {
  Xmm[0] = 9.8;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 2c c0                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (EAX) 000 (XMM0)
  );
  CHECK_TRACE_CONTENTS(
      "run: truncate x/m32 to EAX\n"
      "run: x/m32 is XMM0\n"
      "run: EAX is now 0x00000009\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x2c: {  // cvttss2si: convert float to integer, discarding the fraction
  // Bits 3-5 of the ModR/M byte name the destination general-purpose register.
  const uint8_t modrm_byte = next();
  const uint8_t rdest = (modrm_byte >> 3) & 0x7;
  trace(Callstack_depth+1, "run") << "truncate x/m32 to " << rname(rdest) << end();
  // Resolving the float operand emits its own trace line.
  const float* operand = effective_address_float(modrm_byte);
  Reg[rdest].i = trunc(*operand);
  trace(Callstack_depth+1, "run") << rname(rdest) << " is now 0x" << HEXWORD << Reg[rdest].i << end();
  break;
}

//:: add

:(before "End Initialize Op Names")
// Description for opcode f3 0f 58 (scalar float addition).
put_new(Name_f3_0f, "58", "add floats (addss)");

:(code)
// addss with a register-direct operand: XMM0 (3.0) += XMM1 (2.0), leaving 5
// in XMM0.
void test_addss() {
  Xmm[0] = 3.0;
  Xmm[1] = 2.0;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 58 c1                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
  );
  CHECK_TRACE_CONTENTS(
      "run: add x/m32 to XMM0\n"
      "run: x/m32 is XMM1\n"
      "run: XMM0 is now 5\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x58: {  // addss: x32 += x/m32
  // Bits 3-5 of the ModR/M byte name the XMM register that is both the left
  // operand and the destination.
  const uint8_t modrm_byte = next();
  const uint8_t xdest = (modrm_byte >> 3) & 0x7;
  trace(Callstack_depth+1, "run") << "add x/m32 to " << Xname[xdest] << end();
  // Resolving the right operand emits its own trace line.
  const float* addend = effective_address_float(modrm_byte);
  Xmm[xdest] = Xmm[xdest] + *addend;
  trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
  break;
}

//:: subtract

:(before "End Initialize Op Names")
// Description for opcode f3 0f 5c (scalar float subtraction).
put_new(Name_f3_0f, "5c", "subtract floats (subss)");

:(code)
// subss with a register-direct operand: XMM0 (3.0) -= XMM1 (2.0), leaving 1
// in XMM0.
void test_subss() {
  Xmm[0] = 3.0;
  Xmm[1] = 2.0;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 5c c1                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
  );
  CHECK_TRACE_CONTENTS(
      "run: subtract x/m32 from XMM0\n"
      "run: x/m32 is XMM1\n"
      "run: XMM0 is now 1\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x5c: {  // subss: x32 -= x/m32
  // Bits 3-5 of the ModR/M byte name the XMM register that is both the
  // minuend and the destination.
  const uint8_t modrm_byte = next();
  const uint8_t xdest = (modrm_byte >> 3) & 0x7;
  trace(Callstack_depth+1, "run") << "subtract x/m32 from " << Xname[xdest] << end();
  // Resolving the right operand emits its own trace line.
  const float* subtrahend = effective_address_float(modrm_byte);
  Xmm[xdest] = Xmm[xdest] - *subtrahend;
  trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
  break;
}

//:: multiply

:(before "End Initialize Op Names")
// Description for opcode f3 0f 59 (scalar float multiplication).
put_new(Name_f3_0f, "59", "multiply floats (mulss)");

:(code)
// mulss with a register-direct operand: XMM0 (3.0) *= XMM1 (2.0), leaving 6
// in XMM0.
void test_mulss() {
  Xmm[0] = 3.0;
  Xmm[1] = 2.0;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 59 c1                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
  );
  CHECK_TRACE_CONTENTS(
      "run: multiply XMM0 by x/m32\n"
      "run: x/m32 is XMM1\n"
      "run: XMM0 is now 6\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x59: {  // mulss: x32 *= x/m32
  // Bits 3-5 of the ModR/M byte name the XMM register that is both the left
  // factor and the destination.
  const uint8_t modrm_byte = next();
  const uint8_t xdest = (modrm_byte >> 3) & 0x7;
  trace(Callstack_depth+1, "run") << "multiply " << Xname[xdest] << " by x/m32" << end();
  // Resolving the right operand emits its own trace line.
  const float* factor = effective_address_float(modrm_byte);
  Xmm[xdest] = Xmm[xdest] * *factor;
  trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
  break;
}

//:: divide

:(before "End Initialize Op Names")
// Description for opcode f3 0f 5e (scalar float division).
put_new(Name_f3_0f, "5e", "divide floats (divss)");

:(code)
// divss with a register-direct operand: XMM0 (3.0) /= XMM1 (2.0), leaving 1.5
// in XMM0.
void test_divss() {
  Xmm[0] = 3.0;
  Xmm[1] = 2.0;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 5e c1                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
  );
  CHECK_TRACE_CONTENTS(
      "run: divide XMM0 by x/m32\n"
      "run: x/m32 is XMM1\n"
      "run: XMM0 is now 1.5\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x5e: {  // divss: x32 /= x/m32
  // Bits 3-5 of the ModR/M byte name the XMM register that is both the
  // dividend and the destination.
  const uint8_t modrm_byte = next();
  const uint8_t xdest = (modrm_byte >> 3) & 0x7;
  trace(Callstack_depth+1, "run") << "divide " << Xname[xdest] << " by x/m32" << end();
  // Resolving the divisor emits its own trace line.
  const float* divisor = effective_address_float(modrm_byte);
  Xmm[xdest] = Xmm[xdest] / *divisor;
  trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
  break;
}

//:: min

:(before "End Initialize Op Names")
// Description for opcode f3 0f 5d (scalar float minimum).
put_new(Name_f3_0f, "5d", "minimum of two floats (minss)");

:(code)
// minss with a register-direct operand: XMM0 becomes the smaller of XMM0 (3.0)
// and XMM1 (2.0), i.e. 2.
void test_minss() {
  Xmm[0] = 3.0;
  Xmm[1] = 2.0;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 5d c1                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
  );
  CHECK_TRACE_CONTENTS(
      "run: minimum of XMM0 and x/m32\n"
      "run: x/m32 is XMM1\n"
      "run: XMM0 is now 2\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x5d: {  // minss: x32 = minimum of x32, x/m32
  // Bits 3-5 of the ModR/M byte name the XMM register that is both the first
  // operand and the destination.
  const uint8_t modrm = next();
  const uint8_t dest = (modrm>>3)&0x7;
  trace(Callstack_depth+1, "run") << "minimum of " << Xname[dest] << " and x/m32" << end();
  const float* src = effective_address_float(modrm);
  // minss keeps the destination only when it is strictly less than the
  // source; in every other case (equal values, both zeros of either sign,
  // either operand NaN) the result is the source operand. A plain
  // min(dest, src) gets the NaN and signed-zero cases the wrong way round.
  Xmm[dest] = (Xmm[dest] < *src) ? Xmm[dest] : *src;
  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
  break;
}

//:: max

:(before "End Initialize Op Names")
// Description for opcode f3 0f 5f (scalar float maximum).
put_new(Name_f3_0f, "5f", "maximum of two floats (maxss)");

:(code)
// maxss with a register-direct operand: XMM0 becomes the larger of XMM0 (3.0)
// and XMM1 (2.0), i.e. it stays 3.
void test_maxss() {
  Xmm[0] = 3.0;
  Xmm[1] = 2.0;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 5f c1                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
  );
  CHECK_TRACE_CONTENTS(
      "run: maximum of XMM0 and x/m32\n"
      "run: x/m32 is XMM1\n"
      "run: XMM0 is now 3\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x5f: {  // maxss: x32 = maximum of x32, x/m32
  // Bits 3-5 of the ModR/M byte name the XMM register that is both the first
  // operand and the destination.
  const uint8_t modrm = next();
  const uint8_t dest = (modrm>>3)&0x7;
  trace(Callstack_depth+1, "run") << "maximum of " << Xname[dest] << " and x/m32" << end();
  const float* src = effective_address_float(modrm);
  // maxss keeps the destination only when it is strictly greater than the
  // source; in every other case (equal values, both zeros of either sign,
  // either operand NaN) the result is the source operand. A plain
  // max(dest, src) gets the NaN and signed-zero cases the wrong way round.
  Xmm[dest] = (Xmm[dest] > *src) ? Xmm[dest] : *src;
  trace(Callstack_depth+1, "run") << Xname[dest] << " is now " << Xmm[dest] << end();
  break;
}

//:: reciprocal

:(before "End Initialize Op Names")
// Description for opcode f3 0f 53 (scalar float reciprocal).
put_new(Name_f3_0f, "53", "reciprocal of float (rcpss)");

:(code)
// rcpss with a register-direct operand: XMM0 = 1 / XMM1 (2.0) = 0.5. XMM0's
// prior contents do not matter, so only XMM1 is initialized.
void test_rcpss() {
  Xmm[1] = 2.0;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 53 c1                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
  );
  CHECK_TRACE_CONTENTS(
      "run: reciprocal of x/m32 into XMM0\n"
      "run: x/m32 is XMM1\n"
      "run: XMM0 is now 0.5\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x53: {  // rcpss: x32 = 1 / x/m32
  // Bits 3-5 of the ModR/M byte name the destination XMM register; its old
  // value is not an input.
  const uint8_t modrm_byte = next();
  const uint8_t xdest = (modrm_byte >> 3) & 0x7;
  trace(Callstack_depth+1, "run") << "reciprocal of x/m32 into " << Xname[xdest] << end();
  // Resolving the operand emits its own trace line.
  const float* operand = effective_address_float(modrm_byte);
  // The quotient is computed in double (1.0 is a double literal) and narrowed
  // on assignment.
  Xmm[xdest] = 1.0 / *operand;
  trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
  break;
}

//:: square root

:(before "End Initialize Op Names")
// Description for opcode f3 0f 51 (scalar float square root).
put_new(Name_f3_0f, "51", "square root of float (sqrtss)");

:(code)
// sqrtss with a register-direct operand: XMM0 = sqrt(XMM1) with XMM1 = 2.0.
// The expected "1.41421" presumably reflects the trace stream's default float
// formatting (6 significant digits) -- confirm if the trace format changes.
void test_sqrtss() {
  Xmm[1] = 2.0;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 51 c1                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
  );
  CHECK_TRACE_CONTENTS(
      "run: square root of x/m32 into XMM0\n"
      "run: x/m32 is XMM1\n"
      "run: XMM0 is now 1.41421\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x51: {  // sqrtss: x32 = sqrt(x/m32)
  // Bits 3-5 of the ModR/M byte name the destination XMM register; its old
  // value is not an input.
  const uint8_t modrm_byte = next();
  const uint8_t xdest = (modrm_byte >> 3) & 0x7;
  trace(Callstack_depth+1, "run") << "square root of x/m32 into " << Xname[xdest] << end();
  // Resolving the operand emits its own trace line.
  const float* operand = effective_address_float(modrm_byte);
  Xmm[xdest] = sqrt(*operand);
  trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
  break;
}

:(before "End Includes")
#include <math.h>

//:: inverse square root

:(before "End Initialize Op Names")
// Description for opcode f3 0f 52 (scalar float reciprocal square root).
put_new(Name_f3_0f, "52", "inverse square root of float (rsqrtss)");

:(code)
// rsqrtss with a register-direct operand: XMM0 = 1 / sqrt(XMM1) with
// XMM1 = 0.01, which the trace is expected to print as 10.
void test_rsqrtss() {
  Xmm[1] = 0.01;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "f3 0f 52 c1                                    \n"
      // ModR/M in binary: 11 (direct mode) 000 (XMM0) 001 (XMM1)
  );
  CHECK_TRACE_CONTENTS(
      "run: inverse square root of x/m32 into XMM0\n"
      "run: x/m32 is XMM1\n"
      "run: XMM0 is now 10\n"
  );
}

:(before "End Three-Byte Opcodes Starting With f3 0f")
case 0x52: {  // rsqrtss: x32 = 1 / sqrt(x/m32)
  // Bits 3-5 of the ModR/M byte name the destination XMM register; its old
  // value is not an input.
  const uint8_t modrm_byte = next();
  const uint8_t xdest = (modrm_byte >> 3) & 0x7;
  trace(Callstack_depth+1, "run") << "inverse square root of x/m32 into " << Xname[xdest] << end();
  // Resolving the operand emits its own trace line.
  const float* operand = effective_address_float(modrm_byte);
  Xmm[xdest] = 1.0 / sqrt(*operand);
  trace(Callstack_depth+1, "run") << Xname[xdest] << " is now " << Xmm[xdest] << end();
  break;
}

:(code)
// Resolve the x/m32 operand described by a ModR/M byte to a pointer the caller
// can read or write a float through.
//   modrm: the ModR/M byte; only its top two (mod) and bottom three (rm) bits
//          are examined here, the middle 'reg opcode' bits belong to the caller.
// Returns the address of an XMM register in direct mode (mod == 3); otherwise
// whatever mem_addr_f32() yields for the address computed by
// effective_address_number(). Emits one trace line either way.
float* effective_address_float(uint8_t modrm) {
  const uint8_t mode = modrm >> 6;
  const uint8_t rm_bits = modrm & 0x7;
  if (mode != 3) {
    // Any indirect mode: delegate the address arithmetic, then trace the float
    // currently stored there.
    const uint32_t address = effective_address_number(modrm);
    trace(Callstack_depth+1, "run") << "effective address contains " << read_mem_f32(address) << end();
    return mem_addr_f32(address);
  }
  // Register-direct: rm picks one of the XMM registers.
  trace(Callstack_depth+1, "run") << "x/m32 is " << Xname[rm_bits] << end();
  return &Xmm[rm_bits];
}

//:: compare

:(before "End Initialize Op Names")
// Description for opcode 0f 2f. Unlike the other float opcodes in this section
// it has no f3 prefix, so it is registered in Name_0f rather than Name_f3_0f.
put_new(Name_0f, "2f", "compare: set CF if x32 < xm32 (comiss)");

:(code)
// comiss with a register-indirect right operand: XMM3 (0.5) is compared with
// the float at *EAX (0.0). The left side is greater, so neither ZF (equal) nor
// CF (less than) may be set.
void test_compare_x32_with_mem_at_rm32() {
  Reg[EAX].i = 0x2000;
  Xmm[3] = 0.5;
  run(
      "== code 0x1\n"
      // op     ModR/M  SIB   displacement  immediate
      "  0f 2f  18                                    \n"  // compare XMM3 with *EAX
      // ModR/M in binary: 00 (indirect mode) 011 (lhs XMM3) 000 (rhs EAX)
      "== data 0x2000\n"
      "00 00 00 00\n"  // 0x00000000 = 0.0
  );
  CHECK_TRACE_CONTENTS(
      "run: compare XMM3 with x/m32\n"
      "run: effective address is 0x00002000 (EAX)\n"
      "run: SF=0; ZF=0; CF=0; OF=0\n"
  );
}

:(before "End Two-Byte Opcodes Starting With 0f")
case 0x2f: {  // comiss: compare x32 with x/m32 and set flags
  // Bits 3-5 of the ModR/M byte name the XMM register on the left-hand side.
  const uint8_t modrm = next();
  const uint8_t reg1 = (modrm>>3)&0x7;
  trace(Callstack_depth+1, "run") << "compare " << Xname[reg1] << " with x/m32" << end();
  const float* arg2 = effective_address_float(modrm);
  const float lhs = Xmm[reg1];
  const float rhs = *arg2;
  // Flag settings follow the Intel manual's table for comiss:
  //   greater than: ZF=0 CF=0
  //   less than:    ZF=0 CF=1
  //   equal:        ZF=1 CF=0
  //   unordered:    ZF=1 CF=1  (either operand is NaN)
  // SF and OF are always cleared.
  // See also https://stackoverflow.com/questions/7057501/x86-assembler-floating-point-compare/7057771#7057771
  SF = ZF = CF = OF = false;
  if (lhs != lhs || rhs != rhs) {
    // A NaN is the only value that differs from itself. Without this branch
    // an unordered compare would look like "greater than".
    ZF = CF = true;
  }
  else if (lhs == rhs) {
    ZF = true;
  }
  else if (lhs < rhs) {
    CF = true;
  }
  trace(Callstack_depth+1, "run") << "SF=" << SF << "; ZF=" << ZF << "; CF=" << CF << "; OF=" << OF << end();
  break;
}