7160
7193
storeLE( mkexpr(addr), mkexpr(dest2) );
7201
ULong dis_cmpxchg8b ( /*OUT*/Bool* ok,
7209
IRType ty = szToITy(sz);
7210
IRTemp eq = newTemp(Ity_I8);
7211
IRTemp olda = newTemp(ty);
7212
IRTemp oldb = newTemp(ty);
7213
IRTemp oldc = newTemp(ty);
7214
IRTemp oldd = newTemp(ty);
7215
IRTemp newa = newTemp(Ity_I64);
7216
IRTemp newd = newTemp(Ity_I64);
7217
IRTemp oldml = newTemp(ty);
7218
IRTemp oldmh = newTemp(ty);
7219
IRTemp newml = newTemp(ty);
7220
IRTemp newmh = newTemp(ty);
7221
IRTemp addr = IRTemp_INVALID;
7222
IRTemp oldrf = newTemp(Ity_I64);
7223
IRTemp newrf = newTemp(Ity_I64);
7224
UChar rm = getUChar(delta0);
7225
vassert(sz == 4 || sz == 8); /* guaranteed by caller */
7227
if (epartIsReg(rm)) {
7232
addr = disAMode ( &len, pfx, delta0, dis_buf, 0 );
7234
DIP("cmpxchg%s %s\n", sz == 4 ? "8" : "16", dis_buf);
7237
assign( olda, getIReg32( R_RAX ) );
7238
assign( oldb, getIReg32( R_RBX ) );
7239
assign( oldc, getIReg32( R_RCX ) );
7240
assign( oldd, getIReg32( R_RDX ) );
7241
assign( oldml, loadLE( Ity_I32, mkexpr(addr) ));
7242
assign( oldmh, loadLE( Ity_I32,
7243
binop(Iop_Add64,mkexpr(addr),mkU64(4)) ));
7248
binop(Iop_Xor32,mkexpr(olda),mkexpr(oldml)),
7249
binop(Iop_Xor32,mkexpr(oldd),mkexpr(oldmh))),
7251
assign( newml, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldml),mkexpr(oldb)) );
7252
assign( newmh, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldmh),mkexpr(oldc)) );
7253
assign( newa, IRExpr_Mux0X(mkexpr(eq),
7254
unop(Iop_32Uto64,mkexpr(oldml)),
7256
assign( newd, IRExpr_Mux0X(mkexpr(eq),
7257
unop(Iop_32Uto64,mkexpr(oldmh)),
7260
storeLE( mkexpr(addr), mkexpr(newml) );
7261
storeLE( binop(Iop_Add64,mkexpr(addr),mkU64(4)),
7263
putIRegRAX( 8, mkexpr(newa) );
7264
putIRegRDX( 8, mkexpr(newd) );
7266
assign( olda, getIReg64( R_RAX ) );
7267
assign( oldb, getIReg64( R_RBX ) );
7268
assign( oldc, getIReg64( R_RCX ) );
7269
assign( oldd, getIReg64( R_RDX ) );
7270
assign( oldml, loadLE( Ity_I64, mkexpr(addr) ));
7271
assign( oldmh, loadLE( Ity_I64,
7272
binop(Iop_Add64,mkexpr(addr),mkU64(8)) ));
7277
binop(Iop_Xor64,mkexpr(olda),mkexpr(oldml)),
7278
binop(Iop_Xor64,mkexpr(oldd),mkexpr(oldmh))),
7280
assign( newml, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldml),mkexpr(oldb)) );
7281
assign( newmh, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldmh),mkexpr(oldc)) );
7282
assign( newa, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldml),mkexpr(olda)) );
7283
assign( newd, IRExpr_Mux0X(mkexpr(eq),mkexpr(oldmh),mkexpr(oldd)) );
7285
storeLE( mkexpr(addr), mkexpr(newml) );
7286
storeLE( binop(Iop_Add64,mkexpr(addr),mkU64(8)),
7288
putIRegRAX( 8, mkexpr(newa) );
7289
putIRegRDX( 8, mkexpr(newd) );
7292
/* And set the flags. Z is set if original d:a == mem, else
7293
cleared. All others unchanged. (This is different from normal
7294
cmpxchg, which just sets them according to SUB.) */
7295
assign( oldrf, binop(Iop_And64,
7296
mk_amd64g_calculate_rflags_all(),
7297
mkU64(~AMD64G_CC_MASK_Z)) );
7302
binop(Iop_And64, unop(Iop_8Uto64, mkexpr(eq)), mkU64(1)),
7303
mkU8(AMD64G_CC_SHIFT_Z))
7305
stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
7306
stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
7307
stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newrf) ));
7308
/* Set NDEP even though it isn't used. This makes redundant-PUT
7309
elimination of previous stores to this field work better. */
7310
stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
7167
7316
//.. //-- static
7168
7317
//.. //-- Addr dis_cmpxchg8b ( UCodeBlock* cb,
11111
11265
goto decode_success;
11268
/* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
11269
from E(xmm or mem) to G(xmm) */
11270
if (have66noF2noF3(pfx) && sz == 2
11271
&& insn[0] == 0x0F && insn[1] == 0xF6) {
11272
IRTemp s1V = newTemp(Ity_V128);
11273
IRTemp s2V = newTemp(Ity_V128);
11274
IRTemp dV = newTemp(Ity_V128);
11275
IRTemp s1Hi = newTemp(Ity_I64);
11276
IRTemp s1Lo = newTemp(Ity_I64);
11277
IRTemp s2Hi = newTemp(Ity_I64);
11278
IRTemp s2Lo = newTemp(Ity_I64);
11279
IRTemp dHi = newTemp(Ity_I64);
11280
IRTemp dLo = newTemp(Ity_I64);
11282
if (epartIsReg(modrm)) {
11283
assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) );
11285
DIP("psadbw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11286
nameXMMReg(gregOfRexRM(pfx,modrm)));
11288
addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
11289
assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
11291
DIP("psadbw %s,%s\n", dis_buf,
11292
nameXMMReg(gregOfRexRM(pfx,modrm)));
11294
assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) );
11295
assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
11296
assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
11297
assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
11298
assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
11299
assign( dHi, mkIRExprCCall(
11300
Ity_I64, 0/*regparms*/,
11301
"amd64g_calculate_mmx_psadbw",
11302
&amd64g_calculate_mmx_psadbw,
11303
mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
11305
assign( dLo, mkIRExprCCall(
11306
Ity_I64, 0/*regparms*/,
11307
"amd64g_calculate_mmx_psadbw",
11308
&amd64g_calculate_mmx_psadbw,
11309
mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
11311
assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
11312
putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
11313
goto decode_success;
11114
11316
/* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
11115
11317
if (have66noF2noF3(pfx) && sz == 2
11116
11318
&& insn[0] == 0x0F && insn[1] == 0x70) {
11697
11899
goto decode_success;
11701
11902
/* ---------------------------------------------------- */
11702
11903
/* --- end of the SSE/SSE2 decoder. --- */
11703
11904
/* ---------------------------------------------------- */
11906
/* ---------------------------------------------------- */
11907
/* --- start of the SSE3 decoder. --- */
11908
/* ---------------------------------------------------- */
11910
/* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
11911
duplicating some lanes (2:2:0:0). */
11912
/* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
11913
duplicating some lanes (3:3:1:1). */
11914
if (haveF3no66noF2(pfx) && sz == 4
11915
&& insn[0] == 0x0F && (insn[1] == 0x12 || insn[1] == 0x16)) {
11916
IRTemp s3, s2, s1, s0;
11917
IRTemp sV = newTemp(Ity_V128);
11918
Bool isH = insn[1] == 0x16;
11919
s3 = s2 = s1 = s0 = IRTemp_INVALID;
11922
if (epartIsReg(modrm)) {
11923
assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
11924
DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
11925
nameXMMReg(eregOfRexRM(pfx,modrm)),
11926
nameXMMReg(gregOfRexRM(pfx,modrm)));
11929
addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
11930
assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11931
DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
11933
nameXMMReg(gregOfRexRM(pfx,modrm)));
11937
breakup128to32s( sV, &s3, &s2, &s1, &s0 );
11938
putXMMReg( gregOfRexRM(pfx,modrm),
11939
isH ? mk128from32s( s3, s3, s1, s1 )
11940
: mk128from32s( s2, s2, s0, s0 ) );
11941
goto decode_success;
11944
/* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
11945
duplicating some lanes (0:1:0:1). */
11946
if (haveF2no66noF3(pfx) && sz == 4
11947
&& insn[0] == 0x0F && insn[1] == 0x12) {
11948
IRTemp sV = newTemp(Ity_V128);
11949
IRTemp d0 = newTemp(Ity_I64);
11952
if (epartIsReg(modrm)) {
11953
assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
11954
DIP("movddup %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11955
nameXMMReg(gregOfRexRM(pfx,modrm)));
11957
assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
11959
addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
11960
assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
11961
DIP("movddup %s,%s\n", dis_buf,
11962
nameXMMReg(gregOfRexRM(pfx,modrm)));
11966
putXMMReg( gregOfRexRM(pfx,modrm),
11967
binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
11968
goto decode_success;
11971
/* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
11972
if (haveF2no66noF3(pfx) && sz == 4
11973
&& insn[0] == 0x0F && insn[1] == 0xD0) {
11974
IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11975
IRTemp eV = newTemp(Ity_V128);
11976
IRTemp gV = newTemp(Ity_V128);
11977
IRTemp addV = newTemp(Ity_V128);
11978
IRTemp subV = newTemp(Ity_V128);
11979
a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11982
if (epartIsReg(modrm)) {
11983
assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
11984
DIP("addsubps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11985
nameXMMReg(gregOfRexRM(pfx,modrm)));
11988
addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
11989
assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
11990
DIP("addsubps %s,%s\n", dis_buf,
11991
nameXMMReg(gregOfRexRM(pfx,modrm)));
11995
assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
11997
assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
11998
assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );
12000
breakup128to32s( addV, &a3, &a2, &a1, &a0 );
12001
breakup128to32s( subV, &s3, &s2, &s1, &s0 );
12003
putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( a3, s2, a1, s0 ));
12004
goto decode_success;
12007
/* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
12008
if (have66noF2noF3(pfx) && sz == 2
12009
&& insn[0] == 0x0F && insn[1] == 0xD0) {
12010
IRTemp eV = newTemp(Ity_V128);
12011
IRTemp gV = newTemp(Ity_V128);
12012
IRTemp addV = newTemp(Ity_V128);
12013
IRTemp subV = newTemp(Ity_V128);
12014
IRTemp a1 = newTemp(Ity_I64);
12015
IRTemp s0 = newTemp(Ity_I64);
12018
if (epartIsReg(modrm)) {
12019
assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12020
DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12021
nameXMMReg(gregOfRexRM(pfx,modrm)));
12024
addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
12025
assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12026
DIP("addsubpd %s,%s\n", dis_buf,
12027
nameXMMReg(gregOfRexRM(pfx,modrm)));
12031
assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
12033
assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
12034
assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );
12036
assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
12037
assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
12039
putXMMReg( gregOfRexRM(pfx,modrm),
12040
binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
12041
goto decode_success;
12044
/* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
12045
/* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
12046
if (haveF2no66noF3(pfx) && sz == 4
12047
&& insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
12048
IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
12049
IRTemp eV = newTemp(Ity_V128);
12050
IRTemp gV = newTemp(Ity_V128);
12051
IRTemp leftV = newTemp(Ity_V128);
12052
IRTemp rightV = newTemp(Ity_V128);
12053
Bool isAdd = insn[1] == 0x7C;
12054
HChar* str = isAdd ? "add" : "sub";
12055
e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;
12058
if (epartIsReg(modrm)) {
12059
assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12060
DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
12061
nameXMMReg(gregOfRexRM(pfx,modrm)));
12064
addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
12065
assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12066
DIP("h%sps %s,%s\n", str, dis_buf,
12067
nameXMMReg(gregOfRexRM(pfx,modrm)));
12071
assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
12073
breakup128to32s( eV, &e3, &e2, &e1, &e0 );
12074
breakup128to32s( gV, &g3, &g2, &g1, &g0 );
12076
assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
12077
assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
12079
putXMMReg( gregOfRexRM(pfx,modrm),
12080
binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
12081
mkexpr(leftV), mkexpr(rightV) ) );
12082
goto decode_success;
12085
/* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
12086
/* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
12087
if (have66noF2noF3(pfx) && sz == 2
12088
&& insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
12089
IRTemp e1 = newTemp(Ity_I64);
12090
IRTemp e0 = newTemp(Ity_I64);
12091
IRTemp g1 = newTemp(Ity_I64);
12092
IRTemp g0 = newTemp(Ity_I64);
12093
IRTemp eV = newTemp(Ity_V128);
12094
IRTemp gV = newTemp(Ity_V128);
12095
IRTemp leftV = newTemp(Ity_V128);
12096
IRTemp rightV = newTemp(Ity_V128);
12097
Bool isAdd = insn[1] == 0x7C;
12098
HChar* str = isAdd ? "add" : "sub";
12101
if (epartIsReg(modrm)) {
12102
assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
12103
DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
12104
nameXMMReg(gregOfRexRM(pfx,modrm)));
12107
addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
12108
assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
12109
DIP("h%spd %s,%s\n", str, dis_buf,
12110
nameXMMReg(gregOfRexRM(pfx,modrm)));
12114
assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
12116
assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
12117
assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
12118
assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
12119
assign( g0, unop(Iop_V128to64, mkexpr(gV) ));
12121
assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
12122
assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
12124
putXMMReg( gregOfRexRM(pfx,modrm),
12125
binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
12126
mkexpr(leftV), mkexpr(rightV) ) );
12127
goto decode_success;
12130
/* F2 0F F0 = LDDQU -- move from E (mem only; reg form is not decoded) to G (xmm). */
12131
if (haveF2no66noF3(pfx) && sz == 4
12132
&& insn[0] == 0x0F && insn[1] == 0xF0) {
12134
if (epartIsReg(modrm)) {
12135
goto decode_failure;
12137
addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
12138
putXMMReg( gregOfRexRM(pfx,modrm),
12139
loadLE(Ity_V128, mkexpr(addr)) );
12140
DIP("lddqu %s,%s\n", dis_buf,
12141
nameXMMReg(gregOfRexRM(pfx,modrm)));
12144
goto decode_success;
12147
/* ---------------------------------------------------- */
12148
/* --- end of the SSE3 decoder. --- */
12149
/* ---------------------------------------------------- */
11705
12151
/*after_sse_decoders:*/
11707
12153
/* Get the primary opcode. */