/*---------------------------------------------------------------*/
/*--- begin                               guest_generic_x87.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2011 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
/* This file contains functions for doing some x87-specific
   operations.  Both the amd64 and x86 front ends (guests) indirectly
   call these functions via guest helper calls.  By putting them here,
   code duplication is avoided.  Some of these functions are tricky
   and hard to verify, so there is much to be said for only having one
   copy thereof.
*/
#include "libvex_basictypes.h"
46
#include "main_util.h"
47
#include "guest_generic_x87.h"
50
/* 80 and 64-bit floating point formats:

   80-bit:

    S  0       0X------X   denormals
    S  1-7FFE  1X------X   normals (all normals have leading 1)
    S  7FFF    10------0   infinity
    S  7FFF    10X-----X   snan
    S  7FFF    11X-----X   qnan

   S is the sign bit.  For runs X----X, at least one of the Xs must be
   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   there is an explicitly represented leading 1, and a sign bit,
   giving 80 in total.

   64-bit avoids the confusion of an explicitly represented leading 1
   and so is simpler:

    S  0      X------X   denormals
    S  1-7FE  any        normals
    S  7FF    0------0   infinity
    S  7FF    0X-----X   snan
    S  7FF    1X-----X   qnan

   Exponent is 11 bits, fractional part is 52 bits, and there is a
   sign bit, giving 64 in total.
*/
/* Read bit n (0 = LSB of byte 0) of a little-endian bit array. */
static inline UInt read_bit_array ( UChar* arr, UInt n )
{
   UChar c = arr[n >> 3];
   c = toUChar( c >> (n&7) );
   return c & 1;
}
/* Write bit n (0 = LSB of byte 0) of a little-endian bit array to
   the LSB of b, leaving all other bits unchanged. */
static inline void write_bit_array ( UChar* arr, UInt n, UInt b )
{
   UChar c = arr[n >> 3];
   c = toUChar( c & ~(1 << (n&7)) );       /* clear the target bit   */
   c = toUChar( c | ((b&1) << (n&7)) );    /* or in the new bit      */
   arr[n >> 3] = c;
}
/* Convert an IEEE754 double (64-bit) into an x87 extended double
   (80-bit), mimicing the hardware fairly closely.  Both numbers are
   stored little-endian.  Limitations, all of which could be fixed,
   given some level of hassle:

   * Identity of NaNs is not preserved.

   See comments in the code for more details.
*/
void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
{
   Bool  mantissaIsZero;
   Int   bexp, i, j, shift;
   UChar sign;

   sign = toUChar( (f64[7] >> 7) & 1 );
   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   mantissaIsZero = False;
   if (bexp == 0 || bexp == 0x7FF) {
      /* We'll need to know whether or not the mantissa (bits 51:0) is
         all zeroes in order to handle these cases.  So figure it
         out. */
      mantissaIsZero
         = toBool(
              (f64[6] & 0x0F) == 0
              && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
              && f64[2] == 0 && f64[1] == 0 && f64[0] == 0
           );
   }

   /* If the exponent is zero, either we have a zero or a denormal.
      Produce a zero.  This is a hack in that it forces denormals to
      zero.  Could do better. */
   if (bexp == 0) {
      f80[9] = toUChar( sign << 7 );
      f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
             = f80[3] = f80[2] = f80[1] = f80[0] = 0;

      if (mantissaIsZero)
         /* It really is zero, so that's all we can do. */
         return;

      /* There is at least one 1-bit in the mantissa.  So it's a
         potentially denormalised double -- but we can produce a
         normalised long double.  Count the leading zeroes in the
         mantissa so as to decide how much to bump the exponent down
         by.  Note, this is SLOW. */
      shift = 0;
      for (i = 51; i >= 0; i--) {
         if (read_bit_array(f64, i))
            break;
         shift++;
      }

      /* and copy into place as many bits as we can get our hands on. */
      j = 63;
      for (i = 51 - shift; i >= 0; i--) {
         write_bit_array( f80, j,
                          read_bit_array( f64, i ) );
         j--;
      }

      /* Set the exponent appropriately, and we're done. */
      bexp -= shift;
      bexp += (16383 - 1023);
      f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
      f80[8] = toUChar( bexp & 0xFF );
      return;
   }

   /* If the exponent is 7FF, this is either an Infinity, a SNaN or
      QNaN, as determined by examining bits 51:0, thus:
          0  ... 0    Inf
          0X ... X    SNaN
          1X ... X    QNaN
      where at least one of the Xs is not zero.
   */
   if (bexp == 0x7FF) {
      if (mantissaIsZero) {
         /* Produce an appropriately signed infinity:
            S 1--1 (15)  1  0--0 (63)
         */
         f80[9] = toUChar( (sign << 7) | 0x7F );
         f80[8] = 0xFF;
         f80[7] = 0x80;
         f80[6] = f80[5] = f80[4] = f80[3]
                = f80[2] = f80[1] = f80[0] = 0;
         return;
      }
      /* So it's either a QNaN or SNaN.  Distinguish by considering
         bit 51.  Note, this destroys all the trailing bits
         (identity?) of the NaN.  IEEE754 doesn't require preserving
         these (it only requires that there be one QNaN value and one
         SNaN value), but x87 does seem to have some ability to
         preserve them.  Anyway, here, the NaN's identity is
         destroyed.  Could be improved. */
      if (f64[6] & 8) {
         /* QNaN.  Make a QNaN:
            S 1--1 (15)  1  1--1 (63)
         */
         f80[9] = toUChar( (sign << 7) | 0x7F );
         f80[8] = 0xFF;
         f80[7] = 0xFF;
         f80[6] = f80[5] = f80[4] = f80[3]
                = f80[2] = f80[1] = f80[0] = 0xFF;
      } else {
         /* SNaN.  Make a SNaN:
            S 1--1 (15)  0  1--1 (63)
         */
         f80[9] = toUChar( (sign << 7) | 0x7F );
         f80[8] = 0xFF;
         f80[7] = 0x7F;
         f80[6] = f80[5] = f80[4] = f80[3]
                = f80[2] = f80[1] = f80[0] = 0xFF;
      }
      return;
   }

   /* It's not a zero, denormal, infinity or nan.  So it must be a
      normalised number.  Rebias the exponent and build the new
      number.  */
   bexp += (16383 - 1023);

   f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
   f80[8] = toUChar( bexp & 0xFF );
   f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78)
                              | ((f64[5] >> 5) & 7) );
   f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) );
   f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) );
   f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) );
   f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) );
   f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) );
   f80[1] = toUChar( ((f64[0] << 3) & 0xF8) );
   f80[0] = toUChar( 0 );
}
/* Convert an x87 extended double (80-bit) into an IEEE 754 double
   (64-bit), mimicking the hardware fairly closely.  Both numbers are
   stored little-endian.  Limitations, both of which could be fixed,
   given some level of hassle:

   * Rounding following truncation could be a bit better.

   * Identity of NaNs is not preserved.

   See comments in the code for more details.
*/
void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
{
   Bool  isInf;
   Int   bexp, i, j;
   UChar sign;

   sign = toUChar((f80[9] >> 7) & 1);
   bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
   bexp &= 0x7FFF;

   /* If the exponent is zero, either we have a zero or a denormal.
      But an extended precision denormal becomes a double precision
      zero, so in either case, just produce the appropriately signed
      zero. */
   if (bexp == 0) {
      f64[7] = toUChar(sign << 7);
      f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
      return;
   }

   /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
      QNaN, as determined by examining bits 62:0, thus:
          10  ... 0    Inf
          10X ... X    SNaN
          11X ... X    QNaN
      where at least one of the Xs is not zero.
   */
   if (bexp == 0x7FFF) {
      isInf = toBool(
                 (f80[7] & 0x7F) == 0
                 && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
                 && f80[3] == 0 && f80[2] == 0 && f80[1] == 0
                 && f80[0] == 0
              );
      if (isInf) {
         if (0 == (f80[7] & 0x80))
            goto invalid;
         /* Produce an appropriately signed infinity:
            S 1--1 (11)  0--0 (52)
         */
         f64[7] = toUChar((sign << 7) | 0x7F);
         f64[6] = 0xF0;
         f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
         return;
      }
      /* So it's either a QNaN or SNaN.  Distinguish by considering
         bit 62.  Note, this destroys all the trailing bits
         (identity?) of the NaN.  IEEE754 doesn't require preserving
         these (it only requires that there be one QNaN value and one
         SNaN value), but x87 does seem to have some ability to
         preserve them.  Anyway, here, the NaN's identity is
         destroyed.  Could be improved. */
      if (f80[7] & 0x40) {
         /* QNaN.  Make a QNaN:
            S 1--1 (11)  1  1--1 (51)
         */
         f64[7] = toUChar((sign << 7) | 0x7F);
         f64[6] = 0xFF;
         f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
      } else {
         /* SNaN.  Make a SNaN:
            S 1--1 (11)  0  1--1 (51)
         */
         f64[7] = toUChar((sign << 7) | 0x7F);
         f64[6] = 0xF7;
         f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
      }
      return;
   }

   /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
      zero, the x87 FPU appears to consider the number denormalised
      and converts it to a QNaN. */
   if (0 == (f80[7] & 0x80)) {
      invalid:
      /* Strange hardware QNaN:
         S 1--1 (11)  1  0--0 (51)
      */
      /* On a PIII, these QNaNs always appear with sign==1.  I have
         no idea why. */
      f64[7] = (1 /*sign*/ << 7) | 0x7F;
      f64[6] = 0xF8;
      f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
      return;
   }

   /* It's not a zero, denormal, infinity or nan.  So it must be a
      normalised number.  Rebias the exponent and consider. */
   bexp -= (16383 - 1023);
   if (bexp >= 0x7FF) {
      /* It's too big for a double.  Construct an infinity. */
      f64[7] = toUChar((sign << 7) | 0x7F);
      f64[6] = 0xF0;
      f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
      return;
   }

   if (bexp <= 0) {
      /* It's too small for a normalised double.  First construct a
         zero and then see if it can be improved into a denormal.  */
      f64[7] = toUChar(sign << 7);
      f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;

      if (bexp < -52)
         /* Too small even for a denormal. */
         return;

      /* Ok, let's make a denormal.  Note, this is SLOW. */
      /* Copy bits 63, 62, 61, etc of the src mantissa into the dst,
         indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */
      /* bexp is in range -52 .. 0 inclusive */
      for (i = 63; i >= 0; i--) {
         j = i - 12 + bexp;
         if (j < 0) break;
         /* We shouldn't really call vassert from generated code. */
         vassert(j >= 0 && j < 52);
         write_bit_array ( f64,
                           j,
                           read_bit_array ( f80, i ) );
      }
      /* and now we might have to round ... */
      if (read_bit_array(f80, 10+1 - bexp) == 1)
         goto do_rounding;

      return;
   }

   /* Ok, it's a normalised number which is representable as a double.
      Copy the exponent and mantissa into place. */
   f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) );
   f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) );
   f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) );
   f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) );
   f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) );
   f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) );

   f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) );

   f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) );

   /* Now consider any rounding that needs to happen as a result of
      truncating the mantissa. */
   if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {

      /* If the bottom bits of f80 are "100 0000 0000", then the
         infinitely precise value is deemed to be mid-way between the
         two closest representable values.  Since we're doing
         round-to-nearest (the default mode), in that case it is the
         bit immediately above which indicates whether we should round
         upwards or not -- if 0, we don't.  All that is encapsulated
         in the following simple test. */
      if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0)
         return;

      do_rounding:
      /* Round upwards.  This is a kludge.  Once in every 2^24
         roundings (statistically) the bottom three bytes are all 0xFF
         and so we don't round at all.  Could be improved. */
      if (f64[0] != 0xFF) {
         f64[0]++;
      }
      else
      if (f64[0] == 0xFF && f64[1] != 0xFF) {
         f64[0] = 0;
         f64[1]++;
      }
      else
      if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) {
         f64[0] = 0;
         f64[1] = 0;
         f64[2]++;
      }
      /* else we don't round, but we should. */
   }
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
430
/* Extract the signed significand or exponent component as per
431
fxtract. Arg and result are doubles travelling under the guise of
432
ULongs. Returns significand when getExp is zero and exponent
434
ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp )
442
S 7FF 0------0 infinity
446
const ULong posInf = 0x7FF0000000000000ULL;
447
const ULong negInf = 0xFFF0000000000000ULL;
448
const ULong nanMask = 0x7FF0000000000000ULL;
449
const ULong qNan = 0x7FF8000000000000ULL;
450
const ULong posZero = 0x0000000000000000ULL;
451
const ULong negZero = 0x8000000000000000ULL;
452
const ULong bit51 = 1ULL << 51;
453
const ULong bit52 = 1ULL << 52;
454
const ULong sigMask = bit52 - 1;
456
/* Mimic Core i5 behaviour for special cases. */
458
return getExp ? posInf : posInf;
460
return getExp ? posInf : negInf;
461
if ((arg & nanMask) == nanMask)
462
return qNan | (arg & (1ULL << 63));
464
return getExp ? negInf : posZero;
466
return getExp ? negInf : negZero;
468
/* Split into sign, exponent and significand. */
469
sign = ((UInt)(arg >> 63)) & 1;
471
/* Mask off exponent & sign. uSig is in range 0 .. 2^52-1. */
472
uSig = arg & sigMask;
474
/* Get the exponent. */
475
sExp = ((Int)(arg >> 52)) & 0x7FF;
477
/* Deal with denormals: if the exponent is zero, then the
478
significand cannot possibly be zero (negZero/posZero are handled
479
above). Shift the significand left until bit 51 of it becomes
480
1, and decrease the exponent accordingly.
483
for (i = 0; i < 52; i++) {
491
/* Add the implied leading-1 in the significand. */
495
/* Roll in the sign. */
497
/* if (sign) sSig =- sSig; */
499
/* Convert sig into a double. This should be an exact conversion.
500
Then divide by 2^52, which should give a value in the range 1.0
501
to 2.0-epsilon, at least for normalised args. */
502
/* dSig = (Double)sSig; */
503
/* dSig /= 67108864.0; */ /* 2^26 */
504
/* dSig /= 67108864.0; */ /* 2^26 */
506
uSig |= 0x3FF0000000000000ULL;
510
/* Convert exp into a double. Also an exact conversion. */
511
/* dExp = (Double)(sExp - 1023); */
516
uExp = sExp < 0 ? -sExp : sExp;
518
/* 1 <= uExp <= 1074 */
519
/* Skip first 42 iterations of normalisation loop as we know they
520
will always happen */
523
for (i = 0; i < 52-42; i++) {
530
uExp |= ((ULong)expExp) << 52;
531
if (sExp < 0) uExp ^= negZero;
534
return getExp ? uExp : uSig;
539
/*---------------------------------------------------------*/
/*--- SSE4.2 PCMP{E,I}STR{I,M} helpers                  ---*/
/*---------------------------------------------------------*/

/* We need the definitions for OSZACP eflags/rflags offsets.
   #including guest_{amd64,x86}_defs.h causes chaos, so just copy the
   required values directly.  They are not going to change in the
   foreseeable future :-)
*/
/* Bit positions of the OSZACP condition-code bits within the
   eflags/rflags word.  The MASK_* macros below reference these, so
   they must be defined first. */
#define SHIFT_O   11
#define SHIFT_S   7
#define SHIFT_Z   6
#define SHIFT_A   4
#define SHIFT_C   0
#define SHIFT_P   2

#define MASK_O    (1 << SHIFT_O)
#define MASK_S    (1 << SHIFT_S)
#define MASK_Z    (1 << SHIFT_Z)
#define MASK_A    (1 << SHIFT_A)
#define MASK_C    (1 << SHIFT_C)
#define MASK_P    (1 << SHIFT_P)
/* Count leading zeroes, w/ 0-produces-32 semantics, a la Hacker's
566
static UInt clz32 ( UInt x )
590
/* Count trailing zeroes, w/ 0-produces-32 semantics.  (~x) & (x-1)
   isolates the trailing-zero run as a block of 1s, whose length is
   32 minus its leading-zero count. */
static UInt ctz32 ( UInt x )
{
   return 32 - clz32((~x) & (x-1));
}
/* Convert a 4-bit value to a 32-bit value by cloning each bit 8
596
times. There's surely a better way to do this, but I don't know
598
static UInt bits4_to_bytes4 ( UInt bits4 )
601
r |= (bits4 & 1) ? 0x000000FF : 0;
602
r |= (bits4 & 2) ? 0x0000FF00 : 0;
603
r |= (bits4 & 4) ? 0x00FF0000 : 0;
604
r |= (bits4 & 8) ? 0xFF000000 : 0;
609
/* Given partial results from a pcmpXstrX operation (intRes1,
610
basically), generate an I- or M-format output value, also the new
613
void compute_PCMPxSTRx_gen_output (/*OUT*/V128* resV,
614
/*OUT*/UInt* resOSZACP,
616
UInt zmaskL, UInt zmaskR,
621
vassert((pol >> 2) == 0);
622
vassert((idx >> 1) == 0);
626
case 0: intRes2 = intRes1; break; // pol +
627
case 1: intRes2 = ~intRes1; break; // pol -
628
case 2: intRes2 = intRes1; break; // pol m+
629
case 3: intRes2 = intRes1 ^ validL; break; // pol m-
635
// generate M-format output (a bit or byte mask in XMM0)
637
resV->w32[0] = bits4_to_bytes4( (intRes2 >> 0) & 0xF );
638
resV->w32[1] = bits4_to_bytes4( (intRes2 >> 4) & 0xF );
639
resV->w32[2] = bits4_to_bytes4( (intRes2 >> 8) & 0xF );
640
resV->w32[3] = bits4_to_bytes4( (intRes2 >> 12) & 0xF );
642
resV->w32[0] = intRes2 & 0xFFFF;
650
// generate I-format output (an index in ECX)
651
// generate ecx value
655
newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
658
newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
661
resV->w32[0] = newECX;
668
// generate new flags, common to all ISTRI and ISTRM cases
669
*resOSZACP // A, P are zero
670
= ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
671
| ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
672
| ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
673
| ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
677
/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
680
For xSTRI variants, the new ECX value is placed in the 32 bits
681
pointed to by *resV, and the top 96 bits are zeroed. For xSTRM
682
variants, the result is a 128 bit value and is placed at *resV in
685
For all variants, the new OSZACP value is placed at *resOSZACP.
687
argLV and argRV are the vector args. The caller must prepare a
688
16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
689
must be 1 for each zero byte of of the respective arg. For ESTRx
690
variants this is derived from the explicit length indication, and
691
must be 0 in all places except at the bit index corresponding to
692
the valid length (0 .. 16). If the valid length is 16 then the
693
mask must be all zeroes. In all cases, bits 31:16 must be zero.
695
imm8 is the original immediate from the instruction. isSTRM
696
indicates whether this is a xSTRM or xSTRI variant, which controls
697
how much of *res is written.
699
If the given imm8 case can be handled, the return value is True.
700
If not, False is returned, and neither *res not *resOSZACP are
704
Bool compute_PCMPxSTRx ( /*OUT*/V128* resV,
705
/*OUT*/UInt* resOSZACP,
706
V128* argLV, V128* argRV,
707
UInt zmaskL, UInt zmaskR,
708
UInt imm8, Bool isxSTRM )
710
vassert(imm8 < 0x80);
711
vassert((zmaskL >> 16) == 0);
712
vassert((zmaskR >> 16) == 0);
714
/* Explicitly reject any imm8 values that haven't been validated,
715
even if they would probably work. Life is too short to have
716
unvalidated cases in the code base. */
719
case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12:
720
case 0x1A: case 0x38: case 0x3A: case 0x44: case 0x4A:
726
UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
727
UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
728
UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
729
UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
731
/*----------------------------------------*/
732
/*-- strcmp on byte data --*/
733
/*----------------------------------------*/
735
if (agg == 2/*equal each, aka strcmp*/
736
&& (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
738
UChar* argL = (UChar*)argLV;
739
UChar* argR = (UChar*)argRV;
741
for (i = 15; i >= 0; i--) {
744
boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
746
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
747
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
749
// do invalidation, common to all equal-each cases
751
= (boolResII & validL & validR) // if both valid, use cmpres
752
| (~ (validL | validR)); // if both invalid, force 1
756
// generate I-format output
757
compute_PCMPxSTRx_gen_output(
759
intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
765
/*----------------------------------------*/
766
/*-- set membership on byte data --*/
767
/*----------------------------------------*/
769
if (agg == 0/*equal any, aka find chars in a set*/
770
&& (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
771
/* argL: the string, argR: charset */
773
UChar* argL = (UChar*)argLV;
774
UChar* argR = (UChar*)argRV;
776
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
777
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
779
for (si = 0; si < 16; si++) {
780
if ((validL & (1 << si)) == 0)
781
// run off the end of the string.
784
for (ci = 0; ci < 16; ci++) {
785
if ((validR & (1 << ci)) == 0) break;
786
if (argR[ci] == argL[si]) { m = 1; break; }
788
boolRes |= (m << si);
791
// boolRes is "pre-invalidated"
792
UInt intRes1 = boolRes & 0xFFFF;
794
// generate I-format output
795
compute_PCMPxSTRx_gen_output(
797
intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
803
/*----------------------------------------*/
804
/*-- substring search on byte data --*/
805
/*----------------------------------------*/
807
if (agg == 3/*equal ordered, aka substring search*/
808
&& (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
810
/* argL: haystack, argR: needle */
812
UChar* argL = (UChar*)argLV;
813
UChar* argR = (UChar*)argRV;
815
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
816
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
817
for (hi = 0; hi < 16; hi++) {
818
if ((validL & (1 << hi)) == 0)
819
// run off the end of the haystack
822
for (ni = 0; ni < 16; ni++) {
823
if ((validR & (1 << ni)) == 0) break;
826
if (argL[i] != argR[ni]) { m = 0; break; }
828
boolRes |= (m << hi);
831
// boolRes is "pre-invalidated"
832
UInt intRes1 = boolRes & 0xFFFF;
834
// generate I-format output
835
compute_PCMPxSTRx_gen_output(
837
intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
843
/*----------------------------------------*/
844
/*-- ranges, unsigned byte data --*/
845
/*----------------------------------------*/
847
if (agg == 1/*ranges*/
850
/* argL: string, argR: range-pairs */
852
UChar* argL = (UChar*)argLV;
853
UChar* argR = (UChar*)argRV;
855
UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
856
UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
857
for (si = 0; si < 16; si++) {
858
if ((validL & (1 << si)) == 0)
859
// run off the end of the string
862
for (ri = 0; ri < 16; ri += 2) {
863
if ((validR & (3 << ri)) != (3 << ri)) break;
864
if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
868
boolRes |= (m << si);
871
// boolRes is "pre-invalidated"
872
UInt intRes1 = boolRes & 0xFFFF;
874
// generate I-format output
875
compute_PCMPxSTRx_gen_output(
877
intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
887
/*---------------------------------------------------------------*/
/*--- end                                 guest_generic_x87.c ---*/
/*---------------------------------------------------------------*/