/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains the implementation of the instruction decoder.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/
#include <assert.h>   /* for assert() */
#include <stdarg.h>   /* for va_*() */
#include <stdio.h>    /* for vsnprintf() */
#include <stdlib.h>   /* for exit() */
#include <string.h>   /* for memset() */

#include "X86DisassemblerDecoder.h"

#include "X86GenDisassemblerTables.inc"

/*
 * NOTE(review): TRUE/FALSE are used throughout this file; the guards keep
 * these definitions harmless if the decoder header already provides them.
 */
#ifndef TRUE
#define TRUE  1
#endif
#ifndef FALSE
#define FALSE 0
#endif

#ifdef __GNUC__
#define NORETURN __attribute__((noreturn))
#else
#define NORETURN
#endif

/*
 * unreachable - Reports an internal inconsistency on stderr (file, line and
 *   message) and terminates the process.
 *
 * The expansion deliberately has no trailing semicolon, so that
 * "unreachable(...);" is exactly one statement and is safe in an unbraced
 * if/else.
 */
#define unreachable(s)                                        \
  do {                                                        \
    fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, s);    \
    exit(-1);                                                 \
  } while (0)
42
* contextForAttrs - Client for the instruction context table. Takes a set of
43
* attributes and returns the appropriate decode context.
45
* @param attrMask - Attributes, from the enumeration attributeBits.
46
* @return - The InstructionContext to use when looking up an
47
* an instruction with these attributes.
49
static InstructionContext contextForAttrs(uint8_t attrMask) {
50
return CONTEXTS_SYM[attrMask];
54
* modRMRequired - Reads the appropriate instruction table to determine whether
55
* the ModR/M byte is required to decode a particular instruction.
57
* @param type - The opcode type (i.e., how many bytes it has).
58
* @param insnContext - The context for the instruction, as returned by
60
* @param opcode - The last byte of the instruction's opcode, not counting
61
* ModR/M extensions and escapes.
62
* @return - TRUE if the ModR/M byte is required, FALSE otherwise.
64
static int modRMRequired(OpcodeType type,
65
InstructionContext insnContext,
67
const struct ContextDecision* decision = 0;
71
decision = &ONEBYTE_SYM;
74
decision = &TWOBYTE_SYM;
77
decision = &THREEBYTE38_SYM;
80
decision = &THREEBYTE3A_SYM;
84
return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
85
modrm_type != MODRM_ONEENTRY;
87
unreachable("Unknown opcode type");
92
* decode - Reads the appropriate instruction table to obtain the unique ID of
95
* @param type - See modRMRequired().
96
* @param insnContext - See modRMRequired().
97
* @param opcode - See modRMRequired().
98
* @param modRM - The ModR/M byte if required, or any value if not.
100
static InstrUID decode(OpcodeType type,
101
InstructionContext insnContext,
104
struct ModRMDecision* dec;
108
unreachable("Unknown opcode type");
110
dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
113
dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
116
dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
119
dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
123
switch (dec->modrm_type) {
125
unreachable("Corrupt table! Unknown modrm_type");
127
return dec->instructionIDs[0];
129
if (modFromModRM(modRM) == 0x3)
130
return dec->instructionIDs[1];
132
return dec->instructionIDs[0];
134
return dec->instructionIDs[modRM];
141
* specifierForUID - Given a UID, returns the name and operand specification for
144
* @param uid - The unique ID for the instruction. This should be returned by
145
* decode(); specifierForUID will not check bounds.
146
* @return - A pointer to the specification for that instruction.
148
static struct InstructionSpecifier* specifierForUID(InstrUID uid) {
149
return &INSTRUCTIONS_SYM[uid];
153
* consumeByte - Uses the reader function provided by the user to consume one
154
* byte from the instruction's memory and advance the cursor.
156
* @param insn - The instruction with the reader function to use. The cursor
157
* for this instruction is advanced.
158
* @param byte - A pointer to a pre-allocated memory buffer to be populated
159
* with the data read.
160
* @return - 0 if the read was successful; nonzero otherwise.
162
static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
163
int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
166
++(insn->readerCursor);
172
* lookAtByte - Like consumeByte, but does not advance the cursor.
174
* @param insn - See consumeByte().
175
* @param byte - See consumeByte().
176
* @return - See consumeByte().
178
static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
179
return insn->reader(insn->readerArg, byte, insn->readerCursor);
182
static void unconsumeByte(struct InternalInstruction* insn) {
183
insn->readerCursor--;
186
#define CONSUME_FUNC(name, type) \
187
static int name(struct InternalInstruction* insn, type* ptr) { \
190
for (offset = 0; offset < sizeof(type); ++offset) { \
192
int ret = insn->reader(insn->readerArg, \
194
insn->readerCursor + offset); \
197
combined = combined | ((type)byte << ((type)offset * 8)); \
200
insn->readerCursor += sizeof(type); \
205
* consume* - Use the reader function provided by the user to consume data
206
* values of various sizes from the instruction's memory and advance the
207
* cursor appropriately. These readers perform endian conversion.
209
* @param insn - See consumeByte().
210
* @param ptr - A pointer to a pre-allocated memory of appropriate size to
211
* be populated with the data read.
212
* @return - See consumeByte().
214
CONSUME_FUNC(consumeInt8, int8_t)
215
CONSUME_FUNC(consumeInt16, int16_t)
216
CONSUME_FUNC(consumeInt32, int32_t)
217
CONSUME_FUNC(consumeUInt16, uint16_t)
218
CONSUME_FUNC(consumeUInt32, uint32_t)
219
CONSUME_FUNC(consumeUInt64, uint64_t)
222
* dbgprintf - Uses the logging function provided by the user to log a single
223
* message, typically without a carriage-return.
225
* @param insn - The instruction containing the logging function.
226
* @param format - See printf().
227
* @param ... - See printf().
229
static void dbgprintf(struct InternalInstruction* insn,
238
va_start(ap, format);
239
(void)vsnprintf(buffer, sizeof(buffer), format, ap);
242
insn->dlog(insn->dlogArg, buffer);
248
* setPrefixPresent - Marks that a particular prefix is present at a particular
251
* @param insn - The instruction to be marked as having the prefix.
252
* @param prefix - The prefix that is present.
253
* @param location - The location where the prefix is located (in the address
254
* space of the instruction's reader).
256
static void setPrefixPresent(struct InternalInstruction* insn,
260
insn->prefixPresent[prefix] = 1;
261
insn->prefixLocations[prefix] = location;
265
* isPrefixAtLocation - Queries an instruction to determine whether a prefix is
266
* present at a given location.
268
* @param insn - The instruction to be queried.
269
* @param prefix - The prefix.
270
* @param location - The location to query.
271
* @return - Whether the prefix is at that location.
273
static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
277
if (insn->prefixPresent[prefix] == 1 &&
278
insn->prefixLocations[prefix] == location)
285
* readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
286
* instruction as having them. Also sets the instruction's default operand,
287
* address, and other relevant data sizes to report operands correctly.
289
* @param insn - The instruction whose prefixes are to be read.
290
* @return - 0 if the instruction could be read until the end of the prefix
291
* bytes, and no prefixes conflicted; nonzero otherwise.
293
static int readPrefixes(struct InternalInstruction* insn) {
294
BOOL isPrefix = TRUE;
295
BOOL prefixGroups[4] = { FALSE };
296
uint64_t prefixLocation;
299
BOOL hasAdSize = FALSE;
300
BOOL hasOpSize = FALSE;
302
dbgprintf(insn, "readPrefixes()");
305
prefixLocation = insn->readerCursor;
307
if (consumeByte(insn, &byte))
311
case 0xf0: /* LOCK */
312
case 0xf2: /* REPNE/REPNZ */
313
case 0xf3: /* REP or REPE/REPZ */
315
dbgprintf(insn, "Redundant Group 1 prefix");
316
prefixGroups[0] = TRUE;
317
setPrefixPresent(insn, byte, prefixLocation);
319
case 0x2e: /* CS segment override -OR- Branch not taken */
320
case 0x36: /* SS segment override -OR- Branch taken */
321
case 0x3e: /* DS segment override */
322
case 0x26: /* ES segment override */
323
case 0x64: /* FS segment override */
324
case 0x65: /* GS segment override */
327
insn->segmentOverride = SEG_OVERRIDE_CS;
330
insn->segmentOverride = SEG_OVERRIDE_SS;
333
insn->segmentOverride = SEG_OVERRIDE_DS;
336
insn->segmentOverride = SEG_OVERRIDE_ES;
339
insn->segmentOverride = SEG_OVERRIDE_FS;
342
insn->segmentOverride = SEG_OVERRIDE_GS;
345
unreachable("Unhandled override");
348
dbgprintf(insn, "Redundant Group 2 prefix");
349
prefixGroups[1] = TRUE;
350
setPrefixPresent(insn, byte, prefixLocation);
352
case 0x66: /* Operand-size override */
354
dbgprintf(insn, "Redundant Group 3 prefix");
355
prefixGroups[2] = TRUE;
357
setPrefixPresent(insn, byte, prefixLocation);
359
case 0x67: /* Address-size override */
361
dbgprintf(insn, "Redundant Group 4 prefix");
362
prefixGroups[3] = TRUE;
364
setPrefixPresent(insn, byte, prefixLocation);
366
default: /* Not a prefix byte */
372
dbgprintf(insn, "Found prefix 0x%hhx", byte);
375
if (insn->mode == MODE_64BIT) {
376
if ((byte & 0xf0) == 0x40) {
379
if(lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
380
dbgprintf(insn, "Redundant REX prefix");
384
insn->rexPrefix = byte;
385
insn->necessaryPrefixLocation = insn->readerCursor - 2;
387
dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
390
insn->necessaryPrefixLocation = insn->readerCursor - 1;
396
if (insn->mode == MODE_16BIT) {
397
insn->registerSize = (hasOpSize ? 4 : 2);
398
insn->addressSize = (hasAdSize ? 4 : 2);
399
insn->displacementSize = (hasAdSize ? 4 : 2);
400
insn->immediateSize = (hasOpSize ? 4 : 2);
401
} else if (insn->mode == MODE_32BIT) {
402
insn->registerSize = (hasOpSize ? 2 : 4);
403
insn->addressSize = (hasAdSize ? 2 : 4);
404
insn->displacementSize = (hasAdSize ? 2 : 4);
405
insn->immediateSize = (hasAdSize ? 2 : 4);
406
} else if (insn->mode == MODE_64BIT) {
407
if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
408
insn->registerSize = 8;
409
insn->addressSize = (hasAdSize ? 4 : 8);
410
insn->displacementSize = 4;
411
insn->immediateSize = 4;
412
} else if (insn->rexPrefix) {
413
insn->registerSize = (hasOpSize ? 2 : 4);
414
insn->addressSize = (hasAdSize ? 4 : 8);
415
insn->displacementSize = (hasOpSize ? 2 : 4);
416
insn->immediateSize = (hasOpSize ? 2 : 4);
418
insn->registerSize = (hasOpSize ? 2 : 4);
419
insn->addressSize = (hasAdSize ? 4 : 8);
420
insn->displacementSize = (hasOpSize ? 2 : 4);
421
insn->immediateSize = (hasOpSize ? 2 : 4);
429
* readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
430
* extended or escape opcodes).
432
* @param insn - The instruction whose opcode is to be read.
433
* @return - 0 if the opcode could be read successfully; nonzero otherwise.
435
static int readOpcode(struct InternalInstruction* insn) {
436
/* Determine the length of the primary opcode */
440
dbgprintf(insn, "readOpcode()");
442
insn->opcodeType = ONEBYTE;
443
if (consumeByte(insn, ¤t))
446
if (current == 0x0f) {
447
dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
449
insn->twoByteEscape = current;
451
if (consumeByte(insn, ¤t))
454
if (current == 0x38) {
455
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
457
insn->threeByteEscape = current;
459
if (consumeByte(insn, ¤t))
462
insn->opcodeType = THREEBYTE_38;
463
} else if (current == 0x3a) {
464
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
466
insn->threeByteEscape = current;
468
if (consumeByte(insn, ¤t))
471
insn->opcodeType = THREEBYTE_3A;
473
dbgprintf(insn, "Didn't find a three-byte escape prefix");
475
insn->opcodeType = TWOBYTE;
480
* At this point we have consumed the full opcode.
481
* Anything we consume from here on must be unconsumed.
484
insn->opcode = current;
489
static int readModRM(struct InternalInstruction* insn);
492
* getIDWithAttrMask - Determines the ID of an instruction, consuming
493
* the ModR/M byte as appropriate for extended and escape opcodes,
494
* and using a supplied attribute mask.
496
* @param instructionID - A pointer whose target is filled in with the ID of the
498
* @param insn - The instruction whose ID is to be determined.
499
* @param attrMask - The attribute mask to search.
500
* @return - 0 if the ModR/M could be read when needed or was not
501
* needed; nonzero otherwise.
503
static int getIDWithAttrMask(uint16_t* instructionID,
504
struct InternalInstruction* insn,
506
BOOL hasModRMExtension;
508
uint8_t instructionClass;
510
instructionClass = contextForAttrs(attrMask);
512
hasModRMExtension = modRMRequired(insn->opcodeType,
516
if (hasModRMExtension) {
519
*instructionID = decode(insn->opcodeType,
524
*instructionID = decode(insn->opcodeType,
534
* is16BitEquivalent - Determines whether two instruction names refer to
535
* equivalent instructions but one is 16-bit whereas the other is not.
537
* @param orig - The instruction that is not 16-bit
538
* @param equiv - The instruction that is 16-bit
540
static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
544
if(orig[i] == '\0' && equiv[i] == '\0')
546
if(orig[i] == '\0' || equiv[i] == '\0')
548
if(orig[i] != equiv[i]) {
549
if((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
551
if((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
553
if((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
561
* is64BitEquivalent - Determines whether two instruction names refer to
562
* equivalent instructions but one is 64-bit whereas the other is not.
564
* @param orig - The instruction that is not 64-bit
565
* @param equiv - The instruction that is 64-bit
567
static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
571
if(orig[i] == '\0' && equiv[i] == '\0')
573
if(orig[i] == '\0' || equiv[i] == '\0')
575
if(orig[i] != equiv[i]) {
576
if((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
578
if((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
580
if((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
589
* getID - Determines the ID of an instruction, consuming the ModR/M byte as
590
* appropriate for extended and escape opcodes. Determines the attributes and
591
* context for the instruction before doing so.
593
* @param insn - The instruction whose ID is to be determined.
594
* @return - 0 if the ModR/M could be read when needed or was not needed;
597
static int getID(struct InternalInstruction* insn) {
599
uint16_t instructionID;
601
dbgprintf(insn, "getID()");
603
attrMask = ATTR_NONE;
605
if (insn->mode == MODE_64BIT)
606
attrMask |= ATTR_64BIT;
608
if (insn->rexPrefix & 0x08)
609
attrMask |= ATTR_REXW;
611
if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
612
attrMask |= ATTR_OPSIZE;
613
else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
615
else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
618
if(getIDWithAttrMask(&instructionID, insn, attrMask))
621
/* The following clauses compensate for limitations of the tables. */
623
if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
625
* Although for SSE instructions it is usually necessary to treat REX.W+F2
626
* as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
627
* an occasional instruction where F2 is incidental and REX.W is the more
628
* significant. If the decoded instruction is 32-bit and adding REX.W
629
* instead of F2 changes a 32 to a 64, we adopt the new encoding.
632
struct InstructionSpecifier* spec;
633
uint16_t instructionIDWithREXw;
634
struct InstructionSpecifier* specWithREXw;
636
spec = specifierForUID(instructionID);
638
if (getIDWithAttrMask(&instructionIDWithREXw,
640
attrMask & (~ATTR_XD))) {
642
* Decoding with REX.w would yield nothing; give up and return original
646
insn->instructionID = instructionID;
651
specWithREXw = specifierForUID(instructionIDWithREXw);
653
if (is64BitEquivalent(spec->name, specWithREXw->name)) {
654
insn->instructionID = instructionIDWithREXw;
655
insn->spec = specWithREXw;
657
insn->instructionID = instructionID;
663
if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
665
* The instruction tables make no distinction between instructions that
666
* allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
667
* particular spot (i.e., many MMX operations). In general we're
668
* conservative, but in the specific case where OpSize is present but not
669
* in the right place we check if there's a 16-bit operation.
672
struct InstructionSpecifier* spec;
673
uint16_t instructionIDWithOpsize;
674
struct InstructionSpecifier* specWithOpsize;
676
spec = specifierForUID(instructionID);
678
if (getIDWithAttrMask(&instructionIDWithOpsize,
680
attrMask | ATTR_OPSIZE)) {
682
* ModRM required with OpSize but not present; give up and return version
686
insn->instructionID = instructionID;
691
specWithOpsize = specifierForUID(instructionIDWithOpsize);
693
if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
694
insn->instructionID = instructionIDWithOpsize;
695
insn->spec = specWithOpsize;
697
insn->instructionID = instructionID;
703
insn->instructionID = instructionID;
704
insn->spec = specifierForUID(insn->instructionID);
710
* readSIB - Consumes the SIB byte to determine addressing information for an
713
* @param insn - The instruction whose SIB byte is to be read.
714
* @return - 0 if the SIB byte was successfully read; nonzero otherwise.
716
static int readSIB(struct InternalInstruction* insn) {
717
SIBIndex sibIndexBase = 0;
718
SIBBase sibBaseBase = 0;
721
dbgprintf(insn, "readSIB()");
723
if (insn->consumedSIB)
726
insn->consumedSIB = TRUE;
728
switch (insn->addressSize) {
730
dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
734
sibIndexBase = SIB_INDEX_EAX;
735
sibBaseBase = SIB_BASE_EAX;
738
sibIndexBase = SIB_INDEX_RAX;
739
sibBaseBase = SIB_BASE_RAX;
743
if (consumeByte(insn, &insn->sib))
746
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
750
insn->sibIndex = SIB_INDEX_NONE;
753
insn->sibIndex = (EABase)(sibIndexBase + index);
754
if (insn->sibIndex == SIB_INDEX_sib ||
755
insn->sibIndex == SIB_INDEX_sib64)
756
insn->sibIndex = SIB_INDEX_NONE;
760
switch (scaleFromSIB(insn->sib)) {
775
base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
779
switch (modFromModRM(insn->modRM)) {
781
insn->eaDisplacement = EA_DISP_32;
782
insn->sibBase = SIB_BASE_NONE;
785
insn->eaDisplacement = EA_DISP_8;
786
insn->sibBase = (insn->addressSize == 4 ?
787
SIB_BASE_EBP : SIB_BASE_RBP);
790
insn->eaDisplacement = EA_DISP_32;
791
insn->sibBase = (insn->addressSize == 4 ?
792
SIB_BASE_EBP : SIB_BASE_RBP);
795
unreachable("Cannot have Mod = 0b11 and a SIB byte");
799
insn->sibBase = (EABase)(sibBaseBase + base);
807
* readDisplacement - Consumes the displacement of an instruction.
809
* @param insn - The instruction whose displacement is to be read.
810
* @return - 0 if the displacement byte was successfully read; nonzero
813
static int readDisplacement(struct InternalInstruction* insn) {
818
dbgprintf(insn, "readDisplacement()");
820
if (insn->consumedDisplacement)
823
insn->consumedDisplacement = TRUE;
825
switch (insn->eaDisplacement) {
827
insn->consumedDisplacement = FALSE;
830
if (consumeInt8(insn, &d8))
832
insn->displacement = d8;
835
if (consumeInt16(insn, &d16))
837
insn->displacement = d16;
840
if (consumeInt32(insn, &d32))
842
insn->displacement = d32;
846
insn->consumedDisplacement = TRUE;
851
* readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
852
* displacement) for an instruction and interprets it.
854
* @param insn - The instruction whose addressing information is to be read.
855
* @return - 0 if the information was successfully read; nonzero otherwise.
857
static int readModRM(struct InternalInstruction* insn) {
858
uint8_t mod, rm, reg;
860
dbgprintf(insn, "readModRM()");
862
if (insn->consumedModRM)
865
consumeByte(insn, &insn->modRM);
866
insn->consumedModRM = TRUE;
868
mod = modFromModRM(insn->modRM);
869
rm = rmFromModRM(insn->modRM);
870
reg = regFromModRM(insn->modRM);
873
* This goes by insn->registerSize to pick the correct register, which messes
874
* up if we're using (say) XMM or 8-bit register operands. That gets fixed in
877
switch (insn->registerSize) {
879
insn->regBase = MODRM_REG_AX;
880
insn->eaRegBase = EA_REG_AX;
883
insn->regBase = MODRM_REG_EAX;
884
insn->eaRegBase = EA_REG_EAX;
887
insn->regBase = MODRM_REG_RAX;
888
insn->eaRegBase = EA_REG_RAX;
892
reg |= rFromREX(insn->rexPrefix) << 3;
893
rm |= bFromREX(insn->rexPrefix) << 3;
895
insn->reg = (Reg)(insn->regBase + reg);
897
switch (insn->addressSize) {
899
insn->eaBaseBase = EA_BASE_BX_SI;
904
insn->eaBase = EA_BASE_NONE;
905
insn->eaDisplacement = EA_DISP_16;
906
if(readDisplacement(insn))
909
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
910
insn->eaDisplacement = EA_DISP_NONE;
914
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
915
insn->eaDisplacement = EA_DISP_8;
916
if(readDisplacement(insn))
920
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
921
insn->eaDisplacement = EA_DISP_16;
922
if(readDisplacement(insn))
926
insn->eaBase = (EABase)(insn->eaRegBase + rm);
927
if(readDisplacement(insn))
934
insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
938
insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
941
case 0xc: /* in case REXW.b is set */
942
insn->eaBase = (insn->addressSize == 4 ?
943
EA_BASE_sib : EA_BASE_sib64);
945
if(readDisplacement(insn))
949
insn->eaBase = EA_BASE_NONE;
950
insn->eaDisplacement = EA_DISP_32;
951
if(readDisplacement(insn))
955
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
961
insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
964
case 0xc: /* in case REXW.b is set */
965
insn->eaBase = EA_BASE_sib;
967
if(readDisplacement(insn))
971
insn->eaBase = (EABase)(insn->eaBaseBase + rm);
972
if(readDisplacement(insn))
978
insn->eaDisplacement = EA_DISP_NONE;
979
insn->eaBase = (EABase)(insn->eaRegBase + rm);
983
} /* switch (insn->addressSize) */
988
#define GENERIC_FIXUP_FUNC(name, base, prefix) \
989
static uint8_t name(struct InternalInstruction *insn, \
996
unreachable("Unhandled register type"); \
998
return base + index; \
1000
if(insn->rexPrefix && \
1001
index >= 4 && index <= 7) { \
1002
return prefix##_SPL + (index - 4); \
1004
return prefix##_AL + index; \
1007
return prefix##_AX + index; \
1009
return prefix##_EAX + index; \
1011
return prefix##_RAX + index; \
1016
return prefix##_XMM0 + index; \
1022
return prefix##_MM0 + index; \
1023
case TYPE_SEGMENTREG: \
1026
return prefix##_ES + index; \
1027
case TYPE_DEBUGREG: \
1030
return prefix##_DR0 + index; \
1034
return prefix##_ECR0 + index; \
1038
return prefix##_RCR0 + index; \
1043
* fixup*Value - Consults an operand type to determine the meaning of the
1044
* reg or R/M field. If the operand is an XMM operand, for example, an
1045
* operand would be XMM0 instead of AX, which readModRM() would otherwise
1046
* misinterpret it as.
1048
* @param insn - The instruction containing the operand.
1049
* @param type - The operand type.
1050
* @param index - The existing value of the field as reported by readModRM().
1051
* @param valid - The address of a uint8_t. The target is set to 1 if the
1052
* field is valid for the register class; 0 if not.
1054
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
1055
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1058
* fixupReg - Consults an operand specifier to determine which of the
1059
* fixup*Value functions to use in correcting readModRM()'ss interpretation.
1061
* @param insn - See fixup*Value().
1062
* @param op - The operand specifier.
1063
* @return - 0 if fixup was successful; -1 if the register returned was
1064
* invalid for its class.
1066
static int fixupReg(struct InternalInstruction *insn,
1067
struct OperandSpecifier *op) {
1070
dbgprintf(insn, "fixupReg()");
1072
switch ((OperandEncoding)op->encoding) {
1074
unreachable("Expected a REG or R/M encoding in fixupReg");
1076
insn->reg = (Reg)fixupRegValue(insn,
1077
(OperandType)op->type,
1078
insn->reg - insn->regBase,
1084
if (insn->eaBase >= insn->eaRegBase) {
1085
insn->eaBase = (EABase)fixupRMValue(insn,
1086
(OperandType)op->type,
1087
insn->eaBase - insn->eaRegBase,
1099
* readOpcodeModifier - Reads an operand from the opcode field of an
1100
* instruction. Handles AddRegFrm instructions.
1102
* @param insn - The instruction whose opcode field is to be read.
1103
* @param inModRM - Indicates that the opcode field is to be read from the
1104
* ModR/M extension; useful for escape opcodes
1106
static void readOpcodeModifier(struct InternalInstruction* insn) {
1107
dbgprintf(insn, "readOpcodeModifier()");
1109
if (insn->consumedOpcodeModifier)
1112
insn->consumedOpcodeModifier = TRUE;
1114
switch(insn->spec->modifierType) {
1116
unreachable("Unknown modifier type.");
1118
unreachable("No modifier but an operand expects one.");
1119
case MODIFIER_OPCODE:
1120
insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
1122
case MODIFIER_MODRM:
1123
insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
1129
* readOpcodeRegister - Reads an operand from the opcode field of an
1130
* instruction and interprets it appropriately given the operand width.
1131
* Handles AddRegFrm instructions.
1133
* @param insn - See readOpcodeModifier().
1134
* @param size - The width (in bytes) of the register being specified.
1135
* 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1138
static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1139
dbgprintf(insn, "readOpcodeRegister()");
1141
readOpcodeModifier(insn);
1144
size = insn->registerSize;
1148
insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1149
| insn->opcodeModifier));
1150
if(insn->rexPrefix &&
1151
insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1152
insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1153
insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1154
+ (insn->opcodeRegister - MODRM_REG_AL - 4));
1159
insn->opcodeRegister = (Reg)(MODRM_REG_AX
1160
+ ((bFromREX(insn->rexPrefix) << 3)
1161
| insn->opcodeModifier));
1164
insn->opcodeRegister = (Reg)(MODRM_REG_EAX +
1165
+ ((bFromREX(insn->rexPrefix) << 3)
1166
| insn->opcodeModifier));
1169
insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1170
+ ((bFromREX(insn->rexPrefix) << 3)
1171
| insn->opcodeModifier));
1177
* readImmediate - Consumes an immediate operand from an instruction, given the
1178
* desired operand size.
1180
* @param insn - The instruction whose operand is to be read.
1181
* @param size - The width (in bytes) of the operand.
1182
* @return - 0 if the immediate was successfully consumed; nonzero
1185
static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1191
dbgprintf(insn, "readImmediate()");
1193
if (insn->numImmediatesConsumed == 2)
1194
unreachable("Already consumed two immediates");
1197
size = insn->immediateSize;
1199
insn->immediateSize = size;
1203
if (consumeByte(insn, &imm8))
1205
insn->immediates[insn->numImmediatesConsumed] = imm8;
1208
if (consumeUInt16(insn, &imm16))
1210
insn->immediates[insn->numImmediatesConsumed] = imm16;
1213
if (consumeUInt32(insn, &imm32))
1215
insn->immediates[insn->numImmediatesConsumed] = imm32;
1218
if (consumeUInt64(insn, &imm64))
1220
insn->immediates[insn->numImmediatesConsumed] = imm64;
1224
insn->numImmediatesConsumed++;
1230
* readOperands - Consults the specifier for an instruction and consumes all
1231
* operands for that instruction, interpreting them as it goes.
1233
* @param insn - The instruction whose operands are to be read and interpreted.
1234
* @return - 0 if all operands could be read; nonzero otherwise.
1236
static int readOperands(struct InternalInstruction* insn) {
1239
dbgprintf(insn, "readOperands()");
1241
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1242
switch (insn->spec->operands[index].encoding) {
1247
if (readModRM(insn))
1249
if (fixupReg(insn, &insn->spec->operands[index]))
1258
dbgprintf(insn, "We currently don't hande code-offset encodings");
1261
if (readImmediate(insn, 1))
1265
if (readImmediate(insn, 2))
1269
if (readImmediate(insn, 4))
1273
if (readImmediate(insn, 8))
1277
readImmediate(insn, insn->immediateSize);
1280
readImmediate(insn, insn->addressSize);
1283
readOpcodeRegister(insn, 1);
1286
readOpcodeRegister(insn, 2);
1289
readOpcodeRegister(insn, 4);
1292
readOpcodeRegister(insn, 8);
1295
readOpcodeRegister(insn, 0);
1298
readOpcodeModifier(insn);
1303
dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1312
* decodeInstruction - Reads and interprets a full instruction provided by the
1315
* @param insn - A pointer to the instruction to be populated. Must be
1317
* @param reader - The function to be used to read the instruction's bytes.
1318
* @param readerArg - A generic argument to be passed to the reader to store
1319
* any internal state.
1320
* @param logger - If non-NULL, the function to be used to write log messages
1322
* @param loggerArg - A generic argument to be passed to the logger to store
1323
* any internal state.
1324
* @param startLoc - The address (in the reader's address space) of the first
1325
* byte in the instruction.
1326
* @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1327
* decode the instruction in.
1328
* @return - 0 if the instruction's memory could be read; nonzero if
1331
int decodeInstruction(struct InternalInstruction* insn,
1332
byteReader_t reader,
1337
DisassemblerMode mode) {
1338
memset(insn, 0, sizeof(struct InternalInstruction));
1340
insn->reader = reader;
1341
insn->readerArg = readerArg;
1342
insn->dlog = logger;
1343
insn->dlogArg = loggerArg;
1344
insn->startLocation = startLoc;
1345
insn->readerCursor = startLoc;
1347
insn->numImmediatesConsumed = 0;
1349
if (readPrefixes(insn) ||
1352
insn->instructionID == 0 ||
1356
insn->length = insn->readerCursor - insn->startLocation;
1358
dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %llu",
1359
startLoc, insn->readerCursor, insn->length);
1361
if (insn->length > 15)
1362
dbgprintf(insn, "Instruction exceeds 15-byte limit");