1
/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==*
3
* The LLVM Compiler Infrastructure
5
* This file is distributed under the University of Illinois Open Source
6
* License. See LICENSE.TXT for details.
8
*===----------------------------------------------------------------------===*
10
* This file is part of the X86 Disassembler.
11
* It contains the public interface of the instruction decoder.
12
* Documentation for the disassembler can be found in X86Disassembler.h.
14
*===----------------------------------------------------------------------===*/
16
#ifndef X86DISASSEMBLERDECODER_H
17
#define X86DISASSEMBLERDECODER_H
23
#define INSTRUCTION_SPECIFIER_FIELDS \
26
#define INSTRUCTION_IDS \
27
InstrUID* instructionIDs;
29
#include "X86DisassemblerDecoderCommon.h"
31
#undef INSTRUCTION_SPECIFIER_FIELDS
32
#undef INSTRUCTION_IDS
35
/*
 * Accessor macros for various fields of an Intel instruction.
 *
 * Each macro extracts one bit-field from a ModR/M, SIB, or REX byte value.
 * The argument is parenthesized inside every expansion so that a compound
 * argument (for example `a | b` or a conditional expression) is masked as a
 * whole instead of being re-associated by operator precedence.  Every macro
 * evaluates its argument exactly once.
 */
#define modFromModRM(modRM)  (((modRM) & 0xc0) >> 6)  /* bits 7-6 */
#define regFromModRM(modRM)  (((modRM) & 0x38) >> 3)  /* bits 5-3 */
#define rmFromModRM(modRM)   ((modRM) & 0x7)          /* bits 2-0 */
#define scaleFromSIB(sib)    (((sib) & 0xc0) >> 6)    /* bits 7-6 */
#define indexFromSIB(sib)    (((sib) & 0x38) >> 3)    /* bits 5-3 */
#define baseFromSIB(sib)     ((sib) & 0x7)            /* bits 2-0 */
#define wFromREX(rex)        (((rex) & 0x8) >> 3)     /* bit 3 */
#define rFromREX(rex)        (((rex) & 0x4) >> 2)     /* bit 2 */
#define xFromREX(rex)        (((rex) & 0x2) >> 1)     /* bit 1 */
#define bFromREX(rex)        ((rex) & 0x1)            /* bit 0 */
49
/*
 * These enums represent Intel registers for use by the decoder.
 */
74
#define EA_BASES_16BIT \
110
#define EA_BASES_32BIT \
146
#define EA_BASES_64BIT \
210
#define REGS_SEGMENT \
228
#define REGS_CONTROL_32BIT \
238
#define REGS_CONTROL_64BIT \
249
#define ALL_EA_BASES \
254
#define ALL_SIB_BASES \
272
/*
 * EABase - All possible values of the base field for effective-address
 *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We
 *   distinguish between bases (EA_BASE_*) and registers that just happen to be
 *   referred to when Mod == 0b11 (EA_REG_*).
 */
279
#define ENTRY(x) EA_BASE_##x,
282
#define ENTRY(x) EA_REG_##x,
289
/*
 * SIBIndex - All possible values of the SIB index field.
 *   Borrows entries from ALL_EA_BASES with the special case that
 *   sib is synonymous with NONE.
 */
295
#define ENTRY(x) SIB_INDEX_##x,
302
/*
 * SIBBase - All possible values of the SIB base field.
 */
306
#define ENTRY(x) SIB_BASE_##x,
313
/*
 * EADisplacement - Possible displacement types for effective-address
 *   computations.
 */
324
/*
 * Reg - All possible values of the reg field in the ModR/M byte.
 */
327
#define ENTRY(x) MODRM_REG_##x,
334
/*
 * SegmentOverride - All possible segment overrides.
 */
347
/* BOOL - One-byte flag type; it is the type of the consumed* bookkeeping
   fields of struct InternalInstruction. */
typedef uint8_t BOOL;
350
/*
 * byteReader_t - Type for the byte reader that the consumer must provide to
 *   the decoder.  Reads a single byte from the instruction's address space.
 *
 * @param arg     - A baton that the consumer can associate with any internal
 *                  state that it needs.
 * @param byte    - A pointer to a single byte in memory that should be set to
 *                  contain the value at address.
 * @param address - The address in the instruction's address space that should
 *                  be read.
 * @return        - -1 if the byte cannot be read for any reason; 0 otherwise.
 */
typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address);
363
/*
 * dlog_t - Type for the logging function that the consumer can provide to
 *   get debugging output from the decoder.
 *
 * @param arg - A baton that the consumer can associate with any internal
 *              state that it needs.
 * @param log - A string that contains the message.  Will be reused after
 *              the logger returns, so the logger must not keep the pointer.
 */
typedef void (*dlog_t)(void* arg, const char *log);
373
/*
 * The x86 internal instruction, which is produced by the decoder.
 */
375
struct InternalInstruction {
376
/* Reader interface (C) */
378
/* Opaque value passed to the reader */
380
/* The address of the next byte to read via the reader */
381
uint64_t readerCursor;
383
/* Logger interface (C) */
385
/* Opaque value passed to the logger */
388
/* General instruction information */
390
/* The mode to disassemble for (64-bit, protected, real) */
391
DisassemblerMode mode;
392
/* The start of the instruction, usable with the reader */
393
uint64_t startLocation;
394
/* The length of the instruction, in bytes */
399
/* 1 if the prefix byte corresponding to the entry is present; 0 if not */
400
uint8_t prefixPresent[0x100];
401
/* contains the location (for use with the reader) of the prefix byte */
402
uint64_t prefixLocations[0x100];
403
/* The value of the REX prefix, if present */
405
/* The location of the REX prefix */
406
uint64_t rexLocation;
407
/* The location where a mandatory prefix would have to be (i.e., right before
   the opcode, or right before the REX prefix if one is present) */
409
uint64_t necessaryPrefixLocation;
410
/* The segment override type */
411
SegmentOverride segmentOverride;
413
/* Sizes of various critical pieces of data */
414
uint8_t registerSize;
416
uint8_t displacementSize;
417
uint8_t immediateSize;
421
/* The value of the two-byte escape prefix (usually 0x0f) */
422
uint8_t twoByteEscape;
423
/* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
424
uint8_t threeByteEscape;
425
/* The last byte of the opcode, not counting any ModR/M extension */
427
/* The ModR/M byte of the instruction, if it is an opcode extension */
428
uint8_t modRMExtension;
432
/* The type of opcode, used for indexing into the array of decode tables */
433
OpcodeType opcodeType;
434
/* The instruction ID, extracted from the decode table */
435
uint16_t instructionID;
436
/* The specifier for the instruction, from the instruction info table */
437
struct InstructionSpecifier* spec;
439
/* State for additional bytes, consumed during operand decode.  Pattern:
   consumed___ indicates that the byte was already consumed and does not
   need to be consumed again */
443
/* The ModR/M byte, which contains most register operands and some portion of
   all memory operands */
448
/* The SIB byte, used for more complex 32- or 64-bit memory operands */
452
/* The displacement, used for memory operands */
453
BOOL consumedDisplacement;
454
int32_t displacement;
456
/* Immediates. There can be two in some cases */
457
uint8_t numImmediatesConsumed;
458
uint8_t numImmediatesTranslated;
459
uint64_t immediates[2];
461
/* A register or immediate operand encoded into the opcode */
462
BOOL consumedOpcodeModifier;
463
uint8_t opcodeModifier;
466
/* Portions of the ModR/M byte */
468
/* These fields determine the allowable values for the ModR/M fields, which
   depend on operand and address widths */
474
/* The Mod and R/M fields can encode a base for an effective address, or a
   register.  These are separated into two fields here */
477
EADisplacement eaDisplacement;
478
/* The reg field always encodes a register */
487
/* decodeInstruction - Decode one instruction and store the decoding results in
488
* a buffer provided by the consumer.
489
* @param insn - The buffer to store the instruction in. Allocated by the
491
* @param reader - The byteReader_t for the bytes to be read.
492
* @param readerArg - An argument to pass to the reader for storing context
493
* specific to the consumer. May be NULL.
494
* @param logger - The dlog_t to be used in printing status messages from the
495
* disassembler. May be NULL.
496
* @param loggerArg - An argument to pass to the logger for storing context
497
* specific to the logger. May be NULL.
498
* @param startLoc - The address (in the reader's address space) of the first
499
* byte in the instruction.
500
* @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.
501
* @return - Nonzero if there was an error during decode, 0 otherwise.
503
int decodeInstruction(struct InternalInstruction* insn,
509
DisassemblerMode mode);