/* Copyright (c) 2007, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Joi Sigurdsson
 *
 * Implementation of MiniDisassembler.
 */

#include "mini_disassembler.h"

namespace sidestep {

40
MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
41
bool address_default_is_32_bits)
42
: operand_default_is_32_bits_(operand_default_is_32_bits),
43
address_default_is_32_bits_(address_default_is_32_bits) {
47
MiniDisassembler::MiniDisassembler()
48
: operand_default_is_32_bits_(true),
49
address_default_is_32_bits_(true) {
53
InstructionType MiniDisassembler::Disassemble(
55
unsigned int& instruction_bytes) {
56
// Clean up any state from previous invocations.
59
// Start by processing any prefixes.
60
byte* current_byte = start_byte;
61
unsigned int size = 0;
62
InstructionType instruction_type = ProcessPrefixes(current_byte, size);
64
if (IT_UNKNOWN == instruction_type)
65
return instruction_type;
70
// Invariant: We have stripped all prefixes, and the operand_is_32_bits_
71
// and address_is_32_bits_ flags are correctly set.
73
instruction_type = ProcessOpcode(current_byte, 0, size);
75
// Check for error processing instruction
76
if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
82
// Invariant: operand_bytes_ indicates the total size of operands
83
// specified by the opcode and/or ModR/M byte and/or SIB byte.
84
// pCurrentByte points to the first byte after the ModR/M byte, or after
85
// the SIB byte if it is present (i.e. the first byte of any operands
86
// encoded in the instruction).
88
// We get the total length of any prefixes, the opcode, and the ModR/M and
89
// SIB bytes if present, by taking the difference of the original starting
90
// address and the current byte (which points to the first byte of the
91
// operands if present, or to the first byte of the next instruction if
92
// they are not). Adding the count of bytes in the operands encoded in
93
// the instruction gives us the full length of the instruction in bytes.
94
instruction_bytes += operand_bytes_ + (current_byte - start_byte);
96
// Return the instruction type, which was set by ProcessOpcode().
97
return instruction_type_;
100
void MiniDisassembler::Initialize() {
101
operand_is_32_bits_ = operand_default_is_32_bits_;
102
address_is_32_bits_ = address_default_is_32_bits_;
105
should_decode_modrm_ = false;
106
instruction_type_ = IT_UNKNOWN;
107
got_f2_prefix_ = false;
108
got_f3_prefix_ = false;
109
got_66_prefix_ = false;
112
InstructionType MiniDisassembler::ProcessPrefixes(byte* start_byte,
113
unsigned int& size) {
114
InstructionType instruction_type = IT_GENERIC;
115
const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
117
switch (opcode.type_) {
118
case IT_PREFIX_ADDRESS:
119
address_is_32_bits_ = !address_default_is_32_bits_;
120
goto nochangeoperand;
121
case IT_PREFIX_OPERAND:
122
operand_is_32_bits_ = !operand_default_is_32_bits_;
126
if (0xF2 == (*start_byte))
127
got_f2_prefix_ = true;
128
else if (0xF3 == (*start_byte))
129
got_f3_prefix_ = true;
130
else if (0x66 == (*start_byte))
131
got_66_prefix_ = true;
133
instruction_type = opcode.type_;
135
// we got a prefix, so add one and check next byte
136
ProcessPrefixes(start_byte + 1, size);
138
break; // not a prefix byte
141
return instruction_type;
144
InstructionType MiniDisassembler::ProcessOpcode(byte* start_byte,
145
unsigned int table_index,
146
unsigned int& size) {
147
const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table
148
byte current_byte = (*start_byte) >> table.shift_;
149
current_byte = current_byte & table.mask_; // Mask out the bits we will use
151
// Check whether the byte we have is inside the table we have.
152
if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
153
instruction_type_ = IT_UNKNOWN;
154
return instruction_type_;
157
const Opcode& opcode = table.table_[current_byte];
158
if (IT_UNUSED == opcode.type_) {
159
// This instruction is not used by the IA-32 ISA, so we indicate
160
// this to the user. Probably means that we were pointed to
161
// a byte in memory that was not the start of an instruction.
162
instruction_type_ = IT_UNUSED;
163
return instruction_type_;
164
} else if (IT_REFERENCE == opcode.type_) {
165
// We are looking at an opcode that has more bytes (or is continued
166
// in the ModR/M byte). Recursively find the opcode definition in
167
// the table for the opcode's next byte.
169
ProcessOpcode(start_byte + 1, opcode.table_index_, size);
170
return instruction_type_;
173
const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode;
174
if (opcode.is_prefix_dependent_) {
175
if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
176
specific_opcode = &opcode.opcode_if_f2_prefix_;
177
} else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
178
specific_opcode = &opcode.opcode_if_f3_prefix_;
179
} else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
180
specific_opcode = &opcode.opcode_if_66_prefix_;
184
// Inv: The opcode type is known.
185
instruction_type_ = specific_opcode->type_;
187
// Let's process the operand types to see if we have any immediate
188
// operands, and/or a ModR/M byte.
190
ProcessOperand(specific_opcode->flag_dest_);
191
ProcessOperand(specific_opcode->flag_source_);
192
ProcessOperand(specific_opcode->flag_aux_);
194
// Inv: We have processed the opcode and incremented operand_bytes_
195
// by the number of bytes of any operands specified by the opcode
196
// that are stored in the instruction (not registers etc.). Now
197
// we need to return the total number of bytes for the opcode and
198
// for the ModR/M or SIB bytes if they are present.
200
if (table.mask_ != 0xff) {
202
// we're looking at a ModR/M byte so we're not going to
203
// count that into the opcode size
204
ProcessModrm(start_byte, size);
207
// need to count the ModR/M byte even if it's just being
208
// used for opcode extension
214
// The ModR/M byte is the next byte.
216
ProcessModrm(start_byte + 1, size);
225
bool MiniDisassembler::ProcessOperand(int flag_operand) {
226
bool succeeded = true;
227
if (AM_NOT_USED == flag_operand)
230
// Decide what to do based on the addressing mode.
231
switch (flag_operand & AM_MASK) {
232
// No ModR/M byte indicated by these addressing modes, and no
233
// additional (e.g. immediate) parameters.
234
case AM_A: // Direct address
235
case AM_F: // EFLAGS register
236
case AM_X: // Memory addressed by the DS:SI register pair
237
case AM_Y: // Memory addressed by the ES:DI register pair
238
case AM_IMPLICIT: // Parameter is implicit, occupies no space in
242
// There is a ModR/M byte but it does not necessarily need
244
case AM_C: // reg field of ModR/M selects a control register
245
case AM_D: // reg field of ModR/M selects a debug register
246
case AM_G: // reg field of ModR/M selects a general register
247
case AM_P: // reg field of ModR/M selects an MMX register
248
case AM_R: // mod field of ModR/M may refer only to a general register
249
case AM_S: // reg field of ModR/M selects a segment register
250
case AM_T: // reg field of ModR/M selects a test register
251
case AM_V: // reg field of ModR/M selects a 128-bit XMM register
255
// In these addressing modes, there is a ModR/M byte and it needs to be
256
// decoded. No other (e.g. immediate) params than indicated in ModR/M.
257
case AM_E: // Operand is either a general-purpose register or memory,
258
// specified by ModR/M byte
259
case AM_M: // ModR/M byte will refer only to memory
260
case AM_Q: // Operand is either an MMX register or memory (complex
261
// evaluation), specified by ModR/M byte
262
case AM_W: // Operand is either a 128-bit XMM register or memory (complex
263
// eval), specified by ModR/M byte
265
should_decode_modrm_ = true;
268
// These addressing modes specify an immediate or an offset value
269
// directly, so we need to look at the operand type to see how many
271
case AM_I: // Immediate data.
272
case AM_J: // Jump to offset.
273
case AM_O: // Operand is at offset.
274
switch (flag_operand & OT_MASK) {
275
case OT_B: // Byte regardless of operand-size attribute.
276
operand_bytes_ += OS_BYTE;
278
case OT_C: // Byte or word, depending on operand-size attribute.
279
if (operand_is_32_bits_)
280
operand_bytes_ += OS_WORD;
282
operand_bytes_ += OS_BYTE;
284
case OT_D: // Doubleword, regardless of operand-size attribute.
285
operand_bytes_ += OS_DOUBLE_WORD;
287
case OT_DQ: // Double-quadword, regardless of operand-size attribute.
288
operand_bytes_ += OS_DOUBLE_QUAD_WORD;
290
case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
292
if (operand_is_32_bits_)
293
operand_bytes_ += OS_48_BIT_POINTER;
295
operand_bytes_ += OS_32_BIT_POINTER;
297
case OT_PS: // 128-bit packed single-precision floating-point data.
298
operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
300
case OT_Q: // Quadword, regardless of operand-size attribute.
301
operand_bytes_ += OS_QUAD_WORD;
303
case OT_S: // 6-byte pseudo-descriptor.
304
operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
306
case OT_SD: // Scalar Double-Precision Floating-Point Value
307
case OT_PD: // Unaligned packed double-precision floating point value
308
operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
311
// Scalar element of a 128-bit packed single-precision
313
// We simply return enItUnknown since we don't have to support
317
case OT_V: // Word or doubleword, depending on operand-size attribute.
318
if (operand_is_32_bits_)
319
operand_bytes_ += OS_DOUBLE_WORD;
321
operand_bytes_ += OS_WORD;
323
case OT_W: // Word, regardless of operand-size attribute.
324
operand_bytes_ += OS_WORD;
327
// Can safely ignore these.
328
case OT_A: // Two one-word operands in memory or two double-word
329
// operands in memory
330
case OT_PI: // Quadword MMX technology register (e.g. mm0)
331
case OT_SI: // Doubleword integer register (e.g., eax)
346
bool MiniDisassembler::ProcessModrm(byte* start_byte,
347
unsigned int& size) {
348
// If we don't need to decode, we just return the size of the ModR/M
349
// byte (there is never a SIB byte in this case).
350
if (!should_decode_modrm_) {
355
// We never care about the reg field, only the combination of the mod
356
// and r/m fields, so let's start by packing those fields together into
358
byte modrm = (*start_byte);
359
byte mod = modrm & 0xC0; // mask out top two bits to get mod field
360
modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
361
mod = mod >> 3; // shift the mod field to the right place
362
modrm = mod | modrm; // combine the r/m and mod fields as discussed
363
mod = mod >> 3; // shift the mod field to bits 2..0
365
// Invariant: modrm contains the mod field in bits 4..3 and the r/m field
366
// in bits 2..0, and mod contains the mod field in bits 2..0
368
const ModrmEntry* modrm_entry = 0;
369
if (address_is_32_bits_)
370
modrm_entry = &s_ia32_modrm_map_[modrm];
372
modrm_entry = &s_ia16_modrm_map_[modrm];
374
// Invariant: modrm_entry points to information that we need to decode
377
// Add to the count of operand bytes, if the ModR/M byte indicates
378
// that some operands are encoded in the instruction.
379
if (modrm_entry->is_encoded_in_instruction_)
380
operand_bytes_ += modrm_entry->operand_size_;
382
// Process the SIB byte if necessary, and return the count
383
// of ModR/M and SIB bytes.
384
if (modrm_entry->use_sib_byte_) {
386
return ProcessSib(start_byte + 1, mod, size);
393
bool MiniDisassembler::ProcessSib(byte* start_byte,
395
unsigned int& size) {
396
// get the mod field from the 2..0 bits of the SIB byte
397
byte sib_base = (*start_byte) & 0x07;
398
if (0x05 == sib_base) {
400
case 0x00: // mod == 00
401
case 0x02: // mod == 10
402
operand_bytes_ += OS_DOUBLE_WORD;
404
case 0x01: // mod == 01
405
operand_bytes_ += OS_BYTE;
407
case 0x03: // mod == 11
408
// According to the IA-32 docs, there does not seem to be a disp
409
// value for this value of mod
};  // namespace sidestep