Added libdisasm to 3rd_party.

Started the move of 'prog' global into Project.
This commit is contained in:
Artur K 2012-03-17 21:36:10 +01:00
parent b85106498e
commit 467ff56356
37 changed files with 9207 additions and 29 deletions

3
3rd_party/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,3 @@
add_definitions(-D_CRT_SECURE_NO_WARNINGS -D__STDC_FORMAT_MACROS)
add_subdirectory(libdisasm)

33
3rd_party/libdisasm/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,33 @@
SET(disasm_LIB_SRCS
ia32_implicit.cpp
ia32_implicit.h
ia32_insn.cpp
ia32_insn.h
ia32_invariant.cpp
ia32_invariant.h
ia32_modrm.cpp
ia32_modrm.h
ia32_opcode_tables.cpp
ia32_opcode_tables.h
ia32_operand.cpp
ia32_operand.h
ia32_reg.cpp
ia32_reg.h
ia32_settings.cpp
ia32_settings.h
libdis.h
qword.h
x86_disasm.cpp
x86_format.cpp
x86_imm.cpp
x86_imm.h
x86_insn.cpp
x86_misc.cpp
x86_operand_list.cpp
x86_operand_list.h
)
add_library(disasm SHARED ${disasm_LIB_SRCS})
add_library(disasm_s STATIC ${disasm_LIB_SRCS})
install(TARGETS disasm DESTINATION lib)
install(FILES libdis.h DESTINATION /include)

424
3rd_party/libdisasm/ia32_implicit.cpp vendored Normal file
View File

@ -0,0 +1,424 @@
#include <stdlib.h>
#include "libdis.h"
#include "ia32_implicit.h"
#include "ia32_insn.h"
#include "ia32_reg.h"
#include "x86_operand_list.h"
/* Conventions: Register operands which are aliases of another register
* operand (e.g. AX in one operand and AL in another) assume that the
* operands are different registers and that alias tracking will resolve
* data flow. This means that something like
* mov ax, al
* would have 'write only' access for AX and 'read only' access for AL,
* even though both AL and AX are read and written */
typedef struct {
uint32_t type;
uint32_t operand;
} op_implicit_list_t;
static op_implicit_list_t list_aaa[] =
/* 37 : AAA : rw AL */
/* 3F : AAS : rw AL */
{{ OP_R | OP_W, REG_BYTE_OFFSET }, {0}}; /* aaa */
static op_implicit_list_t list_aad[] =
/* D5 0A, D5 (ib) : AAD : rw AX */
/* D4 0A, D4 (ib) : AAM : rw AX */
{{ OP_R | OP_W, REG_WORD_OFFSET }, {0}}; /* aad */
static op_implicit_list_t list_call[] =
/* E8, FF, 9A, FF : CALL : rw ESP, rw EIP */
/* C2, C3, CA, CB : RET : rw ESP, rw EIP */
{{ OP_R | OP_W, REG_EIP_INDEX },
{ OP_R | OP_W, REG_ESP_INDEX }, {0}}; /* call, ret */
static op_implicit_list_t list_cbw[] =
/* 98 : CBW : r AL, rw AX */
{{ OP_R | OP_W, REG_WORD_OFFSET },
{ OP_R, REG_BYTE_OFFSET}, {0}}; /* cbw */
static op_implicit_list_t list_cwde[] =
/* 98 : CWDE : r AX, rw EAX */
{{ OP_R | OP_W, REG_DWORD_OFFSET },
{ OP_R, REG_WORD_OFFSET }, {0}}; /* cwde */
static op_implicit_list_t list_clts[] =
/* 0F 06 : CLTS : rw CR0 */
{{ OP_R | OP_W, REG_CTRL_OFFSET}, {0}}; /* clts */
static op_implicit_list_t list_cmpxchg[] =
/* 0F B0 : CMPXCHG : rw AL */
{{ OP_R | OP_W, REG_BYTE_OFFSET }, {0}}; /* cmpxchg */
static op_implicit_list_t list_cmpxchgb[] =
/* 0F B1 : CMPXCHG : rw EAX */
{{ OP_R | OP_W, REG_DWORD_OFFSET }, {0}}; /* cmpxchg */
static op_implicit_list_t list_cmpxchg8b[] =
/* 0F C7 : CMPXCHG8B : rw EDX, rw EAX, r ECX, r EBX */
{{ OP_R | OP_W, REG_DWORD_OFFSET },
{ OP_R | OP_W, REG_DWORD_OFFSET + 2 },
{ OP_R, REG_DWORD_OFFSET + 1 },
{ OP_R, REG_DWORD_OFFSET + 3 }, {0}}; /* cmpxchg8b */
static op_implicit_list_t list_cpuid[] =
/* 0F A2 : CPUID : rw EAX, w EBX, w ECX, w EDX */
{{ OP_R | OP_W, REG_DWORD_OFFSET },
{ OP_W, REG_DWORD_OFFSET + 1 },
{ OP_W, REG_DWORD_OFFSET + 2 },
{ OP_W, REG_DWORD_OFFSET + 3 }, {0}}; /* cpuid */
static op_implicit_list_t list_cwd[] =
/* 99 : CWD/CWQ : rw EAX, w EDX */
{{ OP_R | OP_W, REG_DWORD_OFFSET },
{ OP_W, REG_DWORD_OFFSET + 2 }, {0}}; /* cwd */
static op_implicit_list_t list_daa[] =
/* 27 : DAA : rw AL */
/* 2F : DAS : rw AL */
{{ OP_R | OP_W, REG_BYTE_OFFSET }, {0}}; /* daa */
static op_implicit_list_t list_idiv[] =
/* F6 : DIV, IDIV : r AX, w AL, w AH */
/* FIXED: first op was EAX, not Aw. TODO: verify! */
{{ OP_R, REG_WORD_OFFSET },
{ OP_W, REG_BYTE_OFFSET },
{ OP_W, REG_BYTE_OFFSET + 4 }, {0}}; /* div */
static op_implicit_list_t list_div[] =
/* F7 : DIV, IDIV : rw EDX, rw EAX */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 2 },
{ OP_R | OP_W, REG_DWORD_OFFSET }, {0}}; /* div */
static op_implicit_list_t list_enter[] =
/* C8 : ENTER : rw ESP w EBP */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 4 },
{ OP_R, REG_DWORD_OFFSET + 5 }, {0}}; /* enter */
static op_implicit_list_t list_f2xm1[] =
/* D9 F0 : F2XM1 : rw ST(0) */
/* D9 E1 : FABS : rw ST(0) */
/* D9 E0 : FCHS : rw ST(0) */
/* D9 FF : FCOS : rw ST(0)*/
/* D8, DA : FDIV : rw ST(0) */
/* D8, DA : FDIVR : rw ST(0) */
/* D9 F2 : FPTAN : rw ST(0) */
/* D9 FC : FRNDINT : rw ST(0) */
/* D9 FB : FSINCOS : rw ST(0) */
/* D9 FE : FSIN : rw ST(0) */
/* D9 FA : FSQRT : rw ST(0) */
/* D9 F4 : FXTRACT : rw ST(0) */
{{ OP_R | OP_W, REG_FPU_OFFSET }, {0}}; /* f2xm1 */
static op_implicit_list_t list_fcom[] =
/* D8, DC, DE D9 : FCOM : r ST(0) */
/* DE, DA : FICOM : r ST(0) */
/* DF, D8 : FIST : r ST(0) */
/* D9 E4 : FTST : r ST(0) */
/* D9 E5 : FXAM : r ST(0) */
{{ OP_R, REG_FPU_OFFSET }, {0}}; /* fcom */
static op_implicit_list_t list_fpatan[] =
/* D9 F3 : FPATAN : r ST(0), rw ST(1) */
{{ OP_R, REG_FPU_OFFSET }, {0}}; /* fpatan */
static op_implicit_list_t list_fprem[] =
/* D9 F8, D9 F5 : FPREM : rw ST(0) r ST(1) */
/* D9 FD : FSCALE : rw ST(0), r ST(1) */
{{ OP_R | OP_W, REG_FPU_OFFSET },
{ OP_R, REG_FPU_OFFSET + 1 }, {0}}; /* fprem */
static op_implicit_list_t list_faddp[] =
/* DE C1 : FADDP : r ST(0), rw ST(1) */
/* DE E9 : FSUBP : r ST(0), rw ST(1) */
/* D9 F1 : FYL2X : r ST(0), rw ST(1) */
/* D9 F9 : FYL2XP1 : r ST(0), rw ST(1) */
{{ OP_R, REG_FPU_OFFSET },
{ OP_R | OP_W, REG_FPU_OFFSET + 1 }, {0}}; /* faddp */
static op_implicit_list_t list_fucompp[] =
/* DA E9 : FUCOMPP : r ST(0), r ST(1) */
{{ OP_R, REG_FPU_OFFSET },
{ OP_R, REG_FPU_OFFSET + 1 }, {0}}; /* fucompp */
static op_implicit_list_t list_imul[] =
/* F6 : IMUL : r AL, w AX */
/* F6 : MUL : r AL, w AX */
{{ OP_R, REG_BYTE_OFFSET },
{ OP_W, REG_WORD_OFFSET }, {0}}; /* imul */
static op_implicit_list_t list_mul[] =
/* F7 : IMUL : rw EAX, w EDX */
/* F7 : MUL : rw EAX, w EDX */
{{ OP_R | OP_W, REG_DWORD_OFFSET },
{ OP_W, REG_DWORD_OFFSET + 2 }, {0}}; /* imul */
static op_implicit_list_t list_lahf[] =
/* 9F : LAHF : r EFLAGS, w AH */
{{ OP_R, REG_FLAGS_INDEX },
{ OP_W, REG_BYTE_OFFSET + 4 }, {0}}; /* lahf */
static op_implicit_list_t list_ldmxcsr[] =
/* 0F AE : LDMXCSR : w MXCSR SSE Control Status Reg */
{{ OP_W, REG_MXCSG_INDEX }, {0}}; /* ldmxcsr */
static op_implicit_list_t list_leave[] =
/* C9 : LEAVE : rw ESP, w EBP */
{{ OP_R | OP_W, REG_ESP_INDEX },
{ OP_W, REG_DWORD_OFFSET + 5 }, {0}}; /* leave */
static op_implicit_list_t list_lgdt[] =
/* 0F 01 : LGDT : w GDTR */
{{ OP_W, REG_GDTR_INDEX }, {0}}; /* lgdt */
static op_implicit_list_t list_lidt[] =
/* 0F 01 : LIDT : w IDTR */
{{ OP_W, REG_IDTR_INDEX }, {0}}; /* lidt */
static op_implicit_list_t list_lldt[] =
/* 0F 00 : LLDT : w LDTR */
{{ OP_W, REG_LDTR_INDEX }, {0}}; /* lldt */
static op_implicit_list_t list_lmsw[] =
/* 0F 01 : LMSW : w CR0 */
{{ OP_W, REG_CTRL_OFFSET }, {0}}; /* lmsw */
static op_implicit_list_t list_loop[] =
/* E0, E1, E2 : LOOP : rw ECX */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 1 }, {0}};/* loop */
static op_implicit_list_t list_ltr[] =
/* 0F 00 : LTR : w Task Register */
{{ OP_W, REG_TR_INDEX }, {0}}; /* ltr */
static op_implicit_list_t list_pop[] =
/* 8F, 58, 1F, 07, 17, 0F A1, 0F A9 : POP : rw ESP */
/* FF, 50, 6A, 68, 0E, 16, 1E, 06, 0F A0, 0F A8 : PUSH : rw ESP */
{{ OP_R | OP_W, REG_ESP_INDEX }, {0}}; /* pop, push */
static op_implicit_list_t list_popad[] =
/* 61 : POPAD : rw esp, w edi esi ebp ebx edx ecx eax */
{{ OP_R | OP_W, REG_ESP_INDEX },
{ OP_W, REG_DWORD_OFFSET + 7 },
{ OP_W, REG_DWORD_OFFSET + 6 },
{ OP_W, REG_DWORD_OFFSET + 5 },
{ OP_W, REG_DWORD_OFFSET + 3 },
{ OP_W, REG_DWORD_OFFSET + 2 },
{ OP_W, REG_DWORD_OFFSET + 1 },
{ OP_W, REG_DWORD_OFFSET }, {0}}; /* popad */
static op_implicit_list_t list_popfd[] =
/* 9D : POPFD : rw esp, w eflags */
{{ OP_R | OP_W, REG_ESP_INDEX },
{ OP_W, REG_FLAGS_INDEX }, {0}}; /* popfd */
static op_implicit_list_t list_pushad[] =
/* FF, 50, 6A, 68, 0E, 16, 1E, 06, 0F A0, 0F A8 : PUSH : rw ESP */
/* 60 : PUSHAD : rw esp, r eax ecx edx ebx esp ebp esi edi */
{{ OP_R | OP_W, REG_ESP_INDEX },
{ OP_R, REG_DWORD_OFFSET },
{ OP_R, REG_DWORD_OFFSET + 1 },
{ OP_R, REG_DWORD_OFFSET + 2 },
{ OP_R, REG_DWORD_OFFSET + 3 },
{ OP_R, REG_DWORD_OFFSET + 5 },
{ OP_R, REG_DWORD_OFFSET + 6 },
{ OP_R, REG_DWORD_OFFSET + 7 }, {0}}; /* pushad */
static op_implicit_list_t list_pushfd[] =
/* 9C : PUSHFD : rw esp, r eflags */
{{ OP_R | OP_W, REG_ESP_INDEX },
{ OP_R, REG_FLAGS_INDEX }, {0}}; /* pushfd */
static op_implicit_list_t list_rdmsr[] =
/* 0F 32 : RDMSR : r ECX, w EDX, w EAX */
{{ OP_R, REG_DWORD_OFFSET + 1 },
{ OP_W, REG_DWORD_OFFSET + 2 },
{ OP_W, REG_DWORD_OFFSET }, {0}}; /* rdmsr */
static op_implicit_list_t list_rdpmc[] =
/* 0F 33 : RDPMC : r ECX, w EDX, w EAX */
{{ OP_R, REG_DWORD_OFFSET + 1 },
{ OP_W, REG_DWORD_OFFSET + 2 },
{ OP_W, REG_DWORD_OFFSET }, {0}}; /* rdpmc */
static op_implicit_list_t list_rdtsc[] =
/* 0F 31 : RDTSC : rw EDX, rw EAX */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 2 },
{ OP_R | OP_W, REG_DWORD_OFFSET }, {0}}; /* rdtsc */
static op_implicit_list_t list_rep[] =
/* F3, F2 ... : REP : rw ECX */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 1 }, {0}};/* rep */
static op_implicit_list_t list_rsm[] =
/* 0F AA : RSM : r CR4, r CR0 */
{{ OP_R, REG_CTRL_OFFSET + 4 },
{ OP_R, REG_CTRL_OFFSET }, {0}}; /* rsm */
static op_implicit_list_t list_sahf[] =
/* 9E : SAHF : r ah, rw eflags (set SF ZF AF PF CF) */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sahf */
static op_implicit_list_t list_sgdt[] =
/* 0F : SGDT : r gdtr */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sgdt */
static op_implicit_list_t list_sidt[] =
/* 0F : SIDT : r idtr */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sidt */
static op_implicit_list_t list_sldt[] =
/* 0F : SLDT : r ldtr */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sldt */
static op_implicit_list_t list_smsw[] =
/* 0F : SMSW : r CR0 */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* smsw */
static op_implicit_list_t list_stmxcsr[] =
/* 0F AE : STMXCSR : r MXCSR */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* stmxcsr */
static op_implicit_list_t list_str[] =
/* 0F 00 : STR : r TR (task register) */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* str */
static op_implicit_list_t list_sysenter[] =
/* 0F 34 : SYSENTER : w cs, w eip, w ss, w esp, r CR0, w eflags
* r sysenter_cs_msr, sysenter_esp_msr, sysenter_eip_msr */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sysenter */
static op_implicit_list_t list_sysexit[] =
/* 0F 35 : SYSEXIT : r edx, r ecx, w cs, w eip, w ss, w esp
* r sysenter_cs_msr */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sysexit */
static op_implicit_list_t list_wrmsr[] =
/* 0F 30 : WRMST : r edx, r eax, r ecx */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* wrmsr */
static op_implicit_list_t list_xlat[] =
/* D7 : XLAT : rw al r ebx (ptr) */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* xlat */
/* TODO:
* monitor 0f 01 c8 eax OP_R ecx OP_R edx OP_R
* mwait 0f 01 c9 eax OP_R ecx OP_R
*/
static op_implicit_list_t list_monitor[] =
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* monitor */
static op_implicit_list_t list_mwait[] =
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* mwait */
op_implicit_list_t *op_implicit_list[] = {
/* This is a list of implicit operands which are read/written by
* various x86 instructions. Note that modifications to the stack
* register are mentioned here, but that additional information on
* the effect an instruction has on the stack is contained in the
* x86_insn_t 'stack_mod' and 'stack_mod_val' fields. Use of the
* eflags register, i.e. setting, clearing, and testing flags, is
* not recorded here but rather in the flags_set and flags_tested
* fields of the x86_insn_t.*/
NULL,
list_aaa, list_aad, list_call, list_cbw, /* 1 - 4 */
list_cwde, list_clts, list_cmpxchg, list_cmpxchgb, /* 5 - 8 */
list_cmpxchg8b, list_cpuid, list_cwd, list_daa, /* 9 - 12 */
list_idiv, list_div, list_enter, list_f2xm1, /* 13 - 16 */
list_fcom, list_fpatan, list_fprem, list_faddp, /* 17 - 20 */
list_fucompp, list_imul, list_mul, list_lahf, /* 21 - 24 */
list_ldmxcsr, list_leave, list_lgdt, list_lidt, /* 25 - 28 */
list_lldt, list_lmsw, list_loop, list_ltr, /* 29 - 32 */
list_pop, list_popad, list_popfd, list_pushad, /* 33 - 36 */
list_pushfd, list_rdmsr, list_rdpmc, list_rdtsc, /* 37 - 40 */
/* NOTE: 'REP' is a hack since it is a prefix: if its position
* in the table changes, then change IDX_IMPLICIT_REP in the .h */
list_rep, list_rsm, list_sahf, list_sgdt, /* 41 - 44 */
list_sidt, list_sldt, list_smsw, list_stmxcsr, /* 45 - 48 */
list_str, list_sysenter, list_sysexit, list_wrmsr, /* 49 - 52 */
list_xlat, list_monitor, list_mwait, /* 53 - 55*/
NULL /* end of list */
};
#define LAST_IMPL_IDX 55
static void handle_impl_reg( x86_op_t *op, uint32_t val ) {
x86_reg_t *reg = &op->data.reg;
op->type = op_register;
ia32_handle_register( reg, (unsigned int) val );
switch (reg->size) {
case 1:
op->datatype = op_byte; break;
case 2:
op->datatype = op_word; break;
case 4:
op->datatype = op_dword; break;
case 8:
op->datatype = op_qword; break;
case 10:
op->datatype = op_extreal; break;
case 16:
op->datatype = op_dqword; break;
}
return;
}
/* 'impl_idx' is the value from the opcode table: between 1 and LAST_IMPL_IDX */
/* returns number of operands added */
unsigned int Ia32_Decoder::ia32_insn_implicit_ops( unsigned int impl_idx ) {
op_implicit_list_t *list;
x86_oplist_t * existing=0;
x86_op_t *op;
unsigned int num = 0;
if (! impl_idx || impl_idx > LAST_IMPL_IDX ) {
return 0;
}
for ( list = op_implicit_list[impl_idx]; list->type; list++, num++ ) {
enum x86_op_access access = (enum x86_op_access) OP_PERM(list->type);
x86_op_flags flags;
flags.whole = (OP_FLAGS(list->type) >> 12);
op = NULL;
/* In some cases (MUL), EAX is an implicit operand hardcoded in
* the instruction without being explicitly listed in assembly.
* For this situation, find the hardcoded operand and add the
* implied flag rather than adding a new implicit operand. */
existing=0;
if (ia32_true_register_id(list->operand) == REG_DWORD_OFFSET) {
for ( existing = m_decoded->operands; existing; existing = existing->next ) {
if (existing->op.type == op_register &&
existing->op.data.reg.id == list->operand) {
op = &existing->op;
break;
}
}
}
if (!op) {
op = m_decoded->x86_operand_new();
/* all implicit operands are registers */
handle_impl_reg( op, list->operand );
/* decrement the 'explicit count' incremented by default in
* x86_operand_new */
m_decoded->explicit_count = m_decoded->explicit_count -1;
}
if (!op) {
return num; /* gah! return early */
}
op->access = x86_op_access((int)op->access | (int)access);
op->flags.whole |= flags.whole;
op->flags.op_implied=true;
}
return num;
}

13
3rd_party/libdisasm/ia32_implicit.h vendored Normal file
View File

@ -0,0 +1,13 @@
#ifndef IA32_IMPLICIT_H
#define IA32_IMPLICIT_H
#include "libdis.h"
/* OK, this is a hack to deal with prefixes having implicit operands...
* thought I had removed all the old hackishness ;( */
#define IDX_IMPLICIT_REP 41 /* change this if the table changes! */
//unsigned int ia32_insn_implicit_ops( x86_insn_t *insn, unsigned int impl_idx );
#endif

624
3rd_party/libdisasm/ia32_insn.cpp vendored Normal file
View File

@ -0,0 +1,624 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cassert>
#include "qword.h"
#include "ia32_insn.h"
#include "ia32_opcode_tables.h"
#include "ia32_reg.h"
#include "ia32_operand.h"
#include "ia32_implicit.h"
#include "ia32_settings.h"
#include "libdis.h"
extern ia32_table_desc_t ia32_tables[];
extern ia32_settings_t ia32_settings;
#define IS_SP( op ) (op->type == op_register && \
(op->data.reg.id == REG_ESP_INDEX || \
op->data.reg.alias == REG_ESP_INDEX) )
#define IS_IMM( op ) (op->type == op_immediate )
#ifdef WIN32
# define INLINE
#else
# define INLINE inline
#endif
/* for calculating stack modification based on an operand */
INLINE int32_t x86_op_t::long_from_operand() {
if (! IS_IMM(this) ) {
return 0L;
}
switch ( datatype ) {
case op_byte:
return (int32_t) data.sbyte;
case op_word:
return (int32_t) data.sword;
case op_qword:
return (int32_t) data.sqword;
case op_dword:
return data.sdword;
default:
/* these are not used in stack insn */
break;
}
return 0L;
}
/* determine what this insn does to the stack */
void Ia32_Decoder::ia32_stack_mod() {
x86_op_t *dest, *src = NULL;
assert(this);
if (! m_decoded->operands ) {
return;
}
dest = &m_decoded->operands->op;
if ( dest ) {
src = &m_decoded->operands->next->op;
}
m_decoded->stack_mod = 0;
m_decoded->stack_mod_val = 0;
switch ( m_decoded->type ) {
case insn_call:
case insn_callcc:
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = m_decoded->addr_size * -1;
break;
case insn_push:
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = m_decoded->addr_size * -1;
break;
case insn_return:
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = m_decoded->addr_size;
case insn_int: case insn_intcc:
case insn_iret:
break;
case insn_pop:
m_decoded->stack_mod = 1;
if (! IS_SP( dest ) ) {
m_decoded->stack_mod_val = m_decoded->op_size;
} /* else we don't know the stack change in a pop esp */
break;
case insn_enter:
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_leave:
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_pushregs:
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_popregs:
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_pushflags:
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_popflags:
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = 0; /* TODO : FIX */
break;
case insn_add:
if ( IS_SP( dest ) ) {
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = src->long_from_operand();
}
break;
case insn_sub:
if ( IS_SP( dest ) ) {
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = src->long_from_operand();
m_decoded->stack_mod_val *= -1;
}
break;
case insn_inc:
if ( IS_SP( dest ) ) {
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = 1;
}
break;
case insn_dec:
if ( IS_SP( dest ) ) {
m_decoded->stack_mod = 1;
m_decoded->stack_mod_val = 1;
}
break;
case insn_mov: case insn_movcc:
case insn_xchg: case insn_xchgcc:
case insn_mul: case insn_div:
case insn_shl: case insn_shr:
case insn_rol: case insn_ror:
case insn_and: case insn_or:
case insn_not: case insn_neg:
case insn_xor:
if ( IS_SP( dest ) ) {
m_decoded->stack_mod = 1;
}
break;
default:
break;
}
if (! strcmp("enter", m_decoded->mnemonic) ) {
m_decoded->stack_mod = 1;
} else if (! strcmp("leave", m_decoded->mnemonic) ) {
m_decoded->stack_mod = 1;
}
/* for mov, etc we return 0 -- unknown stack mod */
return;
}
/* get the cpu details for this insn from cpu flags int */
void Ia32_Decoder::ia32_handle_cpu( unsigned int cpu ) {
cpu = (enum x86_insn_cpu) CPU_MODEL(cpu);
m_decoded->isa = (enum x86_insn_isa) ((ISA_SUBSET(cpu)) >> 16);
return;
}
/* handle mnemonic type and group */
void Ia32_Decoder::ia32_handle_mnemtype(unsigned int mnemtype) {
unsigned int type = mnemtype & ~INS_FLAG_MASK;
m_decoded->group = (enum x86_insn_t::x86_insn_group) ((INS_GROUP(type)) >> 12);
m_decoded->type = (enum x86_insn_type) INS_TYPE(type);
return;
}
void Ia32_Decoder::ia32_handle_notes(unsigned int notes) {
m_decoded->note = (enum x86_insn_note) notes;
return;
}
void Ia32_Decoder::ia32_handle_eflags( unsigned int eflags) {
unsigned int flags;
/* handle flags effected */
flags = INS_FLAGS_TEST(eflags);
/* handle weird OR cases */
/* these are either JLE (ZF | SF<>OF) or JBE (CF | ZF) */
if (flags & INS_TEST_OR) {
flags &= ~INS_TEST_OR;
if ( flags & INS_TEST_ZERO ) {
flags &= ~INS_TEST_ZERO;
if ( flags & INS_TEST_CARRY ) {
flags &= ~INS_TEST_CARRY ;
flags |= (int)insn_carry_or_zero_set;
} else if ( flags & INS_TEST_SFNEOF ) {
flags &= ~INS_TEST_SFNEOF;
flags |= (int)insn_zero_set_or_sign_ne_oflow;
}
}
}
m_decoded->flags_tested = (enum x86_flag_status) flags;
m_decoded->flags_set = (enum x86_flag_status) (INS_FLAGS_SET(eflags) >> 16);
return;
}
void Ia32_Decoder::ia32_handle_prefix( unsigned int prefixes ) {
m_decoded->prefix = (enum x86_insn_prefix) (prefixes & PREFIX_MASK); // >> 20;
if (! (m_decoded->prefix & PREFIX_PRINT_MASK) ) {
/* no printable prefixes */
m_decoded->prefix = insn_no_prefix;
}
/* concat all prefix strings */
if ( (unsigned int)m_decoded->prefix & PREFIX_LOCK ) {
strncat(m_decoded->prefix_string, "lock ", 32 -
strlen(m_decoded->prefix_string));
}
if ( (unsigned int)m_decoded->prefix & PREFIX_REPNZ ) {
strncat(m_decoded->prefix_string, "repnz ", 32 -
strlen(m_decoded->prefix_string));
} else if ( (unsigned int)m_decoded->prefix & PREFIX_REPZ ) {
strncat(m_decoded->prefix_string, "repz ", 32 -
strlen(m_decoded->prefix_string));
}
return;
}
static void reg_32_to_16( x86_op_t *op, x86_insn_t *insn, void *arg ) {
/* if this is a 32-bit register and it is a general register ... */
if ( op->type == op_register && op->data.reg.size == 4 &&
(op->data.reg.type & reg_gen) ) {
/* WORD registers are 8 indices off from DWORD registers */
ia32_handle_register( &(op->data.reg),
op->data.reg.id + 8 );
}
}
void Ia32_Decoder::handle_insn_metadata( ia32_insn_t *raw_insn ) {
ia32_handle_mnemtype( raw_insn->mnem_flag );
ia32_handle_notes( raw_insn->notes );
ia32_handle_eflags( raw_insn->flags_effected );
ia32_handle_cpu(raw_insn->cpu );
ia32_stack_mod();
}
size_t Ia32_Decoder::ia32_decode_insn( unsigned char *buf, size_t buf_len,
ia32_insn_t *raw_insn, unsigned int prefixes ) {
size_t size, op_size;
unsigned char modrm;
/* this should never happen, but just in case... */
if ( raw_insn->mnem_flag == INS_INVALID ) {
return 0;
}
if (ia32_settings.options & opt_16_bit) {
op_size = ( prefixes & PREFIX_OP_SIZE ) ? 4 : 2;
m_decoded->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 4 : 2;
} else {
op_size = ( prefixes & PREFIX_OP_SIZE ) ? 2 : 4;
m_decoded->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 2 : 4;
}
/* ++++ 1. Copy mnemonic and mnemonic-flags to CODE struct */
if ((ia32_settings.options & opt_att_mnemonics) && raw_insn->mnemonic_att[0]) {
strncpy( m_decoded->mnemonic, raw_insn->mnemonic_att, 16 );
}
else {
strncpy( m_decoded->mnemonic, raw_insn->mnemonic, 16 );
}
ia32_handle_prefix( prefixes );
handle_insn_metadata( raw_insn );
/* prefetch the next byte in case it is a modr/m byte -- saves
* worrying about whether the 'mod/rm' operand or the 'reg' operand
* occurs first */
modrm = GET_BYTE( buf, buf_len );
/* ++++ 2. Decode Explicit Operands */
/* Intel uses up to 3 explicit operands in its instructions;
* the first is 'dest', the second is 'src', and the third
* is an additional source value (usually an immediate value,
* e.g. in the MUL instructions). These three explicit operands
* are encoded in the opcode tables, even if they are not used
* by the instruction. Additional implicit operands are stored
* in a supplemental table and are handled later. */
op_size = ia32_decode_operand( buf, buf_len, raw_insn->dest,
raw_insn->dest_flag, prefixes, modrm );
/* advance buffer, increase size if necessary */
buf += op_size;
buf_len -= op_size;
size = op_size;
op_size = ia32_decode_operand( buf, buf_len, raw_insn->src,
raw_insn->src_flag, prefixes, modrm );
buf += op_size;
buf_len -= op_size;
size += op_size;
op_size = ia32_decode_operand( buf, buf_len, raw_insn->aux,
raw_insn->aux_flag, prefixes, modrm );
size += op_size;
/* ++++ 3. Decode Implicit Operands */
/* apply implicit operands */
ia32_insn_implicit_ops( raw_insn->implicit_ops );
/* we have one small inelegant hack here, to deal with
* the two prefixes that have implicit operands. If Intel
* adds more, we'll change the algorithm to suit :) */
if ( (prefixes & PREFIX_REPZ) || (prefixes & PREFIX_REPNZ) ) {
ia32_insn_implicit_ops( IDX_IMPLICIT_REP );
}
/* 16-bit hack: foreach operand, if 32-bit reg, make 16-bit reg */
//TODO: find a better way to handle this
if ( op_size == 2 ) {
m_decoded->x86_operand_foreach( reg_32_to_16, NULL, op_any );
}
return size;
}
/* convenience routine */
#define USES_MOD_RM(flag) \
(flag == ADDRMETH_E || flag == ADDRMETH_M || flag == ADDRMETH_Q || \
flag == ADDRMETH_W || flag == ADDRMETH_R)
static int uses_modrm_flag( unsigned int flag ) {
unsigned int meth;
if ( flag == ARG_NONE ) {
return 0;
}
meth = (flag & ADDRMETH_MASK);
if ( USES_MOD_RM(meth) ) {
return 1;
}
return 0;
}
/* This routine performs the actual byte-by-byte opcode table lookup.
* Originally it was pretty simple: get a byte, adjust it to a proper
* index into the table, then check the table row at that index to
* determine what to do next. But is anything that simple with Intel?
* This is now a huge, convoluted mess, mostly of bitter comments. */
/* buf: pointer to next byte to read from stream
* buf_len: length of buf
* table: index of table to use for lookups
* raw_insn: output pointer that receives opcode definition
* prefixes: output integer that is encoded with prefixes in insn
* returns : number of bytes consumed from stream during lookup */
size_t ia32_table_lookup( unsigned char *buf, size_t buf_len,
unsigned int table, ia32_insn_t **raw_insn,
unsigned int *prefixes ) {
unsigned char *next, op = buf[0]; /* byte value -- 'opcode' */
size_t size = 1, sub_size = 0, next_len;
ia32_table_desc_t *table_desc;
unsigned int subtable, prefix = 0, recurse_table = 0;
table_desc = &ia32_tables[table];
op = GET_BYTE( buf, buf_len );
if ( table_desc->type == tbl_fpu && op > table_desc->maxlim) {
/* one of the fucking FPU tables out of the 00-BH range */
/* OK,. this is a bit of a hack -- the proper way would
* have been to use subtables in the 00-BF FPU opcode tables,
* but that is rather wasteful of space... */
table_desc = &ia32_tables[table +1];
}
/* PERFORM TABLE LOOKUP */
/* ModR/M trick: shift extension bits into lowest bits of byte */
/* Note: non-ModR/M tables have a shift value of 0 */
op >>= table_desc->shift;
/* ModR/M trick: mask out high bits to turn extension into an index */
/* Note: non-ModR/M tables have a mask value of 0xFF */
op &= table_desc->mask;
/* Sparse table trick: check that byte is <= max value */
/* Note: full (256-entry) tables have a maxlim of 155 */
if ( op > table_desc->maxlim ) {
/* this is a partial table, truncated at the tail,
and op is out of range! */
return INVALID_INSN;
}
/* Sparse table trick: check that byte is >= min value */
/* Note: full (256-entry) tables have a minlim of 0 */
if ( table_desc->minlim > op ) {
/* this is a partial table, truncated at the head,
and op is out of range! */
return INVALID_INSN;
}
/* adjust op to be an offset from table index 0 */
op -= table_desc->minlim;
/* Yay! 'op' is now fully adjusted to be an index into 'table' */
*raw_insn = &(table_desc->table[op]);
//printf("BYTE %X TABLE %d OP %X\n", buf[0], table, op );
if ( (*raw_insn)->mnem_flag & INS_FLAG_PREFIX ) {
prefix = (*raw_insn)->mnem_flag & PREFIX_MASK;
}
/* handle escape to a multibyte/coproc/extension/etc table */
/* NOTE: if insn is a prefix and has a subtable, then we
* only recurse if this is the first prefix byte --
* that is, if *prefixes is 0.
* NOTE also that suffix tables are handled later */
subtable = (*raw_insn)->table;
if ( subtable && ia32_tables[subtable].type != tbl_suffix &&
(! prefix || ! *prefixes) ) {
if ( ia32_tables[subtable].type == tbl_ext_ext ||
ia32_tables[subtable].type == tbl_fpu_ext ) {
/* opcode extension: reuse current byte in buffer */
next = buf;
next_len = buf_len;
} else {
/* "normal" opcode: advance to next byte in buffer */
if ( buf_len > 1 ) {
next = &buf[1];
next_len = buf_len - 1;
}
else {
// buffer is truncated
return INVALID_INSN;
}
}
/* we encountered a multibyte opcode: recurse using the
* table specified in the opcode definition */
sub_size = ia32_table_lookup( next, next_len, subtable,
raw_insn, prefixes );
/* SSE/prefix hack: if the original opcode def was a
* prefix that specified a subtable, and the subtable
* lookup returned a valid insn, then we have encountered
* an SSE opcode definition; otherwise, we pretend we
* never did the subtable lookup, and deal with the
* prefix normally later */
if ( prefix && ( sub_size == INVALID_INSN ||
INS_TYPE((*raw_insn)->mnem_flag) == INS_INVALID ) ) {
/* this is a prefix, not an SSE insn :
* lookup next byte in main table,
* subsize will be reset during the
* main table lookup */
recurse_table = 1;
} else {
/* this is either a subtable (two-byte) insn
* or an invalid insn: either way, set prefix
* to NULL and end the opcode lookup */
prefix = 0;
// short-circuit lookup on invalid insn
if (sub_size == INVALID_INSN) return INVALID_INSN;
}
} else if ( prefix ) {
recurse_table = 1;
}
/* by default, we assume that we have the opcode definition,
* and there is no need to recurse on the same table, but
* if we do then a prefix was encountered... */
if ( recurse_table ) {
/* this must have been a prefix: use the same table for
* lookup of the next byte */
sub_size = ia32_table_lookup( &buf[1], buf_len - 1, table,
raw_insn, prefixes );
// short-circuit lookup on invalid insn
if (sub_size == INVALID_INSN) return INVALID_INSN;
/* a bit of a hack for branch hints */
if ( prefix & BRANCH_HINT_MASK ) {
if ( INS_GROUP((*raw_insn)->mnem_flag) == INS_EXEC ) {
/* segment override prefixes are invalid for
* all branch instructions, so delete them */
prefix &= ~PREFIX_REG_MASK;
} else {
prefix &= ~BRANCH_HINT_MASK;
}
}
/* apply prefix to instruction */
/* TODO: implement something enforcing prefix groups */
(*prefixes) |= prefix;
}
/* if this lookup was in a ModR/M table, then an opcode byte is
* NOT consumed: subtract accordingly. NOTE that if none of the
* operands used the ModR/M, then we need to consume the byte
* here, but ONLY in the 'top-level' opcode extension table */
if ( table_desc->type == tbl_ext_ext ) {
/* extensions-to-extensions never consume a byte */
--size;
} else if ( (table_desc->type == tbl_extension ||
table_desc->type == tbl_fpu ||
table_desc->type == tbl_fpu_ext ) &&
/* extensions that have an operand encoded in ModR/M
* never consume a byte */
(uses_modrm_flag((*raw_insn)->dest_flag) ||
uses_modrm_flag((*raw_insn)->src_flag) ) ) {
--size;
}
size += sub_size;
return size;
}
size_t Ia32_Decoder::handle_insn_suffix( unsigned char *buf, size_t buf_len,
ia32_insn_t *raw_insn ) {
ia32_table_desc_t *table_desc;
ia32_insn_t *sfx_insn;
size_t size;
unsigned int prefixes = 0;
table_desc = &ia32_tables[raw_insn->table];
size = ia32_table_lookup( buf, buf_len, raw_insn->table, &sfx_insn,
&prefixes );
if (size == INVALID_INSN || sfx_insn->mnem_flag == INS_INVALID ) {
return 0;
}
strncpy( m_decoded->mnemonic, sfx_insn->mnemonic, 16 );
handle_insn_metadata( sfx_insn );
return 1;
}
/* invalid instructions are handled by returning 0 [error] from the
* function, setting the size of the insn to 1 byte, and copying
* the byte at the start of the invalid insn into the x86_insn_t.
* if the caller is saving the x86_insn_t for invalid instructions,
* instead of discarding them, this will maintain a consistent
* address space in the x86_insn_ts */
/* this function is called by the controlling disassembler, so its name and
* calling convention cannot be changed */
/* buf points to the loc of the current opcode (start of the
* instruction) in the instruction stream. The instruction
* stream is assumed to be a buffer of bytes read directly
* from the file for the purpose of disassembly; a mem-mapped
* file is ideal for * this.
* insn points to a code structure to be filled by instr_decode
* returns the size of the decoded instruction in bytes */
size_t Ia32_Decoder::ia32_disasm_addr( unsigned char * buf, size_t buf_len) {
ia32_insn_t *raw_insn = NULL;
unsigned int prefixes = 0;
size_t _size, sfx_size;
assert(m_decoded);
if ( (ia32_settings.options & opt_ignore_nulls) && buf_len > 3 &&
!buf[0] && !buf[1] && !buf[2] && !buf[3]) {
/* IF IGNORE_NULLS is set AND
* first 4 bytes in the intruction stream are NULL
* THEN return 0 (END_OF_DISASSEMBLY) */
/* TODO: set errno */
m_decoded->make_invalid(buf);
return 0; /* 4 00 bytes in a row? This isn't code! */
}
/* Perform recursive table lookup starting with main table (0) */
_size = ia32_table_lookup(buf, buf_len, idx_Main, &raw_insn, &prefixes);
if ( _size == INVALID_INSN || _size > buf_len || raw_insn->mnem_flag == INS_INVALID ) {
m_decoded->make_invalid( buf );
/* TODO: set errno */
return 0;
}
/* We now have the opcode itself figured out: we can decode
* the rest of the instruction. */
_size += ia32_decode_insn( &buf[_size], buf_len - _size, raw_insn, prefixes );
if ( raw_insn->mnem_flag & INS_FLAG_SUFFIX ) {
/* AMD 3DNow! suffix -- get proper operand type here */
sfx_size = handle_insn_suffix( &buf[_size], buf_len - _size,
raw_insn);
if (! sfx_size ) {
/* TODO: set errno */
m_decoded->make_invalid( buf );
return 0;
}
_size += sfx_size;
}
if (! _size ) {
/* invalid insn */
m_decoded->make_invalid( buf );
return 0;
}
m_decoded->size = _size;
return _size; /* return size of instruction in bytes */
}

509
3rd_party/libdisasm/ia32_insn.h vendored Normal file
View File

@ -0,0 +1,509 @@
#ifndef IA32_INSN_H
#define IA32_INSN_H
/* this file contains the structure of opcode definitions and the
* constants they use */
#include <sys/types.h>
#include "libdis.h"
#define GET_BYTE( buf, buf_len ) buf_len ? *buf : 0
#define OP_SIZE_16 1
#define OP_SIZE_32 2
#define ADDR_SIZE_16 4
#define ADDR_SIZE_32 8
#define MAX_INSTRUCTION_SIZE 20
/* invalid instructions are handled by returning 0 [error] from the
* function, setting the size of the insn to 1 byte, and copying
* the byte at the start of the invalid insn into the x86_insn_t.
* if the caller is saving the x86_insn_t for invalid instructions,
* instead of discarding them, this will maintain a consistent
* address space in the x86_insn_ts */
#define INVALID_INSN ((size_t) -1) /* return value for invalid insn */
#define MAKE_INVALID( i, buf ) \
strcpy( i->mnemonic, "invalid" ); \
x86_oplist_free( i ); \
i->size = 1; \
i->group = insn_none; \
i->type = insn_invalid; \
memcpy( i->bytes, buf, 1 );
size_t ia32_disasm_addr( unsigned char * buf, size_t buf_len,
x86_insn_t *insn);
/* --------------------------------------------------------- Table Lookup */
/* IA32 Instruction defintion for ia32_opcodes.c */
struct ia32_insn_t{
unsigned int table; /* escape to this sub-table */
unsigned int mnem_flag; /* Flags referring to mnemonic */
unsigned int notes; /* Notes for this instruction */
unsigned int dest_flag, src_flag, aux_flag; /* and for specific operands */
unsigned int cpu; /* minimumCPU [AND with clocks?? */
char mnemonic[16]; /* buffers for building instruction */
char mnemonic_att[16]; /* at&t style mnemonic name */
int32_t dest;
int32_t src;
int32_t aux;
unsigned int flags_effected;
unsigned int implicit_ops; /* implicit operands */
};
/* --------------------------------------------------------- Prefixes */
/* Prefix Flags */
/* Prefixes, same order as in the manual */
/* had to reverse the values of the first three as they were entered into
* libdis.h incorrectly. */
#define PREFIX_LOCK 0x0004
#define PREFIX_REPNZ 0x0002
#define PREFIX_REPZ 0x0001
#define PREFIX_OP_SIZE 0x0010
#define PREFIX_ADDR_SIZE 0x0020
#define PREFIX_CS 0x0100
#define PREFIX_SS 0x0200
#define PREFIX_DS 0x0300
#define PREFIX_ES 0x0400
#define PREFIX_FS 0x0500
#define PREFIX_GS 0x0600
#define PREFIX_TAKEN 0x1000 /* branch taken */
#define PREFIX_NOTTAKEN 0x2000 /* branch not taken */
#define PREFIX_REG_MASK 0x0F00
#define BRANCH_HINT_MASK 0x3000
#define PREFIX_PRINT_MASK 0x000F /* printable prefixes */
#define PREFIX_MASK 0xFFFF
/* ---------------------------------------------------------- CPU Type */
#define cpu_8086 0x0001
#define cpu_80286 0x0002
#define cpu_80386 0x0003
#define cpu_80387 0x0004 /* originally these were a co-proc */
#define cpu_80486 0x0005
#define cpu_PENTIUM 0x0006
#define cpu_PENTPRO 0x0007
#define cpu_PENTIUM2 0x0008
#define cpu_PENTIUM3 0x0009
#define cpu_PENTIUM4 0x000A
#define cpu_K6 0x0010
#define cpu_K7 0x0020
#define cpu_ATHLON 0x0030
#define CPU_MODEL_MASK 0xFFFF
#define CPU_MODEL(cpu) (cpu & CPU_MODEL_MASK)
/* intel instruction subsets */
#define isa_GP 0x10000 /* General Purpose Instructions */
#define isa_FPU 0x20000 /* FPU instructions */
#define isa_FPUMGT 0x30000 /* FPU/SIMD Management */
#define isa_MMX 0x40000 /* MMX */
#define isa_SSE1 0x50000 /* SSE */
#define isa_SSE2 0x60000 /* SSE 2 */
#define isa_SSE3 0x70000 /* SSE 3 */
#define isa_3DNOW 0x80000 /* AMD 3d Now */
#define isa_SYS 0x90000 /* System Instructions */
#define ISA_SUBSET_MASK 0xFFFF0000
#define ISA_SUBSET(isa) (isa & ISA_SUBSET_MASK)
/* ------------------------------------------------------ Operand Decoding */
#define ARG_NONE 0
/* Using a mask allows us to store info such as OP_SIGNED in the
* operand flags field */
#define OPFLAGS_MASK 0x0000FFFF
/* Operand Addressing Methods, per intel manual */
#define ADDRMETH_MASK 0x00FF0000
/* note: for instructions with implied operands, use no ADDRMETH */
#define ADDRMETH_A 0x00010000
#define ADDRMETH_C 0x00020000
#define ADDRMETH_D 0x00030000
#define ADDRMETH_E 0x00040000
#define ADDRMETH_F 0x00050000
#define ADDRMETH_G 0x00060000
#define ADDRMETH_I 0x00070000
#define ADDRMETH_J 0x00080000
#define ADDRMETH_M 0x00090000
#define ADDRMETH_O 0x000A0000
#define ADDRMETH_P 0x000B0000
#define ADDRMETH_Q 0x000C0000
#define ADDRMETH_R 0x000D0000
#define ADDRMETH_S 0x000E0000
#define ADDRMETH_T 0x000F0000
#define ADDRMETH_V 0x00100000
#define ADDRMETH_W 0x00110000
#define ADDRMETH_X 0x00120000
#define ADDRMETH_Y 0x00130000
#define ADDRMETH_RR 0x00140000 /* gen reg hard-coded in opcode */
#define ADDRMETH_RS 0x00150000 /* seg reg hard-coded in opcode */
#define ADDRMETH_RT 0x00160000 /* test reg hard-coded in opcode */
#define ADDRMETH_RF 0x00170000 /* fpu reg hard-coded in opcode */
#define ADDRMETH_II 0x00180000 /* immediate hard-coded in opcode */
#define ADDRMETH_PP 0x00190000 /* mm reg ONLY in modr/m field */
#define ADDRMETH_VV 0x001A0000 /* xmm reg ONLY in mod/rm field */
/* Operand Types, per intel manual */
#define OPTYPE_MASK 0xFF000000
#define OPTYPE_a 0x01000000 /* BOUND: h:h or w:w */
#define OPTYPE_b 0x02000000 /* byte */
#define OPTYPE_c 0x03000000 /* byte or word */
#define OPTYPE_d 0x04000000 /* word */
#define OPTYPE_dq 0x05000000 /* qword */
#define OPTYPE_p 0x06000000 /* 16:16 or 16:32 pointer */
#define OPTYPE_pi 0x07000000 /* dword MMX reg */
#define OPTYPE_ps 0x08000000 /* 128-bit single fp */
#define OPTYPE_q 0x09000000 /* dword */
#define OPTYPE_s 0x0A000000 /* 6-byte descriptor */
#define OPTYPE_ss 0x0B000000 /* scalar of 128-bit single fp */
#define OPTYPE_si 0x0C000000 /* word general register */
#define OPTYPE_v 0x0D000000 /* hword or word */
#define OPTYPE_w 0x0E000000 /* hword */
#define OPTYPE_m 0x0F000000 /* to handle LEA */
#define OPTYPE_none 0xFF000000 /* no valid operand size, INVLPG */
/* custom ones for FPU instructions */
#define OPTYPE_fs 0x10000000 /* pointer to single-real*/
#define OPTYPE_fd 0x20000000 /* pointer to double real */
#define OPTYPE_fe 0x30000000 /* pointer to extended real */
#define OPTYPE_fb 0x40000000 /* pointer to packed BCD */
#define OPTYPE_fv 0x50000000 /* pointer to FPU env: 14|28-bytes */
#define OPTYPE_ft 0x60000000 /* pointer to FPU state: 94|108-bytes */
#define OPTYPE_fx 0x70000000 /* pointer to FPU regs: 512 bites */
#define OPTYPE_fp 0x80000000 /* general fpu register: dbl ext */
/* SSE2 operand types */
#define OPTYPE_sd 0x90000000 /* scalar of 128-bit double fp */
#define OPTYPE_pd 0xA0000000 /* 128-bit double fp */
/* ---------------------------------------------- Opcode Table Descriptions */
/* the table type describes how to handle byte/size increments before
* and after lookup. Some tables re-use the current byte, others
* consume a byte only if the ModR/M encodes no operands, etc */
enum ia32_tbl_type_id {
tbl_opcode = 0, /* standard opcode table: no surprises */
tbl_prefix, /* Prefix Override, e.g. 66/F2/F3 */
tbl_suffix, /* 3D Now style */
tbl_extension, /* ModR/M extension: 00-FF -> 00-07 */
tbl_ext_ext, /* extension of modr/m using R/M field */
tbl_fpu, /* fpu table: 00-BF -> 00-0F */
tbl_fpu_ext /* fpu extension : C0-FF -> 00-1F */
};
/* How it works:
* Bytes are 'consumed' if the next table lookup requires that the byte
* pointer be advanced in the instruction stream. 'Does not consume' means
* that, when the lookup function recurses, the same byte it re-used in the
* new table. It also means that size is not decremented, for example when
* a ModR/M byte is used. Note that tbl_extension (ModR/M) instructions that
* do not increase the size of an insn with their operands have a forced
3 size increase in the lookup algo. Weird, yes, confusing, yes, welcome
* to the Intel ISA. Another note: tbl_prefix is used as an override, so an
* empty insn in a prefix table causes the instruction in the original table
* to be used, rather than an invalid insn being generated.
* tbl_opcode uses current byte and consumes it
* tbl_prefix uses current byte but does not consume it
* tbl_suffix uses and consumes last byte in insn
* tbl_extension uses current byte but does not consume it
* tbl_ext_ext uses current byte but does not consume it
* tbl_fpu uses current byte and consumes it
* tbl_fpu_ext uses current byte but does not consume it
*/
/* Convenience struct for opcode tables : these will be stored in a
* 'table of tables' so we can use a table index instead of a pointer */
typedef struct { /* Assembly instruction tables */
ia32_insn_t *table; /* Pointer to table of instruction encodings */
enum ia32_tbl_type_id type;
unsigned char shift; /* amount to shift modrm byte */
unsigned char mask; /* bit mask for look up */
unsigned char minlim,maxlim; /* limits on min/max entries. */
} ia32_table_desc_t;
/* ---------------------------------------------- 'Cooked' Operand Type Info */
/* Permissions: */
#define OP_R 0x001 /* operand is READ */
#define OP_W 0x002 /* operand is WRITTEN */
#define OP_RW 0x003 /* (OP_R|OP_W): convenience macro */
#define OP_X 0x004 /* operand is EXECUTED */
#define OP_PERM_MASK 0x0000007 /* perms are NOT mutually exclusive */
#define OP_PERM( type ) (type & OP_PERM_MASK)
/* Flags */
#define OP_SIGNED 0x010 /* operand is signed */
#define OP_FLAG_MASK 0x0F0 /* mods are NOT mutually exclusive */
#define OP_FLAGS( type ) (type & OP_FLAG_MASK)
#define OP_REG_MASK 0x0000FFFF /* lower WORD is register ID */
#define OP_REGTBL_MASK 0xFFFF0000 /* higher word is register type [gen/dbg] */
#define OP_REGID( type ) (type & OP_REG_MASK)
#define OP_REGTYPE( type ) (type & OP_REGTBL_MASK)
/* ------------------------------------------'Cooked' Instruction Type Info */
/* high-bit opcode types/insn meta-types */
#define INS_FLAG_PREFIX 0x10000000 /* insn is a prefix */
#define INS_FLAG_SUFFIX 0x20000000 /* followed by a suffix byte */
#define INS_FLAG_MASK 0xFF000000
/* insn notes */
#define INS_NOTE_RING0 0x00000001 /* insn is privileged */
#define INS_NOTE_SMM 0x00000002 /* Sys Mgt Mode only */
#define INS_NOTE_SERIAL 0x00000004 /* serializes */
#define INS_NOTE_NONSWAP 0x00000008 /* insn is not swapped in att format */ // could be separate field?
#define INS_NOTE_NOSUFFIX 0x00000010 /* insn has no size suffix in att format */ // could be separate field?
//#define INS_NOTE_NMI
#define INS_INVALID 0
/* instruction groups */
#define INS_EXEC 0x1000
#define INS_ARITH 0x2000
#define INS_LOGIC 0x3000
#define INS_STACK 0x4000
#define INS_COND 0x5000
#define INS_LOAD 0x6000
#define INS_ARRAY 0x7000
#define INS_BIT 0x8000
#define INS_FLAG 0x9000
#define INS_FPU 0xA000
#define INS_TRAPS 0xD000
#define INS_SYSTEM 0xE000
#define INS_OTHER 0xF000
#define INS_GROUP_MASK 0xF000
#define INS_GROUP( type ) ( type & INS_GROUP_MASK )
/* INS_EXEC group */
#define INS_BRANCH (INS_EXEC | 0x01) /* Unconditional branch */
#define INS_BRANCHCC (INS_EXEC | 0x02) /* Conditional branch */
#define INS_CALL (INS_EXEC | 0x03) /* Jump to subroutine */
#define INS_CALLCC (INS_EXEC | 0x04) /* Jump to subroutine */
#define INS_RET (INS_EXEC | 0x05) /* Return from subroutine */
/* INS_ARITH group */
#define INS_ADD (INS_ARITH | 0x01)
#define INS_SUB (INS_ARITH | 0x02)
#define INS_MUL (INS_ARITH | 0x03)
#define INS_DIV (INS_ARITH | 0x04)
#define INS_INC (INS_ARITH | 0x05) /* increment */
#define INS_DEC (INS_ARITH | 0x06) /* decrement */
#define INS_SHL (INS_ARITH | 0x07) /* shift right */
#define INS_SHR (INS_ARITH | 0x08) /* shift left */
#define INS_ROL (INS_ARITH | 0x09) /* rotate left */
#define INS_ROR (INS_ARITH | 0x0A) /* rotate right */
#define INS_MIN (INS_ARITH | 0x0B) /* min func */
#define INS_MAX (INS_ARITH | 0x0C) /* max func */
#define INS_AVG (INS_ARITH | 0x0D) /* avg func */
#define INS_FLR (INS_ARITH | 0x0E) /* floor func */
#define INS_CEIL (INS_ARITH | 0x0F) /* ceiling func */
/* INS_LOGIC group */
#define INS_AND (INS_LOGIC | 0x01)
#define INS_OR (INS_LOGIC | 0x02)
#define INS_XOR (INS_LOGIC | 0x03)
#define INS_NOT (INS_LOGIC | 0x04)
#define INS_NEG (INS_LOGIC | 0x05)
#define INS_NAND (INS_LOGIC | 0x06)
/* INS_STACK group */
#define INS_PUSH (INS_STACK | 0x01)
#define INS_POP (INS_STACK | 0x02)
#define INS_PUSHREGS (INS_STACK | 0x03) /* push register context */
#define INS_POPREGS (INS_STACK | 0x04) /* pop register context */
#define INS_PUSHFLAGS (INS_STACK | 0x05) /* push all flags */
#define INS_POPFLAGS (INS_STACK | 0x06) /* pop all flags */
#define INS_ENTER (INS_STACK | 0x07) /* enter stack frame */
#define INS_LEAVE (INS_STACK | 0x08) /* leave stack frame */
/* INS_COND group */
#define INS_TEST (INS_COND | 0x01)
#define INS_CMP (INS_COND | 0x02)
/* INS_LOAD group */
#define INS_MOV (INS_LOAD | 0x01)
#define INS_MOVCC (INS_LOAD | 0x02)
#define INS_XCHG (INS_LOAD | 0x03)
#define INS_XCHGCC (INS_LOAD | 0x04)
#define INS_CONV (INS_LOAD | 0x05) /* move and convert type */
/* INS_ARRAY group */
#define INS_STRCMP (INS_ARRAY | 0x01)
#define INS_STRLOAD (INS_ARRAY | 0x02)
#define INS_STRMOV (INS_ARRAY | 0x03)
#define INS_STRSTOR (INS_ARRAY | 0x04)
#define INS_XLAT (INS_ARRAY | 0x05)
/* INS_BIT group */
#define INS_BITTEST (INS_BIT | 0x01)
#define INS_BITSET (INS_BIT | 0x02)
#define INS_BITCLR (INS_BIT | 0x03)
/* INS_FLAG group */
#define INS_CLEARCF (INS_FLAG | 0x01) /* clear Carry flag */
#define INS_CLEARZF (INS_FLAG | 0x02) /* clear Zero flag */
#define INS_CLEAROF (INS_FLAG | 0x03) /* clear Overflow flag */
#define INS_CLEARDF (INS_FLAG | 0x04) /* clear Direction flag */
#define INS_CLEARSF (INS_FLAG | 0x05) /* clear Sign flag */
#define INS_CLEARPF (INS_FLAG | 0x06) /* clear Parity flag */
#define INS_SETCF (INS_FLAG | 0x07)
#define INS_SETZF (INS_FLAG | 0x08)
#define INS_SETOF (INS_FLAG | 0x09)
#define INS_SETDF (INS_FLAG | 0x0A)
#define INS_SETSF (INS_FLAG | 0x0B)
#define INS_SETPF (INS_FLAG | 0x0C)
#define INS_TOGCF (INS_FLAG | 0x10) /* toggle */
#define INS_TOGZF (INS_FLAG | 0x20)
#define INS_TOGOF (INS_FLAG | 0x30)
#define INS_TOGDF (INS_FLAG | 0x40)
#define INS_TOGSF (INS_FLAG | 0x50)
#define INS_TOGPF (INS_FLAG | 0x60)
/* INS_FPU */
#define INS_FMOV (INS_FPU | 0x1)
#define INS_FMOVCC (INS_FPU | 0x2)
#define INS_FNEG (INS_FPU | 0x3)
#define INS_FABS (INS_FPU | 0x4)
#define INS_FADD (INS_FPU | 0x5)
#define INS_FSUB (INS_FPU | 0x6)
#define INS_FMUL (INS_FPU | 0x7)
#define INS_FDIV (INS_FPU | 0x8)
#define INS_FSQRT (INS_FPU | 0x9)
#define INS_FCMP (INS_FPU | 0xA)
#define INS_FCOS (INS_FPU | 0xC) /* cosine */
#define INS_FLDPI (INS_FPU | 0xD) /* load pi */
#define INS_FLDZ (INS_FPU | 0xE) /* load 0 */
#define INS_FTAN (INS_FPU | 0xF) /* tanget */
#define INS_FSINE (INS_FPU | 0x10) /* sine */
#define INS_FSYS (INS_FPU | 0x20) /* misc */
/* INS_TRAP */
#define INS_TRAP (INS_TRAPS | 0x01) /* generate trap */
#define INS_TRAPCC (INS_TRAPS | 0x02) /* conditional trap gen */
#define INS_TRET (INS_TRAPS | 0x03) /* return from trap */
#define INS_BOUNDS (INS_TRAPS | 0x04) /* gen bounds trap */
#define INS_DEBUG (INS_TRAPS | 0x05) /* gen breakpoint trap */
#define INS_TRACE (INS_TRAPS | 0x06) /* gen single step trap */
#define INS_INVALIDOP (INS_TRAPS | 0x07) /* gen invalid insn */
#define INS_OFLOW (INS_TRAPS | 0x08) /* gen overflow trap */
#define INS_ICEBP (INS_TRAPS | 0x09) /* ICE breakpoint */
/* INS_SYSTEM */
#define INS_HALT (INS_SYSTEM | 0x01) /* halt machine */
#define INS_IN (INS_SYSTEM | 0x02) /* input form port */
#define INS_OUT (INS_SYSTEM | 0x03) /* output to port */
#define INS_CPUID (INS_SYSTEM | 0x04) /* identify cpu */
#define INS_LMSW (INS_SYSTEM | 0x05) /* load machine status word (CR0[3:0]) */
#define INS_SMSW (INS_SYSTEM | 0x06) /* store machine status word (CR0[3:0]) */
#define INS_CLTS (INS_SYSTEM | 0x07) /* clear task switched flag (CR0[3]) */
/* INS_OTHER */
#define INS_NOP (INS_OTHER | 0x01)
#define INS_BCDCONV (INS_OTHER | 0x02) /* convert to/from BCD */
#define INS_SZCONV (INS_OTHER | 0x03) /* convert size of operand */
#define INS_SALC (INS_OTHER | 0x04) /* set %al on carry */
#define INS_UNKNOWN (INS_OTHER | 0x05)
#define INS_TYPE_MASK 0xFFFF
#define INS_TYPE( type ) ( type & INS_TYPE_MASK )
/* flags effected by instruction */
#define INS_TEST_CARRY 0x01 /* carry */
#define INS_TEST_ZERO 0x02 /* zero/equal */
#define INS_TEST_OFLOW 0x04 /* overflow */
#define INS_TEST_DIR 0x08 /* direction */
#define INS_TEST_SIGN 0x10 /* negative */
#define INS_TEST_PARITY 0x20 /* parity */
#define INS_TEST_OR 0x40 /* used in jle */
#define INS_TEST_NCARRY 0x100 /* ! carry */
#define INS_TEST_NZERO 0x200 /* ! zero */
#define INS_TEST_NOFLOW 0x400 /* ! oflow */
#define INS_TEST_NDIR 0x800 /* ! dir */
#define INS_TEST_NSIGN 0x1000 /* ! sign */
#define INS_TEST_NPARITY 0x2000 /* ! parity */
/* SF == OF */
#define INS_TEST_SFEQOF 0x4000
/* SF != OF */
#define INS_TEST_SFNEOF 0x8000
#define INS_TEST_ALL INS_TEST_CARRY | INS_TEST_ZERO | \
INS_TEST_OFLOW | INS_TEST_SIGN | \
INS_TEST_PARITY
#define INS_SET_CARRY 0x010000 /* carry */
#define INS_SET_ZERO 0x020000 /* zero/equal */
#define INS_SET_OFLOW 0x040000 /* overflow */
#define INS_SET_DIR 0x080000 /* direction */
#define INS_SET_SIGN 0x100000 /* negative */
#define INS_SET_PARITY 0x200000 /* parity */
#define INS_SET_NCARRY 0x1000000
#define INS_SET_NZERO 0x2000000
#define INS_SET_NOFLOW 0x4000000
#define INS_SET_NDIR 0x8000000
#define INS_SET_NSIGN 0x10000000
#define INS_SET_NPARITY 0x20000000
#define INS_SET_SFEQOF 0x40000000
#define INS_SET_SFNEOF 0x80000000
#define INS_SET_ALL INS_SET_CARRY | INS_SET_ZERO | \
INS_SET_OFLOW | INS_SET_SIGN | \
INS_SET_PARITY
#define INS_TEST_MASK 0x0000FFFF
#define INS_FLAGS_TEST(x) (x & INS_TEST_MASK)
#define INS_SET_MASK 0xFFFF0000
#define INS_FLAGS_SET(x) (x & INS_SET_MASK)
#if 0
/* TODO: actually start using these */
#define X86_PAIR_NP 1 /* not pairable; execs in U */
#define X86_PAIR_PU 2 /* pairable in U pipe */
#define X86_PAIR_PV 3 /* pairable in V pipe */
#define X86_PAIR_UV 4 /* pairable in UV pipe */
#define X86_PAIR_FX 5 /* pairable with FXCH */
#define X86_EXEC_PORT_0 1
#define X86_EXEC_PORT_1 2
#define X86_EXEC_PORT_2 4
#define X86_EXEC_PORT_3 8
#define X86_EXEC_PORT_4 16
#define X86_EXEC_UNITS
typedef struct { /* representation of an insn during decoding */
uint32_t flags; /* runtime settings */
/* instruction prefixes and other foolishness */
uint32_t prefix; /* encoding of prefix */
char prefix_str[16]; /* mnemonics for prefix */
uint32_t branch_hint; /* gah! */
unsigned int cpu_ver; /* TODO: cpu version */
unsigned int clocks; /* TODO: clock cycles: min/max */
unsigned char last_prefix;
/* runtime intruction decoding helpers */
unsigned char mode; /* 16, 32, 64 */
unsigned char gen_regs; /* offset of default general reg set */
unsigned char sz_operand; /* operand size for insn */
unsigned char sz_address; /* address size for insn */
unsigned char uops; /* uops per insn */
unsigned char pairing; /* np,pu,pv.lv */
unsigned char exec_unit;
unsigned char exec_port;
unsigned char latency;
} ia32_info_t;
#define MODE_32 0 /* default */
#define MODE_16 1
#define MODE_64 2
#endif
#endif

312
3rd_party/libdisasm/ia32_invariant.cpp vendored Normal file
View File

@ -0,0 +1,312 @@
#include <stdlib.h>
#include <string.h>
#include "ia32_invariant.h"
#include "ia32_insn.h"
#include "ia32_settings.h"
extern ia32_table_desc_t *ia32_tables;
extern ia32_settings_t ia32_settings;
extern size_t ia32_table_lookup( unsigned char *buf, size_t buf_len,
unsigned int table, ia32_insn_t **raw_insn,
unsigned int *prefixes );
/* -------------------------------- ModR/M, SIB */
/* Convenience flags */
#define MODRM_EA 1 /* ModR/M is an effective addr */
#define MODRM_reg 2 /* ModR/M is a register */
/* ModR/M flags */
#define MODRM_RM_SIB 0x04 /* R/M == 100 */
#define MODRM_RM_NOREG 0x05 /* R/B == 101 */
/* if (MODRM.MOD_NODISP && MODRM.RM_NOREG) then just disp32 */
#define MODRM_MOD_NODISP 0x00 /* mod == 00 */
#define MODRM_MOD_DISP8 0x01 /* mod == 01 */
#define MODRM_MOD_DISP32 0x02 /* mod == 10 */
#define MODRM_MOD_NOEA 0x03 /* mod == 11 */
/* 16-bit modrm flags */
#define MOD16_MOD_NODISP 0
#define MOD16_MOD_DISP8 1
#define MOD16_MOD_DISP16 2
#define MOD16_MOD_REG 3
#define MOD16_RM_BXSI 0
#define MOD16_RM_BXDI 1
#define MOD16_RM_BPSI 2
#define MOD16_RM_BPDI 3
#define MOD16_RM_SI 4
#define MOD16_RM_DI 5
#define MOD16_RM_BP 6
#define MOD16_RM_BX 7
/* SIB flags */
#define SIB_INDEX_NONE 0x04
#define SIB_BASE_EBP 0x05
#define SIB_SCALE_NOBASE 0x00
/* Convenience struct for modR/M bitfield */
struct modRM_byte {
unsigned int mod : 2;
unsigned int reg : 3;
unsigned int rm : 3;
};
/* Convenience struct for SIB bitfield */
struct SIB_byte {
unsigned int scale : 2;
unsigned int index : 3;
unsigned int base : 3;
};
#ifdef WIN32
static void byte_decode(unsigned char b, struct modRM_byte *modrm) {
#else
static inline void byte_decode(unsigned char b, struct modRM_byte *modrm) {
#endif
/* generic bitfield-packing routine */
modrm->mod = b >> 6; /* top 2 bits */
modrm->reg = (b & 56) >> 3; /* middle 3 bits */
modrm->rm = b & 7; /* bottom 3 bits */
}
static int ia32_invariant_modrm( unsigned char *in, unsigned char *out,
unsigned int mode_16, x86_invariant_op_t *op) {
struct modRM_byte modrm;
struct SIB_byte sib;
unsigned char *c, *cin;
unsigned short *s;
unsigned int *i;
int size = 0; /* modrm byte is already counted */
byte_decode(*in, &modrm); /* get bitfields */
out[0] = in[0]; /* save modrm byte */
cin = &in[1];
c = &out[1];
s = (unsigned short *)&out[1];
i = (unsigned int *)&out[1];
op->type = op_expression;
op->flags.op_pointer = true; //|= op_pointer;
if ( ! mode_16 && modrm.rm == MODRM_RM_SIB &&
modrm.mod != MODRM_MOD_NOEA ) {
size ++;
byte_decode(*cin, (struct modRM_byte *)(void*)&sib);
out[1] = in[1]; /* save sib byte */
cin = &in[2];
c = &out[2];
s = (unsigned short *)&out[2];
i = (unsigned int *)&out[2];
if ( sib.base == SIB_BASE_EBP && ! modrm.mod ) {
/* disp 32 is variant! */
memset( i, X86_WILDCARD_BYTE, 4 );
size += 4;
}
}
if (! modrm.mod && modrm.rm == 101) {
if ( mode_16 ) { /* straight RVA in disp */
memset( s, X86_WILDCARD_BYTE, 2 );
size += 2;
} else {
memset( i, X86_WILDCARD_BYTE, 2 );
size += 4;
}
} else if (modrm.mod && modrm.mod < 3) {
if (modrm.mod == MODRM_MOD_DISP8) { /* offset in disp */
*c = *cin;
size += 1;
} else if ( mode_16 ) {
*s = (* ((unsigned short *) cin));
size += 2;
} else {
*i = (*((unsigned int *) cin));
size += 4;
}
} else if ( modrm.mod == 3 ) {
op->type = op_register;
op->flags.op_pointer = false;// &= ~op_pointer;
}
return (size);
}
static int ia32_decode_invariant( unsigned char *buf, size_t buf_len,
ia32_insn_t *t, unsigned char *out,
unsigned int prefixes, x86_invariant_t *inv) {
unsigned int addr_size, op_size, mode_16;
unsigned int op_flags[3] = { t->dest_flag, t->src_flag, t->aux_flag };
int x, type, bytes = 0, size = 0, modrm = 0;
/* set addressing mode */
if (ia32_settings.options & opt_16_bit) {
op_size = ( prefixes & PREFIX_OP_SIZE ) ? 4 : 2;
addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 4 : 2;
mode_16 = ( prefixes & PREFIX_ADDR_SIZE ) ? 0 : 1;
} else {
op_size = ( prefixes & PREFIX_OP_SIZE ) ? 2 : 4;
addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 2 : 4;
mode_16 = ( prefixes & PREFIX_ADDR_SIZE ) ? 1 : 0;
}
for (x = 0; x < 3; x++) {
inv->operands[x].access = (enum x86_op_access) OP_PERM(op_flags[x]);
inv->operands[x].flags.whole = (OP_FLAGS(op_flags[x]) >> 12);
//(enum x86_op_flags) (OP_FLAGS(op_flags[x]) >> 12);
switch (op_flags[x] & OPTYPE_MASK) {
case OPTYPE_c:
size = (op_size == 4) ? 2 : 1;
break;
case OPTYPE_a: case OPTYPE_v:
size = (op_size == 4) ? 4 : 2;
break;
case OPTYPE_p:
size = (op_size == 4) ? 6 : 4;
break;
case OPTYPE_b:
size = 1;
break;
case OPTYPE_w:
size = 2;
break;
case OPTYPE_d: case OPTYPE_fs: case OPTYPE_fd:
case OPTYPE_fe: case OPTYPE_fb: case OPTYPE_fv:
case OPTYPE_si: case OPTYPE_fx:
size = 4;
break;
case OPTYPE_s:
size = 6;
break;
case OPTYPE_q: case OPTYPE_pi:
size = 8;
break;
case OPTYPE_dq: case OPTYPE_ps: case OPTYPE_ss:
case OPTYPE_pd: case OPTYPE_sd:
size = 16;
break;
case OPTYPE_m:
size = (addr_size == 4) ? 4 : 2;
break;
default:
break;
}
type = op_flags[x] & ADDRMETH_MASK;
switch (type) {
case ADDRMETH_E: case ADDRMETH_M: case ADDRMETH_Q:
case ADDRMETH_R: case ADDRMETH_W:
modrm = 1;
bytes += ia32_invariant_modrm( buf, out,
mode_16, &inv->operands[x]);
break;
case ADDRMETH_C: case ADDRMETH_D: case ADDRMETH_G:
case ADDRMETH_P: case ADDRMETH_S: case ADDRMETH_T:
case ADDRMETH_V:
inv->operands[x].type = op_register;
modrm = 1;
break;
case ADDRMETH_A: case ADDRMETH_O:
/* pad with xF4's */
memset( &out[bytes + modrm], X86_WILDCARD_BYTE,
size );
bytes += size;
inv->operands[x].type = op_offset;
if ( type == ADDRMETH_O ) {
inv->operands[x].flags.op_signed = true;
inv->operands[x].flags.op_pointer = true;
}
break;
case ADDRMETH_I: case ADDRMETH_J:
/* grab imm value */
if ((op_flags[x] & OPTYPE_MASK) == OPTYPE_v) {
/* assume this is an address */
memset( &out[bytes + modrm], X86_WILDCARD_BYTE, size );
} else {
memcpy( &out[bytes + modrm], &buf[bytes + modrm], size );
}
bytes += size;
if ( type == ADDRMETH_J ) {
if ( size == 1 ) {
inv->operands[x].type = op_relative_near;
} else {
inv->operands[x].type = op_relative_far;
}
inv->operands[x].flags.op_signed=true;
} else {
inv->operands[x].type = op_immediate;
}
break;
case ADDRMETH_F:
inv->operands[x].type = op_register;
break;
case ADDRMETH_X:
inv->operands[x].flags.op_signed=true;
inv->operands[x].flags.op_pointer=true;
inv->operands[x].flags.op_seg=x86_op_flags::op_ds_seg;
inv->operands[x].flags.op_string=true;
break;
case ADDRMETH_Y:
inv->operands[x].flags.op_signed=true;
inv->operands[x].flags.op_pointer=true;
inv->operands[x].flags.op_seg=x86_op_flags::op_es_seg;
inv->operands[x].flags.op_string=true;
break;
case ADDRMETH_RR:
inv->operands[x].type = op_register;
break;
case ADDRMETH_II:
inv->operands[x].type = op_immediate;
break;
default:
inv->operands[x].type = op_unused;
break;
}
}
return (bytes + modrm);
}
size_t ia32_disasm_invariant( unsigned char * buf, size_t buf_len,
x86_invariant_t *inv ) {
ia32_insn_t *raw_insn = NULL;
unsigned int prefixes=0;
unsigned int type;
size_t size;
/* Perform recursive table lookup starting with main table (0) */
size = ia32_table_lookup( buf, buf_len, 0, &raw_insn, &prefixes );
if ( size == INVALID_INSN || size > buf_len ) {
/* TODO: set errno */
return 0;
}
/* copy opcode bytes to buffer */
memcpy( inv->bytes, buf, size );
/* set mnemonic type and group */
type = raw_insn->mnem_flag & ~INS_FLAG_MASK;
inv->group = (x86_insn_t::x86_insn_group) ((INS_GROUP(type)) >> 12);
inv->type = (enum x86_insn_type) INS_TYPE(type);
/* handle operands */
size += ia32_decode_invariant( buf + size, buf_len - size, raw_insn,
&buf[size - 1], prefixes, inv );
inv->size = size;
return size; /* return size of instruction in bytes */
}
size_t ia32_disasm_size( unsigned char *buf, size_t buf_len ) {
x86_invariant_t inv = { {0} };
return( ia32_disasm_invariant( buf, buf_len, &inv ) );
}

11
3rd_party/libdisasm/ia32_invariant.h vendored Normal file
View File

@ -0,0 +1,11 @@
#ifndef IA32_INVARIANT_H
#define IA32_INVARIANT_H
#include "libdis.h"
size_t ia32_disasm_invariant( unsigned char *buf, size_t buf_len,
x86_invariant_t *inv);
size_t ia32_disasm_size( unsigned char *buf, size_t buf_len );
#endif

314
3rd_party/libdisasm/ia32_modrm.cpp vendored Normal file
View File

@ -0,0 +1,314 @@
#include "ia32_modrm.h"
#include "ia32_reg.h"
#include "x86_imm.h"
/* NOTE: when decoding ModR/M and SIB, we have to add 1 to all register
* values obtained from decoding the ModR/M or SIB byte, since they
* are encoded with eAX = 0 and the tables in ia32_reg.c use eAX = 1.
* ADDENDUM: this is only the case when the register value is used
* directly as an index into the register table, not when it is added to
* a genregs offset. */
/* -------------------------------- ModR/M, SIB */
/* ModR/M flags */
#define MODRM_RM_SIB 0x04 /* R/M == 100 */
#define MODRM_RM_NOREG 0x05 /* R/B == 101 */
/* if (MODRM.MOD_NODISP && MODRM.RM_NOREG) then just disp32 */
#define MODRM_MOD_NODISP 0x00 /* mod == 00 */
#define MODRM_MOD_DISP8 0x01 /* mod == 01 */
#define MODRM_MOD_DISP32 0x02 /* mod == 10 */
#define MODRM_MOD_NOEA 0x03 /* mod == 11 */
/* 16-bit modrm flags */
#define MOD16_MOD_NODISP 0
#define MOD16_MOD_DISP8 1
#define MOD16_MOD_DISP16 2
#define MOD16_MOD_REG 3
#define MOD16_RM_BXSI 0
#define MOD16_RM_BXDI 1
#define MOD16_RM_BPSI 2
#define MOD16_RM_BPDI 3
#define MOD16_RM_SI 4
#define MOD16_RM_DI 5
#define MOD16_RM_BP 6
#define MOD16_RM_BX 7
/* SIB flags */
#define SIB_INDEX_NONE 0x04
#define SIB_BASE_EBP 0x05
#define SIB_SCALE_NOBASE 0x00
/* Convenience struct for modR/M bitfield */
struct modRM_byte {
unsigned int mod : 2;
unsigned int reg : 3;
unsigned int rm : 3;
};
/* Convenience struct for SIB bitfield */
struct SIB_byte {
unsigned int scale : 2;
unsigned int index : 3;
unsigned int base : 3;
};
#if 0
int modrm_rm[] = {0,1,2,3,MODRM_RM_SIB,MODRM_MOD_DISP32,6,7};
int modrm_reg[] = {0, 1, 2, 3, 4, 5, 6, 7};
int modrm_mod[] = {0, MODRM_MOD_DISP8, MODRM_MOD_DISP32, MODRM_MOD_NOEA};
int sib_scl[] = {0, 2, 4, 8};
int sib_idx[] = {0, 1, 2, 3, SIB_INDEX_NONE, 5, 6, 7 };
int sib_bas[] = {0, 1, 2, 3, 4, SIB_SCALE_NOBASE, 6, 7 };
#endif
/* this is needed to replace x86_imm_signsized() which does not sign-extend
* to dest */
static unsigned int imm32_signsized( unsigned char *buf, size_t buf_len,
int32_t *dest, unsigned int size ) {
if ( size > buf_len ) {
return 0;
}
switch (size) {
case 1:
*dest = *((signed char *) buf);
break;
case 2:
*dest = *((signed short *) buf);
break;
case 4:
default:
*dest = *((signed int *) buf);
break;
}
return size;
}
static void byte_decode(unsigned char b, struct modRM_byte *modrm) {
/* generic bitfield-packing routine */
modrm->mod = b >> 6; /* top 2 bits */
modrm->reg = (b & 56) >> 3; /* middle 3 bits */
modrm->rm = b & 7; /* bottom 3 bits */
}
static size_t sib_decode( unsigned char *buf, size_t buf_len, x86_ea_t *ea,
unsigned int mod ) {
/* set Address Expression fields (scale, index, base, disp)
* according to the contents of the SIB byte.
* b points to the SIB byte in the instruction-stream buffer; the
* byte after b[0] is therefore the byte after the SIB
* returns number of bytes 'used', including the SIB byte */
size_t size = 1; /* start at 1 for SIB byte */
struct SIB_byte sib;
if ( buf_len < 1 ) {
return 0;
}
byte_decode( *buf, (struct modRM_byte *)(void*)&sib ); /* get bit-fields */
if ( sib.base == SIB_BASE_EBP && ! mod ) { /* if base == 101 (ebp) */
/* IF BASE == EBP, deal with exception */
/* IF (ModR/M did not create a Disp */
/* ... create a 32-bit Displacement */
imm32_signsized( &buf[1], buf_len, &ea->disp, sizeof(int32_t));
ea->disp_size = sizeof(int32_t);
ea->disp_sign = (ea->disp < 0) ? 1 : 0;
size += 4; /* add sizeof disp to count */
} else {
/* ELSE BASE refers to a General Register */
ia32_handle_register( &ea->base, sib.base + 1 );
}
/* set scale to 1, 2, 4, 8 */
ea->scale = 1 << sib.scale;
if (sib.index != SIB_INDEX_NONE) {
/* IF INDEX is not 'ESP' (100) */
ia32_handle_register( &ea->index, sib.index + 1 );
}
return (size); /* return number of bytes processed */
}
static size_t modrm_decode16( unsigned char *buf, unsigned int buf_len,
x86_op_t *op, struct modRM_byte *modrm ) {
/* 16-bit mode: hackish, but not as hackish as 32-bit mode ;) */
size_t size = 1; /* # of bytes decoded [1 for modR/M byte] */
x86_ea_t * ea = &op->data.expression;
switch( modrm->rm ) {
case MOD16_RM_BXSI:
ia32_handle_register(&ea->base, REG_WORD_OFFSET + 3);
ia32_handle_register(&ea->index, REG_WORD_OFFSET + 6);
break;
case MOD16_RM_BXDI:
ia32_handle_register(&ea->base, REG_WORD_OFFSET + 3);
ia32_handle_register(&ea->index, REG_WORD_OFFSET + 7);
case MOD16_RM_BPSI:
op->flags.op_seg = x86_op_flags::op_ss_seg;
ia32_handle_register(&ea->base, REG_WORD_OFFSET + 5);
ia32_handle_register(&ea->index, REG_WORD_OFFSET + 6);
break;
case MOD16_RM_BPDI:
op->flags.op_seg = x86_op_flags::op_ss_seg;
ia32_handle_register(&ea->base, REG_WORD_OFFSET + 5);
ia32_handle_register(&ea->index, REG_WORD_OFFSET + 7);
break;
case MOD16_RM_SI:
ia32_handle_register(&ea->base, REG_WORD_OFFSET + 6);
break;
case MOD16_RM_DI:
ia32_handle_register(&ea->base, REG_WORD_OFFSET + 7);
break;
case MOD16_RM_BP:
if ( modrm->mod != MOD16_MOD_NODISP ) {
op->flags.op_seg = x86_op_flags::op_ss_seg;
ia32_handle_register(&ea->base,
REG_WORD_OFFSET + 5);
}
else {
/* There is a special case when
(mod == 0) && (rm == 6) for disp16 */
modrm->mod = MOD16_MOD_DISP16;
}
break;
case MOD16_RM_BX:
ia32_handle_register(&ea->base, REG_WORD_OFFSET + 3);
break;
}
/* move to byte after ModR/M */
++buf;
--buf_len;
if ( modrm->mod == MOD16_MOD_DISP8 ) {
imm32_signsized( buf, buf_len, &ea->disp, sizeof(char) );
ea->disp_sign = (ea->disp < 0) ? 1 : 0;
ea->disp_size = sizeof(char);
size += sizeof(char);
} else if ( modrm->mod == MOD16_MOD_DISP16 ) {
imm32_signsized( buf, buf_len, &ea->disp, sizeof(short) );
ea->disp_sign = (ea->disp < 0) ? 1 : 0;
ea->disp_size = sizeof(short);
size += sizeof(short);
}
return size;
}
/* TODO : Mark index modes
Use addressing mode flags to imply arrays (index), structure (disp),
two-dimensional arrays [disp + index], classes [ea reg], and so on.
*/
size_t Ia32_Decoder::ia32_modrm_decode( unsigned char *buf, unsigned int buf_len,
x86_op_t *op, size_t gen_regs ) {
/* create address expression and/or fill operand based on value of
* ModR/M byte. Calls sib_decode as appropriate.
* flags specifies whether Reg or mod+R/M fields are being decoded
* returns the number of bytes in the instruction, including modR/M */
struct modRM_byte modrm;
size_t size = 1; /* # of bytes decoded [1 for modR/M byte] */
x86_ea_t * ea;
byte_decode(*buf, &modrm); /* get bitfields */
/* first, handle the case where the mod field is a register only */
if ( modrm.mod == MODRM_MOD_NOEA ) {
op->type = op_register;
ia32_handle_register(&op->data.reg, modrm.rm + gen_regs);
/* increase insn size by 1 for modrm byte */
return 1;
}
/* then deal with cases where there is an effective address */
ea = &op->data.expression;
op->type = op_expression;
op->flags.op_pointer=true;
if ( m_decoded->addr_size == 2 ) {
/* gah! 16 bit mode! */
return modrm_decode16( buf, buf_len, op, &modrm);
}
/* move to byte after ModR/M */
++buf;
--buf_len;
if (modrm.mod == MODRM_MOD_NODISP) { /* if mod == 00 */
/* IF MOD == No displacement, just Indirect Register */
if (modrm.rm == MODRM_RM_NOREG) { /* if r/m == 101 */
/* IF RM == No Register, just Displacement */
/* This is an Intel Moronic Exception TM */
imm32_signsized( buf, buf_len, &ea->disp,
sizeof(int32_t) );
ea->disp_size = sizeof(int32_t);
ea->disp_sign = (ea->disp < 0) ? 1 : 0;
size += 4; /* add sizeof disp to count */
} else if (modrm.rm == MODRM_RM_SIB) { /* if r/m == 100 */
/* ELSE IF an SIB byte is present */
/* TODO: check for 0 retval */
size += sib_decode( buf, buf_len, ea, modrm.mod);
/* move to byte after SIB for displacement */
++buf;
--buf_len;
} else { /* modR/M specifies base register */
/* ELSE RM encodes a general register */
ia32_handle_register( &ea->base, modrm.rm + 1 );
}
} else { /* mod is 01 or 10 */
if (modrm.rm == MODRM_RM_SIB) { /* rm == 100 */
/* IF base is an AddrExpr specified by an SIB byte */
/* TODO: check for 0 retval */
size += sib_decode( buf, buf_len, ea, modrm.mod);
/* move to byte after SIB for displacement */
++buf;
--buf_len;
} else {
/* ELSE base is a general register */
ia32_handle_register( &ea->base, modrm.rm + 1 );
}
/* ELSE mod + r/m specify a disp##[base] or disp##(SIB) */
if (modrm.mod == MODRM_MOD_DISP8) { /* mod == 01 */
/* If this is an 8-bit displacement */
imm32_signsized( buf, buf_len, &ea->disp,
sizeof(char));
ea->disp_size = sizeof(char);
ea->disp_sign = (ea->disp < 0) ? 1 : 0;
size += 1; /* add sizeof disp to count */
} else {
/* Displacement is dependent on address size */
imm32_signsized( buf, buf_len, &ea->disp, m_decoded->addr_size);
ea->disp_size = m_decoded->addr_size;
ea->disp_sign = (ea->disp < 0) ? 1 : 0;
size += 4;
}
}
return size; /* number of bytes found in instruction */
}
void ia32_reg_decode( unsigned char byte, x86_op_t *op, size_t gen_regs ) {
struct modRM_byte modrm;
byte_decode( byte, &modrm ); /* get bitfields */
/* set operand to register ID */
op->type = op_register;
ia32_handle_register(&op->data.reg, modrm.reg + gen_regs);
return;
}

13
3rd_party/libdisasm/ia32_modrm.h vendored Normal file
View File

@ -0,0 +1,13 @@
#ifndef IA32_MODRM_H
#define IA32_MODRM_H
#include "libdis.h"
#include "ia32_insn.h"
size_t ia32_modrm_decode( unsigned char *buf, unsigned int buf_len,
x86_op_t *op, x86_insn_t *insn,
size_t gen_regs );
void ia32_reg_decode( unsigned char byte, x86_op_t *op, size_t gen_regs );
#endif

2939
3rd_party/libdisasm/ia32_opcode_tables.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,57 @@
#define idx_Main 0
#define idx_66 1
#define idx_F2 2
#define idx_F3 3
#define idx_0F 4
#define idx_660F 5
#define idx_F20F 6
#define idx_F30F 7
#define idx_0F00 8
#define idx_0F01 9
#define idx_0F0111 10
#define idx_0F12 11
#define idx_0F16 12
#define idx_0F18 13
#define idx_0F71 14
#define idx_660F71 15
#define idx_0F72 16
#define idx_660F72 17
#define idx_0F73 18
#define idx_660F73 19
#define idx_0FAE 20
#define idx_0FBA 21
#define idx_0FC7 22
#define idx_0FB9 23
#define idx_C6 24
#define idx_C7 25
#define idx_80 26
#define idx_81 27
#define idx_82 28
#define idx_83 29
#define idx_C0 30
#define idx_C1 31
#define idx_D0 32
#define idx_D1 33
#define idx_D2 34
#define idx_D3 35
#define idx_F6 36
#define idx_F7 37
#define idx_FE 38
#define idx_FF 39
#define idx_D8 40
#define idx_D8C0 41
#define idx_D9 42
#define idx_D9C0 43
#define idx_DA 44
#define idx_DAC0 45
#define idx_DB 46
#define idx_DBC0 47
#define idx_DC 48
#define idx_DCC0 49
#define idx_DD 50
#define idx_DDC0 51
#define idx_DE 52
#define idx_DEC0 53
#define idx_DF 54
#define idx_DFC0 55
#define idx_0F0F 56

421
3rd_party/libdisasm/ia32_operand.cpp vendored Normal file
View File

@ -0,0 +1,421 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libdis.h"
#include "ia32_insn.h"
#include "ia32_operand.h"
#include "ia32_modrm.h"
#include "ia32_reg.h"
#include "x86_imm.h"
#include "x86_operand_list.h"
/* apply segment override to memory operand in insn */
static void apply_seg( x86_op_t *op, unsigned int prefixes ) {
if (! prefixes ) return;
/* apply overrides from prefix */
switch ( prefixes & PREFIX_REG_MASK ) {
/* NOTE: that op->flags for segment override are not a bitfield */
case PREFIX_CS:
op->flags.op_seg = x86_op_flags::op_cs_seg; break;
case PREFIX_SS:
op->flags.op_seg = x86_op_flags::op_ss_seg; break;
case PREFIX_DS:
op->flags.op_seg = x86_op_flags::op_ds_seg; break;
case PREFIX_ES:
op->flags.op_seg = x86_op_flags::op_es_seg; break;
case PREFIX_FS:
op->flags.op_seg = x86_op_flags::op_fs_seg; break;
case PREFIX_GS:
op->flags.op_seg = x86_op_flags::op_gs_seg; break;
}
return;
}
size_t Ia32_Decoder::decode_operand_value( unsigned char *buf, size_t buf_len,
x86_op_t *op,
unsigned int addr_meth, size_t op_size,
unsigned int op_value, unsigned char modrm,
size_t gen_regs ) {
size_t size = 0;
/* ++ Do Operand Addressing Method / Decode operand ++ */
switch (addr_meth) {
/* This sets the operand Size based on the Intel Opcode Map
* (Vol 2, Appendix A). Letter encodings are from section
* A.1.1, 'Codes for Addressing Method' */
/* ---------------------- Addressing Method -------------- */
/* Note that decoding mod ModR/M operand adjusts the size of
* the instruction, but decoding the reg operand does not.
* This should not cause any problems, as every 'reg' operand
* has an associated 'mod' operand.
* Goddamn-Intel-Note:
* Some Intel addressing methods [M, R] specify that modR/M
* byte may only refer to a memory address/may only refer to
* a register -- however Intel provides no clues on what to do
* if, say, the modR/M for an M opcode decodes to a register
* rather than a memory address ... returning 0 is out of the
* question, as this would be an Immediate or a RelOffset, so
* instead these modR/Ms are decoded with total disregard to
* the M, R constraints. */
/* MODRM -- mod operand. sets size to at least 1! */
case ADDRMETH_E: /* ModR/M present, Gen reg or memory */
size = ia32_modrm_decode( buf, buf_len, op, gen_regs );
break;
case ADDRMETH_M: /* ModR/M only refers to memory */
size = ia32_modrm_decode( buf, buf_len, op, gen_regs );
break;
case ADDRMETH_Q: /* ModR/M present, MMX or Memory */
size = ia32_modrm_decode( buf, buf_len, op, REG_MMX_OFFSET );
break;
case ADDRMETH_R: /* ModR/M mod == gen reg */
size = ia32_modrm_decode( buf, buf_len, op, gen_regs );
break;
case ADDRMETH_W: /* ModR/M present, mem or SIMD reg */
size = ia32_modrm_decode( buf, buf_len, op, REG_SIMD_OFFSET );
break;
/* MODRM -- reg operand. does not effect size! */
case ADDRMETH_C: /* ModR/M reg == control reg */
ia32_reg_decode( modrm, op, REG_CTRL_OFFSET );
break;
case ADDRMETH_D: /* ModR/M reg == debug reg */
ia32_reg_decode( modrm, op, REG_DEBUG_OFFSET );
break;
case ADDRMETH_G: /* ModR/M reg == gen-purpose reg */
ia32_reg_decode( modrm, op, gen_regs );
break;
case ADDRMETH_P: /* ModR/M reg == qword MMX reg */
ia32_reg_decode( modrm, op, REG_MMX_OFFSET );
break;
case ADDRMETH_S: /* ModR/M reg == segment reg */
ia32_reg_decode( modrm, op, REG_SEG_OFFSET );
break;
case ADDRMETH_T: /* ModR/M reg == test reg */
ia32_reg_decode( modrm, op, REG_TEST_OFFSET );
break;
case ADDRMETH_V: /* ModR/M reg == SIMD reg */
ia32_reg_decode( modrm, op, REG_SIMD_OFFSET );
break;
/* No MODRM : note these set operand type explicitly */
case ADDRMETH_A: /* No modR/M -- direct addr */
op->type = op_absolute;
/* segment:offset address used in far calls */
x86_imm_sized( buf, buf_len,
&op->data.absolute.segment, 2 );
if ( m_decoded->addr_size == 4 ) {
x86_imm_sized( buf, buf_len,
&op->data.absolute.offset.off32, 4 );
size = 6;
} else {
x86_imm_sized( buf, buf_len,
&op->data.absolute.offset.off16, 2 );
size = 4;
}
break;
case ADDRMETH_I: /* Immediate val */
op->type = op_immediate;
/* if it ever becomes legal to have imm as dest and
* there is a src ModR/M operand, we are screwed! */
if ( op->flags.op_signed ) {
x86_imm_signsized(buf, buf_len, &op->data.byte,
op_size);
} else {
x86_imm_sized(buf, buf_len, &op->data.byte,
op_size);
}
size = op_size;
break;
case ADDRMETH_J: /* Rel offset to add to IP [jmp] */
/* this fills op->data.near_offset or
op->data.far_offset depending on the size of
the operand */
op->flags.op_signed = true;
if ( op_size == 1 ) {
/* one-byte near offset */
op->type = op_relative_near;
x86_imm_signsized(buf, buf_len, &op->data.relative_near, 1);
} else {
/* far offset...is this truly signed? */
op->type = op_relative_far;
x86_imm_signsized(buf, buf_len,
&op->data.relative_far, op_size );
}
size = op_size;
break;
case ADDRMETH_O: /* No ModR/M; op is word/dword offset */
/* NOTE: these are actually RVAs not offsets to seg!! */
/* note bene: 'O' ADDR_METH uses addr_size to
determine operand size */
op->type = op_offset;
op->flags.op_pointer=true;
x86_imm_sized( buf, buf_len, &op->data.offset, m_decoded->addr_size );
size = m_decoded->addr_size;
break;
/* Hard-coded: these are specified in the insn definition */
case ADDRMETH_F: /* EFLAGS register */
op->type = op_register;
op->flags.op_hardcode=true;
ia32_handle_register( &op->data.reg, REG_FLAGS_INDEX );
break;
case ADDRMETH_X: /* Memory addressed by DS:SI [string] */
op->type = op_expression;
op->flags.op_hardcode = true;
op->flags.op_seg = x86_op_flags::op_ds_seg;
op->flags.op_pointer = true;
op->flags.op_string = true;
ia32_handle_register( &op->data.expression.base,
REG_DWORD_OFFSET + 6 );
break;
case ADDRMETH_Y: /* Memory addressed by ES:DI [string] */
op->type = op_expression;
op->flags.op_hardcode = true;
op->flags.op_seg = x86_op_flags::op_es_seg;
op->flags.op_pointer = true;
op->flags.op_string = true;
ia32_handle_register( &op->data.expression.base,
REG_DWORD_OFFSET + 7 );
break;
case ADDRMETH_RR: /* Gen Register hard-coded in opcode */
op->type = op_register;
op->flags.op_hardcode = true;
ia32_handle_register( &op->data.reg,
op_value + gen_regs );
break;
case ADDRMETH_RS: /* Seg Register hard-coded in opcode */
op->type = op_register;
op->flags.op_hardcode = true;
ia32_handle_register( &op->data.reg, op_value + REG_SEG_OFFSET );
break;
case ADDRMETH_RF: /* FPU Register hard-coded in opcode */
op->type = op_register;
op->flags.op_hardcode = true;
ia32_handle_register( &op->data.reg,
op_value + REG_FPU_OFFSET );
break;
case ADDRMETH_RT: /* TST Register hard-coded in opcode */
op->type = op_register;
op->flags.op_hardcode = true;
ia32_handle_register( &op->data.reg,
op_value + REG_TEST_OFFSET );
break;
case ADDRMETH_II: /* Immediate hard-coded in opcode */
op->type = op_immediate;
op->data.dword = op_value;
op->flags.op_hardcode = true;
break;
case 0: /* Operand is not used */
default:
/* ignore -- operand not used in this insn */
op->type = op_unused; /* this shouldn't happen! */
break;
}
return size;
}
size_t Ia32_Decoder::decode_operand_size( unsigned int op_type, x86_op_t *op ) {
size_t size;
/* ++ Do Operand Type ++ */
switch (op_type) {
/* This sets the operand Size based on the Intel Opcode Map
* (Vol 2, Appendix A). Letter encodings are from section
* A.1.2, 'Codes for Operand Type' */
/* NOTE: in this routines, 'size' refers to the size
* of the operand in the raw (encoded) instruction;
* 'datatype' stores the actual size and datatype
* of the operand */
/* ------------------------ Operand Type ----------------- */
case OPTYPE_c: /* byte or word [op size attr] */
size = (m_decoded->op_size == 4) ? 2 : 1;
op->datatype = (size == 4) ? op_word : op_byte;
break;
case OPTYPE_a: /* 2 word or 2 dword [op size attr] */
/* pointer to a 16:16 or 32:32 BOUNDS operand */
size = (m_decoded->op_size == 4) ? 8 : 4;
op->datatype = (size == 4) ? op_bounds32 : op_bounds16;
break;
case OPTYPE_v: /* word or dword [op size attr] */
size = (m_decoded->op_size == 4) ? 4 : 2;
op->datatype = (size == 4) ? op_dword : op_word;
break;
case OPTYPE_p: /* 32/48-bit ptr [op size attr] */
/* technically these flags are not accurate: the
* value s a 16:16 pointer or a 16:32 pointer, where
* the first '16' is a segment */
size = (m_decoded->addr_size == 4) ? 6 : 4;
op->datatype = (size == 4) ? op_descr32 : op_descr16;
break;
case OPTYPE_b: /* byte, ignore op-size */
size = 1;
op->datatype = op_byte;
break;
case OPTYPE_w: /* word, ignore op-size */
size = 2;
op->datatype = op_word;
break;
case OPTYPE_d: /* dword , ignore op-size */
size = 4;
op->datatype = op_dword;
break;
case OPTYPE_s: /* 6-byte psuedo-descriptor */
/* ptr to 6-byte value which is 32:16 in 32-bit
* mode, or 8:24:16 in 16-bit mode. The high byte
* is ignored in 16-bit mode. */
size = 6;
op->datatype = (m_decoded->addr_size == 4) ?
op_pdescr32 : op_pdescr16;
break;
case OPTYPE_q: /* qword, ignore op-size */
size = 8;
op->datatype = op_qword;
break;
case OPTYPE_dq: /* d-qword, ignore op-size */
size = 16;
op->datatype = op_dqword;
break;
case OPTYPE_ps: /* 128-bit FP data */
size = 16;
/* really this is 4 packed SP FP values */
op->datatype = op_ssimd;
break;
case OPTYPE_pd: /* 128-bit FP data */
size = 16;
/* really this is 2 packed DP FP values */
op->datatype = op_dsimd;
break;
case OPTYPE_ss: /* Scalar elem of 128-bit FP data */
size = 16;
/* this only looks at the low dword (4 bytes)
* of the xmmm register passed as a param.
* This is a 16-byte register where only 4 bytes
* are used in the insn. Painful, ain't it? */
op->datatype = op_sssimd;
break;
case OPTYPE_sd: /* Scalar elem of 128-bit FP data */
size = 16;
/* this only looks at the low qword (8 bytes)
* of the xmmm register passed as a param.
* This is a 16-byte register where only 8 bytes
* are used in the insn. Painful, again... */
op->datatype = op_sdsimd;
break;
case OPTYPE_pi: /* qword mmx register */
size = 8;
op->datatype = op_qword;
break;
case OPTYPE_si: /* dword integer register */
size = 4;
op->datatype = op_dword;
break;
case OPTYPE_fs: /* single-real */
size = 4;
op->datatype = op_sreal;
break;
case OPTYPE_fd: /* double real */
size = 8;
op->datatype = op_dreal;
break;
case OPTYPE_fe: /* extended real */
size = 10;
op->datatype = op_extreal;
break;
case OPTYPE_fb: /* packed BCD */
size = 10;
op->datatype = op_bcd;
break;
case OPTYPE_fv: /* pointer to FPU env: 14 or 28-bytes */
size = (m_decoded->addr_size == 4)? 28 : 14;
op->datatype = (size == 28)? op_fpuenv32: op_fpuenv16;
break;
case OPTYPE_ft: /* pointer to FPU env: 94 or 108 bytes */
size = (m_decoded->addr_size == 4)? 108 : 94;
op->datatype = (size == 108)?
op_fpustate32: op_fpustate16;
break;
case OPTYPE_fx: /* 512-byte register stack */
size = 512;
op->datatype = op_fpregset;
break;
case OPTYPE_fp: /* floating point register */
size = 10; /* double extended precision */
op->datatype = op_fpreg;
break;
case OPTYPE_m: /* fake operand type used for "lea Gv, M" */
size = m_decoded->addr_size;
op->datatype = (size == 4) ? op_dword : op_word;
break;
case OPTYPE_none: /* handle weird instructions that have no encoding but use a dword datatype, like invlpg */
size = 0;
op->datatype = op_none;
break;
case 0:
default:
size = m_decoded->op_size;
op->datatype = (size == 4) ? op_dword : op_word;
break;
}
return size;
}
size_t Ia32_Decoder::ia32_decode_operand( unsigned char *buf, size_t buf_len,
unsigned int raw_op,
unsigned int raw_flags, unsigned int prefixes,
unsigned char modrm ) {
unsigned int addr_meth, op_type, op_size, gen_regs;
x86_op_t *op;
size_t size;
/* ++ Yank optype and addr mode out of operand flags */
addr_meth = raw_flags & ADDRMETH_MASK;
op_type = raw_flags & OPTYPE_MASK;
if ( raw_flags == ARG_NONE ) {
/* operand is not used in this instruction */
return 0;
}
/* allocate a new operand */
op = m_decoded->x86_operand_new();
/* ++ Copy flags from opcode table to x86_insn_t */
op->access = (enum x86_op_access) OP_PERM(raw_flags);
op->flags.whole = (OP_FLAGS(raw_flags) >> 12);
/* Get size (for decoding) and datatype of operand */
op_size = decode_operand_size(op_type, op);
/* override default register set based on Operand Type */
/* this allows mixing of 8, 16, and 32 bit regs in insn */
if (op_size == 1) {
gen_regs = REG_BYTE_OFFSET;
} else if (op_size == 2) {
gen_regs = REG_WORD_OFFSET;
} else {
gen_regs = REG_DWORD_OFFSET;
}
size = decode_operand_value( buf, buf_len, op, addr_meth,
op_size, raw_op, modrm, gen_regs );
/* if operand is an address, apply any segment override prefixes */
if ( op->type == op_expression || op->type == op_offset ) {
apply_seg(op, prefixes);
}
return size; /* return number of bytes in instruction */
}

11
3rd_party/libdisasm/ia32_operand.h vendored Normal file
View File

@ -0,0 +1,11 @@
#ifndef IA32_OPERAND_H
#define IA32_OPERAND_H
#include "libdis.h"
#include "ia32_insn.h"
size_t ia32_decode_operand( unsigned char *buf, size_t buf_len,
x86_insn_t *insn, unsigned int raw_op,
unsigned int raw_flags, unsigned int prefixes,
unsigned char modrm );
#endif

234
3rd_party/libdisasm/ia32_reg.cpp vendored Normal file
View File

@ -0,0 +1,234 @@
#include <stdlib.h>
#include <string.h>
#include "ia32_reg.h"
#include "ia32_insn.h"
#define NUM_X86_REGS 92
/* register sizes */
#define REG_DWORD_SIZE 4
#define REG_WORD_SIZE 2
#define REG_BYTE_SIZE 1
#define REG_MMX_SIZE 8
#define REG_SIMD_SIZE 16
#define REG_DEBUG_SIZE 4
#define REG_CTRL_SIZE 4
#define REG_TEST_SIZE 4
#define REG_SEG_SIZE 2
#define REG_FPU_SIZE 10
#define REG_FLAGS_SIZE 4
#define REG_FPCTRL_SIZE 2
#define REG_FPSTATUS_SIZE 2
#define REG_FPTAG_SIZE 2
#define REG_EIP_SIZE 4
#define REG_IP_SIZE 2
/* REGISTER ALIAS TABLE:
*
* NOTE: the MMX register mapping is fixed to the physical registers
* used by the FPU. The floating FP stack does not effect the location
* of the MMX registers, so this aliasing is not 100% accurate.
* */
static struct {
unsigned char alias; /* id of register this is an alias for */
unsigned char shift; /* # of bits register must be shifted */
} ia32_reg_aliases[] = {
{ 0,0 },
{ REG_DWORD_OFFSET, 0 }, /* al : 1 */
{ REG_DWORD_OFFSET, 8 }, /* ah : 2 */
{ REG_DWORD_OFFSET, 0 }, /* ax : 3 */
{ REG_DWORD_OFFSET + 1, 0 }, /* cl : 4 */
{ REG_DWORD_OFFSET + 1, 8 }, /* ch : 5 */
{ REG_DWORD_OFFSET + 1, 0 }, /* cx : 6 */
{ REG_DWORD_OFFSET + 2, 0 }, /* dl : 7 */
{ REG_DWORD_OFFSET + 2, 8 }, /* dh : 8 */
{ REG_DWORD_OFFSET + 2, 0 }, /* dx : 9 */
{ REG_DWORD_OFFSET + 3, 0 }, /* bl : 10 */
{ REG_DWORD_OFFSET + 3, 8 }, /* bh : 11 */
{ REG_DWORD_OFFSET + 3, 0 }, /* bx : 12 */
{ REG_DWORD_OFFSET + 4, 0 }, /* sp : 13 */
{ REG_DWORD_OFFSET + 5, 0 }, /* bp : 14 */
{ REG_DWORD_OFFSET + 6, 0 }, /* si : 15 */
{ REG_DWORD_OFFSET + 7, 0 }, /* di : 16 */
{ REG_EIP_INDEX, 0 }, /* ip : 17 */
{ REG_FPU_OFFSET, 0 }, /* mm0 : 18 */
{ REG_FPU_OFFSET + 1, 0 }, /* mm1 : 19 */
{ REG_FPU_OFFSET + 2, 0 }, /* mm2 : 20 */
{ REG_FPU_OFFSET + 3, 0 }, /* mm3 : 21 */
{ REG_FPU_OFFSET + 4, 0 }, /* mm4 : 22 */
{ REG_FPU_OFFSET + 5, 0 }, /* mm5 : 23 */
{ REG_FPU_OFFSET + 6, 0 }, /* mm6 : 24 */
{ REG_FPU_OFFSET + 7, 0 } /* mm7 : 25 */
};
/* REGISTER TABLE: size, type, and name of every register in the
* CPU. Does not include MSRs since the are, after all,
* model specific. */
static struct {
unsigned int size;
enum x86_reg_type type;
unsigned int alias;
char mnemonic[8];
} ia32_reg_table[NUM_X86_REGS + 2] = {
{ 0, reg_undef, 0, "" },
/* REG_DWORD_OFFSET */
{ REG_DWORD_SIZE, (x86_reg_type)(reg_gen | reg_ret), 0, "eax" },
{ REG_DWORD_SIZE, (x86_reg_type)(reg_gen | reg_count), 0, "ecx" },
{ REG_DWORD_SIZE, reg_gen, 0, "edx" },
{ REG_DWORD_SIZE, reg_gen, 0, "ebx" },
/* REG_ESP_INDEX */
{ REG_DWORD_SIZE, (x86_reg_type)(reg_gen | reg_sp), 0, "esp" },
{ REG_DWORD_SIZE, (x86_reg_type)(reg_gen | reg_fp), 0, "ebp" },
{ REG_DWORD_SIZE, (x86_reg_type)(reg_gen | reg_src), 0, "esi" },
{ REG_DWORD_SIZE, (x86_reg_type)(reg_gen | reg_dest), 0, "edi" },
/* REG_WORD_OFFSET */
{ REG_WORD_SIZE, (x86_reg_type)(reg_gen | reg_ret), 3, "ax" },
{ REG_WORD_SIZE, (x86_reg_type)(reg_gen | reg_count), 6, "cx" },
{ REG_WORD_SIZE, reg_gen, 9, "dx" },
{ REG_WORD_SIZE, reg_gen, 12, "bx" },
{ REG_WORD_SIZE, (x86_reg_type)(reg_gen | reg_sp), 13, "sp" },
{ REG_WORD_SIZE, (x86_reg_type)(reg_gen | reg_fp), 14, "bp" },
{ REG_WORD_SIZE, (x86_reg_type)(reg_gen | reg_src), 15, "si" },
{ REG_WORD_SIZE, (x86_reg_type)(reg_gen | reg_dest), 16, "di" },
/* REG_BYTE_OFFSET */
{ REG_BYTE_SIZE, reg_gen, 1, "al" },
{ REG_BYTE_SIZE, reg_gen, 4, "cl" },
{ REG_BYTE_SIZE, reg_gen, 7, "dl" },
{ REG_BYTE_SIZE, reg_gen, 10, "bl" },
{ REG_BYTE_SIZE, reg_gen, 2, "ah" },
{ REG_BYTE_SIZE, reg_gen, 5, "ch" },
{ REG_BYTE_SIZE, reg_gen, 8, "dh" },
{ REG_BYTE_SIZE, reg_gen, 11, "bh" },
/* REG_MMX_OFFSET */
{ REG_MMX_SIZE, reg_simd, 18, "mm0" },
{ REG_MMX_SIZE, reg_simd, 19, "mm1" },
{ REG_MMX_SIZE, reg_simd, 20, "mm2" },
{ REG_MMX_SIZE, reg_simd, 21, "mm3" },
{ REG_MMX_SIZE, reg_simd, 22, "mm4" },
{ REG_MMX_SIZE, reg_simd, 23, "mm5" },
{ REG_MMX_SIZE, reg_simd, 24, "mm6" },
{ REG_MMX_SIZE, reg_simd, 25, "mm7" },
/* REG_SIMD_OFFSET */
{ REG_SIMD_SIZE, reg_simd, 0, "xmm0" },
{ REG_SIMD_SIZE, reg_simd, 0, "xmm1" },
{ REG_SIMD_SIZE, reg_simd, 0, "xmm2" },
{ REG_SIMD_SIZE, reg_simd, 0, "xmm3" },
{ REG_SIMD_SIZE, reg_simd, 0, "xmm4" },
{ REG_SIMD_SIZE, reg_simd, 0, "xmm5" },
{ REG_SIMD_SIZE, reg_simd, 0, "xmm6" },
{ REG_SIMD_SIZE, reg_simd, 0, "xmm7" },
/* REG_DEBUG_OFFSET */
{ REG_DEBUG_SIZE, reg_sys, 0, "dr0" },
{ REG_DEBUG_SIZE, reg_sys, 0, "dr1" },
{ REG_DEBUG_SIZE, reg_sys, 0, "dr2" },
{ REG_DEBUG_SIZE, reg_sys, 0, "dr3" },
{ REG_DEBUG_SIZE, reg_sys, 0, "dr4" },
{ REG_DEBUG_SIZE, reg_sys, 0, "dr5" },
{ REG_DEBUG_SIZE, reg_sys, 0, "dr6" },
{ REG_DEBUG_SIZE, reg_sys, 0, "dr7" },
/* REG_CTRL_OFFSET */
{ REG_CTRL_SIZE, reg_sys, 0, "cr0" },
{ REG_CTRL_SIZE, reg_sys, 0, "cr1" },
{ REG_CTRL_SIZE, reg_sys, 0, "cr2" },
{ REG_CTRL_SIZE, reg_sys, 0, "cr3" },
{ REG_CTRL_SIZE, reg_sys, 0, "cr4" },
{ REG_CTRL_SIZE, reg_sys, 0, "cr5" },
{ REG_CTRL_SIZE, reg_sys, 0, "cr6" },
{ REG_CTRL_SIZE, reg_sys, 0, "cr7" },
/* REG_TEST_OFFSET */
{ REG_TEST_SIZE, reg_sys, 0, "tr0" },
{ REG_TEST_SIZE, reg_sys, 0, "tr1" },
{ REG_TEST_SIZE, reg_sys, 0, "tr2" },
{ REG_TEST_SIZE, reg_sys, 0, "tr3" },
{ REG_TEST_SIZE, reg_sys, 0, "tr4" },
{ REG_TEST_SIZE, reg_sys, 0, "tr5" },
{ REG_TEST_SIZE, reg_sys, 0, "tr6" },
{ REG_TEST_SIZE, reg_sys, 0, "tr7" },
/* REG_SEG_OFFSET */
{ REG_SEG_SIZE, reg_seg, 0, "es" },
{ REG_SEG_SIZE, reg_seg, 0, "cs" },
{ REG_SEG_SIZE, reg_seg, 0, "ss" },
{ REG_SEG_SIZE, reg_seg, 0, "ds" },
{ REG_SEG_SIZE, reg_seg, 0, "fs" },
{ REG_SEG_SIZE, reg_seg, 0, "gs" },
/* REG_LDTR_INDEX */
{ REG_DWORD_SIZE, reg_sys, 0, "ldtr" },
/* REG_GDTR_INDEX */
{ REG_DWORD_SIZE, reg_sys, 0, "gdtr" },
/* REG_FPU_OFFSET */
{ REG_FPU_SIZE, reg_fpu, 0, "st(0)" },
{ REG_FPU_SIZE, reg_fpu, 0, "st(1)" },
{ REG_FPU_SIZE, reg_fpu, 0, "st(2)" },
{ REG_FPU_SIZE, reg_fpu, 0, "st(3)" },
{ REG_FPU_SIZE, reg_fpu, 0, "st(4)" },
{ REG_FPU_SIZE, reg_fpu, 0, "st(5)" },
{ REG_FPU_SIZE, reg_fpu, 0, "st(6)" },
{ REG_FPU_SIZE, reg_fpu, 0, "st(7)" },
/* REG_FLAGS_INDEX : 81 */
{ REG_FLAGS_SIZE, reg_cond, 0, "eflags" },
/* REG_FPCTRL_INDEX : 82*/
{ REG_FPCTRL_SIZE, (x86_reg_type)(reg_fpu | reg_sys), 0, "fpctrl" },
/* REG_FPSTATUS_INDEX : 83*/
{ REG_FPSTATUS_SIZE, (x86_reg_type)(reg_fpu | reg_sys), 0, "fpstat" },
/* REG_FPTAG_INDEX : 84 */
{ REG_FPTAG_SIZE, (x86_reg_type)(reg_fpu | reg_sys), 0, "fptag" },
/* REG_EIP_INDEX : 85 */
{ REG_EIP_SIZE, reg_pc, 0, "eip" },
/* REG_IP_INDEX : 86 */
{ REG_IP_SIZE, reg_pc, 17, "ip" },
/* REG_IDTR_INDEX : 87 */
{ REG_DWORD_SIZE, reg_sys, 0, "idtr" },
/* REG_MXCSG_INDEX : SSE Control Reg : 88 */
{ REG_DWORD_SIZE, (x86_reg_type)(reg_sys | reg_simd), 0, "mxcsr" },
/* REG_TR_INDEX : Task Register : 89 */
{ 16 + 64, reg_sys, 0, "tr" },
/* REG_CSMSR_INDEX : SYSENTER_CS_MSR : 90 */
{ REG_DWORD_SIZE, reg_sys, 0, "cs_msr" },
/* REG_ESPMSR_INDEX : SYSENTER_ESP_MSR : 91 */
{ REG_DWORD_SIZE, reg_sys, 0, "esp_msr" },
/* REG_EIPMSR_INDEX : SYSENTER_EIP_MSR : 92 */
{ REG_DWORD_SIZE, reg_sys, 0, "eip_msr" },
{ 0 }
};
static size_t sz_regtable = NUM_X86_REGS + 1;
void ia32_handle_register( x86_reg_t *reg, size_t id ) {
unsigned int alias;
if (! id || id > sz_regtable ) {
return;
}
memset( reg, 0, sizeof(x86_reg_t) );
strncpy( reg->name, ia32_reg_table[id].mnemonic, MAX_REGNAME );
reg->type = ia32_reg_table[id].type;
reg->size = ia32_reg_table[id].size;
alias = ia32_reg_table[id].alias;
if ( alias ) {
reg->alias = ia32_reg_aliases[alias].alias;
reg->shift = ia32_reg_aliases[alias].shift;
}
reg->id = id;
return;
}
size_t ia32_true_register_id( size_t id ) {
size_t reg;
if (! id || id > sz_regtable ) {
return 0;
}
reg = id;
if (ia32_reg_table[reg].alias) {
reg = ia32_reg_aliases[ia32_reg_table[reg].alias].alias;
}
return reg;
}

41
3rd_party/libdisasm/ia32_reg.h vendored Normal file
View File

@ -0,0 +1,41 @@
#ifndef IA32_REG_H
#define IA32_REG_H
#include <sys/types.h> /* for size_t */
#include "libdis.h" /* for x86_reg_t */
/* NOTE these are used in opcode tables for hard-coded registers */
#define REG_DWORD_OFFSET 1 /* 0 + 1 */
#define REG_ECX_INDEX 2 /* 0 + 1 + 1 */
#define REG_ESP_INDEX 5 /* 0 + 4 + 1 */
#define REG_EBP_INDEX 6 /* 0 + 5 + 1 */
#define REG_ESI_INDEX 7 /* 0 + 6 + 1 */
#define REG_EDI_INDEX 8 /* 0 + 7 + 1 */
#define REG_WORD_OFFSET 9 /* 1 * 8 + 1 */
#define REG_BYTE_OFFSET 17 /* 2 * 8 + 1 */
#define REG_MMX_OFFSET 25 /* 3 * 8 + 1 */
#define REG_SIMD_OFFSET 33 /* 4 * 8 + 1 */
#define REG_DEBUG_OFFSET 41 /* 5 * 8 + 1 */
#define REG_CTRL_OFFSET 49 /* 6 * 8 + 1 */
#define REG_TEST_OFFSET 57 /* 7 * 8 + 1 */
#define REG_SEG_OFFSET 65 /* 8 * 8 + 1 */
#define REG_LDTR_INDEX 71 /* 8 * 8 + 1 + 1 */
#define REG_GDTR_INDEX 72 /* 8 * 8 + 2 + 1 */
#define REG_FPU_OFFSET 73 /* 9 * 8 + 1 */
#define REG_FLAGS_INDEX 81 /* 10 * 8 + 1 */
#define REG_FPCTRL_INDEX 82 /* 10 * 8 + 1 + 1 */
#define REG_FPSTATUS_INDEX 83 /* 10 * 8 + 2 + 1 */
#define REG_FPTAG_INDEX 84 /* 10 * 8 + 3 + 1 */
#define REG_EIP_INDEX 85 /* 10 * 8 + 4 + 1 */
#define REG_IP_INDEX 86 /* 10 * 8 + 5 + 1 */
#define REG_IDTR_INDEX 87 /* 10 * 8 + 6 + 1 */
#define REG_MXCSG_INDEX 88 /* 10 * 8 + 7 + 1 */
#define REG_TR_INDEX 89 /* 10 * 8 + 8 + 1 */
#define REG_CSMSR_INDEX 90 /* 10 * 8 + 9 + 1 */
#define REG_ESPMSR_INDEX 91 /* 10 * 8 + 10 + 1 */
#define REG_EIPMSR_INDEX 92 /* 10 * 8 + 11 + 1 */
void ia32_handle_register( x86_reg_t *reg, size_t id );
size_t ia32_true_register_id( size_t id );
#endif

13
3rd_party/libdisasm/ia32_settings.cpp vendored Normal file
View File

@ -0,0 +1,13 @@
#include "libdis.h"
#include "ia32_settings.h"
#include "ia32_reg.h"
#include "ia32_insn.h"
ia32_settings_t ia32_settings = {
1, 0xF4,
MAX_INSTRUCTION_SIZE,
4, 4, 8, 4, 8,
REG_ESP_INDEX, REG_EBP_INDEX, REG_EIP_INDEX, REG_FLAGS_INDEX,
REG_DWORD_OFFSET, REG_SEG_OFFSET, REG_FPU_OFFSET,
opt_none
};

27
3rd_party/libdisasm/ia32_settings.h vendored Normal file
View File

@ -0,0 +1,27 @@
#ifndef IA32_SETTINGS_H
#define IA32_SETTINGS_H
#include "libdis.h"
typedef struct {
/* options */
unsigned char endian, /* 0 = big, 1 = little */
wc_byte, /* wildcard byte */
max_insn, /* max insn size */
sz_addr, /* default address size */
sz_oper, /* default operand size */
sz_byte, /* # bits in byte */
sz_word, /* # bytes in machine word */
sz_dword; /* # bytes in machine dword */
unsigned int id_sp_reg, /* id of stack pointer */
id_fp_reg, /* id of frame pointer */
id_ip_reg, /* id of instruction pointer */
id_flag_reg, /* id of flags register */
offset_gen_regs, /* start of general regs */
offset_seg_regs, /* start of segment regs */
offset_fpu_regs; /* start of floating point regs */
/* user-controlled settings */
enum x86_options options;
} ia32_settings_t;
#endif

894
3rd_party/libdisasm/libdis.h vendored Normal file
View File

@ -0,0 +1,894 @@
#ifndef LIBDISASM_H
#define LIBDISASM_H
#ifdef WIN32
#include <windows.h>
#endif
#include <cstring>
#include <cstdlib>
#include <stdint.h>
/* 'NEW" types
* __________________________________________________________________________*/
#ifndef LIBDISASM_QWORD_H /* do not interfere with qword.h */
#define LIBDISASM_QWORD_H
#ifdef _MSC_VER
typedef __int64 qword_t;
#else
typedef int64_t qword_t;
#endif
#endif
#include <sys/types.h>
#ifdef __cplusplus
//extern "C" {
#endif
/* 'NEW" x86 API
* __________________________________________________________________________*/
/* ========================================= Error Reporting */
/* REPORT CODES
* These are passed to a reporter function passed at initialization.
* Each code determines the type of the argument passed to the reporter;
* this allows the report to recover from errors, or just log them.
*/
enum x86_report_codes {
report_disasm_bounds, /* RVA OUT OF BOUNDS : The disassembler could
not disassemble the supplied RVA as it is
out of the range of the buffer. The
application should store the address and
attempt to determine what section of the
binary it is in, then disassemble the
address from the bytes in that section.
data: uint32_t rva */
report_insn_bounds, /* INSTRUCTION OUT OF BOUNDS: The disassembler
could not disassemble the instruction as
the instruction would require bytes beyond
the end of the current buffer. This usually
indicated garbage bytes at the end of a
buffer, or an incorrectly-sized buffer.
data: uint32_t rva */
report_invalid_insn, /* INVALID INSTRUCTION: The disassembler could
not disassemble the instruction as it has an
invalid combination of opcodes and operands.
This will stop automated disassembly; the
application can restart the disassembly
after the invalid instruction.
data: uint32_t rva */
report_unknown
};
/* Disassembly formats:
* AT&T is standard AS/GAS-style: "mnemonic\tsrc, dest, imm"
* Intel is standard MASM/NASM/TASM: "mnemonic\tdest,src, imm"
* Native is tab-delimited: "RVA\tbytes\tmnemonic\tdest\tsrc\timm"
* XML is your typical <insn> ... </insn>
* Raw is addr|offset|size|bytes|prefix... see libdisasm_formats.7
*/
enum x86_asm_format {
unknown_syntax = 0, /* never use! */
native_syntax, /* header: 35 bytes */
intel_syntax, /* header: 23 bytes */
att_syntax, /* header: 23 bytes */
xml_syntax, /* header: 679 bytes */
raw_syntax /* header: 172 bytes */
};
/* 'arg' is optional arbitrary data provided by the code passing the
* callback -- for example, it could be 'this' or 'self' in OOP code.
* 'code' is provided by libdisasm, it is one of the above
* 'data' is provided by libdisasm and is context-specific, per the enums */
typedef void (*DISASM_REPORTER)( enum x86_report_codes code,
void *data, void *arg );
/* ========================================= Libdisasm Management Routines */
enum x86_options { /* these can be ORed together */
opt_none= 0,
opt_ignore_nulls=1, /* ignore sequences of > 4 NULL bytes */
opt_16_bit=2, /* 16-bit/DOS disassembly */
opt_att_mnemonics=4, /* use AT&T syntax names for alternate opcode mnemonics */
};
/* ========================================= Instruction Representation */
/* these defines are only intended for use in the array decl's */
#define MAX_REGNAME 8
#define MAX_PREFIX_STR 32
#define MAX_MNEM_STR 16
#define MAX_INSN_SIZE 20 /* same as in i386.h */
#define MAX_OP_STRING 32 /* max possible operand size in string form */
#define MAX_OP_RAW_STRING 64 /* max possible operand size in raw form */
#define MAX_OP_XML_STRING 256 /* max possible operand size in xml form */
#define MAX_NUM_OPERANDS 8 /* max # implicit and explicit operands */
/* in these, the '2 *' is arbitrary: the max # of operands should require
* more space than the rest of the insn */
#define MAX_INSN_STRING 512 /* 2 * 8 * MAX_OP_STRING */
#define MAX_INSN_RAW_STRING 1024 /* 2 * 8 * MAX_OP_RAW_STRING */
#define MAX_INSN_XML_STRING 4096 /* 2 * 8 * MAX_OP_XML_STRING */
enum x86_reg_type { /* NOTE: these may be ORed together */
reg_undef = 0x00000, // used only in ia32_reg_table initializater
reg_gen = 0x00001, /* general purpose */
reg_in = 0x00002, /* incoming args, ala RISC */
reg_out = 0x00004, /* args to calls, ala RISC */
reg_local = 0x00008, /* local vars, ala RISC */
reg_fpu = 0x00010, /* FPU data register */
reg_seg = 0x00020, /* segment register */
reg_simd = 0x00040, /* SIMD/MMX reg */
reg_sys = 0x00080, /* restricted/system register */
reg_sp = 0x00100, /* stack pointer */
reg_fp = 0x00200, /* frame pointer */
reg_pc = 0x00400, /* program counter */
reg_retaddr = 0x00800, /* return addr for func */
reg_cond = 0x01000, /* condition code / flags */
reg_zero = 0x02000, /* zero register, ala RISC */
reg_ret = 0x04000, /* return value */
reg_src = 0x10000, /* array/rep source */
reg_dest = 0x20000, /* array/rep destination */
reg_count = 0x40000 /* array/rep/loop counter */
};
/* x86_reg_t : an X86 CPU register */
struct x86_reg_t {
char name[MAX_REGNAME];
enum x86_reg_type type; /* what register is used for */
unsigned int size; /* size of register in bytes */
unsigned int id; /* register ID #, for quick compares */
unsigned int alias; /* ID of reg this is an alias for */
unsigned int shift; /* amount to shift aliased reg by */
x86_reg_t * aliased_reg( ) {
x86_reg_t * reg = (x86_reg_t * )calloc( sizeof(x86_reg_t), 1 );
reg->x86_reg_from_id( id );
return reg;
}
void x86_reg_from_id( unsigned int _id);
};
/* x86_ea_t : an X86 effective address (address expression) */
typedef struct {
unsigned int scale; /* scale factor */
x86_reg_t index, base; /* index, base registers */
int32_t disp; /* displacement */
char disp_sign; /* is negative? 1/0 */
char disp_size; /* 0, 1, 2, 4 */
} x86_ea_t;
/* x86_absolute_t : an X86 segment:offset address (descriptor) */
typedef struct {
unsigned short segment; /* loaded directly into CS */
union {
unsigned short off16; /* loaded directly into IP */
uint32_t off32; /* loaded directly into EIP */
} offset;
} x86_absolute_t;
enum x86_op_type { /* mutually exclusive */
op_unused = 0, /* empty/unused operand: should never occur */
op_register = 1, /* CPU register */
op_immediate = 2, /* Immediate Value */
op_relative_near = 3, /* Relative offset from IP */
op_relative_far = 4, /* Relative offset from IP */
op_absolute = 5, /* Absolute address (ptr16:32) */
op_expression = 6, /* Address expression (scale/index/base/disp) */
op_offset = 7, /* Offset from start of segment (m32) */
op_unknown
};
#define x86_optype_is_address( optype ) \
( optype == op_absolute || optype == op_offset )
#define x86_optype_is_relative( optype ) \
( optype == op_relative_near || optype == op_relative_far )
#define x86_optype_is_memory( optype ) \
( optype > op_immediate && optype < op_unknown )
enum x86_op_datatype { /* these use Intel's lame terminology */
op_byte = 1, /* 1 byte integer */
op_word = 2, /* 2 byte integer */
op_dword = 3, /* 4 byte integer */
op_qword = 4, /* 8 byte integer */
op_dqword = 5, /* 16 byte integer */
op_sreal = 6, /* 4 byte real (single real) */
op_dreal = 7, /* 8 byte real (double real) */
op_extreal = 8, /* 10 byte real (extended real) */
op_bcd = 9, /* 10 byte binary-coded decimal */
op_ssimd = 10, /* 16 byte : 4 packed single FP (SIMD, MMX) */
op_dsimd = 11, /* 16 byte : 2 packed double FP (SIMD, MMX) */
op_sssimd = 12, /* 4 byte : scalar single FP (SIMD, MMX) */
op_sdsimd = 13, /* 8 byte : scalar double FP (SIMD, MMX) */
op_descr32 = 14, /* 6 byte Intel descriptor 2:4 */
op_descr16 = 15, /* 4 byte Intel descriptor 2:2 */
op_pdescr32 = 16, /* 6 byte Intel pseudo-descriptor 32:16 */
op_pdescr16 = 17, /* 6 byte Intel pseudo-descriptor 8:24:16 */
op_bounds16 = 18, /* signed 16:16 lower:upper bounds */
op_bounds32 = 19, /* signed 32:32 lower:upper bounds */
op_fpuenv16 = 20, /* 14 byte FPU control/environment data */
op_fpuenv32 = 21, /* 28 byte FPU control/environment data */
op_fpustate16 = 22, /* 94 byte FPU state (env & reg stack) */
op_fpustate32 = 23, /* 108 byte FPU state (env & reg stack) */
op_fpregset = 24, /* 512 bytes: register set */
op_fpreg = 25, /* FPU register */
op_none = 0xFF, /* operand without a datatype (INVLPG) */
};
enum x86_op_access { /* ORed together */
op_read = 1,
op_write = 2,
op_execute = 4
};
struct x86_op_flags { /* ORed together, but segs are mutually exclusive */
union {
unsigned int op_signed:1, /* signed integer */
op_string:1,// = 2, /* possible string or array */
op_constant:1,// = 4, /* symbolic constant */
op_pointer:1,// = 8, /* operand points to a memory address */
op_sysref:1,// = 0x010, /* operand is a syscall number */
op_implied:1,// = 0x020, /* operand is implicit in the insn */
op_hardcode:1,// = 0x40, /* operand is hardcoded in insn definition */
/* NOTE: an 'implied' operand is one which can be considered a side
* effect of the insn, e.g. %esp being modified by PUSH or POP. A
* 'hard-coded' operand is one which is specified in the instruction
* definition, e.g. %es:%edi in MOVSB or 1 in ROL Eb, 1. The difference
* is that hard-coded operands are printed by disassemblers and are
* required to re-assemble, while implicit operands are invisible. */
op_seg : 3;
unsigned int whole;
};
enum {
op_es_seg = 0x100, /* ES segment override */
op_cs_seg = 0x200, /* CS segment override */
op_ss_seg = 0x300, /* SS segment override */
op_ds_seg = 0x400, /* DS segment override */
op_fs_seg = 0x500, /* FS segment override */
op_gs_seg = 0x600 /* GS segment override */
};
};
/* x86_op_t : an X86 instruction operand */
struct x86_op_t{
friend struct x86_insn_t;
enum x86_op_type type; /* operand type */
enum x86_op_datatype datatype; /* operand size */
enum x86_op_access access; /* operand access [RWX] */
x86_op_flags flags; /* misc flags */
union {
/* sizeof will have to work on these union members! */
/* immediate values */
char sbyte;
short sword;
int32_t sdword;
qword_t sqword;
unsigned char byte;
unsigned short word;
uint32_t dword;
qword_t qword;
float sreal;
double dreal;
/* misc large/non-native types */
unsigned char extreal[10];
unsigned char bcd[10];
qword_t dqword[2];
unsigned char simd[16];
unsigned char fpuenv[28];
/* offset from segment */
uint32_t offset;
/* ID of CPU register */
x86_reg_t reg;
/* offsets from current insn */
char relative_near;
int32_t relative_far;
/* segment:offset */
x86_absolute_t absolute;
/* effective address [expression] */
x86_ea_t expression;
} data;
/* this is needed to make formatting operands more sane */
void * insn; /* pointer to x86_insn_t owning operand */
size_t size()
{
return x86_operand_size();
}
/* get size of operand data in bytes */
size_t x86_operand_size();
/* format (sprintf) an operand into 'buf' using specified syntax */
int x86_format_operand(char *buf, int len, enum x86_asm_format format );
bool is_address( ) {
return ( type == op_absolute || type == op_offset );
}
bool is_relative( ) {
return ( type == op_relative_near || type == op_relative_far );
}
char * format( enum x86_asm_format format );
x86_op_t * copy() {
x86_op_t *op = (x86_op_t *) calloc( sizeof(x86_op_t), 1 );
if ( op ) {
memcpy( op, this, sizeof(x86_op_t) );
}
return op;
}
int32_t long_from_operand();
private:
};
/* Linked list of x86_op_t; provided for manual traversal of the operand
* list in an insn. Users wishing to add operands to this list, e.g. to add
* implicit operands, should use x86_operand_new in x86_operand_list.h */
struct x86_oplist_t {
x86_op_t op;
struct x86_oplist_t *next;
};
enum x86_insn_type {
insn_invalid = 0, /* invalid instruction */
/* insn_controlflow */
insn_jmp = 0x1001,
insn_jcc = 0x1002,
insn_call = 0x1003,
insn_callcc = 0x1004,
insn_return = 0x1005,
/* insn_arithmetic */
insn_add = 0x2001,
insn_sub = 0x2002,
insn_mul = 0x2003,
insn_div = 0x2004,
insn_inc = 0x2005,
insn_dec = 0x2006,
insn_shl = 0x2007,
insn_shr = 0x2008,
insn_rol = 0x2009,
insn_ror = 0x200A,
/* insn_logic */
insn_and = 0x3001,
insn_or = 0x3002,
insn_xor = 0x3003,
insn_not = 0x3004,
insn_neg = 0x3005,
/* insn_stack */
insn_push = 0x4001,
insn_pop = 0x4002,
insn_pushregs = 0x4003,
insn_popregs = 0x4004,
insn_pushflags = 0x4005,
insn_popflags = 0x4006,
insn_enter = 0x4007,
insn_leave = 0x4008,
/* insn_comparison */
insn_test = 0x5001,
insn_cmp = 0x5002,
/* insn_move */
insn_mov = 0x6001, /* move */
insn_movcc = 0x6002, /* conditional move */
insn_xchg = 0x6003, /* exchange */
insn_xchgcc = 0x6004, /* conditional exchange */
/* insn_string */
insn_strcmp = 0x7001,
insn_strload = 0x7002,
insn_strmov = 0x7003,
insn_strstore = 0x7004,
insn_translate = 0x7005, /* xlat */
/* insn_bit_manip */
insn_bittest = 0x8001,
insn_bitset = 0x8002,
insn_bitclear = 0x8003,
/* insn_flag_manip */
insn_clear_carry = 0x9001,
insn_clear_zero = 0x9002,
insn_clear_oflow = 0x9003,
insn_clear_dir = 0x9004,
insn_clear_sign = 0x9005,
insn_clear_parity = 0x9006,
insn_set_carry = 0x9007,
insn_set_zero = 0x9008,
insn_set_oflow = 0x9009,
insn_set_dir = 0x900A,
insn_set_sign = 0x900B,
insn_set_parity = 0x900C,
insn_tog_carry = 0x9010,
insn_tog_zero = 0x9020,
insn_tog_oflow = 0x9030,
insn_tog_dir = 0x9040,
insn_tog_sign = 0x9050,
insn_tog_parity = 0x9060,
/* insn_fpu */
insn_fmov = 0xA001,
insn_fmovcc = 0xA002,
insn_fneg = 0xA003,
insn_fabs = 0xA004,
insn_fadd = 0xA005,
insn_fsub = 0xA006,
insn_fmul = 0xA007,
insn_fdiv = 0xA008,
insn_fsqrt = 0xA009,
insn_fcmp = 0xA00A,
insn_fcos = 0xA00C,
insn_fldpi = 0xA00D,
insn_fldz = 0xA00E,
insn_ftan = 0xA00F,
insn_fsine = 0xA010,
insn_fsys = 0xA020,
/* insn_interrupt */
insn_int = 0xD001,
insn_intcc = 0xD002, /* not present in x86 ISA */
insn_iret = 0xD003,
insn_bound = 0xD004,
insn_debug = 0xD005,
insn_trace = 0xD006,
insn_invalid_op = 0xD007,
insn_oflow = 0xD008,
/* insn_system */
insn_halt = 0xE001,
insn_in = 0xE002, /* input from port/bus */
insn_out = 0xE003, /* output to port/bus */
insn_cpuid = 0xE004,
insn_lmsw = 0xE005,
insn_smsw = 0xE006,
insn_clts = 0xE007,
/* insn_other */
insn_nop = 0xF001,
insn_bcdconv = 0xF002, /* convert to or from BCD */
insn_szconv = 0xF003 /* change size of operand */
};
/* These flags specify special characteristics of the instruction, such as
* whether the inatruction is privileged or whether it serializes the
* pipeline.
* NOTE : These may not be accurate for all instructions; updates to the
* opcode tables have not been completed. */
enum x86_insn_note {
insn_note_ring0 = 1, /* Only available in ring 0 */
insn_note_smm = 2, /* "" in System Management Mode */
insn_note_serial = 4, /* Serializing instruction */
insn_note_nonswap = 8, /* Does not swap arguments in att-style formatting */
insn_note_nosuffix = 16, /* Does not have size suffix in att-style formatting */
};
/* This specifies what effects the instruction has on the %eflags register */
enum x86_eflags
{
insn_eflag_carry,
insn_eflag_zero,
insn_eflag_overflow,
insn_eflag_direction,
insn_eflag_sign,
insn_eflag_parity
};
enum x86_flag_status {
insn_carry_set = 0x1, /* CF */
insn_zero_set = 0x2, /* ZF */
insn_oflow_set = 0x4, /* OF */
insn_dir_set = 0x8, /* DF */
insn_sign_set = 0x10, /* SF */
insn_parity_set = 0x20, /* PF */
insn_carry_or_zero_set = 0x40,
insn_zero_set_or_sign_ne_oflow = 0x80,
insn_carry_clear = 0x100,
insn_zero_clear = 0x200,
insn_oflow_clear = 0x400,
insn_dir_clear = 0x800,
insn_sign_clear = 0x1000,
insn_parity_clear = 0x2000,
insn_sign_eq_oflow = 0x4000,
insn_sign_ne_oflow = 0x8000
};
/* The CPU model in which the insturction first appeared; this can be used
* to mask out instructions appearing in earlier or later models or to
* check the portability of a binary.
* NOTE : These may not be accurate for all instructions; updates to the
* opcode tables have not been completed. */
enum x86_insn_cpu {
cpu_8086 = 1, /* Intel */
cpu_80286 = 2,
cpu_80386 = 3,
cpu_80387 = 4,
cpu_80486 = 5,
cpu_pentium = 6,
cpu_pentiumpro = 7,
cpu_pentium2 = 8,
cpu_pentium3 = 9,
cpu_pentium4 = 10,
cpu_k6 = 16, /* AMD */
cpu_k7 = 32,
cpu_athlon = 48
};
/* CPU ISA subsets: These are derived from the Instruction Groups in
* Intel Vol 1 Chapter 5; they represent subsets of the IA32 ISA but
* do not reflect the 'type' of the instruction in the same way that
* x86_insn_group does. In short, these are AMD/Intel's somewhat useless
* designations.
* NOTE : These may not be accurate for all instructions; updates to the
* opcode tables have not been completed. */
enum x86_insn_isa {
isa_gp = 1, /* general purpose */
isa_fp = 2, /* floating point */
isa_fpumgt = 3, /* FPU/SIMD management */
isa_mmx = 4, /* Intel MMX */
isa_sse1 = 5, /* Intel SSE SIMD */
isa_sse2 = 6, /* Intel SSE2 SIMD */
isa_sse3 = 7, /* Intel SSE3 SIMD */
isa_3dnow = 8, /* AMD 3DNow! SIMD */
isa_sys = 9 /* system instructions */
};
enum x86_insn_prefix {
insn_no_prefix = 0,
insn_rep_zero = 1, /* REPZ and REPE */
insn_rep_notzero = 2, /* REPNZ and REPNZ */
insn_lock = 4 /* LOCK: */
};
/* TODO: maybe provide insn_new/free(), and have disasm return new insn_t */
/* FOREACH types: these are used to limit the foreach results to
* operands which match a certain "type" (implicit or explicit)
* or which are accessed in certain ways (e.g. read or write). Note
* that this operates on the operand list of single instruction, so
* specifying the 'real' operand type (register, memory, etc) is not
* useful. Note also that by definition Execute Access implies Read
* Access and implies Not Write Access.
* The "type" (implicit or explicit) and the access method can
* be ORed together, e.g. op_wo | op_explicit */
enum x86_op_foreach_type {
op_any = 0, /* ALL operands (explicit, implicit, rwx) */
op_dest = 1, /* operands with Write access */
op_src = 2, /* operands with Read access */
op_ro = 3, /* operands with Read but not Write access */
op_wo = 4, /* operands with Write but not Read access */
op_xo = 5, /* operands with Execute access */
op_rw = 6, /* operands with Read AND Write access */
op_implicit = 0x10, /* operands that are implied by the opcode */
op_explicit = 0x20 /* operands that are not side-effects */
};
/* Operand FOREACH callback: 'arg' is an abritrary parameter passed to the
* foreach routine, 'insn' is the x86_insn_t whose operands are being
* iterated over, and 'op' is the current x86_op_t */
struct x86_insn_t;
struct ia32_insn_t;
typedef void (*x86_operand_fn)(x86_op_t *op, x86_insn_t *insn, void *arg);
/* x86_insn_t : an X86 instruction */
struct x86_insn_t {
enum x86_insn_group {
insn_none = 0, /* invalid instruction */
insn_controlflow = 1,
insn_arithmetic = 2,
insn_logic = 3,
insn_stack = 4,
insn_comparison = 5,
insn_move = 6,
insn_string = 7,
insn_bit_manip = 8,
insn_flag_manip = 9,
insn_fpu = 10,
insn_interrupt = 13,
insn_system = 14,
insn_other = 15
};
private:
void x86_oplist_append(x86_oplist_t *op);
public:
/* information about the instruction */
uint32_t addr; /* load address */
uint32_t offset; /* offset into file/buffer */
enum x86_insn_group group; /* meta-type, e.g. INS_EXEC */
enum x86_insn_type type; /* type, e.g. INS_BRANCH */
enum x86_insn_note note; /* note, e.g. RING0 */
unsigned char bytes[MAX_INSN_SIZE];
unsigned char size; /* size of insn in bytes */
/* 16/32-bit mode settings */
unsigned char addr_size; /* default address size : 2 or 4 */
unsigned char op_size; /* default operand size : 2 or 4 */
/* CPU/instruction set */
enum x86_insn_cpu cpu;
enum x86_insn_isa isa;
/* flags */
enum x86_flag_status flags_set; /* flags set or tested by insn */
enum x86_flag_status flags_tested;
bool containsFlag(x86_eflags flg,x86_flag_status in);
/* stack */
unsigned char stack_mod; /* 0 or 1 : is the stack modified? */
int32_t stack_mod_val; /* val stack is modified by if known */
/* the instruction proper */
enum x86_insn_prefix prefix; /* prefixes ORed together */
char prefix_string[MAX_PREFIX_STR]; /* prefixes [might be truncated] */
char mnemonic[MAX_MNEM_STR];
x86_oplist_t *operands; /* list of explicit/implicit operands */
size_t operand_count; /* total number of operands */
size_t explicit_count; /* number of explicit operands */
/* convenience fields for user */
void *block; /* code block containing this insn */
void *function; /* function containing this insn */
int tag; /* tag the insn as seen/processed */
x86_op_t *x86_operand_new();
/* convenience routine: returns count of operands matching 'type' */
size_t x86_operand_count( enum x86_op_foreach_type type );
/* accessor functions for the operands */
x86_op_t * x86_operand_1st( );
x86_op_t * x86_operand_2nd( );
x86_op_t * x86_operand_3rd( );
/* Get Relative Offset: return as a sign-extended int32_t the near or far
* relative offset operand, or 0 if there is none. There can be only one
* relaive offset operand in an instruction. */
int32_t x86_get_rel_offset( );
/* Get Branch Target: return the x86_op_t containing the target of
* a jump or call operand, or NULL if there is no branch target.
* Internally, a 'branch target' is defined as any operand with
* Execute Access set. There can be only one branch target per instruction. */
x86_op_t * x86_get_branch_target( );
/* Get Immediate: return the x86_op_t containing the immediate operand
* for this instruction, or NULL if there is no immediate operand. There
* can be only one immediate operand per instruction */
x86_op_t * x86_get_imm( );
/* Get Raw Immediate Data: returns a pointer to the immediate data encoded
* in the instruction. This is useful for large data types [>32 bits] currently
* not supported by libdisasm, or for determining if the disassembler
* screwed up the conversion of the immediate data. Note that 'imm' in this
* context refers to immediate data encoded at the end of an instruction as
* detailed in the Intel Manual Vol II Chapter 2; it does not refer to the
* 'op_imm' operand (the third operand in instructions like 'mul' */
unsigned char * x86_get_raw_imm( );
/* More accessor fuctions, this time for user-defined info... */
/* set the address (usually RVA) of the insn */
void x86_set_insn_addr( uint32_t addr );
/* format (sprintf) an instruction mnemonic into 'buf' using specified syntax */
int x86_format_mnemonic( char *buf, int len, enum x86_asm_format format);
/* format (sprintf) an instruction into 'buf' using specified syntax;
* this includes formatting all operands */
int x86_format_insn( char *buf, int len, enum x86_asm_format);
/* free the operand list associated with an instruction -- useful for
* preventing memory leaks when free()ing an x86_insn_t */
void x86_oplist_free( );
/* returns 0 if an instruction is invalid, 1 if valid */
int x86_insn_is_valid( );
/* Get Address: return the value of an offset operand, or the offset of
* a segment:offset absolute address */
uint32_t x86_get_address( );
void make_invalid(unsigned char *buf);
/* instruction tagging: these routines allow the programmer to mark
* instructions as "seen" in a DFS, for example. libdisasm does not use
* the tag field.*/
/* set insn->tag to 1 */
void x86_tag_insn( );
/* return insn->tag */
int x86_insn_is_tagged();
/* set insn->tag to 0 */
void x86_untag_insn();
/* Operand foreach: invokes 'func' with 'insn' and 'arg' as arguments. The
* 'type' parameter is used to select only operands matching specific
* criteria. */
int x86_operand_foreach( x86_operand_fn func, void *arg, enum x86_op_foreach_type type );
/* set the offset (usually offset into file) of the insn */
void x86_set_insn_offset( unsigned int offset );
/* set a pointer to the function owning the instruction. The
* type of 'func' is user-defined; libdisasm does not use the func field. */
void x86_set_insn_function( void * func );
/* set a pointer to the block of code owning the instruction. The
* type of 'block' is user-defined; libdisasm does not use the block field. */
void x86_set_insn_block( void * block );
private:
};
class Ia32_Decoder
{
x86_insn_t *m_decoded;
void handle_insn_metadata( ia32_insn_t *raw_insn );
void ia32_stack_mod();
void ia32_handle_cpu( unsigned int cpu );
void ia32_handle_mnemtype(unsigned int mnemtype);
void ia32_handle_notes(unsigned int notes);
void ia32_handle_eflags( unsigned int eflags);
void ia32_handle_prefix( unsigned int prefixes );
size_t ia32_decode_insn( unsigned char *buf, size_t buf_len, ia32_insn_t *raw_insn, unsigned int prefixes );
size_t ia32_decode_operand( unsigned char *buf, size_t buf_len, unsigned int raw_op, unsigned int raw_flags, unsigned int prefixes, unsigned char modrm );
size_t decode_operand_size( unsigned int op_type, x86_op_t *op );
size_t decode_operand_value( unsigned char *buf, size_t buf_len, x86_op_t *op, unsigned int addr_meth, size_t op_size, unsigned int op_value, unsigned char modrm, size_t gen_regs );
size_t ia32_modrm_decode( unsigned char *buf, unsigned int buf_len, x86_op_t *op, size_t gen_regs );
unsigned int ia32_insn_implicit_ops( unsigned int impl_idx );
size_t handle_insn_suffix( unsigned char *buf, size_t buf_len, ia32_insn_t *raw_insn );
public:
size_t ia32_disasm_addr( unsigned char * buf, size_t buf_len);
void decode_into(x86_insn_t *ins) {m_decoded=ins;}
};
/* DISASSEMBLY ROUTINES
* Canonical order of arguments is
* (buf, buf_len, buf_rva, offset, len, insn, func, arg, resolve_func)
* ...but of course all of these are not used at the same time.
*/
class X86_Disasm
{
public:
/* Function prototype for caller-supplied callback routine
* These callbacks are intended to process 'insn' further, e.g. by
* adding it to a linked list, database, etc */
typedef void (*DISASM_CALLBACK)( x86_insn_t *insn, void * arg );
/* Function prototype for caller-supplied address resolver.
* This routine is used to determine the rva to disassemble next, given
* the 'dest' operand of a jump/call. This allows the caller to resolve
* jump/call targets stored in a register or on the stack, and also allows
* the caller to prevent endless loops by checking if an address has
* already been disassembled. If an address cannot be resolved from the
* operand, or if the address has already been disassembled, this routine
* should return -1; in all other cases the RVA to be disassembled next
* should be returned. */
typedef int32_t (*DISASM_RESOLVER)( x86_op_t *op, x86_insn_t * current_insn,
void *arg );
protected:
DISASM_REPORTER __x86_reporter_func;
void * __x86_reporter_arg;
Ia32_Decoder m_decoder;
public:
X86_Disasm( x86_options options=opt_none,DISASM_REPORTER reporter=0, void *arg=0 ) :
__x86_reporter_func(reporter),
__x86_reporter_arg(arg) {
x86_init( options,reporter,arg);
}
/* management routines */
/* 'arg' is caller-specific data which is passed as the first argument
* to the reporter callback routine */
int x86_init( x86_options options, DISASM_REPORTER reporter, void *arg);
void x86_set_reporter( DISASM_REPORTER reporter, void *arg);
void x86_set_options( x86_options options );
x86_options x86_get_options( void );
int x86_cleanup(void);
/* x86_disasm: Disassemble a single instruction from a buffer of bytes.
* Returns size of instruction in bytes.
* Caller is responsible for calling x86_oplist_free() on
* a reused "insn" to avoid leaking memory when calling this
* function repeatedly.
* buf : Buffer of bytes to disassemble
* buf_len : Length of the buffer
* buf_rva : Load address of the start of the buffer
* offset : Offset in buffer to disassemble
* insn : Structure to fill with disassembled instruction
*/
unsigned int x86_disasm( unsigned char *buf, unsigned int buf_len,
uint32_t buf_rva, unsigned int offset,
x86_insn_t * insn );
/* x86_disasm_range: Sequential disassembly of a range of bytes in a buffer,
* invoking a callback function each time an instruction
* is successfully disassembled. The 'range' refers to the
* bytes between 'offset' and 'offset + len' in the buffer;
* 'len' is assumed to be less than the length of the buffer.
* Returns number of instructions processed.
* buf : Buffer of bytes to disassemble (e.g. .text section)
* buf_rva : Load address of buffer (e.g. ELF Virtual Address)
* offset : Offset in buffer to start disassembly at
* len : Number of bytes to disassemble
* func : Callback function to invoke (may be NULL)
* arg : Arbitrary data to pass to callback (may be NULL)
*/
unsigned int x86_disasm_range( unsigned char *buf, uint32_t buf_rva,
unsigned int offset, unsigned int len,
DISASM_CALLBACK func, void *arg );
/* x86_disasm_forward: Flow-of-execution disassembly of the bytes in a buffer,
* invoking a callback function each time an instruction
* is successfully disassembled.
* buf : Buffer to disassemble (e.g. .text section)
* buf_len : Number of bytes in buffer
* buf_rva : Load address of buffer (e.g. ELF Virtual Address)
* offset : Offset in buffer to start disassembly at (e.g. entry point)
* func : Callback function to invoke (may be NULL)
* arg : Arbitrary data to pass to callback (may be NULL)
* resolver: Caller-supplied address resolver. If no resolver is
* supplied, a default internal one is used -- however the
* internal resolver does NOT catch loops and could end up
* disassembling forever..
* r_arg : Arbitrary data to pass to resolver (may be NULL)
*/
unsigned int x86_disasm_forward( unsigned char *buf, unsigned int buf_len,
uint32_t buf_rva, unsigned int offset,
DISASM_CALLBACK func, void *arg,
DISASM_RESOLVER resolver, void *r_arg );
/* Call the register reporter to report an error */
void x86_report_error( enum x86_report_codes code, void *data );
/* fill 'buf' with a description of the format's syntax */
int x86_format_header( char *buf, int len, enum x86_asm_format format);
/* Endianness of an x86 CPU : 0 is big, 1 is little; always returns 1 */
unsigned int x86_endian(void);
/* Default address and operand size in bytes */
unsigned int x86_addr_size(void);
unsigned int x86_op_size(void);
/* Size of a machine word in bytes */
unsigned int x86_word_size(void);
/* maximum size of a code instruction */
unsigned int x86_max_insn_size(void);
/* register IDs of Stack, Frame, Instruction pointer and Flags register */
unsigned int x86_sp_reg(void);
unsigned int x86_fp_reg(void);
unsigned int x86_ip_reg(void);
unsigned int x86_flag_reg(void);
};
/* Instruction operands: these are stored as a list of explicit and
* implicit operands. It is recommended that the 'foreach' routines
* be used to when examining operands for purposes of data flow analysis */
/* Operand Convenience Routines: the following three routines are common
* operations on operands, intended to ease the burden of the programmer. */
/* format (sprintf) an instruction mnemonic into 'buf' using specified syntax */
//int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len,
// enum x86_asm_format format);
/* fill 'reg' struct with details of register 'id' */
//void x86_reg_from_id( unsigned int id, x86_reg_t * reg );
/* convenience macro demonstrating how to get an aliased register; proto is
* void x86_get_aliased_reg( x86_reg_t *alias_reg, x86_reg_t *output_reg )
* where 'alias_reg' is a reg operand and 'output_reg' is filled with the
* register that the operand is an alias for */
//#define x86_get_aliased_reg( alias_reg, output_reg ) \
// x86_reg_from_id( alias_reg->alias, output_reg )
/* ================================== Invariant Instruction Representation */
/* Invariant instructions are used for generating binary signatures;
* the instruction is modified so that all variant bytes in an instruction
* are replaced with a wildcard byte.
*
* A 'variant byte' is one that is expected to be modified by either the
* static or the dynamic linker: for example, an address encoded in an
* instruction.
*
* By comparing the invariant representation of one instruction [or of a
* sequence of instructions] with the invariant representation of another,
* one determine whether the two invariant representations are from the same
* relocatable object [.o] file. Thus one can use binary signatures [which
* are just sequences of invariant instruction representations] to look for
* library routines which have been statically-linked into a binary.
*
* The invariant routines are faster and smaller than the disassembly
* routines; they can be used to determine the size of an instruction
* without all of the overhead of a full instruction disassembly.
*/
/* This byte is used to replace variant bytes */
#define X86_WILDCARD_BYTE 0xF4
struct x86_invariant_op_t{
enum x86_op_type type; /* operand type */
enum x86_op_datatype datatype; /* operand size */
enum x86_op_access access; /* operand access [RWX] */
x86_op_flags flags; /* misc flags */
};
struct x86_invariant_t {
unsigned char bytes[64]; /* invariant representation */
unsigned int size; /* number of bytes in insn */
x86_insn_t::x86_insn_group group; /* meta-type, e.g. INS_EXEC */
enum x86_insn_type type; /* type, e.g. INS_BRANCH */
x86_invariant_op_t operands[3]; /* operands: dest, src, imm */
} ;
/* return a version of the instruction with the variant bytes masked out */
size_t x86_invariant_disasm( unsigned char *buf, int buf_len,
x86_invariant_t *inv );
/* return the size in bytes of the intruction pointed to by 'buf';
* this used x86_invariant_disasm since it faster than x86_disasm */
size_t x86_size_disasm( unsigned char *buf, unsigned int buf_len );
#ifdef __cplusplus
//}
#endif
#endif

14
3rd_party/libdisasm/qword.h vendored Normal file
View File

@ -0,0 +1,14 @@
#ifndef LIBDISASM_QWORD_H
#define LIBDISASM_QWORD_H
#include <stdint.h>
/* platform independent data types */
#ifdef _MSC_VER
typedef __int64 qword_t;
#else
typedef int64_t qword_t;
#endif
#endif

220
3rd_party/libdisasm/x86_disasm.cpp vendored Normal file
View File

@ -0,0 +1,220 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libdis.h"
#include "ia32_insn.h"
#include "ia32_invariant.h"
#include "x86_operand_list.h"
#ifdef _MSC_VER
#define snprintf _snprintf
#define inline __inline
#endif
void x86_insn_t::make_invalid(unsigned char *buf)
{
strcpy( mnemonic, "invalid" );
x86_oplist_free( );
size = 1;
group = x86_insn_t::insn_none;
type = insn_invalid;
memcpy( bytes, buf, 1 );
}
unsigned int X86_Disasm::x86_disasm( unsigned char *buf, unsigned int buf_len,
uint32_t buf_rva, unsigned int offset,
x86_insn_t *insn ){
int len, size;
unsigned char bytes[MAX_INSTRUCTION_SIZE];
if ( ! buf || ! insn || ! buf_len ) {
/* caller screwed up somehow */
return 0;
}
/* ensure we are all NULLed up */
memset( insn, 0, sizeof(x86_insn_t) );
insn->addr = buf_rva + offset;
insn->offset = offset;
/* default to invalid insn */
insn->type = insn_invalid;
insn->group = x86_insn_t::insn_none;
if ( offset >= buf_len ) {
/* another caller screwup ;) */
x86_report_error(report_disasm_bounds, (void*)((long)buf_rva+offset));
return 0;
}
len = buf_len - offset;
/* copy enough bytes for disassembly into buffer : this
* helps prevent buffer overruns at the end of a file */
memset( bytes, 0, MAX_INSTRUCTION_SIZE );
memcpy( bytes, &buf[offset], (len < MAX_INSTRUCTION_SIZE) ? len :
MAX_INSTRUCTION_SIZE );
/* actually do the disassembly */
/* TODO: allow switching when more disassemblers are added */
m_decoder.decode_into(insn);
size = m_decoder.ia32_disasm_addr(bytes, len);
//size = insn->ia32_disasm_addr( bytes, len);
/* check and see if we had an invalid instruction */
if (! size ) {
x86_report_error(report_invalid_insn, (void*)((long)buf_rva+offset) );
return 0;
}
/* check if we overran the end of the buffer */
if ( size > len ) {
x86_report_error( report_insn_bounds, (void*)((long)buf_rva + offset) );
insn->make_invalid(bytes);
return 0;
}
/* fill bytes field of insn */
memcpy( insn->bytes, bytes, size );
return size;
}
unsigned int X86_Disasm::x86_disasm_range( unsigned char *buf, uint32_t buf_rva,
unsigned int offset, unsigned int len,
DISASM_CALLBACK func, void *arg ) {
x86_insn_t insn;
unsigned int buf_len, size, count = 0, bytes = 0;
/* buf_len is implied by the arguments */
buf_len = len + offset;
while ( bytes < len ) {
size = x86_disasm( buf, buf_len, buf_rva, offset + bytes,
&insn );
if ( size ) {
/* invoke callback if it exists */
if ( func ) {
(*func)( &insn, arg );
}
bytes += size;
count ++;
} else {
/* error */
bytes++; /* try next byte */
}
insn.x86_oplist_free();
}
return( count );
}
static inline int follow_insn_dest( x86_insn_t *insn ) {
if ( insn->type == insn_jmp || insn->type == insn_jcc ||
insn->type == insn_call || insn->type == insn_callcc ) {
return(1);
}
return(0);
}
static inline int insn_doesnt_return( x86_insn_t *insn ) {
return( (insn->type == insn_jmp || insn->type == insn_return) ? 1: 0 );
}
static int32_t internal_resolver( x86_op_t *op, x86_insn_t *insn ){
int32_t next_addr = -1;
if ( x86_optype_is_address(op->type) ) {
next_addr = op->data.sdword;
} else if ( op->type == op_relative_near ) {
next_addr = insn->addr + insn->size + op->data.relative_near;
} else if ( op->type == op_relative_far ) {
next_addr = insn->addr + insn->size + op->data.relative_far;
}
return( next_addr );
}
unsigned int X86_Disasm::x86_disasm_forward( unsigned char *buf, unsigned int buf_len,
uint32_t buf_rva, unsigned int offset,
DISASM_CALLBACK func, void *arg,
DISASM_RESOLVER resolver, void *r_arg ){
x86_insn_t insn;
x86_op_t *op;
int32_t next_addr;
uint32_t next_offset;
unsigned int size, count = 0, bytes = 0, cont = 1;
while ( cont && bytes < buf_len ) {
size = x86_disasm( buf, buf_len, buf_rva, offset + bytes,
&insn );
if ( size ) {
/* invoke callback if it exists */
if ( func ) {
(*func)( &insn, arg );
}
bytes += size;
count ++;
} else {
/* error */
bytes++; /* try next byte */
}
if ( follow_insn_dest(&insn) ) {
op = insn.x86_operand_1st();//x86_get_dest_operand
next_addr = -1;
/* if caller supplied a resolver, use it to determine
* the address to disassemble */
if ( resolver ) {
next_addr = resolver(op, &insn, r_arg);
} else {
next_addr = internal_resolver(op, &insn);
}
if (next_addr != -1 ) {
next_offset = next_addr - buf_rva;
/* if offset is in this buffer... */
if ( next_offset >= 0 &&
next_offset < buf_len ) {
/* go ahead and disassemble */
count += x86_disasm_forward( buf,
buf_len,
buf_rva,
next_offset,
func, arg,
resolver, r_arg );
} else {
/* report unresolved address */
x86_report_error( report_disasm_bounds,
(void*)(long)next_addr );
}
}
} /* end follow_insn */
if ( insn_doesnt_return(&insn) ) {
/* stop disassembling */
cont = 0;
}
insn.x86_oplist_free( );
}
return( count );
}
/* invariant instruction representation */
size_t x86_invariant_disasm( unsigned char *buf, int buf_len,
x86_invariant_t *inv ){
if (! buf || ! buf_len || ! inv ) {
return(0);
}
return ia32_disasm_invariant(buf, buf_len, inv);
}
size_t x86_size_disasm( unsigned char *buf, unsigned int buf_len ) {
if (! buf || ! buf_len ) {
return(0);
}
return ia32_disasm_size(buf, buf_len);
}

1450
3rd_party/libdisasm/x86_format.cpp vendored Normal file

File diff suppressed because it is too large Load Diff

70
3rd_party/libdisasm/x86_imm.cpp vendored Normal file
View File

@ -0,0 +1,70 @@
#include "qword.h"
#include "x86_imm.h"
#include <stdio.h>
unsigned int x86_imm_signsized( unsigned char * buf, size_t buf_len,
void *dest, unsigned int size ) {
signed char *cp = (signed char *) dest;
signed short *sp = (signed short *) dest;
int32_t *lp = (int32_t *) dest;
qword_t *qp = (qword_t *) dest;
if ( size > buf_len ) {
return 0;
}
/* Copy 'size' bytes from *buf to *op
* return number of bytes copied */
switch (size) {
case 1: /* BYTE */
*cp = *((signed char *) buf);
break;
case 2: /* WORD */
*sp = *((signed short *) buf);
break;
case 6:
case 8: /* QWORD */
*qp = *((qword_t *) buf);
break;
case 4: /* DWORD */
default:
*lp = *((int32_t *) buf);
break;
}
return (size);
}
unsigned int x86_imm_sized( unsigned char * buf, size_t buf_len, void *dest,
unsigned int size ) {
unsigned char *cp = (unsigned char *) dest;
unsigned short *sp = (unsigned short *) dest;
uint32_t *lp = (uint32_t *) dest;
qword_t *qp = (qword_t *) dest;
if ( size > buf_len ) {
return 0;
}
/* Copy 'size' bytes from *buf to *op
* return number of bytes copied */
switch (size) {
case 1: /* BYTE */
*cp = *((unsigned char *) buf);
break;
case 2: /* WORD */
*sp = *((unsigned short *) buf);
break;
case 6:
case 8: /* QWORD */
*qp = *((qword_t *) buf);
break;
case 4: /* DWORD */
default:
*lp = *((uint32_t *) buf);
break;
}
return (size);
}

18
3rd_party/libdisasm/x86_imm.h vendored Normal file
View File

@ -0,0 +1,18 @@
#ifndef x86_IMM_H
#define x86_IMM_H
#include "./qword.h"
#include <sys/types.h>
#ifdef WIN32
#include <windows.h>
#endif
/* these are in the global x86 namespace but are not a part of the
* official API */
unsigned int x86_imm_sized( unsigned char *buf, size_t buf_len, void *dest,
unsigned int size );
unsigned int x86_imm_signsized( unsigned char *buf, size_t buf_len, void *dest,
unsigned int size );
#endif

196
3rd_party/libdisasm/x86_insn.cpp vendored Normal file
View File

@ -0,0 +1,196 @@
#include <stdio.h>
#include <stdlib.h>
#include <cassert>
#include "libdis.h"
#ifdef _MSC_VER
#define snprintf _snprintf
#define inline __inline
#endif
int x86_insn_is_valid( x86_insn_t *insn ) {
if ( insn && insn->type != insn_invalid && insn->size > 0 ) {
return 1;
}
return 0;
}
int x86_insn_t::x86_insn_is_valid( )
{
if ( this && this->type != insn_invalid && this->size > 0 )
{
return 1;
}
return 0;
}
uint32_t x86_insn_t::x86_get_address() {
x86_oplist_t *op_lst;
assert(this);
if (! operands ) {
return 0;
}
for (op_lst = operands; op_lst; op_lst = op_lst->next ) {
if ( op_lst->op.type == op_offset ) {
return op_lst->op.data.offset;
} else if ( op_lst->op.type == op_absolute ) {
if ( op_lst->op.datatype == op_descr16 ) {
return (uint32_t)
op_lst->op.data.absolute.offset.off16;
}
return op_lst->op.data.absolute.offset.off32;
}
}
return 0;
}
int32_t x86_insn_t::x86_get_rel_offset( ) {
x86_oplist_t *op_lst;
assert(this);
if (! operands ) {
return 0;
}
for (op_lst = operands; op_lst; op_lst = op_lst->next ) {
if ( op_lst->op.type == op_relative_near ) {
return (int32_t) op_lst->op.data.relative_near;
} else if ( op_lst->op.type == op_relative_far ) {
return op_lst->op.data.relative_far;
}
}
return 0;
}
x86_op_t * x86_insn_t::x86_get_branch_target() {
x86_oplist_t *op_lst;
assert(this);
if (! operands ) {
return NULL;
}
for (op_lst = operands; op_lst; op_lst = op_lst->next ) {
if ( op_lst->op.access & op_execute ) {
return &(op_lst->op);
}
}
return NULL;
}
x86_op_t * x86_insn_t::x86_get_imm() {
x86_oplist_t *op_lst;
assert(this);
if ( ! operands ) {
return NULL;
}
for (op_lst = operands; op_lst; op_lst = op_lst->next ) {
if ( op_lst->op.type == op_immediate ) {
return &(op_lst->op);
}
}
return NULL;
}
#define IS_PROPER_IMM( x ) \
x->op.type == op_immediate && ! (x->op.flags.op_hardcode)
/* if there is an immediate value in the instruction, return a pointer to
* it */
unsigned char * x86_insn_t::x86_get_raw_imm() {
int size, offset;
x86_op_t *op = NULL;
assert(this);
if (! operands ) {
return(NULL);
}
/* a bit inelegant, but oh well... */
if ( IS_PROPER_IMM( operands ) ) {
op = &operands->op;
} else if ( operands->next ) {
if ( IS_PROPER_IMM( operands->next ) ) {
op = &operands->next->op;
} else if ( operands->next->next &&
IS_PROPER_IMM( operands->next->next ) ) {
op = &operands->next->next->op;
}
}
if (! op ) {
return( NULL );
}
/* immediate data is at the end of the insn */
size = op->x86_operand_size();
offset = size - size;
return( &bytes[offset] );
}
size_t x86_op_t::x86_operand_size() {
switch (datatype ) {
case op_byte: return 1;
case op_word: return 2;
case op_dword: return 4;
case op_qword: return 8;
case op_dqword: return 16;
case op_sreal: return 4;
case op_dreal: return 8;
case op_extreal: return 10;
case op_bcd: return 10;
case op_ssimd: return 16;
case op_dsimd: return 16;
case op_sssimd: return 4;
case op_sdsimd: return 8;
case op_descr32: return 6;
case op_descr16: return 4;
case op_pdescr32: return 6;
case op_pdescr16: return 6;
case op_bounds16: return 4;
case op_bounds32: return 8;
case op_fpuenv16: return 14;
case op_fpuenv32: return 28;
case op_fpustate16: return 94;
case op_fpustate32: return 108;
case op_fpregset: return 512;
case op_fpreg: return 10;
case op_none: return 0;
}
return(4); /* default size */
}
void x86_insn_t::x86_set_insn_addr( uint32_t _addr ) {
addr = _addr;
}
void x86_insn_t::x86_set_insn_offset( unsigned int offset ){
offset = offset;
}
void x86_insn_t::x86_set_insn_function( void * func ){
function = func;
}
void x86_insn_t::x86_set_insn_block( void * _block ){
block = _block;
}
void x86_insn_t::x86_tag_insn(){
tag = 1;
}
void x86_insn_t::x86_untag_insn(){
tag = 0;
}
int x86_insn_t::x86_insn_is_tagged(){
return tag;
}

69
3rd_party/libdisasm/x86_misc.cpp vendored Normal file
View File

@ -0,0 +1,69 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libdis.h"
#include "ia32_insn.h"
#include "ia32_reg.h" /* for ia32_reg wrapper */
#include "ia32_settings.h"
extern ia32_settings_t ia32_settings;
#ifdef _MSC_VER
#define snprintf _snprintf
#define inline __inline
#endif
/* =========================================================== INIT/TERM */
int X86_Disasm::x86_init( enum x86_options options, DISASM_REPORTER reporter, void * arg )
{
ia32_settings.options = options;
__x86_reporter_func = reporter;
__x86_reporter_arg = arg;
return 1;
}
void X86_Disasm::x86_set_reporter( DISASM_REPORTER reporter, void * arg ) {
__x86_reporter_func = reporter;
__x86_reporter_arg = arg;
}
void X86_Disasm::x86_set_options( enum x86_options options ){
ia32_settings.options = options;
}
enum x86_options X86_Disasm::x86_get_options( void ) {
return ia32_settings.options;
}
int X86_Disasm::x86_cleanup( void )
{
return 1;
}
/* =========================================================== ERRORS */
void X86_Disasm::x86_report_error( enum x86_report_codes code, void *data ) {
if ( __x86_reporter_func ) {
(*__x86_reporter_func)(code, data, __x86_reporter_arg);
}
}
/* =========================================================== MISC */
unsigned int X86_Disasm::x86_endian(void) { return ia32_settings.endian; }
unsigned int X86_Disasm::x86_addr_size(void) { return ia32_settings.sz_addr; }
unsigned int X86_Disasm::x86_op_size(void) { return ia32_settings.sz_oper; }
unsigned int X86_Disasm::x86_word_size(void) { return ia32_settings.sz_word; }
unsigned int X86_Disasm::x86_max_insn_size(void){ return ia32_settings.max_insn; }
unsigned int X86_Disasm::x86_sp_reg(void) { return ia32_settings.id_sp_reg; }
unsigned int X86_Disasm::x86_fp_reg(void) { return ia32_settings.id_fp_reg; }
unsigned int X86_Disasm::x86_ip_reg(void) { return ia32_settings.id_ip_reg; }
unsigned int X86_Disasm::x86_flag_reg(void) { return ia32_settings.id_flag_reg; }
/* wrapper function to hide the IA32 register fn */
void x86_reg_t::x86_reg_from_id( unsigned int _id) {
ia32_handle_register( this, _id );
return;
}

206
3rd_party/libdisasm/x86_operand_list.cpp vendored Normal file
View File

@ -0,0 +1,206 @@
#include <stdlib.h>
#include <cassert>
#include "libdis.h"
void x86_insn_t::x86_oplist_append( x86_oplist_t *op ) {
x86_oplist_t *list;
assert(this);
list = operands;
if (! list ) {
operand_count = 1;
/* Note that we have no way of knowing if this is an
* exlicit operand or not, since the caller fills
* the x86_op_t after we return. We increase the
* explicit count automatically, and ia32_insn_implicit_ops
* decrements it */
explicit_count = 1;
operands = op;
return;
}
/* get to end of list */
for ( ; list->next; list = list->next )
;
operand_count = operand_count + 1;
explicit_count = explicit_count + 1;
list->next = op;
return;
}
bool x86_insn_t::containsFlag(x86_eflags tofind, x86_flag_status in)
{
switch(tofind)
{
case insn_eflag_carry:
return (in & (insn_carry_set | insn_carry_or_zero_set | insn_carry_clear))!=0;
case insn_eflag_zero:
return (in & (insn_zero_set | insn_carry_or_zero_set |
insn_zero_set_or_sign_ne_oflow | insn_zero_clear))!=0;
case insn_eflag_overflow:
return (in & (insn_oflow_set | insn_zero_set_or_sign_ne_oflow |
insn_oflow_clear | insn_sign_eq_oflow |
insn_sign_ne_oflow))!=0;
case insn_eflag_direction:
return (in & (insn_dir_set | insn_dir_clear))!=0;
case insn_eflag_sign:
return (in & (insn_sign_set | insn_sign_clear | insn_zero_set_or_sign_ne_oflow |
insn_sign_eq_oflow | insn_sign_ne_oflow))!=0;
case insn_eflag_parity:
return (in & (insn_parity_set | insn_parity_clear))!=0;
}
return false;
}
x86_op_t * x86_insn_t::x86_operand_new( ) {
x86_oplist_t *op;
assert(this);
op = (x86_oplist_t *)calloc( sizeof(x86_oplist_t), 1 );
op->op.insn = this;
x86_oplist_append( op );
return( &(op->op) );
}
void x86_insn_t::x86_oplist_free( )
{
x86_oplist_t *op, *list;
assert(this);
for ( list = operands; list; ) {
op = list;
list = list->next;
free(op);
}
operands = NULL;
operand_count = 0;
explicit_count = 0;
return;
}
/* ================================================== LIBDISASM API */
/* these could probably just be #defines, but that means exposing the
enum... yet one more confusing thing in the API */
int x86_insn_t::x86_operand_foreach( x86_operand_fn func, void *arg,
enum x86_op_foreach_type type ){
x86_oplist_t *list;
char _explicit = 1, implicit = 1;
assert(this);
if ( ! func ) {
return 0;
}
/* note: explicit and implicit can be ORed together to
* allow an "all" limited by access type, even though the
* user is stupid to do this since it is default behavior :) */
if ( (type & op_explicit) && ! (type & op_implicit) ) {
implicit = 0;
}
if ( (type & op_implicit) && ! (type & op_explicit) ) {
_explicit = 0;
}
type = (x86_op_foreach_type)((int)type & 0x0F); /* mask out explicit/implicit operands */
for ( list = operands; list; list = list->next ) {
if (! implicit && (list->op.flags.op_implied) ) {
/* operand is implicit */
continue;
}
if (! _explicit && ! (list->op.flags.op_implied) ) {
/* operand is not implicit */
continue;
}
switch ( type ) {
case op_any:
break;
case op_dest:
if (! (list->op.access & op_write) ) {
continue;
}
break;
case op_src:
if (! (list->op.access & op_read) ) {
continue;
}
break;
case op_ro:
if (! (list->op.access & op_read) ||
(list->op.access & op_write ) ) {
continue;
}
break;
case op_wo:
if (! (list->op.access & op_write) ||
(list->op.access & op_read ) ) {
continue;
}
break;
case op_xo:
if (! (list->op.access & op_execute) ) {
continue;
}
break;
case op_rw:
if (! (list->op.access & op_write) ||
! (list->op.access & op_read ) ) {
continue;
}
break;
case op_implicit: case op_explicit: /* make gcc happy */
break;
}
/* any non-continue ends up here: invoke the callback */
(*func)( &list->op, this, arg );
}
return 1;
}
static void count_operand( x86_op_t *op, x86_insn_t *insn, void *arg ) {
size_t * count = (size_t *) arg;
*count = *count + 1;
}
size_t x86_insn_t::x86_operand_count( enum x86_op_foreach_type type ) {
size_t count = 0;
/* save us a list traversal for common counts... */
if ( type == op_any ) {
return operand_count;
} else if ( type == op_explicit ) {
return explicit_count;
}
x86_operand_foreach( count_operand, &count, type );
return count;
}
/* accessor functions */
x86_op_t * x86_insn_t::x86_operand_1st() {
if (! explicit_count ) {
return NULL;
}
return &(operands->op);
}
x86_op_t * x86_insn_t::x86_operand_2nd( ) {
if ( explicit_count < 2 ) {
return NULL;
}
return &(operands->next->op);
}
x86_op_t * x86_insn_t::x86_operand_3rd( ) {
if ( explicit_count < 3 ) {
return NULL;
}
return &(operands->next->next->op);
}

View File

@ -0,0 +1,8 @@
#ifndef X86_OPERAND_LIST_H
#define X86_OPERAND_LIST_H
#include "libdis.h"
//x86_op_t * x86_operand_new( x86_insn_t *insn );
#endif

View File

@ -9,7 +9,7 @@ IF(CMAKE_BUILD_TOOL MATCHES "(msdev|devenv|nmake)")
ADD_DEFINITIONS(/W4)
ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall --std=c++0x")
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" ) #-D_GLIBCXX_DEBUG
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_GLIBCXX_DEBUG " ) #--coverage
ENDIF()
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/CMakeScripts;${CMAKE_MODULE_PATH})
@ -20,8 +20,11 @@ IF(dcc_build_tests)
FIND_PACKAGE(GMock)
ENDIF()
ADD_SUBDIRECTORY(3rd_party)
llvm_map_components_to_libraries(REQ_LLVM_LIBRARIES jit native mc support)
INCLUDE_DIRECTORIES(
3rd_party/libdisasm
include
include/idioms
${Boost_INCLUDE_DIRS}
@ -59,6 +62,7 @@ set(dcc_SOURCES
src/perfhlib.cpp
src/procs.cpp
src/project.cpp
src/Procedure.cpp
src/proplong.cpp
src/reducible.cpp
src/scanner.cpp
@ -69,6 +73,7 @@ set(dcc_SOURCES
set(dcc_HEADERS
include/ast.h
include/bundle.h
include/BinaryImage.h
include/dcc.h
include/disassem.h
include/dosdcc.h
@ -101,7 +106,7 @@ SOURCE_GROUP(Source FILES ${dcc_SOURCES})
SOURCE_GROUP(Headers FILES ${dcc_HEADERS})
ADD_EXECUTABLE(dcc_original ${dcc_SOURCES} ${dcc_HEADERS})
TARGET_LINK_LIBRARIES(dcc_original ${REQ_LLVM_LIBRARIES})
TARGET_LINK_LIBRARIES(dcc_original disasm_s ${REQ_LLVM_LIBRARIES})
if(dcc_build_tests)
ADD_SUBDIRECTORY(src)
endif()

20
include/BinaryImage.h Normal file
View File

@ -0,0 +1,20 @@
#pragma once
#include <stdint.h>
struct PROG /* Loaded program image parameters */
{
int16_t initCS;
int16_t initIP; /* These are initial load values */
int16_t initSS; /* Probably not of great interest */
uint16_t initSP;
bool fCOM; /* Flag set if COM program (else EXE)*/
int cReloc; /* No. of relocation table entries */
uint32_t * relocTable; /* Ptr. to relocation table */
uint8_t * map; /* Memory bitmap ptr */
int cProcs; /* Number of procedures so far */
int offMain; /* The offset of the main() proc */
uint16_t segMain; /* The segment of the main() proc */
bool bSigs; /* True if signatures loaded */
int cbImage; /* Length of image in bytes */
uint8_t * Image; /* Allocated by loader to hold entire program image */
};

View File

@ -84,6 +84,7 @@ enum eLLFlags
JMP_ICODE =0x1000000, /* Jmp dest immed.op converted to icode index */
JX_LOOP =0x2000000, /* Cond jump is part of loop conditional exp */
REST_STK =0x4000000 /* Stack needs to be restored after CALL */
#define ICODEMASK 0x0FF00FF /* Masks off parser flags */
};
/* Types of icodes */
enum icodeType

View File

@ -61,24 +61,7 @@ typedef struct { /* Command line option flags */
extern OPTION option; /* Command line options */
struct PROG /* Loaded program image parameters */
{
int16_t initCS;
int16_t initIP; /* These are initial load values */
int16_t initSS; /* Probably not of great interest */
uint16_t initSP;
bool fCOM; /* Flag set if COM program (else EXE)*/
int cReloc; /* No. of relocation table entries */
uint32_t * relocTable; /* Ptr. to relocation table */
uint8_t * map; /* Memory bitmap ptr */
int cProcs; /* Number of procedures so far */
int offMain; /* The offset of the main() proc */
uint16_t segMain; /* The segment of the main() proc */
bool bSigs; /* True if signatures loaded */
int cbImage; /* Length of image in bytes */
uint8_t * Image; /* Allocated by loader to hold entire
* program image */
};
#include "BinaryImage.h"
extern PROG prog; /* Loaded program image parameters */
extern std::bitset<32> duReg[30]; /* def/use bits for registers */

View File

@ -3,6 +3,7 @@
#include <string>
#include <sstream>
#include <bitset>
/* Machine registers */
enum eReg
{
@ -42,11 +43,21 @@ enum eReg
INDEX_BX, // "bx"
LAST_REG
};
class SourceMachine
{
public:
// virtual bool physicalReg(eReg r)=0;
class Machine_X86
};
//class Machine_X86_Disassembler
//{
// void formatRM(std::ostringstream &p, uint32_t flg, const LLOperand &pm);
//};
class Machine_X86 : public SourceMachine
{
public:
Machine_X86();
virtual ~Machine_X86() {}
static const std::string &regName(eReg r);
static const std::string &opcodeName(unsigned r);
static const std::string &floatOpName(unsigned r);
@ -67,4 +78,5 @@ public:
static bool isMemOff(eReg r);
static bool isSubRegisterOf(eReg reg, eReg parent);
};

View File

@ -5,7 +5,9 @@
#include <list>
#include <llvm/ADT/ilist.h>
#include "symtab.h"
#include "BinaryImage.h"
struct Function;
struct SourceMachine;
struct CALL_GRAPH;
typedef llvm::iplist<Function> FunctionListType;
@ -20,6 +22,7 @@ struct Project
std::string m_fname;
FunctionListType pProcList;
CALL_GRAPH * callGraph; /* Pointer to the head of the call graph */
PROG prog; /* Loaded program image parameters */
Project() {}
// no copies
Project(const Project&) = delete;
@ -51,7 +54,6 @@ struct Project
return *this;
}
static Project *get();
public:
ilFunction funcIter(Function *to_find);
ilFunction findByEntry(uint32_t entry);
@ -65,6 +67,10 @@ public:
const std::string &symbolName(size_t idx);
const SYM &getSymByIdx(size_t idx) const;
static Project *get();
PROG * binary() {return &prog;}
SourceMachine *machine();
protected:
void writeGlobSymTable();
};

View File

@ -1,7 +1,12 @@
#pragma once
/* Scanner functions
* (C) Cristina Cifuentes, Jeff Ledermann
*/
//#define LH(p) ((int)((uint8_t *)(p))[0] + ((int)((uint8_t *)(p))[1] << 8))
#include <stdint.h>
#include "error.h"
/* Extracts reg bits from middle of mod-reg-rm uint8_t */
#define REG(x) ((uint8_t)(x & 0x38) >> 3)
struct ICODE;
extern eErrorId scan(uint32_t ip, ICODE &p);

View File

@ -29,6 +29,7 @@ typedef unsigned char boolT; /* 8 bits */
//#define LH(p) ((int16)((byte *)(p))[0] + ((int16)((byte *)(p))[1] << 8))
#define LH(p) ((word)((byte *)(p))[0] + ((word)((byte *)(p))[1] << 8))
/* Macro reads a LH word from the image regardless of host convention */
/* Returns a signed quantity, e.g. C000 is read into an Int as FFFFC000 */
#define LH_SIGNED(p) (((byte *)(p))[0] + (((char *)(p))[1] << 8))

View File

@ -1,6 +1,7 @@
#include "Procedure.h"
FunctionType *Function::getFunctionType() const
{
return &m_type;
}
#include "project.h"
#include "scanner.h"
//FunctionType *Function::getFunctionType() const
//{
// return &m_type;
//}