A few changes to libdisasm

This commit is contained in:
Artur K 2012-03-18 11:35:13 +01:00
parent 71fc8bfef7
commit fee4d6fe9a
5 changed files with 598 additions and 584 deletions

View File

@ -190,7 +190,7 @@ static struct {
/* REG_EIPMSR_INDEX : SYSENTER_EIP_MSR : 92 */ /* REG_EIPMSR_INDEX : SYSENTER_EIP_MSR : 92 */
{ REG_DWORD_SIZE, reg_sys, 0, "eip_msr" }, { REG_DWORD_SIZE, reg_sys, 0, "eip_msr" },
{ 0 } { 0 }
}; };
static size_t sz_regtable = NUM_X86_REGS + 1; static size_t sz_regtable = NUM_X86_REGS + 1;

View File

@ -210,7 +210,7 @@ enum x86_op_datatype { /* these use Intel's lame terminology */
op_fpustate32 = 23, /* 108 byte FPU state (env & reg stack) */ op_fpustate32 = 23, /* 108 byte FPU state (env & reg stack) */
op_fpregset = 24, /* 512 bytes: register set */ op_fpregset = 24, /* 512 bytes: register set */
op_fpreg = 25, /* FPU register */ op_fpreg = 25, /* FPU register */
op_none = 0xFF, /* operand without a datatype (INVLPG) */ op_none = 0xFF /* operand without a datatype (INVLPG) */
}; };
enum x86_op_access { /* ORed together */ enum x86_op_access { /* ORed together */
@ -275,24 +275,20 @@ struct x86_op_t{
unsigned char fpuenv[28]; unsigned char fpuenv[28];
/* offset from segment */ /* offset from segment */
uint32_t offset; uint32_t offset;
/* ID of CPU register */ x86_reg_t reg; /* ID of CPU register */
x86_reg_t reg; char relative_near; /* offsets from current insn */
/* offsets from current insn */
char relative_near;
int32_t relative_far; int32_t relative_far;
/* segment:offset */ x86_absolute_t absolute; /* segment:offset */
x86_absolute_t absolute; x86_ea_t expression; /* effective address [expression] */
/* effective address [expression] */
x86_ea_t expression;
} data; } data;
/* this is needed to make formatting operands more sane */ /* this is needed to make formatting operands more sane */
void * insn; /* pointer to x86_insn_t owning operand */ void * insn; /* pointer to x86_insn_t owning operand */
size_t size() size_t size()
{ {
return x86_operand_size(); return operand_size();
} }
/* get size of operand data in bytes */ /* get size of operand data in bytes */
size_t x86_operand_size(); size_t operand_size();
/* format (sprintf) an operand into 'buf' using specified syntax */ /* format (sprintf) an operand into 'buf' using specified syntax */
int x86_format_operand(char *buf, int len, enum x86_asm_format format ); int x86_format_operand(char *buf, int len, enum x86_asm_format format );
bool is_address( ) { bool is_address( ) {
@ -302,9 +298,9 @@ struct x86_op_t{
return ( type == op_relative_near || type == op_relative_far ); return ( type == op_relative_near || type == op_relative_far );
} }
char * format( enum x86_asm_format format ); char * format( enum x86_asm_format format );
x86_op_t * copy() { x86_op_t * copy()
{
x86_op_t *op = (x86_op_t *) calloc( sizeof(x86_op_t), 1 ); x86_op_t *op = (x86_op_t *) calloc( sizeof(x86_op_t), 1 );
if ( op ) { if ( op ) {
memcpy( op, this, sizeof(x86_op_t) ); memcpy( op, this, sizeof(x86_op_t) );
} }
@ -524,6 +520,7 @@ enum x86_insn_prefix {
/* TODO: maybe provide insn_new/free(), and have disasm return new insn_t */ /* TODO: maybe provide insn_new/free(), and have disasm return new insn_t */
/* FOREACH types: these are used to limit the foreach results to /* FOREACH types: these are used to limit the foreach results to
* operands which match a certain "type" (implicit or explicit) * operands which match a certain "type" (implicit or explicit)
* or which are accessed in certain ways (e.g. read or write). Note * or which are accessed in certain ways (e.g. read or write). Note
@ -577,9 +574,9 @@ public:
/* information about the instruction */ /* information about the instruction */
uint32_t addr; /* load address */ uint32_t addr; /* load address */
uint32_t offset; /* offset into file/buffer */ uint32_t offset; /* offset into file/buffer */
enum x86_insn_group group; /* meta-type, e.g. INS_EXEC */ x86_insn_group group; /* meta-type, e.g. INS_EXEC */
enum x86_insn_type type; /* type, e.g. INS_BRANCH */ x86_insn_type type; /* type, e.g. INS_BRANCH */
enum x86_insn_note note; /* note, e.g. RING0 */ x86_insn_note note; /* note, e.g. RING0 */
unsigned char bytes[MAX_INSN_SIZE]; unsigned char bytes[MAX_INSN_SIZE];
unsigned char size; /* size of insn in bytes */ unsigned char size; /* size of insn in bytes */
/* 16/32-bit mode settings */ /* 16/32-bit mode settings */
@ -607,58 +604,29 @@ public:
void *block; /* code block containing this insn */ void *block; /* code block containing this insn */
void *function; /* function containing this insn */ void *function; /* function containing this insn */
int tag; /* tag the insn as seen/processed */ int tag; /* tag the insn as seen/processed */
x86_op_t *x86_operand_new(); x86_op_t * x86_operand_new();
/* convenience routine: returns count of operands matching 'type' */
size_t x86_operand_count( enum x86_op_foreach_type type ); size_t x86_operand_count( enum x86_op_foreach_type type );
/* accessor functions for the operands */ /* accessor functions for the operands */
x86_op_t * x86_operand_1st( ); x86_op_t * x86_operand_1st( );
x86_op_t * x86_operand_2nd( ); x86_op_t * x86_operand_2nd( );
x86_op_t * x86_operand_3rd( ); x86_op_t * x86_operand_3rd( );
/* Get Relative Offset: return as a sign-extended int32_t the near or far x86_op_t * get_dest();
* relative offset operand, or 0 if there is none. There can be only one
* relative offset operand in an instruction. */
int32_t x86_get_rel_offset( ); int32_t x86_get_rel_offset( );
/* Get Branch Target: return the x86_op_t containing the target of
* a jump or call operand, or NULL if there is no branch target.
* Internally, a 'branch target' is defined as any operand with
* Execute Access set. There can be only one branch target per instruction. */
x86_op_t * x86_get_branch_target( ); x86_op_t * x86_get_branch_target( );
/* Get Immediate: return the x86_op_t containing the immediate operand
* for this instruction, or NULL if there is no immediate operand. There
* can be only one immediate operand per instruction */
x86_op_t * x86_get_imm( ); x86_op_t * x86_get_imm( );
/* Get Raw Immediate Data: returns a pointer to the immediate data encoded uint8_t * x86_get_raw_imm( );
* in the instruction. This is useful for large data types [>32 bits] currently
* not supported by libdisasm, or for determining if the disassembler
* screwed up the conversion of the immediate data. Note that 'imm' in this
* context refers to immediate data encoded at the end of an instruction as
* detailed in the Intel Manual Vol II Chapter 2; it does not refer to the
* 'op_imm' operand (the third operand in instructions like 'mul' */
unsigned char * x86_get_raw_imm( );
/* More accessor functions, this time for user-defined info... */ /* More accessor functions, this time for user-defined info... */
/* set the address (usually RVA) of the insn */
void x86_set_insn_addr( uint32_t addr ); void x86_set_insn_addr( uint32_t addr );
/* format (sprintf) an instruction mnemonic into 'buf' using specified syntax */
int x86_format_mnemonic( char *buf, int len, enum x86_asm_format format); int x86_format_mnemonic( char *buf, int len, enum x86_asm_format format);
/* format (sprintf) an instruction into 'buf' using specified syntax;
* this includes formatting all operands */
int x86_format_insn( char *buf, int len, enum x86_asm_format); int x86_format_insn( char *buf, int len, enum x86_asm_format);
/* free the operand list associated with an instruction -- useful for
* preventing memory leaks when free()ing an x86_insn_t */
void x86_oplist_free( ); void x86_oplist_free( );
/* returns 0 if an instruction is invalid, 1 if valid */ bool is_valid( );
int x86_insn_is_valid( );
/* Get Address: return the value of an offset operand, or the offset of
* a segment:offset absolute address */
uint32_t x86_get_address( ); uint32_t x86_get_address( );
void make_invalid(unsigned char *buf); void make_invalid(unsigned char *buf);
/* instruction tagging: these routines allow the programmer to mark /* instruction tagging: these routines allow the programmer to mark
* instructions as "seen" in a DFS, for example. libdisasm does not use * instructions as "seen" in a DFS, for example. libdisasm does not use
* the tag field.*/ * the tag field.*/
/* set insn->tag to 1 */
void x86_tag_insn( ); void x86_tag_insn( );
/* return insn->tag */
int x86_insn_is_tagged(); int x86_insn_is_tagged();
/* set insn->tag to 0 */ /* set insn->tag to 0 */
void x86_untag_insn(); void x86_untag_insn();

View File

@ -1077,10 +1077,10 @@ static int format_att_mnemonic( x86_insn_t *insn, char *buf, int len) {
)) { )) {
if ( insn->x86_operand_count( op_explicit ) > 0 && if ( insn->x86_operand_count( op_explicit ) > 0 &&
is_memory_op( insn->x86_operand_1st() ) ){ is_memory_op( insn->x86_operand_1st() ) ){
size = insn->x86_operand_1st()->x86_operand_size(); size = insn->x86_operand_1st()->operand_size();
} else if ( insn->x86_operand_count( op_explicit ) > 1 && } else if ( insn->x86_operand_count( op_explicit ) > 1 &&
is_memory_op( insn->x86_operand_2nd() ) ){ is_memory_op( insn->x86_operand_2nd() ) ){
size = insn->x86_operand_2nd()->x86_operand_size(); size = insn->x86_operand_2nd()->operand_size();
} }
} }
@ -1094,6 +1094,7 @@ static int format_att_mnemonic( x86_insn_t *insn, char *buf, int len) {
return ( strlen( buf ) ); return ( strlen( buf ) );
} }
/** format (sprintf) an instruction mnemonic into 'buf' using specified syntax */
int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len, int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len,
enum x86_asm_format format){ enum x86_asm_format format){
char str[MAX_OP_STRING]; char str[MAX_OP_STRING];
@ -1325,6 +1326,8 @@ int X86_Disasm::x86_format_header( char *buf, int len, enum x86_asm_format forma
return( strlen(buf) ); return( strlen(buf) );
} }
/** format (sprintf) an instruction into 'buf' using specified syntax;
* this includes formatting all operands */
int x86_insn_t::x86_format_insn( char *buf, int len, int x86_insn_t::x86_format_insn( char *buf, int len,
enum x86_asm_format format ){ enum x86_asm_format format ){
char str[MAX_OP_STRING]; char str[MAX_OP_STRING];

View File

@ -17,7 +17,8 @@ int x86_insn_is_valid( x86_insn_t *insn ) {
return 0; return 0;
} }
int x86_insn_t::x86_insn_is_valid( ) /** \returns false if an instruction is invalid, true if valid */
bool x86_insn_t::is_valid( )
{ {
if ( this && this->type != insn_invalid && this->size > 0 ) if ( this && this->type != insn_invalid && this->size > 0 )
{ {
@ -26,10 +27,15 @@ int x86_insn_t::x86_insn_is_valid( )
return 0; return 0;
} }
uint32_t x86_insn_t::x86_get_address() { /* Get Address: return the value of an offset operand, or the offset of
* a segment:offset absolute address */
uint32_t x86_insn_t::x86_get_address()
{
x86_oplist_t *op_lst; x86_oplist_t *op_lst;
assert(this); assert(this);
if (! operands ) { if (! operands )
{
return 0; return 0;
} }
@ -48,6 +54,9 @@ uint32_t x86_insn_t::x86_get_address() {
return 0; return 0;
} }
/** Get Relative Offset: return as a sign-extended int32_t the near or far
* relative offset operand, or 0 if there is none. There can be only one
* relative offset operand in an instruction. */
int32_t x86_insn_t::x86_get_rel_offset( ) { int32_t x86_insn_t::x86_get_rel_offset( ) {
x86_oplist_t *op_lst; x86_oplist_t *op_lst;
assert(this); assert(this);
@ -66,6 +75,10 @@ int32_t x86_insn_t::x86_get_rel_offset( ) {
return 0; return 0;
} }
/** Get Branch Target: return the x86_op_t containing the target of
* a jump or call operand, or NULL if there is no branch target.
* Internally, a 'branch target' is defined as any operand with
* Execute Access set. There can be only one branch target per instruction. */
x86_op_t * x86_insn_t::x86_get_branch_target() { x86_op_t * x86_insn_t::x86_get_branch_target() {
x86_oplist_t *op_lst; x86_oplist_t *op_lst;
assert(this); assert(this);
@ -81,6 +94,24 @@ x86_op_t * x86_insn_t::x86_get_branch_target() {
return NULL; return NULL;
} }
/** Get Destination: return the first operand in this instruction's operand
 * list that has write access set, or NULL if the instruction has no operands
 * or none of them is written. When operands are present, the instruction is
 * asserted to have exactly one operand matching op_dest. */
x86_op_t * x86_insn_t::get_dest() {
    assert(this);
    if ( operands ) {
        /* sanity check: a single destination operand is expected */
        assert(this->x86_operand_count(op_dest)==1);
        x86_oplist_t *node = operands;
        while ( node ) {
            if ( node->op.access & op_write ) {
                return &(node->op);
            }
            node = node->next;
        }
    }
    return NULL;
}
/** \brief Get Immediate: return the x86_op_t containing the immediate operand
for this instruction, or NULL if there is no immediate operand. There
can be only one immediate operand per instruction */
x86_op_t * x86_insn_t::x86_get_imm() { x86_op_t * x86_insn_t::x86_get_imm() {
x86_oplist_t *op_lst; x86_oplist_t *op_lst;
assert(this); assert(this);
@ -101,9 +132,15 @@ x86_op_t * x86_insn_t::x86_get_imm() {
x->op.type == op_immediate && ! (x->op.flags.op_hardcode) x->op.type == op_immediate && ! (x->op.flags.op_hardcode)
/* if there is an immediate value in the instruction, return a pointer to /** \brief if there is an immediate value in the instruction, return a pointer to it
* it */ * Get Raw Immediate Data: returns a pointer to the immediate data encoded
unsigned char * x86_insn_t::x86_get_raw_imm() { * in the instruction. This is useful for large data types [>32 bits] currently
* not supported by libdisasm, or for determining if the disassembler
* screwed up the conversion of the immediate data. Note that 'imm' in this
* context refers to immediate data encoded at the end of an instruction as
* detailed in the Intel Manual Vol II Chapter 2; it does not refer to the
* 'op_imm' operand (the third operand in instructions like 'mul') */
uint8_t *x86_insn_t::x86_get_raw_imm() {
int size, offset; int size, offset;
x86_op_t *op = NULL; x86_op_t *op = NULL;
assert(this); assert(this);
@ -128,13 +165,13 @@ unsigned char * x86_insn_t::x86_get_raw_imm() {
} }
/* immediate data is at the end of the insn */ /* immediate data is at the end of the insn */
size = op->x86_operand_size(); size = op->operand_size();
offset = size - size; offset = size - size;
return( &bytes[offset] ); return( &bytes[offset] );
} }
size_t x86_op_t::x86_operand_size() { size_t x86_op_t::operand_size() {
switch (datatype ) { switch (datatype ) {
case op_byte: return 1; case op_byte: return 1;
case op_word: return 2; case op_word: return 2;
@ -166,6 +203,7 @@ size_t x86_op_t::x86_operand_size() {
return(4); /* default size */ return(4); /* default size */
} }
/** set the address (usually RVA) of the insn */
void x86_insn_t::x86_set_insn_addr( uint32_t _addr ) { void x86_insn_t::x86_set_insn_addr( uint32_t _addr ) {
addr = _addr; addr = _addr;
} }
@ -182,6 +220,7 @@ void x86_insn_t::x86_set_insn_block( void * _block ){
block = _block; block = _block;
} }
/** set insn->tag to 1 */
void x86_insn_t::x86_tag_insn(){ void x86_insn_t::x86_tag_insn(){
tag = 1; tag = 1;
} }
@ -190,6 +229,7 @@ void x86_insn_t::x86_untag_insn(){
tag = 0; tag = 0;
} }
/** \return insn->tag */
int x86_insn_t::x86_insn_is_tagged(){ int x86_insn_t::x86_insn_is_tagged(){
return tag; return tag;
} }

View File

@ -64,6 +64,8 @@ x86_op_t * x86_insn_t::x86_operand_new( ) {
return( &(op->op) ); return( &(op->op) );
} }
/** free the operand list associated with an instruction -- useful for
* preventing memory leaks when free()ing an x86_insn_t */
void x86_insn_t::x86_oplist_free( ) void x86_insn_t::x86_oplist_free( )
{ {
x86_oplist_t *op, *list; x86_oplist_t *op, *list;
@ -84,8 +86,8 @@ void x86_insn_t::x86_oplist_free( )
/* ================================================== LIBDISASM API */ /* ================================================== LIBDISASM API */
/* these could probably just be #defines, but that means exposing the /* these could probably just be #defines, but that means exposing the
enum... yet one more confusing thing in the API */ enum... yet one more confusing thing in the API */
int x86_insn_t::x86_operand_foreach( x86_operand_fn func, void *arg, int x86_insn_t::x86_operand_foreach( x86_operand_fn func, void *arg, enum x86_op_foreach_type type )
enum x86_op_foreach_type type ){ {
x86_oplist_t *list; x86_oplist_t *list;
char _explicit = 1, implicit = 1; char _explicit = 1, implicit = 1;
assert(this); assert(this);
@ -167,6 +169,7 @@ static void count_operand( x86_op_t *op, x86_insn_t *insn, void *arg ) {
*count = *count + 1; *count = *count + 1;
} }
/** convenience routine: returns count of operands matching 'type' */
size_t x86_insn_t::x86_operand_count( enum x86_op_foreach_type type ) { size_t x86_insn_t::x86_operand_count( enum x86_op_foreach_type type ) {
size_t count = 0; size_t count = 0;