A few changes to libdisasm

This commit is contained in:
Artur K 2012-03-18 11:35:13 +01:00
parent 71fc8bfef7
commit fee4d6fe9a
5 changed files with 598 additions and 584 deletions

View File

@ -190,7 +190,7 @@ static struct {
/* REG_EIPMSR_INDEX : SYSENTER_EIP_MSR : 92 */
{ REG_DWORD_SIZE, reg_sys, 0, "eip_msr" },
{ 0 }
};
};
static size_t sz_regtable = NUM_X86_REGS + 1;

View File

@ -210,7 +210,7 @@ enum x86_op_datatype { /* these use Intel's lame terminology */
op_fpustate32 = 23, /* 108 byte FPU state (env & reg stack) */
op_fpregset = 24, /* 512 bytes: register set */
op_fpreg = 25, /* FPU register */
op_none = 0xFF, /* operand without a datatype (INVLPG) */
op_none = 0xFF /* operand without a datatype (INVLPG) */
};
enum x86_op_access { /* ORed together */
@ -275,24 +275,20 @@ struct x86_op_t{
unsigned char fpuenv[28];
/* offset from segment */
uint32_t offset;
/* ID of CPU register */
x86_reg_t reg;
/* offsets from current insn */
char relative_near;
x86_reg_t reg; /* ID of CPU register */
char relative_near; /* offsets from current insn */
int32_t relative_far;
/* segment:offset */
x86_absolute_t absolute;
/* effective address [expression] */
x86_ea_t expression;
x86_absolute_t absolute; /* segment:offset */
x86_ea_t expression; /* effective address [expression] */
} data;
/* this is needed to make formatting operands more sane */
void * insn; /* pointer to x86_insn_t owning operand */
size_t size()
{
return x86_operand_size();
return operand_size();
}
/* get size of operand data in bytes */
size_t x86_operand_size();
size_t operand_size();
/* format (sprintf) an operand into 'buf' using specified syntax */
int x86_format_operand(char *buf, int len, enum x86_asm_format format );
bool is_address( ) {
@ -302,9 +298,9 @@ struct x86_op_t{
return ( type == op_relative_near || type == op_relative_far );
}
char * format( enum x86_asm_format format );
x86_op_t * copy() {
x86_op_t * copy()
{
x86_op_t *op = (x86_op_t *) calloc( sizeof(x86_op_t), 1 );
if ( op ) {
memcpy( op, this, sizeof(x86_op_t) );
}
@ -524,6 +520,7 @@ enum x86_insn_prefix {
/* TODO: maybe provide insn_new/free(), and have disasm return new insn_t */
/* FOREACH types: these are used to limit the foreach results to
* operands which match a certain "type" (implicit or explicit)
* or which are accessed in certain ways (e.g. read or write). Note
@ -577,9 +574,9 @@ public:
/* information about the instruction */
uint32_t addr; /* load address */
uint32_t offset; /* offset into file/buffer */
enum x86_insn_group group; /* meta-type, e.g. INS_EXEC */
enum x86_insn_type type; /* type, e.g. INS_BRANCH */
enum x86_insn_note note; /* note, e.g. RING0 */
x86_insn_group group; /* meta-type, e.g. INS_EXEC */
x86_insn_type type; /* type, e.g. INS_BRANCH */
x86_insn_note note; /* note, e.g. RING0 */
unsigned char bytes[MAX_INSN_SIZE];
unsigned char size; /* size of insn in bytes */
/* 16/32-bit mode settings */
@ -607,58 +604,29 @@ public:
void *block; /* code block containing this insn */
void *function; /* function containing this insn */
int tag; /* tag the insn as seen/processed */
x86_op_t *x86_operand_new();
/* convenience routine: returns count of operands matching 'type' */
x86_op_t * x86_operand_new();
size_t x86_operand_count( enum x86_op_foreach_type type );
/* accessor functions for the operands */
x86_op_t * x86_operand_1st( );
x86_op_t * x86_operand_2nd( );
x86_op_t * x86_operand_3rd( );
/* Get Relative Offset: return as a sign-extended int32_t the near or far
* relative offset operand, or 0 if there is none. There can be only one
relative offset operand in an instruction. */
x86_op_t * get_dest();
int32_t x86_get_rel_offset( );
/* Get Branch Target: return the x86_op_t containing the target of
* a jump or call operand, or NULL if there is no branch target.
* Internally, a 'branch target' is defined as any operand with
* Execute Access set. There can be only one branch target per instruction. */
x86_op_t * x86_get_branch_target( );
/* Get Immediate: return the x86_op_t containing the immediate operand
* for this instruction, or NULL if there is no immediate operand. There
* can be only one immediate operand per instruction */
x86_op_t * x86_get_imm( );
/* Get Raw Immediate Data: returns a pointer to the immediate data encoded
* in the instruction. This is useful for large data types [>32 bits] currently
* not supported by libdisasm, or for determining if the disassembler
* screwed up the conversion of the immediate data. Note that 'imm' in this
* context refers to immediate data encoded at the end of an instruction as
* detailed in the Intel Manual Vol II Chapter 2; it does not refer to the
'op_imm' operand (the third operand in instructions like 'mul') */
unsigned char * x86_get_raw_imm( );
uint8_t * x86_get_raw_imm( );
/* More accessor functions, this time for user-defined info... */
/* set the address (usually RVA) of the insn */
void x86_set_insn_addr( uint32_t addr );
/* format (sprintf) an instruction mnemonic into 'buf' using specified syntax */
int x86_format_mnemonic( char *buf, int len, enum x86_asm_format format);
/* format (sprintf) an instruction into 'buf' using specified syntax;
* this includes formatting all operands */
int x86_format_insn( char *buf, int len, enum x86_asm_format);
/* free the operand list associated with an instruction -- useful for
* preventing memory leaks when free()ing an x86_insn_t */
void x86_oplist_free( );
/* returns 0 if an instruction is invalid, 1 if valid */
int x86_insn_is_valid( );
/* Get Address: return the value of an offset operand, or the offset of
* a segment:offset absolute address */
bool is_valid( );
uint32_t x86_get_address( );
void make_invalid(unsigned char *buf);
/* instruction tagging: these routines allow the programmer to mark
* instructions as "seen" in a DFS, for example. libdisasm does not use
* the tag field.*/
/* set insn->tag to 1 */
void x86_tag_insn( );
/* return insn->tag */
int x86_insn_is_tagged();
/* set insn->tag to 0 */
void x86_untag_insn();

View File

@ -1077,10 +1077,10 @@ static int format_att_mnemonic( x86_insn_t *insn, char *buf, int len) {
)) {
if ( insn->x86_operand_count( op_explicit ) > 0 &&
is_memory_op( insn->x86_operand_1st() ) ){
size = insn->x86_operand_1st()->x86_operand_size();
size = insn->x86_operand_1st()->operand_size();
} else if ( insn->x86_operand_count( op_explicit ) > 1 &&
is_memory_op( insn->x86_operand_2nd() ) ){
size = insn->x86_operand_2nd()->x86_operand_size();
size = insn->x86_operand_2nd()->operand_size();
}
}
@ -1094,6 +1094,7 @@ static int format_att_mnemonic( x86_insn_t *insn, char *buf, int len) {
return ( strlen( buf ) );
}
/** format (sprintf) an instruction mnemonic into 'buf' using specified syntax */
int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len,
enum x86_asm_format format){
char str[MAX_OP_STRING];
@ -1325,6 +1326,8 @@ int X86_Disasm::x86_format_header( char *buf, int len, enum x86_asm_format forma
return( strlen(buf) );
}
/** format (sprintf) an instruction into 'buf' using specified syntax;
* this includes formatting all operands */
int x86_insn_t::x86_format_insn( char *buf, int len,
enum x86_asm_format format ){
char str[MAX_OP_STRING];

View File

@ -17,7 +17,8 @@ int x86_insn_is_valid( x86_insn_t *insn ) {
return 0;
}
int x86_insn_t::x86_insn_is_valid( )
/** \returns false if an instruction is invalid, true if valid */
bool x86_insn_t::is_valid( )
{
if ( this && this->type != insn_invalid && this->size > 0 )
{
@ -26,10 +27,15 @@ int x86_insn_t::x86_insn_is_valid( )
return 0;
}
uint32_t x86_insn_t::x86_get_address() {
/* Get Address: return the value of an offset operand, or the offset of
* a segment:offset absolute address */
uint32_t x86_insn_t::x86_get_address()
{
x86_oplist_t *op_lst;
assert(this);
if (! operands ) {
if (! operands )
{
return 0;
}
@ -48,6 +54,9 @@ uint32_t x86_insn_t::x86_get_address() {
return 0;
}
/** Get Relative Offset: return as a sign-extended int32_t the near or far
* relative offset operand, or 0 if there is none. There can be only one
* relative offset operand in an instruction. */
int32_t x86_insn_t::x86_get_rel_offset( ) {
x86_oplist_t *op_lst;
assert(this);
@ -66,6 +75,10 @@ int32_t x86_insn_t::x86_get_rel_offset( ) {
return 0;
}
/** Get Branch Target: return the x86_op_t containing the target of
* a jump or call operand, or NULL if there is no branch target.
* Internally, a 'branch target' is defined as any operand with
* Execute Access set. There can be only one branch target per instruction. */
x86_op_t * x86_insn_t::x86_get_branch_target() {
x86_oplist_t *op_lst;
assert(this);
@ -81,6 +94,24 @@ x86_op_t * x86_insn_t::x86_get_branch_target() {
return NULL;
}
/** Get Destination: walk the operand list and return the first operand
 * whose access flags include op_write, or NULL if the instruction has no
 * operands or none of them is written. Asserts that exactly one operand
 * matches the op_dest foreach type before searching. */
x86_op_t * x86_insn_t::get_dest()
{
    assert(this);
    if ( operands == NULL )
        return NULL;

    assert(this->x86_operand_count(op_dest)==1);

    x86_oplist_t *node = operands;
    while ( node ) {
        if ( node->op.access & op_write ) {
            return &node->op;
        }
        node = node->next;
    }
    return NULL;
}
/** \brief Get Immediate: return the x86_op_t containing the immediate operand
for this instruction, or NULL if there is no immediate operand. There
can be only one immediate operand per instruction */
x86_op_t * x86_insn_t::x86_get_imm() {
x86_oplist_t *op_lst;
assert(this);
@ -101,9 +132,15 @@ x86_op_t * x86_insn_t::x86_get_imm() {
x->op.type == op_immediate && ! (x->op.flags.op_hardcode)
/* if there is an immediate value in the instruction, return a pointer to
* it */
unsigned char * x86_insn_t::x86_get_raw_imm() {
/** \brief if there is an immediate value in the instruction, return a pointer to it
* Get Raw Immediate Data: returns a pointer to the immediate data encoded
* in the instruction. This is useful for large data types [>32 bits] currently
* not supported by libdisasm, or for determining if the disassembler
* screwed up the conversion of the immediate data. Note that 'imm' in this
* context refers to immediate data encoded at the end of an instruction as
* detailed in the Intel Manual Vol II Chapter 2; it does not refer to the
* 'op_imm' operand (the third operand in instructions like 'mul') */
uint8_t *x86_insn_t::x86_get_raw_imm() {
int size, offset;
x86_op_t *op = NULL;
assert(this);
@ -128,13 +165,13 @@ unsigned char * x86_insn_t::x86_get_raw_imm() {
}
/* immediate data is at the end of the insn */
size = op->x86_operand_size();
size = op->operand_size();
/* the local 'size' (operand size) shadows the member x86_insn_t::size
 * (length of the instruction in bytes); the member must be qualified,
 * otherwise the offset is always 0 and the start of the instruction is
 * returned instead of its trailing immediate bytes */
offset = this->size - size;
return( &bytes[offset] );
}
size_t x86_op_t::x86_operand_size() {
size_t x86_op_t::operand_size() {
switch (datatype ) {
case op_byte: return 1;
case op_word: return 2;
@ -166,6 +203,7 @@ size_t x86_op_t::x86_operand_size() {
return(4); /* default size */
}
/** set the address (usually RVA) of the insn */
void x86_insn_t::x86_set_insn_addr( uint32_t _addr ) {
addr = _addr;
}
@ -182,6 +220,7 @@ void x86_insn_t::x86_set_insn_block( void * _block ){
block = _block;
}
/** set insn->tag to 1 */
void x86_insn_t::x86_tag_insn(){
tag = 1;
}
@ -190,6 +229,7 @@ void x86_insn_t::x86_untag_insn(){
tag = 0;
}
/** \return insn->tag */
int x86_insn_t::x86_insn_is_tagged(){
return tag;
}

View File

@ -64,6 +64,8 @@ x86_op_t * x86_insn_t::x86_operand_new( ) {
return( &(op->op) );
}
/** free the operand list associated with an instruction -- useful for
* preventing memory leaks when free()ing an x86_insn_t */
void x86_insn_t::x86_oplist_free( )
{
x86_oplist_t *op, *list;
@ -84,8 +86,8 @@ void x86_insn_t::x86_oplist_free( )
/* ================================================== LIBDISASM API */
/* these could probably just be #defines, but that means exposing the
enum... yet one more confusing thing in the API */
int x86_insn_t::x86_operand_foreach( x86_operand_fn func, void *arg,
enum x86_op_foreach_type type ){
int x86_insn_t::x86_operand_foreach( x86_operand_fn func, void *arg, enum x86_op_foreach_type type )
{
x86_oplist_t *list;
char _explicit = 1, implicit = 1;
assert(this);
@ -167,6 +169,7 @@ static void count_operand( x86_op_t *op, x86_insn_t *insn, void *arg ) {
*count = *count + 1;
}
/** convenience routine: returns count of operands matching 'type' */
size_t x86_insn_t::x86_operand_count( enum x86_op_foreach_type type ) {
size_t count = 0;