From d5e1fc733f8445182cf960b9f5381810d0d83432 Mon Sep 17 00:00:00 2001 From: Artur K Date: Thu, 19 Jul 2012 19:37:30 +0200 Subject: [PATCH] Fixes to libdisasm, also use it a bit more --- 3rd_party/libdisasm/ia32_operand.cpp | 43 +- 3rd_party/libdisasm/libdis.h | 719 ++++++++++++++------------- 3rd_party/libdisasm/x86_disasm.cpp | 2 +- CMakeLists.txt | 3 +- base_regression.sh | 2 +- full_regression.sh | 2 +- include/BinaryImage.h | 3 +- include/Enums.h | 1 + include/IdentType.h | 25 +- include/Procedure.h | 10 +- include/ast.h | 210 +++++--- include/bundle.h | 2 +- include/dcc.h | 9 +- include/error.h | 7 +- include/icode.h | 93 ++-- include/locident.h | 4 +- include/state.h | 4 + include/symtab.h | 4 +- regression_tester.rb | 1 + src/BasicBlock.cpp | 1 - src/Procedure.cpp | 6 +- src/ast.cpp | 501 +++++++------------ src/backend.cpp | 12 +- src/chklib.cpp | 74 +-- src/comwrite.cpp | 4 +- src/control.cpp | 6 +- src/dataflow.cpp | 205 ++++---- src/dcc.cpp | 98 +++- src/disassem.cpp | 2 +- src/frontend.cpp | 12 +- src/hlicode.cpp | 66 +-- src/hltype.cpp | 2 +- src/icode.cpp | 8 +- src/idioms/arith_idioms.cpp | 141 +++--- src/idioms/mov_idioms.cpp | 6 +- src/idioms/neg_idioms.cpp | 8 +- src/idioms/shift_idioms.cpp | 21 +- src/idioms/xor_idioms.cpp | 12 +- src/locident.cpp | 8 +- src/parser.cpp | 60 ++- src/procs.cpp | 107 ++-- src/proplong.cpp | 16 +- src/scanner.cpp | 113 +++-- tests/prev_base/DHAMP.b | 141 ++---- tests/prev_base/LONGOPS.b | 13 +- tests/prev_base/MATRIXMU.b | 7 +- 46 files changed, 1463 insertions(+), 1331 deletions(-) diff --git a/3rd_party/libdisasm/ia32_operand.cpp b/3rd_party/libdisasm/ia32_operand.cpp index 82798bd..a36787d 100644 --- a/3rd_party/libdisasm/ia32_operand.cpp +++ b/3rd_party/libdisasm/ia32_operand.cpp @@ -109,17 +109,15 @@ size_t Ia32_Decoder::decode_operand_value( unsigned char *buf, size_t buf_len, op->type = op_absolute; /* segment:offset address used in far calls */ - x86_imm_sized( buf, buf_len, - &op->data.absolute.segment, 2 ); if ( m_decoded->addr_size == 4 ) { - x86_imm_sized( buf, buf_len, - &op->data.absolute.offset.off32, 4 ); - size = 6; - } else { - x86_imm_sized( buf, buf_len, - &op->data.absolute.offset.off16, 2 ); + x86_imm_sized( buf, buf_len, &op->data.absolute.offset.off32, 4 ); size = 4; + } else { + x86_imm_sized( buf, buf_len,&op->data.absolute.offset.off16, 2 ); + size = 2; } + x86_imm_sized( buf+size, buf_len-size, &op->data.absolute.segment, 2 ); + size+=2; break; case ADDRMETH_I: /* Immediate val */ @@ -140,17 +138,24 @@ size_t Ia32_Decoder::decode_operand_value( unsigned char *buf, size_t buf_len, op->data.far_offset depending on the size of the operand */ op->flags.op_signed = true; - if ( op_size == 1 ) { - /* one-byte near offset */ - op->type = op_relative_near; - x86_imm_signsized(buf, buf_len, &op->data.relative_near, 1); - } else { - /* far offset...is this truly signed? */ - op->type = op_relative_far; - x86_imm_signsized(buf, buf_len, - &op->data.relative_far, op_size ); + switch(op_size) + { + case 1: + /* one-byte near offset */ + op->type = op_relative_near; + size = x86_imm_signsized(buf, buf_len, &op->data.relative_near, 1); + break; + case 2: + op->type = op_relative_far; + int16_t offset_val; + size = x86_imm_signsized(buf, buf_len,&offset_val, 2); + op->data.relative_far=offset_val; + break; + default: + assert(false); + size=0; + } - size = op_size; break; case ADDRMETH_O: /* No ModR/M; op is word/dword offset */ /* NOTE: these are actually RVAs not offsets to seg!! */ @@ -258,7 +263,7 @@ size_t Ia32_Decoder::decode_operand_size( unsigned int op_type, x86_op_t *op ) { * value s a 16:16 pointer or a 16:32 pointer, where * the first '16' is a segment */ size = (m_decoded->addr_size == 4) ? 6 : 4; - op->datatype = (size == 4) ? op_descr32 : op_descr16; + op->datatype = (size == 6) ? op_descr32 : op_descr16; break; case OPTYPE_b: /* byte, ignore op-size */ size = 1; diff --git a/3rd_party/libdisasm/libdis.h b/3rd_party/libdisasm/libdis.h index 10f66d4..a9d6f08 100644 --- a/3rd_party/libdisasm/libdis.h +++ b/3rd_party/libdisasm/libdis.h @@ -6,17 +6,17 @@ #endif #include #include +#include #include - /* 'NEW" types * __________________________________________________________________________*/ #ifndef LIBDISASM_QWORD_H /* do not interfere with qword.h */ - #define LIBDISASM_QWORD_H - #ifdef _MSC_VER - typedef __int64 qword_t; - #else - typedef int64_t qword_t; - #endif +#define LIBDISASM_QWORD_H +#ifdef _MSC_VER +typedef __int64 qword_t; +#else +typedef int64_t qword_t; +#endif #endif #include @@ -36,7 +36,7 @@ * this allows the report to recover from errors, or just log them. */ enum x86_report_codes { - report_disasm_bounds, /* RVA OUT OF BOUNDS : The disassembler could + report_disasm_bounds, /* RVA OUT OF BOUNDS : The disassembler could not disassemble the supplied RVA as it is out of the range of the buffer. The application should store the address and @@ -44,21 +44,21 @@ enum x86_report_codes { binary it is in, then disassemble the address from the bytes in that section. data: uint32_t rva */ - report_insn_bounds, /* INSTRUCTION OUT OF BOUNDS: The disassembler + report_insn_bounds, /* INSTRUCTION OUT OF BOUNDS: The disassembler could not disassemble the instruction as the instruction would require bytes beyond the end of the current buffer. This usually indicated garbage bytes at the end of a buffer, or an incorrectly-sized buffer. data: uint32_t rva */ - report_invalid_insn, /* INVALID INSTRUCTION: The disassembler could + report_invalid_insn, /* INVALID INSTRUCTION: The disassembler could not disassemble the instruction as it has an invalid combination of opcodes and operands. This will stop automated disassembly; the application can restart the disassembly after the invalid instruction. data: uint32_t rva */ - report_unknown + report_unknown }; /* Disassembly formats: * AT&T is standard AS/GAS-style: "mnemonic\tsrc, dest, imm" @@ -68,12 +68,12 @@ enum x86_report_codes { * Raw is addr|offset|size|bytes|prefix... see libdisasm_formats.7 */ enum x86_asm_format { - unknown_syntax = 0, /* never use! */ - native_syntax, /* header: 35 bytes */ - intel_syntax, /* header: 23 bytes */ - att_syntax, /* header: 23 bytes */ - xml_syntax, /* header: 679 bytes */ - raw_syntax /* header: 172 bytes */ + unknown_syntax = 0, /* never use! */ + native_syntax, /* header: 35 bytes */ + intel_syntax, /* header: 23 bytes */ + att_syntax, /* header: 23 bytes */ + xml_syntax, /* header: 679 bytes */ + raw_syntax /* header: 172 bytes */ }; /* 'arg' is optional arbitrary data provided by the code passing the @@ -86,10 +86,10 @@ typedef void (*DISASM_REPORTER)( enum x86_report_codes code, /* ========================================= Libdisasm Management Routines */ enum x86_options { /* these can be ORed together */ - opt_none= 0, - opt_ignore_nulls=1, /* ignore sequences of > 4 NULL bytes */ - opt_16_bit=2, /* 16-bit/DOS disassembly */ - opt_att_mnemonics=4 /* use AT&T syntax names for alternate opcode mnemonics */ + opt_none= 0, + opt_ignore_nulls=1, /* ignore sequences of > 4 NULL bytes */ + opt_16_bit=2, /* 16-bit/DOS disassembly */ + opt_att_mnemonics=4 /* use AT&T syntax names for alternate opcode mnemonics */ }; /* ========================================= Instruction Representation */ @@ -110,35 +110,35 @@ enum x86_options { /* these can be ORed together */ #define MAX_INSN_XML_STRING 4096 /* 2 * 8 * MAX_OP_XML_STRING */ enum x86_reg_type { /* NOTE: these may be ORed together */ - reg_undef = 0x00000, // used only in ia32_reg_table initializater - reg_gen = 0x00001, /* general purpose */ - reg_in = 0x00002, /* incoming args, ala RISC */ - reg_out = 0x00004, /* args to calls, ala RISC */ - reg_local = 0x00008, /* local vars, ala RISC */ - reg_fpu = 0x00010, /* FPU data register */ - reg_seg = 0x00020, /* segment register */ - reg_simd = 0x00040, /* SIMD/MMX reg */ - reg_sys = 0x00080, /* restricted/system register */ - reg_sp = 0x00100, /* stack pointer */ - reg_fp = 0x00200, /* frame pointer */ - reg_pc = 0x00400, /* program counter */ - reg_retaddr = 0x00800, /* return addr for func */ - reg_cond = 0x01000, /* condition code / flags */ - reg_zero = 0x02000, /* zero register, ala RISC */ - reg_ret = 0x04000, /* return value */ - reg_src = 0x10000, /* array/rep source */ - reg_dest = 0x20000, /* array/rep destination */ - reg_count = 0x40000 /* array/rep/loop counter */ + reg_undef = 0x00000, // used only in ia32_reg_table initializater + reg_gen = 0x00001, /* general purpose */ + reg_in = 0x00002, /* incoming args, ala RISC */ + reg_out = 0x00004, /* args to calls, ala RISC */ + reg_local = 0x00008, /* local vars, ala RISC */ + reg_fpu = 0x00010, /* FPU data register */ + reg_seg = 0x00020, /* segment register */ + reg_simd = 0x00040, /* SIMD/MMX reg */ + reg_sys = 0x00080, /* restricted/system register */ + reg_sp = 0x00100, /* stack pointer */ + reg_fp = 0x00200, /* frame pointer */ + reg_pc = 0x00400, /* program counter */ + reg_retaddr = 0x00800, /* return addr for func */ + reg_cond = 0x01000, /* condition code / flags */ + reg_zero = 0x02000, /* zero register, ala RISC */ + reg_ret = 0x04000, /* return value */ + reg_src = 0x10000, /* array/rep source */ + reg_dest = 0x20000, /* array/rep destination */ + reg_count = 0x40000 /* array/rep/loop counter */ }; /* x86_reg_t : an X86 CPU register */ struct x86_reg_t { - char name[MAX_REGNAME]; - enum x86_reg_type type; /* what register is used for */ - unsigned int size; /* size of register in bytes */ - unsigned int id; /* register ID #, for quick compares */ - unsigned int alias; /* ID of reg this is an alias for */ - unsigned int shift; /* amount to shift aliased reg by */ + char name[MAX_REGNAME]; + enum x86_reg_type type; /* what register is used for */ + unsigned int size; /* size of register in bytes */ + unsigned int id; /* register ID #, for quick compares */ + unsigned int alias; /* ID of reg this is an alias for */ + unsigned int shift; /* amount to shift aliased reg by */ x86_reg_t * aliased_reg( ) { x86_reg_t * reg = (x86_reg_t * )calloc( sizeof(x86_reg_t), 1 ); reg->x86_reg_from_id( id ); @@ -149,92 +149,92 @@ struct x86_reg_t { /* x86_ea_t : an X86 effective address (address expression) */ typedef struct { - unsigned int scale; /* scale factor */ - x86_reg_t index, base; /* index, base registers */ - int32_t disp; /* displacement */ - char disp_sign; /* is negative? 1/0 */ - char disp_size; /* 0, 1, 2, 4 */ + unsigned int scale; /* scale factor */ + x86_reg_t index, base; /* index, base registers */ + int32_t disp; /* displacement */ + char disp_sign; /* is negative? 1/0 */ + char disp_size; /* 0, 1, 2, 4 */ } x86_ea_t; /* x86_absolute_t : an X86 segment:offset address (descriptor) */ typedef struct { - unsigned short segment; /* loaded directly into CS */ - union { - unsigned short off16; /* loaded directly into IP */ - uint32_t off32; /* loaded directly into EIP */ - } offset; + unsigned short segment; /* loaded directly into CS */ + union { + unsigned short off16; /* loaded directly into IP */ + uint32_t off32; /* loaded directly into EIP */ + } offset; } x86_absolute_t; enum x86_op_type { /* mutually exclusive */ - op_unused = 0, /* empty/unused operand: should never occur */ - op_register = 1, /* CPU register */ - op_immediate = 2, /* Immediate Value */ - op_relative_near = 3, /* Relative offset from IP */ - op_relative_far = 4, /* Relative offset from IP */ - op_absolute = 5, /* Absolute address (ptr16:32) */ - op_expression = 6, /* Address expression (scale/index/base/disp) */ - op_offset = 7, /* Offset from start of segment (m32) */ - op_unknown + op_unused = 0, /* empty/unused operand: should never occur */ + op_register = 1, /* CPU register */ + op_immediate = 2, /* Immediate Value */ + op_relative_near = 3, /* Relative offset from IP */ + op_relative_far = 4, /* Relative offset from IP */ + op_absolute = 5, /* Absolute address (ptr16:32) */ + op_expression = 6, /* Address expression (scale/index/base/disp) */ + op_offset = 7, /* Offset from start of segment (m32) */ + op_unknown }; #define x86_optype_is_address( optype ) \ - ( optype == op_absolute || optype == op_offset ) + ( optype == op_absolute || optype == op_offset ) #define x86_optype_is_relative( optype ) \ - ( optype == op_relative_near || optype == op_relative_far ) + ( optype == op_relative_near || optype == op_relative_far ) #define x86_optype_is_memory( optype ) \ - ( optype > op_immediate && optype < op_unknown ) + ( optype > op_immediate && optype < op_unknown ) enum x86_op_datatype { /* these use Intel's lame terminology */ - op_byte = 1, /* 1 byte integer */ - op_word = 2, /* 2 byte integer */ - op_dword = 3, /* 4 byte integer */ - op_qword = 4, /* 8 byte integer */ - op_dqword = 5, /* 16 byte integer */ - op_sreal = 6, /* 4 byte real (single real) */ - op_dreal = 7, /* 8 byte real (double real) */ - op_extreal = 8, /* 10 byte real (extended real) */ - op_bcd = 9, /* 10 byte binary-coded decimal */ - op_ssimd = 10, /* 16 byte : 4 packed single FP (SIMD, MMX) */ - op_dsimd = 11, /* 16 byte : 2 packed double FP (SIMD, MMX) */ - op_sssimd = 12, /* 4 byte : scalar single FP (SIMD, MMX) */ - op_sdsimd = 13, /* 8 byte : scalar double FP (SIMD, MMX) */ - op_descr32 = 14, /* 6 byte Intel descriptor 2:4 */ - op_descr16 = 15, /* 4 byte Intel descriptor 2:2 */ - op_pdescr32 = 16, /* 6 byte Intel pseudo-descriptor 32:16 */ - op_pdescr16 = 17, /* 6 byte Intel pseudo-descriptor 8:24:16 */ - op_bounds16 = 18, /* signed 16:16 lower:upper bounds */ - op_bounds32 = 19, /* signed 32:32 lower:upper bounds */ - op_fpuenv16 = 20, /* 14 byte FPU control/environment data */ - op_fpuenv32 = 21, /* 28 byte FPU control/environment data */ - op_fpustate16 = 22, /* 94 byte FPU state (env & reg stack) */ - op_fpustate32 = 23, /* 108 byte FPU state (env & reg stack) */ - op_fpregset = 24, /* 512 bytes: register set */ - op_fpreg = 25, /* FPU register */ - op_none = 0xFF /* operand without a datatype (INVLPG) */ + op_byte = 1, /* 1 byte integer */ + op_word = 2, /* 2 byte integer */ + op_dword = 3, /* 4 byte integer */ + op_qword = 4, /* 8 byte integer */ + op_dqword = 5, /* 16 byte integer */ + op_sreal = 6, /* 4 byte real (single real) */ + op_dreal = 7, /* 8 byte real (double real) */ + op_extreal = 8, /* 10 byte real (extended real) */ + op_bcd = 9, /* 10 byte binary-coded decimal */ + op_ssimd = 10, /* 16 byte : 4 packed single FP (SIMD, MMX) */ + op_dsimd = 11, /* 16 byte : 2 packed double FP (SIMD, MMX) */ + op_sssimd = 12, /* 4 byte : scalar single FP (SIMD, MMX) */ + op_sdsimd = 13, /* 8 byte : scalar double FP (SIMD, MMX) */ + op_descr32 = 14, /* 6 byte Intel descriptor 2:4 */ + op_descr16 = 15, /* 4 byte Intel descriptor 2:2 */ + op_pdescr32 = 16, /* 6 byte Intel pseudo-descriptor 32:16 */ + op_pdescr16 = 17, /* 6 byte Intel pseudo-descriptor 8:24:16 */ + op_bounds16 = 18, /* signed 16:16 lower:upper bounds */ + op_bounds32 = 19, /* signed 32:32 lower:upper bounds */ + op_fpuenv16 = 20, /* 14 byte FPU control/environment data */ + op_fpuenv32 = 21, /* 28 byte FPU control/environment data */ + op_fpustate16 = 22, /* 94 byte FPU state (env & reg stack) */ + op_fpustate32 = 23, /* 108 byte FPU state (env & reg stack) */ + op_fpregset = 24, /* 512 bytes: register set */ + op_fpreg = 25, /* FPU register */ + op_none = 0xFF /* operand without a datatype (INVLPG) */ }; enum x86_op_access { /* ORed together */ - op_read = 1, - op_write = 2, - op_execute = 4 + op_read = 1, + op_write = 2, + op_execute = 4 }; struct x86_op_flags { /* ORed together, but segs are mutually exclusive */ union { unsigned int op_signed:1, /* signed integer */ - op_string:1,// = 2, /* possible string or array */ - op_constant:1,// = 4, /* symbolic constant */ - op_pointer:1,// = 8, /* operand points to a memory address */ - op_sysref:1,// = 0x010, /* operand is a syscall number */ - op_implied:1,// = 0x020, /* operand is implicit in the insn */ - op_hardcode:1,// = 0x40, /* operand is hardcoded in insn definition */ - /* NOTE: an 'implied' operand is one which can be considered a side - * effect of the insn, e.g. %esp being modified by PUSH or POP. A - * 'hard-coded' operand is one which is specified in the instruction - * definition, e.g. %es:%edi in MOVSB or 1 in ROL Eb, 1. The difference - * is that hard-coded operands are printed by disassemblers and are - * required to re-assemble, while implicit operands are invisible. */ - op_seg : 3; + op_string:1,// = 2, /* possible string or array */ + op_constant:1,// = 4, /* symbolic constant */ + op_pointer:1,// = 8, /* operand points to a memory address */ + op_sysref:1,// = 0x010, /* operand is a syscall number */ + op_implied:1,// = 0x020, /* operand is implicit in the insn */ + op_hardcode:1,// = 0x40, /* operand is hardcoded in insn definition */ + /* NOTE: an 'implied' operand is one which can be considered a side + * effect of the insn, e.g. %esp being modified by PUSH or POP. A + * 'hard-coded' operand is one which is specified in the instruction + * definition, e.g. %es:%edi in MOVSB or 1 in ROL Eb, 1. The difference + * is that hard-coded operands are printed by disassemblers and are + * required to re-assemble, while implicit operands are invisible. */ + op_seg : 3; unsigned int whole; }; enum { @@ -250,43 +250,43 @@ struct x86_op_flags { /* ORed together, but segs are mutually exclusive */ /* x86_op_t : an X86 instruction operand */ struct x86_op_t{ friend struct x86_insn_t; - enum x86_op_type type; /* operand type */ - enum x86_op_datatype datatype; /* operand size */ - enum x86_op_access access; /* operand access [RWX] */ - x86_op_flags flags; /* misc flags */ - union { - /* sizeof will have to work on these union members! */ - /* immediate values */ - char sbyte; - short sword; - int32_t sdword; - qword_t sqword; - unsigned char byte; - unsigned short word; - uint32_t dword; - qword_t qword; - float sreal; - double dreal; - /* misc large/non-native types */ - unsigned char extreal[10]; - unsigned char bcd[10]; - qword_t dqword[2]; - unsigned char simd[16]; - unsigned char fpuenv[28]; - /* offset from segment */ - uint32_t offset; - /* ID of CPU register */ - x86_reg_t reg; - /* offsets from current insn */ - char relative_near; - int32_t relative_far; - /* segment:offset */ - x86_absolute_t absolute; - /* effective address [expression] */ - x86_ea_t expression; - } data; - /* this is needed to make formatting operands more sane */ - void * insn; /* pointer to x86_insn_t owning operand */ + enum x86_op_type type; /* operand type */ + enum x86_op_datatype datatype; /* operand size */ + enum x86_op_access access; /* operand access [RWX] */ + x86_op_flags flags; /* misc flags */ + union { + /* sizeof will have to work on these union members! */ + /* immediate values */ + char sbyte; + short sword; + int32_t sdword; + qword_t sqword; + unsigned char byte; + unsigned short word; + uint32_t dword; + qword_t qword; + float sreal; + double dreal; + /* misc large/non-native types */ + unsigned char extreal[10]; + unsigned char bcd[10]; + qword_t dqword[2]; + unsigned char simd[16]; + unsigned char fpuenv[28]; + /* offset from segment */ + uint32_t offset; + /* ID of CPU register */ + x86_reg_t reg; + /* offsets from current insn */ + char relative_near; + int32_t relative_far; + /* segment:offset */ + x86_absolute_t absolute; + /* effective address [expression] */ + x86_ea_t expression; + } data; + /* this is needed to make formatting operands more sane */ + void * insn; /* pointer to x86_insn_t owning operand */ size_t size() { return operand_size(); @@ -301,6 +301,29 @@ struct x86_op_t{ bool is_relative( ) { return ( type == op_relative_near || type == op_relative_far ); } + int32_t getAddress() + { + assert(is_address()||is_relative()); + switch(type) + { + case op_absolute: + { + if(datatype==op_descr16) + return (int32_t(data.absolute.segment)<<4) + data.absolute.offset.off16; + else + return (int32_t(data.absolute.segment)<<4) + data.absolute.offset.off32; + } + case op_offset: + return data.offset; + case op_relative_near: + return data.relative_near; + case op_relative_far: + return data.relative_far; + default: + assert(false); + return ~0; + } + } char * format( enum x86_asm_format format ); x86_op_t * copy() { @@ -319,119 +342,119 @@ private: * list in an insn. Users wishing to add operands to this list, e.g. to add * implicit operands, should use x86_operand_new in x86_operand_list.h */ struct x86_oplist_t { - x86_op_t op; - struct x86_oplist_t *next; + x86_op_t op; + struct x86_oplist_t *next; }; enum x86_insn_type { - insn_invalid = 0, /* invalid instruction */ - /* insn_controlflow */ - insn_jmp = 0x1001, - insn_jcc = 0x1002, - insn_call = 0x1003, - insn_callcc = 0x1004, - insn_return = 0x1005, - /* insn_arithmetic */ - insn_add = 0x2001, - insn_sub = 0x2002, - insn_mul = 0x2003, - insn_div = 0x2004, - insn_inc = 0x2005, - insn_dec = 0x2006, - insn_shl = 0x2007, - insn_shr = 0x2008, - insn_rol = 0x2009, - insn_ror = 0x200A, - /* insn_logic */ - insn_and = 0x3001, - insn_or = 0x3002, - insn_xor = 0x3003, - insn_not = 0x3004, - insn_neg = 0x3005, - /* insn_stack */ - insn_push = 0x4001, - insn_pop = 0x4002, - insn_pushregs = 0x4003, - insn_popregs = 0x4004, - insn_pushflags = 0x4005, - insn_popflags = 0x4006, - insn_enter = 0x4007, - insn_leave = 0x4008, - /* insn_comparison */ - insn_test = 0x5001, - insn_cmp = 0x5002, - /* insn_move */ - insn_mov = 0x6001, /* move */ - insn_movcc = 0x6002, /* conditional move */ - insn_xchg = 0x6003, /* exchange */ - insn_xchgcc = 0x6004, /* conditional exchange */ - /* insn_string */ - insn_strcmp = 0x7001, - insn_strload = 0x7002, - insn_strmov = 0x7003, - insn_strstore = 0x7004, - insn_translate = 0x7005, /* xlat */ - /* insn_bit_manip */ - insn_bittest = 0x8001, - insn_bitset = 0x8002, - insn_bitclear = 0x8003, - /* insn_flag_manip */ - insn_clear_carry = 0x9001, - insn_clear_zero = 0x9002, - insn_clear_oflow = 0x9003, - insn_clear_dir = 0x9004, - insn_clear_sign = 0x9005, - insn_clear_parity = 0x9006, - insn_set_carry = 0x9007, - insn_set_zero = 0x9008, - insn_set_oflow = 0x9009, - insn_set_dir = 0x900A, - insn_set_sign = 0x900B, - insn_set_parity = 0x900C, - insn_tog_carry = 0x9010, - insn_tog_zero = 0x9020, - insn_tog_oflow = 0x9030, - insn_tog_dir = 0x9040, - insn_tog_sign = 0x9050, - insn_tog_parity = 0x9060, - /* insn_fpu */ - insn_fmov = 0xA001, - insn_fmovcc = 0xA002, - insn_fneg = 0xA003, - insn_fabs = 0xA004, - insn_fadd = 0xA005, - insn_fsub = 0xA006, - insn_fmul = 0xA007, - insn_fdiv = 0xA008, - insn_fsqrt = 0xA009, - insn_fcmp = 0xA00A, - insn_fcos = 0xA00C, - insn_fldpi = 0xA00D, - insn_fldz = 0xA00E, - insn_ftan = 0xA00F, - insn_fsine = 0xA010, - insn_fsys = 0xA020, - /* insn_interrupt */ - insn_int = 0xD001, - insn_intcc = 0xD002, /* not present in x86 ISA */ - insn_iret = 0xD003, - insn_bound = 0xD004, - insn_debug = 0xD005, - insn_trace = 0xD006, - insn_invalid_op = 0xD007, - insn_oflow = 0xD008, - /* insn_system */ - insn_halt = 0xE001, - insn_in = 0xE002, /* input from port/bus */ - insn_out = 0xE003, /* output to port/bus */ - insn_cpuid = 0xE004, - insn_lmsw = 0xE005, - insn_smsw = 0xE006, - insn_clts = 0xE007, - /* insn_other */ - insn_nop = 0xF001, - insn_bcdconv = 0xF002, /* convert to or from BCD */ - insn_szconv = 0xF003 /* change size of operand */ + insn_invalid = 0, /* invalid instruction */ + /* insn_controlflow */ + insn_jmp = 0x1001, + insn_jcc = 0x1002, + insn_call = 0x1003, + insn_callcc = 0x1004, + insn_return = 0x1005, + /* insn_arithmetic */ + insn_add = 0x2001, + insn_sub = 0x2002, + insn_mul = 0x2003, + insn_div = 0x2004, + insn_inc = 0x2005, + insn_dec = 0x2006, + insn_shl = 0x2007, + insn_shr = 0x2008, + insn_rol = 0x2009, + insn_ror = 0x200A, + /* insn_logic */ + insn_and = 0x3001, + insn_or = 0x3002, + insn_xor = 0x3003, + insn_not = 0x3004, + insn_neg = 0x3005, + /* insn_stack */ + insn_push = 0x4001, + insn_pop = 0x4002, + insn_pushregs = 0x4003, + insn_popregs = 0x4004, + insn_pushflags = 0x4005, + insn_popflags = 0x4006, + insn_enter = 0x4007, + insn_leave = 0x4008, + /* insn_comparison */ + insn_test = 0x5001, + insn_cmp = 0x5002, + /* insn_move */ + insn_mov = 0x6001, /* move */ + insn_movcc = 0x6002, /* conditional move */ + insn_xchg = 0x6003, /* exchange */ + insn_xchgcc = 0x6004, /* conditional exchange */ + /* insn_string */ + insn_strcmp = 0x7001, + insn_strload = 0x7002, + insn_strmov = 0x7003, + insn_strstore = 0x7004, + insn_translate = 0x7005, /* xlat */ + /* insn_bit_manip */ + insn_bittest = 0x8001, + insn_bitset = 0x8002, + insn_bitclear = 0x8003, + /* insn_flag_manip */ + insn_clear_carry = 0x9001, + insn_clear_zero = 0x9002, + insn_clear_oflow = 0x9003, + insn_clear_dir = 0x9004, + insn_clear_sign = 0x9005, + insn_clear_parity = 0x9006, + insn_set_carry = 0x9007, + insn_set_zero = 0x9008, + insn_set_oflow = 0x9009, + insn_set_dir = 0x900A, + insn_set_sign = 0x900B, + insn_set_parity = 0x900C, + insn_tog_carry = 0x9010, + insn_tog_zero = 0x9020, + insn_tog_oflow = 0x9030, + insn_tog_dir = 0x9040, + insn_tog_sign = 0x9050, + insn_tog_parity = 0x9060, + /* insn_fpu */ + insn_fmov = 0xA001, + insn_fmovcc = 0xA002, + insn_fneg = 0xA003, + insn_fabs = 0xA004, + insn_fadd = 0xA005, + insn_fsub = 0xA006, + insn_fmul = 0xA007, + insn_fdiv = 0xA008, + insn_fsqrt = 0xA009, + insn_fcmp = 0xA00A, + insn_fcos = 0xA00C, + insn_fldpi = 0xA00D, + insn_fldz = 0xA00E, + insn_ftan = 0xA00F, + insn_fsine = 0xA010, + insn_fsys = 0xA020, + /* insn_interrupt */ + insn_int = 0xD001, + insn_intcc = 0xD002, /* not present in x86 ISA */ + insn_iret = 0xD003, + insn_bound = 0xD004, + insn_debug = 0xD005, + insn_trace = 0xD006, + insn_invalid_op = 0xD007, + insn_oflow = 0xD008, + /* insn_system */ + insn_halt = 0xE001, + insn_in = 0xE002, /* input from port/bus */ + insn_out = 0xE003, /* output to port/bus */ + insn_cpuid = 0xE004, + insn_lmsw = 0xE005, + insn_smsw = 0xE006, + insn_clts = 0xE007, + /* insn_other */ + insn_nop = 0xF001, + insn_bcdconv = 0xF002, /* convert to or from BCD */ + insn_szconv = 0xF003 /* change size of operand */ }; /* These flags specify special characteristics of the instruction, such as @@ -440,11 +463,11 @@ enum x86_insn_type { * NOTE : These may not be accurate for all instructions; updates to the * opcode tables have not been completed. */ enum x86_insn_note { - insn_note_ring0 = 1, /* Only available in ring 0 */ - insn_note_smm = 2, /* "" in System Management Mode */ - insn_note_serial = 4, /* Serializing instruction */ - insn_note_nonswap = 8, /* Does not swap arguments in att-style formatting */ - insn_note_nosuffix = 16 /* Does not have size suffix in att-style formatting */ + insn_note_ring0 = 1, /* Only available in ring 0 */ + insn_note_smm = 2, /* "" in System Management Mode */ + insn_note_serial = 4, /* Serializing instruction */ + insn_note_nonswap = 8, /* Does not swap arguments in att-style formatting */ + insn_note_nosuffix = 16 /* Does not have size suffix in att-style formatting */ }; /* This specifies what effects the instruction has on the %eflags register */ @@ -458,22 +481,22 @@ enum x86_eflags insn_eflag_parity }; enum x86_flag_status { - insn_carry_set = 0x1, /* CF */ - insn_zero_set = 0x2, /* ZF */ - insn_oflow_set = 0x4, /* OF */ - insn_dir_set = 0x8, /* DF */ - insn_sign_set = 0x10, /* SF */ - insn_parity_set = 0x20, /* PF */ - insn_carry_or_zero_set = 0x40, - insn_zero_set_or_sign_ne_oflow = 0x80, - insn_carry_clear = 0x100, - insn_zero_clear = 0x200, - insn_oflow_clear = 0x400, - insn_dir_clear = 0x800, - insn_sign_clear = 0x1000, - insn_parity_clear = 0x2000, - insn_sign_eq_oflow = 0x4000, - insn_sign_ne_oflow = 0x8000 + insn_carry_set = 0x1, /* CF */ + insn_zero_set = 0x2, /* ZF */ + insn_oflow_set = 0x4, /* OF */ + insn_dir_set = 0x8, /* DF */ + insn_sign_set = 0x10, /* SF */ + insn_parity_set = 0x20, /* PF */ + insn_carry_or_zero_set = 0x40, + insn_zero_set_or_sign_ne_oflow = 0x80, + insn_carry_clear = 0x100, + insn_zero_clear = 0x200, + insn_oflow_clear = 0x400, + insn_dir_clear = 0x800, + insn_sign_clear = 0x1000, + insn_parity_clear = 0x2000, + insn_sign_eq_oflow = 0x4000, + insn_sign_ne_oflow = 0x8000 }; /* The CPU model in which the insturction first appeared; this can be used @@ -482,19 +505,19 @@ enum x86_flag_status { * NOTE : These may not be accurate for all instructions; updates to the * opcode tables have not been completed. */ enum x86_insn_cpu { - cpu_8086 = 1, /* Intel */ - cpu_80286 = 2, - cpu_80386 = 3, - cpu_80387 = 4, - cpu_80486 = 5, - cpu_pentium = 6, - cpu_pentiumpro = 7, - cpu_pentium2 = 8, - cpu_pentium3 = 9, - cpu_pentium4 = 10, - cpu_k6 = 16, /* AMD */ - cpu_k7 = 32, - cpu_athlon = 48 + cpu_8086 = 1, /* Intel */ + cpu_80286 = 2, + cpu_80386 = 3, + cpu_80387 = 4, + cpu_80486 = 5, + cpu_pentium = 6, + cpu_pentiumpro = 7, + cpu_pentium2 = 8, + cpu_pentium3 = 9, + cpu_pentium4 = 10, + cpu_k6 = 16, /* AMD */ + cpu_k7 = 32, + cpu_athlon = 48 }; /* CPU ISA subsets: These are derived from the Instruction Groups in @@ -505,22 +528,22 @@ enum x86_insn_cpu { * NOTE : These may not be accurate for all instructions; updates to the * opcode tables have not been completed. */ enum x86_insn_isa { - isa_gp = 1, /* general purpose */ - isa_fp = 2, /* floating point */ - isa_fpumgt = 3, /* FPU/SIMD management */ - isa_mmx = 4, /* Intel MMX */ - isa_sse1 = 5, /* Intel SSE SIMD */ - isa_sse2 = 6, /* Intel SSE2 SIMD */ - isa_sse3 = 7, /* Intel SSE3 SIMD */ - isa_3dnow = 8, /* AMD 3DNow! SIMD */ - isa_sys = 9 /* system instructions */ + isa_gp = 1, /* general purpose */ + isa_fp = 2, /* floating point */ + isa_fpumgt = 3, /* FPU/SIMD management */ + isa_mmx = 4, /* Intel MMX */ + isa_sse1 = 5, /* Intel SSE SIMD */ + isa_sse2 = 6, /* Intel SSE2 SIMD */ + isa_sse3 = 7, /* Intel SSE3 SIMD */ + isa_3dnow = 8, /* AMD 3DNow! SIMD */ + isa_sys = 9 /* system instructions */ }; enum x86_insn_prefix { - insn_no_prefix = 0, - insn_rep_zero = 1, /* REPZ and REPE */ - insn_rep_notzero = 2, /* REPNZ and REPNZ */ - insn_lock = 4 /* LOCK: */ + insn_no_prefix = 0, + insn_rep_zero = 1, /* REPZ and REPE */ + insn_rep_notzero = 2, /* REPNZ and REPNZ */ + insn_lock = 4 /* LOCK: */ }; @@ -535,15 +558,15 @@ enum x86_insn_prefix { * The "type" (implicit or explicit) and the access method can * be ORed together, e.g. op_wo | op_explicit */ enum x86_op_foreach_type { - op_any = 0, /* ALL operands (explicit, implicit, rwx) */ - op_dest = 1, /* operands with Write access */ - op_src = 2, /* operands with Read access */ - op_ro = 3, /* operands with Read but not Write access */ - op_wo = 4, /* operands with Write but not Read access */ - op_xo = 5, /* operands with Execute access */ - op_rw = 6, /* operands with Read AND Write access */ - op_implicit = 0x10, /* operands that are implied by the opcode */ - op_explicit = 0x20 /* operands that are not side-effects */ + op_any = 0, /* ALL operands (explicit, implicit, rwx) */ + op_dest = 1, /* operands with Write access */ + op_src = 2, /* operands with Read access */ + op_ro = 3, /* operands with Read but not Write access */ + op_wo = 4, /* operands with Write but not Read access */ + op_xo = 5, /* operands with Execute access */ + op_rw = 6, /* operands with Read AND Write access */ + op_implicit = 0x10, /* operands that are implied by the opcode */ + op_explicit = 0x20 /* operands that are not side-effects */ }; /* Operand FOREACH callback: 'arg' is an abritrary parameter passed to the @@ -684,9 +707,9 @@ public: * (buf, buf_len, buf_rva, offset, len, insn, func, arg, resolve_func) * ...but of course all of these are not used at the same time. */ - class X86_Disasm - { - public: +class X86_Disasm +{ +public: /* Function prototype for caller-supplied callback routine * These callbacks are intended to process 'insn' further, e.g. by * adding it to a linked list, database, etc */ @@ -701,28 +724,28 @@ public: * should return -1; in all other cases the RVA to be disassembled next * should be returned. */ typedef int32_t (*DISASM_RESOLVER)( x86_op_t *op, x86_insn_t * current_insn, - void *arg ); - protected: - DISASM_REPORTER __x86_reporter_func; - void * __x86_reporter_arg; - Ia32_Decoder m_decoder; + void *arg ); +protected: + DISASM_REPORTER __x86_reporter_func; + void * __x86_reporter_arg; + Ia32_Decoder m_decoder; - public: - X86_Disasm( x86_options options=opt_none,DISASM_REPORTER reporter=0, void *arg=0 ) : - __x86_reporter_func(reporter), - __x86_reporter_arg(arg) { - x86_init( options,reporter,arg); - } - /* management routines */ - /* 'arg' is caller-specific data which is passed as the first argument +public: + X86_Disasm( x86_options options=opt_none,DISASM_REPORTER reporter=0, void *arg=0 ) : + __x86_reporter_func(reporter), + __x86_reporter_arg(arg) { + x86_init( options,reporter,arg); + } + /* management routines */ + /* 'arg' is caller-specific data which is passed as the first argument * to the reporter callback routine */ - int x86_init( x86_options options, DISASM_REPORTER reporter, void *arg); - void x86_set_reporter( DISASM_REPORTER reporter, void *arg); - void x86_set_options( x86_options options ); - x86_options x86_get_options( void ); - int x86_cleanup(void); + int x86_init( x86_options options, DISASM_REPORTER reporter, void *arg); + void x86_set_reporter( DISASM_REPORTER reporter, void *arg); + void x86_set_options( x86_options options ); + x86_options x86_get_options( void ); + int x86_cleanup(void); - /* x86_disasm: Disassemble a single instruction from a buffer of bytes. + /* x86_disasm: Disassemble a single instruction from a buffer of bytes. * Returns size of instruction in bytes. * Caller is responsible for calling x86_oplist_free() on * a reused "insn" to avoid leaking memory when calling this @@ -733,9 +756,9 @@ public: * offset : Offset in buffer to disassemble * insn : Structure to fill with disassembled instruction */ - unsigned int x86_disasm( unsigned char *buf, unsigned int buf_len, - uint32_t buf_rva, unsigned int offset, - x86_insn_t * insn ); + unsigned int x86_disasm(const unsigned char *buf, unsigned int buf_len, + uint32_t buf_rva, unsigned int offset, + x86_insn_t * insn ); /* x86_disasm_range: Sequential disassembly of a range of bytes in a buffer, * invoking a callback function each time an instruction * is successfully disassembled. The 'range' refers to the @@ -794,7 +817,7 @@ public: unsigned int x86_ip_reg(void); unsigned int x86_flag_reg(void); - }; +}; /* Instruction operands: these are stored as a list of explicit and * implicit operands. It is recommended that the 'foreach' routines @@ -843,24 +866,24 @@ public: #define X86_WILDCARD_BYTE 0xF4 struct x86_invariant_op_t{ - enum x86_op_type type; /* operand type */ - enum x86_op_datatype datatype; /* operand size */ - enum x86_op_access access; /* operand access [RWX] */ - x86_op_flags flags; /* misc flags */ + enum x86_op_type type; /* operand type */ + enum x86_op_datatype datatype; /* operand size */ + enum x86_op_access access; /* operand access [RWX] */ + x86_op_flags flags; /* misc flags */ }; struct x86_invariant_t { - unsigned char bytes[64]; /* invariant representation */ - unsigned int size; /* number of bytes in insn */ + unsigned char bytes[64]; /* invariant representation */ + unsigned int size; /* number of bytes in insn */ x86_insn_t::x86_insn_group group; /* meta-type, e.g. INS_EXEC */ - enum x86_insn_type type; /* type, e.g. INS_BRANCH */ - x86_invariant_op_t operands[3]; /* operands: dest, src, imm */ + enum x86_insn_type type; /* type, e.g. INS_BRANCH */ + x86_invariant_op_t operands[3]; /* operands: dest, src, imm */ } ; /* return a version of the instruction with the variant bytes masked out */ size_t x86_invariant_disasm( unsigned char *buf, int buf_len, - x86_invariant_t *inv ); + x86_invariant_t *inv ); /* return the size in bytes of the intruction pointed to by 'buf'; * this used x86_invariant_disasm since it faster than x86_disasm */ size_t x86_size_disasm( unsigned char *buf, unsigned int buf_len ); diff --git a/3rd_party/libdisasm/x86_disasm.cpp b/3rd_party/libdisasm/x86_disasm.cpp index 883cdb3..f083349 100644 --- a/3rd_party/libdisasm/x86_disasm.cpp +++ b/3rd_party/libdisasm/x86_disasm.cpp @@ -21,7 +21,7 @@ void x86_insn_t::make_invalid(unsigned char *buf) type = insn_invalid; memcpy( bytes, buf, 1 ); } -unsigned int X86_Disasm::x86_disasm( unsigned char *buf, unsigned int buf_len, +unsigned int X86_Disasm::x86_disasm( const unsigned char *buf, unsigned int buf_len, uint32_t buf_rva, unsigned int offset, x86_insn_t *insn ){ int len, size; diff --git a/CMakeLists.txt b/CMakeLists.txt index e0e028f..a454609 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ ENDIF() ADD_SUBDIRECTORY(3rd_party) -llvm_map_components_to_libraries(REQ_LLVM_LIBRARIES jit native mc support) +llvm_map_components_to_libraries(REQ_LLVM_LIBRARIES jit native mc support tablegen) INCLUDE_DIRECTORIES( 3rd_party/libdisasm include @@ -50,6 +50,7 @@ set(dcc_LIB_SOURCES src/hltype.cpp src/machine_x86.cpp src/icode.cpp + src/RegisterNode src/idioms.cpp src/idioms/idiom1.cpp src/idioms/arith_idioms.cpp diff --git a/base_regression.sh b/base_regression.sh index 5065e25..410afca 100755 --- a/base_regression.sh +++ b/base_regression.sh @@ -3,4 +3,4 @@ #make -j5 #cd .. ./test_use_base.sh -./regression_tester.rb ./dcc_original -s -c 2>stderr >stdout; diff tests/prev/ tests/outputs/ +./regression_tester.rb ./dcc_original -s -c 2>stderr >stdout; diff -wB tests/prev/ tests/outputs/ diff --git a/full_regression.sh b/full_regression.sh index d7e6324..9b8f1c5 100755 --- a/full_regression.sh +++ b/full_regression.sh @@ -1,3 +1,3 @@ #!/bin/bash ./test_use_all.sh -./regression_tester.rb ./dcc_original -s -c 2>stderr >stdout; diff tests/prev/ tests/outputs/ +./regression_tester.rb ./dcc_original -s -c 2>stderr >stdout; diff -wB tests/prev/ tests/outputs/ diff --git a/include/BinaryImage.h b/include/BinaryImage.h index dcd3fb9..ec35af2 100644 --- a/include/BinaryImage.h +++ b/include/BinaryImage.h @@ -15,6 +15,7 @@ struct PROG /* Loaded program image parameters */ uint16_t segMain; /* The segment of the main() proc */ bool bSigs; /* True if signatures loaded */ int cbImage; /* Length of image in bytes */ - uint8_t * Image; /* Allocated by loader to hold entire program image */ + const uint8_t *image() const {return Imagez;} + uint8_t * Imagez; /* Allocated by loader to hold entire program image */ }; diff --git a/include/Enums.h b/include/Enums.h index e2d3507..d8a2ddc 100644 --- a/include/Enums.h +++ b/include/Enums.h @@ -8,6 +8,7 @@ enum regType }; enum condId { + UNDEF=0, GLOB_VAR, /* global variable */ REGISTER, /* register */ LOCAL_VAR, /* negative disp */ diff --git a/include/IdentType.h b/include/IdentType.h index b743711..7578695 100644 --- a/include/IdentType.h +++ b/include/IdentType.h @@ -2,32 +2,29 @@ #include "ast.h" #include "types.h" #include "machine_x86.h" - +struct GlobalVariable; +struct AstIdent; struct IDENTTYPE { + friend struct GlobalVariable; + friend struct Constant; + friend struct AstIdent; +protected: condId idType; - regType regiType; /* for REGISTER only */ +public: + condId type() {return idType;} + void type(condId t) {idType=t;} union _idNode { - int regiIdx; /* index into localId, REGISTER */ - int globIdx; /* index into symtab for GLOB_VAR */ int localIdx; /* idx into localId, LOCAL_VAR */ int paramIdx; /* idx into args symtab, PARAMS */ - int idxGlbIdx; /* idx into localId, GLOB_VAR_IDX */ - struct _kte - { /* for CONSTANT only */ - uint32_t kte; /* value of the constant */ - uint8_t size; /* #bytes size constant */ - } kte; uint32_t strIdx; /* idx into image, for STRING */ int longIdx; /* idx into LOCAL_ID table, LONG_VAR*/ - struct _call { /* for FUNCTION only */ - Function *proc; - STKFRAME *args; - } call; struct { /* for OTHER; tmp struct */ eReg seg; /* segment */ eReg regi; /* index mode */ int16_t off; /* offset */ } other; } idNode; + IDENTTYPE() : idType(UNDEF) + {} }; diff --git a/include/Procedure.h b/include/Procedure.h index fb196a0..ae96d80 100644 --- a/include/Procedure.h +++ b/include/Procedure.h @@ -9,7 +9,7 @@ #include "StackFrame.h" /* PROCEDURE NODE */ struct CALL_GRAPH; -struct COND_EXPR; +struct Expr; struct Disassembler; struct Function; struct CALL_GRAPH; @@ -74,8 +74,8 @@ struct FunctionType }; struct Assignment { - COND_EXPR *lhs; - COND_EXPR *rhs; + Expr *lhs; + Expr *rhs; }; struct JumpTable { @@ -184,9 +184,11 @@ public: void displayCFG(); void displayStats(); - void processHliCall(COND_EXPR *exp, iICODE picode); + void processHliCall(Expr *exp, iICODE picode); void preprocessReturnDU(LivenessSet &_liveOut); + Expr * adjustActArgType(Expr *_exp, hlType forType); + std::string writeCall(Function *tproc, STKFRAME &args, int *numLoc); protected: void extractJumpTableRange(ICODE& pIcode, STATE *pstate, JumpTable &table); bool followAllTableEntries(JumpTable &table, uint32_t cs, ICODE &pIcode, CALL_GRAPH *pcallGraph, STATE *pstate); diff --git a/include/ast.h b/include/ast.h index a96260f..acd4e6a 100644 --- a/include/ast.h +++ b/include/ast.h @@ -5,6 +5,7 @@ * (C) Cristina Cifuentes */ #pragma once +#include #include #include #include @@ -34,37 +35,37 @@ typedef boost::iterator_range rICODE; #include "IdentType.h" /* Expression data type */ -struct COND_EXPR +struct Expr { public: condNodeType m_type; /* Conditional Expression Node Type */ public: - static bool insertSubTreeLongReg(COND_EXPR *exp, COND_EXPR *&tree, int longIdx); - static bool insertSubTreeReg(COND_EXPR *&tree, COND_EXPR *_expr, eReg regi, const LOCAL_ID *locsym); - static bool insertSubTreeReg(AstIdent *&tree, COND_EXPR *_expr, eReg regi, const LOCAL_ID *locsym); + static bool insertSubTreeLongReg(Expr *exp, Expr *&tree, int longIdx); + static bool insertSubTreeReg(Expr *&tree, Expr *_expr, eReg regi, const LOCAL_ID *locsym); + static bool insertSubTreeReg(AstIdent *&tree, Expr *_expr, eReg regi, const LOCAL_ID *locsym); public: - virtual COND_EXPR *clone() const; - void release(); - COND_EXPR(condNodeType t=UNKNOWN_OP) : m_type(t) + + virtual Expr *clone() const=0; //!< Makes a deep copy of the given expression + Expr(condNodeType t=UNKNOWN_OP) : m_type(t) { } - virtual ~COND_EXPR(); + virtual ~Expr(); public: virtual std::string walkCondExpr (Function * pProc, int* numLoc) const=0; - virtual COND_EXPR *inverse() const=0; // return new COND_EXPR that is invarse of this + virtual Expr *inverse() const=0; // return new COND_EXPR that is invarse of this virtual bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locId)=0; - virtual COND_EXPR *insertSubTreeReg(COND_EXPR *_expr, eReg regi, const LOCAL_ID *locsym)=0; - virtual COND_EXPR *insertSubTreeLongReg(COND_EXPR *_expr, int longIdx)=0; - virtual hlType expType(Function *pproc) const; + virtual Expr *insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym)=0; + virtual Expr *insertSubTreeLongReg(Expr *_expr, int longIdx)=0; + virtual hlType expType(Function *pproc) const=0; virtual int hlTypeSize(Function *pproc) const=0; - virtual void performLongRemoval(eReg regi, LOCAL_ID *locId) {} + virtual Expr * performLongRemoval(eReg regi, LOCAL_ID *locId) { return this; } }; -struct UnaryOperator : public COND_EXPR +struct UnaryOperator : public Expr { - UnaryOperator(condNodeType t=UNKNOWN_OP) : COND_EXPR(t),unaryExp(nullptr) {} - COND_EXPR *unaryExp; - virtual COND_EXPR *inverse() const + UnaryOperator(condNodeType t=UNKNOWN_OP) : Expr(t),unaryExp(nullptr) {} + Expr *unaryExp; + virtual Expr *inverse() const { if (m_type == NEGATION) //TODO: memleak here { @@ -72,14 +73,14 @@ struct UnaryOperator : public COND_EXPR } return this->clone(); } - virtual COND_EXPR *clone() const + virtual Expr *clone() const { UnaryOperator *newExp = new UnaryOperator(*this); newExp->unaryExp = unaryExp->clone(); return newExp; } virtual bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locs); - static UnaryOperator *Create(condNodeType t, COND_EXPR *sub_expr) + static UnaryOperator *Create(condNodeType t, Expr *sub_expr) { UnaryOperator *newExp = new UnaryOperator(); newExp->m_type = t; @@ -94,22 +95,22 @@ struct UnaryOperator : public COND_EXPR public: int hlTypeSize(Function *pproc) const; virtual std::string walkCondExpr(Function *pProc, int *numLoc) const; - virtual COND_EXPR *insertSubTreeReg(COND_EXPR *_expr, eReg regi, const LOCAL_ID *locsym); + virtual Expr *insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym); virtual hlType expType(Function *pproc) const; - virtual COND_EXPR *insertSubTreeLongReg(COND_EXPR *_expr, int longIdx); + virtual Expr *insertSubTreeLongReg(Expr *_expr, int longIdx); }; -struct BinaryOperator : public COND_EXPR +struct BinaryOperator : public Expr { condOp m_op; - COND_EXPR *m_lhs; - COND_EXPR *m_rhs; - BinaryOperator(condOp o) : COND_EXPR(BOOLEAN_OP) + Expr *m_lhs; + Expr *m_rhs; + BinaryOperator(condOp o) : Expr(BOOLEAN_OP) { m_op = o; m_lhs=m_rhs=nullptr; } - BinaryOperator(condOp o,COND_EXPR *l,COND_EXPR *r) : COND_EXPR(BOOLEAN_OP) + BinaryOperator(condOp o,Expr *l,Expr *r) : Expr(BOOLEAN_OP) { m_op = o; m_lhs=l; @@ -121,51 +122,51 @@ struct BinaryOperator : public COND_EXPR delete m_lhs; delete m_rhs; } - static BinaryOperator *Create(condOp o,COND_EXPR *l,COND_EXPR *r) + static BinaryOperator *Create(condOp o,Expr *l,Expr *r) { BinaryOperator *res = new BinaryOperator(o); res->m_lhs = l; res->m_rhs = r; return res; } - static BinaryOperator *LogicAnd(COND_EXPR *l,COND_EXPR *r) + static BinaryOperator *LogicAnd(Expr *l,Expr *r) { return new BinaryOperator(DBL_AND,l,r); } - static BinaryOperator *And(COND_EXPR *l,COND_EXPR *r) + static BinaryOperator *And(Expr *l,Expr *r) { return new BinaryOperator(AND,l,r); } - static BinaryOperator *Or(COND_EXPR *l,COND_EXPR *r) + static BinaryOperator *Or(Expr *l,Expr *r) { return new BinaryOperator(OR,l,r); } - static BinaryOperator *LogicOr(COND_EXPR *l,COND_EXPR *r) + static BinaryOperator *LogicOr(Expr *l,Expr *r) { return new BinaryOperator(DBL_OR,l,r); } - static BinaryOperator *CreateAdd(COND_EXPR *l,COND_EXPR *r); + static BinaryOperator *CreateAdd(Expr *l,Expr *r); void changeBoolOp(condOp newOp); - virtual COND_EXPR *inverse() const; - virtual COND_EXPR *clone() const; + virtual Expr *inverse() const; + virtual Expr *clone() const; virtual bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locs); - virtual COND_EXPR *insertSubTreeReg(COND_EXPR *_expr, eReg regi, const LOCAL_ID *locsym); - virtual COND_EXPR *insertSubTreeLongReg(COND_EXPR *_expr, int longIdx); - const COND_EXPR *lhs() const + virtual Expr *insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym); + virtual Expr *insertSubTreeLongReg(Expr *_expr, int longIdx); + const Expr *lhs() const { - return const_cast(const_cast(this)->lhs()); + return const_cast(const_cast(this)->lhs()); } - const COND_EXPR *rhs() const + const Expr *rhs() const { - return const_cast(const_cast(this)->rhs()); + return const_cast(const_cast(this)->rhs()); } - COND_EXPR *lhs() + Expr *lhs() { assert(m_type==BOOLEAN_OP); return m_lhs; } - COND_EXPR *rhs() + Expr *rhs() { assert(m_type==BOOLEAN_OP); return m_rhs; @@ -182,31 +183,27 @@ struct AstIdent : public UnaryOperator { AstIdent() : UnaryOperator(IDENTIFIER) { - memset(&ident,0,sizeof(ident)); } - virtual COND_EXPR *clone() const + IDENTTYPE ident; /* for IDENTIFIER */ + static AstIdent * Loc(int off, LOCAL_ID *localId); + static AstIdent * LongIdx(int idx); + static AstIdent * String(uint32_t idx); + static AstIdent * Other(eReg seg, eReg regi, int16_t off); + static AstIdent * Param(int off, const STKFRAME *argSymtab); + static AstIdent * Long(LOCAL_ID *localId, opLoc sd, iICODE pIcode, hlFirst f, iICODE ix, operDu du, LLInst &atOffset); + static AstIdent * idID(const ID *retVal, LOCAL_ID *locsym, iICODE ix_); + static Expr * id(const LLInst &ll_insn, opLoc sd, Function *pProc, iICODE ix_, ICODE &duIcode, operDu du); + + virtual Expr *clone() const { return new AstIdent(*this); } - IDENTTYPE ident; /* for IDENTIFIER */ - static AstIdent * RegIdx(int idx, regType reg_type); - static AstIdent * Kte(uint32_t kte, uint8_t size); - static AstIdent * Loc(int off, LOCAL_ID *localId); - static AstIdent * Reg(eReg regi, uint32_t icodeFlg, LOCAL_ID *locsym); - static AstIdent * LongIdx(int idx); - static AstIdent * Other(eReg seg, eReg regi, int16_t off); - static AstIdent * idParam(int off, const STKFRAME *argSymtab); - static AstIdent * idLong(LOCAL_ID *localId, opLoc sd, iICODE pIcode, hlFirst f, iICODE ix, operDu du, LLInst &atOffset); - static AstIdent * idFunc(Function *pproc, STKFRAME *args); - static AstIdent * idID(const ID *retVal, LOCAL_ID *locsym, iICODE ix_); - static COND_EXPR * id(const LLInst &ll_insn, opLoc sd, Function *pProc, iICODE ix_, ICODE &duIcode, operDu du); - virtual int hlTypeSize(Function *pproc) const; virtual hlType expType(Function *pproc) const; - virtual void performLongRemoval(eReg regi, LOCAL_ID *locId); + virtual Expr * performLongRemoval(eReg regi, LOCAL_ID *locId); virtual std::string walkCondExpr(Function *pProc, int *numLoc) const; - virtual COND_EXPR *insertSubTreeReg(COND_EXPR *_expr, eReg regi, const LOCAL_ID *locsym); - virtual COND_EXPR *insertSubTreeLongReg(COND_EXPR *_expr, int longIdx); + virtual Expr *insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym); + virtual Expr *insertSubTreeLongReg(Expr *_expr, int longIdx); virtual bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locId); protected: eReg otherLongRegi (eReg regi, int idx, LOCAL_ID *locTbl); @@ -214,7 +211,94 @@ protected: }; struct GlobalVariable : public AstIdent { - static AstIdent *Create(int16_t segValue, int16_t off); + bool valid; + int globIdx; + virtual Expr *clone() const + { + return new GlobalVariable(*this); + } + GlobalVariable(int16_t segValue, int16_t off); + std::string walkCondExpr(Function *pProc, int *numLoc) const; + int hlTypeSize(Function *pproc) const; + hlType expType(Function *pproc) const; +}; +struct GlobalVariableIdx : public AstIdent +{ + bool valid; + int idxGlbIdx; /* idx into localId, GLOB_VAR_IDX */ + + virtual Expr *clone() const + { + return new GlobalVariableIdx(*this); + } + GlobalVariableIdx(int16_t segValue, int16_t off, uint8_t regi, const LOCAL_ID *locSym); + std::string walkCondExpr(Function *pProc, int *numLoc) const; + int hlTypeSize(Function *pproc) const; + hlType expType(Function *pproc) const; +}; +struct Constant : public AstIdent +{ + struct _kte + { /* for CONSTANT only */ + uint32_t kte; /* value of the constant */ + uint8_t size; /* #bytes size constant */ + } kte; + + Constant(uint32_t _kte, uint8_t size) + { + ident.idType = CONSTANT; + kte.kte = _kte; + kte.size = size; + } + virtual Expr *clone() const + { + return new Constant(*this); + } + std::string walkCondExpr(Function *pProc, int *numLoc) const; + int hlTypeSize(Function *pproc) const; + hlType expType(Function *pproc) const; +}; +struct FuncNode : public AstIdent +{ + struct _call { /* for FUNCTION only */ + Function *proc; + STKFRAME *args; + } call; + + FuncNode(Function *pproc, STKFRAME *args) + { + call.proc = pproc; + call.args = args; + } + virtual Expr *clone() const + { + return new FuncNode(*this); + } + std::string walkCondExpr(Function *pProc, int *numLoc) const; + int hlTypeSize(Function *pproc) const; + hlType expType(Function *pproc) const; +}; +struct RegisterNode : public AstIdent +{ + regType regiType; /* for REGISTER only */ + int regiIdx; /* index into localId, REGISTER */ + + virtual Expr *insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym); + + RegisterNode(int idx, regType reg_type) + { + ident.type(REGISTER); + regiType = reg_type; + regiIdx = idx; + } + + RegisterNode(eReg regi, uint32_t icodeFlg, LOCAL_ID *locsym); + virtual Expr *clone() const + { + return new RegisterNode(*this); + } + std::string walkCondExpr(Function *pProc, int *numLoc) const; + int hlTypeSize(Function *) const; + hlType expType(Function *pproc) const; + bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locId); }; -struct Constant : public COND_EXPR -{}; diff --git a/include/bundle.h b/include/bundle.h index fd09809..4c866db 100644 --- a/include/bundle.h +++ b/include/bundle.h @@ -33,7 +33,7 @@ public: int current_indent; }; - +extern bundle cCode; #define lineSize 360 /* 3 lines in the mean time */ //void newBundle (bundle *procCode); diff --git a/include/dcc.h b/include/dcc.h index 7f21d55..93c5b99 100644 --- a/include/dcc.h +++ b/include/dcc.h @@ -37,9 +37,6 @@ public: bool insertCallGraph(Function *caller, ilFunction callee); void insertArc(ilFunction newProc); }; -//#define NUM_PROCS_DELTA 5 /* delta # procs a proc invokes */ -//extern std::list pProcList; -//extern FunctionListType pProcList; //extern CALL_GRAPH * callGraph; /* Pointer to the head of the call graph */ extern bundle cCode; /* Output C procedure's declaration and code */ @@ -110,11 +107,11 @@ void udm(void); /* udm.c */ void freeCFG(BB * cfg); /* graph.c */ BB * newBB(BB *, int, int, uint8_t, int, Function *); /* graph.c */ void BackEnd(char *filename, CALL_GRAPH *); /* backend.c */ -char *cChar(uint8_t c); /* backend.c */ +extern char *cChar(uint8_t c); /* backend.c */ eErrorId scan(uint32_t ip, ICODE &p); /* scanner.c */ void parse (CALL_GRAPH * *); /* parser.c */ -int strSize (uint8_t *, char); /* parser.c */ +extern int strSize (const uint8_t *, char); /* parser.c */ //void disassem(int pass, Function * pProc); /* disassem.c */ void interactDis(Function *, int initIC); /* disassem.c */ bool JmpInst(llIcode opcode); /* idioms.c */ @@ -126,10 +123,8 @@ bool LibCheck(Function &p); /* chklib.c */ /* Exported functions from procs.c */ boolT insertCallGraph (CALL_GRAPH *, ilFunction, ilFunction); -void adjustActArgType (COND_EXPR *, hlType, Function *); /* Exported functions from hlicode.c */ -std::string writeCall (Function *, STKFRAME &, Function *, int *); char *writeJcond (const HLTYPE &, Function *, int *); char *writeJcondInv (HLTYPE, Function *, int *); diff --git a/include/error.h b/include/error.h index 9ffeb98..126bd0f 100644 --- a/include/error.h +++ b/include/error.h @@ -1,7 +1,10 @@ -/***************************************************************************** +/* + +**************************************************************************** * Error codes * (C) Cristina Cifuentes - ****************************************************************************/ +*************************************************************************** +*/ #pragma once /* These definitions refer to errorMessage in error.c */ diff --git a/include/icode.h b/include/icode.h index ee7f88e..2f28028 100644 --- a/include/icode.h +++ b/include/icode.h @@ -65,6 +65,8 @@ struct LivenessSet : public std::bitset<32> { return test(r-rAX); } +public: + LivenessSet &clrReg(int r); private: void postProcessCompositeRegs(); }; @@ -83,15 +85,16 @@ struct DU #define MAX_REGS_DEF 4 /* 2 regs def'd for long-reg vars */ -struct COND_EXPR; +struct Expr; struct AstIdent; +struct UnaryOperator; struct HlTypeSupport { //hlIcode opcode; /* hlIcode opcode */ virtual bool removeRegFromLong(eReg regi, LOCAL_ID *locId)=0; virtual std::string writeOut(Function *pProc, int *numLoc) const=0; protected: - void performLongRemoval (eReg regi, LOCAL_ID *locId, COND_EXPR *tree); + Expr * performLongRemoval (eReg regi, LOCAL_ID *locId, Expr *tree); }; struct CallType : public HlTypeSupport @@ -100,9 +103,9 @@ struct CallType : public HlTypeSupport Function * proc; STKFRAME * args; // actual arguments void allocStkArgs (int num); - bool newStkArg(COND_EXPR *exp, llIcode opcode, Function *pproc); - void placeStkArg(COND_EXPR *exp, int pos); - virtual COND_EXPR * toId(); + bool newStkArg(Expr *exp, llIcode opcode, Function *pproc); + void placeStkArg(Expr *exp, int pos); + virtual Expr * toAst(); public: bool removeRegFromLong(eReg /*regi*/, LOCAL_ID */*locId*/) { @@ -114,20 +117,24 @@ public: struct AssignType : public HlTypeSupport { /* for HLI_ASSIGN */ - COND_EXPR *lhs; - COND_EXPR *rhs; - AssignType() : lhs(0),rhs(0) {} +protected: +public: + Expr *m_lhs; + Expr *rhs; + AssignType() {} + Expr *lhs() const {return m_lhs;} + void lhs(Expr *l); bool removeRegFromLong(eReg regi, LOCAL_ID *locId); std::string writeOut(Function *pProc, int *numLoc) const; }; struct ExpType : public HlTypeSupport { /* for HLI_JCOND, HLI_RET, HLI_PUSH, HLI_POP*/ - COND_EXPR *v; + Expr *v; ExpType() : v(0) {} bool removeRegFromLong(eReg regi, LOCAL_ID *locId) { - performLongRemoval(regi,locId,v); + v=performLongRemoval(regi,locId,v); return true; } std::string writeOut(Function *pProc, int *numLoc) const; @@ -147,15 +154,21 @@ public: return const_cast(const_cast(this)->get()); } - void expr(COND_EXPR *e) + void expr(Expr *e) { assert(e); exp.v=e; } - void replaceExpr(COND_EXPR *e); - COND_EXPR * expr() { return exp.v;} - const COND_EXPR * expr() const { return exp.v;} - void set(hlIcode i,COND_EXPR *e) + Expr *getMyExpr() + { + if(opcode==HLI_CALL) + return call.toAst(); + return expr(); + } + void replaceExpr(Expr *e); + Expr * expr() { return exp.v;} + const Expr * expr() const { return exp.v;} + void set(hlIcode i,Expr *e) { if(i!=HLI_RET) assert(e); @@ -163,7 +176,7 @@ public: opcode=i; exp.v=e; } - void set(COND_EXPR *l,COND_EXPR *r); + void set(Expr *l,Expr *r); void setCall(Function *proc); HLTYPE(hlIcode op=HLI_INVALID) : opcode(op) {} @@ -179,7 +192,7 @@ public: } public: std::string write1HlIcode(Function *pProc, int *numLoc) const; - void setAsgn(COND_EXPR *lhs, COND_EXPR *rhs); + void setAsgn(Expr *lhs, Expr *rhs); } ; /* LOW_LEVEL icode operand record */ struct LLOperand @@ -201,6 +214,16 @@ struct LLOperand proc.proc=0; proc.cb=0; } + bool operator==(const LLOperand &with) const + { + return (seg==with.seg) && + (segOver==with.segOver) && + (segValue==with.segValue) && + (regi == with.regi) && + (off == with.off) && + (opz==with.opz) && + (proc.proc==with.proc.proc); + } int64_t getImm2() const {return opz;} void SetImmediateOp(uint32_t dw) { @@ -219,8 +242,6 @@ struct LLOperand static LLOperand CreateReg2(unsigned Val) { LLOperand Op; -// Op.Kind = kRegister; -// Op.RegVal = Reg; Op.regi = (eReg)Val; return Op; } @@ -229,20 +250,15 @@ struct LLOperand struct LLInst : public llvm::MCInst //: public llvm::ilist_node { protected: - uint32_t flg; /* icode flags */ -// LLOperand &get(int idx) -// { -// assert(idx caseTbl2; int hllLabNum; /* label # for hll codegen */ bool conditionalJump() @@ -260,9 +276,7 @@ public: flg &= ~flag; } uint32_t getFlag() const {return flg;} - //llIcode getOpcode() const { return opcode; } - - uint32_t GetLlLabel() const { return label;} + uint32_t GetLlLabel() const { return label;} void SetImmediateOp(uint32_t dw) {m_src.SetImmediateOp(dw);} @@ -308,7 +322,6 @@ public: void flops(std::ostringstream &out); bool isJmpInst(); - //HLTYPE toHighLevel(COND_EXPR *lhs, COND_EXPR *rhs, Function *func); HLTYPE createCall(); LLInst(ICODE *container) : flg(0),codeIdx(0),numBytes(0),m_link(container) { @@ -465,13 +478,13 @@ public: condId idType(opLoc sd); // HLL setting functions // set this icode to be an assign - void setAsgn(COND_EXPR *lhs, COND_EXPR *rhs) + void setAsgn(Expr *lhs, Expr *rhs) { type=HIGH_LEVEL; hlU()->setAsgn(lhs,rhs); } - void setUnary(hlIcode op, COND_EXPR *_exp); - void setJCond(COND_EXPR *cexp); + void setUnary(hlIcode op, Expr *_exp); + void setJCond(Expr *cexp); void emitGotoLabel(int indLevel); void copyDU(const ICODE &duIcode, operDu _du, operDu duDu); @@ -480,7 +493,7 @@ public: public: bool removeDefRegi(eReg regi, int thisDefIdx, LOCAL_ID *locId); void checkHlCall(); - bool newStkArg(COND_EXPR *exp, llIcode opcode, Function *pproc) + bool newStkArg(Expr *exp, llIcode opcode, Function *pproc) { return hlU()->call.newStkArg(exp,opcode,pproc); } diff --git a/include/locident.h b/include/locident.h index 550dbd5..96ea12a 100644 --- a/include/locident.h +++ b/include/locident.h @@ -18,7 +18,7 @@ /* Type definition */ // this array has to stay in-order of addition i.e. not std::set > // TODO: why ? -struct COND_EXPR; +struct Expr; struct AstIdent; struct ICODE; struct LLInst; @@ -132,7 +132,7 @@ public: size_t csym() const {return id_arr.size();} void newRegArg(iICODE picode, iICODE ticode) const; void processTargetIcode(iICODE picode, int &numHlIcodes, iICODE ticode, bool isLong) const; - void forwardSubs(COND_EXPR *lhs, COND_EXPR *rhs, iICODE picode, iICODE ticode, int &numHlIcodes) const; + void forwardSubs(Expr *lhs, Expr *rhs, iICODE picode, iICODE ticode, int &numHlIcodes) const; AstIdent *createId(const ID *retVal, iICODE ix_); }; diff --git a/include/state.h b/include/state.h index 003720d..ff0f99a 100644 --- a/include/state.h +++ b/include/state.h @@ -30,6 +30,10 @@ struct STATE memset(r,0,sizeof(int16_t)*INDEX_BX_SI); //TODO: move this to machine_x86 memset(f,0,sizeof(uint8_t)*INDEX_BX_SI); } + void setMemoryByte(uint32_t addr,uint8_t val) + { + //TODO: make this into a full scale value tracking class ! + }; }; diff --git a/include/symtab.h b/include/symtab.h index e3b3650..b29363d 100644 --- a/include/symtab.h +++ b/include/symtab.h @@ -7,7 +7,7 @@ #include #include "Enums.h" #include "types.h" -struct COND_EXPR; +struct Expr; struct AstIdent; struct TypeContainer; /* * * * * * * * * * * * * * * * * */ @@ -36,7 +36,7 @@ struct SYM : public SymbolCommon struct STKSYM : public SymbolCommon { typedef int16_t tLabel; - COND_EXPR *actual; /* Expression tree of actual parameter */ + Expr *actual; /* Expression tree of actual parameter */ AstIdent *regs; /* For register arguments only */ tLabel label; /* Immediate off from BP (+:args, -:params) */ uint8_t regOff; /* Offset is a register (e.g. SI, DI) */ diff --git a/regression_tester.rb b/regression_tester.rb index 43eeee4..a860dbf 100755 --- a/regression_tester.rb +++ b/regression_tester.rb @@ -14,6 +14,7 @@ def perform_test(exepath,filepath,outname,args) filepath=path_local(filepath) joined_args = args.join(' ') printf("calling:" + "#{exepath} -a1 #{joined_args} -o#{output_path}.a1 #{filepath}\n") + STDERR << "Errors for : #{filepath}" result = `#{exepath} -a1 -o#{output_path}.a1 #{filepath}` result = `#{exepath} -a2 #{joined_args} -o#{output_path}.a2 #{filepath}` result = `#{exepath} #{joined_args} -o#{output_path} #{filepath}` diff --git a/src/BasicBlock.cpp b/src/BasicBlock.cpp index 659a2ad..dff3fb8 100644 --- a/src/BasicBlock.cpp +++ b/src/BasicBlock.cpp @@ -408,7 +408,6 @@ ICODE &BB::back() size_t BB::size() { - return distance(instructions.begin(),instructions.end()); } diff --git a/src/Procedure.cpp b/src/Procedure.cpp index db39e60..492e5dc 100644 --- a/src/Procedure.cpp +++ b/src/Procedure.cpp @@ -14,7 +14,7 @@ void JumpTable::pruneEntries(uint16_t cs) PROG *prg(Project::get()->binary()); for (uint32_t i = start; i < finish; i += 2) { - uint32_t target = cs + LH(&prg->Image[i]); + uint32_t target = cs + LH(&prg->image()[i]); if (target < finish && target >= start) finish = target; else if (target >= (uint32_t)prg->cbImage) @@ -23,9 +23,9 @@ void JumpTable::pruneEntries(uint16_t cs) ICODE _Icode; // used as scan input for (uint32_t i = start; i < finish; i += 2) { - uint32_t target = cs + LH(&prg->Image[i]); + uint32_t target = cs + LH(&prg->image()[i]); /* Be wary of 00 00 as code - it's probably data */ - if (! (prg->Image[target] || prg->Image[target+1]) || scan(target, _Icode)) + if (! (prg->image()[target] || prg->image()[target+1]) || scan(target, _Icode)) finish = i; } diff --git a/src/ast.cpp b/src/ast.cpp index 6231177..7ebdaf0 100644 --- a/src/ast.cpp +++ b/src/ast.cpp @@ -9,11 +9,24 @@ #include #include #include +#include +#include +#include +#include + #include "types.h" -#include "dcc.h" +#include "ast.h" +#include "bundle.h" #include "machine_x86.h" #include "project.h" using namespace std; +using namespace boost; +using namespace boost::adaptors; +extern int strSize (const uint8_t *, char); +extern char *cChar(uint8_t c); + + + // Conditional operator symbols in C. Index by condOp enumeration type static const char * const condOpSym[] = { " <= ", " < ", " == ", " != ", " > ", " >= ", " & ", " | ", " ^ ", " ~ ", @@ -97,61 +110,27 @@ void ICODE::copyDU(const ICODE &duIcode, operDu _du, operDu duDu) /* Returns an identifier conditional expression node of type GLOB_VAR */ -AstIdent *GlobalVariable::Create(int16_t segValue, int16_t off) +GlobalVariable::GlobalVariable(int16_t segValue, int16_t off) { - AstIdent *newExp; uint32_t adr; - - newExp = new AstIdent(); - newExp->ident.idType = GLOB_VAR; + valid = true; + ident.idType = GLOB_VAR; adr = opAdr(segValue, off); auto i=Project::get()->getSymIdxByAdd(adr); if ( not Project::get()->validSymIdx(i) ) { printf ("Error, glob var not found in symtab\n"); - delete newExp; - return 0; + valid = false; } - newExp->ident.idNode.globIdx = i; - return (newExp); + globIdx = i; } - -/* Returns an identifier conditional expression node of type REGISTER */ -AstIdent *AstIdent::Reg(eReg regi, uint32_t icodeFlg, LOCAL_ID *locsym) +string GlobalVariable::walkCondExpr(Function *, int *) const { - AstIdent *newExp; - - newExp = new AstIdent(); - newExp->ident.idType = REGISTER; - hlType type_sel; - regType reg_type; - if ((icodeFlg & B) || (icodeFlg & SRC_B)) - { - type_sel = TYPE_BYTE_SIGN; - reg_type = BYTE_REG; - } - else /* uint16_t */ - { - type_sel = TYPE_WORD_SIGN; - reg_type = WORD_REG; - } - newExp->ident.idNode.regiIdx = locsym->newByteWordReg(type_sel, regi); - newExp->ident.regiType = reg_type; - return (newExp); -} - - -/* Returns an identifier conditional expression node of type REGISTER */ -AstIdent *AstIdent::RegIdx(int idx, regType reg_type) -{ - AstIdent *newExp; - - newExp = new AstIdent(); - newExp->ident.idType = REGISTER; - newExp->ident.regiType = reg_type; - newExp->ident.idNode.regiIdx = idx; - return (newExp); + if(valid) + return Project::get()->symtab[globIdx].name; + else + return "INVALID GlobalVariable"; } /* Returns an identifier conditional expression node of type LOCAL_VAR */ @@ -177,7 +156,7 @@ AstIdent *AstIdent::Loc(int off, LOCAL_ID *localId) /* Returns an identifier conditional expression node of type PARAM */ -AstIdent *AstIdent::idParam(int off, const STKFRAME * argSymtab) +AstIdent *AstIdent::Param(int off, const STKFRAME * argSymtab) { AstIdent *newExp; @@ -193,11 +172,10 @@ AstIdent *AstIdent::idParam(int off, const STKFRAME * argSymtab) /* Returns an identifier conditional expression node of type GLOB_VAR_IDX. * This global variable is indexed by regi. */ -AstIdent *idCondExpIdxGlob (int16_t segValue, int16_t off, uint8_t regi, const LOCAL_ID *locSym) +GlobalVariableIdx::GlobalVariableIdx (int16_t segValue, int16_t off, uint8_t regi, const LOCAL_ID *locSym) { size_t i; - AstIdent *newExp = new AstIdent(); - newExp->ident.idType = GLOB_VAR_IDX; + ident.type(GLOB_VAR_IDX); for (i = 0; i < locSym->csym(); i++) { const BWGLB_TYPE &lID(locSym->id_arr[i].id.bwGlb); @@ -206,19 +184,14 @@ AstIdent *idCondExpIdxGlob (int16_t segValue, int16_t off, uint8_t regi, const L } if (i == locSym->csym()) printf ("Error, indexed-glob var not found in local id table\n"); - newExp->ident.idNode.idxGlbIdx = i; - return (newExp); + idxGlbIdx = i; } - - -/* Returns an identifier conditional expression node of type CONSTANT */ -AstIdent *AstIdent::Kte(uint32_t kte, uint8_t size) +string GlobalVariableIdx::walkCondExpr(Function *pProc, int *) const { - AstIdent *newExp = new AstIdent(); - newExp->ident.idType = CONSTANT; - newExp->ident.idNode.kte.kte = kte; - newExp->ident.idNode.kte.size = size; - return (newExp); + ostringstream o; + auto bwGlb = &pProc->localId.id_arr[idxGlbIdx].id.bwGlb; + o << (bwGlb->seg << 4) + bwGlb->off << "["<regi)<<"]"; + return o.str(); } @@ -232,27 +205,34 @@ AstIdent *AstIdent::LongIdx (int idx) return (newExp); } +AstIdent *AstIdent::String(uint32_t idx) +{ + AstIdent *newExp = new AstIdent; + newExp->ident.idNode.strIdx = idx; + newExp->ident.type(STRING); + return newExp; +} + /* Returns an identifier conditional expression node of type LONG_VAR */ -AstIdent *AstIdent::idLong(LOCAL_ID *localId, opLoc sd, iICODE pIcode, hlFirst f, iICODE ix, operDu du, LLInst &atOffset) +AstIdent *AstIdent::Long(LOCAL_ID *localId, opLoc sd, iICODE pIcode, hlFirst f, iICODE ix, operDu du, LLInst &atOffset) { int idx; - AstIdent *newExp = new AstIdent(); + AstIdent *newExp; /* Check for long constant and save it as a constant expression */ if ((sd == SRC) && pIcode->ll()->testFlags(I)) /* constant */ { - newExp->ident.idType = CONSTANT; int value; if (f == HIGH_FIRST) value = (pIcode->ll()->src().getImm2() << 16) + atOffset.src().getImm2(); else/* LOW_FIRST */ value = (atOffset.src().getImm2() << 16)+ pIcode->ll()->src().getImm2(); - newExp->ident.idNode.kte.kte = value; - newExp->ident.idNode.kte.size = 4; + newExp = new Constant(value,4); } /* Save it as a long expression (reg, stack or glob) */ else { + newExp = new AstIdent(); idx = localId->newLong(sd, pIcode, f, ix, du, atOffset); newExp->ident.idType = LONG_VAR; newExp->ident.idNode.longIdx = idx; @@ -260,19 +240,6 @@ AstIdent *AstIdent::idLong(LOCAL_ID *localId, opLoc sd, iICODE pIcode, hlFirst f return (newExp); } - -/* Returns an identifier conditional expression node of type FUNCTION */ -AstIdent *AstIdent::idFunc(Function * pproc, STKFRAME * args) -{ - AstIdent *newExp = new AstIdent(); - - newExp->ident.idType = FUNCTION; - newExp->ident.idNode.call.proc = pproc; - newExp->ident.idNode.call.args = args; - return (newExp); -} - - /* Returns an identifier conditional expression node of type OTHER. * Temporary solution, should really be encoded as an indexed type (eg. * arrays). */ @@ -291,20 +258,26 @@ AstIdent *AstIdent::Other(eReg seg, eReg regi, int16_t off) * TYPE_WORD_SIGN */ AstIdent *AstIdent::idID (const ID *retVal, LOCAL_ID *locsym, iICODE ix_) { - AstIdent *newExp = new AstIdent(); int idx; - - if (retVal->type == TYPE_LONG_SIGN) + AstIdent *newExp=nullptr; + switch(retVal->type) { - idx = locsym->newLongReg (TYPE_LONG_SIGN, retVal->id.longId.h,retVal->id.longId.l, ix_); - newExp->ident.idType = LONG_VAR; - newExp->ident.idNode.longIdx = idx; - } - else if (retVal->type == TYPE_WORD_SIGN) - { - newExp->ident.idType = REGISTER; - newExp->ident.idNode.regiIdx = locsym->newByteWordReg(TYPE_WORD_SIGN, retVal->id.regi); - newExp->ident.regiType = WORD_REG; + case TYPE_LONG_SIGN: + { + newExp = new AstIdent(); + idx = locsym->newLongReg (TYPE_LONG_SIGN, retVal->id.longId.h,retVal->id.longId.l, ix_); + newExp->ident.idType = LONG_VAR; + newExp->ident.idNode.longIdx = idx; + break; + } + case TYPE_WORD_SIGN: + newExp = new RegisterNode(locsym->newByteWordReg(retVal->type, retVal->id.regi),WORD_REG); + break; + case TYPE_BYTE_SIGN: + newExp = new RegisterNode(locsym->newByteWordReg(retVal->type, retVal->id.regi),BYTE_REG); + break; + default: + fprintf(stderr,"AstIdent::idID unhandled type %d\n",retVal->type); } return (newExp); } @@ -315,9 +288,9 @@ AstIdent *AstIdent::idID (const ID *retVal, LOCAL_ID *locsym, iICODE ix_) * Arguments: * duIcode: icode instruction that needs the du set. * du: operand is defined or used in current instruction. */ -COND_EXPR *AstIdent::id(const LLInst &ll_insn, opLoc sd, Function * pProc, iICODE ix_,ICODE &duIcode, operDu du) +Expr *AstIdent::id(const LLInst &ll_insn, opLoc sd, Function * pProc, iICODE ix_,ICODE &duIcode, operDu du) { - COND_EXPR *newExp; + Expr *newExp; int idx; /* idx into pIcode->localId table */ @@ -335,35 +308,33 @@ COND_EXPR *AstIdent::id(const LLInst &ll_insn, opLoc sd, Function * pProc, iICOD else if ((sd == DST) && ll_insn.testFlags(IM_TMP_DST)) { /* implicit tmp */ - newExp = AstIdent::Reg (rTMP, 0, &pProc->localId); + newExp = new RegisterNode(rTMP, 0, &pProc->localId); duIcode.setRegDU(rTMP, (operDu)eUSE); } else if ((sd == SRC) && ll_insn.testFlags(I)) /* constant */ - newExp = AstIdent::Kte (ll_insn.src().getImm2(), 2); + newExp = new Constant(ll_insn.src().getImm2(), 2); else if (pm.regi == rUNDEF) /* global variable */ - newExp = GlobalVariable::Create(pm.segValue, pm.off); + newExp = new GlobalVariable(pm.segValue, pm.off); else if ( pm.isReg() ) /* register */ { - newExp = AstIdent::Reg (pm.regi, (sd == SRC) ? ll_insn.getFlag() : - ll_insn.getFlag() & NO_SRC_B, - &pProc->localId); + newExp = new RegisterNode(pm.regi, (sd == SRC) ? ll_insn.getFlag() : ll_insn.getFlag() & NO_SRC_B, &pProc->localId); duIcode.setRegDU( pm.regi, du); } else if (pm.off) /* offset */ - { + { // TODO: this is ABI specific, should be actually based on Function calling conv if ((pm.seg == rSS) && (pm.regi == INDEX_BP)) /* idx on bp */ { if (pm.off >= 0) /* argument */ - newExp = AstIdent::idParam (pm.off, &pProc->args); + newExp = AstIdent::Param (pm.off, &pProc->args); else /* local variable */ newExp = AstIdent::Loc (pm.off, &pProc->localId); } else if ((pm.seg == rDS) && (pm.regi == INDEX_BX)) /* bx */ { if (pm.off > 0) /* global variable */ - newExp = idCondExpIdxGlob (pm.segValue, pm.off, rBX,&pProc->localId); + newExp = new GlobalVariableIdx(pm.segValue, pm.off, rBX,&pProc->localId); else newExp = AstIdent::Other (pm.seg, pm.regi, pm.off); duIcode.setRegDU( rBX, eUSE); @@ -377,26 +348,18 @@ COND_EXPR *AstIdent::id(const LLInst &ll_insn, opLoc sd, Function * pProc, iICOD { if ((pm.seg == rDS) && (pm.regi > INDEX_BP_DI)) /* dereference */ { + eReg selected; switch (pm.regi) { - case INDEX_SI: - newExp = AstIdent::Reg(rSI, 0, &pProc->localId); - duIcode.setRegDU( rSI, du); - break; - case INDEX_DI: - newExp = AstIdent::Reg(rDI, 0, &pProc->localId); - duIcode.setRegDU( rDI, du); - break; - case INDEX_BP: - newExp = AstIdent::Reg(rBP, 0, &pProc->localId); - break; - case INDEX_BX: - newExp = AstIdent::Reg(rBX, 0, &pProc->localId); - duIcode.setRegDU( rBX, du); - break; + case INDEX_SI: selected = rSI; break; + case INDEX_DI: selected = rDI; break; + case INDEX_BP: selected = rBP; break; + case INDEX_BX: selected = rBX; break; default: newExp = 0; assert(false); } + newExp = new RegisterNode(selected, 0, &pProc->localId); + duIcode.setRegDU( selected, du); newExp = UnaryOperator::Create(DEREFERENCE, newExp); } else @@ -433,30 +396,11 @@ condId LLInst::idType(opLoc sd) const /* Size of hl types */ int hlSize[] = {2, 1, 1, 2, 2, 4, 4, 4, 2, 2, 1, 4, 4}; -int COND_EXPR::hlTypeSize(Function * pproc) const +int Expr::hlTypeSize(Function * pproc) const { if (this == NULL) return (2); /* for TYPE_UNKNOWN */ - - switch (m_type) { - case BOOLEAN_OP: - assert(false); - return 0; - // return expr->hlTypeSize(pproc); - case NEGATION: case ADDRESSOF: - case POST_INC: case POST_DEC: - case PRE_INC: case PRE_DEC: - case DEREFERENCE: - assert(false); - return 0; - //return expr->hlTypeSize(pproc); - case IDENTIFIER: - assert(false); - return 0; - default: - fprintf(stderr,"hlTypeSize queried for Unkown type %d \n",m_type); - break; - } + fprintf(stderr,"hlTypeSize queried for Unkown type %d \n",m_type); return 2; // CC: is this correct? } @@ -469,33 +413,35 @@ int UnaryOperator::hlTypeSize(Function *pproc) const { return (unaryExp->hlTypeSize (pproc)); } +int GlobalVariable::hlTypeSize(Function *pproc) const +{ + return (Project::get()->symbolSize(globIdx)); +} +int GlobalVariableIdx::hlTypeSize(Function *pproc) const +{ + return (hlSize[pproc->localId.id_arr[idxGlbIdx].type]); +} + int AstIdent::hlTypeSize(Function *pproc) const { switch (ident.idType) { case GLOB_VAR: - return (Project::get()->symbolSize(ident.idNode.globIdx)); - case REGISTER: - if (ident.regiType == BYTE_REG) - return (1); - else - return (2); + assert(false); + return 1; case LOCAL_VAR: return (hlSize[pproc->localId.id_arr[ident.idNode.localIdx].type]); case PARAM: return (hlSize[pproc->args[ident.idNode.paramIdx].type]); - case GLOB_VAR_IDX: - return (hlSize[pproc->localId.id_arr[ident.idNode.idxGlbIdx].type]); - case CONSTANT: - return (ident.idNode.kte.size); case STRING: return (2); case LONG_VAR: return (4); - case FUNCTION: - return (hlSize[ident.idNode.call.proc->retVal.type]); case OTHER: return (2); + default: + assert(false); + return -1; } /* eos */ } hlType BinaryOperator::expType(Function *pproc) const @@ -516,70 +462,47 @@ hlType UnaryOperator::expType(Function *pproc) const { return unaryExp->expType (pproc); } +hlType GlobalVariable::expType(Function *pproc) const +{ + return Project::get()->symbolType(globIdx); +} +hlType GlobalVariableIdx::expType(Function *pproc) const +{ + return (pproc->localId.id_arr[idxGlbIdx].type); +} + hlType AstIdent::expType(Function *pproc) const { switch (ident.idType) { - case GLOB_VAR: - return Project::get()->symbolType(ident.idNode.globIdx); + case UNDEF: + case CONSTANT: + case FUNCTION: case REGISTER: - if (ident.regiType == BYTE_REG) - return (TYPE_BYTE_SIGN); - else - return (TYPE_WORD_SIGN); + case GLOB_VAR: + case GLOB_VAR_IDX: + assert(false); + return TYPE_UNKNOWN; case LOCAL_VAR: return (pproc->localId.id_arr[ident.idNode.localIdx].type); case PARAM: return (pproc->args[ident.idNode.paramIdx].type); - case GLOB_VAR_IDX: - return (pproc->localId.id_arr[ident.idNode.idxGlbIdx].type); - case CONSTANT: - return (TYPE_CONST); case STRING: return (TYPE_STR); case LONG_VAR: return (pproc->localId.id_arr[ident.idNode.longIdx].type); - case FUNCTION: - return (ident.idNode.call.proc->retVal.type); default: return (TYPE_UNKNOWN); } /* eos */ return (TYPE_UNKNOWN); } /* Returns the type of the expression */ -hlType COND_EXPR::expType(Function * pproc) const -{ - - if (this == nullptr) - return (TYPE_UNKNOWN); - - switch (m_type) - { - case BOOLEAN_OP: - assert(false); - return TYPE_UNKNOWN; - case POST_INC: case POST_DEC: - case PRE_INC: case PRE_DEC: - case NEGATION: - assert(false); - return TYPE_UNKNOWN; - case ADDRESSOF: return (TYPE_PTR); /***????****/ - case DEREFERENCE: return (TYPE_PTR); - case IDENTIFIER: - assert(false); - return TYPE_UNKNOWN; - case UNKNOWN_OP: - assert(false); - return (TYPE_UNKNOWN); - } - return TYPE_UNKNOWN; // CC: Correct? -} /* Removes the register from the tree. If the register was part of a long * register (eg. dx:ax), the node gets transformed into an integer register * node. */ -void HlTypeSupport::performLongRemoval (eReg regi, LOCAL_ID *locId, COND_EXPR *tree) +Expr * HlTypeSupport::performLongRemoval (eReg regi, LOCAL_ID *locId, Expr *tree) { switch (tree->m_type) { case BOOLEAN_OP: @@ -588,12 +511,13 @@ void HlTypeSupport::performLongRemoval (eReg regi, LOCAL_ID *locId, COND_EXPR *t case NEGATION: case ADDRESSOF: case DEREFERENCE: case IDENTIFIER: - tree->performLongRemoval(regi,locId); + return tree->performLongRemoval(regi,locId); break; default: fprintf(stderr,"performLongRemoval attemped on %d\n",tree->m_type); break; } + return tree; } /* Returns the string located in image, formatted in C format. */ @@ -603,10 +527,10 @@ static std::string getString (int offset) ostringstream o; int strLen, i; - strLen = strSize (&prog.Image[offset], '\0'); + strLen = strSize (&prog.image()[offset], '\0'); o << '"'; for (i = 0; i < strLen; i++) - o<walkCondExpr(pProc, numLoc); + if(m_op!=NOT) + { + outStr << lhs()->walkCondExpr(pProc, numLoc); + } + assert(rhs()); outStr << condOpSym[m_op]; outStr << rhs()->walkCondExpr(pProc, numLoc); outStr << ")"; @@ -631,23 +559,6 @@ string AstIdent::walkCondExpr(Function *pProc, int *numLoc) const std::ostringstream o; switch (ident.idType) { - case GLOB_VAR: - o << Project::get()->symtab[ident.idNode.globIdx].name; - break; - case REGISTER: - id = &pProc->localId.id_arr[ident.idNode.regiIdx]; - if (id->name[0] == '\0') /* no name */ - { - id->setLocalName(++(*numLoc)); - codeOut <type)<< " "<name<<"; "; - codeOut <<"/* "<id.regi)<<" */\n"; - } - if (id->hasMacro) - o << id->macro << "("<name<<")"; - else - o << id->name; - break; - case LOCAL_VAR: o << pProc->localId.id_arr[ident.idNode.localIdx].name; break; @@ -659,19 +570,6 @@ string AstIdent::walkCondExpr(Function *pProc, int *numLoc) const else o << psym->name; break; - - case GLOB_VAR_IDX: - bwGlb = &pProc->localId.id_arr[ident.idNode.idxGlbIdx].id.bwGlb; - o << (bwGlb->seg << 4) + bwGlb->off << "["<regi)<<"]"; - break; - - case CONSTANT: - if (ident.idNode.kte.kte < 1000) - o << ident.idNode.kte.kte; - else - o << "0x"<id.longGlb.seg<<4) + id->id.longGlb.offH <<"][bx]"; } break; - - case FUNCTION: - o << writeCall (ident.idNode.call.proc,*ident.idNode.call.args, pProc, numLoc); - break; - case OTHER: off = ident.idNode.other.off; o << Machine_X86::regName(ident.idNode.other.seg)<< "["; @@ -711,6 +604,12 @@ string AstIdent::walkCondExpr(Function *pProc, int *numLoc) const else if (off>0) o << "+"<< hexStr (off); o << "]"; + break; + default: + assert(false); + return ""; + + } /* eos */ outStr << o.str(); cCode.appendDecl(codeOut.str()); @@ -783,35 +682,6 @@ string UnaryOperator::walkCondExpr(Function *pProc, int *numLoc) const -/* Makes a copy of the given expression. Allocates newExp storage for each - * node. Returns the copy. */ -COND_EXPR *COND_EXPR::clone() const -{ - COND_EXPR* newExp=nullptr; /* Expression node copy */ - - switch (m_type) - { - case BOOLEAN_OP: - assert(false); - break; - - case NEGATION: - case ADDRESSOF: - case DEREFERENCE: - case PRE_DEC: case POST_DEC: - case PRE_INC: case POST_INC: - assert(false); - break; - - case IDENTIFIER: - assert(false); - break; - - default: - fprintf(stderr,"Clone attempt on unhandled type %d\n",m_type); - } - return (newExp); -} /* Changes the boolean conditional operator at the root of this expression */ @@ -819,9 +689,9 @@ void BinaryOperator::changeBoolOp (condOp newOp) { m_op = newOp; } -bool COND_EXPR::insertSubTreeReg (AstIdent *&tree, COND_EXPR *_expr, eReg regi,const LOCAL_ID *locsym) +bool Expr::insertSubTreeReg (AstIdent *&tree, Expr *_expr, eReg regi,const LOCAL_ID *locsym) { - COND_EXPR *nd = tree; + Expr *nd = tree; bool res=insertSubTreeReg (nd, _expr, regi,locsym); if(res) { @@ -833,12 +703,12 @@ bool COND_EXPR::insertSubTreeReg (AstIdent *&tree, COND_EXPR *_expr, eReg regi,c } /* Inserts the expression exp into the tree at the location specified by the * register regi */ -bool COND_EXPR::insertSubTreeReg (COND_EXPR *&tree, COND_EXPR *_expr, eReg regi,const LOCAL_ID *locsym) +bool Expr::insertSubTreeReg (Expr *&tree, Expr *_expr, eReg regi,const LOCAL_ID *locsym) { if (tree == NULL) return false; - COND_EXPR *temp=tree->insertSubTreeReg(_expr,regi,locsym); + Expr *temp=tree->insertSubTreeReg(_expr,regi,locsym); if(nullptr!=temp) { tree=temp; @@ -847,11 +717,9 @@ bool COND_EXPR::insertSubTreeReg (COND_EXPR *&tree, COND_EXPR *_expr, eReg regi, return false; } -COND_EXPR *UnaryOperator::insertSubTreeReg(COND_EXPR *_expr, eReg regi, const LOCAL_ID *locsym) +Expr *UnaryOperator::insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym) { - - eReg treeReg; - COND_EXPR *temp; + Expr *temp; switch (m_type) { case NEGATION: @@ -869,9 +737,9 @@ COND_EXPR *UnaryOperator::insertSubTreeReg(COND_EXPR *_expr, eReg regi, const LO } return nullptr; } -COND_EXPR *BinaryOperator::insertSubTreeReg(COND_EXPR *_expr, eReg regi, const LOCAL_ID *locsym) +Expr *BinaryOperator::insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym) { - COND_EXPR *r; + Expr *r; if(this->op()!=NOT) { assert(m_lhs); @@ -891,30 +759,21 @@ COND_EXPR *BinaryOperator::insertSubTreeReg(COND_EXPR *_expr, eReg regi, const L } return nullptr; } -COND_EXPR *AstIdent::insertSubTreeReg(COND_EXPR *_expr, eReg regi, const LOCAL_ID *locsym) +Expr *AstIdent::insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym) { - eReg treeReg; if (ident.idType == REGISTER) { - treeReg = locsym->id_arr[ident.idNode.regiIdx].id.regi; - if (treeReg == regi) /* uint16_t reg */ - { - return _expr; - } - else if(Machine_X86::isSubRegisterOf(treeReg,regi)) /* uint16_t/uint8_t reg */ - { - return _expr; - } + assert(false); } return nullptr; } /* Inserts the expression exp into the tree at the location specified by the * long register index longIdx*/ -bool COND_EXPR::insertSubTreeLongReg(COND_EXPR *_expr, COND_EXPR *&tree, int longIdx) +bool Expr::insertSubTreeLongReg(Expr *_expr, Expr *&tree, int longIdx) { if (tree == NULL) return false; - COND_EXPR *temp=tree->insertSubTreeLongReg(_expr,longIdx); + Expr *temp=tree->insertSubTreeLongReg(_expr,longIdx); if(nullptr!=temp) { tree=temp; @@ -922,9 +781,9 @@ bool COND_EXPR::insertSubTreeLongReg(COND_EXPR *_expr, COND_EXPR *&tree, int lon } return false; } -COND_EXPR *UnaryOperator::insertSubTreeLongReg(COND_EXPR *_expr, int longIdx) +Expr *UnaryOperator::insertSubTreeLongReg(Expr *_expr, int longIdx) { - COND_EXPR *temp = unaryExp->insertSubTreeLongReg(_expr,longIdx); + Expr *temp = unaryExp->insertSubTreeLongReg(_expr,longIdx); if (nullptr!=temp) { unaryExp = temp; @@ -932,9 +791,9 @@ COND_EXPR *UnaryOperator::insertSubTreeLongReg(COND_EXPR *_expr, int longIdx) } return nullptr; } -COND_EXPR *BinaryOperator::insertSubTreeLongReg(COND_EXPR *_expr, int longIdx) +Expr *BinaryOperator::insertSubTreeLongReg(Expr *_expr, int longIdx) { - COND_EXPR *r; + Expr *r; if(m_op!=NOT) { r=m_lhs->insertSubTreeLongReg(_expr,longIdx); @@ -952,7 +811,7 @@ COND_EXPR *BinaryOperator::insertSubTreeLongReg(COND_EXPR *_expr, int longIdx) } return nullptr; } -COND_EXPR *AstIdent::insertSubTreeLongReg(COND_EXPR *_expr, int longIdx) +Expr *AstIdent::insertSubTreeLongReg(Expr *_expr, int longIdx) { if (ident.idNode.longIdx == longIdx) { @@ -962,24 +821,12 @@ COND_EXPR *AstIdent::insertSubTreeLongReg(COND_EXPR *_expr, int longIdx) } /* Recursively deallocates the abstract syntax tree rooted at *exp */ -COND_EXPR::~COND_EXPR() -{ - switch (m_type) - { - case BOOLEAN_OP: - case NEGATION: - case ADDRESSOF: - case DEREFERENCE: - case IDENTIFIER: - break; - default: - fprintf(stderr,"release attempt on unhandled type %d\n",m_type); - } -} +Expr::~Expr(){} + /* Makes a copy of the given expression. Allocates newExp storage for each * node. Returns the copy. */ -COND_EXPR *BinaryOperator::clone() const +Expr *BinaryOperator::clone() const { BinaryOperator* newExp=new BinaryOperator(m_op); /* Expression node copy */ newExp->m_lhs = m_lhs->clone(); @@ -987,7 +834,7 @@ COND_EXPR *BinaryOperator::clone() const return newExp; } -COND_EXPR *BinaryOperator::inverse() const +Expr *BinaryOperator::inverse() const { static condOp invCondOp[] = {GREATER, GREATER_EQUAL, NOT_EQUAL, EQUAL, LESS_EQUAL, LESS, DUMMY,DUMMY,DUMMY,DUMMY, @@ -1017,18 +864,17 @@ COND_EXPR *BinaryOperator::inverse() const return res; } -void AstIdent::performLongRemoval(eReg regi, LOCAL_ID *locId) +Expr *AstIdent::performLongRemoval(eReg regi, LOCAL_ID *locId) { eReg otherRegi; /* high or low part of long register */ - IDENTTYPE* ident_2 = &ident; - if (ident_2->idType == LONG_VAR) + if (ident.idType == LONG_VAR) { - otherRegi = otherLongRegi (regi, ident_2->idNode.longIdx, locId); - ident_2->idType = REGISTER; - ident_2->regiType = WORD_REG; - ident_2->idNode.regiIdx = locId->newByteWordReg(TYPE_WORD_SIGN,otherRegi); + otherRegi = otherLongRegi (regi, ident.idNode.longIdx, locId); + delete this; + return new RegisterNode(locId->newByteWordReg(TYPE_WORD_SIGN,otherRegi),WORD_REG); } + return this; } eReg AstIdent::otherLongRegi (eReg regi, int idx, LOCAL_ID *locTbl) { @@ -1045,3 +891,42 @@ eReg AstIdent::otherLongRegi (eReg regi, int idx, LOCAL_ID *locTbl) } return rUNDEF; // Cristina: please check this! } + + +string Constant::walkCondExpr(Function *, int *) const +{ + ostringstream o; + if (kte.kte < 1000) + o << kte.kte; + else + o << "0x"<writeCall(call.proc,*call.args, numLoc); +} + +int FuncNode::hlTypeSize(Function *) const +{ + return hlSize[call.proc->retVal.type]; +} + +hlType FuncNode::expType(Function *) const +{ + return call.proc->retVal.type; +} + + + diff --git a/src/backend.cpp b/src/backend.cpp index bbeecd1..315e409 100644 --- a/src/backend.cpp +++ b/src/backend.cpp @@ -108,23 +108,23 @@ static void printGlobVar (std::ostream &ostr,SYM * psym) switch (psym->size) { case 1: - ostr << "uint8_t\t"<name<<" = "<name<<" = "<name<<" = "<name<<" = "<type == TYPE_PTR) /* pointer */ - ostr << "uint16_t *\t"<name<<" = "<name<<" = "<name<<"[4] = \""<< - prog.Image[relocOp]<size; j++) - strContents << cChar(prog.Image[relocOp + j]); + strContents << cChar(prog.image()[relocOp + j]); ostr << "char\t*"<name<<" = \""< is a runtime routine */ } } - if (locatePattern(prog.Image, pProc.procEntry, + if (locatePattern(prog.image(), pProc.procEntry, pProc.procEntry+sizeof(pattMsChkstk), pattMsChkstk, sizeof(pattMsChkstk), &Idx)) { @@ -587,11 +587,11 @@ void dispKey(int /*i*/) iPatLen). The pattern can contain wild bytes; if you really want to match for the pattern that is used up by the WILD uint8_t, tough - it will match with everything else as well. */ -static boolT locatePattern(uint8_t *source, int iMin, int iMax, uint8_t *pattern, int iPatLen, +static boolT locatePattern(const uint8_t *source, int iMin, int iMax, uint8_t *pattern, int iPatLen, int *index) { int i, j; - uint8_t *pSrc; /* Pointer to start of considered source */ + const uint8_t *pSrc; /* Pointer to start of considered source */ int iLast; iLast = iMax - iPatLen; /* Last source uint8_t to consider */ @@ -645,18 +645,18 @@ void STATE::checkStartup() /* Check the Turbo Pascal signatures first, since they involve only the first 3 bytes, and false positives may be founf with the others later */ - if (locatePattern(prog.Image, startOff, startOff+5, pattBorl4on,sizeof(pattBorl4on), &i)) + if (locatePattern(prog.image(), startOff, startOff+5, pattBorl4on,sizeof(pattBorl4on), &i)) { /* The first 5 bytes are a far call. Follow that call and determine the version from that */ - rel = LH(&prog.Image[startOff+1]); /* This is abs off of init */ - para= LH(&prog.Image[startOff+3]);/* This is abs seg of init */ + rel = LH(&prog.image()[startOff+1]); /* This is abs off of init */ + para= LH(&prog.image()[startOff+3]);/* This is abs seg of init */ init = ((uint32_t)para << 4) + rel; - if (locatePattern(prog.Image, init, init+26, pattBorl4Init, + if (locatePattern(prog.image(), init, init+26, pattBorl4Init, sizeof(pattBorl4Init), &i)) { - setState(rDS, LH(&prog.Image[i+1])); + setState(rDS, LH(&prog.image()[i+1])); printf("Borland Pascal v4 detected\n"); chVendor = 't'; /* Trubo */ chModel = 'p'; /* Pascal */ @@ -665,11 +665,11 @@ void STATE::checkStartup() prog.segMain = prog.initCS; /* At the 5 uint8_t jump */ goto gotVendor; /* Already have vendor */ } - else if (locatePattern(prog.Image, init, init+26, pattBorl5Init, + else if (locatePattern(prog.image(), init, init+26, pattBorl5Init, sizeof(pattBorl5Init), &i)) { - setState( rDS, LH(&prog.Image[i+1])); + setState( rDS, LH(&prog.image()[i+1])); printf("Borland Pascal v5.0 detected\n"); chVendor = 't'; /* Trubo */ chModel = 'p'; /* Pascal */ @@ -678,11 +678,11 @@ void STATE::checkStartup() prog.segMain = prog.initCS; goto gotVendor; /* Already have vendor */ } - else if (locatePattern(prog.Image, init, init+26, pattBorl7Init, + else if (locatePattern(prog.image(), init, init+26, pattBorl7Init, sizeof(pattBorl7Init), &i)) { - setState( rDS, LH(&prog.Image[i+1])); + setState( rDS, LH(&prog.image()[i+1])); printf("Borland Pascal v7 detected\n"); chVendor = 't'; /* Trubo */ chModel = 'p'; /* Pascal */ @@ -701,43 +701,43 @@ void STATE::checkStartup() as near data, just more pushes at the start. */ if(prog.cbImage>int(startOff+0x180+sizeof(pattMainLarge))) { - if (locatePattern(prog.Image, startOff, startOff+0x180, pattMainLarge,sizeof(pattMainLarge), &i)) + if (locatePattern(prog.image(), startOff, startOff+0x180, pattMainLarge,sizeof(pattMainLarge), &i)) { - rel = LH(&prog.Image[i+OFFMAINLARGE]); /* This is abs off of main */ - para= LH(&prog.Image[i+OFFMAINLARGE+2]);/* This is abs seg of main */ + rel = LH(&prog.image()[i+OFFMAINLARGE]); /* This is abs off of main */ + para= LH(&prog.image()[i+OFFMAINLARGE+2]);/* This is abs seg of main */ /* Save absolute image offset */ prog.offMain = ((uint32_t)para << 4) + rel; prog.segMain = (uint16_t)para; chModel = 'l'; /* Large model */ } - else if (locatePattern(prog.Image, startOff, startOff+0x180, pattMainCompact, + else if (locatePattern(prog.image(), startOff, startOff+0x180, pattMainCompact, sizeof(pattMainCompact), &i)) { - rel = LH_SIGNED(&prog.Image[i+OFFMAINCOMPACT]);/* This is the rel addr of main */ + rel = LH_SIGNED(&prog.image()[i+OFFMAINCOMPACT]);/* This is the rel addr of main */ prog.offMain = i+OFFMAINCOMPACT+2+rel; /* Save absolute image offset */ prog.segMain = prog.initCS; chModel = 'c'; /* Compact model */ } - else if (locatePattern(prog.Image, startOff, startOff+0x180, pattMainMedium, + else if (locatePattern(prog.image(), startOff, startOff+0x180, pattMainMedium, sizeof(pattMainMedium), &i)) { - rel = LH(&prog.Image[i+OFFMAINMEDIUM]); /* This is abs off of main */ - para= LH(&prog.Image[i+OFFMAINMEDIUM+2]);/* This is abs seg of main */ + rel = LH(&prog.image()[i+OFFMAINMEDIUM]); /* This is abs off of main */ + para= LH(&prog.image()[i+OFFMAINMEDIUM+2]);/* This is abs seg of main */ prog.offMain = ((uint32_t)para << 4) + rel; prog.segMain = (uint16_t)para; chModel = 'm'; /* Medium model */ } - else if (locatePattern(prog.Image, startOff, startOff+0x180, pattMainSmall, + else if (locatePattern(prog.image(), startOff, startOff+0x180, pattMainSmall, sizeof(pattMainSmall), &i)) { - rel = LH_SIGNED(&prog.Image[i+OFFMAINSMALL]); /* This is rel addr of main */ + rel = LH_SIGNED(&prog.image()[i+OFFMAINSMALL]); /* This is rel addr of main */ prog.offMain = i+OFFMAINSMALL+2+rel; /* Save absolute image offset */ prog.segMain = prog.initCS; chModel = 's'; /* Small model */ } - else if (memcmp(&prog.Image[startOff], pattTPasStart, sizeof(pattTPasStart)) == 0) + else if (memcmp(&prog.image()[startOff], pattTPasStart, sizeof(pattTPasStart)) == 0) { - rel = LH_SIGNED(&prog.Image[startOff+1]); /* Get the jump offset */ + rel = LH_SIGNED(&prog.image()[startOff+1]); /* Get the jump offset */ prog.offMain = rel+startOff+3; /* Save absolute image offset */ prog.offMain += 0x20; /* These first 32 bytes are setting up */ prog.segMain = prog.initCS; @@ -764,27 +764,27 @@ void STATE::checkStartup() /* Now decide the compiler vendor and version number */ - if (memcmp(&prog.Image[startOff], pattMsC5Start, sizeof(pattMsC5Start)) == 0) + if (memcmp(&prog.image()[startOff], pattMsC5Start, sizeof(pattMsC5Start)) == 0) { /* Yes, this is Microsoft startup code. The DS is sitting right here in the next 2 bytes */ - setState( rDS, LH(&prog.Image[startOff+sizeof(pattMsC5Start)])); + setState( rDS, LH(&prog.image()[startOff+sizeof(pattMsC5Start)])); chVendor = 'm'; /* Microsoft compiler */ chVersion = '5'; /* Version 5 */ printf("MSC 5 detected\n"); } /* The C8 startup pattern is different from C5's */ - else if (memcmp(&prog.Image[startOff], pattMsC8Start, sizeof(pattMsC8Start)) == 0) + else if (memcmp(&prog.image()[startOff], pattMsC8Start, sizeof(pattMsC8Start)) == 0) { - setState( rDS, LH(&prog.Image[startOff+sizeof(pattMsC8Start)])); + setState( rDS, LH(&prog.image()[startOff+sizeof(pattMsC8Start)])); printf("MSC 8 detected\n"); chVendor = 'm'; /* Microsoft compiler */ chVersion = '8'; /* Version 8 */ } /* The C8 .com startup pattern is different again! */ - else if (memcmp(&prog.Image[startOff], pattMsC8ComStart, + else if (memcmp(&prog.image()[startOff], pattMsC8ComStart, sizeof(pattMsC8ComStart)) == 0) { printf("MSC 8 .com detected\n"); @@ -792,27 +792,27 @@ void STATE::checkStartup() chVersion = '8'; /* Version 8 */ } - else if (locatePattern(prog.Image, startOff, startOff+0x30, pattBorl2Start, + else if (locatePattern(prog.image(), startOff, startOff+0x30, pattBorl2Start, sizeof(pattBorl2Start), &i)) { /* Borland startup. DS is at the second uint8_t (offset 1) */ - setState( rDS, LH(&prog.Image[i+1])); + setState( rDS, LH(&prog.image()[i+1])); printf("Borland v2 detected\n"); chVendor = 'b'; /* Borland compiler */ chVersion = '2'; /* Version 2 */ } - else if (locatePattern(prog.Image, startOff, startOff+0x30, pattBorl3Start, + else if (locatePattern(prog.image(), startOff, startOff+0x30, pattBorl3Start, sizeof(pattBorl3Start), &i)) { /* Borland startup. DS is at the second uint8_t (offset 1) */ - setState( rDS, LH(&prog.Image[i+1])); + setState( rDS, LH(&prog.image()[i+1])); printf("Borland v3 detected\n"); chVendor = 'b'; /* Borland compiler */ chVersion = '3'; /* Version 3 */ } - else if (locatePattern(prog.Image, startOff, startOff+0x30, pattLogiStart, + else if (locatePattern(prog.image(), startOff, startOff+0x30, pattLogiStart, sizeof(pattLogiStart), &i)) { /* Logitech modula startup. DS is 0, despite appearances */ diff --git a/src/comwrite.cpp b/src/comwrite.cpp index dae315c..c7f5504 100644 --- a/src/comwrite.cpp +++ b/src/comwrite.cpp @@ -207,9 +207,9 @@ void Function::writeProcComments(std::ostream &ostr) { psym = &this->args[i]; ostr << " * "<name<<" = "; - if (psym->regs->ident.idType == REGISTER) + if (psym->regs->ident.type() == REGISTER) { - id = &this->localId.id_arr[psym->regs->ident.idNode.regiIdx]; + id = &this->localId.id_arr[((RegisterNode *)psym->regs)->regiIdx]; ostr << Machine_X86::regName(id->id.regi); } else /* long register */ diff --git a/src/control.cpp b/src/control.cpp index fa5d6dc..329bfba 100644 --- a/src/control.cpp +++ b/src/control.cpp @@ -529,9 +529,11 @@ bool Function::Case_X_and_Y(BB* pbb, BB* thenBB, BB* elseBB) HLTYPE &hl1(*pbb->back().hlU()); HLTYPE &hl2(*thenBB->back().hlU()); BB* obb = elseBB->edges[ELSE].BBptr; - + Expr * hl2_expr = hl2.getMyExpr(); /* Construct compound DBL_AND expression */ - hl1.expr(BinaryOperator::Create(DBL_AND,hl1.expr(),hl2.expr())); + assert(hl1.expr()); + assert(hl2_expr); + hl1.expr(BinaryOperator::Create(DBL_AND,hl1.expr(),hl2_expr)); /* Replace in-edge to obb from e to pbb */ replaceInEdge(obb,elseBB,pbb); diff --git a/src/dataflow.cpp b/src/dataflow.cpp index 79e1eee..6cb5334 100644 --- a/src/dataflow.cpp +++ b/src/dataflow.cpp @@ -20,12 +20,12 @@ using namespace boost; using namespace boost::adaptors; struct ExpStack { - typedef std::list EXP_STK; + typedef std::list EXP_STK; EXP_STK expStk; /* local expression stack */ void init(); - void push(COND_EXPR *); - COND_EXPR * pop(); + void push(Expr *); + Expr * pop(); int numElem(); boolT empty(); void processExpPush(int &numHlIcodes, iICODE picode) @@ -48,7 +48,7 @@ void ExpStack::init() } /* Pushes the given expression onto the local stack (expStk). */ -void ExpStack::push(COND_EXPR *expr) +void ExpStack::push(Expr *expr) { expStk.push_back(expr); } @@ -57,11 +57,11 @@ void ExpStack::push(COND_EXPR *expr) /* Returns the element on the top of the local expression stack (expStk), * and deallocates the space allocated by this node. * If there are no elements on the stack, returns NULL. */ -COND_EXPR *ExpStack::pop() +Expr *ExpStack::pop() { if(expStk.empty()) return 0; - COND_EXPR *topExp = expStk.back(); + Expr *topExp = expStk.back(); expStk.pop_back(); return topExp; } @@ -91,13 +91,13 @@ size_t STKFRAME::getLocVar(int off) /* Returns a string with the source operand of Icode */ -static COND_EXPR *srcIdent (const LLInst &ll_insn, Function * pProc, iICODE i, ICODE & duIcode, operDu du) +static Expr *srcIdent (const LLInst &ll_insn, Function * pProc, iICODE i, ICODE & duIcode, operDu du) { if (ll_insn.testFlags(I)) /* immediate operand */ { if (ll_insn.testFlags(B)) - return AstIdent::Kte (ll_insn.src().getImm2(), 1); - return AstIdent::Kte (ll_insn.src().getImm2(), 2); + return new Constant(ll_insn.src().getImm2(), 1); + return new Constant(ll_insn.src().getImm2(), 2); } // otherwise return AstIdent::id (ll_insn, SRC, pProc, i, duIcode, du); @@ -105,9 +105,9 @@ static COND_EXPR *srcIdent (const LLInst &ll_insn, Function * pProc, iICODE i, I /* Returns the destination operand */ -static COND_EXPR *dstIdent (const LLInst & ll_insn, Function * pProc, iICODE i, ICODE & duIcode, operDu du) +static Expr *dstIdent (const LLInst & ll_insn, Function * pProc, iICODE i, ICODE & duIcode, operDu du) { - COND_EXPR *n; + Expr *n; n = AstIdent::id (ll_insn, DST, pProc, i, duIcode, du); /** Is it needed? (pIcode->ll()->flg) & NO_SRC_B **/ return (n); @@ -120,8 +120,8 @@ void Function::elimCondCodes () uint8_t use; /* Used flags bit vector */ uint8_t def; /* Defined flags bit vector */ boolT notSup; /* Use/def combination not supported */ - COND_EXPR *rhs; /* Source operand */ - COND_EXPR *lhs; /* Destination operand */ + Expr *rhs; /* Source operand */ + Expr *lhs; /* Destination operand */ BinaryOperator *_expr; /* Boolean expression */ //BB * pBB; /* Pointer to BBs in dfs last ordering */ riICODE useAt; /* Instruction that used flag */ @@ -159,12 +159,12 @@ void Function::elimCondCodes () break; case iOR: - lhs = defAt->hl()->asgn.lhs->clone(); + lhs = defAt->hl()->asgn.lhs()->clone(); useAt->copyDU(*defAt, eUSE, eDEF); if (defAt->ll()->testFlags(B)) - rhs = AstIdent::Kte (0, 1); + rhs = new Constant(0, 1); else - rhs = AstIdent::Kte (0, 2); + rhs = new Constant(0, 2); break; case iTEST: @@ -172,18 +172,18 @@ void Function::elimCondCodes () lhs = dstIdent (*defAt->ll(),this, befDefAt,*useAt, eUSE); lhs = BinaryOperator::And(lhs, rhs); if (defAt->ll()->testFlags(B)) - rhs = AstIdent::Kte (0, 1); + rhs = new Constant(0, 1); else - rhs = AstIdent::Kte (0, 2); + rhs = new Constant(0, 2); break; case iINC: case iDEC: //WARNING: verbatim copy from iOR needs fixing ? - lhs = defAt->hl()->asgn.lhs->clone(); + lhs = defAt->hl()->asgn.lhs()->clone(); useAt->copyDU(*defAt, eUSE, eDEF); if (defAt->ll()->testFlags(B)) - rhs = AstIdent::Kte (0, 1); + rhs = new Constant(0, 1); else - rhs = AstIdent::Kte (0, 2); + rhs = new Constant(0, 2); break; default: notSup = true; @@ -202,9 +202,9 @@ void Function::elimCondCodes () else if (useAtOp == iJCXZ) { - lhs = AstIdent::Reg (rCX, 0, &localId); + lhs = new RegisterNode(rCX, 0, &localId); useAt->setRegDU (rCX, eUSE); - rhs = AstIdent::Kte (0, 2); + rhs = new Constant(0, 2); _expr = BinaryOperator::Create(EQUAL,lhs,rhs); useAt->setJCond(_expr); } @@ -319,7 +319,7 @@ void Function::liveRegAnalysis (LivenessSet &in_liveOut) auto picode = pbb->rbegin(); /* icode of function return */ if (picode->hl()->opcode == HLI_RET) { - picode->hlU()->expr(AstIdent::idID (&retVal, &localId, (++pbb->rbegin()).base())); + picode->hlU()->expr(AstIdent::idID(&retVal, &localId, (++pbb->rbegin()).base())); picode->du.use = in_liveOut; } } @@ -392,13 +392,13 @@ void Function::liveRegAnalysis (LivenessSet &in_liveOut) /* Remove any references to register variables */ if (flg & SI_REGVAR) { - liveIn.set(rSI,0); - pbb->liveIn.set(rSI,0); + liveIn.clrReg(rSI); + pbb->liveIn.clrReg(rSI); } if (flg & DI_REGVAR) { - liveIn.set(rDI,0); - pbb->liveIn.set(rDI,0); + liveIn.clrReg(rDI); + pbb->liveIn.clrReg(rDI); } } @@ -555,23 +555,23 @@ void Function::genDU1 () } /* Substitutes the rhs (or lhs if rhs not possible) of ticode for the rhs of picode. */ -void LOCAL_ID::forwardSubs (COND_EXPR *lhs, COND_EXPR *rhs, iICODE picode, iICODE ticode, int &numHlIcodes) const +void LOCAL_ID::forwardSubs (Expr *lhs, Expr *rhs, iICODE picode, iICODE ticode, int &numHlIcodes) const { bool res; UnaryOperator *lhs_unary; - while(lhs_unary = dynamic_cast(lhs)) + while( (lhs_unary = dynamic_cast(lhs)) ) { if(dynamic_cast(lhs_unary)) break; lhs = lhs_unary->unaryExp; } - AstIdent * lhs_id=dynamic_cast(lhs_unary); - assert(lhs_id); + RegisterNode * lhs_reg=dynamic_cast(lhs_unary); + assert(lhs_reg); if (rhs == NULL) /* In case expression popped is NULL */ return; /* Insert on rhs of ticode, if possible */ - res = COND_EXPR::insertSubTreeReg (ticode->hlU()->asgn.rhs,rhs, id_arr[lhs_id->ident.idNode.regiIdx].id.regi, this); + res = Expr::insertSubTreeReg (ticode->hlU()->asgn.rhs,rhs, id_arr[lhs_reg->regiIdx].id.regi, this); if (res) { picode->invalidate(); @@ -580,7 +580,18 @@ void LOCAL_ID::forwardSubs (COND_EXPR *lhs, COND_EXPR *rhs, iICODE picode, iICOD else { /* Try to insert it on lhs of ticode*/ - res = COND_EXPR::insertSubTreeReg (ticode->hlU()->asgn.lhs,rhs, id_arr[lhs_id->ident.idNode.regiIdx].id.regi, this); + RegisterNode *op = dynamic_cast(ticode->hlU()->asgn.m_lhs); + if(op) + { + eReg inserted = id_arr[lhs_reg->regiIdx].id.regi; + eReg lhsReg = id_arr[op->regiIdx].id.regi; + if((lhsReg==inserted)||Machine_X86::isSubRegisterOf(lhsReg,inserted)) + { + // Do not replace ax = XYZ; given ax = H << P; with H << P = + return; + } + } + res = Expr::insertSubTreeReg (ticode->hlU()->asgn.m_lhs,rhs, id_arr[lhs_reg->regiIdx].id.regi, this); if (res) { picode->invalidate(); @@ -591,7 +602,7 @@ void LOCAL_ID::forwardSubs (COND_EXPR *lhs, COND_EXPR *rhs, iICODE picode, iICOD /* Substitutes the rhs (or lhs if rhs not possible) of ticode for the expression exp given */ -static void forwardSubsLong (int longIdx, COND_EXPR *_exp, iICODE picode, iICODE ticode, int *numHlIcodes) +static void forwardSubsLong (int longIdx, Expr *_exp, iICODE picode, iICODE ticode, int *numHlIcodes) { bool res; @@ -599,7 +610,7 @@ static void forwardSubsLong (int longIdx, COND_EXPR *_exp, iICODE picode, iICODE return; /* Insert on rhs of ticode, if possible */ - res = COND_EXPR::insertSubTreeLongReg (_exp, ticode->hlU()->asgn.rhs, longIdx); + res = Expr::insertSubTreeLongReg (_exp, ticode->hlU()->asgn.rhs, longIdx); if (res) { picode->invalidate(); @@ -608,7 +619,7 @@ static void forwardSubsLong (int longIdx, COND_EXPR *_exp, iICODE picode, iICODE else { /* Try to insert it on lhs of ticode*/ - res = COND_EXPR::insertSubTreeLongReg (_exp, ticode->hlU()->asgn.lhs, longIdx); + res = Expr::insertSubTreeLongReg (_exp, ticode->hlU()->asgn.m_lhs, longIdx); if (res) { picode->invalidate(); @@ -639,19 +650,9 @@ bool BinaryOperator::xClear(rICODE range_to_check, iICODE lastBBinst, const LOCA } bool AstIdent::xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locId) { - iICODE i; - uint8_t regi; if (ident.idType == REGISTER) { - regi= locId.id_arr[ident.idNode.regiIdx].id.regi; - range_to_check.advance_begin(1); - auto all_valid_and_high_level_after_start = range_to_check | filtered(ICODE::select_valid_high_level); - for (ICODE &i : all_valid_and_high_level_after_start) - if ((i.du.def & duReg[regi]).any()) - return false; - if (all_valid_and_high_level_after_start.end().base() != lastBBinst) - return true; - return false; + assert(false); } else return true; @@ -663,7 +664,7 @@ bool AstIdent::xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID & /// @returns the type size of the stored Arg static int processCArg (Function * pp, Function * pProc, ICODE * picode, size_t numArgs) { - COND_EXPR *_exp; + Expr *_exp; bool res; /* if (numArgs == 0) @@ -677,10 +678,10 @@ static int processCArg (Function * pp, Function * pProc, ICODE * picode, size_t if (pp->flg & PROC_VARARG) { if (numArgs < pp->args.size()) - adjustActArgType (_exp, pp->args[numArgs].type, pProc); + _exp = pProc->adjustActArgType (_exp, pp->args[numArgs].type); } else - adjustActArgType (_exp, pp->args[numArgs].type, pProc); + _exp = pProc->adjustActArgType (_exp, pp->args[numArgs].type); } } else /* user function */ @@ -714,14 +715,14 @@ void LOCAL_ID::processTargetIcode(iICODE picode, int &numHlIcodes, iICODE ticode boolT res; HLTYPE &p_hl(*picode->hlU()); HLTYPE &t_hl(*ticode->hlU()); - AstIdent *lhs_ident = dynamic_cast(p_hl.asgn.lhs); + AstIdent *lhs_ident = dynamic_cast(p_hl.asgn.lhs()); switch (t_hl.opcode) { case HLI_ASSIGN: + assert(lhs_ident); if(isLong) { - assert(lhs_ident); forwardSubsLong (lhs_ident->ident.idNode.longIdx, p_hl.asgn.rhs, picode,ticode, &numHlIcodes); @@ -734,18 +735,19 @@ void LOCAL_ID::processTargetIcode(iICODE picode, int &numHlIcodes, iICODE ticode if(isLong) { assert(lhs_ident); - res = COND_EXPR::insertSubTreeLongReg ( + res = Expr::insertSubTreeLongReg ( p_hl.asgn.rhs, t_hl.exp.v, lhs_ident->ident.idNode.longIdx); } else { - assert(lhs_ident); - res = COND_EXPR::insertSubTreeReg ( + RegisterNode *lhs_reg = dynamic_cast(p_hl.asgn.lhs()); + assert(lhs_reg); + res = Expr::insertSubTreeReg ( t_hl.exp.v, p_hl.asgn.rhs, - id_arr[lhs_ident->ident.idNode.regiIdx].id.regi, + id_arr[lhs_reg->regiIdx].id.regi, this); } if (res) @@ -766,7 +768,7 @@ void LOCAL_ID::processTargetIcode(iICODE picode, int &numHlIcodes, iICODE ticode } } -void Function::processHliCall(COND_EXPR *_exp, iICODE picode) +void Function::processHliCall(Expr *_exp, iICODE picode) { Function * pp; int cb, numArgs; @@ -784,7 +786,7 @@ void Function::processHliCall(COND_EXPR *_exp, iICODE picode) if (pp->flg & PROC_ISLIB) /* library function */ { if (pp->args.numArgs > 0) - adjustActArgType(_exp, pp->args[numArgs].type, this); + _exp = adjustActArgType(_exp, pp->args[numArgs].type); res = picode->newStkArg (_exp, (llIcode)picode->ll()->getOpcode(), this); } else /* user function */ @@ -834,8 +836,8 @@ void BB::findBBExps(LOCAL_ID &locals,Function *fnc) bool res; ID *_retVal; // function return value - COND_EXPR *_exp, // expression pointer - for HLI_POP and HLI_CALL */ - *lhs; // exp ptr for return value of a HLI_CALL */ + Expr *_exp; // expression pointer - for HLI_POP and HLI_CALL */ + //Expr *lhs; // exp ptr for return value of a HLI_CALL */ iICODE ticode; // Target icode */ HLTYPE *ti_hl=0; uint8_t regi; @@ -897,11 +899,11 @@ void BB::findBBExps(LOCAL_ID &locals,Function *fnc) case HLI_JCOND: case HLI_PUSH: case HLI_RET: { - AstIdent *v = dynamic_cast(_icHl.expr()); + RegisterNode *v = dynamic_cast(_icHl.expr()); assert(v); - res = COND_EXPR::insertSubTreeReg (ti_hl->exp.v, + res = Expr::insertSubTreeReg (ti_hl->exp.v, _exp, - locals.id_arr[v->ident.idNode.regiIdx].id.regi, + locals.id_arr[v->regiIdx].id.regi, &locals); if (res) { @@ -930,24 +932,24 @@ void BB::findBBExps(LOCAL_ID &locals,Function *fnc) { case HLI_ASSIGN: assert(ti_hl->asgn.rhs); - _exp = _icHl.call.toId(); - res = COND_EXPR::insertSubTreeReg (ti_hl->asgn.rhs,_exp, _retVal->id.regi, &locals); + _exp = _icHl.call.toAst(); + res = Expr::insertSubTreeReg (ti_hl->asgn.rhs,_exp, _retVal->id.regi, &locals); if (! res) - COND_EXPR::insertSubTreeReg (ti_hl->asgn.lhs, _exp,_retVal->id.regi, &locals); + Expr::insertSubTreeReg (ti_hl->asgn.m_lhs, _exp,_retVal->id.regi, &locals); //TODO: HERE missing: 2 regs picode->invalidate(); numHlIcodes--; break; case HLI_PUSH: case HLI_RET: - ti_hl->expr( _icHl.call.toId() ); + ti_hl->expr( _icHl.call.toAst() ); picode->invalidate(); numHlIcodes--; break; case HLI_JCOND: - _exp = _icHl.call.toId(); - res = COND_EXPR::insertSubTreeReg (ti_hl->exp.v, _exp, _retVal->id.regi, &locals); + _exp = _icHl.call.toAst(); + res = Expr::insertSubTreeReg (ti_hl->exp.v, _exp, _retVal->id.regi, &locals); if (res) /* was substituted */ { picode->invalidate(); @@ -1008,9 +1010,9 @@ void BB::findBBExps(LOCAL_ID &locals,Function *fnc) _exp, picode.base(), ticode, &numHlIcodes); break; case HLI_JCOND: case HLI_PUSH: - res = COND_EXPR::insertSubTreeLongReg (_exp, + res = Expr::insertSubTreeLongReg (_exp, ticode->hlU()->exp.v, - dynamic_cast(_icHl.asgn.lhs)->ident.idNode.longIdx); + dynamic_cast(_icHl.asgn.lhs())->ident.idNode.longIdx); if (res) { picode->invalidate(); @@ -1030,26 +1032,26 @@ void BB::findBBExps(LOCAL_ID &locals,Function *fnc) switch (ticode->hl()->opcode) { case HLI_ASSIGN: - _exp = _icHl.call.toId(); - ticode->hlU()->asgn.lhs = - AstIdent::idLong(&locals, DST, + _exp = _icHl.call.toAst(); + ticode->hlU()->asgn.lhs( + AstIdent::Long(&locals, DST, ticode,HIGH_FIRST, picode.base(), - eDEF, *(++iICODE(ticode))->ll()); + eDEF, *(++iICODE(ticode))->ll())); ticode->hlU()->asgn.rhs = _exp; picode->invalidate(); numHlIcodes--; break; case HLI_PUSH: case HLI_RET: - ticode->hlU()->expr( _icHl.call.toId() ); + ticode->hlU()->expr( _icHl.call.toAst() ); picode->invalidate(); numHlIcodes--; break; case HLI_JCOND: - _exp = _icHl.call.toId(); + _exp = _icHl.call.toAst(); _retVal = &picode->hl()->call.proc->retVal; - res = COND_EXPR::insertSubTreeLongReg (_exp, + res = Expr::insertSubTreeLongReg (_exp, ticode->hlU()->exp.v, locals.newLongReg ( _retVal->type, _retVal->id.longId.h, _retVal->id.longId.l, picode.base())); @@ -1098,7 +1100,7 @@ void BB::findBBExps(LOCAL_ID &locals,Function *fnc) * assign it to the corresponding registers */ if ( not _icHl.call.proc->isLibrary() and (not picode->du1.used(0)) and (picode->du1.numRegsDef > 0)) { - _exp = AstIdent::idFunc (_icHl.call.proc, _icHl.call.args); + _exp = new FuncNode(_icHl.call.proc, _icHl.call.args); auto lhs = AstIdent::idID (&_icHl.call.proc->retVal, &locals, picode.base()); picode->setAsgn(lhs, _exp); } @@ -1110,22 +1112,13 @@ void BB::findBBExps(LOCAL_ID &locals,Function *fnc) void Function::findExps() { - //int i, numHlIcodes; - //STKFRAME * args; // pointer to arguments - for HLI_CALL */ - /* Initialize expression stack */ g_exp_stk.init(); - /* Traverse tree in dfsLast order */ - // for (i = 0; i < numBBs; i++) - for(BB *pbb : m_dfsLast) + for(BB *pbb : m_dfsLast | filtered(BB::ValidFunctor())) { - /* Process one BB */ - // pbb = m_dfsLast[i]; - if (not pbb->valid()) - continue; + /* Process one valid BB */ pbb->findBBExps( this->localId, this); - } } @@ -1135,6 +1128,16 @@ void Function::preprocessReturnDU(LivenessSet &_liveOut) { // int idx; bool isAx, isBx, isCx, isDx; + eReg bad_regs[] = {rES,rCS,rDS,rSS}; + constexpr char * names[] ={"ES","CS","DS","SS"}; + for(int i=0; i<4; ++i) + if(_liveOut.testReg(bad_regs[i])) + { + fprintf(stderr,"LivenessSet probably screwed up, %s register as an liveOut in preprocessReturnDU\n",names[i]); + _liveOut.clrReg(bad_regs[i]); + if(not _liveOut.any()) + return; + } flg |= PROC_IS_FUNC; isAx = _liveOut.testReg(rAX); isBx = _liveOut.testReg(rBX); @@ -1205,7 +1208,20 @@ void Function::preprocessReturnDU(LivenessSet &_liveOut) else retVal.id.regi = rDL; /*idx = */localId.newByteWordReg(TYPE_BYTE_SIGN,retVal.id.regi); - + } + else if(isAH||isBH||isCH||isDH) + { + retVal.type = TYPE_BYTE_SIGN; + retVal.loc = REG_FRAME; + if (isAH) + retVal.id.regi = rAH; + else if (isBH) + retVal.id.regi = rBH; + else if (isCH) + retVal.id.regi = rCH; + else + retVal.id.regi = rDH; + /*idx = */localId.newByteWordReg(TYPE_BYTE_SIGN,retVal.id.regi); } } } @@ -1217,9 +1233,9 @@ void Function::dataFlow(LivenessSet &_liveOut) /* Remove references to register variables */ if (flg & SI_REGVAR) - _liveOut.set(rSI,0); + _liveOut.clrReg(rSI); if (flg & DI_REGVAR) - _liveOut.set(rDI,0); + _liveOut.clrReg(rDI); /* Function - return value register(s) */ preprocessReturnDU(_liveOut); @@ -1235,4 +1251,3 @@ void Function::dataFlow(LivenessSet &_liveOut) findExps (); /* forward substitution algorithm */ } } - diff --git a/src/dcc.cpp b/src/dcc.cpp index abf014e..e5550a4 100644 --- a/src/dcc.cpp +++ b/src/dcc.cpp @@ -4,9 +4,9 @@ * (C) Cristina Cifuentes ****************************************************************************/ +#include #include "dcc.h" #include "project.h" -#include /* Global variables - extern to other modules */ extern char *asm1_name, *asm2_name; /* Assembler output filenames */ @@ -22,18 +22,102 @@ extern OPTION option; /* Command line options */ static char *initargs(int argc, char *argv[]); static void displayTotalStats(void); #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /**************************************************************************** * main ***************************************************************************/ #include -int main(int argc, char *argv[]) +using namespace llvm; +class TVisitor : public TableGenAction { +public: + virtual bool operator()(raw_ostream &OS, RecordKeeper &Records) + { + Record *rec = Records.getDef("ADD8i8"); + if(rec) + { + if(not rec->getTemplateArgs().empty()) + std::cout << "Has template args\n"; + auto classes(rec->getSuperClasses()); + for(auto val : rec->getSuperClasses()) + std::cout << "Super "<getName()<<"\n"; + +// DagInit * in = rec->getValueAsDag(val.getName()); +// in->dump(); + for(const RecordVal &val : rec->getValues()) + { +// val.dump(); + } + rec->dump(); + + } + // rec = Records.getDef("CCR"); + // if(rec) + // rec->dump(); + for(auto val : Records.getDefs()) + { + //std::cout<< "Def "<createTargetMachine(TheTriple.getTriple(),MCPU,Features,opts); + std::cerr<getInstrInfo()->getName(97)<<"\n"; + const MCInstrDesc &ds(tm->getInstrInfo()->get(97)); + const MCOperandInfo *op1=ds.OpInfo; + uint16_t impl_def = ds.getImplicitDefs()[0]; + std::cerr< [%04X]\n", prog.relocTable[i],LH(prog.Image + prog.relocTable[i])); + printf("%06X -> [%04X]\n", prog.relocTable[i],LH(prog.image() + prog.relocTable[i])); } } printf("\n"); @@ -303,12 +303,12 @@ void DccFrontend::LoadImage(Project &proj) /* Allocate a block of memory for the program. */ prog.cbImage = cb + sizeof(PSP); - prog.Image = new uint8_t [prog.cbImage]; - prog.Image[0] = 0xCD; /* Fill in PSP int 20h location */ - prog.Image[1] = 0x20; /* for termination checking */ + prog.Imagez = new uint8_t [prog.cbImage]; + prog.Imagez[0] = 0xCD; /* Fill in PSP int 20h location */ + prog.Imagez[1] = 0x20; /* for termination checking */ /* Read in the image past where a PSP would go */ - if (cb != (int)fread(prog.Image + sizeof(PSP), 1, (size_t)cb, fp)) + if (cb != (int)fread(prog.Imagez + sizeof(PSP), 1, (size_t)cb, fp)) { fatalError(CANNOT_READ, proj.binary_path().c_str()); } @@ -323,7 +323,7 @@ void DccFrontend::LoadImage(Project &proj) { for (i = 0; i < prog.cReloc; i++) { - uint8_t *p = &prog.Image[prog.relocTable[i]]; + uint8_t *p = &prog.Imagez[prog.relocTable[i]]; uint16_t w = (uint16_t)LH(p) + EXE_RELOCATION; *p++ = (uint8_t)(w & 0x00FF); *p = (uint8_t)((w & 0xFF00) >> 8); diff --git a/src/hlicode.cpp b/src/hlicode.cpp index 6dfa7a6..af2261f 100644 --- a/src/hlicode.cpp +++ b/src/hlicode.cpp @@ -11,7 +11,6 @@ #include "dcc.h" using namespace std; - /* Masks off bits set by duReg[] */ LivenessSet maskDuReg[] = { 0x00, /* uint16_t regs */ @@ -38,7 +37,7 @@ static char buf[lineSize]; /* Line buffer for hl icode output */ /* Places the new HLI_ASSIGN high-level operand in the high-level icode array */ -void HLTYPE::setAsgn(COND_EXPR *lhs, COND_EXPR *rhs) +void HLTYPE::setAsgn(Expr *lhs, Expr *rhs) { assert(lhs); set(lhs,rhs); @@ -68,7 +67,7 @@ void ICODE::newCallHl() /* Places the new HLI_POP/HLI_PUSH/HLI_RET high-level operand in the high-level icode * array */ -void ICODE::setUnary(hlIcode op, COND_EXPR *_exp) +void ICODE::setUnary(hlIcode op, Expr *_exp) { type = HIGH_LEVEL; hlU()->set(op,_exp); @@ -76,7 +75,7 @@ void ICODE::setUnary(hlIcode op, COND_EXPR *_exp) /* Places the new HLI_JCOND high-level operand in the high-level icode array */ -void ICODE::setJCond(COND_EXPR *cexp) +void ICODE::setJCond(Expr *cexp) { type = HIGH_LEVEL; hlU()->set(HLI_JCOND,cexp); @@ -293,8 +292,8 @@ void Function::highLevelGen() { size_t numIcode; /* number of icode instructions */ iICODE pIcode; /* ptr to current icode node */ - COND_EXPR *lhs; - COND_EXPR *rhs; /* left- and right-hand side of expression */ + Expr *lhs; + Expr *rhs; /* left- and right-hand side of expression */ uint32_t _flg; /* icode flags */ numIcode = Icode.size(); for (iICODE i = Icode.begin(); i!=Icode.end() ; ++i) @@ -333,8 +332,7 @@ void Function::highLevelGen() break; case iDEC: - rhs = AstIdent::Kte (1, 2); - rhs = new BinaryOperator(SUB,lhs, rhs); + rhs = new BinaryOperator(SUB,lhs, new Constant(1, 2)); pIcode->setAsgn(lhs, rhs); break; @@ -343,12 +341,12 @@ void Function::highLevelGen() rhs = new BinaryOperator(DIV,lhs, rhs); if ( ll->testFlags(B) ) { - lhs = AstIdent::Reg (rAL, 0, &localId); + lhs = new RegisterNode(rAL, 0, &localId); pIcode->setRegDU( rAL, eDEF); } else { - lhs = AstIdent::Reg (rAX, 0, &localId); + lhs = new RegisterNode(rAX, 0, &localId); pIcode->setRegDU( rAX, eDEF); } pIcode->setAsgn(lhs, rhs); @@ -361,8 +359,7 @@ void Function::highLevelGen() break; case iINC: - rhs = AstIdent::Kte (1, 2); - rhs = new BinaryOperator(ADD,lhs, rhs); + rhs = new BinaryOperator(ADD,lhs, new Constant(1, 2)); pIcode->setAsgn(lhs, rhs); break; @@ -373,16 +370,15 @@ void Function::highLevelGen() case iMOD: rhs = new BinaryOperator(MOD,lhs, rhs); + eReg lhs_reg; + if ( ll->testFlags(B) ) - { - lhs = AstIdent::Reg (rAH, 0, &localId); - pIcode->setRegDU( rAH, eDEF); - } + lhs_reg = rAH; else - { - lhs = AstIdent::Reg (rDX, 0, &localId); - pIcode->setRegDU( rDX, eDEF); - } + lhs_reg = rDX; + + lhs = new RegisterNode(lhs_reg, 0, &localId); + pIcode->setRegDU( lhs_reg, eDEF); pIcode->setAsgn(lhs, rhs); break; @@ -462,7 +458,7 @@ void Function::highLevelGen() /* Returns the string that represents the procedure call of tproc (ie. with * actual parameters) */ -std::string writeCall (Function * tproc, STKFRAME & args, Function * pproc, int *numLoc) +std::string Function::writeCall (Function * tproc, STKFRAME & args, int *numLoc) { //string condExp; ostringstream ostr; @@ -470,7 +466,7 @@ std::string writeCall (Function * tproc, STKFRAME & args, Function * pproc, int for(const STKSYM &sym : args) { if(sym.actual) - ostr << sym.actual->walkCondExpr (pproc, numLoc); + ostr << sym.actual->walkCondExpr (this, numLoc); else ostr << ""; if((&sym)!=&(args.back())) @@ -484,11 +480,15 @@ std::string writeCall (Function * tproc, STKFRAME & args, Function * pproc, int /* Displays the output of a HLI_JCOND icode. */ char *writeJcond (const HLTYPE &h, Function * pProc, int *numLoc) { - assert(h.expr()); memset (buf, ' ', sizeof(buf)); buf[0] = '\0'; strcat (buf, "if "); - COND_EXPR *inverted=h.expr()->inverse(); + if(h.opcode==HLI_INVALID) + { + return "if (*HLI_INVALID*) {\n"; + } + assert(h.expr()); + Expr *inverted=h.expr()->inverse(); //inverseCondOp (&h.exp); std::string e = inverted->walkCondExpr (pProc, numLoc); delete inverted; @@ -506,7 +506,12 @@ char *writeJcondInv (HLTYPE h, Function * pProc, int *numLoc) memset (buf, ' ', sizeof(buf)); buf[0] = '\0'; strcat (buf, "if "); - std::string e = h.expr()->walkCondExpr (pProc, numLoc); + std::string e; + if(h.expr()==nullptr) + e = "( *failed condition recovery* )"; + else + e = h.expr()->walkCondExpr (pProc, numLoc); + strcat (buf, e.c_str()); strcat (buf, " {\n"); return (buf); @@ -515,7 +520,7 @@ char *writeJcondInv (HLTYPE h, Function * pProc, int *numLoc) string AssignType::writeOut(Function *pProc, int *numLoc) const { ostringstream ostr; - ostr << lhs->walkCondExpr (pProc, numLoc); + ostr << m_lhs->walkCondExpr (pProc, numLoc); ostr << " = "; ostr << rhs->walkCondExpr (pProc, numLoc); ostr << ";\n"; @@ -524,7 +529,7 @@ string AssignType::writeOut(Function *pProc, int *numLoc) const string CallType::writeOut(Function *pProc, int *numLoc) const { ostringstream ostr; - ostr << writeCall (proc, *args, pProc,numLoc); + ostr << pProc->writeCall (proc, *args, numLoc); ostr << ";\n"; return ostr.str(); } @@ -535,13 +540,14 @@ string ExpType::writeOut(Function *pProc, int *numLoc) const return v->walkCondExpr (pProc, numLoc); } -void HLTYPE::set(COND_EXPR *l, COND_EXPR *r) +void HLTYPE::set(Expr *l, Expr *r) { assert(l); assert(r); opcode = HLI_ASSIGN; - assert((asgn.lhs==0) and (asgn.rhs==0)); //prevent memory leaks - asgn.lhs=l; + //assert((asgn.lhs==0) and (asgn.rhs==0)); //prevent memory leaks + assert(dynamic_cast(l)); + asgn.m_lhs=l; asgn.rhs=r; } /* Returns a string with the contents of the current high-level icode. diff --git a/src/hltype.cpp b/src/hltype.cpp index cfe1d18..9bd5f67 100644 --- a/src/hltype.cpp +++ b/src/hltype.cpp @@ -1,7 +1,7 @@ #include "icode.h" #include "ast.h" -void HLTYPE::replaceExpr(COND_EXPR *e) +void HLTYPE::replaceExpr(Expr *e) { assert(e); delete exp.v; diff --git a/src/icode.cpp b/src/icode.cpp index 0dde24f..d8cfffc 100644 --- a/src/icode.cpp +++ b/src/icode.cpp @@ -103,6 +103,12 @@ void HLTYPE::setCall(Function *proc) } bool AssignType::removeRegFromLong(eReg regi, LOCAL_ID *locId) { - lhs->performLongRemoval(regi,locId); + m_lhs=lhs()->performLongRemoval(regi,locId); return true; } +void AssignType::lhs(Expr *l) +{ + assert(dynamic_cast(l)); + m_lhs=l; +} + diff --git a/src/idioms/arith_idioms.cpp b/src/idioms/arith_idioms.cpp index cb3a203..d604333 100644 --- a/src/idioms/arith_idioms.cpp +++ b/src/idioms/arith_idioms.cpp @@ -26,9 +26,9 @@ bool Idiom5::match(iICODE pIcode) int Idiom5::action() { AstIdent *rhs,*lhs; - COND_EXPR *expr; - lhs = AstIdent::idLong (&m_func->localId, DST, m_icodes[0], LOW_FIRST, m_icodes[0], USE_DEF, *m_icodes[1]->ll()); - rhs = AstIdent::idLong (&m_func->localId, SRC, m_icodes[0], LOW_FIRST, m_icodes[0], eUSE, *m_icodes[1]->ll()); + Expr *expr; + lhs = AstIdent::Long (&m_func->localId, DST, m_icodes[0], LOW_FIRST, m_icodes[0], USE_DEF, *m_icodes[1]->ll()); + rhs = AstIdent::Long (&m_func->localId, SRC, m_icodes[0], LOW_FIRST, m_icodes[0], eUSE, *m_icodes[1]->ll()); expr = new BinaryOperator(ADD,lhs, rhs); m_icodes[0]->setAsgn(lhs, expr); m_icodes[1]->invalidate(); @@ -61,9 +61,9 @@ int Idiom6::action() { AstIdent *rhs,*lhs; - COND_EXPR *expr; - lhs = AstIdent::idLong (&m_func->localId, DST, m_icodes[0], LOW_FIRST, m_icodes[0], USE_DEF, *m_icodes[1]->ll()); - rhs = AstIdent::idLong (&m_func->localId, SRC, m_icodes[0], LOW_FIRST, m_icodes[0], eUSE, *m_icodes[1]->ll()); + Expr *expr; + lhs = AstIdent::Long (&m_func->localId, DST, m_icodes[0], LOW_FIRST, m_icodes[0], USE_DEF, *m_icodes[1]->ll()); + rhs = AstIdent::Long (&m_func->localId, SRC, m_icodes[0], LOW_FIRST, m_icodes[0], eUSE, *m_icodes[1]->ll()); expr = new BinaryOperator(SUB,lhs, rhs); m_icodes[0]->setAsgn(lhs, expr); m_icodes[1]->invalidate(); @@ -102,7 +102,13 @@ bool Idiom18::match(iICODE picode) m_is_dec = m_icodes[1]->ll()->match(iDEC); uint8_t regi; /* register of the MOV */ - + if(not (m_icodes[0]->ll()->match(iMOV) and m_icodes[0]->ll()->dst.isReg() )) + return false; + regi = m_icodes[0]->ll()->dst.regi; + if( not ( m_icodes[2]->ll()->match(iCMP) && (m_icodes[2]->ll()->dst.regi == regi) && + m_icodes[3]->ll()->conditionalJump() ) ) + return false; + // Simple matching finished, select apropriate matcher based on dst type /* Get variable */ if (m_icodes[1]->ll()->dst.regi == 0) /* global variable */ { @@ -111,10 +117,11 @@ bool Idiom18::match(iICODE picode) } else if ( m_icodes[1]->ll()->dst.isReg() ) /* register */ { - if ((m_icodes[1]->ll()->dst.regi == rSI) && (m_func->flg & SI_REGVAR)) - m_idiom_type = 1; - else if ((m_icodes[1]->ll()->dst.regi == rDI) && (m_func->flg & DI_REGVAR)) - m_idiom_type = 1; + m_idiom_type = 1; +// if ((m_icodes[1]->ll()->dst.regi == rSI) && (m_func->flg & SI_REGVAR)) +// m_idiom_type = 1; +// else if ((m_icodes[1]->ll()->dst.regi == rDI) && (m_func->flg & DI_REGVAR)) +// m_idiom_type = 1; } else if (m_icodes[1]->ll()->dst.off) /* local variable */ m_idiom_type = 2; @@ -134,31 +141,23 @@ bool Idiom18::match(iICODE picode) break; case 1: /* register variable */ /* Check previous instruction for a MOV */ - if (m_icodes[0]->ll()->match(iMOV) && (m_icodes[0]->ll()->src().regi == m_icodes[1]->ll()->dst.regi)) + if ( (m_icodes[0]->ll()->src().regi == m_icodes[1]->ll()->dst.regi)) { - regi = m_icodes[0]->ll()->dst.regi; - if ( m_icodes[0]->ll()->dst.isReg() ) - { - if ( m_icodes[2]->ll()->match(iCMP) && (m_icodes[2]->ll()->dst.regi == regi) && - m_icodes[3]->ll()->conditionalJump() ) - return true; - } + return true; } break; case 2: /* local */ - if (m_icodes[0]->ll()->match(iMOV) && (m_icodes[0]->ll()->src().off == m_icodes[1]->ll()->dst.off)) + if ((m_icodes[0]->ll()->src().off == m_icodes[1]->ll()->dst.off)) { - regi = m_icodes[0]->ll()->dst.regi; - if ( m_icodes[0]->ll()->dst.isReg() ) - { - if ( m_icodes[2]->ll()->match(iCMP) && (m_icodes[2]->ll()->dst.regi == regi) && - m_icodes[3]->ll()->conditionalJump() ) - return true; - } + return true; } break; case 3: // indexed - printf("Unsupported idiom18 type: indexed"); + printf("Untested idiom18 type: indexed\n"); + if ((m_icodes[0]->ll()->src() == m_icodes[1]->ll()->dst)) + { + return true; + } break; } return false; @@ -166,8 +165,8 @@ bool Idiom18::match(iICODE picode) int Idiom18::action() // action length { - COND_EXPR *rhs,*lhs;/* Pointers to left and right hand side exps */ - COND_EXPR *expr; + Expr *rhs,*lhs;/* Pointers to left and right hand side exps */ + Expr *expr; lhs = AstIdent::id (*m_icodes[0]->ll(), SRC, m_func, m_icodes[1], *m_icodes[1], eUSE); lhs = UnaryOperator::Create(m_is_dec ? POST_DEC : POST_INC, lhs); rhs = AstIdent::id (*m_icodes[2]->ll(), SRC, m_func, m_icodes[1], *m_icodes[3], eUSE); @@ -195,38 +194,40 @@ bool Idiom19::match(iICODE picode) if(std::distance(picode,m_end)<2) return false; ICODE &ic(*picode); - + int type; for(int i=0; i<2; ++i) m_icodes[i] =picode++; m_is_dec = m_icodes[0]->ll()->match(iDEC); + if ( not m_icodes[1]->ll()->conditionalJump() ) + return false; if (m_icodes[0]->ll()->dst.regi == 0) /* global variable */ /* not supported yet */ ; else if ( m_icodes[0]->ll()->dst.isReg() ) /* register */ { // if (((picode->ll()->dst.regi == rSI) && (pproc->flg & SI_REGVAR)) || // ((picode->ll()->dst.regi == rDI) && (pproc->flg & DI_REGVAR))) - if (m_icodes[1]->ll()->conditionalJump()) - return true; + return true; } else if (m_icodes[0]->ll()->dst.off) /* stack variable */ { - if ( m_icodes[1]->ll()->conditionalJump() ) - return true; + return true; } else /* indexed */ { + fprintf(stderr,"idiom19 : Untested type [indexed]\n"); + return true; + /* not supported yet */ } return false; } int Idiom19::action() { - COND_EXPR *lhs,*rhs,*expr; - ICODE &ic1(*m_icodes[1]); + Expr *lhs,*expr; + lhs = AstIdent::id (*m_icodes[0]->ll(), DST, m_func, m_icodes[0], *m_icodes[1], eUSE); lhs = UnaryOperator::Create(m_is_dec ? PRE_DEC : PRE_INC, lhs); - rhs = AstIdent::Kte (0, 2); - expr = new BinaryOperator(condOpJCond[m_icodes[1]->ll()->getOpcode() - iJB],lhs, rhs); + expr = new BinaryOperator(condOpJCond[m_icodes[1]->ll()->getOpcode() - iJB],lhs, new Constant(0, 2)); m_icodes[1]->setJCond(expr); m_icodes[0]->invalidate(); return 2; @@ -255,6 +256,9 @@ bool Idiom20::match(iICODE picode) return false; for(int i=0; i<4; ++i) m_icodes[i] =picode++; + /* Check second instruction for a MOV */ + if(not (m_icodes[1]->ll()->match(iMOV) && m_icodes[1]->ll()->dst.isReg())) + return false; m_is_dec = m_icodes[0]->ll()->match(iDEC) ? PRE_DEC : PRE_INC; @@ -266,53 +270,52 @@ bool Idiom20::match(iICODE picode) } else if ( ll_dest.isReg() ) /* register */ { - if ((ll_dest.regi == rSI) && (m_func->flg & SI_REGVAR)) - type = 1; - else if ((ll_dest.regi == rDI) && (m_func->flg & DI_REGVAR)) - type = 1; + type = 1; +// if ((ll_dest.regi == rSI) && (m_func->flg & SI_REGVAR)) +// type = 1; +// else if ((ll_dest.regi == rDI) && (m_func->flg & DI_REGVAR)) +// type = 1; } else if (ll_dest.off) /* local variable */ type = 2; else /* indexed */ { - printf("idiom20 : Unsupported type [indexed]\n"); + printf("idiom20 : Untested type [indexed]\n"); + type = 3; /* not supported yet */ ; } - - /* Check previous instruction for a MOV */ - if (type == 1) /* register variable */ + regi = m_icodes[1]->ll()->dst.regi; + const LLOperand &mov_src(m_icodes[1]->ll()->src()); + if (m_icodes[2]->ll()->match(iCMP,(eReg)regi) && m_icodes[3]->ll()->conditionalJump()) { - if (m_icodes[1]->ll()->match(iMOV) && - (m_icodes[1]->ll()->src().regi == ll_dest.regi)) + switch(type) { - regi = m_icodes[1]->ll()->dst.regi; - if ( m_icodes[1]->ll()->dst.isReg() ) - { - if (m_icodes[2]->ll()->match(iCMP,(eReg)regi) && - m_icodes[3]->ll()->conditionalJump()) + case 1: /* register variable */ + if ((mov_src.regi == ll_dest.regi)) + { return true; - } - } - } - else if (type == 2) /* local */ - { - if ( m_icodes[0]->ll()->match(iMOV) && - (m_icodes[1]->ll()->src().off == ll_dest.off)) - { - regi = m_icodes[1]->ll()->dst.regi; - if ( m_icodes[1]->ll()->dst.isReg() ) - { - if (m_icodes[2]->ll()->match(iCMP,(eReg)regi) && - m_icodes[3]->ll()->conditionalJump()) + } + break; + case 2: // local + if ((mov_src.off == ll_dest.off)) + { return true; - } + } + break; + case 3: + fprintf(stderr,"Test 3 "); + if ((mov_src == ll_dest)) + { + return true; + } + break; } } return false; } int Idiom20::action() { - COND_EXPR *lhs,*rhs,*expr; + Expr *lhs,*rhs,*expr; lhs = AstIdent::id (*m_icodes[1]->ll(), SRC, m_func, m_icodes[0], *m_icodes[0], eUSE); lhs = UnaryOperator::Create(m_is_dec, lhs); rhs = AstIdent::id (*m_icodes[2]->ll(), SRC, m_func, m_icodes[0], *m_icodes[3], eUSE); diff --git a/src/idioms/mov_idioms.cpp b/src/idioms/mov_idioms.cpp index 592c5e6..e5d16d2 100644 --- a/src/idioms/mov_idioms.cpp +++ b/src/idioms/mov_idioms.cpp @@ -50,7 +50,7 @@ int Idiom14::action() { int idx; AstIdent *lhs; - COND_EXPR *rhs; + Expr *rhs; idx = m_func->localId.newLongReg (TYPE_LONG_SIGN, m_regH, m_regL, m_icodes[0]); lhs = AstIdent::LongIdx (idx); @@ -101,8 +101,8 @@ bool Idiom13::match(iICODE pIcode) int Idiom13::action() { AstIdent *lhs; - COND_EXPR *rhs; - lhs = AstIdent::Reg (m_loaded_reg, 0, &m_func->localId); + Expr *rhs; + lhs = new RegisterNode(m_loaded_reg, 0, &m_func->localId); m_icodes[0]->setRegDU( m_loaded_reg, eDEF); m_icodes[0]->du1.numRegsDef--; /* prev uint8_t reg def */ rhs = AstIdent::id (*m_icodes[0]->ll(), SRC, m_func, m_icodes[0], *m_icodes[0], NONE); diff --git a/src/idioms/neg_idioms.cpp b/src/idioms/neg_idioms.cpp index 00c8166..d950d34 100644 --- a/src/idioms/neg_idioms.cpp +++ b/src/idioms/neg_idioms.cpp @@ -53,8 +53,8 @@ bool Idiom11::match (iICODE picode) int Idiom11::action() { AstIdent *lhs; - COND_EXPR *rhs; - lhs = AstIdent::idLong (&m_func->localId, DST, m_icodes[0], HIGH_FIRST,m_icodes[0], USE_DEF, *m_icodes[1]->ll()); + Expr *rhs; + lhs = AstIdent::Long (&m_func->localId, DST, m_icodes[0], HIGH_FIRST,m_icodes[0], USE_DEF, *m_icodes[1]->ll()); rhs = UnaryOperator::Create(NEGATION, lhs); m_icodes[0]->setAsgn(lhs, rhs); m_icodes[1]->invalidate(); @@ -96,8 +96,8 @@ bool Idiom16::match (iICODE picode) int Idiom16::action() { AstIdent *lhs; - COND_EXPR *rhs; - lhs = AstIdent::Reg (m_icodes[0]->ll()->dst.regi, m_icodes[0]->ll()->getFlag(),&m_func->localId); + Expr *rhs; + lhs = new RegisterNode(m_icodes[0]->ll()->dst.regi, m_icodes[0]->ll()->getFlag(),&m_func->localId); rhs = UnaryOperator::Create(NEGATION, lhs->clone()); m_icodes[0]->setAsgn(lhs, rhs); m_icodes[1]->invalidate(); diff --git a/src/idioms/shift_idioms.cpp b/src/idioms/shift_idioms.cpp index 9393334..c3047e7 100644 --- a/src/idioms/shift_idioms.cpp +++ b/src/idioms/shift_idioms.cpp @@ -29,7 +29,7 @@ int Idiom8::action() { int idx; AstIdent *lhs; - COND_EXPR *rhs,*expr; + Expr *expr; eReg regH,regL; regH=m_icodes[0]->ll()->dst.regi; regL=m_icodes[1]->ll()->dst.regi; @@ -37,8 +37,7 @@ int Idiom8::action() lhs = AstIdent::LongIdx (idx); m_icodes[0]->setRegDU( regL, USE_DEF); - rhs = AstIdent::Kte(1,2); - expr = new BinaryOperator(SHR,lhs, rhs); + expr = new BinaryOperator(SHR,lhs, new Constant(1, 2)); m_icodes[0]->setAsgn(lhs, expr); m_icodes[1]->invalidate(); return 2; @@ -81,11 +80,11 @@ int Idiom15::action() { AstIdent *lhs; - COND_EXPR *rhs,*_exp; - lhs = AstIdent::Reg (m_icodes[0]->ll()->dst.regi, + Expr *rhs,*_exp; + lhs = new RegisterNode(m_icodes[0]->ll()->dst.regi, m_icodes[0]->ll()->getFlag() & NO_SRC_B, &m_func->localId); - rhs = AstIdent::Kte (m_icodes.size(), 2); + rhs = new Constant(m_icodes.size(), 2); _exp = new BinaryOperator(SHL,lhs, rhs); m_icodes[0]->setAsgn(lhs, _exp); for (size_t i=1; ilocalId.newLongReg (TYPE_LONG_UNSIGN, regH, regL,m_icodes[0]); lhs = AstIdent::LongIdx (idx); m_icodes[0]->setRegDU( regH, USE_DEF); - rhs = AstIdent::Kte (1, 2); - expr = new BinaryOperator(SHL,lhs, rhs); + expr = new BinaryOperator(SHL,lhs, new Constant(1, 2)); m_icodes[0]->setAsgn(lhs, expr); m_icodes[1]->invalidate(); return 2; @@ -161,15 +159,14 @@ int Idiom9::action() { int idx; AstIdent *lhs; - COND_EXPR *rhs,*expr; + Expr *rhs,*expr; eReg regH,regL; regL=m_icodes[1]->ll()->dst.regi; regH=m_icodes[0]->ll()->dst.regi; idx = m_func->localId.newLongReg (TYPE_LONG_UNSIGN,regH,regL,m_icodes[0]); lhs = AstIdent::LongIdx (idx); m_icodes[0]->setRegDU(regL, USE_DEF); - rhs = AstIdent::Kte (1, 2); - expr = new BinaryOperator(SHR,lhs, rhs); + expr = new BinaryOperator(SHR,lhs, new Constant(1, 2)); m_icodes[0]->setAsgn(lhs, expr); m_icodes[1]->invalidate(); return 2; diff --git a/src/idioms/xor_idioms.cpp b/src/idioms/xor_idioms.cpp index ca24a58..673e6c0 100644 --- a/src/idioms/xor_idioms.cpp +++ b/src/idioms/xor_idioms.cpp @@ -39,11 +39,11 @@ bool Idiom21::match (iICODE picode) } int Idiom21::action() { - COND_EXPR *rhs; + Expr *rhs; AstIdent *lhs; - lhs = AstIdent::idLong (&m_func->localId, DST, m_icodes[0],HIGH_FIRST, m_icodes[0], eDEF, *m_icodes[1]->ll()); - rhs = AstIdent::Kte (m_icodes[1]->ll()->src().getImm2() , 4); + lhs = AstIdent::Long (&m_func->localId, DST, m_icodes[0],HIGH_FIRST, m_icodes[0], eDEF, *m_icodes[1]->ll()); + rhs = new Constant(m_icodes[1]->ll()->src().getImm2(), 4); m_icodes[0]->setAsgn(lhs, rhs); m_icodes[0]->du.use = 0; /* clear register used in iXOR */ m_icodes[1]->invalidate(); @@ -84,11 +84,9 @@ bool Idiom7::match(iICODE picode) } int Idiom7::action() { - COND_EXPR *lhs; - COND_EXPR *rhs; + Expr *lhs; lhs = AstIdent::id (*m_icode->ll(), DST, m_func, m_icode, *m_icode, NONE); - rhs = AstIdent::Kte (0, 2); - m_icode->setAsgn(dynamic_cast(lhs), rhs); + m_icode->setAsgn(dynamic_cast(lhs), new Constant(0, 2)); m_icode->du.use = 0; /* clear register used in iXOR */ m_icode->ll()->setFlags(I); return 1; diff --git a/src/locident.cpp b/src/locident.cpp index 5cd272a..c4d5c1e 100644 --- a/src/locident.cpp +++ b/src/locident.cpp @@ -341,13 +341,13 @@ boolT checkLongEq (LONG_STKID_TYPE longId, iICODE pIcode, int i, Function * pPro if ( not pIcode->ll()->testFlags(NO_SRC) ) { - asgn.rhs = AstIdent::idLong (&pProc->localId, SRC, pIcode, HIGH_FIRST, pIcode, eUSE, atOffset); + asgn.rhs = AstIdent::Long (&pProc->localId, SRC, pIcode, HIGH_FIRST, pIcode, eUSE, atOffset); } return true; } else if ((longId.offH == pmHsrc->off) && (longId.offL == pmLsrc->off)) { - asgn.lhs = AstIdent::idLong (&pProc->localId, DST, pIcode, HIGH_FIRST, pIcode,eDEF, atOffset); + asgn.lhs = AstIdent::Long (&pProc->localId, DST, pIcode, HIGH_FIRST, pIcode,eDEF, atOffset); asgn.rhs = AstIdent::LongIdx (i); return true; } @@ -380,13 +380,13 @@ boolT checkLongRegEq (LONGID_TYPE longId, iICODE pIcode, int i, asgn.lhs = AstIdent::LongIdx (i); if ( not pIcode->ll()->testFlags(NO_SRC) ) { - asgn.rhs = AstIdent::idLong (&pProc->localId, SRC, pIcode, HIGH_FIRST, pIcode, eUSE, atOffset); + asgn.rhs = AstIdent::Long (&pProc->localId, SRC, pIcode, HIGH_FIRST, pIcode, eUSE, atOffset); } return true; } else if ((longId.h == pmHsrc->regi) && (longId.l == pmLsrc->regi)) { - asgn.lhs = AstIdent::idLong (&pProc->localId, DST, pIcode, HIGH_FIRST, pIcode, eDEF, atOffset); + asgn.lhs = AstIdent::Long (&pProc->localId, DST, pIcode, HIGH_FIRST, pIcode, eDEF, atOffset); asgn.rhs = AstIdent::LongIdx (i); return true; } diff --git a/src/parser.cpp b/src/parser.cpp index b3f5dd1..b787516 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -81,11 +81,11 @@ void DccFrontend::parse(Project &proj) /* Returns the size of the string pointed by sym and delimited by delim. * Size includes delimiter. */ -int strSize (uint8_t *sym, char delim) +int strSize (const uint8_t *sym, char delim) { PROG &prog(Project::get()->prog); - int till_end = sym-prog.Image; - uint8_t *end_ptr=std::find(sym,sym+(prog.cbImage-(till_end)),delim); + int till_end = sym-prog.image(); + const uint8_t *end_ptr=std::find(sym,sym+(prog.cbImage-(till_end)),delim); return end_ptr-sym+1; } Function *fakeproc=Function::Create(0,0,"fake"); @@ -311,8 +311,8 @@ void Function::FollowCtrl(CALL_GRAPH * pcallGraph, STATE *pstate) operand = ((uint32_t)(uint16_t)pstate->r[rDS]<<4) + (uint32_t)(uint16_t)pstate->r[rDX]; size = prog.fCOM ? - strSize (&prog.Image[operand], '$') : - strSize (&prog.Image[operand], '$'); // + 0x100 + strSize (&prog.image()[operand], '$') : + strSize (&prog.image()[operand], '$'); // + 0x100 global_symbol_table.updateSymType (operand, TypeContainer(TYPE_STR, size)); } } @@ -355,9 +355,9 @@ void Function::FollowCtrl(CALL_GRAPH * pcallGraph, STATE *pstate) if ((psym = lookupAddr(&ll->src(), pstate, 4, eDuVal::USE)) /* && (Icode.ll()->flg & SEG_IMMED) */ ) { - offset = LH(&prog.Image[psym->label]); + offset = LH(&prog.image()[psym->label]); pstate->setState( (ll->getOpcode() == iLDS)? rDS: rES, - LH(&prog.Image[psym->label + 2])); + LH(&prog.image()[psym->label + 2])); pstate->setState( ll->dst.regi, (int16_t)offset); psym->type = TYPE_PTR; } @@ -370,7 +370,7 @@ void Function::FollowCtrl(CALL_GRAPH * pcallGraph, STATE *pstate) if (err == INVALID_386OP || err == INVALID_OPCODE) { - fatalError(err, prog.Image[_Icode.ll()->label], _Icode.ll()->label); + fatalError(err, prog.image()[_Icode.ll()->label], _Icode.ll()->label); this->flg |= PROC_BADINST; } else if (err == IP_OUT_OF_RANGE) @@ -410,7 +410,7 @@ bool Function::followAllTableEntries(JumpTable &table, uint32_t cs, ICODE& pIcod for (size_t i = table.start; i < table.finish; i += 2) { StCopy = *pstate; - StCopy.IP = cs + LH(&prog.Image[i]); + StCopy.IP = cs + LH(&prog.image()[i]); iICODE last_current_insn = (++Icode.rbegin()).base(); FollowCtrl (pcallGraph, &StCopy); @@ -434,7 +434,7 @@ bool Function::process_JMP (ICODE & pIcode, STATE *pstate, CALL_GRAPH * pcallGra if (pIcode.ll()->testFlags(I)) { if (pIcode.ll()->getOpcode() == iJMPF) - pstate->setState( rCS, LH(prog.Image + pIcode.ll()->label + 3)); + pstate->setState( rCS, LH(prog.image() + pIcode.ll()->label + 3)); pstate->IP = pIcode.ll()->src().getImm2(); int64_t i = pIcode.ll()->src().getImm2(); if (i < 0) @@ -483,7 +483,7 @@ bool Function::process_JMP (ICODE & pIcode, STATE *pstate, CALL_GRAPH * pcallGra cs = (uint32_t)(uint16_t)pstate->r[rCS] << 4; for (i = offTable; i < endTable; i += 2) { - target = cs + LH(&prog.Image[i]); + target = cs + LH(&prog.image()[i]); if (target < endTable && target >= offTable) endTable = target; else if (target >= (uint32_t)prog.cbImage) @@ -492,9 +492,9 @@ bool Function::process_JMP (ICODE & pIcode, STATE *pstate, CALL_GRAPH * pcallGra for (i = offTable; i < endTable; i += 2) { - target = cs + LH(&prog.Image[i]); + target = cs + LH(&prog.image()[i]); /* Be wary of 00 00 as code - it's probably data */ - if (! (prog.Image[target] || prog.Image[target+1]) || + if (! (prog.image()[target] || prog.image()[target+1]) || scan(target, _Icode)) endTable = i; } @@ -516,7 +516,7 @@ bool Function::process_JMP (ICODE & pIcode, STATE *pstate, CALL_GRAPH * pcallGra for (i = offTable, k = 0; i < endTable; i += 2) { StCopy = *pstate; - StCopy.IP = cs + LH(&prog.Image[i]); + StCopy.IP = cs + LH(&prog.image()[i]); iICODE last_current_insn = (++Icode.rbegin()).base(); //ip = Icode.size(); @@ -604,9 +604,9 @@ boolT Function::process_CALL (ICODE & pIcode, CALL_GRAPH * pcallGraph, STATE *ps * previous offset into the program image */ uint32_t tgtAddr=0; if (pIcode.ll()->getOpcode() == iCALLF) - tgtAddr= LH(&prog.Image[off]) + ((uint32_t)(LH(&prog.Image[off+2])) << 4); + tgtAddr= LH(&prog.image()[off]) + ((uint32_t)(LH(&prog.image()[off+2])) << 4); else - tgtAddr= LH(&prog.Image[off]) + ((uint32_t)(uint16_t)state.r[rCS] << 4); + tgtAddr= LH(&prog.image()[off]) + ((uint32_t)(uint16_t)state.r[rCS] << 4); pIcode.ll()->replaceSrc(LLOperand::CreateImm2( tgtAddr ) ); pIcode.ll()->setFlags(I); indirect = true; @@ -651,7 +651,7 @@ boolT Function::process_CALL (ICODE & pIcode, CALL_GRAPH * pcallGraph, STATE *ps localState = *pstate; pstate->IP = pIcode.ll()->src().getImm2(); if (pIcode.ll()->getOpcode() == iCALLF) - pstate->setState( rCS, LH(prog.Image + pIcode.ll()->label + 3)); + pstate->setState( rCS, LH(prog.image() + pIcode.ll()->label + 3)); x.state = *pstate; /* Insert new procedure in call graph */ @@ -694,7 +694,7 @@ static void process_MOV(LLInst & ll, STATE * pstate) { psym = lookupAddr(&ll.src(), pstate, 2, eDuVal::USE); if (psym && ((psym->flg & SEG_IMMED) || psym->duVal.val)) - pstate->setState( dstReg, LH(&prog.Image[psym->label])); + pstate->setState( dstReg, LH(&prog.image()[psym->label])); } else if (srcReg < INDEX_BX_SI && pstate->f[srcReg]) /* reg */ { @@ -714,9 +714,13 @@ static void process_MOV(LLInst & ll, STATE * pstate) { if (ll.testFlags(I)) /* immediate */ { - prog.Image[psym->label] = (uint8_t)ll.src().getImm2(); + //prog.image()[psym->label] = (uint8_t)ll.src().getImm2(); + pstate->setMemoryByte(psym->label,(uint8_t)ll.src().getImm2()); if(psym->size>1) - prog.Image[psym->label+1] = (uint8_t)(ll.src().getImm2()>>8); + { + pstate->setMemoryByte(psym->label+1,uint8_t(ll.src().getImm2()>>8)); + //prog.image()[psym->label+1] = (uint8_t)(ll.src().getImm2()>>8); + } psym->duVal.val = 1; } else if (srcReg == 0) /* direct mem offset */ @@ -724,18 +728,26 @@ static void process_MOV(LLInst & ll, STATE * pstate) psym2 = lookupAddr (&ll.src(), pstate, 2, eDuVal::USE); if (psym2 && ((psym->flg & SEG_IMMED) || (psym->duVal.val))) { - prog.Image[psym->label] = (uint8_t)prog.Image[psym2->label]; + //prog.image()[psym->label] = (uint8_t)prog.image()[psym2->label]; + pstate->setMemoryByte(psym->label,(uint8_t)prog.image()[psym2->label]); if(psym->size>1) - prog.Image[psym->label+1] = prog.Image[psym2->label+1];//(uint8_t)(prog.Image[psym2->label+1] >> 8); + { + pstate->setMemoryByte(psym->label+1,(uint8_t)prog.image()[psym2->label+1]); + //prog.image()[psym->label+1] = prog.image()[psym2->label+1];//(uint8_t)(prog.image()[psym2->label+1] >> 8); + } psym->duVal.setFlags(eDuVal::DEF); psym2->duVal.setFlags(eDuVal::USE); } } else if (srcReg < INDEX_BX_SI && pstate->f[srcReg]) /* reg */ { - prog.Image[psym->label] = (uint8_t)pstate->r[srcReg]; + //prog.image()[psym->label] = (uint8_t)pstate->r[srcReg]; + pstate->setMemoryByte(psym->label,(uint8_t)pstate->r[srcReg]); if(psym->size>1) - prog.Image[psym->label+1] = (uint8_t)(pstate->r[srcReg] >> 8); + { + pstate->setMemoryByte(psym->label,(uint8_t)pstate->r[srcReg]>>8); + //prog.image()[psym->label+1] = (uint8_t)(pstate->r[srcReg] >> 8); + } psym->duVal.setFlags(eDuVal::DEF); } } diff --git a/src/procs.cpp b/src/procs.cpp index 86102ef..a02d252 100644 --- a/src/procs.cpp +++ b/src/procs.cpp @@ -94,7 +94,7 @@ void LOCAL_ID::newRegArg(iICODE picode, iICODE ticode) const STKFRAME * call_args_stackframe, *target_stackframe; const ID *id; int tidx; - boolT regExist; + bool regExist=false; condId type; Function * tproc; eReg regL, regH; /* Registers involved in arguments */ @@ -106,16 +106,29 @@ void LOCAL_ID::newRegArg(iICODE picode, iICODE ticode) const /* Get registers and index into target procedure's local list */ call_args_stackframe = ticode->hl()->call.args; target_stackframe = &tproc->args; - lhs = dynamic_cast(picode->hl()->asgn.lhs); + lhs = dynamic_cast(picode->hl()->asgn.lhs()); + RegisterNode *lhs_reg = dynamic_cast(lhs); assert(lhs); - type = lhs->ident.idType; - if (type == REGISTER) + type = lhs->ident.type(); + if (lhs_reg) { - regL = id_arr[lhs->ident.idNode.regiIdx].id.regi; + regL = id_arr[lhs_reg->regiIdx].id.regi; if (regL < rAL) tidx = tproc->localId.newByteWordReg(TYPE_WORD_SIGN, regL); else tidx = tproc->localId.newByteWordReg(TYPE_BYTE_SIGN, regL); + /* Check if register argument already on the formal argument list */ + for(STKSYM &tgt_sym : *target_stackframe) + { + RegisterNode *tgt_sym_regs = dynamic_cast(tgt_sym.regs); + if( tgt_sym_regs == NULL ) // both REGISTER and LONG_VAR require this precondition + continue; + if ( tgt_sym_regs->regiIdx == tidx ) + { + regExist = true; + break; + } + } } else if (type == LONG_VAR) { @@ -123,32 +136,20 @@ void LOCAL_ID::newRegArg(iICODE picode, iICODE ticode) const regL = id_arr[longIdx].id.longId.l; regH = id_arr[longIdx].id.longId.h; tidx = tproc->localId.newLongReg(TYPE_LONG_SIGN, regH, regL, tproc->Icode.begin() /*0*/); - } - - /* Check if register argument already on the formal argument list */ - regExist = false; - for(STKSYM &tgt_sym : *target_stackframe) - { - if( tgt_sym.regs == NULL ) // both REGISTER and LONG_VAR require this precondition - continue; - if (type == REGISTER) - { - if ( tgt_sym.regs->ident.idNode.regiIdx == tidx ) - { - regExist = true; - } - } - else if (type == LONG_VAR) + /* Check if register argument already on the formal argument list */ + for(STKSYM &tgt_sym : *target_stackframe) { + if( tgt_sym.regs == NULL ) // both REGISTER and LONG_VAR require this precondition + continue; if ( tgt_sym.regs->ident.idNode.longIdx == tidx ) { regExist = true; + break; } } - if(regExist == true) - break; } - + else + ;//regExist = false; /* Do ts (formal arguments) */ if (regExist == false) { @@ -161,12 +162,12 @@ void LOCAL_ID::newRegArg(iICODE picode, iICODE ticode) const if (regL < rAL) { newsym.type = TYPE_WORD_SIGN; - newsym.regs = AstIdent::RegIdx(tidx, WORD_REG); + newsym.regs = new RegisterNode(tidx, WORD_REG); } else { newsym.type = TYPE_BYTE_SIGN; - newsym.regs = AstIdent::RegIdx(tidx, BYTE_REG); + newsym.regs = new RegisterNode(tidx, BYTE_REG); } tproc->localId.id_arr[tidx].name = newsym.name; } @@ -189,7 +190,7 @@ void LOCAL_ID::newRegArg(iICODE picode, iICODE ticode) const /* Mask off high and low register(s) in picode */ switch (type) { case REGISTER: - id = &id_arr[lhs->ident.idNode.regiIdx]; + id = &id_arr[lhs_reg->regiIdx]; picode->du.def &= maskDuReg[id->id.regi]; if (id->id.regi < rAL) newsym.type = TYPE_WORD_SIGN; @@ -215,9 +216,9 @@ void LOCAL_ID::newRegArg(iICODE picode, iICODE ticode) const * @return true if it was a near call that made use of a segment register. * false elsewhere */ -bool CallType::newStkArg(COND_EXPR *exp, llIcode opcode, Function * pproc) +bool CallType::newStkArg(Expr *exp, llIcode opcode, Function * pproc) { - AstIdent *expr = dynamic_cast(exp); + RegisterNode *expr = dynamic_cast(exp); uint8_t regi; /* Check for far procedure call, in which case, references to segment @@ -225,16 +226,13 @@ bool CallType::newStkArg(COND_EXPR *exp, llIcode opcode, Function * pproc) * long references to another segment) */ if (expr) { - if (expr->ident.idType == REGISTER) + regi = pproc->localId.id_arr[expr->regiIdx].id.regi; + if ((regi >= rES) && (regi <= rDS)) { - regi = pproc->localId.id_arr[expr->ident.idNode.regiIdx].id.regi; - if ((regi >= rES) && (regi <= rDS)) - { - if (opcode == iCALLF) - return false; - else - return true; - } + if (opcode == iCALLF) + return false; + else + return true; } } @@ -249,22 +247,22 @@ bool CallType::newStkArg(COND_EXPR *exp, llIcode opcode, Function * pproc) /* Places the actual argument exp in the position given by pos in the * argument list of picode. */ -void CallType::placeStkArg (COND_EXPR *exp, int pos) +void CallType::placeStkArg (Expr *exp, int pos) { (*args)[pos].actual = exp; (*args)[pos].setArgName(pos); } -COND_EXPR *CallType::toId() +Expr *CallType::toAst() { - return AstIdent::idFunc( proc, args); + return new FuncNode( proc, args); } /* Checks to determine whether the expression (actual argument) has the * same type as the given type (from the procedure's formal list). If not, * the actual argument gets modified */ -void adjustActArgType (COND_EXPR *_exp, hlType forType, Function * pproc) +Expr *Function::adjustActArgType (Expr *_exp, hlType forType) { AstIdent *expr = dynamic_cast(_exp); PROG &prog(Project::get()->prog); @@ -272,11 +270,11 @@ void adjustActArgType (COND_EXPR *_exp, hlType forType, Function * pproc) int offset, offL; if (expr == NULL) - return; + return _exp; - actType = expr-> expType (pproc); + actType = expr-> expType (this); if (actType == forType) - return; + return _exp; switch (forType) { case TYPE_UNKNOWN: case TYPE_BYTE_SIGN: @@ -292,16 +290,20 @@ void adjustActArgType (COND_EXPR *_exp, hlType forType, Function * pproc) case TYPE_STR: switch (actType) { case TYPE_CONST: - /* It's an offset into image where a string is - * found. Point to the string. */ - offL = expr->ident.idNode.kte.kte; + /* It's an offset into image where a string is found. Point to the string. */ + { + Constant *c=dynamic_cast(expr); + assert(c); + offL = c->kte.kte; if (prog.fCOM) - offset = (pproc->state.r[rDS]<<4) + offL + 0x100; + offset = (state.r[rDS]<<4) + offL + 0x100; else - offset = (pproc->state.r[rDS]<<4) + offL; + offset = (state.r[rDS]<<4) + offL; expr->ident.idNode.strIdx = offset; - expr->ident.idType = STRING; - break; + expr->ident.type(STRING); + delete c; + return AstIdent::String(offset); + } case TYPE_PTR: /* It's a pointer to a char rather than a pointer to @@ -319,6 +321,7 @@ void adjustActArgType (COND_EXPR *_exp, hlType forType, Function * pproc) default: fprintf(stderr,"adjustForArgType unhandled forType %d \n",forType); } + return _exp; } diff --git a/src/proplong.cpp b/src/proplong.cpp index 6f6aa28..c8a48c6 100644 --- a/src/proplong.cpp +++ b/src/proplong.cpp @@ -336,7 +336,7 @@ int Function::findBackwarLongDefs(int loc_ident_idx, const ID &pLocId, iICODE be localId.id_arr[loc_ident_idx].idx.push_back(pIcode);//idx-1//insert icode.setRegDU( pmL->regi, eDEF); asgn.lhs = AstIdent::LongIdx (loc_ident_idx); - asgn.rhs = AstIdent::idLong (&this->localId, SRC, pIcode, HIGH_FIRST, pIcode, eUSE, *next1->ll()); + asgn.rhs = AstIdent::Long (&this->localId, SRC, pIcode, HIGH_FIRST, pIcode, eUSE, *next1->ll()); icode.setAsgn(asgn.lhs, asgn.rhs); next1->invalidate(); forced_finish=true; /* to exit the loop */ @@ -365,7 +365,7 @@ int Function::findBackwarLongDefs(int loc_ident_idx, const ID &pLocId, iICODE be if ((pLocId.id.longId.h == pmH->regi) && (pLocId.id.longId.l == pmL->regi)) { asgn.lhs = AstIdent::LongIdx (loc_ident_idx); - asgn.rhs = AstIdent::idLong (&this->localId, SRC, pIcode, LOW_FIRST, pIcode, eUSE, *next1->ll()); + asgn.rhs = AstIdent::Long (&this->localId, SRC, pIcode, LOW_FIRST, pIcode, eUSE, *next1->ll()); icode.setRegDU( pmH->regi, USE_DEF); condOp toCreate=DUMMY; switch (icode.ll()->getOpcode()) @@ -411,13 +411,13 @@ int Function::findForwardLongUses(int loc_ident_idx, const ID &pLocId, iICODE be const LLOperand &src_op1(pIcode->ll()->src()); const LLOperand &src_op2(next1->ll()->src()); eReg srcReg1=src_op1.getReg2(); - eReg srcReg2=src_op2.getReg2(); - if ((ref_long.h == srcReg1) && (ref_long.l == srcReg2)) + eReg nextReg2=src_op2.getReg2(); + if ((ref_long.h == srcReg1) && (ref_long.l == nextReg2)) { - pIcode->setRegDU( next1->ll()->src().getReg2(), eUSE); + pIcode->setRegDU( nextReg2, eUSE); asgn.rhs = AstIdent::LongIdx (loc_ident_idx); - asgn.lhs = AstIdent::idLong (&this->localId, DST, pIcode,HIGH_FIRST, pIcode, eDEF, *next1->ll()); + asgn.lhs = AstIdent::Long (&this->localId, DST, pIcode,HIGH_FIRST, pIcode, eDEF, *next1->ll()); pIcode->setAsgn(dynamic_cast(asgn.lhs), asgn.rhs); next1->invalidate(); @@ -453,7 +453,7 @@ int Function::findForwardLongUses(int loc_ident_idx, const ID &pLocId, iICODE be { asgn.lhs = AstIdent::LongIdx (loc_ident_idx); pIcode->setRegDU( pmH->regi, USE_DEF); - asgn.rhs = AstIdent::idLong (&this->localId, SRC, pIcode, + asgn.rhs = AstIdent::Long (&this->localId, SRC, pIcode, LOW_FIRST, pIcode, eUSE, *next1->ll()); condOp toCreate=DUMMY; switch (pIcode->ll()->getOpcode()) { @@ -505,7 +505,7 @@ int Function::findForwardLongUses(int loc_ident_idx, const ID &pLocId, iICODE be if (pLocId.id.longId.srcDstRegMatch(pIcode,pIcode)) { asgn.lhs = AstIdent::LongIdx (loc_ident_idx); - asgn.rhs = AstIdent::Kte (0, 4); /* long 0 */ + asgn.rhs = new Constant(0, 4); /* long 0 */ asgn.lhs = new BinaryOperator(condOpJCond[next1->ll()->getOpcode() - iJB],asgn.lhs, asgn.rhs); next1->setJCond(asgn.lhs); next1->copyDU(*pIcode, eUSE, eUSE); diff --git a/src/scanner.cpp b/src/scanner.cpp index 1128a37..99d30b0 100644 --- a/src/scanner.cpp +++ b/src/scanner.cpp @@ -314,14 +314,29 @@ static struct { } ; static uint16_t SegPrefix, RepPrefix; -static uint8_t *pInst; /* Ptr. to current uint8_t of instruction */ +static const uint8_t *pInst; /* Ptr. to current uint8_t of instruction */ static ICODE * pIcode; /* Ptr to Icode record filled in by scan() */ -/***************************************************************************** - Scans one machine instruction at offset ip in prog.Image and returns error. - At the same time, fill in low-level icode details for the scanned inst. - ****************************************************************************/ +static void decodeBranchTgt(x86_insn_t &insn) +{ + x86_op_t *tgt_op = insn.x86_get_branch_target(); + if(tgt_op->type==op_expression) + return; // unhandled for now + if(tgt_op->type==op_register) + return; // unhandled for now + int32_t addr = tgt_op->getAddress(); + if(tgt_op->is_relative()) + { + addr += insn.addr+insn.size; + } + pIcode->ll()->replaceSrc((uint32_t)addr); + pIcode->ll()->setFlags(I); + // PROG &prog(Project::get()->prog); + // long off = (short)getWord(); /* Signed displacement */ + // assert(addr==(uint32_t)(off + (unsigned)(pInst - prog.image()))); + +} static void convertUsedFlags(x86_insn_t &from,ICODE &to) { @@ -345,6 +360,13 @@ static void convertUsedFlags(x86_insn_t &from,ICODE &to) if(from.containsFlag(insn_eflag_direction,from.flags_tested)) to.ll()->flagDU.u |= Df; } +static void convertPrefix(x86_insn_prefix prefix,ICODE &to) +{ + if(prefix ==insn_no_prefix) + return; + // insn_lock - no need to handle + RepPrefix = (uint16_t)prefix & ~insn_lock; +} /**************************************************************************** Checks for int 34 to int 3B - if so, converts to ESC nn instruction ****************************************************************************/ @@ -362,7 +384,7 @@ static void fixFloatEmulation(x86_insn_t &insn) /* This is a Borland/Microsoft floating point emulation instruction. Treat as if it is an ESC opcode */ int actual_valid_bytes=std::min(16U,prog.cbImage-insn.offset); - memcpy(buf,prog.Image+insn.offset,actual_valid_bytes); + memcpy(buf,prog.image()+insn.offset,actual_valid_bytes); X86_Disasm ds(opt_16_bit); x86_insn_t patched_insn; //patch actual instruction into buffer; @@ -378,7 +400,7 @@ int disassembleOneLibDisasm(uint32_t ip,x86_insn_t &l) { PROG &prog(Project::get()->prog); X86_Disasm ds(opt_16_bit); - int cnt=ds.x86_disasm(prog.Image,prog.cbImage,0,ip,&l); + int cnt=ds.x86_disasm(prog.image(),prog.cbImage,0,ip,&l); if(cnt && l.is_valid()) { fixFloatEmulation(l); //can change 'l' @@ -416,6 +438,11 @@ LLOperand convertOperand(const x86_op_t &from) } return LLOperand::CreateImm2(0); } +/***************************************************************************** + Scans one machine instruction at offset ip in prog.Image and returns error. + At the same time, fill in low-level icode details for the scanned inst. + ****************************************************************************/ + eErrorId scan(uint32_t ip, ICODE &p) { PROG &prog(Project::get()->prog); @@ -431,10 +458,12 @@ eErrorId scan(uint32_t ip, ICODE &p) if(cnt) { convertUsedFlags(p.insn,p); + convertPrefix(p.insn.prefix,p); + } SegPrefix = RepPrefix = 0; - pInst = prog.Image + ip; + pInst = prog.image() + ip; pIcode = &p; do @@ -446,12 +475,20 @@ eErrorId scan(uint32_t ip, ICODE &p) (*stateTable[op].state2)(op); /* Third state */ } while (stateTable[op].state1 == prefix); /* Loop if prefix */ + if(p.insn.group == x86_insn_t::insn_controlflow) + { + if(p.insn.x86_get_branch_target()) + decodeBranchTgt(p.insn); + } +// LLOperand conv = convertOperand(*p.insn.get_dest()); +// assert(conv==p.ll()->dst); if (p.ll()->getOpcode()) { /* Save bytes of image used */ - p.ll()->numBytes = (uint8_t)((pInst - prog.Image) - ip); + p.ll()->numBytes = (uint8_t)((pInst - prog.image()) - ip); if(p.insn.is_valid()) assert(p.ll()->numBytes == p.insn.size); + p.ll()->numBytes = p.insn.size; return ((SegPrefix)? FUNNY_SEGOVR: /* Seg. Override invalid */ (RepPrefix ? FUNNY_REP: NO_ERR));/* REP prefix invalid */ } @@ -462,11 +499,11 @@ eErrorId scan(uint32_t ip, ICODE &p) /*************************************************************************** relocItem - returns true if uint16_t pointed at is in relocation table **************************************************************************/ -static bool relocItem(uint8_t *p) +static bool relocItem(const uint8_t *p) { PROG &prog(Project::get()->prog); int i; - uint32_t off = p - prog.Image; + uint32_t off = p - prog.image(); for (i = 0; i < prog.cReloc; i++) if (prog.relocTable[i] == off) @@ -501,13 +538,13 @@ static int signex(uint8_t b) * Note: fdst == true is for the r/m part of the field (dest, unless TO_REG) * fdst == false is for reg part of the field ***************************************************************************/ -static void setAddress(int i, boolT fdst, uint16_t seg, int16_t reg, uint16_t off) +static void setAddress(int i, bool fdst, uint16_t seg, int16_t reg, uint16_t off) { LLOperand *pm; /* If not to register (i.e. to r/m), and talking about r/m, then this is dest */ pm = (!(stateTable[i].flg & TO_REG) == fdst) ? - &pIcode->ll()->dst : &pIcode->ll()->src(); + &pIcode->ll()->dst : &pIcode->ll()->src(); /* Set segment. A later procedure (lookupAddr in proclist.c) will * provide the value of this segment in the field segValue. */ @@ -572,7 +609,7 @@ static void rm(int i) setAddress(i, true, 0, rm + rAX, 0); break; } - + //pIcode->insn.get_dest()-> if ((stateTable[i].flg & NSP) && (pIcode->ll()->src().getReg2()==rSP || pIcode->ll()->dst.getReg2()==rSP)) pIcode->ll()->setFlags(NOT_HLL); @@ -739,7 +776,7 @@ static void arith(int i) uint8_t opcode; static llIcode arithTable[8] = { - iTEST , (llIcode)0, iNOT, iNEG, + iTEST, (llIcode)0, iNOT, iNEG, iMUL , iIMUL, iDIV, iIDIV }; opcode = arithTable[REG(*pInst)]; @@ -810,47 +847,40 @@ static void dispM(int i) { setAddress(i, false, SegPrefix, 0, getWord()); } - - /**************************************************************************** dispN - 2 uint8_t disp as immed relative to ip ****************************************************************************/ static void dispN(int ) { - PROG &prog(Project::get()->prog); - long off = (short)getWord(); /* Signed displacement */ + //PROG &prog(Project::get()->prog); + /*long off = (short)*/getWord(); /* Signed displacement */ /* Note: the result of the subtraction could be between 32k and 64k, and still be positive; it is an offset from prog.Image. So this must be treated as unsigned */ - pIcode->ll()->replaceSrc((uint32_t)(off + (unsigned)(pInst - prog.Image))); - pIcode->ll()->setFlags(I); + // decodeBranchTgt(); } /*************************************************************************** - dispS - 1 uint8_t disp as immed relative to ip + dispS - 1 byte disp as immed relative to ip ***************************************************************************/ static void dispS(int ) { - PROG &prog(Project::get()->prog); - long off = signex(*pInst++); /* Signed displacement */ + /*long off =*/ signex(*pInst++); /* Signed displacement */ - pIcode->ll()->replaceSrc((uint32_t)(off + (unsigned)(pInst - prog.Image))); - pIcode->ll()->setFlags(I); + // decodeBranchTgt(); } /**************************************************************************** - dispF - 4 uint8_t disp as immed 20-bit target address + dispF - 4 byte disp as immed 20-bit target address ***************************************************************************/ static void dispF(int ) { - uint32_t off = (unsigned)getWord(); - uint32_t seg = (unsigned)getWord(); - - pIcode->ll()->replaceSrc(off + ((uint32_t)(unsigned)seg << 4)); - pIcode->ll()->setFlags(I); + /*off = */(unsigned)getWord(); + /*seg = */(unsigned)getWord(); + // decodeBranchTgt(); } @@ -880,12 +910,19 @@ static void strop(int ) { if (RepPrefix) { - // pIcode->ll()->getOpcode() += ((pIcode->ll()->getOpcode() == iCMPS || - // pIcode->ll()->getOpcode() == iSCAS) - // && RepPrefix == iREPE)? 2: 1; - if ((pIcode->ll()->match(iCMPS) || pIcode->ll()->match(iSCAS) ) && RepPrefix == iREPE) - BumpOpcode(*pIcode->ll()); // += 2 - BumpOpcode(*pIcode->ll()); // else += 1 + if ( pIcode->ll()->match(iCMPS) || pIcode->ll()->match(iSCAS) ) + { + if(pIcode->insn.prefix & insn_rep_zero) + { + BumpOpcode(*pIcode->ll()); // iCMPS -> iREPE_CMPS + BumpOpcode(*pIcode->ll()); + } + else if(pIcode->insn.prefix & insn_rep_notzero) + BumpOpcode(*pIcode->ll()); // iX -> iREPNE_X + } + else + if(pIcode->insn.prefix & insn_rep_zero) + BumpOpcode(*pIcode->ll()); // iX -> iREPE_X if (pIcode->ll()->match(iREP_LODS) ) pIcode->ll()->setFlags(NOT_HLL); RepPrefix = 0; diff --git a/tests/prev_base/DHAMP.b b/tests/prev_base/DHAMP.b index 363070c..eef5485 100644 --- a/tests/prev_base/DHAMP.b +++ b/tests/prev_base/DHAMP.b @@ -13,16 +13,15 @@ int proc_2 (long arg0, long arg1) */ { char loc1; /* al */ -int loc2; /* al */ -int loc3; /* bx */ +int loc2; /* bx */ do { arg0 = (arg0 + 1); loc1 = es[bx]; arg1 = (arg1 + 1); es[bx] = loc1; - } while ((loc2 != 0)); - return (loc3); + } while ((loc1 != 0)); + return (loc2); } @@ -35,7 +34,6 @@ int proc_3 (long arg0, long arg1) int loc1; /* ax */ while ((es[bx] == es[bx])) { - if (es[bx] == 0) { loc1 = 0; return (loc1); @@ -57,9 +55,9 @@ int proc_1 (int arg0, int arg1, int arg2, int arg3) { int loc1; /* si */ int loc2; /* di */ + loc1 = 0; loc2 = 0; - while ((loc1 < 0x2328)) { proc_2 (arg1, arg0, 311); proc_2 (arg3, arg2, 328); @@ -79,13 +77,12 @@ int loc1; int loc2; int loc3; int loc4; - loc3 = 0; + loc3 = 0; while ((loc3 < 0x3e8)) { loc1 = 0; loc4 = 0; loc2 = 1; - while ((loc4 < 179)) { loc1 = (loc1 + loc2); loc2 = (loc2 + 2); @@ -105,8 +102,8 @@ int proc_5 (int arg0) { int loc1; /* si */ int loc2; /* ax */ - loc1 = arg0; + loc1 = arg0; if (loc1 > 2) { loc2 = (proc_5 ((loc1 - 1)) + proc_5 ((loc1 + 0xfffe))); } @@ -179,7 +176,7 @@ void proc_8 (int arg0) } - proc_7 (int arg0, int arg1, int arg2, int arg3) +void proc_7 (int arg0, int arg1, int arg2, int arg3) /* Takes 8 bytes of parameters. * High-level language prologue code. * Untranslatable routine. Assembler provided. @@ -245,7 +242,7 @@ void proc_8 (int arg0) } - proc_9 (int arg0) +void proc_9 (int arg0) /* Takes 8 bytes of parameters. * High-level language prologue code. * C calling convention. @@ -256,106 +253,62 @@ void proc_8 (int arg0) int loc1; int loc2; int loc3; /* ax */ + loc2 = 100; loc3 = loc2; loc2 = (loc2 - 1); - while (((loc3 | loc3) != 0)) { loc3 = loc2; loc2 = (loc2 - 1); } /* end of while */ - return (var06278); } int proc_10 () /* Takes no parameters. * High-level language prologue code. - * Untranslatable routine. Assembler provided. - * Return value in register ax. * Contains instructions not normally used by compilers. */ { - PUSH bp - MOV bp, sp - SUB sp, 68h - PUSH si - PUSH di - PUSH ds - MOV ax, 159h - PUSH ax - PUSH ss - LEA ax, [bp-64h] - PUSH ax - PUSH cs - CALL near ptr proc_2 - ADD sp, 8 - PUSH ds - MOV ax, 170h - PUSH ax - PUSH ds - MOV ax, 167h - PUSH ax - CALL far ptr fopen - ADD sp, 8 - MOV [bp-66h], dx - MOV [bp-68h], ax - OR dx, ax - JNE L1 - PUSH ds - MOV ax, 172h - PUSH ax - CALL far ptr printf - POP cx - POP cx - MOV ax, 0FFFFh - PUSH ax - CALL far ptr exit - POP cx +int loc1; +int loc2; +int loc3; +int loc4; +int loc5; +int loc6; /* bx */ +int loc7; /* dx */ +int loc8; /* ax */ + loc6 = proc_2 (&loc1, 345, , ); + fopen ("zyxw.vut", 368); + loc2 = loc7; + loc3 = loc8; - L1: XOR di, 0 + if ((loc7 | loc8) == 0) { + printf ("Cannot open file"); + exit (0xffff); + } +l1: + if (++loc5 >= 0x3e8) { + fclose (loc3, loc2); + return (loc5); + } + else { + loc4 = 0; - L2: INC di - MOV ax, di - CMP ax, 3E8h - JL L3 - PUSH word ptr [bp-66h] - PUSH word ptr [bp-68h] - CALL far ptr fclose - POP cx - POP cx - MOV ax, di - POP di - POP si - MOV sp, bp - POP bp - RETF + while ((ss[bp+si-0x64] != 0)) { - L3: XOR si, 0 - - L4: CMP byte ptr ss:[bp+si-64h], 0 - JNE L5 - - L5: LES bx, dword ptr[bp-68h] - INC word ptr es:[bx] - JGE L6 - MOV al, ss:[bp+si-64h] - LES bx, dword ptr[bp-68h] - INC word ptr es:[bx+0Ch] - LES bx, dword ptres:[bx+0Ch] - DEC bx - MOV es:[bx], al - MOV ah, 0 - - L7: INC si - JMP L4 ;Synthetic inst - - L6: PUSH word ptr [bp-66h] - PUSH word ptr [bp-68h] - PUSH word ptr ss:[bp+si-64h] - CALL far ptr _fputc - ADD sp, 6 - JMP L7 ;Synthetic inst + if (++es[bx] < 0) { + es[bx+0xc] = (es[bx+0xc] + 1); + loc6 = (loc6 - 1); + es[bx] = ss[bp+si-0x64]; + } + else { + _fputc (ss[bp+si-0x64], loc3, loc2); + } + loc4 = (loc4 + 1); + } /* end of while */ + goto L1; + } } @@ -379,11 +332,11 @@ int loc10; int loc11; int loc12; /* ax */ int loc13; /* bx */ - printf ("Start...%c\n\n", 7); + loc11 = 0; + printf ("Start...%c\n\n", 7); while ((loc11 < 6)) { loc12 = loc11; - if (loc12 <= 5) { loc13 = (loc12 << 1); var06278 = proc_1 (&loc2, &loc1, , ); diff --git a/tests/prev_base/LONGOPS.b b/tests/prev_base/LONGOPS.b index d7cf9ea..5c82cb6 100644 --- a/tests/prev_base/LONGOPS.b +++ b/tests/prev_base/LONGOPS.b @@ -15,30 +15,32 @@ long LXMUL@ (long arg0, long arg1) { int loc1; int loc2; /* tmp */ + loc2 = LO(arg0); LO(arg0) = loc1; loc1 = loc2; loc2 = LO(arg0); LO(arg0) = HI(arg0); - if ((LO(arg0) & LO(arg0)) != 0) { + LO(arg0) = (LO(arg0) * LO(arg1)); } loc2 = LO(arg0); LO(arg0) = HI(arg1); HI(arg1) = loc2; - if ((LO(arg0) & LO(arg0)) != 0) { LO(arg0) = (LO(arg0) * loc1); HI(arg1) = (HI(arg1) + LO(arg0)); } loc2 = LO(arg0); - arg0 = (loc1 * LO(arg1)); + LO(arg0) = loc1; + loc1 = loc2; + arg0 = (LO(arg0) * LO(arg1)); HI(arg0) = (HI(arg0) + HI(arg1)); return (arg0); } -long LDIV@ (long arg0, int arg3) +long LDIV@ (long arg0, long arg2) /* Takes 8 bytes of parameters. * Runtime support routine of the compiler. * High-level language prologue code. @@ -131,7 +133,7 @@ long LDIV@ (long arg0, int arg3) } -long LMOD@ (long arg0, int arg3) +long LMOD@ (long arg0, long arg2) /* Takes 8 bytes of parameters. * Runtime support routine of the compiler. * High-level language prologue code. @@ -280,6 +282,7 @@ void main () { long loc1; long loc2; + loc2 = 255; loc1 = 143; loc1 = (loc2 + loc1); diff --git a/tests/prev_base/MATRIXMU.b b/tests/prev_base/MATRIXMU.b index f645886..843b29f 100644 --- a/tests/prev_base/MATRIXMU.b +++ b/tests/prev_base/MATRIXMU.b @@ -15,16 +15,14 @@ void proc_1 (int arg0, int arg1, int arg2) int loc1; int loc2; int loc3; - loc2 = 0; + loc2 = 0; while ((loc2 < 5)) { loc3 = 0; - while ((loc3 < 4)) { loc1 = 0; - while ((loc1 < 4)) { - *((((loc2 * 10) + arg2) + (loc3 << 1))) = ((*(((((loc2 << 3) << 1) + arg0) + (loc1 << 1))) * *((((loc1 * 10) + arg1) + (loc3 << 1)))) + *((((loc2 * 10) + arg2) + (loc3 << 1)))); + *((((loc2 * 10) + arg2) + (loc3 << 1))) = ((*((((loc2 << 3) + arg0) + (loc1 << 1))) * *((((loc1 * 10) + arg1) + (loc3 << 1)))) + *((((loc2 * 10) + arg2) + (loc3 << 1)))); loc1 = (loc1 + 1); } /* end of while */ loc3 = (loc3 + 1); @@ -42,6 +40,7 @@ void main () int loc1; int loc2; int loc3; + proc_1 (&loc3, &loc2, &loc1); }