From ca129c5177e17545a3ba5070ca652b4fc8c2c1bd Mon Sep 17 00:00:00 2001 From: Artur K Date: Sun, 15 Jul 2012 20:17:16 +0200 Subject: [PATCH] Fix to idiom19 and fixFloatEmulation() --- include/icode.h | 2 + include/idioms/arith_idioms.h | 4 +- include/machine_x86.h | 5 ++- regression_tester.rb | 1 + src/backend.cpp | 8 ++-- src/chklib.cpp | 8 +++- src/dataflow.cpp | 42 +++++++++--------- src/dcc.cpp | 7 ++- src/idioms/arith_idioms.cpp | 81 +++++++++++++++++++---------------- src/locident.cpp | 3 +- src/machine_x86.cpp | 25 ++++++++++- src/scanner.cpp | 2 + src/udm.cpp | 3 ++ 13 files changed, 119 insertions(+), 72 deletions(-) diff --git a/include/icode.h b/include/icode.h index dd9bd4f..b0e870d 100644 --- a/include/icode.h +++ b/include/icode.h @@ -65,6 +65,8 @@ struct LivenessSet : public std::bitset<32> { return test(r-rAX); } +private: + void postProcessCompositeRegs(); }; extern LivenessSet duReg[30]; diff --git a/include/idioms/arith_idioms.h b/include/idioms/arith_idioms.h index a48e09b..d7faeb0 100644 --- a/include/idioms/arith_idioms.h +++ b/include/idioms/arith_idioms.h @@ -37,6 +37,8 @@ struct Idiom18 : public Idiom protected: iICODE m_icodes[4]; bool m_is_dec; + /* type of variable: 1 = reg-var, 2 = local */ + int m_idiom_type; public: Idiom18(Function *f) : Idiom(f) { @@ -64,7 +66,7 @@ struct Idiom20 : public Idiom { protected: iICODE m_icodes[4]; - bool m_is_dec; + condNodeType m_is_dec; public: Idiom20(Function *f) : Idiom(f) { diff --git a/include/machine_x86.h b/include/machine_x86.h index cddcc66..8d845b8 100644 --- a/include/machine_x86.h +++ b/include/machine_x86.h @@ -75,8 +75,11 @@ public: } static eReg subRegH(eReg reg); static eReg subRegL(eReg reg); - static bool isMemOff(eReg r); static bool isSubRegisterOf(eReg reg, eReg parent); + static bool hasSubregisters(eReg reg); + + static bool isPartOfComposite(eReg reg); + static eReg compositeParent(eReg reg); }; diff --git a/regression_tester.rb b/regression_tester.rb index 6d530bf..43eeee4 100755 --- a/regression_tester.rb +++ b/regression_tester.rb @@ -16,6 +16,7 @@ def perform_test(exepath,filepath,outname,args) printf("calling:" + "#{exepath} -a1 #{joined_args} -o#{output_path}.a1 #{filepath}\n") result = `#{exepath} -a1 -o#{output_path}.a1 #{filepath}` result = `#{exepath} -a2 #{joined_args} -o#{output_path}.a2 #{filepath}` + result = `#{exepath} #{joined_args} -o#{output_path} #{filepath}` puts result p $? end diff --git a/src/backend.cpp b/src/backend.cpp index 6def061..f468798 100644 --- a/src/backend.cpp +++ b/src/backend.cpp @@ -297,7 +297,7 @@ void Function::codeGen (std::ostream &fs) /* Recursive procedure. Displays the procedure's code in depth-first order * of the call graph. */ -static void backBackEnd (char *filename, CALL_GRAPH * pcallGraph, std::ostream &_ios) +static void backBackEnd (CALL_GRAPH * pcallGraph, std::ostream &_ios) { // IFace.Yield(); /* This is a good place to yield to other apps */ @@ -311,7 +311,7 @@ static void backBackEnd (char *filename, CALL_GRAPH * pcallGraph, std::ostream & /* Dfs if this procedure has any successors */ for (size_t i = 0; i < pcallGraph->outEdges.size(); i++) { - backBackEnd (filename, pcallGraph->outEdges[i], _ios); + backBackEnd (pcallGraph->outEdges[i], _ios); } /* Generate code for this procedure */ @@ -346,14 +346,14 @@ void BackEnd (char *fileName, CALL_GRAPH * pcallGraph) printf ("dcc: Writing C beta file %s\n", outNam.c_str()); /* Header information */ - writeHeader (fs, fileName); + writeHeader (fs, option.filename); /* Initialize total Icode instructions statistics */ stats.totalLL = 0; stats.totalHL = 0; /* Process each procedure at a time */ - backBackEnd (fileName, pcallGraph, fs); + backBackEnd (pcallGraph, fs); /* Close output file */ fs.close(); diff --git a/src/chklib.cpp b/src/chklib.cpp index fc7b94d..420145a 100644 --- a/src/chklib.cpp +++ b/src/chklib.cpp @@ -492,7 +492,7 @@ bool LibCheck(Function & pProc) pProc.flg |= PROC_IS_FUNC; switch (pProc.retVal.type) { case TYPE_LONG_SIGN: case TYPE_LONG_UNSIGN: - pProc.liveOut.setReg(rDX) |= duReg[rAX]; + pProc.liveOut.setReg(rDX).addReg(rAX); break; case TYPE_WORD_SIGN: case TYPE_WORD_UNSIGN: pProc.liveOut.setReg(rAX); @@ -500,8 +500,12 @@ bool LibCheck(Function & pProc) case TYPE_BYTE_SIGN: case TYPE_BYTE_UNSIGN: pProc.liveOut.setReg(rAL); break; + case TYPE_PTR: + fprintf(stderr,"Warning assuming Large memory model\n"); + pProc.liveOut.setReg(rAX).addReg(rDS); + break; default: - fprintf(stderr,"Unknown retval type %d in LibCheck\n",pProc.retVal.type); + fprintf(stderr,"Unknown retval type %d for %s in LibCheck\n",pProc.retVal.type,pProc.name.c_str()); /*** other types are not considered yet ***/ } } diff --git a/src/dataflow.cpp b/src/dataflow.cpp index 9729c83..40af9ea 100644 --- a/src/dataflow.cpp +++ b/src/dataflow.cpp @@ -4,16 +4,18 @@ * Purpose: Data flow analysis module. * (C) Cristina Cifuentes ****************************************************************************/ - -#include "dcc.h" +#include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include -#include + +#include "dcc.h" + using namespace boost; using namespace boost::adaptors; struct ExpStack @@ -232,11 +234,11 @@ void Function::elimCondCodes () /** Generates the LiveUse() and Def() sets for each basic block in the graph. - * Note: these sets are constant and could have been constructed during - * the construction of the graph, but since the code hasn't been - * analyzed yet for idioms, the procedure preamble misleads the - * analysis (eg: push si, would include si in LiveUse; although it - * is not really meant to be a register that is used before defined). */ + \note these sets are constant and could have been constructed during + the construction of the graph, but since the code hasn't been + analyzed yet for idioms, the procedure preamble misleads the + analysis (eg: push si, would include si in LiveUse; although it + is not really meant to be a register that is used before defined). */ void Function::genLiveKtes () { BB * pbb; @@ -291,7 +293,7 @@ void Function::liveRegAnalysis (LivenessSet &in_liveOut) { /* Get current liveIn() and liveOut() sets */ - prevLiveIn = pbb->liveIn; + prevLiveIn = pbb->liveIn; prevLiveOut = pbb->liveOut; /* liveOut(b) = U LiveIn(s); where s is successor(b) @@ -503,22 +505,22 @@ void BB::genDU1() assert(0!=Parent); ICODE::TypeFilter select_high_level; auto all_high_levels = instructions | filtered(select_high_level); - printf("\n"); for (auto picode=all_high_levels.begin(); picode!=all_high_levels.end(); ++picode) { + ICODE &ic = *picode; int defRegIdx = 0; // foreach defined register - for (int k = 0; k < INDEX_BX_SI; k++) + for (int k = rAX; k < INDEX_BX_SI; k++) { - if (not picode->du.def.test(k)) + if (not ic.du.def.testReg(k)) continue; - eReg regi = (eReg)(k + 1); /* Register that was defined */ + eReg regi = (eReg)(k); /* Register that was defined */ picode->du1.regi[defRegIdx] = regi; if(FindUseBeforeDef(regi,defRegIdx, picode.base())) continue; - ProcessUseDefForFunc(regi, defRegIdx,*picode); + ProcessUseDefForFunc(regi, defRegIdx,ic); RemoveUnusedDefs(regi, defRegIdx, picode.base()); defRegIdx++; @@ -541,8 +543,7 @@ void Function::genDU1 () } -/* Substitutes the rhs (or lhs if rhs not possible) of ticode for the rhs - * of picode. */ +/* Substitutes the rhs (or lhs if rhs not possible) of ticode for the rhs of picode. */ void LOCAL_ID::forwardSubs (COND_EXPR *lhs, COND_EXPR *rhs, iICODE picode, iICODE ticode, int &numHlIcodes) const { bool res; @@ -570,8 +571,7 @@ void LOCAL_ID::forwardSubs (COND_EXPR *lhs, COND_EXPR *rhs, iICODE picode, iICOD } -/* Substitutes the rhs (or lhs if rhs not possible) of ticode for the - * expression exp given */ +/* Substitutes the rhs (or lhs if rhs not possible) of ticode for the expression exp given */ static void forwardSubsLong (int longIdx, COND_EXPR *_exp, iICODE picode, iICODE ticode, int *numHlIcodes) { bool res; diff --git a/src/dcc.cpp b/src/dcc.cpp index a39c124..abf014e 100644 --- a/src/dcc.cpp +++ b/src/dcc.cpp @@ -44,18 +44,21 @@ int main(int argc, char *argv[]) DccFrontend fe(option.filename); if(false==fe.FrontEnd ()) return -1; - + if(option.asm1) + return 0; /* In the middle is a so called Universal Decompiling Machine. * It processes the procedure list and I-code and attaches where it can * to each procedure an optimised cfg and ud lists */ udm(); + if(option.asm2) + return 0; /* Back end converts each procedure into C using I-code, interval * analysis, data flow etc. and outputs it to output file ready for * re-compilation. */ - BackEnd(option.filename, Project::get()->callGraph); + BackEnd(asm1_name ? asm1_name:option.filename, Project::get()->callGraph); Project::get()->callGraph->write(); diff --git a/src/idioms/arith_idioms.cpp b/src/idioms/arith_idioms.cpp index 5b90ae6..053e574 100644 --- a/src/idioms/arith_idioms.cpp +++ b/src/idioms/arith_idioms.cpp @@ -95,65 +95,68 @@ bool Idiom18::match(iICODE picode) for(int i=0; i<4; ++i) m_icodes[i] =picode++; + m_idiom_type=-1; m_is_dec = m_icodes[1]->ll()->match(iDEC); - int type = -1; /* type of variable: 1 = reg-var, 2 = local */ + uint8_t regi; /* register of the MOV */ /* Get variable */ if (m_icodes[1]->ll()->dst.regi == 0) /* global variable */ { /* not supported yet */ - type = 0; + m_idiom_type = 0; } else if ( m_icodes[1]->ll()->dst.isReg() ) /* register */ { if ((m_icodes[1]->ll()->dst.regi == rSI) && (m_func->flg & SI_REGVAR)) - type = 1; + m_idiom_type = 1; else if ((m_icodes[1]->ll()->dst.regi == rDI) && (m_func->flg & DI_REGVAR)) - type = 1; + m_idiom_type = 1; } else if (m_icodes[1]->ll()->dst.off) /* local variable */ - type = 2; + m_idiom_type = 2; else /* indexed */ { - type=3; + m_idiom_type=3; /* not supported yet */ - printf("Unsupported idiom18 type: indexed"); + ICODE &ic(*picode); + const Function *my_proc(ic.getParent()->getParent()); + printf("Unsupported idiom18 type at %x in %s:%x : indexed\n",ic.loc_ip,my_proc->name.c_str(),my_proc->procEntry); } - switch(type) + switch(m_idiom_type) { - case 0: // global - printf("Unsupported idiom18 type: global variable"); - break; - case 1: /* register variable */ - /* Check previous instruction for a MOV */ - if (m_icodes[0]->ll()->match(iMOV) && (m_icodes[0]->ll()->src().regi == m_icodes[1]->ll()->dst.regi)) - { - regi = m_icodes[0]->ll()->dst.regi; - if ( m_icodes[0]->ll()->dst.isReg() ) + case 0: // global + printf("Unsupported idiom18 type at %x : global variable\n",picode->loc_ip); + break; + case 1: /* register variable */ + /* Check previous instruction for a MOV */ + if (m_icodes[0]->ll()->match(iMOV) && (m_icodes[0]->ll()->src().regi == m_icodes[1]->ll()->dst.regi)) { - if ( m_icodes[2]->ll()->match(iCMP) && (m_icodes[2]->ll()->dst.regi == regi) && - m_icodes[3]->ll()->conditionalJump() ) - return true; + regi = m_icodes[0]->ll()->dst.regi; + if ( m_icodes[0]->ll()->dst.isReg() ) + { + if ( m_icodes[2]->ll()->match(iCMP) && (m_icodes[2]->ll()->dst.regi == regi) && + m_icodes[3]->ll()->conditionalJump() ) + return true; + } } - } - break; - case 2: /* local */ - if (m_icodes[0]->ll()->match(iMOV) && (m_icodes[0]->ll()->src().off == m_icodes[1]->ll()->dst.off)) - { - regi = m_icodes[0]->ll()->dst.regi; - if ( m_icodes[0]->ll()->dst.isReg() ) + break; + case 2: /* local */ + if (m_icodes[0]->ll()->match(iMOV) && (m_icodes[0]->ll()->src().off == m_icodes[1]->ll()->dst.off)) { - if ( m_icodes[2]->ll()->match(iCMP) && (m_icodes[2]->ll()->dst.regi == regi) && - m_icodes[3]->ll()->conditionalJump() ) - return true; + regi = m_icodes[0]->ll()->dst.regi; + if ( m_icodes[0]->ll()->dst.isReg() ) + { + if ( m_icodes[2]->ll()->match(iCMP) && (m_icodes[2]->ll()->dst.regi == regi) && + m_icodes[3]->ll()->conditionalJump() ) + return true; + } } - } - break; - case 3: // indexed - printf("Unsupported idiom18 type: indexed"); - break; + break; + case 3: // indexed + printf("Unsupported idiom18 type: indexed"); + break; } return false; } @@ -188,6 +191,7 @@ bool Idiom19::match(iICODE picode) { if(std::distance(picode,m_end)<2) return false; + ICODE &ic(*picode); for(int i=0; i<2; ++i) m_icodes[i] =picode++; @@ -215,7 +219,8 @@ bool Idiom19::match(iICODE picode) int Idiom19::action() { COND_EXPR *lhs,*rhs,*expr; - lhs = COND_EXPR::id (*m_icodes[1]->ll(), DST, m_func, m_icodes[0], *m_icodes[1], eUSE); + ICODE &ic1(*m_icodes[1]); + lhs = COND_EXPR::id (*m_icodes[0]->ll(), DST, m_func, m_icodes[0], *m_icodes[1], eUSE); lhs = COND_EXPR::unary (m_is_dec ? PRE_DEC : PRE_INC, lhs); rhs = COND_EXPR::idKte (0, 2); expr = COND_EXPR::boolOp (lhs, rhs, condOpJCond[m_icodes[1]->ll()->getOpcode() - iJB]); @@ -248,7 +253,7 @@ bool Idiom20::match(iICODE picode) for(int i=0; i<4; ++i) m_icodes[i] =picode++; - m_is_dec = m_icodes[0]->ll()->match(iDEC); + m_is_dec = m_icodes[0]->ll()->match(iDEC) ? PRE_DEC : PRE_INC; LLOperand &ll_dest(m_icodes[0]->ll()->dst); /* Get variable */ @@ -306,7 +311,7 @@ int Idiom20::action() { COND_EXPR *lhs,*rhs,*expr; lhs = COND_EXPR::id (*m_icodes[1]->ll(), SRC, m_func, m_icodes[0], *m_icodes[0], eUSE); - lhs = COND_EXPR::unary (m_is_dec ? PRE_DEC : PRE_INC, lhs); + lhs = COND_EXPR::unary (m_is_dec, lhs); rhs = COND_EXPR::id (*m_icodes[2]->ll(), SRC, m_func, m_icodes[0], *m_icodes[3], eUSE); expr = COND_EXPR::boolOp (lhs, rhs, condOpJCond[m_icodes[3]->ll()->getOpcode() - iJB]); m_icodes[3]->setJCond(expr); diff --git a/src/locident.cpp b/src/locident.cpp index ad815dd..54e24d4 100644 --- a/src/locident.cpp +++ b/src/locident.cpp @@ -161,7 +161,6 @@ int LOCAL_ID::newLongReg(hlType t, eReg regH, eReg regL, iICODE ix_) { /* Insert icode index in list */ entry.idx.push_back(ix_); - //entry.idx.insert(ix_); return (idx); } } @@ -169,7 +168,7 @@ int LOCAL_ID::newLongReg(hlType t, eReg regH, eReg regL, iICODE ix_) /* Not in the table, create new identifier */ newIdent (t, REG_FRAME); - id_arr[id_arr.size()-1].idx.push_back(ix_);//insert(ix_); + id_arr[id_arr.size()-1].idx.push_back(ix_); idx = id_arr.size() - 1; id_arr[idx].id.longId.h = regH; id_arr[idx].id.longId.l = regL; diff --git a/src/machine_x86.cpp b/src/machine_x86.cpp index 217838d..b0e87cf 100644 --- a/src/machine_x86.cpp +++ b/src/machine_x86.cpp @@ -17,7 +17,7 @@ static const std::string regNames[] = { Machine_X86::Machine_X86() { static_assert((sizeof(regNames)/sizeof(std::string))==LAST_REG, - "Reg count not equal number of strings"); + "Reg count not equal number of strings"); } const std::string &Machine_X86::regName(eReg r) @@ -109,3 +109,26 @@ bool Machine_X86::isSubRegisterOf(eReg reg,eReg parent) return false; // only AX -> BX are coverede by subregisters return ((reg==subRegH(parent)) || (reg == subRegL(parent))); } +bool Machine_X86::hasSubregisters(eReg reg) +{ + return ((reg >= rAX) && (reg <= rBX)); +} + +bool Machine_X86::isPartOfComposite(eReg reg) +{ + return ((reg >= rAL) && (reg <= rBH)); +} + +eReg Machine_X86::compositeParent(eReg reg) +{ + switch(reg) + { + case rAL: case rAH: return rAX; + case rCL: case rCH: return rCX; + case rDL: case rDH: return rDX; + case rBL: case rBH: return rBX; + default: + return rUNDEF; + } + return rUNDEF; +} diff --git a/src/scanner.cpp b/src/scanner.cpp index 378e8e3..1128a37 100644 --- a/src/scanner.cpp +++ b/src/scanner.cpp @@ -350,6 +350,8 @@ static void convertUsedFlags(x86_insn_t &from,ICODE &to) ****************************************************************************/ static void fixFloatEmulation(x86_insn_t &insn) { + if(insn.operand_count==0) + return; if(insn.group!=x86_insn_t::insn_interrupt) return; PROG &prog(Project::get()->prog); diff --git a/src/udm.cpp b/src/udm.cpp index 03c461d..c8f2e81 100644 --- a/src/udm.cpp +++ b/src/udm.cpp @@ -31,6 +31,7 @@ void Function::buildCFG(Disassembler &ds) if (option.asm2) { ds.disassem(this); // Print 2nd pass assembler listing + return; } /* Idiom analysis and propagation of long type */ @@ -78,6 +79,8 @@ void udm(void) { iter->buildCFG(ds); } + if (option.asm2) + return; /* Data flow analysis - eliminate condition codes, extraneous registers