commit 4c249fe5c4a5e93d3fcdd0f8309e846cd11dc47b Author: Artur K Date: Thu Nov 24 20:19:41 2011 +0100 init diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100755 index 0000000..313bfd6 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,63 @@ +#CC = gcc -g -O -D__UNIX__ +PROJECT(dcc_original) +cmake_minimum_required(VERSION 2.6) +SET(CMAKE_BUILD_TYPE Debug) +ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS -D__UNIX__) +INCLUDE_DIRECTORIES(include ${Boost_INCLUDE_DIRS}) +if(CMAKE_BUILD_TOOL MATCHES "(msdev|devenv|nmake)") + ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS -D__UNIX__ -D_CRT_NONSTDC_NO_DEPRECATE) + add_definitions(/W4) +else() + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall --std=c++0x") +endif() + +set(dcc_SOURCES +src/dcc.cpp +src/ast.cpp +src/backend.cpp +src/bundle.cpp +src/chklib.cpp +src/comwrite.cpp +src/control.cpp +src/dataflow.cpp +src/disassem.cpp +src/error.cpp +src/fixwild.cpp +src/frontend.cpp +src/graph.cpp +src/hlicode.cpp +src/icode.cpp +src/idioms.cpp +src/locident.cpp +src/parser.cpp +src/perfhlib.cpp +src/procs.cpp +src/proplong.cpp +src/reducible.cpp +src/scanner.cpp +src/symtab.cpp +src/udm.cpp +src/BasicBlock.cpp +) +set(dcc_HEADERS + include/ast.h + include/bundle.h + include/dcc.h + include/disassem.h + include/dosdcc.h + include/error.h + include/graph.h + include/hlicode.h + include/icode.h + include/locident.h + include/perfhlib.h + include/scanner.h + include/state.h + include/symtab.h + include/types.h + include/Procedure.h + include/StackFrame.h + include/BasicBlock.h +) +ADD_EXECUTABLE(dcc_original ${dcc_SOURCES} ${dcc_HEADERS}) + diff --git a/include/BasicBlock.h b/include/BasicBlock.h new file mode 100644 index 0000000..6cf10d1 --- /dev/null +++ b/include/BasicBlock.h @@ -0,0 +1,88 @@ +#pragma once +#include +#include +#include +#include "types.h" +/* Basic block (BB) node definition */ +struct Function; +class CIcodeRec; +struct BB; +struct interval; +typedef union +{ + dword ip; /* Out edge icode address */ + BB * BBptr; /* 
Out edge pointer to next BB */ + interval *intPtr; /* Out edge ptr to next interval*/ +} TYPEADR_TYPE; + +struct BB +{ +protected: + BB(const BB&); + BB() : nodeType(0),traversed(0),start(0),length(0), + numHlIcodes(0),flg(0), + numInEdges(0),inEdges(0), + numOutEdges(0),edges(0),beenOnH(0),inEdgeCount(0),reachingInt(0), + inInterval(0),correspInt(0),liveUse(0),def(0),liveIn(0),liveOut(0), + dfsFirstNum(0),dfsLastNum(0),immedDom(0),ifFollow(0),loopType(0),latchNode(0), + numBackEdges(0),loopHead(0),loopFollow(0),caseHead(0),caseTail(0),index(0) + { + + } + +public: + byte nodeType; /* Type of node */ + Int traversed; /* Boolean: traversed yet? */ + Int start; /* First instruction offset */ + Int length; /* No. of instructions this BB */ + Int numHlIcodes; /* No. of high-level icodes */ + flags32 flg; /* BB flags */ + + /* In edges and out edges */ + Int numInEdges; /* Number of in edges */ + std::vector inEdges; // does not own held pointers + + Int numOutEdges; /* Number of out edges */ + std::vector edges;/* Array of ptrs. 
to out edges */ + + /* For interval construction */ + Int beenOnH; /* #times been on header list H */ + Int inEdgeCount; /* #inEdges (to find intervals) */ + BB * reachingInt; /* Reaching interval header */ + interval *inInterval; /* Node's interval */ + + /* For derived sequence construction */ + interval *correspInt; /* Corresponding interval in + * derived graph Gi-1 */ + + /* For live register analysis + * LiveIn(b) = LiveUse(b) U (LiveOut(b) - Def(b)) */ + dword liveUse; /* LiveUse(b) */ + dword def; /* Def(b) */ + dword liveIn; /* LiveIn(b) */ + dword liveOut; /* LiveOut(b) */ + + /* For structuring analysis */ + Int dfsFirstNum; /* DFS #: first visit of node */ + Int dfsLastNum; /* DFS #: last visit of node */ + Int immedDom; /* Immediate dominator (dfsLast + * index) */ + Int ifFollow; /* node that ends the if */ + Int loopType; /* Type of loop (if any) */ + Int latchNode; /* latching node of the loop */ + Int numBackEdges; /* # of back edges */ + Int loopHead; /* most nested loop head to which + * this node belongs (dfsLast) */ + Int loopFollow; /* node that follows the loop */ + Int caseHead; /* most nested case to which this + node belongs (dfsLast) */ + Int caseTail; /* tail node for the case */ + + Int index; /* Index, used in several ways */ + static BB *Create(void *ctx=0,const std::string &s="",Function *parent=0,BB *insertBefore=0); + static BB *Create(Int start, Int ip, byte nodeType, Int numOutEdges, Function * parent); + void writeCode(Int indLevel, Function *pProc, Int *numLoc, Int latchNode, Int ifFollow); + void mergeFallThrough(CIcodeRec &Icode); + void dfsNumbering(std::vector &dfsLast, Int *first, Int *last); + void displayDfs(); +}; diff --git a/include/Enums.h b/include/Enums.h new file mode 100644 index 0000000..e3bf1dd --- /dev/null +++ b/include/Enums.h @@ -0,0 +1,85 @@ +#pragma once +/* Register types */ +enum regType +{ + BYTE_REG, + WORD_REG +}; +enum condId +{ + GLOB_VAR, /* global variable */ + REGISTER, /* register */ + 
LOCAL_VAR, /* negative disp */ + PARAM, /* positive disp */ + GLOB_VAR_IDX, /* indexed global variable *//*** should merge w/glob-var*/ + CONSTANT, /* constant */ + STRING, /* string */ + LONG_VAR, /* long variable */ + FUNCTION, /* function */ + OTHER /* other **** tmp solution */ +}; + +enum condOp +{ + /* For conditional expressions */ + LESS_EQUAL = 0, /* <= */ + LESS, /* < */ + EQUAL, /* == */ + NOT_EQUAL, /* != */ + GREATER, /* > */ + GREATER_EQUAL, /* >= */ + /* For general expressions */ + AND, /* & */ + OR, /* | */ + XOR, /* ^ */ + NOT, /* ~ */ /* 1's complement */ + ADD, /* + */ + SUB, /* - */ + MUL, /* * */ + DIV, /* / */ + SHR, /* >> */ + SHL, /* << */ + MOD, /* % */ + DBL_AND, /* && */ + DBL_OR, /* || */ + DUMMY /* */ +}; +/* LOW_LEVEL operand location: source or destination */ +enum opLoc +{ + SRC, /* Source operand */ + DST, /* Destination operand */ + LHS_OP /* Left-hand side operand (for HIGH_LEVEL) */ +}; + +/* Conditional Expression enumeration nodes and operators */ +enum condNodeType +{ + UNKNOWN_OP=0, + BOOLEAN_OP, /* condOps */ + NEGATION, /* not (2's complement) */ + ADDRESSOF, /* addressOf (&) */ + DEREFERENCE, /* contents of (*) */ + IDENTIFIER, /* {register | local | param | constant | global} */ + /* The following are only available to C programs */ + POST_INC, /* ++ (post increment) */ + POST_DEC, /* -- (post decrement) */ + PRE_INC, /* ++ (pre increment) */ + PRE_DEC /* -- (pre decrement) */ +} ; + +/* Enumeration to determine whether pIcode points to the high or low part + * of a long number */ +enum hlFirst +{ + HIGH_FIRST, /* High value is first */ + LOW_FIRST /* Low value is first */ +}; +/* Operand is defined, used or both flag */ +enum operDu +{ + eDEF=0x10, /* Operand is defined */ + eUSE=0x100, /* Operand is used */ + USE_DEF, /* Operand is used and defined */ + NONE /* No operation is required on this operand */ +}; diff --git a/include/IdentType.h b/include/IdentType.h new file mode 100644 index 0000000..d13aca6 --- /dev/null 
+++ b/include/IdentType.h @@ -0,0 +1,31 @@ +#pragma once +#include "ast.h" +#include "types.h" +struct IDENTTYPE +{ + condId idType; + regType regiType; /* for REGISTER only */ + union _idNode { + Int regiIdx; /* index into localId, REGISTER */ + Int globIdx; /* index into symtab for GLOB_VAR */ + Int localIdx; /* idx into localId, LOCAL_VAR */ + Int paramIdx; /* idx into args symtab, PARAMS */ + Int idxGlbIdx; /* idx into localId, GLOB_VAR_IDX */ + struct _kte + { /* for CONSTANT only */ + dword kte; /* value of the constant */ + byte size; /* #bytes size constant */ + } kte; + dword strIdx; /* idx into image, for STRING */ + Int longIdx; /* idx into LOCAL_ID table, LONG_VAR*/ + struct _call { /* for FUNCTION only */ + Function *proc; + STKFRAME *args; + } call; + struct { /* for OTHER; tmp struct */ + byte seg; /* segment */ + byte regi; /* index mode */ + int16 off; /* offset */ + } other; + } idNode; +}; diff --git a/include/Procedure.h b/include/Procedure.h new file mode 100644 index 0000000..b0b39ef --- /dev/null +++ b/include/Procedure.h @@ -0,0 +1,77 @@ +#pragma once +#include "types.h" +#include "ast.h" +#include "icode.h" +#include "locident.h" +#include "error.h" +#include "graph.h" +#include "bundle.h" +#include "StackFrame.h" +/* PROCEDURE NODE */ +struct CALL_GRAPH; +struct Function +{ + dword procEntry; /* label number */ + char name[SYMLEN]; /* Meaningful name for this proc */ + STATE state; /* Entry state */ + Int depth; /* Depth at which we found it - for printing */ + flags32 flg; /* Combination of Icode & Proc flags */ + int16 cbParam; /* Probable no. of bytes of parameters */ + STKFRAME args; /* Array of arguments */ + LOCAL_ID localId; /* Local identifiers */ + ID retVal; /* Return value - identifier */ + + /* Icodes and control flow graph */ + CIcodeRec Icode; /* Object with ICODE records */ + std::vector cfg; /* Ptr. 
to BB list/CFG */ + std::vector dfsLast; + std::vector heldBBs; + //BB * *dfsLast; /* Array of pointers to BBs in dfsLast +// * (reverse postorder) order */ + Int numBBs; /* Number of BBs in the graph cfg */ + boolT hasCase; /* Procedure has a case node */ + + /* For interprocedural live analysis */ + dword liveIn; /* Registers used before defined */ + dword liveOut; /* Registers that may be used in successors */ + boolT liveAnal; /* Procedure has been analysed already */ + + /* Double-linked list */ +// Function *next; +// Function *prev; +public: + Function() : procEntry(0),depth(0),flg(0),cbParam(0),cfg(0),dfsLast(0),numBBs(0), + hasCase(false),liveIn(0),liveOut(0),liveAnal(0)//,next(0),prev(0) + { + memset(name,0,SYMLEN); + } + void compoundCond(); + void writeProcComments(); + void lowLevelAnalysis(); + void bindIcodeOff(); + void dataFlow(dword liveOut); + void compressCFG(); + void highLevelGen(); + void structure(derSeq *derivedG); + derSeq *checkReducibility(); + void createCFG(); + void markImpure(); + void findImmedDom(); + void FollowCtrl(CALL_GRAPH *pcallGraph, STATE *pstate); + void process_operands(ICODE *pIcode, STATE *pstate); + boolT process_JMP(ICODE *pIcode, STATE *pstate, CALL_GRAPH *pcallGraph); + boolT process_CALL(ICODE *pIcode, CALL_GRAPH *pcallGraph, STATE *pstate); + void displayCFG(); + void freeCFG(); + void codeGen(std::ostream &fs); + void displayStats(); + void mergeFallThrough(BB *pBB); +protected: + void findExps(); + void genDU1(); + void elimCondCodes(); + void liveRegAnalysis(dword in_liveOut); + void findIdioms(); + void propLong(); + void genLiveKtes(); +}; diff --git a/include/StackFrame.h b/include/StackFrame.h new file mode 100644 index 0000000..316470a --- /dev/null +++ b/include/StackFrame.h @@ -0,0 +1,45 @@ +#pragma once +#include "types.h" +#include "ast.h" +#include "icode.h" +#include "locident.h" +#include "error.h" +#include "graph.h" +#include "bundle.h" + +/* STACK FRAME */ +struct STKSYM +{ + COND_EXPR *actual; 
/* Expression tree of actual parameter */ + COND_EXPR *regs; /* For register arguments only */ + int16 off; /* Immediate off from BP (+:args, -:params) */ + byte regOff; /* Offset is a register (e.g. SI, DI) */ + Int size; /* Size */ + hlType type; /* Probable type */ + eDuVal duVal; /* DEF, USE, VAL */ + boolT hasMacro; /* This type needs a macro */ + char macro[10]; /* Macro name */ + char name[10]; /* Name for this symbol/argument */ + boolT invalid; /* Boolean: invalid entry in formal arg list*/ + STKSYM() + { + memset(this,0,sizeof(STKSYM)); + } +}; + +struct STKFRAME +{ + std::vector sym; + //STKSYM * sym; /* Symbols */ + int16 minOff; /* Initial offset in stack frame*/ + int16 maxOff; /* Maximum offset in stack frame*/ + Int cb; /* Number of bytes in arguments */ + Int numArgs; /* No. of arguments in the table*/ + void adjustForArgType(Int numArg_, hlType actType_); + STKFRAME() : sym(0),minOff(0),maxOff(0),cb(0),numArgs(0) + { + + } +public: + Int getLocVar(Int off); +}; diff --git a/include/ast.h b/include/ast.h new file mode 100644 index 0000000..895466a --- /dev/null +++ b/include/ast.h @@ -0,0 +1,91 @@ +/* + * File: ast.h + * Purpose: definition of the abstract syntax tree ADT. + * Date: September 1993 + * (C) Cristina Cifuentes + */ +#pragma once +static const int operandSize=20; +#include +#include "Enums.h" +/* The following definitions and types define the Conditional Expression + * attributed syntax tree, as defined by the following EBNF: + CondExp ::= CondTerm AND CondTerm | CondTerm + CondTerm ::= (CondFactor op CondFactor) + CondFactor ::= Identifier | ! 
CondFactor + Identifier ::= globalVar | register | localVar | parameter | constant + op ::= <= | < | = | != | > | >= + */ + +/* High-level BOOLEAN conditions for iJB..iJNS icodes */ +static const condOp condOpJCond[12] = {LESS, LESS_EQUAL, GREATER_EQUAL, GREATER, + EQUAL, NOT_EQUAL, LESS, GREATER_EQUAL, + LESS_EQUAL, GREATER, GREATER_EQUAL, LESS}; + +static const condOp invCondOpJCond[12] = {GREATER_EQUAL, GREATER, LESS, LESS_EQUAL, + NOT_EQUAL, EQUAL, GREATER_EQUAL, LESS, + GREATER, LESS_EQUAL, LESS, GREATER_EQUAL}; + +struct Function; +struct STKFRAME; +struct LOCAL_ID; +struct ICODE; +struct ID; +#include "IdentType.h" +//enum opLoc; +//enum hlFirst; +//enum operDu; +/* Expression data type */ +struct COND_EXPR +{ + condNodeType type; /* Conditional Expression Node Type */ + union _exprNode { /* Different cond expr nodes */ + struct /* for BOOLEAN_OP */ + { + condOp op; + COND_EXPR *lhs; + COND_EXPR *rhs; + } boolExpr; + COND_EXPR *unaryExp; /* for NEGATION,ADDRESSOF,DEREFERENCE*/ + IDENTTYPE ident; /* for IDENTIFIER */ + } expr; +public: + static COND_EXPR *idGlob(int16 segValue, int16 off); + static COND_EXPR *idRegIdx(Int idx, regType reg_type); + static COND_EXPR *idKte(dword kte, byte size); + static COND_EXPR *idLoc(Int off, LOCAL_ID *localId); + static COND_EXPR *idReg(byte regi, flags32 icodeFlg, LOCAL_ID *locsym); + static COND_EXPR *idLongIdx(Int idx); + static COND_EXPR *idOther(byte seg, byte regi, int16 off); + static COND_EXPR *idParam(Int off, const STKFRAME *argSymtab); + static COND_EXPR *unary(condNodeType t, COND_EXPR *sub_expr); + static COND_EXPR *idLong(LOCAL_ID *localId, opLoc sd, ICODE *pIcode, hlFirst f, Int ix, operDu du, Int off); + static COND_EXPR *idFunc(Function *pproc, STKFRAME *args); + static COND_EXPR *idID(const ID *retVal, LOCAL_ID *locsym, Int ix); + static COND_EXPR *id(ICODE *pIcode, opLoc sd, Function *pProc, Int i, ICODE *duIcode, operDu du); + static COND_EXPR *boolOp(COND_EXPR *lhs, COND_EXPR *rhs, condOp op); +public: 
+ COND_EXPR *clone(); + void release(); + void changeBoolOp(condOp newOp); + COND_EXPR(COND_EXPR &other) + { + type=other.type; + expr=other.expr; + } + COND_EXPR() + { + type=UNKNOWN_OP; + memset(&expr,0,sizeof(_exprNode)); + } +}; + +/* Sequence of conditional expression data type */ +/*** NOTE: not used at present ****/ +//struct SEQ_COND_EXPR +//{ +// COND_EXPR *expr; +// struct _condExpSeq *neccxt; +//}; + + diff --git a/include/bundle.h b/include/bundle.h new file mode 100644 index 0000000..e416335 --- /dev/null +++ b/include/bundle.h @@ -0,0 +1,31 @@ +/***************************************************************************** + * Project: dcc + * File: bundle.h + * Purpose: Module to handle the bundle type (array of pointers to strings). + * (C) Cristina Cifuentes + ****************************************************************************/ +#pragma once +#include +#include +#include +typedef std::vector strTable; + +struct bundle +{ +public: + void appendCode(const char *format, ...); + void appendDecl(const char *format, ...); + strTable decl; /* Declarations */ + strTable code; /* C code */ +}; + + +#define lineSize 360 /* 3 lines in the mean time */ + +void newBundle (bundle *procCode); +//void appendStrTab (strTable *strTab, const char *format, ...); +Int nextBundleIdx (strTable *strTab); +void addLabelBundle (strTable &strTab, Int idx, Int label); +void writeBundle (std::ostream &ios, bundle procCode); +void freeBundle (bundle *procCode); + diff --git a/include/dcc.h b/include/dcc.h new file mode 100644 index 0000000..b533076 --- /dev/null +++ b/include/dcc.h @@ -0,0 +1,219 @@ +/**************************************************************************** + * dcc project general header + * (C) Cristina Cifuentes, Mike van Emmerik + ****************************************************************************/ +#pragma once + +#include "types.h" +#include "ast.h" +#include "icode.h" +#include "locident.h" +#include "error.h" +#include "graph.h" 
+#include "bundle.h" +#include "Procedure.h" +#include "BasicBlock.h" +typedef std::list lFunction; +typedef std::list::iterator ilFunction; + +/* SYMBOL TABLE */ +struct SYM { + char name[10]; /* New name for this variable */ + dword label; /* physical address (20 bit) */ + Int size; /* maximum size */ + flags32 flg; /* SEG_IMMED, IMPURE, WORD_OFF */ + hlType type; /* probable type */ + eDuVal duVal; /* DEF, USE, VAL */ +}; + +struct SYMTAB +{ + Int csym; /* No. of symbols in table */ + Int alloc; /* Allocation */ + SYM * sym; /* Symbols */ +}; + +/* CALL GRAPH NODE */ +struct CALL_GRAPH +{ + ilFunction proc; /* Pointer to procedure in pProcList */ + std::vector outEdges; /* array of out edges */ +public: + void write(); + CALL_GRAPH() : outEdges(0) + { + } +public: + void writeNodeCallGraph(Int indIdx); + boolT insertCallGraph(ilFunction caller, ilFunction callee); + boolT insertCallGraph(Function *caller, ilFunction callee); + void insertArc(ilFunction newProc); +}; +#define NUM_PROCS_DELTA 5 /* delta # procs a proc invokes */ +extern std::list pProcList; +extern CALL_GRAPH * callGraph; /* Pointer to the head of the call graph */ +extern bundle cCode; /* Output C procedure's declaration and code */ + +/* Procedure FLAGS */ +enum PROC_FLAGS +{ + PROC_BADINST=0x000100,/* Proc contains invalid or 386 instruction */ + PROC_IJMP =0x000200,/* Proc incomplete due to indirect jmp */ + PROC_ICALL =0x000400, /* Proc incomplete due to indirect call */ + PROC_HLL=0x001000, /* Proc is likely to be from a HLL */ + CALL_PASCAL=0x002000, /* Proc uses Pascal calling convention */ + CALL_C=0x004000, /* Proc uses C calling convention */ + CALL_UNKNOWN=0x008000, /* Proc uses unknown calling convention */ + PROC_NEAR=0x010000, /* Proc exits with near return */ + PROC_FAR=0x020000, /* Proc exits with far return */ + GRAPH_IRRED=0x100000, /* Proc generates an irreducible graph */ + SI_REGVAR=0x200000, /* SI is used as a stack variable */ + DI_REGVAR=0x400000, /* DI is used as a stack 
variable */ + PROC_IS_FUNC=0x800000, /* Proc is a function */ + REG_ARGS=0x1000000, /* Proc has registers as arguments */ + PROC_VARARG=0x2000000, /* Proc has variable arguments */ + PROC_OUTPUT=0x4000000, /* C for this proc has been output */ + PROC_RUNTIME=0x8000000, /* Proc is part of the runtime support */ + PROC_ISLIB=0x10000000, /* Proc is a library function */ + PROC_ASM=0x20000000, /* Proc is an intrinsic assembler routine */ + PROC_IS_HLL=0x40000000 /* Proc has HLL prolog code */ +}; +#define CALL_MASK 0xFFFF9FFF /* Masks off CALL_C and CALL_PASCAL */ + + + + +/**** Global variables ****/ + +extern char *asm1_name, *asm2_name; /* Assembler output filenames */ + +typedef struct { /* Command line option flags */ + unsigned verbose : 1; + unsigned VeryVerbose : 1; + unsigned asm1 : 1; /* Early disassembly listing */ + unsigned asm2 : 1; /* Disassembly listing after restruct */ + unsigned Map : 1; + unsigned Stats : 1; + unsigned Interact : 1; /* Interactive mode */ + unsigned Calls : 1; /* Follow register indirect calls */ + char filename[80]; /* The input filename */ +} OPTION; + +extern OPTION option; /* Command line options */ +extern SYMTAB symtab; /* Global symbol table */ + +struct PROG /* Loaded program image parameters */ +{ + int16 initCS; + int16 initIP; /* These are initial load values */ + int16 initSS; /* Probably not of great interest */ + int16 initSP; + boolT fCOM; /* Flag set if COM program (else EXE)*/ + Int cReloc; /* No. of relocation table entries */ + dword *relocTable; /* Ptr. 
to relocation table */ + byte *map; /* Memory bitmap ptr */ + Int cProcs; /* Number of procedures so far */ + Int offMain; /* The offset of the main() proc */ + word segMain; /* The segment of the main() proc */ + boolT bSigs; /* True if signatures loaded */ + Int cbImage; /* Length of image in bytes */ + byte *Image; /* Allocated by loader to hold entire + * program image */ +}; + +extern PROG prog; /* Loaded program image parameters */ +extern char condExp[200]; /* Conditional expression buffer */ +extern char callBuf[100]; /* Function call buffer */ +extern dword duReg[30]; /* def/use bits for registers */ +extern dword maskDuReg[30]; /* masks off du bits for regs */ + +/* Registers used by icode instructions */ +static const char *allRegs[21] = {"ax", "cx", "dx", "bx", "sp", "bp", + "si", "di", "es", "cs", "ss", "ds", + "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", + "tmp"}; + +/* Memory map states */ +#define BM_UNKNOWN 0 /* Unscanned memory */ +#define BM_DATA 1 /* Data */ +#define BM_CODE 2 /* Code */ +#define BM_IMPURE 3 /* Used as Data and Code*/ + +/* Intermediate instructions statistics */ +struct STATS +{ + Int numBBbef; /* number of basic blocks initially */ + Int numBBaft; /* number of basic blocks at the end */ + Int nOrder; /* n-th order */ + Int numLLIcode; /* number of low-level Icode instructions */ + Int numHLIcode; /* number of high-level Icode instructions */ + Int totalLL; /* total number of low-level Icode insts */ + Int totalHL; /* total number of high-level Icode insts */ +}; + +extern STATS stats; /* Icode statistics */ + + +/**** Global function prototypes ****/ + +void FrontEnd(char *filename, CALL_GRAPH * *); /* frontend.c */ +void *allocMem(Int cb); /* frontend.c */ +void *reallocVar(void *p, Int newsize); /* frontend.c */ +void udm(void); /* udm.c */ +void freeCFG(BB * cfg); /* graph.c */ +BB * newBB(BB *, Int, Int, byte, Int, Function *); /* graph.c */ +void BackEnd(char *filename, CALL_GRAPH *); /* backend.c */ +char *cChar(byte 
c); /* backend.c */ +Int scan(dword ip, ICODE * p); /* scanner.c */ +void parse (CALL_GRAPH * *); /* parser.c */ +boolT labelSrch(ICODE * pIc, Int n, dword tg, Int *pIdx); /* parser.c */ +Int strSize (byte *, char); /* parser.c */ +void disassem(Int pass, Function * pProc); /* disassem.c */ +void interactDis(Function * initProc, Int initIC); /* disassem.c */ +boolT JmpInst(llIcode opcode); /* idioms.c */ +queue::iterator appendQueue(queue &Q, BB *node); /* reducible.c */ + +void SetupLibCheck(void); /* chklib.c */ +void CleanupLibCheck(void); /* chklib.c */ +boolT LibCheck(Function &p); /* chklib.c */ + +/* Exported functions from procs.c */ +boolT insertCallGraph (CALL_GRAPH *, ilFunction, ilFunction); +void newRegArg (Function *, ICODE *, ICODE *); +boolT newStkArg (ICODE *, COND_EXPR *, llIcode, Function *); +void allocStkArgs (ICODE *, Int); +void placeStkArg (ICODE *, COND_EXPR *, Int); +void adjustActArgType (COND_EXPR *, hlType, Function *); + +/* Exported functions from ast.c */ +void removeRegFromLong (byte, LOCAL_ID *, COND_EXPR *); +std::string walkCondExpr (const COND_EXPR *exp, Function * pProc, Int *); +Int hlTypeSize (const COND_EXPR *, Function *); +hlType expType (const COND_EXPR *, Function *); +void copyDU (ICODE *, const ICODE *, operDu, operDu); +boolT insertSubTreeReg (COND_EXPR *, COND_EXPR **, byte, LOCAL_ID *); +boolT insertSubTreeLongReg (COND_EXPR *, COND_EXPR **, Int); +//COND_EXPR *concatExps (SEQ_COND_EXPR *, COND_EXPR *, condNodeType); + +void initExpStk(); +void pushExpStk (COND_EXPR *); +COND_EXPR *popExpStk(); +Int numElemExpStk(); +boolT emptyExpStk(); + +/* Exported functions from hlicode.c */ +boolT removeDefRegi (byte, ICODE *, Int, LOCAL_ID *); +std::string writeCall (Function *, STKFRAME *, Function *, Int *); +char *write1HlIcode (HLTYPE, Function *, Int *); +char *writeJcond (HLTYPE, Function *, Int *); +char *writeJcondInv (HLTYPE, Function *, Int *); +Int power2 (Int); +void inverseCondOp (COND_EXPR **); + +/* Exported 
functions from locident.c */ +boolT checkLongEq (LONG_STKID_TYPE, ICODE *, Int, Int, Function *, COND_EXPR **,COND_EXPR **, Int); +boolT checkLongRegEq (LONGID_TYPE, ICODE *, Int, Int, Function *, COND_EXPR **,COND_EXPR **, Int); +byte otherLongRegi (byte, Int, LOCAL_ID *); +void insertIdx (IDX_ARRAY *, Int); + + diff --git a/include/disassem.h b/include/disassem.h new file mode 100644 index 0000000..3789bf6 --- /dev/null +++ b/include/disassem.h @@ -0,0 +1,43 @@ +/**************************************************************************** + * dcc project disassembler header + * (C) Mike van Emmerik + ****************************************************************************/ + +/* Definitions for extended keys (first key is zero) */ + +#define EXT 0x100 /* "Extended" flag */ + +#ifdef __MSDOS__ +#define KEY_DOWN EXT+'P' +#define KEY_LEFT EXT+'K' +#define KEY_UP EXT+'H' +#define KEY_RIGHT EXT+'M' +#define KEY_NPAGE EXT+'Q' +#define KEY_PPAGE EXT+'I' +#endif + +#ifdef _CONSOLE +#define KEY_DOWN 0x50 /* Same as keypad scancodes */ +#define KEY_LEFT 0x4B +#define KEY_UP 0x48 +#define KEY_RIGHT 0x4D +#define KEY_NPAGE 0x51 +#define KEY_PPAGE 0x49 +#endif + +#ifdef __UNIX__ +#define KEY_DOWN EXT+'B' +#define KEY_LEFT EXT+'D' +#define KEY_UP EXT+'A' +#define KEY_RIGHT EXT+'C' +#define KEY_NPAGE EXT+'J' /* Enter correct value! */ +#define KEY_PPAGE EXT+'K' /* Another guess! */ +#endif + +/* "Attributes" */ +#define A_NORMAL 'N' /* For Dos/Unix */ +#define A_REVERSE 'I' +#define A_BOLD 'B' + +#define LINES 24 +#define COLS 80 diff --git a/include/dosdcc.h b/include/dosdcc.h new file mode 100644 index 0000000..84ccd96 --- /dev/null +++ b/include/dosdcc.h @@ -0,0 +1,77 @@ +/*************************************************************************** + * File : dosdcc.h + * Purpose : include file for files decompiled by dcc. 
+ * Copyright (c) Cristina Cifuentes - QUT - 1992 + **************************************************************************/ + +/* Type definitions for intel 80x86 architecture */ +typedef unsigned int Word; /* 16 bits */ +typedef unsigned char Byte; /* 8 bits */ +typedef union { + unsigned long dW; + Word wL, wH; /* 2 words */ +} Dword; /* 32 bits */ + +/* Structure to access high and low bits of a Byte or Word variable */ +typedef struct { + /* low byte */ + Word lowBitWord : 1; + Word filler1 : 6; + Word highBitByte : 1; + /* high byte */ + Word lowBitByte : 1; + Word filler2 : 6; + Word highBitWord : 1; +} wordBits; + +/* Low and high bits of a Byte or Word variable */ +#define lowBit(a) ((wordBits)(a).lowBitWord) +#define highBitByte(a) ((wordBits)(a).highBitByte) +#define lowBitByte(a) ((wordBits)(a).lowBitByte) +#define highBit(a) (sizeof(a) == sizeof(Word) ? \ + ((wordBits)(a).highBitWord):\ + ((wordBits)(a).highBitByte)) + +/* Word register variables */ +#define ax regs.x.ax +#define bx regs.x.bx +#define cx regs.x.cx +#define dx regs.x.dx + +#define cs regs.x.cs +#define es regs.x.es +#define ds regs.x.ds +#define ss regs.x.ss + +#define si regs.x.si +#define di regs.x.di +#define bp regs.x.bp +#define sp regs.x.sp + +/* getting rid of all flags */ +#define carry regs.x.cflags +#define overF regs.x.flags /***** check *****/ + +/* Byte register variables */ +#define ah regs.h.ah +#define al regs.h.al +#define bh regs.h.bh +#define bl regs.h.bl +#define ch regs.h.ch +#define cl regs.h.cl +#define dh regs.h.dh +#define dl regs.h.dl + + +/* High and low words of a Dword */ +#define highWord(w) (*((Word*)&(w) + 1)) +#define lowWord(w) ((Word)(w)) + +#define MAXByte 0xFF +#define MAXWord 0xFFFF +#define MAXSignByte 0x7F +#define MINSignByte 0x81 +#define MAXSignWord 0x7FFF +#define MINSignWord 0x8001 + + diff --git a/include/error.h b/include/error.h new file mode 100644 index 0000000..e0afd90 --- /dev/null +++ b/include/error.h @@ -0,0 +1,33 @@ 
+/***************************************************************************** + * Error codes + * (C) Cristina Cifuentes + ****************************************************************************/ +#pragma once + +/* These definitions refer to errorMessage in error.c */ + +#define USAGE 0 +#define INVALID_ARG 1 +#define INVALID_OPCODE 2 +#define INVALID_386OP 3 +#define FUNNY_SEGOVR 4 +#define FUNNY_REP 5 +#define CANNOT_OPEN 6 +#define CANNOT_READ 7 +#define MALLOC_FAILED 8 +#define NEWEXE_FORMAT 9 + +#define NO_BB 10 +#define INVALID_SYNTHETIC_BB 11 +#define INVALID_INT_BB 12 +#define IP_OUT_OF_RANGE 13 +#define DEF_NOT_FOUND 14 +#define JX_NOT_DEF 15 +#define NOT_DEF_USE 16 +#define REPEAT_FAIL 17 +#define WHILE_FAIL 18 + + +void fatalError(Int errId, ...); +void reportError(Int errId, ...); + diff --git a/include/graph.h b/include/graph.h new file mode 100644 index 0000000..3892566 --- /dev/null +++ b/include/graph.h @@ -0,0 +1,99 @@ +/***************************************************************************** + * CFG, BB and interval related definitions + * (C) Cristina Cifuentes + ****************************************************************************/ +#pragma once +#include +#include +/* Types of basic block nodes */ +/* Real basic blocks: type defined according to their out-edges */ +enum eBBKind +{ + ONE_BRANCH = 0, /* unconditional branch */ + TWO_BRANCH = 1, /* conditional branch */ + MULTI_BRANCH=2, /* case branch */ + FALL_NODE=3, /* fall through */ + RETURN_NODE=4, /* procedure/program return */ + CALL_NODE=5, /* procedure call */ + LOOP_NODE=6, /* loop instruction */ + REP_NODE=7, /* repeat instruction */ + INTERVAL_NODE=8, /* contains interval list */ + + TERMINATE_NODE=11, /* Exit to DOS */ + NOWHERE_NODE=12 /* No outedges going anywhere */ +}; + + +/* Depth-first traversal constants */ +enum eDFS +{ + DFS_DISP=1, /* Display graph pass */ + DFS_MERGE=2, /* Merge nodes pass */ + DFS_NUM=3, /* DFS numbering pass */ + DFS_CASE=4, /* Case 
pass */ + DFS_ALPHA=5, /* Alpha code generation*/ + DFS_JMP=9 /* rmJMP pass - must be largest flag */ +}; + +/* Control flow analysis constants */ +enum eNodeHeaderType +{ + NO_TYPE=0, /* node is not a loop header*/ + WHILE_TYPE=1, /* node is a while header */ + REPEAT_TYPE=2, /* node is a repeat header */ + ENDLESS_TYPE=3 /* endless loop header */ +}; + +/* Uninitialized values for certain fields */ +#define NO_NODE MAX /* node has no associated node */ +#define NO_DOM MAX /* node has no dominator */ +#define UN_INIT MAX /* uninitialized variable */ + +#define THEN 0 /* then edge */ +#define ELSE 1 /* else edge */ + +/* Basic Block (BB) flags */ +#define INVALID_BB 0x0001 /* BB is not valid any more */ +#define IS_LATCH_NODE 0x0002 /* BB is the latching node of a loop */ + +struct BB; +/* Interval structure */ +typedef std::list queue; + +struct interval +{ + byte numInt; /* # of the interval */ + byte numOutEdges; /* Number of out edges */ + queue nodes; /* Nodes of the interval*/ + queue::iterator currNode; /* Current node */ + interval *next; /* Next interval */ + BB *firstOfInt(); + interval() + { + numInt=numOutEdges=0; + currNode=nodes.end(); + next=0; + } +}; + + +/* Derived Sequence structure */ +struct derSeq_Entry +{ + BB * Gi; /* Graph pointer */ + interval * Ii; /* Interval list of Gi */ + derSeq_Entry() : Gi(0),Ii(0) + { + + } + ~derSeq_Entry(); +public: + void findIntervals(); +}; +class derSeq : public std::list +{ +public: + void display(); +}; +void freeDerivedSeq(derSeq &derivedG); /* reducible.c */ + diff --git a/include/hlicode.h b/include/hlicode.h new file mode 100644 index 0000000..55b866a --- /dev/null +++ b/include/hlicode.h @@ -0,0 +1,36 @@ +/* + * File: hlIcode.h + * Purpose: module definitions for high-level icodes + * Date: September 1993 + */ + + +/* High level icodes opcodes - def in file icode.h */ +/*typedef enum { + HLI_ASSIGN, + INC, + DEC, + HLI_JCOND, + +} hlIcode; */ + + +typedef struct { + hlIcode opcode; /* hlIcode opcode */ 
+ union { /* different operands */ + struct { + COND_EXPR *lhs; + COND_EXPR *rhs; + } asgn; /* for HLI_ASSIGN hlIcode */ + COND_EXPR *exp; /* for HLI_JCOND, INC, DEC */ + } oper; /* operand */ + boolT valid; /* has a valid hlIcode */ +} HLICODE; + + +//typedef struct { +// Int numIcodes; /* No. of hlIcode records written */ +// Int numAlloc; /* No. of hlIcode records allocated */ +// HLICODE *hlIcode; /* Array of high-level icodes */ +//} HLICODEREC; + diff --git a/include/icode.h b/include/icode.h new file mode 100644 index 0000000..2d73bd0 --- /dev/null +++ b/include/icode.h @@ -0,0 +1,362 @@ +/***************************************************************************** + * I-code related definitions + * (C) Cristina Cifuentes + ****************************************************************************/ +#pragma once +#include +#include "Enums.h" +//enum condId; + +/* LOW_LEVEL icode flags */ +enum eLLFlags +{ + + B =0x0000001, /* Byte operands (value implicitly used) */ + I =0x0000002, /* Immed. source */ + NOT_HLL =0x0000004, /* Not HLL inst. */ + FLOAT_OP =0x0000008, /* ESC or WAIT */ + SEG_IMMED =0x0000010, /* Number is relocated segment value */ + IMPURE =0x0000020, /* Instruction modifies code */ + WORD_OFF =0x0000040, /* Inst has word offset ie.could be address */ + TERMINATES =0x0000080, /* Instruction terminates program */ + CASE =0x0000100, /* Label as case part of switch */ + SWITCH =0x0000200, /* Treat indirect JMP as switch stmt */ + TARGET =0x0000400, /* Jump target */ + SYNTHETIC =0x0000800, /* Synthetic jump instruction */ + NO_LABEL =0x0001000, /* Immed. 
jump cannot be linked to a label */ + NO_CODE =0x0002000, /* Hole in Icode array */ + SYM_USE =0x0004000, /* Instruction uses a symbol */ + SYM_DEF =0x0008000, /* Instruction defines a symbol */ + + NO_SRC =0x0010000, /* Opcode takes no source */ + NO_OPS =0x0020000, /* Opcode takes no operands */ + IM_OPS =0x0040000, /* Opcode takes implicit operands */ + SRC_B =0x0080000, /* Source operand is byte (dest is word) */ +#define NO_SRC_B 0xF7FFFF /* Masks off SRC_B */ + HLL_LABEL =0x0100000, /* Icode has a high level language label */ + IM_DST =0x0200000, /* Implicit DST for opcode (SIGNEX) */ + IM_SRC =0x0400000, /* Implicit SRC for opcode (dx:ax) */ + IM_TMP_DST =0x0800000, /* Implicit rTMP DST for opcode (DIV/IDIV) */ + + JMP_ICODE =0x1000000, /* Jmp dest immed.op converted to icode index */ + JX_LOOP =0x2000000, /* Cond jump is part of loop conditional exp */ + REST_STK =0x4000000 /* Stack needs to be restored after CALL */ +}; + +/* Parser flags */ +#define TO_REG 0x000100 /* rm is source */ +#define S 0x000200 /* sign extend */ +#define OP386 0x000400 /* 386 op-code */ +#define NSP 0x000800 /* NOT_HLL if SP is src or dst */ +#define ICODEMASK 0xFF00FF /* Masks off parser flags */ + +/* LOW_LEVEL icode, DU flag bits */ +#define Cf 1 +#define Sf 2 +#define Zf 4 +#define Df 8 + +/* Machine registers */ +#define rAX 1 /* These are numbered relative to real 8086 */ +#define rCX 2 +#define rDX 3 +#define rBX 4 +#define rSP 5 +#define rBP 6 +#define rSI 7 +#define rDI 8 + +#define rES 9 +#define rCS 10 +#define rSS 11 +#define rDS 12 + +#define rAL 13 +#define rCL 14 +#define rDL 15 +#define rBL 16 +#define rAH 17 +#define rCH 18 +#define rDH 19 +#define rBH 20 + +#define rTMP 21 /* temp register for DIV/IDIV/MOD */ +#define INDEXBASE 22 /* Indexed modes go from INDEXBASE to + * INDEXBASE+7 */ +/* Byte and Word registers */ +static const char *const byteReg[9] = {"al", "cl", "dl", "bl", + "ah", "ch", "dh", "bh", "tmp" }; +static const char *const wordReg[21] = {"ax", 
"cx", "dx", "bx", "sp", "bp", + "si", "di", "es", "cs", "ss", "ds", + "", "", "", "", "", "", "", "", "tmp"}; + +#include "state.h" // State depends on INDEXBASE, but later need STATE + +/* Types of icodes */ +enum icodeType +{ + NOT_SCANNED = 0, /* not even scanned yet */ + LOW_LEVEL, /* low-level icode */ + HIGH_LEVEL /* high-level icode */ +}; + + +/* LOW_LEVEL icode opcodes */ +enum llIcode +{ + iCBW, /* 0 */ + iAAA, + iAAD, + iAAM, + iAAS, + iADC, + iADD, + iAND, + iBOUND, + iCALL, + iCALLF, /* 10 */ + iCLC, + iCLD, + iCLI, + iCMC, + iCMP, + iCMPS, + iREPNE_CMPS, + iREPE_CMPS, + iDAA, + iDAS, /* 20 */ + iDEC, + iDIV, + iENTER, + iESC, + iHLT, + iIDIV, + iIMUL, + iIN, + iINC, + iINS, /* 30 */ + iREP_INS, + iINT, + iIRET, + iJB, + iJBE, + iJAE, + iJA, + iJE, + iJNE, + iJL, /* 40 */ + iJGE, + iJLE, + iJG, + iJS, + iJNS, + iJO, + iJNO, + iJP, + iJNP, + iJCXZ, /* 50 */ + iJMP, + iJMPF, + iLAHF, + iLDS, + iLEA, + iLEAVE, + iLES, + iLOCK, + iLODS, + iREP_LODS, /* 60 */ + iLOOP, + iLOOPE, + iLOOPNE, + iMOV, /* 64 */ + iMOVS, + iREP_MOVS, + iMUL, /* 67 */ + iNEG, + iNOT, + iOR, /* 70 */ + iOUT, + iOUTS, + iREP_OUTS, + iPOP, + iPOPA, + iPOPF, + iPUSH, + iPUSHA, + iPUSHF, + iRCL, /* 80 */ + iRCR, + iROL, + iROR, + iRET, /* 84 */ + iRETF, + iSAHF, + iSAR, + iSHL, + iSHR, + iSBB, /* 90 */ + iSCAS, + iREPNE_SCAS, + iREPE_SCAS, + iSIGNEX, + iSTC, + iSTD, + iSTI, + iSTOS, + iREP_STOS, + iSUB, /* 100 */ + iTEST, + iWAIT, + iXCHG, + iXLAT, + iXOR, + iINTO, + iNOP, + iREPNE, + iREPE, + iMOD /* 110 */ +}; +struct BB; +struct Function; +struct STKFRAME; +/* HIGH_LEVEL icodes opcodes */ +typedef enum { + HLI_ASSIGN, /* := */ + HLI_CALL, /* Call procedure */ + HLI_JCOND, /* Conditional jump */ + HLI_RET, /* Return from procedure */ + /* pseudo high-level icodes */ + HLI_POP, /* Pop expression */ + HLI_PUSH, /* Push expression */ +} hlIcode; + +/* Def/use of flags - low 4 bits represent flags */ +struct DU +{ + byte d; + byte u; +}; + +/* Def/Use of registers and stack variables */ 
+struct DU_ICODE +{ + dword def; /* For Registers: position in dword is reg index*/ + dword lastDefRegi;/* Bit set if last def of this register in BB */ + dword use; /* For Registers: position in dword is reg index*/ +}; + + +/* Definition-use chain for level 1 (within a basic block) */ +#define MAX_REGS_DEF 2 /* 2 regs def'd for long-reg vars */ +#define MAX_USES 5 + +struct DU1 +{ + Int numRegsDef; /* # registers defined by this inst */ + byte regi[MAX_REGS_DEF]; /* registers defined by this inst */ + Int idx[MAX_REGS_DEF][MAX_USES]; /* inst that uses this def */ +}; + + +/* LOW_LEVEL icode operand record */ +struct ICODEMEM +{ + byte seg; /* CS, DS, ES, SS */ + int16 segValue; /* Value of segment seg during analysis */ + byte segOver; /* CS, DS, ES, SS if segment override */ + byte regi; /* 0 < regs < INDEXBASE <= index modes */ + int16 off; /* memory address offset */ +} ; + + +struct COND_EXPR; +struct HLTYPE +{ + hlIcode opcode; /* hlIcode opcode */ + union { /* different operands */ + struct { /* for HLI_ASSIGN */ + COND_EXPR *lhs; + COND_EXPR *rhs; + } asgn; + COND_EXPR *exp; /* for HLI_JCOND, HLI_RET, HLI_PUSH, HLI_POP*/ + struct { /* for HLI_CALL */ + Function *proc; + STKFRAME *args; /* actual arguments */ + } call; + } oper; /* operand */ +} ; + +typedef struct +{ + llIcode opcode; /* llIcode instruction */ + byte numBytes; /* Number of bytes this instr */ + flags32 flg; /* icode flags */ + dword label; /* offset in image (20-bit adr) */ + ICODEMEM dst; /* destination operand */ + ICODEMEM src; /* source operand */ + union { /* Source operand if (flg & I) */ + dword op; /* idx of immed src op */ + struct { /* Call & # actual arg bytes */ + Function *proc; /* ^ target proc (for CALL(F))*/ + Int cb; /* # actual arg bytes */ + } proc; + } immed; + DU flagDU; /* def/use of flags */ + struct { /* Case table if op==JMP && !I */ + Int numEntries; /* # entries in case table */ + dword *entries; /* array of offsets */ + } caseTbl; + Int hllLabNum; /* label # for 
hll codegen */ +} LLTYPE; + +/* Icode definition: LOW_LEVEL and HIGH_LEVEL */ +struct ICODE +{ + icodeType type; /* Icode type */ + boolT invalid; /* Has no HIGH_LEVEL equivalent */ + BB *inBB; /* BB to which this icode belongs */ + DU_ICODE du; /* Def/use regs/vars */ + DU1 du1; /* du chain 1 */ + Int codeIdx; /* Index into cCode.code */ + struct IC { /* Different types of icodes */ + LLTYPE ll; + HLTYPE hl; /* For HIGH_LEVEL icodes */ + }; + IC ic;/* intermediate code */ + void writeIntComment(char *s); + void setRegDU(byte regi, operDu du_in); + void invalidate(); + void newCallHl(); + void writeDU(Int idx); + condId idType(opLoc sd); + // HLL setting functions + void setAsgn(COND_EXPR *lhs, COND_EXPR *rhs); // set this icode to be an assign + void setUnary(hlIcode op, COND_EXPR *exp); + void setJCond(COND_EXPR *cexp); + int loc_ip; // used by CICodeRec to number ICODEs +}; + +// This is the icode array object. +// The bulk of this could well be done with a class library +class CIcodeRec : public std::vector +{ +public: + CIcodeRec(); // Constructor + ~CIcodeRec(); // Destructor + + ICODE * addIcode(ICODE *pIcode); + ICODE * GetFirstIcode(); + // ICODE * GetNextIcode(ICODE * pCurIcode); + boolT IsValid(ICODE * pCurIcode); + int GetNumIcodes(); + void SetInBB(int start, int end, BB* pnewBB); + void SetImmediateOp(int ip, dword dw); + void SetLlFlag(int ip, dword flag); + void ClearLlFlag(int ip, dword flag); + dword GetLlFlag(int ip); + void SetLlInvalid(int ip, boolT fInv); + dword GetLlLabel(int ip); + llIcode GetLlOpcode(int ip); + boolT labelSrch(dword target, Int *pIndex); + ICODE * GetIcode(int ip); +}; diff --git a/include/locident.h b/include/locident.h new file mode 100644 index 0000000..542c0af --- /dev/null +++ b/include/locident.h @@ -0,0 +1,130 @@ +/* + * File: locIdent.h + * Purpose: High-level local identifier definitions + * Date: October 1993 + * (C) Cristina Cifuentes + */ + +#pragma once +#include +#include +/* Type definition */ +struct 
IDX_ARRAY : public std::vector +{ + bool inList(int idx) + { + return std::find(begin(),end(),idx)!=end(); + } +}; +/* Type definitions used in the decompiled program */ +typedef enum { + TYPE_UNKNOWN = 0, /* unknown so far */ + TYPE_BYTE_SIGN, /* signed byte (8 bits) */ + TYPE_BYTE_UNSIGN, /* unsigned byte */ + TYPE_WORD_SIGN, /* signed word (16 bits) */ + TYPE_WORD_UNSIGN, /* unsigned word (16 bits) */ + TYPE_LONG_SIGN, /* signed long (32 bits) */ + TYPE_LONG_UNSIGN, /* unsigned long (32 bits) */ + TYPE_RECORD, /* record structure */ + TYPE_PTR, /* pointer (32 bit ptr) */ + TYPE_STR, /* string */ + TYPE_CONST, /* constant (any type) */ + TYPE_FLOAT, /* floating point */ + TYPE_DOUBLE /* double precision float */ +} hlType; + +static const char *hlTypes[13] = {"", "char", "unsigned char", "int", "unsigned int", + "long", "unsigned long", "record", "int *", "char *", + "", "float", "double"}; + +typedef enum +{ + STK_FRAME, /* For stack vars */ + REG_FRAME, /* For register variables */ + GLB_FRAME /* For globals */ +} frameType; + +typedef struct +{ + int16 seg; /* segment value */ + int16 off; /* offset */ + byte regi; /* optional indexed register */ +} BWGLB_TYPE; + + +typedef struct +{ /* For TYPE_LONG_(UN)SIGN on the stack */ + Int offH; /* high offset from BP */ + Int offL; /* low offset from BP */ +} LONG_STKID_TYPE; +typedef struct +{ /* For TYPE_LONG_(UN)SIGN registers */ + byte h; /* high register */ + byte l; /* low register */ +} LONGID_TYPE; + + +/* ID, LOCAL_ID */ +struct ID +{ + hlType type; /* Probable type */ + boolT illegal;/* Boolean: not a valid field any more */ + IDX_ARRAY idx; /* Index into icode array (REG_FRAME only) */ + frameType loc; /* Frame location */ + boolT hasMacro;/* Identifier requires a macro */ + char macro[10];/* Macro for this identifier */ + char name[20];/* Identifier's name */ + union { /* Different types of identifiers */ + byte regi; /* For TYPE_BYTE(WORD)_(UN)SIGN registers */ + struct { /* For TYPE_BYTE(WORD)_(UN)SIGN 
on the stack */ + byte regOff; /* register offset (if any) */ + Int off; /* offset from BP */ + } bwId; + BWGLB_TYPE bwGlb; /* For TYPE_BYTE(WORD)_(UN)SIGN globals */ + LONGID_TYPE longId; /* For TYPE_LONG_(UN)SIGN registers */ + LONG_STKID_TYPE longStkId;/* For TYPE_LONG_(UN)SIGN on the stack */ + struct { /* For TYPE_LONG_(UN)SIGN globals */ + int16 seg; /* segment value */ + int16 offH; /* offset high */ + int16 offL; /* offset low */ + byte regi; /* optional indexed register */ + } longGlb; + struct { /* For TYPE_LONG_(UN)SIGN constants */ + dword h; /* high word */ + dword l; /* low word */ + } longKte; + } id; + ID() + { + memset(this,0,sizeof(ID)); + } + ID(hlType t, frameType f) + { + memset(this,0,sizeof(ID)); + type=t; + loc=f; + } +}; + +struct LOCAL_ID +{ + std::vector id_arr; +public: + LOCAL_ID() + {} + Int newByteWordReg(hlType t, byte regi); + Int newByteWordStk(hlType t, Int off, byte regOff); + Int newIntIdx(int16 seg, int16 off, byte regi, Int ix, hlType t); + Int newLongReg(hlType t, byte regH, byte regL, Int ix); + Int newLong(opLoc sd, ICODE *pIcode, hlFirst f, Int ix, operDu du, Int off); + void newIdent(hlType t, frameType f); + void flagByteWordId(Int off); + void propLongId(byte regL, byte regH, const char *name); + size_t csym() const {return id_arr.size();} +protected: + Int newLongIdx(int16 seg, int16 offH, int16 offL, byte regi, Int ix, hlType t); + Int newLongGlb(int16 seg, int16 offH, int16 offL, Int ix, hlType t); + Int newLongStk(hlType t, Int offH, Int offL); +}; + + diff --git a/include/perfhlib.h b/include/perfhlib.h new file mode 100644 index 0000000..4fb0d03 --- /dev/null +++ b/include/perfhlib.h @@ -0,0 +1,34 @@ +/* Perfect hashing function library. 
Contains functions to generate perfect + hashing functions + * (C) Mike van Emmerik + */ + + +#define TRUE 1 +#define FALSE 0 +#define bool unsigned char +#define byte unsigned char +#define word unsigned short + +/* Prototypes */ +void hashParams(int NumEntry, int EntryLen, int SetSize, char SetMin, + int NumVert); /* Set the parameters for the hash table */ +void hashCleanup(void); /* Frees memory allocated by hashParams() */ +void map(void); /* Part 1 of creating the tables */ +void assign(void); /* Part 2 of creating the tables */ +int hash(byte *s); /* Hash the string to an int 0 .. NUMENTRY-1 */ + +word *readT1(void); /* Returns a pointer to the T1 table */ +word *readT2(void); /* Returns a pointer to the T2 table */ +word *readG(void); /* Returns a pointer to the g table */ + + +/* The application must provide these functions: */ +void getKey(int i, byte **pKeys);/* Set *keys to point to the i+1th key */ +void dispKey(int i); /* Display the key */ + + +/* Macro reads a LH word from the image regardless of host convention */ +#ifndef LH +#define LH(p) ((int)((byte *)(p))[0] + ((int)((byte *)(p))[1] << 8)) +#endif diff --git a/include/scanner.h b/include/scanner.h new file mode 100644 index 0000000..94a8078 --- /dev/null +++ b/include/scanner.h @@ -0,0 +1,38 @@ +/* Scanner functions + * (C) Cristina Cifuentes, Jeff Ledermann + */ + +#define LH(p) ((int)((byte *)(p))[0] + ((int)((byte *)(p))[1] << 8)) + +static void rm(Int i); +static void modrm(Int i); +static void segrm(Int i); +static void data1(Int i); +static void data2(Int i); +static void regop(Int i); +static void segop(Int i); +static void strop(Int i); +static void escop(Int i); +static void axImp(Int i); +static void alImp(Int i); +static void axSrcIm(Int i); +static void memImp(Int i); +static void memReg0(Int i); +static void memOnly(Int i); +static void dispM(Int i); +static void dispS(Int i); +static void dispN(Int i); +static void dispF(Int i); +static void prefix(Int i); +static void immed(Int 
i); +static void shift(Int i); +static void arith(Int i); +static void trans(Int i); +static void const1(Int i); +static void const3(Int i); +static void none1(Int i); +static void none2(Int i); +static void checkInt(Int i); + +/* Extracts reg bits from middle of mod-reg-rm byte */ +#define REG(x) ((byte)(x & 0x38) >> 3) diff --git a/include/state.h b/include/state.h new file mode 100644 index 0000000..d44832c --- /dev/null +++ b/include/state.h @@ -0,0 +1,28 @@ +/**************************************************************************** + * dcc project header + * (C) Cristina Cifuentes, Mike van Emmerik + ****************************************************************************/ + +/* STATE TABLE */ +struct STATE +{ + dword IP; /* Offset into Image */ + int16 r[INDEXBASE]; /* Value of segs and AX */ + byte f[INDEXBASE]; /* True if r[.] has a value */ + struct + { /* For case stmt indexed reg */ + byte regi; /* Last conditional jump */ + int16 immed; /* Contents of the previous register */ + } JCond; + void setState(word reg, int16 value); +public: + void checkStartup(); + STATE() : IP(0) + { + JCond.immed=0; + memset(r,0,sizeof(int16)*INDEXBASE); + memset(f,0,sizeof(byte)*INDEXBASE); + } +}; + + diff --git a/include/symtab.h b/include/symtab.h new file mode 100644 index 0000000..1e44b2a --- /dev/null +++ b/include/symtab.h @@ -0,0 +1,50 @@ +/* + * Symbol table prototypes + * (C) Mike van Emmerik +*/ +#pragma once +/* * * * * * * * * * * * * * * * * */ +/* Symbol table structs and protos */ +/* * * * * * * * * * * * * * * * * */ +struct Function; +struct SYMTABLE +{ + std::string pSymName; /* Ptr to symbolic name or comment */ + dword symOff; /* Symbol image offset */ + Function *symProc; /* Procedure pointer */ + word preHash; /* Hash value before the modulo */ + word postHash; /* Hash value after the modulo */ + word nextOvf; /* Next entry this hash bucket, or -1 */ + word prevOvf; /* Back link in Ovf chain */ + SYMTABLE() : symOff(0),symProc(0) {} + 
SYMTABLE(dword _sym,Function *_proc) : symOff(_sym),symProc(_proc) + {} + bool operator == (const SYMTABLE &other) const + { + // does not yse pSymName, to ease finding by symOff/symProc combo + // in map + return (symOff==other.symOff) && symProc==(other.symProc); + } +}; + +enum tableType /* The table types */ +{ + Label=0, /* The label table */ + Comment, /* The comment table */ + NUM_TABLE_TYPES /* Number of entries: must be last */ +}; + +void createSymTables(void); +void destroySymTables(void); +void enterSym(char *symName, dword symOff, Function *symProc, boolT bSymToo); +boolT readSym (char *symName, dword *pSymOff, Function **pSymProc); +boolT readVal (char *symName, dword symOff, Function *symProc); +void deleteSym(char *symName); +void deleteVal(dword symOff, Function * symProc, boolT bSymToo); +std::string findVal(dword symOff, Function * symProc, word *pIndex); +word symHash(char *name, word *pre); +word valHash(dword off, Function * proc, word *pre); +void selectTable(tableType); /* Select a particular table */ + +char *addStrTbl(char *pStr); /* Add string to string table */ + diff --git a/include/types.h b/include/types.h new file mode 100644 index 0000000..a252184 --- /dev/null +++ b/include/types.h @@ -0,0 +1,80 @@ +/**************************************************************************** + * dcc project general header + * (C) Cristina Cifuentes, Mike van Emmerik + ****************************************************************************/ +#pragma once +#include +/**** Common definitions and macros ****/ +#ifdef __MSDOS__ /* Intel: 16 bit integer */ +typedef long Int; /* Int: 0x80000000..0x7FFFFFFF */ +typedef unsigned long flags32; /* 32 bits */ +typedef unsigned long dword; /* 32 bits */ +#define MAX 0x7FFFFFFF +#else /* Unix: 32 bit integer */ +typedef int Int; /* Int: 0x80000000..0x7FFFFFFF */ +typedef unsigned int flags32; /* 32 bits */ +typedef unsigned int dword; /* 32 bits */ +#define MAX 0x7FFFFFFF +#endif + +/* Type definitions 
used in the program */ +typedef unsigned char byte; /* 8 bits */ +typedef unsigned short word;/* 16 bits */ +typedef short int16; /* 16 bits */ +typedef unsigned char boolT; /* 8 bits */ + +#if defined(__MSDOS__) | defined(WIN32) +#define unlink _unlink // Compiler is picky about non Ansi names +#endif + + +#define TRUE 1 +#define FALSE 0 + +#define SYNTHESIZED_MIN 0x100000 /* Synthesized labs use bits 21..32 */ + +/* These are for C library signature detection */ +#define SYMLEN 16 /* Length of proc symbols, incl null */ +#define PATLEN 23 /* Length of proc patterns */ +#define WILD 0xF4 /* The wild byte */ + +/****** MACROS *******/ + +/* Macro reads a LH word from the image regardless of host convention */ +/* Returns a 16 bit quantity, e.g. C000 is read into an Int as C000 */ +//#define LH(p) ((int16)((byte *)(p))[0] + ((int16)((byte *)(p))[1] << 8)) +#define LH(p) ((word)((byte *)(p))[0] + ((word)((byte *)(p))[1] << 8)) + +/* Macro reads a LH word from the image regardless of host convention */ +/* Returns a signed quantity, e.g. C000 is read into an Int as FFFFC000 */ +#define LHS(p) (((byte *)(p))[0] + (((char *)(p))[1] << 8)) + +/* Macro tests bit b for type t in prog.map */ +#define BITMAP(b, t) (prog.map[(b) >> 2] & ((t) << (((b) & 3) << 1))) + +/* Macro to convert a segment, offset definition into a 20 bit address */ +#define opAdr(seg,off) ((seg << 4) + off) + +/* duVal FLAGS */ +struct eDuVal +{ + enum flgs + { + DEF=1, + USE=2, + VAL=4 + }; + int def :1; /* Variable was first defined than used */ + int use :1; /* Variable was first used than defined */ + int val :1; /* Variable has an initial value. 2 cases: + * 1. When variable is used first (ie. global) + * 2. When a value is moved into the variable + * for the first time. 
*/ + void setFlags(uint16_t x) + { + def = x&DEF; + use = x&USE; + val = x&VAL; + } + bool isUSE_VAL() {return use&&val;} /* Use and Val */ +}; diff --git a/regression_tester.rb b/regression_tester.rb new file mode 100755 index 0000000..425b358 --- /dev/null +++ b/regression_tester.rb @@ -0,0 +1,31 @@ +#!/usr/bin/env ruby +require 'fileutils' +print("Regression tester 0.0.1\n") +def path_local(from) + + return from #from.gsub('/','//') + from.gsub('/','\\\\') +end +TESTS_DIR="./tests" +def perform_test(exepath,filepath,outname) + output_path=path_local(TESTS_DIR+"/outputs/"+outname) + exepath=path_local(exepath) + output_path=path_local(output_path) + filepath=path_local(filepath) + printf("calling:" + "#{exepath} -a1 -o#{output_path}.a1 #{filepath}\n") + result = `#{exepath} -a1 -o#{output_path}.a1 #{filepath}` + result = `#{exepath} -a2 -o#{output_path}.a2 #{filepath}` + puts result + p $? +end +`rm -rf #{TESTS_DIR}/outputs/*.*` +#exit(1) +Dir.open(TESTS_DIR+"/inputs").each() {|f| + next if f=="." or f==".." + perform_test(".//"+ARGV[0],TESTS_DIR+"/inputs/"+f,f) +} +Dir.open(TESTS_DIR+"/inputs").each() {|f| + next if f=="." or f==".." 
+ FileUtils.mv(TESTS_DIR+"/inputs/"+f,TESTS_DIR+"/outputs/"+f) if f.end_with?(".b") +} +"diff -rqbwB" \ No newline at end of file diff --git a/src/BasicBlock.cpp b/src/BasicBlock.cpp new file mode 100644 index 0000000..7bc1754 --- /dev/null +++ b/src/BasicBlock.cpp @@ -0,0 +1,38 @@ +#include "BasicBlock.h" +#include "Procedure.h" +#include "dcc.h" +BB *BB::Create(void *ctx, const std::string &s, Function *parent, BB *insertBefore) +{ + return new BB; +} + +BB *BB::Create(Int start, Int ip, byte nodeType, Int numOutEdges, Function *parent) +{ + parent->cfg; + BB* pnewBB; + + pnewBB = new BB; + pnewBB->nodeType = nodeType; /* Initialise */ + pnewBB->start = start; + pnewBB->length = ip - start + 1; + pnewBB->numOutEdges = (byte)numOutEdges; + pnewBB->immedDom = NO_DOM; + pnewBB->loopHead = pnewBB->caseHead = pnewBB->caseTail = + pnewBB->latchNode= pnewBB->loopFollow = NO_NODE; + + if (numOutEdges) + pnewBB->edges.resize(numOutEdges); + + /* Mark the basic block to which the icodes belong to, but only for + * real code basic blocks (ie. 
not interval bbs) */ + if(parent) + { + if (start >= 0) + parent->Icode.SetInBB(start, ip, pnewBB); + parent->heldBBs.push_back(pnewBB); + parent->cfg.push_back(pnewBB); + } + if (start != -1) /* Only for code BB's */ + stats.numBBbef++; + return pnewBB; +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..82434c6 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,27 @@ +SET(SOURCES ast.cpp backend.cpp bundle.cpp chklib.cpp +comwrite.cpp control.cpp dataflow.cpp dcc.cpp +disassem.cpp error.cpp fixwild.cpp frontend.cpp +graph.cpp hlicode.cpp icode.cpp +idioms.cpp locident.cpp parser.cpp +perfhlib.cpp procs.cpp proplong.cpp reducible.cpp +scanner.cpp symtab.cpp udm.cpp) +SET(dc_INCLUDES +${PROJECT_SOURCE_DIR}/include/ast.h +${PROJECT_SOURCE_DIR}/include/bundle.h +${PROJECT_SOURCE_DIR}/include/dcc.h +${PROJECT_SOURCE_DIR}/include/disassem.h +${PROJECT_SOURCE_DIR}/include/dosdcc.h +${PROJECT_SOURCE_DIR}/include/error.h +${PROJECT_SOURCE_DIR}/include/graph.h +${PROJECT_SOURCE_DIR}/include/hlicode.h +${PROJECT_SOURCE_DIR}/include/icode.h +${PROJECT_SOURCE_DIR}/include/locident.h +${PROJECT_SOURCE_DIR}/include/perfhlib.h +${PROJECT_SOURCE_DIR}/include/scanner.h +${PROJECT_SOURCE_DIR}/include/state.h +${PROJECT_SOURCE_DIR}/include/symtab.h +${PROJECT_SOURCE_DIR}/include/types.h +) +SOURCE_GROUP(Source FILES ${SOURCES}) +SOURCE_GROUP(Headers FILES ${dc_INCLUDES}) +ADD_EXECUTABLE(dcc_oo ${SOURCES} ${dc_INCLUDES}) \ No newline at end of file diff --git a/src/ast.cpp b/src/ast.cpp new file mode 100644 index 0000000..37f2bcc --- /dev/null +++ b/src/ast.cpp @@ -0,0 +1,995 @@ +/* + * File: ast.c + * Purpose: Support module for abstract syntax trees. 
+ * Date: September 1993 + * (C) Cristina Cifuentes + */ +#include +#include /* For free() */ +#include +#include +#include +#include +#include "types.h" +#include "dcc.h" +using namespace std; +/* Index registers **** temp solution */ +static const char *idxReg[8] = {"bx+si", "bx+di", "bp+si", "bp+di", + "si", "di", "bp", "bx" }; +/* Conditional operator symbols in C. Index by condOp enumeration type */ +static const char *condOpSym[] = { " <= ", " < ", " == ", " != ", " > ", " >= ", + " & ", " | ", " ^ ", " ~ ", + " + ", " - ", " * ", " / ", + " >> ", " << ", " % ", " && ", " || " }; + +#define EXP_SIZE 200 /* Size of the expression buffer */ + +/* Local expression stack */ +//typedef struct _EXP_STK { +// COND_EXPR *exp; +// struct _EXP_STK *next; +//} EXP_STK; +typedef std::list EXP_STK; +static EXP_STK expStk; /* local expression stack */ + + +/* Returns the integer i in C hexadecimal format */ +static char *hexStr (uint16_t i) +{ + static char buf[10]; + // i &= 0xFFFF; + sprintf (buf, "%s%x", (i > 9) ? 
"0x" : "", i); + return (buf); +} + + +/* Sets the du record for registers according to the du flag */ +void ICODE::setRegDU (byte regi, operDu du_in) +{ + // printf("%s %d %x\n",__FUNCTION__,regi,int(du_in)); + switch (du_in) + { + case eDEF: + du.def |= duReg[regi]; + du1.numRegsDef++; + printf("%s du.def |= %x\n",__FUNCTION__,duReg[regi]); + break; + case eUSE: + du.use |= duReg[regi]; + printf("%s du.use |= %x\n",__FUNCTION__,duReg[regi]); + break; + case USE_DEF: + du.def |= duReg[regi]; + du1.numRegsDef++; + printf("%s du.def |= %x\n",__FUNCTION__,duReg[regi]); + printf("%s du.use |= %x\n",__FUNCTION__,duReg[regi]); + du.use |= duReg[regi]; + break; + case NONE: /* do nothing */ + break; + } +} + + +/* Copies the def, use, or def and use fields of duIcode into pIcode */ +void copyDU (ICODE *pIcode, const ICODE *duIcode, operDu du, operDu duDu) +{ + // printf("%s %d,%d from %d to %d\n",__FUNCTION__,int(du),int(duDu),duIcode->ic.ll.opcode,pIcode->ic.ll.opcode); + switch (du) { + case eDEF: + if (duDu == eDEF) + pIcode->du.def=duIcode->du.def; + else + pIcode->du.def=duIcode->du.use; + break; + case eUSE: + if (duDu == eDEF) + pIcode->du.use=duIcode->du.def; + else + pIcode->du.use =duIcode->du.use; + break; + case USE_DEF: + pIcode->du = duIcode->du; + break; + case NONE: + assert(false); + break; + } + printf("%s end: %x,%x\n",__FUNCTION__,pIcode->du.def,pIcode->du.use); +} + + +/* Creates a newExp conditional expression node of type t and returns it */ +static COND_EXPR *newCondExp (condNodeType t) +{ + //printf("%s:%d\n",__FUNCTION__,int(t)); + + COND_EXPR *newExp; + + newExp = new COND_EXPR; + //memset(newExp, 0, sizeof(COND_EXPR)); + newExp->type = t; + return (newExp); +} + + +/* Creates a conditional boolean expression and returns it */ +COND_EXPR *COND_EXPR::boolOp(COND_EXPR *lhs, COND_EXPR *rhs, condOp op) +{ + //printf("%s:%d\n",__FUNCTION__,int(op)); + COND_EXPR *newExp; + + newExp = newCondExp (BOOLEAN_OP); + newExp->expr.boolExpr.op = op; + 
newExp->expr.boolExpr.lhs = lhs; + newExp->expr.boolExpr.rhs = rhs; + return (newExp); +} + + +/* Returns a unary conditional expression node. This procedure should + * only be used with the following conditional node types: NEGATION, + * ADDRESSOF, DEREFERENCE, POST_INC, POST_DEC, PRE_INC, PRE_DEC */ +COND_EXPR *COND_EXPR::unary(condNodeType t, COND_EXPR *sub_expr) +{ + COND_EXPR *newExp; + + newExp = newCondExp (t); + newExp->expr.unaryExp = sub_expr; + return (newExp); +} + + +/* Returns an identifier conditional expression node of type GLOB_VAR */ +COND_EXPR *COND_EXPR::idGlob (int16 segValue, int16 off) +{ + COND_EXPR *newExp; + dword adr; + Int i; + + newExp = newCondExp (IDENTIFIER); + newExp->expr.ident.idType = GLOB_VAR; + adr = opAdr(segValue, off); + for (i = 0; i < symtab.csym; i++) + if (symtab.sym[i].label == adr) + break; + if (i == symtab.csym) + printf ("Error, glob var not found in symtab\n"); + newExp->expr.ident.idNode.globIdx = i; + return (newExp); +} + + +/* Returns an identifier conditional expression node of type REGISTER */ +COND_EXPR *COND_EXPR::idReg(byte regi, flags32 icodeFlg, LOCAL_ID *locsym) +{ + COND_EXPR *newExp; + + newExp = newCondExp (IDENTIFIER); + newExp->expr.ident.idType = REGISTER; + if ((icodeFlg & B) || (icodeFlg & SRC_B)) + { + newExp->expr.ident.idNode.regiIdx = locsym->newByteWordReg(TYPE_BYTE_SIGN, regi); + newExp->expr.ident.regiType = BYTE_REG; + } + else /* word */ + { + newExp->expr.ident.idNode.regiIdx = locsym->newByteWordReg( TYPE_WORD_SIGN, regi); + newExp->expr.ident.regiType = WORD_REG; + } + return (newExp); +} + + +/* Returns an identifier conditional expression node of type REGISTER */ +COND_EXPR *COND_EXPR::idRegIdx(Int idx, regType reg_type) +{ + COND_EXPR *newExp; + + newExp = newCondExp (IDENTIFIER); + newExp->expr.ident.idType = REGISTER; + newExp->expr.ident.regiType = reg_type; + newExp->expr.ident.idNode.regiIdx = idx; + return (newExp); +} + +/* Returns an identifier conditional expression node 
of type LOCAL_VAR */
+/* Searches localId->id_arr for the stack variable at BP offset off that has
+ * no register offset, names that entry "loc<index>" and stores the index in
+ * the node's localIdx.
+ * On a lookup miss an error is printed and localIdx is set to
+ * localId->csym() (one past the last entry); no table entry is written. */
+COND_EXPR *COND_EXPR::idLoc(Int off, LOCAL_ID *localId)
+{
+    COND_EXPR *newExp;
+    size_t i;
+    const size_t numIds = localId->csym();
+
+    newExp = newCondExp (IDENTIFIER);
+    newExp->expr.ident.idType = LOCAL_VAR;
+    for (i = 0; i < numIds; i++)
+        if ((localId->id_arr[i].id.bwId.off == off) &&
+            (localId->id_arr[i].id.bwId.regOff == 0))
+            break;
+    newExp->expr.ident.idNode.localIdx = i;
+    if (i == numIds)
+        printf ("Error, cannot find local var\n");
+    else
+        /* Only name an entry that exists: id_arr[numIds] is past the end of
+         * the vector.  The cast makes the argument match the %ld format. */
+        sprintf (localId->id_arr[i].name, "loc%ld", (long)i);
+    return (newExp);
+}
+
+
+/* Returns an identifier conditional expression node of type PARAM */
+/* localIdx is the index of the entry in argSymtab->sym whose off equals
+ * the given offset; on a miss an error is printed and localIdx is
+ * argSymtab->sym.size(). */
+COND_EXPR *COND_EXPR::idParam(Int off, const STKFRAME * argSymtab)
+{
+    COND_EXPR *newExp;
+    size_t i;
+
+    newExp = newCondExp (IDENTIFIER);
+    newExp->expr.ident.idType = PARAM;
+    for (i = 0; i < argSymtab->sym.size(); i++)
+        if (argSymtab->sym[i].off == off)
+            break;
+    if (i == argSymtab->sym.size())
+        printf ("Error, cannot find argument var\n");
+    newExp->expr.ident.idNode.localIdx = i;
+    return (newExp);
+}
+
+
+/* Returns an identifier conditional expression node of type GLOB_VAR_IDX.
+ * This global variable is indexed by regi.
*/
+/* The node's idxGlbIdx is the index of the locSym entry whose bwGlb
+ * segment, offset and index register all match the arguments; when no
+ * entry matches an error is printed and idxGlbIdx is locSym->csym(). */
+COND_EXPR *idCondExpIdxGlob (int16 segValue, int16 off, byte regi, const LOCAL_ID *locSym)
+{
+    COND_EXPR *node = newCondExp (IDENTIFIER);
+    node->expr.ident.idType = GLOB_VAR_IDX;
+
+    /* Linear scan for the (segment, offset, register) triple */
+    size_t slot = 0;
+    while (slot < locSym->csym())
+    {
+        const bool sameSeg  = (locSym->id_arr[slot].id.bwGlb.seg == segValue);
+        const bool sameOff  = (locSym->id_arr[slot].id.bwGlb.off == off);
+        const bool sameRegi = (locSym->id_arr[slot].id.bwGlb.regi == regi);
+        if (sameSeg && sameOff && sameRegi)
+            break;
+        slot++;
+    }
+    if (slot == locSym->csym())
+        printf ("Error, indexed-glob var not found in local id table\n");
+    node->expr.ident.idNode.idxGlbIdx = slot;
+    return node;
+}
+
+
+/* Builds a CONSTANT identifier node holding the value kte together with
+ * the caller-supplied size */
+COND_EXPR *COND_EXPR::idKte(dword kte, byte size)
+{
+    COND_EXPR *node = newCondExp (IDENTIFIER);
+
+    node->expr.ident.idNode.kte.size = size;
+    node->expr.ident.idNode.kte.kte = kte;
+    node->expr.ident.idType = CONSTANT;
+    return node;
+}
+
+
+/* Returns an identifier conditional expression node of type LONG_VAR,
+ * that points to the given index idx.
*/
+COND_EXPR *COND_EXPR::idLongIdx (Int idx)
+{
+    COND_EXPR *node = newCondExp (IDENTIFIER);
+
+    node->expr.ident.idNode.longIdx = idx;
+    node->expr.ident.idType = LONG_VAR;
+    return node;
+}
+
+
+/* Builds the identifier node for a long operand spread over two icodes,
+ * pIcode and pIcode+off.
+ * A source operand flagged immediate (I) becomes a 4-byte CONSTANT whose
+ * value joins the two immediates; f says whether pIcode carries the high
+ * (HIGH_FIRST) or the low half.  Any other operand is registered through
+ * localId->newLong() and returned as a LONG_VAR node. */
+COND_EXPR *COND_EXPR::idLong(LOCAL_ID *localId, opLoc sd, ICODE *pIcode, hlFirst f, Int ix, operDu du, Int off)
+{
+    COND_EXPR *node = newCondExp (IDENTIFIER);
+    const bool isLongConst = (sd == SRC) && ((pIcode->ic.ll.flg & I) == I);
+
+    if (! isLongConst)
+    {
+        /* Long variable (reg, stack or glob) */
+        const Int longIdx = localId->newLong(sd, pIcode, f, ix, du, off);
+        node->expr.ident.idType = LONG_VAR;
+        node->expr.ident.idNode.longIdx = longIdx;
+        return node;
+    }
+
+    /* Long constant: join the two immediate operands */
+    const dword firstOp  = pIcode->ic.ll.immed.op;
+    const dword secondOp = (pIcode+off)->ic.ll.immed.op;
+
+    node->expr.ident.idType = CONSTANT;
+    if (f == HIGH_FIRST)
+        node->expr.ident.idNode.kte.kte = (firstOp << 16) + secondOp;
+    else /* LOW_FIRST */
+        node->expr.ident.idNode.kte.kte = (secondOp << 16) + firstOp;
+    node->expr.ident.idNode.kte.size = 4;
+    return node;
+}
+
+
+/* Builds a FUNCTION identifier node that records the called procedure
+ * pproc and its actual-argument frame args */
+COND_EXPR *COND_EXPR::idFunc(Function * pproc, STKFRAME * args)
+{
+    COND_EXPR *node = newCondExp (IDENTIFIER);
+
+    node->expr.ident.idNode.call.args = args;
+    node->expr.ident.idNode.call.proc = pproc;
+    node->expr.ident.idType = FUNCTION;
+    return node;
+}
+
+
+/* Returns an identifier conditional expression node of type OTHER.
+ * Temporary solution, should really be encoded as an indexed type (eg.
+ * arrays).
*/ +COND_EXPR *COND_EXPR::idOther(byte seg, byte regi, int16 off) +{ + COND_EXPR *newExp; + + newExp = newCondExp (IDENTIFIER); + newExp->expr.ident.idType = OTHER; + newExp->expr.ident.idNode.other.seg = seg; + newExp->expr.ident.idNode.other.regi = regi; + newExp->expr.ident.idNode.other.off = off; + return (newExp); +} + + +/* Returns an identifier conditional expression node of type TYPE_LONG or + * TYPE_WORD_SIGN */ +COND_EXPR *COND_EXPR::idID (const ID *retVal, LOCAL_ID *locsym, Int ix) +{ + COND_EXPR *newExp; + Int idx; + + newExp = newCondExp (IDENTIFIER); + if (retVal->type == TYPE_LONG_SIGN) + { + idx = locsym->newLongReg (TYPE_LONG_SIGN, retVal->id.longId.h,retVal->id.longId.l, ix); + newExp->expr.ident.idType = LONG_VAR; + newExp->expr.ident.idNode.longIdx = idx; + } + else if (retVal->type == TYPE_WORD_SIGN) + { + newExp->expr.ident.idType = REGISTER; + newExp->expr.ident.idNode.regiIdx = locsym->newByteWordReg(TYPE_WORD_SIGN, retVal->id.regi); + newExp->expr.ident.regiType = WORD_REG; + } + return (newExp); +} + + +/* Returns an identifier conditional expression node, according to the given + * type. + * Arguments: i : index into the icode array, used for newLongRegId only. + * duIcode: icode instruction that needs the du set. + * du: operand is defined or used in current instruction. */ +COND_EXPR *COND_EXPR::id(ICODE *pIcode, opLoc sd, Function * pProc, Int i,ICODE *duIcode, operDu du) +{ + COND_EXPR *newExp; + ICODEMEM * pm; + Int idx; /* idx into pIcode->localId table */ + + pm = (sd == SRC) ? 
&pIcode->ic.ll.src : &pIcode->ic.ll.dst; + + if (((sd == DST) && (pIcode->ic.ll.flg & IM_DST) == IM_DST) || + ((sd == SRC) && (pIcode->ic.ll.flg & IM_SRC)) || + (sd == LHS_OP)) /* for MUL lhs */ + { /* implicit dx:ax */ + idx = pProc->localId.newLongReg (TYPE_LONG_SIGN, rDX, rAX, i); + newExp = COND_EXPR::idLongIdx (idx); + duIcode->setRegDU (rDX, du); + duIcode->setRegDU (rAX, du); + } + + else if ((sd == DST) && (pIcode->ic.ll.flg & IM_TMP_DST) == IM_TMP_DST) + { /* implicit tmp */ + newExp = COND_EXPR::idReg (rTMP, 0, &pProc->localId); + duIcode->setRegDU(rTMP, (operDu)eUSE); + } + + else if ((sd == SRC) && ((pIcode->ic.ll.flg & I) == I)) /* constant */ + newExp = COND_EXPR::idKte (pIcode->ic.ll.immed.op, 2); + + else if (pm->regi == 0) /* global variable */ + newExp = COND_EXPR::idGlob(pm->segValue, pm->off); + + else if (pm->regi < INDEXBASE) /* register */ + { + newExp = COND_EXPR::idReg (pm->regi, (sd == SRC) ? pIcode->ic.ll.flg : + pIcode->ic.ll.flg & NO_SRC_B, &pProc->localId); + duIcode->setRegDU( pm->regi, du); + } + + else if (pm->off) /* offset */ + { + if ((pm->seg == rSS) && (pm->regi == INDEXBASE + 6)) /* idx on bp */ + { + if (pm->off >= 0) /* argument */ + newExp = COND_EXPR::idParam (pm->off, &pProc->args); + else /* local variable */ + newExp = COND_EXPR::idLoc (pm->off, &pProc->localId); + } + else if ((pm->seg == rDS) && (pm->regi == INDEXBASE + 7)) /* bx */ + { + if (pm->off > 0) /* global variable */ + newExp = idCondExpIdxGlob (pm->segValue, pm->off, rBX,&pProc->localId); + else + newExp = COND_EXPR::idOther (pm->seg, pm->regi, pm->off); + duIcode->setRegDU( rBX, eUSE); + } + else /* idx <> bp, bx */ + newExp = COND_EXPR::idOther (pm->seg, pm->regi, pm->off); + /**** check long ops, indexed global var *****/ + } + + else /* (pm->regi >= INDEXBASE && pm->off = 0) => indexed && no off */ + { + if ((pm->seg == rDS) && (pm->regi > INDEXBASE + 3)) /* dereference */ + { + switch (pm->regi) { + case INDEXBASE + 4: newExp = COND_EXPR::idReg(rSI, 0, 
&pProc->localId); + duIcode->setRegDU( rSI, du); + break; + case INDEXBASE + 5: newExp = COND_EXPR::idReg(rDI, 0, &pProc->localId); + duIcode->setRegDU( rDI, du); + break; + case INDEXBASE + 6: newExp = COND_EXPR::idReg(rBP, 0, &pProc->localId); + break; + case INDEXBASE + 7: newExp = COND_EXPR::idReg(rBX, 0, &pProc->localId); + duIcode->setRegDU( rBX, du); + break; + default: + newExp = 0; + assert(false); + } + newExp = COND_EXPR::unary (DEREFERENCE, newExp); + } + else + newExp = COND_EXPR::idOther (pm->seg, pm->regi, 0); + } + + return (newExp); +} + + +/* Returns the identifier type */ +condId ICODE::idType(opLoc sd) +{ + ICODEMEM *pm; + + pm = (sd == SRC) ? &ic.ll.src : &ic.ll.dst; + + if ((sd == SRC) && ((ic.ll.flg & I) == I)) + return (CONSTANT); + else if (pm->regi == 0) + return (GLOB_VAR); + else if (pm->regi < INDEXBASE) + return (REGISTER); + else if ((pm->seg == rSS) && (pm->regi == INDEXBASE)) + { + if (pm->off >= 0) + return (PARAM); + else + return (LOCAL_VAR); + } + else + return (OTHER); +} + + +/* Size of hl types */ +Int hlSize[] = {2, 1, 1, 2, 2, 4, 4, 4, 2, 2, 1, 4, 4}; + + +/* Returns the type of the expression */ +Int hlTypeSize (const COND_EXPR *expr, Function * pproc) +{ + Int first, second; + + if (expr == NULL) + return (2); /* for TYPE_UNKNOWN */ + + switch (expr->type) { + case BOOLEAN_OP: + first = hlTypeSize (expr->expr.boolExpr.lhs, pproc); + second = hlTypeSize (expr->expr.boolExpr.rhs, pproc); + if (first > second) + return (first); + else + return (second); + + case NEGATION: case ADDRESSOF: + case POST_INC: case POST_DEC: + case PRE_INC: case PRE_DEC: + case DEREFERENCE: return (hlTypeSize (expr->expr.unaryExp, pproc)); + + case IDENTIFIER: + switch (expr->expr.ident.idType) + { + case GLOB_VAR: + return (symtab.sym[expr->expr.ident.idNode.globIdx].size); + case REGISTER: + if (expr->expr.ident.regiType == BYTE_REG) + return (1); + else + return (2); + case LOCAL_VAR: + return 
(hlSize[pproc->localId.id_arr[expr->expr.ident.idNode.localIdx].type]); + case PARAM: + return (hlSize[pproc->args.sym[expr->expr.ident.idNode.paramIdx].type]); + case GLOB_VAR_IDX: + return (hlSize[pproc->localId.id_arr[expr->expr.ident.idNode.idxGlbIdx].type]); + case CONSTANT: + return (expr->expr.ident.idNode.kte.size); + case STRING: + return (2); + case LONG_VAR: + return (4); + case FUNCTION: + return (hlSize[expr->expr.ident.idNode.call.proc->retVal.type]); + case OTHER: + return (2); + } /* eos */ + break; + } + return 2; // CC: is this correct? +} + + +/* Returns the type of the expression */ +hlType expType (const COND_EXPR *expr, Function * pproc) +{ + hlType first, second; + + if (expr == NULL) + return (TYPE_UNKNOWN); + + switch (expr->type) + { + case BOOLEAN_OP: + first = expType (expr->expr.boolExpr.lhs, pproc); + second = expType (expr->expr.boolExpr.rhs, pproc); + if (first != second) + { + if (hlTypeSize (expr->expr.boolExpr.lhs, pproc) > + hlTypeSize (expr->expr.boolExpr.rhs, pproc)) + return (first); + else + return (second); + } + else + return (first); + + case POST_INC: case POST_DEC: + case PRE_INC: case PRE_DEC: + case NEGATION: return (expType (expr->expr.unaryExp, pproc)); + + case ADDRESSOF: return (TYPE_PTR); /***????****/ + case DEREFERENCE: return (TYPE_PTR); + case IDENTIFIER: + switch (expr->expr.ident.idType) + { + case GLOB_VAR: + return (symtab.sym[expr->expr.ident.idNode.globIdx].type); + case REGISTER: + if (expr->expr.ident.regiType == BYTE_REG) + return (TYPE_BYTE_SIGN); + else + return (TYPE_WORD_SIGN); + case LOCAL_VAR: + return (pproc->localId.id_arr[expr->expr.ident.idNode.localIdx].type); + case PARAM: + return (pproc->args.sym[expr->expr.ident.idNode.paramIdx].type); + case GLOB_VAR_IDX: + return (pproc->localId.id_arr[expr->expr.ident.idNode.idxGlbIdx].type); + case CONSTANT: + return (TYPE_CONST); + case STRING: + return (TYPE_STR); + case LONG_VAR: + return 
(pproc->localId.id_arr[expr->expr.ident.idNode.longIdx].type); + case FUNCTION: + return (expr->expr.ident.idNode.call.proc->retVal.type); + case OTHER: + return (TYPE_UNKNOWN); + } /* eos */ + case UNKNOWN_OP: + assert(false); + return (TYPE_UNKNOWN); + } + return TYPE_UNKNOWN; // CC: Correct? +} + + +/* Removes the register from the tree. If the register was part of a long + * register (eg. dx:ax), the node gets transformed into an integer register + * node. */ +void removeRegFromLong (byte regi, LOCAL_ID *locId, COND_EXPR *tree) +{ + IDENTTYPE* ident; /* ptr to an identifier */ + byte otherRegi; /* high or low part of long register */ + + switch (tree->type) { + case BOOLEAN_OP: + break; + case POST_INC: case POST_DEC: + case PRE_INC: case PRE_DEC: + case NEGATION: case ADDRESSOF: + case DEREFERENCE: + break; + case IDENTIFIER: + ident = &tree->expr.ident; + if (ident->idType == LONG_VAR) + { + otherRegi = otherLongRegi (regi, ident->idNode.longIdx, locId); + ident->idType = REGISTER; + ident->regiType = WORD_REG; + ident->idNode.regiIdx = locId->newByteWordReg(TYPE_WORD_SIGN,otherRegi); + } + break; + } +} + + +/* Returns the string located in image, formatted in C format. */ +static std::string getString (Int offset) +{ + ostringstream o; + Int strLen, i; + + strLen = strSize (&prog.Image[offset], '\0'); + o << '"'; + for (i = 0; i < strLen; i++) + o<type) + { + case BOOLEAN_OP: + outStr << "("; + outStr << walkCondExpr(expr->expr.boolExpr.lhs, pProc, numLoc); + outStr << condOpSym[expr->expr.boolExpr.op]; + outStr << walkCondExpr(expr->expr.boolExpr.rhs, pProc, numLoc); + outStr << ")"; + break; + + case NEGATION: + if (expr->expr.unaryExp->type == IDENTIFIER) + { + needBracket = FALSE; + outStr << "!"; + } + else + outStr << "! 
("; + outStr << walkCondExpr (expr->expr.unaryExp, pProc, numLoc); + if (needBracket == TRUE) + outStr << ")"; + break; + + case ADDRESSOF: + if (expr->expr.unaryExp->type == IDENTIFIER) + { + needBracket = FALSE; + outStr << "&"; + } + else + outStr << "&("; + outStr << walkCondExpr (expr->expr.unaryExp, pProc, numLoc); + if (needBracket == TRUE) + outStr << ")"; + break; + + case DEREFERENCE: + outStr << "*"; + if (expr->expr.unaryExp->type == IDENTIFIER) + needBracket = FALSE; + else + outStr << "("; + outStr << walkCondExpr (expr->expr.unaryExp, pProc, numLoc); + if (needBracket == TRUE) + outStr << ")"; + break; + + case POST_INC: + outStr << walkCondExpr (expr->expr.unaryExp, pProc, numLoc) << "++"; + break; + + case POST_DEC: + outStr << walkCondExpr (expr->expr.unaryExp, pProc, numLoc) << "--"; + break; + + case PRE_INC: + outStr << "++"<< walkCondExpr (expr->expr.unaryExp, pProc, numLoc); + break; + + case PRE_DEC: + outStr << "--"<< walkCondExpr (expr->expr.unaryExp, pProc, numLoc); + break; + + case IDENTIFIER: + std::ostringstream o; + switch (expr->expr.ident.idType) + { + case GLOB_VAR: + o << symtab.sym[expr->expr.ident.idNode.globIdx].name; + break; + case REGISTER: + id = &pProc->localId.id_arr[expr->expr.ident.idNode.regiIdx]; + if (id->name[0] == '\0') /* no name */ + { + sprintf (id->name, "loc%ld", ++(*numLoc)); + if (id->id.regi < rAL) + cCode.appendDecl("%s %s; /* %s */\n",hlTypes[id->type], id->name,wordReg[id->id.regi - rAX]); + else + cCode.appendDecl("%s %s; /* %s */\n",hlTypes[id->type], id->name,byteReg[id->id.regi - rAL]); + } + if (id->hasMacro) + o << id->macro << "("<name<<")"; + else + o << id->name; + break; + + case LOCAL_VAR: + o << pProc->localId.id_arr[expr->expr.ident.idNode.localIdx].name; + break; + + case PARAM: + psym = &pProc->args.sym[expr->expr.ident.idNode.paramIdx]; + if (psym->hasMacro) + o << psym->macro<<"("<name<< ")"; + else + o << psym->name; + break; + + case GLOB_VAR_IDX: + bwGlb = 
&pProc->localId.id_arr[expr->expr.ident.idNode.idxGlbIdx].id.bwGlb; + o << (bwGlb->seg << 4) + bwGlb->off << "["<regi - rAX]<<"]"; + break; + + case CONSTANT: + if (expr->expr.ident.idNode.kte.kte < 1000) + o << expr->expr.ident.idNode.kte.kte; + else + o << "0x"<expr.ident.idNode.kte.kte; + break; + + case STRING: + o << getString (expr->expr.ident.idNode.strIdx); + break; + + case LONG_VAR: + id = &pProc->localId.id_arr[expr->expr.ident.idNode.longIdx]; + if (id->name[0] != '\0') /* STK_FRAME & REG w/name*/ + o << id->name; + else if (id->loc == REG_FRAME) + { + sprintf (id->name, "loc%ld", ++(*numLoc)); + cCode.appendDecl("%s %s; /* %s:%s */\n",hlTypes[id->type], id->name,wordReg[id->id.longId.h - rAX],wordReg[id->id.longId.l - rAX]); + o << id->name; + pProc->localId.propLongId (id->id.longId.l,id->id.longId.h, id->name); + } + else /* GLB_FRAME */ + { + if (id->id.longGlb.regi == 0) /* not indexed */ + o << "[" << (id->id.longGlb.seg<<4) + id->id.longGlb.offH <<"]"; + else if (id->id.longGlb.regi == rBX) + o << "[" << (id->id.longGlb.seg<<4) + id->id.longGlb.offH <<"][bx]"; + } + break; + + case FUNCTION: + o << writeCall (expr->expr.ident.idNode.call.proc,expr->expr.ident.idNode.call.args, pProc, numLoc); + break; + + case OTHER: + off = expr->expr.ident.idNode.other.off; + o << wordReg[expr->expr.ident.idNode.other.seg - rAX]<< "["; + o << idxReg[expr->expr.ident.idNode.other.regi - INDEXBASE]; + if (off < 0) + o << "-"<< hexStr (-off); + else if (off>0) + o << "+"<< hexStr (off); + o << "]"; + } /* eos */ + outStr << o.str(); + break; + } + + return outStr.str(); +} + + +/* Makes a copy of the given expression. Allocates newExp storage for each + * node. Returns the copy. 
*/ +COND_EXPR *COND_EXPR::clone() +{ + COND_EXPR* newExp=0; /* Expression node copy */ + + switch (type) + { + case BOOLEAN_OP: + newExp = new COND_EXPR(*this); + newExp->expr.boolExpr.lhs = expr.boolExpr.lhs->clone(); + newExp->expr.boolExpr.rhs = expr.boolExpr.rhs->clone(); + break; + + case NEGATION: + case ADDRESSOF: + case DEREFERENCE: + newExp = new COND_EXPR(*this); + newExp->expr.unaryExp = expr.unaryExp->clone(); + break; + + case IDENTIFIER: + newExp = new COND_EXPR(*this); + } + return (newExp); +} + + +/* Changes the boolean conditional operator at the root of this expression */ +void COND_EXPR::changeBoolOp (condOp newOp) +{ + expr.boolExpr.op = newOp; +} + + +/* Inserts the expression exp into the tree at the location specified by the + * register regi */ +boolT insertSubTreeReg (COND_EXPR *expr, COND_EXPR **tree, byte regi,LOCAL_ID *locsym) +{ + byte treeReg; + + if (*tree == NULL) + return FALSE; + + switch ((*tree)->type) { + case IDENTIFIER: + if ((*tree)->expr.ident.idType == REGISTER) + { + treeReg = locsym->id_arr[(*tree)->expr.ident.idNode.regiIdx].id.regi; + if (treeReg == regi) /* word reg */ + { + *tree = expr; + return TRUE; + } + else if ((regi >= rAX) && (regi <= rBX)) /* word/byte reg */ + { + if ((treeReg == (regi + rAL-1)) || (treeReg == (regi + rAH-1))) + { + *tree = expr; + return TRUE; + } + } + } + return FALSE; + + case BOOLEAN_OP: + if (insertSubTreeReg (expr, &(*tree)->expr.boolExpr.lhs, regi, locsym)) + return TRUE; + if (insertSubTreeReg (expr, &(*tree)->expr.boolExpr.rhs, regi, locsym)) + return TRUE; + return FALSE; + + case NEGATION: + case ADDRESSOF: + case DEREFERENCE: + if (insertSubTreeReg(expr, &(*tree)->expr.unaryExp,regi, locsym)) + return TRUE; + return FALSE; + } + return FALSE; +} + + +/* Inserts the expression exp into the tree at the location specified by the + * long register index longIdx*/ +boolT insertSubTreeLongReg (COND_EXPR *exp, COND_EXPR **tree, Int longIdx) +{ + switch ((*tree)->type) { + case 
IDENTIFIER: if ((*tree)->expr.ident.idNode.longIdx == longIdx) + { + *tree = exp; + return TRUE; + } + return FALSE; + + case BOOLEAN_OP: if (insertSubTreeLongReg (exp, &(*tree)->expr.boolExpr.lhs, longIdx)) + return TRUE; + if (insertSubTreeLongReg (exp, &(*tree)->expr.boolExpr.rhs, longIdx)) + return TRUE; + return FALSE; + + case NEGATION: + case ADDRESSOF: + case DEREFERENCE: if (insertSubTreeLongReg (exp, &(*tree)->expr.unaryExp, longIdx)) + return TRUE; + return FALSE; + } + return FALSE; +} + + +/* Recursively deallocates the abstract syntax tree rooted at *exp */ +void COND_EXPR::release() +{ + switch (type) + { + case BOOLEAN_OP: + expr.boolExpr.lhs->release(); + expr.boolExpr.rhs->release(); + break; + case NEGATION: + case ADDRESSOF: + case DEREFERENCE: + expr.unaryExp->release(); + break; + } + delete (this); +} + + +/*************************************************************************** + * Expression stack functions + **************************************************************************/ + +/* Reinitalizes the expression stack (expStk) to NULL, by freeing all the + * space allocated (if any). */ +void initExpStk() +{ + expStk.clear(); +} + + +/* Pushes the given expression onto the local stack (expStk). */ +void pushExpStk (COND_EXPR *expr) +{ + expStk.push_back(expr); +} + + +/* Returns the element on the top of the local expression stack (expStk), + * and deallocates the space allocated by this node. + * If there are no elements on the stack, returns NULL. 
*/ +COND_EXPR *popExpStk() +{ + if(expStk.empty()) + return 0; + COND_EXPR *topExp = expStk.back(); + expStk.pop_back(); + return topExp; +} + +/* Returns the number of elements available in the expression stack */ +Int numElemExpStk() +{ + return expStk.size(); +} + +/* Returns whether the expression stack is empty or not */ +boolT emptyExpStk() +{ + return expStk.empty(); +} diff --git a/src/backend.cpp b/src/backend.cpp new file mode 100644 index 0000000..008ca12 --- /dev/null +++ b/src/backend.cpp @@ -0,0 +1,668 @@ +/***************************************************************************** + * Project: dcc + * File: backend.c + * Purpose: Back-end module. Generates C code for each procedure. + * (C) Cristina Cifuentes + ****************************************************************************/ +#include +#include + +#include "dcc.h" +#include +#include +#include + +bundle cCode; /* Procedure declaration and code */ +using namespace std; +/* Indentation buffer */ +#define indSize 81 /* size of the indentation buffer. Each indentation + * is of 4 spaces => max. 20 indentation levels */ +static char indentBuf[indSize] = + " "; + + +/* Indentation according to the depth of the statement */ +static char *indent (Int indLevel) +{ + + + return (&indentBuf[indSize-(indLevel*4)-1]); +} + + +static Int getNextLabel() +/* Returns a unique index to the next label */ +{ static Int labelIdx = 1; /* index of the next label */ + + return (labelIdx++); +} + + +/* displays statistics on the subroutine */ +void Function::displayStats () +{ + printf("\nStatistics - Subroutine %s\n", name); + printf ("Number of Icode instructions:\n"); + printf (" Low-level : %4d\n", stats.numLLIcode); + if (! (flg & PROC_ASM)) + { + printf (" High-level: %4d\n", stats.numHLIcode); + printf (" Percentage reduction: %2.2f%%\n", 100.0 - (stats.numHLIcode * + 100.0) / stats.numLLIcode); + } +} + + +/**** this proc is not required any more?? 
****/ +#if 0 +static void fixupLabels (PPROC pProc) +/* Checks the graph (pProc->cfg) for any nodes that have labels, and gives + * a unique label number for it. This label is placed in the associated + * icode for the node (pProc->Icode). The procedure is done in sequential + * order of dsfLast numbering. */ +{ Int i; /* index into the dfsLast array */ + PBB *dfsLast; /* pointer to the dfsLast array */ + + dfsLast = pProc->dfsLast; + for (i = 0; i < pProc->numBBs; i++) + if (dfsLast[i]->flg/* & BB_HAS_LABEL*/) { + pProc->Icode.icode[dfsLast[i]->start].ic.ll.flg |= HLL_LABEL; + pProc->Icode.icode[dfsLast[i]->start].ic.ll.hllLabNum = getNextLabel(); + } +} +#endif + + +/* Returns the corresponding C string for the given character c. Character + * constants such as carriage return and line feed, require 2 C characters. */ +char *cChar (byte c) +{ + static char res[3]; + + switch (c) { + case 0x8: /* backspace */ + sprintf (res, "\\b"); + break; + case 0x9: /* horizontal tab */ + sprintf (res, "\\t"); + break; + case 0x0A: /* new line */ + sprintf (res, "\\n"); + break; + case 0x0C: /* form feed */ + sprintf (res, "\\f"); + break; + case 0x0D: /* carriage return */ + sprintf (res, "\\r"); + break; + default: /* any other character*/ + sprintf (res, "%c", c); + } + return (res); +} + + +/* Prints the variable's name and initial contents on the file. + * Note: to get to the value of the variable: + * com file: prog.Image[operand] + * exe file: prog.Image[operand+0x100] */ +static void printGlobVar (SYM * psym) +{ + Int j; + dword relocOp = prog.fCOM ? 
psym->label : psym->label + 0x100; + char *strContents; /* initial contents of variable */ + + switch (psym->size) { + case 1: cCode.appendDecl( "byte\t%s = %ld;\n", + psym->name, prog.Image[relocOp]); + break; + case 2: cCode.appendDecl( "word\t%s = %ld;\n", + psym->name, LH(prog.Image+relocOp)); + break; + case 4: if (psym->type == TYPE_PTR) /* pointer */ + cCode.appendDecl( "word\t*%s = %ld;\n", + psym->name, LH(prog.Image+relocOp)); + else /* char */ + cCode.appendDecl( + "char\t%s[4] = \"%c%c%c%c\";\n", + psym->name, prog.Image[relocOp], + prog.Image[relocOp+1], prog.Image[relocOp+2], + prog.Image[relocOp+3]); + break; + default:strContents = (char *)allocMem((psym->size*2+1) *sizeof(char)); + strContents[0] = '\0'; + for (j=0; j < psym->size; j++) + strcat (strContents, cChar(prog.Image[relocOp + j])); + cCode.appendDecl( "char\t*%s = \"%s\";\n", + psym->name, strContents); + } +} + + +// Note: Not called at present. +/* Writes the contents of the symbol table, along with any variable + * initialization. */ +static void writeGlobSymTable() +{ + Int idx; + char type[10]; + SYM * pSym; + + if (symtab.csym) + { + cCode.appendDecl( "/* Global variables */\n"); + for (idx = 0; idx < symtab.csym; idx++) + { + pSym = &symtab.sym[idx]; + if (symtab.sym[idx].duVal.isUSE_VAL()) /* first used */ + printGlobVar (&(symtab.sym[idx])); + else { /* first defined */ + switch (pSym->size) { + case 1: strcpy (type, "byte\t"); break; + case 2: strcpy (type, "int\t"); break; + case 4: if (pSym->type == TYPE_PTR) + strcpy (type, "int\t*"); + else + strcpy (type, "char\t*"); + break; + default: strcpy (type, "char\t*"); + } + cCode.appendDecl( "%s%s;\t/* size = %ld */\n", + type, pSym->name, pSym->size); + } + } + cCode.appendDecl( "\n"); + } +} + + +/* Writes the header information and global variables to the output C file + * fp. 
*/ +static void writeHeader (std::ostream &ios, char *fileName) +{ + /* Write header information */ + newBundle (&cCode); + cCode.appendDecl( "/*\n"); + cCode.appendDecl( " * Input file\t: %s\n", fileName); + cCode.appendDecl( " * File type\t: %s\n", (prog.fCOM)?"COM":"EXE"); + cCode.appendDecl( " */\n\n#include \"dcc.h\"\n\n"); + + /* Write global symbol table */ + /** writeGlobSymTable(); *** need to change them into locident fmt ***/ + writeBundle (ios, cCode); + freeBundle (&cCode); +} + + +/* Writes the registers that are set in the bitvector */ +static void writeBitVector (dword regi) +{ Int j; + + for (j = 0; j < INDEXBASE; j++) + { + if ((regi & power2(j)) != 0) + printf ("%s ", allRegs[j]); + } +} + + +/* Checks the given icode to determine whether it has a label associated + * to it. If so, a goto is emitted to this label; otherwise, a new label + * is created and a goto is also emitted. + * Note: this procedure is to be used when the label is to be backpatched + * onto code in cCode.code */ +static void emitGotoLabel (ICODE * pt, Int indLevel) +{ + if (! (pt->ic.ll.flg & HLL_LABEL)) /* node hasn't got a lab */ + { + /* Generate new label */ + pt->ic.ll.hllLabNum = getNextLabel(); + pt->ic.ll.flg |= HLL_LABEL; + + /* Node has been traversed already, so backpatch this label into + * the code */ + addLabelBundle (cCode.code, pt->codeIdx, pt->ic.ll.hllLabNum); + } + cCode.appendCode( "%sgoto L%ld;\n", indent(indLevel), + pt->ic.ll.hllLabNum); + stats.numHLIcode++; +} + + +// Note: Not currently called! +static void emitFwdGotoLabel (ICODE * pt, Int indLevel) +/* Checks the given icode to determine whether it has a label associated + * to it. If so, a goto is emitted to this label; otherwise, a new label + * is created and a goto is also emitted. + * Note: this procedure is to be used when the label is to be forward on + * the code; that is, the target code has not been traversed yet. */ +{ + if (! 
(pt->ic.ll.flg & HLL_LABEL)) /* node hasn't got a lab */ + { + /* Generate new label */ + pt->ic.ll.hllLabNum = getNextLabel(); + pt->ic.ll.flg |= HLL_LABEL; + } + cCode.appendCode( "%sgoto l%ld;\n", indent(indLevel), + pt->ic.ll.hllLabNum); +} + + +/* Writes the code for the current basic block. + * Args: pBB: pointer to the current basic block. + * Icode: pointer to the array of icodes for current procedure. + * lev: indentation level - used for formatting. */ +static void writeBB (const BB * const pBB, ICODE * hli, Int lev, Function * pProc, Int *numLoc) +{ Int i, last; + char *line; /* Pointer to the HIGH-LEVEL line */ + + /* Save the index into the code table in case there is a later goto + * into this instruction (first instruction of the BB) */ + hli[pBB->start].codeIdx = nextBundleIdx (&cCode.code); + + /* Generate code for each hlicode that is not a HLI_JCOND */ + for (i = pBB->start, last = i + pBB->length; i < last; i++) + if ((hli[i].type == HIGH_LEVEL) && (hli[i].invalid == FALSE)) + { + line = write1HlIcode (hli[i].ic.hl, pProc, numLoc); + if (line[0] != '\0') + { + cCode.appendCode( "%s%s", indent(lev), line); + stats.numHLIcode++; + } + if (option.verbose) + hli[i].writeDU(i); + } + //if (hli[i].invalid) + //printf("Invalid icode: %d!\n", hli[i].invalid); +} + + +/* Recursive procedure that writes the code for the given procedure, pointed + * to by pBB. + * Parameters: pBB: pointer to the cfg. + * Icode: pointer to the Icode array for the cfg graph of the + * current procedure. + * indLevel: indentation level - used for formatting. 
+ * numLoc: last # assigned to local variables */ +void BB::writeCode (Int indLevel, Function * pProc , Int *numLoc,Int latchNode, Int _ifFollow) +{ + Int follow, /* ifFollow */ + _loopType, /* Type of loop, if any */ + _nodeType; /* Type of node */ + BB * succ, *latch; /* Successor and latching node */ + ICODE * picode; /* Pointer to HLI_JCOND instruction */ + char *l; /* Pointer to HLI_JCOND expression */ + boolT emptyThen, /* THEN clause is empty */ + repCond; /* Repeat condition for while() */ + + /* Check if this basic block should be analysed */ + if ((_ifFollow != UN_INIT) && (this == pProc->dfsLast[_ifFollow])) + return; + + if (traversed == DFS_ALPHA) + return; + traversed = DFS_ALPHA; + + /* Check for start of loop */ + repCond = FALSE; + latch = NULL; + _loopType = loopType; + if (_loopType) + { + latch = pProc->dfsLast[this->latchNode]; + switch (_loopType) + { + case WHILE_TYPE: + picode = pProc->Icode.GetIcode(start + length - 1); + + /* Check for error in while condition */ + if (picode->ic.hl.opcode != HLI_JCOND) + reportError (WHILE_FAIL); + + /* Check if condition is more than 1 HL instruction */ + if (numHlIcodes > 1) + { + /* Write the code for this basic block */ + writeBB(this, pProc->Icode.GetFirstIcode(), indLevel, pProc, numLoc); + repCond = TRUE; + } + + /* Condition needs to be inverted if the loop body is along + * the THEN path of the header node */ + if (edges[ELSE].BBptr->dfsLastNum == loopFollow) + inverseCondOp (&picode->ic.hl.oper.exp); + { + std::string e=walkCondExpr (picode->ic.hl.oper.exp, pProc, numLoc); + cCode.appendCode( "\n%swhile (%s) {\n", indent(indLevel),e.c_str()); + } + picode->invalidate(); + break; + + case REPEAT_TYPE: + cCode.appendCode( "\n%sdo {\n", indent(indLevel)); + picode = pProc->Icode.GetIcode(latch->start+latch->length-1); + picode->invalidate(); + break; + + case ENDLESS_TYPE: + cCode.appendCode( "\n%sfor (;;) {\n", indent(indLevel)); + } + stats.numHLIcode += 1; + indLevel++; + } + + /* Write the code 
for this basic block */ + if (repCond == FALSE) + writeBB (this, pProc->Icode.GetFirstIcode(), indLevel, pProc, numLoc); + + /* Check for end of path */ + _nodeType = nodeType; + if (_nodeType == RETURN_NODE || _nodeType == TERMINATE_NODE || + _nodeType == NOWHERE_NODE || (dfsLastNum == latchNode)) + return; + + /* Check type of loop/node and process code */ + if (_loopType) /* there is a loop */ + { + assert(latch); + if (this != latch) /* loop is over several bbs */ + { + if (_loopType == WHILE_TYPE) + { + succ = edges[THEN].BBptr; + if (succ->dfsLastNum == loopFollow) + succ = edges[ELSE].BBptr; + } + else + succ = edges[0].BBptr; + if (succ->traversed != DFS_ALPHA) + succ->writeCode (indLevel, pProc, numLoc, latch->dfsLastNum,_ifFollow); + else /* has been traversed so we need a goto */ + emitGotoLabel (pProc->Icode.GetIcode(succ->start), indLevel); + } + + /* Loop epilogue: generate the loop trailer */ + indLevel--; + if (_loopType == WHILE_TYPE) + { + /* Check if there is need to repeat other statements involved + * in while condition, then, emit the loop trailer */ + if (repCond) + writeBB (this, pProc->Icode.GetFirstIcode(), indLevel+1, pProc, numLoc); + cCode.appendCode( "%s} /* end of while */\n",indent(indLevel)); + } + else if (_loopType == ENDLESS_TYPE) + cCode.appendCode( "%s} /* end of loop */\n",indent(indLevel)); + else if (_loopType == REPEAT_TYPE) + { + if (picode->ic.hl.opcode != HLI_JCOND) + reportError (REPEAT_FAIL); + { + string e=walkCondExpr (picode->ic.hl.oper.exp, pProc, numLoc); + cCode.appendCode( "%s} while (%s);\n", indent(indLevel),e.c_str()); + } + } + + /* Recurse on the loop follow */ + if (loopFollow != MAX) + { + succ = pProc->dfsLast[loopFollow]; + if (succ->traversed != DFS_ALPHA) + succ->writeCode (indLevel, pProc, numLoc, latchNode, _ifFollow); + else /* has been traversed so we need a goto */ + emitGotoLabel (pProc->Icode.GetIcode(succ->start), indLevel); + } + } + + else /* no loop, process nodeType of the graph */ + { + 
if (_nodeType == TWO_BRANCH) /* if-then[-else] */ + { + stats.numHLIcode++; + indLevel++; + emptyThen = FALSE; + + if (ifFollow != MAX) /* there is a follow */ + { + /* process the THEN part */ + follow = ifFollow; + succ = edges[THEN].BBptr; + if (succ->traversed != DFS_ALPHA) /* not visited */ + { + if (succ->dfsLastNum != follow) /* THEN part */ + { + l = writeJcond ( pProc->Icode.GetIcode(start + length -1)->ic.hl, + pProc, numLoc); + cCode.appendCode( "\n%s%s", indent(indLevel-1), l); + succ->writeCode (indLevel, pProc, numLoc, latchNode,follow); + } + else /* empty THEN part => negate ELSE part */ + { + l = writeJcondInv ( pProc->Icode.GetIcode(start + length -1)->ic.hl, + pProc, numLoc); + cCode.appendCode( "\n%s%s", indent(indLevel-1), l); + edges[ELSE].BBptr->writeCode (indLevel, pProc, numLoc, latchNode, follow); + emptyThen = TRUE; + } + } + else /* already visited => emit label */ + emitGotoLabel (pProc->Icode.GetIcode(succ->start), indLevel); + + /* process the ELSE part */ + succ = edges[ELSE].BBptr; + if (succ->traversed != DFS_ALPHA) /* not visited */ + { + if (succ->dfsLastNum != follow) /* ELSE part */ + { + cCode.appendCode( "%s}\n%selse {\n", + indent(indLevel-1), indent(indLevel - 1)); + succ->writeCode (indLevel, pProc, numLoc, latchNode, follow); + } + /* else (empty ELSE part) */ + } + else if (! 
emptyThen) /* already visited => emit label */ + { + cCode.appendCode( "%s}\n%selse {\n", + indent(indLevel-1), indent(indLevel - 1)); + emitGotoLabel (pProc->Icode.GetIcode(succ->start), indLevel); + } + cCode.appendCode( "%s}\n", indent(--indLevel)); + + /* Continue with the follow */ + succ = pProc->dfsLast[follow]; + if (succ->traversed != DFS_ALPHA) + succ->writeCode (indLevel, pProc, numLoc, latchNode,_ifFollow); + } + else /* no follow => if..then..else */ + { + l = writeJcond ( + pProc->Icode.GetIcode(start + length -1)->ic.hl, pProc, numLoc); + cCode.appendCode( "%s%s", indent(indLevel-1), l); + edges[THEN].BBptr->writeCode (indLevel, pProc, numLoc, latchNode, _ifFollow); + cCode.appendCode( "%s}\n%selse {\n", indent(indLevel-1), indent(indLevel - 1)); + edges[ELSE].BBptr->writeCode (indLevel, pProc, numLoc, latchNode, _ifFollow); + cCode.appendCode( "%s}\n", indent(--indLevel)); + } + } + + else /* fall, call, 1w */ + { + succ = edges[0].BBptr; /* fall-through edge */ + if (succ->traversed != DFS_ALPHA) + succ->writeCode (indLevel, pProc,numLoc, latchNode,_ifFollow); + } + } +} + + +/* Writes the procedure's declaration (including arguments), local variables, + * and invokes the procedure that writes the code of the given record *hli */ +void Function::codeGen (std::ostream &fs) +{ + Int i, numLoc; + //STKFRAME * args; /* Procedure arguments */ + char buf[200], /* Procedure's definition */ + arg[30]; /* One argument */ + ID *locid; /* Pointer to one local identifier */ + BB *pBB; /* Pointer to basic block */ + + /* Write procedure/function header */ + newBundle (&cCode); + if (flg & PROC_IS_FUNC) /* Function */ + cCode.appendDecl( "\n%s %s (", hlTypes[retVal.type],name); + else /* Procedure */ + cCode.appendDecl( "\nvoid %s (", name); + + /* Write arguments */ + memset (buf, 0, sizeof(buf)); + for (i = 0; i < args.sym.size(); i++) + { + if (args.sym[i].invalid == FALSE) + { + sprintf (arg,"%s %s",hlTypes[args.sym[i].type], args.sym[i].name); + strcat 
(buf, arg); + if (i < (args.numArgs - 1)) + strcat (buf, ", "); + } + } + strcat (buf, ")\n"); + cCode.appendDecl( "%s", buf); + + /* Write comments */ + writeProcComments(); + + /* Write local variables */ + if (! (flg & PROC_ASM)) + { + numLoc = 0; + for (i = 0; i < localId.csym(); i++) + { + locid = &localId.id_arr[i]; + /* Output only non-invalidated entries */ + if (locid->illegal == FALSE) + { + if (locid->loc == REG_FRAME) + { + /* Register variables are assigned to a local variable */ + if (((flg & SI_REGVAR) && (locid->id.regi == rSI)) || + ((flg & DI_REGVAR) && (locid->id.regi == rDI))) + { + sprintf (locid->name, "loc%ld", ++numLoc); + cCode.appendDecl( "int %s;\n", locid->name); + } + /* Other registers are named when they are first used in + * the output C code, and appended to the proc decl. */ + } + + else if (locid->loc == STK_FRAME) + { + /* Name local variables and output appropriate type */ + sprintf (locid->name, "loc%ld", ++numLoc); + cCode.appendDecl( "%s %s;\n",hlTypes[locid->type], locid->name); + } + } + } + } + /* Write procedure's code */ + if (flg & PROC_ASM) /* generate assembler */ + disassem (3, this); + else /* generate C */ + cfg.front()->writeCode (1, this, &numLoc, MAX, UN_INIT); + + cCode.appendCode( "}\n\n"); + writeBundle (fs, cCode); + freeBundle (&cCode); + + /* Write Live register analysis information */ + if (option.verbose) + for (i = 0; i < numBBs; i++) + { + pBB = dfsLast[i]; + if (pBB->flg & INVALID_BB) continue; /* skip invalid BBs */ + printf ("BB %d\n", i); + printf (" Start = %d, end = %d\n", pBB->start, pBB->start + + pBB->length - 1); + printf (" LiveUse = "); + writeBitVector (pBB->liveUse); + printf ("\n Def = "); + writeBitVector (pBB->def); + printf ("\n LiveOut = "); + writeBitVector (pBB->liveOut); + printf ("\n LiveIn = "); + writeBitVector (pBB->liveIn); + printf ("\n\n"); + } +} + + +/* Recursive procedure. Displays the procedure's code in depth-first order + * of the call graph. 
*/ +static void backBackEnd (char *filename, CALL_GRAPH * pcallGraph, std::ostream &ios) +{ + Int i; + + // IFace.Yield(); /* This is a good place to yield to other apps */ + + /* Check if this procedure has been processed already */ + if ((pcallGraph->proc->flg & PROC_OUTPUT) || + (pcallGraph->proc->flg & PROC_ISLIB)) + return; + pcallGraph->proc->flg |= PROC_OUTPUT; + + /* Dfs if this procedure has any successors */ + for (i = 0; i < pcallGraph->outEdges.size(); i++) + { + backBackEnd (filename, pcallGraph->outEdges[i], ios); + } + + /* Generate code for this procedure */ + stats.numLLIcode = pcallGraph->proc->Icode.GetNumIcodes(); + stats.numHLIcode = 0; + pcallGraph->proc->codeGen (ios); + + /* Generate statistics */ + if (option.Stats) + pcallGraph->proc->displayStats (); + if (! (pcallGraph->proc->flg & PROC_ASM)) + { + stats.totalLL += stats.numLLIcode; + stats.totalHL += stats.numHLIcode; + } +} + + +/* Invokes the necessary routines to produce code one procedure at a time. */ +void BackEnd (char *fileName, CALL_GRAPH * pcallGraph) +{ + char* outName, *ext; + std::ofstream fs; /* Output C file */ + + /* Get output file name */ + outName = strcpy ((char*)allocMem(strlen(fileName)+1), fileName); + if ((ext = strrchr (outName, '.')) != NULL) + *ext = '\0'; + strcat (outName, ".b"); /* b for beta */ + + /* Open output file */ + fs.open(outName); + if(!fs.is_open()) + fatalError (CANNOT_OPEN, outName); + printf ("dcc: Writing C beta file %s\n", outName); + + /* Header information */ + writeHeader (fs, fileName); + + /* Initialize total Icode instructions statistics */ + stats.totalLL = 0; + stats.totalHL = 0; + + /* Process each procedure at a time */ + backBackEnd (fileName, pcallGraph, fs); + + /* Close output file */ + fs.close(); + printf ("dcc: Finished writing C beta file\n"); +} + + diff --git a/src/bundle.cpp b/src/bundle.cpp new file mode 100644 index 0000000..11cbd89 --- /dev/null +++ b/src/bundle.cpp @@ -0,0 +1,115 @@ 
+/***************************************************************************** + * File: bundle.c + * Module that handles the bundle type (array of pointers to strings). + * (C) Cristina Cifuentes + ****************************************************************************/ + +#include "dcc.h" +#include +#include +#include +#include +#include + +#define deltaProcLines 20 + + +/* Allocates memory for a new bundle and initializes it to zero. */ +void newBundle (bundle *) +{ +} + + +/* Increments the size of the table strTab by deltaProcLines and copies all + * the strings to the new table. */ +static void incTableSize (strTable *strTab) +{ + strTab->resize(strTab->size()+deltaProcLines); +} + + +/* Appends the new line (in printf style) to the string table strTab. */ +void appendStrTab (strTable *strTab, const char *format, ...) +{ + va_list args; + char buf[lineSize]; + va_start (args, format); + vsprintf (buf, format, args); + strTab->push_back(buf); + va_end (args); +} + + +/* Returns the next available index into the table */ +Int nextBundleIdx (strTable *strTab) +{ + return (strTab->size()); +} + + +/* Adds the given label to the start of the line strTab[idx]. The first + * tab is removed and replaced by this label */ +void addLabelBundle (strTable &strTab, Int idx, Int label) +{ + char s[lineSize]; + sprintf (s, "l%ld: %s", label, strTab[idx].c_str()+4); + strTab[idx] = s; +} + + +/* Writes the contents of the string table on the file fp. */ +static void writeStrTab (std::ostream &ios, strTable &strTab) +{ + Int i; + + for (i = 0; i < strTab.size(); i++) + ios << strTab[i]; +} + + +/* Writes the contents of the bundle (procedure code and declaration) to + * a file. */ +void writeBundle (std::ostream &ios, bundle procCode) +{ + writeStrTab (ios, procCode.decl); + if (procCode.decl[procCode.decl.size() - 1][0] != ' ') + ios << "\n"; + writeStrTab (ios, procCode.code); +} + + +/* Frees the storage allocated by the string table. 
*/ +static void freeStrTab (strTable &strTab) +{ + strTab.clear(); +} + + +void freeBundle (bundle *procCode) +/* Deallocates the space taken by the bundle procCode */ +{ + freeStrTab (procCode->decl); + freeStrTab (procCode->code); +} + +void bundle::appendCode(const char *format,...) +{ + va_list args; + char buf[lineSize]={0}; + va_start (args, format); + vsprintf (buf, format, args); + code.push_back(buf); + va_end (args); +} + +void bundle::appendDecl(const char *format,...) +{ + va_list args; + char buf[lineSize]={0}; + va_start (args, format); + vsprintf (buf, format, args); + decl.push_back(buf); + va_end (args); +} + + diff --git a/src/chklib.cpp b/src/chklib.cpp new file mode 100644 index 0000000..ceb979b --- /dev/null +++ b/src/chklib.cpp @@ -0,0 +1,1018 @@ +/* + * Code to check for library functions. If found, replaces procNNNN with the + * library function name. Also checks startup code for correct DS, and the + * address of main() + * (C) Mike van Emmerik +*/ + +#include +#include +#ifdef __BORLAND__ +#include +#else +#include +#endif +#include +#include "dcc.h" +#include "perfhlib.h" + +#define NIL -1 /* Used like NULL, but 0 is valid */ + +/* Hash table structure */ +typedef struct HT_tag +{ + char htSym[SYMLEN]; + byte htPat[PATLEN]; +} HT; + +/* Structure of the prototypes table. 
Same as the struct in parsehdr.h, + except here we don't need the "next" index (the elements are already + sorted by function name) */ +typedef +struct ph_func_tag +{ + char name[SYMLEN]; /* Name of function or arg */ + hlType typ; /* Return type */ + int numArg; /* Number of args */ + int firstArg; /* Index of first arg in chain */ + // int next; /* Index of next function in chain */ + bool bVararg; /* True if variable arguements */ +} PH_FUNC_STRUCT; + + +#define NUM_PLIST 64 /* Number of entries to increase allocation by */ + +/* statics */ +char buf[100]; /* A general purpose buffer */ +int numKeys; /* Number of hash table entries (keys) */ +int numVert; /* Number of vertices in the graph (also size of g[]) */ +unsigned PatLen; /* Size of the keys (pattern length) */ +unsigned SymLen; /* Max size of the symbols, including null */ +FILE *f; /* File being read */ +static char sSigName[100]; /* Full path name of .sig file */ + +static word *T1base, *T2base; /* Pointers to start of T1, T2 */ +static word *g; /* g[] */ +static HT *ht; /* The hash table */ +static PH_FUNC_STRUCT *pFunc; /* Points to the array of func names */ +static hlType *pArg; /* Points to the array of param types */ +static int numFunc; /* Number of func names actually stored */ +static int numArg; /* Number of param names actually stored */ +#define DCCLIBS "dcclibs.dat" /* Name of the prototypes data file */ + +/* prototypes */ +void grab(int n, FILE *f); +word readFileShort(FILE *f); +void readFileSection(word* p, int len, FILE *f); +void cleanup(void); +void checkStartup(STATE *state); +void readProtoFile(void); +void fixNewline(char *s); +int searchPList(char *name); +void checkHeap(char *msg); /* For debugging */ + +void fixWildCards(byte pat[]); /* In fixwild.c */ + +static boolT locatePattern(byte *source, Int iMin, Int iMax, byte *pattern, + Int iPatLen, Int *index); + +/* * * * * * * * * * * * * * * *\ +* * +* S t a r t P a t t e r n s ( V e n d o r i d ) * +* * +\* * * * * * * * * * * 
* * * * */ +static byte pattMsC5Start[] = +{ + 0xB4, 0x30, /* Mov ah, 30 */ + 0xCD, 0x21, /* int 21 (dos version number) */ + 0x3C, 0x02, /* cmp al, 2 */ + 0x73, 0x02, /* jnb $+4 */ + 0xCD, 0x20, /* int 20 (exit) */ + 0xBF /* Mov di, DSEG */ +}; +static byte pattMsC8Start[] = +{ + 0xB4, 0x30, /* Mov ah, 30 */ + 0xCD, 0x21, /* int 21 */ + 0x3C, 0x02, /* cmp al,2 */ + 0x73, 0x05, /* jnb $+7 */ + 0x33, 0xC0, /* xor ax, ax */ + 0x06, 0x50, /* push es:ax */ + 0xCB, /* retf */ + 0xBF /* mov di, DSEG */ +}; +static byte pattMsC8ComStart[] = +{ + 0xB4, 0x30, /* Mov ah, 30 */ + 0xCD, 0x21, /* int 21 (dos version number) */ + 0x3C, 0x02, /* cmp al, 2 */ + 0x73, 0x01, /* jnb $+3 */ + 0xC3, /* ret */ + 0x8C, 0xDF /* Mov di, ds */ +}; +static byte pattBorl2Start[] = +{ + 0xBA, WILD, WILD, /* Mov dx, dseg */ + 0x2E, 0x89, 0x16, /* mov cs:[], dx */ + WILD, WILD, + 0xB4, 0x30, /* mov ah, 30 */ + 0xCD, 0x21, /* int 21 (dos version number) */ + 0x8B, 0x2E, 0x02, 0, /* mov bp, [2] */ + 0x8B, 0x1E, 0x2C, 0, /* mov bx, [2C] */ + 0x8E, 0xDA, /* mov ds, dx */ + 0xA3, WILD, WILD, /* mov [xx], ax */ + 0x8C, 0x06, WILD, WILD, /* mov [xx], es */ + 0x89, 0x1E, WILD, WILD, /* mov [xx], bx */ + 0x89, 0x2E, WILD, WILD, /* mov [xx], bp */ + 0xC7 /* mov [xx], -1 */ +}; +static byte pattBorl3Start[] = +{ + 0xBA, WILD, WILD, /* Mov dx, dseg */ + 0x2E, 0x89, 0x16, /* mov cs:[], dx */ + WILD, WILD, + 0xB4, 0x30, /* mov ah, 30 */ + 0xCD, 0x21, /* int 21 (dos version number) */ + 0x8B, 0x2E, 0x02, 0, /* mov bp, [2] */ + 0x8B, 0x1E, 0x2C, 0, /* mov bx, [2C] */ + 0x8E, 0xDA, /* mov ds, dx */ + 0xA3, WILD, WILD, /* mov [xx], ax */ + 0x8C, 0x06, WILD, WILD, /* mov [xx], es */ + 0x89, 0x1E, WILD, WILD, /* mov [xx], bx */ + 0x89, 0x2E, WILD, WILD, /* mov [xx], bp */ + 0xE8 /* call ... 
*/ +}; + +static byte pattBorl4on[] = +{ + 0x9A, 0, 0, WILD, WILD /* Call init (offset always 0) */ +}; + +static byte pattBorl4Init[] = +{ + 0xBA, WILD, WILD, /* Mov dx, dseg */ + 0x8E, 0xDA, /* mov ds, dx */ + 0x8C, 0x06, WILD, WILD, /* mov [xx], es */ + 0x8B, 0xC4, /* mov ax, sp */ + 0x05, 0x13, 0, /* add ax, 13h */ + 0xB1, 0x04, /* mov cl, 4 */ + 0xD3, 0xE8, /* shr ax, cl */ + 0x8C, 0xD2 /* mov dx, ss */ +}; + +static byte pattBorl5Init[] = +{ + 0xBA, WILD, WILD, /* Mov dx, dseg */ + 0x8E, 0xDA, /* mov ds, dx */ + 0x8C, 0x06, 0x30, 0, /* mov [0030], es */ + 0x33, 0xED, /* xor bp, bp <----- */ + 0x8B, 0xC4, /* mov ax, sp */ + 0x05, 0x13, 0, /* add ax, 13h */ + 0xB1, 0x04, /* mov cl, 4 */ + 0xD3, 0xE8, /* shr ax, cl */ + 0x8C, 0xD2 /* mov dx, ss */ +}; + +static byte pattBorl7Init[] = +{ + 0xBA, WILD, WILD, /* Mov dx, dseg */ + 0x8E, 0xDA, /* mov ds, dx */ + 0x8C, 0x06, 0x30, 0, /* mov [0030], es */ + 0xE8, WILD, WILD, /* call xxxx */ + 0xE8, WILD, WILD, /* call xxxx... offset always 00A0? */ + 0x8B, 0xC4, /* mov ax, sp */ + 0x05, 0x13, 0, /* add ax, 13h */ + 0xB1, 0x04, /* mov cl, 4 */ + 0xD3, 0xE8, /* shr ax, cl */ + 0x8C, 0xD2 /* mov dx, ss */ +}; + + +static byte pattLogiStart[] = +{ + 0xEB, 0x04, /* jmp short $+6 */ + WILD, WILD, /* Don't know what this is */ + WILD, WILD, /* Don't know what this is */ + 0xB8, WILD, WILD, /* mov ax, dseg */ + 0x8E, 0xD8 /* mov ds, ax */ +}; + +static byte pattTPasStart[] = +{ + 0xE9, 0x79, 0x2C /* Jmp 2D7C - Turbo pascal 3.0 */ +}; + + + +/* * * * * * * * * * * * * * * *\ +* * +* M a i n P a t t e r n s ( M o d e l i d ) * +* * +\* * * * * * * * * * * * * * * */ + + +/* This pattern works for MS and Borland, small and tiny model */ +static byte pattMainSmall[] = +{ + 0xFF, 0x36, WILD, WILD, /* Push environment pointer */ + 0xFF, 0x36, WILD, WILD, /* Push argv */ + 0xFF, 0x36, WILD, WILD, /* Push argc */ + 0xE8, WILD, WILD /* call _main */ + /* 0x50, /* push ax... 
not in Borland V3 */ + /* 0xE8 /* call _exit */ +}; +/* Num bytes from start pattern to the relative offset of main() */ +#define OFFMAINSMALL 13 + +/* This pattern works for MS and Borland, medium model */ +static byte pattMainMedium[] = +{ + 0xFF, 0x36, WILD, WILD, /* Push environment pointer */ + 0xFF, 0x36, WILD, WILD, /* Push argv */ + 0xFF, 0x36, WILD, WILD, /* Push argc */ + 0x9A, WILD, WILD, WILD, WILD /* call far _main */ + /* 0x50 /* push ax */ + /* 0x0E, /* push cs NB not tested Borland */ + /* 0xE8 /* call _exit */ +}; +/* Num bytes from start pattern to the relative offset of main() */ +#define OFFMAINMEDIUM 13 + +/* This pattern works for MS and Borland, compact model */ +static byte pattMainCompact[] = +{ + 0xFF, 0x36, WILD, WILD, /* Push environment pointer lo */ + 0xFF, 0x36, WILD, WILD, /* Push environment pointer hi */ + 0xFF, 0x36, WILD, WILD, /* Push argv lo */ + 0xFF, 0x36, WILD, WILD, /* Push argv hi */ + 0xFF, 0x36, WILD, WILD, /* Push argc */ + 0xE8, WILD, WILD, /* call _main */ + /* 0x50, /* push ax */ + /* 0xE8 /* call _exit */ +}; +/* Num bytes from start pattern to the relative offset of main() */ +#define OFFMAINCOMPACT 21 + +/* This pattern works for MS and Borland, large model */ +static byte pattMainLarge[] = +{ + 0xFF, 0x36, WILD, WILD, /* Push environment pointer lo */ + 0xFF, 0x36, WILD, WILD, /* Push environment pointer hi */ + 0xFF, 0x36, WILD, WILD, /* Push argv lo */ + 0xFF, 0x36, WILD, WILD, /* Push argv hi */ + 0xFF, 0x36, WILD, WILD, /* Push argc */ + 0x9A, WILD, WILD, WILD, WILD /* call far _main */ + /* 0x50 /* push ax */ + /* 0x0E, /* push cs */ + /* 0xE8 /* call _exit */ +}; +/* Num bytes from start pattern to the relative offset of main() */ +#define OFFMAINLARGE 21 + + +/* * * * * * * * * * * * * * * *\ +* * +* M i s c e l l a n e o u s P a t t e r n s * +* * +\* * * * * * * * * * * * * * * */ + +/* This pattern is for the stack check code in Microsoft compilers */ +static byte pattMsChkstk[] = +{ + 0x59, /* pop cx 
*/ + 0x8B, 0xDC, /* mov bx, sp */ + 0x2B, 0xD8, /* sub bx, ax */ + 0x72, 0x0A, /* jb bad */ + 0x3B, 0x1E, WILD, WILD, /* cmp bx, XXXX */ + 0x72, 0x04, /* jb bad */ + 0x8B, 0xE3, /* mov sp, bx */ + 0xFF, 0xE1, /* jmp [cx] */ + 0x33, 0xC0, /* xor ax, ax */ + 0xE9 /* jmp XXXX */ +}; + + + + +/* This procedure is called to initialise the library check code */ +void SetupLibCheck(void) +{ + word w, len; + int i; + + if ((f = fopen(sSigName, "rb")) == NULL) + { + printf("Warning: cannot open signature file %s\n", sSigName); + return; + } + + readProtoFile(); + + + prog.bSigs = FALSE; /* False unless everything goes right */ + /* Read the parameters */ + grab(4, f); + if (memcmp("dccs", buf, 4) != 0) + { + printf("Not a dcc signature file!\n"); + exit(3); + } + numKeys = readFileShort(f); + numVert = readFileShort(f); + PatLen = readFileShort(f); + SymLen = readFileShort(f); + if ((PatLen != PATLEN) || (SymLen != SYMLEN)) + { + printf("Sorry! Compiled for sym and pattern lengths of %d and %d\n", + SYMLEN, PATLEN); + exit(1); + } + + /* Initialise the perfhlib stuff. Also allocates T1, T2, g, etc */ + hashParams( /* Set the parameters for the hash table */ + numKeys, /* The number of symbols */ + PatLen, /* The length of the pattern to be hashed */ + 256, /* The character set of the pattern (0-FF) */ + 0, /* Minimum pattern character value */ + numVert); /* Specifies c, the sparseness of the graph. 
+ See Czech, Havas and Majewski for details */ + + T1base = readT1(); + T2base = readT2(); + g = readG(); + + /* Read T1 and T2 tables */ + grab(2, f); + if (memcmp("T1", buf, 2) != 0) + { + printf("Expected 'T1'\n"); + exit(3); + } + len = (word) (PatLen * 256 * sizeof(word)); + w = readFileShort(f); + if (w != len) + { + printf("Problem with size of T1: file %d, calc %d\n", w, len); + exit(4); + } + readFileSection(T1base, len, f); + + grab(2, f); + if (memcmp("T2", buf, 2) != 0) + { + printf("Expected 'T2'\n"); + exit(3); + } + w = readFileShort(f); + if (w != len) + { + printf("Problem with size of T2: file %d, calc %d\n", w, len); + exit(4); + } + readFileSection(T2base, len, f); + + /* Now read the function g[] */ + grab(2, f); + if (memcmp("gg", buf, 2) != 0) + { + printf("Expected 'gg'\n"); + exit(3); + } + len = (word)(numVert * sizeof(word)); + w = readFileShort(f); + if (w != len) + { + printf("Problem with size of g[]: file %d, calc %d\n", w, len); + exit(4); + } + readFileSection(g, len, f); + + + /* This is now the hash table */ + /* First allocate space for the table */ + if ((ht = (HT *)allocMem(numKeys * sizeof(HT))) == 0) + { + printf("Could not allocate hash table\n"); + exit(1); + } + grab(2, f); + if (memcmp("ht", buf, 2) != 0) + { + printf("Expected 'ht'\n"); + exit(3); + } + w = readFileShort(f); + if (w != numKeys * (SymLen + PatLen + sizeof(word))) + { + printf("Problem with size of hash table: file %d, calc %d\n", w, len); + exit(6); + } + + + for (i=0; i < numKeys; i++) + { + if (fread(&ht[i], 1, SymLen + PatLen, f) != SymLen + PatLen) + { + printf("Could not read signature\n"); + exit(11); + } + } + fclose(f); + prog.bSigs = TRUE; +} + + +void +CleanupLibCheck(void) +{ + /* Deallocate all the stuff allocated in SetupLibCheck() */ + if (T1base) free(T1base); + if (T1base) free(T2base); + if (g) free(g); + if (ht) free(ht); + if (pFunc)free(pFunc); +} + + +/* Check this function to see if it is a library function. 
Return TRUE if + it is, and copy its name to pProc->name +*/ +boolT LibCheck(Function & pProc) +{ + long fileOffset; + int h, i, j, arg; + Int Idx; + byte pat[PATLEN]; + + if (prog.bSigs == FALSE) + { + /* No signatures... can't rely on hash parameters to be initialised + so always return false */ + return FALSE; + } + + fileOffset = pProc.procEntry; /* Offset into the image */ + if (fileOffset == prog.offMain) + { + /* Easy - this function is called main! */ + strcpy(pProc.name, "main"); + return FALSE; + } + + memmove(pat, &prog.Image[fileOffset], PATLEN); + fixWildCards(pat); /* Fix wild cards in the copy */ + h = hash(pat); /* Hash the found proc */ + /* We always have to compare keys, because the hash function will + always return a valid index */ + if (memcmp(ht[h].htPat, pat, PATLEN) == 0) + { + /* We have a match. Save the name, if not already set */ + if (pProc.name[0] == '\0') /* Don't overwrite existing name */ + { + /* Give proc the new name */ + strcpy(pProc.name, ht[h].htSym); + } + /* But is it a real library function? 
*/ + i = NIL; + if ((numFunc == 0) || (i=searchPList(ht[h].htSym)) != NIL) + { + pProc.flg |= PROC_ISLIB; /* It's a lib function */ + if (i != NIL) + { + /* Allocate space for the arg struct, and copy the hlType to + the appropriate field */ + arg = pFunc[i].firstArg; + pProc.args.numArgs = pFunc[i].numArg; + pProc.args.sym.resize(pFunc[i].numArg); + for (j=0; j < pFunc[i].numArg; j++) + { + pProc.args.sym[j].type = pArg[arg++]; + } + if (pFunc[i].typ != TYPE_UNKNOWN) + { + pProc.retVal.type = pFunc[i].typ; + pProc.flg |= PROC_IS_FUNC; + switch (pProc.retVal.type) { + case TYPE_LONG_SIGN: case TYPE_LONG_UNSIGN: + pProc.liveOut = duReg[rDX] | duReg[rAX]; + break; + case TYPE_WORD_SIGN: case TYPE_WORD_UNSIGN: + pProc.liveOut = duReg[rAX]; + break; + case TYPE_BYTE_SIGN: case TYPE_BYTE_UNSIGN: + pProc.liveOut = duReg[rAL]; + break; + /*** other types are not considered yet ***/ + } + } + if (pFunc[i].bVararg) pProc.flg |= PROC_VARARG; + } + } + else if (i == NIL) + { + /* Have a symbol for it, but does not appear in a header file. 
+ Treat it as if it is not a library function */ + pProc.flg |= PROC_RUNTIME; /* => is a runtime routine */ + } + } + + if (locatePattern(prog.Image, pProc.procEntry, + pProc.procEntry+sizeof(pattMsChkstk), + pattMsChkstk, sizeof(pattMsChkstk), &Idx)) + { + /* Found _chkstk */ + strcpy(pProc.name, "chkstk"); + pProc.flg |= PROC_ISLIB; /* We'll say its a lib function */ + pProc.args.numArgs = 0; /* With no args */ + } + + return (boolT)((pProc.flg & PROC_ISLIB) != 0); +} + + + +void grab(int n, FILE *f) +{ + if (fread(buf, 1, n, f) != (unsigned)n) + { + printf("Could not grab\n"); + exit(11); + } +} + +word +readFileShort(FILE *f) +{ + byte b1, b2; + + if (fread(&b1, 1, 1, f) != 1) + { + printf("Could not read short\n"); + exit(11); + } + if (fread(&b2, 1, 1, f) != 1) + { + printf("Could not read short\n"); + exit(11); + } + return (word)(b2 << 8) + (word)b1; +} + +// Read a section of the file, considering endian issues +void +readFileSection(word* p, int len, FILE* f) +{ + for (int i=0; i < len; i += 2) + { + *p++ = readFileShort(f); + } +} + +/* The following two functions are dummies, since we don't call map() */ +void getKey(int i, byte **keys) +{ + +} + +void dispKey(int i) +{ + +} + +/* Search the source array between limits iMin and iMax for the pattern (length + iPatLen). The pattern can contain wild bytes; if you really want to match + for the pattern that is used up by the WILD byte, tough - it will match with + everything else as well. */ +static boolT locatePattern(byte *source, Int iMin, Int iMax, byte *pattern, Int iPatLen, + Int *index) +{ + Int i, j; + byte *pSrc; /* Pointer to start of considered source */ + Int iLast; + + iLast = iMax - iPatLen; /* Last source byte to consider */ + + for (i=iMin; i <= iLast; i++) + { + pSrc = &source[i]; /* Start of current part of source */ + /* i is the index of the start of the moving pattern */ + for (j=0; j < iPatLen; j++) + { + /* j is the index of the byte being considered in the pattern. 
*/ + if ((*pSrc++ != pattern[j]) && (pattern[j] != WILD)) + { + /* A definite mismatch */ + break; /* Break to outer loop */ + } + } + if (j >= iPatLen) + { + /* Pattern has been found */ + *index = i; /* Pass start of pattern */ + return 1; /* Indicate success */ + } + /* Else just try next value of i */ + } + /* Pattern was not found */ + *index = -1; /* Invalidate index */ + return 0; /* Indicate failure */ +} + + +void STATE::checkStartup() +{ + /* This function checks the startup code for various compilers' way of + loading DS. If found, it sets DS. This may not be needed in the future if + pushing and popping of registers is implemented. + Also sets prog.offMain and prog.segMain if possible */ + + + Int startOff; /* Offset into the Image of the initial CS:IP */ + Int i, rel, para, init; + char chModel = 'x'; + char chVendor = 'x'; + char chVersion = 'x'; + char *pPath; + char temp[4]; + + startOff = ((dword)prog.initCS << 4) + prog.initIP; + + /* Check the Turbo Pascal signatures first, since they involve only the + first 3 bytes, and false positives may be founf with the others later */ + if (locatePattern(prog.Image, startOff, startOff+5, pattBorl4on,sizeof(pattBorl4on), &i)) + { + /* The first 5 bytes are a far call. 
Follow that call and + determine the version from that */ + rel = LH(&prog.Image[startOff+1]); /* This is abs off of init */ + para= LH(&prog.Image[startOff+3]);/* This is abs seg of init */ + init = ((dword)para << 4) + rel; + if (locatePattern(prog.Image, init, init+26, pattBorl4Init, + sizeof(pattBorl4Init), &i)) + { + + setState(rDS, LH(&prog.Image[i+1])); + printf("Borland Pascal v4 detected\n"); + chVendor = 't'; /* Trubo */ + chModel = 'p'; /* Pascal */ + chVersion = '4'; /* Version 4 */ + prog.offMain = startOff; /* Code starts immediately */ + prog.segMain = prog.initCS; /* At the 5 byte jump */ + goto gotVendor; /* Already have vendor */ + } + else if (locatePattern(prog.Image, init, init+26, pattBorl5Init, + sizeof(pattBorl5Init), &i)) + { + + setState( rDS, LH(&prog.Image[i+1])); + printf("Borland Pascal v5.0 detected\n"); + chVendor = 't'; /* Trubo */ + chModel = 'p'; /* Pascal */ + chVersion = '5'; /* Version 5 */ + prog.offMain = startOff; /* Code starts immediately */ + prog.segMain = prog.initCS; + goto gotVendor; /* Already have vendor */ + } + else if (locatePattern(prog.Image, init, init+26, pattBorl7Init, + sizeof(pattBorl7Init), &i)) + { + + setState( rDS, LH(&prog.Image[i+1])); + printf("Borland Pascal v7 detected\n"); + chVendor = 't'; /* Trubo */ + chModel = 'p'; /* Pascal */ + chVersion = '7'; /* Version 7 */ + prog.offMain = startOff; /* Code starts immediately */ + prog.segMain = prog.initCS; + goto gotVendor; /* Already have vendor */ + } + + } + + + /* Search for the call to main pattern. This is compiler independant, + but decides the model required. Note: must do the far data models + (large and compact) before the others, since they are the same pattern + as near data, just more pushes at the start. 
*/ + if(prog.cbImage>0x180) + { + if (locatePattern(prog.Image, startOff, startOff+0x180, pattMainLarge,sizeof(pattMainLarge), &i)) + { + rel = LH(&prog.Image[i+OFFMAINLARGE]); /* This is abs off of main */ + para= LH(&prog.Image[i+OFFMAINLARGE+2]);/* This is abs seg of main */ + /* Save absolute image offset */ + prog.offMain = ((dword)para << 4) + rel; + prog.segMain = (word)para; + chModel = 'l'; /* Large model */ + } + else if (locatePattern(prog.Image, startOff, startOff+0x180, pattMainCompact, + sizeof(pattMainCompact), &i)) + { + rel = LHS(&prog.Image[i+OFFMAINCOMPACT]);/* This is the rel addr of main */ + prog.offMain = i+OFFMAINCOMPACT+2+rel; /* Save absolute image offset */ + prog.segMain = prog.initCS; + chModel = 'c'; /* Compact model */ + } + else if (locatePattern(prog.Image, startOff, startOff+0x180, pattMainMedium, + sizeof(pattMainMedium), &i)) + { + rel = LH(&prog.Image[i+OFFMAINMEDIUM]); /* This is abs off of main */ + para= LH(&prog.Image[i+OFFMAINMEDIUM+2]);/* This is abs seg of main */ + prog.offMain = ((dword)para << 4) + rel; + prog.segMain = (word)para; + chModel = 'm'; /* Medium model */ + } + else if (locatePattern(prog.Image, startOff, startOff+0x180, pattMainSmall, + sizeof(pattMainSmall), &i)) + { + rel = LHS(&prog.Image[i+OFFMAINSMALL]); /* This is rel addr of main */ + prog.offMain = i+OFFMAINSMALL+2+rel; /* Save absolute image offset */ + prog.segMain = prog.initCS; + chModel = 's'; /* Small model */ + } + else if (memcmp(&prog.Image[startOff], pattTPasStart, sizeof(pattTPasStart)) == 0) + { + rel = LHS(&prog.Image[startOff+1]); /* Get the jump offset */ + prog.offMain = rel+startOff+3; /* Save absolute image offset */ + prog.offMain += 0x20; /* These first 32 bytes are setting up */ + prog.segMain = prog.initCS; + chVendor = 't'; /* Turbo.. */ + chModel = 'p'; /* ...Pascal... 
(only 1 model) */ + chVersion = '3'; /* 3.0 */ + printf("Turbo Pascal 3.0 detected\n"); + printf("Main at %04X\n", prog.offMain); + goto gotVendor; /* Already have vendor */ + } + else + { + printf("Main could not be located!\n"); + prog.offMain = -1; + } + } + else + { + printf("Main could not be located!\n"); + prog.offMain = -1; + } + + printf("Model: %c\n", chModel); + + + /* Now decide the compiler vendor and version number */ + if (memcmp(&prog.Image[startOff], pattMsC5Start, sizeof(pattMsC5Start)) == 0) + { + /* Yes, this is Microsoft startup code. The DS is sitting right here + in the next 2 bytes */ + setState( rDS, LH(&prog.Image[startOff+sizeof(pattMsC5Start)])); + chVendor = 'm'; /* Microsoft compiler */ + chVersion = '5'; /* Version 5 */ + printf("MSC 5 detected\n"); + } + + /* The C8 startup pattern is different from C5's */ + else if (memcmp(&prog.Image[startOff], pattMsC8Start, sizeof(pattMsC8Start)) == 0) + { + setState( rDS, LH(&prog.Image[startOff+sizeof(pattMsC8Start)])); + printf("MSC 8 detected\n"); + chVendor = 'm'; /* Microsoft compiler */ + chVersion = '8'; /* Version 8 */ + } + + /* The C8 .com startup pattern is different again! */ + else if (memcmp(&prog.Image[startOff], pattMsC8ComStart, + sizeof(pattMsC8ComStart)) == 0) + { + printf("MSC 8 .com detected\n"); + chVendor = 'm'; /* Microsoft compiler */ + chVersion = '8'; /* Version 8 */ + } + + else if (locatePattern(prog.Image, startOff, startOff+0x30, pattBorl2Start, + sizeof(pattBorl2Start), &i)) + { + /* Borland startup. DS is at the second byte (offset 1) */ + setState( rDS, LH(&prog.Image[i+1])); + printf("Borland v2 detected\n"); + chVendor = 'b'; /* Borland compiler */ + chVersion = '2'; /* Version 2 */ + } + + else if (locatePattern(prog.Image, startOff, startOff+0x30, pattBorl3Start, + sizeof(pattBorl3Start), &i)) + { + /* Borland startup. 
DS is at the second byte (offset 1) */ + setState( rDS, LH(&prog.Image[i+1])); + printf("Borland v3 detected\n"); + chVendor = 'b'; /* Borland compiler */ + chVersion = '3'; /* Version 3 */ + } + + else if (locatePattern(prog.Image, startOff, startOff+0x30, pattLogiStart, + sizeof(pattLogiStart), &i)) + { + /* Logitech modula startup. DS is 0, despite appearances */ + printf("Logitech modula detected\n"); + chVendor = 'l'; /* Logitech compiler */ + chVersion = '1'; /* Version 1 */ + } + + /* Other startup idioms would go here */ + else + { + printf("Warning - compiler not recognised\n"); + } + +gotVendor: + + /* Use the DCC environment variable to set where the .sig files will + be found. Otherwise, assume current directory */ + pPath = getenv("DCC"); + if (pPath) + { + strcpy(sSigName, pPath); /* Use path given */ + if (sSigName[strlen(sSigName)-1] != '/') + { + strcat(sSigName, "/"); /* Append a slash if necessary */ + } + } + else + { + strcpy(sSigName, "./"); /* Current directory */ + } + strcat(sSigName, "dcc"); + temp[1] = '\0'; + temp[0] = chVendor; + strcat(sSigName, temp); /* Add vendor */ + temp[0] = chVersion; + strcat(sSigName, temp); /* Add version */ + temp[0] = chModel; + strcat(sSigName, temp); /* Add model */ + strcat(sSigName, ".sig"); /* Add extension */ + printf("Signature file: %s\n", sSigName); + +} + +/* DCCLIBS.DAT is a data file sorted on function name containing names and + return types of functions found in include files, and the names and types + of arguements. Only functions in this list will be considered library + functions; others (like LXMUL@) are helper files, and need to be analysed + by dcc, rather than considered as known functions. When a prototype is + found (in searchPList()), the parameter info is written to the proc struct. 
+*/ +void +readProtoFile(void) +{ + FILE *fProto; + char *pPath; /* Point to the environment string */ + char szProFName[81]; /* Full name of dclibs.lst */ + int i; + + /* Use the DCC environment variable to set where the dcclibs.lst file will + be found. Otherwise, assume current directory */ + pPath = getenv("DCC"); + if (pPath) + { + strcpy(szProFName, pPath); /* Use path given */ + if (szProFName[strlen(szProFName)-1] != '/') + { + strcat(szProFName, "/"); /* Append a slash if necessary */ + } + } + else + { + strcpy(szProFName, "./"); /* Current directory */ + } + strcat(szProFName, DCCLIBS); + + if ((fProto = fopen(szProFName, "rb")) == NULL) + { + printf("Warning: cannot open library prototype data file %s\n", szProFName); + return; + } + + grab(4, fProto); + if (strncmp(buf, "dccp", 4) != 0) + { + printf("%s is not a dcc prototype file\n", szProFName); + exit(1); + } + + grab(2, fProto); + if (strncmp(buf, "FN", 2) != 0) + { + printf("FN (Function Name) subsection expected in %s\n", szProFName); + exit(2); + } + + numFunc = readFileShort(fProto); /* Num of entries to allocate */ + + /* Allocate exactly correct # entries */ + pFunc = (PH_FUNC_STRUCT*) allocMem(numFunc * sizeof(PH_FUNC_STRUCT)); + + for (i=0; i < numFunc; i++) + { + fread(&pFunc[i], 1, SYMLEN, fProto); + pFunc[i].typ = (hlType)readFileShort(fProto); + pFunc[i].numArg = readFileShort(fProto); + pFunc[i].firstArg = readFileShort(fProto); + fread(&pFunc[i].bVararg, 1, 1, fProto); + } + + grab(2, fProto); + if (strncmp(buf, "PM", 2) != 0) + { + printf("PM (Parameter) subsection expected in %s\n", szProFName); + exit(2); + } + + numArg = readFileShort(fProto); /* Num of entries to allocate */ + + /* Allocate exactly correct # entries */ + pArg = (hlType*) allocMem(numArg * sizeof(hlType)); + + for (i=0; i < numArg; i++) + { + // fread(&pArg[i], 1, SYMLEN, fProto); /* No names to read as yet */ + pArg[i] = (hlType) readFileShort(fProto); + } + + fclose(fProto); + +} + +int +searchPList(char *name) 
+{ + /* Search through the symbol names for the name */ + /* Use binary search */ + int mx, mn, i, res; + + + mx = numFunc; + mn = 0; + + while (mn < mx) + { + i = (mn + mx) /2; + res = strcmp(pFunc[i].name, name); + if (res == 0) + { + return i; /* Found! */ + } + else + { + if (res < 0) + { + mn = i+1; + } + else + { + mx = i-1; + } + } + } + + /* Still could be the case that mn == mx == required record */ + res = strcmp(pFunc[mn].name, name); + if (res == 0) + { + return mn; /* Found! */ + } + else + { + return NIL; + } +} + +#if DEBUG_HEAP +void +checkHeap(char *msg) + +/* HEAPCHK.C: This program checks the heap for + * consistency and prints an appropriate message. + */ +{ + int heapstatus; + + printf("%s\n", msg); + + /* Check heap status */ + heapstatus = _heapchk(); + switch( heapstatus ) + { + case _HEAPOK: + printf(" OK - heap is fine\n" ); + break; + case _HEAPEMPTY: + printf(" OK - heap is empty\n" ); + break; + case _HEAPBADBEGIN: + printf( "ERROR - bad start of heap\n" ); + break; + case _HEAPBADNODE: + printf( "ERROR - bad node in heap\n" ); + break; + } +} +#endif + + diff --git a/src/comwrite.cpp b/src/comwrite.cpp new file mode 100644 index 0000000..f9b2a0f --- /dev/null +++ b/src/comwrite.cpp @@ -0,0 +1,265 @@ +/***************************************************************************** + * File: comwrite.c + * Purpose: writes comments about C programs and descriptions about dos + * interrupts in the string line given. 
+ * Project: dcc + * (C) Cristina Cifuentes + ****************************************************************************/ + +#include "dcc.h" +#include +#define intSize 40 + +static const char *int21h[] = +{"Terminate process", + "Character input with echo", + "Character output", + "Auxiliary input", + "Auxiliary output", + "Printer output", + "Direct console i/o", + "Unfiltered char i w/o echo", + "Character input without echo", + "Display string", + "Buffered keyboard input", + "Check input status", + "Flush input buffer and then input", + "Disk reset", + "Select disk", + "Open file", + "Close file", + "Find first file", + "Find next file", + "Delete file", + "Sequential read", + "Sequential write", + "Create file", + "Rename file", + "Reserved", + "Get current disk", + "Set DTA address", + "Get default drive data", + "Get drive data", + "Reserved", + "Reserved", + "Reserved", + "Reserved", + "Random read", + "Random write", + "Get file size", + "Set relative record number", + "Set interrupt vector", + "Create new PSP", + "Random block read", + "Random block write", + "Parse filename", + "Get date", + "Set date", + "Get time", + "Set time", + "Set verify flag", + "Get DTA address", + "Get MSDOS version number", + "Terminate and stay resident", + "Reserved", + "Get or set break flag", + "Reserved", + "Get interrupt vector", + "Get drive allocation info", + "Reserved", + "Get or set country info", + "Create directory", + "Delete directory", + "Set current directory", + "Create file", + "Open file", + "Close file", + "Read file or device", + "Write file or device", + "Delete file", + "Set file pointer", + "Get or set file attributes", + "IOCTL (i/o control)", + "Duplicate handle", + "Redirect handle", + "Get current directory", + "Alloate memory block", + "Release memory block", + "Resize memory block", + "Execute program (exec)", + "Terminate process with return code", + "Get return code", + "Find first file", + "Find next file", + "Reserved", + "Reserved", + 
"Reserved", + "Reserved", + "Get verify flag", + "Reserved", + "Rename file", + "Get or set file date & time", + "Get or set allocation strategy", + "Get extended error information", + "Create temporary file", + "Create new file", + "Lock or unlock file region", + "Reserved", + "Get machine name", + "Device redirection", + "Reserved", + "Reserved", + "Get PSP address", + "Get DBCS lead byte table", + "Reserved", + "Get extended country information", + "Get or set code page", + "Set handle count", + "Commit file", + "Reserved", + "Reserved", + "Reserved", + "Extended open file" +}; + + +static const char *intOthers[] = { + "Exit", /* 0x20 */ + "", /* other table */ + "Terminate handler address", /* 0x22 */ + "Ctrl-C handler address", /* 0x23 */ + "Critical-error handler address", /* 0x24 */ + "Absolute disk read", /* 0x25 */ + "Absolute disk write", /* 0x26 */ + "Terminate and stay resident", /* 0x27 */ + "Reserved", /* 0x28 */ + "Reserved", /* 0x29 */ + "Reserved", /* 0x2A */ + "Reserved", /* 0x2B */ + "Reserved", /* 0x2C */ + "Reserved", /* 0x2D */ + "Reserved" /* 0x2E */ +}; + + +/* Writes the description of the current interrupt. Appends it to the + * string s. 
*/ +void ICODE::writeIntComment (char *s) +{ + char *t; + + t = (char *)allocMem(intSize * sizeof(char)); + if (ic.ll.immed.op == 0x21) + { sprintf (t, "\t/* %s */\n", int21h[ic.ll.dst.off]); + strcat (s, t); + } + else if (ic.ll.immed.op > 0x1F && ic.ll.immed.op < 0x2F) + { + sprintf (t, "\t/* %s */\n", intOthers[ic.ll.immed.op - 0x20]); + strcat (s, t); + } + else if (ic.ll.immed.op == 0x2F) + switch (ic.ll.dst.off) + { + case 0x01 : strcat (s, "\t/* Print spooler */\n"); + break; + case 0x02: strcat (s, "\t/* Assign */\n"); + break; + case 0x10: strcat (s, "\t/* Share */\n"); + break; + case 0xB7: strcat (s, "\t/* Append */\n"); + } + else + strcat (s, "\n"); +} + + +//, &cCode.decl +void Function::writeProcComments() +{ + int i; + ID *id; /* Pointer to register argument identifier */ + STKSYM * psym; /* Pointer to register argument symbol */ + + /* About the parameters */ + if (this->cbParam) + cCode.appendDecl("/* Takes %d bytes of parameters.\n",this->cbParam); + else if (this->flg & REG_ARGS) + { + cCode.appendDecl("/* Uses register arguments:\n"); + for (i = 0; i < this->args.numArgs; i++) + { + psym = &this->args.sym[i]; + if (psym->regs->expr.ident.idType == REGISTER) + { + id = &this->localId.id_arr[psym->regs->expr.ident.idNode.regiIdx]; + if (psym->regs->expr.ident.regiType == WORD_REG) + cCode.appendDecl(" * %s = %s.\n", psym->name, + wordReg[id->id.regi - rAX]); + else /* BYTE_REG */ + cCode.appendDecl(" * %s = %s.\n", psym->name, + byteReg[id->id.regi - rAL]); + } + else /* long register */ + { + id = &this->localId.id_arr[psym->regs->expr.ident.idNode.longIdx]; + cCode.appendDecl(" * %s = %s:%s.\n", psym->name, + wordReg[id->id.longId.h - rAX], + wordReg[id->id.longId.l - rAX]); + } + + } + } + else + cCode.appendDecl("/* Takes no parameters.\n"); + + /* Type of procedure */ + if (this->flg & PROC_RUNTIME) + cCode.appendDecl(" * Runtime support routine of the compiler.\n"); + if (this->flg & PROC_IS_HLL) + cCode.appendDecl(" * High-level language 
prologue code.\n"); + if (this->flg & PROC_ASM) + { + cCode.appendDecl(" * Untranslatable routine. Assembler provided.\n"); + if (this->flg & PROC_IS_FUNC) + switch (this->retVal.type) { + case TYPE_BYTE_SIGN: case TYPE_BYTE_UNSIGN: + cCode.appendDecl(" * Return value in register al.\n"); + break; + case TYPE_WORD_SIGN: case TYPE_WORD_UNSIGN: + cCode.appendDecl(" * Return value in register ax.\n"); + break; + case TYPE_LONG_SIGN: case TYPE_LONG_UNSIGN: + cCode.appendDecl(" * Return value in registers dx:ax.\n"); + break; + } /* eos */ + } + + /* Calling convention */ + if (this->flg & CALL_PASCAL) + cCode.appendDecl(" * Pascal calling convention.\n"); + else if (this->flg & CALL_C) + cCode.appendDecl(" * C calling convention.\n"); + else if (this->flg & CALL_UNKNOWN) + cCode.appendDecl(" * Unknown calling convention.\n"); + + /* Other flags */ + if (this->flg & (PROC_BADINST | PROC_IJMP)) + cCode.appendDecl(" * Incomplete due to an %s.\n", + (this->flg & PROC_BADINST)? "untranslated opcode": + "indirect JMP"); + if (this->flg & PROC_ICALL) + cCode.appendDecl(" * Indirect call procedure.\n"); + if (this->flg & IMPURE) + cCode.appendDecl(" * Contains impure code.\n"); + if (this->flg & NOT_HLL) + cCode.appendDecl(" * Contains instructions not normally used by compilers.\n"); + if (this->flg & FLOAT_OP) + cCode.appendDecl(" * Contains coprocessor instructions.\n"); + + /* Graph reducibility */ + if (this->flg & GRAPH_IRRED) + cCode.appendDecl(" * Irreducible control flow graph.\n"); + cCode.appendDecl(" */\n{\n"); +} + diff --git a/src/control.cpp b/src/control.cpp new file mode 100644 index 0000000..88356a1 --- /dev/null +++ b/src/control.cpp @@ -0,0 +1,692 @@ +/********************************************************************* + * Description : Performs control flow analysis on the CFG + * (C) Cristina Cifuentes + ********************************************************************/ +#include +#include +#include +#include "dcc.h" +#include +#include +#if 
__BORLAND__ +#include +#else +#include +#endif + +//typedef struct list { +// Int nodeIdx; +// struct list *next; +//} nodeList; +typedef std::list nodeList; /* dfsLast index to the node */ + +#define ancestor(a,b) ((a->dfsLastNum < b->dfsLastNum) && (a->dfsFirstNum < b->dfsFirstNum)) +/* there is a path on the DFST from a to b if the a was first visited in a + * dfs, and a was later visited than b when doing the last visit of each + * node. */ + + +/* Checks if the edge (p,s) is a back edge. If node s was visited first + * during the dfs traversal (ie. s has a smaller dfsFirst number) or s == p, + * then it is a backedge. + * Also incrementes the number of backedges entries to the header node. */ +static boolT isBackEdge (BB * p,BB * s) +{ + if (p->dfsFirstNum >= s->dfsFirstNum) + { + s->numBackEdges++; + return (TRUE); + } + return (FALSE); +} + + +static Int commonDom (Int currImmDom, Int predImmDom, Function * pProc) +/* Finds the common dominator of the current immediate dominator + * currImmDom and its predecessor's immediate dominator predImmDom */ +{ + if (currImmDom == NO_DOM) + return (predImmDom); + if (predImmDom == NO_DOM) /* predecessor is the root */ + return (currImmDom); + + while ((currImmDom != NO_DOM) && (predImmDom != NO_DOM) && + (currImmDom != predImmDom)) + { + if (currImmDom < predImmDom) + predImmDom = pProc->dfsLast[predImmDom]->immedDom; + else + currImmDom = pProc->dfsLast[currImmDom]->immedDom; + } + return (currImmDom); +} + + +/* Finds the immediate dominator of each node in the graph pProc->cfg. + * Adapted version of the dominators algorithm by Hecht and Ullman; finds + * immediate dominators only. 
+ * Note: graph should be reducible */ +void Function::findImmedDom () +{ + BB * currNode; + Int currIdx, j, predIdx; + + for (currIdx = 0; currIdx < numBBs; currIdx++) + { + currNode = dfsLast[currIdx]; + if (currNode->flg & INVALID_BB) /* Do not process invalid BBs */ + continue; + + for (j = 0; j < currNode->inEdges.size(); j++) + { + BB* inedge=currNode->inEdges[j]; + predIdx = inedge->dfsLastNum; + if (predIdx < currIdx) + currNode->immedDom = commonDom (currNode->immedDom, + predIdx, this); + } + } +} + + +/* Inserts the node n to the list l. */ +static void insertList (nodeList &l, Int n) +{ + l.push_back(n); +} + + +/* Returns whether or not the node n (dfsLast numbering of a basic block) + * is on the list l. */ +static boolT inList (nodeList &l, Int n) +{ + return std::find(l.begin(),l.end(),n)!=l.end(); +} + + +/* Frees space allocated by the list l. */ +static void freeList (nodeList &l) +{ + l.clear(); +} + + +/* Returns whether the node n belongs to the queue list q. */ +static boolT inInt(BB * n, queue &q) +{ + return std::find(q.begin(),q.end(),n)!=q.end(); +} + + +/* Finds the follow of the endless loop headed at node head (if any). + * The follow node is the closest node to the loop. */ +static void findEndlessFollow (Function * pProc, nodeList &loopNodes, BB * head) +{ + Int j, succ; + + head->loopFollow = MAX; + nodeList::iterator p = loopNodes.begin(); + for( ;p != loopNodes.end();++p) + { + for (j = 0; j < pProc->dfsLast[*p]->numOutEdges; j++) + { + succ = pProc->dfsLast[*p]->edges[j].BBptr->dfsLastNum; + if ((! inList(loopNodes, succ)) && (succ < head->loopFollow)) + head->loopFollow = succ; + } + } +} + + +//static void findNodesInLoop(BB * latchNode,BB * head,PPROC pProc,queue *intNodes) +/* Flags nodes that belong to the loop determined by (latchNode, head) and + * determines the type of loop. 
*/
+/* Parameters: latchNode is the source of the back edge, head its target,
+ * pProc the procedure owning both, and intNodes the nodes of the interval
+ * being structured. Results are written into head (loopHead, loopType,
+ * loopFollow), into the loopHead of every node found in the loop, and as a
+ * JX_LOOP flag on the conditional jump that controls the loop. */
+static void findNodesInLoop(BB * latchNode,BB * head,Function * pProc,queue &intNodes)
+{
+    Int i, headDfsNum, intNodeType;
+    nodeList loopNodes;
+    Int immedDom,    	/* dfsLast index to immediate dominator */
+    thenDfs, elseDfs;       /* dsfLast index for THEN and ELSE nodes */
+    BB * pbb;
+
+    /* Flag nodes in loop headed by head (except header node) */
+    headDfsNum = head->dfsLastNum;
+    head->loopHead = headDfsNum;
+    insertList (loopNodes, headDfsNum);
+    for (i = headDfsNum + 1; i < latchNode->dfsLastNum; i++)
+    {
+        if (pProc->dfsLast[i]->flg & INVALID_BB)	/* skip invalid BBs */
+            continue;
+
+        /* A node joins the loop when its immediate dominator is already in
+         * the loop and the node itself lies in the interval */
+        immedDom = pProc->dfsLast[i]->immedDom;
+        if (inList (loopNodes, immedDom) && inInt(pProc->dfsLast[i], intNodes))
+        {
+            insertList (loopNodes, i);
+            if (pProc->dfsLast[i]->loopHead == NO_NODE)/*not in other loop*/
+                pProc->dfsLast[i]->loopHead = headDfsNum;
+        }
+    }
+    latchNode->loopHead = headDfsNum;
+    if (latchNode != head)
+        insertList (loopNodes, latchNode->dfsLastNum);
+
+    /* Determine type of loop and follow node */
+    /* NOTE: the if/else cascade below is unbraced; each else pairs with the
+     * nearest unmatched if, as the indentation shows */
+    intNodeType = head->nodeType;
+    if (latchNode->nodeType == TWO_BRANCH)
+        if ((intNodeType == TWO_BRANCH) || (latchNode == head))
+            if ((latchNode == head) ||
+                (inList (loopNodes, head->edges[THEN].BBptr->dfsLastNum) &&
+                 inList (loopNodes, head->edges[ELSE].BBptr->dfsLastNum)))
+            {
+                /* Both branches of the head stay in the loop (or the loop is
+                 * a single node): the exit test is at the latch */
+                head->loopType = REPEAT_TYPE;
+                if (latchNode->edges[0].BBptr == head)
+                    head->loopFollow = latchNode->edges[ELSE].BBptr->dfsLastNum;
+                else
+                    head->loopFollow = latchNode->edges[THEN].BBptr->dfsLastNum;
+                pProc->Icode.SetLlFlag(latchNode->start + latchNode->length - 1,JX_LOOP);
+            }
+            else
+            {
+                /* One branch of the head leaves the loop: the exit test is
+                 * at the head, and the leaving branch is the follow */
+                head->loopType = WHILE_TYPE;
+                if (inList (loopNodes, head->edges[THEN].BBptr->dfsLastNum))
+                    head->loopFollow = head->edges[ELSE].BBptr->dfsLastNum;
+                else
+                    head->loopFollow = head->edges[THEN].BBptr->dfsLastNum;
+                pProc->Icode.SetLlFlag(head->start + head->length - 1, JX_LOOP);
+            }
+        else /* head = anything besides 2-way, latch = 2-way */
+        {
+            head->loopType = REPEAT_TYPE;
+            if (latchNode->edges[THEN].BBptr == head)
+                head->loopFollow = latchNode->edges[ELSE].BBptr->dfsLastNum;
+            else
+                head->loopFollow = latchNode->edges[THEN].BBptr->dfsLastNum;
+            pProc->Icode.SetLlFlag(latchNode->start + latchNode->length - 1,
+                                   JX_LOOP);
+        }
+    else    /* latch = 1-way */
+        if (latchNode->nodeType == LOOP_NODE)
+        {
+            head->loopType = REPEAT_TYPE;
+            head->loopFollow = latchNode->edges[0].BBptr->dfsLastNum;
+        }
+        else if (intNodeType == TWO_BRANCH)
+        {
+            head->loopType = WHILE_TYPE;
+            /* Walk up the immediate dominators from the latch until one of
+             * the head's two targets is met; the other target is the follow */
+            pbb = latchNode;
+            thenDfs = head->edges[THEN].BBptr->dfsLastNum;
+            elseDfs = head->edges[ELSE].BBptr->dfsLastNum;
+            while (1)
+            {
+                if (pbb->dfsLastNum == thenDfs)
+                {
+                    head->loopFollow = elseDfs;
+                    break;
+                }
+                else if (pbb->dfsLastNum == elseDfs)
+                {
+                    head->loopFollow = thenDfs;
+                    break;
+                }
+
+                /* Check if couldn't find it, then it is a strangely formed
+                 * loop, so it is safer to consider it an endless loop */
+                if (pbb->dfsLastNum <= head->dfsLastNum)
+                {
+                    head->loopType = ENDLESS_TYPE;
+                    findEndlessFollow (pProc, loopNodes, head);
+                    break;
+                }
+                pbb = pProc->dfsLast[pbb->immedDom];
+            }
+            /* Only when the walk stopped below the head (a target was met):
+             * clear the loop membership of the follow node */
+            if (pbb->dfsLastNum > head->dfsLastNum)
+                pProc->dfsLast[head->loopFollow]->loopHead = NO_NODE;	/*****/
+            pProc->Icode.SetLlFlag(head->start + head->length - 1, JX_LOOP);
+        }
+        else
+        {
+            head->loopType = ENDLESS_TYPE;
+            findEndlessFollow (pProc, loopNodes, head);
+        }
+
+    freeList(loopNodes);
+}
+
+
+//static void findNodesInInt (queue **intNodes, Int level, interval *Ii)
+/* Recursive procedure to find nodes that belong to the interval (ie. nodes
+ * from G1).
*/ +static void findNodesInInt (queue &intNodes, Int level, interval *Ii) +{ + if (level == 1) + { + std::for_each(Ii->nodes.begin(),Ii->nodes.end(),[&intNodes](BB *en)->void { + appendQueue(intNodes,en); + }); + } + else + std::for_each(Ii->nodes.begin(),Ii->nodes.end(),[&intNodes,level](BB *en)->void { + findNodesInInt(intNodes,level-1,en->correspInt); + }); +} + + +/* Algorithm for structuring loops */ +static void structLoops(Function * pProc, derSeq *derivedG) +{ + interval *Ii; + BB * intHead, /* interval header node */ + * pred, /* predecessor node */ + * latchNode;/* latching node (in case of loops) */ + Int i, /* counter */ + level = 0; /* derived sequence level */ + interval *initInt; /* initial interval */ + queue intNodes; /* list of interval nodes */ + + /* Structure loops */ + /* for all derived sequences Gi */ + for(derSeq::iterator iter=derivedG->begin(); iter!=derivedG->end(); ++iter) + { + level++; + Ii = iter->Ii; + while (Ii) /* for all intervals Ii of Gi */ + { + latchNode = NULL; + intNodes.clear(); + + /* Find interval head (original BB node in G1) and create + * list of nodes of interval Ii. */ + initInt = Ii; + for (i = 1; i < level; i++) + initInt = (*initInt->nodes.begin())->correspInt; + intHead = *initInt->nodes.begin(); + + /* Find nodes that belong to the interval (nodes from G1) */ + findNodesInInt (intNodes, level, Ii); + + /* Find greatest enclosing back edge (if any) */ + assert(intHead->numInEdges==intHead->inEdges.size()); + for (i = 0; i < intHead->inEdges.size(); i++) + { + pred = intHead->inEdges[i]; + if (inInt(pred, intNodes) && isBackEdge(pred, intHead)) + if (! latchNode) + latchNode = pred; + else + { + if (pred->dfsLastNum > latchNode->dfsLastNum) + latchNode = pred; + } + } + + /* Find nodes in the loop and the type of loop */ + if (latchNode) + { + /* Check latching node is at the same nesting level of case + * statements (if any) and that the node doesn't belong to + * another loop. 
*/ + if ((latchNode->caseHead == intHead->caseHead) && + (latchNode->loopHead == NO_NODE)) + { + intHead->latchNode = latchNode->dfsLastNum; + findNodesInLoop(latchNode, intHead, pProc, intNodes); + latchNode->flg |= IS_LATCH_NODE; + } + } + + /* Next interval */ + Ii = Ii->next; + } + + /* Next derived sequence */ + } +} + + +static boolT successor (Int s, Int h, Function * pProc) +/* Returns whether the BB indexed by s is a successor of the BB indexed by + * h. Note that h is a case node. */ +{ Int i; + BB * header; + + header = pProc->dfsLast[h]; + for (i = 0; i < header->numOutEdges; i++) + if (header->edges[i].BBptr->dfsLastNum == s) + return (TRUE); + return (FALSE); +} + + +static void tagNodesInCase (BB * pBB, nodeList &l, Int head, Int tail) +/* Recursive procedure to tag nodes that belong to the case described by + * the list l, head and tail (dfsLast index to first and exit node of the + * case). */ +{ Int current, /* index to current node */ + i; + + pBB->traversed = DFS_CASE; + current = pBB->dfsLastNum; + if ((current != tail) && (pBB->nodeType != MULTI_BRANCH) && (inList (l, pBB->immedDom))) + { + insertList (l, current); + pBB->caseHead = head; + for (i = 0; i < pBB->numOutEdges; i++) + if (pBB->edges[i].BBptr->traversed != DFS_CASE) + tagNodesInCase (pBB->edges[i].BBptr, l, head, tail); + } +} + + +static void structCases(Function * pProc) +/* Structures case statements. This procedure is invoked only when pProc + * has a case node. */ +{ Int i, j; + BB * caseHeader; /* case header node */ + Int exitNode = NO_NODE; /* case exit node */ + nodeList caseNodes; /* temporary: list of nodes in case */ + + /* Linear scan of the nodes in reverse dfsLast order, searching for + * case nodes */ + for (i = pProc->numBBs - 1; i >= 0; i--) + if (pProc->dfsLast[i]->nodeType == MULTI_BRANCH) + { + caseHeader = pProc->dfsLast[i]; + + /* Find descendant node which has as immediate predecessor + * the current header node, and is not a successor. 
*/ + for (j = i + 2; j < pProc->numBBs; j++) + { + if ((!successor(j, i, pProc)) && + (pProc->dfsLast[j]->immedDom == i)) + if (exitNode == NO_NODE) + exitNode = j; + else if (pProc->dfsLast[exitNode]->numInEdges < + pProc->dfsLast[j]->numInEdges) + exitNode = j; + } + pProc->dfsLast[i]->caseTail = exitNode; + + /* Tag nodes that belong to the case by recording the + * header field with caseHeader. */ + insertList (caseNodes, i); + pProc->dfsLast[i]->caseHead = i; + for (j = 0; j < caseHeader->numOutEdges; j++) + tagNodesInCase (caseHeader->edges[j].BBptr, caseNodes, i, + exitNode); + if (exitNode != NO_NODE) + pProc->dfsLast[exitNode]->caseHead = i; + } +} + + +/* Flags all nodes in the list l as having follow node f, and deletes all + * nodes from the list. */ +static void flagNodes (nodeList &l, Int f, Function * pProc) +{ + nodeList::iterator p; + + p = l.begin(); + while (p!=l.end()) + { + pProc->dfsLast[*p]->ifFollow = f; + p = l.erase(p); + } +} + + +static void structIfs (Function * pProc) +/* Structures if statements */ +{ Int curr, /* Index for linear scan of nodes */ + desc, /* Index for descendant */ + followInEdges, /* Largest # in-edges so far */ + follow; /* Possible follow node */ + nodeList domDesc, /* List of nodes dominated by curr */ + unresolved, /* List of unresolved if nodes */ + *l; /* Temporary list */ + BB * currNode, /* Pointer to current node */ + * pbb; + + /* Linear scan of nodes in reverse dfsLast order */ + for (curr = pProc->numBBs - 1; curr >= 0; curr--) + { + currNode = pProc->dfsLast[curr]; + if (currNode->flg & INVALID_BB) /* Do not process invalid BBs */ + continue; + + if ((currNode->nodeType == TWO_BRANCH) && + (! 
(pProc->Icode.GetLlFlag(currNode->start + currNode->length - 1) + & JX_LOOP))) + { + followInEdges = 0; + follow = 0; + + /* Find all nodes that have this node as immediate dominator */ + for (desc = curr+1; desc < pProc->numBBs; desc++) + { + if (pProc->dfsLast[desc]->immedDom == curr) { + insertList (domDesc, desc); + pbb = pProc->dfsLast[desc]; + if ((pbb->numInEdges - pbb->numBackEdges) >= followInEdges) + { + follow = desc; + followInEdges = pbb->numInEdges - pbb->numBackEdges; + } + } + } + + /* Determine follow according to number of descendants + * immediately dominated by this node */ + if ((follow != 0) && (followInEdges > 1)) + { + currNode->ifFollow = follow; + if (!unresolved.empty()) + flagNodes (unresolved, follow, pProc); + } + else + insertList (unresolved, curr); + } + freeList (domDesc); + } +} + + +/* Checks for compound conditions of basic blocks that have only 1 high + * level instruction. Whenever these blocks are found, they are merged + * into one block with the appropriate condition */ +void Function::compoundCond() +{ + Int i, j, k, numOutEdges; + BB * pbb, * t, * e, * obb,* pred; + ICODE * picode, * ticode; + COND_EXPR *exp; + TYPEADR_TYPE *edges; + boolT change; + + change = TRUE; + while (change) + { + change = FALSE; + + /* Traverse nodes in postorder, this way, the header node of a + * compound condition is analysed first */ + for (i = 0; i < this->numBBs; i++) + { + pbb = this->dfsLast[i]; + if (pbb->flg & INVALID_BB) + continue; + + if (pbb->nodeType == TWO_BRANCH) + { + t = pbb->edges[THEN].BBptr; + e = pbb->edges[ELSE].BBptr; + + /* Check (X || Y) case */ + if ((t->nodeType == TWO_BRANCH) && (t->numHlIcodes == 1) && + (t->numInEdges == 1) && (t->edges[ELSE].BBptr == e)) + { + obb = t->edges[THEN].BBptr; + + /* Construct compound DBL_OR expression */ + picode = this->Icode.GetIcode(pbb->start + pbb->length -1); + ticode = this->Icode.GetIcode(t->start + t->length -1); + exp = COND_EXPR::boolOp (picode->ic.hl.oper.exp, + 
ticode->ic.hl.oper.exp, DBL_OR); + picode->ic.hl.oper.exp = exp; + + /* Replace in-edge to obb from t to pbb */ + for (j = 0; j < obb->numInEdges; j++) + if (obb->inEdges[j] == t) + { + obb->inEdges[j] = pbb; + break; + } + + /* New THEN out-edge of pbb */ + pbb->edges[THEN].BBptr = obb; + + /* Remove in-edge t to e */ + auto iter=std::find(e->inEdges.begin(),e->inEdges.end(),t); + assert(iter!=e->inEdges.end()); + e->inEdges.erase(iter); + e->numInEdges--; /* looses 1 arc */ + assert(e->numInEdges==e->inEdges.size()); + t->flg |= INVALID_BB; + + if (pbb->flg & IS_LATCH_NODE) + this->dfsLast[t->dfsLastNum] = pbb; + else + i--; /* to repeat this analysis */ + + change = TRUE; + } + + /* Check (!X && Y) case */ + else if ((t->nodeType == TWO_BRANCH) && (t->numHlIcodes == 1) && + (t->numInEdges == 1) && (t->edges[THEN].BBptr == e)) + { + obb = t->edges[ELSE].BBptr; + + /* Construct compound DBL_AND expression */ + picode = this->Icode.GetIcode(pbb->start + pbb->length -1); + ticode = this->Icode.GetIcode(t->start + t->length -1); + inverseCondOp (&picode->ic.hl.oper.exp); + exp = COND_EXPR::boolOp (picode->ic.hl.oper.exp, + ticode->ic.hl.oper.exp, DBL_AND); + picode->ic.hl.oper.exp = exp; + + /* Replace in-edge to obb from t to pbb */ + auto iter=std::find(obb->inEdges.begin(),obb->inEdges.end(),t); + assert(iter!=obb->inEdges.end()); + *iter=pbb; + + /* New THEN and ELSE out-edges of pbb */ + pbb->edges[THEN].BBptr = e; + pbb->edges[ELSE].BBptr = obb; + + /* Remove in-edge t to e */ + iter=std::find(e->inEdges.begin(),e->inEdges.end(),t); + assert(iter!=e->inEdges.end()); + e->inEdges.erase(iter); /* looses 1 arc */ + e->numInEdges--; /* looses 1 arc */ + assert(t->inEdges.size()==t->numInEdges); + t->flg |= INVALID_BB; + + if (pbb->flg & IS_LATCH_NODE) + this->dfsLast[t->dfsLastNum] = pbb; + else + i--; /* to repeat this analysis */ + + change = TRUE; + } + + /* Check (X && Y) case */ + else if ((e->nodeType == TWO_BRANCH) && (e->numHlIcodes == 1) && + 
(e->numInEdges == 1) && (e->edges[THEN].BBptr == t)) + { + obb = e->edges[ELSE].BBptr; + + /* Construct compound DBL_AND expression */ + picode = this->Icode.GetIcode(pbb->start + pbb->length -1); + ticode = this->Icode.GetIcode(t->start + t->length -1); + exp = COND_EXPR::boolOp (picode->ic.hl.oper.exp, + ticode->ic.hl.oper.exp, DBL_AND); + picode->ic.hl.oper.exp = exp; + + /* Replace in-edge to obb from e to pbb */ + auto iter = std::find(obb->inEdges.begin(),obb->inEdges.end(),e); + assert(iter!=obb->inEdges.end()); + *iter=pbb; + /* New ELSE out-edge of pbb */ + pbb->edges[ELSE].BBptr = obb; + + /* Remove in-edge e to t */ + iter = std::find(t->inEdges.begin(),t->inEdges.end(),e); + assert(iter!=t->inEdges.end()); + t->inEdges.erase(iter); + t->numInEdges--; /* looses 1 arc */ + assert(t->inEdges.size()==t->numInEdges); + e->flg |= INVALID_BB; + + if (pbb->flg & IS_LATCH_NODE) + this->dfsLast[e->dfsLastNum] = pbb; + else + i--; /* to repeat this analysis */ + + change = TRUE; + } + + /* Check (!X || Y) case */ + else if ((e->nodeType == TWO_BRANCH) && (e->numHlIcodes == 1) && + (e->numInEdges == 1) && (e->edges[ELSE].BBptr == t)) + { + obb = e->edges[THEN].BBptr; + + /* Construct compound DBL_OR expression */ + picode = this->Icode.GetIcode(pbb->start + pbb->length -1); + ticode = this->Icode.GetIcode(t->start + t->length -1); + inverseCondOp (&picode->ic.hl.oper.exp); + exp = COND_EXPR::boolOp (picode->ic.hl.oper.exp, + ticode->ic.hl.oper.exp, DBL_OR); + picode->ic.hl.oper.exp = exp; + + /* Replace in-edge to obb from e to pbb */ + assert(obb->numInEdges==obb->inEdges.size()); + auto iter = std::find(obb->inEdges.begin(),obb->inEdges.end(),e); + assert(iter!=obb->inEdges.end()); + *iter=pbb; + + /* New THEN and ELSE out-edges of pbb */ + pbb->edges[THEN].BBptr = obb; + pbb->edges[ELSE].BBptr = t; + + /* Remove in-edge e to t */ + iter = std::find(t->inEdges.begin(),t->inEdges.end(),e); + assert(iter!=t->inEdges.end()); + t->inEdges.erase(iter); + 
t->numInEdges--; /* looses 1 arc */ + assert(t->numInEdges=t->inEdges.size()); + e->flg |= INVALID_BB; + + if (pbb->flg & IS_LATCH_NODE) + this->dfsLast[e->dfsLastNum] = pbb; + else + i--; /* to repeat this analysis */ + + change = TRUE; + } + } + } + } +} + + +void Function::structure(derSeq *derivedG) +/* Structuring algorithm to find the structures of the graph pProc->cfg */ +{ + /* Find immediate dominators of the graph */ + findImmedDom(); + if (hasCase) + structCases(this); + structLoops(this, derivedG); + structIfs(this); +} diff --git a/src/dataflow.cpp b/src/dataflow.cpp new file mode 100644 index 0000000..031f84c --- /dev/null +++ b/src/dataflow.cpp @@ -0,0 +1,1099 @@ +/***************************************************************************** + * Project: dcc + * File: dataflow.c + * Purpose: Data flow analysis module. + * (C) Cristina Cifuentes + ****************************************************************************/ + +#include "dcc.h" +#include +#include + + +/* Returns the index of the local variable or parameter at offset off, if it + * is in the stack frame provided. */ +Int STKFRAME::getLocVar(Int off) +{ Int i; + + for (i = 0; i < sym.size(); i++) + if (sym[i].off == off) + break; + return (i); +} + + +/* Returns a string with the source operand of Icode */ +static COND_EXPR *srcIdent (ICODE * Icode, Function * pProc, Int i, ICODE * duIcode, operDu du) +{ COND_EXPR *n; + + if (Icode->ic.ll.flg & I) /* immediate operand */ + { + if (Icode->ic.ll.flg & B) + n = COND_EXPR::idKte (Icode->ic.ll.immed.op, 1); + else + n = COND_EXPR::idKte (Icode->ic.ll.immed.op, 2); + } + else + n = COND_EXPR::id (Icode, SRC, pProc, i, duIcode, du); + return (n); +} + + +/* Returns the destination operand */ +static COND_EXPR *dstIdent (ICODE * pIcode, Function * pProc, Int i, ICODE * duIcode, + operDu du) +{ COND_EXPR *n; + + n = COND_EXPR::id (pIcode, DST, pProc, i, duIcode, du); + /** Is it needed? 
(pIcode->ic.ll.flg) & NO_SRC_B **/ + return (n); +} + + + +/* Eliminates all condition codes and generates new hlIcode instructions */ +void Function::elimCondCodes () +{ + Int i, + useAt, /* Index to instruction that used flag */ + defAt; /* Index to instruction that defined flag */ + byte use; /* Used flags bit vector */ + byte def; /* Defined flags bit vector */ + boolT notSup; /* Use/def combination not supported */ + COND_EXPR *rhs; /* Source operand */ + COND_EXPR *lhs; /* Destination operand */ + COND_EXPR *exp; /* Boolean expression */ + BB * pBB; /* Pointer to BBs in dfs last ordering */ + ICODE *prev; /* For extended basic blocks - previous icode inst */ + + for (i = 0; i < numBBs; i++) + { + pBB = dfsLast[i]; + if (pBB->flg & INVALID_BB) continue; /* Do not process invalid BBs */ + + for (useAt = pBB->start + pBB->length; useAt != pBB->start; useAt--) + if ((Icode.GetIcode(useAt-1)->type == LOW_LEVEL) && + (Icode.GetIcode(useAt-1)->invalid == FALSE) && + (use = Icode.GetIcode(useAt-1)->ic.ll.flagDU.u)) + { + /* Find definition within the same basic block */ + for (defAt = useAt-1; defAt != pBB->start; defAt--) + { + def = Icode.GetIcode(defAt-1)->ic.ll.flagDU.d; + if ((use & def) == use) + { + notSup = FALSE; + if ((Icode.GetLlOpcode(useAt-1) >= iJB) && + (Icode.GetLlOpcode(useAt-1) <= iJNS)) + { + switch (Icode.GetLlOpcode(defAt-1)) + { + case iCMP: + rhs = srcIdent (Icode.GetIcode(defAt-1), + this, defAt-1, + Icode.GetIcode(useAt-1), eUSE); + lhs = dstIdent (Icode.GetIcode(defAt-1), + this, defAt-1, + Icode.GetIcode(useAt-1), eUSE); + break; + + case iOR: + lhs = Icode.GetIcode(defAt-1)->ic.hl.oper.asgn.lhs->clone(); + copyDU (Icode.GetIcode(useAt-1),Icode.GetIcode(defAt-1), eUSE, eDEF); + if (Icode.GetLlFlag(defAt-1) & B) + rhs = COND_EXPR::idKte (0, 1); + else + rhs = COND_EXPR::idKte (0, 2); + break; + + case iTEST: + rhs = srcIdent (Icode.GetIcode(defAt-1), + this, defAt-1, + Icode.GetIcode(useAt-1), eUSE); + lhs = dstIdent 
(Icode.GetIcode(defAt-1), + this, defAt-1, + Icode.GetIcode(useAt-1), eUSE); + lhs = COND_EXPR::boolOp (lhs, rhs, AND); + if (Icode.GetLlFlag(defAt-1) & B) + rhs = COND_EXPR::idKte (0, 1); + else + rhs = COND_EXPR::idKte (0, 2); + break; + + default: + notSup = TRUE; + reportError (JX_NOT_DEF, Icode.GetLlOpcode(defAt-1)); + flg |= PROC_ASM; /* generate asm */ + } + if (! notSup) + { + exp = COND_EXPR::boolOp (lhs, rhs, + condOpJCond[Icode.GetLlOpcode(useAt-1)-iJB]); + Icode.GetIcode(useAt-1)->setJCond(exp); + } + } + + else if (Icode.GetLlOpcode(useAt-1) == iJCXZ) + { + lhs = COND_EXPR::idReg (rCX, 0, &localId); + Icode.GetIcode(useAt-1)->setRegDU (rCX, eUSE); + rhs = COND_EXPR::idKte (0, 2); + exp = COND_EXPR::boolOp (lhs, rhs, EQUAL); + Icode.GetIcode(useAt-1)->setJCond(exp); + } + + else + { + reportError (NOT_DEF_USE, + Icode.GetLlOpcode(defAt-1), + Icode.GetLlOpcode(useAt-1)); + flg |= PROC_ASM; /* generate asm */ + } + break; + } + } + + /* Check for extended basic block */ + if ((pBB->length == 1) && + (Icode.GetLlOpcode(useAt-1) >= iJB) && + (Icode.GetLlOpcode(useAt-1) <= iJNS)) + { + prev = Icode.GetIcode(pBB->inEdges[0]->start + + pBB->inEdges[0]->length - 1); + if (prev->ic.hl.opcode == HLI_JCOND) + { + exp = prev->ic.hl.oper.exp->clone(); + exp->changeBoolOp (condOpJCond[Icode.GetLlOpcode(useAt-1)-iJB]); + copyDU (Icode.GetIcode(useAt-1), prev, eUSE, eUSE); + Icode.GetIcode(useAt-1)->setJCond(exp); + } + } + /* Error - definition not found for use of a cond code */ + else if (defAt == pBB->start) + fatalError (DEF_NOT_FOUND, + Icode.GetLlOpcode(useAt-1)); + } + } +} + + +/* Generates the LiveUse() and Def() sets for each basic block in the graph. 
+ * Note: these sets are constant and could have been constructed during + * the construction of the graph, but since the code hasn't been + * analyzed yet for idioms, the procedure preamble misleads the + * analysis (eg: push si, would include si in LiveUse; although it + * is not really meant to be a register that is used before defined). */ +void Function::genLiveKtes () +{ Int i, j; + BB * pbb; + ICODE * picode; + dword liveUse, def; + + for (i = 0; i < numBBs; i++) + { + liveUse = def = 0; + pbb = dfsLast[i]; + if (pbb->flg & INVALID_BB) + continue; /* skip invalid BBs */ + for (j = pbb->start; j < (pbb->start + pbb->length); j++) + { + picode = Icode.GetIcode(j); + if ((picode->type == HIGH_LEVEL) && (picode->invalid == FALSE)) + { + liveUse |= (picode->du.use & ~def); + def |= picode->du.def; + } + } + pbb->liveUse = liveUse; + pbb->def = def; + } +} + + +/* Generates the liveIn() and liveOut() sets for each basic block via an + * iterative approach. + * Propagates register usage information to the procedure call. 
*/
+void Function::liveRegAnalysis (dword in_liveOut)
+{
+    Int i, j;
+    BB * pbb=0;             /* pointer to current basic block   */
+    Function * pcallee;     /* invoked subroutine               */
+    ICODE *ticode,          /* icode that invokes a subroutine  */
+          *picode;          /* icode of function return         */
+    dword prevLiveOut,      /* previous live out                */
+          prevLiveIn;       /* previous live in                 */
+    boolT change;           /* is there change in the live sets?*/
+
+    /* liveOut for this procedure */
+    liveOut = in_liveOut;
+
+    /* Iterate until no liveIn/liveOut set changes any more */
+    change = TRUE;
+    while (change)
+    {
+        /* Process nodes in reverse postorder order */
+        change = FALSE;
+        for (i = numBBs; i > 0; i--)
+        {
+            pbb = dfsLast[i-1];
+            if (pbb->flg & INVALID_BB) /* Do not process invalid BBs */
+                continue;
+
+            /* Get current liveIn() and liveOut() sets */
+            prevLiveIn = pbb->liveIn;
+            prevLiveOut = pbb->liveOut;
+
+            /* liveOut(b) = U LiveIn(s); where s is successor(b)
+             * liveOut(b) = {liveOut}; when b is a HLI_RET node */
+            if (pbb->numOutEdges == 0) /* HLI_RET node */
+            {
+                pbb->liveOut = in_liveOut;
+
+                /* Get return expression of function */
+                if (flg & PROC_IS_FUNC)
+                {
+                    picode = Icode.GetIcode(pbb->start + pbb->length - 1);
+                    if (picode->ic.hl.opcode == HLI_RET)
+                    {
+                        picode->ic.hl.oper.exp = COND_EXPR::idID (&retVal,
+                                    &localId, pbb->start + pbb->length - 1);
+                        picode->du.use = in_liveOut;
+                    }
+                }
+            }
+            else /* Check successors */
+            {
+                for (j = 0; j < pbb->numOutEdges; j++)
+                    pbb->liveOut |= pbb->edges[j].BBptr->liveIn;
+
+                /* propagate to invoked procedure */
+                if (pbb->nodeType == CALL_NODE)
+                {
+                    ticode = Icode.GetIcode(pbb->start + pbb->length - 1);
+                    pcallee = ticode->ic.hl.oper.call.proc;
+
+                    /* user/runtime routine */
+                    if (! (pcallee->flg & PROC_ISLIB))
+                    {
+                        if (pcallee->liveAnal == FALSE) /* hasn't been processed */
+                            pcallee->dataFlow (pbb->liveOut);
+                        pbb->liveOut = pcallee->liveIn;
+                    }
+                    else /* library routine */
+                    {
+                        if ((pcallee->flg & PROC_IS_FUNC) && /* returns a value */
+                            (pcallee->liveOut & pbb->edges[0].BBptr->liveIn))
+                            pbb->liveOut = pcallee->liveOut;
+                        else
+                            pbb->liveOut = 0;
+                    }
+
+                    if ((! (pcallee->flg & PROC_ISLIB)) || (pbb->liveOut != 0))
+                    {
+                        /* Other return types leave numRegsDef untouched */
+                        switch (pcallee->retVal.type) {
+                        case TYPE_LONG_SIGN: case TYPE_LONG_UNSIGN:
+                            ticode->du1.numRegsDef = 2;
+                            break;
+                        case TYPE_WORD_SIGN: case TYPE_WORD_UNSIGN:
+                        case TYPE_BYTE_SIGN: case TYPE_BYTE_UNSIGN:
+                            ticode->du1.numRegsDef = 1;
+                            break;
+                        } /*eos*/
+
+                        /* Propagate def/use results to calling icode */
+                        ticode->du.use = pcallee->liveIn;
+                        ticode->du.def = pcallee->liveOut;
+                    }
+                }
+            }
+
+            /* liveIn(b) = liveUse(b) U (liveOut(b) - def(b) */
+            pbb->liveIn = pbb->liveUse | (pbb->liveOut & ~pbb->def);
+
+            /* Check if live sets have been modified */
+            if ((prevLiveIn != pbb->liveIn) || (prevLiveOut != pbb->liveOut))
+                change = TRUE;
+        }
+    }
+
+    /* Propagate liveIn(b) to procedure header.  pbb is still NULL when the
+     * graph has no basic blocks (numBBs == 0), so it must be tested before
+     * it is dereferenced. */
+    if ((pbb != NULL) && (pbb->liveIn != 0)) /* uses registers */
+        liveIn = pbb->liveIn;
+
+    /* Remove any references to register variables */
+    if (flg & SI_REGVAR)
+    {
+        liveIn &= maskDuReg[rSI];
+        if (pbb != NULL)
+            pbb->liveIn &= maskDuReg[rSI];
+    }
+    if (flg & DI_REGVAR)
+    {
+        liveIn &= maskDuReg[rDI];
+        if (pbb != NULL)
+            pbb->liveIn &= maskDuReg[rDI];
+    }
+}
+
+
+/* Generates the du chain of each instruction in a basic block.
+ * For each register defined by a HIGH_LEVEL icode, records in du1.idx the
+ * indexes of the icodes that use it before it is redefined, marks in
+ * du.lastDefRegi the definitions that reach the end of the block, and removes
+ * definitions that are neither used nor live out.
+ * NOTE(review): the use index is not checked against MAX_USES when uses are
+ * stored - confirm the bound cannot be exceeded. */
+void Function::genDU1 ()
+{
+    byte regi;              /* Register that was defined        */
+    Int i, j, k, p, n, lastInst, defRegIdx, useIdx;
+    ICODE * picode, *ticode;/* Current and target bb            */
+    BB * pbb, *tbb;         /* Current and target basic block   */
+    boolT res;
+
+    /* Traverse tree in dfsLast order */
+    for (i = 0; i < numBBs; i++)
+    {
+        pbb = dfsLast[i];
+        if (pbb->flg & INVALID_BB) continue;
+
+        /* Process each register definition of a HIGH_LEVEL icode instruction.
+         * Note that register variables should not be considered registers.
+         */
+        lastInst = pbb->start + pbb->length;
+        for (j = pbb->start; j < lastInst; j++)
+        {
+            picode = Icode.GetIcode(j);
+            if (picode->type == HIGH_LEVEL)
+            {
+                regi = 0;
+                defRegIdx = 0;
+                for (k = 0; k < INDEXBASE; k++)
+                {
+                    if ((picode->du.def & power2(k)) != 0)
+                    {
+                        regi = (byte)(k + 1); /* defined register */
+                        picode->du1.regi[defRegIdx] = regi;
+
+                        /* Check remaining instructions of the BB for all uses
+                         * of register regi, before any definitions of the
+                         * register */
+                        if ((regi == rDI) && (flg & DI_REGVAR))
+                            continue;
+                        if ((regi == rSI) && (flg & SI_REGVAR))
+                            continue;
+                        if ((j + 1) < lastInst) /* several instructions */
+                        {
+                            useIdx = 0;
+                            for (n = j+1; n < lastInst; n++)
+                            {
+                                /* Only check uses of HIGH_LEVEL icodes */
+                                ticode = Icode.GetIcode(n);
+                                if (ticode->type == HIGH_LEVEL)
+                                {
+                                    /* if used, get icode index */
+                                    if (ticode->du.use & duReg[regi])
+                                        picode->du1.idx[defRegIdx][useIdx++] = n;
+
+                                    /* if defined, stop finding uses for this reg */
+                                    if (ticode->du.def & duReg[regi])
+                                        break;
+                                }
+                            }
+
+                            /* Check if last definition of this register */
+                            if ((! (ticode->du.def & duReg[regi])) &&
+                                (pbb->liveOut & duReg[regi]))
+                                picode->du.lastDefRegi |= duReg[regi];
+                        }
+                        else /* only 1 instruction in this basic block */
+                        {
+                            /* Check if last definition of this register */
+                            if (pbb->liveOut & duReg[regi])
+                                picode->du.lastDefRegi |= duReg[regi];
+                        }
+
+                        /* Find target icode for HLI_CALL icodes to procedures
+                         * that are functions. The target icode is in the
+                         * next basic block (unoptimized code) or somewhere else
+                         * on optimized code. */
+                        if ((picode->ic.hl.opcode == HLI_CALL) &&
+                            (picode->ic.hl.oper.call.proc->flg & PROC_IS_FUNC))
+                        {
+                            tbb = pbb->edges[0].BBptr;
+                            useIdx = 0;
+                            for (n = tbb->start; n < tbb->start + tbb->length; n++)
+                            {
+                                ticode = Icode.GetIcode(n);
+                                if (ticode->type == HIGH_LEVEL)
+                                {
+                                    /* if used, get icode index */
+                                    if (ticode->du.use & duReg[regi])
+                                        picode->du1.idx[defRegIdx][useIdx++] = n;
+
+                                    /* if defined, stop finding uses for this reg */
+                                    if (ticode->du.def & duReg[regi])
+                                        break;
+                                }
+                            }
+
+                            /* if not used in this basic block, check if the
+                             * register is live out, if so, make it the last
+                             * definition of this register */
+                            if ((picode->du1.idx[defRegIdx][useIdx] == 0) &&
+                                (tbb->liveOut & duReg[regi]))
+                                picode->du.lastDefRegi |= duReg[regi];
+                        }
+
+                        /* If not used within this bb or in successors of this
+                         * bb (ie. not in liveOut), then register is useless,
+                         * thus remove it. Also check that this is not a return
+                         * from a library function (routines such as printf
+                         * return an integer, which is normally not taken into
+                         * account by the programmer). */
+                        if ((picode->invalid == FALSE) &&
+                            (picode->du1.idx[defRegIdx][0] == 0) &&
+                            (! (picode->du.lastDefRegi & duReg[regi])) &&
+                            // (! ((picode->ic.hl.opcode != HLI_CALL) &&
+                            (! ((picode->ic.hl.opcode == HLI_CALL) &&
+                                (picode->ic.hl.oper.call.proc->flg & PROC_ISLIB))))
+                        {
+                            if (! (pbb->liveOut & duReg[regi])) /* not liveOut */
+                            {
+                                res = removeDefRegi (regi, picode, defRegIdx+1,
+                                                     &localId);
+
+                                /* Backpatch any uses of this instruction, within
+                                 * the same BB, if the instruction was invalidated */
+                                if (res == TRUE)
+                                    for (p = j; p > pbb->start; p--)
+                                    {
+                                        ticode = Icode.GetIcode(p-1);
+                                        for (n = 0; n < MAX_USES; n++)
+                                        {
+                                            if (ticode->du1.idx[0][n] == j)
+                                            {
+                                                /* Close the gap left by the
+                                                 * removed use and re-test
+                                                 * slot n */
+                                                if (n < MAX_USES - 1)
+                                                {
+                                                    memmove (&ticode->du1.idx[0][n],
+                                                             &ticode->du1.idx[0][n+1],
+                                                             (size_t)((MAX_USES - n - 1) * sizeof(Int)));
+                                                    n--;
+                                                }
+                                                ticode->du1.idx[0][MAX_USES - 1] = 0;
+                                            }
+                                        }
+                                    }
+                            }
+                            else /* liveOut */
+                                picode->du.lastDefRegi |= duReg[regi];
+                        }
+                        defRegIdx++;
+
+                        /* Check if all defined registers have been processed */
+                        if ((defRegIdx >= picode->du1.numRegsDef) ||
+                            (defRegIdx == MAX_REGS_DEF))
+                            break;
+                    }
+                }
+            }
+        }
+    }
+
+}
+
+
+/* Substitutes the rhs (or lhs if rhs not possible) of ticode for the rhs
+ * of picode.  On success picode is invalidated and *numHlIcodes is
+ * decremented; a NULL rhs is ignored. */
+static void forwardSubs (COND_EXPR *lhs, COND_EXPR *rhs, ICODE * picode,
+                         ICODE * ticode, LOCAL_ID *locsym, Int *numHlIcodes)
+{
+    boolT res;
+
+    if (rhs == NULL) /* In case expression popped is NULL */
+        return;
+
+    /* Insert on rhs of ticode, if possible */
+    res = insertSubTreeReg (rhs, &ticode->ic.hl.oper.asgn.rhs,
+                            locsym->id_arr[lhs->expr.ident.idNode.regiIdx].id.regi,
+                            locsym);
+    if (res)
+    {
+        picode->invalidate();
+        (*numHlIcodes)--;
+    }
+    else
+    {
+        /* Try to insert it on lhs of ticode*/
+        res = insertSubTreeReg (rhs, &ticode->ic.hl.oper.asgn.lhs,
+                                locsym->id_arr[lhs->expr.ident.idNode.regiIdx].id.regi,
+                                locsym);
+        if (res)
+        {
+            picode->invalidate();
+            (*numHlIcodes)--;
+        }
+    }
+}
+
+
+/* Substitutes the rhs (or lhs if rhs not possible) of ticode for the
+ * expression exp given.  On success picode is invalidated and *numHlIcodes
+ * is decremented; a NULL exp is ignored. */
+static void forwardSubsLong (Int longIdx, COND_EXPR *exp, ICODE * picode,
+                             ICODE * ticode, Int *numHlIcodes)
+{ boolT res;
+
+    if (exp == NULL) /* In case expression popped is NULL */
+        return;
+
+    /* Insert on rhs of ticode, if possible */
+    res = insertSubTreeLongReg (exp, &ticode->ic.hl.oper.asgn.rhs, longIdx);
+    if (res)
+    {
+        picode->invalidate();
+        (*numHlIcodes)--;
+    }
+    else
+    {
+        /* Try to insert it on lhs of ticode*/
+        res = insertSubTreeLongReg (exp, &ticode->ic.hl.oper.asgn.lhs, longIdx);
+        if (res)
+        {
+            picode->invalidate();
+            (*numHlIcodes)--;
+        }
+    }
+}
+
+
+/* Returns whether the elements of the expression rhs are all x-clear from
+ * instruction f up to instruction t. */
+static boolT xClear (COND_EXPR *rhs, Int f, Int t, Int lastBBinst, Function * pproc)
+{ Int i;
+  boolT res;
+  byte regi;
+  ICODE * picode;
+
+    if (rhs == NULL)
+        return (FALSE);
+
+    switch (rhs->type) {
+    case IDENTIFIER:
+        if (rhs->expr.ident.idType == REGISTER)
+        {
+            picode = pproc->Icode.GetFirstIcode();
+            regi= pproc->localId.id_arr[rhs->expr.ident.idNode.regiIdx].id.regi;
+            /* Any valid high-level redefinition between f and t spoils it */
+            for (i = (f + 1); (i < lastBBinst) && (i < t); i++)
+                if ((picode[i].type == HIGH_LEVEL) &&
+                    (picode[i].invalid == FALSE))
+                {
+                    if (picode[i].du.def & duReg[regi])
+                        return (FALSE);
+                }
+            /* Clear only if the scan stopped inside the basic block */
+            if (i < lastBBinst)
+                return (TRUE);
+            else
+                return (FALSE);
+        }
+        else
+            return (TRUE);
+        /* else if (rhs->expr.ident.idType == LONG_VAR)
+        {
+missing all other identifiers ****
+        } */
+
+    case BOOLEAN_OP:
+        res = xClear (rhs->expr.boolExpr.rhs, f, t, lastBBinst, pproc);
+        if (res == FALSE)
+            return (FALSE);
+        return (xClear (rhs->expr.boolExpr.lhs, f, t, lastBBinst, pproc));
+
+    case NEGATION:
+    case ADDRESSOF:
+    case DEREFERENCE:
+        return (xClear (rhs->expr.unaryExp, f, t, lastBBinst, pproc));
+    } /* eos */
+    return FALSE;
+}
+
+
+/* Checks the type of the formal argument as against to the actual argument,
+ * whenever possible, and then places the actual argument on the procedure's
+ * argument list.
*/
+/* Parameters:
+ *   pp      - procedure being called
+ *   pProc   - procedure that contains the call
+ *   picode  - the call icode that receives the argument
+ *   numArgs - zero-based position of the argument being processed
+ *   k       - running count of argument bytes; advanced by the size of
+ *             the popped expression when newStkArg() returns FALSE */
+static void processCArg (Function * pp, Function * pProc, ICODE * picode, Int numArgs, Int *k)
+{
+    COND_EXPR *exp;
+    boolT res;
+
+    /* if (numArgs == 0)
+        return; */
+
+    exp = popExpStk();
+    if (pp->flg & PROC_ISLIB) /* library function */
+    {
+        /* Adjust the actual argument type only when a formal argument is
+         * known for this position.  Only vararg routines were range-checked
+         * before; a call passing more arguments than the known formals of
+         * any other library routine indexed past the end of args.sym. */
+        if ((pp->args.numArgs > 0) && (numArgs >= 0) &&
+            ((size_t)numArgs < pp->args.sym.size()))
+            adjustActArgType (exp, pp->args.sym[numArgs].type, pProc);
+        res = newStkArg (picode, exp, picode->ic.ll.opcode, pProc);
+    }
+    else /* user function */
+    {
+        if (pp->args.numArgs > 0)
+            pp->args.adjustForArgType (numArgs, expType (exp, pProc));
+        res = newStkArg (picode, exp, picode->ic.ll.opcode, pProc);
+    }
+
+    /* Do not update the size of k if the expression was a segment register
+     * in a near call */
+    if (res == FALSE)
+        *k += hlTypeSize (exp, pProc);
+}
+
+
+/* Eliminates extraneous intermediate icode instructions when finding
+ * expressions. Generates new hlIcodes in the form of expression trees.
+ * For HLI_CALL hlIcodes, places the arguments in the argument list.
*/
+/* Works one basic block at a time (dfsLast order) over the valid
+ * HIGH_LEVEL icodes:
+ *   - a definition of one register (numRegsDef == 1) or of a register pair
+ *     (numRegsDef == 2) that has exactly one use is forward substituted into
+ *     the icode that uses it;
+ *   - HLI_PUSH expressions go on the expression stack and are popped again
+ *     by HLI_POP and by stack-argument calls;
+ *   - a call whose result could not be substituted becomes an assignment to
+ *     its return register(s).
+ * The number of surviving high-level icodes is stored in each BB. */
+void Function::findExps()
+{
+    Int i, j, k, lastInst, numHlIcodes;
+    ICODE * picode,     /* Current icode                        */
+          * ticode;     /* Target icode                         */
+    BB * pbb;           /* Current and next basic block         */
+    boolT res;
+    COND_EXPR *exp,     /* expression pointer - for HLI_POP and HLI_CALL */
+              *lhs;     /* exp ptr for return value of a HLI_CALL */
+    byte regi;          /* register to be forward substituted   */
+    ID *retVal;         /* function return value                */
+
+    /* Initialize expression stack */
+    initExpStk();
+
+    /* Traverse tree in dfsLast order */
+    for (i = 0; i < numBBs; i++)
+    {
+        /* Process one BB */
+        pbb = dfsLast[i];
+        if (pbb->flg & INVALID_BB) continue;
+        lastInst = pbb->start + pbb->length;
+        numHlIcodes = 0;
+        for (j = pbb->start; j < lastInst; j++)
+        {
+            picode = Icode.GetIcode(j);
+            if ((picode->type == HIGH_LEVEL) && (picode->invalid == FALSE))
+            {
+                numHlIcodes++;
+                if (picode->du1.numRegsDef == 1) /* byte/word regs */
+                {
+                    /* Check for only one use of this register. If this is
+                     * the last definition of the register in this BB, check
+                     * that it is not liveOut from this basic block */
+                    if ((picode->du1.idx[0][0] != 0) &&
+                        (picode->du1.idx[0][1] == 0))
+                    {
+                        /* Check that this register is not liveOut, if it
+                         * is the last definition of the register */
+                        regi = picode->du1.regi[0];
+
+                        /* Check if we can forward substitute this register */
+                        switch (picode->ic.hl.opcode) {
+                        case HLI_ASSIGN:
+                            /* Replace rhs of current icode into target
+                             * icode expression */
+                            ticode = Icode.GetIcode(picode->du1.idx[0][0]);
+                            if ((picode->du.lastDefRegi & duReg[regi]) &&
+                                ((ticode->ic.hl.opcode != HLI_CALL) &&
+                                 (ticode->ic.hl.opcode != HLI_RET)))
+                                continue;
+
+                            if (xClear (picode->ic.hl.oper.asgn.rhs, j,
+                                        picode->du1.idx[0][0], lastInst, this))
+                            {
+                                switch (ticode->ic.hl.opcode) {
+                                case HLI_ASSIGN:
+                                    forwardSubs (picode->ic.hl.oper.asgn.lhs,
+                                                 picode->ic.hl.oper.asgn.rhs,
+                                                 picode, ticode, &localId,
+                                                 &numHlIcodes);
+                                    break;
+
+                                case HLI_JCOND: case HLI_PUSH: case HLI_RET:
+                                    res = insertSubTreeReg (
+                                            picode->ic.hl.oper.asgn.rhs,
+                                            &ticode->ic.hl.oper.exp,
+                                            localId.id_arr[picode->ic.hl.oper.asgn.lhs->expr.ident.idNode.regiIdx].id.regi,
+                                            &localId);
+                                    if (res)
+                                    {
+                                        picode->invalidate();
+                                        numHlIcodes--;
+                                    }
+                                    break;
+
+                                case HLI_CALL: /* register arguments */
+                                    newRegArg (this, picode, ticode);
+                                    picode->invalidate();
+                                    numHlIcodes--;
+                                    break;
+                                } /* eos */
+                            }
+                            break;
+
+                        case HLI_POP:
+                            ticode = Icode.GetIcode(picode->du1.idx[0][0]);
+                            if ((picode->du.lastDefRegi & duReg[regi]) &&
+                                ((ticode->ic.hl.opcode != HLI_CALL) &&
+                                 (ticode->ic.hl.opcode != HLI_RET)))
+                                continue;
+
+                            exp = popExpStk(); /* pop last exp pushed */
+                            switch (ticode->ic.hl.opcode) {
+                            case HLI_ASSIGN:
+                                forwardSubs (picode->ic.hl.oper.exp, exp,
+                                             picode, ticode, &localId,
+                                             &numHlIcodes);
+                                break;
+
+                            case HLI_JCOND: case HLI_PUSH: case HLI_RET:
+                                res = insertSubTreeReg (exp,
+                                        &ticode->ic.hl.oper.exp,
+                                        localId.id_arr[picode->ic.hl.oper.exp->expr.ident.idNode.regiIdx].id.regi,
+                                        &localId);
+                                if (res)
+                                {
+                                    picode->invalidate();
+                                    numHlIcodes--;
+                                }
+                                break;
+
+                            /****case HLI_CALL: /* register arguments
+                                newRegArg (pProc, picode, ticode);
+                                picode->invalidate();
+                                numHlIcodes--;
+                                break; */
+                            } /* eos */
+                            break;
+
+                        case HLI_CALL:
+                            ticode = Icode.GetIcode(picode->du1.idx[0][0]);
+                            switch (ticode->ic.hl.opcode) {
+                            case HLI_ASSIGN:
+                                exp = COND_EXPR::idFunc (
+                                        picode->ic.hl.oper.call.proc,
+                                        picode->ic.hl.oper.call.args);
+                                res = insertSubTreeReg (exp,
+                                        &ticode->ic.hl.oper.asgn.rhs,
+                                        picode->ic.hl.oper.call.proc->retVal.id.regi,
+                                        &localId);
+                                if (! res)
+                                    insertSubTreeReg (exp,
+                                        &ticode->ic.hl.oper.asgn.lhs,
+                                        picode->ic.hl.oper.call.proc->retVal.id.regi,
+                                        &localId);
+                                /*** HERE missing: 2 regs ****/
+                                picode->invalidate();
+                                numHlIcodes--;
+                                break;
+
+                            case HLI_PUSH: case HLI_RET:
+                                exp = COND_EXPR::idFunc (
+                                        picode->ic.hl.oper.call.proc,
+                                        picode->ic.hl.oper.call.args);
+                                ticode->ic.hl.oper.exp = exp;
+                                picode->invalidate();
+                                numHlIcodes--;
+                                break;
+
+                            case HLI_JCOND:
+                                exp = COND_EXPR::idFunc (
+                                        picode->ic.hl.oper.call.proc,
+                                        picode->ic.hl.oper.call.args);
+                                retVal = &picode->ic.hl.oper.call.proc->retVal;
+                                res = insertSubTreeReg (exp,
+                                        &ticode->ic.hl.oper.exp,
+                                        retVal->id.regi, &localId);
+                                if (res) /* was substituted */
+                                {
+                                    picode->invalidate();
+                                    numHlIcodes--;
+                                }
+                                else /* cannot substitute function */
+                                {
+                                    lhs = COND_EXPR::idID(retVal,&localId,j);
+                                    picode->setAsgn(lhs, exp);
+                                }
+                                break;
+                            } /* eos */
+                            break;
+                        } /* eos */
+                    }
+                }
+
+                else if (picode->du1.numRegsDef == 2) /* long regs */
+                {
+                    /* Check for only one use of these registers */
+                    if ((picode->du1.idx[0][0] != 0) &&
+                        (picode->du1.idx[0][1] == 0) &&
+                        (picode->du1.idx[1][0] != 0) &&
+                        (picode->du1.idx[1][1] == 0))
+                    {
+                        /* The lastDefRegi tests below need a register; regi
+                         * was never assigned on this path (it held a value
+                         * left over from an earlier icode, or none at all).
+                         * NOTE(review): this tests the first register of the
+                         * pair only - confirm whether du1.regi[1] should be
+                         * tested as well. */
+                        regi = picode->du1.regi[0];
+
+                        switch (picode->ic.hl.opcode) {
+                        case HLI_ASSIGN:
+                            /* Replace rhs of current icode into target
+                             * icode expression */
+                            if (picode->du1.idx[0][0] == picode->du1.idx[1][0])
+                            {
+                                ticode = Icode.GetIcode(picode->du1.idx[0][0]);
+                                if ((picode->du.lastDefRegi & duReg[regi]) &&
+                                    ((ticode->ic.hl.opcode != HLI_CALL) &&
+                                     (ticode->ic.hl.opcode != HLI_RET)))
+                                    continue;
+
+                                switch (ticode->ic.hl.opcode) {
+                                case HLI_ASSIGN:
+                                    forwardSubsLong (picode->ic.hl.oper.asgn.lhs->expr.ident.idNode.longIdx,
+                                                     picode->ic.hl.oper.asgn.rhs, picode,
+                                                     ticode, &numHlIcodes);
+                                    break;
+
+                                case HLI_JCOND: case HLI_PUSH: case HLI_RET:
+                                    res = insertSubTreeLongReg (
+                                            picode->ic.hl.oper.asgn.rhs,
+                                            &ticode->ic.hl.oper.exp,
+                                            picode->ic.hl.oper.asgn.lhs->expr.ident.idNode.longIdx);
+                                    if (res)
+                                    {
+                                        picode->invalidate();
+                                        numHlIcodes--;
+                                    }
+                                    break;
+
+                                case HLI_CALL: /* register arguments */
+                                    newRegArg (this, picode, ticode);
+                                    picode->invalidate();
+                                    numHlIcodes--;
+                                    break;
+                                } /* eos */
+                            }
+                            break;
+
+                        case HLI_POP:
+                            if (picode->du1.idx[0][0] == picode->du1.idx[1][0])
+                            {
+                                ticode = Icode.GetIcode(picode->du1.idx[0][0]);
+                                if ((picode->du.lastDefRegi & duReg[regi]) &&
+                                    ((ticode->ic.hl.opcode != HLI_CALL) &&
+                                     (ticode->ic.hl.opcode != HLI_RET)))
+                                    continue;
+
+                                exp = popExpStk(); /* pop last exp pushed */
+                                switch (ticode->ic.hl.opcode) {
+                                case HLI_ASSIGN:
+                                    forwardSubsLong (picode->ic.hl.oper.exp->expr.ident.idNode.longIdx,
+                                                     exp, picode, ticode, &numHlIcodes);
+                                    break;
+                                case HLI_JCOND: case HLI_PUSH:
+                                    /* NOTE(review): reads oper.asgn.lhs
+                                     * although the HLI_ASSIGN case above
+                                     * reads oper.exp for the same icode -
+                                     * confirm the two refer to the same
+                                     * operand. */
+                                    res = insertSubTreeLongReg (exp,
+                                            &ticode->ic.hl.oper.exp,
+                                            picode->ic.hl.oper.asgn.lhs->expr.ident.idNode.longIdx);
+                                    if (res)
+                                    {
+                                        picode->invalidate();
+                                        numHlIcodes--;
+                                    }
+                                    break;
+                                case HLI_CALL: /*** missing ***/
+                                    break;
+                                } /* eos */
+                            }
+                            break;
+
+                        case HLI_CALL: /* check for function return */
+                            ticode = Icode.GetIcode(picode->du1.idx[0][0]);
+                            switch (ticode->ic.hl.opcode)
+                            {
+                            case HLI_ASSIGN:
+                                exp = COND_EXPR::idFunc (
+                                        picode->ic.hl.oper.call.proc,
+                                        picode->ic.hl.oper.call.args);
+                                ticode->ic.hl.oper.asgn.lhs =
+                                    COND_EXPR::idLong(&localId, DST, ticode,
+                                                      HIGH_FIRST, j, eDEF, 1);
+                                ticode->ic.hl.oper.asgn.rhs = exp;
+                                picode->invalidate();
+                                numHlIcodes--;
+                                break;
+
+                            case HLI_PUSH: case HLI_RET:
+                                exp = COND_EXPR::idFunc (
+                                        picode->ic.hl.oper.call.proc,
+                                        picode->ic.hl.oper.call.args);
+                                ticode->ic.hl.oper.exp = exp;
+                                picode->invalidate();
+                                numHlIcodes--;
+                                break;
+
+                            case HLI_JCOND:
+                                exp = COND_EXPR::idFunc (
+                                        picode->ic.hl.oper.call.proc,
+                                        picode->ic.hl.oper.call.args);
+                                retVal = &picode->ic.hl.oper.call.proc->retVal;
+                                res = insertSubTreeLongReg (exp,
+                                        &ticode->ic.hl.oper.exp,
+                                        localId.newLongReg
+                                        (
+                                            retVal->type, retVal->id.longId.h,
+                                            retVal->id.longId.l, j));
+                                if (res) /* was substituted */
+                                {
+                                    picode->invalidate();
+                                    numHlIcodes--;
+                                }
+                                else /* cannot substitute function */
+                                {
+                                    lhs = COND_EXPR::idID(retVal,&localId,j);
+                                    picode->setAsgn(lhs, exp);
+                                }
+                                break;
+                            } /* eos */
+                        } /* eos */
+                    }
+                }
+
+                /* HLI_PUSH doesn't define any registers, only uses registers.
+                 * Push the associated expression to the register on the local
+                 * expression stack */
+                else if (picode->ic.hl.opcode == HLI_PUSH)
+                {
+                    pushExpStk (picode->ic.hl.oper.exp);
+                    picode->invalidate();
+                    numHlIcodes--;
+                }
+
+                /* For HLI_CALL instructions that use arguments from the stack,
+                 * pop them from the expression stack and place them on the
+                 * procedure's argument list */
+                if ((picode->ic.hl.opcode == HLI_CALL) &&
+                    ! (picode->ic.hl.oper.call.proc->flg & REG_ARGS))
+                { Function * pp;
+                  Int cb, numArgs;
+                  boolT res;
+
+                    pp = picode->ic.hl.oper.call.proc;
+                    if (pp->flg & CALL_PASCAL)
+                    {
+                        cb = pp->cbParam; /* fixed # arguments */
+                        for (k = 0, numArgs = 0; k < cb; numArgs++)
+                        {
+                            exp = popExpStk();
+                            if (pp->flg & PROC_ISLIB) /* library function */
+                            {
+                                if (pp->args.numArgs > 0)
+                                    adjustActArgType(exp, pp->args.sym[numArgs].type, this);
+                                res = newStkArg (picode, exp, picode->ic.ll.opcode, this);
+                            }
+                            else /* user function */
+                            {
+                                if (pp->args.numArgs >0)
+                                    pp->args.adjustForArgType (numArgs,expType (exp, this));
+                                res = newStkArg (picode, exp,picode->ic.ll.opcode, this);
+                            }
+                            if (res == FALSE)
+                                k += hlTypeSize (exp, this);
+                        }
+                    }
+                    else /* CALL_C */
+                    {
+                        cb = picode->ic.hl.oper.call.args->cb;
+                        numArgs = 0;
+                        /* processCArg() adds to k through the pointer, so k
+                         * must hold a value on the REST_STK path too */
+                        k = 0;
+                        if (cb)
+                            for (k = 0; k < cb; numArgs++)
+                                processCArg (pp, this, picode, numArgs, &k);
+                        else if ((cb == 0) && (picode->ic.ll.flg & REST_STK))
+                            while (! emptyExpStk())
+                            {
+                                processCArg (pp, this, picode, numArgs, &k);
+                                numArgs++;
+                            }
+                    }
+                }
+
+                /* If we could not substitute the result of a function,
+                 * assign it to the corresponding registers */
+                if ((picode->ic.hl.opcode == HLI_CALL) &&
+                    ((picode->ic.hl.oper.call.proc->flg & PROC_ISLIB) !=
+                     PROC_ISLIB) && (picode->du1.idx[0][0] == 0) &&
+                    (picode->du1.numRegsDef > 0))
+                {
+                    exp = COND_EXPR::idFunc (picode->ic.hl.oper.call.proc,
+                                             picode->ic.hl.oper.call.args);
+                    lhs = COND_EXPR::idID (&picode->ic.hl.oper.call.proc->retVal,
+                                           &localId, j);
+                    picode->setAsgn(lhs, exp);
+                }
+            }
+        }
+
+        /* Store number of high-level icodes in current basic block */
+        pbb->numHlIcodes = numHlIcodes;
+    }
+}
+
+
+/* Invokes procedures related with data flow analysis. Works on a procedure
+ * at a time basis.
+ * Note: indirect recursion in liveRegAnalysis is possible.
*/ +void Function::dataFlow(dword liveOut) +{ + boolT isAx, isBx, isCx, isDx; + Int idx; + + /* Remove references to register variables */ + if (flg & SI_REGVAR) + liveOut &= maskDuReg[rSI]; + if (flg & DI_REGVAR) + liveOut &= maskDuReg[rDI]; + + /* Function - return value register(s) */ + if (liveOut != 0) + { + flg |= PROC_IS_FUNC; + isAx = (boolT)(liveOut & power2(rAX - rAX)); + isBx = (boolT)(liveOut & power2(rBX - rAX)); + isCx = (boolT)(liveOut & power2(rCX - rAX)); + isDx = (boolT)(liveOut & power2(rDX - rAX)); + + if (isAx && isDx) /* long or pointer */ + { + retVal.type = TYPE_LONG_SIGN; + retVal.loc = REG_FRAME; + retVal.id.longId.h = rDX; + retVal.id.longId.l = rAX; + idx = localId.newLongReg(TYPE_LONG_SIGN, rDX, rAX, 0); + localId.propLongId (rAX, rDX, "\0"); + } + else if (isAx || isBx || isCx || isDx) /* word */ + { + retVal.type = TYPE_WORD_SIGN; + retVal.loc = REG_FRAME; + if (isAx) + retVal.id.regi = rAX; + else if (isBx) + retVal.id.regi = rBX; + else if (isCx) + retVal.id.regi = rCX; + else + retVal.id.regi = rDX; + idx = localId.newByteWordReg(TYPE_WORD_SIGN,retVal.id.regi); + } + } + + /* Data flow analysis */ + liveAnal = TRUE; + elimCondCodes(); + genLiveKtes(); + liveRegAnalysis (liveOut); /* calls dataFlow() recursively */ + if (! 
(flg & PROC_ASM)) /* can generate C for pProc */ + { + genDU1 (); /* generate def/use level 1 chain */ + findExps (); /* forward substitution algorithm */ + } +} + diff --git a/src/dcc.cpp b/src/dcc.cpp new file mode 100644 index 0000000..e78d323 --- /dev/null +++ b/src/dcc.cpp @@ -0,0 +1,163 @@ +/***************************************************************************** + * dcc decompiler + * Reads the command line switches and then executes each major section in turn + * (C) Cristina Cifuentes + ****************************************************************************/ + +#include "dcc.h" +#include +#ifdef __UNIX__ +//#include +#else +#include +#include /* For unlink() */ +#endif + + +/* Global variables - extern to other modules */ +char *progname; /* argv[0] - for error msgs */ +char *asm1_name, *asm2_name; /* Assembler output filenames */ +SYMTAB symtab; /* Global symbol table */ +STATS stats; /* cfg statistics */ +PROG prog; /* programs fields */ +OPTION option; /* Command line options */ +//Function * pProcList; /* List of procedures, topologically sort */ +//Function * pLastProc; /* Pointer to last node in procedure list */ +std::list pProcList; +CALL_GRAPH *callGraph; /* Call graph of the program */ + +static char *initargs(int argc, char *argv[]); +static void displayTotalStats(); + + +/**************************************************************************** + * main + ***************************************************************************/ + + +int main(int argc, char *argv[]) +{ + /* Extract switches and filename */ + strcpy(option.filename, initargs(argc, argv)); + + /* Front end reads in EXE or COM file, parses it into I-code while + * building the call graph and attaching appropriate bits of code for + * each procedure. + */ + FrontEnd (option.filename, &callGraph); + + /* In the middle is a so called Universal Decompiling Machine. 
+ * It processes the procedure list and I-code and attaches where it can + * to each procedure an optimised cfg and ud lists + */ + udm(); + + /* Back end converts each procedure into C using I-code, interval + * analysis, data flow etc. and outputs it to output file ready for + * re-compilation. + */ + BackEnd(option.filename, callGraph); + + callGraph->write(); + + if (option.Stats) + displayTotalStats(); + +/* + freeDataStructures(pProcList); +*/ + return 0; +} + +/**************************************************************************** + * initargs - Extract command line arguments + ***************************************************************************/ +static char *initargs(int argc, char *argv[]) +{ + char *pc; + progname = *argv; /* Save invocation name for error messages */ + + while (--argc > 0 && (*++argv)[0] == '-') { + for (pc = argv[0]+1; *pc; pc++) + switch (*pc) { + case 'a': /* Print assembler listing */ + if (*(pc+1) == '2') + option.asm2 = TRUE; + else + option.asm1 = TRUE; + if (*(pc+1) == '1' || *(pc+1) == '2') + pc++; + break; + case 'c': + option.Calls = TRUE; + break; + case 'i': + option.Interact = TRUE; + break; + case 'm': /* Print memory map */ + option.Map = TRUE; + break; + case 's': /* Print Stats */ + option.Stats = TRUE; + break; + case 'V': /* Very verbose => verbose */ + option.VeryVerbose = TRUE; + case 'v': /* Make everything verbose */ + option.verbose = TRUE; + break; + case 'o': /* assembler output file */ + if (*(pc+1)) { + asm1_name = asm2_name = pc+1; + goto NextArg; + } + else if (--argc > 0) { + asm1_name = asm2_name = *++argv; + goto NextArg; + } + default: + fatalError(INVALID_ARG, *pc); + return *argv; + } + NextArg:; + } + + if (argc == 1) + { + if (option.asm1 || option.asm2) + { + if (! 
asm1_name) + { + asm1_name = strcpy((char*)allocMem(strlen(*argv)+4), *argv); + pc = strrchr(asm1_name, '.'); + if (pc > strrchr(asm1_name, '/')) + { + *pc = '\0'; + } + asm2_name = (char*)allocMem(strlen(asm1_name)+4) ; + strcat(strcpy(asm2_name, asm1_name), ".a2"); + unlink(asm2_name); + strcat(asm1_name, ".a1"); + } + unlink(asm1_name); /* Remove asm output files */ + } + return *argv; /* filename of the program to decompile */ + } + + fatalError(USAGE); + return *argv; +} + +static void +displayTotalStats () +/* Displays final statistics for the complete program */ +{ + printf ("\nFinal Program Statistics\n"); + printf (" Total number of low-level Icodes : %ld\n", stats.totalLL); + printf (" Total number of high-level Icodes: %ld\n", stats.totalHL); + printf (" Total reduction of instructions : %2.2f%%\n", 100.0 - + (stats.totalHL * 100.0) / stats.totalLL); +} + + + + diff --git a/src/disassem.cpp b/src/disassem.cpp new file mode 100644 index 0000000..2bc0b36 --- /dev/null +++ b/src/disassem.cpp @@ -0,0 +1,1613 @@ +/**************************************************************************** + * dcc project disassembler + * (C) Cristina Cifuentes, Mike van Emmerik, Jeff Ledermann + ****************************************************************************/ +#include +#include "dcc.h" +#include "symtab.h" +#include +#include +#include /* For free() */ +#include +#ifdef _CONSOLE +#include /* For console mode routines */ +#endif +#include "disassem.h" +// Note: for the time being, there is no interactive disassembler +// for unix +#ifndef __UNIX__ +#include // getch() etc +#endif +using namespace std; + + +#define POS_LAB 15 /* Position of label */ +#define POS_OPC 20 /* Position of opcode */ +#define POS_OPR 25 /* Position of operand */ +#define WID_PTR 10 /* Width of the "xword ptr" lingo */ +#define POS_OPR2 POS_OPR+WID_PTR /* Position of operand after "xword ptr" */ +#define POS_CMT 54 /* Position of comment */ + + +#define DELTA_ICODE 16 /* Number of icodes 
to realloc by each time */
+
+/* Mnemonics of the low-level opcodes */
+static const char *szOps[] =
+{
+    "CBW", "AAA", "AAD", "AAM", "AAS", "ADC", "ADD", "AND",
+    "BOUND","CALL", "CALL", "CLC", "CLD", "CLI", "CMC", "CMP",
+    "CMPS", "REPNE CMPS","REPE CMPS","DAA", "DAS", "DEC", "DIV", "ENTER",
+    "ESC", "HLT", "IDIV", "IMUL", "IN", "INC", "INS", "REP INS",
+    "INT", "IRET", "JB", "JBE", "JAE", "JA", "JE", "JNE",
+    "JL", "JGE", "JLE", "JG", "JS", "JNS", "JO", "JNO",
+    "JP", "JNP", "JCXZ", "JMP", "JMP", "LAHF", "LDS", "LEA",
+    "LEAVE","LES", "LOCK", "LODS", "REP LODS", "LOOP", "LOOPE","LOOPNE",
+    "MOV", "MOVS", "REP MOVS", "MUL", "NEG", "NOT", "OR", "OUT",
+    "OUTS", "REP OUTS", "POP", "POPA", "POPF", "PUSH", "PUSHA","PUSHF",
+    "RCL", "RCR", "ROL", "ROR", "RET", "RETF", "SAHF", "SAR",
+    "SHL", "SHR", "SBB", "SCAS", "REPNE SCAS","REPE SCAS", "CWD", "STC",
+    "STD", "STI", "STOS", "REP STOS", "SUB", "TEST", "WAIT", "XCHG",
+    "XLAT", "XOR", "INTO", "NOP", "REPNE", "REPE", "MOD"
+};
+
+/* The following opcodes are for mod != 3 */
+static const char *szFlops1[] =
+{
+    /* 0 1 2 3 4 5 6 7 */
+    "FADD", "FMUL", "FCOM", "FCOMP", "FSUB", "FSUBR", "FDIV", "FDIVR", /* 00 */
+    "FLD", "???", "FST", "???", "FLDENV","FLDCW", "FSTENV","FSTSW", /* 08 */
+    "FIADD", "FIMUL", "FICOM","FICOMP","FISUB", "FISUBR","FIDIV", "FIDIVR", /* 10 */
+    "FILD", "???", "FIST", "FISTP", "???", "???", "???", "FSTP", /* 18 */
+    "FADD", "FMUL", "FCOM", "FCOMP", "FSUB", "FSUBR", "FDIV", "FDIVR", /* 20 */
+    "FLD", "FLD", "FST", "FSTP", "FRSTOR","???", "FSAVE", "FSTSW", /* 28 */
+    "FIADD", "FIMUL", "FICOM","FICOMP","FISUB", "FISUBR","FIDIV", "FIDIVR", /* 30 */
+    "FILD", "???", "FIST", "FISTP", "FBLD", "???", "FBSTP", "FISTP" /* 38 */
+};
+
+/* The following opcodes are for mod == 3 */
+static const char *szFlops2[] =
+{
+    /* 0 1 2 3 4 5 6 7 */
+    "FADD", "FMUL", "FCOM", "FCOMP", "FSUB", "FSUBR", "FDIV", "FDIVR", /* 00 */
+    "FLD", "FXCH", "FNOP", "???", "", "", "", "", /* 08 */
+    "FIADD", "FIMUL", "FICOM","FICOMP","FISUB", "", "FIDIV", "FIDIVR", /* 10 */
+    "FILD", "???", "FIST", "FISTP", "???", "???", "???", "FSTP", /* 18 */
+    "FADD", "FMUL", "FCOM", "FCOMP", "FSUB", "FSUBR", "FDIV", "FDIVR", /* 20 */
+    "FFREE", "FSTP", "FST", "???", "FUCOM", "FUCOMP","???", "???", /* 28 */
+    "FADDP", "FMULP", "FICOM","", "FSUBRP","FISUBR","FDIVRP","FDIVP", /* 30 */
+    "FILD", "???", "FIST", "FISTP", "", "???", "FBSTP", "FISTP" /* 38 */
+};
+
+/* Eight-entry sub-tables; the two hex digits in each name match the row
+ * labels used in the tables above.
+ * NOTE(review): szFlops1C lists FTST/FXAM in the same slots as szFlops0C -
+ * confirm against the opcode map. */
+static const char *szFlops0C[] =
+{
+    "FCHS", "FABS", "???", "???", "FTST", "FXAM", "???", "???"
+};
+
+static const char *szFlops0D[] =
+{
+    "FLD1", "FLDL2T","FLDL2E","FLDPI", "FLDLG2","FLDLN2","FLDZ", "???"
+};
+
+static const char *szFlops0E[] =
+{
+    "F2XM1", "FYL2X", "FPTAN", "FPATAN","FXTRACT","FPREM1","FDECSTP","FINCSTP"
+};
+
+static const char *szFlops0F[] =
+{
+    "FPREM", "FYL2XP1","FSQRT", "FSINCOS","FRNDINT","FSCALE","FSIN","FCOS"
+};
+
+static const char *szFlops15[] =
+{
+    "???", "FUCOMPP", "???", "???", "???", "???", "???", "???"
+};
+
+static const char *szFlops1C[] =
+{
+    "???", "???", "FCLEX", "FINIT", "FTST", "FXAM", "???", "???"
+};
+
+static const char *szFlops33[] =
+{
+    "???", "FCOMPP", "???", "???", "???", "???", "???", "???"
+};
+
+static const char *szFlops3C[] =
+{
+    "FSTSWAX","???", "???", "???", "???", "???", "???", "???"
+};
+
+
+/* Operand spellings: index expressions, byte and word registers, size
+ * prefixes */
+static const char *szIndex[8] = {"bx+si", "bx+di", "bp+si", "bp+di", "si", "di","bp","bx" };
+static const char *szBreg[8] = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" };
+static const char *szWreg[12] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
+                                  "es", "cs", "ss", "ds" };
+static const char *szPtr[2] = { " word ptr ", " byte ptr " };
+
+
+static void dis1Line (Int i, boolT fWin, char attr, Int pass);
+void dis1LineOp(Int i, boolT fWin, char attr, word *len, Function * pProc);
+static void formatRM(char *p, flags32 flg, ICODEMEM* pm);
+static char *strDst(flags32 flg, ICODEMEM *pm);
+static char *strSrc(ICODE * pc);
+static char *strHex(dword d);
+static Int checkScanned(dword pcCur);
+static void setProc(Function * proc);
+static void dispData(word dataSeg);
+static void flops(ICODE * pi);
+boolT callArg(word off, char *temp); /* Check for procedure name */
+
+/* File-scope state of the disassembler */
+static FILE *fp;
+static ICODE * pc;
+static char buf[200], *p;
+static Int cb, j, numIcode, allocIcode, eop;
+static vector pl;
+static dword nextInst;
+static boolT fImpure;
+static Int lab, prevPass;
+static Function * pProc; /* Points to current proc struct */
+
+struct POSSTACK_ENTRY
+{
+    Int ic; /* An icode offset */
+    Function * pProc; /* A pointer to a PROCEDURE structure */
+} ;
+vector posStack; /* position stack */
+byte iPS; /* Index into the stack */
+
+static char cbuf[256]; /* Has to be 256 for wgetstr() to work */
+
+// These are "curses equivalent" functions.
(Used to use curses for all this, +// but it was too much of a distribution hassle + +#if _CONSOLE +HANDLE hConsole; /* All 32 bit console style routines need this handle */ +#endif + +void attrSet(char attrib) +{ +#ifdef _CONSOLE + switch (attrib) + { + case A_NORMAL: + SetConsoleTextAttribute(hConsole,FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED); + break; + case A_REVERSE: + SetConsoleTextAttribute(hConsole,BACKGROUND_BLUE | BACKGROUND_GREEN | BACKGROUND_RED); + break; + case A_BOLD: + SetConsoleTextAttribute(hConsole,FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED |FOREGROUND_INTENSITY); + break; + } +#else + /* Set the attribute, using VT100 codes */ + switch (attrib) + { + case A_NORMAL: + printf("\033[0m"); + break; + case A_REVERSE: + printf("\033[7m"); + break; + case A_BOLD: + printf("\033[1m"); + break; + } +#endif +} + +#ifdef _CONSOLE +void initConsole() +{ + hConsole = GetStdHandle(STD_OUTPUT_HANDLE); +} +#endif + +void erase(void) +{ +#ifdef _CONSOLE + COORD coordScreen = { 0, 0 }; /* here's where we'll home the + cursor */ + DWORD cCharsWritten; + CONSOLE_SCREEN_BUFFER_INFO csbi; /* to get buffer info */ + DWORD dwConSize; /* number of character cells in + the current buffer */ + + /* get the number of character cells in the current buffer */ + GetConsoleScreenBufferInfo( hConsole, &csbi ); + dwConSize = csbi.dwSize.X * csbi.dwSize.Y; + + /* fill the entire screen with blanks */ + FillConsoleOutputCharacter( hConsole, (TCHAR) ' ',dwConSize, coordScreen, &cCharsWritten ); + + /* get the current text attribute */ + // GetConsoleScreenBufferInfo( hConsole, &csbi ); + + /* now set the buffer's attributes accordingly */ + FillConsoleOutputAttribute( hConsole, csbi.wAttributes,dwConSize, coordScreen, &cCharsWritten ); + + /* put the cursor at (0, 0) */ + SetConsoleCursorPosition( hConsole, coordScreen ); + +#else + // Assume that ANSI is supported + printf("\033[2J"); +#endif +} + +void move(int r, int c) +{ +#ifdef _CONSOLE + COORD pos; + 
pos.X = c; + pos.Y = r; + SetConsoleCursorPosition( hConsole, pos ); +#else + printf("\033[%d;%dH", r+1, c+1); +#endif +} + +#define printfd(x) printf(x) +#define dis_newline() printf("\n") +#define dis_show() // Nothing to do unless using Curses + + +/***************************************************************************** + * disassem - Prints a disassembled listing of a procedure. + * pass == 1 generates output on file .a1 + * pass == 2 generates output on file .a2 + * pass == 3 generates output on file .b + ****************************************************************************/ +void disassem(Int pass, Function * ppProc) +{ + Int i; + + pProc = ppProc; /* Save the passes pProc */ + if (pass != prevPass) + { + prevPass = pass; + lab = 0; /* Restart label numbers */ + } + createSymTables(); + allocIcode = numIcode = pProc->Icode.GetNumIcodes(); + if ((cb = allocIcode * sizeof(ICODE)) == 0) + { + return; /* No Icode */ + } + + /* Open the output file (.a1 or .a2 only) */ + if (pass != 3) + { + p = (pass == 1)? asm1_name: asm2_name; + fp = fopen(p, "a+"); + if (!fp) + { + fatalError(CANNOT_OPEN, p); + } + } + + /* Create temporary code array */ + // Mike: needs objectising! 
+ pc = (ICODE *)memcpy(allocMem(cb), pProc->Icode.GetFirstIcode(), (size_t)cb); + + if (pass == 1) + { + /* Bind jump offsets to labels */ + for (i = 0; i < numIcode; i++) + { + if ((pc[i].ic.ll.flg & I) && !(pc[i].ic.ll.flg & JMP_ICODE) && + JmpInst(pc[i].ic.ll.opcode)) + { + /* Replace the immediate operand with an icode index */ + if (labelSrch(pc,numIcode, pc[i].ic.ll.immed.op,(Int *)&pc[i].ic.ll.immed.op)) + { + /* This icode is the target of a jump */ + pc[pc[i].ic.ll.immed.op].ic.ll.flg |= TARGET; + pc[i].ic.ll.flg |= JMP_ICODE; /* So its not done twice */ + } + else + { + /* This jump cannot be linked to a label */ + pc[i].ic.ll.flg |= NO_LABEL; + } + } + } + } + + /* Create label array to keep track of location => label name */ + pl.clear(); + pl.resize(numIcode,0); + + /* Write procedure header */ + if (pass != 3) + fprintf(fp, "\t\t%s PROC %s\n", pProc->name, (pProc->flg & PROC_FAR)? "FAR": "NEAR"); + + /* Loop over array printing each record */ + for (i = nextInst = 0; i < numIcode; i++) + { + dis1Line(i, FALSE, 0, pass); + } + + /* Write procedure epilogue */ + if (pass != 3) + { + fprintf(fp, "\n\t\t%s ENDP\n\n", pProc->name); + fclose(fp); + } + + free(pc); + destroySymTables(); +} + +/**************************************************************************** + * dis1Line() - disassemble one line to stream fp * * + * i is index into Icode for this proc * + * It is assumed that icode i is already scanned * + ****************************************************************************/ +static void +dis1Line(Int i, boolT fWindow, char attr, Int pass) +{ + ICODE * pIcode = &pc[i]; + + /* Disassembly stage 1 -- + * Do not try to display NO_CODE entries or synthetic instructions, + * other than JMPs, that have been introduced for def/use analysis. 
*/ + if ((option.asm1) && + ((pIcode->ic.ll.flg & NO_CODE) || + ((pIcode->ic.ll.flg & SYNTHETIC) && (pIcode->ic.ll.opcode != iJMP)))) + { + return; + } + else if (pIcode->ic.ll.flg & NO_CODE) + { + return; + } + + /* p points to the current position in buf[] */ + p = (char*)memset(buf, ' ', sizeof(buf)); + + if (pIcode->ic.ll.flg & (TARGET | CASE)) + { + if (fWindow) /* Printing to disassem window? */ + dis_newline(); /* Yes */ + else if (pass == 3) + cCode.appendCode("\n"); /* No, print to c code buffer */ + else + fprintf(fp, "\n"); /* No, print to the stream */ + } + + /* Find next instruction label and print hex bytes */ + if (pIcode->ic.ll.flg & SYNTHETIC) + nextInst = pIcode->ic.ll.label; + else + { + cb = (dword) pIcode->ic.ll.numBytes; + nextInst = pIcode->ic.ll.label + cb; + + /* Output hexa code in program image */ + if (pass != 3) + { + for (j = 0; j < cb; j++, p += 2) + sprintf(p, "%02X", prog.Image[pIcode->ic.ll.label + j]); + *p = ' '; + } + } + + /* Check if there is a symbol here */ + selectTable(Label); + if (readVal(&buf[POS_LAB], pIcode->ic.ll.label, 0)) + { + buf[strlen(buf)] = ':'; /* Also removes the null */ + } + + else if (pIcode->ic.ll.flg & TARGET) /* Symbols override Lnn labels */ + { /* Print label */ + if (! 
pl[i]) + { + pl[i] = ++lab; + } + if (pass == 3) + sprintf(buf, "L%ld", pl[i]); + else + sprintf(&buf[15], "L%ld", pl[i]); + buf[strlen(buf)] = ':'; /* Also removes the null */ + } + + if (pIcode->ic.ll.opcode == iSIGNEX && (pIcode->ic.ll.flg & B)) + { + pIcode->ic.ll.opcode = iCBW; + } + + if (pass == 3) + { + strcpy (&buf[8], szOps[pIcode->ic.ll.opcode]); + buf[eop = strlen(buf)] = ' '; + p = buf + 8 + (POS_OPR - POS_OPC); + } + else + { + strcpy(&buf[POS_OPC], szOps[pIcode->ic.ll.opcode]); + buf[eop = strlen(buf)] = ' '; + p = buf + POS_OPR; + } + + switch (pIcode->ic.ll.opcode) + { + case iADD: case iADC: case iSUB: case iSBB: case iAND: case iOR: + case iXOR: case iTEST: case iCMP: case iMOV: case iLEA: case iXCHG: + strcpy(p, strDst(pIcode->ic.ll.flg, &pIcode->ic.ll.dst)); + strcat(p, strSrc(pIcode)); + break; + + case iESC: + flops(pIcode); + break; + + case iSAR: case iSHL: case iSHR: case iRCL: case iRCR: case iROL: + case iROR: + strcpy(p, strDst(pIcode->ic.ll.flg | I, &pIcode->ic.ll.dst)); + strcat(p, (pIcode->ic.ll.flg & I)? 
strSrc(pIcode): ", cl"); + break; + + case iINC: case iDEC: case iNEG: case iNOT: case iPOP: + strcpy(p, strDst(pIcode->ic.ll.flg | I, &pIcode->ic.ll.dst)); + break; + + case iPUSH: + if (pIcode->ic.ll.flg & I) + { + strcpy(p + WID_PTR, strHex(pIcode->ic.ll.immed.op)); + } + else + { + strcpy(p, strDst(pIcode->ic.ll.flg | I, &pIcode->ic.ll.dst)); + } + break; + + case iDIV: case iIDIV: case iMUL: case iIMUL: case iMOD: + if (pIcode->ic.ll.flg & I) + { + strcat(strcpy(p, strDst(pIcode->ic.ll.flg, &pIcode->ic.ll.dst)),", "); + formatRM(p + strlen(p), pIcode->ic.ll.flg, &pIcode->ic.ll.src); + strcat(p, strSrc(pIcode)); + } + else + strcpy(p, strDst(pIcode->ic.ll.flg | I, &pIcode->ic.ll.src)); + break; + + case iLDS: case iLES: case iBOUND: + strcpy(p, strDst(pIcode->ic.ll.flg, &pIcode->ic.ll.dst)); + strcat(strcat(p, ", dword ptr"), strSrc(pIcode)+1); + break; + + case iJB: case iJBE: case iJAE: case iJA: + case iJL: case iJLE: case iJGE: case iJG: + case iJE: case iJNE: case iJS: case iJNS: + case iJO: case iJNO: case iJP: case iJNP: + case iJCXZ:case iLOOP: case iLOOPE:case iLOOPNE: + case iJMP: case iJMPF: + + /* Check if there is a symbol here */ + selectTable(Label); + if ((pIcode->ic.ll.immed.op < (dword)numIcode) && /* Ensure in range */ + readVal(p+WID_PTR, pc[pIcode->ic.ll.immed.op].ic.ll.label, 0)) + { + break; /* Symbolic label. Done */ + } + + if (pIcode->ic.ll.flg & NO_LABEL) + { + strcpy(p + WID_PTR, strHex(pIcode->ic.ll.immed.op)); + } + else if (pIcode->ic.ll.flg & I) + { + j = pIcode->ic.ll.immed.op; + if (! 
pl[j]) /* Forward jump */ + { + pl[j] = ++lab; + } + if (pIcode->ic.ll.opcode == iJMPF) + { + sprintf(p, " far ptr L%ld", pl[j]); + } + else + { + sprintf(p + WID_PTR, "L%ld", pl[j]); + } + } + else if (pIcode->ic.ll.opcode == iJMPF) + { + strcat(strcpy(p-1, "dword ptr"), strSrc(pIcode)+1); + } + else + { + strcpy(p, strDst(I, &pIcode->ic.ll.src)); + } + break; + + case iCALL: case iCALLF: + if (pIcode->ic.ll.flg & I) + { + sprintf(p, "%s ptr %s",(pIcode->ic.ll.opcode == iCALL) ?" near":" far",(pIcode->ic.ll.immed.proc.proc)->name); + } + else if (pIcode->ic.ll.opcode == iCALLF) + { + strcat(strcpy(p, "dword ptr"),strSrc(pIcode)+1); + } + else + { + strcpy(p, strDst(I, &pIcode->ic.ll.src)); + } + break; + + case iENTER: + strcat(strcpy(p + WID_PTR, strHex(pIcode->ic.ll.dst.off)), ", "); + strcat(p, strHex(pIcode->ic.ll.immed.op)); + break; + + case iRET: case iRETF: case iINT: + if (pIcode->ic.ll.flg & I) + { + strcpy(p + WID_PTR, strHex(pIcode->ic.ll.immed.op)); + } + else + { + buf[eop] = '\0'; + } + break; + + case iCMPS: case iREPNE_CMPS: case iREPE_CMPS: + case iSCAS: case iREPNE_SCAS: case iREPE_SCAS: + case iSTOS: case iREP_STOS: + case iLODS: case iREP_LODS: + case iMOVS: case iREP_MOVS: + case iINS: case iREP_INS: + case iOUTS: case iREP_OUTS: + if (pIcode->ic.ll.src.segOver) + { + (pIcode->ic.ll.opcode == iOUTS || pIcode->ic.ll.opcode == iREP_OUTS) + ? strcat(strcpy(p+WID_PTR,"dx, "), szPtr[pIcode->ic.ll.flg & B]) + : strcpy(&buf[eop+1], szPtr[pIcode->ic.ll.flg & B]); + if (pIcode->ic.ll.opcode == iLODS || + pIcode->ic.ll.opcode == iREP_LODS || + pIcode->ic.ll.opcode == iOUTS || + pIcode->ic.ll.opcode == iREP_OUTS) + { + strcat(p, szWreg[pIcode->ic.ll.src.segOver-rAX]); + } + else + { + strcat(strcat(p, "es:[di], "),szWreg[pIcode->ic.ll.src.segOver - rAX]); + } + strcat(p, ":[si]"); + } + else strcpy(&buf[eop], (pIcode->ic.ll.flg & B)? 
"B": "W"); + break; + + case iXLAT: + if (pIcode->ic.ll.src.segOver) + { + strcpy(&buf[eop+1], szPtr[1]); + strcat(strcat(p, szWreg[pIcode->ic.ll.src.segOver-rAX]), ":[bx]"); + } + else buf[eop] = '\0'; + break; + + case iIN: + strcpy(p+WID_PTR, (pIcode->ic.ll.flg & B)?"al, ": "ax, "); + strcat(p+WID_PTR, (pIcode->ic.ll.flg & I)? strHex(pIcode->ic.ll.immed.op): "dx"); + break; + + case iOUT: + strcpy(p+WID_PTR, (pIcode->ic.ll.flg & I)? strHex(pIcode->ic.ll.immed.op): "dx"); + strcat(p+WID_PTR, (pIcode->ic.ll.flg & B)?", al": ", ax"); + break; + + default: + buf[eop] = '\0'; + break; + } + + /* Comments */ + if (pIcode->ic.ll.flg & SYNTHETIC) + { + fImpure = FALSE; + } + else + { + for (j = pIcode->ic.ll.label, fImpure = 0; j > 0 && j < (Int)nextInst; + j++) + { + fImpure |= BITMAP(j, BM_DATA); + } + } + + + /* Check for user supplied comment */ + selectTable(Comment); + if (readVal(cbuf, pIcode->ic.ll.label, 0)) + { + buf[strlen(buf)] = ' '; /* Removes the null */ + buf[POS_CMT] = ';'; + strcpy(buf+POS_CMT+1, cbuf); + } + + else if (fImpure || (pIcode->ic.ll.flg & (SWITCH | CASE | SEG_IMMED | + IMPURE | SYNTHETIC | TERMINATES))) + { + buf[strlen(buf)] = ' '; + buf[POS_CMT] = '\0'; + if (pIcode->ic.ll.flg & CASE) + { + sprintf(buf+POS_CMT, ";Case l%ld", pIcode->ic.ll.caseTbl.numEntries); + } + if (pIcode->ic.ll.flg & SWITCH) + { + strcat(buf, ";Switch "); + } + if (fImpure) + { + strcat(buf, ";Accessed as data "); + } + if (pIcode->ic.ll.flg & IMPURE) + { + strcat(buf, ";Impure operand "); + } + if (pIcode->ic.ll.flg & SEG_IMMED) + { + strcat(buf, ";Segment constant"); + } + if (pIcode->ic.ll.flg & TERMINATES) + { + strcat(buf, ";Exit to DOS"); + } + } + + /* Comment on iINT icodes */ + if (pIcode->ic.ll.opcode == iINT) + pIcode->writeIntComment (buf); + + /* Display output line */ + if (! 
(pIcode->ic.ll.flg & SYNTHETIC)) + { + if (fWindow) + { + word off; + char szOffset[6]; + + off = (word)(pIcode->ic.ll.label - ((dword)pProc->state.r[rCS] << 4)); + attrSet(attr); + + sprintf(szOffset, "%04X ", off); + printfd(szOffset); + printfd(buf); + dis_newline(); + attrSet(A_NORMAL); + } + else if (pass == 3) /* output to .b code buffer */ + cCode.appendCode("%s\n", buf); + else /* output to .a1 or .a2 file */ + fprintf (fp, "%03ld %06lX %s\n", i, pIcode->ic.ll.label, buf); + } + else /* SYNTHETIC instruction */ + { + strcat (buf, ";Synthetic inst"); + if (fWindow) + { + printfd(" "); + printfd(buf); + dis_newline(); + } + else if (pass == 3) /* output to .b code buffer */ + { + cCode.appendCode("%s\n", buf); + } + else /* output to .a1 or .a2 file */ + { + fprintf (fp, "%03ld %s\n", i, buf); + } + } +} + + + +/**************************************************************************** + * formatRM + ***************************************************************************/ +static void formatRM(char *p, flags32 flg, ICODEMEM *pm) +{ + char seg[4]; + + if (pm->segOver) + { + strcat(strcpy(seg, szWreg[pm->segOver - rAX]), ":"); + } + else *seg = '\0'; + + if (pm->regi == 0) + { + sprintf(p,"%s[%s]", seg, strHex((dword)pm->off)); + } + + else if (pm->regi == (INDEXBASE - 1)) + { + strcpy (p, "tmp"); + } + + else if (pm->regi < INDEXBASE) + { + strcpy(p, (flg & B)? 
szBreg[pm->regi - rAL]: szWreg[pm->regi - rAX]); + } + + else if (pm->off) + { + if (pm->off < 0) + { + sprintf(p,"%s[%s-%s]", seg, szIndex[pm->regi - INDEXBASE],strHex((dword)(- pm->off))); + } + else + { + sprintf(p,"%s[%s+%s]", seg, szIndex[pm->regi - INDEXBASE],strHex((dword)pm->off)); + } + } + else sprintf(p,"%s[%s]", seg, szIndex[pm->regi - INDEXBASE]); +} + + +/***************************************************************************** + * strDst + ****************************************************************************/ +static char *strDst(flags32 flg, ICODEMEM *pm) +{ + static char buf[30]; + + /* Immediates to memory require size descriptor */ + if ((flg & I) && (pm->regi == 0 || pm->regi >= INDEXBASE)) + { + memcpy(buf, szPtr[flg & B], WID_PTR); + } + else + { + memset(buf, ' ', WID_PTR); + } + + formatRM(buf + WID_PTR, flg, pm); + return buf; +} + + +/**************************************************************************** + * strSrc * + ****************************************************************************/ +static char *strSrc(ICODE *pc) +{ + static char buf[30] = {", "}; + + if (pc->ic.ll.flg & I) + strcpy(buf + 2, strHex(pc->ic.ll.immed.op)); + else if (pc->ic.ll.flg & IM_SRC) /* level 2 */ + strcpy (buf + 2, "dx:ax"); + else + formatRM(buf + 2, pc->ic.ll.flg, &pc->ic.ll.src); + + return buf; +} + + +/**************************************************************************** + * strHex * + ****************************************************************************/ +static char *strHex(dword d) +{ + static char buf[10]; + + d &= 0xFFFF; + sprintf(buf, "0%lX%s", d, (d > 9)? 
"h": ""); + return (buf + (buf[1] <= '9')); +} + + + + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\ +| Interactive Disassembler and Associated Routines | +\* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + + + +dword pcTop; /* Image offset of top line */ +Int icTop; /* Icode index of top line */ +dword pcCur; /* Image offset of cursor */ +static dword oldPcCur; /* As above, before latest command */ +Int icCur; /* Icode index of cursor */ +dword pcBot; /* Image offset of bottom line */ +Int icBot; /* Icode index of bottom line */ +dword pcLast; /* Image offset of last instr in proc */ +int NSCROLL; /* Number of limes to scroll. Pseudo constant */ + +/* Paint the title line */ +void dispTitle(void) +{ + char buf[80]; + + move(0, 0); /* Must move before setting attributes */ + attrSet(A_BOLD); + sprintf(buf, "Proc %s at %06lX (%04X:%04X): %d bytes of parameters ",pProc->name, pProc->Icode.GetFirstIcode()->ic.ll.label, + pProc->state.r[rCS],(word)(pProc->Icode.GetFirstIcode()->ic.ll.label - ((dword)(pProc->state.r[rCS]) << 4)), + pProc->cbParam); + printfd(buf); + if (pProc->flg & PROC_ISLIB) printfd(" LIBRARY"); + attrSet(A_NORMAL); +} + + +/**************************************************************************** +* updateScr - update the screen * + ****************************************************************************/ +/* bNew is true if must recalculate the top line */ +void updateScr(boolT bNew) +{ + int y, x; + Int i, ic; + + bNew |= (pcCur > pcBot) || (pcCur < pcTop); + if (bNew) + { + /* We need to redo the screen completely */ + erase(); + dispTitle(); + icTop = icCur; + for (x=0; x < NSCROLL; x++) + { + if (icTop && pc[icTop-1].ic.ll.label + + (dword)pc[icTop-1].ic.ll.numBytes == pc[icTop].ic.ll.label) + { + /* Then this instruction is contiguous with the current */ + icTop--; + } + else break; + } + pcTop = pc[icTop].ic.ll.label; + } + else + { + dispTitle(); + } + + move(1, 0); + nextInst 
= pcTop; + for (y=1, ic=icTop; y < LINES-1; ic++, y++) + { + if ((ic >= numIcode) || (nextInst != pc[ic].ic.ll.label)) + { + if (labelSrch(pc,numIcode, nextInst, &i)) + { + ic = i; + } + else + { + pcLast = pc[ic-1].ic.ll.label; /* Remember end of proc */ + break; /* Must be past last */ + } + } + + /* Save pc of current line. Last assignment will be pc of bott line */ + pcBot = nextInst; + icBot = ic; + + // Only have to repaint if screen is new, or repainting formerly highlighted + // line, or newly highlighted line + if (bNew || (pcCur == nextInst) || (oldPcCur == nextInst)) + dis1Line(ic, TRUE, (char)((pcCur == nextInst) ? A_REVERSE : A_NORMAL), 0); + + if (ic == numIcode-1) + { + switch (pc[ic].ic.ll.opcode) + { + case iJMP: case iJMPF: + case iRET: case iRETF: + case iIRET: + break; + + default: + /* We have other than a break of control flow instruction + at the end of the proc. Parse more instructions to + complete the basic block + */ + if ((ic = checkScanned(nextInst)) == -1) + { + /* Some error. */ + pcLast = pcCur; /* Remember end of proc */ + break; /* Must be past last */ + } + + } + } + } + dis_show(); /* Make it happen */ +} + +#if 0 +/* An opcode based version of updateScr() */ +/**************************************************************************** +* updateScrOp - update the screen * + ****************************************************************************/ +/* bNew is true if must recalculate the top line */ +void +updateScrOp(boolT bNew) +{ + int y, x; + dword pc; + word len; + + dispTitle(); + if (bNew || (pcCur > pcBot) || (pcCur < pcTop)) + { + /* We need to redo the screen completely */ + pcTop = pcCur; + } + + move(1, 0); + for (y=1, pc = pcTop; y < LINES-1;) + { + /* Save pc of current line. Last assignment will be pc of bott line */ + pcBot = pc; + + dis1LineOp(pc, TRUE, (pcCur == pc) ? 
A_REVERSE : A_NORMAL, &len,pProc); + pc += len; + getyx(stdscr, y, x); + } + + refresh(); +} + +#endif + +void pushPosStack(void) +{ + /* Push the current position on the position stack */ + posStack[iPS].ic = icCur; + posStack[iPS++].pProc = pProc; +} + +static void popPosStack(void) +{ + /* Push the current position on the position stack */ + /* Note: relies on the byte wraparound. Beware! */ + // if ((Int)(posStack[--iPS].pProc) != -1) + if ((intptr_t)(posStack[--iPS].pProc) != intptr_t(-1)) + { + if (posStack[iPS].pProc != pProc) + { + setProc(posStack[iPS].pProc); + } + icCur = posStack[iPS].ic; + pcCur = pc[icCur].ic.ll.label; + } + else iPS++; /* Stack empty.. don't pop */ +} + + +/* Check to see if there is an icode for given image offset. + Scan it if necessary, adjusting the allocation of pc[] and pl[] + if necessary. Returns -1 if an error, otherwise the icode offset +*/ +static Int checkScanned(dword pcCur) +{ + Int i; + + /* First we check if the current icode is in range */ + /* A sanity check first */ + if (pcCur >= (dword)prog.cbImage) + { + /* Couldn't be! */ + return -1; + } + + if (!labelSrch(pc,numIcode, pcCur, &i)) + { + /* This icode does not exist yet. Tack it on the end of the existing */ + if (numIcode >= allocIcode) + { + allocIcode = numIcode + DELTA_ICODE; /* Make space for this one, and a few more */ + pc = (ICODE *)reallocVar(pc, allocIcode * sizeof(ICODE)); + /* It is important to clear the new icodes, to ensure that the type + is set to NOT_SCANNED */ + memset(&pc[numIcode], 0, (size_t)(allocIcode-numIcode)*sizeof(ICODE)); + pl.resize(allocIcode); + memset(&pl[numIcode], 0, (size_t)(allocIcode-numIcode)*sizeof(Int)); + } + i = numIcode++; + } + + if (pc[i].type == NOT_SCANNED) + { + /* This is a new icode not even scanned yet. Scan it now */ + /* Ignore most errors... at this stage */ + if (scan(pcCur, &pc[i]) == IP_OUT_OF_RANGE) + { + /* Something went wrong... 
just forget it */ + return -1; + } + } + + return i; +} + + + + + +/* Set up to use the procedure proc */ +/* This includes some important initialisations, allocations, etc that are + normally done in disassem() */ +static void setProc(Function * proc) +{ + Int i; + + pProc = proc; /* Keep in a static */ + + /* Free old arrays, if any */ + if (pc) free(pc); + pl.clear(); + + + /* Create temporary code array */ + numIcode = pProc->Icode.GetNumIcodes(); + cb = numIcode * sizeof(ICODE); + // Mike: needs objectising + pc = (ICODE *)memcpy(allocMem(cb), pProc->Icode.GetFirstIcode(), (size_t)cb); + + /* Create label array to keep track of location => label name */ + pl.clear(); + pl.resize(numIcode,0); + + /* Bind jump offsets to labels */ + for (i = 0; i < numIcode; i++) + { + if ((pc[i].ic.ll.flg & I) && !(pc[i].ic.ll.flg & JMP_ICODE) && + JmpInst(pc[i].ic.ll.opcode)) + { + /* Immediate jump instructions. Make dest an icode index */ + if (labelSrch(pc,numIcode, pc[i].ic.ll.immed.op, (Int *)&pc[i].ic.ll.immed.op)) + { + /* This icode is the target of a jump */ + pc[pc[i].ic.ll.immed.op].ic.ll.flg |= TARGET; + pc[i].ic.ll.flg |= JMP_ICODE; /* So its not done twice */ + } + else + { + /* This jump cannot be linked to a label */ + pc[i].ic.ll.flg |= NO_LABEL; + } + } + } + + /* Window initially scrolled with entry point on top */ + pcCur = pcTop = pProc->procEntry; + labelSrch(pc,numIcode, pcCur, &icCur); + /* pcLast is set properly in updateScr(), at least for now */ + pcLast = (dword)-1; + +} + +/**************************************************************************** + * interactDis - interactive disassembler * + ****************************************************************************/ +void interactDis(Function * initProc, Int initIC) +{ + + +#ifdef __UNIX__ + printf("Sorry - interactive disasassembler option not available for Unix\n"); + return; +#else + boolT fInteract; + int nEsc = 0; /* This cycles 0 1 2 for Esc [ X under Unix */ + /* and 0 1 for NULL X 
under Dos */ + int ch; + Int i; + pProc = initProc; /* Keep copy of init proc */ + NSCROLL = max(3, LINES >> 3); /* Number of lines to scroll */ + + /* Allocate the position stack */ + posStack = (POSSTACK_ENTRY*)allocMem(256 * sizeof(POSSTACK_ENTRY)); + iPS = 0; + memset(posStack, -1, 256 * sizeof(POSSTACK_ENTRY)); + + + /* Initialise the console interface, if required */ + initConsole(); + + /* Initially, work on the given proc */ + setProc(initProc); + if (initIC) + { + icCur = initIC; + pcCur = pc[icCur].ic.ll.label; + } + + /* Initialise the symbol table */ + createSymTables(); + + strcpy(cbuf, "label"); /* Provide a default label string */ + + updateScr(TRUE); + + fInteract = TRUE; + while (fInteract) + { + ch = ::_getch(); // Mike: need a Unix equivalent of getch()! +#ifdef __MSDOS__ + if (nEsc) + { + ch += EXT; /* Got the NULL before, so this is extended */ + nEsc = 0; + } + else if (ch == 0) + { + nEsc = 1; /* Got one escape (actually, NULL) char */ + break; + } +#endif +#ifdef __UNIX__ + switch (nEsc) + { + case 1: /* Already got one escape */ + if (ch == '[') + { + nEsc++; /* Got 2 chars in the escape sequence */ + break; + } + else + { + /* Escape something else. Ignore */ + nEsc = 0; + } + break; + case 2: + /* Already got Esc [ ... */ + ch += EXT; /* Make it an extended key */ + nEsc = 0; /* Reset the escape state */ + break; + case 0: + /* No escapes... yet */ + if (ch == 0x1B) + { + nEsc++; /* That's one escape... */ + break; + } + } +#endif + + // For consoles, we get a 0xE0 then KEY_DOWN for the normal down arrow character. + // We simply ignore the 0xE0; this has the effect that the numeric keypad keys + // work as well (regardless of numlock state). + oldPcCur = pcCur; + switch (ch) + { + case KEY_DOWN: + + if (pcCur >= pcLast) continue; /* Ignore it */ + pcCur += pc[icCur].ic.ll.numBytes; + labelSrch(pc,numIcode, pcCur, &icCur); + if (pcCur >= pcBot) + { + int j; + + /* We have gone past the bottom line. 
Scroll a few lines */ + for (j=0; j < NSCROLL; j++) + { + if (pcTop >= pcLast) + { + break; + } + pcTop += pc[icTop].ic.ll.numBytes; + if (labelSrch(pc,numIcode, pcTop, &i)) + icTop = i; + else break; /* Some problem... no more scroll */ + } + } + updateScr(FALSE); + break; + + case KEY_UP: + /* First simply try the prev icode */ + if ((icCur == 0) || + pc[--icCur].ic.ll.label + (dword)pc[icCur].ic.ll.numBytes != pcCur) + { + for (i = 0; i < numIcode; i++) + { + if (pc[i].ic.ll.label + (dword)pc[i].ic.ll.numBytes == pcCur) + { + break; /* This is the one! */ + } + } + if (pc[i].ic.ll.label + pc[i].ic.ll.numBytes != pcCur) + break; /* Not found. Sorry! */ + icCur = i; + } + pcCur = pc[icCur].ic.ll.label; + updateScr(FALSE); + break; + + + case '2': /* Think up a better key... */ + /* As for right arrow, but considers source operand first */ + if (pc[icCur].ic.ll.src.off != 0) + { + pushPosStack(); + pcCur = pc[icCur].ic.ll.src.off; + if (!labelSrch(pc,numIcode, pcCur, &icCur)) + break; + updateScr(FALSE); + } + /* Fall through to KEY_RIGHT processing */ + + case KEY_RIGHT: + if (pc[icCur].ic.ll.flg & I) + { + if ((pc[icCur].ic.ll.opcode >= iJB) && + (pc[icCur].ic.ll.opcode <= iJMPF)) + { + /* An immediate jump op. Jump to it */ + pushPosStack(); + if (pc[icCur].ic.ll.flg & JMP_ICODE) + { + /* immed.op is an icode offset */ + icCur = pc[icCur].ic.ll.immed.op; + pcCur = pc[icCur].ic.ll.label; + } + else + { + /* immed.op is still an image offset. + Quite likely we need to scan */ + pcCur = pc[icCur].ic.ll.immed.op; + if ((icCur = checkScanned(pcCur)) == -1) + break; + } + } + else if ((pc[icCur].ic.ll.opcode == iCALL) || + (pc[icCur].ic.ll.opcode == iCALLF)) + { + /* The dest is a pointer to a proc struct */ + // First check that the procedure has icodes (e.g. 
may be + // a library function, or just not disassembled yet) + Function * pp = (Function *)pc[icCur].ic.ll.immed.op; + if (pp->Icode.GetFirstIcode() != NULL) + { + pushPosStack(); + setProc(pp); + } + } + else + { + /* Other immediate */ + pushPosStack(); + pcCur = pc[icCur].ic.ll.immed.op; + dispData(pProc->state.r[rDS]); + break; + } + } + else if (pc[icCur].ic.ll.dst.off != 0) + { + pushPosStack(); + pcCur = pc[icCur].ic.ll.dst.off; + if (!labelSrch(pc,numIcode, pcCur, &icCur)) + { + dispData(pProc->state.r[rDS]); + break; + } + } + else if (pc[icCur].ic.ll.src.off != 0) + { + pushPosStack(); + pcCur = pc[icCur].ic.ll.src.off; + if (!labelSrch(pc,numIcode, pcCur, &icCur)) + { + dispData(pProc->state.r[rDS]); + break; + } + } + updateScr(TRUE); + break; + + case KEY_LEFT: + popPosStack(); + pcCur = pc[icCur].ic.ll.label; + updateScr(TRUE); + break; + + + case KEY_NPAGE: + pcCur = pcTop = pcBot; /* Put bottom line at top now */ + icCur = icTop = icBot; + updateScr(FALSE); + break; + + case KEY_PPAGE: + pcTop -= (LINES-2) * 2; /* Average of 2 bytes per inst */ + for (i = 0; i < numIcode; i++) + { + if ((pc[i].ic.ll.label <= pcTop) && + (pc[i].ic.ll.label + (dword)pc[i].ic.ll.numBytes >= pcTop)) + { + break; /* This is the spot! */ + } + } + if (i >= numIcode) + { + /* Something went wrong. Goto to first icode */ + i = 0; + } + icCur = icTop = i; + pcCur = pcTop = pc[i].ic.ll.label; + updateScr(FALSE); + break; + + case 'l': /* Add a symbolic label here */ + { + char *pStr; + + move(LINES, 0); + printf("Enter symbol: "); + gets(cbuf); /* Get a string to buf */ + move (LINES, 0); + printf("%50c", ' '); + + if (strlen(cbuf) >= SYMLEN) + { + /* Name too ling. 
Truncate */ + cbuf[SYMLEN-1] = '\0'; + } + pStr = addStrTbl(cbuf); /* Add to the string table */ + + selectTable(Label); /* Select the label table */ + /* Add the symbol to both value- and symbol- hashed tables */ + enterSym(pStr, pcCur, pProc, TRUE); + + if (icCur == 0) + { + /* We are at the first icode of a function. + Assume it is the entry point, and rename the function */ + strcpy(pProc->name, cbuf); + } + + updateScr(FALSE); + break; + } + + case ';': + { + char *pStr; + word w; + + if (findVal(pcCur, 0, &w)) + { + readVal(cbuf, pcCur, 0);/* Make it the default string */ + deleteVal(pcCur, 0, FALSE); + } + else + { + cbuf[0] = '\0'; /* Remove prev string */ + } + + /* Enter a comment here, from a window */ + move(LINES, 0); + printf("Enter comment: "); + gets(cbuf); /* Get a string to buf */ + move(LINES, 0); + printf("%50c", ' '); + + pStr = addStrTbl(cbuf); /* Add to the string table */ + + selectTable(Comment); + enterSym(pStr, pcCur, pProc, FALSE);/* Add the symbol */ + + updateScr(FALSE); + break; + } + + + case 'X' & 0x1F: /* Control X; can't use Alt with Unix */ + fInteract = FALSE; /* Exit interactive mode */ + attrSet(A_NORMAL); /* Normal attributes */ + break; + } + } + + free(posStack); + destroySymTables(); +#endif // #ifdef unix +} + + +/**************************************************************************** + * Display the current image position as data * + ****************************************************************************/ +static void +dispData(word dataSeg) +{ + int y, c, i; + Int pc, pcStart; + Int off = (Int)dataSeg << 4; + char szOffset[6], szByte[4]; + + if (pcCur >= (dword)prog.cbImage) + { + /* We're at an invalid address. 
Use 0x100 instead */ + pcCur = 0; + } + erase(); + dispTitle(); + + pcStart = pc = pcCur; /* pc at start of line */ + for (y=1; y < LINES-1; y++) + { + move (y, 1); + sprintf(szOffset, "%04lX ", pc); + printfd(szOffset); + for (i=0; i < 16; i++) + { + sprintf(szByte, "%02X ", prog.Image[pc++ + off]); + printfd(szByte); + if ((pc + off) > prog.cbImage) break; + } + pc = pcStart; + for (i=0; i < 16; i++) + { + c = prog.Image[pc++ + off]; + if ((c < 0x20) || (c > 0x7E)) + { + c = '.'; + } + szByte[0] = (char)c; + szByte[1] = '\0'; + printfd(szByte); + if ((pc + off) > prog.cbImage) break; + } + dis_newline(); + pcStart = pc; + + if ((pc + off) > prog.cbImage) break; + + /* getyx(stdscr, y, x); */ + } + +} + + +boolT callArg(word off, char *sym) +{ + dword imageOff; + + imageOff = off + ((dword)pProc->state.r[rCS] << 4); + /* Search procedure list for one with appropriate entry point */ + std::list::iterator iter= std::find_if(pProcList.begin(),pProcList.end(), + [imageOff](const Function &f) -> bool { return f.procEntry==imageOff; }); + if(iter==pProcList.end()) + { + /* No existing proc entry */ + //ERROR: dereferencing NULL !?! + //LibCheck(*iter); + Function x; + x.procEntry=imageOff; + LibCheck(x); + if (x.flg & PROC_ISLIB) + { + /* No entry for this proc, but it is a library function. + Create an entry for it */ + pProcList.push_back(x); + iter = (++pProcList.rbegin()).base(); + } + } + if(iter==pProcList.end()) + return false; + /* We have a proc entry for this procedure. Copy the name */ + strcpy(sym, iter->name); + return true; +} + +/* Handle the floating point opcodes (icode iESC) */ +static void flops(ICODE *pIcode) +{ + char bf[30]; + byte op = (byte)pIcode->ic.ll.immed.op; + + /* Note that op is set to the escape number, e.g. + esc 0x38 is FILD */ + + if ((pIcode->ic.ll.dst.regi == 0) || (pIcode->ic.ll.dst.regi >= INDEXBASE)) + { + /* The mod/rm mod bits are not set to 11 (i.e. register). 
+ This is the normal floating point opcode */ + strcpy(&buf[POS_OPC], szFlops1[op]); + buf[strlen(buf)] = ' '; + + if ((op == 0x29) || (op == 0x1F)) + { + strcpy(bf, "tbyte ptr "); + } + else switch (op & 0x30) + { + case 0x00: + case 0x10: + strcpy(bf, "dword ptr "); + break; + case 0x20: + strcpy(bf, "qword ptr "); + break; + case 0x30: + switch (op) + { + case 0x3C: /* FBLD */ + case 0x3E: /* FBSTP */ + strcpy(bf, "tbyte ptr "); + break; + case 0x3D: /* FILD 64 bit */ + case 0x3F: /* FISTP 64 bit */ + strcpy(bf, "qword ptr "); + break; + + default: + strcpy(bf, "word ptr "); + break; + } + } + + formatRM(bf + 10, pIcode->ic.ll.flg, &pIcode->ic.ll.dst); + strcpy(p, bf); + } + else + { + /* The mod/rm mod bits are set to 11 (i.e. register). + Could be specials (0x0C-0x0F, etc), or the st(i) versions of + normal opcodes. Because the opcodes are slightly different for + this case (e.g. op=04 means FSUB if reg != 3, but FSUBR for + reg == 3), a separate table is used (szFlops2). */ + switch (op) + { + case 0x0C: + strcpy(&buf[POS_OPC], szFlops0C[pIcode->ic.ll.dst.regi - rAX]); + break; + case 0x0D: + strcpy(&buf[POS_OPC], szFlops0D[pIcode->ic.ll.dst.regi - rAX]); + break; + case 0x0E: + strcpy(&buf[POS_OPC], szFlops0E[pIcode->ic.ll.dst.regi - rAX]); + break; + case 0x0F: + strcpy(&buf[POS_OPC], szFlops0F[pIcode->ic.ll.dst.regi - rAX]); + break; + case 0x15: + strcpy(&buf[POS_OPC], szFlops15[pIcode->ic.ll.dst.regi - rAX]); + break; + case 0x1C: + strcpy(&buf[POS_OPC], szFlops1C[pIcode->ic.ll.dst.regi - rAX]); + break; + case 0x33: + strcpy(&buf[POS_OPC], szFlops33[pIcode->ic.ll.dst.regi - rAX]); + break; + case 0x3C: + strcpy(&buf[POS_OPC], szFlops3C[pIcode->ic.ll.dst.regi - rAX]); + break; + default: + strcpy(&buf[POS_OPC], szFlops2[op]); + buf[strlen(buf)] = ' '; + if ((op >= 0x20) && (op <= 0x27)) + { + /* This is the ST(i), ST form. 
*/ + sprintf(&buf[POS_OPR2], "ST(%d),ST", pIcode->ic.ll.dst.regi - rAX); + } + else + { + /* ST, ST(i) */ + sprintf(&buf[POS_OPR2], "ST,ST(%d)", pIcode->ic.ll.dst.regi - rAX); + } + + break; + } + } +} + + diff --git a/src/error.cpp b/src/error.cpp new file mode 100644 index 0000000..24d2ef3 --- /dev/null +++ b/src/error.cpp @@ -0,0 +1,86 @@ +/**************************************************************************** + * dcc project error messages + * (C) Cristina Cifuentes + ***************************************************************************/ + +#include "dcc.h" + +#include +#include +//#ifndef __UNIX__ +#if 1 +#include +#else +#include +#endif + +static const char *errorMessage[] = { + "Invalid option -%c\n", /* INVALID_ARG */ + "Invalid instruction %02X at location %06lX\n", /* INVALID_OPCODE */ + "Don't understand 80386 instruction %02X at location %06lX\n", + /* INVALID_386OP */ + "Segment override with no memory operand at location %06lX\n", + /* FUNNY_SEGOVR */ + "REP prefix without a string instruction at location %06lX\n",/* FUNNY_REP */ + "Cannot open %s\n", /* CANNOT_OPEN */ + "Error while reading %s\n", /* CANNOT_READ */ + "malloc of %ld bytes failed\n", /* MALLOC_FAILED */ + "Don't understand new EXE format\n", /* NEWEXE_FORMAT */ + "Failed to find a BB for jump to %ld in proc %s\n", /* NO_BB */ + "Basic Block is a synthetic jump\n", /* INVALID_SYNTHETIC_BB */ + "Failed to find a BB for interval\n", /* INVALID_INT_BB */ + "Instruction at location %06lX goes beyond loaded image\n", + /* IP_OUT_OF_RANGE*/ + "Definition not found for condition code usage at opcode %d\n", + /* DEF_NOT_FOUND */ + "JX use, definition not supported at opcode #%d\n", /* JX_NOT_DEF */ + "Def - use not supported. 
Def op = %d, use op = %d.\n", /* NOT_DEF_USE */ + "Failed to construct repeat..until() condition.\n", /* REPEAT_FAIL */ + "Failed to construct while() condition.\n", /* WHILE_FAIL */ +}; + + +/**************************************************************************** + fatalError: displays error message and exits the program. + ****************************************************************************/ +void fatalError(Int errId, ...) +{ va_list args; +//#ifdef __UNIX__ /* ultrix */ +#if 0 + Int errId; + + va_start(args); + errId = va_arg(args, Int); +#else + va_start(args, errId); +#endif + + if (errId == USAGE) + fprintf(stderr,"Usage: dcc [-a1a2cmpsvVi][-o asmfile] DOS_executable\n"); + else { + fprintf(stderr, "dcc: "); + vfprintf(stderr, errorMessage[errId - 1], args); + } + va_end(args); + exit((int)errId); +} + + +/**************************************************************************** + reportError: reports the warning/error and continues with the program. + ****************************************************************************/ +void reportError(Int errId, ...) 
/*
 * Fix Wildcards
 * (C) Mike van Emmerik
 */

/* * * * * * * * * * * * *\
*                         *
*   Fix Wild Cards Code   *
*                         *
\* * * * * * * * * * * * */

#include <string.h>             /* memset */

#ifndef PATLEN
#define PATLEN  23              /* Number of pattern bytes that are scanned */
#define WILD    0xF4            /* Value stored in a wildcarded position */
#endif

/* `bool` is a keyword in C++ and must not be redefined there; the macro is
   only a fallback for C compilers that have no bool at all. */
#if !defined(__cplusplus) && !defined(bool)
#define bool unsigned char
#endif
#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif
#ifndef byte
#define byte unsigned char
#endif

static int pc;                              /* Indexes into pat[] */

/* prototypes */
static bool ModRM(byte pat[]);              /* Handle the mod/rm byte */
static bool TwoWild(byte pat[]);            /* Make the next 2 bytes wild */
static bool FourWild(byte pat[]);           /* Make the next 4 bytes wild */
       void fixWildCards(byte pat[]);       /* Main routine */


/* Handle the mod/rm case: consumes the mod/rm byte at pat[pc] and wildcards
   any 16 bit displacement that follows it.
   Returns true if pattern exhausted */
static bool ModRM(byte pat[])
{
    byte op;

    /* Callers may arrive with pc already at the end of the pattern (e.g.
       after consuming an INT number); there is no mod/rm byte to read. */
    if (pc >= PATLEN) return TRUE;

    /* A standard mod/rm byte follows opcode */
    op = pat[pc++];                         /* The mod/rm byte */
    if (pc >= PATLEN) return TRUE;          /* Skip Mod/RM */
    switch (op & 0xC0)
    {
        case 0x00:                          /* [reg] or [nnnn] */
            if ((op & 0xC7) == 6)
            {
                /* Uses [nnnn] address mode */
                if (TwoWild(pat)) return TRUE;
            }
            break;
        case 0x40:                          /* [reg + nn] */
            if ((pc+=1) >= PATLEN) return TRUE;
            break;
        case 0x80:                          /* [reg + nnnn] */
            /* Possibly just a long constant offset from a register,
               but often will be an index from a variable */
            if (TwoWild(pat)) return TRUE;
            break;
        case 0xC0:                          /* reg */
            break;
    }
    return FALSE;
}

/* Change the next two bytes to wild cards.
   Never writes at or beyond pat[PATLEN].
   Returns true if pattern exhausted */
static bool
TwoWild(byte pat[])
{
    int i;

    for (i = 0; i < 2; i++)
    {
        if (pc >= PATLEN) return TRUE;      /* Pattern exhausted */
        pat[pc++] = WILD;
    }
    return (pc >= PATLEN) ? TRUE : FALSE;
}

/* Change the next four bytes to wild cards.
   The second pair is only attempted when the first pair did not exhaust the
   pattern; otherwise it would be written past the end of pat[].
   Returns true if pattern exhausted */
static bool
FourWild(byte pat[])
{
    if (TwoWild(pat)) return TRUE;
    return TwoWild(pat);
}

/* Chop from the current point by wiping with zeroes. Can't rely on anything
   after this point */
static void
chop(byte pat[])
{
    if (pc >= PATLEN) return;               /* Could go negative otherwise */
    memset(&pat[pc], 0, PATLEN - pc);
}

/* Handle the byte following a 0x0F escape (the two byte opcodes).
   Returns true if pattern exhausted */
static bool
op0F(byte pat[])
{
    byte op;

    if (pc >= PATLEN) return TRUE;          /* No second opcode byte left */

    /* The two byte opcodes */
    op = pat[pc++];
    switch (op & 0xF0)
    {
        case 0x00:                          /* 00 - 0F */
            if (op >= 0x06)                 /* Clts, Invd, Wbinvd */
                return FALSE;
            else
            {
                /* Grp 6, Grp 7, LAR, LSL */
                return ModRM(pat);
            }
        case 0x20:                          /* Various funnies, all with Mod/RM */
            return ModRM(pat);

        case 0x80:
            pc += 2;                        /* Word displacement cond jumps */
            return FALSE;

        case 0x90:                          /* Byte set on condition */
            return ModRM(pat);

        case 0xA0:
            switch (op)
            {
                case 0xA0:                  /* Push FS */
                case 0xA1:                  /* Pop  FS */
                case 0xA8:                  /* Push GS */
                case 0xA9:                  /* Pop  GS */
                    return FALSE;

                case 0xA3:                  /* Bt  Ev,Gv */
                case 0xAB:                  /* Bts Ev,Gv */
                    return ModRM(pat);

                case 0xA4:                  /* Shld EvGbIb */
                case 0xAC:                  /* Shrd EvGbIb */
                    if (ModRM(pat)) return TRUE;
                    pc++;                   /* The #num bits to shift */
                    return FALSE;

                case 0xA5:                  /* Shld EvGb CL */
                case 0xAD:                  /* Shrd EvGb CL */
                    return ModRM(pat);

                default:                    /* CmpXchg, Imul */
                    return ModRM(pat);
            }

        case 0xB0:
            if (op == 0xBA)
            {
                /* Grp 8: bt/bts/btr/btc Ev,#nn */
                if (ModRM(pat)) return TRUE;
                pc++;                       /* The #num bits to shift */
                return FALSE;
            }
            return ModRM(pat);

        case 0xC0:
            if (op <= 0xC1)
            {
                /* Xadd */
                return ModRM(pat);
            }
            /* Else BSWAP */
            return FALSE;

        default:
            return FALSE;                   /* Treat as double byte opcodes */

    }

}
+*/ +void +fixWildCards(byte pat[]) +{ + + byte op, quad, intArg; + + + pc=0; + while (pc < PATLEN) + { + op = pat[pc++]; + if (pc >= PATLEN) return; + + quad = (byte) (op & 0xC0); /* Quadrant of the opcode map */ + if (quad == 0) + { + /* Arithmetic group 00-3F */ + + if ((op & 0xE7) == 0x26) /* First check for the odds */ + { + /* Segment prefix: treat as 1 byte opcode */ + continue; + } + if (op == 0x0F) /* 386 2 byte opcodes */ + { + if (op0F(pat)) return; + continue; + } + + if (op & 0x04) + { + /* All these are constant. Work out the instr length */ + if (op & 2) + { + /* Push, pop, other 1 byte opcodes */ + continue; + } + else + { + if (op & 1) + { + /* Word immediate operands */ + pc += 2; + continue; + } + else + { + /* Byte immediate operands */ + pc++; + continue; + } + } + } + else + { + /* All these have mod/rm bytes */ + if (ModRM(pat)) return; + continue; + } + } + else if (quad == 0x40) + { + if ((op & 0x60) == 0x40) + { + /* 0x40 - 0x5F -- these are inc, dec, push, pop of general + registers */ + continue; + } + else + { + /* 0x60 - 0x70 */ + if (op & 0x10) + { + /* 70-7F 2 byte jump opcodes */ + pc++; + continue; + } + else + { + /* Odds and sods */ + switch (op) + { + case 0x60: /* pusha */ + case 0x61: /* popa */ + case 0x64: /* overrides */ + case 0x65: + case 0x66: + case 0x67: + case 0x6C: /* insb DX */ + case 0x6E: /* outsb DX */ + continue; + + case 0x62: /* bound */ + pc += 4; + continue; + + case 0x63: /* arpl */ + if (TwoWild(pat)) return; + continue; + + case 0x68: /* Push byte */ + case 0x6A: /* Push byte */ + case 0x6D: /* insb port */ + case 0x6F: /* outsb port */ + /* 2 byte instr, no wilds */ + pc++; + continue; + + } + } + + } + } + else if (quad == 0x80) + { + switch (op & 0xF0) + { + case 0x80: /* 80 - 8F */ + /* All have a mod/rm byte */ + if (ModRM(pat)) return; + /* These also have immediate values */ + switch (op) + { + case 0x80: + case 0x83: + /* One byte immediate */ + pc++; + continue; + + case 0x81: + /* Immediate 16 
bit values might be constant, but + also might be relocatable. Have to make them + wild */ + if (TwoWild(pat)) return; + continue; + } + continue; + case 0x90: /* 90 - 9F */ + if (op == 0x9A) + { + /* far call */ + if (FourWild(pat)) return; + continue; + } + /* All others are 1 byte opcodes */ + continue; + case 0xA0: /* A0 - AF */ + if ((op & 0x0C) == 0) + { + /* mov al/ax to/from [nnnn] */ + if (TwoWild(pat)) return; + continue; + } + else if ((op & 0xFE) == 0xA8) + { + /* test al,#byte or test ax,#word */ + if (op & 1) pc += 2; + else pc += 1; + continue; + + } + case 0xB0: /* B0 - BF */ + { + if (op & 8) + { + /* mov reg, #16 */ + /* Immediate 16 bit values might be constant, but also + might be relocatable. For now, make them wild */ + if (TwoWild(pat)) return; + } + else + { + /* mov reg, #8 */ + pc++; + } + continue; + } + } + } + else + { + /* In the last quadrant of the op code table */ + switch (op) + { + case 0xC0: /* 386: Rotate group 2 ModRM, byte, #byte */ + case 0xC1: /* 386: Rotate group 2 ModRM, word, #byte */ + if (ModRM(pat)) return; + /* Byte immediate value follows ModRM */ + pc++; + continue; + + case 0xC3: /* Return */ + case 0xCB: /* Return far */ + chop(pat); + return; + case 0xC2: /* Ret nnnn */ + case 0xCA: /* Retf nnnn */ + pc += 2; + chop(pat); + return; + + case 0xC4: /* les Gv, Mp */ + case 0xC5: /* lds Gv, Mp */ + if (ModRM(pat)) return; + continue; + + case 0xC6: /* Mov ModRM, #nn */ + if (ModRM(pat)) return; + /* Byte immediate value follows ModRM */ + pc++; + continue; + case 0xC7: /* Mov ModRM, #nnnn */ + if (ModRM(pat)) return; + /* Word immediate value follows ModRM */ + /* Immediate 16 bit values might be constant, but also + might be relocatable. 
For now, make them wild */ + if (TwoWild(pat)) return; + continue; + + case 0xC8: /* Enter Iw, Ib */ + pc += 3; /* Constant word, byte */ + continue; + case 0xC9: /* Leave */ + continue; + + case 0xCC: /* Int 3 */ + continue; + + case 0xCD: /* Int nn */ + intArg = pat[pc++]; + if ((intArg >= 0x34) && (intArg <= 0x3B)) + { + /* Borland/Microsoft FP emulations */ + if (ModRM(pat)) return; + } + continue; + + case 0xCE: /* Into */ + continue; + + case 0xCF: /* Iret */ + continue; + + case 0xD0: /* Group 2 rotate, byte, 1 bit */ + case 0xD1: /* Group 2 rotate, word, 1 bit */ + case 0xD2: /* Group 2 rotate, byte, CL bits */ + case 0xD3: /* Group 2 rotate, word, CL bits */ + if (ModRM(pat)) return; + continue; + + case 0xD4: /* Aam */ + case 0xD5: /* Aad */ + case 0xD7: /* Xlat */ + continue; + + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: /* Esc opcodes */ + case 0xDC: /* i.e. floating point */ + case 0xDD: /* coprocessor calls */ + case 0xDE: + case 0xDF: + if (ModRM(pat)) return; + continue; + + case 0xE0: /* Loopne */ + case 0xE1: /* Loope */ + case 0xE2: /* Loop */ + case 0xE3: /* Jcxz */ + pc++; /* Short jump offset */ + continue; + + case 0xE4: /* in al,nn */ + case 0xE6: /* out nn,al */ + pc++; + continue; + + case 0xE5: /* in ax,nn */ + case 0xE7: /* in nn,ax */ + pc += 2; + continue; + + case 0xE8: /* Call rel */ + if (TwoWild(pat)) return; + continue; + case 0xE9: /* Jump rel, unconditional */ + if (TwoWild(pat)) return; + chop(pat); + return; + case 0xEA: /* Jump abs */ + if (FourWild(pat)) return; + chop(pat); + return; + case 0xEB: /* Jmp short unconditional */ + pc++; + chop(pat); + return; + + case 0xEC: /* In al,dx */ + case 0xED: /* In ax,dx */ + case 0xEE: /* Out dx,al */ + case 0xEF: /* Out dx,ax */ + continue; + + case 0xF0: /* Lock */ + case 0xF2: /* Repne */ + case 0xF3: /* Rep/repe */ + case 0xF4: /* Halt */ + case 0xF5: /* Cmc */ + case 0xF8: /* Clc */ + case 0xF9: /* Stc */ + case 0xFA: /* Cli */ + case 0xFB: /* Sti */ + case 0xFC: /* Cld 
*/ + case 0xFD: /* Std */ + continue; + + case 0xF6: /* Group 3 byte test/not/mul/div */ + case 0xF7: /* Group 3 word test/not/mul/div */ + case 0xFE: /* Inc/Dec group 4 */ + if (ModRM(pat)) return; + continue; + + case 0xFF: /* Group 5 Inc/Dec/Call/Jmp/Push */ + /* Most are like standard ModRM */ + if (ModRM(pat)) return; + continue; + + default: /* Rest are single byte opcodes */ + continue; + } + } + } +} + diff --git a/src/frontend.cpp b/src/frontend.cpp new file mode 100644 index 0000000..bcde899 --- /dev/null +++ b/src/frontend.cpp @@ -0,0 +1,372 @@ +/***************************************************************************** + * dcc project Front End module + * Loads a program into simulated main memory and builds the procedure list. + * (C) Cristina Cifuentes + ****************************************************************************/ + +#include "dcc.h" +#include +#include +#include +#ifdef __BORLAND__ +#include +#else +#include /* For malloc, free, realloc */ +#endif + +typedef struct { /* PSP structure */ + word int20h; /* interrupt 20h */ + word eof; /* segment, end of allocation block */ + byte res1; /* reserved */ + byte dosDisp[5]; /* far call to DOS function dispatcher */ + byte int22h[4]; /* vector for terminate routine */ + byte int23h[4]; /* vector for ctrl+break routine */ + byte int24h[4]; /* vector for error routine */ + byte res2[22]; /* reserved */ + word segEnv; /* segment address of environment block */ + byte res3[34]; /* reserved */ + byte int21h[6]; /* opcode for int21h and far return */ + byte res4[6]; /* reserved */ + byte fcb1[16]; /* default file control block 1 */ + byte fcb2[16]; /* default file control block 2 */ + byte res5[4]; /* reserved */ + byte cmdTail[0x80]; /* command tail and disk transfer area */ +} PSP; + +static struct { /* EXE file header */ + byte sigLo; /* .EXE signature: 0x4D 0x5A */ + byte sigHi; + word lastPageSize; /* Size of the last page */ + word numPages; /* Number of pages in the file */ + word 
numReloc; /* Number of relocation items */ + word numParaHeader; /* # of paragraphs in the header */ + word minAlloc; /* Minimum number of paragraphs */ + word maxAlloc; /* Maximum number of paragraphs */ + word initSS; /* Segment displacement of stack */ + word initSP; /* Contents of SP at entry */ + word checkSum; /* Complemented checksum */ + word initIP; /* Contents of IP at entry */ + word initCS; /* Segment displacement of code */ + word relocTabOffset; /* Relocation table offset */ + word overlayNum; /* Overlay number */ +} header; + +#define EXE_RELOCATION 0x10 /* EXE images rellocated to above PSP */ + +static void LoadImage(char *filename); +static void displayLoadInfo(void); +static void displayMemMap(void); + +/***************************************************************************** + * FrontEnd - invokes the loader, parser, disassembler (if asm1), icode + * rewritter, and displays any useful information. + ****************************************************************************/ +void FrontEnd (char *filename, CALL_GRAPH * *pcallGraph) +{ + /* Load program into memory */ + LoadImage(filename); + + if (option.verbose) + displayLoadInfo(); + + /* Do depth first flow analysis building call graph and procedure list, + * and attaching the I-code to each procedure */ + parse (pcallGraph); + + if (option.asm1) + { + printf("dcc: writing assembler file %s\n", asm1_name); + } + + /* Search through code looking for impure references and flag them */ + std::for_each(pProcList.begin(),pProcList.end(), + [](Function &f)->void { + f.markImpure(); + if (option.asm1) + disassem(1, &f); }); +if (option.Interact) +{ + interactDis(&pProcList.front(), 0); /* Interactive disassembler */ +} + +/* Converts jump target addresses to icode offsets */ +std::for_each(pProcList.begin(),pProcList.end(), + [](Function &f)->void { f.bindIcodeOff(); }); + +/* Print memory bitmap */ +if (option.Map) +displayMemMap(); +} + + 
+/**************************************************************************** + * displayLoadInfo - Displays low level loader type info. + ***************************************************************************/ +static void displayLoadInfo(void) +{ + Int i; + + printf("File type is %s\n", (prog.fCOM)?"COM":"EXE"); + if (! prog.fCOM) { + printf("Signature = %02X%02X\n", header.sigLo, header.sigHi); + printf("File size %% 512 = %04X\n", LH(&header.lastPageSize)); + printf("File size / 512 = %04X pages\n", LH(&header.numPages)); + printf("# relocation items = %04X\n", LH(&header.numReloc)); + printf("Offset to load image = %04X paras\n", LH(&header.numParaHeader)); + printf("Minimum allocation = %04X paras\n", LH(&header.minAlloc)); + printf("Maximum allocation = %04X paras\n", LH(&header.maxAlloc)); + } + printf("Load image size = %04X\n", prog.cbImage - sizeof(PSP)); + printf("Initial SS:SP = %04X:%04X\n", prog.initSS, prog.initSP); + printf("Initial CS:IP = %04X:%04X\n", prog.initCS, prog.initIP); + + if (option.VeryVerbose && prog.cReloc) + { + printf("\nRelocation Table\n"); + for (i = 0; i < prog.cReloc; i++) + { + printf("%06X -> [%04X]\n", prog.relocTable[i],LH(prog.Image + prog.relocTable[i])); + } + } + printf("\n"); +} + + +/***************************************************************************** + * fill - Fills line for displayMemMap() + ****************************************************************************/ +static void fill(Int ip, char *bf) +{ + static byte type[4] = {'.', 'd', 'c', 'x'}; + byte i; + + for (i = 0; i < 16; i++, ip++) + { + *bf++ = ' '; + *bf++ = (ip < prog.cbImage)? 
+ type[(prog.map[ip >> 2] >> ((ip & 3) * 2)) & 3]: ' '; + } + *bf = '\0'; +} + + +/***************************************************************************** + * displayMemMap - Displays the memory bitmap + ****************************************************************************/ +static void displayMemMap(void) +{ + char c, b1[33], b2[33], b3[33]; + byte i; + Int ip = 0; + + printf("\nMemory Map\n"); + while (ip < prog.cbImage) + { + fill(ip, b1); + printf("%06X %s\n", ip, b1); + ip += 16; + for (i = 3, c = b1[1]; i < 32 && c == b1[i]; i += 2) + ; /* Check if all same */ + if (i > 32) + { + fill(ip, b2); /* Skip until next two are not same */ + fill(ip+16, b3); + if (! (strcmp(b1, b2) || strcmp(b1, b3))) + { + printf(" :\n"); + do + { + ip += 16; + fill(ip+16, b1); + } while (! strcmp(b1, b2)); + } + } + } + printf("\n"); +} + + +/***************************************************************************** + * LoadImage + ****************************************************************************/ +static void LoadImage(char *filename) +{ + FILE *fp; + Int i, cb; + byte buf[4]; + + /* Open the input file */ + if ((fp = fopen(filename, "rb")) == NULL) + { + fatalError(CANNOT_OPEN, filename); + } + + /* Read in first 2 bytes to check EXE signature */ + if (fread(&header, 1, 2, fp) != 2) + { + fatalError(CANNOT_READ, filename); + } + + if (! (prog.fCOM = (boolT)(header.sigLo != 0x4D || header.sigHi != 0x5A))) { + /* Read rest of header */ + fseek(fp, 0, SEEK_SET); + if (fread(&header, sizeof(header), 1, fp) != 1) + { + fatalError(CANNOT_READ, filename); + } + + /* This is a typical DOS kludge! */ + if (LH(&header.relocTabOffset) == 0x40) + { + fatalError(NEWEXE_FORMAT); + } + + /* Calculate the load module size. 
+ * This is the number of pages in the file + * less the length of the header and reloc table + * less the number of bytes unused on last page + */ + cb = (dword)LH(&header.numPages) * 512 - (dword)LH(&header.numParaHeader) * 16; + if (header.lastPageSize) + { + cb -= 512 - LH(&header.lastPageSize); + } + + /* We quietly ignore minAlloc and maxAlloc since for our + * purposes it doesn't really matter where in real memory + * the program would end up. EXE programs can't really rely on + * their load location so setting the PSP segment to 0 is fine. + * Certainly programs that prod around in DOS or BIOS are going + * to have to load DS from a constant so it'll be pretty + * obvious. + */ + prog.initCS = (int16)LH(&header.initCS) + EXE_RELOCATION; + prog.initIP = (int16)LH(&header.initIP); + prog.initSS = (int16)LH(&header.initSS) + EXE_RELOCATION; + prog.initSP = (int16)LH(&header.initSP); + prog.cReloc = (int16)LH(&header.numReloc); + + /* Allocate the relocation table */ + if (prog.cReloc) + { + prog.relocTable = (dword*)allocMem(prog.cReloc * sizeof(Int)); + fseek(fp, LH(&header.relocTabOffset), SEEK_SET); + + /* Read in seg:offset pairs and convert to Image ptrs */ + for (i = 0; i < prog.cReloc; i++) + { + fread(buf, 1, 4, fp); + prog.relocTable[i] = LH(buf) + + (((Int)LH(buf+2) + EXE_RELOCATION)<<4); + } + } + /* Seek to start of image */ + fseek(fp, (Int)LH(&header.numParaHeader) * 16, SEEK_SET); + } + else + { /* COM file + * In this case the load module size is just the file length + */ + fseek(fp, 0, SEEK_END); + cb = ftell(fp); + + /* COM programs start off with an ORG 100H (to leave room for a PSP) + * This is also the implied start address so if we load the image + * at offset 100H addresses should all line up properly again. + */ + prog.initCS = 0; + prog.initIP = 0x100; + prog.initSS = 0; + prog.initSP = 0xFFFE; + prog.cReloc = 0; + + fseek(fp, 0, SEEK_SET); + } + + /* Allocate a block of memory for the program. 
*/ + prog.cbImage = cb + sizeof(PSP); + prog.Image = (byte*)allocMem(prog.cbImage); + prog.Image[0] = 0xCD; /* Fill in PSP Int 20h location */ + prog.Image[1] = 0x20; /* for termination checking */ + + /* Read in the image past where a PSP would go */ +#ifdef __DOSWIN__ + if (cb > 0xFFFF) + { + printf("Image size of %ld bytes too large for fread!\n", cb); + fatalError(CANNOT_READ, filename); + } +#endif + if (cb != (Int)fread(prog.Image + sizeof(PSP), 1, (size_t)cb, fp)) + { + fatalError(CANNOT_READ, filename); + } + + /* Set up memory map */ + cb = (prog.cbImage + 3) / 4; + prog.map = (byte *)memset(allocMem(cb), BM_UNKNOWN, (size_t)cb); + + /* Relocate segment constants */ + if (prog.cReloc) + { + for (i = 0; i < prog.cReloc; i++) + { + byte *p = &prog.Image[prog.relocTable[i]]; + word w = (word)LH(p) + EXE_RELOCATION; + *p++ = (byte)(w & 0x00FF); + *p = (byte)((w & 0xFF00) >> 8); + } + } + + fclose(fp); +} + + +/***************************************************************************** + * allocMem - malloc with failure test + ****************************************************************************/ +void *allocMem(Int cb) +{ + byte *p; + + //printf("Attempt to allocMem %5ld bytes\n", cb); + + if (! (p = (byte*)malloc((size_t)cb))) + /* if (! (p = (byte*)calloc((size_t)cb, 1))) */ + { + fatalError(MALLOC_FAILED, cb); + } + /*printf("allocMem: %p\n", p);/**/ + return p; +} + + +/***************************************************************************** + * reallocVar - reallocs extra variable space + ****************************************************************************/ +void *reallocVar(void *p, Int newsize) +{ + /*printf("Attempt to reallocVar %5d bytes\n", newsize);/**/ + if (! 
(p = realloc((byte *)p, (size_t)newsize))) + { + fatalError(MALLOC_FAILED, newsize); + } + + /*printf("reallocVar: %p\n", p);/**/ + return p; +} + +#if 0 +void free(void *p) +{ + _ffree(p); + switch (_heapset('Z')) + { + case _HEAPBADBEGIN: printf("f: Bad heap begin\n"); getchar(); break; + case _HEAPBADNODE: printf("f: Bad heap node\n"); getchar(); break; + case _HEAPEMPTY: printf("f: Heap empty\n"); getchar(); break; + case _HEAPOK:putchar('!');break; + }/**/ +} +#endif + diff --git a/src/graph.cpp b/src/graph.cpp new file mode 100644 index 0000000..e354d47 --- /dev/null +++ b/src/graph.cpp @@ -0,0 +1,379 @@ +/***************************************************************************** + * dcc project CFG related functions + * (C) Cristina Cifuentes + ****************************************************************************/ + +#include "dcc.h" +#include +#if __BORLAND__ +#include +#else +#include /* For free() */ +#endif +#include "graph.h" + +static BB * rmJMP(Function * pProc, Int marker, BB * pBB); +static void mergeFallThrough(Function * pProc, BB * pBB); +static void dfsNumbering(BB * pBB, std::vector &dfsLast, Int *first, Int *last); + +/***************************************************************************** + * createCFG - Create the basic control flow graph + ****************************************************************************/ +void Function::createCFG() +{ + /* Splits Icode associated with the procedure into Basic Blocks. + * The links between BBs represent the control flow graph of the + * procedure. 
+ * A Basic Block is defined to end on one of the following instructions: + * 1) Conditional and unconditional jumps + * 2) CALL(F) + * 3) RET(F) + * 4) On the instruction before a join (a flagged TARGET) + * 5) Repeated string instructions + * 6) End of procedure + */ + Int i; + Int ip, start; + BB * psBB; + BB * pBB; + ICODE * pIcode = Icode.GetFirstIcode(); + + stats.numBBbef = stats.numBBaft = 0; + for (ip = start = 0; Icode.IsValid(pIcode); ip++, pIcode++) + { + /* Stick a NOWHERE_NODE on the end if we terminate + * with anything other than a ret, jump or terminate */ + if (ip + 1 == Icode.GetNumIcodes() && + ! (pIcode->ic.ll.flg & TERMINATES) && + pIcode->ic.ll.opcode != iJMP && pIcode->ic.ll.opcode != iJMPF && + pIcode->ic.ll.opcode != iRET && pIcode->ic.ll.opcode != iRETF) + pBB=BB::Create(start, ip, NOWHERE_NODE, 0, this); + + /* Only process icodes that have valid instructions */ + else if ((pIcode->ic.ll.flg & NO_CODE) != NO_CODE) + { + switch (pIcode->ic.ll.opcode) { + case iJB: case iJBE: case iJAE: case iJA: + case iJL: case iJLE: case iJGE: case iJG: + case iJE: case iJNE: case iJS: case iJNS: + case iJO: case iJNO: case iJP: case iJNP: + case iJCXZ: + pBB = BB::Create(start, ip, TWO_BRANCH, 2, this); +CondJumps: + start = ip + 1; + pBB->edges[0].ip = (dword)start; + /* This is for jumps off into nowhere */ + if (pIcode->ic.ll.flg & NO_LABEL) + pBB->numOutEdges--; + else + pBB->edges[1].ip = pIcode->ic.ll.immed.op; + break; + + case iLOOP: case iLOOPE: case iLOOPNE: + pBB = BB::Create(start, ip, LOOP_NODE, 2, this); + goto CondJumps; + + case iJMPF: case iJMP: + if (pIcode->ic.ll.flg & SWITCH) + { + pBB = BB::Create(start, ip, MULTI_BRANCH, + pIcode->ic.ll.caseTbl.numEntries, this); + for (i = 0; i < pIcode->ic.ll.caseTbl.numEntries; i++) + pBB->edges[i].ip = pIcode->ic.ll.caseTbl.entries[i]; + hasCase = TRUE; + } + else if ((pIcode->ic.ll.flg & (I | NO_LABEL)) == I) { + pBB = BB::Create(start, ip, ONE_BRANCH, 1, this); + pBB->edges[0].ip = 
pIcode->ic.ll.immed.op; + } + else + BB::Create(start, ip, NOWHERE_NODE, 0, this); + start = ip + 1; + break; + + case iCALLF: case iCALL: + { + Function * p = pIcode->ic.ll.immed.proc.proc; + if (p) + i = ((p->flg) & TERMINATES) ? 0 : 1; + else + i = 1; + pBB = BB::Create(start, ip, CALL_NODE, i, this); + start = ip + 1; + if (i) + pBB->edges[0].ip = (dword)start; + } + break; + + case iRET: case iRETF: + BB::Create(start, ip, RETURN_NODE, 0, this); + start = ip + 1; + break; + + default: + /* Check for exit to DOS */ + if (pIcode->ic.ll.flg & TERMINATES) + { + pBB = BB::Create(start, ip, TERMINATE_NODE, 0, this); + start = ip + 1; + } + /* Check for a fall through */ + else if (Icode.GetFirstIcode()[ip + 1].ic.ll.flg & (TARGET | CASE)) + { + pBB = BB::Create(start, ip, FALL_NODE, 1, this); + start = ip + 1; + pBB->edges[0].ip = (dword)start; + } + break; + } + } + } + std::vector::iterator iter=heldBBs.begin(); + /* Convert list of BBs into a graph */ + for (; iter!=heldBBs.end(); ++iter) + { + pBB = *iter; + for (i = 0; i < pBB->numOutEdges; i++) + { + ip = pBB->edges[i].ip; + if (ip >= SYNTHESIZED_MIN) + fatalError (INVALID_SYNTHETIC_BB); + else + { + auto iter2=std::find_if(heldBBs.begin(),heldBBs.end(), + [ip](BB *psBB)->bool {return psBB->start==ip;}); + if(iter2==heldBBs.end()) + fatalError(NO_BB, ip, name); + psBB = *iter2; + pBB->edges[i].BBptr = psBB; + psBB->numInEdges++; + } + } + } +} + +void Function::markImpure() +{ + SYM * psym; + for (int i = 0; i < Icode.GetNumIcodes(); i++) + { + if (Icode.GetLlFlag(i) & (SYM_USE | SYM_DEF)) + { + psym = &symtab.sym[Icode.GetIcode(i)->ic.ll.caseTbl.numEntries]; + for (int c = (Int)psym->label; c < (Int)psym->label+psym->size; c++) + { + if (BITMAP(c, BM_CODE)) + { + Icode.SetLlFlag(i, IMPURE); + flg |= IMPURE; + break; + } + } + } + } + +} + + + +/***************************************************************************** + * newBB - Allocate new BB and link to end of list + 
*****************************************************************************/
+
+/*****************************************************************************
+ * freeCFG - Deallocates a cfg
+ *           Every BB held in heldBBs is deleted and the list is emptied so
+ *           that no dangling pointer survives (a second call is a no-op).
+ ****************************************************************************/
+void Function::freeCFG()
+{
+    std::for_each(heldBBs.begin(),heldBBs.end(),[](BB *p)->void {delete p;});
+    heldBBs.clear();
+}
+
+
+/*****************************************************************************
+ * compressCFG - Remove redundant BBs and add in-edge information
+ *      1. jumps to a BB that is just a JMP are redirected to the final target
+ *      2. fall-through / jump-to-next chains are merged (mergeFallThrough)
+ *      3. BBs left without in-edges are deleted, in-edge arrays are sized
+ *      4. the graph is dfs numbered, which also fills in the inEdges[] arrays
+ ****************************************************************************/
+void Function::compressCFG()
+{
+    BB *pBB, *pNxt;
+    Int ip, first = 0, last, i;
+
+    /* First pass over BB list removes redundant jumps of the form
+     * (Un)Conditional -> Unconditional jump */
+    for (auto iter = cfg.begin(); iter != cfg.end(); ++iter)
+    {
+        pBB = *iter;
+        if (pBB->numInEdges == 0 ||
+            (pBB->nodeType != ONE_BRANCH && pBB->nodeType != TWO_BRANCH))
+            continue;
+
+        for (i = 0; i < pBB->numOutEdges; i++)
+        {
+            ip = pBB->start + pBB->length - 1;
+            pNxt = rmJMP(this, ip, pBB->edges[i].BBptr);
+
+            if (pBB->numOutEdges)       /* Might have been clobbered */
+            {
+                pBB->edges[i].BBptr = pNxt;
+                Icode.SetImmediateOp(ip, (dword)pNxt->start);
+            }
+        }
+    }
+
+    /* Next is a depth-first traversal merging any FALL_NODE or
+     * ONE_BRANCH that fall through to a node with that as their only
+     * in-edge. */
+    this->cfg.front()->mergeFallThrough(Icode);
+
+    /* Remove redundant BBs created by the above compressions
+     * and allocate in-edge arrays as required.
+     * The arrays are sized here, not in the first pass: rmJMP and
+     * mergeFallThrough still change numInEdges, and dfsNumbering stores
+     * exactly numInEdges entries into inEdges[]. */
+    stats.numBBaft = stats.numBBbef;
+
+    for (auto iter = cfg.begin(); iter != cfg.end(); ++iter)
+    {
+        pBB = *iter;
+        if (pBB->numInEdges == 0)
+        {
+            if (iter == cfg.begin())    /* Init it misses out on */
+            {
+                pBB->inEdges.resize(pBB->numInEdges,0);
+                pBB->index = UN_INIT;
+            }
+            else
+            {
+                /* NOTE(review): the deleted BB stays listed in cfg (and
+                 * possibly in heldBBs) - confirm nobody walks those
+                 * containers afterwards. */
+                if (pBB->numOutEdges)
+                    pBB->edges.clear();
+                delete pBB;
+                stats.numBBaft--;
+            }
+        }
+        else
+        {
+            pBB->inEdgeCount = pBB->numInEdges;
+            pBB->inEdges.resize(pBB->numInEdges,0);
+        }
+    }
+
+    /* Allocate storage for dfsLast[] array */
+    numBBs = stats.numBBaft;
+    dfsLast.resize(numBBs,0);
+
+    /* Now do a dfs numbering traversal and fill in the inEdges[] array */
+    last = numBBs - 1;
+    cfg.front()->dfsNumbering(dfsLast, &first, &last);
+}
+
+
+/****************************************************************************
+ * rmJMP - If BB addressed is just a JMP it is replaced with its target
+ *         marker (offset by DFS_JMP) tags the BBs visited by this walk so
+ *         that a cycle of jump-only BBs is detected.
+ *         Returns the BB the edge should finally point to.
+ ***************************************************************************/
+static BB * rmJMP(Function * pProc, Int marker, BB * pBB)
+{
+    marker += DFS_JMP;
+
+    while (pBB->nodeType == ONE_BRANCH && pBB->length == 1) {
+        if (pBB->traversed != marker) {
+            pBB->traversed = marker;
+            /* One in-edge of the jump BB moves on to its target; once the
+             * jump BB has no in-edge left its icode is dropped */
+            if (--pBB->numInEdges)
+                pBB->edges[0].BBptr->numInEdges++;
+            else
+            {
+                pProc->Icode.SetLlFlag(pBB->start, NO_CODE);
+                pProc->Icode.SetLlInvalid(pBB->start, TRUE);
+            }
+
+            pBB = pBB->edges[0].BBptr;
+        }
+        else {          /* We are going around in circles */
+            pBB->nodeType = NOWHERE_NODE;
+            /* the jump now targets itself */
+            pProc->Icode.SetImmediateOp(pBB->start, (dword)pBB->start);
+            do {
+                pBB = pBB->edges[0].BBptr;
+                if (! --pBB->numInEdges)
+                {
+                    pProc->Icode.SetLlFlag(pBB->start, NO_CODE);
+                    pProc->Icode.SetLlInvalid(pBB->start, TRUE);
+                }
+            } while (pBB->nodeType != NOWHERE_NODE);
+
+            pBB->edges.clear();
+            pBB->numOutEdges = 0;
+        }
+    }
+    return pBB;
+}
+
+
+/*****************************************************************************
+ * mergeFallThrough - merges this BB with its successor while this BB is a
+ *      FALL_NODE or ONE_BRANCH and the successor has no other in-edge, then
+ *      recurses into every out edge not yet marked DFS_MERGE.
+ ****************************************************************************/
+void BB::mergeFallThrough( CIcodeRec &Icode)
+{
+    BB * pChild;
+    Int i, _ip;
+
+    if (!this)
+    {
+        printf("mergeFallThrough on empty BB!\n");
+        return;         /* nothing can be merged, do not touch members */
+    }
+    while (nodeType == FALL_NODE || nodeType == ONE_BRANCH)
+    {
+        pChild = edges[0].BBptr;
+        /* Jump to next instruction can always be removed */
+        if (nodeType == ONE_BRANCH)
+        {
+            _ip = start + length;
+            /* only NO_CODE icodes may lie between the jump and its target */
+            for (i = _ip; i < pChild->start && (Icode.GetLlFlag(i) & NO_CODE); i++);
+            if (i != pChild->start)
+                break;
+            Icode.SetLlFlag(_ip - 1, NO_CODE);
+            Icode.SetLlInvalid(_ip - 1, TRUE);
+            nodeType = FALL_NODE;
+            length--;
+
+        }
+        /* If there's no other edges into child can merge */
+        if (pChild->numInEdges != 1)
+            break;
+
+        /* Take over the child's type, icodes and out edges */
+        nodeType = pChild->nodeType;
+        length = pChild->start + pChild->length - start;
+        Icode.ClearLlFlag(pChild->start, TARGET);
+        numOutEdges = pChild->numOutEdges;
+        edges.swap(pChild->edges);
+
+        pChild->numOutEdges = pChild->numInEdges = 0;
+        pChild->edges.clear();
+    }
+    traversed = DFS_MERGE;
+
+    /* Process all out edges recursively */
+    for (i = 0; i < numOutEdges; i++)
+        if (edges[i].BBptr->traversed != DFS_MERGE)
+            edges[i].BBptr->mergeFallThrough(Icode);
+}
+
+
+/*****************************************************************************
+ * dfsNumbering - Numbers nodes during first and last visits and determine
+ *                in-edges
+ *      dfsLast receives the nodes indexed by their last-visit number;
+ *      *first counts up and *last counts down as the traversal proceeds.
+ ****************************************************************************/
+void BB::dfsNumbering(std::vector<BB *> &dfsLast, Int *first, Int *last)
+{
+    BB * pChild;
+    Int i;      /* same type as numOutEdges: a byte counter wraps at 256 */
+
+    traversed = DFS_NUM;
+    dfsFirstNum = (*first)++;
+
+    /* index is being used as an index to inEdges[]. */
+    for (i = 0; i < numOutEdges; i++)
+    {
+        pChild = edges[i].BBptr;
+        pChild->inEdges[pChild->index++] = this;
+
+        /* Is this the last visit? */
+        if (pChild->index == pChild->numInEdges)
+            pChild->index = UN_INIT;
+
+        if (pChild->traversed != DFS_NUM)
+            pChild->dfsNumbering(dfsLast, first, last);
+    }
+    dfsLastNum = *last;
+    dfsLast[(*last)--] = this;
+}
diff --git a/src/hlicode.cpp b/src/hlicode.cpp
new file mode 100644
index 0000000..6deb4cf
--- /dev/null
+++ b/src/hlicode.cpp
@@ -0,0 +1,501 @@
+/*
+ * File:    hlIcode.c
+ * Purpose: High-level icode routines
+ * Date:    September-October 1993
+ * (C) Cristina Cifuentes
+ */
+#include
+#include
+#include
+#include
+#include "dcc.h"
+using namespace std;
+#define ICODE_DELTA 25
+
+/* Masks off bits set by duReg[].
+ * Indexed by register number; every entry clears the bit of the register
+ * itself plus, for the word registers, the bits of its two byte halves.
+ * NOTE(review): the dx and bx entries used to read 0xFBB00B / 0xF77007,
+ * which also cleared bits 4..11; they now follow the ax/cx pattern -
+ * confirm against duReg[]. */
+dword maskDuReg[] = { 0x00,
+                      0xFEEFFE, 0xFDDFFD, 0xFBBFFB, 0xF77FF7, /* word regs */
+                      0xFFFFEF, 0xFFFFDF, 0xFFFFBF, 0xFFFF7F,
+                      0xFFFEFF, 0xFFFDFF, 0xFFFBFF, 0xFFF7FF, /* seg regs */
+                      0xFFEFFF, 0xFFDFFF, 0xFFBFFF, 0xFF7FFF, /* byte regs */
+                      0xFEFFFF, 0xFDFFFF, 0xFBFFFF, 0xF7FFFF,
+                      0xEFFFFF,                               /* tmp reg */
+                      0xFFFFB7, 0xFFFF77, 0xFFFF9F, 0xFFFF5F, /* index regs */
+                      0xFFFFBF, 0xFFFF7F, 0xFFFFDF, 0xFFFFF7 };
+
+static char buf[lineSize];     /* Line buffer for hl icode output */
+
+
+
+/* Places the new HLI_ASSIGN high-level operand in the high-level icode array */
+void ICODE::setAsgn(COND_EXPR *lhs, COND_EXPR *rhs)
+{
+    type = HIGH_LEVEL;
+    ic.hl.opcode = HLI_ASSIGN;
+    /* overwriting a live expression would leak it */
+    assert(ic.hl.oper.asgn.lhs==0);
+    assert(ic.hl.oper.asgn.rhs==0);
+    ic.hl.oper.asgn.lhs = lhs;
+    ic.hl.oper.asgn.rhs = rhs;
+}
+/* Places the new HLI_CALL high-level operand in the high-level icode array.
+ * The argument byte count comes from the call icode when it is non-zero,
+ * otherwise from the callee's cbParam. */
+void ICODE::newCallHl()
+{
+    type = HIGH_LEVEL;
+    ic.hl.opcode = HLI_CALL;
+    ic.hl.oper.call.proc = ic.ll.immed.proc.proc;
+    ic.hl.oper.call.args = new STKFRAME;
+    if (ic.ll.immed.proc.cb != 0)
+        ic.hl.oper.call.args->cb = ic.ll.immed.proc.cb;
+    else if (ic.hl.oper.call.proc != 0)
+        ic.hl.oper.call.args->cb =ic.hl.oper.call.proc->cbParam;
+    else
+        /* callee unknown: no procedure to ask, keep the icode's zero count */
+        ic.hl.oper.call.args->cb = 0;
+}
+
+
+/* Places the new HLI_POP/HLI_PUSH/HLI_RET high-level operand in the high-level icode
+ * array */
+void ICODE::setUnary(hlIcode op, COND_EXPR *exp)
+{
+    assert(ic.hl.oper.exp==0);
+    type = HIGH_LEVEL;
+    ic.hl.opcode = op;
+    ic.hl.oper.exp = exp;
+}
+
+
+/* Places the new HLI_JCOND high-level operand in the high-level icode array */
+void ICODE::setJCond(COND_EXPR *cexp)
+{
+    assert(ic.hl.oper.exp==0);
+    type = HIGH_LEVEL;
+    ic.hl.opcode = HLI_JCOND;
+    ic.hl.oper.exp = cexp;
+}
+
+
+/* Sets the invalid field to TRUE as this low-level icode is no longer valid,
+ * it has been replaced by a high-level icode. */
+void ICODE ::invalidate()
+{
+    invalid = TRUE;
+}
+
+
+/* Removes the defined register regi from the lhs subtree.  If all registers
+ * of this instruction are unused, the instruction is invalidated (ie.
+ * removed).
+ * Returns TRUE when the icode was invalidated, FALSE otherwise. */
+boolT removeDefRegi (byte regi, ICODE *picode, Int thisDefIdx, LOCAL_ID *locId)
+{
+    Int numDefs = picode->du1.numRegsDef;
+
+    /* Count down past the definitions that have no recorded use; the walk
+     * stops at the first one that is used or when lastDefRegi is set */
+    if (numDefs == thisDefIdx)
+    {
+        while (numDefs > 0 &&
+               picode->du1.idx[numDefs-1][0] == 0 &&
+               !picode->du.lastDefRegi)
+            numDefs--;
+    }
+
+    if (numDefs == 0)
+    {
+        picode->invalidate();
+        return (TRUE);
+    }
+
+    switch (picode->ic.hl.opcode) {
+    case HLI_ASSIGN:
+        removeRegFromLong (regi, locId, picode->ic.hl.oper.asgn.lhs);
+        picode->du1.numRegsDef--;
+        picode->du.def &= maskDuReg[regi];
+        break;
+    case HLI_POP:
+    case HLI_PUSH:
+        removeRegFromLong (regi, locId, picode->ic.hl.oper.exp);
+        picode->du1.numRegsDef--;
+        picode->du.def &= maskDuReg[regi];
+        break;
+    default:    /* other opcodes keep their definitions */
+        break;
+    }
+    return (FALSE);
+}
+
+
+/* Translates LOW_LEVEL icodes to HIGH_LEVEL icodes - 1st stage.
+ * Note: this process should be done before data flow analysis, which
+ *       refines the HIGH_LEVEL icodes.
*/
+void Function::highLevelGen()
+{
+    Int i,                  /* idx into icode array */
+        numIcode;           /* number of icode instructions */
+    ICODE * pIcode;         /* ptr to current icode node */
+    /* left- and right-hand side of expression.  Start out as NULL so that an
+     * icode without operands never reads an indeterminate pointer; after
+     * that the values of the previous icode are carried over, as before. */
+    COND_EXPR *lhs = NULL, *rhs = NULL;
+    flags32 flg;            /* icode flags */
+
+    numIcode = Icode.GetNumIcodes();
+    for (i = 0; i < numIcode; i++)
+    {
+        pIcode = Icode.GetIcode(i);
+        if ((pIcode->ic.ll.flg & NOT_HLL) == NOT_HLL)
+            pIcode->invalidate();
+        /* only valid low-level icodes are translated */
+        if ((pIcode->type != LOW_LEVEL) || (pIcode->invalid != FALSE))
+            continue;
+
+        flg = pIcode->ic.ll.flg;
+        if ((flg & IM_OPS) != IM_OPS)       /* not processing IM_OPS yet */
+            if ((flg & NO_OPS) != NO_OPS)   /* if there are opers */
+            {
+                if ((flg & NO_SRC) != NO_SRC)   /* if there is src op */
+                    rhs = COND_EXPR::id (pIcode, SRC, this, i, pIcode, NONE);
+                lhs = COND_EXPR::id (pIcode, DST, this, i, pIcode, NONE);
+            }
+
+        switch (pIcode->ic.ll.opcode) {
+        case iADD:
+            rhs = COND_EXPR::boolOp (lhs, rhs, ADD);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iAND:
+            rhs = COND_EXPR::boolOp (lhs, rhs, AND);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iCALL:
+        case iCALLF:
+            pIcode->newCallHl();
+            break;
+
+        case iDEC:
+            rhs = COND_EXPR::idKte (1, 2);
+            rhs = COND_EXPR::boolOp (lhs, rhs, SUB);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iDIV:
+        case iIDIV:/* should be signed div */
+            rhs = COND_EXPR::boolOp (lhs, rhs, DIV);
+            /* the quotient is assigned to AL (byte op) or AX */
+            if (pIcode->ic.ll.flg & B)
+            {
+                lhs = COND_EXPR::idReg (rAL, 0, &localId);
+                pIcode->setRegDU( rAL, eDEF);
+            }
+            else
+            {
+                lhs = COND_EXPR::idReg (rAX, 0, &localId);
+                pIcode->setRegDU( rAX, eDEF);
+            }
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iIMUL:
+            rhs = COND_EXPR::boolOp (lhs, rhs, MUL);
+            lhs = COND_EXPR::id (pIcode, LHS_OP, this, i, pIcode, NONE);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iINC:
+            rhs = COND_EXPR::idKte (1, 2);
+            rhs = COND_EXPR::boolOp (lhs, rhs, ADD);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iLEA:
+            rhs = COND_EXPR::unary (ADDRESSOF, rhs);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iMOD:
+            rhs = COND_EXPR::boolOp (lhs, rhs, MOD);
+            /* the remainder is assigned to AH (byte op) or DX */
+            if (pIcode->ic.ll.flg & B)
+            {
+                lhs = COND_EXPR::idReg (rAH, 0, &localId);
+                pIcode->setRegDU( rAH, eDEF);
+            }
+            else
+            {
+                lhs = COND_EXPR::idReg (rDX, 0, &localId);
+                pIcode->setRegDU( rDX, eDEF);
+            }
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iMOV:
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iMUL:
+            rhs = COND_EXPR::boolOp (lhs, rhs, MUL);
+            lhs = COND_EXPR::id (pIcode, LHS_OP, this, i, pIcode, NONE);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iNEG:
+            rhs = COND_EXPR::unary (NEGATION, lhs);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iNOT:
+            /* NOTE(review): builds the expression from rhs although iNEG
+             * above uses lhs - confirm NOT icodes carry a source operand */
+            rhs = COND_EXPR::boolOp (NULL, rhs, NOT);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iOR:
+            rhs = COND_EXPR::boolOp (lhs, rhs, OR);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iPOP:
+            pIcode->setUnary(HLI_POP, lhs);
+            break;
+
+        case iPUSH:
+            pIcode->setUnary(HLI_PUSH, lhs);
+            break;
+
+        case iRET:
+        case iRETF:
+            pIcode->setUnary(HLI_RET, NULL);
+            break;
+
+        case iSHL:
+            rhs = COND_EXPR::boolOp (lhs, rhs, SHL);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iSAR:    /* signed */
+        case iSHR:
+            rhs = COND_EXPR::boolOp (lhs, rhs, SHR); /* unsigned*/
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iSIGNEX:
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iSUB:
+            rhs = COND_EXPR::boolOp (lhs, rhs, SUB);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+
+        case iXCHG:
+            break;
+
+        case iXOR:
+            rhs = COND_EXPR::boolOp (lhs, rhs, XOR);
+            pIcode->setAsgn(lhs, rhs);
+            break;
+        }
+    }
+
+}
+
+
+/* Modifies the given conditional operator to its inverse.  This is used
+ * in if..then[..else] statements, to reflect the condition that takes the
+ * then part.
*/
+void inverseCondOp (COND_EXPR **exp)
+{
+    /* Indexed by condOp: the relational operators and DBL_AND/DBL_OR map to
+     * their inverse, every other slot is a DUMMY filler */
+    static condOp invCondOp[] = {GREATER, GREATER_EQUAL, NOT_EQUAL, EQUAL,
+                                 LESS_EQUAL, LESS, DUMMY,DUMMY,DUMMY,DUMMY,
+                                 DUMMY, DUMMY, DUMMY, DUMMY, DUMMY, DUMMY,
+                                 DUMMY, DBL_OR, DBL_AND};
+    if (*exp == NULL)
+        return;
+
+    if ((*exp)->type == NEGATION)   //TODO: memleak here
+    {
+        /* inverse of a negation is the negated expression itself */
+        *exp = (*exp)->expr.unaryExp;
+        return;
+    }
+
+    /* other types are left unmodified */
+    if ((*exp)->type != BOOLEAN_OP)
+        return;
+
+    switch ((*exp)->expr.boolExpr.op)
+    {
+    case LESS_EQUAL: case LESS: case EQUAL:
+    case NOT_EQUAL: case GREATER: case GREATER_EQUAL:
+        (*exp)->expr.boolExpr.op = invCondOp[(*exp)->expr.boolExpr.op];
+        break;
+
+    case AND: case OR: case XOR: case NOT: case ADD:
+    case SUB: case MUL: case DIV: case SHR: case SHL: case MOD:
+        /* no inverse operator available: wrap in a negation */
+        *exp = COND_EXPR::unary (NEGATION, *exp);
+        break;
+
+    case DBL_AND: case DBL_OR:
+        /* invert the connective and both operands */
+        (*exp)->expr.boolExpr.op = invCondOp[(*exp)->expr.boolExpr.op];
+        inverseCondOp (&(*exp)->expr.boolExpr.lhs);
+        inverseCondOp (&(*exp)->expr.boolExpr.rhs);
+        break;
+    } /* eos */
+}
+
+
+/* Returns the string that represents the procedure call of tproc (ie. with
+ * actual parameters): "name (arg1, arg2, ...)" */
+std::string writeCall (Function * tproc, STKFRAME * args, Function * pproc, Int *numLoc)
+{
+    ostringstream s;
+    s << tproc->name << " (";
+    const size_t numArgs = args->sym.size();
+    for (size_t i = 0; i < numArgs; i++)
+    {
+        if (i > 0)              /* separator goes between arguments only */
+            s << ", ";
+        s << walkCondExpr (args->sym[i].actual, pproc, numLoc);
+    }
+    s << ")";
+    return s.str();
+}
+
+
+/* Displays the output of a HLI_JCOND icode.
+ * The condition is inverted in place before it is printed, so the expression
+ * tree reachable from h is modified.  Returns the shared static line buffer;
+ * text that does not fit is truncated instead of overrunning the buffer. */
+char *writeJcond (HLTYPE h, Function * pProc, Int *numLoc)
+{
+    memset (buf, ' ', sizeof(buf));
+    inverseCondOp (&h.oper.exp);
+    std::string e = walkCondExpr (h.oper.exp, pProc, numLoc);
+    snprintf (buf, sizeof(buf), "if %s {\n", e.c_str());
+    return (buf);
+}
+
+
+/* Displays the inverse output of a HLI_JCOND icode.
This is used in the case
+ * when the THEN clause of an if..then..else is empty.  The clause is
+ * negated and the ELSE clause is used instead.
+ * Returns the shared static line buffer; text that does not fit is
+ * truncated instead of overrunning the buffer. */
+char *writeJcondInv (HLTYPE h, Function * pProc, Int *numLoc)
+{
+    memset (buf, ' ', sizeof(buf));
+    std::string e = walkCondExpr (h.oper.exp, pProc, numLoc);
+    snprintf (buf, sizeof(buf), "if %s {\n", e.c_str());
+    return (buf);
+}
+
+
+/* Returns a string with the contents of the current high-level icode.
+ * Note: this routine does not output the contents of HLI_JCOND icodes.  This is
+ *       done in a separate routine to be able to support the removal of
+ *       empty THEN clauses on an if..then..else.
+ * The result lives in the shared static line buffer; it is empty for
+ * opcodes not handled here and for a return without expression.  Text that
+ * does not fit is truncated instead of overrunning the buffer. */
+char *write1HlIcode (HLTYPE h, Function * pProc, Int *numLoc)
+{
+    std::string e;
+
+    memset (buf, ' ', sizeof(buf));
+    buf[0] = '\0';
+    switch (h.opcode) {
+    case HLI_ASSIGN:
+    {
+        /* lhs is walked before rhs, both may update *numLoc */
+        e = walkCondExpr (h.oper.asgn.lhs, pProc, numLoc);
+        std::string rhsText = walkCondExpr (h.oper.asgn.rhs, pProc, numLoc);
+        snprintf (buf, sizeof(buf), "%s = %s;\n", e.c_str(), rhsText.c_str());
+        break;
+    }
+    case HLI_CALL:
+        e = writeCall (h.oper.call.proc, h.oper.call.args, pProc,
+                       numLoc);
+        snprintf (buf, sizeof(buf), "%s;\n", e.c_str());
+        break;
+    case HLI_RET:
+        e = walkCondExpr (h.oper.exp, pProc, numLoc);
+        if (! e.empty())
+            snprintf (buf, sizeof(buf), "return (%s);\n", e.c_str());
+        break;
+    case HLI_POP:
+        e = walkCondExpr (h.oper.exp, pProc, numLoc);
+        snprintf (buf, sizeof(buf), "HLI_POP %s\n", e.c_str());
+        break;
+    case HLI_PUSH:
+        e = walkCondExpr (h.oper.exp, pProc, numLoc);
+        snprintf (buf, sizeof(buf), "HLI_PUSH %s\n", e.c_str());
+        break;
+    }
+    return (buf);
+}
+
+
+Int power2 (Int i)
+/* Returns the value of 2 to the power of i */
+{
+    if (i == 0)
+        return (1);
+    return (2 << (i-1));
+}
+
+
+/* Writes the registers/stack variables that are used and defined by this
+ * instruction.
*/
+void ICODE::writeDU(Int idx)
+{
+    Int i, j;
+    std::string regs;       /* names of the registers found so far */
+
+    /* Defined registers: one bit of du.def per register */
+    for (i = 0; i < (INDEXBASE-1); i++)
+    {
+        if ((du.def & power2(i)) == 0)
+            continue;
+        regs += allRegs[i];
+        regs += " ";
+    }
+    if (!regs.empty())
+        printf ("Def (reg) = %s\n", regs.c_str());
+
+    /* Used registers.
+     * NOTE(review): this loop runs one index further than the one above -
+     * confirm allRegs[] has an entry for it. */
+    regs.clear();
+    for (i = 0; i < INDEXBASE; i++)
+    {
+        if ((du.use & power2(i)) == 0)
+            continue;
+        regs += allRegs[i];
+        regs += " ";
+    }
+    if (!regs.empty())
+        printf ("Use (reg) = %s\n", regs.c_str());
+
+    /* Print du1 chain */
+    printf ("# regs defined = %d\n", du1.numRegsDef);
+    for (i = 0; i < MAX_REGS_DEF; i++)
+    {
+        if (du1.idx[i][0] == 0)
+            continue;
+        printf ("%d: du1[%d][] = ", idx, i);
+        /* a zero entry ends the list of uses */
+        for (j = 0; j < MAX_USES && du1.idx[i][j] != 0; j++)
+            printf ("%d ", du1.idx[i][j]);
+        printf ("\n");
+    }
+
+    /* For HLI_CALL, print # parameter bytes */
+    if (ic.hl.opcode == HLI_CALL)
+        printf ("# param bytes = %d\n", ic.hl.oper.call.args->cb);
+    printf ("\n");
+}
+
+
+/* Frees the storage allocated to h->hlIcode: releases the expressions of
+ * the first numIcodes entries of icode.  Only HLI_ASSIGN, HLI_POP, HLI_PUSH
+ * and HLI_JCOND icodes are handled here. */
+void freeHlIcode (ICODE * icode, Int numIcodes)
+{
+    for (Int n = 0; n < numIcodes; n++)
+    {
+        const HLTYPE &hl = icode[n].ic.hl;
+        switch (hl.opcode)
+        {
+        case HLI_ASSIGN:
+            hl.oper.asgn.lhs->release();
+            hl.oper.asgn.rhs->release();
+            break;
+        case HLI_POP:
+        case HLI_PUSH:
+        case HLI_JCOND:
+            hl.oper.exp->release();
+            break;
+        }
+    }
+}
+
diff --git a/src/icode.cpp b/src/icode.cpp
new file mode 100644
index 0000000..bd81130
--- /dev/null
+++ b/src/icode.cpp
@@ -0,0 +1,123 @@
+// Object oriented icode code for dcc
+// (C) 1997 Mike Van Emmerik
+
+#include
+#include
+#include
+
+#include "types.h" // Common types like byte, etc
+#include "ast.h" // Some icode types depend on these
+#include "icode.h"
+
+#define ICODE_DELTA 25 // Amount to allocate for new chunk
+
+
+/* Nothing to set up */
+CIcodeRec::CIcodeRec()
+{
+}
+
+/* Nothing to tear down */
+CIcodeRec::~CIcodeRec()
+{
+}
+
+/* Copies the icode that is pointed to by pIcode to the
icode array.
+ * The copy is appended at the end; a pointer to the stored copy is
+ * returned. */
+ICODE * CIcodeRec::addIcode(ICODE *pIcode)
+{
+    push_back(*pIcode);
+    ICODE &stored = back();
+    return &stored;
+}
+
+/* Address of the first icode of the array */
+ICODE * CIcodeRec::GetFirstIcode()
+{
+    ICODE &first = front();
+    return &first;
+}
+
+/* Don't need this; just pIcode++ since array is guaranteed to be contiguous
+ICODE * CIcodeRec::GetNextIcode(ICODE * pCurIcode)
+{
+    int idx = pCurIcode - icode; // Current index
+    ASSERT(idx+1 < numIcode);
+    return &icode[idx+1];
+}
+*/
+
+/* True when pCurIcode points at one of the icodes stored in this array */
+boolT CIcodeRec::IsValid(ICODE *pCurIcode)
+{
+    const ptrdiff_t offset = pCurIcode - &this->front(); // distance from start
+    if (offset < 0)
+        return false;
+    return (offset < size());
+}
+
+/* Number of icodes stored */
+int CIcodeRec::GetNumIcodes()
+{
+    return size();
+}
+
+/* Records pnewBB as the owning basic block of the icodes start..end
+ * (both inclusive) */
+void CIcodeRec::SetInBB(int start, int end, BB *pnewBB)
+{
+    int ip = start;
+    while (ip <= end)
+    {
+        at(ip).inBB = pnewBB;
+        ++ip;
+    }
+}
+
+/* Overwrites the immediate operand of icode ip */
+void CIcodeRec::SetImmediateOp(int ip, dword dw)
+{
+    ICODE &target = at(ip);
+    target.ic.ll.immed.op = dw;
+}
+
+/* Sets the given bits in the low-level flags of icode ip */
+void CIcodeRec::SetLlFlag(int ip, dword flag)
+{
+    ICODE &target = at(ip);
+    target.ic.ll.flg |= flag;
+}
+
+/* Low-level flags of icode ip */
+dword CIcodeRec::GetLlFlag(int ip)
+{
+    const ICODE &target = at(ip);
+    return target.ic.ll.flg;
+}
+
+/* Clears the given bits in the low-level flags of icode ip */
+void CIcodeRec::ClearLlFlag(int ip, dword flag)
+{
+    ICODE &target = at(ip);
+    target.ic.ll.flg &= (~flag);
+}
+
+/* Sets the invalid marker of icode ip to fInv */
+void CIcodeRec::SetLlInvalid(int ip, boolT fInv)
+{
+    ICODE &target = at(ip);
+    target.invalid = fInv;
+}
+
+/* Label of icode ip */
+dword CIcodeRec::GetLlLabel(int ip)
+{
+    const ICODE &target = at(ip);
+    return target.ic.ll.label;
+}
+
+/* Low-level opcode of icode ip */
+llIcode CIcodeRec::GetLlOpcode(int ip)
+{
+    const ICODE &target = at(ip);
+    return target.ic.ll.opcode;
+}
+
+
+/* labelSrch - Searches the icodes for the first instruction with
+   label = target.  When found, *pIndex receives its icode index and TRUE is
+   returned; otherwise *pIndex is left alone and FALSE is returned. */
+boolT CIcodeRec::labelSrch(dword target, Int *pIndex)
+{
+    Int pos = 0;
+
+    while (pos < size())
+    {
+        if (at(pos).ic.ll.label == target)
+        {
+            *pIndex = pos;
+            return TRUE;
+        }
+        pos++;
+    }
+    return FALSE;
+}
+
+/* Address of icode ip */
+ICODE * CIcodeRec::GetIcode(int ip)
+{
+    ICODE &target = at(ip);
+    return &target;
+}
+
+
+
+
diff --git a/src/idioms.cpp b/src/idioms.cpp
new file mode 100644
index 0000000..e8f10f1
--- /dev/null
+++ b/src/idioms.cpp
@@ -0,0 +1,1344 @@
+/*****************************************************************************
+ *          dcc project machine idiom recognition
+ * (C) Cristina Cifuentes
+ ****************************************************************************/
+
+#include "dcc.h"
+#include
+#ifdef __DOSWIN__
+#include
+#endif
+
+
+/*****************************************************************************
+ * JmpInst - Returns TRUE if opcode is a conditional or unconditional jump
+ ****************************************************************************/
+boolT JmpInst(llIcode opcode)
+{
+    switch (opcode) {
+    case iJMP:  case iJMPF: case iJCXZ:
+    case iLOOP: case iLOOPE:case iLOOPNE:
+    case iJB:   case iJBE:  case iJAE:  case iJA:
+    case iJL:   case iJLE:  case iJGE:  case iJG:
+    case iJE:   case iJNE:  case iJS:   case iJNS:
+    case iJO:   case iJNO:  case iJP:   case iJNP:
+        return TRUE;
+    default:    /* every other opcode is not a jump */
+        return FALSE;
+    }
+}
+
+
+/*****************************************************************************
+ * checkStkVars - Checks for PUSH SI
+ *                          [PUSH DI]
+ *                or         PUSH DI
+ *                          [PUSH SI]
+ * In which case, the stack variable flags are set
+ * Returns the number of PUSH instructions matched (0, 1 or 2).
+ ****************************************************************************/
+static Int checkStkVars (ICODE * pIcode, ICODE * pEnd, Function * pProc)
+{
+    /* The first instruction has to be a PUSH of SI or of DI */
+    if (!(pIcode < pEnd) || (pIcode->ic.ll.opcode != iPUSH))
+        return 0;
+
+    const bool siFirst = (pIcode->ic.ll.dst.regi == rSI);
+    if (!siFirst && (pIcode->ic.ll.dst.regi != rDI))
+        return 0;
+
+    if (siFirst)
+        pProc->flg |= SI_REGVAR;
+    else
+        pProc->flg |= DI_REGVAR;
+
+    /* Look for a PUSH of the other index register right behind it */
+    ICODE *second = pIcode + 1;
+    if ((second < pEnd) && (second->ic.ll.opcode == iPUSH))
+    {
+        if (siFirst && (second->ic.ll.dst.regi == rDI))
+        {
+            pProc->flg |= DI_REGVAR;
+            return 2;
+        }
+        if (!siFirst && (second->ic.ll.dst.regi == rSI))
+        {
+            pProc->flg |= SI_REGVAR;
+            return 2;
+        }
+    }
+    return 1;
+}
+
+
+/*****************************************************************************
+ * idiom1 - HLL procedure prologue;  Returns number of instructions matched.
+ *          PUSH BP     ==> ENTER immed, 0
+ *          MOV  BP, SP     and sets PROC_HLL flag
+ *          [SUB  SP, immed]
+ *          [PUSH SI]
+ *          [PUSH DI]
+ *        - Second version: Push stack variables and then save BP
+ *          PUSH BP
+ *          PUSH SI
+ *          [PUSH DI]
+ *          MOV BP, SP
+ *        - Third version: Stack variables
+ *          [PUSH SI]
+ *          [PUSH DI]
+ * NOTE(review): the opcode of the first instruction is not tested here -
+ * presumably the caller only passes PUSH icodes; confirm.
+ ****************************************************************************/
+static Int idiom1(ICODE * pIcode, ICODE * pEnd, Function * pProc)
+{
+    Int n;
+
+    /* Third version: no PUSH BP, only the stack variables can match */
+    if ((pIcode->ic.ll.flg & I) || pIcode->ic.ll.src.regi != rBP)
+        return (checkStkVars (pIcode, pEnd, pProc));
+
+    /* PUSH BP as first instruction of procedure */
+    /* MOV BP, SP as next instruction */
+    if (++pIcode < pEnd && ! (pIcode->ic.ll.flg & (I | TARGET | CASE))
+        && pIcode->ic.ll.opcode == iMOV && pIcode->ic.ll.dst.regi == rBP
+        && pIcode->ic.ll.src.regi == rSP)
+    {
+        pProc->args.minOff = 2;
+        pProc->flg |= PROC_IS_HLL;
+
+        /* Look for SUB SP, immed */
+        if ((++pIcode < pEnd) &&
+            (pIcode->ic.ll.flg & (I | TARGET | CASE)) == I &&
+            pIcode->ic.ll.opcode == iSUB && pIcode->ic.ll.dst.regi == rSP)
+        {
+            return (3 + checkStkVars (++pIcode, pEnd, pProc));
+        }
+        return (2 + checkStkVars (pIcode, pEnd, pProc));
+    }
+
+    /* PUSH SI
+     * [PUSH DI]
+     * MOV BP, SP */
+    n = checkStkVars (pIcode, pEnd, pProc);
+    if (n <= 0)
+        return 0;       // Cristina: check this please!
+
+    /* Look for MOV BP, SP */
+    pIcode += n;
+    if (pIcode < pEnd &&
+        ! (pIcode->ic.ll.flg & (I | TARGET | CASE)) &&
+        pIcode->ic.ll.opcode == iMOV &&
+        pIcode->ic.ll.dst.regi == rBP &&
+        pIcode->ic.ll.src.regi == rSP)
+    {
+        pProc->args.minOff = 2 + (n * 2);
+        return (2 + n);
+    }
+    return 0;           // Cristina: check this please!
+}
+
+
+/*****************************************************************************
+ * popStkVars - checks for
+ *          [POP DI]
+ *          [POP SI]
+ *      or  [POP SI]
+ *          [POP DI]
+ * A matching POP of a register variable is invalidated.  Both pIcode and
+ * pIcode+1 are inspected.
+ * NOTE(review): callers pass pIcode-2 / pIcode-3; nothing here checks that
+ * this still lies inside the icode array - confirm at the call sites.
+ ****************************************************************************/
+static void popStkVars (ICODE * pIcode, ICODE * pEnd, Function * pProc)
+{
+    /* Match [POP DI] */
+    if (pIcode->ic.ll.opcode == iPOP)
+    {
+        if ((pProc->flg & DI_REGVAR) && (pIcode->ic.ll.dst.regi == rDI))
+            pIcode->invalidate();
+        else if ((pProc->flg & SI_REGVAR) && (pIcode->ic.ll.dst.regi == rSI))
+            pIcode->invalidate();
+    }
+
+    /* Match [POP SI] */
+    if ((pIcode+1)->ic.ll.opcode == iPOP)
+    {
+        if ((pProc->flg & SI_REGVAR) && ((pIcode+1)->ic.ll.dst.regi == rSI))
+            (pIcode+1)->invalidate();
+        else if ((pProc->flg & DI_REGVAR) && ((pIcode+1)->ic.ll.dst.regi == rDI))
+            (pIcode+1)->invalidate();
+    }
+}
+
+
+/*****************************************************************************
+ * idiom2 - HLL procedure epilogue;  Returns number of instructions matched.
+ *          [POP DI]
+ *          [POP SI]
+ *          MOV  SP, BP
+ *          POP  BP
+ *          RET(F)
+ *****************************************************************************/
+static Int idiom2(ICODE * pIcode, ICODE * pEnd, Int ip, Function * pProc)
+{
+    ICODE * nicode;
+
+    /* Match MOV SP, BP */
+    if (ip == 0 || ((pIcode->ic.ll.flg & I) == I) ||
+        pIcode->ic.ll.dst.regi != rSP || pIcode->ic.ll.src.regi != rBP)
+        return 0;
+
+    /* Get next icode, skip over holes in the icode array; the scan must not
+     * run past the end of the array */
+    nicode = pIcode + 1;
+    while (nicode < pEnd && (nicode->ic.ll.flg & NO_CODE))
+        nicode++;
+
+    /* Match POP BP */
+    if (nicode < pEnd &&
+        ! (nicode->ic.ll.flg & (I | TARGET | CASE)) &&
+        nicode->ic.ll.opcode == iPOP &&
+        nicode->ic.ll.dst.regi == rBP)
+    {
+        nicode++;
+
+        /* Match RET(F) */
+        if (nicode < pEnd &&
+            ! (nicode->ic.ll.flg & (I | TARGET | CASE)) &&
+            (nicode->ic.ll.opcode == iRET ||
+             nicode->ic.ll.opcode == iRETF))
+        {
+            popStkVars (pIcode-2, pEnd, pProc);
+            return 2;
+        }
+    }
+    return 0;
+}
+
+
+
+/*****************************************************************************
+ * idiom3 - C calling convention.
+ *          CALL(F) proc_X
+ *          ADD  SP, immed
+ *      Eg: CALL proc_X
+ *          ADD  SP, 6
+ *          =>  pProc->cbParam = immed
+ *      Special case: when the call is at the end of the procedure,
+ *                    sometimes the stack gets restored by a MOV sp, bp.
+ *                    Need to flag the procedure in these cases.
+ *  Used by compilers to restore the stack when invoking a procedure using
+ *  the C calling convention.
+ *  Returns the immediate of the ADD, or 0 when there is no ADD SP, immed.
+ ****************************************************************************/
+static Int idiom3(ICODE * pIcode, ICODE * pEnd)
+{
+    /* The call is the last icode: there is nothing behind it to look at */
+    if (!(++pIcode < pEnd))
+        return 0;
+
+    /* Match ADD SP, immed */
+    if ((pIcode->ic.ll.flg & I) &&
+        (pIcode->ic.ll.opcode == iADD) && (pIcode->ic.ll.dst.regi == rSP))
+        return (pIcode->ic.ll.immed.op);
+
+    /* MOV SP, BP restores the stack: flag the call icode */
+    if ((pIcode->ic.ll.opcode == iMOV) && (pIcode->ic.ll.dst.regi == rSP)
+        && (pIcode->ic.ll.src.regi == rBP))
+        (pIcode-1)->ic.ll.flg |= REST_STK;
+    return 0;
+}
+
+
+/*****************************************************************************
+ * idiom 17 - C calling convention.
+ *          CALL(F) xxxx
+ *          POP reg
+ *          [POP reg]           reg in {AX, BX, CX, DX}
+ *      Eg: CALL proc_X
+ *          POP cx
+ *          POP cx      (4 bytes of arguments)
+ *          => pProc->cbParam = # pops * 2
+ *  Found in Turbo C when restoring the stack for a procedure that uses the
+ *  C calling convention.  Used to restore the stack of 2 or 4 bytes args.
+ ****************************************************************************/
+static Int idiom17 (ICODE * pIcode, ICODE * pEnd)
+{
+    Int i = 0;      /* Count on # pops */
+    byte regi;
+
+    /* Match POP reg */
+    if (!(++pIcode < pEnd) || (pIcode->ic.ll.opcode != iPOP))
+        return (0);
+
+    /* Only a pop into one of the registers named above restores the stack
+     * after a call; any other register means the idiom does not apply */
+    regi = pIcode->ic.ll.dst.regi;
+    if ((regi < rAX) || (regi > rBX))
+        return (0);
+    i++;
+
+    /* Count the following pops into the same register, staying inside the
+     * icode array */
+    while ((++pIcode < pEnd) && (pIcode->ic.ll.opcode == iPOP))
+    {
+        if (pIcode->ic.ll.dst.regi != regi)
+            break;
+        i++;
+    }
+    return (i * 2);
+}
+
+
+/*****************************************************************************
+ * idiom4 - Pascal calling convention.
+ *          RET(F) immed
+ *          ==> pProc->cbParam = immed
+ *              sets CALL_PASCAL flag
+ *        - Second version: check for optional pop of stack vars
+ *          [POP DI]
+ *          [POP SI]
+ *          POP BP
+ *          RET(F) [immed]
+ *        - Third version: pop stack vars
+ *          [POP DI]
+ *          [POP SI]
+ *          RET(F) [immed]
+ * NOTE(review): pIcode-1 .. pIcode-3 are read without checking that the
+ * RET has that many predecessors in the icode array - confirm at the caller.
+ ****************************************************************************/
+static void idiom4 (ICODE * pIcode, ICODE * pEnd, Function * pProc)
+{
+    ICODE *prev = pIcode - 1;   /* instruction in front of the RET(F) */
+
+    /* Check for [POP DI]
+     *           [POP SI] */
+    popStkVars (pIcode-3, pEnd, pProc);
+
+    /* Check for POP BP */
+    if ((prev->ic.ll.opcode == iPOP) &&
+        ((prev->ic.ll.flg & I) != I) &&
+        (prev->ic.ll.dst.regi == rBP))
+        prev->invalidate();
+    else
+        popStkVars (pIcode-2, pEnd, pProc);
+
+    /* Check for RET(F) immed */
+    if (pIcode->ic.ll.flg & I)
+    {
+        pProc->cbParam = (int16)pIcode->ic.ll.immed.op;
+        pProc->flg |= CALL_PASCAL;
+    }
+}
+
+
+/*****************************************************************************
+ * idiom5 - Long addition.
+ *      ADD reg/stackOff, reg/stackOff
+ *      ADC reg/stackOff, reg/stackOff
+ *      Eg:     ADD ax, [bp-4]
+ *              ADC dx, [bp-2]
+ *          =>  dx:ax = dx:ax + [bp-2]:[bp-4]
+ *      Found in Borland Turbo C code.
+ *      Commonly used idiom for long addition.
+ ****************************************************************************/
+static boolT idiom5 (ICODE * pIcode, ICODE * pEnd)
+{
+    /* the ADC has to exist: the instruction behind pIcode is looked at */
+    if ((pIcode+1) < pEnd)
+        if ((pIcode+1)->ic.ll.opcode == iADC)
+            return (TRUE);
+    return (FALSE);
+}
+
+
+/*****************************************************************************
+ * idiom6 - Long substraction.
+ *      SUB reg/stackOff, reg/stackOff
+ *      SBB reg/stackOff, reg/stackOff
+ *      Eg:     SUB ax, [bp-4]
+ *              SBB dx, [bp-2]
+ *          =>  dx:ax = dx:ax - [bp-2]:[bp-4]
+ *      Found in Borland Turbo C code.
+ *      Commonly used idiom for long substraction.
+ ****************************************************************************/
+static boolT idiom6 (ICODE * pIcode, ICODE * pEnd)
+{
+    /* the SBB has to exist: the instruction behind pIcode is looked at */
+    if ((pIcode+1) < pEnd)
+        if ((pIcode+1)->ic.ll.opcode == iSBB)
+            return (TRUE);
+    return (FALSE);
+}
+
+
+/*****************************************************************************
+ * idiom7 - Assign zero
+ *      XOR reg/stackOff, reg/stackOff
+ *      Eg:     XOR ax, ax
+ *          =>  ax = 0
+ *      Found in Borland Turbo C and Microsoft C code.
+ ****************************************************************************/
+static boolT idiom7 (ICODE * pIcode)
+{
+    ICODEMEM *dst = &pIcode->ic.ll.dst;
+    ICODEMEM *src = &pIcode->ic.ll.src;
+
+    /* global variable */
+    if (dst->regi == 0)
+    {
+        if ((dst->segValue == src->segValue) && (dst->off == src->off))
+            return (TRUE);
+        return (FALSE);
+    }
+
+    /* register */
+    if (dst->regi < INDEXBASE)
+    {
+        if (dst->regi == src->regi)
+            return (TRUE);
+        return (FALSE);
+    }
+
+    /* offset from BP */
+    if ((dst->off) && (dst->seg == rSS) && (dst->regi == INDEXBASE + 6))
+    {
+        if ((dst->off == src->off) && (dst->seg == src->seg) &&
+            (dst->regi == src->regi))
+            return (TRUE);
+    }
+    return (FALSE);
+}
+
+
+/*****************************************************************************
+ * idiom21 - Assign long kte with high part zero
+ *      XOR regH, regH
+ *      MOV regL, kte
+ *      => regH:regL = kte
+ *      Eg: XOR dx, dx
+ *          MOV ax, 3
+ *          => dx:ax = 3
+ *      Note: only the following valid combinations are available:
+ *              dx:ax
+ *              cx:bx
+ *      Found in Borland Turbo C code.
+ ****************************************************************************/
+static boolT idiom21 (ICODE * picode, ICODE * pend)
+{
+    ICODEMEM *dst = &picode->ic.ll.dst;
+    ICODEMEM *src = &picode->ic.ll.src;
+    ICODE *next = picode + 1;       /* the instruction loading the low part */
+
+    /* the second instruction must exist and carry an immediate operand */
+    if (!(next < pend) || !(next->ic.ll.flg & I))
+        return (FALSE);
+
+    /* XOR of a register with itself */
+    if ((dst->regi != src->regi) || !(dst->regi > 0) ||
+        !(dst->regi < INDEXBASE))
+        return (FALSE);
+
+    if ((dst->regi == rDX) && (next->ic.ll.dst.regi == rAX))
+        return (TRUE);
+    if ((dst->regi == rCX) && (next->ic.ll.dst.regi == rBX))
+        return (TRUE);
+    return (FALSE);
+}
+
+
+/*****************************************************************************
+ * idiom8 - Shift right by 1 (signed long ops)
+ *      SAR reg, 1
+ *      RCR reg, 1
+ *      Eg:     SAR dx, 1
+ *              RCR ax, 1
+ *          =>  dx:ax = dx:ax >> 1      (dx:ax are signed long)
+ *      Found in Microsoft C code for long signed variable shift right.
+ ****************************************************************************/
+static boolT idiom8 (ICODE * pIcode, ICODE * pEnd)
+{
+    /* the RCR has to exist: the instruction behind pIcode is looked at */
+    if ((pIcode+1) < pEnd)
+    {
+        if (((pIcode->ic.ll.flg & I) == I) && (pIcode->ic.ll.immed.op == 1))
+            if (((pIcode+1)->ic.ll.opcode == iRCR) &&
+                (((pIcode+1)->ic.ll.flg & I) == I) &&
+                ((pIcode+1)->ic.ll.immed.op == 1))
+                return (TRUE);
+    }
+    return (FALSE);
+}
+
+
+/*****************************************************************************
+ * idiom 15 - Shift left by n
+ *      SHL reg, 1
+ *      SHL reg, 1
+ *      [...]
+ *      [SHL    reg, 1]
+ *      Eg:     SHL ax, 1
+ *              SHL ax, 1
+ *          =>  ax = ax << 2
+ *      Found in Borland Turbo C code to index an array (array multiplication)
+ *      Returns the number of shifts matched, 0 when fewer than two.
+ ****************************************************************************/
+static Int idiom15 (ICODE * picode, ICODE * pend)
+{
+    Int n = 1;
+
+    /* Match SHL reg, 1 */
+    if ((picode < pend) &&
+        (picode->ic.ll.flg & I) && (picode->ic.ll.immed.op == 1))
+    {
+        const byte regi = picode->ic.ll.dst.regi;
+
+        /* count the directly following SHL regi, 1 instructions */
+        while (((picode+n) < pend) &&
+               ((picode+n)->ic.ll.opcode == iSHL) &&
+               ((picode+n)->ic.ll.flg & I) &&
+               ((picode+n)->ic.ll.immed.op == 1) &&
+               ((picode+n)->ic.ll.dst.regi == regi))
+            n++;
+    }
+    return (n > 1) ? n : 0;
+}
+
+
+/*****************************************************************************
+ * idiom12 - Shift left long by 1
+ *      SHL reg, 1
+ *      RCL reg, 1
+ *      Eg:     SHL ax, 1
+ *              RCL dx, 1
+ *          =>  dx:ax = dx:ax << 1
+ *      Found in Borland Turbo C code for long variable shift left.
+ ****************************************************************************/ +static boolT idiom12 (ICODE * pIcode, ICODE * pEnd) +{ + if (pIcode < pEnd) + { + if (((pIcode->ic.ll.flg & I) == I) && (pIcode->ic.ll.immed.op == 1)) + if (((pIcode+1)->ic.ll.opcode == iRCL) && + (((pIcode+1)->ic.ll.flg & I) == I) && + ((pIcode+1)->ic.ll.immed.op == 1)) + return (TRUE); + } + return (FALSE); +} + + +/***************************************************************************** + * idiom9 - Shift right by 1 (unsigned long ops) + * SHR reg, 1 + * RCR reg, 1 + * Eg: SHR dx, 1 + * RCR ax, 1 + * => dx:ax = dx:ax >> 1 (dx:ax are unsigned long) + * Found in Microsoft C code for long unsigned variable shift right. + ****************************************************************************/ +static boolT idiom9 (ICODE * pIcode, ICODE * pEnd) +{ + if (pIcode < pEnd) + { + if (((pIcode->ic.ll.flg & I) == I) && (pIcode->ic.ll.immed.op == 1)) + if (((pIcode+1)->ic.ll.opcode == iRCR) && + (((pIcode+1)->ic.ll.flg & I) == I) && + ((pIcode+1)->ic.ll.immed.op == 1)) + return (TRUE); + } + return (FALSE); +} + + +/***************************************************************************** + * idiom10 - Jump if not equal to 0 + * OR reg, reg + * JNE labX + * Eg: OR ax, ax + * JNE labX + * => HLI_JCOND (ax != 0) labX + * Note: we also check that these instructions are not followed by + * CMP reg, kte + * JE lab + * because this is most likely a long conditional equality test. + * Found in Borland Turbo C. + ****************************************************************************/ +static boolT idiom10old (ICODE * pIcode, ICODE * pEnd) +{ + if (pIcode < pEnd) + { + /* Check OR reg, reg */ + if (((pIcode->ic.ll.flg & I) != I) && + (pIcode->ic.ll.src. 
regi > 0) && + (pIcode->ic.ll.src.regi < INDEXBASE) && + (pIcode->ic.ll.src.regi == pIcode->ic.ll.dst.regi)) + if ((pIcode+3) < pEnd) + { + if (((pIcode+1)->ic.ll.opcode == iJNE) && + ((pIcode+2)->ic.ll.opcode != iCMP) && + ((pIcode+3)->ic.ll.opcode != iJE)) + return (TRUE); + } + else /* at the end of the procedure */ + if (((pIcode+1) < pEnd) && ((pIcode+1)->ic.ll.opcode == iJNE)) + return (TRUE); + } + return (FALSE); +} + + +/***************************************************************************** + * idiom10 - Jump if not equal to 0 + * OR reg, reg + * JNE labX + * Eg: OR ax, ax + * JNE labX + * => CMP reg 0 + * JNE labX + * This instruction is NOT converted into the equivalent high-level + * instruction "HLI_JCOND (reg != 0) labX" because we do not know yet if + * it forms part of a long register conditional test. It is therefore + * modified to simplify the analysis. + * Found in Borland Turbo C. + ****************************************************************************/ +static void idiom10 (ICODE * pIcode, ICODE * pEnd) +{ + if (pIcode < pEnd) + { + /* Check OR reg, reg */ + if (((pIcode->ic.ll.flg & I) != I) && + (pIcode->ic.ll.src. regi > 0) && + (pIcode->ic.ll.src.regi < INDEXBASE) && + (pIcode->ic.ll.src.regi == pIcode->ic.ll.dst.regi)) + if (((pIcode+1) < pEnd) && ((pIcode+1)->ic.ll.opcode == iJNE)) + { + pIcode->ic.ll.opcode = iCMP; + pIcode->ic.ll.flg |= I; + pIcode->ic.ll.immed.op = 0; + pIcode->du.def = 0; + pIcode->du1.numRegsDef = 0; + } + } +} + + +/***************************************************************************** + * idiom 13 - Word assign + * MOV regL, mem + * MOV regH, 0 + * Eg: MOV al, [bp-2] + * MOV ah, 0 + * => MOV ax, [bp-2] + * Found in Borland Turbo C, used for multiplication and division of + * byte operands (ie. they need to be extended to words). 
+ ****************************************************************************/ +static byte idiom13 (ICODE * picode, ICODE * pend) +{ byte regi; + + if (picode < pend) + { + /* Check for regL */ + regi = picode->ic.ll.dst.regi; + if (((picode->ic.ll.flg & I) != I) && (regi >= rAL) && (regi <= rBH)) + { + /* Check for MOV regH, 0 */ + if (((picode+1)->ic.ll.opcode == iMOV) && + ((picode+1)->ic.ll.flg & I) && + ((picode+1)->ic.ll.immed.op == 0)) + { + if ((picode+1)->ic.ll.dst.regi == (regi + 4)) + return (regi - rAL + rAX); + } + } + } + return (0); +} + + +/***************************************************************************** + * idiom 14 - Long word assign + * MOV regL, mem/reg + * XOR regH, regH + * Eg: MOV ax, di + * XOR dx, dx + * => MOV dx:ax, di + * Note: only the following combinations are allowed: + * dx:ax + * cx:bx + * this is to remove the possibility of making errors in situations + * like this: + * MOV dx, offH + * MOV ax, offL + * XOR cx, cx + * Found in Borland Turbo C, used for division of unsigned integer + * operands. + ****************************************************************************/ +static boolT idiom14 (ICODE * picode, ICODE * pend, byte *regL, byte *regH) +{ + if (picode < pend) + { + /* Check for regL */ + *regL = picode->ic.ll.dst.regi; + if (((picode->ic.ll.flg & I) != I) && ((*regL == rAX) || (*regL ==rBX))) + { + /* Check for XOR regH, regH */ + if (((picode+1)->ic.ll.opcode == iXOR) && + (((picode+1)->ic.ll.flg & I) != I)) + { + *regH = (picode+1)->ic.ll.dst.regi; + if (*regH == (picode+1)->ic.ll.src.regi) + { + if ((*regL == rAX) && (*regH == rDX)) + return (TRUE); + if ((*regL == rBX) && (*regH == rCX)) + return (TRUE); + } + } + } + } + return (FALSE); +} + +/***************************************************************************** + * idiom11 - Negate long integer + * NEG regH + * NEG regL + * SBB regH, 0 + * Eg: NEG dx + * NEG ax + * SBB dx, 0 + * => dx:ax = - dx:ax + * Found in Borland Turbo C. 
+ ****************************************************************************/ +static boolT idiom11 (ICODE * pIcode, ICODE * pEnd) +{ condId type; /* type of argument */ + + if ((pIcode + 2) < pEnd) + { + type = pIcode->idType(DST); + if ((type != CONSTANT) && (type != OTHER)) + { + /* Check NEG reg/mem + * SBB reg/mem, 0*/ + if (((pIcode+1)->ic.ll.opcode == iNEG) && + ((pIcode+2)->ic.ll.opcode == iSBB)) + switch (type) { + case GLOB_VAR: if (((pIcode+2)->ic.ll.dst.segValue == + pIcode->ic.ll.dst.segValue) && + ((pIcode+2)->ic.ll.dst.off == + pIcode->ic.ll.dst.off)) + return (TRUE); + break; + case REGISTER: if ((pIcode+2)->ic.ll.dst.regi == + pIcode->ic.ll.dst.regi) + return (TRUE); + break; + case PARAM: + case LOCAL_VAR: if ((pIcode+2)->ic.ll.dst.off == + pIcode->ic.ll.dst.off) + return (TRUE); + break; + } + } + } + return (FALSE); +} + + +/***************************************************************************** + * idiom 16: Bitwise negation + * NEG reg + * SBB reg, reg + * INC reg + * => ASGN reg, !reg + * Eg: NEG ax + * SBB ax, ax + * INC ax + * => ax = !ax + * Found in Borland Turbo C when negating bitwise. 
+ ****************************************************************************/ +static boolT idiom16 (ICODE * picode, ICODE * pend) +{ byte regi; + + if ((picode+2) < pend) + { + regi = picode->ic.ll.dst.regi; + if ((regi >= rAX) && (regi < INDEXBASE)) + { + if (((picode+1)->ic.ll.opcode == iSBB) && + ((picode+2)->ic.ll.opcode == iINC)) + if (((picode+1)->ic.ll.dst.regi == + ((picode+1)->ic.ll.src.regi)) && + ((picode+1)->ic.ll.dst.regi == regi) && + ((picode+2)->ic.ll.dst.regi == regi)) + return (TRUE); + } + } + return (FALSE); +} + + +/***************************************************************************** + * idiom 18: Post-increment or post-decrement in a conditional jump + * MOV reg, var (including register variables) + * INC var or DEC var + * CMP var, Y + * JX label + * => HLI_JCOND (var++ X Y) + * Eg: MOV ax, si + * INC si + * CMP ax, 8 + * JL labX + * => HLI_JCOND (si++ < 8) + * Found in Borland Turbo C. Intrinsic to C languages. + ****************************************************************************/ +static boolT idiom18 (ICODE * picode, ICODE * pend, Function * pproc) +{ boolT type = 0; /* type of variable: 1 = reg-var, 2 = local */ + byte regi; /* register of the MOV */ + + /* Get variable */ + if (picode->ic.ll.dst.regi == 0) /* global variable */ + /* not supported yet */ ; + else if (picode->ic.ll.dst.regi < INDEXBASE) /* register */ + { + if ((picode->ic.ll.dst.regi == rSI) && (pproc->flg & SI_REGVAR)) + type = 1; + else if ((picode->ic.ll.dst.regi == rDI) && (pproc->flg & DI_REGVAR)) + type = 1; + } + else if (picode->ic.ll.dst.off) /* local variable */ + type = 2; + else /* indexed */ + /* not supported yet */ ; + + /* Check previous instruction for a MOV */ + if (type == 1) /* register variable */ + { + if (((picode-1)->ic.ll.opcode == iMOV) && + ((picode-1)->ic.ll.src.regi == picode->ic.ll.dst.regi)) + { + regi = (picode-1)->ic.ll.dst.regi; + if ((regi > 0) && (regi < INDEXBASE)) + { + if ((picode < pend) && ((picode+1) < pend) 
&& + ((picode+1)->ic.ll.opcode == iCMP) && + ((picode+1)->ic.ll.dst.regi == regi) && + (((picode+2)->ic.ll.opcode >= iJB) && + ((picode+2)->ic.ll.opcode < iJCXZ))) + return (TRUE); + } + } + } + else if (type == 2) /* local */ + { + if (((picode-1)->ic.ll.opcode == iMOV) && + ((picode-1)->ic.ll.src.off == picode->ic.ll.dst.off)) + { + regi = (picode-1)->ic.ll.dst.regi; + if ((regi > 0) && (regi < INDEXBASE)) + { + if ((picode < pend) && ((picode+1) < pend) && + ((picode+1)->ic.ll.opcode == iCMP) && + ((picode+1)->ic.ll.dst.regi == regi) && + (((picode+2)->ic.ll.opcode >= iJB) && + ((picode+2)->ic.ll.opcode < iJCXZ))) + return (TRUE); + } + } + } + return (FALSE); +} + + +/***************************************************************************** + * idiom 19: pre-increment or pre-decrement in conditional jump, comparing + * against 0. + * INC var or DEC var (including register vars) + * JX lab JX lab + * => HLI_JCOND (++var X 0) or HLI_JCOND (--var X 0) + * Eg: INC [bp+4] + * JG lab2 + * => HLI_JCOND (++[bp+4] > 0) + * Found in Borland Turbo C. Intrinsic to C language. 
+ ****************************************************************************/ +static boolT idiom19 (ICODE * picode, ICODE * pend, Function * pproc) +{ + if (picode->ic.ll.dst.regi == 0) /* global variable */ + /* not supported yet */ ; + else if (picode->ic.ll.dst.regi < INDEXBASE) /* register */ + { + if (((picode->ic.ll.dst.regi == rSI) && (pproc->flg & SI_REGVAR)) || + ((picode->ic.ll.dst.regi == rDI) && (pproc->flg & DI_REGVAR))) + if ((picode < pend) && ((picode+1)->ic.ll.opcode >= iJB) && + ((picode+1)->ic.ll.opcode < iJCXZ)) + return (TRUE); + } + else if (picode->ic.ll.dst.off) /* stack variable */ + { + if ((picode < pend) && ((picode+1)->ic.ll.opcode >= iJB) && + ((picode+1)->ic.ll.opcode < iJCXZ)) + return (TRUE); + } + else /* indexed */ + /* not supported yet */ ; + return (FALSE); +} + + +/***************************************************************************** + * idiom20: Pre increment/decrement in conditional expression (compares + * against a register, variable or constant different than 0). + * INC var or DEC var (including register vars) + * MOV reg, var MOV reg, var + * CMP reg, Y CMP reg, Y + * JX lab JX lab + * => HLI_JCOND (++var X Y) or HLI_JCOND (--var X Y) + * Eg: INC si (si is a register variable) + * MOV ax, si + * CMP ax, 2 + * JL lab4 + * => HLI_JCOND (++si < 2) + * Found in Turbo C. Intrinsic to C language. 
+ ****************************************************************************/ +static boolT idiom20 (ICODE * picode, ICODE * pend, Function * pproc) +{ boolT type = 0; /* type of variable: 1 = reg-var, 2 = local */ + byte regi; /* register of the MOV */ + + /* Get variable */ + if (picode->ic.ll.dst.regi == 0) /* global variable */ + /* not supported yet */ ; + else if (picode->ic.ll.dst.regi < INDEXBASE) /* register */ + { + if ((picode->ic.ll.dst.regi == rSI) && (pproc->flg & SI_REGVAR)) + type = 1; + else if ((picode->ic.ll.dst.regi == rDI) && (pproc->flg & DI_REGVAR)) + type = 1; + } + else if (picode->ic.ll.dst.off) /* local variable */ + type = 2; + else /* indexed */ + /* not supported yet */ ; + + /* Check previous instruction for a MOV */ + if (type == 1) /* register variable */ + { + if ((picode < pend) && ((picode+1)->ic.ll.opcode == iMOV) && + ((picode+1)->ic.ll.src.regi == picode->ic.ll.dst.regi)) + { + regi = (picode+1)->ic.ll.dst.regi; + if ((regi > 0) && (regi < INDEXBASE)) + { + if (((picode+1) < pend) && ((picode+2) < pend) && + ((picode+2)->ic.ll.opcode == iCMP) && + ((picode+2)->ic.ll.dst.regi == regi) && + (((picode+3)->ic.ll.opcode >= iJB) && + ((picode+3)->ic.ll.opcode < iJCXZ))) + return (TRUE); + } + } + } + else if (type == 2) /* local */ + { + if ((picode < pend) && ((picode+1)->ic.ll.opcode == iMOV) && + ((picode+1)->ic.ll.src.off == picode->ic.ll.dst.off)) + { + regi = (picode+1)->ic.ll.dst.regi; + if ((regi > 0) && (regi < INDEXBASE)) + { + if (((picode+1) < pend) && ((picode+2) < pend) && + ((picode+2)->ic.ll.opcode == iCMP) && + ((picode+2)->ic.ll.dst.regi == regi) && + (((picode+3)->ic.ll.opcode >= iJB) && + ((picode+3)->ic.ll.opcode < iJCXZ))) + return (TRUE); + } + } + } + return (FALSE); +} + + +/***************************************************************************** + * findIdioms - translates LOW_LEVEL icode idioms into HIGH_LEVEL icodes. 
+ ****************************************************************************/ +void Function::findIdioms() +{ + Int ip; /* Index to current icode */ + ICODE * pEnd, * pIcode; /* Pointers to end of BB and current icodes */ + int16 delta; + COND_EXPR *rhs, *lhs; /* Pointers to left and right hand side exps */ + COND_EXPR *exp; /* Pointer to temporal expression */ + Int idx; /* Index into local identifier table */ + byte regH, regL; /* High and low registers for long word reg */ + + pIcode = Icode.GetFirstIcode(); + pEnd = pIcode + Icode.GetNumIcodes(); + ip = 0; + + while (pIcode < pEnd) + { + switch (pIcode->ic.ll.opcode) { + case iDEC: case iINC: + if (idiom18 (pIcode, pEnd, this)) + { + lhs = COND_EXPR::id (pIcode-1, SRC, this, ip, pIcode, eUSE); + if (pIcode->ic.ll.opcode == iDEC) + lhs = COND_EXPR::unary (POST_DEC, lhs); + else + lhs = COND_EXPR::unary (POST_INC, lhs); + rhs = COND_EXPR::id (pIcode+1, SRC, this, ip, pIcode+2, eUSE); + exp = COND_EXPR::boolOp (lhs, rhs, + condOpJCond[(pIcode+2)->ic.ll.opcode - iJB]); + (pIcode+2)->setJCond(exp); + + (pIcode-1)->invalidate(); + pIcode->invalidate(); + (pIcode+1)->invalidate(); + pIcode += 3; + ip += 2; + } + else if (idiom19 (pIcode, pEnd, this)) + { + lhs = COND_EXPR::id (pIcode, DST, this, ip, pIcode+1, eUSE); + if (pIcode->ic.ll.opcode == iDEC) + lhs = COND_EXPR::unary (PRE_DEC, lhs); + else + lhs = COND_EXPR::unary (PRE_INC, lhs); + rhs = COND_EXPR::idKte (0, 2); + exp = COND_EXPR::boolOp (lhs, rhs, + condOpJCond[(pIcode+1)->ic.ll.opcode - iJB]); + (pIcode+1)->setJCond(exp); + pIcode->invalidate(); + pIcode += 2; + ip++; + } + else if (idiom20 (pIcode, pEnd, this)) + { + lhs = COND_EXPR::id (pIcode+1, SRC, this, ip, pIcode, eUSE); + if (pIcode->ic.ll.opcode == iDEC) + lhs = COND_EXPR::unary (PRE_DEC, lhs); + else + lhs = COND_EXPR::unary (PRE_INC, lhs); + rhs = COND_EXPR::id (pIcode+2, SRC, this, ip, pIcode+3, eUSE); + exp = COND_EXPR::boolOp (lhs, rhs, + condOpJCond[(pIcode+3)->ic.ll.opcode - iJB]); + 
(pIcode+3)->setJCond(exp); + pIcode->invalidate(); + (pIcode+1)->invalidate(); + (pIcode+2)->invalidate(); + pIcode += 3; + ip += 2; + } + else + pIcode++; + break; + + case iPUSH: /* Idiom 1 */ + if ((! (flg & PROC_HLL)) && (idx = idiom1 (pIcode, pEnd, this))) + { + flg |= PROC_HLL; + for ( ; idx > 0; idx--) + { + (pIcode++)->invalidate(); + ip++; + } + ip--; + } + else + pIcode++; + break; + + case iMOV: /* Idiom 2 */ + if (idx = idiom2(pIcode, pEnd, ip, this)) + { + pIcode->invalidate(); + (pIcode+1)->invalidate(); + pIcode += 3; + ip += 2; + } + else if (idiom14 (pIcode, pEnd, ®L, ®H)) /* Idiom 14 */ + { + idx = localId.newLongReg (TYPE_LONG_SIGN, regH, regL, ip); + lhs = COND_EXPR::idLongIdx (idx); + pIcode->setRegDU( regH, eDEF); + rhs = COND_EXPR::id (pIcode, SRC, this, ip, pIcode, NONE); + pIcode->setAsgn(lhs, rhs); + (pIcode+1)->invalidate(); + pIcode += 2; + ip++; + } + else if (idx = idiom13 (pIcode, pEnd)) /* Idiom 13 */ + { + lhs = COND_EXPR::idReg (idx, 0, &localId); + pIcode->setRegDU( idx, eDEF); + pIcode->du1.numRegsDef--; /* prev byte reg def */ + rhs = COND_EXPR::id (pIcode, SRC, this, ip, pIcode, NONE); + pIcode->setAsgn(lhs, rhs); + (pIcode+1)->invalidate(); + pIcode += 2; + ip++; + } + else + pIcode++; + break; + + case iCALL: case iCALLF: + /* Check for library functions that return a long register. 
+ * Propagate this result */ + if (pIcode->ic.ll.immed.proc.proc != 0) + if ((pIcode->ic.ll.immed.proc.proc->flg & PROC_ISLIB) && + (pIcode->ic.ll.immed.proc.proc->flg & PROC_IS_FUNC)) + { + if ((pIcode->ic.ll.immed.proc.proc->retVal.type==TYPE_LONG_SIGN) + || (pIcode->ic.ll.immed.proc.proc->retVal.type == + TYPE_LONG_UNSIGN)) + localId.newLongReg(TYPE_LONG_SIGN, rDX, rAX, ip); + } + + /* Check for idioms */ + if (idx = idiom3(pIcode, pEnd)) /* idiom 3 */ + { + if (pIcode->ic.ll.flg & I) + { + (pIcode->ic.ll.immed.proc.proc)->cbParam = (int16)idx; + pIcode->ic.ll.immed.proc.cb = idx; + (pIcode->ic.ll.immed.proc.proc)->flg |= CALL_C; + pIcode++; + (pIcode++)->invalidate(); + ip++; + } + } + else if (idx = idiom17 (pIcode, pEnd)) /* idiom 17 */ + { + if (pIcode->ic.ll.flg & I) + { + (pIcode->ic.ll.immed.proc.proc)->cbParam = (int16)idx; + pIcode->ic.ll.immed.proc.cb = idx; + (pIcode->ic.ll.immed.proc.proc)->flg |= CALL_C; + ip += idx/2 - 1; + pIcode++; + for (idx /= 2; idx > 0; idx--) + (pIcode++)->invalidate(); + } + } + else + pIcode++; + break; + + case iRET: /* Idiom 4 */ + case iRETF: + idiom4 (pIcode, pEnd, this); + pIcode++; + break; + + case iADD: /* Idiom 5 */ + if (idiom5 (pIcode, pEnd)) + { + lhs = COND_EXPR::idLong (&localId, DST, pIcode, LOW_FIRST, + ip, USE_DEF, 1); + rhs = COND_EXPR::idLong (&localId, SRC, pIcode, LOW_FIRST, + ip, eUSE, 1); + exp = COND_EXPR::boolOp (lhs, rhs, ADD); + pIcode->setAsgn(lhs, exp); + (pIcode+1)->invalidate(); + pIcode++; + ip++; + } + pIcode++; + break; + + case iSAR: /* Idiom 8 */ + if (idiom8 (pIcode, pEnd)) + { + idx = localId.newLongReg(TYPE_LONG_SIGN, + pIcode->ic.ll.dst.regi, (pIcode+1)->ic.ll.dst.regi,ip); + lhs = COND_EXPR::idLongIdx (idx); + pIcode->setRegDU( (pIcode+1)->ic.ll.dst.regi, USE_DEF); + rhs = COND_EXPR::idKte (1, 2); + exp = COND_EXPR::boolOp (lhs, rhs, SHR); + pIcode->setAsgn(lhs, exp); + (pIcode+1)->invalidate(); + pIcode++; + ip++; + } + pIcode++; + break; + + case iSHL: + if (idx = idiom15 (pIcode, 
pEnd)) /* idiom 15 */ + { + lhs = COND_EXPR::idReg (pIcode->ic.ll.dst.regi, + pIcode->ic.ll.flg & NO_SRC_B, + &localId); + rhs = COND_EXPR::idKte (idx, 2); + exp = COND_EXPR::boolOp (lhs, rhs, SHL); + pIcode->setAsgn(lhs, exp); + pIcode++; + for (idx-- ; idx > 0; idx--) + { + (pIcode++)->invalidate(); + ip++; + } + } + else if (idiom12 (pIcode, pEnd)) /* idiom 12 */ + { + idx = localId.newLongReg (TYPE_LONG_UNSIGN, + (pIcode+1)->ic.ll.dst.regi, pIcode->ic.ll.dst.regi,ip); + lhs = COND_EXPR::idLongIdx (idx); + pIcode->setRegDU( (pIcode+1)->ic.ll.dst.regi, USE_DEF); + rhs = COND_EXPR::idKte (1, 2); + exp = COND_EXPR::boolOp (lhs, rhs, SHL); + pIcode->setAsgn(lhs, exp); + (pIcode+1)->invalidate(); + pIcode += 2; + ip++; + } + else + pIcode++; + break; + + case iSHR: /* Idiom 9 */ + if (idiom9 (pIcode, pEnd)) + { + idx = localId.newLongReg (TYPE_LONG_UNSIGN, + pIcode->ic.ll.dst.regi, (pIcode+1)->ic.ll.dst.regi,ip); + lhs = COND_EXPR::idLongIdx (idx); + pIcode->setRegDU( (pIcode+1)->ic.ll.dst.regi, USE_DEF); + rhs = COND_EXPR::idKte (1, 2); + exp = COND_EXPR::boolOp (lhs, rhs, SHR); + pIcode->setAsgn(lhs, exp); + (pIcode+1)->invalidate(); + pIcode++; + ip++; + } + pIcode++; + break; + + case iSUB: /* Idiom 6 */ + if (idiom6 (pIcode, pEnd)) + { + lhs = COND_EXPR::idLong (&localId, DST, pIcode, LOW_FIRST, + ip, USE_DEF, 1); + rhs = COND_EXPR::idLong (&localId, SRC, pIcode, LOW_FIRST, + ip, eUSE, 1); + exp = COND_EXPR::boolOp (lhs, rhs, SUB); + pIcode->setAsgn(lhs, exp); + (pIcode+1)->invalidate(); + pIcode++; + ip++; + } + pIcode++; + break; + + case iOR: /* Idiom 10 */ + idiom10 (pIcode, pEnd); + pIcode++; + break; + + case iNEG: /* Idiom 11 */ + if (idiom11 (pIcode, pEnd)) + { + lhs = COND_EXPR::idLong (&localId, DST, pIcode, HIGH_FIRST, + ip, USE_DEF, 1); + rhs = COND_EXPR::unary (NEGATION, lhs); + pIcode->setAsgn(lhs, rhs); + (pIcode+1)->invalidate(); + (pIcode+2)->invalidate(); + pIcode += 3; + ip += 2; + } + else if (idiom16 (pIcode, pEnd)) + { + lhs = 
COND_EXPR::idReg (pIcode->ic.ll.dst.regi, pIcode->ic.ll.flg, + &localId); + rhs = lhs->clone(); + rhs = COND_EXPR::unary (NEGATION, lhs); + pIcode->setAsgn(lhs, rhs); + (pIcode+1)->invalidate(); + (pIcode+2)->invalidate(); + pIcode += 3; + ip += 2; + } + else + pIcode++; + break; + + case iNOP: + (pIcode++)->invalidate(); + break; + + case iENTER: /* ENTER is equivalent to init PUSH bp */ + if (ip == 0) + flg |= (PROC_HLL | PROC_IS_HLL); + pIcode++; + break; + + case iXOR: /* Idiom 7 */ + if (idiom21 (pIcode, pEnd)) + { + lhs = COND_EXPR::idLong (&localId, DST, pIcode,HIGH_FIRST, ip, eDEF, 1); + rhs = COND_EXPR::idKte ((pIcode+1)->ic.ll.immed.op , 4); + pIcode->setAsgn(lhs, rhs); + pIcode->du.use = 0; /* clear register used in iXOR */ + (pIcode+1)->invalidate(); + pIcode++; + ip++; + } + else if (idiom7 (pIcode)) + { + lhs = COND_EXPR::id (pIcode, DST, this, ip, pIcode, NONE); + rhs = COND_EXPR::idKte (0, 2); + pIcode->setAsgn(lhs, rhs); + pIcode->du.use = 0; /* clear register used in iXOR */ + pIcode->ic.ll.flg |= I; + } + pIcode++; + break; + + default: + pIcode++; + } + ip++; + } + + /* Check if number of parameter bytes match their calling convention */ + if ((flg & PROC_HLL) && (!args.sym.empty())) + { + args.minOff += (flg & PROC_FAR ? 4 : 2); + delta = args.maxOff - args.minOff; + if (cbParam != delta) + { + cbParam = delta; + flg |= (CALL_MASK & CALL_UNKNOWN); + } + } +} + + +/* Sets up the TARGET flag for jump target addresses, and + * binds jump target addresses to icode offsets. */ +void Function::bindIcodeOff() +{ + Int i, j; /* idx into icode array */ + ICODE * pIcode; /* ptr icode array */ + dword *p; /* for case table */ + + if (! 
Icode.GetNumIcodes()) /* No Icode */ + return; + pIcode = Icode.GetFirstIcode(); + + /* Flag all jump targets for BB construction and disassembly stage 2 */ + for (i = 0; i < Icode.GetNumIcodes(); i++) + if ((pIcode[i].ic.ll.flg & I) && JmpInst(pIcode[i].ic.ll.opcode)) + { + if (Icode.labelSrch(pIcode[i].ic.ll.immed.op, &j)) + { + pIcode[j].ic.ll.flg |= TARGET; + } + } + + /* Finally bind jump targets to Icode offsets. Jumps for which no label + * is found (no code at dest. of jump) are simply left unlinked and + * flagged as going nowhere. */ + pIcode = Icode.GetFirstIcode(); + for (i = 0; i < Icode.GetNumIcodes(); i++) + if (JmpInst(pIcode[i].ic.ll.opcode)) + { + if (pIcode[i].ic.ll.flg & I) + { + if (! Icode.labelSrch(pIcode[i].ic.ll.immed.op, + (Int *)&pIcode[i].ic.ll.immed.op)) + pIcode[i].ic.ll.flg |= NO_LABEL; + } + else if (pIcode[i].ic.ll.flg & SWITCH) + { + p = pIcode[i].ic.ll.caseTbl.entries; + for (j = 0; j < pIcode[i].ic.ll.caseTbl.numEntries; j++, p++) + Icode.labelSrch(*p, (Int *)p); + } + } +} + +/* Performs idioms analysis, and propagates long operands, if any */ +void Function::lowLevelAnalysis () +{ + /* Idiom analysis - sets up some flags and creates some HIGH_LEVEL icodes */ + findIdioms(); + /* Propagate HIGH_LEVEL idiom information for long operands */ + propLong(); +} diff --git a/src/locident.cpp b/src/locident.cpp new file mode 100644 index 0000000..faaa62b --- /dev/null +++ b/src/locident.cpp @@ -0,0 +1,424 @@ +/* + * File: locIdent.c + * Purpose: support routines for high-level local identifier definitions. + * Date: October 1993 + * (C) Cristina Cifuentes + */ + +#include "dcc.h" +#include + + +#define LOCAL_ID_DELTA 25 +#define IDX_ARRAY_DELTA 5 + +/* Creates a new identifier node of type t and returns it. 
+ * Arguments: locSym : local long symbol table + * t : type of LONG identifier + * f : frame where this variable is located + * ix : index into icode array where this var is used */ +void LOCAL_ID::newIdent(hlType t, frameType f) +{ + ID newid(t,f); + id_arr.push_back(newid); +} + + +/* Creates a new register identifier node of TYPE_BYTE_(UN)SIGN or + * TYPE_WORD_(UN)SIGN type. Returns the index to this new entry. */ +Int LOCAL_ID::newByteWordReg(hlType t, byte regi) +{ + Int idx; + + /* Check for entry in the table */ + auto found=std::find_if(id_arr.begin(),id_arr.end(),[t,regi](ID &el)->bool { + if ((el.type == t) && (el.id.regi == regi)) + return true; + return false; + }); + if(found!=id_arr.end()) + return found-id_arr.begin(); + /* Not in table, create new identifier */ + newIdent (t, REG_FRAME); + idx = id_arr.size() - 1; + id_arr[idx].id.regi = regi; + return (idx); +} + + +/* Flags the entry associated with the offset off to illegal, as this + * offset is part of a long stack variable. + * Note: it is easier enough to remove this entry by moving the rest of + * the array 1 position. The problem is that indexes into this + * array have already been saved in several positions; therefore, + * flagging this entry as illegal is all that can be done. */ +void LOCAL_ID::flagByteWordId (Int off) +{ + Int idx; + auto found=std::find_if(id_arr.begin(),id_arr.end(),[off](ID &en)->bool { + + if (((en.type == TYPE_WORD_SIGN) || (en.type == TYPE_BYTE_SIGN)) && + (en.id.bwId.off == off) && (en.id.bwId.regOff == 0)) + return true; + return false; + }); + if(found==id_arr.end()) + { + printf("Entry not found in LOCAL_ID::flagByteWordId \n"); + return; + } + found->illegal = TRUE; +} + +/* Creates a new stack identifier node of TYPE_BYTE_(UN)SIGN or + * TYPE_WORD_(UN)SIGN type. Returns the index to this new entry. 
*/ +Int LOCAL_ID::newByteWordStk(hlType t, Int off, byte regOff) +{ + Int idx; + + /* Check for entry in the table */ + auto found=std::find_if(id_arr.begin(),id_arr.end(),[off,regOff](ID &el)->bool { + if ((el.id.bwId.off == off) && (el.id.bwId.regOff == regOff)) + return true; + return false; + }); + if(found!=id_arr.end()) + return found-id_arr.begin(); //return Index to found element + + /* Not in table, create new identifier */ + newIdent (t, STK_FRAME); + idx = id_arr.size() - 1; + id_arr[idx].id.bwId.regOff = regOff; + id_arr[idx].id.bwId.off = off; + return (idx); +} + + +/* Checks if the entry exists in the locSym, if so, returns the idx to this + * entry; otherwise creates a new global identifier node of type + * TYPE_WORD_(UN)SIGN and returns the index to this new entry. + * Arguments: locSym : ptr to the local symbol table + * seg: segment value for global variable + * off: offset from segment + * regi: indexed register into global variable + * ix: index into icode array + * t: HIGH_LEVEL type */ +Int LOCAL_ID::newIntIdx(int16 seg, int16 off, byte regi,Int ix, hlType t) +{ + Int idx; + + /* Check for entry in the table */ + for (idx = 0; idx < id_arr.size(); idx++) + { + if (/*(locSym->id[idx].type == t) && Not checking type */ + (id_arr[idx].id.bwGlb.seg == seg) && + (id_arr[idx].id.bwGlb.off == off) && + (id_arr[idx].id.bwGlb.regi == regi)) + return (idx); + } + + /* Not in the table, create new identifier */ + newIdent (t, GLB_FRAME); + idx = id_arr.size() - 1; + id_arr[idx].id.bwGlb.seg = seg; + id_arr[idx].id.bwGlb.off = off; + id_arr[idx].id.bwGlb.regi = regi; + return (idx); +} + + +/* Checks if the entry exists in the locSym, if so, returns the idx to this + * entry; otherwise creates a new register identifier node of type + * TYPE_LONG_(UN)SIGN and returns the index to this new entry. 
*/ +Int LOCAL_ID::newLongReg(hlType t, byte regH, byte regL, Int ix) +{ + Int idx; + + /* Check for entry in the table */ + for (idx = 0; idx < id_arr.size(); idx++) + { + if (/*(locSym->id[idx].type == t) && Not checking type */ + (id_arr[idx].id.longId.h == regH) && + (id_arr[idx].id.longId.l == regL)) + { + /* Check for occurrence in the list */ + if (id_arr[idx].idx.inList(ix)) + return (idx); + else + { + /* Insert icode index in list */ + id_arr[idx].idx.push_back(ix); + return (idx); + } + } + } + + /* Not in the table, create new identifier */ + newIdent (t, REG_FRAME); + id_arr[id_arr.size()-1].idx.push_back(ix); + idx = id_arr.size() - 1; + id_arr[idx].id.longId.h = regH; + id_arr[idx].id.longId.l = regL; + return (idx); +} + + +/* Checks if the entry exists in the locSym, if so, returns the idx to this + * entry; otherwise creates a new global identifier node of type + * TYPE_LONG_(UN)SIGN and returns the index to this new entry. */ +Int LOCAL_ID::newLongGlb(int16 seg, int16 offH, int16 offL,Int ix, hlType t) +{ + Int idx; + + /* Check for entry in the table */ + for (idx = 0; idx < id_arr.size(); idx++) + { + if (/*(locSym->id[idx].type == t) && Not checking type */ + (id_arr[idx].id.longGlb.seg == seg) && + (id_arr[idx].id.longGlb.offH == offH) && + (id_arr[idx].id.longGlb.offL == offL)) + return (idx); + } + + /* Not in the table, create new identifier */ + newIdent (t, GLB_FRAME); + idx = id_arr.size() - 1; + id_arr[idx].id.longGlb.seg = seg; + id_arr[idx].id.longGlb.offH = offH; + id_arr[idx].id.longGlb.offL = offL; + return (idx); +} + + +/* Checks if the entry exists in the locSym, if so, returns the idx to this + * entry; otherwise creates a new global identifier node of type + * TYPE_LONG_(UN)SIGN and returns the index to this new entry. 
*/ +Int LOCAL_ID::newLongIdx( int16 seg, int16 offH, int16 offL,byte regi, Int ix, hlType t) +{ Int idx; + + /* Check for entry in the table */ + for (idx = 0; idx < id_arr.size(); idx++) + { + if (/*(locSym->id[idx].type == t) && Not checking type */ + (id_arr[idx].id.longGlb.seg == seg) && + (id_arr[idx].id.longGlb.offH == offH) && + (id_arr[idx].id.longGlb.offL == offL) && + (id_arr[idx].id.longGlb.regi == regi)) + return (idx); + } + + /* Not in the table, create new identifier */ + newIdent (t, GLB_FRAME); + idx = id_arr.size() - 1; + id_arr[idx].id.longGlb.seg = seg; + id_arr[idx].id.longGlb.offH = offH; + id_arr[idx].id.longGlb.offL = offL; + id_arr[idx].id.longGlb.regi = regi; + return (idx); +} + + +/* Creates a new stack identifier node of type TYPE_LONG_(UN)SIGN. + * Returns the index to this entry. */ +Int LOCAL_ID::newLongStk(hlType t, Int offH, Int offL) +{ + Int idx; + + /* Check for entry in the table */ + for (idx = 0; idx < id_arr.size(); idx++) + { + if ((id_arr[idx].type == t) && + (id_arr[idx].id.longStkId.offH == offH) && + (id_arr[idx].id.longStkId.offL == offL)) + return (idx); + } + + /* Not in the table; flag as invalid offH and offL */ + flagByteWordId (offH); + flagByteWordId (offL); + + /* Create new identifier */ + newIdent (t, STK_FRAME); + idx = id_arr.size() - 1; + id_arr[idx].id.longStkId.offH = offH; + id_arr[idx].id.longStkId.offL = offL; + return (idx); +} + + +/* Returns the index to an appropriate long identifier. + * Note: long constants should be checked first and stored as a long integer + * number in an expression record. */ +Int LOCAL_ID::newLong(opLoc sd, ICODE *pIcode, hlFirst f, Int ix,operDu du, Int off) +{ + Int idx; + ICODEMEM *pmH, *pmL; + + if (f == LOW_FIRST) + { + pmL = (sd == SRC) ? &pIcode->ic.ll.src : &pIcode->ic.ll.dst; + pmH = (sd == SRC) ? &(pIcode+off)->ic.ll.src : &(pIcode+off)->ic.ll.dst; + } + else /* HIGH_FIRST */ + { + pmH = (sd == SRC) ? &pIcode->ic.ll.src : &pIcode->ic.ll.dst; + pmL = (sd == SRC) ? 
&(pIcode+off)->ic.ll.src : &(pIcode+off)->ic.ll.dst; + } + + if (pmL->regi == 0) /* global variable */ + idx = newLongGlb(pmH->segValue, pmH->off, pmL->off, ix,TYPE_LONG_SIGN); + + else if (pmL->regi < INDEXBASE) /* register */ + { + idx = newLongReg(TYPE_LONG_SIGN, pmH->regi, pmL->regi, ix); + if (f == HIGH_FIRST) + pIcode->setRegDU( pmL->regi, du); /* low part */ + else + pIcode->setRegDU( pmH->regi, du); /* high part */ + } + + else if (pmL->off) { /* offset */ + if ((pmL->seg == rSS) && (pmL->regi == INDEXBASE + 6)) /* idx on bp */ + idx = newLongStk(TYPE_LONG_SIGN, pmH->off, pmL->off); + else if ((pmL->seg == rDS) && (pmL->regi == INDEXBASE + 7)) /* bx */ + { /* glb var indexed on bx */ + idx = newLongIdx(pmH->segValue, pmH->off, pmL->off,rBX, ix, TYPE_LONG_SIGN); + pIcode->setRegDU( rBX, eUSE); + } + else /* idx <> bp, bx */ + printf ("long not supported, idx <> bp\n"); + } + + else /* (pm->regi >= INDEXBASE && pm->off = 0) => indexed && no off */ + printf ("long not supported, idx && no off\n"); + + return (idx); +} + + +boolT checkLongEq (LONG_STKID_TYPE longId, ICODE *pIcode, Int i, Int idx, + Function * pProc, COND_EXPR **rhs, COND_EXPR **lhs, Int off) +/* Checks whether the long stack identifier is equivalent to the source or + * destination operands of pIcode and pIcode+1 (ie. these are LOW_LEVEL + * icodes at present). If so, returns the rhs and lhs of this instruction. + * Arguments: longId : long stack identifier + * pIcode : ptr to first LOW_LEVEL icode instruction + * i : idx into local identifier table for longId + * idx : idx into icode array + * pProc : ptr to current procedure record + * rhs, lhs : return expressions if successful. 
*/ +{ + ICODEMEM *pmHdst, *pmLdst, *pmHsrc, *pmLsrc; /* pointers to LOW_LEVEL icodes */ + + pmHdst = &pIcode->ic.ll.dst; + pmLdst = &(pIcode+off)->ic.ll.dst; + pmHsrc = &pIcode->ic.ll.src; + pmLsrc = &(pIcode+off)->ic.ll.src; + + if ((longId.offH == pmHdst->off) && (longId.offL == pmLdst->off)) + { + *lhs = COND_EXPR::idLongIdx (i); + if ((pIcode->ic.ll.flg & NO_SRC) != NO_SRC) + *rhs = COND_EXPR::idLong (&pProc->localId, SRC, pIcode, HIGH_FIRST, + idx, eUSE, off); + return (TRUE); + } + else if ((longId.offH == pmHsrc->off) && (longId.offL == pmLsrc->off)) + { + *lhs = COND_EXPR::idLong (&pProc->localId, DST, pIcode, HIGH_FIRST, idx, + eDEF, off); + *rhs = COND_EXPR::idLongIdx (i); + return (TRUE); + } + return (FALSE); +} + + +/* Checks whether the long stack identifier is equivalent to the source or + * destination operands of pIcode and pIcode+1 (ie. these are LOW_LEVEL + * icodes at present). If so, returns the rhs and lhs of this instruction. + * Arguments: longId : long stack identifier + * pIcode : ptr to first LOW_LEVEL icode instruction + * i : idx into local identifier table for longId + * idx : idx into icode array + * pProc : ptr to current procedure record + * rhs, lhs : return expressions if successful. 
*/ +boolT checkLongRegEq (LONGID_TYPE longId, ICODE *pIcode, Int i, Int idx, + Function * pProc, COND_EXPR **rhs, COND_EXPR **lhs, Int off) +{ + ICODEMEM *pmHdst, *pmLdst, *pmHsrc, *pmLsrc; /* pointers to LOW_LEVEL icodes */ + + pmHdst = &pIcode->ic.ll.dst; + pmLdst = &(pIcode+off)->ic.ll.dst; + pmHsrc = &pIcode->ic.ll.src; + pmLsrc = &(pIcode+off)->ic.ll.src; + + if ((longId.h == pmHdst->regi) && (longId.l == pmLdst->regi)) + { + *lhs = COND_EXPR::idLongIdx (i); + if ((pIcode->ic.ll.flg & NO_SRC) != NO_SRC) + *rhs = COND_EXPR::idLong (&pProc->localId, SRC, pIcode, HIGH_FIRST, idx, eUSE, off); + return (TRUE); + } + else if ((longId.h == pmHsrc->regi) && (longId.l == pmLsrc->regi)) + { + *lhs = COND_EXPR::idLong (&pProc->localId, DST, pIcode, HIGH_FIRST, idx, eDEF, off); + *rhs = COND_EXPR::idLongIdx (i); + return (TRUE); + } + return (FALSE); +} + + + +/* Given an index into the local identifier table for a long register + * variable, determines whether regi is the high or low part, and returns + * the other part */ +byte otherLongRegi (byte regi, Int idx, LOCAL_ID *locTbl) +{ + ID *id; + + id = &locTbl->id_arr[idx]; + if ((id->loc == REG_FRAME) && ((id->type == TYPE_LONG_SIGN) || + (id->type == TYPE_LONG_UNSIGN))) + { + if (id->id.longId.h == regi) + return (id->id.longId.l); + else if (id->id.longId.l == regi) + return (id->id.longId.h); + } + return 0; // Cristina: please check this! +} + + +/* Checks if the registers regL and regH have been used independently in + * the local identifier table. If so, macros for these registers are + * placed in the local identifier table, as these registers belong to a + * long register identifier. 
*/ +void LOCAL_ID::propLongId (byte regL, byte regH, const char *name) +{ + Int i; + ID *_id; + + for (i = 0; i < id_arr.size(); i++) + { + _id = &id_arr[i]; + if ((_id->type == TYPE_WORD_SIGN) || (_id->type == TYPE_WORD_UNSIGN)) + { + if (_id->id.regi == regL) + { + strcpy (_id->name, name); + strcpy (_id->macro, "LO"); + _id->hasMacro = TRUE; + _id->illegal = TRUE; + } + else if (_id->id.regi == regH) + { + strcpy (_id->name, name); + strcpy (_id->macro, "HI"); + _id->hasMacro = TRUE; + _id->illegal = TRUE; + } + } + } +} diff --git a/src/parser.cpp b/src/parser.cpp new file mode 100644 index 0000000..0637265 --- /dev/null +++ b/src/parser.cpp @@ -0,0 +1,1208 @@ +/**************************************************************************** + * dcc project procedure list builder + * (C) Cristina Cifuentes, Mike van Emmerik, Jeff Ledermann + ****************************************************************************/ + +#include "dcc.h" +#include +#include /* For exit() */ + +#ifdef __DOSWIN__ +#include +#endif + +static void FollowCtrl (Function * pProc, CALL_GRAPH * pcallGraph, STATE * pstate); +static boolT process_JMP (ICODE * pIcode, STATE * pstate, + CALL_GRAPH * pcallGraph); +static boolT process_CALL(ICODE * pIcode, CALL_GRAPH * pcallGraph, + STATE * pstate); +static void process_operands(ICODE * pIcode, Function * pProc, STATE * pstate, + Int ix); +static void setBits(int16 type, dword start, dword len); +static SYM * updateGlobSym(dword operand, Int size, word duFlag); +static void process_MOV(ICODE * pIcode, STATE * pstate); +static SYM * lookupAddr (ICODEMEM *pm, STATE * pstate, Int size, word duFlag); +void interactDis(Function * initProc, Int ic); +static dword SynthLab; + + +/* Parses the program, builds the call graph, and returns the list of + * procedures found */ +void parse (CALL_GRAPH * *pcallGraph) +{ + STATE state; + + /* Set initial state */ + memset(&state, 0, sizeof(STATE)); + state.setState(rES, 0); /* PSP segment */ + 
state.setState(rDS, 0); + state.setState(rCS, prog.initCS); + state.setState(rSS, prog.initSS); + state.setState(rSP, prog.initSP); + state.IP = ((dword)prog.initCS << 4) + prog.initIP; + SynthLab = SYNTHESIZED_MIN; + + + /* Check for special settings of initial state, based on idioms of the + startup code */ + state.checkStartup(); + + /* Make a struct for the initial procedure */ + pProcList.resize(1); // default-construct a Function object ! + if (prog.offMain != -1) + { + /* We know where main() is. Start the flow of control from there */ + pProcList.front().procEntry = prog.offMain; + /* In medium and large models, the segment of main may (will?) not be + the same as the initial CS segment (of the startup code) */ + state.setState(rCS, prog.segMain); + strcpy(pProcList.front().name, "main"); + state.IP = prog.offMain; + } + else + { + /* Create initial procedure at program start address */ + pProcList.front().procEntry = (dword)state.IP; + } + /* The state info is for the first procedure */ + pProcList.front().state = state; + + /* Set up call graph initial node */ + *pcallGraph = new CALL_GRAPH; + (*pcallGraph)->proc = pProcList.begin(); + + /* This proc needs to be called to set things up for LibCheck(), which + checks a proc to see if it is a know C (etc) library */ + SetupLibCheck(); + + /* Recursively build entire procedure list */ + pProcList.front().FollowCtrl (*pcallGraph, &state); + + /* This proc needs to be called to clean things up from SetupLibCheck() */ + CleanupLibCheck(); +} + + +static void updateSymType (dword symbol, hlType symType, Int size) +/* Updates the type of the symbol in the symbol table. The size is updated + * if necessary (0 means no update necessary). */ +{ Int i; + + for (i = 0; i < symtab.csym; i++) + if (symtab.sym[i].label == symbol) + { + symtab.sym[i].type = symType; + if (size != 0) + symtab.sym[i].size = size; + break; + } +} + + +/* Returns the size of the string pointed by sym and delimited by delim. 
+ * Size includes delimiter. */ +Int strSize (byte *sym, char delim) +{ + Int i; + for (i = 0; *sym++ != delim; i++) ; + return (i+1); +} + + +/* FollowCtrl - Given an initial procedure, state information and symbol table + * builds a list of procedures reachable from the initial procedure + * using a depth first search. */ +void Function::FollowCtrl(CALL_GRAPH * pcallGraph, STATE *pstate) +{ + ICODE _Icode, *pIcode; /* This gets copied to pProc->Icode[] later */ + ICODE eIcode; /* extra icodes for iDIV, iIDIV, iXCHG */ + SYM * psym; + dword offset; + Int err; + boolT done = FALSE; + Int lab; + + if (strstr(name, "chkstk") != NULL) + { + // Danger! Dcc will likely fall over in this code. + // So we act as though we have done with this proc + // pProc->flg &= ~TERMINATES; // Not sure about this + done = TRUE; + // And mark it as a library function, so structure() won't choke on it + flg |= PROC_ISLIB; + return; + } + if (option.VeryVerbose) + { + printf("Parsing proc %s at %lX\n", name, pstate->IP); + } + + while (! done && ! 
(err = scan(pstate->IP, &_Icode))) + { + pstate->IP += (dword)_Icode.ic.ll.numBytes; + setBits(BM_CODE, _Icode.ic.ll.label, (dword)_Icode.ic.ll.numBytes); + + process_operands(&_Icode,pstate); + + /* Keep track of interesting instruction flags in procedure */ + flg |= (_Icode.ic.ll.flg & (NOT_HLL | FLOAT_OP)); + + /* Check if this instruction has already been parsed */ + if (Icode.labelSrch(_Icode.ic.ll.label, &lab)) + { /* Synthetic jump */ + _Icode.type = LOW_LEVEL; + _Icode.ic.ll.opcode = iJMP; + _Icode.ic.ll.flg = I | SYNTHETIC | NO_OPS; + _Icode.ic.ll.immed.op = Icode.GetLlLabel(lab); + _Icode.ic.ll.label = SynthLab++; + } + + /* Copy Icode to Proc */ + if ((_Icode.ic.ll.opcode == iDIV) || (_Icode.ic.ll.opcode == iIDIV)) + { + /* MOV rTMP, reg */ + memset (&eIcode, 0, sizeof (ICODE)); + eIcode.type = LOW_LEVEL; + eIcode.ic.ll.opcode = iMOV; + eIcode.ic.ll.dst.regi = rTMP; + if (_Icode.ic.ll.flg & B) + { + eIcode.ic.ll.flg |= B; + eIcode.ic.ll.src.regi = rAX; + eIcode.setRegDU( rAX, eUSE); + } + else /* implicit dx:ax */ + { + eIcode.ic.ll.flg |= IM_SRC; + eIcode.setRegDU( rAX, eUSE); + eIcode.setRegDU( rDX, eUSE); + } + eIcode.setRegDU( rTMP, eDEF); + eIcode.ic.ll.flg |= SYNTHETIC; + /* eIcode.ic.ll.label = SynthLab++; */ + eIcode.ic.ll.label = _Icode.ic.ll.label; + pIcode = Icode.addIcode(&eIcode); + + /* iDIV, iIDIV */ + pIcode = Icode.addIcode(&_Icode); + + /* iMOD */ + memset (&eIcode, 0, sizeof (ICODE)); + eIcode.type = LOW_LEVEL; + eIcode.ic.ll.opcode = iMOD; + eIcode.ic.ll.src = _Icode.ic.ll.src; + eIcode.du = _Icode.du; + eIcode.ic.ll.flg = (_Icode.ic.ll.flg | SYNTHETIC); + eIcode.ic.ll.label = SynthLab++; + pIcode = Icode.addIcode(&eIcode); + } + else if (_Icode.ic.ll.opcode == iXCHG) + { + /* MOV rTMP, regDst */ + memset (&eIcode, 0, sizeof (ICODE)); + eIcode.type = LOW_LEVEL; + eIcode.ic.ll.opcode = iMOV; + eIcode.ic.ll.dst.regi = rTMP; + eIcode.ic.ll.src.regi = _Icode.ic.ll.dst.regi; + eIcode.setRegDU( rTMP, eDEF); + eIcode.setRegDU( 
eIcode.ic.ll.src.regi, eUSE); + eIcode.ic.ll.flg |= SYNTHETIC; + /* eIcode.ic.ll.label = SynthLab++; */ + eIcode.ic.ll.label = _Icode.ic.ll.label; + pIcode = Icode.addIcode(&eIcode); + + /* MOV regDst, regSrc */ + _Icode.ic.ll.opcode = iMOV; + _Icode.ic.ll.flg |= SYNTHETIC; + /* Icode.ic.ll.label = SynthLab++; */ + pIcode = Icode.addIcode(&_Icode); + _Icode.ic.ll.opcode = iXCHG; /* for next case */ + + /* MOV regSrc, rTMP */ + memset (&eIcode, 0, sizeof (ICODE)); + eIcode.type = LOW_LEVEL; + eIcode.ic.ll.opcode = iMOV; + eIcode.ic.ll.dst.regi = _Icode.ic.ll.src.regi; + eIcode.ic.ll.src.regi = rTMP; + eIcode.setRegDU( eIcode.ic.ll.dst.regi, eDEF); + eIcode.setRegDU( rTMP, eUSE); + eIcode.ic.ll.flg |= SYNTHETIC; + eIcode.ic.ll.label = SynthLab++; + pIcode = Icode.addIcode(&eIcode); + } + else + pIcode = Icode.addIcode(&_Icode); + + switch (_Icode.ic.ll.opcode) { + /*** Conditional jumps ***/ + case iLOOP: case iLOOPE: case iLOOPNE: + case iJB: case iJBE: case iJAE: case iJA: + case iJL: case iJLE: case iJGE: case iJG: + case iJE: case iJNE: case iJS: case iJNS: + case iJO: case iJNO: case iJP: case iJNP: + case iJCXZ: + { STATE StCopy; + int ip = Icode.GetNumIcodes()-1; /* Index of this jump */ + ICODE * prev = Icode.GetIcode(ip-1); /* Previous icode */ + boolT fBranch = FALSE; + + pstate->JCond.regi = 0; + + /* This sets up range check for indexed JMPs hopefully + * Handles JA/JAE for fall through and JB/JBE on branch + */ + if (ip > 0 && prev->ic.ll.opcode == iCMP && (prev->ic.ll.flg & I)) + { + pstate->JCond.immed = (int16)prev->ic.ll.immed.op; + if (_Icode.ic.ll.opcode == iJA || _Icode.ic.ll.opcode == iJBE) + pstate->JCond.immed++; + if (_Icode.ic.ll.opcode == iJAE || _Icode.ic.ll.opcode == iJA) + pstate->JCond.regi = prev->ic.ll.dst.regi; + fBranch = (boolT) + (_Icode.ic.ll.opcode == iJB || _Icode.ic.ll.opcode == iJBE); + } + + memcpy(&StCopy, pstate, sizeof(STATE)); + + /* Straight line code */ + this->FollowCtrl (pcallGraph, &StCopy); // recurrent ? 
+ + if (fBranch) /* Do branching code */ + { + pstate->JCond.regi = prev->ic.ll.dst.regi; + } + /* Next icode. Note: not the same as GetLastIcode() because of the call + to FollowCtrl() */ + pIcode = Icode.GetIcode(ip); + } /* Fall through to do the jump path */ + + /*** Jumps ***/ + case iJMP: + case iJMPF: /* Returns TRUE if we've run into a loop */ + done = process_JMP (pIcode, pstate, pcallGraph); + break; + + /*** Calls ***/ + case iCALL: + case iCALLF: + done = process_CALL (pIcode, pcallGraph, pstate); + break; + + /*** Returns ***/ + case iRET: + case iRETF: + this->flg |= (_Icode.ic.ll.opcode == iRET)? PROC_NEAR:PROC_FAR; + /* Fall through */ + case iIRET: + this->flg &= ~TERMINATES; + done = TRUE; + break; + + case iINT: + if (_Icode.ic.ll.immed.op == 0x21 && pstate->f[rAH]) + { + Int funcNum = pstate->r[rAH]; + Int operand; + Int size; + + /* Save function number */ + Icode.back().ic.ll.dst.off = (int16)funcNum; + //Icode.GetIcode(Icode.GetNumIcodes() - 1)-> + + /* Program termination: int21h, fn 00h, 31h, 4Ch */ + done = (boolT)(funcNum == 0x00 || funcNum == 0x31 || + funcNum == 0x4C); + + /* String functions: int21h, fn 09h */ + if (pstate->f[rDX]) /* offset goes into DX */ + if (funcNum == 0x09) + { + operand = ((dword)(word)pstate->r[rDS]<<4) + + (dword)(word)pstate->r[rDX]; + size = prog.fCOM ? 
+ strSize (&prog.Image[operand], '$') : + strSize (&prog.Image[operand + 0x100], '$'); + updateSymType (operand, TYPE_STR, size); + } + } + else if ((_Icode.ic.ll.immed.op == 0x2F) && (pstate->f[rAH])) + { + Icode.back().ic.ll.dst.off = pstate->r[rAH]; + } + else /* Program termination: int20h, int27h */ + done = (boolT)(_Icode.ic.ll.immed.op == 0x20 || + _Icode.ic.ll.immed.op == 0x27); + if (done) + pIcode->ic.ll.flg |= TERMINATES; + break; + + case iMOV: + process_MOV(pIcode, pstate); + break; + + /* case iXCHG: + process_MOV (pIcode, pstate); + + break; **** HERE ***/ + + case iSHL: + if (pstate->JCond.regi == _Icode.ic.ll.dst.regi) + if ((_Icode.ic.ll.flg & I) && _Icode.ic.ll.immed.op == 1) + pstate->JCond.immed *= 2; + else + pstate->JCond.regi = 0; + break; + + case iLEA: + if (_Icode.ic.ll.src.regi == 0) /* direct mem offset */ + pstate->setState( _Icode.ic.ll.dst.regi, _Icode.ic.ll.src.off); + break; + + case iLDS: case iLES: + if ((psym = lookupAddr(&_Icode.ic.ll.src, pstate, 4, eDuVal::USE)) + /* && (Icode.ic.ll.flg & SEG_IMMED) */ ) { + offset = LH(&prog.Image[psym->label]); + pstate->setState( (_Icode.ic.ll.opcode == iLDS)? 
rDS: rES, + LH(&prog.Image[psym->label + 2])); + pstate->setState( _Icode.ic.ll.dst.regi, (int16)offset); + psym->type = TYPE_PTR; + } + break; + } + } + + if (err) { + this->flg &= ~TERMINATES; + + if (err == INVALID_386OP || err == INVALID_OPCODE) + { + fatalError(err, prog.Image[_Icode.ic.ll.label], _Icode.ic.ll.label); + this->flg |= PROC_BADINST; + } + else if (err == IP_OUT_OF_RANGE) + fatalError (err, _Icode.ic.ll.label); + else + reportError(err, _Icode.ic.ll.label); + } +} + + +/* process_JMP - Handles JMPs, returns TRUE if we should end recursion */ +boolT Function::process_JMP (ICODE * pIcode, STATE *pstate, CALL_GRAPH * pcallGraph) +{ + static byte i2r[4] = {rSI, rDI, rBP, rBX}; + ICODE _Icode; + dword cs, offTable, endTable; + dword i, k, seg, target; + Int tmp; + + if (pIcode->ic.ll.flg & I) + { + if (pIcode->ic.ll.opcode == iJMPF) + pstate->setState( rCS, LH(prog.Image + pIcode->ic.ll.label + 3)); + i = pstate->IP = pIcode->ic.ll.immed.op; + if ((long)i < 0) + { + exit(1); + } + + /* Return TRUE if jump target is already parsed */ + return Icode.labelSrch(i, &tmp); + } + + /* We've got an indirect JMP - look for switch() stmt. idiom of the form + * JMP word ptr word_offset[rBX | rSI | rDI] */ + seg = (pIcode->ic.ll.src.seg)? pIcode->ic.ll.src.seg: rDS; + + /* Ensure we have a word offset & valid seg */ + if (pIcode->ic.ll.opcode == iJMP && (pIcode->ic.ll.flg & WORD_OFF) && + pstate->f[seg] && + (pIcode->ic.ll.src.regi == INDEXBASE + 4 || + pIcode->ic.ll.src.regi == INDEXBASE + 5 || /* Idx reg. BX, SI, DI */ + pIcode->ic.ll.src.regi == INDEXBASE + 7)) + { + + offTable = ((dword)(word)pstate->r[seg] << 4) + pIcode->ic.ll.src.off; + + /* Firstly look for a leading range check of the form:- + * CMP {BX | SI | DI}, immed + * JA | JAE | JB | JBE + * This is stored in the current state as if we had just + * followed a JBE branch (i.e. [reg] lies between 0 - immed). 
+ */ + if (pstate->JCond.regi == i2r[pIcode->ic.ll.src.regi-(INDEXBASE+4)]) + endTable = offTable + pstate->JCond.immed; + else + endTable = (dword)prog.cbImage; + + /* Search for first byte flagged after start of table */ + for (i = offTable; i <= endTable; i++) + if (BITMAP(i, BM_CODE | BM_DATA)) + break; + endTable = i & ~1; /* Max. possible table size */ + + /* Now do some heuristic pruning. Look for ptrs. into the table + * and for addresses that don't appear to point to valid code. + */ + cs = (dword)(word)pstate->r[rCS] << 4; + for (i = offTable; i < endTable; i += 2) + { + target = cs + LH(&prog.Image[i]); + if (target < endTable && target >= offTable) + endTable = target; + else if (target >= (dword)prog.cbImage) + endTable = i; + } + + for (i = offTable; i < endTable; i += 2) + { + target = cs + LH(&prog.Image[i]); + /* Be wary of 00 00 as code - it's probably data */ + if (! (prog.Image[target] || prog.Image[target+1]) || + scan(target, &_Icode)) + endTable = i; + } + + /* Now for each entry in the table take a copy of the current + * state and recursively call FollowCtrl(). */ + if (offTable < endTable) + { + STATE StCopy; + Int ip; + dword *psw; + + setBits(BM_DATA, offTable, endTable - offTable); + + pIcode->ic.ll.flg |= SWITCH; + pIcode->ic.ll.caseTbl.numEntries = (endTable - offTable) / 2; + psw = (dword*)allocMem(pIcode->ic.ll.caseTbl.numEntries*sizeof(dword)); + pIcode->ic.ll.caseTbl.entries = psw; + + for (i = offTable, k = 0; i < endTable; i += 2) + { + memcpy(&StCopy, pstate, sizeof(STATE)); + StCopy.IP = cs + LH(&prog.Image[i]); + ip = Icode.GetNumIcodes(); + + FollowCtrl (pcallGraph, &StCopy); + + Icode.GetIcode(ip)->ic.ll.caseTbl.numEntries = k++; + Icode.GetIcode(ip)->ic.ll.flg |= CASE; + *psw++ = Icode.GetLlLabel(ip); + } + return TRUE; + } + } + + /* Can't do anything with this jump */ + + flg |= PROC_IJMP; + flg &= ~TERMINATES; + interactDis(this, this->Icode.GetNumIcodes()-1); + return TRUE; +} + + +/* Process procedure call. 
+ * Note: We assume that CALL's will return unless there is good evidence to + * the contrary - thus we return FALSE unless all paths in the called + * procedure end in DOS exits. This is reasonable since C procedures + * will always include the epilogue after the call anyway and it's to + * be assumed that if an assembler program contains a CALL that the + * programmer expected it to come back - otherwise surely a JMP would + * have been used. */ +boolT Function::process_CALL (ICODE * pIcode, CALL_GRAPH * pcallGraph, STATE *pstate) +{ + Int ip = Icode.GetNumIcodes() - 1; + STATE localState; /* Local copy of the machine state */ + dword off; + boolT indirect; + + /* For Indirect Calls, find the function address */ + indirect = FALSE; + if (! (pIcode->ic.ll.flg & I)) + { + /* Not immediate, i.e. indirect call */ + + if (pIcode->ic.ll.dst.regi && (!option.Calls)) + { + /* We have not set the brave option to attempt to follow + the execution path through register indirect calls. + So we just exit this function, and ignore the call. + We probably should not have parsed this deep, anyway. + */ + return FALSE; + } + + /* Offset into program image is seg:off of read input */ + /* Note: this assumes that the pointer itself is at + es:0 where es:0 is the start of the image. This is + usually wrong! Consider also CALL [BP+0E] in which the + segment for the pointer is in SS! - Mike */ + + off = (dword)(word)pIcode->ic.ll.dst.off + + ((dword)(word)pIcode->ic.ll.dst.segValue << 4); + + /* Address of function is given by 4 (CALLF) or 2 (CALL) bytes at + * previous offset into the program image */ + if (pIcode->ic.ll.opcode == iCALLF) + pIcode->ic.ll.immed.op = LH(&prog.Image[off]) + + ((dword)(LH(&prog.Image[off+2])) << 4); + else + pIcode->ic.ll.immed.op = LH(&prog.Image[off]) + ((dword)(word)state.r[rCS] << 4); + pIcode->ic.ll.flg |= I; + indirect = TRUE; + } + + /* Process CALL. 
Function address is located in pIcode->ic.ll.immed.op */ + if (pIcode->ic.ll.flg & I) + { + /* Search procedure list for one with appropriate entry point */ + std::list::iterator iter= std::find_if(pProcList.begin(),pProcList.end(), + [pIcode](const Function &f) -> + bool { return f.procEntry==pIcode->ic.ll.immed.op; }); + + /* Create a new procedure node and save copy of the state */ + if (iter==pProcList.end()) + { + pProcList.push_back(Function()); + Function &x(pProcList.back()); + iter = (++pProcList.rbegin()).base(); + x.procEntry = pIcode->ic.ll.immed.op; + LibCheck(x); + + if (x.flg & PROC_ISLIB) + { + /* A library function. No need to do any more to it */ + pcallGraph->insertCallGraph (this, iter); + iter = (++pProcList.rbegin()).base(); + + Icode.GetIcode(ip)->ic.ll.immed.proc.proc = &x; + return false; + } + + if (indirect) + x.flg |= PROC_ICALL; + + if (x.name[0] == '\0') /* Don't overwrite existing name */ + { + sprintf(x.name, "proc_%ld", ++prog.cProcs); + } + x.depth = x.depth + 1; + x.flg |= TERMINATES; + + /* Save machine state in localState, load up IP and CS.*/ + localState = *pstate; + pstate->IP = pIcode->ic.ll.immed.op; + if (pIcode->ic.ll.opcode == iCALLF) + pstate->setState( rCS, LH(prog.Image + pIcode->ic.ll.label + 3)); + x.state = *pstate; + + /* Insert new procedure in call graph */ + pcallGraph->insertCallGraph (this, iter); + + /* Process new procedure */ + x.FollowCtrl (pcallGraph, pstate); + + /* Restore segment registers & IP from localState */ + pstate->IP = localState.IP; + pstate->setState( rCS, localState.r[rCS]); + pstate->setState( rDS, localState.r[rDS]); + pstate->setState( rES, localState.r[rES]); + pstate->setState( rSS, localState.r[rSS]); + + } + else + pcallGraph->insertCallGraph (this, iter); + + Icode.GetIcode(ip)->ic.ll.immed.proc.proc = &(*iter); // ^ target proc + /* return ((p->flg & TERMINATES) != 0); */ + return FALSE; + } + return FALSE; // Cristina, please check!! 
+} + + +/* process_MOV - Handles state changes due to simple assignments */ +static void process_MOV(ICODE * pIcode, STATE * pstate) +{ + SYM * psym, *psym2; /* Pointer to symbol in global symbol table */ + byte dstReg = pIcode->ic.ll.dst.regi; + byte srcReg = pIcode->ic.ll.src.regi; + + if (dstReg > 0 && dstReg < INDEXBASE) + { + if (pIcode->ic.ll.flg & I) + pstate->setState( dstReg, (int16)pIcode->ic.ll.immed.op); + else if (srcReg == 0) /* direct memory offset */ + { + psym = lookupAddr(&pIcode->ic.ll.src, pstate, 2, eDuVal::USE); + if (psym && ((psym->flg & SEG_IMMED) || psym->duVal.val)) + pstate->setState( dstReg, LH(&prog.Image[psym->label])); + } + else if (srcReg < INDEXBASE && pstate->f[srcReg]) /* reg */ + { + pstate->setState( dstReg, pstate->r[srcReg]); + + /* Follow moves of the possible index register */ + if (pstate->JCond.regi == srcReg) + pstate->JCond.regi = dstReg; + } + } + else if (dstReg == 0) { /* direct memory offset */ + psym = lookupAddr (&pIcode->ic.ll.dst, pstate, 2, eDEF); + if (psym && ! 
(psym->duVal.val)) /* no initial value yet */ + if (pIcode->ic.ll.flg & I) { /* immediate */ + prog.Image[psym->label] = (byte)pIcode->ic.ll.immed.op; + prog.Image[psym->label+1] = (byte)(pIcode->ic.ll.immed.op>>8); + psym->duVal.val = 1; + } + else if (srcReg == 0) { /* direct mem offset */ + psym2 = lookupAddr (&pIcode->ic.ll.src, pstate, 2, eDuVal::USE); + if (psym2 && ((psym->flg & SEG_IMMED) || (psym->duVal.val))) + { + prog.Image[psym->label] = (byte)prog.Image[psym2->label]; + prog.Image[psym->label+1] = + (byte)(prog.Image[psym2->label+1] >> 8); + psym->duVal.val=1; + } + } + else if (srcReg < INDEXBASE && pstate->f[srcReg]) /* reg */ + { + prog.Image[psym->label] = (byte)pstate->r[srcReg]; + prog.Image[psym->label+1] = (byte)(pstate->r[srcReg] >> 8); + psym->duVal.val; + } + } +} + + +/* Type of the symbol according to the number of bytes it uses */ +static hlType cbType[] = {TYPE_UNKNOWN, TYPE_BYTE_UNSIGN, TYPE_WORD_SIGN, + TYPE_UNKNOWN, TYPE_LONG_SIGN}; + +/* Creates an entry in the global symbol table (symtab) if the variable + * is not there yet. If it is part of the symtab, the size of the variable + * is checked and updated if the old size was less than the new size (ie. + * the maximum size is always saved). 
*/ +static SYM * updateGlobSym (dword operand, Int size, word duFlag) +{ Int i; + + /* Check for symbol in symbol table */ + for (i = 0; i < symtab.csym; i++) + if (symtab.sym[i].label == operand) { + if (symtab.sym[i].size < size) + symtab.sym[i].size = size; + break; + } + + /* New symbol, not in symbol table */ + if (i == symtab.csym) { + if (++symtab.csym > symtab.alloc) { + symtab.alloc += 5; + symtab.sym = (SYM *)reallocVar(symtab.sym, symtab.alloc * sizeof(SYM)); + memset (&symtab.sym[i], 0, 5 * sizeof(SYM)); + } + sprintf (symtab.sym[i].name, "var%05lX", operand); + symtab.sym[i].label = operand; + symtab.sym[i].size = size; + symtab.sym[i].type = cbType[size]; + if (duFlag == eDuVal::USE) /* must already have init value */ + { + symtab.sym[i].duVal.use =1; // USEVAL; + symtab.sym[i].duVal.val =1; + } + else + { + symtab.sym[i].duVal.setFlags(duFlag); + } + } + return (&symtab.sym[i]); +} + + +/* Updates the offset entry to the stack frame table (arguments), + * and returns a pointer to such entry. */ +static void updateFrameOff (STKFRAME * ps, int16 off, Int size, word duFlag) +{ + Int i; + + /* Check for symbol in stack frame table */ + for (i = 0; i < ps->sym.size(); i++) + { + if (ps->sym[i].off == off) + { + if (ps->sym[i].size < size) + { + ps->sym[i].size = size; + } + break; + } + } + + /* New symbol, not in table */ + if (i == ps->sym.size()) + { + STKSYM new_sym; + sprintf (new_sym.name, "arg%ld", i); + new_sym.off = off; + new_sym.size = size; + new_sym.type = cbType[size]; + if (duFlag == eDuVal::USE) /* must already have init value */ + { + new_sym.duVal.use=1; + //new_sym.duVal.val=1; + } + else + { + new_sym.duVal.setFlags(duFlag); + } + ps->sym.push_back(new_sym); + ps->numArgs++; + } + + /* Save maximum argument offset */ + if ((dword)ps->maxOff < (off + (dword)size)) + ps->maxOff = off + (int16)size; +} + + +/* lookupAddr - Looks up a data reference in the symbol table and stores it + * if necessary. 
+ * Returns a pointer to the symbol in the + * symbol table, or Null if it's not a direct memory offset. */ +static SYM * lookupAddr (ICODEMEM *pm, STATE *pstate, Int size, word duFlag) +{ + Int i; + SYM * psym; + dword operand; + + if (pm->regi == 0) { /* Global var */ + if (pm->segValue) { /* there is a value in the seg field */ + operand = opAdr (pm->segValue, pm->off); + psym = updateGlobSym (operand, size, duFlag); + + /* Check for out of bounds */ + if (psym->label >= (dword)prog.cbImage) + return (NULL); + return (psym); + } + else if (pstate->f[pm->seg]) { /* new value */ + pm->segValue = pstate->r[pm->seg]; + operand = opAdr(pm->segValue, pm->off); + i = symtab.csym; + psym = updateGlobSym (operand, size, duFlag); + + /* Flag new memory locations that are segment values */ + if (symtab.csym > i) { + if (size == 4) + operand += 2; /* High word */ + for (i = 0; i < prog.cReloc; i++) + if (prog.relocTable[i] == operand) { + psym->flg = SEG_IMMED; + break; + } + } + + /* Check for out of bounds */ + if (psym->label >= (dword)prog.cbImage) + return (NULL); + return (psym); + } + } + return (NULL); +} + + +/* setState - Assigns a value to a reg. 
*/ +void STATE::setState(word reg, int16 value) +{ + value &= 0xFFFF; + r[reg] = value; + f[reg] = TRUE; + switch (reg) { + case rAX: case rCX: case rDX: case rBX: + r[reg + rAL - rAX] = value & 0xFF; + f[reg + rAL - rAX] = TRUE; + r[reg + rAH - rAX] = (value >> 8) & 0xFF; + f[reg + rAH - rAX] = TRUE; + break; + + case rAL: case rCL: case rDL: case rBL: + if (f[reg - rAL + rAH]) { + r[reg - rAL + rAX] =(r[reg - rAL + rAH] << 8) + (value & 0xFF); + f[reg - rAL + rAX] = TRUE; + } + break; + + case rAH: case rCH: case rDH: case rBH: + if (f[reg - rAH + rAL]) + { + r[reg - rAH + rAX] = r[reg - rAH + rAL] + ((value & 0xFF) << 8); + f[reg - rAH + rAX] = TRUE; + } + break; + } +} + + +/* labelSrchRepl - Searches Icode for instruction with label = target, and + replaces *pIndex with an icode index */ +boolT labelSrch(ICODE *pIcode, Int numIp, dword target, Int *pIndex) +{ + Int i; + + for (i = 0; i < numIp; i++) + { + if (pIcode[i].ic.ll.label == target) + { + *pIndex = i; + return TRUE; + } + } + return FALSE; +} + + +static void setBits(int16 type, dword start, dword len) +/* setBits - Sets memory bitmap bits for BM_CODE or BM_DATA (additively) */ +{ + dword i; + + if (start < (dword)prog.cbImage) + { + if (start + len > (dword)prog.cbImage) + len = (dword)(prog.cbImage - start); + + for (i = start + len - 1; i >= start; i--) + { + prog.map[i >> 2] |= type << ((i & 3) << 1); + if (i == 0) break; // Fixes inf loop! + } + } +} + + +/* DU bit definitions for each reg value - including index registers */ +dword duReg[] = { 0x00, + 0x11001, 0x22002, 0x44004, 0x88008, /* word regs */ + 0x10, 0x20, 0x40, 0x80, + 0x100, 0x200, 0x400, 0x800, /* seg regs */ + 0x1000, 0x2000, 0x4000, 0x8000, /* byte regs */ + 0x10000, 0x20000, 0x40000, 0x80000, + 0x100000, /* tmp reg */ + 0x48, 0x88, 0x60, 0xA0, /* index regs */ + 0x40, 0x80, 0x20, 0x08 }; + + +/* Checks which registers where used and updates the du.u flag. + * Places local variables on the local symbol table. 
+ * Arguments: d : SRC or DST icode operand + * pIcode: ptr to icode instruction + * pProc : ptr to current procedure structure + * pstate: ptr to current procedure state + * size : size of the operand + * ix : current index into icode array */ +static void use (opLoc d, ICODE * pIcode, Function * pProc, STATE * pstate, Int size, + Int ix) +{ + ICODEMEM * pm = (d == SRC)? &pIcode->ic.ll.src: &pIcode->ic.ll.dst; + SYM * psym; + + if (pm->regi == 0 || pm->regi >= INDEXBASE) + { + if (pm->regi == INDEXBASE + 6) /* indexed on bp */ + { + if (pm->off >= 2) + updateFrameOff (&pProc->args, pm->off, size, eDuVal::USE); + else if (pm->off < 0) + pProc->localId.newByteWordStk (TYPE_WORD_SIGN, pm->off, 0); + } + + else if (pm->regi == INDEXBASE + 2 || pm->regi == INDEXBASE + 3) + pProc->localId.newByteWordStk (TYPE_WORD_SIGN, pm->off, + (byte)((pm->regi == INDEXBASE + 2) ? rSI : rDI)); + + else if ((pm->regi >= INDEXBASE + 4) && (pm->regi <= INDEXBASE + 7)) + { + if ((pm->seg == rDS) && (pm->regi == INDEXBASE + 7)) /* bx */ + { + if (pm->off > 0) /* global indexed variable */ + pProc->localId.newIntIdx(pm->segValue, pm->off, rBX,ix, TYPE_WORD_SIGN); + } + pIcode->du.use |= duReg[pm->regi]; + } + + else if (psym = lookupAddr(pm, pstate, size, eDuVal::USE)) + { + setBits (BM_DATA, psym->label, (dword)size); + pIcode->ic.ll.flg |= SYM_USE; + pIcode->ic.ll.caseTbl.numEntries = psym - symtab.sym; + } + } + + /* Use of register */ + else if ((d == DST) || ((d == SRC) && (pIcode->ic.ll.flg & I) != I)) + pIcode->du.use |= duReg[pm->regi]; +} + + +static void def (opLoc d, ICODE * pIcode, Function * pProc, STATE * pstate, Int size, + Int ix) +/* Checks which registers were defined (ie. got a new value) and updates the + * du.d flag. + * Places local variables in the local symbol table. */ +{ + ICODEMEM *pm = (d == SRC)? 
&pIcode->ic.ll.src: &pIcode->ic.ll.dst; + SYM * psym; + + if (pm->regi == 0 || pm->regi >= INDEXBASE) + { + if (pm->regi == INDEXBASE + 6) /* indexed on bp */ + { + if (pm->off >= 2) + updateFrameOff (&pProc->args, pm->off, size, eDEF); + else if (pm->off < 0) + pProc->localId.newByteWordStk (TYPE_WORD_SIGN, pm->off, 0); + } + + else if (pm->regi == INDEXBASE + 2 || pm->regi == INDEXBASE + 3) + { + pProc->localId.newByteWordStk(TYPE_WORD_SIGN, pm->off, + (byte)((pm->regi == INDEXBASE + 2) ? rSI : rDI)); + } + + else if ((pm->regi >= INDEXBASE + 4) && (pm->regi <= INDEXBASE + 7)) + { + if ((pm->seg == rDS) && (pm->regi == INDEXBASE + 7)) /* bx */ + { + if (pm->off > 0) /* global var */ + pProc->localId.newIntIdx(pm->segValue, pm->off, rBX,ix, TYPE_WORD_SIGN); + } + pIcode->du.use |= duReg[pm->regi]; + } + + else if (psym = lookupAddr(pm, pstate, size, eDEF)) + { + setBits(BM_DATA, psym->label, (dword)size); + pIcode->ic.ll.flg |= SYM_DEF; + pIcode->ic.ll.caseTbl.numEntries = psym - symtab.sym; + } + } + + /* Definition of register */ + else if ((d == DST) || ((d == SRC) && (pIcode->ic.ll.flg & I) != I)) + { + pIcode->du.def |= duReg[pm->regi]; + pIcode->du1.numRegsDef++; + } +} + + +/* use_def - operand is both use and def'd. + * Note: the destination will always be a register, stack variable, or global + * variable. */ +static void use_def(opLoc d, ICODE * pIcode, Function * pProc, STATE * pstate, Int cb, + Int ix) +{ + ICODEMEM * pm = (d == SRC)? &pIcode->ic.ll.src: &pIcode->ic.ll.dst; + + use (d, pIcode, pProc, pstate, cb, ix); + + if (pm->regi < INDEXBASE) /* register */ + { + pIcode->du.def |= duReg[pm->regi]; + pIcode->du1.numRegsDef++; + } +} + + +/* Set DU vector, local variables and arguments, and DATA bits in the + * bitmap */ +void Function::process_operands(ICODE * pIcode, STATE * pstate) +{ + Int ix=Icode.GetNumIcodes(); + Int i; + Int sseg = (pIcode->ic.ll.src.seg)? pIcode->ic.ll.src.seg: rDS; + Int cb = (pIcode->ic.ll.flg & B) ? 
1: 2; + flags32 Imm = (pIcode->ic.ll.flg & I); + + switch (pIcode->ic.ll.opcode) { + case iAND: case iOR: case iXOR: + case iSAR: case iSHL: case iSHR: + case iRCL: case iRCR: case iROL: case iROR: + case iADD: case iADC: case iSUB: case iSBB: + if (! Imm) { + use(SRC, pIcode, this, pstate, cb, ix); + } + case iINC: case iDEC: case iNEG: case iNOT: + case iAAA: case iAAD: case iAAM: case iAAS: + case iDAA: case iDAS: + use_def(DST, pIcode, this, pstate, cb, ix); + break; + + case iXCHG: + /* This instruction is replaced by 3 instructions, only need + * to define the src operand and use the destination operand + * in the mean time. */ + use(SRC, pIcode, this, pstate, cb, ix); + def(DST, pIcode, this, pstate, cb, ix); + break; + + case iTEST: case iCMP: + if (! Imm) + use(SRC, pIcode, this, pstate, cb, ix); + use(DST, pIcode, this, pstate, cb, ix); + break; + + case iDIV: case iIDIV: + use(SRC, pIcode, this, pstate, cb, ix); + if (cb == 1) + pIcode->du.use |= duReg[rTMP]; + break; + + case iMUL: case iIMUL: + use(SRC, pIcode, this, pstate, cb, ix); + if (! Imm) + { + use (DST, pIcode, this, pstate, cb, ix); + if (cb == 1) + { + pIcode->du.def |= duReg[rAX]; + pIcode->du1.numRegsDef++; + } + else + { + pIcode->du.def |= (duReg[rAX] | duReg[rDX]); + pIcode->du1.numRegsDef += 2; + } + } + else + def (DST, pIcode, this, pstate, cb, ix); + break; + + case iSIGNEX: + cb = (pIcode->ic.ll.flg & SRC_B) ? 1 : 2; + if (cb == 1) /* byte */ + { + pIcode->du.def |= duReg[rAX]; + pIcode->du1.numRegsDef++; + pIcode->du.use |= duReg[rAL]; + } + else /* word */ + { + pIcode->du.def |= (duReg[rDX] | duReg[rAX]); + pIcode->du1.numRegsDef += 2; + pIcode->du.use |= duReg[rAX]; + } + break; + + case iCALLF: /* Ignore def's on CS for now */ + cb = 4; + case iCALL: case iPUSH: case iPOP: + if (! 
Imm) { + if (pIcode->ic.ll.opcode == iPOP) + def(DST, pIcode, this, pstate, cb, ix); + else + use(DST, pIcode, this, pstate, cb, ix); + } + break; + + case iESC: /* operands may be larger */ + use(DST, pIcode, this, pstate, cb, ix); + break; + + case iLDS: case iLES: + pIcode->du.def |= duReg[(pIcode->ic.ll.opcode == iLDS) ? rDS : rES]; + pIcode->du1.numRegsDef++; + cb = 4; + case iMOV: + use(SRC, pIcode, this, pstate, cb, ix); + def(DST, pIcode, this, pstate, cb, ix); + break; + + case iLEA: + use(SRC, pIcode, this, pstate, 2, ix); + def(DST, pIcode, this, pstate, 2, ix); + break; + + case iBOUND: + use(SRC, pIcode, this, pstate, 4, ix); + use(DST, pIcode, this, pstate, cb, ix); + break; + + case iJMPF: + cb = 4; + case iJMP: + if (! Imm) + use(SRC, pIcode, this, pstate, cb, ix); + break; + + case iLOOP: case iLOOPE: case iLOOPNE: + pIcode->du.def |= duReg[rCX]; + pIcode->du1.numRegsDef++; + case iJCXZ: + pIcode->du.use |= duReg[rCX]; + break; + + case iREPNE_CMPS: case iREPE_CMPS: case iREP_MOVS: + pIcode->du.def |= duReg[rCX]; + pIcode->du1.numRegsDef++; + pIcode->du.use |= duReg[rCX]; + case iCMPS: case iMOVS: + pIcode->du.def |= duReg[rSI] | duReg[rDI]; + pIcode->du1.numRegsDef += 2; + pIcode->du.use |= duReg[rSI] | duReg[rDI] | duReg[rES] | duReg[sseg]; + break; + + case iREPNE_SCAS: case iREPE_SCAS: case iREP_STOS: case iREP_INS: + pIcode->du.def |= duReg[rCX]; + pIcode->du1.numRegsDef++; + pIcode->du.use |= duReg[rCX]; + case iSCAS: case iSTOS: case iINS: + pIcode->du.def |= duReg[rDI]; + pIcode->du1.numRegsDef++; + if (pIcode->ic.ll.opcode == iREP_INS || pIcode->ic.ll.opcode== iINS) + { + pIcode->du.use |= duReg[rDI] | duReg[rES] | duReg[rDX]; + } + else + { + pIcode->du.use |= duReg[rDI] | duReg[rES] | duReg[(cb == 2)? rAX: rAL]; + } + break; + + case iREP_LODS: + pIcode->du.def |= duReg[rCX]; + pIcode->du1.numRegsDef++; + pIcode->du.use |= duReg[rCX]; + case iLODS: + pIcode->du.def |= duReg[rSI] | duReg[(cb==2)? 
rAX: rAL]; + pIcode->du1.numRegsDef += 2; + pIcode->du.use |= duReg[rSI] | duReg[sseg]; + break; + + case iREP_OUTS: + pIcode->du.def |= duReg[rCX]; + pIcode->du1.numRegsDef++; + pIcode->du.use |= duReg[rCX]; + case iOUTS: + pIcode->du.def |= duReg[rSI]; + pIcode->du1.numRegsDef++; + pIcode->du.use |= duReg[rSI] | duReg[rDX] | duReg[sseg]; + break; + + case iIN: case iOUT: + def(DST, pIcode, this, pstate, cb, ix); + if (! Imm) + { + pIcode->du.use |= duReg[rDX]; + } + break; + } + + for (i = rSP; i <= rBH; i++) /* Kill all defined registers */ + if (pIcode->ic.ll.flagDU.d & (1 << i)) + pstate->f[i] = FALSE; +} + diff --git a/src/perfhlib.cpp b/src/perfhlib.cpp new file mode 100644 index 0000000..920328f --- /dev/null +++ b/src/perfhlib.cpp @@ -0,0 +1,448 @@ +/* + * Perfect hashing function library. Contains functions to generate perfect + * hashing functions + * (C) Mike van Emmerik + */ + +#include +#include +#include +#include "perfhlib.h" + +/* Private data structures */ + +static int NumEntry; /* Number of entries in the hash table (# keys) */ +static int EntryLen; /* Size (bytes) of each entry (size of keys) */ +static int SetSize; /* Size of the char set */ +static char SetMin; /* First char in the set */ +static int NumVert; /* c times NumEntry */ + +static word *T1base, *T2base; /* Pointers to start of T1, T2 */ +static word *T1, *T2; /* Pointers to T1[i], T2[i] */ + +static int *graphNode; /* The array of edges */ +static int *graphNext; /* Linked list of edges */ +static int *graphFirst;/* First edge at a vertex */ + +static short *g; /* g[] */ + +static int numEdges; /* An edge counter */ +static bool *visited; /* Array of bools: whether visited */ + +/* Private prototypes */ +static void initGraph(void); +static void addToGraph(int e, int v1, int v2); +static bool isCycle(void); +static void duplicateKeys(int v1, int v2); + +void +hashParams(int _NumEntry, int _EntryLen, int _SetSize, char _SetMin, + int _NumVert) +{ + /* These parameters are stored in 
statics so as to obviate the need for + passing all these (or defererencing pointers) for every call to hash() + */ + + NumEntry = _NumEntry; + EntryLen = _EntryLen; + SetSize = _SetSize; + SetMin = _SetMin; + NumVert = _NumVert; + + /* Allocate the variable sized tables etc */ + if ((T1base = (word *)malloc(EntryLen * SetSize * sizeof(word))) == 0) + { + goto BadAlloc; + } + if ((T2base = (word *)malloc(EntryLen * SetSize * sizeof(word))) == 0) + { + goto BadAlloc; + } + + if ((graphNode = (int *)malloc((NumEntry*2 + 1) * sizeof(int))) == 0) + { + goto BadAlloc; + } + if ((graphNext = (int *)malloc((NumEntry*2 + 1) * sizeof(int))) == 0) + { + goto BadAlloc; + } + if ((graphFirst = (int *)malloc((NumVert + 1) * sizeof(int))) == 0) + { + goto BadAlloc; + } + + if ((g = (short *)malloc((NumVert+1) * sizeof(short))) == 0) + { + goto BadAlloc; + } + if ((visited = (bool *)malloc((NumVert+1) * sizeof(bool))) == 0) + { + goto BadAlloc; + } + return; + +BadAlloc: + printf("Could not allocate memory\n"); + hashCleanup(); + exit(1); +} + +void +hashCleanup(void) +{ + /* Free the storage for variable sized tables etc */ + if (T1base) free(T1base); + if (T2base) free(T2base); + if (graphNode) free(graphNode); + if (graphNext) free(graphNext); + if (graphFirst) free(graphFirst); + if (g) free(g); +} + +void +map(void) +{ + int i, j, c; + word f1, f2; + bool cycle; + byte *keys; + + c = 0; + + do + { + initGraph(); + cycle = FALSE; + + /* Randomly generate T1 and T2 */ + for (i=0; i < SetSize*EntryLen; i++) + { + T1base[i] = rand() % NumVert; + T2base[i] = rand() % NumVert; + } + + for (i=0; i < NumEntry; i++) + { + f1 = 0; f2 = 0; + getKey(i, &keys); + for (j=0; j < EntryLen; j++) + { + T1 = T1base + j * SetSize; + T2 = T2base + j * SetSize; + f1 += T1[keys[j] - SetMin]; + f2 += T2[keys[j] - SetMin]; + } + f1 %= (word)NumVert; + f2 %= (word)NumVert; + if (f1 == f2) + { + /* A self loop. Reject! 
*/ + printf("Self loop on vertex %d!\n", f1); + cycle = TRUE; + break; + } + addToGraph(numEdges++, f1, f2); + } + if (cycle || (cycle = isCycle())) /* OK - is there a cycle? */ + { + printf("Iteration %d\n", ++c); + } + else + { + break; + } + } + while (/* there is a cycle */ 1); + +} + +/* Initialise the graph */ +static void +initGraph(void) +{ + int i; + + for (i=1; i <= NumVert; i++) + { + graphFirst[i] = 0; + } + + for (i= -NumEntry; i <= NumEntry; i++) + { + /* No need to init graphNode[] as they will all be filled by successive + calls to addToGraph() */ + graphNext[NumEntry+i] = 0; + } + + numEdges = 0; +} + +/* Add an edge e between vertices v1 and v2 */ +/* e, v1, v2 are 0 based */ +static void +addToGraph(int e, int v1, int v2) +{ + e++; v1++; v2++; /* So much more convenient */ + + graphNode[NumEntry+e] = v2; /* Insert the edge information */ + graphNode[NumEntry-e] = v1; + + graphNext[NumEntry+e] = graphFirst[v1]; /* Insert v1 to list of alphas */ + graphFirst[v1]= e; + graphNext[NumEntry-e] = graphFirst[v2]; /* Insert v2 to list of omegas */ + graphFirst[v2]= -e; + +} + +bool DFS(int parentE, int v) +{ + int e, w; + + /* Depth first search of the graph, starting at vertex v, looking for + cycles. parent and v are origin 1. Note parent is an EDGE, + not a vertex */ + + visited[v] = TRUE; + + /* For each e incident with v .. */ + for (e = graphFirst[v]; e; e = graphNext[NumEntry+e]) + { + byte *key1; + + getKey(abs(e)-1, &key1); + if (*(long *)key1 == 0) + { + /* A deleted key. Just ignore it */ + continue; + } + w = graphNode[NumEntry+e]; + if (visited[w]) + { + /* Did we just come through this edge? If so, ignore it. */ + if (abs(e) != abs(parentE)) + { + /* There is a cycle in the graph. There is some subtle code here + to work around the distinct possibility that there may be + duplicate keys. Duplicate keys will always cause unit + cycles, since f1 and f2 (used to select v and w) will be the + same for both. 
The edges (representing an index into the + array of keys) are distinct, but the key values are not. + The logic is as follows: for the candidate edge e, check to + see if it terminates in the parent vertex. If so, we test + the keys associated with e and the parent, and if they are + the same, we can safely ignore e for the purposes of cycle + detection, since edge e adds nothing to the cycle. Cycles + involving v, w, and e0 will still be found. The parent + edge was not similarly eliminated because at the time when + it was a candidate, v was not yet visited. + We still have to remove the key from further consideration, + since each edge is visited twice, but with a different + parent edge each time. + */ + /* We save some stack space by calculating the parent vertex + for these relatively few cases where it is needed */ + int parentV = graphNode[NumEntry-parentE]; + + if (w == parentV) + { + byte *key2; + + getKey(abs(parentE)-1, &key2); + if (memcmp(key1, key2, EntryLen) == 0) + { + printf("Duplicate keys with edges %d and %d (", + e, parentE); + dispKey(abs(e)-1); + printf(" & "); + dispKey(abs(parentE)-1); + printf(")\n"); +/* *(long *)key1 = 0; /* Wipe the key */ +memset(key1, 0, EntryLen); + } + else + { + /* A genuine (unit) cycle. */ +printf("There is a unit cycle involving vertex %d and edge %d\n", v, e); + return TRUE; + } + + } + else + { + /* We have reached a previously visited vertex not the + parent. Therefore, we have uncovered a genuine cycle */ +printf("There is a cycle involving vertex %d and edge %d\n", v, e); + return TRUE; + + } + } + } + else /* Not yet seen. Traverse it */ + { + if (DFS(e, w)) + { + /* Cycle found deeper down. 
Exit */ + return TRUE; + } + } + } + return FALSE; +} + +static bool +isCycle(void) +{ + int v; + + for (v=1; v <= NumVert; v++) + { + visited[v] = FALSE; + } + for (v=1; v <= NumVert; v++) + { + if (!visited[v]) + { + if (DFS(-32767, v)) + { + return TRUE; + } + } + } + return FALSE; +} + +void +traverse(int u) +{ + int w, e; + + visited[u] = TRUE; + /* Find w, the neighbours of u, by searching the edges e associated with u */ + e = graphFirst[1+u]; + while (e) + { + w = graphNode[NumEntry+e]-1; + if (!visited[w]) + { + g[w] = (abs(e)-1 - g[u]) % NumEntry; + if (g[w] < 0) g[w] += NumEntry; /* Keep these positive */ + traverse(w); + } + e = graphNext[NumEntry+e]; + } + +} + +void +assign(void) +{ + int v; + + + for (v=0; v < NumVert; v++) + { + g[v] = 0; /* g is sparse; leave the gaps 0 */ + visited[v] = FALSE; + } + + for (v=0; v < NumVert; v++) + { + if (!visited[v]) + { + g[v] = 0; + traverse(v); + } + } +} + +int +hash(byte *string) +{ + word u, v; + int j; + + u = 0; + for (j=0; j < EntryLen; j++) + { + T1 = T1base + j * SetSize; + u += T1[string[j] - SetMin]; + } + u %= NumVert; + + v = 0; + for (j=0; j < EntryLen; j++) + { + T2 = T2base + j * SetSize; + v += T2[string[j] - SetMin]; + } + v %= NumVert; + + return (g[u] + g[v]) % NumEntry; +} + +word * +readT1(void) +{ + return T1base; +} + +word * +readT2(void) +{ + return T2base; +} + +word * +readG(void) +{ + return (word *)g; +} + +#if 0 +void dispRecord(int i); + +void +duplicateKeys(int v1, int v2) +{ + int i, j; + byte *keys; + int u, v; + + v1--; v2--; /* These guys are origin 1 */ + + printf("Duplicate keys:\n"); + + for (i=0; i < NumEntry; i++) + { + getKey(i, &keys); + u = 0; + for (j=0; j < EntryLen; j++) + { + T1 = T1base + j * SetSize; + u += T1[keys[j] - SetMin]; + } + u %= NumVert; + if ((u != v1) && (u != v2)) continue; + + v = 0; + for (j=0; j < EntryLen; j++) + { + T2 = T2base + j * SetSize; + v += T2[keys[j] - SetMin]; + } + v %= NumVert; + + if ((v == v2) || (v == v1)) + { + printf("Entry 
#%d key: ", i+1); + for (j=0; j < EntryLen; j++) printf("%02X ", keys[j]); + printf("\n"); + dispRecord(i+1); + } + } + exit(1); + + +} +#endif diff --git a/src/procs.cpp b/src/procs.cpp new file mode 100644 index 0000000..24aba25 --- /dev/null +++ b/src/procs.cpp @@ -0,0 +1,394 @@ +/* + * File: procs.c + * Purpose: Functions to support Call graphs and procedures + * Date: November 1993 + * (C) Cristina Cifuentes + */ + +#include +#include +#include "dcc.h" + + +/* Static indentation buffer */ +#define indSize 61 /* size of indentation buffer; max 20 */ +static char indentBuf[indSize] = + " "; + +static char *indent (Int indLevel) +/* Indentation according to the depth of the statement */ +{ + return (&indentBuf[indSize-(indLevel*3)-1]); +} + + +/* Inserts an outEdge at the current callGraph pointer if the newProc does + * not exist. */ +void CALL_GRAPH::insertArc (ilFunction newProc) +{ + CALL_GRAPH *pcg; + Int i; + + /* Check if procedure already exists */ + for (i = 0; i < outEdges.size(); i++) + if (outEdges[i]->proc == newProc) + return; + + /* Include new arc */ + pcg = new CALL_GRAPH; + pcg->proc = newProc; + outEdges.push_back(pcg); +} + + +/* Inserts a (caller, callee) arc in the call graph tree. */ +boolT CALL_GRAPH::insertCallGraph(ilFunction caller, ilFunction callee) +{ + Int i; + + if (proc == caller) + { + insertArc (callee); + return (TRUE); + } + else + { + for (i = 0; i < outEdges.size(); i++) + if (outEdges[i]->insertCallGraph (caller, callee)) + return true; + return (false); + } +} + +boolT CALL_GRAPH::insertCallGraph(Function *caller, ilFunction callee) +{ + auto iter = std::find_if(pProcList.begin(),pProcList.end(), + [caller](const Function &f)->bool {return caller==&f;}); + assert(iter!=pProcList.end()); + return insertCallGraph(iter,callee); +} + + +/* Displays the current node of the call graph, and invokes recursively on + * the nodes the procedure invokes. 
*/ +void CALL_GRAPH::writeNodeCallGraph(Int indIdx) +{ + Int i; + + printf ("%s%s\n", indent(indIdx), proc->name); + for (i = 0; i < outEdges.size(); i++) + outEdges[i]->writeNodeCallGraph (indIdx + 1); +} + + +/* Writes the header and invokes recursive procedure */ +void CALL_GRAPH::write() +{ + printf ("\nCall Graph:\n"); + writeNodeCallGraph (0); +} + + +/************************************************************************** + * Routines to support arguments + *************************************************************************/ + +/* Updates the argument table by including the register(s) (ie. lhs of + * picode) and the actual expression (ie. rhs of picode). + * Note: register(s) are only included once in the table. */ +void newRegArg (Function * pproc, ICODE *picode, ICODE *ticode) +{ + COND_EXPR *lhs; + STKFRAME * ps, *ts; + ID *id; + Int i, tidx; + boolT regExist; + condId type; + Function * tproc; + byte regL, regH; /* Registers involved in arguments */ + + /* Flag ticode as having register arguments */ + tproc = ticode->ic.hl.oper.call.proc; + tproc->flg |= REG_ARGS; + + /* Get registers and index into target procedure's local list */ + ps = ticode->ic.hl.oper.call.args; + ts = &tproc->args; + lhs = picode->ic.hl.oper.asgn.lhs; + type = lhs->expr.ident.idType; + if (type == REGISTER) + { + regL = pproc->localId.id_arr[lhs->expr.ident.idNode.regiIdx].id.regi; + if (regL < rAL) + tidx = tproc->localId.newByteWordReg(TYPE_WORD_SIGN, regL); + else + tidx = tproc->localId.newByteWordReg(TYPE_BYTE_SIGN, regL); + } + else if (type == LONG_VAR) + { + regL = pproc->localId.id_arr[lhs->expr.ident.idNode.longIdx].id.longId.l; + regH = pproc->localId.id_arr[lhs->expr.ident.idNode.longIdx].id.longId.h; + tidx = tproc->localId.newLongReg(TYPE_LONG_SIGN, regH, regL, 0); + } + + /* Check if register argument already on the formal argument list */ + regExist = FALSE; + for (i = 0; i < ts->sym.size(); i++) + { + if (type == REGISTER) + { + if ((ts->sym[i].regs != 
NULL) && + (ts->sym[i].regs->expr.ident.idNode.regiIdx == tidx)) + { + regExist = TRUE; + i = ts->sym.size(); + } + } + else if (type == LONG_VAR) + { + if ((ts->sym[i].regs != NULL) && + (ts->sym[i].regs->expr.ident.idNode.longIdx == tidx)) + { + regExist = TRUE; + i = ts->sym.size(); + } + } + } + + /* Do ts (formal arguments) */ + if (regExist == FALSE) + { + STKSYM newsym; + sprintf (newsym.name, "arg%ld", ts->sym.size()); + if (type == REGISTER) + { + if (regL < rAL) + { + newsym.type = TYPE_WORD_SIGN; + newsym.regs = COND_EXPR::idRegIdx(tidx, WORD_REG); + } + else + { + newsym.type = TYPE_BYTE_SIGN; + newsym.regs = COND_EXPR::idRegIdx(tidx, BYTE_REG); + } + sprintf (tproc->localId.id_arr[tidx].name, "arg%ld", ts->sym.size()); + } + else if (type == LONG_VAR) + { + newsym.regs = COND_EXPR::idLongIdx (tidx); + newsym.type = TYPE_LONG_SIGN; + sprintf (tproc->localId.id_arr[tidx].name, "arg%ld", ts->sym.size()); + tproc->localId.propLongId (regL, regH, + tproc->localId.id_arr[tidx].name); + } + ts->sym.push_back(newsym); + ts->numArgs++; + } + + /* Do ps (actual arguments) */ + STKSYM newsym; + sprintf (newsym.name, "arg%ld", ps->sym.size()); + newsym.actual = picode->ic.hl.oper.asgn.rhs; + newsym.regs = lhs; + /* Mask off high and low register(s) in picode */ + switch (type) { + case REGISTER: + id = &pproc->localId.id_arr[lhs->expr.ident.idNode.regiIdx]; + picode->du.def &= maskDuReg[id->id.regi]; + if (id->id.regi < rAL) + newsym.type = TYPE_WORD_SIGN; + else + newsym.type = TYPE_BYTE_SIGN; + break; + case LONG_VAR: + id = &pproc->localId.id_arr[lhs->expr.ident.idNode.longIdx]; + picode->du.def &= maskDuReg[id->id.longId.h]; + picode->du.def &= maskDuReg[id->id.longId.l]; + newsym.type = TYPE_LONG_SIGN; + break; + } + ps->sym.push_back(newsym); + ps->numArgs++; +} + + +/* Allocates num arguments in the actual argument list of the current + * icode picode. 
*/ +/** NOTE: this function is not used ****/ +void allocStkArgs (ICODE *picode, Int num) +{ + STKFRAME * ps; + ps = picode->ic.hl.oper.call.args; + ps->numArgs = num; + ps->sym.resize(num); +} + + +boolT newStkArg (ICODE *picode, COND_EXPR *exp, llIcode opcode, Function * pproc) +/* Inserts the new expression (ie. the actual parameter) on the argument + * list. + * Returns: TRUE if it was a near call that made use of a segment register. + * FALSE elsewhere */ +{ STKFRAME * ps; + byte regi; + + /* Check for far procedure call, in which case, references to segment + * registers are not be considered another parameter (i.e. they are + * long references to another segment) */ + if (exp) + { + if ((exp->type == IDENTIFIER) && (exp->expr.ident.idType == REGISTER)) + { + regi = pproc->localId.id_arr[exp->expr.ident.idNode.regiIdx].id.regi; + if ((regi >= rES) && (regi <= rDS)) + if (opcode == iCALLF) + return (FALSE); + else + return (TRUE); + } + } + + /* Place register argument on the argument list */ + ps = picode->ic.hl.oper.call.args; + STKSYM newsym; + newsym.actual = exp; + ps->sym.push_back(newsym); + ps->numArgs++; + return false; +} + + +/* Places the actual argument exp in the position given by pos in the + * argument list of picode. */ +void placeStkArg (ICODE *picode, COND_EXPR *exp, Int pos) +{ STKFRAME * ps; + + ps = picode->ic.hl.oper.call.args; + ps->sym[pos].actual = exp; + sprintf (ps->sym[pos].name, "arg%ld", pos); +} + + +/* Checks to determine whether the expression (actual argument) has the + * same type as the given type (from the procedure's formal list). 
If not,
 * the actual argument gets modified */
void adjustActArgType (COND_EXPR *exp, hlType forType, Function * pproc)
{
    /* No actual argument, nothing to adjust. */
    if (exp == NULL)
        return;

    const hlType actType = expType (exp, pproc);

    /* Only an identifier whose type disagrees with the formal is a
     * candidate for adjustment. */
    if ((actType == forType) || (exp->type != IDENTIFIER))
        return;

    /* The only adjustment implemented so far: the formal is a string and
     * the actual is a constant.  Every other formal type, and pointer or
     * word actuals for a string formal, are left untouched. */
    if ((forType != TYPE_STR) || (actType != TYPE_CONST))
        return;

    /* The constant is an offset into the image where a string is found.
     * Point to the string; for a COM program 0x100 is added as well. */
    const Int offL = exp->expr.ident.idNode.kte.kte;
    const Int offset = prog.fCOM
                     ? (pproc->state.r[rDS]<<4) + offL + 0x100
                     : (pproc->state.r[rDS]<<4) + offL;
    exp->expr.ident.idNode.strIdx = offset;
    exp->expr.ident.idType = STRING;
}


/* Determines whether the formal argument has the same type as the given
 * type (type of the actual argument).
If not, the formal argument is + * changed its type */ +void STKFRAME::adjustForArgType(Int numArg_, hlType actType_) +{ + hlType forType; + STKSYM * psym, * nsym; + Int off, i; + + /* Find stack offset for this argument */ + off = minOff; + for (i = 0; i < numArg_; i++) + off += sym[i].size; + + /* Find formal argument */ + if (numArg_ < sym.size()) + { + psym = &sym[numArg_]; + i = numArg_; + while ((i < sym.size()) && (psym->off != off)) + { + psym++; + i++; + } + if (numArg_ == sym.size()) + return; + } + /* If formal argument does not exist, do not create new ones, just + * ignore actual argument */ + else + return; + + forType = psym->type; + if (forType != actType_) + { + switch (actType_) { + case TYPE_UNKNOWN: case TYPE_BYTE_SIGN: + case TYPE_BYTE_UNSIGN: case TYPE_WORD_SIGN: + case TYPE_WORD_UNSIGN: case TYPE_RECORD: + break; + + case TYPE_LONG_UNSIGN: case TYPE_LONG_SIGN: + if ((forType == TYPE_WORD_UNSIGN) || + (forType == TYPE_WORD_SIGN) || + (forType == TYPE_UNKNOWN)) + { + /* Merge low and high */ + psym->type = actType_; + psym->size = 4; + nsym = psym + 1; + sprintf (nsym->macro, "HI"); + sprintf (psym->macro, "LO"); + nsym->hasMacro = TRUE; + psym->hasMacro = TRUE; + sprintf (nsym->name, "%s", psym->name); + nsym->invalid = TRUE; + numArgs--; + } + break; + + case TYPE_PTR: + case TYPE_CONST: + case TYPE_STR: + break; + } /* eos */ + } +} + diff --git a/src/proplong.cpp b/src/proplong.cpp new file mode 100644 index 0000000..1ce7c48 --- /dev/null +++ b/src/proplong.cpp @@ -0,0 +1,541 @@ +/************************************************************************** + * File : propLong.c + * Purpose: propagate the value of long variables (local variables and + * registers) along the graph. Structure the graph in this way. 
+ * (C) Cristina Cifuentes + **************************************************************************/ +#include +#include +#include +#include + +#include "dcc.h" + +static boolT isJCond (llIcode opcode) +/* Returns whether the given icode opcode is within the range of valid + * high-level conditional jump icodes (iJB..iJG) */ +{ + if ((opcode >= iJB) && (opcode <= iJG)) + return (TRUE); + return (FALSE); +} + + +static boolT isLong23 (Int i, BB * pbb, ICODE * icode, Int *off, Int *arc) +/* Returns whether the conditions for a 2-3 long variable are satisfied */ +{ BB * t, * e, * obb2; + + if (pbb->nodeType != TWO_BRANCH) + return (FALSE); + t = pbb->edges[THEN].BBptr; + e = pbb->edges[ELSE].BBptr; + + /* Check along the THEN path */ + if ((t->length == 1) && (t->nodeType == TWO_BRANCH) && (t->numInEdges == 1)) + { + obb2 = t->edges[THEN].BBptr; + if ((obb2->length == 2) && (obb2->nodeType == TWO_BRANCH) && + (icode[obb2->start].ic.ll.opcode == iCMP)) + { + *off = obb2->start - i; + *arc = THEN; + return (TRUE); + } + } + + /* Check along the ELSE path */ + else if ((e->length == 1) && (e->nodeType == TWO_BRANCH) && + (e->numInEdges == 1)) + { + obb2 = e->edges[THEN].BBptr; + if ((obb2->length == 2) && (obb2->nodeType == TWO_BRANCH) && + (icode[obb2->start].ic.ll.opcode == iCMP)) + { + *off = obb2->start - i; + *arc = ELSE; + return (TRUE); + } + } + return (FALSE); +} + + +/* Returns whether the conditions for a 2-2 long variable are satisfied */ +static boolT isLong22 (ICODE * pIcode, ICODE * pEnd, Int *off) +{ + if (((pIcode+2) < pEnd) && ((pIcode+2)->ic.ll.opcode == iCMP) && + (isJCond ((pIcode+1)->ic.ll.opcode)) && + (isJCond ((pIcode+3)->ic.ll.opcode))) + { + *off = 2; + return (TRUE); + } + return (FALSE); +} + + +/* Creates a long conditional <=, >=, <, or > at (pIcode+1). + * Removes excess nodes from the graph by flagging them, and updates + * the new edges for the remaining nodes. 
*/ +static void longJCond23 (COND_EXPR *rhs, COND_EXPR *lhs, ICODE * pIcode, + Int *idx, Function * pProc, Int arc, Int off) +{ Int j; + BB * pbb, * obb1, * obb2, * tbb; + + if (arc == THEN) + { + /* Find intermediate basic blocks and target block */ + pbb = pIcode->inBB; + obb1 = pbb->edges[THEN].BBptr; + obb2 = obb1->edges[THEN].BBptr; + tbb = obb2->edges[THEN].BBptr; + + /* Modify out edge of header basic block */ + pbb->edges[THEN].BBptr = tbb; + + /* Modify in edges of target basic block */ + auto newlast=std::remove_if(tbb->inEdges.begin(),tbb->inEdges.end(), + [obb1,obb2](BB *b) -> bool + { + return (b==obb1) || (b==obb2); + } + ); + tbb->inEdges.erase(newlast,tbb->inEdges.end()); + tbb->numInEdges--; /* looses 2 arcs, gains 1 arc */ + tbb->inEdges.push_back(pbb); + assert(tbb->inEdges.size()==tbb->numInEdges); + + /* Modify in edges of the ELSE basic block */ + tbb = pbb->edges[ELSE].BBptr; + auto iter=std::find(tbb->inEdges.begin(),tbb->inEdges.end(),obb2); + assert(iter!=tbb->inEdges.end()); + tbb->inEdges.erase(iter); + tbb->numInEdges--; /* looses 1 arc */ + assert(tbb->inEdges.size()==tbb->numInEdges); + /* Update icode index */ + (*idx) += 5; + } + + else /* ELSE arc */ + { + /* Find intermediate basic blocks and target block */ + pbb = pIcode->inBB; + obb1 = pbb->edges[ELSE].BBptr; + obb2 = obb1->edges[THEN].BBptr; + tbb = obb2->edges[THEN].BBptr; + + /* Modify in edges of target basic block */ + auto iter=std::find(tbb->inEdges.begin(),tbb->inEdges.end(),obb2); + assert(iter!=tbb->inEdges.end()); + tbb->inEdges.erase(iter); + tbb->numInEdges--; /* looses 1 arc */ + assert(tbb->inEdges.size()==tbb->numInEdges); + + /* Modify in edges of the ELSE basic block */ + tbb = obb2->edges[ELSE].BBptr; + auto newlast=std::remove_if(tbb->inEdges.begin(),tbb->inEdges.end(), + [obb1,obb2](BB *b) -> bool + { + return (b==obb1) || (b==obb2); + } + ); + tbb->inEdges.erase(newlast,tbb->inEdges.end()); + tbb->numInEdges--; /* looses 2 arcs, gains 1 arc */ + 
tbb->inEdges.push_back(pbb); + assert(tbb->inEdges.size()==tbb->numInEdges); + + /* Modify out edge of header basic block */ + pbb->edges[ELSE].BBptr = tbb; + + /* Update icode index */ + (*idx) += 2; + } + + /* Create new HLI_JCOND and condition */ + lhs = COND_EXPR::boolOp (lhs, rhs, condOpJCond[(pIcode+off+1)->ic.ll.opcode-iJB]); + (pIcode+1)->setJCond(lhs); + copyDU (pIcode+1, pIcode, eUSE, eUSE); + (pIcode+1)->du.use |= (pIcode+off)->du.use; + + /* Update statistics */ + obb1->flg |= INVALID_BB; + obb2->flg |= INVALID_BB; + stats.numBBaft -= 2; + + pIcode->invalidate(); + pProc->Icode.GetIcode(obb1->start)->invalidate(); + pProc->Icode.GetIcode(obb2->start)->invalidate(); + pProc->Icode.GetIcode(obb2->start+1)->invalidate(); +} + + +/* Creates a long conditional equality or inequality at (pIcode+1). + * Removes excess nodes from the graph by flagging them, and updates + * the new edges for the remaining nodes. */ +static void longJCond22 (COND_EXPR *rhs, COND_EXPR *lhs, ICODE * pIcode, + Int *idx) +{ + Int j; + BB * pbb, * obb1, * tbb; + + /* Form conditional expression */ + lhs = COND_EXPR::boolOp (lhs, rhs, condOpJCond[(pIcode+3)->ic.ll.opcode - iJB]); + (pIcode+1)->setJCond(lhs); + copyDU (pIcode+1, pIcode, eUSE, eUSE); + (pIcode+1)->du.use |= (pIcode+2)->du.use; + + /* Adjust outEdges[0] to the new target basic block */ + pbb = pIcode->inBB; + if ((pbb->start + pbb->length - 1) == (*idx + 1)) + { + /* Find intermediate and target basic blocks */ + obb1 = pbb->edges[THEN].BBptr; + tbb = obb1->edges[THEN].BBptr; + + /* Modify THEN out edge of header basic block */ + pbb->edges[THEN].BBptr = tbb; + + /* Modify in edges of target basic block */ + auto iter=std::find(tbb->inEdges.begin(),tbb->inEdges.end(),obb1); + assert(iter!=tbb->inEdges.end()); + tbb->inEdges.erase(iter); + + if ((pIcode+3)->ic.ll.opcode == iJE) + tbb->numInEdges--; /* looses 1 arc */ + else /* iJNE => replace arc */ + tbb->inEdges.push_back(pbb); + 
assert(tbb->inEdges.size()==tbb->numInEdges); + + /* Modify ELSE out edge of header basic block */ + tbb = obb1->edges[ELSE].BBptr; + pbb->edges[ELSE].BBptr = tbb; + + iter=std::find(tbb->inEdges.begin(),tbb->inEdges.end(),obb1); + assert(iter!=tbb->inEdges.end()); + tbb->inEdges.erase(iter); + if ((pIcode+3)->ic.ll.opcode == iJE) /* replace */ + tbb->inEdges.push_back(pbb); + else + tbb->numInEdges--; /* iJNE => looses 1 arc */ + assert(tbb->inEdges.size()==tbb->numInEdges); + + + /* Update statistics */ + obb1->flg |= INVALID_BB; + stats.numBBaft--; + } + + pIcode->invalidate(); + (pIcode+2)->invalidate(); + (pIcode+3)->invalidate(); + (*idx) += 4; +} + +/* Propagates TYPE_LONG_(UN)SIGN icode information to the current pIcode + * Pointer. + * Arguments: i : index into the local identifier table + * pLocId: ptr to the long local identifier + * pProc : ptr to current procedure's record. */ +static void propLongStk (Int i, ID *pLocId, Function * pProc) +{ + Int idx, off, arc; + COND_EXPR *lhs, *rhs; /* Pointers to left and right hand expression */ + ICODE * pIcode, * pEnd; + + /* Check all icodes for offHi:offLo */ + pEnd = pProc->Icode.GetIcode(pProc->Icode.GetNumIcodes() -1); + for (idx = 0; idx < (pProc->Icode.GetNumIcodes() - 1); idx++) + { + pIcode = pProc->Icode.GetIcode(idx); + if ((pIcode->type == HIGH_LEVEL) || (pIcode->invalid == TRUE)) + continue; + + if (pIcode->ic.ll.opcode == (pIcode+1)->ic.ll.opcode) + { + switch (pIcode->ic.ll.opcode) { + case iMOV: + if (checkLongEq (pLocId->id.longStkId, pIcode, i, idx, pProc, + &rhs, &lhs, 1) == TRUE) + { + pIcode->setAsgn(lhs, rhs); + (pIcode+1)->invalidate(); + idx++; + } + break; + + case iAND: case iOR: case iXOR: + if (checkLongEq (pLocId->id.longStkId, pIcode, i, idx, pProc, + &rhs, &lhs, 1) == TRUE) + { + switch (pIcode->ic.ll.opcode) { + case iAND: rhs = COND_EXPR::boolOp (lhs, rhs, AND); + break; + case iOR: rhs = COND_EXPR::boolOp (lhs, rhs, OR); + break; + case iXOR: rhs = COND_EXPR::boolOp (lhs, rhs, 
XOR); + break; + } + pIcode->setAsgn(lhs, rhs); + (pIcode+1)->invalidate(); + idx++; + } + break; + + case iPUSH: + if (checkLongEq (pLocId->id.longStkId, pIcode, i, idx, pProc, + &rhs, &lhs, 1) == TRUE) + { + pIcode->setUnary( HLI_PUSH, lhs); + (pIcode+1)->invalidate(); + idx++; + } + break; + } /*eos*/ + } + + /* Check long conditional (i.e. 2 CMPs and 3 branches */ + else if ((pIcode->ic.ll.opcode == iCMP) && (isLong23 (idx, pIcode->inBB, pProc->Icode.GetFirstIcode(),&off, &arc))) + { + if (checkLongEq (pLocId->id.longStkId, pIcode, i, idx, pProc, &rhs, &lhs, off) == TRUE) + longJCond23 (rhs, lhs, pIcode, &idx, pProc, arc, off); + } + + /* Check for long conditional equality or inequality. This requires + * 2 CMPs and 2 branches */ + else if ((pIcode->ic.ll.opcode == iCMP) && + isLong22 (pIcode, pEnd, &off)) + { + if (checkLongEq (pLocId->id.longStkId, pIcode, i, idx, pProc, + &rhs, &lhs, off) == TRUE) + longJCond22 (rhs, lhs, pIcode, &idx); + } + } +} + + +/* Finds the definition of the long register pointed to by pLocId, and + * transforms that instruction into a HIGH_LEVEL icode instruction. + * Arguments: i : index into the local identifier table + * pLocId: ptr to the long local identifier + * pProc : ptr to current procedure's record. 
*/
+static void propLongReg (Int i, ID *pLocId, Function * pProc)
+{
+    COND_EXPR *lhs, *rhs;
+    Int idx, j, off, arc;
+    ICODE * pIcode, * pEnd;
+    ICODEMEM * pmH,* pmL; /* Pointers to dst LOW_LEVEL icodes */
+
+    /* Process all definitions/uses of long registers at an icode position */
+    pEnd = pProc->Icode.GetIcode(pProc->Icode.GetNumIcodes() -1);
+    for (j = 0; j < pLocId->idx.size(); j++)
+    {
+        /* Check backwards for a definition of this long register */
+        for (idx = pLocId->idx[j] - 1; idx > 0 ; idx--)
+        {
+            pIcode = pProc->Icode.GetIcode(idx-1);
+            if ((pIcode->type == HIGH_LEVEL) || (pIcode->invalid == TRUE))
+                continue;
+
+            if (pIcode->ic.ll.opcode == (pIcode+1)->ic.ll.opcode)
+                switch (pIcode->ic.ll.opcode)
+                {
+                case iMOV:
+                    pmH = &pIcode->ic.ll.dst;
+                    pmL = &(pIcode+1)->ic.ll.dst;
+                    if ((pLocId->id.longId.h == pmH->regi) && (pLocId->id.longId.l == pmL->regi))
+                    {
+                        lhs = COND_EXPR::idLongIdx (i);
+                        pProc->localId.id_arr[i].idx.push_back(idx-1);
+                        pIcode->setRegDU( pmL->regi, eDEF);
+                        rhs = COND_EXPR::idLong (&pProc->localId, SRC, pIcode, HIGH_FIRST, idx, eUSE, 1);
+                        pIcode->setAsgn(lhs, rhs);
+                        (pIcode+1)->invalidate();
+                        idx = 0; /* to exit the loop */
+                    }
+                    break;
+
+                case iPOP:
+                    /* POP order is low word first, then high word */
+                    pmH = &(pIcode+1)->ic.ll.dst;
+                    pmL = &pIcode->ic.ll.dst;
+                    if ((pLocId->id.longId.h == pmH->regi) && (pLocId->id.longId.l == pmL->regi))
+                    {
+                        lhs = COND_EXPR::idLongIdx (i);
+                        pIcode->setRegDU( pmH->regi, eDEF);
+                        pIcode->setUnary(HLI_POP, lhs);
+                        (pIcode+1)->invalidate();
+                        idx = 0; /* to exit the loop */
+                    }
+                    break;
+
+                    /**** others missing ***/
+
+                case iAND: case iOR: case iXOR:
+                    pmL = &pIcode->ic.ll.dst;
+                    pmH = &(pIcode+1)->ic.ll.dst;
+                    if ((pLocId->id.longId.h == pmH->regi) && (pLocId->id.longId.l == pmL->regi))
+                    {
+                        lhs = COND_EXPR::idLongIdx (i);
+                        pIcode->setRegDU( pmH->regi, USE_DEF);
+                        rhs = COND_EXPR::idLong (&pProc->localId, SRC, pIcode, LOW_FIRST, idx, eUSE, 1);
+                        switch (pIcode->ic.ll.opcode) {
+                        case iAND: rhs = COND_EXPR::boolOp (lhs, rhs, AND);
+                            break;
+                        case iOR:
+                            rhs = COND_EXPR::boolOp (lhs, rhs, OR);
+                            break;
+                        case iXOR: rhs = COND_EXPR::boolOp (lhs, rhs, XOR);
+                            break;
+                        } /* eos */
+                        pIcode->setAsgn(lhs, rhs);
+                        (pIcode+1)->invalidate();
+                        idx = 0; /* to exit the loop */
+                    }
+                    break;
+                } /* eos */
+        }
+
+        /* If no definition backwards, check forward for a use of this long reg.
+         * NOTE(review): the backward loop always leaves idx <= 0 (0 when it
+         * runs out, -1 after an "idx = 0" exit), so this test is always true
+         * and the forward scan runs even when a definition was found.
+         * Left unchanged; confirm the intent before tightening it. */
+        if (idx <= 0)
+            for (idx = pLocId->idx[j] + 1; idx < pProc->Icode.GetNumIcodes() - 1; idx++)
+            {
+                pIcode = pProc->Icode.GetIcode(idx);
+                if ((pIcode->type == HIGH_LEVEL) || (pIcode->invalid == TRUE))
+                    continue;
+
+                if (pIcode->ic.ll.opcode == (pIcode+1)->ic.ll.opcode)
+                    switch (pIcode->ic.ll.opcode) {
+                    case iMOV:
+                        if ((pLocId->id.longId.h == pIcode->ic.ll.src.regi) &&
+                                (pLocId->id.longId.l == (pIcode+1)->ic.ll.src.regi))
+                        {
+                            rhs = COND_EXPR::idLongIdx (i);
+                            pIcode->setRegDU( (pIcode+1)->ic.ll.src.regi, eUSE);
+                            lhs = COND_EXPR::idLong (&pProc->localId, DST, pIcode,
+                                                     HIGH_FIRST, idx, eDEF, 1);
+                            pIcode->setAsgn(lhs, rhs);
+                            (pIcode+1)->invalidate();
+                            idx = pProc->Icode.GetNumIcodes(); /* to exit the loop */
+                        }
+                        break;
+
+                    case iPUSH:
+                        if ((pLocId->id.longId.h == pIcode->ic.ll.src.regi) &&
+                                (pLocId->id.longId.l == (pIcode+1)->ic.ll.src.regi))
+                        {
+                            rhs = COND_EXPR::idLongIdx (i);
+                            pIcode->setRegDU( (pIcode+1)->ic.ll.src.regi, eUSE);
+                            /* Push the expression built just above.  The
+                             * previous code passed lhs here, which is stale
+                             * (or uninitialised) at this point. */
+                            pIcode->setUnary(HLI_PUSH, rhs);
+                            (pIcode+1)->invalidate();
+                        }
+                        /* NOTE(review): this exit is outside the if, so any
+                         * PUSH/PUSH pair ends the forward scan even when it
+                         * does not use this long register -- confirm. */
+                        idx = pProc->Icode.GetNumIcodes(); /* to exit the loop */
+                        break;
+
+                        /*** others missing ****/
+
+                    case iAND: case iOR: case iXOR:
+                        pmL = &pIcode->ic.ll.dst;
+                        pmH = &(pIcode+1)->ic.ll.dst;
+                        if ((pLocId->id.longId.h == pmH->regi) &&
+                                (pLocId->id.longId.l == pmL->regi))
+                        {
+                            lhs = COND_EXPR::idLongIdx (i);
+                            pIcode->setRegDU( pmH->regi, USE_DEF);
+                            rhs = COND_EXPR::idLong (&pProc->localId, SRC, pIcode,
+                                                     LOW_FIRST, idx, eUSE, 1);
+                            switch (pIcode->ic.ll.opcode) {
+                            case iAND: rhs = COND_EXPR::boolOp (lhs, rhs, AND);
+                                break;
+                            case iOR: rhs = COND_EXPR::boolOp (lhs, rhs, OR);
+                                break;
+                            case iXOR: rhs = COND_EXPR::boolOp (lhs, rhs, XOR);
+                                break;
+                            }
+                            pIcode->setAsgn(lhs, rhs);
+                            (pIcode+1)->invalidate();
+                            /* NOTE(review): in this forward loop idx = 0 does
+                             * not exit; the scan restarts from icode 1.
+                             * Kept as is -- confirm whether a restart or an
+                             * exit was intended. */
+                            idx = 0;
+                        }
+                        break;
+                    } /* eos */
+
+                /* Check long conditional (i.e. 2 CMPs and 3 branches) */
+                else if ((pIcode->ic.ll.opcode == iCMP) &&
+                         (isLong23 (idx, pIcode->inBB, pProc->Icode.GetFirstIcode(),
+                                    &off, &arc)))
+                {
+                    if (checkLongRegEq (pLocId->id.longId, pIcode, i, idx, pProc,
+                                        &rhs, &lhs, off) == TRUE)
+                        longJCond23 (rhs, lhs, pIcode, &idx, pProc, arc, off);
+                }
+
+                /* Check for long conditional equality or inequality. This requires
+                 * 2 CMPs and 2 branches */
+                else if ((pIcode->ic.ll.opcode == iCMP) &&
+                         (isLong22 (pIcode, pEnd, &off)))
+                {
+                    if (checkLongRegEq (pLocId->id.longId, pIcode, i, idx, pProc,
+                                        &rhs, &lhs, off) == TRUE)
+                        longJCond22 (rhs, lhs, pIcode, &idx);
+                }
+
+                /* Check for OR regH, regL
+                 *           JX lab
+                 *    => HLI_JCOND (regH:regL X 0) lab
+                 * This is better code than HLI_JCOND (HI(regH:regL) | LO(regH:regL)) */
+                else if ((pIcode->ic.ll.opcode == iOR) && ((pIcode+1) < pEnd) &&
+                         (isJCond ((pIcode+1)->ic.ll.opcode)))
+                {
+                    if ((pIcode->ic.ll.dst.regi == pLocId->id.longId.h) &&
+                            (pIcode->ic.ll.src.regi == pLocId->id.longId.l))
+                    {
+                        lhs = COND_EXPR::idLongIdx (i);
+
+                        rhs = COND_EXPR::idKte (0, 4); /* long 0 */
+                        lhs = COND_EXPR::boolOp (lhs, rhs,
+                                                 condOpJCond[(pIcode+1)->ic.ll.opcode - iJB]);
+                        (pIcode+1)->setJCond(lhs);
+                        copyDU (pIcode+1, pIcode, eUSE, eUSE);
+                        pIcode->invalidate();
+                    }
+                }
+
+            } /* end for */
+    } /* end for */
+}
+
+
+/* Propagates the long global address across all LOW_LEVEL icodes.
+ * Transforms some LOW_LEVEL icodes into HIGH_LEVEL.
+ * Currently an empty stub: long globals are not propagated. */
+static void propLongGlb (Int i, ID *pLocId, Function * pProc)
+{
+
+}
+
+
+/* Propagated identifier information, thus converting some LOW_LEVEL icodes
+ * into HIGH_LEVEL icodes.
*/ +void Function::propLong() +{ + Int i; + ID *pLocId; /* Pointer to current local identifier */ + + for (i = 0; i < localId.csym(); i++) + { + pLocId = &localId.id_arr[i]; + if ((pLocId->type==TYPE_LONG_SIGN) || (pLocId->type==TYPE_LONG_UNSIGN)) + { + switch (pLocId->loc) + { + case STK_FRAME: + propLongStk (i, pLocId, this); + break; + case REG_FRAME: + propLongReg (i, pLocId, this); + break; + case GLB_FRAME: + propLongGlb (i, pLocId, this); + break; + } + } + } +} + diff --git a/src/reducible.cpp b/src/reducible.cpp new file mode 100644 index 0000000..951524e --- /dev/null +++ b/src/reducible.cpp @@ -0,0 +1,399 @@ +/******************************************************************** + * Checks for reducibility of a graph by intervals, and + * constructs an equivalent reducible graph if one is not found. + * (C) Cristina Cifuentes + ********************************************************************/ +#include +#include +#include "dcc.h" +#include +#ifdef __BORLAND__ +#include +#else +#include /* For free() */ +#endif +#include + +static Int numInt; /* Number of intervals */ + + +#define nonEmpty(q) (q != NULL) +/* Returns whether the queue q is empty or not */ + +#define trivialGraph(G) (G->numOutEdges == 0) +/* Returns whether the graph is a trivial graph or not */ + + +/* Returns the first element in the queue Q, and removes this element + * from the list. Q is not an empty queue. */ +static BB *firstOfQueue (queue &Q) +{ + assert(!Q.empty()); + BB *res=*Q.begin(); + Q.pop_front(); + return res; +} + + +/* Appends pointer to node at the end of the queue Q if node is not present + * in this queue. Returns the queue node just appended. */ +queue::iterator appendQueue (queue &Q, BB *node) +{ + auto iter=std::find(Q.begin(),Q.end(),node); + if(iter==Q.end()) + { + Q.push_back(node); + iter=Q.end(); + --iter; + } + return iter; +} + + +/* Returns the next unprocessed node of the interval list (pointed to by + * pI->currNode). 
Removes this element logically from the list, by updating
+ * the currNode pointer to the next unprocessed element. */
+BB *interval::firstOfInt ()
+{
+    if (currNode == nodes.end())
+        return (NULL);          /* every node has been handed out */
+
+    BB *unprocessed = *currNode;
+    ++currNode;
+    return unprocessed;
+}
+
+
+/* Appends node @node to the end of the interval list @pI, updates currNode
+ * if necessary, and removes the node from the header list @pqH if it is
+ * there. The interval header information is placed in the field
+ * node->inInterval.
+ * Note: nodes are added to the interval list in interval order (which
+ * topsorts the dominance relation). */
+static void appendNodeInt (queue &pqH, BB *node, interval *pI)
+{
+    /* Append node if it is not already in the interval list */
+    const queue::iterator inList = appendQueue (pI->nodes, node);
+
+    /* currNode sitting at end() means nothing is pending: restart it here */
+    if (pI->currNode == pI->nodes.end())
+        pI->currNode = inList;
+
+    /* Check header list for occurrence of node, if found, remove it
+     * and decrement number of out-edges from this interval. */
+    if (node->beenOnH)
+    {
+        auto asHeader = std::find(pqH.begin(), pqH.end(), node);
+        if (asHeader != pqH.end())
+        {
+            pI->numOutEdges -= (byte)(*asHeader)->numInEdges - 1;
+            pqH.erase(asHeader);
+        }
+    }
+
+    /* Update interval header information for this basic block */
+    node->inInterval = pI;
+}
+
+
+/* Finds the intervals of graph derivedGi->Gi and places them in the list
+ * of intervals derivedGi->Ii.
+ * Algorithm by M.S.Hecht.
*/
+void derSeq_Entry::findIntervals ()
+{
+    interval *pI;               /* Interval being processed */
+    interval *J;                /* ^ last interval in derivedGi->Ii */
+    BB *h;                      /* Node being processed */
+    BB *header;                 /* Current interval's header node */
+    BB *succ;                   /* Successor basic block */
+    Int i;                      /* Counter */
+    queue H;                    /* Queue of possible header nodes */
+    boolT first = TRUE;         /* First pass through the loop */
+
+    appendQueue (H, Gi);        /* H = {first node of G} */
+    Gi->beenOnH = TRUE;
+    Gi->reachingInt = BB::Create(); /* ^ empty BB */
+
+    /* Process header nodes list H */
+    while (!H.empty())
+    {
+        header = firstOfQueue (H);
+        pI = new interval;
+        pI->numInt = (byte)numInt++;
+        if (first)              /* ^ to first interval */
+            Ii = J = pI;
+        appendNodeInt (H, header, pI); /* pI(header) = {header} */
+
+        /* Process all nodes in the current interval list */
+        while ((h = pI->firstOfInt()) != NULL)
+        {
+            /* Check all immediate successors of h */
+            for (i = 0; i < h->numOutEdges; i++)
+            {
+                succ = h->edges[i].BBptr;
+                succ->inEdgeCount--;
+
+                if (succ->reachingInt == NULL)
+                {
+                    /* First visit: this header is the one that reaches succ */
+                    succ->reachingInt = header;
+                    if (succ->inEdgeCount == 0)
+                    {
+                        appendNodeInt (H, succ, pI);
+                    }
+                    else if (! succ->beenOnH) /* out edge */
+                    {
+                        appendQueue (H, succ);
+                        succ->beenOnH = TRUE;
+                        pI->numOutEdges++;
+                    }
+                    continue;
+                }
+
+                /* Node has been visited before */
+                if (succ->inEdgeCount == 0)
+                {
+                    const bool sameInterval =
+                            (succ->reachingInt == header || succ->inInterval == pI);
+                    if (! sameInterval)         /* out edge */
+                        pI->numOutEdges++;
+                    else if (succ != header)
+                        appendNodeInt (H, succ, pI);
+                }
+                else if (succ != header && succ->beenOnH)
+                {
+                    pI->numOutEdges++;
+                }
+            }
+        }
+
+        /* Link interval I to list of intervals */
+        if (first)              /* first interval is already stored in Ii */
+        {
+            first = FALSE;
+        }
+        else
+        {
+            J->next = pI;
+            J = pI;
+        }
+    }
+}
+
+
+/* Displays the intervals of the graph Gi.
*/ +static void displayIntervals (interval *pI) +{ + queue::iterator nodePtr; + + while (pI) + { + nodePtr = pI->nodes.begin(); + printf (" Interval #: %ld\t#OutEdges: %ld\n", pI->numInt, pI->numOutEdges); + while (nodePtr!=pI->nodes.end()) + { + if ((*nodePtr)->correspInt == NULL) /* real BBs */ + printf (" Node: %ld\n", (*nodePtr)->start); + else /* BBs represent intervals */ + printf (" Node (corresp int): %d\n", + (*nodePtr)->correspInt->numInt); + ++nodePtr; + } + pI = pI->next; + } +} + + +/* Allocates space for a new derSeq node. */ +static derSeq_Entry *newDerivedSeq() +{ + return new derSeq_Entry; +} + + +/* Frees the storage allocated for the queue q*/ +void freeQueue (queue &q) +{ + q.clear(); +} + + +/* Frees the storage allocated for the interval pI */ +static void freeInterval (interval **pI) +{ + interval *Iptr; + + while (*pI) + { + (*pI)->nodes.clear(); + Iptr = *pI; + *pI = (*pI)->next; + delete (Iptr); + } +} + + +/* Frees the storage allocated by the derived sequence structure, except + * for the original graph cfg (derivedG->Gi). */ +void freeDerivedSeq(derSeq &derivedG) +{ + derivedG.clear(); +} +derSeq_Entry::~derSeq_Entry() +{ + freeInterval (&Ii); +// if(Gi && Gi->nodeType == INTERVAL_NODE) +// freeCFG (Gi); +} + +/* Finds the next order graph of derivedGi->Gi according to its intervals + * (derivedGi->Ii), and places it in derivedGi->next->Gi. 
*/ +static boolT nextOrderGraph (derSeq *derivedGi) +{ + interval *Ii; /* Interval being processed */ + BB *BBnode, /* New basic block of intervals */ + *curr, /* BB being checked for out edges */ + *succ /* Successor node */ + ; + queue *listIi; /* List of intervals */ + Int i, /* Index to outEdges array */ + j; /* Index to successors */ + boolT sameGraph; /* Boolean, isomorphic graphs */ + + /* Process Gi's intervals */ + derSeq_Entry &prev_entry(derivedGi->back()); + derivedGi->push_back(derSeq_Entry()); + derSeq_Entry &new_entry(derivedGi->back()); + Ii = prev_entry.Ii; + sameGraph = TRUE; + BBnode = 0; + std::vector bbs; + while (Ii) + { + i = 0; + bbs.push_back(BB::Create(-1, -1, INTERVAL_NODE, Ii->numOutEdges, NULL)); + BBnode = bbs.back(); + BBnode->correspInt = Ii; + const queue &listIi(Ii->nodes); + + /* Check for more than 1 interval */ + if (sameGraph && (listIi.size()>1)) + sameGraph = FALSE; + + /* Find out edges */ + + if (BBnode->numOutEdges > 0) + { + for(auto iter=listIi.begin();iter!=listIi.end(); ++iter) + { + curr = *iter; + for (j = 0; j < curr->numOutEdges; j++) + { + succ = curr->edges[j].BBptr; + if (succ->inInterval != curr->inInterval) + BBnode->edges[i++].intPtr = succ->inInterval; + } + } + } + + /* Next interval */ + Ii = Ii->next; + } + + /* Convert list of pointers to intervals into a real graph. + * Determines the number of in edges to each new BB, and places it + * in numInEdges and inEdgeCount for later interval processing. */ + curr = new_entry.Gi = bbs.front(); + for(auto curr=bbs.begin(); curr!=bbs.end(); ++curr) + { + for (i = 0; i < (*curr)->numOutEdges; i++) + { + BBnode = new_entry.Gi; /* BB of an interval */ + TYPEADR_TYPE &edge=(*curr)->edges[i]; + auto iter= std::find_if(bbs.begin(),bbs.end(), + [&edge](BB *node)->bool { return edge.intPtr==node->correspInt;}); + if(iter==bbs.end()) + fatalError (INVALID_INT_BB); + edge.BBptr = *iter; + (*iter)->numInEdges++; + (*iter)->inEdgeCount++; + } + } + return (boolT)(! 
sameGraph); +} + + + +/* Finds the derived sequence of the graph derivedG->Gi (ie. cfg). + * Constructs the n-th order graph and places all the intermediate graphs + * in the derivedG list sequence. */ +static byte findDerivedSeq (derSeq *derivedGi) +{ + BB *Gi; /* Current derived sequence graph */ + + derSeq::iterator iter=derivedGi->begin(); + Gi = iter->Gi; + while (! trivialGraph (Gi)) + { + /* Find the intervals of Gi and place them in derivedGi->Ii */ + iter->findIntervals (); + + /* Create Gi+1 and check if it is equivalent to Gi */ + if (! nextOrderGraph (derivedGi)) + break; + ++iter; + Gi = iter->Gi; + stats.nOrder++; + } + + if (! trivialGraph (Gi)) + { + ++iter; + derivedGi->erase(iter,derivedGi->end()); /* remove Gi+1 */ + // freeDerivedSeq(derivedGi->next); + // derivedGi->next = NULL; + return FALSE; + } + derivedGi->back().findIntervals (); + return TRUE; +} + +/* Converts the irreducible graph G into an equivalent reducible one, by + * means of node splitting. */ +static void nodeSplitting (std::vector &G) +{ + printf("Attempt to perform node splitting: NOT IMPLEMENTED\n"); +} + +/* Displays the derived sequence and intervals of the graph G */ +void derSeq::display() +{ + Int n = 1; /* Derived sequence number */ + printf ("\nDerived Sequence Intervals\n"); + derSeq::iterator iter=this->begin(); + while (iter!=this->end()) + { + printf ("\nIntervals for G%lX\n", n++); + displayIntervals (iter->Ii); + ++iter; + } +} + + +/* Checks whether the control flow graph, cfg, is reducible or not. + * If it is not reducible, it is converted into an equivalent reducible + * graph by node splitting. The derived sequence of graphs built from cfg + * are returned in the pointer *derivedG. + */ +derSeq * Function::checkReducibility() +{ + derSeq * der_seq; + byte reducible; /* Reducible graph flag */ + + numInt = 1; /* reinitialize no. 
of intervals*/ + stats.nOrder = 1; /* nOrder(cfg) = 1 */ + der_seq = new derSeq; + der_seq->resize(1); + der_seq->back().Gi = cfg.front(); + reducible = findDerivedSeq(der_seq); + + if (! reducible) + { + flg |= GRAPH_IRRED; + nodeSplitting (cfg); + } + return der_seq; +} + diff --git a/src/scanner.cpp b/src/scanner.cpp new file mode 100644 index 0000000..6e1a2e1 --- /dev/null +++ b/src/scanner.cpp @@ -0,0 +1,844 @@ +/***************************************************************************** + * dcc project scanner module + * Implements a simple state driven scanner to convert 8086 machine code into + * I-code + * (C) Cristina Cifuentes, Jeff Ledermann + ****************************************************************************/ + +#include "dcc.h" +#include "scanner.h" +#include + +#define iZERO (llIcode)0 // For neatness +#define IC llIcode + +static struct { + void (*state1)(Int); + void (*state2)(Int); + flags32 flg; + llIcode opcode; + byte df; + byte uf; +} stateTable[] = { + { modrm, none2, B , iADD , Sf | Zf | Cf, }, /* 00 */ + { modrm, none2, 0 , iADD , Sf | Zf | Cf, }, /* 01 */ + { modrm, none2, TO_REG | B , iADD , Sf | Zf | Cf, }, /* 02 */ + { modrm, none2, TO_REG , iADD , Sf | Zf | Cf, }, /* 03 */ + { data1, axImp, B , iADD , Sf | Zf | Cf, }, /* 04 */ + { data2, axImp, 0 , iADD , Sf | Zf | Cf, }, /* 05 */ + { segop, none2, NO_SRC , iPUSH , 0 , }, /* 06 */ + { segop, none2, NO_SRC , iPOP , 0 , }, /* 07 */ + { modrm, none2, B , iOR , Sf | Zf | Cf, }, /* 08 */ + { modrm, none2, NSP , iOR , Sf | Zf | Cf, }, /* 09 */ + { modrm, none2, TO_REG | B , iOR , Sf | Zf | Cf, }, /* 0A */ + { modrm, none2, TO_REG | NSP , iOR , Sf | Zf | Cf, }, /* 0B */ + { data1, axImp, B , iOR , Sf | Zf | Cf, }, /* 0C */ + { data2, axImp, 0 , iOR , Sf | Zf | Cf, }, /* 0D */ + { segop, none2, NO_SRC , iPUSH , 0 , }, /* 0E */ + { none1, none2, OP386 , iZERO , 0 , }, /* 0F */ + { modrm, none2, B , iADC , Sf | Zf | Cf, Cf }, /* 10 */ + { modrm, none2, NSP , iADC , Sf | Zf | Cf, Cf 
}, /* 11 */ + { modrm, none2, TO_REG | B , iADC , Sf | Zf | Cf, Cf }, /* 12 */ + { modrm, none2, TO_REG | NSP , iADC , Sf | Zf | Cf, Cf }, /* 13 */ + { data1, axImp, B , iADC , Sf | Zf | Cf, Cf }, /* 14 */ + { data2, axImp, 0 , iADC , Sf | Zf | Cf, Cf }, /* 15 */ + { segop, none2, NOT_HLL | NO_SRC , iPUSH , 0 , }, /* 16 */ + { segop, none2, NOT_HLL | NO_SRC , iPOP , 0 , }, /* 17 */ + { modrm, none2, B , iSBB , Sf | Zf | Cf, Cf }, /* 18 */ + { modrm, none2, NSP , iSBB , Sf | Zf | Cf, Cf }, /* 19 */ + { modrm, none2, TO_REG | B , iSBB , Sf | Zf | Cf, Cf }, /* 1A */ + { modrm, none2, TO_REG | NSP , iSBB , Sf | Zf | Cf, Cf }, /* 1B */ + { data1, axImp, B , iSBB , Sf | Zf | Cf, Cf }, /* 1C */ + { data2, axImp, 0 , iSBB , Sf | Zf | Cf, Cf }, /* 1D */ + { segop, none2, NO_SRC , iPUSH , 0 , }, /* 1E */ + { segop, none2, NO_SRC , iPOP , 0 , }, /* 1F */ + { modrm, none2, B , iAND , Sf | Zf | Cf, }, /* 20 */ + { modrm, none2, NSP , iAND , Sf | Zf | Cf, }, /* 21 */ + { modrm, none2, TO_REG | B , iAND , Sf | Zf | Cf, }, /* 22 */ + { modrm, none2, TO_REG | NSP , iAND , Sf | Zf | Cf, }, /* 23 */ + { data1, axImp, B , iAND , Sf | Zf | Cf, }, /* 24 */ + { data2, axImp, 0 , iAND , Sf | Zf | Cf, }, /* 25 */ + { prefix, none2, 0 , (IC)rES,0 , }, /* 26 */ + { none1, axImp, NOT_HLL | B|NO_SRC , iDAA , Sf | Zf | Cf, }, /* 27 */ + { modrm, none2, B , iSUB , Sf | Zf | Cf, }, /* 28 */ + { modrm, none2, 0 , iSUB , Sf | Zf | Cf, }, /* 29 */ + { modrm, none2, TO_REG | B , iSUB , Sf | Zf | Cf, }, /* 2A */ + { modrm, none2, TO_REG , iSUB , Sf | Zf | Cf, }, /* 2B */ + { data1, axImp, B , iSUB , Sf | Zf | Cf, }, /* 2C */ + { data2, axImp, 0 , iSUB , Sf | Zf | Cf, }, /* 2D */ + { prefix, none2, 0 , (IC)rCS,0 , }, /* 2E */ + { none1, axImp, NOT_HLL | B|NO_SRC , iDAS , Sf | Zf | Cf, }, /* 2F */ + { modrm, none2, B , iXOR , Sf | Zf | Cf, }, /* 30 */ + { modrm, none2, NSP , iXOR , Sf | Zf | Cf, }, /* 31 */ + { modrm, none2, TO_REG | B , iXOR , Sf | Zf | Cf, }, /* 32 */ + { modrm, none2, TO_REG | NSP , 
iXOR , Sf | Zf | Cf, }, /* 33 */ + { data1, axImp, B , iXOR , Sf | Zf | Cf, }, /* 34 */ + { data2, axImp, 0 , iXOR , Sf | Zf | Cf, }, /* 35 */ + { prefix, none2, 0 , (IC)rSS,0 , }, /* 36 */ + { none1, axImp, NOT_HLL | NO_SRC , iAAA , Sf | Zf | Cf, }, /* 37 */ + { modrm, none2, B , iCMP , Sf | Zf | Cf, }, /* 38 */ + { modrm, none2, NSP , iCMP , Sf | Zf | Cf, }, /* 39 */ + { modrm, none2, TO_REG | B , iCMP , Sf | Zf | Cf, }, /* 3A */ + { modrm, none2, TO_REG | NSP , iCMP , Sf | Zf | Cf, }, /* 3B */ + { data1, axImp, B , iCMP , Sf | Zf | Cf, }, /* 3C */ + { data2, axImp, 0 , iCMP , Sf | Zf | Cf, }, /* 3D */ + { prefix, none2, 0 , (IC)rDS,0 , }, /* 3E */ + { none1, axImp, NOT_HLL | NO_SRC , iAAS , Sf | Zf | Cf, }, /* 3F */ + { regop, none2, 0 , iINC , Sf | Zf, }, /* 40 */ + { regop, none2, 0 , iINC , Sf | Zf, }, /* 41 */ + { regop, none2, 0 , iINC , Sf | Zf, }, /* 42 */ + { regop, none2, 0 , iINC , Sf | Zf, }, /* 43 */ + { regop, none2, NOT_HLL , iINC , Sf | Zf, }, /* 44 */ + { regop, none2, 0 , iINC , Sf | Zf, }, /* 45 */ + { regop, none2, 0 , iINC , Sf | Zf, }, /* 46 */ + { regop, none2, 0 , iINC , Sf | Zf, }, /* 47 */ + { regop, none2, 0 , iDEC , Sf | Zf, }, /* 48 */ + { regop, none2, 0 , iDEC , Sf | Zf, }, /* 49 */ + { regop, none2, 0 , iDEC , Sf | Zf, }, /* 4A */ + { regop, none2, 0 , iDEC , Sf | Zf, }, /* 4B */ + { regop, none2, NOT_HLL , iDEC , Sf | Zf, }, /* 4C */ + { regop, none2, 0 , iDEC , Sf | Zf, }, /* 4D */ + { regop, none2, 0 , iDEC , Sf | Zf, }, /* 4E */ + { regop, none2, 0 , iDEC , Sf | Zf, }, /* 4F */ + { regop, none2, NO_SRC , iPUSH , 0 , }, /* 50 */ + { regop, none2, NO_SRC , iPUSH , 0 , }, /* 51 */ + { regop, none2, NO_SRC , iPUSH , 0 , }, /* 52 */ + { regop, none2, NO_SRC , iPUSH , 0 , }, /* 53 */ + { regop, none2, NOT_HLL | NO_SRC , iPUSH , 0 , }, /* 54 */ + { regop, none2, NO_SRC , iPUSH , 0 , }, /* 55 */ + { regop, none2, NO_SRC , iPUSH , 0 , }, /* 56 */ + { regop, none2, NO_SRC , iPUSH , 0 , }, /* 57 */ + { regop, none2, NO_SRC , iPOP , 0 , }, 
/* 58 */ + { regop, none2, NO_SRC , iPOP , 0 , }, /* 59 */ + { regop, none2, NO_SRC , iPOP , 0 , }, /* 5A */ + { regop, none2, NO_SRC , iPOP , 0 , }, /* 5B */ + { regop, none2, NOT_HLL | NO_SRC , iPOP , 0 , }, /* 5C */ + { regop, none2, NO_SRC , iPOP , 0 , }, /* 5D */ + { regop, none2, NO_SRC , iPOP , 0 , }, /* 5E */ + { regop, none2, NO_SRC , iPOP , 0 , }, /* 5F */ + { none1, none2, NOT_HLL | NO_OPS , iPUSHA, 0 , }, /* 60 */ + { none1, none2, NOT_HLL | NO_OPS , iPOPA , 0 , }, /* 61 */ + { memOnly, modrm, TO_REG | NSP , iBOUND, 0 , }, /* 62 */ + { none1, none2, OP386 , iZERO , 0 , }, /* 63 */ + { none1, none2, OP386 , iZERO , 0 , }, /* 64 */ + { none1, none2, OP386 , iZERO , 0 , }, /* 65 */ + { none1, none2, OP386 , iZERO , 0 , }, /* 66 */ + { none1, none2, OP386 , iZERO , 0 , }, /* 67 */ + { data2, none2, NO_SRC , iPUSH , 0 , }, /* 68 */ + { modrm, data2, TO_REG | NSP , iIMUL , Sf | Zf | Cf, }, /* 69 */ + { data1, none2, S | NO_SRC , iPUSH , 0 , }, /* 6A */ + { modrm, data1, TO_REG | NSP | S , iIMUL , Sf | Zf | Cf, }, /* 6B */ + { strop, memImp, NOT_HLL | B|IM_OPS , iINS , 0 , Df }, /* 6C */ + { strop, memImp, NOT_HLL | IM_OPS , iINS , 0 , Df }, /* 6D */ + { strop, memImp, NOT_HLL | B|IM_OPS , iOUTS , 0 , Df }, /* 6E */ + { strop, memImp, NOT_HLL | IM_OPS , iOUTS , 0 , Df }, /* 6F */ + { dispS, none2, NOT_HLL , iJO , 0 , }, /* 70 */ + { dispS, none2, NOT_HLL , iJNO , 0 , }, /* 71 */ + { dispS, none2, 0 , iJB , 0 , Cf }, /* 72 */ + { dispS, none2, 0 , iJAE , 0 , Cf }, /* 73 */ + { dispS, none2, 0 , iJE , 0 , Zf }, /* 74 */ + { dispS, none2, 0 , iJNE , 0 , Zf }, /* 75 */ + { dispS, none2, 0 , iJBE , 0 , Zf | Cf }, /* 76 */ + { dispS, none2, 0 , iJA , 0 , Zf | Cf }, /* 77 */ + { dispS, none2, 0 , iJS , 0 , Sf }, /* 78 */ + { dispS, none2, 0 , iJNS , 0 , Sf }, /* 79 */ + { dispS, none2, NOT_HLL , iJP , 0 , }, /* 7A */ + { dispS, none2, NOT_HLL , iJNP , 0 , }, /* 7B */ + { dispS, none2, 0 , iJL , 0 , Sf }, /* 7C */ + { dispS, none2, 0 , iJGE , 0 , Sf }, /* 7D */ + { 
dispS, none2, 0 , iJLE , 0 , Sf | Zf }, /* 7E */ + { dispS, none2, 0 , iJG , 0 , Sf | Zf }, /* 7F */ + { immed, data1, B , iZERO , 0 , }, /* 80 */ + { immed, data2, NSP , iZERO , 0 , }, /* 81 */ + { immed, data1, B , iZERO , 0 , }, /* 82 */ /* ?? */ + { immed, data1, NSP | S , iZERO , 0 , }, /* 83 */ + { modrm, none2, TO_REG | B , iTEST , Sf | Zf | Cf, }, /* 84 */ + { modrm, none2, TO_REG | NSP , iTEST , Sf | Zf | Cf, }, /* 85 */ + { modrm, none2, TO_REG | B , iXCHG , 0 , }, /* 86 */ + { modrm, none2, TO_REG | NSP , iXCHG , 0 , }, /* 87 */ + { modrm, none2, B , iMOV , 0 , }, /* 88 */ + { modrm, none2, 0 , iMOV , 0 , }, /* 89 */ + { modrm, none2, TO_REG | B , iMOV , 0 , }, /* 8A */ + { modrm, none2, TO_REG , iMOV , 0 , }, /* 8B */ + { segrm, none2, NSP , iMOV , 0 , }, /* 8C */ + { memOnly, modrm, TO_REG | NSP , iLEA , 0 , }, /* 8D */ + { segrm, none2, TO_REG | NSP , iMOV , 0 , }, /* 8E */ + { memReg0, none2, NO_SRC , iPOP , 0 , }, /* 8F */ + { none1, none2, NO_OPS , iNOP , 0 , }, /* 90 */ + { regop, axImp, 0 , iXCHG , 0 , }, /* 91 */ + { regop, axImp, 0 , iXCHG , 0 , }, /* 92 */ + { regop, axImp, 0 , iXCHG , 0 , }, /* 93 */ + { regop, axImp, NOT_HLL , iXCHG , 0 , }, /* 94 */ + { regop, axImp, 0 , iXCHG , 0 , }, /* 95 */ + { regop, axImp, 0 , iXCHG , 0 , }, /* 96 */ + { regop, axImp, 0 , iXCHG , 0 , }, /* 97 */ + { alImp, axImp, SRC_B | S , iSIGNEX,0 , }, /* 98 */ + {axSrcIm, axImp, IM_DST | S , iSIGNEX,0 , }, /* 99 */ + { dispF, none2, 0 , iCALLF ,0 , }, /* 9A */ + { none1, none2, FLOAT_OP| NO_OPS , iWAIT , 0 , }, /* 9B */ + { none1, none2, NOT_HLL | NO_OPS , iPUSHF, 0 , }, /* 9C */ + { none1, none2, NOT_HLL | NO_OPS , iPOPF , Sf | Zf | Cf | Df,}, /* 9D */ + { none1, none2, NOT_HLL | NO_OPS , iSAHF , Sf | Zf | Cf, }, /* 9E */ + { none1, none2, NOT_HLL | NO_OPS , iLAHF , 0 , Sf | Zf | Cf }, /* 9F */ + { dispM, axImp, B , iMOV , 0 , }, /* A0 */ + { dispM, axImp, 0 , iMOV , 0 , }, /* A1 */ + { dispM, axImp, TO_REG | B , iMOV , 0 , }, /* A2 */ + { dispM, axImp, TO_REG , 
iMOV , 0 , }, /* A3 */ + { strop, memImp, B | IM_OPS , iMOVS , 0 , Df }, /* A4 */ + { strop, memImp, IM_OPS , iMOVS , 0 , Df }, /* A5 */ + { strop, memImp, B | IM_OPS , iCMPS , Sf | Zf | Cf, Df }, /* A6 */ + { strop, memImp, IM_OPS , iCMPS , Sf | Zf | Cf, Df }, /* A7 */ + { data1, axImp, B , iTEST , Sf | Zf | Cf, }, /* A8 */ + { data2, axImp, 0 , iTEST , Sf | Zf | Cf, }, /* A9 */ + { strop, memImp, B | IM_OPS , iSTOS , 0 , Df }, /* AA */ + { strop, memImp, IM_OPS , iSTOS , 0 , Df }, /* AB */ + { strop, memImp, B | IM_OPS , iLODS , 0 , Df }, /* AC */ + { strop, memImp, IM_OPS , iLODS , 0 , Df }, /* AD */ + { strop, memImp, B | IM_OPS , iSCAS , Sf | Zf | Cf, Df }, /* AE */ + { strop, memImp, IM_OPS , iSCAS , Sf | Zf | Cf, Df }, /* AF */ + { regop, data1, B , iMOV , 0 , }, /* B0 */ + { regop, data1, B , iMOV , 0 , }, /* B1 */ + { regop, data1, B , iMOV , 0 , }, /* B2 */ + { regop, data1, B , iMOV , 0 , }, /* B3 */ + { regop, data1, B , iMOV , 0 , }, /* B4 */ + { regop, data1, B , iMOV , 0 , }, /* B5 */ + { regop, data1, B , iMOV , 0 , }, /* B6 */ + { regop, data1, B , iMOV , 0 , }, /* B7 */ + { regop, data2, 0 , iMOV , 0 , }, /* B8 */ + { regop, data2, 0 , iMOV , 0 , }, /* B9 */ + { regop, data2, 0 , iMOV , 0 , }, /* BA */ + { regop, data2, 0 , iMOV , 0 , }, /* BB */ + { regop, data2, NOT_HLL , iMOV , 0 , }, /* BC */ + { regop, data2, 0 , iMOV , 0 , }, /* BD */ + { regop, data2, 0 , iMOV , 0 , }, /* BE */ + { regop, data2, 0 , iMOV , 0 , }, /* BF */ + { shift, data1, B , iZERO , 0 , }, /* C0 */ + { shift, data1, NSP | SRC_B , iZERO , 0 , }, /* C1 */ + { data2, none2, 0 , iRET , 0 , }, /* C2 */ + { none1, none2, NO_OPS , iRET , 0 , }, /* C3 */ + { memOnly, modrm, TO_REG | NSP , iLES , 0 , }, /* C4 */ + { memOnly, modrm, TO_REG | NSP , iLDS , 0 , }, /* C5 */ + { memReg0, data1, B , iMOV , 0 , }, /* C6 */ + { memReg0, data2, 0 , iMOV , 0 , }, /* C7 */ + { data2, data1, 0 , iENTER, 0 , }, /* C8 */ + { none1, none2, NO_OPS , iLEAVE, 0 , }, /* C9 */ + { data2, none2, 0 , 
iRETF , 0 , }, /* CA */ + { none1, none2, NO_OPS , iRETF , 0 , }, /* CB */ + { const3, none2, NOT_HLL , iINT , 0 , }, /* CC */ + { data1,checkInt, NOT_HLL , iINT , 0 , }, /* CD */ + { none1, none2, NOT_HLL | NO_OPS , iINTO , 0 , }, /* CE */ + { none1, none2, NOT_HLL | NO_OPS , iIRET , 0 , }, /* Cf */ + { shift, const1, B , iZERO , 0 , }, /* D0 */ + { shift, const1, SRC_B , iZERO , 0 , }, /* D1 */ + { shift, none1, B , iZERO , 0 , }, /* D2 */ + { shift, none1, SRC_B , iZERO , 0 , }, /* D3 */ + { data1, axImp, NOT_HLL , iAAM , Sf | Zf | Cf, }, /* D4 */ + { data1, axImp, NOT_HLL , iAAD , Sf | Zf | Cf, }, /* D5 */ + { none1, none2, 0 , iZERO , 0 , }, /* D6 */ + { memImp, axImp, NOT_HLL | B| IM_OPS, iXLAT , 0 , }, /* D7 */ + { escop, none2, FLOAT_OP , iESC , 0 , }, /* D8 */ + { escop, none2, FLOAT_OP , iESC , 0 , }, /* D9 */ + { escop, none2, FLOAT_OP , iESC , 0 , }, /* DA */ + { escop, none2, FLOAT_OP , iESC , 0 , }, /* DB */ + { escop, none2, FLOAT_OP , iESC , 0 , }, /* DC */ + { escop, none2, FLOAT_OP , iESC , 0 , }, /* DD */ + { escop, none2, FLOAT_OP , iESC , 0 , }, /* DE */ + { escop, none2, FLOAT_OP , iESC , 0 , }, /* Df */ + { dispS, none2, 0 , iLOOPNE,0 , Zf }, /* E0 */ + { dispS, none2, 0 , iLOOPE, 0 , Zf }, /* E1 */ + { dispS, none2, 0 , iLOOP , 0 , }, /* E2 */ + { dispS, none2, 0 , iJCXZ , 0 , }, /* E3 */ + { data1, axImp, NOT_HLL | B|NO_SRC , iIN , 0 , }, /* E4 */ + { data1, axImp, NOT_HLL | NO_SRC , iIN , 0 , }, /* E5 */ + { data1, axImp, NOT_HLL | B|NO_SRC , iOUT , 0 , }, /* E6 */ + { data1, axImp, NOT_HLL | NO_SRC , iOUT , 0 , }, /* E7 */ + { dispN, none2, 0 , iCALL , 0 , }, /* E8 */ + { dispN, none2, 0 , iJMP , 0 , }, /* E9 */ + { dispF, none2, 0 , iJMPF , 0 , }, /* EA */ + { dispS, none2, 0 , iJMP , 0 , }, /* EB */ + { none1, axImp, NOT_HLL | B|NO_SRC , iIN , 0 , }, /* EC */ + { none1, axImp, NOT_HLL | NO_SRC , iIN , 0 , }, /* ED */ + { none1, axImp, NOT_HLL | B|NO_SRC , iOUT , 0 , }, /* EE */ + { none1, axImp, NOT_HLL | NO_SRC , iOUT , 0 , }, /* EF */ 
+ { none1, none2, NOT_HLL | NO_OPS , iLOCK , 0 , }, /* F0 */ + { none1, none2, 0 , iZERO , 0 , }, /* F1 */ + { prefix, none2, 0 , iREPNE, 0 , }, /* F2 */ + { prefix, none2, 0 , iREPE , 0 , }, /* F3 */ + { none1, none2, NOT_HLL | NO_OPS , iHLT , 0 , }, /* F4 */ + { none1, none2, NO_OPS , iCMC , Cf, Cf }, /* F5 */ + { arith, none1, B , iZERO , 0 , }, /* F6 */ + { arith, none1, NSP , iZERO , 0 , }, /* F7 */ + { none1, none2, NO_OPS , iCLC , Cf, }, /* F8 */ + { none1, none2, NO_OPS , iSTC , Cf, }, /* F9 */ + { none1, none2, NOT_HLL | NO_OPS , iCLI , 0 , }, /* FA */ + { none1, none2, NOT_HLL | NO_OPS , iSTI , 0 , }, /* FB */ + { none1, none2, NO_OPS , iCLD , Df, }, /* FC */ + { none1, none2, NO_OPS , iSTD , Df, }, /* FD */ + { trans, none1, B , iZERO , 0 , }, /* FE */ + { trans, none1, NSP , iZERO , 0 , } /* FF */ +} ; + +static word SegPrefix, RepPrefix; +static byte *pInst; /* Ptr. to current byte of instruction */ +static ICODE * pIcode; /* Ptr to Icode record filled in by scan() */ + + +/***************************************************************************** + Scans one machine instruction at offset ip in prog.Image and returns error. + At the same time, fill in low-level icode details for the scanned inst. 
+ ****************************************************************************/
+Int scan(dword ip, ICODE *p)
+{
+    Int op;
+
+    memset(p, 0, sizeof(ICODE));
+    p->type = LOW_LEVEL;
+    p->ic.ll.label = ip; /* ip is absolute offset into image*/
+    if (ip >= (dword)prog.cbImage)
+    {
+        return (IP_OUT_OF_RANGE);
+    }
+
+    SegPrefix = RepPrefix = 0;
+    pInst = prog.Image + ip;
+    pIcode = p;
+
+    do
+    {
+        /* After a prefix the loop fetches another opcode byte; if the prefix
+         * was the last byte of the image there is nothing left to decode.
+         * (The first pass always succeeds because ip was checked above.) */
+        if ((pInst - prog.Image) >= prog.cbImage)
+        {
+            return (IP_OUT_OF_RANGE);
+        }
+
+        op = *pInst++; /* First state - trivial */
+        p->ic.ll.opcode = stateTable[op].opcode; /* Convert to Icode.opcode */
+        p->ic.ll.flg = stateTable[op].flg & ICODEMASK;
+        p->ic.ll.flagDU.d = stateTable[op].df;
+        p->ic.ll.flagDU.u = stateTable[op].uf;
+
+        (*stateTable[op].state1)(op); /* Second state */
+        (*stateTable[op].state2)(op); /* Third state */
+
+    } while (stateTable[op].state1 == prefix); /* Loop if prefix */
+
+    if (p->ic.ll.opcode)
+    {
+        /* Save bytes of image used */
+        p->ic.ll.numBytes = (byte)((pInst - prog.Image) - ip);
+        return ((SegPrefix)? FUNNY_SEGOVR: /* Seg. Override invalid */
+                (RepPrefix ? FUNNY_REP: 0));/* REP prefix invalid */
+    }
+    /* Else opcode error */
+    return ((stateTable[op].flg & OP386)? INVALID_386OP: INVALID_OPCODE);
+}
+
+
+/***************************************************************************
+ relocItem - returns TRUE if word pointed at is in relocation table
+ **************************************************************************/
+static boolT relocItem(byte *p)
+{
+    Int i;
+    dword off = p - prog.Image; /* offset of p from the start of the image */
+
+    for (i = 0; i < prog.cReloc; i++)
+        if (prog.relocTable[i] == off)
+            return TRUE;
+    return FALSE;
+}
+
+
+/***************************************************************************
+ getWord - returns next word from image
+ (reads it through LH at pInst and advances pInst by two bytes)
+ **************************************************************************/
+static word getWord(void)
+{
+    word w = LH(pInst);
+    pInst += 2;
+    return w;
+}
+
+
+/****************************************************************************
+ signex - returns byte sign extended to Int
+ ***************************************************************************/
+static Int signex(byte b)
+{
+    long s = b;
+    /* Bit 7 set => fill the bits above the low byte with ones */
+    return ((b & 0x80)? (Int)(0xFFFFFF00 | s): (Int)s);
+}
+
+
+/****************************************************************************
+ * setAddress - Updates the source or destination field for the current
+ * icode, based on fdst and the TO_REG flag.
+ * Note: fdst == TRUE is for the r/m part of the field (dest, unless TO_REG)
+ * fdst == FALSE is for reg part of the field
+ ***************************************************************************/
+static void setAddress(Int i, boolT fdst, word seg, int16 reg, word off)
+{
+    ICODEMEM *pm;
+
+    /* If not to register (i.e. to r/m), and talking about r/m,
+       then this is dest */
+    pm = (!(stateTable[i].flg & TO_REG) == fdst) ?
+                &pIcode->ic.ll.dst : &pIcode->ic.ll.src;
+
+    /* Set segment. A later procedure (lookupAddr in proclist.c) will
+     * provide the value of this segment in the field segValue. */
+    if (seg) /* segment override */
+    {
+        pm->seg = pm->segOver = (byte)seg;
+    }
+    else
+    { /* no override, check indexed register */
+        if ((reg >= INDEXBASE) && (reg == INDEXBASE + 2 ||
+                                   reg == INDEXBASE + 3 || reg == INDEXBASE + 6))
+        {
+            pm->seg = rSS; /* indexed on bp */
+        }
+        else
+        {
+            pm->seg = rDS; /* any other indexed reg */
+        }
+    }
+    pm->regi = (byte)reg;
+    pm->off = (int16)off;
+    /* Byte-sized operation on a plain register: shift the register id by
+     * the distance between rAX and rAL */
+    if (reg && reg < INDEXBASE && (stateTable[i].flg & B))
+    {
+        pm->regi += rAL - rAX;
+    }
+
+    if (seg) /* So we can catch invalid use of segment overrides */
+    {
+        SegPrefix = 0;
+    }
+}
+
+
+/****************************************************************************
+ rm - Decodes r/m part of modrm byte for dst (unless TO_REG) part of icode
+ ***************************************************************************/
+static void rm(Int i)
+{
+    byte mod = *pInst >> 6;     /* top two bits: addressing mode */
+    byte rm = *pInst++ & 7;     /* low three bits: r/m field */
+
+    switch (mod) {
+    case 0: /* No disp unless rm == 6 */
+        if (rm == 6) {
+            setAddress(i, TRUE, SegPrefix, 0, getWord());
+            pIcode->ic.ll.flg |= WORD_OFF;
+        }
+        else setAddress(i, TRUE, SegPrefix, rm + INDEXBASE, 0);
+        break;
+
+    case 1: /* 1 byte disp */
+        setAddress(i, TRUE, SegPrefix, rm+INDEXBASE, (word)signex(*pInst++));
+        break;
+
+    case 2: /* 2 byte disp */
+        setAddress(i, TRUE, SegPrefix, rm + INDEXBASE, getWord());
+        pIcode->ic.ll.flg |= WORD_OFF;
+        break;
+
+    case 3: /* reg */
+        setAddress(i, TRUE, 0, rm + rAX, 0);
+        break;
+    }
+
+    /* Opcodes flagged NSP are marked NOT_HLL when either operand is SP */
+    if ((stateTable[i].flg & NSP) && (pIcode->ic.ll.src.regi==rSP ||
+                                      pIcode->ic.ll.dst.regi==rSP))
+        pIcode->ic.ll.flg |= NOT_HLL;
+}
+
+
+/****************************************************************************
+ modrm - Sets up src and dst from modrm byte
+ ***************************************************************************/
+static void modrm(Int i)
+{
+    /* reg field first (pInst is not advanced), then rm() consumes the byte */
+    setAddress(i, FALSE, 0, REG(*pInst) + rAX, 0);
+    rm(i);
+}
+
+
+/****************************************************************************
+ segrm - seg encoded as reg of modrm +
****************************************************************************/ +static void segrm(Int i) +{ + Int reg = REG(*pInst) + rES; + + if (reg > rDS || (reg == rCS && (stateTable[i].flg & TO_REG))) + pIcode->ic.ll.opcode = (llIcode)0; + else { + setAddress(i, FALSE, 0, (int16)reg, 0); + rm(i); + } +} + + +/**************************************************************************** + regop - src/dst reg encoded as low 3 bits of opcode + ***************************************************************************/ +static void regop(Int i) +{ + setAddress(i, FALSE, 0, ((int16)i & 7) + rAX, 0); + pIcode->ic.ll.dst.regi = pIcode->ic.ll.src.regi; +} + + +/***************************************************************************** + segop - seg encoded in middle of opcode + *****************************************************************************/ +static void segop(Int i) +{ + setAddress(i, TRUE, 0, (((int16)i & 0x18) >> 3) + rES, 0); +} + + +/**************************************************************************** + axImp - Plugs an implied AX dst + ***************************************************************************/ +static void axImp(Int i) +{ + setAddress(i, TRUE, 0, rAX, 0); +} + + +static void axSrcIm (Int i) +/* Implied AX source */ +{ + pIcode->ic.ll.src.regi = rAX; +} + + +static void alImp (Int i) +/* Implied AL source */ +{ + pIcode->ic.ll.src.regi = rAL; +} + + +/***************************************************************************** + memImp - Plugs implied src memory operand with any segment override + ****************************************************************************/ +static void memImp(Int i) +{ + setAddress(i, FALSE, SegPrefix, 0, 0); +} + + +/**************************************************************************** + memOnly - Instruction is not valid if modrm refers to register (i.e. 
mod == 3) + ***************************************************************************/ +static void memOnly(Int i) +{ + if ((*pInst & 0xC0) == 0xC0) + pIcode->ic.ll.opcode = (llIcode)0; +} + + +/**************************************************************************** + memReg0 - modrm for 'memOnly' and Reg field must also be 0 + ****************************************************************************/ +static void memReg0(Int i) +{ + if (REG(*pInst) || (*pInst & 0xC0) == 0xC0) + pIcode->ic.ll.opcode = (llIcode)0; + else + rm(i); +} + + +/*************************************************************************** + immed - Sets up dst and opcode from modrm byte + **************************************************************************/ +static void immed(Int i) +{ + static llIcode immedTable[8] = {iADD, iOR, iADC, iSBB, iAND, iSUB, iXOR, iCMP}; + static byte uf[8] = { 0, 0, Cf, Cf, 0, 0, 0, 0 }; + + pIcode->ic.ll.opcode = immedTable[REG(*pInst)]; + pIcode->ic.ll.flagDU.u = uf[REG(*pInst)]; + pIcode->ic.ll.flagDU.d = (Sf | Zf | Cf); + rm(i); + + if (pIcode->ic.ll.opcode == iADD || pIcode->ic.ll.opcode == iSUB) + pIcode->ic.ll.flg &= ~NOT_HLL; /* Allow ADD/SUB SP, immed */ +} + + +/**************************************************************************** + shift - Sets up dst and opcode from modrm byte + ***************************************************************************/ +static void shift(Int i) +{ + static llIcode shiftTable[8] = + { + (llIcode)iROL, (llIcode)iROR, (llIcode)iRCL, (llIcode)iRCR, + (llIcode)iSHL, (llIcode)iSHR, (llIcode)0, (llIcode)iSAR}; + static byte uf[8] = {0, 0, Cf, Cf, 0, 0, 0, 0 }; + static byte df[8] = {Cf, Cf, Cf, Cf, Sf | Zf | Cf, + Sf | Zf | Cf, 0, Sf | Zf | Cf}; + + pIcode->ic.ll.opcode = shiftTable[REG(*pInst)]; + pIcode->ic.ll.flagDU.u = uf[REG(*pInst)]; + pIcode->ic.ll.flagDU.d = df[REG(*pInst)]; + rm(i); + pIcode->ic.ll.src.regi = rCL; +} + + 
+/**************************************************************************** + trans - Sets up dst and opcode from modrm byte + ***************************************************************************/ +static void trans(Int i) +{ + static llIcode transTable[8] = + { + (llIcode)iINC, (llIcode)iDEC, (llIcode)iCALL, (llIcode)iCALLF, + (llIcode)iJMP, (llIcode)iJMPF,(llIcode)iPUSH, (llIcode)0 + }; + static byte df[8] = {Sf | Zf, Sf | Zf, 0, 0, 0, 0, 0, 0}; + + if ((byte)REG(*pInst) < 2 || !(stateTable[i].flg & B)) { /* INC & DEC */ + pIcode->ic.ll.opcode = transTable[REG(*pInst)]; /* valid on bytes */ + pIcode->ic.ll.flagDU.d = df[REG(*pInst)]; + rm(i); + memcpy(&pIcode->ic.ll.src, &pIcode->ic.ll.dst, sizeof(ICODEMEM)); + if (pIcode->ic.ll.opcode == iJMP || pIcode->ic.ll.opcode == iCALL || + pIcode->ic.ll.opcode == iCALLF) + pIcode->ic.ll.flg |= NO_OPS; + else if (pIcode->ic.ll.opcode == iINC || pIcode->ic.ll.opcode == iPUSH + || pIcode->ic.ll.opcode == iDEC) + pIcode->ic.ll.flg |= NO_SRC; + } +} + + +/**************************************************************************** + arith - Sets up dst and opcode from modrm byte + ****************************************************************************/ +static void arith(Int i) +{ byte opcode; + static llIcode arithTable[8] = + { + (llIcode)iTEST, (llIcode)0, (llIcode)iNOT, (llIcode)iNEG, + (llIcode)iMUL, (llIcode)iIMUL, (llIcode)iDIV, (llIcode)iIDIV + }; + static byte df[8] = {Sf | Zf | Cf, 0, 0, Sf | Zf | Cf, + Sf | Zf | Cf, Sf | Zf | Cf, Sf | Zf | Cf, + Sf | Zf | Cf}; + + opcode = pIcode->ic.ll.opcode = arithTable[REG(*pInst)]; + pIcode->ic.ll.flagDU.d = df[REG(*pInst)]; + rm(i); + if (opcode == iTEST) + { + if (stateTable[i].flg & B) + data1(i); + else + data2(i); + } + else if (!(opcode == iNOT || opcode == iNEG)) + { + memcpy(&pIcode->ic.ll.src, &pIcode->ic.ll.dst, sizeof(ICODEMEM)); + setAddress(i, TRUE, 0, rAX, 0); /* dst = AX */ + } + else if (opcode == iNEG || opcode == iNOT) + pIcode->ic.ll.flg |= 
NO_SRC; + + if ((opcode == iDIV) || (opcode == iIDIV)) + { + if ((pIcode->ic.ll.flg & B) != B) + pIcode->ic.ll.flg |= IM_TMP_DST; + } +} + + +/***************************************************************************** + data1 - Sets up immed from 1 byte data + *****************************************************************************/ +static void data1(Int i) +{ + pIcode->ic.ll.immed.op = (stateTable[i].flg & S)? signex(*pInst++): + *pInst++; + pIcode->ic.ll.flg |= I; +} + + +/***************************************************************************** + data2 - Sets up immed from 2 byte data + ****************************************************************************/ +static void data2(Int i) +{ + if (relocItem(pInst)) + pIcode->ic.ll.flg |= SEG_IMMED; + + /* ENTER is a special case, it does not take a destination operand, + * but this field is being used as the number of bytes to allocate + * on the stack. The procedure level is stored in the immediate + * field. There is no source operand; therefore, the flag flg is + * set to NO_OPS. 
*/ + if (pIcode->ic.ll.opcode == iENTER) + { + pIcode->ic.ll.dst.off = getWord(); + pIcode->ic.ll.flg |= NO_OPS; + } + else + pIcode->ic.ll.immed.op = getWord(); + pIcode->ic.ll.flg |= I; +} + + +/**************************************************************************** + dispM - 2 byte offset without modrm (== mod 0, rm 6) (Note:TO_REG bits are + reversed) + ****************************************************************************/ +static void dispM(Int i) +{ + setAddress(i, FALSE, SegPrefix, 0, getWord()); +} + + +/**************************************************************************** + dispN - 2 byte disp as immed relative to ip + ****************************************************************************/ +static void dispN(Int i) +{ + long off = (short)getWord(); /* Signed displacement */ + + /* Note: the result of the subtraction could be between 32k and 64k, and + still be positive; it is an offset from prog.Image. So this must be + treated as unsigned */ + pIcode->ic.ll.immed.op = (dword)(off + (unsigned)(pInst - prog.Image)); + pIcode->ic.ll.flg |= I; +} + + +/*************************************************************************** + dispS - 1 byte disp as immed relative to ip + ***************************************************************************/ +static void dispS(Int i) +{ + long off = signex(*pInst++); /* Signed displacement */ + + pIcode->ic.ll.immed.op = (dword)(off + (unsigned)(pInst - prog.Image)); + pIcode->ic.ll.flg |= I; +} + + +/**************************************************************************** + dispF - 4 byte disp as immed 20-bit target address + ***************************************************************************/ +static void dispF(Int i) +{ + dword off = (unsigned)getWord(); + dword seg = (unsigned)getWord(); + + pIcode->ic.ll.immed.op = off + ((dword)(unsigned)seg << 4); + pIcode->ic.ll.flg |= I; +} + + +/**************************************************************************** + prefix - picks 
up prefix byte for following instruction (LOCK is ignored + on purpose) + ****************************************************************************/ +static void prefix(Int i) +{ + if (pIcode->ic.ll.opcode == iREPE || pIcode->ic.ll.opcode == iREPNE) + RepPrefix = pIcode->ic.ll.opcode; + else + SegPrefix = pIcode->ic.ll.opcode; +} + +inline void BumpOpcode(llIcode& ic) +{ + ic = (llIcode)(((int)ic)+1); // Bump this icode via the int type +} + +/***************************************************************************** + strop - checks RepPrefix and converts string instructions accordingly + *****************************************************************************/ +static void strop(Int i) +{ + if (RepPrefix) + { + // pIcode->ic.ll.opcode += ((pIcode->ic.ll.opcode == iCMPS || + // pIcode->ic.ll.opcode == iSCAS) + // && RepPrefix == iREPE)? 2: 1; + if ((pIcode->ic.ll.opcode == iCMPS || pIcode->ic.ll.opcode == iSCAS) + && RepPrefix == iREPE) + BumpOpcode(pIcode->ic.ll.opcode); // += 2 + BumpOpcode(pIcode->ic.ll.opcode); // else += 1 + if (pIcode->ic.ll.opcode == iREP_LODS) + pIcode->ic.ll.flg |= NOT_HLL; + RepPrefix = 0; + } +} + + +/*************************************************************************** + escop - esc operands + ***************************************************************************/ +static void escop(Int i) +{ + pIcode->ic.ll.immed.op = REG(*pInst) + (dword)((i & 7) << 3); + pIcode->ic.ll.flg |= I; + rm(i); +} + + +/**************************************************************************** + const1 + ****************************************************************************/ +static void const1(Int i) +{ + pIcode->ic.ll.immed.op = 1; + pIcode->ic.ll.flg |= I; +} + + +/***************************************************************************** + const3 + ****************************************************************************/ +static void const3(Int i) +{ + pIcode->ic.ll.immed.op = 3; + pIcode->ic.ll.flg |= I; +} + + 
+/**************************************************************************** + none1 + ****************************************************************************/ +static void none1(Int i) +{ +} + + +/**************************************************************************** + none2 - Sets the NO_OPS flag if the operand is immediate + ****************************************************************************/ +static void none2(Int i) +{ + if (pIcode->ic.ll.flg & I) + pIcode->ic.ll.flg |= NO_OPS; +} + +/**************************************************************************** + Checks for int 34 to int 3B - if so, converts to ESC nn instruction + ****************************************************************************/ +static void checkInt(Int i) +{ + word wOp = (word) pIcode->ic.ll.immed.op; + if ((wOp >= 0x34) && (wOp <= 0x3B)) + { + /* This is a Borland/Microsoft floating point emulation instruction. + Treat as if it is an ESC opcode */ + pIcode->ic.ll.immed.op = wOp - 0x34; + pIcode->ic.ll.opcode = iESC; + pIcode->ic.ll.flg |= FLOAT_OP; + + escop(wOp - 0x34 + 0xD8); + + } +} diff --git a/src/symtab.cpp b/src/symtab.cpp new file mode 100644 index 0000000..42d7c4a --- /dev/null +++ b/src/symtab.cpp @@ -0,0 +1,626 @@ +/* + * (C) Mike van Emmerik + * These could probably be replaced by functions from libg++ + */ + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * *\ +* * +* S y m b o l t a b l e F u n c t i o n s * +* * +\* * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* This file implements a symbol table with a symbolic name, a symbol value + (word), and a procedure number. Two tables are maintained, to be able to + look up by name or by value. Pointers are used for the duplicated symbolic + name to save space. Both tables have the same structure. + The hash tables automatically expand when they get 90% full; they are + never compressed. 
Expanding the tables could take some time, since about + half of the entries have to be moved on average. + Linear probing is used, due to the difficulty of implementing (e.g.) + quadratic probing with a variable table size. +*/ +#include +#include +#include +#include +#include +#include "dcc.h" +#include "symtab.h" + +#define TABLESIZE 16 /* Number of entries added each expansion */ + /* Probably has to be a power of 2 */ +#define STRTABSIZE 256 /* Size string table is inc'd by */ +#define NIL ((word)-1) +using namespace std; +static char *pStrTab; /* Pointer to the current string table */ +static int strTabNext; /* Next free index into pStrTab */ +namespace std +{ +template<> +struct hash : public unary_function +{ + size_t operator()(const SYMTABLE & key) const + { + word h = 0; + h = (word)(key.symOff ^ (key.symOff >> 8)); + return h; + } + +}; +} +static tableType curTableType; /* Which table is current */ +struct TABLEINFO_TYPE +{ + void deleteVal(dword symOff, Function *symProc, boolT bSymToo); + void enterSym(const char *symName, dword symOff, Function *symProc, boolT bSymToo); + std::string findVal(dword symOff, Function *symProc, word &pIndex); + void create(tableType type); + void destroy(); +private: + void deleteSym(char *symName); + boolT findSym(const char *symName, word &pIndex); + boolT readSym(char *symName, dword *pSymOff, Function **pSymProc); + void expandSym(void); + word findBlankSym(const std::string &symName); + word symHash(const char *name, word *pre); + word valHash(dword symOff, Function *symProc, word *pre); + + SYMTABLE *symTab; /* Pointer to the symbol hashed table */ + SYMTABLE *valTab; /* Pointer to the value hashed table */ + word numEntry; /* Number of entries in this table */ + word tableSize;/* Size of the table (entries) */ + unordered_map z; + unordered_map z2; +}; + +TABLEINFO_TYPE tableInfo[NUM_TABLE_TYPES]; /* Array of info about tables */ +TABLEINFO_TYPE currentTabInfo; + +/* Create a new symbol table. 
Returns "handle" */ +void TABLEINFO_TYPE::create(tableType type) +{ + switch(type) + { + case Comment: + numEntry = 0; + tableSize = TABLESIZE; + valTab = (SYMTABLE*)allocMem(sizeof(SYMTABLE) * TABLESIZE); + symTab = 0; + memset(valTab, 0, sizeof(SYMTABLE) * TABLESIZE); + break; + case Label: + currentTabInfo.numEntry = 0; + currentTabInfo.tableSize = TABLESIZE; + currentTabInfo.symTab = (SYMTABLE*)allocMem(sizeof(SYMTABLE) * TABLESIZE); + memset(currentTabInfo.symTab, 0, sizeof(SYMTABLE) * TABLESIZE); + + currentTabInfo.valTab = (SYMTABLE*)allocMem(sizeof(SYMTABLE) * TABLESIZE); + memset(currentTabInfo.valTab, 0, sizeof(SYMTABLE) * TABLESIZE); + break; + } + +} + +void createSymTables(void) +{ + /* Initilise the comment table */ + /* NB - there is no symbol hashed comment table */ + currentTabInfo.create(Comment); + tableInfo[Comment] = currentTabInfo; + + /* Initialise the label table */ + currentTabInfo.create(Label); + + tableInfo[Label] = currentTabInfo; + curTableType = Label; + + /* Now the string table */ + strTabNext = 0; + pStrTab = (char *)allocMem(STRTABSIZE); + +// tableInfo[Label].symTab = currentTabInfo.symTab; +// tableInfo[Label].valTab = currentTabInfo.valTab; +// tableInfo[Label].numEntry = currentTabInfo.numEntry; +// tableInfo[Label].tableSize = currentTabInfo.tableSize; + curTableType = Label; + +} + +void selectTable(tableType tt) +{ + if (curTableType == tt) + return; /* Nothing to do */ + currentTabInfo = tableInfo[tt]; + curTableType = tt; +} +void TABLEINFO_TYPE::destroy() +{ + if(symTab) + free(symTab); // The symbol hashed label table + if(valTab) + free(valTab); // And the value hashed label table + +} +void destroySymTables(void) +{ + selectTable(Label); + currentTabInfo.destroy(); + selectTable(Comment); + currentTabInfo.destroy(); +} + + +/* Hash the symbolic name */ +word TABLEINFO_TYPE::symHash(const char *name, word *pre) +{ + int i; + word h = 0; + char ch; + + for (i=0; i < (int)strlen(name); i++) + { + ch = name[i]; + h = (h 
<< 2) ^ ch; + h += (ch >> 2) + (ch << 5); + } + + *pre = h; /* Pre modulo hash value */ + return h % tableSize; /* Post modulo hash value */ +} + +/* Hash the symOff and symProc fields */ +/* Note: for the time being, there no use is made of the symProc field */ +word TABLEINFO_TYPE::valHash(dword symOff, Function * symProc, word *pre) +{ + word h = 0; + + h = (word)(symOff ^ (symOff >> 8)); + + *pre = h; /* Pre modulo hash value */ + return h % tableSize; /* Post modulo hash value */ +} +void TABLEINFO_TYPE::enterSym(const char *symName, dword symOff, Function * symProc, boolT bSymToo) +{ + word h, pre, j; + SYMTABLE entry; + entry.pSymName= symName; /* Symbol name ptr */ + entry.symOff = symOff; /* Offset of the symbol */ + entry.symProc = symProc; /* Symbol's proc num */ + entry.preHash = pre; /* Pre modulo hash value */ + entry.postHash= h; /* Post modulo hash value */ + entry.nextOvf = NIL; /* No overflow */ + entry.prevOvf = NIL; /* No back link */ + z[symName] = entry; + z2[entry] = symName; + if ((numEntry / 9 * 10) >= tableSize) + { + /* Table is full. Expand it */ + expandSym(); + } + + /* Enter it into the value hashed table first */ + h = valHash(symOff, symProc, &pre); /* Ideal spot for this entry */ + if (valTab[h].symProc == 0) /* Collision? */ + { + /* No. 
Just insert here */ + valTab[h].pSymName= symName; /* Symbol name ptr */ + valTab[h].symOff = symOff; /* Offset of the symbol */ + valTab[h].symProc = symProc; /* Symbol's proc num */ + valTab[h].preHash = pre; /* Pre modulo hash value */ + valTab[h].postHash= h; /* Post modulo hash value */ + valTab[h].nextOvf = NIL; /* No overflow */ + valTab[h].prevOvf = NIL; /* No back link */ + } + else + { + /* Linear probing, for now */ + j = (h+1) % tableSize; + while (j != h) + { + if (valTab[j].symProc == 0) + { + /* Insert here */ + valTab[j].pSymName= symName; /* Symbol name ptr */ + valTab[j].symOff = symOff; /* Offset of the symbol */ + valTab[j].symProc = symProc; /* Symbol's proc num */ + valTab[j].preHash = pre; /* Pre modulo hash value */ + valTab[j].postHash= h; /* Post modulo hash value */ + /* Insert after the primary entry in the table */ + valTab[j].nextOvf = valTab[h].nextOvf; + valTab[h].nextOvf = j; + valTab[j].prevOvf = h; /* The backlink */ + break; + } + else + { + /* Probe further */ + j = (j+1) % tableSize; + } + } + if (j == h) + { + printf("enterSym: val table overflow!\n"); + exit(1); + } + } + + /* Now enter into the symbol hashed table as well, if reqd */ + if (!bSymToo) return; + h = symHash(symName, &pre); /* Ideal spot for this entry */ + if (symTab[h].pSymName.empty()) /* Collision? */ + { + /* No. 
Just insert here */ + symTab[h].pSymName= symName; /* Symbol name ptr */ + symTab[h].symOff = symOff; /* Offset of the symbol */ + symTab[h].symProc = symProc; /* Symbol's proc num */ + symTab[h].preHash = pre; /* Pre modulo hash value */ + symTab[h].postHash= h; /* Post modulo hash value */ + symTab[h].nextOvf = NIL; /* No overflow */ + symTab[h].prevOvf = NIL; /* No back link */ + } + else + { + /* Linear probing, for now */ + j = (h+1) % tableSize; + while (j != h) + { + if (symTab[j].pSymName.empty()) + { + /* Insert here */ + symTab[j].pSymName= symName; /* Symbol name ptr */ + symTab[j].symOff = symOff; /* Offset of the symbol */ + symTab[j].symProc = symProc; /* Symbol's proc num */ + symTab[j].preHash = pre; /* Pre modulo hash value */ + symTab[j].postHash= h; /* Post modulo hash value */ + /* Insert after the primary entry in the table */ + symTab[j].nextOvf = symTab[h].nextOvf; + symTab[h].nextOvf = j; + symTab[j].prevOvf = h; /* The backlink */ + break; + } + else + { + /* Probe further */ + j = (j+1) % tableSize; + } + } + if (j == h) + { + printf("enterSym: sym table overflow!\n"); + exit(1); + } + } +} + + +void enterSym(char *symName, dword symOff, Function * symProc, boolT bSymToo) +{ + currentTabInfo.enterSym(symName,symOff,symProc,bSymToo); +} + +boolT TABLEINFO_TYPE::findSym(const char *symName, word &pIndex) +{ + word h, j, pre; + + h = symHash(symName, &pre); + j = h; + bool found=false; + do + { + if (symTab[j].pSymName.empty()) + { + return FALSE; /* No entry at all */ + } + if (strcmp(symName, symTab[j].pSymName.c_str()) == 0) + { + pIndex = j; + found=true; + break; /* Symbol found */ + } + j = symTab[j].nextOvf; /* Follow the chain */ + } + while (j != NIL); + auto iter = z.find(symName); + if(iter!=z.end()) + { + assert(iter->second==symTab[j]); + } + + return found; /* End of chain */ +} +/* Find symbol by value */ +std::string TABLEINFO_TYPE::findVal(dword symOff, Function * symProc, word &pIndex) +{ + word h, j, pre; + std::string 
res=""; + h = valHash(symOff, symProc, &pre); + j = h; + do + { + if (valTab[j].symProc == 0) + break; /* No entry at all */ + + if ((valTab[j].symOff == symOff) /*&& (valTab[j].symProc == symProc)*/) + { + pIndex = j; + res=valTab[j].pSymName; + break; /* Symbol found */ + } + j = valTab[j].nextOvf; /* Follow the chain */ + } + while (j != NIL); + auto iter = z2.find(SYMTABLE(symOff,symProc)); + if(iter!=z2.end()) + { + assert(iter->second==res); + } + return res; /* End of chain */ +} + +word TABLEINFO_TYPE::findBlankSym(const std::string &symName) +{ + word h, j, pre; + + h = symHash(symName.c_str(), &pre); + j = h; + do + { + if (symTab[j].pSymName.empty()) + { + return j; /* Empty entry. Terminate probing */ + } + j = (++j) % tableSize; /* Linear probing */ + } + while (j != h); + printf("Could not find blank entry in table! Num entries is %ld of %ld\n", + (long)numEntry, (long)tableSize); + return 0; +} + +/* Using the symbolic name, read the value */ +boolT TABLEINFO_TYPE::readSym(char *symName, dword *pSymOff, Function * *pSymProc) +{ + word i; + + if (!findSym(symName, i)) + { + return FALSE; + } + *pSymOff = symTab[i].symOff; + *pSymProc= symTab[i].symProc; + return TRUE; +} + + + + +/* A doubly linked list of entries belonging to the same hash bucket is + maintained, to prevent the need for many entries to be moved when deleting + an entry. It is implemented with indexes, and is not an open hashing system. + Symbols are deleted from both hash tables. 
+*/ + +/* Known limitation: strings are never deleted from the string table */ + +void TABLEINFO_TYPE::deleteSym(char *symName) +{ + word i, j, back; + dword symOff; + Function * symProc; + + /* Delete from symbol hashed table first */ + if (!findSym(symName, i)) + { + printf("Could not delete non existant symbol name %s\n", symName); + exit(1); + } + symOff = symTab[i].symOff; /* Remember these for valTab */ + symProc= symTab[i].symProc; + j = symTab[i].nextOvf; /* Look at next overflowed entry */ + + if (j == NIL) /* Any overflows? */ + { + /* No, so we just wipe out this record. Must NIL the pointer of + the previous record, however */ + symTab[symTab[i].prevOvf].nextOvf = NIL; + j = i; /* So we wipe out the current name */ + } + else + { + /* Yes, move this entry to this vacated spot. Note that the nextOvf + field will still point to the next record in the overflow chain, + but we need to preserve the backlink for adjusting the current + item's backlink */ + back = symTab[j].prevOvf; + symTab[i] = symTab[j]; + symTab[i].prevOvf = back; + } + /* And now mark the vacated record as empty */ + symTab[j].pSymName.clear(); /* Rub out the name */ + + + /* Delete from value hashed table */ + if (findVal(symOff, symProc, i).empty()) + { + printf("Could not delete non existant symbol off %04X proc %d\n",symOff, symProc); + exit(1); + } + j = valTab[i].nextOvf; /* Look at next overflowed entry */ + + if (j == NIL) /* Any overflows? */ + { + /* No, so we just wipe out this record. Must NIL the pointer of + the previous record, however */ + valTab[valTab[i].prevOvf].nextOvf = NIL; + j = i; /* So we wipe out the current entry */ + } + else + { + /* Yes, move this entry to this vacated spot. 
Note that the nextOvf + field will still point to the next record in the overflow chain, + but we need to preserve the backlink for adjusting the current + item's backlink */ + back = valTab[j].prevOvf; + valTab[i]= valTab[j]; + valTab[i].prevOvf = back; + } + /* And now mark the vacated record as empty */ + valTab[j].symProc = 0; /* Rub out the entry */ +} +void TABLEINFO_TYPE::deleteVal(dword symOff, Function * symProc, boolT bSymToo) +{ + word i, j, back; + std::string symName; + + /* Delete from value hashed table */ + if (findVal(symOff, symProc, i).empty()) + { + printf("Could not delete non existant symbol off %04X proc %p\n", + symOff, symProc); + exit(1); + } + symName = symTab[i].pSymName; /* Remember this for symTab */ + j = valTab[i].nextOvf; /* Look at next overflowed entry */ + + if (j == NIL) /* Any overflows? */ + { + /* No, so we just wipe out this record. Must NIL the pointer of + the previous record, however */ + valTab[valTab[i].prevOvf].nextOvf = NIL; + j = i; /* So we wipe out the current entry */ + } + else + { + /* Yes, move this entry to this vacated spot. Note that the nextOvf + field will still point to the next record in the overflow chain, + but we need to preserve the backlink for adjusting the current + item's backlink */ + back = valTab[j].prevOvf; + memcpy(&valTab[i], &valTab[j], sizeof(SYMTABLE)); + valTab[i].prevOvf = back; + } + /* And now mark the vacated record as empty */ + valTab[j].symProc = 0; /* Rub out the entry */ + + /* If requested, delete from symbol hashed table now */ + if (!bSymToo) return; + if (!findSym(symName.c_str(), i)) + { + printf("Could not delete non existant symbol name %s\n", symName.c_str()); + exit(1); + } + j = symTab[i].nextOvf; /* Look at next overflowed entry */ + + if (j == NIL) /* Any overflows? */ + { + /* No, so we just wipe out this record. 
Must NIL the pointer of + the previous record, however */ + symTab[symTab[i].prevOvf].nextOvf = NIL; + j = i; /* So we wipe out the current name */ + } + else + { + /* Yes, move this entry to this vacated spot. Note that the nextOvf + field will still point to the next record in the overflow chain, + but we need to preserve the backlink for adjusting the current + item's backlink */ + back = symTab[j].prevOvf; + symTab[i] = symTab[j]; + symTab[i].prevOvf = back; + } + /* And now mark the vacated record as empty */ + symTab[j].pSymName.clear(); /* Rub out the name */ + +} + +void TABLEINFO_TYPE::expandSym(void) +{ + word i, j, n, newPost; + + printf("\nResizing table...\r"); + /* We double the table size each time, so on average only half of the + entries move to the new half. This works because we are effectively + shifting the "binary point" of the hash value to the left each time, + thereby leaving the number unchanged or adding an MSBit of 1. */ + tableSize <<= 2; + symTab = (SYMTABLE*)reallocVar(symTab, tableSize * sizeof(SYMTABLE)); + memset (&symTab[tableSize/2], 0, (tableSize/2) * sizeof(SYMTABLE)); + + /* Now we have to move some of the entries to take advantage of the extra + space */ + + for (i=0; i < numEntry; i++) + { + newPost = symTab[i].preHash % tableSize; + if (newPost != symTab[i].postHash) + { + /* This entry is now in the wrong place. Copy it to the new position, + then delete it. */ + j = findBlankSym(symTab[i].pSymName); + memcpy(&symTab[j], &symTab[i], sizeof(SYMTABLE)); + /* Correct the post hash value */ + symTab[j].postHash = newPost; + + /* Now adjust links */ + n = symTab[j].prevOvf; + if (n != NIL) + { + symTab[n].nextOvf = j; + } + + n = symTab[j].nextOvf; + if (n != NIL) + { + symTab[n].prevOvf = j; + } + + /* Mark old position as deleted */ + symTab[i].pSymName.clear(); + } + } +} + +/* This function adds to the string table. 
At this stage, strings are not + deleted */ +char * addStrTbl(char *pStr) +{ + char *p; + + if ((strTabNext + strlen(pStr) + 1) >= STRTABSIZE) + { + /* We can't realloc the old string table pointer, since that will + potentially move the string table, and pointers will be invalid. + So we realloc this one to its present usage (hopefully it won't + move), and allocate a new one */ + if (reallocVar((void *)pStrTab, strTabNext) != pStrTab) + { + printf("Damn it! String table moved on shrinking!\n"); + exit(1); + } + pStrTab = (char *)allocMem(STRTABSIZE); + strTabNext = 0; + } + p = strcpy(&pStrTab[strTabNext], pStr); + strTabNext += strlen(pStr) +1; + return p; +} +void deleteVal(dword symOff, Function * symProc, boolT bSymToo) +{ + currentTabInfo.deleteVal(symOff,symProc,bSymToo); +} +std::string findVal(dword symOff, Function * symProc, word *pIndex) +{ + return currentTabInfo.findVal(symOff,symProc,*pIndex); +} +/* Using the value, read the symbolic name */ +boolT readVal(char *symName, dword symOff, Function * symProc) +{ + word i; + std::string r=currentTabInfo.findVal(symOff, symProc, i); + if (r.empty()) + { + return false; + } + strcpy(symName, r.c_str()); + return true; +} diff --git a/src/udm.cpp b/src/udm.cpp new file mode 100644 index 0000000..e84329a --- /dev/null +++ b/src/udm.cpp @@ -0,0 +1,159 @@ +/***************************************************************************** + * dcc project Universal Decompilation Module + * This is supposedly a machine independant and language independant module + * that just plays with abstract cfg's and intervals and such like. 
+ * (C) Cristina Cifuentes + ****************************************************************************/ +#include +#include +#include +#include "dcc.h" + +static void displayCFG(Function * pProc); +static void displayDfs(BB * pBB); + +/**************************************************************************** + * udm + ****************************************************************************/ +void udm(void) +{ + + /* Build the control flow graph, find idioms, and convert low-level + * icodes to high-level ones */ + for (auto iter = pProcList.rbegin(); iter!=pProcList.rend(); ++iter) + { + + if (iter->flg & PROC_ISLIB) + continue; /* Ignore library functions */ + + /* Create the basic control flow graph */ + iter->createCFG(); + if (option.VeryVerbose) + iter->displayCFG(); + + /* Remove redundancies and add in-edge information */ + iter->compressCFG(); + + /* Print 2nd pass assembler listing */ + if (option.asm2) + disassem(2, &(*iter)); + + /* Idiom analysis and propagation of long type */ + iter->lowLevelAnalysis(); + + /* Generate HIGH_LEVEL icodes whenever possible */ + iter->highLevelGen(); + } + + /* Data flow analysis - eliminate condition codes, extraneous registers + * and intermediate instructions. 
Find expressions by forward + * substitution algorithm */ + pProcList.front().dataFlow (0); + derSeq *derivedG=0; + + /* Control flow analysis - structuring algorithm */ + for (auto iter = pProcList.rbegin(); iter!=pProcList.rend(); ++iter) + { + + if (iter->flg & PROC_ISLIB) + continue; /* Ignore library functions */ + + /* Make cfg reducible and build derived sequences */ + derivedG=iter->checkReducibility(); + + if (option.VeryVerbose) + derivedG->display(); + + /* Structure the graph */ + iter->structure(derivedG); + + /* Check for compound conditions */ + iter->compoundCond (); + + if (option.verbose) { + printf("\nDepth first traversal - Proc %s\n", iter->name); + iter->cfg.front()->displayDfs(); + } + + /* Free storage occupied by this procedure */ + freeDerivedSeq(*derivedG); + } +} + + +static const char *const s_nodeType[] = {"branch", "if", "case", "fall", "return", "call", + "loop", "repeat", "interval", "cycleHead", + "caseHead", "terminate", + "nowhere" }; + +static const char *const s_loopType[] = {"noLoop", "while", "repeat", "loop", "for"}; + + +/**************************************************************************** + * displayCFG - Displays the Basic Block list + ***************************************************************************/ +void Function::displayCFG() +{ + Int i; + BB * pBB; + + printf("\nBasic Block List - Proc %s", name); + + for (auto iter = cfg.begin(); iter!=cfg.end(); ++iter) + { + pBB = *iter; + printf("\nnode type = %s, ", s_nodeType[pBB->nodeType]); + printf("start = %ld, length = %ld, #out edges = %ld\n", + pBB->start, pBB->length, pBB->numOutEdges); + + for (i = 0; i < pBB->numOutEdges; i++) + printf(" outEdge[%2d] = %ld\n",i, pBB->edges[i].BBptr->start); + } +} + + +/***************************************************************************** + * displayDfs - Displays the CFG using a depth first traversal + ****************************************************************************/ +void BB::displayDfs() +{ + 
Int i; + assert(this); + traversed = DFS_DISP; + + printf("node type = %s, ", s_nodeType[nodeType]); + printf("start = %ld, length = %ld, #in-edges = %ld, #out-edges = %ld\n", + start, length, inEdges.size(), numOutEdges); + printf("dfsFirst = %ld, dfsLast = %ld, immed dom = %ld\n", + dfsFirstNum, dfsLastNum, + immedDom == MAX ? -1 : immedDom); + printf("loopType = %s, loopHead = %ld, latchNode = %ld, follow = %ld\n", + s_loopType[loopType], + loopHead == MAX ? -1 : loopHead, + latchNode == MAX ? -1 : latchNode, + loopFollow == MAX ? -1 : loopFollow); + printf ("ifFollow = %ld, caseHead = %ld, caseTail = %ld\n", + ifFollow == MAX ? -1 : ifFollow, + caseHead == MAX ? -1 : caseHead, + caseTail == MAX ? -1 : caseTail); + + if (nodeType == INTERVAL_NODE) + printf("corresponding interval = %ld\n", correspInt->numInt); + else + for (i = 0; i < inEdges.size(); i++) + printf (" inEdge[%ld] = %ld\n", i, inEdges[i]->start); + + /* Display out edges information */ + for (i = 0; i < numOutEdges; i++) + if (nodeType == INTERVAL_NODE) + printf(" outEdge[%ld] = %ld\n", i, + edges[i].BBptr->correspInt->numInt); + else + printf(" outEdge[%d] = %ld\n", i, edges[i].BBptr->start); + printf("----\n"); + + /* Recursive call on successors of current node */ + for (i = 0; i < numOutEdges; i++) + if (edges[i].BBptr->traversed != DFS_DISP) + edges[i].BBptr->displayDfs(); +} diff --git a/tests/inputs/BENCHFN.EXE b/tests/inputs/BENCHFN.EXE new file mode 100755 index 0000000..f4849e9 Binary files /dev/null and b/tests/inputs/BENCHFN.EXE differ diff --git a/tests/inputs/BENCHLNG.EXE b/tests/inputs/BENCHLNG.EXE new file mode 100755 index 0000000..a8328b7 Binary files /dev/null and b/tests/inputs/BENCHLNG.EXE differ diff --git a/tests/inputs/BENCHMUL.EXE b/tests/inputs/BENCHMUL.EXE new file mode 100755 index 0000000..eb1720e Binary files /dev/null and b/tests/inputs/BENCHMUL.EXE differ diff --git a/tests/inputs/BENCHMUS.EXE b/tests/inputs/BENCHMUS.EXE new file mode 100755 index 0000000..1206a25 
Binary files /dev/null and b/tests/inputs/BENCHMUS.EXE differ diff --git a/tests/inputs/BENCHSHO.EXE b/tests/inputs/BENCHSHO.EXE new file mode 100755 index 0000000..c400fcc Binary files /dev/null and b/tests/inputs/BENCHSHO.EXE differ diff --git a/tests/inputs/BYTEOPS.EXE b/tests/inputs/BYTEOPS.EXE new file mode 100755 index 0000000..434edc7 Binary files /dev/null and b/tests/inputs/BYTEOPS.EXE differ diff --git a/tests/inputs/FIBOS.EXE b/tests/inputs/FIBOS.EXE new file mode 100755 index 0000000..baa7d40 Binary files /dev/null and b/tests/inputs/FIBOS.EXE differ diff --git a/tests/inputs/MIN.EXE b/tests/inputs/MIN.EXE new file mode 100755 index 0000000..f0eb4bb Binary files /dev/null and b/tests/inputs/MIN.EXE differ diff --git a/tests/outputs/BENCHFN.EXE.a1 b/tests/outputs/BENCHFN.EXE.a1 new file mode 100644 index 0000000..14e823d --- /dev/null +++ b/tests/outputs/BENCHFN.EXE.a1 @@ -0,0 +1,109 @@ + main PROC NEAR +000 000365 55 PUSH bp +001 000366 8BEC MOV bp, sp +002 000368 83EC08 SUB sp, 8 +003 00036B B89401 MOV ax, 194h +004 00036E 50 PUSH ax +005 00036F E8D90B CALL near ptr printf +006 000372 59 POP cx +007 000373 8D46FC LEA ax, [bp-4] +008 000376 50 PUSH ax +009 000377 B8B001 MOV ax, 1B0h +010 00037A 50 PUSH ax +011 00037B E85614 CALL near ptr scanf +012 00037E 59 POP cx +013 00037F 59 POP cx +014 000380 FF76FE PUSH word ptr [bp-2] +015 000383 FF76FC PUSH word ptr [bp-4] +016 000386 B8B401 MOV ax, 1B4h +017 000389 50 PUSH ax +018 00038A E8BE0B CALL near ptr printf +019 00038D 83C406 ADD sp, 6 +020 000390 C746FA0000 MOV word ptr [bp-6], 0 +021 000395 C746F80100 MOV word ptr [bp-8], 1 +022 00039A EB0B JMP L1 + +023 0003A7 8B56FA L1: MOV dx, [bp-6] +024 0003AA 8B46F8 MOV ax, [bp-8] +025 0003AD 3B56FE CMP dx, [bp-2] +026 0003B0 7CEA JL L2 +027 0003B2 7F05 JG L3 +028 0003B4 3B46FC CMP ax, [bp-4] +029 0003B7 76E3 JBE L2 + +030 0003B9 B8CE01 L3: MOV ax, 1CEh +031 0003BC 50 PUSH ax +032 0003BD E88B0B CALL near ptr printf +033 0003C0 59 POP cx +034 0003C1 8BE5 MOV sp, 
bp +035 0003C3 5D POP bp +036 0003C4 C3 RET + +037 00039C E8A6FF L2: CALL near ptr proc_1 +038 00039F 8346F801 ADD word ptr [bp-8], 1 +039 0003A3 8356FA00 ADC word ptr [bp-6], 0 +040 JMP L1 ;Synthetic inst + + main ENDP + + proc_1 PROC NEAR +000 000345 55 PUSH bp +001 000346 8BEC MOV bp, sp +002 000348 E8D7FF CALL near ptr proc_2 +003 00034B E8D4FF CALL near ptr proc_2 +004 00034E E8D1FF CALL near ptr proc_2 +005 000351 E8CEFF CALL near ptr proc_2 +006 000354 E8CBFF CALL near ptr proc_2 +007 000357 E8C8FF CALL near ptr proc_2 +008 00035A E8C5FF CALL near ptr proc_2 +009 00035D E8C2FF CALL near ptr proc_2 +010 000360 E8BFFF CALL near ptr proc_2 +011 000363 5D POP bp +012 000364 C3 RET + + proc_1 ENDP + + proc_2 PROC NEAR +000 000322 55 PUSH bp +001 000323 8BEC MOV bp, sp +002 000325 E8D7FF CALL near ptr proc_3 +003 000328 E8D4FF CALL near ptr proc_3 +004 00032B E8D1FF CALL near ptr proc_3 +005 00032E E8CEFF CALL near ptr proc_3 +006 000331 E8CBFF CALL near ptr proc_3 +007 000334 E8C8FF CALL near ptr proc_3 +008 000337 E8C5FF CALL near ptr proc_3 +009 00033A E8C2FF CALL near ptr proc_3 +010 00033D E8BFFF CALL near ptr proc_3 +011 000340 E8BCFF CALL near ptr proc_3 +012 000343 5D POP bp +013 000344 C3 RET + + proc_2 ENDP + + proc_3 PROC NEAR +000 0002FF 55 PUSH bp +001 000300 8BEC MOV bp, sp +002 000302 E8F5FF CALL near ptr proc_4 +003 000305 E8F2FF CALL near ptr proc_4 +004 000308 E8EFFF CALL near ptr proc_4 +005 00030B E8ECFF CALL near ptr proc_4 +006 00030E E8E9FF CALL near ptr proc_4 +007 000311 E8E6FF CALL near ptr proc_4 +008 000314 E8E3FF CALL near ptr proc_4 +009 000317 E8E0FF CALL near ptr proc_4 +010 00031A E8DDFF CALL near ptr proc_4 +011 00031D E8DAFF CALL near ptr proc_4 +012 000320 5D POP bp +013 000321 C3 RET + + proc_3 ENDP + + proc_4 PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 5D POP bp +003 0002FE C3 RET + + proc_4 ENDP + diff --git a/tests/outputs/BENCHFN.EXE.a2 b/tests/outputs/BENCHFN.EXE.a2 new file mode 100644 index 
0000000..98d7e04 --- /dev/null +++ b/tests/outputs/BENCHFN.EXE.a2 @@ -0,0 +1,108 @@ + proc_4 PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 5D POP bp +003 0002FE C3 RET + + proc_4 ENDP + + proc_3 PROC NEAR +000 0002FF 55 PUSH bp +001 000300 8BEC MOV bp, sp +002 000302 E8F5FF CALL near ptr proc_4 +003 000305 E8F2FF CALL near ptr proc_4 +004 000308 E8EFFF CALL near ptr proc_4 +005 00030B E8ECFF CALL near ptr proc_4 +006 00030E E8E9FF CALL near ptr proc_4 +007 000311 E8E6FF CALL near ptr proc_4 +008 000314 E8E3FF CALL near ptr proc_4 +009 000317 E8E0FF CALL near ptr proc_4 +010 00031A E8DDFF CALL near ptr proc_4 +011 00031D E8DAFF CALL near ptr proc_4 +012 000320 5D POP bp +013 000321 C3 RET + + proc_3 ENDP + + proc_2 PROC NEAR +000 000322 55 PUSH bp +001 000323 8BEC MOV bp, sp +002 000325 E8D7FF CALL near ptr proc_3 +003 000328 E8D4FF CALL near ptr proc_3 +004 00032B E8D1FF CALL near ptr proc_3 +005 00032E E8CEFF CALL near ptr proc_3 +006 000331 E8CBFF CALL near ptr proc_3 +007 000334 E8C8FF CALL near ptr proc_3 +008 000337 E8C5FF CALL near ptr proc_3 +009 00033A E8C2FF CALL near ptr proc_3 +010 00033D E8BFFF CALL near ptr proc_3 +011 000340 E8BCFF CALL near ptr proc_3 +012 000343 5D POP bp +013 000344 C3 RET + + proc_2 ENDP + + proc_1 PROC NEAR +000 000345 55 PUSH bp +001 000346 8BEC MOV bp, sp +002 000348 E8D7FF CALL near ptr proc_2 +003 00034B E8D4FF CALL near ptr proc_2 +004 00034E E8D1FF CALL near ptr proc_2 +005 000351 E8CEFF CALL near ptr proc_2 +006 000354 E8CBFF CALL near ptr proc_2 +007 000357 E8C8FF CALL near ptr proc_2 +008 00035A E8C5FF CALL near ptr proc_2 +009 00035D E8C2FF CALL near ptr proc_2 +010 000360 E8BFFF CALL near ptr proc_2 +011 000363 5D POP bp +012 000364 C3 RET + + proc_1 ENDP + + main PROC NEAR +000 000365 55 PUSH bp +001 000366 8BEC MOV bp, sp +002 000368 83EC08 SUB sp, 8 +003 00036B B89401 MOV ax, 194h +004 00036E 50 PUSH ax +005 00036F E8D90B CALL near ptr printf +006 000372 59 POP cx +007 000373 8D46FC LEA 
ax, [bp-4] +008 000376 50 PUSH ax +009 000377 B8B001 MOV ax, 1B0h +010 00037A 50 PUSH ax +011 00037B E85614 CALL near ptr scanf +012 00037E 59 POP cx +013 00037F 59 POP cx +014 000380 FF76FE PUSH word ptr [bp-2] +015 000383 FF76FC PUSH word ptr [bp-4] +016 000386 B8B401 MOV ax, 1B4h +017 000389 50 PUSH ax +018 00038A E8BE0B CALL near ptr printf +019 00038D 83C406 ADD sp, 6 +020 000390 C746FA0000 MOV word ptr [bp-6], 0 +021 000395 C746F80100 MOV word ptr [bp-8], 1 + +023 0003A7 8B56FA L1: MOV dx, [bp-6] +024 0003AA 8B46F8 MOV ax, [bp-8] +025 0003AD 3B56FE CMP dx, [bp-2] +026 0003B0 7CEA JL L2 +027 0003B2 7F05 JG L3 +028 0003B4 3B46FC CMP ax, [bp-4] +029 0003B7 76E3 JBE L2 + +030 0003B9 B8CE01 L3: MOV ax, 1CEh +031 0003BC 50 PUSH ax +032 0003BD E88B0B CALL near ptr printf +033 0003C0 59 POP cx +034 0003C1 8BE5 MOV sp, bp +035 0003C3 5D POP bp +036 0003C4 C3 RET + +037 00039C E8A6FF L2: CALL near ptr proc_1 +038 00039F 8346F801 ADD word ptr [bp-8], 1 +039 0003A3 8356FA00 ADC word ptr [bp-6], 0 +040 JMP L1 ;Synthetic inst + + main ENDP + diff --git a/tests/outputs/BENCHFN.b b/tests/outputs/BENCHFN.b new file mode 100644 index 0000000..41ae251 --- /dev/null +++ b/tests/outputs/BENCHFN.b @@ -0,0 +1,89 @@ +/* + * Input file : ./tests/inputs/BENCHFN.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void proc_4 () +/* Takes no parameters. + * High-level language prologue code. + */ +{ +} + + +void proc_3 () +/* Takes no parameters. + * High-level language prologue code. + */ +{ + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); +} + + +void proc_2 () +/* Takes no parameters. + * High-level language prologue code. + */ +{ + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); +} + + +void proc_1 () +/* Takes no parameters. + * High-level language prologue code. 
+ */ +{ + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); +} + + +void main () +/* Takes no parameters. + * High-level language prologue code. + */ +{ +long loc1; +long loc2; + + printf ("enter number of iterations "); + scanf ("%ld", &loc0); + printf ("executing %ld iterations\n", loc2); + loc1 = 1; + + while ((loc1 <= loc2)) { + proc_1 (); + loc1 = (loc1 + 1); + } /* end of while */ + printf ("finished\n"); +} + diff --git a/tests/outputs/BENCHLNG.EXE.a1 b/tests/outputs/BENCHLNG.EXE.a1 new file mode 100644 index 0000000..a3fa2ec --- /dev/null +++ b/tests/outputs/BENCHLNG.EXE.a1 @@ -0,0 +1,242 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC14 SUB sp, 14h +003 000300 8D46FC LEA ax, [bp-4] +004 000303 50 PUSH ax +005 000304 B89401 MOV ax, 194h +006 000307 50 PUSH ax +007 000308 E85D15 CALL near ptr scanf +008 00030B 59 POP cx +009 00030C 59 POP cx +010 00030D FF76FE PUSH word ptr [bp-2] +011 000310 FF76FC PUSH word ptr [bp-4] +012 000313 B89801 MOV ax, 198h +013 000316 50 PUSH ax +014 000317 E8C50C CALL near ptr printf +015 00031A 83C406 ADD sp, 6 +016 00031D 8D46EC LEA ax, [bp-14h] +017 000320 50 PUSH ax +018 000321 B8B201 MOV ax, 1B2h +019 000324 50 PUSH ax +020 000325 E84015 CALL near ptr scanf +021 000328 59 POP cx +022 000329 59 POP cx +023 00032A 8D46F0 LEA ax, [bp-10h] +024 00032D 50 PUSH ax +025 00032E B8B601 MOV ax, 1B6h +026 000331 50 PUSH ax +027 000332 E83315 CALL near ptr scanf +028 000335 59 POP cx +029 000336 59 POP cx +030 000337 C746FA0000 MOV word ptr [bp-6], 0 +031 00033C C746F80100 MOV word ptr [bp-8], 1 +032 000341 E9E900 JMP L1 + +033 00042D 8B56FA L1: MOV dx, [bp-6] +034 000430 8B46F8 MOV ax, [bp-8] +035 000433 3B56FE CMP dx, [bp-2] +036 000436 7D03 JGE L2 +037 000438 E909FF JMP L3 + +038 000344 C746F60000 L3: MOV word ptr [bp-0Ah], 0 +039 000349 C746F40100 MOV word ptr [bp-0Ch], 1 +040 00034E E9C000 JMP L4 + +041 000411 837EF600 L4: CMP 
word ptr [bp-0Ah], 0 +042 000415 7D03 JGE L5 +043 000417 E937FF JMP L6 + +044 000351 8B56EE L6: MOV dx, [bp-12h] +045 000354 8B46EC MOV ax, [bp-14h] +046 000357 0346F0 ADD ax, [bp-10h] +047 00035A 1356F2 ADC dx, [bp-0Eh] +048 00035D 0346F4 ADD ax, [bp-0Ch] +049 000360 1356F6 ADC dx, [bp-0Ah] +050 000363 8956EE MOV [bp-12h], dx +051 000366 8946EC MOV [bp-14h], ax +052 000369 8B56EE MOV dx, [bp-12h] +053 00036C 8B46EC MOV ax, [bp-14h] +054 00036F D1FA SAR dx, 1 +055 000371 D1D8 RCR ax, 1 +056 000373 8956F2 MOV [bp-0Eh], dx +057 000376 8946F0 MOV [bp-10h], ax +058 000379 33D2 XOR dx, dx +059 00037B B80A00 MOV ax, 0Ah +060 00037E 52 PUSH dx +061 00037F 50 PUSH ax +062 000380 FF76F2 PUSH word ptr [bp-0Eh] +063 000383 FF76F0 PUSH word ptr [bp-10h] +064 000386 9AEB1D1000 CALL far ptr LMOD@ +065 00038B 8956EE MOV [bp-12h], dx +066 00038E 8946EC MOV [bp-14h], ax +067 000391 8B56F2 MOV dx, [bp-0Eh] +068 000394 8B46F0 MOV ax, [bp-10h] +069 000397 3B56F6 CMP dx, [bp-0Ah] +070 00039A 750A JNE L7 +071 00039C 3B46F4 CMP ax, [bp-0Ch] +072 00039F 7505 JNE L7 +073 0003A1 B80100 MOV ax, 1 +074 0003A4 EB02 JMP L8 + +075 0003A8 99 L8: CWD +076 0003A9 8956EE MOV [bp-12h], dx +077 0003AC 8946EC MOV [bp-14h], ax +078 0003AF 8B56EE MOV dx, [bp-12h] +079 0003B2 8B46EC MOV ax, [bp-14h] +080 0003B5 0B46F4 OR ax, [bp-0Ch] +081 0003B8 0B56F6 OR dx, [bp-0Ah] +082 0003BB 8956F2 MOV [bp-0Eh], dx +083 0003BE 8946F0 MOV [bp-10h], ax +084 0003C1 8B46F0 MOV ax, [bp-10h] +085 0003C4 0B46F2 OR ax, [bp-0Eh] +086 0003C7 7505 JNE L9 +087 0003C9 B80100 MOV ax, 1 +088 0003CC EB02 JMP L10 + +089 0003D0 99 L10: CWD +090 0003D1 8956EE MOV [bp-12h], dx +091 0003D4 8946EC MOV [bp-14h], ax +092 0003D7 8B56EE MOV dx, [bp-12h] +093 0003DA 8B46EC MOV ax, [bp-14h] +094 0003DD 0346F4 ADD ax, [bp-0Ch] +095 0003E0 1356F6 ADC dx, [bp-0Ah] +096 0003E3 8956F2 MOV [bp-0Eh], dx +097 0003E6 8946F0 MOV [bp-10h], ax +098 0003E9 8B56F2 MOV dx, [bp-0Eh] +099 0003EC 8B46F0 MOV ax, [bp-10h] +100 0003EF 3B56F6 CMP dx, [bp-0Ah] +101 
0003F2 7C0C JL L11 +102 0003F4 7F05 JG L12 +103 0003F6 3B46F4 CMP ax, [bp-0Ch] +104 0003F9 7605 JBE L11 + +105 0003FB B80100 L12: MOV ax, 1 +106 0003FE EB02 JMP L13 + +107 000402 99 L13: CWD +108 000403 8956EE MOV [bp-12h], dx +109 000406 8946EC MOV [bp-14h], ax +110 000409 8346F401 ADD word ptr [bp-0Ch], 1 +111 00040D 8356F600 ADC word ptr [bp-0Ah], 0 +112 JMP L4 ;Synthetic inst + +113 000400 33C0 L11: XOR ax, ax +114 JMP L13 ;Synthetic inst + +115 0003CE 33C0 L9: XOR ax, ax +116 JMP L10 ;Synthetic inst + +117 0003A6 33C0 L7: XOR ax, ax +118 JMP L8 ;Synthetic inst + +119 00041A 7F09 L5: JG L14 +120 00041C 837EF428 CMP word ptr [bp-0Ch], 28h +121 000420 7703 JA L14 +122 000422 E92CFF JMP L6 + +123 000425 8346F801 L14: ADD word ptr [bp-8], 1 +124 000429 8356FA00 ADC word ptr [bp-6], 0 +125 JMP L1 ;Synthetic inst + +126 00043B 7F08 L2: JG L15 +127 00043D 3B46FC CMP ax, [bp-4] +128 000440 7703 JA L15 +129 000442 E9FFFE JMP L3 + +130 000445 FF76EE L15: PUSH word ptr [bp-12h] +131 000448 FF76EC PUSH word ptr [bp-14h] +132 00044B B8BA01 MOV ax, 1BAh +133 00044E 50 PUSH ax +134 00044F E88D0B CALL near ptr printf +135 000452 83C406 ADD sp, 6 +136 000455 8BE5 MOV sp, bp +137 000457 5D POP bp +138 000458 C3 RET + + main ENDP + + LMOD@ PROC FAR +000 001EEB B90200 MOV cx, 2 +001 001EEE EB03 JMP L16 + +002 001EF3 55 L16: PUSH bp +003 001EF4 56 PUSH si +004 001EF5 57 PUSH di +005 001EF6 8BEC MOV bp, sp +006 001EF8 8BF9 MOV di, cx +007 001EFA 8B460A MOV ax, [bp+0Ah] +008 001EFD 8B560C MOV dx, [bp+0Ch] +009 001F00 8B5E0E MOV bx, [bp+0Eh] +010 001F03 8B4E10 MOV cx, [bp+10h] +011 001F06 0BC9 OR cx, cx +012 001F08 7508 JNE L17 +013 001F0A 0BD2 OR dx, dx +014 001F0C 7469 JE L18 +015 001F0E 0BDB OR bx, bx +016 001F10 7465 JE L18 + +017 001F12 F7C70100 L17: TEST di, 1 +018 001F16 751C JNE L19 +019 001F18 0BD2 OR dx, dx +020 001F1A 790A JNS L20 +021 001F1C F7DA NEG dx +022 001F1E F7D8 NEG ax +023 001F20 83DA00 SBB dx, 0 +024 001F23 83CF0C OR di, 0Ch + +025 001F26 0BC9 L20: OR cx, cx +026 
001F28 790A JNS L19 +027 001F2A F7D9 NEG cx +028 001F2C F7DB NEG bx +029 001F2E 83D900 SBB cx, 0 +030 001F31 83F704 XOR di, 4 + +031 001F34 8BE9 L19: MOV bp, cx +032 001F36 B92000 MOV cx, 20h +033 001F39 57 PUSH di +034 001F3A 33FF XOR di, di +035 001F3C 33F6 XOR si, si + +036 001F3E D1E0 L21: SHL ax, 1 +037 001F40 D1D2 RCL dx, 1 +038 001F42 D1D6 RCL si, 1 +039 001F44 D1D7 RCL di, 1 +040 001F46 3BFD CMP di, bp +041 001F48 720B JB L22 +042 001F4A 7704 JA L23 +043 001F4C 3BF3 CMP si, bx +044 001F4E 7205 JB L22 + +045 001F50 2BF3 L23: SUB si, bx +046 001F52 1BFD SBB di, bp +047 001F54 40 INC ax + +048 001F55 E2E7 L22: LOOP L21 +049 001F57 5B POP bx +050 001F58 F7C30200 TEST bx, 2 +051 001F5C 7406 JE L24 +052 001F5E 8BC6 MOV ax, si +053 001F60 8BD7 MOV dx, di +054 001F62 D1EB SHR bx, 1 + +055 001F64 F7C30400 L24: TEST bx, 4 +056 001F68 7407 JE L25 +057 001F6A F7DA NEG dx +058 001F6C F7D8 NEG ax +059 001F6E 83DA00 SBB dx, 0 + +060 001F71 5F L25: POP di +061 001F72 5E POP si +062 001F73 5D POP bp +063 001F74 CA0800 RETF 8 +065 001F77 F7F3 DIV bx +067 001F79 F7C70200 TEST di, 2 +068 001F7D 7402 JE L26 +069 001F7F 8BC2 MOV ax, dx + +070 001F81 33D2 L26: XOR dx, dx +071 001F83 EBEC JMP L25 + + LMOD@ ENDP + diff --git a/tests/outputs/BENCHLNG.EXE.a2 b/tests/outputs/BENCHLNG.EXE.a2 new file mode 100644 index 0000000..3fab437 --- /dev/null +++ b/tests/outputs/BENCHLNG.EXE.a2 @@ -0,0 +1,234 @@ + LMOD@ PROC FAR +000 001EEB B90200 MOV cx, 2 +002 001EF3 55 PUSH bp +003 001EF4 56 PUSH si +004 001EF5 57 PUSH di +005 001EF6 8BEC MOV bp, sp +006 001EF8 8BF9 MOV di, cx +007 001EFA 8B460A MOV ax, [bp+0Ah] +008 001EFD 8B560C MOV dx, [bp+0Ch] +009 001F00 8B5E0E MOV bx, [bp+0Eh] +010 001F03 8B4E10 MOV cx, [bp+10h] +011 001F06 0BC9 OR cx, cx +012 001F08 7508 JNE L1 +013 001F0A 0BD2 OR dx, dx +014 001F0C 7469 JE L2 +015 001F0E 0BDB OR bx, bx +016 001F10 7465 JE L2 + +017 001F12 F7C70100 L1: TEST di, 1 +018 001F16 751C JNE L3 +019 001F18 0BD2 OR dx, dx +020 001F1A 790A JNS L4 +021 001F1C F7DA 
NEG dx +022 001F1E F7D8 NEG ax +023 001F20 83DA00 SBB dx, 0 +024 001F23 83CF0C OR di, 0Ch + +025 001F26 0BC9 L4: OR cx, cx +026 001F28 790A JNS L3 +027 001F2A F7D9 NEG cx +028 001F2C F7DB NEG bx +029 001F2E 83D900 SBB cx, 0 +030 001F31 83F704 XOR di, 4 + +031 001F34 8BE9 L3: MOV bp, cx +032 001F36 B92000 MOV cx, 20h +033 001F39 57 PUSH di +034 001F3A 33FF XOR di, di +035 001F3C 33F6 XOR si, si + +036 001F3E D1E0 L5: SHL ax, 1 +037 001F40 D1D2 RCL dx, 1 +038 001F42 D1D6 RCL si, 1 +039 001F44 D1D7 RCL di, 1 +040 001F46 3BFD CMP di, bp +041 001F48 720B JB L6 +042 001F4A 7704 JA L7 +043 001F4C 3BF3 CMP si, bx +044 001F4E 7205 JB L6 + +045 001F50 2BF3 L7: SUB si, bx +046 001F52 1BFD SBB di, bp +047 001F54 40 INC ax + +048 001F55 E2E7 L6: LOOP L5 +049 001F57 5B POP bx +050 001F58 F7C30200 TEST bx, 2 +051 001F5C 7406 JE L8 +052 001F5E 8BC6 MOV ax, si +053 001F60 8BD7 MOV dx, di +054 001F62 D1EB SHR bx, 1 + +055 001F64 F7C30400 L8: TEST bx, 4 +056 001F68 7407 JE L9 +057 001F6A F7DA NEG dx +058 001F6C F7D8 NEG ax +059 001F6E 83DA00 SBB dx, 0 + +060 001F71 5F L9: POP di +061 001F72 5E POP si +062 001F73 5D POP bp +063 001F74 CA0800 RETF 8 + +064 L2: MOV tmp, dx:ax ;Synthetic inst +065 001F77 F7F3 DIV bx +066 MOD bx ;Synthetic inst +067 001F79 F7C70200 TEST di, 2 +068 001F7D 7402 JE L10 +069 001F7F 8BC2 MOV ax, dx + +070 001F81 33D2 L10: XOR dx, dx +071 001F83 EBEC JMP L9 + + LMOD@ ENDP + + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC14 SUB sp, 14h +003 000300 8D46FC LEA ax, [bp-4] +004 000303 50 PUSH ax +005 000304 B89401 MOV ax, 194h +006 000307 50 PUSH ax +007 000308 E85D15 CALL near ptr scanf +008 00030B 59 POP cx +009 00030C 59 POP cx +010 00030D FF76FE PUSH word ptr [bp-2] +011 000310 FF76FC PUSH word ptr [bp-4] +012 000313 B89801 MOV ax, 198h +013 000316 50 PUSH ax +014 000317 E8C50C CALL near ptr printf +015 00031A 83C406 ADD sp, 6 +016 00031D 8D46EC LEA ax, [bp-14h] +017 000320 50 PUSH ax +018 000321 B8B201 MOV ax, 1B2h +019 
000324 50 PUSH ax +020 000325 E84015 CALL near ptr scanf +021 000328 59 POP cx +022 000329 59 POP cx +023 00032A 8D46F0 LEA ax, [bp-10h] +024 00032D 50 PUSH ax +025 00032E B8B601 MOV ax, 1B6h +026 000331 50 PUSH ax +027 000332 E83315 CALL near ptr scanf +028 000335 59 POP cx +029 000336 59 POP cx +030 000337 C746FA0000 MOV word ptr [bp-6], 0 +031 00033C C746F80100 MOV word ptr [bp-8], 1 + +033 00042D 8B56FA L11: MOV dx, [bp-6] +034 000430 8B46F8 MOV ax, [bp-8] +035 000433 3B56FE CMP dx, [bp-2] +036 000436 7D03 JGE L12 + +038 000344 C746F60000 L13: MOV word ptr [bp-0Ah], 0 +039 000349 C746F40100 MOV word ptr [bp-0Ch], 1 + +041 000411 837EF600 L14: CMP word ptr [bp-0Ah], 0 +042 000415 7D03 JGE L15 + +044 000351 8B56EE L16: MOV dx, [bp-12h] +045 000354 8B46EC MOV ax, [bp-14h] +046 000357 0346F0 ADD ax, [bp-10h] +047 00035A 1356F2 ADC dx, [bp-0Eh] +048 00035D 0346F4 ADD ax, [bp-0Ch] +049 000360 1356F6 ADC dx, [bp-0Ah] +050 000363 8956EE MOV [bp-12h], dx +051 000366 8946EC MOV [bp-14h], ax +052 000369 8B56EE MOV dx, [bp-12h] +053 00036C 8B46EC MOV ax, [bp-14h] +054 00036F D1FA SAR dx, 1 +055 000371 D1D8 RCR ax, 1 +056 000373 8956F2 MOV [bp-0Eh], dx +057 000376 8946F0 MOV [bp-10h], ax +058 000379 33D2 XOR dx, dx +059 00037B B80A00 MOV ax, 0Ah +060 00037E 52 PUSH dx +061 00037F 50 PUSH ax +062 000380 FF76F2 PUSH word ptr [bp-0Eh] +063 000383 FF76F0 PUSH word ptr [bp-10h] +064 000386 9AEB1D1000 CALL far ptr LMOD@ +065 00038B 8956EE MOV [bp-12h], dx +066 00038E 8946EC MOV [bp-14h], ax +067 000391 8B56F2 MOV dx, [bp-0Eh] +068 000394 8B46F0 MOV ax, [bp-10h] +069 000397 3B56F6 CMP dx, [bp-0Ah] +070 00039A 750A JNE L17 +071 00039C 3B46F4 CMP ax, [bp-0Ch] +072 00039F 7505 JNE L17 +073 0003A1 B80100 MOV ax, 1 + +075 0003A8 99 L18: CWD +076 0003A9 8956EE MOV [bp-12h], dx +077 0003AC 8946EC MOV [bp-14h], ax +078 0003AF 8B56EE MOV dx, [bp-12h] +079 0003B2 8B46EC MOV ax, [bp-14h] +080 0003B5 0B46F4 OR ax, [bp-0Ch] +081 0003B8 0B56F6 OR dx, [bp-0Ah] +082 0003BB 8956F2 MOV [bp-0Eh], dx 
+083 0003BE 8946F0 MOV [bp-10h], ax +084 0003C1 8B46F0 MOV ax, [bp-10h] +085 0003C4 0B46F2 OR ax, [bp-0Eh] +086 0003C7 7505 JNE L19 +087 0003C9 B80100 MOV ax, 1 + +089 0003D0 99 L20: CWD +090 0003D1 8956EE MOV [bp-12h], dx +091 0003D4 8946EC MOV [bp-14h], ax +092 0003D7 8B56EE MOV dx, [bp-12h] +093 0003DA 8B46EC MOV ax, [bp-14h] +094 0003DD 0346F4 ADD ax, [bp-0Ch] +095 0003E0 1356F6 ADC dx, [bp-0Ah] +096 0003E3 8956F2 MOV [bp-0Eh], dx +097 0003E6 8946F0 MOV [bp-10h], ax +098 0003E9 8B56F2 MOV dx, [bp-0Eh] +099 0003EC 8B46F0 MOV ax, [bp-10h] +100 0003EF 3B56F6 CMP dx, [bp-0Ah] +101 0003F2 7C0C JL L21 +102 0003F4 7F05 JG L22 +103 0003F6 3B46F4 CMP ax, [bp-0Ch] +104 0003F9 7605 JBE L21 + +105 0003FB B80100 L22: MOV ax, 1 + +107 000402 99 L23: CWD +108 000403 8956EE MOV [bp-12h], dx +109 000406 8946EC MOV [bp-14h], ax +110 000409 8346F401 ADD word ptr [bp-0Ch], 1 +111 00040D 8356F600 ADC word ptr [bp-0Ah], 0 +112 JMP L14 ;Synthetic inst + +113 000400 33C0 L21: XOR ax, ax +114 JMP L23 ;Synthetic inst + +115 0003CE 33C0 L19: XOR ax, ax +116 JMP L20 ;Synthetic inst + +117 0003A6 33C0 L17: XOR ax, ax +118 JMP L18 ;Synthetic inst + +119 00041A 7F09 L15: JG L24 +120 00041C 837EF428 CMP word ptr [bp-0Ch], 28h +121 000420 7703 JA L24 + +123 000425 8346F801 L24: ADD word ptr [bp-8], 1 +124 000429 8356FA00 ADC word ptr [bp-6], 0 +125 JMP L11 ;Synthetic inst + +126 00043B 7F08 L12: JG L25 +127 00043D 3B46FC CMP ax, [bp-4] +128 000440 7703 JA L25 + +130 000445 FF76EE L25: PUSH word ptr [bp-12h] +131 000448 FF76EC PUSH word ptr [bp-14h] +132 00044B B8BA01 MOV ax, 1BAh +133 00044E 50 PUSH ax +134 00044F E88D0B CALL near ptr printf +135 000452 83C406 ADD sp, 6 +136 000455 8BE5 MOV sp, bp +137 000457 5D POP bp +138 000458 C3 RET + + main ENDP + diff --git a/tests/outputs/BENCHLNG.b b/tests/outputs/BENCHLNG.b new file mode 100644 index 0000000..3c40ea3 --- /dev/null +++ b/tests/outputs/BENCHLNG.b @@ -0,0 +1,158 @@ +/* + * Input file : ./tests/inputs/BENCHLNG.EXE + * File type : EXE + 
*/ + +#include "dcc.h" + + +long LMOD@ (long arg0, int arg2int arg3) +/* Takes 8 bytes of parameters. + * Runtime support routine of the compiler. + * Untranslatable routine. Assembler provided. + * Return value in registers dx:ax. + * Pascal calling convention. + */ +{ + MOV cx, 2 + PUSH bp + PUSH si + PUSH di + MOV bp, sp + MOV di, cx + MOV ax, [bp+0Ah] + MOV dx, [bp+0Ch] + MOV bx, [bp+0Eh] + MOV cx, [bp+10h] + CMP cx, 0 + JNE L1 + OR dx, dx + JE L2 + OR bx, bx + JE L2 + +L1: TEST di, 1 + JNE L3 + OR dx, dx + JNS L4 + NEG dx + NEG ax + SBB dx, 0 + OR di, 0Ch + +L4: OR cx, cx + JNS L3 + NEG cx + NEG bx + SBB cx, 0 + XOR di, 4 + +L3: MOV bp, cx + MOV cx, 20h + PUSH di + XOR di, 0 + XOR si, 0 + +L5: SHL ax, 1 + RCL dx, 1 + RCL si, 1 + RCL di, 1 + CMP di, bp + JB L6 + JA L7 + CMP si, bx + JB L6 + +L7: SUB si, bx + SBB di, bp + INC ax + +L6: LOOP L5 + POP bx + TEST bx, 2 + JE L8 + MOV ax, si + MOV dx, di + SHR bx, 1 + +L8: TEST bx, 4 + JE L9 + NEG dx + NEG ax + SBB dx, 0 + +L9: POP di + POP si + POP bp + RETF 8 + +L2: MOV tmp, dx:ax ;Synthetic inst + DIV bx + MOD bx ;Synthetic inst + TEST di, 2 + JE L10 + MOV ax, dx + +L10: XOR dx, dx + JMP L9 +} + + +void main () +/* Takes no parameters. + * High-level language prologue code. 
+ */ +{ +long loc1; +long loc2; +long loc3; +long loc4; +long loc5; +int loc6; /* ax */ + + scanf ("%ld", &loc0); + printf ("executing %ld iterations\n", loc5); + scanf ("%ld", &loc2); + scanf ("%ld", &loc3); + loc3 = 1; + + while ((loc3 <= loc5)) { + loc2 = 1; + + while ((loc2 <= 40)) { + loc4 = ((loc4 + loc1) + loc2); + loc1 = (loc4 >> 1); + loc4 = LMOD@ (loc1, 10); + + if (loc1 == loc2) { + loc6 = 1; + } + else { + loc6 = 0; + } + loc4 = loc6; + loc1 = (loc4 | loc2); + + if ((loc3 | loc9) == 0) { + loc6 = 1; + } + else { + loc6 = 0; + } + loc4 = loc6; + loc1 = (loc4 + loc2); + + if (loc1 > loc2) { + loc6 = 1; + } + else { + loc6 = 0; + } + loc4 = loc6; + loc2 = (loc2 + 1); + } /* end of while */ + loc3 = (loc3 + 1); + } /* end of while */ + printf ("a=%d\n", loc4); +} + diff --git a/tests/outputs/BENCHMUL.EXE.a1 b/tests/outputs/BENCHMUL.EXE.a1 new file mode 100644 index 0000000..9e41585 --- /dev/null +++ b/tests/outputs/BENCHMUL.EXE.a1 @@ -0,0 +1,101 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0C SUB sp, 0Ch +003 000300 56 PUSH si +004 000301 B89401 MOV ax, 194h +005 000304 50 PUSH ax +006 000305 E8530C CALL near ptr printf +007 000308 59 POP cx +008 000309 8D46FC LEA ax, [bp-4] +009 00030C 50 PUSH ax +010 00030D B8B001 MOV ax, 1B0h +011 000310 50 PUSH ax +012 000311 E8D014 CALL near ptr scanf +013 000314 59 POP cx +014 000315 59 POP cx +015 000316 FF76FE PUSH word ptr [bp-2] +016 000319 FF76FC PUSH word ptr [bp-4] +017 00031C B8B401 MOV ax, 1B4h +018 00031F 50 PUSH ax +019 000320 E8380C CALL near ptr printf +020 000323 83C406 ADD sp, 6 +021 000326 8D46F4 LEA ax, [bp-0Ch] +022 000329 50 PUSH ax +023 00032A B8CE01 MOV ax, 1CEh +024 00032D 50 PUSH ax +025 00032E E8B314 CALL near ptr scanf +026 000331 59 POP cx +027 000332 59 POP cx +028 000333 8D46F6 LEA ax, [bp-0Ah] +029 000336 50 PUSH ax +030 000337 B8D101 MOV ax, 1D1h +031 00033A 50 PUSH ax +032 00033B E8A614 CALL near ptr scanf +033 00033E 59 POP cx +034 00033F 59 POP 
cx +035 000340 C746FA0000 MOV word ptr [bp-6], 0 +036 000345 C746F80100 MOV word ptr [bp-8], 1 +037 00034A EB66 JMP L1 + +038 0003B2 8B56FA L1: MOV dx, [bp-6] +039 0003B5 8B46F8 MOV ax, [bp-8] +040 0003B8 3B56FE CMP dx, [bp-2] +041 0003BB 7C8F JL L2 +042 0003BD 7F05 JG L3 +043 0003BF 3B46FC CMP ax, [bp-4] +044 0003C2 7688 JBE L2 + +045 0003C4 FF76F4 L3: PUSH word ptr [bp-0Ch] +046 0003C7 B8D401 MOV ax, 1D4h +047 0003CA 50 PUSH ax +048 0003CB E88D0B CALL near ptr printf +049 0003CE 59 POP cx +050 0003CF 59 POP cx +051 0003D0 5E POP si +052 0003D1 8BE5 MOV sp, bp +053 0003D3 5D POP bp +054 0003D4 C3 RET + +055 00034C BE0100 L2: MOV si, 1 +056 00034F EB54 JMP L4 + +057 0003A5 83FE28 L4: CMP si, 28h +058 0003A8 7EA7 JLE L5 +059 0003AA 8346F801 ADD word ptr [bp-8], 1 +060 0003AE 8356FA00 ADC word ptr [bp-6], 0 +061 JMP L1 ;Synthetic inst + +062 000351 8B46F4 L5: MOV ax, [bp-0Ch] +063 000354 F766F4 MUL word ptr [bp-0Ch] +064 000357 F766F4 MUL word ptr [bp-0Ch] +065 00035A F766F4 MUL word ptr [bp-0Ch] +066 00035D F766F4 MUL word ptr [bp-0Ch] +067 000360 F766F4 MUL word ptr [bp-0Ch] +068 000363 F766F4 MUL word ptr [bp-0Ch] +069 000366 F766F4 MUL word ptr [bp-0Ch] +070 000369 F766F4 MUL word ptr [bp-0Ch] +071 00036C F766F4 MUL word ptr [bp-0Ch] +072 00036F F766F4 MUL word ptr [bp-0Ch] +073 000372 F766F4 MUL word ptr [bp-0Ch] +074 000375 F766F4 MUL word ptr [bp-0Ch] +075 000378 F766F4 MUL word ptr [bp-0Ch] +076 00037B F766F4 MUL word ptr [bp-0Ch] +077 00037E F766F4 MUL word ptr [bp-0Ch] +078 000381 F766F4 MUL word ptr [bp-0Ch] +079 000384 F766F4 MUL word ptr [bp-0Ch] +080 000387 F766F4 MUL word ptr [bp-0Ch] +081 00038A F766F4 MUL word ptr [bp-0Ch] +082 00038D F766F4 MUL word ptr [bp-0Ch] +083 000390 F766F4 MUL word ptr [bp-0Ch] +084 000393 F766F4 MUL word ptr [bp-0Ch] +085 000396 F766F4 MUL word ptr [bp-0Ch] +086 000399 F766F4 MUL word ptr [bp-0Ch] +087 00039C BA0300 MOV dx, 3 +088 00039F F7E2 MUL dx +089 0003A1 8946F4 MOV [bp-0Ch], ax +090 0003A4 46 INC si +091 JMP L4 
;Synthetic inst + + main ENDP + diff --git a/tests/outputs/BENCHMUL.EXE.a2 b/tests/outputs/BENCHMUL.EXE.a2 new file mode 100644 index 0000000..39303a6 --- /dev/null +++ b/tests/outputs/BENCHMUL.EXE.a2 @@ -0,0 +1,99 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0C SUB sp, 0Ch +003 000300 56 PUSH si +004 000301 B89401 MOV ax, 194h +005 000304 50 PUSH ax +006 000305 E8530C CALL near ptr printf +007 000308 59 POP cx +008 000309 8D46FC LEA ax, [bp-4] +009 00030C 50 PUSH ax +010 00030D B8B001 MOV ax, 1B0h +011 000310 50 PUSH ax +012 000311 E8D014 CALL near ptr scanf +013 000314 59 POP cx +014 000315 59 POP cx +015 000316 FF76FE PUSH word ptr [bp-2] +016 000319 FF76FC PUSH word ptr [bp-4] +017 00031C B8B401 MOV ax, 1B4h +018 00031F 50 PUSH ax +019 000320 E8380C CALL near ptr printf +020 000323 83C406 ADD sp, 6 +021 000326 8D46F4 LEA ax, [bp-0Ch] +022 000329 50 PUSH ax +023 00032A B8CE01 MOV ax, 1CEh +024 00032D 50 PUSH ax +025 00032E E8B314 CALL near ptr scanf +026 000331 59 POP cx +027 000332 59 POP cx +028 000333 8D46F6 LEA ax, [bp-0Ah] +029 000336 50 PUSH ax +030 000337 B8D101 MOV ax, 1D1h +031 00033A 50 PUSH ax +032 00033B E8A614 CALL near ptr scanf +033 00033E 59 POP cx +034 00033F 59 POP cx +035 000340 C746FA0000 MOV word ptr [bp-6], 0 +036 000345 C746F80100 MOV word ptr [bp-8], 1 + +038 0003B2 8B56FA L1: MOV dx, [bp-6] +039 0003B5 8B46F8 MOV ax, [bp-8] +040 0003B8 3B56FE CMP dx, [bp-2] +041 0003BB 7C8F JL L2 +042 0003BD 7F05 JG L3 +043 0003BF 3B46FC CMP ax, [bp-4] +044 0003C2 7688 JBE L2 + +045 0003C4 FF76F4 L3: PUSH word ptr [bp-0Ch] +046 0003C7 B8D401 MOV ax, 1D4h +047 0003CA 50 PUSH ax +048 0003CB E88D0B CALL near ptr printf +049 0003CE 59 POP cx +050 0003CF 59 POP cx +051 0003D0 5E POP si +052 0003D1 8BE5 MOV sp, bp +053 0003D3 5D POP bp +054 0003D4 C3 RET + +055 00034C BE0100 L2: MOV si, 1 + +057 0003A5 83FE28 L4: CMP si, 28h +058 0003A8 7EA7 JLE L5 +059 0003AA 8346F801 ADD word ptr [bp-8], 1 +060 0003AE 8356FA00 ADC 
word ptr [bp-6], 0 +061 JMP L1 ;Synthetic inst + +062 000351 8B46F4 L5: MOV ax, [bp-0Ch] +063 000354 F766F4 MUL word ptr [bp-0Ch] +064 000357 F766F4 MUL word ptr [bp-0Ch] +065 00035A F766F4 MUL word ptr [bp-0Ch] +066 00035D F766F4 MUL word ptr [bp-0Ch] +067 000360 F766F4 MUL word ptr [bp-0Ch] +068 000363 F766F4 MUL word ptr [bp-0Ch] +069 000366 F766F4 MUL word ptr [bp-0Ch] +070 000369 F766F4 MUL word ptr [bp-0Ch] +071 00036C F766F4 MUL word ptr [bp-0Ch] +072 00036F F766F4 MUL word ptr [bp-0Ch] +073 000372 F766F4 MUL word ptr [bp-0Ch] +074 000375 F766F4 MUL word ptr [bp-0Ch] +075 000378 F766F4 MUL word ptr [bp-0Ch] +076 00037B F766F4 MUL word ptr [bp-0Ch] +077 00037E F766F4 MUL word ptr [bp-0Ch] +078 000381 F766F4 MUL word ptr [bp-0Ch] +079 000384 F766F4 MUL word ptr [bp-0Ch] +080 000387 F766F4 MUL word ptr [bp-0Ch] +081 00038A F766F4 MUL word ptr [bp-0Ch] +082 00038D F766F4 MUL word ptr [bp-0Ch] +083 000390 F766F4 MUL word ptr [bp-0Ch] +084 000393 F766F4 MUL word ptr [bp-0Ch] +085 000396 F766F4 MUL word ptr [bp-0Ch] +086 000399 F766F4 MUL word ptr [bp-0Ch] +087 00039C BA0300 MOV dx, 3 +088 00039F F7E2 MUL dx +089 0003A1 8946F4 MOV [bp-0Ch], ax +090 0003A4 46 INC si +091 JMP L4 ;Synthetic inst + + main ENDP + diff --git a/tests/outputs/BENCHMUL.b b/tests/outputs/BENCHMUL.b new file mode 100644 index 0000000..e2f9357 --- /dev/null +++ b/tests/outputs/BENCHMUL.b @@ -0,0 +1,38 @@ +/* + * Input file : ./tests/inputs/BENCHMUL.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void main () +/* Takes no parameters. + * High-level language prologue code. 
+ */ +{ +int loc1; +int loc2; +long loc3; +long loc4; +int loc5; + + printf ("enter number of iterations\n"); + scanf ("%ld", &loc0); + printf ("executing %ld iterations\n", loc4); + scanf ("%d", &loc1); + scanf ("%d", &loc2); + loc3 = 1; + + while ((loc3 <= loc4)) { + loc5 = 1; + + while ((loc5 <= 40)) { + loc1 = (((((((((((((((((((((((((loc1 * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * 3); + loc5 = (loc5 + 1); + } /* end of while */ + loc3 = (loc3 + 1); + } /* end of while */ + printf ("a=%d\n", loc1); +} + diff --git a/tests/outputs/BENCHMUS.EXE.a1 b/tests/outputs/BENCHMUS.EXE.a1 new file mode 100644 index 0000000..0edcb0c --- /dev/null +++ b/tests/outputs/BENCHMUS.EXE.a1 @@ -0,0 +1,91 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0A SUB sp, 0Ah +003 000300 56 PUSH si +004 000301 57 PUSH di +005 000302 B8A801 MOV ax, 1A8h +006 000305 50 PUSH ax +007 000306 E8240C CALL near ptr printf +008 000309 59 POP cx +009 00030A 8D46FC LEA ax, [bp-4] +010 00030D 50 PUSH ax +011 00030E B8C401 MOV ax, 1C4h +012 000311 50 PUSH ax +013 000312 E8A114 CALL near ptr scanf +014 000315 59 POP cx +015 000316 59 POP cx +016 000317 FF76FE PUSH word ptr [bp-2] +017 00031A FF76FC PUSH word ptr [bp-4] +018 00031D B8C801 MOV ax, 1C8h +019 000320 50 PUSH ax +020 000321 E8090C CALL near ptr printf +021 000324 83C406 ADD sp, 6 +022 000327 BE1400 MOV si, 14h +023 00032A 8976F6 MOV [bp-0Ah], si +024 00032D C746FA0000 MOV word ptr [bp-6], 0 +025 000332 C746F80100 MOV word ptr [bp-8], 1 +026 000337 EB4C JMP L1 + +027 000385 8B56FA L1: MOV dx, [bp-6] +028 000388 8B46F8 MOV ax, [bp-8] +029 00038B 3B56FE CMP dx, [bp-2] +030 00038E 7CA9 JL L2 +031 000390 7F05 JG L3 +032 000392 3B46FC CMP ax, [bp-4] +033 000395 76A2 JBE L2 + +034 000397 56 L3: PUSH si +035 000398 B8E201 MOV ax, 1E2h +036 00039B 50 PUSH ax +037 
00039C E88E0B CALL near ptr printf +038 00039F 59 POP cx +039 0003A0 59 POP cx +040 0003A1 5F POP di +041 0003A2 5E POP si +042 0003A3 8BE5 MOV sp, bp +043 0003A5 5D POP bp +044 0003A6 C3 RET + +045 000339 BF0100 L2: MOV di, 1 +046 00033C EB3A JMP L4 + +047 000378 83FF28 L4: CMP di, 28h +048 00037B 7EC1 JLE L5 +049 00037D 8346F801 ADD word ptr [bp-8], 1 +050 000381 8356FA00 ADC word ptr [bp-6], 0 +051 JMP L1 ;Synthetic inst + +052 00033E 8BC6 L5: MOV ax, si +053 000340 F7E6 MUL si +054 000342 F7E6 MUL si +055 000344 F7E6 MUL si +056 000346 F7E6 MUL si +057 000348 F7E6 MUL si +058 00034A F7E6 MUL si +059 00034C F7E6 MUL si +060 00034E F7E6 MUL si +061 000350 F7E6 MUL si +062 000352 F7E6 MUL si +063 000354 F7E6 MUL si +064 000356 F7E6 MUL si +065 000358 F7E6 MUL si +066 00035A F7E6 MUL si +067 00035C F7E6 MUL si +068 00035E F7E6 MUL si +069 000360 F7E6 MUL si +070 000362 F7E6 MUL si +071 000364 F7E6 MUL si +072 000366 F7E6 MUL si +073 000368 F7E6 MUL si +074 00036A F7E6 MUL si +075 00036C F7E6 MUL si +076 00036E F7E6 MUL si +077 000370 BA0300 MOV dx, 3 +078 000373 F7E2 MUL dx +079 000375 8BF0 MOV si, ax +080 000377 47 INC di +081 JMP L4 ;Synthetic inst + + main ENDP + diff --git a/tests/outputs/BENCHMUS.EXE.a2 b/tests/outputs/BENCHMUS.EXE.a2 new file mode 100644 index 0000000..343d780 --- /dev/null +++ b/tests/outputs/BENCHMUS.EXE.a2 @@ -0,0 +1,89 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0A SUB sp, 0Ah +003 000300 56 PUSH si +004 000301 57 PUSH di +005 000302 B8A801 MOV ax, 1A8h +006 000305 50 PUSH ax +007 000306 E8240C CALL near ptr printf +008 000309 59 POP cx +009 00030A 8D46FC LEA ax, [bp-4] +010 00030D 50 PUSH ax +011 00030E B8C401 MOV ax, 1C4h +012 000311 50 PUSH ax +013 000312 E8A114 CALL near ptr scanf +014 000315 59 POP cx +015 000316 59 POP cx +016 000317 FF76FE PUSH word ptr [bp-2] +017 00031A FF76FC PUSH word ptr [bp-4] +018 00031D B8C801 MOV ax, 1C8h +019 000320 50 PUSH ax +020 000321 E8090C CALL near ptr 
printf +021 000324 83C406 ADD sp, 6 +022 000327 BE1400 MOV si, 14h +023 00032A 8976F6 MOV [bp-0Ah], si +024 00032D C746FA0000 MOV word ptr [bp-6], 0 +025 000332 C746F80100 MOV word ptr [bp-8], 1 + +027 000385 8B56FA L1: MOV dx, [bp-6] +028 000388 8B46F8 MOV ax, [bp-8] +029 00038B 3B56FE CMP dx, [bp-2] +030 00038E 7CA9 JL L2 +031 000390 7F05 JG L3 +032 000392 3B46FC CMP ax, [bp-4] +033 000395 76A2 JBE L2 + +034 000397 56 L3: PUSH si +035 000398 B8E201 MOV ax, 1E2h +036 00039B 50 PUSH ax +037 00039C E88E0B CALL near ptr printf +038 00039F 59 POP cx +039 0003A0 59 POP cx +040 0003A1 5F POP di +041 0003A2 5E POP si +042 0003A3 8BE5 MOV sp, bp +043 0003A5 5D POP bp +044 0003A6 C3 RET + +045 000339 BF0100 L2: MOV di, 1 + +047 000378 83FF28 L4: CMP di, 28h +048 00037B 7EC1 JLE L5 +049 00037D 8346F801 ADD word ptr [bp-8], 1 +050 000381 8356FA00 ADC word ptr [bp-6], 0 +051 JMP L1 ;Synthetic inst + +052 00033E 8BC6 L5: MOV ax, si +053 000340 F7E6 MUL si +054 000342 F7E6 MUL si +055 000344 F7E6 MUL si +056 000346 F7E6 MUL si +057 000348 F7E6 MUL si +058 00034A F7E6 MUL si +059 00034C F7E6 MUL si +060 00034E F7E6 MUL si +061 000350 F7E6 MUL si +062 000352 F7E6 MUL si +063 000354 F7E6 MUL si +064 000356 F7E6 MUL si +065 000358 F7E6 MUL si +066 00035A F7E6 MUL si +067 00035C F7E6 MUL si +068 00035E F7E6 MUL si +069 000360 F7E6 MUL si +070 000362 F7E6 MUL si +071 000364 F7E6 MUL si +072 000366 F7E6 MUL si +073 000368 F7E6 MUL si +074 00036A F7E6 MUL si +075 00036C F7E6 MUL si +076 00036E F7E6 MUL si +077 000370 BA0300 MOV dx, 3 +078 000373 F7E2 MUL dx +079 000375 8BF0 MOV si, ax +080 000377 47 INC di +081 JMP L4 ;Synthetic inst + + main ENDP + diff --git a/tests/outputs/BENCHMUS.b b/tests/outputs/BENCHMUS.b new file mode 100644 index 0000000..c36ff5c --- /dev/null +++ b/tests/outputs/BENCHMUS.b @@ -0,0 +1,38 @@ +/* + * Input file : ./tests/inputs/BENCHMUS.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void main () +/* Takes no parameters. 
+ * High-level language prologue code. + */ +{ +int loc1; +long loc2; +long loc3; +int loc4; +int loc5; + + printf ("enter number of iterations\n"); + scanf ("%ld", &loc0); + printf ("executing %ld iterations\n", loc3); + loc4 = 20; + loc1 = loc4; + loc2 = 1; + + while ((loc2 <= loc3)) { + loc5 = 1; + + while ((loc5 <= 40)) { + loc4 = (((((((((((((((((((((((((loc4 * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * 3); + loc5 = (loc5 + 1); + } /* end of while */ + loc2 = (loc2 + 1); + } /* end of while */ + printf ("a=%d\n", loc4); +} + diff --git a/tests/outputs/BENCHSHO.EXE.a1 b/tests/outputs/BENCHSHO.EXE.a1 new file mode 100644 index 0000000..f16494f --- /dev/null +++ b/tests/outputs/BENCHSHO.EXE.a1 @@ -0,0 +1,113 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0E SUB sp, 0Eh +003 000300 8D46FC LEA ax, [bp-4] +004 000303 50 PUSH ax +005 000304 B89401 MOV ax, 194h +006 000307 50 PUSH ax +007 000308 E8E914 CALL near ptr scanf +008 00030B 59 POP cx +009 00030C 59 POP cx +010 00030D FF76FE PUSH word ptr [bp-2] +011 000310 FF76FC PUSH word ptr [bp-4] +012 000313 B89801 MOV ax, 198h +013 000316 50 PUSH ax +014 000317 E8510C CALL near ptr printf +015 00031A 83C406 ADD sp, 6 +016 00031D 8D46F2 LEA ax, [bp-0Eh] +017 000320 50 PUSH ax +018 000321 B8B201 MOV ax, 1B2h +019 000324 50 PUSH ax +020 000325 E8CC14 CALL near ptr scanf +021 000328 59 POP cx +022 000329 59 POP cx +023 00032A 8D46F4 LEA ax, [bp-0Ch] +024 00032D 50 PUSH ax +025 00032E B8B601 MOV ax, 1B6h +026 000331 50 PUSH ax +027 000332 E8BF14 CALL near ptr scanf +028 000335 59 POP cx +029 000336 59 POP cx +030 000337 C746FA0000 MOV word ptr [bp-6], 0 +031 00033C C746F80100 MOV word ptr [bp-8], 1 +032 000341 E97900 JMP L1 + +033 0003BD 8B56FA L1: MOV dx, [bp-6] +034 0003C0 8B46F8 MOV ax, [bp-8] +035 0003C3 3B56FE CMP dx, [bp-2] +036 
0003C6 7D03 JGE L2 +037 0003C8 E979FF JMP L3 + +038 000344 C746F60100 L3: MOV word ptr [bp-0Ah], 1 +039 000349 EB64 JMP L4 + +040 0003AF 837EF628 L4: CMP word ptr [bp-0Ah], 28h +041 0003B3 7E96 JLE L5 +042 0003B5 8346F801 ADD word ptr [bp-8], 1 +043 0003B9 8356FA00 ADC word ptr [bp-6], 0 +044 JMP L1 ;Synthetic inst + +045 00034B 8B46F2 L5: MOV ax, [bp-0Eh] +046 00034E 0346F4 ADD ax, [bp-0Ch] +047 000351 0346F6 ADD ax, [bp-0Ah] +048 000354 8946F2 MOV [bp-0Eh], ax +049 000357 8B46F2 MOV ax, [bp-0Eh] +050 00035A D1F8 SAR ax, 1 +051 00035C 8946F4 MOV [bp-0Ch], ax +052 00035F 8B46F4 MOV ax, [bp-0Ch] +053 000362 BB0A00 MOV bx, 0Ah +054 000365 99 CWD +056 000366 F7FB IDIV bx +058 000368 8956F2 MOV [bp-0Eh], dx +059 00036B 8B46F4 MOV ax, [bp-0Ch] +060 00036E 3B46F6 CMP ax, [bp-0Ah] +061 000371 7505 JNE L6 +062 000373 B80100 MOV ax, 1 +063 000376 EB02 JMP L7 + +064 00037A 8946F2 L7: MOV [bp-0Eh], ax +065 00037D 8B46F2 MOV ax, [bp-0Eh] +066 000380 0B46F6 OR ax, [bp-0Ah] +067 000383 8946F4 MOV [bp-0Ch], ax +068 000386 8B46F4 MOV ax, [bp-0Ch] +069 000389 F7D8 NEG ax +070 00038B 1BC0 SBB ax, ax +071 00038D 40 INC ax +072 00038E 8946F2 MOV [bp-0Eh], ax +073 000391 8B46F2 MOV ax, [bp-0Eh] +074 000394 0346F6 ADD ax, [bp-0Ah] +075 000397 8946F4 MOV [bp-0Ch], ax +076 00039A 8B46F4 MOV ax, [bp-0Ch] +077 00039D 3B46F6 CMP ax, [bp-0Ah] +078 0003A0 7E05 JLE L8 +079 0003A2 B80100 MOV ax, 1 +080 0003A5 EB02 JMP L9 + +081 0003A9 8946F2 L9: MOV [bp-0Eh], ax +082 0003AC FF46F6 INC word ptr [bp-0Ah] +083 JMP L4 ;Synthetic inst + +084 0003A7 33C0 L8: XOR ax, ax +085 JMP L9 ;Synthetic inst + +086 000378 33C0 L6: XOR ax, ax +087 JMP L7 ;Synthetic inst + +088 0003CB 7F08 L2: JG L10 +089 0003CD 3B46FC CMP ax, [bp-4] +090 0003D0 7703 JA L10 +091 0003D2 E96FFF JMP L3 + +092 0003D5 FF76F2 L10: PUSH word ptr [bp-0Eh] +093 0003D8 B8BA01 MOV ax, 1BAh +094 0003DB 50 PUSH ax +095 0003DC E88C0B CALL near ptr printf +096 0003DF 59 POP cx +097 0003E0 59 POP cx +098 0003E1 8BE5 MOV sp, bp +099 0003E3 5D POP 
bp +100 0003E4 C3 RET + + main ENDP + diff --git a/tests/outputs/BENCHSHO.EXE.a2 b/tests/outputs/BENCHSHO.EXE.a2 new file mode 100644 index 0000000..c2b9e42 --- /dev/null +++ b/tests/outputs/BENCHSHO.EXE.a2 @@ -0,0 +1,109 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0E SUB sp, 0Eh +003 000300 8D46FC LEA ax, [bp-4] +004 000303 50 PUSH ax +005 000304 B89401 MOV ax, 194h +006 000307 50 PUSH ax +007 000308 E8E914 CALL near ptr scanf +008 00030B 59 POP cx +009 00030C 59 POP cx +010 00030D FF76FE PUSH word ptr [bp-2] +011 000310 FF76FC PUSH word ptr [bp-4] +012 000313 B89801 MOV ax, 198h +013 000316 50 PUSH ax +014 000317 E8510C CALL near ptr printf +015 00031A 83C406 ADD sp, 6 +016 00031D 8D46F2 LEA ax, [bp-0Eh] +017 000320 50 PUSH ax +018 000321 B8B201 MOV ax, 1B2h +019 000324 50 PUSH ax +020 000325 E8CC14 CALL near ptr scanf +021 000328 59 POP cx +022 000329 59 POP cx +023 00032A 8D46F4 LEA ax, [bp-0Ch] +024 00032D 50 PUSH ax +025 00032E B8B601 MOV ax, 1B6h +026 000331 50 PUSH ax +027 000332 E8BF14 CALL near ptr scanf +028 000335 59 POP cx +029 000336 59 POP cx +030 000337 C746FA0000 MOV word ptr [bp-6], 0 +031 00033C C746F80100 MOV word ptr [bp-8], 1 + +033 0003BD 8B56FA L1: MOV dx, [bp-6] +034 0003C0 8B46F8 MOV ax, [bp-8] +035 0003C3 3B56FE CMP dx, [bp-2] +036 0003C6 7D03 JGE L2 + +038 000344 C746F60100 L3: MOV word ptr [bp-0Ah], 1 + +040 0003AF 837EF628 L4: CMP word ptr [bp-0Ah], 28h +041 0003B3 7E96 JLE L5 +042 0003B5 8346F801 ADD word ptr [bp-8], 1 +043 0003B9 8356FA00 ADC word ptr [bp-6], 0 +044 JMP L1 ;Synthetic inst + +045 00034B 8B46F2 L5: MOV ax, [bp-0Eh] +046 00034E 0346F4 ADD ax, [bp-0Ch] +047 000351 0346F6 ADD ax, [bp-0Ah] +048 000354 8946F2 MOV [bp-0Eh], ax +049 000357 8B46F2 MOV ax, [bp-0Eh] +050 00035A D1F8 SAR ax, 1 +051 00035C 8946F4 MOV [bp-0Ch], ax +052 00035F 8B46F4 MOV ax, [bp-0Ch] +053 000362 BB0A00 MOV bx, 0Ah +054 000365 99 CWD +055 MOV tmp, dx:ax ;Synthetic inst +056 000366 F7FB IDIV bx +057 MOD bx 
;Synthetic inst +058 000368 8956F2 MOV [bp-0Eh], dx +059 00036B 8B46F4 MOV ax, [bp-0Ch] +060 00036E 3B46F6 CMP ax, [bp-0Ah] +061 000371 7505 JNE L6 +062 000373 B80100 MOV ax, 1 + +064 00037A 8946F2 L7: MOV [bp-0Eh], ax +065 00037D 8B46F2 MOV ax, [bp-0Eh] +066 000380 0B46F6 OR ax, [bp-0Ah] +067 000383 8946F4 MOV [bp-0Ch], ax +068 000386 8B46F4 MOV ax, [bp-0Ch] +069 000389 F7D8 NEG ax +070 00038B 1BC0 SBB ax, ax +071 00038D 40 INC ax +072 00038E 8946F2 MOV [bp-0Eh], ax +073 000391 8B46F2 MOV ax, [bp-0Eh] +074 000394 0346F6 ADD ax, [bp-0Ah] +075 000397 8946F4 MOV [bp-0Ch], ax +076 00039A 8B46F4 MOV ax, [bp-0Ch] +077 00039D 3B46F6 CMP ax, [bp-0Ah] +078 0003A0 7E05 JLE L8 +079 0003A2 B80100 MOV ax, 1 + +081 0003A9 8946F2 L9: MOV [bp-0Eh], ax +082 0003AC FF46F6 INC word ptr [bp-0Ah] +083 JMP L4 ;Synthetic inst + +084 0003A7 33C0 L8: XOR ax, ax +085 JMP L9 ;Synthetic inst + +086 000378 33C0 L6: XOR ax, ax +087 JMP L7 ;Synthetic inst + +088 0003CB 7F08 L2: JG L10 +089 0003CD 3B46FC CMP ax, [bp-4] +090 0003D0 7703 JA L10 + +092 0003D5 FF76F2 L10: PUSH word ptr [bp-0Eh] +093 0003D8 B8BA01 MOV ax, 1BAh +094 0003DB 50 PUSH ax +095 0003DC E88C0B CALL near ptr printf +096 0003DF 59 POP cx +097 0003E0 59 POP cx +098 0003E1 8BE5 MOV sp, bp +099 0003E3 5D POP bp +100 0003E4 C3 RET + + main ENDP + diff --git a/tests/outputs/BENCHSHO.b b/tests/outputs/BENCHSHO.b new file mode 100644 index 0000000..7786608 --- /dev/null +++ b/tests/outputs/BENCHSHO.b @@ -0,0 +1,59 @@ +/* + * Input file : ./tests/inputs/BENCHSHO.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void main () +/* Takes no parameters. + * High-level language prologue code. 
+ */ +{ +int loc1; +int loc2; +int loc3; +long loc4; +long loc5; +int loc6; /* ax */ + + scanf ("%ld", &loc0); + printf ("executing %ld iterations\n", loc5); + scanf ("%ld", &loc1); + scanf ("%ld", &loc2); + loc4 = 1; + + while ((loc4 <= loc5)) { + loc3 = 1; + + while ((loc3 <= 40)) { + loc1 = ((loc1 + loc2) + loc3); + loc2 = (loc1 >> 1); + loc1 = (loc2 % 10); + + if (loc2 == loc3) { + loc6 = 1; + } + else { + loc6 = 0; + } + loc1 = loc6; + loc2 = (loc1 | loc3); + loc1 = !loc2; + loc2 = (loc1 + loc3); + + if (loc2 > loc3) { + loc6 = 1; + } + else { + loc6 = 0; + } + loc1 = loc6; + loc3 = (loc3 + 1); + } /* end of while */ + loc4 = (loc4 + 1); + } /* end of while */ + printf ("a=%d\n", loc1); +} + diff --git a/tests/outputs/BYTEOPS.EXE.a1 b/tests/outputs/BYTEOPS.EXE.a1 new file mode 100644 index 0000000..1ee2f72 --- /dev/null +++ b/tests/outputs/BYTEOPS.EXE.a1 @@ -0,0 +1,58 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC02 SUB sp, 2 +003 000300 C646FEFF MOV byte ptr [bp-2], 0FFh +004 000304 C646FF8F MOV byte ptr [bp-1], 8Fh +005 000308 8A46FE MOV al, [bp-2] +006 00030B 0246FF ADD al, [bp-1] +007 00030E 8846FF MOV [bp-1], al +008 000311 8A46FE MOV al, [bp-2] +009 000314 2A46FF SUB al, [bp-1] +010 000317 8846FE MOV [bp-2], al +011 00031A 8A46FE MOV al, [bp-2] +012 00031D B400 MOV ah, 0 +013 00031F 8A56FF MOV dl, [bp-1] +014 000322 B600 MOV dh, 0 +015 000324 F7E2 MUL dx +016 000326 8846FE MOV [bp-2], al +017 000329 8A46FF MOV al, [bp-1] +018 00032C B400 MOV ah, 0 +019 00032E 8A56FE MOV dl, [bp-2] +020 000331 B600 MOV dh, 0 +021 000333 8BDA MOV bx, dx +022 000335 99 CWD +024 000336 F7FB IDIV bx +026 000338 8846FF MOV [bp-1], al +027 00033B 8A46FF MOV al, [bp-1] +028 00033E B400 MOV ah, 0 +029 000340 8A56FE MOV dl, [bp-2] +030 000343 B600 MOV dh, 0 +031 000345 8BDA MOV bx, dx +032 000347 99 CWD +034 000348 F7FB IDIV bx +036 00034A 8856FF MOV [bp-1], dl +037 00034D 8A46FE MOV al, [bp-2] +038 000350 B105 MOV cl, 5 +039 000352 D2E0 
SHL al, cl +040 000354 8846FE MOV [bp-2], al +041 000357 8A46FF MOV al, [bp-1] +042 00035A 8A4EFE MOV cl, [bp-2] +043 00035D D2E8 SHR al, cl +044 00035F 8846FF MOV [bp-1], al +045 000362 8A46FF MOV al, [bp-1] +046 000365 B400 MOV ah, 0 +047 000367 50 PUSH ax +048 000368 8A46FE MOV al, [bp-2] +049 00036B B400 MOV ah, 0 +050 00036D 50 PUSH ax +051 00036E B89401 MOV ax, 194h +052 000371 50 PUSH ax +053 000372 E8AB06 CALL near ptr printf +054 000375 83C406 ADD sp, 6 +055 000378 8BE5 MOV sp, bp +056 00037A 5D POP bp +057 00037B C3 RET + + main ENDP + diff --git a/tests/outputs/BYTEOPS.EXE.a2 b/tests/outputs/BYTEOPS.EXE.a2 new file mode 100644 index 0000000..4967d4f --- /dev/null +++ b/tests/outputs/BYTEOPS.EXE.a2 @@ -0,0 +1,62 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC02 SUB sp, 2 +003 000300 C646FEFF MOV byte ptr [bp-2], 0FFh +004 000304 C646FF8F MOV byte ptr [bp-1], 8Fh +005 000308 8A46FE MOV al, [bp-2] +006 00030B 0246FF ADD al, [bp-1] +007 00030E 8846FF MOV [bp-1], al +008 000311 8A46FE MOV al, [bp-2] +009 000314 2A46FF SUB al, [bp-1] +010 000317 8846FE MOV [bp-2], al +011 00031A 8A46FE MOV al, [bp-2] +012 00031D B400 MOV ah, 0 +013 00031F 8A56FF MOV dl, [bp-1] +014 000322 B600 MOV dh, 0 +015 000324 F7E2 MUL dx +016 000326 8846FE MOV [bp-2], al +017 000329 8A46FF MOV al, [bp-1] +018 00032C B400 MOV ah, 0 +019 00032E 8A56FE MOV dl, [bp-2] +020 000331 B600 MOV dh, 0 +021 000333 8BDA MOV bx, dx +022 000335 99 CWD +023 MOV tmp, dx:ax ;Synthetic inst +024 000336 F7FB IDIV bx +025 MOD bx ;Synthetic inst +026 000338 8846FF MOV [bp-1], al +027 00033B 8A46FF MOV al, [bp-1] +028 00033E B400 MOV ah, 0 +029 000340 8A56FE MOV dl, [bp-2] +030 000343 B600 MOV dh, 0 +031 000345 8BDA MOV bx, dx +032 000347 99 CWD +033 MOV tmp, dx:ax ;Synthetic inst +034 000348 F7FB IDIV bx +035 MOD bx ;Synthetic inst +036 00034A 8856FF MOV [bp-1], dl +037 00034D 8A46FE MOV al, [bp-2] +038 000350 B105 MOV cl, 5 +039 000352 D2E0 SHL al, cl +040 000354 
8846FE MOV [bp-2], al +041 000357 8A46FF MOV al, [bp-1] +042 00035A 8A4EFE MOV cl, [bp-2] +043 00035D D2E8 SHR al, cl +044 00035F 8846FF MOV [bp-1], al +045 000362 8A46FF MOV al, [bp-1] +046 000365 B400 MOV ah, 0 +047 000367 50 PUSH ax +048 000368 8A46FE MOV al, [bp-2] +049 00036B B400 MOV ah, 0 +050 00036D 50 PUSH ax +051 00036E B89401 MOV ax, 194h +052 000371 50 PUSH ax +053 000372 E8AB06 CALL near ptr printf +054 000375 83C406 ADD sp, 6 +055 000378 8BE5 MOV sp, bp +056 00037A 5D POP bp +057 00037B C3 RET + + main ENDP + diff --git a/tests/outputs/BYTEOPS.b b/tests/outputs/BYTEOPS.b new file mode 100644 index 0000000..219e8cd --- /dev/null +++ b/tests/outputs/BYTEOPS.b @@ -0,0 +1,28 @@ +/* + * Input file : ./tests/inputs/BYTEOPS.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void main () +/* Takes no parameters. + * High-level language prologue code. + */ +{ +int loc1; +int loc2; + + loc1 = 255; + loc2 = 143; + loc2 = (loc1 + loc2); + loc1 = (loc1 - loc2); + loc1 = (loc1 * loc2); + loc2 = (loc2 / loc1); + loc2 = (loc2 % loc1); + loc1 = (loc1 << 5); + loc2 = (loc2 >> loc1); + printf ("a = %d, b = %d\n", loc1, loc2); +} + diff --git a/tests/outputs/FIBOS.EXE.a1 b/tests/outputs/FIBOS.EXE.a1 new file mode 100644 index 0000000..471fed6 --- /dev/null +++ b/tests/outputs/FIBOS.EXE.a1 @@ -0,0 +1,90 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC04 SUB sp, 4 +003 000300 56 PUSH si +004 000301 57 PUSH di +005 000302 B89401 MOV ax, 194h +006 000305 50 PUSH ax +007 000306 E8080C CALL near ptr printf +008 000309 59 POP cx +009 00030A 8D46FC LEA ax, [bp-4] +010 00030D 50 PUSH ax +011 00030E B8B101 MOV ax, 1B1h +012 000311 50 PUSH ax +013 000312 E88514 CALL near ptr scanf +014 000315 59 POP cx +015 000316 59 POP cx +016 000317 BE0100 MOV si, 1 +017 00031A EB2D JMP L1 + +018 000349 3B76FC L1: CMP si, [bp-4] +019 00034C 7ECE JLE L2 +020 00034E 33C0 XOR ax, ax +021 000350 50 PUSH ax +022 000351 E87300 CALL near ptr exit +023 000354 59 
POP cx +024 000355 5F POP di +025 000356 5E POP si +026 000357 8BE5 MOV sp, bp +027 000359 5D POP bp +028 00035A C3 RET + +029 00031C B8B401 L2: MOV ax, 1B4h +030 00031F 50 PUSH ax +031 000320 E8EE0B CALL near ptr printf +032 000323 59 POP cx +033 000324 8D46FE LEA ax, [bp-2] +034 000327 50 PUSH ax +035 000328 B8C301 MOV ax, 1C3h +036 00032B 50 PUSH ax +037 00032C E86B14 CALL near ptr scanf +038 00032F 59 POP cx +039 000330 59 POP cx +040 000331 FF76FE PUSH word ptr [bp-2] +041 000334 E82400 CALL near ptr proc_1 +042 000337 59 POP cx +043 000338 8BF8 MOV di, ax +044 00033A 57 PUSH di +045 00033B FF76FE PUSH word ptr [bp-2] +046 00033E B8C601 MOV ax, 1C6h +047 000341 50 PUSH ax +048 000342 E8CC0B CALL near ptr printf +049 000345 83C406 ADD sp, 6 +050 000348 46 INC si +051 JMP L1 ;Synthetic inst + + main ENDP + + proc_1 PROC NEAR +000 00035B 55 PUSH bp +001 00035C 8BEC MOV bp, sp +002 00035E 56 PUSH si +003 00035F 8B7604 MOV si, [bp+4] +004 000362 83FE02 CMP si, 2 +005 000365 7E1C JLE L3 +006 000367 8BC6 MOV ax, si +007 000369 48 DEC ax +008 00036A 50 PUSH ax +009 00036B E8EDFF CALL near ptr proc_1 +010 00036E 59 POP cx +011 00036F 50 PUSH ax +012 000370 8BC6 MOV ax, si +013 000372 05FEFF ADD ax, 0FFFEh +014 000375 50 PUSH ax +015 000376 E8E2FF CALL near ptr proc_1 +016 000379 59 POP cx +017 00037A 8BD0 MOV dx, ax +018 00037C 58 POP ax +019 00037D 03C2 ADD ax, dx +020 00037F EB07 JMP L4 + +021 000388 5E L4: POP si +022 000389 5D POP bp +023 00038A C3 RET + +024 000383 B80100 L3: MOV ax, 1 +025 000386 EB00 JMP L4 + + proc_1 ENDP + diff --git a/tests/outputs/FIBOS.EXE.a2 b/tests/outputs/FIBOS.EXE.a2 new file mode 100644 index 0000000..433c14d --- /dev/null +++ b/tests/outputs/FIBOS.EXE.a2 @@ -0,0 +1,88 @@ + proc_1 PROC NEAR +000 00035B 55 PUSH bp +001 00035C 8BEC MOV bp, sp +002 00035E 56 PUSH si +003 00035F 8B7604 MOV si, [bp+4] +004 000362 83FE02 CMP si, 2 +005 000365 7E1C JLE L1 +006 000367 8BC6 MOV ax, si +007 000369 48 DEC ax +008 00036A 50 PUSH ax +009 00036B 
E8EDFF CALL near ptr proc_1 +010 00036E 59 POP cx +011 00036F 50 PUSH ax +012 000370 8BC6 MOV ax, si +013 000372 05FEFF ADD ax, 0FFFEh +014 000375 50 PUSH ax +015 000376 E8E2FF CALL near ptr proc_1 +016 000379 59 POP cx +017 00037A 8BD0 MOV dx, ax +018 00037C 58 POP ax +019 00037D 03C2 ADD ax, dx + +021 000388 5E L2: POP si +022 000389 5D POP bp +023 00038A C3 RET + +024 000383 B80100 L1: MOV ax, 1 +025 000386 EB00 JMP L2 + + proc_1 ENDP + + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC04 SUB sp, 4 +003 000300 56 PUSH si +004 000301 57 PUSH di +005 000302 B89401 MOV ax, 194h +006 000305 50 PUSH ax +007 000306 E8080C CALL near ptr printf +008 000309 59 POP cx +009 00030A 8D46FC LEA ax, [bp-4] +010 00030D 50 PUSH ax +011 00030E B8B101 MOV ax, 1B1h +012 000311 50 PUSH ax +013 000312 E88514 CALL near ptr scanf +014 000315 59 POP cx +015 000316 59 POP cx +016 000317 BE0100 MOV si, 1 + +018 000349 3B76FC L3: CMP si, [bp-4] +019 00034C 7ECE JLE L4 +020 00034E 33C0 XOR ax, ax +021 000350 50 PUSH ax +022 000351 E87300 CALL near ptr exit +023 000354 59 POP cx +024 000355 5F POP di +025 000356 5E POP si +026 000357 8BE5 MOV sp, bp +027 000359 5D POP bp +028 00035A C3 RET + +029 00031C B8B401 L4: MOV ax, 1B4h +030 00031F 50 PUSH ax +031 000320 E8EE0B CALL near ptr printf +032 000323 59 POP cx +033 000324 8D46FE LEA ax, [bp-2] +034 000327 50 PUSH ax +035 000328 B8C301 MOV ax, 1C3h +036 00032B 50 PUSH ax +037 00032C E86B14 CALL near ptr scanf +038 00032F 59 POP cx +039 000330 59 POP cx +040 000331 FF76FE PUSH word ptr [bp-2] +041 000334 E82400 CALL near ptr proc_1 +042 000337 59 POP cx +043 000338 8BF8 MOV di, ax +044 00033A 57 PUSH di +045 00033B FF76FE PUSH word ptr [bp-2] +046 00033E B8C601 MOV ax, 1C6h +047 000341 50 PUSH ax +048 000342 E8CC0B CALL near ptr printf +049 000345 83C406 ADD sp, 6 +050 000348 46 INC si +051 JMP L3 ;Synthetic inst + + main ENDP + diff --git a/tests/outputs/FIBOS.b b/tests/outputs/FIBOS.b new file mode 100644 
index 0000000..aec76cf --- /dev/null +++ b/tests/outputs/FIBOS.b @@ -0,0 +1,53 @@ +/* + * Input file : ./tests/inputs/FIBOS.EXE + * File type : EXE + */ + +#include "dcc.h" + + +int proc_1 (int arg0) +/* Takes 2 bytes of parameters. + * High-level language prologue code. + * C calling convention. + */ +{ +int loc1; +int loc2; /* ax */ + + loc1 = arg0; + + if (loc1 > 2) { + loc2 = (proc_1 ((loc1 - 1)) + proc_1 ((loc1 + 0xfffe))); + } + else { + loc2 = 1; + } + return (loc2); +} + + +void main () +/* Takes no parameters. + * High-level language prologue code. + */ +{ +int loc1; +int loc2; +int loc3; +int loc4; + + printf ("Input number of iterations: "); + scanf ("%d", &loc1); + loc3 = 1; + + while ((loc3 <= loc1)) { + printf ("Input number: "); + scanf ("%d", &loc2); + loc4 = proc_1 (loc2); + printf ("fibonacci(%d) = %u\n", loc2, loc4); + loc3 = (loc3 + 1); + } /* end of while */ + exit (0); +} + diff --git a/tests/outputs/MIN.EXE.a1 b/tests/outputs/MIN.EXE.a1 new file mode 100644 index 0000000..26e919c --- /dev/null +++ b/tests/outputs/MIN.EXE.a1 @@ -0,0 +1,39 @@ + PROC NEAR +000 000100 55 PUSH bp +001 000101 8BEC MOV bp, sp +002 000103 83EC02 SUB sp, 2 +003 000106 C746FE0000 MOV word ptr [bp-2], 0 +004 00010B 8B46FE MOV ax, [bp-2] +005 00010E 3D0600 CMP ax, 6 +006 000111 7735 JA L1 +007 000113 8BD8 MOV bx, ax +008 000115 D1E3 SHL bx, 1 +009 000117 2EFFA71C00 JMP word ptr cs:[bx+1Ch] ;Switch + +010 00012A B80200 MOV ax, 2 ;Case l0 +011 00012D EB1E JMP L2 + +012 00014D 8BE5 L2: MOV sp, bp +013 00014F 5D POP bp +014 000150 C3 RET + +015 00012F B80300 MOV ax, 3 ;Case l1 +016 000132 EB19 JMP L2 + +017 000134 B80700 MOV ax, 7 ;Case l2 +018 000137 EB14 JMP L2 + +019 000139 B80D00 MOV ax, 0Dh ;Case l3 +020 00013C EB0F JMP L2 + +021 000148 B82C00 L1: MOV ax, 2Ch ;Case l4 +022 00014B EB00 JMP L2 + +023 00013E B81700 MOV ax, 17h ;Case l5 +024 000141 EB0A JMP L2 + +025 000143 B80D00 MOV ax, 0Dh ;Case l6 +026 000146 EB05 JMP L2 + + ENDP + diff --git a/tests/outputs/MIN.EXE.a2 
b/tests/outputs/MIN.EXE.a2 new file mode 100644 index 0000000..ba41ff9 --- /dev/null +++ b/tests/outputs/MIN.EXE.a2 @@ -0,0 +1,38 @@ + PROC NEAR +000 000100 55 PUSH bp +001 000101 8BEC MOV bp, sp +002 000103 83EC02 SUB sp, 2 +003 000106 C746FE0000 MOV word ptr [bp-2], 0 +004 00010B 8B46FE MOV ax, [bp-2] +005 00010E 3D0600 CMP ax, 6 +006 000111 7735 JA L1 +007 000113 8BD8 MOV bx, ax +008 000115 D1E3 SHL bx, 1 +009 000117 2EFFA71C00 JMP word ptr cs:[bx+1Ch] ;Switch + +010 00012A B80200 MOV ax, 2 ;Case l0 + +012 00014D 8BE5 L2: MOV sp, bp +013 00014F 5D POP bp +014 000150 C3 RET + +015 00012F B80300 MOV ax, 3 ;Case l1 +016 000132 EB19 JMP L2 + +017 000134 B80700 MOV ax, 7 ;Case l2 +018 000137 EB14 JMP L2 + +019 000139 B80D00 MOV ax, 0Dh ;Case l3 +020 00013C EB0F JMP L2 + +021 000148 B82C00 L1: MOV ax, 2Ch ;Case l4 +022 00014B EB00 JMP L2 + +023 00013E B81700 MOV ax, 17h ;Case l5 +024 000141 EB0A JMP L2 + +025 000143 B80D00 MOV ax, 0Dh ;Case l6 +026 000146 EB05 JMP L2 + + ENDP + diff --git a/tests/outputs/MIN.b b/tests/outputs/MIN.b new file mode 100644 index 0000000..f140e3b --- /dev/null +++ b/tests/outputs/MIN.b @@ -0,0 +1,27 @@ +/* + * Input file : ./tests/inputs/MIN.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void () +/* Takes no parameters. + * High-level language prologue code. 
+ */ +{ +int loc1; +int loc2; /* ax */ +int loc3; /* bx */ + + loc1 = 0; + loc2 = loc1; + + if (loc2 <= 6) { + loc3 = loc2; + } + else { + } +} + diff --git a/tests/prev/BENCHFN.EXE.a1 b/tests/prev/BENCHFN.EXE.a1 new file mode 100755 index 0000000..14e823d --- /dev/null +++ b/tests/prev/BENCHFN.EXE.a1 @@ -0,0 +1,109 @@ + main PROC NEAR +000 000365 55 PUSH bp +001 000366 8BEC MOV bp, sp +002 000368 83EC08 SUB sp, 8 +003 00036B B89401 MOV ax, 194h +004 00036E 50 PUSH ax +005 00036F E8D90B CALL near ptr printf +006 000372 59 POP cx +007 000373 8D46FC LEA ax, [bp-4] +008 000376 50 PUSH ax +009 000377 B8B001 MOV ax, 1B0h +010 00037A 50 PUSH ax +011 00037B E85614 CALL near ptr scanf +012 00037E 59 POP cx +013 00037F 59 POP cx +014 000380 FF76FE PUSH word ptr [bp-2] +015 000383 FF76FC PUSH word ptr [bp-4] +016 000386 B8B401 MOV ax, 1B4h +017 000389 50 PUSH ax +018 00038A E8BE0B CALL near ptr printf +019 00038D 83C406 ADD sp, 6 +020 000390 C746FA0000 MOV word ptr [bp-6], 0 +021 000395 C746F80100 MOV word ptr [bp-8], 1 +022 00039A EB0B JMP L1 + +023 0003A7 8B56FA L1: MOV dx, [bp-6] +024 0003AA 8B46F8 MOV ax, [bp-8] +025 0003AD 3B56FE CMP dx, [bp-2] +026 0003B0 7CEA JL L2 +027 0003B2 7F05 JG L3 +028 0003B4 3B46FC CMP ax, [bp-4] +029 0003B7 76E3 JBE L2 + +030 0003B9 B8CE01 L3: MOV ax, 1CEh +031 0003BC 50 PUSH ax +032 0003BD E88B0B CALL near ptr printf +033 0003C0 59 POP cx +034 0003C1 8BE5 MOV sp, bp +035 0003C3 5D POP bp +036 0003C4 C3 RET + +037 00039C E8A6FF L2: CALL near ptr proc_1 +038 00039F 8346F801 ADD word ptr [bp-8], 1 +039 0003A3 8356FA00 ADC word ptr [bp-6], 0 +040 JMP L1 ;Synthetic inst + + main ENDP + + proc_1 PROC NEAR +000 000345 55 PUSH bp +001 000346 8BEC MOV bp, sp +002 000348 E8D7FF CALL near ptr proc_2 +003 00034B E8D4FF CALL near ptr proc_2 +004 00034E E8D1FF CALL near ptr proc_2 +005 000351 E8CEFF CALL near ptr proc_2 +006 000354 E8CBFF CALL near ptr proc_2 +007 000357 E8C8FF CALL near ptr proc_2 +008 00035A E8C5FF CALL near ptr proc_2 +009 00035D 
E8C2FF CALL near ptr proc_2 +010 000360 E8BFFF CALL near ptr proc_2 +011 000363 5D POP bp +012 000364 C3 RET + + proc_1 ENDP + + proc_2 PROC NEAR +000 000322 55 PUSH bp +001 000323 8BEC MOV bp, sp +002 000325 E8D7FF CALL near ptr proc_3 +003 000328 E8D4FF CALL near ptr proc_3 +004 00032B E8D1FF CALL near ptr proc_3 +005 00032E E8CEFF CALL near ptr proc_3 +006 000331 E8CBFF CALL near ptr proc_3 +007 000334 E8C8FF CALL near ptr proc_3 +008 000337 E8C5FF CALL near ptr proc_3 +009 00033A E8C2FF CALL near ptr proc_3 +010 00033D E8BFFF CALL near ptr proc_3 +011 000340 E8BCFF CALL near ptr proc_3 +012 000343 5D POP bp +013 000344 C3 RET + + proc_2 ENDP + + proc_3 PROC NEAR +000 0002FF 55 PUSH bp +001 000300 8BEC MOV bp, sp +002 000302 E8F5FF CALL near ptr proc_4 +003 000305 E8F2FF CALL near ptr proc_4 +004 000308 E8EFFF CALL near ptr proc_4 +005 00030B E8ECFF CALL near ptr proc_4 +006 00030E E8E9FF CALL near ptr proc_4 +007 000311 E8E6FF CALL near ptr proc_4 +008 000314 E8E3FF CALL near ptr proc_4 +009 000317 E8E0FF CALL near ptr proc_4 +010 00031A E8DDFF CALL near ptr proc_4 +011 00031D E8DAFF CALL near ptr proc_4 +012 000320 5D POP bp +013 000321 C3 RET + + proc_3 ENDP + + proc_4 PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 5D POP bp +003 0002FE C3 RET + + proc_4 ENDP + diff --git a/tests/prev/BENCHFN.EXE.a2 b/tests/prev/BENCHFN.EXE.a2 new file mode 100755 index 0000000..98d7e04 --- /dev/null +++ b/tests/prev/BENCHFN.EXE.a2 @@ -0,0 +1,108 @@ + proc_4 PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 5D POP bp +003 0002FE C3 RET + + proc_4 ENDP + + proc_3 PROC NEAR +000 0002FF 55 PUSH bp +001 000300 8BEC MOV bp, sp +002 000302 E8F5FF CALL near ptr proc_4 +003 000305 E8F2FF CALL near ptr proc_4 +004 000308 E8EFFF CALL near ptr proc_4 +005 00030B E8ECFF CALL near ptr proc_4 +006 00030E E8E9FF CALL near ptr proc_4 +007 000311 E8E6FF CALL near ptr proc_4 +008 000314 E8E3FF CALL near ptr proc_4 +009 000317 E8E0FF CALL near 
ptr proc_4 +010 00031A E8DDFF CALL near ptr proc_4 +011 00031D E8DAFF CALL near ptr proc_4 +012 000320 5D POP bp +013 000321 C3 RET + + proc_3 ENDP + + proc_2 PROC NEAR +000 000322 55 PUSH bp +001 000323 8BEC MOV bp, sp +002 000325 E8D7FF CALL near ptr proc_3 +003 000328 E8D4FF CALL near ptr proc_3 +004 00032B E8D1FF CALL near ptr proc_3 +005 00032E E8CEFF CALL near ptr proc_3 +006 000331 E8CBFF CALL near ptr proc_3 +007 000334 E8C8FF CALL near ptr proc_3 +008 000337 E8C5FF CALL near ptr proc_3 +009 00033A E8C2FF CALL near ptr proc_3 +010 00033D E8BFFF CALL near ptr proc_3 +011 000340 E8BCFF CALL near ptr proc_3 +012 000343 5D POP bp +013 000344 C3 RET + + proc_2 ENDP + + proc_1 PROC NEAR +000 000345 55 PUSH bp +001 000346 8BEC MOV bp, sp +002 000348 E8D7FF CALL near ptr proc_2 +003 00034B E8D4FF CALL near ptr proc_2 +004 00034E E8D1FF CALL near ptr proc_2 +005 000351 E8CEFF CALL near ptr proc_2 +006 000354 E8CBFF CALL near ptr proc_2 +007 000357 E8C8FF CALL near ptr proc_2 +008 00035A E8C5FF CALL near ptr proc_2 +009 00035D E8C2FF CALL near ptr proc_2 +010 000360 E8BFFF CALL near ptr proc_2 +011 000363 5D POP bp +012 000364 C3 RET + + proc_1 ENDP + + main PROC NEAR +000 000365 55 PUSH bp +001 000366 8BEC MOV bp, sp +002 000368 83EC08 SUB sp, 8 +003 00036B B89401 MOV ax, 194h +004 00036E 50 PUSH ax +005 00036F E8D90B CALL near ptr printf +006 000372 59 POP cx +007 000373 8D46FC LEA ax, [bp-4] +008 000376 50 PUSH ax +009 000377 B8B001 MOV ax, 1B0h +010 00037A 50 PUSH ax +011 00037B E85614 CALL near ptr scanf +012 00037E 59 POP cx +013 00037F 59 POP cx +014 000380 FF76FE PUSH word ptr [bp-2] +015 000383 FF76FC PUSH word ptr [bp-4] +016 000386 B8B401 MOV ax, 1B4h +017 000389 50 PUSH ax +018 00038A E8BE0B CALL near ptr printf +019 00038D 83C406 ADD sp, 6 +020 000390 C746FA0000 MOV word ptr [bp-6], 0 +021 000395 C746F80100 MOV word ptr [bp-8], 1 + +023 0003A7 8B56FA L1: MOV dx, [bp-6] +024 0003AA 8B46F8 MOV ax, [bp-8] +025 0003AD 3B56FE CMP dx, [bp-2] +026 0003B0 7CEA 
JL L2 +027 0003B2 7F05 JG L3 +028 0003B4 3B46FC CMP ax, [bp-4] +029 0003B7 76E3 JBE L2 + +030 0003B9 B8CE01 L3: MOV ax, 1CEh +031 0003BC 50 PUSH ax +032 0003BD E88B0B CALL near ptr printf +033 0003C0 59 POP cx +034 0003C1 8BE5 MOV sp, bp +035 0003C3 5D POP bp +036 0003C4 C3 RET + +037 00039C E8A6FF L2: CALL near ptr proc_1 +038 00039F 8346F801 ADD word ptr [bp-8], 1 +039 0003A3 8356FA00 ADC word ptr [bp-6], 0 +040 JMP L1 ;Synthetic inst + + main ENDP + diff --git a/tests/prev/BENCHFN.b b/tests/prev/BENCHFN.b new file mode 100755 index 0000000..41ae251 --- /dev/null +++ b/tests/prev/BENCHFN.b @@ -0,0 +1,89 @@ +/* + * Input file : ./tests/inputs/BENCHFN.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void proc_4 () +/* Takes no parameters. + * High-level language prologue code. + */ +{ +} + + +void proc_3 () +/* Takes no parameters. + * High-level language prologue code. + */ +{ + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); + proc_4 (); +} + + +void proc_2 () +/* Takes no parameters. + * High-level language prologue code. + */ +{ + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); + proc_3 (); +} + + +void proc_1 () +/* Takes no parameters. + * High-level language prologue code. + */ +{ + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); + proc_2 (); +} + + +void main () +/* Takes no parameters. + * High-level language prologue code. 
+ */ +{ +long loc1; +long loc2; + + printf ("enter number of iterations "); + scanf ("%ld", &loc0); + printf ("executing %ld iterations\n", loc2); + loc1 = 1; + + while ((loc1 <= loc2)) { + proc_1 (); + loc1 = (loc1 + 1); + } /* end of while */ + printf ("finished\n"); +} + diff --git a/tests/prev/BENCHLNG.EXE.a1 b/tests/prev/BENCHLNG.EXE.a1 new file mode 100755 index 0000000..a3fa2ec --- /dev/null +++ b/tests/prev/BENCHLNG.EXE.a1 @@ -0,0 +1,242 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC14 SUB sp, 14h +003 000300 8D46FC LEA ax, [bp-4] +004 000303 50 PUSH ax +005 000304 B89401 MOV ax, 194h +006 000307 50 PUSH ax +007 000308 E85D15 CALL near ptr scanf +008 00030B 59 POP cx +009 00030C 59 POP cx +010 00030D FF76FE PUSH word ptr [bp-2] +011 000310 FF76FC PUSH word ptr [bp-4] +012 000313 B89801 MOV ax, 198h +013 000316 50 PUSH ax +014 000317 E8C50C CALL near ptr printf +015 00031A 83C406 ADD sp, 6 +016 00031D 8D46EC LEA ax, [bp-14h] +017 000320 50 PUSH ax +018 000321 B8B201 MOV ax, 1B2h +019 000324 50 PUSH ax +020 000325 E84015 CALL near ptr scanf +021 000328 59 POP cx +022 000329 59 POP cx +023 00032A 8D46F0 LEA ax, [bp-10h] +024 00032D 50 PUSH ax +025 00032E B8B601 MOV ax, 1B6h +026 000331 50 PUSH ax +027 000332 E83315 CALL near ptr scanf +028 000335 59 POP cx +029 000336 59 POP cx +030 000337 C746FA0000 MOV word ptr [bp-6], 0 +031 00033C C746F80100 MOV word ptr [bp-8], 1 +032 000341 E9E900 JMP L1 + +033 00042D 8B56FA L1: MOV dx, [bp-6] +034 000430 8B46F8 MOV ax, [bp-8] +035 000433 3B56FE CMP dx, [bp-2] +036 000436 7D03 JGE L2 +037 000438 E909FF JMP L3 + +038 000344 C746F60000 L3: MOV word ptr [bp-0Ah], 0 +039 000349 C746F40100 MOV word ptr [bp-0Ch], 1 +040 00034E E9C000 JMP L4 + +041 000411 837EF600 L4: CMP word ptr [bp-0Ah], 0 +042 000415 7D03 JGE L5 +043 000417 E937FF JMP L6 + +044 000351 8B56EE L6: MOV dx, [bp-12h] +045 000354 8B46EC MOV ax, [bp-14h] +046 000357 0346F0 ADD ax, [bp-10h] +047 00035A 1356F2 ADC dx, 
[bp-0Eh] +048 00035D 0346F4 ADD ax, [bp-0Ch] +049 000360 1356F6 ADC dx, [bp-0Ah] +050 000363 8956EE MOV [bp-12h], dx +051 000366 8946EC MOV [bp-14h], ax +052 000369 8B56EE MOV dx, [bp-12h] +053 00036C 8B46EC MOV ax, [bp-14h] +054 00036F D1FA SAR dx, 1 +055 000371 D1D8 RCR ax, 1 +056 000373 8956F2 MOV [bp-0Eh], dx +057 000376 8946F0 MOV [bp-10h], ax +058 000379 33D2 XOR dx, dx +059 00037B B80A00 MOV ax, 0Ah +060 00037E 52 PUSH dx +061 00037F 50 PUSH ax +062 000380 FF76F2 PUSH word ptr [bp-0Eh] +063 000383 FF76F0 PUSH word ptr [bp-10h] +064 000386 9AEB1D1000 CALL far ptr LMOD@ +065 00038B 8956EE MOV [bp-12h], dx +066 00038E 8946EC MOV [bp-14h], ax +067 000391 8B56F2 MOV dx, [bp-0Eh] +068 000394 8B46F0 MOV ax, [bp-10h] +069 000397 3B56F6 CMP dx, [bp-0Ah] +070 00039A 750A JNE L7 +071 00039C 3B46F4 CMP ax, [bp-0Ch] +072 00039F 7505 JNE L7 +073 0003A1 B80100 MOV ax, 1 +074 0003A4 EB02 JMP L8 + +075 0003A8 99 L8: CWD +076 0003A9 8956EE MOV [bp-12h], dx +077 0003AC 8946EC MOV [bp-14h], ax +078 0003AF 8B56EE MOV dx, [bp-12h] +079 0003B2 8B46EC MOV ax, [bp-14h] +080 0003B5 0B46F4 OR ax, [bp-0Ch] +081 0003B8 0B56F6 OR dx, [bp-0Ah] +082 0003BB 8956F2 MOV [bp-0Eh], dx +083 0003BE 8946F0 MOV [bp-10h], ax +084 0003C1 8B46F0 MOV ax, [bp-10h] +085 0003C4 0B46F2 OR ax, [bp-0Eh] +086 0003C7 7505 JNE L9 +087 0003C9 B80100 MOV ax, 1 +088 0003CC EB02 JMP L10 + +089 0003D0 99 L10: CWD +090 0003D1 8956EE MOV [bp-12h], dx +091 0003D4 8946EC MOV [bp-14h], ax +092 0003D7 8B56EE MOV dx, [bp-12h] +093 0003DA 8B46EC MOV ax, [bp-14h] +094 0003DD 0346F4 ADD ax, [bp-0Ch] +095 0003E0 1356F6 ADC dx, [bp-0Ah] +096 0003E3 8956F2 MOV [bp-0Eh], dx +097 0003E6 8946F0 MOV [bp-10h], ax +098 0003E9 8B56F2 MOV dx, [bp-0Eh] +099 0003EC 8B46F0 MOV ax, [bp-10h] +100 0003EF 3B56F6 CMP dx, [bp-0Ah] +101 0003F2 7C0C JL L11 +102 0003F4 7F05 JG L12 +103 0003F6 3B46F4 CMP ax, [bp-0Ch] +104 0003F9 7605 JBE L11 + +105 0003FB B80100 L12: MOV ax, 1 +106 0003FE EB02 JMP L13 + +107 000402 99 L13: CWD +108 000403 8956EE MOV 
[bp-12h], dx +109 000406 8946EC MOV [bp-14h], ax +110 000409 8346F401 ADD word ptr [bp-0Ch], 1 +111 00040D 8356F600 ADC word ptr [bp-0Ah], 0 +112 JMP L4 ;Synthetic inst + +113 000400 33C0 L11: XOR ax, ax +114 JMP L13 ;Synthetic inst + +115 0003CE 33C0 L9: XOR ax, ax +116 JMP L10 ;Synthetic inst + +117 0003A6 33C0 L7: XOR ax, ax +118 JMP L8 ;Synthetic inst + +119 00041A 7F09 L5: JG L14 +120 00041C 837EF428 CMP word ptr [bp-0Ch], 28h +121 000420 7703 JA L14 +122 000422 E92CFF JMP L6 + +123 000425 8346F801 L14: ADD word ptr [bp-8], 1 +124 000429 8356FA00 ADC word ptr [bp-6], 0 +125 JMP L1 ;Synthetic inst + +126 00043B 7F08 L2: JG L15 +127 00043D 3B46FC CMP ax, [bp-4] +128 000440 7703 JA L15 +129 000442 E9FFFE JMP L3 + +130 000445 FF76EE L15: PUSH word ptr [bp-12h] +131 000448 FF76EC PUSH word ptr [bp-14h] +132 00044B B8BA01 MOV ax, 1BAh +133 00044E 50 PUSH ax +134 00044F E88D0B CALL near ptr printf +135 000452 83C406 ADD sp, 6 +136 000455 8BE5 MOV sp, bp +137 000457 5D POP bp +138 000458 C3 RET + + main ENDP + + LMOD@ PROC FAR +000 001EEB B90200 MOV cx, 2 +001 001EEE EB03 JMP L16 + +002 001EF3 55 L16: PUSH bp +003 001EF4 56 PUSH si +004 001EF5 57 PUSH di +005 001EF6 8BEC MOV bp, sp +006 001EF8 8BF9 MOV di, cx +007 001EFA 8B460A MOV ax, [bp+0Ah] +008 001EFD 8B560C MOV dx, [bp+0Ch] +009 001F00 8B5E0E MOV bx, [bp+0Eh] +010 001F03 8B4E10 MOV cx, [bp+10h] +011 001F06 0BC9 OR cx, cx +012 001F08 7508 JNE L17 +013 001F0A 0BD2 OR dx, dx +014 001F0C 7469 JE L18 +015 001F0E 0BDB OR bx, bx +016 001F10 7465 JE L18 + +017 001F12 F7C70100 L17: TEST di, 1 +018 001F16 751C JNE L19 +019 001F18 0BD2 OR dx, dx +020 001F1A 790A JNS L20 +021 001F1C F7DA NEG dx +022 001F1E F7D8 NEG ax +023 001F20 83DA00 SBB dx, 0 +024 001F23 83CF0C OR di, 0Ch + +025 001F26 0BC9 L20: OR cx, cx +026 001F28 790A JNS L19 +027 001F2A F7D9 NEG cx +028 001F2C F7DB NEG bx +029 001F2E 83D900 SBB cx, 0 +030 001F31 83F704 XOR di, 4 + +031 001F34 8BE9 L19: MOV bp, cx +032 001F36 B92000 MOV cx, 20h +033 001F39 57 PUSH 
di +034 001F3A 33FF XOR di, di +035 001F3C 33F6 XOR si, si + +036 001F3E D1E0 L21: SHL ax, 1 +037 001F40 D1D2 RCL dx, 1 +038 001F42 D1D6 RCL si, 1 +039 001F44 D1D7 RCL di, 1 +040 001F46 3BFD CMP di, bp +041 001F48 720B JB L22 +042 001F4A 7704 JA L23 +043 001F4C 3BF3 CMP si, bx +044 001F4E 7205 JB L22 + +045 001F50 2BF3 L23: SUB si, bx +046 001F52 1BFD SBB di, bp +047 001F54 40 INC ax + +048 001F55 E2E7 L22: LOOP L21 +049 001F57 5B POP bx +050 001F58 F7C30200 TEST bx, 2 +051 001F5C 7406 JE L24 +052 001F5E 8BC6 MOV ax, si +053 001F60 8BD7 MOV dx, di +054 001F62 D1EB SHR bx, 1 + +055 001F64 F7C30400 L24: TEST bx, 4 +056 001F68 7407 JE L25 +057 001F6A F7DA NEG dx +058 001F6C F7D8 NEG ax +059 001F6E 83DA00 SBB dx, 0 + +060 001F71 5F L25: POP di +061 001F72 5E POP si +062 001F73 5D POP bp +063 001F74 CA0800 RETF 8 +065 001F77 F7F3 DIV bx +067 001F79 F7C70200 TEST di, 2 +068 001F7D 7402 JE L26 +069 001F7F 8BC2 MOV ax, dx + +070 001F81 33D2 L26: XOR dx, dx +071 001F83 EBEC JMP L25 + + LMOD@ ENDP + diff --git a/tests/prev/BENCHLNG.EXE.a2 b/tests/prev/BENCHLNG.EXE.a2 new file mode 100755 index 0000000..3fab437 --- /dev/null +++ b/tests/prev/BENCHLNG.EXE.a2 @@ -0,0 +1,234 @@ + LMOD@ PROC FAR +000 001EEB B90200 MOV cx, 2 +002 001EF3 55 PUSH bp +003 001EF4 56 PUSH si +004 001EF5 57 PUSH di +005 001EF6 8BEC MOV bp, sp +006 001EF8 8BF9 MOV di, cx +007 001EFA 8B460A MOV ax, [bp+0Ah] +008 001EFD 8B560C MOV dx, [bp+0Ch] +009 001F00 8B5E0E MOV bx, [bp+0Eh] +010 001F03 8B4E10 MOV cx, [bp+10h] +011 001F06 0BC9 OR cx, cx +012 001F08 7508 JNE L1 +013 001F0A 0BD2 OR dx, dx +014 001F0C 7469 JE L2 +015 001F0E 0BDB OR bx, bx +016 001F10 7465 JE L2 + +017 001F12 F7C70100 L1: TEST di, 1 +018 001F16 751C JNE L3 +019 001F18 0BD2 OR dx, dx +020 001F1A 790A JNS L4 +021 001F1C F7DA NEG dx +022 001F1E F7D8 NEG ax +023 001F20 83DA00 SBB dx, 0 +024 001F23 83CF0C OR di, 0Ch + +025 001F26 0BC9 L4: OR cx, cx +026 001F28 790A JNS L3 +027 001F2A F7D9 NEG cx +028 001F2C F7DB NEG bx +029 001F2E 83D900 SBB 
cx, 0 +030 001F31 83F704 XOR di, 4 + +031 001F34 8BE9 L3: MOV bp, cx +032 001F36 B92000 MOV cx, 20h +033 001F39 57 PUSH di +034 001F3A 33FF XOR di, di +035 001F3C 33F6 XOR si, si + +036 001F3E D1E0 L5: SHL ax, 1 +037 001F40 D1D2 RCL dx, 1 +038 001F42 D1D6 RCL si, 1 +039 001F44 D1D7 RCL di, 1 +040 001F46 3BFD CMP di, bp +041 001F48 720B JB L6 +042 001F4A 7704 JA L7 +043 001F4C 3BF3 CMP si, bx +044 001F4E 7205 JB L6 + +045 001F50 2BF3 L7: SUB si, bx +046 001F52 1BFD SBB di, bp +047 001F54 40 INC ax + +048 001F55 E2E7 L6: LOOP L5 +049 001F57 5B POP bx +050 001F58 F7C30200 TEST bx, 2 +051 001F5C 7406 JE L8 +052 001F5E 8BC6 MOV ax, si +053 001F60 8BD7 MOV dx, di +054 001F62 D1EB SHR bx, 1 + +055 001F64 F7C30400 L8: TEST bx, 4 +056 001F68 7407 JE L9 +057 001F6A F7DA NEG dx +058 001F6C F7D8 NEG ax +059 001F6E 83DA00 SBB dx, 0 + +060 001F71 5F L9: POP di +061 001F72 5E POP si +062 001F73 5D POP bp +063 001F74 CA0800 RETF 8 + +064 L2: MOV tmp, dx:ax ;Synthetic inst +065 001F77 F7F3 DIV bx +066 MOD bx ;Synthetic inst +067 001F79 F7C70200 TEST di, 2 +068 001F7D 7402 JE L10 +069 001F7F 8BC2 MOV ax, dx + +070 001F81 33D2 L10: XOR dx, dx +071 001F83 EBEC JMP L9 + + LMOD@ ENDP + + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC14 SUB sp, 14h +003 000300 8D46FC LEA ax, [bp-4] +004 000303 50 PUSH ax +005 000304 B89401 MOV ax, 194h +006 000307 50 PUSH ax +007 000308 E85D15 CALL near ptr scanf +008 00030B 59 POP cx +009 00030C 59 POP cx +010 00030D FF76FE PUSH word ptr [bp-2] +011 000310 FF76FC PUSH word ptr [bp-4] +012 000313 B89801 MOV ax, 198h +013 000316 50 PUSH ax +014 000317 E8C50C CALL near ptr printf +015 00031A 83C406 ADD sp, 6 +016 00031D 8D46EC LEA ax, [bp-14h] +017 000320 50 PUSH ax +018 000321 B8B201 MOV ax, 1B2h +019 000324 50 PUSH ax +020 000325 E84015 CALL near ptr scanf +021 000328 59 POP cx +022 000329 59 POP cx +023 00032A 8D46F0 LEA ax, [bp-10h] +024 00032D 50 PUSH ax +025 00032E B8B601 MOV ax, 1B6h +026 000331 50 PUSH ax +027 
000332 E83315 CALL near ptr scanf +028 000335 59 POP cx +029 000336 59 POP cx +030 000337 C746FA0000 MOV word ptr [bp-6], 0 +031 00033C C746F80100 MOV word ptr [bp-8], 1 + +033 00042D 8B56FA L11: MOV dx, [bp-6] +034 000430 8B46F8 MOV ax, [bp-8] +035 000433 3B56FE CMP dx, [bp-2] +036 000436 7D03 JGE L12 + +038 000344 C746F60000 L13: MOV word ptr [bp-0Ah], 0 +039 000349 C746F40100 MOV word ptr [bp-0Ch], 1 + +041 000411 837EF600 L14: CMP word ptr [bp-0Ah], 0 +042 000415 7D03 JGE L15 + +044 000351 8B56EE L16: MOV dx, [bp-12h] +045 000354 8B46EC MOV ax, [bp-14h] +046 000357 0346F0 ADD ax, [bp-10h] +047 00035A 1356F2 ADC dx, [bp-0Eh] +048 00035D 0346F4 ADD ax, [bp-0Ch] +049 000360 1356F6 ADC dx, [bp-0Ah] +050 000363 8956EE MOV [bp-12h], dx +051 000366 8946EC MOV [bp-14h], ax +052 000369 8B56EE MOV dx, [bp-12h] +053 00036C 8B46EC MOV ax, [bp-14h] +054 00036F D1FA SAR dx, 1 +055 000371 D1D8 RCR ax, 1 +056 000373 8956F2 MOV [bp-0Eh], dx +057 000376 8946F0 MOV [bp-10h], ax +058 000379 33D2 XOR dx, dx +059 00037B B80A00 MOV ax, 0Ah +060 00037E 52 PUSH dx +061 00037F 50 PUSH ax +062 000380 FF76F2 PUSH word ptr [bp-0Eh] +063 000383 FF76F0 PUSH word ptr [bp-10h] +064 000386 9AEB1D1000 CALL far ptr LMOD@ +065 00038B 8956EE MOV [bp-12h], dx +066 00038E 8946EC MOV [bp-14h], ax +067 000391 8B56F2 MOV dx, [bp-0Eh] +068 000394 8B46F0 MOV ax, [bp-10h] +069 000397 3B56F6 CMP dx, [bp-0Ah] +070 00039A 750A JNE L17 +071 00039C 3B46F4 CMP ax, [bp-0Ch] +072 00039F 7505 JNE L17 +073 0003A1 B80100 MOV ax, 1 + +075 0003A8 99 L18: CWD +076 0003A9 8956EE MOV [bp-12h], dx +077 0003AC 8946EC MOV [bp-14h], ax +078 0003AF 8B56EE MOV dx, [bp-12h] +079 0003B2 8B46EC MOV ax, [bp-14h] +080 0003B5 0B46F4 OR ax, [bp-0Ch] +081 0003B8 0B56F6 OR dx, [bp-0Ah] +082 0003BB 8956F2 MOV [bp-0Eh], dx +083 0003BE 8946F0 MOV [bp-10h], ax +084 0003C1 8B46F0 MOV ax, [bp-10h] +085 0003C4 0B46F2 OR ax, [bp-0Eh] +086 0003C7 7505 JNE L19 +087 0003C9 B80100 MOV ax, 1 + +089 0003D0 99 L20: CWD +090 0003D1 8956EE MOV [bp-12h], 
dx +091 0003D4 8946EC MOV [bp-14h], ax +092 0003D7 8B56EE MOV dx, [bp-12h] +093 0003DA 8B46EC MOV ax, [bp-14h] +094 0003DD 0346F4 ADD ax, [bp-0Ch] +095 0003E0 1356F6 ADC dx, [bp-0Ah] +096 0003E3 8956F2 MOV [bp-0Eh], dx +097 0003E6 8946F0 MOV [bp-10h], ax +098 0003E9 8B56F2 MOV dx, [bp-0Eh] +099 0003EC 8B46F0 MOV ax, [bp-10h] +100 0003EF 3B56F6 CMP dx, [bp-0Ah] +101 0003F2 7C0C JL L21 +102 0003F4 7F05 JG L22 +103 0003F6 3B46F4 CMP ax, [bp-0Ch] +104 0003F9 7605 JBE L21 + +105 0003FB B80100 L22: MOV ax, 1 + +107 000402 99 L23: CWD +108 000403 8956EE MOV [bp-12h], dx +109 000406 8946EC MOV [bp-14h], ax +110 000409 8346F401 ADD word ptr [bp-0Ch], 1 +111 00040D 8356F600 ADC word ptr [bp-0Ah], 0 +112 JMP L14 ;Synthetic inst + +113 000400 33C0 L21: XOR ax, ax +114 JMP L23 ;Synthetic inst + +115 0003CE 33C0 L19: XOR ax, ax +116 JMP L20 ;Synthetic inst + +117 0003A6 33C0 L17: XOR ax, ax +118 JMP L18 ;Synthetic inst + +119 00041A 7F09 L15: JG L24 +120 00041C 837EF428 CMP word ptr [bp-0Ch], 28h +121 000420 7703 JA L24 + +123 000425 8346F801 L24: ADD word ptr [bp-8], 1 +124 000429 8356FA00 ADC word ptr [bp-6], 0 +125 JMP L11 ;Synthetic inst + +126 00043B 7F08 L12: JG L25 +127 00043D 3B46FC CMP ax, [bp-4] +128 000440 7703 JA L25 + +130 000445 FF76EE L25: PUSH word ptr [bp-12h] +131 000448 FF76EC PUSH word ptr [bp-14h] +132 00044B B8BA01 MOV ax, 1BAh +133 00044E 50 PUSH ax +134 00044F E88D0B CALL near ptr printf +135 000452 83C406 ADD sp, 6 +136 000455 8BE5 MOV sp, bp +137 000457 5D POP bp +138 000458 C3 RET + + main ENDP + diff --git a/tests/prev/BENCHLNG.b b/tests/prev/BENCHLNG.b new file mode 100755 index 0000000..3c40ea3 --- /dev/null +++ b/tests/prev/BENCHLNG.b @@ -0,0 +1,158 @@ +/* + * Input file : ./tests/inputs/BENCHLNG.EXE + * File type : EXE + */ + +#include "dcc.h" + + +long LMOD@ (long arg0, int arg2int arg3) +/* Takes 8 bytes of parameters. + * Runtime support routine of the compiler. + * Untranslatable routine. Assembler provided. 
+ * Return value in registers dx:ax. + * Pascal calling convention. + */ +{ + MOV cx, 2 + PUSH bp + PUSH si + PUSH di + MOV bp, sp + MOV di, cx + MOV ax, [bp+0Ah] + MOV dx, [bp+0Ch] + MOV bx, [bp+0Eh] + MOV cx, [bp+10h] + CMP cx, 0 + JNE L1 + OR dx, dx + JE L2 + OR bx, bx + JE L2 + +L1: TEST di, 1 + JNE L3 + OR dx, dx + JNS L4 + NEG dx + NEG ax + SBB dx, 0 + OR di, 0Ch + +L4: OR cx, cx + JNS L3 + NEG cx + NEG bx + SBB cx, 0 + XOR di, 4 + +L3: MOV bp, cx + MOV cx, 20h + PUSH di + XOR di, 0 + XOR si, 0 + +L5: SHL ax, 1 + RCL dx, 1 + RCL si, 1 + RCL di, 1 + CMP di, bp + JB L6 + JA L7 + CMP si, bx + JB L6 + +L7: SUB si, bx + SBB di, bp + INC ax + +L6: LOOP L5 + POP bx + TEST bx, 2 + JE L8 + MOV ax, si + MOV dx, di + SHR bx, 1 + +L8: TEST bx, 4 + JE L9 + NEG dx + NEG ax + SBB dx, 0 + +L9: POP di + POP si + POP bp + RETF 8 + +L2: MOV tmp, dx:ax ;Synthetic inst + DIV bx + MOD bx ;Synthetic inst + TEST di, 2 + JE L10 + MOV ax, dx + +L10: XOR dx, dx + JMP L9 +} + + +void main () +/* Takes no parameters. + * High-level language prologue code. 
+ */ +{ +long loc1; +long loc2; +long loc3; +long loc4; +long loc5; +int loc6; /* ax */ + + scanf ("%ld", &loc0); + printf ("executing %ld iterations\n", loc5); + scanf ("%ld", &loc2); + scanf ("%ld", &loc3); + loc3 = 1; + + while ((loc3 <= loc5)) { + loc2 = 1; + + while ((loc2 <= 40)) { + loc4 = ((loc4 + loc1) + loc2); + loc1 = (loc4 >> 1); + loc4 = LMOD@ (loc1, 10); + + if (loc1 == loc2) { + loc6 = 1; + } + else { + loc6 = 0; + } + loc4 = loc6; + loc1 = (loc4 | loc2); + + if ((loc3 | loc9) == 0) { + loc6 = 1; + } + else { + loc6 = 0; + } + loc4 = loc6; + loc1 = (loc4 + loc2); + + if (loc1 > loc2) { + loc6 = 1; + } + else { + loc6 = 0; + } + loc4 = loc6; + loc2 = (loc2 + 1); + } /* end of while */ + loc3 = (loc3 + 1); + } /* end of while */ + printf ("a=%d\n", loc4); +} + diff --git a/tests/prev/BENCHMUL.EXE.a1 b/tests/prev/BENCHMUL.EXE.a1 new file mode 100755 index 0000000..9e41585 --- /dev/null +++ b/tests/prev/BENCHMUL.EXE.a1 @@ -0,0 +1,101 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0C SUB sp, 0Ch +003 000300 56 PUSH si +004 000301 B89401 MOV ax, 194h +005 000304 50 PUSH ax +006 000305 E8530C CALL near ptr printf +007 000308 59 POP cx +008 000309 8D46FC LEA ax, [bp-4] +009 00030C 50 PUSH ax +010 00030D B8B001 MOV ax, 1B0h +011 000310 50 PUSH ax +012 000311 E8D014 CALL near ptr scanf +013 000314 59 POP cx +014 000315 59 POP cx +015 000316 FF76FE PUSH word ptr [bp-2] +016 000319 FF76FC PUSH word ptr [bp-4] +017 00031C B8B401 MOV ax, 1B4h +018 00031F 50 PUSH ax +019 000320 E8380C CALL near ptr printf +020 000323 83C406 ADD sp, 6 +021 000326 8D46F4 LEA ax, [bp-0Ch] +022 000329 50 PUSH ax +023 00032A B8CE01 MOV ax, 1CEh +024 00032D 50 PUSH ax +025 00032E E8B314 CALL near ptr scanf +026 000331 59 POP cx +027 000332 59 POP cx +028 000333 8D46F6 LEA ax, [bp-0Ah] +029 000336 50 PUSH ax +030 000337 B8D101 MOV ax, 1D1h +031 00033A 50 PUSH ax +032 00033B E8A614 CALL near ptr scanf +033 00033E 59 POP cx +034 00033F 59 POP cx +035 
000340 C746FA0000 MOV word ptr [bp-6], 0 +036 000345 C746F80100 MOV word ptr [bp-8], 1 +037 00034A EB66 JMP L1 + +038 0003B2 8B56FA L1: MOV dx, [bp-6] +039 0003B5 8B46F8 MOV ax, [bp-8] +040 0003B8 3B56FE CMP dx, [bp-2] +041 0003BB 7C8F JL L2 +042 0003BD 7F05 JG L3 +043 0003BF 3B46FC CMP ax, [bp-4] +044 0003C2 7688 JBE L2 + +045 0003C4 FF76F4 L3: PUSH word ptr [bp-0Ch] +046 0003C7 B8D401 MOV ax, 1D4h +047 0003CA 50 PUSH ax +048 0003CB E88D0B CALL near ptr printf +049 0003CE 59 POP cx +050 0003CF 59 POP cx +051 0003D0 5E POP si +052 0003D1 8BE5 MOV sp, bp +053 0003D3 5D POP bp +054 0003D4 C3 RET + +055 00034C BE0100 L2: MOV si, 1 +056 00034F EB54 JMP L4 + +057 0003A5 83FE28 L4: CMP si, 28h +058 0003A8 7EA7 JLE L5 +059 0003AA 8346F801 ADD word ptr [bp-8], 1 +060 0003AE 8356FA00 ADC word ptr [bp-6], 0 +061 JMP L1 ;Synthetic inst + +062 000351 8B46F4 L5: MOV ax, [bp-0Ch] +063 000354 F766F4 MUL word ptr [bp-0Ch] +064 000357 F766F4 MUL word ptr [bp-0Ch] +065 00035A F766F4 MUL word ptr [bp-0Ch] +066 00035D F766F4 MUL word ptr [bp-0Ch] +067 000360 F766F4 MUL word ptr [bp-0Ch] +068 000363 F766F4 MUL word ptr [bp-0Ch] +069 000366 F766F4 MUL word ptr [bp-0Ch] +070 000369 F766F4 MUL word ptr [bp-0Ch] +071 00036C F766F4 MUL word ptr [bp-0Ch] +072 00036F F766F4 MUL word ptr [bp-0Ch] +073 000372 F766F4 MUL word ptr [bp-0Ch] +074 000375 F766F4 MUL word ptr [bp-0Ch] +075 000378 F766F4 MUL word ptr [bp-0Ch] +076 00037B F766F4 MUL word ptr [bp-0Ch] +077 00037E F766F4 MUL word ptr [bp-0Ch] +078 000381 F766F4 MUL word ptr [bp-0Ch] +079 000384 F766F4 MUL word ptr [bp-0Ch] +080 000387 F766F4 MUL word ptr [bp-0Ch] +081 00038A F766F4 MUL word ptr [bp-0Ch] +082 00038D F766F4 MUL word ptr [bp-0Ch] +083 000390 F766F4 MUL word ptr [bp-0Ch] +084 000393 F766F4 MUL word ptr [bp-0Ch] +085 000396 F766F4 MUL word ptr [bp-0Ch] +086 000399 F766F4 MUL word ptr [bp-0Ch] +087 00039C BA0300 MOV dx, 3 +088 00039F F7E2 MUL dx +089 0003A1 8946F4 MOV [bp-0Ch], ax +090 0003A4 46 INC si +091 JMP L4 ;Synthetic 
inst + + main ENDP + diff --git a/tests/prev/BENCHMUL.EXE.a2 b/tests/prev/BENCHMUL.EXE.a2 new file mode 100755 index 0000000..39303a6 --- /dev/null +++ b/tests/prev/BENCHMUL.EXE.a2 @@ -0,0 +1,99 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0C SUB sp, 0Ch +003 000300 56 PUSH si +004 000301 B89401 MOV ax, 194h +005 000304 50 PUSH ax +006 000305 E8530C CALL near ptr printf +007 000308 59 POP cx +008 000309 8D46FC LEA ax, [bp-4] +009 00030C 50 PUSH ax +010 00030D B8B001 MOV ax, 1B0h +011 000310 50 PUSH ax +012 000311 E8D014 CALL near ptr scanf +013 000314 59 POP cx +014 000315 59 POP cx +015 000316 FF76FE PUSH word ptr [bp-2] +016 000319 FF76FC PUSH word ptr [bp-4] +017 00031C B8B401 MOV ax, 1B4h +018 00031F 50 PUSH ax +019 000320 E8380C CALL near ptr printf +020 000323 83C406 ADD sp, 6 +021 000326 8D46F4 LEA ax, [bp-0Ch] +022 000329 50 PUSH ax +023 00032A B8CE01 MOV ax, 1CEh +024 00032D 50 PUSH ax +025 00032E E8B314 CALL near ptr scanf +026 000331 59 POP cx +027 000332 59 POP cx +028 000333 8D46F6 LEA ax, [bp-0Ah] +029 000336 50 PUSH ax +030 000337 B8D101 MOV ax, 1D1h +031 00033A 50 PUSH ax +032 00033B E8A614 CALL near ptr scanf +033 00033E 59 POP cx +034 00033F 59 POP cx +035 000340 C746FA0000 MOV word ptr [bp-6], 0 +036 000345 C746F80100 MOV word ptr [bp-8], 1 + +038 0003B2 8B56FA L1: MOV dx, [bp-6] +039 0003B5 8B46F8 MOV ax, [bp-8] +040 0003B8 3B56FE CMP dx, [bp-2] +041 0003BB 7C8F JL L2 +042 0003BD 7F05 JG L3 +043 0003BF 3B46FC CMP ax, [bp-4] +044 0003C2 7688 JBE L2 + +045 0003C4 FF76F4 L3: PUSH word ptr [bp-0Ch] +046 0003C7 B8D401 MOV ax, 1D4h +047 0003CA 50 PUSH ax +048 0003CB E88D0B CALL near ptr printf +049 0003CE 59 POP cx +050 0003CF 59 POP cx +051 0003D0 5E POP si +052 0003D1 8BE5 MOV sp, bp +053 0003D3 5D POP bp +054 0003D4 C3 RET + +055 00034C BE0100 L2: MOV si, 1 + +057 0003A5 83FE28 L4: CMP si, 28h +058 0003A8 7EA7 JLE L5 +059 0003AA 8346F801 ADD word ptr [bp-8], 1 +060 0003AE 8356FA00 ADC word ptr [bp-6], 0 
+061 JMP L1 ;Synthetic inst + +062 000351 8B46F4 L5: MOV ax, [bp-0Ch] +063 000354 F766F4 MUL word ptr [bp-0Ch] +064 000357 F766F4 MUL word ptr [bp-0Ch] +065 00035A F766F4 MUL word ptr [bp-0Ch] +066 00035D F766F4 MUL word ptr [bp-0Ch] +067 000360 F766F4 MUL word ptr [bp-0Ch] +068 000363 F766F4 MUL word ptr [bp-0Ch] +069 000366 F766F4 MUL word ptr [bp-0Ch] +070 000369 F766F4 MUL word ptr [bp-0Ch] +071 00036C F766F4 MUL word ptr [bp-0Ch] +072 00036F F766F4 MUL word ptr [bp-0Ch] +073 000372 F766F4 MUL word ptr [bp-0Ch] +074 000375 F766F4 MUL word ptr [bp-0Ch] +075 000378 F766F4 MUL word ptr [bp-0Ch] +076 00037B F766F4 MUL word ptr [bp-0Ch] +077 00037E F766F4 MUL word ptr [bp-0Ch] +078 000381 F766F4 MUL word ptr [bp-0Ch] +079 000384 F766F4 MUL word ptr [bp-0Ch] +080 000387 F766F4 MUL word ptr [bp-0Ch] +081 00038A F766F4 MUL word ptr [bp-0Ch] +082 00038D F766F4 MUL word ptr [bp-0Ch] +083 000390 F766F4 MUL word ptr [bp-0Ch] +084 000393 F766F4 MUL word ptr [bp-0Ch] +085 000396 F766F4 MUL word ptr [bp-0Ch] +086 000399 F766F4 MUL word ptr [bp-0Ch] +087 00039C BA0300 MOV dx, 3 +088 00039F F7E2 MUL dx +089 0003A1 8946F4 MOV [bp-0Ch], ax +090 0003A4 46 INC si +091 JMP L4 ;Synthetic inst + + main ENDP + diff --git a/tests/prev/BENCHMUL.b b/tests/prev/BENCHMUL.b new file mode 100755 index 0000000..e2f9357 --- /dev/null +++ b/tests/prev/BENCHMUL.b @@ -0,0 +1,38 @@ +/* + * Input file : ./tests/inputs/BENCHMUL.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void main () +/* Takes no parameters. + * High-level language prologue code. 
+ */ +{ +int loc1; +int loc2; +long loc3; +long loc4; +int loc5; + + printf ("enter number of iterations\n"); + scanf ("%ld", &loc0); + printf ("executing %ld iterations\n", loc4); + scanf ("%d", &loc1); + scanf ("%d", &loc2); + loc3 = 1; + + while ((loc3 <= loc4)) { + loc5 = 1; + + while ((loc5 <= 40)) { + loc1 = (((((((((((((((((((((((((loc1 * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * loc1) * 3); + loc5 = (loc5 + 1); + } /* end of while */ + loc3 = (loc3 + 1); + } /* end of while */ + printf ("a=%d\n", loc1); +} + diff --git a/tests/prev/BENCHMUS.EXE.a1 b/tests/prev/BENCHMUS.EXE.a1 new file mode 100755 index 0000000..0edcb0c --- /dev/null +++ b/tests/prev/BENCHMUS.EXE.a1 @@ -0,0 +1,91 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0A SUB sp, 0Ah +003 000300 56 PUSH si +004 000301 57 PUSH di +005 000302 B8A801 MOV ax, 1A8h +006 000305 50 PUSH ax +007 000306 E8240C CALL near ptr printf +008 000309 59 POP cx +009 00030A 8D46FC LEA ax, [bp-4] +010 00030D 50 PUSH ax +011 00030E B8C401 MOV ax, 1C4h +012 000311 50 PUSH ax +013 000312 E8A114 CALL near ptr scanf +014 000315 59 POP cx +015 000316 59 POP cx +016 000317 FF76FE PUSH word ptr [bp-2] +017 00031A FF76FC PUSH word ptr [bp-4] +018 00031D B8C801 MOV ax, 1C8h +019 000320 50 PUSH ax +020 000321 E8090C CALL near ptr printf +021 000324 83C406 ADD sp, 6 +022 000327 BE1400 MOV si, 14h +023 00032A 8976F6 MOV [bp-0Ah], si +024 00032D C746FA0000 MOV word ptr [bp-6], 0 +025 000332 C746F80100 MOV word ptr [bp-8], 1 +026 000337 EB4C JMP L1 + +027 000385 8B56FA L1: MOV dx, [bp-6] +028 000388 8B46F8 MOV ax, [bp-8] +029 00038B 3B56FE CMP dx, [bp-2] +030 00038E 7CA9 JL L2 +031 000390 7F05 JG L3 +032 000392 3B46FC CMP ax, [bp-4] +033 000395 76A2 JBE L2 + +034 000397 56 L3: PUSH si +035 000398 B8E201 MOV ax, 1E2h +036 00039B 50 PUSH ax +037 00039C E88E0B 
CALL near ptr printf +038 00039F 59 POP cx +039 0003A0 59 POP cx +040 0003A1 5F POP di +041 0003A2 5E POP si +042 0003A3 8BE5 MOV sp, bp +043 0003A5 5D POP bp +044 0003A6 C3 RET + +045 000339 BF0100 L2: MOV di, 1 +046 00033C EB3A JMP L4 + +047 000378 83FF28 L4: CMP di, 28h +048 00037B 7EC1 JLE L5 +049 00037D 8346F801 ADD word ptr [bp-8], 1 +050 000381 8356FA00 ADC word ptr [bp-6], 0 +051 JMP L1 ;Synthetic inst + +052 00033E 8BC6 L5: MOV ax, si +053 000340 F7E6 MUL si +054 000342 F7E6 MUL si +055 000344 F7E6 MUL si +056 000346 F7E6 MUL si +057 000348 F7E6 MUL si +058 00034A F7E6 MUL si +059 00034C F7E6 MUL si +060 00034E F7E6 MUL si +061 000350 F7E6 MUL si +062 000352 F7E6 MUL si +063 000354 F7E6 MUL si +064 000356 F7E6 MUL si +065 000358 F7E6 MUL si +066 00035A F7E6 MUL si +067 00035C F7E6 MUL si +068 00035E F7E6 MUL si +069 000360 F7E6 MUL si +070 000362 F7E6 MUL si +071 000364 F7E6 MUL si +072 000366 F7E6 MUL si +073 000368 F7E6 MUL si +074 00036A F7E6 MUL si +075 00036C F7E6 MUL si +076 00036E F7E6 MUL si +077 000370 BA0300 MOV dx, 3 +078 000373 F7E2 MUL dx +079 000375 8BF0 MOV si, ax +080 000377 47 INC di +081 JMP L4 ;Synthetic inst + + main ENDP + diff --git a/tests/prev/BENCHMUS.EXE.a2 b/tests/prev/BENCHMUS.EXE.a2 new file mode 100755 index 0000000..343d780 --- /dev/null +++ b/tests/prev/BENCHMUS.EXE.a2 @@ -0,0 +1,89 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0A SUB sp, 0Ah +003 000300 56 PUSH si +004 000301 57 PUSH di +005 000302 B8A801 MOV ax, 1A8h +006 000305 50 PUSH ax +007 000306 E8240C CALL near ptr printf +008 000309 59 POP cx +009 00030A 8D46FC LEA ax, [bp-4] +010 00030D 50 PUSH ax +011 00030E B8C401 MOV ax, 1C4h +012 000311 50 PUSH ax +013 000312 E8A114 CALL near ptr scanf +014 000315 59 POP cx +015 000316 59 POP cx +016 000317 FF76FE PUSH word ptr [bp-2] +017 00031A FF76FC PUSH word ptr [bp-4] +018 00031D B8C801 MOV ax, 1C8h +019 000320 50 PUSH ax +020 000321 E8090C CALL near ptr printf +021 000324 83C406 
ADD sp, 6 +022 000327 BE1400 MOV si, 14h +023 00032A 8976F6 MOV [bp-0Ah], si +024 00032D C746FA0000 MOV word ptr [bp-6], 0 +025 000332 C746F80100 MOV word ptr [bp-8], 1 + +027 000385 8B56FA L1: MOV dx, [bp-6] +028 000388 8B46F8 MOV ax, [bp-8] +029 00038B 3B56FE CMP dx, [bp-2] +030 00038E 7CA9 JL L2 +031 000390 7F05 JG L3 +032 000392 3B46FC CMP ax, [bp-4] +033 000395 76A2 JBE L2 + +034 000397 56 L3: PUSH si +035 000398 B8E201 MOV ax, 1E2h +036 00039B 50 PUSH ax +037 00039C E88E0B CALL near ptr printf +038 00039F 59 POP cx +039 0003A0 59 POP cx +040 0003A1 5F POP di +041 0003A2 5E POP si +042 0003A3 8BE5 MOV sp, bp +043 0003A5 5D POP bp +044 0003A6 C3 RET + +045 000339 BF0100 L2: MOV di, 1 + +047 000378 83FF28 L4: CMP di, 28h +048 00037B 7EC1 JLE L5 +049 00037D 8346F801 ADD word ptr [bp-8], 1 +050 000381 8356FA00 ADC word ptr [bp-6], 0 +051 JMP L1 ;Synthetic inst + +052 00033E 8BC6 L5: MOV ax, si +053 000340 F7E6 MUL si +054 000342 F7E6 MUL si +055 000344 F7E6 MUL si +056 000346 F7E6 MUL si +057 000348 F7E6 MUL si +058 00034A F7E6 MUL si +059 00034C F7E6 MUL si +060 00034E F7E6 MUL si +061 000350 F7E6 MUL si +062 000352 F7E6 MUL si +063 000354 F7E6 MUL si +064 000356 F7E6 MUL si +065 000358 F7E6 MUL si +066 00035A F7E6 MUL si +067 00035C F7E6 MUL si +068 00035E F7E6 MUL si +069 000360 F7E6 MUL si +070 000362 F7E6 MUL si +071 000364 F7E6 MUL si +072 000366 F7E6 MUL si +073 000368 F7E6 MUL si +074 00036A F7E6 MUL si +075 00036C F7E6 MUL si +076 00036E F7E6 MUL si +077 000370 BA0300 MOV dx, 3 +078 000373 F7E2 MUL dx +079 000375 8BF0 MOV si, ax +080 000377 47 INC di +081 JMP L4 ;Synthetic inst + + main ENDP + diff --git a/tests/prev/BENCHMUS.b b/tests/prev/BENCHMUS.b new file mode 100755 index 0000000..c36ff5c --- /dev/null +++ b/tests/prev/BENCHMUS.b @@ -0,0 +1,38 @@ +/* + * Input file : ./tests/inputs/BENCHMUS.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void main () +/* Takes no parameters. + * High-level language prologue code. 
+ */ +{ +int loc1; +long loc2; +long loc3; +int loc4; +int loc5; + + printf ("enter number of iterations\n"); + scanf ("%ld", &loc0); + printf ("executing %ld iterations\n", loc3); + loc4 = 20; + loc1 = loc4; + loc2 = 1; + + while ((loc2 <= loc3)) { + loc5 = 1; + + while ((loc5 <= 40)) { + loc4 = (((((((((((((((((((((((((loc4 * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * loc4) * 3); + loc5 = (loc5 + 1); + } /* end of while */ + loc2 = (loc2 + 1); + } /* end of while */ + printf ("a=%d\n", loc4); +} + diff --git a/tests/prev/BENCHSHO.EXE.a1 b/tests/prev/BENCHSHO.EXE.a1 new file mode 100755 index 0000000..f16494f --- /dev/null +++ b/tests/prev/BENCHSHO.EXE.a1 @@ -0,0 +1,113 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0E SUB sp, 0Eh +003 000300 8D46FC LEA ax, [bp-4] +004 000303 50 PUSH ax +005 000304 B89401 MOV ax, 194h +006 000307 50 PUSH ax +007 000308 E8E914 CALL near ptr scanf +008 00030B 59 POP cx +009 00030C 59 POP cx +010 00030D FF76FE PUSH word ptr [bp-2] +011 000310 FF76FC PUSH word ptr [bp-4] +012 000313 B89801 MOV ax, 198h +013 000316 50 PUSH ax +014 000317 E8510C CALL near ptr printf +015 00031A 83C406 ADD sp, 6 +016 00031D 8D46F2 LEA ax, [bp-0Eh] +017 000320 50 PUSH ax +018 000321 B8B201 MOV ax, 1B2h +019 000324 50 PUSH ax +020 000325 E8CC14 CALL near ptr scanf +021 000328 59 POP cx +022 000329 59 POP cx +023 00032A 8D46F4 LEA ax, [bp-0Ch] +024 00032D 50 PUSH ax +025 00032E B8B601 MOV ax, 1B6h +026 000331 50 PUSH ax +027 000332 E8BF14 CALL near ptr scanf +028 000335 59 POP cx +029 000336 59 POP cx +030 000337 C746FA0000 MOV word ptr [bp-6], 0 +031 00033C C746F80100 MOV word ptr [bp-8], 1 +032 000341 E97900 JMP L1 + +033 0003BD 8B56FA L1: MOV dx, [bp-6] +034 0003C0 8B46F8 MOV ax, [bp-8] +035 0003C3 3B56FE CMP dx, [bp-2] +036 0003C6 7D03 JGE L2 +037 0003C8 E979FF JMP L3 + +038 
000344 C746F60100 L3: MOV word ptr [bp-0Ah], 1 +039 000349 EB64 JMP L4 + +040 0003AF 837EF628 L4: CMP word ptr [bp-0Ah], 28h +041 0003B3 7E96 JLE L5 +042 0003B5 8346F801 ADD word ptr [bp-8], 1 +043 0003B9 8356FA00 ADC word ptr [bp-6], 0 +044 JMP L1 ;Synthetic inst + +045 00034B 8B46F2 L5: MOV ax, [bp-0Eh] +046 00034E 0346F4 ADD ax, [bp-0Ch] +047 000351 0346F6 ADD ax, [bp-0Ah] +048 000354 8946F2 MOV [bp-0Eh], ax +049 000357 8B46F2 MOV ax, [bp-0Eh] +050 00035A D1F8 SAR ax, 1 +051 00035C 8946F4 MOV [bp-0Ch], ax +052 00035F 8B46F4 MOV ax, [bp-0Ch] +053 000362 BB0A00 MOV bx, 0Ah +054 000365 99 CWD +056 000366 F7FB IDIV bx +058 000368 8956F2 MOV [bp-0Eh], dx +059 00036B 8B46F4 MOV ax, [bp-0Ch] +060 00036E 3B46F6 CMP ax, [bp-0Ah] +061 000371 7505 JNE L6 +062 000373 B80100 MOV ax, 1 +063 000376 EB02 JMP L7 + +064 00037A 8946F2 L7: MOV [bp-0Eh], ax +065 00037D 8B46F2 MOV ax, [bp-0Eh] +066 000380 0B46F6 OR ax, [bp-0Ah] +067 000383 8946F4 MOV [bp-0Ch], ax +068 000386 8B46F4 MOV ax, [bp-0Ch] +069 000389 F7D8 NEG ax +070 00038B 1BC0 SBB ax, ax +071 00038D 40 INC ax +072 00038E 8946F2 MOV [bp-0Eh], ax +073 000391 8B46F2 MOV ax, [bp-0Eh] +074 000394 0346F6 ADD ax, [bp-0Ah] +075 000397 8946F4 MOV [bp-0Ch], ax +076 00039A 8B46F4 MOV ax, [bp-0Ch] +077 00039D 3B46F6 CMP ax, [bp-0Ah] +078 0003A0 7E05 JLE L8 +079 0003A2 B80100 MOV ax, 1 +080 0003A5 EB02 JMP L9 + +081 0003A9 8946F2 L9: MOV [bp-0Eh], ax +082 0003AC FF46F6 INC word ptr [bp-0Ah] +083 JMP L4 ;Synthetic inst + +084 0003A7 33C0 L8: XOR ax, ax +085 JMP L9 ;Synthetic inst + +086 000378 33C0 L6: XOR ax, ax +087 JMP L7 ;Synthetic inst + +088 0003CB 7F08 L2: JG L10 +089 0003CD 3B46FC CMP ax, [bp-4] +090 0003D0 7703 JA L10 +091 0003D2 E96FFF JMP L3 + +092 0003D5 FF76F2 L10: PUSH word ptr [bp-0Eh] +093 0003D8 B8BA01 MOV ax, 1BAh +094 0003DB 50 PUSH ax +095 0003DC E88C0B CALL near ptr printf +096 0003DF 59 POP cx +097 0003E0 59 POP cx +098 0003E1 8BE5 MOV sp, bp +099 0003E3 5D POP bp +100 0003E4 C3 RET + + main ENDP + diff --git 
a/tests/prev/BENCHSHO.EXE.a2 b/tests/prev/BENCHSHO.EXE.a2 new file mode 100755 index 0000000..c2b9e42 --- /dev/null +++ b/tests/prev/BENCHSHO.EXE.a2 @@ -0,0 +1,109 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC0E SUB sp, 0Eh +003 000300 8D46FC LEA ax, [bp-4] +004 000303 50 PUSH ax +005 000304 B89401 MOV ax, 194h +006 000307 50 PUSH ax +007 000308 E8E914 CALL near ptr scanf +008 00030B 59 POP cx +009 00030C 59 POP cx +010 00030D FF76FE PUSH word ptr [bp-2] +011 000310 FF76FC PUSH word ptr [bp-4] +012 000313 B89801 MOV ax, 198h +013 000316 50 PUSH ax +014 000317 E8510C CALL near ptr printf +015 00031A 83C406 ADD sp, 6 +016 00031D 8D46F2 LEA ax, [bp-0Eh] +017 000320 50 PUSH ax +018 000321 B8B201 MOV ax, 1B2h +019 000324 50 PUSH ax +020 000325 E8CC14 CALL near ptr scanf +021 000328 59 POP cx +022 000329 59 POP cx +023 00032A 8D46F4 LEA ax, [bp-0Ch] +024 00032D 50 PUSH ax +025 00032E B8B601 MOV ax, 1B6h +026 000331 50 PUSH ax +027 000332 E8BF14 CALL near ptr scanf +028 000335 59 POP cx +029 000336 59 POP cx +030 000337 C746FA0000 MOV word ptr [bp-6], 0 +031 00033C C746F80100 MOV word ptr [bp-8], 1 + +033 0003BD 8B56FA L1: MOV dx, [bp-6] +034 0003C0 8B46F8 MOV ax, [bp-8] +035 0003C3 3B56FE CMP dx, [bp-2] +036 0003C6 7D03 JGE L2 + +038 000344 C746F60100 L3: MOV word ptr [bp-0Ah], 1 + +040 0003AF 837EF628 L4: CMP word ptr [bp-0Ah], 28h +041 0003B3 7E96 JLE L5 +042 0003B5 8346F801 ADD word ptr [bp-8], 1 +043 0003B9 8356FA00 ADC word ptr [bp-6], 0 +044 JMP L1 ;Synthetic inst + +045 00034B 8B46F2 L5: MOV ax, [bp-0Eh] +046 00034E 0346F4 ADD ax, [bp-0Ch] +047 000351 0346F6 ADD ax, [bp-0Ah] +048 000354 8946F2 MOV [bp-0Eh], ax +049 000357 8B46F2 MOV ax, [bp-0Eh] +050 00035A D1F8 SAR ax, 1 +051 00035C 8946F4 MOV [bp-0Ch], ax +052 00035F 8B46F4 MOV ax, [bp-0Ch] +053 000362 BB0A00 MOV bx, 0Ah +054 000365 99 CWD +055 MOV tmp, dx:ax ;Synthetic inst +056 000366 F7FB IDIV bx +057 MOD bx ;Synthetic inst +058 000368 8956F2 MOV [bp-0Eh], dx +059 
00036B 8B46F4 MOV ax, [bp-0Ch] +060 00036E 3B46F6 CMP ax, [bp-0Ah] +061 000371 7505 JNE L6 +062 000373 B80100 MOV ax, 1 + +064 00037A 8946F2 L7: MOV [bp-0Eh], ax +065 00037D 8B46F2 MOV ax, [bp-0Eh] +066 000380 0B46F6 OR ax, [bp-0Ah] +067 000383 8946F4 MOV [bp-0Ch], ax +068 000386 8B46F4 MOV ax, [bp-0Ch] +069 000389 F7D8 NEG ax +070 00038B 1BC0 SBB ax, ax +071 00038D 40 INC ax +072 00038E 8946F2 MOV [bp-0Eh], ax +073 000391 8B46F2 MOV ax, [bp-0Eh] +074 000394 0346F6 ADD ax, [bp-0Ah] +075 000397 8946F4 MOV [bp-0Ch], ax +076 00039A 8B46F4 MOV ax, [bp-0Ch] +077 00039D 3B46F6 CMP ax, [bp-0Ah] +078 0003A0 7E05 JLE L8 +079 0003A2 B80100 MOV ax, 1 + +081 0003A9 8946F2 L9: MOV [bp-0Eh], ax +082 0003AC FF46F6 INC word ptr [bp-0Ah] +083 JMP L4 ;Synthetic inst + +084 0003A7 33C0 L8: XOR ax, ax +085 JMP L9 ;Synthetic inst + +086 000378 33C0 L6: XOR ax, ax +087 JMP L7 ;Synthetic inst + +088 0003CB 7F08 L2: JG L10 +089 0003CD 3B46FC CMP ax, [bp-4] +090 0003D0 7703 JA L10 + +092 0003D5 FF76F2 L10: PUSH word ptr [bp-0Eh] +093 0003D8 B8BA01 MOV ax, 1BAh +094 0003DB 50 PUSH ax +095 0003DC E88C0B CALL near ptr printf +096 0003DF 59 POP cx +097 0003E0 59 POP cx +098 0003E1 8BE5 MOV sp, bp +099 0003E3 5D POP bp +100 0003E4 C3 RET + + main ENDP + diff --git a/tests/prev/BENCHSHO.b b/tests/prev/BENCHSHO.b new file mode 100755 index 0000000..7786608 --- /dev/null +++ b/tests/prev/BENCHSHO.b @@ -0,0 +1,59 @@ +/* + * Input file : ./tests/inputs/BENCHSHO.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void main () +/* Takes no parameters. + * High-level language prologue code. 
+ */ +{ +int loc1; +int loc2; +int loc3; +long loc4; +long loc5; +int loc6; /* ax */ + + scanf ("%ld", &loc0); + printf ("executing %ld iterations\n", loc5); + scanf ("%ld", &loc1); + scanf ("%ld", &loc2); + loc4 = 1; + + while ((loc4 <= loc5)) { + loc3 = 1; + + while ((loc3 <= 40)) { + loc1 = ((loc1 + loc2) + loc3); + loc2 = (loc1 >> 1); + loc1 = (loc2 % 10); + + if (loc2 == loc3) { + loc6 = 1; + } + else { + loc6 = 0; + } + loc1 = loc6; + loc2 = (loc1 | loc3); + loc1 = !loc2; + loc2 = (loc1 + loc3); + + if (loc2 > loc3) { + loc6 = 1; + } + else { + loc6 = 0; + } + loc1 = loc6; + loc3 = (loc3 + 1); + } /* end of while */ + loc4 = (loc4 + 1); + } /* end of while */ + printf ("a=%d\n", loc1); +} + diff --git a/tests/prev/BYTEOPS.EXE.a1 b/tests/prev/BYTEOPS.EXE.a1 new file mode 100755 index 0000000..1ee2f72 --- /dev/null +++ b/tests/prev/BYTEOPS.EXE.a1 @@ -0,0 +1,58 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC02 SUB sp, 2 +003 000300 C646FEFF MOV byte ptr [bp-2], 0FFh +004 000304 C646FF8F MOV byte ptr [bp-1], 8Fh +005 000308 8A46FE MOV al, [bp-2] +006 00030B 0246FF ADD al, [bp-1] +007 00030E 8846FF MOV [bp-1], al +008 000311 8A46FE MOV al, [bp-2] +009 000314 2A46FF SUB al, [bp-1] +010 000317 8846FE MOV [bp-2], al +011 00031A 8A46FE MOV al, [bp-2] +012 00031D B400 MOV ah, 0 +013 00031F 8A56FF MOV dl, [bp-1] +014 000322 B600 MOV dh, 0 +015 000324 F7E2 MUL dx +016 000326 8846FE MOV [bp-2], al +017 000329 8A46FF MOV al, [bp-1] +018 00032C B400 MOV ah, 0 +019 00032E 8A56FE MOV dl, [bp-2] +020 000331 B600 MOV dh, 0 +021 000333 8BDA MOV bx, dx +022 000335 99 CWD +024 000336 F7FB IDIV bx +026 000338 8846FF MOV [bp-1], al +027 00033B 8A46FF MOV al, [bp-1] +028 00033E B400 MOV ah, 0 +029 000340 8A56FE MOV dl, [bp-2] +030 000343 B600 MOV dh, 0 +031 000345 8BDA MOV bx, dx +032 000347 99 CWD +034 000348 F7FB IDIV bx +036 00034A 8856FF MOV [bp-1], dl +037 00034D 8A46FE MOV al, [bp-2] +038 000350 B105 MOV cl, 5 +039 000352 D2E0 SHL al, cl 
+040 000354 8846FE MOV [bp-2], al +041 000357 8A46FF MOV al, [bp-1] +042 00035A 8A4EFE MOV cl, [bp-2] +043 00035D D2E8 SHR al, cl +044 00035F 8846FF MOV [bp-1], al +045 000362 8A46FF MOV al, [bp-1] +046 000365 B400 MOV ah, 0 +047 000367 50 PUSH ax +048 000368 8A46FE MOV al, [bp-2] +049 00036B B400 MOV ah, 0 +050 00036D 50 PUSH ax +051 00036E B89401 MOV ax, 194h +052 000371 50 PUSH ax +053 000372 E8AB06 CALL near ptr printf +054 000375 83C406 ADD sp, 6 +055 000378 8BE5 MOV sp, bp +056 00037A 5D POP bp +057 00037B C3 RET + + main ENDP + diff --git a/tests/prev/BYTEOPS.EXE.a2 b/tests/prev/BYTEOPS.EXE.a2 new file mode 100755 index 0000000..4967d4f --- /dev/null +++ b/tests/prev/BYTEOPS.EXE.a2 @@ -0,0 +1,62 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC02 SUB sp, 2 +003 000300 C646FEFF MOV byte ptr [bp-2], 0FFh +004 000304 C646FF8F MOV byte ptr [bp-1], 8Fh +005 000308 8A46FE MOV al, [bp-2] +006 00030B 0246FF ADD al, [bp-1] +007 00030E 8846FF MOV [bp-1], al +008 000311 8A46FE MOV al, [bp-2] +009 000314 2A46FF SUB al, [bp-1] +010 000317 8846FE MOV [bp-2], al +011 00031A 8A46FE MOV al, [bp-2] +012 00031D B400 MOV ah, 0 +013 00031F 8A56FF MOV dl, [bp-1] +014 000322 B600 MOV dh, 0 +015 000324 F7E2 MUL dx +016 000326 8846FE MOV [bp-2], al +017 000329 8A46FF MOV al, [bp-1] +018 00032C B400 MOV ah, 0 +019 00032E 8A56FE MOV dl, [bp-2] +020 000331 B600 MOV dh, 0 +021 000333 8BDA MOV bx, dx +022 000335 99 CWD +023 MOV tmp, dx:ax ;Synthetic inst +024 000336 F7FB IDIV bx +025 MOD bx ;Synthetic inst +026 000338 8846FF MOV [bp-1], al +027 00033B 8A46FF MOV al, [bp-1] +028 00033E B400 MOV ah, 0 +029 000340 8A56FE MOV dl, [bp-2] +030 000343 B600 MOV dh, 0 +031 000345 8BDA MOV bx, dx +032 000347 99 CWD +033 MOV tmp, dx:ax ;Synthetic inst +034 000348 F7FB IDIV bx +035 MOD bx ;Synthetic inst +036 00034A 8856FF MOV [bp-1], dl +037 00034D 8A46FE MOV al, [bp-2] +038 000350 B105 MOV cl, 5 +039 000352 D2E0 SHL al, cl +040 000354 8846FE MOV [bp-2], al 
+041 000357 8A46FF MOV al, [bp-1] +042 00035A 8A4EFE MOV cl, [bp-2] +043 00035D D2E8 SHR al, cl +044 00035F 8846FF MOV [bp-1], al +045 000362 8A46FF MOV al, [bp-1] +046 000365 B400 MOV ah, 0 +047 000367 50 PUSH ax +048 000368 8A46FE MOV al, [bp-2] +049 00036B B400 MOV ah, 0 +050 00036D 50 PUSH ax +051 00036E B89401 MOV ax, 194h +052 000371 50 PUSH ax +053 000372 E8AB06 CALL near ptr printf +054 000375 83C406 ADD sp, 6 +055 000378 8BE5 MOV sp, bp +056 00037A 5D POP bp +057 00037B C3 RET + + main ENDP + diff --git a/tests/prev/BYTEOPS.b b/tests/prev/BYTEOPS.b new file mode 100755 index 0000000..219e8cd --- /dev/null +++ b/tests/prev/BYTEOPS.b @@ -0,0 +1,28 @@ +/* + * Input file : ./tests/inputs/BYTEOPS.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void main () +/* Takes no parameters. + * High-level language prologue code. + */ +{ +int loc1; +int loc2; + + loc1 = 255; + loc2 = 143; + loc2 = (loc1 + loc2); + loc1 = (loc1 - loc2); + loc1 = (loc1 * loc2); + loc2 = (loc2 / loc1); + loc2 = (loc2 % loc1); + loc1 = (loc1 << 5); + loc2 = (loc2 >> loc1); + printf ("a = %d, b = %d\n", loc1, loc2); +} + diff --git a/tests/prev/FIBOS.EXE.a1 b/tests/prev/FIBOS.EXE.a1 new file mode 100755 index 0000000..471fed6 --- /dev/null +++ b/tests/prev/FIBOS.EXE.a1 @@ -0,0 +1,90 @@ + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC04 SUB sp, 4 +003 000300 56 PUSH si +004 000301 57 PUSH di +005 000302 B89401 MOV ax, 194h +006 000305 50 PUSH ax +007 000306 E8080C CALL near ptr printf +008 000309 59 POP cx +009 00030A 8D46FC LEA ax, [bp-4] +010 00030D 50 PUSH ax +011 00030E B8B101 MOV ax, 1B1h +012 000311 50 PUSH ax +013 000312 E88514 CALL near ptr scanf +014 000315 59 POP cx +015 000316 59 POP cx +016 000317 BE0100 MOV si, 1 +017 00031A EB2D JMP L1 + +018 000349 3B76FC L1: CMP si, [bp-4] +019 00034C 7ECE JLE L2 +020 00034E 33C0 XOR ax, ax +021 000350 50 PUSH ax +022 000351 E87300 CALL near ptr exit +023 000354 59 POP cx +024 000355 5F POP di +025 
000356 5E POP si +026 000357 8BE5 MOV sp, bp +027 000359 5D POP bp +028 00035A C3 RET + +029 00031C B8B401 L2: MOV ax, 1B4h +030 00031F 50 PUSH ax +031 000320 E8EE0B CALL near ptr printf +032 000323 59 POP cx +033 000324 8D46FE LEA ax, [bp-2] +034 000327 50 PUSH ax +035 000328 B8C301 MOV ax, 1C3h +036 00032B 50 PUSH ax +037 00032C E86B14 CALL near ptr scanf +038 00032F 59 POP cx +039 000330 59 POP cx +040 000331 FF76FE PUSH word ptr [bp-2] +041 000334 E82400 CALL near ptr proc_1 +042 000337 59 POP cx +043 000338 8BF8 MOV di, ax +044 00033A 57 PUSH di +045 00033B FF76FE PUSH word ptr [bp-2] +046 00033E B8C601 MOV ax, 1C6h +047 000341 50 PUSH ax +048 000342 E8CC0B CALL near ptr printf +049 000345 83C406 ADD sp, 6 +050 000348 46 INC si +051 JMP L1 ;Synthetic inst + + main ENDP + + proc_1 PROC NEAR +000 00035B 55 PUSH bp +001 00035C 8BEC MOV bp, sp +002 00035E 56 PUSH si +003 00035F 8B7604 MOV si, [bp+4] +004 000362 83FE02 CMP si, 2 +005 000365 7E1C JLE L3 +006 000367 8BC6 MOV ax, si +007 000369 48 DEC ax +008 00036A 50 PUSH ax +009 00036B E8EDFF CALL near ptr proc_1 +010 00036E 59 POP cx +011 00036F 50 PUSH ax +012 000370 8BC6 MOV ax, si +013 000372 05FEFF ADD ax, 0FFFEh +014 000375 50 PUSH ax +015 000376 E8E2FF CALL near ptr proc_1 +016 000379 59 POP cx +017 00037A 8BD0 MOV dx, ax +018 00037C 58 POP ax +019 00037D 03C2 ADD ax, dx +020 00037F EB07 JMP L4 + +021 000388 5E L4: POP si +022 000389 5D POP bp +023 00038A C3 RET + +024 000383 B80100 L3: MOV ax, 1 +025 000386 EB00 JMP L4 + + proc_1 ENDP + diff --git a/tests/prev/FIBOS.EXE.a2 b/tests/prev/FIBOS.EXE.a2 new file mode 100755 index 0000000..433c14d --- /dev/null +++ b/tests/prev/FIBOS.EXE.a2 @@ -0,0 +1,88 @@ + proc_1 PROC NEAR +000 00035B 55 PUSH bp +001 00035C 8BEC MOV bp, sp +002 00035E 56 PUSH si +003 00035F 8B7604 MOV si, [bp+4] +004 000362 83FE02 CMP si, 2 +005 000365 7E1C JLE L1 +006 000367 8BC6 MOV ax, si +007 000369 48 DEC ax +008 00036A 50 PUSH ax +009 00036B E8EDFF CALL near ptr proc_1 +010 00036E 59 POP 
cx +011 00036F 50 PUSH ax +012 000370 8BC6 MOV ax, si +013 000372 05FEFF ADD ax, 0FFFEh +014 000375 50 PUSH ax +015 000376 E8E2FF CALL near ptr proc_1 +016 000379 59 POP cx +017 00037A 8BD0 MOV dx, ax +018 00037C 58 POP ax +019 00037D 03C2 ADD ax, dx + +021 000388 5E L2: POP si +022 000389 5D POP bp +023 00038A C3 RET + +024 000383 B80100 L1: MOV ax, 1 +025 000386 EB00 JMP L2 + + proc_1 ENDP + + main PROC NEAR +000 0002FA 55 PUSH bp +001 0002FB 8BEC MOV bp, sp +002 0002FD 83EC04 SUB sp, 4 +003 000300 56 PUSH si +004 000301 57 PUSH di +005 000302 B89401 MOV ax, 194h +006 000305 50 PUSH ax +007 000306 E8080C CALL near ptr printf +008 000309 59 POP cx +009 00030A 8D46FC LEA ax, [bp-4] +010 00030D 50 PUSH ax +011 00030E B8B101 MOV ax, 1B1h +012 000311 50 PUSH ax +013 000312 E88514 CALL near ptr scanf +014 000315 59 POP cx +015 000316 59 POP cx +016 000317 BE0100 MOV si, 1 + +018 000349 3B76FC L3: CMP si, [bp-4] +019 00034C 7ECE JLE L4 +020 00034E 33C0 XOR ax, ax +021 000350 50 PUSH ax +022 000351 E87300 CALL near ptr exit +023 000354 59 POP cx +024 000355 5F POP di +025 000356 5E POP si +026 000357 8BE5 MOV sp, bp +027 000359 5D POP bp +028 00035A C3 RET + +029 00031C B8B401 L4: MOV ax, 1B4h +030 00031F 50 PUSH ax +031 000320 E8EE0B CALL near ptr printf +032 000323 59 POP cx +033 000324 8D46FE LEA ax, [bp-2] +034 000327 50 PUSH ax +035 000328 B8C301 MOV ax, 1C3h +036 00032B 50 PUSH ax +037 00032C E86B14 CALL near ptr scanf +038 00032F 59 POP cx +039 000330 59 POP cx +040 000331 FF76FE PUSH word ptr [bp-2] +041 000334 E82400 CALL near ptr proc_1 +042 000337 59 POP cx +043 000338 8BF8 MOV di, ax +044 00033A 57 PUSH di +045 00033B FF76FE PUSH word ptr [bp-2] +046 00033E B8C601 MOV ax, 1C6h +047 000341 50 PUSH ax +048 000342 E8CC0B CALL near ptr printf +049 000345 83C406 ADD sp, 6 +050 000348 46 INC si +051 JMP L3 ;Synthetic inst + + main ENDP + diff --git a/tests/prev/FIBOS.b b/tests/prev/FIBOS.b new file mode 100755 index 0000000..aec76cf --- /dev/null +++ 
b/tests/prev/FIBOS.b @@ -0,0 +1,53 @@ +/* + * Input file : ./tests/inputs/FIBOS.EXE + * File type : EXE + */ + +#include "dcc.h" + + +int proc_1 (int arg0) +/* Takes 2 bytes of parameters. + * High-level language prologue code. + * C calling convention. + */ +{ +int loc1; +int loc2; /* ax */ + + loc1 = arg0; + + if (loc1 > 2) { + loc2 = (proc_1 ((loc1 - 1)) + proc_1 ((loc1 + 0xfffe))); + } + else { + loc2 = 1; + } + return (loc2); +} + + +void main () +/* Takes no parameters. + * High-level language prologue code. + */ +{ +int loc1; +int loc2; +int loc3; +int loc4; + + printf ("Input number of iterations: "); + scanf ("%d", &loc1); + loc3 = 1; + + while ((loc3 <= loc1)) { + printf ("Input number: "); + scanf ("%d", &loc2); + loc4 = proc_1 (loc2); + printf ("fibonacci(%d) = %u\n", loc2, loc4); + loc3 = (loc3 + 1); + } /* end of while */ + exit (0); +} + diff --git a/tests/prev/MIN.EXE.a1 b/tests/prev/MIN.EXE.a1 new file mode 100755 index 0000000..26e919c --- /dev/null +++ b/tests/prev/MIN.EXE.a1 @@ -0,0 +1,39 @@ + PROC NEAR +000 000100 55 PUSH bp +001 000101 8BEC MOV bp, sp +002 000103 83EC02 SUB sp, 2 +003 000106 C746FE0000 MOV word ptr [bp-2], 0 +004 00010B 8B46FE MOV ax, [bp-2] +005 00010E 3D0600 CMP ax, 6 +006 000111 7735 JA L1 +007 000113 8BD8 MOV bx, ax +008 000115 D1E3 SHL bx, 1 +009 000117 2EFFA71C00 JMP word ptr cs:[bx+1Ch] ;Switch + +010 00012A B80200 MOV ax, 2 ;Case l0 +011 00012D EB1E JMP L2 + +012 00014D 8BE5 L2: MOV sp, bp +013 00014F 5D POP bp +014 000150 C3 RET + +015 00012F B80300 MOV ax, 3 ;Case l1 +016 000132 EB19 JMP L2 + +017 000134 B80700 MOV ax, 7 ;Case l2 +018 000137 EB14 JMP L2 + +019 000139 B80D00 MOV ax, 0Dh ;Case l3 +020 00013C EB0F JMP L2 + +021 000148 B82C00 L1: MOV ax, 2Ch ;Case l4 +022 00014B EB00 JMP L2 + +023 00013E B81700 MOV ax, 17h ;Case l5 +024 000141 EB0A JMP L2 + +025 000143 B80D00 MOV ax, 0Dh ;Case l6 +026 000146 EB05 JMP L2 + + ENDP + diff --git a/tests/prev/MIN.EXE.a2 b/tests/prev/MIN.EXE.a2 new file mode 100755 index 
0000000..ba41ff9 --- /dev/null +++ b/tests/prev/MIN.EXE.a2 @@ -0,0 +1,38 @@ + PROC NEAR +000 000100 55 PUSH bp +001 000101 8BEC MOV bp, sp +002 000103 83EC02 SUB sp, 2 +003 000106 C746FE0000 MOV word ptr [bp-2], 0 +004 00010B 8B46FE MOV ax, [bp-2] +005 00010E 3D0600 CMP ax, 6 +006 000111 7735 JA L1 +007 000113 8BD8 MOV bx, ax +008 000115 D1E3 SHL bx, 1 +009 000117 2EFFA71C00 JMP word ptr cs:[bx+1Ch] ;Switch + +010 00012A B80200 MOV ax, 2 ;Case l0 + +012 00014D 8BE5 L2: MOV sp, bp +013 00014F 5D POP bp +014 000150 C3 RET + +015 00012F B80300 MOV ax, 3 ;Case l1 +016 000132 EB19 JMP L2 + +017 000134 B80700 MOV ax, 7 ;Case l2 +018 000137 EB14 JMP L2 + +019 000139 B80D00 MOV ax, 0Dh ;Case l3 +020 00013C EB0F JMP L2 + +021 000148 B82C00 L1: MOV ax, 2Ch ;Case l4 +022 00014B EB00 JMP L2 + +023 00013E B81700 MOV ax, 17h ;Case l5 +024 000141 EB0A JMP L2 + +025 000143 B80D00 MOV ax, 0Dh ;Case l6 +026 000146 EB05 JMP L2 + + ENDP + diff --git a/tests/prev/MIN.b b/tests/prev/MIN.b new file mode 100755 index 0000000..f140e3b --- /dev/null +++ b/tests/prev/MIN.b @@ -0,0 +1,27 @@ +/* + * Input file : ./tests/inputs/MIN.EXE + * File type : EXE + */ + +#include "dcc.h" + + +void () +/* Takes no parameters. + * High-level language prologue code. + */ +{ +int loc1; +int loc2; /* ax */ +int loc3; /* bx */ + + loc1 = 0; + loc2 = loc1; + + if (loc2 <= 6) { + loc3 = loc2; + } + else { + } +} +