This commit is contained in:
Artur K
2011-11-24 20:19:41 +01:00
commit 4c249fe5c4
105 changed files with 21153 additions and 0 deletions

38
src/BasicBlock.cpp Normal file
View File

@@ -0,0 +1,38 @@
#include "BasicBlock.h"
#include "Procedure.h"
#include "dcc.h"
/* Allocates a default-constructed basic block and returns it.
 * The ctx, s, parent and insertBefore arguments are accepted but not used by
 * this overload; the new block is not registered with any function. */
BB *BB::Create(void *ctx, const std::string &s, Function *parent, BB *insertBefore)
{
    BB *freshBlock = new BB;
    return freshBlock;
}
/* Creates a basic block that spans the icodes start..ip (inclusive).
 * Arguments: start       : index of the first icode of the block.
 *            ip          : index of the last icode of the block.
 *            nodeType    : kind of node, stored verbatim in the block.
 *            numOutEdges : number of out-edges; the edge table is sized to it.
 *            parent      : owning function, or NULL. When non-NULL the block
 *                          is appended to parent->heldBBs and parent->cfg.
 * Returns the newly allocated block. */
BB *BB::Create(Int start, Int ip, byte nodeType, Int numOutEdges, Function *parent)
{
    /* The original had a stray no-op statement `parent->cfg;` here; it did
     * nothing useful and touched parent before the NULL check below. */
    BB *pnewBB = new BB;
    pnewBB->nodeType = nodeType;    /* Initialise */
    pnewBB->start = start;
    pnewBB->length = ip - start + 1;
    pnewBB->numOutEdges = (byte)numOutEdges;
    pnewBB->immedDom = NO_DOM;
    pnewBB->loopHead = pnewBB->caseHead = pnewBB->caseTail =
        pnewBB->latchNode = pnewBB->loopFollow = NO_NODE;
    if (numOutEdges)
        pnewBB->edges.resize(numOutEdges);
    /* Mark the basic block to which the icodes belong to, but only for
     * real code basic blocks (ie. not interval bbs) */
    if (parent)
    {
        if (start >= 0)
            parent->Icode.SetInBB(start, ip, pnewBB);
        parent->heldBBs.push_back(pnewBB);
        parent->cfg.push_back(pnewBB);
    }
    if (start != -1)                /* Only for code BB's */
        stats.numBBbef++;
    return pnewBB;
}

27
src/CMakeLists.txt Normal file
View File

@@ -0,0 +1,27 @@
# Translation units compiled into the dcc_oo executable.
# NOTE(review): BasicBlock.cpp is not in this list -- confirm whether that is
# intentional or whether it is picked up elsewhere.
set(SOURCES
    ast.cpp backend.cpp bundle.cpp chklib.cpp
    comwrite.cpp control.cpp dataflow.cpp dcc.cpp
    disassem.cpp error.cpp fixwild.cpp frontend.cpp
    graph.cpp hlicode.cpp icode.cpp
    idioms.cpp locident.cpp parser.cpp
    perfhlib.cpp procs.cpp proplong.cpp reducible.cpp
    scanner.cpp symtab.cpp udm.cpp
)

# Headers attached to the target (they are passed to add_executable below).
set(dc_INCLUDES
    ${PROJECT_SOURCE_DIR}/include/ast.h
    ${PROJECT_SOURCE_DIR}/include/bundle.h
    ${PROJECT_SOURCE_DIR}/include/dcc.h
    ${PROJECT_SOURCE_DIR}/include/disassem.h
    ${PROJECT_SOURCE_DIR}/include/dosdcc.h
    ${PROJECT_SOURCE_DIR}/include/error.h
    ${PROJECT_SOURCE_DIR}/include/graph.h
    ${PROJECT_SOURCE_DIR}/include/hlicode.h
    ${PROJECT_SOURCE_DIR}/include/icode.h
    ${PROJECT_SOURCE_DIR}/include/locident.h
    ${PROJECT_SOURCE_DIR}/include/perfhlib.h
    ${PROJECT_SOURCE_DIR}/include/scanner.h
    ${PROJECT_SOURCE_DIR}/include/state.h
    ${PROJECT_SOURCE_DIR}/include/symtab.h
    ${PROJECT_SOURCE_DIR}/include/types.h
)

# Group the files into "Source" and "Headers" folders.
source_group(Source FILES ${SOURCES})
source_group(Headers FILES ${dc_INCLUDES})

add_executable(dcc_oo ${SOURCES} ${dc_INCLUDES})

995
src/ast.cpp Normal file
View File

@@ -0,0 +1,995 @@
/*
* File: ast.c
* Purpose: Support module for abstract syntax trees.
* Date: September 1993
* (C) Cristina Cifuentes
*/
#include <stdint.h>
#include <malloc.h> /* For free() */
#include <string>
#include <sstream>
#include <iostream>
#include <cassert>
#include "types.h"
#include "dcc.h"
using namespace std;
/* Index registers **** temp solution
 * Printable names of the indexed addressing forms; walkCondExpr indexes this
 * table with (regi - INDEXBASE) when printing an OTHER identifier. */
static const char *idxReg[8] = {"bx+si", "bx+di", "bp+si", "bp+di",
"si", "di", "bp", "bx" };
/* Conditional operator symbols in C. Index by condOp enumeration type.
 * Each symbol carries its own surrounding blanks. */
static const char *condOpSym[] = { " <= ", " < ", " == ", " != ", " > ", " >= ",
" & ", " | ", " ^ ", " ~ ",
" + ", " - ", " * ", " / ",
" >> ", " << ", " % ", " && ", " || " };
#define EXP_SIZE 200 /* Size of the expression buffer */
/* Local expression stack.
 * The hand-rolled linked list below was replaced by a std::list of
 * expression pointers; the old declaration is kept for reference. */
//typedef struct _EXP_STK {
// COND_EXPR *exp;
// struct _EXP_STK *next;
//} EXP_STK;
typedef std::list<COND_EXPR *> EXP_STK;
static EXP_STK expStk; /* local expression stack; top of stack is back() */
/* Formats i as a C-style hexadecimal literal: values above 9 are printed
 * with a "0x" prefix, single-digit values are printed bare.
 * The result lives in a static buffer that is overwritten by the next call. */
static char *hexStr (uint16_t i)
{
    static char text[10];
    const char *prefix = "";

    if (i > 9)
        prefix = "0x";
    sprintf (text, "%s%x", prefix, i);
    return text;
}
/* Records register regi in this icode's def and/or use sets according to
 * du_in. A definition (eDEF or USE_DEF) also bumps du1.numRegsDef.
 * NONE leaves the record untouched. Each update is traced on stdout. */
void ICODE::setRegDU (byte regi, operDu du_in)
{
    const bool defines = (du_in == eDEF) || (du_in == USE_DEF);
    const bool uses    = (du_in == eUSE) || (du_in == USE_DEF);

    if (defines)
    {
        du.def |= duReg[regi];
        du1.numRegsDef++;
        printf("%s du.def |= %x\n",__FUNCTION__,duReg[regi]);
    }
    if (uses)
    {
        printf("%s du.use |= %x\n",__FUNCTION__,duReg[regi]);
        du.use |= duReg[regi];
    }
}
/* Copies def/use information from duIcode into pIcode.
 * du   selects the destination field of pIcode (eDEF, eUSE, or USE_DEF for
 *      the whole du record).
 * duDu selects the source field of duIcode: eDEF reads du.def, anything
 *      else reads du.use. It is ignored when du is USE_DEF.
 * Passing NONE as du is a programming error (asserts).
 * The resulting def/use values are traced on stdout. */
void copyDU (ICODE *pIcode, const ICODE *duIcode, operDu du, operDu duDu)
{
    const bool fromDef = (duDu == eDEF);

    switch (du) {
    case eDEF:
        pIcode->du.def = fromDef ? duIcode->du.def : duIcode->du.use;
        break;
    case eUSE:
        pIcode->du.use = fromDef ? duIcode->du.def : duIcode->du.use;
        break;
    case USE_DEF:
        pIcode->du = duIcode->du;
        break;
    case NONE:
        assert(false);
        break;
    }
    printf("%s end: %x,%x\n",__FUNCTION__,pIcode->du.def,pIcode->du.use);
}
/* Allocates a conditional expression node, tags it with node type t and
 * returns it. No other field is set here. */
static COND_EXPR *newCondExp (condNodeType t)
{
    COND_EXPR *node = new COND_EXPR;
    node->type = t;
    return node;
}
/* Builds a BOOLEAN_OP node that applies op to the operands lhs and rhs.
 * The operand pointers are stored as given (not copied). */
COND_EXPR *COND_EXPR::boolOp(COND_EXPR *lhs, COND_EXPR *rhs, condOp op)
{
    COND_EXPR *node = newCondExp (BOOLEAN_OP);

    node->expr.boolExpr.op = op;
    node->expr.boolExpr.lhs = lhs;
    node->expr.boolExpr.rhs = rhs;
    return node;
}
/* Builds a unary node of type t whose operand is sub_expr (stored as given,
 * not copied). Intended only for the unary node types: NEGATION, ADDRESSOF,
 * DEREFERENCE, POST_INC, POST_DEC, PRE_INC, PRE_DEC. */
COND_EXPR *COND_EXPR::unary(condNodeType t, COND_EXPR *sub_expr)
{
    COND_EXPR *node = newCondExp (t);
    node->expr.unaryExp = sub_expr;
    return node;
}
/* Returns an identifier node of type GLOB_VAR for the global at
 * segValue:off. The node stores the index of the symtab entry whose label
 * equals opAdr(segValue, off). If no entry matches, an error is printed and
 * the stored index equals symtab.csym. */
COND_EXPR *COND_EXPR::idGlob (int16 segValue, int16 off)
{
    COND_EXPR *node = newCondExp (IDENTIFIER);
    node->expr.ident.idType = GLOB_VAR;

    const dword adr = opAdr(segValue, off);
    Int idx = 0;
    while ((idx < symtab.csym) && (symtab.sym[idx].label != adr))
        idx++;
    if (idx == symtab.csym)
        printf ("Error, glob var not found in symtab\n");

    node->expr.ident.idNode.globIdx = idx;
    return node;
}
/* Returns an identifier node of type REGISTER for register regi.
 * When icodeFlg has B or SRC_B set the register is entered in locsym as a
 * signed byte and the node is marked BYTE_REG; otherwise it is entered as a
 * signed word and marked WORD_REG. */
COND_EXPR *COND_EXPR::idReg(byte regi, flags32 icodeFlg, LOCAL_ID *locsym)
{
    COND_EXPR *node = newCondExp (IDENTIFIER);
    IDENTTYPE &ident = node->expr.ident;
    const bool isByte = (icodeFlg & B) || (icodeFlg & SRC_B);

    ident.idType = REGISTER;
    ident.idNode.regiIdx =
        locsym->newByteWordReg(isByte ? TYPE_BYTE_SIGN : TYPE_WORD_SIGN, regi);
    ident.regiType = isByte ? BYTE_REG : WORD_REG;
    return node;
}
/* Returns an identifier node of type REGISTER that refers to the already
 * existing local-identifier entry idx, with register width reg_type. */
COND_EXPR *COND_EXPR::idRegIdx(Int idx, regType reg_type)
{
    COND_EXPR *node = newCondExp (IDENTIFIER);
    IDENTTYPE &ident = node->expr.ident;

    ident.idType = REGISTER;
    ident.regiType = reg_type;
    ident.idNode.regiIdx = idx;
    return node;
}
/* Returns an identifier node of type LOCAL_VAR for the local at stack
 * offset off. The local is looked up in localId (entry with matching
 * bwId.off and bwId.regOff == 0) and, when found, is named "loc<index>".
 * If no entry matches, an error is printed and the stored index equals
 * localId->csym(); in that case no name is written, because that index is
 * one past the last entry of the table. */
COND_EXPR *COND_EXPR::idLoc(Int off, LOCAL_ID *localId)
{
    COND_EXPR *newExp = newCondExp (IDENTIFIER);
    newExp->expr.ident.idType = LOCAL_VAR;

    size_t i;
    for (i = 0; i < localId->csym(); i++)
        if ((localId->id_arr[i].id.bwId.off == off) &&
            (localId->id_arr[i].id.bwId.regOff == 0))
            break;

    newExp->expr.ident.idNode.localIdx = i;
    if (i == localId->csym())
        printf ("Error, cannot find local var\n");
    else    /* %ld expects a long, so convert the size_t index explicitly */
        sprintf (localId->id_arr[i].name, "loc%ld", static_cast<long>(i));
    return (newExp);
}
/* Returns an identifier node of type PARAM for the argument at stack offset
 * off. The node stores the index of the matching entry of argSymtab->sym.
 * If no entry matches, an error is printed and the stored index equals
 * argSymtab->sym.size(). */
COND_EXPR *COND_EXPR::idParam(Int off, const STKFRAME * argSymtab)
{
    COND_EXPR *node = newCondExp (IDENTIFIER);
    node->expr.ident.idType = PARAM;

    size_t pos = 0;
    while ((pos < argSymtab->sym.size()) && !(argSymtab->sym[pos].off == off))
        pos++;
    if (pos == argSymtab->sym.size())
        printf ("Error, cannot find argument var\n");

    node->expr.ident.idNode.localIdx = pos;
    return node;
}
/* Returns an identifier node of type GLOB_VAR_IDX: the global at
 * segValue:off indexed by register regi. The node stores the index of the
 * locSym entry whose bwGlb seg, off and regi all match. If none matches, an
 * error is printed and the stored index equals locSym->csym(). */
COND_EXPR *idCondExpIdxGlob (int16 segValue, int16 off, byte regi, const LOCAL_ID *locSym)
{
    COND_EXPR *node = newCondExp (IDENTIFIER);
    node->expr.ident.idType = GLOB_VAR_IDX;

    size_t pos = 0;
    for (; pos < locSym->csym(); pos++)
    {
        const bool sameSeg  = (locSym->id_arr[pos].id.bwGlb.seg == segValue);
        const bool sameOff  = sameSeg && (locSym->id_arr[pos].id.bwGlb.off == off);
        const bool sameRegi = sameOff && (locSym->id_arr[pos].id.bwGlb.regi == regi);
        if (sameRegi)
            break;
    }
    if (pos == locSym->csym())
        printf ("Error, indexed-glob var not found in local id table\n");

    node->expr.ident.idNode.idxGlbIdx = pos;
    return node;
}
/* Returns an identifier node of type CONSTANT holding the value kte and the
 * given size (hlTypeSize reports this size for the node). */
COND_EXPR *COND_EXPR::idKte(dword kte, byte size)
{
    COND_EXPR *node = newCondExp (IDENTIFIER);
    IDENTTYPE &ident = node->expr.ident;

    ident.idType = CONSTANT;
    ident.idNode.kte.kte = kte;
    ident.idNode.kte.size = size;
    return node;
}
/* Returns an identifier node of type LONG_VAR that refers to the existing
 * long-variable entry at index idx. */
COND_EXPR *COND_EXPR::idLongIdx (Int idx)
{
    COND_EXPR *node = newCondExp (IDENTIFIER);
    IDENTTYPE &ident = node->expr.ident;

    ident.idType = LONG_VAR;
    ident.idNode.longIdx = idx;
    return node;
}
/* Returns an identifier node for a long (two-icode) operand.
 * If the source operand is an immediate (sd == SRC and the I flag is set),
 * the node is a 4-byte CONSTANT assembled from the immediates of pIcode and
 * pIcode+off; f tells which of the two holds the high word. Otherwise the
 * operand is registered through localId->newLong and the node is a LONG_VAR
 * referring to the returned index. */
COND_EXPR *COND_EXPR::idLong(LOCAL_ID *localId, opLoc sd, ICODE *pIcode, hlFirst f, Int ix, operDu du, Int off)
{
    COND_EXPR *node = newCondExp (IDENTIFIER);
    IDENTTYPE &ident = node->expr.ident;
    const bool isImmediate = (sd == SRC) && ((pIcode->ic.ll.flg & I) == I);

    if (isImmediate)
    {
        /* Pick which icode supplies the high and which the low word */
        const ICODE *hiPart = (f == HIGH_FIRST) ? pIcode : (pIcode+off);
        const ICODE *loPart = (f == HIGH_FIRST) ? (pIcode+off) : pIcode;

        ident.idType = CONSTANT;
        ident.idNode.kte.kte = (hiPart->ic.ll.immed.op << 16) + loPart->ic.ll.immed.op;
        ident.idNode.kte.size = 4;
        return node;
    }

    /* Save it as a long expression (reg, stack or glob) */
    const Int idx = localId->newLong(sd, pIcode, f, ix, du, off);
    ident.idType = LONG_VAR;
    ident.idNode.longIdx = idx;
    return node;
}
/* Returns an identifier node of type FUNCTION describing a call to pproc
 * with the argument frame args. Both pointers are stored as given. */
COND_EXPR *COND_EXPR::idFunc(Function * pproc, STKFRAME * args)
{
    COND_EXPR *node = newCondExp (IDENTIFIER);
    IDENTTYPE &ident = node->expr.ident;

    ident.idType = FUNCTION;
    ident.idNode.call.proc = pproc;
    ident.idNode.call.args = args;
    return node;
}
/* Returns an identifier node of type OTHER that simply records the segment
 * register, index register and offset of the operand.
 * Temporary solution, should really be encoded as an indexed type (eg.
 * arrays). */
COND_EXPR *COND_EXPR::idOther(byte seg, byte regi, int16 off)
{
    COND_EXPR *node = newCondExp (IDENTIFIER);
    IDENTTYPE &ident = node->expr.ident;

    ident.idType = OTHER;
    ident.idNode.other.seg = seg;
    ident.idNode.other.regi = regi;
    ident.idNode.other.off = off;
    return node;
}
/* Returns an identifier node describing the value retVal:
 *   TYPE_LONG_SIGN -> LONG_VAR on the register pair longId.h:longId.l,
 *   TYPE_WORD_SIGN -> word REGISTER on id.regi.
 * The registers are entered in locsym. For any other retVal->type the node
 * is returned with only its node type set. */
COND_EXPR *COND_EXPR::idID (const ID *retVal, LOCAL_ID *locsym, Int ix)
{
    COND_EXPR *node = newCondExp (IDENTIFIER);
    IDENTTYPE &ident = node->expr.ident;

    if (retVal->type == TYPE_LONG_SIGN)
    {
        const Int idx = locsym->newLongReg (TYPE_LONG_SIGN, retVal->id.longId.h,retVal->id.longId.l, ix);
        ident.idType = LONG_VAR;
        ident.idNode.longIdx = idx;
    }
    else if (retVal->type == TYPE_WORD_SIGN)
    {
        ident.idType = REGISTER;
        ident.idNode.regiIdx = locsym->newByteWordReg(TYPE_WORD_SIGN, retVal->id.regi);
        ident.regiType = WORD_REG;
    }
    return node;
}
/* Returns an identifier conditional expression node for one operand of a
 * low-level icode, choosing the kind of node from the operand's encoding.
 * The branches below are tried in order; the first that matches wins.
 * Arguments: pIcode : icode whose operand is translated.
 * sd : which operand to translate (SRC, DST, or LHS_OP).
 * pProc : function whose localId table and args are consulted.
 * i : index into the icode array, used for newLongReg only.
 * duIcode: icode instruction that needs the du set.
 * du: operand is defined or used in current instruction. */
COND_EXPR *COND_EXPR::id(ICODE *pIcode, opLoc sd, Function * pProc, Int i,ICODE *duIcode, operDu du)
{
COND_EXPR *newExp;
ICODEMEM * pm;
Int idx; /* idx into pIcode->localId table */
/* Any sd other than SRC (i.e. DST or LHS_OP) selects the dst operand */
pm = (sd == SRC) ? &pIcode->ic.ll.src : &pIcode->ic.ll.dst;
if (((sd == DST) && (pIcode->ic.ll.flg & IM_DST) == IM_DST) ||
((sd == SRC) && (pIcode->ic.ll.flg & IM_SRC)) ||
(sd == LHS_OP)) /* for MUL lhs */
{ /* implicit dx:ax */
idx = pProc->localId.newLongReg (TYPE_LONG_SIGN, rDX, rAX, i);
newExp = COND_EXPR::idLongIdx (idx);
duIcode->setRegDU (rDX, du);
duIcode->setRegDU (rAX, du);
}
else if ((sd == DST) && (pIcode->ic.ll.flg & IM_TMP_DST) == IM_TMP_DST)
{ /* implicit tmp */
newExp = COND_EXPR::idReg (rTMP, 0, &pProc->localId);
/* tmp is always recorded as a use here, regardless of du */
duIcode->setRegDU(rTMP, (operDu)eUSE);
}
else if ((sd == SRC) && ((pIcode->ic.ll.flg & I) == I)) /* constant */
newExp = COND_EXPR::idKte (pIcode->ic.ll.immed.op, 2);
else if (pm->regi == 0) /* global variable */
newExp = COND_EXPR::idGlob(pm->segValue, pm->off);
else if (pm->regi < INDEXBASE) /* register */
{
/* For a non-source operand the NO_SRC_B mask is applied to the flags */
newExp = COND_EXPR::idReg (pm->regi, (sd == SRC) ? pIcode->ic.ll.flg :
pIcode->ic.ll.flg & NO_SRC_B, &pProc->localId);
duIcode->setRegDU( pm->regi, du);
}
else if (pm->off) /* offset */
{
if ((pm->seg == rSS) && (pm->regi == INDEXBASE + 6)) /* idx on bp */
{
if (pm->off >= 0) /* argument */
newExp = COND_EXPR::idParam (pm->off, &pProc->args);
else /* local variable */
newExp = COND_EXPR::idLoc (pm->off, &pProc->localId);
}
else if ((pm->seg == rDS) && (pm->regi == INDEXBASE + 7)) /* bx */
{
if (pm->off > 0) /* global variable */
newExp = idCondExpIdxGlob (pm->segValue, pm->off, rBX,&pProc->localId);
else
newExp = COND_EXPR::idOther (pm->seg, pm->regi, pm->off);
/* bx is read as the index in both cases */
duIcode->setRegDU( rBX, eUSE);
}
else /* idx <> bp, bx */
newExp = COND_EXPR::idOther (pm->seg, pm->regi, pm->off);
/**** check long ops, indexed global var *****/
}
else /* (pm->regi >= INDEXBASE && pm->off = 0) => indexed && no off */
{
if ((pm->seg == rDS) && (pm->regi > INDEXBASE + 3)) /* dereference */
{
/* Build the register node, then wrap it in a DEREFERENCE below */
switch (pm->regi) {
case INDEXBASE + 4: newExp = COND_EXPR::idReg(rSI, 0, &pProc->localId);
duIcode->setRegDU( rSI, du);
break;
case INDEXBASE + 5: newExp = COND_EXPR::idReg(rDI, 0, &pProc->localId);
duIcode->setRegDU( rDI, du);
break;
/* no du is recorded for bp in this case */
case INDEXBASE + 6: newExp = COND_EXPR::idReg(rBP, 0, &pProc->localId);
break;
case INDEXBASE + 7: newExp = COND_EXPR::idReg(rBX, 0, &pProc->localId);
duIcode->setRegDU( rBX, du);
break;
default:
newExp = 0;
assert(false);
}
newExp = COND_EXPR::unary (DEREFERENCE, newExp);
}
else
newExp = COND_EXPR::idOther (pm->seg, pm->regi, 0);
}
return (newExp);
}
/* Classifies the operand selected by sd (SRC -> src operand, anything else
 * -> dst operand) and returns its identifier type:
 *   immediate source               -> CONSTANT
 *   regi == 0                      -> GLOB_VAR
 *   regi below INDEXBASE           -> REGISTER
 *   ss-relative with regi==INDEXBASE -> PARAM (off >= 0) or LOCAL_VAR
 *   everything else                -> OTHER
 * NOTE(review): COND_EXPR::id treats INDEXBASE + 6 as the bp-indexed form,
 * whereas this test uses INDEXBASE -- confirm which one is intended. */
condId ICODE::idType(opLoc sd)
{
    const bool isSrc = (sd == SRC);
    ICODEMEM *operand = isSrc ? &ic.ll.src : &ic.ll.dst;

    if (isSrc && ((ic.ll.flg & I) == I))
        return CONSTANT;
    if (operand->regi == 0)
        return GLOB_VAR;
    if (operand->regi < INDEXBASE)
        return REGISTER;
    if ((operand->seg == rSS) && (operand->regi == INDEXBASE))
        return (operand->off >= 0) ? PARAM : LOCAL_VAR;
    return OTHER;
}
/* Size of hl types. hlTypeSize indexes this table with the `type` field of
 * local identifiers, arguments and function return values.
 * NOTE(review): the entries are presumably byte sizes in hlType order --
 * confirm against the hlType enumeration. */
Int hlSize[] = {2, 1, 1, 2, 2, 4, 4, 4, 2, 2, 1, 4, 4};
/* Returns the size of the expression expr as used in function pproc.
 * A NULL expression counts as size 2 (TYPE_UNKNOWN). A boolean operation
 * takes the larger of its operand sizes, a unary operation the size of its
 * operand, and an identifier the size implied by its kind. Anything not
 * covered yields 2. */
Int hlTypeSize (const COND_EXPR *expr, Function * pproc)
{
    if (expr == NULL)
        return 2;       /* for TYPE_UNKNOWN */

    switch (expr->type) {
    case BOOLEAN_OP:
    {
        const Int lhsSize = hlTypeSize (expr->expr.boolExpr.lhs, pproc);
        const Int rhsSize = hlTypeSize (expr->expr.boolExpr.rhs, pproc);
        return (lhsSize > rhsSize) ? lhsSize : rhsSize;
    }
    case NEGATION:
    case ADDRESSOF:
    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
    case DEREFERENCE:
        return hlTypeSize (expr->expr.unaryExp, pproc);
    case IDENTIFIER:
    {
        const IDENTTYPE &ident = expr->expr.ident;
        switch (ident.idType)
        {
        case GLOB_VAR:
            return symtab.sym[ident.idNode.globIdx].size;
        case REGISTER:
            return (ident.regiType == BYTE_REG) ? 1 : 2;
        case LOCAL_VAR:
            return hlSize[pproc->localId.id_arr[ident.idNode.localIdx].type];
        case PARAM:
            return hlSize[pproc->args.sym[ident.idNode.paramIdx].type];
        case GLOB_VAR_IDX:
            return hlSize[pproc->localId.id_arr[ident.idNode.idxGlbIdx].type];
        case CONSTANT:
            return ident.idNode.kte.size;
        case STRING:
            return 2;
        case LONG_VAR:
            return 4;
        case FUNCTION:
            return hlSize[ident.idNode.call.proc->retVal.type];
        case OTHER:
            return 2;
        } /* eos */
        break;
    }
    }
    return 2; // CC: is this correct?
}
/* Returns the high-level type of the expression expr as used in pproc.
 * NULL yields TYPE_UNKNOWN. For a boolean operation whose operand types
 * differ, the type of the operand with the larger hlTypeSize is returned
 * (the rhs wins ties). ADDRESSOF and DEREFERENCE both yield TYPE_PTR. */
hlType expType (const COND_EXPR *expr, Function * pproc)
{
    if (expr == NULL)
        return TYPE_UNKNOWN;

    switch (expr->type)
    {
    case BOOLEAN_OP:
    {
        const hlType lhsType = expType (expr->expr.boolExpr.lhs, pproc);
        const hlType rhsType = expType (expr->expr.boolExpr.rhs, pproc);
        if (lhsType == rhsType)
            return lhsType;
        const bool lhsWider = hlTypeSize (expr->expr.boolExpr.lhs, pproc) >
                              hlTypeSize (expr->expr.boolExpr.rhs, pproc);
        return lhsWider ? lhsType : rhsType;
    }
    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
    case NEGATION:
        return expType (expr->expr.unaryExp, pproc);
    case ADDRESSOF:
        return TYPE_PTR;    /***????****/
    case DEREFERENCE:
        return TYPE_PTR;
    case IDENTIFIER:
    {
        const IDENTTYPE &ident = expr->expr.ident;
        switch (ident.idType)
        {
        case GLOB_VAR:
            return symtab.sym[ident.idNode.globIdx].type;
        case REGISTER:
            return (ident.regiType == BYTE_REG) ? TYPE_BYTE_SIGN : TYPE_WORD_SIGN;
        case LOCAL_VAR:
            return pproc->localId.id_arr[ident.idNode.localIdx].type;
        case PARAM:
            return pproc->args.sym[ident.idNode.paramIdx].type;
        case GLOB_VAR_IDX:
            return pproc->localId.id_arr[ident.idNode.idxGlbIdx].type;
        case CONSTANT:
            return TYPE_CONST;
        case STRING:
            return TYPE_STR;
        case LONG_VAR:
            return pproc->localId.id_arr[ident.idNode.longIdx].type;
        case FUNCTION:
            return ident.idNode.call.proc->retVal.type;
        case OTHER:
            return TYPE_UNKNOWN;
        } /* eos */
    }
    /* An identifier kind not listed above falls through to UNKNOWN_OP */
    case UNKNOWN_OP:
        assert(false);
        return TYPE_UNKNOWN;
    }
    return TYPE_UNKNOWN; // CC: Correct?
}
/* Removes register regi from the root node of tree. Only an identifier node
 * of type LONG_VAR is affected: it is turned into a word REGISTER node for
 * the other half of the long register (eg. dx:ax minus ax leaves dx).
 * Every other node is left untouched; sub-expressions are not visited. */
void removeRegFromLong (byte regi, LOCAL_ID *locId, COND_EXPR *tree)
{
    if (tree->type != IDENTIFIER)
        return;

    IDENTTYPE &ident = tree->expr.ident;
    if (ident.idType != LONG_VAR)
        return;

    /* high or low part of long register */
    const byte otherRegi = otherLongRegi (regi, ident.idNode.longIdx, locId);
    ident.idType = REGISTER;
    ident.regiType = WORD_REG;
    ident.idNode.regiIdx = locId->newByteWordReg(TYPE_WORD_SIGN,otherRegi);
}
/* Returns the string stored in the program image at offset, wrapped in
 * double quotes. The length is obtained from strSize with '\0' as the
 * terminator and every character is passed through cChar. */
static std::string getString (Int offset)
{
    std::ostringstream quoted;
    const Int strLen = strSize (&prog.Image[offset], '\0');

    quoted << '"';
    for (Int pos = 0; pos < strLen; pos++)
        quoted << cChar(prog.Image[offset+pos]);
    quoted << "\"\0";
    return quoted.str();
}
/* Walks the conditional expression tree and returns the result on a string.
 * Arguments: expr : tree to print; NULL yields the empty string.
 * pProc : function whose local identifiers and arguments are named.
 * numLoc : running counter of "loc<N>" names; incremented whenever an
 * unnamed register or long register variable is given a name.
 * Side effects: naming a variable writes the name into pProc's identifier
 * table and appends a declaration for it to cCode. */
// TODO: use string stream here
string walkCondExpr (const COND_EXPR* expr, Function * pProc, Int* numLoc)
{
int16 off; /* temporal - for OTHER */
ID* id; /* Pointer to local identifier table */
char* o; /* Operand string pointer (hidden by the stream `o` declared in the IDENTIFIER case) */
boolT needBracket; /* Determine whether parenthesis is needed */
BWGLB_TYPE* bwGlb; /* Ptr to BWGLB_TYPE (global indexed var) */
STKSYM * psym; /* Pointer to argument in the stack */
std::ostringstream outStr;
if (expr == NULL)
return "";
needBracket = TRUE;
switch (expr->type)
{
case BOOLEAN_OP:
/* Binary operations are always fully parenthesised */
outStr << "(";
outStr << walkCondExpr(expr->expr.boolExpr.lhs, pProc, numLoc);
outStr << condOpSym[expr->expr.boolExpr.op];
outStr << walkCondExpr(expr->expr.boolExpr.rhs, pProc, numLoc);
outStr << ")";
break;
case NEGATION:
/* The operand is bracketed unless it is a plain identifier */
if (expr->expr.unaryExp->type == IDENTIFIER)
{
needBracket = FALSE;
outStr << "!";
}
else
outStr << "! (";
outStr << walkCondExpr (expr->expr.unaryExp, pProc, numLoc);
if (needBracket == TRUE)
outStr << ")";
break;
case ADDRESSOF:
if (expr->expr.unaryExp->type == IDENTIFIER)
{
needBracket = FALSE;
outStr << "&";
}
else
outStr << "&(";
outStr << walkCondExpr (expr->expr.unaryExp, pProc, numLoc);
if (needBracket == TRUE)
outStr << ")";
break;
case DEREFERENCE:
outStr << "*";
if (expr->expr.unaryExp->type == IDENTIFIER)
needBracket = FALSE;
else
outStr << "(";
outStr << walkCondExpr (expr->expr.unaryExp, pProc, numLoc);
if (needBracket == TRUE)
outStr << ")";
break;
case POST_INC:
outStr << walkCondExpr (expr->expr.unaryExp, pProc, numLoc) << "++";
break;
case POST_DEC:
outStr << walkCondExpr (expr->expr.unaryExp, pProc, numLoc) << "--";
break;
case PRE_INC:
outStr << "++"<< walkCondExpr (expr->expr.unaryExp, pProc, numLoc);
break;
case PRE_DEC:
outStr << "--"<< walkCondExpr (expr->expr.unaryExp, pProc, numLoc);
break;
case IDENTIFIER:
/* The identifier text is built in its own stream, then appended */
std::ostringstream o;
switch (expr->expr.ident.idType)
{
case GLOB_VAR:
o << symtab.sym[expr->expr.ident.idNode.globIdx].name;
break;
case REGISTER:
id = &pProc->localId.id_arr[expr->expr.ident.idNode.regiIdx];
if (id->name[0] == '\0') /* no name */
{
/* First use: name the register variable and declare it, with the
 * register name as a trailing comment */
sprintf (id->name, "loc%ld", ++(*numLoc));
if (id->id.regi < rAL)
cCode.appendDecl("%s %s; /* %s */\n",hlTypes[id->type], id->name,wordReg[id->id.regi - rAX]);
else
cCode.appendDecl("%s %s; /* %s */\n",hlTypes[id->type], id->name,byteReg[id->id.regi - rAL]);
}
if (id->hasMacro)
o << id->macro << "("<<id->name<<")";
else
o << id->name;
break;
case LOCAL_VAR:
o << pProc->localId.id_arr[expr->expr.ident.idNode.localIdx].name;
break;
case PARAM:
psym = &pProc->args.sym[expr->expr.ident.idNode.paramIdx];
if (psym->hasMacro)
o << psym->macro<<"("<<psym->name<< ")";
else
o << psym->name;
break;
case GLOB_VAR_IDX:
/* Printed as <seg*16 + off>[<index register>] */
bwGlb = &pProc->localId.id_arr[expr->expr.ident.idNode.idxGlbIdx].id.bwGlb;
o << (bwGlb->seg << 4) + bwGlb->off << "["<<wordReg[bwGlb->regi - rAX]<<"]";
break;
case CONSTANT:
/* Constants below 1000 are printed in decimal, larger ones in hex */
if (expr->expr.ident.idNode.kte.kte < 1000)
o << expr->expr.ident.idNode.kte.kte;
else
o << "0x"<<std::hex << expr->expr.ident.idNode.kte.kte;
break;
case STRING:
o << getString (expr->expr.ident.idNode.strIdx);
break;
case LONG_VAR:
id = &pProc->localId.id_arr[expr->expr.ident.idNode.longIdx];
if (id->name[0] != '\0') /* STK_FRAME & REG w/name*/
o << id->name;
else if (id->loc == REG_FRAME)
{
/* Unnamed register pair: name it, declare it and propagate the name
 * through propLongId */
sprintf (id->name, "loc%ld", ++(*numLoc));
cCode.appendDecl("%s %s; /* %s:%s */\n",hlTypes[id->type], id->name,wordReg[id->id.longId.h - rAX],wordReg[id->id.longId.l - rAX]);
o << id->name;
pProc->localId.propLongId (id->id.longId.l,id->id.longId.h, id->name);
}
else /* GLB_FRAME */
{
/* An index register other than 0 or bx produces no text here */
if (id->id.longGlb.regi == 0) /* not indexed */
o << "[" << (id->id.longGlb.seg<<4) + id->id.longGlb.offH <<"]";
else if (id->id.longGlb.regi == rBX)
o << "[" << (id->id.longGlb.seg<<4) + id->id.longGlb.offH <<"][bx]";
}
break;
case FUNCTION:
o << writeCall (expr->expr.ident.idNode.call.proc,expr->expr.ident.idNode.call.args, pProc, numLoc);
break;
case OTHER:
/* Printed as <seg>[<index regs><+/-hex offset>]; a zero offset is omitted */
off = expr->expr.ident.idNode.other.off;
o << wordReg[expr->expr.ident.idNode.other.seg - rAX]<< "[";
o << idxReg[expr->expr.ident.idNode.other.regi - INDEXBASE];
if (off < 0)
o << "-"<< hexStr (-off);
else if (off>0)
o << "+"<< hexStr (off);
o << "]";
} /* eos */
outStr << o.str();
break;
}
return outStr.str();
}
/* Makes a deep copy of this expression, allocating a new node for every
 * node of the tree, and returns the copy.
 * All unary node types are copied, including POST_INC, POST_DEC, PRE_INC
 * and PRE_DEC (which previously fell out of the switch and produced a NULL
 * copy). A NULL operand is copied as NULL instead of being dereferenced.
 * Returns NULL for a node type that is not an expression (eg. UNKNOWN_OP). */
COND_EXPR *COND_EXPR::clone()
{
    COND_EXPR* newExp = 0;          /* Expression node copy */

    switch (type)
    {
    case BOOLEAN_OP:
        newExp = new COND_EXPR(*this);
        newExp->expr.boolExpr.lhs =
            expr.boolExpr.lhs ? expr.boolExpr.lhs->clone() : 0;
        newExp->expr.boolExpr.rhs =
            expr.boolExpr.rhs ? expr.boolExpr.rhs->clone() : 0;
        break;
    case NEGATION:
    case ADDRESSOF:
    case DEREFERENCE:
    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
        newExp = new COND_EXPR(*this);
        newExp->expr.unaryExp = expr.unaryExp ? expr.unaryExp->clone() : 0;
        break;
    case IDENTIFIER:
        newExp = new COND_EXPR(*this);
        break;
    default:
        break;
    }
    return (newExp);
}
/* Replaces the operator of this boolean expression node with newOp.
 * The node type is not checked; the caller must pass a BOOLEAN_OP node. */
void COND_EXPR::changeBoolOp (condOp newOp)
{
    this->expr.boolExpr.op = newOp;
}
/* Replaces the first register identifier in *tree that refers to regi with
 * expr and returns TRUE; returns FALSE when no such identifier exists.
 * A node matches when its register equals regi, or, for regi in rAX..rBX,
 * when it is the low or high byte register of regi. The search descends
 * through boolean operations (lhs first) and through NEGATION, ADDRESSOF
 * and DEREFERENCE nodes. */
boolT insertSubTreeReg (COND_EXPR *expr, COND_EXPR **tree, byte regi,LOCAL_ID *locsym)
{
    COND_EXPR *node = *tree;
    if (node == NULL)
        return FALSE;

    switch (node->type) {
    case IDENTIFIER:
    {
        if (node->expr.ident.idType != REGISTER)
            return FALSE;

        const byte treeReg = locsym->id_arr[node->expr.ident.idNode.regiIdx].id.regi;
        bool matches = (treeReg == regi);                   /* word reg */
        if (!matches && (regi >= rAX) && (regi <= rBX))     /* word/byte reg */
            matches = (treeReg == (regi + rAL-1)) || (treeReg == (regi + rAH-1));
        if (!matches)
            return FALSE;

        *tree = expr;
        return TRUE;
    }
    case BOOLEAN_OP:
        if (insertSubTreeReg (expr, &node->expr.boolExpr.lhs, regi, locsym) ||
            insertSubTreeReg (expr, &node->expr.boolExpr.rhs, regi, locsym))
            return TRUE;
        return FALSE;
    case NEGATION:
    case ADDRESSOF:
    case DEREFERENCE:
        if (insertSubTreeReg(expr, &node->expr.unaryExp,regi, locsym))
            return TRUE;
        return FALSE;
    }
    return FALSE;
}
/* Replaces the first identifier in *tree whose long index equals longIdx
 * with exp and returns TRUE; returns FALSE when there is none.
 * The search descends through boolean operations (lhs first) and through
 * NEGATION, ADDRESSOF and DEREFERENCE nodes. An empty subtree (*tree ==
 * NULL) yields FALSE, matching insertSubTreeReg. */
boolT insertSubTreeLongReg (COND_EXPR *exp, COND_EXPR **tree, Int longIdx)
{
    if (*tree == NULL)      /* nothing to search in an empty subtree */
        return FALSE;

    switch ((*tree)->type) {
    case IDENTIFIER:
        if ((*tree)->expr.ident.idNode.longIdx == longIdx)
        {
            *tree = exp;
            return TRUE;
        }
        return FALSE;
    case BOOLEAN_OP:
        if (insertSubTreeLongReg (exp, &(*tree)->expr.boolExpr.lhs, longIdx))
            return TRUE;
        if (insertSubTreeLongReg (exp, &(*tree)->expr.boolExpr.rhs, longIdx))
            return TRUE;
        return FALSE;
    case NEGATION:
    case ADDRESSOF:
    case DEREFERENCE:
        if (insertSubTreeLongReg (exp, &(*tree)->expr.unaryExp, longIdx))
            return TRUE;
        return FALSE;
    }
    return FALSE;
}
/* Recursively deallocates the abstract syntax tree rooted at this node,
 * including the node itself; the object must not be used afterwards.
 * Operands of BOOLEAN_OP, NEGATION, ADDRESSOF and DEREFERENCE nodes are
 * released first. NULL operands are skipped rather than dereferenced.
 * Operands of other node types are not released here. */
void COND_EXPR::release()
{
    switch (type)
    {
    case BOOLEAN_OP:
        if (expr.boolExpr.lhs)
            expr.boolExpr.lhs->release();
        if (expr.boolExpr.rhs)
            expr.boolExpr.rhs->release();
        break;
    case NEGATION:
    case ADDRESSOF:
    case DEREFERENCE:
        if (expr.unaryExp)
            expr.unaryExp->release();
        break;
    default:
        break;
    }
    delete (this);
}
/***************************************************************************
 * Expression stack functions
 **************************************************************************/
/* Empties the local expression stack (expStk). Only the stack entries are
 * removed; the expressions they point to are not deallocated. */
void initExpStk()
{
    expStk.erase(expStk.begin(), expStk.end());
}
/* Pushes expr onto the top of the local expression stack (expStk). */
void pushExpStk (COND_EXPR *expr)
{
    expStk.insert(expStk.end(), expr);
}
/* Removes the element on the top of the local expression stack (expStk) and
 * returns it. The expression itself is handed to the caller untouched.
 * If there are no elements on the stack, returns NULL. */
COND_EXPR *popExpStk()
{
    COND_EXPR *topExp = 0;

    if (!expStk.empty())
    {
        topExp = expStk.back();
        expStk.pop_back();
    }
    return topExp;
}
/* Returns the number of expressions currently held on the expression
 * stack. */
Int numElemExpStk()
{
    const EXP_STK::size_type count = expStk.size();
    return count;
}
/* Returns whether the expression stack holds no elements. */
boolT emptyExpStk()
{
    return expStk.begin() == expStk.end();
}

668
src/backend.cpp Normal file
View File

@@ -0,0 +1,668 @@
/*****************************************************************************
* Project: dcc
* File: backend.c
* Purpose: Back-end module. Generates C code for each procedure.
* (C) Cristina Cifuentes
****************************************************************************/
#include <cassert>
#include <string>
#include "dcc.h"
#include <fstream>
#include <string.h>
#include <stdio.h>
bundle cCode; /* Procedure declaration and code */
using namespace std;
/* Indentation buffer.
 * indent() returns a pointer into the tail of this buffer, so the text it
 * yields is whatever lies between that position and the terminating NUL.
 * NOTE(review): this only produces 4*level blanks if the initialiser below
 * holds indSize-1 blanks -- confirm the literal's length. */
#define indSize 81 /* size of the indentation buffer. Each indentation
* is of 4 spaces => max. 20 indentation levels */
static char indentBuf[indSize] =
" ";
/* Returns the indentation string for a statement at depth indLevel (4
 * characters per level), as a pointer into indentBuf.
 * indLevel is clamped to 0..(indSize-1)/4 so that the returned pointer always
 * stays inside the buffer; levels within that range behave as before. */
static char *indent (Int indLevel)
{
    const Int maxLevel = (indSize - 1) / 4;

    if (indLevel < 0)
        indLevel = 0;
    else if (indLevel > maxLevel)
        indLevel = maxLevel;
    return (&indentBuf[indSize-(indLevel*4)-1]);
}
/* Returns a unique index to the next label. The first call returns 1 and
 * every later call returns one more than the previous one. */
static Int getNextLabel()
{
    static Int labelIdx = 1;    /* index of the next label */
    const Int issued = labelIdx;

    labelIdx++;
    return issued;
}
/* Prints statistics on this subroutine to stdout: the number of low-level
 * icodes and, unless the procedure is flagged PROC_ASM, the number of
 * high-level icodes together with the percentage reduction between the two
 * counts (read from the global stats record). */
void Function::displayStats ()
{
    printf("\nStatistics - Subroutine %s\n", name);
    printf ("Number of Icode instructions:\n");
    printf (" Low-level : %4d\n", stats.numLLIcode);
    if (flg & PROC_ASM)
        return;

    printf (" High-level: %4d\n", stats.numHLIcode);
    const double reduction = 100.0 - (stats.numHLIcode * 100.0) / stats.numLLIcode;
    printf (" Percentage reduction: %2.2f%%\n", reduction);
}
/**** this proc is not required any more?? ****/
/* Disabled code: everything between #if 0 and #endif is excluded from the
 * build and is kept for reference only. */
#if 0
static void fixupLabels (PPROC pProc)
/* Checks the graph (pProc->cfg) for any nodes that have labels, and gives
* a unique label number for it. This label is placed in the associated
* icode for the node (pProc->Icode). The procedure is done in sequential
* order of dsfLast numbering. */
{ Int i; /* index into the dfsLast array */
PBB *dfsLast; /* pointer to the dfsLast array */
dfsLast = pProc->dfsLast;
for (i = 0; i < pProc->numBBs; i++)
if (dfsLast[i]->flg/* & BB_HAS_LABEL*/) {
/* Flag the first icode of the node and give it a fresh label number */
pProc->Icode.icode[dfsLast[i]->start].ic.ll.flg |= HLL_LABEL;
pProc->Icode.icode[dfsLast[i]->start].ic.ll.hllLabNum = getNextLabel();
}
}
#endif
/* Returns the corresponding C string for the given character c. Characters
 * that need an escape sequence inside a C literal take 2 C characters:
 * backspace, tab, new line, form feed, carriage return, and also the double
 * quote and the backslash, which would otherwise end or corrupt a string
 * literal built from the result. Any other character is returned as is.
 * The result lives in a static buffer that is overwritten by the next call. */
char *cChar (byte c)
{
    static char res[3];
    char escape = '\0';     /* letter following the backslash, if any */

    switch (c) {
    case 0x08: escape = 'b';  break;    /* backspace */
    case 0x09: escape = 't';  break;    /* horizontal tab */
    case 0x0A: escape = 'n';  break;    /* new line */
    case 0x0C: escape = 'f';  break;    /* form feed */
    case 0x0D: escape = 'r';  break;    /* carriage return */
    case '"':  escape = '"';  break;    /* double quote */
    case '\\': escape = '\\'; break;    /* backslash */
    default:                            /* any other character */
        break;
    }

    if (escape != '\0')
    {
        res[0] = '\\';
        res[1] = escape;
        res[2] = '\0';
    }
    else
    {
        res[0] = (char)c;
        res[1] = '\0';
    }
    return (res);
}
/* Appends to cCode a declaration of the global variable psym together with
 * its initial contents as found in the program image.
 * Note: to get to the value of the variable:
 *      com file: prog.Image[operand]
 *      exe file: prog.Image[operand+0x100]
 * Sizes 1 and 2 are emitted as byte/word, size 4 as a pointer or a 4-char
 * array, and any other size as a string built with cChar. */
static void printGlobVar (SYM * psym)
{
    const dword relocOp = prog.fCOM ? psym->label : psym->label + 0x100;

    switch (psym->size) {
    case 1:
        /* %ld expects a long, so the image value is converted explicitly */
        cCode.appendDecl( "byte\t%s = %ld;\n",
                          psym->name, static_cast<long>(prog.Image[relocOp]));
        break;
    case 2:
        cCode.appendDecl( "word\t%s = %ld;\n",
                          psym->name, static_cast<long>(LH(prog.Image+relocOp)));
        break;
    case 4:
        if (psym->type == TYPE_PTR)     /* pointer */
            cCode.appendDecl( "word\t*%s = %ld;\n",
                              psym->name, static_cast<long>(LH(prog.Image+relocOp)));
        else                            /* char */
            cCode.appendDecl(
                        "char\t%s[4] = \"%c%c%c%c\";\n",
                        psym->name, prog.Image[relocOp],
                        prog.Image[relocOp+1], prog.Image[relocOp+2],
                        prog.Image[relocOp+3]);
        break;
    default:
    {
        /* initial contents of variable; a std::string releases its storage
         * on scope exit, where the previous allocMem buffer was never freed */
        std::string strContents;
        for (Int j = 0; j < psym->size; j++)
            strContents += cChar(prog.Image[relocOp + j]);
        cCode.appendDecl( "char\t*%s = \"%s\";\n",
                          psym->name, strContents.c_str());
    }
    }
}
// Note: Not called at present.
/* Appends the contents of the global symbol table to cCode. A symbol whose
 * duVal reports isUSE_VAL() (first used) is written with its initial value
 * through printGlobVar; any other symbol (first defined) gets a bare
 * declaration whose type is chosen from its size. Nothing is written when
 * the table is empty. */
static void writeGlobSymTable()
{
    if (! symtab.csym)
        return;

    cCode.appendDecl( "/* Global variables */\n");
    for (Int idx = 0; idx < symtab.csym; idx++)
    {
        SYM *pSym = &symtab.sym[idx];
        if (pSym->duVal.isUSE_VAL())    /* first used */
        {
            printGlobVar (pSym);
            continue;
        }

        /* first defined */
        const char *type;
        switch (pSym->size) {
        case 1:
            type = "byte\t";
            break;
        case 2:
            type = "int\t";
            break;
        case 4:
            type = (pSym->type == TYPE_PTR) ? "int\t*" : "char\t*";
            break;
        default:
            type = "char\t*";
        }
        cCode.appendDecl( "%s%s;\t/* size = %ld */\n",
                          type, pSym->name, pSym->size);
    }
    cCode.appendDecl( "\n");
}
/* Writes the header of the generated C file to ios: a comment naming the
 * input file and its type (COM or EXE) followed by the dcc.h include.
 * The text is assembled in cCode, which is initialised before and freed
 * after use. Global variables are not written at present. */
static void writeHeader (std::ostream &ios, char *fileName)
{
    newBundle (&cCode);

    /* Header comment and include line */
    cCode.appendDecl( "/*\n");
    cCode.appendDecl( " * Input file\t: %s\n", fileName);
    const char *fileKind = (prog.fCOM) ? "COM" : "EXE";
    cCode.appendDecl( " * File type\t: %s\n", fileKind);
    cCode.appendDecl( " */\n\n#include \"dcc.h\"\n\n");

    /* Write global symbol table */
    /** writeGlobSymTable(); *** need to change them into locident fmt ***/

    writeBundle (ios, cCode);
    freeBundle (&cCode);
}
/* Prints to stdout, separated by blanks, the name of every register whose
 * bit is set in the bitvector regi. Bits 0 .. INDEXBASE-1 are examined. */
static void writeBitVector (dword regi)
{
    for (Int bit = 0; bit < INDEXBASE; bit++)
    {
        if ((regi & power2(bit)) == 0)
            continue;
        printf ("%s ", allRegs[bit]);
    }
}
/* Checks the given icode to determine whether it has a label associated
 * to it.  If so, a goto is emitted to this label; otherwise, a new label
 * is created, backpatched into the already generated code, and a goto is
 * also emitted.
 * Args: pt      : icode that is the target of the jump.
 *       indLevel: indentation level of the emitted goto.
 * Note: this procedure is to be used when the label is to be backpatched
 *       onto code in cCode.code */
static void emitGotoLabel (ICODE * pt, Int indLevel)
{
    if (! (pt->ic.ll.flg & HLL_LABEL))  /* node hasn't got a lab */
    {
        /* Generate new label */
        pt->ic.ll.hllLabNum = getNextLabel();
        pt->ic.ll.flg |= HLL_LABEL;

        /* Node has been traversed already, so backpatch this label into
         * the code */
        addLabelBundle (cCode.code, pt->codeIdx, pt->ic.ll.hllLabNum);
    }

    /* The label definition written by addLabelBundle() is spelled with a
     * lower-case 'l' ("l<n>:").  C identifiers are case sensitive, so the
     * goto has to use exactly the same spelling or the generated code
     * refers to a label that does not exist. */
    cCode.appendCode( "%sgoto l%ld;\n", indent(indLevel),
                      pt->ic.ll.hllLabNum);
    stats.numHLIcode++;
}
// Note: Not currently called!
/* Forward-jump variant of the goto emitter: makes sure the target icode pt
 * carries a label number (allocating a new one when the HLL_LABEL flag is
 * not set yet) and appends a goto to that label at indentation indLevel.
 * Nothing is backpatched here, as the target code has not been traversed
 * yet. */
static void emitFwdGotoLabel (ICODE * pt, Int indLevel)
{
    const boolT needsLabel = ((pt->ic.ll.flg & HLL_LABEL) == 0);

    if (needsLabel)
    {
        /* Generate new label */
        pt->ic.ll.hllLabNum = getNextLabel();
        pt->ic.ll.flg |= HLL_LABEL;
    }

    cCode.appendCode( "%sgoto l%ld;\n", indent(indLevel),
                      pt->ic.ll.hllLabNum);
}
/* Writes the code for the current basic block.
* Args: pBB: pointer to the current basic block.
* Icode: pointer to the array of icodes for current procedure.
* lev: indentation level - used for formatting. */
static void writeBB (const BB * const pBB, ICODE * hli, Int lev, Function * pProc, Int *numLoc)
{ Int i, last;
char *line; /* Pointer to the HIGH-LEVEL line */
/* Save the index into the code table in case there is a later goto
* into this instruction (first instruction of the BB) */
hli[pBB->start].codeIdx = nextBundleIdx (&cCode.code);
/* Generate code for each hlicode that is not a HLI_JCOND */
for (i = pBB->start, last = i + pBB->length; i < last; i++)
if ((hli[i].type == HIGH_LEVEL) && (hli[i].invalid == FALSE))
{
line = write1HlIcode (hli[i].ic.hl, pProc, numLoc);
if (line[0] != '\0')
{
cCode.appendCode( "%s%s", indent(lev), line);
stats.numHLIcode++;
}
if (option.verbose)
hli[i].writeDU(i);
}
//if (hli[i].invalid)
//printf("Invalid icode: %d!\n", hli[i].invalid);
}
/* Recursive procedure that writes the code for the control flow graph
 * reachable from this basic block into cCode.code.
 * Parameters: indLevel: indentation level - used for formatting.
 *             pProc: procedure that owns this basic block; its Icode
 *                    array and dfsLast table are consulted.
 *             numLoc: last # assigned to local variables
 *             latchNode: dfsLast number of the latching node of the
 *                    enclosing loop; the traversal of a path stops once
 *                    the block with this number has been written.
 *                    NOTE: this parameter hides the member of the same
 *                    name, which is why the member is read through
 *                    this->latchNode below.
 *             _ifFollow: dfsLast index of the follow node of the enclosing
 *                    if, or UN_INIT when there is none; that node is not
 *                    written by this call. */
void BB::writeCode (Int indLevel, Function * pProc , Int *numLoc,Int latchNode, Int _ifFollow)
{
Int follow, /* ifFollow */
_loopType, /* Type of loop, if any */
_nodeType; /* Type of node */
BB * succ, *latch; /* Successor and latching node */
ICODE * picode; /* Pointer to HLI_JCOND instruction */
char *l; /* Pointer to HLI_JCOND expression */
boolT emptyThen, /* THEN clause is empty */
repCond; /* Repeat condition for while() */
/* Check if this basic block should be analysed */
/* The follow node of the enclosing if is left for the caller to write */
if ((_ifFollow != UN_INIT) && (this == pProc->dfsLast[_ifFollow]))
return;
/* Every block is written at most once */
if (traversed == DFS_ALPHA)
return;
traversed = DFS_ALPHA;
/* Check for start of loop */
repCond = FALSE;
latch = NULL;
_loopType = loopType;
if (_loopType)
{
/* this->latchNode is the member (the parameter has the same name) */
latch = pProc->dfsLast[this->latchNode];
switch (_loopType)
{
case WHILE_TYPE:
/* The condition is taken from the last icode of this block */
picode = pProc->Icode.GetIcode(start + length - 1);
/* Check for error in while condition */
if (picode->ic.hl.opcode != HLI_JCOND)
reportError (WHILE_FAIL);
/* Check if condition is more than 1 HL instruction */
if (numHlIcodes > 1)
{
/* Write the code for this basic block */
writeBB(this, pProc->Icode.GetFirstIcode(), indLevel, pProc, numLoc);
/* remembered so the block is written again before the loop trailer */
repCond = TRUE;
}
/* Condition needs to be inverted if the loop body is along
* the THEN path of the header node */
if (edges[ELSE].BBptr->dfsLastNum == loopFollow)
inverseCondOp (&picode->ic.hl.oper.exp);
{
std::string e=walkCondExpr (picode->ic.hl.oper.exp, pProc, numLoc);
cCode.appendCode( "\n%swhile (%s) {\n", indent(indLevel),e.c_str());
}
/* the condition has been emitted as the loop header */
picode->invalidate();
break;
case REPEAT_TYPE:
cCode.appendCode( "\n%sdo {\n", indent(indLevel));
/* Last icode of the latching node; it is read again below when the
* "} while (...)" trailer is written */
picode = pProc->Icode.GetIcode(latch->start+latch->length-1);
picode->invalidate();
break;
case ENDLESS_TYPE:
/* picode is left unset here; it is not read for this loop type */
cCode.appendCode( "\n%sfor (;;) {\n", indent(indLevel));
}
stats.numHLIcode += 1;
indLevel++;
}
/* Write the code for this basic block */
if (repCond == FALSE)
writeBB (this, pProc->Icode.GetFirstIcode(), indLevel, pProc, numLoc);
/* Check for end of path */
/* (latchNode here is the parameter, i.e. the enclosing loop's latch) */
_nodeType = nodeType;
if (_nodeType == RETURN_NODE || _nodeType == TERMINATE_NODE ||
_nodeType == NOWHERE_NODE || (dfsLastNum == latchNode))
return;
/* Check type of loop/node and process code */
if (_loopType) /* there is a loop */
{
assert(latch);
if (this != latch) /* loop is over several bbs */
{
if (_loopType == WHILE_TYPE)
{
/* The loop body is the successor that is not the loop follow */
succ = edges[THEN].BBptr;
if (succ->dfsLastNum == loopFollow)
succ = edges[ELSE].BBptr;
}
else
succ = edges[0].BBptr;
/* The body is written with this loop's latch as the stopping node */
if (succ->traversed != DFS_ALPHA)
succ->writeCode (indLevel, pProc, numLoc, latch->dfsLastNum,_ifFollow);
else /* has been traversed so we need a goto */
emitGotoLabel (pProc->Icode.GetIcode(succ->start), indLevel);
}
/* Loop epilogue: generate the loop trailer */
indLevel--;
if (_loopType == WHILE_TYPE)
{
/* Check if there is need to repeat other statements involved
* in while condition, then, emit the loop trailer */
if (repCond)
writeBB (this, pProc->Icode.GetFirstIcode(), indLevel+1, pProc, numLoc);
cCode.appendCode( "%s} /* end of while */\n",indent(indLevel));
}
else if (_loopType == ENDLESS_TYPE)
cCode.appendCode( "%s} /* end of loop */\n",indent(indLevel));
else if (_loopType == REPEAT_TYPE)
{
/* picode was set to the latch's last icode in the prologue above */
if (picode->ic.hl.opcode != HLI_JCOND)
reportError (REPEAT_FAIL);
{
string e=walkCondExpr (picode->ic.hl.oper.exp, pProc, numLoc);
cCode.appendCode( "%s} while (%s);\n", indent(indLevel),e.c_str());
}
}
/* Recurse on the loop follow */
if (loopFollow != MAX)
{
succ = pProc->dfsLast[loopFollow];
if (succ->traversed != DFS_ALPHA)
succ->writeCode (indLevel, pProc, numLoc, latchNode, _ifFollow);
else /* has been traversed so we need a goto */
emitGotoLabel (pProc->Icode.GetIcode(succ->start), indLevel);
}
}
else /* no loop, process nodeType of the graph */
{
if (_nodeType == TWO_BRANCH) /* if-then[-else] */
{
stats.numHLIcode++;
indLevel++;
emptyThen = FALSE;
if (ifFollow != MAX) /* there is a follow */
{
/* process the THEN part */
follow = ifFollow;
succ = edges[THEN].BBptr;
if (succ->traversed != DFS_ALPHA) /* not visited */
{
if (succ->dfsLastNum != follow) /* THEN part */
{
l = writeJcond ( pProc->Icode.GetIcode(start + length -1)->ic.hl,
pProc, numLoc);
cCode.appendCode( "\n%s%s", indent(indLevel-1), l);
succ->writeCode (indLevel, pProc, numLoc, latchNode,follow);
}
else /* empty THEN part => negate ELSE part */
{
l = writeJcondInv ( pProc->Icode.GetIcode(start + length -1)->ic.hl,
pProc, numLoc);
cCode.appendCode( "\n%s%s", indent(indLevel-1), l);
edges[ELSE].BBptr->writeCode (indLevel, pProc, numLoc, latchNode, follow);
emptyThen = TRUE;
}
}
else /* already visited => emit label */
emitGotoLabel (pProc->Icode.GetIcode(succ->start), indLevel);
/* process the ELSE part */
succ = edges[ELSE].BBptr;
if (succ->traversed != DFS_ALPHA) /* not visited */
{
if (succ->dfsLastNum != follow) /* ELSE part */
{
cCode.appendCode( "%s}\n%selse {\n",
indent(indLevel-1), indent(indLevel - 1));
succ->writeCode (indLevel, pProc, numLoc, latchNode, follow);
}
/* else (empty ELSE part) */
}
else if (! emptyThen) /* already visited => emit label */
{
cCode.appendCode( "%s}\n%selse {\n",
indent(indLevel-1), indent(indLevel - 1));
emitGotoLabel (pProc->Icode.GetIcode(succ->start), indLevel);
}
/* Close the if; indLevel is back at the level of the condition */
cCode.appendCode( "%s}\n", indent(--indLevel));
/* Continue with the follow */
succ = pProc->dfsLast[follow];
if (succ->traversed != DFS_ALPHA)
succ->writeCode (indLevel, pProc, numLoc, latchNode,_ifFollow);
}
else /* no follow => if..then..else */
{
/* Both branches are written in full, under the caller's _ifFollow */
l = writeJcond (
pProc->Icode.GetIcode(start + length -1)->ic.hl, pProc, numLoc);
cCode.appendCode( "%s%s", indent(indLevel-1), l);
edges[THEN].BBptr->writeCode (indLevel, pProc, numLoc, latchNode, _ifFollow);
cCode.appendCode( "%s}\n%selse {\n", indent(indLevel-1), indent(indLevel - 1));
edges[ELSE].BBptr->writeCode (indLevel, pProc, numLoc, latchNode, _ifFollow);
cCode.appendCode( "%s}\n", indent(--indLevel));
}
}
else /* fall, call, 1w */
{
succ = edges[0].BBptr; /* fall-through edge */
if (succ->traversed != DFS_ALPHA)
succ->writeCode (indLevel, pProc,numLoc, latchNode,_ifFollow);
}
}
}
/* Writes the procedure's declaration (including arguments), local variables,
* and invokes the procedure that writes the code of the given record *hli */
void Function::codeGen (std::ostream &fs)
{
Int i, numLoc;
//STKFRAME * args; /* Procedure arguments */
char buf[200], /* Procedure's definition */
arg[30]; /* One argument */
ID *locid; /* Pointer to one local identifier */
BB *pBB; /* Pointer to basic block */
/* Write procedure/function header */
newBundle (&cCode);
if (flg & PROC_IS_FUNC) /* Function */
cCode.appendDecl( "\n%s %s (", hlTypes[retVal.type],name);
else /* Procedure */
cCode.appendDecl( "\nvoid %s (", name);
/* Write arguments */
memset (buf, 0, sizeof(buf));
for (i = 0; i < args.sym.size(); i++)
{
if (args.sym[i].invalid == FALSE)
{
sprintf (arg,"%s %s",hlTypes[args.sym[i].type], args.sym[i].name);
strcat (buf, arg);
if (i < (args.numArgs - 1))
strcat (buf, ", ");
}
}
strcat (buf, ")\n");
cCode.appendDecl( "%s", buf);
/* Write comments */
writeProcComments();
/* Write local variables */
if (! (flg & PROC_ASM))
{
numLoc = 0;
for (i = 0; i < localId.csym(); i++)
{
locid = &localId.id_arr[i];
/* Output only non-invalidated entries */
if (locid->illegal == FALSE)
{
if (locid->loc == REG_FRAME)
{
/* Register variables are assigned to a local variable */
if (((flg & SI_REGVAR) && (locid->id.regi == rSI)) ||
((flg & DI_REGVAR) && (locid->id.regi == rDI)))
{
sprintf (locid->name, "loc%ld", ++numLoc);
cCode.appendDecl( "int %s;\n", locid->name);
}
/* Other registers are named when they are first used in
* the output C code, and appended to the proc decl. */
}
else if (locid->loc == STK_FRAME)
{
/* Name local variables and output appropriate type */
sprintf (locid->name, "loc%ld", ++numLoc);
cCode.appendDecl( "%s %s;\n",hlTypes[locid->type], locid->name);
}
}
}
}
/* Write procedure's code */
if (flg & PROC_ASM) /* generate assembler */
disassem (3, this);
else /* generate C */
cfg.front()->writeCode (1, this, &numLoc, MAX, UN_INIT);
cCode.appendCode( "}\n\n");
writeBundle (fs, cCode);
freeBundle (&cCode);
/* Write Live register analysis information */
if (option.verbose)
for (i = 0; i < numBBs; i++)
{
pBB = dfsLast[i];
if (pBB->flg & INVALID_BB) continue; /* skip invalid BBs */
printf ("BB %d\n", i);
printf (" Start = %d, end = %d\n", pBB->start, pBB->start +
pBB->length - 1);
printf (" LiveUse = ");
writeBitVector (pBB->liveUse);
printf ("\n Def = ");
writeBitVector (pBB->def);
printf ("\n LiveOut = ");
writeBitVector (pBB->liveOut);
printf ("\n LiveIn = ");
writeBitVector (pBB->liveIn);
printf ("\n\n");
}
}
/* Generates the code of the procedure at call graph node pcallGraph after
 * first recursing into all of its out-edges (depth-first order of the call
 * graph).  Procedures that were already output, or that are library
 * procedures, are skipped.  The per-procedure and total icode statistics are
 * updated along the way.
 * Args: filename  : name of the input file (only handed on when recursing).
 *       pcallGraph: call graph node to process.
 *       ios       : stream that receives the generated code. */
static void backBackEnd (char *filename, CALL_GRAPH * pcallGraph, std::ostream &ios)
{
    // IFace.Yield(); /* This is a good place to yield to other apps */

    /* Nothing to do for procedures already written or coming from a library */
    if (pcallGraph->proc->flg & (PROC_OUTPUT | PROC_ISLIB))
        return;
    pcallGraph->proc->flg |= PROC_OUTPUT;

    /* Successors first */
    for (size_t edge = 0; edge < pcallGraph->outEdges.size(); ++edge)
        backBackEnd (filename, pcallGraph->outEdges[edge], ios);

    /* Then this procedure */
    stats.numLLIcode = pcallGraph->proc->Icode.GetNumIcodes();
    stats.numHLIcode = 0;
    pcallGraph->proc->codeGen (ios);

    /* Statistics */
    if (option.Stats)
        pcallGraph->proc->displayStats ();
    if (pcallGraph->proc->flg & PROC_ASM)
        return;                 /* assembler output is not counted */
    stats.totalLL += stats.numLLIcode;
    stats.totalHL += stats.numHLIcode;
}
/* Invokes the necessary routines to produce code one procedure at a time.
 * The output goes to a file named after fileName, with everything from the
 * last '.' onwards replaced by ".b" (or with ".b" appended when fileName
 * contains no '.').
 * Args: fileName  : name of the input file.
 *       pcallGraph: root of the call graph to be output. */
void BackEnd (char *fileName, CALL_GRAPH * pcallGraph)
{
    /* Get output file name.  A std::string grows as needed: the previous
     * buffer of strlen(fileName)+1 bytes was too small for the appended
     * ".b" whenever fileName had no extension (or ended in a bare '.'),
     * and it was never released. */
    std::string outName (fileName);
    const std::string::size_type dot = outName.rfind ('.');
    if (dot != std::string::npos)
        outName.erase (dot);
    outName += ".b";            /* b for beta */

    /* Open output file */
    std::ofstream fs;           /* Output C file */
    fs.open(outName.c_str());
    if(!fs.is_open())
        fatalError (CANNOT_OPEN, outName.c_str());
    printf ("dcc: Writing C beta file %s\n", outName.c_str());

    /* Header information */
    writeHeader (fs, fileName);

    /* Initialize total Icode instructions statistics */
    stats.totalLL = 0;
    stats.totalHL = 0;

    /* Process each procedure at a time */
    backBackEnd (fileName, pcallGraph, fs);

    /* Close output file */
    fs.close();
    printf ("dcc: Finished writing C beta file\n");
}

115
src/bundle.cpp Normal file
View File

@@ -0,0 +1,115 @@
/*****************************************************************************
* File: bundle.c
* Module that handles the bundle type (array of pointers to strings).
* (C) Cristina Cifuentes
****************************************************************************/
#include "dcc.h"
#include <stdarg.h>
#include <iostream>
#include <memory.h>
#include <stdlib.h>
#include <string.h>
#define deltaProcLines 20
/* Prepares a bundle for use.  Currently a no-op: the body is empty, so the
 * bundle passed in is neither allocated nor modified here. */
void newBundle (bundle *)
{
}
/* Grows the table strTab by deltaProcLines entries; the strings already in
 * the table are kept. */
static void incTableSize (strTable *strTab)
{
    strTable &table = *strTab;
    table.resize(table.size() + deltaProcLines);
}
/* Appends the new line (in printf style) to the string table strTab.
 * Args: strTab: table that receives the line.
 *       format: printf-style format string, followed by its arguments.
 * The formatted text is limited to lineSize-1 characters; anything longer
 * is truncated instead of being written past the end of the local buffer. */
void appendStrTab (strTable *strTab, const char *format, ...)
{
    va_list args;
    char buf[lineSize];

    va_start (args, format);
    vsnprintf (buf, sizeof(buf), format, args);    /* bounded, always terminated */
    va_end (args);
    strTab->push_back(buf);
}
/* Returns the index that the next line appended to strTab will get, which
 * is the current number of entries in the table. */
Int nextBundleIdx (strTable *strTab)
{
    const strTable &table = *strTab;
    return static_cast<Int>(table.size());
}
/* Adds the given label to the start of the line strTab[idx].  The first
 * four characters of the line (its leading indentation) are removed and
 * replaced by "l<label>: ".
 * Args: strTab: table holding the line.
 *       idx   : index of the line to be labelled.
 *       label : number of the label.
 * The resulting line is limited to lineSize-1 characters. */
void addLabelBundle (strTable &strTab, Int idx, Int label)
{
    const size_t indentLen = 4;         /* characters dropped from the line */
    char s[lineSize];

    /* Never step past the end of a line that is shorter than the indent */
    const size_t lineLen = strTab[idx].size();
    const size_t skip = (lineLen < indentLen) ? lineLen : indentLen;

    /* Bounded formatting; the cast makes the argument match %ld */
    snprintf (s, sizeof(s), "l%ld: %s", static_cast<long>(label),
              strTab[idx].c_str() + skip);
    strTab[idx] = s;
}
/* Writes the contents of the string table on the file fp. */
static void writeStrTab (std::ostream &ios, strTable &strTab)
{
Int i;
for (i = 0; i < strTab.size(); i++)
ios << strTab[i];
}
/* Writes the contents of the bundle (procedure declaration, then procedure
 * code) to the stream ios.  An extra newline is written between the two
 * parts unless the last declaration line starts with a blank.
 * Args: ios     : output stream.
 *       procCode: bundle to be written. */
void writeBundle (std::ostream &ios, bundle procCode)
{
    writeStrTab (ios, procCode.decl);

    /* A bundle without any declaration line has no last line to look at;
     * in that case no separator is written. */
    if (! procCode.decl.empty() &&
        procCode.decl[procCode.decl.size() - 1][0] != ' ')
        ios << "\n";

    writeStrTab (ios, procCode.code);
}
/* Empties the string table strTab by removing all of its entries. */
static void freeStrTab (strTable &strTab)
{
strTab.clear();
}
/* Deallocates the space taken by the bundle procCode: both its declaration
 * table and its code table are emptied. */
void freeBundle (bundle *procCode)
{
    bundle &target = *procCode;

    freeStrTab (target.decl);
    freeStrTab (target.code);
}
/* Appends a new line (in printf style) to the code part of the bundle.
 * Args: format: printf-style format string, followed by its arguments.
 * The formatted text is limited to lineSize-1 characters; anything longer
 * is truncated instead of being written past the end of the local buffer. */
void bundle::appendCode(const char *format,...)
{
    va_list args;
    char buf[lineSize]={0};

    va_start (args, format);
    vsnprintf (buf, sizeof(buf), format, args);    /* bounded, always terminated */
    va_end (args);
    code.push_back(buf);
}
/* Appends a new line (in printf style) to the declaration part of the
 * bundle.
 * Args: format: printf-style format string, followed by its arguments.
 * The formatted text is limited to lineSize-1 characters; anything longer
 * is truncated instead of being written past the end of the local buffer. */
void bundle::appendDecl(const char *format,...)
{
    va_list args;
    char buf[lineSize]={0};

    va_start (args, format);
    vsnprintf (buf, sizeof(buf), format, args);    /* bounded, always terminated */
    va_end (args);
    decl.push_back(buf);
}

1018
src/chklib.cpp Normal file

File diff suppressed because it is too large Load Diff

265
src/comwrite.cpp Normal file
View File

@@ -0,0 +1,265 @@
/*****************************************************************************
* File: comwrite.c
* Purpose: writes comments about C programs and descriptions about dos
* interrupts in the string line given.
* Project: dcc
* (C) Cristina Cifuentes
****************************************************************************/
#include "dcc.h"
#include <string.h>
#define intSize 40
/* Descriptions of the INT 21h services.  The table is indexed directly by
 * the function number, so the position of every entry matters; unassigned
 * function numbers are filled with "Reserved". */
static const char *int21h[] =
{
    "Terminate process",                    /* 0x00 */
    "Character input with echo",            /* 0x01 */
    "Character output",                     /* 0x02 */
    "Auxiliary input",                      /* 0x03 */
    "Auxiliary output",                     /* 0x04 */
    "Printer output",                       /* 0x05 */
    "Direct console i/o",                   /* 0x06 */
    "Unfiltered char i w/o echo",           /* 0x07 */
    "Character input without echo",         /* 0x08 */
    "Display string",                       /* 0x09 */
    "Buffered keyboard input",              /* 0x0A */
    "Check input status",                   /* 0x0B */
    "Flush input buffer and then input",    /* 0x0C */
    "Disk reset",                           /* 0x0D */
    "Select disk",                          /* 0x0E */
    "Open file",                            /* 0x0F */
    "Close file",                           /* 0x10 */
    "Find first file",                      /* 0x11 */
    "Find next file",                       /* 0x12 */
    "Delete file",                          /* 0x13 */
    "Sequential read",                      /* 0x14 */
    "Sequential write",                     /* 0x15 */
    "Create file",                          /* 0x16 */
    "Rename file",                          /* 0x17 */
    "Reserved",                             /* 0x18 */
    "Get current disk",                     /* 0x19 */
    "Set DTA address",                      /* 0x1A */
    "Get default drive data",               /* 0x1B */
    "Get drive data",                       /* 0x1C */
    "Reserved",                             /* 0x1D */
    "Reserved",                             /* 0x1E */
    "Reserved",                             /* 0x1F */
    "Reserved",                             /* 0x20 */
    "Random read",                          /* 0x21 */
    "Random write",                         /* 0x22 */
    "Get file size",                        /* 0x23 */
    "Set relative record number",           /* 0x24 */
    "Set interrupt vector",                 /* 0x25 */
    "Create new PSP",                       /* 0x26 */
    "Random block read",                    /* 0x27 */
    "Random block write",                   /* 0x28 */
    "Parse filename",                       /* 0x29 */
    "Get date",                             /* 0x2A */
    "Set date",                             /* 0x2B */
    "Get time",                             /* 0x2C */
    "Set time",                             /* 0x2D */
    "Set verify flag",                      /* 0x2E */
    "Get DTA address",                      /* 0x2F */
    "Get MSDOS version number",             /* 0x30 */
    "Terminate and stay resident",          /* 0x31 */
    "Reserved",                             /* 0x32 */
    "Get or set break flag",                /* 0x33 */
    "Reserved",                             /* 0x34 */
    "Get interrupt vector",                 /* 0x35 */
    "Get drive allocation info",            /* 0x36 */
    "Reserved",                             /* 0x37 */
    "Get or set country info",              /* 0x38 */
    "Create directory",                     /* 0x39 */
    "Delete directory",                     /* 0x3A */
    "Set current directory",                /* 0x3B */
    "Create file",                          /* 0x3C */
    "Open file",                            /* 0x3D */
    "Close file",                           /* 0x3E */
    "Read file or device",                  /* 0x3F */
    "Write file or device",                 /* 0x40 */
    "Delete file",                          /* 0x41 */
    "Set file pointer",                     /* 0x42 */
    "Get or set file attributes",           /* 0x43 */
    "IOCTL (i/o control)",                  /* 0x44 */
    "Duplicate handle",                     /* 0x45 */
    "Redirect handle",                      /* 0x46 */
    "Get current directory",                /* 0x47 */
    "Allocate memory block",                /* 0x48 */
    "Release memory block",                 /* 0x49 */
    "Resize memory block",                  /* 0x4A */
    "Execute program (exec)",               /* 0x4B */
    "Terminate process with return code",   /* 0x4C */
    "Get return code",                      /* 0x4D */
    "Find first file",                      /* 0x4E */
    "Find next file",                       /* 0x4F */
    "Reserved",                             /* 0x50 */
    "Reserved",                             /* 0x51 */
    "Reserved",                             /* 0x52 */
    "Reserved",                             /* 0x53 */
    "Get verify flag",                      /* 0x54 */
    "Reserved",                             /* 0x55 */
    "Rename file",                          /* 0x56 */
    "Get or set file date & time",          /* 0x57 */
    "Get or set allocation strategy",       /* 0x58 */
    "Get extended error information",       /* 0x59 */
    "Create temporary file",                /* 0x5A */
    "Create new file",                      /* 0x5B */
    "Lock or unlock file region",           /* 0x5C */
    "Reserved",                             /* 0x5D */
    "Get machine name",                     /* 0x5E */
    "Device redirection",                   /* 0x5F */
    "Reserved",                             /* 0x60 */
    "Reserved",                             /* 0x61 */
    "Get PSP address",                      /* 0x62 */
    "Get DBCS lead byte table",             /* 0x63 */
    "Reserved",                             /* 0x64 */
    "Get extended country information",     /* 0x65 */
    "Get or set code page",                 /* 0x66 */
    "Set handle count",                     /* 0x67 */
    "Commit file",                          /* 0x68 */
    "Reserved",                             /* 0x69 */
    "Reserved",                             /* 0x6A */
    "Reserved",                             /* 0x6B */
    "Extended open file"                    /* 0x6C */
};
/* Descriptions of the interrupts 0x20 .. 0x2E; the entry for interrupt
 * number n is found at index n - 0x20. */
static const char *intOthers[] = {
    /* 0x20 */ "Exit",
    /* 0x21 */ "",                              /* other table */
    /* 0x22 */ "Terminate handler address",
    /* 0x23 */ "Ctrl-C handler address",
    /* 0x24 */ "Critical-error handler address",
    /* 0x25 */ "Absolute disk read",
    /* 0x26 */ "Absolute disk write",
    /* 0x27 */ "Terminate and stay resident",
    /* 0x28 */ "Reserved",
    /* 0x29 */ "Reserved",
    /* 0x2A */ "Reserved",
    /* 0x2B */ "Reserved",
    /* 0x2C */ "Reserved",
    /* 0x2D */ "Reserved",
    /* 0x2E */ "Reserved"
};
/* Writes the description of the current interrupt.  Appends it to the
 * string s as "\t/* <description> *" "/\n"; when no description is known for
 * the interrupt, only the terminating "\n" is appended.
 * Args: s: NUL-terminated string to append to; the caller has to provide
 *          room for the appended text. */
void ICODE::writeIntComment (char *s)
{
    const char *desc = NULL;    /* description found for this interrupt */

    if (ic.ll.immed.op == 0x21)
    {
        /* int21h is indexed by the function number: stay inside the table */
        const long func = static_cast<long>(ic.ll.dst.off);
        const long numFuncs = static_cast<long>(sizeof(int21h) / sizeof(int21h[0]));
        if (func >= 0 && func < numFuncs)
            desc = int21h[func];
    }
    else if (ic.ll.immed.op > 0x1F && ic.ll.immed.op < 0x2F)
    {
        desc = intOthers[ic.ll.immed.op - 0x20];
    }
    else if (ic.ll.immed.op == 0x2F)
    {
        switch (ic.ll.dst.off)
        {
            case 0x01: desc = "Print spooler"; break;
            case 0x02: desc = "Assign"; break;
            case 0x10: desc = "Share"; break;
            case 0xB7: desc = "Append"; break;
            /* any other function: no description, see below */
        }
    }

    if (desc != NULL)
    {
        /* Appended piecewise straight onto s.  The 40 byte scratch buffer
         * used previously was too small for the longest descriptions and
         * was never released. */
        strcat (s, "\t/* ");
        strcat (s, desc);
        strcat (s, " */\n");
    }
    else
        strcat (s, "\n");       /* every path terminates the line */
}
//, &cCode.decl
void Function::writeProcComments()
{
int i;
ID *id; /* Pointer to register argument identifier */
STKSYM * psym; /* Pointer to register argument symbol */
/* About the parameters */
if (this->cbParam)
cCode.appendDecl("/* Takes %d bytes of parameters.\n",this->cbParam);
else if (this->flg & REG_ARGS)
{
cCode.appendDecl("/* Uses register arguments:\n");
for (i = 0; i < this->args.numArgs; i++)
{
psym = &this->args.sym[i];
if (psym->regs->expr.ident.idType == REGISTER)
{
id = &this->localId.id_arr[psym->regs->expr.ident.idNode.regiIdx];
if (psym->regs->expr.ident.regiType == WORD_REG)
cCode.appendDecl(" * %s = %s.\n", psym->name,
wordReg[id->id.regi - rAX]);
else /* BYTE_REG */
cCode.appendDecl(" * %s = %s.\n", psym->name,
byteReg[id->id.regi - rAL]);
}
else /* long register */
{
id = &this->localId.id_arr[psym->regs->expr.ident.idNode.longIdx];
cCode.appendDecl(" * %s = %s:%s.\n", psym->name,
wordReg[id->id.longId.h - rAX],
wordReg[id->id.longId.l - rAX]);
}
}
}
else
cCode.appendDecl("/* Takes no parameters.\n");
/* Type of procedure */
if (this->flg & PROC_RUNTIME)
cCode.appendDecl(" * Runtime support routine of the compiler.\n");
if (this->flg & PROC_IS_HLL)
cCode.appendDecl(" * High-level language prologue code.\n");
if (this->flg & PROC_ASM)
{
cCode.appendDecl(" * Untranslatable routine. Assembler provided.\n");
if (this->flg & PROC_IS_FUNC)
switch (this->retVal.type) {
case TYPE_BYTE_SIGN: case TYPE_BYTE_UNSIGN:
cCode.appendDecl(" * Return value in register al.\n");
break;
case TYPE_WORD_SIGN: case TYPE_WORD_UNSIGN:
cCode.appendDecl(" * Return value in register ax.\n");
break;
case TYPE_LONG_SIGN: case TYPE_LONG_UNSIGN:
cCode.appendDecl(" * Return value in registers dx:ax.\n");
break;
} /* eos */
}
/* Calling convention */
if (this->flg & CALL_PASCAL)
cCode.appendDecl(" * Pascal calling convention.\n");
else if (this->flg & CALL_C)
cCode.appendDecl(" * C calling convention.\n");
else if (this->flg & CALL_UNKNOWN)
cCode.appendDecl(" * Unknown calling convention.\n");
/* Other flags */
if (this->flg & (PROC_BADINST | PROC_IJMP))
cCode.appendDecl(" * Incomplete due to an %s.\n",
(this->flg & PROC_BADINST)? "untranslated opcode":
"indirect JMP");
if (this->flg & PROC_ICALL)
cCode.appendDecl(" * Indirect call procedure.\n");
if (this->flg & IMPURE)
cCode.appendDecl(" * Contains impure code.\n");
if (this->flg & NOT_HLL)
cCode.appendDecl(" * Contains instructions not normally used by compilers.\n");
if (this->flg & FLOAT_OP)
cCode.appendDecl(" * Contains coprocessor instructions.\n");
/* Graph reducibility */
if (this->flg & GRAPH_IRRED)
cCode.appendDecl(" * Irreducible control flow graph.\n");
cCode.appendDecl(" */\n{\n");
}

692
src/control.cpp Normal file
View File

@@ -0,0 +1,692 @@
/*********************************************************************
* Description : Performs control flow analysis on the CFG
* (C) Cristina Cifuentes
********************************************************************/
#include <algorithm>
#include <list>
#include <cassert>
#include "dcc.h"
#include <stdio.h>
#include <string.h>
#if __BORLAND__
#include <alloc.h>
#else
#include <malloc.h>
#endif
//typedef struct list {
// Int nodeIdx;
// struct list *next;
//} nodeList;
typedef std::list<Int> nodeList; /* dfsLast index to the node */
/* ancestor(a,b) is true when node a has both a smaller dfsLastNum and a
 * smaller dfsFirstNum than node b; a and b are pointers to nodes.
 * There is a path on the DFST from a to b if a was first visited in a
 * dfs, and a was later visited than b when doing the last visit of each
 * node.
 * Both arguments are parenthesised in the expansion so that any pointer
 * expression (e.g. &node) can be passed safely. */
#define ancestor(a,b) (((a)->dfsLastNum < (b)->dfsLastNum) && ((a)->dfsFirstNum < (b)->dfsFirstNum))
/* Checks if the edge (p,s) is a back edge.  If node s was visited first
 * during the dfs traversal (ie. s has a smaller dfsFirst number) or s == p,
 * then it is a backedge.
 * Also increments the number of back edge entries of the header node s. */
static boolT isBackEdge (BB * p,BB * s)
{
    if (p->dfsFirstNum < s->dfsFirstNum)
        return (FALSE);         /* s was reached after p */

    s->numBackEdges++;
    return (TRUE);
}
/* Finds the common dominator of the current immediate dominator
 * currImmDom and its predecessor's immediate dominator predImmDom.  Both
 * are dfsLast indices; the larger of the two is repeatedly replaced by the
 * immediate dominator of its node until they meet or one becomes NO_DOM. */
static Int commonDom (Int currImmDom, Int predImmDom, Function * pProc)
{
    if (currImmDom == NO_DOM)
        return (predImmDom);
    if (predImmDom == NO_DOM)   /* predecessor is the root */
        return (currImmDom);

    for (;;)
    {
        if ((currImmDom == NO_DOM) || (predImmDom == NO_DOM) ||
            (currImmDom == predImmDom))
            break;

        if (currImmDom < predImmDom)
            predImmDom = pProc->dfsLast[predImmDom]->immedDom;
        else
            currImmDom = pProc->dfsLast[currImmDom]->immedDom;
    }
    return (currImmDom);
}
/* Finds the immediate dominator of each node in the graph pProc->cfg.
* Adapted version of the dominators algorithm by Hecht and Ullman; finds
* immediate dominators only.
* Note: graph should be reducible */
void Function::findImmedDom ()
{
BB * currNode;
Int currIdx, j, predIdx;
for (currIdx = 0; currIdx < numBBs; currIdx++)
{
currNode = dfsLast[currIdx];
if (currNode->flg & INVALID_BB) /* Do not process invalid BBs */
continue;
for (j = 0; j < currNode->inEdges.size(); j++)
{
BB* inedge=currNode->inEdges[j];
predIdx = inedge->dfsLastNum;
if (predIdx < currIdx)
currNode->immedDom = commonDom (currNode->immedDom,
predIdx, this);
}
}
}
/* Adds the node n (dfsLast index) at the end of the list l. */
static void insertList (nodeList &l, Int n)
{
    l.insert(l.end(), n);
}
/* Tells whether the node n (dfsLast numbering of a basic block) appears
 * anywhere on the list l. */
static boolT inList (nodeList &l, Int n)
{
    return std::any_of(l.begin(), l.end(),
                       [n](Int member) { return member == n; });
}
/* Removes every node from the list l, leaving it empty. */
static void freeList (nodeList &l)
{
    l.erase(l.begin(), l.end());
}
/* Tells whether the node n is one of the entries of the queue q. */
static boolT inInt(BB * n, queue &q)
{
    return std::any_of(q.begin(), q.end(),
                       [n](const BB *member) { return member == n; });
}
/* Finds the follow of the endless loop headed at node head (if any) and
 * stores it in head->loopFollow.  The follow is the successor of a loop
 * node that lies outside the loop and has the smallest dfsLast number;
 * loopFollow stays at MAX when no such successor exists. */
static void findEndlessFollow (Function * pProc, nodeList &loopNodes, BB * head)
{
    head->loopFollow = MAX;

    for (Int member : loopNodes)
    {
        BB *node = pProc->dfsLast[member];
        for (Int e = 0; e < node->numOutEdges; e++)
        {
            const Int target = node->edges[e].BBptr->dfsLastNum;
            if (inList(loopNodes, target))
                continue;               /* edge stays inside the loop */
            if (target < head->loopFollow)
                head->loopFollow = target;
        }
    }
}
//static void findNodesInLoop(BB * latchNode,BB * head,PPROC pProc,queue *intNodes)
/* Flags nodes that belong to the loop determined by (latchNode, head) and
 * determines the type of loop.
 * Args: latchNode: latching node of the loop (source of the back edge).
 *       head     : header node of the loop.
 *       pProc    : procedure the loop belongs to.
 *       intNodes : nodes of the interval being processed; only nodes on
 *                  this queue are added to the loop.
 * On return head->loopType and head->loopFollow are set, the loopHead field
 * of the loop's nodes is set, and for WHILE loops and for REPEAT loops with
 * a two-way latch the JX_LOOP flag is set on the last icode of the node that
 * holds the loop condition. */
static void findNodesInLoop(BB * latchNode,BB * head,Function * pProc,queue &intNodes)
{
Int i, headDfsNum, intNodeType;
nodeList loopNodes;
Int immedDom, /* dfsLast index to immediate dominator */
thenDfs, elseDfs; /* dsfLast index for THEN and ELSE nodes */
BB * pbb;
/* Flag nodes in loop headed by head (except header node) */
headDfsNum = head->dfsLastNum;
head->loopHead = headDfsNum;
insertList (loopNodes, headDfsNum);
/* Candidates are the nodes numbered strictly between head and latch */
for (i = headDfsNum + 1; i < latchNode->dfsLastNum; i++)
{
if (pProc->dfsLast[i]->flg & INVALID_BB) /* skip invalid BBs */
continue;
immedDom = pProc->dfsLast[i]->immedDom;
/* In the loop if its immediate dominator is, and it is in the interval */
if (inList (loopNodes, immedDom) && inInt(pProc->dfsLast[i], intNodes))
{
insertList (loopNodes, i);
if (pProc->dfsLast[i]->loopHead == NO_NODE)/*not in other loop*/
pProc->dfsLast[i]->loopHead = headDfsNum;
}
}
latchNode->loopHead = headDfsNum;
if (latchNode != head)
insertList (loopNodes, latchNode->dfsLastNum);
/* Determine type of loop and follow node */
/* NOTE: the if/else chain below has no braces around its outer levels;
* each else pairs with the nearest unmatched if, as the trailing comments
* on the else lines indicate. */
intNodeType = head->nodeType;
if (latchNode->nodeType == TWO_BRANCH)
if ((intNodeType == TWO_BRANCH) || (latchNode == head))
if ((latchNode == head) ||
(inList (loopNodes, head->edges[THEN].BBptr->dfsLastNum) &&
inList (loopNodes, head->edges[ELSE].BBptr->dfsLastNum)))
{
/* single-node loop, or both successors of head are in the loop */
head->loopType = REPEAT_TYPE;
/* NOTE(review): this test uses edges[0] whereas the similar test further
* below uses edges[THEN] - presumably THEN is 0; confirm. */
if (latchNode->edges[0].BBptr == head)
head->loopFollow = latchNode->edges[ELSE].BBptr->dfsLastNum;
else
head->loopFollow = latchNode->edges[THEN].BBptr->dfsLastNum;
pProc->Icode.SetLlFlag(latchNode->start + latchNode->length - 1,JX_LOOP);
}
else
{
/* one successor of head leaves the loop: that one is the follow */
head->loopType = WHILE_TYPE;
if (inList (loopNodes, head->edges[THEN].BBptr->dfsLastNum))
head->loopFollow = head->edges[ELSE].BBptr->dfsLastNum;
else
head->loopFollow = head->edges[THEN].BBptr->dfsLastNum;
pProc->Icode.SetLlFlag(head->start + head->length - 1, JX_LOOP);
}
else /* head = anything besides 2-way, latch = 2-way */
{
head->loopType = REPEAT_TYPE;
if (latchNode->edges[THEN].BBptr == head)
head->loopFollow = latchNode->edges[ELSE].BBptr->dfsLastNum;
else
head->loopFollow = latchNode->edges[THEN].BBptr->dfsLastNum;
pProc->Icode.SetLlFlag(latchNode->start + latchNode->length - 1,
JX_LOOP);
}
else /* latch = 1-way */
if (latchNode->nodeType == LOOP_NODE)
{
head->loopType = REPEAT_TYPE;
head->loopFollow = latchNode->edges[0].BBptr->dfsLastNum;
}
else if (intNodeType == TWO_BRANCH)
{
head->loopType = WHILE_TYPE;
pbb = latchNode;
thenDfs = head->edges[THEN].BBptr->dfsLastNum;
elseDfs = head->edges[ELSE].BBptr->dfsLastNum;
/* Walk from the latch up the immediate dominators until one of the two
* successors of head is reached; the other successor is the follow */
while (1)
{
if (pbb->dfsLastNum == thenDfs)
{
head->loopFollow = elseDfs;
break;
}
else if (pbb->dfsLastNum == elseDfs)
{
head->loopFollow = thenDfs;
break;
}
/* Check if couldn't find it, then it is a strangely formed
* loop, so it is safer to consider it an endless loop */
if (pbb->dfsLastNum <= head->dfsLastNum)
{
head->loopType = ENDLESS_TYPE;
findEndlessFollow (pProc, loopNodes, head);
break;
}
pbb = pProc->dfsLast[pbb->immedDom];
}
/* Only when the walk stopped below head, i.e. a follow was found as a
* successor of head: that follow node is taken out of the loop */
if (pbb->dfsLastNum > head->dfsLastNum)
pProc->dfsLast[head->loopFollow]->loopHead = NO_NODE; /*****/
pProc->Icode.SetLlFlag(head->start + head->length - 1, JX_LOOP);
}
else
{
head->loopType = ENDLESS_TYPE;
findEndlessFollow (pProc, loopNodes, head);
}
freeList(loopNodes);
}
//static void findNodesInInt (queue **intNodes, Int level, interval *Ii)
/* Recursive procedure that collects on intNodes the basic blocks that
 * belong to the interval Ii (ie. nodes from G1).  At level 1 the nodes of
 * the interval are appended as they are; at higher levels every node stands
 * for an interval of the previous level (its correspInt), which is expanded
 * in turn. */
static void findNodesInInt (queue &intNodes, Int level, interval *Ii)
{
    const bool atBaseLevel = (level == 1);

    for (BB *member : Ii->nodes)
    {
        if (atBaseLevel)
            appendQueue(intNodes, member);
        else
            findNodesInInt(intNodes, level-1, member->correspInt);
    }
}
/* Algorithm for structuring loops.  Walks every interval of every graph of
 * the derived sequence derivedG; for each interval the back edge into the
 * interval header whose source has the greatest dfsLast number is looked
 * for and, when it qualifies, the loop it forms is recorded through
 * findNodesInLoop().
 * Args: pProc   : procedure whose loops are structured.
 *       derivedG: derived sequence of graphs of pProc. */
static void structLoops(Function * pProc, derSeq *derivedG)
{
interval *Ii;
BB * intHead, /* interval header node */
* pred, /* predecessor node */
* latchNode;/* latching node (in case of loops) */
Int i, /* counter */
level = 0; /* derived sequence level */
interval *initInt; /* initial interval */
queue intNodes; /* list of interval nodes */
/* Structure loops */
/* for all derived sequences Gi */
for(derSeq::iterator iter=derivedG->begin(); iter!=derivedG->end(); ++iter)
{
level++;
Ii = iter->Ii;
while (Ii) /* for all intervals Ii of Gi */
{
latchNode = NULL;
intNodes.clear();
/* Find interval head (original BB node in G1) and create
* list of nodes of interval Ii. */
/* The first node of the interval is followed down through correspInt,
* once per level above the first */
initInt = Ii;
for (i = 1; i < level; i++)
initInt = (*initInt->nodes.begin())->correspInt;
intHead = *initInt->nodes.begin();
/* Find nodes that belong to the interval (nodes from G1) */
findNodesInInt (intNodes, level, Ii);
/* Find greatest enclosing back edge (if any) */
assert(intHead->numInEdges==intHead->inEdges.size());
for (i = 0; i < intHead->inEdges.size(); i++)
{
pred = intHead->inEdges[i];
/* Order matters: isBackEdge() increments intHead->numBackEdges, and it
* is only evaluated for predecessors that are inside the interval */
if (inInt(pred, intNodes) && isBackEdge(pred, intHead))
if (! latchNode)
latchNode = pred;
else
{
if (pred->dfsLastNum > latchNode->dfsLastNum)
latchNode = pred;
}
}
/* Find nodes in the loop and the type of loop */
if (latchNode)
{
/* Check latching node is at the same nesting level of case
* statements (if any) and that the node doesn't belong to
* another loop. */
if ((latchNode->caseHead == intHead->caseHead) &&
(latchNode->loopHead == NO_NODE))
{
intHead->latchNode = latchNode->dfsLastNum;
findNodesInLoop(latchNode, intHead, pProc, intNodes);
latchNode->flg |= IS_LATCH_NODE;
}
}
/* Next interval */
Ii = Ii->next;
}
/* Next derived sequence */
}
}
/* Returns whether the BB indexed by s is a successor of the BB indexed by
 * h, ie. whether one of the out-edges of h leads to s.  Both are dfsLast
 * indices.  Note that h is a case node. */
static boolT successor (Int s, Int h, Function * pProc)
{
    BB *header = pProc->dfsLast[h];

    Int edge = 0;
    while (edge < header->numOutEdges)
    {
        if (header->edges[edge].BBptr->dfsLastNum == s)
            return (TRUE);
        edge++;
    }
    return (FALSE);
}
/* Recursively tags the nodes reachable from pBB that belong to the case
 * statement described by l (nodes collected so far), head and tail (dfsLast
 * indices of the case header and of the case exit node).
 * A node is accepted when it is not the exit node, is not itself a
 * multi-branch node, and its immediate dominator is already in l. */
static void tagNodesInCase (BB * pBB, nodeList &l, Int head, Int tail)
{
    /* Mark first, so the node is never visited twice */
    pBB->traversed = DFS_CASE;

    Int nodeIdx = pBB->dfsLastNum;
    if (nodeIdx == tail)
        return;                         /* reached the exit of the case */
    if (pBB->nodeType == MULTI_BRANCH)
        return;                         /* nested case header: not followed */
    if (! inList (l, pBB->immedDom))
        return;                         /* not dominated from inside the case */

    insertList (l, nodeIdx);
    pBB->caseHead = head;

    /* Continue through every successor not yet visited by this tagging */
    for (Int edgeIdx = 0; edgeIdx < pBB->numOutEdges; edgeIdx++)
    {
        BB * succ = pBB->edges[edgeIdx].BBptr;
        if (succ->traversed != DFS_CASE)
            tagNodesInCase (succ, l, head, tail);
    }
}
/* Structures case statements.  This procedure is invoked only when pProc
 * has a case node.
 *
 * For every MULTI_BRANCH node (scanned in reverse dfsLast order) it
 *   1. picks the exit node of the case: a later node that is immediately
 *      dominated by the header, is not a direct successor of it, and has
 *      the most in-edges among such candidates;
 *   2. records that exit node in the header's caseTail;
 *   3. tags every node of the case with caseHead = header index. */
static void structCases(Function * pProc)
{
    Int i, j;
    BB * caseHeader;            /* case header node */
    /* NOTE(review): exitNode is deliberately NOT reset per header here, so a
     * case with no candidate of its own inherits the previous header's exit
     * node.  That matches the code as found; confirm whether it is intended. */
    Int exitNode = NO_NODE;     /* case exit node */

    /* Linear scan of the nodes in reverse dfsLast order, searching for
     * case nodes */
    for (i = pProc->numBBs - 1; i >= 0; i--)
    {
        if (pProc->dfsLast[i]->nodeType != MULTI_BRANCH)
            continue;

        caseHeader = pProc->dfsLast[i];

        /* The node list belongs to ONE case statement.  It is created fresh
         * for every header so that members of a previously structured case
         * cannot satisfy the dominance test in tagNodesInCase. */
        nodeList caseNodes;

        /* Find descendant node which has as immediate predecessor
         * the current header node, and is not a successor. */
        for (j = i + 2; j < pProc->numBBs; j++)
        {
            if ((!successor(j, i, pProc)) &&
                (pProc->dfsLast[j]->immedDom == i))
            {
                if (exitNode == NO_NODE)
                    exitNode = j;
                else if (pProc->dfsLast[exitNode]->numInEdges <
                         pProc->dfsLast[j]->numInEdges)
                    exitNode = j;
            }
        }
        pProc->dfsLast[i]->caseTail = exitNode;

        /* Tag nodes that belong to the case by recording the
         * header field with caseHeader. */
        insertList (caseNodes, i);
        pProc->dfsLast[i]->caseHead = i;
        for (j = 0; j < caseHeader->numOutEdges; j++)
            tagNodesInCase (caseHeader->edges[j].BBptr, caseNodes, i,
                            exitNode);
        if (exitNode != NO_NODE)
            pProc->dfsLast[exitNode]->caseHead = i;
    }
}
/* Assigns f as the if-follow node of every node whose dfsLast index is
 * stored in l, removing each entry as it is processed; l is empty when the
 * function returns. */
static void flagNodes (nodeList &l, Int f, Function * pProc)
{
    /* erase() hands back the iterator to the next entry, so it doubles as
     * the loop increment */
    for (nodeList::iterator entry = l.begin(); entry != l.end(); entry = l.erase(entry))
        pProc->dfsLast[*entry]->ifFollow = f;
}
/* Structures if statements.
 * Scans the nodes in reverse dfsLast order.  For each valid two-way node
 * whose branch icode is not flagged JX_LOOP, the follow node is the node
 * immediately dominated by it with the largest number of non-back in-edges.
 * When no follow with more than one such in-edge exists, the node is parked
 * in the unresolved list; the next node that does get a follow passes that
 * follow on to every parked node. */
static void structIfs (Function * pProc)
{
    nodeList domDesc;           /* nodes immediately dominated by the current node */
    nodeList unresolved;        /* if nodes still waiting for a follow node */

    for (Int curr = pProc->numBBs - 1; curr >= 0; curr--)
    {
        BB * currNode = pProc->dfsLast[curr];

        /* Do not process invalid BBs */
        if (currNode->flg & INVALID_BB)
            continue;

        if (currNode->nodeType == TWO_BRANCH)
        {
            /* The branch is the last icode of the block */
            Int branchIcode = currNode->start + currNode->length - 1;
            if (! (pProc->Icode.GetLlFlag(branchIcode) & JX_LOOP))
            {
                Int follow = 0;             /* candidate follow node */
                Int followInEdges = 0;      /* its non-back in-edge count */

                /* Collect the nodes that have curr as immediate dominator,
                 * remembering the one with the most forward in-edges (the
                 * last one wins on a tie) */
                for (Int desc = curr + 1; desc < pProc->numBBs; desc++)
                {
                    BB * pbb = pProc->dfsLast[desc];
                    if (pbb->immedDom != curr)
                        continue;
                    insertList (domDesc, desc);
                    if ((pbb->numInEdges - pbb->numBackEdges) >= followInEdges)
                    {
                        follow = desc;
                        followInEdges = pbb->numInEdges - pbb->numBackEdges;
                    }
                }

                /* A follow needs more than one forward in-edge */
                if ((follow == 0) || (followInEdges <= 1))
                    insertList (unresolved, curr);
                else
                {
                    currNode->ifFollow = follow;
                    if (!unresolved.empty())
                        flagNodes (unresolved, follow, pProc);
                }
            }
        }
        freeList (domDesc);
    }
}
/* Checks for compound conditions of basic blocks that have only 1 high
* level instruction. Whenever these blocks are found, they are merged
* into one block with the appropriate condition */
void Function::compoundCond()
{
Int i, j, k, numOutEdges;
BB * pbb, * t, * e, * obb,* pred;
ICODE * picode, * ticode;
COND_EXPR *exp;
TYPEADR_TYPE *edges;
boolT change;
change = TRUE;
while (change)
{
change = FALSE;
/* Traverse nodes in postorder, this way, the header node of a
* compound condition is analysed first */
for (i = 0; i < this->numBBs; i++)
{
pbb = this->dfsLast[i];
if (pbb->flg & INVALID_BB)
continue;
if (pbb->nodeType == TWO_BRANCH)
{
t = pbb->edges[THEN].BBptr;
e = pbb->edges[ELSE].BBptr;
/* Check (X || Y) case */
if ((t->nodeType == TWO_BRANCH) && (t->numHlIcodes == 1) &&
(t->numInEdges == 1) && (t->edges[ELSE].BBptr == e))
{
obb = t->edges[THEN].BBptr;
/* Construct compound DBL_OR expression */
picode = this->Icode.GetIcode(pbb->start + pbb->length -1);
ticode = this->Icode.GetIcode(t->start + t->length -1);
exp = COND_EXPR::boolOp (picode->ic.hl.oper.exp,
ticode->ic.hl.oper.exp, DBL_OR);
picode->ic.hl.oper.exp = exp;
/* Replace in-edge to obb from t to pbb */
for (j = 0; j < obb->numInEdges; j++)
if (obb->inEdges[j] == t)
{
obb->inEdges[j] = pbb;
break;
}
/* New THEN out-edge of pbb */
pbb->edges[THEN].BBptr = obb;
/* Remove in-edge t to e */
auto iter=std::find(e->inEdges.begin(),e->inEdges.end(),t);
assert(iter!=e->inEdges.end());
e->inEdges.erase(iter);
e->numInEdges--; /* looses 1 arc */
assert(e->numInEdges==e->inEdges.size());
t->flg |= INVALID_BB;
if (pbb->flg & IS_LATCH_NODE)
this->dfsLast[t->dfsLastNum] = pbb;
else
i--; /* to repeat this analysis */
change = TRUE;
}
/* Check (!X && Y) case */
else if ((t->nodeType == TWO_BRANCH) && (t->numHlIcodes == 1) &&
(t->numInEdges == 1) && (t->edges[THEN].BBptr == e))
{
obb = t->edges[ELSE].BBptr;
/* Construct compound DBL_AND expression */
picode = this->Icode.GetIcode(pbb->start + pbb->length -1);
ticode = this->Icode.GetIcode(t->start + t->length -1);
inverseCondOp (&picode->ic.hl.oper.exp);
exp = COND_EXPR::boolOp (picode->ic.hl.oper.exp,
ticode->ic.hl.oper.exp, DBL_AND);
picode->ic.hl.oper.exp = exp;
/* Replace in-edge to obb from t to pbb */
auto iter=std::find(obb->inEdges.begin(),obb->inEdges.end(),t);
assert(iter!=obb->inEdges.end());
*iter=pbb;
/* New THEN and ELSE out-edges of pbb */
pbb->edges[THEN].BBptr = e;
pbb->edges[ELSE].BBptr = obb;
/* Remove in-edge t to e */
iter=std::find(e->inEdges.begin(),e->inEdges.end(),t);
assert(iter!=e->inEdges.end());
e->inEdges.erase(iter); /* looses 1 arc */
e->numInEdges--; /* looses 1 arc */
assert(t->inEdges.size()==t->numInEdges);
t->flg |= INVALID_BB;
if (pbb->flg & IS_LATCH_NODE)
this->dfsLast[t->dfsLastNum] = pbb;
else
i--; /* to repeat this analysis */
change = TRUE;
}
/* Check (X && Y) case */
else if ((e->nodeType == TWO_BRANCH) && (e->numHlIcodes == 1) &&
(e->numInEdges == 1) && (e->edges[THEN].BBptr == t))
{
obb = e->edges[ELSE].BBptr;
/* Construct compound DBL_AND expression */
picode = this->Icode.GetIcode(pbb->start + pbb->length -1);
ticode = this->Icode.GetIcode(t->start + t->length -1);
exp = COND_EXPR::boolOp (picode->ic.hl.oper.exp,
ticode->ic.hl.oper.exp, DBL_AND);
picode->ic.hl.oper.exp = exp;
/* Replace in-edge to obb from e to pbb */
auto iter = std::find(obb->inEdges.begin(),obb->inEdges.end(),e);
assert(iter!=obb->inEdges.end());
*iter=pbb;
/* New ELSE out-edge of pbb */
pbb->edges[ELSE].BBptr = obb;
/* Remove in-edge e to t */
iter = std::find(t->inEdges.begin(),t->inEdges.end(),e);
assert(iter!=t->inEdges.end());
t->inEdges.erase(iter);
t->numInEdges--; /* looses 1 arc */
assert(t->inEdges.size()==t->numInEdges);
e->flg |= INVALID_BB;
if (pbb->flg & IS_LATCH_NODE)
this->dfsLast[e->dfsLastNum] = pbb;
else
i--; /* to repeat this analysis */
change = TRUE;
}
/* Check (!X || Y) case */
else if ((e->nodeType == TWO_BRANCH) && (e->numHlIcodes == 1) &&
(e->numInEdges == 1) && (e->edges[ELSE].BBptr == t))
{
obb = e->edges[THEN].BBptr;
/* Construct compound DBL_OR expression */
picode = this->Icode.GetIcode(pbb->start + pbb->length -1);
ticode = this->Icode.GetIcode(t->start + t->length -1);
inverseCondOp (&picode->ic.hl.oper.exp);
exp = COND_EXPR::boolOp (picode->ic.hl.oper.exp,
ticode->ic.hl.oper.exp, DBL_OR);
picode->ic.hl.oper.exp = exp;
/* Replace in-edge to obb from e to pbb */
assert(obb->numInEdges==obb->inEdges.size());
auto iter = std::find(obb->inEdges.begin(),obb->inEdges.end(),e);
assert(iter!=obb->inEdges.end());
*iter=pbb;
/* New THEN and ELSE out-edges of pbb */
pbb->edges[THEN].BBptr = obb;
pbb->edges[ELSE].BBptr = t;
/* Remove in-edge e to t */
iter = std::find(t->inEdges.begin(),t->inEdges.end(),e);
assert(iter!=t->inEdges.end());
t->inEdges.erase(iter);
t->numInEdges--; /* looses 1 arc */
assert(t->numInEdges=t->inEdges.size());
e->flg |= INVALID_BB;
if (pbb->flg & IS_LATCH_NODE)
this->dfsLast[e->dfsLastNum] = pbb;
else
i--; /* to repeat this analysis */
change = TRUE;
}
}
}
}
}
/* Structuring driver for the control flow graph of this procedure.
 * derivedG is the derived sequence of graphs; it is only handed on to
 * structLoops.  The passes run in a fixed order: immediate dominators,
 * case statements (only when the procedure has a case node), loops, ifs. */
void Function::structure(derSeq *derivedG)
{
    /* structCases and structIfs read the immedDom field of the nodes */
    this->findImmedDom();

    if (this->hasCase)
    {
        structCases(this);
    }
    structLoops(this, derivedG);
    structIfs(this);
}

1099
src/dataflow.cpp Normal file

File diff suppressed because it is too large Load Diff

163
src/dcc.cpp Normal file
View File

@@ -0,0 +1,163 @@
/*****************************************************************************
* dcc decompiler
* Reads the command line switches and then executes each major section in turn
* (C) Cristina Cifuentes
****************************************************************************/
#include "dcc.h"
#include <string.h>
#ifdef __UNIX__
//#include <unistd.h>
#else
#include <stdio.h>
#include <io.h> /* For unlink() */
#endif
/* Global variables - extern to other modules */
char *progname; /* argv[0] - for error msgs; set by initargs() */
char *asm1_name, *asm2_name; /* Assembler output filenames; set by initargs() from -o or derived from the input file name */
SYMTAB symtab; /* Global symbol table */
STATS stats; /* cfg statistics; totalLL/totalHL are printed by displayTotalStats() */
PROG prog; /* programs fields */
OPTION option; /* Command line options; the flags are filled in by initargs(), filename by main() */
//Function * pProcList; /* List of procedures, topologically sort */
//Function * pLastProc; /* Pointer to last node in procedure list */
/* List of procedures; presumably the replacement for the two commented-out
 * list pointers above - TODO confirm */
std::list<Function> pProcList;
CALL_GRAPH *callGraph; /* Call graph of the program; passed to FrontEnd() and BackEnd() by main() */
/* Forward declarations of the file-local helpers defined below main() */
static char *initargs(int argc, char *argv[]);
static void displayTotalStats();
/****************************************************************************
 * main - program entry point.
 * Parses the command line, then runs the three stages of the decompiler in
 * order (front end, universal decompiling machine, back end), writes the
 * call graph and, when requested with -s, the final statistics.
 * Always returns 0; fatal problems terminate through fatalError().
 ***************************************************************************/
int main(int argc, char *argv[])
{
    /* Extract switches; what comes back is the name of the file to decompile */
    char *inputName = initargs(argc, argv);
    strcpy(option.filename, inputName);

    /* Front end reads in EXE or COM file, parses it into I-code while
     * building the call graph and attaching appropriate bits of code for
     * each procedure. */
    FrontEnd (option.filename, &callGraph);

    /* In the middle is a so called Universal Decompiling Machine.
     * It processes the procedure list and I-code and attaches where it can
     * to each procedure an optimised cfg and ud lists */
    udm();

    /* Back end converts each procedure into C using I-code, interval
     * analysis, data flow etc. and outputs it to output file ready for
     * re-compilation. */
    BackEnd(option.filename, callGraph);
    callGraph->write();

    if (option.Stats)
    {
        displayTotalStats();
    }

    /* The data structures are not released explicitly:
    freeDataStructures(pProcList);
    */
    return 0;
}
/****************************************************************************
* initargs - Extract command line arguments
*
* Walks the leading arguments that start with '-', setting the matching
* fields of the global `option` structure, and expects exactly one
* argument (the file to decompile) to remain afterwards.
*
* Side effects: sets progname; may set asm1_name / asm2_name; removes any
* existing assembler output file(s) when -a was given.
* Returns the remaining argument (the program to decompile).  An unknown
* switch or a wrong number of file arguments ends in fatalError(), which
* terminates the process.
***************************************************************************/
static char *initargs(int argc, char *argv[])
{
char *pc;
progname = *argv; /* Save invocation name for error messages */
/* One iteration per argument that begins with '-' */
while (--argc > 0 && (*++argv)[0] == '-') {
/* Several single-letter switches may be packed into one argument */
for (pc = argv[0]+1; *pc; pc++)
switch (*pc) {
case 'a': /* Print assembler listing */
/* -a2 selects the second listing; -a and -a1 select the first */
if (*(pc+1) == '2')
option.asm2 = TRUE;
else
option.asm1 = TRUE;
/* Skip the digit so it is not taken for a switch letter */
if (*(pc+1) == '1' || *(pc+1) == '2')
pc++;
break;
case 'c':
option.Calls = TRUE;
break;
case 'i':
option.Interact = TRUE;
break;
case 'm': /* Print memory map */
option.Map = TRUE;
break;
case 's': /* Print Stats */
option.Stats = TRUE;
break;
case 'V': /* Very verbose => verbose */
option.VeryVerbose = TRUE;
/* no break: -V also switches on plain verbose */
case 'v': /* Make everything verbose */
option.verbose = TRUE;
break;
case 'o': /* assembler output file */
/* The name either follows directly (-oname) ... */
if (*(pc+1)) {
asm1_name = asm2_name = pc+1;
goto NextArg;
}
/* ... or is the next argument (-o name) */
else if (--argc > 0) {
asm1_name = asm2_name = *++argv;
goto NextArg;
}
/* no break: -o without a name is reported like an unknown switch */
default:
fatalError(INVALID_ARG, *pc);
return *argv;
}
NextArg:;
}
/* Exactly one argument must be left: the program to decompile */
if (argc == 1)
{
if (option.asm1 || option.asm2)
{
if (! asm1_name)
{
/* No -o given: derive the names from the input file name.
* The +4 leaves room for a 3 character suffix and the terminator. */
asm1_name = strcpy((char*)allocMem(strlen(*argv)+4), *argv);
pc = strrchr(asm1_name, '.');
/* Cut the extension, but only when the last '.' lies after the last
* '/', i.e. belongs to the file name and not to a directory.
* NOTE(review): this relies on comparing against a null pointer when
* either character is absent - confirm this is acceptable on all
* target compilers. */
if (pc > strrchr(asm1_name, '/'))
{
*pc = '\0';
}
asm2_name = (char*)allocMem(strlen(asm1_name)+4) ;
strcat(strcpy(asm2_name, asm1_name), ".a2");
unlink(asm2_name);
strcat(asm1_name, ".a1");
}
unlink(asm1_name); /* Remove asm output files */
}
return *argv; /* filename of the program to decompile */
}
/* No file name, or more than one: print the usage text and terminate */
fatalError(USAGE);
return *argv;
}
/* Prints the statistics of the whole program to stdout: the number of
 * low-level icodes, the number of high-level icodes, and the percentage by
 * which the high-level count is smaller than the low-level count. */
static void displayTotalStats ()
{
    /* High-level icodes expressed as a percentage of the low-level ones */
    const double hlPercent = (stats.totalHL * 100.0) / stats.totalLL;

    printf ("\nFinal Program Statistics\n");
    printf (" Total number of low-level Icodes : %ld\n", stats.totalLL);
    printf (" Total number of high-level Icodes: %ld\n", stats.totalHL);
    printf (" Total reduction of instructions : %2.2f%%\n", 100.0 - hlPercent);
}

1613
src/disassem.cpp Normal file

File diff suppressed because it is too large Load Diff

86
src/error.cpp Normal file
View File

@@ -0,0 +1,86 @@
/****************************************************************************
* dcc project error messages
* (C) Cristina Cifuentes
***************************************************************************/
#include "dcc.h"
#include <stdio.h>
#include <stdlib.h>
//#ifndef __UNIX__
#if 1
#include <stdarg.h>
#else
#include <varargs.h>
#endif
/* printf-style message for every error id.  fatalError() and reportError()
 * index this table with (errId - 1) and pass the entry to vfprintf() together
 * with the caller's variable arguments, so the conversions in each string
 * must match what the callers pass.  The comment after each entry names the
 * error id it belongs to. */
static const char *errorMessage[] = {
"Invalid option -%c\n", /* INVALID_ARG */
"Invalid instruction %02X at location %06lX\n", /* INVALID_OPCODE */
"Don't understand 80386 instruction %02X at location %06lX\n",
/* INVALID_386OP */
"Segment override with no memory operand at location %06lX\n",
/* FUNNY_SEGOVR */
"REP prefix without a string instruction at location %06lX\n",/* FUNNY_REP */
"Cannot open %s\n", /* CANNOT_OPEN */
"Error while reading %s\n", /* CANNOT_READ */
"malloc of %ld bytes failed\n", /* MALLOC_FAILED */
"Don't understand new EXE format\n", /* NEWEXE_FORMAT */
"Failed to find a BB for jump to %ld in proc %s\n", /* NO_BB */
"Basic Block is a synthetic jump\n", /* INVALID_SYNTHETIC_BB */
"Failed to find a BB for interval\n", /* INVALID_INT_BB */
"Instruction at location %06lX goes beyond loaded image\n",
/* IP_OUT_OF_RANGE*/
"Definition not found for condition code usage at opcode %d\n",
/* DEF_NOT_FOUND */
"JX use, definition not supported at opcode #%d\n", /* JX_NOT_DEF */
"Def - use not supported. Def op = %d, use op = %d.\n", /* NOT_DEF_USE */
"Failed to construct repeat..until() condition.\n", /* REPEAT_FAIL */
"Failed to construct while() condition.\n", /* WHILE_FAIL */
};
/****************************************************************************
 fatalError: displays error message and exits the program.

 errId  - error id; USAGE prints the usage line, any other id selects the
          entry errorMessage[errId - 1].
 ...    - arguments consumed by the conversions of the selected message.

 Does not return: the process exits with errId as its exit status.
 An id without an entry in errorMessage is reported as such instead of
 being used to index past the table.
****************************************************************************/
void fatalError(Int errId, ...)
{
    const Int numMessages = (Int)(sizeof(errorMessage) / sizeof(errorMessage[0]));
    va_list args;

    va_start(args, errId);
    if (errId == USAGE)
        fprintf(stderr,"Usage: dcc [-a1a2cmpsvVi][-o asmfile] DOS_executable\n");
    else {
        fprintf(stderr, "dcc: ");
        if ((errId >= 1) && (errId <= numMessages))
            vfprintf(stderr, errorMessage[errId - 1], args);
        else
            fprintf(stderr, "unknown error code %d\n", (int)errId);
    }
    va_end(args);
    exit((int)errId);
}
/****************************************************************************
 reportError: reports the warning/error and continues with the program.

 errId  - error id; selects the entry errorMessage[errId - 1].
 ...    - arguments consumed by the conversions of the selected message.

 An id without an entry in errorMessage (this includes any id below 1) is
 reported as such instead of being used to index outside the table.
****************************************************************************/
void reportError(Int errId, ...)
{
    const Int numMessages = (Int)(sizeof(errorMessage) / sizeof(errorMessage[0]));
    va_list args;

    va_start(args, errId);
    fprintf(stderr, "dcc: ");
    if ((errId >= 1) && (errId <= numMessages))
        vfprintf(stderr, errorMessage[errId - 1], args);
    else
        fprintf(stderr, "unknown error code %d\n", (int)errId);
    va_end(args);
}

521
src/fixwild.cpp Normal file
View File

@@ -0,0 +1,521 @@
/*
* Fix Wildcards
* (C) Mike van Emmerik
*/
/* * * * * * * * * * * * *\
* *
* Fix Wild Cards Code *
* *
\* * * * * * * * * * * * */
#include <memory.h>
#ifndef PATLEN
#define PATLEN 23
#define WILD 0xF4
#endif
#ifndef bool
#define bool unsigned char
#define TRUE 1
#define FALSE 0
#define byte unsigned char
#endif
/* Cursor shared by every routine in this file.  fixWildCards() resets it to
 * 0 on entry; the helpers advance it as they consume or overwrite bytes. */
static int pc; /* Indexes into pat[] */
/* prototypes */
/* The three helpers below return TRUE once pc has reached PATLEN */
static bool ModRM(byte pat[]); /* Handle the mod/rm byte */
static bool TwoWild(byte pat[]); /* Make the next 2 bytes wild */
static bool FourWild(byte pat[]); /* Make the next 4 bytes wild */
void fixWildCards(byte pat[]); /* Main routine */
/* Consumes the mod/rm byte at pat[pc] together with the displacement it
 * announces.  Word displacements are overwritten with WILD, a byte
 * displacement is only skipped.  Returns TRUE if the pattern is exhausted
 * (pc has reached PATLEN), FALSE otherwise. */
static bool ModRM(byte pat[])
{
    const byte modrm = pat[pc];         /* The mod/rm byte */
    pc++;
    if (pc >= PATLEN)
        return TRUE;                    /* Skip Mod/RM */

    const byte mode = (byte)(modrm & 0xC0);

    /* Mode 0xC0 (reg) and mode 0x00 with a plain [reg] operand carry no
     * displacement at all */
    if (mode == 0x40)
    {
        /* [reg + nn]: step over the displacement byte, leave it as is */
        pc += 1;
        if (pc >= PATLEN)
            return TRUE;
    }
    else if ((mode == 0x80) || ((modrm & 0xC7) == 6))
    {
        /* [reg + nnnn] or the direct [nnnn] address mode.
         * Possibly just a long constant offset from a register, but often
         * will be an index from a variable, so both bytes become wild */
        return TwoWild(pat);
    }
    return FALSE;
}
/* Overwrites the bytes at pat[pc] and pat[pc+1] with WILD, advancing pc
 * past each one.  Stops and returns TRUE as soon as pc reaches PATLEN
 * (pattern exhausted); returns FALSE when bytes remain after both. */
static bool
TwoWild(byte pat[])
{
    for (int written = 0; written < 2; written++)
    {
        pat[pc] = WILD;
        pc++;
        if (pc >= PATLEN)
            return TRUE;                /* Pattern exhausted */
    }
    return FALSE;
}
/* Change the next four bytes to wild cards.
 * Returns TRUE if the pattern is exhausted (pc has reached PATLEN), FALSE
 * otherwise.  The second pair is only written when the first pair did not
 * already exhaust the pattern; otherwise TwoWild would store a WILD at
 * pat[pc] with pc >= PATLEN, i.e. past the bytes this module is meant to
 * touch. */
static bool
FourWild(byte pat[])
{
    if (TwoWild(pat))
        return TRUE;
    return TwoWild(pat);
}
/* Wipes the pattern with zeroes from the current position up to PATLEN.
 * Used after an instruction that ends the straight-line code: nothing after
 * that point can be relied on. */
static void
chop(byte pat[])
{
    /* When pc is already at or past PATLEN the length below would be zero
     * or negative, so there is nothing to wipe */
    if (pc < PATLEN)
    {
        memset(pat + pc, 0, PATLEN - pc);
    }
}
/* Handles the two byte opcodes (first byte 0x0F, already consumed by the
 * caller).  Consumes the second opcode byte at pat[pc] and whatever operands
 * it takes, making address operands wild through ModRM().
 * Returns TRUE if the pattern is exhausted, FALSE otherwise. */
static bool
op0F(byte pat[])
{
    /* The two byte opcodes */
    byte op = pat[pc++];

    /* Nothing is left after the opcode byte: stop here instead of letting
     * ModRM() fetch a mod/rm byte from pat[PATLEN].  The caller stops
     * scanning in either case, so the pattern contents are unaffected. */
    if (pc >= PATLEN)
        return TRUE;

    switch (op & 0xF0)
    {
        case 0x00:              /* 00 - 0F */
            if (op >= 0x06)     /* Clts, Invd, Wbinvd */
                return FALSE;
            /* Grp 6, Grp 7, LAR, LSL */
            return ModRM(pat);

        case 0x20:              /* Various funnies, all with Mod/RM */
        case 0x90:              /* Byte set on condition */
            return ModRM(pat);

        case 0x80:
            pc += 2;            /* Word displacement cond jumps */
            return FALSE;

        case 0xA0:
            switch (op)
            {
                case 0xA0:      /* Push FS */
                case 0xA1:      /* Pop FS */
                case 0xA8:      /* Push GS */
                case 0xA9:      /* Pop GS */
                    return FALSE;

                case 0xA4:      /* Shld EvGbIb */
                case 0xAC:      /* Shrd EvGbIb */
                    if (ModRM(pat)) return TRUE;
                    pc++;       /* The #num bits to shift */
                    return FALSE;

                default:        /* Bt, Bts, Shld/Shrd by CL, CmpXchg, Imul */
                    return ModRM(pat);
            }

        case 0xB0:
            if (op == 0xBA)
            {
                /* Grp 8: bt/bts/btr/btc Ev,#nn */
                if (ModRM(pat)) return TRUE;
                pc++;           /* The #num bits to shift */
                return FALSE;
            }
            return ModRM(pat);

        case 0xC0:
            if (op <= 0xC1)
            {
                /* Xadd */
                return ModRM(pat);
            }
            /* Else BSWAP */
            return FALSE;

        default:
            return FALSE;       /* Treat as double byte opcodes */
    }
}
/* Scan through the instructions in pat[], looking for opcodes that may
have operands that vary with different instances. For example, load and
store from statics, calls to other procs (even relative calls; they may
call procs loaded in a different order, etc).
Note that this procedure is architecture specific, and assumes the
processor is in 16 bit address mode (real mode).
PATLEN bytes are scanned.

Operand bytes that may vary are overwritten with WILD; constant operands
are only skipped. After a return or an unconditional jump the rest of the
pattern is zeroed by chop(). The scan position is the file-level cursor pc,
which is reset to 0 here.

NOTE(review): the wildcarding has presumably to stay identical to the one
used when the signature patterns were generated, so the questionable spots
flagged below should not be "fixed" without checking that first.
*/
void
fixWildCards(byte pat[])
{
byte op, quad, intArg;
pc=0;
while (pc < PATLEN)
{
op = pat[pc++];
/* Opcode was the last byte of the pattern: no operand left to process */
if (pc >= PATLEN) return;
quad = (byte) (op & 0xC0); /* Quadrant of the opcode map */
if (quad == 0)
{
/* Arithmetic group 00-3F */
if ((op & 0xE7) == 0x26) /* First check for the odds */
{
/* Segment prefix: treat as 1 byte opcode */
continue;
}
if (op == 0x0F) /* 386 2 byte opcodes */
{
if (op0F(pat)) return;
continue;
}
if (op & 0x04)
{
/* All these are constant. Work out the instr length */
if (op & 2)
{
/* Push, pop, other 1 byte opcodes */
continue;
}
else
{
if (op & 1)
{
/* Word immediate operands */
pc += 2;
continue;
}
else
{
/* Byte immediate operands */
pc++;
continue;
}
}
}
else
{
/* All these have mod/rm bytes */
if (ModRM(pat)) return;
continue;
}
}
else if (quad == 0x40)
{
if ((op & 0x60) == 0x40)
{
/* 0x40 - 0x5F -- these are inc, dec, push, pop of general
registers */
continue;
}
else
{
/* 0x60 - 0x70 */
if (op & 0x10)
{
/* 70-7F 2 byte jump opcodes */
pc++;
continue;
}
else
{
/* Odds and sods */
/* Opcodes 0x69 and 0x6B have no case below: they drop out of the
switch and are treated as 1 byte opcodes */
switch (op)
{
case 0x60: /* pusha */
case 0x61: /* popa */
case 0x64: /* overrides */
case 0x65:
case 0x66:
case 0x67:
case 0x6C: /* insb DX */
case 0x6E: /* outsb DX */
continue;
case 0x62: /* bound */
pc += 4;
continue;
case 0x63: /* arpl */
if (TwoWild(pat)) return;
continue;
case 0x68: /* Push byte */
case 0x6A: /* Push byte */
case 0x6D: /* insb port */
case 0x6F: /* outsb port */
/* 2 byte instr, no wilds */
pc++;
continue;
}
}
}
}
else if (quad == 0x80)
{
switch (op & 0xF0)
{
case 0x80: /* 80 - 8F */
/* All have a mod/rm byte */
if (ModRM(pat)) return;
/* These also have immediate values */
switch (op)
{
case 0x80:
case 0x83:
/* One byte immediate */
pc++;
continue;
case 0x81:
/* Immediate 16 bit values might be constant, but
also might be relocatable. Have to make them
wild */
if (TwoWild(pat)) return;
continue;
}
continue;
case 0x90: /* 90 - 9F */
if (op == 0x9A)
{
/* far call */
if (FourWild(pat)) return;
continue;
}
/* All others are 1 byte opcodes */
continue;
case 0xA0: /* A0 - AF */
if ((op & 0x0C) == 0)
{
/* mov al/ax to/from [nnnn] */
if (TwoWild(pat)) return;
continue;
}
else if ((op & 0xFE) == 0xA8)
{
/* test al,#byte or test ax,#word */
if (op & 1) pc += 2;
else pc += 1;
continue;
}
/* NOTE(review): opcodes A4-A7 and AA-AF match neither test above and
fall through into the B0 case, so they are handled like mov reg,#imm
(one byte skipped, or two bytes made wild). This looks unintended for
what are presumably the one byte string instructions - confirm before
changing, see the note at the top of this function. */
case 0xB0: /* B0 - BF */
{
if (op & 8)
{
/* mov reg, #16 */
/* Immediate 16 bit values might be constant, but also
might be relocatable. For now, make them wild */
if (TwoWild(pat)) return;
}
else
{
/* mov reg, #8 */
pc++;
}
continue;
}
}
}
else
{
/* In the last quadrant of the op code table */
switch (op)
{
case 0xC0: /* 386: Rotate group 2 ModRM, byte, #byte */
case 0xC1: /* 386: Rotate group 2 ModRM, word, #byte */
if (ModRM(pat)) return;
/* Byte immediate value follows ModRM */
pc++;
continue;
case 0xC3: /* Return */
case 0xCB: /* Return far */
chop(pat);
return;
case 0xC2: /* Ret nnnn */
case 0xCA: /* Retf nnnn */
pc += 2;
chop(pat);
return;
case 0xC4: /* les Gv, Mp */
case 0xC5: /* lds Gv, Mp */
if (ModRM(pat)) return;
continue;
case 0xC6: /* Mov ModRM, #nn */
if (ModRM(pat)) return;
/* Byte immediate value follows ModRM */
pc++;
continue;
case 0xC7: /* Mov ModRM, #nnnn */
if (ModRM(pat)) return;
/* Word immediate value follows ModRM */
/* Immediate 16 bit values might be constant, but also
might be relocatable. For now, make them wild */
if (TwoWild(pat)) return;
continue;
case 0xC8: /* Enter Iw, Ib */
pc += 3; /* Constant word, byte */
continue;
case 0xC9: /* Leave */
continue;
case 0xCC: /* Int 3 */
continue;
case 0xCD: /* Int nn */
intArg = pat[pc++];
if ((intArg >= 0x34) && (intArg <= 0x3B))
{
/* Borland/Microsoft FP emulations */
/* NOTE(review): pc is not checked against PATLEN between reading the
interrupt number and this call, so ModRM() can be entered with
pc == PATLEN - confirm pat[] has room for that read. */
if (ModRM(pat)) return;
}
continue;
case 0xCE: /* Into */
continue;
case 0xCF: /* Iret */
continue;
case 0xD0: /* Group 2 rotate, byte, 1 bit */
case 0xD1: /* Group 2 rotate, word, 1 bit */
case 0xD2: /* Group 2 rotate, byte, CL bits */
case 0xD3: /* Group 2 rotate, word, CL bits */
if (ModRM(pat)) return;
continue;
case 0xD4: /* Aam */
case 0xD5: /* Aad */
case 0xD7: /* Xlat */
continue;
case 0xD8:
case 0xD9:
case 0xDA:
case 0xDB: /* Esc opcodes */
case 0xDC: /* i.e. floating point */
case 0xDD: /* coprocessor calls */
case 0xDE:
case 0xDF:
if (ModRM(pat)) return;
continue;
case 0xE0: /* Loopne */
case 0xE1: /* Loope */
case 0xE2: /* Loop */
case 0xE3: /* Jcxz */
pc++; /* Short jump offset */
continue;
case 0xE4: /* in al,nn */
case 0xE6: /* out nn,al */
pc++;
continue;
case 0xE5: /* in ax,nn */
case 0xE7: /* out nn,ax */
/* NOTE(review): two bytes are skipped here, against one for the
al forms above - confirm the intended operand size. */
pc += 2;
continue;
case 0xE8: /* Call rel */
if (TwoWild(pat)) return;
continue;
case 0xE9: /* Jump rel, unconditional */
if (TwoWild(pat)) return;
chop(pat);
return;
case 0xEA: /* Jump abs */
if (FourWild(pat)) return;
chop(pat);
return;
case 0xEB: /* Jmp short unconditional */
pc++;
chop(pat);
return;
case 0xEC: /* In al,dx */
case 0xED: /* In ax,dx */
case 0xEE: /* Out dx,al */
case 0xEF: /* Out dx,ax */
continue;
case 0xF0: /* Lock */
case 0xF2: /* Repne */
case 0xF3: /* Rep/repe */
case 0xF4: /* Halt */
case 0xF5: /* Cmc */
case 0xF8: /* Clc */
case 0xF9: /* Stc */
case 0xFA: /* Cli */
case 0xFB: /* Sti */
case 0xFC: /* Cld */
case 0xFD: /* Std */
continue;
case 0xF6: /* Group 3 byte test/not/mul/div */
case 0xF7: /* Group 3 word test/not/mul/div */
case 0xFE: /* Inc/Dec group 4 */
if (ModRM(pat)) return;
continue;
case 0xFF: /* Group 5 Inc/Dec/Call/Jmp/Push */
/* Most are like standard ModRM */
if (ModRM(pat)) return;
continue;
default: /* Rest are single byte opcodes */
continue;
}
}
}
}

372
src/frontend.cpp Normal file
View File

@@ -0,0 +1,372 @@
/*****************************************************************************
* dcc project Front End module
* Loads a program into simulated main memory and builds the procedure list.
* (C) Cristina Cifuentes
****************************************************************************/
#include "dcc.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef __BORLAND__
#include <alloc.h>
#else
#include <malloc.h> /* For malloc, free, realloc */
#endif
/* Layout of the PSP.  In the visible part of this file only sizeof(PSP) is
 * used (displayLoadInfo subtracts it from prog.cbImage to get the load
 * image size).  The fields add up to 256 bytes, assuming word is 16 bits
 * wide and the compiler inserts no padding - TODO confirm. */
typedef struct { /* PSP structure */
word int20h; /* interrupt 20h */
word eof; /* segment, end of allocation block */
byte res1; /* reserved */
byte dosDisp[5]; /* far call to DOS function dispatcher */
byte int22h[4]; /* vector for terminate routine */
byte int23h[4]; /* vector for ctrl+break routine */
byte int24h[4]; /* vector for error routine */
byte res2[22]; /* reserved */
word segEnv; /* segment address of environment block */
byte res3[34]; /* reserved */
byte int21h[6]; /* opcode for int21h and far return */
byte res4[6]; /* reserved */
byte fcb1[16]; /* default file control block 1 */
byte fcb2[16]; /* default file control block 2 */
byte res5[4]; /* reserved */
byte cmdTail[0x80]; /* command tail and disk transfer area */
} PSP;
/* Header of the EXE file being loaded.  LoadImage() reads sizeof(header)
 * bytes from the start of the file straight into this object, so the field
 * order and sizes must mirror the on-disk layout (this assumes word is 16
 * bits and no padding is inserted - TODO confirm).  The word fields are
 * read back through the LH() macro rather than directly. */
static struct { /* EXE file header */
byte sigLo; /* .EXE signature: 0x4D 0x5A */
byte sigHi;
word lastPageSize; /* Size of the last page */
word numPages; /* Number of pages in the file */
word numReloc; /* Number of relocation items */
word numParaHeader; /* # of paragraphs in the header */
word minAlloc; /* Minimum number of paragraphs */
word maxAlloc; /* Maximum number of paragraphs */
word initSS; /* Segment displacement of stack */
word initSP; /* Contents of SP at entry */
word checkSum; /* Complemented checksum */
word initIP; /* Contents of IP at entry */
word initCS; /* Segment displacement of code */
word relocTabOffset; /* Relocation table offset */
word overlayNum; /* Overlay number */
} header;
/* Segment value added by LoadImage() to the initial CS and SS of an EXE
 * file and to the segment part of every relocation table entry */
#define EXE_RELOCATION 0x10 /* EXE images relocated to above PSP */
/* Forward declarations of the file-local helpers used by FrontEnd() */
static void LoadImage(char *filename);
static void displayLoadInfo(void);
static void displayMemMap(void);
/*****************************************************************************
* FrontEnd - invokes the loader, parser, disassembler (if asm1), icode
* rewritter, and displays any useful information.
****************************************************************************/
void FrontEnd (char *filename, CALL_GRAPH * *pcallGraph)
{
/* Load program into memory */
LoadImage(filename);
if (option.verbose)
displayLoadInfo();
/* Do depth first flow analysis building call graph and procedure list,
* and attaching the I-code to each procedure */
parse (pcallGraph);
if (option.asm1)
{
printf("dcc: writing assembler file %s\n", asm1_name);
}
/* Search through code looking for impure references and flag them */
std::for_each(pProcList.begin(),pProcList.end(),
[](Function &f)->void {
f.markImpure();
if (option.asm1)
disassem(1, &f); });
if (option.Interact)
{
interactDis(&pProcList.front(), 0); /* Interactive disassembler */
}
/* Converts jump target addresses to icode offsets */
std::for_each(pProcList.begin(),pProcList.end(),
[](Function &f)->void { f.bindIcodeOff(); });
/* Print memory bitmap */
if (option.Map)
displayMemMap();
}
/****************************************************************************
 * displayLoadInfo - Displays low level loader type info.
 *
 * Prints the file type, the EXE header fields (EXE files only), the size of
 * the load image without the PSP, and the initial SS:SP and CS:IP.  When
 * very verbose output is requested and relocation items exist, the
 * relocation table is listed as well.
 ***************************************************************************/
static void displayLoadInfo(void)
{
    Int i;

    printf("File type is %s\n", (prog.fCOM)?"COM":"EXE");
    if (! prog.fCOM) {
        printf("Signature = %02X%02X\n", header.sigLo, header.sigHi);
        printf("File size %% 512 = %04X\n", LH(&header.lastPageSize));
        printf("File size / 512 = %04X pages\n", LH(&header.numPages));
        printf("# relocation items = %04X\n", LH(&header.numReloc));
        printf("Offset to load image = %04X paras\n", LH(&header.numParaHeader));
        printf("Minimum allocation = %04X paras\n", LH(&header.minAlloc));
        printf("Maximum allocation = %04X paras\n", LH(&header.maxAlloc));
    }

    /* sizeof() yields a size_t, which makes the whole difference a size_t;
     * %X consumes an unsigned int, so the value is converted explicitly */
    printf("Load image size = %04X\n", (unsigned)(prog.cbImage - sizeof(PSP)));
    printf("Initial SS:SP = %04X:%04X\n", prog.initSS, prog.initSP);
    printf("Initial CS:IP = %04X:%04X\n", prog.initCS, prog.initIP);

    if (option.VeryVerbose && prog.cReloc)
    {
        printf("\nRelocation Table\n");
        for (i = 0; i < prog.cReloc; i++)
        {
            /* Each entry is an offset into the image; the word stored there
             * is shown next to it.  The entry is a dword, converted for the
             * same reason as above. */
            printf("%06X -> [%04X]\n", (unsigned)prog.relocTable[i],
                   LH(prog.Image + prog.relocTable[i]));
        }
    }
    printf("\n");
}
/*****************************************************************************
 * fill - Fills line for displayMemMap()
 *
 * Writes 16 " c" pairs into bf, one per image byte starting at ip, followed
 * by a terminating NUL (33 characters in all).  c is picked from ". d c x"
 * by the 2-bit entry of prog.map that belongs to the byte; positions at or
 * beyond prog.cbImage are shown as a blank.
 ****************************************************************************/
static void fill(Int ip, char *bf)
{
    static byte type[4] = {'.', 'd', 'c', 'x'};

    for (int col = 0; col < 16; col++)
    {
        char mark = ' ';
        if (ip < prog.cbImage)
        {
            /* Four 2-bit entries are packed into every byte of the map */
            mark = type[(prog.map[ip >> 2] >> ((ip & 3) * 2)) & 3];
        }
        bf[2 * col] = ' ';
        bf[2 * col + 1] = mark;
        ip++;
    }
    bf[32] = '\0';
}
/*****************************************************************************
 * displayMemMap - Displays the memory bitmap
 *
 * Prints one line per 16 bytes of the image.  When a printed line consists
 * of one single mark and the next two lines are identical to it, a ":" line
 * is printed and the identical lines that follow are skipped.
 ****************************************************************************/
static void displayMemMap(void)
{
    char shown[33], next1[33], next2[33];
    Int ip = 0;

    printf("\nMemory Map\n");
    while (ip < prog.cbImage)
    {
        fill(ip, shown);
        printf("%06X %s\n", ip, shown);
        ip += 16;

        /* Check if all same: the marks sit at the odd positions 1..31 */
        int uniform = 1;
        for (int col = 3; col < 32; col += 2)
        {
            if (shown[col] != shown[1])
            {
                uniform = 0;
                break;
            }
        }
        if (! uniform)
            continue;

        /* Skip until next two are not same */
        fill(ip, next1);
        fill(ip+16, next2);
        if ((strcmp(shown, next1) == 0) && (strcmp(shown, next2) == 0))
        {
            printf(" :\n");
            do
            {
                ip += 16;
                fill(ip+16, shown);
            } while (strcmp(shown, next1) == 0);
        }
    }
    printf("\n");
}
/*****************************************************************************
* LoadImage
****************************************************************************/
static void LoadImage(char *filename)
{
FILE *fp;
Int i, cb;
byte buf[4];
/* Open the input file */
if ((fp = fopen(filename, "rb")) == NULL)
{
fatalError(CANNOT_OPEN, filename);
}
/* Read in first 2 bytes to check EXE signature */
if (fread(&header, 1, 2, fp) != 2)
{
fatalError(CANNOT_READ, filename);
}
if (! (prog.fCOM = (boolT)(header.sigLo != 0x4D || header.sigHi != 0x5A))) {
/* Read rest of header */
fseek(fp, 0, SEEK_SET);
if (fread(&header, sizeof(header), 1, fp) != 1)
{
fatalError(CANNOT_READ, filename);
}
/* This is a typical DOS kludge! */
if (LH(&header.relocTabOffset) == 0x40)
{
fatalError(NEWEXE_FORMAT);
}
/* Calculate the load module size.
* This is the number of pages in the file
* less the length of the header and reloc table
* less the number of bytes unused on last page
*/
cb = (dword)LH(&header.numPages) * 512 - (dword)LH(&header.numParaHeader) * 16;
if (header.lastPageSize)
{
cb -= 512 - LH(&header.lastPageSize);
}
/* We quietly ignore minAlloc and maxAlloc since for our
* purposes it doesn't really matter where in real memory
* the program would end up. EXE programs can't really rely on
* their load location so setting the PSP segment to 0 is fine.
* Certainly programs that prod around in DOS or BIOS are going
* to have to load DS from a constant so it'll be pretty
* obvious.
*/
prog.initCS = (int16)LH(&header.initCS) + EXE_RELOCATION;
prog.initIP = (int16)LH(&header.initIP);
prog.initSS = (int16)LH(&header.initSS) + EXE_RELOCATION;
prog.initSP = (int16)LH(&header.initSP);
prog.cReloc = (int16)LH(&header.numReloc);
/* Allocate the relocation table */
if (prog.cReloc)
{
prog.relocTable = (dword*)allocMem(prog.cReloc * sizeof(Int));
fseek(fp, LH(&header.relocTabOffset), SEEK_SET);
/* Read in seg:offset pairs and convert to Image ptrs */
for (i = 0; i < prog.cReloc; i++)
{
fread(buf, 1, 4, fp);
prog.relocTable[i] = LH(buf) +
(((Int)LH(buf+2) + EXE_RELOCATION)<<4);
}
}
/* Seek to start of image */
fseek(fp, (Int)LH(&header.numParaHeader) * 16, SEEK_SET);
}
else
{ /* COM file
* In this case the load module size is just the file length
*/
fseek(fp, 0, SEEK_END);
cb = ftell(fp);
/* COM programs start off with an ORG 100H (to leave room for a PSP)
* This is also the implied start address so if we load the image
* at offset 100H addresses should all line up properly again.
*/
prog.initCS = 0;
prog.initIP = 0x100;
prog.initSS = 0;
prog.initSP = 0xFFFE;
prog.cReloc = 0;
fseek(fp, 0, SEEK_SET);
}
/* Allocate a block of memory for the program. */
prog.cbImage = cb + sizeof(PSP);
prog.Image = (byte*)allocMem(prog.cbImage);
prog.Image[0] = 0xCD; /* Fill in PSP Int 20h location */
prog.Image[1] = 0x20; /* for termination checking */
/* Read in the image past where a PSP would go */
#ifdef __DOSWIN__
if (cb > 0xFFFF)
{
printf("Image size of %ld bytes too large for fread!\n", cb);
fatalError(CANNOT_READ, filename);
}
#endif
if (cb != (Int)fread(prog.Image + sizeof(PSP), 1, (size_t)cb, fp))
{
fatalError(CANNOT_READ, filename);
}
/* Set up memory map */
cb = (prog.cbImage + 3) / 4;
prog.map = (byte *)memset(allocMem(cb), BM_UNKNOWN, (size_t)cb);
/* Relocate segment constants */
if (prog.cReloc)
{
for (i = 0; i < prog.cReloc; i++)
{
byte *p = &prog.Image[prog.relocTable[i]];
word w = (word)LH(p) + EXE_RELOCATION;
*p++ = (byte)(w & 0x00FF);
*p = (byte)((w & 0xFF00) >> 8);
}
}
fclose(fp);
}
/*****************************************************************************
 * allocMem - malloc with failure test
 *
 * cb : number of bytes requested
 *
 * Returns a pointer to uninitialised storage; the memory is obtained with
 * malloc(). If the allocation fails, fatalError(MALLOC_FAILED, cb) is called.
 *
 * A request for zero bytes is rounded up to one byte: malloc(0) is allowed
 * to return NULL, which would otherwise be reported as an out-of-memory
 * condition even though nothing went wrong.
 ****************************************************************************/
void *allocMem(Int cb)
{
    byte *p;
    size_t request = (size_t)cb;

    if (request == 0)
    {
        request = 1;
    }

    if (! (p = (byte*)malloc(request)))
    {
        fatalError(MALLOC_FAILED, cb);
    }
    return p;
}
/*****************************************************************************
 * reallocVar - resizes a previously allocated block
 *
 * p       : block to resize
 * newsize : requested size in bytes
 *
 * Returns the pointer produced by realloc(). When realloc() yields NULL,
 * fatalError(MALLOC_FAILED, newsize) is called.
 ****************************************************************************/
void *reallocVar(void *p, Int newsize)
{
    void *resized = realloc((byte *)p, (size_t)newsize);

    if (! resized)
    {
        fatalError(MALLOC_FAILED, newsize);
    }
    return resized;
}
/* Disabled heap-checking replacement for free(): the whole definition sits
 * under "#if 0", so it is never compiled. When enabled it would release the
 * block with _ffree() and then report the result of _heapset('Z'). */
#if 0
void free(void *p)
{
_ffree(p);
switch (_heapset('Z'))
{
case _HEAPBADBEGIN: printf("f: Bad heap begin\n"); getchar(); break;
case _HEAPBADNODE: printf("f: Bad heap node\n"); getchar(); break;
case _HEAPEMPTY: printf("f: Heap empty\n"); getchar(); break;
case _HEAPOK:putchar('!');break;
}/**/
}
#endif

379
src/graph.cpp Normal file
View File

@@ -0,0 +1,379 @@
/*****************************************************************************
* dcc project CFG related functions
* (C) Cristina Cifuentes
****************************************************************************/
#include "dcc.h"
#include <string.h>
#if __BORLAND__
#include <alloc.h>
#else
#include <malloc.h> /* For free() */
#endif
#include "graph.h"
static BB * rmJMP(Function * pProc, Int marker, BB * pBB);
static void mergeFallThrough(Function * pProc, BB * pBB);
static void dfsNumbering(BB * pBB, std::vector<BB*> &dfsLast, Int *first, Int *last);
/*****************************************************************************
* createCFG - Create the basic control flow graph
*
* Works in two passes:
*   1. Walk the icode array and cut it into basic blocks with BB::Create().
*      Each block records, per out-edge, the icode index it leads to.
*   2. Walk heldBBs and turn every recorded icode index into a pointer to
*      the block that starts there, counting in-edges on the way.
* fatalError() is called for an edge to a synthesized index
* (>= SYNTHESIZED_MIN) or to an index at which no block starts.
****************************************************************************/
void Function::createCFG()
{
/* Splits Icode associated with the procedure into Basic Blocks.
* The links between BBs represent the control flow graph of the
* procedure.
* A Basic Block is defined to end on one of the following instructions:
* 1) Conditional and unconditional jumps
* 2) CALL(F)
* 3) RET(F)
* 4) On the instruction before a join (a flagged TARGET)
* 5) Repeated string instructions
* 6) End of procedure
*/
Int i;
Int ip, start;
BB * psBB;
BB * pBB;
ICODE * pIcode = Icode.GetFirstIcode();
stats.numBBbef = stats.numBBaft = 0;
/* ip is the index of the current icode, start the index of the first
 * icode of the block being collected */
for (ip = start = 0; Icode.IsValid(pIcode); ip++, pIcode++)
{
/* Stick a NOWHERE_NODE on the end if we terminate
* with anything other than a ret, jump or terminate */
if (ip + 1 == Icode.GetNumIcodes() &&
! (pIcode->ic.ll.flg & TERMINATES) &&
pIcode->ic.ll.opcode != iJMP && pIcode->ic.ll.opcode != iJMPF &&
pIcode->ic.ll.opcode != iRET && pIcode->ic.ll.opcode != iRETF)
pBB=BB::Create(start, ip, NOWHERE_NODE, 0, this);
/* Only process icodes that have valid instructions */
else if ((pIcode->ic.ll.flg & NO_CODE) != NO_CODE)
{
switch (pIcode->ic.ll.opcode) {
case iJB: case iJBE: case iJAE: case iJA:
case iJL: case iJLE: case iJGE: case iJG:
case iJE: case iJNE: case iJS: case iJNS:
case iJO: case iJNO: case iJP: case iJNP:
case iJCXZ:
pBB = BB::Create(start, ip, TWO_BRANCH, 2, this);
/* Shared tail, also entered by goto from the LOOP cases below:
 * edge 0 is the fall-through, edge 1 the jump target */
CondJumps:
start = ip + 1;
pBB->edges[0].ip = (dword)start;
/* This is for jumps off into nowhere */
if (pIcode->ic.ll.flg & NO_LABEL)
pBB->numOutEdges--;
else
pBB->edges[1].ip = pIcode->ic.ll.immed.op;
break;
case iLOOP: case iLOOPE: case iLOOPNE:
pBB = BB::Create(start, ip, LOOP_NODE, 2, this);
goto CondJumps;
case iJMPF: case iJMP:
if (pIcode->ic.ll.flg & SWITCH)
{
/* One out-edge per case table entry */
pBB = BB::Create(start, ip, MULTI_BRANCH,
pIcode->ic.ll.caseTbl.numEntries, this);
for (i = 0; i < pIcode->ic.ll.caseTbl.numEntries; i++)
pBB->edges[i].ip = pIcode->ic.ll.caseTbl.entries[i];
hasCase = TRUE;
}
else if ((pIcode->ic.ll.flg & (I | NO_LABEL)) == I) {
pBB = BB::Create(start, ip, ONE_BRANCH, 1, this);
pBB->edges[0].ip = pIcode->ic.ll.immed.op;
}
else
/* Result intentionally unused: BB::Create() already registers the
 * block with this procedure and it has no edges to fill in */
BB::Create(start, ip, NOWHERE_NODE, 0, this);
start = ip + 1;
break;
case iCALLF: case iCALL:
{
Function * p = pIcode->ic.ll.immed.proc.proc;
/* A call to a procedure flagged TERMINATES gets no out-edge;
 * any other call (including one with no known target) gets a
 * single fall-through edge */
if (p)
i = ((p->flg) & TERMINATES) ? 0 : 1;
else
i = 1;
pBB = BB::Create(start, ip, CALL_NODE, i, this);
start = ip + 1;
if (i)
pBB->edges[0].ip = (dword)start;
}
break;
case iRET: case iRETF:
BB::Create(start, ip, RETURN_NODE, 0, this);
start = ip + 1;
break;
default:
/* Check for exit to DOS */
if (pIcode->ic.ll.flg & TERMINATES)
{
pBB = BB::Create(start, ip, TERMINATE_NODE, 0, this);
start = ip + 1;
}
/* Check for a fall through */
/* The last icode never reaches this test without TERMINATES set:
 * the end-of-procedure check at the top of the loop takes it first,
 * so ip + 1 is a valid index here */
else if (Icode.GetFirstIcode()[ip + 1].ic.ll.flg & (TARGET | CASE))
{
pBB = BB::Create(start, ip, FALL_NODE, 1, this);
start = ip + 1;
pBB->edges[0].ip = (dword)start;
}
break;
}
}
}
std::vector<BB *>::iterator iter=heldBBs.begin();
/* Convert list of BBs into a graph */
for (; iter!=heldBBs.end(); ++iter)
{
pBB = *iter;
for (i = 0; i < pBB->numOutEdges; i++)
{
ip = pBB->edges[i].ip;
if (ip >= SYNTHESIZED_MIN)
fatalError (INVALID_SYNTHETIC_BB);
else
{
/* Linear search for the block that starts at icode index ip */
auto iter2=std::find_if(heldBBs.begin(),heldBBs.end(),
[ip](BB *psBB)->bool {return psBB->start==ip;});
if(iter2==heldBBs.end())
fatalError(NO_BB, ip, name);
psBB = *iter2;
pBB->edges[i].BBptr = psBB;
psBB->numInEdges++;
}
}
}
}
void Function::markImpure()
{
SYM * psym;
for (int i = 0; i < Icode.GetNumIcodes(); i++)
{
if (Icode.GetLlFlag(i) & (SYM_USE | SYM_DEF))
{
psym = &symtab.sym[Icode.GetIcode(i)->ic.ll.caseTbl.numEntries];
for (int c = (Int)psym->label; c < (Int)psym->label+psym->size; c++)
{
if (BITMAP(c, BM_CODE))
{
Icode.SetLlFlag(i, IMPURE);
flg |= IMPURE;
break;
}
}
}
}
}
/*****************************************************************************
* newBB - Allocate new BB and link to end of list
*****************************************************************************/
/*****************************************************************************
* freeCFG - Deallocates a cfg
****************************************************************************/
void Function::freeCFG()
{
std::for_each(heldBBs.begin(),heldBBs.end(),[](BB *p)->void {delete p;});
}
/*****************************************************************************
* compressCFG - Remove redundancies and add in-edge information
****************************************************************************/
void Function::compressCFG()
{
BB * pBB, *pNxt;
Int ip, first=0, last, i;
/* First pass over BB list removes redundant jumps of the form
* (Un)Conditional -> Unconditional jump */
std::vector<BB*>::iterator iter=cfg.begin();
for (;iter!=cfg.end(); ++iter)
{
pBB = *iter;
pBB->inEdges.resize(pBB->numInEdges,0);
if (pBB->numInEdges != 0 && (pBB->nodeType == ONE_BRANCH || pBB->nodeType == TWO_BRANCH))
for (i = 0; i < pBB->numOutEdges; i++)
{
ip = pBB->start + pBB->length - 1;
pNxt = rmJMP(this, ip, pBB->edges[i].BBptr);
if (pBB->numOutEdges) /* Might have been clobbered */
{
pBB->edges[i].BBptr = pNxt;
Icode.SetImmediateOp(ip, (dword)pNxt->start);
}
}
}
/* Next is a depth-first traversal merging any FALL_NODE or
* ONE_BRANCH that fall through to a node with that as their only
* in-edge. */
this->cfg.front()->mergeFallThrough(Icode);
/* Remove redundant BBs created by the above compressions
* and allocate in-edge arrays as required. */
stats.numBBaft = stats.numBBbef;
for(auto iter=cfg.begin(); iter!=cfg.end(); ++iter)
{
pBB = *iter;
if (pBB->numInEdges == 0)
{
if (iter == cfg.begin()) /* Init it misses out on */
pBB->index = UN_INIT;
else
{
if (pBB->numOutEdges)
pBB->edges.clear();
delete pBB;
stats.numBBaft--;
}
}
else
{
pBB->inEdgeCount = pBB->numInEdges;
}
}
/* Allocate storage for dfsLast[] array */
numBBs = stats.numBBaft;
dfsLast.resize(numBBs,0); // = (BB **)allocMem(numBBs * sizeof(BB *))
/* Now do a dfs numbering traversal and fill in the inEdges[] array */
last = numBBs - 1;
cfg.front()->dfsNumbering(dfsLast, &first, &last);
}
/****************************************************************************
* rmJMP - If BB addressed is just a JMP it is replaced with its target
*
* pProc  : procedure owning the icodes
* marker : value combined with DFS_JMP and stored in BB::traversed to
*          recognise blocks already passed during this walk
* pBB    : block an edge currently points to
*
* Follows a chain of one-instruction ONE_BRANCH blocks and returns the block
* the edge should point to instead. In-edge counts are adjusted on the way,
* and a jump block that loses its last in-edge has its icode flagged NO_CODE
* and marked invalid. A chain that leads back to a block already marked in
* this walk is turned into a NOWHERE_NODE with no out-edges.
***************************************************************************/
static BB * rmJMP(Function * pProc, Int marker, BB * pBB)
{
marker += DFS_JMP;
while (pBB->nodeType == ONE_BRANCH && pBB->length == 1) {
if (pBB->traversed != marker) {
pBB->traversed = marker;
/* The edge being redirected no longer enters this jump block. If other
 * edges still do, the block stays and its target gains the redirected
 * edge; otherwise the block is dead and its icode is disabled. */
if (--pBB->numInEdges)
pBB->edges[0].BBptr->numInEdges++;
else
{
pProc->Icode.SetLlFlag(pBB->start, NO_CODE);
pProc->Icode.SetLlInvalid(pBB->start, TRUE);
}
pBB = pBB->edges[0].BBptr;
}
else { /* We are going around in circles */
pBB->nodeType = NOWHERE_NODE;
/* Both statements below store the block's own start index as the jump
 * operand; the second repeats the first through the accessor */
pProc->Icode.GetIcode(pBB->start)->ic.ll.immed.op = (dword)pBB->start;
pProc->Icode.SetImmediateOp(pBB->start, (dword)pBB->start);
/* Walk the cycle once, releasing one in-edge per block, until the
 * block just converted to NOWHERE_NODE is reached again */
do {
pBB = pBB->edges[0].BBptr;
if (! --pBB->numInEdges)
{
pProc->Icode.SetLlFlag(pBB->start, NO_CODE);
pProc->Icode.SetLlInvalid(pBB->start, TRUE);
}
} while (pBB->nodeType != NOWHERE_NODE);
pBB->edges.clear();
pBB->numOutEdges = 0;
}
}
return pBB;
}
/*****************************************************************************
 * mergeFallThrough - Merges this block with the blocks it falls into.
 *
 * Icode : icode array of the procedure that owns this block
 *
 * While this block is a FALL_NODE or ONE_BRANCH:
 *   - a ONE_BRANCH whose target is reached by skipping only NO_CODE icodes
 *     has its jump disabled and becomes a FALL_NODE;
 *   - if this block is the only way into its successor, the successor's
 *     icodes, node type and out-edges are absorbed and the successor is
 *     left with no edges.
 * The block is then marked DFS_MERGE and the process recurses into every
 * successor not yet marked.
 ****************************************************************************/
void BB::mergeFallThrough( CIcodeRec &Icode)
{
    BB * pChild;
    Int i, _ip;

    if (!this)
    {
        printf("mergeFallThrough on empty BB!\n");
        /* Nothing below can work without an object, so stop here instead
         * of dereferencing a null pointer */
        return;
    }

    while (nodeType == FALL_NODE || nodeType == ONE_BRANCH)
    {
        pChild = edges[0].BBptr;

        /* Jump to next instruction can always be removed */
        if (nodeType == ONE_BRANCH)
        {
            _ip = start + length;
            /* Skip icodes flagged NO_CODE between this block and the target */
            for (i = _ip; i < pChild->start && (Icode.GetLlFlag(i) & NO_CODE); i++);
            if (i != pChild->start)
                break;
            Icode.SetLlFlag(_ip - 1, NO_CODE);
            Icode.SetLlInvalid(_ip - 1, TRUE);
            nodeType = FALL_NODE;
            length--;
        }

        /* If there's no other edges into child can merge */
        if (pChild->numInEdges != 1)
            break;

        nodeType = pChild->nodeType;
        length = pChild->start + pChild->length - start;
        Icode.ClearLlFlag(pChild->start, TARGET);
        numOutEdges = pChild->numOutEdges;
        edges.swap(pChild->edges);      /* take over the child's out-edges */
        pChild->numOutEdges = pChild->numInEdges = 0;
        pChild->edges.clear();
    }
    traversed = DFS_MERGE;

    /* Process all out edges recursively */
    for (i = 0; i < numOutEdges; i++)
        if (edges[i].BBptr->traversed != DFS_MERGE)
            edges[i].BBptr->mergeFallThrough(Icode);
}
/*****************************************************************************
 * dfsNumbering - Depth-first numbering of the graph reachable from this
 *                block; also records the in-edges of every successor.
 *
 * dfsLast : array indexed by last-visit number; receives this block
 * first   : next first-visit number to hand out (incremented)
 * last    : next last-visit number to hand out (decremented)
 ****************************************************************************/
void BB::dfsNumbering(std::vector<BB *> &dfsLast, Int *first, Int *last)
{
    traversed = DFS_NUM;
    dfsFirstNum = *first;
    *first += 1;

    for (byte edge = 0; edge < numOutEdges; ++edge)
    {
        BB *succ = edges[edge].BBptr;

        /* succ->index doubles as the fill position in succ->inEdges[] */
        succ->inEdges[succ->index] = this;
        succ->index++;

        /* All in-edges recorded: reset the index */
        if (succ->index == succ->numInEdges)
            succ->index = UN_INIT;

        if (succ->traversed == DFS_NUM)
            continue;
        succ->dfsNumbering(dfsLast, first, last);
    }

    dfsLastNum = *last;
    dfsLast[*last] = this;
    *last -= 1;
}

501
src/hlicode.cpp Normal file
View File

@@ -0,0 +1,501 @@
/*
* File: hlIcode.c
* Purpose: High-level icode routines
* Date: September-October 1993
* (C) Cristina Cifuentes
*/
#include <cassert>
#include <string.h>
#include <string>
#include <sstream>
#include "dcc.h"
using namespace std;
#define ICODE_DELTA 25
/* Masks off bits set by duReg[].
 * Indexed by register number: removeDefRegi() ANDs a definition bit set
 * with maskDuReg[regi] to drop the bits belonging to that register.
 * Entry 0 is a placeholder with no bits set. */
dword maskDuReg[] = { 0x00,
0xFEEFFE, 0xFDDFFD, 0xFBB00B, 0xF77007, /* word regs */
0xFFFFEF, 0xFFFFDF, 0xFFFFBF, 0xFFFF7F,
0xFFFEFF, 0xFFFDFF, 0xFFFBFF, 0xFFF7FF, /* seg regs */
0xFFEFFF, 0xFFDFFF, 0xFFBFFF, 0xFF7FFF, /* byte regs */
0xFEFFFF, 0xFDFFFF, 0xFBFFFF, 0xF7FFFF,
0xEFFFFF, /* tmp reg */
0xFFFFB7, 0xFFFF77, 0xFFFF9F, 0xFFFF5F, /* index regs */
0xFFFFBF, 0xFFFF7F, 0xFFFFDF, 0xFFFFF7 };
static char buf[lineSize]; /* Line buffer for hl icode output */
/* Turns this icode into a high-level HLI_ASSIGN with the given operands.
 * Both operand slots must still be empty; the assertions guard against
 * overwriting (and thereby leaking) an expression stored earlier. */
void ICODE::setAsgn(COND_EXPR *lhs, COND_EXPR *rhs)
{
    auto &slot = ic.hl.oper.asgn;

    assert(slot.lhs==0);    //prevent memory leaks
    assert(slot.rhs==0);    //prevent memory leaks

    type = HIGH_LEVEL;
    ic.hl.opcode = HLI_ASSIGN;
    slot.lhs = lhs;
    slot.rhs = rhs;
}
/* Turns this icode into a high-level HLI_CALL.
 * The callee is copied from the low-level operand and a fresh STKFRAME is
 * allocated for the arguments. The argument byte count comes from the
 * low-level operand when that is non-zero, otherwise from the callee's
 * cbParam. */
void ICODE::newCallHl()
{
    type = HIGH_LEVEL;
    ic.hl.opcode = HLI_CALL;

    auto &call = ic.hl.oper.call;
    call.proc = ic.ll.immed.proc.proc;
    call.args = new STKFRAME;

    if (ic.ll.immed.proc.cb == 0)
        call.args->cb =call.proc->cbParam;
    else
        call.args->cb = ic.ll.immed.proc.cb;
}
/* Turns this icode into the single-operand high-level instruction `op`
 * (used for HLI_POP, HLI_PUSH and HLI_RET) with operand `exp`.
 * The operand slot must be empty beforehand. */
void ICODE::setUnary(hlIcode op, COND_EXPR *exp)
{
    auto &hl = ic.hl;

    assert(hl.oper.exp==0);
    type = HIGH_LEVEL;
    hl.opcode = op;
    hl.oper.exp = exp;
}
/* Turns this icode into a high-level HLI_JCOND whose condition is `cexp`.
 * The operand slot must be empty beforehand. */
void ICODE::setJCond(COND_EXPR *cexp)
{
    auto &hl = ic.hl;

    assert(hl.oper.exp==0);
    type = HIGH_LEVEL;
    hl.opcode = HLI_JCOND;
    hl.oper.exp = cexp;
}
/* Sets the invalid field to TRUE as this low-level icode is no longer valid,
* it has been replaced by a high-level icode. */
void ICODE ::invalidate()
{
invalid = TRUE;
}
/* Removes the defined register regi from the lhs subtree of picode.
 *
 * When the number of registers defined by the instruction equals
 * thisDefIdx, the definitions are scanned from the last one downwards; if
 * none of them has a recorded use and du.lastDefRegi is not set, the whole
 * instruction is invalidated and TRUE is returned.
 *
 * Otherwise, for HLI_ASSIGN, HLI_POP and HLI_PUSH the register is removed
 * from the destination expression and from the definition set, and FALSE is
 * returned. Other opcodes are left untouched (FALSE is returned as well). */
boolT removeDefRegi (byte regi, ICODE *picode, Int thisDefIdx, LOCAL_ID *locId)
{
    Int remaining = picode->du1.numRegsDef;

    if (remaining == thisDefIdx)
    {
        while (remaining > 0 &&
               picode->du1.idx[remaining-1][0] == 0 &&
               !(picode->du.lastDefRegi))
        {
            remaining--;
        }
    }

    if (remaining == 0)
    {
        picode->invalidate();
        return (TRUE);
    }

    switch (picode->ic.hl.opcode) {
    case HLI_ASSIGN:
        removeRegFromLong (regi, locId, picode->ic.hl.oper.asgn.lhs);
        break;
    case HLI_POP:
    case HLI_PUSH:
        removeRegFromLong (regi, locId, picode->ic.hl.oper.exp);
        break;
    default:
        return (FALSE);
    }

    /* Common bookkeeping for the three opcodes handled above */
    picode->du1.numRegsDef--;
    picode->du.def &= maskDuReg[regi];
    return (FALSE);
}
/* Translates LOW_LEVEL icodes to HIGH_LEVEL icodes - 1st stage.
 * Note: this process should be done before data flow analysis, which
 * refines the HIGH_LEVEL icodes.
 *
 * Every icode flagged NOT_HLL is invalidated. For each remaining valid
 * low-level icode the operands are turned into expressions and the icode is
 * rewritten as an assignment, call, push, pop or return according to its
 * opcode. Opcodes without a case below (and iXCHG) are left unchanged.
 *
 * NOTE(review): lhs and rhs are declared once outside the loop and are only
 * assigned when the flags permit (no IM_OPS / NO_OPS / NO_SRC), so a case
 * that reads an operand the flags suppressed sees a value left over from an
 * earlier icode, or an uninitialised one - confirm the flag tables rule
 * this out. */
void Function::highLevelGen()
{ Int i, /* idx into icode array */
numIcode; /* number of icode instructions */
ICODE * pIcode; /* ptr to current icode node */
COND_EXPR *lhs, *rhs; /* left- and right-hand side of expression */
flags32 flg; /* icode flags */
numIcode = Icode.GetNumIcodes();
for (i = 0; i < numIcode; i++)
{
pIcode = Icode.GetIcode(i);
if ((pIcode->ic.ll.flg & NOT_HLL) == NOT_HLL)
pIcode->invalidate();
if ((pIcode->type == LOW_LEVEL) && (pIcode->invalid == FALSE))
{
flg = pIcode->ic.ll.flg;
/* Build operand expressions; the switch below runs regardless of the
 * outcome of these tests */
if ((flg & IM_OPS) != IM_OPS) /* not processing IM_OPS yet */
if ((flg & NO_OPS) != NO_OPS) /* if there are opers */
{
if ((flg & NO_SRC) != NO_SRC) /* if there is src op */
rhs = COND_EXPR::id (pIcode, SRC, this, i, pIcode, NONE);
lhs = COND_EXPR::id (pIcode, DST, this, i, pIcode, NONE);
}
switch (pIcode->ic.ll.opcode) {
case iADD: rhs = COND_EXPR::boolOp (lhs, rhs, ADD);
pIcode->setAsgn(lhs, rhs);
break;
case iAND: rhs = COND_EXPR::boolOp (lhs, rhs, AND);
pIcode->setAsgn(lhs, rhs);
break;
case iCALL:
case iCALLF: pIcode->newCallHl();
break;
case iDEC:
/* dst = dst - 1 */
rhs = COND_EXPR::idKte (1, 2);
rhs = COND_EXPR::boolOp (lhs, rhs, SUB);
pIcode->setAsgn(lhs, rhs);
break;
case iDIV:
case iIDIV:/* should be signed div */
rhs = COND_EXPR::boolOp (lhs, rhs, DIV);
/* The result is assigned to AL for byte operations, AX otherwise */
if (pIcode->ic.ll.flg & B)
{
lhs = COND_EXPR::idReg (rAL, 0, &localId);
pIcode->setRegDU( rAL, eDEF);
}
else
{
lhs = COND_EXPR::idReg (rAX, 0, &localId);
pIcode->setRegDU( rAX, eDEF);
}
pIcode->setAsgn(lhs, rhs);
break;
case iIMUL: rhs = COND_EXPR::boolOp (lhs, rhs, MUL);
lhs = COND_EXPR::id (pIcode, LHS_OP, this, i, pIcode,
NONE);
pIcode->setAsgn(lhs, rhs);
break;
case iINC: rhs = COND_EXPR::idKte (1, 2);
rhs = COND_EXPR::boolOp (lhs, rhs, ADD);
pIcode->setAsgn(lhs, rhs);
break;
case iLEA: rhs = COND_EXPR::unary (ADDRESSOF, rhs);
pIcode->setAsgn(lhs, rhs);
break;
case iMOD: rhs = COND_EXPR::boolOp (lhs, rhs, MOD);
/* The result is assigned to AH for byte operations, DX otherwise */
if (pIcode->ic.ll.flg & B)
{
lhs = COND_EXPR::idReg (rAH, 0, &localId);
pIcode->setRegDU( rAH, eDEF);
}
else
{
lhs = COND_EXPR::idReg (rDX, 0, &localId);
pIcode->setRegDU( rDX, eDEF);
}
pIcode->setAsgn(lhs, rhs);
break;
case iMOV: pIcode->setAsgn(lhs, rhs);
break;
case iMUL: rhs = COND_EXPR::boolOp (lhs, rhs, MUL);
lhs = COND_EXPR::id (pIcode, LHS_OP, this, i, pIcode,
NONE);
pIcode->setAsgn(lhs, rhs);
break;
case iNEG: rhs = COND_EXPR::unary (NEGATION, lhs);
pIcode->setAsgn(lhs, rhs);
break;
case iNOT: rhs = COND_EXPR::boolOp (NULL, rhs, NOT);
pIcode->setAsgn(lhs, rhs);
break;
case iOR: rhs = COND_EXPR::boolOp (lhs, rhs, OR);
pIcode->setAsgn(lhs, rhs);
break;
case iPOP: pIcode->setUnary(HLI_POP, lhs);
break;
case iPUSH: pIcode->setUnary(HLI_PUSH, lhs);
break;
case iRET:
case iRETF: pIcode->setUnary(HLI_RET, NULL);
break;
case iSHL: rhs = COND_EXPR::boolOp (lhs, rhs, SHL);
pIcode->setAsgn(lhs, rhs);
break;
case iSAR: /* signed */
case iSHR: rhs = COND_EXPR::boolOp (lhs, rhs, SHR); /* unsigned*/
pIcode->setAsgn(lhs, rhs);
break;
case iSIGNEX: pIcode->setAsgn(lhs, rhs);
break;
case iSUB: rhs = COND_EXPR::boolOp (lhs, rhs, SUB);
pIcode->setAsgn(lhs, rhs);
break;
case iXCHG:
/* Not translated at this stage */
break;
case iXOR: rhs = COND_EXPR::boolOp (lhs, rhs, XOR);
pIcode->setAsgn(lhs, rhs);
break;
}
}
}
}
/* Rewrites the expression *exp into its logical inverse; used for
 * if..then[..else] statements, to reflect the condition that takes the
 * then part.
 *   - relational operators are replaced through the invCondOp table;
 *   - DBL_AND / DBL_OR are swapped and both operands are inverted
 *     recursively;
 *   - arithmetic and bitwise operators are wrapped in a NEGATION node;
 *   - a NEGATION node is replaced by its operand (the node itself is not
 *     released - TODO: memleak here);
 *   - anything else, including a NULL expression, is left as it is. */
void inverseCondOp (COND_EXPR **exp)
{
    static condOp invCondOp[] = {GREATER, GREATER_EQUAL, NOT_EQUAL, EQUAL,
                                 LESS_EQUAL, LESS, DUMMY,DUMMY,DUMMY,DUMMY,
                                 DUMMY, DUMMY, DUMMY, DUMMY, DUMMY, DUMMY,
                                 DUMMY, DBL_OR, DBL_AND};

    COND_EXPR *node = *exp;
    if (node == NULL)
        return;

    if (node->type == NEGATION)
    {
        *exp = node->expr.unaryExp;
        return;
    }
    if (node->type != BOOLEAN_OP)
        return;                 /* other types are left unmodified */

    auto &op = node->expr.boolExpr.op;
    switch (op)
    {
    case DBL_AND: case DBL_OR:
        op = invCondOp[op];
        inverseCondOp (&node->expr.boolExpr.lhs);
        inverseCondOp (&node->expr.boolExpr.rhs);
        break;

    case LESS_EQUAL: case LESS: case EQUAL:
    case NOT_EQUAL: case GREATER: case GREATER_EQUAL:
        op = invCondOp[op];
        break;

    case AND: case OR: case XOR: case NOT: case ADD:
    case SUB: case MUL: case DIV: case SHR: case SHL: case MOD:
        *exp = COND_EXPR::unary (NEGATION, node);
        break;
    } /* eos */
}
/* Builds the text of a call to tproc with its actual parameters:
 * "<name> (<arg>, <arg>, ...)".
 *   tproc  : procedure being called; supplies the name
 *   args   : stack frame whose symbols carry the actual argument expressions
 *   pproc  : procedure containing the call, passed on to walkCondExpr
 *   numLoc : passed on to walkCondExpr */
std::string writeCall (Function * tproc, STKFRAME * args, Function * pproc, Int *numLoc)
{
    std::ostringstream text;
    text << tproc->name << " (";

    const size_t argCount = args->sym.size();
    for (size_t n = 0; n < argCount; ++n)
    {
        if (n != 0)
            text << ", ";
        text << walkCondExpr (args->sym[n].actual, pproc, numLoc);
    }

    text << ")";
    return text.str();
}
/* Produces the text of a HLI_JCOND icode: "if <condition> {\n".
 * The condition stored in h is inverted in place with inverseCondOp()
 * before it is printed.
 *
 * Returns a pointer to the file-level line buffer, which is overwritten by
 * the next call to any of the write* routines. Text that does not fit in
 * the buffer is truncated rather than written past its end. */
char *writeJcond (HLTYPE h, Function * pProc, Int *numLoc)
{
    memset (buf, ' ', sizeof(buf));
    buf[0] = '\0';

    std::string line = "if ";
    inverseCondOp (&h.oper.exp);
    line += walkCondExpr (h.oper.exp, pProc, numLoc);
    line += " {\n";

    /* buf is empty here, so at most sizeof(buf) - 1 characters fit */
    strncat (buf, line.c_str(), sizeof(buf) - 1);
    return (buf);
}
/* Produces the text of a HLI_JCOND icode without inverting its condition:
 * "if <condition> {\n". This is used in the case when the THEN clause of an
 * if..then..else is empty: the clause is negated and the ELSE clause is
 * used instead.
 *
 * Returns a pointer to the file-level line buffer, which is overwritten by
 * the next call to any of the write* routines. Text that does not fit in
 * the buffer is truncated rather than written past its end. */
char *writeJcondInv (HLTYPE h, Function * pProc, Int *numLoc)
{
    memset (buf, ' ', sizeof(buf));
    buf[0] = '\0';

    std::string line = "if ";
    line += walkCondExpr (h.oper.exp, pProc, numLoc);
    line += " {\n";

    /* buf is empty here, so at most sizeof(buf) - 1 characters fit */
    strncat (buf, line.c_str(), sizeof(buf) - 1);
    return (buf);
}
/* Returns a string with the contents of the current high-level icode.
 * Note: this routine does not output the contents of HLI_JCOND icodes. This
 *       is done in a separate routine to be able to support the removal of
 *       empty THEN clauses on an if..then..else.
 *
 * Output per opcode:
 *   HLI_ASSIGN : "<lhs> = <rhs>;\n"
 *   HLI_CALL   : "<call>;\n"
 *   HLI_RET    : "return (<exp>);\n", or nothing when the expression text
 *                is empty
 *   HLI_POP    : "HLI_POP <exp>\n"
 *   HLI_PUSH   : "HLI_PUSH <exp>\n"
 * Any other opcode yields an empty string.
 *
 * Returns a pointer to the file-level line buffer, which is overwritten by
 * the next call to any of the write* routines. Text that does not fit in
 * the buffer is truncated rather than written past its end. */
char *write1HlIcode (HLTYPE h, Function * pProc, Int *numLoc)
{
    std::string e;
    std::string line;

    memset (buf, ' ', sizeof(buf));
    buf[0] = '\0';

    switch (h.opcode) {
    case HLI_ASSIGN:
        e = walkCondExpr (h.oper.asgn.lhs, pProc, numLoc);
        line += e;
        line += " = ";
        e = walkCondExpr (h.oper.asgn.rhs, pProc, numLoc);
        line += e;
        line += ";\n";
        break;
    case HLI_CALL:
        e = writeCall (h.oper.call.proc, h.oper.call.args, pProc,
                       numLoc);
        line += e;
        line += ";\n";
        break;
    case HLI_RET:
        e = walkCondExpr (h.oper.exp, pProc, numLoc);
        if (! e.empty())
        {
            line += "return (";
            line += e;
            line += ");\n";
        }
        break;
    case HLI_POP:
        line += "HLI_POP ";
        e = walkCondExpr (h.oper.exp, pProc, numLoc);
        line += e;
        line += "\n";
        break;
    case HLI_PUSH:
        line += "HLI_PUSH ";
        e = walkCondExpr (h.oper.exp, pProc, numLoc);
        line += e;
        line += "\n";
        break;
    }

    /* buf is empty here, so at most sizeof(buf) - 1 characters fit */
    strncat (buf, line.c_str(), sizeof(buf) - 1);
    return (buf);
}
/* Returns the value of 2 to the power of i (i is expected to be
 * non-negative and small enough for the result to fit in an Int). */
Int power2 (Int i)
{
    return (i == 0) ? 1 : (1 << i);
}
/* Prints the definition/use information of this instruction to stdout:
 *   - the registers whose bit is set in du.def and in du.use (a line is
 *     printed only when at least one register is listed);
 *   - the number of registers defined and, for each definition that has
 *     uses, the du1 chain of icode indexes;
 *   - for HLI_CALL, the number of parameter bytes.
 * idx is the index of this icode and is only used to label the du1 lines. */
void ICODE::writeDU(Int idx)
{
    std::string regs;

    /* Defined registers */
    for (Int bit = 0; bit < (INDEXBASE-1); bit++)
    {
        if ((du.def & power2(bit)) != 0)
        {
            regs += allRegs[bit];
            regs += " ";
        }
    }
    if (! regs.empty())
        printf ("Def (reg) = %s\n", regs.c_str());

    /* Used registers */
    regs.clear();
    for (Int bit = 0; bit < INDEXBASE; bit++)
    {
        if ((du.use & power2(bit)) != 0)
        {
            regs += allRegs[bit];
            regs += " ";
        }
    }
    if (! regs.empty())
        printf ("Use (reg) = %s\n", regs.c_str());

    /* Print du1 chain */
    printf ("# regs defined = %d\n", du1.numRegsDef);
    for (Int def = 0; def < MAX_REGS_DEF; def++)
    {
        if (du1.idx[def][0] == 0)
            continue;

        printf ("%d: du1[%d][] = ", idx, def);
        /* A zero entry ends the list of uses */
        for (Int use = 0; use < MAX_USES && du1.idx[def][use] != 0; use++)
            printf ("%d ", du1.idx[def][use]);
        printf ("\n");
    }

    /* For HLI_CALL, print # parameter bytes */
    if (ic.hl.opcode == HLI_CALL)
        printf ("# param bytes = %d\n", ic.hl.oper.call.args->cb);
    printf ("\n");
}
/* Releases the expressions held by the first numIcodes entries of icode.
 * HLI_ASSIGN releases both sides; HLI_POP, HLI_PUSH and HLI_JCOND release
 * their single operand. Other opcodes hold nothing that is released here. */
void freeHlIcode (ICODE * icode, Int numIcodes)
{
    for (Int n = 0; n < numIcodes; n++)
    {
        HLTYPE hl;
        hl = icode[n].ic.hl;

        if (hl.opcode == HLI_ASSIGN)
        {
            hl.oper.asgn.lhs->release();
            hl.oper.asgn.rhs->release();
        }
        else if (hl.opcode == HLI_POP || hl.opcode == HLI_PUSH ||
                 hl.opcode == HLI_JCOND)
        {
            hl.oper.exp->release();
        }
    }
}

123
src/icode.cpp Normal file
View File

@@ -0,0 +1,123 @@
// Object oriented icode code for dcc
// (C) 1997 Mike Van Emmerik
#include <stdlib.h>
#include <malloc.h>
#include <memory.h>
#include "types.h" // Common types like byte, etc
#include "ast.h" // Some icode types depend on these
#include "icode.h"
#define ICODE_DELTA 25 // Amount to allocate for new chunk
/* Constructs an icode record; nothing is set up explicitly here. */
CIcodeRec::CIcodeRec()
{
}
/* Destroys the icode record; no explicit clean-up is performed here. */
CIcodeRec::~CIcodeRec()
{
}
/* Appends a copy of *pIcode to the end of the icode array and returns a
 * pointer to the stored copy.
 * NOTE(review): the returned pointer presumably stays valid only until the
 * array grows again - confirm against the container this class is built on. */
ICODE * CIcodeRec::addIcode(ICODE *pIcode)
{
    ICODE &stored = *insert(end(), *pIcode);
    return &stored;
}
/* Returns a pointer to the first icode, or a null pointer when the record
 * holds no icodes (calling front() on an empty container is undefined). */
ICODE * CIcodeRec::GetFirstIcode()
{
    if (empty())
        return nullptr;
    return &front();
}
/* Don't need this; just pIcode++ since array is guaranteed to be contiguous
ICODE * CIcodeRec::GetNextIcode(ICODE * pCurIcode)
{
int idx = pCurIcode - icode; // Current index
ASSERT(idx+1 < numIcode);
return &icode[idx+1];
}
*/
/* Tells whether pCurIcode points at one of the icodes stored in this
 * record, i.e. whether its distance from the first icode is a valid index.
 * An empty record contains no valid icode, so the answer is always false
 * there; this also avoids calling front() on an empty container. */
boolT CIcodeRec::IsValid(ICODE *pCurIcode)
{
    if (empty())
        return false;
    ptrdiff_t idx = pCurIcode - &this->front();     // Current index
    return (idx>=0) && (idx < size());
}
/* Returns the number of icodes currently stored. */
int CIcodeRec::GetNumIcodes()
{
    const int count = size();
    return count;
}
/* Records pnewBB as the owning basic block of every icode with an index in
 * [start, end] (both inclusive). Indexes are checked by at(). */
void CIcodeRec::SetInBB(int start, int end, BB *pnewBB)
{
    int pos = start;
    while (pos <= end)
    {
        at(pos).inBB = pnewBB;
        ++pos;
    }
}
void CIcodeRec::SetImmediateOp(int ip, dword dw)
{
at(ip).ic.ll.immed.op = dw;
}
void CIcodeRec::SetLlFlag(int ip, dword flag)
{
at(ip).ic.ll.flg |= flag;
}
/* Returns the low-level flags of the icode at index ip. */
dword CIcodeRec::GetLlFlag(int ip)
{
    ICODE &entry = at(ip);
    return entry.ic.ll.flg;
}
void CIcodeRec::ClearLlFlag(int ip, dword flag)
{
at(ip).ic.ll.flg &= (~flag);
}
void CIcodeRec::SetLlInvalid(int ip, boolT fInv)
{
at(ip).invalid = fInv;
}
/* Returns the low-level label of the icode at index ip. */
dword CIcodeRec::GetLlLabel(int ip)
{
    ICODE &entry = at(ip);
    return entry.ic.ll.label;
}
/* Returns the low-level opcode of the icode at index ip. */
llIcode CIcodeRec::GetLlOpcode(int ip)
{
    ICODE &entry = at(ip);
    return entry.ic.ll.opcode;
}
/* labelSrch - Searches the icodes for the first instruction whose label
 * equals target. On success the index of that icode is stored in *pIndex
 * and TRUE is returned; otherwise *pIndex is left untouched and FALSE is
 * returned. */
boolT CIcodeRec::labelSrch(dword target, Int *pIndex)
{
    Int pos = 0;
    while (pos < size())
    {
        if (at(pos).ic.ll.label == target)
        {
            *pIndex = pos;
            return TRUE;
        }
        pos++;
    }
    return FALSE;
}
/* Returns a pointer to the icode at index ip; the index is checked by at(). */
ICODE * CIcodeRec::GetIcode(int ip)
{
    ICODE &entry = at(ip);
    return &entry;
}

1344
src/idioms.cpp Normal file

File diff suppressed because it is too large Load Diff

424
src/locident.cpp Normal file
View File

@@ -0,0 +1,424 @@
/*
* File: locIdent.c
* Purpose: support routines for high-level local identifier definitions.
* Date: October 1993
* (C) Cristina Cifuentes
*/
#include "dcc.h"
#include <string.h>
#define LOCAL_ID_DELTA 25
#define IDX_ARRAY_DELTA 5
/* Appends a new identifier node to the local identifier table.
 * Arguments: t : type of the identifier
 *            f : frame where this variable is located
 * Nothing is returned; callers reach the new entry as the last element of
 * the table. */
void LOCAL_ID::newIdent(hlType t, frameType f)
{
    id_arr.push_back(ID(t, f));
}
/* Returns the index of the register identifier of type t (one of the
 * TYPE_BYTE_(UN)SIGN / TYPE_WORD_(UN)SIGN types) for register regi.
 * An existing entry with the same type and register is reused; otherwise a
 * new REG_FRAME entry is appended and its index returned. */
Int LOCAL_ID::newByteWordReg(hlType t, byte regi)
{
    /* Check for entry in the table */
    for (size_t n = 0; n < id_arr.size(); ++n)
    {
        ID &entry = id_arr[n];
        if ((entry.type == t) && (entry.id.regi == regi))
            return (Int)n;
    }

    /* Not in table, create new identifier */
    newIdent (t, REG_FRAME);
    id_arr.back().id.regi = regi;
    return (Int)(id_arr.size() - 1);
}
/* Flags the entry associated with the offset off as illegal, because this
 * offset is part of a long stack variable.
 * Only the first TYPE_WORD_SIGN or TYPE_BYTE_SIGN entry with that offset and
 * a regOff of 0 is flagged; a message is printed when there is none.
 * Note: removing the entry by shifting the rest of the array would be easy
 *       enough, but indexes into this array have already been saved in
 *       several places; flagging the entry is therefore all that can be
 *       done. */
void LOCAL_ID::flagByteWordId (Int off)
{
    for (size_t n = 0; n < id_arr.size(); ++n)
    {
        ID &entry = id_arr[n];
        const bool byteOrWord = (entry.type == TYPE_WORD_SIGN) ||
                                (entry.type == TYPE_BYTE_SIGN);
        if (byteOrWord && (entry.id.bwId.off == off) && (entry.id.bwId.regOff == 0))
        {
            entry.illegal = TRUE;
            return;
        }
    }
    printf("Entry not found in LOCAL_ID::flagByteWordId \n");
}
/* Returns the index of the stack identifier at offset off with index
 * register regOff. An existing entry with the same offset and regOff is
 * reused (its type is not compared); otherwise a new STK_FRAME entry of
 * type t is appended and its index returned. */
Int LOCAL_ID::newByteWordStk(hlType t, Int off, byte regOff)
{
    /* Check for entry in the table */
    for (size_t n = 0; n < id_arr.size(); ++n)
    {
        ID &entry = id_arr[n];
        if ((entry.id.bwId.off == off) && (entry.id.bwId.regOff == regOff))
            return (Int)n;      /* index of the existing entry */
    }

    /* Not in table, create new identifier */
    newIdent (t, STK_FRAME);
    ID &fresh = id_arr.back();
    fresh.id.bwId.regOff = regOff;
    fresh.id.bwId.off = off;
    return (Int)(id_arr.size() - 1);
}
/* Returns the index of the global identifier described by (seg, off, regi),
 * creating a new GLB_FRAME identifier of type t when the table holds none.
 * Arguments: seg : segment value for the global variable
 *            off : offset from segment
 *            regi: indexed register into the global variable
 *            ix  : index into icode array (not used by this routine)
 *            t   : HIGH_LEVEL type; only used when a new entry is created,
 *                  the lookup deliberately does not compare types */
Int LOCAL_ID::newIntIdx(int16 seg, int16 off, byte regi,Int ix, hlType t)
{
    /* Search the table for an entry with the same location */
    auto match = std::find_if(id_arr.begin(), id_arr.end(),
                              [seg,off,regi](ID &el)->bool {
        return (el.id.bwGlb.seg == seg) &&
               (el.id.bwGlb.off == off) &&
               (el.id.bwGlb.regi == regi);
    });
    if (match != id_arr.end())
        return match - id_arr.begin();

    /* Not in the table: append a new identifier and fill in its location */
    newIdent (t, GLB_FRAME);
    const Int slot = id_arr.size() - 1;
    id_arr[slot].id.bwGlb.seg = seg;
    id_arr[slot].id.bwGlb.off = off;
    id_arr[slot].id.bwGlb.regi = regi;
    return slot;
}
/* Returns the index of the long register identifier regH:regL, creating a
 * new REG_FRAME identifier of type t when the table holds none.  In both
 * cases the icode index ix ends up recorded in the entry's idx list.  The
 * lookup deliberately does not compare types. */
Int LOCAL_ID::newLongReg(hlType t, byte regH, byte regL, Int ix)
{
    /* Search the table for an entry using the same register pair */
    auto match = std::find_if(id_arr.begin(), id_arr.end(),
                              [regH,regL](ID &el)->bool {
        return (el.id.longId.h == regH) && (el.id.longId.l == regL);
    });
    if (match != id_arr.end())
    {
        /* Record the icode index only if it is not already listed */
        if (!match->idx.inList(ix))
            match->idx.push_back(ix);
        return match - id_arr.begin();
    }

    /* Not in the table: append a new identifier for the register pair */
    newIdent (t, REG_FRAME);
    const Int slot = id_arr.size() - 1;
    id_arr[slot].idx.push_back(ix);
    id_arr[slot].id.longId.h = regH;
    id_arr[slot].id.longId.l = regL;
    return slot;
}
/* Returns the index of the long global identifier at seg:offH / seg:offL,
 * creating a new GLB_FRAME identifier of type t when the table holds none.
 * The lookup deliberately does not compare types; ix is not used here. */
Int LOCAL_ID::newLongGlb(int16 seg, int16 offH, int16 offL,Int ix, hlType t)
{
    /* Search the table for an entry with the same segment and offsets */
    auto match = std::find_if(id_arr.begin(), id_arr.end(),
                              [seg,offH,offL](ID &el)->bool {
        return (el.id.longGlb.seg == seg) &&
               (el.id.longGlb.offH == offH) &&
               (el.id.longGlb.offL == offL);
    });
    if (match != id_arr.end())
        return match - id_arr.begin();

    /* Not in the table: append a new identifier and fill in its location */
    newIdent (t, GLB_FRAME);
    const Int slot = id_arr.size() - 1;
    id_arr[slot].id.longGlb.seg = seg;
    id_arr[slot].id.longGlb.offH = offH;
    id_arr[slot].id.longGlb.offL = offL;
    return slot;
}
/* Returns the index of the long global identifier at seg:offH / seg:offL
 * that is indexed by register regi, creating a new GLB_FRAME identifier of
 * type t when the table holds none.  The lookup deliberately does not
 * compare types; ix is not used here. */
Int LOCAL_ID::newLongIdx( int16 seg, int16 offH, int16 offL,byte regi, Int ix, hlType t)
{
    /* Search the table for an entry with the same location and register */
    auto match = std::find_if(id_arr.begin(), id_arr.end(),
                              [seg,offH,offL,regi](ID &el)->bool {
        return (el.id.longGlb.seg == seg) &&
               (el.id.longGlb.offH == offH) &&
               (el.id.longGlb.offL == offL) &&
               (el.id.longGlb.regi == regi);
    });
    if (match != id_arr.end())
        return match - id_arr.begin();

    /* Not in the table: append a new identifier and fill in its location */
    newIdent (t, GLB_FRAME);
    const Int slot = id_arr.size() - 1;
    id_arr[slot].id.longGlb.seg = seg;
    id_arr[slot].id.longGlb.offH = offH;
    id_arr[slot].id.longGlb.offL = offL;
    id_arr[slot].id.longGlb.regi = regi;
    return slot;
}
/* Returns the index of the long stack identifier of type t at offsets
 * offH:offL.  When the table holds none, the byte/word entries at offH and
 * offL are flagged as illegal and a new STK_FRAME identifier is appended. */
Int LOCAL_ID::newLongStk(hlType t, Int offH, Int offL)
{
    /* Search the table for an entry with the same type and offsets */
    auto match = std::find_if(id_arr.begin(), id_arr.end(),
                              [t,offH,offL](ID &el)->bool {
        return (el.type == t) &&
               (el.id.longStkId.offH == offH) &&
               (el.id.longStkId.offL == offL);
    });
    if (match != id_arr.end())
        return match - id_arr.begin();

    /* Not in the table: the two halves no longer stand on their own */
    flagByteWordId (offH);
    flagByteWordId (offL);

    /* Append the new long identifier */
    newIdent (t, STK_FRAME);
    const Int slot = id_arr.size() - 1;
    id_arr[slot].id.longStkId.offH = offH;
    id_arr[slot].id.longStkId.offL = offL;
    return slot;
}
/* Returns the index to an appropriate long identifier.
 * Arguments: sd     : selects the source (SRC) or destination operand
 *            pIcode : first low-level icode of the pair
 *            f      : LOW_FIRST if pIcode holds the low part, otherwise
 *                     pIcode holds the high part (HIGH_FIRST)
 *            ix     : index into the icode array
 *            du     : def/use flag applied to the other register half
 *            off    : distance from pIcode to the icode holding the other part
 * Returns -1 when the operand form is not supported (a diagnostic is
 * printed); previously the return value was uninitialised in those cases.
 * NOTE(review): callers are not visible here - confirm they cope with -1.
 * Note: long constants should be checked first and stored as a long integer
 *       number in an expression record. */
Int LOCAL_ID::newLong(opLoc sd, ICODE *pIcode, hlFirst f, Int ix,operDu du, Int off)
{
    Int idx = -1;               /* stays -1 on the unsupported paths */
    ICODEMEM *pmH, *pmL;

    /* Pick the operands holding the high and low halves */
    if (f == LOW_FIRST)
    {
        pmL = (sd == SRC) ? &pIcode->ic.ll.src : &pIcode->ic.ll.dst;
        pmH = (sd == SRC) ? &(pIcode+off)->ic.ll.src : &(pIcode+off)->ic.ll.dst;
    }
    else /* HIGH_FIRST */
    {
        pmH = (sd == SRC) ? &pIcode->ic.ll.src : &pIcode->ic.ll.dst;
        pmL = (sd == SRC) ? &(pIcode+off)->ic.ll.src : &(pIcode+off)->ic.ll.dst;
    }

    if (pmL->regi == 0)                                 /* global variable */
        idx = newLongGlb(pmH->segValue, pmH->off, pmL->off, ix,TYPE_LONG_SIGN);
    else if (pmL->regi < INDEXBASE)                     /* register */
    {
        idx = newLongReg(TYPE_LONG_SIGN, pmH->regi, pmL->regi, ix);
        if (f == HIGH_FIRST)
            pIcode->setRegDU( pmL->regi, du);           /* low part */
        else
            pIcode->setRegDU( pmH->regi, du);           /* high part */
    }
    else if (pmL->off) {                                /* offset */
        if ((pmL->seg == rSS) && (pmL->regi == INDEXBASE + 6)) /* idx on bp */
            idx = newLongStk(TYPE_LONG_SIGN, pmH->off, pmL->off);
        else if ((pmL->seg == rDS) && (pmL->regi == INDEXBASE + 7)) /* bx */
        {                                       /* glb var indexed on bx */
            idx = newLongIdx(pmH->segValue, pmH->off, pmL->off,rBX, ix, TYPE_LONG_SIGN);
            pIcode->setRegDU( rBX, eUSE);
        }
        else                                            /* idx <> bp, bx */
            printf ("long not supported, idx <> bp\n");
    }
    else  /* (pm->regi >= INDEXBASE && pm->off = 0) => indexed && no off */
        printf ("long not supported, idx && no off\n");

    return (idx);
}
/* Checks whether the long stack identifier matches the destination or the
 * source operands of the low-level icode pair pIcode / pIcode+off, by
 * comparing stack offsets.  On a match the lhs and rhs expressions of the
 * instruction are returned and the result is TRUE; otherwise FALSE.
 * Arguments: longId   : long stack identifier
 *            pIcode   : ptr to first LOW_LEVEL icode instruction (high part)
 *            i        : idx into local identifier table for longId
 *            idx      : idx into icode array
 *            pProc    : ptr to current procedure record
 *            rhs, lhs : return expressions if successful; rhs is left
 *                       untouched when longId is the destination and the
 *                       icode is flagged NO_SRC
 *            off      : distance from pIcode to the icode with the low part */
boolT checkLongEq (LONG_STKID_TYPE longId, ICODE *pIcode, Int i, Int idx,
                   Function * pProc, COND_EXPR **rhs, COND_EXPR **lhs, Int off)
{
    ICODE *pLow = pIcode + off;         /* icode carrying the low half */

    /* longId is the destination of the pair */
    if ((longId.offH == pIcode->ic.ll.dst.off) &&
        (longId.offL == pLow->ic.ll.dst.off))
    {
        *lhs = COND_EXPR::idLongIdx (i);
        if ((pIcode->ic.ll.flg & NO_SRC) != NO_SRC)
            *rhs = COND_EXPR::idLong (&pProc->localId, SRC, pIcode, HIGH_FIRST,
                                      idx, eUSE, off);
        return (TRUE);
    }

    /* longId is the source of the pair */
    if ((longId.offH == pIcode->ic.ll.src.off) &&
        (longId.offL == pLow->ic.ll.src.off))
    {
        *lhs = COND_EXPR::idLong (&pProc->localId, DST, pIcode, HIGH_FIRST, idx,
                                  eDEF, off);
        *rhs = COND_EXPR::idLongIdx (i);
        return (TRUE);
    }

    return (FALSE);
}
/* Checks whether the long register identifier matches the destination or
 * the source operands of the low-level icode pair pIcode / pIcode+off, by
 * comparing registers.  On a match the lhs and rhs expressions of the
 * instruction are returned and the result is TRUE; otherwise FALSE.
 * Arguments: longId   : long register identifier (h:l register pair)
 *            pIcode   : ptr to first LOW_LEVEL icode instruction (high part)
 *            i        : idx into local identifier table for longId
 *            idx      : idx into icode array
 *            pProc    : ptr to current procedure record
 *            rhs, lhs : return expressions if successful; rhs is left
 *                       untouched when longId is the destination and the
 *                       icode is flagged NO_SRC
 *            off      : distance from pIcode to the icode with the low part */
boolT checkLongRegEq (LONGID_TYPE longId, ICODE *pIcode, Int i, Int idx,
                      Function * pProc, COND_EXPR **rhs, COND_EXPR **lhs, Int off)
{
    ICODE *pLow = pIcode + off;         /* icode carrying the low half */

    /* longId is the destination of the pair */
    if ((longId.h == pIcode->ic.ll.dst.regi) &&
        (longId.l == pLow->ic.ll.dst.regi))
    {
        *lhs = COND_EXPR::idLongIdx (i);
        if ((pIcode->ic.ll.flg & NO_SRC) != NO_SRC)
            *rhs = COND_EXPR::idLong (&pProc->localId, SRC, pIcode, HIGH_FIRST, idx, eUSE, off);
        return (TRUE);
    }

    /* longId is the source of the pair */
    if ((longId.h == pIcode->ic.ll.src.regi) &&
        (longId.l == pLow->ic.ll.src.regi))
    {
        *lhs = COND_EXPR::idLong (&pProc->localId, DST, pIcode, HIGH_FIRST, idx, eDEF, off);
        *rhs = COND_EXPR::idLongIdx (i);
        return (TRUE);
    }

    return (FALSE);
}
/* Given the index idx of an entry in the local identifier table locTbl,
 * returns the other half of a long register pair: the low register when
 * regi is the high one and vice versa.  Returns 0 when the entry is not a
 * long register variable or regi is neither of its halves. */
byte otherLongRegi (byte regi, Int idx, LOCAL_ID *locTbl)
{
    const ID &entry = locTbl->id_arr[idx];
    const bool isLong = (entry.type == TYPE_LONG_SIGN) ||
                        (entry.type == TYPE_LONG_UNSIGN);

    if ((entry.loc != REG_FRAME) || !isLong)
        return 0;
    if (entry.id.longId.h == regi)
        return (entry.id.longId.l);
    if (entry.id.longId.l == regi)
        return (entry.id.longId.h);
    return 0;   // Cristina: please check this!
}
/* Scans the table for word-sized entries whose register is regL or regH,
 * i.e. halves of a long register identifier that were also recorded on
 * their own.  Each such entry is renamed to name, given the macro "LO"
 * (regL) or "HI" (regH), and flagged as illegal.  regL is tested first. */
void LOCAL_ID::propLongId (byte regL, byte regH, const char *name)
{
    for (auto it = id_arr.begin(); it != id_arr.end(); ++it)
    {
        /* Only word identifiers can be half of a long */
        if ((it->type != TYPE_WORD_SIGN) && (it->type != TYPE_WORD_UNSIGN))
            continue;

        const char *half;
        if (it->id.regi == regL)
            half = "LO";
        else if (it->id.regi == regH)
            half = "HI";
        else
            continue;

        strcpy (it->name, name);
        strcpy (it->macro, half);
        it->hasMacro = TRUE;
        it->illegal = TRUE;
    }
}

1208
src/parser.cpp Normal file

File diff suppressed because it is too large Load Diff

448
src/perfhlib.cpp Normal file
View File

@@ -0,0 +1,448 @@
/*
* Perfect hashing function library. Contains functions to generate perfect
* hashing functions
* (C) Mike van Emmerik
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "perfhlib.h"
/* Private data structures */
static int NumEntry; /* Number of entries in the hash table (# keys) */
static int EntryLen; /* Size (bytes) of each entry (size of keys) */
static int SetSize; /* Size of the char set */
static char SetMin; /* First char in the set */
static int NumVert; /* c times NumEntry */
static word *T1base, *T2base; /* Pointers to start of T1, T2 */
static word *T1, *T2; /* Pointers to T1[i], T2[i] */
static int *graphNode; /* The array of edges */
static int *graphNext; /* Linked list of edges */
static int *graphFirst;/* First edge at a vertex */
static short *g; /* g[] */
static int numEdges; /* An edge counter */
static bool *visited; /* Array of bools: whether visited */
/* Private prototypes */
static void initGraph(void);
static void addToGraph(int e, int v1, int v2);
static bool isCycle(void);
static void duplicateKeys(int v1, int v2);
void
hashParams(int _NumEntry, int _EntryLen, int _SetSize, char _SetMin,
int _NumVert)
{
/* These parameters are stored in statics so as to obviate the need for
passing all these (or defererencing pointers) for every call to hash()
*/
NumEntry = _NumEntry;
EntryLen = _EntryLen;
SetSize = _SetSize;
SetMin = _SetMin;
NumVert = _NumVert;
/* Allocate the variable sized tables etc */
if ((T1base = (word *)malloc(EntryLen * SetSize * sizeof(word))) == 0)
{
goto BadAlloc;
}
if ((T2base = (word *)malloc(EntryLen * SetSize * sizeof(word))) == 0)
{
goto BadAlloc;
}
if ((graphNode = (int *)malloc((NumEntry*2 + 1) * sizeof(int))) == 0)
{
goto BadAlloc;
}
if ((graphNext = (int *)malloc((NumEntry*2 + 1) * sizeof(int))) == 0)
{
goto BadAlloc;
}
if ((graphFirst = (int *)malloc((NumVert + 1) * sizeof(int))) == 0)
{
goto BadAlloc;
}
if ((g = (short *)malloc((NumVert+1) * sizeof(short))) == 0)
{
goto BadAlloc;
}
if ((visited = (bool *)malloc((NumVert+1) * sizeof(bool))) == 0)
{
goto BadAlloc;
}
return;
BadAlloc:
printf("Could not allocate memory\n");
hashCleanup();
exit(1);
}
void
hashCleanup(void)
{
/* Free the storage for variable sized tables etc */
if (T1base) free(T1base);
if (T2base) free(T2base);
if (graphNode) free(graphNode);
if (graphNext) free(graphNext);
if (graphFirst) free(graphFirst);
if (g) free(g);
}
void
map(void)
{
int i, j, c;
word f1, f2;
bool cycle;
byte *keys;
c = 0;
do
{
initGraph();
cycle = FALSE;
/* Randomly generate T1 and T2 */
for (i=0; i < SetSize*EntryLen; i++)
{
T1base[i] = rand() % NumVert;
T2base[i] = rand() % NumVert;
}
for (i=0; i < NumEntry; i++)
{
f1 = 0; f2 = 0;
getKey(i, &keys);
for (j=0; j < EntryLen; j++)
{
T1 = T1base + j * SetSize;
T2 = T2base + j * SetSize;
f1 += T1[keys[j] - SetMin];
f2 += T2[keys[j] - SetMin];
}
f1 %= (word)NumVert;
f2 %= (word)NumVert;
if (f1 == f2)
{
/* A self loop. Reject! */
printf("Self loop on vertex %d!\n", f1);
cycle = TRUE;
break;
}
addToGraph(numEdges++, f1, f2);
}
if (cycle || (cycle = isCycle())) /* OK - is there a cycle? */
{
printf("Iteration %d\n", ++c);
}
else
{
break;
}
}
while (/* there is a cycle */ 1);
}
/* Resets the graph to "no edges": clears the first-edge entry of every
 * vertex (vertices are origin 1), clears all 2*NumEntry+1 slots of the
 * edge list, and zeroes the edge counter. */
static void
initGraph(void)
{
    int k;

    for (k = 1; k <= NumVert; k++)
        graphFirst[k] = 0;

    /* graphNode[] needs no initialisation: its slots are all filled by
       successive calls to addToGraph() */
    const int slots = 2*NumEntry + 1;
    for (k = 0; k < slots; k++)
        graphNext[k] = 0;

    numEdges = 0;
}
/* Adds edge e between vertices v1 and v2 (all three are 0 based on entry
 * and are converted to origin 1 here).  The edge is stored twice around
 * the midpoint NumEntry: slot NumEntry+e holds v2 and is linked into v1's
 * list as +e, slot NumEntry-e holds v1 and is linked into v2's list as -e. */
static void
addToGraph(int e, int v1, int v2)
{
    const int edge = e + 1;
    const int head = v1 + 1;
    const int tail = v2 + 1;
    const int posSlot = NumEntry + edge;
    const int negSlot = NumEntry - edge;

    /* Insert the edge information */
    graphNode[posSlot] = tail;
    graphNode[negSlot] = head;

    /* Push +edge onto the list of vertex head */
    graphNext[posSlot] = graphFirst[head];
    graphFirst[head] = edge;

    /* Push -edge onto the list of vertex tail */
    graphNext[negSlot] = graphFirst[tail];
    graphFirst[tail] = -edge;
}
/* Depth first search for cycles, starting at vertex v.
 * Arguments: parentE : signed, origin 1 EDGE (not vertex) through which v
 *                      was reached; isCycle() passes -32767 for a root
 *            v       : origin 1 vertex to explore
 * Returns TRUE as soon as a genuine cycle is found, FALSE otherwise.
 * Side effects: marks vertices in visited[], prints diagnostics, and
 * zeroes (via memset) the key of an edge found to duplicate the key of
 * the parent edge, so that the edge is ignored from then on. */
bool DFS(int parentE, int v)
{
int e, w;
/* Depth first search of the graph, starting at vertex v, looking for
cycles. parent and v are origin 1. Note parent is an EDGE,
not a vertex */
visited[v] = TRUE;
/* For each e incident with v .. */
for (e = graphFirst[v]; e; e = graphNext[NumEntry+e])
{
byte *key1;
getKey(abs(e)-1, &key1);
/* NOTE(review): this reads sizeof(long) bytes of the key, whereas the
wipe below clears EntryLen bytes - confirm keys are at least
sizeof(long) bytes long and suitably aligned */
if (*(long *)key1 == 0)
{
/* A deleted key. Just ignore it */
continue;
}
/* The vertex at the other end of e (see addToGraph for the layout) */
w = graphNode[NumEntry+e];
if (visited[w])
{
/* Did we just come through this edge? If so, ignore it. */
if (abs(e) != abs(parentE))
{
/* There is a cycle in the graph. There is some subtle code here
to work around the distinct possibility that there may be
duplicate keys. Duplicate keys will always cause unit
cycles, since f1 and f2 (used to select v and w) will be the
same for both. The edges (representing an index into the
array of keys) are distinct, but the key values are not.
The logic is as follows: for the candidate edge e, check to
see if it terminates in the parent vertex. If so, we test
the keys associated with e and the parent, and if they are
the same, we can safely ignore e for the purposes of cycle
detection, since edge e adds nothing to the cycle. Cycles
involving v, w, and e0 will still be found. The parent
edge was not similarly eliminated because at the time when
it was a candidate, v was not yet visited.
We still have to remove the key from further consideration,
since each edge is visited twice, but with a different
parent edge each time.
*/
/* We save some stack space by calculating the parent vertex
for these relatively few cases where it is needed */
int parentV = graphNode[NumEntry-parentE];
if (w == parentV)
{
byte *key2;
getKey(abs(parentE)-1, &key2);
if (memcmp(key1, key2, EntryLen) == 0)
{
printf("Duplicate keys with edges %d and %d (",
e, parentE);
dispKey(abs(e)-1);
printf(" & ");
dispKey(abs(parentE)-1);
printf(")\n");
/* *(long *)key1 = 0; /* Wipe the key */
memset(key1, 0, EntryLen);
}
else
{
/* A genuine (unit) cycle. */
printf("There is a unit cycle involving vertex %d and edge %d\n", v, e);
return TRUE;
}
}
else
{
/* We have reached a previously visited vertex not the
parent. Therefore, we have uncovered a genuine cycle */
printf("There is a cycle involving vertex %d and edge %d\n", v, e);
return TRUE;
}
}
}
else /* Not yet seen. Traverse it */
{
if (DFS(e, w))
{
/* Cycle found deeper down. Exit */
return TRUE;
}
}
}
return FALSE;
}
/* Returns TRUE if the current graph contains a cycle.  All vertices
 * (origin 1) are first marked unvisited; a depth first search is then
 * started from every vertex not reached by an earlier search, with -32767
 * as the "no parent edge" value. */
static bool
isCycle(void)
{
    int v = 1;

    while (v <= NumVert)
        visited[v++] = FALSE;

    for (v = 1; v <= NumVert; v++)
    {
        if (!visited[v] && DFS(-32767, v))
            return TRUE;
    }
    return FALSE;
}
/* Visits vertex u (0 based) and, recursively, every unvisited neighbour w
 * reached through an edge e of u, assigning
 *     g[w] = (edge number - g[u]) mod NumEntry
 * kept non-negative, where the edge number is abs(e)-1. */
void
traverse(int u)
{
    visited[u] = TRUE;

    /* The edge lists are indexed by origin 1 vertices, hence 1+u */
    for (int e = graphFirst[1+u]; e != 0; e = graphNext[NumEntry+e])
    {
        const int w = graphNode[NumEntry+e]-1;
        if (visited[w])
            continue;

        g[w] = (abs(e)-1 - g[u]) % NumEntry;
        if (g[w] < 0)
            g[w] += NumEntry;       /* Keep these positive */
        traverse(w);
    }
}
/* Computes g[]: every entry is cleared and every vertex marked unvisited,
 * then traverse() is started, with g set to 0, from each vertex (0 based)
 * that an earlier traversal did not reach. */
void
assign(void)
{
    int v = 0;

    while (v < NumVert)
    {
        g[v] = 0;                   /* g is sparse; leave the gaps 0 */
        visited[v] = FALSE;
        ++v;
    }

    for (v = 0; v < NumVert; ++v)
    {
        if (visited[v])
            continue;
        g[v] = 0;
        traverse(v);
    }
}
/* Hashes the first EntryLen bytes of string: u and v are the sums, modulo
 * NumVert, of the T1 and T2 entries selected by each character position
 * and value; the result is (g[u] + g[v]) mod NumEntry.  The statics T1 and
 * T2 are left pointing at the rows used for the last character. */
int
hash(byte *string)
{
    word u = 0;
    word v = 0;

    for (int j = 0; j < EntryLen; j++)
    {
        T1 = T1base + j * SetSize;
        T2 = T2base + j * SetSize;
        u += T1[string[j] - SetMin];
        v += T2[string[j] - SetMin];
    }
    u %= NumVert;
    v %= NumVert;

    return (g[u] + g[v]) % NumEntry;
}
/* Returns the start of table T1 (the pointer stored in T1base). */
word *readT1(void)
{
    return (T1base);
}
/* Returns the start of table T2 (the pointer stored in T2base). */
word *readT2(void)
{
    return (T2base);
}
/* Returns the g[] array, viewed as an array of word (it is stored as an
 * array of short). */
word *readG(void)
{
    return reinterpret_cast<word *>(g);
}
/* Disabled debugging aid (compiled out by the #if 0 below). */
#if 0
void dispRecord(int i);
/* Prints every entry whose hash vertices (u from T1, v from T2) both fall
 * on v1 or v2, then exits with status 1.
 * Arguments: v1, v2 : origin 1 vertices; converted to 0 based below */
void
duplicateKeys(int v1, int v2)
{
int i, j;
byte *keys;
int u, v;
v1--; v2--; /* These guys are origin 1 */
printf("Duplicate keys:\n");
for (i=0; i < NumEntry; i++)
{
getKey(i, &keys);
/* u: vertex selected by T1 for this key */
u = 0;
for (j=0; j < EntryLen; j++)
{
T1 = T1base + j * SetSize;
u += T1[keys[j] - SetMin];
}
u %= NumVert;
/* Skip keys whose first vertex is neither v1 nor v2 */
if ((u != v1) && (u != v2)) continue;
/* v: vertex selected by T2 for this key */
v = 0;
for (j=0; j < EntryLen; j++)
{
T2 = T2base + j * SetSize;
v += T2[keys[j] - SetMin];
}
v %= NumVert;
if ((v == v2) || (v == v1))
{
printf("Entry #%d key: ", i+1);
for (j=0; j < EntryLen; j++) printf("%02X ", keys[j]);
printf("\n");
dispRecord(i+1);
}
}
exit(1);
}
#endif

394
src/procs.cpp Normal file
View File

@@ -0,0 +1,394 @@
/*
* File: procs.c
* Purpose: Functions to support Call graphs and procedures
* Date: November 1993
* (C) Cristina Cifuentes
*/
#include <cstring>
#include <cassert>
#include "dcc.h"
/* Static indentation buffer */
#define indSize 61 /* size of indentation buffer; max 20 */
static char indentBuf[indSize] =
" ";
/* Returns the indentation string for the given depth: a pointer to the
 * last indLevel*3 characters of indentBuf.
 * The level is clamped to the range the buffer can represent,
 * 0 .. (indSize-1)/3, i.e. at most 20; previously a deeper or negative
 * level indexed outside the buffer. */
static char *indent (Int indLevel)
{
    const Int maxLevel = (indSize - 1) / 3;

    if (indLevel < 0)
        indLevel = 0;
    else if (indLevel > maxLevel)
        indLevel = maxLevel;

    return (&indentBuf[indSize-(indLevel*3)-1]);
}
/* Adds an out edge from this call graph node to newProc, unless one of the
 * existing out edges already refers to that procedure. */
void CALL_GRAPH::insertArc (ilFunction newProc)
{
    /* Nothing to do if the procedure is already a direct callee */
    for (size_t k = 0; k != outEdges.size(); ++k)
    {
        if (outEdges[k]->proc == newProc)
            return;
    }

    /* Include new arc */
    CALL_GRAPH *callee = new CALL_GRAPH;
    callee->proc = newProc;
    outEdges.push_back(callee);
}
/* Inserts a (caller, callee) arc in the call graph tree.  The node for
 * caller is searched depth first from this node; the arc is added at the
 * first node found.  Returns TRUE if caller was found, false otherwise. */
boolT CALL_GRAPH::insertCallGraph(ilFunction caller, ilFunction callee)
{
    if (proc == caller)
    {
        insertArc (callee);
        return (TRUE);
    }

    /* Not this node: try each subtree in turn */
    for (size_t k = 0; k < outEdges.size(); ++k)
    {
        if (outEdges[k]->insertCallGraph (caller, callee))
            return true;
    }
    return (false);
}
/* Same as insertCallGraph(ilFunction, ilFunction), with the caller given
 * as a pointer: the element of pProcList located at that address is looked
 * up first (asserting that it exists). */
boolT CALL_GRAPH::insertCallGraph(Function *caller, ilFunction callee)
{
    auto pos = pProcList.begin();
    while ((pos != pProcList.end()) && (&(*pos) != caller))
        ++pos;
    assert(pos!=pProcList.end());
    return insertCallGraph(pos,callee);
}
/* Prints the name of this node's procedure, indented according to indIdx,
 * and then recursively prints every out edge one level deeper. */
void CALL_GRAPH::writeNodeCallGraph(Int indIdx)
{
    printf ("%s%s\n", indent(indIdx), proc->name);

    const Int childLevel = indIdx + 1;
    for (size_t k = 0; k < outEdges.size(); ++k)
        outEdges[k]->writeNodeCallGraph (childLevel);
}
/* Prints the "Call Graph:" header, then the graph itself starting from
 * this node at indentation level 0. */
void CALL_GRAPH::write()
{
    fputs ("\nCall Graph:\n", stdout);
    writeNodeCallGraph (0);
}
/**************************************************************************
* Routines to support arguments
*************************************************************************/
/* Updates the argument tables for a call that passes an argument in
 * register(s): the register(s) (ie. lhs of picode) are added to the formal
 * argument list of the called procedure, and the actual expression (ie. rhs
 * of picode) is added to the actual argument list of the call.
 * Arguments: pproc  : procedure containing picode
 *            picode : assignment icode that loads the register(s)
 *            ticode : call icode whose target receives the argument
 * Note: register(s) are only included once in the formal table.
 * The "arg%ld" names are now formatted from a value cast to long, so the
 * argument always matches the conversion regardless of the width of the
 * container's size type. */
void newRegArg (Function * pproc, ICODE *picode, ICODE *ticode)
{
    COND_EXPR *lhs;
    STKFRAME * ps, *ts;
    ID *id;
    Int i, tidx = 0;
    boolT regExist;
    condId type;
    Function * tproc;
    byte regL = 0, regH = 0;    /* Registers involved in arguments */

    /* Flag ticode as having register arguments */
    tproc = ticode->ic.hl.oper.call.proc;
    tproc->flg |= REG_ARGS;

    /* Get registers and index into target procedure's local list */
    ps = ticode->ic.hl.oper.call.args;
    ts = &tproc->args;
    lhs = picode->ic.hl.oper.asgn.lhs;
    type = lhs->expr.ident.idType;
    if (type == REGISTER)
    {
        regL = pproc->localId.id_arr[lhs->expr.ident.idNode.regiIdx].id.regi;
        if (regL < rAL)
            tidx = tproc->localId.newByteWordReg(TYPE_WORD_SIGN, regL);
        else
            tidx = tproc->localId.newByteWordReg(TYPE_BYTE_SIGN, regL);
    }
    else if (type == LONG_VAR)
    {
        regL = pproc->localId.id_arr[lhs->expr.ident.idNode.longIdx].id.longId.l;
        regH = pproc->localId.id_arr[lhs->expr.ident.idNode.longIdx].id.longId.h;
        tidx = tproc->localId.newLongReg(TYPE_LONG_SIGN, regH, regL, 0);
    }

    /* Check if register argument already on the formal argument list */
    regExist = FALSE;
    for (i = 0; i < ts->sym.size(); i++)
    {
        if (type == REGISTER)
        {
            if ((ts->sym[i].regs != NULL) &&
                (ts->sym[i].regs->expr.ident.idNode.regiIdx == tidx))
            {
                regExist = TRUE;
                break;
            }
        }
        else if (type == LONG_VAR)
        {
            if ((ts->sym[i].regs != NULL) &&
                (ts->sym[i].regs->expr.ident.idNode.longIdx == tidx))
            {
                regExist = TRUE;
                break;
            }
        }
    }

    /* Do ts (formal arguments) */
    if (regExist == FALSE)
    {
        STKSYM newsym;
        /* Position the new formal will occupy; used for its name */
        const long formalNo = static_cast<long>(ts->sym.size());

        sprintf (newsym.name, "arg%ld", formalNo);
        if (type == REGISTER)
        {
            if (regL < rAL)
            {
                newsym.type = TYPE_WORD_SIGN;
                newsym.regs = COND_EXPR::idRegIdx(tidx, WORD_REG);
            }
            else
            {
                newsym.type = TYPE_BYTE_SIGN;
                newsym.regs = COND_EXPR::idRegIdx(tidx, BYTE_REG);
            }
            sprintf (tproc->localId.id_arr[tidx].name, "arg%ld", formalNo);
        }
        else if (type == LONG_VAR)
        {
            newsym.regs = COND_EXPR::idLongIdx (tidx);
            newsym.type = TYPE_LONG_SIGN;
            sprintf (tproc->localId.id_arr[tidx].name, "arg%ld", formalNo);
            tproc->localId.propLongId (regL, regH,
                                       tproc->localId.id_arr[tidx].name);
        }
        ts->sym.push_back(newsym);
        ts->numArgs++;
    }

    /* Do ps (actual arguments) */
    STKSYM newsym;
    sprintf (newsym.name, "arg%ld", static_cast<long>(ps->sym.size()));
    newsym.actual = picode->ic.hl.oper.asgn.rhs;
    newsym.regs = lhs;

    /* Mask off high and low register(s) in picode */
    switch (type) {
    case REGISTER:
        id = &pproc->localId.id_arr[lhs->expr.ident.idNode.regiIdx];
        picode->du.def &= maskDuReg[id->id.regi];
        if (id->id.regi < rAL)
            newsym.type = TYPE_WORD_SIGN;
        else
            newsym.type = TYPE_BYTE_SIGN;
        break;
    case LONG_VAR:
        id = &pproc->localId.id_arr[lhs->expr.ident.idNode.longIdx];
        picode->du.def &= maskDuReg[id->id.longId.h];
        picode->du.def &= maskDuReg[id->id.longId.l];
        newsym.type = TYPE_LONG_SIGN;
        break;
    }
    ps->sym.push_back(newsym);
    ps->numArgs++;
}
/* Sets the actual argument list of the call icode picode to hold num
 * arguments: numArgs is set to num and the symbol list resized to match. */
/** NOTE: this function is not used ****/
void allocStkArgs (ICODE *picode, Int num)
{
    STKFRAME &frame = *picode->ic.hl.oper.call.args;
    frame.numArgs = num;
    frame.sym.resize(num);
}
/* Inserts the new expression (ie. the actual parameter) at the end of the
 * argument list of the call icode picode - unless the expression is a
 * segment register (rES..rDS), which is never added.
 * Returns: TRUE if it was a near call (opcode other than iCALLF) that made
 *          use of a segment register.
 *          FALSE elsewhere */
boolT newStkArg (ICODE *picode, COND_EXPR *exp, llIcode opcode, Function * pproc)
{
    /* For a far procedure call, references to segment registers are not
     * to be considered another parameter (i.e. they are long references to
     * another segment) */
    if (exp && (exp->type == IDENTIFIER) && (exp->expr.ident.idType == REGISTER))
    {
        const byte regi = pproc->localId.id_arr[exp->expr.ident.idNode.regiIdx].id.regi;
        if ((regi >= rES) && (regi <= rDS))
            return (opcode == iCALLF) ? (FALSE) : (TRUE);
    }

    /* Place register argument on the argument list */
    STKFRAME *frame = picode->ic.hl.oper.call.args;
    STKSYM newsym;
    newsym.actual = exp;
    frame->sym.push_back(newsym);
    frame->numArgs++;
    return false;
}
/* Places the actual argument exp in the position given by pos in the
 * argument list of picode, and names that entry "arg<pos>".
 * pos is cast to long so that it matches the %ld conversion whatever the
 * width of Int.  No bounds check is made on pos. */
void placeStkArg (ICODE *picode, COND_EXPR *exp, Int pos)
{
    STKFRAME * ps;

    ps = picode->ic.hl.oper.call.args;
    ps->sym[pos].actual = exp;
    sprintf (ps->sym[pos].name, "arg%ld", static_cast<long>(pos));
}
/* Checks whether the expression (actual argument) has the same type as the
 * given type forType (from the procedure's formal list).  If not, the
 * actual argument may get modified.
 * The only combination acted upon at present is an identifier whose type
 * is TYPE_CONST passed where TYPE_STR is expected: the constant is taken
 * as an offset into the image where a string is found, and the expression
 * is turned into a STRING pointing there.  Every other combination
 * (including TYPE_PTR passed for TYPE_STR, which is still to be handled)
 * leaves the expression untouched. */
void adjustActArgType (COND_EXPR *exp, hlType forType, Function * pproc)
{
    if (exp == NULL)
        return;

    const hlType actType = expType (exp, pproc);
    if ((actType == forType) || (exp->type != IDENTIFIER))
        return;

    /* Only a constant passed for a string needs adjusting */
    if ((forType != TYPE_STR) || (actType != TYPE_CONST))
        return;

    /* It's an offset into image where a string is found.  Point to the
     * string; the extra 0x100 is added when prog.fCOM is set. */
    const Int offL = exp->expr.ident.idNode.kte.kte;
    const Int offset = prog.fCOM
                     ? (pproc->state.r[rDS]<<4) + offL + 0x100
                     : (pproc->state.r[rDS]<<4) + offL;
    exp->expr.ident.idNode.strIdx = offset;
    exp->expr.ident.idType = STRING;
}
/* Determines whether the formal argument numArg_ has the same type as the
 * given type actType_ (type of the actual argument).  If not, the formal
 * argument may have its type changed: a word (or unknown) formal that
 * receives a long actual becomes the low half of a 4-byte long, and the
 * symbol following it becomes the invalidated high half.
 * If the formal argument does not exist, no new one is created; the actual
 * argument is simply ignored.
 *
 * Fixes over the previous version:
 *  - the "formal not found" test compared numArg_ (instead of the search
 *    index) with sym.size(), so it never fired and the code could read one
 *    past the end of sym;
 *  - the argument sizes were summed before numArg_ was range checked;
 *  - the high-half symbol was written even when the low half was the last
 *    symbol of the list.  In that case only the low half is now updated.
 *    NOTE(review): confirm this is the desired handling of that case. */
void STKFRAME::adjustForArgType(Int numArg_, hlType actType_)
{
    /* Formal argument does not exist: ignore the actual argument */
    if ((numArg_ < 0) || (static_cast<size_t>(numArg_) >= sym.size()))
        return;

    /* Find stack offset for this argument */
    Int off = minOff;
    for (Int k = 0; k < numArg_; k++)
        off += sym[k].size;

    /* Find formal argument: first symbol at or after numArg_ with that offset */
    size_t pos = static_cast<size_t>(numArg_);
    while ((pos < sym.size()) && (sym[pos].off != off))
        pos++;
    if (pos == sym.size())
        return;                         /* no formal at that offset */

    STKSYM *psym = &sym[pos];
    const hlType forType = psym->type;
    if (forType == actType_)
        return;

    switch (actType_) {
    case TYPE_UNKNOWN:      case TYPE_BYTE_SIGN:
    case TYPE_BYTE_UNSIGN:  case TYPE_WORD_SIGN:
    case TYPE_WORD_UNSIGN:  case TYPE_RECORD:
        break;

    case TYPE_LONG_UNSIGN:  case TYPE_LONG_SIGN:
        if ((forType == TYPE_WORD_UNSIGN) ||
            (forType == TYPE_WORD_SIGN) ||
            (forType == TYPE_UNKNOWN))
        {
            /* Merge low and high */
            psym->type = actType_;
            psym->size = 4;
            sprintf (psym->macro, "LO");
            psym->hasMacro = TRUE;
            if (pos + 1 < sym.size())   /* is there a high half to merge? */
            {
                STKSYM *nsym = psym + 1;
                sprintf (nsym->macro, "HI");
                nsym->hasMacro = TRUE;
                sprintf (nsym->name, "%s", psym->name);
                nsym->invalid = TRUE;
                numArgs--;
            }
        }
        break;

    case TYPE_PTR:
    case TYPE_CONST:
    case TYPE_STR:
        break;
    } /* eos */
}

541
src/proplong.cpp Normal file
View File

@@ -0,0 +1,541 @@
/**************************************************************************
* File : propLong.c
* Purpose: propagate the value of long variables (local variables and
* registers) along the graph. Structure the graph in this way.
* (C) Cristina Cifuentes
**************************************************************************/
#include <string.h>
#include <memory.h>
#include <cassert>
#include <algorithm>
#include "dcc.h"
/* Returns TRUE when the given icode opcode lies within the range of
 * conditional jump icodes (iJB..iJG, both inclusive), FALSE otherwise. */
static boolT isJCond (llIcode opcode)
{
    const bool inRange = (opcode >= iJB) && (opcode <= iJG);
    return inRange ? (TRUE) : (FALSE);
}
/* Returns whether the conditions for a 2-3 long variable are satisfied:
 * pbb is a TWO_BRANCH node; its THEN successor - or, only if the THEN
 * successor does not qualify, its ELSE successor - is a one-instruction
 * TWO_BRANCH node with a single in-edge; and the THEN successor of that
 * node is a two-instruction TWO_BRANCH node starting with an iCMP.
 * On success *off receives the distance from icode index i to that iCMP
 * and *arc the arc (THEN or ELSE) that was followed out of pbb. */
static boolT isLong23 (Int i, BB * pbb, ICODE * icode, Int *off, Int *arc)
{
    if (pbb->nodeType != TWO_BRANCH)
        return (FALSE);

    BB *thenBB = pbb->edges[THEN].BBptr;
    BB *elseBB = pbb->edges[ELSE].BBptr;
    BB *mid;            /* intermediate one-instruction block */
    Int side;           /* arc of pbb leading to mid */

    if ((thenBB->length == 1) && (thenBB->nodeType == TWO_BRANCH) &&
        (thenBB->numInEdges == 1))
    {
        mid = thenBB;
        side = THEN;
    }
    else if ((elseBB->length == 1) && (elseBB->nodeType == TWO_BRANCH) &&
             (elseBB->numInEdges == 1))
    {
        mid = elseBB;
        side = ELSE;
    }
    else
        return (FALSE);

    /* The block after mid must be "cmp; jcond" */
    BB *cmpBB = mid->edges[THEN].BBptr;
    if ((cmpBB->length != 2) || (cmpBB->nodeType != TWO_BRANCH) ||
        (icode[cmpBB->start].ic.ll.opcode != iCMP))
        return (FALSE);

    *off = cmpBB->start - i;
    *arc = side;
    return (TRUE);
}
/* Returns whether the conditions for a 2-2 long variable are satisfied:
 * pIcode+2 lies before pEnd and is an iCMP, and both pIcode+1 and pIcode+3
 * are conditional jumps.  On success *off is set to 2.
 * NOTE(review): pIcode+3 is read after checking only pIcode+2 < pEnd, so
 * pEnd presumably points at the last icode rather than one past it -
 * confirm against the caller. */
static boolT isLong22 (ICODE * pIcode, ICODE * pEnd, Int *off)
{
    ICODE *secondCmp = pIcode + 2;

    if (!(secondCmp < pEnd))
        return (FALSE);
    if (secondCmp->ic.ll.opcode != iCMP)
        return (FALSE);
    if (!isJCond ((pIcode+1)->ic.ll.opcode))
        return (FALSE);
    if (!isJCond ((pIcode+3)->ic.ll.opcode))
        return (FALSE);

    *off = 2;
    return (TRUE);
}
/* Creates a long conditional <=, >=, <, or > at (pIcode+1).
 * Removes excess nodes from the graph by flagging them, and updates
 * the new edges for the remaining nodes.
 * Arguments: rhs, lhs : operands of the comparison; they are combined into
 *                       the boolean expression stored at pIcode+1
 *            pIcode   : first icode of the header basic block
 *            idx      : in/out icode index; advanced by 5 (THEN arc) or
 *                       2 (ELSE arc)
 *            pProc    : current procedure; its icodes at the start of the
 *                       two removed blocks are invalidated
 *            arc      : arc of the header block that leads to the
 *                       intermediate blocks (THEN, otherwise ELSE)
 *            off      : distance from pIcode to the second comparison; the
 *                       conditional jump at pIcode+off+1 selects the operator */
static void longJCond23 (COND_EXPR *rhs, COND_EXPR *lhs, ICODE * pIcode,
Int *idx, Function * pProc, Int arc, Int off)
{ Int j; /* not used */
BB * pbb, * obb1, * obb2, * tbb;
if (arc == THEN)
{
/* Find intermediate basic blocks and target block */
pbb = pIcode->inBB;
obb1 = pbb->edges[THEN].BBptr;
obb2 = obb1->edges[THEN].BBptr;
tbb = obb2->edges[THEN].BBptr;
/* Modify out edge of header basic block */
pbb->edges[THEN].BBptr = tbb;
/* Modify in edges of target basic block */
auto newlast=std::remove_if(tbb->inEdges.begin(),tbb->inEdges.end(),
[obb1,obb2](BB *b) -> bool
{
return (b==obb1) || (b==obb2);
}
);
tbb->inEdges.erase(newlast,tbb->inEdges.end());
tbb->numInEdges--; /* loses 2 arcs, gains 1 arc */
tbb->inEdges.push_back(pbb);
assert(tbb->inEdges.size()==tbb->numInEdges);
/* Modify in edges of the ELSE basic block */
tbb = pbb->edges[ELSE].BBptr;
auto iter=std::find(tbb->inEdges.begin(),tbb->inEdges.end(),obb2);
assert(iter!=tbb->inEdges.end());
tbb->inEdges.erase(iter);
tbb->numInEdges--; /* loses 1 arc */
assert(tbb->inEdges.size()==tbb->numInEdges);
/* Update icode index */
(*idx) += 5;
}
else /* ELSE arc */
{
/* Find intermediate basic blocks and target block */
pbb = pIcode->inBB;
obb1 = pbb->edges[ELSE].BBptr;
obb2 = obb1->edges[THEN].BBptr;
tbb = obb2->edges[THEN].BBptr;
/* Modify in edges of target basic block */
auto iter=std::find(tbb->inEdges.begin(),tbb->inEdges.end(),obb2);
assert(iter!=tbb->inEdges.end());
tbb->inEdges.erase(iter);
tbb->numInEdges--; /* loses 1 arc */
assert(tbb->inEdges.size()==tbb->numInEdges);
/* Modify in edges of the ELSE basic block */
tbb = obb2->edges[ELSE].BBptr;
auto newlast=std::remove_if(tbb->inEdges.begin(),tbb->inEdges.end(),
[obb1,obb2](BB *b) -> bool
{
return (b==obb1) || (b==obb2);
}
);
tbb->inEdges.erase(newlast,tbb->inEdges.end());
tbb->numInEdges--; /* loses 2 arcs, gains 1 arc */
tbb->inEdges.push_back(pbb);
assert(tbb->inEdges.size()==tbb->numInEdges);
/* Modify out edge of header basic block */
pbb->edges[ELSE].BBptr = tbb;
/* Update icode index */
(*idx) += 2;
}
/* Create new HLI_JCOND and condition */
lhs = COND_EXPR::boolOp (lhs, rhs, condOpJCond[(pIcode+off+1)->ic.ll.opcode-iJB]);
(pIcode+1)->setJCond(lhs);
copyDU (pIcode+1, pIcode, eUSE, eUSE);
(pIcode+1)->du.use |= (pIcode+off)->du.use;
/* Update statistics: the two intermediate blocks are no longer part of
 * the graph */
obb1->flg |= INVALID_BB;
obb2->flg |= INVALID_BB;
stats.numBBaft -= 2;
/* Invalidate the icodes that were folded into the new condition */
pIcode->invalidate();
pProc->Icode.GetIcode(obb1->start)->invalidate();
pProc->Icode.GetIcode(obb2->start)->invalidate();
pProc->Icode.GetIcode(obb2->start+1)->invalidate();
}
/* Creates a long conditional equality or inequality at (pIcode+1).
 * Removes excess nodes from the graph by flagging them, and updates
 * the new edges for the remaining nodes.
 * Arguments: rhs, lhs : operands of the comparison; they are combined into
 *                       the boolean expression stored at pIcode+1, using
 *                       the operator selected by the jump at pIcode+3
 *            pIcode   : first of the four icodes (the icodes at pIcode,
 *                       pIcode+2 and pIcode+3 are invalidated)
 *            idx      : in/out icode index; advanced by 4 */
static void longJCond22 (COND_EXPR *rhs, COND_EXPR *lhs, ICODE * pIcode,
Int *idx)
{
Int j; /* not used */
BB * pbb, * obb1, * tbb;
/* Form conditional expression */
lhs = COND_EXPR::boolOp (lhs, rhs, condOpJCond[(pIcode+3)->ic.ll.opcode - iJB]);
(pIcode+1)->setJCond(lhs);
copyDU (pIcode+1, pIcode, eUSE, eUSE);
(pIcode+1)->du.use |= (pIcode+2)->du.use;
/* Adjust outEdges[0] to the new target basic block */
pbb = pIcode->inBB;
/* The graph is only rewired when pIcode+1 is the last icode of its block */
if ((pbb->start + pbb->length - 1) == (*idx + 1))
{
/* Find intermediate and target basic blocks */
obb1 = pbb->edges[THEN].BBptr;
tbb = obb1->edges[THEN].BBptr;
/* Modify THEN out edge of header basic block */
pbb->edges[THEN].BBptr = tbb;
/* Modify in edges of target basic block */
auto iter=std::find(tbb->inEdges.begin(),tbb->inEdges.end(),obb1);
assert(iter!=tbb->inEdges.end());
tbb->inEdges.erase(iter);
if ((pIcode+3)->ic.ll.opcode == iJE)
tbb->numInEdges--; /* loses 1 arc */
else /* iJNE => replace arc */
tbb->inEdges.push_back(pbb);
assert(tbb->inEdges.size()==tbb->numInEdges);
/* Modify ELSE out edge of header basic block */
tbb = obb1->edges[ELSE].BBptr;
pbb->edges[ELSE].BBptr = tbb;
iter=std::find(tbb->inEdges.begin(),tbb->inEdges.end(),obb1);
assert(iter!=tbb->inEdges.end());
tbb->inEdges.erase(iter);
if ((pIcode+3)->ic.ll.opcode == iJE) /* replace */
tbb->inEdges.push_back(pbb);
else
tbb->numInEdges--; /* iJNE => loses 1 arc */
assert(tbb->inEdges.size()==tbb->numInEdges);
/* Update statistics */
obb1->flg |= INVALID_BB;
stats.numBBaft--;
}
pIcode->invalidate();
(pIcode+2)->invalidate();
(pIcode+3)->invalidate();
(*idx) += 4;
}
/* Propagates a long stack identifier through the procedure's icodes,
 * turning matching LOW_LEVEL icode pairs into HIGH_LEVEL icodes.
 * Every valid LOW_LEVEL icode (except the last one) is examined:
 *   - a pair of equal opcodes (iMOV, iAND/iOR/iXOR, iPUSH) that checkLongEq
 *     accepts becomes one assignment / push and the second icode of the
 *     pair is invalidated;
 *   - an iCMP recognised by isLong23 or isLong22 is handed to longJCond23 /
 *     longJCond22 when checkLongEq accepts it.
 * Arguments: i      : index into the local identifier table
 *            pLocId : ptr to the long local identifier
 *            pProc  : ptr to current procedure's record. */
static void propLongStk (Int i, ID *pLocId, Function * pProc)
{
    Int idx, off, arc;
    COND_EXPR *lhs, *rhs;       /* left and right hand expressions */

    ICODE *lastIcode = pProc->Icode.GetIcode(pProc->Icode.GetNumIcodes() -1);

    /* Check all icodes for offHi:offLo */
    for (idx = 0; idx < (pProc->Icode.GetNumIcodes() - 1); idx++)
    {
        ICODE *cur = pProc->Icode.GetIcode(idx);
        if ((cur->type == HIGH_LEVEL) || (cur->invalid == TRUE))
            continue;
        ICODE *next = cur + 1;

        /* Two consecutive icodes with the same opcode */
        if (cur->ic.ll.opcode == next->ic.ll.opcode)
        {
            switch (cur->ic.ll.opcode) {
            case iMOV:
                if (checkLongEq (pLocId->id.longStkId, cur, i, idx, pProc,
                                 &rhs, &lhs, 1) != TRUE)
                    break;
                cur->setAsgn(lhs, rhs);
                next->invalidate();
                idx++;
                break;

            case iAND: case iOR: case iXOR:
                if (checkLongEq (pLocId->id.longStkId, cur, i, idx, pProc,
                                 &rhs, &lhs, 1) != TRUE)
                    break;
                switch (cur->ic.ll.opcode) {
                case iAND:
                    rhs = COND_EXPR::boolOp (lhs, rhs, AND);
                    break;
                case iOR:
                    rhs = COND_EXPR::boolOp (lhs, rhs, OR);
                    break;
                case iXOR:
                    rhs = COND_EXPR::boolOp (lhs, rhs, XOR);
                    break;
                }
                cur->setAsgn(lhs, rhs);
                next->invalidate();
                idx++;
                break;

            case iPUSH:
                if (checkLongEq (pLocId->id.longStkId, cur, i, idx, pProc,
                                 &rhs, &lhs, 1) != TRUE)
                    break;
                cur->setUnary( HLI_PUSH, lhs);
                next->invalidate();
                idx++;
                break;
            } /*eos*/
            continue;
        }

        /* The remaining patterns all start with a compare */
        if (cur->ic.ll.opcode != iCMP)
            continue;

        /* Long conditional (i.e. 2 CMPs and 3 branches) */
        if (isLong23 (idx, cur->inBB, pProc->Icode.GetFirstIcode(), &off, &arc))
        {
            if (checkLongEq (pLocId->id.longStkId, cur, i, idx, pProc,
                             &rhs, &lhs, off) == TRUE)
                longJCond23 (rhs, lhs, cur, &idx, pProc, arc, off);
        }
        /* Long conditional equality or inequality: 2 CMPs and 2 branches */
        else if (isLong22 (cur, lastIcode, &off))
        {
            if (checkLongEq (pLocId->id.longStkId, cur, i, idx, pProc,
                             &rhs, &lhs, off) == TRUE)
                longJCond22 (rhs, lhs, cur, &idx);
        }
    }
}
/* Finds the definition and uses of the long register pair described by
 * pLocId and transforms the matching LOW_LEVEL icode pairs into HIGH_LEVEL
 * icodes.  For every icode position recorded in pLocId->idx the icodes
 * before it are scanned backwards for a definition (iMOV, iPOP,
 * iAND/iOR/iXOR pairs) and the icodes after it are scanned forwards for a
 * use (iMOV, iPUSH, iAND/iOR/iXOR pairs, long conditionals, OR/Jcc).
 * Arguments: i      : index into the local identifier table
 *            pLocId : ptr to the long local identifier
 *            pProc  : ptr to current procedure's record. */
static void propLongReg (Int i, ID *pLocId, Function * pProc)
{
    COND_EXPR *lhs, *rhs;
    Int idx, j, off, arc;
    ICODE * pIcode, * pEnd;
    ICODEMEM * pmH,* pmL;       /* Pointers to dst LOW_LEVEL icodes */

    /* Process all definitions/uses of long registers at an icode position */
    pEnd = pProc->Icode.GetIcode(pProc->Icode.GetNumIcodes() -1);
    for (j = 0; j < pLocId->idx.size(); j++)
    {
        /* Check backwards for a definition of this long register */
        for (idx = pLocId->idx[j] - 1; idx > 0 ; idx--)
        {
            pIcode = pProc->Icode.GetIcode(idx-1);
            if ((pIcode->type == HIGH_LEVEL) || (pIcode->invalid == TRUE))
                continue;

            if (pIcode->ic.ll.opcode == (pIcode+1)->ic.ll.opcode)
                switch (pIcode->ic.ll.opcode)
                {
                case iMOV:
                    pmH = &pIcode->ic.ll.dst;
                    pmL = &(pIcode+1)->ic.ll.dst;
                    if ((pLocId->id.longId.h == pmH->regi) && (pLocId->id.longId.l == pmL->regi))
                    {
                        lhs = COND_EXPR::idLongIdx (i);
                        pProc->localId.id_arr[i].idx.push_back(idx-1);
                        pIcode->setRegDU( pmL->regi, eDEF);
                        rhs = COND_EXPR::idLong (&pProc->localId, SRC, pIcode, HIGH_FIRST, idx, eUSE, 1);
                        pIcode->setAsgn(lhs, rhs);
                        (pIcode+1)->invalidate();
                        idx = 0;    /* to exit the loop */
                    }
                    break;

                case iPOP:
                    pmH = &(pIcode+1)->ic.ll.dst;
                    pmL = &pIcode->ic.ll.dst;
                    if ((pLocId->id.longId.h == pmH->regi) && (pLocId->id.longId.l == pmL->regi))
                    {
                        lhs = COND_EXPR::idLongIdx (i);
                        pIcode->setRegDU( pmH->regi, eDEF);
                        pIcode->setUnary(HLI_POP, lhs);
                        (pIcode+1)->invalidate();
                        idx = 0;    /* to exit the loop */
                    }
                    break;

                /**** others missing ***/

                case iAND: case iOR: case iXOR:
                    pmL = &pIcode->ic.ll.dst;
                    pmH = &(pIcode+1)->ic.ll.dst;
                    if ((pLocId->id.longId.h == pmH->regi) && (pLocId->id.longId.l == pmL->regi))
                    {
                        lhs = COND_EXPR::idLongIdx (i);
                        pIcode->setRegDU( pmH->regi, USE_DEF);
                        rhs = COND_EXPR::idLong (&pProc->localId, SRC, pIcode, LOW_FIRST, idx, eUSE, 1);
                        switch (pIcode->ic.ll.opcode) {
                        case iAND:
                            rhs = COND_EXPR::boolOp (lhs, rhs, AND);
                            break;
                        case iOR:
                            rhs = COND_EXPR::boolOp (lhs, rhs, OR);
                            break;
                        case iXOR:
                            rhs = COND_EXPR::boolOp (lhs, rhs, XOR);
                            break;
                        } /* eos */
                        pIcode->setAsgn(lhs, rhs);
                        (pIcode+1)->invalidate();
                        idx = 0;    /* to exit the loop */
                    }
                    break;
                } /* eos */
        }

        /* If no definition backwards, check forward for a use of this long reg.
         * NOTE(review): the backward loop only terminates once idx <= 0 (its
         * switch `break`s do not leave the for), so this condition holds on
         * every pass and the forward scan always runs -- confirm that this is
         * the intended behaviour. */
        if (idx <= 0)
            for (idx = pLocId->idx[j] + 1; idx < pProc->Icode.GetNumIcodes() - 1; idx++)
            {
                pIcode = pProc->Icode.GetIcode(idx);
                if ((pIcode->type == HIGH_LEVEL) || (pIcode->invalid == TRUE))
                    continue;

                if (pIcode->ic.ll.opcode == (pIcode+1)->ic.ll.opcode)
                    switch (pIcode->ic.ll.opcode) {
                    case iMOV:
                        if ((pLocId->id.longId.h == pIcode->ic.ll.src.regi) &&
                            (pLocId->id.longId.l == (pIcode+1)->ic.ll.src.regi))
                        {
                            rhs = COND_EXPR::idLongIdx (i);
                            pIcode->setRegDU( (pIcode+1)->ic.ll.src.regi, eUSE);
                            lhs = COND_EXPR::idLong (&pProc->localId, DST, pIcode,
                                                     HIGH_FIRST, idx, eDEF, 1);
                            pIcode->setAsgn(lhs, rhs);
                            (pIcode+1)->invalidate();
                            idx = pProc->Icode.GetNumIcodes();  /* to exit the loop */
                        }
                        break;

                    case iPUSH:
                        if ((pLocId->id.longId.h == pIcode->ic.ll.src.regi) &&
                            (pLocId->id.longId.l == (pIcode+1)->ic.ll.src.regi))
                        {
                            rhs = COND_EXPR::idLongIdx (i);
                            pIcode->setRegDU( (pIcode+1)->ic.ll.src.regi, eUSE);
                            /* Push the expression that was just built: the
                             * long register is the operand of the push.  The
                             * previous code passed lhs, which is not assigned
                             * on this path. */
                            pIcode->setUnary(HLI_PUSH, rhs);
                            (pIcode+1)->invalidate();
                        }
                        /* The scan stops at any PUSH pair, matching or not. */
                        idx = pProc->Icode.GetNumIcodes();  /* to exit the loop */
                        break;

                    /*** others missing ****/

                    case iAND: case iOR: case iXOR:
                        pmL = &pIcode->ic.ll.dst;
                        pmH = &(pIcode+1)->ic.ll.dst;
                        if ((pLocId->id.longId.h == pmH->regi) &&
                            (pLocId->id.longId.l == pmL->regi))
                        {
                            lhs = COND_EXPR::idLongIdx (i);
                            pIcode->setRegDU( pmH->regi, USE_DEF);
                            rhs = COND_EXPR::idLong (&pProc->localId, SRC, pIcode,
                                                     LOW_FIRST, idx, eUSE, 1);
                            switch (pIcode->ic.ll.opcode) {
                            case iAND:
                                rhs = COND_EXPR::boolOp (lhs, rhs, AND);
                                break;
                            case iOR:
                                rhs = COND_EXPR::boolOp (lhs, rhs, OR);
                                break;
                            case iXOR:
                                rhs = COND_EXPR::boolOp (lhs, rhs, XOR);
                                break;
                            }
                            pIcode->setAsgn(lhs, rhs);
                            (pIcode+1)->invalidate();
                            /* In this forward loop `idx = 0` would restart the
                             * scan from the first icode instead of ending it;
                             * jump past the end like the other cases do. */
                            idx = pProc->Icode.GetNumIcodes();  /* to exit the loop */
                        }
                        break;
                    } /* eos */

                /* Check long conditional (i.e. 2 CMPs and 3 branches) */
                else if ((pIcode->ic.ll.opcode == iCMP) &&
                         (isLong23 (idx, pIcode->inBB, pProc->Icode.GetFirstIcode(),
                                    &off, &arc)))
                {
                    if (checkLongRegEq (pLocId->id.longId, pIcode, i, idx, pProc,
                                        &rhs, &lhs, off) == TRUE)
                        longJCond23 (rhs, lhs, pIcode, &idx, pProc, arc, off);
                }

                /* Check for long conditional equality or inequality.  This requires
                 * 2 CMPs and 2 branches */
                else if ((pIcode->ic.ll.opcode == iCMP) &&
                         (isLong22 (pIcode, pEnd, &off)))
                {
                    if (checkLongRegEq (pLocId->id.longId, pIcode, i, idx, pProc,
                                        &rhs, &lhs, off) == TRUE)
                        longJCond22 (rhs, lhs, pIcode, &idx);
                }

                /* Check for OR regH, regL
                 *           JX lab
                 *      => HLI_JCOND (regH:regL X 0) lab
                 * This is better code than HLI_JCOND (HI(regH:regL) | LO(regH:regL)) */
                else if ((pIcode->ic.ll.opcode == iOR) && ((pIcode+1) < pEnd) &&
                         (isJCond ((pIcode+1)->ic.ll.opcode)))
                {
                    if ((pIcode->ic.ll.dst.regi == pLocId->id.longId.h) &&
                        (pIcode->ic.ll.src.regi == pLocId->id.longId.l))
                    {
                        lhs = COND_EXPR::idLongIdx (i);
                        rhs = COND_EXPR::idKte (0, 4);  /* long 0 */
                        lhs = COND_EXPR::boolOp (lhs, rhs,
                                                 condOpJCond[(pIcode+1)->ic.ll.opcode - iJB]);
                        (pIcode+1)->setJCond(lhs);
                        copyDU (pIcode+1, pIcode, eUSE, eUSE);
                        pIcode->invalidate();
                    }
                }
            } /* end for */
    } /* end for */
}
/* Propagates the long global address across all LOW_LEVEL icodes.
 * Transforms some LOW_LEVEL icodes into HIGH_LEVEL.
 * NOTE: this is currently a stub -- the body is empty, so none of the
 * arguments is read and no icode is inspected or modified. */
static void propLongGlb (Int i, ID *pLocId, Function * pProc)
{
/* Not implemented. */
}
/* Propagated identifier information, thus converting some LOW_LEVEL icodes
* into HIGH_LEVEL icodes. */
void Function::propLong()
{
Int i;
ID *pLocId; /* Pointer to current local identifier */
for (i = 0; i < localId.csym(); i++)
{
pLocId = &localId.id_arr[i];
if ((pLocId->type==TYPE_LONG_SIGN) || (pLocId->type==TYPE_LONG_UNSIGN))
{
switch (pLocId->loc)
{
case STK_FRAME:
propLongStk (i, pLocId, this);
break;
case REG_FRAME:
propLongReg (i, pLocId, this);
break;
case GLB_FRAME:
propLongGlb (i, pLocId, this);
break;
}
}
}
}

399
src/reducible.cpp Normal file
View File

@@ -0,0 +1,399 @@
/********************************************************************
* Checks for reducibility of a graph by intervals, and
* constructs an equivalent reducible graph if one is not found.
* (C) Cristina Cifuentes
********************************************************************/
#include <algorithm>
#include <cassert>
#include "dcc.h"
#include <stdio.h>
#ifdef __BORLAND__
#include <alloc.h>
#else
#include <malloc.h> /* For free() */
#endif
#include <string.h>
/* Interval numbering counter: Function::checkReducibility resets it to 1 and
 * derSeq_Entry::findIntervals post-increments it for every interval it
 * creates. */
static Int numInt; /* Number of intervals */
/* Returns whether the queue q is non-empty, i.e. q is not the NULL pointer.
 * The argument is parenthesised so that any pointer expression may be
 * passed. */
#define nonEmpty(q) ((q) != NULL)
/* Returns whether the graph G is a trivial graph or not, i.e. whether the
 * node G points to has no out edges.  The argument is parenthesised so that
 * any pointer expression may be passed. */
#define trivialGraph(G) ((G)->numOutEdges == 0)
/* Pops and returns the element at the head of queue Q.
 * Q must not be empty (checked with an assertion). */
static BB *firstOfQueue (queue &Q)
{
    assert(!Q.empty());
    BB *head = Q.front();
    Q.pop_front();
    return head;
}
/* Appends node at the end of queue Q unless it is already present.
 * Returns an iterator to node's position in Q: the existing entry when the
 * node was found, otherwise the entry just appended. */
queue::iterator appendQueue (queue &Q, BB *node)
{
    auto existing = std::find(Q.begin(), Q.end(), node);
    if (existing != Q.end())
        return existing;

    /* insert() before end() appends and yields the new element's position */
    return Q.insert(Q.end(), node);
}
/* Returns the next unprocessed node of this interval's node list (the one
 * currNode points to) and advances currNode past it, which logically
 * removes the node from the list.  Returns NULL when currNode has reached
 * the end of the list. */
BB *interval::firstOfInt ()
{
    if (currNode == nodes.end())
        return nullptr;

    BB *next = *currNode;
    ++currNode;
    return next;
}
/* Adds @node to the node list of interval @pI (if it is not there yet).
 * When the interval had no unprocessed node left (currNode at the end of
 * the list), currNode is pointed at @node's entry.  If @node has been on the
 * header list @pqH and is still there, it is removed from it and the
 * interval's out-edge count is reduced accordingly.  Finally the interval is
 * recorded in node->inInterval.
 * Note: nodes are added to the interval list in interval order (which
 * topsorts the dominance relation). */
static void appendNodeInt (queue &pqH, BB *node, interval *pI)
{
    /* Append node if it is not already in the interval list */
    const queue::iterator entry = appendQueue (pI->nodes, node);

    /* Update currNode if necessary */
    if (pI->currNode == pI->nodes.end())
        pI->currNode = entry;

    /* Check header list for occurrence of node; if found, remove it
     * and decrement number of out-edges from this interval. */
    if (node->beenOnH && !pqH.empty())
    {
        const queue::iterator onHeaderList = std::find(pqH.begin(), pqH.end(), node);
        if (onHeaderList != pqH.end())
        {
            BB *headerNode = *onHeaderList;
            pI->numOutEdges -= (byte)headerNode->numInEdges - 1;
            pqH.erase(onHeaderList);
        }
    }

    /* Update interval header information for this basic block */
    node->inInterval = pI;
}
/* Finds the intervals of graph derivedGi->Gi and places them in the list
* of intervals derivedGi->Ii.
* Algorithm by M.S.Hecht. */
void derSeq_Entry::findIntervals ()
{
interval *pI, /* Interval being processed */
*J; /* ^ last interval in derivedGi->Ii */
BB *h, /* Node being processed */
*header, /* Current interval's header node */
*succ; /* Successor basic block */
Int i; /* Counter */
queue H; /* Queue of possible header nodes */
boolT first = TRUE; /* First pass through the loop */
appendQueue (H, Gi); /* H = {first node of G} */
Gi->beenOnH = TRUE;
Gi->reachingInt = BB::Create(); /* ^ empty BB */
/* Process header nodes list H */
while (!H.empty())
{
header = firstOfQueue (H);
pI = new interval;
pI->numInt = (byte)numInt++;
if (first) /* ^ to first interval */
Ii = J = pI;
appendNodeInt (H, header, pI); /* pI(header) = {header} */
/* Process all nodes in the current interval list */
while ((h = pI->firstOfInt()) != NULL)
{
/* Check all immediate successors of h */
for (i = 0; i < h->numOutEdges; i++)
{
succ = h->edges[i].BBptr;
succ->inEdgeCount--;
if (succ->reachingInt == NULL) /* first visit */
{
succ->reachingInt = header;
if (succ->inEdgeCount == 0)
appendNodeInt (H, succ, pI);
else if (! succ->beenOnH) /* out edge */
{
appendQueue (H, succ);
succ->beenOnH = TRUE;
pI->numOutEdges++;
}
}
else /* node has been visited before */
if (succ->inEdgeCount == 0)
{
if (succ->reachingInt == header || succ->inInterval == pI) /* same interval */
{
if (succ != header)
appendNodeInt (H, succ, pI);
}
else /* out edge */
pI->numOutEdges++;
}
else if (succ != header && succ->beenOnH)
pI->numOutEdges++;
}
}
/* Link interval I to list of intervals */
if (! first)
{
J->next = pI;
J = pI;
}
else /* first interval */
first = FALSE;
}
}
/* Prints, for every interval in the list starting at pI, its number and
 * out-edge count followed by one line per node of the interval: the start
 * value for real basic blocks (correspInt == NULL) or the number of the
 * corresponding interval for blocks that represent intervals.
 * Every printf argument is cast to the exact type its conversion specifier
 * requires; previously numInt was passed uncast to both %ld and %d. */
static void displayIntervals (interval *pI)
{
    for (; pI != NULL; pI = pI->next)
    {
        printf (" Interval #: %ld\t#OutEdges: %ld\n",
                static_cast<long>(pI->numInt),
                static_cast<long>(pI->numOutEdges));

        for (queue::iterator nodePtr = pI->nodes.begin();
             nodePtr != pI->nodes.end(); ++nodePtr)
        {
            if ((*nodePtr)->correspInt == NULL)     /* real BBs */
                printf (" Node: %ld\n", static_cast<long>((*nodePtr)->start));
            else                                    /* BBs represent intervals */
                printf (" Node (corresp int): %d\n",
                        static_cast<int>((*nodePtr)->correspInt->numInt));
        }
    }
}
/* Heap-allocates a new derSeq_Entry and returns it. */
static derSeq_Entry *newDerivedSeq()
{
    derSeq_Entry *entry = new derSeq_Entry;
    return entry;
}
/* Empties the queue q by removing all of its elements. */
void freeQueue (queue &q)
{
    q.erase(q.begin(), q.end());
}
/* Deletes every interval of the list *pI, following the next links, after
 * clearing each interval's node list.  *pI is advanced as the list is
 * consumed and is NULL on return. */
static void freeInterval (interval **pI)
{
    while (interval *doomed = *pI)
    {
        doomed->nodes.clear();
        *pI = doomed->next;     /* unlink before deleting */
        delete doomed;
    }
}
/* Releases the derived sequence by removing all of its entries (which runs
 * each entry's destructor).  The original graph cfg (derivedG->Gi) is not
 * freed. */
void freeDerivedSeq(derSeq &derivedG)
{
    derivedG.erase(derivedG.begin(), derivedG.end());
}
/* Destroys the entry: every interval in the Ii list is released through
 * freeInterval.  The graph Gi is not freed here; the code that would have
 * freed interval-node graphs is kept below, commented out. */
derSeq_Entry::~derSeq_Entry()
{
freeInterval (&Ii);
// if(Gi && Gi->nodeType == INTERVAL_NODE)
// freeCFG (Gi);
}
/* Builds the next order graph from the intervals of the last entry of
 * *derivedGi and stores it in a new entry appended to *derivedGi.
 * One INTERVAL_NODE basic block is created per interval; its out edges are
 * first recorded as pointers to the target intervals and then resolved to
 * the basic blocks that represent those intervals, counting in-edges
 * (numInEdges / inEdgeCount) for the later interval processing.
 * Returns TRUE when at least one interval holds more than one node (the new
 * graph differs from the previous one), FALSE otherwise. */
static boolT nextOrderGraph (derSeq *derivedGi)
{
    boolT sameGraph = TRUE;             /* Boolean, isomorphic graphs */

    /* Process Gi's intervals */
    derSeq_Entry &prev_entry(derivedGi->back());
    derivedGi->push_back(derSeq_Entry());
    derSeq_Entry &new_entry(derivedGi->back());

    std::vector<BB *> bbs;              /* one new BB per interval, in list order */
    for (interval *Ii = prev_entry.Ii; Ii != NULL; Ii = Ii->next)
    {
        bbs.push_back(BB::Create(-1, -1, INTERVAL_NODE, Ii->numOutEdges, NULL));
        BB *BBnode = bbs.back();        /* New basic block of this interval */
        BBnode->correspInt = Ii;
        const queue &listIi(Ii->nodes); /* nodes of this interval */

        /* Check for more than 1 node in the interval */
        if (sameGraph && (listIi.size()>1))
            sameGraph = FALSE;

        /* Find out edges: every edge leaving the interval */
        if (BBnode->numOutEdges > 0)
        {
            Int i = 0;                  /* Index to outEdges array */
            for (auto iter = listIi.begin(); iter != listIi.end(); ++iter)
            {
                BB *curr = *iter;       /* BB being checked for out edges */
                for (Int j = 0; j < curr->numOutEdges; j++)
                {
                    BB *succ = curr->edges[j].BBptr;
                    if (succ->inInterval != curr->inInterval)
                        BBnode->edges[i++].intPtr = succ->inInterval;
                }
            }
        }
    }

    /* Convert list of pointers to intervals into a real graph.
     * Determines the number of in edges to each new BB, and places it
     * in numInEdges and inEdgeCount for later interval processing. */
    assert(!bbs.empty());               /* front() needs at least one interval */
    new_entry.Gi = bbs.front();
    for (BB *node : bbs)
    {
        for (Int i = 0; i < node->numOutEdges; i++)
        {
            TYPEADR_TYPE &edge = node->edges[i];
            auto iter = std::find_if(bbs.begin(), bbs.end(),
                [&edge](BB *candidate)->bool { return edge.intPtr==candidate->correspInt;});
            if (iter == bbs.end())
                fatalError (INVALID_INT_BB);
            edge.BBptr = *iter;
            (*iter)->numInEdges++;
            (*iter)->inEdgeCount++;
        }
    }
    return (boolT)(! sameGraph);
}
/* Builds the derived sequence of the graph held in the first entry of
 * *derivedGi (ie. cfg), appending one entry per higher order graph.
 * Returns TRUE when a trivial graph is reached; the last entry then gets
 * its intervals computed as well.  Returns FALSE when a next order graph
 * turns out to be no different from its predecessor while that predecessor
 * is still not trivial; the entry created for that graph is removed again. */
static byte findDerivedSeq (derSeq *derivedGi)
{
    derSeq::iterator cur = derivedGi->begin();
    BB *graph = cur->Gi;                /* Current derived sequence graph */

    while (! trivialGraph (graph))
    {
        /* Find the intervals of the current graph */
        cur->findIntervals ();

        /* Create Gi+1 and check if it is equivalent to Gi */
        if (! nextOrderGraph (derivedGi))
        {
            ++cur;
            derivedGi->erase(cur, derivedGi->end());    /* remove Gi+1 */
            return FALSE;
        }

        ++cur;
        graph = cur->Gi;
        stats.nOrder++;
    }

    derivedGi->back().findIntervals ();
    return TRUE;
}
/* Meant to convert the irreducible graph G into an equivalent reducible one
 * by means of node splitting.  Not implemented: G is left untouched and a
 * notice is written to standard output. */
static void nodeSplitting (std::vector<BB *> &G)
{
    fputs("Attempt to perform node splitting: NOT IMPLEMENTED\n", stdout);
}
/* Prints a heading followed by the intervals of every graph in the derived
 * sequence; the graphs are numbered G1, G2, ... in hexadecimal.
 * The counter is an unsigned long so that it matches the %lX conversion. */
void derSeq::display()
{
    unsigned long n = 1;        /* Derived sequence number */

    printf ("\nDerived Sequence Intervals\n");
    for (derSeq::iterator iter = this->begin(); iter != this->end(); ++iter)
    {
        printf ("\nIntervals for G%lX\n", n++);
        displayIntervals (iter->Ii);
    }
}
/* Checks whether the control flow graph, cfg, is reducible or not.
 * The interval counter and stats.nOrder are reset to 1, a new derived
 * sequence is allocated with cfg's first basic block as its first graph,
 * and the sequence is built with findDerivedSeq.  If the graph is not
 * reducible the procedure is flagged GRAPH_IRRED and nodeSplitting is
 * invoked on cfg.
 * Returns the newly allocated derived sequence. */
derSeq * Function::checkReducibility()
{
    numInt = 1;                 /* reinitialize no. of intervals */
    stats.nOrder = 1;           /* nOrder(cfg) = 1 */

    derSeq *sequence = new derSeq;
    sequence->resize(1);
    sequence->back().Gi = cfg.front();

    if (! findDerivedSeq(sequence))     /* graph is not reducible */
    {
        flg |= GRAPH_IRRED;
        nodeSplitting (cfg);
    }
    return sequence;
}

844
src/scanner.cpp Normal file
View File

@@ -0,0 +1,844 @@
/*****************************************************************************
* dcc project scanner module
* Implements a simple state driven scanner to convert 8086 machine code into
* I-code
* (C) Cristina Cifuentes, Jeff Ledermann
****************************************************************************/
#include "dcc.h"
#include "scanner.h"
#include <string.h>
#define iZERO (llIcode)0 // For neatness
#define IC llIcode
/* Decoding table with one row per value of the first opcode byte; the byte
 * value (hex) is shown in the comment that ends each row.  For the byte it
 * reads, scan() copies opcode, flg (masked with ICODEMASK), df and uf into
 * the icode record and then calls state1 followed by state2 with that byte.
 * Rows whose state1 is `prefix` make scan() go on to decode the next byte.
 * If the icode's opcode is still zero once both state functions have run,
 * scan() returns INVALID_386OP when the row has OP386 set in flg and
 * INVALID_OPCODE otherwise.  Rows that omit the last initializer leave uf
 * zero. */
static struct {
void (*state1)(Int); /* called by scan() as the "second state" */
void (*state2)(Int); /* called by scan() as the "third state" */
flags32 flg; /* stored in ic.ll.flg after masking with ICODEMASK */
llIcode opcode; /* stored in ic.ll.opcode */
byte df; /* stored in ic.ll.flagDU.d */
byte uf; /* stored in ic.ll.flagDU.u */
} stateTable[] = {
{ modrm, none2, B , iADD , Sf | Zf | Cf, }, /* 00 */
{ modrm, none2, 0 , iADD , Sf | Zf | Cf, }, /* 01 */
{ modrm, none2, TO_REG | B , iADD , Sf | Zf | Cf, }, /* 02 */
{ modrm, none2, TO_REG , iADD , Sf | Zf | Cf, }, /* 03 */
{ data1, axImp, B , iADD , Sf | Zf | Cf, }, /* 04 */
{ data2, axImp, 0 , iADD , Sf | Zf | Cf, }, /* 05 */
{ segop, none2, NO_SRC , iPUSH , 0 , }, /* 06 */
{ segop, none2, NO_SRC , iPOP , 0 , }, /* 07 */
{ modrm, none2, B , iOR , Sf | Zf | Cf, }, /* 08 */
{ modrm, none2, NSP , iOR , Sf | Zf | Cf, }, /* 09 */
{ modrm, none2, TO_REG | B , iOR , Sf | Zf | Cf, }, /* 0A */
{ modrm, none2, TO_REG | NSP , iOR , Sf | Zf | Cf, }, /* 0B */
{ data1, axImp, B , iOR , Sf | Zf | Cf, }, /* 0C */
{ data2, axImp, 0 , iOR , Sf | Zf | Cf, }, /* 0D */
{ segop, none2, NO_SRC , iPUSH , 0 , }, /* 0E */
{ none1, none2, OP386 , iZERO , 0 , }, /* 0F */
{ modrm, none2, B , iADC , Sf | Zf | Cf, Cf }, /* 10 */
{ modrm, none2, NSP , iADC , Sf | Zf | Cf, Cf }, /* 11 */
{ modrm, none2, TO_REG | B , iADC , Sf | Zf | Cf, Cf }, /* 12 */
{ modrm, none2, TO_REG | NSP , iADC , Sf | Zf | Cf, Cf }, /* 13 */
{ data1, axImp, B , iADC , Sf | Zf | Cf, Cf }, /* 14 */
{ data2, axImp, 0 , iADC , Sf | Zf | Cf, Cf }, /* 15 */
{ segop, none2, NOT_HLL | NO_SRC , iPUSH , 0 , }, /* 16 */
{ segop, none2, NOT_HLL | NO_SRC , iPOP , 0 , }, /* 17 */
{ modrm, none2, B , iSBB , Sf | Zf | Cf, Cf }, /* 18 */
{ modrm, none2, NSP , iSBB , Sf | Zf | Cf, Cf }, /* 19 */
{ modrm, none2, TO_REG | B , iSBB , Sf | Zf | Cf, Cf }, /* 1A */
{ modrm, none2, TO_REG | NSP , iSBB , Sf | Zf | Cf, Cf }, /* 1B */
{ data1, axImp, B , iSBB , Sf | Zf | Cf, Cf }, /* 1C */
{ data2, axImp, 0 , iSBB , Sf | Zf | Cf, Cf }, /* 1D */
{ segop, none2, NO_SRC , iPUSH , 0 , }, /* 1E */
{ segop, none2, NO_SRC , iPOP , 0 , }, /* 1F */
{ modrm, none2, B , iAND , Sf | Zf | Cf, }, /* 20 */
{ modrm, none2, NSP , iAND , Sf | Zf | Cf, }, /* 21 */
{ modrm, none2, TO_REG | B , iAND , Sf | Zf | Cf, }, /* 22 */
{ modrm, none2, TO_REG | NSP , iAND , Sf | Zf | Cf, }, /* 23 */
{ data1, axImp, B , iAND , Sf | Zf | Cf, }, /* 24 */
{ data2, axImp, 0 , iAND , Sf | Zf | Cf, }, /* 25 */
{ prefix, none2, 0 , (IC)rES,0 , }, /* 26 */
{ none1, axImp, NOT_HLL | B|NO_SRC , iDAA , Sf | Zf | Cf, }, /* 27 */
{ modrm, none2, B , iSUB , Sf | Zf | Cf, }, /* 28 */
{ modrm, none2, 0 , iSUB , Sf | Zf | Cf, }, /* 29 */
{ modrm, none2, TO_REG | B , iSUB , Sf | Zf | Cf, }, /* 2A */
{ modrm, none2, TO_REG , iSUB , Sf | Zf | Cf, }, /* 2B */
{ data1, axImp, B , iSUB , Sf | Zf | Cf, }, /* 2C */
{ data2, axImp, 0 , iSUB , Sf | Zf | Cf, }, /* 2D */
{ prefix, none2, 0 , (IC)rCS,0 , }, /* 2E */
{ none1, axImp, NOT_HLL | B|NO_SRC , iDAS , Sf | Zf | Cf, }, /* 2F */
{ modrm, none2, B , iXOR , Sf | Zf | Cf, }, /* 30 */
{ modrm, none2, NSP , iXOR , Sf | Zf | Cf, }, /* 31 */
{ modrm, none2, TO_REG | B , iXOR , Sf | Zf | Cf, }, /* 32 */
{ modrm, none2, TO_REG | NSP , iXOR , Sf | Zf | Cf, }, /* 33 */
{ data1, axImp, B , iXOR , Sf | Zf | Cf, }, /* 34 */
{ data2, axImp, 0 , iXOR , Sf | Zf | Cf, }, /* 35 */
{ prefix, none2, 0 , (IC)rSS,0 , }, /* 36 */
{ none1, axImp, NOT_HLL | NO_SRC , iAAA , Sf | Zf | Cf, }, /* 37 */
{ modrm, none2, B , iCMP , Sf | Zf | Cf, }, /* 38 */
{ modrm, none2, NSP , iCMP , Sf | Zf | Cf, }, /* 39 */
{ modrm, none2, TO_REG | B , iCMP , Sf | Zf | Cf, }, /* 3A */
{ modrm, none2, TO_REG | NSP , iCMP , Sf | Zf | Cf, }, /* 3B */
{ data1, axImp, B , iCMP , Sf | Zf | Cf, }, /* 3C */
{ data2, axImp, 0 , iCMP , Sf | Zf | Cf, }, /* 3D */
{ prefix, none2, 0 , (IC)rDS,0 , }, /* 3E */
{ none1, axImp, NOT_HLL | NO_SRC , iAAS , Sf | Zf | Cf, }, /* 3F */
{ regop, none2, 0 , iINC , Sf | Zf, }, /* 40 */
{ regop, none2, 0 , iINC , Sf | Zf, }, /* 41 */
{ regop, none2, 0 , iINC , Sf | Zf, }, /* 42 */
{ regop, none2, 0 , iINC , Sf | Zf, }, /* 43 */
{ regop, none2, NOT_HLL , iINC , Sf | Zf, }, /* 44 */
{ regop, none2, 0 , iINC , Sf | Zf, }, /* 45 */
{ regop, none2, 0 , iINC , Sf | Zf, }, /* 46 */
{ regop, none2, 0 , iINC , Sf | Zf, }, /* 47 */
{ regop, none2, 0 , iDEC , Sf | Zf, }, /* 48 */
{ regop, none2, 0 , iDEC , Sf | Zf, }, /* 49 */
{ regop, none2, 0 , iDEC , Sf | Zf, }, /* 4A */
{ regop, none2, 0 , iDEC , Sf | Zf, }, /* 4B */
{ regop, none2, NOT_HLL , iDEC , Sf | Zf, }, /* 4C */
{ regop, none2, 0 , iDEC , Sf | Zf, }, /* 4D */
{ regop, none2, 0 , iDEC , Sf | Zf, }, /* 4E */
{ regop, none2, 0 , iDEC , Sf | Zf, }, /* 4F */
{ regop, none2, NO_SRC , iPUSH , 0 , }, /* 50 */
{ regop, none2, NO_SRC , iPUSH , 0 , }, /* 51 */
{ regop, none2, NO_SRC , iPUSH , 0 , }, /* 52 */
{ regop, none2, NO_SRC , iPUSH , 0 , }, /* 53 */
{ regop, none2, NOT_HLL | NO_SRC , iPUSH , 0 , }, /* 54 */
{ regop, none2, NO_SRC , iPUSH , 0 , }, /* 55 */
{ regop, none2, NO_SRC , iPUSH , 0 , }, /* 56 */
{ regop, none2, NO_SRC , iPUSH , 0 , }, /* 57 */
{ regop, none2, NO_SRC , iPOP , 0 , }, /* 58 */
{ regop, none2, NO_SRC , iPOP , 0 , }, /* 59 */
{ regop, none2, NO_SRC , iPOP , 0 , }, /* 5A */
{ regop, none2, NO_SRC , iPOP , 0 , }, /* 5B */
{ regop, none2, NOT_HLL | NO_SRC , iPOP , 0 , }, /* 5C */
{ regop, none2, NO_SRC , iPOP , 0 , }, /* 5D */
{ regop, none2, NO_SRC , iPOP , 0 , }, /* 5E */
{ regop, none2, NO_SRC , iPOP , 0 , }, /* 5F */
{ none1, none2, NOT_HLL | NO_OPS , iPUSHA, 0 , }, /* 60 */
{ none1, none2, NOT_HLL | NO_OPS , iPOPA , 0 , }, /* 61 */
{ memOnly, modrm, TO_REG | NSP , iBOUND, 0 , }, /* 62 */
{ none1, none2, OP386 , iZERO , 0 , }, /* 63 */
{ none1, none2, OP386 , iZERO , 0 , }, /* 64 */
{ none1, none2, OP386 , iZERO , 0 , }, /* 65 */
{ none1, none2, OP386 , iZERO , 0 , }, /* 66 */
{ none1, none2, OP386 , iZERO , 0 , }, /* 67 */
{ data2, none2, NO_SRC , iPUSH , 0 , }, /* 68 */
{ modrm, data2, TO_REG | NSP , iIMUL , Sf | Zf | Cf, }, /* 69 */
{ data1, none2, S | NO_SRC , iPUSH , 0 , }, /* 6A */
{ modrm, data1, TO_REG | NSP | S , iIMUL , Sf | Zf | Cf, }, /* 6B */
{ strop, memImp, NOT_HLL | B|IM_OPS , iINS , 0 , Df }, /* 6C */
{ strop, memImp, NOT_HLL | IM_OPS , iINS , 0 , Df }, /* 6D */
{ strop, memImp, NOT_HLL | B|IM_OPS , iOUTS , 0 , Df }, /* 6E */
{ strop, memImp, NOT_HLL | IM_OPS , iOUTS , 0 , Df }, /* 6F */
{ dispS, none2, NOT_HLL , iJO , 0 , }, /* 70 */
{ dispS, none2, NOT_HLL , iJNO , 0 , }, /* 71 */
{ dispS, none2, 0 , iJB , 0 , Cf }, /* 72 */
{ dispS, none2, 0 , iJAE , 0 , Cf }, /* 73 */
{ dispS, none2, 0 , iJE , 0 , Zf }, /* 74 */
{ dispS, none2, 0 , iJNE , 0 , Zf }, /* 75 */
{ dispS, none2, 0 , iJBE , 0 , Zf | Cf }, /* 76 */
{ dispS, none2, 0 , iJA , 0 , Zf | Cf }, /* 77 */
{ dispS, none2, 0 , iJS , 0 , Sf }, /* 78 */
{ dispS, none2, 0 , iJNS , 0 , Sf }, /* 79 */
{ dispS, none2, NOT_HLL , iJP , 0 , }, /* 7A */
{ dispS, none2, NOT_HLL , iJNP , 0 , }, /* 7B */
{ dispS, none2, 0 , iJL , 0 , Sf }, /* 7C */
{ dispS, none2, 0 , iJGE , 0 , Sf }, /* 7D */
{ dispS, none2, 0 , iJLE , 0 , Sf | Zf }, /* 7E */
{ dispS, none2, 0 , iJG , 0 , Sf | Zf }, /* 7F */
{ immed, data1, B , iZERO , 0 , }, /* 80 */
{ immed, data2, NSP , iZERO , 0 , }, /* 81 */
{ immed, data1, B , iZERO , 0 , }, /* 82 */ /* ?? */
{ immed, data1, NSP | S , iZERO , 0 , }, /* 83 */
{ modrm, none2, TO_REG | B , iTEST , Sf | Zf | Cf, }, /* 84 */
{ modrm, none2, TO_REG | NSP , iTEST , Sf | Zf | Cf, }, /* 85 */
{ modrm, none2, TO_REG | B , iXCHG , 0 , }, /* 86 */
{ modrm, none2, TO_REG | NSP , iXCHG , 0 , }, /* 87 */
{ modrm, none2, B , iMOV , 0 , }, /* 88 */
{ modrm, none2, 0 , iMOV , 0 , }, /* 89 */
{ modrm, none2, TO_REG | B , iMOV , 0 , }, /* 8A */
{ modrm, none2, TO_REG , iMOV , 0 , }, /* 8B */
{ segrm, none2, NSP , iMOV , 0 , }, /* 8C */
{ memOnly, modrm, TO_REG | NSP , iLEA , 0 , }, /* 8D */
{ segrm, none2, TO_REG | NSP , iMOV , 0 , }, /* 8E */
{ memReg0, none2, NO_SRC , iPOP , 0 , }, /* 8F */
{ none1, none2, NO_OPS , iNOP , 0 , }, /* 90 */
{ regop, axImp, 0 , iXCHG , 0 , }, /* 91 */
{ regop, axImp, 0 , iXCHG , 0 , }, /* 92 */
{ regop, axImp, 0 , iXCHG , 0 , }, /* 93 */
{ regop, axImp, NOT_HLL , iXCHG , 0 , }, /* 94 */
{ regop, axImp, 0 , iXCHG , 0 , }, /* 95 */
{ regop, axImp, 0 , iXCHG , 0 , }, /* 96 */
{ regop, axImp, 0 , iXCHG , 0 , }, /* 97 */
{ alImp, axImp, SRC_B | S , iSIGNEX,0 , }, /* 98 */
{axSrcIm, axImp, IM_DST | S , iSIGNEX,0 , }, /* 99 */
{ dispF, none2, 0 , iCALLF ,0 , }, /* 9A */
{ none1, none2, FLOAT_OP| NO_OPS , iWAIT , 0 , }, /* 9B */
{ none1, none2, NOT_HLL | NO_OPS , iPUSHF, 0 , }, /* 9C */
{ none1, none2, NOT_HLL | NO_OPS , iPOPF , Sf | Zf | Cf | Df,}, /* 9D */
{ none1, none2, NOT_HLL | NO_OPS , iSAHF , Sf | Zf | Cf, }, /* 9E */
{ none1, none2, NOT_HLL | NO_OPS , iLAHF , 0 , Sf | Zf | Cf }, /* 9F */
{ dispM, axImp, B , iMOV , 0 , }, /* A0 */
{ dispM, axImp, 0 , iMOV , 0 , }, /* A1 */
{ dispM, axImp, TO_REG | B , iMOV , 0 , }, /* A2 */
{ dispM, axImp, TO_REG , iMOV , 0 , }, /* A3 */
{ strop, memImp, B | IM_OPS , iMOVS , 0 , Df }, /* A4 */
{ strop, memImp, IM_OPS , iMOVS , 0 , Df }, /* A5 */
{ strop, memImp, B | IM_OPS , iCMPS , Sf | Zf | Cf, Df }, /* A6 */
{ strop, memImp, IM_OPS , iCMPS , Sf | Zf | Cf, Df }, /* A7 */
{ data1, axImp, B , iTEST , Sf | Zf | Cf, }, /* A8 */
{ data2, axImp, 0 , iTEST , Sf | Zf | Cf, }, /* A9 */
{ strop, memImp, B | IM_OPS , iSTOS , 0 , Df }, /* AA */
{ strop, memImp, IM_OPS , iSTOS , 0 , Df }, /* AB */
{ strop, memImp, B | IM_OPS , iLODS , 0 , Df }, /* AC */
{ strop, memImp, IM_OPS , iLODS , 0 , Df }, /* AD */
{ strop, memImp, B | IM_OPS , iSCAS , Sf | Zf | Cf, Df }, /* AE */
{ strop, memImp, IM_OPS , iSCAS , Sf | Zf | Cf, Df }, /* AF */
{ regop, data1, B , iMOV , 0 , }, /* B0 */
{ regop, data1, B , iMOV , 0 , }, /* B1 */
{ regop, data1, B , iMOV , 0 , }, /* B2 */
{ regop, data1, B , iMOV , 0 , }, /* B3 */
{ regop, data1, B , iMOV , 0 , }, /* B4 */
{ regop, data1, B , iMOV , 0 , }, /* B5 */
{ regop, data1, B , iMOV , 0 , }, /* B6 */
{ regop, data1, B , iMOV , 0 , }, /* B7 */
{ regop, data2, 0 , iMOV , 0 , }, /* B8 */
{ regop, data2, 0 , iMOV , 0 , }, /* B9 */
{ regop, data2, 0 , iMOV , 0 , }, /* BA */
{ regop, data2, 0 , iMOV , 0 , }, /* BB */
{ regop, data2, NOT_HLL , iMOV , 0 , }, /* BC */
{ regop, data2, 0 , iMOV , 0 , }, /* BD */
{ regop, data2, 0 , iMOV , 0 , }, /* BE */
{ regop, data2, 0 , iMOV , 0 , }, /* BF */
{ shift, data1, B , iZERO , 0 , }, /* C0 */
{ shift, data1, NSP | SRC_B , iZERO , 0 , }, /* C1 */
{ data2, none2, 0 , iRET , 0 , }, /* C2 */
{ none1, none2, NO_OPS , iRET , 0 , }, /* C3 */
{ memOnly, modrm, TO_REG | NSP , iLES , 0 , }, /* C4 */
{ memOnly, modrm, TO_REG | NSP , iLDS , 0 , }, /* C5 */
{ memReg0, data1, B , iMOV , 0 , }, /* C6 */
{ memReg0, data2, 0 , iMOV , 0 , }, /* C7 */
{ data2, data1, 0 , iENTER, 0 , }, /* C8 */
{ none1, none2, NO_OPS , iLEAVE, 0 , }, /* C9 */
{ data2, none2, 0 , iRETF , 0 , }, /* CA */
{ none1, none2, NO_OPS , iRETF , 0 , }, /* CB */
{ const3, none2, NOT_HLL , iINT , 0 , }, /* CC */
{ data1,checkInt, NOT_HLL , iINT , 0 , }, /* CD */
{ none1, none2, NOT_HLL | NO_OPS , iINTO , 0 , }, /* CE */
{ none1, none2, NOT_HLL | NO_OPS , iIRET , 0 , }, /* CF */
{ shift, const1, B , iZERO , 0 , }, /* D0 */
{ shift, const1, SRC_B , iZERO , 0 , }, /* D1 */
{ shift, none1, B , iZERO , 0 , }, /* D2 */
{ shift, none1, SRC_B , iZERO , 0 , }, /* D3 */
{ data1, axImp, NOT_HLL , iAAM , Sf | Zf | Cf, }, /* D4 */
{ data1, axImp, NOT_HLL , iAAD , Sf | Zf | Cf, }, /* D5 */
{ none1, none2, 0 , iZERO , 0 , }, /* D6 */
{ memImp, axImp, NOT_HLL | B| IM_OPS, iXLAT , 0 , }, /* D7 */
{ escop, none2, FLOAT_OP , iESC , 0 , }, /* D8 */
{ escop, none2, FLOAT_OP , iESC , 0 , }, /* D9 */
{ escop, none2, FLOAT_OP , iESC , 0 , }, /* DA */
{ escop, none2, FLOAT_OP , iESC , 0 , }, /* DB */
{ escop, none2, FLOAT_OP , iESC , 0 , }, /* DC */
{ escop, none2, FLOAT_OP , iESC , 0 , }, /* DD */
{ escop, none2, FLOAT_OP , iESC , 0 , }, /* DE */
{ escop, none2, FLOAT_OP , iESC , 0 , }, /* DF */
{ dispS, none2, 0 , iLOOPNE,0 , Zf }, /* E0 */
{ dispS, none2, 0 , iLOOPE, 0 , Zf }, /* E1 */
{ dispS, none2, 0 , iLOOP , 0 , }, /* E2 */
{ dispS, none2, 0 , iJCXZ , 0 , }, /* E3 */
{ data1, axImp, NOT_HLL | B|NO_SRC , iIN , 0 , }, /* E4 */
{ data1, axImp, NOT_HLL | NO_SRC , iIN , 0 , }, /* E5 */
{ data1, axImp, NOT_HLL | B|NO_SRC , iOUT , 0 , }, /* E6 */
{ data1, axImp, NOT_HLL | NO_SRC , iOUT , 0 , }, /* E7 */
{ dispN, none2, 0 , iCALL , 0 , }, /* E8 */
{ dispN, none2, 0 , iJMP , 0 , }, /* E9 */
{ dispF, none2, 0 , iJMPF , 0 , }, /* EA */
{ dispS, none2, 0 , iJMP , 0 , }, /* EB */
{ none1, axImp, NOT_HLL | B|NO_SRC , iIN , 0 , }, /* EC */
{ none1, axImp, NOT_HLL | NO_SRC , iIN , 0 , }, /* ED */
{ none1, axImp, NOT_HLL | B|NO_SRC , iOUT , 0 , }, /* EE */
{ none1, axImp, NOT_HLL | NO_SRC , iOUT , 0 , }, /* EF */
{ none1, none2, NOT_HLL | NO_OPS , iLOCK , 0 , }, /* F0 */
{ none1, none2, 0 , iZERO , 0 , }, /* F1 */
{ prefix, none2, 0 , iREPNE, 0 , }, /* F2 */
{ prefix, none2, 0 , iREPE , 0 , }, /* F3 */
{ none1, none2, NOT_HLL | NO_OPS , iHLT , 0 , }, /* F4 */
{ none1, none2, NO_OPS , iCMC , Cf, Cf }, /* F5 */
{ arith, none1, B , iZERO , 0 , }, /* F6 */
{ arith, none1, NSP , iZERO , 0 , }, /* F7 */
{ none1, none2, NO_OPS , iCLC , Cf, }, /* F8 */
{ none1, none2, NO_OPS , iSTC , Cf, }, /* F9 */
{ none1, none2, NOT_HLL | NO_OPS , iCLI , 0 , }, /* FA */
{ none1, none2, NOT_HLL | NO_OPS , iSTI , 0 , }, /* FB */
{ none1, none2, NO_OPS , iCLD , Df, }, /* FC */
{ none1, none2, NO_OPS , iSTD , Df, }, /* FD */
{ trans, none1, B , iZERO , 0 , }, /* FE */
{ trans, none1, NSP , iZERO , 0 , } /* FF */
} ;
/* Decoder state shared by scan() and the state functions below.
 * scan() clears both prefix words before decoding an instruction; prefix()
 * stores the prefix icode opcode in one of them; setAddress() clears
 * SegPrefix once a segment override has been applied to an operand and
 * strop() clears RepPrefix once it has been folded into a string opcode.
 * scan() reports FUNNY_SEGOVR / FUNNY_REP if either is still set afterwards. */
static word SegPrefix, RepPrefix;
static byte *pInst; /* Ptr. to current byte of instruction */
static ICODE * pIcode; /* Ptr to Icode record filled in by scan() */
/*****************************************************************************
 scan - decodes the single machine instruction found at offset ip of
        prog.Image and fills in the low-level icode record *p for it.
 Returns 0 on success,
         IP_OUT_OF_RANGE if ip does not lie inside the image,
         FUNNY_SEGOVR / FUNNY_REP if a segment override / REP prefix was
         seen but never consumed by the instruction,
         INVALID_386OP or INVALID_OPCODE if the opcode was rejected.
 ****************************************************************************/
Int scan(dword ip, ICODE *p)
{
    memset(p, 0, sizeof(ICODE));
    p->type        = LOW_LEVEL;
    p->ic.ll.label = ip;                /* ip is an absolute image offset */

    if (ip >= (dword)prog.cbImage)
        return IP_OUT_OF_RANGE;

    SegPrefix = 0;
    RepPrefix = 0;
    pInst     = prog.Image + ip;
    pIcode    = p;

    Int op;
    for (;;)
    {
        op = *pInst++;                                  /* First state */
        p->ic.ll.opcode   = stateTable[op].opcode;      /* Icode opcode */
        p->ic.ll.flg      = stateTable[op].flg & ICODEMASK;
        p->ic.ll.flagDU.d = stateTable[op].df;
        p->ic.ll.flagDU.u = stateTable[op].uf;

        (*stateTable[op].state1)(op);                   /* Second state */
        (*stateTable[op].state2)(op);                   /* Third state */

        /* A prefix byte means the real opcode is still to come */
        if (stateTable[op].state1 != prefix)
            break;
    }

    /* A state function zeroes the opcode to reject the instruction */
    if (!p->ic.ll.opcode)
        return (stateTable[op].flg & OP386) ? INVALID_386OP : INVALID_OPCODE;

    /* Record how many image bytes the instruction occupied */
    p->ic.ll.numBytes = (byte)((pInst - prog.Image) - ip);

    if (SegPrefix)
        return FUNNY_SEGOVR;            /* Seg. override was not used */
    if (RepPrefix)
        return FUNNY_REP;               /* REP prefix was not used */
    return 0;
}
/***************************************************************************
 relocItem - TRUE if the image offset of p appears in prog.relocTable
 **************************************************************************/
static boolT relocItem(byte *p)
{
    const dword target = p - prog.Image;    /* offset of p in the image */
    Int idx = 0;

    while (idx < prog.cReloc)
    {
        if (prog.relocTable[idx] == target)
            return TRUE;
        ++idx;
    }
    return FALSE;
}
/***************************************************************************
 getWord - fetches the word at pInst (via LH) and steps pInst past it
 **************************************************************************/
static word getWord(void)
{
    byte *const at = pInst;     /* where the word starts */

    pInst += 2;
    return LH(at);
}
/****************************************************************************
 signex - widens a byte to Int, treating bit 7 as the sign bit
 ***************************************************************************/
static Int signex(byte b)
{
    Int value = b;

    if (value & 0x80)
        value -= 0x100;         /* 0x80..0xFF map to -128..-1 */
    return value;
}
/****************************************************************************
 * setAddress - Fills in the source or destination operand of the current
 *              icode; which one is chosen from fdst and the TO_REG flag of
 *              opcode i.
 *   fdst == TRUE  : the r/m part of the field (dest, unless TO_REG)
 *   fdst == FALSE : the reg part of the field
 *   seg           : segment override (0 for none)
 *   reg, off      : register number and offset stored in the operand
 ***************************************************************************/
static void setAddress(Int i, boolT fdst, word seg, int16 reg, word off)
{
    ICODEMEM *pm;

    /* If not to register (i.e. to r/m), and talking about r/m,
       then this is dest */
    if ((!(stateTable[i].flg & TO_REG)) == fdst)
        pm = &pIcode->ic.ll.dst;
    else
        pm = &pIcode->ic.ll.src;

    /* Set segment. Per the original note, a later procedure (lookupAddr in
     * proclist.c) provides the value of this segment in segValue. */
    if (seg)
    {
        /* Explicit override: use it, and mark the prefix as consumed so
         * scan() can catch overrides that were never applied. */
        pm->seg = pm->segOver = (byte)seg;
        SegPrefix = 0;
    }
    else if ((reg >= INDEXBASE) && (reg == INDEXBASE + 2 ||
             reg == INDEXBASE + 3 || reg == INDEXBASE + 6))
    {
        pm->seg = rSS;          /* indexed on bp */
    }
    else
    {
        pm->seg = rDS;          /* any other indexed reg */
    }

    pm->regi = (byte)reg;
    pm->off  = (int16)off;

    /* Byte-sized opcode with a plain register: switch to the byte
     * register numbering */
    if (reg && reg < INDEXBASE && (stateTable[i].flg & B))
        pm->regi += rAL - rAX;
}
/****************************************************************************
 rm - Decodes the mod and r/m fields of the modrm byte at pInst (consuming
      it and any displacement) into the dst (unless TO_REG) operand
 ***************************************************************************/
static void rm(Int i)
{
    const byte modrmByte = *pInst++;
    const byte mod       = modrmByte >> 6;
    const byte rmBits    = modrmByte & 7;

    if (mod == 3)
    {
        /* Register operand */
        setAddress(i, TRUE, 0, rmBits + rAX, 0);
    }
    else if (mod == 0 && rmBits == 6)
    {
        /* Direct address: 16-bit offset, no index register */
        const word disp = getWord();
        setAddress(i, TRUE, SegPrefix, 0, disp);
        pIcode->ic.ll.flg |= WORD_OFF;
    }
    else if (mod == 0)
    {
        /* Indexed, no displacement */
        setAddress(i, TRUE, SegPrefix, rmBits + INDEXBASE, 0);
    }
    else if (mod == 1)
    {
        /* Indexed, sign-extended 1 byte displacement */
        const word disp = (word)signex(*pInst++);
        setAddress(i, TRUE, SegPrefix, rmBits + INDEXBASE, disp);
    }
    else
    {
        /* mod == 2: indexed, 2 byte displacement */
        const word disp = getWord();
        setAddress(i, TRUE, SegPrefix, rmBits + INDEXBASE, disp);
        pIcode->ic.ll.flg |= WORD_OFF;
    }

    /* Opcodes flagged NSP are marked NOT_HLL when either operand is SP */
    if (stateTable[i].flg & NSP)
    {
        if (pIcode->ic.ll.src.regi == rSP || pIcode->ic.ll.dst.regi == rSP)
            pIcode->ic.ll.flg |= NOT_HLL;
    }
}
/****************************************************************************
 modrm - Sets up both operands from the modrm byte: first the reg field,
         then (via rm) the mod and r/m fields
 ***************************************************************************/
static void modrm(Int i)
{
    const int16 regField = REG(*pInst) + rAX;

    setAddress(i, FALSE, 0, regField, 0);
    rm(i);
}
/****************************************************************************
 segrm - segment register encoded in the reg field of the modrm byte.
         The opcode is zeroed (rejected) if the field selects a register
         beyond rDS, or selects rCS for a TO_REG opcode.
 ****************************************************************************/
static void segrm(Int i)
{
    const Int reg = REG(*pInst) + rES;

    if (reg > rDS || (reg == rCS && (stateTable[i].flg & TO_REG)))
    {
        pIcode->ic.ll.opcode = (llIcode)0;
        return;
    }
    setAddress(i, FALSE, 0, (int16)reg, 0);
    rm(i);
}
/****************************************************************************
 regop - register encoded in the low 3 bits of the opcode; after the
         setAddress call, dst.regi is made equal to src.regi
 ***************************************************************************/
static void regop(Int i)
{
    const int16 reg = ((int16)i & 7) + rAX;

    setAddress(i, FALSE, 0, reg, 0);
    pIcode->ic.ll.dst.regi = pIcode->ic.ll.src.regi;
}
/*****************************************************************************
 segop - segment register encoded in bits 3-4 of the opcode
 *****************************************************************************/
static void segop(Int i)
{
    const int16 segReg = (((int16)i & 0x18) >> 3) + rES;

    setAddress(i, TRUE, 0, segReg, 0);
}
/****************************************************************************
 axImp - Plugs in AX as the implied operand on the r/m side (dst, unless
         the opcode is TO_REG)
 ***************************************************************************/
static void axImp(Int i)
{
    const int16 implied = rAX;

    setAddress(i, TRUE, 0, implied, 0);
}
/****************************************************************************
 axSrcIm - Implied AX source: only the source register number is set
 ***************************************************************************/
static void axSrcIm(Int i)
{
    ICODEMEM &source = pIcode->ic.ll.src;

    source.regi = rAX;
}
/****************************************************************************
 alImp - Implied AL source: only the source register number is set
 ***************************************************************************/
static void alImp(Int i)
{
    ICODEMEM &source = pIcode->ic.ll.src;

    source.regi = rAL;
}
/*****************************************************************************
 memImp - Plugs in the implied memory operand on the reg side (src, unless
          the opcode is TO_REG), honouring any pending segment override
 ****************************************************************************/
static void memImp(Int i)
{
    const word override = SegPrefix;    /* 0 when there is none */

    setAddress(i, FALSE, override, 0, 0);
}
/****************************************************************************
 memOnly - Rejects the instruction (zeroes the opcode) when the modrm byte
           refers to a register, i.e. when mod == 3
 ***************************************************************************/
static void memOnly(Int i)
{
    const bool isRegister = ((*pInst & 0xC0) == 0xC0);

    if (isRegister)
        pIcode->ic.ll.opcode = (llIcode)0;
}
/****************************************************************************
 memReg0 - Like memOnly, but the reg field of the modrm byte must also be 0.
           A valid modrm byte is then decoded through rm().
 ****************************************************************************/
static void memReg0(Int i)
{
    const bool regNonZero = (REG(*pInst) != 0);
    const bool isRegister = ((*pInst & 0xC0) == 0xC0);

    if (regNonZero || isRegister)
    {
        pIcode->ic.ll.opcode = (llIcode)0;
        return;
    }
    rm(i);
}
/***************************************************************************
 immed - The reg field of the modrm byte selects the arithmetic opcode and
         its flag use; the r/m part is then decoded as the operand
 **************************************************************************/
static void immed(Int i)
{
    static llIcode immedTable[8] = {iADD, iOR, iADC, iSBB, iAND, iSUB, iXOR, iCMP};
    static byte uf[8] = { 0, 0, Cf, Cf, 0, 0, 0, 0 };

    const Int sel = REG(*pInst);        /* read before rm() moves pInst */

    pIcode->ic.ll.opcode   = immedTable[sel];
    pIcode->ic.ll.flagDU.u = uf[sel];
    pIcode->ic.ll.flagDU.d = (Sf | Zf | Cf);
    rm(i);

    const llIcode chosen = pIcode->ic.ll.opcode;
    if (chosen == iADD || chosen == iSUB)
        pIcode->ic.ll.flg &= ~NOT_HLL;  /* Allow ADD/SUB SP, immed */
}
/****************************************************************************
 shift - The reg field of the modrm byte selects the shift/rotate opcode
         and its flag use/def; the r/m part is the operand and the source
         register is set to CL
 ***************************************************************************/
static void shift(Int i)
{
    static llIcode shiftTable[8] =
    {
        (llIcode)iROL, (llIcode)iROR, (llIcode)iRCL, (llIcode)iRCR,
        (llIcode)iSHL, (llIcode)iSHR, (llIcode)0,    (llIcode)iSAR
    };
    static byte uf[8] = {0, 0, Cf, Cf, 0, 0, 0, 0 };
    static byte df[8] = {Cf, Cf, Cf, Cf, Sf | Zf | Cf,
                         Sf | Zf | Cf, 0, Sf | Zf | Cf};

    const Int sel = REG(*pInst);        /* read before rm() moves pInst */

    pIcode->ic.ll.opcode   = shiftTable[sel];
    pIcode->ic.ll.flagDU.u = uf[sel];
    pIcode->ic.ll.flagDU.d = df[sel];
    rm(i);
    pIcode->ic.ll.src.regi = rCL;
}
/****************************************************************************
 trans - The reg field of the modrm byte selects the opcode (INC, DEC,
         CALL, CALLF, JMP, JMPF or PUSH); the r/m part is the operand.
         For a byte-sized opcode (flag B) only INC and DEC are decoded.
 ***************************************************************************/
static void trans(Int i)
{
    static llIcode transTable[8] =
    {
        (llIcode)iINC, (llIcode)iDEC, (llIcode)iCALL, (llIcode)iCALLF,
        (llIcode)iJMP, (llIcode)iJMPF,(llIcode)iPUSH, (llIcode)0
    };
    static byte df[8] = {Sf | Zf, Sf | Zf, 0, 0, 0, 0, 0, 0};

    const Int sel = REG(*pInst);        /* read before rm() moves pInst */

    /* Only INC & DEC are valid on bytes; anything else is left untouched */
    if ((byte)sel >= 2 && (stateTable[i].flg & B))
        return;

    pIcode->ic.ll.opcode   = transTable[sel];
    pIcode->ic.ll.flagDU.d = df[sel];
    rm(i);

    /* The decoded operand serves as both source and destination */
    memcpy(&pIcode->ic.ll.src, &pIcode->ic.ll.dst, sizeof(ICODEMEM));

    const llIcode chosen = pIcode->ic.ll.opcode;
    const bool isTransfer = (chosen == iJMP || chosen == iCALL ||
                             chosen == iCALLF);
    const bool isUnary    = (chosen == iINC || chosen == iPUSH ||
                             chosen == iDEC);
    if (isTransfer)
        pIcode->ic.ll.flg |= NO_OPS;
    else if (isUnary)
        pIcode->ic.ll.flg |= NO_SRC;
}
/****************************************************************************
 arith - The reg field of the modrm byte selects the opcode (TEST, NOT,
         NEG, MUL, IMUL, DIV or IDIV); the r/m part is the operand.
         TEST additionally reads an immediate; NOT/NEG have no source;
         the remaining opcodes use the operand as source with AX as dst.
 ****************************************************************************/
static void arith(Int i)
{
    static llIcode arithTable[8] =
    {
        (llIcode)iTEST, (llIcode)0,     (llIcode)iNOT, (llIcode)iNEG,
        (llIcode)iMUL,  (llIcode)iIMUL, (llIcode)iDIV, (llIcode)iIDIV
    };
    static byte df[8] = {Sf | Zf | Cf, 0, 0, Sf | Zf | Cf,
                         Sf | Zf | Cf, Sf | Zf | Cf, Sf | Zf | Cf,
                         Sf | Zf | Cf};

    const Int sel = REG(*pInst);        /* read before rm() moves pInst */
    const byte opcode = pIcode->ic.ll.opcode = arithTable[sel];

    pIcode->ic.ll.flagDU.d = df[sel];
    rm(i);

    const bool isUnary = (opcode == iNOT || opcode == iNEG);

    if (opcode == iTEST)
    {
        /* TEST r/m, immed: immediate width follows the opcode width */
        if (stateTable[i].flg & B)
            data1(i);
        else
            data2(i);
    }
    else if (isUnary)
    {
        pIcode->ic.ll.flg |= NO_SRC;
    }
    else
    {
        /* The decoded operand becomes the source... */
        memcpy(&pIcode->ic.ll.src, &pIcode->ic.ll.dst, sizeof(ICODEMEM));
        setAddress(i, TRUE, 0, rAX, 0);     /* ...and dst = AX */
    }

    /* Non-byte DIV/IDIV are flagged IM_TMP_DST */
    if ((opcode == iDIV || opcode == iIDIV) &&
        (pIcode->ic.ll.flg & B) != B)
    {
        pIcode->ic.ll.flg |= IM_TMP_DST;
    }
}
/*****************************************************************************
 data1 - Reads a 1 byte immediate (sign extended when the opcode has the S
         flag) and marks the icode as having an immediate operand
 *****************************************************************************/
static void data1(Int i)
{
    const byte raw = *pInst++;

    if (stateTable[i].flg & S)
        pIcode->ic.ll.immed.op = signex(raw);
    else
        pIcode->ic.ll.immed.op = raw;
    pIcode->ic.ll.flg |= I;
}
/*****************************************************************************
 data2 - Reads a 2 byte immediate and marks the icode as having an
         immediate operand. The word is flagged SEG_IMMED when its image
         offset is listed in the relocation table.
 ****************************************************************************/
static void data2(Int i)
{
    if (relocItem(pInst))
        pIcode->ic.ll.flg |= SEG_IMMED;

    const word value = getWord();

    if (pIcode->ic.ll.opcode == iENTER)
    {
        /* ENTER is a special case: it takes no destination operand, so the
         * dst offset field is reused for the number of bytes to allocate on
         * the stack (the procedure level goes in the immediate field).
         * There is no source operand either, hence NO_OPS. */
        pIcode->ic.ll.dst.off = value;
        pIcode->ic.ll.flg |= NO_OPS;
    }
    else
    {
        pIcode->ic.ll.immed.op = value;
    }
    pIcode->ic.ll.flg |= I;
}
/****************************************************************************
 dispM - 2 byte offset without a modrm byte (== mod 0, rm 6)
         (Note: TO_REG bits are reversed)
 ****************************************************************************/
static void dispM(Int i)
{
    const word offset = getWord();

    setAddress(i, FALSE, SegPrefix, 0, offset);
}
/****************************************************************************
 dispN - 2 byte signed displacement, stored as an immediate made absolute
         by adding the image offset of the byte following it
 ****************************************************************************/
static void dispN(Int i)
{
    const long rel = (short)getWord();      /* Signed displacement */

    /* The sum could be between 32k and 64k and still be positive; it is an
       offset from prog.Image, so it must be treated as unsigned */
    const unsigned nextOff = (unsigned)(pInst - prog.Image);

    pIcode->ic.ll.immed.op = (dword)(rel + nextOff);
    pIcode->ic.ll.flg |= I;
}
/***************************************************************************
 dispS - 1 byte signed displacement, stored as an immediate made absolute
         by adding the image offset of the byte following it
 ***************************************************************************/
static void dispS(Int i)
{
    const long rel = signex(*pInst++);      /* Signed displacement */
    const unsigned nextOff = (unsigned)(pInst - prog.Image);

    pIcode->ic.ll.immed.op = (dword)(rel + nextOff);
    pIcode->ic.ll.flg |= I;
}
/****************************************************************************
 dispF - 4 byte far pointer (offset word, then segment word) stored as the
         immediate 20-bit target address segment*16 + offset
 ***************************************************************************/
static void dispF(Int i)
{
    const dword offPart = (unsigned)getWord();
    const dword segPart = (unsigned)getWord();
    const dword linear  = offPart + ((dword)(unsigned)segPart << 4);

    pIcode->ic.ll.immed.op = linear;
    pIcode->ic.ll.flg |= I;
}
/****************************************************************************
 prefix - Remembers a prefix byte for the instruction that follows: REPE and
          REPNE go to RepPrefix, anything else to SegPrefix
          (LOCK is ignored on purpose)
 ****************************************************************************/
static void prefix(Int i)
{
    const llIcode seen = pIcode->ic.ll.opcode;
    const bool isRep = (seen == iREPE || seen == iREPNE);

    if (isRep)
        RepPrefix = seen;
    else
        SegPrefix = seen;
}
/* Advances ic to the llIcode enumerator whose numeric value is one higher.
 * The arithmetic is done on int because the enum has no operator++. */
inline void BumpOpcode(llIcode& ic)
{
    const int following = static_cast<int>(ic) + 1;
    ic = static_cast<llIcode>(following);
}
/*****************************************************************************
 strop - Folds a pending REP prefix into a string instruction: the opcode is
         advanced by two for CMPS/SCAS under REPE and by one otherwise, and
         the prefix is marked as consumed.
         (Presumably the repeated variants follow each string opcode in the
         llIcode enumeration - confirm against its declaration.)
 *****************************************************************************/
static void strop(Int i)
{
    if (!RepPrefix)
        return;                         /* Not repeated: nothing to do */

    llIcode &op = pIcode->ic.ll.opcode;
    const bool byTwo = (op == iCMPS || op == iSCAS) && RepPrefix == iREPE;

    BumpOpcode(op);                     /* always += 1 */
    if (byTwo)
        BumpOpcode(op);                 /* += 2 in total */

    if (op == iREP_LODS)
        pIcode->ic.ll.flg |= NOT_HLL;
    RepPrefix = 0;
}
/***************************************************************************
 escop - ESC operands: the immediate combines the low 3 bits of the opcode
         (as bits 3-5) with the reg field of the modrm byte; the r/m part is
         decoded as the operand
 ***************************************************************************/
static void escop(Int i)
{
    const dword fromOpcode = (dword)((i & 7) << 3);

    pIcode->ic.ll.immed.op = REG(*pInst) + fromOpcode;
    pIcode->ic.ll.flg |= I;
    rm(i);
}
/****************************************************************************
 const1 - Supplies the constant 1 as the immediate operand
 ****************************************************************************/
static void const1(Int i)
{
    pIcode->ic.ll.flg |= I;
    pIcode->ic.ll.immed.op = 1;
}
/*****************************************************************************
 const3 - Supplies the constant 3 as the immediate operand
 ****************************************************************************/
static void const3(Int i)
{
    pIcode->ic.ll.flg |= I;
    pIcode->ic.ll.immed.op = 3;
}
/****************************************************************************
 none1 - State function that does nothing; used in stateTable for opcodes
         that need no work in that state
 ****************************************************************************/
static void none1(Int i)
{
    /* intentionally empty */
}
/****************************************************************************
 none2 - Sets the NO_OPS flag if the icode already carries an immediate
 ****************************************************************************/
static void none2(Int i)
{
    const bool hasImmediate = (pIcode->ic.ll.flg & I) != 0;

    if (hasImmediate)
        pIcode->ic.ll.flg |= NO_OPS;
}
/****************************************************************************
 checkInt - Converts int 34h .. int 3Bh into the equivalent ESC nn
            instruction; any other interrupt number is left alone
 ****************************************************************************/
static void checkInt(Int i)
{
    const word wOp = (word) pIcode->ic.ll.immed.op;

    if (wOp < 0x34 || wOp > 0x3B)
        return;

    /* This is a Borland/Microsoft floating point emulation instruction.
       Treat as if it is an ESC opcode */
    const Int escIndex = wOp - 0x34;

    pIcode->ic.ll.immed.op = escIndex;
    pIcode->ic.ll.opcode = iESC;
    pIcode->ic.ll.flg |= FLOAT_OP;
    escop(escIndex + 0xD8);
}

626
src/symtab.cpp Normal file
View File

@@ -0,0 +1,626 @@
/*
* (C) Mike van Emmerik
* These could probably be replaced by functions from libg++
*/
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *\
* *
* S y m b o l t a b l e F u n c t i o n s *
* *
\* * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* This file implements a symbol table with a symbolic name, a symbol value
(word), and a procedure number. Two tables are maintained, to be able to
look up by name or by value. Pointers are used for the duplicated symbolic
name to save space. Both tables have the same structure.
The hash tables automatically expand when they get 90% full; they are
never compressed. Expanding the tables could take some time, since about
half of the entries have to be moved on average.
Linear probing is used, due to the difficulty of implementing (e.g.)
quadratic probing with a variable table size.
*/
#include <cstdio>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <unordered_map>
#include "dcc.h"
#include "symtab.h"
#define TABLESIZE 16 /* Number of entries added each expansion */
/* Probably has to be a power of 2 */
#define STRTABSIZE 256 /* Size string table is inc'd by */
#define NIL ((word)-1)
using namespace std;
static char *pStrTab; /* Pointer to the current string table */
static int strTabNext; /* Next free index into pStrTab */
namespace std
{
    /* Hash functor that lets SYMTABLE be the key of an unordered_map.
     * Only symOff takes part in the hash, using the same mixing as
     * TABLEINFO_TYPE::valHash (truncated to a word).
     *
     * The former std::unary_function base is deprecated since C++11 and
     * removed in C++17; the two typedefs it provided are kept here so any
     * code naming them still compiles. */
    template<>
    struct hash<SYMTABLE>
    {
        typedef const SYMTABLE & argument_type;
        typedef size_t           result_type;

        size_t operator()(const SYMTABLE & key) const
        {
            const word h = (word)(key.symOff ^ (key.symOff >> 8));
            return h;
        }
    };
}
static tableType curTableType; /* Which table is current */
/* One symbol table: a value-hashed table (valTab) and, optionally, a
 * symbol-name-hashed table (symTab) of the same size. Collisions are resolved
 * by linear probing, and entries sharing a chain are linked through the
 * nextOvf/prevOvf slot indexes (NIL terminates a chain). */
struct TABLEINFO_TYPE
{
/* Remove the entry with offset symOff from valTab and, if bSymToo, the entry
 * of the same name from symTab. Exits the program if it cannot be found. */
void deleteVal(dword symOff, Function *symProc, boolT bSymToo);
/* Insert a symbol into valTab and, if bSymToo, into symTab as well */
void enterSym(const char *symName, dword symOff, Function *symProc, boolT bSymToo);
/* Look up by offset: returns the name (empty string if not found) and, on
 * success, stores the valTab slot index in pIndex */
std::string findVal(dword symOff, Function *symProc, word &pIndex);
/* Allocate and zero the tables needed for the given table type */
void create(tableType type);
/* Free symTab and valTab */
void destroy();
private:
/* Remove a symbol, found by name, from both symTab and valTab */
void deleteSym(char *symName);
/* Look up by name: TRUE and the symTab slot index in pIndex if found */
boolT findSym(const char *symName, word &pIndex);
/* Look up by name and copy out the symbol's offset and procedure */
boolT readSym(char *symName, dword *pSymOff, Function **pSymProc);
/* Enlarge symTab and move entries whose post-modulo hash changed */
void expandSym(void);
/* Find an empty symTab slot for symName by linear probing */
word findBlankSym(const std::string &symName);
/* Hash functions: return the hash modulo tableSize and store the
 * pre-modulo value in *pre */
word symHash(const char *name, word *pre);
word valHash(dword symOff, Function *symProc, word *pre);
SYMTABLE *symTab; /* Pointer to the symbol hashed table */
SYMTABLE *valTab; /* Pointer to the value hashed table */
word numEntry; /* Number of entries in this table */
word tableSize;/* Size of the table (entries) */
/* Shadow copies of the entries, filled by enterSym and consulted only by the
 * assert() cross-checks in findSym (z, keyed by name) and findVal (z2) */
unordered_map<string,SYMTABLE> z;
unordered_map<SYMTABLE,string> z2;
};
TABLEINFO_TYPE tableInfo[NUM_TABLE_TYPES]; /* Array of info about tables */
/* Working copy of the table selected by selectTable(); the free functions
 * enterSym/deleteVal/findVal/readVal all operate on it */
TABLEINFO_TYPE currentTabInfo;
/* Create a new symbol table of the given type in *this.
 *   Comment : value-hashed table only (symTab is left null)
 *   Label   : both the symbol-hashed and the value-hashed table
 * Any other type leaves the object untouched.
 *
 * Both cases now initialise the object create() was called on; the Label
 * case used to write to the global currentTabInfo instead, which was only
 * correct when called as currentTabInfo.create(Label).
 *
 * NOTE(review): the tables are raw zero-filled memory although SYMTABLE holds
 * a std::string (pSymName); the rest of this file relies on that, so it is
 * left as is here. */
void TABLEINFO_TYPE::create(tableType type)
{
    const size_t tabBytes = sizeof(SYMTABLE) * TABLESIZE;

    switch(type)
    {
    case Comment:
        numEntry  = 0;
        tableSize = TABLESIZE;
        valTab = (SYMTABLE*)allocMem(tabBytes);
        memset(valTab, 0, tabBytes);
        symTab = 0;                 /* no symbol hashed comment table */
        break;

    case Label:
        numEntry  = 0;
        tableSize = TABLESIZE;
        symTab = (SYMTABLE*)allocMem(tabBytes);
        memset(symTab, 0, tabBytes);
        valTab = (SYMTABLE*)allocMem(tabBytes);
        memset(valTab, 0, tabBytes);
        break;
    }
}
/* Builds the comment table, the label table and the first string table
 * chunk, and leaves the label table selected as the current one. */
void createSymTables(void)
{
    /* Initialise the comment table.
     * NB - there is no symbol hashed comment table */
    currentTabInfo.create(Comment);
    tableInfo[Comment] = currentTabInfo;

    /* Initialise the label table; it stays in currentTabInfo */
    currentTabInfo.create(Label);
    tableInfo[Label] = currentTabInfo;

    /* Now the string table */
    strTabNext = 0;
    pStrTab = (char *)allocMem(STRTABSIZE);

    curTableType = Label;
}
/* Makes tt the current table by loading tableInfo[tt] into currentTabInfo.
 * Does nothing if tt is already current.
 * NOTE(review): the outgoing currentTabInfo is not copied back into
 * tableInfo[], so changes made to its members since it was loaded are not
 * kept across a switch - confirm this is intended. */
void selectTable(tableType tt)
{
    if (curTableType != tt)
    {
        currentTabInfo = tableInfo[tt];
        curTableType   = tt;
    }
}
/* Releases the two hash tables. The pointers are not reset afterwards. */
void TABLEINFO_TYPE::destroy()
{
    /* free() accepts a null pointer, so a table that was never allocated
     * (e.g. symTab of the comment table) needs no special case */
    free(symTab);       // The symbol hashed label table
    free(valTab);       // And the value hashed label table
}
void destroySymTables(void)
{
selectTable(Label);
currentTabInfo.destroy();
selectTable(Comment);
currentTabInfo.destroy();
}
/* Hash the symbolic name.
 * Stores the raw hash in *pre and returns it reduced modulo tableSize. */
word TABLEINFO_TYPE::symHash(const char *name, word *pre)
{
    word acc = 0;

    for (const char *cur = name; *cur != '\0'; ++cur)
    {
        const char ch = *cur;
        acc = (acc << 2) ^ ch;
        acc += (ch >> 2) + (ch << 5);
    }
    *pre = acc;                 /* Pre modulo hash value */
    return acc % tableSize;     /* Post modulo hash value */
}
/* Hash a symbol value.
 * Stores the raw hash in *pre and returns it reduced modulo tableSize.
 * Note: for the time being only symOff is hashed; symProc is not used. */
word TABLEINFO_TYPE::valHash(dword symOff, Function * symProc, word *pre)
{
    const word mixed = (word)(symOff ^ (symOff >> 8));

    *pre = mixed;               /* Pre modulo hash value */
    return mixed % tableSize;   /* Post modulo hash value */
}
void TABLEINFO_TYPE::enterSym(const char *symName, dword symOff, Function * symProc, boolT bSymToo)
{
word h, pre, j;
SYMTABLE entry;
entry.pSymName= symName; /* Symbol name ptr */
entry.symOff = symOff; /* Offset of the symbol */
entry.symProc = symProc; /* Symbol's proc num */
entry.preHash = pre; /* Pre modulo hash value */
entry.postHash= h; /* Post modulo hash value */
entry.nextOvf = NIL; /* No overflow */
entry.prevOvf = NIL; /* No back link */
z[symName] = entry;
z2[entry] = symName;
if ((numEntry / 9 * 10) >= tableSize)
{
/* Table is full. Expand it */
expandSym();
}
/* Enter it into the value hashed table first */
h = valHash(symOff, symProc, &pre); /* Ideal spot for this entry */
if (valTab[h].symProc == 0) /* Collision? */
{
/* No. Just insert here */
valTab[h].pSymName= symName; /* Symbol name ptr */
valTab[h].symOff = symOff; /* Offset of the symbol */
valTab[h].symProc = symProc; /* Symbol's proc num */
valTab[h].preHash = pre; /* Pre modulo hash value */
valTab[h].postHash= h; /* Post modulo hash value */
valTab[h].nextOvf = NIL; /* No overflow */
valTab[h].prevOvf = NIL; /* No back link */
}
else
{
/* Linear probing, for now */
j = (h+1) % tableSize;
while (j != h)
{
if (valTab[j].symProc == 0)
{
/* Insert here */
valTab[j].pSymName= symName; /* Symbol name ptr */
valTab[j].symOff = symOff; /* Offset of the symbol */
valTab[j].symProc = symProc; /* Symbol's proc num */
valTab[j].preHash = pre; /* Pre modulo hash value */
valTab[j].postHash= h; /* Post modulo hash value */
/* Insert after the primary entry in the table */
valTab[j].nextOvf = valTab[h].nextOvf;
valTab[h].nextOvf = j;
valTab[j].prevOvf = h; /* The backlink */
break;
}
else
{
/* Probe further */
j = (j+1) % tableSize;
}
}
if (j == h)
{
printf("enterSym: val table overflow!\n");
exit(1);
}
}
/* Now enter into the symbol hashed table as well, if reqd */
if (!bSymToo) return;
h = symHash(symName, &pre); /* Ideal spot for this entry */
if (symTab[h].pSymName.empty()) /* Collision? */
{
/* No. Just insert here */
symTab[h].pSymName= symName; /* Symbol name ptr */
symTab[h].symOff = symOff; /* Offset of the symbol */
symTab[h].symProc = symProc; /* Symbol's proc num */
symTab[h].preHash = pre; /* Pre modulo hash value */
symTab[h].postHash= h; /* Post modulo hash value */
symTab[h].nextOvf = NIL; /* No overflow */
symTab[h].prevOvf = NIL; /* No back link */
}
else
{
/* Linear probing, for now */
j = (h+1) % tableSize;
while (j != h)
{
if (symTab[j].pSymName.empty())
{
/* Insert here */
symTab[j].pSymName= symName; /* Symbol name ptr */
symTab[j].symOff = symOff; /* Offset of the symbol */
symTab[j].symProc = symProc; /* Symbol's proc num */
symTab[j].preHash = pre; /* Pre modulo hash value */
symTab[j].postHash= h; /* Post modulo hash value */
/* Insert after the primary entry in the table */
symTab[j].nextOvf = symTab[h].nextOvf;
symTab[h].nextOvf = j;
symTab[j].prevOvf = h; /* The backlink */
break;
}
else
{
/* Probe further */
j = (j+1) % tableSize;
}
}
if (j == h)
{
printf("enterSym: sym table overflow!\n");
exit(1);
}
}
}
void enterSym(char *symName, dword symOff, Function * symProc, boolT bSymToo)
{
currentTabInfo.enterSym(symName,symOff,symProc,bSymToo);
}
/* Find a symbol by name in the symbol hashed table.
 * Starts at the name's hash slot and follows the nextOvf chain.
 * Returns TRUE and stores the slot index in pIndex if the name is found;
 * returns FALSE (pIndex untouched) if an empty slot or the end of the chain
 * is reached first.
 *
 * The debug cross-check against the shadow map is now made only for a
 * successful lookup: on a failed one the chain index is NIL, and the old
 * code indexed symTab with it inside the assert. */
boolT TABLEINFO_TYPE::findSym(const char *symName, word &pIndex)
{
    word pre;
    word j = symHash(symName, &pre);
    bool found = false;

    do
    {
        if (symTab[j].pSymName.empty())
        {
            return FALSE;                   /* No entry at all */
        }
        if (strcmp(symName, symTab[j].pSymName.c_str()) == 0)
        {
            pIndex = j;
            found = true;
            break;                          /* Symbol found */
        }
        j = symTab[j].nextOvf;              /* Follow the chain */
    }
    while (j != NIL);

    if (found)
    {
        /* j is a valid slot here */
        auto iter = z.find(symName);
        if (iter != z.end())
        {
            assert(iter->second == symTab[j]);
        }
    }
    return found;                           /* false: end of chain */
}
/* Find symbol by value.
 * Starts at the hash slot of symOff and follows the nextOvf chain until a
 * slot with a matching symOff is found (symProc is not compared).
 * Returns the symbol's name and stores the slot index in pIndex; returns an
 * empty string (pIndex untouched) if a free slot or the end of the chain is
 * reached first. */
std::string TABLEINFO_TYPE::findVal(dword symOff, Function * symProc, word &pIndex)
{
    word pre;
    std::string name;
    word slot = valHash(symOff, symProc, &pre);

    for (;;)
    {
        const SYMTABLE &cand = valTab[slot];

        if (cand.symProc == 0)
            break;                          /* No entry at all */
        if (cand.symOff == symOff /*&& (cand.symProc == symProc)*/)
        {
            pIndex = slot;
            name = cand.pSymName;
            break;                          /* Symbol found */
        }
        slot = cand.nextOvf;                /* Follow the chain */
        if (slot == NIL)
            break;                          /* End of chain */
    }

    /* Debug cross-check against the shadow map */
    auto iter = z2.find(SYMTABLE(symOff,symProc));
    if (iter != z2.end())
    {
        assert(iter->second == name);
    }
    return name;
}
/* Find an empty slot in the symbol hashed table for symName.
 * Probes linearly (with wrap-around) from the name's hash slot and returns
 * the index of the first slot whose name is empty. If every slot is taken a
 * message is printed and 0 is returned.
 *
 * The probe step used to be written `j = (++j) % tableSize`, which modifies
 * j twice in one expression; it is now a plain increment. */
word TABLEINFO_TYPE::findBlankSym(const std::string &symName)
{
    word pre;
    const word home = symHash(symName.c_str(), &pre);
    word j = home;

    do
    {
        if (symTab[j].pSymName.empty())
        {
            return j;                   /* Empty entry. Terminate probing */
        }
        j = (j + 1) % tableSize;        /* Linear probing */
    }
    while (j != home);

    printf("Could not find blank entry in table! Num entries is %ld of %ld\n",
           (long)numEntry, (long)tableSize);
    return 0;
}
/* Using the symbolic name, read the value.
 * On success stores the symbol's offset and procedure through pSymOff and
 * pSymProc and returns TRUE; returns FALSE (outputs untouched) if the name
 * is not in the symbol hashed table. */
boolT TABLEINFO_TYPE::readSym(char *symName, dword *pSymOff, Function * *pSymProc)
{
    word slot;

    if (findSym(symName, slot))
    {
        const SYMTABLE &hit = symTab[slot];
        *pSymOff  = hit.symOff;
        *pSymProc = hit.symProc;
        return TRUE;
    }
    return FALSE;
}
/* A doubly linked list of entries belonging to the same hash bucket is
maintained, to prevent the need for many entries to be moved when deleting
an entry. It is implemented with indexes, and is not an open hashing system.
Symbols are deleted from both hash tables.
*/
/* Known limitation: strings are never deleted from the string table */
void TABLEINFO_TYPE::deleteSym(char *symName)
{
word i, j, back;
dword symOff;
Function * symProc;
/* Delete from symbol hashed table first */
if (!findSym(symName, i))
{
printf("Could not delete non existant symbol name %s\n", symName);
exit(1);
}
symOff = symTab[i].symOff; /* Remember these for valTab */
symProc= symTab[i].symProc;
j = symTab[i].nextOvf; /* Look at next overflowed entry */
if (j == NIL) /* Any overflows? */
{
/* No, so we just wipe out this record. Must NIL the pointer of
the previous record, however */
symTab[symTab[i].prevOvf].nextOvf = NIL;
j = i; /* So we wipe out the current name */
}
else
{
/* Yes, move this entry to this vacated spot. Note that the nextOvf
field will still point to the next record in the overflow chain,
but we need to preserve the backlink for adjusting the current
item's backlink */
back = symTab[j].prevOvf;
symTab[i] = symTab[j];
symTab[i].prevOvf = back;
}
/* And now mark the vacated record as empty */
symTab[j].pSymName.clear(); /* Rub out the name */
/* Delete from value hashed table */
if (findVal(symOff, symProc, i).empty())
{
printf("Could not delete non existant symbol off %04X proc %d\n",symOff, symProc);
exit(1);
}
j = valTab[i].nextOvf; /* Look at next overflowed entry */
if (j == NIL) /* Any overflows? */
{
/* No, so we just wipe out this record. Must NIL the pointer of
the previous record, however */
valTab[valTab[i].prevOvf].nextOvf = NIL;
j = i; /* So we wipe out the current entry */
}
else
{
/* Yes, move this entry to this vacated spot. Note that the nextOvf
field will still point to the next record in the overflow chain,
but we need to preserve the backlink for adjusting the current
item's backlink */
back = valTab[j].prevOvf;
valTab[i]= valTab[j];
valTab[i].prevOvf = back;
}
/* And now mark the vacated record as empty */
valTab[j].symProc = 0; /* Rub out the entry */
}
/* Delete the symbol with offset symOff from the value hashed table and, if
 * bSymToo is set, delete the entry of the same name from the symbol hashed
 * table as well. Exits the program if the symbol cannot be found.
 *
 * Fixes relative to the previous version:
 *  - the name was read from symTab[i] although i indexes valTab (and symTab
 *    does not exist at all for the comment table);
 *  - deleting an entry with no back link (prevOvf == NIL) no longer writes
 *    to slot NIL of the table;
 *  - entries are moved by assignment rather than memcpy, because SYMTABLE
 *    holds a std::string;
 *  - when the following entry is moved into the vacated slot, the slot keeps
 *    its own back link and the entry after it is pointed back at the slot;
 *  - the symbol is also dropped from the shadow maps, which are only read by
 *    the assert() checks in findSym/findVal. */
void TABLEINFO_TYPE::deleteVal(dword symOff, Function * symProc, boolT bSymToo)
{
    word i, j;

    /* Take slot idx out of its chain; returns the slot left vacant */
    auto unlink = [](SYMTABLE *tab, word idx) -> word
    {
        const word next = tab[idx].nextOvf;
        const word prev = tab[idx].prevOvf;
        if (next == NIL)                    /* Any overflows? */
        {
            /* No, so this record is simply wiped out; the previous
               record, if there is one, becomes the end of the chain */
            if (prev != NIL)
                tab[prev].nextOvf = NIL;
            return idx;
        }
        /* Yes: move the next entry into this slot. Its nextOvf still points
           at the rest of the chain; the slot keeps its own backlink */
        tab[idx] = tab[next];
        tab[idx].prevOvf = prev;
        if (tab[idx].nextOvf != NIL)
            tab[tab[idx].nextOvf].prevOvf = idx;
        return next;
    };

    /* Delete from value hashed table */
    const std::string symName = findVal(symOff, symProc, i); /* for symTab */
    if (symName.empty())
    {
        printf("Could not delete non existant symbol off %04X proc %p\n",
               symOff, symProc);
        exit(1);
    }
    j = unlink(valTab, i);
    /* And now mark the vacated record as empty */
    valTab[j].symProc = 0;                  /* Rub out the entry */
    z2.erase(SYMTABLE(symOff, symProc));    /* Shadow map follows valTab */

    /* If requested, delete from symbol hashed table now */
    if (!bSymToo) return;

    if (!findSym(symName.c_str(), i))
    {
        printf("Could not delete non existant symbol name %s\n", symName.c_str());
        exit(1);
    }
    j = unlink(symTab, i);
    /* And now mark the vacated record as empty */
    symTab[j].pSymName.clear();             /* Rub out the name */
    z.erase(symName);                       /* Shadow map follows symTab */
}
void TABLEINFO_TYPE::expandSym(void)
{
word i, j, n, newPost;
printf("\nResizing table...\r");
/* We double the table size each time, so on average only half of the
entries move to the new half. This works because we are effectively
shifting the "binary point" of the hash value to the left each time,
thereby leaving the number unchanged or adding an MSBit of 1. */
tableSize <<= 2;
symTab = (SYMTABLE*)reallocVar(symTab, tableSize * sizeof(SYMTABLE));
memset (&symTab[tableSize/2], 0, (tableSize/2) * sizeof(SYMTABLE));
/* Now we have to move some of the entries to take advantage of the extra
space */
for (i=0; i < numEntry; i++)
{
newPost = symTab[i].preHash % tableSize;
if (newPost != symTab[i].postHash)
{
/* This entry is now in the wrong place. Copy it to the new position,
then delete it. */
j = findBlankSym(symTab[i].pSymName);
memcpy(&symTab[j], &symTab[i], sizeof(SYMTABLE));
/* Correct the post hash value */
symTab[j].postHash = newPost;
/* Now adjust links */
n = symTab[j].prevOvf;
if (n != NIL)
{
symTab[n].nextOvf = j;
}
n = symTab[j].nextOvf;
if (n != NIL)
{
symTab[n].prevOvf = j;
}
/* Mark old position as deleted */
symTab[i].pSymName.clear();
}
}
}
/* This function adds to the string table. At this stage, strings are not
   deleted.
   Copies pStr (including its terminator) into the current string table chunk
   and returns a pointer to the copy. When the chunk cannot take the string,
   the chunk is trimmed to its used size and a new chunk of STRTABSIZE bytes
   is started.

   A string that cannot fit even in an empty chunk is now given an allocation
   of its own; previously it was copied into a fresh STRTABSIZE chunk and ran
   past its end. */
char * addStrTbl(char *pStr)
{
    const size_t need = strlen(pStr) + 1;   /* bytes incl. terminator */

    if (need > STRTABSIZE)
    {
        /* Too long for any chunk; the current chunk is left untouched */
        char *own = (char *)allocMem(need);
        return strcpy(own, pStr);
    }

    if ((strTabNext + need) >= STRTABSIZE)
    {
        /* We can't realloc the old string table pointer, since that will
           potentially move the string table, and pointers will be invalid.
           So we realloc this one to its present usage (hopefully it won't
           move), and allocate a new one */
        if (reallocVar((void *)pStrTab, strTabNext) != pStrTab)
        {
            printf("Damn it! String table moved on shrinking!\n");
            exit(1);
        }
        pStrTab = (char *)allocMem(STRTABSIZE);
        strTabNext = 0;
    }

    char *p = strcpy(&pStrTab[strTabNext], pStr);
    strTabNext += need;
    return p;
}
/* Free-function wrapper: forwards all three arguments unchanged to
   currentTabInfo.deleteVal(). */
void deleteVal(dword symOff, Function * symProc, boolT bSymToo)
{
currentTabInfo.deleteVal(symOff,symProc,bSymToo);
}
/* Free-function wrapper: forwards to currentTabInfo.findVal() and returns
   its result. pIndex is dereferenced unconditionally, so callers must pass
   a non-null pointer. */
std::string findVal(dword symOff, Function * symProc, word *pIndex)
{
return currentTabInfo.findVal(symOff,symProc,*pIndex);
}
/* Look up the symbolic name for the value (symOff, symProc) in the current
   table. When the lookup yields a non-empty name it is copied into symName
   and true is returned; otherwise symName is left untouched and false is
   returned. The copy is unbounded, so symName must be large enough to hold
   the name plus its terminator. */
boolT readVal(char *symName, dword symOff, Function * symProc)
{
    word index;     /* Required by findVal(); not used afterwards */
    const std::string found = currentTabInfo.findVal(symOff, symProc, index);

    if (!found.empty())
    {
        strcpy(symName, found.c_str());
        return true;
    }
    return false;
}

159
src/udm.cpp Normal file
View File

@@ -0,0 +1,159 @@
/*****************************************************************************
* dcc project Universal Decompilation Module
* This is supposedly a machine independent and language independent module
* that just plays with abstract cfg's and intervals and such like.
* (C) Cristina Cifuentes
****************************************************************************/
#include <list>
#include <cassert>
#include <stdio.h>
#include "dcc.h"
static void displayCFG(Function * pProc);
static void displayDfs(BB * pBB);
/****************************************************************************
* udm
****************************************************************************/
void udm(void)
{
/* Build the control flow graph, find idioms, and convert low-level
* icodes to high-level ones */
for (auto iter = pProcList.rbegin(); iter!=pProcList.rend(); ++iter)
{
if (iter->flg & PROC_ISLIB)
continue; /* Ignore library functions */
/* Create the basic control flow graph */
iter->createCFG();
if (option.VeryVerbose)
iter->displayCFG();
/* Remove redundancies and add in-edge information */
iter->compressCFG();
/* Print 2nd pass assembler listing */
if (option.asm2)
disassem(2, &(*iter));
/* Idiom analysis and propagation of long type */
iter->lowLevelAnalysis();
/* Generate HIGH_LEVEL icodes whenever possible */
iter->highLevelGen();
}
/* Data flow analysis - eliminate condition codes, extraneous registers
* and intermediate instructions. Find expressions by forward
* substitution algorithm */
pProcList.front().dataFlow (0);
derSeq *derivedG=0;
/* Control flow analysis - structuring algorithm */
for (auto iter = pProcList.rbegin(); iter!=pProcList.rend(); ++iter)
{
if (iter->flg & PROC_ISLIB)
continue; /* Ignore library functions */
/* Make cfg reducible and build derived sequences */
derivedG=iter->checkReducibility();
if (option.VeryVerbose)
derivedG->display();
/* Structure the graph */
iter->structure(derivedG);
/* Check for compound conditions */
iter->compoundCond ();
if (option.verbose) {
printf("\nDepth first traversal - Proc %s\n", iter->name);
iter->cfg.front()->displayDfs();
}
/* Free storage occupied by this procedure */
freeDerivedSeq(*derivedG);
}
}
/* Printable names used by the display routines; s_nodeType is indexed by a
   basic block's nodeType value. */
static const char *const s_nodeType[] = {
    "branch",       /*  0 */
    "if",           /*  1 */
    "case",         /*  2 */
    "fall",         /*  3 */
    "return",       /*  4 */
    "call",         /*  5 */
    "loop",         /*  6 */
    "repeat",       /*  7 */
    "interval",     /*  8 */
    "cycleHead",    /*  9 */
    "caseHead",     /* 10 */
    "terminate",    /* 11 */
    "nowhere"       /* 12 */
};

/* Printable names indexed by a basic block's loopType value. */
static const char *const s_loopType[] = {
    "noLoop",       /* 0 */
    "while",        /* 1 */
    "repeat",       /* 2 */
    "loop",         /* 3 */
    "for"           /* 4 */
};
/****************************************************************************
* displayCFG - Displays the Basic Block list
***************************************************************************/
void Function::displayCFG()
{
Int i;
BB * pBB;
printf("\nBasic Block List - Proc %s", name);
for (auto iter = cfg.begin(); iter!=cfg.end(); ++iter)
{
pBB = *iter;
printf("\nnode type = %s, ", s_nodeType[pBB->nodeType]);
printf("start = %ld, length = %ld, #out edges = %ld\n",
pBB->start, pBB->length, pBB->numOutEdges);
for (i = 0; i < pBB->numOutEdges; i++)
printf(" outEdge[%2d] = %ld\n",i, pBB->edges[i].BBptr->start);
}
}
/*****************************************************************************
 * displayDfs - Displays the CFG using a depth first traversal
 *   Marks this block as visited (traversed = DFS_DISP), prints its fields
 *   and its in/out edges, then recurses into every successor that is not
 *   yet marked DFS_DISP. Fields equal to MAX are shown as -1.
 ****************************************************************************/
void BB::displayDfs()
{
    assert(this);
    traversed = DFS_DISP;

    /* Every numeric value below is printed through %ld (or %d), so each
     * argument is converted explicitly to the type its conversion expects;
     * the fields are not long themselves (inEdges.size() is a size_t,
     * numOutEdges is stored from a byte cast). */
    printf("node type = %s, ", s_nodeType[nodeType]);
    printf("start = %ld, length = %ld, #in-edges = %ld, #out-edges = %ld\n",
           static_cast<long>(start), static_cast<long>(length),
           static_cast<long>(inEdges.size()), static_cast<long>(numOutEdges));
    printf("dfsFirst = %ld, dfsLast = %ld, immed dom = %ld\n",
           static_cast<long>(dfsFirstNum), static_cast<long>(dfsLastNum),
           static_cast<long>(immedDom == MAX ? -1 : immedDom));
    printf("loopType = %s, loopHead = %ld, latchNode = %ld, follow = %ld\n",
           s_loopType[loopType],
           static_cast<long>(loopHead == MAX ? -1 : loopHead),
           static_cast<long>(latchNode == MAX ? -1 : latchNode),
           static_cast<long>(loopFollow == MAX ? -1 : loopFollow));
    printf ("ifFollow = %ld, caseHead = %ld, caseTail = %ld\n",
            static_cast<long>(ifFollow == MAX ? -1 : ifFollow),
            static_cast<long>(caseHead == MAX ? -1 : caseHead),
            static_cast<long>(caseTail == MAX ? -1 : caseTail));

    /* Interval nodes show their interval number; all other nodes list the
     * start of each in-edge block */
    if (nodeType == INTERVAL_NODE)
    {
        printf("corresponding interval = %ld\n",
               static_cast<long>(correspInt->numInt));
    }
    else
    {
        for (size_t in = 0; in < inEdges.size(); ++in)
            printf (" inEdge[%ld] = %ld\n", static_cast<long>(in),
                    static_cast<long>(inEdges[in]->start));
    }

    /* Display out edges information */
    for (Int i = 0; i < numOutEdges; i++)
    {
        if (nodeType == INTERVAL_NODE)
            printf(" outEdge[%ld] = %ld\n", static_cast<long>(i),
                   static_cast<long>(edges[i].BBptr->correspInt->numInt));
        else
            printf(" outEdge[%d] = %ld\n", static_cast<int>(i),
                   static_cast<long>(edges[i].BBptr->start));
    }
    printf("----\n");

    /* Recursive call on successors of current node */
    for (Int i = 0; i < numOutEdges; i++)
    {
        if (edges[i].BBptr->traversed != DFS_DISP)
            edges[i].BBptr->displayDfs();
    }
}