dcc/src/disassem.cpp
2016-05-19 10:18:17 +02:00

642 lines
19 KiB
C++

/****************************************************************************
* dcc project disassembler
* (C) Cristina Cifuentes, Mike van Emmerik, Jeff Ledermann
****************************************************************************/
#include "disassem.h"
#include "dcc.h"
#include "msvc_fixes.h"
#include "symtab.h"
#include "project.h"
#include <QtCore/QFile>
#include <QtCore/QDebug>
#include <stdint.h>
#include <vector>
#include <map>
#include <sstream>
#include <iomanip>
#include <stdio.h>
#include <string.h>
#include "src/ui/StructuredTextTarget.h"
// Note: for the time being, there is no interactive disassembler
// for unix
using namespace std;
#define POS_LAB 15 /* Position of label */
#define POS_OPC 20 /* Position of opcode */
#define POS_OPR 25 /* Position of operand */
#define WID_PTR 10 /* Width of the "xword ptr" lingo */
#define POS_OPR2 POS_OPR+WID_PTR /* Position of operand after "xword ptr" */
#define POS_CMT 54 /* Position of comment */
static const char *szFlops0C[] =
{
"FCHS", "FABS", "???", "???", "FTST", "FXAM", "???", "???"
};
static const char *szFlops0D[] =
{
"FLD1", "FLDL2T","FLDL2E","FLDP1", "FLDLG2","FLDLN2","FLDZ", "???"
};
static const char *szFlops0E[] =
{
"F2XM1", "FYL2X", "FPTAN", "FPATAN","FXTRACT","FPREM1","FDECSTP","FINCSTP"
};
static const char *szFlops0F[] =
{
"FPREM", "FYLXP1","FSQRT", "FSINCOS","FRNDINT","FSCALE","FSIN","FCOS"
};
static const char *szFlops15[] =
{
"???", "FUCOMPP", "???", "???", "???", "???", "???", "???"
};
static const char *szFlops1C[] =
{
"???", "???", "FCLEX", "FINIT", "FTST", "FXAM", "???", "???"
};
static const char *szFlops33[] =
{
"???", "FCOMPP", "???", "???", "???", "???", "???", "???"
};
static const char *szFlops3C[] =
{
"FSTSWAX","???", "???", "???", "???", "???", "???", "???"
};
static const char *szPtr[2] = { "word ptr ", "byte ptr " };
static void formatRM(QTextStream & p, const LLOperand &pm);
static QTextStream & strDst(QTextStream & os, uint32_t flg, const LLOperand &pm);
static char *strHex(uint32_t d);
//static int checkScanned(uint32_t pcCur);
//static void setProc(Function * proc);
//static void dispData(uint16_t dataSeg);
bool callArg(uint16_t off, char *temp); /* Check for procedure name */
//static FILE *dis_g_fp;
static CIcodeRec pc;
static int cb, j, numIcode, allocIcode;
static map<int,int> pl;
static uint32_t nextInst;
static bool fImpure;
//static int g_lab;
static PtrFunction pProc; /* Points to current proc struct */
struct POSSTACK_ENTRY
{
int ic; /* An icode offset */
Function * pProc; /* A pointer to a PROCEDURE structure */
} ;
static vector<POSSTACK_ENTRY> posStack; /* position stack */
//static uint8_t iPS; /* Index into the stack */
// These are "curses equivalent" functions. (Used to use curses for all this,
// but it was too much of a distribution hassle
#define printfd(x) printf(x)
#define dis_newline() printf("\n")
#define dis_show() // Nothing to do unless using Curses
void LLInst::findJumpTargets(CIcodeRec &_pc)
{
if (srcIsImmed() and not testFlags(JMP_ICODE) and isJmpInst())
{
/* Replace the immediate operand with an icode index */
iICODE labTgt=_pc.labelSrch(src().getImm2());
if (labTgt!=_pc.end())
{
m_src.SetImmediateOp(labTgt->loc_ip);
/* This icode is the target of a jump */
labTgt->ll()->setFlags(TARGET);
setFlags(JMP_ICODE); /* So its not done twice */
}
else
{
/* This jump cannot be linked to a label */
setFlags(NO_LABEL);
}
}
}
/*****************************************************************************
* disassem - Prints a disassembled listing of a procedure.
* pass == 1 generates output on file .a1
* pass == 2 generates output on file .a2
* pass == 3 generates output on file .b
****************************************************************************/
void Disassembler::disassem(PtrFunction ppProc)
{
pProc = ppProc; /* Save the passes pProc */
createSymTables();
allocIcode = numIcode = pProc->Icode.size();
cb = allocIcode * sizeof(ICODE);
if (numIcode == 0)
{
return; /* No Icode */
}
/* Open the output file (.a1 or .a2 only) */
if (pass != 3)
{
auto p = (pass == 1)? asm1_name: asm2_name;
m_disassembly_target = new QFile(p);
if(!m_disassembly_target->open(QFile::WriteOnly|QFile::Text|QFile::Append)) {
fatalError(CANNOT_OPEN, p.toStdString().c_str());
}
m_fp.setDevice(m_disassembly_target);
}
/* Create temporary code array */
// Mike: needs objectising!
pc=pProc->Icode;
if (pass == 1)
{
/* Bind jump offsets to labels */
//for (i = 0; i < numIcode; i++)
for( ICODE &icode : pc)
{
LLInst *ll=icode.ll();
ll->findJumpTargets(pc);
}
}
/* Create label array to keep track of location => label name */
pl.clear();
/* Write procedure header */
if (pass != 3)
{
const char * near_far=(pProc->flg & PROC_FAR)? "FAR": "NEAR";
m_fp << "\t\t"<<pProc->name<<" PROC "<< near_far<<"\n";
}
/* Loop over array printing each record */
nextInst = 0;
for( ICODE &icode : pc)
{
this->dis1Line(*icode.ll(),icode.loc_ip,pass);
}
/* Write procedure epilogue */
if (pass != 3)
{
m_fp << "\n\t\t"<<pProc->name<<" ENDP\n\n";
m_fp.setDevice(nullptr);
m_disassembly_target->close();
delete m_disassembly_target;
}
pc.clear();
destroySymTables();
}
/****************************************************************************
* dis1Line() - disassemble one line to stream fp *
* i is index into Icode for this proc *
* It is assumed that icode i is already scanned *
****************************************************************************/
void Disassembler::dis1Line(LLInst &inst,int loc_ip, int pass)
{
assert(false);
}
/****************************************************************************
* formatRM
***************************************************************************/
static void formatRM(QTextStream &p, const LLOperand &pm)
{
//char seg[4];
if (pm.segOver)
{
p <<Machine_X86::regName(pm.segOver)<<':';
}
if (pm.regi == rUNDEF)
{
p<<"["<<strHex((uint32_t)pm.off)<<"]";
}
else if (pm.isReg())
{
p<<Machine_X86::regName(pm.regi);
}
else if (pm.off)
{
if (pm.off < 0)
{
p <<"["<<Machine_X86::regName(pm.regi)<<"-"<<strHex((uint32_t)(- pm.off))<<"]";
}
else
{
p <<"["<<Machine_X86::regName(pm.regi)<<"+"<<strHex((uint32_t)(pm.off))<<"]";
}
}
else
p <<"["<<Machine_X86::regName(pm.regi)<<"]";
}
/*****************************************************************************
* strDst
****************************************************************************/
static QTextStream & strDst(QTextStream &os,uint32_t flg, const LLOperand &pm)
{
/* Immediates to memory require size descriptor */
//os << setw(WID_PTR);
if ((flg & I) and not pm.isReg())
os << szPtr[flg & B];
formatRM(os, pm);
return os;
}
/****************************************************************************
* strSrc *
****************************************************************************/
//QTextStream &LLInst::strSrc(QTextStream &os,bool skip_comma)
//{
// if(false==skip_comma)
// os<<", ";
// if (srcIsImmed())
// os<<strHex(src().getImm2());
// else if (testFlags(IM_SRC)) /* level 2 */
// os<<"dx:ax";
// else
// formatRM(os, src());
// return os;
//}
/****************************************************************************
* strHex *
****************************************************************************/
static char *strHex(uint32_t d)
{
static char buf[10];
d &= 0xFFFF;
sprintf(buf, "0%X%s", d, (d > 9)? "h": "");
return (buf + (buf[1] <= '9'));
}
/****************************************************************************
* interactDis - interactive disassembler *
****************************************************************************/
void interactDis(const PtrFunction & initProc, int initIC)
{
QString procname = "UNKNOWN";
if(initProc)
procname = initProc->name;
qDebug() << "Wanted to start interactive disasassembler for "<<procname<<":"<<initIC;
return;
}
/* Handle the floating point opcodes (icode iESC) */
void LLInst::flops(QTextStream &out)
{
//char bf[30];
uint8_t op = (uint8_t)src().getImm2();
/* Note that op is set to the escape number, e.g.
esc 0x38 is FILD */
if ( not m_dst.isReg() )
{
/* The mod/rm mod bits are not set to 11 (i.e. register). This is the normal floating point opcode */
out<<Machine_X86::floatOpName(op)<<' ';
out.setFieldWidth(10);
if ((op == 0x29) or (op == 0x1F))
{
out << "tbyte ptr ";
}
else switch (op & 0x30)
{
case 0x00:
case 0x10:
out << "dword ptr ";
break;
case 0x20:
out << "qword ptr ";
break;
case 0x30:
switch (op)
{
case 0x3C: /* FBLD */
case 0x3E: /* FBSTP */
out << "tbyte ptr ";
break;
case 0x3D: /* FILD 64 bit */
case 0x3F: /* FISTP 64 bit */
out << "qword ptr ";
break;
default:
out << "uint16_t ptr ";
break;
}
}
out.setFieldWidth(0);
formatRM(out, m_dst);
}
else
{
/* The mod/rm mod bits are set to 11 (i.e. register).
Could be specials (0x0C-0x0F, etc), or the st(i) versions of
normal opcodes. Because the opcodes are slightly different for
this case (e.g. op=04 means FSUB if reg != 3, but FSUBR for
reg == 3), a separate table is used (szFlops2). */
int destRegIdx=m_dst.regi - rAX;
switch (op)
{
case 0x0C:
out << szFlops0C[destRegIdx];
break;
case 0x0D:
out << szFlops0D[destRegIdx];
break;
case 0x0E:
out << szFlops0E[destRegIdx];
break;
case 0x0F:
out << szFlops0F[destRegIdx];
break;
case 0x15:
out << szFlops15[destRegIdx];
break;
case 0x1C:
out << szFlops1C[destRegIdx];
break;
case 0x33:
out << szFlops33[destRegIdx];
break;
case 0x3C:
out << szFlops3C[destRegIdx];
break;
default:
out << Machine_X86::floatOpName(0x40+op);
if ((op >= 0x20) and (op <= 0x27))
{
/* This is the ST(i), ST form. */
out << "ST("<<destRegIdx - rAX<<"),ST";
}
else
{
/* ST, ST(i) */
out << "ST,ST("<<destRegIdx;
}
break;
}
}
}
struct AsmFormatter {
IStructuredTextTarget * target;
int operand_count;
void visitOperand(const LLOperand &pm) {
if(not pm.isSet())
return;
if(operand_count>0) {
target->prtt(", ");
}
operand_count++;
if (pm.immed and not pm.isReg()) {
//target->addTaggedString(XT_Keyword,szPtr[flg&B]);
target->addTaggedString(XT_Number,strHex(pm.getImm2()));
return;
}
if (pm.segOver)
{
target->prtt(Machine_X86::regName(pm.segOver)+':');
}
if (pm.regi == rUNDEF)
{
target->prtt(QString("[")+strHex((uint32_t)pm.off)+"]");
}
else if (pm.isReg())
{
target->prtt(Machine_X86::regName(pm.regi));
}
else if (pm.off)
{
if (pm.off < 0)
{
target->prtt("["+Machine_X86::regName(pm.regi)+"-"+strHex((uint32_t)(- pm.off))+"]");
}
else
{
target->prtt("["+Machine_X86::regName(pm.regi)+"+"+strHex((uint32_t)(pm.off))+"]");
}
}
else
target->prtt("["+Machine_X86::regName(pm.regi)+"]");
}
};
void toStructuredText(LLInst *insn,IStructuredTextTarget *out, int level) {
AsmFormatter formatter {out};
const LLInst &inst(*insn);
QString opcode = Machine_X86::opcodeName(insn->getOpcode());
out->addSpace(4);
out->addTaggedString(XT_Number,QString("%1").arg(insn->label,8,16,QChar('0').toUpper()));
out->addSpace(4);
out->addTaggedString(XT_Keyword,Machine_X86::opcodeName(insn->getOpcode()),insn);
out->addSpace(2);
switch(insn->getOpcode()) {
case iADD: case iADC: case iSUB: case iSBB: case iAND: case iOR:
case iXOR: case iTEST: case iCMP: case iMOV: case iLEA: case iXCHG:
case iSAR: case iSHL: case iSHR: case iRCL: case iRCR: case iROL:
case iROR:
formatter.visitOperand(insn->dst());
formatter.visitOperand(insn->src());
break;
case iINC: case iDEC: case iNEG: case iNOT: case iPOP:
formatter.visitOperand(insn->dst());
break;
case iPUSH:
formatter.visitOperand(insn->dst());
break;
case iDIV: case iIDIV: case iMUL: case iIMUL: case iMOD:
if (inst.srcIsImmed())
{
formatter.visitOperand(insn->dst());
formatter.visitOperand(insn->src());
}
else
formatter.visitOperand(insn->dst());
break;
case iLDS: case iLES: case iBOUND:
formatter.visitOperand(insn->dst());
formatter.visitOperand(insn->src());
break;
case iJB: case iJBE: case iJAE: case iJA:
case iJL: case iJLE: case iJGE: case iJG:
case iJE: case iJNE: case iJS: case iJNS:
case iJO: case iJNO: case iJP: case iJNP:
case iJCXZ:case iLOOP: case iLOOPE:case iLOOPNE:
case iJMP: case iJMPF:
/* Check if there is a symbol here */
{
// ICODE *lab=pc.GetIcode(inst.src().getImm2());
// selectTable(Label);
// if ((inst.src().getImm2() < (uint32_t)numIcode) and /* Ensure in range */
// readVal(operands_s, lab->ll()->label, nullptr))
// {
// break; /* Symbolic label. Done */
// }
}
if (inst.testFlags(NO_LABEL))
{
//strcpy(p + WID_PTR, strHex(pIcode->ll()->immed.op));
out->addTaggedString(XT_AsmLabel,strHex(inst.src().getImm2()));
}
else if (inst.srcIsImmed() )
{
int64_t tgt_addr = inst.src().getImm2();
if (inst.getOpcode() == iJMPF)
{
out->addTaggedString(XT_Keyword," far ptr ");
}
out->addTaggedString(XT_AsmLabel,QString("L_%1").arg(strHex(tgt_addr)));
}
else if (inst.getOpcode() == iJMPF)
{
out->addTaggedString(XT_Keyword,"dword ptr");
formatter.visitOperand(inst.src());
}
else
{
formatter.visitOperand(inst.src());
}
break;
case iCALL: case iCALLF:
if (inst.srcIsImmed())
{
out->addTaggedString(XT_Keyword,QString("%1 ptr ").arg((inst.getOpcode() == iCALL) ? "near" : "far"));
out->addTaggedString(XT_AsmLabel,(inst.src().proc.proc)->name);
}
else if (inst.getOpcode() == iCALLF)
{
out->addTaggedString(XT_Keyword,"dword ptr ");
formatter.visitOperand(inst.src());
}
else
formatter.visitOperand(inst.src());
break;
case iENTER:
formatter.visitOperand(inst.dst());
formatter.visitOperand(inst.src());
break;
case iRET:
case iRETF:
case iINT:
formatter.visitOperand(inst.src());
break;
case iCMPS: case iREPNE_CMPS: case iREPE_CMPS:
case iSCAS: case iREPNE_SCAS: case iREPE_SCAS:
case iSTOS: case iREP_STOS:
case iLODS: case iREP_LODS:
case iMOVS: case iREP_MOVS:
case iINS: case iREP_INS:
case iOUTS: case iREP_OUTS:
if (inst.src().segOver)
{
bool is_dx_src=(inst.getOpcode() == iOUTS or inst.getOpcode() == iREP_OUTS);
if(is_dx_src)
{
out->addTaggedString(XT_Symbol,"dx");
out->prtt(", ");
out->addTaggedString(XT_Keyword,szPtr[inst.getFlag() & B]);
out->addSpace(2);
}
else
out->addTaggedString(XT_Keyword,szPtr[inst.getFlag() & B]);
if (inst.getOpcode() == iLODS or
inst.getOpcode() == iREP_LODS or
inst.getOpcode() == iOUTS or
inst.getOpcode() == iREP_OUTS)
{
out->addTaggedString(XT_Symbol,Machine_X86::regName(inst.src().segOver)); // szWreg[src.segOver-rAX]
}
else
{
out->addTaggedString(XT_Symbol,"es:[di]");
out->prtt(", ");
out->addTaggedString(XT_Symbol,Machine_X86::regName(inst.src().segOver));
}
out->addTaggedString(XT_Symbol,":[si]");
}
else
{
out->delChars(2); // TODO: this is wonky way of adding instruction suffix
if(inst.getFlag() & B)
out->addTaggedString(XT_Keyword,"B");
else
out->addTaggedString(XT_Keyword,"W");
out->addSpace(2);
}
break;
case iXLAT:
if (inst.src().segOver)
{
out->addTaggedString(XT_Keyword,QString(" ") + szPtr[1]);
out->addTaggedString(XT_Symbol,Machine_X86::regName(inst.src().segOver)+":[bx]");
}
break;
case iIN:
out->addTaggedString(XT_Symbol, (inst.getFlag() & B)? "al" : "ax");
out->prtt(", ");
formatter.visitOperand(inst.src());
break;
case iOUT:
{
formatter.visitOperand(inst.src());
if(inst.srcIsImmed())
out->addTaggedString(XT_Number, strHex(inst.src().getImm2()));
else
out->addTaggedString(XT_Symbol, "dx");
out->prtt(", ");
out->addTaggedString(XT_Symbol, (inst.getFlag() & B)? "al" : "ax");
}
}
out->addEOL();
}