104 Commits

Author SHA1 Message Date
nemerle
3905c4e281 Re-activate unit testing and starting work on proper memory segmentation support
Use Qt testing framework.

Reorganize source file references in CMakeLists.txt

Add simplistic Address header and type ( typedef for now )
2016-05-20 15:44:37 +02:00
nemerle
171abc0415 Remove unused variables. 2016-05-20 10:24:28 +02:00
nemerle
126e206b08 Constify parsehdr error/warning reporting function arguments. 2016-05-20 10:24:01 +02:00
nemerle
7f1a4e26bd Fix struct/class confusion for Function class. 2016-05-20 10:23:00 +02:00
nemerle
8875371cee Merge remote-tracking branch 'origin' into experimental_command_streams 2016-05-20 10:20:51 +02:00
nemerle
3af6f8f5c3 Merge branch 'qt5' into experimental_command_streams 2016-05-20 10:16:59 +02:00
Artur K
e1f0c084f8 Merge pull request #24 from lab313ru/patch-1
Fixed symbol name collision in parsehdr
2016-05-20 10:13:45 +02:00
Lab 313
e2a6b25345 Fixed collision with round math.h function 2016-05-20 11:01:00 +03:00
nemerle
f210ed78c2 Add the last of the original tools dispsig and srchsig
Closes #22
2016-05-19 20:15:37 +02:00
nemerle
ccc8cc526b Add readsig tool to build
As requested in #22
2016-05-19 20:03:49 +02:00
nemerle
cd6797499f Add parsehdr to the build
As requested in #22

(cherry picked from commit d5985b4b97)
2016-05-19 19:52:48 +02:00
nemerle
d5985b4b97 Add parsehdr to the build
As requested in #22
2016-05-19 19:50:47 +02:00
nemerle
b60903306f Fix memset-of-non-POD bug.
PROG contains vector, but was memset.
2016-05-19 16:17:08 +02:00
nemerle
5f68987001 Merge branch 'qt5' into experimental_command_streams 2016-05-19 15:57:52 +02:00
nemerle
1df7cb3be4 Fix msvc detection + prevent range library from pulling in boost::regex 2016-05-19 14:45:03 +02:00
nemerle
a7265f06b0 Fix msvc detection in cmake 2016-05-19 14:14:26 +02:00
nemerle
58532f4402 Removing LLVM dependency contd. 2016-05-19 12:40:59 +02:00
nemerle
7d986ef661 Removing LLVM dependencies WIP 2016-05-19 11:14:46 +02:00
nemerle
7f4197acc1 Use cmake features to mark c++11 as a required standard 2016-05-19 10:44:45 +02:00
nemerle
e71c8051c3 Rename icodeType entries
Following 2f80f16e6f76e3794ddc35ac9a6c1a86cb11cd28
2016-05-19 10:27:15 +02:00
nemerle
73cf949e25 Undefine PASCAL if it's defined as a macro
Following 0af07017b9c2f14435bd0fd103ae199cbf840bb0
2016-05-19 10:20:51 +02:00
nemerle
14ceb301c1 WIP - do not use. 2016-05-19 10:18:17 +02:00
nemerle
bc654cbf76 Fix: bad loader instancfe created for COM files 2016-05-10 14:48:46 +02:00
nemerle
9c6dfd676e Move JMP processing from Function class
Add MarkAsSwitchCase command
2016-05-10 14:02:26 +02:00
nemerle
ae27258e3c Move XCHG and DIV rewriters from Function class
Preliminary work for #19
2016-05-10 13:07:05 +02:00
nemerle
41e9faec0e Start moving parser functionality from Function class.
Add state switching operation to Function interface
2016-05-10 12:36:31 +02:00
nemerle
6f7bfbddf8 Fix: initialize initial state for start proc if no main was found 2016-05-10 10:46:23 +02:00
nemerle
db39014e1b A few more skeleton things for planner 2016-05-09 16:05:15 +02:00
nemerle
3376818a17 fix previous commit
Support simple character deletion operations in StructuredTextTarget.h
to support opcode suffix modification.
2016-05-09 11:53:03 +02:00
nemerle
72ca6bbb70 disassembler: more instructions output into structuredtext 2016-05-09 11:35:30 +02:00
nemerle
888de8d35e More tabs replaced with spaces 2016-05-06 15:57:15 +02:00
nemerle
59c199837c Replace some tabs with spaces 2016-05-06 15:04:45 +02:00
nemerle
152625d67d Extend responsibilities of FunctionViewWidget
Reduced IDcc interface functionality.

Fix a few bugs/warnings discovered by coverity.

Emit functionUpdate signal from project when function object internal
fields change.
2016-05-06 14:40:33 +02:00
nemerle
292e4041e1 Initialize some class attributes. 2016-05-06 00:26:52 +02:00
nemerle
36d95946b3 Show JMP targets 2016-05-05 23:55:39 +02:00
nemerle
4cc3b41e64 Use QTextDocument instead of html to build text display
Continued work on rendering disassembly level text.
2016-05-05 16:06:06 +02:00
nemerle
6ade935e37 Function's command queue implemented, flow control scanning starts to
work.
2016-05-05 14:28:25 +02:00
nemerle
c8fd3a01df Add AutomatedPlanner skeleton class 2016-05-04 14:22:28 +02:00
nemerle
29353111ac A few missing returns 2016-05-04 09:54:38 +02:00
nemerle
4dc321650f Add skeletal functions for ICODE -> StructuredText 2016-05-04 00:57:59 +02:00
nemerle
0521206de5 WIP: More GUI work, use shared_ptr to store Function references. 2016-05-03 13:59:14 +02:00
nemerle
60a4fefe95 Fix command queue stepping.
Also add instanceDescription to Command class to allow for more verbose command information reporting
2016-05-01 18:45:22 +02:00
nemerle
0391f67109 Per-Function flag to prevent decompilation/disassembly
For now it just sets PROC_ISLIB flag
2016-05-01 18:42:28 +02:00
nemerle
d22624a99b Missing include 2016-05-01 14:55:06 +02:00
nemerle
95acbaa7fa GUI work. 2016-04-29 15:51:02 +02:00
nemerle
2452984864 Convert more of Frontend processing to command lists. 2016-04-29 12:23:12 +02:00
nemerle
4682bda8d8 if CFG is missing Project::createFunction should create one 2016-04-29 12:22:42 +02:00
nemerle
4a6d97c1b1 Prepare CMakeLists for UI work 2016-04-29 12:21:33 +02:00
nemerle
3d5a907b30 Move Function closer to LLVM interface ( FunctionType etc. )
A few more places are using Commands.
2016-04-28 16:25:58 +02:00
nemerle
0684062130 Fix full_regression.sh mkdir 2016-04-28 13:13:35 +02:00
nemerle
62d8633113 Implementation 2016-04-26 16:18:23 +02:00
nemerle
b2be1cf2da Simplify: SetupLibCheck does not depend on PROG or Project classes.
It also does not exit the program when ".sig" loader fails.

Use QString to build signature filename.
2016-04-26 13:42:41 +02:00
nemerle
94e3016a5b Fix SetCurFunc_by_Name implementation 2016-04-26 13:36:57 +02:00
nemerle
145a50369e Fix: Jump labels were printed incorrectly. 2016-04-26 13:35:20 +02:00
Vladimir Kryvian
d77927c608 Added new line for "removeRegFromLong not supproted" message in icode.h. 2016-04-26 10:06:00 +02:00
Vladimir Kryvian
3bb72987a6 Fixed double import with correct one in makedsig. 2016-04-26 10:05:10 +02:00
nemerle
c782892db4 Prevent the use of msvc's min/max macros 2016-04-26 09:26:28 +02:00
nemerle
a944ea5da8 Implement some of the methods in DccImpl 2016-04-26 09:23:34 +02:00
nemerle
d1738ea630 New feature: option to decompile only a specific function.
Similar to boomerang's -E option:

```
dcc -E 0x1222 ./TARGET.EXE
```

Will only decompile function at given address.
This might help in isolating dcc crashes.
2016-04-26 00:46:56 +02:00
nemerle
5f39236ba2 Fix LOCAL_ID::newLongIdx 2016-04-26 00:27:49 +02:00
nemerle
ede09ddae3 Record native function address as part of the name.
Might help while debugging dcc's handling of large programs.
2016-04-25 16:08:51 +02:00
nemerle
34b1f4f4fe Fix: long processing regression introduced in
bb007ddefc

Invalidated instructions should still be processed in findBBExps
2016-04-25 16:02:58 +02:00
nemerle
d6249916e1 More logic operator replacements.
Use Qt string classes.
2016-04-25 15:51:58 +02:00
nemerle
9cd3226536 Normalize logic operation keywords and add use msvc fix
Logical or should be only 'or','and','not', and not error prone
'||','&&','!'
2016-04-25 11:39:07 +02:00
nemerle
3f217e83da Add header that will contain msvc fixes 2016-04-25 10:08:25 +02:00
nemerle
652cfb67c3 Add simple gitattributes 2016-04-25 10:03:55 +02:00
nemerle
c0e9ba2fb3 Add addOutEdgesForConditionalJump to header 2016-04-25 10:03:30 +02:00
nemerle
5963f5fd4d Thanks to @lab313ru : fix bad iterator usage
Trying to increment past the end in graph.cpp

Also removed a goto by extracting a common function.
2016-04-24 12:22:15 +02:00
nemerle
12ee08f87e Implement two new switch idioms closes #14 2016-04-23 20:05:11 +02:00
nemerle
5c85c92d1a Replace tabs with spaces 2016-04-22 11:45:23 +02:00
nemerle
b509d0fcf0 Extend disassembly failure reporting a bit. 2016-04-22 10:47:14 +02:00
nemerle
bb007ddefc Stop processing invalidated instructions in dataflow 2016-04-22 10:36:29 +02:00
nemerle
9129d48429 Comment out a debugging printf 2016-04-22 10:35:41 +02:00
nemerle
d105182051 Add missing \n to error printf 2016-04-22 10:35:14 +02:00
nemerle
157a968372 Assume TYPE_STR is returned in exactly the same way as TYPE_PTR is 2016-04-22 10:34:36 +02:00
Artur K
bae2a582f1 Merge pull request #11 from lab313ru/lab313ru-patch-1
Lab313ru patch 1
2016-04-22 09:19:02 +02:00
Lab 313
19191876e2 Update libdis.h
Fixed negative address getting.
2016-04-22 02:37:22 +03:00
Lab 313
fcfe3c1f4b Update scanner.cpp
Fixed negative address calculating.
2016-04-22 02:35:40 +03:00
nemerle
97f093feaa This build requires LLVM, does not need ncurses - modify CMakeLists.txt to match 2016-02-13 15:14:14 +01:00
Artur K.
3561de6e12 Merge pull request #5 from Arthur2e5/patch-1
README: Recognizing code segments
2015-10-20 06:18:10 +00:00
Mingye Wang
e84d09b97c README: Recognizing code segments 2015-10-20 01:15:59 -04:00
Artur K.
d8a4fe1c04 Merge pull request #4 from Arthur2e5/patch-1
README: tweak formatting by a bit
2015-10-20 05:15:49 +00:00
Mingye Wang
e4e6ad6415 README: tweak formatting by a bit
Trying to get a nice balance between Markdown rendering and plain text readability. And I think I got it.
2015-10-20 01:11:34 -04:00
nemerle
2543617930 Remove llvm as a build requirement 2015-08-13 20:46:54 +02:00
nemerle
bc5784a8f2 Fix #1 - just use QFileInfo. 2015-05-28 15:13:43 +02:00
Artur K
842687726f Update the dcc tools code 2015-04-28 14:59:00 +02:00
nemerle
c5c9196561 Fix for functional tests when running on clean checkout 2015-02-10 17:31:57 +01:00
nemerle
a697ad05c0 Add original dcc tools to repository
* makedsig has been integrated with makedstp, it should handle both LIB and TPL files
* other tools have not been modified
2015-02-10 17:28:50 +01:00
Artur K.
d8c66e7791 Update Readme.md 2014-06-05 15:01:12 +02:00
nemerle
337a6c44aa Added original readme 2014-05-25 12:36:39 +02:00
nemerle
cde4484821 Remove unused local 2014-05-25 12:33:18 +02:00
nemerle
36b063c183 Working towards gui integration with exetoc_qt 2014-05-24 17:08:05 +02:00
nemerle
3603877f42 Qt5 command options processing 2014-03-07 20:01:36 +01:00
nemerle
50950028e0 Pre-qt5 2014-03-07 19:42:27 +01:00
nemerle
1c5e1c2fce replace boolT with plain old bool in a few places 2014-02-28 11:26:02 +01:00
nemerle
5c7799b778 Const fixes and name updates for libdis.h 2014-02-28 11:24:09 +01:00
Artur K
0209b7ceb2 Changes 2012-07-20 18:18:25 +02:00
Artur K
f6118dc0c4 Fixes to libdisasm, also use it a bit more 2012-07-19 19:50:34 +02:00
Artur K
d5e1fc733f Fixes to libdisasm, also use it a bit more 2012-07-19 19:37:30 +02:00
Artur K
c1eb8df114 Split COND_EXPR into Unary/Binary/AstIdent subclasses 2012-07-16 19:31:29 +02:00
Artur K
ca129c5177 Fix to idiom19 and fixFloatEmulation() 2012-07-15 20:17:16 +02:00
Artur K
c19231a1bd extracted FunctionCfg as it's own class 2012-07-15 16:52:59 +02:00
Artur K
5087a051b5 More simplifications on BB creation 2012-07-14 23:04:09 +02:00
Artur K
ba110a64cb removed most of clang warnings / errors 2012-03-29 22:02:25 +02:00
181 changed files with 25152 additions and 9148 deletions

6
.gitattributes vendored Normal file
View File

@@ -0,0 +1,6 @@
* text=auto
*.c text
*.cpp text
*.ui text
*.qrc text
*.h text

2
.gitignore vendored
View File

@@ -5,3 +5,5 @@ tests/outputs/*
tests/errors
*.autosave
bld*
*.user
*.idb

68
3rd_party/libdisasm/INTEL_BUGS vendored Normal file
View File

@@ -0,0 +1,68 @@
PMOVMSKB
Gd, Pq1H
PMOVMSKB
(66)
Gd, Vdq1H
should be
PMOVMSKB
Gd, Qq1H
PMOVMSKB
(66)
Gd, Wdq1H
The instruction represented by this opcode expression does not support any
operand to be a memory location.
MASKMOVQ
Pq, Pq1H
MASKMOVDQU
(66)
Vdq, Vdq1H
should be
MASKMOVQ
Pq, Pq1H
MASKMOVDQU
(66)
Vdq, Wdq1H
MOVMSKPS
Gd, Vps1H
MOVMSKPD
(66)
Gd, Vpd1H
should be
MOVMSKPS
Gd, Wps1H
MOVMSKPD
(66)
Gd, Wpd1H
The opcode table entries for LFS, LGS, and LSS
L[FGS]S
Mp
should be
L[FGS]S
Gv,Mp
MOVHLPS
Vps, Vps
MOVLHPS
Vps, Vps
should be
MOVHLPS
Vps, Wps
MOVLHPS
Vps, Wps

137
3rd_party/libdisasm/LICENSE vendored Normal file
View File

@@ -0,0 +1,137 @@
The "Clarified Artistic License"
Preamble
The intent of this document is to state the conditions under which a
Package may be copied, such that the Copyright Holder maintains some
semblance of artistic control over the development of the package,
while giving the users of the package the right to use and distribute
the Package in a more-or-less customary fashion, plus the right to make
reasonable modifications.
Definitions:
"Package" refers to the collection of files distributed by the
Copyright Holder, and derivatives of that collection of files
created through textual modification.
"Standard Version" refers to such a Package if it has not been
modified, or has been modified in accordance with the wishes
of the Copyright Holder as specified below.
"Copyright Holder" is whoever is named in the copyright or
copyrights for the package.
"You" is you, if you're thinking about copying or distributing
this Package.
"Distribution fee" is a fee you charge for providing a copy of this
Package to another party.
"Freely Available" means that no fee is charged for the right to use
the item, though there may be fees involved in handling the item.
1. You may make and give away verbatim copies of the source form of the
Standard Version of this Package without restriction, provided that you
duplicate all of the original copyright notices and associated disclaimers.
2. You may apply bug fixes, portability fixes and other modifications
derived from the Public Domain, or those made Freely Available, or from
the Copyright Holder. A Package modified in such a way shall still be
considered the Standard Version.
3. You may otherwise modify your copy of this Package in any way, provided
that you insert a prominent notice in each changed file stating how and
when you changed that file, and provided that you do at least ONE of the
following:
a) place your modifications in the Public Domain or otherwise make them
Freely Available, such as by posting said modifications to Usenet or
an equivalent medium, or placing the modifications on a major archive
site allowing unrestricted access to them, or by allowing the Copyright
Holder to include your modifications in the Standard Version of the
Package.
b) use the modified Package only within your corporation or organization.
c) rename any non-standard executables so the names do not conflict
with standard executables, which must also be provided, and provide
a separate manual page for each non-standard executable that clearly
documents how it differs from the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
e) permit and encourge anyone who receives a copy of the modified Package
permission to make your modifications Freely Available in some specific
way.
4. You may distribute the programs of this Package in object code or
executable form, provided that you do at least ONE of the following:
a) distribute a Standard Version of the executables and library files,
together with instructions (in the manual page or equivalent) on where
to get the Standard Version.
b) accompany the distribution with the machine-readable source of
the Package with your modifications.
c) give non-standard executables non-standard names, and clearly
document the differences in manual pages (or equivalent), together
with instructions on where to get the Standard Version.
d) make other distribution arrangements with the Copyright Holder.
e) offer the machine-readable source of the Package, with your
modifications, by mail order.
5. You may charge a distribution fee for any distribution of this Package.
If you offer support for this Package, you may charge any fee you choose
for that support. You may not charge a license fee for the right to use
this Package itself. You may distribute this Package in aggregate with
other (possibly commercial and possibly nonfree) programs as part of a
larger (possibly commercial and possibly nonfree) software distribution,
and charge license fees for other parts of that software distribution,
provided that you do not advertise this Package as a product of your own.
If the Package includes an interpreter, You may embed this Package's
interpreter within an executable of yours (by linking); this shall be
construed as a mere form of aggregation, provided that the complete
Standard Version of the interpreter is so embedded.
6. The scripts and library files supplied as input to or produced as
output from the programs of this Package do not automatically fall
under the copyright of this Package, but belong to whoever generated
them, and may be sold commercially, and may be aggregated with this
Package. If such scripts or library files are aggregated with this
Package via the so-called "undump" or "unexec" methods of producing a
binary executable image, then distribution of such an image shall
neither be construed as a distribution of this Package nor shall it
fall under the restrictions of Paragraphs 3 and 4, provided that you do
not represent such an executable image as a Standard Version of this
Package.
7. C subroutines (or comparably compiled subroutines in other
languages) supplied by you and linked into this Package in order to
emulate subroutines and variables of the language defined by this
Package shall not be considered part of this Package, but are the
equivalent of input as in Paragraph 6, provided these subroutines do
not change the language in any way that would cause it to fail the
regression tests for the language.
8. Aggregation of the Standard Version of the Package with a commercial
distribution is always permitted provided that the use of this Package is
embedded; that is, when no overt attempt is made to make this Package's
interfaces visible to the end user of the commercial distribution.
Such use shall not be construed as a distribution of this Package.
9. The name of the Copyright Holder may not be used to endorse or promote
products derived from this software without specific prior written permission.
10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
The End

12
3rd_party/libdisasm/NAMESPACE.TXT vendored Normal file
View File

@@ -0,0 +1,12 @@
The rewritten libdisasm code uses the following namespaces:
Prefix Namespace
----------------------------------------------------
x86_ Global 'libdisasm' namespace
ia32_ Internal IA32 ISA namespace
ia64_ Internal IA64 ISA namespace
ix64_ Internal X86-64 ISA namespace
Note that the 64-bit ISAs are not yet supported/written.

2
3rd_party/libdisasm/README vendored Normal file
View File

@@ -0,0 +1,2 @@
This is a cut-up version of libdisasm originally from the bastard project http://bastard.sourceforge.net/

43
3rd_party/libdisasm/TODO vendored Normal file
View File

@@ -0,0 +1,43 @@
x86_format.c
------------
intel: jmpf -> jmp, callf -> call
att: jmpf -> ljmp, callf -> lcall
opcode table
------------
finish typing instructions
fix flag clear/set/toggle types
ix64 stuff
----------
document output file formats in web page
features doc: register aliases, implicit operands, stack mods,
ring0 flags, eflags, cpu model/isa
ia32_handle_* implementation
fix operand 0F C2
CMPPS
* sysenter, sysexit as CALL types -- preceded by MSR writes
* SYSENTER/SYSEXIT stack : overwrites SS, ESP
* stos, cmps, scas, movs, ins, outs, lods -> OP_PTR
* OP_SIZE in implicit operands
* use OP_SIZE to choose reg sizes!
DONE?? :
implicit operands: provide action ?
e.g. add/inc for stack, write, etc
replace table numbers in opcodes.dat with
#defines for table names
replace 0 with INSN_INVALID [or maybe FF for invalid and 00 for Not Applicable]
no wait that is only for prefix tables -- n/p
if (prefix) only use if insn != invalid
these should cover all the wacky disasm exceptions
for the rep one we can cheat, match only a 0x90
todo: privilege | ring

36
3rd_party/libdisasm/ia32_fixup.cpp vendored Normal file
View File

@@ -0,0 +1,36 @@
#include <stdio.h>
/*
 * mem_fixup: table of 256 C-string slots covering indices 0x00 through 0xFF.
 * No slot is populated at present, so every entry is a null pointer.
 * Each row below holds eight consecutive slots; the trailing remark gives
 * the index of the first slot in that row.
 * NOTE(review): presumably indexed by opcode byte -- confirm against users.
 */
static const char * mem_fixup[256] = {
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x00
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x08
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x10
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x18
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x20
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x28
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x30
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x38
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x40
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x48
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x50
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x58
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x60
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x68
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x70
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x78
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x80
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x88
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x90
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0x98
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0xA0
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0xA8
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0xB0
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0xB8
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0xC0
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0xC8
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0xD0
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0xD8
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0xE0
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0xE8
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0xF0
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr  // 0xF8
};

View File

@@ -20,81 +20,81 @@ typedef struct {
static op_implicit_list_t list_aaa[] =
/* 37 : AAA : rw AL */
/* 3F : AAS : rw AL */
{{ OP_R | OP_W, REG_BYTE_OFFSET }, {0}}; /* aaa */
{{ OP_R | OP_W, REG_BYTE_OFFSET }, {0,0}}; /* aaa */
static op_implicit_list_t list_aad[] =
/* D5 0A, D5 (ib) : AAD : rw AX */
/* D4 0A, D4 (ib) : AAM : rw AX */
{{ OP_R | OP_W, REG_WORD_OFFSET }, {0}}; /* aad */
{{ OP_R | OP_W, REG_WORD_OFFSET }, {0,0}}; /* aad */
static op_implicit_list_t list_call[] =
/* E8, FF, 9A, FF : CALL : rw ESP, rw EIP */
/* C2, C3, CA, CB : RET : rw ESP, rw EIP */
{{ OP_R | OP_W, REG_EIP_INDEX },
{ OP_R | OP_W, REG_ESP_INDEX }, {0}}; /* call, ret */
{ OP_R | OP_W, REG_ESP_INDEX }, {0,0}}; /* call, ret */
static op_implicit_list_t list_cbw[] =
/* 98 : CBW : r AL, rw AX */
{{ OP_R | OP_W, REG_WORD_OFFSET },
{ OP_R, REG_BYTE_OFFSET}, {0}}; /* cbw */
{ OP_R, REG_BYTE_OFFSET}, {0,0}}; /* cbw */
static op_implicit_list_t list_cwde[] =
/* 98 : CWDE : r AX, rw EAX */
{{ OP_R | OP_W, REG_DWORD_OFFSET },
{ OP_R, REG_WORD_OFFSET }, {0}}; /* cwde */
{ OP_R, REG_WORD_OFFSET }, {0,0}}; /* cwde */
static op_implicit_list_t list_clts[] =
/* 0F 06 : CLTS : rw CR0 */
{{ OP_R | OP_W, REG_CTRL_OFFSET}, {0}}; /* clts */
{{ OP_R | OP_W, REG_CTRL_OFFSET}, {0,0}}; /* clts */
static op_implicit_list_t list_cmpxchg[] =
/* 0F B0 : CMPXCHG : rw AL */
{{ OP_R | OP_W, REG_BYTE_OFFSET }, {0}}; /* cmpxchg */
{{ OP_R | OP_W, REG_BYTE_OFFSET }, {0,0}}; /* cmpxchg */
static op_implicit_list_t list_cmpxchgb[] =
/* 0F B1 : CMPXCHG : rw EAX */
{{ OP_R | OP_W, REG_DWORD_OFFSET }, {0}}; /* cmpxchg */
{{ OP_R | OP_W, REG_DWORD_OFFSET }, {0,0}}; /* cmpxchg */
static op_implicit_list_t list_cmpxchg8b[] =
/* 0F C7 : CMPXCHG8B : rw EDX, rw EAX, r ECX, r EBX */
{{ OP_R | OP_W, REG_DWORD_OFFSET },
{ OP_R | OP_W, REG_DWORD_OFFSET + 2 },
{ OP_R, REG_DWORD_OFFSET + 1 },
{ OP_R, REG_DWORD_OFFSET + 3 }, {0}}; /* cmpxchg8b */
{ OP_R, REG_DWORD_OFFSET + 3 }, {0,0}}; /* cmpxchg8b */
static op_implicit_list_t list_cpuid[] =
/* 0F A2 : CPUID : rw EAX, w EBX, w ECX, w EDX */
{{ OP_R | OP_W, REG_DWORD_OFFSET },
{ OP_W, REG_DWORD_OFFSET + 1 },
{ OP_W, REG_DWORD_OFFSET + 2 },
{ OP_W, REG_DWORD_OFFSET + 3 }, {0}}; /* cpuid */
{ OP_W, REG_DWORD_OFFSET + 3 }, {0,0}}; /* cpuid */
static op_implicit_list_t list_cwd[] =
/* 99 : CWD/CWQ : rw EAX, w EDX */
{{ OP_R | OP_W, REG_DWORD_OFFSET },
{ OP_W, REG_DWORD_OFFSET + 2 }, {0}}; /* cwd */
{ OP_W, REG_DWORD_OFFSET + 2 }, {0,0}}; /* cwd */
static op_implicit_list_t list_daa[] =
/* 27 : DAA : rw AL */
/* 2F : DAS : rw AL */
{{ OP_R | OP_W, REG_BYTE_OFFSET }, {0}}; /* daa */
{{ OP_R | OP_W, REG_BYTE_OFFSET }, {0,0}}; /* daa */
static op_implicit_list_t list_idiv[] =
/* F6 : DIV, IDIV : r AX, w AL, w AH */
/* FIXED: first op was EAX, not Aw. TODO: verify! */
{{ OP_R, REG_WORD_OFFSET },
{ OP_W, REG_BYTE_OFFSET },
{ OP_W, REG_BYTE_OFFSET + 4 }, {0}}; /* div */
{ OP_W, REG_BYTE_OFFSET + 4 }, {0,0}}; /* div */
static op_implicit_list_t list_div[] =
/* F7 : DIV, IDIV : rw EDX, rw EAX */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 2 },
{ OP_R | OP_W, REG_DWORD_OFFSET }, {0}}; /* div */
{ OP_R | OP_W, REG_DWORD_OFFSET }, {0,0}}; /* div */
static op_implicit_list_t list_enter[] =
/* C8 : ENTER : rw ESP w EBP */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 4 },
{ OP_R, REG_DWORD_OFFSET + 5 }, {0}}; /* enter */
{ OP_R, REG_DWORD_OFFSET + 5 }, {0,0}}; /* enter */
static op_implicit_list_t list_f2xm1[] =
/* D9 F0 : F2XM1 : rw ST(0) */
@@ -109,7 +109,7 @@ static op_implicit_list_t list_f2xm1[] =
/* D9 FE : FSIN : rw ST(0) */
/* D9 FA : FSQRT : rw ST(0) */
/* D9 F4 : FXTRACT : rw ST(0) */
{{ OP_R | OP_W, REG_FPU_OFFSET }, {0}}; /* f2xm1 */
{{ OP_R | OP_W, REG_FPU_OFFSET }, {0,0}}; /* f2xm1 */
static op_implicit_list_t list_fcom[] =
/* D8, DC, DE D9 : FCOM : r ST(0) */
@@ -117,17 +117,17 @@ static op_implicit_list_t list_fcom[] =
/* DF, D8 : FIST : r ST(0) */
/* D9 E4 : FTST : r ST(0) */
/* D9 E5 : FXAM : r ST(0) */
{{ OP_R, REG_FPU_OFFSET }, {0}}; /* fcom */
{{ OP_R, REG_FPU_OFFSET }, {0,0}}; /* fcom */
static op_implicit_list_t list_fpatan[] =
/* D9 F3 : FPATAN : r ST(0), rw ST(1) */
{{ OP_R, REG_FPU_OFFSET }, {0}}; /* fpatan */
{{ OP_R, REG_FPU_OFFSET }, {0,0}}; /* fpatan */
static op_implicit_list_t list_fprem[] =
/* D9 F8, D9 F5 : FPREM : rw ST(0) r ST(1) */
/* D9 FD : FSCALE : rw ST(0), r ST(1) */
{{ OP_R | OP_W, REG_FPU_OFFSET },
{ OP_R, REG_FPU_OFFSET + 1 }, {0}}; /* fprem */
{ OP_R, REG_FPU_OFFSET + 1 }, {0,0}}; /* fprem */
static op_implicit_list_t list_faddp[] =
/* DE C1 : FADDP : r ST(0), rw ST(1) */
@@ -135,67 +135,67 @@ static op_implicit_list_t list_faddp[] =
/* D9 F1 : FYL2X : r ST(0), rw ST(1) */
/* D9 F9 : FYL2XP1 : r ST(0), rw ST(1) */
{{ OP_R, REG_FPU_OFFSET },
{ OP_R | OP_W, REG_FPU_OFFSET + 1 }, {0}}; /* faddp */
{ OP_R | OP_W, REG_FPU_OFFSET + 1 }, {0,0}}; /* faddp */
static op_implicit_list_t list_fucompp[] =
/* DA E9 : FUCOMPP : r ST(0), r ST(1) */
{{ OP_R, REG_FPU_OFFSET },
{ OP_R, REG_FPU_OFFSET + 1 }, {0}}; /* fucompp */
{ OP_R, REG_FPU_OFFSET + 1 }, {0,0}}; /* fucompp */
static op_implicit_list_t list_imul[] =
/* F6 : IMUL : r AL, w AX */
/* F6 : MUL : r AL, w AX */
{{ OP_R, REG_BYTE_OFFSET },
{ OP_W, REG_WORD_OFFSET }, {0}}; /* imul */
{ OP_W, REG_WORD_OFFSET }, {0,0}}; /* imul */
static op_implicit_list_t list_mul[] =
/* F7 : IMUL : rw EAX, w EDX */
/* F7 : MUL : rw EAX, w EDX */
{{ OP_R | OP_W, REG_DWORD_OFFSET },
{ OP_W, REG_DWORD_OFFSET + 2 }, {0}}; /* imul */
{ OP_W, REG_DWORD_OFFSET + 2 }, {0,0}}; /* imul */
static op_implicit_list_t list_lahf[] =
/* 9F : LAHF : r EFLAGS, w AH */
{{ OP_R, REG_FLAGS_INDEX },
{ OP_W, REG_BYTE_OFFSET + 4 }, {0}}; /* lahf */
{ OP_W, REG_BYTE_OFFSET + 4 }, {0,0}}; /* lahf */
static op_implicit_list_t list_ldmxcsr[] =
/* 0F AE : LDMXCSR : w MXCSR SSE Control Status Reg */
{{ OP_W, REG_MXCSG_INDEX }, {0}}; /* ldmxcsr */
{{ OP_W, REG_MXCSG_INDEX }, {0,0}}; /* ldmxcsr */
static op_implicit_list_t list_leave[] =
/* C9 : LEAVE : rw ESP, w EBP */
{{ OP_R | OP_W, REG_ESP_INDEX },
{ OP_W, REG_DWORD_OFFSET + 5 }, {0}}; /* leave */
{ OP_W, REG_DWORD_OFFSET + 5 }, {0,0}}; /* leave */
static op_implicit_list_t list_lgdt[] =
/* 0F 01 : LGDT : w GDTR */
{{ OP_W, REG_GDTR_INDEX }, {0}}; /* lgdt */
{{ OP_W, REG_GDTR_INDEX }, {0,0}}; /* lgdt */
static op_implicit_list_t list_lidt[] =
/* 0F 01 : LIDT : w IDTR */
{{ OP_W, REG_IDTR_INDEX }, {0}}; /* lidt */
{{ OP_W, REG_IDTR_INDEX }, {0,0}}; /* lidt */
static op_implicit_list_t list_lldt[] =
/* 0F 00 : LLDT : w LDTR */
{{ OP_W, REG_LDTR_INDEX }, {0}}; /* lldt */
{{ OP_W, REG_LDTR_INDEX }, {0,0}}; /* lldt */
static op_implicit_list_t list_lmsw[] =
/* 0F 01 : LMSW : w CR0 */
{{ OP_W, REG_CTRL_OFFSET }, {0}}; /* lmsw */
{{ OP_W, REG_CTRL_OFFSET }, {0,0}}; /* lmsw */
static op_implicit_list_t list_loop[] =
/* E0, E1, E2 : LOOP : rw ECX */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 1 }, {0}};/* loop */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 1 }, {0,0}};/* loop */
static op_implicit_list_t list_ltr[] =
/* 0F 00 : LTR : w Task Register */
{{ OP_W, REG_TR_INDEX }, {0}}; /* ltr */
{{ OP_W, REG_TR_INDEX }, {0,0}}; /* ltr */
static op_implicit_list_t list_pop[] =
/* 8F, 58, 1F, 07, 17, 0F A1, 0F A9 : POP : rw ESP */
/* FF, 50, 6A, 68, 0E, 16, 1E, 06, 0F A0, 0F A8 : PUSH : rw ESP */
{{ OP_R | OP_W, REG_ESP_INDEX }, {0}}; /* pop, push */
{{ OP_R | OP_W, REG_ESP_INDEX }, {0,0}}; /* pop, push */
static op_implicit_list_t list_popad[] =
/* 61 : POPAD : rw esp, w edi esi ebp ebx edx ecx eax */
@@ -206,12 +206,12 @@ static op_implicit_list_t list_popad[] =
{ OP_W, REG_DWORD_OFFSET + 3 },
{ OP_W, REG_DWORD_OFFSET + 2 },
{ OP_W, REG_DWORD_OFFSET + 1 },
{ OP_W, REG_DWORD_OFFSET }, {0}}; /* popad */
{ OP_W, REG_DWORD_OFFSET }, {0,0}}; /* popad */
static op_implicit_list_t list_popfd[] =
/* 9D : POPFD : rw esp, w eflags */
{{ OP_R | OP_W, REG_ESP_INDEX },
{ OP_W, REG_FLAGS_INDEX }, {0}}; /* popfd */
{ OP_W, REG_FLAGS_INDEX }, {0,0}}; /* popfd */
static op_implicit_list_t list_pushad[] =
/* FF, 50, 6A, 68, 0E, 16, 1E, 06, 0F A0, 0F A8 : PUSH : rw ESP */
@@ -223,102 +223,102 @@ static op_implicit_list_t list_pushad[] =
{ OP_R, REG_DWORD_OFFSET + 3 },
{ OP_R, REG_DWORD_OFFSET + 5 },
{ OP_R, REG_DWORD_OFFSET + 6 },
{ OP_R, REG_DWORD_OFFSET + 7 }, {0}}; /* pushad */
{ OP_R, REG_DWORD_OFFSET + 7 }, {0,0}}; /* pushad */
static op_implicit_list_t list_pushfd[] =
/* 9C : PUSHFD : rw esp, r eflags */
{{ OP_R | OP_W, REG_ESP_INDEX },
{ OP_R, REG_FLAGS_INDEX }, {0}}; /* pushfd */
{ OP_R, REG_FLAGS_INDEX }, {0,0}}; /* pushfd */
static op_implicit_list_t list_rdmsr[] =
/* 0F 32 : RDMSR : r ECX, w EDX, w EAX */
{{ OP_R, REG_DWORD_OFFSET + 1 },
{ OP_W, REG_DWORD_OFFSET + 2 },
{ OP_W, REG_DWORD_OFFSET }, {0}}; /* rdmsr */
{ OP_W, REG_DWORD_OFFSET }, {0,0}}; /* rdmsr */
static op_implicit_list_t list_rdpmc[] =
/* 0F 33 : RDPMC : r ECX, w EDX, w EAX */
{{ OP_R, REG_DWORD_OFFSET + 1 },
{ OP_W, REG_DWORD_OFFSET + 2 },
{ OP_W, REG_DWORD_OFFSET }, {0}}; /* rdpmc */
{ OP_W, REG_DWORD_OFFSET }, {0,0}}; /* rdpmc */
static op_implicit_list_t list_rdtsc[] =
/* 0F 31 : RDTSC : rw EDX, rw EAX */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 2 },
{ OP_R | OP_W, REG_DWORD_OFFSET }, {0}}; /* rdtsc */
{ OP_R | OP_W, REG_DWORD_OFFSET }, {0,0}}; /* rdtsc */
static op_implicit_list_t list_rep[] =
/* F3, F2 ... : REP : rw ECX */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 1 }, {0}};/* rep */
{{ OP_R | OP_W, REG_DWORD_OFFSET + 1 }, {0,0}};/* rep */
static op_implicit_list_t list_rsm[] =
/* 0F AA : RSM : r CR4, r CR0 */
{{ OP_R, REG_CTRL_OFFSET + 4 },
{ OP_R, REG_CTRL_OFFSET }, {0}}; /* rsm */
{ OP_R, REG_CTRL_OFFSET }, {0,0}}; /* rsm */
static op_implicit_list_t list_sahf[] =
/* 9E : SAHF : r ah, rw eflags (set SF ZF AF PF CF) */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sahf */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* sahf */
static op_implicit_list_t list_sgdt[] =
/* 0F : SGDT : r gdtr */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sgdt */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* sgdt */
static op_implicit_list_t list_sidt[] =
/* 0F : SIDT : r idtr */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sidt */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* sidt */
static op_implicit_list_t list_sldt[] =
/* 0F : SLDT : r ldtr */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sldt */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* sldt */
static op_implicit_list_t list_smsw[] =
/* 0F : SMSW : r CR0 */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* smsw */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* smsw */
static op_implicit_list_t list_stmxcsr[] =
/* 0F AE : STMXCSR : r MXCSR */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* stmxcsr */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* stmxcsr */
static op_implicit_list_t list_str[] =
/* 0F 00 : STR : r TR (task register) */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* str */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* str */
static op_implicit_list_t list_sysenter[] =
/* 0F 34 : SYSENTER : w cs, w eip, w ss, w esp, r CR0, w eflags
* r sysenter_cs_msr, sysenter_esp_msr, sysenter_eip_msr */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sysenter */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* sysenter */
static op_implicit_list_t list_sysexit[] =
/* 0F 35 : SYSEXIT : r edx, r ecx, w cs, w eip, w ss, w esp
* r sysenter_cs_msr */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* sysexit */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* sysexit */
static op_implicit_list_t list_wrmsr[] =
/* 0F 30 : WRMSR : r edx, r eax, r ecx */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* wrmsr */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* wrmsr */
static op_implicit_list_t list_xlat[] =
/* D7 : XLAT : rw al r ebx (ptr) */
/* TODO: finish this! */
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* xlat */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* xlat */
/* TODO:
* monitor 0f 01 c8 eax OP_R ecx OP_R edx OP_R
* mwait 0f 01 c9 eax OP_R ecx OP_R
*/
static op_implicit_list_t list_monitor[] =
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* monitor */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* monitor */
static op_implicit_list_t list_mwait[] =
{{ OP_R, REG_DWORD_OFFSET }, {0}}; /* mwait */
{{ OP_R, REG_DWORD_OFFSET }, {0,0}}; /* mwait */
op_implicit_list_t *op_implicit_list[] = {
/* This is a list of implicit operands which are read/written by
@@ -407,6 +407,20 @@ unsigned int Ia32_Decoder::ia32_insn_implicit_ops( unsigned int impl_idx ) {
if (!op) {
op = m_decoded->x86_operand_new();
/* all implicit operands are registers */
if(m_decoded->addr_size==2)
{
if(list->operand==REG_EIP_INDEX)
handle_impl_reg( op, REG_IP_INDEX );
else if(list->operand<REG_WORD_OFFSET)
{
handle_impl_reg( op, (list->operand-REG_DWORD_OFFSET)+REG_WORD_OFFSET);
assert((list->operand-REG_DWORD_OFFSET)<REG_WORD_OFFSET-REG_DWORD_OFFSET);
}
else
handle_impl_reg( op, list->operand);
}
else
handle_impl_reg( op, list->operand );
/* decrement the 'explicit count' incremented by default in
* x86_operand_new */

View File

@@ -240,7 +240,7 @@ void Ia32_Decoder::ia32_handle_prefix( unsigned int prefixes ) {
}
static void reg_32_to_16( x86_op_t *op, x86_insn_t *insn, void *arg ) {
static void reg_32_to_16( x86_op_t *op, x86_insn_t */*insn*/, void */*arg*/ ) {
/* if this is a 32-bit register and it is a general register ... */
if ( op->type == op_register && op->data.reg.size == 4 &&
@@ -539,12 +539,11 @@ size_t ia32_table_lookup( unsigned char *buf, size_t buf_len,
size_t Ia32_Decoder::handle_insn_suffix( unsigned char *buf, size_t buf_len,
ia32_insn_t *raw_insn ) {
ia32_table_desc_t *table_desc;
// ia32_table_desc_t *table_desc;
ia32_insn_t *sfx_insn;
size_t size;
unsigned int prefixes = 0;
table_desc = &ia32_tables[raw_insn->table];
//table_desc = &ia32_tables[raw_insn->table];
size = ia32_table_lookup( buf, buf_len, raw_insn->table, &sfx_insn,
&prefixes );
if (size == INVALID_INSN || sfx_insn->mnem_flag == INS_INVALID ) {

View File

@@ -137,7 +137,7 @@ static int ia32_invariant_modrm( unsigned char *in, unsigned char *out,
}
static int ia32_decode_invariant( unsigned char *buf, size_t buf_len,
static int ia32_decode_invariant( unsigned char *buf, size_t /*buf_len*/,
ia32_insn_t *t, unsigned char *out,
unsigned int prefixes, x86_invariant_t *inv) {
@@ -251,13 +251,13 @@ static int ia32_decode_invariant( unsigned char *buf, size_t buf_len,
case ADDRMETH_X:
inv->operands[x].flags.op_signed=true;
inv->operands[x].flags.op_pointer=true;
inv->operands[x].flags.op_seg=x86_op_flags::op_ds_seg;
inv->operands[x].flags.op_seg=(x86_op_flags::op_ds_seg)>>8;
inv->operands[x].flags.op_string=true;
break;
case ADDRMETH_Y:
inv->operands[x].flags.op_signed=true;
inv->operands[x].flags.op_pointer=true;
inv->operands[x].flags.op_seg=x86_op_flags::op_es_seg;
inv->operands[x].flags.op_seg=x86_op_flags::op_es_seg>>8;
inv->operands[x].flags.op_string=true;
break;
case ADDRMETH_RR:
@@ -307,6 +307,7 @@ size_t ia32_disasm_invariant( unsigned char * buf, size_t buf_len,
}
size_t ia32_disasm_size( unsigned char *buf, size_t buf_len ) {
x86_invariant_t inv = { {0} };
x86_invariant_t inv;
memset(&inv,0,sizeof(x86_invariant_t));
return( ia32_disasm_invariant( buf, buf_len, &inv ) );
}

View File

@@ -155,12 +155,12 @@ static size_t modrm_decode16( unsigned char *buf, unsigned int buf_len,
ia32_handle_register(&ea->base, REG_WORD_OFFSET + 3);
ia32_handle_register(&ea->index, REG_WORD_OFFSET + 7);
case MOD16_RM_BPSI:
op->flags.op_seg = x86_op_flags::op_ss_seg;
op->flags.op_seg = x86_op_flags::op_ss_seg>>8;
ia32_handle_register(&ea->base, REG_WORD_OFFSET + 5);
ia32_handle_register(&ea->index, REG_WORD_OFFSET + 6);
break;
case MOD16_RM_BPDI:
op->flags.op_seg = x86_op_flags::op_ss_seg;
op->flags.op_seg = x86_op_flags::op_ss_seg>>8;
ia32_handle_register(&ea->base, REG_WORD_OFFSET + 5);
ia32_handle_register(&ea->index, REG_WORD_OFFSET + 7);
break;
@@ -172,7 +172,7 @@ static size_t modrm_decode16( unsigned char *buf, unsigned int buf_len,
break;
case MOD16_RM_BP:
if ( modrm->mod != MOD16_MOD_NODISP ) {
op->flags.op_seg = x86_op_flags::op_ss_seg;
op->flags.op_seg = x86_op_flags::op_ss_seg>>8;
ia32_handle_register(&ea->base,
REG_WORD_OFFSET + 5);
}

3206
3rd_party/libdisasm/ia32_opcode.dat vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -20,17 +20,17 @@ static void apply_seg( x86_op_t *op, unsigned int prefixes ) {
switch ( prefixes & PREFIX_REG_MASK ) {
/* NOTE: that op->flags for segment override are not a bitfield */
case PREFIX_CS:
op->flags.op_seg = x86_op_flags::op_cs_seg; break;
op->flags.op_seg = x86_op_flags::op_cs_seg>>8; break;
case PREFIX_SS:
op->flags.op_seg = x86_op_flags::op_ss_seg; break;
op->flags.op_seg = x86_op_flags::op_ss_seg>>8; break;
case PREFIX_DS:
op->flags.op_seg = x86_op_flags::op_ds_seg; break;
op->flags.op_seg = x86_op_flags::op_ds_seg>>8; break;
case PREFIX_ES:
op->flags.op_seg = x86_op_flags::op_es_seg; break;
op->flags.op_seg = x86_op_flags::op_es_seg>>8; break;
case PREFIX_FS:
op->flags.op_seg = x86_op_flags::op_fs_seg; break;
op->flags.op_seg = x86_op_flags::op_fs_seg>>8; break;
case PREFIX_GS:
op->flags.op_seg = x86_op_flags::op_gs_seg; break;
op->flags.op_seg = x86_op_flags::op_gs_seg>>8; break;
}
return;
@@ -107,19 +107,17 @@ size_t Ia32_Decoder::decode_operand_value( unsigned char *buf, size_t buf_len,
/* No MODRM : note these set operand type explicitly */
case ADDRMETH_A: /* No modR/M -- direct addr */
op->type = op_absolute;
//according to Intel Manuals, offset goes first
/* segment:offset address used in far calls */
x86_imm_sized( buf, buf_len,
&op->data.absolute.segment, 2 );
if ( m_decoded->addr_size == 4 ) {
x86_imm_sized( buf, buf_len,
&op->data.absolute.offset.off32, 4 );
size = 6;
} else {
x86_imm_sized( buf, buf_len,
&op->data.absolute.offset.off16, 2 );
x86_imm_sized( buf, buf_len, &op->data.absolute.offset.off32, 4 );
size = 4;
} else {
x86_imm_sized( buf, buf_len, &op->data.absolute.offset.off16, 2 );
size = 2;
}
x86_imm_sized( buf+size, buf_len-size, &op->data.absolute.segment, 2 );
size+=2;
break;
case ADDRMETH_I: /* Immediate val */
@@ -140,17 +138,24 @@ size_t Ia32_Decoder::decode_operand_value( unsigned char *buf, size_t buf_len,
op->data.far_offset depending on the size of
the operand */
op->flags.op_signed = true;
if ( op_size == 1 ) {
switch(op_size)
{
case 1:
/* one-byte near offset */
op->type = op_relative_near;
x86_imm_signsized(buf, buf_len, &op->data.relative_near, 1);
} else {
size = x86_imm_signsized(buf, buf_len, &op->data.relative_near, 1);
break;
case 2:
/* far offset...is this truly signed? */
op->type = op_relative_far;
x86_imm_signsized(buf, buf_len,
&op->data.relative_far, op_size );
int16_t offset_val; // easier upcast to int32_t
size = x86_imm_signsized(buf, buf_len, &offset_val, 2 );
op->data.relative_far=offset_val;
break;
default:
assert(false);
size=0;
}
size = op_size;
break;
case ADDRMETH_O: /* No ModR/M; op is word/dword offset */
/* NOTE: these are actually RVAs not offsets to seg!! */
@@ -172,20 +177,20 @@ size_t Ia32_Decoder::decode_operand_value( unsigned char *buf, size_t buf_len,
case ADDRMETH_X: /* Memory addressed by DS:SI [string] */
op->type = op_expression;
op->flags.op_hardcode = true;
op->flags.op_seg = x86_op_flags::op_ds_seg;
op->flags.op_seg = x86_op_flags::op_ds_seg>>8;
op->flags.op_pointer = true;
op->flags.op_string = true;
ia32_handle_register( &op->data.expression.base,
REG_DWORD_OFFSET + 6 );
gen_regs + 6 );
break;
case ADDRMETH_Y: /* Memory addressed by ES:DI [string] */
op->type = op_expression;
op->flags.op_hardcode = true;
op->flags.op_seg = x86_op_flags::op_es_seg;
op->flags.op_seg = x86_op_flags::op_es_seg>>8;
op->flags.op_pointer = true;
op->flags.op_string = true;
ia32_handle_register( &op->data.expression.base,
REG_DWORD_OFFSET + 7 );
gen_regs + 7 );
break;
case ADDRMETH_RR: /* Gen Register hard-coded in opcode */
op->type = op_register;
@@ -258,7 +263,7 @@ size_t Ia32_Decoder::decode_operand_size( unsigned int op_type, x86_op_t *op ) {
* value is a 16:16 pointer or a 16:32 pointer, where
* the first '16' is a segment */
size = (m_decoded->addr_size == 4) ? 6 : 4;
op->datatype = (size == 4) ? op_descr32 : op_descr16;
op->datatype = (size == 6) ? op_descr32 : op_descr16;
break;
case OPTYPE_b: /* byte, ignore op-size */
size = 1;

View File

@@ -189,7 +189,7 @@ static struct {
{ REG_DWORD_SIZE, reg_sys, 0, "esp_msr" },
/* REG_EIPMSR_INDEX : SYSENTER_EIP_MSR : 92 */
{ REG_DWORD_SIZE, reg_sys, 0, "eip_msr" },
{ 0 }
{ 0,reg_undef,0,"" }
};

View File

@@ -6,6 +6,7 @@
#endif
#include <cstring>
#include <cstdlib>
#include <cassert>
#include <stdint.h>
/* 'NEW" types
@@ -89,7 +90,7 @@ enum x86_options { /* these can be ORed together */
opt_none= 0,
opt_ignore_nulls=1, /* ignore sequences of > 4 NULL bytes */
opt_16_bit=2, /* 16-bit/DOS disassembly */
opt_att_mnemonics=4, /* use AT&T syntax names for alternate opcode mnemonics */
opt_att_mnemonics=4 /* use AT&T syntax names for alternate opcode mnemonics */
};
/* ========================================= Instruction Representation */
@@ -275,32 +276,62 @@ struct x86_op_t{
unsigned char fpuenv[28];
/* offset from segment */
uint32_t offset;
x86_reg_t reg; /* ID of CPU register */
char relative_near; /* offsets from current insn */
/* ID of CPU register */
x86_reg_t reg;
/* offsets from current insn */
char relative_near;
int32_t relative_far;
x86_absolute_t absolute; /* segment:offset */
x86_ea_t expression; /* effective address [expression] */
/* segment:offset */
x86_absolute_t absolute;
/* effective address [expression] */
x86_ea_t expression;
} data;
/* this is needed to make formatting operands more sane */
void * insn; /* pointer to x86_insn_t owning operand */
size_t size()
size_t size() const
{
return operand_size();
}
/* get size of operand data in bytes */
size_t operand_size();
size_t operand_size() const;
/* format (sprintf) an operand into 'buf' using specified syntax */
int x86_format_operand(char *buf, int len, enum x86_asm_format format );
bool is_address( ) {
bool is_address( ) const {
return ( type == op_absolute || type == op_offset );
}
bool is_relative( ) {
bool is_relative( ) const {
return ( type == op_relative_near || type == op_relative_far );
}
bool is_immediate( ) const { return ( type == op_immediate ); }
/* Return the numeric address/displacement stored in this operand as a
 * signed 32-bit value.
 *
 * Only meaningful for operands for which is_address() or is_relative()
 * holds (op_absolute, op_offset, op_relative_near, op_relative_far);
 * this is asserted on entry.
 *
 * Returns ~0 (all bits set) if the operand type is none of the above and
 * assertions are compiled out. */
int32_t getAddress()
{
assert(is_address()||is_relative());
switch ( type ) {
case op_relative_near:
/* one-byte displacement widened to 32 bits.
 * NOTE(review): relative_near is declared as plain 'char', so the
 * widening follows the platform's char signedness -- confirm this is
 * signed on all build targets. */
return (int32_t) data.relative_near;
case op_absolute:
/* segment:offset pair combined as (segment << 4) + offset; the offset
 * width is picked from the operand datatype (16-bit for op_descr16,
 * 32-bit otherwise) */
if(datatype==op_descr16)
return int32_t((data.absolute.segment)<<4) + data.absolute.offset.off16;
else
return int32_t((data.absolute.segment)<<4) + data.absolute.offset.off32;
case op_offset:
/* unsigned 32-bit offset returned through the signed return type */
return data.offset;
case op_relative_far:
/* if bit 15 is set, keep the low 16 bits and force the upper 16 bits
 * to ones (sign-extension of a 16-bit value); otherwise return the
 * stored value unchanged */
if (data.relative_far & 0x8000)
return (data.relative_far & 0xFFFF) | 0xFFFF0000;
else
return (int32_t)data.relative_far;
default:
/* not reachable when the entry assertion holds */
assert(false);
break;
}
/* fallback value for the default case in builds without assertions */
return ~0;
}
char * format( enum x86_asm_format format );
x86_op_t * copy()
{
x86_op_t *op = (x86_op_t *) calloc( sizeof(x86_op_t), 1 );
if ( op ) {
memcpy( op, this, sizeof(x86_op_t) );
}
@@ -439,7 +470,7 @@ enum x86_insn_note {
insn_note_smm = 2, /* "" in System Management Mode */
insn_note_serial = 4, /* Serializing instruction */
insn_note_nonswap = 8, /* Does not swap arguments in att-style formatting */
insn_note_nosuffix = 16, /* Does not have size suffix in att-style formatting */
insn_note_nosuffix = 16 /* Does not have size suffix in att-style formatting */
};
/* This specifies what effects the instruction has on the %eflags register */
@@ -520,7 +551,6 @@ enum x86_insn_prefix {
/* TODO: maybe provide insn_new/free(), and have disasm return new insn_t */
/* FOREACH types: these are used to limit the foreach results to
* operands which match a certain "type" (implicit or explicit)
* or which are accessed in certain ways (e.g. read or write). Note
@@ -596,7 +626,7 @@ public:
/* the instruction proper */
enum x86_insn_prefix prefix; /* prefixes ORed together */
char prefix_string[MAX_PREFIX_STR]; /* prefixes [might be truncated] */
char mnemonic[MAX_MNEM_STR];
char mnemonic[MAX_MNEM_STR+1];
x86_oplist_t *operands; /* list of explicit/implicit operands */
size_t operand_count; /* total number of operands */
size_t explicit_count; /* number of explicit operands */
@@ -605,28 +635,35 @@ public:
void *function; /* function containing this insn */
int tag; /* tag the insn as seen/processed */
x86_op_t *x86_operand_new();
/* convenience routine: returns count of operands matching 'type' */
size_t x86_operand_count( enum x86_op_foreach_type type );
/* accessor functions for the operands */
x86_op_t * x86_operand_1st( );
x86_op_t * x86_operand_2nd( );
x86_op_t * x86_operand_3rd( );
x86_op_t * get_dest();
x86_op_t * operand_1st( );
x86_op_t * operand_2nd( );
x86_op_t * operand_3rd( );
const x86_op_t * get_dest() const;
int32_t x86_get_rel_offset( );
x86_op_t * x86_get_branch_target( );
x86_op_t * x86_get_imm( );
uint8_t * x86_get_raw_imm( );
/* More accessor functions, this time for user-defined info... */
uint8_t * x86_get_raw_imm( );
/* set the address (usually RVA) of the insn */
void x86_set_insn_addr( uint32_t addr );
/* format (sprintf) an instruction mnemonic into 'buf' using specified syntax */
int x86_format_mnemonic( char *buf, int len, enum x86_asm_format format);
int x86_format_insn( char *buf, int len, enum x86_asm_format);
void x86_oplist_free( );
/* returns 0 if an instruction is invalid, 1 if valid */
bool is_valid( );
uint32_t x86_get_address( );
void make_invalid(unsigned char *buf);
/* instruction tagging: these routines allow the programmer to mark
* instructions as "seen" in a DFS, for example. libdisasm does not use
* the tag field.*/
/* set insn->tag to 1 */
void x86_tag_insn( );
/* return insn->tag */
int x86_insn_is_tagged();
/* set insn->tag to 0 */
void x86_untag_insn();
@@ -722,7 +759,7 @@ public:
* offset : Offset in buffer to disassemble
* insn : Structure to fill with disassembled instruction
*/
unsigned int x86_disasm( unsigned char *buf, unsigned int buf_len,
unsigned int x86_disasm(const unsigned char *buf, unsigned int buf_len,
uint32_t buf_rva, unsigned int offset,
x86_insn_t * insn );
/* x86_disasm_range: Sequential disassembly of a range of bytes in a buffer,
@@ -803,7 +840,7 @@ public:
* void x86_get_aliased_reg( x86_reg_t *alias_reg, x86_reg_t *output_reg )
* where 'alias_reg' is a reg operand and 'output_reg' is filled with the
* register that the operand is an alias for */
//#define x86_get_aliased_reg( alias_reg, output_reg ) \
//#define x86_get_aliased_reg( alias_reg, output_reg )
// x86_reg_from_id( alias_reg->alias, output_reg )

49
3rd_party/libdisasm/libdisasm.def vendored Normal file
View File

@@ -0,0 +1,49 @@
;libdisasm.def : Declares the module parameters
LIBRARY "libdisasm.dll"
DESCRIPTION "libdisasm exported functions"
EXPORTS
x86_addr_size @1
x86_cleanup @2
x86_disasm @3
x86_disasm_forward @4
x86_disasm_range @5
x86_endian @6
x86_format_header @7
x86_format_insn @8
x86_format_mnemonic @9
x86_format_operand @10
x86_fp_reg @11
x86_get_branch_target @12
x86_get_imm @13
x86_get_options @14
x86_get_raw_imm @15
x86_get_rel_offset @16
x86_imm_signsized @17
x86_imm_sized @18
x86_init @19
x86_insn_is_tagged @20
x86_insn_is_valid @21
x86_invariant_disasm @22
x86_ip_reg @23
x86_max_insn_size @24
x86_op_size @25
x86_operand_1st @26
x86_operand_2nd @27
x86_operand_3rd @28
x86_operand_count @29
x86_operand_foreach @30
x86_operand_new @31
x86_operand_size @32
x86_oplist_free @33
x86_reg_from_id @34
x86_report_error @35
x86_set_insn_addr @36
x86_set_insn_block @37
x86_set_insn_function @38
x86_set_insn_offset @39
x86_set_options @40
x86_set_reporter @41
x86_size_disasm @42
x86_sp_reg @43
x86_tag_insn @44

View File

@@ -21,7 +21,7 @@ void x86_insn_t::make_invalid(unsigned char *buf)
type = insn_invalid;
memcpy( bytes, buf, 1 );
}
unsigned int X86_Disasm::x86_disasm( unsigned char *buf, unsigned int buf_len,
unsigned int X86_Disasm::x86_disasm( const unsigned char *buf, unsigned int buf_len,
uint32_t buf_rva, unsigned int offset,
x86_insn_t *insn ){
int len, size;
@@ -141,7 +141,7 @@ unsigned int X86_Disasm::x86_disasm_forward( unsigned char *buf, unsigned int bu
x86_insn_t insn;
x86_op_t *op;
int32_t next_addr;
uint32_t next_offset;
int32_t next_offset;
unsigned int size, count = 0, bytes = 0, cont = 1;
while ( cont && bytes < buf_len ) {
@@ -161,7 +161,7 @@ unsigned int X86_Disasm::x86_disasm_forward( unsigned char *buf, unsigned int bu
}
if ( follow_insn_dest(&insn) ) {
op = insn.x86_operand_1st();//x86_get_dest_operand
op = insn.operand_1st();//x86_get_dest_operand
next_addr = -1;
/* if caller supplied a resolver, use it to determine
@@ -175,8 +175,7 @@ unsigned int X86_Disasm::x86_disasm_forward( unsigned char *buf, unsigned int bu
if (next_addr != -1 ) {
next_offset = next_addr - buf_rva;
/* if offset is in this buffer... */
if ( next_offset >= 0 &&
next_offset < buf_len ) {
if ( next_offset >= 0 && next_offset < buf_len ) {
/* go ahead and disassemble */
count += x86_disasm_forward( buf,
buf_len,

View File

@@ -46,7 +46,7 @@
} \
} while( 0 )
static char *prefix_strings[] = {
static const char *prefix_strings[] = {
"", /* no prefix */
"repz ", /* the trailing spaces make it easy to prepend to mnemonic */
"repnz ",
@@ -115,7 +115,7 @@ static void get_operand_data_str( x86_op_t *op, char *str, int len ){
static void get_operand_regtype_str( int regtype, char *str, int len )
{
static struct {
char *name;
const char *name;
int value;
} operand_regtypes[] = {
{"reg_gen" , 0x00001},
@@ -284,7 +284,7 @@ static int format_expr( x86_ea_t *ea, char *buf, int len,
static int format_seg( x86_op_t *op, char *buf, int len,
enum x86_asm_format format ) {
int len_orig = len;
char *reg = "";
const char *reg = "";
if (! op || ! buf || ! len || ! op->flags.whole) {
return(0);
@@ -295,8 +295,9 @@ static int format_seg( x86_op_t *op, char *buf, int len,
if (! (int) op->flags.op_seg) {
return(0);
}
switch (op->flags.op_seg) {
uint16_t seg_ov=uint16_t(op->flags.op_seg)<<8;
switch (seg_ov)
{
case x86_op_flags::op_es_seg: reg = "es"; break;
case x86_op_flags::op_cs_seg: reg = "cs"; break;
case x86_op_flags::op_ss_seg: reg = "ss"; break;
@@ -328,9 +329,9 @@ static int format_seg( x86_op_t *op, char *buf, int len,
return( len_orig - len ); /* return length of appended string */
}
static char *get_operand_datatype_str( x86_op_t *op ){
static const char *get_operand_datatype_str( x86_op_t *op ){
static char *types[] = {
static const char *types[] = {
"sbyte", /* 0 */
"sword",
"sqword",
@@ -405,7 +406,7 @@ static int format_insn_eflags_str( enum x86_flag_status flags, char *buf,
int len) {
static struct {
char *name;
const char *name;
int value;
} insn_flags[] = {
{ "carry_set ", 0x0001 },
@@ -440,9 +441,9 @@ static int format_insn_eflags_str( enum x86_flag_status flags, char *buf,
return( len_orig - len );
}
static char *get_insn_group_str( enum x86_insn_t::x86_insn_group gp ) {
static const char *get_insn_group_str( enum x86_insn_t::x86_insn_group gp ) {
static char *types[] = {
static const char *types[] = {
"", // 0
"controlflow",// 1
"arithmetic", // 2
@@ -467,10 +468,10 @@ static char *get_insn_group_str( enum x86_insn_t::x86_insn_group gp ) {
return types[gp];
}
static char *get_insn_type_str( enum x86_insn_type type ) {
static const char *get_insn_type_str( enum x86_insn_type type ) {
static struct {
char *name;
const char *name;
int value;
} types[] = {
/* insn_controlflow */
@@ -592,8 +593,8 @@ static char *get_insn_type_str( enum x86_insn_type type ) {
return "";
}
static char *get_insn_cpu_str( enum x86_insn_cpu cpu ) {
static char *intel[] = {
static const char *get_insn_cpu_str( enum x86_insn_cpu cpu ) {
static const char *intel[] = {
"", // 0
"8086", // 1
"80286", // 2
@@ -620,8 +621,8 @@ static char *get_insn_cpu_str( enum x86_insn_cpu cpu ) {
return "";
}
static char *get_insn_isa_str( enum x86_insn_isa isa ) {
static char *subset[] = {
static const char *get_insn_isa_str( enum x86_insn_isa isa ) {
static const char *subset[] = {
NULL, // 0
"General Purpose", // 1
"Floating Point", // 2
@@ -880,11 +881,11 @@ static int format_operand_xml( x86_op_t *op, x86_insn_t *insn, char *buf,
return( strlen( buf ) );
}
static int format_operand_raw( x86_op_t *op, x86_insn_t *insn, char *buf,
static int format_operand_raw( x86_op_t *op, x86_insn_t */*insn*/, char *buf,
int len){
char str[MAX_OP_RAW_STRING];
char *datatype = get_operand_datatype_str(op);
const char *datatype = get_operand_datatype_str(op);
switch (op->type) {
case op_register:
@@ -1042,7 +1043,7 @@ char * x86_op_t::format( enum x86_asm_format format ) {
static int format_att_mnemonic( x86_insn_t *insn, char *buf, int len) {
int size = 0;
char *suffix;
const char *suffix;
if (! insn || ! buf || ! len )
return(0);
@@ -1051,8 +1052,8 @@ static int format_att_mnemonic( x86_insn_t *insn, char *buf, int len) {
/* do long jump/call prefix */
if ( insn->type == insn_jmp || insn->type == insn_call ) {
if (! is_imm_jmp( insn->x86_operand_1st() ) ||
(insn->x86_operand_1st())->datatype != op_byte ) {
if (! is_imm_jmp( insn->operand_1st() ) ||
(insn->operand_1st())->datatype != op_byte ) {
/* far jump/call, use "l" prefix */
STRNCAT( buf, "l", len );
}
@@ -1076,11 +1077,11 @@ static int format_att_mnemonic( x86_insn_t *insn, char *buf, int len) {
insn->type == insn_out
)) {
if ( insn->x86_operand_count( op_explicit ) > 0 &&
is_memory_op( insn->x86_operand_1st() ) ){
size = insn->x86_operand_1st()->operand_size();
is_memory_op( insn->operand_1st() ) ){
size = insn->operand_1st()->operand_size();
} else if ( insn->x86_operand_count( op_explicit ) > 1 &&
is_memory_op( insn->x86_operand_2nd() ) ){
size = insn->x86_operand_2nd()->operand_size();
is_memory_op( insn->operand_2nd() ) ){
size = insn->operand_2nd()->operand_size();
}
}
@@ -1094,7 +1095,6 @@ static int format_att_mnemonic( x86_insn_t *insn, char *buf, int len) {
return ( strlen( buf ) );
}
/** format (sprintf) an instruction mnemonic into 'buf' using specified syntax */
int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len,
enum x86_asm_format format){
char str[MAX_OP_STRING];
@@ -1137,7 +1137,7 @@ static int format_insn_note(x86_insn_t *insn, char *buf, int len){
return( len_orig - len );
}
static int format_raw_insn( x86_insn_t *insn, char *buf, int len ){
static int format_raw_insn( x86_insn_t *insn, char *buf, size_t len ){
struct op_string opstr = { buf, len };
int i;
@@ -1223,24 +1223,24 @@ static int format_xml_insn( x86_insn_t *insn, char *buf, int len ) {
len -= format_insn_eflags_str( insn->flags_tested, buf, len );
STRNCAT( buf, "\"/>\n\t</flags>\n", len );
if ( insn->x86_operand_1st() ) {
insn->x86_operand_1st()->x86_format_operand(str,
if ( insn->operand_1st() ) {
insn->operand_1st()->x86_format_operand(str,
sizeof str, xml_syntax);
STRNCAT( buf, "\t<operand name=dest>\n", len );
STRNCAT( buf, str, len );
STRNCAT( buf, "\t</operand>\n", len );
}
if ( insn->x86_operand_2nd() ) {
insn->x86_operand_2nd()->x86_format_operand(str,sizeof str,
if ( insn->operand_2nd() ) {
insn->operand_2nd()->x86_format_operand(str,sizeof str,
xml_syntax);
STRNCAT( buf, "\t<operand name=src>\n", len );
STRNCAT( buf, str, len );
STRNCAT( buf, "\t</operand>\n", len );
}
if ( insn->x86_operand_3rd() ) {
insn->x86_operand_3rd()->x86_format_operand(str,sizeof str,
if ( insn->operand_3rd() ) {
insn->operand_3rd()->x86_format_operand(str,sizeof str,
xml_syntax);
STRNCAT( buf, "\t<operand name=imm>\n", len );
STRNCAT( buf, str, len );
@@ -1342,13 +1342,13 @@ int x86_insn_t::x86_format_insn( char *buf, int len,
STRNCAT( buf, "\t", len );
/* dest */
if ( (dst = x86_operand_1st()) && !(dst->flags.op_implied) ) {
if ( (dst = operand_1st()) && !(dst->flags.op_implied) ) {
dst->x86_format_operand(str, MAX_OP_STRING, format);
STRNCAT( buf, str, len );
}
/* src */
if ( (src = x86_operand_2nd()) ) {
if ( (src = operand_2nd()) ) {
if ( !(dst->flags.op_implied) ) {
STRNCAT( buf, ", ", len );
}
@@ -1357,9 +1357,9 @@ int x86_insn_t::x86_format_insn( char *buf, int len,
}
/* imm */
if ( x86_operand_3rd()) {
if ( operand_3rd()) {
STRNCAT( buf, ", ", len );
x86_operand_3rd()->x86_format_operand(str, MAX_OP_STRING,format);
operand_3rd()->x86_format_operand(str, MAX_OP_STRING,format);
STRNCAT( buf, str, len );
}
@@ -1373,8 +1373,8 @@ int x86_insn_t::x86_format_insn( char *buf, int len,
/* not sure which is correct? sometimes GNU as requires
* an imm as the first operand, sometimes as the third... */
/* imm */
if ( x86_operand_3rd() ) {
x86_operand_3rd()->x86_format_operand(str, MAX_OP_STRING,format);
if ( operand_3rd() ) {
operand_3rd()->x86_format_operand(str, MAX_OP_STRING,format);
STRNCAT( buf, str, len );
/* there is always 'dest' operand if there is 'src' */
STRNCAT( buf, ", ", len );
@@ -1382,13 +1382,13 @@ int x86_insn_t::x86_format_insn( char *buf, int len,
if ( (note & insn_note_nonswap ) == 0 ) {
/* regular AT&T style swap */
src = x86_operand_2nd();
dst = x86_operand_1st();
src = operand_2nd();
dst = operand_1st();
}
else {
/* special-case instructions */
src = x86_operand_1st();
dst = x86_operand_2nd();
src = operand_1st();
dst = operand_2nd();
}
/* src */
@@ -1431,20 +1431,20 @@ int x86_insn_t::x86_format_insn( char *buf, int len,
/* print operands */
/* dest */
if ( x86_operand_1st() ) {
x86_operand_1st()->x86_format_operand(str, MAX_OP_STRING,format);
if ( operand_1st() ) {
operand_1st()->x86_format_operand(str, MAX_OP_STRING,format);
STRNCATF( buf, "%s\t", str, len );
}
/* src */
if ( x86_operand_2nd() ) {
x86_operand_2nd()->x86_format_operand(str, MAX_OP_STRING,format);
if ( operand_2nd() ) {
operand_2nd()->x86_format_operand(str, MAX_OP_STRING,format);
STRNCATF( buf, "%s\t", str, len );
}
/* imm */
if ( x86_operand_3rd()) {
x86_operand_3rd()->x86_format_operand(str, MAX_OP_STRING,format);
if ( operand_3rd()) {
operand_3rd()->x86_format_operand(str, MAX_OP_STRING,format);
STRNCAT( buf, str, len );
}
}

View File

@@ -17,7 +17,6 @@ int x86_insn_is_valid( x86_insn_t *insn ) {
return 0;
}
/** \returns false if an instruction is invalid, true if valid */
bool x86_insn_t::is_valid( )
{
if ( this && this->type != insn_invalid && this->size > 0 )
@@ -94,13 +93,12 @@ x86_op_t * x86_insn_t::x86_get_branch_target() {
return NULL;
}
x86_op_t * x86_insn_t::get_dest() {
const x86_op_t * x86_insn_t::get_dest() const {
x86_oplist_t *op_lst;
assert(this);
if ( ! operands ) {
return NULL;
}
assert(this->x86_operand_count(op_dest)==1);
for (op_lst = operands; op_lst; op_lst = op_lst->next ) {
if ( op_lst->op.access & op_write)
return &(op_lst->op);
@@ -171,7 +169,7 @@ uint8_t *x86_insn_t::x86_get_raw_imm() {
}
size_t x86_op_t::operand_size() {
size_t x86_op_t::operand_size() const {
switch (datatype ) {
case op_byte: return 1;
case op_word: return 2;
@@ -203,13 +201,12 @@ size_t x86_op_t::operand_size() {
return(4); /* default size */
}
/** set the address (usually RVA) of the insn */
void x86_insn_t::x86_set_insn_addr( uint32_t _addr ) {
addr = _addr;
}
void x86_insn_t::x86_set_insn_offset( unsigned int offset ){
offset = offset;
void x86_insn_t::x86_set_insn_offset( unsigned int _offset ){
offset = _offset;
}
void x86_insn_t::x86_set_insn_function( void * func ){
@@ -220,7 +217,6 @@ void x86_insn_t::x86_set_insn_block( void * _block ){
block = _block;
}
/** set insn->tag to 1 */
void x86_insn_t::x86_tag_insn(){
tag = 1;
}
@@ -229,7 +225,6 @@ void x86_insn_t::x86_untag_insn(){
tag = 0;
}
/** \return insn->tag */
int x86_insn_t::x86_insn_is_tagged(){
return tag;
}

View File

@@ -164,12 +164,11 @@ int x86_insn_t::x86_operand_foreach( x86_operand_fn func, void *arg, enum x86_op
return 1;
}
static void count_operand( x86_op_t *op, x86_insn_t *insn, void *arg ) {
static void count_operand( x86_op_t */*op*/, x86_insn_t */*insn*/, void *arg ) {
size_t * count = (size_t *) arg;
*count = *count + 1;
}
/** convenience routine: returns count of operands matching 'type' */
size_t x86_insn_t::x86_operand_count( enum x86_op_foreach_type type ) {
size_t count = 0;
@@ -185,7 +184,7 @@ size_t x86_insn_t::x86_operand_count( enum x86_op_foreach_type type ) {
}
/* accessor functions */
x86_op_t * x86_insn_t::x86_operand_1st() {
x86_op_t * x86_insn_t::operand_1st() {
if (! explicit_count ) {
return NULL;
}
@@ -193,7 +192,7 @@ x86_op_t * x86_insn_t::x86_operand_1st() {
return &(operands->op);
}
x86_op_t * x86_insn_t::x86_operand_2nd( ) {
x86_op_t * x86_insn_t::operand_2nd( ) {
if ( explicit_count < 2 ) {
return NULL;
}
@@ -201,7 +200,7 @@ x86_op_t * x86_insn_t::x86_operand_2nd( ) {
return &(operands->next->op);
}
x86_op_t * x86_insn_t::x86_operand_3rd( ) {
x86_op_t * x86_insn_t::operand_3rd( ) {
if ( explicit_count < 3 ) {
return NULL;
}

View File

@@ -1,114 +1,46 @@
PROJECT(dcc_original)
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
cmake_minimum_required(VERSION 3.1)
OPTION(dcc_build_tests "Enable unit tests." OFF)
ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS -D__UNIX__ -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS)
IF(CMAKE_BUILD_TOOL MATCHES "(msdev|devenv|nmake)")
ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS -D__UNIX__ -D_CRT_NONSTDC_NO_DEPRECATE)
#SET(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR})
ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS -D__UNIX__ -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS)
IF("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS -D__UNIX__ -D_CRT_NONSTDC_NO_DEPRECATE -DNOMINMAX)
ADD_DEFINITIONS(/W4)
ELSE()
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall --std=c++0x")
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_GLIBCXX_DEBUG " ) #--coverage
#-D_GLIBCXX_DEBUG
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11")
SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} " ) #--coverage
ENDIF()
SET(CMAKE_CXX_STANDARD 11)
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/CMakeScripts;${CMAKE_MODULE_PATH})
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR})
FIND_PACKAGE(LLVM)
FIND_PACKAGE(Boost)
set(CMAKE_INCLUDE_CURRENT_DIR ON)
set(CMAKE_AUTOMOC ON)
set(CMAKE_AUTOUIC ON)
set(CMAKE_AUTORCC ON)
find_package(Qt5Core)
find_package(Qt5Widgets)
find_package(Boost)
OPTION(dcc_build_tests "Enable unit tests." OFF)
IF(dcc_build_tests)
FIND_PACKAGE(GMock)
enable_testing()
find_package(Qt5Test)
#FIND_PACKAGE(GMock)
ENDIF()
ADD_SUBDIRECTORY(3rd_party)
llvm_map_components_to_libraries(REQ_LLVM_LIBRARIES jit native mc support)
INCLUDE_DIRECTORIES(
${PROJECT_SOURCE_DIR}
3rd_party/libdisasm
include
include/idioms
common
${Boost_INCLUDE_DIRS}
${LLVM_INCLUDE_DIRS}
)
set(dcc_SOURCES
src/dcc.cpp
src/ast.cpp
src/backend.cpp
src/bundle.cpp
src/chklib.cpp
src/comwrite.cpp
src/control.cpp
src/dataflow.cpp
src/disassem.cpp
src/error.cpp
src/fixwild.cpp
src/frontend.cpp
src/graph.cpp
src/hlicode.cpp
src/machine_x86.cpp
src/icode.cpp
src/idioms.cpp
src/idioms/idiom1.cpp
src/idioms/arith_idioms.cpp
src/idioms/call_idioms.cpp
src/idioms/epilogue_idioms.cpp
src/idioms/mov_idioms.cpp
src/idioms/neg_idioms.cpp
src/idioms/shift_idioms.cpp
src/idioms/xor_idioms.cpp
src/locident.cpp
src/parser.cpp
src/perfhlib.cpp
src/procs.cpp
src/project.cpp
src/Procedure.cpp
src/proplong.cpp
src/reducible.cpp
src/scanner.cpp
src/symtab.cpp
src/udm.cpp
src/BasicBlock.cpp
)
set(dcc_HEADERS
include/ast.h
include/bundle.h
include/BinaryImage.h
include/dcc.h
include/disassem.h
include/dosdcc.h
include/error.h
include/graph.h
include/hlicode.h
include/machine_x86.h
include/icode.h
include/idioms/idiom.h
include/idioms/idiom1.h
include/idioms/arith_idioms.h
include/idioms/call_idioms.h
include/idioms/epilogue_idioms.h
include/idioms/mov_idioms.h
include/idioms/neg_idioms.h
include/idioms/shift_idioms.h
include/idioms/xor_idioms.h
include/locident.h
include/perfhlib.h
include/project.h
include/scanner.h
include/state.h
include/symtab.h
include/types.h
include/Procedure.h
include/StackFrame.h
include/BasicBlock.h
)
SOURCE_GROUP(Source FILES ${dcc_SOURCES})
SOURCE_GROUP(Headers FILES ${dcc_HEADERS})
ADD_EXECUTABLE(dcc_original ${dcc_SOURCES} ${dcc_HEADERS})
TARGET_LINK_LIBRARIES(dcc_original disasm_s ${REQ_LLVM_LIBRARIES})
if(dcc_build_tests)
ADD_SUBDIRECTORY(3rd_party)
ADD_SUBDIRECTORY(common)
ADD_SUBDIRECTORY(tools)
ADD_SUBDIRECTORY(src)
endif()

View File

@@ -0,0 +1,21 @@
# ADD_UNIT_TEST(<name> <source>...)
# Builds the executable <name> from the listed sources, links it against
# ${UNIT_TEST_LIBS} and Qt5 Core, and registers it with CTest.
# The <name>_TEST_VISITED guard makes repeated invocations for the same name a no-op.
# Requires find_package(Qt5Core) to have been run by the including project.
MACRO(ADD_UNIT_TEST name)
    IF(NOT ${name}_TEST_VISITED)
        ADD_EXECUTABLE(${name} ${ARGN})
        # Informational only, so report it as STATUS rather than as a CMake warning.
        MESSAGE(STATUS "Adding test " ${name} " " ${ARGN})
        # Link the imported Qt target directly; it carries the include paths and
        # compile definitions that qt5_use_modules() used to set, and that macro
        # is not available in newer Qt 5 releases.
        TARGET_LINK_LIBRARIES(${name} ${UNIT_TEST_LIBS} Qt5::Core)
        ADD_TEST(NAME ${name} COMMAND ${name})
        # Tests locate their input data through DCC_TEST_BASE.
        set_property(TEST ${name} APPEND PROPERTY ENVIRONMENT DCC_TEST_BASE=${PROJECT_SOURCE_DIR})
        SET(${name}_TEST_VISITED true)
    ENDIF()
ENDMACRO()
# ADD_QTEST(<NAME>)
# Builds the QtTest executable <NAME> from <NAME>.cpp and <NAME>.h, links it
# against ${test_LIBRARIES}, Qt5 Core and Qt5 Test, and registers it with CTest.
# Requires find_package(Qt5Core) and find_package(Qt5Test) to have been run.
function(ADD_QTEST NAME)
    add_executable(${NAME} ${NAME}.cpp ${NAME}.h) #${PROTO_SRCS} ${PROTO_HDRS}
    # Imported targets replace qt5_use_modules(), which newer Qt 5 releases no longer ship.
    target_link_libraries(${NAME} ${test_LIBRARIES} Qt5::Core Qt5::Test)
    add_test( NAME ${NAME} COMMAND $<TARGET_FILE:${NAME}>)
    # Tests locate their input data through DCC_TEST_BASE.
    set_property(TEST ${NAME} APPEND PROPERTY ENVIRONMENT DCC_TEST_BASE=${PROJECT_SOURCE_DIR})
endfunction()

3392
CMakeScripts/cotire.cmake Normal file

File diff suppressed because it is too large Load Diff

127
Readme.md Normal file
View File

@@ -0,0 +1,127 @@
I've fixed many issues in this codebase, among other things - memory reallocation during decompilation.
To reflect those fixes, I've edited the original readme a bit.
* * *
dcc Distribution
================
The code provided in this distribution is (C) by their authors:
- Cristina Cifuentes (most of dcc code)
- Mike van Emmerik (signatures and prototype code)
- Jeff Ledermann (some disassembly code)
and is provided "as is". Additional contributor list is available
[on GitHub](https://github.com/nemerle/dcc/graphs/contributors).
The following files are included in the dccoo.tar.gz distribution:
- dcc.zip (dcc.exe DOS program, 1995)
- dccsrc.zip (source code *.c, *.h for dcc, 1993-1994)
- dcc32.zip (dcc_oo.exe 32 bit console (Win95/Win-NT) program, 1997)
- dccsrcoo.zip (source code *.cpp, *.h for "oo" dcc, 1993-1997)
- dccbsig.zip (library signatures for Borland C compilers, 1994)
- dccmsig.zip (library signatures for Microsoft C compilers, 1994)
- dcctpsig.zip (library signatures for Turbo Pascal compilers, 1994)
- dcclibs.dat (prototype file for C headers, 1994)
- test.zip (sample test files: *.c *.exe *.b, 1993-1996)
- makedsig.zip (creates a .sig file from a .lib C file, 1994)
- makedstp.zip (creates a .sig file from a Pascal library file, 1994)
- readsig.zip (reads signatures in a .sig file, 1994)
- dispsrch.zip (displays the name of a function given a signature, 1994)
- parsehdr.zip (generates a prototype file (dcclibs.dat) from C *.h files, 1994)
Note that the dcc_oo.exe program (in dcc32.zip) is a 32 bit program,
so it won't work under Windows 3.1. Also, it is a console mode program,
meaning that it has to be run in the "Command Prompt" window (sometimes
known as the "Dos Box"). It is not a GUI program.
The following files are included in the test.zip file: fibo,
benchsho, benchlng, benchfn, benchmul, byteops, intops, longops,
max, testlong, matrixmu, strlen, dhamp.
The version of dcc included in this distribution (dccsrcoo.zip and
dcc32.exe) is a bit better than the first release, but it is still
broken in some cases, and we do not have the time to work on this
project at present, so we cannot provide any changes.
Comments on individual files:
- fibo (fibonacci): the small model (fibos.exe) decompiles correctly,
the large model (fibol.exe) expects an extra argument for
`scanf()`. This argument is the segment and is not displayed.
- benchsho: the first `scanf()` takes loc0 as an argument. This is
part of a long variable, but dcc does not have any clue at that
stage that the stack offset pushed on the stack is to be used
as a long variable rather than an integer variable.
- benchlng: as part of the `main()` code, `LO(loc1) | HI(loc1)` should
be displayed instead of `loc3 | loc9`. These two integer variables
are equivalent to the one long loc1 variable.
- benchfn: see benchsho.
- benchmul: see benchsho.
- byteops: decompiles correctly.
- intops: the du analysis for `DIV` and `MOD` is broken. dcc currently
generates code for a long and an integer temporary register that
were used as part of the analysis.
- longops: decompiles correctly.
- max: decompiles correctly.
- testlong: this example decompiles correctly given the algorithms
implemented in dcc. However, it shows that when long variables
are defined and used as integers (or long) without giving dcc
any hint that this is happening, the variable will be treated as
two integer variables. This is due to the fact that the assembly
code is in terms of integer registers, and long registers are not
available in 80286, so a long variable is equivalent to two integer
registers. dcc only knows of this through idioms such as add two
long variables.
- matrixmu: decompiles correctly. Shows that arrays are not supported
in dcc.
- strlen: decompiles correctly. Shows that pointers are partially
supported by dcc.
- dhamp: this program has far more data types than what dcc recognizes
at present.
Our thanks to Gary Shaffstall for some debugging work. Current bugs
are:
- [ ] if the code generated for a single line is too long, the (static)
buffer used for that line is clobbered. Solution: make the buffer
larger (currently 200 chars).
- [ ] the large memory model problem & `scanf()`
- [ ] dcc's error message shows a p option available which doesn't
exist, and doesn't show an i option which exists.
- [x] there is a nasty problem whereby some arrays can get reallocated
to a new address, and some pointers can become invalid. This mainly
tends to happen to larger executable files. A major rewrite will
probably be required to fix this.
For more information refer to the thesis "Reverse Compilation
Techniques" by Cristina Cifuentes, Queensland University of
Technology, 1994, and the dcc home page:
http://www.it.uq.edu.au/groups/csm/dcc_readme.html
Please note that the executable version of dcc provided in this
distribution does not necessarily match the source code provided,
some changes were done without us keeping track of every change.
Using dcc
---------
Here is a very brief summary of switches for dcc:
* `a1`, `a2`: assembler output, before and after re-ordering of input code
* `c`: Attempt to follow control through indirect call instructions
* `i`: Enter interactive disassembler
* `m`: Memory map
* `s`: Statistics summary
* `v`, `V`: verbose (and Very verbose)
* `o` filename: Use filename as assembler output file
If dcc encounters illegal instructions, it will attempt to enter the so called
interactive disassembler. The idea of this was to allow commands to fix the
problem so that dcc could continue, but no such changes are implemented
as yet. (Note: the Unix versions do not have the interactive disassembler). If
you get into this, you can get out of it by pressing `^X` (control-X). Once dcc
has entered the interactive disassembler, however, there is little chance that
it will recover and produce useful output.
If dcc loads the signature file `dccxxx.sig`, this means that it has not
recognised the compiler library used. You can place the signatures in a
different directory from where you are working if you set the DCC environment
variable to point to their path. Note that if dcc can't find its signature
files, it will be severely handicapped.

View File

@@ -1,6 +1,7 @@
#!/bin/bash
cd bld
make -j5
cd ..
#cd bld
#make -j5
#cd ..
mkdir -p tests/outputs
./test_use_base.sh
./regression_tester.rb ./bld/dcc_original -s -c 2>stderr >stdout; diff tests/prev/ tests/outputs/
./regression_tester.rb ./dcc_original -s -c 2>stderr >stdout; diff -wB tests/prev/ tests/outputs/

7
common/CMakeLists.txt Normal file
View File

@@ -0,0 +1,7 @@
set(SRC
perfhlib.cpp
perfhlib.h
PatternCollector.h
)
add_library(dcc_hash STATIC ${SRC})

82
common/PatternCollector.h Normal file
View File

@@ -0,0 +1,82 @@
#ifndef PATTERNCOLLECTOR
#define PATTERNCOLLECTOR
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <vector>
#define SYMLEN 16 /* Number of chars in the symbol name, incl null */
#define PATLEN 23 /* Number of bytes in the pattern part */

/** One key record: a symbol name together with the byte pattern that is hashed. */
struct HASHENTRY
{
    char name[SYMLEN];      /* The symbol name */
    uint8_t pat [PATLEN];   /* The pattern */
    uint16_t offset;        /* Offset (needed temporarily) */
};

/**
 * Base class for readers that collect HASHENTRY keys from a file.
 * Derived classes implement readSyms(); the helpers below read little-endian
 * values from a FILE and terminate the process with exit(11) on a short read.
 */
struct PatternCollector {
    uint8_t buf[100], bufSave[7]; /* Temp buffer for reading the file */

    /* Virtual so that deleting a derived collector through a
       PatternCollector pointer is well defined. */
    virtual ~PatternCollector() {}

    /** Read a 16-bit little-endian value (low byte first); same as readWord(). */
    uint16_t readShort(FILE *f)
    {
        return readWord(f);
    }

    /**
     * Read exactly n bytes from f into buf.
     * Exits with code 11 if n does not fit into buf or the read comes up short.
     */
    void grab(FILE *f,int n)
    {
        /* Refuse requests that would write past the end of buf */
        if (n < 0 || (size_t)n > sizeof(buf))
        {
            printf("Could not read: %d bytes do not fit the read buffer\n", n);
            exit(11);
        }
        if (fread(buf, 1, n, f) != (size_t)n)
        {
            printf("Could not read\n");
            exit(11);
        }
    }

    /** Read a single byte from f; exits with code 11 on failure. */
    uint8_t readByte(FILE *f)
    {
        uint8_t b;
        if (fread(&b, 1, 1, f) != 1)
        {
            printf("Could not read\n");
            exit(11);
        }
        return b;
    }

    /** Read a 16-bit little-endian value (low byte first). */
    uint16_t readWord(FILE *fl)
    {
        uint8_t b1, b2;
        b1 = readByte(fl);
        b2 = readByte(fl);
        return b1 + (b2 << 8);
    }

    /* Called by map(). Return the i+1th key in *pKeys */
    uint8_t *getKey(int i)
    {
        return keys[i].pat;
    }

    /* Display key i */
    void dispKey(int i)
    {
        printf("%s", keys[i].name);
    }

    std::vector<HASHENTRY> keys; /* array of keys */

    /** Implemented by derived classes to populate keys from f. */
    virtual int readSyms(FILE *f)=0;
};
#endif // PATTERNCOLLECTOR

438
common/perfhlib.cpp Normal file
View File

@@ -0,0 +1,438 @@
/*
*$Log: perfhlib.c,v $
* Revision 1.5 93/09/29 14:45:02 emmerik
* Oops, didn't do the casts last check in
*
* Revision 1.4 93/09/29 14:41:45 emmerik
* Added casts to mod instructions to keep the SVR4 compiler happy
*
*
* Perfect hashing function library. Contains functions to generate perfect
* hashing functions
*/
#include "perfhlib.h"
#include "PatternCollector.h"
#include "msvc_fixes.h"
#include <stdio.h>
#include <cassert>
#include <stdlib.h>
#include <string.h>
/* Private data structures */
//static int NumEntry; /* Number of entries in the hash table (# keys) */
//static int EntryLen; /* Size (bytes) of each entry (size of keys) */
//static int SetSize; /* Size of the char set */
//static char SetMin; /* First char in the set */
//static int NumVert; /* c times NumEntry */
//static uint16_t *T1base, *T2base; /* Pointers to start of T1, T2 */
/* Work arrays for graph construction. These are file-scope statics, not
   PerfectHash members, so every PerfectHash object uses the same storage.
   NOTE(review): presumably only one table may be built at a time - confirm. */
static uint16_t *T1, *T2; /* Pointers to T1[i], T2[i] */
static int *graphNode; /* The array of edges */
static int *graphNext; /* Linked list of edges */
static int *graphFirst;/* First edge at a vertex */
static int numEdges; /* An edge counter */
static bool *visited; /* Array of bools: whether visited */
static bool *deleted; /* Array of bools: whether deleted */
/* Private prototypes */
static void duplicateKeys(int v1, int v2);
/**
 * Record the table dimensions and allocate every work array.
 *
 *  _NumEntry  number of keys
 *  _EntryLen  size in bytes of each key
 *  _SetSize   size of the character set
 *  _SetMin    first character of the set
 *  _NumVert   number of graph vertices
 *
 * On allocation failure a message is printed, anything already allocated is
 * released through hashCleanup() and the process exits with status 1.
 */
void PerfectHash::setHashParams(int _NumEntry, int _EntryLen, int _SetSize, char _SetMin,
                                int _NumVert)
{
    /* These parameters are cached in the object so as to obviate the need for
       passing all these (or dereferencing pointers) for every call to hash()
    */
    NumEntry = _NumEntry;
    EntryLen = _EntryLen;
    SetSize = _SetSize;
    SetMin = _SetMin;
    NumVert = _NumVert;

    /* Start from a known state so that hashCleanup() never frees an
       uninitialised or stale pointer if one of the allocations below fails */
    T1base = T2base = nullptr;
    g = nullptr;
    graphNode = graphNext = graphFirst = nullptr;
    visited = deleted = nullptr;

    /* Do the size arithmetic in size_t so the products cannot overflow int */
    const size_t tableLen  = (size_t)EntryLen * (size_t)SetSize;
    const size_t edgeSlots = (size_t)NumEntry * 2 + 1;
    const size_t vertSlots = (size_t)NumVert + 1;
    const size_t keySlots  = (size_t)NumEntry + 1;

    /* Allocate the variable sized tables etc; stop at the first failure */
    const bool allocated =
        (T1base     = (uint16_t *)malloc(tableLen * sizeof(uint16_t))) != nullptr &&
        (T2base     = (uint16_t *)malloc(tableLen * sizeof(uint16_t))) != nullptr &&
        (graphNode  = (int *)malloc(edgeSlots * sizeof(int)))          != nullptr &&
        (graphNext  = (int *)malloc(edgeSlots * sizeof(int)))          != nullptr &&
        (graphFirst = (int *)malloc(vertSlots * sizeof(int)))          != nullptr &&
        (g          = (short *)malloc(vertSlots * sizeof(short)))      != nullptr &&
        (visited    = (bool *)malloc(vertSlots * sizeof(bool)))        != nullptr &&
        (deleted    = (bool *)malloc(keySlots * sizeof(bool)))         != nullptr;
    if (allocated)
    {
        return;
    }

    printf("Could not allocate memory\n");
    hashCleanup();
    exit(1);
}
void PerfectHash::hashCleanup(void)
{
/* Free the storage for variable sized tables etc */
if (T1base) free(T1base);
if (T2base) free(T2base);
if (graphNode) free(graphNode);
if (graphNext) free(graphNext);
if (graphFirst) free(graphFirst);
if (g) free(g);
if (visited) free(visited);
if (deleted) free(deleted);
}
/**
 * Part 1 of creating the tables: repeatedly fill T1/T2 with random values and
 * build the graph with one edge per key, until the graph has neither a self
 * loop nor a cycle. Each rejected attempt is reported on stdout.
 */
void PerfectHash::map(PatternCollector *collector)
{
    m_collector = collector;
    assert(nullptr!=collector);

    int attempt = 0;
    for (;;)
    {
        initGraph();

        /* Randomly generate T1 and T2 */
        const int tableLen = SetSize*EntryLen;
        for (int slot = 0; slot < tableLen; ++slot)
        {
            T1base[slot] = rand() % NumVert;
            T2base[slot] = rand() % NumVert;
        }

        bool rejected = false;
        for (int entry = 0; entry < NumEntry; ++entry)
        {
            uint8_t *key = m_collector->getKey(entry);
            uint16_t f1 = 0;
            uint16_t f2 = 0;
            for (int pos = 0; pos < EntryLen; ++pos)
            {
                T1 = T1base + pos * SetSize;
                T2 = T2base + pos * SetSize;
                f1 += T1[key[pos] - SetMin];
                f2 += T2[key[pos] - SetMin];
            }
            f1 %= (uint16_t)NumVert;
            f2 %= (uint16_t)NumVert;
            if (f1 == f2)
            {
                /* A self loop. Reject! */
                printf("Self loop on vertex %d!\n", f1);
                rejected = true;
                break;
            }
            addToGraph(numEdges++, f1, f2);
        }

        /* Only look for cycles when no self loop was found */
        if (!rejected)
        {
            rejected = isCycle();
        }
        if (!rejected)
        {
            return;     /* acyclic graph: done */
        }
        printf("Iteration %d\n", ++attempt);
    }
}
/* Initialise the graph */
/* Reset the graph: no vertex has a first edge, no edge has a successor, and
   the edge counter is zero */
void PerfectHash::initGraph()
{
    for (int vertex = 1; vertex <= NumVert; ++vertex)
    {
        graphFirst[vertex] = 0;
    }
    /* graphNext is indexed by NumEntry+e for e in -NumEntry..NumEntry, i.e.
       slots 0..2*NumEntry. graphNode[] needs no init: every slot in use is
       filled by successive calls to addToGraph() */
    for (int slot = 0; slot <= 2*NumEntry; ++slot)
    {
        graphNext[slot] = 0;
    }
    numEdges = 0;
}
/* Add an edge e between vertices v1 and v2 */
/* e, v1, v2 are 0 based */
/* Insert edge e between v1 and v2 (all 0 based on entry). Internally edges
   and vertices are 1 based; +edge is the edge as seen from v1 and -edge the
   same edge as seen from v2. */
void PerfectHash::addToGraph(int e, int v1, int v2)
{
    const int edge = e + 1;
    const int head = v1 + 1;
    const int tail = v2 + 1;

    /* Record the far end for each direction of the edge */
    graphNode[NumEntry+edge] = tail;
    graphNode[NumEntry-edge] = head;

    /* Push +edge on the front of head's edge list */
    graphNext[NumEntry+edge] = graphFirst[head];
    graphFirst[head] = edge;

    /* Push -edge on the front of tail's edge list */
    graphNext[NumEntry-edge] = graphFirst[tail];
    graphFirst[tail] = -edge;
}
/* Search the graph depth first from vertex v for a cycle.
   parentE is the signed edge through which v was reached; isCycle() passes
   -32767 when v is the root of a search. Returns true as soon as a cycle is
   found. An edge that merely duplicates the key of the parent edge is marked
   in deleted[] and skipped instead of being reported as a cycle. */
bool PerfectHash::DFS(int parentE, int v)
{
int e, w;
/* Depth first search of the graph, starting at vertex v, looking for
cycles. parent and v are origin 1. Note parent is an EDGE,
not a vertex */
visited[v] = true;
/* For each e incident with v .. */
for (e = graphFirst[v]; e; e = graphNext[NumEntry+e])
{
uint8_t *key1;
if (deleted[abs(e)])
{
/* A deleted key. Just ignore it */
continue;
}
key1 = m_collector->getKey(abs(e)-1);
w = graphNode[NumEntry+e];
if (visited[w])
{
/* Did we just come through this edge? If so, ignore it. */
if (abs(e) != abs(parentE))
{
/* There is a cycle in the graph. There is some subtle code here
to work around the distinct possibility that there may be
duplicate keys. Duplicate keys will always cause unit
cycles, since f1 and f2 (used to select v and w) will be the
same for both. The edges (representing an index into the
array of keys) are distinct, but the key values are not.
The logic is as follows: for the candidate edge e, check to
see if it terminates in the parent vertex. If so, we test
the keys associated with e and the parent, and if they are
the same, we can safely ignore e for the purposes of cycle
detection, since edge e adds nothing to the cycle. Cycles
involving v, w, and e0 will still be found. The parent
edge was not similarly eliminated because at the time when
it was a candidate, v was not yet visited.
We still have to remove the key from further consideration,
since each edge is visited twice, but with a different
parent edge each time.
*/
/* We save some stack space by calculating the parent vertex
for these relatively few cases where it is needed */
int parentV = graphNode[NumEntry-parentE];
if (w == parentV)
{
uint8_t *key2;
key2=m_collector->getKey(abs(parentE)-1);
if (memcmp(key1, key2, EntryLen) == 0)
{
printf("Duplicate keys with edges %d and %d (",
e, parentE);
m_collector->dispKey(abs(e)-1);
printf(" & ");
m_collector->dispKey(abs(parentE)-1);
printf(")\n");
deleted[abs(e)] = true; /* Wipe the key */
}
else
{
/* A genuine (unit) cycle. */
printf("There is a unit cycle involving vertex %d and edge %d\n", v, e);
return true;
}
}
else
{
/* We have reached a previously visited vertex not the
parent. Therefore, we have uncovered a genuine cycle */
printf("There is a cycle involving vertex %d and edge %d\n", v, e);
return true;
}
}
}
else /* Not yet seen. Traverse it */
{
if (DFS(e, w))
{
/* Cycle found deeper down. Exit */
return true;
}
}
}
return false;
}
/* Return true if the graph built by map() contains a cycle. Clears the
   visited[] and deleted[] marks, then starts a DFS from every vertex that an
   earlier search has not reached. Vertices and edges are origin 1 here. */
bool PerfectHash::isCycle(void)
{
    for (int vertex = 1; vertex <= NumVert; ++vertex)
    {
        visited[vertex] = false;
    }
    for (int edge = 1; edge <= NumEntry; ++edge)
    {
        deleted[edge] = false;
    }

    for (int root = 1; root <= NumVert; ++root)
    {
        if (visited[root])
        {
            continue;
        }
        /* -32767 stands for "no parent edge" */
        if (DFS(-32767, root))
        {
            return true;
        }
    }
    return false;
}
/* Starting at vertex u (0 based), give every not yet visited neighbour w a
   value g[w] such that g[u] + g[w] is congruent to the index of the joining
   edge modulo NumEntry, then continue recursively from w. */
void PerfectHash::traverse(int u)
{
    visited[u] = true;
    /* Walk the list of edges attached to u (the lists are 1 based) */
    for (int edge = graphFirst[1+u]; edge != 0; edge = graphNext[NumEntry+edge])
    {
        const int neighbour = graphNode[NumEntry+edge]-1;
        if (visited[neighbour])
        {
            continue;
        }
        short &slot = g[neighbour];
        slot = (abs(edge)-1 - g[u]) % NumEntry;
        if (slot < 0) slot += NumEntry; /* Keep these positive */
        traverse(neighbour);
    }
}
/* Part 2 of creating the tables: compute g[] for every vertex by traversing
   each connected component of the graph built by map(). */
void PerfectHash::assign(void)
{
    int vertex = 0;
    while (vertex < NumVert)
    {
        g[vertex] = 0; /* g is sparse; leave the gaps 0 */
        visited[vertex] = false;
        ++vertex;
    }

    for (vertex = 0; vertex < NumVert; ++vertex)
    {
        if (visited[vertex])
        {
            continue;
        }
        /* First vertex of a new component: anchor it at 0 */
        g[vertex] = 0;
        traverse(vertex);
    }
}
/* Hash the EntryLen bytes at string: sum the T1 and T2 entries selected by
   each byte, reduce both sums modulo NumVert, and combine the g[] values of
   the two resulting vertices modulo NumEntry. */
int PerfectHash::hash(uint8_t *string)
{
    uint16_t u = 0;
    uint16_t v = 0;
    for (int pos = 0; pos < EntryLen; ++pos)
    {
        /* Row pos of each table holds the values for byte position pos */
        T1 = T1base + pos * SetSize;
        T2 = T2base + pos * SetSize;
        u += T1[string[pos] - SetMin];
        v += T2[string[pos] - SetMin];
    }
    u %= NumVert;
    v %= NumVert;
    return (g[u] + g[v]) % NumEntry;
}
/* Disabled debugging aid, kept for reference. For every key whose T1 hash is
   v1 or v2 and whose T2 hash is also v1 or v2 it prints the key bytes and
   calls dispRecord(), then exits with status 1. It still calls the old free
   function getKey(i, &keys), so it needs porting to PatternCollector::getKey()
   before it can be enabled. */
#if 0
void dispRecord(int i);
void
duplicateKeys(int v1, int v2)
{
int i, j;
uint8_t *keys;
int u, v;
v1--; v2--; /* These guys are origin 1 */
printf("Duplicate keys:\n");
for (i=0; i < NumEntry; i++)
{
getKey(i, &keys);
u = 0;
for (j=0; j < EntryLen; j++)
{
T1 = T1base + j * SetSize;
u += T1[keys[j] - SetMin];
}
u %= NumVert;
if ((u != v1) and (u != v2)) continue;
v = 0;
for (j=0; j < EntryLen; j++)
{
T2 = T2base + j * SetSize;
v += T2[keys[j] - SetMin];
}
v %= NumVert;
if ((v == v2) or (v == v1))
{
printf("Entry #%d key: ", i+1);
for (j=0; j < EntryLen; j++) printf("%02X ", keys[j]);
printf("\n");
dispRecord(i+1);
}
}
exit(1);
}
#endif

39
common/perfhlib.h Normal file
View File

@@ -0,0 +1,39 @@
#pragma once
#include <stdint.h>
/** Perfect hashing function library. Contains functions to generate perfect
hashing functions */
struct PatternCollector;
/**
 * Generator and evaluator of a perfect hash function.
 * Usage order, per the member comments: setHashParams(), map(), assign(),
 * then hash(); hashCleanup() frees what setHashParams() allocated.
 * Every member has a default value, so the accessors and hashCleanup() are
 * safe on an object for which setHashParams() was never called.
 */
struct PerfectHash {
    uint16_t *T1base = nullptr;
    uint16_t *T2base = nullptr; /* Pointers to start of T1, T2 */
    short *g = nullptr;         /* g[] */
    int NumEntry = 0;           /* Number of entries in the hash table (# keys) */
    int EntryLen = 0;           /* Size (bytes) of each entry (size of keys) */
    int SetSize = 0;            /* Size of the char set */
    char SetMin = 0;            /* First char in the set */
    int NumVert = 0;            /* c times NumEntry */
    /** Set the parameters for the hash table */
    void setHashParams(int _numEntry, int _entryLen, int _setSize, char _setMin, int _numVert);
public:
    void map(PatternCollector * collector); /* Part 1 of creating the tables */
    void hashCleanup(); /* Frees memory allocated by setHashParams() */
    void assign(); /* Part 2 of creating the tables */
    int hash(uint8_t *string); /* Hash the string to an int 0 .. NUMENTRY-1 */
    /* Read access to the generated tables; g is exposed as unsigned 16 bit values */
    const uint16_t *readT1(void) const { return T1base; }
    const uint16_t *readT2(void) const { return T2base; }
    const uint16_t *readG(void) const { return (uint16_t *)g; }
    uint16_t *readT1(void){ return T1base; }
    uint16_t *readT2(void){ return T2base; }
    uint16_t *readG(void) { return (uint16_t *)g; }
private:
    void initGraph();                       /* Reset the graph before an attempt */
    void addToGraph(int e, int v1, int v2); /* Add edge e between v1 and v2 */
    bool isCycle();                         /* True if the graph has a cycle */
    bool DFS(int parentE, int v);           /* Cycle search used by isCycle() */
    void traverse(int u);                   /* g[] assignment used by assign() */
    PatternCollector *m_collector = nullptr; /* used to retrieve the keys */
};

View File

@@ -1,3 +1,4 @@
#!/bin/bash
mkdir -p tests/outputs
./test_use_all.sh
./regression_tester.rb ./bld/dcc_original -s -c 2>stderr >stdout; diff tests/prev/ tests/outputs/
./regression_tester.rb ./dcc_original -s -c 2>stderr >stdout; diff -wB tests/prev/ tests/outputs/

View File

@@ -3,37 +3,39 @@
#include <vector>
#include <bitset>
#include <string>
#include <llvm/ADT/ilist.h>
#include <llvm/ADT/ilist_node.h>
#include <boost/range.hpp>
#include <boost/range/iterator_range.hpp>
#include "icode.h"
#include "types.h"
#include "graph.h"
//#include "icode.h"
/* Basic block (BB) node definition */
struct Function;
class Function;
class CIcodeRec;
struct BB;
struct LOCAL_ID;
struct interval;
//TODO: consider default address value -> INVALID
struct TYPEADR_TYPE
{
uint32_t ip; /* Out edge icode address */
BB * BBptr; /* Out edge pointer to next BB */
interval *intPtr; /* Out edge ptr to next interval*/
TYPEADR_TYPE(uint32_t addr=0) : ip(addr),BBptr(nullptr),intPtr(nullptr)
{}
TYPEADR_TYPE(interval *v) : ip(0),BBptr(nullptr),intPtr(v)
{}
};
struct BB : public llvm::ilist_node<BB>
struct BB
{
friend class Function;
private:
BB(const BB&);
BB() : nodeType(0),traversed(DFS_NONE),
numHlIcodes(0),flg(0),
inEdges(0),
edges(0),beenOnH(0),inEdgeCount(0),reachingInt(0),
inInterval(0),correspInt(0),liveUse(0),def(0),liveIn(0),liveOut(0),
dfsFirstNum(0),dfsLastNum(0),immedDom(0),ifFollow(0),loopType(0),latchNode(0),
inInterval(0),correspInt(0),
dfsFirstNum(0),dfsLastNum(0),immedDom(0),ifFollow(0),loopType(NO_TYPE),latchNode(0),
numBackEdges(0),loopHead(0),loopFollow(0),caseHead(0),caseTail(0),index(0)
{
@@ -41,6 +43,7 @@ private:
//friend class SymbolTableListTraits<BB, Function>;
typedef boost::iterator_range<iICODE> rCODE;
rCODE instructions;
rCODE &my_range() {return instructions;}
public:
struct ValidFunctor
@@ -72,36 +75,31 @@ public:
interval *inInterval; /* Node's interval */
/* For derived sequence construction */
interval *correspInt; /* Corresponding interval in
* derived graph Gi-1 */
/* For live register analysis
* LiveIn(b) = LiveUse(b) U (LiveOut(b) - Def(b)) */
std::bitset<32> liveUse; /* LiveUse(b) */
std::bitset<32> def; /* Def(b) */
std::bitset<32> liveIn; /* LiveIn(b) */
std::bitset<32> liveOut; /* LiveOut(b) */
interval *correspInt; //!< Corresponding interval in derived graph Gi-1
// For live register analysis
// LiveIn(b) = LiveUse(b) U (LiveOut(b) - Def(b))
LivenessSet liveUse; /* LiveUse(b) */
LivenessSet def; /* Def(b) */
LivenessSet liveIn; /* LiveIn(b) */
LivenessSet liveOut; /* LiveOut(b) */
/* For structuring analysis */
int dfsFirstNum; /* DFS #: first visit of node */
int dfsLastNum; /* DFS #: last visit of node */
int immedDom; /* Immediate dominator (dfsLast
* index) */
int immedDom; /* Immediate dominator (dfsLast index) */
int ifFollow; /* node that ends the if */
int loopType; /* Type of loop (if any) */
eNodeHeaderType loopType; /* Type of loop (if any) */
int latchNode; /* latching node of the loop */
int numBackEdges; /* # of back edges */
int loopHead; /* most nested loop head to which
* thcis node belongs (dfsLast) */
size_t numBackEdges; /* # of back edges */
int loopHead; /* most nested loop head to which this node belongs (dfsLast) */
int loopFollow; /* node that follows the loop */
int caseHead; /* most nested case to which this
node belongs (dfsLast) */
int caseHead; /* most nested case to which this node belongs (dfsLast) */
int caseTail; /* tail node for the case */
int index; /* Index, used in several ways */
static BB * Create(void *ctx=0,const std::string &s="",Function *parent=0,BB *insertBefore=0);
static BB * Create(int start, int ip, uint8_t nodeType, int numOutEdges, Function * parent);
static BB * Create(iICODE start, iICODE fin, uint8_t _nodeType, int numOutEdges, Function *parent);
static BB * CreateIntervalBB(Function *parent);
static BB * Create(const rCODE &r, eBBKind _nodeType, Function *parent);
void writeCode(int indLevel, Function *pProc, int *numLoc, int latchNode, int ifFollow);
void mergeFallThrough(CIcodeRec &Icode);
void dfsNumbering(std::vector<BB *> &dfsLast, int *first, int *last);
@@ -111,21 +109,26 @@ public:
///
const Function *getParent() const { return Parent; }
Function *getParent() { return Parent; }
void writeBB(std::ostream &ostr, int lev, Function *pProc, int *numLoc);
void writeBB(QTextStream & ostr, int lev, Function *pProc, int *numLoc);
BB * rmJMP(int marker, BB *pBB);
void genDU1();
int findBBExps(LOCAL_ID &locals, Function *f);
void findBBExps(LOCAL_ID &locals, Function *f);
bool valid() {return 0==(flg & INVALID_BB); }
bool wasTraversedAtLevel(int l) const {return traversed==l;}
ICODE * writeLoopHeader(int &indLevel, Function* pProc, int *numLoc, BB *&latch, boolT &repCond);
ICODE * writeLoopHeader(int &indLevel, Function* pProc, int *numLoc, BB *&latch, bool &repCond);
void addOutEdge(uint32_t ip) // TODO: fix this
{
edges[0].ip = ip;
edges.push_back(TYPEADR_TYPE(ip));
}
void addOutEdgeInterval(interval *i) // TODO: fix this
{
edges.push_back(TYPEADR_TYPE(i));
}
void RemoveUnusedDefs(eReg regi, int defRegIdx, iICODE picode);
private:
bool FindUseBeforeDef(eReg regi, int defRegIdx, iICODE start_at);
void ProcessUseDefForFunc(eReg regi, int defRegIdx, iICODE picode);
void ProcessUseDefForFunc(eReg regi, int defRegIdx, ICODE &picode);
bool isEndOfPath(int latch_node_idx) const;
Function *Parent;

View File

@@ -1,20 +1,23 @@
#pragma once
#include <stdint.h>
#include <vector>
struct PROG /* Loaded program image parameters */
{
int16_t initCS;
int16_t initIP; /* These are initial load values */
int16_t initSS; /* Probably not of great interest */
uint16_t initSP;
bool fCOM; /* Flag set if COM program (else EXE)*/
int cReloc; /* No. of relocation table entries */
uint32_t * relocTable; /* Ptr. to relocation table */
uint8_t * map; /* Memory bitmap ptr */
int cProcs; /* Number of procedures so far */
int offMain; /* The offset of the main() proc */
uint16_t segMain; /* The segment of the main() proc */
bool bSigs; /* True if signatures loaded */
int cbImage; /* Length of image in bytes */
uint8_t * Image; /* Allocated by loader to hold entire program image */
int16_t initCS=0;
int16_t initIP=0; /* These are initial load values */
int16_t initSS=0; /* Probably not of great interest */
uint16_t initSP=0;
bool fCOM=false; /* Flag set if COM program (else EXE)*/
int cReloc=0; /* No. of relocation table entries */
std::vector<uint32_t> relocTable; /* Ptr. to relocation table */
uint8_t * map=nullptr; /* Memory bitmap ptr */
int cProcs=0; /* Number of procedures so far */
int offMain=0; /* The offset of the main() proc */
uint16_t segMain=0; /* The segment of the main() proc */
int cbImage=0; /* Length of image in bytes */
uint8_t * Imagez=nullptr; /* Allocated by loader to hold entire program image */
public:
const uint8_t *image() const {return Imagez;}
void displayLoadInfo();
};

41
include/CallConvention.h Normal file
View File

@@ -0,0 +1,41 @@
#pragma once
#include "ast.h"
#ifdef PASCAL
#undef PASCAL
#endif
class QTextStream;
/**
 * Interface implemented by each supported calling convention.
 * Instances are obtained from create() and used through CConv pointers.
 */
struct CConv {
    enum CC_Type {
        UNKNOWN=0,
        C,
        PASCAL
    };
    /* Virtual so that an object returned by create() can be destroyed
       through a CConv pointer */
    virtual ~CConv() {}
    virtual void processHLI(Function *func, Expr *_exp, iICODE picode)=0;
    //! given return and argument types fill Function's STKFRAME and return locations
    virtual void calculateStackLayout(Function *func)=0;
    virtual void writeComments(QTextStream &)=0;
    //! returns a calling convention object for the requested kind
    static CConv * create(CC_Type v);
protected:
};
struct C_CallingConvention : public CConv {
virtual void processHLI(Function *func, Expr *_exp, iICODE picode) override;
virtual void writeComments(QTextStream &) override;
void calculateStackLayout(Function *func) override;
private:
int processCArg(Function *callee, Function *pProc, ICODE *picode, size_t numArgs);
};
struct Pascal_CallingConvention : public CConv {
virtual void processHLI(Function *func, Expr *_exp, iICODE picode) override;
virtual void writeComments(QTextStream &) override;
void calculateStackLayout(Function *func) override;
};
struct Unknown_CallingConvention : public CConv {
void processHLI(Function *func, Expr *_exp, iICODE picode) override {}
void calculateStackLayout(Function *func) override;
virtual void writeComments(QTextStream &) override;
};

19
include/CallGraph.h Normal file
View File

@@ -0,0 +1,19 @@
#pragma once
#include "Procedure.h"
/* CALL GRAPH NODE */
struct CALL_GRAPH
{
    PtrFunction proc;                   /* Pointer to procedure in pProcList */
    std::vector<CALL_GRAPH *> outEdges; /* array of out edges */

    CALL_GRAPH() {}

    void write();
    void writeNodeCallGraph(int indIdx);
    bool insertCallGraph(PtrFunction caller, PtrFunction callee);
    void insertArc(PtrFunction newProc);
};
//extern CALL_GRAPH * callGraph; /* Pointer to the head of the call graph */

44
include/DccFrontend.h Normal file
View File

@@ -0,0 +1,44 @@
#pragma once
#include <QtCore/QObject>
#include "src/Command.h"
#include "project.h"
class Project;
/** QObject wrapping the decompiler front end. */
class DccFrontend : public QObject
{
    Q_OBJECT

private:
    void LoadImage();
    void parse(Project &proj);

public:
    explicit DccFrontend(QObject *parent = nullptr);
    bool FrontEnd(); /* frontend.c */

signals:

public slots:
};
/** Project-level command labelled "Initialize simulated machine state". */
struct MachineStateInitialization : public Command {
    MachineStateInitialization()
        : Command("Initialize simulated machine state",eProject)
    {
    }
    bool execute(CommandContext *ctx) override;
};
struct FindMain : public Command {
FindMain() : Command("Locate the main entry point",eProject) {}
bool execute(CommandContext *ctx);
};
/** Project-level command labelled "Create function"; stores the name, address and type it was given. */
struct CreateFunction : public Command {
    QString m_name;         ///< name passed to the constructor
    SegOffAddr m_addr;      ///< address passed to the constructor
    FunctionType *m_type;   ///< type passed to the constructor

    CreateFunction(QString name,SegOffAddr address,FunctionType *f)
        : Command("Create function",eProject), m_name(name), m_addr(address), m_type(f)
    {
    }

    QString instanceDescription() const override;
    bool execute(CommandContext *ctx) override;
};
View File

@@ -8,6 +8,7 @@ enum regType
};
enum condId
{
UNDEF=0,
GLOB_VAR, /* global variable */
REGISTER, /* register */
LOCAL_VAR, /* negative disp */
@@ -89,9 +90,9 @@ enum eLLFlags
/* Types of icodes */
enum icodeType
{
NOT_SCANNED = 0, // not even scanned yet
LOW_LEVEL, // low-level icode
HIGH_LEVEL // high-level icode
NOT_SCANNED_ICODE = 0, // not even scanned yet
LOW_LEVEL_ICODE, // low-level icode
HIGH_LEVEL_ICODE // high-level icode
};
@@ -176,7 +177,7 @@ enum llIcode
iPOP,
iPOPA,
iPOPF,
iPUSH,
iPUSH, // 77
iPUSHA,
iPUSHF,
iRCL, /* 80 */
@@ -216,6 +217,7 @@ enum condNodeType
{
UNKNOWN_OP=0,
BOOLEAN_OP, /* condOps */
NEGATION, /* not (2's complement) */
ADDRESSOF, /* addressOf (&) */
DEREFERENCE, /* contents of (*) */
@@ -237,7 +239,7 @@ enum hlFirst
/* HIGH_LEVEL icodes opcodes */
enum hlIcode
{
HLI_INVALID,
HLI_INVALID=0,
HLI_ASSIGN, /* := */
HLI_CALL, /* Call procedure */
HLI_JCOND, /* Conditional jump */
@@ -261,7 +263,8 @@ enum hlType
TYPE_STR, /* string */
TYPE_CONST, /* constant (any type) */
TYPE_FLOAT, /* floating point */
TYPE_DOUBLE /* double precision float */
TYPE_DOUBLE, /* double precision float */
TYPE_FUNC
};
/* Operand is defined, used or both flag */

View File

@@ -2,32 +2,29 @@
#include "ast.h"
#include "types.h"
#include "machine_x86.h"
struct GlobalVariable;
struct AstIdent;
struct IDENTTYPE
{
friend struct GlobalVariable;
friend struct Constant;
friend struct AstIdent;
protected:
condId idType;
regType regiType; /* for REGISTER only */
public:
condId type() {return idType;}
void type(condId t) {idType=t;}
union _idNode {
int regiIdx; /* index into localId, REGISTER */
int globIdx; /* index into symtab for GLOB_VAR */
int localIdx; /* idx into localId, LOCAL_VAR */
int paramIdx; /* idx into args symtab, PARAMS */
int idxGlbIdx; /* idx into localId, GLOB_VAR_IDX */
struct _kte
{ /* for CONSTANT only */
uint32_t kte; /* value of the constant */
uint8_t size; /* #bytes size constant */
} kte;
uint32_t strIdx; /* idx into image, for STRING */
int longIdx; /* idx into LOCAL_ID table, LONG_VAR*/
struct _call { /* for FUNCTION only */
Function *proc;
STKFRAME *args;
} call;
struct { /* for OTHER; tmp struct */
eReg seg; /* segment */
eReg regi; /* index mode */
int16_t off; /* offset */
} other;
} idNode;
IDENTTYPE() : idType(UNDEF)
{}
};

View File

@@ -1,46 +1,30 @@
#pragma once
#include <llvm/ADT/ilist.h>
#include <llvm/ADT/ilist_node.h>
#include <bitset>
#include "BasicBlock.h"
#include "locident.h"
#include "state.h"
#include "icode.h"
#include "StackFrame.h"
#include "CallConvention.h"
#include <memory>
#include <stdint.h>
#include <QtCore/QString>
#include <bitset>
#include <map>
class QIODevice;
class QTextStream;
/* PROCEDURE NODE */
struct CALL_GRAPH;
struct COND_EXPR;
struct Expr;
struct Disassembler;
struct Function;
class Function;
struct CALL_GRAPH;
struct PROG;
struct IStructuredTextTarget;
typedef llvm::iplist<Function> FunctionListType;
typedef FunctionListType lFunction;
typedef lFunction::iterator ilFunction;
namespace llvm
{
// Traits for intrusive list of basic blocks...
template<>
struct ilist_traits<BB> : public ilist_default_traits<BB>
{
// createSentinel is used to get hold of the node that marks the end of the
// list... (same trick used here as in ilist_traits<Instruction>)
BB *createSentinel() const {
return static_cast<BB*>(&Sentinel);
}
static void destroySentinel(BB*) {}
BB *provideInitialHead() const { return createSentinel(); }
BB *ensureHead(BB*) const { return createSentinel(); }
static void noteHead(BB*, BB*) {}
//static ValueSymbolTable *getSymTab(Function *ItemParent);
private:
mutable ilist_half_node<BB> Sentinel;
};
}
/* Procedure FLAGS */
enum PROC_FLAGS
{
@@ -48,34 +32,63 @@ enum PROC_FLAGS
PROC_IJMP =0x00000200, /* Proc incomplete due to indirect jmp */
PROC_ICALL =0x00000400, /* Proc incomplete due to indirect call */
PROC_HLL =0x00001000, /* Proc is likely to be from a HLL */
CALL_PASCAL =0x00002000, /* Proc uses Pascal calling convention */
CALL_C =0x00004000, /* Proc uses C calling convention */
CALL_UNKNOWN=0x00008000, /* Proc uses unknown calling convention */
PROC_NEAR =0x00010000, /* Proc exits with near return */
PROC_FAR =0x00020000, /* Proc exits with far return */
GRAPH_IRRED =0x00100000, /* Proc generates an irreducible graph */
SI_REGVAR =0x00200000, /* SI is used as a stack variable */
DI_REGVAR =0x00400000, /* DI is used as a stack variable */
PROC_IS_FUNC=0x00800000, /* Proc is a function */
REG_ARGS =0x01000000, /* Proc has registers as arguments */
PROC_VARARG =0x02000000, /* Proc has variable arguments */
// PROC_VARARG =0x02000000, /* Proc has variable arguments */
PROC_OUTPUT =0x04000000, /* C for this proc has been output */
PROC_RUNTIME=0x08000000, /* Proc is part of the runtime support */
PROC_ISLIB =0x10000000, /* Proc is a library function */
PROC_ASM =0x20000000, /* Proc is an intrinsic assembler routine */
PROC_IS_HLL =0x40000000 /* Proc has HLL prolog code */
#define CALL_MASK 0xFFFF9FFF /* Masks off CALL_C and CALL_PASCAL */
//#define CALL_MASK 0xFFFF9FFF /* Masks off CALL_C and CALL_PASCAL */
};
struct FunctionType
/**
 * Minimal type descriptor: a thin wrapper around an hlType value.
 * Kept as a plain aggregate; FunctionType::addArgument builds it with `Type {hl}`.
 */
struct Type {
hlType dcc_type; // the wrapped high-level type; not default-initialised
};
struct FunctionType : public Type
{
bool m_vararg;
CConv * m_call_conv;
std::vector<Type> ContainedTys;
ID retVal; /* Return value - identifier */
bool m_vararg=false;
unsigned getNumParams() const { return ContainedTys.size(); }
bool isVarArg() const {return m_vararg;}
void setReturnType(hlType t) {
retVal.type = t;
}
void setReturnLocation(const LONGID_TYPE &v) {
retVal.loc = REG_FRAME;
retVal.longId() = v;
}
void setReturnLocation(eReg reg) {
retVal.loc = REG_FRAME;
retVal.id.regi = reg;
}
hlType getReturnType() const { return retVal.type; }
void addArgument(hlType hl) {
ContainedTys.push_back(Type {hl});
}
void clearArguments() { ContainedTys.clear(); }
void setCallingConvention(CConv::CC_Type cc);
static FunctionType *get(Type result,std::vector<Type> params, bool vararg_func) {
FunctionType * res = new FunctionType;
res->setReturnType(result.dcc_type);
std::swap(res->ContainedTys,params);
res->m_vararg = vararg_func;
return res;
}
};
struct Assignment
{
COND_EXPR *lhs;
COND_EXPR *rhs;
Expr *lhs;
Expr *rhs;
};
struct JumpTable
{
@@ -86,61 +99,108 @@ struct JumpTable
size_t entrySize() { return 2;}
void pruneEntries(uint16_t cs);
};
struct Function : public llvm::ilist_node<Function>
class FunctionCfg
{
typedef llvm::iplist<BB> BasicBlockListType;
std::list<BB*> m_listBB; /* Ptr. to BB list/CFG */
public:
typedef std::list<BB*>::iterator iterator;
iterator begin() {
return m_listBB.begin();
}
iterator end() {
return m_listBB.end();
}
BB * &front() { return m_listBB.front();}
void nodeSplitting()
{
/* Converts the irreducible graph G into an equivalent reducible one, by
* means of node splitting. */
fprintf(stderr,"Attempt to perform node splitting: NOT IMPLEMENTED\n");
}
void push_back(BB *v) { m_listBB.push_back(v);}
};
typedef std::shared_ptr<Function> PtrFunction;
/**
 * Decompilation progress marker kept per function (see Function::nStep),
 * with uint32_t as the underlying type.
 * NOTE: "eDissassembled" is misspelled, but it is an identifier and is
 * therefore left unchanged.
 */
enum DecompilationStep : uint32_t {
eNotDecoded, // no processing done yet
eDisassemblyInProgress, // NOTE(review): presumably set while disassembly is running - confirm
eDissassembled, // low level disassembly done
//eLocatedImpureRefs,
//eStackTracing, // tracing stack depth across function calls
};
class Function : public std::enable_shared_from_this<Function>
{
typedef std::list<BB *> BasicBlockListType;
// BasicBlock iterators...
typedef BasicBlockListType::iterator iterator;
typedef BasicBlockListType::const_iterator const_iterator;
private:
protected:
BasicBlockListType BasicBlocks; ///< The basic blocks
Function(FunctionType *ty) : nStep(eNotDecoded),procEntry(0),depth(0),flg(0),cbParam(0),m_dfsLast(0),numBBs(0),
hasCase(false),liveAnal(0)
{
type = ty;
if(!ty) // No type was provided, create it
type = new FunctionType;
callingConv(CConv::UNKNOWN);
}
public:
DecompilationStep nStep; // decompilation step number for this function
FunctionType * type;
uint32_t procEntry; /* label number */
std::string name; /* Meaningful name for this proc */
QString name; /* Meaningful name for this proc */
STATE state; /* Entry state */
int depth; /* Depth at which we found it - for printing */
uint32_t flg; /* Combination of Icode & Proc flags */
int16_t cbParam; /* Probable no. of bytes of parameters */
STKFRAME args; /* Array of arguments */
LOCAL_ID localId; /* Local identifiers */
ID retVal; /* Return value - identifier */
/* Icodes and control flow graph */
CIcodeRec Icode; /* Object with ICODE records */
std::list<BB*> m_cfg; /* Ptr. to BB list/CFG */
FunctionCfg m_actual_cfg;
std::vector<BB*> m_dfsLast;
std::list<BB*> heldBBs;
//BB * *dfsLast; /* Array of pointers to BBs in dfsLast
std::map<int,BB*> m_ip_to_bb;
// * (reverse postorder) order */
size_t numBBs; /* Number of BBs in the graph cfg */
bool hasCase; /* Procedure has a case node */
/* For interprocedural live analysis */
std::bitset<32> liveIn; /* Registers used before defined */
std::bitset<32> liveOut; /* Registers that may be used in successors */
LivenessSet liveIn; /* Registers used before defined */
LivenessSet liveOut; /* Registers that may be used in successors */
bool liveAnal; /* Procedure has been analysed already */
Function(void *ty=0) : procEntry(0),depth(0),flg(0),cbParam(0),m_cfg(0),m_dfsLast(0),numBBs(0),
hasCase(false),liveIn(0),liveOut(0),liveAnal(0)//,next(0),prev(0)
{
virtual ~Function() {
delete type;
}
public:
static Function *Create(void *ty=0,int Linkage=0,const std::string &nm="",void *module=0)
static PtrFunction Create(FunctionType *ty=0,int /*Linkage*/=0,const QString &nm="",void */*module*/=0)
{
Function *r=new Function(ty);
PtrFunction r(new Function(ty));
r->name = nm;
return r;
}
bool anyFlagsSet(uint32_t t) { return (flg&t)!=0;}
hlType getReturnType() const {
return getFunctionType()->getReturnType();
}
FunctionType *getFunctionType() const {
return type;
}
CConv *callingConv() const { return type->m_call_conv;}
void callingConv(CConv::CC_Type v);
// bool anyFlagsSet(uint32_t t) { return (flg&t)!=0;}
bool hasRegArgs() const { return (flg & REG_ARGS)!=0;}
void markDoNotDecompile() { flg |= PROC_ISLIB; }
bool doNotDecompile() const { return isLibrary(); }
bool isLibrary() const { return (flg & PROC_ISLIB)!=0;}
void compoundCond();
void writeProcComments();
void lowLevelAnalysis();
void bindIcodeOff();
void dataFlow(std::bitset<32> &liveOut);
void dataFlow(LivenessSet &liveOut);
void compressCFG();
void highLevelGen();
void structure(derSeq *derivedG);
@@ -148,28 +208,30 @@ public:
void createCFG();
void markImpure();
void findImmedDom();
void FollowCtrl(CALL_GRAPH *pcallGraph, STATE *pstate);
void process_operands(ICODE &pIcode, STATE *pstate);
bool process_JMP(ICODE &pIcode, STATE *pstate, CALL_GRAPH *pcallGraph);
boolT process_CALL(ICODE &pIcode, CALL_GRAPH *pcallGraph, STATE *pstate);
void freeCFG();
void codeGen(std::ostream &fs);
void codeGen(QIODevice & fs);
void mergeFallThrough(BB *pBB);
void structIfs();
void structLoops(derSeq *derivedG);
void buildCFG(Disassembler &ds);
void controlFlowAnalysis();
void newRegArg(iICODE picode, iICODE ticode);
void writeProcComments(std::ostream &ostr);
void writeProcComments(QTextStream & ostr);
void toStructuredText(IStructuredTextTarget *out,int level);
void displayCFG();
void displayStats();
void processHliCall(COND_EXPR *exp, iICODE picode);
void processHliCall(Expr *exp, iICODE picode);
void preprocessReturnDU(std::bitset<32> &_liveOut);
void preprocessReturnDU(LivenessSet &_liveOut);
Expr * adjustActArgType(Expr *_exp, hlType forType);
QString writeCall(Function *tproc, STKFRAME &args, int *numLoc);
void processDosInt(STATE *pstate, PROG &prog, bool done);
void switchState(DecompilationStep s);
protected:
void extractJumpTableRange(ICODE& pIcode, STATE *pstate, JumpTable &table);
bool followAllTableEntries(JumpTable &table, uint32_t cs, ICODE &pIcode, CALL_GRAPH *pcallGraph, STATE *pstate);
bool removeInEdge_Flag_and_ProcessLatch(BB *pbb, BB *a, BB *b);
bool Case_X_and_Y(BB* pbb, BB* thenBB, BB* elseBB);
bool Case_X_or_Y(BB* pbb, BB* thenBB, BB* elseBB);
@@ -190,10 +252,14 @@ protected:
void findExps();
void genDU1();
void elimCondCodes();
void liveRegAnalysis(std::bitset<32> &in_liveOut);
void liveRegAnalysis(LivenessSet &in_liveOut);
void findIdioms();
void propLong();
void genLiveKtes();
uint8_t findDerivedSeq (derSeq &derivedGi);
bool findDerivedSeq(derSeq &derivedGi);
bool nextOrderGraph(derSeq &derivedGi);
void addOutEdgesForConditionalJump(BB *pBB, int next_ip, LLInst *ll);
};
typedef std::list<PtrFunction> FunctionListType;
typedef FunctionListType lFunction;
typedef lFunction::iterator ilFunction;

View File

@@ -10,11 +10,11 @@ struct STKFRAME : public SymbolTableCommon<STKSYM>
//std::vector<STKSYM> sym;
//STKSYM * sym; /* Symbols */
int16_t m_minOff; /* Initial offset in stack frame*/
int16_t maxOff; /* Maximum offset in stack frame*/
int16_t m_maxOff; /* Maximum offset in stack frame*/
int cb; /* Number of bytes in arguments */
int numArgs; /* No. of arguments in the table*/
void adjustForArgType(int numArg_, hlType actType_);
STKFRAME() : m_minOff(0),maxOff(0),cb(0),numArgs(0)
void adjustForArgType(size_t numArg_, hlType actType_);
STKFRAME() : m_minOff(0),m_maxOff(0),cb(0),numArgs(0)
{
}

View File

@@ -5,10 +5,15 @@
* (C) Cristina Cifuentes
*/
#pragma once
#include "Enums.h"
#include "msvc_fixes.h"
#include <boost/range/iterator_range.hpp>
#include <stdint.h>
#include <cstring>
#include <list>
#include "Enums.h"
#include <boost/range.hpp>
static const int operandSize=20;
/* The following definitions and types define the Conditional Expression
* attributed syntax tree, as defined by the following EBNF:
@@ -22,129 +27,162 @@ static const int operandSize=20;
static const condOp condOpJCond[12] = {LESS, LESS_EQUAL, GREATER_EQUAL, GREATER,
EQUAL, NOT_EQUAL, LESS, GREATER_EQUAL,
LESS_EQUAL, GREATER, GREATER_EQUAL, LESS};
struct Function;
struct AstIdent;
class Function;
struct STKFRAME;
struct LOCAL_ID;
struct ICODE;
struct LLInst;
struct LLOperand;
struct ID;
typedef std::list<ICODE>::iterator iICODE;
typedef boost::iterator_range<iICODE> rICODE;
#include "IdentType.h"
/* Expression data type */
struct COND_EXPR
struct Expr
{
protected:
struct /* for BOOLEAN_OP */
{
condOp op;
COND_EXPR *lhs;
COND_EXPR *rhs;
} boolExpr;
public:
condNodeType m_type; /* Conditional Expression Node Type */
union _exprNode { /* Different cond expr nodes */
COND_EXPR *unaryExp; /* for NEGATION,ADDRESSOF,DEREFERENCE*/
IDENTTYPE ident; /* for IDENTIFIER */
} expr;
COND_EXPR *lhs()
{
assert(m_type==BOOLEAN_OP);
return boolExpr.lhs;
}
const COND_EXPR *lhs() const
{
assert(m_type==BOOLEAN_OP);
return boolExpr.lhs;
}
COND_EXPR *rhs()
{
assert(m_type==BOOLEAN_OP);
return boolExpr.rhs;
}
const COND_EXPR *rhs() const
{
assert(m_type==BOOLEAN_OP);
return boolExpr.rhs;
}
condOp op() const { return boolExpr.op;}
public:
static COND_EXPR *idRegIdx(int idx, regType reg_type);
static COND_EXPR *idKte(uint32_t kte, uint8_t size);
static COND_EXPR *idLoc(int off, LOCAL_ID *localId);
static COND_EXPR *idReg(eReg regi, uint32_t icodeFlg, LOCAL_ID *locsym);
static COND_EXPR *idLongIdx(int idx);
static COND_EXPR *idOther(eReg seg, eReg regi, int16_t off);
static COND_EXPR *idParam(int off, const STKFRAME *argSymtab);
static COND_EXPR *unary(condNodeType t, COND_EXPR *sub_expr);
static COND_EXPR *idLong(LOCAL_ID *localId, opLoc sd, iICODE pIcode, hlFirst f, iICODE ix, operDu du, LLInst &atOffset);
static COND_EXPR *idFunc(Function *pproc, STKFRAME *args);
static COND_EXPR *idID(const ID *retVal, LOCAL_ID *locsym, iICODE ix_);
static COND_EXPR * id(const LLInst &ll_insn, opLoc sd, Function *pProc, iICODE ix_, ICODE &duIcode, operDu du);
static COND_EXPR *boolOp(COND_EXPR *_lhs, COND_EXPR *_rhs, condOp _op);
static bool insertSubTreeLongReg(COND_EXPR *exp, COND_EXPR **tree, int longIdx);
static bool insertSubTreeReg(COND_EXPR *&tree, COND_EXPR *_expr, eReg regi, const LOCAL_ID *locsym);
static bool insertSubTreeLongReg(Expr *exp, Expr *&tree, int longIdx);
static bool insertSubTreeReg(Expr *&tree, Expr *_expr, eReg regi, const LOCAL_ID *locsym);
static bool insertSubTreeReg(AstIdent *&tree, Expr *_expr, eReg regi, const LOCAL_ID *locsym);
public:
virtual COND_EXPR *clone() const;
void release();
void changeBoolOp(condOp newOp);
COND_EXPR(const COND_EXPR &other)
virtual Expr *clone() const=0; //!< Makes a deep copy of the given expression
Expr(condNodeType t=UNKNOWN_OP) : m_type(t)
{
m_type=other.m_type;
expr=other.expr;
boolExpr=other.boolExpr;
}
COND_EXPR(condNodeType t=UNKNOWN_OP) : m_type(t)
{
memset(&expr,0,sizeof(_exprNode));
memset(&boolExpr,0,sizeof(boolExpr));
}
virtual ~COND_EXPR() {}
/** Recursively deallocates the abstract syntax tree rooted at *exp */
virtual ~Expr() {}
public:
virtual COND_EXPR *inverse() const; // return new COND_EXPR that is invarse of this
virtual bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locId);
virtual COND_EXPR *insertSubTreeReg(COND_EXPR *_expr, eReg regi, const LOCAL_ID *locsym);
virtual COND_EXPR *insertSubTreeLongReg(COND_EXPR *_expr, int longIdx);
virtual hlType expType(Function *pproc) const;
virtual QString walkCondExpr (Function * pProc, int* numLoc) const=0;
virtual Expr *inverse() const=0; // return new COND_EXPR that is invarse of this
virtual bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locId)=0;
virtual Expr *insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym)=0;
virtual Expr *insertSubTreeLongReg(Expr *_expr, int longIdx)=0;
virtual hlType expType(Function *pproc) const=0;
virtual int hlTypeSize(Function *pproc) const=0;
virtual Expr * performLongRemoval(eReg regi, LOCAL_ID *locId) { return this; }
};
struct BinaryOperator : public COND_EXPR
/**
 * Expression node with a single operand.
 *
 * The node owns unaryExp: the destructor deletes it and clone() deep-copies
 * it. The operand may be null - the constructor leaves it null and Create()
 * stores whatever pointer it is handed.
 * NOTE(review): the implicitly generated copy constructor copies the
 * unaryExp pointer as-is. clone() replaces that pointer right away, but any
 * other copy of a node with a non-null operand would end in a double delete.
 */
struct UnaryOperator : public Expr
{
    UnaryOperator(condNodeType t=UNKNOWN_OP) : Expr(t),unaryExp(nullptr) {}
    Expr *unaryExp; ///< Owned operand, may be null

    /**
     * Returns a newly allocated expression: a copy of the operand when this
     * node is a NEGATION, otherwise a copy of this node.
     * A NEGATION node must have a non-null operand.
     */
    virtual Expr *inverse() const
    {
        if (m_type == NEGATION) //TODO: memleak here
        {
            return unaryExp->clone();
        }
        return this->clone();
    }
    /** Returns a newly allocated deep copy of this node and its operand. */
    virtual Expr *clone() const
    {
        UnaryOperator *newExp = new UnaryOperator(*this);
        // The copy above duplicated only the pointer; give the new node its
        // own operand. A null operand stays null instead of being dereferenced.
        newExp->unaryExp = unaryExp ? unaryExp->clone() : nullptr;
        return newExp;
    }
    virtual bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locs);
    /** Allocates a node of type t that takes ownership of sub_expr. */
    static UnaryOperator *Create(condNodeType t, Expr *sub_expr)
    {
        UnaryOperator *newExp = new UnaryOperator();
        newExp->m_type = t;
        newExp->unaryExp = sub_expr;
        return (newExp);
    }
    /** Deletes the owned operand (deleting a null pointer is a no-op). */
    ~UnaryOperator()
    {
        delete unaryExp;
        unaryExp=nullptr;
    }
public:
    int hlTypeSize(Function *pproc) const;
    virtual QString walkCondExpr(Function *pProc, int *numLoc) const;
    virtual Expr *insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym);
    virtual hlType expType(Function *pproc) const;
    virtual Expr *insertSubTreeLongReg(Expr *_expr, int longIdx);
private:
    QString wrapUnary(Function *pProc, int *numLoc, QChar op) const;
};
struct BinaryOperator : public Expr
{
condOp m_op;
COND_EXPR *m_lhs;
COND_EXPR *m_rhs;
BinaryOperator(condOp o)
Expr *m_lhs;
Expr *m_rhs;
BinaryOperator(condOp o) : Expr(BOOLEAN_OP)
{
m_op = o;
m_lhs=m_rhs=nullptr;
}
static BinaryOperator *Create(condOp o,COND_EXPR *l,COND_EXPR *r);
static BinaryOperator *CreateAdd(COND_EXPR *l,COND_EXPR *r);
virtual COND_EXPR *inverse();
virtual COND_EXPR *clone();
virtual bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locs);
virtual COND_EXPR *insertSubTreeReg(COND_EXPR *_expr, eReg regi, LOCAL_ID *locsym);
virtual COND_EXPR *insertSubTreeLongReg(COND_EXPR *_expr, int longIdx);
BinaryOperator(condOp o,Expr *l,Expr *r) : Expr(BOOLEAN_OP)
{
m_op = o;
m_lhs=l;
m_rhs=r;
}
~BinaryOperator()
{
assert(m_lhs!=m_rhs or m_lhs==nullptr);
delete m_lhs;
delete m_rhs;
m_lhs=m_rhs=nullptr;
}
static BinaryOperator *Create(condOp o,Expr *l,Expr *r)
{
BinaryOperator *res = new BinaryOperator(o);
res->m_lhs = l;
res->m_rhs = r;
return res;
}
static BinaryOperator *LogicAnd(Expr *l,Expr *r)
{
return Create(DBL_AND,l,r);
}
static BinaryOperator *createSHL(Expr *l,Expr *r)
{
return Create(SHL,l,r);
}
static BinaryOperator *And(Expr *l,Expr *r)
{
return Create(AND,l,r);
}
static BinaryOperator *Or(Expr *l,Expr *r)
{
return Create(OR,l,r);
}
static BinaryOperator *LogicOr(Expr *l,Expr *r)
{
return Create(DBL_OR,l,r);
}
static BinaryOperator *CreateAdd(Expr *l,Expr *r) {
return Create(ADD,l,r);
COND_EXPR *lhs()
}
void changeBoolOp(condOp newOp);
virtual Expr *inverse() const;
virtual Expr *clone() const;
virtual bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locs);
virtual Expr *insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym);
virtual Expr *insertSubTreeLongReg(Expr *_expr, int longIdx);
const Expr *lhs() const
{
return const_cast<const Expr *>(const_cast<BinaryOperator *>(this)->lhs());
}
const Expr *rhs() const
{
return const_cast<const Expr *>(const_cast<BinaryOperator *>(this)->rhs());
}
Expr *lhs()
{
assert(m_type==BOOLEAN_OP);
return m_lhs;
}
const COND_EXPR *lhs() const
{
assert(m_type==BOOLEAN_OP);
return m_lhs;
}
COND_EXPR *rhs()
{
assert(m_type==BOOLEAN_OP);
return m_rhs;
}
const COND_EXPR *rhs() const
Expr *rhs()
{
assert(m_type==BOOLEAN_OP);
return m_rhs;
@@ -152,26 +190,134 @@ struct BinaryOperator : public COND_EXPR
condOp op() const { return m_op;}
/* Changes the boolean conditional operator at the root of this expression */
void op(condOp o) { m_op=o;}
QString walkCondExpr(Function * pProc, int* numLoc) const;
public:
hlType expType(Function *pproc) const;
int hlTypeSize(Function *pproc) const;
};
struct UnaryOperator : public COND_EXPR
struct AstIdent : public UnaryOperator
{
condOp op;
COND_EXPR *unaryExp;
virtual COND_EXPR *inverse();
virtual COND_EXPR *clone();
virtual bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locs);
static UnaryOperator *Create(condNodeType t, COND_EXPR *sub_expr)
AstIdent() : UnaryOperator(IDENTIFIER)
{
UnaryOperator *newExp = new UnaryOperator();
newExp->m_type=t;
newExp->unaryExp = sub_expr;
return (newExp);
}
};
IDENTTYPE ident; /* for IDENTIFIER */
static AstIdent * Loc(int off, LOCAL_ID *localId);
static AstIdent * LongIdx(int idx);
static AstIdent * String(uint32_t idx);
static AstIdent * Other(eReg seg, eReg regi, int16_t off);
static AstIdent * Param(int off, const STKFRAME *argSymtab);
static AstIdent * Long(LOCAL_ID *localId, opLoc sd, iICODE pIcode, hlFirst f, iICODE ix, operDu du, LLInst &atOffset);
static AstIdent * idID(const ID *retVal, LOCAL_ID *locsym, iICODE ix_);
static Expr * id(const LLInst &ll_insn, opLoc sd, Function *pProc, iICODE ix_, ICODE &duIcode, operDu du);
struct GlobalVariable : public COND_EXPR
virtual Expr *clone() const
{
static COND_EXPR *Create(int16_t segValue, int16_t off);
return new AstIdent(*this);
}
virtual int hlTypeSize(Function *pproc) const;
virtual hlType expType(Function *pproc) const;
virtual Expr * performLongRemoval(eReg regi, LOCAL_ID *locId);
virtual QString walkCondExpr(Function *pProc, int *numLoc) const;
virtual Expr *insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym);
virtual Expr *insertSubTreeLongReg(Expr *_expr, int longIdx);
virtual bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locId);
protected:
eReg otherLongRegi (eReg regi, int idx, LOCAL_ID *locTbl);
};
/**
 * Identifier node for a global variable, built from a segment value and an
 * offset (the constructor is defined out of line).
 */
struct GlobalVariable : public AstIdent
{
bool valid; // NOTE(review): presumably records whether the constructor resolved the variable - confirm
int globIdx; // NOTE(review): presumably an index into the global symbol table - confirm
// Returns a newly allocated copy made with the implicit copy constructor.
virtual Expr *clone() const
{
return new GlobalVariable(*this);
}
GlobalVariable(int16_t segValue, int16_t off);
QString walkCondExpr(Function *pProc, int *numLoc) const;
int hlTypeSize(Function *pproc) const;
hlType expType(Function *pproc) const;
};
/**
 * Identifier node for an indexed global variable, built from a segment
 * value, an offset, a register and a local-identifier table (the constructor
 * is defined out of line).
 */
struct GlobalVariableIdx : public AstIdent
{
bool valid; // NOTE(review): presumably records whether the constructor resolved the variable - confirm
int idxGlbIdx; /* idx into localId, GLOB_VAR_IDX */
// Returns a newly allocated copy made with the implicit copy constructor.
virtual Expr *clone() const
{
return new GlobalVariableIdx(*this);
}
GlobalVariableIdx(int16_t segValue, int16_t off, uint8_t regi, const LOCAL_ID *locSym);
QString walkCondExpr(Function *pProc, int *numLoc) const;
int hlTypeSize(Function *pproc) const;
hlType expType(Function *pproc) const;
};
/**
 * Identifier node holding a constant: a 32-bit value plus its size in bytes.
 */
struct Constant : public AstIdent
{
struct _kte
{ /* for CONSTANT only */
uint32_t kte; /* value of the constant */
uint8_t size; /* #bytes size constant */
} kte;
// Tags the identifier as CONSTANT and stores the value and its byte size.
Constant(uint32_t _kte, uint8_t size)
{
ident.idType = CONSTANT;
kte.kte = _kte;
kte.size = size;
}
// Returns a newly allocated copy made with the implicit copy constructor.
virtual Expr *clone() const
{
return new Constant(*this);
}
QString walkCondExpr(Function *pProc, int *numLoc) const;
int hlTypeSize(Function *pproc) const;
hlType expType(Function *pproc) const;
};
/**
 * Identifier node referring to a function together with an argument frame.
 * Both pointers are only stored here; nothing in this declaration deletes them.
 * NOTE(review): unlike Constant and RegisterNode, the constructor does not
 * set the type of `ident` - confirm that this is intentional.
 */
struct FuncNode : public AstIdent
{
struct _call { /* for FUNCTION only */
Function *proc;
STKFRAME *args;
} call;
// Stores the function and argument-frame pointers unchanged.
FuncNode(Function *pproc, STKFRAME *args)
{
call.proc = pproc;
call.args = args;
}
// Returns a newly allocated copy; the copy shares the same proc/args pointers.
virtual Expr *clone() const
{
return new FuncNode(*this);
}
QString walkCondExpr(Function *pProc, int *numLoc) const;
int hlTypeSize(Function *pproc) const;
hlType expType(Function *pproc) const;
};
/**
 * Identifier node for a register, described by an index, a register type and
 * the local-identifier table it refers to.
 */
struct RegisterNode : public AstIdent
{
const LOCAL_ID *m_syms; // table passed to the constructor; stored, not owned by anything visible here
regType regiType; /* for REGISTER only */
int regiIdx; /* index into localId, REGISTER */
virtual Expr *insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym);
// Tags the identifier as REGISTER and stores the index, register type and table.
RegisterNode(int idx, regType reg_type,const LOCAL_ID *syms)
{
m_syms= syms;
ident.type(REGISTER);
regiType = reg_type;
regiIdx = idx;
}
// Defined out of line.
RegisterNode(const LLOperand &, LOCAL_ID *locsym);
//RegisterNode(eReg regi, uint32_t icodeFlg, LOCAL_ID *locsym);
// Returns a newly allocated copy; the copy shares the same m_syms pointer.
virtual Expr *clone() const
{
return new RegisterNode(*this);
}
QString walkCondExpr(Function *pProc, int *numLoc) const;
int hlTypeSize(Function *) const;
hlType expType(Function *pproc) const;
bool xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locId);
};
struct Constant : public COND_EXPR
{};

View File

@@ -7,8 +7,10 @@
#pragma once
#include <stdio.h>
#include <vector>
#include <string>
struct strTable : std::vector<std::string>
#include <QtCore/QString>
#include <QtCore/QIODevice>
struct strTable : std::vector<QString>
{
/* Returns the next available index into the table */
size_t nextIdx() {return size();}
@@ -20,9 +22,9 @@ struct bundle
{
public:
void appendCode(const char *format, ...);
void appendCode(const std::string &s);
void appendCode(const QString &s);
void appendDecl(const char *format, ...);
void appendDecl(const std::string &);
void appendDecl(const QString &);
void init()
{
decl.clear();
@@ -33,10 +35,10 @@ public:
int current_indent;
};
extern bundle cCode;
#define lineSize 360 /* 3 lines in the mean time */
//void newBundle (bundle *procCode);
void writeBundle (std::ostream &ios, bundle procCode);
void writeBundle (QIODevice & ios, bundle procCode);
void freeBundle (bundle *procCode);

View File

@@ -5,10 +5,10 @@
#pragma once
//TODO: Remove boolT
#include <llvm/ADT/ilist.h>
#include <utility>
#include <algorithm>
#include <bitset>
#include <QtCore/QString>
#include "Enums.h"
#include "types.h"
@@ -20,32 +20,13 @@
#include "bundle.h"
#include "Procedure.h"
#include "BasicBlock.h"
struct Project;
class Project;
/* CALL GRAPH NODE */
struct CALL_GRAPH
{
ilFunction proc; /* Pointer to procedure in pProcList */
std::vector<CALL_GRAPH *> outEdges; /* array of out edges */
public:
void write();
CALL_GRAPH() : outEdges(0)
{
}
public:
void writeNodeCallGraph(int indIdx);
bool insertCallGraph(ilFunction caller, ilFunction callee);
bool insertCallGraph(Function *caller, ilFunction callee);
void insertArc(ilFunction newProc);
};
//#define NUM_PROCS_DELTA 5 /* delta # procs a proc invokes */
//extern std::list<Function> pProcList;
//extern FunctionListType pProcList;
//extern CALL_GRAPH * callGraph; /* Pointer to the head of the call graph */
extern bundle cCode; /* Output C procedure's declaration and code */
/**** Global variables ****/
extern char *asm1_name, *asm2_name; /* Assembler output filenames */
extern QString asm1_name, asm2_name; /* Assembler output filenames */
typedef struct { /* Command line option flags */
unsigned verbose : 1;
@@ -56,18 +37,15 @@ typedef struct { /* Command line option flags */
unsigned Stats : 1;
unsigned Interact : 1; /* Interactive mode */
unsigned Calls : 1; /* Follow register indirect calls */
char filename[80]; /* The input filename */
QString filename; /* The input filename */
uint32_t CustomEntryPoint;
} OPTION;
extern OPTION option; /* Command line options */
#include "BinaryImage.h"
extern std::bitset<32> duReg[30]; /* def/use bits for registers */
//extern uint32_t duReg[30]; /* def/use bits for registers */
extern std::bitset<32> maskDuReg[30]; /* masks off du bits for regs */
/* Registers used by icode instructions */
/* Memory map states */
enum eAreaType
@@ -94,55 +72,33 @@ extern STATS stats; /* Icode statistics */
/**** Global function prototypes ****/
class DccFrontend
{
void LoadImage(Project &proj);
void parse(Project &proj);
std::string m_fname;
public:
DccFrontend(const std::string &fname) : m_fname(fname)
{
}
bool FrontEnd(); /* frontend.c */
};
void udm(void); /* udm.c */
void freeCFG(BB * cfg); /* graph.c */
BB * newBB(BB *, int, int, uint8_t, int, Function *); /* graph.c */
void BackEnd(char *filename, CALL_GRAPH *); /* backend.c */
char *cChar(uint8_t c); /* backend.c */
void BackEnd(CALL_GRAPH *); /* backend.c */
extern char *cChar(uint8_t c); /* backend.c */
eErrorId scan(uint32_t ip, ICODE &p); /* scanner.c */
void parse (CALL_GRAPH * *); /* parser.c */
int strSize (uint8_t *, char); /* parser.c */
//void disassem(int pass, Function * pProc); /* disassem.c */
void interactDis(Function * initProc, int initIC); /* disassem.c */
extern int strSize (const uint8_t *, char); /* parser.c */
void interactDis(const PtrFunction &, int initIC); /* disassem.c */
bool JmpInst(llIcode opcode); /* idioms.c */
queue::iterator appendQueue(queue &Q, BB *node); /* reducible.c */
void SetupLibCheck(void); /* chklib.c */
bool SetupLibCheck(QString pattern_file_id); /* chklib.c */
void CleanupLibCheck(void); /* chklib.c */
bool LibCheck(Function &p); /* chklib.c */
/* Exported functions from procs.c */
boolT insertCallGraph (CALL_GRAPH *, ilFunction, ilFunction);
void adjustActArgType (COND_EXPR *, hlType, Function *);
/* Exported functions from ast.c */
std::string walkCondExpr (const COND_EXPR *exp, Function * pProc, int *);
int hlTypeSize (const COND_EXPR *, Function *);
//hlType expType (const COND_EXPR *, Function *);
/* Exported functions from hlicode.c */
std::string writeCall (Function *, STKFRAME &, Function *, int *);
char *writeJcond (const HLTYPE &, Function *, int *);
char *writeJcondInv (HLTYPE, Function *, int *);
QString writeJcond(const HLTYPE &, Function *, int *);
QString writeJcondInv(HLTYPE, Function *, int *);
/* Exported functions from locident.c */
boolT checkLongEq (LONG_STKID_TYPE, iICODE, int, Function *, Assignment &asgn, LLInst &atOffset);
boolT checkLongRegEq (LONGID_TYPE, iICODE, int, Function *, Assignment &asgn, LLInst &);
bool checkLongEq(LONG_STKID_TYPE, iICODE, int, Function *, Assignment &asgn, LLInst &atOffset);
bool checkLongRegEq(LONGID_TYPE, iICODE, int, Function *, Assignment &asgn, LLInst &);
eReg otherLongRegi(eReg, int, LOCAL_ID *);

14
include/dcc_interface.h Normal file
View File

@@ -0,0 +1,14 @@
#pragma once
#include "Procedure.h"
#include <QtCore/QObject>
#include <QtCore/QDir>
class IStructuredTextTarget;
/**
 * Abstract interface to the decompiler, reached through the static get().
 * NOTE(review): get() presumably returns a shared instance that callers do
 * not own - confirm against its definition.
 */
struct IDcc {
    static IDcc *get();
    // The interface has virtual members, so it needs a virtual destructor:
    // destroying an implementation through an IDcc pointer is otherwise
    // undefined behaviour.
    virtual ~IDcc() = default;
    virtual bool load(QString name)=0; // load and preprocess -> find entry point
    virtual QDir installDir()=0;
    virtual QDir dataDir(QString kind)=0;
};

View File

@@ -1,55 +1,45 @@
/****************************************************************************
* dcc project disassembler header
* (C) Mike van Emmerik
****************************************************************************/
/*
***************************************************************************
dcc project disassembler header
(C) Mike van Emmerik
***************************************************************************
*/
#pragma once
#include <sstream>
#include "bundle.h"
#include <memory>
#include <fstream>
#include <vector>
#include "bundle.h"
#include <QString>
#include <QTextStream>
struct LLInst;
class Function;
typedef std::shared_ptr<Function> PtrFunction;
struct Disassembler
{
protected:
int pass;
int g_lab;
//bundle &cCode;
std::ofstream m_fp;
int pass=0;
int g_lab=0;
QIODevice *m_disassembly_target=nullptr;
QTextStream m_fp;
std::vector<std::string> m_decls;
std::vector<std::string> m_code;
public:
Disassembler(int _p) : pass(_p)
{
g_lab=0;
}
public:
void disassem(Function *ppProc);
void disassem(Function *ppProc, int i);
void disassem(PtrFunction ppProc);
void disassem(PtrFunction ppProc, int i);
void dis1Line(LLInst &inst, int loc_ip, int pass);
};
/* Definitions for extended keys (first key is zero) */
#define EXT 0x100 /* "Extended" flag */
#ifdef __MSDOS__
#define KEY_DOWN EXT+'P'
#define KEY_LEFT EXT+'K'
#define KEY_UP EXT+'H'
#define KEY_RIGHT EXT+'M'
#define KEY_NPAGE EXT+'Q'
#define KEY_PPAGE EXT+'I'
#endif
#ifdef _CONSOLE
#define KEY_DOWN 0x50 /* Same as keypad scancodes */
#define KEY_LEFT 0x4B
#define KEY_UP 0x48
#define KEY_RIGHT 0x4D
#define KEY_NPAGE 0x51
#define KEY_PPAGE 0x49
#endif
#ifdef __UNIX__
#define KEY_DOWN EXT+'B'
#define KEY_LEFT EXT+'D'

View File

@@ -1,8 +1,11 @@
/***************************************************************************
/*
=**************************************************************************
* File : dosdcc.h
* Purpose : include file for files decompiled by dcc.
* Copyright (c) Cristina Cifuentes - QUT - 1992
**************************************************************************/
*************************************************************************
*/
/* Type definitions for intel 80x86 architecture */
typedef unsigned int uint16_t; /* 16 bits */

View File

@@ -1,7 +1,10 @@
/*****************************************************************************
/*
****************************************************************************
* Error codes
* (C) Cristina Cifuentes
****************************************************************************/
***************************************************************************
*/
#pragma once
/* These definitions refer to errorMessage in error.c */

View File

@@ -1,11 +1,14 @@
/*****************************************************************************
/*
****************************************************************************
* CFG, BB and interval related definitions
* ( C ) Cristina Cifuentes
****************************************************************************/
****************************************************************************
*/
#pragma once
#include <stdint.h>
#include <list>
#include <vector>
struct Function;
class Function;
/* Types of basic block nodes */
/* Real basic blocks: type defined according to their out-edges */
enum eBBKind
@@ -55,6 +58,7 @@ enum eNodeHeaderType
#define ELSE 1 /* else edge */
/* Basic Block (BB) flags */
#define INVALID_BB 0x0001 /* BB is not valid any more */
#define IS_LATCH_NODE 0x0002 /* BB is the latching node of a loop */
@@ -64,30 +68,24 @@ typedef std::list<BB *> queue;
struct interval
{
uint8_t numInt; /* # of the interval */
uint8_t numOutEdges; /* Number of out edges */
uint8_t numInt=0; /* # of the interval */
uint8_t numOutEdges=0; /* Number of out edges */
queue nodes; /* Nodes of the interval*/
queue::iterator currNode; /* Current node */
interval *next; /* Next interval */
interval * next=0; /* Next interval */
BB * firstOfInt();
interval()
{
numInt=numOutEdges=0;
currNode=nodes.end();
next=0;
interval() : currNode(nodes.end()){
}
void appendNodeInt(queue &pqH, BB *node);
};
/* Derived Sequence structure */
struct derSeq_Entry
{
BB * Gi; /* Graph pointer */
interval * Ii; /* Interval list of Gi */
derSeq_Entry() : Gi(0),Ii(0)
{
}
BB * Gi=nullptr; /* Graph pointer */
std::list<interval *> m_intervals;
interval * Ii=nullptr; /* Interval list of Gi */
~derSeq_Entry();
public:
void findIntervals(Function *c);

View File

@@ -3,35 +3,121 @@
* (C) Cristina Cifuentes
****************************************************************************/
#pragma once
#include "msvc_fixes.h"
#include "BinaryImage.h"
#include "libdis.h"
#include "Enums.h"
#include "state.h" // State depends on INDEXBASE, but later need STATE
#include "CallConvention.h"
#include <boost/range/iterator_range.hpp>
#include <QtCore/QString>
#include <memory>
#include <vector>
#include <list>
#include <bitset>
#include <llvm/ADT/ilist.h>
#include <llvm/ADT/ilist_node.h>
#include <llvm/CodeGen/MachineInstr.h>
#include <llvm/MC/MCInst.h>
#include <llvm/MC/MCAsmInfo.h>
#include <llvm/Value.h>
#include <llvm/Instruction.h>
#include <boost/range.hpp>
#include "libdis.h"
#include "Enums.h"
#include "state.h" // State depends on INDEXBASE, but later need STATE
#include <set>
#include <algorithm>
#include <initializer_list>
//enum condId;
struct LOCAL_ID;
struct BB;
struct Function;
class Function;
struct STKFRAME;
struct CIcodeRec;
class CIcodeRec;
struct ICODE;
struct bundle;
typedef std::list<ICODE>::iterator iICODE;
typedef std::list<ICODE>::reverse_iterator riICODE;
typedef boost::iterator_range<iICODE> rCODE;
extern std::bitset<32> duReg[30];
struct LivenessSet
{
std::set<eReg> registers;
public:
LivenessSet(const std::initializer_list<eReg> &init) : registers(init) {}
LivenessSet() {}
LivenessSet(const LivenessSet &other) : registers(other.registers)
{
}
void reset()
{
registers.clear();
}
// LivenessSet(LivenessSet &&other) : LivenessSet()
// {
// swap(*this,other);
// }
LivenessSet &operator=(LivenessSet other)
{
swap(*this,other);
return *this;
}
friend void swap(LivenessSet& first, LivenessSet& second) // nothrow
{
std::swap(first.registers, second.registers);
}
LivenessSet &operator|=(const LivenessSet &other)
{
registers.insert(other.registers.begin(),other.registers.end());
return *this;
}
LivenessSet &operator&=(const LivenessSet &other)
{
std::set<eReg> res;
std::set_intersection(registers.begin(),registers.end(),
other.registers.begin(),other.registers.end(),
std::inserter(res, res.end()));
registers = res;
return *this;
}
LivenessSet &operator-=(const LivenessSet &other)
{
std::set<eReg> res;
std::set_difference(registers.begin(),registers.end(),
other.registers.begin(),other.registers.end(),
std::inserter(res, res.end()));
registers = res;
return *this;
}
LivenessSet operator-(const LivenessSet &other) const
{
return LivenessSet(*this) -= other;
}
LivenessSet operator+(const LivenessSet &other) const
{
return LivenessSet(*this) |= other;
}
LivenessSet operator &(const LivenessSet &other) const
{
return LivenessSet(*this) &= other;
}
bool any() const
{
return not registers.empty();
}
bool operator==(const LivenessSet &other) const
{
return registers==other.registers;
}
bool operator!=(const LivenessSet &other) const { return not(*this==other);}
LivenessSet &setReg(int r);
LivenessSet &addReg(int r);
bool testReg(int r) const
{
return registers.find(eReg(r))!=registers.end();
}
bool testRegAndSubregs(int r) const;
LivenessSet &clrReg(int r);
private:
void postProcessCompositeRegs();
};
/* uint8_t and uint16_t registers */
/* Def/use of flags - low 4 bits represent flags */
@@ -45,14 +131,16 @@ struct DU
#define MAX_REGS_DEF 4 /* 2 regs def'd for long-reg vars */
struct COND_EXPR;
struct Expr;
struct AstIdent;
struct UnaryOperator;
struct HlTypeSupport
{
//hlIcode opcode; /* hlIcode opcode */
virtual bool removeRegFromLong(eReg regi, LOCAL_ID *locId)=0;
virtual std::string writeOut(Function *pProc, int *numLoc)=0;
virtual QString writeOut(Function *pProc, int *numLoc) const=0;
protected:
void performLongRemoval (eReg regi, LOCAL_ID *locId, COND_EXPR *tree);
Expr * performLongRemoval (eReg regi, LOCAL_ID *locId, Expr *tree);
};
struct CallType : public HlTypeSupport
@@ -61,41 +149,41 @@ struct CallType : public HlTypeSupport
Function * proc;
STKFRAME * args; // actual arguments
void allocStkArgs (int num);
bool newStkArg(COND_EXPR *exp, llIcode opcode, Function *pproc);
void placeStkArg(COND_EXPR *exp, int pos);
virtual COND_EXPR * toId();
bool newStkArg(Expr *exp, llIcode opcode, Function *pproc);
void placeStkArg(Expr *exp, int pos);
virtual Expr * toAst();
public:
bool removeRegFromLong(eReg regi, LOCAL_ID *locId)
bool removeRegFromLong(eReg /*regi*/, LOCAL_ID * /*locId*/)
{
printf("CallType : removeRegFromLong not supproted");
printf("CallType : removeRegFromLong not supproted\n");
return false;
}
std::string writeOut(Function *pProc, int *numLoc);
QString writeOut(Function *pProc, int *numLoc) const;
};
struct AssignType : public HlTypeSupport
{
/* for HLI_ASSIGN */
COND_EXPR *lhs;
COND_EXPR *rhs;
AssignType() : lhs(0),rhs(0) {}
bool removeRegFromLong(eReg regi, LOCAL_ID *locId)
{
performLongRemoval(regi,locId,lhs);
return true;
}
std::string writeOut(Function *pProc, int *numLoc);
protected:
public:
Expr *m_lhs;
Expr *m_rhs;
AssignType() {}
Expr *lhs() const {return m_lhs;}
void lhs(Expr *l);
bool removeRegFromLong(eReg regi, LOCAL_ID *locId);
QString writeOut(Function *pProc, int *numLoc) const;
};
struct ExpType : public HlTypeSupport
{
/* for HLI_JCOND, HLI_RET, HLI_PUSH, HLI_POP*/
COND_EXPR *v;
Expr *v;
ExpType() : v(0) {}
bool removeRegFromLong(eReg regi, LOCAL_ID *locId)
{
performLongRemoval(regi,locId,v);
v=performLongRemoval(regi,locId,v);
return true;
}
std::string writeOut(Function *pProc, int *numLoc);
QString writeOut(Function *pProc, int *numLoc) const;
};
struct HLTYPE
@@ -106,35 +194,27 @@ public:
hlIcode opcode; /* hlIcode opcode */
AssignType asgn;
CallType call;
HlTypeSupport *get()
HlTypeSupport *get();
const HlTypeSupport *get() const
{
switch(opcode)
{
case HLI_ASSIGN: return &asgn;
case HLI_RET:
case HLI_POP:
case HLI_JCOND:
case HLI_PUSH: return &exp;
case HLI_CALL: return &call;
default:
return 0;
}
return const_cast<const HlTypeSupport *>(const_cast<HLTYPE*>(this)->get());
}
void expr(COND_EXPR *e)
void expr(Expr *e)
{
assert(e);
exp.v=e;
}
void replaceExpr(COND_EXPR *e)
Expr *getMyExpr()
{
assert(e);
delete exp.v;
exp.v=e;
if(opcode==HLI_CALL)
return call.toAst();
return expr();
}
COND_EXPR * expr() { return exp.v;}
const COND_EXPR * const expr() const { return exp.v;}
void set(hlIcode i,COND_EXPR *e)
void replaceExpr(Expr *e);
Expr * expr() { return exp.v;}
const Expr * expr() const { return exp.v;}
void set(hlIcode i,Expr *e)
{
if(i!=HLI_RET)
assert(e);
@@ -142,17 +222,12 @@ public:
opcode=i;
exp.v=e;
}
void set(COND_EXPR *l,COND_EXPR *r)
{
assert(l);
assert(r);
opcode = HLI_ASSIGN;
assert((asgn.lhs==0) and (asgn.rhs==0)); //prevent memory leaks
asgn.lhs=l;
asgn.rhs=r;
}
void set(Expr *l,Expr *r);
void setCall(Function *proc);
HLTYPE(hlIcode op=HLI_INVALID) : opcode(op)
{}
// HLTYPE() // help valgrind find uninitialized HLTYPES
// {}
HLTYPE & operator=(const HLTYPE &l)
{
exp = l.exp;
@@ -162,164 +237,202 @@ public:
return *this;
}
public:
std::string write1HlIcode(Function *pProc, int *numLoc);
void setAsgn(COND_EXPR *lhs, COND_EXPR *rhs);
QString write1HlIcode(Function *pProc, int *numLoc) const;
void setAsgn(Expr *lhs, Expr *rhs);
} ;
/* LOW_LEVEL icode operand record */
struct LLOperand
{
llvm::MCOperand llvm_op;
eReg seg; /* CS, DS, ES, SS */
eReg segOver; /* CS, DS, ES, SS if segment override */
int16_t segValue; /* Value of segment seg during analysis */
eReg regi; /* 0 < regs < INDEXBASE <= index modes */
int16_t off; /* memory address offset */
uint32_t opz; /* idx of immed src op */
//union {/* Source operand if (flg & I) */
bool immed;
bool is_offset; // set by jumps
bool is_compound;
size_t width;
/* Source operand if (flg & I) */
struct { /* Call & # actual arg bytes */
Function *proc; /* pointer to target proc (for CALL(F))*/
int cb; /* # actual arg bytes */
} proc;
LLOperand() : seg(rUNDEF),segValue(0),segOver(rUNDEF),regi(rUNDEF),off(0),opz(0)
LLOperand() : seg(rUNDEF),segOver(rUNDEF),segValue(0),regi(rUNDEF),off(0),
opz(0),immed(0),is_offset(false),is_compound(0),width(0)
{
proc.proc=0;
proc.cb=0;
}
LLOperand(eReg r,size_t w) : LLOperand()
{
regi=r;
width=w;
}
bool operator==(const LLOperand &with) const
{
return (seg==with.seg) and
(segOver==with.segOver) and
(segValue==with.segValue) and
(regi == with.regi) and
(off == with.off) and
(opz==with.opz) and
(proc.proc==with.proc.proc);
}
int64_t getImm2() const {return opz;}
void SetImmediateOp(uint32_t dw)
{
opz=dw;
}
eReg getReg2() {return regi;}
eReg getReg2() const {return regi;}
bool isReg() const;
static LLOperand CreateImm2(int64_t Val)
static LLOperand CreateImm2(int64_t Val,uint8_t wdth=2)
{
LLOperand Op;
//Op.Kind = kImmediate;
//Op.ImmVal = Val;
Op.immed=true;
Op.opz = Val;
Op.width = wdth;
return Op;
}
static LLOperand CreateReg2(unsigned Val)
{
LLOperand Op;
// Op.Kind = kRegister;
// Op.RegVal = Reg;
Op.regi = (eReg)Val;
return Op;
}
void addProcInformation(int param_count,uint32_t call_conv);
bool isSet() const
{
return not (*this == LLOperand());
}
void addProcInformation(int param_count, CConv::CC_Type call_conv);
bool isImmediate() const { return immed;}
void setImmediate(bool x) { immed=x;}
bool compound() const {return is_compound;} // dx:ax pair
size_t byteWidth() const { assert(width<=4); return width;}
};
struct LLInst : public llvm::MCInst //: public llvm::ilist_node<LLInst>
struct LLInst
{
protected:
uint32_t m_opcode; // Low level opcode identifier
uint32_t flg; /* icode flags */
LLOperand m_src; /* source operand */
public:
int codeIdx; /* Index into cCode.code */
uint8_t numBytes; /* Number of bytes this instr */
uint32_t label; /* offset in image (20-bit adr) */
LLOperand dst; /* destination operand */
LLOperand m_dst; /* destination operand */
DU flagDU; /* def/use of flags */
int caseEntry;
std::vector<uint32_t> caseTbl2;
int hllLabNum; /* label # for hll codegen */
uint32_t getOpcode() const { return m_opcode;}
void setOpcode(uint32_t op) { m_opcode=op; }
bool conditionalJump()
{
return (getOpcode() >= iJB) && (getOpcode() < iJCXZ);
return (getOpcode() >= iJB) and (getOpcode() < iJCXZ);
}
bool testFlags(uint32_t x) const { return (flg & x)!=0;}
void setFlags(uint32_t flag) {flg |= flag;}
void clrFlags(uint32_t flag)
{
if(getOpcode()==iMOD)
{
assert(false);
}
flg &= ~flag;
}
void clrFlags(uint32_t flag);
uint32_t getFlag() const {return flg;}
//llIcode getOpcode() const { return opcode; }
uint32_t GetLlLabel() const { return label;}
void SetImmediateOp(uint32_t dw) {m_src.SetImmediateOp(dw);}
bool match(llIcode op)
{
return (getOpcode()==op);
}
bool matchWithRegDst(llIcode op)
{
return match(op) and m_dst.isReg();
}
bool match(llIcode op,eReg dest)
{
return (getOpcode()==op)&&dst.regi==dest;
return match(op) and match(dest);
}
bool match(llIcode op,eReg dest,uint32_t flgs)
{
return (getOpcode()==op) and (dst.regi==dest) and testFlags(flgs);
return match(op) and match(dest) and testFlags(flgs);
}
bool match(llIcode op,eReg dest,eReg src_reg)
{
return (getOpcode()==op)&&(dst.regi==dest)&&(m_src.regi==src_reg);
return match(op) and match(dest) and (m_src.regi==src_reg);
}
bool match(eReg dest,eReg src_reg)
{
return (dst.regi==dest)&&(m_src.regi==src_reg);
return match(dest) and (m_src.regi==src_reg);
}
bool matchAny(std::initializer_list<llIcode> ops) {
for(llIcode op : ops) {
if(match(op))
return true;
}
return false;
}
bool match(eReg dest)
{
return (dst.regi==dest);
return (m_dst.regi==dest);
}
bool match(llIcode op,uint32_t flgs)
{
return (getOpcode()==op) and testFlags(flgs);
return match(op) and testFlags(flgs);
}
void set(llIcode op,uint32_t flags)
{
setOpcode(op);
flg =flags;
}
void set(llIcode op,uint32_t flags,eReg dst_reg)
{
setOpcode(op);
m_dst = LLOperand::CreateReg2(dst_reg);
flg =flags;
}
void set(llIcode op,uint32_t flags,eReg dst_reg,const LLOperand &src_op)
{
setOpcode(op);
m_dst = LLOperand::CreateReg2(dst_reg);
m_src = src_op;
flg =flags;
}
void emitGotoLabel(int indLevel);
void findJumpTargets(CIcodeRec &_pc);
void writeIntComment(std::ostringstream &s);
void writeIntComment(QTextStream & s);
void dis1Line(int loc_ip, int pass);
std::ostringstream &strSrc(std::ostringstream &os,bool skip_comma=false);
void flops(std::ostringstream &out);
void flops(QTextStream & out);
bool isJmpInst();
HLTYPE toHighLevel(COND_EXPR *lhs, COND_EXPR *rhs, Function *func);
HLTYPE createCall();
LLInst(ICODE *container) : flg(0),codeIdx(0),numBytes(0),m_link(container)
{
setOpcode(0);
}
const LLOperand & dst() const { return m_dst; }
LLOperand & dst() { return m_dst; }
const LLOperand & src() const { return m_src; }
LLOperand & src() { return m_src; }
void replaceSrc(const LLOperand &with)
{
m_src = with;
}
void replaceSrc(eReg r)
{
m_src = LLOperand::CreateReg2(r);
}
void replaceSrc(int64_t r)
{
m_src = LLOperand::CreateImm2(r);
}
void replaceDst(const LLOperand &with)
{
dst = with;
}
void replaceDst(eReg r)
{
dst = LLOperand::CreateReg2(r);
}
ICODE *m_link;
void replaceSrc(const LLOperand &with) { m_src = with; }
void replaceSrc(eReg r) { m_src = LLOperand::CreateReg2(r); }
void replaceSrc(int64_t r) { m_src = LLOperand::CreateImm2(r); }
void replaceDst(const LLOperand &with) { m_dst = with; }
bool srcIsImmed() const { return (flg & I)!=0; }
condId idType(opLoc sd) const;
const LLOperand * get(opLoc sd) const { return (sd == SRC) ? &src() : &dst; }
LLOperand * get(opLoc sd) { return (sd == SRC) ? &src() : &dst; }
const LLOperand * get(opLoc sd) const { return (sd == SRC) ? &src() : &m_dst; }
LLOperand * get(opLoc sd) { return (sd == SRC) ? &src() : &m_dst; }
ICODE * m_link;
};
// Empty placeholder type: no members are declared here.
struct ADDRESS {
};
// A pair of ADDRESS values named start and fin.
// NOTE(review): presumably the bounds of a region of the binary image; whether
// fin is inclusive cannot be told from this declaration - confirm.
struct BinaryArea {
ADDRESS start;
ADDRESS fin;
};
#include <boost/icl/interval_set.hpp>
#include <boost/icl/interval_map.hpp>
/* Icode definition: LOW_LEVEL and HIGH_LEVEL */
struct ICODE
@@ -327,7 +440,7 @@ struct ICODE
// use llvm names at least
typedef BB MachineBasicBlock;
protected:
LLInst m_ll;
LLInst *m_ll;
HLTYPE m_hl;
MachineBasicBlock * Parent; /* BB to which this icode belongs */
bool invalid; /* Has no HIGH_LEVEL equivalent */
@@ -342,17 +455,17 @@ public:
template<int TYPE>
struct TypeFilter
{
bool operator()(ICODE *ic) {return ic->type==HIGH_LEVEL;}
bool operator()(ICODE &ic) {return ic.type==HIGH_LEVEL;}
bool operator()(ICODE *ic) {return ic->type==TYPE;}
bool operator()(ICODE &ic) {return ic.type==TYPE;}
};
template<int TYPE>
struct TypeAndValidFilter
{
bool operator()(ICODE *ic) {return (ic->type==HIGH_LEVEL)&&(ic->valid());}
bool operator()(ICODE &ic) {return (ic.type==HIGH_LEVEL)&&ic.valid();}
bool operator()(ICODE *ic) {return (ic->type==TYPE) and (ic->valid());}
bool operator()(ICODE &ic) {return (ic.type==TYPE) and ic.valid();}
};
static TypeFilter<HIGH_LEVEL> select_high_level;
static TypeAndValidFilter<HIGH_LEVEL> select_valid_high_level;
static TypeFilter<HIGH_LEVEL_ICODE> select_high_level;
static TypeAndValidFilter<HIGH_LEVEL_ICODE> select_valid_high_level;
/* Def/Use of registers and stack variables */
struct DU_ICODE
{
@@ -362,18 +475,21 @@ public:
use.reset();
lastDefRegi.reset();
}
std::bitset<32> def; // For Registers: position in bitset is reg index
std::bitset<32> use; // For Registers: position in uint32_t is reg index
std::bitset<32> lastDefRegi;// Bit set if last def of this register in BB
LivenessSet def; // For Registers: position in bitset is reg index
LivenessSet use; // For Registers: position in uint32_t is reg index
LivenessSet lastDefRegi;// Bit set if last def of this register in BB
void addDefinedAndUsed(eReg r)
{
def |= duReg[r];
use |= duReg[r];
def.addReg(r);
use.addReg(r);
}
};
struct DU1
{
protected:
int numRegsDef; /* # registers defined by this inst */
public:
struct Use
{
int Reg; // used register
@@ -385,11 +501,10 @@ public:
if(iter==uses.end())
return;
uses.erase(iter);
assert("Same user more then once!" && uses.end()==std::find(uses.begin(),uses.end(),us));
assert("Same user more then once!" and uses.end()==std::find(uses.begin(),uses.end(),us));
}
};
int numRegsDef; /* # registers defined by this inst */
uint8_t regi[MAX_REGS_DEF+1]; /* registers defined by this inst */
Use idx[MAX_REGS_DEF+1];
//int idx[MAX_REGS_DEF][MAX_USES]; /* inst that uses this def */
@@ -414,6 +529,11 @@ public:
Use &u(idx[regIdx]);
u.removeUser(ic);
}
int getNumRegsDef() const {return numRegsDef;}
void clearAllDefs() {numRegsDef=0;}
DU1 &addDef(eReg r) {numRegsDef++; return *this;}
DU1 &setDef(eReg r) {numRegsDef=1; return *this;}
void removeDef(eReg r) {numRegsDef--;}
DU1() : numRegsDef(0)
{
}
@@ -423,11 +543,19 @@ public:
DU1 du1; /* du chain 1 */
int loc_ip; // used by CICodeRec to number ICODEs
LLInst * ll() { return &m_ll;}
const LLInst * ll() const { return &m_ll;}
LLInst * ll() { return m_ll;}
const LLInst * ll() const { return m_ll;}
HLTYPE * hl() { return &m_hl;}
const HLTYPE * hl() const { return &m_hl;}
HLTYPE * hlU() {
// assert(type==HIGH_LEVEL);
// assert(m_hl.opcode!=HLI_INVALID);
return &m_hl;
}
const HLTYPE * hl() const {
// assert(type==HIGH_LEVEL);
// assert(m_hl.opcode!=HLI_INVALID);
return &m_hl;
}
void hl(const HLTYPE &v) { m_hl=v;}
void setRegDU(eReg regi, operDu du_in);
@@ -437,13 +565,13 @@ public:
condId idType(opLoc sd);
// HLL setting functions
// set this icode to be an assign
void setAsgn(COND_EXPR *lhs, COND_EXPR *rhs)
void setAsgn(Expr *lhs, Expr *rhs)
{
type=HIGH_LEVEL;
hl()->setAsgn(lhs,rhs);
type=HIGH_LEVEL_ICODE;
hlU()->setAsgn(lhs,rhs);
}
void setUnary(hlIcode op, COND_EXPR *_exp);
void setJCond(COND_EXPR *cexp);
void setUnary(hlIcode op, Expr *_exp);
void setJCond(Expr *cexp);
void emitGotoLabel(int indLevel);
void copyDU(const ICODE &duIcode, operDu _du, operDu duDu);
@@ -452,12 +580,50 @@ public:
public:
bool removeDefRegi(eReg regi, int thisDefIdx, LOCAL_ID *locId);
void checkHlCall();
bool newStkArg(COND_EXPR *exp, llIcode opcode, Function *pproc)
bool newStkArg(Expr *exp, llIcode opcode, Function *pproc)
{
return hl()->call.newStkArg(exp,opcode,pproc);
return hlU()->call.newStkArg(exp,opcode,pproc);
}
ICODE() : m_ll(this),type(NOT_SCANNED),Parent(0),loc_ip(0),invalid(false)
ICODE() :Parent(0),invalid(false),type(NOT_SCANNED_ICODE),loc_ip(0)
{
m_ll = new LLInst(this);
}
// Releases the LLInst held in m_ll.
~ICODE() {
delete m_ll;
}
ICODE(const ICODE &v) {
m_ll = new LLInst(*v.m_ll);
m_hl = v.m_hl;
Parent = v.Parent;
insn = v.insn;
type = v.type;
du = v.du;
du1 = v.du1;
loc_ip = v.loc_ip;
}
/**
 * Copy assignment: gives this ICODE its own clone of v's LLInst and copies
 * the remaining fields.
 *
 * Each ICODE deletes m_ll in its destructor, so the pointer must never be
 * shared between two objects; sharing it would make both destructors delete
 * the same LLInst.
 * NOTE(review): the cloned LLInst keeps the m_link value copied from v's
 * instruction; confirm whether it should be re-pointed at this ICODE.
 */
ICODE & operator=(const ICODE &v) {
    if(this == &v)
        return *this; // self-assignment: nothing to do
    // Clone before releasing the old instruction, so a failed allocation
    // leaves this object unchanged.
    LLInst *cloned = new LLInst(*v.m_ll);
    delete m_ll;
    m_ll = cloned;
    m_hl = v.m_hl;
    Parent = v.Parent;
    invalid = v.invalid;
    insn = v.insn;
    type = v.type;
    du = v.du;
    du1 = v.du1;
    loc_ip = v.loc_ip;
    return *this;
}
/**
 * Move assignment, implemented by exchanging every field with v.
 * The LLInst previously held by this object ends up in v and is released
 * when v is destroyed.
 */
ICODE & operator=(ICODE &&v) {
    std::swap(m_ll,v.m_ll);
    std::swap(m_hl,v.m_hl);
    std::swap(Parent  , v.Parent);
    // The validity flag travels with the rest of the state.
    std::swap(invalid , v.invalid);
    std::swap(insn    , v.insn);
    std::swap(type    , v.type);
    std::swap(du      , v.du);
    std::swap(du1     , v.du1);
    std::swap(loc_ip  , v.loc_ip);
    return *this;
}
public:
const MachineBasicBlock* getParent() const { return Parent; }
@@ -467,24 +633,23 @@ public:
};
/** Map n low level instructions to m high level instructions
*/
struct MappingLLtoML
{
typedef llvm::iplist<llvm::Instruction> InstListType;
typedef boost::iterator_range<iICODE> rSourceRange;
typedef boost::iterator_range<InstListType::iterator> rTargetRange;
rSourceRange m_low_level;
rTargetRange m_middle_level;
};
//struct MappingLLtoML
//{
// typedef boost::iterator_range<iICODE> rSourceRange;
// typedef boost::iterator_range<InstListType::iterator> rTargetRange;
// rSourceRange m_low_level;
// rTargetRange m_middle_level;
//};
// This is the icode array object.
class CIcodeRec : public std::list<ICODE>
{
public:
CIcodeRec(); // Constructor
ICODE * addIcode(ICODE *pIcode);
ICODE * addIcode(const ICODE * pIcode);
void SetInBB(rCODE &rang, BB* pnewBB);
bool labelSrch(uint32_t target, uint32_t &pIndex);
iterator labelSrch(uint32_t target);
ICODE * GetIcode(int ip);
ICODE * GetIcode(size_t ip);
bool alreadyDecoded(uint32_t target);
};

View File

@@ -37,6 +37,8 @@ struct Idiom18 : public Idiom
protected:
iICODE m_icodes[4];
bool m_is_dec;
/* type of variable: 1 = reg-var, 2 = local */
int m_idiom_type;
public:
Idiom18(Function *f) : Idiom(f)
{
@@ -64,7 +66,7 @@ struct Idiom20 : public Idiom
{
protected:
iICODE m_icodes[4];
bool m_is_dec;
condNodeType m_is_dec;
public:
Idiom20(Function *f) : Idiom(f)
{

11
include/loader.h Normal file
View File

@@ -0,0 +1,11 @@
#pragma once
// Empty class: no members are declared yet.
class ILoader
{
};
// Empty class: no members are declared yet.
// NOTE(review): the name is presumably meant to read "LoaderManager"; renaming
// would affect any code that already refers to this type.
class LoaderManger
{
};

View File

@@ -6,19 +6,23 @@
*/
#pragma once
#include "msvc_fixes.h"
#include "types.h"
#include "Enums.h"
#include "machine_x86.h"
#include <QtCore/QString>
#include <stdint.h>
#include <vector>
#include <list>
#include <set>
#include <algorithm>
#include "types.h"
#include "Enums.h"
#include "machine_x86.h"
/* Type definition */
// this array has to stay in-order of addition i.e. not std::set<iICODE,std::less<iICODE> >
// TODO: why ?
struct COND_EXPR;
struct Expr;
struct AstIdent;
struct ICODE;
struct LLInst;
typedef std::list<ICODE>::iterator iICODE;
@@ -30,37 +34,67 @@ struct IDX_ARRAY : public std::vector<iICODE>
}
};
typedef enum
enum frameType
{
STK_FRAME, /* For stack vars */
REG_FRAME, /* For register variables */
GLB_FRAME /* For globals */
} frameType;
};
typedef struct
struct BWGLB_TYPE
{
int16_t seg; /* segment value */
int16_t off; /* offset */
eReg regi; /* optional indexed register */
} BWGLB_TYPE;
typedef struct
{ /* For TYPE_LONG_(UN)SIGN on the stack */
int offH; /* high offset from BP */
int offL; /* low offset from BP */
} LONG_STKID_TYPE;
struct LONGID_TYPE
{ /* For TYPE_LONG_(UN)SIGN registers */
eReg h; /* high register */
eReg l; /* low register */
bool srcDstRegMatch(iICODE a,iICODE b) const;
} ;
/* Stack-resident TYPE_LONG_(UN)SIGN identifier: the high and low parts are
 * recorded as two separate offsets from BP. */
struct LONG_STKID_TYPE
{
    int offH; /* high offset from BP */
    int offL; /* low offset from BP */

    /* Stores both offsets as given. */
    LONG_STKID_TYPE(int high,int low)
    {
        offH = high;
        offL = low;
    }
};
/* For TYPE_LONG_(UN)SIGN registers */
struct LONGID_TYPE
{
protected:
eReg m_h; /* high register */
eReg m_l; /* low register */
public:
void set(eReg highpart,eReg lowpart)
{
m_h = highpart;
m_l = lowpart;
}
eReg l() const { return m_l; }
eReg h() const { return m_h; }
bool srcDstRegMatch(iICODE a,iICODE b) const;
LONGID_TYPE() {} // uninitializing constructor to help valgrind catch uninit accesses
LONGID_TYPE(eReg h,eReg l) : m_h(h),m_l(l) {}
};
/* Global TYPE_LONG_(UN)SIGN identifier: a segment value, the offsets of the
 * high and low parts, and an optional index register. */
struct LONGGLB_TYPE
{
    int16_t seg;   /* segment value */
    int16_t offH;  /* offset high */
    int16_t offL;  /* offset low */
    uint8_t regi;  /* optional indexed register */

    /* _reg defaults to 0; it is received as int8_t and stored converted to uint8_t. */
    LONGGLB_TYPE(int16_t _seg,int16_t _H,int16_t _L,int8_t _reg=0)
        : seg(_seg), offH(_H), offL(_L), regi(_reg)
    {
    }
};
/* ID, LOCAL_ID */
struct ID
{
protected:
LONGID_TYPE m_longId; /* For TYPE_LONG_(UN)SIGN registers */
public:
hlType type; /* Probable type */
bool illegal; /* Boolean: not a valid field any more */
//std::vector<iICODE> idx;
@@ -68,38 +102,42 @@ struct ID
frameType loc; /* Frame location */
bool hasMacro; /* Identifier requires a macro */
char macro[10]; /* Macro for this identifier */
std::string name; /* Identifier's name */
union { /* Different types of identifiers */
eReg regi; /* For TYPE_BYTE(uint16_t)_(UN)SIGN registers */
struct { /* For TYPE_BYTE(uint16_t)_(UN)SIGN on the stack */
QString name; /* Identifier's name */
union ID_UNION { /* Different types of identifiers */
LONG_STKID_TYPE longStkId; /* For TYPE_LONG_(UN)SIGN on the stack */
eReg regi; /* For TYPE_BYTE(WORD)_(UN)SIGN registers */
struct { /* For TYPE_BYTE(WORD)_(UN)SIGN on the stack */
uint8_t regOff; /* register offset (if any) */
int off; /* offset from BP */
} bwId;
BWGLB_TYPE bwGlb; /* For TYPE_BYTE(uint16_t)_(UN)SIGN globals */
LONGID_TYPE longId; /* For TYPE_LONG_(UN)SIGN registers */
LONG_STKID_TYPE longStkId; /* For TYPE_LONG_(UN)SIGN on the stack */
struct { /* For TYPE_LONG_(UN)SIGN globals */
int16_t seg; /* segment value */
int16_t offH; /* offset high */
int16_t offL; /* offset low */
uint8_t regi; /* optional indexed register */
} longGlb;
LONGGLB_TYPE longGlb;
struct { /* For TYPE_LONG_(UN)SIGN constants */
uint32_t h; /* high uint16_t */
uint32_t l; /* low uint16_t */
} longKte;
ID_UNION() { /*new (&longStkId) LONG_STKID_TYPE();*/}
} id;
LONGID_TYPE & longId() {assert(isLong() and loc==REG_FRAME); return m_longId;}
const LONGID_TYPE & longId() const {assert(isLong() and loc==REG_FRAME); return m_longId;}
LONG_STKID_TYPE & longStkId() {assert(isLong() and loc==STK_FRAME); return id.longStkId;}
const LONG_STKID_TYPE & longStkId() const {assert(isLong() and loc==STK_FRAME); return id.longStkId;}
ID();
ID(hlType t, frameType f);
bool isSigned() const { return (type==TYPE_BYTE_SIGN)||(type==TYPE_WORD_SIGN)||(type==TYPE_LONG_SIGN);}
ID(hlType t, const LONGID_TYPE &s);
ID(hlType t, const LONG_STKID_TYPE &s);
ID(hlType t, const LONGGLB_TYPE &s);
bool isSigned() const { return (type==TYPE_BYTE_SIGN) or (type==TYPE_WORD_SIGN) or (type==TYPE_LONG_SIGN);}
uint16_t typeBitsize() const
{
return TypeContainer::typeSize(type)*8;
}
bool isLong() const { return (type==TYPE_LONG_UNSIGN) or (type==TYPE_LONG_SIGN); }
void setLocalName(int i)
{
char buf[32];
sprintf (buf, "loc%ld", i);
sprintf (buf, "loc%d", i);
name=buf;
}
};
@@ -121,18 +159,18 @@ public:
std::vector<ID>::iterator end() {return id_arr.end();}
int newByteWordReg(hlType t, eReg regi);
int newByteWordStk(hlType t, int off, uint8_t regOff);
int newIntIdx(int16_t seg, int16_t off, eReg regi, int ix, hlType t);
int newLongReg(hlType t, eReg regH, eReg regL, iICODE ix_);
int newIntIdx(int16_t seg, int16_t off, eReg regi, hlType t);
int newLongReg(hlType t, const LONGID_TYPE &longT, iICODE ix_);
int newLong(opLoc sd, iICODE pIcode, hlFirst f, iICODE ix, operDu du, int off);
int newLong(opLoc sd, iICODE pIcode, hlFirst f, iICODE ix, operDu du, LLInst &atOffset);
void newIdent(hlType t, frameType f);
void flagByteWordId(int off);
void propLongId(uint8_t regL, uint8_t regH, const char *name);
void propLongId(uint8_t regL, uint8_t regH, const QString & name);
size_t csym() const {return id_arr.size();}
void newRegArg(iICODE picode, iICODE ticode) const;
void processTargetIcode(iICODE picode, int &numHlIcodes, iICODE ticode, bool isLong) const;
void forwardSubs(COND_EXPR *lhs, COND_EXPR *rhs, iICODE picode, iICODE ticode, int &numHlIcodes) const;
COND_EXPR *createId(const ID *retVal, iICODE ix_);
void forwardSubs(Expr *lhs, Expr *rhs, iICODE picode, iICODE ticode, int &numHlIcodes) const;
AstIdent *createId(const ID *retVal, iICODE ix_);
};

View File

@@ -1,9 +1,12 @@
#pragma once
#include <QtCore/QString>
#include <stdint.h>
#include <string>
#include <sstream>
#include <bitset>
class QTextStream;
struct LivenessSet;
/* Machine registers */
enum eReg
{
@@ -32,8 +35,9 @@ enum eReg
rBH = 20,
rTMP= 21, /* temp register for DIV/IDIV/MOD */
rTMP2= 22, /* temp register for DIV/IDIV/MOD */
/* Indexed modes go from INDEXBASE to INDEXBASE+7 */
INDEX_BX_SI = 22, // "bx+si"
INDEX_BX_SI = 23, // "bx+si"
INDEX_BX_DI, // "bx+di"
INDEX_BP_SI, // "bp+si"
INDEX_BP_DI, // "bp+di"
@@ -58,25 +62,20 @@ class Machine_X86 : public SourceMachine
public:
Machine_X86();
virtual ~Machine_X86() {}
static const std::string &regName(eReg r);
static const std::string &opcodeName(unsigned r);
static const std::string &floatOpName(unsigned r);
static const QString & regName(eReg r);
static const QString & opcodeName(unsigned r);
static const QString & floatOpName(unsigned r);
bool physicalReg(eReg r);
/* Writes the registers that are set in the bitvector */
//TODO: move this into Machine_X86 ?
static void writeRegVector (std::ostream &ostr,const std::bitset<32> &regi)
{
int j;
for (j = rAX; j < INDEX_BX_SI; j++)
{
if (regi.test(j-1))
ostr << regName(eReg(j))<<" ";
}
}
static eReg subRegH(eReg reg); //TODO: move these into machine_x86
static void writeRegVector (QTextStream & ostr, const LivenessSet &regi);
static eReg subRegH(eReg reg);
static eReg subRegL(eReg reg);
static bool isMemOff(eReg r);
static bool isSubRegisterOf(eReg reg, eReg parent);
static bool hasSubregisters(eReg reg);
static bool isPartOfComposite(eReg reg);
static eReg compositeParent(eReg reg);
};

3
include/msvc_fixes.h Normal file
View File

@@ -0,0 +1,3 @@
#ifdef _MSC_VER
#include <iso646.h>
#endif

View File

@@ -1,41 +0,0 @@
#pragma once
/* Perfect hashing function library. Contains functions to generate perfect
hashing functions
* (C) Mike van Emmerik
*/
//#define bool unsigned char
#define uint8_t unsigned char
#define uint16_t unsigned short
/* Prototypes */
void hashCleanup(void); /* Frees memory allocated by hashParams() */
void map(void); /* Part 1 of creating the tables */
/* The application must provide these functions: */
void getKey(int i, uint8_t **pKeys);/* Set *keys to point to the i+1th key */
void dispKey(int i); /* Display the key */
/**
 * State and interface of the perfect hashing function.
 *
 * The private members hold the table pointers and the parameters handed to
 * init(); the public members expose the tables and the hash computation.
 */
class PatternHasher
{
    uint16_t *T1base;   /* Pointer to start of T1 */
    uint16_t *T2base;   /* Pointer to start of T2 */
    int NumEntry;       /* Number of entries in the hash table (# keys) */
    int EntryLen;       /* Size (bytes) of each entry (size of keys) */
    int SetSize;        /* Size of the char set */
    char SetMin;        /* First char in the set */
    int NumVert;        /* c times NumEntry */
    int *graphNode;     /* The array of edges */
    int *graphNext;     /* Linked list of edges */
    int *graphFirst;    /* First edge at a vertex */
public:
    /* Returns a pointer to the T1 table */
    uint16_t *readT1();
    /* Returns a pointer to the T2 table */
    uint16_t *readT2();
    /* Returns a pointer to the g table */
    uint16_t *readG();
    /* Set the parameters for the hash table */
    void init(int _NumEntry, int _EntryLen, int _SetSize, char _SetMin,int _NumVert);
    /* NOTE(review): presumably releases what init() set up - implementation not visible here */
    void cleanup();
    //!< Hash the string to an int 0 .. NUMENTRY-1
    int hash(unsigned char *string);
};
extern PatternHasher g_pattern_hasher;
/* Macro reads a LH uint16_t from the image regardless of host convention */
#ifndef LH
#define LH(p) ((int)((uint8_t *)(p))[0] + ((int)((uint8_t *)(p))[1] << 8))
#endif

View File

@@ -1,77 +1,174 @@
#pragma once
#include <string>
#include <stdint.h>
#include <cassert>
#include <list>
#include <llvm/ADT/ilist.h>
#include "symtab.h"
#include "BinaryImage.h"
struct Function;
struct SourceMachine;
#include "Procedure.h"
#include "state.h"
#include "src/Command.h"
#include <boost/icl/interval.hpp>
#include <boost/icl/interval_map.hpp>
#include <boost/icl/split_interval_map.hpp>
#include <QtCore/QString>
#include <list>
#include <unordered_set>
#include <unordered_map>
#include <string>
#include <stdint.h>
#include <assert.h>
class QString;
class SourceMachine;
struct CALL_GRAPH;
typedef llvm::iplist<Function> FunctionListType;
typedef FunctionListType lFunction;
typedef lFunction::iterator ilFunction;
struct Project
struct DosLoader;
/// Address expressed as a 16-bit segment plus a 32-bit address field.
/// Passed to Project::createFunction() to say where a new function lives.
/// NOTE(review): whether `addr` is an offset inside `seg` or an already
/// linearised address is not visible here - confirm against the callers.
struct SegOffAddr {
uint16_t seg;  //!< segment part
uint32_t addr; //!< address part (see note above)
};
/// Compiler vendor recorded in LoaderMetadata; LoaderMetadata::compilerId()
/// turns it into the leading letter(s) of the compiler id string.
enum CompilerVendor {
    eUnknownVendor = 0, //!< vendor not identified
    eBorland       = 1,
    eMicrosoft     = 2,
    eLogitech      = 3
};
/// Source language recorded in LoaderMetadata; consulted by
/// LoaderMetadata::compilerId() when composing the compiler id string.
enum CompilerLanguage {
    eUnknownLanguage = 0, //!< language not identified
    eAnsiCorCPP      = 1,
    ePascal          = 2,
    eModula2         = 3
};
/// Memory model recorded in LoaderMetadata; LoaderMetadata::codeModelChar()
/// maps each value to a one-letter code.
enum CompilerMemoryModel {
    eUnknownMemoryModel = 0, //!< model not identified
    eTiny               = 1,
    eSmall              = 2,
    eCompact            = 3,
    eMedium             = 4,
    eLarge              = 5
};
/**
 * Compiler information attached to a loaded binary: vendor, language,
 * memory model and version, plus helpers that encode them as a short id.
 *
 * NOTE(review): the fields have no default initializers and the struct has no
 * constructor, so a default-constructed instance holds indeterminate values
 * until every field is assigned.
 */
struct LoaderMetadata {
    CompilerVendor compiler_vendor;
    CompilerLanguage compiler_language;
    CompilerMemoryModel compiler_memory_model;
    int compiler_version;

    /// Builds the compiler id string: a vendor/language prefix followed by the
    /// version and (except for Borland Pascal) the memory-model letter.
    /// Returns "xxx" for an unknown vendor or an unhandled Borland language.
    QString compilerId() const {
        if(compiler_vendor==eBorland) {
            if(compiler_language==eUnknownLanguage)
                return QString("bx") + codeModelChar();
            if(compiler_language==eAnsiCorCPP)
                return QString("b%1%2").arg(compiler_version).arg(codeModelChar());
            if(compiler_language==ePascal)
                return QString("tp%1").arg(compiler_version);
            return "xxx";
        }
        if(compiler_vendor==eMicrosoft) {
            // only C/C++ is expected for this vendor
            assert(compiler_language==eAnsiCorCPP);
            return QString("m%1%2").arg(compiler_version).arg(codeModelChar());
        }
        if(compiler_vendor==eLogitech) {
            // only Modula-2 is expected for this vendor
            assert(compiler_language==eModula2);
            return QString("l%1%2").arg(compiler_version).arg(codeModelChar());
        }
        // eUnknownVendor and any out-of-range value
        return "xxx";
    }

    /// One-letter code for the memory model; 'x' when the model is unknown or
    /// the stored value is not one of the enumerators.
    QChar codeModelChar() const {
        char model_code = 'x';
        switch(compiler_memory_model) {
        case eUnknownMemoryModel: break;
        case eTiny:    model_code = 't'; break;
        case eSmall:   model_code = 's'; break;
        case eCompact: model_code = 'c'; break;
        case eMedium:  model_code = 'm'; break;
        case eLarge:   model_code = 'l'; break;
        }
        return model_code;
    }
};
class Project : public QObject
{
SYMTAB symtab; /* Global symbol table */
Q_OBJECT
public:
DosLoader * m_selected_loader;
bool m_metadata_available=false;
LoaderMetadata m_loader_data;
uint32_t SynthLab; //!< Last snthetic lab idx
SYMTAB symtab; //!< Global symbol table
FunctionListType pProcList; //!< List of located functions
CALL_GRAPH * callGraph; //!< Pointer to the head of the call graph
STATE m_entry_state; //!< Machine state at program load
std::string m_fname;
FunctionListType pProcList;
CALL_GRAPH * callGraph; /* Pointer to the head of the call graph */
PROG prog; /* Loaded program image parameters */
Project() {}
// no copies
CommandStream m_project_command_stream;
std::unordered_map<PtrFunction,CommandStream> m_function_streams;
bool m_error_state;
struct PatternLocator *m_pattern_locator;
public:
// prevent Project instance copying
Project(const Project&) = delete;
const Project & operator=(const Project & l) =delete;
// only moves
Project(Project && l)
{
m_fname =l.m_fname;
size_t before=l.pProcList.size();
pProcList.splice(pProcList.end(),l.pProcList);
callGraph=l.callGraph;
l.m_fname.clear();
l.pProcList.clear();
l.callGraph=0;
assert(before==pProcList.size());
}
Project &operator=(Project && l)
{
if(this == &l)
return *this;
m_fname =l.m_fname;
size_t before=l.pProcList.size();
pProcList.splice(pProcList.end(),l.pProcList);
callGraph=l.callGraph;
l.m_fname.clear();
l.pProcList.clear();
l.callGraph=0;
assert(before==pProcList.size());
return *this;
}
Project(); // default constructor,
public:
void create(const QString &a);
bool addLoadCommands(QString fname);
void processAllCommands();
void resetCommandsAndErrorState();
const QString & output_path() const {return m_output_path;}
const QString & project_name() const {return m_project_name;}
const QString & binary_path() const {return m_fname;}
QString output_name(const char *ext);
ilFunction funcIter(Function *to_find);
ilFunction findByEntry(uint32_t entry);
ilFunction createFunction();
PtrFunction findByEntry(uint32_t entry);
PtrFunction findByName(const QString &name);
PtrFunction createFunction(FunctionType *f, const QString & name, SegOffAddr addr);
bool valid(ilFunction iter);
int getSymIdxByAdd(uint32_t adr);
bool validSymIdx(size_t idx);
size_t symbolSize(size_t idx);
hlType symbolType(size_t idx);
const std::string &symbolName(size_t idx);
const QString & symbolName(size_t idx);
const SYM & getSymByIdx(size_t idx) const;
LoaderMetadata &getLoaderMetadata() { assert(m_metadata_available); return m_loader_data; }
void setLoaderMetadata(LoaderMetadata d) { m_loader_data = d; m_metadata_available=true;}
static Project * get();
PROG * binary() {return &prog;}
SourceMachine *machine();
const FunctionListType &functions() const { return pProcList; }
FunctionListType &functions() { return pProcList; }
bool addCommand(Command *cmd);
bool addCommand(PtrFunction f, Command *cmd); // Add function level command
bool hasCommands(const PtrFunction &f);
CommandStream *functionCommands(const PtrFunction &f);
void dumpAllErrors();
void setLoader(DosLoader *ins);
void processCommands(int count=1);
void processFunctionCommands(const PtrFunction & func, int count);
public slots:
void onCommandStreamFinished(bool state);
signals:
void newFunctionCreated(PtrFunction);
void functionUpdate(const PtrFunction &);
void loaderSelected();
void commandListChanged();
protected:
void initialize();
void writeGlobSymTable();
protected:
static Project * s_instance;
QString m_fname;
QString m_project_name;
QString m_output_path;
CommandContext m_command_ctx;
};
//extern Project g_proj;

View File

@@ -4,9 +4,9 @@
*/
#include <stdint.h>
#include "error.h"
/* Extracts reg bits from middle of mod-reg-rm uint8_t */
#define REG(x) ((uint8_t)(x & 0x38) >> 3)
//#define LH(p) ((int)((uint8_t *)(p))[0] + ((int)((uint8_t *)(p))[1] << 8))
struct ICODE;
/* Extracts reg bits from middle of mod-reg-rm uint8_t */
extern eErrorId scan(uint32_t ip, ICODE &p);

View File

@@ -3,15 +3,16 @@
* (C) Cristina Cifuentes, Mike van Emmerik
****************************************************************************/
#pragma once
#include <stdint.h>
#include <cstring>
#include "machine_x86.h"
#include <stdint.h>
#include <string.h>
/* STATE TABLE */
struct STATE
{
uint32_t IP; /* Offset into Image */
int16_t r[INDEX_BX_SI]; /* Value of segs and AX */
int16_t r[INDEX_BX_SI]; /* Register values */
bool f[INDEX_BX_SI]; /* True if r[.] has a value */
struct
{ /* For case stmt indexed reg */
@@ -30,6 +31,10 @@ struct STATE
memset(r,0,sizeof(int16_t)*INDEX_BX_SI); //TODO: move this to machine_x86
memset(f,0,sizeof(uint8_t)*INDEX_BX_SI);
}
/// Placeholder hook taking a memory address and a byte value.
/// The body is currently empty, so both arguments are ignored and no state
/// changes; per the TODO it is meant to grow into real value tracking.
void setMemoryByte(uint32_t addr,uint8_t val)
{
//TODO: make this into a full scale value tracking class !
}
};

View File

@@ -3,18 +3,26 @@
* (C) Mike van Emmerik
*/
#pragma once
#include <string>
#include <stdint.h>
#include "Enums.h"
#include "types.h"
struct COND_EXPR;
#include "msvc_fixes.h"
#include <QtCore/QString>
#include <string>
#include <vector>
#include <stdint.h>
class QTextStream;
struct Expr;
struct AstIdent;
struct TypeContainer;
/* * * * * * * * * * * * * * * * * */
/* Symbol table structs and protos */
/* * * * * * * * * * * * * * * * * */
struct SymbolCommon
{
std::string name; /* New name for this variable/symbol/argument */
QString name; /* New name for this variable/symbol/argument */
int size; /* Size/maximum size */
hlType type; /* probable type */
eDuVal duVal; /* DEF, USE, VAL */
@@ -23,30 +31,26 @@ struct SymbolCommon
};
struct SYM : public SymbolCommon
{
typedef uint32_t tLabel;
SYM() : label(0),flg(0)
{
}
int32_t label; /* physical address (20 bit) */
uint32_t label; /* physical address (20 bit) */
uint32_t flg; /* SEG_IMMED, IMPURE, WORD_OFF */
};
/* STACK FRAME */
struct STKSYM : public SymbolCommon
{
COND_EXPR *actual; /* Expression tree of actual parameter */
COND_EXPR *regs; /* For register arguments only */
int16_t label; /* Immediate off from BP (+:args, -:params) */
uint8_t regOff; /* Offset is a register (e.g. SI, DI) */
bool hasMacro; /* This type needs a macro */
std::string macro; /* Macro name */
bool invalid; /* Boolean: invalid entry in formal arg list*/
STKSYM()
{
actual=regs=0;
label=0;
regOff=0;
invalid=hasMacro = false;
}
typedef int16_t tLabel;
Expr * actual=0; /* Expression tree of actual parameter */
AstIdent * regs=0; /* For register arguments only */
tLabel label=0; /* Immediate off from BP (+:args, -:params) */
uint8_t regOff=0; /* Offset is a register (e.g. SI, DI) */
bool hasMacro=false; /* This type needs a macro */
QString macro; /* Macro name */
bool invalid=false; /* Boolean: invalid entry in formal arg list*/
int arrayMembers=1; // for local variables if >1 marks this stack symbol as an array
void setArgName(int i)
{
char buf[32];
@@ -60,13 +64,13 @@ class SymbolTableCommon : public std::vector<T>
public:
typedef typename std::vector<T>::iterator iterator;
typedef typename std::vector<T>::const_iterator const_iterator;
iterator findByLabel(int lab)
iterator findByLabel(typename T::tLabel lab)
{
auto iter = std::find_if(this->begin(),this->end(),
[lab](T &s)->bool {return s.label==lab;});
return iter;
}
const_iterator findByLabel(int lab) const
const_iterator findByLabel(typename T::tLabel lab) const
{
auto iter = std::find_if(this->begin(),this->end(),
[lab](const T &s)->bool {return s.label==lab;});
@@ -82,7 +86,7 @@ public:
void updateSymType(uint32_t symbol, const TypeContainer &tc);
SYM *updateGlobSym(uint32_t operand, int size, uint16_t duFlag, bool &inserted_new);
};
struct Function;
class Function;
struct SYMTABLE
{
std::string pSymName; /* Ptr to symbolic name or comment */
@@ -95,7 +99,7 @@ struct SYMTABLE
{
// does not use pSymName, to ease finding by symOff/symProc combo
// in map<SYMTABLE,X>
return (symOff==other.symOff) && symProc==(other.symProc);
return (symOff==other.symOff) and symProc==(other.symProc);
}
};
@@ -108,6 +112,6 @@ enum tableType /* The table types */
void createSymTables(void);
void destroySymTables(void);
boolT readVal (std::ostringstream &symName, uint32_t symOff, Function *symProc);
bool readVal (QTextStream & symName, uint32_t symOff, Function *symProc);
void selectTable(tableType); /* Select a particular table */

View File

@@ -1,19 +1,21 @@
/****************************************************************************
/*
***************************************************************************
* dcc project general header
* (C) Cristina Cifuentes, Mike van Emmerik
****************************************************************************/
***************************************************************************
*/
#pragma once
#include "Enums.h"
#include "msvc_fixes.h"
#include <cassert>
#include <stdint.h>
#include "Enums.h"
#include <stdlib.h>
/**** Common definitions and macros ****/
#define MAX 0x7FFFFFFF
/* Type definitions used in the program */
typedef unsigned char byte; /* 8 bits */
typedef unsigned short word;/* 16 bits */
typedef short int16; /* 16 bits */
typedef unsigned char boolT; /* 8 bits */
#define SYNTHESIZED_MIN 0x100000 /* Synthesized labs use bits 21..32 */
@@ -22,17 +24,17 @@ typedef unsigned char boolT; /* 8 bits */
#define PATLEN 23 /* Length of proc patterns */
#define WILD 0xF4 /* The wild byte */
/****** MACROS *******/
/* MACROS */
/* Macro reads a LH word from the image regardless of host convention */
/* Returns a 16 bit quantity, e.g. C000 is read into an Int as C000 */
// Macro reads a LH word from the image regardless of host convention
// Returns a 16 bit quantity, e.g. C000 is read into an Int as C000
//#define LH(p) ((int16)((byte *)(p))[0] + ((int16)((byte *)(p))[1] << 8))
#define LH(p) ((word)((byte *)(p))[0] + ((word)((byte *)(p))[1] << 8))
#define LH(p) ((uint16_t)((uint8_t *)(p))[0] + ((uint16_t)((uint8_t *)(p))[1] << 8))
/* Macro reads a LH word from the image regardless of host convention */
/* Returns a signed quantity, e.g. C000 is read into an Int as FFFFC000 */
#define LH_SIGNED(p) (((byte *)(p))[0] + (((char *)(p))[1] << 8))
#define LH_SIGNED(p) (((uint8_t *)(p))[0] + (((char *)(p))[1] << 8))
/* Macro tests bit b for type t in prog.map */
#define BITMAP(b, t) (prog.map[(b) >> 2] & ((t) << (((b) & 3) << 1)))
@@ -53,24 +55,35 @@ struct eDuVal
USE=2,
VAL=4
};
int def :1; /* Variable was first defined than used */
int use :1; /* Variable was first used than defined */
int val :1; /* Variable has an initial value. 2 cases:
* 1. When variable is used first (ie. global)
* 2. When a value is moved into the variable
* for the first time. */
uint8_t def :1; //!< Variable was first defined than used
uint8_t use :1; //!< Variable was first used than defined
uint8_t val :1; /* Variable has an initial value. 2 cases:
1. When variable is used first (ie. global)
2. When a value is moved into the variable
for the first time.
*/
void setFlags(uint16_t x)
{
def = x&DEF;
use = x&USE;
val = x&VAL;
}
bool isUSE_VAL() {return use&&val;} /* Use and Val */
bool isUSE_VAL() {return use and val;} //Use and Val
};
static constexpr const char * hlTypes[13] = {
"", "char", "unsigned char", "int", "unsigned int",
"long", "unsigned long", "record", "int *", "char *",
"", "float", "double"
"",
"char",
"unsigned char",
"int",
"unsigned int",
"long",
"unsigned long",
"record",
"int *",
"char *",
"",
"float",
"double"
};
struct TypeContainer
@@ -88,6 +101,14 @@ struct TypeContainer
return 2;
case TYPE_BYTE_SIGN: case TYPE_BYTE_UNSIGN:
return 1;
case TYPE_LONG_SIGN: case TYPE_LONG_UNSIGN:
return 4;
case TYPE_FLOAT:
return 4;
case TYPE_PTR:
return 2;
default:
return ~0;
}
return 0;
}

BIN
prototypes/dcclibs.dat Normal file

Binary file not shown.

View File

@@ -14,8 +14,10 @@ def perform_test(exepath,filepath,outname,args)
filepath=path_local(filepath)
joined_args = args.join(' ')
printf("calling:" + "#{exepath} -a1 #{joined_args} -o#{output_path}.a1 #{filepath}\n")
STDERR << "Errors for : #{filepath}\n"
result = `#{exepath} -a 1 -o#{output_path}.a1 #{filepath}`
result = `#{exepath} -a 2 #{joined_args} -o#{output_path}.a2 #{filepath}`
result = `#{exepath} #{joined_args} -o#{output_path} #{filepath}`
puts result
p $?
end

BIN
sigs/dccb2s.sig Normal file

Binary file not shown.

BIN
sigs/dccb3l.sig Normal file

Binary file not shown.

BIN
sigs/dccb3s.SIG Normal file

Binary file not shown.

6
src/Address.h Normal file
View File

@@ -0,0 +1,6 @@
#pragma once
#include <stdint.h>
/// 32-bit unsigned address type (a plain typedef for now).
typedef uint32_t LinearAddress;
/// Sentinel address value with all bits set.
/// The expansion names LinearAddress: this header declares no type called
/// `Address`, so the earlier `Address(~0U)` spelling could not compile wherever
/// the macro was actually used.
#define INVALID_ADDR LinearAddress(~0U)

60
src/AutomatedPlanner.cpp Normal file
View File

@@ -0,0 +1,60 @@
#include "AutomatedPlanner.h"
#include "project.h"
#include "FollowControlFlow.h"
#include <QtCore/QDebug>
/**
* @class AutomatedPlanner
* @brief Class responsible for building command lists
*
* The goal for top level [Project] plan is to build a fully decompiled representation of source binaries
*/
// Nothing to set up: the planner performs no work at construction time.
AutomatedPlanner::AutomatedPlanner() = default;
/**
* @brief Given a state of a project, add actions that will advance the decompilation
*
* Not implemented yet: the body contains only notes about the intended rules,
* so calling it currently adds no commands.
*
* @param project project to plan for (currently unused)
*/
void AutomatedPlanner::planFor(Project &project) {
// TODO: For now this logic is sprinkled all over the place, should move it here
// IF NO BINARY IMAGE LOADED - > add SelectImage/SelectProject command
// IF NO LOADER SELECTED -> add SelectLoader command
// ...
}
/**
 * @brief Queue the next decompilation command for one function, chosen from its current step.
 *
 * Nothing is planned when the function is marked as not-to-be-decompiled or
 * when it still has commands waiting in the project's per-function queue.
 *
 * @param func function to plan for; it is passed on via shared_from_this()
 */
void AutomatedPlanner::planFor(Function & func) {
    if(func.doNotDecompile())
        return; // for functions marked as non-decompileable we don't add any commands
    //TODO: Consider cases where commands are queued, but we can still plan some additional steps
    const bool function_has_commands = Project::get()->hasCommands(func.shared_from_this());
    if(function_has_commands) {
        qDebug() << "Function "<<func.name<<"still has some commands queued, planning skipped";
        // Really skip, as the message says. Falling through would queue a
        // duplicate action or reach the "queue is empty" branches below while
        // commands are in fact still pending.
        return;
    }
    switch(func.nStep) {
    case eNotDecoded:
        addAction(func,new FollowControlFlow(func.state));
        break;
    case eDisassemblyInProgress:
        // The command queue is empty and function is in eDisassemblyInProgress state ? Switch to eDisassembled
        assert(false and "Not implemented yet");
        break;
    case eDissassembled:
        // addAction(func,new LowLevelMarkImpure(func)
        assert(false and "Not implemented yet");
        break;
    }
}
// Adds a function-level command: forwards cmd, together with the shared
// handle of func, to the project singleton.
void AutomatedPlanner::addAction(Function & func, Command * cmd)
{
    Project * const active_project = Project::get();
    active_project->addCommand(func.shared_from_this(),cmd);
}
// Adds a project-level command: hands cmd straight to the given project
// (the parameter is named `func` in the declaration, but it is the Project).
void AutomatedPlanner::addAction(Project & func, Command * cmd)
{
    Project & target_project = func;
    target_project.addCommand(cmd);
}

20
src/AutomatedPlanner.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef AUTOMATEDPLANNER_H
#define AUTOMATEDPLANNER_H
class Project;
class Function;
class Command;
/// Builds command lists that move decompilation forward, either for the
/// whole project or for a single function.
class AutomatedPlanner
{
public:
AutomatedPlanner();
/// Plan project-level actions for \a project.
void planFor(Project & project);
/// Plan the next action for \a func.
void planFor(Function & func);
protected:
/// Add \a cmd as a function-level command for \a func.
void addAction(Function &func,Command *cmd);
/// Add \a cmd as a project-level command; note that \a func is the Project here.
void addAction(Project &func,Command *cmd);
};
#endif // AUTOMATEDPLANNER_H

View File

@@ -1,15 +1,20 @@
#include "BasicBlock.h"
#include "msvc_fixes.h"
#include "Procedure.h"
#include "dcc.h"
#include "msvc_fixes.h"
#include <QtCore/QTextStream>
#include <cassert>
#include <string>
#include <boost/range/rbegin.hpp>
#include <boost/range/rend.hpp>
#include <boost/range/adaptors.hpp>
#include "BasicBlock.h"
#include "Procedure.h"
#include "dcc.h"
using namespace std;
using namespace boost;
BB *BB::Create(void *ctx, const string &s, Function *parent, BB *insertBefore)
BB *BB::Create(void */*ctx*/, const string &/*s*/, Function *parent, BB */*insertBefore*/)
{
BB *pnewBB = new BB;
pnewBB->Parent = parent;
@@ -19,7 +24,7 @@ BB *BB::Create(void *ctx, const string &s, Function *parent, BB *insertBefore)
* @arg start - basic block starts here, might be parent->Icode.end()
* @arg fin - last of basic block's instructions
*/
BB *BB::Create(iICODE start, iICODE fin, uint8_t _nodeType, int numOutEdges, Function *parent)
BB *BB::Create(const rCODE &r,eBBKind _nodeType, Function *parent)
{
BB* pnewBB;
pnewBB = new BB;
@@ -27,47 +32,30 @@ BB *BB::Create(iICODE start, iICODE fin, uint8_t _nodeType, int numOutEdges, Fun
pnewBB->immedDom = NO_DOM;
pnewBB->loopHead = pnewBB->caseHead = pnewBB->caseTail =
pnewBB->latchNode= pnewBB->loopFollow = NO_NODE;
pnewBB->instructions = make_iterator_range(start,fin);
if(start==parent->Icode.end())
{
pnewBB->instructions = make_iterator_range(parent->Icode.end(),parent->Icode.end());
}
else
{
pnewBB->instructions.advance_end(1); // 1 after fin, to create range where fin is inclusive
}
if (numOutEdges)
pnewBB->edges.resize(numOutEdges);
pnewBB->instructions = r;
/* Mark the basic block to which the icodes belong to, but only for
* real code basic blocks (ie. not interval bbs) */
if(parent)
{
if (start != parent->Icode.end())
int addr = pnewBB->begin()->loc_ip;
//setInBB should automatically handle if our range is empty
parent->Icode.SetInBB(pnewBB->instructions, pnewBB);
parent->heldBBs.push_back(pnewBB);
parent->m_cfg.push_back(pnewBB);
assert(parent->m_ip_to_bb.find(addr)==parent->m_ip_to_bb.end());
parent->m_ip_to_bb[addr] = pnewBB;
parent->m_actual_cfg.push_back(pnewBB);
pnewBB->Parent = parent;
}
if ( start != parent->Icode.end() ) /* Only for code BB's */
if ( r.begin() != parent->Icode.end() ) /* Only for code BB's */
stats.numBBbef++;
}
return pnewBB;
}
BB *BB::Create(int start, int ip, uint8_t _nodeType, int numOutEdges, Function *parent)
BB *BB::CreateIntervalBB(Function *parent)
{
iICODE st(parent->Icode.begin());
iICODE fin(parent->Icode.begin());
if(start==-1)
{
st = parent->Icode.end();
fin = parent->Icode.end();
}
else
{
advance(st,start);
advance(fin,ip);
}
return Create(st,fin,_nodeType,numOutEdges,parent);
iICODE endOfParent = parent->Icode.end();
return Create(make_iterator_range(endOfParent,endOfParent),INTERVAL_NODE,nullptr);
}
static const char *const s_nodeType[] = {"branch", "if", "case", "fall", "return", "call",
@@ -81,14 +69,14 @@ static const char *const s_loopType[] = {"noLoop", "while", "repeat", "loop", "f
void BB::display()
{
printf("\nnode type = %s, ", s_nodeType[nodeType]);
printf("start = %ld, length = %ld, #out edges = %ld\n", begin()->loc_ip, size(), edges.size());
printf("start = %d, length = %zd, #out edges = %zd\n", begin()->loc_ip, size(), edges.size());
for (size_t i = 0; i < edges.size(); i++)
{
if(edges[i].BBptr==0)
printf(" outEdge[%2d] = Unlinked out edge to %d\n",i, edges[i].ip);
if(edges[i].BBptr==nullptr)
printf(" outEdge[%2zd] = Unlinked out edge to %d\n",i, edges[i].ip);
else
printf(" outEdge[%2d] = %d\n",i, edges[i].BBptr->begin()->loc_ip);
printf(" outEdge[%2zd] = %d\n",i, edges[i].BBptr->begin()->loc_ip);
}
}
/*****************************************************************************
@@ -101,29 +89,29 @@ void BB::displayDfs()
traversed = DFS_DISP;
printf("node type = %s, ", s_nodeType[nodeType]);
printf("start = %ld, length = %ld, #in-edges = %ld, #out-edges = %ld\n",
printf("start = %d, length = %zd, #in-edges = %zd, #out-edges = %zd\n",
begin()->loc_ip, size(), inEdges.size(), edges.size());
printf("dfsFirst = %ld, dfsLast = %ld, immed dom = %ld\n",
printf("dfsFirst = %d, dfsLast = %d, immed dom = %d\n",
dfsFirstNum, dfsLastNum,
immedDom == MAX ? -1 : immedDom);
printf("loopType = %s, loopHead = %ld, latchNode = %ld, follow = %ld\n",
s_loopType[loopType],
printf("loopType = %s, loopHead = %d, latchNode = %d, follow = %d\n",
s_loopType[(int)loopType],
loopHead == MAX ? -1 : loopHead,
latchNode == MAX ? -1 : latchNode,
loopFollow == MAX ? -1 : loopFollow);
printf ("ifFollow = %ld, caseHead = %ld, caseTail = %ld\n",
printf ("ifFollow = %d, caseHead = %d, caseTail = %d\n",
ifFollow == MAX ? -1 : ifFollow,
caseHead == MAX ? -1 : caseHead,
caseTail == MAX ? -1 : caseTail);
if (nodeType == INTERVAL_NODE)
printf("corresponding interval = %ld\n", correspInt->numInt);
printf("corresponding interval = %d\n", correspInt->numInt);
else
{
int edge_idx=0;
for(BB *node : inEdges)
{
printf (" inEdge[%ld] = %ld\n", edge_idx, node->begin()->loc_ip);
printf (" inEdge[%d] = %d\n", edge_idx, node->begin()->loc_ip);
edge_idx++;
}
}
@@ -132,9 +120,9 @@ void BB::displayDfs()
for(TYPEADR_TYPE &edg : edges)
{
if (nodeType == INTERVAL_NODE)
printf(" outEdge[%ld] = %ld\n", i, edg.BBptr->correspInt->numInt);
printf(" outEdge[%d] = %d\n", i, edg.BBptr->correspInt->numInt);
else
printf(" outEdge[%d] = %ld\n", i, edg.BBptr->begin()->loc_ip);
printf(" outEdge[%d] = %d\n", i, edg.BBptr->begin()->loc_ip);
++i;
}
printf("----\n");
@@ -146,21 +134,22 @@ void BB::displayDfs()
pb.BBptr->displayDfs();
}
}
/* Recursive procedure that writes the code for the given procedure, pointed
* to by pBB.
* Parameters: pBB: pointer to the cfg.
* Icode: pointer to the Icode array for the cfg graph of the
* current procedure.
* indLevel: indentation level - used for formatting.
* numLoc: last # assigned to local variables */
ICODE* BB::writeLoopHeader(int &indLevel, Function* pProc, int *numLoc, BB *&latch, boolT &repCond)
/** Recursive procedure that writes the code for the given procedure, pointed
to by pBB.
\param indLevel indentation level - used for formatting.
\param numLoc: last # assigned to local variables
*/
ICODE* BB::writeLoopHeader(int &indLevel, Function* pProc, int *numLoc, BB *&latch, bool &repCond)
{
if(loopType == eNodeHeaderType::NO_TYPE)
return nullptr;
latch = pProc->m_dfsLast[this->latchNode];
std::ostringstream ostr;
QString ostr_contents;
QTextStream ostr(&ostr_contents);
ICODE* picode;
switch (loopType)
{
case WHILE_TYPE:
case eNodeHeaderType::WHILE_TYPE:
picode = &this->back();
/* Check for error in while condition */
@@ -179,45 +168,48 @@ ICODE* BB::writeLoopHeader(int &indLevel, Function* pProc, int *numLoc, BB *&lat
* the THEN path of the header node */
if (edges[ELSE].BBptr->dfsLastNum == loopFollow)
{
picode->hl()->replaceExpr(picode->hl()->expr()->inverse());
picode->hlU()->replaceExpr(picode->hl()->expr()->inverse());
}
{
string e=walkCondExpr (picode->hl()->expr(), pProc, numLoc);
QString e=picode->hl()->expr()->walkCondExpr (pProc, numLoc);
ostr << "\n"<<indentStr(indLevel)<<"while ("<<e<<") {\n";
}
picode->invalidate();
break;
case REPEAT_TYPE:
case eNodeHeaderType::REPEAT_TYPE:
ostr << "\n"<<indentStr(indLevel)<<"do {\n";
picode = &latch->back();
picode->invalidate();
break;
case ENDLESS_TYPE:
case eNodeHeaderType::ENDLESS_TYPE:
ostr << "\n"<<indentStr(indLevel)<<"for (;;) {\n";
picode = &latch->back();
break;
}
cCode.appendCode(ostr.str());
ostr.flush();
cCode.appendCode(ostr_contents);
stats.numHLIcode += 1;
indLevel++;
return picode;
}
bool BB::isEndOfPath(int latch_node_idx) const
{
return nodeType == RETURN_NODE || nodeType == TERMINATE_NODE ||
nodeType == NOWHERE_NODE || (dfsLastNum == latch_node_idx);
return nodeType == RETURN_NODE or nodeType == TERMINATE_NODE or
nodeType == NOWHERE_NODE or dfsLastNum == latch_node_idx;
}
void BB::writeCode (int indLevel, Function * pProc , int *numLoc,int _latchNode, int _ifFollow)
{
int follow; /* ifFollow */
BB * succ, *latch; /* Successor and latching node */
ICODE * picode; /* Pointer to HLI_JCOND instruction */
char *l; /* Pointer to HLI_JCOND expression */
boolT emptyThen, /* THEN clause is empty */
QString l; /* Pointer to HLI_JCOND expression */
bool emptyThen, /* THEN clause is empty */
repCond; /* Repeat condition for while() */
/* Check if this basic block should be analysed */
if ((_ifFollow != UN_INIT) && (this == pProc->m_dfsLast[_ifFollow]))
if ((_ifFollow != UN_INIT) and (this == pProc->m_dfsLast[_ifFollow]))
return;
if (wasTraversedAtLevel(DFS_ALPHA))
@@ -226,18 +218,17 @@ void BB::writeCode (int indLevel, Function * pProc , int *numLoc,int _latchNode,
/* Check for start of loop */
repCond = false;
latch = NULL;
if (loopType)
{
latch = nullptr;
picode=writeLoopHeader(indLevel, pProc, numLoc, latch, repCond);
}
/* Write the code for this basic block */
if (repCond == false)
{
std::ostringstream ostr;
QString ostr_contents;
QTextStream ostr(&ostr_contents);
writeBB(ostr,indLevel, pProc, numLoc);
cCode.appendCode(ostr.str());
ostr.flush();
cCode.appendCode(ostr_contents);
}
/* Check for end of path */
@@ -245,12 +236,12 @@ void BB::writeCode (int indLevel, Function * pProc , int *numLoc,int _latchNode,
return;
/* Check type of loop/node and process code */
if ( loopType) /* there is a loop */
if ( loopType!=eNodeHeaderType::NO_TYPE ) /* there is a loop */
{
assert(latch);
if (this != latch) /* loop is over several bbs */
{
if (loopType == WHILE_TYPE)
if (loopType == eNodeHeaderType::WHILE_TYPE)
{
succ = edges[THEN].BBptr;
if (succ->dfsLastNum == loopFollow)
@@ -266,9 +257,10 @@ void BB::writeCode (int indLevel, Function * pProc , int *numLoc,int _latchNode,
/* Loop epilogue: generate the loop trailer */
indLevel--;
if (loopType == WHILE_TYPE)
if (loopType == eNodeHeaderType::WHILE_TYPE)
{
std::ostringstream ostr;
QString ostr_contents;
QTextStream ostr(&ostr_contents);
/* Check if there is need to repeat other statements involved
* in while condition, then, emit the loop trailer */
if (repCond)
@@ -276,18 +268,23 @@ void BB::writeCode (int indLevel, Function * pProc , int *numLoc,int _latchNode,
writeBB(ostr,indLevel+1, pProc, numLoc);
}
ostr <<indentStr(indLevel)<< "} /* end of while */\n";
cCode.appendCode(ostr.str());
ostr.flush();
cCode.appendCode(ostr_contents);
}
else if (loopType == ENDLESS_TYPE)
else if (loopType == eNodeHeaderType::ENDLESS_TYPE)
cCode.appendCode( "%s} /* end of loop */\n",indentStr(indLevel));
else if (loopType == REPEAT_TYPE)
else if (loopType == eNodeHeaderType::REPEAT_TYPE)
{
QString e = "//*failed*//";
if (picode->hl()->opcode != HLI_JCOND)
reportError (REPEAT_FAIL);
{
string e=walkCondExpr (picode->hl()->expr(), pProc, numLoc);
cCode.appendCode( "%s} while (%s);\n", indentStr(indLevel),e.c_str());
reportError (REPEAT_FAIL);
}
else
{
e=picode->hl()->expr()->walkCondExpr (pProc, numLoc);
}
cCode.appendCode( "%s} while (%s);\n", indentStr(indLevel),qPrintable(e));
}
/* Recurse on the loop follow */
@@ -319,13 +316,13 @@ void BB::writeCode (int indLevel, Function * pProc , int *numLoc,int _latchNode,
if (succ->dfsLastNum != follow) /* THEN part */
{
l = writeJcond ( *back().hl(), pProc, numLoc);
cCode.appendCode( "\n%s%s", indentStr(indLevel-1), l);
cCode.appendCode( "\n%s%s", indentStr(indLevel-1), qPrintable(l));
succ->writeCode (indLevel, pProc, numLoc, _latchNode,follow);
}
else /* empty THEN part => negate ELSE part */
{
l = writeJcondInv ( *back().hl(), pProc, numLoc);
cCode.appendCode( "\n%s%s", indentStr(indLevel-1), l);
cCode.appendCode( "\n%s%s", indentStr(indLevel-1), qPrintable(l));
edges[ELSE].BBptr->writeCode (indLevel, pProc, numLoc, _latchNode, follow);
emptyThen = true;
}
@@ -345,7 +342,7 @@ void BB::writeCode (int indLevel, Function * pProc , int *numLoc,int _latchNode,
}
/* else (empty ELSE part) */
}
else if (! emptyThen) /* already visited => emit label */
else if (not emptyThen) /* already visited => emit label */
{
cCode.appendCode( "%s}\n%selse {\n",
indentStr(indLevel-1), indentStr(indLevel - 1));
@@ -361,7 +358,7 @@ void BB::writeCode (int indLevel, Function * pProc , int *numLoc,int _latchNode,
else /* no follow => if..then..else */
{
l = writeJcond ( *back().hl(), pProc, numLoc);
cCode.appendCode( "%s%s", indentStr(indLevel-1), l);
cCode.appendCode( "%s%s", indentStr(indLevel-1), qPrintable(l));
edges[THEN].BBptr->writeCode (indLevel, pProc, numLoc, _latchNode, _ifFollow);
cCode.appendCode( "%s}\n%selse {\n", indentStr(indLevel-1), indentStr(indLevel - 1));
edges[ELSE].BBptr->writeCode (indLevel, pProc, numLoc, _latchNode, _ifFollow);
@@ -384,7 +381,7 @@ void BB::writeCode (int indLevel, Function * pProc , int *numLoc,int _latchNode,
* Args: pBB: pointer to the current basic block.
* Icode: pointer to the array of icodes for current procedure.
* lev: indentation level - used for formatting. */
void BB::writeBB(std::ostream &ostr,int lev, Function * pProc, int *numLoc)
void BB::writeBB(QTextStream &ostr,int lev, Function * pProc, int *numLoc)
{
/* Save the index into the code table in case there is a later goto
* into this instruction (first instruction of the BB) */
@@ -394,10 +391,10 @@ void BB::writeBB(std::ostream &ostr,int lev, Function * pProc, int *numLoc)
for(ICODE &pHli : instructions)
{
if ((pHli.type == HIGH_LEVEL) && ( pHli.valid() )) //TODO: use filtering range here.
if ((pHli.type == HIGH_LEVEL_ICODE) and ( pHli.valid() )) //TODO: use filtering range here.
{
std::string line = pHli.hl()->write1HlIcode(pProc, numLoc);
if (!line.empty())
QString line = pHli.hl()->write1HlIcode(pProc, numLoc);
if (not line.isEmpty())
{
ostr<<indentStr(lev)<<line;
stats.numHLIcode++;
@@ -410,27 +407,26 @@ void BB::writeBB(std::ostream &ostr,int lev, Function * pProc, int *numLoc)
iICODE BB::begin()
{
return instructions.begin();//range_start;
return instructions.begin();
}
iICODE BB::end() const
{
return instructions.end();//range_end
return instructions.end();
}
ICODE &BB::back()
{
return instructions.back();//*rbegin();
return instructions.back();
}
// Number of instructions in this basic block, obtained by measuring the
// distance between the two ends of the `instructions` range.
size_t BB::size()
{
    const auto first_insn = instructions.begin();
    const auto past_last_insn = instructions.end();
    return distance(first_insn,past_last_insn);
}
ICODE &BB::front()
{
return instructions.front();//*begin();
return instructions.front();
}
riICODE BB::rbegin()

View File

@@ -1,7 +1,131 @@
SET(dcc_test_SOURCES tests/comwrite.cpp)
include_directories(${GMOCK_INCLUDE_DIRS} ${GMOCK_ROOT}/gtest/include)
enable_testing()
add_executable(tester ${dcc_test_SOURCES})
target_link_libraries(tester
${GMOCK_BOTH_LIBRARIES} ${REQ_LLVM_LIBRARIES})
add_test(dcc-tests tester)
# Sources (plus a few headers) that are compiled into the dcc_lib static library
# via the ADD_LIBRARY(dcc_lib ...) call later in this file.
# Entries written without an extension (RegisterNode, MemoryChunk, ...) leave it
# to CMake to locate the matching source file.
set(dcc_LIB_SOURCES
CallConvention.cpp
ast.cpp
backend.cpp
bundle.cpp
chklib.cpp
comwrite.cpp
control.cpp
dataflow.cpp
disassem.cpp
DccFrontend.cpp
error.cpp
fixwild.cpp
graph.cpp
hlicode.cpp
hltype.cpp
machine_x86.cpp
icode.cpp
RegisterNode
idioms.cpp
idioms/idiom1.cpp
idioms/arith_idioms.cpp
idioms/call_idioms.cpp
idioms/epilogue_idioms.cpp
idioms/mov_idioms.cpp
idioms/neg_idioms.cpp
idioms/shift_idioms.cpp
idioms/xor_idioms.cpp
locident.cpp
liveness_set.cpp
parser.h
parser.cpp
procs.cpp
project.cpp
Procedure.cpp
proplong.cpp
reducible.cpp
scanner.cpp
symtab.cpp
udm.cpp
BasicBlock.cpp
dcc_interface.cpp
MemoryChunk
MemorySegment
MemorySegmentCoordinator
Command.cpp
Command.h
Loaders.cpp
Loaders.h
FollowControlFlow.cpp
FollowControlFlow.h
AutomatedPlanner
)
# Qt Widgets user-interface sources (.ui forms with their .h/.cpp pairs).
# They are added only to the dcc_original executable, not to dcc_lib.
# NOTE(review): the .ui forms presumably rely on AUTOUIC/AUTOMOC being enabled
# elsewhere in the build - confirm.
set(dcc_UI_SOURCES
ui/DccMainWindow.ui
ui/DccMainWindow.h
ui/DccMainWindow.cpp
ui/FunctionViewWidget.ui
ui/FunctionViewWidget.h
ui/FunctionViewWidget.cpp
ui/FunctionListDockWidget.ui
ui/FunctionListDockWidget.cpp
ui/FunctionListDockWidget.h
ui/RenderTags.cpp
ui/RenderTags.h
ui/CommandQueueView.cpp
ui/CommandQueueView.h
ui/CommandQueueView.ui
)
# Headers living in ../include. They are attached to dcc_lib (so IDEs list them)
# and grouped under the "Headers" source group below.
set(dcc_HEADERS
../include/ast.h
../include/bundle.h
../include/BinaryImage.h
../include/DccFrontend.h
../include/Enums.h
../include/dcc.h
../include/disassem.h
../include/dosdcc.h
../include/error.h
../include/graph.h
../include/hlicode.h
../include/machine_x86.h
../include/icode.h
../include/idioms/idiom.h
../include/idioms/idiom1.h
../include/idioms/arith_idioms.h
../include/idioms/call_idioms.h
../include/idioms/epilogue_idioms.h
../include/idioms/mov_idioms.h
../include/idioms/neg_idioms.h
../include/idioms/shift_idioms.h
../include/idioms/xor_idioms.h
../include/locident.h
../include/CallConvention.h
../include/project.h
../include/scanner.h
../include/state.h
../include/symtab.h
../include/types.h
../include/Procedure.h
../include/StackFrame.h
../include/BasicBlock.h
../include/dcc_interface.h
)
# IDE grouping for the public headers.
SOURCE_GROUP(Headers FILES ${dcc_HEADERS})
# Sources that belong only to the executable (the program entry point).
set(dcc_SOURCES
dcc.cpp
)
SOURCE_GROUP(Source FILES ${dcc_SOURCES} ${dcc_LIB_SOURCES})
# Static library holding the decompiler core; only needs Qt Core.
ADD_LIBRARY(dcc_lib STATIC ${dcc_LIB_SOURCES} ${dcc_HEADERS})
qt5_use_modules(dcc_lib Core)
# The dcc_original executable: entry point + UI sources, linked against the core library.
ADD_EXECUTABLE(dcc_original ${dcc_SOURCES} ${dcc_UI_SOURCES})
ADD_DEPENDENCIES(dcc_original dcc_lib)
TARGET_LINK_LIBRARIES(dcc_original dcc_lib dcc_hash disasm_s)
qt5_use_modules(dcc_original Core Widgets)
# C++11 is requested for dcc_original only.
# NOTE(review): dcc_lib gets no CXX_STANDARD property here - confirm it is set elsewhere.
SET_PROPERTY(TARGET dcc_original PROPERTY CXX_STANDARD 11)
SET_PROPERTY(TARGET dcc_original PROPERTY CXX_STANDARD_REQUIRED ON)
# Unit tests are built only when the dcc_build_tests variable is enabled.
if(dcc_build_tests)
ADD_SUBDIRECTORY(tests)
endif()

137
src/CallConvention.cpp Normal file
View File

@@ -0,0 +1,137 @@
#include "CallConvention.h"
#include "Procedure.h"
#include <QtCore/QTextStream>
#include <ostream>
#include <cassert>
/// Selects where the function's return value lives, based on its return type:
/// long values use the DX:AX pair, words use AX and bytes use AL.
/// Any other return type leaves the return location untouched.
static void calculateReturnLocations(Function *func) {
    const auto ret_type = func->getReturnType();
    if(ret_type==TYPE_LONG_SIGN || ret_type==TYPE_LONG_UNSIGN) {
        func->getFunctionType()->setReturnLocation(LONGID_TYPE(rDX,rAX));
    }
    else if(ret_type==TYPE_WORD_SIGN || ret_type==TYPE_WORD_UNSIGN) {
        func->getFunctionType()->setReturnLocation(rAX);
    }
    else if(ret_type==TYPE_BYTE_SIGN || ret_type==TYPE_BYTE_UNSIGN) {
        func->getFunctionType()->setReturnLocation(rAL);
    }
}
/// Lays out every argument of the function's type consecutively on the stack,
/// starting at offset 2, and records the resulting parameter area size in cbParam.
/// Nothing is done when the frame already holds as many entries as the type has arguments.
static void calculateArgLocations_allOnStack(Function *func) {
    FunctionType *fn_type = func->type;
    if(func->args.size() == fn_type->ContainedTys.size())
        return;
    int next_offset = 2;
    func->args.resize(fn_type->ContainedTys.size());
    func->args.numArgs = 0;
    for(Type & param_type : fn_type->ContainedTys) {
        STKSYM &slot(func->args[func->args.numArgs]);
        slot.label = next_offset;
        slot.type = param_type.dcc_type;
        slot.size = TypeContainer::typeSize(param_type.dcc_type);
        slot.setArgName(func->args.numArgs);
        next_offset += slot.size;
        // keep the frame's upper bound in step with the last placed argument
        func->args.m_maxOff = next_offset;
        ++func->args.numArgs;
    }
    func->cbParam = next_offset;
}
/**
 * Rebuilds func->type so that its argument list mirrors the stack frame symbols
 * that have a positive label (stack offset).
 *
 * Offsets are walked from the lowest to the highest recorded label. A recorded
 * symbol contributes its own type; any gap between recorded symbols is filled
 * with synthetic LONG / WORD / BYTE arguments, largest first.
 * The return type, retVal and var-arg flag of the previous FunctionType are
 * carried over to the new one, and the previous FunctionType is deleted.
 * Nothing is changed when the frame has no symbol with a positive label.
 */
static void rebuildArguments_FromStackLayout(Function *func) {
STKFRAME &stk(func->args);
// stack offset -> symbol, ordered by offset
std::map<int,const STKSYM *> arg_locations;
FunctionType *f;
for(const STKSYM & s: stk) {
if(s.label>0) {
arg_locations[s.label] = &s;
}
}
if(arg_locations.empty())
return;
std::vector<Type> argtypes;
auto stack_loc_iter = arg_locations.begin();
// NOTE(review): if the symbol at the highest offset has size 0, stack_loc_iter reaches end()
// while the loop condition still holds - confirm symbol sizes are always > 0.
// NOTE(review): if a symbol is larger than the distance to the next one, till_next_loc goes
// negative and the remaining range is padded with BYTE arguments - confirm symbols never overlap.
for(int i=stack_loc_iter->first; i<=arg_locations.rbegin()->first; ) {
int till_next_loc=stack_loc_iter->first-i;
if(till_next_loc==0) {
// exactly at a recorded symbol: take its type and skip over it
int entry_size=stack_loc_iter->second->size;
argtypes.push_back({stack_loc_iter->second->type});
i+=entry_size;
++stack_loc_iter;
} else {
// inside a gap: pad with the largest filler that fits (4, 2 or 1 bytes)
if(till_next_loc>=4) {
argtypes.push_back({TYPE_LONG_SIGN});
i+=4;
} else if(till_next_loc>=2) {
argtypes.push_back({TYPE_WORD_SIGN});
i+=2;
} else {
argtypes.push_back({TYPE_BYTE_SIGN});
i+=1;
}
}
}
f = FunctionType::get({func->type->getReturnType()},argtypes,func->type->isVarArg());
f->retVal = func->type->retVal;
// the previous type object is replaced, not updated in place
delete func->type;
func->type = f;
}
/// Returns the shared calling-convention object for the requested kind.
/// All three objects are allocated on the first call and are never freed.
/// An unrecognised kind trips the assert and yields nullptr.
CConv *CConv::create(CC_Type v)
{
    static C_CallingConvention * const c_conv = new C_CallingConvention;
    static Pascal_CallingConvention * const pascal_conv = new Pascal_CallingConvention;
    static Unknown_CallingConvention * const unknown_conv = new Unknown_CallingConvention;
    if(v==UNKNOWN)
        return unknown_conv;
    if(v==C)
        return c_conv;
    if(v==PASCAL)
        return pascal_conv;
    assert(false);
    return nullptr;
}
/// Emits the comment line that names the C calling convention.
void C_CallingConvention::writeComments(QTextStream & ostr)
{
    static const char description[] = " * C calling convention.\n";
    ostr << description;
}
/// Computes the return location, then rebuilds the argument types from the
/// stack frame and finally assigns stack offsets to all arguments.
void C_CallingConvention::calculateStackLayout(Function *func)
{
    // step 1: where the result is returned
    calculateReturnLocations(func);
    // step 2: argument types, derived from the stack symbols
    rebuildArguments_FromStackLayout(func);
    // step 3: argument offsets, everything on the stack
    calculateArgLocations_allOnStack(func);
}
/// Emits the comment line that names the Pascal calling convention.
void Pascal_CallingConvention::writeComments(QTextStream & ostr)
{
    static const char description[] = " * Pascal calling convention.\n";
    ostr << description;
}
/// Computes the return location, then rebuilds the argument types from the
/// stack frame and finally assigns stack offsets to all arguments.
void Pascal_CallingConvention::calculateStackLayout(Function *func)
{
    // step 1: where the result is returned
    calculateReturnLocations(func);
    //TODO: pascal args are passed in reverse order ?
    // step 2: argument types, derived from the stack symbols
    rebuildArguments_FromStackLayout(func);
    // step 3: argument offsets, everything on the stack
    calculateArgLocations_allOnStack(func);
}
/// Emits the comment line used when the calling convention is not known.
void Unknown_CallingConvention::writeComments(QTextStream & ostr)
{
    static const char description[] = " * Unknown calling convention.\n";
    ostr << description;
}
/// Computes the return location, then rebuilds the argument types from the
/// stack frame and finally assigns stack offsets to all arguments.
void Unknown_CallingConvention::calculateStackLayout(Function *func)
{
    // step 1: where the result is returned
    calculateReturnLocations(func);
    // step 2: argument types, derived from the stack symbols
    rebuildArguments_FromStackLayout(func);
    // step 3: argument offsets, everything on the stack
    calculateArgLocations_allOnStack(func);
}

138
src/Command.cpp Normal file
View File

@@ -0,0 +1,138 @@
#include "Command.h"
#include "DccFrontend.h"
#include "dcc.h"
#include "project.h"
#include "Loaders.h"
#include <QFile>
/**
 * Picks a loader able to handle the file named by m_filename and installs it
 * in the context's project.
 *
 * Every failure path records a message in the context AND returns false, so
 * that a command which recorded a failure is never also treated as executed.
 *
 * @param ctx command context; ctx->m_project must be set.
 * @return true when a loader was selected, false otherwise.
 */
bool LoaderSelection::execute(CommandContext * ctx)
{
    Project *proj=ctx->m_project;
    if(nullptr==proj) {
        ctx->recordFailure(this,"No active project ");
        return false;
    }
    if(m_filename.isEmpty()) {
        ctx->recordFailure(this,"No executable path given to loader selector");
        return false;
    }
    QFile finfo(m_filename);
    /* Open the input file */
    if(not finfo.open(QFile::ReadOnly)) {
        ctx->recordFailure(this,QString("Cannot open file %1").arg(m_filename));
        return false;
    }
    /* The loaders need at least the first 2 bytes to check the EXE signature */
    if (finfo.size()<=2)
    {
        ctx->recordFailure(this,QString("File %1 is too small").arg(m_filename));
        return false; // previously fell through and kept probing the file
    }
    /* Probe objects are only used for canLoad(); the project receives a fresh instance */
    ExeLoader exe_loader;
    if(exe_loader.canLoad(finfo)) {
        proj->setLoader(new ExeLoader);
        return true;
    }
    ComLoader com_loader;
    if(com_loader.canLoad(finfo)) {
        proj->setLoader(new ComLoader);
        return true;
    }
    ctx->recordFailure(this,QString("None of the available loaders can load file %1").arg(m_filename));
    return false; // previously reported success although no loader was set
}
/**
 * Loads the file named by m_filename into the project's PROG using the loader
 * previously selected for the project, then queues creation of the "start"
 * function at the program's entry address.
 *
 * @return false (with a failure recorded in ctx) when there is no project,
 *         no selected loader, the file cannot be opened or loading fails.
 */
bool LoaderApplication::execute(CommandContext * ctx)
{
    Project * const project = ctx->m_project;
    if(project==nullptr) {
        ctx->recordFailure(this,"No active project ");
        return false;
    }
    if(!project->m_selected_loader) {
        ctx->recordFailure(this,QString("No loader selected for project %1").arg(project->project_name()));
        return false;
    }
    QFile input(m_filename);
    if(!input.open(QFile::ReadOnly)) {
        ctx->recordFailure(this,QString("Cannot open file %1").arg(m_filename));
        return false;
    }
    PROG &prog(project->prog);
    if(!project->m_selected_loader->load(prog,input)) {
        ctx->recordFailure(this,QString("Failure during load: %1").arg(m_filename));
        return false;
    }
    if (option.verbose)
        prog.displayLoadInfo();

    /* The entry function starts out with an unknown type and calling convention */
    FunctionType *start_type = FunctionType::get(Type{TYPE_UNKNOWN},{ },false);
    start_type->setCallingConvention(CConv::UNKNOWN);
    /* Queue creation of the initial procedure at the program start address (CS:IP) */
    project->addCommand(new CreateFunction("start",
                                           SegOffAddr {prog.segMain,((uint32_t)prog.initCS << 4) + prog.initIP},
                                           start_type));
    return true;
}
/// Appends a command to the pending queue unless the queue already holds the
/// configured maximum number of commands.
/// @return true when the command was queued, false when the queue was full
///         (in that case the command is not stored by the stream).
bool CommandStream::add(Command * c) {
    const bool has_room = m_commands.size()<m_maximum_command_count;
    if(has_room)
        m_commands.push_back(c);
    return has_room;
}
void CommandStream::setMaximumCommandCount(int maximum_command_count) {
m_maximum_command_count = maximum_command_count;
}
/**
 * Executes queued commands in order until the queue is empty or one fails.
 *
 * Emits streamCompleted exactly once: with false as soon as a command fails
 * (remaining commands stay queued), with true when every command succeeded.
 * Successfully executed commands are moved to m_recently_executed; a failed
 * command is removed from the queue and not stored by the stream.
 */
void CommandStream::processAll(CommandContext *ctx)
{
    while(not m_commands.isEmpty()) {
        Command *cmd = m_commands.takeFirst();
        if(false==cmd->execute(ctx)) {
            emit streamCompleted(false);
            return; // do not fall through to the success notification below
        }
        m_recently_executed.push_back(cmd);
    }
    emit streamCompleted(true);
}
/// Executes at most one queued command and always emits streamChanged afterwards.
/// @return false only when a command was run and it failed; true otherwise
///         (including when the queue was empty).
bool CommandStream::processOne(CommandContext *ctx)
{
    bool succeeded = true;
    if(not m_commands.isEmpty()) {
        Command *next = m_commands.takeFirst();
        succeeded = next->execute(ctx);
        if(succeeded)
            m_recently_executed.push_back(next);
    }
    emit streamChanged();
    return succeeded;
}
/// Deletes every pending and every recently executed command and empties both lists.
void CommandStream::clear()
{
    // pending commands
    qDeleteAll(m_commands);
    m_commands.clear();
    // history of executed commands
    qDeleteAll(m_recently_executed);
    m_recently_executed.clear();
}
void CommandContext::reset()
{
for(int i=0; i<m_failures.size(); ++i) {
delete m_failures[i].first;
}
m_failures.clear();
}

95
src/Command.h Normal file
View File

@@ -0,0 +1,95 @@
#ifndef COMMAND_H
#define COMMAND_H
#include <memory>
#include <QtCore/QObject>
#include <QtCore/QVector>
#include <QtCore/QPair>
class Project;
class Function;
typedef std::shared_ptr<Function> PtrFunction;
/// Level tag stored in every Command at construction time.
/// In this file the loader commands are created with eProject.
/// NOTE(review): presumably the granularity a command operates on - confirm intended use.
enum CommandLevel {
eProject,
eBinary,
eFunction,
eBasicBlock,
eInstruction
};
class Command;
/**
 * State shared with every Command::execute() call: the project being worked
 * on, the function currently in focus and the list of recorded failures.
 */
class CommandContext {
public:
    /// Appends a (command, message) pair to the failure list.
    void recordFailure(Command *cmd,QString error_message) {
        m_failures.push_back({cmd,error_message});
    }
    /// Active project. Starts out as nullptr so the "no active project" checks
    /// performed by the commands are meaningful for a fresh context.
    Project *m_project = nullptr;
    /// Function the current command works on (may be empty).
    PtrFunction m_func;
    /// Failures recorded so far, oldest first.
    QVector<QPair<Command *,QString>> m_failures;
    /// Drops all recorded failures (defined in Command.cpp).
    void reset();
};
/**
 * Base class of all commands: carries a display name and a level tag, and
 * exposes execute() for subclasses to override.
 */
class Command
{
QString m_command_name; // display name, returned by name()
CommandLevel m_level; // level given at construction; not read inside this class
public:
Command(QString n,CommandLevel level) : m_command_name(n),m_level(level) {}
virtual ~Command() {}
QString name() const { return m_command_name;}
// Description of this particular instance; defaults to the plain command name.
virtual QString instanceDescription() const { return m_command_name; }
// Default implementation does no work and reports failure.
virtual bool execute(CommandContext *) { return false; }
};
class CompoundCommand : public Command {
QVector<Command *> m_contained;
public:
CompoundCommand(QString n,CommandLevel l) : Command(n,l) {
}
void addCommand(Command *c) {
m_contained.push_back(c);
}
bool execute(CommandContext * ctx) {
for(Command * c : m_contained) {
if(!c->execute(ctx))
return false;
}
return true;
}
};
/**
 * Queue of pending commands plus a list of the commands that were executed
 * successfully. Emits Qt signals when processing finishes or the queue changes.
 */
class CommandStream : public QObject
{
Q_OBJECT
int m_maximum_command_count=5; // add() refuses new commands once the queue reaches this size
public:
QVector<Command *> m_recently_executed; // commands whose execute() returned true
QVector<Command *> m_commands; // pending commands, processed front to back
bool add(Command *c);
void setMaximumCommandCount(int maximum_command_count);
bool processOne(CommandContext *ctx);
void processAll(CommandContext *ctx);
void clear();
bool isEmpty() const { return m_commands.isEmpty(); }
signals:
// emitted by processAll(); the argument tells whether all commands succeeded
void streamCompleted(bool success);
// emitted by processOne() after each attempt
void streamChanged();
};
// Command that chooses a loader for the file given at construction.
// Effect: a loader has been selected and set in the current project.
class LoaderSelection : public Command {
QString m_filename; // path of the executable to probe
public:
virtual ~LoaderSelection() {}
LoaderSelection(QString f) : Command("Select loader",eProject),m_filename(f) {}
bool execute(CommandContext * ctx) override;
};
// Trigger: Project->m_selected_loader has changed.
// Effect: the PROG object is loaded using the current loader.
class LoaderApplication : public Command {
QString m_filename; // path of the executable to load
public:
virtual ~LoaderApplication() {}
LoaderApplication(QString f) : Command("Apply loader",eProject),m_filename(f) {}
bool execute(CommandContext * ctx) override;
};
#endif // COMMAND_H

230
src/DccFrontend.cpp Normal file
View File

@@ -0,0 +1,230 @@
#include "DccFrontend.h"
#include "Loaders.h"
#include "dcc.h"
#include "msvc_fixes.h"
#include "project.h"
#include "disassem.h"
#include "CallGraph.h"
#include "Command.h"
#include "chklib.h"
#include <QtCore/QFileInfo>
#include <QtCore/QDebug>
#include <cstdio>
//static void LoadImage(char *filename);
static void displayMemMap(void);
/****************************************************************************
* displayLoadInfo - Displays low level loader type info.
***************************************************************************/
void PROG::displayLoadInfo(void)
{
    /* Kind of binary that was loaded */
    printf("File type is %s\n", fCOM ? "COM" : "EXE");
    // if (not fCOM) {
    // printf("Signature = %02X%02X\n", header.sigLo, header.sigHi);
    // printf("File size %% 512 = %04X\n", LH(&header.lastPageSize));
    // printf("File size / 512 = %04X pages\n", LH(&header.numPages));
    // printf("# relocation items = %04X\n", LH(&header.numReloc));
    // printf("Offset to load image = %04X paras\n", LH(&header.numParaHeader));
    // printf("Minimum allocation = %04X paras\n", LH(&header.minAlloc));
    // printf("Maximum allocation = %04X paras\n", LH(&header.maxAlloc));
    // }
    /* Image size and initial register values */
    printf("Load image size = %08lX\n", cbImage); // - sizeof(PSP)
    printf("Initial SS:SP = %04X:%04X\n", initSS, initSP);
    printf("Initial CS:IP = %04X:%04X\n", initCS, initIP);

    /* The relocation table is dumped only in very-verbose mode and when it is not empty */
    const bool show_relocations = option.VeryVerbose and cReloc;
    if (show_relocations)
    {
        printf("\nRelocation Table\n");
        for (int idx = 0; idx < cReloc; ++idx)
            printf("%06X -> [%04X]\n", relocTable[idx],LH(image() + relocTable[idx]));
    }
    printf("\n");
}
/*****************************************************************************
* fill - Fills line for displayMemMap()
****************************************************************************/
/* Writes one 16-entry row of the memory map into bf (32 characters plus the
 * terminating NUL). Each entry is a space followed by the marker selected by
 * the 2-bit map value of the byte at ip; positions past the end of the image
 * get a blank marker. */
static void fill(int ip, char *bf)
{
    static uint8_t type[4] = {'.', 'd', 'c', 'x'};
    PROG &prog(Project::get()->prog);
    for (int column = 0; column < 16; ++column, ++ip)
    {
        char marker = ' ';
        if (ip < prog.cbImage)
            marker = type[(prog.map[ip >> 2] >> ((ip & 3) * 2)) & 3]; /* 4 entries per map byte */
        *bf++ = ' ';
        *bf++ = marker;
    }
    *bf = '\0';
}
/*****************************************************************************
* displayMemMap - Displays the memory bitmap
****************************************************************************/
/* Prints the memory map 16 image bytes per row, using fill() to render a row.
 * When a row consists of one repeated marker and the next two rows are identical
 * to it, a single " :" line is printed and the identical rows are skipped. */
static void displayMemMap(void)
{
PROG &prog(Project::get()->prog);
char c, b1[33], b2[33], b3[33]; /* row buffers: 16 * 2 characters + NUL */
uint8_t i;
int ip = 0;
printf("\nMemory Map\n");
while (ip < prog.cbImage)
{
fill(ip, b1);
printf("%06X %s\n", ip, b1);
ip += 16;
/* markers sit at the odd indices 1,3,..,31; i ends up as 33 when all 16 are equal */
for (i = 3, c = b1[1]; i < 32 and c == b1[i]; i += 2)
; /* Check if all same */
if (i > 32)
{
fill(ip, b2); /* Skip until next two are not same */
fill(ip+16, b3);
/* true only when both following rows equal the row just printed */
if (not (strcmp(b1, b2) || strcmp(b1, b3)))
{
printf(" :\n");
do
{
ip += 16;
fill(ip+16, b1); /* look one row ahead of the new position */
} while (0==strcmp(b1, b2));
}
}
}
printf("\n");
}
/// Constructs the front end as a QObject child of parent; no other setup is done.
DccFrontend::DccFrontend(QObject *parent)
    : QObject(parent)
{}
/*****************************************************************************
* FrontEnd - invokes the loader, parser, disassembler (if asm1), icode
* rewritter, and displays any useful information.
****************************************************************************/
bool DccFrontend::FrontEnd ()
{
/* Do depth first flow analysis building call graph and procedure list,
* and attaching the I-code to each procedure */
parse (*Project::get());
if (option.asm1)
{
qWarning() << "dcc: writing assembler file "<<asm1_name<<'\n';
}
/* Search through code looking for impure references and flag them */
Disassembler ds(1);
for(PtrFunction &f : Project::get()->pProcList)
{
f->markImpure();
if (option.asm1)
{
ds.disassem(f);
}
}
if (option.Interact)
{
interactDis(Project::get()->pProcList.front(), 0); /* Interactive disassembler */
}
/* Converts jump target addresses to icode offsets */
for(PtrFunction &f : Project::get()->pProcList)
{
f->bindIcodeOff();
}
/* Print memory bitmap */
if (option.Map)
displayMemMap();
return(true); // we no longer own proj !
}
/*****************************************************************************
* LoadImage
****************************************************************************/
/* Parses the program, builds the call graph, and returns the list of
* procedures found */
void DccFrontend::parse(Project &proj)
{
    /* First command: set up the initial machine state */
    Command *init_state = new MachineStateInitialization;
    proj.addCommand(init_state);
    /* Second command: look for the main function */
    Command *find_main = new FindMain;
    proj.addCommand(find_main);
}
/// Seeds the project's entry state from the loaded program: ES and DS are set
/// to 0, CS/SS/SP come from the program's initial values, IP is the linear
/// address computed from initCS:initIP. Also resets the synthetic label counter.
/// @return always true.
bool MachineStateInitialization::execute(CommandContext *ctx)
{
    assert(ctx && ctx->m_project);
    Project &proj(*ctx->m_project);
    const PROG &prog(proj.prog);
    auto &entry = proj.m_entry_state;

    entry.setState(rES, 0); /* PSP segment */
    entry.setState(rDS, 0);
    entry.setState(rCS, prog.initCS);
    entry.setState(rSS, prog.initSS);
    entry.setState(rSP, prog.initSP);
    entry.IP = ((uint32_t)prog.initCS << 4) + prog.initIP;

    proj.SynthLab = SYNTHESIZED_MIN;
    return true;
}
/**
 * Tries to locate main() by checking the startup code idioms.
 *
 * When the startup check succeeds, the "start" function is marked as not to be
 * decompiled and commands creating "main" and loading the pattern library are
 * queued. Otherwise the entry state is copied into the "start" function.
 *
 * @return false (with a failure recorded in ctx) when no entry point is known
 *         or when the project has no "start" function; true otherwise.
 */
bool FindMain::execute(CommandContext *ctx) {
    assert(ctx && ctx->m_project);
    Project &proj(*ctx->m_project);
    const PROG &prog(proj.prog);
    PtrFunction start_func = proj.findByName("start");
    if(ctx->m_project->m_entry_state.IP==0) {
        ctx->recordFailure(this,"Cannot search for main func when no entry point was found");
        return false;
    }
    // Both branches below dereference start_func, so bail out if the lookup failed.
    if(!start_func) {
        ctx->recordFailure(this,"Cannot search for main func when no start function exists");
        return false;
    }
    /* Check for special settings of initial state, based on idioms of the startup code */
    if(checkStartup(ctx->m_project->m_entry_state)) {
        start_func->markDoNotDecompile(); // we have main, do not decompile the start proc
        //TODO: main arguments and return values should depend on detected compiler/library
        FunctionType *main_type = FunctionType::get(Type{TYPE_WORD_SIGN},{ Type{TYPE_WORD_SIGN},Type{TYPE_PTR} },false);
        main_type->setCallingConvention(CConv::C);
        proj.addCommand(new CreateFunction("main",SegOffAddr {prog.segMain,prog.offMain},main_type));
        proj.addCommand(new LoadPatternLibrary());
    } else {
        start_func->state = proj.m_entry_state; // just in case we fail to find main, initialize 'state' for start func
    }
    return true;
}
/// Describes this instance as: <command name> "<function name>" @ 0x<address in hex>.
QString CreateFunction::instanceDescription() const {
    const QString pattern("%1 \"%2\" @ 0x%3");
    return pattern.arg(name()).arg(m_name).arg(m_addr.addr,0,16,QChar('0'));
}
/// Creates the function described by this command in the project.
/// For "main" the entry state is re-pointed at segMain:offMain and copied into
/// the new function; for "start" the state-initialization and find-main
/// commands are queued.
/// @return always true.
bool CreateFunction::execute(CommandContext *ctx) {
    Project &proj(*ctx->m_project);
    const PROG &prog(proj.prog);
    PtrFunction created = proj.createFunction(m_type,m_name,m_addr);
    const bool is_main = (m_name=="main");
    const bool is_start = (m_name=="start");
    if(is_main) {
        /* In medium and large models, the segment of main may (will?) not be
        the same as the initial CS segment (of the startup code) */
        proj.m_entry_state.setState(rCS, prog.segMain);
        proj.m_entry_state.IP = prog.offMain;
        created->state = proj.m_entry_state;
    }
    if(is_start) {
        proj.addCommand(new MachineStateInitialization);
        proj.addCommand(new FindMain);
    }
    // proj.addCommand(new ProcessFunction);
    //proj.addCommand(new FollowControl());
    /* Recursively build entire procedure list */
    //proj.callGraph->proc->FollowCtrl(proj.callGraph, &proj.m_entry_state);
    return true;
}

49
src/FollowControlFlow.cpp Normal file
View File

@@ -0,0 +1,49 @@
#include "FollowControlFlow.h"
#include "project.h"
#include "parser.h"
/// Describes this instance as: <command name> @ 0x<start IP in hex>.
QString FollowControlFlow::instanceDescription() const
{
    QString description = name();
    description += " @ 0x";
    description += QString::number(m_start_state.IP,16);
    return description;
}
/// Switches the context's current function to the "disassembly in progress"
/// state and follows control flow in it, starting from the stored state.
/// NOTE(review): always returns false, which a CommandStream treats as a failed
/// command - confirm this is intended.
bool FollowControlFlow::execute(CommandContext *ctx)
{
    Project &project(*ctx->m_project);
    PtrFunction target(ctx->m_func);
    target->switchState(eDisassemblyInProgress);
    FollowCtrl(*target,project.callGraph, &m_start_state);
    return false;
}
/// Describes this instance as: <command name> 0x<src> -> 0x<dst> ; case <label>,
/// with both addresses printed as 8 zero-padded hex digits.
QString MarkAsSwitchCase::instanceDescription() const
{
    const QChar pad('0');
    const QString details = QString(" 0x%1 -> 0x%2 ; case %3")
            .arg(m_src_addr,8,16,pad)
            .arg(m_dst_addr,8,16,pad)
            .arg(m_case_label);
    return name() + details;
}
/// Marks the instruction at m_dst_addr as case m_case_label of the switch at
/// m_src_addr, and appends the destination to the switch instruction's case table.
/// @return false (with a failure recorded) when either instruction cannot be
///         found in the current function's icode.
bool MarkAsSwitchCase::execute(CommandContext * ctx)
{
    //TODO: record code/data references in project for navigation UI purposes ?
    auto &icode = ctx->m_func->Icode;
    auto switch_insn = icode.labelSrch(m_src_addr);
    if(switch_insn==icode.end()) {
        const QString msg = QString("switch instruction @ 0x%1 not found in procedure's instructions ?")
                .arg(m_src_addr,8,16,QChar('0'));
        ctx->recordFailure(this,msg);
        return false;
    }
    auto case_insn = icode.labelSrch(m_dst_addr);
    if(case_insn==icode.end()) {
        const QString msg = QString("switch target instruction 0x%1 not found in procedure's instructions ?")
                .arg(m_dst_addr,8,16,QChar('0'));
        ctx->recordFailure(this,msg);
        return false;
    }
    case_insn->ll()->caseEntry = m_case_label;
    case_insn->ll()->setFlags(CASE);
    switch_insn->ll()->caseTbl2.push_back( m_dst_addr );
    return true;
}

39
src/FollowControlFlow.h Normal file
View File

@@ -0,0 +1,39 @@
#ifndef FOLLOWCONTROLFLOW_H
#define FOLLOWCONTROLFLOW_H
#include "Command.h"
#include "state.h"
// Function-level command that follows control flow starting from a given machine state.
class FollowControlFlow : public Command
{
STATE m_start_state; // machine state (including IP) to start following from
public:
FollowControlFlow(STATE addr) : Command("Follow control flow",eFunction),m_start_state(addr) {}
// Command interface
public:
QString instanceDescription() const override;
bool execute(CommandContext *ctx) override;
};
// Marks the instruction at address m_dst_addr as case m_case_label of the switch located at m_src_addr.
class MarkAsSwitchCase : public Command
{
uint32_t m_src_addr; // address of the switch instruction
uint32_t m_dst_addr; // address of the case target instruction
int m_case_label; // case label assigned to the target
public:
MarkAsSwitchCase(uint32_t src_addr,uint32_t dst_addr,int lab) :
Command("Mark as switch case",eFunction),
m_src_addr(src_addr),
m_dst_addr(dst_addr),
m_case_label(lab)
{}
// Command interface
public:
QString instanceDescription() const override;
bool execute(CommandContext *ctx) override;
};
#endif // FOLLOWCONTROLFLOW_H

177
src/Loaders.cpp Normal file
View File

@@ -0,0 +1,177 @@
#include "Loaders.h"
#include "dcc.h"
#include <QtCore/QDebug>
#define EXE_RELOCATION 0x10 /* EXE images rellocated to above PSP */
/* Layout of the DOS program segment prefix that precedes a loaded image.
 * Only its size is used by the loaders here; the fields document the layout. */
struct PSP {
    uint16_t int20h;        /* INT 20h instruction */
    uint16_t eof;           /* segment at the end of the allocation block */
    uint8_t  res1;          /* reserved */
    uint8_t  dosDisp[5];    /* far call to the DOS function dispatcher */
    uint8_t  int22h[4];     /* terminate routine vector */
    uint8_t  int23h[4];     /* ctrl+break routine vector */
    uint8_t  int24h[4];     /* error routine vector */
    uint8_t  res2[22];      /* reserved */
    uint16_t segEnv;        /* environment block segment address */
    uint8_t  res3[34];      /* reserved */
    uint8_t  int21h[6];     /* INT 21h opcode followed by a far return */
    uint8_t  res4[6];       /* reserved */
    uint8_t  fcb1[16];      /* first default file control block */
    uint8_t  fcb2[16];      /* second default file control block */
    uint8_t  res5[4];       /* reserved */
    uint8_t  cmdTail[0x80]; /* command tail / disk transfer area */
};
/* On-disk header of a DOS EXE file. The file-level object 'header' holds the
 * header most recently read by ExeLoader. */
static struct MZHeader {
    uint8_t  sigLo;          /* signature, first byte: 0x4D */
    uint8_t  sigHi;          /* signature, second byte: 0x5A */
    uint16_t lastPageSize;   /* bytes used on the last page */
    uint16_t numPages;       /* pages in the file */
    uint16_t numReloc;       /* relocation item count */
    uint16_t numParaHeader;  /* header size in paragraphs */
    uint16_t minAlloc;       /* minimum paragraphs to allocate */
    uint16_t maxAlloc;       /* maximum paragraphs to allocate */
    uint16_t initSS;         /* stack segment displacement */
    uint16_t initSP;         /* SP value at entry */
    uint16_t checkSum;       /* complemented checksum */
    uint16_t initIP;         /* IP value at entry */
    uint16_t initCS;         /* code segment displacement */
    uint16_t relocTabOffset; /* offset of the relocation table */
    uint16_t overlayNum;     /* overlay number */
} header;
/**
 * Allocates the program image (a PSP-sized prefix followed by sz bytes) and
 * reads sz bytes from the current position of fp into the area after the prefix.
 *
 * The whole buffer is zero-initialised so that the prefix bytes that are not
 * explicitly filled in never hold indeterminate values.
 * Calls fatalError(CANNOT_READ, ...) when fewer than sz bytes could be read.
 *
 * @param prog receives cbImage and the newly allocated Imagez buffer.
 * @param sz   number of bytes to read from fp.
 * @param fp   open file positioned at the start of the load module.
 */
void DosLoader::prepareImage(PROG & prog, size_t sz, QFile & fp) {
    /* Allocate a zero-filled block of memory for the program. */
    prog.cbImage = sz + sizeof(PSP);
    prog.Imagez = new uint8_t [prog.cbImage]();
    prog.Imagez[0] = 0xCD; /* Fill in PSP int 20h location */
    prog.Imagez[1] = 0x20; /* for termination checking */
    /* Read in the image past where a PSP would go */
    if (sz != fp.read((char *)prog.Imagez + sizeof(PSP),sz))
        fatalError(CANNOT_READ, fp.fileName().toLocal8Bit().data());
}
/// A file is accepted as COM when its first two bytes can be read and they are
/// not the EXE signature (0x4D 0x5A).
bool ComLoader::canLoad(QFile & fp) {
    char sig[2];
    fp.seek(0);
    if(fp.read(sig,2)!=2)
        return false;
    const bool has_exe_signature = (sig[0] == 0x4D) && (sig[1] == 0x5A);
    return !has_exe_signature;
}
/// Loads a COM file: the load module is the whole file, placed right after the
/// PSP-sized prefix so that code starts at offset 100h.
/// @return always true (read errors are reported through prepareImage).
bool ComLoader::load(PROG & prog, QFile & fp) {
    prog.fCOM = true;
    fp.seek(0);
    /* For a COM file the load module size is simply the file length */
    const auto file_length = fp.size();
    /* COM programs are built with ORG 100H to leave room for the PSP; loading
     * the image at offset 100H therefore keeps all addresses lined up. */
    prog.initCS = 0;
    prog.initIP = 0x100;
    prog.initSS = 0;
    prog.initSP = 0xFFFE;
    prog.cReloc = 0;
    prepareImage(prog,file_length,fp);
    /* Set up memory map: 2 bits per image byte, i.e. 4 image bytes per map byte */
    const auto map_bytes = (decltype(file_length))((prog.cbImage + 3) / 4);
    prog.map = (uint8_t *)malloc(map_bytes);
    memset(prog.map, BM_UNKNOWN, (size_t)map_bytes);
    return true;
}
/**
 * Checks whether fp looks like a DOS EXE this loader understands.
 *
 * The header is read into a local object, so probing a file never disturbs the
 * file-level 'header' used by load(); all checks are made on that local copy.
 *
 * @return true when the file is large enough, starts with the 0x4D 0x5A
 *         signature and does not use the new EXE format (relocation table
 *         offset 0x40); false otherwise or when the header cannot be read.
 */
bool ExeLoader::canLoad(QFile & fp) {
    if(fp.size()<(qint64)sizeof(MZHeader))
        return false;
    MZHeader tmp_header;
    fp.seek(0);
    if(fp.read((char *)&tmp_header, sizeof(tmp_header)) != (qint64)sizeof(tmp_header))
        return false;
    if(not (tmp_header.sigLo == 0x4D and tmp_header.sigHi == 0x5A))
        return false;
    /* This is a typical DOS kludge! */
    if (LH(&tmp_header.relocTabOffset) == 0x40)
    {
        qDebug() << "Don't understand new EXE format";
        return false;
    }
    return true;
}
/**
 * Loads a DOS EXE file into prog: reads the header, the relocation table and
 * the load module, sets up the memory map and applies the segment relocations.
 *
 * The header comes from the file and is not trusted: a header describing a
 * negative image size, a truncated relocation table, or a relocation entry that
 * points outside the loaded image no longer causes out-of-range memory access.
 *
 * @return true on success; false when the header or relocation table cannot be
 *         read or the header is inconsistent.
 */
bool ExeLoader::load(PROG & prog, QFile & fp) {
    prog.fCOM = false;
    /* Read the whole header */
    fp.seek(0);
    if (fp.read((char *)&header, sizeof(header)) != sizeof(header))
        return false;
    /* Calculate the load module size.
     * This is the number of pages in the file
     * less the length of the header and reloc table
     * less the number of bytes unused on last page.
     * Done in a signed 64-bit value so a bogus header cannot wrap around. */
    int64_t image_bytes = (int64_t)LH(&header.numPages) * 512 - (int64_t)LH(&header.numParaHeader) * 16;
    if (header.lastPageSize)
    {
        image_bytes -= 512 - (int64_t)LH(&header.lastPageSize);
    }
    if (image_bytes < 0)
    {
        qDebug() << "EXE header describes a negative load module size";
        return false;
    }
    uint32_t cb = (uint32_t)image_bytes;
    /* We quietly ignore minAlloc and maxAlloc since for our
     * purposes it doesn't really matter where in real memory
     * the program would end up. EXE programs can't really rely on
     * their load location so setting the PSP segment to 0 is fine.
     * Certainly programs that prod around in DOS or BIOS are going
     * to have to load DS from a constant so it'll be pretty
     * obvious.
     */
    prog.initCS = (int16_t)LH(&header.initCS) + EXE_RELOCATION;
    prog.initIP = (int16_t)LH(&header.initIP);
    prog.initSS = (int16_t)LH(&header.initSS) + EXE_RELOCATION;
    prog.initSP = (int16_t)LH(&header.initSP);
    prog.cReloc = (int16_t)LH(&header.numReloc);
    if (prog.cReloc < 0)
    {
        /* the count did not survive the 16-bit signed conversion above */
        qDebug() << "EXE header holds an unsupported relocation count";
        return false;
    }
    /* Allocate the relocation table */
    if (prog.cReloc)
    {
        prog.relocTable.resize(prog.cReloc);
        fp.seek(LH(&header.relocTabOffset));
        /* Read in seg:offset pairs and convert to Image ptrs */
        uint8_t buf[4];
        for (int i = 0; i < prog.cReloc; i++)
        {
            if (fp.read((char *)buf,4) != 4)
            {
                qDebug() << "EXE relocation table is truncated";
                return false;
            }
            prog.relocTable[i] = LH(buf) + (((int)LH(buf+2) + EXE_RELOCATION)<<4);
        }
    }
    /* Seek to start of image */
    uint32_t start_of_image= LH(&header.numParaHeader) * 16;
    fp.seek(start_of_image);
    /* Allocate a block of memory for the program. */
    prepareImage(prog,cb,fp);
    /* Set up memory map */
    cb = (prog.cbImage + 3) / 4;
    prog.map = (uint8_t *)malloc(cb);
    memset(prog.map, BM_UNKNOWN, (size_t)cb);
    /* Relocate segment constants */
    for(uint32_t v : prog.relocTable) {
        /* each fix-up patches the two bytes at v and v+1; skip entries outside the image */
        if ((uint64_t)v + 1 >= (uint64_t)prog.cbImage)
        {
            qDebug() << "Skipping EXE relocation entry outside of the loaded image";
            continue;
        }
        uint8_t *p = &prog.Imagez[v];
        uint16_t w = (uint16_t)LH(p) + EXE_RELOCATION;
        *p++ = (uint8_t)(w & 0x00FF);
        *p = (uint8_t)((w & 0xFF00) >> 8);
    }
    return true;
}

31
src/Loaders.h Normal file
View File

@@ -0,0 +1,31 @@
#ifndef LOADERS_H
#define LOADERS_H
#include "BinaryImage.h"
#include <QtCore/QFile>
#include <stdlib.h>
/**
 * Common interface of the DOS binary loaders.
 * Loader objects are created with new and handed over as base pointers, so the
 * base needs a virtual destructor for them to be destroyed correctly.
 */
struct DosLoader {
protected:
    /// Allocates prog's image and reads sz bytes of load module from fp into it.
    void prepareImage(PROG &prog,size_t sz,QFile &fp);
public:
    virtual ~DosLoader() {}
    /// Tells whether this loader recognises the given file.
    virtual bool canLoad(QFile &fp)=0;
    /// Human-readable loader name.
    virtual QString loaderName() const =0;
    /// Loads the file into prog; returns false on failure.
    virtual bool load(PROG &prog,QFile &fp)=0;
};
// Loader for DOS COM files.
struct ComLoader : public DosLoader {
virtual ~ComLoader() {}
// Accepts any file whose first two bytes are readable and are not the EXE signature.
bool canLoad(QFile &fp) override;
bool load(PROG &prog,QFile &fp) override;
QString loaderName() const override { return "16-bit DOS - COM loader"; }
};
// Loader for DOS EXE (MZ) files.
struct ExeLoader : public DosLoader {
virtual ~ExeLoader() {}
// Accepts files that start with the 0x4D 0x5A signature and are not in the new EXE format.
bool canLoad(QFile &fp) override;
bool load(PROG &prog,QFile &fp) override;
QString loaderName() const override { return "16-bit DOS - EXE loader"; }
};
#endif // LOADERS_H

22
src/MemoryChunk.cpp Normal file
View File

@@ -0,0 +1,22 @@
#include "MemoryChunk.h"
#include <boost/icl/interval.hpp>
#include <boost/icl/right_open_interval.hpp>
#include <boost/icl/left_open_interval.hpp>
#include <boost/icl/closed_interval.hpp>
#include <boost/icl/open_interval.hpp>
using namespace boost::icl;
/// Stores the chunk bounds as given; no validation is performed.
MemoryChunk::MemoryChunk(LinearAddress start, LinearAddress fin)
    : m_start(start)
    , m_fin(fin)
{}
/// True when addr lies in the half-open range [m_start, m_fin).
bool MemoryChunk::contains(LinearAddress addr) const
{
    if(addr<m_start)
        return false;
    return addr<m_fin;
}
/// Distance between the end and the start of the chunk.
uint64_t MemoryChunk::size() const
{
    const uint64_t length = m_fin-m_start;
    return length;
}

24
src/MemoryChunk.h Normal file
View File

@@ -0,0 +1,24 @@
#ifndef BYTECHUNK_H
#define BYTECHUNK_H
#include "Address.h"
#include <utility>
#include <inttypes.h>
/**
 * @brief The MemoryChunk class represents a continuous range of Addresses
 *
 * The range is described by a start and an end address; bounds() returns them
 * as a (start, end) pair.
 */
class MemoryChunk
{
private:
LinearAddress m_start; // first address of the range
LinearAddress m_fin; // end address of the range
public:
MemoryChunk(LinearAddress start,LinearAddress fin);
bool contains(LinearAddress addr) const;
uint64_t size() const;
std::pair<LinearAddress,LinearAddress> bounds() const { return std::make_pair(m_start,m_fin); }
};
#endif // BYTECHUNK_H

5
src/MemorySegment.cpp Normal file
View File

@@ -0,0 +1,5 @@
#include "MemorySegment.h"
/**
 * Creates a segment covering [start, fin) with the given base.
 * All members are initialised here; m_flags starts at 0 because nothing else
 * in the class sets it.
 * NOTE(review): base is stored in a 16-bit member, so a larger LinearAddress
 * value is truncated - confirm this is intended.
 */
MemorySegment::MemorySegment(LinearAddress base, LinearAddress start, LinearAddress fin)
    : MemoryChunk(start,fin)
    , m_base(base)
    , m_flags(0)
{
}

19
src/MemorySegment.h Normal file
View File

@@ -0,0 +1,19 @@
#pragma once
#include "MemoryChunk.h"
#include <QtCore/QString>
/**
 * @brief The MemorySegment represents a single chunk of memory with additional properties.
 *
 * On top of the address range inherited from MemoryChunk it stores a base
 * value, a flags word and a name.
 */
class MemorySegment : public MemoryChunk
{
uint16_t m_base; // base given at construction
int m_flags; // segment flags; no accessor is declared in this class
QString m_name; // name, see getName()/setName()
public:
MemorySegment(LinearAddress base,LinearAddress start,LinearAddress fin);
const QString &getName() const { return m_name; }
void setName(const QString &v) { m_name = v; }
};

View File

@@ -0,0 +1,54 @@
#include "MemorySegmentCoordinator.h"
#include <boost/icl/interval_map.hpp>
#include <boost/icl/split_interval_map.hpp>
#include <utility>
using namespace boost::icl;
// Private implementation of MemorySegmentCoordinator: keeps the segments in a
// boost::icl interval map keyed by address range.
class MemorySegmentCoordinatorImpl {
boost::icl::interval_map<LinearAddress,SegmentHolder> m_segmentation_map;
public:
// Creates a segment for [start, fin) and registers it in the map.
// Returns false (and creates nothing) when start > fin or start < base.
// NOTE(review): the flags argument is not used by this function.
// NOTE(review): SegmentHolder::operator+= throws, so adding a range that overlaps an existing
// segment presumably raises std::runtime_error after 'seg' was allocated - confirm and decide
// who releases it.
bool addSegment(LinearAddress base, LinearAddress start, LinearAddress fin, const char * name, int flags) {
if(start>fin)
return false;
if(start<base)
return false;
MemorySegment *seg = new MemorySegment(base,start,fin);
seg->setName(name);
//
auto segment_bounds(seg->bounds());
// the segment is registered under the right-open interval [start, fin)
m_segmentation_map.add(std::make_pair(
interval<LinearAddress>::right_open(segment_bounds.first,segment_bounds.second),
seg)
);
return true;
}
// Number of intervals currently stored in the map.
uint32_t numberOfSegments() const { return interval_count(m_segmentation_map); }
// Segment whose interval contains addr, or nullptr when there is none.
const MemorySegment *get(LinearAddress addr) {
auto iter = m_segmentation_map.find(addr);
if(iter==m_segmentation_map.end()) {
return nullptr;
}
return iter->second;
}
};
/// Allocates the private implementation object.
/// NOTE(review): no matching release of m_impl is visible here - confirm where it is freed.
MemorySegmentCoordinator::MemorySegmentCoordinator()
    : m_impl(new MemorySegmentCoordinatorImpl)
{
}
/// Forwards to the implementation object; see MemorySegmentCoordinatorImpl::addSegment.
bool MemorySegmentCoordinator::addSegment(LinearAddress base, LinearAddress start, LinearAddress fin, const char * name, int flags)
{
    const bool added = m_impl->addSegment(base,start,fin,name,flags);
    return added;
}
/// Number of segments currently registered.
uint32_t MemorySegmentCoordinator::size()
{
    const uint32_t segment_count = m_impl->numberOfSegments();
    return segment_count;
}
/// Returns the segment containing addr, or nullptr when no segment covers it.
/// The implementation hands out a const pointer; constness is removed here.
MemorySegment *MemorySegmentCoordinator::getSegment(LinearAddress addr)
{
    const MemorySegment *found = m_impl->get(addr);
    return const_cast<MemorySegment *>(found);
}

View File

@@ -0,0 +1,34 @@
#pragma once
#include "MemorySegment.h"
// Thin pointer wrapper used as the mapped value of the segmentation interval map.
// It does not delete the segment it points to.
// NOTE(review): std::runtime_error is used below without <stdexcept> being included in
// this header - presumably it arrives transitively; verify.
struct SegmentHolder {
SegmentHolder() : val(nullptr) {}
SegmentHolder(MemorySegment *inf) : val(inf) {}
MemorySegment *operator->() { return val;}
MemorySegment &operator*() const { return *val;}
operator MemorySegment *() { return val;}
operator const MemorySegment *() const { return val;}
// Combining two holders is never valid, so any attempt throws.
SegmentHolder operator+=(const SegmentHolder &/*s*/) {
throw std::runtime_error("Cannot aggregate MemorySegments !");
}
MemorySegment *val; // wrapped segment pointer, may be nullptr
};
/**
 * @brief The MemorySegmentCoordinator class is responsible for:
 * - Managing the lifetime of MemorySegments
 * - Providing convenience functions for querying the segment-related data
 *
 * All work is delegated to a private implementation object (m_impl).
 */
class MemorySegmentCoordinator
{
class MemorySegmentCoordinatorImpl *m_impl; // private implementation, allocated in the constructor
public:
MemorySegmentCoordinator();
// Registers a new segment; returns false when the arguments are rejected.
bool addSegment(LinearAddress base,LinearAddress start,LinearAddress fin,const char *name,int flags);
// Number of registered segments.
uint32_t size();
// Segment containing addr, or nullptr when there is none.
MemorySegment *getSegment(LinearAddress addr);
};

View File

@@ -1,6 +1,12 @@
#include "Procedure.h"
#include "msvc_fixes.h"
#include "project.h"
#include "scanner.h"
#include "ui/StructuredTextTarget.h"
#include <QtCore/QDebug>
//FunctionType *Function::getFunctionType() const
//{
// return &m_type;
@@ -14,8 +20,8 @@ void JumpTable::pruneEntries(uint16_t cs)
PROG *prg(Project::get()->binary());
for (uint32_t i = start; i < finish; i += 2)
{
uint32_t target = cs + LH(&prg->Image[i]);
if (target < finish && target >= start)
uint32_t target = cs + LH(&prg->image()[i]);
if (target < finish and target >= start)
finish = target;
else if (target >= (uint32_t)prg->cbImage)
finish = i;
@@ -23,10 +29,109 @@ void JumpTable::pruneEntries(uint16_t cs)
ICODE _Icode; // used as scan input
for (uint32_t i = start; i < finish; i += 2)
{
uint32_t target = cs + LH(&prg->Image[i]);
uint32_t target = cs + LH(&prg->image()[i]);
/* Be wary of 00 00 as code - it's probably data */
if (! (prg->Image[target] || prg->Image[target+1]) || scan(target, _Icode))
if (not (prg->image()[target] or prg->image()[target+1]) or scan(target, _Icode))
finish = i;
}
}
/// Installs calling convention @p v on this function's type and then lets the
/// convention object compute the stack layout for this function.
void Function::callingConv(CConv::CC_Type v)
{
    type->setCallingConvention(v);
    getFunctionType()->m_call_conv->calculateStackLayout(this);
}
/// Maps an operand size in bytes to the assembler-style pointer qualifier used in listings.
/// @param size operand size in bytes (1, 2 or 4 are recognised).
/// @return "BYTE ptr", "WORD ptr" or "DWORD ptr"; "UNKNOWN ptr" for any other size.
static QString sizeToPtrName(int size)
{
    switch (size)
    {
    case 1:
        return "BYTE ptr";
    case 2:
        return "WORD ptr";
    case 4:
        return "DWORD ptr";
    default:
        // Fallback label (previously misspelled as "UNKOWN ptr").
        return "UNKNOWN ptr";
    }
}
/// Emits one "name equ <size> ptr [count dup (?)] <offset>h" line per stack symbol to @p out,
/// inserting "gap len" lines wherever the running offset falls behind a symbol's label.
/// @param stk   stack frame whose symbols are listed.
/// @param out   structured text sink receiving the listing.
/// @param level not used by this overload.
static void toStructuredText(STKFRAME &stk,IStructuredTextTarget *out, int level) {
    const int frameSpan = stk.m_maxOff - stk.m_minOff;
    int offset = 0; // running position, advanced by the size of every symbol emitted

    for (STKSYM &sym : stk)
    {
        if (offset < sym.label)
        {
            // Unaccounted bytes between the previous symbol and this one.
            out->addSpace(4);
            out->prtt(QString("gap len = %1").arg(sym.label - offset,0,16));
            offset = sym.label;
            out->addEOL();
        }
        else if (offset > sym.label)
        {
            // This symbol starts before the previous one ended; report and resynchronise.
            qWarning() << "error, var collapse!!!";
            offset = sym.label;
        }

        out->addSpace(4);
        out->addTaggedString(XT_Symbol,sym.name,&sym);
        out->prtt("equ");
        out->addSpace();
        out->prtt(sizeToPtrName(sym.size));
        out->addSpace();

        if (sym.arrayMembers>1)
        {
            out->addTaggedString(XT_Number,QString::number(sym.arrayMembers,16));
            out->prtt("dup (?)");
            out->addSpace();
        }

        out->TAGbegin(XT_Number, NULL);
        out->prtt(QString("%1h").arg(sym.label,0,16));
        out->TAGend(XT_Number);
        out->addEOL();

        offset += sym.size * sym.arrayMembers;
    }

    // Trailing space between the last symbol and the end of the frame.
    if (offset < frameSpan)
    {
        out->prtt(QString(" gap len = %1h").arg(frameSpan - offset,0,16));
    }
}
extern void toStructuredText(LLInst *insn,IStructuredTextTarget *out, int level);
/// Renders one intermediate-code entry; only level 0 produces output, by
/// delegating to the low-level instruction overload.
static void toStructuredText(ICODE &stk,IStructuredTextTarget *out, int level) {
    if (level != 0)
        return;
    toStructuredText(stk.ll(),out,level);
}
/// Writes this function to @p out as a "<name> proc" ... "<name> endp" listing,
/// all enclosed in an XT_Function tag that refers back to this object.
void Function::toStructuredText(IStructuredTextTarget *out, int level)
{
    out->TAGbegin(XT_Function, this);

    // Header line.
    out->addTaggedString(XT_FuncName,name);
    out->prtt(" proc");
    out->addEOL();

    // Argument stack frame, followed by an end-of-line.
    ::toStructuredText(args,out,level);
    out->addEOL();

    // Body: every intermediate-code entry in order.
    for (ICODE &instruction : Icode)
    {
        ::toStructuredText(instruction,out,level);
    }

    // Footer line.
    out->addTaggedString(XT_FuncName,name);
    out->addSpace();
    out->prtt("endp");
    out->addEOL();

    out->TAGend(XT_Function);
}
/// Replaces the stored convention object with the one CConv::create yields for @p cc.
void FunctionType::setCallingConvention(CConv::CC_Type cc)
{
    m_call_conv = CConv::create(cc);
    // A missing convention object is treated as a programming error.
    assert(m_call_conv);
}
/// Records @p s as the decompilation step this function is currently in.
void Function::switchState(DecompilationStep s)
{
    this->nStep = s;
}

122
src/RegisterNode.cpp Normal file
View File

@@ -0,0 +1,122 @@
#include "types.h"
#include "msvc_fixes.h"
#include "ast.h"
#include "bundle.h"
#include "machine_x86.h"
#include "project.h"
#include <stdint.h>
#include <string>
#include <sstream>
#include <iostream>
#include <cassert>
#include <boost/range/adaptor/filtered.hpp>
#include <boost/range.hpp>
//#include <boost/range/algorithm.hpp>
//#include <boost/assign.hpp>
using namespace std;
using namespace boost::adaptors;
/// Builds a register expression node for operand @p op, registering the register
/// in @p locsym as a signed byte or signed word depending on the operand width.
RegisterNode::RegisterNode(const LLOperand &op, LOCAL_ID *locsym)
{
    m_syms = locsym;
    ident.type(REGISTER);

    // A one-byte operand is a byte register; everything else is handled as a word register.
    const bool isByteOperand = (op.byteWidth()==1);
    const hlType valueType = isByteOperand ? TYPE_BYTE_SIGN : TYPE_WORD_SIGN;
    const regType registerKind = isByteOperand ? BYTE_REG : WORD_REG;

    regiIdx = locsym->newByteWordReg(valueType, op.regi);
    regiType = registerKind;
}
//RegisterNode::RegisterNode(eReg regi, uint32_t icodeFlg, LOCAL_ID *locsym)
//{
// ident.type(REGISTER);
// hlType type_sel;
// regType reg_type;
// if ((icodeFlg & B) or (icodeFlg & SRC_B))
// {
// type_sel = TYPE_BYTE_SIGN;
// reg_type = BYTE_REG;
// }
// else /* uint16_t */
// {
// type_sel = TYPE_WORD_SIGN;
// reg_type = WORD_REG;
// }
// regiIdx = locsym->newByteWordReg(type_sel, regi);
// regiType = reg_type;
//}
/// Produces the textual form of this register operand.
/// If the underlying local has no name yet, one is assigned (bumping *numLoc) and a
/// declaration with a register-name comment is generated; the declaration text
/// (possibly empty) is always appended to cCode.
/// @return the local's name, wrapped as "macro(name)" when the local has a macro.
QString RegisterNode::walkCondExpr(Function *pProc, int *numLoc) const
{
    assert(&pProc->localId==m_syms);
    ID *id = &pProc->localId.id_arr[regiIdx];

    QString declaration;
    if (id->name[0] == '\0') /* no name */
    {
        id->setLocalName(++(*numLoc));
        declaration += QString("%1 %2; ").arg(TypeContainer::typeName(id->type)).arg(id->name);
        declaration += QString("/* %1 */\n").arg(Machine_X86::regName(id->id.regi));
    }

    QString rendered;
    if (id->hasMacro)
    {
        rendered += QString("%1(%2)").arg(id->macro).arg(id->name);
    }
    else
    {
        rendered += id->name;
    }

    cCode.appendDecl(declaration);
    return rendered;
}
/// Size in bytes of the value held by this node: 1 for byte registers, 2 otherwise.
int RegisterNode::hlTypeSize(Function *) const
{
    return (regiType == BYTE_REG) ? 1 : 2;
}
/// High-level type of this node: signed byte for byte registers, signed word otherwise.
/// The function argument is not consulted.
hlType RegisterNode::expType(Function *pproc) const
{
    return (regiType == BYTE_REG) ? TYPE_BYTE_SIGN : TYPE_WORD_SIGN;
}
/// Decides whether this node should be replaced by @p _expr when substituting register @p regi.
/// @return @p _expr when this node's register is @p regi itself or a sub-register of it
///         (per Machine_X86::isSubRegisterOf); nullptr when no substitution applies.
Expr *RegisterNode::insertSubTreeReg(Expr *_expr, eReg regi, const LOCAL_ID *locsym)
{
    assert(locsym==m_syms);
    const eReg treeReg = locsym->id_arr[regiIdx].id.regi;

    if (treeReg == regi) /* same register */
        return _expr;
    if (Machine_X86::isSubRegisterOf(treeReg,regi)) /* part of the requested register */
        return _expr;
    return nullptr;
}
/// Checks that this node's register is not redefined in @p range_to_check.
/// The first instruction of the range is skipped and only instructions accepted by
/// ICODE::select_valid_high_level are examined.
/// @return false if any examined instruction defines the register or one of its
///         sub-registers; otherwise true exactly when the end of the range differs
///         from @p lastBBinst.
bool RegisterNode::xClear(rICODE range_to_check, iICODE lastBBinst, const LOCAL_ID &locId)
{
    uint8_t regi = locId.id_arr[regiIdx].id.regi;
    range_to_check.advance_begin(1);

    auto candidates = range_to_check | filtered(ICODE::select_valid_high_level);
    for (ICODE &insn : candidates)
    {
        if (insn.du.def.testRegAndSubregs(regi))
            return false;
    }

    return candidates.end().base() != lastBBinst;
}

File diff suppressed because it is too large Load Diff

View File

@@ -4,20 +4,34 @@
* Purpose: Back-end module. Generates C code for each procedure.
* (C) Cristina Cifuentes
****************************************************************************/
#include "dcc.h"
#include "msvc_fixes.h"
#include "disassem.h"
#include "project.h"
#include "CallGraph.h"
#include <QtCore/QDir>
#include <QtCore/QFile>
#include <QtCore/QStringList>
#include <QtCore/QDebug>
#include <cassert>
#include <string>
#include <boost/range.hpp>
#include <boost/range/adaptor/filtered.hpp>
#include <boost/range/algorithm.hpp>
#include "dcc.h"
#include "disassem.h"
#include <fstream>
#include <iostream>
#include <sstream>
#include <string.h>
#include <stdio.h>
#include "project.h"
bundle cCode; /* Procedure declaration and code */
using namespace boost;
using namespace boost::adaptors;
using namespace std;
bundle cCode; /* Procedure declaration and code */
/* Returns a unique index to the next label */
int getNextLabel()
{
@@ -29,14 +43,14 @@ int getNextLabel()
/* displays statistics on the subroutine */
void Function::displayStats ()
{
printf("\nStatistics - Subroutine %s\n", name.c_str());
printf ("Number of Icode instructions:\n");
printf (" Low-level : %4d\n", stats.numLLIcode);
if (! (flg & PROC_ASM))
qDebug() << "\nStatistics - Subroutine" << name;
qDebug() << "Number of Icode instructions:";
qDebug() << " Low-level :" << stats.numLLIcode;
if (not (flg & PROC_ASM))
{
printf (" High-level: %4d\n", stats.numHLIcode);
printf (" Percentage reduction: %2.2f%%\n", 100.0 - (stats.numHLIcode *
100.0) / stats.numLLIcode);
qDebug() << " High-level:"<<stats.numHLIcode;
qDebug() << QString(" Percentage reduction: %1%%").arg(100.0 - (stats.numHLIcode *
100.0) / stats.numLLIcode,4,'f',2,QChar('0'));
}
}
@@ -94,7 +108,7 @@ char *cChar (uint8_t c)
* Note: to get to the value of the variable:
* com file: prog.Image[operand]
* exe file: prog.Image[operand+0x100] */
static void printGlobVar (std::ostream &ostr,SYM * psym)
static void printGlobVar (QTextStream &ostr,SYM * psym)
{
int j;
PROG &prog(Project::get()->prog);
@@ -103,24 +117,24 @@ static void printGlobVar (std::ostream &ostr,SYM * psym)
switch (psym->size)
{
case 1:
ostr << "uint8_t\t"<<psym->name<<" = "<<prog.Image[relocOp]<<";\n";
ostr << "uint8_t\t"<<psym->name<<" = "<<prog.image()[relocOp]<<";\n";
break;
case 2:
ostr << "uint16_t\t"<<psym->name<<" = "<<LH(prog.Image+relocOp)<<";\n";
ostr << "uint16_t\t"<<psym->name<<" = "<<LH(prog.image()+relocOp)<<";\n";
break;
case 4: if (psym->type == TYPE_PTR) /* pointer */
ostr << "uint16_t *\t"<<psym->name<<" = "<<LH(prog.Image+relocOp)<<";\n";
ostr << "uint16_t *\t"<<psym->name<<" = "<<LH(prog.image()+relocOp)<<";\n";
else /* char */
ostr << "char\t"<<psym->name<<"[4] = \""<<
prog.Image[relocOp]<<prog.Image[relocOp+1]<<
prog.Image[relocOp+2]<<prog.Image[relocOp+3]<<";\n";
prog.image()[relocOp]<<prog.image()[relocOp+1]<<
prog.image()[relocOp+2]<<prog.image()[relocOp+3]<<";\n";
break;
default:
{
ostringstream strContents;
QString strContents;
for (j=0; j < psym->size; j++)
strContents << cChar(prog.Image[relocOp + j]);
ostr << "char\t*"<<psym->name<<" = \""<<strContents.str()<<"\";\n";
strContents += cChar(prog.image()[relocOp + j]);
ostr << "char\t*"<<psym->name<<" = \""<<strContents<<"\";\n";
}
}
}
@@ -131,7 +145,8 @@ static void printGlobVar (std::ostream &ostr,SYM * psym)
* initialization. */
void Project::writeGlobSymTable()
{
std::ostringstream ostr;
QString contents;
QTextStream ostr(&contents);
if (symtab.empty())
return;
@@ -155,19 +170,20 @@ void Project::writeGlobSymTable()
}
}
ostr<< "\n";
cCode.appendDecl( ostr.str() );
ostr.flush();
cCode.appendDecl( contents );
}
/* Writes the header information and global variables to the output C file
* fp. */
static void writeHeader (std::ostream &_ios, char *fileName)
static void writeHeader (QIODevice &_ios, const std::string &fileName)
{
PROG &prog(Project::get()->prog);
/* Write header information */
cCode.init();
cCode.appendDecl( "/*\n");
cCode.appendDecl( " * Input file\t: %s\n", fileName);
cCode.appendDecl( " * Input file\t: %s\n", fileName.c_str());
cCode.appendDecl( " * File type\t: %s\n", (prog.fCOM)?"COM":"EXE");
cCode.appendDecl( " */\n\n#include \"dcc.h\"\n\n");
@@ -178,11 +194,12 @@ static void writeHeader (std::ostream &_ios, char *fileName)
}
// Note: Not currently called!
/* Checks the given icode to determine whether it has a label associated
/** Checks the given icode to determine whether it has a label associated
* to it. If so, a goto is emitted to this label; otherwise, a new label
* is created and a goto is also emitted.
* Note: this procedure is to be used when the label is to be forward on
* the code; that is, the target code has not been traversed yet. */
#if 0
static void emitFwdGotoLabel (ICODE * pt, int indLevel)
{
if ( not pt->ll()->testFlags(HLL_LABEL)) /* node hasn't got a lab */
@@ -193,14 +210,15 @@ static void emitFwdGotoLabel (ICODE * pt, int indLevel)
}
cCode.appendCode( "%sgoto l%ld;\n", indentStr(indLevel), pt->ll()->hllLabNum);
}
#endif
/* Writes the procedure's declaration (including arguments), local variables,
* and invokes the procedure that writes the code of the given record *hli */
void Function::codeGen (std::ostream &fs)
void Function::codeGen (QIODevice &fs)
{
int numLoc;
ostringstream ostr;
QString ostr_contents;
QTextStream ostr(&ostr_contents);
//STKFRAME * args; /* Procedure arguments */
//char buf[200], /* Procedure's definition */
// arg[30]; /* One argument */
@@ -208,27 +226,28 @@ void Function::codeGen (std::ostream &fs)
/* Write procedure/function header */
cCode.init();
if (flg & PROC_IS_FUNC) /* Function */
ostr<< "\n"<<TypeContainer::typeName(retVal.type)<<" "<<name<<" (";
if (getReturnType() != TYPE_UNKNOWN) /* Function */
ostr << QString("\n%1 %2 (").arg(TypeContainer::typeName(getReturnType())).arg(name);
else /* Procedure */
ostr<< "\nvoid "<<name<<" (";
ostr << "\nvoid "+name+" (";
/* Write arguments */
for (size_t i = 0; i < args.size(); i++)
struct validArg
{
if ( args[i].invalid )
continue;
ostr<<hlTypes[args[i].type]<<" "<<args[i].name;
if (i < (args.size() - 1))
ostr<<", ";
bool operator()(STKSYM &s) { return s.invalid==false;}
};
QStringList parts;
for (STKSYM &arg : (args | filtered(validArg())))
{
parts << QString("%1 %2").arg(hlTypes[arg.type]).arg(arg.name);
}
ostr<<")\n";
ostr << parts.join(", ")+")\n";
/* Write comments */
writeProcComments( ostr );
/* Write local variables */
if (! (flg & PROC_ASM))
if (not (flg & PROC_ASM))
{
numLoc = 0;
for (ID &refId : localId )
@@ -239,8 +258,8 @@ void Function::codeGen (std::ostream &fs)
if (refId.loc == REG_FRAME)
{
/* Register variables are assigned to a local variable */
if (((flg & SI_REGVAR) && (refId.id.regi == rSI)) ||
((flg & DI_REGVAR) && (refId.id.regi == rDI)))
if (((flg & SI_REGVAR) and (refId.id.regi == rSI)) or
((flg & DI_REGVAR) and (refId.id.regi == rDI)))
{
refId.setLocalName(++numLoc);
ostr << "int "<<refId.name<<";\n";
@@ -256,16 +275,18 @@ void Function::codeGen (std::ostream &fs)
}
}
}
fs<<ostr.str();
ostr.flush();
fs.write(ostr_contents.toLatin1());
/* Write procedure's code */
if (flg & PROC_ASM) /* generate assembler */
{
Disassembler ds(3);
ds.disassem(this);
ds.disassem(this->shared_from_this());
}
else /* generate C */
{
m_cfg.front()->writeCode (1, this, &numLoc, MAX, UN_INIT);
m_actual_cfg.front()->writeCode (1, this, &numLoc, MAX, UN_INIT);
}
cCode.appendCode( "}\n\n");
@@ -273,44 +294,49 @@ void Function::codeGen (std::ostream &fs)
freeBundle (&cCode);
/* Write Live register analysis information */
if (option.verbose)
if (option.verbose) {
QString debug_contents;
QTextStream debug_stream(&debug_contents);
for (size_t i = 0; i < numBBs; i++)
{
pBB = m_dfsLast[i];
if (pBB->flg & INVALID_BB) continue; /* skip invalid BBs */
cout << "BB "<<i<<"\n";
cout << " Start = "<<pBB->begin()->loc_ip;
cout << ", end = "<<pBB->begin()->loc_ip+pBB->size()<<"\n";
cout << " LiveUse = ";
Machine_X86::writeRegVector(cout,pBB->liveUse);
cout << "\n Def = ";
Machine_X86::writeRegVector(cout,pBB->def);
cout << "\n LiveOut = ";
Machine_X86::writeRegVector(cout,pBB->liveOut);
cout << "\n LiveIn = ";
Machine_X86::writeRegVector(cout,pBB->liveIn);
cout <<"\n\n";
debug_stream << "BB "<<i<<"\n";
debug_stream << " Start = "<<pBB->begin()->loc_ip;
debug_stream << ", end = "<<pBB->begin()->loc_ip+pBB->size()<<"\n";
debug_stream << " LiveUse = ";
Machine_X86::writeRegVector(debug_stream,pBB->liveUse);
debug_stream << "\n Def = ";
Machine_X86::writeRegVector(debug_stream,pBB->def);
debug_stream << "\n LiveOut = ";
Machine_X86::writeRegVector(debug_stream,pBB->liveOut);
debug_stream << "\n LiveIn = ";
Machine_X86::writeRegVector(debug_stream,pBB->liveIn);
debug_stream <<"\n\n";
}
debug_stream.flush();
qDebug() << debug_contents.toLatin1();
}
}
/* Recursive procedure. Displays the procedure's code in depth-first order
* of the call graph. */
static void backBackEnd (char *filename, CALL_GRAPH * pcallGraph, std::ostream &_ios)
static void backBackEnd (CALL_GRAPH * pcallGraph, QIODevice &_ios)
{
// IFace.Yield(); /* This is a good place to yield to other apps */
/* Check if this procedure has been processed already */
if ((pcallGraph->proc->flg & PROC_OUTPUT) ||
if ((pcallGraph->proc->flg & PROC_OUTPUT) or
(pcallGraph->proc->flg & PROC_ISLIB))
return;
pcallGraph->proc->flg |= PROC_OUTPUT;
/* Dfs if this procedure has any successors */
for (size_t i = 0; i < pcallGraph->outEdges.size(); i++)
for (auto & elem : pcallGraph->outEdges)
{
backBackEnd (filename, pcallGraph->outEdges[i], _ios);
backBackEnd (elem, _ios);
}
/* Generate code for this procedure */
@@ -321,7 +347,7 @@ static void backBackEnd (char *filename, CALL_GRAPH * pcallGraph, std::ostream &
/* Generate statistics */
if (option.Stats)
pcallGraph->proc->displayStats ();
if (! (pcallGraph->proc->flg & PROC_ASM))
if (not (pcallGraph->proc->flg & PROC_ASM))
{
stats.totalLL += stats.numLLIcode;
stats.totalHL += stats.numHLIcode;
@@ -330,33 +356,31 @@ static void backBackEnd (char *filename, CALL_GRAPH * pcallGraph, std::ostream &
/* Invokes the necessary routines to produce code one procedure at a time. */
void BackEnd (char *fileName, CALL_GRAPH * pcallGraph)
void BackEnd(CALL_GRAPH * pcallGraph)
{
std::ofstream fs; /* Output C file */
/* Get output file name */
std::string outNam(fileName);
outNam = outNam.substr(0,outNam.rfind("."))+".b"; /* b for beta */
QString outNam(Project::get()->output_name("b")); /* b for beta */
QFile fs(outNam); /* Output C file */
/* Open output file */
fs.open(outNam);
if(!fs.is_open())
fatalError (CANNOT_OPEN, outNam.c_str());
printf ("dcc: Writing C beta file %s\n", outNam.c_str());
if(not fs.open(QFile::WriteOnly|QFile::Text))
fatalError (CANNOT_OPEN, outNam.toStdString().c_str());
qDebug()<<"dcc: Writing C beta file"<<outNam;
/* Header information */
writeHeader (fs, fileName);
writeHeader (fs, option.filename.toStdString());
/* Initialize total Icode instructions statistics */
stats.totalLL = 0;
stats.totalHL = 0;
/* Process each procedure at a time */
backBackEnd (fileName, pcallGraph, fs);
backBackEnd (pcallGraph, fs);
/* Close output file */
fs.close();
printf ("dcc: Finished writing C beta file\n");
qDebug() << "dcc: Finished writing C beta file";
}

View File

@@ -10,7 +10,7 @@
#include <memory.h>
#include <stdlib.h>
#include <string.h>
#include <QtCore/QIODevice>
#define deltaProcLines 20
using namespace std;
@@ -21,26 +21,26 @@ using namespace std;
* tab is removed and replaced by this label */
void strTable::addLabelBundle (int idx, int label)
{
char s[16];
sprintf (s, "l%d: ", label);
if(at(idx).size()<4)
at(idx)=s;
QString &processedLine(at(idx));
QString s = QString("l%1: ").arg(label);
if(processedLine.size()<4)
processedLine = s;
else
at(idx) = string(s)+at(idx).substr(4);
processedLine = s+processedLine.mid(4);
}
/* Writes the contents of the string table on the file fp. */
static void writeStrTab (std::ostream &ios, strTable &strTab)
static void writeStrTab (QIODevice &ios, strTable &strTab)
{
for (size_t i = 0; i < strTab.size(); i++)
ios << strTab[i];
ios.write(strTab[i].toLatin1());
}
/* Writes the contents of the bundle (procedure code and declaration) to
* a file. */
void writeBundle (std::ostream &ios, bundle procCode)
void writeBundle (QIODevice &ios, bundle procCode)
{
writeStrTab (ios, procCode.decl);
writeStrTab (ios, procCode.code);
@@ -70,7 +70,7 @@ void bundle::appendCode(const char *format,...)
code.push_back(buf);
va_end (args);
}
void bundle::appendCode(const std::string &s)
void bundle::appendCode(const QString & s)
{
code.push_back(s);
}
@@ -85,7 +85,7 @@ void bundle::appendDecl(const char *format,...)
va_end (args);
}
void bundle::appendDecl(const std::string &v)
void bundle::appendDecl(const QString &v)
{
decl.push_back(v);
}

File diff suppressed because it is too large Load Diff

51
src/chklib.h Normal file
View File

@@ -0,0 +1,51 @@
#ifndef CHKLIB_H
#define CHKLIB_H
#include "Command.h"
#include "Enums.h"
#include "perfhlib.h"
#include <QtCore/QFile>
#include <QtCore/QString>
#include <vector>
class Function;
// Command that creates a PatternLocator instance, loads it, and passes it to the project instance.
struct LoadPatternLibrary : public Command {
// Initialises the Command base with this command's description string and the eProject value.
LoadPatternLibrary() : Command("Load patterns for the file",eProject) {}
// Runs the command against ctx; only declared here, the definition lives outside this header.
bool execute(CommandContext *ctx) override;
};
// Holds the state of one pattern library, identified by pattern_id: function and
// parameter tables plus the hashing data used to look patterns up.
// NOTE(review): the class keeps several raw pointers (ht, pFunc, T1base, T2base, g) and
// declares a destructor but no copy control - confirm instances are never copied.
class PatternLocator {
std::vector<hlType> pArg; /* Array of param types */
QString pattern_id; /* Name given at construction */
int numFunc=0; /* Number of func names actually stored */
int numArg=0; /* Number of param names actually stored */
public:
struct HT * ht =nullptr; //!< The hash table
struct PH_FUNC_STRUCT * pFunc=nullptr; //!< Points to the array of func names
// Only remembers the name; nothing else is initialised beyond the in-class defaults.
PatternLocator(QString name) : pattern_id(name) {}
~PatternLocator();
// NOTE(review): presumably reads the pattern data for pattern_id and reports success - confirm in the definition.
bool load();
// NOTE(review): presumably returns the index of name in the stored function list - confirm, including the not-found value.
int searchPList(const char * name);
// NOTE(review): presumably checks pProc against the loaded patterns - confirm in the definition.
bool LibCheck(Function & pProc);
private:
bool readProtoFile();
PerfectHash g_pattern_hasher;
int numKeys=0; /* Number of hash table entries (keys) */
int numVert=0; /* Number of vertices in the graph (also size of g[]) */
unsigned PatLen=0; /* Size of the keys (pattern length) */
unsigned SymLen=0; /* Max size of the symbols, including null */
/* Pointers to start of T1, T2 */
uint16_t * T1base = nullptr;
uint16_t * T2base = nullptr;
uint16_t * g = nullptr; /* g[] */
};
extern bool checkStartup(struct STATE &state);
#endif // CHKLIB_H

View File

@@ -7,9 +7,12 @@
****************************************************************************/
#include "dcc.h"
#include "msvc_fixes.h"
#include "machine_x86.h"
#include <string.h>
#include <sstream>
#include <QTextStream>
using namespace std;
#define intSize 40
@@ -148,21 +151,21 @@ static const char *intOthers[] = {
/* Writes the description of the current interrupt. Appends it to the
* string s. */
void LLInst::writeIntComment (std::ostringstream &s)
void LLInst::writeIntComment (QTextStream &s)
{
uint32_t src_immed=src().getImm2();
s<<"\t/* ";
if (src_immed == 0x21)
{
s <<int21h[dst.off];
s <<int21h[m_dst.off];
}
else if (src_immed > 0x1F && src_immed < 0x2F)
else if (src_immed > 0x1F and src_immed < 0x2F)
{
s <<intOthers[src_immed - 0x20];
}
else if (src_immed == 0x2F)
{
switch (dst.off)
switch (m_dst.off)
{
case 0x01 :
s << "Print spooler";
@@ -186,12 +189,15 @@ void LLInst::writeIntComment (std::ostringstream &s)
//, &cCode.decl
void Function::writeProcComments()
{
std::ostringstream ostr;
QString dest_str;
{
QTextStream ostr(&dest_str);
writeProcComments(ostr);
cCode.appendDecl(ostr.str());
}
cCode.appendDecl(dest_str);
}
void Function::writeProcComments(std::ostream &ostr)
void Function::writeProcComments(QTextStream &ostr)
{
int i;
ID *id; /* Pointer to register argument identifier */
@@ -207,16 +213,16 @@ void Function::writeProcComments(std::ostream &ostr)
{
psym = &this->args[i];
ostr << " * "<<psym->name<<" = ";
if (psym->regs->expr.ident.idType == REGISTER)
if (psym->regs->ident.type() == REGISTER)
{
id = &this->localId.id_arr[psym->regs->expr.ident.idNode.regiIdx];
id = &this->localId.id_arr[((RegisterNode *)psym->regs)->regiIdx];
ostr << Machine_X86::regName(id->id.regi);
}
else /* long register */
{
id = &this->localId.id_arr[psym->regs->expr.ident.idNode.longIdx];
ostr << Machine_X86::regName(id->id.longId.h) << ":";
ostr << Machine_X86::regName(id->id.longId.l);
id = &this->localId.id_arr[psym->regs->ident.idNode.longIdx];
ostr << Machine_X86::regName(id->longId().h()) << ":";
ostr << Machine_X86::regName(id->longId().l());
}
ostr << ".\n";
@@ -233,9 +239,9 @@ void Function::writeProcComments(std::ostream &ostr)
if (this->flg & PROC_ASM)
{
ostr << " * Untranslatable routine. Assembler provided.\n";
if (this->flg & PROC_IS_FUNC)
switch (this->retVal.type) { // TODO: Functions return value in various regs
case TYPE_BYTE_SIGN: case TYPE_BYTE_UNSIGN:
switch (getReturnType()) { // TODO: Functions return value in various regs
case TYPE_BYTE_SIGN:
case TYPE_BYTE_UNSIGN:
ostr << " * Return value in register al.\n";
break;
case TYPE_WORD_SIGN: case TYPE_WORD_UNSIGN:
@@ -244,17 +250,17 @@ void Function::writeProcComments(std::ostream &ostr)
case TYPE_LONG_SIGN: case TYPE_LONG_UNSIGN:
ostr << " * Return value in registers dx:ax.\n";
break;
case TYPE_UNKNOWN:
// void return type
break;
default:
fprintf(stderr,"Unknown retval type %d",getReturnType());
break;
} /* eos */
}
/* Calling convention */
if (this->flg & CALL_PASCAL)
ostr << " * Pascal calling convention.\n";
else if (this->flg & CALL_C)
ostr << " * C calling convention.\n";
else if (this->flg & CALL_UNKNOWN)
ostr << " * Unknown calling convention.\n";
callingConv()->writeComments(ostr);
/* Other flags */
if (this->flg & (PROC_BADINST | PROC_IJMP))
{

View File

@@ -2,6 +2,10 @@
* Description : Performs control flow analysis on the CFG
* (C) Cristina Cifuentes
********************************************************************/
#include "dcc.h"
#include "msvc_fixes.h"
#include <boost/range/algorithm.hpp>
#include <algorithm>
#include <list>
@@ -10,15 +14,10 @@
#include <string.h>
#include <malloc.h>
#include "dcc.h"
//typedef struct list {
// int nodeIdx;
// struct list *next;
//} nodeList;
typedef std::list<int> nodeList; /* dfsLast index to the node */
#define ancestor(a,b) ((a->dfsLastNum < b->dfsLastNum) && (a->dfsFirstNum < b->dfsFirstNum))
#define ancestor(a,b) ((a->dfsLastNum < b->dfsLastNum) and (a->dfsFirstNum < b->dfsFirstNum))
/* there is a path on the DFST from a to b if the a was first visited in a
* dfs, and a was later visited than b when doing the last visit of each
* node. */
@@ -48,7 +47,7 @@ static int commonDom (int currImmDom, int predImmDom, Function * pProc)
if (predImmDom == NO_DOM) /* predecessor is the root */
return (currImmDom);
while ((currImmDom != NO_DOM) && (predImmDom != NO_DOM) &&
while ((currImmDom != NO_DOM) and (predImmDom != NO_DOM) and
(currImmDom != predImmDom))
{
if (currImmDom < predImmDom)
@@ -105,7 +104,7 @@ static void freeList (nodeList &l)
/* Returns whether the node n belongs to the queue list q. */
static boolT inInt(BB * n, queue &q)
static bool inInt(BB * n, queue &q)
{
return std::find(q.begin(),q.end(),n)!=q.end();
}
@@ -121,7 +120,7 @@ static void findEndlessFollow (Function * pProc, nodeList &loopNodes, BB * head)
for (TYPEADR_TYPE &typeaddr: pProc->m_dfsLast[loop_node]->edges)
{
int succ = typeaddr.BBptr->dfsLastNum;
if ((! inList(loopNodes, succ)) && (succ < head->loopFollow))
if ((not inList(loopNodes, succ)) and (succ < head->loopFollow))
head->loopFollow = succ;
}
}
@@ -149,7 +148,7 @@ static void findNodesInLoop(BB * latchNode,BB * head,Function * pProc,queue &int
continue;
immedDom = pProc->m_dfsLast[i]->immedDom;
if (inList (loopNodes, immedDom) && inInt(pProc->m_dfsLast[i], intNodes))
if (inList (loopNodes, immedDom) and inInt(pProc->m_dfsLast[i], intNodes))
{
insertList (loopNodes, i);
if (pProc->m_dfsLast[i]->loopHead == NO_NODE)/*not in other loop*/
@@ -163,12 +162,12 @@ static void findNodesInLoop(BB * latchNode,BB * head,Function * pProc,queue &int
/* Determine type of loop and follow node */
intNodeType = head->nodeType;
if (latchNode->nodeType == TWO_BRANCH)
if ((intNodeType == TWO_BRANCH) || (latchNode == head))
if ((latchNode == head) ||
(inList (loopNodes, head->edges[THEN].BBptr->dfsLastNum) &&
if ((intNodeType == TWO_BRANCH) or (latchNode == head))
if ((latchNode == head) or
(inList (loopNodes, head->edges[THEN].BBptr->dfsLastNum) and
inList (loopNodes, head->edges[ELSE].BBptr->dfsLastNum)))
{
head->loopType = REPEAT_TYPE;
head->loopType = eNodeHeaderType::REPEAT_TYPE;
if (latchNode->edges[0].BBptr == head)
head->loopFollow = latchNode->edges[ELSE].BBptr->dfsLastNum;
else
@@ -177,7 +176,7 @@ static void findNodesInLoop(BB * latchNode,BB * head,Function * pProc,queue &int
}
else
{
head->loopType = WHILE_TYPE;
head->loopType = eNodeHeaderType::WHILE_TYPE;
if (inList (loopNodes, head->edges[THEN].BBptr->dfsLastNum))
head->loopFollow = head->edges[ELSE].BBptr->dfsLastNum;
else
@@ -186,7 +185,7 @@ static void findNodesInLoop(BB * latchNode,BB * head,Function * pProc,queue &int
}
else /* head = anything besides 2-way, latch = 2-way */
{
head->loopType = REPEAT_TYPE;
head->loopType = eNodeHeaderType::REPEAT_TYPE;
if (latchNode->edges[THEN].BBptr == head)
head->loopFollow = latchNode->edges[ELSE].BBptr->dfsLastNum;
else
@@ -196,12 +195,12 @@ static void findNodesInLoop(BB * latchNode,BB * head,Function * pProc,queue &int
else /* latch = 1-way */
if (latchNode->nodeType == LOOP_NODE)
{
head->loopType = REPEAT_TYPE;
head->loopType = eNodeHeaderType::REPEAT_TYPE;
head->loopFollow = latchNode->edges[0].BBptr->dfsLastNum;
}
else if (intNodeType == TWO_BRANCH)
{
head->loopType = WHILE_TYPE;
head->loopType = eNodeHeaderType::WHILE_TYPE;
pbb = latchNode;
thenDfs = head->edges[THEN].BBptr->dfsLastNum;
elseDfs = head->edges[ELSE].BBptr->dfsLastNum;
@@ -222,7 +221,7 @@ static void findNodesInLoop(BB * latchNode,BB * head,Function * pProc,queue &int
* loop, so it is safer to consider it an endless loop */
if (pbb->dfsLastNum <= head->dfsLastNum)
{
head->loopType = ENDLESS_TYPE;
head->loopType = eNodeHeaderType::ENDLESS_TYPE;
findEndlessFollow (pProc, loopNodes, head);
break;
}
@@ -234,7 +233,7 @@ static void findNodesInLoop(BB * latchNode,BB * head,Function * pProc,queue &int
}
else
{
head->loopType = ENDLESS_TYPE;
head->loopType = eNodeHeaderType::ENDLESS_TYPE;
findEndlessFollow (pProc, loopNodes, head);
}
@@ -276,13 +275,13 @@ void Function::structLoops(derSeq *derivedG)
/* Structure loops */
/* for all derived sequences Gi */
for(derSeq::iterator iter=derivedG->begin(); iter!=derivedG->end(); ++iter)
for(auto & elem : *derivedG)
{
level++;
Ii = iter->Ii;
Ii = elem.Ii;
while (Ii) /* for all intervals Ii of Gi */
{
latchNode = NULL;
latchNode = nullptr;
intNodes.clear();
/* Find interval head (original BB node in G1) and create
@@ -299,12 +298,11 @@ void Function::structLoops(derSeq *derivedG)
for (size_t i = 0; i < intHead->inEdges.size(); i++)
{
pred = intHead->inEdges[i];
if (inInt(pred, intNodes) && isBackEdge(pred, intHead))
if (! latchNode)
latchNode = pred;
else
if (inInt(pred, intNodes) and isBackEdge(pred, intHead))
{
if (pred->dfsLastNum > latchNode->dfsLastNum)
if (nullptr == latchNode)
latchNode = pred;
else if (pred->dfsLastNum > latchNode->dfsLastNum)
latchNode = pred;
}
}
@@ -315,7 +313,7 @@ void Function::structLoops(derSeq *derivedG)
/* Check latching node is at the same nesting level of case
* statements (if any) and that the node doesn't belong to
* another loop. */
if ((latchNode->caseHead == intHead->caseHead) &&
if ((latchNode->caseHead == intHead->caseHead) and
(latchNode->loopHead == NO_NODE))
{
intHead->latchNode = latchNode->dfsLastNum;
@@ -337,9 +335,7 @@ void Function::structLoops(derSeq *derivedG)
* h. Note that h is a case node. */
static bool successor (int s, int h, Function * pProc)
{
BB * header;
header = pProc->m_dfsLast[h];
BB * header = pProc->m_dfsLast[h];
auto iter = std::find_if(header->edges.begin(),
header->edges.end(),
[s](const TYPEADR_TYPE &te)->bool{ return te.BBptr->dfsLastNum == s;});
@@ -352,12 +348,11 @@ static bool successor (int s, int h, Function * pProc)
* case). */
static void tagNodesInCase (BB * pBB, nodeList &l, int head, int tail)
{
int current, /* index to current node */
i;
int current; /* index to current node */
pBB->traversed = DFS_CASE;
current = pBB->dfsLastNum;
if ((current != tail) && (pBB->nodeType != MULTI_BRANCH) && (inList (l, pBB->immedDom)))
if ((current != tail) and (pBB->nodeType != MULTI_BRANCH) and (inList (l, pBB->immedDom)))
{
insertList (l, current);
pBB->caseHead = head;
@@ -374,29 +369,31 @@ static void tagNodesInCase (BB * pBB, nodeList &l, int head, int tail)
* has a case node. */
void Function::structCases()
{
int i, j;
BB * caseHeader; /* case header node */
int exitNode = NO_NODE; /* case exit node */
nodeList caseNodes; /* temporary: list of nodes in case */
/* Linear scan of the nodes in reverse dfsLast order, searching for
* case nodes */
for (i = numBBs - 1; i >= 0; i--)
if (m_dfsLast[i]->nodeType == MULTI_BRANCH)
for (int i = numBBs - 1; i >= 0; i--)
{
caseHeader = m_dfsLast[i];
if ((m_dfsLast[i]->nodeType != MULTI_BRANCH))
continue;
BB * caseHeader = m_dfsLast[i];; /* case header node */
/* Find descendant node which has as immediate predecessor
* the current header node, and is not a successor. */
for (j = i + 2; j < numBBs; j++)
for (size_t j = i + 2; j < numBBs; j++)
{
if ((not successor(j, i, this)) and (m_dfsLast[j]->immedDom == i))
{
if ((!successor(j, i, this)) &&
(m_dfsLast[j]->immedDom == i))
if (exitNode == NO_NODE)
{
exitNode = j;
}
else if (m_dfsLast[exitNode]->inEdges.size() < m_dfsLast[j]->inEdges.size())
exitNode = j;
}
}
m_dfsLast[i]->caseTail = exitNode;
/* Tag nodes that belong to the case by recording the
@@ -431,9 +428,9 @@ static void flagNodes (nodeList &l, int f, Function * pProc)
/* Structures if statements */
void Function::structIfs ()
{
size_t followInEdges; /* Largest # in-edges so far */
int curr, /* Index for linear scan of nodes */
desc, /* Index for descendant */
followInEdges, /* Largest # in-edges so far */
/*desc,*/ /* Index for descendant */
follow; /* Possible follow node */
nodeList domDesc, /* List of nodes dominated by curr */
unresolved /* List of unresolved if nodes */
@@ -448,13 +445,13 @@ void Function::structIfs ()
if (currNode->flg & INVALID_BB) /* Do not process invalid BBs */
continue;
if ((currNode->nodeType == TWO_BRANCH) && (!currNode->back().ll()->testFlags(JX_LOOP)))
if ((currNode->nodeType == TWO_BRANCH) and (not currNode->back().ll()->testFlags(JX_LOOP)))
{
followInEdges = 0;
follow = 0;
/* Find all nodes that have this node as immediate dominator */
for (desc = curr+1; desc < numBBs; desc++)
for (size_t desc = curr+1; desc < numBBs; desc++)
{
if (m_dfsLast[desc]->immedDom == curr)
{
@@ -470,10 +467,10 @@ void Function::structIfs ()
/* Determine follow according to number of descendants
* immediately dominated by this node */
if ((follow != 0) && (followInEdges > 1))
if ((follow != 0) and (followInEdges > 1))
{
currNode->ifFollow = follow;
if (!unresolved.empty())
if (not unresolved.empty())
flagNodes (unresolved, follow, this);
}
else
@@ -507,14 +504,14 @@ void Function::replaceInEdge(BB* where, BB* which,BB* with)
}
bool Function::Case_notX_or_Y(BB* pbb, BB* thenBB, BB* elseBB)
{
HLTYPE &hl1(*pbb->back().hl());
HLTYPE &hl2(*thenBB->back().hl());
HLTYPE &hl1(*pbb->back().hlU());
HLTYPE &hl2(*thenBB->back().hlU());
BB* obb = elseBB->edges[THEN].BBptr;
/* Construct compound DBL_OR expression */
hl1.replaceExpr(hl1.expr()->inverse());
hl1.expr(COND_EXPR::boolOp (hl1.expr(), hl2.expr(), DBL_OR));
hl1.expr(BinaryOperator::Create(DBL_OR,hl1.expr(), hl2.expr()));
/* Replace in-edge to obb from e to pbb */
replaceInEdge(obb,elseBB,pbb);
@@ -528,12 +525,14 @@ bool Function::Case_notX_or_Y(BB* pbb, BB* thenBB, BB* elseBB)
}
bool Function::Case_X_and_Y(BB* pbb, BB* thenBB, BB* elseBB)
{
HLTYPE &hl1(*pbb->back().hl());
HLTYPE &hl2(*thenBB->back().hl());
HLTYPE &hl1(*pbb->back().hlU());
HLTYPE &hl2(*thenBB->back().hlU());
BB* obb = elseBB->edges[ELSE].BBptr;
Expr * hl2_expr = hl2.getMyExpr();
/* Construct compound DBL_AND expression */
hl1.expr(COND_EXPR::boolOp (hl1.expr(),hl2.expr(), DBL_AND));
assert(hl1.expr());
assert(hl2_expr);
hl1.expr(BinaryOperator::Create(DBL_AND,hl1.expr(),hl2_expr));
/* Replace in-edge to obb from e to pbb */
replaceInEdge(obb,elseBB,pbb);
@@ -547,15 +546,15 @@ bool Function::Case_X_and_Y(BB* pbb, BB* thenBB, BB* elseBB)
bool Function::Case_notX_and_Y(BB* pbb, BB* thenBB, BB* elseBB)
{
HLTYPE &hl1(*pbb->back().hl());
HLTYPE &hl2(*thenBB->back().hl());
HLTYPE &hl1(*pbb->back().hlU());
HLTYPE &hl2(*thenBB->back().hlU());
BB* obb = thenBB->edges[ELSE].BBptr;
/* Construct compound DBL_AND expression */
hl1.replaceExpr(hl1.expr()->inverse());
hl1.expr(COND_EXPR::boolOp (hl1.expr(), hl2.expr(), DBL_AND));
hl1.expr(BinaryOperator::LogicAnd(hl1.expr(), hl2.expr()));
/* Replace in-edge to obb from t to pbb */
replaceInEdge(obb,thenBB,pbb);
@@ -570,13 +569,13 @@ bool Function::Case_notX_and_Y(BB* pbb, BB* thenBB, BB* elseBB)
bool Function::Case_X_or_Y(BB* pbb, BB* thenBB, BB* elseBB)
{
HLTYPE &hl1(*pbb->back().hl());
HLTYPE &hl2(*thenBB->back().hl());
HLTYPE &hl1(*pbb->back().hlU());
HLTYPE &hl2(*thenBB->back().hlU());
BB * obb = thenBB->edges[THEN].BBptr;
/* Construct compound DBL_OR expression */
hl1.expr(COND_EXPR::boolOp (hl1.expr(), hl2.expr(), DBL_OR));
hl1.expr(BinaryOperator::LogicOr(hl1.expr(), hl2.expr()));
/* Replace in-edge to obb from t to pbb */
@@ -603,7 +602,7 @@ void Function::compoundCond()
/* Traverse nodes in postorder, this way, the header node of a
* compound condition is analysed first */
for (int i = 0; i < this->numBBs; i++)
for (size_t i = 0; i < this->numBBs; i++)
{
pbb = this->m_dfsLast[i];
if (pbb->flg & INVALID_BB)
@@ -617,33 +616,33 @@ void Function::compoundCond()
change = true; //assume change
/* Check (X || Y) case */
if ((thenBB->nodeType == TWO_BRANCH) && (thenBB->numHlIcodes == 1) &&
(thenBB->inEdges.size() == 1) && (thenBB->edges[ELSE].BBptr == elseBB))
/* Check (X or Y) case */
if ((thenBB->nodeType == TWO_BRANCH) and (thenBB->numHlIcodes == 1) and
(thenBB->inEdges.size() == 1) and (thenBB->edges[ELSE].BBptr == elseBB))
{
if(Case_X_or_Y(pbb, thenBB, elseBB))
--i;
}
/* Check (!X && Y) case */
else if ((thenBB->nodeType == TWO_BRANCH) && (thenBB->numHlIcodes == 1) &&
(thenBB->inEdges.size() == 1) && (thenBB->edges[THEN].BBptr == elseBB))
/* Check (not X and Y) case */
else if ((thenBB->nodeType == TWO_BRANCH) and (thenBB->numHlIcodes == 1) and
(thenBB->inEdges.size() == 1) and (thenBB->edges[THEN].BBptr == elseBB))
{
if(Case_notX_and_Y(pbb, thenBB, elseBB))
--i;
}
/* Check (X && Y) case */
else if ((elseBB->nodeType == TWO_BRANCH) && (elseBB->numHlIcodes == 1) &&
(elseBB->inEdges.size()==1) && (elseBB->edges[THEN].BBptr == thenBB))
/* Check (X and Y) case */
else if ((elseBB->nodeType == TWO_BRANCH) and (elseBB->numHlIcodes == 1) and
(elseBB->inEdges.size()==1) and (elseBB->edges[THEN].BBptr == thenBB))
{
if(Case_X_and_Y(pbb, thenBB, elseBB ))
--i;
}
/* Check (!X || Y) case */
else if ((elseBB->nodeType == TWO_BRANCH) && (elseBB->numHlIcodes == 1) &&
(elseBB->inEdges.size() == 1) && (elseBB->edges[ELSE].BBptr == thenBB))
/* Check (not X or Y) case */
else if ((elseBB->nodeType == TWO_BRANCH) and (elseBB->numHlIcodes == 1) and
(elseBB->inEdges.size() == 1) and (elseBB->edges[ELSE].BBptr == thenBB))
{
if(Case_notX_or_Y(pbb, thenBB, elseBB ))
--i;

File diff suppressed because it is too large Load Diff

View File

@@ -5,156 +5,155 @@
****************************************************************************/
#include "dcc.h"
#include "msvc_fixes.h"
#include "project.h"
#include <string.h>
#include "CallGraph.h"
#include "DccFrontend.h"
#include <cstring>
#include <iostream>
#include <QtCore/QCoreApplication>
#include <QtWidgets/QApplication>
#include <QCommandLineParser>
#include <QtCore/QFile>
#include "ui/DccMainWindow.h"
/* Global variables - extern to other modules */
char *asm1_name, *asm2_name; /* Assembler output filenames */
SYMTAB symtab; /* Global symbol table */
STATS stats; /* cfg statistics */
//PROG prog; /* programs fields */
OPTION option; /* Command line options */
//Function * pProcList; /* List of procedures, topologically sorted */
//Function * pLastProc; /* Pointer to last node in procedure list */
//FunctionListType pProcList;
//CALL_GRAPH *callGraph; /* Call graph of the program */
extern QString asm1_name, asm2_name; /* Assembler output filenames */
extern SYMTAB symtab; /* Global symbol table */
extern STATS stats; /* cfg statistics */
extern OPTION option; /* Command line options */
static char *initargs(int argc, char *argv[]);
static void displayTotalStats(void);
#include <llvm/Support/raw_os_ostream.h>
/****************************************************************************
* main
***************************************************************************/
#include <iostream>
extern Project g_proj;
int main(int argc, char *argv[])
{
// llvm::MCOperand op=llvm::MCOperand::CreateImm(11);
// llvm::MCAsmInfo info;
// llvm::raw_os_ostream wrap(std::cerr);
// op.print(wrap,&info);
// wrap.flush();
/* Extract switches and filename */
strcpy(option.filename, initargs(argc, argv));
/**
 * Declares dcc's command line switches, parses the arguments carried by
 * `app`, and stores the outcome in the global `option` record and in the
 * `asm1_name` / `asm2_name` assembler output file names.
 *
 * Recognised switches: -v, -V, -c, -m, -s (plain flags), -a <assembly_level>,
 * -o / --output <file>, -E <offset> (parsed as hex, default "0"), followed by
 * one positional "source" argument naming the executable to decompile.
 */
void setupOptions(QCoreApplication &app) {
    QCommandLineParser parser;
    parser.setApplicationDescription("dcc");
    parser.addHelpOption();

    // Plain flags. They are registered in the order v, V, c, m, s so the
    // generated help text lists them exactly as before.
    const QCommandLineOption verboseOpt     {"v", QCoreApplication::translate("main", "verbose")};
    const QCommandLineOption veryVerboseOpt {"V", QCoreApplication::translate("main", "very verbose")};
    const QCommandLineOption followCallsOpt {"c", QCoreApplication::translate("main", "Follow register indirect calls")};
    const QCommandLineOption memoryMapOpt   {"m", QCoreApplication::translate("main", "Print memory maps of program")};
    const QCommandLineOption statsOpt       {"s", QCoreApplication::translate("main", "Print stats")};
    parser.addOption(verboseOpt);
    parser.addOption(veryVerboseOpt);
    parser.addOption(followCallsOpt);
    parser.addOption(memoryMapOpt);
    parser.addOption(statsOpt);

    // Switches that carry a value.
    const QCommandLineOption assemblyOpt("a",
                                         QCoreApplication::translate("main", "Produce assembly"),
                                         "assembly_level");
    const QCommandLineOption outputFileOpt(QStringList() << "o" << "output",
                                           QCoreApplication::translate("main", "Place output into <file>."),
                                           QCoreApplication::translate("main", "file"));
    const QCommandLineOption entryPointOpt(QStringList() << "E",
                                           QCoreApplication::translate("main", "Custom entry point as hex"),
                                           QCoreApplication::translate("main", "offset"),
                                           "0");
    parser.addOption(outputFileOpt);
    parser.addOption(assemblyOpt);
    parser.addOption(entryPointOpt);

    parser.addPositionalArgument("source", QCoreApplication::translate("main", "Dos Executable file to decompile."));

    // Process the actual command line arguments given by the user.
    parser.process(app);

    // Without an input file there is nothing to do: show the usage text.
    // (Per the Qt documentation showHelp() also exits the application.)
    const QStringList positional = parser.positionalArguments();
    if (positional.empty()) {
        parser.showHelp();
    }

    option.verbose     = parser.isSet(verboseOpt);
    option.VeryVerbose = parser.isSet(veryVerboseOpt);
    if (parser.isSet(assemblyOpt)) {
        // Level 1 selects the first assembler listing, level 2 the second;
        // any other value leaves both switched off.
        const int level = parser.value(assemblyOpt).toInt();
        option.asm1 = (level == 1);
        option.asm2 = (level == 2);
    }
    option.Map      = parser.isSet(memoryMapOpt);
    option.Stats    = parser.isSet(statsOpt);
    option.Interact = false;
    option.Calls    = parser.isSet(followCallsOpt);
    option.filename = positional.first();
    option.CustomEntryPoint = parser.value(entryPointOpt).toUInt(nullptr, 16);

    // An explicit -o names both listings; otherwise the names are derived
    // from the input file, but only when a listing was actually requested.
    if (parser.isSet(outputFileOpt)) {
        asm1_name = asm2_name = parser.value(outputFileOpt);
    }
    else if (option.asm1 or option.asm2) {
        asm1_name = option.filename + ".a1";
        asm2_name = option.filename + ".a2";
    }
}
int main(int argc, char **argv)
{
QCoreApplication::setApplicationName("dcc");
QCoreApplication::setApplicationVersion("0.2");
if(argc==1) {
QApplication app(argc,argv);
DccMainWindow win;
win.show();
return app.exec();
}
QCoreApplication app(argc,argv);
setupOptions(app);
Project *proj = Project::get();
/* Front end reads in EXE or COM file, parses it into I-code while
* building the call graph and attaching appropriate bits of code for
* each procedure.
*/
DccFrontend fe(option.filename);
proj->create(option.filename);
DccFrontend fe(&app);
proj->addLoadCommands(option.filename);
proj->processAllCommands();
if(proj->m_error_state) {
proj->dumpAllErrors();
return -1;
}
if (option.verbose)
proj->prog.displayLoadInfo();
if(false==fe.FrontEnd ())
return -1;
if(option.asm1)
return 0;
/* In the middle is a so called Universal Decompiling Machine.
* It processes the procedure list and I-code and attaches where it can
* to each procedure an optimised cfg and ud lists
*/
udm();
if(option.asm2)
return 0;
/* Back end converts each procedure into C using I-code, interval
* analysis, data flow etc. and outputs it to output file ready for
* re-compilation.
*/
BackEnd(option.filename, g_proj.callGraph);
BackEnd(proj->callGraph);
g_proj.callGraph->write();
proj->callGraph->write();
if (option.Stats)
displayTotalStats();
/*
freeDataStructures(pProcList);
*/
return 0;
}
/****************************************************************************
* initargs - Extract command line arguments
***************************************************************************/
/* Legacy hand-written switch parser.
 * Walks the leading "-xyz" arguments, records each switch in the global
 * `option` record (and the -o value in asm1_name/asm2_name), and returns the
 * single remaining argument: the name of the program to decompile.
 * Any other argument count ends in fatalError(USAGE). */
static char *initargs(int argc, char *argv[])
{
char *pc;
/* Consume arguments for as long as they start with '-'. argc and argv are
 * advanced in place, so after the loop they describe what is left. */
while (--argc > 0 && (*++argv)[0] == '-')
{
/* Several switch letters may be bundled behind one '-'. */
for (pc = argv[0]+1; *pc; pc++)
switch (*pc)
{
case 'a': /* Print assembler listing */
/* "-a2" selects the second listing; "-a" and "-a1" select the first. */
if (*(pc+1) == '2')
option.asm2 = true;
else
option.asm1 = true;
/* Step over the level digit so it is not parsed as a switch letter. */
if (*(pc+1) == '1' || *(pc+1) == '2')
pc++;
break;
case 'c':
option.Calls = true;
break;
case 'i':
option.Interact = true;
break;
case 'm': /* Print memory map */
option.Map = true;
break;
case 's': /* Print Stats */
option.Stats = true;
break;
case 'V': /* Very verbose => verbose */
option.VeryVerbose = true;
/* No break: falls through so that -V also sets verbose. */
case 'v':
option.verbose = true; /* Make everything verbose */
break;
case 'o': /* assembler output file */
/* "-oNAME": the rest of this argument is the output name. */
if (*(pc+1)) {
asm1_name = asm2_name = pc+1;
goto NextArg;
}
/* "-o NAME": the output name is the next argument, which is consumed here. */
else if (--argc > 0) {
asm1_name = asm2_name = *++argv;
goto NextArg;
}
/* No name available: falls through into the error case below. */
default:
fatalError(INVALID_ARG, *pc);
return *argv;
}
/* Target used by -o to abandon the rest of the current argument. */
NextArg:;
}
/* Exactly one argument left: it is the file to decompile. */
if (argc == 1)
{
if (option.asm1 || option.asm2)
{
/* No -o given: derive both listing names from the input file name. */
if (! asm1_name)
{
/* +4 leaves room for a three-character suffix and the terminator. */
asm1_name = strcpy((char*)malloc(strlen(*argv)+4), *argv);
pc = strrchr(asm1_name, '.');
/* Cut the extension only when the last '.' lies after the last '/'.
 * NOTE(review): if the name has no '.' or no '/', one side of this
 * comparison is a null pointer - confirm the result is as intended. */
if (pc > strrchr(asm1_name, '/'))
{
*pc = '\0';
}
asm2_name = (char*)malloc(strlen(asm1_name)+4) ;
strcat(strcpy(asm2_name, asm1_name), ".a2");
unlink(asm2_name);
strcat(asm1_name, ".a1");
}
unlink(asm1_name); /* Remove asm output files */
}
return *argv; /* filename of the program to decompile */
}
/* Zero or several arguments left over: report usage.
 * (The original note below suggests fatalError does not return - not
 * verifiable from this function alone.) */
fatalError(USAGE);
return *argv; // does not reach this.
}
static void
displayTotalStats ()
/* Displays final statistics for the complete program */
{
printf ("\nFinal Program Statistics\n");
printf (" Total number of low-level Icodes : %ld\n", stats.totalLL);
printf (" Total number of high-level Icodes: %ld\n", stats.totalHL);
printf (" Total number of low-level Icodes : %d\n", stats.totalLL);
printf (" Total number of high-level Icodes: %d\n", stats.totalHL);
printf (" Total reduction of instructions : %2.2f%%\n", 100.0 -
(stats.totalHL * 100.0) / stats.totalLL);
}

35
src/dcc_interface.cpp Normal file
View File

@@ -0,0 +1,35 @@
#include "dcc_interface.h"
#include "dcc.h"
#include "project.h"
struct DccImpl : public IDcc {
PtrFunction m_current_func;
// IDcc interface
public:
bool load(QString name)
{
option.filename = name;
Project::get()->create(name);
return Project::get()->addLoadCommands(name);
}
void SetCurFunc_by_Name(QString v)
{
PtrFunction p(Project::get()->findByName(v));
if(p!=nullptr)
m_current_func = p;
}
QDir installDir() {
return QDir(".");
}
QDir dataDir(QString kind) { // return directory containing decompilation helper data -> signatures/includes/etc.
QDir res(installDir());
res.cd(kind);
return res;
}
};
/* Returns the process-wide IDcc instance. The DccImpl object is created on
 * the first call and is never destroyed. */
IDcc* IDcc::get() {
    // A function-local static is initialised exactly once, on first use.
    static IDcc *instance = new DccImpl;
    return instance;
}

View File

@@ -2,6 +2,15 @@
* dcc project disassembler
* (C) Cristina Cifuentes, Mike van Emmerik, Jeff Ledermann
****************************************************************************/
#include "disassem.h"
#include "dcc.h"
#include "msvc_fixes.h"
#include "symtab.h"
#include "project.h"
#include <QtCore/QFile>
#include <QtCore/QDebug>
#include <stdint.h>
#include <vector>
#include <map>
@@ -9,11 +18,8 @@
#include <iomanip>
#include <stdio.h>
#include <string.h>
#include "src/ui/StructuredTextTarget.h"
#include "dcc.h"
#include "symtab.h"
#include "disassem.h"
#include "project.h"
// Note: for the time being, there is no interactive disassembler
// for unix
@@ -70,23 +76,23 @@ static const char *szFlops3C[] =
static const char *szPtr[2] = { "word ptr ", "byte ptr " };
static void formatRM(ostringstream &p, uint32_t flg, const LLOperand &pm);
static ostringstream &strDst(ostringstream &os, uint32_t flg, const LLOperand &pm);
static void formatRM(QTextStream & p, const LLOperand &pm);
static QTextStream & strDst(QTextStream & os, uint32_t flg, const LLOperand &pm);
static char *strHex(uint32_t d);
//static int checkScanned(uint32_t pcCur);
//static void setProc(Function * proc);
//static void dispData(uint16_t dataSeg);
boolT callArg(uint16_t off, char *temp); /* Check for procedure name */
bool callArg(uint16_t off, char *temp); /* Check for procedure name */
//static FILE *dis_g_fp;
static CIcodeRec pc;
static int cb, j, numIcode, allocIcode;
static int cb, numIcode, allocIcode;
static map<int,int> pl;
static uint32_t nextInst;
static boolT fImpure;
//static bool fImpure;
//static int g_lab;
static Function * pProc; /* Points to current proc struct */
static PtrFunction pProc; /* Points to current proc struct */
struct POSSTACK_ENTRY
{
@@ -94,7 +100,7 @@ struct POSSTACK_ENTRY
Function * pProc; /* A pointer to a PROCEDURE structure */
} ;
static vector<POSSTACK_ENTRY> posStack; /* position stack */
static uint8_t iPS; /* Index into the stack */
//static uint8_t iPS; /* Index into the stack */
// These are "curses equivalent" functions. (Used to use curses for all this,
@@ -107,7 +113,7 @@ static uint8_t iPS; /* Index into the stack */
void LLInst::findJumpTargets(CIcodeRec &_pc)
{
if (testFlags(I) && ! testFlags(JMP_ICODE) && isJmpInst())
if (srcIsImmed() and not testFlags(JMP_ICODE) and isJmpInst())
{
/* Replace the immediate operand with an icode index */
iICODE labTgt=_pc.labelSrch(src().getImm2());
@@ -133,7 +139,7 @@ void LLInst::findJumpTargets(CIcodeRec &_pc)
* pass == 3 generates output on file .b
****************************************************************************/
void Disassembler::disassem(Function * ppProc)
void Disassembler::disassem(PtrFunction ppProc)
{
@@ -150,11 +156,11 @@ void Disassembler::disassem(Function * ppProc)
if (pass != 3)
{
auto p = (pass == 1)? asm1_name: asm2_name;
m_fp.open(p,ios_base::app);
if (!m_fp.is_open())
{
fatalError(CANNOT_OPEN, p);
m_disassembly_target = new QFile(p);
if(!m_disassembly_target->open(QFile::WriteOnly|QFile::Text|QFile::Append)) {
fatalError(CANNOT_OPEN, p.toStdString().c_str());
}
m_fp.setDevice(m_disassembly_target);
}
/* Create temporary code array */
// Mike: needs objectising!
@@ -177,7 +183,7 @@ void Disassembler::disassem(Function * ppProc)
/* Write procedure header */
if (pass != 3)
{
std::string near_far=(pProc->flg & PROC_FAR)? "FAR": "NEAR";
const char * near_far=(pProc->flg & PROC_FAR)? "FAR": "NEAR";
m_fp << "\t\t"<<pProc->name<<" PROC "<< near_far<<"\n";
}
@@ -192,7 +198,10 @@ void Disassembler::disassem(Function * ppProc)
if (pass != 3)
{
m_fp << "\n\t\t"<<pProc->name<<" ENDP\n\n";
m_fp.close();
m_fp.setDevice(nullptr);
m_disassembly_target->close();
delete m_disassembly_target;
}
pc.clear();
@@ -205,346 +214,7 @@ void Disassembler::disassem(Function * ppProc)
****************************************************************************/
void Disassembler::dis1Line(LLInst &inst,int loc_ip, int pass)
{
PROG &prog(Project::get()->prog);
ostringstream oper_stream;
ostringstream hex_bytes;
ostringstream result_stream;
ostringstream opcode_with_mods;
ostringstream operands_s;
oper_stream << uppercase;
hex_bytes << uppercase;
/* Disassembly stage 1 --
* Do not try to display NO_CODE entries or synthetic instructions,
* other than JMPs, that have been introduced for def/use analysis. */
if ((option.asm1) &&
( inst.testFlags(NO_CODE) ||
(inst.testFlags(SYNTHETIC) && (inst.getOpcode() != iJMP))))
{
return;
}
else if (inst.testFlags(NO_CODE))
{
return;
}
if (inst.testFlags(TARGET | CASE))
{
if (pass == 3)
cCode.appendCode("\n"); /* Print to c code buffer */
else
m_fp<< "\n"; /* No, print to the stream */
}
/* Find next instruction label and print hex bytes */
if (inst.testFlags(SYNTHETIC))
nextInst = inst.label;
else
{
cb = (uint32_t) inst.numBytes;
nextInst = inst.label + cb;
/* Output hexa code in program image */
if (pass != 3)
{
for (j = 0; j < cb; j++)
{
hex_bytes << hex << setw(2) << setfill('0') << uint16_t(prog.Image[inst.label + j]);
}
hex_bytes << ' ';
}
}
oper_stream << setw(POS_LAB) << left<< hex_bytes.str();
/* Check if there is a symbol here */
selectTable(Label);
oper_stream << setw(5)<<left; // align for the labels
{
ostringstream lab_contents;
if (readVal(lab_contents, inst.label, 0))
{
lab_contents << ':'; /* Also removes the null */
}
else if (inst.testFlags(TARGET)) /* Symbols override Lnn labels */
{
/* Print label */
if (pl.count(loc_ip)==0)
{
pl[loc_ip] = ++g_lab;
}
lab_contents<< "L"<<pl[loc_ip]<<':';
}
oper_stream<< lab_contents.str();
}
if ((inst.getOpcode()==iSIGNEX )&& inst.testFlags(B))
{
inst.setOpcode(iCBW);
}
opcode_with_mods<<Machine_X86::opcodeName(inst.getOpcode());
switch ( inst.getOpcode() )
{
case iADD: case iADC: case iSUB: case iSBB: case iAND: case iOR:
case iXOR: case iTEST: case iCMP: case iMOV: case iLEA: case iXCHG:
strDst(operands_s,inst.getFlag(), inst.dst);
inst.strSrc(operands_s);
break;
case iESC:
inst.flops(operands_s);
break;
case iSAR: case iSHL: case iSHR: case iRCL: case iRCR: case iROL:
case iROR:
strDst(operands_s,inst.getFlag() | I, inst.dst);
if(inst.testFlags(I))
inst.strSrc(operands_s);
else
operands_s<<", cl";
break;
case iINC: case iDEC: case iNEG: case iNOT: case iPOP:
strDst(operands_s,inst.getFlag() | I, inst.dst);
break;
case iPUSH:
if (inst.testFlags(I))
{
operands_s<<strHex(inst.src().getImm2());
}
else
{
strDst(operands_s,inst.getFlag() | I, inst.dst);
}
break;
case iDIV: case iIDIV: case iMUL: case iIMUL: case iMOD:
if (inst.testFlags(I))
{
strDst(operands_s,inst.getFlag(), inst.dst) <<", ";
formatRM(operands_s, inst.getFlag(), inst.src());
inst.strSrc(operands_s);
}
else
strDst(operands_s,inst.getFlag() | I, inst.src());
break;
case iLDS: case iLES: case iBOUND:
strDst(operands_s,inst.getFlag(), inst.dst)<<", dword ptr";
inst.strSrc(operands_s,true);
break;
case iJB: case iJBE: case iJAE: case iJA:
case iJL: case iJLE: case iJGE: case iJG:
case iJE: case iJNE: case iJS: case iJNS:
case iJO: case iJNO: case iJP: case iJNP:
case iJCXZ:case iLOOP: case iLOOPE:case iLOOPNE:
case iJMP: case iJMPF:
/* Check if there is a symbol here */
{
ICODE *lab=pc.GetIcode(inst.src().getImm2());
selectTable(Label);
if ((inst.src().getImm2() < (uint32_t)numIcode) && /* Ensure in range */
readVal(operands_s, lab->ll()->label, 0))
{
break; /* Symbolic label. Done */
}
}
if (inst.testFlags(NO_LABEL))
{
//strcpy(p + WID_PTR, strHex(pIcode->ll()->immed.op));
operands_s<<strHex(inst.src().getImm2());
}
else if (inst.testFlags(I) )
{
j = inst.src().getImm2();
if (pl.count(j)==0) /* Forward jump */
{
pl[j] = ++g_lab;
}
if (inst.getOpcode() == iJMPF)
{
operands_s<<" far ptr ";
}
operands_s<<"L"<<pl[j];
}
else if (inst.getOpcode() == iJMPF)
{
operands_s<<"dword ptr";
inst.strSrc(operands_s,true);
}
else
{
strDst(operands_s,I, inst.src());
}
break;
case iCALL: case iCALLF:
if (inst.testFlags(I))
{
if((inst.getOpcode() == iCALL))
operands_s<< "near";
else
operands_s<< " far";
operands_s<<" ptr "<<(inst.src().proc.proc)->name;
}
else if (inst.getOpcode() == iCALLF)
{
operands_s<<"dword ptr ";
inst.strSrc(operands_s,true);
}
else
strDst(operands_s,I, inst.src());
break;
case iENTER:
operands_s<<strHex(inst.dst.off)<<", ";
operands_s<<strHex(inst.src().getImm2());
break;
case iRET: case iRETF: case iINT:
if (inst.testFlags(I))
{
operands_s<<strHex(inst.src().getImm2());
}
break;
case iCMPS: case iREPNE_CMPS: case iREPE_CMPS:
case iSCAS: case iREPNE_SCAS: case iREPE_SCAS:
case iSTOS: case iREP_STOS:
case iLODS: case iREP_LODS:
case iMOVS: case iREP_MOVS:
case iINS: case iREP_INS:
case iOUTS: case iREP_OUTS:
if (inst.src().segOver)
{
bool is_dx_src=(inst.getOpcode() == iOUTS || inst.getOpcode() == iREP_OUTS);
if(is_dx_src)
operands_s<<"dx, "<<szPtr[inst.getFlag() & B];
else
operands_s<<szPtr[inst.getFlag() & B];
if (inst.getOpcode() == iLODS ||
inst.getOpcode() == iREP_LODS ||
inst.getOpcode() == iOUTS ||
inst.getOpcode() == iREP_OUTS)
{
operands_s<<Machine_X86::regName(inst.src().segOver); // szWreg[src.segOver-rAX]
}
else
{
operands_s<<"es:[di], "<<Machine_X86::regName(inst.src().segOver);
}
operands_s<<":[si]";
}
else
{
(inst.getFlag() & B)? opcode_with_mods<< "B": opcode_with_mods<< "W";
}
break;
case iXLAT:
if (inst.src().segOver)
{
operands_s<<" "<<szPtr[1];
operands_s<<Machine_X86::regName(inst.src().segOver)<<":[bx]";
}
break;
case iIN:
(inst.getFlag() & B)? operands_s<<"al, " : operands_s<< "ax, ";
(inst.testFlags(I))? operands_s << strHex(inst.src().getImm2()) : operands_s<< "dx";
break;
case iOUT:
{
std::string d1=((inst.testFlags(I))? strHex(inst.src().getImm2()): "dx");
std::string d2=((inst.getFlag() & B) ? ", al": ", ax");
operands_s<<d1 << d2;
}
break;
default:
break;
}
oper_stream << setw(15) << left <<opcode_with_mods.str();
oper_stream << operands_s.str();
/* Comments */
if (inst.testFlags(SYNTHETIC))
{
fImpure = false;
}
else
{
for (j = inst.label, fImpure = 0; j > 0 && j < (int)nextInst; j++)
{
fImpure |= BITMAP(j, BM_DATA);
}
}
result_stream << setw(54) << left << oper_stream.str();
/* Check for user supplied comment */
selectTable(Comment);
ostringstream cbuf;
if (readVal(cbuf, inst.label, 0))
{
result_stream <<"; "<<cbuf.str();
}
else if (fImpure || (inst.testFlags(SWITCH | CASE | SEG_IMMED | IMPURE | SYNTHETIC | TERMINATES)))
{
if (inst.testFlags(CASE))
{
result_stream << ";Case l"<< inst.caseEntry;
}
if (inst.testFlags(SWITCH))
{
result_stream << ";Switch ";
}
if (fImpure)
{
result_stream << ";Accessed as data ";
}
if (inst.testFlags(IMPURE))
{
result_stream << ";Impure operand ";
}
if (inst.testFlags(SEG_IMMED))
{
result_stream << ";Segment constant";
}
if (inst.testFlags(TERMINATES))
{
result_stream << ";Exit to DOS";
}
}
/* Comment on iINT icodes */
if (inst.getOpcode() == iINT)
inst.writeIntComment(result_stream);
/* Display output line */
if(pass==3)
{
/* output to .b code buffer */
if (inst.testFlags(SYNTHETIC))
result_stream<<";Synthetic inst";
if (pass == 3) /* output to .b code buffer */
cCode.appendCode("%s\n", result_stream.str().c_str());
}
else
{
char buf[12];
/* output to .a1 or .a2 file */
if (not inst.testFlags(SYNTHETIC) )
{
sprintf(buf,"%03d %06X",loc_ip, inst.label);
}
else /* SYNTHETIC instruction */
{
sprintf(buf,"%03d ",loc_ip);
result_stream<<";Synthetic inst";
}
m_fp<<buf<< " " << result_stream.str() << "\n";
}
assert(false);
}
@@ -552,7 +222,7 @@ void Disassembler::dis1Line(LLInst &inst,int loc_ip, int pass)
/****************************************************************************
* formatRM
***************************************************************************/
static void formatRM(std::ostringstream &p, uint32_t flg, const LLOperand &pm)
static void formatRM(QTextStream &p, const LLOperand &pm)
{
//char seg[4];
@@ -589,13 +259,13 @@ static void formatRM(std::ostringstream &p, uint32_t flg, const LLOperand &pm)
/*****************************************************************************
* strDst
****************************************************************************/
static ostringstream & strDst(ostringstream &os,uint32_t flg, const LLOperand &pm)
static QTextStream & strDst(QTextStream &os,uint32_t flg, const LLOperand &pm)
{
/* Immediates to memory require size descriptor */
//os << setw(WID_PTR);
if ((flg & I) and not pm.isReg())
os << szPtr[flg & B];
formatRM(os, flg, pm);
formatRM(os, pm);
return os;
}
@@ -603,19 +273,19 @@ static ostringstream & strDst(ostringstream &os,uint32_t flg, const LLOperand &p
/****************************************************************************
* strSrc *
****************************************************************************/
ostringstream &LLInst::strSrc(ostringstream &os,bool skip_comma)
{
if(false==skip_comma)
os<<", ";
if (testFlags(I))
os<<strHex(src().getImm2());
else if (testFlags(IM_SRC)) /* level 2 */
os<<"dx:ax";
else
formatRM(os, getFlag(), src());
//QTextStream &LLInst::strSrc(QTextStream &os,bool skip_comma)
//{
// if(false==skip_comma)
// os<<", ";
// if (srcIsImmed())
// os<<strHex(src().getImm2());
// else if (testFlags(IM_SRC)) /* level 2 */
// os<<"dx:ax";
// else
// formatRM(os, src());
return os;
}
// return os;
//}
@@ -627,34 +297,38 @@ static char *strHex(uint32_t d)
static char buf[10];
d &= 0xFFFF;
sprintf(buf, "0%lX%s", d, (d > 9)? "h": "");
sprintf(buf, "0%X%s", d, (d > 9)? "h": "");
return (buf + (buf[1] <= '9'));
}
/****************************************************************************
* interactDis - interactive disassembler *
****************************************************************************/
void interactDis(Function * initProc, int initIC)
void interactDis(const PtrFunction & initProc, int initIC)
{
printf("Sorry - interactive disasassembler option not available for Unix\n");
QString procname = "UNKNOWN";
if(initProc)
procname = initProc->name;
qDebug() << "Wanted to start interactive disasassembler for "<<procname<<":"<<initIC;
return;
}
/* Handle the floating point opcodes (icode iESC) */
void LLInst::flops(std::ostringstream &out)
void LLInst::flops(QTextStream &out)
{
char bf[30];
//char bf[30];
uint8_t op = (uint8_t)src().getImm2();
/* Note that op is set to the escape number, e.g.
esc 0x38 is FILD */
if ( not dst.isReg() )
if ( not m_dst.isReg() )
{
/* The mod/rm mod bits are not set to 11 (i.e. register). This is the normal floating point opcode */
out<<Machine_X86::floatOpName(op)<<' ';
out <<setw(10);
if ((op == 0x29) || (op == 0x1F))
out.setFieldWidth(10);
if ((op == 0x29) or (op == 0x1F))
{
out << "tbyte ptr ";
}
@@ -684,8 +358,8 @@ void LLInst::flops(std::ostringstream &out)
break;
}
}
formatRM(out, getFlag(), dst);
out.setFieldWidth(0);
formatRM(out, m_dst);
}
else
{
@@ -694,7 +368,7 @@ void LLInst::flops(std::ostringstream &out)
normal opcodes. Because the opcodes are slightly different for
this case (e.g. op=04 means FSUB if reg != 3, but FSUBR for
reg == 3), a separate table is used (szFlops2). */
int destRegIdx=dst.regi - rAX;
int destRegIdx=m_dst.regi - rAX;
switch (op)
{
case 0x0C:
@@ -723,7 +397,7 @@ void LLInst::flops(std::ostringstream &out)
break;
default:
out << Machine_X86::floatOpName(0x40+op);
if ((op >= 0x20) && (op <= 0x27))
if ((op >= 0x20) and (op <= 0x27))
{
/* This is the ST(i), ST form. */
out << "ST("<<destRegIdx - rAX<<"),ST";
@@ -738,5 +412,230 @@ void LLInst::flops(std::ostringstream &out)
}
}
}
/* Writes the operands of one instruction to a structured-text target,
 * placing ", " between successive operands. */
struct AsmFormatter {
    IStructuredTextTarget * target; /* receives the rendered operand text */
    int operand_count;              /* number of operands emitted so far */

    /* Emit a single operand. Operands for which isSet() is false produce no
     * output and do not count towards the separator logic. */
    void visitOperand(const LLOperand &pm) {
        if(not pm.isSet())
            return;

        // Every operand after the first is preceded by a separator.
        if(operand_count++ > 0)
            target->prtt(", ");

        // Immediate value that is not held in a register: print it as a number.
        if (pm.immed and not pm.isReg()) {
            //target->addTaggedString(XT_Keyword,szPtr[flg&B]);
            target->addTaggedString(XT_Number,strHex(pm.getImm2()));
            return;
        }

        // Optional segment override prefix, e.g. "es:".
        if (pm.segOver)
            target->prtt(Machine_X86::regName(pm.segOver)+':');

        // No register at all: a bare memory offset "[off]".
        if (pm.regi == rUNDEF) {
            target->prtt(QString("[")+strHex((uint32_t)pm.off)+"]");
            return;
        }

        const auto &reg = Machine_X86::regName(pm.regi);

        // Plain register operand.
        if (pm.isReg()) {
            target->prtt(reg);
            return;
        }

        // Register-indirect access without displacement.
        if (pm.off == 0) {
            target->prtt("["+reg+"]");
            return;
        }

        // Register-indirect access with a signed displacement; the sign is
        // printed separately and the magnitude goes through strHex().
        const bool negative = pm.off < 0;
        const char *sign = negative ? "-" : "+";
        const uint32_t magnitude = (uint32_t)(negative ? - pm.off : pm.off);
        target->prtt("["+reg+sign+strHex(magnitude)+"]");
    }
};
/**
 * Render one low-level instruction as a line of tagged assembler text.
 *
 * Output order: 4 spaces, the instruction label as an 8-digit, zero-padded,
 * upper-case hexadecimal number (XT_Number), 4 spaces, the opcode mnemonic
 * (XT_Keyword, associated with @p insn), 2 spaces, the opcode-specific
 * operands, and finally an end-of-line marker.
 *
 * Opcodes that are not listed in the switch get no operand text.
 *
 * @param insn  instruction to render; it is dereferenced unconditionally.
 * @param out   structured text target that receives the output.
 * @param level not used by this function.
 */
void toStructuredText(LLInst *insn,IStructuredTextTarget *out, int level) {
    AsmFormatter formatter {out};
    const LLInst &inst(*insn);
    const QString opcode = Machine_X86::opcodeName(insn->getOpcode());

    out->addSpace(4);
    // toUpper() has to be applied to the formatted string: upper-casing the
    // '0' fill character (as was done before) is a no-op and left the hex
    // digits of the label in lower case.
    out->addTaggedString(XT_Number,QString("%1").arg(insn->label,8,16,QChar('0')).toUpper());
    out->addSpace(4);

    out->addTaggedString(XT_Keyword,opcode,insn);
    out->addSpace(2);

    switch(insn->getOpcode()) {
        /* Destination followed by source. */
        case iADD:  case iADC:  case iSUB:  case iSBB:  case iAND:  case iOR:
        case iXOR:  case iTEST: case iCMP:  case iMOV:  case iLEA:  case iXCHG:
        case iSAR:  case iSHL:  case iSHR:  case iRCL:  case iRCR:  case iROL:
        case iROR:
        case iLDS:  case iLES:  case iBOUND:
            formatter.visitOperand(insn->dst());
            formatter.visitOperand(insn->src());
            break;

        /* Destination only. */
        case iINC:  case iDEC:  case iNEG:  case iNOT:  case iPOP:
        case iPUSH:
            formatter.visitOperand(insn->dst());
            break;

        /* Destination always; the source only when it is an immediate. */
        case iDIV:  case iIDIV: case iMUL:  case iIMUL: case iMOD:
            formatter.visitOperand(insn->dst());
            if (inst.srcIsImmed())
                formatter.visitOperand(insn->src());
            break;

        case iJB:   case iJBE:  case iJAE:  case iJA:
        case iJL:   case iJLE:  case iJGE:  case iJG:
        case iJE:   case iJNE:  case iJS:   case iJNS:
        case iJO:   case iJNO:  case iJP:   case iJNP:
        case iJCXZ: case iLOOP: case iLOOPE:case iLOOPNE:
        case iJMP:  case iJMPF:
            /* Check if there is a symbol here (lookup currently disabled) */
            {
                //                ICODE *lab=pc.GetIcode(inst.src().getImm2());
                //                selectTable(Label);
                //                if ((inst.src().getImm2() < (uint32_t)numIcode) and  /* Ensure in range */
                //                        readVal(operands_s, lab->ll()->label, nullptr))
                //                {
                //                    break;                          /* Symbolic label. Done */
                //                }
            }
            if (inst.testFlags(NO_LABEL))
            {
                /* No label for the target: print the raw immediate. */
                //strcpy(p + WID_PTR, strHex(pIcode->ll()->immed.op));
                out->addTaggedString(XT_AsmLabel,strHex(inst.src().getImm2()));
            }
            else if (inst.srcIsImmed() )
            {
                int64_t tgt_addr = inst.src().getImm2();
                if (inst.getOpcode() == iJMPF)
                {
                    out->addTaggedString(XT_Keyword," far ptr ");
                }
                out->addTaggedString(XT_AsmLabel,QString("L_%1").arg(strHex(tgt_addr)));
            }
            else if (inst.getOpcode() == iJMPF)
            {
                // NOTE(review): no trailing space here, unlike "dword ptr " in
                // the iCALLF case below - confirm whether that is intended.
                out->addTaggedString(XT_Keyword,"dword ptr");
                formatter.visitOperand(inst.src());
            }
            else
            {
                formatter.visitOperand(inst.src());
            }
            break;

        case iCALL: case iCALLF:
            if (inst.srcIsImmed())
            {
                /* Direct call: distance keyword followed by the callee's name. */
                out->addTaggedString(XT_Keyword,QString("%1 ptr ").arg((inst.getOpcode() == iCALL) ? "near" : "far"));
                out->addTaggedString(XT_AsmLabel,(inst.src().proc.proc)->name);
            }
            else if (inst.getOpcode() == iCALLF)
            {
                out->addTaggedString(XT_Keyword,"dword ptr ");
                formatter.visitOperand(inst.src());
            }
            else
                formatter.visitOperand(inst.src());
            break;

        case iENTER:
            formatter.visitOperand(inst.dst());
            formatter.visitOperand(inst.src());
            break;

        /* Source only. */
        case iRET:
        case iRETF:
        case iINT:
            formatter.visitOperand(inst.src());
            break;

        case iCMPS:  case iREPNE_CMPS:  case iREPE_CMPS:
        case iSCAS:  case iREPNE_SCAS:  case iREPE_SCAS:
        case iSTOS:  case iREP_STOS:
        case iLODS:  case iREP_LODS:
        case iMOVS:  case iREP_MOVS:
        case iINS:   case iREP_INS:
        case iOUTS:  case iREP_OUTS:
            if (inst.src().segOver)
            {
                /* Segment override present: spell the operands out explicitly. */
                bool is_dx_src=(inst.getOpcode() == iOUTS or inst.getOpcode() == iREP_OUTS);
                if(is_dx_src)
                {
                    out->addTaggedString(XT_Symbol,"dx");
                    out->prtt(", ");
                    out->addTaggedString(XT_Keyword,szPtr[inst.getFlag() & B]);
                    out->addSpace(2);
                }
                else
                    out->addTaggedString(XT_Keyword,szPtr[inst.getFlag() & B]);
                if (inst.getOpcode() == iLODS or
                    inst.getOpcode() == iREP_LODS or
                    inst.getOpcode() == iOUTS or
                    inst.getOpcode() == iREP_OUTS)
                {
                    out->addTaggedString(XT_Symbol,Machine_X86::regName(inst.src().segOver)); // szWreg[src.segOver-rAX]
                }
                else
                {
                    out->addTaggedString(XT_Symbol,"es:[di]");
                    out->prtt(", ");
                    out->addTaggedString(XT_Symbol,Machine_X86::regName(inst.src().segOver));
                }
                out->addTaggedString(XT_Symbol,":[si]");
            }
            else
            {
                /* No override: drop the 2 spaces emitted after the mnemonic
                 * and append the B/W size suffix in their place. */
                out->delChars(2); // TODO: this is wonky way of adding instruction suffix
                if(inst.getFlag() & B)
                    out->addTaggedString(XT_Keyword,"B");
                else
                    out->addTaggedString(XT_Keyword,"W");
                out->addSpace(2);
            }
            break;

        case iXLAT:
            /* Operands are printed only when a segment override is present. */
            if (inst.src().segOver)
            {
                out->addTaggedString(XT_Keyword,QString(" ") + szPtr[1]);
                out->addTaggedString(XT_Symbol,Machine_X86::regName(inst.src().segOver)+":[bx]");
            }
            break;

        case iIN:
            out->addTaggedString(XT_Symbol, (inst.getFlag() & B)? "al" : "ax");
            out->prtt(", ");
            formatter.visitOperand(inst.src());
            break;

        case iOUT:
        {
            // NOTE(review): the source operand is passed to the formatter and
            // then also written explicitly (immediate or "dx"); this may print
            // it twice - confirm against AsmFormatter::visitOperand.
            formatter.visitOperand(inst.src());
            if(inst.srcIsImmed())
                out->addTaggedString(XT_Number, strHex(inst.src().getImm2()));
            else
                out->addTaggedString(XT_Symbol, "dx");
            out->prtt(", ");
            out->addTaggedString(XT_Symbol, (inst.getFlag() & B)? "al" : "ax");
            break;
        }

        default:
            /* Remaining opcodes: mnemonic only, no operand text. */
            break;
    }
    out->addEOL();
}

View File

@@ -28,7 +28,7 @@
{IP_OUT_OF_RANGE ,"Instruction at location %06lX goes beyond loaded image\n"},
{DEF_NOT_FOUND ,"Definition not found for condition code usage at opcode %d\n"},
{JX_NOT_DEF ,"JX use, definition not supported at opcode #%d\n"},
{NOT_DEF_USE ,"Def - use not supported. Def op = %d, use op = %d.\n"},
{NOT_DEF_USE ,"%x: Def - use not supported. Def op = %d, use op = %d.\n"},
{REPEAT_FAIL ,"Failed to construct repeat..until() condition.\n"},
{WHILE_FAIL ,"Failed to construct while() condition.\n"},
};

View File

@@ -9,18 +9,15 @@
* *
\* * * * * * * * * * * * */
#include "msvc_fixes.h"
#include <memory.h>
#include <stdint.h>
#ifndef PATLEN
#define PATLEN 23
#define WILD 0xF4
#endif
#ifndef bool
#define bool unsigned char
#define uint8_t unsigned char
#endif
static int pc; /* Indexes into pat[] */
/* prototypes */
@@ -82,7 +79,8 @@ TwoWild(uint8_t pat[])
static bool
FourWild(uint8_t pat[])
{
TwoWild(pat);
if(TwoWild(pat))
return true;
return TwoWild(pat);
}
@@ -180,8 +178,7 @@ static bool op0F(uint8_t pat[])
processor is in 16 bit address mode (real mode).
PATLEN bytes are scanned.
*/
void
fixWildCards(uint8_t pat[])
void fixWildCards(uint8_t pat[])
{
uint8_t op, quad, intArg;
@@ -410,7 +407,7 @@ fixWildCards(uint8_t pat[])
case 0xCD: /* int nn */
intArg = pat[pc++];
if ((intArg >= 0x34) && (intArg <= 0x3B))
if ((intArg >= 0x34) and (intArg <= 0x3B))
{
/* Borland/Microsoft FP emulations */
if (ModRM(pat)) return;

View File

@@ -7,10 +7,17 @@
#include "dcc.h"
#include "disassem.h"
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h> /* For malloc, free, realloc */
#include "project.h"
/* Loader - declares a single member function, loadIntoProject(), which takes
 * an IProject pointer and returns bool. Only the declaration is visible here. */
class Loader
{
// No access specifier is given, so this member has the class default (private).
// NOTE(review): presumably meant to be callable from outside - confirm.
bool loadIntoProject(IProject *);
};
typedef struct { /* PSP structure */
uint16_t int20h; /* interrupt 20h */
uint16_t eof; /* segment, end of allocation block */
@@ -50,7 +57,7 @@ static struct { /* EXE file header */
#define EXE_RELOCATION 0x10 /* EXE images rellocated to above PSP */
static void LoadImage(char *filename);
//static void LoadImage(char *filename);
static void displayLoadInfo(void);
static void displayMemMap(void);
@@ -58,31 +65,29 @@ static void displayMemMap(void);
* FrontEnd - invokes the loader, parser, disassembler (if asm1), icode
* rewriter, and displays any useful information.
****************************************************************************/
extern Project g_proj;
bool DccFrontend::FrontEnd ()
{
g_proj.callGraph = 0;
g_proj.m_fname = m_fname;
Project::get()->callGraph = nullptr;
Project::get()->create(m_fname);
/* Load program into memory */
LoadImage(g_proj);
LoadImage(*Project::get());
if (option.verbose)
displayLoadInfo();
/* Do depth first flow analysis building call graph and procedure list,
* and attaching the I-code to each procedure */
parse (g_proj);
parse (*Project::get());
if (option.asm1)
{
printf("dcc: writing assembler file %s\n", asm1_name);
printf("dcc: writing assembler file %s\n", asm1_name.c_str());
}
/* Search through code looking for impure references and flag them */
Disassembler ds(1);
for(Function &f : g_proj.pProcList)
for(Function &f : Project::get()->pProcList)
{
f.markImpure();
if (option.asm1)
@@ -92,11 +97,11 @@ bool DccFrontend::FrontEnd ()
}
if (option.Interact)
{
interactDis(&g_proj.pProcList.front(), 0); /* Interactive disassembler */
interactDis(&Project::get()->pProcList.front(), 0); /* Interactive disassembler */
}
/* Converts jump target addresses to icode offsets */
for(Function &f : g_proj.pProcList)
for(Function &f : Project::get()->pProcList)
{
f.bindIcodeOff();
}
@@ -125,7 +130,7 @@ static void displayLoadInfo(void)
printf("Minimum allocation = %04X paras\n", LH(&header.minAlloc));
printf("Maximum allocation = %04X paras\n", LH(&header.maxAlloc));
}
printf("Load image size = %04X\n", prog.cbImage - sizeof(PSP));
printf("Load image size = %04" PRIiPTR "\n", prog.cbImage - sizeof(PSP));
printf("Initial SS:SP = %04X:%04X\n", prog.initSS, prog.initSP);
printf("Initial CS:IP = %04X:%04X\n", prog.initCS, prog.initIP);
@@ -134,7 +139,7 @@ static void displayLoadInfo(void)
printf("\nRelocation Table\n");
for (i = 0; i < prog.cReloc; i++)
{
printf("%06X -> [%04X]\n", prog.relocTable[i],LH(prog.Image + prog.relocTable[i]));
printf("%06X -> [%04X]\n", prog.relocTable[i],LH(prog.image() + prog.relocTable[i]));
}
}
printf("\n");
@@ -208,23 +213,23 @@ void DccFrontend::LoadImage(Project &proj)
uint8_t buf[4];
/* Open the input file */
if ((fp = fopen(proj.m_fname.c_str(), "rb")) == NULL)
if ((fp = fopen(proj.binary_path().c_str(), "rb")) == nullptr)
{
fatalError(CANNOT_OPEN, proj.m_fname.c_str());
fatalError(CANNOT_OPEN, proj.binary_path().c_str());
}
/* Read in first 2 bytes to check EXE signature */
if (fread(&header, 1, 2, fp) != 2)
{
fatalError(CANNOT_READ, proj.m_fname.c_str());
fatalError(CANNOT_READ, proj.binary_path().c_str());
}
if (! (prog.fCOM = (boolT)(header.sigLo != 0x4D || header.sigHi != 0x5A))) {
prog.fCOM = (header.sigLo != 0x4D || header.sigHi != 0x5A);
if (! prog.fCOM ) {
/* Read rest of header */
fseek(fp, 0, SEEK_SET);
if (fread(&header, sizeof(header), 1, fp) != 1)
{
fatalError(CANNOT_READ, proj.m_fname.c_str());
fatalError(CANNOT_READ, proj.binary_path().c_str());
}
/* This is a typical DOS kludge! */
@@ -298,14 +303,14 @@ void DccFrontend::LoadImage(Project &proj)
/* Allocate a block of memory for the program. */
prog.cbImage = cb + sizeof(PSP);
prog.Image = new uint8_t [prog.cbImage];
prog.Image[0] = 0xCD; /* Fill in PSP int 20h location */
prog.Image[1] = 0x20; /* for termination checking */
prog.Imagez = new uint8_t [prog.cbImage];
prog.Imagez[0] = 0xCD; /* Fill in PSP int 20h location */
prog.Imagez[1] = 0x20; /* for termination checking */
/* Read in the image past where a PSP would go */
if (cb != (int)fread(prog.Image + sizeof(PSP), 1, (size_t)cb, fp))
if (cb != (int)fread(prog.Imagez + sizeof(PSP), 1, (size_t)cb, fp))
{
fatalError(CANNOT_READ, proj.m_fname.c_str());
fatalError(CANNOT_READ, proj.binary_path().c_str());
}
/* Set up memory map */
@@ -318,7 +323,7 @@ void DccFrontend::LoadImage(Project &proj)
{
for (i = 0; i < prog.cReloc; i++)
{
uint8_t *p = &prog.Image[prog.relocTable[i]];
uint8_t *p = &prog.Imagez[prog.relocTable[i]];
uint16_t w = (uint16_t)LH(p) + EXE_RELOCATION;
*p++ = (uint8_t)(w & 0x00FF);
*p = (uint8_t)((w & 0xFF00) >> 8);

Some files were not shown because too many files have changed in this diff Show More