Add original dcc tools to repository
* makedsig has been integrated with makedstp, it should handle both LIB and TPL files * other tools have not been modified
This commit is contained in:
0
tools/parsehdr/CMakeLists.txt
Normal file
0
tools/parsehdr/CMakeLists.txt
Normal file
117
tools/parsehdr/locident.h
Normal file
117
tools/parsehdr/locident.h
Normal file
@@ -0,0 +1,117 @@
|
||||
/*$Log: locident.h,v $
|
||||
* Revision 1.6 94/02/22 15:20:23 cifuente
|
||||
* Code generation is done.
|
||||
*
|
||||
* Revision 1.5 93/12/10 09:38:20 cifuente
|
||||
* New high-level types
|
||||
*
|
||||
* Revision 1.4 93/11/10 17:30:51 cifuente
|
||||
* Procedure header, locals
|
||||
*
|
||||
* Revision 1.3 93/11/08 12:06:35 cifuente
|
||||
* du1 analysis finished. Instantiates procedure arguments for user
|
||||
* declared procedures.
|
||||
*
|
||||
* Revision 1.2 93/10/25 11:01:00 cifuente
|
||||
* New SYNTHETIC instructions for d/u analysis
|
||||
*
|
||||
* Revision 1.1 93/10/11 11:47:39 cifuente
|
||||
* Initial revision
|
||||
*
|
||||
* File: locIdent.h
|
||||
* Purpose: High-level local identifier definitions
|
||||
* Date: October 1993
|
||||
*/
|
||||
|
||||
|
||||
/* Type definition */
|
||||
typedef struct {
|
||||
Int csym; /* # symbols used */
|
||||
Int alloc; /* # symbols allocated */
|
||||
Int *idx; /* Array of integer indexes */
|
||||
} IDX_ARRAY;
|
||||
|
||||
/* Type definitions used in the decompiled program */
|
||||
/* High-level type codes.  The numeric values are spelled out because
 * parsehdr.txt documents them as the 2-byte type codes stored in
 * dcclibs.dat; they must not change if enumerators are reordered. */
typedef enum {
    TYPE_UNKNOWN     = 0,   /* unknown so far               */
    TYPE_BYTE_SIGN   = 1,   /* signed byte      (8 bits)    */
    TYPE_BYTE_UNSIGN = 2,   /* unsigned byte                */
    TYPE_WORD_SIGN   = 3,   /* signed word      (16 bits)   */
    TYPE_WORD_UNSIGN = 4,   /* unsigned word    (16 bits)   */
    TYPE_LONG_SIGN   = 5,   /* signed long      (32 bits)   */
    TYPE_LONG_UNSIGN = 6,   /* unsigned long    (32 bits)   */
    TYPE_RECORD      = 7,   /* record structure             */
    TYPE_PTR         = 8,   /* pointer (32 bit ptr)         */
    TYPE_STR         = 9,   /* string                       */
    TYPE_CONST       = 10,  /* constant (any type)          */
    TYPE_FLOAT       = 11,  /* floating point               */
    TYPE_DOUBLE      = 12   /* double precision float       */
} hlType;
|
||||
|
||||
/* C spellings, one entry per hlType value (13 entries, listed in the
 * same order as the enumerators).  Being `static` in a header, every
 * translation unit that includes this file gets its own copy. */
static char *hlTypes[13] = {
    "",                 /*  0  TYPE_UNKNOWN     */
    "char",             /*  1  TYPE_BYTE_SIGN   */
    "unsigned char",    /*  2  TYPE_BYTE_UNSIGN */
    "int",              /*  3  TYPE_WORD_SIGN   */
    "unsigned int",     /*  4  TYPE_WORD_UNSIGN */
    "long",             /*  5  TYPE_LONG_SIGN   */
    "unsigned long",    /*  6  TYPE_LONG_UNSIGN */
    "record",           /*  7  TYPE_RECORD      */
    "int *",            /*  8  TYPE_PTR         */
    "char *",           /*  9  TYPE_STR         */
    "",                 /* 10  TYPE_CONST       */
    "float",            /* 11  TYPE_FLOAT       */
    "double"            /* 12  TYPE_DOUBLE      */
};
|
||||
|
||||
/* Where an identifier lives. */
typedef enum {
    STK_FRAME,      /* For stack vars           */
    REG_FRAME,      /* For register variables   */
    GLB_FRAME       /* For globals              */
} frameType;
|
||||
|
||||
|
||||
/* Enumeration to determine whether pIcode points to the high or low part
|
||||
* of a long number */
|
||||
typedef enum {
    HIGH_FIRST,     /* High value is first  */
    LOW_FIRST       /* Low value is first   */
} hlFirst;
|
||||
|
||||
|
||||
/* LOCAL_ID */
|
||||
typedef struct {
|
||||
hlType type; /* Probable type */
|
||||
boolT illegal;/* Boolean: not a valid field any more */
|
||||
IDX_ARRAY idx; /* Index into icode array (REG_FRAME only) */
|
||||
frameType loc; /* Frame location */
|
||||
boolT hasMacro;/* Identifier requires a macro */
|
||||
char macro[10];/* Macro for this identifier */
|
||||
char name[20];/* Identifier's name */
|
||||
union { /* Different types of identifiers */
|
||||
byte regi; /* For TYPE_BYTE(WORD)_(UN)SIGN registers */
|
||||
struct { /* For TYPE_BYTE(WORD)_(UN)SIGN on the stack */
|
||||
byte regOff; /* register offset (if any) */
|
||||
Int off; /* offset from BP */
|
||||
} bwId;
|
||||
struct _bwGlb { /* For TYPE_BYTE(WORD)_(UN)SIGN globals */
|
||||
int16 seg; /* segment value */
|
||||
int16 off; /* offset */
|
||||
byte regi; /* optional indexed register */
|
||||
} bwGlb;
|
||||
struct _longId{ /* For TYPE_LONG_(UN)SIGN registers */
|
||||
byte h; /* high register */
|
||||
byte l; /* low register */
|
||||
} longId;
|
||||
struct _longStkId { /* For TYPE_LONG_(UN)SIGN on the stack */
|
||||
Int offH; /* high offset from BP */
|
||||
Int offL; /* low offset from BP */
|
||||
} longStkId;
|
||||
struct { /* For TYPE_LONG_(UN)SIGN globals */
|
||||
int16 seg; /* segment value */
|
||||
int16 offH; /* offset high */
|
||||
int16 offL; /* offset low */
|
||||
byte regi; /* optional indexed register */
|
||||
} longGlb;
|
||||
struct { /* For TYPE_LONG_(UN)SIGN constants */
|
||||
dword h; /* high word */
|
||||
dword l; /* low word */
|
||||
} longKte;
|
||||
} id;
|
||||
} ID;
|
||||
|
||||
typedef struct {
|
||||
Int csym; /* No. of symbols in the table */
|
||||
Int alloc; /* No. of symbols allocated */
|
||||
ID *id; /* Identifier */
|
||||
} LOCAL_ID;
|
||||
|
||||
|
||||
1538
tools/parsehdr/parsehdr.cpp
Normal file
1538
tools/parsehdr/parsehdr.cpp
Normal file
File diff suppressed because it is too large
Load Diff
98
tools/parsehdr/parsehdr.h
Normal file
98
tools/parsehdr/parsehdr.h
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
*$Log: parsehdr.h,v $
|
||||
*/
|
||||
/* Header file for parsehdr.c */
|
||||
|
||||
/* Basic fixed-size aliases.  The bit widths in the comments are the
 * author's stated intent.
 * NOTE(review): `unsigned long` is 64 bits on LP64 platforms, so `dword`
 * is only 32 bits where long is 32 bits -- confirm before relying on
 * sizeof(dword) in file formats. */
typedef unsigned long   dword;  /* 32 bits  */
typedef unsigned char   byte;   /* 8 bits   */
typedef unsigned short  word;   /* 16 bits  */
typedef unsigned char   boolT;  /* 8 bits   */
|
||||
|
||||
/* Boolean values used with boolT. */
#define TRUE        1
#define FALSE       0

/* Buffer sizes, in bytes. */
#define BUFF_SIZE   8192        /* Holds a declaration              */
#define FBUF_SIZE   32700       /* Holds part of a header file      */

#define NARGS       15
#define NAMES_L     160
#define TYPES_L     160
#define FUNC_L      160

/* ERRF expands to stdout, so <stdio.h> must be included wherever it is
 * used. */
#define ERRF        stdout
|
||||
|
||||
/* Error / warning reporters; defined outside this header. */
void phError(char *errmsg);
void phWarning(char *errmsg);

/* Reporting shorthands. */
#define ERR(msg)    phError(msg)

/* DBG prints only when DEBUG is defined.  Both expansions already end in
 * ';', and `str` is passed to printf as the format string, so it must
 * not contain conversion specifiers. */
#ifdef DEBUG
#define DBG(str)    printf(str);
#else
#define DBG(str)    ;
#endif

#define WARN(msg)   phWarning(msg)

/* OUT writes `str` to `outfile`, a name this header does not declare.
 * `str` is used as the fprintf format string. */
#define OUT(str)    fprintf(outfile, str)
|
||||
|
||||
#define PH_PARAMS   32
#define PH_NAMESZ   15

#define SYMLEN      16          /* Including the null   */

/* locident.h uses Int and int16 without defining them, so they are
 * supplied here as macros ahead of the #include. */
#define Int         long        /* For locident.h       */
#define int16       short int   /* For locident.h       */
|
||||
#include "locident.h" /* For the hlType enum */
|
||||
/* NOTE(review): `bool` is a keyword in C++ (and in C23) and a macro in
 * <stdbool.h>; defining it as a macro here replaces it for everything
 * that follows -- confirm this is still wanted for parsehdr.cpp. */
#define bool        unsigned char   /* For internal use */

/* Same values as the TRUE/FALSE defined earlier in this header; an
 * identical macro redefinition is permitted. */
#define TRUE        1
#define FALSE       0
|
||||
|
||||
typedef
|
||||
struct ph_func_tag
|
||||
{
|
||||
char name[SYMLEN]; /* Name of function or arg */
|
||||
hlType typ; /* Return type */
|
||||
int numArg; /* Number of args */
|
||||
int firstArg; /* Index of first arg in chain */
|
||||
int next; /* Index of next function in chain */
|
||||
bool bVararg; /* True if variable num args */
|
||||
} PH_FUNC_STRUCT;
|
||||
|
||||
typedef
|
||||
struct ph_arg_tag
|
||||
{
|
||||
char name[SYMLEN]; /* Name of function or arg */
|
||||
hlType typ; /* Parameter type */
|
||||
} PH_ARG_STRUCT;
|
||||
|
||||
#define DELTA_FUNC  32      /* Number to alloc at once */

/* PH_* result kinds; the comment on each names the associated payload. */
#define PH_JUNK     0       /* LPSTR buffer, nothing happened       */
#define PH_PROTO    1       /* LPPH_FUNC ret val, func name, args   */
#define PH_FUNCTION 2       /* LPPH_FUNC ret val, func name, args   */
#define PH_TYPEDEF  3       /* LPPH_DEF definer and definee         */
#define PH_DEFINE   4       /* LPPH_DEF definer and definee         */
#define PH_ERROR    5       /* LPSTR error string                   */
#define PH_WARNING  6       /* LPSTR warning string                 */
#define PH_MPROTO   7       /* ????? multi proto????                */
#define PH_VAR      8       /* ????? var decl                       */
|
||||
|
||||
/* PROTOS */
|
||||
|
||||
/* Functions declared here are defined outside this header. */
boolT   phData(char *buff, int ndata);
boolT   phPost(void);
boolT   phFree(void);
void    checkHeap(char *msg);       /* For debugging only */

void    phBuffToFunc(char *buff);

void    phBuffToDef(char *buff);
|
||||
|
||||
|
||||
/* Token codes; all values lie above the 0..255 range of a single char. */
#define TOK_TYPE    256     /* A type name (e.g. "int")         */
#define TOK_NAME    257     /* A function or parameter name     */
#define TOK_DOTS    258     /* "..."                            */
#define TOK_EOL     259     /* End of line                      */
|
||||
|
||||
/* Base type categories. */
typedef enum {
    BT_INT,
    BT_CHAR,
    BT_FLOAT,
    BT_DOUBLE,
    BT_STRUCT,
    BT_VOID,
    BT_UNKWN
} baseType;
|
||||
217
tools/parsehdr/parsehdr.txt
Normal file
217
tools/parsehdr/parsehdr.txt
Normal file
@@ -0,0 +1,217 @@
|
||||
PARSEHDR
|
||||
|
||||
1 What is ParseHdr?
|
||||
|
||||
2 What is dcclibs.dat?
|
||||
|
||||
3 How do I use ParseHdr?
|
||||
|
||||
4 What about languages other than C?
|
||||
|
||||
5 What is the structure of the dcclibs.dat file?
|
||||
|
||||
6 What are all these errors, and why do they happen?
|
||||
|
||||
|
||||
1 What is ParseHdr?
|
||||
-------------------
|
||||
|
||||
ParseHdr is a program that creates a special prototype file for DCC
|
||||
from a set of include files (.h files). This allows DCC to be aware
|
||||
of the type of library function arguments, and return types. The file
|
||||
produced is called dcclibs.dat. ParseHdr is designed specifically for
|
||||
C header files.
|
||||
|
||||
As an example, this is what allows DCC to recognise that printf has
|
||||
(at least) a string argument, and so converts the first argument from
|
||||
a numeric constant to a string. So you get
|
||||
printf("Hello world")
|
||||
instead of
|
||||
printf(0x42).
|
||||
|
||||
|
||||
2 What is dcclibs.dat?
|
||||
----------------------
|
||||
|
||||
dcclibs.dat is the file created by the ParseHdr program. It contains
|
||||
a list of function names and parameter and return types. See section
|
||||
5 for details of the contents of the file.
|
||||
|
||||
|
||||
3 How do I use ParseHdr?
|
||||
------------------------
|
||||
|
||||
To use ParseHdr you need a file containing a list of header files,
|
||||
like this:
|
||||
\tc\include\alloc.h
|
||||
\tc\include\assert.h
|
||||
\tc\include\bios.h
|
||||
...
|
||||
\tc\include\time.h
|
||||
|
||||
There must be one file per line, no blank lines, and unless the
|
||||
header files are in the current directory, a full path must be given.
|
||||
The easiest way to create such a file is to redirect the output of a
|
||||
dir command to a file, like this:
|
||||
c>dir \tc\include\*.h > tcfiles.lst
|
||||
and then edit the resultant file. Note that the path will not be
|
||||
included in this, so you will have to add that manually. Remove
|
||||
everything after the .h, such as file size, date, etc.
|
||||
|
||||
Once you have this file, you can run parsehdr:
|
||||
|
||||
parsehdr <listfile>
|
||||
|
||||
For example,
|
||||
|
||||
parsehdr tcfiles.lst
|
||||
|
||||
You will get some messages indicating which files are being
|
||||
processed, but also some error messages. Just ignore the error
|
||||
messages, see section 6 for why they occur.
|
||||
|
||||
|
||||
|
||||
4 What about languages other than C?
|
||||
-----------------------------------------
|
||||
|
||||
ParseHdr will only work on C header files. It would be possible to
|
||||
process files for other languages that contained type information, to
|
||||
produce a dcclibs.dat file specific to that language. Ideally, DCC
|
||||
should look for a different file for each language, but since only a
|
||||
C version of dcclibs.dat has so far been created, this has not been
|
||||
done.
|
||||
|
||||
Prototype information for Turbo Pascal exists in the file turbo.tpl,
|
||||
at least for things like the graphics library, so it would be
|
||||
possible for MakeDsTp to produce a dcclibs.dat file as well as the
|
||||
signature file. However, the format of the turbo.tpl file is not
|
||||
documented by Borland; for details see
|
||||
|
||||
W. L. Peavy, "Inside Turbo Pascal 6.0 Units", Public domain software
|
||||
file tpu6doc.txt in tpu6.zip. Anonymous ftp from garbo.uwasa.fi and
|
||||
mirrors, directory /pc/turbopas, 1991.
|
||||
|
||||
|
||||
|
||||
|
||||
5 What is the structure of the dcclibs.dat file?
|
||||
------------------------------------------------
|
||||
|
||||
The first 4 bytes are "dccp", identifying it as a DCC prototype file.
|
||||
After this, there are two sections.
|
||||
|
||||
The first section begins with "FN", for Function Names. It is
|
||||
followed by a two byte integer giving the number of function names
|
||||
stored. The remainder of this section is an array of structures, one
|
||||
per function name. Each has this structure:
|
||||
char Name[SYMLEN]; /* Name of the function, NULL terminated */
|
||||
int type; /* A 2 byte integer describing the return type */
|
||||
int numArg; /* The number of arguments */
|
||||
int firstArg; /* The index of the first arg, see below */
|
||||
char bVarArg; /* 1 if variable arguments, 0 otherwise */
|
||||
|
||||
SYMLEN is 16, allowing 15 chars before the NULL. Therefore, the length
|
||||
of this structure is 23 bytes.
|
||||
|
||||
The types are as defined in locident.h (actually a part of dcc), and
|
||||
at present are as follows:
|
||||
typedef enum {
|
||||
TYPE_UNKNOWN = 0, /* unknown so far 00 */
|
||||
TYPE_BYTE_SIGN, /* signed byte (8 bits) 01 */
|
||||
TYPE_BYTE_UNSIGN, /* unsigned byte 02 */
|
||||
TYPE_WORD_SIGN, /* signed word (16 bits) 03 */
|
||||
TYPE_WORD_UNSIGN, /* unsigned word (16 bits) 04 */
|
||||
TYPE_LONG_SIGN, /* signed long (32 bits) 05 */
|
||||
TYPE_LONG_UNSIGN, /* unsigned long (32 bits) 06 */
|
||||
TYPE_RECORD, /* record structure 07 */
|
||||
TYPE_PTR, /* pointer (32 bit ptr) 08 */
|
||||
TYPE_STR, /* string 09 */
|
||||
TYPE_CONST, /* constant (any type) 0A */
|
||||
TYPE_FLOAT, /* floating point 0B */
|
||||
TYPE_DOUBLE, /* double precision float 0C */
|
||||
} hlType;
|
||||
|
||||
firstArg is an index into the array in the second section.
|
||||
|
||||
The second section begins with "PM" (for Parameters). It is followed
|
||||
by a 2 byte integer giving the number of parameter records. After
|
||||
this is the array of parameter structures. Initially, the names of the
|
||||
parameters were being stored, but this has been removed at present.
|
||||
The parameter structure is therefore now just a single 2 byte
|
||||
integer, representing the type of that argument.
|
||||
|
||||
The way it all fits together is perhaps best described by an example.
|
||||
Let's consider this entry in dcclibs.dat:
|
||||
|
||||
73 74 72 63 6D 70 00 ; "strcmp"
|
||||
00 00 00 00 00 00 00 00 00 ; Padding to 16 bytes
|
||||
03 00 ; Return type 3, TYPE_WORD_SIGN
|
||||
02 00 ; 2 arguments
|
||||
15 02 ; First arg is 0215
|
||||
00 ; Not var args
|
||||
|
||||
If we now skip to the "PM" part of the file, skip the number of
|
||||
arguments word, then skip 0x215*2 = 0x42A bytes, we find this:
|
||||
09 00 09 00 09 00 ...
|
||||
|
||||
The first 09 00 (TYPE_STR) refers to the type of the first parameter,
|
||||
and the second to the second parameter. There are only 2 arguments,
|
||||
so the third 09 00 refers to the first parameter of the next
|
||||
function. So both parameters are strings, as is expected.
|
||||
|
||||
For functions with variable parameters, bVarArg is set to 01, and the
|
||||
number of parameters reported is the number of fixed parameters. Here
|
||||
is another example:
|
||||
|
||||
66 70 72 69 6E 74 66 00 ; "fprintf"
|
||||
00 00 00 00 00 00 00 00 ; padding
|
||||
03 00 ; return type 3, TYPE_WORD_SIGN
|
||||
02 00 ; 2 fixed args
|
||||
81 01 ; First arg at index 0181
|
||||
01 ; Var args
|
||||
|
||||
and in the "PM" section at offset 0x181*2 = 0x0302, we find 08 00 09 00
|
||||
03 00 meaning that the first parameter is a pointer (in fact, we know
|
||||
it's a FILE *), and the second parameter is a string.
|
||||
|
||||
|
||||
|
||||
|
||||
6 What are all these errors, and why do they happen?
|
||||
----------------------------------------------------
|
||||
|
||||
When you run ParseHdr, as well as the progress statements like
|
||||
Processing \tc\include\alloc.h ...
|
||||
|
||||
you can get error messages. Basically, ignore these errors. They occur
|
||||
for a variety of reasons, most of which are detailed below.
|
||||
|
||||
1)
|
||||
Expected type: got ) (29)
|
||||
void __emit__()
|
||||
^
|
||||
This include file contained a non-ANSI prototype. This is rare, and
|
||||
__emit__ is not a standard function anyway. If it really bothers you,
|
||||
you could add the word "void" to the empty parentheses in your
|
||||
include file.
|
||||
|
||||
2)
|
||||
Expected ',' between parameter defs: got ( (28)
|
||||
void _Cdecl ctrlbrk (int _Cdecl (*handler)(void))
|
||||
|
||||
Here "handler" is a pointer to a function. Being a basically simple
|
||||
program, ParseHdr does not expand all typedef and #define statements,
|
||||
so it cannot distinguish between types and user defined function
|
||||
names. Therefore, it is not possible in general to parse any
|
||||
prototypes containing pointers to functions, so at this stage, any
|
||||
such prototypes will produce an error of some sort. DCC cannot
|
||||
currently make use of this type information anyway, so this is no
|
||||
real loss. There are typically half a dozen such errors.
|
||||
|
||||
3)
|
||||
Unknown type time_t
|
||||
|
||||
Types (such as time_t) that are structures or pointers to structures
|
||||
are not handled by ParseHdr, since typedef and #define statements are
|
||||
ignored. Again, there are typically only about a dozen of these.
|
||||
8
tools/parsehdr/parselib.mak
Normal file
8
tools/parsehdr/parselib.mak
Normal file
@@ -0,0 +1,8 @@
|
||||
# Builds parselib.exe from parselib.c using the `cl` and `link` commands.
# NOTE(review): the sibling sources in tools/parsehdr are named
# parsehdr.* while this makefile builds parselib.* -- confirm the names.
# Recipe lines must start with a TAB character.
CFLAGS = -Zi -c -AS -W3 -D__MSDOS__

parselib.exe: parselib.obj
	link /CO parselib;

parselib.obj: parselib.c
	cl $(CFLAGS) $*.c
|
||||
|
||||
24
tools/parsehdr/tcfiles.lst
Normal file
24
tools/parsehdr/tcfiles.lst
Normal file
@@ -0,0 +1,24 @@
|
||||
\tc\include\alloc.h
|
||||
\tc\include\assert.h
|
||||
\tc\include\bios.h
|
||||
\tc\include\conio.h
|
||||
\tc\include\ctype.h
|
||||
\tc\include\dir.h
|
||||
\tc\include\dos.h
|
||||
\tc\include\errno.h
|
||||
\tc\include\fcntl.h
|
||||
\tc\include\float.h
|
||||
\tc\include\io.h
|
||||
\tc\include\limits.h
|
||||
\tc\include\math.h
|
||||
\tc\include\mem.h
|
||||
\tc\include\process.h
|
||||
\tc\include\setjmp.h
|
||||
\tc\include\share.h
|
||||
\tc\include\signal.h
|
||||
\tc\include\stdarg.h
|
||||
\tc\include\stddef.h
|
||||
\tc\include\stdio.h
|
||||
\tc\include\stdlib.h
|
||||
\tc\include\string.h
|
||||
\tc\include\time.h
|
||||
Reference in New Issue
Block a user