Add original dcc tools to repository

* makedsig has been integrated with makedstp, it should handle both LIB and TPL files
* other tools have not been modified
This commit is contained in:
nemerle 2015-02-10 17:28:50 +01:00
parent d8c66e7791
commit a697ad05c0
33 changed files with 4560 additions and 149 deletions

View File

@ -26,16 +26,23 @@ enable_testing()
FIND_PACKAGE(GMock) FIND_PACKAGE(GMock)
ENDIF() ENDIF()
ADD_SUBDIRECTORY(3rd_party)
llvm_map_components_to_libraries(REQ_LLVM_LIBRARIES jit native mc support tablegen) llvm_map_components_to_libnames(REQ_LLVM_LIBRARIES jit native mc support tablegen)
INCLUDE_DIRECTORIES( INCLUDE_DIRECTORIES(
3rd_party/libdisasm 3rd_party/libdisasm
include include
include/idioms include/idioms
common
${Boost_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS}
${LLVM_INCLUDE_DIRS} ${LLVM_INCLUDE_DIRS}
) )
ADD_SUBDIRECTORY(3rd_party)
ADD_SUBDIRECTORY(common)
ADD_SUBDIRECTORY(tools)
set(dcc_LIB_SOURCES set(dcc_LIB_SOURCES
src/CallConvention.cpp src/CallConvention.cpp
src/ast.cpp src/ast.cpp
@ -67,7 +74,6 @@ set(dcc_LIB_SOURCES
src/locident.cpp src/locident.cpp
src/liveness_set.cpp src/liveness_set.cpp
src/parser.cpp src/parser.cpp
src/perfhlib.cpp
src/procs.cpp src/procs.cpp
src/project.cpp src/project.cpp
src/Procedure.cpp src/Procedure.cpp
@ -106,7 +112,6 @@ set(dcc_HEADERS
include/idioms/xor_idioms.h include/idioms/xor_idioms.h
include/locident.h include/locident.h
include/CallConvention.h include/CallConvention.h
include/perfhlib.h
include/project.h include/project.h
include/scanner.h include/scanner.h
include/state.h include/state.h
@ -118,6 +123,7 @@ set(dcc_HEADERS
include/dcc_interface.h include/dcc_interface.h
) )
SOURCE_GROUP(Source FILES ${dcc_SOURCES}) SOURCE_GROUP(Source FILES ${dcc_SOURCES})
SOURCE_GROUP(Headers FILES ${dcc_HEADERS}) SOURCE_GROUP(Headers FILES ${dcc_HEADERS})
@ -127,11 +133,10 @@ qt5_use_modules(dcc_lib Core)
ADD_EXECUTABLE(dcc_original ${dcc_SOURCES} ${dcc_HEADERS}) ADD_EXECUTABLE(dcc_original ${dcc_SOURCES} ${dcc_HEADERS})
ADD_DEPENDENCIES(dcc_original dcc_lib) ADD_DEPENDENCIES(dcc_original dcc_lib)
TARGET_LINK_LIBRARIES(dcc_original dcc_lib disasm_s ${REQ_LLVM_LIBRARIES} ncurses LLVMSupport) TARGET_LINK_LIBRARIES(dcc_original dcc_lib dcc_hash disasm_s ${REQ_LLVM_LIBRARIES} ncurses LLVMSupport)
qt5_use_modules(dcc_original Core) qt5_use_modules(dcc_original Core)
#ADD_SUBDIRECTORY(gui) #ADD_SUBDIRECTORY(gui)
if(dcc_build_tests) if(dcc_build_tests)
ADD_SUBDIRECTORY(src) ADD_SUBDIRECTORY(src)
endif() endif()

7
common/CMakeLists.txt Normal file
View File

@ -0,0 +1,7 @@
set(SRC
perfhlib.cpp
perfhlib.h
PatternCollector.h
)
add_library(dcc_hash STATIC ${SRC})

View File

@ -0,0 +1,5 @@
#ifndef PATTERNCOLLECTOR
#define PATTERNCOLLECTOR
#endif // PATTERNCOLLECTOR

440
common/perfhlib.cpp Normal file
View File

@ -0,0 +1,440 @@
/*
*$Log: perfhlib.c,v $
* Revision 1.5 93/09/29 14:45:02 emmerik
* Oops, didn't do the casts last check in
*
* Revision 1.4 93/09/29 14:41:45 emmerik
* Added casts to mod instructions to keep the SVR4 compiler happy
*
*
* Perfect hashing function library. Contains functions to generate perfect
* hashing functions
*/
#include "perfhlib.h"
#include "PatternCollector.h"
#include <stdio.h>
#include <cassert>
#include <stdlib.h>
#include <string.h>
/* Private data structures */
//static int NumEntry; /* Number of entries in the hash table (# keys) */
//static int EntryLen; /* Size (bytes) of each entry (size of keys) */
//static int SetSize; /* Size of the char set */
//static char SetMin; /* First char in the set */
//static int NumVert; /* c times NumEntry */
//static uint16_t *T1base, *T2base; /* Pointers to start of T1, T2 */
static uint16_t *T1, *T2; /* Pointers to T1[i], T2[i] */
static int *graphNode; /* The array of edges */
static int *graphNext; /* Linked list of edges */
static int *graphFirst;/* First edge at a vertex */
static int numEdges; /* An edge counter */
static bool *visited; /* Array of bools: whether visited */
static bool *deleted; /* Array of bools: whether deleted */
/* Private prototypes */
static void initGraph(void);
static void addToGraph(int e, int v1, int v2);
static bool isCycle(void);
static void duplicateKeys(int v1, int v2);
/**
 * Record the hash parameters and allocate every work table.
 *
 * @param _NumEntry number of keys
 * @param _EntryLen length in bytes of each key
 * @param _SetSize  size of the character set
 * @param _SetMin   first character of the set
 * @param _NumVert  number of graph vertices
 *
 * The parameters are kept in the object so hash() does not need them passed
 * on every call.  If any allocation fails the function prints a message,
 * releases what was allocated and terminates the process with exit(1).
 */
void PerfectHash::setHashParams(int _NumEntry, int _EntryLen, int _SetSize, char _SetMin,
                                int _NumVert)
{
    NumEntry = _NumEntry;
    EntryLen = _EntryLen;
    SetSize = _SetSize;
    SetMin = _SetMin;
    NumVert = _NumVert;

    /* Start from a known state: hashCleanup() frees every one of these, so
       none may hold an indeterminate value when an allocation fails part
       way through.  (Tables from an earlier call are not released here.) */
    T1base = nullptr;
    T2base = nullptr;
    g = nullptr;
    graphNode = nullptr;
    graphNext = nullptr;
    graphFirst = nullptr;
    visited = nullptr;
    deleted = nullptr;

    /* Allocate the variable sized tables etc */
    const size_t tableBytes = EntryLen * SetSize * sizeof(uint16_t);
    const size_t edgeBytes = (NumEntry*2 + 1) * sizeof(int);

    T1base = static_cast<uint16_t *>(malloc(tableBytes));
    T2base = static_cast<uint16_t *>(malloc(tableBytes));
    graphNode = static_cast<int *>(malloc(edgeBytes));
    graphNext = static_cast<int *>(malloc(edgeBytes));
    graphFirst = static_cast<int *>(malloc((NumVert + 1) * sizeof(int)));
    g = static_cast<short *>(malloc((NumVert+1) * sizeof(short)));
    visited = static_cast<bool *>(malloc((NumVert+1) * sizeof(bool)));
    deleted = static_cast<bool *>(malloc((NumEntry+1) * sizeof(bool)));

    if (T1base && T2base && graphNode && graphNext && graphFirst && g && visited && deleted)
    {
        return;
    }

    printf("Could not allocate memory\n");
    hashCleanup();
    exit(1);
}
/**
 * Free the storage for the variable sized tables allocated by
 * setHashParams().  Every pointer is reset afterwards so that calling this
 * twice, or using a stale table pointer, cannot free the same block again.
 */
void PerfectHash::hashCleanup(void)
{
    /* free() accepts a null pointer, so no individual checks are needed */
    free(T1base);     T1base = nullptr;
    free(T2base);     T2base = nullptr;
    free(graphNode);  graphNode = nullptr;
    free(graphNext);  graphNext = nullptr;
    free(graphFirst); graphFirst = nullptr;
    free(g);          g = nullptr;
    free(visited);    visited = nullptr;
    free(deleted);    deleted = nullptr;
}
/**
 * Part 1 of creating the tables: keep drawing random T1/T2 tables until the
 * graph built from all keys has no self loop and isCycle() reports no cycle.
 *
 * @param collector source of the keys (must not be null); it is remembered
 *                  in m_collector.
 */
void PerfectHash::map(PatternCollector *collector)
{
    m_collector = collector;
    assert(nullptr!=collector);

    int attempt = 0;
    for (;;)
    {
        initGraph();

        /* Randomly generate T1 and T2 */
        const int tableLen = SetSize*EntryLen;
        for (int slot = 0; slot < tableLen; slot++)
        {
            T1base[slot] = rand() % NumVert;
            T2base[slot] = rand() % NumVert;
        }

        /* One edge per key, joining the two vertices the key maps to */
        bool rejected = false;
        for (int entry = 0; entry < NumEntry && !rejected; entry++)
        {
            uint8_t *key = m_collector->getKey(entry);
            uint16_t first = 0;
            uint16_t second = 0;
            for (int pos = 0; pos < EntryLen; pos++)
            {
                T1 = T1base + pos * SetSize;
                T2 = T2base + pos * SetSize;
                first += T1[key[pos] - SetMin];
                second += T2[key[pos] - SetMin];
            }
            first %= (uint16_t)NumVert;
            second %= (uint16_t)NumVert;

            if (first != second)
            {
                addToGraph(numEdges++, first, second);
            }
            else
            {
                /* A self loop. Reject! */
                printf("Self loop on vertex %d!\n", first);
                rejected = true;
            }
        }

        /* Done once the graph is accepted; otherwise report and retry */
        if (!rejected && !isCycle())
        {
            return;
        }
        printf("Iteration %d\n", ++attempt);
    }
}
/* Reset the graph to "no edges": clears the per-vertex list heads
   (entries 1..NumVert) and all 2*NumEntry+1 next links. */
void PerfectHash::initGraph()
{
    numEdges = 0;

    for (int vertex = NumVert; vertex >= 1; vertex--)
    {
        graphFirst[vertex] = 0;
    }

    /* graphNode[] needs no reset: addToGraph() fills every slot it uses */
    const int linkCount = 2*NumEntry + 1;
    for (int link = 0; link < linkCount; link++)
    {
        graphNext[link] = 0;
    }
}
/* Insert edge e joining vertices v1 and v2 (all three are 0 based on entry).
   Internally everything is 1 based: +edge is stored on v1's list and -edge
   on v2's list, both indexed relative to NumEntry. */
void PerfectHash::addToGraph(int e, int v1, int v2)
{
    const int edge = e + 1;
    const int head = v1 + 1;
    const int tail = v2 + 1;
    const int fwdSlot = NumEntry + edge;
    const int backSlot = NumEntry - edge;

    /* The edge information: far end for each direction */
    graphNode[fwdSlot] = tail;
    graphNode[backSlot] = head;

    /* Push +edge on the front of head's list */
    graphNext[fwdSlot] = graphFirst[head];
    graphFirst[head] = edge;

    /* Push -edge on the front of tail's list */
    graphNext[backSlot] = graphFirst[tail];
    graphFirst[tail] = -edge;
}
/**
 * Depth first search from vertex v looking for cycles.
 *
 * @param parentE signed edge we arrived through (origin 1).  isCycle() passes
 *                a sentinel that is not a real edge for the root of a search.
 * @param v       vertex to explore (origin 1)
 * @return true as soon as a genuine cycle is found
 *
 * Duplicate keys always produce a two-edge ("unit") cycle because both keys
 * select the same pair of vertices.  When a visited neighbour turns out to be
 * the parent vertex and the two keys compare equal, the edge is marked
 * deleted and ignored rather than reported as a cycle.
 */
bool PerfectHash::DFS(int parentE, int v)
{
    /* Only a real edge has slots in graphNode[]; the root sentinel used to
       index far outside the array when the parent vertex was looked up.
       (A table with 32767 or more entries would still collide with it.) */
    const bool hasParent = abs(parentE) >= 1 && abs(parentE) <= NumEntry;

    visited[v] = true;

    /* For each e incident with v .. */
    for (int e = graphFirst[v]; e; e = graphNext[NumEntry+e])
    {
        if (deleted[abs(e)])
        {
            /* A deleted key. Just ignore it */
            continue;
        }
        uint8_t *key1 = m_collector->getKey(abs(e)-1);
        const int w = graphNode[NumEntry+e];

        if (!visited[w])
        {
            /* Not yet seen. Traverse it */
            if (DFS(e, w))
            {
                return true;    /* Cycle found deeper down */
            }
            continue;
        }

        /* Did we just come through this edge? If so, ignore it. */
        if (abs(e) == abs(parentE))
        {
            continue;
        }

        /* The parent vertex is computed only here, where it is needed */
        if (hasParent && w == graphNode[NumEntry-parentE])
        {
            uint8_t *key2 = m_collector->getKey(abs(parentE)-1);
            if (memcmp(key1, key2, EntryLen) == 0)
            {
                printf("Duplicate keys with edges %d and %d (",
                       e, parentE);
                m_collector->dispKey(abs(e)-1);
                printf(" & ");
                m_collector->dispKey(abs(parentE)-1);
                printf(")\n");
                /* Wipe the key: the edge is reached again later through a
                   different parent and must stay ignored */
                deleted[abs(e)] = true;
                continue;
            }
            /* A genuine (unit) cycle. */
            printf("There is a unit cycle involving vertex %d and edge %d\n", v, e);
            return true;
        }

        /* We have reached a previously visited vertex not the parent.
           Therefore, we have uncovered a genuine cycle */
        printf("There is a cycle involving vertex %d and edge %d\n", v, e);
        return true;
    }
    return false;
}
/**
 * Clear the visited/deleted marks and run DFS() from every vertex that has
 * not been reached yet.
 * @return true if any of those searches reports a cycle.
 */
bool PerfectHash::isCycle(void)
{
    for (int vertex = 1; vertex <= NumVert; vertex++)
    {
        visited[vertex] = false;
    }
    for (int edge = 1; edge <= NumEntry; edge++)
    {
        deleted[edge] = false;
    }

    int root = 1;
    while (root <= NumVert)
    {
        /* -32767 is passed as the parent edge for the root of a search */
        if (!visited[root] && DFS(-32767, root))
        {
            return true;
        }
        root++;
    }
    return false;
}
/**
 * Assign g[] values along the edges reachable from vertex u (0 based here).
 * For each unvisited neighbour w joined by edge e, g[w] is chosen so that
 * g[u] + g[w] is congruent to the edge's key index modulo NumEntry.
 */
void PerfectHash::traverse(int u)
{
    visited[u] = true;

    /* Walk the edge list of u; graphFirst[] is indexed from 1 */
    for (int edge = graphFirst[1+u]; edge != 0; edge = graphNext[NumEntry+edge])
    {
        const int neighbour = graphNode[NumEntry+edge]-1;
        if (visited[neighbour])
        {
            continue;
        }
        g[neighbour] = (abs(edge)-1 - g[u]) % NumEntry;
        if (g[neighbour] < 0)
        {
            g[neighbour] += NumEntry;     /* Keep these positive */
        }
        traverse(neighbour);
    }
}
/**
 * Part 2 of creating the tables: fill in g[] by calling traverse() from
 * every vertex not already reached, starting each such vertex at g = 0.
 */
void PerfectHash::assign(void)
{
    int vertex = 0;
    while (vertex < NumVert)
    {
        g[vertex] = 0;              /* g is sparse; leave the gaps 0 */
        visited[vertex] = false;
        vertex++;
    }

    for (vertex = 0; vertex < NumVert; vertex++)
    {
        if (visited[vertex])
        {
            continue;
        }
        g[vertex] = 0;
        traverse(vertex);
    }
}
/**
 * Hash a key of EntryLen bytes.
 * Two 16-bit sums are accumulated, one from the T1 tables and one from the
 * T2 tables, each reduced modulo NumVert; the result is
 * (g[sum1] + g[sum2]) % NumEntry.
 */
int PerfectHash::hash(uint8_t *string)
{
    uint16_t sum1 = 0;
    uint16_t sum2 = 0;

    for (int pos = 0; pos < EntryLen; pos++)
    {
        /* Each key position has its own SetSize-wide slice of the tables */
        T1 = T1base + pos * SetSize;
        T2 = T2base + pos * SetSize;
        sum1 += T1[string[pos] - SetMin];
        sum2 += T2[string[pos] - SetMin];
    }
    sum1 %= NumVert;
    sum2 %= NumVert;

    const int combined = g[sum1] + g[sum2];
    return combined % NumEntry;
}
#if 0
void dispRecord(int i);
/* Disabled (inside #if 0) diagnostic: prints every key whose T1 hash and T2
   hash both land on v1 or v2, then exits with status 1.
   NOTE(review): it still calls the old free-function getKey(i, &keys) and
   dispRecord(); neither is provided by this file. */
void
duplicateKeys(int v1, int v2)
{
int i, j;
uint8_t *keys;
int u, v;
v1--; v2--; /* These guys are origin 1 */
printf("Duplicate keys:\n");
for (i=0; i < NumEntry; i++)
{
getKey(i, &keys);
/* u: T1 hash of this key */
u = 0;
for (j=0; j < EntryLen; j++)
{
T1 = T1base + j * SetSize;
u += T1[keys[j] - SetMin];
}
u %= NumVert;
/* Skip keys whose first vertex is neither of the two of interest */
if ((u != v1) && (u != v2)) continue;
/* v: T2 hash of this key */
v = 0;
for (j=0; j < EntryLen; j++)
{
T2 = T2base + j * SetSize;
v += T2[keys[j] - SetMin];
}
v %= NumVert;
if ((v == v2) || (v == v1))
{
/* Both ends match: dump the key bytes in hex and its record */
printf("Entry #%d key: ", i+1);
for (j=0; j < EntryLen; j++) printf("%02X ", keys[j]);
printf("\n");
dispRecord(i+1);
}
}
exit(1);
}
#endif

37
common/perfhlib.h Normal file
View File

@ -0,0 +1,37 @@
#include <stdint.h>
/** Perfect hashing function library. Contains functions to generate perfect
hashing functions */
struct PatternCollector;
/**
 * State and operations of one perfect hash function.
 *
 * All members start out null/zero so that a freshly constructed object can
 * be passed to hashCleanup() (or have its tables read) without touching
 * indeterminate pointers.
 */
struct PerfectHash {
    uint16_t *T1base = nullptr; /* Pointer to start of T1 */
    uint16_t *T2base = nullptr; /* Pointer to start of T2 */
    short *g = nullptr;         /* g[] */
    int NumEntry = 0;           /* Number of entries in the hash table (# keys) */
    int EntryLen = 0;           /* Size (bytes) of each entry (size of keys) */
    int SetSize = 0;            /* Size of the char set */
    char SetMin = 0;            /* First char in the set */
    int NumVert = 0;            /* c times NumEntry */
    /** Set the parameters for the hash table */
    void setHashParams(int _numEntry, int _entryLen, int _setSize, char _setMin, int _numVert);
public:
    void map(PatternCollector * collector); /* Part 1 of creating the tables */
    void hashCleanup(); /* Frees memory allocated by setHashParams() */
    void assign(); /* Part 2 of creating the tables */
    int hash(uint8_t *string); /* Hash the string to an int 0 .. NUMENTRY-1 */
    /* Table accessors; readG() exposes the short g[] array as uint16_t */
    const uint16_t *readT1(void) const { return T1base; }
    const uint16_t *readT2(void) const { return T2base; }
    const uint16_t *readG(void) const { return (uint16_t *)g; }
    uint16_t *readT1(void){ return T1base; }
    uint16_t *readT2(void){ return T2base; }
    uint16_t *readG(void) { return (uint16_t *)g; }
private:
    void initGraph();
    void addToGraph(int e, int v1, int v2);
    bool isCycle();
    bool DFS(int parentE, int v);
    void traverse(int u);
    PatternCollector *m_collector = nullptr; /* used to retrieve the keys */
};

View File

@ -1,38 +0,0 @@
#pragma once
/* Perfect hashing function library. Contains functions to generate perfect
hashing functions
* (C) Mike van Emmerik
*/
#include <stdint.h>
/* Prototypes */
void hashCleanup(void); /* Frees memory allocated by hashParams() */
void map(void); /* Part 1 of creating the tables */
/* The application must provide these functions: */
void getKey(int i, uint8_t **pKeys);/* Set *keys to point to the i+1th key */
void dispKey(int i); /* Display the key */
/* Hash-table state plus the lookup-side operations of the perfect hash:
   init() stores the parameters and allocates the tables, hash() evaluates
   the function, cleanup() releases the tables.  A single global instance,
   g_pattern_hasher, is declared after the class. */
class PatternHasher
{
uint16_t *T1base, *T2base; /* Pointers to start of T1, T2 */
int NumEntry; /* Number of entries in the hash table (# keys) */
int EntryLen; /* Size (bytes) of each entry (size of keys) */
int SetSize; /* Size of the char set */
char SetMin; /* First char in the set */
int NumVert; /* c times NumEntry */
int *graphNode; /* The array of edges */
int *graphNext; /* Linked list of edges */
int *graphFirst;/* First edge at a vertex */
public:
uint16_t *readT1(void); /* Returns a pointer to the T1 table */
uint16_t *readT2(void); /* Returns a pointer to the T2 table */
uint16_t *readG(void); /* Returns a pointer to the g table */
void init(int _NumEntry, int _EntryLen, int _SetSize, char _SetMin,int _NumVert); /* Set the parameters for the hash table */
void cleanup(); /* Frees the tables allocated by init() */
int hash(unsigned char *string); //!< Hash the string to an int 0 .. NUMENTRY-1
};
extern PatternHasher g_pattern_hasher;
/* Macro reads a LH uint16_t from the image regardless of host convention */
#ifndef LH
#define LH(p) ((int)((uint8_t *)(p))[0] + ((int)((uint8_t *)(p))[1] << 8))
#endif

View File

@ -1,12 +1,14 @@
#include <QtCore/QFileInfo>
#include <QtCore/QDebug>
#include <cstdio>
#include "dcc.h" #include "dcc.h"
#include "DccFrontend.h" #include "DccFrontend.h"
#include "project.h" #include "project.h"
#include "disassem.h" #include "disassem.h"
#include "CallGraph.h" #include "CallGraph.h"
#include <QtCore/QFileInfo>
#include <QtCore/QDebug>
#include <cstdio>
class Loader class Loader
{ {
@ -158,7 +160,7 @@ bool DccFrontend::FrontEnd ()
if (option.asm1) if (option.asm1)
{ {
std::cout << "dcc: writing assembler file "<<asm1_name.toStdString()<<'\n'; qWarning() << "dcc: writing assembler file "<<asm1_name<<'\n';
} }
/* Search through code looking for impure references and flag them */ /* Search through code looking for impure references and flag them */

View File

@ -1,101 +0,0 @@
/*
* Perfect hashing function library. Contains functions to generate perfect
* hashing functions
* (C) Mike van Emmerik
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "perfhlib.h"
/* Private data structures */
static uint16_t *T1, *T2; /* Pointers to T1[i], T2[i] */
static short *g; /* g[] */
//static int numEdges; /* An edge counter */
//static bool *visited; /* Array of bools: whether visited */
/* Private prototypes */
//static void initGraph(void);
//static void addToGraph(int e, int v1, int v2);
//static bool isCycle(void);
//static void duplicateKeys(int v1, int v2);
PatternHasher g_pattern_hasher;
/* Remember the hash parameters in the object (so hash() needs no extra
   arguments) and allocate the tables whose sizes depend on them. */
void PatternHasher::init(int _NumEntry, int _EntryLen, int _SetSize, char _SetMin, int _NumVert)
{
    NumEntry = _NumEntry;
    EntryLen = _EntryLen;
    SetSize  = _SetSize;
    SetMin   = _SetMin;
    NumVert  = _NumVert;

    /* T1/T2: one SetSize-wide slice per key position */
    T1base = new uint16_t [EntryLen * SetSize];
    T2base = new uint16_t [EntryLen * SetSize];

    /* Edge arrays hold 2*NumEntry+1 slots; vertex arrays NumVert+1 */
    graphNode = new int [NumEntry*2 + 1];
    graphNext = new int [NumEntry*2 + 1];
    graphFirst = new int [NumVert + 1];
    g = new short [NumVert + 1];
}
/* Free the storage for the variable sized tables allocated by init().
   Each pointer is cleared afterwards so that a repeated call cannot
   delete the same array twice. */
void PatternHasher::cleanup(void)
{
    delete [] T1base;     T1base = nullptr;
    delete [] T2base;     T2base = nullptr;
    delete [] graphNode;  graphNode = nullptr;
    delete [] graphNext;  graphNext = nullptr;
    delete [] graphFirst; graphFirst = nullptr;
    delete [] g;          g = nullptr;
}
/* Hash a key of EntryLen bytes: sum the T1 and T2 table entries selected by
   each key byte, reduce both sums modulo NumVert, and combine the two g[]
   values modulo NumEntry. */
int PatternHasher::hash(uint8_t *string)
{
    uint16_t sum1 = 0;
    uint16_t sum2 = 0;

    for (int pos = 0; pos < EntryLen; pos++)
    {
        T1 = T1base + pos * SetSize;
        T2 = T2base + pos * SetSize;
        sum1 += T1[string[pos] - SetMin];
        sum2 += T2[string[pos] - SetMin];
    }
    sum1 %= NumVert;
    sum2 %= NumVert;

    const int combined = g[sum1] + g[sum2];
    return combined % NumEntry;
}
/* Returns the start of the T1 table (the pointer set up by init()) */
uint16_t * PatternHasher::readT1(void)
{
return T1base;
}
/* Returns the start of the T2 table (the pointer set up by init()) */
uint16_t *PatternHasher::readT2(void)
{
return T2base;
}
/* Returns the g[] array; it is stored as short and reinterpreted as
   uint16_t for the caller */
uint16_t * PatternHasher::readG(void)
{
return (uint16_t *)g;
}

1
tools/CMakeLists.txt Normal file
View File

@ -0,0 +1 @@
add_subdirectory(makedsig)

248
tools/dispsrch/dispsig.cpp Normal file
View File

@ -0,0 +1,248 @@
/* Quick program to copy a named signature to a small file */
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>
#include "perfhlib.h"
/* statics */
byte buf[100];
int numKeys; /* Number of hash table entries (keys) */
int numVert; /* Number of vertices in the graph (also size of g[]) */
int PatLen; /* Size of the keys (pattern length) */
int SymLen; /* Max size of the symbols, including null */
FILE *f; /* File being read */
FILE *f2; /* File being written */
static word *T1base, *T2base; /* Pointers to start of T1, T2 */
static word *g; /* g[] */
/* prototypes */
void grab(int n);
word readFileShort(void);
void cleanup(void);
#define SYMLEN 16
#define PATLEN 23
/* Hash table structure */
/* One record of the signature file's hash table.  main() reads
   SymLen + PatLen bytes from the file straight into this struct. */
typedef struct HT_tag
{
char htSym[SYMLEN]; /* Symbol name; compared against the requested function name */
byte htPat[PATLEN]; /* Pattern bytes; printed in hex and written to the output file */
} HT;
HT ht; /* One hash table entry */
void
main(int argc, char *argv[])
{
word w, len;
int i;
if (argc <= 3)
{
printf("Usage: dispsig <SigFilename> <FunctionName> <BinFileName>\n");
printf("Example: dispsig dccm8s.sig printf printf.bin\n");
exit(1);
}
if ((f = fopen(argv[1], "rb")) == NULL)
{
printf("Cannot open %s\n", argv[1]);
exit(2);
}
if ((f2 = fopen(argv[3], "wb")) == NULL)
{
printf("Cannot write to %s\n", argv[3]);
exit(2);
}
/* Read the parameters */
grab(4);
if (memcmp("dccs", buf, 4) != 0)
{
printf("Not a dccs file!\n");
exit(3);
}
numKeys = readFileShort();
numVert = readFileShort();
PatLen = readFileShort();
SymLen = readFileShort();
/* Initialise the perfhlib stuff. Also allocates T1, T2, g, etc */
hashParams( /* Set the parameters for the hash table */
numKeys, /* The number of symbols */
PatLen, /* The length of the pattern to be hashed */
256, /* The character set of the pattern (0-FF) */
0, /* Minimum pattern character value */
numVert); /* Specifies C, the sparseness of the graph.
See Czech, Havas and Majewski for details
*/
T1base = readT1();
T2base = readT2();
g = readG();
/* Read T1 and T2 tables */
grab(2);
if (memcmp("T1", buf, 2) != 0)
{
printf("Expected 'T1'\n");
exit(3);
}
len = PatLen * 256 * sizeof(word);
w = readFileShort();
if (w != len)
{
printf("Problem with size of T1: file %d, calc %d\n", w, len);
exit(4);
}
if (fread(T1base, 1, len, f) != len)
{
printf("Could not read T1\n");
exit(5);
}
grab(2);
if (memcmp("T2", buf, 2) != 0)
{
printf("Expected 'T2'\n");
exit(3);
}
w = readFileShort();
if (w != len)
{
printf("Problem with size of T2: file %d, calc %d\n", w, len);
exit(4);
}
if (fread(T2base, 1, len, f) != len)
{
printf("Could not read T2\n");
exit(5);
}
/* Now read the function g[] */
grab(2);
if (memcmp("gg", buf, 2) != 0)
{
printf("Expected 'gg'\n");
exit(3);
}
len = numVert * sizeof(word);
w = readFileShort();
if (w != len)
{
printf("Problem with size of g[]: file %d, calc %d\n", w, len);
exit(4);
}
if (fread(g, 1, len, f) != len)
{
printf("Could not read T2\n");
exit(5);
}
/* This is now the hash table */
grab(2);
if (memcmp("ht", buf, 2) != 0)
{
printf("Expected 'ht'\n");
exit(3);
}
w = readFileShort();
if (w != numKeys * (SymLen + PatLen + sizeof(word)))
{
printf("Problem with size of hash table: file %d, calc %d\n", w, len);
exit(6);
}
for (i=0; i < numKeys; i++)
{
if (fread(&ht, 1, SymLen + PatLen, f) != (size_t)(SymLen + PatLen))
{
printf("Could not read pattern %d from %s\n", i, argv[1]);
exit(7);
}
if (stricmp(ht.htSym, argv[2]) == 0)
{
/* Found it! */
break;
}
}
fclose(f);
if (i == numKeys)
{
printf("Function %s not found!\n", argv[2]);
exit(2);
}
printf("Function %s index %d\n", ht.htSym, i);
for (i=0; i < PatLen; i++)
{
printf("%02X ", ht.htPat[i]);
}
fwrite(ht.htPat, 1, PatLen, f2);
fclose(f2);
printf("\n");
}
/* Free the storage for the variable sized tables and clear the pointers,
   so a second call (or a later use) cannot touch freed memory. */
void
cleanup(void)
{
    /* free() ignores null pointers */
    free(T1base); T1base = NULL;
    free(T2base); T2base = NULL;
    free(g);      g = NULL;
}
/* Read exactly n bytes from the signature file f into buf; any short read
   prints a message and exits with status 11. */
void grab(int n)
{
    const size_t wanted = (size_t)n;
    const size_t got = fread(buf, 1, n, f);
    if (got == wanted)
    {
        return;
    }
    printf("Could not read\n");
    exit(11);
}
/* Read a 16-bit value stored low byte first from the signature file f.
   A failed read prints a message and exits with status 11. */
word
readFileShort(void)
{
    byte pair[2];   /* pair[0] = low byte, pair[1] = high byte */

    for (int k = 0; k < 2; k++)
    {
        if (fread(&pair[k], 1, 1, f) != 1)
        {
            printf("Could not read\n");
            exit(11);
        }
    }
    return (pair[1] << 8) + pair[0];
}
/* Following two functions not needed unless creating tables */
/* Intentionally empty stub: i and keys are ignored and *keys is left
   untouched. */
void getKey(int i, byte **keys)
{
}
/* Display key i */
/* Intentionally empty stub in this tool: nothing is displayed. */
void
dispKey(int i)
{
}

View File

@ -0,0 +1,11 @@
CFLAGS = -Zi -c -AL -W3 -D__MSDOS__
dispsig.exe: dispsig.obj perfhlib.obj
link /CO dispsig perfhlib;
dispsig.obj: dispsig.c dcc.h perfhlib.h
cl $(CFLAGS) $*.c
perfhlib.obj: perfhlib.c dcc.h perfhlib.h
cl $(CFLAGS) $*.c

221
tools/dispsrch/dispsrch.txt Normal file
View File

@ -0,0 +1,221 @@
DISPSIG and SRCHSIG
===================
1 What are DispSig and SrchSig?
2 How do I use DispSig?
3 How do I use SrchSig?
4 What can I do with the binary pattern file from DispSig?
5 How can I create a binary pattern file for SrchSig?
1 What are DispSig and SrchSig?
-------------------------------
SrchSig is a program to display the name of a function, given a
signature (pattern).
DispSig is a program to display a signature, given a function name.
Dispsig also writes the signature to a binary file, so you can
disassemble it, or use it in Srchsig to see if some other signature
file has the same pattern.
2 How do I use DispSig?
-----------------------
Just type
DispSig <SignatureFileName> <FunctionName> <BinaryFileName>
For example:
dispsig dccb2s.sig strcmp strcmp.bin
Function strcmp index 58
55 8B EC 56 57 8C D8 8E C0 FC 33 C0 8B D8 8B 7E 06 8B F7 32 C0 B9 F4
This tells us that the function was the 59th function in the
signature file (and that the signature above will hash to 58
(decimal)). We can see that it is a standard C function, since it
starts with "55 8B EC", which is the standard C function prologue.
The rest of it is a bit hard to follow, but fortunately we have also
written the pattern to a binary file, strcmp.bin. See section 4 on
how to disassemble this pattern.
If I type
dispsig dcct4p.sig writeln wl.bin
I get
Function writeln not found!
In fact, there is no one function that performs the writeln function;
there are functions like WriteString, WriteInt, CrLf (Carriage
return, linefeed), and so on. Dispsig is case insensitive, so:
dispsig dcct4p.sig writestring wl.bin
produces
Function WriteString index 53
55 8B EC C4 7E 0C E8 F4 F4 75 25 C5 76 08 8B 4E 06 FC AC F4 F4 2B C8
3 How do I use SrchSig?
-----------------------
Just type
srchsig <SignatureFileName> <BinaryFileName>
dispsig dcct4p.sig writeln wl.bin
where BinaryFileName contains a pattern. See section 5 for how to
create one of these. For now, we can use the pattern file from the
first example:
srchsig dccb2s.sig strcmp.bin
Pattern:
55 8B EC 56 57 8C D8 8E C0 FC 33 C0 8B D8 8B 7E 06 8B F7 32 C0 B9 F4
Pattern hashed to 58 (0x3A), symbol strcmp
Pattern matched
Note that the pattern reported above need not be exactly the same as
the one we provided in <BinaryFileName>. The pattern displayed is the
wildcarded and chopped version of the pattern provided; it will have
F4s (wildcards) and possibly zeroes at the end; see the file
makedstp.txt for a simple explanation of wildcarding and chopping.
If we type
srchsig dccb2s.sig ws.bin
we get
Pattern:
55 8B EC C4 7E 0C E8 F4 F4 75 25 C5 76 08 8B 4E 06 FC AC F4 F4 2B C8
Pattern hashed to 0 (0x0), symbol _IOERROR
Pattern mismatch: found following pattern
55 8B EC 56 8B 76 04 0B F6 7C 14 83 FE 58 76 03 BE F4 F4 89 36 F4 F4
300
The pattern often hashes to zero when the pattern is unknown, due to
the sparse nature of the tables used in the hash function. The first
pattern in dccb2s.sig happens to be _IOERROR, and its pattern is
completely different, apart from the first three bytes. The "300" at
the end is actually a running count of signatures searched linearly,
in case there is a problem with the hash function.
4 What can I do with the binary pattern file from DispSig?
----------------------------------------------------------
You can feed it into SrchSig; this might make sense if you wanted to
know if, e.g. the signature for printf was the same for version 2 as
it is for version 3. In this case, you would use DispSig on the
version 2 signature file, and SrchSig on the version 3 file.
You can also disassemble it, using debug (it comes with MS-DOS). For
example
debug strcmp.bin
-u100 l 17
1754:0100 55 PUSH BP
1754:0101 8BEC MOV BP,SP
1754:0103 56 PUSH SI
1754:0104 57 PUSH DI
1754:0105 8CD8 MOV AX,DS
1754:0107 8EC0 MOV ES,AX
1754:0109 FC CLD
1754:010A 33C0 XOR AX,AX
1754:010C 8BD8 MOV BX,AX
1754:010E 8B7E06 MOV DI,[BP+06]
1754:0111 8BF7 MOV SI,DI
1754:0113 32C0 XOR AL,AL
1754:0115 B9F42B MOV CX,2BF4
-q
Note that the "2B" at the end is actually past the end of the
signature. (Signatures are 23 bytes (17 in hex) long, so only
addresses 100-116 are valid). Remember that most 16 bit operands will
be "wildcarded", so don't believe the resultant addresses.
5 How can I create a binary pattern file for SrchSig?
-----------------------------------------------------
Again, you can use debug. Suppose you have found an interesting piece
of code at address 05BE (this example comes from a hello world
program):
-u 5be
15FF:05BE 55 PUSH BP
15FF:05BF 8BEC MOV BP,SP
15FF:05C1 83EC08 SUB SP,+08
15FF:05C4 57 PUSH DI
15FF:05C5 56 PUSH SI
15FF:05C6 BE1E01 MOV SI,011E
15FF:05C9 8D4606 LEA AX,[BP+06]
15FF:05CC 8946FC MOV [BP-04],AX
15FF:05CF 56 PUSH SI
15FF:05D0 E8E901 CALL 07BC
15FF:05D3 83C402 ADD SP,+02
15FF:05D6 8BF8 MOV DI,AX
15FF:05D8 8D4606 LEA AX,[BP+06]
15FF:05DB 50 PUSH AX
15FF:05DC FF7604 PUSH [BP+04]
-mcs:5be l 17 cs:100
-u100 l 17
15FF:0100 55 PUSH BP
15FF:0101 8BEC MOV BP,SP
15FF:0103 83EC08 SUB SP,+08
15FF:0106 57 PUSH DI
15FF:0107 56 PUSH SI
15FF:0108 BE1E01 MOV SI,011E
15FF:010B 8D4606 LEA AX,[BP+06]
15FF:010E 8946FC MOV [BP-04],AX
15FF:0111 56 PUSH SI
15FF:0112 E8E901 CALL 02FE
15FF:0115 83C41F ADD SP,+1F
-nfoo.bin
-rcx
CS 268A
:17
-w
Writing 0017 bytes
-q
c>dir foo.bin
foo.bin 23 3-25-94 12:04
c>
The binary file has to be exactly 23 bytes long; that's why we
changed cx to the value 17 (hex 17 = decimal 23). If you are studying
a large file (> 64K) remember to set bx to 0 as well. The m (block
move) command moves the code of interest to cs:100, which is where
debug will write the file from. The "rcx" changes the length of the
save, and the "nfoo.bin" sets the name of the file to be saved. Now
we can feed this into srchsig:
srchsig dccb2s.sig foo.bin
Pattern:
55 8B EC 83 EC 08 57 56 BE F4 F4 8D 46 06 89 46 FC 56 E8 F4 F4 83 C4
Pattern hashed to 278 (0x116), symbol sleep
Pattern mismatch: found following pattern
55 8B EC 83 EC 04 56 57 8D 46 FC 50 E8 F4 F4 59 80 7E FE 5A 76 05 BF
300
Hmmm. Not a Borland C version 2 small model signature. Perhaps it's a
Microsoft Version 5 signature:
Pattern:
55 8B EC 83 EC 08 57 56 BE F4 F4 8D 46 06 89 46 FC 56 E8 F4 F4 83 C4
Pattern hashed to 31 (0x1F), symbol printf
Pattern matched
Yes, it was good old printf. Of course, no need for you to guess, DCC
will figure out the vendor, version number, and model for you.

287
tools/dispsrch/srchsig.cpp Normal file
View File

@ -0,0 +1,287 @@
/* Quick program to see if a pattern is in a sig file. Pattern is supplied
in a small .bin or .com style file */
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include "perfhlib.h"
/* statics */
byte buf[100];
int numKeys; /* Number of hash table entries (keys) */
int numVert; /* Number of vertices in the graph (also size of g[]) */
int PatLen; /* Size of the keys (pattern length) */
int SymLen; /* Max size of the symbols, including null */
FILE *f; /* Sig file being read */
FILE *fpat; /* Pattern file being read */
static word *T1base, *T2base; /* Pointers to start of T1, T2 */
static word *g; /* g[] */
#define SYMLEN 16
#define PATLEN 23
/* One record of the signature file's hash table.  main() reads
   SymLen + PatLen bytes per record from the file straight into an array
   of these. */
typedef struct HT_tag
{
/* Hash table structure */
char htSym[SYMLEN]; /* Symbol name, printed with %s */
byte htPat[PATLEN]; /* Pattern bytes, compared against the searched pattern */
} HT;
HT *ht; /* Declare a pointer to a hash table */
/* prototypes */
void grab(int n);
word readFileShort(void);
void cleanup(void);
void fixWildCards(char *buf); /* In fixwild.c */
void pattSearch(void);
void
main(int argc, char *argv[])
{
word w, len;
int h, i;
int patlen;
if (argc <= 2)
{
printf("Usage: srchsig <SigFilename> <PattFilename>\n");
printf("Searches the signature file for the given pattern\n");
printf("e.g. %s dccm8s.sig mypatt.bin\n", argv[0]);
exit(1);
}
if ((f = fopen(argv[1], "rb")) == NULL)
{
printf("Cannot open signature file %s\n", argv[1]);
exit(2);
}
if ((fpat = fopen(argv[2], "rb")) == NULL)
{
printf("Cannot open pattern file %s\n", argv[2]);
exit(2);
}
/* Read the parameters */
grab(4);
if (memcmp("dccs", buf, 4) != 0)
{
printf("Not a dccs file!\n");
exit(3);
}
numKeys = readFileShort();
numVert = readFileShort();
PatLen = readFileShort();
SymLen = readFileShort();
/* Initialise the perfhlib stuff. Also allocates T1, T2, g, etc */
hashParams( /* Set the parameters for the hash table */
numKeys, /* The number of symbols */
PatLen, /* The length of the pattern to be hashed */
256, /* The character set of the pattern (0-FF) */
0, /* Minimum pattern character value */
numVert); /* Specifies C, the sparseness of the graph.
See Czech, Havas and Majewski for details
*/
T1base = readT1();
T2base = readT2();
g = readG();
/* Read T1 and T2 tables */
grab(2);
if (memcmp("T1", buf, 2) != 0)
{
printf("Expected 'T1'\n");
exit(3);
}
len = PatLen * 256 * sizeof(word);
w = readFileShort();
if (w != len)
{
printf("Problem with size of T1: file %d, calc %d\n", w, len);
exit(4);
}
if (fread(T1base, 1, len, f) != len)
{
printf("Could not read T1\n");
exit(5);
}
grab(2);
if (memcmp("T2", buf, 2) != 0)
{
printf("Expected 'T2'\n");
exit(3);
}
w = readFileShort();
if (w != len)
{
printf("Problem with size of T2: file %d, calc %d\n", w, len);
exit(4);
}
if (fread(T2base, 1, len, f) != len)
{
printf("Could not read T2\n");
exit(5);
}
/* Now read the function g[] */
grab(2);
if (memcmp("gg", buf, 2) != 0)
{
printf("Expected 'gg'\n");
exit(3);
}
len = numVert * sizeof(word);
w = readFileShort();
if (w != len)
{
printf("Problem with size of g[]: file %d, calc %d\n", w, len);
exit(4);
}
if (fread(g, 1, len, f) != len)
{
printf("Could not read T2\n");
exit(5);
}
/* This is now the hash table */
/* First allocate space for the table */
if ((ht = (HT *)malloc(numKeys * sizeof(HT))) == 0)
{
printf("Could not allocate hash table\n");
exit(1);
}
grab(2);
if (memcmp("ht", buf, 2) != 0)
{
printf("Expected 'ht'\n");
exit(3);
}
w = readFileShort();
if (w != numKeys * (SymLen + PatLen + sizeof(word)))
{
printf("Problem with size of hash table: file %d, calc %d\n", w, len);
exit(6);
}
for (i=0; i < numKeys; i++)
{
if ((int)fread(&ht[i], 1, SymLen + PatLen, f) != SymLen + PatLen)
{
printf("Could not read\n");
exit(11);
}
}
/* Read the pattern to buf */
if ((patlen = fread(buf, 1, 100, fpat)) == 0)
{
printf("Could not read pattern\n");
exit(11);
}
if (patlen != PATLEN)
{
printf("Error: pattern length is %d, should be %d\n", patlen, PATLEN);
exit(12);
}
/* Fix the wildcards */
fixWildCards(buf);
printf("Pattern:\n");
for (i=0; i < PATLEN; i++)
printf("%02X ", buf[i]);
printf("\n");
h = hash(buf);
printf("Pattern hashed to %d (0x%X), symbol %s\n", h, h, ht[h].htSym);
if (memcmp(ht[h].htPat, buf, PATLEN) == 0)
{
printf("Pattern matched");
}
else
{
printf("Pattern mismatch: found following pattern\n");
for (i=0; i < PATLEN; i++)
printf("%02X ", ht[h].htPat[i]);
printf("\n");
pattSearch(); /* Look for it the hard way */
}
cleanup();
free(ht);
fclose(f);
fclose(fpat);
}
/* Brute-force fallback search: compare the pattern held in buf against the
   pattern of every entry in ht[] and report each index that matches.
   A progress counter is echoed every 100 entries. */
void pattSearch(void)
{
    int idx = 0;

    while (idx < numKeys)
    {
        /* Progress indicator, rewritten in place on the same line */
        if (idx % 100 == 0)
        {
            printf("\r%d ", idx);
        }

        if (0 == memcmp(ht[idx].htPat, buf, PATLEN))
        {
            printf("\nPattern matched offset %d (0x%X)\n", idx, idx);
        }

        idx++;
    }

    printf("\n");
}
void
cleanup(void)
{
    /* Release the variable sized tables. free() accepts a null pointer and
       does nothing for it, so no explicit null checks are required. */
    free(T1base);
    free(T2base);
    free(g);
}
/* Read exactly n bytes from the signature file f into buf.
   Prints a message and exits with status 11 on a short read. */
void grab(int n)
{
    const size_t wanted = (size_t)n;
    const size_t got = fread(buf, 1, n, f);

    if (got == wanted)
    {
        return;
    }

    printf("Could not read\n");
    exit(11);
}
/* Read a 16 bit little endian value (low byte first) from file f.
   Prints a message and exits with status 11 if either byte cannot be read. */
word
readFileShort(void)
{
    byte raw[2];    /* raw[0] = low byte, raw[1] = high byte */
    int k;

    for (k = 0; k < 2; k++)
    {
        if (fread(&raw[k], 1, 1, f) != 1)
        {
            printf("Could not read\n");
            exit(11);
        }
    }

    return (raw[1] << 8) + raw[0];
}
/* Following two functions not needed unless creating tables */
/* getKey: empty stub in this tool; both i and keys are ignored.
   NOTE(review): presumably kept only so that code linked in from perfhlib
   can resolve the symbol -- confirm before removing. */
void getKey(int i, byte **keys)
{
}
/* Display key i */
/* Empty stub in this tool: nothing is displayed and i is ignored. */
void
dispKey(int i)
{
}

View File

@ -0,0 +1,14 @@
# Makefile for the srchsig tool.
# NOTE(review): the cl/link command lines and the __MSDOS__ define suggest a
# Microsoft C toolchain targeting MS-DOS -- confirm the expected make and
# compiler versions before relying on this file.
# -c compiles without linking and -D__MSDOS__ defines the __MSDOS__ macro;
# the remaining switches are compiler specific (presumably debug info,
# large memory model and warning level 3 -- verify against the compiler docs).
CFLAGS = -Zi -c -AL -W3 -D__MSDOS__
# Link the three object files into srchsig.exe
srchsig.exe: srchsig.obj perfhlib.obj fixwild.obj
link /CO srchsig perfhlib fixwild;
# Each object is rebuilt when its source or one of the listed headers changes
srchsig.obj: srchsig.c dcc.h perfhlib.h
cl $(CFLAGS) $*.c
perfhlib.obj: perfhlib.c dcc.h perfhlib.h
cl $(CFLAGS) $*.c
fixwild.obj: fixwild.c dcc.h
cl $(CFLAGS) $*.c

View File

@ -0,0 +1,11 @@
# Sources of the makedsig signature-file generator.
# The main source is listed with its explicit .cpp extension: relying on
# CMake to guess the extension of a bare "makedsig" entry is deprecated
# behaviour (policy CMP0115) and fails on newer CMake versions.
set(SRC
    makedsig.cpp
    fixwild.cpp
    LIB_PatternCollector.cpp
    LIB_PatternCollector.h
    TPL_PatternCollector.cpp
    TPL_PatternCollector.h
)
add_executable(makedsig ${SRC})
# dcc_hash provides the perfect hashing code used by makedsig
target_link_libraries(makedsig dcc_hash)
qt5_use_modules(makedsig Core)

View File

@ -0,0 +1,7 @@
#include "LIB_PatternCollector.h"
// The collector has no state to set up yet, so the compiler-generated
// constructor body is sufficient.
LIB_PatternCollector::LIB_PatternCollector() = default;

View File

@ -0,0 +1,11 @@
#ifndef LIB_PATTERNCOLLECTOR_H
#define LIB_PATTERNCOLLECTOR_H
// Pattern collector that makedsig instantiates for ".lib" input files.
// At this stage the class only declares a default constructor.
// NOTE(review): makedsig.cpp assigns an instance of this class to a
// PatternCollector pointer, yet no base class is declared here -- confirm
// the intended inheritance.
class LIB_PatternCollector
{
public:
LIB_PatternCollector();
};
#endif // LIB_PATTERNCOLLECTOR_H

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,5 @@
#ifndef TPL_PATTERNCOLLECTOR_H
#define TPL_PATTERNCOLLECTOR_H
#endif // TPL_PATTERNCOLLECTOR_H

525
tools/makedsig/fixwild.cpp Normal file
View File

@ -0,0 +1,525 @@
/*
*$Log: fixwild.c,v $
* Revision 1.10 93/10/28 11:10:10 emmerik
* Addressing mode [reg+nnnn] is now wildcarded
*
* Revision 1.9 93/10/26 13:40:11 cifuente
* op0F(byte pat[])
*
* Revision 1.8 93/10/26 13:01:29 emmerik
* Completed the odd opcodes, like 0F XX and F7. Result: some library
* functions that were not recognised before are recognised now.
*
* Revision 1.7 93/10/11 11:37:01 cifuente
* First walk of HIGH_LEVEL icodes.
*
* Revision 1.6 93/10/01 14:36:21 emmerik
* Added $ log, and made independant of dcc.h
*
*
*/
/* * * * * * * * * * * * *\
* *
* Fix Wild Cards Code *
* *
\* * * * * * * * * * * * */
#include <memory.h>
#include <stdint.h>
#ifndef PATLEN
#define PATLEN 23
#define WILD 0xF4
#endif
static int pc; /* Indexes into pat[] */
/* prototypes */
static bool ModRM(uint8_t pat[]); /* Handle the mod/rm byte */
static bool TwoWild(uint8_t pat[]); /* Make the next 2 bytes wild */
static bool FourWild(uint8_t pat[]); /* Make the next 4 bytes wild */
void fixWildCards(uint8_t pat[]); /* Main routine */
/* Handle the mod/rm case. Returns true if pattern exhausted.

   Consumes the mod/rm byte at pat[pc] and steps pc over whatever
   displacement the addressing mode implies (16 bit address mode):
     mod 00, r/m 110  [nnnn]      both displacement bytes are made WILD
     mod 00, other    [reg]       no displacement
     mod 01           [reg+nn]    the byte displacement is skipped unchanged
     mod 10           [reg+nnnn]  both displacement bytes are made WILD
     mod 11           reg         no displacement
   pc is the file-level index into pat[]. */
static bool ModRM(uint8_t pat[])
{
    uint8_t op;

    /* Callers such as op0F() and the "Int nn" case of fixWildCards() fetch
       a byte and then call us without re-checking pc, so pc may already be
       PATLEN here. Reading pat[pc] would then be out of bounds; report the
       pattern as exhausted instead. */
    if (pc >= PATLEN) return true;

    /* A standard mod/rm byte follows opcode */
    op = pat[pc++];                         /* The mod/rm byte */
    if (pc >= PATLEN) return true;          /* Skip Mod/RM */
    switch (op & 0xC0)
    {
        case 0x00:                          /* [reg] or [nnnn] */
            if ((op & 0xC7) == 6)
            {
                /* Uses [nnnn] address mode */
                pat[pc++] = WILD;
                if (pc >= PATLEN) return true;
                pat[pc++] = WILD;
                if (pc >= PATLEN) return true;
            }
            break;
        case 0x40:                          /* [reg + nn] */
            if ((pc+=1) >= PATLEN) return true;
            break;
        case 0x80:                          /* [reg + nnnn] */
            /* Possibly just a long constant offset from a register,
               but often will be an index from a variable */
            pat[pc++] = WILD;
            if (pc >= PATLEN) return true;
            pat[pc++] = WILD;
            if (pc >= PATLEN) return true;
            break;
        case 0xC0:                          /* reg */
            break;
    }
    return false;
}
/* Overwrite the next two pattern bytes with the wild card value.
   Returns true as soon as pc reaches the end of the pattern (which may be
   after the first byte), false if both bytes were written and pattern
   bytes remain. */
static bool TwoWild(uint8_t pat[])
{
    for (int remaining = 2; remaining > 0; --remaining)
    {
        pat[pc] = WILD;
        ++pc;
        if (pc >= PATLEN)
        {
            return true;                    /* Pattern exhausted */
        }
    }
    return false;
}
/* Change the next four bytes to wild cards.
   Returns true if the pattern is exhausted, false otherwise. */
static bool FourWild(uint8_t pat[])
{
    /* Stop as soon as the first pair exhausts the pattern: TwoWild() writes
       pat[pc] before checking pc, so calling it again with pc >= PATLEN
       would write past the end of the pattern buffer. */
    if (TwoWild(pat)) return true;
    return TwoWild(pat);
}
/* Zero everything from the current position to the end of the pattern;
   bytes after this point cannot be relied upon. Nothing is done when pc is
   already at or past PATLEN (the length would otherwise go negative). */
static void chop(uint8_t pat[])
{
    if (pc < PATLEN)
    {
        memset(pat + pc, 0, PATLEN - pc);
    }
}
/* Handle the two byte opcodes (first byte 0F). Fetches the second opcode
   byte at pat[pc] and steps pc over the operands of the instruction,
   delegating to ModRM() where a mod/rm byte follows.
   Returns true if the pattern is exhausted, false otherwise. */
static bool op0F(uint8_t pat[])
{
    const uint8_t second = pat[pc++];       /* Second opcode byte */
    const int row = second & 0xF0;          /* Row of the opcode map */

    if (row == 0x00)                        /* 00 - 0F */
    {
        /* 06 and above: Clts, Invd, Wbinvd; below: Grp 6, Grp 7, LAR, LSL */
        return (second >= 0x06) ? false : ModRM(pat);
    }

    if (row == 0x20 || row == 0x90)
    {
        /* 2x: various funnies, all with Mod/RM; 9x: byte set on condition */
        return ModRM(pat);
    }

    if (row == 0x80)
    {
        pc += 2;                            /* Word displacement cond jumps */
        return false;
    }

    if (row == 0xA0)
    {
        switch (second)
        {
            case 0xA0:                      /* Push FS */
            case 0xA1:                      /* Pop  FS */
            case 0xA8:                      /* Push GS */
            case 0xA9:                      /* Pop  GS */
                return false;

            case 0xA4:                      /* Shld EvGbIb */
            case 0xAC:                      /* Shrd EvGbIb */
                if (ModRM(pat)) return true;
                pc++;                       /* The #num bits to shift */
                return false;

            default:
                /* Bt, Bts, Shld/Shrd by CL, CmpXchg, Imul: plain Mod/RM */
                return ModRM(pat);
        }
    }

    if (row == 0xB0)
    {
        if (second != 0xBA)
        {
            return ModRM(pat);
        }
        /* Grp 8: bt/bts/btr/btc Ev,#nn */
        if (ModRM(pat)) return true;
        pc++;                               /* The #num bits to shift */
        return false;
    }

    if (row == 0xC0)
    {
        /* C0, C1: Xadd (Mod/RM); everything else in the row: BSWAP */
        return (second <= 0xC1) ? ModRM(pat) : false;
    }

    return false;                           /* Treat as double byte opcodes */
}
/* Scan through the instructions in pat[], looking for opcodes that may
have operands that vary with different instances. For example, load and
store from statics, calls to other procs (even relative calls; they may
call procs loaded in a different order, etc).
Note that this procedure is architecture specific, and assumes the
processor is in 16 bit address mode (real mode).
PATLEN bytes are scanned.

Operand bytes judged variable are overwritten in place with WILD; after a
return or an unconditional jump the rest of the pattern is zeroed by
chop(). The file-level index pc is reset to 0 on entry and tracks the
current position in pat[].
NOTE(review): the output of this routine defines what gets hashed into
signature files, so even apparent decoding inaccuracies below must not be
"fixed" without regenerating every signature file and keeping any other
copy of this routine in step.
*/
void fixWildCards(uint8_t pat[])
{
uint8_t op, quad, intArg;
pc=0;
while (pc < PATLEN)
{
op = pat[pc++];
/* An opcode in the very last pattern byte has no operands left to fix */
if (pc >= PATLEN) return;
quad = op & 0xC0; /* Quadrant of the opcode map */
if (quad == 0)
{
/* Arithmetic group 00-3F */
if ((op & 0xE7) == 0x26) /* First check for the odds */
{
/* Segment prefix: treat as 1 byte opcode */
continue;
}
if (op == 0x0F) /* 386 2 byte opcodes */
{
if (op0F(pat)) return;
continue;
}
if (op & 0x04)
{
/* All these are constant. Work out the instr length */
if (op & 2)
{
/* Push, pop, other 1 byte opcodes */
continue;
}
else
{
if (op & 1)
{
/* Word immediate operands */
pc += 2;
continue;
}
else
{
/* Byte immediate operands */
pc++;
continue;
}
}
}
else
{
/* All these have mod/rm bytes */
if (ModRM(pat)) return;
continue;
}
}
else if (quad == 0x40)
{
if ((op & 0x60) == 0x40)
{
/* 0x40 - 0x5F -- these are inc, dec, push, pop of general
registers */
continue;
}
else
{
/* 0x60 - 0x70 */
if (op & 0x10)
{
/* 70-7F 2 byte jump opcodes */
pc++;
continue;
}
else
{
/* Odds and sods */
/* NOTE(review): there is no default label, so opcodes in 60-6F
that are not listed (0x69, 0x6B) leave the switch, fall out of
the if/else chain and are treated as one byte opcodes */
switch (op)
{
case 0x60: /* pusha */
case 0x61: /* popa */
case 0x64: /* overrides */
case 0x65:
case 0x66:
case 0x67:
case 0x6C: /* insb DX */
case 0x6E: /* outsb DX */
continue;
case 0x62: /* bound */
pc += 4;
continue;
case 0x63: /* arpl */
if (TwoWild(pat)) return;
continue;
case 0x68: /* Push byte */
case 0x6A: /* Push byte */
case 0x6D: /* insb port */
case 0x6F: /* outsb port */
/* 2 byte instr, no wilds */
pc++;
continue;
}
}
}
}
else if (quad == 0x80)
{
switch (op & 0xF0)
{
case 0x80: /* 80 - 8F */
/* All have a mod/rm byte */
if (ModRM(pat)) return;
/* These also have immediate values */
switch (op)
{
case 0x80:
case 0x83:
/* One byte immediate */
pc++;
continue;
case 0x81:
/* Immediate 16 bit values might be constant, but
also might be relocatable. Have to make them
wild */
if (TwoWild(pat)) return;
continue;
}
continue;
case 0x90: /* 90 - 9F */
if (op == 0x9A)
{
/* far call */
if (FourWild(pat)) return;
continue;
}
/* All others are 1 byte opcodes */
continue;
case 0xA0: /* A0 - AF */
if ((op & 0x0C) == 0)
{
/* mov al/ax to/from [nnnn] */
if (TwoWild(pat)) return;
continue;
}
else if ((op & 0xFE) == 0xA8)
{
/* test al,#byte or test ax,#word */
if (op & 1) pc += 2;
else pc += 1;
continue;
}
/* NOTE(review): no continue/break here, so A4-A7 and AA-AF fall
through into the B0-BF handling below (A4-A7 skip one byte, AA-AF
get two wild bytes). Unclear whether this is intentional; existing
signature files depend on it, so confirm before changing. */
case 0xB0: /* B0 - BF */
{
if (op & 8)
{
/* mov reg, #16 */
/* Immediate 16 bit values might be constant, but also
might be relocatable. For now, make them wild */
if (TwoWild(pat)) return;
}
else
{
/* mov reg, #8 */
pc++;
}
continue;
}
}
}
else
{
/* In the last quadrant of the op code table */
switch (op)
{
case 0xC0: /* 386: Rotate group 2 ModRM, byte, #byte */
case 0xC1: /* 386: Rotate group 2 ModRM, word, #byte */
if (ModRM(pat)) return;
/* Byte immediate value follows ModRM */
pc++;
continue;
case 0xC3: /* Return */
case 0xCB: /* Return far */
/* Nothing after a return belongs to this code path: zero the rest */
chop(pat);
return;
case 0xC2: /* Ret nnnn */
case 0xCA: /* Retf nnnn */
pc += 2;
chop(pat);
return;
case 0xC4: /* les Gv, Mp */
case 0xC5: /* lds Gv, Mp */
if (ModRM(pat)) return;
continue;
case 0xC6: /* Mov ModRM, #nn */
if (ModRM(pat)) return;
/* Byte immediate value follows ModRM */
pc++;
continue;
case 0xC7: /* Mov ModRM, #nnnn */
if (ModRM(pat)) return;
/* Word immediate value follows ModRM */
/* Immediate 16 bit values might be constant, but also
might be relocatable. For now, make them wild */
if (TwoWild(pat)) return;
continue;
case 0xC8: /* Enter Iw, Ib */
pc += 3; /* Constant word, byte */
continue;
case 0xC9: /* Leave */
continue;
case 0xCC: /* Int 3 */
continue;
case 0xCD: /* Int nn */
intArg = pat[pc++];
if ((intArg >= 0x34) && (intArg <= 0x3B))
{
/* Borland/Microsoft FP emulations */
/* The bytes following the interrupt number are decoded as a
mod/rm operand */
if (ModRM(pat)) return;
}
continue;
case 0xCE: /* Into */
continue;
case 0xCF: /* Iret */
continue;
case 0xD0: /* Group 2 rotate, byte, 1 bit */
case 0xD1: /* Group 2 rotate, word, 1 bit */
case 0xD2: /* Group 2 rotate, byte, CL bits */
case 0xD3: /* Group 2 rotate, word, CL bits */
if (ModRM(pat)) return;
continue;
case 0xD4: /* Aam */
case 0xD5: /* Aad */
case 0xD7: /* Xlat */
continue;
case 0xD8:
case 0xD9:
case 0xDA:
case 0xDB: /* Esc opcodes */
case 0xDC: /* i.e. floating point */
case 0xDD: /* coprocessor calls */
case 0xDE:
case 0xDF:
if (ModRM(pat)) return;
continue;
case 0xE0: /* Loopne */
case 0xE1: /* Loope */
case 0xE2: /* Loop */
case 0xE3: /* Jcxz */
pc++; /* Short jump offset */
continue;
case 0xE4: /* in al,nn */
case 0xE6: /* out nn,al */
pc++;
continue;
case 0xE5: /* in ax,nn */
case 0xE7: /* out nn,ax */
pc += 2;
continue;
case 0xE8: /* Call rel */
if (TwoWild(pat)) return;
continue;
case 0xE9: /* Jump rel, unconditional */
if (TwoWild(pat)) return;
chop(pat);
return;
case 0xEA: /* Jump abs */
if (FourWild(pat)) return;
chop(pat);
return;
case 0xEB: /* Jmp short unconditional */
pc++;
chop(pat);
return;
case 0xEC: /* In al,dx */
case 0xED: /* In ax,dx */
case 0xEE: /* Out dx,al */
case 0xEF: /* Out dx,ax */
continue;
case 0xF0: /* Lock */
case 0xF2: /* Repne */
case 0xF3: /* Rep/repe */
case 0xF4: /* Halt */
case 0xF5: /* Cmc */
case 0xF8: /* Clc */
case 0xF9: /* Stc */
case 0xFA: /* Cli */
case 0xFB: /* Sti */
case 0xFC: /* Cld */
case 0xFD: /* Std */
continue;
case 0xF6: /* Group 3 byte test/not/mul/div */
case 0xF7: /* Group 3 word test/not/mul/div */
case 0xFE: /* Inc/Dec group 4 */
if (ModRM(pat)) return;
continue;
case 0xFF: /* Group 5 Inc/Dec/Call/Jmp/Push */
/* Most are like standard ModRM */
if (ModRM(pat)) return;
continue;
default: /* Rest are single byte opcodes */
continue;
}
}
}
}

175
tools/makedsig/makedsig.cpp Normal file
View File

@ -0,0 +1,175 @@
/* Program for making the DCC signature file */
#include "LIB_PatternCollector.h"
#include "TPL_PatternCollector.h"
#include "perfhlib.h" /* Symbol table prototypes */
#include <QtCore/QCoreApplication>
#include <QtCore/QStringList>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <memory.h>
#include <string.h>
#include <algorithm>
/* Symbol table constants */
#define C 2.2 /* Sparseness of graph. See Czech, Havas and Majewski for details */
/* prototypes */
void saveFile(FILE *fl, const PerfectHash &p_hash, PatternCollector *coll); /* Save the info */
int numKeys; /* Number of useful codeview symbols */
/* Print the usage text to stdout: the detailed description when longusage
   is true, otherwise the one-line synopsis. */
static void printUsage(bool longusage) {
    static const char *const kLongText =
        "This program is to make 'signatures' of known c and tpl library calls for the dcc program.\n"
        "It needs as the first arg the name of a library file, and as the second arg, the name "
        "of the signature file to be generated.\n"
        "Example: makedsig CL.LIB dccb3l.sig\n"
        " or makedsig turbo.tpl dcct4p.sig\n";
    static const char *const kShortText =
        "Usage: makedsig <libname> <signame>\n"
        "or makedsig -h for help\n";

    /* Neither text contains a conversion specifier, so writing it verbatim
       yields the same output as passing it to printf as the format */
    fputs(longusage ? kLongText : kShortText, stdout);
}
int main(int argc, char *argv[])
{
QCoreApplication app(argc,argv);
FILE *f2; // output file
FILE *srcfile; // .lib file
int s;
if(app.arguments().size()<2) {
printUsage(false);
return 0;
}
QString arg2 = app.arguments()[1];
if (arg2.startsWith("-h") || arg2.startsWith("-?"))
{
printUsage(true);
return 0;
}
PatternCollector *collector;
if(arg2.endsWith("tpl")) {
collector = new TPL_PatternCollector;
} else if(arg2.endsWith(".lib")) {
collector = new LIB_PatternCollector;
}
if ((srcfile = fopen(argv[1], "rb")) == NULL)
{
printf("Cannot read %s\n", argv[1]);
exit(2);
}
if ((f2 = fopen(argv[2], "wb")) == NULL)
{
printf("Cannot write %s\n", argv[2]);
exit(2);
}
fprintf(stderr, "Seed: ");
scanf("%d", &s);
srand(s);
PerfectHash p_hash;
numKeys = collector->readSyms(srcfile); /* Read the keys (symbols) */
printf("Num keys: %d; vertices: %d\n", numKeys, (int)(numKeys*C));
/* Set the parameters for the hash table */
p_hash.setHashParams( numKeys, /* The number of symbols */
PATLEN, /* The length of the pattern to be hashed */
256, /* The character set of the pattern (0-FF) */
0, /* Minimum pattern character value */
numKeys*C); /* C is the sparseness of the graph. See Czech,
Havas and Majewski for details */
/* The following two functions are in perfhlib.c */
p_hash.map(collector); /* Perform the mapping. This will call getKey() repeatedly */
p_hash.assign(); /* Generate the function g */
saveFile(f2,p_hash,collector); /* Save the resultant information */
fclose(srcfile);
fclose(f2);
}
/* * * * * * * * * * * * *\
* *
* S a v e t h e s i g f i l e *
* *
\* * * * * * * * * * * * */
/* Write len bytes from buffer to fl. Prints a message and exits with
   status 1 if fewer bytes than requested were written. */
void writeFile(FILE *fl,const char *buffer, int len)
{
    const size_t written = fwrite(buffer, 1, len, fl);

    if ((int)written == len)
    {
        return;
    }

    printf("Could not write to file\n");
    exit(1);
}
/* Write w to fl as a 16 bit little endian value: low byte first, then the
   high byte. Errors are handled by writeFile(). */
void writeFileShort(FILE *fl,uint16_t w)
{
    const uint8_t lowByte  = (uint8_t)(w & 0xFF);
    const uint8_t highByte = (uint8_t)(w >> 8);

    writeFile(fl, (const char *)&lowByte, 1);
    writeFile(fl, (const char *)&highByte, 1);
}
/* Write the complete signature file to fl:
     "dccs", number of keys, number of vertices, PATLEN, SYMLEN,
   followed by the sections "T1", "T2", "gg" and "ht". Every section is a
   2 char tag, a 16 bit byte length and the body; all 16 bit values are
   written little endian via writeFileShort(). */
void saveFile(FILE *fl, const PerfectHash &p_hash, PatternCollector *coll)
{
    const int tableEntries = PATLEN * 256;          /* Entries in T1 and in T2 */
    const uint16_t *table;
    int k;

    /* File header */
    writeFile(fl,"dccs", 4);                        /* Signature */
    writeFileShort(fl,numKeys);                     /* Number of keys */
    writeFileShort(fl,(short)(numKeys * C));        /* Number of vertices */
    writeFileShort(fl,PATLEN);                      /* Length of key part of entries */
    writeFileShort(fl,SYMLEN);                      /* Length of symbol part of entries */

    /* Table T1, with its tag and byte length in front */
    writeFile(fl,"T1", 2);
    table = p_hash.readT1();
    writeFileShort(fl,tableEntries * sizeof(uint16_t));
    k = 0;
    while (k < tableEntries)
    {
        writeFileShort(fl,table[k]);
        ++k;
    }

    /* Table T2, same layout and size as T1 */
    writeFile(fl,"T2", 2);
    table = p_hash.readT2();
    writeFileShort(fl,tableEntries * sizeof(uint16_t));
    k = 0;
    while (k < tableEntries)
    {
        writeFileShort(fl,table[k]);
        ++k;
    }

    /* The function g[]: one entry per vertex */
    writeFile(fl,"gg", 2);
    table = p_hash.readG();
    const int vertexCount = (short)(numKeys * C);
    writeFileShort(fl,vertexCount * sizeof(uint16_t));
    k = 0;
    while (k < vertexCount)
    {
        writeFileShort(fl,table[k]);
        ++k;
    }

    /* Now the hash table itself. Only the first two characters of the tag
       literal are written. The byte length counts a 16 bit word per entry
       although only the symbol and pattern parts are written out. */
    writeFile(fl,"ht ", 2);
    writeFileShort(fl,numKeys * (SYMLEN + PATLEN + sizeof(uint16_t)));
    k = 0;
    while (k < numKeys)
    {
        writeFile(fl,(char *)&coll->keys[k], SYMLEN + PATLEN);
        ++k;
    }
}

188
tools/makedsig/makedsig.txt Normal file
View File

@ -0,0 +1,188 @@
MAKEDSIG
1 What is MakeDsig?
2 How does it work?
3 How do I use MakeDsig?
4 What's in a signature file?
5 What other tools are useful for signature work?
1 What is MakeDsig?
-------------------
MakeDsig is a program that reads a library (.lib) file from a
compiler, and generates a signature file for use by DCC. Without
signature files, dcc cannot recognise library functions, and so will
attempt to decompile them, and cannot name them. This makes the
resultant decompiled code bulkier and difficult to understand.
2 How does it work?
-------------------
Library files contain complete functions, relocation information,
function names, and more. MakeDsig reads a library file, and for each
function found, it saves the name, and creates a signature. These
are stored in an array. When all functions are done, tables for the
perfect hashing function are generated. During this process,
duplicate keys (functions that produce identical signatures) may be
detected; if so, one of the keys will be zeroed.
The signature file contains information needed by dcc to hash the
signatures, as well as the symbols and signatures. Dcc reads the various
sections of the signature file to be able to hash signatures. The
signatures, not the symbols, are hashed, since dcc gets a signature
from the executable file, and needs to know quickly if there is a
symbolic name for it.
3 How do I use MakeDsig?
------------------------
You can always find out by just executing it with no arguments, or
MakeDsig -h for more details.
Basically, you just give it the names of the files that it needs:
MakeDsig <libname> <signame>
It will ask you for a seed; enter any number, e.g. 1.
You need the library file for the appropriate compiler. For example,
to analyse executable programs created from Turbo C 2.1 small model,
you need the cs.lib file that comes with that compiler.
You also need to know the correct name for the signature file, i.e.
<signame>. Dcc will detect certain compiler vendors and version
numbers, and will look for a signature file named like this:
d c c <vendor> <version> <model> . s i g
Here are the current vendors:
Vendor Vendor letter
Microsoft C/C++ m
Borland C/C++ b
Logitech (Modula) l
Turbo Pascal t
Here are the model codes:
small/tiny s
medium m
compact c
large l
Turbo Pascal p
The version codes are fairly self explanatory:
Microsoft C 5.1 5
Microsoft C 8 8
Borland C 2.0 2
Borland C 3.0 3
Turbo Pascal 3.0 3 Note: currently no way to make dcct3p.sig
Turbo Pascal 4.0 4 Use Makedstp, not makedsig
Turbo Pascal 5.0 5 Use Makedstp, not makedsig
Some examples: the signature file for Borland C version 2.0, small
model, would be dccb2s.sig. To generate it, you would supply as the
library file cs.lib that came with that compiler. Suppose it was in
the \bc\lib directory. To generate the signature file required to
work with files produced by this compiler, you would type
makedsig \bc\lib\cs.lib dccb2s.sig
This will create dccb2s.sig in the current directory. For dcc to use
this file, place it in the same directory as dcc itself, or point the
environment variable DCC to the directory containing it.
Another example: to make the signature file for Microsoft Visual
C/C++ (C 8.0), large model, and assuming the libraries are in
the directory \msvc\lib, you would type
makedsig \msvc\lib\llibce.lib dccm8l.sig
Note that the signature files for Turbo Pascal from version 4 onwards
are generated by makedstp, not makedsig. Makedstp reads a
special file called turbo.tpl, as there are no normal .lib files for
turbo pascal. Dcc will recognise turbo pascal 3.0 files, and look
for dcct3p.sig. Because all the library routines are contained in
every Turbo Pascal executable, there are no library files or even a
turbo.tpl file, so the signature file would have to be constructed by
guesswork. You can still use dcc on these files; just ignore the
warning about not finding the signature file.
For executables that dcc does not recognise, it will look for the
signature file dccxxx.sig. That way, if you have a new compiler, you
can at least have dcc detect library calls, even if it attempts to
decompile them all, and has not identified the main program.
Logitech Modula V1.0 files are recognised, and the signature file
dccl1x.sig is looked for. This was experimental in nature, and is not
recommended for serious analysis at this stage.
4 What's in a signature file?
-----------------------------
The details of a signature file are best documented in the source for
makedsig; see the function saveFile(). Briefly:
1) a 4 byte pattern identifying the file as a signature file: "dccs".
2) a two byte integer containing the number of keys (signatures)
3) a two byte integer containing the number of vertices on the graph
used to generate the hash table. See the source code and/or the
Czech, Havas and Majewski articles for details
4) a two byte integer containing the pattern length
5) a two byte integer containing the symbolic name length
The next sections all have the following structure:
1) 2 char ID
2) a two byte integer containing the size of the body
3) the body.
There are 4 sections: "T1", "T2", "gg", and "ht". T1 and T2 are the
tables associated with the hash function. (The hash function is a
random function, meaning that it involves tables. T1 and T2 are the
tables used by the hash function). "gg" is another table associated
with the graph needed by the perfect hashing function algorithm.
"ht" contains the actual hash table. The body of this section is an
array of records of this structure:
typedef struct _hashEntry
{
char name[SYMLEN]; /* The symbol name */
byte pat [PATLEN]; /* The pattern */
word offset; /* Offset (needed temporarily) */
} HASHENTRY;
This part of the signature file can be browsed with a binary dump
program; a PATLEN length signature will follow the (null padded)
symbol name. There are tools for searching signature files, e.g.
srchsig, dispsig, and readsig. See below.
5 What other tools are useful for signature work?
-------------------------------------------------
Makedstp - makes signature files from turbo.tpl. Needed to make
signature files for Turbo Pascal version 4.0 and later.
SrchSig - tells you whether a given pattern exists in a signature
file, and gives its name. You need a binary file with the signature
in it, exactly the right length. This can most easily be done with
debug (comes with MS-DOS).
DispSig - given the name of a function, displays its signature, and
stores the signature into a binary file as well. (You can use this
file with srchsig on another signature file, if you want).
ReadSig - reads a signature file, checking for correct structure, and
displaying duplicate signatures. With the -a switch, it will display
all signatures, with their symbols.
The file perfhlib.c is used by various of these tools to do the work
of the perfect hashing functions. It could be used as part of other
tools that use signature files, or just perfect hashing functions for
that matter.

View File

117
tools/parsehdr/locident.h Normal file
View File

@ -0,0 +1,117 @@
/*$Log: locident.h,v $
* Revision 1.6 94/02/22 15:20:23 cifuente
* Code generation is done.
*
* Revision 1.5 93/12/10 09:38:20 cifuente
* New high-level types
*
* Revision 1.4 93/11/10 17:30:51 cifuente
* Procedure header, locals
*
* Revision 1.3 93/11/08 12:06:35 cifuente
* du1 analysis finished. Instantiates procedure arguments for user
* declared procedures.
*
* Revision 1.2 93/10/25 11:01:00 cifuente
* New SYNTHETIC instructions for d/u analysis
*
* Revision 1.1 93/10/11 11:47:39 cifuente
* Initial revision
*
* File: locIdent.h
* Purpose: High-level local identifier definitions
* Date: October 1993
*/
/* Type definition */
/* IDX_ARRAY: array of integer indexes together with its bookkeeping;
 * csym is the number of entries in use, alloc the number allocated.
 * Int must be defined by the including file before this header. */
typedef struct {
Int csym; /* # symbols used */
Int alloc; /* # symbols allocated */
Int *idx; /* Array of integer indexes */
} IDX_ARRAY;
/* Type definitions used in the decompiled program */
/* The enumerators are numbered consecutively from TYPE_UNKNOWN = 0 to
 * TYPE_DOUBLE = 12; the hlTypes[] string table below has one entry per
 * enumerator in the same order, so the two must be kept in step. */
typedef enum {
TYPE_UNKNOWN = 0, /* unknown so far */
TYPE_BYTE_SIGN, /* signed byte (8 bits) */
TYPE_BYTE_UNSIGN, /* unsigned byte */
TYPE_WORD_SIGN, /* signed word (16 bits) */
TYPE_WORD_UNSIGN, /* unsigned word (16 bits) */
TYPE_LONG_SIGN, /* signed long (32 bits) */
TYPE_LONG_UNSIGN, /* unsigned long (32 bits) */
TYPE_RECORD, /* record structure */
TYPE_PTR, /* pointer (32 bit ptr) */
TYPE_STR, /* string */
TYPE_CONST, /* constant (any type) */
TYPE_FLOAT, /* floating point */
TYPE_DOUBLE, /* double precision float */
} hlType;
/* C type names, one per hlType enumerator in declaration order (presumably
 * indexed by an hlType value -- confirm at the use sites). TYPE_UNKNOWN and
 * TYPE_CONST map to the empty string, TYPE_PTR to "int *" and TYPE_STR to
 * "char *". Being static in a header, every translation unit that includes
 * this file gets its own copy of the table. */
static char *hlTypes[13] = {"", "char", "unsigned char", "int", "unsigned int",
"long", "unsigned long", "record", "int *", "char *",
"", "float", "double"};
/* Where an identifier lives; stored in the loc field of ID */
typedef enum {
STK_FRAME, /* For stack vars */
REG_FRAME, /* For register variables */
GLB_FRAME, /* For globals */
} frameType;
/* Enumeration to determine whether pIcode points to the high or low part
 * of a long number */
typedef enum {
HIGH_FIRST, /* High value is first */
LOW_FIRST, /* Low value is first */
} hlFirst;
/* LOCAL_ID */
/* ID: one local identifier. It records the probable type, the frame
 * location, an optional macro and the name, plus a union 'id' holding the
 * location details; which union member applies depends on the type and on
 * where the identifier lives (register, stack, global or constant), as
 * noted on each member. Int, int16, byte, dword and boolT must be defined
 * by the including file before this header. */
typedef struct {
hlType type; /* Probable type */
boolT illegal;/* Boolean: not a valid field any more */
IDX_ARRAY idx; /* Index into icode array (REG_FRAME only) */
frameType loc; /* Frame location */
boolT hasMacro;/* Identifier requires a macro */
char macro[10];/* Macro for this identifier */
char name[20];/* Identifier's name */
union { /* Different types of identifiers */
byte regi; /* For TYPE_BYTE(WORD)_(UN)SIGN registers */
struct { /* For TYPE_BYTE(WORD)_(UN)SIGN on the stack */
byte regOff; /* register offset (if any) */
Int off; /* offset from BP */
} bwId;
struct _bwGlb { /* For TYPE_BYTE(WORD)_(UN)SIGN globals */
int16 seg; /* segment value */
int16 off; /* offset */
byte regi; /* optional indexed register */
} bwGlb;
struct _longId{ /* For TYPE_LONG_(UN)SIGN registers */
byte h; /* high register */
byte l; /* low register */
} longId;
struct _longStkId { /* For TYPE_LONG_(UN)SIGN on the stack */
Int offH; /* high offset from BP */
Int offL; /* low offset from BP */
} longStkId;
struct { /* For TYPE_LONG_(UN)SIGN globals */
int16 seg; /* segment value */
int16 offH; /* offset high */
int16 offL; /* offset low */
byte regi; /* optional indexed register */
} longGlb;
struct { /* For TYPE_LONG_(UN)SIGN constants */
dword h; /* high word */
dword l; /* low word */
} longKte;
} id;
} ID;
/* LOCAL_ID: table of local identifiers; csym entries are in use out of
 * alloc allocated, and id points at the ID records. */
typedef struct {
Int csym; /* No. of symbols in the table */
Int alloc; /* No. of symbols allocated */
ID *id; /* Identifier */
} LOCAL_ID;

1538
tools/parsehdr/parsehdr.cpp Normal file

File diff suppressed because it is too large Load Diff

98
tools/parsehdr/parsehdr.h Normal file
View File

@ -0,0 +1,98 @@
/*
*$Log: parsehdr.h,v $
*/
/* Header file for parsehdr.c */
/* Basic fixed size types used throughout parsehdr.
 * NOTE(review): the widths in the comments hold for the original 16 bit DOS
 * compilers; on LP64 targets (e.g. 64 bit Linux) unsigned long is 64 bits,
 * so dword is not 32 bits there -- confirm nothing depends on its size. */
typedef unsigned long dword; /* 32 bits */
typedef unsigned char byte; /* 8 bits */
typedef unsigned short word; /* 16 bits */
typedef unsigned char boolT; /* 8 bits */
#define TRUE 1
#define FALSE 0
#define BUFF_SIZE 8192 /* Holds a declaration */
#define FBUF_SIZE 32700 /* Holds part of a header file */
#define NARGS 15
#define NAMES_L 160
#define TYPES_L 160
#define FUNC_L 160
#define ERRF stdout
void phError(char *errmsg);
void phWarning(char *errmsg);
#define ERR(msg) phError(msg)
#ifdef DEBUG
#define DBG(str) printf(str);
#else
#define DBG(str) ;
#endif
#define WARN(msg) phWarning(msg)
#define OUT(str) fprintf(outfile, str)
#define PH_PARAMS 32
#define PH_NAMESZ 15
#define SYMLEN 16 /* Including the null */
#define Int long /* For locident.h */
#define int16 short int /* For locident.h */
#include "locident.h" /* For the hlType enum */
#define bool unsigned char /* For internal use */
#define TRUE 1
#define FALSE 0
/* PH_FUNC_STRUCT: description of one function: its name, return type and
 * argument count, the index of its first argument, and the index of the
 * next function in its chain. The bVararg field uses the bool macro
 * defined earlier in this header (unsigned char). */
typedef
struct ph_func_tag
{
char name[SYMLEN]; /* Name of function or arg */
hlType typ; /* Return type */
int numArg; /* Number of args */
int firstArg; /* Index of first arg in chain */
int next; /* Index of next function in chain */
bool bVararg; /* True if variable num args */
} PH_FUNC_STRUCT;
/* PH_ARG_STRUCT: description of one function parameter (name and type) */
typedef
struct ph_arg_tag
{
char name[SYMLEN]; /* Name of function or arg */
hlType typ; /* Parameter type */
} PH_ARG_STRUCT;
#define DELTA_FUNC 32 /* Number to alloc at once */
#define PH_JUNK 0 /* LPSTR buffer, nothing happened */
#define PH_PROTO 1 /* LPPH_FUNC ret val, func name, args */
#define PH_FUNCTION 2 /* LPPH_FUNC ret val, func name, args */
#define PH_TYPEDEF 3 /* LPPH_DEF definer and definee */
#define PH_DEFINE 4 /* LPPH_DEF definer and definee */
#define PH_ERROR 5 /* LPSTR error string */
#define PH_WARNING 6 /* LPSTR warning string */
#define PH_MPROTO 7 /* ????? multi proto???? */
#define PH_VAR 8 /* ????? var decl */
/* PROTOS */
boolT phData(char *buff, int ndata);
boolT phPost(void);
boolT phFree(void);
void checkHeap(char *msg); /* For debugging only */
void phBuffToFunc(char *buff);
void phBuffToDef(char *buff);
#define TOK_TYPE 256 /* A type name (e.g. "int") */
#define TOK_NAME 257 /* A function or parameter name */
#define TOK_DOTS 258 /* "..." */
#define TOK_EOL 259 /* End of line */
/* Base types distinguished by the header parser; BT_UNKWN is the last
 * enumerator (presumably meaning "not recognised" -- confirm in
 * parsehdr.cpp). */
typedef enum
{
BT_INT, BT_CHAR, BT_FLOAT, BT_DOUBLE, BT_STRUCT, BT_VOID, BT_UNKWN
} baseType;

217
tools/parsehdr/parsehdr.txt Normal file
View File

@ -0,0 +1,217 @@
PARSEHDR
1 What is ParseHdr?
2 What is dcclibs.dat?
3 How do I use ParseHdr?
4 What about languages other than C?
5 What is the structure of the dcclibs.dat file?
6 What are all these errors, and why do they happen?
1 What is ParseHdr?
-------------------
ParseHdr is a program that creates a special prototype file for DCC
from a set of include files (.h files). This allows DCC to be aware
of the type of library function arguments, and return types. The file
produced is called dcclibs.dat. ParseHdr is designed specifically for
C header files.
As an example, this is what allows DCC to recognise that printf has
(at least) a string argument, and so converts the first argument from
a numeric constant to a string. So you get
printf("Hello world")
instead of
printf(0x42).
2 What is dcclibs.dat?
----------------------
dcclibs.dat is the file created by the ParseHdr program. It contains
a list of function names and parameter and return types. See section
5 for details of the contents of the file.
3 How do I use ParseHdr?
------------------------
To use ParseHdr you need a file containing a list of header files,
like this:
\tc\include\alloc.h
\tc\include\assert.h
\tc\include\bios.h
...
\tc\include\time.h
There must be one file per line, no blank lines, and unless the
header files are in the current directory, a full path must be given.
The easiest way to create such a file is to redirect the output of a
dir command to a file, like this:
c>dir \tc\include\*.h > tcfiles.lst
and then edit the resultant file. Note that the path will not be
included in this, so you will have to add that manually. Remove
everything after the .h, such as file size, date, etc.
Once you have this file, you can run parsehdr:
parsehdr <listfile>
For example,
parsehdr tcfiles.lst
You will get some messages indicating which files are being
processed, but also some error messages. Just ignore the error
messages, see section 6 for why they occur.
4 What about languages other than C?
-----------------------------------------
ParseHdr will only work on C header files. It would be possible to
process files for other languages that contained type information, to
produce a dcclibs.dat file specific to that language. Ideally, DCC
should look for a different file for each language, but since only a
C version of dcclibs.dat has so far been created, this has not been
done.
Prototype information for Turbo Pascal exists in the file turbo.tpl,
at least for things like the graphics library, so it would be
possible for MakeDsTp to produce a dcclibs.dat file as well as the
signature file. However, the format of the turbo.tpl file is not
documented by Borland; for details see
W. L. Peavy, "Inside Turbo Pascal 6.0 Units", Public domain software
file tpu6doc.txt in tpu6.zip. Anonymous ftp from garbo.uwasa.fi and
mirrors, directory /pc/turbopas, 1991.
5 What is the structure of the dcclibs.dat file?
------------------------------------------------
The first 4 bytes are "dccp", identifying it as a DCC prototype file.
After this, there are two sections.
The first section begins with "FN", for Function Names. It is
followed by a two byte integer giving the number of function names
stored. The remainder of this section is an array of structures, one
per function name. Each has this structure:
char Name[SYMLEN]; /* Name of the function, NULL terminated */
int type; /* A 2 byte integer describing the return type */
int numArg; /* The number of arguments */
int firstArg; /* The index of the first arg, see below */
char bVarArg; /* 1 if variable arguments, 0 otherwise */
SYMLEN is 16, allowing 15 chars before the NULL. Therefore, the length
of this structure is 23 bytes.
The types are as defined in locident.h (actually a part of dcc), and
at present are as follows:
typedef enum {
TYPE_UNKNOWN = 0, /* unknown so far 00 */
TYPE_BYTE_SIGN, /* signed byte (8 bits) 01 */
TYPE_BYTE_UNSIGN, /* unsigned byte 02 */
TYPE_WORD_SIGN, /* signed word (16 bits) 03 */
TYPE_WORD_UNSIGN, /* unsigned word (16 bits) 04 */
TYPE_LONG_SIGN, /* signed long (32 bits) 05 */
TYPE_LONG_UNSIGN, /* unsigned long (32 bits) 06 */
TYPE_RECORD, /* record structure 07 */
TYPE_PTR, /* pointer (32 bit ptr) 08 */
TYPE_STR, /* string 09 */
TYPE_CONST, /* constant (any type) 0A */
TYPE_FLOAT, /* floating point 0B */
TYPE_DOUBLE, /* double precision float 0C */
} hlType;
firstArg is an index into the array in the second section.
The second section begins with "PM" (for Parameters). It is followed
by a 2 byte integer giving the number of parameter records. After
this is the array of parameter structures. Initially, the names of the
parameters were being stored, but this has been removed at present.
The parameter structure is therefore now just a single 2 byte
integer, representing the type of that argument.
The way it all fits together is perhaps best described by an example.
Let's consider this entry in dcclibs.dat:
73 74 72 63 6D 70 00 ; "strcmp"
00 00 00 00 00 00 00 00 00 ; Padding to 16 bytes
03 00 ; Return type 3, TYPE_WORD_SIGN
02 00 ; 2 arguments
15 02 ; First arg is 0215
00 ; Not var args
If we now skip to the "PM" part of the file, skip the number of
arguments word, then skip 215*2 = 42A bytes, we find this:
09 00 09 00 09 00 ...
The first 09 00 (TYPE_STR) refers to the type of the first parameter,
and the second to the second parameter. There are only 2 arguments,
so the third 09 00 refers to the first parameter of the next
function. So both parameters are strings, as is expected.
For functions with variable parameters, bVarArg is set to 01, and the
number of parameters reported is the number of fixed parameters. Here
is another example:
66 70 72 69 6E 74 66 00 ; "fprintf"
00 00 00 00 00 00 00 00 ; padding
03 00 ; return type 3, TYPE_WORD_SIGN
02 00 ; 2 fixed args
81 01 ; First arg at index 0181
01 ; Var args
and in the "PM" section at offset 181*2 = 0302, we find 08 00 09 00
03 00 meaning that the first parameter is a pointer (in fact, we know
it's a FILE *), and the second parameter is a string.
6 What are all these errors, and why do they happen?
----------------------------------------------------
When you run ParseHdr, as well as the progress statements like
Processing \tc\include\alloc.h ...
you can get error messages. Basically, ignore these errors. They occur
for a variety of reasons, most of which are detailed below.
1)
Expected type: got ) (29)
void __emit__()
^
This include file contained a non-ANSI prototype. This is rare, and
__emit__ is not a standard function anyway. If it really bothers you,
you could add the word "void" to the empty parentheses in your
include file.
2)
Expected ',' between parameter defs: got ( (28)
void _Cdecl ctrlbrk (int _Cdecl (*handler)(void))
Here "handler" is a pointer to a function. Being a basically simple
program, ParseHdr does not expand all typedef and #define statements,
so it cannot distinguish between types and user defined function
names. Therefore, it is not possible in general to parse any
prototypes containing pointers to functions, so at this stage, any
such prototypes will produce an error of some sort. DCC cannot
currently make use of this type information anyway, so this is no
real loss. There are typically half a dozen such errors.
3)
Unknown type time_t
Types (such as time_t) that are structures or pointers to structures
are not handled by ParseHdr, since typedef and #define statements are
ignored. Again, there are typically only about a dozen of these.

View File

@ -0,0 +1,8 @@
# Makefile for parselib.exe: compiles parselib.c to parselib.obj and links it.
# NOTE(review): the flags and the cl/link commands look like 16-bit Microsoft C
# tooling (-AS presumably selects the small memory model) -- confirm against
# the compiler documentation before reusing them.
CFLAGS = -Zi -c -AS -W3 -D__MSDOS__
parselib.exe: parselib.obj
link /CO parselib;
parselib.obj: parselib.c
cl $(CFLAGS) $*.c

View File

@ -0,0 +1,24 @@
\tc\include\alloc.h
\tc\include\assert.h
\tc\include\bios.h
\tc\include\conio.h
\tc\include\ctype.h
\tc\include\dir.h
\tc\include\dos.h
\tc\include\errno.h
\tc\include\fcntl.h
\tc\include\float.h
\tc\include\io.h
\tc\include\limits.h
\tc\include\math.h
\tc\include\mem.h
\tc\include\process.h
\tc\include\setjmp.h
\tc\include\share.h
\tc\include\signal.h
\tc\include\stdarg.h
\tc\include\stddef.h
\tc\include\stdio.h
\tc\include\stdlib.h
\tc\include\string.h
\tc\include\time.h

View File

239
tools/readsig/readsig.cpp Normal file
View File

@ -0,0 +1,239 @@
/* Quick program to read the output from makedsig */
#include <stdio.h>
#include <io.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>
#include "perfhlib.h"
/* File-scope state shared by main() and the helper functions below */
/* Scratch buffer filled by grab(); holds tags and symbol+pattern records */
byte buf[100];
int numKeys; /* Number of hash table entries (keys) */
int numVert; /* Number of vertices in the graph (also size of g[]) */
int PatLen; /* Size of the keys (pattern length) */
int SymLen; /* Max size of the symbols, including null */
FILE *f; /* File being read */
static word *T1base, *T2base; /* Pointers to start of T1, T2 */
static word *g; /* g[] */
/* Prototypes for the helpers defined after main() */
void grab(int n);
word readFileShort(void);
void cleanup(void);
/* Set by the -a command line switch: list every symbol, not just mismatches */
static bool bDispAll = FALSE;
/*
 * readsig entry point: validate a dcc signature ("dccs") file and report
 * every symbol whose pattern does not hash back to its own table index.
 *
 * Usage: readsig [-a] <SigFilename>
 *   -a   first dump every symbol name together with its pattern in hex
 *
 * Exit status: 0 success, 1 bad usage, 2 cannot open the file, 3 missing
 * file or section tag, 4 T1/T2/g size mismatch, 5 short read of a table,
 * 6 hash table size mismatch, 7 file position unavailable.  grab() and
 * readFileShort() exit with 11 on a short read.
 */
int
main(int argc, char *argv[])
{
    word w, len;
    int h, i, j;
    long filePos;
    size_t htSize;      /* Expected byte size of the "ht" section */

    i = 1;
    if (argc > 1 && strcmp(argv[i], "-a") == 0)
    {
        i++;
        bDispAll = TRUE;
    }
    /* A file name must remain after the optional switch; "-a" on its own
       would otherwise hand a null pointer to fopen() */
    if (i >= argc)
    {
        printf("Usage: readsig [-a] <SigFilename>\n");
        printf("-a for all symbols (else just duplicates)\n");
        exit(1);
    }
    if ((f = fopen(argv[i], "rb")) == NULL)
    {
        printf("Cannot open %s\n", argv[i]);
        exit(2);
    }

    /* Read the parameters */
    grab(4);
    if (memcmp("dccs", buf, 4) != 0)
    {
        printf("Not a dccs file!\n");
        exit(3);
    }
    numKeys = readFileShort();
    numVert = readFileShort();
    PatLen = readFileShort();
    SymLen = readFileShort();

    /* Initialise the perfhlib stuff. Also allocates T1, T2, g, etc */
    hashParams(     /* Set the parameters for the hash table */
        numKeys,    /* The number of symbols */
        PatLen,     /* The length of the pattern to be hashed */
        256,        /* The character set of the pattern (0-FF) */
        0,          /* Minimum pattern character value */
        numVert);   /* Specifies C, the sparseness of the graph.
                       See Czech, Havas and Majewski for details */
    T1base = readT1();
    T2base = readT2();
    g = readG();

    /* Read T1 and T2 tables */
    grab(2);
    if (memcmp("T1", buf, 2) != 0)
    {
        printf("Expected 'T1'\n");
        exit(3);
    }
    len = PatLen * 256 * sizeof(word);
    w = readFileShort();
    if (w != len)
    {
        printf("Problem with size of T1: file %d, calc %d\n", w, len);
        exit(4);
    }
    if (fread(T1base, 1, len, f) != len)
    {
        printf("Could not read T1\n");
        exit(5);
    }

    grab(2);
    if (memcmp("T2", buf, 2) != 0)
    {
        printf("Expected 'T2'\n");
        exit(3);
    }
    w = readFileShort();
    if (w != len)
    {
        printf("Problem with size of T2: file %d, calc %d\n", w, len);
        exit(4);
    }
    if (fread(T2base, 1, len, f) != len)
    {
        printf("Could not read T2\n");
        exit(5);
    }

    /* Now read the function g[] */
    grab(2);
    if (memcmp("gg", buf, 2) != 0)
    {
        printf("Expected 'gg'\n");
        exit(3);
    }
    len = numVert * sizeof(word);
    w = readFileShort();
    if (w != len)
    {
        printf("Problem with size of g[]: file %d, calc %d\n", w, len);
        exit(4);
    }
    if (fread(g, 1, len, f) != len)
    {
        /* Name the table that actually failed (this used to say T2) */
        printf("Could not read g[]\n");
        exit(5);
    }

    /* This is now the hash table */
    grab(2);
    if (memcmp("ht", buf, 2) != 0)
    {
        printf("Expected 'ht'\n");
        exit(3);
    }
    w = readFileShort();
    htSize = numKeys * (SymLen + PatLen + sizeof(word));
    if (w != htSize)
    {
        /* Report the size computed for this section, not the stale size
           of g[] that is still held in len */
        printf("Problem with size of hash table: file %d, calc %lu\n",
               w, (unsigned long)htSize);
        exit(6);
    }

    if (bDispAll)
    {
        /* Remember where the records start so the verification pass below
           can re-read them after the dump */
        filePos = ftell(f);
        if (filePos < 0)
        {
            printf("Could not determine file position\n");
            exit(7);
        }
        for (i = 0; i < numKeys; i++)
        {
            grab(SymLen + PatLen);
            printf("%16s ", buf);
            for (j = 0; j < PatLen; j++)
            {
                printf("%02X", buf[SymLen+j]);
                if ((j%4) == 3) printf(" ");
            }
            printf("\n");
        }
        printf("\n\n\n");
        fseek(f, filePos, SEEK_SET);
    }

    /* Every record must hash to its own position in the table */
    for (i = 0; i < numKeys; i++)
    {
        grab(SymLen + PatLen);
        h = hash(&buf[SymLen]);
        if (h != i)
        {
            printf("Symbol %16s (index %3d) hashed to %d\n",
                   buf, i, h);
        }
    }
    printf("Done!\n");
    fclose(f);
    return 0;
}
/* Release the variable sized tables (T1, T2 and g[]) */
void
cleanup(void)
{
    /* free() ignores a null pointer, so no explicit guards are required */
    free(g);
    free(T2base);
    free(T1base);
}
/*
 * Read exactly n bytes from the file f into the scratch buffer buf.
 * Prints a message and exits with status 11 if n does not fit in buf or
 * if fewer than n bytes could be read.
 */
void grab(int n)
{
    /* Callers pass lengths derived from values stored in the file being
       read, so a damaged file must not be allowed to overrun buf */
    if (n < 0 || (size_t)n > sizeof(buf))
    {
        printf("Record of %d bytes does not fit in the read buffer\n", n);
        exit(11);
    }
    if (fread(buf, 1, (size_t)n, f) != (size_t)n)
    {
        printf("Could not read\n");
        exit(11);
    }
}
/*
 * Read a 2 byte integer from the file f, low byte first.
 * Prints a message and exits with status 11 if either byte cannot be read.
 */
word
readFileShort(void)
{
    byte octet[2];      /* octet[0] is the low byte, octet[1] the high byte */
    int k;

    for (k = 0; k < 2; k++)
    {
        if (fread(&octet[k], 1, 1, f) != 1)
        {
            printf("Could not read\n");
            exit(11);
        }
    }
    return (word)(octet[0] + (octet[1] << 8));
}
/* Only needed when creating the hash tables; readsig merely reads them, so
   this is an intentionally empty stub */
void getKey(int i, byte **keys)
{
    (void)i;
    (void)keys;
}
/* Display key i -- intentionally a no-op in this program */
void
dispKey(int i)
{
    (void)i;
}

11
tools/readsig/readsig.mak Normal file
View File

@ -0,0 +1,11 @@
# Makefile for readsig.exe: compiles readsig and perfhlib, then links them.
# NOTE(review): the rules name readsig.c, perfhlib.c and dcc.h, while the
# source is listed as tools/readsig/readsig.cpp -- confirm the file names
# and locations before using this makefile.
# NOTE(review): the flags and the cl/link commands look like 16-bit Microsoft C
# tooling (-AL presumably selects the large memory model) -- confirm.
CFLAGS = -Zi -c -AL -W3 -D__MSDOS__
readsig.exe: readsig.obj perfhlib.obj
link /CO readsig perfhlib;
readsig.obj: readsig.c dcc.h perfhlib.h
cl $(CFLAGS) $*.c
perfhlib.obj: perfhlib.c dcc.h perfhlib.h
cl $(CFLAGS) $*.c

97
tools/readsig/readsig.txt Normal file
View File

@ -0,0 +1,97 @@
READSIG
1 What is ReadSig?
2 How do I use ReadSig?
3 What are duplicate signatures?
4 How can I make sense of the signatures?
1 What is ReadSig?
------------------
ReadSig is a quick and dirty program to read signatures from a DCC
signature file. It was originally written as an integrity checker for
signature files, but can now be used to see what's in a signature
file, and which functions have duplicate signatures.
2 How do I use ReadSig?
-----------------------
Just type
readsig <sigfilename>
or
readsig -a <sigfilename>
For example:
readsig -a dcct2p.sig
Either way, you get a list of duplicate signatures, i.e. functions
whose first 23 bytes, after wildcarding and chopping (see section 3
for details), are identical.
With the -a switch, you also (before the above) get a list of all
symbolic names in the signature file, and the signatures themselves
in hex. This could be a dozen or more pages for large signature
files.
Currently, signatures are 23 bytes long, and the symbolic names are
truncated to 15 characters.
3 What are duplicate signatures?
--------------------------------
Duplicate signatures arise for 3 reasons. 1: length of the signature.
2: wildcards. 3: chopping of the signature.
1: Because signatures are only 23 bytes long, there is a chance that
two distinct signatures (first part of the binary image of a
function) are identical in the first 23 bytes, but diverge later.
2: Because part of the binary image of a function depends on where it
is loaded, parts of the signature are replaced with wildcards. It is
possible that two functions are distinct only in places that are
replaced by the wildcard byte (F4).
3: Signatures are "chopped" (cut short, and the remainder filled with
binary zeroes) after an unconditional branch or subroutine return.
This is to cope with functions shorter than the 23 byte size of
signatures, so unrelated functions are not included at the end of a
signature. (This would cause dcc to fail to recognise these short
signatures if some other function happened to be loaded at the end).
The effect of duplicate signatures is that only one of the functions
that has the same signature will be recognised. For example, suppose
that sin, cos, and tan were just one wildcarded instruction followed
by a jump to the same piece of code. Then all three would have the
same signature, and calls to sin, cos, or tan would all be reported
by dcc as just one of these, e.g. tan. If you suspect that this is
happening, then at least ReadSig can alert you to this problem.
In general, the number of duplicate signatures that would actually be
used in dcc is small, but it is possible that the above problem will
occur.
4 How can I make sense of the signatures?
-----------------------------------------
If you're one of those unfortunate individuals that can't decode hex
instructions in your head, you can always use DispSig to copy it to a
binary file, since you now know the name of the function. Then you
can use debug or some other debugger to disassemble the binary file.
Generally, most entries in signature files will be executable code,
so it should disassemble readily.
Be aware that signatures are wildcarded, so don't pay any attention
to the destination of jmp or call instructions (3 or 5 byte
jumps, anyway; 2 byte jumps are not wildcarded), and 16 bit immediate
values. The latter will always be F4F4 (two wildcard bytes),
regardless of what they were in the original function.