From a697ad05c04e87c421bf939af4d009155f6b2cd0 Mon Sep 17 00:00:00 2001 From: nemerle Date: Tue, 10 Feb 2015 17:28:50 +0100 Subject: [PATCH] Add original dcc tools to repository * makedsig has been integrated with makedstp, it should handle both LIB and TPL files * other tools have not been modified --- CMakeLists.txt | 17 +- common/CMakeLists.txt | 7 + common/PatternCollector.h | 5 + common/perfhlib.cpp | 440 +++++++ common/perfhlib.h | 37 + include/perfhlib.h | 38 - src/DccFrontend.cpp | 10 +- src/perfhlib.cpp | 101 -- tools/CMakeLists.txt | 1 + tools/dispsrch/dispsig.cpp | 248 ++++ tools/dispsrch/dispsig.mak | 11 + tools/dispsrch/dispsrch.txt | 221 ++++ tools/dispsrch/srchsig.cpp | 287 +++++ tools/dispsrch/srchsig.mak | 14 + tools/makedsig/CMakeLists.txt | 11 + tools/makedsig/LIB_PatternCollector.cpp | 7 + tools/makedsig/LIB_PatternCollector.h | 11 + tools/makedsig/TPL_PatternCollector.cpp | 1 + tools/makedsig/TPL_PatternCollector.h | 5 + tools/makedsig/fixwild.cpp | 525 ++++++++ tools/makedsig/makedsig.cpp | 175 +++ tools/makedsig/makedsig.txt | 188 +++ tools/parsehdr/CMakeLists.txt | 0 tools/parsehdr/locident.h | 117 ++ tools/parsehdr/parsehdr.cpp | 1538 +++++++++++++++++++++++ tools/parsehdr/parsehdr.h | 98 ++ tools/parsehdr/parsehdr.txt | 217 ++++ tools/parsehdr/parselib.mak | 8 + tools/parsehdr/tcfiles.lst | 24 + tools/readsig/CMakeLists.txt | 0 tools/readsig/readsig.cpp | 239 ++++ tools/readsig/readsig.mak | 11 + tools/readsig/readsig.txt | 97 ++ 33 files changed, 4560 insertions(+), 149 deletions(-) create mode 100644 common/CMakeLists.txt create mode 100644 common/PatternCollector.h create mode 100644 common/perfhlib.cpp create mode 100644 common/perfhlib.h delete mode 100644 include/perfhlib.h delete mode 100644 src/perfhlib.cpp create mode 100644 tools/CMakeLists.txt create mode 100644 tools/dispsrch/dispsig.cpp create mode 100644 tools/dispsrch/dispsig.mak create mode 100644 tools/dispsrch/dispsrch.txt create mode 100644 tools/dispsrch/srchsig.cpp 
create mode 100644 tools/dispsrch/srchsig.mak create mode 100644 tools/makedsig/CMakeLists.txt create mode 100644 tools/makedsig/LIB_PatternCollector.cpp create mode 100644 tools/makedsig/LIB_PatternCollector.h create mode 100644 tools/makedsig/TPL_PatternCollector.cpp create mode 100644 tools/makedsig/TPL_PatternCollector.h create mode 100644 tools/makedsig/fixwild.cpp create mode 100644 tools/makedsig/makedsig.cpp create mode 100644 tools/makedsig/makedsig.txt create mode 100644 tools/parsehdr/CMakeLists.txt create mode 100644 tools/parsehdr/locident.h create mode 100644 tools/parsehdr/parsehdr.cpp create mode 100644 tools/parsehdr/parsehdr.h create mode 100644 tools/parsehdr/parsehdr.txt create mode 100644 tools/parsehdr/parselib.mak create mode 100644 tools/parsehdr/tcfiles.lst create mode 100644 tools/readsig/CMakeLists.txt create mode 100644 tools/readsig/readsig.cpp create mode 100644 tools/readsig/readsig.mak create mode 100644 tools/readsig/readsig.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 849691b..eafc42a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,16 +26,23 @@ enable_testing() FIND_PACKAGE(GMock) ENDIF() -ADD_SUBDIRECTORY(3rd_party) -llvm_map_components_to_libraries(REQ_LLVM_LIBRARIES jit native mc support tablegen) +llvm_map_components_to_libnames(REQ_LLVM_LIBRARIES jit native mc support tablegen) INCLUDE_DIRECTORIES( 3rd_party/libdisasm include include/idioms + common ${Boost_INCLUDE_DIRS} ${LLVM_INCLUDE_DIRS} ) + + +ADD_SUBDIRECTORY(3rd_party) +ADD_SUBDIRECTORY(common) +ADD_SUBDIRECTORY(tools) + + set(dcc_LIB_SOURCES src/CallConvention.cpp src/ast.cpp @@ -67,7 +74,6 @@ set(dcc_LIB_SOURCES src/locident.cpp src/liveness_set.cpp src/parser.cpp - src/perfhlib.cpp src/procs.cpp src/project.cpp src/Procedure.cpp @@ -106,7 +112,6 @@ set(dcc_HEADERS include/idioms/xor_idioms.h include/locident.h include/CallConvention.h - include/perfhlib.h include/project.h include/scanner.h include/state.h @@ -118,6 +123,7 @@ set(dcc_HEADERS 
include/dcc_interface.h ) + SOURCE_GROUP(Source FILES ${dcc_SOURCES}) SOURCE_GROUP(Headers FILES ${dcc_HEADERS}) @@ -127,11 +133,10 @@ qt5_use_modules(dcc_lib Core) ADD_EXECUTABLE(dcc_original ${dcc_SOURCES} ${dcc_HEADERS}) ADD_DEPENDENCIES(dcc_original dcc_lib) -TARGET_LINK_LIBRARIES(dcc_original dcc_lib disasm_s ${REQ_LLVM_LIBRARIES} ncurses LLVMSupport) +TARGET_LINK_LIBRARIES(dcc_original dcc_lib dcc_hash disasm_s ${REQ_LLVM_LIBRARIES} ncurses LLVMSupport) qt5_use_modules(dcc_original Core) #ADD_SUBDIRECTORY(gui) if(dcc_build_tests) ADD_SUBDIRECTORY(src) endif() - diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt new file mode 100644 index 0000000..f8c5255 --- /dev/null +++ b/common/CMakeLists.txt @@ -0,0 +1,7 @@ +set(SRC +perfhlib.cpp +perfhlib.h +PatternCollector.h + +) +add_library(dcc_hash STATIC ${SRC}) diff --git a/common/PatternCollector.h b/common/PatternCollector.h new file mode 100644 index 0000000..0cd4996 --- /dev/null +++ b/common/PatternCollector.h @@ -0,0 +1,5 @@ +#ifndef PATTERNCOLLECTOR +#define PATTERNCOLLECTOR + +#endif // PATTERNCOLLECTOR + diff --git a/common/perfhlib.cpp b/common/perfhlib.cpp new file mode 100644 index 0000000..af4ad44 --- /dev/null +++ b/common/perfhlib.cpp @@ -0,0 +1,440 @@ +/* + *$Log: perfhlib.c,v $ + * Revision 1.5 93/09/29 14:45:02 emmerik + * Oops, didn't do the casts last check in + * + * Revision 1.4 93/09/29 14:41:45 emmerik + * Added casts to mod instructions to keep the SVR4 compiler happy + * + * + * Perfect hashing function library. 
Contains functions to generate perfect + * hashing functions + */ +#include "perfhlib.h" +#include "PatternCollector.h" + +#include +#include +#include +#include +/* Private data structures */ + +//static int NumEntry; /* Number of entries in the hash table (# keys) */ +//static int EntryLen; /* Size (bytes) of each entry (size of keys) */ +//static int SetSize; /* Size of the char set */ +//static char SetMin; /* First char in the set */ +//static int NumVert; /* c times NumEntry */ + +//static uint16_t *T1base, *T2base; /* Pointers to start of T1, T2 */ +static uint16_t *T1, *T2; /* Pointers to T1[i], T2[i] */ + +static int *graphNode; /* The array of edges */ +static int *graphNext; /* Linked list of edges */ +static int *graphFirst;/* First edge at a vertex */ + + +static int numEdges; /* An edge counter */ +static bool *visited; /* Array of bools: whether visited */ +static bool *deleted; /* Array of bools: whether deleted */ + +/* Private prototypes */ +static void initGraph(void); +static void addToGraph(int e, int v1, int v2); +static bool isCycle(void); +static void duplicateKeys(int v1, int v2); + +void PerfectHash::setHashParams(int _NumEntry, int _EntryLen, int _SetSize, char _SetMin, + int _NumVert) +{ + /* These parameters are stored in statics so as to obviate the need for + passing all these (or defererencing pointers) for every call to hash() + */ + + NumEntry = _NumEntry; + EntryLen = _EntryLen; + SetSize = _SetSize; + SetMin = _SetMin; + NumVert = _NumVert; + + /* Allocate the variable sized tables etc */ + if ((T1base = (uint16_t *)malloc(EntryLen * SetSize * sizeof(uint16_t))) == 0) + { + goto BadAlloc; + } + if ((T2base = (uint16_t *)malloc(EntryLen * SetSize * sizeof(uint16_t))) == 0) + { + goto BadAlloc; + } + + if ((graphNode = (int *)malloc((NumEntry*2 + 1) * sizeof(int))) == 0) + { + goto BadAlloc; + } + if ((graphNext = (int *)malloc((NumEntry*2 + 1) * sizeof(int))) == 0) + { + goto BadAlloc; + } + if ((graphFirst = (int 
*)malloc((NumVert + 1) * sizeof(int))) == 0) + { + goto BadAlloc; + } + + if ((g = (short *)malloc((NumVert+1) * sizeof(short))) == 0) + { + goto BadAlloc; + } + if ((visited = (bool *)malloc((NumVert+1) * sizeof(bool))) == 0) + { + goto BadAlloc; + } + if ((deleted = (bool *)malloc((NumEntry+1) * sizeof(bool))) == 0) + { + goto BadAlloc; + } + return; + +BadAlloc: + printf("Could not allocate memory\n"); + hashCleanup(); + exit(1); +} + +void PerfectHash::hashCleanup(void) +{ + /* Free the storage for variable sized tables etc */ + if (T1base) free(T1base); + if (T2base) free(T2base); + if (graphNode) free(graphNode); + if (graphNext) free(graphNext); + if (graphFirst) free(graphFirst); + if (g) free(g); + if (visited) free(visited); + if (deleted) free(deleted); +} + +void PerfectHash::map(PatternCollector *collector) +{ + m_collector = collector; + assert(nullptr!=collector); + int i, j, c; + uint16_t f1, f2; + bool cycle; + uint8_t *keys; + + c = 0; + + do + { + initGraph(); + cycle = false; + + /* Randomly generate T1 and T2 */ + for (i=0; i < SetSize*EntryLen; i++) + { + T1base[i] = rand() % NumVert; + T2base[i] = rand() % NumVert; + } + + for (i=0; i < NumEntry; i++) + { + f1 = 0; f2 = 0; + keys = m_collector->getKey(i); + for (j=0; j < EntryLen; j++) + { + T1 = T1base + j * SetSize; + T2 = T2base + j * SetSize; + f1 += T1[keys[j] - SetMin]; + f2 += T2[keys[j] - SetMin]; + } + f1 %= (uint16_t)NumVert; + f2 %= (uint16_t)NumVert; + if (f1 == f2) + { + /* A self loop. Reject! */ + printf("Self loop on vertex %d!\n", f1); + cycle = true; + break; + } + addToGraph(numEdges++, f1, f2); + } + if (cycle || (cycle = isCycle())) /* OK - is there a cycle? 
*/ + { + printf("Iteration %d\n", ++c); + } + else + { + break; + } + } + while (/* there is a cycle */ 1); + +} + +/* Initialise the graph */ +void PerfectHash::initGraph() +{ + int i; + + for (i=1; i <= NumVert; i++) + { + graphFirst[i] = 0; + } + + for (i= -NumEntry; i <= NumEntry; i++) + { + /* No need to init graphNode[] as they will all be filled by successive + calls to addToGraph() */ + graphNext[NumEntry+i] = 0; + } + + numEdges = 0; +} + +/* Add an edge e between vertices v1 and v2 */ +/* e, v1, v2 are 0 based */ +void PerfectHash::addToGraph(int e, int v1, int v2) +{ + e++; v1++; v2++; /* So much more convenient */ + + graphNode[NumEntry+e] = v2; /* Insert the edge information */ + graphNode[NumEntry-e] = v1; + + graphNext[NumEntry+e] = graphFirst[v1]; /* Insert v1 to list of alphas */ + graphFirst[v1]= e; + graphNext[NumEntry-e] = graphFirst[v2]; /* Insert v2 to list of omegas */ + graphFirst[v2]= -e; + +} + +bool PerfectHash::DFS(int parentE, int v) +{ + int e, w; + + /* Depth first search of the graph, starting at vertex v, looking for + cycles. parent and v are origin 1. Note parent is an EDGE, + not a vertex */ + + visited[v] = true; + + /* For each e incident with v .. */ + for (e = graphFirst[v]; e; e = graphNext[NumEntry+e]) + { + uint8_t *key1; + + if (deleted[abs(e)]) + { + /* A deleted key. Just ignore it */ + continue; + } + key1 = m_collector->getKey(abs(e)-1); + w = graphNode[NumEntry+e]; + if (visited[w]) + { + /* Did we just come through this edge? If so, ignore it. */ + if (abs(e) != abs(parentE)) + { + /* There is a cycle in the graph. There is some subtle code here + to work around the distinct possibility that there may be + duplicate keys. Duplicate keys will always cause unit + cycles, since f1 and f2 (used to select v and w) will be the + same for both. The edges (representing an index into the + array of keys) are distinct, but the key values are not. 
+ The logic is as follows: for the candidate edge e, check to + see if it terminates in the parent vertex. If so, we test + the keys associated with e and the parent, and if they are + the same, we can safely ignore e for the purposes of cycle + detection, since edge e adds nothing to the cycle. Cycles + involving v, w, and e0 will still be found. The parent + edge was not similarly eliminated because at the time when + it was a candidate, v was not yet visited. + We still have to remove the key from further consideration, + since each edge is visited twice, but with a different + parent edge each time. + */ + /* We save some stack space by calculating the parent vertex + for these relatively few cases where it is needed */ + int parentV = graphNode[NumEntry-parentE]; + + if (w == parentV) + { + uint8_t *key2; + + key2=m_collector->getKey(abs(parentE)-1); + if (memcmp(key1, key2, EntryLen) == 0) + { + printf("Duplicate keys with edges %d and %d (", + e, parentE); + m_collector->dispKey(abs(e)-1); + printf(" & "); + m_collector->dispKey(abs(parentE)-1); + printf(")\n"); + deleted[abs(e)] = true; /* Wipe the key */ + } + else + { + /* A genuine (unit) cycle. */ + printf("There is a unit cycle involving vertex %d and edge %d\n", v, e); + return true; + } + + } + else + { + /* We have reached a previously visited vertex not the + parent. Therefore, we have uncovered a genuine cycle */ + printf("There is a cycle involving vertex %d and edge %d\n", v, e); + return true; + + } + } + } + else /* Not yet seen. Traverse it */ + { + if (DFS(e, w)) + { + /* Cycle found deeper down. 
Exit */ + return true; + } + } + } + return false; +} + +bool PerfectHash::isCycle(void) +{ + int v, e; + + for (v=1; v <= NumVert; v++) + { + visited[v] = false; + } + for (e=1; e <= NumEntry; e++) + { + deleted[e] = false; + } + for (v=1; v <= NumVert; v++) + { + if (!visited[v]) + { + if (DFS(-32767, v)) + { + return true; + } + } + } + return false; +} + +void PerfectHash::traverse(int u) +{ + int w, e; + + visited[u] = true; + /* Find w, the neighbours of u, by searching the edges e associated with u */ + e = graphFirst[1+u]; + while (e) + { + w = graphNode[NumEntry+e]-1; + if (!visited[w]) + { + g[w] = (abs(e)-1 - g[u]) % NumEntry; + if (g[w] < 0) g[w] += NumEntry; /* Keep these positive */ + traverse(w); + } + e = graphNext[NumEntry+e]; + } + +} + +void PerfectHash::assign(void) +{ + int v; + + + for (v=0; v < NumVert; v++) + { + g[v] = 0; /* g is sparse; leave the gaps 0 */ + visited[v] = false; + } + + for (v=0; v < NumVert; v++) + { + if (!visited[v]) + { + g[v] = 0; + traverse(v); + } + } +} + +int PerfectHash::hash(uint8_t *string) +{ + uint16_t u, v; + int j; + + u = 0; + for (j=0; j < EntryLen; j++) + { + T1 = T1base + j * SetSize; + u += T1[string[j] - SetMin]; + } + u %= NumVert; + + v = 0; + for (j=0; j < EntryLen; j++) + { + T2 = T2base + j * SetSize; + v += T2[string[j] - SetMin]; + } + v %= NumVert; + + return (g[u] + g[v]) % NumEntry; +} + +#if 0 +void dispRecord(int i); + +void +duplicateKeys(int v1, int v2) +{ + int i, j; + uint8_t *keys; + int u, v; + + v1--; v2--; /* These guys are origin 1 */ + + printf("Duplicate keys:\n"); + + for (i=0; i < NumEntry; i++) + { + getKey(i, &keys); + u = 0; + for (j=0; j < EntryLen; j++) + { + T1 = T1base + j * SetSize; + u += T1[keys[j] - SetMin]; + } + u %= NumVert; + if ((u != v1) && (u != v2)) continue; + + v = 0; + for (j=0; j < EntryLen; j++) + { + T2 = T2base + j * SetSize; + v += T2[keys[j] - SetMin]; + } + v %= NumVert; + + if ((v == v2) || (v == v1)) + { + printf("Entry #%d key: ", i+1); + for 
(j=0; j < EntryLen; j++) printf("%02X ", keys[j]); + printf("\n"); + dispRecord(i+1); + } + } + exit(1); + + +} +#endif diff --git a/common/perfhlib.h b/common/perfhlib.h new file mode 100644 index 0000000..a7221b8 --- /dev/null +++ b/common/perfhlib.h @@ -0,0 +1,37 @@ +#include +/** Perfect hashing function library. Contains functions to generate perfect + hashing functions */ +struct PatternCollector; +struct PerfectHash { + uint16_t *T1base; + uint16_t *T2base; /* Pointers to start of T1, T2 */ + short *g; /* g[] */ + + int NumEntry; /* Number of entries in the hash table (# keys) */ + int EntryLen; /* Size (bytes) of each entry (size of keys) */ + int SetSize; /* Size of the char set */ + char SetMin; /* First char in the set */ + int NumVert; /* c times NumEntry */ + /** Set the parameters for the hash table */ + void setHashParams(int _numEntry, int _entryLen, int _setSize, char _setMin, int _numVert); + +public: + void map(PatternCollector * collector); /* Part 1 of creating the tables */ + void hashCleanup(); /* Frees memory allocated by setHashParams() */ + void assign(); /* Part 2 of creating the tables */ + int hash(uint8_t *string); /* Hash the string to an int 0 .. NUMENTRY-1 */ + const uint16_t *readT1(void) const { return T1base; } + const uint16_t *readT2(void) const { return T2base; } + const uint16_t *readG(void) const { return (uint16_t *)g; } + uint16_t *readT1(void){ return T1base; } + uint16_t *readT2(void){ return T2base; } + uint16_t *readG(void) { return (uint16_t *)g; } +private: + void initGraph(); + void addToGraph(int e, int v1, int v2); + bool isCycle(); + bool DFS(int parentE, int v); + void traverse(int u); + PatternCollector *m_collector; /* used to retrieve the keys */ + +}; diff --git a/include/perfhlib.h b/include/perfhlib.h deleted file mode 100644 index bbcfb98..0000000 --- a/include/perfhlib.h +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once -/* Perfect hashing function library. 
Contains functions to generate perfect - hashing functions - * (C) Mike van Emmerik - */ -#include - -/* Prototypes */ -void hashCleanup(void); /* Frees memory allocated by hashParams() */ -void map(void); /* Part 1 of creating the tables */ - -/* The application must provide these functions: */ -void getKey(int i, uint8_t **pKeys);/* Set *keys to point to the i+1th key */ -void dispKey(int i); /* Display the key */ -class PatternHasher -{ - uint16_t *T1base, *T2base; /* Pointers to start of T1, T2 */ - int NumEntry; /* Number of entries in the hash table (# keys) */ - int EntryLen; /* Size (bytes) of each entry (size of keys) */ - int SetSize; /* Size of the char set */ - char SetMin; /* First char in the set */ - int NumVert; /* c times NumEntry */ - int *graphNode; /* The array of edges */ - int *graphNext; /* Linked list of edges */ - int *graphFirst;/* First edge at a vertex */ -public: - uint16_t *readT1(void); /* Returns a pointer to the T1 table */ - uint16_t *readT2(void); /* Returns a pointer to the T2 table */ - uint16_t *readG(void); /* Returns a pointer to the g table */ - void init(int _NumEntry, int _EntryLen, int _SetSize, char _SetMin,int _NumVert); /* Set the parameters for the hash table */ - void cleanup(); - int hash(unsigned char *string); //!< Hash the string to an int 0 .. 
NUMENTRY-1 -}; -extern PatternHasher g_pattern_hasher; -/* Macro reads a LH uint16_t from the image regardless of host convention */ -#ifndef LH -#define LH(p) ((int)((uint8_t *)(p))[0] + ((int)((uint8_t *)(p))[1] << 8)) -#endif diff --git a/src/DccFrontend.cpp b/src/DccFrontend.cpp index eb478e4..4b4175b 100644 --- a/src/DccFrontend.cpp +++ b/src/DccFrontend.cpp @@ -1,12 +1,14 @@ -#include -#include -#include #include "dcc.h" #include "DccFrontend.h" #include "project.h" #include "disassem.h" #include "CallGraph.h" +#include +#include + +#include + class Loader { @@ -158,7 +160,7 @@ bool DccFrontend::FrontEnd () if (option.asm1) { - std::cout << "dcc: writing assembler file "< -#include -#include -#include "perfhlib.h" - -/* Private data structures */ - -static uint16_t *T1, *T2; /* Pointers to T1[i], T2[i] */ -static short *g; /* g[] */ - -//static int numEdges; /* An edge counter */ -//static bool *visited; /* Array of bools: whether visited */ - -/* Private prototypes */ -//static void initGraph(void); -//static void addToGraph(int e, int v1, int v2); -//static bool isCycle(void); -//static void duplicateKeys(int v1, int v2); -PatternHasher g_pattern_hasher; - -void PatternHasher::init(int _NumEntry, int _EntryLen, int _SetSize, char _SetMin, int _NumVert) -{ - /* These parameters are stored in statics so as to obviate the need for - passing all these (or defererencing pointers) for every call to hash() - */ - - NumEntry = _NumEntry; - EntryLen = _EntryLen; - SetSize = _SetSize; - SetMin = _SetMin; - NumVert = _NumVert; - - /* Allocate the variable sized tables etc */ - T1base = new uint16_t [EntryLen * SetSize]; - T2base = new uint16_t [EntryLen * SetSize]; - graphNode = new int [NumEntry*2 + 1]; - graphNext = new int [NumEntry*2 + 1]; - graphFirst = new int [NumVert + 1]; - g = new short [NumVert + 1]; -// visited = new bool [NumVert + 1]; - return; - -} - -void PatternHasher::cleanup(void) -{ - /* Free the storage for variable sized tables etc */ - delete [] 
T1base; - delete [] T2base; - delete [] graphNode; - delete [] graphNext; - delete [] graphFirst; - delete [] g; -// delete [] visited; -} - -int PatternHasher::hash(uint8_t *string) -{ - uint16_t u, v; - int j; - - u = 0; - for (j=0; j < EntryLen; j++) - { - T1 = T1base + j * SetSize; - u += T1[string[j] - SetMin]; - } - u %= NumVert; - - v = 0; - for (j=0; j < EntryLen; j++) - { - T2 = T2base + j * SetSize; - v += T2[string[j] - SetMin]; - } - v %= NumVert; - - return (g[u] + g[v]) % NumEntry; -} - -uint16_t * PatternHasher::readT1(void) -{ - return T1base; -} - -uint16_t *PatternHasher::readT2(void) -{ - return T2base; -} - -uint16_t * PatternHasher::readG(void) -{ - return (uint16_t *)g; -} - diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 0000000..8d27447 --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(makedsig) diff --git a/tools/dispsrch/dispsig.cpp b/tools/dispsrch/dispsig.cpp new file mode 100644 index 0000000..22b7b8c --- /dev/null +++ b/tools/dispsrch/dispsig.cpp @@ -0,0 +1,248 @@ +/* Quick program to copy a named signature to a small file */ + +#include +#include +#include +#include +#include "perfhlib.h" + +/* statics */ +byte buf[100]; +int numKeys; /* Number of hash table entries (keys) */ +int numVert; /* Number of vertices in the graph (also size of g[]) */ +int PatLen; /* Size of the keys (pattern length) */ +int SymLen; /* Max size of the symbols, including null */ +FILE *f; /* File being read */ +FILE *f2; /* File being written */ + +static word *T1base, *T2base; /* Pointers to start of T1, T2 */ +static word *g; /* g[] */ + +/* prototypes */ +void grab(int n); +word readFileShort(void); +void cleanup(void); + + +#define SYMLEN 16 +#define PATLEN 23 + +/* Hash table structure */ +typedef struct HT_tag +{ + char htSym[SYMLEN]; + byte htPat[PATLEN]; +} HT; + +HT ht; /* One hash table entry */ + +void +main(int argc, char *argv[]) +{ + word w, len; + int i; + + if (argc <= 3) + { + 
printf("Usage: dispsig \n"); + printf("Example: dispsig dccm8s.sig printf printf.bin\n"); + exit(1); + } + + if ((f = fopen(argv[1], "rb")) == NULL) + { + printf("Cannot open %s\n", argv[1]); + exit(2); + } + + if ((f2 = fopen(argv[3], "wb")) == NULL) + { + printf("Cannot write to %s\n", argv[3]); + exit(2); + } + + + /* Read the parameters */ + grab(4); + if (memcmp("dccs", buf, 4) != 0) + { + printf("Not a dccs file!\n"); + exit(3); + } + numKeys = readFileShort(); + numVert = readFileShort(); + PatLen = readFileShort(); + SymLen = readFileShort(); + + /* Initialise the perfhlib stuff. Also allocates T1, T2, g, etc */ + hashParams( /* Set the parameters for the hash table */ + numKeys, /* The number of symbols */ + PatLen, /* The length of the pattern to be hashed */ + 256, /* The character set of the pattern (0-FF) */ + 0, /* Minimum pattern character value */ + numVert); /* Specifies C, the sparseness of the graph. + See Czech, Havas and Majewski for details + */ + + T1base = readT1(); + T2base = readT2(); + g = readG(); + + /* Read T1 and T2 tables */ + grab(2); + if (memcmp("T1", buf, 2) != 0) + { + printf("Expected 'T1'\n"); + exit(3); + } + len = PatLen * 256 * sizeof(word); + w = readFileShort(); + if (w != len) + { + printf("Problem with size of T1: file %d, calc %d\n", w, len); + exit(4); + } + if (fread(T1base, 1, len, f) != len) + { + printf("Could not read T1\n"); + exit(5); + } + + grab(2); + if (memcmp("T2", buf, 2) != 0) + { + printf("Expected 'T2'\n"); + exit(3); + } + w = readFileShort(); + if (w != len) + { + printf("Problem with size of T2: file %d, calc %d\n", w, len); + exit(4); + } + if (fread(T2base, 1, len, f) != len) + { + printf("Could not read T2\n"); + exit(5); + } + + /* Now read the function g[] */ + grab(2); + if (memcmp("gg", buf, 2) != 0) + { + printf("Expected 'gg'\n"); + exit(3); + } + len = numVert * sizeof(word); + w = readFileShort(); + if (w != len) + { + printf("Problem with size of g[]: file %d, calc %d\n", w, len); + 
exit(4); + } + if (fread(g, 1, len, f) != len) + { + printf("Could not read T2\n"); + exit(5); + } + + + /* This is now the hash table */ + grab(2); + if (memcmp("ht", buf, 2) != 0) + { + printf("Expected 'ht'\n"); + exit(3); + } + w = readFileShort(); + if (w != numKeys * (SymLen + PatLen + sizeof(word))) + { + printf("Problem with size of hash table: file %d, calc %d\n", w, len); + exit(6); + } + + + for (i=0; i < numKeys; i++) + { + if (fread(&ht, 1, SymLen + PatLen, f) != (size_t)(SymLen + PatLen)) + { + printf("Could not read pattern %d from %s\n", i, argv[1]); + exit(7); + } + if (stricmp(ht.htSym, argv[2]) == 0) + { + /* Found it! */ + break; + } + + } + fclose(f); + if (i == numKeys) + { + printf("Function %s not found!\n", argv[2]); + exit(2); + } + + printf("Function %s index %d\n", ht.htSym, i); + for (i=0; i < PatLen; i++) + { + printf("%02X ", ht.htPat[i]); + } + + fwrite(ht.htPat, 1, PatLen, f2); + fclose(f2); + + printf("\n"); + + +} + + +void +cleanup(void) +{ + /* Free the storage for variable sized tables etc */ + if (T1base) free(T1base); + if (T2base) free(T2base); + if (g) free(g); +} + +void grab(int n) +{ + if (fread(buf, 1, n, f) != (size_t)n) + { + printf("Could not read\n"); + exit(11); + } +} + +word +readFileShort(void) +{ + byte b1, b2; + + if (fread(&b1, 1, 1, f) != 1) + { + printf("Could not read\n"); + exit(11); + } + if (fread(&b2, 1, 1, f) != 1) + { + printf("Could not read\n"); + exit(11); + } + return (b2 << 8) + b1; +} + +/* Following two functions not needed unless creating tables */ + +void getKey(int i, byte **keys) +{ +} + +/* Display key i */ +void +dispKey(int i) +{ +} + diff --git a/tools/dispsrch/dispsig.mak b/tools/dispsrch/dispsig.mak new file mode 100644 index 0000000..de47b0f --- /dev/null +++ b/tools/dispsrch/dispsig.mak @@ -0,0 +1,11 @@ +CFLAGS = -Zi -c -AL -W3 -D__MSDOS__ + +dispsig.exe: dispsig.obj perfhlib.obj + link /CO dispsig perfhlib; + +dispsig.obj: dispsig.c dcc.h perfhlib.h + cl $(CFLAGS) $*.c + 
+perfhlib.obj: perfhlib.c dcc.h perfhlib.h + cl $(CFLAGS) $*.c + diff --git a/tools/dispsrch/dispsrch.txt b/tools/dispsrch/dispsrch.txt new file mode 100644 index 0000000..5452818 --- /dev/null +++ b/tools/dispsrch/dispsrch.txt @@ -0,0 +1,221 @@ + DISPSIG and SRCHSIG + =================== + +1 What are DispSig and SrchSig? + +2 How do I use DispSig? + +3 How do I use SrchSig? + +4 What can I do with the binary pattern file from DispSig? + +5 How can I create a binary pattern file for SrchSig? + + + +1 What are DispSig and SrchSig? +------------------------------- + +SrchSig is a program to display the name of a function, given a +signature (pattern). +DispSig is a program to display a signature, given a function name. +Dispsig also writes the signature to a binary file, so you can +disassemble it, or use it in Srchsig to see if some other signature +file has the same pattern. + + +2 How do I use DispSig? +----------------------- +Just type +DispSig <SignatureFileName> <FunctionName> <BinaryFileName> + +For example: + +dispsig dccb2s.sig strcmp strcmp.bin +Function index 58 +55 8B EC 56 57 8C D8 8E C0 FC 33 C0 8B D8 8B 7E 06 8B F7 32 C0 B9 F4 + +This tells us that the function was the 59th function in the +signature file (and that the signature above will hash to 58 +(decimal)). We can see that it is a standard C function, since it +starts with "55 8B EC", which is the standard C function prologue. +The rest of it is a bit hard to follow, but fortunately we have also +written the pattern to a binary file, strcmp.bin. See section 4 on +how to disassemble this pattern. + +If I type + +dispsig dcct4p.sig writeln wl.bin + +I get +Function writeln not found! + +In fact, there is no one function that performs the writeln function; +there are functions like WriteString, WriteInt, CrLf (Carriage +return, linefeed), and so on. 
Dispsig is case insensitive, so: + +dispsig dcct4p.sig writestring wl.bin +produces + +Function WriteString index 53 +55 8B EC C4 7E 0C E8 F4 F4 75 25 C5 76 08 8B 4E 06 FC AC F4 F4 2B C8 + + +3 How do I use SrchSig? +----------------------- +Just type + +srchsig <SignatureFileName> <BinaryFileName> + +dispsig dcct4p.sig writeln wl.bin +where BinaryFileName contains a pattern. See section 5 for how to +create one of these. For now, we can use the pattern file from the +first example: + +srchsig dccb2s.sig strcmp.bin + +Pattern: +55 8B EC 56 57 8C D8 8E C0 FC 33 C0 8B D8 8B 7E 06 8B F7 32 C0 B9 F4 +Pattern hashed to 58 (0x3A), symbol strcmp +Pattern matched + +Note that the pattern reported above need not be exactly the same as +the one we provided in <BinaryFileName>. The pattern displayed is the +wildcarded and chopped version of the pattern provided; it will have +F4s (wildcards) and possibly zeroes at the end; see the file +makedstp.txt for a simple explanation of wildcarding and chopping. + +If we type + +srchsig dccb2s.sig ws.bin + +we get + +Pattern: +55 8B EC C4 7E 0C E8 F4 F4 75 25 C5 76 08 8B 4E 06 FC AC F4 F4 2B C8 +Pattern hashed to 0 (0x0), symbol _IOERROR +Pattern mismatch: found following pattern +55 8B EC 56 8B 76 04 0B F6 7C 14 83 FE 58 76 03 BE F4 F4 89 36 F4 F4 +300 + +The pattern often hashes to zero when the pattern is unknown, due to +the sparse nature of the tables used in the hash function. The first +pattern in dccb2s.sig happens to be _IOERROR, and its pattern is +completely different, apart from the first three bytes. The "300" at +the end is actually a running count of signatures searched linearly, +in case there is a problem with the hash function. + + + +4 What can I do with the binary pattern file from DispSig? +---------------------------------------------------------- + +You can feed it into SrchSig; this might make sense if you wanted to +know if, e.g. the signature for printf was the same for version 2 as +it is for version 3. 
In this case, you would use DispSig on the +version 2 signature file, and SrchSig on the version 3 file. + +You can also disassemble it, using debug (it comes with MS-DOS). For +example +debug strcmp.bin +-u100 l 17 + +1754:0100 55 PUSH BP +1754:0101 8BEC MOV BP,SP +1754:0103 56 PUSH SI +1754:0104 57 PUSH DI +1754:0105 8CD8 MOV AX,DS +1754:0107 8EC0 MOV ES,AX +1754:0109 FC CLD +1754:010A 33C0 XOR AX,AX +1754:010C 8BD8 MOV BX,AX +1754:010E 8B7E06 MOV DI,[BP+06] +1754:0111 8BF7 MOV SI,DI +1754:0113 32C0 XOR AL,AL +1754:0115 B9F42B MOV CX,2BF4 +-q + +Note that the "2B" at the end is actually past the end of the +signature. (Signatures are 23 bytes (17 in hex) long, so only +addresses 100-116 are valid). Remember that most 16 bit operands will +be "wildcarded", so don't believe the resultant addresses. + + +5 How can I create a binary pattern file for SrchSig? +----------------------------------------------------- + +Again, you can use debug. Suppose you have found an interesting piece +of code at address 05BE (this example comes from a hello world +program): + +-u 5be +15FF:05BE 55 PUSH BP +15FF:05BF 8BEC MOV BP,SP +15FF:05C1 83EC08 SUB SP,+08 +15FF:05C4 57 PUSH DI +15FF:05C5 56 PUSH SI +15FF:05C6 BE1E01 MOV SI,011E +15FF:05C9 8D4606 LEA AX,[BP+06] +15FF:05CC 8946FC MOV [BP-04],AX +15FF:05CF 56 PUSH SI +15FF:05D0 E8E901 CALL 07BC +15FF:05D3 83C402 ADD SP,+02 +15FF:05D6 8BF8 MOV DI,AX +15FF:05D8 8D4606 LEA AX,[BP+06] +15FF:05DB 50 PUSH AX +15FF:05DC FF7604 PUSH [BP+04] +-mcs:5be l 17 cs:100 +-u100 l 17 +15FF:0100 55 PUSH BP +15FF:0101 8BEC MOV BP,SP +15FF:0103 83EC08 SUB SP,+08 +15FF:0106 57 PUSH DI +15FF:0107 56 PUSH SI +15FF:0108 BE1E01 MOV SI,011E +15FF:010B 8D4606 LEA AX,[BP+06] +15FF:010E 8946FC MOV [BP-04],AX +15FF:0111 56 PUSH SI +15FF:0112 E8E901 CALL 02FE +15FF:0115 83C41F ADD SP,+1F +-nfoo.bin +-rcx +CS 268A +:17 +-w +Writing 0017 bytes +-q +c>dir foo.bin +foo.bin 23 3-25-94 12:04 +c> + +The binary file has to be exactly 23 bytes long; that's why we +changed 
cx to the value 17 (hex 17 = decimal 23). If you are studying +a large file (> 64K) remember to set bx to 0 as well. The m (block +move) command moves the code of interest to cs:100, which is where +debug will write the file from. The "rcx" changes the length of the +save, and the "nfoo.bin" sets the name of the file to be saved. Now +we can feed this into srchsig: + +srchsig dccb2s.sig foo.bin +Pattern: +55 8B EC 83 EC 08 57 56 BE F4 F4 8D 46 06 89 46 FC 56 E8 F4 F4 83 C4 +Pattern hashed to 278 (0x116), symbol sleep +Pattern mismatch: found following pattern +55 8B EC 83 EC 04 56 57 8D 46 FC 50 E8 F4 F4 59 80 7E FE 5A 76 05 BF +300 + +Hmmm. Not a Borland C version 2 small model signature. Perhaps it's a +Microsoft Version 5 signature: + +Pattern: +55 8B EC 83 EC 08 57 56 BE F4 F4 8D 46 06 89 46 FC 56 E8 F4 F4 83 C4 +Pattern hashed to 31 (0x1F), symbol printf +Pattern matched + +Yes, it was good old printf. Of course, no need for you to guess, DCC +will figure out the vendor, version number, and model for you. + + + + + diff --git a/tools/dispsrch/srchsig.cpp b/tools/dispsrch/srchsig.cpp new file mode 100644 index 0000000..ba6dbda --- /dev/null +++ b/tools/dispsrch/srchsig.cpp @@ -0,0 +1,287 @@ +/* Quick program to see if a pattern is in a sig file. 
Pattern is supplied + in a small .bin or .com style file */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "perfhlib.h" + +/* statics */ +byte buf[100]; +int numKeys; /* Number of hash table entries (keys) */ +int numVert; /* Number of vertices in the graph (also size of g[]) */ +int PatLen; /* Size of the keys (pattern length) */ +int SymLen; /* Max size of the symbols, including null */ +FILE *f; /* Sig file being read */ +FILE *fpat; /* Pattern file being read */ + +static word *T1base, *T2base; /* Pointers to start of T1, T2 */ +static word *g; /* g[] */ + +#define SYMLEN 16 +#define PATLEN 23 + +typedef struct HT_tag +{ + /* Hash table structure */ + char htSym[SYMLEN]; + byte htPat[PATLEN]; +} HT; + +HT *ht; /* Declare a pointer to a hash table */ + +/* prototypes */ +void grab(int n); +word readFileShort(void); +void cleanup(void); +void fixWildCards(byte *buf); /* In fixwild.cpp, defined there as (uint8_t pat[]); NOTE(review): assumes byte is unsigned char - confirm in perfhlib.h */ +void pattSearch(void); + + +int +main(int argc, char *argv[]) +{ + word w, len; + int h, i; + int patlen; + + if (argc <= 2) + { + printf("Usage: srchsig <SigFilename> <PattFilename>\n"); + printf("Searches the signature file for the given pattern\n"); + printf("e.g. %s dccm8s.sig mypatt.bin\n", argv[0]); + exit(1); + } + + if ((f = fopen(argv[1], "rb")) == NULL) + { + printf("Cannot open signature file %s\n", argv[1]); + exit(2); + } + + if ((fpat = fopen(argv[2], "rb")) == NULL) + { + printf("Cannot open pattern file %s\n", argv[2]); + exit(2); + } + + /* Read the parameters */ + grab(4); + if (memcmp("dccs", buf, 4) != 0) + { + printf("Not a dccs file!\n"); + exit(3); + } + numKeys = readFileShort(); + numVert = readFileShort(); + PatLen = readFileShort(); + SymLen = readFileShort(); + if (PatLen != PATLEN || SymLen != SYMLEN) { printf("Unsupported pattern/symbol length %d/%d\n", PatLen, SymLen); exit(3); } /* HT entries are fixed size; other lengths would overrun them below */ + /* Initialise the perfhlib stuff.
Also allocates T1, T2, g, etc */ + hashParams( /* Set the parameters for the hash table */ + numKeys, /* The number of symbols */ + PatLen, /* The length of the pattern to be hashed */ + 256, /* The character set of the pattern (0-FF) */ + 0, /* Minimum pattern character value */ + numVert); /* Specifies C, the sparseness of the graph. + See Czech, Havas and Majewski for details + */ + + T1base = readT1(); + T2base = readT2(); + g = readG(); + + /* Read T1 and T2 tables */ + grab(2); + if (memcmp("T1", buf, 2) != 0) + { + printf("Expected 'T1'\n"); + exit(3); + } + len = PatLen * 256 * sizeof(word); + w = readFileShort(); + if (w != len) + { + printf("Problem with size of T1: file %d, calc %d\n", w, len); + exit(4); + } + if (fread(T1base, 1, len, f) != len) + { + printf("Could not read T1\n"); + exit(5); + } + + grab(2); + if (memcmp("T2", buf, 2) != 0) + { + printf("Expected 'T2'\n"); + exit(3); + } + w = readFileShort(); + if (w != len) + { + printf("Problem with size of T2: file %d, calc %d\n", w, len); + exit(4); + } + if (fread(T2base, 1, len, f) != len) + { + printf("Could not read T2\n"); + exit(5); + } + + /* Now read the function g[] */ + grab(2); + if (memcmp("gg", buf, 2) != 0) + { + printf("Expected 'gg'\n"); + exit(3); + } + len = numVert * sizeof(word); + w = readFileShort(); + if (w != len) + { + printf("Problem with size of g[]: file %d, calc %d\n", w, len); + exit(4); + } + if (fread(g, 1, len, f) != len) + { + printf("Could not read g[]\n"); + exit(5); + } + + + /* This is now the hash table */ + /* First allocate space for the table */ + if ((ht = (HT *)malloc(numKeys * sizeof(HT))) == 0) + { + printf("Could not allocate hash table\n"); + exit(1); + } + grab(2); + if (memcmp("ht", buf, 2) != 0) + { + printf("Expected 'ht'\n"); + exit(3); + } + w = readFileShort(); /* Size field is a short: the writer stores the byte length truncated to a word */ + if (w != (len = (word)(numKeys * (SymLen + PatLen + sizeof(word))))) + { + printf("Problem with size of hash table: file %d, calc %d\n", w, len); + exit(6); + } + + for (i=0; i < numKeys; i++) + { +
if ((int)fread(&ht[i], 1, SymLen + PatLen, f) != SymLen + PatLen) + { + printf("Could not read\n"); + exit(11); + } + } + + /* Read the pattern to buf */ + if ((patlen = fread(buf, 1, 100, fpat)) == 0) + { + printf("Could not read pattern\n"); + exit(11); + } + if (patlen != PATLEN) + { + printf("Error: pattern length is %d, should be %d\n", patlen, PATLEN); + exit(12); + } + + /* Fix the wildcards */ + fixWildCards(buf); + + printf("Pattern:\n"); + for (i=0; i < PATLEN; i++) + printf("%02X ", buf[i]); + printf("\n"); + + + h = hash(buf); + printf("Pattern hashed to %d (0x%X), symbol %s\n", h, h, ht[h].htSym); + if (memcmp(ht[h].htPat, buf, PATLEN) == 0) + { + printf("Pattern matched\n"); + } + else + { + printf("Pattern mismatch: found following pattern\n"); + for (i=0; i < PATLEN; i++) + printf("%02X ", ht[h].htPat[i]); + printf("\n"); + pattSearch(); /* Look for it the hard way */ + } + cleanup(); + free(ht); + fclose(f); + fclose(fpat); + return 0; +} + +void pattSearch(void) +{ + int i; + + for (i=0; i < numKeys; i++) + { + if ((i % 100) == 0) printf("\r%d ", i); + if (memcmp(ht[i].htPat, buf, PATLEN) == 0) + { + printf("\nPattern matched offset %d (0x%X)\n", i, i); + } + } + printf("\n"); +} + + +void +cleanup(void) +{ + /* Free the storage for variable sized tables etc */ + if (T1base) free(T1base); + if (T2base) free(T2base); + if (g) free(g); +} + +void grab(int n) +{ + if (fread(buf, 1, n, f) != (size_t)n) + { + printf("Could not read\n"); + exit(11); + } +} + +word +readFileShort(void) +{ + byte b1, b2; + + if (fread(&b1, 1, 1, f) != 1) + { + printf("Could not read\n"); + exit(11); + } + if (fread(&b2, 1, 1, f) != 1) + { + printf("Could not read\n"); + exit(11); + } + return (b2 << 8) + b1; +} + +/* Following two functions not needed unless creating tables */ + +void getKey(int i, byte **keys) +{ +} + +/* Display key i */ +void +dispKey(int i) +{ +} + diff --git a/tools/dispsrch/srchsig.mak b/tools/dispsrch/srchsig.mak new file mode 100644 index 0000000..d1296fa ---
/dev/null +++ b/tools/dispsrch/srchsig.mak @@ -0,0 +1,14 @@ +CFLAGS = -Zi -c -AL -W3 -D__MSDOS__ + +srchsig.exe: srchsig.obj perfhlib.obj fixwild.obj + link /CO srchsig perfhlib fixwild; + +srchsig.obj: srchsig.c dcc.h perfhlib.h + cl $(CFLAGS) $*.c + +perfhlib.obj: perfhlib.c dcc.h perfhlib.h + cl $(CFLAGS) $*.c + +fixwild.obj: fixwild.c dcc.h + cl $(CFLAGS) $*.c + diff --git a/tools/makedsig/CMakeLists.txt b/tools/makedsig/CMakeLists.txt new file mode 100644 index 0000000..c7bcada --- /dev/null +++ b/tools/makedsig/CMakeLists.txt @@ -0,0 +1,11 @@ +set(SRC +makedsig.cpp +fixwild.cpp +LIB_PatternCollector.cpp +LIB_PatternCollector.h +TPL_PatternCollector.cpp +TPL_PatternCollector.h +) +add_executable(makedsig ${SRC}) +target_link_libraries(makedsig dcc_hash) +qt5_use_modules(makedsig Core) diff --git a/tools/makedsig/LIB_PatternCollector.cpp b/tools/makedsig/LIB_PatternCollector.cpp new file mode 100644 index 0000000..1cf5496 --- /dev/null +++ b/tools/makedsig/LIB_PatternCollector.cpp @@ -0,0 +1,7 @@ +#include "LIB_PatternCollector.h" + +LIB_PatternCollector::LIB_PatternCollector() +{ + +} + diff --git a/tools/makedsig/LIB_PatternCollector.h b/tools/makedsig/LIB_PatternCollector.h new file mode 100644 index 0000000..e69da28 --- /dev/null +++ b/tools/makedsig/LIB_PatternCollector.h @@ -0,0 +1,11 @@ +#ifndef LIB_PATTERNCOLLECTOR_H +#define LIB_PATTERNCOLLECTOR_H + + +class LIB_PatternCollector +{ +public: + LIB_PatternCollector(); +}; + +#endif // LIB_PATTERNCOLLECTOR_H diff --git a/tools/makedsig/TPL_PatternCollector.cpp b/tools/makedsig/TPL_PatternCollector.cpp new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tools/makedsig/TPL_PatternCollector.cpp @@ -0,0 +1 @@ + diff --git a/tools/makedsig/TPL_PatternCollector.h b/tools/makedsig/TPL_PatternCollector.h new file mode 100644 index 0000000..80a4f13 --- /dev/null +++ b/tools/makedsig/TPL_PatternCollector.h @@ -0,0 +1,5 @@ +#ifndef TPL_PATTERNCOLLECTOR_H +#define TPL_PATTERNCOLLECTOR_H + +#endif //
TPL_PATTERNCOLLECTOR_H + diff --git a/tools/makedsig/fixwild.cpp b/tools/makedsig/fixwild.cpp new file mode 100644 index 0000000..12323e6 --- /dev/null +++ b/tools/makedsig/fixwild.cpp @@ -0,0 +1,525 @@ +/* + *$Log: fixwild.c,v $ + * Revision 1.10 93/10/28 11:10:10 emmerik + * Addressing mode [reg+nnnn] is now wildcarded + * + * Revision 1.9 93/10/26 13:40:11 cifuente + * op0F(byte pat[]) + * + * Revision 1.8 93/10/26 13:01:29 emmerik + * Completed the odd opcodes, like 0F XX and F7. Result: some library + * functions that were not recognised before are recognised now. + * + * Revision 1.7 93/10/11 11:37:01 cifuente + * First walk of HIGH_LEVEL icodes. + * + * Revision 1.6 93/10/01 14:36:21 emmerik + * Added $ log, and made independant of dcc.h + * + * + */ + +/* * * * * * * * * * * * *\ +* * +* Fix Wild Cards Code * +* * +\* * * * * * * * * * * * */ + +#include <stdint.h> /* uint8_t */ +#include <string.h> /* memset, used by chop() */ +#ifndef PATLEN +#define PATLEN 23 +#define WILD 0xF4 +#endif + +static int pc; /* Indexes into pat[] */ + +/* prototypes */ +static bool ModRM(uint8_t pat[]); /* Handle the mod/rm byte */ +static bool TwoWild(uint8_t pat[]); /* Make the next 2 bytes wild */ +static bool FourWild(uint8_t pat[]); /* Make the next 4 bytes wild */ + void fixWildCards(uint8_t pat[]); /* Main routine */ + + +/* Handle the mod/rm case.
Returns true if pattern exhausted */ +static bool ModRM(uint8_t pat[]) +{ + uint8_t op; + if (pc >= PATLEN) return true; /* Nothing left to decode: do not read past pat[] */ + /* A standard mod/rm byte follows opcode */ + op = pat[pc++]; /* The mod/rm byte */ + if (pc >= PATLEN) return true; /* Skip Mod/RM */ + switch (op & 0xC0) + { + case 0x00: /* [reg] or [nnnn] */ + if ((op & 0xC7) == 6) + { + /* Uses [nnnn] address mode */ + pat[pc++] = WILD; + if (pc >= PATLEN) return true; + pat[pc++] = WILD; + if (pc >= PATLEN) return true; + } + break; + case 0x40: /* [reg + nn] */ + if ((pc+=1) >= PATLEN) return true; + break; + case 0x80: /* [reg + nnnn] */ + /* Possibly just a long constant offset from a register, + but often will be an index from a variable */ + pat[pc++] = WILD; + if (pc >= PATLEN) return true; + pat[pc++] = WILD; + if (pc >= PATLEN) return true; + break; + case 0xC0: /* reg */ + break; + } + return false; +} + +/* Change the next two bytes to wild cards */ +static bool TwoWild(uint8_t pat[]) +{ + pat[pc++] = WILD; + if (pc >= PATLEN) return true; /* Pattern exhausted */ + pat[pc++] = WILD; + if (pc >= PATLEN) return true; + return false; +} + +/* Change the next four bytes to wild cards. Returns true if pattern exhausted */ +static bool FourWild(uint8_t pat[]) +{ + if (TwoWild(pat)) return true; /* Exhausted after the first pair: a second call would write pat[PATLEN] */ + return TwoWild(pat); +} + +/* Chop from the current point by wiping with zeroes.
Can't rely on anything + after this point */ +static void chop(uint8_t pat[]) +{ + if (pc >= PATLEN) return; /* Could go negative otherwise */ + memset(&pat[pc], 0, PATLEN - pc); +} + +static bool op0F(uint8_t pat[]) +{ + /* The two byte opcodes */ + uint8_t op = pat[pc++]; + switch (op & 0xF0) + { + case 0x00: /* 00 - 0F */ + if (op >= 0x06) /* Clts, Invd, Wbinvd */ + return false; + else + { + /* Grp 6, Grp 7, LAR, LSL */ + return ModRM(pat); + } + case 0x20: /* Various funnies, all with Mod/RM */ + return ModRM(pat); + + case 0x80: + pc += 2; /* Word displacement cond jumps */ + return false; + + case 0x90: /* Byte set on condition */ + return ModRM(pat); + + case 0xA0: + switch (op) + { + case 0xA0: /* Push FS */ + case 0xA1: /* Pop FS */ + case 0xA8: /* Push GS */ + case 0xA9: /* Pop GS */ + return false; + + case 0xA3: /* Bt Ev,Gv */ + case 0xAB: /* Bts Ev,Gv */ + return ModRM(pat); + + case 0xA4: /* Shld EvGbIb */ + case 0xAC: /* Shrd EvGbIb */ + if (ModRM(pat)) return true; + pc++; /* The #num bits to shift */ + return false; + + case 0xA5: /* Shld EvGb CL */ + case 0xAD: /* Shrd EvGb CL */ + return ModRM(pat); + + default: /* CmpXchg, Imul */ + return ModRM(pat); + } + + case 0xB0: + if (op == 0xBA) + { + /* Grp 8: bt/bts/btr/btc Ev,#nn */ + if (ModRM(pat)) return true; + pc++; /* The #num bits to shift */ + return false; + } + return ModRM(pat); + + case 0xC0: + if (op <= 0xC1) + { + /* Xadd */ + return ModRM(pat); + } + /* Else BSWAP */ + return false; + + default: + return false; /* Treat as double byte opcodes */ + + } + +} + +/* Scan through the instructions in pat[], looking for opcodes that may + have operands that vary with different instances. For example, load and + store from statics, calls to other procs (even relative calls; they may + call procs loaded in a different order, etc). + Note that this procedure is architecture specific, and assumes the + processor is in 16 bit address mode (real mode). + PATLEN bytes are scanned. 
+*/ +void fixWildCards(uint8_t pat[]) +{ + + uint8_t op, quad, intArg; + + + pc=0; + while (pc < PATLEN) + { + op = pat[pc++]; + if (pc >= PATLEN) return; + + quad = op & 0xC0; /* Quadrant of the opcode map */ + if (quad == 0) + { + /* Arithmetic group 00-3F */ + + if ((op & 0xE7) == 0x26) /* First check for the odds */ + { + /* Segment prefix: treat as 1 byte opcode */ + continue; + } + if (op == 0x0F) /* 386 2 byte opcodes */ + { + if (op0F(pat)) return; + continue; + } + + if (op & 0x04) + { + /* All these are constant. Work out the instr length */ + if (op & 2) + { + /* Push, pop, other 1 byte opcodes */ + continue; + } + else + { + if (op & 1) + { + /* Word immediate operands */ + pc += 2; + continue; + } + else + { + /* Byte immediate operands */ + pc++; + continue; + } + } + } + else + { + /* All these have mod/rm bytes */ + if (ModRM(pat)) return; + continue; + } + } + else if (quad == 0x40) + { + if ((op & 0x60) == 0x40) + { + /* 0x40 - 0x5F -- these are inc, dec, push, pop of general + registers */ + continue; + } + else + { + /* 0x60 - 0x70 */ + if (op & 0x10) + { + /* 70-7F 2 byte jump opcodes */ + pc++; + continue; + } + else + { + /* Odds and sods */ + switch (op) + { + case 0x60: /* pusha */ + case 0x61: /* popa */ + case 0x64: /* overrides */ + case 0x65: + case 0x66: + case 0x67: + case 0x6C: /* insb DX */ + case 0x6E: /* outsb DX */ + continue; + + case 0x62: /* bound */ + pc += 4; + continue; + + case 0x63: /* arpl */ + if (TwoWild(pat)) return; + continue; + + case 0x68: /* Push byte */ + case 0x6A: /* Push byte */ + case 0x6D: /* insb port */ + case 0x6F: /* outsb port */ + /* 2 byte instr, no wilds */ + pc++; + continue; + + } + } + + } + } + else if (quad == 0x80) + { + switch (op & 0xF0) + { + case 0x80: /* 80 - 8F */ + /* All have a mod/rm byte */ + if (ModRM(pat)) return; + /* These also have immediate values */ + switch (op) + { + case 0x80: + case 0x83: + /* One byte immediate */ + pc++; + continue; + + case 0x81: + /* Immediate 16 bit 
values might be constant, but + also might be relocatable. Have to make them + wild */ + if (TwoWild(pat)) return; + continue; + } + continue; + case 0x90: /* 90 - 9F */ + if (op == 0x9A) + { + /* far call */ + if (FourWild(pat)) return; + continue; + } + /* All others are 1 byte opcodes */ + continue; + case 0xA0: /* A0 - AF */ + if ((op & 0x0C) == 0) + { + /* mov al/ax to/from [nnnn] */ + if (TwoWild(pat)) return; + continue; + } + else if ((op & 0xFE) == 0xA8) + { + /* test al,#byte or test ax,#word */ + if (op & 1) pc += 2; + else pc += 1; + continue; + + } + case 0xB0: /* B0 - BF; A4-A7 and AA-AF also arrive here, as case 0xA0 has no continue for them. NOTE(review): confirm the fall-through is intended */ + { + if (op & 8) + { + /* mov reg, #16 */ + /* Immediate 16 bit values might be constant, but also + might be relocatable. For now, make them wild */ + if (TwoWild(pat)) return; + } + else + { + /* mov reg, #8 */ + pc++; + } + continue; + } + } + } + else + { + /* In the last quadrant of the op code table */ + switch (op) + { + case 0xC0: /* 386: Rotate group 2 ModRM, byte, #byte */ + case 0xC1: /* 386: Rotate group 2 ModRM, word, #byte */ + if (ModRM(pat)) return; + /* Byte immediate value follows ModRM */ + pc++; + continue; + + case 0xC3: /* Return */ + case 0xCB: /* Return far */ + chop(pat); + return; + case 0xC2: /* Ret nnnn */ + case 0xCA: /* Retf nnnn */ + pc += 2; + chop(pat); + return; + + case 0xC4: /* les Gv, Mp */ + case 0xC5: /* lds Gv, Mp */ + if (ModRM(pat)) return; + continue; + + case 0xC6: /* Mov ModRM, #nn */ + if (ModRM(pat)) return; + /* Byte immediate value follows ModRM */ + pc++; + continue; + case 0xC7: /* Mov ModRM, #nnnn */ + if (ModRM(pat)) return; + /* Word immediate value follows ModRM */ + /* Immediate 16 bit values might be constant, but also + might be relocatable.
For now, make them wild */ + if (TwoWild(pat)) return; + continue; + + case 0xC8: /* Enter Iw, Ib */ + pc += 3; /* Constant word, byte */ + continue; + case 0xC9: /* Leave */ + continue; + + case 0xCC: /* Int 3 */ + continue; + + case 0xCD: /* Int nn */ + intArg = pat[pc++]; + if ((intArg >= 0x34) && (intArg <= 0x3B)) + { + /* Borland/Microsoft FP emulations */ + if (ModRM(pat)) return; + } + continue; + + case 0xCE: /* Into */ + continue; + + case 0xCF: /* Iret */ + continue; + + case 0xD0: /* Group 2 rotate, byte, 1 bit */ + case 0xD1: /* Group 2 rotate, word, 1 bit */ + case 0xD2: /* Group 2 rotate, byte, CL bits */ + case 0xD3: /* Group 2 rotate, word, CL bits */ + if (ModRM(pat)) return; + continue; + + case 0xD4: /* Aam */ + case 0xD5: /* Aad */ + case 0xD7: /* Xlat */ + continue; + + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: /* Esc opcodes */ + case 0xDC: /* i.e. floating point */ + case 0xDD: /* coprocessor calls */ + case 0xDE: + case 0xDF: + if (ModRM(pat)) return; + continue; + + case 0xE0: /* Loopne */ + case 0xE1: /* Loope */ + case 0xE2: /* Loop */ + case 0xE3: /* Jcxz */ + pc++; /* Short jump offset */ + continue; + + case 0xE4: /* in al,nn */ + case 0xE6: /* out nn,al */ + pc++; + continue; + + case 0xE5: /* in ax,nn */ + case 0xE7: /* out nn,ax */ + pc += 2; + continue; + + case 0xE8: /* Call rel */ + if (TwoWild(pat)) return; + continue; + case 0xE9: /* Jump rel, unconditional */ + if (TwoWild(pat)) return; + chop(pat); + return; + case 0xEA: /* Jump abs */ + if (FourWild(pat)) return; + chop(pat); + return; + case 0xEB: /* Jmp short unconditional */ + pc++; + chop(pat); + return; + + case 0xEC: /* In al,dx */ + case 0xED: /* In ax,dx */ + case 0xEE: /* Out dx,al */ + case 0xEF: /* Out dx,ax */ + continue; + + case 0xF0: /* Lock */ + case 0xF2: /* Repne */ + case 0xF3: /* Rep/repe */ + case 0xF4: /* Halt */ + case 0xF5: /* Cmc */ + case 0xF8: /* Clc */ + case 0xF9: /* Stc */ + case 0xFA: /* Cli */ + case 0xFB: /* Sti */ + case 0xFC: /* Cld
*/ + case 0xFD: /* Std */ + continue; + + case 0xF6: /* Group 3 byte test/not/mul/div */ + case 0xF7: /* Group 3 word test/not/mul/div */ + case 0xFE: /* Inc/Dec group 4 */ + if (ModRM(pat)) return; + continue; + + case 0xFF: /* Group 5 Inc/Dec/Call/Jmp/Push */ + /* Most are like standard ModRM */ + if (ModRM(pat)) return; + continue; + + default: /* Rest are single byte opcodes */ + continue; + } + } + } +} + diff --git a/tools/makedsig/makedsig.cpp b/tools/makedsig/makedsig.cpp new file mode 100644 index 0000000..0465dee --- /dev/null +++ b/tools/makedsig/makedsig.cpp @@ -0,0 +1,175 @@ +/* Program for making the DCC signature file */ + +#include "LIB_PatternCollector.h" +#include "TPL_PatternCollector.h" +#include "perfhlib.h" /* Symbol table prototypes */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +/* Symbol table constants */ +#define C 2.2 /* Sparseness of graph. See Czech, Havas and Majewski for details */ + +/* prototypes */ + +void saveFile(FILE *fl, const PerfectHash &p_hash, PatternCollector *coll); /* Save the info */ + +int numKeys; /* Number of useful codeview symbols */ + + +static void printUsage(bool longusage) { + if(longusage) + printf( + "This program is to make 'signatures' of known c and tpl library calls for the dcc program.\n" + "It needs as the first arg the name of a library file, and as the second arg, the name " + "of the signature file to be generated.\n" + "Example: makedsig CL.LIB dccb3l.sig\n" + " or makedsig turbo.tpl dcct4p.sig\n" + ); + else + printf("Usage: makedsig <libname> <signame>\n" + "or makedsig -h for help\n"); +} +int main(int argc, char *argv[]) +{ + QCoreApplication app(argc,argv); + FILE *f2; // output file + FILE *srcfile; // .lib file + int s; + if(app.arguments().size()<2) { + printUsage(false); + return 0; + } + QString arg2 = app.arguments()[1]; + if (arg2.startsWith("-h") || arg2.startsWith("-?") || app.arguments().size()<3) /* also stop when the signature file name is missing: argv[2] is opened below */ + { + printUsage(arg2.startsWith("-h") || arg2.startsWith("-?")); /* long help only when asked for */ + return 0; + } + PatternCollector *collector; +
if(arg2.endsWith("tpl", Qt::CaseInsensitive)) { + collector = new TPL_PatternCollector; + } else { /* anything else is read as a .lib file, so CL.LIB no longer leaves collector unset */ + collector = new LIB_PatternCollector; + } + if ((srcfile = fopen(argv[1], "rb")) == NULL) + { + printf("Cannot read %s\n", argv[1]); + exit(2); + } + + if ((f2 = fopen(argv[2], "wb")) == NULL) + { + printf("Cannot write %s\n", argv[2]); + exit(2); + } + + fprintf(stderr, "Seed: "); + if (scanf("%d", &s) != 1) s = 0; /* no usable seed typed: fall back to 0 rather than use s unset */ + srand(s); + + PerfectHash p_hash; + numKeys = collector->readSyms(srcfile); /* Read the keys (symbols) */ + + printf("Num keys: %d; vertices: %d\n", numKeys, (int)(numKeys*C)); + /* Set the parameters for the hash table */ + p_hash.setHashParams( numKeys, /* The number of symbols */ + PATLEN, /* The length of the pattern to be hashed */ + 256, /* The character set of the pattern (0-FF) */ + 0, /* Minimum pattern character value */ + numKeys*C); /* C is the sparseness of the graph. See Czech, + Havas and Majewski for details */ + + /* The following two functions are in perfhlib.c */ + p_hash.map(collector); /* Perform the mapping.
This will call getKey() repeatedly */ + p_hash.assign(); /* Generate the function g */ + + saveFile(f2,p_hash,collector); /* Save the resultant information */ + + fclose(srcfile); + fclose(f2); + +} + +/* * * * * * * * * * * * *\ +* * +* S a v e t h e s i g f i l e * +* * +\* * * * * * * * * * * * */ + + +void writeFile(FILE *fl,const char *buffer, int len) +{ + if ((int)fwrite(buffer, 1, len, fl) != len) + { + printf("Could not write to file\n"); + exit(1); + } +} + +void writeFileShort(FILE *fl,uint16_t w) +{ + uint8_t b; + + b = (uint8_t)(w & 0xFF); + writeFile(fl,(char *)&b, 1); /* Write a short little endian */ + b = (uint8_t)(w>>8); + writeFile(fl,(char *)&b, 1); +} + +void saveFile(FILE *fl, const PerfectHash &p_hash, PatternCollector *coll) +{ + int i, len; + const uint16_t *pTable; + + writeFile(fl,"dccs", 4); /* Signature */ + writeFileShort(fl,numKeys); /* Number of keys */ + writeFileShort(fl,(short)(numKeys * C)); /* Number of vertices */ + writeFileShort(fl,PATLEN); /* Length of key part of entries */ + writeFileShort(fl,SYMLEN); /* Length of symbol part of entries */ + + /* Write out the tables T1 and T2, with their sig and byte lengths in front */ + writeFile(fl,"T1", 2); /* "Signature" */ + pTable = p_hash.readT1(); + len = PATLEN * 256; + writeFileShort(fl,len * sizeof(uint16_t)); + for (i=0; i < len; i++) + { + writeFileShort(fl,pTable[i]); + } + writeFile(fl,"T2", 2); + pTable = p_hash.readT2(); + writeFileShort(fl,len * sizeof(uint16_t)); + for (i=0; i < len; i++) + { + writeFileShort(fl,pTable[i]); + } + + /* Write out g[] */ + writeFile(fl,"gg", 2); /* "Signature" */ + pTable = p_hash.readG(); + len = (short)(numKeys * C); + writeFileShort(fl,len * sizeof(uint16_t)); + for (i=0; i < len; i++) + { + writeFileShort(fl,pTable[i]); + } + + /* Now the hash table itself */ + writeFile(fl,"ht ", 2); /* "Signature" */ + writeFileShort(fl,numKeys * (SYMLEN + PATLEN + sizeof(uint16_t))); /* byte len */ + for (i=0; i < numKeys; i++) + { + 
writeFile(fl,(char *)&coll->keys[i], SYMLEN + PATLEN); + } +} + + + diff --git a/tools/makedsig/makedsig.txt b/tools/makedsig/makedsig.txt new file mode 100644 index 0000000..8c0ee3c --- /dev/null +++ b/tools/makedsig/makedsig.txt @@ -0,0 +1,188 @@ + MAKEDSIG + +1 What is MakeDsig? + +2 How does it work? + +3 How do I use MakeDsig? + +4 What's in a signature file? + +5 What other tools are useful for signature work? + + +1 What is MakeDsig? +------------------- + +MakeDsig is a program that reads a library (.lib) file from a +compiler, and generates a signature file for use by DCC. Without +signature files, dcc cannot recognise library functions, and so will +attempt to decompile them, and cannot name them. This makes the +resultant decompiled code bulkier and difficult to understand. + + +2 How does it work? +------------------- + +Library files contain complete functions, relocation information, +function names, and more. MakeDsig reads a library file, and for each +function found, it saves the name, and creates a signature. These +are stored in an array. When all functions are done, tables for the +perfect hashing function are generated. During this process, +duplicate keys (functions that produce identical signatures) may be +detected; if so, one of the keys will be zeroed. + +The signature file contains information needed by dcc to hash the +signatures, as well as the symbols and signatures. Dcc reads the various +sections of the signature file to be able to hash signatures. The +signatures, not the symbols, are hashed, since dcc gets a signature +from the executable file, and needs to know quickly if there is a +symbolic name for it. + +3 How do I use MakeDsig? +------------------------ + +You can always find out by just executing it with no arguments, or +MakeDsig -h for more details. + +Basically, you just give it the names of the files that it needs: +MakeDsig <libname> <signame> + +It will ask you for a seed; enter any number, e.g. 1.
+ +You need the library file for the appropriate compiler. For example, +to analyse executable programs created from Turbo C 2.1 small model, +you need the cs.lib file that comes with that compiler. + +You also need to know the correct name for the signature file, i.e. +<signame>. Dcc will detect certain compiler vendors and version +numbers, and will look for a signature file named like this: +d c c <vendor> <version> <model> . s i g + +Here are the current vendors: +Vendor Vendor letter +Microsoft C/C++ m +Borland C/C++ b +Logitech (Modula) l +Turbo Pascal t + +Here are the model codes: +small/tiny s +medium m +compact c +large l +Turbo Pascal p + +The version codes are fairly self explanatory: +Microsoft C 5.1 5 +Microsoft C 8 8 +Borland C 2.0 2 +Borland C 3.0 3 +Turbo Pascal 3.0 3 Note: currently no way to make dcct3p.sig +Turbo Pascal 4.0 4 Use Makedstp, not makedsig +Turbo Pascal 5.0 5 Use Makedstp, not makedsig + +Some examples: the signature file for Borland C version 2.0, small +model, would be dccb2s.sig. To generate it, you would supply as the +library file cs.lib that came with that compiler. Suppose it was in +the \bc\lib directory. To generate the signature file required to +work with files produced by this compiler, you would type + +makedsig \bc\lib\cs.lib dccb2s.sig + +This will create dccb2s.sig in the current directory. For dcc to use +this file, place it in the same directory as dcc itself, or point the +environment variable DCC to the directory containing it. + +Another example: to make the signature file for Microsoft Visual +C/C++ (C 8.0), large model, and assuming the libraries are in +the directory \msvc\lib, you would type + +makedsig \msvc\lib\llibce.lib dccm8l.sig + +Note that the signature files for Turbo Pascal from version 4 onwards +are generated by makedstp, not makedsig. The latter program reads a +special file called turbo.tpl, as there are no normal .lib files for +turbo pascal. Dcc will recognise turbo pascal 3.0 files, and look +for dcct3p.sig.
Because all the library routines are contained in +every Turbo Pascal executable, there are no library files or even a +turbo.tpl file, so the signature file would have to be constructed by +guesswork. You can still use dcc on these files; just ignore the +warning about not finding the signature file. + +For executables that dcc does not recognise, it will look for the +signature file dccxxx.sig. That way, if you have a new compiler, you +can at least have dcc detect library calls, even if it attempts to +decompile them all, and has not identified the main program. + +Logitech Modula V1.0 files are recognised, and the signature file +dccl1x.sig is looked for. This was experimental in nature, and is not +recommended for serious analysis at this stage. + + + +4 What's in a signature file? +----------------------------- + +The details of a signature file are best documented in the source for +makedsig; see the function saveFile(). Briefly: +1) a 4 byte pattern identifying the file as a signature file: "dccs". +2) a two byte integer containing the number of keys (signatures) +3) a two byte integer containing the number of vertices on the graph + used to generate the hash table. See the source code and/or the + Czech, Havas and Majewski articles for details +4) a two byte integer containing the pattern length +5) a two byte integer containing the symbolic name length + +The next sections all have the following structure: +1) 2 char ID +2) a two byte integer containing the size of the body +3) the body. + +There are 4 sections: "T1", "T2", "gg", and "ht". T1 and T2 are the +tables associated with the hash function. (The hash function is a +random function, meaning that it involves tables. T1 and T2 are the +tables used by the hash function). "gg" is another table associated +with the graph needed by the perfect hashing function algorithm. + +"ht" contains the actual hash table. 
The body of this section is an +array of records of this structure: +typedef struct _hashEntry +{ + char name[SYMLEN]; /* The symbol name */ + byte pat [PATLEN]; /* The pattern */ + word offset; /* Offset (needed temporarily) */ +} HASHENTRY; + +This part of the signature file can be browsed with a binary dump +program; a PATLEN length signature will follow the (null padded) +symbol name. There are tools for searching signature files, e.g. +srchsig, dispsig, and readsig. See below. + + + +5 What other tools are useful for signature work? +------------------------------------------------- + +Makedstp - makes signature files from turbo.tpl. Needed to make +signature files for Turbo Pascal version 4.0 and later. + +SrchSig - tells you whether a given pattern exists in a signature +file, and gives its name. You need a binary file with the signature +in it, exactly the right length. This can most easily be done with +debug (comes with MS-DOS). + +DispSig - given the name of a function, displays its signature, and +stores the signature into a binary file as well. (You can use this +file with srchsig on another signature file, if you want). + +ReadSig - reads a signature file, checking for correct structure, and +displaying duplicate signatures. With the -a switch, it will display +all signatures, with their symbols. + +The file perfhlib.c is used by various of these tools to do the work +of the perfect hashing functions. It could be used as part of other +tools that use signature files, or just perfect hashing functions for +that matter. + + diff --git a/tools/parsehdr/CMakeLists.txt b/tools/parsehdr/CMakeLists.txt new file mode 100644 index 0000000..e69de29 diff --git a/tools/parsehdr/locident.h b/tools/parsehdr/locident.h new file mode 100644 index 0000000..a57a86a --- /dev/null +++ b/tools/parsehdr/locident.h @@ -0,0 +1,117 @@ +/*$Log: locident.h,v $ + * Revision 1.6 94/02/22 15:20:23 cifuente + * Code generation is done. 
+ * + * Revision 1.5 93/12/10 09:38:20 cifuente + * New high-level types + * + * Revision 1.4 93/11/10 17:30:51 cifuente + * Procedure header, locals + * + * Revision 1.3 93/11/08 12:06:35 cifuente + * du1 analysis finished. Instantiates procedure arguments for user + * declared procedures. + * + * Revision 1.2 93/10/25 11:01:00 cifuente + * New SYNTHETIC instructions for d/u analysis + * + * Revision 1.1 93/10/11 11:47:39 cifuente + * Initial revision + * + * File: locIdent.h + * Purpose: High-level local identifier definitions + * Date: October 1993 + */ + + +/* Type definition */ +typedef struct { + Int csym; /* # symbols used */ + Int alloc; /* # symbols allocated */ + Int *idx; /* Array of integer indexes */ +} IDX_ARRAY; + +/* Type definitions used in the decompiled program */ +typedef enum { + TYPE_UNKNOWN = 0, /* unknown so far */ + TYPE_BYTE_SIGN, /* signed byte (8 bits) */ + TYPE_BYTE_UNSIGN, /* unsigned byte */ + TYPE_WORD_SIGN, /* signed word (16 bits) */ + TYPE_WORD_UNSIGN, /* unsigned word (16 bits) */ + TYPE_LONG_SIGN, /* signed long (32 bits) */ + TYPE_LONG_UNSIGN, /* unsigned long (32 bits) */ + TYPE_RECORD, /* record structure */ + TYPE_PTR, /* pointer (32 bit ptr) */ + TYPE_STR, /* string */ + TYPE_CONST, /* constant (any type) */ + TYPE_FLOAT, /* floating point */ + TYPE_DOUBLE, /* double precision float */ +} hlType; + +static char *hlTypes[13] = {"", "char", "unsigned char", "int", "unsigned int", + "long", "unsigned long", "record", "int *", "char *", + "", "float", "double"}; + +typedef enum { + STK_FRAME, /* For stack vars */ + REG_FRAME, /* For register variables */ + GLB_FRAME, /* For globals */ +} frameType; + + +/* Enumeration to determine whether pIcode points to the high or low part + * of a long number */ +typedef enum { + HIGH_FIRST, /* High value is first */ + LOW_FIRST, /* Low value is first */ +} hlFirst; + + +/* LOCAL_ID */ +typedef struct { + hlType type; /* Probable type */ + boolT illegal;/* Boolean: not a valid field any 
more */ + IDX_ARRAY idx; /* Index into icode array (REG_FRAME only) */ + frameType loc; /* Frame location */ + boolT hasMacro;/* Identifier requires a macro */ + char macro[10];/* Macro for this identifier */ + char name[20];/* Identifier's name */ + union { /* Different types of identifiers */ + byte regi; /* For TYPE_BYTE(WORD)_(UN)SIGN registers */ + struct { /* For TYPE_BYTE(WORD)_(UN)SIGN on the stack */ + byte regOff; /* register offset (if any) */ + Int off; /* offset from BP */ + } bwId; + struct _bwGlb { /* For TYPE_BYTE(WORD)_(UN)SIGN globals */ + int16 seg; /* segment value */ + int16 off; /* offset */ + byte regi; /* optional indexed register */ + } bwGlb; + struct _longId{ /* For TYPE_LONG_(UN)SIGN registers */ + byte h; /* high register */ + byte l; /* low register */ + } longId; + struct _longStkId { /* For TYPE_LONG_(UN)SIGN on the stack */ + Int offH; /* high offset from BP */ + Int offL; /* low offset from BP */ + } longStkId; + struct { /* For TYPE_LONG_(UN)SIGN globals */ + int16 seg; /* segment value */ + int16 offH; /* offset high */ + int16 offL; /* offset low */ + byte regi; /* optional indexed register */ + } longGlb; + struct { /* For TYPE_LONG_(UN)SIGN constants */ + dword h; /* high word */ + dword l; /* low word */ + } longKte; + } id; +} ID; + +typedef struct { + Int csym; /* No. of symbols in the table */ + Int alloc; /* No. of symbols allocated */ + ID *id; /* Identifier */ +} LOCAL_ID; + + diff --git a/tools/parsehdr/parsehdr.cpp b/tools/parsehdr/parsehdr.cpp new file mode 100644 index 0000000..f2a35c7 --- /dev/null +++ b/tools/parsehdr/parsehdr.cpp @@ -0,0 +1,1538 @@ +/* + *$Log: parsehdr.c,v $ + */ +/* Code to parse a header (.h) file */ +/* Descended from xansi; thanks Geoff! thanks Glenn! 
*/ + +#include +#include +#include +#include +#include /* For debugging */ +#include "parsehdr.h" + +dword userval; + +/* the IGNORE byte */ +byte slosh; +byte last_slosh; +byte quote1; +byte quote2; +byte comment; +byte hash; +byte ignore1; /* Special: ignore egactly 1 char */ +byte double_slash; +byte spare; + +int buff_idx; +char buffer[BUFF_SIZE]; + +byte start; /* Started recording to the buffer */ +byte func; /* Function header detected */ +byte hash_ext; +int curly; /* Level inside curly brackets */ +int xtern; /* Level inside a extern "C" {} situation */ +int round; /* Level inside () */ +int line,col; +dword chars; +char lastch; + +#define NIL -1 /* Used as an illegal index */ + +FILE *datFile; /* Stream of the data (output) file */ +char fileName[81]; /* Name of current header file */ + +PH_FUNC_STRUCT *pFunc; /* Pointer to the functions array */ +int numFunc; /* How many elements saved so far */ +int allocFunc; /* How many elements allocated so far */ +int headFunc; /* Head of the function name linked list */ + +PH_ARG_STRUCT *pArg; /* Pointer to the arguements array */ +int numArg; /* How many elements saved so far */ +int allocArg; /* How many elements allocated so far */ +int headArg; /* Head of the arguements linked list */ + + +// DO Callback +boolT +phDoCB(int id, char *data) +{ +/* return callback(hDCX, id, data, userval);*/ + return TRUE; +} + + +void +phError(char *errmsg) +{ + char msg[200]; + + sprintf(msg,"PH *ERROR*\nFile: %s L=%d C=%d O=%lu\n%s", fileName, line, col, + chars, errmsg); + printf(msg); +} + + +void +phWarning(char *errmsg) +{ + char msg[200]; + + sprintf(msg,"PH -warning-\nFile: %s L=%d C=%d O=%lu\n%s\n", fileName, line, + col, chars, errmsg); + printf(msg); +} + + + + +int +IsIgnore() +{ + return( comment || + quote1 || + quote2 || + slosh || + hash || + ignore1 || + double_slash + ); +} + +boolT +isAlphaNum(char ch) +{ + return(((ch >= 'A') && (ch <= 'Z')) || + ((ch >= 'a') && (ch <= 'z')) || + ((ch >= '0') && (ch <= '9')) 
|| + (ch == '_') + ); +} + + +boolT +AddToBuffer(char ch) +{ + if (buff_idx >= BUFF_SIZE) + { + ERR("function buffer overflow (function unterminated?)\n"); + return FALSE; + } + else + { + buffer[buff_idx++] = ch; + return TRUE; + } +} + +boolT +remFromBuffer() +{ + if (buff_idx == 0) + { + ERR("function buffer underflow (internal error?)\n"); + return FALSE; + } + else + { + buff_idx--; + return TRUE; + } +} + + + + + +/*----------------------------------------------*/ +/* This is a function declaration, typedef, etc.* +/* Do something with it. */ +/*----------------------------------------------*/ + +void +ProcessBuffer(int id) +{ + if (buff_idx > 0) + { + buffer[buff_idx] = '\0'; + + // CALL CALL BACK FUNTION WITH APPRORIATE CODE! + + switch (id) + { + case PH_FUNCTION: + // eek, but... + case PH_PROTO: + // sort out into params etc + phBuffToFunc(buffer); + break; + + case PH_TYPEDEF: + case PH_DEFINE: + // sort out into defs + phBuffToDef(buffer); + break; + + case PH_MPROTO: + // eek! + + case PH_VAR: + // eek! + + case PH_JUNK: + default: + phDoCB(id, buffer); + break; + } + start = FALSE; + func = FALSE; + buff_idx = 0; + } +} + + +void +phInit(char *filename) // filename is for reference only!!! 
+{ + slosh = + last_slosh = + start = + func = + comment = + double_slash = + hash = + ignore1 = + quote1 = + quote2 = + hash_ext = + FALSE; + + buff_idx = + curly = + xtern = + col = + round = 0; + + line = 1; + + chars = 0L; + + lastch = 0; + +} + + + +/* Placeholder for releasing scanner resources; currently does nothing and + always returns TRUE. */ +boolT +phFree(void) +{ + // remove atoms etc, free buffers + return TRUE; +} + + + + + + +/* Advance the scanner by one character: update the column, offset and line + counters, track comment / quote / preprocessor / brace / parenthesis state, + and hand a completed declaration to ProcessBuffer() when ',', ';' or '{' + ends one at the current nesting level. */ +void +phChar(char ch) +{ + col++; + chars++; +/* printf("%d%% done\r", chars*100/filelen); /**/ + + + if (slosh != last_slosh) + { + DBG("[SLOSH OFF]"); + } + + switch(ch) + { + case ',': + if (!IsIgnore() && (curly == xtern) && (start) && (func)) + /* must be multi proto */ + { + if (lastch == ')') /* eg int foo(), bar(); */ + { + ProcessBuffer(PH_MPROTO); + DBG("[END OF MULTIPROTOTYPE]") + } + } + break; + + case ';': + if (!IsIgnore() && (curly == xtern) && (start)) + { + if (func) + { + if (lastch == ')') + { + ProcessBuffer(PH_PROTO); + DBG("[END OF PROTOTYPE]") + } + } + else + { + ProcessBuffer(PH_VAR); + DBG("[END OF VARIABLE]") + } + } + break; + + case 10: /* end of line */ + line++; +/* chars++; */ /* must have been a CR before it methinks */ + col = 0; + /* A newline ends a double-slash comment; it ends a # line too unless + the line finished with a backslash (hash_ext) */ + if (double_slash) + { + double_slash = FALSE; + DBG("[DOUBLE_SLASH_COMMENT OFF]") + } + else if (hash) + { + if (hash_ext) + { + hash_ext = FALSE; + } + else + { + hash = FALSE; + DBG("[HASH OFF]") + } + } + if (xtern && (strncmp(buffer, "extern", 6) == 0)) + { + start = FALSE; /* Not the start of anything */ + buff_idx = 0; /* Kill the buffer */ + } + break; + + case '#': /* start of # something at beginning of line */ + if ((!IsIgnore()) && (curly == xtern)) + { + hash = TRUE; + DBG("[HASH ON]") + } + break; + + case '{': + if (!IsIgnore()) + { + char st[80]; + + if ((curly == xtern) && (start) && (func)) + { + ProcessBuffer(PH_FUNCTION); + DBG("[FUNCTION DECLARED]") + } + + curly++; + sprintf(st, "[CURLY++ %d]", curly); + DBG(st) + } + break; + + case '}': + if (!IsIgnore()) + { + char st[80]; + + if (curly > 0) + { + if (xtern && (xtern == curly)) + { + xtern = 
0; + DBG("[EXTERN OFF]"); + } + curly--; + sprintf(st,"[CURLY-- %d]", curly); + DBG(st) + } + else + { + /* match the {s */ + ERR("too many \"}\"\n"); + } + } + break; + + case '(': + if (!IsIgnore()) + { + char st[80]; + + /* The first '(' at declaration level marks the buffer as a function */ + if ((curly == xtern) && (round == 0) && (start)) + { + func = TRUE; + DBG("[FUNCTION]") + } + round++; + sprintf(st,"[ROUND++ %d]", round); + DBG(st) + } + break; + + case ')': + if (!IsIgnore()) + { + char st[80]; + + if (round > 0) + { + round--; + sprintf(st,"[ROUND-- %d]",round); + DBG(st) + } + else + { + ERR("too many \")\"\n"); + } + } + break; + + case '\\': + if (!slosh && (quote1 || quote2)) + { + last_slosh = TRUE; + DBG("[SLOSH ON]") + } + else + if (hash) + { + hash_ext = TRUE; + } + break; + + case '*': + /* NOTE(review): only lastch is tested here, not quote1/quote2 or + double_slash, so a slash-star pair inside a string literal also + raises the comment level -- confirm whether that is intended */ + if (lastch == '/') /* allow nested comments ! */ + { + char st[80]; + + comment++; + + if (start) + { + remFromBuffer(); + } + + sprintf(st, "[COMMENT++ %d]",comment); + DBG(st) + + + } + break; + + case '/': + if ((lastch == '*') && (!quote1) && (!quote2) ) + { + if (comment > 0) + { + char st[80]; + + comment--; + + /* Don't want the closing slash in the buffer */ + ignore1 = TRUE; + + sprintf(st, "[COMMENT-- %d]",comment); + DBG(st) + } + else + { + ERR("too many \"*/\"\n"); + } + } + else + if (lastch == '/') + { + /* Double slash to end of line is a comment. */ + double_slash = TRUE; + + if (start) + { + remFromBuffer(); + } + + DBG("[DOUBLE_SLASH_COMMENT ON]") + } + break; + + case '\"': + if ((!comment) && (!quote1) && (!slosh)) + { + quote2 = (byte) (!quote2); + if (quote2) DBG("[QUOTE2ON]") + if (!quote2) DBG("[QUOTE2OFF]") + + /* We want to catch the extern "C" {} thing... 
*/ + if (!quote2 && start && (lastch == 'C')) + { + if (strcmp(buffer, "extern ") == 0) + { + char st[80]; + + xtern = curly+1; /* The level inside the extern {} */ + sprintf(st, "[EXTERN ON %d]", xtern); + DBG(st) + } + } + } + break; + + case '\'': + if ((!comment) && (!quote2) && (!slosh)) + { + { + quote1 = (byte) (!quote1); + if (quote1) DBG("[QUOTE1ON]") + if (!quote1) DBG("[QUOTE1OFF]") + } + } + break; + + case '\t': + ch = ' '; + break; + + + default: + if ((ch != -1) && !IsIgnore() && (curly == xtern) && (!start) && + (ch != ' ')) + { + start = TRUE; + DBG("[START OF SOMETHING]") + } + break; + + + } + + if (ch != -1) + { + if (start && !IsIgnore()) + { + AddToBuffer(ch); + } + } + + lastch = ch; + slosh = last_slosh; + last_slosh = 0; + ignore1 = FALSE; + +} /* of phChar */ + + +/* Take a lump of data from a header file, and churn the state machine + through each char */ +boolT +phData(char *buff, int ndata) +{ + int i, j; +#ifdef DEBUG + char cLine[81]; + char cfLine[90]; +#endif + + if (ndata < 1) + { + ndata = strlen(buff); + } + j = 0; + + for (i=0; i < ndata; i++) + { + phChar(buff[i]); +#ifdef DEBUG + if (j < 80) cLine[j++] = buff[i]; + if (buff[i] == '\n') + { + cLine[j] = '\0'; + sprintf(cfLine, "\n***%03d %s\n", line, cLine); + DBG(cfLine); + j = 0; + } +#endif + } + + return TRUE; +} + + +boolT +phPost(void) +{ + boolT err=TRUE; + char msg[80]; + + if (quote1) + { + WARN("EOF: \' not closed"); + err = FALSE; + } + + if (quote2) + { + WARN("EOF: \" not closed"); + err = FALSE; + } + + if (comment) + { + WARN("EOF: comment not closed"); + err = FALSE; + } + + if (slosh) + { + WARN("EOF: internal slosh set error"); + err = FALSE; + } + + if (curly > 0) + { + sprintf(msg,"EOF: { level = %d", curly); + WARN(msg); + err = FALSE; + } + + if (round > 0) + { + sprintf(msg,"EOF: ( level = %d", round); + WARN(msg); + err = FALSE; + } + + if (hash) + { + WARN("warning hash is set on last line ???"); + err = FALSE; + } + + return err; +} + + + +#if 0 +enum 
FUNC_E { NORM , LEFT , RIGHT, NEXT }; + +void +NamesToLabel(LPPH_FUNC lpF) +{ +int i, j; + + for (i=0; i < lpF->num_params; i++) + { + while (isspace(types[i][0])) + { + lstrcdel(types[i], 0); + } + + j = 0; + while(names[i][j] != '\0') + { + if ((names[i][j] != '*') && + (names[i][j] != '[') && + (names[i][j] != ']')) + { + lstrccat(label[i], names[i][j]); + } + j++ ; + } + } +} + +boolT +MoveLastWord(char *src, char *dest) /* take arg name from type */ +{ +int i, l; +LPSTR s2; + + if (s2 = strchr(src, '*')) /* if there is a * use it as starting point */ + { + lstrcpy(dest, s2); + *s2 = '\0'; /* terminate */ + } + else + { + l = strlen(src); + i = l-1; + while ((i > 0) && ((isspace(src[i])) || (src[i] == '[') || (src[i] == ']'))) + /* find last non space or non [] */ + { + if (isspace(src[i])) + { + lstrcdel(src, i); + } /* remove trailing spaces */ + i--; + } + while ((i > 0) && (!isspace(src[i]))) /* find the previous space */ + { + i--; + } + if (i) + { + i++; + lstrcpy(dest, &src[i]); + src[i] = '\0'; /* terminate */ + } + else + { + /* no type !!! */ + +// if ((mode == TOANSI) || +// ((mode == FROMANSI) && (strstr(src, "...") == NULL)) +// ) + + if (strstr(src, "...") == NULL) // not a var arg perhaps? + { + char msg[80]; + +// sprintf(msg,"no type for arg # %d, \"%s\"\n",lpF->num_params, (LPSTR) src); +// ERR(msg); + return FALSE; + } + else // d'oh! + { + WARN("looks like a vararg to me!"); + /* maybe it is a ... !!!!!! 
*/ + } + } + } + + + i = strlen(src) - 1; + + while ((isspace(src[i])) && (i >= 0)) + { + src[i--] = '\0'; + } + + i = 0; + while (dest[i] != '\0') + { + if (isspace(dest[i])) + { + lstrcdel(dest, i); + } + i++ ; + } + + while ((isspace(src[0])) && (src[0] != '\0')) + { + lstrcdel(src, 0); + } + + return TRUE; +} + + + + +int +Lookup(char *aname) /* lookup var name in labels and return arg number */ +{ +int i, p; +char tname[NAMES_L]; +int bstate = FALSE; + +/* eg: lookup *fred[] for match to fred */ + + tname[0] = '\0'; + p = -1; /* default return for no match */ + + /* tname is aname without puncs etc */ + i = 0; + while(aname[i] != '\0') + { + if (aname[i] == '[') + { + bstate = TRUE; + } + else + if (aname[i] == ']') + { + bstate = FALSE; + } + + if ((isalnum(aname[i]) || aname[i] == '-' || aname[i] == '_') && (!bstate)) + { + lstrccat(tname, aname[i]); + } + i++; + } + + /* lookup tname in the labels and find out which arg it is */ + for (i=0; i < num_params; i++) + { + if (lstrcmp(tname, label[i]) == 0) + { + p = i; /* this one ! */ + break; + } + } + return p; +} + + +/* put the name and type at correct arg number */ + + +boolT +Plop(char *atype, char *aname, int FAR *num_params, int FAR *num_found) +{ +char msg[80]; +int t; + + if (num_found >= num_params) + { + sprintf(msg,"extra argument \"%s\" in func \"%s\"\n", (LPSTR) aname, (LPSTR) func_name); + ERR(msg); + return FALSE; + } + + t = Lookup(aname); /* arg number */ + + if (t == -1) /* couldn't find it */ + { + sprintf(msg,"bad argument \"%s\" in func \"%s\"\n", (LPSTR) aname, (LPSTR) func_name); + ERR(msg); + return FALSE; + } + + if ((strlen(types[t]) > 0) || (strlen(names[t]) > 0)) /* in use ? */ + { + + sprintf(msg,"argument \"%s\" already used in \"%s\"\n", (LPSTR) aname, (LPSTR) func_name); + ERR(msg); + return FALSE; + } + + lstrcpy(types[t], atype); + lstrcpy(names[t], aname); + + num_found++; /* got another! 
*/ + + return TRUE; +} + +#define IGN ((*chp == '(') || (*chp == ')') || (*chp == ',') || \ + (*chp == ';') || (isspace(*chp))) + +#define IGNP ((*chp == '(') || (*chp == ')') || (*chp == ',') || \ + (*chp == ';') || (*chp == '\n')) + +#endif + + + + + + + +char token[40]; /* Strings that might be types, nodifiers or idents */ +char ident[40]; /* Names of functions or protos go here */ +char lastChar; +char *p; +int indirect; +boolT isLong, isShort, isUnsigned; +int lastTokPos; /* For "^" in error messages */ +char *buffP; +int tok; /* Current token */ +baseType bt; /* Type of current param (or return type) */ +int argNum; /* Arg number (in case no name: arg1, arg2...) */ + + +void +initType(void) +{ + indirect = 0; + isLong = isShort = isUnsigned = FALSE; + bt = BT_INT; +} + +void +errorParse(char *msg) +{ + printf("%s: got ", msg); + if (tok == TOK_NAME) printf("<%s>", token); + else if (tok == TOK_DOTS) printf("..."); + else printf("%c (%X)", tok, tok); + printf("\n%s\n", buffP); + printf("%*c\n", lastTokPos+1, '^'); +} + + +/* Get a token from pointer p */ +int +getToken(void) +{ + char ch; + + memset(token, 0, sizeof(token)); + while (*p && ((*p == ' ') || (*p == '\n'))) p++; + lastTokPos = p - buffP; /* For error messages */ + if (lastChar) + { + ch = lastChar; + lastChar = '\0'; + return ch; + } + + while (ch = *p++) + { + switch (ch) + { + case '*': + case '[': + case ']': + case '(': + case ')': + case ',': + case ';': + case ' ': + case '\n': + if (strlen(token)) + { + if ((ch != ' ') && (ch != '\n')) lastChar = ch; + return TOK_NAME; + } + else if ((ch == ' ') || (ch == '\n')) break; + else return ch; + + case '.': + if ((*p == '.') && (p[1] == '.')) + { + p += 2; + return TOK_DOTS; + } + + + default: + token[strlen(token)] = ch; + } + } + return TOK_EOL; +} + +boolT +isBaseType(void) +{ + if (tok != TOK_NAME) return FALSE; + + if (strcmp(token, "int") == 0) + { + bt = BT_INT; + } + else if (strcmp(token, "char") == 0) + { + bt = BT_CHAR; + } + else if 
(strcmp(token, "void") == 0) + { + bt = BT_VOID; + } + else if (strcmp(token, "float") == 0) + { + bt = BT_FLOAT; + } + else if (strcmp(token, "double") == 0) + { + bt = BT_DOUBLE; + } + else if (strcmp(token, "struct") == 0) + { + bt = BT_STRUCT; + tok = getToken(); /* The name of the struct */ + /* Do something with the struct name */ + } + else if (strcmp(token, "union") == 0) + { + bt = BT_STRUCT; /* Well its still a struct */ + tok = getToken(); /* The name of the union */ + /* Do something with the union name */ + } + else if (strcmp(token, "FILE") == 0) + { + bt = BT_STRUCT; + } + else if (strcmp(token, "size_t") == 0) + { + bt = BT_INT; + isUnsigned = TRUE; + } + else if (strcmp(token, "va_list") == 0) + { + bt = BT_VOID; + indirect = 1; /* va_list is a void* */ + } + else return FALSE; + return TRUE; +} + +/* Return TRUE if the current token is a type modifier, recording long, + unsigned and short in their flags. const and _far are recognised but no + flag is stored for them. */ +boolT +isModifier(void) +{ + if (tok != TOK_NAME) return FALSE; + if (strcmp(token, "long") == 0) + { + isLong = TRUE; + } + else if (strcmp(token, "unsigned") == 0) + { + isUnsigned = TRUE; + } + else if (strcmp(token, "short") == 0) + { + isShort = TRUE; + } + else if (strcmp(token, "const") == 0) + { + /* Recognised, nothing recorded */ + } + else if (strcmp(token, "_far") == 0) + { + /* Recognised, nothing recorded */ + } + else return FALSE; + return TRUE; +} + +/* Return TRUE if the current token is one of the attribute keywords far, + __far or __interrupt. Nothing is recorded for any of them. */ +boolT +isAttrib(void) +{ + + if (tok != TOK_NAME) return FALSE; + if (strcmp(token, "far") == 0) + { + /* Not implemented yet */ + } + else if (strcmp(token, "__far") == 0) + { + /* Not implemented yet */ + } + else if (strcmp(token, "__interrupt") == 0) + { + /* Not implemented yet */ + } + else return FALSE; + return TRUE; +} + + +/* Return TRUE if the current token text is one of the cdecl spellings. */ +boolT +isCdecl(void) +{ + return ( + (strcmp(token, "__cdecl") == 0) || + (strcmp(token, "_Cdecl") == 0) || + (strcmp(token, "cdecl") == 0)); +} + +void +getTypeAndIdent(void) +{ + /* Get a type and ident pair. Complicated by the fact that types are + actually optional modifiers followed by types, and the identifier + is also optional. 
For example: + myfunc(unsigned footype *foovar); + declares an arg named foovar, of type unsigned pointer to footype. + But we don't expand typedefs and #defines, so the footype may not + be recognised as a type. Then it is not possible to know whether + footype is an identifier or an unknown type until the next token is + read (in this case, the star). If it is a comma or paren, then footype + is actually an identifier. (This function gets called for the function + return type and name as well, so "(" is possible as well as ")" and + ","). + The identifier is copied to ident. + */ + + boolT im = FALSE, ib; + + while (isModifier()) + { + tok = getToken(); + im = TRUE; + } + + if (!im && (tok != TOK_NAME)) + { + errorParse("Expected type"); + } + + /* isBaseType() only classifies the token; step past it when it matched */ + ib = isBaseType(); + if (ib) tok = getToken(); + + /* Could be modifiers like "far", "interrupt" etc */ + while (isAttrib()) + { + tok = getToken(); + } + + /* Each '*' adds one level of indirection */ + while (tok == '*') + { + indirect++; + tok = getToken(); + } + + /* Ignore the cdecl's */ + while (isCdecl()) tok = getToken(); + + if (tok == TOK_NAME) + { + /* This could be an ident or an unknown type */ + strcpy(ident, token); + tok = getToken(); + } + + if (!ib && (tok != ',') && (tok != '(') && (tok != ')')) + { + /* That was (probably) not an ident! 
Assume it was an unknown type */ +printf("Unknown type %s\n", ident); + ident[0] = '\0'; + bt = BT_UNKWN; + + while (tok == '*') + { + indirect++; + tok = getToken(); + } + + /* Ignore the cdecl's */ + while (isCdecl()) tok = getToken(); + } + + if (tok == TOK_NAME) + { + /* This has to be the ident */ + strcpy(ident, token); + tok = getToken(); + } + + while (tok == '[') + { + indirect++; /* Treat x[] like *x */ + do + { + tok = getToken(); /* Ignore stuff between the '[' and ']' */ + } + while ((tok != ']') && (tok != TOK_EOL)); + if (tok == TOK_EOL) break; /* Unterminated '[': stop at the end of the text */ + tok = getToken(); + } + +} + + +/* Map the parsed base type, modifier flags and indirection level (bt, isLong, + isUnsigned, indirect) onto an hlType value. Any pointer to char becomes + TYPE_STR, any other pointer TYPE_PTR; void and unknown base types become + TYPE_UNKNOWN. */ +hlType +convType(void) +{ + /* Convert from base type and signed/unsigned flags, etc, to a hlType + as Cristina currently uses */ + + if (indirect >= 1) + { + if (bt == BT_CHAR) return TYPE_STR; /* Assume char* is ptr */ + /* Pointer to anything else (even unknown) is type pointer */ + else return TYPE_PTR; + } + switch (bt) + { + case BT_INT: + if (isLong) + { + if (isUnsigned) return TYPE_LONG_UNSIGN; + else return TYPE_LONG_SIGN; + } + else + { + if (isUnsigned) return TYPE_WORD_UNSIGN; + else return TYPE_WORD_SIGN; + } + + case BT_CHAR: + if (isUnsigned) return TYPE_BYTE_UNSIGN; + else return TYPE_BYTE_SIGN; + + case BT_FLOAT: + return TYPE_FLOAT; + case BT_DOUBLE: + return TYPE_DOUBLE; + + case BT_STRUCT: + return TYPE_RECORD; + + case BT_VOID: + default: + return TYPE_UNKNOWN; + } +} + + +/* Add a new function to the array of function name and return types. The + array is logically sorted by a linked list. 
Note that numArg is filled + in later. Returns TRUE when the name was already present (nothing is added) + and FALSE when a new entry was created */ +boolT +addNewFunc(char *name, hlType typ) +{ + int i, prev, res; + + /* Stored names hold at most SYMLEN-1 characters, so truncate before the + lookup; otherwise a long name never matches its own stored copy */ + name[SYMLEN-1] = '\0'; + + /* First see if the name already exists */ + prev = NIL; + for (i=headFunc; i != NIL; i = pFunc[i].next) + { + res = strcmp(pFunc[i].name, name); + if (res > 0) + { + break; /* Exit this loop when just past insert point */ + } + if (res == 0) + { + /* Already have this function name */ + return TRUE; + } + prev = i; + } + + if (numFunc >= allocFunc) + { + allocFunc += DELTA_FUNC; + /* Explicit cast: this file is built as C++ */ + pFunc = (PH_FUNC_STRUCT *)realloc(pFunc, allocFunc * sizeof(PH_FUNC_STRUCT)); + if (pFunc == NULL) + { + fprintf(stderr, "Could not allocate %lu bytes for function array\n", + (unsigned long)(allocFunc * sizeof(PH_FUNC_STRUCT))); + exit(1); + } + memset(&pFunc[allocFunc - DELTA_FUNC], 0, + DELTA_FUNC * sizeof(PH_FUNC_STRUCT)); + } + + strcpy(pFunc[numFunc].name, name); + pFunc[numFunc].typ = typ; + pFunc[numFunc].firstArg = numArg; + /* Link the new entry in after prev, or at the head of the list */ + if (prev == NIL) + { + pFunc[numFunc].next = headFunc; + headFunc = numFunc; + } + else + { + pFunc[numFunc].next = pFunc[prev].next; + pFunc[prev].next = numFunc; + } + numFunc++; + + return FALSE; +} + +/* Append a new argument (name and type) to the end of the argument array, + growing the array by DELTA_FUNC entries when it is full. The name is + truncated to SYMLEN-1 characters. */ +void +addNewArg(char *name, hlType typ) +{ + if (numArg >= allocArg) + { + allocArg += DELTA_FUNC; + /* Explicit cast: this file is built as C++ */ + pArg = (PH_ARG_STRUCT *)realloc(pArg, allocArg * sizeof(PH_ARG_STRUCT)); + if (pArg == NULL) + { + fprintf(stderr, "Could not allocate %lu bytes for arguement array\n", + (unsigned long)(allocArg * sizeof(PH_ARG_STRUCT))); + exit(1); + } + memset(&pArg[allocArg - DELTA_FUNC], 0, + DELTA_FUNC * sizeof(PH_ARG_STRUCT)); + } + name[SYMLEN-1] = '\0'; + strcpy(pArg[numArg].name, name); + pArg[numArg].typ = typ; + numArg++; + +} + +/* Parse one parameter definition (or "...") starting at the current token + and record it against the most recently added function. */ +void +parseParam(void) +{ + initType(); + if (tok == TOK_DOTS) + { + tok = getToken(); + pFunc[numFunc-1].bVararg = TRUE; + return; + } + + ident[0] = '\0'; /* Forget the previous name so unnamed params become argN */ + getTypeAndIdent(); + + if ((bt == BT_VOID) && (indirect == 0)) + { + /* Just a void arg list. 
Ignore and pFunc[].numArgs will be set to zero */ + return; + } + argNum++; + if (ident[0]) + { + addNewArg(ident, convType()); + } + else + { + sprintf(ident, "arg%d", argNum); + addNewArg(ident, convType()); + } + +} + + +/* Parse the prototype as follows: + [] ["*"]... "(" [","]...")" +where is +["const"] [] [] modifier could be short, long, far +and where paramdef is +["const"] ["*"]... [] or "..." +Note that the closing semicolon is not seen. +*/ + + +void +phBuffToFunc(char *buff) +{ + + initType(); + p = buffP = buff; + tok = getToken(); + + /* Ignore typedefs, for now */ + if ((tok == TOK_NAME) && (strcmp(token, "typedef") == 0)) return; + + getTypeAndIdent(); + + if (ident[0] == '\0') + { + errorParse("Expected function name"); + return; + } + + if (addNewFunc(ident, convType())) + { + /* Already have this prototype, so ignore it */ + return; + } + + if (tok != '(') + { + errorParse("Expected '('"); + return; + } + tok = getToken(); + + argNum = 0; + while (tok != TOK_EOL) + { + parseParam(); + if ((tok != ',') && (tok != ')')) + { + errorParse("Expected ',' between parameter defs"); + return; + } + tok = getToken(); + } + pFunc[numFunc-1].numArg = argNum; /* Number of args this func */ +} + + +void +phBuffToDef(char *buff) +{ + +} + + +void +writeFile(char *buffer, int len) +{ + if ((int)fwrite(buffer, 1, len, datFile) != len) + { + printf("Could not write to file\n"); + exit(1); + } +} + + +void +writeFileShort(word w) +{ + byte b; + + b = (byte)(w & 0xFF); + writeFile(&b, 1); /* Write a short little endian */ + b = (byte)(w>>8); + writeFile(&b, 1); +} + + +void +saveFile(void) +{ + int i; + + fprintf(datFile, "dccp"); /* Signature */ + fprintf(datFile, "FN"); /* Function name tag */ + writeFileShort(numFunc); /* Number of func name records */ + for (i=headFunc; i != NIL; i = pFunc[i].next) + { + writeFile(pFunc[i].name, SYMLEN); + writeFileShort((word)pFunc[i].typ); + writeFileShort((word)pFunc[i].numArg); + writeFileShort((word)pFunc[i].firstArg); + 
writeFile((char *)&pFunc[i].bVararg, 1); + } + + fprintf(datFile, "PM"); /* Parameter Name tag */ + writeFileShort(numArg); /* Number of args */ + for (i=0; i < numArg; i++) + { +/* writeFile(pArg[i].name, SYMLEN); /* Don't want names yet */ + writeFileShort((word)pArg[i].typ); + } + +} + +void +main(int argc, char *argv[]) +{ + char *buf; + long fSize; + int ndata; + FILE *f, *fl; + int i; + char *p; + + if (argc != 2) + { + printf("Usage: parsehdr \n" + "where is a file of header file names to parse.\n" + "The file dcclibs.dat will be written\n"); + exit(1); + } + + fl = fopen(argv[1], "rt"); + if (fl == NULL) + { + printf("Could not open file list file %s\n", argv[1]); + exit(1); + } + + datFile = fopen("dcclibs.dat", "wb"); + if (datFile == NULL) + { + printf("Could not open output file dcclibs.dat\n"); + exit(2); + } + + /* Allocate the arrys for function and proto names and types */ + pFunc = malloc(DELTA_FUNC * sizeof(PH_FUNC_STRUCT)); + if (pFunc == 0) + { + fprintf(stderr, "Could not malloc %ud bytes for function name array\n", + DELTA_FUNC * sizeof(PH_FUNC_STRUCT)); + exit(1); + } + memset(pFunc, 0, DELTA_FUNC * sizeof(PH_FUNC_STRUCT)); + allocFunc = DELTA_FUNC; + numFunc = 0; + + pArg = malloc(DELTA_FUNC * sizeof(PH_ARG_STRUCT)); + if (pArg == 0) + { + fprintf(stderr, "Could not malloc %ud bytes for arguement array\n", + DELTA_FUNC * sizeof(PH_ARG_STRUCT)); + exit(1); + } + memset(pArg, 0, DELTA_FUNC * sizeof(PH_ARG_STRUCT)); + allocArg = DELTA_FUNC; + numArg = 0; + + headFunc = headArg = NIL; + + buf = NULL; + while (!feof(fl)) + { + /* Get another filename from the file list */ + p = fgets(fileName, 80, fl); + if (p == NULL) break; /* Otherwise read last filename twice */ + i = strlen(fileName)-1; + if (fileName[i] == '\n') fileName[i] = '\0'; + f = fopen(fileName, "rt"); + if (f == NULL) + { + printf("Could not open header file %s\n", fileName); + exit(1); + } + + printf("Processing %s...\n", fileName); + + fSize = _lseek(_fileno(f), 0, SEEK_END); + 
fseek(f, 0, SEEK_SET); + ndata = (int) min(fSize, FBUF_SIZE); + if (buf) free(buf); + buf = (char *)malloc(ndata); + if (buf == 0) + { + printf("Could not malloc input file buffer of %d bytes\n", ndata); + exit(1); + } + + while (!feof(f)) + { + ndata = fread(buf, 1, ndata, f); + phData(buf, ndata); + } + phPost(); + fclose(f); + } + saveFile(); + fclose(datFile); + fclose(fl); + + free(buf); + free(pFunc); + free(pArg); +} + + +#if CHECK_HEAP +void +checkHeap(char *msg) + +/* HEAPCHK.C: This program checks the heap for + * consistency and prints an appropriate message. + */ +{ + int heapstatus; + + printf("%s\n", msg); + + /* Check heap status */ + heapstatus = _heapchk(); + switch( heapstatus ) + { + case _HEAPOK: + printf(" OK - heap is fine\n" ); + break; + case _HEAPEMPTY: + printf(" OK - heap is empty\n" ); + break; + case _HEAPBADBEGIN: + printf( "ERROR - bad start of heap\n" ); + break; + case _HEAPBADNODE: + printf( "ERROR - bad node in heap\n" ); + break; + } +} + +#endif + diff --git a/tools/parsehdr/parsehdr.h b/tools/parsehdr/parsehdr.h new file mode 100644 index 0000000..4baf048 --- /dev/null +++ b/tools/parsehdr/parsehdr.h @@ -0,0 +1,98 @@ +/* + *$Log: parsehdr.h,v $ + */ +/* Header file for parsehdr.c */ + +typedef unsigned long dword; /* 32 bits */ +typedef unsigned char byte; /* 8 bits */ +typedef unsigned short word; /* 16 bits */ +typedef unsigned char boolT; /* 8 bits */ + +#define TRUE 1 +#define FALSE 0 + +#define BUFF_SIZE 8192 /* Holds a declaration */ +#define FBUF_SIZE 32700 /* Holds part of a header file */ + +#define NARGS 15 +#define NAMES_L 160 +#define TYPES_L 160 +#define FUNC_L 160 + +#define ERRF stdout + +void phError(char *errmsg); +void phWarning(char *errmsg); + +#define ERR(msg) phError(msg) +#ifdef DEBUG +#define DBG(str) printf(str); +#else +#define DBG(str) ; +#endif +#define WARN(msg) phWarning(msg) +#define OUT(str) fprintf(outfile, str) + +#define PH_PARAMS 32 +#define PH_NAMESZ 15 + +#define SYMLEN 16 /* Including the 
null */ +#define Int long /* For locident.h */ +#define int16 short int /* For locident.h */ +#include "locident.h" /* For the hlType enum */ +#define bool unsigned char /* For internal use */ +#define TRUE 1 +#define FALSE 0 + +typedef +struct ph_func_tag +{ + char name[SYMLEN]; /* Name of function or arg */ + hlType typ; /* Return type */ + int numArg; /* Number of args */ + int firstArg; /* Index of first arg in chain */ + int next; /* Index of next function in chain */ + bool bVararg; /* True if variable num args */ +} PH_FUNC_STRUCT; + +typedef +struct ph_arg_tag +{ + char name[SYMLEN]; /* Name of function or arg */ + hlType typ; /* Parameter type */ +} PH_ARG_STRUCT; + +#define DELTA_FUNC 32 /* Number to alloc at once */ + + +#define PH_JUNK 0 /* LPSTR buffer, nothing happened */ +#define PH_PROTO 1 /* LPPH_FUNC ret val, func name, args */ +#define PH_FUNCTION 2 /* LPPH_FUNC ret val, func name, args */ +#define PH_TYPEDEF 3 /* LPPH_DEF definer and definee */ +#define PH_DEFINE 4 /* LPPH_DEF definer and definee */ +#define PH_ERROR 5 /* LPSTR error string */ +#define PH_WARNING 6 /* LPSTR warning string */ +#define PH_MPROTO 7 /* ????? multi proto???? */ +#define PH_VAR 8 /* ????? var decl */ + +/* PROTOS */ + +boolT phData(char *buff, int ndata); +boolT phPost(void); +boolT phFree(void); +void checkHeap(char *msg); /* For debugging only */ + +void phBuffToFunc(char *buff); + +void phBuffToDef(char *buff); + + +#define TOK_TYPE 256 /* A type name (e.g. "int") */ +#define TOK_NAME 257 /* A function or parameter name */ +#define TOK_DOTS 258 /* "..." */ +#define TOK_EOL 259 /* End of line */ + +typedef enum +{ + BT_INT, BT_CHAR, BT_FLOAT, BT_DOUBLE, BT_STRUCT, BT_VOID, BT_UNKWN +} baseType; diff --git a/tools/parsehdr/parsehdr.txt b/tools/parsehdr/parsehdr.txt new file mode 100644 index 0000000..5200070 --- /dev/null +++ b/tools/parsehdr/parsehdr.txt @@ -0,0 +1,217 @@ + PARSEHDR + +1 What is ParseHdr? + +2 What is dcclibs.dat? + +3 How do I use ParseHdr? 
+ +4 What about languages other than C? + +5 What is the structure of the dcclibs.dat file? + +6 What are all these errors, and why do they happen? + + +1 What is ParseHdr? +------------------- + +ParseHdr is a program that creates a special prototype file for DCC +from a set of include files (.h files). This allows DCC to be aware +of the type of library function arguments, and return types. The file +produced is called dcclibs.dat. ParseHdr is designed specifically for +C header files. + +As an example, this is what allows DCC to recognise that printf has +(at least) a string argument, and so converts the first argument from +a numeric constant to a string. So you get +printf("Hello world") +instead of +printf(0x42). + + +2 What is dcclibs.dat? +---------------------- + +dcclibs.dat is the file created by the ParseHdr program. It contains +a list of function names and parameter and return types. See section +5 for details of the contents of the file. + + +3 How do I use ParseHdr? +------------------------ + +To use ParseHdr you need a file containing a list of header files, +like this: +\tc\include\alloc.h +\tc\include\assert.h +\tc\include\bios.h +... +\tc\include\time.h + +There must be one file per line, no blank lines, and unless the +header files are in the current directory, a full path must be given. +The easiest way to create such a file is to redirect the output of a +dir command to a file, like this: +c>dir \tc\include\*.h > tcfiles.lst +and then edit the resultant file. Note that the path will not be +included in this, so you will have to add that manually. Remove +everything after the .h, such as file size, date, etc. + +Once you have this file, you can run parsehdr: + +parsehdr <listfile> + +where <listfile> is the name of the file containing the list. +For example, + +parsehdr tcfiles.lst + +You will get some messages indicating which files are being +processed, but also some error messages. Just ignore the error +messages; see section 6 for why they occur. + + + +4 What about languages other than C? 
+----------------------------------------- + +ParseHdr will only work on C header files. It would be possible to +process files for other languages that contained type information, to +produce a dcclibs.dat file specific to that language. Ideally, DCC +should look for a different file for each language, but since only a +C version of dcclibs.dat has so far been created, this has not been +done. + +Prototype information for Turbo Pascal exists in the file turbo.tpl, +at least for things like the graphics library, so it would be +possible for MakeDsTp to produce a dcclibs.dat file as well as the +signature file. However, the format of the turbo.tpl file is not +documented by Borland; for details see + +W. L. Peavy, "Inside Turbo Pascal 6.0 Units", Public domain software +file tpu6doc.txt in tpu6.zip. Anonymous ftp from garbo.uwasa.fi and +mirrors, directory /pc/turbopas, 1991. + + + + +5 What is the structure of the dcclibs.dat file? +------------------------------------------------ + +The first 4 bytes are "dccp", identifying it as a DCC prototype file. +After this, there are two sections. + +The first section begins with "FN", for Function Names. It is +followed by a two byte integer giving the number of function names +stored. The remainder of this section is an array of structures, one +per function name. Each has this structure: +char Name[SYMLEN]; /* Name of the function, NULL terminated */ +int type; /* A 2 byte integer describing the return type */ +int numArg; /* The number of arguments */ +int firstArg; /* The index of the first arg, see below */ +char bVarArg; /* 1 if variable arguments, 0 otherwise */ + +SYMLEN is 16, allowing 15 chars before the terminating NUL. Therefore, the length +of this structure is 23 bytes (16 + 2 + 2 + 2 + 1).
+ +The types are as defined in locident.h (actually a part of dcc), and +at present are as follows: +typedef enum { + TYPE_UNKNOWN = 0, /* unknown so far 00 */ + TYPE_BYTE_SIGN, /* signed byte (8 bits) 01 */ + TYPE_BYTE_UNSIGN, /* unsigned byte 02 */ + TYPE_WORD_SIGN, /* signed word (16 bits) 03 */ + TYPE_WORD_UNSIGN, /* unsigned word (16 bits) 04 */ + TYPE_LONG_SIGN, /* signed long (32 bits) 05 */ + TYPE_LONG_UNSIGN, /* unsigned long (32 bits) 06 */ + TYPE_RECORD, /* record structure 07 */ + TYPE_PTR, /* pointer (32 bit ptr) 08 */ + TYPE_STR, /* string 09 */ + TYPE_CONST, /* constant (any type) 0A */ + TYPE_FLOAT, /* floating point 0B */ + TYPE_DOUBLE, /* double precision float 0C */ +} hlType; + +firstArg is an index into the array in the second section. + +The second section begins with "PM" (for Parameters). It is followed +by a 2 byte integer giving the number of parameter records. After +this is the array of parameter structures. Initially, the names of the +parameters were being stored, but this has been removed at present. +The parameter structure is therefore now just a single 2 byte +integer, representing the type of that argument. + +The way it all fits together is perhaps best described by an example. +Let's consider this entry in dcclibs.dat: + +73 74 72 63 6D 70 00 ; "strcmp" +00 00 00 00 00 00 00 00 00 ; Padding to 16 bytes +03 00 ; Return type 3, TYPE_WORD_SIGN +02 00 ; 2 arguments +15 02 ; First arg is 0215 +00 ; Not var args + +If we now skip to the "PM" part of the file, skip the number of +arguments word, then skip 0x215*2 = 0x42A bytes, we find this: +09 00 09 00 09 00 ... + +The first 09 00 (TYPE_STR) refers to the type of the first parameter, +and the second to the second parameter. There are only 2 arguments, +so the third 09 00 refers to the first parameter of the next +function. So both parameters are strings, as is expected.
+ +For functions with variable parameters, bVarArg is set to 01, and the +number of parameters reported is the number of fixed parameters. Here +is another example: + +66 70 72 69 6E 74 66 00 ; "fprintf" +00 00 00 00 00 00 00 00 ; padding +03 00 ; return type 3, TYPE_WORD_SIGN +02 00 ; 2 fixed args +81 01 ; First arg at index 0181 +01 ; Var args + +and in the "PM" section at offset 0x181*2 = 0x0302, we find 08 00 09 00 +03 00, meaning that the first parameter is a pointer (in fact, we know +it's a FILE *), and the second parameter is a string. (With only 2 fixed args, the trailing 03 00 belongs to the next function.) + + + + +6 What are all these errors, and why do they happen? +---------------------------------------------------- + +When you run ParseHdr, as well as the progress statements like + Processing \tc\include\alloc.h ... + +you can get error messages. Basically, ignore these errors. They occur +for a variety of reasons, most of which are detailed below. + +1) + Expected type: got ) (29) + void __emit__() + ^ +This include file contained a non-ANSI prototype. This is rare, and +__emit__ is not a standard function anyway. If it really bothers you, +you could add the word "void" to the empty parentheses in your +include file. + +2) + Expected ',' between parameter defs: got ( (28) + void _Cdecl ctrlbrk (int _Cdecl (*handler)(void)) + +Here "handler" is a pointer to a function. Being a basically simple +program, ParseHdr does not expand all typedef and #define statements, +so it cannot distinguish between types and user defined function +names. Therefore, it is not possible in general to parse any +prototypes containing pointers to functions, so at this stage, any +such prototypes will produce an error of some sort. DCC cannot +currently make use of this type information anyway, so this is no +real loss. There are typically half a dozen such errors. + +3) + Unknown type time_t + +Types (such as time_t) that are structures or pointers to structures +are not handled by ParseHdr, since typedef and #define statements are +ignored.
Again, there are typically only about a dozen of these. diff --git a/tools/parsehdr/parselib.mak b/tools/parsehdr/parselib.mak new file mode 100644 index 0000000..2f1f4b5 --- /dev/null +++ b/tools/parsehdr/parselib.mak @@ -0,0 +1,8 @@ +CFLAGS = -Zi -c -AS -W3 -D__MSDOS__ + +parselib.exe: parselib.obj + link /CO parselib; + +parselib.obj: parselib.c + cl $(CFLAGS) $*.c + diff --git a/tools/parsehdr/tcfiles.lst b/tools/parsehdr/tcfiles.lst new file mode 100644 index 0000000..c641865 --- /dev/null +++ b/tools/parsehdr/tcfiles.lst @@ -0,0 +1,24 @@ +\tc\include\alloc.h +\tc\include\assert.h +\tc\include\bios.h +\tc\include\conio.h +\tc\include\ctype.h +\tc\include\dir.h +\tc\include\dos.h +\tc\include\errno.h +\tc\include\fcntl.h +\tc\include\float.h +\tc\include\io.h +\tc\include\limits.h +\tc\include\math.h +\tc\include\mem.h +\tc\include\process.h +\tc\include\setjmp.h +\tc\include\share.h +\tc\include\signal.h +\tc\include\stdarg.h +\tc\include\stddef.h +\tc\include\stdio.h +\tc\include\stdlib.h +\tc\include\string.h +\tc\include\time.h diff --git a/tools/readsig/CMakeLists.txt b/tools/readsig/CMakeLists.txt new file mode 100644 index 0000000..e69de29 diff --git a/tools/readsig/readsig.cpp b/tools/readsig/readsig.cpp new file mode 100644 index 0000000..cc4ad90 --- /dev/null +++ b/tools/readsig/readsig.cpp @@ -0,0 +1,239 @@ +/* Quick program to read the output from makedsig: checks the section layout ("dccs" header, T1, T2, gg, ht) of a signature file, optionally dumps every symbol and its pattern (-a), and reports each key whose hash differs from its table index */ + +#include +#include +#include +#include +#include +#include "perfhlib.h" + +/* statics */ +byte buf[100]; /* Scratch buffer filled by grab() */ +int numKeys; /* Number of hash table entries (keys) */ +int numVert; /* Number of vertices in the graph (also size of g[]) */ +int PatLen; /* Size of the keys (pattern length) */ +int SymLen; /* Max size of the symbols, including null */ +FILE *f; /* File being read */ + +static word *T1base, *T2base; /* Pointers to start of T1, T2 */ +static word *g; /* g[] */ + +/* prototypes */ +void grab(int n); +word readFileShort(void); +void cleanup(void); + +static bool bDispAll = FALSE; /* Set by -a: list every symbol, not just mismatches */ + +int
+main(int argc, char *argv[]) +{ + word w, len; + int h, i, j; + long filePos; + + if (argc <= 1 || (argc == 2 && strcmp(argv[1], "-a") == 0)) /* No file name given (a lone -a would otherwise pass argv[2] == NULL to fopen) */ + { + printf("Usage: readsig [-a] <sigfile>\n"); + printf("-a for all symbols (else just duplicates)\n"); + exit(1); + } + + i = 1; /* Index of the next argument to consume */ + + if (strcmp(argv[i], "-a") == 0) + { + i++; + bDispAll = TRUE; + } + if ((f = fopen(argv[i], "rb")) == NULL) + { + printf("Cannot open %s\n", argv[i]); + exit(2); + } + + /* Read the parameters */ + grab(4); + if (memcmp("dccs", buf, 4) != 0) + { + printf("Not a dccs file!\n"); + exit(3); + } + numKeys = readFileShort(); + numVert = readFileShort(); + PatLen = readFileShort(); + SymLen = readFileShort(); + + /* Initialise the perfhlib stuff. Also allocates T1, T2, g, etc */ + hashParams( /* Set the parameters for the hash table */ + numKeys, /* The number of symbols */ + PatLen, /* The length of the pattern to be hashed */ + 256, /* The character set of the pattern (0-FF) */ + 0, /* Minimum pattern character value */ + numVert); /* Specifies C, the sparseness of the graph.
+ See Czech, Havas and Majewski for details + */ + + T1base = readT1(); + T2base = readT2(); + g = readG(); + + /* Read T1 and T2 tables */ + grab(2); + if (memcmp("T1", buf, 2) != 0) + { + printf("Expected 'T1'\n"); + exit(3); + } + len = PatLen * 256 * sizeof(word); + w = readFileShort(); + if (w != len) + { + printf("Problem with size of T1: file %d, calc %d\n", w, len); + exit(4); + } + if (fread(T1base, 1, len, f) != len) + { + printf("Could not read T1\n"); + exit(5); + } + + grab(2); + if (memcmp("T2", buf, 2) != 0) + { + printf("Expected 'T2'\n"); + exit(3); + } + w = readFileShort(); + if (w != len) + { + printf("Problem with size of T2: file %d, calc %d\n", w, len); + exit(4); + } + if (fread(T2base, 1, len, f) != len) + { + printf("Could not read T2\n"); + exit(5); + } + + /* Now read the function g[] */ + grab(2); + if (memcmp("gg", buf, 2) != 0) + { + printf("Expected 'gg'\n"); + exit(3); + } + len = numVert * sizeof(word); + w = readFileShort(); + if (w != len) + { + printf("Problem with size of g[]: file %d, calc %d\n", w, len); + exit(4); + } + if (fread(g, 1, len, f) != len) + { + printf("Could not read g[]\n"); + exit(5); + } + + + /* This is now the hash table */ + grab(2); + if (memcmp("ht", buf, 2) != 0) + { + printf("Expected 'ht'\n"); + exit(3); + } + w = readFileShort(); + if (w != numKeys * (SymLen + PatLen + sizeof(word))) + { + printf("Problem with size of hash table: file %d, calc %d\n", w, (int)(numKeys * (SymLen + PatLen + sizeof(word)))); + exit(6); + } + + if (bDispAll) + { + fseek(f, 0, SEEK_CUR); /* Needed due to bug in MS fread()!
*/ + filePos = ftell(f); /* Remember where the table starts so the hash check below can re-read it */ + for (i=0; i < numKeys; i++) + { + grab(SymLen + PatLen); + + printf("%16s ", buf); + for (j=0; j < PatLen; j++) + { + printf("%02X", buf[SymLen+j]); + if ((j%4) == 3) printf(" "); + } + printf("\n"); + } + printf("\n\n\n"); + fseek(f, filePos, SEEK_SET); + } + + for (i=0; i < numKeys; i++) + { + grab(SymLen + PatLen); + + h = hash(&buf[SymLen]); + if (h != i) + { + printf("Symbol %16s (index %3d) hashed to %d\n", + buf, i, h); + } + } + + printf("Done!\n"); + fclose(f); + return 0; +} + +/* Release the table storage; not called from main() in this file */ +void +cleanup(void) +{ + /* Free the storage for variable sized tables etc */ + if (T1base) free(T1base); + if (T2base) free(T2base); + if (g) free(g); +} + +void grab(int n) /* Read exactly n bytes from f into buf; exits with status 11 on failure */ +{ + if (n < 0 || (size_t)n > sizeof(buf) || fread(buf, 1, (size_t)n, f) != (size_t)n) /* Reject sizes that would overrun buf */ + { + printf("Could not read\n"); + exit(11); + } +} + +word /* Reads one 16 bit little-endian value from f; exits with status 11 on a short read */ +readFileShort(void) +{ + byte b1, b2; + + if (fread(&b1, 1, 1, f) != 1) + { + printf("Could not read\n"); + exit(11); + } + if (fread(&b2, 1, 1, f) != 1) + { + printf("Could not read\n"); + exit(11); + } + return (b2 << 8) + b1; +} + +/* Following two functions not needed unless creating tables */ + +void getKey(int i, byte **keys) +{ +} + +/* Display key i */ +void +dispKey(int i) +{ +} + diff --git a/tools/readsig/readsig.mak b/tools/readsig/readsig.mak new file mode 100644 index 0000000..c51f366 --- /dev/null +++ b/tools/readsig/readsig.mak @@ -0,0 +1,11 @@ +CFLAGS = -Zi -c -AL -W3 -D__MSDOS__ + +readsig.exe: readsig.obj perfhlib.obj + link /CO readsig perfhlib; + +readsig.obj: readsig.c dcc.h perfhlib.h + cl $(CFLAGS) $*.c + +perfhlib.obj: perfhlib.c dcc.h perfhlib.h + cl $(CFLAGS) $*.c + diff --git a/tools/readsig/readsig.txt b/tools/readsig/readsig.txt new file mode 100644 index 0000000..be26ce6 --- /dev/null +++ b/tools/readsig/readsig.txt @@ -0,0 +1,97 @@ + READSIG + +1 What is ReadSig? + +2 How do I use ReadSig? + +3 What are duplicate signatures? + +4 How can I make sense of the signatures? + + +1 What is ReadSig?
+------------------ + +ReadSig is a quick and dirty program to read signatures from a DCC +signature file. It was originally written as an integrity checker for +signature files, but can now be used to see what's in a signature +file, and which functions have duplicate signatures. + +2 How do I use ReadSig? +----------------------- + +Just type +readsig <sigfile> + +or + +readsig -a <sigfile> + + +For example: +readsig -a dcct2p.sig + +Either way, you get a list of duplicate signatures, i.e. functions +whose first 23 bytes, after wildcarding and chopping (see section 3 +for details), are identical. + +With the -a switch, you also (before the above) get a list of all +symbolic names in the signature file, and the signatures themselves +in hex. This could be a dozen or more pages for large signature +files. + +Currently, signatures are 23 bytes long, and the symbolic names are +truncated to 15 characters. + + +3 What are duplicate signatures? +-------------------------------- + +Duplicate signatures arise for 3 reasons. 1: length of the signature. +2: wildcards. 3: chopping of the signature. + +1: Because signatures are only 23 bytes long, there is a chance that +two distinct functions (a signature is the first part of the binary image of a +function) are identical in the first 23 bytes, but diverge later. + +2: Because part of the binary image of a function depends on where it +is loaded, parts of the signature are replaced with wildcards. It is +possible that two functions are distinct only in places that are +replaced by the wildcard byte (F4). + +3: Signatures are "chopped" (cut short, and the remainder filled with +binary zeroes) after an unconditional branch or subroutine return. +This is to cope with functions shorter than the 23 byte size of +signatures, so unrelated functions are not included at the end of a +signature. (Including them would cause dcc to fail to recognise these short +functions if some other function happened to be loaded at the end).
+ +The effect of duplicate signatures is that only one of the functions +that share a signature will be recognised. For example, suppose +that sin, cos, and tan were just one wildcarded instruction followed +by a jump to the same piece of code. Then all three would have the +same signature, and calls to sin, cos, or tan would all be reported +by dcc as just one of these, e.g. tan. If you suspect that this is +happening, then at least ReadSig can alert you to this problem. + +In general, the number of duplicate signatures that would actually be +used in dcc is small, but it is possible that the above problem will +occur. + + + +4 How can I make sense of the signatures? +----------------------------------------- + +If you're one of those unfortunate individuals that can't decode hex +instructions in your head, you can always use DispSig to copy the signature to a +binary file, since you now know the name of the function. Then you +can use debug or some other debugger to disassemble the binary file. +Generally, most entries in signature files will be executable code, +so it should disassemble readily. + +Be aware that signatures are wildcarded, so don't pay any attention +to the destination of jmp or call instructions (3 or 5 byte +jumps, anyway; 2 byte jumps are not wildcarded), or to 16 bit immediate +values. The latter will always be F4F4 (two wildcard bytes), +regardless of what they were in the original function.