Initial revision
This commit is contained in:
372
util/cpp/LLlex.c
Normal file
372
util/cpp/LLlex.c
Normal file
@@ -0,0 +1,372 @@
|
||||
/* $Header$ */
|
||||
/* L E X I C A L A N A L Y Z E R */
|
||||
|
||||
#include "idfsize.h"
|
||||
#include "numsize.h"
|
||||
#include "strsize.h"
|
||||
|
||||
#include <alloc.h>
|
||||
#include "input.h"
|
||||
#include "idf.h"
|
||||
#include "LLlex.h"
|
||||
#include "Lpars.h"
|
||||
#include "class.h"
|
||||
#include "bits.h"
|
||||
|
||||
/* Data about the token yielded */
|
||||
struct token dot;
|
||||
|
||||
int ReplaceMacros = 1; /* replacing macros */
|
||||
int AccFileSpecifier = 0; /* return filespecifier <...> */
|
||||
int AccDefined = 0; /* accept "defined(...)" */
|
||||
int UnknownIdIsZero = 0; /* interpret unknown id as integer 0 */
|
||||
|
||||
char *string_token();
|
||||
|
||||
PushLex()
|
||||
{
|
||||
DOT = 0;
|
||||
}
|
||||
|
||||
PopLex()
|
||||
{}
|
||||
|
||||
int
|
||||
LLlex()
|
||||
{
|
||||
return (DOT != EOF) ? GetToken(&dot) : EOF;
|
||||
}
|
||||
|
||||
#define BUFSIZ 1024
|
||||
|
||||
int
|
||||
GetToken(ptok)
|
||||
register struct token *ptok;
|
||||
{
|
||||
char buf[BUFSIZ];
|
||||
register int c, nch;
|
||||
|
||||
again: /* rescan the input after an error or replacement */
|
||||
LoadChar(c);
|
||||
go_on:
|
||||
if ((c & 0200) && c != EOI)
|
||||
fatal("non-ascii '\\%03o' read", c & 0377);
|
||||
switch (class(c)) { /* detect character class */
|
||||
case STNL:
|
||||
LineNumber++;
|
||||
return ptok->tk_symb = EOF;
|
||||
case STSKIP:
|
||||
goto again;
|
||||
case STGARB: /* garbage character */
|
||||
if (c == '\\') {
|
||||
/* a '\\' is allowed in #if/#elif expression */
|
||||
LoadChar(c);
|
||||
if (class(c) == STNL) { /* vt , ff ? */
|
||||
++LineNumber;
|
||||
goto again;
|
||||
}
|
||||
PushBack();
|
||||
c = '\\';
|
||||
}
|
||||
if (040 < c && c < 0177)
|
||||
error("garbage char %c", c);
|
||||
else
|
||||
error("garbage char \\%03o", c);
|
||||
goto again;
|
||||
case STSIMP: /* a simple character, no part of compound token*/
|
||||
if (c == '/') { /* probably the start of comment */
|
||||
LoadChar(c);
|
||||
if (c == '*') { /* start of comment */
|
||||
skipcomment();
|
||||
goto again;
|
||||
}
|
||||
else {
|
||||
PushBack();
|
||||
c = '/'; /* restore c */
|
||||
}
|
||||
}
|
||||
return ptok->tk_symb = c;
|
||||
case STCOMP: /* maybe the start of a compound token */
|
||||
LoadChar(nch); /* character lookahead */
|
||||
switch (c) {
|
||||
case '!':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = NOTEQUAL;
|
||||
PushBack();
|
||||
return ptok->tk_symb = c;
|
||||
case '&':
|
||||
if (nch == '&')
|
||||
return ptok->tk_symb = AND;
|
||||
PushBack();
|
||||
return ptok->tk_symb = c;
|
||||
case '<':
|
||||
if (AccFileSpecifier) {
|
||||
PushBack(); /* pushback nch */
|
||||
ptok->tk_str =
|
||||
string_token("file specifier", '>');
|
||||
return ptok->tk_symb = FILESPECIFIER;
|
||||
}
|
||||
if (nch == '<')
|
||||
return ptok->tk_symb = LEFT;
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = LESSEQ;
|
||||
PushBack();
|
||||
return ptok->tk_symb = c;
|
||||
case '=':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = EQUAL;
|
||||
/* The following piece of code tries to recognise
|
||||
old-fashioned assignment operators `=op'
|
||||
*/
|
||||
error("illegal character");
|
||||
goto go_on;
|
||||
case '>':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = GREATEREQ;
|
||||
if (nch == '>')
|
||||
return ptok->tk_symb = RIGHT;
|
||||
PushBack();
|
||||
return ptok->tk_symb = c;
|
||||
case '|':
|
||||
if (nch == '|')
|
||||
return ptok->tk_symb = OR;
|
||||
PushBack();
|
||||
return ptok->tk_symb = c;
|
||||
}
|
||||
case STIDF:
|
||||
{
|
||||
extern int idfsize; /* ??? */
|
||||
register char *tg = &buf[0];
|
||||
register char *maxpos = &buf[idfsize];
|
||||
register struct idf *idef;
|
||||
|
||||
#define tstmac(bx) if (!(bits[c] & bx)) goto nomac
|
||||
#define cpy if (Unstacked) EnableMacros(); *tg++ = c
|
||||
#define load LoadChar(c); if (!in_idf(c)) goto endidf
|
||||
|
||||
#ifdef DOBITS
|
||||
cpy; tstmac(bit0); load;
|
||||
cpy; tstmac(bit1); load;
|
||||
cpy; tstmac(bit2); load;
|
||||
cpy; tstmac(bit3); load;
|
||||
cpy; tstmac(bit4); load;
|
||||
cpy; tstmac(bit5); load;
|
||||
cpy; tstmac(bit6); load;
|
||||
cpy; tstmac(bit7); load;
|
||||
#endif
|
||||
|
||||
for(;;) {
|
||||
if (tg < maxpos) {
|
||||
cpy;
|
||||
}
|
||||
load;
|
||||
}
|
||||
endidf:
|
||||
PushBack();
|
||||
*tg = '\0'; /* mark the end of the identifier */
|
||||
if (ReplaceMacros) {
|
||||
idef = findidf(buf);
|
||||
if ((idef && idef->id_macro && replace(idef))) {
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
nomac:
|
||||
LoadChar(c);
|
||||
while (in_idf(c)) {
|
||||
if (tg < maxpos) *tg++ = c;
|
||||
LoadChar(c);
|
||||
}
|
||||
PushBack();
|
||||
*tg++ = '\0'; /* mark the end of the identifier */
|
||||
if (UnknownIdIsZero) {
|
||||
ptok->tk_val = 0;
|
||||
return ptok->tk_symb = INTEGER;
|
||||
}
|
||||
ptok->tk_str = Malloc(idfsize + 1);
|
||||
strcpy(ptok->tk_str, buf);
|
||||
return ptok->tk_symb = IDENTIFIER;
|
||||
}
|
||||
case STCHAR: /* character constant */
|
||||
{
|
||||
register int val = 0, size = 0;
|
||||
|
||||
LoadChar(c);
|
||||
if (c == '\'')
|
||||
error("character constant too short");
|
||||
else
|
||||
while (c != '\'') {
|
||||
if (c == '\n') {
|
||||
error("newline in character constant");
|
||||
LineNumber++;
|
||||
break;
|
||||
}
|
||||
if (c == '\\') {
|
||||
LoadChar(c);
|
||||
if (c == '\n') {
|
||||
LoadChar(c);
|
||||
LineNumber++;
|
||||
continue;
|
||||
}
|
||||
c = quoted(c);
|
||||
}
|
||||
val = val*256 + c;
|
||||
size++;
|
||||
LoadChar(c);
|
||||
}
|
||||
if (size > sizeof(int))
|
||||
error("character constant too long");
|
||||
ptok->tk_val = val;
|
||||
return ptok->tk_symb = INTEGER;
|
||||
}
|
||||
case STNUM:
|
||||
{
|
||||
register char *np = &buf[1];
|
||||
register int base = 10;
|
||||
register int vch;
|
||||
register int val = 0;
|
||||
|
||||
if (c == '0') {
|
||||
*np++ = c;
|
||||
LoadChar(c);
|
||||
if (c == 'x' || c == 'X') {
|
||||
base = 16;
|
||||
LoadChar(c);
|
||||
}
|
||||
else
|
||||
base = 8;
|
||||
}
|
||||
while (vch = val_in_base(c, base), vch >= 0) {
|
||||
val = val*base + vch;
|
||||
if (np < &buf[NUMSIZE])
|
||||
*np++ = c;
|
||||
LoadChar(c);
|
||||
}
|
||||
if (c == 'l' || c == 'L')
|
||||
LoadChar(c);
|
||||
PushBack();
|
||||
ptok->tk_val = val;
|
||||
return ptok->tk_symb = INTEGER;
|
||||
}
|
||||
case STSTR:
|
||||
ptok->tk_str = string_token("string", '"');
|
||||
return ptok->tk_symb = STRING;
|
||||
case STEOI: /* end of text on source file */
|
||||
return ptok->tk_symb = EOF;
|
||||
default:
|
||||
crash("Impossible character class");
|
||||
}
|
||||
/*NOTREACHED*/
|
||||
}
|
||||
|
||||
skipcomment()
|
||||
{
|
||||
register int c;
|
||||
|
||||
NoUnstack++;
|
||||
LoadChar(c);
|
||||
do {
|
||||
while (c != '*') {
|
||||
if (class(c) == STNL)
|
||||
++LineNumber;
|
||||
else
|
||||
if (c == EOI) {
|
||||
NoUnstack--;
|
||||
return;
|
||||
}
|
||||
LoadChar(c);
|
||||
}
|
||||
/* Last Character seen was '*' */
|
||||
LoadChar(c);
|
||||
} while (c != '/');
|
||||
NoUnstack--;
|
||||
}
|
||||
|
||||
char *
|
||||
string_token(nm, stop_char)
|
||||
char *nm;
|
||||
{
|
||||
register int c;
|
||||
register int str_size;
|
||||
register char *str = Malloc(str_size = ISTRSIZE);
|
||||
register int pos = 0;
|
||||
|
||||
LoadChar(c);
|
||||
while (c != stop_char) {
|
||||
if (c == '\n') {
|
||||
error("newline in %s", nm);
|
||||
LineNumber++;
|
||||
break;
|
||||
}
|
||||
if (c == EOI) {
|
||||
error("end-of-file inside %s", nm);
|
||||
break;
|
||||
}
|
||||
if (c == '\\') {
|
||||
LoadChar(c);
|
||||
if (c == '\n') {
|
||||
LineNumber++;
|
||||
LoadChar(c);
|
||||
continue;
|
||||
}
|
||||
c = quoted(c);
|
||||
}
|
||||
str[pos++] = c;
|
||||
if (pos == str_size)
|
||||
str = Srealloc(str, str_size += RSTRSIZE);
|
||||
LoadChar(c);
|
||||
}
|
||||
str[pos++] = '\0'; /* for filenames etc. */
|
||||
return str;
|
||||
}
|
||||
|
||||
int
|
||||
quoted(c)
|
||||
register int c;
|
||||
{
|
||||
/* quoted() replaces an escaped character sequence by the
|
||||
character meant.
|
||||
*/
|
||||
/* first char after backslash already in c */
|
||||
if (!is_oct(c)) { /* a quoted char */
|
||||
switch (c) {
|
||||
case 'n':
|
||||
c = '\n';
|
||||
break;
|
||||
case 't':
|
||||
c = '\t';
|
||||
break;
|
||||
case 'b':
|
||||
c = '\b';
|
||||
break;
|
||||
case 'r':
|
||||
c = '\r';
|
||||
break;
|
||||
case 'f':
|
||||
c = '\f';
|
||||
break;
|
||||
}
|
||||
}
|
||||
else { /* a quoted octal */
|
||||
register int oct = 0, cnt = 0;
|
||||
|
||||
do {
|
||||
oct = oct*8 + (c-'0');
|
||||
LoadChar(c);
|
||||
} while (is_oct(c) && ++cnt < 3);
|
||||
PushBack();
|
||||
c = oct;
|
||||
}
|
||||
return c&0377;
|
||||
}
|
||||
|
||||
/* provisional */
|
||||
int
|
||||
val_in_base(c, base)
|
||||
register int c;
|
||||
{
|
||||
return
|
||||
is_dig(c) ? c - '0' :
|
||||
base != 16 ? -1 :
|
||||
is_hex(c) ? (c - 'a' + 10) & 017 :
|
||||
-1;
|
||||
}
|
||||
Reference in New Issue
Block a user