Initial revision
This commit is contained in:
549
lang/cem/cpp.ansi/LLlex.c
Normal file
549
lang/cem/cpp.ansi/LLlex.c
Normal file
@@ -0,0 +1,549 @@
|
||||
/*
|
||||
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
|
||||
* See the copyright notice in the ACK home directory, in the file "Copyright".
|
||||
*/
|
||||
/* $Header$ */
|
||||
/* L E X I C A L A N A L Y Z E R */
|
||||
|
||||
#include "idfsize.h"
|
||||
#include "numsize.h"
|
||||
#include "strsize.h"
|
||||
|
||||
#include <alloc.h>
|
||||
#include "input.h"
|
||||
#include "arith.h"
|
||||
#include "macro.h"
|
||||
#include "idf.h"
|
||||
#include "LLlex.h"
|
||||
#include "Lpars.h"
|
||||
#include "class.h"
|
||||
#include "bits.h"
|
||||
|
||||
#define BUFSIZ 1024
|
||||
|
||||
struct token dot;
|
||||
|
||||
int ReplaceMacros = 1; /* replacing macros */
|
||||
int AccDefined = 0; /* accept "defined(...)" */
|
||||
int UnknownIdIsZero = 0; /* interpret unknown id as integer 0 */
|
||||
int Unstacked = 0; /* an unstack is done */
|
||||
int AccFileSpecifier = 0; /* return filespecifier <...> */
|
||||
int LexSave = 0; /* last character read by GetChar */
|
||||
extern int InputLevel; /* # of current macro expansions */
|
||||
|
||||
char *string_token();
|
||||
arith char_constant();
|
||||
#define FLG_ESEEN 0x01 /* possibly a floating point number */
|
||||
#define FLG_DOTSEEN 0x02 /* certainly a floating point number */
|
||||
|
||||
int
|
||||
LLlex()
|
||||
{
|
||||
return (DOT != EOF) ? GetToken(&dot) : EOF;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
GetToken(ptok)
|
||||
register struct token *ptok;
|
||||
{
|
||||
/* GetToken() is the actual token recognizer. It calls the
|
||||
control line interpreter if it encounters a "\n{w}*#"
|
||||
combination. Macro replacement is also performed if it is
|
||||
needed.
|
||||
*/
|
||||
char buf[BUFSIZ];
|
||||
register int ch, nch;
|
||||
|
||||
again: /* rescan the input after an error or replacement */
|
||||
ch = GetChar();
|
||||
go_on: /* rescan, the following character has been read */
|
||||
if ((ch & 0200) && ch != EOI) /* stop on non-ascii character */
|
||||
fatal("non-ascii '\\%03o' read", ch & 0377);
|
||||
/* keep track of the place of the token in the file */
|
||||
|
||||
switch (class(ch)) { /* detect character class */
|
||||
case STNL: /* newline, vertical space or formfeed */
|
||||
LineNumber++;
|
||||
return ptok->tk_symb = EOF;
|
||||
case STSKIP: /* just skip the skip characters */
|
||||
goto again;
|
||||
case STGARB: /* garbage character */
|
||||
garbage:
|
||||
if (040 < ch && ch < 0177)
|
||||
error("garbage char %c", ch);
|
||||
else
|
||||
error("garbage char \\%03o", ch);
|
||||
goto again;
|
||||
case STSIMP: /* a simple character, no part of compound token*/
|
||||
return ptok->tk_symb = ch;
|
||||
case STCOMP: /* maybe the start of a compound token */
|
||||
nch = GetChar(); /* character lookahead */
|
||||
switch (ch) {
|
||||
case '!':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = NOTEQUAL;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
case '&':
|
||||
if (nch == '&')
|
||||
return ptok->tk_symb = AND;
|
||||
else if (nch == '=')
|
||||
return ptok->tk_symb = ANDAB;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
case '+':
|
||||
if (nch == '+')
|
||||
return ptok->tk_symb = PLUSPLUS;
|
||||
else if (nch == '=')
|
||||
return ptok->tk_symb = PLUSAB;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
case '-':
|
||||
if (nch == '-')
|
||||
return ptok->tk_symb = MINMIN;
|
||||
else if (nch == '>')
|
||||
return ptok->tk_symb = ARROW;
|
||||
else if (nch == '=')
|
||||
return ptok->tk_symb = MINAB;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
case '<':
|
||||
if (AccFileSpecifier) {
|
||||
UnGetChar(); /* pushback nch */
|
||||
ptok->tk_str =
|
||||
string_token("file specifier", '>');
|
||||
return ptok->tk_symb = FILESPECIFIER;
|
||||
} else if (nch == '<') {
|
||||
if ((nch = GetChar()) == '=')
|
||||
return ptok->tk_symb = LEFTAB;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = LEFT;
|
||||
} else if (nch == '=')
|
||||
return ptok->tk_symb = LESSEQ;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
case '=':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = EQUAL;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
case '>':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = GREATEREQ;
|
||||
else if (nch == '>') {
|
||||
if ((nch = GetChar()) == '=')
|
||||
return ptok->tk_symb = RIGHTAB;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = RIGHT;
|
||||
}
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
case '|':
|
||||
if (nch == '|')
|
||||
return ptok->tk_symb = OR;
|
||||
else if (nch == '=')
|
||||
return ptok->tk_symb = ORAB;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
case '%':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = MODAB;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
case '*':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = TIMESAB;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
case '^':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = XORAB;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
case '/':
|
||||
if (nch == '*' && !InputLevel) {
|
||||
skipcomment();
|
||||
goto again;
|
||||
}
|
||||
else if (nch == '=')
|
||||
return ptok->tk_symb = DIVAB;
|
||||
UnGetChar();
|
||||
return ptok->tk_symb = ch;
|
||||
default:
|
||||
crash("bad class for char 0%o", ch);
|
||||
/* NOTREACHED */
|
||||
}
|
||||
case STCHAR: /* character constant */
|
||||
ptok->tk_val = char_constant("character");
|
||||
return ptok->tk_symb = INTEGER;
|
||||
case STSTR: /* string */
|
||||
ptok->tk_str = string_token("string", '"');
|
||||
return ptok->tk_symb = STRING;
|
||||
case STELL: /* wide character constant/string prefix */
|
||||
nch = GetChar();
|
||||
if (nch == '"') {
|
||||
ptok->tk_str =
|
||||
string_token("wide character string", '"');
|
||||
return ptok->tk_symb = STRING;
|
||||
} else if (nch == '\'') {
|
||||
ptok->tk_val = char_constant("wide character");
|
||||
return ptok->tk_symb = INTEGER;
|
||||
}
|
||||
UnGetChar();
|
||||
/* fallthrough */
|
||||
case STIDF:
|
||||
{
|
||||
extern int idfsize; /* ??? */
|
||||
register char *tg = &buf[0];
|
||||
register char *maxpos = &buf[idfsize];
|
||||
int NoExpandNext = 0;
|
||||
|
||||
#define tstmac(bx) if (!(bits[ch] & bx)) goto nomac
|
||||
#define cpy *tg++ = ch
|
||||
#define load (ch = GetChar()); if (!in_idf(ch)) goto endidf
|
||||
|
||||
if (Unstacked) EnableMacros(); /* unstack macro's when allowed. */
|
||||
if (ch == NOEXPM) {
|
||||
NoExpandNext = 1;
|
||||
ch = GetChar();
|
||||
}
|
||||
#ifdef DOBITS
|
||||
cpy; tstmac(bit0); load;
|
||||
cpy; tstmac(bit1); load;
|
||||
cpy; tstmac(bit2); load;
|
||||
cpy; tstmac(bit3); load;
|
||||
cpy; tstmac(bit4); load;
|
||||
cpy; tstmac(bit5); load;
|
||||
cpy; tstmac(bit6); load;
|
||||
cpy; tstmac(bit7); load;
|
||||
#endif
|
||||
for(;;) {
|
||||
if (tg < maxpos) {
|
||||
cpy;
|
||||
|
||||
}
|
||||
load;
|
||||
}
|
||||
endidf:
|
||||
/*if (ch != EOI) UnGetChar();*/
|
||||
UnGetChar();
|
||||
*tg++ = '\0'; /* mark the end of the identifier */
|
||||
if (ReplaceMacros) {
|
||||
register struct idf *idef = findidf(buf);
|
||||
|
||||
if (idef && idef->id_macro && !NoExpandNext) {
|
||||
if (replace(idef))
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
||||
nomac: /* buf can already be null-terminated. soit */
|
||||
ch = GetChar();
|
||||
while (in_idf(ch)) {
|
||||
if (tg < maxpos) *tg++ = ch;
|
||||
ch = GetChar();
|
||||
}
|
||||
UnGetChar();
|
||||
*tg++ = '\0'; /* mark the end of the identifier */
|
||||
|
||||
NoExpandNext = 0;
|
||||
if (UnknownIdIsZero) {
|
||||
ptok->tk_val = (arith)0;
|
||||
return ptok->tk_symb = INTEGER;
|
||||
}
|
||||
ptok->tk_str = Malloc(tg - buf);
|
||||
strcpy(ptok->tk_str, buf);
|
||||
return IDENTIFIER;
|
||||
}
|
||||
case STNUM: /* a numeric constant */
|
||||
{ /* it may only be an integer constant */
|
||||
register int base = 10, val = 0, vch;
|
||||
|
||||
/* Since the preprocessor only knows integers and has
|
||||
* nothing to do with ellipsis we just return when the
|
||||
* pp-number starts with a '.'
|
||||
*/
|
||||
if (ch == '.') {
|
||||
return ptok->tk_symb = ch;
|
||||
}
|
||||
if (ch == '0') {
|
||||
ch = GetChar();
|
||||
if (ch == 'x' || ch == 'X') {
|
||||
base = 16;
|
||||
ch = GetChar();
|
||||
} else {
|
||||
base = 8;
|
||||
}
|
||||
|
||||
}
|
||||
while ((vch = val_in_base(ch, base)) >= 0) {
|
||||
val = val * base + vch; /* overflow? nah */
|
||||
ch = GetChar();
|
||||
}
|
||||
while (ch == 'l' || ch == 'L' || ch == 'u' || ch == 'U')
|
||||
ch = GetChar();
|
||||
UnGetChar();
|
||||
ptok->tk_val = val;
|
||||
return ptok->tk_symb = INTEGER;
|
||||
}
|
||||
case STEOI: /* end of text on source file */
|
||||
return ptok->tk_symb = EOF;
|
||||
case STMSPEC:
|
||||
if (!InputLevel) goto garbage;
|
||||
if (ch == TOKSEP) goto again;
|
||||
/* fallthrough shouldn't happen */
|
||||
default: /* this cannot happen */
|
||||
crash("bad class for char 0%o", ch);
|
||||
}
|
||||
/*NOTREACHED*/
|
||||
}
|
||||
|
||||
skipcomment()
|
||||
{
|
||||
/* The last character read has been the '*' of '/_*'. The
|
||||
characters, except NL and EOI, between '/_*' and the first
|
||||
occurring '*_/' are not interpreted.
|
||||
NL only affects the LineNumber. EOI is not legal.
|
||||
|
||||
Important note: it is not possible to stop skipping comment
|
||||
beyond the end-of-file of an included file.
|
||||
EOI is returned by LoadChar only on encountering EOF of the
|
||||
top-level file...
|
||||
*/
|
||||
register int c;
|
||||
|
||||
NoUnstack++;
|
||||
c = GetChar();
|
||||
do {
|
||||
while (c != '*') {
|
||||
if (class(c) == STNL) {
|
||||
++LineNumber;
|
||||
} else if (c == EOI) {
|
||||
NoUnstack--;
|
||||
return;
|
||||
}
|
||||
c = GetChar();
|
||||
} /* last Character seen was '*' */
|
||||
c = GetChar();
|
||||
} while (c != '/');
|
||||
NoUnstack--;
|
||||
}
|
||||
|
||||
arith
|
||||
char_constant(nm)
|
||||
char *nm;
|
||||
{
|
||||
register arith val = 0;
|
||||
register int ch;
|
||||
int size = 0;
|
||||
|
||||
ch = GetChar();
|
||||
if (ch == '\'')
|
||||
error("%s constant too short", nm);
|
||||
else
|
||||
while (ch != '\'') {
|
||||
if (ch == '\n') {
|
||||
error("newline in %s constant", nm);
|
||||
LineNumber++;
|
||||
break;
|
||||
}
|
||||
if (ch == '\\')
|
||||
ch = quoted(GetChar());
|
||||
if (ch >= 128) ch -= 256;
|
||||
if (size < (int)size)
|
||||
val |= ch << 8 * size;
|
||||
size++;
|
||||
ch = GetChar();
|
||||
}
|
||||
if (size > 1)
|
||||
strict("%s constant includes more than one character", nm);
|
||||
if (size > sizeof(arith))
|
||||
error("%s constant too long", nm);
|
||||
return val;
|
||||
}
|
||||
|
||||
char *
|
||||
string_token(nm, stop_char)
|
||||
char *nm;
|
||||
{
|
||||
register int ch;
|
||||
register int str_size;
|
||||
register char *str = Malloc((unsigned) (str_size = ISTRSIZE));
|
||||
register int pos = 0;
|
||||
|
||||
ch = GetChar();
|
||||
while (ch != stop_char) {
|
||||
if (ch == '\n') {
|
||||
error("newline in %s", nm);
|
||||
LineNumber++;
|
||||
break;
|
||||
}
|
||||
if (ch == EOI) {
|
||||
error("end-of-file inside %s", nm);
|
||||
break;
|
||||
}
|
||||
if (ch == '\\' && !AccFileSpecifier)
|
||||
ch = quoted(GetChar());
|
||||
str[pos++] = ch;
|
||||
if (pos == str_size)
|
||||
str = Realloc(str, str_size <<= 1);
|
||||
ch = GetChar();
|
||||
}
|
||||
str[pos++] = '\0'; /* for filenames etc. */
|
||||
str = Realloc(str, pos);
|
||||
return str;
|
||||
}
|
||||
|
||||
int
quoted(ch)
	register int ch;
{
	/*	quoted() translates an escape sequence into the
		character it stands for.  The character following the
		backslash is passed in ch; any further characters of
		the sequence are read with GetChar().  The result is
		reduced to 8 bits.  A character that starts no known
		escape sequence is returned as is.
	*/
	if (is_oct(ch)) {			/* a quoted octal */
		register int value = 0, extra = 0;

		/* at most three octal digits, the first one is in ch */
		do {
			value = value * 8 + (ch - '0');
			ch = GetChar();
		} while (is_oct(ch) && ++extra < 3);
		UnGetChar();
		ch = value;
	}
	else {					/* a quoted char */
		switch (ch) {
		case 'a':			/* alert */
			ch = '\007';
			break;
		case 'b':
			ch = '\b';
			break;
		case 'f':
			ch = '\f';
			break;
		case 'n':
			ch = '\n';
			break;
		case 'r':
			ch = '\r';
			break;
		case 't':
			ch = '\t';
			break;
		case 'v':			/* vertical tab */
			ch = '\013';
			break;
		case 'x':			/* quoted hex */
		{
			register int value = 0;
			register int digit;

			/* take hex digits for as long as they come */
			while ((digit = val_in_base(ch = GetChar(), 16)) != -1)
				value = value * 16 + digit;
			UnGetChar();
			ch = value;
		}
		}
	}
	return ch & 0377;
}
|
||||
|
||||
|
||||
int
val_in_base(ch, base)
	register int ch;
{
	/*	val_in_base() returns the value of the digit ch in the
		given base (8, 10 or 16), or -1 if ch is not a digit of
		that base.  Any other base is a fatal error.
	*/
	switch (base) {
	case 8:
		/* octal digits run from '0' up to and including '7' */
		return (is_dig(ch) && ch < '8') ? ch - '0' : -1;
	case 10:
		return is_dig(ch) ? ch - '0' : -1;
	case 16:
		/* the mask with 017 makes the letter formula yield
		   10..15 for 'A'..'F' as well as for 'a'..'f' in ASCII
		*/
		return is_dig(ch) ? ch - '0'
			: is_hex(ch) ? (ch - 'a' + 10) & 017
			: -1;
	default:
		fatal("(val_in_base) illegal base value %d", base);
		/* NOTREACHED */
	}
	return -1;	/* keeps the result defined should fatal() return */
}
|
||||
|
||||
|
||||
int
|
||||
GetChar()
|
||||
{
|
||||
/* The routines GetChar and trigraph parses the trigraph
|
||||
sequences and removes occurences of \\\n.
|
||||
*/
|
||||
register int ch;
|
||||
|
||||
again:
|
||||
LoadChar(ch);
|
||||
|
||||
/* possible trigraph sequence */
|
||||
if (ch == '?')
|
||||
ch = trigraph();
|
||||
|
||||
/* \\\n are removed from the input stream */
|
||||
if (ch == '\\') {
|
||||
LoadChar(ch);
|
||||
if (ch == '\n') {
|
||||
++LineNumber;
|
||||
goto again;
|
||||
}
|
||||
PushBack();
|
||||
ch = '\\';
|
||||
}
|
||||
return(LexSave = ch);
|
||||
}
|
||||
|
||||
|
||||
int
trigraph()
{
	/*	trigraph() is called after a '?' has been read.  When
		the next two characters complete a trigraph sequence,
		the character it denotes is returned.  Otherwise the
		characters read here are pushed back and the '?' itself
		is returned.
	*/
	register int c;

	LoadChar(c);
	if (c != '?') {
		PushBack();
		return '?';
	}

	LoadChar(c);
	switch (c) {		/* the third character decides */
	case '=':
		return '#';
	case '(':
		return '[';
	case '/':
		return '\\';
	case ')':
		return ']';
	case '\'':
		return '^';
	case '<':
		return '{';
	case '!':
		return '|';
	case '>':
		return '}';
	case '-':
		return '~';
	}

	/* no trigraph: give back the third and the second character */
	PushBack();
	PushBack();
	return '?';
}
|
||||
Reference in New Issue
Block a user