*** empty log message ***
This commit is contained in:
563
lang/cem/cemcom/LLlex.c
Normal file
563
lang/cem/cemcom/LLlex.c
Normal file
@@ -0,0 +1,563 @@
|
||||
/* $Header$ */
|
||||
/* L E X I C A L A N A L Y Z E R */
|
||||
|
||||
#include "idfsize.h"
|
||||
#include "numsize.h"
|
||||
#include "debug.h"
|
||||
#include "strsize.h"
|
||||
#include "nopp.h"
|
||||
|
||||
#include "input.h"
|
||||
#include "alloc.h"
|
||||
#include "arith.h"
|
||||
#include "def.h"
|
||||
#include "idf.h"
|
||||
#include "LLlex.h"
|
||||
#include "Lpars.h"
|
||||
#include "class.h"
|
||||
#include "assert.h"
|
||||
#include "sizes.h"
|
||||
|
||||
/* Data about the token yielded */
|
||||
struct token dot, ahead, aside;
|
||||
|
||||
unsigned int LineNumber = 0; /* current LineNumber */
|
||||
char *FileName = 0; /* current filename */
|
||||
|
||||
int ReplaceMacros = 1; /* replacing macros */
|
||||
int EoiForNewline = 0; /* return EOI upon encountering newline */
|
||||
int PreProcKeys = 0; /* return preprocessor key */
|
||||
int AccFileSpecifier = 0; /* return filespecifier <...> */
|
||||
int AccDefined = 0; /* accept "defined(...)" */
|
||||
int UnknownIdIsZero = 0; /* interpret unknown id as integer 0 */
|
||||
int SkipEscNewline = 0; /* how to interpret backslash-newline */
|
||||
|
||||
#define MAX_LL_DEPTH 2
|
||||
|
||||
static struct token LexStack[MAX_LL_DEPTH];
|
||||
static LexSP = 0;
|
||||
|
||||
/* In PushLex() the actions are taken in order to initialise or
|
||||
re-initialise the lexical scanner.
|
||||
E.g. at the invocation of a sub-parser that uses LLlex(), the
|
||||
state of the current parser should be saved.
|
||||
*/
|
||||
PushLex()
|
||||
{
|
||||
ASSERT(LexSP < 2);
|
||||
ASSERT(ASIDE == 0); /* ASIDE = 0; */
|
||||
GetToken(&ahead);
|
||||
ahead.tk_line = LineNumber;
|
||||
ahead.tk_file = FileName;
|
||||
LexStack[LexSP++] = dot;
|
||||
}
|
||||
|
||||
PopLex()
|
||||
{
|
||||
ASSERT(LexSP > 0);
|
||||
dot = LexStack[--LexSP];
|
||||
}
|
||||
|
||||
int
|
||||
LLlex()
|
||||
{
|
||||
/* LLlex() plays the role of Lexical Analyzer for the C parser.
|
||||
The look-ahead and putting aside of tokens are taken into
|
||||
account.
|
||||
*/
|
||||
if (ASIDE) { /* a token is put aside */
|
||||
dot = aside;
|
||||
ASIDE = 0;
|
||||
}
|
||||
else { /* read ahead and return the old one */
|
||||
dot = ahead;
|
||||
/* the following test is performed due to the dual
|
||||
task of LLlex(): it is also called for parsing the
|
||||
restricted constant expression following a #if or
|
||||
#elif. The newline character causes EOF to be
|
||||
returned in this case to stop the LLgen parsing task.
|
||||
*/
|
||||
if (DOT != EOI)
|
||||
GetToken(&ahead);
|
||||
else
|
||||
DOT = EOF;
|
||||
}
|
||||
/* keep track of the place of the token in the file */
|
||||
ahead.tk_file = FileName;
|
||||
ahead.tk_line = LineNumber;
|
||||
return DOT;
|
||||
}
|
||||
|
||||
char *string_token();
|
||||
|
||||
int
|
||||
GetToken(ptok)
|
||||
register struct token *ptok;
|
||||
{
|
||||
/* GetToken() is the actual token recognizer. It calls the
|
||||
control line interpreter if it encounters a "\n#"
|
||||
combination. Macro replacement is also performed if it is
|
||||
needed.
|
||||
*/
|
||||
char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
|
||||
register int ch, nch;
|
||||
|
||||
again: /* rescan the input after an error or replacement */
|
||||
LoadChar(ch);
|
||||
go_on: /* rescan, the following character has been read */
|
||||
/* The following test is made to strip off the nonascii's */
|
||||
if ((ch & 0200) && ch != EOI) {
|
||||
/* this is the only user-error which causes the
|
||||
process to stop abruptly.
|
||||
*/
|
||||
fatal("non-ascii '\\%03o' read", ch & 0377);
|
||||
}
|
||||
switch (class(ch)) { /* detect character class */
|
||||
case STNL: /* newline, vertical space or formfeed */
|
||||
LineNumber++; /* also at vs and ff */
|
||||
if (EoiForNewline) /* called in control line */
|
||||
/* a newline in a control line indicates the
|
||||
end-of-information of the line.
|
||||
*/
|
||||
return ptok->tk_symb = EOI;
|
||||
while (LoadChar(ch), ch == '#') /* a control line follows */
|
||||
domacro();
|
||||
/* We have to loop here, because in
|
||||
`domacro' the nl, vt or ff is read. The
|
||||
character following it may again be a `#'.
|
||||
*/
|
||||
goto go_on;
|
||||
case STSKIP: /* just skip the skip characters */
|
||||
goto again;
|
||||
case STGARB: /* garbage character */
|
||||
#ifndef NOPP
|
||||
if (SkipEscNewline && (ch == '\\')) {
|
||||
/* a '\\' is allowed in #if/#elif expression */
|
||||
LoadChar(ch);
|
||||
if (class(ch) == STNL) { /* vt , ff ? */
|
||||
++LineNumber;
|
||||
goto again;
|
||||
}
|
||||
PushBack();
|
||||
ch = '\\';
|
||||
}
|
||||
#endif NOPP
|
||||
if (040 < ch && ch < 0177)
|
||||
lexerror("garbage char %c", ch);
|
||||
else
|
||||
lexerror("garbage char \\%03o", ch);
|
||||
goto again;
|
||||
case STSIMP: /* a simple character, no part of compound token*/
|
||||
if (ch == '/') { /* probably the start of comment */
|
||||
LoadChar(ch);
|
||||
if (ch == '*') {
|
||||
/* start of comment */
|
||||
skipcomment();
|
||||
goto again;
|
||||
}
|
||||
else {
|
||||
PushBack();
|
||||
ch = '/'; /* restore ch */
|
||||
}
|
||||
}
|
||||
return ptok->tk_symb = ch;
|
||||
case STCOMP: /* maybe the start of a compound token */
|
||||
LoadChar(nch); /* character lookahead */
|
||||
switch (ch) {
|
||||
case '!':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = NOTEQUAL;
|
||||
PushBack();
|
||||
return ptok->tk_symb = ch;
|
||||
case '&':
|
||||
if (nch == '&')
|
||||
return ptok->tk_symb = AND;
|
||||
PushBack();
|
||||
return ptok->tk_symb = ch;
|
||||
case '+':
|
||||
if (nch == '+')
|
||||
return ptok->tk_symb = PLUSPLUS;
|
||||
PushBack();
|
||||
return ptok->tk_symb = ch;
|
||||
case '-':
|
||||
if (nch == '-')
|
||||
return ptok->tk_symb = MINMIN;
|
||||
if (nch == '>')
|
||||
return ptok->tk_symb = ARROW;
|
||||
PushBack();
|
||||
return ptok->tk_symb = ch;
|
||||
case '<':
|
||||
if (AccFileSpecifier) {
|
||||
PushBack(); /* pushback nch */
|
||||
ptok->tk_str =
|
||||
string_token("file specifier", '>');
|
||||
return ptok->tk_symb = FILESPECIFIER;
|
||||
}
|
||||
if (nch == '<')
|
||||
return ptok->tk_symb = LEFT;
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = LESSEQ;
|
||||
PushBack();
|
||||
return ptok->tk_symb = ch;
|
||||
case '=':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = EQUAL;
|
||||
/* The following piece of code tries to recognise
|
||||
old-fashioned assignment operators `=op'
|
||||
*/
|
||||
switch (nch) {
|
||||
case '+':
|
||||
return ptok->tk_symb = PLUSAB;
|
||||
case '-':
|
||||
return ptok->tk_symb = MINAB;
|
||||
case '*':
|
||||
return ptok->tk_symb = TIMESAB;
|
||||
case '/':
|
||||
return ptok->tk_symb = DIVAB;
|
||||
case '%':
|
||||
return ptok->tk_symb = MODAB;
|
||||
case '>':
|
||||
case '<':
|
||||
LoadChar(ch);
|
||||
if (ch != nch) {
|
||||
PushBack();
|
||||
lexerror("illegal combination '=%c'",
|
||||
nch);
|
||||
}
|
||||
return ptok->tk_symb =
|
||||
nch == '<' ? LEFTAB : RIGHTAB;
|
||||
case '&':
|
||||
return ptok->tk_symb = ANDAB;
|
||||
case '^':
|
||||
return ptok->tk_symb = XORAB;
|
||||
case '|':
|
||||
return ptok->tk_symb = ORAB;
|
||||
}
|
||||
PushBack();
|
||||
return ptok->tk_symb = ch;
|
||||
case '>':
|
||||
if (nch == '=')
|
||||
return ptok->tk_symb = GREATEREQ;
|
||||
if (nch == '>')
|
||||
return ptok->tk_symb = RIGHT;
|
||||
PushBack();
|
||||
return ptok->tk_symb = ch;
|
||||
case '|':
|
||||
if (nch == '|')
|
||||
return ptok->tk_symb = OR;
|
||||
PushBack();
|
||||
return ptok->tk_symb = ch;
|
||||
}
|
||||
case STIDF:
|
||||
{
|
||||
register char *tg = &buf[0];
|
||||
register int pos = -1;
|
||||
register int hash;
|
||||
register struct idf *idef;
|
||||
extern int idfsize; /* ??? */
|
||||
|
||||
hash = STARTHASH();
|
||||
do { /* read the identifier */
|
||||
if (++pos < idfsize) {
|
||||
*tg++ = ch;
|
||||
hash = ENHASH(hash, ch, pos);
|
||||
}
|
||||
LoadChar(ch);
|
||||
} while (in_idf(ch));
|
||||
hash = STOPHASH(hash);
|
||||
if (ch != EOI)
|
||||
PushBack();
|
||||
*tg++ = '\0'; /* mark the end of the identifier */
|
||||
idef = ptok->tk_idf = idf_hashed(buf, tg - buf, hash);
|
||||
#ifndef NOPP
|
||||
if (idef->id_macro && ReplaceMacros) {
|
||||
/* macro replacement should be performed */
|
||||
if (replace(idef))
|
||||
goto again;
|
||||
/* arrived here: something went wrong in
|
||||
replace, don't substitute in this case
|
||||
*/
|
||||
}
|
||||
else
|
||||
if (UnknownIdIsZero) {
|
||||
ptok->tk_ival = (arith)0;
|
||||
ptok->tk_fund = INT;
|
||||
return ptok->tk_symb = INTEGER;
|
||||
}
|
||||
#endif NOPP
|
||||
ptok->tk_symb = (
|
||||
idef->id_reserved ?
|
||||
idef->id_reserved :
|
||||
idef->id_def && idef->id_def->df_sc == TYPEDEF ?
|
||||
TYPE_IDENTIFIER :
|
||||
IDENTIFIER
|
||||
);
|
||||
return IDENTIFIER;
|
||||
}
|
||||
case STCHAR: /* character constant */
|
||||
{
|
||||
register arith val = 0, size = 0;
|
||||
|
||||
LoadChar(ch);
|
||||
if (ch == '\'')
|
||||
lexerror("character constant too short");
|
||||
else
|
||||
while (ch != '\'') {
|
||||
if (ch == '\n') {
|
||||
lexerror("newline in character constant");
|
||||
LineNumber++;
|
||||
break;
|
||||
}
|
||||
if (ch == '\\') {
|
||||
LoadChar(ch);
|
||||
ch = quoted(ch);
|
||||
}
|
||||
val = val*256 + ch;
|
||||
size++;
|
||||
LoadChar(ch);
|
||||
}
|
||||
if (size > int_size)
|
||||
lexerror("character constant too long");
|
||||
ptok->tk_ival = val;
|
||||
ptok->tk_fund = INT;
|
||||
return ptok->tk_symb = INTEGER;
|
||||
}
|
||||
case STSTR: /* string */
|
||||
ptok->tk_str = string_token("string", '"');
|
||||
return ptok->tk_symb = STRING;
|
||||
case STNUM: /* a numeric constant */
|
||||
{
|
||||
/* It should be noted that 099 means 81(decimal) and
|
||||
099.5 means 99.5 . This severely limits the tricks
|
||||
we can use to scan a numeric value.
|
||||
*/
|
||||
register char *np = &buf[1];
|
||||
register int base = 10;
|
||||
register int vch;
|
||||
register arith val = 0;
|
||||
|
||||
if (ch == '.') { /* an embarrassing ambiguity */
|
||||
LoadChar(vch);
|
||||
PushBack();
|
||||
if (!is_dig(vch)) /* just a `.' */
|
||||
return ptok->tk_symb = ch;
|
||||
*np++ = '0';
|
||||
/* in the rest of the compiler, all floats
|
||||
have to start with a digit.
|
||||
*/
|
||||
}
|
||||
if (ch == '0') {
|
||||
*np++ = ch;
|
||||
LoadChar(ch);
|
||||
if (ch == 'x' || ch == 'X') {
|
||||
base = 16;
|
||||
LoadChar(ch);
|
||||
}
|
||||
else
|
||||
base = 8;
|
||||
}
|
||||
while (vch = val_in_base(ch, base), vch >= 0) {
|
||||
val = val*base + vch;
|
||||
if (np < &buf[NUMSIZE])
|
||||
*np++ = ch;
|
||||
LoadChar(ch);
|
||||
}
|
||||
if (ch == 'l' || ch == 'L') {
|
||||
ptok->tk_ival = val;
|
||||
ptok->tk_fund = LONG;
|
||||
return ptok->tk_symb = INTEGER;
|
||||
}
|
||||
if (base == 16 || !(ch == '.' || ch == 'e' || ch == 'E')) {
|
||||
PushBack();
|
||||
ptok->tk_ival = val;
|
||||
/* The semantic analyser must know if the
|
||||
integral constant is given in octal/hexa-
|
||||
decimal form, in which case its type is
|
||||
UNSIGNED, or in decimal form, in which case
|
||||
its type is signed, indicated by
|
||||
the fund INTEGER.
|
||||
*/
|
||||
ptok->tk_fund =
|
||||
(base == 10 || (base == 8 && val == (arith)0))
|
||||
? INTEGER : UNSIGNED;
|
||||
return ptok->tk_symb = INTEGER;
|
||||
}
|
||||
/* where's the test for the length of the integral ??? */
|
||||
if (ch == '.'){
|
||||
if (np < &buf[NUMSIZE])
|
||||
*np++ = ch;
|
||||
LoadChar(ch);
|
||||
}
|
||||
while (is_dig(ch)){
|
||||
if (np < &buf[NUMSIZE])
|
||||
*np++ = ch;
|
||||
LoadChar(ch);
|
||||
}
|
||||
if (ch == 'e' || ch == 'E') {
|
||||
if (np < &buf[NUMSIZE])
|
||||
*np++ = ch;
|
||||
LoadChar(ch);
|
||||
if (ch == '+' || ch == '-') {
|
||||
if (np < &buf[NUMSIZE])
|
||||
*np++ = ch;
|
||||
LoadChar(ch);
|
||||
}
|
||||
if (!is_dig(ch)) {
|
||||
lexerror("malformed floating constant");
|
||||
if (np < &buf[NUMSIZE])
|
||||
*np++ = ch;
|
||||
}
|
||||
while (is_dig(ch)) {
|
||||
if (np < &buf[NUMSIZE])
|
||||
*np++ = ch;
|
||||
LoadChar(ch);
|
||||
}
|
||||
}
|
||||
PushBack();
|
||||
*np++ = '\0';
|
||||
buf[0] = '-'; /* good heavens... */
|
||||
if (np == &buf[NUMSIZE+1]) {
|
||||
lexerror("floating constant too long");
|
||||
ptok->tk_fval = Salloc("0.0", 5) + 1;
|
||||
}
|
||||
else
|
||||
ptok->tk_fval = Salloc(buf, np - buf) + 1;
|
||||
return ptok->tk_symb = FLOATING;
|
||||
}
|
||||
case STEOI: /* end of text on source file */
|
||||
return ptok->tk_symb = EOI;
|
||||
default: /* this cannot happen */
|
||||
crash("bad class for char 0%o", ch);
|
||||
}
|
||||
/*NOTREACHED*/
|
||||
}
|
||||
|
||||
skipcomment()
|
||||
{
|
||||
/* The last character read has been the '*' of '/_*'. The
|
||||
characters, except NL and EOI, between '/_*' and the first
|
||||
occurring '*_/' are not interpreted.
|
||||
NL only affects the LineNumber. EOI is not legal.
|
||||
|
||||
Important note: it is not possible to stop skipping comment
|
||||
beyond the end-of-file of an included file.
|
||||
EOI is returned by LoadChar only on encountering EOF of the
|
||||
top-level file...
|
||||
*/
|
||||
register int c;
|
||||
|
||||
NoUnstack++;
|
||||
LoadChar(c);
|
||||
do {
|
||||
while (c != '*') {
|
||||
if (class(c) == STNL)
|
||||
++LineNumber;
|
||||
else
|
||||
if (c == EOI) {
|
||||
NoUnstack--;
|
||||
return;
|
||||
}
|
||||
LoadChar(c);
|
||||
}
|
||||
/* Last Character seen was '*' */
|
||||
LoadChar(c);
|
||||
} while (c != '/');
|
||||
NoUnstack--;
|
||||
}
|
||||
|
||||
char *
|
||||
string_token(nm, stop_char)
|
||||
char *nm;
|
||||
{
|
||||
register int ch;
|
||||
register int str_size;
|
||||
register char *str = Malloc(str_size = ISTRSIZE);
|
||||
register int pos = 0;
|
||||
|
||||
LoadChar(ch);
|
||||
while (ch != stop_char) {
|
||||
if (ch == '\n') {
|
||||
lexerror("newline in %s", nm);
|
||||
LineNumber++;
|
||||
break;
|
||||
}
|
||||
if (ch == EOI) {
|
||||
lexerror("end-of-file inside %s", nm);
|
||||
break;
|
||||
}
|
||||
if (ch == '\\') {
|
||||
register int nch;
|
||||
|
||||
LoadChar(nch);
|
||||
if (nch == '\n') {
|
||||
LineNumber++;
|
||||
LoadChar(ch);
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
str[pos++] = '\\';
|
||||
if (pos == str_size)
|
||||
str = Srealloc(str, str_size += RSTRSIZE);
|
||||
ch = nch;
|
||||
}
|
||||
}
|
||||
str[pos++] = ch;
|
||||
if (pos == str_size)
|
||||
str = Srealloc(str, str_size += RSTRSIZE);
|
||||
LoadChar(ch);
|
||||
}
|
||||
str[pos++] = '\0';
|
||||
return str;
|
||||
}
|
||||
|
||||
int
|
||||
quoted(ch)
|
||||
register int ch;
|
||||
{
|
||||
/* quoted() replaces an escaped character sequence by the
|
||||
character meant.
|
||||
*/
|
||||
/* first char after backslash already in ch */
|
||||
if (!is_oct(ch)) { /* a quoted char */
|
||||
switch (ch) {
|
||||
case 'n':
|
||||
ch = '\n';
|
||||
break;
|
||||
case 't':
|
||||
ch = '\t';
|
||||
break;
|
||||
case 'b':
|
||||
ch = '\b';
|
||||
break;
|
||||
case 'r':
|
||||
ch = '\r';
|
||||
break;
|
||||
case 'f':
|
||||
ch = '\f';
|
||||
break;
|
||||
}
|
||||
}
|
||||
else { /* a quoted octal */
|
||||
register int oct = 0, cnt = 0;
|
||||
|
||||
do {
|
||||
oct = oct*8 + (ch-'0');
|
||||
LoadChar(ch);
|
||||
} while (is_oct(ch) && ++cnt < 3);
|
||||
PushBack();
|
||||
ch = oct;
|
||||
}
|
||||
return ch&0377;
|
||||
}
|
||||
|
||||
/* provisional */
|
||||
int
|
||||
val_in_base(ch, base)
|
||||
register int ch;
|
||||
{
|
||||
return
|
||||
is_dig(ch) ? ch - '0' :
|
||||
base != 16 ? -1 :
|
||||
is_hex(ch) ? (ch - 'a' + 10) & 017 :
|
||||
-1;
|
||||
}
|
||||
Reference in New Issue
Block a user