blob: 59eef684775d7c26e75fcddf1dbcf98c88de6f84 [file] [log] [blame] [edit]
/* ANTLRParser.C
*
* SOFTWARE RIGHTS
*
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool
* Set (PCCTS) -- PCCTS is in the public domain. An individual or
* company may do whatever they wish with source code distributed with
* PCCTS or the code generated by PCCTS, including the incorporation of
* PCCTS, or its output, into commerical software.
*
* We encourage users to develop software with PCCTS. However, we do ask
* that credit is given to us for developing PCCTS. By "credit",
* we mean that if you incorporate our source code into one of your
* programs (commercial product, research project, or otherwise) that you
* acknowledge this fact somewhere in the documentation, research report,
* etc... If you like PCCTS and have developed a nice tool with the
* output, please mention that you developed it using PCCTS. In
* addition, we ask that this header remain intact in our source code.
* As long as these guidelines are kept, we expect to continue enhancing
* this system and expect to make other tools available as they are
* completed.
*
* ANTLR 1.33
* Terence Parr
* Parr Research Corporation
* with Purdue University and AHPCRC, University of Minnesota
* 1989-1995
*/
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
/* I have to put this here due to C++ limitation
* that you can't have a 'forward' decl for enums.
* I hate C++!!!!!!!!!!!!!!!
* Of course, if I could use real templates, this would go away.
*/
// MR1
// MR1 10-Apr-97 133MR1 Prevent use of varying sizes for the
// MR1 ANTLRTokenType enum
// MR1
enum ANTLRTokenType { TER_HATES_CPP=0, ITS_TOO_COMPLICATED=9999}; // MR1
#define ANTLR_SUPPORT_CODE
#include "config.h"
#include ATOKEN_H
#include ATOKENBUFFER_H
#include APARSER_H
static const int zzINF_DEF_TOKEN_BUFFER_SIZE = 2000;
static const int zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000;
/* L o o k a h e a d M a c r o s */
/* maximum of 32 bits/unsigned int and must be 8 bits/byte;
* we only use 8 bits of it.
*/
SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = {
0x00000001, 0x00000002, 0x00000004, 0x00000008,
0x00000010, 0x00000020, 0x00000040, 0x00000080
};
char ANTLRParser::eMsgBuffer[500] = "";
ANTLRParser::
~ANTLRParser()
{
delete [] token_type;
}
ANTLRParser::
ANTLRParser(ANTLRTokenBuffer *_inputTokens,
int k,
int use_inf_look,
int dlook,
int ssize)
{
LLk = k;
can_use_inf_look = use_inf_look;
demand_look = dlook;
bsetsize = ssize;
guessing = 0;
token_tbl = NULL;
eofToken = (ANTLRTokenType)1;
// allocate lookahead buffer
token_type = new ANTLRTokenType[LLk];
lap = 0;
labase = 0;
dirty = 0;
inf_labase = 0; // MR7
inf_last = 0; // MR7
/* prime lookahead buffer, point to inputTokens */
this->inputTokens = _inputTokens;
this->inputTokens->setMinTokens(k);
_inputTokens->setParser(this); // MR1
}
void ANTLRParser::init()
{
prime_lookahead();
}
int ANTLRParser::
guess(ANTLRParserState *st)
{
saveState(st);
guessing = 1;
return setjmp(guess_start.state);
}
void ANTLRParser::
saveState(ANTLRParserState *buf)
{
buf->guess_start = guess_start;
buf->guessing = guessing;
buf->inf_labase = inf_labase;
buf->inf_last = inf_last;
buf->dirty = dirty;
}
void ANTLRParser::
restoreState(ANTLRParserState *buf)
{
int i;
guess_start = buf->guess_start;
guessing = buf->guessing;
inf_labase = buf->inf_labase;
inf_last = buf->inf_last;
dirty = buf->dirty;
// restore lookahead buffer from k tokens before restored TokenBuffer position
// if demand_look, then I guess we don't look backwards for these tokens.
for (i=1; i<=LLk; i++) token_type[i-1] =
inputTokens->bufferedToken(i-LLk)->getType();
lap = 0;
labase = 0;
}
/* Get the next symbol from the input stream; put it into lookahead buffer;
* fill token_type[] fast reference cache also. NLA is the next place where
* a lookahead ANTLRAbstractToken should go.
*/
void ANTLRParser::
consume()
{
NLA = inputTokens->getToken()->getType();
dirty--;
lap = (lap+1)&(LLk-1);
}
_ANTLRTokenPtr ANTLRParser::
LT(int i)
{
#ifdef DEBUG_TOKENBUFFER
if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() <= LLk )
{
static char buf[2000];
sprintf(buf, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i);
panic(buf);
}
#endif
return inputTokens->bufferedToken(i-LLk);
}
void
ANTLRParser::
look(int k)
{
int i, c = k - (LLk-dirty);
for (i=1; i<=c; i++) consume();
}
/* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK);
*/
void
ANTLRParser::
prime_lookahead()
{
int i;
for(i=1;i<=LLk; i++) consume();
dirty=0;
lap = 0;
labase = 0;
}
/* check to see if the current input symbol matches '_t'.
* During NON demand lookahead mode, dirty will always be 0 and
* hence the extra code for consuming tokens in _match is never
* executed; the same routine can be used for both modes.
*/
int ANTLRParser::
_match(ANTLRTokenType _t, ANTLRChar **MissText,
ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
SetWordType **MissSet)
{
if ( dirty==LLk ) {
consume();
}
if ( LA(1)!=_t ) {
*MissText=NULL;
*MissTok= _t; *BadTok = LT(1);
*MissSet=NULL;
return 0;
}
dirty++;
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
return 1;
}
/* check to see if the current input symbol matches '_t'.
* Used during exception handling.
*/
int ANTLRParser::
_match_wsig(ANTLRTokenType _t)
{
if ( dirty==LLk ) {
consume();
}
if ( LA(1)!=_t ) return 0;
dirty++;
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
return 1;
}
/* check to see if the current input symbol matches any token in a set.
* During NON demand lookahead mode, dirty will always be 0 and
* hence the extra code for consuming tokens in _match is never
* executed; the same routine can be used for both modes.
*/
int ANTLRParser::
_setmatch(SetWordType *tset, ANTLRChar **MissText,
ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
SetWordType **MissSet)
{
if ( dirty==LLk ) {
consume();
}
if ( !set_el(LA(1), tset) ) {
*MissText=NULL;
*MissTok= (ANTLRTokenType)0; *BadTok=LT(1);
*MissSet=tset;
return 0;
}
dirty++;
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
return 1;
}
int ANTLRParser::
_setmatch_wsig(SetWordType *tset)
{
if ( dirty==LLk ) {
consume();
}
if ( !set_el(LA(1), tset) ) return 0;
dirty++;
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
return 1;
}
/* Exception handling routines */
//
// 7-Apr-97 133MR1
// Change suggested by Eli Sternheim (eli@interhdl.com)
//
void ANTLRParser::
consumeUntil(SetWordType *st)
{
ANTLRTokenType tmp; // MR1
const int Eof=1; // MR1
while ( !set_el( (tmp=LA(1)), st) && tmp!=Eof) { consume(); } // MR1
}
//
// 7-Apr-97 133MR1
// Change suggested by Eli Sternheim (eli@interhdl.com)
//
void ANTLRParser::
consumeUntilToken(int t)
{
int tmp; // MR1
const int Eof=1; // MR1
while ( (tmp=LA(1)) !=t && tmp!=Eof) { consume(); } // MR1
}
/* Old error stuff */
void ANTLRParser::
resynch(SetWordType *wd,SetWordType mask)
{
static int consumed = 1;
/* if you enter here without having consumed a token from last resynch
* force a token consumption.
*/
if ( !consumed ) {consume(); consumed=1; return;}
/* if current token is in resynch set, we've got what we wanted */
if ( wd[LA(1)]&mask || LA(1) == eofToken ) {consumed=0; return;}
/* scan until we find something in the resynch set */
while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();}
consumed=1;
}
/* standard error reporting function that assumes DLG-based scanners;
* you should redefine in subclass to change it or if you use your
* own scanner.
*/
void ANTLRParser::
syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, SetWordType *eset,
ANTLRTokenType etok, int k)
{
int line;
line = LT(1)->getLine();
fprintf(stderr, "line %d: syntax error at \"%s\"",
line, LT(1)->getText());
if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
if ( k==1 ) fprintf(stderr, " missing");
else
{
fprintf(stderr, "; \"%s\" not", LT(1)->getText());
if ( set_deg(eset)>1 ) fprintf(stderr, " in");
}
if ( set_deg(eset)>0 ) edecode(eset);
else fprintf(stderr, " %s", token_tbl[etok]);
if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup);
fprintf(stderr, "\n");
}
/* is b an element of set p? */
int ANTLRParser::
set_el(ANTLRTokenType b, SetWordType *p)
{
return( p[DIVWORD(b)] & bitmask[MODWORD(b)] );
}
int ANTLRParser::
set_deg(SetWordType *a)
{
/* Fast compute degree of a set... the number
of elements present in the set. Assumes
that all word bits are used in the set
*/
register SetWordType *p = a;
register SetWordType *endp = &(a[bsetsize]);
register int degree = 0;
if ( a == NULL ) return 0;
while ( p < endp )
{
register SetWordType t = *p;
register SetWordType *b = &(bitmask[0]);
do {
if (t & *b) ++degree;
} while (++b < &(bitmask[sizeof(SetWordType)*8]));
p++;
}
return(degree);
}
void ANTLRParser::
edecode(SetWordType *a)
{
register SetWordType *p = a;
register SetWordType *endp = &(p[bsetsize]);
register unsigned e = 0;
if ( set_deg(a)>1 ) fprintf(stderr, " {");
do {
register SetWordType t = *p;
register SetWordType *b = &(bitmask[0]);
do {
if ( t & *b ) fprintf(stderr, " %s", token_tbl[e]);
e++;
} while (++b < &(bitmask[sizeof(SetWordType)*8]));
} while (++p < endp);
if ( set_deg(a)>1 ) fprintf(stderr, " }");
}
/* input looks like:
* zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk)
* where the zzMiss stuff is set here to the token that did not match
* (and which set wasn't it a member of).
*/
void
ANTLRParser::FAIL(int k, ...)
{
//
// MR1 10-Apr-97 Remove static allocation of variable text
//
char *text=new char [1000]; // MR1
SetWordType **f=new SetWordType *[20]; // MR1
SetWordType **miss_set;
ANTLRChar **miss_text;
_ANTLRTokenPtr *bad_tok;
ANTLRChar **bad_text;
//
// 7-Apr-97 133MR1
// err_k is passed as a "int *", not "unsigned *"
//
int *err_k; // MR1
int i;
va_list ap;
va_start(ap, k);
text[0] = '\0';
if ( k>20 ) panic("FAIL: overflowed buffer");
for (i=1; i<=k; i++) /* collect all lookahead sets */
{
f[i-1] = va_arg(ap, SetWordType *);
}
for (i=1; i<=k; i++) /* look for offending token */
{
if ( i>1 ) strcat(text, " ");
// 01/26/04 - tomf TR57959 (unrelated) Protect against crash
// in weird parser state.
if ( NULL != LT(i)->getText() ) {
strcat(text, LT(i)->getText());
}
if ( !set_el(LA(i), f[i-1]) ) break;
}
miss_set = va_arg(ap, SetWordType **);
miss_text = va_arg(ap, ANTLRChar **);
bad_tok = va_arg(ap, _ANTLRTokenPtr *);
bad_text = va_arg(ap, ANTLRChar **);
err_k = va_arg(ap, int *); // MR1
if ( i>k )
{
/* bad; lookahead is permutation that cannot be matched,
* but, the ith token of lookahead is valid at the ith position
* (The old LL sub 1 (k) versus LL(k) parsing technique)
*/
*miss_set = NULL;
*miss_text = LT(1)->getText();
*bad_tok = LT(1);
*bad_text = (*bad_tok)->getText();
*err_k = k;
//
// MR4 20-May-97 erroneously deleted contents of f[]
// MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca)
// MR1 10-Apr-97 release temporary storage
//
delete [] text; // MR1
delete [] f; // MR1
return; // MR1
}
/* fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/
*miss_set = f[i-1];
*miss_text = text;
*bad_tok = LT(i);
*bad_text = (*bad_tok)->getText();
if ( i==1 ) *err_k = 1;
else *err_k = k;
//
// MR4 20-May-97 erroneously deleted contents of f[]
// MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca)
// MR1 10-Apr-97 release temporary storage
//
delete [] text; // MR1
delete [] f; // MR1
return; // MR1
}
int ANTLRParser::
_match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows)
{
if ( dirty==LLk ) consume();
if ( LA(1)!=tokenWanted )
{
fprintf(stderr,
"line %d: syntax error at \"%s\" missing %s\n",
LT(1)->getLine(),
(LA(1)==eofToken)?"<eof>":LT(1)->getText(),
token_tbl[tokenWanted]);
consumeUntil( whatFollows );
return 0;
}
else {
dirty++;
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
/* if ( !demand_look ) consume(); */
return 1;
}
}
int ANTLRParser::
_setmatch_wdfltsig(SetWordType *tokensWanted,
ANTLRTokenType tokenTypeOfSet,
SetWordType *whatFollows)
{
if ( dirty==LLk ) consume();
if ( !set_el(LA(1), tokensWanted) )
{
fprintf(stderr,
"line %d: syntax error at \"%s\" missing %s\n",
LT(1)->getLine(),
(LA(1)==eofToken)?"<eof>":LT(1)->getText(),
token_tbl[tokenTypeOfSet]);
consumeUntil( whatFollows );
return 0;
}
else {
dirty++;
labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
/* if ( !demand_look ) consume(); */
return 1;
}
}
char *ANTLRParser::
eMsgd(char *err,int d)
{
sprintf(eMsgBuffer, err, d); // dangerous, but I don't care
return eMsgBuffer;
}
char *ANTLRParser::
eMsg(char *err, char *s)
{
sprintf(eMsgBuffer, err, s);
return eMsgBuffer;
}
char *ANTLRParser::
eMsg2(char *err,char *s, char *t)
{
sprintf(eMsgBuffer, err, s, t);
return eMsgBuffer;
}
void ANTLRParser::
panic(char *msg)
{
fprintf(stderr, "ANTLR panic: %s\n", msg);
exit(PCCTS_EXIT_FAILURE); // MR1
}
const ANTLRChar *ANTLRParser:: // MR1
parserTokenName(int tok) { // MR1
return token_tbl[tok]; // MR1
} // MR1