| /* ANTLRParser.C |
| * |
| * SOFTWARE RIGHTS |
| * |
| * We reserve no LEGAL rights to the Purdue Compiler Construction Tool |
| * Set (PCCTS) -- PCCTS is in the public domain. An individual or |
| * company may do whatever they wish with source code distributed with |
| * PCCTS or the code generated by PCCTS, including the incorporation of |
| * PCCTS, or its output, into commerical software. |
| * |
| * We encourage users to develop software with PCCTS. However, we do ask |
| * that credit is given to us for developing PCCTS. By "credit", |
| * we mean that if you incorporate our source code into one of your |
| * programs (commercial product, research project, or otherwise) that you |
| * acknowledge this fact somewhere in the documentation, research report, |
| * etc... If you like PCCTS and have developed a nice tool with the |
| * output, please mention that you developed it using PCCTS. In |
| * addition, we ask that this header remain intact in our source code. |
| * As long as these guidelines are kept, we expect to continue enhancing |
| * this system and expect to make other tools available as they are |
| * completed. |
| * |
| * ANTLR 1.33 |
| * Terence Parr |
| * Parr Research Corporation |
| * with Purdue University and AHPCRC, University of Minnesota |
| * 1989-1995 |
| */ |
| #include <stdlib.h> |
| #include <stdarg.h> |
| #include <string.h> |
| #include <stdio.h> |
| |
| /* I have to put this here due to C++ limitation |
| * that you can't have a 'forward' decl for enums. |
| * I hate C++!!!!!!!!!!!!!!! |
| * Of course, if I could use real templates, this would go away. |
| */ |
| // MR1 |
| // MR1 10-Apr-97 133MR1 Prevent use of varying sizes for the |
| // MR1 ANTLRTokenType enum |
| // MR1 |
| |
// Stand-in definition of the token-type enum, declared ahead of the support
// headers.  Per the MR1 note above, the 9999 enumerator is there to prevent
// varying sizes being chosen for ANTLRTokenType.
enum ANTLRTokenType {
    TER_HATES_CPP       = 0,
    ITS_TOO_COMPLICATED = 9999
}; // MR1
| |
| #define ANTLR_SUPPORT_CODE |
| |
| #include "config.h" |
| #include ATOKEN_H |
| |
| #include ATOKENBUFFER_H |
| #include APARSER_H |
| |
// File-local sizing constants (token counts, judging by the names); nothing
// in the visible part of this file reads them.
static const int zzINF_DEF_TOKEN_BUFFER_SIZE   = 2000,
                 zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000;
| |
| /* L o o k a h e a d M a c r o s */ |
| |
| /* maximum of 32 bits/unsigned int and must be 8 bits/byte; |
| * we only use 8 bits of it. |
| */ |
| SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = { |
| 0x00000001, 0x00000002, 0x00000004, 0x00000008, |
| 0x00000010, 0x00000020, 0x00000040, 0x00000080 |
| }; |
| |
| char ANTLRParser::eMsgBuffer[500] = ""; |
| |
| ANTLRParser:: |
| ~ANTLRParser() |
| { |
| delete [] token_type; |
| } |
| |
| ANTLRParser:: |
| ANTLRParser(ANTLRTokenBuffer *_inputTokens, |
| int k, |
| int use_inf_look, |
| int dlook, |
| int ssize) |
| { |
| LLk = k; |
| can_use_inf_look = use_inf_look; |
| demand_look = dlook; |
| bsetsize = ssize; |
| |
| guessing = 0; |
| token_tbl = NULL; |
| eofToken = (ANTLRTokenType)1; |
| |
| // allocate lookahead buffer |
| token_type = new ANTLRTokenType[LLk]; |
| lap = 0; |
| labase = 0; |
| dirty = 0; |
| inf_labase = 0; // MR7 |
| inf_last = 0; // MR7 |
| /* prime lookahead buffer, point to inputTokens */ |
| this->inputTokens = _inputTokens; |
| this->inputTokens->setMinTokens(k); |
| _inputTokens->setParser(this); // MR1 |
| } |
| |
| void ANTLRParser::init() |
| { |
| prime_lookahead(); |
| } |
| |
| int ANTLRParser:: |
| guess(ANTLRParserState *st) |
| { |
| saveState(st); |
| guessing = 1; |
| return setjmp(guess_start.state); |
| } |
| |
| void ANTLRParser:: |
| saveState(ANTLRParserState *buf) |
| { |
| buf->guess_start = guess_start; |
| buf->guessing = guessing; |
| buf->inf_labase = inf_labase; |
| buf->inf_last = inf_last; |
| buf->dirty = dirty; |
| } |
| |
| void ANTLRParser:: |
| restoreState(ANTLRParserState *buf) |
| { |
| int i; |
| |
| guess_start = buf->guess_start; |
| guessing = buf->guessing; |
| inf_labase = buf->inf_labase; |
| inf_last = buf->inf_last; |
| dirty = buf->dirty; |
| |
| // restore lookahead buffer from k tokens before restored TokenBuffer position |
| // if demand_look, then I guess we don't look backwards for these tokens. |
| for (i=1; i<=LLk; i++) token_type[i-1] = |
| inputTokens->bufferedToken(i-LLk)->getType(); |
| lap = 0; |
| labase = 0; |
| } |
| |
| /* Get the next symbol from the input stream; put it into lookahead buffer; |
| * fill token_type[] fast reference cache also. NLA is the next place where |
| * a lookahead ANTLRAbstractToken should go. |
| */ |
| void ANTLRParser:: |
| consume() |
| { |
| NLA = inputTokens->getToken()->getType(); |
| dirty--; |
| lap = (lap+1)&(LLk-1); |
| } |
| |
| _ANTLRTokenPtr ANTLRParser:: |
| LT(int i) |
| { |
| #ifdef DEBUG_TOKENBUFFER |
| if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() <= LLk ) |
| { |
| static char buf[2000]; |
| sprintf(buf, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i); |
| panic(buf); |
| } |
| #endif |
| return inputTokens->bufferedToken(i-LLk); |
| } |
| |
| void |
| ANTLRParser:: |
| look(int k) |
| { |
| int i, c = k - (LLk-dirty); |
| for (i=1; i<=c; i++) consume(); |
| } |
| |
| /* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK); |
| */ |
| void |
| ANTLRParser:: |
| prime_lookahead() |
| { |
| int i; |
| for(i=1;i<=LLk; i++) consume(); |
| dirty=0; |
| lap = 0; |
| labase = 0; |
| } |
| |
| /* check to see if the current input symbol matches '_t'. |
| * During NON demand lookahead mode, dirty will always be 0 and |
| * hence the extra code for consuming tokens in _match is never |
| * executed; the same routine can be used for both modes. |
| */ |
| int ANTLRParser:: |
| _match(ANTLRTokenType _t, ANTLRChar **MissText, |
| ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok, |
| SetWordType **MissSet) |
| { |
| if ( dirty==LLk ) { |
| consume(); |
| } |
| if ( LA(1)!=_t ) { |
| *MissText=NULL; |
| *MissTok= _t; *BadTok = LT(1); |
| *MissSet=NULL; |
| return 0; |
| } |
| dirty++; |
| labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
| return 1; |
| } |
| |
| /* check to see if the current input symbol matches '_t'. |
| * Used during exception handling. |
| */ |
| int ANTLRParser:: |
| _match_wsig(ANTLRTokenType _t) |
| { |
| if ( dirty==LLk ) { |
| consume(); |
| } |
| if ( LA(1)!=_t ) return 0; |
| dirty++; |
| labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
| return 1; |
| } |
| |
| /* check to see if the current input symbol matches any token in a set. |
| * During NON demand lookahead mode, dirty will always be 0 and |
| * hence the extra code for consuming tokens in _match is never |
| * executed; the same routine can be used for both modes. |
| */ |
| int ANTLRParser:: |
| _setmatch(SetWordType *tset, ANTLRChar **MissText, |
| ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok, |
| SetWordType **MissSet) |
| { |
| if ( dirty==LLk ) { |
| consume(); |
| } |
| if ( !set_el(LA(1), tset) ) { |
| *MissText=NULL; |
| *MissTok= (ANTLRTokenType)0; *BadTok=LT(1); |
| *MissSet=tset; |
| return 0; |
| } |
| dirty++; |
| labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
| return 1; |
| } |
| |
| int ANTLRParser:: |
| _setmatch_wsig(SetWordType *tset) |
| { |
| if ( dirty==LLk ) { |
| consume(); |
| } |
| if ( !set_el(LA(1), tset) ) return 0; |
| dirty++; |
| labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
| return 1; |
| } |
| |
| /* Exception handling routines */ |
| // |
| // 7-Apr-97 133MR1 |
| // Change suggested by Eli Sternheim (eli@interhdl.com) |
| // |
| void ANTLRParser:: |
| consumeUntil(SetWordType *st) |
| { |
| ANTLRTokenType tmp; // MR1 |
| const int Eof=1; // MR1 |
| while ( !set_el( (tmp=LA(1)), st) && tmp!=Eof) { consume(); } // MR1 |
| } |
| |
| // |
| // 7-Apr-97 133MR1 |
| // Change suggested by Eli Sternheim (eli@interhdl.com) |
| // |
| void ANTLRParser:: |
| consumeUntilToken(int t) |
| { |
| int tmp; // MR1 |
| const int Eof=1; // MR1 |
| while ( (tmp=LA(1)) !=t && tmp!=Eof) { consume(); } // MR1 |
| } |
| |
| |
| /* Old error stuff */ |
| |
| void ANTLRParser:: |
| resynch(SetWordType *wd,SetWordType mask) |
| { |
| static int consumed = 1; |
| |
| /* if you enter here without having consumed a token from last resynch |
| * force a token consumption. |
| */ |
| if ( !consumed ) {consume(); consumed=1; return;} |
| |
| /* if current token is in resynch set, we've got what we wanted */ |
| if ( wd[LA(1)]&mask || LA(1) == eofToken ) {consumed=0; return;} |
| |
| /* scan until we find something in the resynch set */ |
| while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();} |
| consumed=1; |
| } |
| |
| /* standard error reporting function that assumes DLG-based scanners; |
| * you should redefine in subclass to change it or if you use your |
| * own scanner. |
| */ |
| void ANTLRParser:: |
| syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, SetWordType *eset, |
| ANTLRTokenType etok, int k) |
| { |
| int line; |
| |
| line = LT(1)->getLine(); |
| |
| fprintf(stderr, "line %d: syntax error at \"%s\"", |
| line, LT(1)->getText()); |
| if ( !etok && !eset ) {fprintf(stderr, "\n"); return;} |
| if ( k==1 ) fprintf(stderr, " missing"); |
| else |
| { |
| fprintf(stderr, "; \"%s\" not", LT(1)->getText()); |
| if ( set_deg(eset)>1 ) fprintf(stderr, " in"); |
| } |
| if ( set_deg(eset)>0 ) edecode(eset); |
| else fprintf(stderr, " %s", token_tbl[etok]); |
| if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup); |
| fprintf(stderr, "\n"); |
| } |
| |
| /* is b an element of set p? */ |
| int ANTLRParser:: |
| set_el(ANTLRTokenType b, SetWordType *p) |
| { |
| return( p[DIVWORD(b)] & bitmask[MODWORD(b)] ); |
| } |
| |
| int ANTLRParser:: |
| set_deg(SetWordType *a) |
| { |
| /* Fast compute degree of a set... the number |
| of elements present in the set. Assumes |
| that all word bits are used in the set |
| */ |
| register SetWordType *p = a; |
| register SetWordType *endp = &(a[bsetsize]); |
| register int degree = 0; |
| |
| if ( a == NULL ) return 0; |
| while ( p < endp ) |
| { |
| register SetWordType t = *p; |
| register SetWordType *b = &(bitmask[0]); |
| do { |
| if (t & *b) ++degree; |
| } while (++b < &(bitmask[sizeof(SetWordType)*8])); |
| p++; |
| } |
| |
| return(degree); |
| } |
| |
| void ANTLRParser:: |
| edecode(SetWordType *a) |
| { |
| register SetWordType *p = a; |
| register SetWordType *endp = &(p[bsetsize]); |
| register unsigned e = 0; |
| |
| if ( set_deg(a)>1 ) fprintf(stderr, " {"); |
| do { |
| register SetWordType t = *p; |
| register SetWordType *b = &(bitmask[0]); |
| do { |
| if ( t & *b ) fprintf(stderr, " %s", token_tbl[e]); |
| e++; |
| } while (++b < &(bitmask[sizeof(SetWordType)*8])); |
| } while (++p < endp); |
| if ( set_deg(a)>1 ) fprintf(stderr, " }"); |
| } |
| |
| /* input looks like: |
| * zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk) |
| * where the zzMiss stuff is set here to the token that did not match |
| * (and which set wasn't it a member of). |
| */ |
| void |
| ANTLRParser::FAIL(int k, ...) |
| { |
| // |
| // MR1 10-Apr-97 Remove static allocation of variable text |
| // |
| |
| char *text=new char [1000]; // MR1 |
| SetWordType **f=new SetWordType *[20]; // MR1 |
| SetWordType **miss_set; |
| ANTLRChar **miss_text; |
| _ANTLRTokenPtr *bad_tok; |
| ANTLRChar **bad_text; |
| // |
| // 7-Apr-97 133MR1 |
| // err_k is passed as a "int *", not "unsigned *" |
| // |
| int *err_k; // MR1 |
| int i; |
| va_list ap; |
| |
| va_start(ap, k); |
| |
| text[0] = '\0'; |
| if ( k>20 ) panic("FAIL: overflowed buffer"); |
| for (i=1; i<=k; i++) /* collect all lookahead sets */ |
| { |
| f[i-1] = va_arg(ap, SetWordType *); |
| } |
| for (i=1; i<=k; i++) /* look for offending token */ |
| { |
| if ( i>1 ) strcat(text, " "); |
| |
| // 01/26/04 - tomf TR57959 (unrelated) Protect against crash |
| // in weird parser state. |
| |
| if ( NULL != LT(i)->getText() ) { |
| strcat(text, LT(i)->getText()); |
| } |
| if ( !set_el(LA(i), f[i-1]) ) break; |
| } |
| miss_set = va_arg(ap, SetWordType **); |
| miss_text = va_arg(ap, ANTLRChar **); |
| bad_tok = va_arg(ap, _ANTLRTokenPtr *); |
| bad_text = va_arg(ap, ANTLRChar **); |
| err_k = va_arg(ap, int *); // MR1 |
| if ( i>k ) |
| { |
| /* bad; lookahead is permutation that cannot be matched, |
| * but, the ith token of lookahead is valid at the ith position |
| * (The old LL sub 1 (k) versus LL(k) parsing technique) |
| */ |
| *miss_set = NULL; |
| *miss_text = LT(1)->getText(); |
| *bad_tok = LT(1); |
| *bad_text = (*bad_tok)->getText(); |
| *err_k = k; |
| // |
| // MR4 20-May-97 erroneously deleted contents of f[] |
| // MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca) |
| // MR1 10-Apr-97 release temporary storage |
| // |
| delete [] text; // MR1 |
| delete [] f; // MR1 |
| return; // MR1 |
| } |
| /* fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/ |
| *miss_set = f[i-1]; |
| *miss_text = text; |
| *bad_tok = LT(i); |
| *bad_text = (*bad_tok)->getText(); |
| if ( i==1 ) *err_k = 1; |
| else *err_k = k; |
| // |
| // MR4 20-May-97 erroneously deleted contents of f[] |
| // MR4 reported by Bruce Guenter (bruceg@qcc.sk.ca) |
| // MR1 10-Apr-97 release temporary storage |
| // |
| delete [] text; // MR1 |
| delete [] f; // MR1 |
| return; // MR1 |
| } |
| |
| int ANTLRParser:: |
| _match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows) |
| { |
| if ( dirty==LLk ) consume(); |
| |
| if ( LA(1)!=tokenWanted ) |
| { |
| fprintf(stderr, |
| "line %d: syntax error at \"%s\" missing %s\n", |
| LT(1)->getLine(), |
| (LA(1)==eofToken)?"<eof>":LT(1)->getText(), |
| token_tbl[tokenWanted]); |
| consumeUntil( whatFollows ); |
| return 0; |
| } |
| else { |
| dirty++; |
| labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
| /* if ( !demand_look ) consume(); */ |
| return 1; |
| } |
| } |
| |
| |
| int ANTLRParser:: |
| _setmatch_wdfltsig(SetWordType *tokensWanted, |
| ANTLRTokenType tokenTypeOfSet, |
| SetWordType *whatFollows) |
| { |
| if ( dirty==LLk ) consume(); |
| if ( !set_el(LA(1), tokensWanted) ) |
| { |
| fprintf(stderr, |
| "line %d: syntax error at \"%s\" missing %s\n", |
| LT(1)->getLine(), |
| (LA(1)==eofToken)?"<eof>":LT(1)->getText(), |
| token_tbl[tokenTypeOfSet]); |
| consumeUntil( whatFollows ); |
| return 0; |
| } |
| else { |
| dirty++; |
| labase = (labase+1)&(LLk-1); // labase maintained even if !demand look |
| /* if ( !demand_look ) consume(); */ |
| return 1; |
| } |
| } |
| |
| char *ANTLRParser:: |
| eMsgd(char *err,int d) |
| { |
| sprintf(eMsgBuffer, err, d); // dangerous, but I don't care |
| return eMsgBuffer; |
| } |
| |
| char *ANTLRParser:: |
| eMsg(char *err, char *s) |
| { |
| sprintf(eMsgBuffer, err, s); |
| return eMsgBuffer; |
| } |
| |
| char *ANTLRParser:: |
| eMsg2(char *err,char *s, char *t) |
| { |
| sprintf(eMsgBuffer, err, s, t); |
| return eMsgBuffer; |
| } |
| |
| void ANTLRParser:: |
| panic(char *msg) |
| { |
| fprintf(stderr, "ANTLR panic: %s\n", msg); |
| exit(PCCTS_EXIT_FAILURE); // MR1 |
| } |
| |
| const ANTLRChar *ANTLRParser:: // MR1 |
| parserTokenName(int tok) { // MR1 |
| return token_tbl[tok]; // MR1 |
| } // MR1 |