diff --git a/CHANGES.txt b/CHANGES.txt new file mode 100644 index 000000000..e69de29bb diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 000000000..c267a4dd1 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,26 @@ +[The "BSD license"] +Copyright (c) 2011 Terence Parr +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/build.xml b/build.xml new file mode 100644 index 000000000..8f309edb7 --- /dev/null +++ b/build.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRFileStream.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRFileStream.java new file mode 100644 index 000000000..077c686a2 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRFileStream.java @@ -0,0 +1,81 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +package org.antlr.v4.runtime; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; + +/** This is a char buffer stream that is loaded from a file + * all at once when you construct the object. This looks very + * much like an ANTLReader or ANTLRInputStream, but it's a special case + * since we know the exact size of the object to load. We can avoid lots + * of data copying. + */ +public class ANTLRFileStream extends ANTLRStringStream { + protected String fileName; + + public ANTLRFileStream(String fileName) throws IOException { + this(fileName, null); + } + + public ANTLRFileStream(String fileName, String encoding) throws IOException { + this.fileName = fileName; + load(fileName, encoding); + } + + public void load(String fileName, String encoding) + throws IOException + { + if ( fileName==null ) { + return; + } + File f = new File(fileName); + int size = (int)f.length(); + InputStreamReader isr; + FileInputStream fis = new FileInputStream(fileName); + if ( encoding!=null ) { + isr = new InputStreamReader(fis, encoding); + } + else { + isr = new InputStreamReader(fis); + } + try { + data = new char[size]; + super.n = isr.read(data); + } + finally { + isr.close(); + } + } + + public String getSourceName() { + return fileName; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRParserListener.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRParserListener.java new file mode 100644 index 000000000..efbd9cea9 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRParserListener.java @@ -0,0 +1,6 @@ +package org.antlr.v4.runtime; + +/** */ +public interface ANTLRParserListener { + public void error(RecognitionException msg); +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRStringStream.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRStringStream.java new file mode 100644 index 000000000..388494a5d --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRStringStream.java @@ -0,0 +1,230 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
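ANTLRFileStream above is just an ANTLRStringStream whose buffer is loaded from disk in one read. A minimal fragment (the grammar file name is invented and IOException handling is elided) looks like this:

    CharStream chars = new ANTLRFileStream("Expr.g", "UTF-8");  // encoding may be null for the platform default
    System.out.println(chars.getSourceName());                  // reports the file name passed in
    while ( chars.LA(1) != CharStream.EOF ) chars.consume();    // scan to end of input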
+ */ +package org.antlr.v4.runtime; + +import java.util.ArrayList; +import java.util.List; + +/** A pretty quick CharStream that pulls all data from an array + * directly. Every method call counts in the lexer. Java's + * strings aren't very good so I'm avoiding. + */ +public class ANTLRStringStream implements CharStream { + /** The data being scanned */ + protected char[] data; + + /** How many characters are actually in the buffer */ + protected int n; + + /** 0..n-1 index into string of next char */ + protected int p=0; + + /** line number 1..n within the input */ + protected int line = 1; + + /** The index of the character relative to the beginning of the line 0..n-1 */ + protected int charPositionInLine = 0; + + /** tracks how deep mark() calls are nested */ + protected int markDepth = 0; + + /** A list of CharStreamState objects that tracks the stream state + * values line, charPositionInLine, and p that can change as you + * move through the input stream. Indexed from 1..markDepth. + * A null is kept @ index 0. Create upon first call to mark(). + */ + protected List markers; + + /** Track the last mark() call result value for use in rewind(). */ + protected int lastMarker; + + /** What is name or source of this char stream? */ + public String name; + + public ANTLRStringStream() { + } + + /** Copy data in string to a local char array */ + public ANTLRStringStream(String input) { + this(); + this.data = input.toCharArray(); + this.n = input.length(); + } + + /** This is the preferred constructor as no data is copied */ + public ANTLRStringStream(char[] data, int numberOfActualCharsInArray) { + this(); + this.data = data; + this.n = numberOfActualCharsInArray; + } + + /** Reset the stream so that it's in the same state it was + * when the object was created *except* the data array is not + * touched. + */ + public void reset() { + p = 0; + line = 1; + charPositionInLine = 0; + markDepth = 0; + } + + public void consume() { + //System.out.println("prev p="+p+", c="+(char)data[p]); + if ( p < n ) { + charPositionInLine++; + if ( data[p]=='\n' ) { + /* + System.out.println("newline char found on line: "+line+ + "@ pos="+charPositionInLine); + */ + line++; + charPositionInLine=0; + } + p++; + //System.out.println("p moves to "+p+" (c='"+(char)data[p]+"')"); + } + } + + public int LA(int i) { + if ( i==0 ) { + return 0; // undefined + } + if ( i<0 ) { + i++; // e.g., translate LA(-1) to use offset i=0; then data[p+0-1] + if ( (p+i-1) < 0 ) { + return CharStream.EOF; // invalid; no char before first char + } + } + + if ( (p+i-1) >= n ) { + //System.out.println("char LA("+i+")=EOF; p="+p); + return CharStream.EOF; + } + //System.out.println("char LA("+i+")="+(char)data[p+i-1]+"; p="+p); + //System.out.println("LA("+i+"); p="+p+" n="+n+" data.length="+data.length); + return data[p+i-1]; + } + + public int LT(int i) { + return LA(i); + } + + /** Return the current input symbol index 0..n where n indicates the + * last symbol has been read. The index is the index of char to + * be returned from LA(1). 
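Because LA() accepts negative offsets, characters that have already been consumed stay reachable; a short sketch of that behavior:

    ANTLRStringStream in = new ANTLRStringStream("ab\ncd");
    in.consume();                    // past 'a'
    int prev = in.LA(-1);            // 'a': negative offsets look backwards
    int next = in.LA(1);             // 'b'
    int none = new ANTLRStringStream("x").LA(-1);  // EOF: nothing precedes the first char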
+ */ + public int index() { + return p; + } + + public int size() { + return n; + } + + public int mark() { + if ( markers==null ) { + markers = new ArrayList(); + markers.add(null); // depth 0 means no backtracking, leave blank + } + markDepth++; + CharStreamState state = null; + if ( markDepth>=markers.size() ) { + state = new CharStreamState(); + markers.add(state); + } + else { + state = (CharStreamState)markers.get(markDepth); + } + state.p = p; + state.line = line; + state.charPositionInLine = charPositionInLine; + lastMarker = markDepth; + return markDepth; + } + + public void rewind(int m) { + CharStreamState state = (CharStreamState)markers.get(m); + // restore stream state + seek(state.p); + line = state.line; + charPositionInLine = state.charPositionInLine; + release(m); + } + + public void rewind() { + rewind(lastMarker); + } + + public void release(int marker) { + // unwind any other markers made after m and release m + markDepth = marker; + // release this marker + markDepth--; + } + + /** consume() ahead until p==index; can't just set p=index as we must + * update line and charPositionInLine. + */ + public void seek(int index) { + if ( index<=p ) { + p = index; // just jump; don't update stream state (line, ...) + return; + } + // seek forward, consume until p hits index + while ( p { + + public static final int MEMO_RULE_FAILED = -2; + public static final int MEMO_RULE_UNKNOWN = -1; + + public static final String NEXT_TOKEN_RULE_NAME = "nextToken"; + + public BaseRecognizer(TokenStream input) { + this(input, new ParserSharedState()); + } + + public BaseRecognizer(TokenStream input, ParserSharedState state) { + if ( state==null ) { + state = new ParserSharedState(); + } + this.state = state; + state.input = input; + } + + /** reset the parser's state; subclasses must rewinds the input stream */ + public void reset() { + state = new ParserSharedState(); + } + + /** Match current input symbol against ttype. Attempt + * single token insertion or deletion error recovery. If + * that fails, throw MismatchedTokenException. + * + * To turn off single token insertion or deletion error + * recovery, override recoverFromMismatchedToken() and have it + * throw an exception. See TreeParser.recoverFromMismatchedToken(). + * This way any error in a rule will cause an exception and + * immediate exit from rule. Rule would recover by resynchronizing + * to the set of symbols that can follow rule ref. + */ + public Object match(int ttype) throws RecognitionException { +// System.out.println("match "+((TokenStream)state.input).LT(1)+" vs expected "+ttype); + Object matchedSymbol = getCurrentInputSymbol(); + if ( state.input.LA(1)==ttype ) { + state.input.consume(); + state.errorRecovery = false; + state.failed = false; + return matchedSymbol; + } + System.out.println("MATCH failure at state "+state.ctx.s+ + ", ctx="+state.ctx.toString(this)); + IntervalSet expecting = _interp.atn.nextTokens(state.ctx); + System.out.println("could match "+expecting); + + matchedSymbol = recoverFromMismatchedToken(ttype, expecting); + System.out.println("rsync'd to "+matchedSymbol); + return matchedSymbol; + } + + // like matchSet but w/o consume; error checking routine. 
+ public void sync(IntervalSet expecting) { + if ( expecting.member(state.input.LA(1)) ) return; + System.out.println("failed sync to "+expecting); + IntervalSet followSet = computeErrorRecoverySet(); + followSet.addAll(expecting); + NoViableAltException e = new NoViableAltException(this, state.ctx); + recoverFromMismatchedSet(e, followSet); + } + + /** Match the wildcard: in a symbol */ + public void matchAny() { + state.errorRecovery = false; + state.failed = false; + state.input.consume(); + } + + public boolean mismatchIsUnwantedToken(int ttype) { + return state.input.LA(2)==ttype; + } + + public boolean mismatchIsMissingToken(IntervalSet follow) { + return false; + /* + if ( follow==null ) { + // we have no information about the follow; we can only consume + // a single token and hope for the best + return false; + } + // compute what can follow this grammar element reference + if ( follow.member(Token.EOR_TOKEN_TYPE) ) { + IntervalSet viableTokensFollowingThisRule = computeNextViableTokenSet(); + follow = follow.or(viableTokensFollowingThisRule); + if ( state.ctx.sp>=0 ) { // remove EOR if we're not the start symbol + follow.remove(Token.EOR_TOKEN_TYPE); + } + } + // if current token is consistent with what could come after set + // then we know we're missing a token; error recovery is free to + // "insert" the missing token + + //System.out.println("viable tokens="+follow.toString(getTokenNames())); + //System.out.println("LT(1)="+((TokenStream)state.input).LT(1)); + + // IntervalSet cannot handle negative numbers like -1 (EOF) so I leave EOR + // in follow set to indicate that the fall of the start symbol is + // in the set (EOF can follow). + if ( follow.member(state.input.LA(1)) || follow.member(Token.EOR_TOKEN_TYPE) ) { + //System.out.println("LT(1)=="+((TokenStream)state.input).LT(1)+" is consistent with what follows; inserting..."); + return true; + } + return false; + */ + } + + /** Report a recognition problem. + * + * This method sets errorRecovery to indicate the parser is recovering + * not parsing. Once in recovery mode, no errors are generated. + * To get out of recovery mode, the parser must successfully match + * a token (after a resync). So it will go: + * + * 1. error occurs + * 2. enter recovery mode, report error + * 3. consume until token found in resynch set + * 4. try to resume parsing + * 5. next match() will reset errorRecovery mode + */ + public void reportError(RecognitionException e) { + // if we've already reported an error and have not matched a token + // yet successfully, don't report any errors. + if ( state.errorRecovery ) { + //System.err.print("[SPURIOUS] "); + return; + } + state.syntaxErrors++; // don't count spurious + state.errorRecovery = true; + + notifyListeners(e); + } + + + /** Get number of recognition errors (lexer, parser, tree parser). Each + * recognizer tracks its own number. So parser and lexer each have + * separate count. Does not count the spurious errors found between + * an error and next valid token match + * + * See also reportError() + */ + public int getNumberOfSyntaxErrors() { + return state.syntaxErrors; + } + + + /** Recover from an error found on the input stream. This is + * for NoViableAlt and mismatched symbol exceptions. If you enable + * single token insertion and deletion, this will usually not + * handle mismatched symbol exceptions but there could be a mismatched + * token that the match() routine could not recover from. 
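reportError() above hands each exception to notifyListeners(), so a minimal ANTLRParserListener needs only the single error() method; how the listener gets registered with the recognizer is not shown in this hunk:

    // Collects syntax errors instead of printing them inline.
    class ErrorCollector implements ANTLRParserListener {
        int count = 0;
        public void error(RecognitionException msg) {
            count++;
            System.err.println("syntax error #" + count + ": " + msg);
        }
    }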
+ */ + public void recover() { + state.input.consume(); + /* + if ( state.lastErrorIndex==state.input.index() ) { + // uh oh, another error at same token index; must be a case + // where LT(1) is in the recovery token set so nothing is + // consumed; consume a single token so at least to prevent + // an infinite loop; this is a failsafe. + state.input.consume(); + } + state.lastErrorIndex = state.input.index(); + IntervalSet followSet = computeErrorRecoverySet(); + beginResync(); + consumeUntil(followSet); + endResync(); + */ + } + + /** A hook to listen in on the token consumption during error recovery. + * The DebugParser subclasses this to fire events to the listenter. + */ + public void beginResync() { + } + + public void endResync() { + } + + /* Compute the error recovery set for the current rule. During + * rule invocation, the parser pushes the set of tokens that can + * follow that rule reference on the stack; this amounts to + * computing FIRST of what follows the rule reference in the + * enclosing rule. See LinearApproximator.FIRST(). + * This local follow set only includes tokens + * from within the rule; i.e., the FIRST computation done by + * ANTLR stops at the end of a rule. + * + * EXAMPLE + * + * When you find a "no viable alt exception", the input is not + * consistent with any of the alternatives for rule r. The best + * thing to do is to consume tokens until you see something that + * can legally follow a call to r *or* any rule that called r. + * You don't want the exact set of viable next tokens because the + * input might just be missing a token--you might consume the + * rest of the input looking for one of the missing tokens. + * + * Consider grammar: + * + * a : '[' b ']' + * | '(' b ')' + * ; + * b : c '^' INT ; + * c : ID + * | INT + * ; + * + * At each rule invocation, the set of tokens that could follow + * that rule is pushed on a stack. Here are the various + * context-sensitive follow sets: + * + * FOLLOW(b1_in_a) = FIRST(']') = ']' + * FOLLOW(b2_in_a) = FIRST(')') = ')' + * FOLLOW(c_in_b) = FIRST('^') = '^' + * + * Upon erroneous input "[]", the call chain is + * + * a -> b -> c + * + * and, hence, the follow context stack is: + * + * depth follow set start of rule execution + * 0 a (from main()) + * 1 ']' b + * 2 '^' c + * + * Notice that ')' is not included, because b would have to have + * been called from a different context in rule a for ')' to be + * included. + * + * For error recovery, we cannot consider FOLLOW(c) + * (context-sensitive or otherwise). We need the combined set of + * all context-sensitive FOLLOW sets--the set of all tokens that + * could follow any reference in the call chain. We need to + * resync to one of those tokens. Note that FOLLOW(c)='^' and if + * we resync'd to that token, we'd consume until EOF. We need to + * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. + * In this case, for input "[]", LA(1) is ']' and in the set, so we would + * not consume anything. After printing an error, rule c would + * return normally. Rule b would not find the required '^' though. + * At this point, it gets a mismatched token error and throws an + * exception (since LA(1) is not in the viable following token + * set). The rule exception handler tries to recover, but finds + * the same recovery set and doesn't consume anything. Rule b + * exits normally returning to rule a. Now it finds the ']' (and + * with the successful match exits errorRecovery mode). 
+ * + * So, you can see that the parser walks up the call chain looking + * for the token that was a member of the recovery set. + * + * Errors are not generated in errorRecovery mode. + * + * ANTLR's error recovery mechanism is based upon original ideas: + * + * "Algorithms + Data Structures = Programs" by Niklaus Wirth + * + * and + * + * "A note on error recovery in recursive descent parsers": + * http://portal.acm.org/citation.cfm?id=947902.947905 + * + * Later, Josef Grosch had some good ideas: + * + * "Efficient and Comfortable Error Recovery in Recursive Descent + * Parsers": + * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + * + * Like Grosch I implement context-sensitive FOLLOW sets that are combined + * at run-time upon error to avoid overhead during parsing. + */ + protected IntervalSet computeErrorRecoverySet() { + return null; +// int top = state.ctx.sp; +// IntervalSet followSet = new IntervalSet(); +// for (int i=top; i>=0; i--) { // i==0 is EOF context for start rule invocation +// IntervalSet f = (IntervalSet)state.ctx.get(i).follow; +// followSet.orInPlace(f); +// } +// return followSet; + } + + /** Compute the context-sensitive FOLLOW set for current rule. + * This is set of token types that can follow a specific rule + * reference given a specific call chain. You get the set of + * viable tokens that can possibly come next (lookahead depth 1) + * given the current call chain. Contrast this with the + * definition of plain FOLLOW for rule r: + * + * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} + * + * where x in T* and alpha, beta in V*; T is set of terminals and + * V is the set of terminals and nonterminals. In other words, + * FOLLOW(r) is the set of all tokens that can possibly follow + * references to r in *any* sentential form (context). At + * runtime, however, we know precisely which context applies as + * we have the call chain. We may compute the exact (rather + * than covering superset) set of following tokens. + * + * For example, consider grammar: + * + * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} + * | "return" expr '.' + * ; + * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} + * atom : INT // FOLLOW(atom)=={'+',')',';','.'} + * | '(' expr ')' + * ; + * + * The FOLLOW sets are all inclusive whereas context-sensitive + * FOLLOW sets are precisely what could follow a rule reference. + * For input input "i=(3);", here is the derivation: + * + * stat => ID '=' expr ';' + * => ID '=' atom ('+' atom)* ';' + * => ID '=' '(' expr ')' ('+' atom)* ';' + * => ID '=' '(' atom ')' ('+' atom)* ';' + * => ID '=' '(' INT ')' ('+' atom)* ';' + * => ID '=' '(' INT ')' ';' + * + * At the "3" token, you'd have a call chain of + * + * stat -> expr -> atom -> expr -> atom + * + * What can follow that specific nested ref to atom? Exactly ')' + * as you can see by looking at the derivation of this specific + * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. + * + * You want the exact viable token set when recovering from a + * token mismatch. Upon token mismatch, if LA(1) is member of + * the viable next token set, then you know there is most likely + * a missing token in the input stream. "Insert" one by just not + * throwing an exception. 
+ */ + public IntervalSet computeNextViableTokenSet() { + return null; +// int top = state.ctx.sp; +// IntervalSet followSet = new IntervalSet(); +// for (int i=top; i>=0; i--) { // i==0 is EOF context for start rule invocation +// IntervalSet f = (IntervalSet)state.ctx.get(i).follow; +// followSet.orInPlace(f); +// // can we see end of rule? if not, don't include follow of this rule +// if ( !f.member(Token.EOR_TOKEN_TYPE) ) break; +// // else combine with tokens that can follow this rule (rm EOR also) +// // EOR indicates we have to include follow(start rule); i.e., EOF +// followSet.remove(Token.EOR_TOKEN_TYPE); +// } +// return followSet; + } + + /** Attempt to recover from a single missing or extra token. + * + * EXTRA TOKEN + * + * LA(1) is not what we are looking for. If LA(2) has the right token, + * however, then assume LA(1) is some extra spurious token. Delete it + * and LA(2) as if we were doing a normal match(), which advances the + * input. + * + * MISSING TOKEN + * + * If current token is consistent with what could come after + * ttype then it is ok to "insert" the missing token, else throw + * exception For example, Input "i=(3;" is clearly missing the + * ')'. When the parser returns from the nested call to expr, it + * will have call chain: + * + * stat -> expr -> atom + * + * and it will be trying to match the ')' at this point in the + * derivation: + * + * => ID '=' '(' INT ')' ('+' atom)* ';' + * ^ + * match() will see that ';' doesn't match ')' and report a + * mismatched token error. To recover, it sees that LA(1)==';' + * is in the set of tokens that can follow the ')' token + * reference in rule atom. It can assume that you forgot the ')'. + */ + protected Object recoverFromMismatchedToken(int ttype, IntervalSet follow) + throws RecognitionException + { + RecognitionException e = null; + // if next token is what we are looking for then "delete" this token + if ( mismatchIsUnwantedToken(ttype) ) { + e = new UnwantedTokenException(this, state.input, ttype); + /* + System.err.println("recoverFromMismatchedToken deleting "+ + ((TokenStream)state.input).LT(1)+ + " since "+((TokenStream)state.input).LT(2)+" is what we want"); + */ + beginResync(); + state.input.consume(); // simply delete extra token + endResync(); + reportError(e); // report after consuming so AW sees the token in the exception + // we want to return the token we're actually matching + Object matchedSymbol = getCurrentInputSymbol(); + state.input.consume(); // move past ttype token as if all were ok + return matchedSymbol; + } + // can't recover with single token deletion, try insertion + if ( mismatchIsMissingToken(follow) ) { + Object inserted = getMissingSymbol(e, ttype, follow); + e = new MissingTokenException(this, state.input, ttype, inserted); + reportError(e); // report after inserting so AW sees the token in the exception + return inserted; + } + // even that didn't work; must throw the exception + e = new MismatchedTokenException(this, state.input, ttype); + throw e; + } + + public Object recoverFromMismatchedSet(RecognitionException e, + IntervalSet follow) + throws RecognitionException + { + if ( mismatchIsMissingToken(follow) ) { + // System.out.println("missing token"); + reportError(e); + // we don't know how to conjure up a token for sets yet + return getMissingSymbol(e, Token.INVALID_TYPE, follow); + } + // TODO do single token deletion like above for Token mismatch + throw e; + } + + /** Match needs to return the current input symbol, which gets put + * into the label for the 
associated token ref; e.g., x=ID. Token + * and tree parsers need to return different objects. Rather than test + * for input stream type or change the IntStream interface, I use + * a simple method to ask the recognizer to tell me what the current + * input symbol is. + */ + protected Object getCurrentInputSymbol() { return null; } + + /** Conjure up a missing token during error recovery. + * + * The recognizer attempts to recover from single missing + * symbols. But, actions might refer to that missing symbol. + * For example, x=ID {f($x);}. The action clearly assumes + * that there has been an identifier matched previously and that + * $x points at that token. If that token is missing, but + * the next token in the stream is what we want we assume that + * this token is missing and we keep going. Because we + * have to return some token to replace the missing token, + * we have to conjure one up. This method gives the user control + * over the tokens returned for missing tokens. Mostly, + * you will want to create something special for identifier + * tokens. For literals such as '{' and ',', the default + * action in the parser or tree parser works. It simply creates + * a CommonToken of the appropriate type. The text will be the token. + * If you change what tokens must be created by the lexer, + * override this method to create the appropriate tokens. + */ + protected Object getMissingSymbol(RecognitionException e, + int expectedTokenType, + IntervalSet follow) + { + return null; + } + + public void consumeUntil(int tokenType) { + //System.out.println("consumeUntil "+tokenType); + int ttype = state.input.LA(1); + while (ttype != Token.EOF && ttype != tokenType) { + state.input.consume(); + ttype = state.input.LA(1); + } + } + + /** Consume tokens until one matches the given token set */ + public void consumeUntil(IntervalSet set) { + //System.out.println("consumeUntil("+set.toString(getTokenNames())+")"); + int ttype = state.input.LA(1); + while (ttype != Token.EOF && !set.member(ttype) ) { + //System.out.println("consume during recover LA(1)="+getTokenNames()[state.input.LA(1)]); + state.input.consume(); + ttype = state.input.LA(1); + } + } + + /** Return List of the rules in your parser instance + * leading up to a call to this method. You could override if + * you want more details such as the file/line info of where + * in the parser java code a rule is invoked. + * + * This is very useful for error messages and for context-sensitive + * error recovery. + */ + public List getRuleInvocationStack() { + String parserClassName = getClass().getName(); + return getRuleInvocationStack(new Throwable(), parserClassName); + } + + /** A more general version of getRuleInvocationStack where you can + * pass in, for example, a RecognitionException to get it's rule + * stack trace. This routine is shared with all recognizers, hence, + * static. 
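Following the getMissingSymbol() Javadoc, a parser subclass could conjure a placeholder using the (type, text) CommonToken constructor that appears later in this diff; the "<missing>" text is only an example:

    @Override
    protected Object getMissingSymbol(RecognitionException e,
                                      int expectedTokenType,
                                      IntervalSet follow) {
        // detached token so actions such as {f($x);} have something to point at
        return new CommonToken(expectedTokenType, "<missing>");
    }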
+ * + * TODO: move to a utility class or something; weird having lexer call this + */ + public static List getRuleInvocationStack(Throwable e, + String recognizerClassName) + { + List rules = new ArrayList(); + StackTraceElement[] stack = e.getStackTrace(); + int i = 0; + for (i=stack.length-1; i>=0; i--) { + StackTraceElement t = stack[i]; + if ( t.getClassName().startsWith("org.antlr.v4.runtime.") ) { + continue; // skip support code such as this method + } + if ( t.getMethodName().equals(NEXT_TOKEN_RULE_NAME) ) { + continue; + } + if ( !t.getClassName().equals(recognizerClassName) ) { + continue; // must not be part of this parser + } + rules.add(t.getMethodName()); + } + return rules; + } + + /** Return whether or not a backtracking attempt failed. */ + public boolean failed() { return state.failed; } + + /** For debugging and other purposes, might want the grammar name. + * Have ANTLR generate an implementation for this method. + */ + public String getGrammarFileName() { + return null; + } + + public abstract String getSourceName(); + + /** A convenience method for use most often with template rewrites. + * Convert a List to List + */ + public List toStrings(List tokens) { + if ( tokens==null ) return null; + List strings = new ArrayList(tokens.size()); + for (int i=0; i alts, OrderedHashSet configs) {} + + public void reportContextSensitivity(int startIndex, int stopIndex, Set alts, OrderedHashSet configs) {} + + /** If context sensitive parsing, we know it's ambiguity not conflict */ + public void reportAmbiguity(int startIndex, int stopIndex, Set alts, OrderedHashSet configs) {} + +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java b/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java new file mode 100644 index 000000000..76b43814e --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java @@ -0,0 +1,274 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.antlr.v4.runtime; + +import org.antlr.runtime.BitSet; + +import java.util.ArrayList; +import java.util.List; +import java.util.NoSuchElementException; + +/** Buffer all input tokens but do on-demand fetching of new tokens from + * lexer. Useful when the parser or lexer has to set context/mode info before + * proper lexing of future tokens. The ST template parser needs this, + * for example, because it has to constantly flip back and forth between + * inside/output templates. E.g., }> has to parse names + * as part of an expression but "hi, " as a nested template. + * + * You can't use this stream if you pass whitespace or other off-channel + * tokens to the parser. The stream can't ignore off-channel tokens. + * (UnbufferedTokenStream is the same way.) + * + * This is not a subclass of UnbufferedTokenStream because I don't want + * to confuse small moving window of tokens it uses for the full buffer. + */ +public class BufferedTokenStream implements TokenStream { + protected TokenSource tokenSource; + + /** Record every single token pulled from the source so we can reproduce + * chunks of it later. The buffer in LookaheadStream overlaps sometimes + * as its moving window moves through the input. This list captures + * everything so we can access complete input text. + */ + protected List tokens = new ArrayList(100); + + /** Track the last mark() call result value for use in rewind(). */ + protected int lastMarker; + + /** The index into the tokens list of the current token (next token + * to consume). tokens[p] should be LT(1). p=-1 indicates need + * to initialize with first token. The ctor doesn't get a token. + * First call to LT(1) or whatever gets the first token and sets p=0; + */ + protected int p = -1; + + protected int range = -1; // how deep have we gone? + + public BufferedTokenStream() {;} + + public BufferedTokenStream(TokenSource tokenSource) { + this.tokenSource = tokenSource; + } + + public TokenSource getTokenSource() { return tokenSource; } + + public int index() { return p; } + + public int range() { return range; } + + public int mark() { + if ( p == -1 ) setup(); + lastMarker = index(); + return lastMarker; + } + + public void release(int marker) { + // no resources to release + } + + public void rewind(int marker) { + seek(marker); + } + + public void rewind() { + seek(lastMarker); + } + + public void reset() { + p = 0; + lastMarker = 0; + } + + public void seek(int index) { p = index; } + + public int size() { return tokens.size(); } + + /** Move the input pointer to the next incoming token. The stream + * must become active with LT(1) available. consume() simply + * moves the input pointer so that LT(1) points at the next + * input symbol. Consume at least one token. + * + * Walk past any token not on the channel the parser is listening to. + */ + public void consume() { + if ( p == -1 ) setup(); + p++; + sync(p); + } + + /** Make sure index i in tokens has a token. */ + protected void sync(int i) { + int n = i - tokens.size() + 1; // how many more elements we need? 
+ //System.out.println("sync("+i+") needs "+n); + if ( n > 0 ) fetch(n); + } + + /** add n elements to buffer */ + protected void fetch(int n) { + for (int i=1; i<=n; i++) { + Token t = tokenSource.nextToken(); + t.setTokenIndex(tokens.size()); + //System.out.println("adding "+t+" at index "+tokens.size()); + tokens.add(t); + if ( t.getType()==Token.EOF ) break; + } + } + + public Token get(int i) { + if ( i < 0 || i >= tokens.size() ) { + throw new NoSuchElementException("token index "+i+" out of range 0.."+(tokens.size()-1)); + } + return tokens.get(i); + } + + /** Get all tokens from start..stop inclusively */ + public List get(int start, int stop) { + if ( start<0 || stop<0 ) return null; + if ( p == -1 ) setup(); + List subset = new ArrayList(); + if ( stop>=tokens.size() ) stop = tokens.size()-1; + for (int i = start; i <= stop; i++) { + Token t = tokens.get(i); + if ( t.getType()==Token.EOF ) break; + subset.add(t); + } + return subset; + } + + public int LA(int i) { return LT(i).getType(); } + + protected Token LB(int k) { + if ( (p-k)<0 ) return null; + return tokens.get(p-k); + } + + public Token LT(int k) { + if ( p == -1 ) setup(); + if ( k==0 ) return null; + if ( k < 0 ) return LB(-k); + + int i = p + k - 1; + sync(i); + if ( i >= tokens.size() ) { // return EOF token + // EOF must be last token + return tokens.get(tokens.size()-1); + } + if ( i>range ) range = i; + return tokens.get(i); + } + + protected void setup() { sync(0); p = 0; } + + /** Reset this token stream by setting its token source. */ + public void setTokenSource(TokenSource tokenSource) { + this.tokenSource = tokenSource; + tokens.clear(); + p = -1; + } + + public List getTokens() { return tokens; } + + public List getTokens(int start, int stop) { + return getTokens(start, stop, (BitSet)null); + } + + /** Given a start and stop index, return a List of all tokens in + * the token type BitSet. Return null if no tokens were found. This + * method looks at both on and off channel tokens. 
+ */ + public List getTokens(int start, int stop, BitSet types) { + if ( p == -1 ) setup(); + if ( stop>=tokens.size() ) stop=tokens.size()-1; + if ( start<0 ) start=0; + if ( start>stop ) return null; + + // list = tokens[start:stop]:{Token t, t.getType() in types} + List filteredTokens = new ArrayList(); + for (int i=start; i<=stop; i++) { + Token t = tokens.get(i); + if ( types==null || types.member(t.getType()) ) { + filteredTokens.add(t); + } + } + if ( filteredTokens.size()==0 ) { + filteredTokens = null; + } + return filteredTokens; + } + + public List getTokens(int start, int stop, List types) { + return getTokens(start,stop,new BitSet(types)); + } + + public List getTokens(int start, int stop, int ttype) { + return getTokens(start,stop,BitSet.of(ttype)); + } + + public String getSourceName() { return tokenSource.getSourceName(); } + + /** Grab *all* tokens from stream and return string */ + public String toString() { + if ( p == -1 ) setup(); + fill(); + return toString(0, tokens.size()-1); + } + + public String toString(int start, int stop) { + if ( start<0 || stop<0 ) return null; + if ( p == -1 ) setup(); + if ( stop>=tokens.size() ) stop = tokens.size()-1; + StringBuffer buf = new StringBuffer(); + for (int i = start; i <= stop; i++) { + Token t = tokens.get(i); + if ( t.getType()==Token.EOF ) break; + buf.append(t.getText()); + } + return buf.toString(); + } + + public String toString(Token start, Token stop) { + if ( start!=null && stop!=null ) { + return toString(start.getTokenIndex(), stop.getTokenIndex()); + } + return null; + } + + /** Get all tokens from lexer until EOF */ + public void fill() { + if ( p == -1 ) setup(); + if ( tokens.get(p).getType()==Token.EOF ) return; + + int i = p+1; + sync(i); + while ( tokens.get(i).getType()!=Token.EOF ) { + i++; + sync(i); + } + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/CharStream.java b/runtime/Java/src/org/antlr/v4/runtime/CharStream.java new file mode 100644 index 000000000..0f47a9c9e --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/CharStream.java @@ -0,0 +1,57 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +package org.antlr.v4.runtime; + +/** A source of characters for an ANTLR lexer */ +public interface CharStream extends IntStream { + public static final int EOF = -1; + + /** For infinite streams, you don't need this; primarily I'm providing + * a useful interface for action code. Just make sure actions don't + * use this on streams that don't support it. + */ + public String substring(int start, int stop); + + /** Get the ith character of lookahead. This is the same usually as + * LA(i). This will be used for labels in the generated + * lexer code. I'd prefer to return a char here type-wise, but it's + * probably better to be 32-bit clean and be consistent with LA. + */ + public int LT(int i); + + /** ANTLR tracks the line information automatically */ + int getLine(); + + /** Because this stream can rewind, we need to be able to reset the line */ + void setLine(int line); + + void setCharPositionInLine(int pos); + + /** The index of the character relative to the beginning of the line 0..n-1 */ + int getCharPositionInLine(); +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/CharStreamState.java b/runtime/Java/src/org/antlr/v4/runtime/CharStreamState.java new file mode 100644 index 000000000..250970398 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/CharStreamState.java @@ -0,0 +1,45 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +/** When walking ahead with cyclic DFA or for syntactic predicates, + * we need to record the state of the input stream (char index, + * line, etc...) so that we can rewind the state after scanning ahead. + * + * This is the complete state of a stream. + */ +public class CharStreamState { + /** Index into the char stream of next lookahead char */ + public int p; + + /** What line number is the scanner at before processing buffer[p]? */ + public int line; + + /** What char position 0..n-1 in line is scanner before processing buffer[p]? 
*/ + public int charPositionInLine; +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java b/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java new file mode 100644 index 000000000..1bb7a60f8 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java @@ -0,0 +1,193 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +import java.io.Serializable; + +public class CommonToken implements Token, Serializable { + protected int type; + protected int line; + protected int charPositionInLine = -1; // set to invalid position + protected int channel=DEFAULT_CHANNEL; + protected transient CharStream input; + + /** We need to be able to change the text once in a while. If + * this is non-null, then getText should return this. Note that + * start/stop are not affected by changing this. 
+ */ + protected String text; + + /** What token number is this from 0..n-1 tokens; < 0 implies invalid index */ + protected int index = -1; + + /** The char position into the input buffer where this token starts */ + protected int start; + + /** The char position into the input buffer where this token stops */ + protected int stop; + + public CommonToken(int type) { + this.type = type; + } + + public CommonToken(CharStream input, int type, int channel, int start, int stop) { + this.input = input; + this.type = type; + this.channel = channel; + this.start = start; + this.stop = stop; + this.line = input.getLine(); + this.charPositionInLine = input.getCharPositionInLine(); + } + + public CommonToken(int type, String text) { + this.type = type; + this.channel = DEFAULT_CHANNEL; + this.text = text; + } + + public CommonToken(Token oldToken) { + text = oldToken.getText(); + type = oldToken.getType(); + line = oldToken.getLine(); + index = oldToken.getTokenIndex(); + charPositionInLine = oldToken.getCharPositionInLine(); + channel = oldToken.getChannel(); + input = oldToken.getInputStream(); + if ( oldToken instanceof CommonToken ) { + start = ((CommonToken)oldToken).start; + stop = ((CommonToken)oldToken).stop; + } + } + + public int getType() { + return type; + } + + public void setLine(int line) { + this.line = line; + } + + public String getText() { + if ( text!=null ) { + return text; + } + if ( input==null ) { + return null; + } + if ( start0 ) { + channelStr=",channel="+channel; + } + String txt = getText(); + if ( txt!=null ) { + txt = txt.replaceAll("\n","\\\\n"); + txt = txt.replaceAll("\r","\\\\r"); + txt = txt.replaceAll("\t","\\\\t"); + } + else { + txt = ""; + } + return "[@"+getTokenIndex()+","+start+":"+stop+"='"+txt+"',<"+type+">"+channelStr+","+line+":"+getCharPositionInLine()+"]"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/CommonTokenStream.java b/runtime/Java/src/org/antlr/v4/runtime/CommonTokenStream.java new file mode 100644 index 000000000..16517fe3b --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/CommonTokenStream.java @@ -0,0 +1,153 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
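Tokens need not be backed by a CharStream; the (type, text) constructor above builds a detached token, which is how conjured error-recovery tokens can be created (the type number here is made up):

    CommonToken t = new CommonToken(5, "ident");
    t.setLine(1);
    System.out.println(t.getText());   // "ident"
    System.out.println(t);             // rendered by the toString() defined above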
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.antlr.v4.runtime; + +/** The most common stream of tokens where every token is buffered up + * and tokens are filtered for a certain channel (the parser will only + * see these tokens). + * + * Even though it buffers all of the tokens, this token stream pulls tokens + * from the tokens source on demand. In other words, until you ask for a + * token using consume(), LT(), etc. the stream does not pull from the lexer. + * + * The only difference between this stream and BufferedTokenStream superclass + * is that this stream knows how to ignore off channel tokens. There may be + * a performance advantage to using the superclass if you don't pass + * whitespace and comments etc. to the parser on a hidden channel (i.e., + * you set $channel instead of calling skip() in lexer rules.) + * + * @see org.antlr.runtime.UnbufferedTokenStream + * @see org.antlr.runtime.BufferedTokenStream + */ +public class CommonTokenStream extends BufferedTokenStream { + /** Skip tokens on any channel but this one; this is how we skip whitespace... */ + protected int channel = Token.DEFAULT_CHANNEL; + + public CommonTokenStream() { ; } + + public CommonTokenStream(TokenSource tokenSource) { + super(tokenSource); + } + + public CommonTokenStream(TokenSource tokenSource, int channel) { + this(tokenSource); + this.channel = channel; + } + + /** Always leave p on an on-channel token. */ + public void consume() { + if ( p == -1 ) setup(); + p++; + sync(p); + while ( tokens.get(p).getChannel()!=channel ) { + p++; + sync(p); + } + } + + protected Token LB(int k) { + if ( k==0 || (p-k)<0 ) return null; + + int i = p; + int n = 1; + // find k good tokens looking backwards + while ( n<=k ) { + // skip off-channel tokens + i = skipOffTokenChannelsReverse(i-1); + n++; + } + if ( i<0 ) return null; + return tokens.get(i); + } + + public Token LT(int k) { + //System.out.println("enter LT("+k+")"); + if ( p == -1 ) setup(); + if ( k == 0 ) return null; + if ( k < 0 ) return LB(-k); + int i = p; + int n = 1; // we know tokens[p] is a good one + // find k good tokens + while ( nrange ) range = i; + return tokens.get(i); + } + + /** Given a starting index, return the index of the first on-channel + * token. + */ + protected int skipOffTokenChannels(int i) { + sync(i); + while ( tokens.get(i).getChannel()!=channel ) { // also stops at EOF (it's onchannel) + i++; + sync(i); + } + return i; + } + + protected int skipOffTokenChannelsReverse(int i) { + while ( i>=0 && ((Token)tokens.get(i)).getChannel()!=channel ) { + i--; + } + return i; + } + + protected void setup() { + p = 0; + sync(0); + int i = 0; + while ( tokens.get(i).getChannel()!=channel ) { + i++; + sync(i); + } + p = i; + } + + /** Count EOF just once. */ + public int getNumberOfOnChannelTokens() { + int n = 0; + fill(); + for (int i = 0; i < tokens.size(); i++) { + Token t = tokens.get(i); + if ( t.getChannel()==channel ) n++; + if ( t.getType()==Token.EOF ) break; + } + return n; + } + + /** Reset this token stream by setting its token source. 
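Typical wiring for a parser that should see only default-channel tokens; lexer stands for any TokenSource, e.g. a generated lexer:

    CommonTokenStream tokens = new CommonTokenStream(lexer);
    tokens.fill();                                            // drain the token source up to EOF
    System.out.println(tokens.getNumberOfOnChannelTokens());  // what the parser will see
    System.out.println(tokens.size());                        // includes off-channel tokens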
*/ + public void setTokenSource(TokenSource tokenSource) { + super.setTokenSource(tokenSource); + channel = Token.DEFAULT_CHANNEL; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/EarlyExitException.java b/runtime/Java/src/org/antlr/v4/runtime/EarlyExitException.java new file mode 100644 index 000000000..5227b6fbc --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/EarlyExitException.java @@ -0,0 +1,38 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +/** The recognizer did not match anything for a (..)+ loop. */ +public class EarlyExitException extends RecognitionException { + /** Used for remote debugger deserialization */ + public EarlyExitException() {;} + + public EarlyExitException(BaseRecognizer recognizer, IntStream input) { + super(recognizer, input, recognizer.state.ctx); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/FailedPredicateException.java b/runtime/Java/src/org/antlr/v4/runtime/FailedPredicateException.java new file mode 100644 index 000000000..098d9892b --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/FailedPredicateException.java @@ -0,0 +1,55 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +/** A semantic predicate failed during validation. Validation of predicates + * occurs when normally parsing the alternative just like matching a token. + * Disambiguating predicate evaluation occurs when we hoist a predicate into + * a prediction decision. + */ +public class FailedPredicateException extends RecognitionException { + public String ruleName; + public String predicateText; + + /** Used for remote debugger deserialization */ + public FailedPredicateException() {;} + + public FailedPredicateException(BaseRecognizer recognizer, + IntStream input, + String ruleName, + String predicateText) + { + super(recognizer, input, recognizer.state.ctx); + this.ruleName = ruleName; + this.predicateText = predicateText; + } + + public String toString() { + return "FailedPredicateException("+ruleName+",{"+predicateText+"}?)"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/IntStream.java b/runtime/Java/src/org/antlr/v4/runtime/IntStream.java new file mode 100644 index 000000000..4be3f14b6 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/IntStream.java @@ -0,0 +1,95 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +/** A simple stream of integers used when all I care about is the char + * or token type sequence (such as interpretation). + * + * TODO: Notes from Oli: index, seek, LA, LT, consume, getSourceName needed + * Do like Java IO and have new BufferedStream(new TokenStream) rather than + * using inheritance. 
line/col go into lexer + */ +public interface IntStream { + void consume(); + + /** Get int at current input pointer + i ahead where i=1 is next int. + * Negative indexes are allowed. LA(-1) is previous token (token + * just matched). LA(-i) where i is before first token should + * yield -1, invalid char / EOF. + */ + int LA(int i); + + /** Tell the stream to start buffering if it hasn't already. Return + * current input position, index(), or some other marker so that + * when passed to rewind() you get back to the same spot. + * rewind(mark()) should not affect the input cursor. The Lexer + * track line/col info as well as input index so its markers are + * not pure input indexes. Same for tree node streams. + */ + int mark(); + + /** Return the current input symbol index 0..n where n indicates the + * last symbol has been read. The index is the symbol about to be + * read not the most recently read symbol. + */ + int index(); + + /** Set the input cursor to the position indicated by index. This is + * normally used to seek ahead in the input stream. No buffering is + * required to do this unless you know your stream will use seek to + * move backwards such as when backtracking. + * + * This is different from rewind in its multi-directional + * requirement and in that its argument is strictly an input cursor (index). + * + * For char streams, seeking forward must update the stream state such + * as line number. For seeking backwards, you will be presumably + * backtracking using the mark/rewind mechanism that restores state and + * so this method does not need to update state when seeking backwards. + * + * Currently, this method is only used for efficient backtracking using + * memoization, but in the future it may be used for incremental parsing. + * + * The index is 0..n-1. A seek to position i means that LA(1) will + * return the ith symbol. So, seeking to 0 means LA(1) will return the + * first element in the stream. + */ + void seek(int index); + + /** Only makes sense for streams that buffer everything up probably, but + * might be useful to display the entire stream or for testing. This + * value includes a single EOF. + */ + int size(); + + /** Where are you getting symbols from? Normally, implementations will + * pass the buck all the way to the lexer who can ask its input stream + * for the file name or whatever. + */ + public String getSourceName(); +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java new file mode 100644 index 000000000..9c87b603e --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java @@ -0,0 +1,315 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. 
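A minimal sketch of the contract documented above, assuming ANTLRStringStream (added earlier in this patch) implements IntStream via CharStream and takes the text to scan in its constructor; only methods declared in this interface are used:

    IntStream s = new ANTLRStringStream("abc");
    int start = s.index();                 // 0: nothing consumed yet
    System.out.println((char)s.LA(1));     // 'a' -- the symbol about to be consumed
    s.consume();
    System.out.println((char)s.LA(1));     // 'b'
    s.consume();
    s.seek(start);                         // index-based reposition, e.g. for backtracking
    System.out.println((char)s.LA(1));     // 'a' again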
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +import org.antlr.v4.runtime.atn.LexerInterpreter; +import org.antlr.v4.runtime.misc.QStack; + +import java.util.EmptyStackException; + +/** A lexer is recognizer that draws input symbols from a character stream. + * lexer grammars result in a subclass of this object. A Lexer object + * uses simplified match() and error recovery mechanisms in the interest + * of speed. + */ +public abstract class Lexer extends Recognizer + implements TokenSource +{ + public static final int DEFAULT_MODE = 0; + public static final int MORE = -2; + public static final int SKIP = -3; + + public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL; + public static final int HIDDEN = Token.HIDDEN_CHANNEL; + + public LexerSharedState state; + + public Lexer(CharStream input) { + this(input, new LexerSharedState()); + } + + public Lexer(CharStream input, LexerSharedState state) { + if ( state==null ) { + state = new LexerSharedState(); + } + this.state = state; + state.input = input; + } + + public void reset() { + // wack Lexer state variables + if ( state.input!=null ) { + state.input.seek(0); // rewind the input + } + if ( state==null ) { + return; // no shared state work to do + } + state.token = null; + state.type = Token.INVALID_TYPE; + state.channel = Token.DEFAULT_CHANNEL; + state.tokenStartCharIndex = -1; + state.tokenStartCharPositionInLine = -1; + state.tokenStartLine = -1; + state.text = null; + } + + /** Return a token from this source; i.e., match a token on the char + * stream. + */ + public Token nextToken() { + outer: + while (true) { + state.token = null; + state.channel = Token.DEFAULT_CHANNEL; + state.tokenStartCharIndex = state.input.index(); + state.tokenStartCharPositionInLine = state.input.getCharPositionInLine(); + state.tokenStartLine = state.input.getLine(); + state.text = null; + do { + state.type = Token.INVALID_TYPE; + if ( state.input.LA(1)==CharStream.EOF ) { + Token eof = new CommonToken(state.input,Token.EOF, + Token.DEFAULT_CHANNEL, + state.input.index(),state.input.index()); + eof.setLine(getLine()); + eof.setCharPositionInLine(getCharPositionInLine()); + return eof; + } +// System.out.println("nextToken at "+((char)state.input.LA(1))+ +// " in mode "+state.mode+ +// " at index "+state.input.index()); + int ttype = _interp.match(state.input, state.mode); +// System.out.println("accepted ttype "+ttype); + if ( state.type == Token.INVALID_TYPE) state.type = ttype; + if ( state.type==SKIP ) { + continue outer; + } + } while ( state.type==MORE ); + if ( state.token==null ) emit(); + return state.token; + } + } + + /** Instruct the lexer to skip creating a token for current lexer rule + * and look for another token. nextToken() knows to keep looking when + * a lexer rule finishes with token set to SKIP_TOKEN. 
Recall that + * if token==null at end of any token rule, it creates one for you + * and emits it. + */ + public void skip() { + state.type = SKIP; + } + + public void more() { + state.type = MORE; + } + + public void mode(int m) { + state.mode = m; + } + + public void pushMode(int m) { +// System.out.println("pushMode "+m); + if ( state.modeStack==null ) state.modeStack = new QStack(); + state.modeStack.push(state.mode); + mode(m); + } + + public int popMode() { + if ( state.modeStack==null ) throw new EmptyStackException(); +// System.out.println("popMode back to "+state.modeStack.peek()); + mode( state.modeStack.pop() ); + return state.mode; + } + + /** Set the char stream and reset the lexer */ + public void setCharStream(CharStream input) { + this.state.input = null; + reset(); + this.state.input = input; + } + + public CharStream getCharStream() { + return ((CharStream)state.input); + } + + public String getSourceName() { + return state.input.getSourceName(); + } + + /** Currently does not support multiple emits per nextToken invocation + * for efficiency reasons. Subclass and override this method and + * nextToken (to push tokens into a list and pull from that list rather + * than a single variable as this implementation does). + */ + public void emit(Token token) { + //System.err.println("emit "+token); + state.token = token; + } + + /** The standard method called to automatically emit a token at the + * outermost lexical rule. The token object should point into the + * char buffer start..stop. If there is a text override in 'text', + * use that to set the token's text. Override this method to emit + * custom Token objects. + * + * If you are building trees, then you should also override + * Parser or TreeParser.getMissingSymbol(). + */ + public Token emit() { + Token t = new CommonToken(((CharStream)state.input), state.type, + state.channel, state.tokenStartCharIndex, + getCharIndex()-1); + t.setLine(state.tokenStartLine); + t.setText(state.text); + t.setCharPositionInLine(state.tokenStartCharPositionInLine); + emit(t); + return t; + } + + public int getLine() { + return ((CharStream)state.input).getLine(); + } + + public int getCharPositionInLine() { + return ((CharStream)state.input).getCharPositionInLine(); + } + + /** What is the index of the current character of lookahead? */ + public int getCharIndex() { + return state.input.index(); + } + + /** Return the text matched so far for the current token or any + * text override. + */ + public String getText() { + if ( state.text!=null ) { + return state.text; + } + return ((CharStream)state.input).substring(state.tokenStartCharIndex,getCharIndex()-1); + } + + /** Set the complete text of this token; it wipes any previous + * changes to the text. + */ + public void setText(String text) { + state.text = text; + } + + public void reportError(RecognitionException e) { + /** TODO: not thought about recovery in lexer yet. + * + // if we've already reported an error and have not matched a token + // yet successfully, don't report any errors. + if ( errorRecovery ) { + //System.err.print("[SPURIOUS] "); + return; + } + errorRecovery = true; + */ + + //displayRecognitionError(this.getTokenNames(), e); + } + + /** Used to print out token names like ID during debugging and + * error reporting. The generated parsers implement a method + * that overrides this to point to their String[] tokenNames. 
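Because Lexer implements TokenSource, nextToken() can also be driven by hand with no token stream in between; a small sketch, again assuming a hypothetical generated MyLexer and an ANTLRStringStream over the input text:

    Lexer lexer = new MyLexer(new ANTLRStringStream("a b"));
    // rules that call skip() never produce a token here; the EOF token ends the loop
    for (Token t = lexer.nextToken(); t.getType()!=Token.EOF; t = lexer.nextToken()) {
        System.out.println(t.getText()+" (channel "+t.getChannel()+")");
    }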
+ */ + public String[] getTokenNames() { + return null; + } + + public String getErrorMessage(RecognitionException e) { + String msg = null; + if ( e instanceof MismatchedTokenException ) { + MismatchedTokenException mte = (MismatchedTokenException)e; + msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting "+ + getCharErrorDisplay(mte.expecting.getSingleElement()); + } + else if ( e instanceof NoViableAltException ) { + NoViableAltException nvae = (NoViableAltException)e; + // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" + // and "(decision="+nvae.decisionNumber+") and + // "state "+nvae.stateNumber + msg = "no viable alternative at character "+getCharErrorDisplay(e.c); + } + else if ( e instanceof EarlyExitException ) { + EarlyExitException eee = (EarlyExitException)e; + // for development, can add "(decision="+eee.decisionNumber+")" + msg = "required (...)+ loop did not match anything at character "+getCharErrorDisplay(e.c); + } + else if ( e instanceof MismatchedNotSetException ) { + MismatchedNotSetException mse = (MismatchedNotSetException)e; + msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+mse.expecting; + } + else if ( e instanceof MismatchedSetException ) { + MismatchedSetException mse = (MismatchedSetException)e; + msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+mse.expecting; + } + else if ( e instanceof MismatchedRangeException ) { + MismatchedRangeException mre = (MismatchedRangeException)e; + msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+ + getCharErrorDisplay(mre.a)+".."+getCharErrorDisplay(mre.b); + } + else { + //msg = super.getErrorMessage(e, tokenNames); + } + return msg; + } + + public String getCharErrorDisplay(int c) { + String s = String.valueOf((char)c); + switch ( c ) { + case Token.EOF : + s = ""; + break; + case '\n' : + s = "\\n"; + break; + case '\t' : + s = "\\t"; + break; + case '\r' : + s = "\\r"; + break; + } + return "'"+s+"'"; + } + + /** Lexers can normally match any char in it's vocabulary after matching + * a token, so do the easy thing and just kill a character and hope + * it all works out. You can instead use the rule invocation stack + * to do sophisticated error recovery if you are in a fragment rule. + */ + public void recover(RecognitionException re) { + //System.out.println("consuming char "+(char)state.input.LA(1)+" during recovery"); + //re.printStackTrace(); + state.input.consume(); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/LexerNoViableAltException.java b/runtime/Java/src/org/antlr/v4/runtime/LexerNoViableAltException.java new file mode 100644 index 000000000..98a6746ed --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/LexerNoViableAltException.java @@ -0,0 +1,26 @@ +package org.antlr.v4.runtime; + +import org.antlr.v4.analysis.ATNConfig; +import org.antlr.v4.misc.OrderedHashSet; + +public class LexerNoViableAltException extends LexerRecognitionExeption { + /** Prediction began at what input index? */ + public int startIndex; + + /** Which configurations did we try at input.index() that couldn't match input.LT(1)? 
*/ + public OrderedHashSet deadEndConfigs; + + /** Used for remote debugger deserialization */ + public LexerNoViableAltException() {;} + + public LexerNoViableAltException(Lexer lexer, + CharStream input, + OrderedHashSet deadEndConfigs) { + super(lexer, input); + this.deadEndConfigs = deadEndConfigs; + } + + public String toString() { + return "NoViableAltException('"+(char)c+"'"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/LexerRecognitionExeption.java b/runtime/Java/src/org/antlr/v4/runtime/LexerRecognitionExeption.java new file mode 100644 index 000000000..f181c489d --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/LexerRecognitionExeption.java @@ -0,0 +1,33 @@ +package org.antlr.v4.runtime; + +public class LexerRecognitionExeption extends RuntimeException { + /** Who threw the exception? */ + public Lexer lexer; + + /** What is index of token/char were we looking at when the error occurred? */ + public int index; + + /** The current char when an error occurred. For lexers. */ + public int c; + + /** Track the line at which the error occurred in case this is + * generated from a lexer. We need to track this since the + * unexpected char doesn't carry the line info. + */ + public int line; + + public int charPositionInLine; + + /** Used for remote debugger deserialization */ + public LexerRecognitionExeption() { + } + + public LexerRecognitionExeption(Lexer lexer, CharStream input) { + this.lexer = lexer; + this.index = input.index(); + this.c = input.LA(1); + this.line = input.getLine(); + this.charPositionInLine = input.getCharPositionInLine(); + } + +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/LexerSharedState.java b/runtime/Java/src/org/antlr/v4/runtime/LexerSharedState.java new file mode 100644 index 000000000..0b1641bed --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/LexerSharedState.java @@ -0,0 +1,56 @@ +package org.antlr.v4.runtime; + +import org.antlr.v4.runtime.misc.QStack; + +public class LexerSharedState extends RecognizerSharedState { + //public CharStream input; + + /** The goal of all lexer rules/methods is to create a token object. + * This is an instance variable as multiple rules may collaborate to + * create a single token. nextToken will return this object after + * matching lexer rule(s). If you subclass to allow multiple token + * emissions, then set this to the last token to be matched or + * something nonnull so that the auto token emit mechanism will not + * emit another token. + */ + public Token token; + + /** What character index in the stream did the current token start at? + * Needed, for example, to get the text for current token. Set at + * the start of nextToken. + */ + public int tokenStartCharIndex = -1; + + /** The line on which the first character of the token resides */ + public int tokenStartLine; + + /** The character position of first character within the line */ + public int tokenStartCharPositionInLine; + + /** The channel number for the current token */ + public int channel; + + /** The token type for the current token */ + public int type; + + public QStack modeStack; + public int mode = Lexer.DEFAULT_MODE; + + /** You can set the text for the current token to override what is in + * the input char buffer. Use setText() or can set this instance var. 
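A sketch of that text override: a hypothetical helper on a generated lexer subclass, called from a string-literal rule's action, replaces the token text before emit() copies it onto the CommonToken it builds:

    // called from a string-literal rule once the closing quote has been matched
    void stripQuotes() {
        String quoted = getText();                          // e.g. "\"hi\""
        setText(quoted.substring(1, quoted.length()-1));    // token text becomes: hi
    }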
+ */ + public String text; + + public LexerSharedState() { + } + + public LexerSharedState(LexerSharedState state) { + this.token = state.token; + this.tokenStartCharIndex = state.tokenStartCharIndex; + this.tokenStartLine = state.tokenStartLine; + this.tokenStartCharPositionInLine = state.tokenStartCharPositionInLine; + this.channel = state.channel; + this.type = state.type; + this.text = state.text; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/MismatchedNotSetException.java b/runtime/Java/src/org/antlr/v4/runtime/MismatchedNotSetException.java new file mode 100644 index 000000000..3a993f88c --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/MismatchedNotSetException.java @@ -0,0 +1,41 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +public class MismatchedNotSetException extends MismatchedSetException { + /** Used for remote debugger deserialization */ + public MismatchedNotSetException() {;} + + public MismatchedNotSetException(BaseRecognizer recognizer, IntStream input) { + super(recognizer, input); + } + + public String toString() { + return "MismatchedNotSetException("+getUnexpectedType()+"!="+expecting+")"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/MismatchedRangeException.java b/runtime/Java/src/org/antlr/v4/runtime/MismatchedRangeException.java new file mode 100644 index 000000000..4d2d0c73d --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/MismatchedRangeException.java @@ -0,0 +1,45 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. 
The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +public class MismatchedRangeException extends RecognitionException { + public int a,b; + + /** Used for remote debugger deserialization */ + public MismatchedRangeException() {;} + + public MismatchedRangeException(BaseRecognizer recognizer, IntStream input, int a, int b) { + super(recognizer, input, recognizer.state.ctx); + this.a = a; + this.b = b; + } + + public String toString() { + return "MismatchedNotSetException("+getUnexpectedType()+" not in ["+a+","+b+"])"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/MismatchedSetException.java b/runtime/Java/src/org/antlr/v4/runtime/MismatchedSetException.java new file mode 100644 index 000000000..03736b273 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/MismatchedSetException.java @@ -0,0 +1,41 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +package org.antlr.v4.runtime; + +public class MismatchedSetException extends RecognitionException { + /** Used for remote debugger deserialization */ + public MismatchedSetException() {;} + + public MismatchedSetException(BaseRecognizer recognizer, IntStream input) { + super(recognizer, input, recognizer.state.ctx); + } + + public String toString() { + return "MismatchedSetException("+getUnexpectedType()+"!="+expecting+")"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/MismatchedTokenException.java b/runtime/Java/src/org/antlr/v4/runtime/MismatchedTokenException.java new file mode 100644 index 000000000..f277dbcc9 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/MismatchedTokenException.java @@ -0,0 +1,42 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +/** A mismatched char or Token or tree node */ +public class MismatchedTokenException extends RecognitionException { + /** Used for remote debugger deserialization */ + public MismatchedTokenException() {;} + + public MismatchedTokenException(BaseRecognizer recognizer, IntStream input, int expecting) { + super(recognizer, input, recognizer.state.ctx); + } + + public String toString() { + return "MismatchedTokenException("+getUnexpectedType()+"!="+expecting+")"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/MismatchedTreeNodeException.java b/runtime/Java/src/org/antlr/v4/runtime/MismatchedTreeNodeException.java new file mode 100644 index 000000000..7a39c59c1 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/MismatchedTreeNodeException.java @@ -0,0 +1,45 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +/** + */ +public class MismatchedTreeNodeException extends RecognitionException { + public MismatchedTreeNodeException() { + } + + public MismatchedTreeNodeException(BaseRecognizer recognizer, + IntStream input, int firstSet) + { + super(recognizer, input, recognizer.state.ctx); + } + + public String toString() { + return "MismatchedTreeNodeException("+getUnexpectedType()+"!="+expecting+")"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/MissingTokenException.java b/runtime/Java/src/org/antlr/v4/runtime/MissingTokenException.java new file mode 100644 index 000000000..15021e5d5 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/MissingTokenException.java @@ -0,0 +1,56 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +/** We were expecting a token but it's not found. The current token + * is actually what we wanted next. Used for tree node errors too. 
+ */ +public class MissingTokenException extends MismatchedTokenException { + public Object inserted; + /** Used for remote debugger deserialization */ + public MissingTokenException() {;} + + public MissingTokenException(BaseRecognizer recognizer, IntStream input, int expecting, Object inserted) { + super(recognizer, input, expecting); + this.inserted = inserted; + } + + public int getMissingType() { + return expecting.getSingleElement(); + } + + public String toString() { + if ( inserted!=null && token!=null ) { + return "MissingTokenException(inserted "+inserted+" at "+token.getText()+")"; + } + if ( token!=null ) { + return "MissingTokenException(at "+token.getText()+")"; + } + return "MissingTokenException"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/NoViableAltException.java b/runtime/Java/src/org/antlr/v4/runtime/NoViableAltException.java new file mode 100644 index 000000000..5ce905745 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/NoViableAltException.java @@ -0,0 +1,63 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +import org.antlr.v4.analysis.ATNConfig; +import org.antlr.v4.misc.OrderedHashSet; + +public class NoViableAltException extends RecognitionException { + /** Prediction began at what input index? */ + public int startIndex; + + /** Which configurations did we try at input.index() that couldn't match input.LT(1)? 
*/ + public OrderedHashSet deadEndConfigs; + + /** Used for remote debugger deserialization */ + public NoViableAltException() {;} + + public NoViableAltException(BaseRecognizer recognizer, RuleContext ctx) { // LL(1) error + super(recognizer, recognizer.state.input, ctx); + } + + public NoViableAltException(BaseRecognizer recognizer, IntStream input, + OrderedHashSet deadEndConfigs, + RuleContext ctx) + { + super(recognizer, input, ctx); + this.deadEndConfigs = deadEndConfigs; + } + + public String toString() { + if ( recognizer!=null ) { + TokenStream tokens = recognizer.state.input; + String bad = tokens.toString(startIndex, index); + return "NoViableAltException(input=\""+bad+"\" last token type is "+getUnexpectedType(); + } + return "NoViableAltException(last token type is "+getUnexpectedType(); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/Parser.java b/runtime/Java/src/org/antlr/v4/runtime/Parser.java new file mode 100644 index 000000000..e8566ff06 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/Parser.java @@ -0,0 +1,95 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +import org.antlr.v4.runtime.misc.LABitSet; + +/** A parser for TokenStreams. "parser grammars" result in a subclass + * of this. 
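A rough end-to-end sketch of how such a subclass gets used; MyLexer, MyParser and the start rule prog() are hypothetical generated artifacts, ANTLRStringStream is assumed to take the input text, and setTokenStream() is the reset method defined just below:

    CommonTokenStream tokens =
        new CommonTokenStream(new MyLexer(new ANTLRStringStream("1+2;")));
    MyParser parser = new MyParser(tokens);   // generated subclass of Parser
    parser.prog();                            // invoke the generated start rule
    // reuse the same parser object on fresh input
    parser.setTokenStream(new CommonTokenStream(new MyLexer(new ANTLRStringStream("3*4;"))));
    parser.prog();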
+ */ +public class Parser extends BaseRecognizer { + public Parser(TokenStream input) { + super(input); + } + + public Parser(TokenStream input, ParserSharedState state) { + super(input, state); // share the state object with another parser + } + + public void reset() { + super.reset(); // reset all recognizer state variables + if ( state.input!=null ) { + state.input.seek(0); // rewind the input + } + } + + protected Object getCurrentInputSymbol() { + return ((TokenStream)state.input).LT(1); + } + + protected Object getMissingSymbol(RecognitionException e, + int expectedTokenType, + LABitSet follow) + { + String tokenText = null; + if ( expectedTokenType== Token.EOF ) tokenText = ""; + else tokenText = ""; + CommonToken t = new CommonToken(expectedTokenType, tokenText); + Token current = ((TokenStream)state.input).LT(1); + if ( current.getType() == Token.EOF ) { + current = ((TokenStream)state.input).LT(-1); + } + t.line = current.getLine(); + t.charPositionInLine = current.getCharPositionInLine(); + t.channel = Token.DEFAULT_CHANNEL; + return t; + } + + /** Set the token stream and reset the parser */ + public void setTokenStream(TokenStream input) { + this.state.input = null; + reset(); + this.state.input = input; + } + + public TokenStream getTokenStream() { + return (TokenStream)state.input; + } + + public String getSourceName() { + return state.input.getSourceName(); + } + + public void traceIn(String ruleName, int ruleIndex) { + super.traceIn(ruleName, ruleIndex, ((TokenStream)state.input).LT(1)); + } + + public void traceOut(String ruleName, int ruleIndex) { + super.traceOut(ruleName, ruleIndex, ((TokenStream)state.input).LT(1)); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/ParserRuleContext.java b/runtime/Java/src/org/antlr/v4/runtime/ParserRuleContext.java new file mode 100644 index 000000000..1cedcd968 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/ParserRuleContext.java @@ -0,0 +1,71 @@ +/* + [BSD] + Copyright (c) 2010 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +import org.stringtemplate.v4.ST; + +/** Rules return values in an object containing all the values. 
+ * Besides the properties defined in + * RuleLabelScope.predefinedRulePropertiesScope there may be user-defined + * return values. This class simply defines the minimum properties that + * are always defined and methods to access the others that might be + * available depending on output option such as template and tree. + * + * Note text is not an actual property of the return value, it is computed + * from start and stop using the input stream's toString() method. I + * could add a ctor to this so that we can pass in and store the input + * stream, but I'm not sure we want to do that. It would seem to be undefined + * to get the .text property anyway if the rule matches tokens from multiple + * input streams. + * + * I do not use getters for fields of objects that are used simply to + * group values such as this aggregate. The getters/setters are there to + * satisfy the superclass interface. + */ +public class ParserRuleContext extends RuleContext { + public Token start, stop; + public Object tree; + public ST st; + + public ParserRuleContext() { super(); } + + public ParserRuleContext(RuleContext parent, int stateNumber) { + // capture state that called us as we create this context; use later for + // return state in closure + super(parent, parent!=null ? parent.s : -1, stateNumber); + } + + @Override + public Object getTree() { return tree; } + @Override + public ST getTemplate() { return st; } + @Override + public Token getStart() { return start; } + @Override + public Token getStop() { return stop; } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/ParserSharedState.java b/runtime/Java/src/org/antlr/v4/runtime/ParserSharedState.java new file mode 100644 index 000000000..895b65ac4 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/ParserSharedState.java @@ -0,0 +1,98 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +/** The set of fields needed by an abstract recognizer to recognize input + * and recover from errors etc... As a separate state object, it can be + * shared among multiple grammars; e.g., when one grammar imports another. 
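A sketch of that sharing, using the Parser(TokenStream, ParserSharedState) constructor added above; MyParser and MyImportedParser stand in for a generated main grammar and its import, under the assumption that generated parsers expose the same two-argument constructor:

    CommonTokenStream tokens =
        new CommonTokenStream(new MyLexer(new ANTLRStringStream("x;")));
    ParserSharedState shared = new ParserSharedState();
    MyParser main = new MyParser(tokens, shared);
    MyImportedParser delegate = new MyImportedParser(tokens, shared);
    // both recognizers now report through the same errorRecovery, syntaxErrors
    // and ctx fields instead of keeping separate copies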
+ */ +public class ParserSharedState extends RecognizerSharedState { + /** First on stack is fake a call to start rule from S' : S EOF ; + * Generated start rule does this. + */ +// public QStack ctx; + public ParserRuleContext ctx; // tracks local _ctx var to see from outside + + /** This is true when we see an error and before having successfully + * matched a token. Prevents generation of more than one error message + * per error. + */ + public boolean errorRecovery = false; + + /** The index into the input stream where the last error occurred. + * This is used to prevent infinite loops where an error is found + * but no token is consumed during recovery...another error is found, + * ad naseum. This is a failsafe mechanism to guarantee that at least + * one token/tree node is consumed for two errors. + */ + public int lastErrorIndex = -1; + + /** In lieu of a return value, this indicates that a rule or token + * has failed to match. Reset to false upon valid token match. + */ + public boolean failed = false; + + /** Did the recognizer encounter a syntax error? Track how many. */ + public int syntaxErrors = 0; + + /** If 0, no backtracking is going on. Safe to exec actions etc... + * If >0 then it's the level of backtracking. + */ +// public int backtracking = 0; + + /** An array[size num rules] of Map that tracks + * the stop token index for each rule. ruleMemo[ruleIndex] is + * the memoization table for ruleIndex. For key ruleStartIndex, you + * get back the stop token for associated rule or MEMO_RULE_FAILED. + * + * This is only used if rule memoization is on (which it is by default). + */ +// public Map[] ruleMemo; + + public ParserSharedState() { +// ctx = new RuleContext(); // implicit call to start rule + } + + @Override + public ParserRuleContext getContext() { + return ctx; + } + + // public RecognizerSharedState(RecognizerSharedState state) { +// this.ctx = state.ctx; +// this.errorRecovery = state.errorRecovery; +// this.lastErrorIndex = state.lastErrorIndex; +// this.failed = state.failed; +// this.syntaxErrors = state.syntaxErrors; +// this.backtracking = state.backtracking; +// if ( state.ruleMemo!=null ) { +// this.ruleMemo = new Map[state.ruleMemo.length]; +// System.arraycopy(state.ruleMemo, 0, this.ruleMemo, 0, state.ruleMemo.length); +// } +// } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/RecognitionException.java b/runtime/Java/src/org/antlr/v4/runtime/RecognitionException.java new file mode 100644 index 000000000..f05bfb6c8 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/RecognitionException.java @@ -0,0 +1,202 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +import org.antlr.runtime.CharStream; +import org.antlr.runtime.tree.*; +import org.antlr.v4.misc.IntervalSet; + +/** The root of the ANTLR exception hierarchy. + * + * To avoid English-only error messages and to generally make things + * as flexible as possible, these exceptions are not created with strings, + * but rather the information necessary to generate an error. Then + * the various reporting methods in Parser and Lexer can be overridden + * to generate a localized error message. For example, MismatchedToken + * exceptions are built with the expected token type. + * So, don't expect getMessage() to return anything. + * + * Note that as of Java 1.4, you can access the stack trace, which means + * that you can compute the complete trace of rules from the start symbol. + * This gives you considerable context information with which to generate + * useful error messages. + * + * ANTLR generates code that throws exceptions upon recognition error and + * also generates code to catch these exceptions in each rule. If you + * want to quit upon first error, you can turn off the automatic error + * handling mechanism using rulecatch action, but you still need to + * override methods mismatch and recoverFromMismatchSet. + * + * In general, the recognition exceptions can track where in a grammar a + * problem occurred and/or what was the expected input. While the parser + * knows its state (such as current input symbol and line info) that + * state can change before the exception is reported so current token index + * is computed and stored at exception time. From this info, you can + * perhaps print an entire line of input not just a single token, for example. + * Better to just say the recognizer had a problem and then let the parser + * figure out a fancy report. + */ +// TODO: split out lexer one +public class RecognitionException extends RuntimeException { + /** Who threw the exception? */ + public BaseRecognizer recognizer; + + public RuleContext ctx; // should be what is in recognizer, but won't work when interpreting + + public IntervalSet expecting; + + public IntStream input; + + /** What is index of token/char were we looking at when the error occurred? */ + public int index; + + /** The current Token when an error occurred. Since not all streams + * can retrieve the ith Token, we have to track the Token object. + * For parsers. Even when it's a tree parser, token might be set. + */ + public Token token; + + /** If this is a tree parser exception, node is set to the node with + * the problem. + */ + public Object node; + + /** The current char when an error occurred. For lexers. */ + public int c; + + /** Track the line at which the error occurred in case this is + * generated from a lexer. 
We need to track this since the + * unexpected char doesn't carry the line info. + */ + public int line; + + public int charPositionInLine; + + /** If you are parsing a tree node stream, you will encounter som + * imaginary nodes w/o line/col info. We now search backwards looking + * for most recent token with line/col info, but notify getErrorHeader() + * that info is approximate. + */ + public boolean approximateLineInfo; + + /** Used for remote debugger deserialization */ + public RecognitionException() { + } + + public RecognitionException(BaseRecognizer recognizer, IntStream input, + RuleContext ctx) + { + this.recognizer = recognizer; + this.input = input; + this.ctx = ctx; + // firstSet is what can we're expecting within rule that calls this ctor. + // must combine with context-sensitive FOLLOW of that rule. +// LABitSet viableTokensFollowingThisRule = recognizer.computeNextViableTokenSet(); +// this.expecting = viableTokensFollowingThisRule.or(firstSet); + if ( recognizer!=null ) expecting = recognizer._interp.atn.nextTokens(ctx); + + this.index = input.index(); + if ( input instanceof TokenStream) { + this.token = ((TokenStream)input).LT(1); + this.line = token.getLine(); + this.charPositionInLine = token.getCharPositionInLine(); + } + if ( input instanceof TreeNodeStream) { + //extractInformationFromTreeNodeStream(input); + } + else if ( input instanceof CharStream) { + this.c = input.LA(1); + this.line = ((CharStream)input).getLine(); + this.charPositionInLine = ((CharStream)input).getCharPositionInLine(); + } + else { + this.c = input.LA(1); + } + } + + /* + protected void extractInformationFromTreeNodeStream(IntStream input) { + TreeNodeStream nodes = (TreeNodeStream)input; + this.node = nodes.LT(1); + TreeAdaptor adaptor = nodes.getTreeAdaptor(); + Token payload = adaptor.getToken(node); + if ( payload!=null ) { + this.token = payload; + if ( payload.getLine()<= 0 ) { + // imaginary node; no line/pos info; scan backwards + int i = -1; + Object priorNode = nodes.LT(i); + while ( priorNode!=null ) { + Token priorPayload = adaptor.getToken(priorNode); + if ( priorPayload!=null && priorPayload.getLine()>0 ) { + // we found the most recent real line / pos info + this.line = priorPayload.getLine(); + this.charPositionInLine = priorPayload.getCharPositionInLine(); + this.approximateLineInfo = true; + break; + } + --i; + priorNode = nodes.LT(i); + } + } + else { // node created from real token + this.line = payload.getLine(); + this.charPositionInLine = payload.getCharPositionInLine(); + } + } + else if ( this.node instanceof Tree) { + this.line = ((Tree)this.node).getLine(); + this.charPositionInLine = ((Tree)this.node).getCharPositionInLine(); + if ( this.node instanceof CommonTree) { + this.token = ((CommonTree)this.node).token; + } + } + else { + int type = adaptor.getType(this.node); + String text = adaptor.getText(this.node); + this.token = new CommonToken(type, text); + } + } + */ + + /** Return the token type or char of the unexpected input element */ + public int getUnexpectedType() { + if ( recognizer==null ) return token.getType(); + if ( recognizer.state.input instanceof TokenStream) { + return token.getType(); + } + else if ( recognizer.state.input instanceof TreeNodeStream ) { + TreeNodeStream nodes = (TreeNodeStream)recognizer.state.input; + TreeAdaptor adaptor = nodes.getTreeAdaptor(); + return adaptor.getType(node); + } + else { + return c; + } + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java 
b/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java new file mode 100644 index 000000000..b3a858d41 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/Recognizer.java @@ -0,0 +1,207 @@ +package org.antlr.v4.runtime; + +import org.antlr.v4.runtime.atn.ATN; + +import java.util.*; + +public class Recognizer { + public static final int EOF=-1; + + protected List listeners; + + protected ATNInterpreter _interp; + + /** State of a lexer, parser, or tree parser are collected into a state + * object so the state can be shared. This sharing is needed to + * have one grammar import others and share same error variables + * and other state variables. It's a kind of explicit multiple + * inheritance via delegation of methods and shared state. + */ + public SharedState state; + + /** Used to print out token names like ID during debugging and + * error reporting. The generated parsers implement a method + * that overrides this to point to their String[] tokenNames. + */ + public String[] getTokenNames() { + return null; + } + + public String[] getRuleNames() { + return null; + } + + public ATN getATN() { return null; } + + public ATNInterpreter getInterpreter() { return _interp; } + + public void displayRecognitionError(RecognitionException e) { + String hdr = getErrorHeader(e); + String msg = getErrorMessage(e); + emitErrorMessage(hdr+" "+msg); + } + + /** What error message should be generated for the various + * exception types? + * + * Not very object-oriented code, but I like having all error message + * generation within one method rather than spread among all of the + * exception classes. This also makes it much easier for the exception + * handling because the exception classes do not have to have pointers back + * to this object to access utility routines and so on. Also, changing + * the message for an exception type would be difficult because you + * would have to subclassing exception, but then somehow get ANTLR + * to make those kinds of exception objects instead of the default. + * This looks weird, but trust me--it makes the most sense in terms + * of flexibility. + * + * For grammar debugging, you will want to override this to add + * more information such as the stack frame with + * getRuleInvocationStack(e, this.getClass().getName()) and, + * for no viable alts, the decision description and state etc... + * + * Override this to change the message generated for one or more + * exception types. 
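A sketch of that override point, assuming Parser ultimately inherits this getErrorMessage() and that getSourceName() (defined on Parser earlier in this patch) names the input; MyParser is hypothetical:

    public class MyParser extends Parser {
        public MyParser(TokenStream input) { super(input); }
        @Override
        public String getErrorMessage(RecognitionException e) {
            // prefix every generated message with the source name
            return getSourceName()+": "+super.getErrorMessage(e);
        }
    }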
+ */ + public String getErrorMessage(RecognitionException e) { + String[] tokenNames = getTokenNames(); + String msg = e.getMessage(); + if ( e instanceof UnwantedTokenException ) { + UnwantedTokenException ute = (UnwantedTokenException)e; + String tokenName=""; + if ( ute.expecting.member(Token.EOF) ) { + tokenName = "EOF"; + } + else { + tokenName = tokenNames[ute.expecting.getSingleElement()]; + } + msg = "extraneous input "+getTokenErrorDisplay(ute.getUnexpectedToken())+ + " expecting "+tokenName; + } + else if ( e instanceof MissingTokenException ) { + MissingTokenException mte = (MissingTokenException)e; + String tokenName=""; + if ( mte.expecting.member(Token.EOF) ) { + tokenName = "EOF"; + } + else { + tokenName = tokenNames[mte.expecting.getSingleElement()]; + } + msg = "missing "+tokenName+" at "+getTokenErrorDisplay(e.token); + } + else if ( e instanceof MismatchedTokenException ) { + MismatchedTokenException mte = (MismatchedTokenException)e; + String tokenName=""; +// if ( mte.expecting.member(Token.EOF) ) { +// tokenName = "EOF"; +// } +// else { +// tokenName = tokenNames[mte.expecting.getSingleElement()]; +// } + msg = "mismatched input "+getTokenErrorDisplay(e.token)+ + " expecting "+tokenName; + } + else if ( e instanceof MismatchedTreeNodeException ) { + MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e; + String tokenName=""; + if ( mtne.expecting.member(Token.EOF) ) { + tokenName = "EOF"; + } + else { + tokenName = tokenNames[mtne.expecting.getSingleElement()]; + } + msg = "mismatched tree node: "+mtne.node+ + " expecting "+tokenName; + } + else if ( e instanceof NoViableAltException ) { + //NoViableAltException nvae = (NoViableAltException)e; + // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" + // and "(decision="+nvae.decisionNumber+") and + // "state "+nvae.stateNumber + msg = "no viable alternative at input "+getTokenErrorDisplay(e.token); + } + else if ( e instanceof MismatchedSetException ) { + MismatchedSetException mse = (MismatchedSetException)e; + msg = "mismatched input "+getTokenErrorDisplay(e.token)+ + " expecting set "+mse.expecting; + } + else if ( e instanceof MismatchedNotSetException ) { + MismatchedNotSetException mse = (MismatchedNotSetException)e; + msg = "mismatched input "+getTokenErrorDisplay(e.token)+ + " expecting set "+mse.expecting; + } + else if ( e instanceof FailedPredicateException ) { + FailedPredicateException fpe = (FailedPredicateException)e; + msg = "rule "+fpe.ruleName+" failed predicate: {"+ + fpe.predicateText+"}?"; + } + return msg; + } + + /** What is the error header, normally line/character position information? */ + public String getErrorHeader(RecognitionException e) { + return "line "+e.line+":"+e.charPositionInLine; + } + + /** How should a token be displayed in an error message? The default + * is to display just the text, but during development you might + * want to have a lot of information spit out. Override in that case + * to use t.toString() (which, for CommonToken, dumps everything about + * the token). This is better than forcing you to override a method in + * your token objects because you don't have to go modify your lexer + * so that it creates a new Java type. 
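+	 *
+	 *  A rough development-time override might simply be (sketch):
+	 *
+	 *    public String getTokenErrorDisplay(Token t) {
+	 *        return String.valueOf(t); // whatever the token's toString() dumps
+	 *    }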
+ */ + public String getTokenErrorDisplay(Token t) { + String s = t.getText(); + if ( s==null ) { + if ( t.getType()==Token.EOF ) { + s = ""; + } + else { + s = "<"+t.getType()+">"; + } + } + s = s.replaceAll("\n","\\\\n"); + s = s.replaceAll("\r","\\\\r"); + s = s.replaceAll("\t","\\\\t"); + return "'"+s+"'"; + } + + /** Override this method to change where error messages go */ + public void emitErrorMessage(String msg) { + System.err.println(msg); + } + + public void addListener(ANTLRParserListener pl) { + if ( listeners==null ) { + listeners = + Collections.synchronizedList(new ArrayList(2)); + } + if ( pl!=null ) listeners.add(pl); + } + + public void removeListener(ANTLRParserListener pl) { listeners.remove(pl); } + + public void removeListeners() { listeners.clear(); } + + public List getListeners() { return listeners; } + + public void notifyListeners(RecognitionException re) { + if ( listeners==null || listeners.size()==0 ) { + // call legacy v3 func; this calls emitErrorMessage(String msg) + displayRecognitionError(re); + return; + } + for (ANTLRParserListener pl : listeners) pl.error(re); + } + + // subclass needs to override these if there are sempreds or actions + // that the ATN interp needs to execute + public boolean sempred(int ruleIndex, int actionIndex) { + return true; + } + + /** In lexer, both indexes are same; one action per rule. */ + public void action(int ruleIndex, int actionIndex) { + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/RecognizerSharedState.java b/runtime/Java/src/org/antlr/v4/runtime/RecognizerSharedState.java new file mode 100644 index 000000000..9b71802e2 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/RecognizerSharedState.java @@ -0,0 +1,6 @@ +package org.antlr.v4.runtime; + +public class RecognizerSharedState { + public StreamType input; + public RuleContext getContext() { return null; }; +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/RuleContext.java b/runtime/Java/src/org/antlr/v4/runtime/RuleContext.java new file mode 100644 index 000000000..2acdbac17 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/RuleContext.java @@ -0,0 +1,250 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +import org.antlr.v4.runtime.atn.ATN; +import org.antlr.v4.runtime.atn.ATNState; +import org.stringtemplate.v4.ST; + +/** Rules can return start/stop info as well as possible trees and templates. + * Each context knows about invoking context and pointer into ATN so we + * can compute FOLLOW for errors and lookahead. + * + * Used during parse to record stack of rule invocations and during + * ATN simulation to record invoking states. + */ +public class RuleContext { + public static final RuleContext EMPTY = new RuleContext(); + + /** What context invoked this rule? */ + public RuleContext parent; + + /** Current ATN state number we are executing. + * + * Not used during ATN simulation; only used during parse that updates + * current location in ATN. + */ + public int s; + + /** What state invoked the rule associated with this context? + * The "return address" is the followState of invokingState + * If parent is null, this should be -1. + */ + public int invokingState = -1; + + /** Computing the hashCode is very expensive and closureBusy() + * uses it to track when it's seen a state|ctx before to avoid + * infinite loops. As we add new contexts, record the hash code + * as this.invokingState + parent.cachedHashCode. Avoids walking + * up the tree for every hashCode(). Note that this caching works + * because a context is a monotonically growing tree of context nodes + * and nothing on the stack is ever modified...ctx just grows + * or shrinks. + */ + protected int cachedHashCode; + + /** Return the start token or tree */ + public Object getStart() { return null; } + + /** Return the stop token or tree */ + public Object getStop() { return null; } + + /** Has a value potentially if output=AST; */ + public Object getTree() { return null; } + + /** Has a value potentially if output=template */ + public ST getTemplate() { return null; } + + public RuleContext() {} + +// public RuleContext(RuleContext parent) { +// this.parent = parent; +//// while ( p!=null ) { +//// System.out.println(); +//// p = p.parent; +//// } +// } + + public RuleContext(RuleContext parent, int invokingState, int stateNumber) { + this.parent = parent; + //if ( parent!=null ) System.out.println("invoke "+stateNumber+" from "+parent); + this.s = stateNumber; + this.invokingState = invokingState; + + this.cachedHashCode = invokingState; + if ( parent!=null ) { + this.cachedHashCode += parent.cachedHashCode; + } + } + + public int hashCode() { +// int h = 0; +// RuleContext p = this; +// while ( p!=null ) { +// h += p.stateNumber; +// p = p.parent; +// } +// return h; + return cachedHashCode; // works with tests; don't recompute. + } + + public int depth() { + int n = 0; + RuleContext p = this; + while ( p!=null ) { + p = p.parent; + n++; + } + return n; + } + /** Two contexts are equals() if both have + * same call stack; walk upwards to the root. + * Note that you may be comparing contexts in different alt trees. 
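+	 *  For example (state numbers illustrative):
+	 *
+	 *    RuleContext root = new RuleContext();
+	 *    RuleContext a = new RuleContext(root, 21, 5);
+	 *    RuleContext b = new RuleContext(root, 21, 7);
+	 *
+	 *  a.equals(b) is true even though the current ATN states (5 vs 7) differ,
+	 *  because only the chain of invoking states is compared.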
+ * + * The hashCode is cheap as it's computed once upon each context + * push on the stack. Using it to make equals() more efficient. + */ + public boolean equals(Object o) { + RuleContext other = ((RuleContext)o); + if ( this.cachedHashCode != other.cachedHashCode ) { + return false; // can't be same if hash is different + } + if ( this.hashCode() != other.hashCode() ) { + return false; // can't be same if hash is different + } + if ( this==other ) return true; + + // System.out.println("comparing "+this+" with "+other); + RuleContext sp = this; + while ( sp!=null && other!=null ) { + if ( sp.invokingState != other.invokingState) return false; + sp = sp.parent; + other = other.parent; + } + if ( !(sp==null && other==null) ) { + return false; // both pointers must be at their roots after walk + } + return true; + } + + /** Two contexts conflict() if they are equals() or one is a stack suffix + * of the other. For example, contexts [21 12 $] and [21 9 $] do not + * conflict, but [21 $] and [21 12 $] do conflict. Note that I should + * probably not show the $ in this case. There is a dummy node for each + * stack that just means empty; $ is a marker that's all. + * + * This is used in relation to checking conflicts associated with a + * single NFA state's configurations within a single DFA state. + * If there are configurations s and t within a DFA state such that + * s.state=t.state && s.alt != t.alt && s.ctx conflicts t.ctx then + * the DFA state predicts more than a single alt--it's nondeterministic. + * Two contexts conflict if they are the same or if one is a suffix + * of the other. + * + * When comparing contexts, if one context has a stack and the other + * does not then they should be considered the same context. The only + * way for an NFA state p to have an empty context and a nonempty context + * is the case when closure falls off end of rule without a call stack + * and re-enters the rule with a context. This resolves the issue I + * discussed with Sriram Srinivasan Feb 28, 2005 about not terminating + * fast enough upon nondeterminism. + */ + public boolean conflictsWith(RuleContext other) { + return this.suffix(other); // || this.equals(other); + } + + /** [$] suffix any context + * [21 $] suffix [21 12 $] + * [21 12 $] suffix [21 $] + * [21 18 $] suffix [21 18 12 9 $] + * [21 18 12 9 $] suffix [21 18 $] + * [21 12 $] not suffix [21 9 $] + * + * Example "[21 $] suffix [21 12 $]" means: rule r invoked current rule + * from state 21. Rule s invoked rule r from state 12 which then invoked + * current rule also via state 21. While the context prior to state 21 + * is different, the fact that both contexts emanate from state 21 implies + * that they are now going to track perfectly together. Once they + * converged on state 21, there is no way they can separate. In other + * words, the prior stack state is not consulted when computing where to + * go in the closure operation. ?$ and ??$ are considered the same stack. + * If ? is popped off then $ and ?$ remain; they are now an empty and + * nonempty context comparison. So, if one stack is a suffix of + * another, then it will still degenerate to the simple empty stack + * comparison case. 
+ */ + protected boolean suffix(RuleContext other) { + RuleContext sp = this; + // if one of the contexts is empty, it never enters loop and returns true + while ( sp.parent!=null && other.parent!=null ) { + if ( sp.invokingState != other.invokingState ) { + return false; + } + sp = sp.parent; + other = other.parent; + } + //System.out.println("suffix"); + return true; + } + + /** A context is empty if there is no invoking state; meaning nobody call + * current context. + */ + public boolean isEmpty() { + return invokingState == -1; + } + + public String toString() { + return toString(null); + } + + public String toString(BaseRecognizer recog) { + StringBuffer buf = new StringBuffer(); + RuleContext p = this; + buf.append("["); + while ( p != null ) { + if ( recog!=null ) { + ATN atn = recog.getATN(); + ATNState s = atn.states.get(p.s); + String ruleName = recog.getRuleNames()[s.ruleIndex]; + buf.append(ruleName); + if ( p.parent != null ) buf.append(" "); +// ATNState invoker = atn.states.get(ctx.invokingState); +// RuleTransition rt = (RuleTransition)invoker.transition(0); +// buf.append(recog.getRuleNames()[rt.target.ruleIndex]); + } + else { + if ( !p.isEmpty() ) buf.append(p.invokingState); + if ( p.parent != null && !p.parent.isEmpty() ) buf.append(" "); + } + p = p.parent; + } + buf.append("]"); + return buf.toString(); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/Token.java b/runtime/Java/src/org/antlr/v4/runtime/Token.java new file mode 100644 index 000000000..95e501b97 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/Token.java @@ -0,0 +1,84 @@ +package org.antlr.v4.runtime; + +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +public interface Token { + /** imaginary tree navigation type; traverse "get child" link */ + public static final int DOWN = 1; + /** imaginary tree navigation type; finish with a child list */ + public static final int UP = 2; + + public static final int MIN_TOKEN_TYPE = UP+1; + + public static final int EOF = CharStream.EOF; + + public static final int INVALID_TYPE = 0; + public static final Token INVALID_TOKEN = new CommonToken(INVALID_TYPE); + + /** All tokens go to the parser (unless skip() is called in that rule) + * on a particular "channel". The parser tunes to a particular channel + * so that whitespace etc... can go to the parser on a "hidden" channel. + */ + public static final int DEFAULT_CHANNEL = 0; + + /** Anything on different channel than DEFAULT_CHANNEL is not parsed + * by parser. + */ + public static final int HIDDEN_CHANNEL = 99; + + /** Get the text of the token */ + public String getText(); + public void setText(String text); + + public int getType(); + public void setType(int ttype); + /** The line number on which this token was matched; line=1..n */ + public int getLine(); + public void setLine(int line); + + /** The index of the first character relative to the beginning of the line 0..n-1 */ + public int getCharPositionInLine(); + public void setCharPositionInLine(int pos); + + public int getChannel(); + public void setChannel(int channel); + + /** An index from 0..n-1 of the token object in the input stream. + * This must be valid in order to use the ANTLRWorks debugger. + */ + public int getTokenIndex(); + public void setTokenIndex(int index); + + /** From what character stream was this token created? You don't have to + * implement but it's nice to know where a Token comes from if you have + * include files etc... on the input. + */ + public CharStream getInputStream(); + public void setInputStream(CharStream input); +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/TokenRewriteStream.java b/runtime/Java/src/org/antlr/v4/runtime/TokenRewriteStream.java new file mode 100644 index 000000000..2b72be6b7 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/TokenRewriteStream.java @@ -0,0 +1,590 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime; + +import java.util.*; + +/** Useful for dumping out the input stream after doing some + * augmentation or other manipulations. + * + * You can insert stuff, replace, and delete chunks. Note that the + * operations are done lazily--only if you convert the buffer to a + * String. This is very efficient because you are not moving data around + * all the time. As the buffer of tokens is converted to strings, the + * toString() method(s) check to see if there is an operation at the + * current index. If so, the operation is done and then normal String + * rendering continues on the buffer. This is like having multiple Turing + * machine instruction streams (programs) operating on a single input tape. :) + * + * Since the operations are done lazily at toString-time, operations do not + * screw up the token index values. That is, an insert operation at token + * index i does not change the index values for tokens i+1..n-1. + * + * Because operations never actually alter the buffer, you may always get + * the original token stream back without undoing anything. Since + * the instructions are queued up, you can easily simulate transactions and + * roll back any changes if there is an error just by removing instructions. + * For example, + * + * CharStream input = new ANTLRFileStream("input"); + * TLexer lex = new TLexer(input); + * TokenRewriteStream tokens = new TokenRewriteStream(lex); + * T parser = new T(tokens); + * parser.startRule(); + * + * Then in the rules, you can execute + * Token t,u; + * ... + * input.insertAfter(t, "text to put after t");} + * input.insertAfter(u, "text after u");} + * System.out.println(tokens.toString()); + * + * Actually, you have to cast the 'input' to a TokenRewriteStream. :( + * + * You can also have multiple "instruction streams" and get multiple + * rewrites from a single pass over the input. Just name the instruction + * streams and use that name again when printing the buffer. This could be + * useful for generating a C file and also its header file--all from the + * same buffer: + * + * tokens.insertAfter("pass1", t, "text to put after t");} + * tokens.insertAfter("pass2", u, "text after u");} + * System.out.println(tokens.toString("pass1")); + * System.out.println(tokens.toString("pass2")); + * + * If you don't use named rewrite streams, a "default" stream is used as + * the first example shows. + */ +public class TokenRewriteStream extends CommonTokenStream { + public static final String DEFAULT_PROGRAM_NAME = "default"; + public static final int PROGRAM_INIT_SIZE = 100; + public static final int MIN_TOKEN_INDEX = 0; + + // Define the rewrite operation hierarchy + + class RewriteOperation { + /** What index into rewrites List are we? */ + protected int instructionIndex; + /** Token buffer index. 
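+	 *  For an InsertBeforeOp this is the token the new text is emitted in front of;
+	 *  for a ReplaceOp it is the first token of the replaced range (lastIndex marks the end).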
*/ + protected int index; + protected Object text; + + protected RewriteOperation(int index) { + this.index = index; + } + + protected RewriteOperation(int index, Object text) { + this.index = index; + this.text = text; + } + /** Execute the rewrite operation by possibly adding to the buffer. + * Return the index of the next token to operate on. + */ + public int execute(StringBuffer buf) { + return index; + } + public String toString() { + String opName = getClass().getName(); + int $index = opName.indexOf('$'); + opName = opName.substring($index+1, opName.length()); + return "<"+opName+"@"+tokens.get(index)+ + ":\""+text+"\">"; + } + } + + class InsertBeforeOp extends RewriteOperation { + public InsertBeforeOp(int index, Object text) { + super(index,text); + } + public int execute(StringBuffer buf) { + buf.append(text); + if ( tokens.get(index).getType()!=Token.EOF ) { + buf.append(tokens.get(index).getText()); + } + return index+1; + } + } + + /** I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp + * instructions. + */ + class ReplaceOp extends RewriteOperation { + protected int lastIndex; + public ReplaceOp(int from, int to, Object text) { + super(from,text); + lastIndex = to; + } + public int execute(StringBuffer buf) { + if ( text!=null ) { + buf.append(text); + } + return lastIndex+1; + } + public String toString() { + if ( text==null ) { + return ""; + } + return ""; + } + } + + /** You may have multiple, named streams of rewrite operations. + * I'm calling these things "programs." + * Maps String (name) -> rewrite (List) + */ + protected Map programs = null; + + /** Map String (program name) -> Integer index */ + protected Map lastRewriteTokenIndexes = null; + + public TokenRewriteStream() { + init(); + } + + protected void init() { + programs = new HashMap(); + programs.put(DEFAULT_PROGRAM_NAME, new ArrayList(PROGRAM_INIT_SIZE)); + lastRewriteTokenIndexes = new HashMap(); + } + + public TokenRewriteStream(TokenSource tokenSource) { + super(tokenSource); + init(); + } + + public TokenRewriteStream(TokenSource tokenSource, int channel) { + super(tokenSource, channel); + init(); + } + + public void rollback(int instructionIndex) { + rollback(DEFAULT_PROGRAM_NAME, instructionIndex); + } + + /** Rollback the instruction stream for a program so that + * the indicated instruction (via instructionIndex) is no + * longer in the stream. UNTESTED! 
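+	 *
+	 *  Transaction-style sketch (the caller must count its own instructions;
+	 *  the program list is not exposed publicly):
+	 *
+	 *    tokens.insertBefore(5, "int ");  // instruction 0
+	 *    tokens.insertAfter(9, ";");      // instruction 1
+	 *    tokens.rollback(1);              // keep instruction 0, drop 1 and anything later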
+ */ + public void rollback(String programName, int instructionIndex) { + List is = (List)programs.get(programName); + if ( is!=null ) { + programs.put(programName, is.subList(MIN_TOKEN_INDEX,instructionIndex)); + } + } + + public void deleteProgram() { + deleteProgram(DEFAULT_PROGRAM_NAME); + } + + /** Reset the program so that no instructions exist */ + public void deleteProgram(String programName) { + rollback(programName, MIN_TOKEN_INDEX); + } + + public void insertAfter(Token t, Object text) { + insertAfter(DEFAULT_PROGRAM_NAME, t, text); + } + + public void insertAfter(int index, Object text) { + insertAfter(DEFAULT_PROGRAM_NAME, index, text); + } + + public void insertAfter(String programName, Token t, Object text) { + insertAfter(programName,t.getTokenIndex(), text); + } + + public void insertAfter(String programName, int index, Object text) { + // to insert after, just insert before next index (even if past end) + insertBefore(programName,index+1, text); + } + + public void insertBefore(Token t, Object text) { + insertBefore(DEFAULT_PROGRAM_NAME, t, text); + } + + public void insertBefore(int index, Object text) { + insertBefore(DEFAULT_PROGRAM_NAME, index, text); + } + + public void insertBefore(String programName, Token t, Object text) { + insertBefore(programName, t.getTokenIndex(), text); + } + + public void insertBefore(String programName, int index, Object text) { + RewriteOperation op = new InsertBeforeOp(index,text); + List rewrites = getProgram(programName); + op.instructionIndex = rewrites.size(); + rewrites.add(op); + } + + public void replace(int index, Object text) { + replace(DEFAULT_PROGRAM_NAME, index, index, text); + } + + public void replace(int from, int to, Object text) { + replace(DEFAULT_PROGRAM_NAME, from, to, text); + } + + public void replace(Token indexT, Object text) { + replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text); + } + + public void replace(Token from, Token to, Object text) { + replace(DEFAULT_PROGRAM_NAME, from, to, text); + } + + public void replace(String programName, int from, int to, Object text) { + if ( from > to || from<0 || to<0 || to >= tokens.size() ) { + throw new IllegalArgumentException("replace: range invalid: "+from+".."+to+"(size="+tokens.size()+")"); + } + RewriteOperation op = new ReplaceOp(from, to, text); + List rewrites = getProgram(programName); + op.instructionIndex = rewrites.size(); + rewrites.add(op); + } + + public void replace(String programName, Token from, Token to, Object text) { + replace(programName, + from.getTokenIndex(), + to.getTokenIndex(), + text); + } + + public void delete(int index) { + delete(DEFAULT_PROGRAM_NAME, index, index); + } + + public void delete(int from, int to) { + delete(DEFAULT_PROGRAM_NAME, from, to); + } + + public void delete(Token indexT) { + delete(DEFAULT_PROGRAM_NAME, indexT, indexT); + } + + public void delete(Token from, Token to) { + delete(DEFAULT_PROGRAM_NAME, from, to); + } + + public void delete(String programName, int from, int to) { + replace(programName,from,to,null); + } + + public void delete(String programName, Token from, Token to) { + replace(programName,from,to,null); + } + + public int getLastRewriteTokenIndex() { + return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); + } + + protected int getLastRewriteTokenIndex(String programName) { + Integer I = (Integer)lastRewriteTokenIndexes.get(programName); + if ( I==null ) { + return -1; + } + return I.intValue(); + } + + protected void setLastRewriteTokenIndex(String programName, int i) { + 
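+		// remember the most recently rewritten token index for this named program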
lastRewriteTokenIndexes.put(programName, new Integer(i)); + } + + protected List getProgram(String name) { + List is = (List)programs.get(name); + if ( is==null ) { + is = initializeProgram(name); + } + return is; + } + + private List initializeProgram(String name) { + List is = new ArrayList(PROGRAM_INIT_SIZE); + programs.put(name, is); + return is; + } + + public String toOriginalString() { + fill(); + return toOriginalString(MIN_TOKEN_INDEX, size()-1); + } + + public String toOriginalString(int start, int end) { + StringBuffer buf = new StringBuffer(); + for (int i=start; i>=MIN_TOKEN_INDEX && i<=end && itokens.size()-1 ) end = tokens.size()-1; + if ( start<0 ) start = 0; + + if ( rewrites==null || rewrites.size()==0 ) { + return toOriginalString(start,end); // no instructions to execute + } + StringBuffer buf = new StringBuffer(); + + // First, optimize instruction stream + Map indexToOp = reduceToSingleOperationPerIndex(rewrites); + + // Walk buffer, executing instructions and emitting tokens + int i = start; + while ( i <= end && i < tokens.size() ) { + RewriteOperation op = (RewriteOperation)indexToOp.get(new Integer(i)); + indexToOp.remove(new Integer(i)); // remove so any left have index size-1 + Token t = (Token) tokens.get(i); + if ( op==null ) { + // no operation at that index, just dump token + if ( t.getType()!=Token.EOF ) buf.append(t.getText()); + i++; // move to next token + } + else { + i = op.execute(buf); // execute operation and skip + } + } + + // include stuff after end if it's last index in buffer + // So, if they did an insertAfter(lastValidIndex, "foo"), include + // foo if end==lastValidIndex. + if ( end==tokens.size()-1 ) { + // Scan any remaining operations after last token + // should be included (they will be inserts). + Iterator it = indexToOp.values().iterator(); + while (it.hasNext()) { + RewriteOperation op = (RewriteOperation)it.next(); + if ( op.index >= tokens.size()-1 ) buf.append(op.text); + } + } + return buf.toString(); + } + + /** We need to combine operations and report invalid operations (like + * overlapping replaces that are not completed nested). Inserts to + * same index need to be combined etc... Here are the cases: + * + * I.i.u I.j.v leave alone, nonoverlapping + * I.i.u I.i.v combine: Iivu + * + * R.i-j.u R.x-y.v | i-j in x-y delete first R + * R.i-j.u R.i-j.v delete first R + * R.i-j.u R.x-y.v | x-y in i-j ERROR + * R.i-j.u R.x-y.v | boundaries overlap ERROR + * + * Delete special case of replace (text==null): + * D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) + * + * I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before + * we're not deleting i) + * I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping + * R.x-y.v I.i.u | i in x-y ERROR + * R.x-y.v I.x.u R.x-y.uv (combine, delete I) + * R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping + * + * I.i.u = insert u before op @ index i + * R.x-y.u = replace x-y indexed tokens with u + * + * First we need to examine replaces. For any replace op: + * + * 1. wipe out any insertions before op within that range. + * 2. Drop any replace op before that is contained completely within + * that range. + * 3. Throw exception upon boundary overlap with any previous replace. + * + * Then we can deal with inserts: + * + * 1. for any inserts to same index, combine even if not adjacent. + * 2. for any prior replace with same left boundary, combine this + * insert with replace and delete this replace. + * 3. 
throw exception if index in same range as previous replace + * + * Don't actually delete; make op null in list. Easier to walk list. + * Later we can throw as we add to index -> op map. + * + * Note that I.2 R.2-2 will wipe out I.2 even though, technically, the + * inserted stuff would be before the replace range. But, if you + * add tokens in front of a method body '{' and then delete the method + * body, I think the stuff before the '{' you added should disappear too. + * + * Return a map from token index to operation. + */ + protected Map reduceToSingleOperationPerIndex(List rewrites) { +// System.out.println("rewrites="+rewrites); + + // WALK REPLACES + for (int i = 0; i < rewrites.size(); i++) { + RewriteOperation op = (RewriteOperation)rewrites.get(i); + if ( op==null ) continue; + if ( !(op instanceof ReplaceOp) ) continue; + ReplaceOp rop = (ReplaceOp)rewrites.get(i); + // Wipe prior inserts within range + List inserts = getKindOfOps(rewrites, InsertBeforeOp.class, i); + for (int j = 0; j < inserts.size(); j++) { + InsertBeforeOp iop = (InsertBeforeOp) inserts.get(j); + if ( iop.index == rop.index ) { + // E.g., insert before 2, delete 2..2; update replace + // text to include insert before, kill insert + rewrites.set(iop.instructionIndex, null); + rop.text = iop.text.toString() + (rop.text!=null?rop.text.toString():""); + } + else if ( iop.index > rop.index && iop.index <= rop.lastIndex ) { + // delete insert as it's a no-op. + rewrites.set(iop.instructionIndex, null); + } + } + // Drop any prior replaces contained within + List prevReplaces = getKindOfOps(rewrites, ReplaceOp.class, i); + for (int j = 0; j < prevReplaces.size(); j++) { + ReplaceOp prevRop = (ReplaceOp) prevReplaces.get(j); + if ( prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) { + // delete replace as it's a no-op. 
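+					// (the earlier replace range lies entirely inside the current one,
+					//  so the current replace subsumes it)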
+ rewrites.set(prevRop.instructionIndex, null); + continue; + } + // throw exception unless disjoint or identical + boolean disjoint = + prevRop.lastIndex rop.lastIndex; + boolean same = + prevRop.index==rop.index && prevRop.lastIndex==rop.lastIndex; + // Delete special case of replace (text==null): + // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) + if ( prevRop.text==null && rop.text==null && !disjoint ) { + //System.out.println("overlapping deletes: "+prevRop+", "+rop); + rewrites.set(prevRop.instructionIndex, null); // kill first delete + rop.index = Math.min(prevRop.index, rop.index); + rop.lastIndex = Math.max(prevRop.lastIndex, rop.lastIndex); + System.out.println("new rop "+rop); + } + else if ( !disjoint && !same ) { + throw new IllegalArgumentException("replace op boundaries of "+rop+ + " overlap with previous "+prevRop); + } + } + } + + // WALK INSERTS + for (int i = 0; i < rewrites.size(); i++) { + RewriteOperation op = (RewriteOperation)rewrites.get(i); + if ( op==null ) continue; + if ( !(op instanceof InsertBeforeOp) ) continue; + InsertBeforeOp iop = (InsertBeforeOp)rewrites.get(i); + // combine current insert with prior if any at same index + List prevInserts = getKindOfOps(rewrites, InsertBeforeOp.class, i); + for (int j = 0; j < prevInserts.size(); j++) { + InsertBeforeOp prevIop = (InsertBeforeOp) prevInserts.get(j); + if ( prevIop.index == iop.index ) { // combine objects + // convert to strings...we're in process of toString'ing + // whole token buffer so no lazy eval issue with any templates + iop.text = catOpText(iop.text,prevIop.text); + // delete redundant prior insert + rewrites.set(prevIop.instructionIndex, null); + } + } + // look for replaces where iop.index is in range; error + List prevReplaces = getKindOfOps(rewrites, ReplaceOp.class, i); + for (int j = 0; j < prevReplaces.size(); j++) { + ReplaceOp rop = (ReplaceOp) prevReplaces.get(j); + if ( iop.index == rop.index ) { + rop.text = catOpText(iop.text,rop.text); + rewrites.set(i, null); // delete current insert + continue; + } + if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) { + throw new IllegalArgumentException("insert op "+iop+ + " within boundaries of previous "+rop); + } + } + } + // System.out.println("rewrites after="+rewrites); + Map m = new HashMap(); + for (int i = 0; i < rewrites.size(); i++) { + RewriteOperation op = (RewriteOperation)rewrites.get(i); + if ( op==null ) continue; // ignore deleted ops + if ( m.get(new Integer(op.index))!=null ) { + throw new Error("should only be one op per index"); + } + m.put(new Integer(op.index), op); + } + //System.out.println("index to op: "+m); + return m; + } + + protected String catOpText(Object a, Object b) { + String x = ""; + String y = ""; + if ( a!=null ) x = a.toString(); + if ( b!=null ) y = b.toString(); + return x+y; + } + protected List getKindOfOps(List rewrites, Class kind) { + return getKindOfOps(rewrites, kind, rewrites.size()); + } + + /** Get all operations before an index of a particular kind */ + protected List getKindOfOps(List rewrites, Class kind, int before) { + List ops = new ArrayList(); + for (int i=0; i=MIN_TOKEN_INDEX && i<=end && i=n, return Token.EOFToken. + * Return null for LT(0) and any index that results in an absolute address + * that is negative. + */ + public Token LT(int k); + + /** How far ahead has the stream been asked to look? The return + * value is a valid index from 0..n-1. + */ + int range(); + + /** Get a token at an absolute index i; 0..n-1. 
This is really only + * needed for profiling and debugging and token stream rewriting. + * If you don't want to buffer up tokens, then this method makes no + * sense for you. Naturally you can't use the rewrite stream feature. + * I believe DebugTokenStream can easily be altered to not use + * this method, removing the dependency. + */ + public Token get(int i); + + /** Where is this stream pulling tokens from? This is not the name, but + * the object that provides Token objects. + */ + public TokenSource getTokenSource(); + + /** Return the text of all tokens from start to stop, inclusive. + * If the stream does not buffer all the tokens then it can just + * return "" or null; Users should not access $ruleLabel.text in + * an action of course in that case. + */ + public String toString(int start, int stop); + + /** Because the user is not required to use a token with an index stored + * in it, we must provide a means for two token objects themselves to + * indicate the start/end location. Most often this will just delegate + * to the other toString(int,int). This is also parallel with + * the TreeNodeStream.toString(Object,Object). + */ + public String toString(Token start, Token stop); +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/UnwantedTokenException.java b/runtime/Java/src/org/antlr/v4/runtime/UnwantedTokenException.java new file mode 100644 index 000000000..c5d6d62c4 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/UnwantedTokenException.java @@ -0,0 +1,50 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +package org.antlr.v4.runtime; + +/** An extra token while parsing a TokenStream */ +public class UnwantedTokenException extends MismatchedTokenException { + /** Used for remote debugger deserialization */ + public UnwantedTokenException() {;} + + public UnwantedTokenException(BaseRecognizer recognizer, IntStream input, int expecting) { + super(recognizer, input, expecting); + } + + public Token getUnexpectedToken() { + return token; + } + + public String toString() { + String exp = ", expected "+expecting; + if ( token==null ) { + return "UnwantedTokenException(found="+null+exp+")"; + } + return "UnwantedTokenException(found="+token.getText()+exp+")"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATN.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATN.java new file mode 100644 index 000000000..8700c4e7c --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATN.java @@ -0,0 +1,107 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.analysis.LL1Analyzer; +import org.antlr.v4.automata.ATNSerializer; +import org.antlr.v4.misc.*; +import org.antlr.v4.runtime.RuleContext; +import org.antlr.v4.tool.*; + +import java.util.*; + +/** */ +// TODO: split into runtime / analysis time? +public class ATN { + public static final int INVALID_ALT_NUMBER = -1; + public static final int INVALID_DECISION_NUMBER = -1; + + public Grammar g; + public List states = new ArrayList(); + public List rules = new ArrayList(); // rule index to start state + + /** Each subrule/rule is a decision point and we must track them so we + * can go back later and build DFA predictors for them. This includes + * all the rules, subrules, optional blocks, ()+, ()* etc... + */ + public List decisionToATNState = new ArrayList(); + + public Map ruleToStartState = new LinkedHashMap(); + public Map ruleToStopState = new LinkedHashMap(); + public Map modeNameToStartState = + new LinkedHashMap(); + + // runtime + public int grammarType; // ANTLRParser.LEXER, ... + public List modeToStartState = new ArrayList(); + + // runtime for lexer + public List ruleToTokenType = new ArrayList(); + public List ruleToActionIndex = new ArrayList(); + + public int maxTokenType; + + int stateNumber = 0; + + // TODO: for runtime all we need is states, decisionToATNState I think + + public ATN(Grammar g) { + this.g = g; + if ( g.isLexer() ) { + ruleToTokenType.add(0); // no rule index 0 + ruleToActionIndex.add(0); // no action index 0 + for (Rule r : g.rules.values()) { + ruleToTokenType.add(g.getTokenType(r.name)); + if ( r.actionIndex>0 ) ruleToActionIndex.add(r.actionIndex); + else ruleToActionIndex.add(0); + } + } + } + + /** Used for runtime deserialization of ATNs from strings */ + public ATN() { } + + public IntervalSet nextTokens(RuleContext ctx) { + return nextTokens(ctx.s, ctx); + } + + public IntervalSet nextTokens(int stateNumber, RuleContext ctx) { + ATNState s = states.get(stateNumber); + if ( s == null ) return null; + LL1Analyzer anal = new LL1Analyzer(this); + IntervalSet next = anal.LOOK(s, ctx); + return next; + } + + public void addState(ATNState state) { + state.atn = this; + states.add(state); + state.stateNumber = stateNumber++; + } + + public int defineDecisionState(DecisionState s) { + decisionToATNState.add(s); + s.decision = decisionToATNState.size()-1; + return s.decision; + } + + public int getNumberOfDecisions() { + return decisionToATNState.size(); + } + + /** Used by Java target to encode short/int array as chars in string. 
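+	 *  (Presumably each serialized value fits in 16 bits, so the ATN can be embedded
+	 *  in generated code as an ordinary String constant.)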
*/ + public String getSerializedAsString() { + return new String(Utils.toCharArray(getSerialized())); + } + + public List getSerialized() { + return new ATNSerializer(this).serialize(); + } + + public char[] getSerializedAsChars() { + return Utils.toCharArray(new ATNSerializer(this).serialize()); + } + + public String getDecoded() { + return new ATNSerializer(this).decode(Utils.toCharArray(getSerialized())); + } + +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNInterpreter.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNInterpreter.java new file mode 100644 index 000000000..91a7af973 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNInterpreter.java @@ -0,0 +1,151 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.analysis.ATNConfig; +import org.antlr.v4.misc.*; +import org.antlr.v4.parse.ANTLRParser; +import org.antlr.v4.runtime.dfa.*; +import org.antlr.v4.tool.*; + +import java.util.*; + +public abstract class ATNInterpreter { + /** Must distinguish between missing edge and edge we know leads nowhere */ + public static DFAState ERROR; + public ATN atn; + + protected ATNConfig prevAccept; // TODO Move down? used to avoid passing int down and back up in method calls + protected int prevAcceptIndex = -1; + + static { + ERROR = new DFAState(new OrderedHashSet()); + ERROR.stateNumber = Integer.MAX_VALUE; + } + + public ATNInterpreter(ATN atn) { + this.atn = atn; + } + + public static ATN deserialize(char[] data) { + ATN atn = new ATN(); + List sets = new ArrayList(); + int p = 0; + atn.grammarType = toInt(data[p++]); + atn.maxTokenType = toInt(data[p++]); + int nstates = toInt(data[p++]); + for (int i=1; i<=nstates; i++) { + int stype = toInt(data[p++]); + if ( stype==0 ) continue; // ignore bad type of states + ATNState s = stateFactory(stype, i); + s.ruleIndex = toInt(data[p++]); + atn.addState(s); + } + int nrules = toInt(data[p++]); + for (int i=1; i<=nrules; i++) { + int s = toInt(data[p++]); + ATNState startState = atn.states.get(s); + atn.rules.add(startState); + if ( atn.ruleToTokenType.size()==0 ) atn.ruleToTokenType.add(0); // we're indexed from 1 + if ( atn.ruleToActionIndex.size()==0 ) atn.ruleToActionIndex.add(0); // we're indexed from 1 + if ( atn.grammarType==ANTLRParser.LEXER ) { + int tokenType = toInt(data[p++]); + atn.ruleToTokenType.add(tokenType); + int actionIndex = toInt(data[p++]); + atn.ruleToActionIndex.add(actionIndex); + } + else { + p += 2; + } + } + int nmodes = toInt(data[p++]); + for (int i=0; i sets) + { + ATNState target = atn.states.get(trg); + switch (type) { + case Transition.EPSILON : return new EpsilonTransition(target); + case Transition.RANGE : return new RangeTransition(arg1, arg2, target); + case Transition.RULE : return new RuleTransition(arg1, atn.states.get(arg1), target); + case Transition.PREDICATE : return new PredicateTransition(target, arg1, arg2); + case Transition.ATOM : return new AtomTransition(arg1, target); + case Transition.ACTION : return new ActionTransition(target, arg1, arg2); + case Transition.FORCED_ACTION : return new ActionTransition(target, arg1, arg2); + case Transition.SET : return new SetTransition(null, sets.get(arg1), target); + case Transition.NOT_ATOM : return new NotAtomTransition(arg1, target); + case Transition.NOT_SET : return new NotSetTransition(null, sets.get(arg1), target); + case Transition.WILDCARD : return new WildcardTransition(target); + } + return null; + } + + public static ATNState stateFactory(int type, int stateNumber) { + ATNState s = null; + switch (type) { + 
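+			// case labels mirror the serialization constants declared in ATNState (BASIC=1 .. PLUS_LOOP_BACK=10)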
case ATNState.BASIC : s = new ATNState(); break; + case ATNState.RULE_START : s = new RuleStartState(); break; + case ATNState.BLOCK_START : s = new BlockStartState(); break; + case ATNState.PLUS_BLOCK_START : s = new PlusBlockStartState(); break; + case ATNState.STAR_BLOCK_START : s = new StarBlockStartState(); break; + case ATNState.TOKEN_START : s = new TokensStartState(); break; + case ATNState.RULE_STOP : s = new RuleStopState(); break; + case ATNState.BLOCK_END : s = new BlockEndState(); break; + case ATNState.STAR_LOOP_BACK : s = new StarLoopbackState(); break; + case ATNState.PLUS_LOOP_BACK : s = new PlusLoopbackState(); break; + } + s.stateNumber = stateNumber; + return s; + } + + public static void dump(DFA dfa, Grammar g) { + DOTGenerator dot = new DOTGenerator(g); + String output = dot.getDOT(dfa, false); + System.out.println(output); + } + + public static void dump(DFA dfa) { + dump(dfa, null); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNStack.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNStack.java new file mode 100644 index 000000000..0059885a7 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNStack.java @@ -0,0 +1,75 @@ +package org.antlr.v4.runtime.atn; + +/** Identical to ANTLR's static grammar analysis ATNContext object */ +public class ATNStack { + public static final ATNStack EMPTY = new ATNStack(null, -1); + + public ATNStack parent; + + /** The ATN state following state that invoked another rule's start state + * is recorded on the rule invocation context stack. + */ + public int returnAddr; + + /** Computing the hashCode is very expensive and ATN.addToClosure() + * uses it to track when it's seen a state|ctx before to avoid + * infinite loops. As we add new contexts, record the hash code + * as this + parent.cachedHashCode. Avoids walking + * up the tree for every hashCode(). Note that this caching works + * because a context is a monotonically growing tree of context nodes + * and nothing on the stack is ever modified...ctx just grows + * or shrinks. + */ + protected int cachedHashCode; + + public ATNStack(ATNStack parent, int returnAddr) { + this.parent = parent; + this.returnAddr = returnAddr; + if ( returnAddr >= 0 ) { + this.cachedHashCode = returnAddr; + } + if ( parent!=null ) { + this.cachedHashCode += parent.cachedHashCode; + } + } + + public int hashCode() { return cachedHashCode; } + + /** Two contexts are equals() if both have + * same call stack; walk upwards to the root. + * Recall that the root sentinel node has no parent. + * Note that you may be comparing contextsv in different alt trees. 
+ */ + public boolean equals(Object o) { + ATNStack other = ((ATNStack)o); + if ( this.cachedHashCode != other.cachedHashCode ) { + return false; // can't be same if hash is different + } + if ( this==other ) return true; + + // System.out.println("comparing "+this+" with "+other); + ATNStack sp = this; + while ( sp.parent!=null && other.parent!=null ) { + if ( sp.returnAddr != other.returnAddr) return false; + sp = sp.parent; + other = other.parent; + } + if ( !(sp.parent==null && other.parent==null) ) { + return false; // both pointers must be at their roots after walk + } + return true; + } + + public String toString() { + StringBuffer buf = new StringBuffer(); + ATNStack sp = this; + buf.append("["); + while ( sp.parent!=null ) { + buf.append(sp.returnAddr); + buf.append(" "); + sp = sp.parent; + } + buf.append("$]"); + return buf.toString(); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNState.java new file mode 100644 index 000000000..90fafbff4 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNState.java @@ -0,0 +1,111 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.tool.*; + +import java.util.*; + +public class ATNState { + // constants for serialization + public static final int BASIC = 1; + public static final int RULE_START = 2; + public static final int BLOCK_START = 3; + public static final int PLUS_BLOCK_START = 4; + public static final int STAR_BLOCK_START = 5; + public static final int TOKEN_START = 6; + public static final int RULE_STOP = 7; + public static final int BLOCK_END = 8; + public static final int STAR_LOOP_BACK = 9; + public static final int PLUS_LOOP_BACK = 10; + + public static String[] serializationNames = { + "INVALID", + "BASIC", + "RULE_START", + "BLOCK_START", + "PLUS_BLOCK_START", + "STAR_BLOCK_START", + "TOKEN_START", + "RULE_STOP", + "BLOCK_END", + "STAR_LOOP_BACK", + "PLUS_LOOP_BACK", + }; + + public static Map serializationTypes = + new HashMap() {{ + put(ATNState.class, BASIC); + put(RuleStartState.class, RULE_START); + put(BlockStartState.class, BLOCK_START); + put(PlusBlockStartState.class, PLUS_BLOCK_START); + put(StarBlockStartState.class, STAR_BLOCK_START); + put(TokensStartState.class, TOKEN_START); + put(RuleStopState.class, RULE_STOP); + put(BlockEndState.class, BLOCK_END); + put(PlusLoopbackState.class, PLUS_LOOP_BACK); + put(StarLoopbackState.class, STAR_LOOP_BACK); + }}; + + public static final int INVALID_STATE_NUMBER = -1; + + public int stateNumber = INVALID_STATE_NUMBER; + + public Rule rule; + // TODO: split runtime / from analysis or cleanup + public int ruleIndex; // at runtime, we don't have Rule objects + + /** Which ATN are we in? */ + public ATN atn = null; + + /** ATN state is associated with which node in AST? */ + public GrammarAST ast; + public Transition transition; + /** For o-A->o type ATN tranitions, record the label that leads to this + * state. Useful for creating rich error messages when we find + * insufficiently (with preds) covered states. + */ + public Transition incidentTransition; + + @Override + public int hashCode() { return stateNumber; } + + @Override + public boolean equals(Object o) { + // are these states same object? 
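+		// equality is by state number (assigned sequentially by ATN.addState()), not object identity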
+ if ( o instanceof ATNState) return stateNumber==((ATNState)o).stateNumber; + return false; + } + + @Override + public String toString() { + return String.valueOf(stateNumber); + } + + public int getNumberOfTransitions() { + if ( transition!=null ) return 1; + return 0; + } + + public void addTransition(Transition e) { + if ( transition!=null ) throw new IllegalArgumentException("only one transition"); + transition = e; + } + + public Transition transition(int i) { + if ( i>0 ) throw new IllegalArgumentException("only one transition"); + return transition; + } + + public boolean onlyHasEpsilonTransitions() { + return transition!=null && transition.isEpsilon(); + } + + public void setTransition(int i, Transition e) { + if ( i>0 ) throw new IllegalArgumentException("only one transition"); + transition = e; + } + + public void setRule(Rule r) { + this.rule = r; + if ( r!=null ) this.ruleIndex = r.index; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ActionTransition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ActionTransition.java new file mode 100644 index 000000000..784e0c487 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ActionTransition.java @@ -0,0 +1,37 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.tool.*; + +public class ActionTransition extends Transition { + public int ruleIndex; + public int actionIndex = -1; + public GrammarAST actionAST; + + public ActionTransition(GrammarAST actionAST, ATNState target) { + super(target); + this.actionAST = actionAST; + } + + public ActionTransition(ATNState target, int ruleIndex, int actionIndex) { + super(target); + this.ruleIndex = ruleIndex; + this.actionIndex = actionIndex; + } + + public boolean isEpsilon() { + return true; // we are to be ignored by analysis 'cept for predicates + } + + public int compareTo(Object o) { + return 0; + } + + public String toString() { + if ( actionAST!=null ) return "{"+actionAST.getText()+"}"; + return "action_"+ruleIndex+":"+actionIndex; + } + + public String toString(Grammar g) { + return toString(); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/AtomTransition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/AtomTransition.java new file mode 100644 index 000000000..0dd916d64 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/AtomTransition.java @@ -0,0 +1,53 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.misc.IntervalSet; +import org.antlr.v4.tool.Grammar; + +/** TODO: make all transitions sets? no, should remove set edges */ +public class AtomTransition extends Transition { + /** The token type or character value; or, signifies special label. 
*/ + public int label; + + public AtomTransition(int label, ATNState target) { + this.label = label; + this.target = target; + } + + public AtomTransition(ATNState target) { + super(target); + } + + public IntervalSet label() { return IntervalSet.of(label); } + + public int hashCode() { return label; } + + public boolean equals(Object o) { + if ( o==null ) return false; + if ( this == o ) return true; // equals if same object + if ( o.getClass() == SetTransition.class ) { + return IntervalSet.of(label).equals(o); + } + return label!=((AtomTransition)o).label; + } + +// public boolean intersect(Label other) { +// if ( other.getClass() == AtomTransition.class ) { +// return label==((AtomTransition)other).label; +// } +// return ((SetLabel)other).label.member(this.label); +// } + + public int compareTo(Object o) { + return this.label-((AtomTransition)o).label; + } + + @Override + public String toString(Grammar g) { + if (g!=null ) return g.getTokenDisplayName(label); + return toString(); + } + + public String toString() { + return String.valueOf(label); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/BlockEndState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/BlockEndState.java new file mode 100644 index 000000000..ef94016f7 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/BlockEndState.java @@ -0,0 +1,5 @@ +package org.antlr.v4.runtime.atn; + +/** Terminal node of a simple (a|b|c) block */ +public class BlockEndState extends ATNState { +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/BlockStartState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/BlockStartState.java new file mode 100644 index 000000000..7e345bc75 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/BlockStartState.java @@ -0,0 +1,6 @@ +package org.antlr.v4.runtime.atn; + +/** The start of a regular (...) block */ +public class BlockStartState extends DecisionState { + public BlockEndState endState; +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/DecisionState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/DecisionState.java new file mode 100644 index 000000000..f3deb373e --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/DecisionState.java @@ -0,0 +1,32 @@ +package org.antlr.v4.runtime.atn; + +import java.util.*; + +public class DecisionState extends ATNState { + public static final int INITIAL_NUM_TRANSITIONS = 4; + + /** Track the transitions emanating from this ATN state. 
*/ + public List transitions = new ArrayList(INITIAL_NUM_TRANSITIONS); + + public int decision; + + @Override + public int getNumberOfTransitions() { return transitions.size(); } + + @Override + public void addTransition(Transition e) { transitions.add(e); } + + public void addTransitionFirst(Transition e) { transitions.add(0, e); } + + @Override + public Transition transition(int i) { return transitions.get(i); } + + @Override + public boolean onlyHasEpsilonTransitions() { return true; } + + @Override + public void setTransition(int i, Transition e) { + transitions.set(i, e); + } + +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/EpsilonTransition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/EpsilonTransition.java new file mode 100644 index 000000000..1aecdebf4 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/EpsilonTransition.java @@ -0,0 +1,18 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.tool.Grammar; + +public class EpsilonTransition extends Transition { + public EpsilonTransition(ATNState target) { super(target); } + + public boolean isEpsilon() { return true; } + + public int compareTo(Object o) { + return 0; + } + + @Override + public String toString(Grammar g) { + return "epsilon"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerInterpreter.java b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerInterpreter.java new file mode 100644 index 000000000..4d4901817 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerInterpreter.java @@ -0,0 +1,412 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.analysis.ATNConfig; +import org.antlr.v4.misc.OrderedHashSet; +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.dfa.*; + +/** "dup" of ParserInterpreter */ +public class LexerInterpreter extends ATNInterpreter { + public static boolean debug = false; + public static boolean dfa_debug = false; + public static final int NUM_EDGES = 255; + + protected Lexer recog; + + protected DFA[] dfa; + protected int mode = Lexer.DEFAULT_MODE; + + public static int ATN_failover = 0; + public static int match_calls = 0; + + public LexerInterpreter(ATN atn) { + this(null, atn); + } + + public LexerInterpreter(Lexer recog, ATN atn) { + super(atn); + dfa = new DFA[atn.modeToStartState.size()]; + for (int i=0; i s0_closure = computeStartState(input, startState); + int old_mode = mode; + dfa[mode].s0 = addDFAState(s0_closure); + int predict = exec(input, s0_closure); + if ( debug ) System.out.println("DFA after matchATN: "+dfa[old_mode].toLexerString()); + return predict; + } + + public int exec(CharStream input, DFAState s0) { + if ( dfa_debug ) System.out.println("DFA[mode "+recog.state.mode+"] exec LA(1)=="+ + (char)input.LA(1)); + //System.out.println("DFA start of execDFA: "+dfa[mode].toLexerString()); + int prevAcceptMarker = -1; + DFAState prevAcceptState = null; + DFAState s = s0; + int startIndex = input.index(); + int t = input.LA(1); + if ( t==CharStream.EOF ) return -1; // TODO: how to match EOF in lexer rule? 
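+ // Walk the DFA one character at a time, remembering the most recent accept
+ // state so we can fall back to the longest match if we later dead-end.
+ // A missing edge fails over to the ATN interpreter on this state's configs.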
+ loop: + while ( true ) { + if ( dfa_debug ) System.out.println("state "+s.stateNumber+" LA(1)=="+(char)t); + if ( s.isAcceptState ) { + if ( dfa_debug ) System.out.println("accept; predict "+s.prediction+ + " in state "+s.stateNumber); + prevAcceptState = s; + prevAcceptMarker = input.index(); + // keep going unless we're at EOF; check if something else could match + if ( t==CharStream.EOF ) break; + } + // if no edge, pop over to ATN interpreter, update DFA and return + if ( s.edges == null || t >= s.edges.length || s.edges[t] == null ) { + if ( dfa_debug ) System.out.println("no edge for "+(char)t); + int ttype = -1; + try { + if ( dfa_debug ) { + System.out.println("ATN exec upon "+ + input.substring(startIndex,input.index())+ + " at DFA state "+s.stateNumber+" = "+s.configs); + } + ATN_failover++; + ttype = exec(input, s.configs); + } + catch (LexerNoViableAltException nvae) { + addDFAEdge(s, t, ERROR); + } + if ( dfa_debug ) { + System.out.println("back from DFA update, ttype="+ttype+ + ", dfa[mode "+mode+"]=\n"+dfa[mode].toLexerString()); + } + + if ( ttype==-1 ) { + addDFAEdge(s, t, ERROR); + break loop; // dead end; no where to go, fall back on prev if any + } + // action already executed + return ttype; // we've updated DFA, exec'd action, and have our deepest answer + } + DFAState target = s.edges[t]; + if ( target == ERROR ) break; + s = target; + input.consume(); + t = input.LA(1); + } + if ( prevAcceptState==null ) { + System.out.println("!!! no viable alt in dfa"); + return -1; + } + if ( recog!=null ) { + if ( dfa_debug ) { + System.out.println("ACTION "+ + recog.getRuleNames()[prevAcceptState.ruleIndex]+ + ":"+atn.ruleToActionIndex.get(prevAcceptState.ruleIndex)); + } + recog.action(prevAcceptState.ruleIndex, atn.ruleToActionIndex.get(prevAcceptState.ruleIndex)); + } + input.seek(prevAcceptMarker); + return prevAcceptState.prediction; + } + + public int exec(CharStream input, OrderedHashSet s0) { + //System.out.println("enter exec index "+input.index()+" from "+s0); + OrderedHashSet closure = new OrderedHashSet(); + closure.addAll(s0); + if ( debug ) System.out.println("start state closure="+closure); + + OrderedHashSet reach = new OrderedHashSet(); + ATNConfig prevAccept = null; + int prevAcceptIndex = -1; + + int t = input.LA(1); + if ( t==Token.EOF ) return Token.EOF; + + do { // while more work + if ( debug ) System.out.println("in reach starting closure: " + closure); + for (int ci=0; ci tmp = reach; + reach = closure; + closure = tmp; + reach.clear(); + } while ( true ); + + if ( prevAccept==null ) { + if ( t==Token.EOF ) { + System.out.println("EOF in token at input index "+input.index()); + return Token.EOF; + } +// System.out.println("no viable token at input "+getTokenName(input.LA(1))+", index "+input.index()); + throw new LexerNoViableAltException(recog, input, closure); // TODO: closure is empty + } + + if ( debug ) System.out.println("ACCEPT " + prevAccept.toString(recog, true) + " index " + prevAcceptIndex); + + int ruleIndex = prevAccept.state.ruleIndex; + int ttype = atn.ruleToTokenType.get(ruleIndex); + if ( debug ) { + if ( recog!=null ) System.out.println("ACTION "+recog.getRuleNames()[ruleIndex]+":"+ruleIndex); + else System.out.println("ACTION "+ruleIndex+":"+ruleIndex); + } + int actionIndex = atn.ruleToActionIndex.get(ruleIndex); + if ( actionIndex>0 ) recog.action(ruleIndex, actionIndex); + return ttype; + } + + public ATNState getReachableTarget(Transition trans, int t) { + if ( trans instanceof AtomTransition ) { + AtomTransition at = 
(AtomTransition)trans; + boolean not = trans instanceof NotAtomTransition; + if ( !not && at.label == t || not && at.label!=t ) { + if ( debug ) { + System.out.println("match "+getTokenName(at.label)); + } + return at.target; + } + } + else if ( trans.getClass() == RangeTransition.class ) { + RangeTransition rt = (RangeTransition)trans; + if ( t>=rt.from && t<=rt.to ) { + if ( debug ) System.out.println("match range "+rt.toString(atn.g)); + return rt.target; + } + } + else if ( trans instanceof SetTransition ) { + SetTransition st = (SetTransition)trans; + boolean not = trans instanceof NotSetTransition; + if ( !not && st.label.member(t) || not && !st.label.member(t) ) { + if ( debug ) System.out.println("match set "+st.label.toString(atn.g)); + return st.target; + } + } + else if ( trans instanceof WildcardTransition && t!=Token.EOF ) { + return trans.target; + } + return null; + } + + /* TODO: use if we need nongreedy + public void deleteConfigsForAlt(OrderedHashSet closure, int ci, int alt) { + int j=ci+1; + while ( j computeStartState(IntStream input, + ATNState p) + { + RuleContext initialContext = RuleContext.EMPTY; + OrderedHashSet configs = new OrderedHashSet(); + for (int i=0; i configs) { + if ( debug ) { + System.out.println("closure("+config.toString(recog, true)+")"); + } + + // TODO? if ( closure.contains(t) ) return; + + if ( config.state instanceof RuleStopState ) { + if ( debug ) System.out.println("closure at rule stop "+config); + if ( config.context == null || config.context.isEmpty() ) { + configs.add(config); + return; + } + RuleContext newContext = config.context.parent; // "pop" invoking state + ATNState invokingState = atn.states.get(config.context.invokingState); + RuleTransition rt = (RuleTransition)invokingState.transition(0); + ATNState retState = rt.followState; + ATNConfig c = new ATNConfig(retState, config.alt, newContext); + closure(c, configs); + return; + } + + // optimization + if ( !config.state.onlyHasEpsilonTransitions() ) { + configs.add(config); + } + + ATNState p = config.state; + for (int i=0; i p, + int t, + OrderedHashSet q) + { +// System.out.println("MOVE "+p+" -> "+q+" upon "+getTokenName(t)); + DFAState from = addDFAState(p); + DFAState to = addDFAState(q); + addDFAEdge(from, t, to); + } + + protected void addDFAEdge(DFAState p, int t, DFAState q) { + if ( p==null ) return; + if ( p.edges==null ) { + p.edges = new DFAState[NUM_EDGES+1]; // TODO: make adaptive + } + p.edges[t] = q; // connect + } + + /** Add a new DFA state if there isn't one with this set of + configurations already. This method also detects the first + configuration containing an ATN rule stop state. Later, when + traversing the DFA, we will know which rule to accept. Also, we + detect if any of the configurations derived from traversing a + semantic predicate. If so, we cannot add a DFA state for this + because the DFA would not test the predicate again in the + future. Rather than creating collections of semantic predicates + like v3 and testing them on prediction, v4 will test them on the + fly all the time using the ATN not the DFA. This is slower but + semantically it's not use that often. One of the key elements to + this predicate mechanism is not adding DFA states that see + predicates immediately afterwards in the ATN. For example, + + a : ID {p1}? | ID {p2}? ; + + should create the start state for rule 'a' (to save start state + competition), but should not create target of ID state. 
The + collection of ATN states the following ID references includes + states reached by traversing predicates. Since this is when we + test them, we cannot cash the DFA state target of ID. + */ + protected DFAState addDFAState(OrderedHashSet configs) { + DFAState proposed = new DFAState(configs); + DFAState existing = dfa[mode].states.get(proposed); + if ( existing!=null ) return existing; + + DFAState newState = proposed; + + ATNConfig firstConfigWithRuleStopState = null; + boolean traversedPredicate = false; + for (ATNConfig c : configs) { + if ( firstConfigWithRuleStopState==null && + c.state instanceof RuleStopState ) + { + firstConfigWithRuleStopState = c; + } + if ( c.traversedPredicate ) traversedPredicate = true; + } + + if ( firstConfigWithRuleStopState!=null ) { + newState.isAcceptState = true; + newState.ruleIndex = firstConfigWithRuleStopState.state.ruleIndex; + newState.prediction = atn.ruleToTokenType.get(newState.ruleIndex); + } + + if ( traversedPredicate ) return null; // cannot cache + + newState.stateNumber = dfa[mode].states.size(); + newState.configs = new OrderedHashSet(); + newState.configs.addAll(configs); + dfa[mode].states.put(newState, newState); + return newState; + } + + public DFA getDFA(int mode) { + return dfa[mode]; + } + + public String getTokenName(int t) { + if ( t==-1 ) return "EOF"; + //if ( atn.g!=null ) return atn.g.getTokenDisplayName(t); + return "'"+(char)t+"'"; + } + +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/NotAtomTransition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/NotAtomTransition.java new file mode 100644 index 000000000..984e77392 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/NotAtomTransition.java @@ -0,0 +1,17 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.tool.Grammar; + +public class NotAtomTransition extends AtomTransition { + public NotAtomTransition(int label, ATNState target) { + super(label, target); + } + public NotAtomTransition(ATNState target) { + super(target); + } + + @Override + public String toString(Grammar g) { + return '~'+super.toString(g); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/NotSetTransition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/NotSetTransition.java new file mode 100644 index 000000000..fa000d0a4 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/NotSetTransition.java @@ -0,0 +1,20 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.misc.IntervalSet; +import org.antlr.v4.tool.Grammar; +import org.antlr.v4.tool.GrammarAST; + +public class NotSetTransition extends SetTransition { + public NotSetTransition(GrammarAST ast, IntervalSet label, ATNState target) { + super(ast, label, target); + } + + public NotSetTransition(ATNState target) { + super(target); + } + + @Override + public String toString(Grammar g) { + return '~'+super.toString(g); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ParserInterpreter.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserInterpreter.java new file mode 100644 index 000000000..6fff01453 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserInterpreter.java @@ -0,0 +1,632 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.runtime.CharStream; +import org.antlr.v4.analysis.ATNConfig; +import org.antlr.v4.misc.*; +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.dfa.*; +import org.antlr.v4.tool.DOTGenerator; +import org.stringtemplate.v4.misc.MultiMap; + +import java.util.*; + +public class ParserInterpreter extends ATNInterpreter { + 
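+ // Adaptive prediction for parser decisions: consult the cached DFA first
+ // (execDFA) and fail over to full ATN simulation (execATN), growing the DFA
+ // with the states and edges we discover; ambiguous or context-sensitive
+ // decisions are retried with context (retryWithContext) and reported.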
public static boolean debug = false; + public static boolean dfa_debug = false; + + public static int ATN_failover = 0; + public static int predict_calls = 0; + + protected BaseRecognizer parser; + + public Map ctxToDFAs; + public Map[] decisionToDFAPerCtx; // TODO: USE THIS ONE + public DFA[] decisionToDFA; + protected boolean userWantsCtxSensitive = false; + + protected Set closureBusy = new HashSet(); + + public ParserInterpreter(ATN atn) { + super(atn); + ctxToDFAs = new HashMap(); + decisionToDFA = new DFA[atn.getNumberOfDecisions()]; + } + + public ParserInterpreter(BaseRecognizer parser, ATN atn) { + super(atn); + this.parser = parser; + ctxToDFAs = new HashMap(); + decisionToDFA = new DFA[atn.getNumberOfDecisions()+1]; + DOTGenerator dot = new DOTGenerator(null); +// System.out.println(dot.getDOT(atn.rules.get(0), parser.getRuleNames())); +// System.out.println(dot.getDOT(atn.rules.get(1), parser.getRuleNames())); + } + + public int adaptivePredict(TokenStream input, int decision, RuleContext ctx) { + predict_calls++; + DFA dfa = decisionToDFA[decision]; + if ( dfa==null || dfa.s0==null ) { + ATNState startState = atn.decisionToATNState.get(decision); + decisionToDFA[decision] = dfa = new DFA(startState); + dfa.decision = decision; + return predictATN(dfa, input, decision, ctx, false); + } + else { + //dump(dfa); + // start with the DFA + int m = input.mark(); + int alt = execDFA(input, dfa, dfa.s0, ctx); + input.seek(m); + return alt; + } + } + + public int predictATN(DFA dfa, TokenStream input, + int decision, + RuleContext originalContext, + boolean useContext) + { + if ( originalContext==null ) originalContext = RuleContext.EMPTY; + RuleContext ctx = RuleContext.EMPTY; + if ( useContext ) ctx = originalContext; + OrderedHashSet s0_closure = computeStartState(dfa.atnStartState, ctx); + dfa.s0 = addDFAState(dfa, s0_closure); + if ( prevAccept!=null ) { + dfa.s0.isAcceptState = true; + dfa.s0.prediction = prevAccept.alt; + } + + int alt = 0; + int m = input.mark(); + try { + alt = execATN(input, dfa, m, s0_closure, originalContext, useContext); + } + catch (NoViableAltException nvae) { dumpDeadEndConfigs(nvae); throw nvae; } + finally { + input.seek(m); + } + if ( debug ) System.out.println("DFA after predictATN: "+dfa.toString()); + return alt; + } + + // doesn't create DFA when matching + public int matchATN(TokenStream input, ATNState startState) { + DFA dfa = new DFA(startState); + RuleContext ctx = new ParserRuleContext(); + OrderedHashSet s0_closure = computeStartState(startState, ctx); + return execATN(input, dfa, input.index(), s0_closure, ctx, false); + } + + public int execDFA(TokenStream input, DFA dfa, DFAState s0, RuleContext originalContext) { + if ( dfa_debug ) System.out.println("DFA decision "+dfa.decision+" exec LA(1)=="+input.LT(1)); +// dump(dfa); + DFAState prevAcceptState = null; + DFAState s = s0; + int t = input.LA(1); + int start = input.index(); + loop: + while ( true ) { + if ( dfa_debug ) System.out.println("DFA state "+s.stateNumber+" LA(1)=="+t); + // TODO: ctxSensitive + if ( s.isCtxSensitive ) { + Integer predI = s.ctxToPrediction.get(originalContext); + if ( dfa_debug ) System.out.println("ctx sensitive state "+originalContext+"->"+predI+ + " in "+s); + if ( predI!=null ) return predI; +// System.out.println("start all over with ATN; can't use DFA"); + // start all over with ATN; can't use DFA + input.seek(start); + DFA throwAwayDFA = new DFA(dfa.atnStartState); + int alt = execATN(input, throwAwayDFA, start, s0.configs, originalContext, false); + 
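+ // cache this context's prediction so the next visit to this
+ // context-sensitive DFA state with the same invocation stack can reuse it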
s.ctxToPrediction.put(originalContext, alt); + return alt; + } + if ( s.isAcceptState ) { + if ( dfa_debug ) System.out.println("accept; predict "+s.prediction +" in state "+s.stateNumber); + prevAcceptState = s; + // keep going unless we're at EOF or state only has one alt number + // mentioned in configs; check if something else could match + if ( s.complete || t==CharStream.EOF ) break; + } + // if no edge, pop over to ATN interpreter, update DFA and return + if ( s.edges == null || t >= s.edges.length || s.edges[t+1] == null ) { + if ( dfa_debug ) System.out.println("no edge for "+t); + int alt = -1; + if ( dfa_debug ) { + System.out.println("ATN exec upon "+ + input.toString(start,input.index())+ + " at DFA state "+s.stateNumber); + } + try { + alt = execATN(input, dfa, start, s.configs, originalContext, false); + // this adds edge even if next state is accept for + // same alt; e.g., s0-A->:s1=>2-B->:s2=>2 + // TODO: This next stuff kills edge, but extra states remain. :( + if ( s.isAcceptState && alt!=-1 ) { + DFAState d = s.edges[input.LA(1)+1]; + if ( d.isAcceptState && d.prediction==s.prediction ) { + // we can carve it out. + s.edges[input.LA(1)+1] = ERROR; // IGNORE really not error + } + } + } + catch (NoViableAltException nvae) { + alt = -1; + } + if ( dfa_debug ) { + System.out.println("back from DFA update, alt="+alt+", dfa=\n"+dfa); + //dump(dfa); + } + if ( alt==-1 ) { + addDFAEdge(s, t, ERROR); + break loop; // dead end; no where to go, fall back on prev if any + } + // action already executed + if ( dfa_debug ) System.out.println("DFA decision "+dfa.decision+ + " predicts "+alt); + return alt; // we've updated DFA, exec'd action, and have our deepest answer + } + DFAState target = s.edges[t+1]; + if ( target == ERROR ) break; + s = target; + input.consume(); + t = input.LA(1); + } + if ( prevAcceptState==null ) { + System.out.println("!!! 
no viable alt in dfa"); + return -1; + } + if ( dfa_debug ) System.out.println("DFA decision "+dfa.decision+ + " predicts "+prevAcceptState.prediction); + return prevAcceptState.prediction; + } + + public int execATN(TokenStream input, + DFA dfa, + int startIndex, + OrderedHashSet s0, + RuleContext originalContext, + boolean useContext) + { + if ( debug ) System.out.println("ATN decision "+dfa.decision+" exec LA(1)=="+input.LT(1)); + ATN_failover++; + OrderedHashSet closure = new OrderedHashSet(); + + closure.addAll(s0); + + if ( debug ) System.out.println("start state closure="+closure); + + int t = input.LA(1); + if ( t==Token.EOF && prevAccept!=null ) { + // computeStartState must have reached end of rule + return prevAccept.alt; + } + + prevAccept = null; + prevAcceptIndex = -1; + OrderedHashSet reach = new OrderedHashSet(); + + do { // while more work + if ( debug ) System.out.println("in reach starting closure: " + closure); + int ncl = closure.size(); + for (int ci=0; ci ambigAlts = getAmbiguousAlts(reach); + if ( ambigAlts!=null ) { + if ( debug ) { + ATNState loc = atn.states.get(originalContext.s); + String rname = "n/a"; + if ( parser !=null ) rname = parser.getRuleNames()[loc.ruleIndex]; + System.out.println("AMBIG in "+rname+" for alt "+ambigAlts+" upon "+ + input.toString(startIndex, input.index())); + } + dfa.conflict = true; // at least one DFA state is ambiguous + if ( !userWantsCtxSensitive ) reportConflict(startIndex, input.index(), ambigAlts, reach); + +// ATNState loc = atn.states.get(originalContext.s); +// String rname = recog.getRuleNames()[loc.ruleIndex]; +// System.out.println("AMBIG orig="+originalContext.toString((BaseRecognizer)recog)+" for alt "+ambigAlts+" upon "+ +// input.toString(startIndex, input.index())); + if ( !userWantsCtxSensitive || useContext ) { + resolveToMinAlt(reach, ambigAlts); + } + else { + return retryWithContext(input, dfa, startIndex, originalContext, + closure, t, reach, ambigAlts); + } + } + + // if reach predicts single alt, can stop + + int uniqueAlt = getUniqueAlt(reach); + if ( uniqueAlt!=ATN.INVALID_ALT_NUMBER ) { + if ( debug ) System.out.println("PREDICT alt "+uniqueAlt+ + " decision "+dfa.decision+ + " at index "+input.index()); + addDFAEdge(dfa, closure, t, reach); + makeAcceptState(dfa, reach, uniqueAlt); + return uniqueAlt; + } + + if ( reach.size()==0 ) { + break; + } + + // If we matched t anywhere, need to consume and add closer-t->reach DFA edge + // else error if no previous accept + input.consume(); + addDFAEdge(dfa, closure, t, reach); + t = input.LA(1); + + // swap to avoid reallocating space + OrderedHashSet tmp = reach; + reach = closure; + closure = tmp; + reach.clear(); + } while ( true ); + + if ( prevAccept==null ) { + System.out.println("no viable token at input "+input.LT(1)+", index "+input.index()); + NoViableAltException nvae = new NoViableAltException(parser, input, closure, originalContext); + nvae.startIndex = startIndex; + throw nvae; + } + + if ( debug ) System.out.println("PREDICT " + prevAccept + " index " + prevAccept.alt); + return prevAccept.alt; + } + + protected int resolveToMinAlt(OrderedHashSet reach, Set ambigAlts) { + int min = getMinAlt(ambigAlts); + // if predicting, create DFA accept state for resolved alt + ambigAlts.remove(Utils.integer(min)); + // kill dead alts so we don't chase them ever + killAlts(ambigAlts, reach); + if ( debug ) System.out.println("RESOLVED TO "+reach); + return min; + } + + public int retryWithContext(TokenStream input, + DFA dfa, + int startIndex, + RuleContext 
originalContext, + OrderedHashSet closure, + int t, + OrderedHashSet reach, + Set ambigAlts) + { + // ASSUMES PREDICT ONLY + // retry using context, if any; if none, kill all but min as before + if ( debug ) System.out.println("RETRY with ctx="+ originalContext); + int min = getMinAlt(ambigAlts); + if ( originalContext==RuleContext.EMPTY ) { + // no point in retrying with ctx since it's same. + // this implies that we have a true ambiguity + reportAmbiguity(startIndex, input.index(), ambigAlts, reach); + return min; + } + // otherwise we have to retry with context, filling in tmp DFA. + // if it comes back with conflict, we have a true ambiguity + input.seek(startIndex); // rewind + DFA ctx_dfa = new DFA(dfa.atnStartState); + int ctx_alt = predictATN(ctx_dfa, input, dfa.decision, originalContext, true); + if ( debug ) System.out.println("retry predicts "+ctx_alt+" vs "+getMinAlt(ambigAlts)+ + " with conflict="+ctx_dfa.conflict+ + " dfa="+ctx_dfa); + if ( ctx_dfa.conflict ) reportAmbiguity(startIndex, input.index(), ambigAlts, reach); + else reportContextSensitivity(startIndex, input.index(), ambigAlts, reach); + // it's not context-sensitive; true ambig. fall thru to strip dead alts + + int predictedAlt = ctx_alt; + DFAState reachTarget = addDFAEdge(dfa, closure, t, reach); + reachTarget.isCtxSensitive = true; + if ( reachTarget.ctxToPrediction==null ) { + reachTarget.ctxToPrediction = new LinkedHashMap(); + } + reachTarget.ctxToPrediction.put(originalContext, predictedAlt); +// System.out.println("RESOLVE to "+predictedAlt); + //System.out.println(reachTarget.ctxToPrediction.size()+" size of ctx map"); + return predictedAlt; + } + + public OrderedHashSet computeStartState(ATNState p, RuleContext ctx) { + RuleContext initialContext = null; + initialContext = ctx; // always at least the implicit call to start rule + OrderedHashSet configs = new OrderedHashSet(); + prevAccept = null; // might reach end rule; track + prevAcceptIndex = -1; + + for (int i=0; i configs) { + closureBusy.clear(); + closure(config, configs, closureBusy); + } + + protected void closure(ATNConfig config, + OrderedHashSet configs, + Set closureBusy) + { + if ( debug ) System.out.println("closure("+config+")"); + + if ( closureBusy.contains(config) ) return; // avoid infinite recursion + closureBusy.add(config); + + if ( config.state instanceof RuleStopState ) { + // We hit rule end. 
If we have context info, use it + if ( config.context!=null && !config.context.isEmpty() ) { + RuleContext newContext = config.context.parent; // "pop" invoking state + ATNState invokingState = atn.states.get(config.context.invokingState); + RuleTransition rt = (RuleTransition)invokingState.transition(0); + ATNState retState = rt.followState; + ATNConfig c = new ATNConfig(retState, config.alt, newContext); + closure(c, configs, closureBusy); + return; + } + // else if we have no context info, just chase follow links + } + + ATNState p = config.state; + // optimization + if ( !p.onlyHasEpsilonTransitions() ) configs.add(config); + + for (int i=0; i=0 ) { + if ( debug ) System.out.println("DO ACTION "+at.ruleIndex+":"+at.actionIndex); + parser.action(at.ruleIndex, at.actionIndex); + } + else { + // non-forced action traversed to get to t.target + if ( debug && !config.traversedAction ) { + System.out.println("NONFORCED; pruning future pred eval derived from s"+ + config.state.stateNumber); + } + c.traversedAction = true; + } + } + else if ( t.isEpsilon() ) { + c = new ATNConfig(config, t.target); + } + return c; + } + + public void reportConflict(int startIndex, int stopIndex, Set alts, OrderedHashSet configs) { + if ( parser!=null ) parser.reportConflict(startIndex, stopIndex, alts, configs); + } + + public void reportContextSensitivity(int startIndex, int stopIndex, Set alts, OrderedHashSet configs) { + if ( parser!=null ) parser.reportContextSensitivity(startIndex, stopIndex, alts, configs); + } + + /** If context sensitive parsing, we know it's ambiguity not conflict */ + public void reportAmbiguity(int startIndex, int stopIndex, Set alts, OrderedHashSet configs) { + if ( parser!=null ) parser.reportAmbiguity(startIndex, stopIndex, alts, configs); + } + + public static int getUniqueAlt(Collection configs) { + int alt = ATN.INVALID_ALT_NUMBER; + for (ATNConfig c : configs) { + if ( alt == ATN.INVALID_ALT_NUMBER ) { + alt = c.alt; // found first alt + } + else if ( c.alt!=alt ) { + return ATN.INVALID_ALT_NUMBER; + } + } + return alt; + } + + public Set getAmbiguousAlts(OrderedHashSet configs) { +// System.err.println("check ambiguous "+configs); + Set ambigAlts = null; + int numConfigs = configs.size(); + // First get a list of configurations for each state. + // Most of the time, each state will have one associated configuration. + MultiMap stateToConfigListMap = + new MultiMap(); + for (ATNConfig c : configs) { + Integer stateI = Utils.integer(c.state.stateNumber); + stateToConfigListMap.map(stateI, c); + } + // potential conflicts are states with > 1 configuration and diff alts + for (List configsPerAlt : stateToConfigListMap.values()) { + ATNConfig goal = configsPerAlt.get(0); + int size = configsPerAlt.size(); + for (int i=1; i< size; i++) { + ATNConfig c = configsPerAlt.get(i); + if ( c.alt!=goal.alt ) { + //System.out.println("chk stack "+goal+", "+c); + boolean sameCtx = + (goal.context==null&&c.context==null) || + goal.context.equals(c.context) || + c.context.conflictsWith(goal.context); + if ( sameCtx ) { + if ( debug ) { + System.out.println("we reach state "+c.state.stateNumber+ + " in rule "+ + (parser !=null ? 
parser.getRuleNames()[c.state.ruleIndex]:"n/a")+ + " alts "+goal.alt+","+c.alt+" from ctx "+goal.context.toString((BaseRecognizer) parser) + +" and "+ + c.context.toString((BaseRecognizer) parser)); + } + if ( ambigAlts==null ) ambigAlts = new HashSet(); + ambigAlts.add(goal.alt); + ambigAlts.add(c.alt); + } + } + } + } + if ( ambigAlts!=null ) { + //System.err.println("ambig upon "+input.toString(startIndex, input.index())); + } + return ambigAlts; + } + + public static int getMinAlt(Set ambigAlts) { + int min = Integer.MAX_VALUE; + for (int alt : ambigAlts) { + if ( alt < min ) min = alt; + } + return min; + } + + public static void killAlts(Set alts, OrderedHashSet configs) { + int i = 0; + while ( i p, + int t, + OrderedHashSet q) + { +// System.out.println("MOVE "+p+" -> "+q+" upon "+getTokenName(t)); + DFAState from = addDFAState(dfa, p); + DFAState to = addDFAState(dfa, q); + addDFAEdge(from, t, to); + return to; + } + + protected void addDFAEdge(DFAState p, int t, DFAState q) { + if ( p==null ) return; + if ( p.edges==null ) { + p.edges = new DFAState[atn.maxTokenType+1+1]; // TODO: make adaptive + } + p.edges[t+1] = q; // connect + } + + /** See comment on LexerInterpreter.addDFAState. */ + protected DFAState addDFAState(DFA dfa, OrderedHashSet configs) { + DFAState proposed = new DFAState(configs); + DFAState existing = dfa.states.get(proposed); + if ( existing!=null ) return existing; + + DFAState newState = proposed; + + boolean traversedPredicate = false; + for (ATNConfig c : configs) { + if ( c.traversedPredicate ) {traversedPredicate = true; break;} + } + + if ( traversedPredicate ) return null; // cannot cache + + newState.stateNumber = dfa.states.size(); + newState.configs = new OrderedHashSet(); + newState.configs.addAll(configs); + dfa.states.put(newState, newState); + return newState; + } + + public void makeAcceptState(DFA dfa, OrderedHashSet reach, int uniqueAlt) { + DFAState accept = dfa.states.get(new DFAState(reach)); + if ( accept==null ) return; + accept.isAcceptState = true; + accept.prediction = uniqueAlt; + accept.complete = true; + } + + public String getTokenName(int t) { + if ( t==-1 ) return "EOF"; + if ( atn.g!=null ) return atn.g.getTokenDisplayName(t); + if ( parser !=null && parser.getTokenNames()!=null ) return parser.getTokenNames()[t]+"<"+t+">"; + return String.valueOf(t); + } + + public void setContextSensitive(boolean ctxSensitive) { + this.userWantsCtxSensitive = ctxSensitive; + } + + public void dumpDeadEndConfigs(NoViableAltException nvae) { + System.err.println("dead end configs: "); + for (ATNConfig c : nvae.deadEndConfigs) { + Transition t = c.state.transition(0); + String trans = ""; + if ( t instanceof AtomTransition) { + AtomTransition at = (AtomTransition)t; + trans = "Atom "+getTokenName(at.label); + } + else if ( t instanceof SetTransition ) { + SetTransition st = (SetTransition)t; + trans = "Set "+st.label.toString(); + } + System.err.println(c.toString(parser, true)+":"+trans); + } + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/PlusBlockStartState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/PlusBlockStartState.java new file mode 100644 index 000000000..72c30e92f --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PlusBlockStartState.java @@ -0,0 +1,11 @@ +package org.antlr.v4.runtime.atn; + +/** Start of (A|B|...)+ loop. Technically a decision state, but + * we don't use for code generation; somebody might need it, so I'm defining + * it for completeness. 
In reality, the PlusLoopbackState node is the + * real decision-making note for A+ + */ +public class PlusBlockStartState extends BlockStartState { + public PlusLoopbackState loopBackState; + //public BlockEndState endState; +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/PlusLoopbackState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/PlusLoopbackState.java new file mode 100644 index 000000000..4dbd4f452 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PlusLoopbackState.java @@ -0,0 +1,10 @@ +package org.antlr.v4.runtime.atn; + +/** Decision state for A+ and (A|B)+. The first + * transition points at the start of the first alternative. + * The last transition is the exit transition. + */ +public class PlusLoopbackState extends DecisionState { + @Override + public boolean onlyHasEpsilonTransitions() { return true; } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/PredicateTransition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/PredicateTransition.java new file mode 100644 index 000000000..bc0e24dd1 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PredicateTransition.java @@ -0,0 +1,63 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.analysis.SemanticContext; +import org.antlr.v4.tool.Grammar; +import org.antlr.v4.tool.GrammarAST; + +/** TODO: this is old comment: + * A tree of semantic predicates from the grammar AST if label==SEMPRED. + * In the ATN, labels will always be exactly one predicate, but the DFA + * may have to combine a bunch of them as it collects predicates from + * multiple ATN configurations into a single DFA state. + */ +public class PredicateTransition extends Transition { + public int ruleIndex; + public int predIndex; + public GrammarAST predAST; + public SemanticContext semanticContext; + + public PredicateTransition(GrammarAST predicateASTNode, ATNState target) { + super(target); + this.predAST = predicateASTNode; + this.semanticContext = new SemanticContext.Predicate(predicateASTNode); + } + + public PredicateTransition(ATNState target, int ruleIndex, int predIndex) { + super(target); + this.ruleIndex = ruleIndex; + this.predIndex = predIndex; + } + + public boolean isEpsilon() { return true; } + + public int compareTo(Object o) { + return 0; + } + + public int hashCode() { + return semanticContext.hashCode(); + } + + public boolean equals(Object o) { + if ( o==null ) { + return false; + } + if ( this == o ) { + return true; // equals if same object + } + if ( !(o instanceof PredicateTransition) ) { + return false; + } + return semanticContext.equals(((PredicateTransition)o).semanticContext); + } + + public String toString() { + if ( semanticContext!=null ) return semanticContext.toString(); + if ( predAST!=null ) return predAST.getText(); + return "pred-"+ruleIndex+":"+predIndex; + } + + public String toString(Grammar g) { + return toString(); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/RangeTransition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/RangeTransition.java new file mode 100644 index 000000000..57193789f --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/RangeTransition.java @@ -0,0 +1,30 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.misc.CharSupport; +import org.antlr.v4.misc.IntervalSet; + +public class RangeTransition extends Transition { + public int from; + public int to; + public RangeTransition(int from, int to, ATNState target) { + super(target); + this.from = from; + this.to = to; + } + public RangeTransition(ATNState target) { + 
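+ // from/to are not set by this constructor; the creator fills them in later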
super(target); + } + + public int compareTo(Object o) { + return 0; + } + + @Override + public IntervalSet label() { return IntervalSet.of(from,to); } + + @Override + public String toString() { + return CharSupport.getANTLRCharLiteralForChar(from)+".."+ + CharSupport.getANTLRCharLiteralForChar(to); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/RuleStartState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/RuleStartState.java new file mode 100644 index 000000000..3b366547f --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/RuleStartState.java @@ -0,0 +1,5 @@ +package org.antlr.v4.runtime.atn; + +public class RuleStartState extends ATNState { + public RuleStopState stopState; +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/RuleStopState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/RuleStopState.java new file mode 100644 index 000000000..41663baec --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/RuleStopState.java @@ -0,0 +1,33 @@ +package org.antlr.v4.runtime.atn; + +import java.util.ArrayList; +import java.util.List; + +/** The last node in the ATN for a rule, unless that rule is the start symbol. + * In that case, there is one transition to EOF. Later, we might encode + * references to all calls to this rule to compute FOLLOW sets for + * error handling. + */ +public class RuleStopState extends ATNState { + public static final int INITIAL_NUM_TRANSITIONS = 4; + + //public int actionIndex; // for lexer, this is right edge action in rule + + /** Track the transitions emanating from this ATN state. */ + protected List transitions = + new ArrayList(INITIAL_NUM_TRANSITIONS); + + @Override + public int getNumberOfTransitions() { return transitions.size(); } + + @Override + public void addTransition(Transition e) { transitions.add(e); } + + @Override + public Transition transition(int i) { return transitions.get(i); } + + @Override + public void setTransition(int i, Transition e) { + transitions.set(i, e); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/RuleTransition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/RuleTransition.java new file mode 100644 index 000000000..71b7371b1 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/RuleTransition.java @@ -0,0 +1,41 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.tool.Rule; + +/** */ +public class RuleTransition extends Transition { + /** Ptr to the rule definition object for this rule ref */ + public Rule rule; + public int ruleIndex; // no Rule object at runtime + + /** What node to begin computations following ref to rule */ + public ATNState followState; + + public RuleTransition(Rule rule, + ATNState ruleStart, + ATNState followState) + { + super(ruleStart); + this.rule = rule; + this.followState = followState; + } + + public RuleTransition(int ruleIndex, + ATNState ruleStart, + ATNState followState) + { + super(ruleStart); + this.ruleIndex = ruleIndex; + this.followState = followState; + } + + public RuleTransition(ATNState ruleStart) { + super(ruleStart); + } + + public boolean isEpsilon() { return true; } + + public int compareTo(Object o) { + return 0; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/SetTransition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/SetTransition.java new file mode 100644 index 000000000..039485897 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/SetTransition.java @@ -0,0 +1,54 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.misc.IntervalSet; +import org.antlr.v4.runtime.Token; 
+import org.antlr.v4.tool.*; + +/** A transition containing a set of values */ +public class SetTransition extends Transition { + public IntervalSet label; + public GrammarAST ast; // ~ of ~atom tree, wildcard node + + public SetTransition(GrammarAST ast, IntervalSet label, ATNState target) { + super(target); + this.ast = ast; + if ( label==null ) label = IntervalSet.of(Token.INVALID_TYPE); + this.label = label; + } + + public SetTransition(ATNState target) { + super(target); + } + + public IntervalSet label() { return label; } + + public int compareTo(Object o) { + return 0; + } + + // public boolean intersect(Label other) { +// if ( other.getClass() == SetTransition.class ) { +// return label.and(((SetTransition)other).label).isNil(); +// } +// return label.member(((AtomTransition)other).label); +// } + + public int hashCode() { return label.hashCode(); } + + public boolean equals(Object o) { + if ( o==null ) return false; + if ( this == o ) return true; // equals if same object + if ( o.getClass() == AtomTransition.class ) { + o = IntervalSet.of(((AtomTransition)o).label); + } + return this.label.equals(((SetTransition)o).label); + } + + public String toString(Grammar g) { + return label.toString(g); + } + + public String toString() { + return label.toString(); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/StarBlockStartState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/StarBlockStartState.java new file mode 100644 index 000000000..4d4d2dcff --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/StarBlockStartState.java @@ -0,0 +1,6 @@ +package org.antlr.v4.runtime.atn; + +/** The block that begins a closure loop. */ +public class StarBlockStartState extends BlockStartState { +// public StarLoopbackState loopBackState; +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/StarLoopbackState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/StarLoopbackState.java new file mode 100644 index 000000000..7902e8039 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/StarLoopbackState.java @@ -0,0 +1,4 @@ +package org.antlr.v4.runtime.atn; + +public class StarLoopbackState extends ATNState { +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ThreadState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ThreadState.java new file mode 100644 index 000000000..9771f3099 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ThreadState.java @@ -0,0 +1,41 @@ +package org.antlr.v4.runtime.atn; + +/** ATN simulation thread state */ +public class ThreadState { + public int addr; + public int alt; // or speculatively matched token type for lexers + public ATNStack context; + public int inputIndex = -1; // char (or token?) 
index from 0 + public int inputMarker = -1; // accept states track input markers in case we need to rewind + + public ThreadState(int addr, int alt, ATNStack context) { + this.addr = addr; + this.alt = alt; + this.context = context; + } + + public ThreadState(ThreadState t) { + this.addr = t.addr; + this.alt = t.alt; + this.context = t.context; + this.inputIndex = t.inputIndex; + } + + public boolean equals(Object o) { + if ( o==null ) return false; + if ( this==o ) return true; + ThreadState other = (ThreadState)o; + return this.addr==other.addr && + this.alt==other.alt && + this.context.equals(other.context); + } + + public int hashCode() { return addr + context.hashCode(); } + + public String toString() { + if ( context.parent==null ) { + return "("+addr+","+alt+")"; + } + return "("+addr+","+alt+","+context+")"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/TokensStartState.java b/runtime/Java/src/org/antlr/v4/runtime/atn/TokensStartState.java new file mode 100644 index 000000000..bdfbdb9c1 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/TokensStartState.java @@ -0,0 +1,5 @@ +package org.antlr.v4.runtime.atn; + +/** The Tokens rule start state linking to each lexer rule start state */ +public class TokensStartState extends BlockStartState { +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/Transition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/Transition.java new file mode 100644 index 000000000..477ae91d5 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/Transition.java @@ -0,0 +1,80 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.misc.IntervalSet; +import org.antlr.v4.tool.Grammar; + +import java.util.HashMap; +import java.util.Map; + +/** An ATN transition between any two ATN states. Subclasses define + * atom, set, epsilon, action, predicate, rule transitions. + * + * This is a one way link. It emanates from a state (usually via a list of + * transitions) and has a target state. + * + * Since we never have to change the ATN transitions once we construct it, + * we can fix these transitions as specific classes. The DFA transitions + * on the other hand need to update the labels as it adds transitions to + * the states. We'll use the term Edge for the DFA to distinguish them from + * ATN transitions. + */ +public abstract class Transition implements Comparable { + // constants for serialization + public static final int EPSILON = 1; + public static final int RANGE = 2; + public static final int RULE = 3; + public static final int PREDICATE = 4; + public static final int ATOM = 5; + public static final int ACTION = 6; + public static final int FORCED_ACTION = 7; + public static final int SET = 8; // ~(A|B) or ~atom, wildcard, which convert to next 2 + public static final int NOT_ATOM = 9; + public static final int NOT_SET = 10; + public static final int WILDCARD = 11; + + + public static String[] serializationNames = { + "INVALID", + "EPSILON", + "RANGE", + "RULE", + "PREDICATE", + "ATOM", + "ACTION", + "FORCED_ACTION", + "SET", + "NOT_ATOM", + "NOT_SET", + "WILDCARD", + }; + + public static Map serializationTypes = + new HashMap() {{ + put(EpsilonTransition.class, EPSILON); + put(RangeTransition.class, RANGE); + put(RuleTransition.class, RULE); + put(PredicateTransition.class, PREDICATE); + put(AtomTransition.class, ATOM); + put(ActionTransition.class, ACTION); // TODO: FORCED? 
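+ // note: no Transition subclass maps to FORCED_ACTION in this table yet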
+ put(SetTransition.class, SET); + put(NotAtomTransition.class, NOT_ATOM); + put(NotSetTransition.class, NOT_SET); + put(WildcardTransition.class, WILDCARD); + }}; + + /** The target of this transition */ + public ATNState target; + + public Transition() { } + + public Transition(ATNState target) { this.target = target; } + + public int getSerializationType() { return 0; } + + /** Are we epsilon, action, sempred? */ + public boolean isEpsilon() { return false; } + + public IntervalSet label() { return null; } + + public String toString(Grammar g) { return toString(); } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/WildcardTransition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/WildcardTransition.java new file mode 100644 index 000000000..0b6dbf3fb --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/WildcardTransition.java @@ -0,0 +1,15 @@ +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.tool.Grammar; + +public class WildcardTransition extends Transition { + public WildcardTransition(ATNState target) { super(target); } + public int compareTo(Object o) { + return 0; + } + + @Override + public String toString(Grammar g) { + return "."; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/dfa/DFA.java b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFA.java new file mode 100644 index 000000000..bd3f5e0f1 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFA.java @@ -0,0 +1,107 @@ +/* + [BSD] + Copyright (c) 2010 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.dfa; + +import org.antlr.v4.runtime.atn.ATNState; + +import java.util.*; + +public class DFA { + /** A set of all DFA states. Use Map so we can get old state back + * (Set only allows you to see if it's there). + */ + public Map states = new LinkedHashMap(); + public DFAState s0; + public int decision; +// public int maxTokenType; + + /** From which ATN state did we create this DFA? */ + public ATNState atnStartState; + + /** Does at least one state have a conflict? 
Mainly used as return value + * from predictATN() + */ + public boolean conflict; + + public DFA(ATNState atnStartState) { this.atnStartState = atnStartState; } +// public DFA(int maxTokenType) { this.maxTokenType = maxTokenType; } + +/* + public void addAll(Collection states) { + for (DFAState p : states) { + //addDFAEdge(p, t, q); + } + } + + public void addDFAEdge(OrderedHashSet p, + int t, + OrderedHashSet q) + { +// System.out.println("MOVE "+p+" -> "+q+" upon "+getTokenName(t)); + DFAState from = addDFAState(p); + DFAState to = addDFAState(q); + addDFAEdge(from, t, to); + } + + public void addDFAEdge(DFAState p, int t, DFAState q) { + if ( p.edges==null ) { + p.edges = new DFAState[maxTokenType+1]; // TODO: make adaptive + } + p.edges[t] = q; // connect + } + + protected DFAState addDFAState(OrderedHashSet configs) { + DFAState proposed = new DFAState(configs); + DFAState existing = states.get(proposed); + DFAState p; + if ( existing!=null ) p = existing; + else { + proposed.stateNumber = states.size(); + proposed.configs = new OrderedHashSet(); + proposed.configs.addAll(configs); + states.put(proposed, proposed); + p = proposed; + } + return p; + } + */ + + public String toString() { return toString(null); } + + public String toString(String[] tokenNames) { + if ( s0==null ) return ""; + DFASerializer serializer = new DFASerializer(this,tokenNames); + return serializer.toString(); + } + + public String toLexerString() { + if ( s0==null ) return ""; + DFASerializer serializer = new LexerDFASerializer(this); + return serializer.toString(); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/dfa/DFASerializer.java b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFASerializer.java new file mode 100644 index 000000000..bcbaa5314 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFASerializer.java @@ -0,0 +1,52 @@ +package org.antlr.v4.runtime.dfa; + +/** A DFA walker that knows how to dump them to serialized strings. */ +public class DFASerializer { + String[] tokenNames; + DFA dfa; + + public DFASerializer(DFA dfa, String[] tokenNames) { + this.dfa = dfa; + this.tokenNames = tokenNames; + } + + public String toString() { + if ( dfa.s0==null ) return null; + StringBuilder buf = new StringBuilder(); + for (DFAState s : dfa.states.values()) { + int n = 0; + if ( s.edges!=null ) n = s.edges.length; + for (int i=0; i"+ getStateString(t)+'\n'); + } + } + } + String output = buf.toString(); + //return Utils.sortLinesInString(output); + return output; + } + + protected String getEdgeLabel(int i) { + String label; + if ( i==0 ) return "EOF"; + if ( tokenNames!=null ) label = tokenNames[i-1]; + else label = String.valueOf(i-1); + return label; + } + + String getStateString(DFAState s) { + int n = s.stateNumber; + String stateStr = "s"+n; + if ( s.isAcceptState ) { + stateStr = ":s"+n+"=>"+s.prediction; + } + if ( s.isCtxSensitive ) { + stateStr = ":s"+n+"@"+s.ctxToPrediction; + } + return stateStr; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/dfa/DFAState.java b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFAState.java new file mode 100644 index 000000000..41fc86e86 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFAState.java @@ -0,0 +1,107 @@ +package org.antlr.v4.runtime.dfa; + +import org.antlr.v4.analysis.ATNConfig; +import org.antlr.v4.misc.OrderedHashSet; +import org.antlr.v4.runtime.RuleContext; + +import java.util.*; + +/** A DFA state represents a set of possible ATN configurations. + * As Aho, Sethi, Ullman p. 
117 says "The DFA uses its state + * to keep track of all possible states the ATN can be in after + * reading each input symbol. That is to say, after reading + * input a1a2..an, the DFA is in a state that represents the + * subset T of the states of the ATN that are reachable from the + * ATN's start state along some path labeled a1a2..an." + * In conventional NFA->DFA conversion, therefore, the subset T + * would be a bitset representing the set of states the + * ATN could be in. We need to track the alt predicted by each + * state as well, however. More importantly, we need to maintain + * a stack of states, tracking the closure operations as they + * jump from rule to rule, emulating rule invocations (method calls). + * Recall that ATNs do not normally have a stack like a pushdown-machine + * so I have to add one to simulate the proper lookahead sequences for + * the underlying LL grammar from which the ATN was derived. + * + * I use a list of ATNConfig objects. An ATNConfig + * is both a state (ala normal conversion) and a RuleContext describing + * the chain of rules (if any) followed to arrive at that state. + * + * A DFA state may have multiple references to a particular state, + * but with different ATNContexts (with same or different alts) + * meaning that state was reached via a different set of rule invocations. + */ +public class DFAState { + public int stateNumber = -1; + + /** The set of ATN configurations (state,alt,context) for this DFA state */ + public OrderedHashSet configs = new OrderedHashSet(); + + /** edges[symbol] points to target of symbol */ + public DFAState[] edges; + +// public IntervalSet viableChars; + + public boolean isAcceptState = false; + + public int prediction; // if accept state, what ttype do we match? + + public int ruleIndex; // if accept, exec what action? + + public boolean complete; // all alts predict "prediction" + public boolean isCtxSensitive; + + public Map ctxToPrediction; + + public DFAState() { } + + public DFAState(int stateNumber) { this.stateNumber = stateNumber; } + + public DFAState(OrderedHashSet configs) { this.configs = configs; } + + /** Get the set of all alts mentioned by all ATN configurations in this + * DFA state. + */ + public Set getAltSet() { + Set alts = new HashSet(); + for (ATNConfig c : configs) { + alts.add(c.alt); + } + if ( alts.size()==0 ) return null; + return alts; + } + + /** A decent hash for a DFA state is the sum of the ATN state/alt pairs. */ + public int hashCode() { + int h = 0; + for (ATNConfig c : configs) { + h += c.alt; + } + return h; + } + + /** Two DFAStates are equal if their ATN configuration sets are the + * same. This method is used to see if a DFA state already exists. + * + * Because the number of alternatives and number of ATN configurations are + * finite, there is a finite number of DFA states that can be processed. + * This is necessary to show that the algorithm terminates. + * + * Cannot test the DFA state numbers here because in DFA.addState we need + * to know if any other state exists that has this exact set of ATN + * configurations. The DFAState state number is irrelevant. 
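+ * Accordingly, hashCode() sums the alts of the configurations and equals()
+ * compares only the configuration sets, never the state numbers.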
+ */ + public boolean equals(Object o) { + // compare set of ATN configurations in this set with other + if ( this==o ) return true; + DFAState other = (DFAState)o; + boolean sameSet = this.configs.equals(other.configs); +// System.out.println("DFAState.equals: "+configs+(sameSet?"==":"!=")+other.configs); + return sameSet; + } + + + public String toString() { + return stateNumber+":"+configs+(isAcceptState?("=>"+prediction):""); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/dfa/LexerDFASerializer.java b/runtime/Java/src/org/antlr/v4/runtime/dfa/LexerDFASerializer.java new file mode 100644 index 000000000..f77ffa0d1 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/dfa/LexerDFASerializer.java @@ -0,0 +1,12 @@ +package org.antlr.v4.runtime.dfa; + +public class LexerDFASerializer extends DFASerializer { + public LexerDFASerializer(DFA dfa) { + super(dfa, null); + } + + @Override + protected String getEdgeLabel(int i) { + return "'"+(char)i+"'"; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/FastQueue.java b/runtime/Java/src/org/antlr/v4/runtime/misc/FastQueue.java new file mode 100644 index 000000000..bc8c2442b --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/misc/FastQueue.java @@ -0,0 +1,100 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.misc; + +import java.util.ArrayList; +import java.util.List; +import java.util.NoSuchElementException; + +/** A queue that can dequeue and get(i) in O(1) and grow arbitrarily large. + * A linked list is fast at dequeue but slow at get(i). An array is + * the reverse. This is O(1) for both operations. + * + * List grows until you dequeue last element at end of buffer. Then + * it resets to start filling at 0 again. If adds/removes are balanced, the + * buffer will not grow too large. + * + * No iterator stuff as that's not how we'll use it. + */ +public class FastQueue { + /** dynamically-sized buffer of elements */ + protected List data = new ArrayList(); + /** index of next element to fill */ + protected int p = 0; + protected int range = -1; // how deep have we gone? 
+ + public void reset() { clear(); } + public void clear() { p = 0; data.clear(); } + + /** Get and remove first element in queue */ + public T remove() { + T o = elementAt(0); + p++; + // have we hit end of buffer? + if ( p == data.size() ) { + // if so, it's an opportunity to start filling at index 0 again + clear(); // size goes to 0, but retains memory + } + return o; + } + + public void add(T o) { data.add(o); } + + public int size() { return data.size() - p; } + + public int range() { return range; } + + public T head() { return elementAt(0); } + + /** Return element i elements ahead of current element. i==0 gets + * current element. This is not an absolute index into the data list + * since p defines the start of the real list. + */ + public T elementAt(int i) { + int absIndex = p + i; + if ( absIndex >= data.size() ) { + throw new NoSuchElementException("queue index "+ absIndex +" > last index "+(data.size()-1)); + } + if ( absIndex < 0 ) { + throw new NoSuchElementException("queue index "+ absIndex +" < 0"); + } + if ( absIndex>range ) range = absIndex; + return data.get(absIndex); + } + + /** Return string of current buffer contents; non-destructive */ + public String toString() { + StringBuffer buf = new StringBuffer(); + int n = size(); + for (int i=0; i> LOG_BITS) + 1]; + } + + /** Construction from a static array of longs */ + public LABitSet(long[] bits_) { + if ( bits_==null || bits_.length==0 ) bits = new long[1]; + else bits = bits_; + } + + /** Construction from a static array of longs */ + public LABitSet(long[] bits_, boolean EOF) { + this(bits_); + this.EOF = EOF; + } + + public static LABitSet of(int el) { + LABitSet s = new LABitSet(el + 1); + s.add(el); + return s; + } + + /** or this element into this set (grow as necessary to accommodate) */ + public void add(int el) { + //System.out.println("add("+el+")"); + if ( el==Token.EOF ) { EOF = true; return; } + int n = wordNumber(el); + //System.out.println("word number is "+n); + //System.out.println("bits.length "+bits.length); + if (n >= bits.length) { + growToInclude(el); + } + bits[n] |= bitMask(el); + } + + public boolean member(int el) { + if ( el == Token.EOF ) return EOF; + int n = wordNumber(el); + if (n >= bits.length) return false; + return (bits[n] & bitMask(el)) != 0; + } + + /** return this | a in a new set */ + public LABitSet or(LABitSet a) { + if ( a==null ) { + return this; + } + LABitSet s = (LABitSet)this.clone(); + s.orInPlace((LABitSet)a); + return s; + } + + public void orInPlace(LABitSet a) { + if ( a==null ) { + return; + } + // If this is smaller than a, grow this first + if (a.bits.length > bits.length) { + setSize(a.bits.length); + } + int min = Math.min(bits.length, a.bits.length); + for (int i = min - 1; i >= 0; i--) { + bits[i] |= a.bits[i]; + } + EOF = EOF | a.EOF; + } + + // remove this element from this set + public void remove(int el) { + if ( el==Token.EOF ) { EOF = false; return; } + int n = wordNumber(el); + if (n >= bits.length) { + throw new IllegalArgumentException(el+" is outside set range of "+bits.length+" words"); + } + bits[n] &= ~bitMask(el); + } + + public Object clone() { + LABitSet s; + try { + s = (LABitSet)super.clone(); + s.bits = new long[bits.length]; + System.arraycopy(bits, 0, s.bits, 0, bits.length); + s.EOF = EOF; + return s; + } + catch (CloneNotSupportedException e) { + e.printStackTrace(System.err); + } + return null; + } + + /** + * Sets the size of a set. 
+ * @param nwords how many words the new set should be + */ + void setSize(int nwords) { + long newbits[] = new long[nwords]; + int n = Math.min(nwords, bits.length); + System.arraycopy(bits, 0, newbits, 0, n); + bits = newbits; + } + + /** Get the first element you find and return it. */ + public int getSingleElement() { + for (int i = 0; i < (bits.length << LOG_BITS); i++) { + if (member(i)) { + return i; + } + } + return Token.INVALID_TYPE; + } + + /** Transform a bit set into a string by formatting each element as an integer + * separator The string to put in between elements + * @return A commma-separated list of values + */ + public String toString() { + StringBuffer buf = new StringBuffer(); + String separator = ","; + boolean havePrintedAnElement = false; + buf.append('{'); + if ( EOF ) { buf.append("EOF"); havePrintedAnElement=true; } + + for (int i = 0; i < (bits.length << LOG_BITS); i++) { + if (member(i)) { + if ( havePrintedAnElement ) { + buf.append(separator); + } + buf.append(i); + havePrintedAnElement = true; + } + } + buf.append('}'); + return buf.toString(); + } + +// /**Create a string representation where instead of integer elements, the +// * ith element of vocabulary is displayed instead. Vocabulary is a Vector +// * of Strings. +// * separator The string to put in between elements +// * @return A commma-separated list of character constants. +// */ +// public String toString(String separator, List vocabulary) { +// String str = ""; +// for (int i = 0; i < (bits.length << LOG_BITS); i++) { +// if (member(i)) { +// if (str.length() > 0) { +// str += separator; +// } +// if (i >= vocabulary.size()) { +// str += "'" + (char)i + "'"; +// } +// else if (vocabulary.get(i) == null) { +// str += "'" + (char)i + "'"; +// } +// else { +// str += (String)vocabulary.get(i); +// } +// } +// } +// return str; +// } + + /** + * Grows the set to a larger number of bits. + * @param bit element that must fit in set + */ + public void growToInclude(int bit) { + int newSize = Math.max(bits.length << 1, numWordsToHold(bit)); + long newbits[] = new long[newSize]; + System.arraycopy(bits, 0, newbits, 0, bits.length); + bits = newbits; + } + + static long bitMask(int bitNumber) { + int bitPosition = bitNumber & MOD_MASK; // bitNumber mod BITS + return 1L << bitPosition; + } + + static int numWordsToHold(int el) { + return (el >> LOG_BITS) + 1; + } + + static int wordNumber(int bit) { + return bit >> LOG_BITS; // bit / BITS + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/LookaheadStream.java b/runtime/Java/src/org/antlr/v4/runtime/misc/LookaheadStream.java new file mode 100644 index 000000000..995aba26e --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/misc/LookaheadStream.java @@ -0,0 +1,161 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.misc; + +import org.antlr.runtime.misc.FastQueue; + +import java.util.NoSuchElementException; + +/** A lookahead queue that knows how to mark/release locations + * in the buffer for backtracking purposes. Any markers force the FastQueue + * superclass to keep all tokens until no more markers; then can reset + * to avoid growing a huge buffer. + */ +public abstract class LookaheadStream extends FastQueue { + public static final int UNINITIALIZED_EOF_ELEMENT_INDEX = Integer.MAX_VALUE; + + /** Absolute token index. It's the index of the symbol about to be + * read via LT(1). Goes from 0 to numtokens. + */ + protected int currentElementIndex = 0; + + protected T prevElement; + + /** Track object returned by nextElement upon end of stream; + * Return it later when they ask for LT passed end of input. + */ + public T eof = null; + + /** Track the last mark() call result value for use in rewind(). */ + protected int lastMarker; + + /** tracks how deep mark() calls are nested */ + protected int markDepth = 0; + + public void reset() { + super.reset(); + currentElementIndex = 0; + p = 0; + prevElement=null; + } + + /** Implement nextElement to supply a stream of elements to this + * lookahead buffer. Return eof upon end of the stream we're pulling from. + */ + public abstract T nextElement(); + + public abstract boolean isEOF(T o); + + /** Get and remove first element in queue; override FastQueue.remove(); + * it's the same, just checks for backtracking. + */ + public T remove() { + T o = elementAt(0); + p++; + // have we hit end of buffer and not backtracking? + if ( p == data.size() && markDepth==0 ) { + // if so, it's an opportunity to start filling at index 0 again + clear(); // size goes to 0, but retains memory + } + return o; + } + + /** Make sure we have at least one element to remove, even if EOF */ + public void consume() { + syncAhead(1); + prevElement = remove(); + currentElementIndex++; + } + + /** Make sure we have 'need' elements from current position p. Last valid + * p index is data.size()-1. p+need-1 is the data index 'need' elements + * ahead. If we need 1 element, (p+1-1)==p must be < data.size(). + */ + protected void syncAhead(int need) { + int n = (p+need-1) - data.size() + 1; // how many more elements we need? + if ( n > 0 ) fill(n); // out of elements? 
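A worked check of the arithmetic above (added for illustration, not in the original source): with p=3 and data.size()==4 there is exactly one buffered element, so

    need=1: n = (3+1-1) - 4 + 1 = 0, nothing is fetched;
    need=2: n = (3+2-1) - 4 + 1 = 1, and fill(1) pulls one more element from nextElement().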
+ }
+
+ /** add n elements to buffer */
+ public void fill(int n) {
+     for (int i=1; i<=n; i++) {
+         T o = nextElement();
+         if ( isEOF(o) ) eof = o;
+         data.add(o);
+     }
+ }
+
+ /** Size of entire stream is unknown; we only know buffer size from FastQueue */
+ public int size() { throw new UnsupportedOperationException("streams are of unknown size"); }
+
+ public T LT(int k) {
+     if ( k==0 ) {
+         return null;
+     }
+     if ( k<0 ) return LB(-k);
+     //System.out.print("LT(p="+p+","+k+")=");
+     syncAhead(k);
+     if ( (p+k-1) > data.size() ) return eof;
+     return elementAt(k-1);
+ }
+
+ public int index() { return currentElementIndex; }
+
+ public int mark() {
+     markDepth++;
+     lastMarker = p; // track where we are in buffer not absolute token index
+     return lastMarker;
+ }
+
+ public void release(int marker) {
+     // no resources to release
+ }
+
+ public void rewind(int marker) {
+     markDepth--;
+     seek(marker); // assume marker is top
+     // release(marker); // waste of call; it does nothing in this class
+ }
+
+ public void rewind() {
+     seek(lastMarker); // rewind but do not release marker
+ }
+
+ /** Seek to a 0-indexed position within data buffer. Can't handle
+  * case where you seek beyond end of existing buffer. Normally used
+  * to seek backwards in the buffer. Does not force loading of nodes.
+  * Doesn't seek to absolute position in input stream since this stream
+  * is unbuffered. Seeks only into our moving window of elements.
+  */
+ public void seek(int index) { p = index; }
+
+ protected T LB(int k) {
+     if ( k==1 ) return prevElement;
+     throw new NoSuchElementException("can't look backwards more than one token in this stream");
+ }
+}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/QStack.java b/runtime/Java/src/org/antlr/v4/runtime/misc/QStack.java
new file mode 100644
index 000000000..e6b29151a
--- /dev/null
+++ b/runtime/Java/src/org/antlr/v4/runtime/misc/QStack.java
@@ -0,0 +1,46 @@
+package org.antlr.v4.runtime.misc;
+
+import java.util.EmptyStackException;
+
+/** A quicker stack than Stack */
+public class QStack<T> {
+ T[] elements;
+ public int sp = -1;
+
+ public QStack() {
+     elements = (T[])new Object[10];
+ }
+
+ public QStack(QStack<T> s) {
+     elements = (T[])new Object[s.elements.length];
+     System.arraycopy(s.elements, 0, elements, 0, s.elements.length);
+     this.sp = s.sp;
+ }
+
+ public void push(T fset) {
+     if ( (sp+1)>=elements.length ) {
+         T[] f = (T[])new Object[elements.length*2];
+         System.arraycopy(elements, 0, f, 0, elements.length);
+         elements = f;
+     }
+     elements[++sp] = fset;
+ }
+
+ public T peek() {
+     if ( sp<0 ) throw new EmptyStackException();
+     return elements[sp];
+ }
+
+ public T get(int i) {
+     if ( i<0 ) throw new IllegalArgumentException("i<0");
+     if ( i>sp ) throw new IllegalArgumentException("i>"+sp);
+     return elements[i];
+ }
+
+ public T pop() {
+     if ( sp<0 ) throw new EmptyStackException();
+     return elements[sp--];
+ }
+
+ public void clear() { sp = -1; }
+}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/BaseTree.java b/runtime/Java/src/org/antlr/v4/runtime/tree/BaseTree.java
new file mode 100644
index 000000000..b0b481b08
--- /dev/null
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/BaseTree.java
@@ -0,0 +1,349 @@
+/*
+ [The "BSD license"]
+ Copyright (c) 2005-2009 Terence Parr
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1.
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.tree; + +import java.util.ArrayList; +import java.util.List; + +/** A generic tree implementation with no payload. You must subclass to + * actually have any user data. ANTLR v3 uses a list of children approach + * instead of the child-sibling approach in v2. A flat tree (a list) is + * an empty node whose children represent the list. An empty, but + * non-null node is called "nil". + */ +public abstract class BaseTree implements Tree { + protected List children; + + public BaseTree() { + } + + /** Create a new node from an existing node does nothing for BaseTree + * as there are no fields other than the children list, which cannot + * be copied as the children are not considered part of this node. + */ + public BaseTree(Tree node) { + } + + public Tree getChild(int i) { + if ( children==null || i>=children.size() ) { + return null; + } + return (Tree)children.get(i); + } + + /** Get the children internal List; note that if you directly mess with + * the list, do so at your own risk. + */ + public List getChildren() { + return children; + } + + public Tree getFirstChildWithType(int type) { + for (int i = 0; children!=null && i < children.size(); i++) { + Tree t = (Tree) children.get(i); + if ( t.getType()==type ) { + return t; + } + } + return null; + } + + public int getChildCount() { + if ( children==null ) { + return 0; + } + return children.size(); + } + + /** Add t as child of this node. + * + * Warning: if t has no children, but child does + * and child isNil then this routine moves children to t via + * t.children = child.children; i.e., without copying the array. 
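A small sketch of the nil-flattening case described above (illustrative only, not from the original source; rootTok, aTok and bTok are hypothetical Token objects):

    CommonTree root = new CommonTree(rootTok);
    CommonTree list = new CommonTree((Token)null);   // token==null, so isNil() is true
    list.addChild(new CommonTree(aTok));
    list.addChild(new CommonTree(bTok));
    root.addChild(list);   // root had no children, so it adopts list.children directly

After the last call root's children are the a and b nodes themselves; no nil node remains in the tree.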
+ */ + public void addChild(Tree t) { + //System.out.println("add child "+t.toStringTree()+" "+this.toStringTree()); + //System.out.println("existing children: "+children); + if ( t==null ) { + return; // do nothing upon addChild(null) + } + BaseTree childTree = (BaseTree)t; + if ( childTree.isNil() ) { // t is an empty node possibly with children + if ( this.children!=null && this.children == childTree.children ) { + throw new RuntimeException("attempt to add child list to itself"); + } + // just add all of childTree's children to this + if ( childTree.children!=null ) { + if ( this.children!=null ) { // must copy, this has children already + int n = childTree.children.size(); + for (int i = 0; i < n; i++) { + Tree c = (Tree)childTree.children.get(i); + this.children.add(c); + // handle double-link stuff for each child of nil root + c.setParent(this); + c.setChildIndex(children.size()-1); + } + } + else { + // no children for this but t has children; just set pointer + // call general freshener routine + this.children = childTree.children; + this.freshenParentAndChildIndexes(); + } + } + } + else { // child is not nil (don't care about children) + if ( children==null ) { + children = createChildrenList(); // create children list on demand + } + children.add(t); + childTree.setParent(this); + childTree.setChildIndex(children.size()-1); + } + // System.out.println("now children are: "+children); + } + + /** Add all elements of kids list as children of this node */ + public void addChildren(List kids) { + for (int i = 0; i < kids.size(); i++) { + Tree t = (Tree) kids.get(i); + addChild(t); + } + } + + public void setChild(int i, Tree t) { + if ( t==null ) { + return; + } + if ( t.isNil() ) { + throw new IllegalArgumentException("Can't set single child to a list"); + } + if ( children==null ) { + children = createChildrenList(); + } + children.set(i, t); + t.setParent(this); + t.setChildIndex(i); + } + + public Object deleteChild(int i) { + if ( children==null ) { + return null; + } + Tree killed = (Tree)children.remove(i); + // walk rest and decrement their child indexes + this.freshenParentAndChildIndexes(i); + return killed; + } + + /** Delete children from start to stop and replace with t even if t is + * a list (nil-root tree). num of children can increase or decrease. + * For huge child lists, inserting children can force walking rest of + * children to set their childindex; could be slow. 
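A rough illustration of these semantics (added here; not in the original javadoc; the names are made up):

    // starting from children [A, B, C, D]:
    parent.replaceChildren(1, 2, x);        // x a single node           -> [A, x, D]
    // starting again from [A, B, C, D]:
    parent.replaceChildren(1, 1, nilList);  // nil root holding [x1, x2] -> [A, x1, x2, C, D]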
+ */ + public void replaceChildren(int startChildIndex, int stopChildIndex, Object t) { + /* + System.out.println("replaceChildren "+startChildIndex+", "+stopChildIndex+ + " with "+((BaseTree)t).toStringTree()); + System.out.println("in="+toStringTree()); + */ + if ( children==null ) { + throw new IllegalArgumentException("indexes invalid; no children in list"); + } + int replacingHowMany = stopChildIndex - startChildIndex + 1; + int replacingWithHowMany; + BaseTree newTree = (BaseTree)t; + List newChildren = null; + // normalize to a list of children to add: newChildren + if ( newTree.isNil() ) { + newChildren = newTree.children; + } + else { + newChildren = new ArrayList(1); + newChildren.add(newTree); + } + replacingWithHowMany = newChildren.size(); + int numNewChildren = newChildren.size(); + int delta = replacingHowMany - replacingWithHowMany; + // if same number of nodes, do direct replace + if ( delta == 0 ) { + int j = 0; // index into new children + for (int i=startChildIndex; i<=stopChildIndex; i++) { + BaseTree child = (BaseTree)newChildren.get(j); + children.set(i, child); + child.setParent(this); + child.setChildIndex(i); + j++; + } + } + else if ( delta > 0 ) { // fewer new nodes than there were + // set children and then delete extra + for (int j=0; j0 ) { + buf.append(' '); + } + buf.append(t.toStringTree()); + } + if ( !isNil() ) { + buf.append(")"); + } + return buf.toString(); + } + + public int getLine() { + return 0; + } + + public int getCharPositionInLine() { + return 0; + } + + /** Override to say how a node (not a tree) should look as text */ + public abstract String toString(); +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/BaseTreeAdaptor.java b/runtime/Java/src/org/antlr/v4/runtime/tree/BaseTreeAdaptor.java new file mode 100644 index 000000000..564622c28 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/BaseTreeAdaptor.java @@ -0,0 +1,279 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +package org.antlr.v4.runtime.tree; + +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.TokenStream; + +import java.util.HashMap; +import java.util.Map; + +/** A TreeAdaptor that works with any Tree implementation. */ +public abstract class BaseTreeAdaptor implements TreeAdaptor { + /** System.identityHashCode() is not always unique; we have to + * track ourselves. That's ok, it's only for debugging, though it's + * expensive: we have to create a hashtable with all tree nodes in it. + */ + protected Map treeToUniqueIDMap; + protected int uniqueNodeID = 1; + + public Object nil() { + return create(null); + } + + /** create tree node that holds the start and stop tokens associated + * with an error. + * + * If you specify your own kind of tree nodes, you will likely have to + * override this method. CommonTree returns Token.INVALID_TOKEN_TYPE + * if no token payload but you might have to set token type for diff + * node type. + * + * You don't have to subclass CommonErrorNode; you will likely need to + * subclass your own tree node class to avoid class cast exception. + */ + public Object errorNode(TokenStream input, Token start, Token stop, + RecognitionException e) + { + CommonErrorNode t = new CommonErrorNode(input, start, stop, e); + //System.out.println("returning error node '"+t+"' @index="+input.index()); + return t; + } + + public boolean isNil(Object tree) { + return ((Tree)tree).isNil(); + } + + public Object dupTree(Object tree) { + return dupTree(tree, null); + } + + /** This is generic in the sense that it will work with any kind of + * tree (not just Tree interface). It invokes the adaptor routines + * not the tree node routines to do the construction. + */ + public Object dupTree(Object t, Object parent) { + if ( t==null ) { + return null; + } + Object newTree = dupNode(t); + // ensure new subtree root has parent/child index set + setChildIndex(newTree, getChildIndex(t)); // same index in new tree + setParent(newTree, parent); + int n = getChildCount(t); + for (int i = 0; i < n; i++) { + Object child = getChild(t, i); + Object newSubTree = dupTree(child, t); + addChild(newTree, newSubTree); + } + return newTree; + } + + /** Add a child to the tree t. If child is a flat tree (a list), make all + * in list children of t. Warning: if t has no children, but child does + * and child isNil then you can decide it is ok to move children to t via + * t.children = child.children; i.e., without copying the array. Just + * make sure that this is consistent with have the user will build + * ASTs. + */ + public void addChild(Object t, Object child) { + if ( t!=null && child!=null ) { + ((Tree)t).addChild((Tree)child); + } + } + + /** If oldRoot is a nil root, just copy or move the children to newRoot. + * If not a nil root, make oldRoot a child of newRoot. + * + * old=^(nil a b c), new=r yields ^(r a b c) + * old=^(a b c), new=r yields ^(r ^(a b c)) + * + * If newRoot is a nil-rooted single child tree, use the single + * child as the new root node. + * + * old=^(nil a b c), new=^(nil r) yields ^(r a b c) + * old=^(a b c), new=^(nil r) yields ^(r ^(a b c)) + * + * If oldRoot was null, it's ok, just return newRoot (even if isNil). + * + * old=null, new=r yields r + * old=null, new=^(nil r) yields ^(nil r) + * + * Return newRoot. Throw an exception if newRoot is not a + * simple node or nil root with a single child node--it must be a root + * node. If newRoot is ^(nil x) return x as newRoot. 
+ * + * Be advised that it's ok for newRoot to point at oldRoot's + * children; i.e., you don't have to copy the list. We are + * constructing these nodes so we should have this control for + * efficiency. + */ + public Object becomeRoot(Object newRoot, Object oldRoot) { + //System.out.println("becomeroot new "+newRoot.toString()+" old "+oldRoot); + Tree newRootTree = (Tree)newRoot; + Tree oldRootTree = (Tree)oldRoot; + if ( oldRoot==null ) { + return newRoot; + } + // handle ^(nil real-node) + if ( newRootTree.isNil() ) { + int nc = newRootTree.getChildCount(); + if ( nc==1 ) newRootTree = (Tree)newRootTree.getChild(0); + else if ( nc >1 ) { + // TODO: make tree run time exceptions hierarchy + throw new RuntimeException("more than one node as root (TODO: make exception hierarchy)"); + } + } + // add oldRoot to newRoot; addChild takes care of case where oldRoot + // is a flat list (i.e., nil-rooted tree). All children of oldRoot + // are added to newRoot. + newRootTree.addChild(oldRootTree); + return newRootTree; + } + + /** Transform ^(nil x) to x and nil to null */ + public Object rulePostProcessing(Object root) { + //System.out.println("rulePostProcessing: "+((Tree)root).toStringTree()); + Tree r = (Tree)root; + if ( r!=null && r.isNil() ) { + if ( r.getChildCount()==0 ) { + r = null; + } + else if ( r.getChildCount()==1 ) { + r = (Tree)r.getChild(0); + // whoever invokes rule will set parent and child index + r.setParent(null); + r.setChildIndex(-1); + } + } + return r; + } + + public Object becomeRoot(Token newRoot, Object oldRoot) { + return becomeRoot(create(newRoot), oldRoot); + } + + public Object create(int tokenType, Token fromToken) { + fromToken = createToken(fromToken); + //((ClassicToken)fromToken).setType(tokenType); + fromToken.setType(tokenType); + Tree t = (Tree)create(fromToken); + return t; + } + + public Object create(int tokenType, Token fromToken, String text) { + if (fromToken == null) return create(tokenType, text); + fromToken = createToken(fromToken); + fromToken.setType(tokenType); + fromToken.setText(text); + Tree t = (Tree)create(fromToken); + return t; + } + + public Object create(int tokenType, String text) { + Token fromToken = createToken(tokenType, text); + Tree t = (Tree)create(fromToken); + return t; + } + + public int getType(Object t) { + return ((Tree)t).getType(); + } + + public void setType(Object t, int type) { + throw new NoSuchMethodError("don't know enough about Tree node"); + } + + public String getText(Object t) { + return ((Tree)t).getText(); + } + + public void setText(Object t, String text) { + throw new NoSuchMethodError("don't know enough about Tree node"); + } + + public Object getChild(Object t, int i) { + return ((Tree)t).getChild(i); + } + + public void setChild(Object t, int i, Object child) { + ((Tree)t).setChild(i, (Tree)child); + } + + public Object deleteChild(Object t, int i) { + return ((Tree)t).deleteChild(i); + } + + public int getChildCount(Object t) { + return ((Tree)t).getChildCount(); + } + + public int getUniqueID(Object node) { + if ( treeToUniqueIDMap==null ) { + treeToUniqueIDMap = new HashMap(); + } + Integer prevID = (Integer)treeToUniqueIDMap.get(node); + if ( prevID!=null ) { + return prevID.intValue(); + } + int ID = uniqueNodeID; + treeToUniqueIDMap.put(node, new Integer(ID)); + uniqueNodeID++; + return ID; + // GC makes these nonunique: + // return System.identityHashCode(node); + } + + /** Tell me how to create a token for use with imaginary token nodes. 
+ * For example, there is probably no input symbol associated with imaginary + * token DECL, but you need to create it as a payload or whatever for + * the DECL node as in ^(DECL type ID). + * + * If you care what the token payload objects' type is, you should + * override this method and any other createToken variant. + */ + public abstract Token createToken(int tokenType, String text); + + /** Tell me how to create a token for use with imaginary token nodes. + * For example, there is probably no input symbol associated with imaginary + * token DECL, but you need to create it as a payload or whatever for + * the DECL node as in ^(DECL type ID). + * + * This is a variant of createToken where the new token is derived from + * an actual real input token. Typically this is for converting '{' + * tokens to BLOCK etc... You'll see + * + * r : lc='{' ID+ '}' -> ^(BLOCK[$lc] ID+) ; + * + * If you care what the token payload objects' type is, you should + * override this method and any other createToken variant. + */ + public abstract Token createToken(Token fromToken); +} + diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/CommonErrorNode.java b/runtime/Java/src/org/antlr/v4/runtime/tree/CommonErrorNode.java new file mode 100644 index 000000000..e3d4369a5 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/CommonErrorNode.java @@ -0,0 +1,108 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.tree; + +import org.antlr.v4.runtime.*; + +/** A node representing erroneous token range in token stream */ +public class CommonErrorNode extends CommonTree { + public IntStream input; + public Token start; + public Token stop; + public RecognitionException trappedException; + + public CommonErrorNode(TokenStream input, Token start, Token stop, + RecognitionException e) + { + //System.out.println("start: "+start+", stop: "+stop); + if ( stop==null || + (stop.getTokenIndex() < start.getTokenIndex() && + stop.getType()!=Token.EOF) ) + { + // sometimes resync does not consume a token (when LT(1) is + // in follow set. So, stop will be 1 to left to start. adjust. 
+ // Also handle case where start is the first token and no token + // is consumed during recovery; LT(-1) will return null. + stop = start; + } + this.input = input; + this.start = start; + this.stop = stop; + this.trappedException = e; + } + + public boolean isNil() { + return false; + } + + public int getType() { + return Token.INVALID_TYPE; + } + + public String getText() { + String badText = null; + if ( start instanceof Token ) { + int i = ((Token)start).getTokenIndex(); + int j = ((Token)stop).getTokenIndex(); + if ( ((Token)stop).getType() == Token.EOF ) { + j = ((TokenStream)input).size(); + } + badText = ((TokenStream)input).toString(i, j); + } + else if ( start instanceof Tree ) { + badText = ((TreeNodeStream)input).toString(start, stop); + } + else { + // people should subclass if they alter the tree type so this + // next one is for sure correct. + badText = ""; + } + return badText; + } + + public String toString() { + if ( trappedException instanceof MissingTokenException ) { + return ""; + } + else if ( trappedException instanceof UnwantedTokenException ) { + return ""; + } + else if ( trappedException instanceof MismatchedTokenException ) { + return ""; + } + else if ( trappedException instanceof NoViableAltException ) { + return ""; + } + return ""; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/CommonTree.java b/runtime/Java/src/org/antlr/v4/runtime/tree/CommonTree.java new file mode 100644 index 000000000..388bfef0d --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/CommonTree.java @@ -0,0 +1,221 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.tree; + +import org.antlr.runtime.BitSet; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.tree.gui.ASTViewer; + +/** A tree node that is wrapper for a Token object. After 3.0 release + * while building tree rewrite stuff, it became clear that computing + * parent and child index is very difficult and cumbersome. Better to + * spend the space in every tree node. If you don't want these extra + * fields, it's easy to cut them out in your own BaseTree subclass. 
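A minimal usage sketch (illustrative only, not from the original source; ID is a hypothetical token type constant):

    Token payload = new CommonToken(ID, "x");
    CommonTree node = new CommonTree(payload);
    node.getText();        // "x", delegated to the wrapped token
    node.getChildIndex();  // -1 until the node is attached to a parent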
+ */
+public class CommonTree extends BaseTree {
+ /** A single token is the payload */
+ public Token token;
+
+ /** What token indexes bracket all tokens associated with this node
+  * and below?
+  */
+ protected int startIndex=-1, stopIndex=-1;
+
+ /** Who is the parent node of this node; if null, implies node is root */
+ public CommonTree parent;
+
+ /** What index is this node in the child list? Range: 0..n-1 */
+ public int childIndex = -1;
+
+ public CommonTree() { }
+
+ public CommonTree(CommonTree node) {
+     super(node);
+     this.token = node.token;
+     this.startIndex = node.startIndex;
+     this.stopIndex = node.stopIndex;
+ }
+
+ public CommonTree(Token t) {
+     this.token = t;
+ }
+
+ public Token getToken() {
+     return token;
+ }
+
+ public Tree dupNode() {
+     return new CommonTree(this);
+ }
+
+ public boolean isNil() {
+     return token==null;
+ }
+
+ public int getType() {
+     if ( token==null ) {
+         return Token.INVALID_TYPE;
+     }
+     return token.getType();
+ }
+
+ public String getText() {
+     if ( token==null ) {
+         return null;
+     }
+     return token.getText();
+ }
+
+ public int getLine() {
+     if ( token==null || token.getLine()==0 ) {
+         if ( getChildCount()>0 ) {
+             return getChild(0).getLine();
+         }
+         return 0;
+     }
+     return token.getLine();
+ }
+
+ public int getCharPositionInLine() {
+     if ( token==null || token.getCharPositionInLine()==-1 ) {
+         if ( getChildCount()>0 ) {
+             return getChild(0).getCharPositionInLine();
+         }
+         return 0;
+     }
+     return token.getCharPositionInLine();
+ }
+
+ public int getTokenStartIndex() {
+     if ( startIndex==-1 && token!=null ) {
+         return token.getTokenIndex();
+     }
+     return startIndex;
+ }
+
+ public void setTokenStartIndex(int index) {
+     startIndex = index;
+ }
+
+ public int getTokenStopIndex() {
+     if ( stopIndex==-1 && token!=null ) {
+         return token.getTokenIndex();
+     }
+     return stopIndex;
+ }
+
+ public void setTokenStopIndex(int index) {
+     stopIndex = index;
+ }
+
+ /** For every node in this subtree, make sure its start/stop tokens
+  * are set. Walk depth first, visit bottom up. Only updates nodes
+  * with at least one token index < 0.
+  */
+ public void setUnknownTokenBoundaries() {
+     if ( children==null ) {
+         if ( startIndex<0 || stopIndex<0 ) {
+             startIndex = stopIndex = token.getTokenIndex();
+         }
+         return;
+     }
+     for (int i=0; i<children.size(); i++) {
+         ((CommonTree)children.get(i)).setUnknownTokenBoundaries();
+     }
+     if ( startIndex>=0 && stopIndex>=0 ) return; // already set
+     if ( children.size() > 0 ) {
+         CommonTree firstChild = (CommonTree)children.get(0);
+         CommonTree lastChild = (CommonTree)children.get(children.size()-1);
+         startIndex = firstChild.getTokenStartIndex();
+         stopIndex = lastChild.getTokenStopIndex();
+     }
+ }
+
+ public int getChildIndex() {
+     return childIndex;
+ }
+
+ public Tree getParent() {
+     return parent;
+ }
+
+ public void setParent(Tree t) {
+     this.parent = (CommonTree)t;
+ }
+
+ public void setChildIndex(int index) {
+     this.childIndex = index;
+ }
+
+ // TODO: move to basetree when i settle on how runtime works
+ public void inspect() {
+     ASTViewer viewer = new ASTViewer(this);
+     viewer.open();
+ }
+
+ // TODO: move to basetree when i settle on how runtime works
+ // TODO: don't include this node!!
+ // TODO: reuse other method
+ public CommonTree getFirstDescendantWithType(int type) {
+     if ( getType()==type ) return this;
+     if ( children==null ) return null;
+     for (Object c : children) {
+         CommonTree t = (CommonTree)c;
+         if ( t.getType()==type ) return t;
+         CommonTree d = t.getFirstDescendantWithType(type);
+         if ( d!=null ) return d;
+     }
+     return null;
+ }
+
+ // TODO: don't include this node!!
+ public CommonTree getFirstDescendantWithType(BitSet types) { + if ( types.member(getType()) ) return this; + if ( children==null ) return null; + for (Object c : children) { + CommonTree t = (CommonTree)c; + if ( types.member(t.getType()) ) return t; + CommonTree d = t.getFirstDescendantWithType(types); + if ( d!=null ) return d; + } + return null; + } + + public String toString() { + if ( isNil() ) { + return "nil"; + } + if ( getType()==Token.INVALID_TYPE) { + return ""; + } + if ( token==null ) { + return null; + } + return token.getText(); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/CommonTreeAdaptor.java b/runtime/Java/src/org/antlr/v4/runtime/tree/CommonTreeAdaptor.java new file mode 100644 index 000000000..093fc78c8 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/CommonTreeAdaptor.java @@ -0,0 +1,169 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.tree; + +import org.antlr.v4.runtime.*; + +/** A TreeAdaptor that works with any Tree implementation. It provides + * really just factory methods; all the work is done by BaseTreeAdaptor. + * If you would like to have different tokens created than ClassicToken + * objects, you need to override this and then set the parser tree adaptor to + * use your subclass. + * + * To get your parser to build nodes of a different type, override + * create(Token), errorNode(), and to be safe, YourTreeClass.dupNode(). + * dupNode is called to duplicate nodes during rewrite operations. + */ +public class CommonTreeAdaptor extends BaseTreeAdaptor { + /** Duplicate a node. This is part of the factory; + * override if you want another kind of node to be built. + * + * I could use reflection to prevent having to override this + * but reflection is slow. + */ + public Object dupNode(Object t) { + if ( t==null ) return null; + return ((Tree)t).dupNode(); + } + + public Object create(Token payload) { + return new CommonTree(payload); + } + + /** Tell me how to create a token for use with imaginary token nodes. 
+ * For example, there is probably no input symbol associated with imaginary + * token DECL, but you need to create it as a payload or whatever for + * the DECL node as in ^(DECL type ID). + * + * If you care what the token payload objects' type is, you should + * override this method and any other createToken variant. + */ + @Override + public Token createToken(int tokenType, String text) { + return new CommonToken(tokenType, text); + } + + /** Tell me how to create a token for use with imaginary token nodes. + * For example, there is probably no input symbol associated with imaginary + * token DECL, but you need to create it as a payload or whatever for + * the DECL node as in ^(DECL type ID). + * + * This is a variant of createToken where the new token is derived from + * an actual real input token. Typically this is for converting '{' + * tokens to BLOCK etc... You'll see + * + * r : lc='{' ID+ '}' -> ^(BLOCK[$lc] ID+) ; + * + * If you care what the token payload objects' type is, you should + * override this method and any other createToken variant. + */ + @Override + public Token createToken(Token fromToken) { + return new CommonToken(fromToken); + } + + /** Track start/stop token for subtree root created for a rule. + * Only works with Tree nodes. For rules that match nothing, + * seems like this will yield start=i and stop=i-1 in a nil node. + * Might be useful info so I'll not force to be i..i. + */ + public void setTokenBoundaries(Object t, Token startToken, Token stopToken) { + if ( t==null ) return; + int start = 0; + int stop = 0; + if ( startToken!=null ) start = startToken.getTokenIndex(); + if ( stopToken!=null ) stop = stopToken.getTokenIndex(); + ((Tree)t).setTokenStartIndex(start); + ((Tree)t).setTokenStopIndex(stop); + } + + public int getTokenStartIndex(Object t) { + if ( t==null ) return -1; + return ((Tree)t).getTokenStartIndex(); + } + + public int getTokenStopIndex(Object t) { + if ( t==null ) return -1; + return ((Tree)t).getTokenStopIndex(); + } + + public String getText(Object t) { + if ( t==null ) return null; + return ((Tree)t).getText(); + } + + public int getType(Object t) { + if ( t==null ) return Token.INVALID_TYPE; + return ((Tree)t).getType(); + } + + /** What is the Token associated with this node? If + * you are not using CommonTree, then you must + * override this in your own adaptor. 
+ */ + public Token getToken(Object t) { + if ( t instanceof CommonTree ) { + return ((CommonTree)t).getToken(); + } + return null; // no idea what to do + } + + public Object getChild(Object t, int i) { + if ( t==null ) return null; + return ((Tree)t).getChild(i); + } + + public int getChildCount(Object t) { + if ( t==null ) return 0; + return ((Tree)t).getChildCount(); + } + + public Object getParent(Object t) { + if ( t==null ) return null; + return ((Tree)t).getParent(); + } + + public void setParent(Object t, Object parent) { + if ( t!=null ) ((Tree)t).setParent((Tree)parent); + } + + public int getChildIndex(Object t) { + if ( t==null ) return 0; + return ((Tree)t).getChildIndex(); + } + + public void setChildIndex(Object t, int index) { + if ( t!=null ) ((Tree)t).setChildIndex(index); + } + + public void replaceChildren(Object parent, int startChildIndex, int stopChildIndex, Object t) { + if ( parent!=null ) { + ((Tree)parent).replaceChildren(startChildIndex, stopChildIndex, t); + } + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/CommonTreeNodeStream.java b/runtime/Java/src/org/antlr/v4/runtime/tree/CommonTreeNodeStream.java new file mode 100644 index 000000000..26e72c872 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/CommonTreeNodeStream.java @@ -0,0 +1,169 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.tree; + +import org.antlr.runtime.misc.IntArray; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.misc.LookaheadStream; + +public class CommonTreeNodeStream extends LookaheadStream implements TreeNodeStream { + public static final int DEFAULT_INITIAL_BUFFER_SIZE = 100; + public static final int INITIAL_CALL_STACK_SIZE = 10; + + /** Pull nodes from which tree? */ + protected Object root; + + /** If this tree (root) was created from a token stream, track it. 
*/ + protected TokenStream tokens; + + /** What tree adaptor was used to build these trees */ + TreeAdaptor adaptor; + + /** The tree iterator we using */ + protected TreeIterator it; + + /** Stack of indexes used for push/pop calls */ + protected IntArray calls; + + /** Tree (nil A B C) trees like flat A B C streams */ + protected boolean hasNilRoot = false; + + /** Tracks tree depth. Level=0 means we're at root node level. */ + protected int level = 0; + + public CommonTreeNodeStream(Object tree) { + this(new CommonTreeAdaptor(), tree); + } + + public CommonTreeNodeStream(TreeAdaptor adaptor, Object tree) { + this.root = tree; + this.adaptor = adaptor; + it = new TreeIterator(adaptor,root); + } + + public void reset() { + super.reset(); + it.reset(); + hasNilRoot = false; + level = 0; + if ( calls != null ) calls.clear(); + } + + /** Pull elements from tree iterator. Track tree level 0..max_level. + * If nil rooted tree, don't give initial nil and DOWN nor final UP. + */ + public Object nextElement() { + Object t = it.next(); + //System.out.println("pulled "+adaptor.getType(t)); + if ( t == it.up ) { + level--; + if ( level==0 && hasNilRoot ) return it.next(); // don't give last UP; get EOF + } + else if ( t == it.down ) level++; + if ( level==0 && adaptor.isNil(t) ) { // if nil root, scarf nil, DOWN + hasNilRoot = true; + t = it.next(); // t is now DOWN, so get first real node next + level++; + t = it.next(); + } + return t; + } + + public boolean isEOF(Object o) { return adaptor.getType(o) == Token.EOF; } + + public void setUniqueNavigationNodes(boolean uniqueNavigationNodes) { } + + public Object getTreeSource() { return root; } + + public String getSourceName() { return getTokenStream().getSourceName(); } + + public TokenStream getTokenStream() { return tokens; } + + public void setTokenStream(TokenStream tokens) { this.tokens = tokens; } + + public TreeAdaptor getTreeAdaptor() { return adaptor; } + + public void setTreeAdaptor(TreeAdaptor adaptor) { this.adaptor = adaptor; } + + public Object get(int i) { + throw new UnsupportedOperationException("Absolute node indexes are meaningless in an unbuffered stream"); + } + + public int LA(int i) { return adaptor.getType(LT(i)); } + + /** Make stream jump to a new location, saving old location. + * Switch back with pop(). + */ + public void push(int index) { + if ( calls==null ) { + calls = new IntArray(); + } + calls.push(p); // save current index + seek(index); + } + + /** Seek back to previous index saved during last push() call. + * Return top of stack (return index). + */ + public int pop() { + int ret = calls.pop(); + seek(ret); + return ret; + } + + // TREE REWRITE INTERFACE + + public void replaceChildren(Object parent, int startChildIndex, int stopChildIndex, Object t) { + if ( parent!=null ) { + adaptor.replaceChildren(parent, startChildIndex, stopChildIndex, t); + } + } + + public String toString(Object start, Object stop) { + // we'll have to walk from start to stop in tree; we're not keeping + // a complete node stream buffer + return "n/a"; + } + + /** For debugging; destructive: moves tree iterator to end. 
*/ + public String toTokenTypeString() { + reset(); + StringBuffer buf = new StringBuffer(); + Object o = LT(1); + int type = adaptor.getType(o); + while ( type!=Token.EOF ) { + buf.append(" "); + buf.append(type); + consume(); + o = LT(1); + type = adaptor.getType(o); + } + return buf.toString(); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/Tree.java b/runtime/Java/src/org/antlr/v4/runtime/tree/Tree.java new file mode 100644 index 000000000..71e2e85d4 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/Tree.java @@ -0,0 +1,127 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.tree; + +import org.antlr.v4.runtime.Token; + +import java.util.List; + +/** What does a tree look like? ANTLR has a number of support classes + * such as CommonTreeNodeStream that work on these kinds of trees. You + * don't have to make your trees implement this interface, but if you do, + * you'll be able to use more support code. + * + * NOTE: When constructing trees, ANTLR can build any kind of tree; it can + * even use Token objects as trees if you add a child list to your tokens. + * + * This is a tree node without any payload; just navigation and factory stuff. + */ +public interface Tree { + public static final Tree INVALID_NODE = new CommonTree(Token.INVALID_TOKEN); + + Tree getChild(int i); + + int getChildCount(); + + // Tree tracks parent and child index now > 3.0 + + public Tree getParent(); + + public void setParent(Tree t); + + /** Is there is a node above with token type ttype? */ + public boolean hasAncestor(int ttype); + + /** Walk upwards and get first ancestor with this token type. */ + public Tree getAncestor(int ttype); + + /** Return a list of all ancestors of this node. The first node of + * list is the root and the last is the parent of this node. + */ + public List getAncestors(); + + /** This node is what child index? 0..n-1 */ + public int getChildIndex(); + + public void setChildIndex(int index); + + /** Set the parent and child index values for all children */ + public void freshenParentAndChildIndexes(); + + /** Add t as a child to this node. 
If t is null, do nothing. If t + * is nil, add all children of t to this' children. + */ + void addChild(Tree t); + + /** Set ith child (0..n-1) to t; t must be non-null and non-nil node */ + public void setChild(int i, Tree t); + + public Object deleteChild(int i); + + /** Delete children from start to stop and replace with t even if t is + * a list (nil-root tree). num of children can increase or decrease. + * For huge child lists, inserting children can force walking rest of + * children to set their childindex; could be slow. + */ + public void replaceChildren(int startChildIndex, int stopChildIndex, Object t); + + /** Indicates the node is a nil node but may still have children, meaning + * the tree is a flat list. + */ + boolean isNil(); + + /** What is the smallest token index (indexing from 0) for this node + * and its children? + */ + int getTokenStartIndex(); + + void setTokenStartIndex(int index); + + /** What is the largest token index (indexing from 0) for this node + * and its children? + */ + int getTokenStopIndex(); + + void setTokenStopIndex(int index); + + Tree dupNode(); + + /** Return a token type; needed for tree parsing */ + int getType(); + + String getText(); + + /** In case we don't have a token payload, what is the line for errors? */ + int getLine(); + + int getCharPositionInLine(); + + String toStringTree(); + + String toString(); +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/TreeAdaptor.java b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeAdaptor.java new file mode 100644 index 000000000..1680781d6 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeAdaptor.java @@ -0,0 +1,263 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.tree; + +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.TokenStream; + +/** How to create and navigate trees. Rather than have a separate factory + * and adaptor, I've merged them. Makes sense to encapsulate. + * + * This takes the place of the tree construction code generated in the + * generated code in 2.x and the ASTFactory. 
+ * + * I do not need to know the type of a tree at all so they are all + * generic Objects. This may increase the amount of typecasting needed. :( + */ +public interface TreeAdaptor { + // C o n s t r u c t i o n + + /** Create a tree node from Token object; for CommonTree type trees, + * then the token just becomes the payload. This is the most + * common create call. + * + * Override if you want another kind of node to be built. + */ + public Object create(Token payload); + + /** Duplicate a single tree node. + * Override if you want another kind of node to be built. + */ + public Object dupNode(Object treeNode); + + /** Duplicate tree recursively, using dupNode() for each node */ + public Object dupTree(Object tree); + + /** Return a nil node (an empty but non-null node) that can hold + * a list of element as the children. If you want a flat tree (a list) + * use "t=adaptor.nil(); t.addChild(x); t.addChild(y);" + */ + public Object nil(); + + /** Return a tree node representing an error. This node records the + * tokens consumed during error recovery. The start token indicates the + * input symbol at which the error was detected. The stop token indicates + * the last symbol consumed during recovery. + * + * You must specify the input stream so that the erroneous text can + * be packaged up in the error node. The exception could be useful + * to some applications; default implementation stores ptr to it in + * the CommonErrorNode. + * + * This only makes sense during token parsing, not tree parsing. + * Tree parsing should happen only when parsing and tree construction + * succeed. + */ + public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e); + + /** Is tree considered a nil node used to make lists of child nodes? */ + public boolean isNil(Object tree); + + /** Add a child to the tree t. If child is a flat tree (a list), make all + * in list children of t. Warning: if t has no children, but child does + * and child isNil then you can decide it is ok to move children to t via + * t.children = child.children; i.e., without copying the array. Just + * make sure that this is consistent with have the user will build + * ASTs. Do nothing if t or child is null. + */ + public void addChild(Object t, Object child); + + /** If oldRoot is a nil root, just copy or move the children to newRoot. + * If not a nil root, make oldRoot a child of newRoot. + * + * old=^(nil a b c), new=r yields ^(r a b c) + * old=^(a b c), new=r yields ^(r ^(a b c)) + * + * If newRoot is a nil-rooted single child tree, use the single + * child as the new root node. + * + * old=^(nil a b c), new=^(nil r) yields ^(r a b c) + * old=^(a b c), new=^(nil r) yields ^(r ^(a b c)) + * + * If oldRoot was null, it's ok, just return newRoot (even if isNil). + * + * old=null, new=r yields r + * old=null, new=^(nil r) yields ^(nil r) + * + * Return newRoot. Throw an exception if newRoot is not a + * simple node or nil root with a single child node--it must be a root + * node. If newRoot is ^(nil x) return x as newRoot. + * + * Be advised that it's ok for newRoot to point at oldRoot's + * children; i.e., you don't have to copy the list. We are + * constructing these nodes so we should have this control for + * efficiency. + */ + public Object becomeRoot(Object newRoot, Object oldRoot); + + /** Given the root of the subtree created for this rule, post process + * it to do any simplifications or whatever you want. 
A required + * behavior is to convert ^(nil singleSubtree) to singleSubtree + * as the setting of start/stop indexes relies on a single non-nil root + * for non-flat trees. + * + * Flat trees such as for lists like "idlist : ID+ ;" are left alone + * unless there is only one ID. For a list, the start/stop indexes + * are set in the nil node. + * + * This method is executed after all rule tree construction and right + * before setTokenBoundaries(). + */ + public Object rulePostProcessing(Object root); + + /** For identifying trees. + * + * How to identify nodes so we can say "add node to a prior node"? + * Even becomeRoot is an issue. Use System.identityHashCode(node) + * usually. + */ + public int getUniqueID(Object node); + + + // R e w r i t e R u l e s + + /** Create a node for newRoot make it the root of oldRoot. + * If oldRoot is a nil root, just copy or move the children to newRoot. + * If not a nil root, make oldRoot a child of newRoot. + * + * Return node created for newRoot. + * + * Be advised: when debugging ASTs, the DebugTreeAdaptor manually + * calls create(Token child) and then plain becomeRoot(node, node) + * because it needs to trap calls to create, but it can't since it delegates + * to not inherits from the TreeAdaptor. + */ + public Object becomeRoot(Token newRoot, Object oldRoot); + + /** Create a new node derived from a token, with a new token type. + * This is invoked from an imaginary node ref on right side of a + * rewrite rule as IMAG[$tokenLabel]. + * + * This should invoke createToken(Token). + */ + public Object create(int tokenType, Token fromToken); + + /** Same as create(tokenType,fromToken) except set the text too. + * This is invoked from an imaginary node ref on right side of a + * rewrite rule as IMAG[$tokenLabel, "IMAG"]. + * + * This should invoke createToken(Token). + */ + public Object create(int tokenType, Token fromToken, String text); + + /** Create a new node derived from a token, with a new token type. + * This is invoked from an imaginary node ref on right side of a + * rewrite rule as IMAG["IMAG"]. + * + * This should invoke createToken(int,String). + */ + public Object create(int tokenType, String text); + + + // C o n t e n t + + /** For tree parsing, I need to know the token type of a node */ + public int getType(Object t); + + /** Node constructors can set the type of a node */ + public void setType(Object t, int type); + + public String getText(Object t); + + /** Node constructors can set the text of a node */ + public void setText(Object t, String text); + + /** Return the token object from which this node was created. + * Currently used only for printing an error message. + * The error display routine in BaseRecognizer needs to + * display where the input the error occurred. If your + * tree of limitation does not store information that can + * lead you to the token, you can create a token filled with + * the appropriate information and pass that back. See + * BaseRecognizer.getErrorMessage(). + */ + public Token getToken(Object t); + + /** Where are the bounds in the input token stream for this node and + * all children? Each rule that creates AST nodes will call this + * method right before returning. Flat trees (i.e., lists) will + * still usually have a nil root node just to hold the children list. + * That node would contain the start/stop indexes then. 
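+ *
+ * A hedged sketch of the expected call pattern (hand-written here, not
+ * actual generated code; r, start, and stop are made-up names): a rule
+ * that built subtree root r from its first token start through its last
+ * token stop would typically finish with
+ *
+ *   r = adaptor.rulePostProcessing(r);          // collapse ^(nil x) to x
+ *   adaptor.setTokenBoundaries(r, start, stop); // record the token span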
+ */ + public void setTokenBoundaries(Object t, Token startToken, Token stopToken); + + /** Get the token start index for this subtree; return -1 if no such index */ + public int getTokenStartIndex(Object t); + + /** Get the token stop index for this subtree; return -1 if no such index */ + public int getTokenStopIndex(Object t); + + + // N a v i g a t i o n / T r e e P a r s i n g + + /** Get a child 0..n-1 node */ + public Object getChild(Object t, int i); + + /** Set ith child (0..n-1) to t; t must be non-null and non-nil node */ + public void setChild(Object t, int i, Object child); + + /** Remove ith child and shift children down from right. */ + public Object deleteChild(Object t, int i); + + /** How many children? If 0, then this is a leaf node */ + public int getChildCount(Object t); + + /** Who is the parent node of this node; if null, implies node is root. + * If your node type doesn't handle this, it's ok but the tree rewrites + * in tree parsers need this functionality. + */ + public Object getParent(Object t); + public void setParent(Object t, Object parent); + + /** What index is this node in the child list? Range: 0..n-1 + * If your node type doesn't handle this, it's ok but the tree rewrites + * in tree parsers need this functionality. + */ + public int getChildIndex(Object t); + public void setChildIndex(Object t, int index); + + /** Replace from start to stop child index of parent with t, which might + * be a list. Number of children may be different + * after this call. + * + * If parent is null, don't do anything; must be at root of overall tree. + * Can't replace whatever points to the parent externally. Do nothing. + */ + public void replaceChildren(Object parent, int startChildIndex, int stopChildIndex, Object t); +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/TreeFilter.java b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeFilter.java new file mode 100644 index 000000000..b20aacb8e --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeFilter.java @@ -0,0 +1,134 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +package org.antlr.v4.runtime.tree; + +import org.antlr.v4.runtime.ParserSharedState; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.RecognizerSharedState; +import org.antlr.v4.runtime.TokenStream; + +/** + Cut-n-paste from material I'm not using in the book anymore (edit later + to make sense): + + Now, how are we going to test these tree patterns against every +subtree in our original tree? In what order should we visit nodes? +For this application, it turns out we need a simple ``apply once'' +rule application strategy and a ``down then up'' tree traversal +strategy. Let's look at rule application first. + +As we visit each node, we need to see if any of our patterns match. If +a pattern matches, we execute the associated tree rewrite and move on +to the next node. In other words, we only look for a single rule +application opportunity (we'll see below that we sometimes need to +repeatedly apply rules). The following method applies a rule in a @cl +TreeParser (derived from a tree grammar) to a tree: + +here is where weReferenced code/walking/patterns/TreePatternMatcher.java + +It uses reflection to lookup the appropriate rule within the generated +tree parser class (@cl Simplify in this case). Most of the time, the +rule will not match the tree. To avoid issuing syntax errors and +attempting error recovery, it bumps up the backtracking level. Upon +failure, the invoked rule immediately returns. If you don't plan on +using this technique in your own ANTLR-based application, don't sweat +the details. This method boils down to ``call a rule to match a tree, +executing any embedded actions and rewrite rules.'' + +At this point, we know how to define tree grammar rules and how to +apply them to a particular subtree. The final piece of the tree +pattern matcher is the actual tree traversal. We have to get the +correct node visitation order. In particular, we need to perform the +scalar-vector multiply transformation on the way down (preorder) and +we need to reduce multiply-by-zero subtrees on the way up (postorder). + +To implement a top-down visitor, we do a depth first walk of the tree, +executing an action in the preorder position. To get a bottom-up +visitor, we execute an action in the postorder position. ANTLR +provides a standard @cl TreeVisitor class with a depth first search @v +visit method. That method executes either a @m pre or @m post method +or both. In our case, we need to call @m applyOnce in both. On the way +down, we'll look for @r vmult patterns. On the way up, +we'll look for @r mult0 patterns. 
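+ *
+ * A usage sketch under assumptions (Simplify is the example class named
+ * above; tree, tokens, and the single-argument CommonTreeNodeStream
+ * constructor are assumed here, not defined by this class): a filter
+ * tree grammar compiled into a TreeFilter subclass is typically driven as
+ *
+ *   CommonTreeNodeStream nodes = new CommonTreeNodeStream(tree);
+ *   nodes.setTokenStream(tokens);  // lets rule actions see $text
+ *   Simplify s = new Simplify(nodes);
+ *   s.downup(tree);                // topdown rules preorder, bottomup rules postorder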
+ */ +public class TreeFilter extends TreeParser { + public interface fptr { + public void rule() throws RecognitionException; + } + + protected TokenStream originalTokenStream; + protected TreeAdaptor originalAdaptor; + + public TreeFilter(TreeNodeStream input) { + this(input, new RecognizerSharedState()); + } + public TreeFilter(TreeNodeStream input, RecognizerSharedState state) { + super(input); + originalAdaptor = (TreeAdaptor) input.getTreeAdaptor(); + originalTokenStream = input.getTokenStream(); + } + + public void applyOnce(Object t, fptr whichRule) { + if ( t==null ) return; + try { + // share TreeParser object but not parsing-related state + state = new ParserSharedState(); + input = new CommonTreeNodeStream(originalAdaptor, t); + ((CommonTreeNodeStream)input).setTokenStream(originalTokenStream); + whichRule.rule(); + } + catch (RecognitionException e) { ; } + } + + public void downup(Object t) { + TreeVisitor v = new TreeVisitor(new CommonTreeAdaptor()); + TreeVisitorAction actions = new TreeVisitorAction() { + public Object pre(Object t) { applyOnce(t, topdown_fptr); return t; } + public Object post(Object t) { applyOnce(t, bottomup_fptr); return t; } + }; + v.visit(t, actions); + } + + fptr topdown_fptr = new fptr() { + public void rule() throws RecognitionException { + topdown(); + } + }; + + fptr bottomup_fptr = new fptr() { + public void rule() throws RecognitionException { + bottomup(); + } + }; + + // methods the downup strategy uses to do the up and down rules. + // to override, just define tree grammar rule topdown and turn on + // filter=true. + public void topdown() throws RecognitionException {;} + public void bottomup() throws RecognitionException {;} +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/TreeIterator.java b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeIterator.java new file mode 100644 index 000000000..5b7e43f09 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeIterator.java @@ -0,0 +1,131 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +package org.antlr.v4.runtime.tree; + +import org.antlr.runtime.Token; +import org.antlr.runtime.misc.FastQueue; + +import java.util.Iterator; + +/** Return a node stream from a doubly-linked tree whose nodes + * know what child index they are. No remove() is supported. + * + * Emit navigation nodes (DOWN, UP, and EOF) to let show tree structure. + */ +public class TreeIterator implements Iterator { + protected TreeAdaptor adaptor; + protected Object root; + protected Object tree; + protected boolean firstTime = true; + + // navigation nodes to return during walk and at end + public Object up; + public Object down; + public Object eof; + + /** If we emit UP/DOWN nodes, we need to spit out multiple nodes per + * next() call. + */ + protected FastQueue nodes; + + public TreeIterator(Object tree) { + this(new CommonTreeAdaptor(),tree); + } + + public TreeIterator(TreeAdaptor adaptor, Object tree) { + this.adaptor = adaptor; + this.tree = tree; + this.root = tree; + nodes = new FastQueue(); + down = adaptor.create(Token.DOWN, "DOWN"); + up = adaptor.create(Token.UP, "UP"); + eof = adaptor.create(Token.EOF, "EOF"); + } + + public void reset() { + firstTime = true; + tree = root; + nodes.clear(); + } + + public boolean hasNext() { + if ( firstTime ) return root!=null; + if ( nodes!=null && nodes.size()>0 ) return true; + if ( tree==null ) return false; + if ( adaptor.getChildCount(tree)>0 ) return true; + return adaptor.getParent(tree)!=null; // back at root? + } + + public Object next() { + if ( firstTime ) { // initial condition + firstTime = false; + if ( adaptor.getChildCount(tree)==0 ) { // single node tree (special) + nodes.add(eof); + return tree; + } + return tree; + } + // if any queued up, use those first + if ( nodes!=null && nodes.size()>0 ) return nodes.remove(); + + // no nodes left? + if ( tree==null ) return eof; + + // next node will be child 0 if any children + if ( adaptor.getChildCount(tree)>0 ) { + tree = adaptor.getChild(tree, 0); + nodes.add(tree); // real node is next after DOWN + return down; + } + // if no children, look for next sibling of tree or ancestor + Object parent = adaptor.getParent(tree); + // while we're out of siblings, keep popping back up towards root + while ( parent!=null && + adaptor.getChildIndex(tree)+1 >= adaptor.getChildCount(parent) ) + { + nodes.add(up); // we're moving back up + tree = parent; + parent = adaptor.getParent(tree); + } + // no nodes left? + if ( parent==null ) { + tree = null; // back at root? nothing left then + nodes.add(eof); // add to queue, might have UP nodes in there + return nodes.remove(); + } + + // must have found a node with an unvisited sibling + // move to it and return it + int nextSiblingIndex = adaptor.getChildIndex(tree) + 1; + tree = adaptor.getChild(parent, nextSiblingIndex); + nodes.add(tree); // add to queue, might have UP nodes in there + return nodes.remove(); + } + + public void remove() { throw new UnsupportedOperationException(); } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/TreeNodeStream.java b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeNodeStream.java new file mode 100644 index 000000000..dcbfe0a6d --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeNodeStream.java @@ -0,0 +1,106 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.tree; + +import org.antlr.v4.runtime.IntStream; +import org.antlr.v4.runtime.TokenStream; + +/** A stream of tree nodes, accessing nodes from a tree of some kind */ +public interface TreeNodeStream extends IntStream { + /** Get a tree node at an absolute index i; 0..n-1. + * If you don't want to buffer up nodes, then this method makes no + * sense for you. + */ + public Object get(int i); + + /** Get tree node at current input pointer + i ahead where i=1 is next node. + * i<0 indicates nodes in the past. So LT(-1) is previous node, but + * implementations are not required to provide results for k < -1. + * LT(0) is undefined. For i>=n, return null. + * Return null for LT(0) and any index that results in an absolute address + * that is negative. + * + * This is analogus to the LT() method of the TokenStream, but this + * returns a tree node instead of a token. Makes code gen identical + * for both parser and tree grammars. :) + */ + public Object LT(int k); + + /** Where is this stream pulling nodes from? This is not the name, but + * the object that provides node objects. + */ + public Object getTreeSource(); + + /** If the tree associated with this stream was created from a TokenStream, + * you can specify it here. Used to do rule $text attribute in tree + * parser. Optional unless you use tree parser rule text attribute + * or output=template and rewrite=true options. + */ + public TokenStream getTokenStream(); + + /** What adaptor can tell me how to interpret/navigate nodes and + * trees. E.g., get text of a node. + */ + public TreeAdaptor getTreeAdaptor(); + + /** As we flatten the tree, we use UP, DOWN nodes to represent + * the tree structure. When debugging we need unique nodes + * so we have to instantiate new ones. When doing normal tree + * parsing, it's slow and a waste of memory to create unique + * navigation nodes. Default should be false; + */ + public void setUniqueNavigationNodes(boolean uniqueNavigationNodes); + + /** Reset the tree node stream in such a way that it acts like + * a freshly constructed stream. + */ + public void reset(); + + /** Return the text of all nodes from start to stop, inclusive. + * If the stream does not buffer all the nodes then it can still + * walk recursively from start until stop. 
You can always return + * null or "" too, but users should not access $ruleLabel.text in + * an action of course in that case. + */ + public String toString(Object start, Object stop); + + + // REWRITING TREES (used by tree parser) + + /** Replace from start to stop child index of parent with t, which might + * be a list. Number of children may be different + * after this call. The stream is notified because it is walking the + * tree and might need to know you are monkeying with the underlying + * tree. Also, it might be able to modify the node stream to avoid + * restreaming for future phases. + * + * If parent is null, don't do anything; must be at root of overall tree. + * Can't replace whatever points to the parent externally. Do nothing. + */ + public void replaceChildren(Object parent, int startChildIndex, int stopChildIndex, Object t); +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/TreeParser.java b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeParser.java new file mode 100644 index 000000000..79c213626 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeParser.java @@ -0,0 +1,240 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.antlr.v4.runtime.tree; + +import org.antlr.runtime.BitSet; +import org.antlr.v4.runtime.*; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** A parser for a stream of tree nodes. "tree grammars" result in a subclass + * of this. All the error reporting and recovery is shared with Parser via + * the BaseRecognizer superclass. 
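+ *
+ * An illustrative sketch only (MyTreeGrammar, its startRule, the ast and
+ * tokens variables, and the one-argument CommonTreeNodeStream constructor
+ * are assumptions, not part of this class): once a token parse has
+ * produced an AST, the tree walk usually looks like
+ *
+ *   CommonTreeNodeStream nodes = new CommonTreeNodeStream(ast);
+ *   nodes.setTokenStream(tokens);   // optional; enables $text in the walker
+ *   MyTreeGrammar walker = new MyTreeGrammar(nodes);
+ *   walker.startRule();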
+*/ +public class TreeParser extends BaseRecognizer { + public static final int DOWN = Token.DOWN; + public static final int UP = Token.UP; + + // precompiled regex used by inContext + static String dotdot = ".*[^.]\\.\\.[^.].*"; + static String doubleEtc = ".*\\.\\.\\.\\s+\\.\\.\\..*"; + static Pattern dotdotPattern = Pattern.compile(dotdot); + static Pattern doubleEtcPattern = Pattern.compile(doubleEtc); + + protected TreeNodeStream input; + + public TreeParser(TreeNodeStream input) { + super((TokenStream)input); // highlight that we go to super to set state object + setTreeNodeStream(input); + } + + public TreeParser(TreeNodeStream input, RecognizerSharedState state) { + super((TokenStream)input); // share the state object with another parser + setTreeNodeStream(input); + } + + public void reset() { + super.reset(); // reset all recognizer state variables + if ( input!=null ) { + input.seek(0); // rewind the input + } + } + + /** Set the input stream */ + public void setTreeNodeStream(TreeNodeStream input) { + this.input = input; + } + + public TreeNodeStream getTreeNodeStream() { + return input; + } + + public String getSourceName() { + return input.getSourceName(); + } + + protected Object getCurrentInputSymbol(IntStream input) { + return ((TreeNodeStream)input).LT(1); + } + + protected Object getMissingSymbol(IntStream input, + RecognitionException e, + int expectedTokenType, + BitSet follow) + { + String tokenText = + ""; + TreeAdaptor adaptor = ((TreeNodeStream)e.input).getTreeAdaptor(); + return adaptor.create(new CommonToken(expectedTokenType, tokenText)); + } + + /** Match '.' in tree parser has special meaning. Skip node or + * entire tree if node has children. If children, scan until + * corresponding UP node. + */ + public void matchAny(IntStream ignore) { // ignore stream, copy of input + state.errorRecovery = false; + state.failed = false; + Object look = input.LT(1); + if ( input.getTreeAdaptor().getChildCount(look)==0 ) { + input.consume(); // not subtree, consume 1 node and return + return; + } + // current node is a subtree, skip to corresponding UP. + // must count nesting level to get right UP + int level=0; + int tokenType = input.getTreeAdaptor().getType(look); + while ( tokenType!=Token.EOF && !(tokenType==UP && level==0) ) { + input.consume(); + look = input.LT(1); + tokenType = input.getTreeAdaptor().getType(look); + if ( tokenType == DOWN ) { + level++; + } + else if ( tokenType == UP ) { + level--; + } + } + input.consume(); // consume UP + } + + /** We have DOWN/UP nodes in the stream that have no line info; override. + * plus we want to alter the exception type. Don't try to recover + * from tree parser errors inline... + */ + protected Object recoverFromMismatchedToken(IntStream input, + int ttype, + BitSet follow) + throws RecognitionException + { + //throw new MismatchedTreeNodeException(ttype, (TreeNodeStream)input); + return null; + } + + /** Prefix error message with the grammar name because message is + * always intended for the programmer because the parser built + * the input tree not the user. + */ + public String getErrorHeader(RecognitionException e) { + return getGrammarFileName()+": node from "+ + (e.approximateLineInfo?"after ":"")+"line "+e.line+":"+e.charPositionInLine; + } + + /** Tree parsers parse nodes they usually have a token object as + * payload. Set the exception token and do the default behavior. 
+ */ + public String getErrorMessage(RecognitionException e, String[] tokenNames) { + if ( this instanceof TreeParser ) { + TreeAdaptor adaptor = ((TreeNodeStream)e.input).getTreeAdaptor(); + e.token = adaptor.getToken(e.node); + if ( e.token==null ) { // could be an UP/DOWN node + e.token = new CommonToken(adaptor.getType(e.node), + adaptor.getText(e.node)); + } + } + return super.getErrorMessage(e); + } + + /** Check if current node in input has a context. Context means sequence + * of nodes towards root of tree. For example, you might say context + * is "MULT" which means my parent must be MULT. "CLASS VARDEF" says + * current node must be child of a VARDEF and whose parent is a CLASS node. + * You can use "..." to mean zero-or-more nodes. "METHOD ... VARDEF" + * means my parent is VARDEF and somewhere above that is a METHOD node. + * The first node in the context is not necessarily the root. The context + * matcher stops matching and returns true when it runs out of context. + * There is no way to force the first node to be the root. + */ + public boolean inContext(String context) { + return inContext(input.getTreeAdaptor(), getTokenNames(), input.LT(1), context); + } + + /** The worker for inContext. It's static and full of parameters for + * testing purposes. + */ + public static boolean inContext(TreeAdaptor adaptor, + String[] tokenNames, + Object t, + String context) + { + Matcher dotdotMatcher = dotdotPattern.matcher(context); + Matcher doubleEtcMatcher = doubleEtcPattern.matcher(context); + if ( dotdotMatcher.find() ) { // don't allow "..", must be "..." + throw new IllegalArgumentException("invalid syntax: .."); + } + if ( doubleEtcMatcher.find() ) { // don't allow double "..." + throw new IllegalArgumentException("invalid syntax: ... ..."); + } + context = context.replaceAll("\\.\\.\\.", " ... "); // ensure spaces around ... + context = context.trim(); + String[] nodes = context.split("\\s+"); + int ni = nodes.length-1; + t = adaptor.getParent(t); + while ( ni>=0 && t!=null ) { + if ( nodes[ni].equals("...") ) { + // walk upwards until we see nodes[ni-1] then continue walking + if ( ni==0 ) return true; // ... 
at start is no-op + String goal = nodes[ni-1]; + Object ancestor = getAncestor(adaptor, tokenNames, t, goal); + if ( ancestor==null ) return false; + t = ancestor; + ni--; + } + String name = tokenNames[adaptor.getType(t)]; + if ( !name.equals(nodes[ni]) ) { + //System.err.println("not matched: "+nodes[ni]+" at "+t); + return false; + } + // advance to parent and to previous element in context node list + ni--; + t = adaptor.getParent(t); + } + + if ( t==null && ni>=0 ) return false; // at root but more nodes to match + return true; + } + + /** Helper for static inContext */ + protected static Object getAncestor(TreeAdaptor adaptor, String[] tokenNames, Object t, String goal) { + while ( t!=null ) { + String name = tokenNames[adaptor.getType(t)]; + if ( name.equals(goal) ) return t; + t = adaptor.getParent(t); + } + return null; + } + + public void traceIn(String ruleName, int ruleIndex) { + super.traceIn(ruleName, ruleIndex, input.LT(1)); + } + + public void traceOut(String ruleName, int ruleIndex) { + super.traceOut(ruleName, ruleIndex, input.LT(1)); + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/tree/TreeVisitor.java b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeVisitor.java new file mode 100644 index 000000000..798dcec2e --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/tree/TreeVisitor.java @@ -0,0 +1,69 @@ +/* + [The "BSD license"] + Copyright (c) 2005-2009 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.antlr.v4.runtime.tree; + +/** Do a depth first walk of a tree, applying pre() and post() actions + * as we discover and finish nodes. + */ +public class TreeVisitor { + protected TreeAdaptor adaptor; + + public TreeVisitor(TreeAdaptor adaptor) { + this.adaptor = adaptor; + } + public TreeVisitor() { this(new CommonTreeAdaptor()); } + + /** Visit every node in tree t and trigger an action for each node + * before/after having visited all of its children. + * Execute both actions even if t has no children. + * If a child visit yields a new child, it can update its + * parent's child list or just return the new child. 
The + * child update code works even if the child visit alters its parent + * and returns the new tree. + * + * Return result of applying post action to this node. + */ + public Object visit(Object t, TreeVisitorAction action) { + // System.out.println("visit "+((Tree)t).toStringTree()); + boolean isNil = adaptor.isNil(t); + if ( action!=null && !isNil ) { + t = action.pre(t); // if rewritten, walk children of new t + } + for (int i=0; i