reorg'd lexer
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9226]
commit 2d43a22e27
parent 95bc423669
@@ -49,7 +49,8 @@ public interface ANTLRErrorListener {
      * What parser got the error. From this object, you
      * can access the context as well as the input stream.
      * @param offendingToken
-     *        The offending token in the input token stream.
+     *        The offending token in the input token stream, unless recognizer
+     *        is a lexer (then it's null)
      *        If no viable alternative error, e has token
      *        at which we started production for the decision.
      * @param line
@@ -65,8 +66,8 @@ public interface ANTLRErrorListener {
      * the parser was able to recover in line without exiting the
      * surrounding rule.
      */
-    public void error(BaseRecognizer recognizer,
-                      Token offendingToken,
+    public void error(Recognizer recognizer,
+                      @Nullable Token offendingToken,
                       int line,
                       int charPositionInLine,
                       String msg,
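With this change the listener's offendingToken may be null when the reporting recognizer is a lexer. A minimal console listener shaped like this snapshot's interface is sketched below; the trailing RecognitionException parameter (cut off in the hunk above) and the exact interface shape are assumptions, not confirmed by the diff.

// Hedged sketch of a listener for this snapshot's ANTLRErrorListener.
// The final RecognitionException parameter is an assumption; only the
// leading parameters are visible in the hunk above.
import org.antlr.v4.runtime.*;

public class ConsoleErrorListener implements ANTLRErrorListener {
    @Override
    public void error(Recognizer recognizer,
                      Token offendingToken,   // null when the recognizer is a lexer
                      int line,
                      int charPositionInLine,
                      String msg,
                      RecognitionException e) {
        String where = "line " + line + ":" + charPositionInLine;
        if ( offendingToken!=null ) where += " at '" + offendingToken.getText() + "'";
        System.err.println(where + " " + msg);
    }
}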
@@ -150,7 +150,7 @@ public class ANTLRStringStream implements CharStream {
     public int mark() {
         if ( markers==null ) {
             markers = new ArrayList();
-            markers.add(null); // depth 0 means no backtracking, leave blank
+            markers.add(null); // depth 0
         }
         markDepth++;
         CharStreamState state = null;
@@ -203,9 +203,12 @@ public class ANTLRStringStream implements CharStream {
     }

     public String substring(int start, int stop) {
         if ( stop >= n ) stop = n-1;
         int count = stop - start + 1;
         if ( count > n ) count = n;
         if ( start >= n ) return "";
         // System.err.println("data: "+Arrays.toString(data)+", n="+n+
         //                    ", start="+start+
         //                    ", stop="+stop);
         return new String(data, start, count);
     }

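The substring(start, stop) shown above treats stop as inclusive and clamps out-of-range requests instead of throwing. A small standalone illustration of that clamping logic over a char[] (not the ANTLR class itself, just the same arithmetic):

// Standalone sketch of the inclusive, clamped substring logic above.
public class ClampedSubstring {
    static String substring(char[] data, int n, int start, int stop) {
        if ( stop >= n ) stop = n-1;          // clamp stop to the last valid index
        int count = stop - start + 1;         // stop is inclusive
        if ( count > n ) count = n;
        if ( start >= n ) return "";          // nothing to return past the end
        return new String(data, start, count);
    }

    public static void main(String[] args) {
        char[] data = "abcdef".toCharArray();
        System.out.println(substring(data, data.length, 1, 3));   // "bcd"
        System.out.println(substring(data, data.length, 4, 99));  // "ef" (stop clamped)
        System.out.println(substring(data, data.length, 9, 12));  // "" (start past end)
    }
}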
@@ -141,9 +141,6 @@ public abstract class BaseRecognizer extends Recognizer<ParserATNSimulator> {
         return syntaxErrors;
     }

-    public abstract IntStream getInputStream();
-    public abstract void setInputStream(IntStream input);
-
     /** Match needs to return the current input symbol, which gets put
      *  into the label for the associated token ref; e.g., x=ID. Token
      *  and tree parsers need to return different objects. Rather than test
@@ -38,7 +38,7 @@ public class FailedPredicateException extends RecognitionException {
     public String predicateText;

     public FailedPredicateException(BaseRecognizer recognizer, String predText) {
-        super(recognizer);
+        super(recognizer, recognizer.getInputStream(), recognizer._ctx);
         this.predicateText = predText;
     }

@@ -130,13 +130,18 @@ public abstract class Lexer extends Recognizer<LexerATNSimulator>
             // System.out.println("nextToken line "+tokenStartLine+" at "+((char)input.LA(1))+
             //                    " in mode "+mode+
             //                    " at index "+input.index());
-            int ttype = _interp.match(input, mode);
-            // System.out.println("accepted ttype "+ttype);
+            int ttype;
+            try {
+                ttype = _interp.match(input, mode);
+            }
+            catch (LexerNoViableAltException e) {
+                notifyListeners(e);        // report error
+                recover();
+                ttype = SKIP;
+            }
             if ( input.LA(1)==CharStream.EOF ) {
                 hitEOF = true;
             }

             if ( type == Token.INVALID_TYPE ) type = ttype;
             if ( type==SKIP ) {
                 continue outer;
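The reorganized nextToken() body wraps ATN matching in a try/catch: on a LexerNoViableAltException it reports the error, recovers by consuming one character, and emits no token for that position. A self-contained sketch of that report/recover/skip loop, independent of the ANTLR classes (Matcher, NoMatchException and matchToken are illustrative names, not runtime API):

// Illustrative sketch of the report / recover / skip pattern used above.
import java.util.ArrayList;
import java.util.List;

final class SkipOnErrorLexer {
    static final int SKIP = -3;

    static class NoMatchException extends Exception {}

    interface Matcher {
        /** Match one token starting at cursor[0]; return its type and advance the cursor. */
        int matchToken(char[] input, int[] cursor) throws NoMatchException;
    }

    static List<Integer> tokenize(char[] input, Matcher m) {
        List<Integer> types = new ArrayList<>();
        int[] cursor = {0};
        while (cursor[0] < input.length) {
            int ttype;
            try {
                ttype = m.matchToken(input, cursor);
            }
            catch (NoMatchException e) {
                // report, then recover by consuming a single character
                System.err.println("token recognition error at index " + cursor[0]);
                cursor[0]++;
                ttype = SKIP;
            }
            if (ttype != SKIP) types.add(ttype);  // skipped errors produce no token
        }
        return types;
    }
}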
@@ -180,16 +185,18 @@ public abstract class Lexer extends Recognizer<LexerATNSimulator>
     }

     /** Set the char stream and reset the lexer */
-    public void setCharStream(CharStream input) {
+    @Override
+    public void setInputStream(IntStream input) {
         this.input = null;
         reset();
-        this.input = input;
+        this.input = (CharStream)input;
     }

     public String getSourceName() {
         return input.getSourceName();
     }

+    @Override
     public CharStream getInputStream() {
         return input;
     }

@@ -260,14 +267,15 @@ public abstract class Lexer extends Recognizer<LexerATNSimulator>
         if ( text!=null ) {
             return text;
         }
-        return ((CharStream)input).substring(tokenStartCharIndex,getCharIndex()-1);
+        return _interp.getText();
+        // return ((CharStream)input).substring(tokenStartCharIndex,getCharIndex()-1);
     }

     /** Set the complete text of this token; it wipes any previous
      *  changes to the text.
      */
     public void setText(String text) {
-        text = text;
+        this.text = text;
     }

     public void reportError(RecognitionException e) {
@@ -293,45 +301,23 @@ public abstract class Lexer extends Recognizer<LexerATNSimulator>
         return null;
     }

-    /*
-    public String getErrorMessage(RecognitionException e) {
-        String msg = null;
-        if ( e instanceof MismatchedTokenException ) {
-            MismatchedTokenException mte = (MismatchedTokenException)e;
-            msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting "+
-                  getCharErrorDisplay(mte.expecting.getSingleElement());
-        }
-        else if ( e instanceof NoViableAltException ) {
-            NoViableAltException nvae = (NoViableAltException)e;
-            // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
-            // and "(decision="+nvae.decisionNumber+") and
-            // "state "+nvae.stateNumber
-            msg = "no viable alternative at character "+getCharErrorDisplay(e.c);
-        }
-        else if ( e instanceof EarlyExitException ) {
-            EarlyExitException eee = (EarlyExitException)e;
-            // for development, can add "(decision="+eee.decisionNumber+")"
-            msg = "required (...)+ loop did not match anything at character "+getCharErrorDisplay(e.c);
-        }
-        else if ( e instanceof MismatchedNotSetException ) {
-            MismatchedNotSetException mse = (MismatchedNotSetException)e;
-            msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+mse.expecting;
-        }
-        else if ( e instanceof MismatchedSetException ) {
-            MismatchedSetException mse = (MismatchedSetException)e;
-            msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+mse.expecting;
-        }
-        else if ( e instanceof MismatchedRangeException ) {
-            MismatchedRangeException mre = (MismatchedRangeException)e;
-            msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+
-                  getCharErrorDisplay(mre.a)+".."+getCharErrorDisplay(mre.b);
-        }
-        else {
-            //msg = super.getErrorMessage(e, tokenNames);
-        }
-        return msg;
+    public void recover() {
+        _interp.consume(input); // skip a char and try again
+    }
+
+    public void notifyListeners(LexerNoViableAltException e) {
+        String msg = "token recognition error at: '"+
+                     input.substring(tokenStartCharIndex,input.index())+"'";
+        if ( _listeners==null || _listeners.size()==0 ) {
+            System.err.println("line "+tokenStartLine+":"+
+                               tokenStartCharPositionInLine+" "+
+                               msg);
+            return;
+        }
+        for (ANTLRErrorListener pl : _listeners) {
+            pl.error(this, null, tokenStartLine, tokenStartCharPositionInLine, msg, e);
+        }
+    }
-    */

     public String getCharErrorDisplay(int c) {
         String s = String.valueOf((char)c);
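notifyListeners() above falls back to stderr when no listeners are registered, otherwise it fans the message out to every attached listener. A compact sketch of that fallback-then-fanout shape, using a hypothetical functional Listener rather than the ANTLR interface:

// Sketch of the fallback reporting in notifyListeners(): with no listeners,
// print "line X:Y msg" to stderr; otherwise deliver the message to each one.
import java.util.List;

final class ErrorFanout {
    interface Listener { void error(int line, int charPos, String msg); }

    static void notify(List<Listener> listeners, int line, int charPos, String msg) {
        if ( listeners==null || listeners.isEmpty() ) {
            System.err.println("line " + line + ":" + charPos + " " + msg);
            return;
        }
        for (Listener l : listeners) l.error(line, charPos, msg);
    }
}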
@@ -32,24 +32,23 @@ package org.antlr.v4.runtime;
 import org.antlr.v4.runtime.atn.ATNConfig;
 import org.antlr.v4.runtime.misc.OrderedHashSet;

-public class LexerNoViableAltException extends LexerRecognitionExeption {
-    /** Prediction began at what input index? */
+public class LexerNoViableAltException extends RecognitionException {
+    /** Matching attempted at what input index? */
     public int startIndex;

-    /** Which configurations did we try at input.index() that couldn't match input.LT(1)? */
+    /** Which configurations did we try at input.index() that couldn't match input.LA(1)? */
     public OrderedHashSet<ATNConfig> deadEndConfigs;

     /** Used for remote debugger deserialization */
     public LexerNoViableAltException() {;}

     public LexerNoViableAltException(Lexer lexer,
                                      CharStream input,
                                      int startIndex,
                                      OrderedHashSet<ATNConfig> deadEndConfigs) {
-        super(lexer, input);
+        super(lexer, input, null);
         this.startIndex = startIndex;
         this.deadEndConfigs = deadEndConfigs;
     }

     public String toString() {
-        return "NoViableAltException('"+(char)c+"')";
+        return "NoViableAltException('')";
     }
 }
@@ -38,7 +38,7 @@ import org.antlr.v4.runtime.misc.IntervalSet;
  */
 public class RecognitionException extends RuntimeException {
     /** Who threw the exception? */
-    protected BaseRecognizer recognizer;
+    protected Recognizer recognizer;

     // TODO: make a dummy recognizer for the interpreter to use?
     // Next two (ctx,input) should be what is in recognizer, but
@@ -71,17 +71,17 @@ public class RecognitionException extends RuntimeException {
     public boolean approximateLineInfo;
     */

-    public RecognitionException(BaseRecognizer recognizer) {
-        this(recognizer, recognizer.getInputStream(), recognizer._ctx);
-    }
+    // public RecognitionException(Recognizer recognizer) {
+    //     this(recognizer, recognizer.getInputStream(), null);
+    // }

-    public RecognitionException(BaseRecognizer recognizer, IntStream input,
+    public RecognitionException(Recognizer recognizer, IntStream input,
                                 RuleContext ctx)
     {
         this.recognizer = recognizer;
         this.input = input;
         this.ctx = ctx;
-        this.offendingState = ctx.s;
+        if ( ctx!=null ) this.offendingState = ctx.s;
     }

     /** Where was the parser in the ATN when the error occurred?
@@ -93,7 +93,9 @@ public class RecognitionException extends RuntimeException {
     public int getOffendingState() { return offendingState; }

     public IntervalSet getExpectedTokens() {
-        if ( recognizer!=null ) return recognizer._interp.atn.nextTokens(ctx);
+        if ( recognizer!=null && recognizer instanceof BaseRecognizer) {
+            return ((BaseRecognizer)recognizer)._interp.atn.nextTokens(ctx);
+        }
         return null;
     }

@@ -109,7 +111,7 @@ public class RecognitionException extends RuntimeException {
         return offendingToken;
     }

-    public BaseRecognizer getRecognizer() {
+    public Recognizer getRecognizer() {
         return recognizer;
     }

@@ -33,7 +33,7 @@ import org.antlr.v4.runtime.atn.ATN;

 import java.util.*;

-public class Recognizer<ATNInterpreter> {
+public abstract class Recognizer<ATNInterpreter> {
     public static final int EOF=-1;

     protected ANTLRErrorStrategy _errHandler = new DefaultANTLRErrorStrategy();
@@ -236,4 +236,7 @@ public class Recognizer<ATNInterpreter> {
         return new ParserRuleContext(_localctx, s);
     }
     */
+
+    public abstract IntStream getInputStream();
+    public abstract void setInputStream(IntStream input);
 }
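getInputStream()/setInputStream() move from BaseRecognizer up to the shared Recognizer base, so the lexer and the parser expose one stream-accessor contract. A toy sketch of that shape, with abbreviated stand-in names rather than the runtime's actual members:

// Toy sketch of the hierarchy change: the stream accessors live on the common
// base; the lexer narrows the stream type. All names here are illustrative.
interface IntStreamLike {}
class CharStreamLike implements IntStreamLike {}

abstract class RecognizerSketch {
    public abstract IntStreamLike getInputStream();
    public abstract void setInputStream(IntStreamLike input);
}

class LexerSketch extends RecognizerSketch {
    private CharStreamLike input;
    @Override public CharStreamLike getInputStream() { return input; }
    @Override public void setInputStream(IntStreamLike input) {
        this.input = (CharStreamLike) input;   // lexer narrows to a char stream
    }
}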
@@ -39,9 +39,6 @@ public abstract class ATNSimulator {
     public static DFAState ERROR;
     public ATN atn;

-    protected ATNConfig prevAccept; // TODO Move down? used to avoid passing int down and back up in method calls
-    protected int prevAcceptIndex = -1;
-
     static {
         ERROR = new DFAState(new OrderedHashSet<ATNConfig>());
         ERROR.stateNumber = Integer.MAX_VALUE;
@@ -39,8 +39,50 @@ public class LexerATNSimulator extends ATNSimulator {
     public static boolean dfa_debug = false;
     public static final int NUM_EDGES = 255;

+    /** When we hit an accept state in either the DFA or the ATN, we
+     *  have to notify the character stream to start offering characters
+     *  via mark() and record the current state. The current state includes
+     *  the current index into the input, the current line, and current
+     *  character position in that line. Note that the Lexer is tracking
+     *  the starting line and characterization of the token. These
+     *  variables track the state of the simulator when it hits an accept state.
+     *
+     *  We track these variables separately for the DFA and ATN simulation
+     *  because the DFA simulation often has to fail over to the ATN
+     *  simulation. If the ATN simulation fails, we need the DFA to fall
+     *  back to its previously accepted state, if any. If the ATN succeeds,
+     *  then the ATN does the accept and the DFA simulator that invoked it
+     *  can simply return thepredicted token type.
+     */
+    protected static class ExecState {
+        int marker = -1;
+        int index = -1;
+        int line = 0;
+        int charPos = -1;
+    }
+    protected static class DFAExecState extends ExecState {
+        DFAState state = null;
+    }
+    protected static class ATNExecState extends ExecState {
+        ATNConfig config = null;
+    }
+
     protected Lexer recog;

+    /** In case the stream is not offering characters, we need to track
+     *  at minimum the text for the current token. This is what
+     *  getText() returns.
+     */
+    protected char[] text = new char[100];
+    protected int textIndex = -1;
+
+    /** The current token's starting index into the character stream.
+     *  Shared across DFA to ATN simulation in case the ATN fails and the
+     *  DFA did not have a previous accept state. In this case, we use the
+     *  ATN-generated exception object.
+     */
+    protected int startIndex = -1;
+
     /** line number 1..n within the input */
     protected int line = 1;

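The new comment describes the bookkeeping both simulations share: keep scanning past an accept state hoping for a longer match, remember the deepest accept point (input index, line, column, stream marker), and rewind to it if the scan later dead-ends. A compact, standalone sketch of that "record the deepest accept, fall back on failure" idea; AcceptPoint and scanNumber are illustrative names, not the simulator's API:

// Sketch: accept a plain integer, keep going for an optional ".digits" suffix,
// and fall back to the deepest accept if the longer attempt fails.
final class LongestMatchSketch {
    static final class AcceptPoint { int index = -1; }

    static int scanNumber(String in, int start) {
        AcceptPoint last = new AcceptPoint();
        int i = start;
        while (i < in.length() && Character.isDigit(in.charAt(i))) i++;
        if (i > start) last.index = i;                  // accept: plain integer
        if (i < in.length() && in.charAt(i) == '.') {   // try for a longer match
            int j = i + 1;
            while (j < in.length() && Character.isDigit(in.charAt(j))) j++;
            if (j > i + 1) last.index = j;              // deeper accept: "digits.digits"
        }
        if (last.index < 0) throw new RuntimeException("no viable number at " + start);
        return last.index;                              // rewind point / end of token
    }

    public static void main(String[] args) {
        System.out.println(scanNumber("42.x", 0));  // 2: "." had no digits, fall back to "42"
        System.out.println(scanNumber("42.5", 0));  // 4: longer match "42.5" wins
    }
}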
@@ -50,8 +92,9 @@ public class LexerATNSimulator extends ATNSimulator {
     protected DFA[] dfa;
     protected int mode = Lexer.DEFAULT_MODE;

-    protected ATNConfig prevAccept = null;
-    protected int prevAcceptIndex = -1;
+    /** Used during DFA/ATN exec to record the most recent accept configuration info */
+    protected DFAExecState dfaPrevAccept = new DFAExecState();
+    protected ATNExecState atnPrevAccept = new ATNExecState();

     public static int ATN_failover = 0;
     public static int match_calls = 0;
@@ -80,7 +123,10 @@ public class LexerATNSimulator extends ATNSimulator {
         }
     }

+    // only called from test code from outside
     public int matchATN(CharStream input) {
+        textIndex = -1;
+        startIndex = input.index();
         ATNState startState = atn.modeToStartState.get(mode);
         if ( debug ) System.out.println("mode "+ mode +" start: "+startState);
         OrderedHashSet<ATNConfig> s0_closure = computeStartState(input, startState);
@@ -91,104 +137,123 @@ public class LexerATNSimulator extends ATNSimulator {
         return predict;
     }

-    public int exec(CharStream input, DFAState s0) {
+    protected int exec(CharStream input, DFAState s0) {
         if ( dfa_debug ) System.out.println("DFA[mode "+(recog==null?0:recog.mode)+"] exec LA(1)=="+
                                             (char)input.LA(1));
         //System.out.println("DFA start of execDFA: "+dfa[mode].toLexerString());
-        int prevAcceptMarker = -1;
-        int prevAcceptIndex = -1;
-        int prevAcceptLine = 0;
-        int prevAcceptCharPos = -1;
-        DFAState prevAcceptState = null;
+        textIndex = -1;
+        startIndex = input.index();
+        dfaPrevAccept.marker = -1;
+        dfaPrevAccept.index = -1;
+        dfaPrevAccept.line = 0;
+        dfaPrevAccept.charPos = -1;
+        dfaPrevAccept.state = null;
+        LexerNoViableAltException atnException = null;
         DFAState s = s0;
-        int startIndex = input.index();
         int t = input.LA(1);
         loop:
         while ( true ) {
             if ( dfa_debug ) System.out.println("state "+s.stateNumber+" LA(1)=="+(char)t);
-            if ( s.isAcceptState ) {
-                if ( dfa_debug ) System.out.println("accept; predict "+s.prediction+
-                                                    " in state "+s.stateNumber);
-                prevAcceptState = s;
-                prevAcceptMarker = input.mark();
-                prevAcceptIndex = input.index();
-                prevAcceptLine = line;
-                prevAcceptCharPos = charPositionInLine;
-                // keep going unless we're at EOF; check if something else could match
-                if ( t==CharStream.EOF ) break;
-            }
             // if no edge, pop over to ATN interpreter, update DFA and return
             if ( s.edges == null || t >= s.edges.length || t <= CharStream.EOF ||
                  s.edges[t] == null )
             {
                 if ( dfa_debug ) System.out.println("no edge for "+(char)t);
-                int ttype = CharStream.INVALID_CHAR;
-                if ( dfa_debug ) {
-                    System.out.println("ATN exec upon "+
-                                       input.substring(startIndex,input.index())+
-                                       " at DFA state "+s.stateNumber+" = "+s.configs);
-                }
+                int ttype = Token.INVALID_TYPE;
+                try {
+                    if ( dfa_debug ) {
+                        System.out.println("ATN exec upon "+
+                                           input.substring(startIndex,input.index())+
+                                           " at DFA state "+s.stateNumber+" = "+s.configs);
+                    }
                     ATN_failover++;
                     ttype = exec(input, s.configs);
+                    if ( dfa_debug ) {
+                        System.out.println("back from DFA update, ttype="+ttype+
+                                           ", dfa[mode "+mode+"]=\n"+
+                                           dfa[mode].toLexerString());
+                    }
+                }
+                catch (LexerNoViableAltException nvae) {
+                    addDFAEdge(s, t, ERROR);
                 }
-                if ( dfa_debug ) {
-                    System.out.println("back from DFA update, ttype="+ttype+
-                                       ", dfa[mode "+mode+"]=\n"+dfa[mode].toLexerString());
+                    // The ATN could not match anything starting from s.configs
+                    // so we had an error edge. Re-throw the exception
+                    // if there was no previous accept state here in DFA.
+                    atnException = nvae;
+                    //if ( t!=CharStream.EOF ) addDFAEdge(s, t, ERROR); TODO added by ATN already?
+                    // if ( dfaPrevAccept.state==null ) {
+                    //     throw nvae;
+                    // }
+                    break loop; // dead end; no where to go, fall back on prev
                 }

-                if ( ttype==CharStream.INVALID_CHAR ) {
-                    if ( t != CharStream.EOF ) addDFAEdge(s, t, ERROR);
-                    break loop; // dead end; no where to go, fall back on prev if any
-                }
-                // action already executed
-                return ttype; // we've updated DFA, exec'd action, and have our deepest answer
+                // if ( ttype==Token.INVALID_TYPE ) {
+                //     //if ( t != CharStream.EOF ) addDFAEdge(s, t, ERROR);
+                //     break loop; // dead end; no where to go, fall back on prev if any
+                // }
+                // action already executed by ATN
+                // we've updated DFA, exec'd action, and have our deepest answer
+                return ttype;
             }
             DFAState target = s.edges[t];
             if ( target == ERROR ) break;
             s = target;

+            if ( s.isAcceptState ) {
+                if ( dfa_debug ) System.out.println("accept; predict "+s.prediction+
+                                                    " in state "+s.stateNumber);
+                dfaPrevAccept.state = s;
+                dfaPrevAccept.marker = input.mark();
+                dfaPrevAccept.index = input.index();
+                dfaPrevAccept.line = line;
+                dfaPrevAccept.charPos = charPositionInLine;
+                // keep going unless we're at EOF; check if something else could match
+                // EOF never in DFA
+                if ( t==CharStream.EOF ) break;
+            }

             consume(input);
             t = input.LA(1);
         }
-        if ( prevAcceptState==null ) {
+        if ( dfaPrevAccept.state==null ) {
             if ( t==CharStream.EOF ) {
                 return Token.EOF;
             }
             if ( debug ) System.out.println("!!! no viable alt in dfa");
-            throw new LexerNoViableAltException(recog, input, s.configs); // TODO: closure is empty
+            if ( atnException!=null ) throw atnException;
+            throw new LexerNoViableAltException(recog, input, startIndex, s.configs);
         }
         if ( recog!=null ) {
-            int actionIndex = atn.ruleToActionIndex[prevAcceptState.ruleIndex];
+            int actionIndex = atn.ruleToActionIndex[dfaPrevAccept.state.ruleIndex];
             if ( dfa_debug ) {
                 System.out.println("ACTION "+
-                                   recog.getRuleNames()[prevAcceptState.ruleIndex]+
+                                   recog.getRuleNames()[dfaPrevAccept.state.ruleIndex]+
                                   ":"+ actionIndex);
             }
-            if ( actionIndex>=0 ) recog.action(null, prevAcceptState.ruleIndex, actionIndex);
+            if ( actionIndex>=0 ) recog.action(null, dfaPrevAccept.state.ruleIndex, actionIndex);
         }
-        input.release(prevAcceptMarker);
-        input.seek(prevAcceptIndex);
-        line = prevAcceptLine;
-        charPositionInLine = prevAcceptCharPos;
-        return prevAcceptState.prediction;

+        // seek to after last char in token
+        input.release(dfaPrevAccept.marker);
+        input.seek(dfaPrevAccept.index);
+        line = dfaPrevAccept.line;
+        charPositionInLine = dfaPrevAccept.charPos;
+        consume(input);
+        return dfaPrevAccept.state.prediction;
     }

-    public int exec(CharStream input, OrderedHashSet<ATNConfig> s0) {
+    protected int exec(CharStream input, OrderedHashSet<ATNConfig> s0) {
         //System.out.println("enter exec index "+input.index()+" from "+s0);
         OrderedHashSet<ATNConfig> closure = new OrderedHashSet<ATNConfig>();
         closure.addAll(s0);
         if ( debug ) System.out.println("start state closure="+closure);

-        prevAccept = null;
-        prevAcceptIndex = -1;
-
         OrderedHashSet<ATNConfig> reach = new OrderedHashSet<ATNConfig>();
+        atnPrevAccept.marker = -1;
+        atnPrevAccept.index = -1;
+        atnPrevAccept.line = 0;
+        atnPrevAccept.charPos = -1;
+        atnPrevAccept.config = null;

         int t = input.LA(1);
         // if ( t==Token.EOF ) return Token.EOF;

         do { // while more work
             if ( debug ) System.out.println("in reach starting closure: " + closure);
@@ -225,19 +290,20 @@ public class LexerATNSimulator extends ATNSimulator {
                 if ( debug ) {
                     System.out.println("in reach we hit accept state "+c+" index "+
                                        input.index()+", reach="+reach+
-                                       ", prevAccept="+prevAccept+", prevIndex="+prevAcceptIndex);
+                                       ", prevAccept="+atnPrevAccept.config+
+                                       ", prevIndex="+atnPrevAccept.index);
                 }
                 int index = input.index();
-                if ( index > prevAcceptIndex ) {
+                if ( index > atnPrevAccept.index ) {
                     // will favor prev accept at same index so "int" is keyword not ID
-                    prevAccept = c;
-                    // if ( t == CharStream.EOF ) {
-                    //     // later we seek to prevAcceptIndex+1, undo that effect for EOF
-                    //     index--;
-                    // }
-                    prevAcceptIndex = index;
+                    atnPrevAccept.config = c;
+                    atnPrevAccept.index = index;
+                    atnPrevAccept.marker = input.mark();
+                    atnPrevAccept.line = line;
+                    atnPrevAccept.charPos = charPositionInLine;
                     if ( debug ) {
-                        System.out.println("mark "+c+" @ index="+index);
+                        System.out.println("mark "+c+" @ index="+index+", "+
+                                           atnPrevAccept.line+":"+atnPrevAccept.charPos);
                     }
                 }

@@ -263,26 +329,40 @@ public class LexerATNSimulator extends ATNSimulator {
             reach.clear();
         } while ( true );

-        if ( prevAccept==null ) {
-            if ( t==CharStream.EOF ) {
+        if ( atnPrevAccept.config==null ) {
+            // if no accept and EOF is first char, return EOF
+            if ( t==CharStream.EOF && input.index()==startIndex ) {
                 return Token.EOF;
             }
             // System.out.println("no viable token at input "+getTokenName(input.LA(1))+", index "+input.index());
-            throw new LexerNoViableAltException(recog, input, closure); // TODO: closure is empty
+            // TODO: closure is empty?
+            throw new LexerNoViableAltException(recog, input, startIndex, closure);
         }

-        if ( debug ) System.out.println("ACCEPT " + prevAccept.toString(recog, true) + " index " + prevAcceptIndex);
-
-        input.seek(prevAcceptIndex+1); // seek to after last char in token
-        int ruleIndex = prevAccept.state.ruleIndex;
-        int ttype = atn.ruleToTokenType[ruleIndex];
         if ( debug ) {
-            if ( recog!=null ) System.out.println("ACTION "+recog.getRuleNames()[ruleIndex]+":"+ruleIndex);
-            else System.out.println("ACTION "+ruleIndex+":"+ruleIndex);
+            System.out.println("ACCEPT " +atnPrevAccept.config.toString(recog, true) +
+                               " index " +atnPrevAccept.index);
         }
-        int actionIndex = atn.ruleToActionIndex[ruleIndex];
-        if ( actionIndex>=0 && recog!=null ) recog.action(null, ruleIndex, actionIndex);
-        return ttype;

+        int ruleIndex = atnPrevAccept.config.state.ruleIndex;
+        if ( recog!=null ) {
+            if ( debug ) {
+                if ( recog!=null ) System.out.println("ACTION "+
+                                                      recog.getRuleNames()[ruleIndex]+
+                                                      ":"+ruleIndex);
+                else System.out.println("ACTION "+ruleIndex+":"+ruleIndex);
+            }
+            int actionIndex = atn.ruleToActionIndex[ruleIndex];
+            if ( actionIndex>=0 && recog!=null ) recog.action(null, ruleIndex, actionIndex);
+        }

+        // seek to after last char in token
+        input.release(atnPrevAccept.marker);
+        input.seek(atnPrevAccept.index);
+        line = atnPrevAccept.line;
+        charPositionInLine = atnPrevAccept.charPos;
+        consume(input);
+        return atn.ruleToTokenType[ruleIndex];
     }

     public ATNState getReachableTarget(Transition trans, int t) {
@@ -492,6 +572,12 @@ public class LexerATNSimulator extends ATNSimulator {
         return dfa[mode];
     }

+    /** Get the text of the current token */
+    public String getText() {
+        if ( textIndex<0 ) return "";
+        return new String(text, 0, textIndex+1);
+    }
+
     public int getLine() {
         return line;
     }
@@ -502,13 +588,16 @@ public class LexerATNSimulator extends ATNSimulator {

     public void consume(CharStream input) {
         int curChar = input.LA(1);
         //System.out.println("prev p="+p+", c="+(char)data[p]);
         if ( curChar!=CharStream.EOF ) {
             if ( (textIndex+1)>=text.length ) {
                 char[] txt = new char[text.length*2];
                 System.arraycopy(text, 0, txt, 0, text.length);
                 text = txt;
             }
             text[++textIndex] = (char)curChar;
         }
         charPositionInLine++;
         if ( curChar=='\n' ) {
             /*
             System.out.println("newline char found on line: "+line+
                                "@ pos="+charPositionInLine);
             */
             line++;
             charPositionInLine=0;
         }

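consume() above does two jobs per character: append it to a doubling char buffer (which getText() later reads) and keep line/charPositionInLine in sync. A self-contained sketch of that same bookkeeping, using illustrative names rather than the simulator's fields:

// Sketch of the per-character bookkeeping performed by consume() above.
final class CharAccumulator {
    private char[] text = new char[16];
    private int textIndex = -1;
    int line = 1;
    int charPositionInLine = 0;

    void consume(int curChar) {
        if (curChar != -1) {                       // -1 plays the role of EOF here
            if (textIndex + 1 >= text.length) {    // grow by doubling when full
                char[] bigger = new char[text.length * 2];
                System.arraycopy(text, 0, bigger, 0, text.length);
                text = bigger;
            }
            text[++textIndex] = (char) curChar;
        }
        charPositionInLine++;
        if (curChar == '\n') { line++; charPositionInLine = 0; }
    }

    String getText() {                             // mirrors getText() above
        return textIndex < 0 ? "" : new String(text, 0, textIndex + 1);
    }
}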
@@ -64,6 +64,8 @@ public class ParserATNSimulator extends ATNSimulator {
      *  The full stack at any moment is [config.outerContext + config.context].
      */
     protected RuleContext outerContext;
+    protected ATNConfig prevAccept; // TODO Move down? used to avoid passing int down and back up in method calls
+    protected int prevAcceptIndex = -1;

     public ParserATNSimulator(ATN atn) {
         super(atn);
@@ -1,3 +1,3 @@
 lexer grammar L;
 DONE : EOF ;
 A : 'a';
 I : '0'..'9'+ {System.out.println("I");} ;
 WS : (' '|'\n') {skip();} ;

@@ -84,7 +84,7 @@ class TestJava {
         if ( lexer==null ) {
             lexer = new JavaLexer(null);
         }
-        lexer.setCharStream(new ANTLRFileStream(f));
+        lexer.setInputStream(new ANTLRFileStream(f));

         CommonTokenStream tokens = new CommonTokenStream(lexer);
         long start = System.currentTimeMillis();
@@ -3,7 +3,7 @@ import org.antlr.v4.runtime.*;
 public class TestL {
     public static void main(String[] args) throws Exception {
         CharStream input = new ANTLRFileStream(args[0]);
-        input = new ANTLRStringStream("");
+        input = new ANTLRStringStream("3 3");
         L lexer = new L(input);
         CommonTokenStream tokens = new CommonTokenStream(lexer);
         tokens.fill();
@@ -82,7 +82,7 @@ class TestYang {
         if ( lexer==null ) {
             lexer = new YangJavaLexer(null);
         }
-        lexer.setCharStream(new ANTLRFileStream(f));
+        lexer.setInputStream(new ANTLRFileStream(f));

         CommonTokenStream tokens = new CommonTokenStream(lexer);
         long start = System.currentTimeMillis();
@@ -801,8 +801,8 @@ if ( options!=null ) {
 //
 ruleref
     : RULE_REF ARG_ACTION?
-        ( (op=ROOT|op=BANG) -> ^($op ^(RULE_REF ARG_ACTION<ActionAST>?))
-        | -> ^(RULE_REF ARG_ACTION<ActionAST>?)
+        ( (op=ROOT|op=BANG) -> ^($op ^(RULE_REF<RuleRefAST> ARG_ACTION<ActionAST>?))
+        | -> ^(RULE_REF<RuleRefAST> ARG_ACTION<ActionAST>?)
         )
     ;
     catch [RecognitionException re] { throw re; } // pass upwards to element
@@ -29,21 +29,14 @@

 package org.antlr.v4.tool.ast;

-import org.antlr.runtime.CharStream;
-import org.antlr.runtime.CommonToken;
-import org.antlr.runtime.Token;
-import org.antlr.runtime.tree.CommonTree;
-import org.antlr.runtime.tree.CommonTreeNodeStream;
-import org.antlr.runtime.tree.Tree;
-import org.antlr.v4.parse.ANTLRParser;
-import org.antlr.v4.parse.GrammarASTAdaptor;
+import org.antlr.runtime.*;
+import org.antlr.runtime.tree.*;
+import org.antlr.v4.parse.*;
 import org.antlr.v4.runtime.atn.ATNState;
 import org.antlr.v4.runtime.misc.IntervalSet;
 import org.antlr.v4.tool.Grammar;

-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
+import java.util.*;

 public class GrammarAST extends CommonTree {
     /** For error msgs, nice to know which grammar this AST lives in */
@@ -57,7 +50,12 @@ public class GrammarAST extends CommonTree {

     public GrammarAST() {;}
     public GrammarAST(Token t) { super(t); }
-    public GrammarAST(GrammarAST node) { super(node); }
+    public GrammarAST(GrammarAST node) {
+        super(node);
+        this.g = node.g;
+        this.atnState = node.atnState;
+        this.textOverride = node.textOverride;
+    }
     public GrammarAST(int type) { super(new CommonToken(type, ANTLRParser.tokenNames[type])); }
     public GrammarAST(int type, Token t) {
         this(new CommonToken(type, t.getText()));
@@ -14,23 +14,28 @@ package org.antlr.v4.tool.ast;
  * t.visit(v);
  */
 public interface GrammarASTVisitor {
     Object visit(RuleAST node);
     Object visit(AltAST node);
     Object visit(DownAST node);

     /** This is the generic visitor method that will be invoked
      *  for any other kind of AST node not covered by the other visit methods.
      */
     Object visit(GrammarAST node);

     Object visit(GrammarRootAST node);
     Object visit(NotAST node);
     Object visit(RuleAST node);

     Object visit(BlockAST node);
     Object visit(OptionalBlockAST node);
     Object visit(PlusBlockAST node);
     Object visit(StarBlockAST node);

     Object visit(AltAST node);

     Object visit(NotAST node);
     Object visit(PredAST node);
     Object visit(RangeAST node);
     Object visit(SetAST node);
     Object visit(StarBlockAST node);
     Object visit(RuleRefAST node);
     Object visit(TerminalAST node);
     Object visit(TreePatternAST node);
     Object visit(UpAST node);
     Object visit(DownAST node);
 }
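The reorganized visitor interface groups the node-specific overloads and documents a generic visit(GrammarAST) that handles any node kind without its own overload. A toy illustration of that double-dispatch-with-generic-fallback shape, using hypothetical node classes rather than the tool's GrammarAST hierarchy:

// Toy illustration of the visitor-with-generic-fallback pattern used above.
interface Visitor {
    Object visit(Node node);   // generic fallback for node kinds without an overload
    Object visit(Pair node);   // specific overload
}

class Node {
    Object accept(Visitor v) { return v.visit(this); }
}

class Leaf extends Node {
    // no overload for Leaf, so the generic visit(Node) handles it
}

class Pair extends Node {
    Node left = new Leaf(), right = new Leaf();
    @Override Object accept(Visitor v) { return v.visit(this); }
}

class Printer implements Visitor {
    public Object visit(Node node) { System.out.println("generic node"); return null; }
    public Object visit(Pair node) {
        node.left.accept(this);    // children dispatch back through accept()
        node.right.accept(this);
        return null;
    }
}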
@@ -1,7 +1,16 @@
 package org.antlr.v4.tool.ast;

 import org.antlr.runtime.Token;
 import org.antlr.runtime.tree.Tree;

 public class OptionalBlockAST extends GrammarAST implements RuleElementAST {
     public OptionalBlockAST(GrammarAST node) { super(node); }
     public OptionalBlockAST(int type, Token t) { super(type, t); }

     @Override
     public Tree dupNode() { return new OptionalBlockAST(this); }

     @Override
     public Object visit(GrammarASTVisitor v) { return v.visit(this); }

 }
@@ -1,10 +1,15 @@
 package org.antlr.v4.tool.ast;

 import org.antlr.runtime.Token;
 import org.antlr.runtime.tree.Tree;

 public class PlusBlockAST extends GrammarAST implements RuleElementAST {
     public PlusBlockAST(GrammarAST node) { super(node); }
     public PlusBlockAST(int type, Token t) { super(type, t); }

     @Override
     public Tree dupNode() { return new PlusBlockAST(this); }

     @Override
     public Object visit(GrammarASTVisitor v) { return v.visit(this); }
 }
@@ -0,0 +1,49 @@
+/*
+ [The "BSD license"]
+ Copyright (c) 2011 Terence Parr
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+    derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+package org.antlr.v4.tool.ast;
+
+import org.antlr.runtime.Token;
+import org.antlr.runtime.tree.Tree;
+
+public class RuleRefAST extends GrammarASTWithOptions implements RuleElementAST {
+    public RuleRefAST(GrammarAST node) {
+        super(node);
+    }
+
+    public RuleRefAST(Token t) { super(t); }
+    public RuleRefAST(int type) { super(type); }
+    public RuleRefAST(int type, Token t) { super(type, t); }
+
+    @Override
+    public Tree dupNode() { return new TerminalAST(this); }
+
+    @Override
+    public Object visit(GrammarASTVisitor v) { return v.visit(this); }
+}
@@ -1,10 +1,15 @@
 package org.antlr.v4.tool.ast;

 import org.antlr.runtime.Token;
 import org.antlr.runtime.tree.Tree;

 public class StarBlockAST extends GrammarAST implements RuleElementAST {
     public StarBlockAST(GrammarAST node) { super(node); }
     public StarBlockAST(int type, Token t) { super(type, t); }

     @Override
     public Tree dupNode() { return new StarBlockAST(this); }

     @Override
     public Object visit(GrammarASTVisitor v) { return v.visit(this); }
 }
@@ -141,7 +141,7 @@ public abstract class BaseTest {
         return expectingTokenTypes;
     }

-    public List<Integer> getTokenTypes(String input, LexerATNSimulator lexerATN) {
+    public List<Integer> getTokenTypesViaATN(String input, LexerATNSimulator lexerATN) {
         ANTLRStringStream in = new ANTLRStringStream(input);
         List<Integer> tokenTypes = new ArrayList<Integer>();
         int ttype = 0;
@@ -267,7 +267,7 @@ public class TestATNInterpreter extends BaseTest {
     {
         ATN lexatn = createATN(lg);
         LexerATNSimulator lexInterp = new LexerATNSimulator(lexatn);
-        List<Integer> types = getTokenTypes(inputString, lexInterp);
+        List<Integer> types = getTokenTypesViaATN(inputString, lexInterp);
         System.out.println(types);

         semanticProcess(lg);
@@ -287,7 +287,7 @@ public class TestATNParserPrediction extends BaseTest {

         ParserATNSimulator interp = new ParserATNSimulator(atn);
         interp.setContextSensitive(true);
-        List<Integer> types = getTokenTypes("ab", lexInterp);
+        List<Integer> types = getTokenTypesViaATN("ab", lexInterp);
         System.out.println(types);
         TokenStream input = new IntTokenStream(types);
         int alt = interp.adaptivePredict(input, 0, b_e_ctx);
@@ -331,7 +331,7 @@ public class TestATNParserPrediction extends BaseTest {
             "s2-EOF->:s3@{[10]=2, [6]=1}\n";
         assertEquals(expecting, dfa.toString(g.getTokenDisplayNames()));

-        types = getTokenTypes("b", lexInterp);
+        types = getTokenTypesViaATN("b", lexInterp);
         System.out.println(types);
         input = new IntTokenStream(types);
         alt = interp.adaptivePredict(input, 0, null); // ctx irrelevant
@@ -343,7 +343,7 @@ public class TestATNParserPrediction extends BaseTest {
             "s2-EOF->:s3@{[10]=2, [6]=1}\n";
         assertEquals(expecting, dfa.toString(g.getTokenDisplayNames()));

-        types = getTokenTypes("aab", lexInterp);
+        types = getTokenTypesViaATN("aab", lexInterp);
         System.out.println(types);
         input = new IntTokenStream(types);
         alt = interp.adaptivePredict(input, 0, null);
@@ -488,7 +488,7 @@ public class TestATNParserPrediction extends BaseTest {
         Tool.internalOption_ShowATNConfigsInDFA = true;
         ATN lexatn = createATN(lg);
         LexerATNSimulator lexInterp = new LexerATNSimulator(lexatn);
-        List<Integer> types = getTokenTypes(inputString, lexInterp);
+        List<Integer> types = getTokenTypesViaATN(inputString, lexInterp);
         System.out.println(types);

         semanticProcess(lg);
@@ -552,7 +552,7 @@ public class TestATNParserPrediction extends BaseTest {
         // System.out.println(dot.getDOT(atn.ruleToStartState.get(g.getRule("e"))));

         ParserATNSimulator interp = new ParserATNSimulator(atn);
-        List<Integer> types = getTokenTypes(inputString, lexInterp);
+        List<Integer> types = getTokenTypesViaATN(inputString, lexInterp);
         System.out.println(types);
         TokenStream input = new IntTokenStream(types);
         try {
@@ -585,7 +585,7 @@ public class TestATNParserPrediction extends BaseTest {
         ParserATNSimulator interp = new ParserATNSimulator(atn);
         for (int i=0; i<inputString.length; i++) {
             // Check DFA
-            List<Integer> types = getTokenTypes(inputString[i], lexInterp);
+            List<Integer> types = getTokenTypesViaATN(inputString[i], lexInterp);
             System.out.println(types);
             TokenStream input = new IntTokenStream(types);
             try {