Simplify LexerATNSimulator to reduce duplicate code (duplicate instruction executions and duplicated code blocks)

This commit is contained in:
Sam Harwell 2012-10-30 09:24:03 -05:00
parent bad2751a2c
commit efa9ea7811
3 changed files with 22 additions and 166 deletions

View File

@ -76,14 +76,12 @@ public class LexerATNSimulator extends ATNSimulator {
protected int line = 0;
protected int charPos = -1;
protected DFAState dfaState;
protected LexerATNConfig config;
/**
 * Returns this snapshot to its empty state: no captured DFA state or
 * configuration, and the position fields back at their initial values.
 */
protected void reset() {
    // The assignments are independent of each other; references are cleared first.
    dfaState = null;
    config = null;
    charPos = -1;
    line = 0;
    index = -1;
}
}
@ -111,7 +109,6 @@ public class LexerATNSimulator extends ATNSimulator {
@NotNull
protected final SimState prevAccept = new SimState();
public static int ATN_failover = 0;
public static int match_calls = 0;
public LexerATNSimulator(@NotNull ATN atn, @NotNull DFA[] decisionToDFA,
@ -165,12 +162,14 @@ public class LexerATNSimulator extends ATNSimulator {
int mark = input.mark();
traceBeginMatch(input, mode);
try {
this.startIndex = input.index();
this.prevAccept.reset();
DFA dfa = decisionToDFA[mode];
if ( dfa.s0==null ) {
return matchATN(input);
}
else {
return execDFA(input, dfa.s0);
return execATN(input, dfa.s0);
}
}
finally {
@ -189,10 +188,8 @@ public class LexerATNSimulator extends ATNSimulator {
}
// only called from outside by test code
public int matchATN(@NotNull CharStream input) {
protected int matchATN(@NotNull CharStream input) {
traceMatchATN();
startIndex = input.index();
this.prevAccept.reset();
ATNState startState = atn.modeToStartState.get(mode);
if ( debug ) {
@ -210,7 +207,7 @@ public class LexerATNSimulator extends ATNSimulator {
decisionToDFA[mode].s0 = next;
}
int predict = execATN(input, s0_closure, next);
int predict = execATN(input, next);
if ( debug ) {
System.out.format("DFA after matchATN: %s\n", decisionToDFA[old_mode].toLexerString());
@ -220,65 +217,10 @@ public class LexerATNSimulator extends ATNSimulator {
return predict;
}
/**
 * Matches a token by following already-recorded DFA edges, starting at {@code s0}.
 *
 * <p>The method records the current input index in {@code startIndex}, clears
 * {@code prevAccept}, and then repeatedly looks up the edge for the current
 * lookahead symbol. If a symbol has no recorded edge, the work is handed to
 * {@code failOverToATN} and its result is returned directly. Otherwise the
 * walk continues until an {@code ERROR} edge is reached or an accept state is
 * hit at EOF, after which {@code failOrAccept} decides the result from the
 * most recently captured accept state.
 *
 * @param input the character stream to match against; one symbol is consumed
 *              for every edge followed (except when the loop stops at EOF)
 * @param s0    the DFA state at which the walk begins
 * @return the value returned by {@code failOverToATN} when an edge is missing,
 *         otherwise the value returned by {@code failOrAccept}
 */
protected int execDFA(@NotNull CharStream input, @NotNull DFAState s0) {
traceMatchDFA();
if ( dfa_debug ) {
System.out.format("DFA[mode %d] exec LA(1)==%s\n", recog == null ? 0 : recog._mode, getTokenName(input.LA(1)));
}
//System.out.println("DFA start of execDFA: "+dfa[mode].toLexerString());
// Remember where this match attempt starts and forget any earlier accept.
startIndex = input.index();
this.prevAccept.reset();
DFAState s = s0;
traceLookahead1();
int t = input.LA(1);
loop:
while ( true ) {
if ( dfa_debug ) {
System.out.format("state %d LA(1)==%s\n", s.stateNumber, getTokenName(t));
}
// Symbols outside [MIN_DFA_EDGE, MAX_DFA_EDGE] have no slot in the edge
// table, so target stays null for them exactly as for a missing edge.
DFAState target = null;
if (s.edges != null && t >= MIN_DFA_EDGE && t <= MAX_DFA_EDGE) {
target = s.edges[t - MIN_DFA_EDGE];
}
// if no edge, pop over to ATN interpreter, update DFA and return
if (target == null) {
ATN_failover++;
return failOverToATN(input, s);
}
// An ERROR edge ends the walk; the outcome is decided after the loop
// from whatever accept state was captured so far.
if ( target == ERROR ) break;
s = target;
if ( s.isAcceptState ) {
if ( dfa_debug ) {
System.out.format("accept; predict %d in state %d\n", s.prediction, s.stateNumber);
}
// Snapshot this accept state before looking further ahead.
captureSimState(prevAccept, input, s);
// keep going unless we're at EOF; check if something else could match
// EOF never in DFA
if ( t==IntStream.EOF ) break;
}
// Advance past the matched symbol and fetch the next lookahead.
consume(input);
traceLookahead1();
t = input.LA(1);
}
// Use the configurations of the last captured accept state, or null when
// no accept state was captured during this walk.
ATNConfigSet reach = prevAccept.dfaState != null ? prevAccept.dfaState.configs : null;
return failOrAccept(prevAccept, input, reach, t);
}
protected int execATN(@NotNull CharStream input, @NotNull ATNConfigSet s0, @NotNull DFAState ds0) {
//System.out.println("enter exec index "+input.index()+" from "+s0);
@NotNull
ATNConfigSet closure = s0;
protected int execATN(@NotNull CharStream input, @NotNull DFAState ds0) {
//System.out.println("enter exec index "+input.index()+" from "+ds0.configs);
if ( debug ) {
System.out.format("start state closure=%s\n", closure);
System.out.format("start state closure=%s\n", ds0.configs);
}
traceLookahead1();
@ -288,7 +230,7 @@ public class LexerATNSimulator extends ATNSimulator {
while ( true ) { // while more work
if ( debug ) {
System.out.format("execATN loop starting closure: %s\n", closure);
System.out.format("execATN loop starting closure: %s\n", s.configs);
}
// As we move src->trg, src->trg, we keep track of the previous trg to
@ -308,25 +250,22 @@ public class LexerATNSimulator extends ATNSimulator {
// This optimization makes a lot of sense for loops within DFA.
// A character will take us back to an existing DFA state
// that already has lots of edges out of it. e.g., .* in comments.
ATNConfigSet closure = s.configs;
DFAState target = null;
ATNConfigSet reach = null;
if ( s.edges != null && t >= MIN_DFA_EDGE && t <= MAX_DFA_EDGE ) {
closure = s.configs;
target = s.edges[t - MIN_DFA_EDGE];
if (target == ERROR) {
break;
}
else if (target != null) {
if ( debug ) {
System.out.println("reuse state "+s.stateNumber+
" edge to "+target.stateNumber);
}
reach = target.configs;
if (debug && target != null) {
System.out.println("reuse state "+s.stateNumber+
" edge to "+target.stateNumber);
}
}
if (target == null) {
reach = new OrderedATNConfigSet();
ATNConfigSet reach = new OrderedATNConfigSet();
// if we don't find an existing DFA state
// Fill reach starting from closure, following t transitions
@ -344,13 +283,11 @@ public class LexerATNSimulator extends ATNSimulator {
break; // stop when we can't match any more char
}
// Did we hit a stop state during reach op?
reach = processAcceptConfigs(input, reach);
// Add an edge from s to target DFA found/created for reach
target = addDFAEdge(s, t, reach);
}
else if (target.isAcceptState) {
if (target.isAcceptState) {
traceAcceptState(target.prediction);
captureSimState(prevAccept, input, target);
}
@ -358,12 +295,10 @@ public class LexerATNSimulator extends ATNSimulator {
consume(input);
traceLookahead1();
t = input.LA(1);
closure = reach;
s = target; // flip; current DFA target becomes new src/from state
}
return failOrAccept(prevAccept, input, closure, t);
return failOrAccept(prevAccept, input, s.configs, t);
}
protected int failOrAccept(SimState prevAccept, CharStream input,
@ -377,12 +312,6 @@ public class LexerATNSimulator extends ATNSimulator {
tracePredict(prevAccept.dfaState.prediction);
return prevAccept.dfaState.prediction;
}
else if (prevAccept.config != null) {
int ruleIndex = prevAccept.config.state.ruleIndex;
accept(input, ruleIndex, prevAccept.config.lexerActionIndex,
prevAccept.index, prevAccept.line, prevAccept.charPos);
return atn.ruleToTokenType[ruleIndex];
}
else {
// if no accept and EOF is first char, return EOF
if ( t==IntStream.EOF && input.index()==startIndex ) {
@ -427,38 +356,6 @@ public class LexerATNSimulator extends ATNSimulator {
}
}
/**
 * Looks through {@code reach} for the first configuration whose state is a
 * {@code RuleStopState} and, when the input has advanced beyond the index
 * stored in {@code prevAccept}, records that configuration as the new
 * preferred accept.
 *
 * @param input the character stream being matched; only its current index is read
 * @param reach the configuration set to inspect
 * @return {@code reach}, unchanged
 */
@NotNull
protected ATNConfigSet processAcceptConfigs(@NotNull CharStream input, @NotNull ATNConfigSet reach) {
    if ( debug ) {
        System.out.format("processAcceptConfigs: reach=%s, prevAccept=%s, prevIndex=%d\n",
                          reach, prevAccept.config, prevAccept.index);
    }

    // Only the first stop-state configuration in iteration order is considered.
    LexerATNConfig stopConfig = null;
    for (ATNConfig candidate : reach) {
        if (candidate.state instanceof RuleStopState) {
            stopConfig = (LexerATNConfig)candidate;
            break;
        }
    }

    // Nothing to record when no stop state was reached, or when the input has
    // not moved past the previous accept. The strict comparison (> rather
    // than >=) favors the previous accept at the same index, so "int" stays
    // a keyword rather than an ID if the keyword is listed first.
    if (stopConfig == null || input.index() <= prevAccept.index) {
        return reach;
    }

    if ( debug && prevAccept.index >= 0 ) {
        System.out.println("processAcceptConfigs: found longer token");
    }

    // mark the new preferred accept state
    traceAcceptState(stopConfig.alt);
    captureSimState(prevAccept, input, stopConfig);
    return reach;
}
protected void accept(@NotNull CharStream input, int ruleIndex, int actionIndex,
int index, int line, int charPos)
{
@ -635,28 +532,6 @@ public class LexerATNSimulator extends ATNSimulator {
return c;
}
/**
 * Continues matching through {@code execATN} from DFA state {@code s}, which
 * is where the DFA walk found no edge for the current lookahead symbol.
 *
 * @param input the character stream being matched
 * @param s     the DFA state from which ATN execution resumes; its
 *              {@code configs} are passed to {@code execATN} as the starting set
 * @return the value returned by {@code execATN}
 */
protected int failOverToATN(@NotNull CharStream input, @NotNull DFAState s) {
    traceFailOverToATN();

    if ( dfa_debug ) {
        System.out.format("no edge for %s\n", getTokenName(input.LA(1)));
        System.out.format("ATN exec upon %s at DFA state %d = %s\n",
                          input.getText(Interval.of(startIndex, input.index())),
                          s.stateNumber,
                          s.configs);
    }

    final int tokenType = execATN(input, s.configs, s);

    if ( dfa_debug ) {
        System.out.format("back from DFA update, ttype=%d, dfa[mode %d]=\n%s\n",
                          tokenType,
                          mode,
                          decisionToDFA[mode].toLexerString());
    }

    // The ATN run has already executed the action and updated the DFA, so
    // all that is left is to trace and hand back its answer.
    tracePredict(tokenType);
    return tokenType;
}
protected void captureSimState(@NotNull SimState settings,
@NotNull CharStream input,
@NotNull DFAState dfaState)
@ -664,21 +539,9 @@ public class LexerATNSimulator extends ATNSimulator {
settings.index = input.index();
settings.line = line;
settings.charPos = charPositionInLine;
settings.config = null;
settings.dfaState = dfaState;
}
/**
 * Stores the current input index, line and column in {@code settings}
 * together with the given accepting configuration, and clears any DFA state
 * held there.
 *
 * @param settings the snapshot object to overwrite
 * @param input    the character stream whose current index is recorded
 * @param config   the configuration to remember as the accept
 */
protected void captureSimState(@NotNull SimState settings,
                               @NotNull CharStream input,
                               @NotNull LexerATNConfig config)
{
    // A configuration-based capture replaces any DFA-state-based one.
    settings.dfaState = null;
    settings.config = config;

    // Position information at the moment of capture.
    settings.charPos = charPositionInLine;
    settings.line = line;
    settings.index = input.index();
}
@NotNull
protected DFAState addDFAEdge(@NotNull DFAState from,
int t,

View File

@ -222,7 +222,7 @@ public abstract class BaseTest {
IntegerList tokenTypes = new IntegerList();
int ttype;
do {
ttype = lexerATN.matchATN(in);
ttype = lexerATN.match(in, Lexer.DEFAULT_MODE);
tokenTypes.add(ttype);
} while ( ttype!= Token.EOF );
return tokenTypes;
@ -230,8 +230,7 @@ public abstract class BaseTest {
public List<String> getTokenTypes(LexerGrammar lg,
ATN atn,
CharStream input,
boolean adaptive)
CharStream input)
{
LexerATNSimulator interp = new LexerATNSimulator(atn,new DFA[1],null);
List<String> tokenTypes = new ArrayList<String>();
@ -243,8 +242,7 @@ public abstract class BaseTest {
break;
}
int t = input.LA(1);
if ( adaptive ) ttype = interp.match(input, Lexer.DEFAULT_MODE);
else ttype = interp.matchATN(input);
ttype = interp.match(input, Lexer.DEFAULT_MODE);
if ( ttype == Token.EOF ) {
tokenTypes.add("EOF");
}

View File

@ -313,16 +313,11 @@ public class TestATNLexerInterpreter extends BaseTest {
DOTGenerator dot = new DOTGenerator(lg);
System.out.println(dot.getDOT(startState, true));
List<String> tokenTypes = getTokenTypes(lg, atn, input, false);
List<String> tokenTypes = getTokenTypes(lg, atn, input);
String result = Utils.join(tokenTypes.iterator(), ", ");
System.out.println(tokenTypes);
assertEquals(expecting, result);
// try now adaptive DFA
input.seek(0);
List<String> tokenTypes2 = getTokenTypes(lg, atn, input, true);
assertEquals("interp vs adaptive types differ", tokenTypes, tokenTypes2);
}
}