Full LL is always correct now, if slower. Prediction stops when resolvesToJustOneViableAlt() holds. SLL always uses the heuristic.

This commit is contained in:
Terence Parr 2012-10-14 13:25:34 -07:00
parent e7ece0e90a
commit 01bbce6952
5 changed files with 82 additions and 212 deletions

View File

@ -44,7 +44,6 @@ import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.IntervalSet; import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable; import org.antlr.v4.runtime.misc.Nullable;
import org.stringtemplate.v4.misc.MultiMap;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -268,9 +267,7 @@ public class ParserATNSimulator extends ATNSimulator {
public static int retry_with_context_predicts_same_as_alt = 0; public static int retry_with_context_predicts_same_as_alt = 0;
public static int retry_with_context_from_dfa = 0; public static int retry_with_context_from_dfa = 0;
/** A Map that uses just the state and the stack context as the key. /** A Map that uses just the state and the stack context as the key. */
* Used by needMoreLookaheadLL.
*/
class AltAndContextMap extends FlexibleHashMap<ATNConfig,BitSet> { class AltAndContextMap extends FlexibleHashMap<ATNConfig,BitSet> {
/** Code is function of (s, _, ctx, _) */ /** Code is function of (s, _, ctx, _) */
@Override @Override
@ -297,18 +294,7 @@ public class ParserATNSimulator extends ATNSimulator {
@NotNull @NotNull
public final DFA[] decisionToDFA; public final DFA[] decisionToDFA;
/** Do only local context prediction (SLL(k) style). */ protected PredictionMode mode = PredictionMode.LL;
protected boolean SLL = false;
/** Tell the full LL prediction algorithm to pursue lookahead until
* it has uniquely predicted alternative without conflict or it's
* certain that it's found and ambiguous input sequence. For speed
* reasons, we terminate the prediction process early when this
* variable is false. When true, the prediction process will
* continue looking for the exact ambiguous sequence even if
* it has already figured out which alternative to predict.
*/
protected boolean exactAmbig = false;
/** Each prediction operation uses a cache for merge of prediction contexts. /** Each prediction operation uses a cache for merge of prediction contexts.
* Don't keep around as it wastes huge amounts of memory. DoubleKeyMap * Don't keep around as it wastes huge amounts of memory. DoubleKeyMap
@ -458,7 +444,7 @@ public class ParserATNSimulator extends ATNSimulator {
loop: loop:
while ( true ) { while ( true ) {
if ( dfa_debug ) System.out.println("DFA state "+s.stateNumber+" LA(1)=="+getLookaheadName(input)); if ( dfa_debug ) System.out.println("DFA state "+s.stateNumber+" LA(1)=="+getLookaheadName(input));
if ( s.requiresFullContext && !SLL ) { if ( s.requiresFullContext && mode != PredictionMode.SLL ) {
if ( dfa_debug ) System.out.println("ctx sensitive state "+outerContext+" in "+s); if ( dfa_debug ) System.out.println("ctx sensitive state "+outerContext+" in "+s);
boolean fullCtx = true; boolean fullCtx = true;
ATNConfigSet s0_closure = ATNConfigSet s0_closure =
@ -612,7 +598,6 @@ public class ParserATNSimulator extends ATNSimulator {
ATNConfigSet previous = s0.configs; ATNConfigSet previous = s0.configs;
DFAState previousD = s0; DFAState previousD = s0;
ATNConfigSet fullCtxSet;
if ( debug ) System.out.println("s0 = "+s0); if ( debug ) System.out.println("s0 = "+s0);
@ -685,30 +670,29 @@ public class ParserATNSimulator extends ATNSimulator {
} }
else { else {
// MORE THAN ONE VIABLE ALTERNATIVE // MORE THAN ONE VIABLE ALTERNATIVE
if ( hasConflictingAltSet(altSubSets) && !hasStateAssociatedWithOneAlt(reach) ) { if ( hasSLLConflictTerminatingPrediction(reach) ) {
D.configs.conflictingAlts = getConflictingAlts(reach); D.configs.conflictingAlts = getConflictingAlts(reach);
if ( outerContext == ParserRuleContext.EMPTY || // in grammar start rule if ( mode == PredictionMode.SLL ) {
!D.configs.dipsIntoOuterContext || // didn't fall out of rule // stop w/o failover for sure
SLL ) // forcing SLL only if ( outerContext == ParserRuleContext.EMPTY || // in grammar start rule
{ !D.configs.dipsIntoOuterContext ) // didn't fall out of rule
// SPECIAL CASE WHERE SLL KNOWS CONFLICT IS AMBIGUITY {
if ( !D.configs.hasSemanticContext ) { // SPECIAL CASE WHERE SLL KNOWS CONFLICT IS AMBIGUITY
reportAmbiguity(dfa, D, startIndex, input.index(), if ( !D.configs.hasSemanticContext ) {
D.configs.conflictingAlts, D.configs); reportAmbiguity(dfa, D, startIndex, input.index(),
D.configs.conflictingAlts, D.configs);
}
} }
// always stop at D
D.isAcceptState = true; D.isAcceptState = true;
D.prediction = D.configs.conflictingAlts.nextSetBit(0); D.prediction = D.configs.conflictingAlts.nextSetBit(0);
if ( debug ) System.out.println("RESOLVED TO "+D.prediction+" for "+D); if ( debug ) System.out.println("SLL RESOLVED TO "+D.prediction+" for "+D);
predictedAlt = D.prediction; predictedAlt = D.prediction;
// Falls through to check predicates below // Falls through to check predicates below
} }
else { else {
// SLL CONFLICT; RETRY WITH FULL LL CONTEXT // RETRY WITH FULL LL CONTEXT
// (it's possible SLL with preds could resolve to single alt
// which would mean we could avoid full LL, but not worth
// code complexity.)
if ( debug ) System.out.println("RETRY with outerContext="+outerContext); if ( debug ) System.out.println("RETRY with outerContext="+outerContext);
// don't look up context in cache now since we're just creating state D
ATNConfigSet s0_closure = ATNConfigSet s0_closure =
computeStartState(dfa.atnStartState, computeStartState(dfa.atnStartState,
outerContext, outerContext,
@ -831,15 +815,12 @@ public class ParserATNSimulator extends ATNSimulator {
// System.out.println("altSubSets: "+altSubSets); // System.out.println("altSubSets: "+altSubSets);
reach.uniqueAlt = getUniqueAlt(altSubSets); reach.uniqueAlt = getUniqueAlt(altSubSets);
if ( reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER ) break; if ( reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER ) break;
if ( exactAmbig ) { if ( mode == PredictionMode.LL_EXACT_AMBIG ) {
if ( allSubsetsConflict(altSubSets) && allSubsetsEqual(altSubSets) ) { if ( allSubsetsConflict(altSubSets) && allSubsetsEqual(altSubSets) ) {
break; break;
} }
} }
else if ( (hasConflictingAltSet(altSubSets) && else if ( resolvesToJustOneViableAlt(altSubSets) ) {
!hasStateAssociatedWithOneAlt(reach)) ||
resolvesToJustOneViableAlt(altSubSets) )
{
break; break;
} }
previous = reach; previous = reach;
@ -882,7 +863,7 @@ public class ParserATNSimulator extends ATNSimulator {
// reportAmbiguity(dfa, D, startIndex, input.index(), getConflictingAlts(reach), reach); // reportAmbiguity(dfa, D, startIndex, input.index(), getConflictingAlts(reach), reach);
// } // }
if ( exactAmbig ) { if ( mode == PredictionMode.LL_EXACT_AMBIG ) {
reportAmbiguity(dfa, D, startIndex, input.index(), getConflictingAlts(reach), reach); reportAmbiguity(dfa, D, startIndex, input.index(), getConflictingAlts(reach), reach);
} }
@ -1440,9 +1421,9 @@ public class ParserATNSimulator extends ATNSimulator {
return false; # all sets conflict with len(viable_alts)>1, stop return false; # all sets conflict with len(viable_alts)>1, stop
*/ */
public boolean needMoreLookaheadSLL(@NotNull ATNConfigSet configs) { public boolean hasSLLConflictTerminatingPrediction(@NotNull ATNConfigSet configs) {
// pure SLL mode parsing // pure SLL mode parsing
if ( SLL ) { if ( mode == PredictionMode.SLL ) {
if ( configs.hasSemanticContext ) { if ( configs.hasSemanticContext ) {
// dup configs, tossing out semantic predicates // dup configs, tossing out semantic predicates
ATNConfigSet dup = new ATNConfigSet(); ATNConfigSet dup = new ATNConfigSet();
@ -1452,15 +1433,15 @@ public class ParserATNSimulator extends ATNSimulator {
} }
configs = dup; configs = dup;
} }
// do usual full LL termination test // now we have combined contexts for configs with dissimilar preds
return needMoreLookaheadLL(configs);
} }
// combined SLL+LL mode parsing
// System.out.println("SLL configs: "+configs); // pure SLL or combined SLL+LL mode parsing
// map (s,_,x,_) -> altset for all configs
Collection<BitSet> altsets = getConflictingAltSubsets(configs); Collection<BitSet> altsets = getConflictingAltSubsets(configs);
// System.out.println("SLL altsets: "+altsets); boolean heuristic =
return hasNonConflictingAltSet(altsets); hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs);
return heuristic;
} }
public boolean allSubsetsConflict(Collection<BitSet> altsets) { public boolean allSubsetsConflict(Collection<BitSet> altsets) {
@ -1637,20 +1618,16 @@ public class ParserATNSimulator extends ATNSimulator {
return false; # len(viable_alts)==1, stop return false; # len(viable_alts)==1, stop
*/ */
public boolean needMoreLookaheadLL(@NotNull ATNConfigSet configs) { public boolean needMoreLookaheadLL(@NotNull ATNConfigSet configs) {
// System.out.println("configs: "+configs);
// map (s,_,x,_) -> altset for all configs // map (s,_,x,_) -> altset for all configs
Collection<BitSet> altsets = getConflictingAltSubsets(configs); Collection<BitSet> altsets = getConflictingAltSubsets(configs);
// System.out.println("altsets: "+altsets);
BitSet viableAlts = new BitSet(); BitSet viableAlts = new BitSet();
for (BitSet alts : altsets) { for (BitSet alts : altsets) {
int minAlt = alts.nextSetBit(0); int minAlt = alts.nextSetBit(0);
viableAlts.set(minAlt); viableAlts.set(minAlt);
if ( viableAlts.cardinality()>1 ) { // more than 1 viable alt if ( viableAlts.cardinality()>1 ) { // more than 1 viable alt
// System.out.println("go; viableAlts="+viableAlts);
return true; // try using more lookahead return true; // try using more lookahead
} }
} }
// System.out.println("stop");
return false; // len(viable_alts)==1, stop return false; // len(viable_alts)==1, stop
} }
@ -1712,33 +1689,29 @@ public class ParserATNSimulator extends ATNSimulator {
return false; return false;
} }
/** Number of distinct minimum alternatives counted by the most recent
 *  call to {@link #hasMoreThanOneViableAlt2}.
 */
int n = 0;

/** Scratch table indexed by alternative number; entry {@code i} is true
 *  when alt {@code i} was the minimum alt of some subset during the most
 *  recent call to {@link #hasMoreThanOneViableAlt2}. Starts with room for
 *  100 alts and is grown on demand.
 */
boolean[] viableAlts = new boolean[100];

/** Report whether the given alt subsets have more than one distinct
 *  minimum alternative, i.e., more than one viable alt.
 *
 *  Each subset contributes only its lowest set bit. Empty subsets
 *  contribute nothing. The scan stops as soon as a second distinct
 *  minimum is seen.
 *
 *  This method reuses the instance fields {@link #n} and
 *  {@link #viableAlts} as scratch space, so concurrent calls on the same
 *  instance would interfere with each other.
 *
 *  @param altsets collection of alt subsets to examine
 *  @return true if at least two different minimum alts were found
 */
public boolean hasMoreThanOneViableAlt2(Collection<BitSet> altsets) {
	n = 0;
	Arrays.fill(viableAlts, false);
	for (BitSet alts : altsets) {
		int minAlt = alts.nextSetBit(0);
		if ( minAlt < 0 ) {
			// nextSetBit returns -1 for an empty set; nothing viable here
			continue;
		}
		if ( minAlt >= viableAlts.length ) {
			// grow instead of overflowing the fixed-size table
			int newLength = Math.max(minAlt + 1, viableAlts.length * 2);
			viableAlts = Arrays.copyOf(viableAlts, newLength);
		}
		if ( !viableAlts[minAlt] ) {
			viableAlts[minAlt] = true;
			n++;
			if ( n > 1 ) { // more than 1 viable alt
				return true;
			}
		}
	}
	return false;
}
public BitSet getConflictingAlts(ATNConfigSet configs) { public BitSet getConflictingAlts(ATNConfigSet configs) {
Collection<BitSet> altsets = getConflictingAltSubsets(configs); Collection<BitSet> altsets = getConflictingAltSubsets(configs);
return getAlts(altsets); return getAlts(altsets);
} }
/** /**
* From grammar:
s' : s s ;
s : x? | x ;
x : 'a' ;
config list: (4,1), (11,1,4), (7,1), (3,1,1), (4,1,1), (8,1,1), (7,1,1),
(8,2), (11,2,8), (11,1,[8 1])
state to config list:
3 -> (3,1,1)
4 -> (4,1), (4,1,1)
7 -> (7,1), (7,1,1)
8 -> (8,1,1), (8,2)
11 -> (11,1,4), (11,2,8), (11,1,8 1)
Walk and find state config lists with > 1 alt. If none, no conflict.
return null. Here, states 11 and 8 have lists with both alts 1 and 2.
Must check these config lists for conflicting configs.
Sam pointed out a problem with the previous definition, v3, of Sam pointed out a problem with the previous definition, v3, of
ambiguous states. If we have another state associated with conflicting ambiguous states. If we have another state associated with conflicting
alternatives, we should keep going. For example, the following grammar alternatives, we should keep going. For example, the following grammar
@ -1773,118 +1746,8 @@ public class ParserATNSimulator extends ATNSimulator {
looking for input reasonably, I don't declare the state done. We looking for input reasonably, I don't declare the state done. We
ignore a set of conflicting alts when we have an alternative ignore a set of conflicting alts when we have an alternative
that we still need to pursue. that we still need to pursue.
So, in summary, as long as there is a single configuration that is
not conflicting with any other configuration for that state, then
there is more input we can use to keep going. E.g.,
s->[(s,1,[x]), (s,2,[x]), (s,2,[y])]
s->[(s,1,_)]
s->[(s,1,[y]), (s,2,[x])]
Regardless of what goes on for the other states, this is
sufficient to force us to add this new state to the ATN-to-DFA work list.
TODO: split into "has nonconflict config--add to work list" and getambigalts functions
TODO: now we know contexts are merged, can we optimize? Use big int -> config array?
*/ */
// Older computation of the set of conflicting (ambiguous) alternatives
// for a configuration set. Groups configs by ATN state number, then
// looks within each state for pairs of configs that predict different
// alts from an equal context. Returns the set of alts involved in such
// pairs, or null when there are none.
@Nullable
public IntervalSet getConflictingAlts_old(@NotNull ATNConfigSet configs) {
if ( debug ) System.out.println("### check ambiguous "+configs);
// System.out.println("getConflictingAlts; set size="+configs.size());
// First get a list of configurations for each state.
// Most of the time, each state will have one associated configuration.
MultiMap<Integer, ATNConfig> stateToConfigListMap = new MultiMap<Integer, ATNConfig>();
// Parallel map: state number -> set of alts predicted by that state's configs.
Map<Integer, IntervalSet> stateToAltListMap = new HashMap<Integer, IntervalSet>();
for (ATNConfig c : configs) {
stateToConfigListMap.map(c.state.stateNumber, c);
IntervalSet alts = stateToAltListMap.get(c.state.stateNumber);
if ( alts==null ) {
// first config seen for this state; create its alt set lazily
alts = new IntervalSet();
stateToAltListMap.put(c.state.stateNumber, alts);
}
alts.add(c.alt);
}
// potential conflicts are states, s, with > 1 configurations and diff alts
// find all alts with potential conflicts
int numPotentialConflicts = 0;
// Alts reached by a single-alt state that has a non-epsilon transition;
// used below to skip states per "Sam's correction".
IntervalSet altsToIgnore = new IntervalSet();
for (int state : stateToConfigListMap.keySet()) { // for each state
IntervalSet alts = stateToAltListMap.get(state);
if ( alts.size()==1 ) {
if ( !atn.states.get(state).onlyHasEpsilonTransitions() ) {
// all configs of this state share one alt, so any config will do
List<ATNConfig> configsPerState = stateToConfigListMap.get(state);
ATNConfig anyConfig = configsPerState.get(0);
altsToIgnore.add(anyConfig.alt);
if ( debug ) System.out.println("### one alt and all non-ep: "+configsPerState);
}
// remove state's configurations from further checking; no issues with them.
// (can't remove as it's concurrent modification; set to null)
// return null;
stateToConfigListMap.put(state, null);
}
else {
numPotentialConflicts++;
}
}
if ( debug ) System.out.println("### altsToIgnore: "+altsToIgnore);
if ( debug ) System.out.println("### stateToConfigListMap="+stateToConfigListMap);
if ( numPotentialConflicts==0 ) {
// every state predicts exactly one alt; nothing can conflict
return null;
}
// compare each pair of configs in sets for states with > 1 alt in config list, looking for
// (s, i, ctx) and (s, j, ctx') where ctx==ctx'.
// NOTE: only exact context equality (equals) is tested below; no
// suffix comparison is performed in this method.
IntervalSet ambigAlts = new IntervalSet();
for (int state : stateToConfigListMap.keySet()) {
List<ATNConfig> configsPerState = stateToConfigListMap.get(state);
// entries nulled out above were single-alt states; skip them
if (configsPerState == null) continue;
IntervalSet alts = stateToAltListMap.get(state);
// Sam's correction to ambig def is here:
// skip this state when at most one of its alts is also in altsToIgnore
// (only applies when altsToIgnore is non-empty)
if ( !altsToIgnore.isNil() && alts.and(altsToIgnore).size()<=1 ) {
// System.err.println("ignoring alt since "+alts+"&"+altsToIgnore+
// ".size is "+alts.and(altsToIgnore).size());
continue;
}
int size = configsPerState.size();
// examine every unordered pair (i, j) with i < j
for (int i = 0; i < size; i++) {
ATNConfig c = configsPerState.get(i);
for (int j = i+1; j < size; j++) {
ATNConfig d = configsPerState.get(j);
if ( c.alt != d.alt ) {
// same state, different alts: a conflict if the contexts are equal
boolean conflicting = c.context.equals(d.context);
if ( conflicting ) {
if ( debug ) {
System.out.println("we reach state "+c.state.stateNumber+
" in rule "+
(parser !=null ? getRuleName(c.state.ruleIndex) :"n/a")+
" alts "+c.alt+","+d.alt+" from ctx "+c.context.toString(parser)
+" and "+ d.context.toString(parser));
}
ambigAlts.add(c.alt);
ambigAlts.add(d.alt);
}
}
}
}
}
if ( debug ) System.out.println("### ambigAlts="+ambigAlts);
// callers receive null, not an empty set, when no conflict was found
if ( ambigAlts.isNil() ) return null;
// are any configs not represented in ambig alt sets
// for (ATNConfig config : configs) {
// if (!ambigAlts.contains(config.alt)) {
// return null;
// }
// }
return ambigAlts;
}
protected BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet configs) { protected BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet configs) {
BitSet conflictingAlts; BitSet conflictingAlts;
if ( configs.uniqueAlt!= ATN.INVALID_ALT_NUMBER ) { if ( configs.uniqueAlt!= ATN.INVALID_ALT_NUMBER ) {
@ -1897,27 +1760,6 @@ public class ParserATNSimulator extends ATNSimulator {
return conflictingAlts; return conflictingAlts;
} }
// protected int resolveToMinAlt(@NotNull DFAState D, IntervalSet conflictingAlts) {
// // kill dead alts so we don't chase them ever
//// killAlts(conflictingAlts, D.configset);
// D.prediction = conflictingAlts.getMinElement();
// if ( debug ) System.out.println("RESOLVED TO "+D.prediction+" for "+D);
// return D.prediction;
// }
/** Resolve a nongreedy decision in favor of its exit branch.
 *
 *  The exit branch is always alt 2; alt 1 is the entry or loopback
 *  branch. The exit alt is removed from {@code conflictingAlts} and then
 *  returned as the prediction. Configurations for the remaining alts are
 *  not pruned from {@code reach} here.
 *
 *  @param reach configuration set being resolved; only used for the
 *         debug trace
 *  @param conflictingAlts set of conflicting alts; modified in place by
 *         removing the exit alt
 *  @return the exit alt, which is always 2
 */
protected int resolveNongreedyToExitBranch(@NotNull ATNConfigSet reach,
										   @NotNull IntervalSet conflictingAlts)
{
	final int exitBranchAlt = 2;
	conflictingAlts.remove(exitBranchAlt);
	if ( debug ) {
		System.out.println("RESOLVED TO "+reach);
	}
	return exitBranchAlt;
}
@NotNull @NotNull
public String getTokenName(int t) { public String getTokenName(int t) {
if ( t==Token.EOF ) return "EOF"; if ( t==Token.EOF ) return "EOF";
@ -2078,11 +1920,9 @@ public class ParserATNSimulator extends ATNSimulator {
ambigAlts, configs); ambigAlts, configs);
} }
public void setSLL(boolean SLL) { public void setPredictionMode(PredictionMode mode) {
this.SLL = SLL; this.mode = mode;
} }
public void setExactAmbig(boolean exactAmbig) { public PredictionMode getPredictionMode() { return mode; }
this.exactAmbig = exactAmbig;
}
} }

View File

@ -0,0 +1,27 @@
package org.antlr.v4.runtime.atn;
/** Selects the strategy the parser ATN simulator uses when predicting
 *  which alternative to take at a decision.
 */
public enum PredictionMode {
	/** Do only local context prediction (SLL(k) style), using a
	 *  heuristic which almost always works but is much faster
	 *  than computing the precise answer.
	 */
	SLL(1),

	/** Full LL that always gets the right answer. */
	LL(2),

	/** Tell the full LL prediction algorithm to pursue lookahead until
	 *  it has uniquely predicted an alternative without conflict or it's
	 *  certain that it has found an ambiguous input sequence. For speed
	 *  reasons, the other modes terminate the prediction process early.
	 *  In this mode, the prediction process will continue looking for
	 *  the exact ambiguous sequence even if it has already figured out
	 *  which alternative to predict.
	 */
	LL_EXACT_AMBIG(4);

	/** Integer code attached to the mode (1, 2 and 4 for the constants
	 *  above). NOTE(review): the values are distinct powers of two,
	 *  presumably so they could be combined as flags; confirm intent.
	 *  Final because enum constants are shared by every user.
	 */
	final int v;

	PredictionMode(int v) {
		this.v = v;
	}
}

View File

@ -38,6 +38,7 @@ import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenStream; import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.atn.PredictionMode;
import javax.print.PrintException; import javax.print.PrintException;
import java.io.FileInputStream; import java.io.FileInputStream;
@ -224,7 +225,7 @@ public class TestRig {
} }
if ( SLL ) { if ( SLL ) {
parser.getInterpreter().setSLL(true); parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
} }
parser.setTokenStream(tokens); parser.setTokenStream(tokens);

View File

@ -6,6 +6,7 @@ import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.LexerATNSimulator; import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.atn.ParserATNSimulator; import org.antlr.v4.runtime.atn.ParserATNSimulator;
import org.antlr.v4.runtime.atn.PredictionMode;
import java.io.File; import java.io.File;
@ -127,7 +128,7 @@ class TestJava {
parser.setTokenStream(tokens); parser.setTokenStream(tokens);
if ( diag ) parser.addErrorListener(new DiagnosticErrorListener()); if ( diag ) parser.addErrorListener(new DiagnosticErrorListener());
if ( SLL ) parser.getInterpreter().setSLL(true); if ( SLL ) parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
// start parsing at the compilationUnit rule // start parsing at the compilationUnit rule
ParserRuleContext<Token> tree = parser.compilationUnit(); ParserRuleContext<Token> tree = parser.compilationUnit();
if ( showTree ) tree.inspect(parser); if ( showTree ) tree.inspect(parser);

View File

@ -36,6 +36,7 @@ import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.LexerATNSimulator; import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.atn.ParserATNSimulator; import org.antlr.v4.runtime.atn.ParserATNSimulator;
import org.antlr.v4.runtime.atn.PredictionMode;
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
@ -260,7 +261,7 @@ class TestJavaLR {
JavaLRParser parser = new JavaLRParser(tokens); JavaLRParser parser = new JavaLRParser(tokens);
if ( diag ) parser.addErrorListener(new DiagnosticErrorListener()); if ( diag ) parser.addErrorListener(new DiagnosticErrorListener());
if ( bail ) parser.setErrorHandler(new BailErrorStrategy()); if ( bail ) parser.setErrorHandler(new BailErrorStrategy());
if ( SLL ) parser.getInterpreter().setSLL(true); if ( SLL ) parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
// start parsing at the compilationUnit rule // start parsing at the compilationUnit rule
ParserRuleContext<Token> t = parser.compilationUnit(); ParserRuleContext<Token> t = parser.compilationUnit();