From 01bbce6952dc54e38ddf3c986cb5190b082e0b94 Mon Sep 17 00:00:00 2001 From: Terence Parr Date: Sun, 14 Oct 2012 13:25:34 -0700 Subject: [PATCH] full LL is always correct now, if slower. Stops prediction when resolvesToJustOneViableAlt(). SLL always uses heuristic --- .../v4/runtime/atn/ParserATNSimulator.java | 258 ++++-------------- .../antlr/v4/runtime/atn/PredictionMode.java | 27 ++ .../org/antlr/v4/runtime/misc/TestRig.java | 3 +- tool/playground/TestJava.java | 3 +- tool/playground/TestJavaLR.java | 3 +- 5 files changed, 82 insertions(+), 212 deletions(-) create mode 100644 runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java index 498f0eda6..c5ee11b48 100755 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java @@ -44,7 +44,6 @@ import org.antlr.v4.runtime.misc.Interval; import org.antlr.v4.runtime.misc.IntervalSet; import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.Nullable; -import org.stringtemplate.v4.misc.MultiMap; import java.util.ArrayList; import java.util.Arrays; @@ -268,9 +267,7 @@ public class ParserATNSimulator extends ATNSimulator { public static int retry_with_context_predicts_same_as_alt = 0; public static int retry_with_context_from_dfa = 0; - /** A Map that uses just the state and the stack context as the key. - * Used by needMoreLookaheadLL. - */ + /** A Map that uses just the state and the stack context as the key. */ class AltAndContextMap extends FlexibleHashMap { /** Code is function of (s, _, ctx, _) */ @Override @@ -297,18 +294,7 @@ public class ParserATNSimulator extends ATNSimulator { @NotNull public final DFA[] decisionToDFA; - /** Do only local context prediction (SLL(k) style). 
*/ - protected boolean SLL = false; - - /** Tell the full LL prediction algorithm to pursue lookahead until - * it has uniquely predicted alternative without conflict or it's - * certain that it's found and ambiguous input sequence. For speed - * reasons, we terminate the prediction process early when this - * variable is false. When true, the prediction process will - * continue looking for the exact ambiguous sequence even if - * it has already figured out which alternative to predict. - */ - protected boolean exactAmbig = false; + protected PredictionMode mode = PredictionMode.LL; /** Each prediction operation uses a cache for merge of prediction contexts. * Don't keep around as it wastes huge amounts of memory. DoubleKeyMap @@ -458,7 +444,7 @@ public class ParserATNSimulator extends ATNSimulator { loop: while ( true ) { if ( dfa_debug ) System.out.println("DFA state "+s.stateNumber+" LA(1)=="+getLookaheadName(input)); - if ( s.requiresFullContext && !SLL ) { + if ( s.requiresFullContext && mode != PredictionMode.SLL ) { if ( dfa_debug ) System.out.println("ctx sensitive state "+outerContext+" in "+s); boolean fullCtx = true; ATNConfigSet s0_closure = @@ -612,7 +598,6 @@ public class ParserATNSimulator extends ATNSimulator { ATNConfigSet previous = s0.configs; DFAState previousD = s0; - ATNConfigSet fullCtxSet; if ( debug ) System.out.println("s0 = "+s0); @@ -685,30 +670,29 @@ public class ParserATNSimulator extends ATNSimulator { } else { // MORE THAN ONE VIABLE ALTERNATIVE - if ( hasConflictingAltSet(altSubSets) && !hasStateAssociatedWithOneAlt(reach) ) { + if ( hasSLLConflictTerminatingPrediction(reach) ) { D.configs.conflictingAlts = getConflictingAlts(reach); - if ( outerContext == ParserRuleContext.EMPTY || // in grammar start rule - !D.configs.dipsIntoOuterContext || // didn't fall out of rule - SLL ) // forcing SLL only - { - // SPECIAL CASE WHERE SLL KNOWS CONFLICT IS AMBIGUITY - if ( !D.configs.hasSemanticContext ) { - reportAmbiguity(dfa, D, 
startIndex, input.index(), - D.configs.conflictingAlts, D.configs); + if ( mode == PredictionMode.SLL ) { + // stop w/o failover for sure + if ( outerContext == ParserRuleContext.EMPTY || // in grammar start rule + !D.configs.dipsIntoOuterContext ) // didn't fall out of rule + { + // SPECIAL CASE WHERE SLL KNOWS CONFLICT IS AMBIGUITY + if ( !D.configs.hasSemanticContext ) { + reportAmbiguity(dfa, D, startIndex, input.index(), + D.configs.conflictingAlts, D.configs); + } } + // always stop at D D.isAcceptState = true; D.prediction = D.configs.conflictingAlts.nextSetBit(0); - if ( debug ) System.out.println("RESOLVED TO "+D.prediction+" for "+D); + if ( debug ) System.out.println("SLL RESOLVED TO "+D.prediction+" for "+D); predictedAlt = D.prediction; // Falls through to check predicates below } else { - // SLL CONFLICT; RETRY WITH FULL LL CONTEXT - // (it's possible SLL with preds could resolve to single alt - // which would mean we could avoid full LL, but not worth - // code complexity.) 
+ // RETRY WITH FULL LL CONTEXT if ( debug ) System.out.println("RETRY with outerContext="+outerContext); - // don't look up context in cache now since we're just creating state D ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, outerContext, @@ -831,15 +815,12 @@ public class ParserATNSimulator extends ATNSimulator { // System.out.println("altSubSets: "+altSubSets); reach.uniqueAlt = getUniqueAlt(altSubSets); if ( reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER ) break; - if ( exactAmbig ) { + if ( mode == PredictionMode.LL_EXACT_AMBIG ) { if ( allSubsetsConflict(altSubSets) && allSubsetsEqual(altSubSets) ) { break; } } - else if ( (hasConflictingAltSet(altSubSets) && - !hasStateAssociatedWithOneAlt(reach)) || - resolvesToJustOneViableAlt(altSubSets) ) - { + else if ( resolvesToJustOneViableAlt(altSubSets) ) { break; } previous = reach; @@ -882,7 +863,7 @@ public class ParserATNSimulator extends ATNSimulator { // reportAmbiguity(dfa, D, startIndex, input.index(), getConflictingAlts(reach), reach); // } - if ( exactAmbig ) { + if ( mode == PredictionMode.LL_EXACT_AMBIG ) { reportAmbiguity(dfa, D, startIndex, input.index(), getConflictingAlts(reach), reach); } @@ -1440,9 +1421,9 @@ public class ParserATNSimulator extends ATNSimulator { return false; # all sets conflict with len(viable_alts)>1, stop */ - public boolean needMoreLookaheadSLL(@NotNull ATNConfigSet configs) { + public boolean hasSLLConflictTerminatingPrediction(@NotNull ATNConfigSet configs) { // pure SLL mode parsing - if ( SLL ) { + if ( mode == PredictionMode.SLL ) { if ( configs.hasSemanticContext ) { // dup configs, tossing out semantic predicates ATNConfigSet dup = new ATNConfigSet(); @@ -1452,15 +1433,15 @@ public class ParserATNSimulator extends ATNSimulator { } configs = dup; } - // do usual full LL termination test - return needMoreLookaheadLL(configs); + // now we have combined contexts for configs with dissimilar preds } - // combined SLL+LL mode parsing -// System.out.println("SLL 
configs: "+configs); - // map (s,_,x,_) -> altset for all configs + + // pure SLL or combined SLL+LL mode parsing + Collection altsets = getConflictingAltSubsets(configs); -// System.out.println("SLL altsets: "+altsets); - return hasNonConflictingAltSet(altsets); + boolean heuristic = + hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs); + return heuristic; } public boolean allSubsetsConflict(Collection altsets) { @@ -1637,20 +1618,16 @@ public class ParserATNSimulator extends ATNSimulator { return false; # len(viable_alts)==1, stop */ public boolean needMoreLookaheadLL(@NotNull ATNConfigSet configs) { -// System.out.println("configs: "+configs); // map (s,_,x,_) -> altset for all configs Collection altsets = getConflictingAltSubsets(configs); -// System.out.println("altsets: "+altsets); BitSet viableAlts = new BitSet(); for (BitSet alts : altsets) { int minAlt = alts.nextSetBit(0); viableAlts.set(minAlt); if ( viableAlts.cardinality()>1 ) { // more than 1 viable alt -// System.out.println("go; viableAlts="+viableAlts); return true; // try using more lookahead } } -// System.out.println("stop"); return false; // len(viable_alts)==1, stop } @@ -1712,33 +1689,29 @@ public class ParserATNSimulator extends ATNSimulator { return false; } + int n = 0; + boolean[] viableAlts = new boolean[100]; + + public boolean hasMoreThanOneViableAlt2(Collection altsets) { + n = 0; + Arrays.fill(viableAlts, false); + for (BitSet alts : altsets) { + int minAlt = alts.nextSetBit(0); + if ( !viableAlts[minAlt] ) n++; + viableAlts[minAlt] = true; + if ( n > 1 ) { // more than 1 viable alt + return true; + } + } + return false; + } + public BitSet getConflictingAlts(ATNConfigSet configs) { Collection altsets = getConflictingAltSubsets(configs); return getAlts(altsets); } /** - * From grammar: - - s' : s s ; - s : x? 
| x ; - x : 'a' ; - - config list: (4,1), (11,1,4), (7,1), (3,1,1), (4,1,1), (8,1,1), (7,1,1), - (8,2), (11,2,8), (11,1,[8 1]) - - state to config list: - - 3 -> (3,1,1) - 4 -> (4,1), (4,1,1) - 7 -> (7,1), (7,1,1) - 8 -> (8,1,1), (8,2) - 11 -> (11,1,4), (11,2,8), (11,1,8 1) - - Walk and find state config lists with > 1 alt. If none, no conflict. - return null. Here, states 11 and 8 have lists with both alts 1 and 2. - Must check these config lists for conflicting configs. - Sam pointed out a problem with the previous definition, v3, of ambiguous states. If we have another state associated with conflicting alternatives, we should keep going. For example, the following grammar @@ -1773,118 +1746,8 @@ public class ParserATNSimulator extends ATNSimulator { looking for input reasonably, I don't declare the state done. We ignore a set of conflicting alts when we have an alternative that we still need to pursue. - - So, in summary, as long as there is a single configuration that is - not conflicting with any other configuration for that state, then - there is more input we can use to keep going. E.g., - s->[(s,1,[x]), (s,2,[x]), (s,2,[y])] - s->[(s,1,_)] - s->[(s,1,[y]), (s,2,[x])] - Regardless of what goes on for the other states, this is - sufficient to force us to add this new state to the ATN-to-DFA work list. - - TODO: split into "has nonconflict config--add to work list" and getambigalts functions - - TODO: now we know contexts are merged, can we optimize? Use big int -> config array? */ - @Nullable - public IntervalSet getConflictingAlts_old(@NotNull ATNConfigSet configs) { - if ( debug ) System.out.println("### check ambiguous "+configs); -// System.out.println("getConflictingAlts; set size="+configs.size()); - // First get a list of configurations for each state. - // Most of the time, each state will have one associated configuration. 
- MultiMap stateToConfigListMap = new MultiMap(); - Map stateToAltListMap = new HashMap(); - - for (ATNConfig c : configs) { - stateToConfigListMap.map(c.state.stateNumber, c); - IntervalSet alts = stateToAltListMap.get(c.state.stateNumber); - if ( alts==null ) { - alts = new IntervalSet(); - stateToAltListMap.put(c.state.stateNumber, alts); - } - alts.add(c.alt); - } - // potential conflicts are states, s, with > 1 configurations and diff alts - // find all alts with potential conflicts - int numPotentialConflicts = 0; - IntervalSet altsToIgnore = new IntervalSet(); - for (int state : stateToConfigListMap.keySet()) { // for each state - IntervalSet alts = stateToAltListMap.get(state); - if ( alts.size()==1 ) { - if ( !atn.states.get(state).onlyHasEpsilonTransitions() ) { - List configsPerState = stateToConfigListMap.get(state); - ATNConfig anyConfig = configsPerState.get(0); - altsToIgnore.add(anyConfig.alt); - if ( debug ) System.out.println("### one alt and all non-ep: "+configsPerState); - } - // remove state's configurations from further checking; no issues with them. - // (can't remove as it's concurrent modification; set to null) -// return null; - stateToConfigListMap.put(state, null); - } - else { - numPotentialConflicts++; - } - } - - if ( debug ) System.out.println("### altsToIgnore: "+altsToIgnore); - if ( debug ) System.out.println("### stateToConfigListMap="+stateToConfigListMap); - - if ( numPotentialConflicts==0 ) { - return null; - } - - // compare each pair of configs in sets for states with > 1 alt in config list, looking for - // (s, i, ctx) and (s, j, ctx') where ctx==ctx' or one is suffix of the other. 
- IntervalSet ambigAlts = new IntervalSet(); - for (int state : stateToConfigListMap.keySet()) { - List configsPerState = stateToConfigListMap.get(state); - if (configsPerState == null) continue; - IntervalSet alts = stateToAltListMap.get(state); -// Sam's correction to ambig def is here: - if ( !altsToIgnore.isNil() && alts.and(altsToIgnore).size()<=1 ) { -// System.err.println("ignoring alt since "+alts+"&"+altsToIgnore+ -// ".size is "+alts.and(altsToIgnore).size()); - continue; - } - int size = configsPerState.size(); - for (int i = 0; i < size; i++) { - ATNConfig c = configsPerState.get(i); - for (int j = i+1; j < size; j++) { - ATNConfig d = configsPerState.get(j); - if ( c.alt != d.alt ) { - boolean conflicting = c.context.equals(d.context); - if ( conflicting ) { - if ( debug ) { - System.out.println("we reach state "+c.state.stateNumber+ - " in rule "+ - (parser !=null ? getRuleName(c.state.ruleIndex) :"n/a")+ - " alts "+c.alt+","+d.alt+" from ctx "+c.context.toString(parser) - +" and "+ d.context.toString(parser)); - } - ambigAlts.add(c.alt); - ambigAlts.add(d.alt); - } - } - } - } - } - - if ( debug ) System.out.println("### ambigAlts="+ambigAlts); - - if ( ambigAlts.isNil() ) return null; - - // are any configs not represented in ambig alt sets -// for (ATNConfig config : configs) { -// if (!ambigAlts.contains(config.alt)) { -// return null; -// } -// } - return ambigAlts; - } - protected BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet configs) { BitSet conflictingAlts; if ( configs.uniqueAlt!= ATN.INVALID_ALT_NUMBER ) { @@ -1897,27 +1760,6 @@ public class ParserATNSimulator extends ATNSimulator { return conflictingAlts; } -// protected int resolveToMinAlt(@NotNull DFAState D, IntervalSet conflictingAlts) { -// // kill dead alts so we don't chase them ever -//// killAlts(conflictingAlts, D.configset); -// D.prediction = conflictingAlts.getMinElement(); -// if ( debug ) System.out.println("RESOLVED TO "+D.prediction+" for "+D); -// return D.prediction; 
-// } - - protected int resolveNongreedyToExitBranch(@NotNull ATNConfigSet reach, - @NotNull IntervalSet conflictingAlts) - { - // exit branch is alt 2 always; alt 1 is entry or loopback branch - // since we're predicting, create DFA accept state for exit alt - int exitAlt = 2; - conflictingAlts.remove(exitAlt); - // kill dead alts so we don't chase them ever -// killAlts(conflictingAlts, reach); - if ( debug ) System.out.println("RESOLVED TO "+reach); - return exitAlt; - } - @NotNull public String getTokenName(int t) { if ( t==Token.EOF ) return "EOF"; @@ -2078,11 +1920,9 @@ public class ParserATNSimulator extends ATNSimulator { ambigAlts, configs); } - public void setSLL(boolean SLL) { - this.SLL = SLL; + public void setPredictionMode(PredictionMode mode) { + this.mode = mode; } - public void setExactAmbig(boolean exactAmbig) { - this.exactAmbig = exactAmbig; - } + public PredictionMode getPredictionMode() { return mode; } } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java new file mode 100644 index 000000000..4825ef91d --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java @@ -0,0 +1,27 @@ +package org.antlr.v4.runtime.atn; + +public enum PredictionMode { + /** Do only local context prediction (SLL(k) style), using a + * heuristic that almost always works and is much faster + * than computing the precise answer. + */ + SLL(1), + + /** Full LL that always gets the right answer */ + LL(2), + + /** Tell the full LL prediction algorithm to pursue lookahead until + * it has uniquely predicted an alternative without conflict or it's + * certain that it's found an ambiguous input sequence. For speed + * reasons, the other modes terminate the prediction process early. + * In this mode, the prediction process will + * continue looking for the exact ambiguous sequence even if + * it has already figured out which alternative to predict. 
+ */ + LL_EXACT_AMBIG(4); + int v; + + private PredictionMode(int v) { + this.v = v; + } +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/TestRig.java b/runtime/Java/src/org/antlr/v4/runtime/misc/TestRig.java index 6d0d68ee6..2d1b0fcfb 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/misc/TestRig.java +++ b/runtime/Java/src/org/antlr/v4/runtime/misc/TestRig.java @@ -38,6 +38,7 @@ import org.antlr.v4.runtime.Parser; import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.atn.PredictionMode; import javax.print.PrintException; import java.io.FileInputStream; @@ -224,7 +225,7 @@ public class TestRig { } if ( SLL ) { - parser.getInterpreter().setSLL(true); + parser.getInterpreter().setPredictionMode(PredictionMode.SLL); } parser.setTokenStream(tokens); diff --git a/tool/playground/TestJava.java b/tool/playground/TestJava.java index c7032a345..f2d39a118 100644 --- a/tool/playground/TestJava.java +++ b/tool/playground/TestJava.java @@ -6,6 +6,7 @@ import org.antlr.v4.runtime.ParserRuleContext; import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.atn.LexerATNSimulator; import org.antlr.v4.runtime.atn.ParserATNSimulator; +import org.antlr.v4.runtime.atn.PredictionMode; import java.io.File; @@ -127,7 +128,7 @@ class TestJava { parser.setTokenStream(tokens); if ( diag ) parser.addErrorListener(new DiagnosticErrorListener()); - if ( SLL ) parser.getInterpreter().setSLL(true); + if ( SLL ) parser.getInterpreter().setPredictionMode(PredictionMode.SLL); // start parsing at the compilationUnit rule ParserRuleContext tree = parser.compilationUnit(); if ( showTree ) tree.inspect(parser); diff --git a/tool/playground/TestJavaLR.java b/tool/playground/TestJavaLR.java index 0db4ff555..28defde61 100644 --- a/tool/playground/TestJavaLR.java +++ b/tool/playground/TestJavaLR.java @@ -36,6 +36,7 @@ import org.antlr.v4.runtime.ParserRuleContext; import 
org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.atn.LexerATNSimulator; import org.antlr.v4.runtime.atn.ParserATNSimulator; +import org.antlr.v4.runtime.atn.PredictionMode; import java.io.File; import java.util.ArrayList; @@ -260,7 +261,7 @@ class TestJavaLR { JavaLRParser parser = new JavaLRParser(tokens); if ( diag ) parser.addErrorListener(new DiagnosticErrorListener()); if ( bail ) parser.setErrorHandler(new BailErrorStrategy()); - if ( SLL ) parser.getInterpreter().setSLL(true); + if ( SLL ) parser.getInterpreter().setPredictionMode(PredictionMode.SLL); // start parsing at the compilationUnit rule ParserRuleContext t = parser.compilationUnit();