From 4010c599ba0813d41f6fc9a4e513306d8cad2b84 Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Thu, 3 Jan 2013 02:46:24 -0600 Subject: [PATCH] Clarify the impact of configurations in rule stop states on the parser prediction termination algorithm --- .../v4/runtime/atn/ParserATNSimulator.java | 97 ++++++++++++++----- .../antlr/v4/runtime/atn/PredictionMode.java | 11 ++- 2 files changed, 81 insertions(+), 27 deletions(-) diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java index d238b36c4..3a91b2f95 100755 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java @@ -909,25 +909,33 @@ public class ParserATNSimulator extends ATNSimulator { boolean fullCtx) { if ( debug ) System.out.println("in computeReachSet, starting closure: " + closure); - ATNConfigSet reach = new ATNConfigSet(fullCtx); - Set closureBusy = new HashSet(); ATNConfigSet intermediate = new ATNConfigSet(fullCtx); + + /* Configurations already in a rule stop state indicate reaching the end + * of the decision rule (local context) or end of the start rule (full + * context). Once reached, these configurations are never updated by a + * closure operation, so they are handled separately for the performance + * advantage of having a smaller intermediate set when calling closure. + * + * For full-context reach operations, separate handling is required to + * ensure that the alternative matching the longest overall sequence is + * chosen when multiple such configurations can match the input. + */ List skippedStopStates = null; + // First figure out where we can reach on input t for (ATNConfig c : closure) { if ( debug ) System.out.println("testing "+getTokenName(t)+" at "+c.toString()); + if (c.state instanceof RuleStopState) { assert c.context.isEmpty(); - if (fullCtx) { + if (fullCtx || t == IntStream.EOF) { if (skippedStopStates == null) { skippedStopStates = new ArrayList(); } skippedStopStates.add(c); } - else if (t == IntStream.EOF) { - intermediate.add(c, mergeCache); - } continue; } @@ -942,31 +950,71 @@ public class ParserATNSimulator extends ATNSimulator { } } - // Now figure out where the closure can take us, but only if we'll - // need to continue looking for more input. - if ( skippedStopStates == null && intermediate.size()==1 ) { - // Don't pursue the closure if there is just one state. - // It can only have one alternative; just add to result - // Also don't pursue the closure if there is unique alternative - // among the configurations. - reach = new ATNConfigSet(intermediate); + // Now figure out where the reach operation can take us... + + ATNConfigSet reach = null; + + /* This block optimizes the reach operation for intermediate sets which + * trivially indicate a termination state for the overall + * adaptivePredict operation. + * + * The conditions assume that intermediate + * contains all configurations relevant to the reach set, but this + * condition is not true when one or more configurations have been + * withheld in skippedStopStates. + */ + if (skippedStopStates == null) { + if ( intermediate.size()==1 ) { + // Don't pursue the closure if there is just one state. + // It can only have one alternative; just add to result + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = intermediate; + } + else if ( getUniqueAlt(intermediate)!=ATN.INVALID_ALT_NUMBER ) { + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = intermediate; + } } - else if ( skippedStopStates == null && getUniqueAlt(intermediate)==1 ) { - // Also don't pursue the closure if there is unique alternative - // among the configurations. - reach = new ATNConfigSet(intermediate); - } - else { + + /* If the reach set could not be trivially determined, perform a closure + * operation on the intermediate set to compute its initial value. + */ + if (reach == null) { + reach = new ATNConfigSet(fullCtx); + Set closureBusy = new HashSet(); for (ATNConfig c : intermediate) { closure(c, reach, closureBusy, false, fullCtx); } } if (t == IntStream.EOF) { - reach = removeNonRuleStopStates(reach); + /* After consuming EOF no additional input is possible, so we are + * only interested in configurations which reached the end of the + * decision rule (local context) or end of the start rule (full + * context). Update reach to contain only these configurations. This + * handles both explicit EOF transitions in the grammar and implicit + * EOF transitions following the end of the decision or start rule. + * + * This is handled before the configurations in skippedStopStates, + * because any configurations potentially added from that list are + * already guaranteed to meet this condition whether or not it's + * required. + */ + reach = removeAllConfigsNotInRuleStopState(reach); } - if (skippedStopStates != null && !PredictionMode.hasConfigAtRuleStopState(reach)) { + /* If skippedStopStates is not null, then it contains at least one + * configuration. For full-context reach operations, these + * configurations reached the end of the start rule, in which case we + * only add them back to reach if no configuration during the current + * closure operation reached such a state. This ensures adaptivePredict + * chooses an alternative matching the longest overall sequence when + * multiple alternatives are viable. + */ + if (skippedStopStates != null && (!fullCtx || !PredictionMode.hasConfigInRuleStopState(reach))) { + assert !skippedStopStates.isEmpty(); for (ATNConfig c : skippedStopStates) { reach.add(c, mergeCache); } @@ -987,8 +1035,9 @@ public class ParserATNSimulator extends ATNSimulator { * rule stop state, otherwise return a new configuration set containing only * the configurations from {@code configs} which are in a rule stop state */ - protected ATNConfigSet removeNonRuleStopStates(ATNConfigSet configs) { - if (PredictionMode.onlyRuleStopStates(configs)) { + @NotNull + protected ATNConfigSet removeAllConfigsNotInRuleStopState(@NotNull ATNConfigSet configs) { + if (PredictionMode.allConfigsInRuleStopStates(configs)) { return configs; } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java index 5179ddbe4..608aef4d1 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java @@ -183,7 +183,12 @@ public enum PredictionMode { predicates. */ public static boolean hasSLLConflictTerminatingPrediction(PredictionMode mode, @NotNull ATNConfigSet configs) { - if (onlyRuleStopStates(configs)) { + /* Configs in rule stop states indicate reaching the end of the decision + * rule (local context) or end of start rule (full context). If all + * configs meet this condition, then none of the configurations is able + * to match additional input so we terminate prediction. + */ + if (allConfigsInRuleStopStates(configs)) { return true; } @@ -222,7 +227,7 @@ public enum PredictionMode { * @return {@code true} if any configuration in {@code configs} is in a * {@link RuleStopState}, otherwise {@code false} */ - public static boolean hasConfigAtRuleStopState(ATNConfigSet configs) { + public static boolean hasConfigInRuleStopState(ATNConfigSet configs) { for (ATNConfig c : configs) { if (c.state instanceof RuleStopState) { return true; @@ -242,7 +247,7 @@ public enum PredictionMode { * @return {@code true} if all configurations in {@code configs} are in a * {@link RuleStopState}, otherwise {@code false} */ - public static boolean onlyRuleStopStates(@NotNull ATNConfigSet configs) { + public static boolean allConfigsInRuleStopStates(@NotNull ATNConfigSet configs) { for (ATNConfig config : configs) { if (!(config.state instanceof RuleStopState)) { return false;