Clarify the impact of configurations in rule stop states on the parser prediction termination algorithm

2013-01-03 02:46:24 -06:00 · 2013-01-03 02:46:24 -06:00 · 4010c599ba
parent 2b2114c3ae
commit 4010c599ba
2 changed files with 81 additions and 27 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ParserATNSimulator.java
@ -909,25 +909,33 @@ public class ParserATNSimulator extends ATNSimulator {
 										   boolean fullCtx)
 	{
 		if ( debug ) System.out.println("in computeReachSet, starting closure: " + closure);
-		ATNConfigSet reach = new ATNConfigSet(fullCtx);
-		Set<ATNConfig> closureBusy = new HashSet<ATNConfig>();
 		ATNConfigSet intermediate = new ATNConfigSet(fullCtx);
+
+		/* Configurations already in a rule stop state indicate reaching the end
+		 * of the decision rule (local context) or end of the start rule (full
+		 * context). Once reached, these configurations are never updated by a
+		 * closure operation, so they are handled separately for the performance
+		 * advantage of having a smaller intermediate set when calling closure.
+		 *
+		 * For full-context reach operations, separate handling is required to
+		 * ensure that the alternative matching the longest overall sequence is
+		 * chosen when multiple such configurations can match the input.
+		 */
 		List<ATNConfig> skippedStopStates = null;
+
 		// First figure out where we can reach on input t
 		for (ATNConfig c : closure) {
 			if ( debug ) System.out.println("testing "+getTokenName(t)+" at "+c.toString());
+
 			if (c.state instanceof RuleStopState) {
 				assert c.context.isEmpty();
-				if (fullCtx) {
+				if (fullCtx || t == IntStream.EOF) {
 					if (skippedStopStates == null) {
 						skippedStopStates = new ArrayList<ATNConfig>();
 					}

 					skippedStopStates.add(c);
 				}
-				else if (t == IntStream.EOF) {
-					intermediate.add(c, mergeCache);
-				}

 				continue;
 			}
@ -942,31 +950,71 @@ public class ParserATNSimulator extends ATNSimulator {
 			}
 		}

-		// Now figure out where the closure can take us, but only if we'll
-		// need to continue looking for more input.
-		if ( skippedStopStates == null && intermediate.size()==1 ) {
-			// Don't pursue the closure if there is just one state.
-			// It can only have one alternative; just add to result
-			// Also don't pursue the closure if there is unique alternative
-			// among the configurations.
-			reach = new ATNConfigSet(intermediate);
+		// Now figure out where the reach operation can take us...
+
+		ATNConfigSet reach = null;
+
+		/* This block optimizes the reach operation for intermediate sets which
+		 * trivially indicate a termination state for the overall
+		 * adaptivePredict operation.
+		 *
+		 * The conditions assume that intermediate contains all
+		 * configurations relevant to the reach set. That assumption does
+		 * not hold when one or more configurations have been withheld in
+		 * skippedStopStates, so the optimization is skipped in that case.
+		 */
+		if (skippedStopStates == null) {
+			if ( intermediate.size()==1 ) {
+				// Don't pursue the closure if there is just one
+				// configuration. A single configuration can only predict
+				// one alternative, so use the intermediate set directly
+				// as the reach set.
+				reach = intermediate;
+			}
+			else if ( getUniqueAlt(intermediate)!=ATN.INVALID_ALT_NUMBER ) {
+				// Also don't pursue the closure if there is unique alternative
+				// among the configurations.
+				reach = intermediate;
+			}
 		}
-		else if ( skippedStopStates == null && getUniqueAlt(intermediate)==1 ) {
-			// Also don't pursue the closure if there is unique alternative
-			// among the configurations.
-			reach = new ATNConfigSet(intermediate);
-		}
-		else {
+
+		/* If the reach set could not be trivially determined, perform a closure
+		 * operation on the intermediate set to compute its initial value.
+		 */
+		if (reach == null) {
+			reach = new ATNConfigSet(fullCtx);
+			Set<ATNConfig> closureBusy = new HashSet<ATNConfig>();
 			for (ATNConfig c : intermediate) {
 				closure(c, reach, closureBusy, false, fullCtx);
 			}
 		}

 		if (t == IntStream.EOF) {
-			reach = removeNonRuleStopStates(reach);
+			/* After consuming EOF no additional input is possible, so we are
+			 * only interested in configurations which reached the end of the
+			 * decision rule (local context) or end of the start rule (full
+			 * context). Update reach to contain only these configurations. This
+			 * handles both explicit EOF transitions in the grammar and implicit
+			 * EOF transitions following the end of the decision or start rule.
+			 *
+			 * This is handled before the configurations in skippedStopStates,
+			 * because any configurations potentially added from that list are
+			 * already guaranteed to meet this condition whether or not it's
+			 * required.
+			 */
+			reach = removeAllConfigsNotInRuleStopState(reach);
 		}

-		if (skippedStopStates != null && !PredictionMode.hasConfigAtRuleStopState(reach)) {
+		/* If skippedStopStates is not null, it contains at least one
+		 * configuration. In local context (only possible when t is EOF) these
+		 * are always added back to reach. In full context they reached the end
+		 * of the start rule and are only added back if no configuration from
+		 * the current closure operation reached a rule stop state, so that
+		 * adaptivePredict chooses the alternative matching the longest overall
+		 * sequence when multiple alternatives are viable.
+		 */
+		if (skippedStopStates != null && (!fullCtx || !PredictionMode.hasConfigInRuleStopState(reach))) {
+			assert !skippedStopStates.isEmpty();
 			for (ATNConfig c : skippedStopStates) {
 				reach.add(c, mergeCache);
 			}
@ -987,8 +1035,9 @@ public class ParserATNSimulator extends ATNSimulator {
 	 * rule stop state, otherwise return a new configuration set containing only
 	 * the configurations from {@code configs} which are in a rule stop state
 	 */
-	protected ATNConfigSet removeNonRuleStopStates(ATNConfigSet configs) {
-		if (PredictionMode.onlyRuleStopStates(configs)) {
+	@NotNull
+	protected ATNConfigSet removeAllConfigsNotInRuleStopState(@NotNull ATNConfigSet configs) {
+		if (PredictionMode.allConfigsInRuleStopStates(configs)) {
 			return configs;
 		}

--- a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java
@ -183,7 +183,12 @@ public enum PredictionMode {
 	 predicates.
 	*/
 	public static boolean hasSLLConflictTerminatingPrediction(PredictionMode mode, @NotNull ATNConfigSet configs) {
-		if (onlyRuleStopStates(configs)) {
+		/* Configs in rule stop states indicate reaching the end of the decision
+		 * rule (local context) or end of start rule (full context). If all
+		 * configs meet this condition, then none of the configurations is able
+		 * to match additional input so we terminate prediction.
+		 */
+		if (allConfigsInRuleStopStates(configs)) {
 			return true;
 		}

@ -222,7 +227,7 @@ public enum PredictionMode {
 	 * @return {@code true} if any configuration in {@code configs} is in a
 	 * {@link RuleStopState}, otherwise {@code false}
 	 */
-	public static boolean hasConfigAtRuleStopState(ATNConfigSet configs) {
+	public static boolean hasConfigInRuleStopState(ATNConfigSet configs) {
 		for (ATNConfig c : configs) {
 			if (c.state instanceof RuleStopState) {
 				return true;
@ -242,7 +247,7 @@ public enum PredictionMode {
 	 * @return {@code true} if all configurations in {@code configs} are in a
 	 * {@link RuleStopState}, otherwise {@code false}
 	 */
-	public static boolean onlyRuleStopStates(@NotNull ATNConfigSet configs) {
+	public static boolean allConfigsInRuleStopStates(@NotNull ATNConfigSet configs) {
 		for (ATNConfig config : configs) {
 			if (!(config.state instanceof RuleStopState)) {
 				return false;