From 53b65c015c9aa3c910c059334040aa6bf34fbd99 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Sat, 23 Jan 2021 17:04:01 +0800 Subject: [PATCH] fix #3042 in C# runtime --- .gitignore | 1 + runtime/CSharp/Atn/LL1Analyzer.cs | 401 +++++++++---------------- runtime/CSharp/DefaultErrorStrategy.cs | 31 +- 3 files changed, 170 insertions(+), 263 deletions(-) diff --git a/.gitignore b/.gitignore index 81e058979..00cfcd826 100644 --- a/.gitignore +++ b/.gitignore @@ -100,3 +100,4 @@ javac-services.0.log.lck # Don't ignore python tests !runtime/Python3/test/ +Antlr4.sln diff --git a/runtime/CSharp/Atn/LL1Analyzer.cs b/runtime/CSharp/Atn/LL1Analyzer.cs index 06bcebb33..e67933412 100644 --- a/runtime/CSharp/Atn/LL1Analyzer.cs +++ b/runtime/CSharp/Atn/LL1Analyzer.cs @@ -3,7 +3,6 @@ * can be found in the LICENSE.txt file in the project root. */ using System.Collections.Generic; -using Antlr4.Runtime.Atn; using Antlr4.Runtime.Misc; using Antlr4.Runtime.Sharpen; @@ -11,12 +10,9 @@ namespace Antlr4.Runtime.Atn { public class LL1Analyzer { - /// - /// Special value added to the lookahead sets to indicate that we hit - /// a predicate during analysis if - /// seeThruPreds==false - /// . - /// + /** Special value added to the lookahead sets to indicate that we hit + * a predicate during analysis if {@code seeThruPreds==false}. + */ public const int HitPred = TokenConstants.InvalidType; [NotNull] @@ -27,25 +23,16 @@ namespace Antlr4.Runtime.Atn this.atn = atn; } - /// - /// Calculates the SLL(1) expected lookahead set for each outgoing transition - /// of an - /// - /// . The returned array has one element for each - /// outgoing transition in - /// - /// . If the closure from transition - /// i leads to a semantic predicate before matching a symbol, the - /// element at index i of the result will be - /// - /// . - /// - /// the ATN state - /// - /// the expected symbols for each outgoing transition of - /// - /// . 
- /// + /** + * Calculates the SLL(1) expected lookahead set for each outgoing transition + * of an {@link ATNState}. The returned array has one element for each + * outgoing transition in {@code s}. If the closure from transition + * i leads to a semantic predicate before matching a symbol, the + * element at index i of the result will be {@code null}. + * + * @param s the ATN state + * @return the expected symbols for each outgoing transition of {@code s}. + */ [return: Nullable] public virtual IntervalSet[] GetDecisionLookahead(ATNState s) { @@ -61,7 +48,7 @@ namespace Antlr4.Runtime.Atn HashSet lookBusy = new HashSet(); bool seeThruPreds = false; // fail to get lookahead upon pred - Look(s.Transition(alt).target, null, PredictionContext.EMPTY, look[alt], lookBusy, new BitSet(), seeThruPreds, false); + Look_(s.Transition(alt).target, null, PredictionContext.EMPTY, look[alt], lookBusy, new BitSet(), seeThruPreds, false); // Wipe out lookahead for this alternative if we found nothing // or we had a predicate when we !seeThruPreds if (look[alt].Count == 0 || look[alt].Contains(HitPred)) @@ -72,190 +59,88 @@ namespace Antlr4.Runtime.Atn return look; } - /// - /// Compute set of tokens that can follow - /// - /// in the ATN in the - /// specified - /// - /// . - ///

If - /// - /// is - /// - /// and the end of the rule containing - /// - /// is reached, - /// - /// is added to the result set. - /// If - /// - /// is not - /// - /// and the end of the outermost rule is - /// reached, - /// - /// is added to the result set.

- ///
- /// the ATN state - /// - /// the complete parser context, or - /// - /// if the context - /// should be ignored - /// - /// - /// The set of tokens that can follow - /// - /// in the ATN in the - /// specified - /// - /// . - /// + /** + * Compute set of tokens that can follow {@code s} in the ATN in the + * specified {@code ctx}. + * + *

If {@code ctx} is {@code null} and the end of the rule containing + * {@code s} is reached, {@link Token#EPSILON} is added to the result set. + * If {@code ctx} is not {@code null} and the end of the outermost rule is + * reached, {@link Token#EOF} is added to the result set.

+ * + * @param s the ATN state + * @param ctx the complete parser context, or {@code null} if the context + * should be ignored + * + * @return The set of tokens that can follow {@code s} in the ATN in the + * specified {@code ctx}. + */ [return: NotNull] public virtual IntervalSet Look(ATNState s, RuleContext ctx) { return Look(s, null, ctx); } - /// - /// Compute set of tokens that can follow - /// - /// in the ATN in the - /// specified - /// - /// . - ///

If - /// - /// is - /// - /// and the end of the rule containing - /// - /// is reached, - /// - /// is added to the result set. - /// If - /// - /// is not - /// PredictionContext#EMPTY_LOCAL - /// and the end of the outermost rule is - /// reached, - /// - /// is added to the result set.

- ///
- /// the ATN state - /// - /// the ATN state to stop at. This can be a - /// - /// to detect epsilon paths through a closure. - /// - /// - /// the complete parser context, or - /// - /// if the context - /// should be ignored - /// - /// - /// The set of tokens that can follow - /// - /// in the ATN in the - /// specified - /// - /// . - /// + /** + * Compute set of tokens that can follow {@code s} in the ATN in the + * specified {@code ctx}. + * + *

If {@code ctx} is {@code null} and the end of the rule containing + * {@code s} is reached, {@link Token#EPSILON} is added to the result set. + * If {@code ctx} is not {@code null} and the end of the outermost rule is + * reached, {@link Token#EOF} is added to the result set.

+ * + * @param s the ATN state + * @param stopState the ATN state to stop at. This can be a + * {@link BlockEndState} to detect epsilon paths through a closure. + * @param ctx the complete parser context, or {@code null} if the context + * should be ignored + * + * @return The set of tokens that can follow {@code s} in the ATN in the + * specified {@code ctx}. + */ [return: NotNull] public virtual IntervalSet Look(ATNState s, ATNState stopState, RuleContext ctx) { IntervalSet r = new IntervalSet(); bool seeThruPreds = true; - PredictionContext lookContext = ctx != null ? PredictionContext.FromRuleContext(s.atn, ctx) : null; - Look(s, stopState, lookContext, r, new HashSet(), new BitSet(), seeThruPreds, true); + PredictionContext lookContext = ctx != null ? PredictionContext.FromRuleContext(s.atn, ctx) : null; + Look_(s, stopState, lookContext, r, new HashSet(), new BitSet(), seeThruPreds, true); return r; } - /// - /// Compute set of tokens that can follow - /// - /// in the ATN in the - /// specified - /// - /// . - ///

- /// If - /// - /// is - /// - /// and - /// - /// or the end of the rule containing - /// - /// is reached, - /// - /// is added to the result set. If - /// - /// is not - /// - /// and - /// - /// is - /// - /// and - /// - /// or the end of the outermost rule is reached, - /// - /// is added to the result set. - ///

- /// the ATN state. - /// - /// the ATN state to stop at. This can be a - /// - /// to detect epsilon paths through a closure. - /// - /// - /// The outer context, or - /// - /// if - /// the outer context should not be used. - /// - /// The result lookahead set. - /// - /// A set used for preventing epsilon closures in the ATN - /// from causing a stack overflow. Outside code should pass - /// new HashSet<ATNConfig> - /// for this argument. - /// - /// - /// A set used for preventing left recursion in the - /// ATN from causing a stack overflow. Outside code should pass - /// new BitSet() - /// for this argument. - /// - /// - /// - /// - /// to true semantic predicates as - /// implicitly - /// - /// and "see through them", otherwise - /// - /// to treat semantic predicates as opaque and add - /// - /// to the - /// result if one is encountered. - /// - /// - /// Add - /// - /// to the result if the end of the - /// outermost context is reached. This parameter has no effect if - /// - /// is - /// - /// . - /// - protected internal virtual void Look(ATNState s, ATNState stopState, PredictionContext ctx, IntervalSet look, HashSet lookBusy, BitSet calledRuleStack, bool seeThruPreds, bool addEOF) + /** + * Compute set of tokens that can follow {@code s} in the ATN in the + * specified {@code ctx}. + * + *

If {@code ctx} is {@code null} and {@code stopState} or the end of the + * rule containing {@code s} is reached, {@link Token#EPSILON} is added to + * the result set. If {@code ctx} is not {@code null} and {@code addEOF} is + * {@code true} and {@code stopState} or the end of the outermost rule is + * reached, {@link Token#EOF} is added to the result set.

+ * + * @param s the ATN state. + * @param stopState the ATN state to stop at. This can be a + * {@link BlockEndState} to detect epsilon paths through a closure. + * @param ctx The outer context, or {@code null} if the outer context should + * not be used. + * @param look The result lookahead set. + * @param lookBusy A set used for preventing epsilon closures in the ATN + * from causing a stack overflow. Outside code should pass + * {@code new HashSet<ATNConfig>} for this argument. + * @param calledRuleStack A set used for preventing left recursion in the + * ATN from causing a stack overflow. Outside code should pass + * {@code new BitSet()} for this argument. + * @param seeThruPreds {@code true} to treat semantic predicates as + * implicitly {@code true} and "see through them", otherwise {@code false} + * to treat semantic predicates as opaque and add {@link #HitPred} to the + * result if one is encountered. + * @param addEOF Add {@link Token#EOF} to the result if the end of the + * outermost context is reached. This parameter has no effect if {@code ctx} + * is {@code null}. 
+ */ + protected internal virtual void Look_(ATNState s, ATNState stopState, PredictionContext ctx, IntervalSet look, HashSet lookBusy, BitSet calledRuleStack, bool seeThruPreds, bool addEOF) { - // System.out.println("_LOOK("+s.stateNumber+", ctx="+ctx); ATNConfig c = new ATNConfig(s, 0, ctx); if (!lookBusy.Add(c)) { @@ -268,50 +153,51 @@ namespace Antlr4.Runtime.Atn look.Add(TokenConstants.EPSILON); return; } - else if (ctx.IsEmpty && addEOF) { - look.Add(TokenConstants.EOF); - return; - } - } - if (s is RuleStopState) - { - if (ctx == null) - { - look.Add(TokenConstants.EPSILON); - return; - } else if (ctx.IsEmpty && addEOF) { look.Add(TokenConstants.EOF); return; } - if (ctx != PredictionContext.EMPTY) - { - for (int i = 0; i < ctx.Size; i++) - { - ATNState returnState = atn.states[ctx.GetReturnState(i)]; - bool removed = calledRuleStack.Get(returnState.ruleIndex); - try - { - calledRuleStack.Clear(returnState.ruleIndex); - Look(returnState, stopState, ctx.GetParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF); - } - finally - { - if (removed) - { - calledRuleStack.Set(returnState.ruleIndex); - } - } - } - return; - } + } + if (s is RuleStopState) + { + if (ctx == null) + { + look.Add(TokenConstants.EPSILON); + return; + } + else if (ctx.IsEmpty && addEOF) + { + look.Add(TokenConstants.EOF); + return; + } + if (ctx != PredictionContext.EMPTY) + { + bool removed = calledRuleStack.Get(s.ruleIndex); + try + { + calledRuleStack.Clear(s.ruleIndex); + for (int i = 0; i < ctx.Size; i++) + { + ATNState returnState = atn.states[ctx.GetReturnState(i)]; + Look_(returnState, stopState, ctx.GetParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF); + } + } + finally + { + if (removed) + { + calledRuleStack.Set(s.ruleIndex); + } + } + return; + } } int n = s.NumberOfTransitions; for (int i_1 = 0; i_1 < n; i_1++) { Transition t = s.Transition(i_1); - if (t is RuleTransition) + if (t.GetType() == typeof(RuleTransition)) { RuleTransition ruleTransition 
= (RuleTransition)t; if (calledRuleStack.Get(ruleTransition.ruleIndex)) @@ -322,51 +208,42 @@ namespace Antlr4.Runtime.Atn try { calledRuleStack.Set(ruleTransition.target.ruleIndex); - Look(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); + Look_(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); } finally { calledRuleStack.Clear(ruleTransition.target.ruleIndex); } } - else + else if (t is AbstractPredicateTransition) { - if (t is AbstractPredicateTransition) + if (seeThruPreds) { - if (seeThruPreds) - { - Look(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); - } - else - { - look.Add(HitPred); - } + Look_(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); } else { - if (t.IsEpsilon) + look.Add(HitPred); + } + } + else if (t.IsEpsilon) + { + Look_(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); + } + else if (t.GetType() == typeof(WildcardTransition)) + { + look.AddAll(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType)); + } + else + { + IntervalSet set = t.Label; + if (set != null) + { + if (t is NotSetTransition) { - Look(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); - } - else - { - if (t is WildcardTransition) - { - look.AddAll(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType)); - } - else - { - IntervalSet set = t.Label; - if (set != null) - { - if (t is NotSetTransition) - { - set = set.Complement(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType)); - } - look.AddAll(set); - } - } + set = set.Complement(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType)); } + look.AddAll(set); } } } diff --git a/runtime/CSharp/DefaultErrorStrategy.cs b/runtime/CSharp/DefaultErrorStrategy.cs index 5dee39cdd..f208c610f 100644 --- a/runtime/CSharp/DefaultErrorStrategy.cs +++ 
b/runtime/CSharp/DefaultErrorStrategy.cs @@ -42,6 +42,21 @@ namespace Antlr4.Runtime protected internal IntervalSet lastErrorStates; + /** + * This field is used to propagate information about the lookahead following + * the previous match. Since prediction prefers completing the current rule + * to error recovery efforts, error reporting may occur later than the + * original point where it was discoverable. The original context is used to + * compute the true expected sets as though the reporting occurred as early + * as possible. + */ + protected ParserRuleContext nextTokensContext; + + /** + * @see #nextTokensContext + */ + protected int nextTokensState; + /// /// ///

The default implementation simply calls @@ -264,8 +279,22 @@ namespace Antlr4.Runtime int la = tokens.LA(1); // try cheaper subset first; might get lucky. seems to shave a wee bit off var nextTokens = recognizer.Atn.NextTokens(s); - if (nextTokens.Contains(TokenConstants.EPSILON) || nextTokens.Contains(la)) + if (nextTokens.Contains(la)) { + nextTokensContext = null; + nextTokensState = ATNState.InvalidStateNumber; + return; + } + + if (nextTokens.Contains(TokenConstants.EPSILON)) + { + if (nextTokensContext == null) + { + // It's possible the next token won't match; information tracked + // by sync is restricted for performance. + nextTokensContext = recognizer.Context; + nextTokensState = recognizer.State; + } return; } switch (s.StateType)