fix #3042 in C# runtime

This commit is contained in:
Eric Vergnaud 2021-01-23 17:04:01 +08:00
parent 107f40c63c
commit 53b65c015c
3 changed files with 170 additions and 263 deletions

1
.gitignore vendored
View File

@ -100,3 +100,4 @@ javac-services.0.log.lck
# Don't ignore python tests
!runtime/Python3/test/
Antlr4.sln

View File

@ -3,7 +3,6 @@
* can be found in the LICENSE.txt file in the project root.
*/
using System.Collections.Generic;
using Antlr4.Runtime.Atn;
using Antlr4.Runtime.Misc;
using Antlr4.Runtime.Sharpen;
@ -11,12 +10,9 @@ namespace Antlr4.Runtime.Atn
{
public class LL1Analyzer
{
/// <summary>
/// Special value added to the lookahead sets to indicate that we hit
/// a predicate during analysis if
/// <c>seeThruPreds==false</c>
/// .
/// </summary>
/** Special value added to the lookahead sets to indicate that we hit
* a predicate during analysis if {@code seeThruPreds==false}.
*/
public const int HitPred = TokenConstants.InvalidType;
[NotNull]
@ -27,25 +23,16 @@ namespace Antlr4.Runtime.Atn
this.atn = atn;
}
/// <summary>
/// Calculates the SLL(1) expected lookahead set for each outgoing transition
/// of an
/// <see cref="ATNState"/>
/// . The returned array has one element for each
/// outgoing transition in
/// <paramref name="s"/>
/// . If the closure from transition
/// <em>i</em> leads to a semantic predicate before matching a symbol, the
/// element at index <em>i</em> of the result will be
/// <see langword="null"/>
/// .
/// </summary>
/// <param name="s">the ATN state</param>
/// <returns>
/// the expected symbols for each outgoing transition of
/// <paramref name="s"/>
/// .
/// </returns>
/**
* Calculates the SLL(1) expected lookahead set for each outgoing transition
* of an {@link ATNState}. The returned array has one element for each
* outgoing transition in {@code s}. If the closure from transition
* <em>i</em> leads to a semantic predicate before matching a symbol, the
* element at index <em>i</em> of the result will be {@code null}.
*
* @param s the ATN state
* @return the expected symbols for each outgoing transition of {@code s}.
*/
[return: Nullable]
public virtual IntervalSet[] GetDecisionLookahead(ATNState s)
{
@ -61,7 +48,7 @@ namespace Antlr4.Runtime.Atn
HashSet<ATNConfig> lookBusy = new HashSet<ATNConfig>();
bool seeThruPreds = false;
// fail to get lookahead upon pred
Look(s.Transition(alt).target, null, PredictionContext.EMPTY, look[alt], lookBusy, new BitSet(), seeThruPreds, false);
Look_(s.Transition(alt).target, null, PredictionContext.EMPTY, look[alt], lookBusy, new BitSet(), seeThruPreds, false);
// Wipe out lookahead for this alternative if we found nothing
// or we had a predicate when we !seeThruPreds
if (look[alt].Count == 0 || look[alt].Contains(HitPred))
@ -72,190 +59,88 @@ namespace Antlr4.Runtime.Atn
return look;
}
/// <summary>
/// Compute set of tokens that can follow
/// <paramref name="s"/>
/// in the ATN in the
/// specified
/// <paramref name="ctx"/>
/// .
/// <p>If
/// <paramref name="ctx"/>
/// is
/// <see langword="null"/>
/// and the end of the rule containing
/// <paramref name="s"/>
/// is reached,
/// <see cref="TokenConstants.EPSILON"/>
/// is added to the result set.
/// If
/// <paramref name="ctx"/>
/// is not
/// <see langword="null"/>
/// and the end of the outermost rule is
/// reached,
/// <see cref="TokenConstants.EOF"/>
/// is added to the result set.</p>
/// </summary>
/// <param name="s">the ATN state</param>
/// <param name="ctx">
/// the complete parser context, or
/// <see langword="null"/>
/// if the context
/// should be ignored
/// </param>
/// <returns>
/// The set of tokens that can follow
/// <paramref name="s"/>
/// in the ATN in the
/// specified
/// <paramref name="ctx"/>
/// .
/// </returns>
/**
* Compute set of tokens that can follow {@code s} in the ATN in the
* specified {@code ctx}.
*
* <p>If {@code ctx} is {@code null} and the end of the rule containing
* {@code s} is reached, {@link Token#EPSILON} is added to the result set.
* If {@code ctx} is not {@code null} and the end of the outermost rule is
* reached, {@link Token#EOF} is added to the result set.</p>
*
* @param s the ATN state
* @param ctx the complete parser context, or {@code null} if the context
* should be ignored
*
* @return The set of tokens that can follow {@code s} in the ATN in the
* specified {@code ctx}.
*/
[return: NotNull]
public virtual IntervalSet Look(ATNState s, RuleContext ctx)
{
return Look(s, null, ctx);
}
/// <summary>
/// Compute set of tokens that can follow
/// <paramref name="s"/>
/// in the ATN in the
/// specified
/// <paramref name="ctx"/>
/// .
/// <p>If
/// <paramref name="ctx"/>
/// is
/// <see langword="null"/>
/// and the end of the rule containing
/// <paramref name="s"/>
/// is reached,
/// <see cref="TokenConstants.EPSILON"/>
/// is added to the result set.
/// If
/// <paramref name="ctx"/>
/// is not
/// <c>PredictionContext#EMPTY_LOCAL</c>
/// and the end of the outermost rule is
/// reached,
/// <see cref="TokenConstants.EOF"/>
/// is added to the result set.</p>
/// </summary>
/// <param name="s">the ATN state</param>
/// <param name="stopState">
/// the ATN state to stop at. This can be a
/// <see cref="BlockEndState"/>
/// to detect epsilon paths through a closure.
/// </param>
/// <param name="ctx">
/// the complete parser context, or
/// <see langword="null"/>
/// if the context
/// should be ignored
/// </param>
/// <returns>
/// The set of tokens that can follow
/// <paramref name="s"/>
/// in the ATN in the
/// specified
/// <paramref name="ctx"/>
/// .
/// </returns>
/**
* Compute set of tokens that can follow {@code s} in the ATN in the
* specified {@code ctx}.
*
* <p>If {@code ctx} is {@code null} and the end of the rule containing
* {@code s} is reached, {@link Token#EPSILON} is added to the result set.
* If {@code ctx} is not {@code null} and the end of the outermost rule is
* reached, {@link Token#EOF} is added to the result set.</p>
*
* @param s the ATN state
* @param stopState the ATN state to stop at. This can be a
* {@link BlockEndState} to detect epsilon paths through a closure.
* @param ctx the complete parser context, or {@code null} if the context
* should be ignored
*
* @return The set of tokens that can follow {@code s} in the ATN in the
* specified {@code ctx}.
*/
[return: NotNull]
public virtual IntervalSet Look(ATNState s, ATNState stopState, RuleContext ctx)
{
IntervalSet r = new IntervalSet();
bool seeThruPreds = true;
PredictionContext lookContext = ctx != null ? PredictionContext.FromRuleContext(s.atn, ctx) : null;
Look(s, stopState, lookContext, r, new HashSet<ATNConfig>(), new BitSet(), seeThruPreds, true);
PredictionContext lookContext = ctx != null ? PredictionContext.FromRuleContext(s.atn, ctx) : null;
Look_(s, stopState, lookContext, r, new HashSet<ATNConfig>(), new BitSet(), seeThruPreds, true);
return r;
}
/// <summary>
/// Compute set of tokens that can follow
/// <paramref name="s"/>
/// in the ATN in the
/// specified
/// <paramref name="ctx"/>
/// .
/// <p/>
/// If
/// <paramref name="ctx"/>
/// is
/// <see cref="PredictionContext.EMPTY"/>
/// and
/// <paramref name="stopState"/>
/// or the end of the rule containing
/// <paramref name="s"/>
/// is reached,
/// <see cref="TokenConstants.EPSILON"/>
/// is added to the result set. If
/// <paramref name="ctx"/>
/// is not
/// <see cref="PredictionContext.EMPTY"/>
/// and
/// <paramref name="addEOF"/>
/// is
/// <see langword="true"/>
/// and
/// <paramref name="stopState"/>
/// or the end of the outermost rule is reached,
/// <see cref="TokenConstants.EOF"/>
/// is added to the result set.
/// </summary>
/// <param name="s">the ATN state.</param>
/// <param name="stopState">
/// the ATN state to stop at. This can be a
/// <see cref="BlockEndState"/>
/// to detect epsilon paths through a closure.
/// </param>
/// <param name="ctx">
/// The outer context, or
/// <see cref="PredictionContext.EMPTY"/>
/// if
/// the outer context should not be used.
/// </param>
/// <param name="look">The result lookahead set.</param>
/// <param name="lookBusy">
/// A set used for preventing epsilon closures in the ATN
/// from causing a stack overflow. Outside code should pass
/// <c>new HashSet&lt;ATNConfig&gt;</c>
/// for this argument.
/// </param>
/// <param name="calledRuleStack">
/// A set used for preventing left recursion in the
/// ATN from causing a stack overflow. Outside code should pass
/// <c>new BitSet()</c>
/// for this argument.
/// </param>
/// <param name="seeThruPreds">
///
/// <see langword="true"/>
/// to true semantic predicates as
/// implicitly
/// <see langword="true"/>
/// and "see through them", otherwise
/// <see langword="false"/>
/// to treat semantic predicates as opaque and add
/// <see cref="HitPred"/>
/// to the
/// result if one is encountered.
/// </param>
/// <param name="addEOF">
/// Add
/// <see cref="TokenConstants.EOF"/>
/// to the result if the end of the
/// outermost context is reached. This parameter has no effect if
/// <paramref name="ctx"/>
/// is
/// <see cref="PredictionContext.EMPTY"/>
/// .
/// </param>
protected internal virtual void Look(ATNState s, ATNState stopState, PredictionContext ctx, IntervalSet look, HashSet<ATNConfig> lookBusy, BitSet calledRuleStack, bool seeThruPreds, bool addEOF)
/**
* Compute set of tokens that can follow {@code s} in the ATN in the
* specified {@code ctx}.
*
* <p>If {@code ctx} is {@code null} and {@code stopState} or the end of the
* rule containing {@code s} is reached, {@link Token#EPSILON} is added to
* the result set. If {@code ctx} is not {@code null} and {@code addEOF} is
* {@code true} and {@code stopState} or the end of the outermost rule is
* reached, {@link Token#EOF} is added to the result set.</p>
*
* @param s the ATN state.
* @param stopState the ATN state to stop at. This can be a
* {@link BlockEndState} to detect epsilon paths through a closure.
* @param ctx The outer context, or {@code null} if the outer context should
* not be used.
* @param look The result lookahead set.
* @param lookBusy A set used for preventing epsilon closures in the ATN
* from causing a stack overflow. Outside code should pass
* {@code new HashSet<ATNConfig>} for this argument.
* @param calledRuleStack A set used for preventing left recursion in the
* ATN from causing a stack overflow. Outside code should pass
* {@code new BitSet()} for this argument.
* @param seeThruPreds {@code true} to true semantic predicates as
* implicitly {@code true} and "see through them", otherwise {@code false}
* to treat semantic predicates as opaque and add {@link #HIT_PRED} to the
* result if one is encountered.
* @param addEOF Add {@link Token#EOF} to the result if the end of the
* outermost context is reached. This parameter has no effect if {@code ctx}
* is {@code null}.
*/
protected internal virtual void Look_(ATNState s, ATNState stopState, PredictionContext ctx, IntervalSet look, HashSet<ATNConfig> lookBusy, BitSet calledRuleStack, bool seeThruPreds, bool addEOF)
{
// System.out.println("_LOOK("+s.stateNumber+", ctx="+ctx);
ATNConfig c = new ATNConfig(s, 0, ctx);
if (!lookBusy.Add(c))
{
@ -268,50 +153,51 @@ namespace Antlr4.Runtime.Atn
look.Add(TokenConstants.EPSILON);
return;
}
else if (ctx.IsEmpty && addEOF) {
look.Add(TokenConstants.EOF);
return;
}
}
if (s is RuleStopState)
{
if (ctx == null)
{
look.Add(TokenConstants.EPSILON);
return;
}
else if (ctx.IsEmpty && addEOF)
{
look.Add(TokenConstants.EOF);
return;
}
if (ctx != PredictionContext.EMPTY)
{
for (int i = 0; i < ctx.Size; i++)
{
ATNState returnState = atn.states[ctx.GetReturnState(i)];
bool removed = calledRuleStack.Get(returnState.ruleIndex);
try
{
calledRuleStack.Clear(returnState.ruleIndex);
Look(returnState, stopState, ctx.GetParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
}
finally
{
if (removed)
{
calledRuleStack.Set(returnState.ruleIndex);
}
}
}
return;
}
}
if (s is RuleStopState)
{
if (ctx == null)
{
look.Add(TokenConstants.EPSILON);
return;
}
else if (ctx.IsEmpty && addEOF)
{
look.Add(TokenConstants.EOF);
return;
}
if (ctx != PredictionContext.EMPTY)
{
bool removed = calledRuleStack.Get(s.ruleIndex);
try
{
calledRuleStack.Clear(s.ruleIndex);
for (int i = 0; i < ctx.Size; i++)
{
ATNState returnState = atn.states[ctx.GetReturnState(i)];
Look_(returnState, stopState, ctx.GetParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
}
}
finally
{
if (removed)
{
calledRuleStack.Set(s.ruleIndex);
}
}
return;
}
}
int n = s.NumberOfTransitions;
for (int i_1 = 0; i_1 < n; i_1++)
{
Transition t = s.Transition(i_1);
if (t is RuleTransition)
if (t.GetType() == typeof(RuleTransition))
{
RuleTransition ruleTransition = (RuleTransition)t;
if (calledRuleStack.Get(ruleTransition.ruleIndex))
@ -322,51 +208,42 @@ namespace Antlr4.Runtime.Atn
try
{
calledRuleStack.Set(ruleTransition.target.ruleIndex);
Look(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
Look_(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
}
finally
{
calledRuleStack.Clear(ruleTransition.target.ruleIndex);
}
}
else
else if (t is AbstractPredicateTransition)
{
if (t is AbstractPredicateTransition)
if (seeThruPreds)
{
if (seeThruPreds)
{
Look(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
}
else
{
look.Add(HitPred);
}
Look_(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
}
else
{
if (t.IsEpsilon)
look.Add(HitPred);
}
}
else if (t.IsEpsilon)
{
Look_(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
}
else if (t.GetType() == typeof(WildcardTransition))
{
look.AddAll(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType));
}
else
{
IntervalSet set = t.Label;
if (set != null)
{
if (t is NotSetTransition)
{
Look(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF);
}
else
{
if (t is WildcardTransition)
{
look.AddAll(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType));
}
else
{
IntervalSet set = t.Label;
if (set != null)
{
if (t is NotSetTransition)
{
set = set.Complement(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType));
}
look.AddAll(set);
}
}
set = set.Complement(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType));
}
look.AddAll(set);
}
}
}

View File

@ -42,6 +42,21 @@ namespace Antlr4.Runtime
protected internal IntervalSet lastErrorStates;
/**
* This field is used to propagate information about the lookahead following
* the previous match. Since prediction prefers completing the current rule
* to error recovery efforts, error reporting may occur later than the
* original point where it was discoverable. The original context is used to
* compute the true expected sets as though the reporting occurred as early
* as possible.
*/
protected ParserRuleContext nextTokensContext;
/**
* @see #nextTokensContext
*/
protected int nextTokensState;
/// <summary>
/// <inheritDoc/>
/// <p>The default implementation simply calls
@ -264,8 +279,22 @@ namespace Antlr4.Runtime
int la = tokens.LA(1);
// try cheaper subset first; might get lucky. seems to shave a wee bit off
var nextTokens = recognizer.Atn.NextTokens(s);
if (nextTokens.Contains(TokenConstants.EPSILON) || nextTokens.Contains(la))
if (nextTokens.Contains(la))
{
nextTokensContext = null;
nextTokensState = ATNState.InvalidStateNumber;
return;
}
if (nextTokens.Contains(TokenConstants.EPSILON))
{
if (nextTokensContext == null)
{
// It's possible the next token won't match; information tracked
// by sync is restricted for performance.
nextTokensContext = recognizer.Context;
nextTokensState = recognizer.State;
}
return;
}
switch (s.StateType)