Updated documentation
This commit is contained in:
@ -32,9 +32,33 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.ParseCancellationException;
/** Bail out of parser at first syntax error. Do this to use it:
* This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
* by immediately canceling the parse operation with a
* {@link ParseCancellationException}. The implementation ensures that the
* {@link ParserRuleContext#exception} field is set for all parse tree nodes
* that were not completed prior to encountering the error.
* <p>{@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
* <p>
* This error strategy is useful in the following scenarios.</p>
* <ul>
* <li><strong>Two-stage parsing:</strong> This error strategy allows the first
* stage of two-stage parsing to immediately terminate if an error is
* encountered, and immediately fall back to the second stage. In addition to
* avoiding wasted work by attempting to recover from errors here, the empty
* implementation of {@link BailErrorStrategy#sync} improves the performance of
* the first stage.</li>
* <li><strong>Silent validation:</strong> When syntax errors are not being
* reported or logged, and the parse result is simply ignored if errors occur,
* the {@link BailErrorStrategy} avoids wasting work on recovering from errors
* when the result will be ignored either way.</li>
* </ul>
* <p>
* {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
* @see Parser#setErrorHandler(ANTLRErrorStrategy)
public class BailErrorStrategy extends DefaultErrorStrategy {
/** Instead of recovering from exception {@code e}, re-throw it wrapped
@ -37,6 +37,10 @@ import org.antlr.v4.runtime.misc.Nullable;
import java.util.BitSet;
* Provides an empty default implementation of {@link ANTLRErrorListener}. The
* default implementation of each method does nothing, but can be overridden as
* necessary.
* @author Sam Harwell
public class BaseErrorListener implements ANTLRErrorListener {
@ -39,45 +39,53 @@ import java.util.List;
import java.util.Set;
* Buffer all input tokens but do on-demand fetching of new tokens from lexer.
* Useful when the parser or lexer has to set context/mode info before proper
* lexing of future tokens. The ST template parser needs this, for example,
* because it has to constantly flip back and forth between inside/output
* templates. E.g., {@code <names:{hi, <it>}>} has to parse names as part of an
* expression but {@code "hi, <it>"} as a nested template.
* This implementation of {@link TokenStream} loads tokens from a
* {@link TokenSource} on-demand, and places the tokens in a buffer to provide
* access to any previous token by index.
* <p>You can't use this stream if you pass whitespace or other off-channel tokens
* to the parser. The stream can't ignore off-channel tokens.
* ({@link UnbufferedTokenStream} is the same way.) Use
* <p>
* This token stream ignores the value of {@link Token#getChannel}. If your
* parser requires the token stream to filter tokens to only those on a particular
* channel, such as {@link Token#DEFAULT_CHANNEL} or
* {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
* {@link CommonTokenStream}.</p>
public class BufferedTokenStream implements TokenStream {
* The {@link TokenSource} from which tokens for this stream are fetched.
protected TokenSource tokenSource;
* Record every single token pulled from the source so we can reproduce
* chunks of it later. This list captures everything so we can access
* complete input text.
* A collection of all tokens fetched from the token source. The list is
* considered a complete view of the input once {@link #fetchedEOF} is set
* to {@code true}.
protected List<Token> tokens = new ArrayList<Token>(100);
* The index into {@link #tokens} of the current token (next token to
* consume). {@link #tokens}{@code [}{@link #p}{@code ]} should be
* {@link #LT LT(1)}. {@link #p}{@code =-1} indicates need to initialize
* with first token. The constructor doesn't get a token. First call to
* {@link #LT LT(1)} or whatever gets the first token and sets
* {@link #p}{@code =0;}.
* {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
* {@link #LT LT(1)}.
* <p>This field is set to -1 when the stream is first constructed or when
* {@link #setTokenSource} is called, indicating that the first token has
* not yet been fetched from the token source. For additional information,
* see the documentation of {@link IntStream} for a description of
* Initializing Methods.</p>
protected int p = -1;
* Set to {@code true} when the EOF token is fetched. Do not continue fetching
* tokens after that point, or multiple EOF tokens could end up in the
* {@link #tokens} array.
* Indicates whether the {@link Token#EOF} token has been fetched from
* {@link #tokenSource} and added to {@link #tokens}. This field improves
* performance for the following cases:
* @see #fetch
* <ul>
* <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
* {@link #tokens} is trivial with this field.</li>
* </ul>
protected boolean fetchedEOF;
@ -36,31 +36,74 @@ import org.antlr.v4.runtime.misc.Pair;
import java.io.Serializable;
public class CommonToken implements WritableToken, Serializable {
* An empty {@link Pair} which is used as the default value of
* {@link #source} for tokens that do not have a source.
protected static final Pair<TokenSource, CharStream> EMPTY_SOURCE =
new Pair<TokenSource, CharStream>(null, null);
* This is the backing field for {@link #getType} and {@link #setType}.
protected int type;
* This is the backing field for {@link #getLine} and {@link #setLine}.
protected int line;
* This is the backing field for {@link #getCharPositionInLine} and
* {@link #setCharPositionInLine}.
protected int charPositionInLine = -1; // set to invalid position
* This is the backing field for {@link #getChannel} and
* {@link #setChannel}.
protected int channel=DEFAULT_CHANNEL;
* This is the backing field for {@link #getTokenSource} and
* {@link #getInputStream}.
* <p>
* These properties share a field to reduce the memory footprint of
* {@link CommonToken}. Tokens created by a {@link CommonTokenFactory} from
* the same source and input stream share a reference to the same
* {@link Pair} containing these values.</p>
protected Pair<TokenSource, CharStream> source;
/** We need to be able to change the text once in a while. If
* this is non-null, then getText should return this. Note that
* start/stop are not affected by changing this.
// TODO: can store these in map in token stream rather than as field here
* This is the backing field for {@link #getText} when the token text is
* explicitly set in the constructor or via {@link #setText}.
* @see #getText()
protected String text;
/** What token number is this from 0..n-1 tokens; < 0 implies invalid index */
* This is the backing field for {@link #getTokenIndex} and
* {@link #setTokenIndex}.
protected int index = -1;
/** The char position into the input buffer where this token starts */
* This is the backing field for {@link #getStartIndex} and
* {@link #setStartIndex}.
protected int start;
/** The char position into the input buffer where this token stops */
* This is the backing field for {@link #getStopIndex} and
* {@link #setStopIndex}.
protected int stop;
* Constructs a new {@link CommonToken} with the specified token type.
* @param type The token type.
public CommonToken(int type) {
this.type = type;
@ -77,6 +120,13 @@ public class CommonToken implements WritableToken, Serializable {
* Constructs a new {@link CommonToken} with the specified token type and
* text.
* @param type The token type.
* @param text The text of the token.
public CommonToken(int type, String text) {
this.type = type;
this.channel = DEFAULT_CHANNEL;
@ -129,10 +179,14 @@ public class CommonToken implements WritableToken, Serializable {
/** Override the text for this token. getText() will return this text
* rather than pulling from the buffer. Note that this does not mean
* that start/stop indexes are not valid. It means that that input
* was converted to a new string in the token object.
* Explicitly set the text for this token. If {@code text} is not
* {@code null}, then {@link #getText} will return this value rather than
* extracting the text from the input.
* @param text The explicit text of the token, or {@code null} if the text
* should be obtained from the input along with the start and stop indexes
* of the token.
public void setText(String text) {
@ -33,20 +33,57 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.Pair;
* This default implementation of {@link TokenFactory} creates
* {@link CommonToken} objects.
public class CommonTokenFactory implements TokenFactory<CommonToken> {
* The default {@link CommonTokenFactory} instance.
* <p>
* This token factory does not explicitly copy token text when constructing
* tokens.</p>
public static final TokenFactory<CommonToken> DEFAULT = new CommonTokenFactory();
/** Copy text for token out of input char stream. Useful when input
* stream is unbuffered.
* @see UnbufferedCharStream
* Indicates whether {@link CommonToken#setText} should be called after
* constructing tokens to explicitly set the text. This is useful for cases
* where the input stream might not be able to provide arbitrary substrings
* of text from the input after the lexer creates a token (e.g. the
* implementation of {@link CharStream#getText} in
* {@link UnbufferedCharStream} throws an
* {@link UnsupportedOperationException}). Explicitly setting the token text
* allows {@link Token#getText} to be called at any time regardless of the
* input stream implementation.
* <p>
* The default value is {@code false} to avoid the performance and memory
* overhead of copying text for every token unless explicitly requested.</p>
protected final boolean copyText;
/** Create factory and indicate whether or not the factory copies
* text out of the char stream.
* Constructs a {@link CommonTokenFactory} with the specified value for
* {@link #copyText}.
* <p>
* When {@code copyText} is {@code false}, the {@link #DEFAULT} instance
* should be used instead of constructing a new instance.</p>
* @param copyText The value for {@link #copyText}.
public CommonTokenFactory(boolean copyText) { this.copyText = copyText; }
* Constructs a {@link CommonTokenFactory} with {@link #copyText} set to
* {@code false}.
* <p>
* The {@link #DEFAULT} instance should be used instead of calling this
* directly.</p>
public CommonTokenFactory() { this(false); }
@ -34,8 +34,23 @@ package org.antlr.v4.runtime;
* @author Sam Harwell
public class ConsoleErrorListener extends BaseErrorListener {
* Provides a default instance of {@link ConsoleErrorListener}.
public static final ConsoleErrorListener INSTANCE = new ConsoleErrorListener();
* {@inheritDoc}
* <p>
* This implementation prints messages to {@link System#err} containing the
* values of {@code line}, {@code charPositionInLine}, and {@code msg} using
* the following format.</p>
* <pre>
* line <em>line</em>:<em>charPositionInLine</em> <em>msg</em>
* </pre>
public void syntaxError(Recognizer<?, ?> recognizer,
Object offendingSymbol,
@ -38,13 +38,15 @@ import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.runtime.misc.Pair;
/** This is the default error handling mechanism for ANTLR parsers
* and tree parsers.
* This is the default implementation of {@link ANTLRErrorStrategy} used for
* error reporting and recovery in ANTLR parsers.
public class DefaultErrorStrategy implements ANTLRErrorStrategy {
/** This is true after we see an error and before having successfully
* matched a token. Prevents generation of more than one error message
* per error.
* Indicates whether the error strategy is currently "recovering from an
* error". This is used to suppress reporting multiple error messages while
* attempting to recover from a detected syntax error.
* @see #inErrorRecoveryMode
@ -31,13 +31,32 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.Nullable;
/** This object is used by the ParserInterpreter and is the same as a regular
* ParserRuleContext except that we need to track the rule index of the
* current context so that we can build parse trees.
* This class extends {@link ParserRuleContext} by allowing the value of
* {@link #getRuleIndex} to be explicitly set for the context.
* <p>
* {@link ParserRuleContext} does not include field storage for the rule index
* since the context classes created by the code generator override the
* {@link #getRuleIndex} method to return the correct value for that context.
* Since the parser interpreter does not use the context classes generated for a
* parser, this class (with slightly more memory overhead per node) is used to
* provide equivalent functionality.</p>
public class InterpreterRuleContext extends ParserRuleContext {
* This is the backing field for {@link #getRuleIndex}.
private final int ruleIndex;
* Constructs a new {@link InterpreterRuleContext} with the specified
* parent, invoking state, and rule index.
* @param parent The parent context.
* @param invokingStateNumber The invoking state number.
* @param ruleIndex The rule index for the current context.
public InterpreterRuleContext(@Nullable ParserRuleContext parent,
int invokingStateNumber,
int ruleIndex)
@ -41,26 +41,69 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
* This enumeration defines the prediction modes available in ANTLR 4 along with
* utility methods for analyzing configuration sets for conflicts and/or
* ambiguities.
public enum PredictionMode {
/** Do only local context prediction (SLL style) and using
* heuristic which almost always works but is much faster
* than precise answer.
* The SLL(*) prediction mode. This prediction mode ignores the current
* parser context when making predictions. This is the fastest prediction
* mode, and provides correct results for many grammars. This prediction
* mode is more powerful than the prediction mode provided by ANTLR 3, but
* may result in syntax errors for grammar and input combinations which are
* not SLL.
* <p>
* When using this prediction mode, the parser will either return a correct
* parse tree (i.e. the same parse tree that would be returned with the
* {@link #LL} prediction mode), or it will report a syntax error. If a
* syntax error is encountered when using the {@link #SLL} prediction mode,
* it may be due to either an actual syntax error in the input or indicate
* that the particular combination of grammar and input requires the more
* powerful {@link #LL} prediction abilities to complete successfully.</p>
* <p>
* This prediction mode does not provide any guarantees for prediction
* behavior for syntactically-incorrect inputs.</p>
/** Full LL(*) that always gets right answer. For speed
* reasons, we terminate the prediction process when we know for
* sure which alt to predict. We don't always know what
* the ambiguity is in this mode.
* The LL(*) prediction mode. This prediction mode allows the current parser
* context to be used for resolving SLL conflicts that occur during
* prediction. This is the fastest prediction mode that guarantees correct
* parse results for all combinations of grammars with syntactically correct
* inputs.
* <p>
* When using this prediction mode, the parser will make correct decisions
* for all syntactically-correct grammar and input combinations. However, in
* cases where the grammar is truly ambiguous this prediction mode might not
* report a precise answer for <em>exactly which</em> alternatives are
* ambiguous.</p>
* <p>
* This prediction mode does not provide any guarantees for prediction
* behavior for syntactically-incorrect inputs.</p>
/** Tell the full LL prediction algorithm to pursue lookahead until
* it has uniquely predicted an alternative without conflict or it's
* certain that it's found an ambiguous input sequence. when this
* variable is false. When true, the prediction process will
* continue looking for the exact ambiguous sequence even if
* it has already figured out which alternative to predict.
* The LL(*) prediction mode with exact ambiguity detection. In addition to
* the correctness guarantees provided by the {@link #LL} prediction mode,
* this prediction mode instructs the prediction algorithm to determine the
* complete and exact set of ambiguous alternatives for every ambiguous
* decision encountered while parsing.
* <p>
* This prediction mode may be used for diagnosing ambiguities during
* grammar development. Due to the performance overhead of calculating sets
* of ambiguous alternatives, this prediction mode should be avoided when
* the exact results are not necessary.</p>
* <p>
* This prediction mode does not provide any guarantees for prediction
* behavior for syntactically-incorrect inputs.</p>
@ -77,7 +120,10 @@ public enum PredictionMode {
private AltAndContextConfigEqualityComparator() {
/** Code is function of (s, _, ctx, _) */
* The hash code is only a function of the {@link ATNState#stateNumber}
* and {@link ATNConfig#context}.
public int hashCode(ATNConfig o) {
int hashCode = MurmurHash.initialize(7);
@ -99,7 +145,8 @@ public enum PredictionMode {
* Computes the SLL prediction termination condition.
* <p>This method computes the SLL prediction termination condition for both of
* <p>
* This method computes the SLL prediction termination condition for both of
* the following cases.</p>
* <ul>
@ -53,6 +53,10 @@ import java.util.Set;
* {@link SemanticContext} within the scope of this outer class.</p>
public abstract class SemanticContext {
* The default {@link SemanticContext}, which is semantically equivalent to
* a predicate of the form {@code {true}?}.
public static final SemanticContext NONE = new Predicate();
public SemanticContext parent;
@ -201,6 +205,10 @@ public abstract class SemanticContext {
* A semantic context which is true whenever none of the contained contexts
* is false.
public static class AND extends SemanticContext {
@NotNull public final SemanticContext[] opnds;
@ -234,6 +242,13 @@ public abstract class SemanticContext {
return MurmurHash.hashCode(opnds, AND.class.hashCode());
* {@inheritDoc}
* <p>
* The evaluation of predicates by this context is short-circuiting, but
* unordered.</p>
public boolean eval(Recognizer<?,?> parser, RuleContext outerContext) {
for (SemanticContext opnd : opnds) {
@ -282,6 +297,10 @@ public abstract class SemanticContext {
* A semantic context which is true whenever at least one of the contained
* contexts is true.
public static class OR extends SemanticContext {
@NotNull public final SemanticContext[] opnds;
@ -315,6 +334,13 @@ public abstract class SemanticContext {
return MurmurHash.hashCode(opnds, OR.class.hashCode());
* {@inheritDoc}
* <p>
* The evaluation of predicates by this context is short-circuiting, but
* unordered.</p>
public boolean eval(Recognizer<?,?> parser, RuleContext outerContext) {
for (SemanticContext opnd : opnds) {
Reference in New Issue