From d5b269b6b6486f416965d7c685dd56370e31a12f Mon Sep 17 00:00:00 2001
From: Sam Harwell
Date: Wed, 22 Jan 2014 21:30:26 -0600
Subject: [PATCH] Updated documentation
---
.../antlr/v4/runtime/BailErrorStrategy.java | 28 ++++++-
.../antlr/v4/runtime/BaseErrorListener.java | 4 +
.../antlr/v4/runtime/BufferedTokenStream.java | 50 +++++++-----
.../src/org/antlr/v4/runtime/CommonToken.java | 78 +++++++++++++++---
.../antlr/v4/runtime/CommonTokenFactory.java | 49 +++++++++--
.../v4/runtime/ConsoleErrorListener.java | 15 ++++
.../v4/runtime/DefaultErrorStrategy.java | 12 +--
.../v4/runtime/InterpreterRuleContext.java | 25 +++++-
.../antlr/v4/runtime/atn/PredictionMode.java | 81 +++++++++++++++----
.../antlr/v4/runtime/atn/SemanticContext.java | 26 ++++++
10 files changed, 302 insertions(+), 66 deletions(-)
diff --git a/runtime/Java/src/org/antlr/v4/runtime/BailErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/BailErrorStrategy.java
index cdf8379bc..f4874dd92 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/BailErrorStrategy.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/BailErrorStrategy.java
@@ -32,9 +32,33 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.ParseCancellationException;
-/** Bail out of parser at first syntax error. Do this to use it:
+/**
+ * This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
+ * by immediately canceling the parse operation with a
+ * {@link ParseCancellationException}. The implementation ensures that the
+ * {@link ParserRuleContext#exception} field is set for all parse tree nodes
+ * that were not completed prior to encountering the error.
*
- * {@code myparser.setErrorHandler(new BailErrorStrategy());}
+ * <p>
+ * This error strategy is useful in the following scenarios.</p>
+ *
+ * <ul>
+ * <li><strong>Two-stage parsing:</strong> This error strategy allows the first
+ * stage of two-stage parsing to immediately terminate if an error is
+ * encountered, and immediately fall back to the second stage. In addition to
+ * avoiding wasted work by attempting to recover from errors here, the empty
+ * implementation of {@link BailErrorStrategy#sync} improves the performance of
+ * the first stage.</li>
+ * <li><strong>Silent validation:</strong> When syntax errors are not being
+ * reported or logged, and the parse result is simply ignored if errors occur,
+ * the {@link BailErrorStrategy} avoids wasting work on recovering from errors
+ * when the result will be ignored either way.</li>
+ * </ul>
+ *
+ * <p>
+ * {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
+ *
+ * @see Parser#setErrorHandler(ANTLRErrorStrategy)
*/
public class BailErrorStrategy extends DefaultErrorStrategy {
/** Instead of recovering from exception {@code e}, re-throw it wrapped
diff --git a/runtime/Java/src/org/antlr/v4/runtime/BaseErrorListener.java b/runtime/Java/src/org/antlr/v4/runtime/BaseErrorListener.java
index 180f71d20..7d3e4a182 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/BaseErrorListener.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/BaseErrorListener.java
@@ -37,6 +37,10 @@ import org.antlr.v4.runtime.misc.Nullable;
import java.util.BitSet;
/**
+ * Provides an empty default implementation of {@link ANTLRErrorListener}. The
+ * default implementation of each method does nothing, but can be overridden as
+ * necessary.
+ *
* @author Sam Harwell
*/
public class BaseErrorListener implements ANTLRErrorListener {
diff --git a/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java b/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java
index dba0acfd9..9311f6665 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java
@@ -39,45 +39,53 @@ import java.util.List;
import java.util.Set;
/**
- * Buffer all input tokens but do on-demand fetching of new tokens from lexer.
- * Useful when the parser or lexer has to set context/mode info before proper
- * lexing of future tokens. The ST template parser needs this, for example,
- * because it has to constantly flip back and forth between inside/output
- * templates. E.g., {@code <names:{hi, <it>}>} has to parse names as part of an
- * expression but {@code "hi, "} as a nested template.
+ * This implementation of {@link TokenStream} loads tokens from a
+ * {@link TokenSource} on-demand, and places the tokens in a buffer to provide
+ * access to any previous token by index.
*
- * You can't use this stream if you pass whitespace or other off-channel tokens
- * to the parser. The stream can't ignore off-channel tokens.
- * ({@link UnbufferedTokenStream} is the same way.) Use
+ *
+ * This token stream ignores the value of {@link Token#getChannel}. If your
+ * parser requires the token stream to filter tokens to only those on a
+ * particular channel, such as {@link Token#DEFAULT_CHANNEL} or
+ * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
* {@link CommonTokenStream}.
*/
public class BufferedTokenStream implements TokenStream {
+ /**
+ * The {@link TokenSource} from which tokens for this stream are fetched.
+ */
@NotNull
protected TokenSource tokenSource;
/**
- * Record every single token pulled from the source so we can reproduce
- * chunks of it later. This list captures everything so we can access
- * complete input text.
+ * A collection of all tokens fetched from the token source. The list is
+ * considered a complete view of the input once {@link #fetchedEOF} is set
+ * to {@code true}.
*/
protected List tokens = new ArrayList(100);
/**
* The index into {@link #tokens} of the current token (next token to
- * consume). {@link #tokens}{@code [}{@link #p}{@code ]} should be
- * {@link #LT LT(1)}. {@link #p}{@code =-1} indicates need to initialize
- * with first token. The constructor doesn't get a token. First call to
- * {@link #LT LT(1)} or whatever gets the first token and sets
- * {@link #p}{@code =0;}.
+ * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
+ * {@link #LT LT(1)}.
+ *
+ * This field is set to -1 when the stream is first constructed or when
+ * {@link #setTokenSource} is called, indicating that the first token has
+ * not yet been fetched from the token source. For additional information,
+ * see the documentation of {@link IntStream} for a description of
+ * Initializing Methods.
*/
protected int p = -1;
/**
- * Set to {@code true} when the EOF token is fetched. Do not continue fetching
- * tokens after that point, or multiple EOF tokens could end up in the
- * {@link #tokens} array.
+ * Indicates whether the {@link Token#EOF} token has been fetched from
+ * {@link #tokenSource} and added to {@link #tokens}. This field improves
+ * performance for the following cases:
*
- * @see #fetch
+ * <ul>
+ * <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
+ * {@link #tokens} is trivial with this field.</li>
+ * </ul>
*/
protected boolean fetchedEOF;
diff --git a/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java b/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java
index e316b8155..2ef948f45 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java
@@ -36,31 +36,74 @@ import org.antlr.v4.runtime.misc.Pair;
import java.io.Serializable;
public class CommonToken implements WritableToken, Serializable {
+ /**
+ * An empty {@link Pair} which is used as the default value of
+ * {@link #source} for tokens that do not have a source.
+ */
protected static final Pair EMPTY_SOURCE =
new Pair(null, null);
+ /**
+ * This is the backing field for {@link #getType} and {@link #setType}.
+ */
protected int type;
+ /**
+ * This is the backing field for {@link #getLine} and {@link #setLine}.
+ */
protected int line;
+ /**
+ * This is the backing field for {@link #getCharPositionInLine} and
+ * {@link #setCharPositionInLine}.
+ */
protected int charPositionInLine = -1; // set to invalid position
+ /**
+ * This is the backing field for {@link #getChannel} and
+ * {@link #setChannel}.
+ */
protected int channel=DEFAULT_CHANNEL;
+ /**
+ * This is the backing field for {@link #getTokenSource} and
+ * {@link #getInputStream}.
+ *
+ *
+ * These properties share a field to reduce the memory footprint of
+ * {@link CommonToken}. Tokens created by a {@link CommonTokenFactory} from
+ * the same source and input stream share a reference to the same
+ * {@link Pair} containing these values.
+ */
protected Pair source;
- /** We need to be able to change the text once in a while. If
- * this is non-null, then getText should return this. Note that
- * start/stop are not affected by changing this.
- */
- // TODO: can store these in map in token stream rather than as field here
+ /**
+ * This is the backing field for {@link #getText} when the token text is
+ * explicitly set in the constructor or via {@link #setText}.
+ *
+ * @see #getText()
+ */
protected String text;
- /** What token number is this from 0..n-1 tokens; < 0 implies invalid index */
+ /**
+ * This is the backing field for {@link #getTokenIndex} and
+ * {@link #setTokenIndex}.
+ */
protected int index = -1;
- /** The char position into the input buffer where this token starts */
+ /**
+ * This is the backing field for {@link #getStartIndex} and
+ * {@link #setStartIndex}.
+ */
protected int start;
- /** The char position into the input buffer where this token stops */
+ /**
+ * This is the backing field for {@link #getStopIndex} and
+ * {@link #setStopIndex}.
+ */
protected int stop;
+ /**
+ * Constructs a new {@link CommonToken} with the specified token type.
+ *
+ * @param type The token type.
+ */
public CommonToken(int type) {
this.type = type;
}
@@ -77,6 +120,13 @@ public class CommonToken implements WritableToken, Serializable {
}
}
+ /**
+ * Constructs a new {@link CommonToken} with the specified token type and
+ * text.
+ *
+ * @param type The token type.
+ * @param text The text of the token.
+ */
public CommonToken(int type, String text) {
this.type = type;
this.channel = DEFAULT_CHANNEL;
@@ -129,10 +179,14 @@ public class CommonToken implements WritableToken, Serializable {
}
}
- /** Override the text for this token. getText() will return this text
- * rather than pulling from the buffer. Note that this does not mean
- * that start/stop indexes are not valid. It means that that input
- * was converted to a new string in the token object.
+ /**
+ * Explicitly set the text for this token. If {@code text} is not
+ * {@code null}, then {@link #getText} will return this value rather than
+ * extracting the text from the input.
+ *
+ * @param text The explicit text of the token, or {@code null} if the text
+ * should be obtained from the input along with the start and stop indexes
+ * of the token.
*/
@Override
public void setText(String text) {
diff --git a/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java b/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java
index b646a5de8..bb764d46b 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java
@@ -33,20 +33,57 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.Pair;
+/**
+ * This default implementation of {@link TokenFactory} creates
+ * {@link CommonToken} objects.
+ */
public class CommonTokenFactory implements TokenFactory {
+ /**
+ * The default {@link CommonTokenFactory} instance.
+ *
+ *
+ * This token factory does not explicitly copy token text when constructing
+ * tokens.
+ */
public static final TokenFactory DEFAULT = new CommonTokenFactory();
- /** Copy text for token out of input char stream. Useful when input
- * stream is unbuffered.
- * @see UnbufferedCharStream
- */
+ /**
+ * Indicates whether {@link CommonToken#setText} should be called after
+ * constructing tokens to explicitly set the text. This is useful for cases
+ * where the input stream might not be able to provide arbitrary substrings
+ * of text from the input after the lexer creates a token (e.g. the
+ * implementation of {@link CharStream#getText} in
+ * {@link UnbufferedCharStream} throws an
+ * {@link UnsupportedOperationException}). Explicitly setting the token text
+ * allows {@link Token#getText} to be called at any time regardless of the
+ * input stream implementation.
+ *
+ *
+ * The default value is {@code false} to avoid the performance and memory
+ * overhead of copying text for every token unless explicitly requested.
+ */
protected final boolean copyText;
- /** Create factory and indicate whether or not the factory copy
- * text out of the char stream.
+ /**
+ * Constructs a {@link CommonTokenFactory} with the specified value for
+ * {@link #copyText}.
+ *
+ *
+ * When {@code copyText} is {@code false}, the {@link #DEFAULT} instance
+ * should be used instead of constructing a new instance.
+ *
+ * @param copyText The value for {@link #copyText}.
*/
public CommonTokenFactory(boolean copyText) { this.copyText = copyText; }
+ /**
+ * Constructs a {@link CommonTokenFactory} with {@link #copyText} set to
+ * {@code false}.
+ *
+ *
+ * The {@link #DEFAULT} instance should be used instead of calling this
+ * directly.
+ */
public CommonTokenFactory() { this(false); }
@Override
diff --git a/runtime/Java/src/org/antlr/v4/runtime/ConsoleErrorListener.java b/runtime/Java/src/org/antlr/v4/runtime/ConsoleErrorListener.java
index 0fd3323ce..b33dc2dfc 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/ConsoleErrorListener.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/ConsoleErrorListener.java
@@ -34,8 +34,23 @@ package org.antlr.v4.runtime;
* @author Sam Harwell
*/
public class ConsoleErrorListener extends BaseErrorListener {
+ /**
+ * Provides a default instance of {@link ConsoleErrorListener}.
+ */
public static final ConsoleErrorListener INSTANCE = new ConsoleErrorListener();
+ /**
+ * {@inheritDoc}
+ *
+ *
+ * This implementation prints messages to {@link System#err} containing the
+ * values of {@code line}, {@code charPositionInLine}, and {@code msg} using
+ * the following format.
+ *
+ * <pre>
+ * line <em>line</em>:<em>charPositionInLine</em> <em>msg</em>
+ * </pre>
+ */
@Override
public void syntaxError(Recognizer<?, ?> recognizer,
Object offendingSymbol,
diff --git a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java
index 79438b485..0ae2d8358 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java
@@ -38,13 +38,15 @@ import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.runtime.misc.Pair;
-/** This is the default error handling mechanism for ANTLR parsers
- * and tree parsers.
+/**
+ * This is the default implementation of {@link ANTLRErrorStrategy} used for
+ * error reporting and recovery in ANTLR parsers.
*/
public class DefaultErrorStrategy implements ANTLRErrorStrategy {
- /** This is true after we see an error and before having successfully
- * matched a token. Prevents generation of more than one error message
- * per error.
+ /**
+ * Indicates whether the error strategy is currently "recovering from an
+ * error". This is used to suppress reporting multiple error messages while
+ * attempting to recover from a detected syntax error.
*
* @see #inErrorRecoveryMode
*/
diff --git a/runtime/Java/src/org/antlr/v4/runtime/InterpreterRuleContext.java b/runtime/Java/src/org/antlr/v4/runtime/InterpreterRuleContext.java
index 231f4797c..f4359452c 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/InterpreterRuleContext.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/InterpreterRuleContext.java
@@ -31,13 +31,32 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.misc.Nullable;
-/** This object is used by the ParserInterpreter and is the same as a regular
- * ParserRuleContext except that we need to track the rule index of the
- * current context so that we can build parse trees.
+/**
+ * This class extends {@link ParserRuleContext} by allowing the value of
+ * {@link #getRuleIndex} to be explicitly set for the context.
+ *
+ *
+ * {@link ParserRuleContext} does not include field storage for the rule index
+ * since the context classes created by the code generator override the
+ * {@link #getRuleIndex} method to return the correct value for that context.
+ * Since the parser interpreter does not use the context classes generated for a
+ * parser, this class (with slightly more memory overhead per node) is used to
+ * provide equivalent functionality.
*/
public class InterpreterRuleContext extends ParserRuleContext {
+ /**
+ * This is the backing field for {@link #getRuleIndex}.
+ */
private final int ruleIndex;
+ /**
+ * Constructs a new {@link InterpreterRuleContext} with the specified
+ * parent, invoking state, and rule index.
+ *
+ * @param parent The parent context.
+ * @param invokingStateNumber The invoking state number.
+ * @param ruleIndex The rule index for the current context.
+ */
public InterpreterRuleContext(@Nullable ParserRuleContext parent,
int invokingStateNumber,
int ruleIndex)
diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java
index 20a4673fb..15b25f4d0 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java
@@ -41,26 +41,69 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
+/**
+ * This enumeration defines the prediction modes available in ANTLR 4 along with
+ * utility methods for analyzing configuration sets for conflicts and/or
+ * ambiguities.
+ */
public enum PredictionMode {
- /** Do only local context prediction (SLL style) and using
- * heuristic which almost always works but is much faster
- * than precise answer.
+ /**
+ * The SLL(*) prediction mode. This prediction mode ignores the current
+ * parser context when making predictions. This is the fastest prediction
+ * mode, and provides correct results for many grammars. This prediction
+ * mode is more powerful than the prediction mode provided by ANTLR 3, but
+ * may result in syntax errors for grammar and input combinations which are
+ * not SLL.
+ *
+ *
+ * When using this prediction mode, the parser will either return a correct
+ * parse tree (i.e. the same parse tree that would be returned with the
+ * {@link #LL} prediction mode), or it will report a syntax error. If a
+ * syntax error is encountered when using the {@link #SLL} prediction mode,
+ * it may be due to either an actual syntax error in the input or indicate
+ * that the particular combination of grammar and input requires the more
+ * powerful {@link #LL} prediction abilities to complete successfully.
+ *
+ *
+ * This prediction mode does not provide any guarantees for prediction
+ * behavior for syntactically-incorrect inputs.
*/
SLL,
-
- /** Full LL(*) that always gets right answer. For speed
- * reasons, we terminate the prediction process when we know for
- * sure which alt to predict. We don't always know what
- * the ambiguity is in this mode.
+ /**
+ * The LL(*) prediction mode. This prediction mode allows the current parser
+ * context to be used for resolving SLL conflicts that occur during
+ * prediction. This is the fastest prediction mode that guarantees correct
+ * parse results for all combinations of grammars with syntactically correct
+ * inputs.
+ *
+ *
+ * When using this prediction mode, the parser will make correct decisions
+ * for all syntactically-correct grammar and input combinations. However, in
+ * cases where the grammar is truly ambiguous this prediction mode might not
+ * report a precise answer for exactly which alternatives are
+ * ambiguous.
+ *
+ *
+ * This prediction mode does not provide any guarantees for prediction
+ * behavior for syntactically-incorrect inputs.
*/
LL,
-
- /** Tell the full LL prediction algorithm to pursue lookahead until
- * it has uniquely predicted an alternative without conflict or it's
- * certain that it's found an ambiguous input sequence. when this
- * variable is false. When true, the prediction process will
- * continue looking for the exact ambiguous sequence even if
- * it has already figured out which alternative to predict.
+ /**
+ * The LL(*) prediction mode with exact ambiguity detection. In addition to
+ * the correctness guarantees provided by the {@link #LL} prediction mode,
+ * this prediction mode instructs the prediction algorithm to determine the
+ * complete and exact set of ambiguous alternatives for every ambiguous
+ * decision encountered while parsing.
+ *
+ *
+ * This prediction mode may be used for diagnosing ambiguities during
+ * grammar development. Due to the performance overhead of calculating sets
+ * of ambiguous alternatives, this prediction mode should be avoided when
+ * the exact results are not necessary.
+ *
+ *
+ * This prediction mode does not provide any guarantees for prediction
+ * behavior for syntactically-incorrect inputs.
*/
LL_EXACT_AMBIG_DETECTION;
@@ -77,7 +120,10 @@ public enum PredictionMode {
private AltAndContextConfigEqualityComparator() {
}
- /** Code is function of (s, _, ctx, _) */
+ /**
+ * The hash code is only a function of the {@link ATNState#stateNumber}
+ * and {@link ATNConfig#context}.
+ */
@Override
public int hashCode(ATNConfig o) {
int hashCode = MurmurHash.initialize(7);
@@ -99,7 +145,8 @@ public enum PredictionMode {
/**
* Computes the SLL prediction termination condition.
*
- * This method computes the SLL prediction termination condition for both of
+ *
+ * This method computes the SLL prediction termination condition for both of
* the following cases.
*
*
diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java b/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java
index 6362ee57a..4c3613773 100644
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java
@@ -53,6 +53,10 @@ import java.util.Set;
* {@link SemanticContext} within the scope of this outer class.
*/
public abstract class SemanticContext {
+ /**
+ * The default {@link SemanticContext}, which is semantically equivalent to
+ * a predicate of the form {@code {true}?}.
+ */
public static final SemanticContext NONE = new Predicate();
public SemanticContext parent;
@@ -201,6 +205,10 @@ public abstract class SemanticContext {
}
}
+ /**
+ * A semantic context which is true whenever none of the contained contexts
+ * is false.
+ */
public static class AND extends SemanticContext {
@NotNull public final SemanticContext[] opnds;
@@ -234,6 +242,13 @@ public abstract class SemanticContext {
return MurmurHash.hashCode(opnds, AND.class.hashCode());
}
+ /**
+ * {@inheritDoc}
+ *
+ *
+ * The evaluation of predicates by this context is short-circuiting, but
+ * unordered.
+ */
@Override
public boolean eval(Recognizer<?,?> parser, RuleContext outerContext) {
for (SemanticContext opnd : opnds) {
@@ -282,6 +297,10 @@ public abstract class SemanticContext {
}
}
+ /**
+ * A semantic context which is true whenever at least one of the contained
+ * contexts is true.
+ */
public static class OR extends SemanticContext {
@NotNull public final SemanticContext[] opnds;
@@ -315,6 +334,13 @@ public abstract class SemanticContext {
return MurmurHash.hashCode(opnds, OR.class.hashCode());
}
+ /**
+ * {@inheritDoc}
+ *
+ *
+ * The evaluation of predicates by this context is short-circuiting, but
+ * unordered.
+ */
@Override
public boolean eval(Recognizer<?,?> parser, RuleContext outerContext) {
for (SemanticContext opnd : opnds) {