From d5b269b6b6486f416965d7c685dd56370e31a12f Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Wed, 22 Jan 2014 21:30:26 -0600 Subject: [PATCH] Updated documentation --- .../antlr/v4/runtime/BailErrorStrategy.java | 28 ++++++- .../antlr/v4/runtime/BaseErrorListener.java | 4 + .../antlr/v4/runtime/BufferedTokenStream.java | 50 +++++++----- .../src/org/antlr/v4/runtime/CommonToken.java | 78 +++++++++++++++--- .../antlr/v4/runtime/CommonTokenFactory.java | 49 +++++++++-- .../v4/runtime/ConsoleErrorListener.java | 15 ++++ .../v4/runtime/DefaultErrorStrategy.java | 12 +-- .../v4/runtime/InterpreterRuleContext.java | 25 +++++- .../antlr/v4/runtime/atn/PredictionMode.java | 81 +++++++++++++++---- .../antlr/v4/runtime/atn/SemanticContext.java | 26 ++++++ 10 files changed, 302 insertions(+), 66 deletions(-) diff --git a/runtime/Java/src/org/antlr/v4/runtime/BailErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/BailErrorStrategy.java index cdf8379bc..f4874dd92 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/BailErrorStrategy.java +++ b/runtime/Java/src/org/antlr/v4/runtime/BailErrorStrategy.java @@ -32,9 +32,33 @@ package org.antlr.v4.runtime; import org.antlr.v4.runtime.misc.ParseCancellationException; -/** Bail out of parser at first syntax error. Do this to use it: +/** + * This implementation of {@link ANTLRErrorStrategy} responds to syntax errors + * by immediately canceling the parse operation with a + * {@link ParseCancellationException}. The implementation ensures that the + * {@link ParserRuleContext#exception} field is set for all parse tree nodes + * that were not completed prior to encountering the error. * - *

{@code myparser.setErrorHandler(new BailErrorStrategy());}

+ *

+ * This error strategy is useful in the following scenarios.

+ * + * + * + *

+ * {@code myparser.setErrorHandler(new BailErrorStrategy());}

+ * + * @see Parser#setErrorHandler(ANTLRErrorStrategy) */ public class BailErrorStrategy extends DefaultErrorStrategy { /** Instead of recovering from exception {@code e}, re-throw it wrapped diff --git a/runtime/Java/src/org/antlr/v4/runtime/BaseErrorListener.java b/runtime/Java/src/org/antlr/v4/runtime/BaseErrorListener.java index 180f71d20..7d3e4a182 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/BaseErrorListener.java +++ b/runtime/Java/src/org/antlr/v4/runtime/BaseErrorListener.java @@ -37,6 +37,10 @@ import org.antlr.v4.runtime.misc.Nullable; import java.util.BitSet; /** + * Provides an empty default implementation of {@link ANTLRErrorListener}. The + * default implementation of each method does nothing, but can be overridden as + * necessary. + * * @author Sam Harwell */ public class BaseErrorListener implements ANTLRErrorListener { diff --git a/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java b/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java index dba0acfd9..9311f6665 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java @@ -39,45 +39,53 @@ import java.util.List; import java.util.Set; /** - * Buffer all input tokens but do on-demand fetching of new tokens from lexer. - * Useful when the parser or lexer has to set context/mode info before proper - * lexing of future tokens. The ST template parser needs this, for example, - * because it has to constantly flip back and forth between inside/output - * templates. E.g., {@code }>} has to parse names as part of an - * expression but {@code "hi, "} as a nested template. + * This implementation of {@link TokenStream} loads tokens from a + * {@link TokenSource} on-demand, and places the tokens in a buffer to provide + * access to any previous token by index. * - *

You can't use this stream if you pass whitespace or other off-channel tokens - * to the parser. The stream can't ignore off-channel tokens. - * ({@link UnbufferedTokenStream} is the same way.) Use + *

+ * This token stream ignores the value of {@link Token#getChannel}. If your + * parser requires the token stream filter tokens to only those on a particular + * channel, such as {@link Token#DEFAULT_CHANNEL} or + * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as * {@link CommonTokenStream}.

*/ public class BufferedTokenStream implements TokenStream { + /** + * The {@link TokenSource} from which tokens for this stream are fetched. + */ @NotNull protected TokenSource tokenSource; /** - * Record every single token pulled from the source so we can reproduce - * chunks of it later. This list captures everything so we can access - * complete input text. + * A collection of all tokens fetched from the token source. The list is + * considered a complete view of the input once {@link #fetchedEOF} is set + * to {@code true}. */ protected List tokens = new ArrayList(100); /** * The index into {@link #tokens} of the current token (next token to - * consume). {@link #tokens}{@code [}{@link #p}{@code ]} should be - * {@link #LT LT(1)}. {@link #p}{@code =-1} indicates need to initialize - * with first token. The constructor doesn't get a token. First call to - * {@link #LT LT(1)} or whatever gets the first token and sets - * {@link #p}{@code =0;}. + * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be + * {@link #LT LT(1)}. + * + *

This field is set to -1 when the stream is first constructed or when + * {@link #setTokenSource} is called, indicating that the first token has + * not yet been fetched from the token source. For additional information, + * see the documentation of {@link IntStream} for a description of + * Initializing Methods.

*/ protected int p = -1; /** - * Set to {@code true} when the EOF token is fetched. Do not continue fetching - * tokens after that point, or multiple EOF tokens could end up in the - * {@link #tokens} array. + * Indicates whether the {@link Token#EOF} token has been fetched from + * {@link #tokenSource} and added to {@link #tokens}. This field improves + * performance for the following cases: * - * @see #fetch + *
    + *
  • {@link #fetch}: The check to prevent adding multiple EOF symbols into + * {@link #tokens} is trivial with this field.
  • + *
      */ protected boolean fetchedEOF; diff --git a/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java b/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java index e316b8155..2ef948f45 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java +++ b/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java @@ -36,31 +36,74 @@ import org.antlr.v4.runtime.misc.Pair; import java.io.Serializable; public class CommonToken implements WritableToken, Serializable { + /** + * An empty {@link Pair} which is used as the default value of + * {@link #source} for tokens that do not have a source. + */ protected static final Pair EMPTY_SOURCE = new Pair(null, null); + /** + * This is the backing field for {@link #getType} and {@link #setType}. + */ protected int type; + /** + * This is the backing field for {@link #getLine} and {@link #setLine}. + */ protected int line; + /** + * This is the backing field for {@link #getCharPositionInLine} and + * {@link #setCharPositionInLine}. + */ protected int charPositionInLine = -1; // set to invalid position + /** + * This is the backing field for {@link #getChannel} and + * {@link #setChannel}. + */ protected int channel=DEFAULT_CHANNEL; + /** + * This is the backing field for {@link #getTokenSource} and + * {@link #getInputStream}. + * + *

      + * These properties share a field to reduce the memory footprint of + * {@link CommonToken}. Tokens created by a {@link CommonTokenFactory} from + * the same source and input stream share a reference to the same + * {@link Pair} containing these values.

      + */ protected Pair source; - /** We need to be able to change the text once in a while. If - * this is non-null, then getText should return this. Note that - * start/stop are not affected by changing this. - */ - // TODO: can store these in map in token stream rather than as field here + /** + * This is the backing field for {@link #getText} when the token text is + * explicitly set in the constructor or via {@link #setText}. + * + * @see #getText() + */ protected String text; - /** What token number is this from 0..n-1 tokens; < 0 implies invalid index */ + /** + * This is the backing field for {@link #getTokenIndex} and + * {@link #setTokenIndex}. + */ protected int index = -1; - /** The char position into the input buffer where this token starts */ + /** + * This is the backing field for {@link #getStartIndex} and + * {@link #setStartIndex}. + */ protected int start; - /** The char position into the input buffer where this token stops */ + /** + * This is the backing field for {@link #getStopIndex} and + * {@link #setStopIndex}. + */ protected int stop; + /** + * Constructs a new {@link CommonToken} with the specified token type. + * + * @param type The token type. + */ public CommonToken(int type) { this.type = type; } @@ -77,6 +120,13 @@ public class CommonToken implements WritableToken, Serializable { } } + /** + * Constructs a new {@link CommonToken} with the specified token type and + * text. + * + * @param type The token type. + * @param text The text of the token. + */ public CommonToken(int type, String text) { this.type = type; this.channel = DEFAULT_CHANNEL; @@ -129,10 +179,14 @@ public class CommonToken implements WritableToken, Serializable { } } - /** Override the text for this token. getText() will return this text - * rather than pulling from the buffer. Note that this does not mean - * that start/stop indexes are not valid. It means that that input - * was converted to a new string in the token object. 
+ /** + * Explicitly set the text for this token. If {@code text} is not + * {@code null}, then {@link #getText} will return this value rather than + * extracting the text from the input. + * + * @param text The explicit text of the token, or {@code null} if the text + * should be obtained from the input along with the start and stop indexes + * of the token. */ @Override public void setText(String text) { diff --git a/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java b/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java index b646a5de8..bb764d46b 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java +++ b/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java @@ -33,20 +33,57 @@ package org.antlr.v4.runtime; import org.antlr.v4.runtime.misc.Interval; import org.antlr.v4.runtime.misc.Pair; +/** + * This default implementation of {@link TokenFactory} creates + * {@link CommonToken} objects. + */ public class CommonTokenFactory implements TokenFactory { + /** + * The default {@link CommonTokenFactory} instance. + * + *

      + * This token factory does not explicitly copy token text when constructing + * tokens.

      + */ public static final TokenFactory DEFAULT = new CommonTokenFactory(); - /** Copy text for token out of input char stream. Useful when input - * stream is unbuffered. - * @see UnbufferedCharStream - */ + /** + * Indicates whether {@link CommonToken#setText} should be called after + * constructing tokens to explicitly set the text. This is useful for cases + * where the input stream might not be able to provide arbitrary substrings + * of text from the input after the lexer creates a token (e.g. the + * implementation of {@link CharStream#getText} in + * {@link UnbufferedCharStream} throws an + * {@link UnsupportedOperationException}). Explicitly setting the token text + * allows {@link Token#getText} to be called at any time regardless of the + * input stream implementation. + * + *

      + * The default value is {@code false} to avoid the performance and memory + * overhead of copying text for every token unless explicitly requested.

      + */ protected final boolean copyText; - /** Create factory and indicate whether or not the factory copy - * text out of the char stream. + /** + * Constructs a {@link CommonTokenFactory} with the specified value for + * {@link #copyText}. + * + *

      + * When {@code copyText} is {@code false}, the {@link #DEFAULT} instance + * should be used instead of constructing a new instance.

      + * + * @param copyText The value for {@link #copyText}. */ public CommonTokenFactory(boolean copyText) { this.copyText = copyText; } + /** + * Constructs a {@link CommonTokenFactory} with {@link #copyText} set to + * {@code false}. + * + *

      + * The {@link #DEFAULT} instance should be used instead of calling this + * directly.

      + */ public CommonTokenFactory() { this(false); } @Override diff --git a/runtime/Java/src/org/antlr/v4/runtime/ConsoleErrorListener.java b/runtime/Java/src/org/antlr/v4/runtime/ConsoleErrorListener.java index 0fd3323ce..b33dc2dfc 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ConsoleErrorListener.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ConsoleErrorListener.java @@ -34,8 +34,23 @@ package org.antlr.v4.runtime; * @author Sam Harwell */ public class ConsoleErrorListener extends BaseErrorListener { + /** + * Provides a default instance of {@link ConsoleErrorListener}. + */ public static final ConsoleErrorListener INSTANCE = new ConsoleErrorListener(); + /** + * {@inheritDoc} + * + *

      + * This implementation prints messages to {@link System#err} containing the + * values of {@code line}, {@code charPositionInLine}, and {@code msg} using + * the following format.

      + * + *
      +	 * line line:charPositionInLine msg
      +	 * 
      + */ @Override public void syntaxError(Recognizer recognizer, Object offendingSymbol, diff --git a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java index 79438b485..0ae2d8358 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java +++ b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java @@ -38,13 +38,15 @@ import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.Nullable; import org.antlr.v4.runtime.misc.Pair; -/** This is the default error handling mechanism for ANTLR parsers - * and tree parsers. +/** + * This is the default implementation of {@link ANTLRErrorStrategy} used for + * error reporting and recovery in ANTLR parsers. */ public class DefaultErrorStrategy implements ANTLRErrorStrategy { - /** This is true after we see an error and before having successfully - * matched a token. Prevents generation of more than one error message - * per error. + /** + * Indicates whether the error strategy is currently "recovering from an + * error". This is used to suppress reporting multiple error messages while + * attempting to recover from a detected syntax error. * * @see #inErrorRecoveryMode */ diff --git a/runtime/Java/src/org/antlr/v4/runtime/InterpreterRuleContext.java b/runtime/Java/src/org/antlr/v4/runtime/InterpreterRuleContext.java index 231f4797c..f4359452c 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/InterpreterRuleContext.java +++ b/runtime/Java/src/org/antlr/v4/runtime/InterpreterRuleContext.java @@ -31,13 +31,32 @@ package org.antlr.v4.runtime; import org.antlr.v4.runtime.misc.Nullable; -/** This object is used by the ParserInterpreter and is the same as a regular - * ParserRuleContext except that we need to track the rule index of the - * current context so that we can build parse trees. 
+/** + * This class extends {@link ParserRuleContext} by allowing the value of + * {@link #getRuleIndex} to be explicitly set for the context. + * + *

      + * {@link ParserRuleContext} does not include field storage for the rule index + * since the context classes created by the code generator override the + * {@link #getRuleIndex} method to return the correct value for that context. + * Since the parser interpreter does not use the context classes generated for a + * parser, this class (with slightly more memory overhead per node) is used to + * provide equivalent functionality.

      */ public class InterpreterRuleContext extends ParserRuleContext { + /** + * This is the backing field for {@link #getRuleIndex}. + */ private final int ruleIndex; + /** + * Constructs a new {@link InterpreterRuleContext} with the specified + * parent, invoking state, and rule index. + * + * @param parent The parent context. + * @param invokingStateNumber The invoking state number. + * @param ruleIndex The rule index for the current context. + */ public InterpreterRuleContext(@Nullable ParserRuleContext parent, int invokingStateNumber, int ruleIndex) diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java index 20a4673fb..15b25f4d0 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java @@ -41,26 +41,69 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Map; +/** + * This enumeration defines the prediction modes available in ANTLR 4 along with + * utility methods for analyzing configuration sets for conflicts and/or + * ambiguities. + */ public enum PredictionMode { - /** Do only local context prediction (SLL style) and using - * heuristic which almost always works but is much faster - * than precise answer. + /** + * The SLL(*) prediction mode. This prediction mode ignores the current + * parser context when making predictions. This is the fastest prediction + * mode, and provides correct results for many grammars. This prediction + * mode is more powerful than the prediction mode provided by ANTLR 3, but + * may result in syntax errors for grammar and input combinations which are + * not SLL. + * + *

      + * When using this prediction mode, the parser will either return a correct + * parse tree (i.e. the same parse tree that would be returned with the + * {@link #LL} prediction mode), or it will report a syntax error. If a + * syntax error is encountered when using the {@link #SLL} prediction mode, + * it may be due to either an actual syntax error in the input or indicate + * that the particular combination of grammar and input requires the more + * powerful {@link #LL} prediction abilities to complete successfully.

      + * + *

      + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.

      */ SLL, - - /** Full LL(*) that always gets right answer. For speed - * reasons, we terminate the prediction process when we know for - * sure which alt to predict. We don't always know what - * the ambiguity is in this mode. + /** + * The LL(*) prediction mode. This prediction mode allows the current parser + * context to be used for resolving SLL conflicts that occur during + * prediction. This is the fastest prediction mode that guarantees correct + * parse results for all combinations of grammars with syntactically correct + * inputs. + * + *

      + * When using this prediction mode, the parser will make correct decisions + * for all syntactically-correct grammar and input combinations. However, in + * cases where the grammar is truly ambiguous this prediction mode might not + * report a precise answer for exactly which alternatives are + * ambiguous.

      + * + *

      + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.

      */ LL, - - /** Tell the full LL prediction algorithm to pursue lookahead until - * it has uniquely predicted an alternative without conflict or it's - * certain that it's found an ambiguous input sequence. when this - * variable is false. When true, the prediction process will - * continue looking for the exact ambiguous sequence even if - * it has already figured out which alternative to predict. + /** + * The LL(*) prediction mode with exact ambiguity detection. In addition to + * the correctness guarantees provided by the {@link #LL} prediction mode, + * this prediction mode instructs the prediction algorithm to determine the + * complete and exact set of ambiguous alternatives for every ambiguous + * decision encountered while parsing. + * + *

      + * This prediction mode may be used for diagnosing ambiguities during + * grammar development. Due to the performance overhead of calculating sets + * of ambiguous alternatives, this prediction mode should be avoided when + * the exact results are not necessary.

      + * + *

      + * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.

      */ LL_EXACT_AMBIG_DETECTION; @@ -77,7 +120,10 @@ public enum PredictionMode { private AltAndContextConfigEqualityComparator() { } - /** Code is function of (s, _, ctx, _) */ + /** + * The hash code is only a function of the {@link ATNState#stateNumber} + * and {@link ATNConfig#context}. + */ @Override public int hashCode(ATNConfig o) { int hashCode = MurmurHash.initialize(7); @@ -99,7 +145,8 @@ public enum PredictionMode { /** * Computes the SLL prediction termination condition. * - *

      This method computes the SLL prediction termination condition for both of + *

      + * This method computes the SLL prediction termination condition for both of * the following cases.

      * *
        diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java b/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java index 6362ee57a..4c3613773 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java @@ -53,6 +53,10 @@ import java.util.Set; * {@link SemanticContext} within the scope of this outer class.

        */ public abstract class SemanticContext { + /** + * The default {@link SemanticContext}, which is semantically equivalent to + * a predicate of the form {@code {true}?}. + */ public static final SemanticContext NONE = new Predicate(); public SemanticContext parent; @@ -201,6 +205,10 @@ public abstract class SemanticContext { } } + /** + * A semantic context which is true whenever none of the contained contexts + * is false. + */ public static class AND extends SemanticContext { @NotNull public final SemanticContext[] opnds; @@ -234,6 +242,13 @@ public abstract class SemanticContext { return MurmurHash.hashCode(opnds, AND.class.hashCode()); } + /** + * {@inheritDoc} + * + *

        + * The evaluation of predicates by this context is short-circuiting, but + * unordered.

        + */ @Override public boolean eval(Recognizer parser, RuleContext outerContext) { for (SemanticContext opnd : opnds) { @@ -282,6 +297,10 @@ public abstract class SemanticContext { } } + /** + * A semantic context which is true whenever at least one of the contained + * contexts is true. + */ public static class OR extends SemanticContext { @NotNull public final SemanticContext[] opnds; @@ -315,6 +334,13 @@ public abstract class SemanticContext { return MurmurHash.hashCode(opnds, OR.class.hashCode()); } + /** + * {@inheritDoc} + * + *

        + * The evaluation of predicates by this context is short-circuiting, but + * unordered.

        + */ @Override public boolean eval(Recognizer parser, RuleContext outerContext) { for (SemanticContext opnd : opnds) {