Updated documentation

2014-01-22 21:30:26 -06:00 · 2014-01-22 21:30:26 -06:00 · d5b269b6b6
parent 0f25d1c4cf
commit d5b269b6b6
10 changed files with 302 additions and 66 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/BailErrorStrategy.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/BailErrorStrategy.java
@ -32,9 +32,33 @@ package org.antlr.v4.runtime;

 import org.antlr.v4.runtime.misc.ParseCancellationException;

-/** Bail out of parser at first syntax error. Do this to use it:
+/**
+ * This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
+ * by immediately canceling the parse operation with a
+ * {@link ParseCancellationException}. The implementation ensures that the
+ * {@link ParserRuleContext#exception} field is set for all parse tree nodes
+ * that were not completed prior to encountering the error.
 *
- *  <p>{@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
+ * <p>
+ * This error strategy is useful in the following scenarios.</p>
+ *
+ * <ul>
+ * <li><strong>Two-stage parsing:</strong> This error strategy allows the first
+ * stage of two-stage parsing to immediately terminate if an error is
+ * encountered, and immediately fall back to the second stage. In addition to
+ * avoiding work wasted on attempting to recover from errors here, the empty
+ * implementation of {@link BailErrorStrategy#sync} improves the performance of
+ * the first stage.</li>
+ * <li><strong>Silent validation:</strong> When syntax errors are not being
+ * reported or logged, and the parse result is simply ignored if errors occur,
+ * the {@link BailErrorStrategy} avoids wasting work on recovering from errors
+ * when the result will be ignored either way.</li>
+ * </ul>
+ *
+ * <p>
+ * {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
+ *
+ * @see Parser#setErrorHandler(ANTLRErrorStrategy)
 */
 public class BailErrorStrategy extends DefaultErrorStrategy {
    /** Instead of recovering from exception {@code e}, re-throw it wrapped
--- a/runtime/Java/src/org/antlr/v4/runtime/BaseErrorListener.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/BaseErrorListener.java
@ -37,6 +37,10 @@ import org.antlr.v4.runtime.misc.Nullable;
 import java.util.BitSet;

 /**
+ * Provides an empty default implementation of {@link ANTLRErrorListener}. The
+ * default implementation of each method does nothing, but can be overridden as
+ * necessary.
+ *
 * @author Sam Harwell
 */
 public class BaseErrorListener implements ANTLRErrorListener {
--- a/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java
@ -39,45 +39,53 @@ import java.util.List;
 import java.util.Set;

 /**
- * Buffer all input tokens but do on-demand fetching of new tokens from lexer.
- * Useful when the parser or lexer has to set context/mode info before proper
- * lexing of future tokens. The ST template parser needs this, for example,
- * because it has to constantly flip back and forth between inside/output
- * templates. E.g., {@code <names:{hi, <it>}>} has to parse names as part of an
- * expression but {@code "hi, <it>"} as a nested template.
+ * This implementation of {@link TokenStream} loads tokens from a
+ * {@link TokenSource} on-demand, and places the tokens in a buffer to provide
+ * access to any previous token by index.
 *
- * <p>You can't use this stream if you pass whitespace or other off-channel tokens
- * to the parser. The stream can't ignore off-channel tokens.
- * ({@link UnbufferedTokenStream} is the same way.) Use
+ * <p>
+ * This token stream ignores the value of {@link Token#getChannel}. If your
+ * parser requires the token stream to filter tokens to only those on a
+ * particular channel, such as {@link Token#DEFAULT_CHANNEL} or
+ * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
 * {@link CommonTokenStream}.</p>
 */
 public class BufferedTokenStream implements TokenStream {
+	/**
+	 * The {@link TokenSource} from which tokens for this stream are fetched.
+	 */
 	@NotNull
    protected TokenSource tokenSource;

 	/**
-	 * Record every single token pulled from the source so we can reproduce
-	 * chunks of it later. This list captures everything so we can access
-	 * complete input text.
+	 * A collection of all tokens fetched from the token source. The list is
+	 * considered a complete view of the input once {@link #fetchedEOF} is set
+	 * to {@code true}.
 	 */
    protected List<Token> tokens = new ArrayList<Token>(100);

 	/**
 	 * The index into {@link #tokens} of the current token (next token to
-	 * consume). {@link #tokens}{@code [}{@link #p}{@code ]} should be
-	 * {@link #LT LT(1)}. {@link #p}{@code =-1} indicates need to initialize
-	 * with first token. The constructor doesn't get a token. First call to
-	 * {@link #LT LT(1)} or whatever gets the first token and sets
-	 * {@link #p}{@code =0;}.
+	 * {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
+	 * {@link #LT LT(1)}.
+	 *
+	 * <p>This field is set to -1 when the stream is first constructed or when
+	 * {@link #setTokenSource} is called, indicating that the first token has
+	 * not yet been fetched from the token source. For additional information,
+	 * see the documentation of {@link IntStream} for a description of
+	 * Initializing Methods.</p>
 	 */
    protected int p = -1;

 	/**
-	 * Set to {@code true} when the EOF token is fetched. Do not continue fetching
-	 * tokens after that point, or multiple EOF tokens could end up in the
-	 * {@link #tokens} array.
+	 * Indicates whether the {@link Token#EOF} token has been fetched from
+	 * {@link #tokenSource} and added to {@link #tokens}. This field improves
+	 * performance for the following cases:
 	 *
-	 * @see #fetch
+	 * <ul>
+	 * <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
+	 * {@link #tokens} is trivial with this field.</li>
+	 * </ul>
 	 */
 	protected boolean fetchedEOF;

--- a/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CommonToken.java
@ -36,31 +36,74 @@ import org.antlr.v4.runtime.misc.Pair;
 import java.io.Serializable;

 public class CommonToken implements WritableToken, Serializable {
+	/**
+	 * An empty {@link Pair} which is used as the default value of
+	 * {@link #source} for tokens that do not have a source.
+	 */
 	protected static final Pair<TokenSource, CharStream> EMPTY_SOURCE =
 		new Pair<TokenSource, CharStream>(null, null);

+	/**
+	 * This is the backing field for {@link #getType} and {@link #setType}.
+	 */
 	protected int type;
+	/**
+	 * This is the backing field for {@link #getLine} and {@link #setLine}.
+	 */
 	protected int line;
+	/**
+	 * This is the backing field for {@link #getCharPositionInLine} and
+	 * {@link #setCharPositionInLine}.
+	 */
 	protected int charPositionInLine = -1; // set to invalid position
+	/**
+	 * This is the backing field for {@link #getChannel} and
+	 * {@link #setChannel}.
+	 */
 	protected int channel=DEFAULT_CHANNEL;
+	/**
+	 * This is the backing field for {@link #getTokenSource} and
+	 * {@link #getInputStream}.
+	 *
+	 * <p>
+	 * These properties share a field to reduce the memory footprint of
+	 * {@link CommonToken}. Tokens created by a {@link CommonTokenFactory} from
+	 * the same source and input stream share a reference to the same
+	 * {@link Pair} containing these values.</p>
+	 */
 	protected Pair<TokenSource, CharStream> source;

-	/** We need to be able to change the text once in a while.  If
-	 *  this is non-null, then getText should return this.  Note that
-	 *  start/stop are not affected by changing this.
-	  */
-	// TODO: can store these in map in token stream rather than as field here
+	/**
+	 * This is the backing field for {@link #getText} when the token text is
+	 * explicitly set in the constructor or via {@link #setText}.
+	 *
+	 * @see #getText()
+	 */
 	protected String text;

-	/** What token number is this from 0..n-1 tokens; &lt; 0 implies invalid index */
+	/**
+	 * This is the backing field for {@link #getTokenIndex} and
+	 * {@link #setTokenIndex}.
+	 */
 	protected int index = -1;

-	/** The char position into the input buffer where this token starts */
+	/**
+	 * This is the backing field for {@link #getStartIndex} and
+	 * {@link #setStartIndex}.
+	 */
 	protected int start;

-	/** The char position into the input buffer where this token stops */
+	/**
+	 * This is the backing field for {@link #getStopIndex} and
+	 * {@link #setStopIndex}.
+	 */
 	protected int stop;

+	/**
+	 * Constructs a new {@link CommonToken} with the specified token type.
+	 *
+	 * @param type The token type.
+	 */
 	public CommonToken(int type) {
 		this.type = type;
 	}
@ -77,6 +120,13 @@ public class CommonToken implements WritableToken, Serializable {
 		}
 	}

+	/**
+	 * Constructs a new {@link CommonToken} with the specified token type and
+	 * text.
+	 *
+	 * @param type The token type.
+	 * @param text The text of the token.
+	 */
 	public CommonToken(int type, String text) {
 		this.type = type;
 		this.channel = DEFAULT_CHANNEL;
@ -129,10 +179,14 @@ public class CommonToken implements WritableToken, Serializable {
 		}
 	}

-	/** Override the text for this token.  getText() will return this text
-	 *  rather than pulling from the buffer.  Note that this does not mean
-	 *  that start/stop indexes are not valid.  It means that that input
-	 *  was converted to a new string in the token object.
+	/**
+	 * Explicitly set the text for this token. If {@code text} is not
+	 * {@code null}, then {@link #getText} will return this value rather than
+	 * extracting the text from the input.
+	 *
+	 * @param text The explicit text of the token, or {@code null} if the text
+	 * should be obtained from the input along with the start and stop indexes
+	 * of the token.
 	 */
 	@Override
 	public void setText(String text) {
--- a/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/CommonTokenFactory.java
@ -33,20 +33,57 @@ package org.antlr.v4.runtime;
 import org.antlr.v4.runtime.misc.Interval;
 import org.antlr.v4.runtime.misc.Pair;

+/**
+ * This default implementation of {@link TokenFactory} creates
+ * {@link CommonToken} objects.
+ */
 public class CommonTokenFactory implements TokenFactory<CommonToken> {
+	/**
+	 * The default {@link CommonTokenFactory} instance.
+	 *
+	 * <p>
+	 * This token factory does not explicitly copy token text when constructing
+	 * tokens.</p>
+	 */
 	public static final TokenFactory<CommonToken> DEFAULT = new CommonTokenFactory();

-	/** Copy text for token out of input char stream. Useful when input
-	 *  stream is unbuffered.
-	 *  @see UnbufferedCharStream
- 	 */
+	/**
+	 * Indicates whether {@link CommonToken#setText} should be called after
+	 * constructing tokens to explicitly set the text. This is useful for cases
+	 * where the input stream might not be able to provide arbitrary substrings
+	 * of text from the input after the lexer creates a token (e.g. the
+	 * implementation of {@link CharStream#getText} in
+	 * {@link UnbufferedCharStream} throws an
+	 * {@link UnsupportedOperationException}). Explicitly setting the token text
+	 * allows {@link Token#getText} to be called at any time regardless of the
+	 * input stream implementation.
+	 *
+	 * <p>
+	 * The default value is {@code false} to avoid the performance and memory
+	 * overhead of copying text for every token unless explicitly requested.</p>
+	 */
 	protected final boolean copyText;

-	/** Create factory and indicate whether or not the factory copy
-	 *  text out of the char stream.
+	/**
+	 * Constructs a {@link CommonTokenFactory} with the specified value for
+	 * {@link #copyText}.
+	 *
+	 * <p>
+	 * When {@code copyText} is {@code false}, the {@link #DEFAULT} instance
+	 * should be used instead of constructing a new instance.</p>
+	 *
+	 * @param copyText The value for {@link #copyText}.
 	 */
 	public CommonTokenFactory(boolean copyText) { this.copyText = copyText; }

+	/**
+	 * Constructs a {@link CommonTokenFactory} with {@link #copyText} set to
+	 * {@code false}.
+	 *
+	 * <p>
+	 * The {@link #DEFAULT} instance should be used instead of calling this
+	 * directly.</p>
+	 */
 	public CommonTokenFactory() { this(false); }

 	@Override
--- a/runtime/Java/src/org/antlr/v4/runtime/ConsoleErrorListener.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/ConsoleErrorListener.java
@ -34,8 +34,23 @@ package org.antlr.v4.runtime;
 * @author Sam Harwell
 */
 public class ConsoleErrorListener extends BaseErrorListener {
+	/**
+	 * Provides a default instance of {@link ConsoleErrorListener}.
+	 */
 	public static final ConsoleErrorListener INSTANCE = new ConsoleErrorListener();

+	/**
+	 * {@inheritDoc}
+	 *
+	 * <p>
+	 * This implementation prints messages to {@link System#err} containing the
+	 * values of {@code line}, {@code charPositionInLine}, and {@code msg} using
+	 * the following format.</p>
+	 *
+	 * <pre>
+	 * line <em>line</em>:<em>charPositionInLine</em> <em>msg</em>
+	 * </pre>
+	 */
 	@Override
 	public void syntaxError(Recognizer<?, ?> recognizer,
 							Object offendingSymbol,
--- a/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/DefaultErrorStrategy.java
@ -38,13 +38,15 @@ import org.antlr.v4.runtime.misc.NotNull;
 import org.antlr.v4.runtime.misc.Nullable;
 import org.antlr.v4.runtime.misc.Pair;

-/** This is the default error handling mechanism for ANTLR parsers
- *  and tree parsers.
+/**
+ * This is the default implementation of {@link ANTLRErrorStrategy} used for
+ * error reporting and recovery in ANTLR parsers.
 */
 public class DefaultErrorStrategy implements ANTLRErrorStrategy {
-	/** This is true after we see an error and before having successfully
-	 *  matched a token. Prevents generation of more than one error message
-	 *  per error.
+	/**
+	 * Indicates whether the error strategy is currently "recovering from an
+	 * error". This is used to suppress reporting multiple error messages while
+	 * attempting to recover from a detected syntax error.
 	 *
 	 * @see #inErrorRecoveryMode
 	 */
--- a/runtime/Java/src/org/antlr/v4/runtime/InterpreterRuleContext.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/InterpreterRuleContext.java
@ -31,13 +31,32 @@ package org.antlr.v4.runtime;

 import org.antlr.v4.runtime.misc.Nullable;

-/** This object is used by the ParserInterpreter and is the same as a regular
- *  ParserRuleContext except that we need to track the rule index of the
- *  current context so that we can build parse trees.
+/**
+ * This class extends {@link ParserRuleContext} by allowing the value of
+ * {@link #getRuleIndex} to be explicitly set for the context.
+ *
+ * <p>
+ * {@link ParserRuleContext} does not include field storage for the rule index
+ * since the context classes created by the code generator override the
+ * {@link #getRuleIndex} method to return the correct value for that context.
+ * Since the parser interpreter does not use the context classes generated for a
+ * parser, this class (with slightly more memory overhead per node) is used to
+ * provide equivalent functionality.</p>
 */
 public class InterpreterRuleContext extends ParserRuleContext {
+	/**
+	 * This is the backing field for {@link #getRuleIndex}.
+	 */
 	private final int ruleIndex;

+	/**
+	 * Constructs a new {@link InterpreterRuleContext} with the specified
+	 * parent, invoking state, and rule index.
+	 *
+	 * @param parent The parent context.
+	 * @param invokingStateNumber The invoking state number.
+	 * @param ruleIndex The rule index for the current context.
+	 */
 	public InterpreterRuleContext(@Nullable ParserRuleContext parent,
 								  int invokingStateNumber,
 								  int ruleIndex)
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java
@ -41,26 +41,69 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;

+/**
+ * This enumeration defines the prediction modes available in ANTLR 4 along with
+ * utility methods for analyzing configuration sets for conflicts and/or
+ * ambiguities.
+ */
 public enum PredictionMode {
-	/** Do only local context prediction (SLL style) and using
-	 *  heuristic which almost always works but is much faster
-	 *  than precise answer.
+	/**
+	 * The SLL(*) prediction mode. This prediction mode ignores the current
+	 * parser context when making predictions. This is the fastest prediction
+	 * mode, and provides correct results for many grammars. This prediction
+	 * mode is more powerful than the prediction mode provided by ANTLR 3, but
+	 * may result in syntax errors for grammar and input combinations which are
+	 * not SLL.
+	 *
+	 * <p>
+	 * When using this prediction mode, the parser will either return a correct
+	 * parse tree (i.e. the same parse tree that would be returned with the
+	 * {@link #LL} prediction mode), or it will report a syntax error. If a
+	 * syntax error is encountered when using the {@link #SLL} prediction mode,
+	 * it may be due to either an actual syntax error in the input or indicate
+	 * that the particular combination of grammar and input requires the more
+	 * powerful {@link #LL} prediction abilities to complete successfully.</p>
+	 *
+	 * <p>
+	 * This prediction mode does not provide any guarantees for prediction
+	 * behavior for syntactically-incorrect inputs.</p>
 	 */
 	SLL,
-
-	/** Full LL(*) that always gets right answer. For speed
-	 *  reasons, we terminate the prediction process when we know for
-	 *  sure which alt to predict. We don't always know what
-	 *  the ambiguity is in this mode.
+	/**
+	 * The LL(*) prediction mode. This prediction mode allows the current parser
+	 * context to be used for resolving SLL conflicts that occur during
+	 * prediction. This is the fastest prediction mode that guarantees correct
+	 * parse results for all combinations of grammars with syntactically correct
+	 * inputs.
+	 *
+	 * <p>
+	 * When using this prediction mode, the parser will make correct decisions
+	 * for all syntactically-correct grammar and input combinations. However, in
+	 * cases where the grammar is truly ambiguous this prediction mode might not
+	 * report a precise answer for <em>exactly which</em> alternatives are
+	 * ambiguous.</p>
+	 *
+	 * <p>
+	 * This prediction mode does not provide any guarantees for prediction
+	 * behavior for syntactically-incorrect inputs.</p>
 	 */
 	LL,
-
-	/** Tell the full LL prediction algorithm to pursue lookahead until
-	 *  it has uniquely predicted an alternative without conflict or it's
-	 *  certain that it's found an ambiguous input sequence.  when this
-	 *  variable is false. When true, the prediction process will
-	 *  continue looking for the exact ambiguous sequence even if
-	 *  it has already figured out which alternative to predict.
+	/**
+	 * The LL(*) prediction mode with exact ambiguity detection. In addition to
+	 * the correctness guarantees provided by the {@link #LL} prediction mode,
+	 * this prediction mode instructs the prediction algorithm to determine the
+	 * complete and exact set of ambiguous alternatives for every ambiguous
+	 * decision encountered while parsing.
+	 *
+	 * <p>
+	 * This prediction mode may be used for diagnosing ambiguities during
+	 * grammar development. Due to the performance overhead of calculating sets
+	 * of ambiguous alternatives, this prediction mode should be avoided when
+	 * the exact results are not necessary.</p>
+	 *
+	 * <p>
+	 * This prediction mode does not provide any guarantees for prediction
+	 * behavior for syntactically-incorrect inputs.</p>
 	 */
 	LL_EXACT_AMBIG_DETECTION;

@ -77,7 +120,10 @@ public enum PredictionMode {
 		private AltAndContextConfigEqualityComparator() {
 		}

-		/** Code is function of (s, _, ctx, _) */
+		/**
+		 * The hash code is only a function of the {@link ATNState#stateNumber}
+		 * and {@link ATNConfig#context}.
+		 */
 		@Override
 		public int hashCode(ATNConfig o) {
 			int hashCode = MurmurHash.initialize(7);
@ -99,7 +145,8 @@ public enum PredictionMode {
 	/**
 	 * Computes the SLL prediction termination condition.
 	 *
-	 * <p>This method computes the SLL prediction termination condition for both of
+	 * <p>
+	 * This method computes the SLL prediction termination condition for both of
 	 * the following cases.</p>
 	 *
 	 * <ul>
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java
@ -53,6 +53,10 @@ import java.util.Set;
 *  {@link SemanticContext} within the scope of this outer class.</p>
 */
 public abstract class SemanticContext {
+	/**
+	 * The default {@link SemanticContext}, which is semantically equivalent to
+	 * a predicate of the form {@code {true}?}.
+	 */
    public static final SemanticContext NONE = new Predicate();

 	public SemanticContext parent;
@ -201,6 +205,10 @@ public abstract class SemanticContext {
 		}
 	}

+	/**
+	 * A semantic context which is true whenever none of the contained contexts
+	 * is false.
+	 */
    public static class AND extends SemanticContext {
 		@NotNull public final SemanticContext[] opnds;

@ -234,6 +242,13 @@ public abstract class SemanticContext {
 			return MurmurHash.hashCode(opnds, AND.class.hashCode());
 		}

+		/**
+		 * {@inheritDoc}
+		 *
+		 * <p>
+		 * The evaluation of predicates by this context is short-circuiting, but
+		 * unordered.</p>
+		 */
 		@Override
 		public boolean eval(Recognizer<?,?> parser, RuleContext outerContext) {
 			for (SemanticContext opnd : opnds) {
@ -282,6 +297,10 @@ public abstract class SemanticContext {
        }
    }

+	/**
+	 * A semantic context which is true whenever at least one of the contained
+	 * contexts is true.
+	 */
    public static class OR extends SemanticContext {
 		@NotNull public final SemanticContext[] opnds;

@ -315,6 +334,13 @@ public abstract class SemanticContext {
 			return MurmurHash.hashCode(opnds, OR.class.hashCode());
 		}

+		/**
+		 * {@inheritDoc}
+		 *
+		 * <p>
+		 * The evaluation of predicates by this context is short-circuiting, but
+		 * unordered.</p>
+		 */
 		@Override
        public boolean eval(Recognizer<?,?> parser, RuleContext outerContext) {
 			for (SemanticContext opnd : opnds) {