From 6236072e3b1f783392738d949b489c9997400943 Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Sun, 30 Jun 2013 15:50:56 -0500 Subject: [PATCH] Documentation updates prior to release --- .../antlr/v4/runtime/ANTLRErrorListener.java | 35 ++- .../org/antlr/v4/runtime/ANTLRFileStream.java | 7 +- .../antlr/v4/runtime/ANTLRInputStream.java | 10 +- .../antlr/v4/runtime/BufferedTokenStream.java | 50 ++--- .../antlr/v4/runtime/atn/ATNConfigSet.java | 208 ++---------------- .../v4/runtime/atn/PredictionContext.java | 1 - .../src/org/antlr/v4/runtime/dfa/DFA.java | 10 - .../org/antlr/v4/runtime/dfa/DFAState.java | 2 +- tool/src/org/antlr/v4/codegen/JavaTarget.java | 12 + tool/src/org/antlr/v4/codegen/Target.java | 9 +- 10 files changed, 78 insertions(+), 266 deletions(-) diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorListener.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorListener.java index c38d9dc97..d78d44657 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorListener.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRErrorListener.java @@ -39,19 +39,19 @@ import org.antlr.v4.runtime.misc.Nullable; import java.util.BitSet; -/** How to emit recognition errors */ +/** How to emit recognition errors. */ public interface ANTLRErrorListener { - /** Upon syntax error, notify any interested parties. This is not - * how to recover from errors or compute error messages. The - * parser ANTLRErrorStrategy specifies how to recover from syntax - * errors and how to compute error messages. This listener's job - * is simply to emit a computed message, though it has enough - * information to create its own message in many cases. - * - * The RecognitionException is non-null for all syntax errors - * except when we discover mismatched token errors that we can - * recover from in-line, without returning from the surrounding - * rule (via the single token insertion and deletion mechanism). + /** + * Upon syntax error, notify any interested parties. 
This is not how to + * recover from errors or compute error messages. {@link ANTLRErrorStrategy} + * specifies how to recover from syntax errors and how to compute error + * messages. This listener's job is simply to emit a computed message, + * though it has enough information to create its own message in many cases. + * <p/>
+ * The {@link RecognitionException} is non-null for all syntax errors except + * when we discover mismatched token errors that we can recover from + * in-line, without returning from the surrounding rule (via the single + * token insertion and deletion mechanism). * * @param recognizer * What parser got the error. From this @@ -59,16 +59,15 @@ public interface ANTLRErrorListener { * as the input stream. * @param offendingSymbol * The offending token in the input token - * stream, unless recognizer is a lexer (then it's null) If - * no viable alternative error, e has token at which we + * stream, unless recognizer is a lexer (then it's null). If + * no viable alternative error, {@code e} has token at which we * started production for the decision. * @param line - * At what line in input to the error occur? This always refers to - * stopTokenIndex + * The line number in the input where the error occurred. * @param charPositionInLine - * At what character position within that line did the error occur. + * The character position within that line where the error occurred. * @param msg - * The message to emit + * The message to emit. * @param e * The exception generated by the parser that led to * the reporting of an error. It is null in the case where diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRFileStream.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRFileStream.java index f66e48211..340a59e93 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRFileStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRFileStream.java @@ -35,10 +35,9 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.Arrays; -/** This is an ANTLRInputStream that is loaded from a file - * all at once when you construct the object. This is a special case - * since we know the exact size of the object to load. We can avoid lots - * of data copying. 
+/** + * This is an {@link ANTLRInputStream} that is loaded from a file all at once + * when you construct the object. */ public class ANTLRFileStream extends ANTLRInputStream { protected String fileName; diff --git a/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java b/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java index fc57efa93..98db38dce 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/ANTLRInputStream.java @@ -37,10 +37,12 @@ import java.io.InputStreamReader; import java.io.Reader; import java.util.Arrays; -/** Vacuum all input from a Reader/InputStream and then treat it like a char[] buffer. - * Can also pass in a string or char[] to use. - * - * If you need encoding, pass in stream/reader with correct encoding. +/** + * Vacuum all input from a {@link Reader}/{@link InputStream} and then treat it + * like a {@code char[]} buffer. Can also pass in a {@link String} or + * {@code char[]} to use. + * <p/>
+ * If you need encoding, pass in stream/reader with correct encoding. */ public class ANTLRInputStream implements CharStream { public static final int READ_BUFFER_SIZE = 1024; diff --git a/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java b/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java index 83420a864..f0848f914 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java +++ b/runtime/Java/src/org/antlr/v4/runtime/BufferedTokenStream.java @@ -38,36 +38,38 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -/** Buffer all input tokens but do on-demand fetching of new tokens from - * lexer. Useful when the parser or lexer has to set context/mode info before - * proper lexing of future tokens. The ST template parser needs this, - * for example, because it has to constantly flip back and forth between - * inside/output templates. E.g., <names:{hi, <it>}> has to parse names - * as part of an expression but "hi, <it>" as a nested template. - * - * You can't use this stream if you pass whitespace or other off-channel - * tokens to the parser. The stream can't ignore off-channel tokens. - * (UnbufferedTokenStream is the same way.) Use CommonTokenStream. - * - * This is not a subclass of UnbufferedTokenStream because I don't want - * to confuse small moving window of tokens it uses for the full buffer. +/** + * Buffer all input tokens but do on-demand fetching of new tokens from lexer. + * Useful when the parser or lexer has to set context/mode info before proper + * lexing of future tokens. The ST template parser needs this, for example, + * because it has to constantly flip back and forth between inside/output + * templates. E.g., {@code <names:{hi, <it>}>} has to parse names as part of an + * expression but {@code "hi, <it>"} as a nested template. + * <p/>
+ * You can't use this stream if you pass whitespace or other off-channel tokens + * to the parser. The stream can't ignore off-channel tokens. + * ({@link UnbufferedTokenStream} is the same way.) Use + * {@link CommonTokenStream}. */ public class BufferedTokenStream implements TokenStream { @NotNull protected TokenSource tokenSource; - /** Record every single token pulled from the source so we can reproduce - * chunks of it later. The buffer in LookaheadStream overlaps sometimes - * as its moving window moves through the input. This list captures - * everything so we can access complete input text. - */ + /** + * Record every single token pulled from the source so we can reproduce + * chunks of it later. This list captures everything so we can access + * complete input text. + */ protected List<Token> tokens = new ArrayList<Token>(100); - /** The index into the tokens list of the current token (next token - * to consume). tokens[p] should be LT(1). p=-1 indicates need - * to initialize with first token. The ctor doesn't get a token. - * First call to LT(1) or whatever gets the first token and sets p=0; - */ + /** + * The index into {@link #tokens} of the current token (next token to + * consume). {@link #tokens}{@code [}{@link #p}{@code ]} should be + * {@link #LT LT(1)}. {@link #p}{@code =-1} indicates need to initialize + * with first token. The constructor doesn't get a token. First call to + * {@link #LT LT(1)} or whatever gets the first token and sets + * {@link #p}{@code =0;}. 
+ */ protected int p = -1; /** @@ -92,8 +94,6 @@ public class BufferedTokenStream implements TokenStream { @Override public int index() { return p; } -// public int range() { return range; } - @Override public int mark() { return 0; diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java index ba638f671..423c1e967 100755 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java @@ -44,201 +44,18 @@ import java.util.Iterator; import java.util.List; import java.util.Set; -/** Specialized OrderedHashSet that can track info about the set. - * Might be able to optimize later w/o affecting code that uses this set. - - histogram of lexer DFA configset size: - - 206 30 <- 206 sets with size 30 - 47 1 - 17 31 - 12 2 - 10 3 - 7 32 - 4 4 - 3 35 - 2 9 - 2 6 - 2 5 - 2 34 - 1 7 - 1 33 - 1 29 - 1 12 - 1 119 <- max size - - 322 set size for SLL parser java.* in DFA states: - - 888 1 - 411 54 - 365 88 - 304 56 - 206 80 - 182 16 - 167 86 - 166 78 - 158 84 - 131 2 - 121 20 - 120 8 - 119 112 - 82 10 - 73 6 - 53 174 - 47 90 - 45 4 - 39 12 - 38 122 - 37 89 - 37 62 - 34 3 - 34 18 - 32 81 - 31 87 - 28 45 - 27 144 - 25 41 - 24 132 - 22 91 - 22 7 - 21 82 - 21 28 - 21 27 - 17 9 - 16 29 - 16 155 - 15 51 - 15 118 - 14 146 - 14 114 - 13 5 - 13 38 - 12 48 - 11 64 - 11 50 - 11 22 - 11 134 - 11 131 - 10 79 - 10 76 - 10 59 - 10 58 - 10 55 - 10 39 - 10 116 - 9 74 - 9 47 - 9 310 - ... 
- - javalr, java.* configs with # preds histogram: - - 4569 0 - 57 1 - 27 27 - 5 76 - 4 28 - 3 72 - 3 38 - 3 30 - 2 6 - 2 32 - 1 9 - 1 2 - - javalr, java.* all atnconfigsets; max size = 322, num sets = 269088 - - 114186 1 <-- optimize - 35712 6 - 28081 78 - 15252 54 - 14171 56 - 13159 12 - 11810 88 - 6873 86 - 6158 80 - 5169 4 - 3773 118 - 2350 16 - 1002 112 - 915 28 - 898 44 - 734 2 - 632 62 - 575 8 - 566 59 - 474 20 - 388 84 - 343 48 - 333 55 - 328 47 - 311 41 - 306 38 - 277 81 - 263 79 - 255 66 - 245 90 - 245 87 - 234 50 - 224 10 - 220 60 - 194 64 - 186 32 - 184 82 - 150 18 - 125 7 - 121 132 - 116 30 - 103 51 - 95 114 - 84 36 - 82 40 - 78 22 - 77 89 - 55 9 - 53 174 - 48 152 - 44 67 - 44 5 - 42 115 - 41 58 - 38 122 - 37 134 - 34 13 - 34 116 - 29 45 - 29 3 - 29 24 - 27 144 - 26 146 - 25 91 - 24 113 - 20 27 - ... - - number with 1-9 elements: - - 114186 1 - 35712 6 - 5169 4 - 734 2 - 575 8 - 125 7 - 55 9 - 44 5 - 29 3 - - Can cover 60% of sizes with size up to 6 - Can cover 44% of sizes with size up to 4 - Can cover 42% of sizes with size up to 1 +/** + * Specialized {@link Set}{@code <}{@link ATNConfig}{@code >} that can track + * info about the set, with support for combining similar configurations using a + * graph-structured stack. */ public class ATNConfigSet implements Set { - /* - The reason that we need this is because we don't want the hash map to use - the standard hash code and equals. We need all configurations with the same - (s,i,_,semctx) to be equal. Unfortunately, this key effectively doubles - the number of objects associated with ATNConfigs. The other solution is to - use a hash table that lets us specify the equals/hashcode operation. + /** + * The reason that we need this is because we don't want the hash map to use + * the standard hash code and equals. We need all configurations with the same + * {@code (s,i,_,semctx)} to be equal. Unfortunately, this key effectively doubles + * the number of objects associated with ATNConfigs. 
The other solution is to + * use a hash table that lets us specify the equals/hashcode operation. */ public static class ConfigHashSet extends AbstractConfigHashSet { public ConfigHashSet() { @@ -279,8 +96,9 @@ public class ATNConfigSet implements Set { */ protected boolean readonly = false; - /** All configs but hashed by (s, i, _, pi) not incl context. Wiped out - * when we go readonly as this set becomes a DFA state. + /** + * All configs but hashed by (s, i, _, pi) not including context. Wiped out + * when we go readonly as this set becomes a DFA state. */ public AbstractConfigHashSet configLookup; diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionContext.java b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionContext.java index 1ddcf5d2d..83ad70a35 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionContext.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionContext.java @@ -234,7 +234,6 @@ public abstract class PredictionContext { * @param rootIsWildcard {@code true} if this is a local-context merge, * otherwise false to indicate a full-context merge * @param mergeCache - * @return */ public static PredictionContext mergeSingletons( SingletonPredictionContext a, diff --git a/runtime/Java/src/org/antlr/v4/runtime/dfa/DFA.java b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFA.java index 694783a32..091a07161 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/dfa/DFA.java +++ b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFA.java @@ -29,11 +29,7 @@ */ package org.antlr.v4.runtime.dfa; -import org.antlr.v4.runtime.TokenStream; -import org.antlr.v4.runtime.atn.ATNState; import org.antlr.v4.runtime.atn.DecisionState; -import org.antlr.v4.runtime.atn.ParserATNSimulator; -import org.antlr.v4.runtime.atn.Transition; import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.Nullable; @@ -41,7 +37,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; 
-import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -61,11 +56,6 @@ public class DFA { @NotNull public final DecisionState atnStartState; - /** Set of configs for a DFA state with at least one conflict? Mainly used as "return value" - * from {@link ParserATNSimulator#predictATN} for retry. - */ -// public OrderedHashSet conflictSet; - public DFA(@NotNull DecisionState atnStartState) { this(atnStartState, 0); } diff --git a/runtime/Java/src/org/antlr/v4/runtime/dfa/DFAState.java b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFAState.java index 4cf7579d1..49005a6a6 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/dfa/DFAState.java +++ b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFAState.java @@ -94,7 +94,7 @@ public class DFAState { /** * Indicates that this state was created during SLL prediction that * discovered a conflict between the configurations in the state. Future - * {@link ParserATNSimulator#execDFA} invocations immediately jumped doing + * {@link ParserATNSimulator#execATN} invocations immediately jumped doing * full context prediction if this field is true. */ public boolean requiresFullContext; diff --git a/tool/src/org/antlr/v4/codegen/JavaTarget.java b/tool/src/org/antlr/v4/codegen/JavaTarget.java index 25203819f..5d112c8d9 100644 --- a/tool/src/org/antlr/v4/codegen/JavaTarget.java +++ b/tool/src/org/antlr/v4/codegen/JavaTarget.java @@ -77,6 +77,18 @@ public class JavaTarget extends Target { badWords.add("parserRule"); } + /** + * {@inheritDoc} + * <p/>
+ * For Java, this is the translation {@code 'a\n"'} → {@code "a\n\""}. + * Expect single quotes around the incoming literal. Just flip the quotes + * and replace double quotes with {@code \"}. + * <p/>
+ * Note that we have decided to allow people to use '\"' without penalty, so + * we must build the target string in a loop as {@link String#replace} + * cannot handle both {@code \"} and {@code "} without a lot of messing + * around. + */ @Override public String getTargetStringLiteralFromANTLRStringLiteral( CodeGenerator generator, diff --git a/tool/src/org/antlr/v4/codegen/Target.java b/tool/src/org/antlr/v4/codegen/Target.java index ed862f2e8..b65aef07f 100644 --- a/tool/src/org/antlr/v4/codegen/Target.java +++ b/tool/src/org/antlr/v4/codegen/Target.java @@ -191,14 +191,7 @@ public abstract class Target { /** * Convert from an ANTLR string literal found in a grammar file to an - * equivalent string literal in the target language. For Java, this is the - * translation {@code 'a\n"'} → {@code "a\n\""}. Expect single quotes - * around the incoming literal. Just flip the quotes and replace double - * quotes with {@code \"}. - * <p/>
- * Note that we have decided to allow people to use '\"' without penalty, so - * we must build the target string in a loop as {@link Utils#replace} cannot - * handle both {@code \"} and {@code "} without a lot of messing around. + * equivalent string literal in the target language. */ public abstract String getTargetStringLiteralFromANTLRStringLiteral( CodeGenerator generator,