Documentation updates prior to release

This commit is contained in:
Sam Harwell 2013-06-30 15:50:56 -05:00
parent 89a251e497
commit 6236072e3b
10 changed files with 78 additions and 266 deletions

View File

@ -39,19 +39,19 @@ import org.antlr.v4.runtime.misc.Nullable;
import java.util.BitSet; import java.util.BitSet;
/** How to emit recognition errors */ /** How to emit recognition errors. */
public interface ANTLRErrorListener { public interface ANTLRErrorListener {
/** Upon syntax error, notify any interested parties. This is not /**
* how to recover from errors or compute error messages. The * Upon syntax error, notify any interested parties. This is not how to
* parser ANTLRErrorStrategy specifies how to recover from syntax * recover from errors or compute error messages. {@link ANTLRErrorStrategy}
* errors and how to compute error messages. This listener's job * specifies how to recover from syntax errors and how to compute error
* is simply to emit a computed message, though it has enough * messages. This listener's job is simply to emit a computed message,
* information to create its own message in many cases. * though it has enough information to create its own message in many cases.
* * <p/>
* The RecognitionException is non-null for all syntax errors * The {@link RecognitionException} is non-null for all syntax errors except
* except when we discover mismatched token errors that we can * when we discover mismatched token errors that we can recover from
* recover from in-line, without returning from the surrounding * in-line, without returning from the surrounding rule (via the single
* rule (via the single token insertion and deletion mechanism). * token insertion and deletion mechanism).
* *
* @param recognizer * @param recognizer
* What parser got the error. From this * What parser got the error. From this
@ -59,16 +59,15 @@ public interface ANTLRErrorListener {
* as the input stream. * as the input stream.
* @param offendingSymbol * @param offendingSymbol
* The offending token in the input token * The offending token in the input token
* stream, unless recognizer is a lexer (then it's null) If * stream, unless recognizer is a lexer (then it's null). If
* no viable alternative error, e has token at which we * no viable alternative error, {@code e} has token at which we
* started production for the decision. * started production for the decision.
* @param line * @param line
* At what line in input to the error occur? This always refers to * The line number in the input where the error occurred.
* stopTokenIndex
* @param charPositionInLine * @param charPositionInLine
* At what character position within that line did the error occur. * The character position within that line where the error occurred.
* @param msg * @param msg
* The message to emit * The message to emit.
* @param e * @param e
* The exception generated by the parser that led to * The exception generated by the parser that led to
* the reporting of an error. It is null in the case where * the reporting of an error. It is null in the case where

View File

@ -35,10 +35,9 @@ import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.util.Arrays; import java.util.Arrays;
/** This is an ANTLRInputStream that is loaded from a file /**
* all at once when you construct the object. This is a special case * This is an {@link ANTLRInputStream} that is loaded from a file all at once
* since we know the exact size of the object to load. We can avoid lots * when you construct the object.
* of data copying.
*/ */
public class ANTLRFileStream extends ANTLRInputStream { public class ANTLRFileStream extends ANTLRInputStream {
protected String fileName; protected String fileName;

View File

@ -37,10 +37,12 @@ import java.io.InputStreamReader;
import java.io.Reader; import java.io.Reader;
import java.util.Arrays; import java.util.Arrays;
/** Vacuum all input from a Reader/InputStream and then treat it like a char[] buffer. /**
* Can also pass in a string or char[] to use. * Vacuum all input from a {@link Reader}/{@link InputStream} and then treat it
* * like a {@code char[]} buffer. Can also pass in a {@link String} or
* If you need encoding, pass in stream/reader with correct encoding. * {@code char[]} to use.
* <p/>
* If you need encoding, pass in stream/reader with correct encoding.
*/ */
public class ANTLRInputStream implements CharStream { public class ANTLRInputStream implements CharStream {
public static final int READ_BUFFER_SIZE = 1024; public static final int READ_BUFFER_SIZE = 1024;

View File

@ -38,36 +38,38 @@ import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
/** Buffer all input tokens but do on-demand fetching of new tokens from /**
* lexer. Useful when the parser or lexer has to set context/mode info before * Buffer all input tokens but do on-demand fetching of new tokens from lexer.
* proper lexing of future tokens. The ST template parser needs this, * Useful when the parser or lexer has to set context/mode info before proper
* for example, because it has to constantly flip back and forth between * lexing of future tokens. The ST template parser needs this, for example,
* inside/outside templates. E.g., <names:{hi, <it>}> has to parse names * because it has to constantly flip back and forth between inside/outside
* as part of an expression but "hi, <it>" as a nested template. * templates. E.g., {@code <names:{hi, <it>}>} has to parse names as part of an
* * expression but {@code "hi, <it>"} as a nested template.
* You can't use this stream if you pass whitespace or other off-channel * <p/>
* tokens to the parser. The stream can't ignore off-channel tokens. * You can't use this stream if you pass whitespace or other off-channel tokens
* (UnbufferedTokenStream is the same way.) Use CommonTokenStream. * to the parser. The stream can't ignore off-channel tokens.
* * ({@link UnbufferedTokenStream} is the same way.) Use
* This is not a subclass of UnbufferedTokenStream because I don't want * {@link CommonTokenStream}.
* to confuse small moving window of tokens it uses for the full buffer.
*/ */
public class BufferedTokenStream implements TokenStream { public class BufferedTokenStream implements TokenStream {
@NotNull @NotNull
protected TokenSource tokenSource; protected TokenSource tokenSource;
/** Record every single token pulled from the source so we can reproduce /**
* chunks of it later. The buffer in LookaheadStream overlaps sometimes * Record every single token pulled from the source so we can reproduce
* as its moving window moves through the input. This list captures * chunks of it later. This list captures everything so we can access
* everything so we can access complete input text. * complete input text.
*/ */
protected List<Token> tokens = new ArrayList<Token>(100); protected List<Token> tokens = new ArrayList<Token>(100);
/** The index into the tokens list of the current token (next token /**
* to consume). tokens[p] should be LT(1). p=-1 indicates need * The index into {@link #tokens} of the current token (next token to
* to initialize with first token. The ctor doesn't get a token. * consume). {@link #tokens}{@code [}{@link #p}{@code ]} should be
* First call to LT(1) or whatever gets the first token and sets p=0; * {@link #LT LT(1)}. {@link #p}{@code =-1} indicates need to initialize
*/ * with first token. The constructor doesn't get a token. First call to
* {@link #LT LT(1)} or whatever gets the first token and sets
* {@link #p}{@code =0;}.
*/
protected int p = -1; protected int p = -1;
/** /**
@ -92,8 +94,6 @@ public class BufferedTokenStream implements TokenStream {
@Override @Override
public int index() { return p; } public int index() { return p; }
// public int range() { return range; }
@Override @Override
public int mark() { public int mark() {
return 0; return 0;

View File

@ -44,201 +44,18 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
/** Specialized OrderedHashSet that can track info about the set. /**
* Might be able to optimize later w/o affecting code that uses this set. * Specialized {@link Set}{@code <}{@link ATNConfig}{@code >} that can track
* info about the set, with support for combining similar configurations using a
histogram of lexer DFA configset size: * graph-structured stack.
206 30 <- 206 sets with size 30
47 1
17 31
12 2
10 3
7 32
4 4
3 35
2 9
2 6
2 5
2 34
1 7
1 33
1 29
1 12
1 119 <- max size
322 set size for SLL parser java.* in DFA states:
888 1
411 54
365 88
304 56
206 80
182 16
167 86
166 78
158 84
131 2
121 20
120 8
119 112
82 10
73 6
53 174
47 90
45 4
39 12
38 122
37 89
37 62
34 3
34 18
32 81
31 87
28 45
27 144
25 41
24 132
22 91
22 7
21 82
21 28
21 27
17 9
16 29
16 155
15 51
15 118
14 146
14 114
13 5
13 38
12 48
11 64
11 50
11 22
11 134
11 131
10 79
10 76
10 59
10 58
10 55
10 39
10 116
9 74
9 47
9 310
...
javalr, java.* configs with # preds histogram:
4569 0
57 1
27 27
5 76
4 28
3 72
3 38
3 30
2 6
2 32
1 9
1 2
javalr, java.* all atnconfigsets; max size = 322, num sets = 269088
114186 1 <-- optimize
35712 6
28081 78
15252 54
14171 56
13159 12
11810 88
6873 86
6158 80
5169 4
3773 118
2350 16
1002 112
915 28
898 44
734 2
632 62
575 8
566 59
474 20
388 84
343 48
333 55
328 47
311 41
306 38
277 81
263 79
255 66
245 90
245 87
234 50
224 10
220 60
194 64
186 32
184 82
150 18
125 7
121 132
116 30
103 51
95 114
84 36
82 40
78 22
77 89
55 9
53 174
48 152
44 67
44 5
42 115
41 58
38 122
37 134
34 13
34 116
29 45
29 3
29 24
27 144
26 146
25 91
24 113
20 27
...
number with 1-9 elements:
114186 1
35712 6
5169 4
734 2
575 8
125 7
55 9
44 5
29 3
Can cover 60% of sizes with size up to 6
Can cover 44% of sizes with size up to 4
Can cover 42% of sizes with size up to 1
*/ */
public class ATNConfigSet implements Set<ATNConfig> { public class ATNConfigSet implements Set<ATNConfig> {
/* /**
The reason that we need this is because we don't want the hash map to use * The reason that we need this is because we don't want the hash map to use
the standard hash code and equals. We need all configurations with the same * the standard hash code and equals. We need all configurations with the same
(s,i,_,semctx) to be equal. Unfortunately, this key effectively doubles * {@code (s,i,_,semctx)} to be equal. Unfortunately, this key effectively doubles
the number of objects associated with ATNConfigs. The other solution is to * the number of objects associated with ATNConfigs. The other solution is to
use a hash table that lets us specify the equals/hashcode operation. * use a hash table that lets us specify the equals/hashcode operation.
*/ */
public static class ConfigHashSet extends AbstractConfigHashSet { public static class ConfigHashSet extends AbstractConfigHashSet {
public ConfigHashSet() { public ConfigHashSet() {
@ -279,8 +96,9 @@ public class ATNConfigSet implements Set<ATNConfig> {
*/ */
protected boolean readonly = false; protected boolean readonly = false;
/** All configs but hashed by (s, i, _, pi) not incl context. Wiped out /**
* when we go readonly as this set becomes a DFA state. * All configs but hashed by (s, i, _, pi) not including context. Wiped out
* when we go readonly as this set becomes a DFA state.
*/ */
public AbstractConfigHashSet configLookup; public AbstractConfigHashSet configLookup;

View File

@ -234,7 +234,6 @@ public abstract class PredictionContext {
* @param rootIsWildcard {@code true} if this is a local-context merge, * @param rootIsWildcard {@code true} if this is a local-context merge,
* otherwise false to indicate a full-context merge * otherwise false to indicate a full-context merge
* @param mergeCache * @param mergeCache
* @return
*/ */
public static PredictionContext mergeSingletons( public static PredictionContext mergeSingletons(
SingletonPredictionContext a, SingletonPredictionContext a,

View File

@ -29,11 +29,7 @@
*/ */
package org.antlr.v4.runtime.dfa; package org.antlr.v4.runtime.dfa;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.DecisionState; import org.antlr.v4.runtime.atn.DecisionState;
import org.antlr.v4.runtime.atn.ParserATNSimulator;
import org.antlr.v4.runtime.atn.Transition;
import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable; import org.antlr.v4.runtime.misc.Nullable;
@ -41,7 +37,6 @@ import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -61,11 +56,6 @@ public class DFA {
@NotNull @NotNull
public final DecisionState atnStartState; public final DecisionState atnStartState;
/** Set of configs for a DFA state with at least one conflict? Mainly used as "return value"
* from {@link ParserATNSimulator#predictATN} for retry.
*/
// public OrderedHashSet<ATNConfig> conflictSet;
public DFA(@NotNull DecisionState atnStartState) { public DFA(@NotNull DecisionState atnStartState) {
this(atnStartState, 0); this(atnStartState, 0);
} }

View File

@ -94,7 +94,7 @@ public class DFAState {
/** /**
* Indicates that this state was created during SLL prediction that * Indicates that this state was created during SLL prediction that
* discovered a conflict between the configurations in the state. Future * discovered a conflict between the configurations in the state. Future
* {@link ParserATNSimulator#execDFA} invocations immediately jump to doing * {@link ParserATNSimulator#execATN} invocations immediately jump to doing
* full context prediction if this field is true. * full context prediction if this field is true.
*/ */
public boolean requiresFullContext; public boolean requiresFullContext;

View File

@ -77,6 +77,18 @@ public class JavaTarget extends Target {
badWords.add("parserRule"); badWords.add("parserRule");
} }
/**
* {@inheritDoc}
* <p/>
* For Java, this is the translation {@code 'a\n"'} &rarr; {@code "a\n\""}.
* Expect single quotes around the incoming literal. Just flip the quotes
* and replace double quotes with {@code \"}.
* <p/>
* Note that we have decided to allow people to use '\"' without penalty, so
* we must build the target string in a loop as {@link String#replace}
* cannot handle both {@code \"} and {@code "} without a lot of messing
* around.
*/
@Override @Override
public String getTargetStringLiteralFromANTLRStringLiteral( public String getTargetStringLiteralFromANTLRStringLiteral(
CodeGenerator generator, CodeGenerator generator,

View File

@ -191,14 +191,7 @@ public abstract class Target {
/** /**
* Convert from an ANTLR string literal found in a grammar file to an * Convert from an ANTLR string literal found in a grammar file to an
* equivalent string literal in the target language. For Java, this is the * equivalent string literal in the target language.
* translation {@code 'a\n"'} &rarr; {@code "a\n\""}. Expect single quotes
* around the incoming literal. Just flip the quotes and replace double
* quotes with {@code \"}.
* <p/>
* Note that we have decided to allow people to use '\"' without penalty, so
* we must build the target string in a loop as {@link Utils#replace} cannot
* handle both {@code \"} and {@code "} without a lot of messing around.
*/ */
public abstract String getTargetStringLiteralFromANTLRStringLiteral( public abstract String getTargetStringLiteralFromANTLRStringLiteral(
CodeGenerator generator, CodeGenerator generator,