Documentation updates prior to release

Sam Harwell 2013-06-30 15:50:56 -05:00
parent 89a251e497
commit 6236072e3b
10 changed files with 78 additions and 266 deletions

View File

@ -39,19 +39,19 @@ import org.antlr.v4.runtime.misc.Nullable;
import java.util.BitSet;
/** How to emit recognition errors */
/** How to emit recognition errors. */
public interface ANTLRErrorListener {
/** Upon syntax error, notify any interested parties. This is not
* how to recover from errors or compute error messages. The
* parser ANTLRErrorStrategy specifies how to recover from syntax
* errors and how to compute error messages. This listener's job
* is simply to emit a computed message, though it has enough
* information to create its own message in many cases.
*
* The RecognitionException is non-null for all syntax errors
* except when we discover mismatched token errors that we can
* recover from in-line, without returning from the surrounding
* rule (via the single token insertion and deletion mechanism).
/**
* Upon syntax error, notify any interested parties. This is not how to
* recover from errors or compute error messages. {@link ANTLRErrorStrategy}
* specifies how to recover from syntax errors and how to compute error
* messages. This listener's job is simply to emit a computed message,
* though it has enough information to create its own message in many cases.
* <p/>
* The {@link RecognitionException} is non-null for all syntax errors except
* when we discover mismatched token errors that we can recover from
* in-line, without returning from the surrounding rule (via the single
* token insertion and deletion mechanism).
*
* @param recognizer
* What parser got the error. From this
@ -59,16 +59,15 @@ public interface ANTLRErrorListener {
* as the input stream.
* @param offendingSymbol
* The offending token in the input token
* stream, unless recognizer is a lexer (then it's null) If
* no viable alternative error, e has token at which we
* stream, unless recognizer is a lexer (then it's null). If
* no viable alternative error, {@code e} has token at which we
* started production for the decision.
* @param line
* At what line in input to the error occur? This always refers to
* stopTokenIndex
* The line number in the input where the error occurred.
* @param charPositionInLine
* At what character position within that line did the error occur.
* The character position within that line where the error occurred.
* @param msg
* The message to emit
* The message to emit.
* @param e
* The exception generated by the parser that led to
* the reporting of an error. It is null in the case where
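For readers skimming this interface change, a minimal sketch of a listener that only emits the computed message might look like the following. The class name is illustrative, and extending BaseErrorListener (rather than implementing the interface directly) is just a convenience so the other callbacks can be left alone:

import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;

// Illustrative listener: it only reports; recovery stays with the ANTLRErrorStrategy.
public class StderrEmittingListener extends BaseErrorListener {
    @Override
    public void syntaxError(Recognizer<?, ?> recognizer,
                            Object offendingSymbol,
                            int line,
                            int charPositionInLine,
                            String msg,
                            RecognitionException e) {
        System.err.println("line " + line + ":" + charPositionInLine + " " + msg);
    }
}

Such a listener would typically be installed with parser.removeErrorListeners() followed by parser.addErrorListener(new StderrEmittingListener()).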

View File

@ -35,10 +35,9 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
/** This is an ANTLRInputStream that is loaded from a file
* all at once when you construct the object. This is a special case
* since we know the exact size of the object to load. We can avoid lots
* of data copying.
/**
* This is an {@link ANTLRInputStream} that is loaded from a file all at once
* when you construct the object.
*/
public class ANTLRFileStream extends ANTLRInputStream {
protected String fileName;
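A short usage sketch of the load-everything-at-once behavior (the file name and encoding below are placeholders):

import org.antlr.v4.runtime.ANTLRFileStream;
import org.antlr.v4.runtime.CharStream;

public class FileStreamExample {
    public static void main(String[] args) throws Exception {
        // The whole file is read in the constructor; the optional second argument names the encoding.
        CharStream input = new ANTLRFileStream("input.txt", "UTF-8");
        System.out.println("loaded " + input.size() + " characters");
    }
}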

View File

@ -37,9 +37,11 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Arrays;
/** Vacuum all input from a Reader/InputStream and then treat it like a char[] buffer.
* Can also pass in a string or char[] to use.
*
/**
* Vacuum all input from a {@link Reader}/{@link InputStream} and then treat it
* like a {@code char[]} buffer. Can also pass in a {@link String} or
* {@code char[]} to use.
* <p/>
* If you need encoding, pass in stream/reader with correct encoding.
*/
public class ANTLRInputStream implements CharStream {
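To illustrate the encoding note above, a hedged sketch that puts the charset on the Reader before handing it to ANTLRInputStream (file name and charset are placeholders):

import org.antlr.v4.runtime.ANTLRInputStream;

import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;

public class InputStreamExample {
    public static void main(String[] args) throws Exception {
        // Per the note above: the Reader carries the encoding, then everything is vacuumed in.
        Reader reader = new InputStreamReader(new FileInputStream("input.txt"), "UTF-8");
        ANTLRInputStream input = new ANTLRInputStream(reader);
        reader.close();
        System.out.println("loaded " + input.size() + " characters");
    }
}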

View File

@ -38,35 +38,37 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
/** Buffer all input tokens but do on-demand fetching of new tokens from
* lexer. Useful when the parser or lexer has to set context/mode info before
* proper lexing of future tokens. The ST template parser needs this,
* for example, because it has to constantly flip back and forth between
* inside/output templates. E.g., <names:{hi, <it>}> has to parse names
* as part of an expression but "hi, <it>" as a nested template.
*
* You can't use this stream if you pass whitespace or other off-channel
* tokens to the parser. The stream can't ignore off-channel tokens.
* (UnbufferedTokenStream is the same way.) Use CommonTokenStream.
*
* This is not a subclass of UnbufferedTokenStream because I don't want
* to confuse small moving window of tokens it uses for the full buffer.
/**
* Buffer all input tokens but do on-demand fetching of new tokens from lexer.
* Useful when the parser or lexer has to set context/mode info before proper
* lexing of future tokens. The ST template parser needs this, for example,
* because it has to constantly flip back and forth between inside/output
* templates. E.g., {@code <names:{hi, <it>}>} has to parse names as part of an
* expression but {@code "hi, <it>"} as a nested template.
* <p/>
* You can't use this stream if you pass whitespace or other off-channel tokens
* to the parser. The stream can't ignore off-channel tokens.
* ({@link UnbufferedTokenStream} is the same way.) Use
* {@link CommonTokenStream}.
*/
public class BufferedTokenStream implements TokenStream {
@NotNull
protected TokenSource tokenSource;
/** Record every single token pulled from the source so we can reproduce
* chunks of it later. The buffer in LookaheadStream overlaps sometimes
* as its moving window moves through the input. This list captures
* everything so we can access complete input text.
/**
* Record every single token pulled from the source so we can reproduce
* chunks of it later. This list captures everything so we can access
* complete input text.
*/
protected List<Token> tokens = new ArrayList<Token>(100);
/** The index into the tokens list of the current token (next token
* to consume). tokens[p] should be LT(1). p=-1 indicates need
* to initialize with first token. The ctor doesn't get a token.
* First call to LT(1) or whatever gets the first token and sets p=0;
/**
* The index into {@link #tokens} of the current token (next token to
* consume). {@link #tokens}{@code [}{@link #p}{@code ]} should be
* {@link #LT LT(1)}. {@link #p}{@code =-1} indicates need to initialize
* with first token. The constructor doesn't get a token. First call to
* {@link #LT LT(1)} or whatever gets the first token and sets
* {@link #p}{@code =0;}.
*/
protected int p = -1;
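As a quick illustration of the on-demand buffering described above, a sketch using the CommonTokenStream subclass; MyLexer is a placeholder for any generated lexer:

import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.Token;

public class TokenStreamExample {
    public static void main(String[] args) throws Exception {
        // MyLexer stands in for a generated lexer class.
        MyLexer lexer = new MyLexer(new ANTLRInputStream("a b c"));
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        tokens.fill();                      // force the on-demand fetch to run to EOF
        for (Token t : tokens.getTokens()) {
            System.out.println(t);
        }
    }
}

CommonTokenStream is the right choice whenever the lexer emits off-channel tokens (whitespace, comments), since it filters lookahead to the default channel.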
@ -92,8 +94,6 @@ public class BufferedTokenStream implements TokenStream {
@Override
public int index() { return p; }
// public int range() { return range; }
@Override
public int mark() {
return 0;

View File

@ -44,201 +44,18 @@ import java.util.Iterator;
import java.util.List;
import java.util.Set;
/** Specialized OrderedHashSet that can track info about the set.
* Might be able to optimize later w/o affecting code that uses this set.
histogram of lexer DFA configset size: most common bucket is 206 sets with size 30;
max observed size is 119.
322 set size for SLL parser java.* in DFA states: most common is size 1 (888 states).
javalr, java.* configs with # preds histogram: 4569 configs with 0 predicates.
javalr, java.* all atnconfigsets; max size = 322, num sets = 269088;
size 1 dominates with 114186 sets <-- optimize.
number with 1-9 elements: mostly size 1 (114186) and size 6 (35712).
Can cover 60% of sizes with size up to 6
Can cover 44% of sizes with size up to 4
Can cover 42% of sizes with size up to 1
/**
* Specialized {@link Set}{@code <}{@link ATNConfig}{@code >} that can track
* info about the set, with support for combining similar configurations using a
* graph-structured stack.
*/
public class ATNConfigSet implements Set<ATNConfig> {
/*
The reason that we need this is because we don't want the hash map to use
the standard hash code and equals. We need all configurations with the same
(s,i,_,semctx) to be equal. Unfortunately, this key effectively doubles
the number of objects associated with ATNConfigs. The other solution is to
use a hash table that lets us specify the equals/hashcode operation.
/**
* The reason that we need this is because we don't want the hash map to use
* the standard hash code and equals. We need all configurations with the same
* {@code (s,i,_,semctx)} to be equal. Unfortunately, this key effectively doubles
* the number of objects associated with ATNConfigs. The other solution is to
* use a hash table that lets us specify the equals/hashcode operation.
*/
public static class ConfigHashSet extends AbstractConfigHashSet {
public ConfigHashSet() {
@ -279,7 +96,8 @@ public class ATNConfigSet implements Set<ATNConfig> {
*/
protected boolean readonly = false;
/** All configs but hashed by (s, i, _, pi) not incl context. Wiped out
/**
* All configs but hashed by (s, i, _, pi) not including context. Wiped out
* when we go readonly as this set becomes a DFA state.
*/
public AbstractConfigHashSet configLookup;
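For readers unfamiliar with the technique the comment alludes to, here is one illustrative (not the runtime's actual) way to get a hash table keyed on only (state, alt, semantic context): wrap each element in a key object whose hashCode/equals ignore the prediction context. The extra key object per configuration is exactly the overhead the comment mentions; the alternative it names is a set implementation that accepts a pluggable equals/hashCode operation.

// Illustrative sketch only; field types are simplified to keep it self-contained.
final class ConfigKey {
    final int state;              // ATN state number (s)
    final int alt;                // predicted alternative (i)
    final Object semanticContext; // semantic context (semctx); prediction context is ignored

    ConfigKey(int state, int alt, Object semanticContext) {
        this.state = state;
        this.alt = alt;
        this.semanticContext = semanticContext;
    }

    @Override
    public int hashCode() {
        int h = 7;
        h = 31 * h + state;
        h = 31 * h + alt;
        h = 31 * h + semanticContext.hashCode();
        return h;
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof ConfigKey)) return false;
        ConfigKey k = (ConfigKey) o;
        return state == k.state && alt == k.alt
            && semanticContext.equals(k.semanticContext);
    }
}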

View File

@ -234,7 +234,6 @@ public abstract class PredictionContext {
* @param rootIsWildcard {@code true} if this is a local-context merge,
* otherwise false to indicate a full-context merge
* @param mergeCache
* @return
*/
public static PredictionContext mergeSingletons(
SingletonPredictionContext a,

View File

@ -29,11 +29,7 @@
*/
package org.antlr.v4.runtime.dfa;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.DecisionState;
import org.antlr.v4.runtime.atn.ParserATNSimulator;
import org.antlr.v4.runtime.atn.Transition;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
@ -41,7 +37,6 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -61,11 +56,6 @@ public class DFA {
@NotNull
public final DecisionState atnStartState;
/** Set of configs for a DFA state with at least one conflict? Mainly used as "return value"
* from {@link ParserATNSimulator#predictATN} for retry.
*/
// public OrderedHashSet<ATNConfig> conflictSet;
public DFA(@NotNull DecisionState atnStartState) {
this(atnStartState, 0);
}

View File

@ -94,7 +94,7 @@ public class DFAState {
/**
* Indicates that this state was created during SLL prediction that
* discovered a conflict between the configurations in the state. Future
* {@link ParserATNSimulator#execDFA} invocations immediately jumped doing
* {@link ParserATNSimulator#execATN} invocations immediately jump to
* full context prediction if this field is true.
*/
public boolean requiresFullContext;
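The fallback this flag records is what makes two-stage parsing work: try SLL prediction first and retry with full-context prediction only when needed. A hedged sketch of the usual client-side pattern as in the 4.x Java runtime, where MyLexer, MyParser, and compilationUnit() are placeholders for generated code:

import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.DefaultErrorStrategy;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.misc.ParseCancellationException;

public class TwoStageParse {
    // MyLexer, MyParser, and compilationUnit() stand in for generated code.
    public static ParserRuleContext parse(String text) {
        MyLexer lexer = new MyLexer(new ANTLRInputStream(text));
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        MyParser parser = new MyParser(tokens);
        parser.getInterpreter().setPredictionMode(PredictionMode.SLL); // fast first pass
        parser.setErrorHandler(new BailErrorStrategy());               // bail on the first problem
        try {
            return parser.compilationUnit();
        }
        catch (ParseCancellationException ex) {
            // Either a conflict SLL could not resolve or a genuine syntax error;
            // rewind and rerun with full-context (LL) prediction to find out which.
            tokens.seek(0);
            parser.reset();
            parser.setErrorHandler(new DefaultErrorStrategy());
            parser.getInterpreter().setPredictionMode(PredictionMode.LL);
            return parser.compilationUnit();
        }
    }
}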

View File

@ -77,6 +77,18 @@ public class JavaTarget extends Target {
badWords.add("parserRule");
}
/**
* {@inheritDoc}
* <p/>
* For Java, this is the translation {@code 'a\n"'} &rarr; {@code "a\n\""}.
* Expect single quotes around the incoming literal. Just flip the quotes
* and replace double quotes with {@code \"}.
* <p/>
* Note that we have decided to allow people to use '\"' without penalty, so
* we must build the target string in a loop as {@link String#replace}
* cannot handle both {@code \"} and {@code "} without a lot of messing
* around.
*/
@Override
public String getTargetStringLiteralFromANTLRStringLiteral(
CodeGenerator generator,
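A simplified sketch of the quote-flipping loop this Javadoc describes (not the generator's actual implementation, which also deals with unicode escapes and other target details); it shows why a single String.replace is not enough: an existing '\"' must be kept as-is while a bare '"' must become '\"':

public final class QuoteFlipSketch {
    public static String flipQuotes(String antlrLiteral) {
        // strip the surrounding single quotes: 'a\n"'  ->  a\n"
        String body = antlrLiteral.substring(1, antlrLiteral.length() - 1);
        StringBuilder sb = new StringBuilder("\"");
        for (int i = 0; i < body.length(); i++) {
            char c = body.charAt(i);
            if (c == '\\' && i + 1 < body.length()) {
                sb.append(c).append(body.charAt(++i)); // keep existing escapes such as \" untouched
            }
            else if (c == '"') {
                sb.append("\\\"");                     // a bare " must be escaped
            }
            else {
                sb.append(c);
            }
        }
        sb.append('"');
        return sb.toString();
    }
}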

View File

@ -191,14 +191,7 @@ public abstract class Target {
/**
* Convert from an ANTLR string literal found in a grammar file to an
* equivalent string literal in the target language. For Java, this is the
* translation {@code 'a\n"'} &rarr; {@code "a\n\""}. Expect single quotes
* around the incoming literal. Just flip the quotes and replace double
* quotes with {@code \"}.
* <p/>
* Note that we have decided to allow people to use '\"' without penalty, so
* we must build the target string in a loop as {@link Utils#replace} cannot
* handle both {@code \"} and {@code "} without a lot of messing around.
* equivalent string literal in the target language.
*/
public abstract String getTargetStringLiteralFromANTLRStringLiteral(
CodeGenerator generator,