Documentation updates prior to release
This commit is contained in:
parent
89a251e497
commit
6236072e3b
|
@ -39,19 +39,19 @@ import org.antlr.v4.runtime.misc.Nullable;
|
|||
|
||||
import java.util.BitSet;
|
||||
|
||||
/** How to emit recognition errors */
|
||||
/** How to emit recognition errors. */
|
||||
public interface ANTLRErrorListener {
|
||||
/** Upon syntax error, notify any interested parties. This is not
|
||||
* how to recover from errors or compute error messages. The
|
||||
* parser ANTLRErrorStrategy specifies how to recover from syntax
|
||||
* errors and how to compute error messages. This listener's job
|
||||
* is simply to emit a computed message, though it has enough
|
||||
* information to create its own message in many cases.
|
||||
*
|
||||
* The RecognitionException is non-null for all syntax errors
|
||||
* except when we discover mismatched token errors that we can
|
||||
* recover from in-line, without returning from the surrounding
|
||||
* rule (via the single token insertion and deletion mechanism).
|
||||
/**
|
||||
* Upon syntax error, notify any interested parties. This is not how to
|
||||
* recover from errors or compute error messages. {@link ANTLRErrorStrategy}
|
||||
* specifies how to recover from syntax errors and how to compute error
|
||||
* messages. This listener's job is simply to emit a computed message,
|
||||
* though it has enough information to create its own message in many cases.
|
||||
* <p/>
|
||||
* The {@link RecognitionException} is non-null for all syntax errors except
|
||||
* when we discover mismatched token errors that we can recover from
|
||||
* in-line, without returning from the surrounding rule (via the single
|
||||
* token insertion and deletion mechanism).
|
||||
*
|
||||
* @param recognizer
|
||||
* What parser got the error. From this
|
||||
|
@ -59,16 +59,15 @@ public interface ANTLRErrorListener {
|
|||
* as the input stream.
|
||||
* @param offendingSymbol
|
||||
* The offending token in the input token
|
||||
* stream, unless recognizer is a lexer (then it's null) If
|
||||
* no viable alternative error, e has token at which we
|
||||
* stream, unless recognizer is a lexer (then it's null). If
|
||||
* no viable alternative error, {@code e} has token at which we
|
||||
* started production for the decision.
|
||||
* @param line
|
||||
* At what line in input to the error occur? This always refers to
|
||||
* stopTokenIndex
|
||||
* The line number in the input where the error occurred.
|
||||
* @param charPositionInLine
|
||||
* At what character position within that line did the error occur.
|
||||
* The character position within that line where the error occurred.
|
||||
* @param msg
|
||||
* The message to emit
|
||||
* The message to emit.
|
||||
* @param e
|
||||
* The exception generated by the parser that led to
|
||||
* the reporting of an error. It is null in the case where
|
||||
|
|
|
@ -35,10 +35,9 @@ import java.io.IOException;
|
|||
import java.io.InputStreamReader;
|
||||
import java.util.Arrays;
|
||||
|
||||
/** This is an ANTLRInputStream that is loaded from a file
|
||||
* all at once when you construct the object. This is a special case
|
||||
* since we know the exact size of the object to load. We can avoid lots
|
||||
* of data copying.
|
||||
/**
|
||||
* This is an {@link ANTLRInputStream} that is loaded from a file all at once
|
||||
* when you construct the object.
|
||||
*/
|
||||
public class ANTLRFileStream extends ANTLRInputStream {
|
||||
protected String fileName;
|
||||
|
|
|
@ -37,10 +37,12 @@ import java.io.InputStreamReader;
|
|||
import java.io.Reader;
|
||||
import java.util.Arrays;
|
||||
|
||||
/** Vacuum all input from a Reader/InputStream and then treat it like a char[] buffer.
|
||||
* Can also pass in a string or char[] to use.
|
||||
*
|
||||
* If you need encoding, pass in stream/reader with correct encoding.
|
||||
/**
|
||||
* Vacuum all input from a {@link Reader}/{@link InputStream} and then treat it
|
||||
* like a {@code char[]} buffer. Can also pass in a {@link String} or
|
||||
* {@code char[]} to use.
|
||||
* <p/>
|
||||
* If you need encoding, pass in stream/reader with correct encoding.
|
||||
*/
|
||||
public class ANTLRInputStream implements CharStream {
|
||||
public static final int READ_BUFFER_SIZE = 1024;
|
||||
|
|
|
@ -38,36 +38,38 @@ import java.util.HashSet;
|
|||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/** Buffer all input tokens but do on-demand fetching of new tokens from
|
||||
* lexer. Useful when the parser or lexer has to set context/mode info before
|
||||
* proper lexing of future tokens. The ST template parser needs this,
|
||||
* for example, because it has to constantly flip back and forth between
|
||||
* inside/output templates. E.g., <names:{hi, <it>}> has to parse names
|
||||
* as part of an expression but "hi, <it>" as a nested template.
|
||||
*
|
||||
* You can't use this stream if you pass whitespace or other off-channel
|
||||
* tokens to the parser. The stream can't ignore off-channel tokens.
|
||||
* (UnbufferedTokenStream is the same way.) Use CommonTokenStream.
|
||||
*
|
||||
* This is not a subclass of UnbufferedTokenStream because I don't want
|
||||
* to confuse small moving window of tokens it uses for the full buffer.
|
||||
/**
|
||||
* Buffer all input tokens but do on-demand fetching of new tokens from lexer.
|
||||
* Useful when the parser or lexer has to set context/mode info before proper
|
||||
* lexing of future tokens. The ST template parser needs this, for example,
|
||||
* because it has to constantly flip back and forth between inside/outside
|
||||
* templates. E.g., {@code <names:{hi, <it>}>} has to parse names as part of an
|
||||
* expression but {@code "hi, <it>"} as a nested template.
|
||||
* <p/>
|
||||
* You can't use this stream if you pass whitespace or other off-channel tokens
|
||||
* to the parser. The stream can't ignore off-channel tokens.
|
||||
* ({@link UnbufferedTokenStream} is the same way.) Use
|
||||
* {@link CommonTokenStream}.
|
||||
*/
|
||||
public class BufferedTokenStream implements TokenStream {
|
||||
@NotNull
|
||||
protected TokenSource tokenSource;
|
||||
|
||||
/** Record every single token pulled from the source so we can reproduce
|
||||
* chunks of it later. The buffer in LookaheadStream overlaps sometimes
|
||||
* as its moving window moves through the input. This list captures
|
||||
* everything so we can access complete input text.
|
||||
*/
|
||||
/**
|
||||
* Record every single token pulled from the source so we can reproduce
|
||||
* chunks of it later. This list captures everything so we can access
|
||||
* complete input text.
|
||||
*/
|
||||
protected List<Token> tokens = new ArrayList<Token>(100);
|
||||
|
||||
/** The index into the tokens list of the current token (next token
|
||||
* to consume). tokens[p] should be LT(1). p=-1 indicates need
|
||||
* to initialize with first token. The ctor doesn't get a token.
|
||||
* First call to LT(1) or whatever gets the first token and sets p=0;
|
||||
*/
|
||||
/**
|
||||
* The index into {@link #tokens} of the current token (next token to
|
||||
* consume). {@link #tokens}{@code [}{@link #p}{@code ]} should be
|
||||
* {@link #LT LT(1)}. {@link #p}{@code =-1} indicates need to initialize
|
||||
* with first token. The constructor doesn't get a token. First call to
|
||||
* {@link #LT LT(1)} or whatever gets the first token and sets
|
||||
* {@link #p}{@code =0}.
|
||||
*/
|
||||
protected int p = -1;
|
||||
|
||||
/**
|
||||
|
@ -92,8 +94,6 @@ public class BufferedTokenStream implements TokenStream {
|
|||
@Override
|
||||
public int index() { return p; }
|
||||
|
||||
// public int range() { return range; }
|
||||
|
||||
@Override
|
||||
public int mark() {
|
||||
return 0;
|
||||
|
|
|
@ -44,201 +44,18 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/** Specialized OrderedHashSet that can track info about the set.
|
||||
* Might be able to optimize later w/o affecting code that uses this set.
|
||||
|
||||
histogram of lexer DFA configset size:
|
||||
|
||||
206 30 <- 206 sets with size 30
|
||||
47 1
|
||||
17 31
|
||||
12 2
|
||||
10 3
|
||||
7 32
|
||||
4 4
|
||||
3 35
|
||||
2 9
|
||||
2 6
|
||||
2 5
|
||||
2 34
|
||||
1 7
|
||||
1 33
|
||||
1 29
|
||||
1 12
|
||||
1 119 <- max size
|
||||
|
||||
322 set size for SLL parser java.* in DFA states:
|
||||
|
||||
888 1
|
||||
411 54
|
||||
365 88
|
||||
304 56
|
||||
206 80
|
||||
182 16
|
||||
167 86
|
||||
166 78
|
||||
158 84
|
||||
131 2
|
||||
121 20
|
||||
120 8
|
||||
119 112
|
||||
82 10
|
||||
73 6
|
||||
53 174
|
||||
47 90
|
||||
45 4
|
||||
39 12
|
||||
38 122
|
||||
37 89
|
||||
37 62
|
||||
34 3
|
||||
34 18
|
||||
32 81
|
||||
31 87
|
||||
28 45
|
||||
27 144
|
||||
25 41
|
||||
24 132
|
||||
22 91
|
||||
22 7
|
||||
21 82
|
||||
21 28
|
||||
21 27
|
||||
17 9
|
||||
16 29
|
||||
16 155
|
||||
15 51
|
||||
15 118
|
||||
14 146
|
||||
14 114
|
||||
13 5
|
||||
13 38
|
||||
12 48
|
||||
11 64
|
||||
11 50
|
||||
11 22
|
||||
11 134
|
||||
11 131
|
||||
10 79
|
||||
10 76
|
||||
10 59
|
||||
10 58
|
||||
10 55
|
||||
10 39
|
||||
10 116
|
||||
9 74
|
||||
9 47
|
||||
9 310
|
||||
...
|
||||
|
||||
javalr, java.* configs with # preds histogram:
|
||||
|
||||
4569 0
|
||||
57 1
|
||||
27 27
|
||||
5 76
|
||||
4 28
|
||||
3 72
|
||||
3 38
|
||||
3 30
|
||||
2 6
|
||||
2 32
|
||||
1 9
|
||||
1 2
|
||||
|
||||
javalr, java.* all atnconfigsets; max size = 322, num sets = 269088
|
||||
|
||||
114186 1 <-- optimize
|
||||
35712 6
|
||||
28081 78
|
||||
15252 54
|
||||
14171 56
|
||||
13159 12
|
||||
11810 88
|
||||
6873 86
|
||||
6158 80
|
||||
5169 4
|
||||
3773 118
|
||||
2350 16
|
||||
1002 112
|
||||
915 28
|
||||
898 44
|
||||
734 2
|
||||
632 62
|
||||
575 8
|
||||
566 59
|
||||
474 20
|
||||
388 84
|
||||
343 48
|
||||
333 55
|
||||
328 47
|
||||
311 41
|
||||
306 38
|
||||
277 81
|
||||
263 79
|
||||
255 66
|
||||
245 90
|
||||
245 87
|
||||
234 50
|
||||
224 10
|
||||
220 60
|
||||
194 64
|
||||
186 32
|
||||
184 82
|
||||
150 18
|
||||
125 7
|
||||
121 132
|
||||
116 30
|
||||
103 51
|
||||
95 114
|
||||
84 36
|
||||
82 40
|
||||
78 22
|
||||
77 89
|
||||
55 9
|
||||
53 174
|
||||
48 152
|
||||
44 67
|
||||
44 5
|
||||
42 115
|
||||
41 58
|
||||
38 122
|
||||
37 134
|
||||
34 13
|
||||
34 116
|
||||
29 45
|
||||
29 3
|
||||
29 24
|
||||
27 144
|
||||
26 146
|
||||
25 91
|
||||
24 113
|
||||
20 27
|
||||
...
|
||||
|
||||
number with 1-9 elements:
|
||||
|
||||
114186 1
|
||||
35712 6
|
||||
5169 4
|
||||
734 2
|
||||
575 8
|
||||
125 7
|
||||
55 9
|
||||
44 5
|
||||
29 3
|
||||
|
||||
Can cover 60% of sizes with size up to 6
|
||||
Can cover 44% of sizes with size up to 4
|
||||
Can cover 42% of sizes with size up to 1
|
||||
/**
|
||||
* Specialized {@link Set}{@code <}{@link ATNConfig}{@code >} that can track
|
||||
* info about the set, with support for combining similar configurations using a
|
||||
* graph-structured stack.
|
||||
*/
|
||||
public class ATNConfigSet implements Set<ATNConfig> {
|
||||
/*
|
||||
The reason that we need this is because we don't want the hash map to use
|
||||
the standard hash code and equals. We need all configurations with the same
|
||||
(s,i,_,semctx) to be equal. Unfortunately, this key effectively doubles
|
||||
the number of objects associated with ATNConfigs. The other solution is to
|
||||
use a hash table that lets us specify the equals/hashcode operation.
|
||||
/**
|
||||
* The reason that we need this is that we don't want the hash map to use
|
||||
* the standard hash code and equals. We need all configurations with the same
|
||||
* {@code (s,i,_,semctx)} to be equal. Unfortunately, this key effectively doubles
|
||||
* the number of objects associated with ATNConfigs. The other solution is to
|
||||
* use a hash table that lets us specify the equals/hashcode operation.
|
||||
*/
|
||||
public static class ConfigHashSet extends AbstractConfigHashSet {
|
||||
public ConfigHashSet() {
|
||||
|
@ -279,8 +96,9 @@ public class ATNConfigSet implements Set<ATNConfig> {
|
|||
*/
|
||||
protected boolean readonly = false;
|
||||
|
||||
/** All configs but hashed by (s, i, _, pi) not incl context. Wiped out
|
||||
* when we go readonly as this set becomes a DFA state.
|
||||
/**
|
||||
* All configs but hashed by (s, i, _, pi) not including context. Wiped out
|
||||
* when we go readonly as this set becomes a DFA state.
|
||||
*/
|
||||
public AbstractConfigHashSet configLookup;
|
||||
|
||||
|
|
|
@ -234,7 +234,6 @@ public abstract class PredictionContext {
|
|||
* @param rootIsWildcard {@code true} if this is a local-context merge,
|
||||
* otherwise {@code false} to indicate a full-context merge
|
||||
* @param mergeCache
|
||||
* @return
|
||||
*/
|
||||
public static PredictionContext mergeSingletons(
|
||||
SingletonPredictionContext a,
|
||||
|
|
|
@ -29,11 +29,7 @@
|
|||
*/
|
||||
package org.antlr.v4.runtime.dfa;
|
||||
|
||||
import org.antlr.v4.runtime.TokenStream;
|
||||
import org.antlr.v4.runtime.atn.ATNState;
|
||||
import org.antlr.v4.runtime.atn.DecisionState;
|
||||
import org.antlr.v4.runtime.atn.ParserATNSimulator;
|
||||
import org.antlr.v4.runtime.atn.Transition;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
import org.antlr.v4.runtime.misc.Nullable;
|
||||
|
||||
|
@ -41,7 +37,6 @@ import java.util.ArrayList;
|
|||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
@ -61,11 +56,6 @@ public class DFA {
|
|||
@NotNull
|
||||
public final DecisionState atnStartState;
|
||||
|
||||
/** Set of configs for a DFA state with at least one conflict? Mainly used as "return value"
|
||||
* from {@link ParserATNSimulator#predictATN} for retry.
|
||||
*/
|
||||
// public OrderedHashSet<ATNConfig> conflictSet;
|
||||
|
||||
public DFA(@NotNull DecisionState atnStartState) {
|
||||
this(atnStartState, 0);
|
||||
}
|
||||
|
|
|
@ -94,7 +94,7 @@ public class DFAState {
|
|||
/**
|
||||
* Indicates that this state was created during SLL prediction that
|
||||
* discovered a conflict between the configurations in the state. Future
|
||||
* {@link ParserATNSimulator#execDFA} invocations immediately jumped doing
|
||||
* {@link ParserATNSimulator#execATN} invocations immediately jump to doing
|
||||
* full context prediction if this field is true.
|
||||
*/
|
||||
public boolean requiresFullContext;
|
||||
|
|
|
@ -77,6 +77,18 @@ public class JavaTarget extends Target {
|
|||
badWords.add("parserRule");
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* <p/>
|
||||
* For Java, this is the translation {@code 'a\n"'} → {@code "a\n\""}.
|
||||
* Expect single quotes around the incoming literal. Just flip the quotes
|
||||
* and replace double quotes with {@code \"}.
|
||||
* <p/>
|
||||
* Note that we have decided to allow people to use '\"' without penalty, so
|
||||
* we must build the target string in a loop as {@link String#replace}
|
||||
* cannot handle both {@code \"} and {@code "} without a lot of messing
|
||||
* around.
|
||||
*/
|
||||
@Override
|
||||
public String getTargetStringLiteralFromANTLRStringLiteral(
|
||||
CodeGenerator generator,
|
||||
|
|
|
@ -191,14 +191,7 @@ public abstract class Target {
|
|||
|
||||
/**
|
||||
* Convert from an ANTLR string literal found in a grammar file to an
|
||||
* equivalent string literal in the target language. For Java, this is the
|
||||
* translation {@code 'a\n"'} → {@code "a\n\""}. Expect single quotes
|
||||
* around the incoming literal. Just flip the quotes and replace double
|
||||
* quotes with {@code \"}.
|
||||
* <p/>
|
||||
* Note that we have decided to allow people to use '\"' without penalty, so
|
||||
* we must build the target string in a loop as {@link Utils#replace} cannot
|
||||
* handle both {@code \"} and {@code "} without a lot of messing around.
|
||||
* equivalent string literal in the target language.
|
||||
*/
|
||||
public abstract String getTargetStringLiteralFromANTLRStringLiteral(
|
||||
CodeGenerator generator,
|
||||
|
|
Loading…
Reference in New Issue