forked from jasder/antlr
commit
eeda06b698
|
@ -35,23 +35,6 @@ import org.antlr.v4.runtime.misc.NotNull;
|
|||
|
||||
/** A source of characters for an ANTLR lexer. */
|
||||
public interface CharStream extends IntStream {
|
||||
/**
|
||||
* The minimum allowed value for a character in a {@code CharStream}.
|
||||
*/
|
||||
public static final int MIN_CHAR = Character.MIN_VALUE;
|
||||
|
||||
/**
|
||||
* The maximum allowed value for a character in a {@code CharStream}.
|
||||
* <p>
|
||||
* This value is {@code Character.MAX_VALUE - 1}, which reserves the value
|
||||
* {@code Character.MAX_VALUE} for special use within an implementing class.
|
||||
* For some implementations, the data buffers required for supporting the
|
||||
* marked ranges of {@link IntStream} are stored as {@code char[]} instead
|
||||
* of {@code int[]}, with {@code Character.MAX_VALUE} being used instead of
|
||||
* {@code -1} to mark the end of the stream internally.
|
||||
*/
|
||||
public static final int MAX_CHAR = Character.MAX_VALUE-1;
|
||||
|
||||
/**
|
||||
* This method returns the text for a range of characters within this input
|
||||
* stream. This method is guaranteed to not throw an exception if the
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
|
||||
package org.antlr.v4.runtime.atn;
|
||||
|
||||
import org.antlr.v4.runtime.Token;
|
||||
import org.antlr.v4.runtime.dfa.DFAState;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
|
@ -56,7 +57,7 @@ public abstract class ATNSimulator {
|
|||
/* WARNING: DO NOT MERGE THIS LINE. If UUIDs differ during a merge,
|
||||
* resolve the conflict by generating a new ID!
|
||||
*/
|
||||
SERIALIZED_UUID = UUID.fromString("065C46D6-8859-4FD7-A158-83E693BF2B52");
|
||||
SERIALIZED_UUID = UUID.fromString("33761B2D-78BB-4A43-8B0B-4F5BEE8AACF3");
|
||||
}
|
||||
|
||||
/** Must distinguish between missing edge and edge we know leads nowhere */
|
||||
|
@ -124,7 +125,6 @@ public abstract class ATNSimulator {
|
|||
data[i] = (char)(data[i] - 2);
|
||||
}
|
||||
|
||||
List<IntervalSet> sets = new ArrayList<IntervalSet>();
|
||||
int p = 0;
|
||||
int version = toInt(data[p++]);
|
||||
if (version != SERIALIZED_VERSION) {
|
||||
|
@ -149,7 +149,7 @@ public abstract class ATNSimulator {
|
|||
List<Pair<LoopEndState, Integer>> loopBackStateNumbers = new ArrayList<Pair<LoopEndState, Integer>>();
|
||||
List<Pair<BlockStartState, Integer>> endStateNumbers = new ArrayList<Pair<BlockStartState, Integer>>();
|
||||
int nstates = toInt(data[p++]);
|
||||
for (int i=1; i<=nstates; i++) {
|
||||
for (int i=0; i<nstates; i++) {
|
||||
int stype = toInt(data[p++]);
|
||||
// ignore bad type of states
|
||||
if ( stype==ATNState.INVALID_TYPE ) {
|
||||
|
@ -158,6 +158,10 @@ public abstract class ATNSimulator {
|
|||
}
|
||||
|
||||
int ruleIndex = toInt(data[p++]);
|
||||
if (ruleIndex == Character.MAX_VALUE) {
|
||||
ruleIndex = -1;
|
||||
}
|
||||
|
||||
ATNState s = stateFactory(stype, ruleIndex);
|
||||
if ( stype == ATNState.LOOP_END ) { // special case
|
||||
int loopBackStateNumber = toInt(data[p++]);
|
||||
|
@ -200,8 +204,16 @@ public abstract class ATNSimulator {
|
|||
atn.ruleToStartState[i] = startState;
|
||||
if ( atn.grammarType == ATNType.LEXER ) {
|
||||
int tokenType = toInt(data[p++]);
|
||||
if (tokenType == 0xFFFF) {
|
||||
tokenType = Token.EOF;
|
||||
}
|
||||
|
||||
atn.ruleToTokenType[i] = tokenType;
|
||||
int actionIndex = toInt(data[p++]);
|
||||
if (actionIndex == 0xFFFF) {
|
||||
actionIndex = -1;
|
||||
}
|
||||
|
||||
atn.ruleToActionIndex[i] = actionIndex;
|
||||
}
|
||||
}
|
||||
|
@ -229,13 +241,20 @@ public abstract class ATNSimulator {
|
|||
//
|
||||
// SETS
|
||||
//
|
||||
List<IntervalSet> sets = new ArrayList<IntervalSet>();
|
||||
int nsets = toInt(data[p++]);
|
||||
for (int i=1; i<=nsets; i++) {
|
||||
for (int i=0; i<nsets; i++) {
|
||||
int nintervals = toInt(data[p]);
|
||||
p++;
|
||||
IntervalSet set = new IntervalSet();
|
||||
sets.add(set);
|
||||
for (int j=1; j<=nintervals; j++) {
|
||||
|
||||
boolean containsEof = toInt(data[p++]) != 0;
|
||||
if (containsEof) {
|
||||
set.add(-1);
|
||||
}
|
||||
|
||||
for (int j=0; j<nintervals; j++) {
|
||||
set.add(toInt(data[p]), toInt(data[p + 1]));
|
||||
p += 2;
|
||||
}
|
||||
|
@ -245,7 +264,7 @@ public abstract class ATNSimulator {
|
|||
// EDGES
|
||||
//
|
||||
int nedges = toInt(data[p++]);
|
||||
for (int i=1; i<=nedges; i++) {
|
||||
for (int i=0; i<nedges; i++) {
|
||||
int src = toInt(data[p]);
|
||||
int trg = toInt(data[p+1]);
|
||||
int ttype = toInt(data[p+2]);
|
||||
|
@ -398,7 +417,7 @@ public abstract class ATNSimulator {
|
|||
}
|
||||
|
||||
public static int toInt(char c) {
|
||||
return c==65535 ? -1 : c;
|
||||
return c;
|
||||
}
|
||||
|
||||
public static int toInt32(char[] data, int offset) {
|
||||
|
@ -425,14 +444,26 @@ public abstract class ATNSimulator {
|
|||
ATNState target = atn.states.get(trg);
|
||||
switch (type) {
|
||||
case Transition.EPSILON : return new EpsilonTransition(target);
|
||||
case Transition.RANGE : return new RangeTransition(target, arg1, arg2);
|
||||
case Transition.RANGE :
|
||||
if (arg3 != 0) {
|
||||
return new RangeTransition(target, Token.EOF, arg2);
|
||||
}
|
||||
else {
|
||||
return new RangeTransition(target, arg1, arg2);
|
||||
}
|
||||
case Transition.RULE :
|
||||
RuleTransition rt = new RuleTransition((RuleStartState)atn.states.get(arg1), arg2, target);
|
||||
return rt;
|
||||
case Transition.PREDICATE :
|
||||
PredicateTransition pt = new PredicateTransition(target, arg1, arg2, arg3 != 0);
|
||||
return pt;
|
||||
case Transition.ATOM : return new AtomTransition(target, arg1);
|
||||
case Transition.ATOM :
|
||||
if (arg3 != 0) {
|
||||
return new AtomTransition(target, Token.EOF);
|
||||
}
|
||||
else {
|
||||
return new AtomTransition(target, arg1);
|
||||
}
|
||||
case Transition.ACTION :
|
||||
ActionTransition a = new ActionTransition(target, arg1, arg2, arg3 != 0);
|
||||
return a;
|
||||
|
|
|
@ -32,6 +32,7 @@ package org.antlr.v4.automata;
|
|||
|
||||
import org.antlr.v4.misc.Utils;
|
||||
import org.antlr.v4.parse.ANTLRParser;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
import org.antlr.v4.runtime.atn.ATN;
|
||||
import org.antlr.v4.runtime.atn.ATNSimulator;
|
||||
import org.antlr.v4.runtime.atn.ATNState;
|
||||
|
@ -54,15 +55,15 @@ import org.antlr.v4.tool.Rule;
|
|||
|
||||
import java.io.InvalidClassException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
public class ATNSerializer {
|
||||
public Grammar g;
|
||||
public ATN atn;
|
||||
public List<IntervalSet> sets = new ArrayList<IntervalSet>();
|
||||
|
||||
public ATNSerializer(Grammar g, ATN atn) {
|
||||
this.g = g;
|
||||
|
@ -113,6 +114,9 @@ public class ATNSerializer {
|
|||
data.add(g.getMaxTokenType());
|
||||
int nedges = 0;
|
||||
|
||||
Map<IntervalSet, Integer> setIndices = new HashMap<IntervalSet, Integer>();
|
||||
List<IntervalSet> sets = new ArrayList<IntervalSet>();
|
||||
|
||||
// dump states, count edges and collect sets while doing so
|
||||
IntegerList nonGreedyStates = new IntegerList();
|
||||
data.add(atn.states.size());
|
||||
|
@ -128,7 +132,14 @@ public class ATNSerializer {
|
|||
}
|
||||
|
||||
data.add(stateType);
|
||||
data.add(s.ruleIndex);
|
||||
|
||||
if (s.ruleIndex == -1) {
|
||||
data.add(Character.MAX_VALUE);
|
||||
}
|
||||
else {
|
||||
data.add(s.ruleIndex);
|
||||
}
|
||||
|
||||
if ( s.getStateType() == ATNState.LOOP_END ) {
|
||||
data.add(((LoopEndState)s).loopBackState.stateNumber);
|
||||
}
|
||||
|
@ -146,7 +157,10 @@ public class ATNSerializer {
|
|||
int edgeType = Transition.serializationTypes.get(t.getClass());
|
||||
if ( edgeType == Transition.SET || edgeType == Transition.NOT_SET ) {
|
||||
SetTransition st = (SetTransition)t;
|
||||
sets.add(st.set);
|
||||
if (!setIndices.containsKey(st.set)) {
|
||||
sets.add(st.set);
|
||||
setIndices.put(st.set, sets.size() - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -163,10 +177,20 @@ public class ATNSerializer {
|
|||
ATNState ruleStartState = atn.ruleToStartState[r];
|
||||
data.add(ruleStartState.stateNumber);
|
||||
if ( g.isLexer() ) {
|
||||
data.add(atn.ruleToTokenType[r]);
|
||||
if (atn.ruleToTokenType[r] == Token.EOF) {
|
||||
data.add(Character.MAX_VALUE);
|
||||
}
|
||||
else {
|
||||
data.add(atn.ruleToTokenType[r]);
|
||||
}
|
||||
String ruleName = g.rules.getKey(r);
|
||||
Rule rule = g.getRule(ruleName);
|
||||
data.add(rule.actionIndex);
|
||||
if (rule.actionIndex == -1) {
|
||||
data.add(Character.MAX_VALUE);
|
||||
}
|
||||
else {
|
||||
data.add(rule.actionIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -181,15 +205,33 @@ public class ATNSerializer {
|
|||
int nsets = sets.size();
|
||||
data.add(nsets);
|
||||
for (IntervalSet set : sets) {
|
||||
data.add(set.getIntervals().size());
|
||||
boolean containsEof = set.contains(Token.EOF);
|
||||
if (containsEof && set.getIntervals().get(0).b == Token.EOF) {
|
||||
data.add(set.getIntervals().size() - 1);
|
||||
}
|
||||
else {
|
||||
data.add(set.getIntervals().size());
|
||||
}
|
||||
|
||||
data.add(containsEof ? 1 : 0);
|
||||
for (Interval I : set.getIntervals()) {
|
||||
data.add(I.a);
|
||||
if (I.a == Token.EOF) {
|
||||
if (I.b == Token.EOF) {
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
data.add(0);
|
||||
}
|
||||
}
|
||||
else {
|
||||
data.add(I.a);
|
||||
}
|
||||
|
||||
data.add(I.b);
|
||||
}
|
||||
}
|
||||
|
||||
data.add(nedges);
|
||||
int setIndex = 0;
|
||||
for (ATNState s : atn.states) {
|
||||
if ( s==null ) {
|
||||
// might be optimized away
|
||||
|
@ -228,25 +270,40 @@ public class ATNSerializer {
|
|||
case Transition.RANGE :
|
||||
arg1 = ((RangeTransition)t).from;
|
||||
arg2 = ((RangeTransition)t).to;
|
||||
if (arg1 == Token.EOF) {
|
||||
arg1 = 0;
|
||||
arg3 = 1;
|
||||
}
|
||||
|
||||
break;
|
||||
case Transition.ATOM :
|
||||
arg1 = ((AtomTransition)t).label;
|
||||
if (arg1 == Token.EOF) {
|
||||
arg1 = 0;
|
||||
arg3 = 1;
|
||||
}
|
||||
|
||||
break;
|
||||
case Transition.ACTION :
|
||||
ActionTransition at = (ActionTransition)t;
|
||||
arg1 = at.ruleIndex;
|
||||
arg2 = at.actionIndex;
|
||||
if (arg2 == -1) {
|
||||
arg2 = 0xFFFF;
|
||||
}
|
||||
|
||||
arg3 = at.isCtxDependent ? 1 : 0 ;
|
||||
break;
|
||||
case Transition.SET :
|
||||
arg1 = setIndex++;
|
||||
arg1 = setIndices.get(((SetTransition)t).set);
|
||||
break;
|
||||
case Transition.NOT_SET :
|
||||
arg1 = setIndex++;
|
||||
arg1 = setIndices.get(((SetTransition)t).set);
|
||||
break;
|
||||
case Transition.WILDCARD :
|
||||
break;
|
||||
}
|
||||
|
||||
data.add(src);
|
||||
data.add(trg);
|
||||
data.add(edgeType);
|
||||
|
@ -263,15 +320,11 @@ public class ATNSerializer {
|
|||
|
||||
// don't adjust the first value since that's the version number
|
||||
for (int i = 1; i < data.size(); i++) {
|
||||
if (data.get(i) < -1 || data.get(i) > 0xFFFE) {
|
||||
if (data.get(i) < Character.MIN_VALUE || data.get(i) > Character.MAX_VALUE) {
|
||||
throw new UnsupportedOperationException("Serialized ATN data element out of range.");
|
||||
}
|
||||
|
||||
int value = (data.get(i) + 2) & 0xFFFF;
|
||||
if (value == 0xFFFF) {
|
||||
value = -1;
|
||||
}
|
||||
|
||||
data.set(i, value);
|
||||
}
|
||||
|
||||
|
@ -304,10 +357,14 @@ public class ATNSerializer {
|
|||
int maxType = ATNSimulator.toInt(data[p++]);
|
||||
buf.append("max type ").append(maxType).append("\n");
|
||||
int nstates = ATNSimulator.toInt(data[p++]);
|
||||
for (int i=1; i<=nstates; i++) {
|
||||
for (int i=0; i<nstates; i++) {
|
||||
int stype = ATNSimulator.toInt(data[p++]);
|
||||
if ( stype==ATNState.INVALID_TYPE ) continue; // ignore bad type of states
|
||||
int ruleIndex = ATNSimulator.toInt(data[p++]);
|
||||
if (ruleIndex == Character.MAX_VALUE) {
|
||||
ruleIndex = -1;
|
||||
}
|
||||
|
||||
String arg = "";
|
||||
if ( stype == ATNState.LOOP_END ) {
|
||||
int loopBackStateNumber = ATNSimulator.toInt(data[p++]);
|
||||
|
@ -317,7 +374,7 @@ public class ATNSerializer {
|
|||
int endStateNumber = ATNSimulator.toInt(data[p++]);
|
||||
arg = " "+endStateNumber;
|
||||
}
|
||||
buf.append(i - 1).append(":")
|
||||
buf.append(i).append(":")
|
||||
.append(ATNState.serializationNames.get(stype)).append(" ")
|
||||
.append(ruleIndex).append(arg).append("\n");
|
||||
}
|
||||
|
@ -331,6 +388,9 @@ public class ATNSerializer {
|
|||
if ( g.isLexer() ) {
|
||||
int arg1 = ATNSimulator.toInt(data[p++]);
|
||||
int arg2 = ATNSimulator.toInt(data[p++]);
|
||||
if (arg2 == Character.MAX_VALUE) {
|
||||
arg2 = -1;
|
||||
}
|
||||
buf.append("rule ").append(i).append(":").append(s).append(" ").append(arg1).append(",").append(arg2).append('\n');
|
||||
}
|
||||
else {
|
||||
|
@ -343,18 +403,26 @@ public class ATNSerializer {
|
|||
buf.append("mode ").append(i).append(":").append(s).append('\n');
|
||||
}
|
||||
int nsets = ATNSimulator.toInt(data[p++]);
|
||||
for (int i=1; i<=nsets; i++) {
|
||||
for (int i=0; i<nsets; i++) {
|
||||
int nintervals = ATNSimulator.toInt(data[p++]);
|
||||
buf.append(i-1).append(":");
|
||||
for (int j=1; j<=nintervals; j++) {
|
||||
if ( j>1 ) buf.append(", ");
|
||||
buf.append(i).append(":");
|
||||
boolean containsEof = data[p++] != 0;
|
||||
if (containsEof) {
|
||||
buf.append(getTokenName(Token.EOF));
|
||||
}
|
||||
|
||||
for (int j=0; j<nintervals; j++) {
|
||||
if ( containsEof || j>0 ) {
|
||||
buf.append(", ");
|
||||
}
|
||||
|
||||
buf.append(getTokenName(ATNSimulator.toInt(data[p]))).append("..").append(getTokenName(ATNSimulator.toInt(data[p + 1])));
|
||||
p += 2;
|
||||
}
|
||||
buf.append("\n");
|
||||
}
|
||||
int nedges = ATNSimulator.toInt(data[p++]);
|
||||
for (int i=1; i<=nedges; i++) {
|
||||
for (int i=0; i<nedges; i++) {
|
||||
int src = ATNSimulator.toInt(data[p]);
|
||||
int trg = ATNSimulator.toInt(data[p + 1]);
|
||||
int ttype = ATNSimulator.toInt(data[p + 2]);
|
||||
|
@ -368,9 +436,9 @@ public class ATNSerializer {
|
|||
p += 6;
|
||||
}
|
||||
int ndecisions = ATNSimulator.toInt(data[p++]);
|
||||
for (int i=1; i<=ndecisions; i++) {
|
||||
for (int i=0; i<ndecisions; i++) {
|
||||
int s = ATNSimulator.toInt(data[p++]);
|
||||
buf.append(i-1).append(":").append(s).append("\n");
|
||||
buf.append(i).append(":").append(s).append("\n");
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
|
|
@ -285,6 +285,9 @@ public class Grammar implements AttributeResolver {
|
|||
|
||||
/**
 * Seeds the token symbol tables with their predefined entries: maps the
 * name {@code "EOF"} to {@link Token#EOF} in {@code tokenNameToTypeMap}
 * and appends a {@code null} placeholder to {@code typeToTokenList}.
 */
protected void initTokenSymbolTables() {
|
||||
// register the predefined EOF token name under the Token.EOF type
tokenNameToTypeMap.put("EOF", Token.EOF);
|
||||
|
||||
// reserve a spot for the INVALID token: the null entry only occupies a
// list position so later entries do not take it.
// NOTE(review): presumably this is index 0 (list empty on entry) -- confirm.
|
||||
typeToTokenList.add(null);
|
||||
}
|
||||
|
||||
public void loadImportedGrammars() {
|
||||
|
|
|
@ -77,7 +77,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"rule 0:0\n" +
|
||||
"0->2 EPSILON 0,0,0\n" +
|
||||
"2->3 ATOM 1,0,0\n" +
|
||||
"3->4 ATOM -1,0,0\n" +
|
||||
"3->4 ATOM 0,0,1\n" +
|
||||
"4->1 EPSILON 0,0,0\n";
|
||||
ATN atn = createATN(g, true);
|
||||
String result = ATNSerializer.getDecoded(g, atn);
|
||||
|
@ -96,7 +96,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"3:BASIC 0\n" +
|
||||
"4:BASIC 0\n" +
|
||||
"rule 0:0\n" +
|
||||
"0:EOF..EOF, A..A\n" +
|
||||
"0:EOF, A..A\n" +
|
||||
"0->2 EPSILON 0,0,0\n" +
|
||||
"2->3 SET 0,0,0\n" +
|
||||
"3->1 EPSILON 0,0,0\n";
|
||||
|
@ -347,7 +347,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"0->1 EPSILON 0,0,0\n" +
|
||||
"1->3 EPSILON 0,0,0\n" +
|
||||
"3->4 ATOM 97,0,0\n" +
|
||||
"4->5 ATOM -1,0,0\n" +
|
||||
"4->5 ATOM 0,0,1\n" +
|
||||
"5->2 EPSILON 0,0,0\n" +
|
||||
"0:0\n";
|
||||
ATN atn = createATN(lg, true);
|
||||
|
@ -370,7 +370,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"6:BLOCK_END 0\n" +
|
||||
"rule 0:1 1,-1\n" +
|
||||
"mode 0:0\n" +
|
||||
"0:EOF..EOF, '\\n'..'\\n'\n" +
|
||||
"0:EOF, '\\n'..'\\n'\n" +
|
||||
"0->1 EPSILON 0,0,0\n" +
|
||||
"1->3 EPSILON 0,0,0\n" +
|
||||
"3->5 ATOM 97,0,0\n" +
|
||||
|
|
Loading…
Reference in New Issue