Reduce size of _serializedATN by adding 2 to each element after the leading version number: the new representation avoids embedded values 0 and 0xFFFF, which are common and have multi-byte representations in Java's modified UTF-8

This commit is contained in:
Sam Harwell 2013-01-11 13:34:08 -06:00
parent ce279de429
commit 7ae67de110
3 changed files with 31 additions and 3 deletions

View File

@ -8,6 +8,9 @@ January 11, 2013
* Add error 144: multi-character literals are not allowed in lexer sets * Add error 144: multi-character literals are not allowed in lexer sets
* Error 134 now only applies to rule references in lexer sets * Error 134 now only applies to rule references in lexer sets
* Updated error messages (cleanup) * Updated error messages (cleanup)
* Reduce size of _serializedATN by adding 2 to each element after the leading
version number: the new representation avoids embedded values 0 and 0xFFFF,
which are common and have multi-byte representations in Java's modified UTF-8
January 10, 2013 January 10, 2013

View File

@ -43,7 +43,7 @@ import java.util.List;
public abstract class ATNSimulator { public abstract class ATNSimulator {
public static final int SERIALIZED_VERSION; public static final int SERIALIZED_VERSION;
static { static {
SERIALIZED_VERSION = 1; SERIALIZED_VERSION = 2;
} }
/** Must distinguish between missing edge and edge we know leads nowhere */ /** Must distinguish between missing edge and edge we know leads nowhere */
@ -99,6 +99,12 @@ public abstract class ATNSimulator {
} }
public static ATN deserialize(@NotNull char[] data) { public static ATN deserialize(@NotNull char[] data) {
data = data.clone();
// don't adjust the first value since that's the version number
for (int i = 1; i < data.length; i++) {
data[i] = (char)(data[i] - 2);
}
ATN atn = new ATN(); ATN atn = new ATN();
List<IntervalSet> sets = new ArrayList<IntervalSet>(); List<IntervalSet> sets = new ArrayList<IntervalSet>();
int p = 0; int p = 0;

View File

@ -53,6 +53,7 @@ import org.antlr.v4.tool.Rule;
import java.io.InvalidClassException; import java.io.InvalidClassException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
public class ATNSerializer { public class ATNSerializer {
@ -254,10 +255,28 @@ public class ATNSerializer {
for (DecisionState decStartState : atn.decisionToState) { for (DecisionState decStartState : atn.decisionToState) {
data.add(decStartState.stateNumber); data.add(decStartState.stateNumber);
} }
// don't adjust the first value since that's the version number
for (int i = 1; i < data.size(); i++) {
assert data.get(i) >= -1 && data.get(i) < 0xFFFF;
int value = (data.get(i) + 2) & 0xFFFF;
if (value == 0xFFFF) {
value = -1;
}
data.set(i, value);
}
return data; return data;
} }
public String decode(char[] data) { public String decode(char[] data) {
data = data.clone();
// don't adjust the first value since that's the version number
for (int i = 1; i < data.length; i++) {
data[i] = (char)(data[i] - 2);
}
StringBuilder buf = new StringBuilder(); StringBuilder buf = new StringBuilder();
int p = 0; int p = 0;
int version = ATNSimulator.toInt(data[p++]); int version = ATNSimulator.toInt(data[p++]);
@ -349,7 +368,7 @@ public class ATNSerializer {
/** Used by Java target to encode short/int array as chars in string. */ /** Used by Java target to encode short/int array as chars in string. */
public static String getSerializedAsString(Grammar g, ATN atn) { public static String getSerializedAsString(Grammar g, ATN atn) {
return new String(Utils.toCharArray(getSerialized(g, atn))); return new String(getSerializedAsChars(g, atn));
} }
public static IntegerList getSerialized(Grammar g, ATN atn) { public static IntegerList getSerialized(Grammar g, ATN atn) {
@ -357,7 +376,7 @@ public class ATNSerializer {
} }
public static char[] getSerializedAsChars(Grammar g, ATN atn) { public static char[] getSerializedAsChars(Grammar g, ATN atn) {
return Utils.toCharArray(new ATNSerializer(g, atn).serialize()); return Utils.toCharArray(getSerialized(g, atn));
} }
public static String getDecoded(Grammar g, ATN atn) { public static String getDecoded(Grammar g, ATN atn) {