Reduce the size of _serializedATN by adding 2 to each element: the new representation avoids embedded values 0 and 0xFFFF, which are common and have multi-byte representations in Java's modified UTF-8

This commit is contained in:
Sam Harwell 2013-01-11 13:34:08 -06:00
parent ce279de429
commit 7ae67de110
3 changed files with 31 additions and 3 deletions

View File

@ -8,6 +8,9 @@ January 11, 2013
* Add error 144: multi-character literals are not allowed in lexer sets
* Error 134 now only applies to rule references in lexer sets
* Updated error messages (cleanup)
* Reduce size of _serializedATN by adding 2 to each element: new representation
avoids embedded values 0 and 0xFFFF which are common and have multi-byte
representations in Java's modified UTF-8
January 10, 2013

View File

@ -43,7 +43,7 @@ import java.util.List;
public abstract class ATNSimulator {
public static final int SERIALIZED_VERSION;
static {
SERIALIZED_VERSION = 1;
SERIALIZED_VERSION = 2;
}
/** Must distinguish between missing edge and edge we know leads nowhere */
@ -99,6 +99,12 @@ public abstract class ATNSimulator {
}
public static ATN deserialize(@NotNull char[] data) {
data = data.clone();
// don't adjust the first value since that's the version number
for (int i = 1; i < data.length; i++) {
data[i] = (char)(data[i] - 2);
}
ATN atn = new ATN();
List<IntervalSet> sets = new ArrayList<IntervalSet>();
int p = 0;

View File

@ -53,6 +53,7 @@ import org.antlr.v4.tool.Rule;
import java.io.InvalidClassException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class ATNSerializer {
@ -254,10 +255,28 @@ public class ATNSerializer {
for (DecisionState decStartState : atn.decisionToState) {
data.add(decStartState.stateNumber);
}
// don't adjust the first value since that's the version number
for (int i = 1; i < data.size(); i++) {
assert data.get(i) >= -1 && data.get(i) < 0xFFFF;
int value = (data.get(i) + 2) & 0xFFFF;
if (value == 0xFFFF) {
value = -1;
}
data.set(i, value);
}
return data;
}
public String decode(char[] data) {
data = data.clone();
// don't adjust the first value since that's the version number
for (int i = 1; i < data.length; i++) {
data[i] = (char)(data[i] - 2);
}
StringBuilder buf = new StringBuilder();
int p = 0;
int version = ATNSimulator.toInt(data[p++]);
@ -349,7 +368,7 @@ public class ATNSerializer {
/** Used by Java target to encode short/int array as chars in string. */
public static String getSerializedAsString(Grammar g, ATN atn) {
return new String(Utils.toCharArray(getSerialized(g, atn)));
return new String(getSerializedAsChars(g, atn));
}
public static IntegerList getSerialized(Grammar g, ATN atn) {
@ -357,7 +376,7 @@ public class ATNSerializer {
}
public static char[] getSerializedAsChars(Grammar g, ATN atn) {
return Utils.toCharArray(new ATNSerializer(g, atn).serialize());
return Utils.toCharArray(getSerialized(g, atn));
}
public static String getDecoded(Grammar g, ATN atn) {