diff --git a/CHANGES.txt b/CHANGES.txt index 128c35214..2e888a474 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -8,6 +8,9 @@ January 11, 2013 * Add error 144: multi-character literals are not allowed in lexer sets * Error 134 now only applies to rule references in lexer sets * Updated error messages (cleanup) +* Reduce size of _serializedATN by adding 2 to each element: new representation + avoids embedded values 0 and 0xFFFF which are common and have multi-byte + representations in Java's modified UTF-8 January 10, 2013 diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java index 78028d688..04ed72e56 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java @@ -43,7 +43,7 @@ import java.util.List; public abstract class ATNSimulator { public static final int SERIALIZED_VERSION; static { - SERIALIZED_VERSION = 1; + SERIALIZED_VERSION = 2; } /** Must distinguish between missing edge and edge we know leads nowhere */ @@ -99,6 +99,12 @@ public abstract class ATNSimulator { } public static ATN deserialize(@NotNull char[] data) { + data = data.clone(); + // don't adjust the first value since that's the version number + for (int i = 1; i < data.length; i++) { + data[i] = (char)(data[i] - 2); + } + ATN atn = new ATN(); List sets = new ArrayList(); int p = 0; diff --git a/tool/src/org/antlr/v4/automata/ATNSerializer.java b/tool/src/org/antlr/v4/automata/ATNSerializer.java index 41c95300a..5c8942c99 100644 --- a/tool/src/org/antlr/v4/automata/ATNSerializer.java +++ b/tool/src/org/antlr/v4/automata/ATNSerializer.java @@ -53,6 +53,7 @@ import org.antlr.v4.tool.Rule; import java.io.InvalidClassException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; public class ATNSerializer { @@ -254,10 +255,28 @@ public class ATNSerializer { for (DecisionState decStartState : atn.decisionToState) { data.add(decStartState.stateNumber); } + + // don't adjust the first value since that's the version number + for (int i = 1; i < data.size(); i++) { + assert data.get(i) >= -1 && data.get(i) < 0xFFFF; + int value = (data.get(i) + 2) & 0xFFFF; + if (value == 0xFFFF) { + value = -1; + } + + data.set(i, value); + } + return data; } public String decode(char[] data) { + data = data.clone(); + // don't adjust the first value since that's the version number + for (int i = 1; i < data.length; i++) { + data[i] = (char)(data[i] - 2); + } + StringBuilder buf = new StringBuilder(); int p = 0; int version = ATNSimulator.toInt(data[p++]); @@ -349,7 +368,7 @@ public class ATNSerializer { /** Used by Java target to encode short/int array as chars in string. */ public static String getSerializedAsString(Grammar g, ATN atn) { - return new String(Utils.toCharArray(getSerialized(g, atn))); + return new String(getSerializedAsChars(g, atn)); } public static IntegerList getSerialized(Grammar g, ATN atn) { @@ -357,7 +376,7 @@ public class ATNSerializer { } public static char[] getSerializedAsChars(Grammar g, ATN atn) { - return Utils.toCharArray(new ATNSerializer(g, atn).serialize()); + return Utils.toCharArray(getSerialized(g, atn)); } public static String getDecoded(Grammar g, ATN atn) {