Document the value shifting used in the serialized ATN

Fixes #1270
This commit is contained in:
Sam Harwell 2017-01-07 09:38:04 -06:00
parent 33ac0c3611
commit ff2b2b8ba6
2 changed files with 17 additions and 1 deletions

View File

@ -110,7 +110,22 @@ public class ATNDeserializer {
@SuppressWarnings("deprecation")
public ATN deserialize(char[] data) {
data = data.clone();
// don't adjust the first value since that's the version number
// Each char value in data is shifted by +2 at the entry to this method.
// This is an encoding optimization targeting the serialized values 0
// and -1 (serialized to 0xFFFF), both of which are very common in the
// serialized form of the ATN. In the modified UTF-8 that Java uses for
// compiled string literals, these two character values have multi-byte
// forms. By shifting each value by +2, they become characters 2 and 1
// prior to writing the string, both of which have single-byte
// representations. Since the shift occurs in the tool during ATN
// serialization, each target is responsible for adjusting the values
// during deserialization.
//
// As a special case, note that the first element of data is not
// adjusted because it contains the major version number of the
// serialized ATN, which was fixed at 3 at the time the value shifting
// was implemented.
for (int i = 1; i < data.length; i++) {
data[i] = (char)(data[i] - 2);
}

View File

@ -340,6 +340,7 @@ public class ATNSerializer {
}
}
// Note: This value shifting loop is documented in ATNDeserializer.
// don't adjust the first value since that's the version number
for (int i = 1; i < data.size(); i++) {
if (data.get(i) < Character.MIN_VALUE || data.get(i) > Character.MAX_VALUE) {