Merge branch 'fix-76'

This commit is contained in:
Sam Harwell 2013-03-27 15:02:17 -05:00
commit 2242948c03
7 changed files with 83 additions and 2 deletions

View File

@ -53,6 +53,18 @@ public class Utils {
return buf.toString();
}
/**
 * Concatenates the string form of every element of {@code array}, placing
 * {@code separator} between consecutive elements (never before the first
 * or after the last).
 *
 * @param array the elements to join; an empty array yields an empty string
 * @param separator the text inserted between adjacent elements
 * @return the joined string
 */
public static <T> String join(T[] array, String separator) {
    StringBuilder joined = new StringBuilder();
    boolean atStart = true;
    for (T element : array) {
        // Emit the separator ahead of every element except the first one.
        if (!atStart) {
            joined.append(separator);
        }
        joined.append(element);
        atStart = false;
    }
    return joined.toString();
}
public static int numNonnull(Object[] data) {
int n = 0;
if ( data == null ) return n;

View File

@ -817,10 +817,23 @@ public class <lexer.name> extends <superClass> {
}
>>
SerializedATN(model) ::= <<
<if(rest(model.segments))>
<! requires segmented representation !>
private static final int _serializedATNSegments = <length(model.segments)>;
<model.segments:{segment|private static final String _serializedATNSegment<i0> =
"<segment; wrap={"+<\n><\t>"}>";}; separator="\n">
public static final String _serializedATN = Utils.join(
new String[] {
<model.segments:{segment | _serializedATNSegment<i0>}; separator=",\n">
},
""
);
<else>
<! only one segment, can be inlined !>
public static final String _serializedATN =
"<model.serialized; wrap={"+<\n><\t>"}>";
<endif>
public static final ATN _ATN =
ATNSimulator.deserialize(_serializedATN.toCharArray());
static {

View File

@ -263,7 +263,10 @@ public class ATNSerializer {
// don't adjust the first value since that's the version number
for (int i = 1; i < data.size(); i++) {
assert data.get(i) >= -1 && data.get(i) < 0xFFFF;
if (data.get(i) < -1 || data.get(i) > 0xFFFE) {
throw new UnsupportedOperationException("Serialized ATN data element out of range.");
}
int value = (data.get(i) + 2) & 0xFFFF;
if (value == 0xFFFF) {
value = -1;

View File

@ -163,6 +163,13 @@ public class JavaTarget extends Target {
return "\\u"+hex;
}
/**
 * Limits each serialized ATN segment so that it fits in one Java string
 * literal.
 *
 * @return the largest number of values allowed in a single segment
 */
@Override
public int getSerializedATNSegmentLimit() {
    // The class file format caps a UTF-8 encoded string literal at 65535 bytes.
    final int maxLiteralBytes = 65535;
    // A value in the range 0-0xFFFF needs at most 3 bytes when encoded.
    final int maxBytesPerValue = 3;
    return maxLiteralBytes / maxBytesPerValue;
}
@Override
protected boolean visibleGrammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
return getBadWords().contains(idNode.getText());

View File

@ -31,6 +31,7 @@
package org.antlr.v4.codegen;
import org.antlr.v4.codegen.model.RuleFunction;
import org.antlr.v4.codegen.model.SerializedATN;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.Token;
@ -287,6 +288,18 @@ public abstract class Target {
return getTokenTypeAsTargetLabel(getCodeGenerator().g, ttype);
}
/**
 * Returns how many 16-bit unsigned integers may be placed in one segment
 * of the serialized ATN. This base implementation returns
 * {@link Integer#MAX_VALUE}; subclasses may override it to return a
 * smaller bound.
 *
 * @see SerializedATN#getSegments
 *
 * @return the serialized ATN segment limit
 */
public int getSerializedATNSegmentLimit() {
    final int defaultLimit = Integer.MAX_VALUE;
    return defaultLimit;
}
public boolean grammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
switch (idNode.getParent().getType()) {
case ANTLRParser.ASSIGN:

View File

@ -51,4 +51,15 @@ public class SerializedATN extends OutputModelObject {
}
// System.out.println(ATNSerializer.getDecoded(factory.getGrammar(), atn));
}
/**
 * Splits the serialized ATN into consecutive segments, each holding at most
 * as many elements as the target's serialized ATN segment limit allows.
 * Every segment except possibly the last is exactly that long, and
 * concatenating the segments in order reproduces {@code serialized}.
 *
 * @return the segments in order; an empty array when {@code serialized} is empty
 * @throws IllegalStateException if the target reports a non-positive segment limit
 */
public String[][] getSegments() {
    int segmentLimit = factory.getGenerator().getTarget().getSerializedATNSegmentLimit();
    if (segmentLimit <= 0) {
        // A limit of zero would never advance the loop below (adding empty
        // segments until memory runs out); a negative one would index backwards.
        throw new IllegalStateException("Serialized ATN segment limit must be positive: " + segmentLimit);
    }

    int total = serialized.size();
    List<String[]> segments = new ArrayList<String[]>();
    int start = 0;
    while (start < total) {
        // Compare against the remaining element count instead of computing
        // start + segmentLimit up front, which could overflow int.
        int end = (total - start > segmentLimit) ? start + segmentLimit : total;
        List<String> currentSegment = serialized.subList(start, end);
        segments.add(currentSegment.toArray(new String[currentSegment.size()]));
        start = end;
    }
    return segments.toArray(new String[segments.size()][]);
}
}

View File

@ -693,6 +693,28 @@ public class TestLexerExec extends BaseTest {
assertEquals(expecting, found);
}
/**
 * Regression test for antlr/antlr4#76 "Serialized ATN strings should be
 * split when longer than 2^16 bytes (class file limitation)".
 * https://github.com/antlr/antlr4/issues/76
 *
 * Builds a lexer grammar with 4000 keyword rules and checks that a single
 * keyword is still tokenized as expected.
 */
@Test
public void testLargeLexer() throws Exception {
    final int keywordCount = 4000;

    StringBuilder grammarText = new StringBuilder("lexer grammar L;\n");
    grammarText.append("WS : [ \\t\\r\\n]+ -> skip;\n");
    for (int k = 0; k < keywordCount; k++) {
        // Each rule KW<k> matches exactly the literal text 'KW<k>'.
        String keyword = "KW" + k;
        grammarText.append(keyword).append(" : '").append(keyword).append("';\n");
    }

    String expected =
        "[@0,0:4='KW400',<402>,1:0]\n" +
        "[@1,5:4='<EOF>',<-1>,1:5]\n";
    String actual = execLexer("L.g4", grammarText.toString(), "L", "KW400");
    assertEquals(expected, actual);
}
protected String load(String fileName, @Nullable String encoding)
throws IOException
{