Merge branch 'fix-76'
This commit is contained in:
commit
2242948c03
|
@ -53,6 +53,18 @@ public class Utils {
|
|||
return buf.toString();
|
||||
}
|
||||
|
||||
public static <T> String join(T[] array, String separator) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for (int i = 0; i < array.length; i++) {
|
||||
builder.append(array[i]);
|
||||
if (i < array.length - 1) {
|
||||
builder.append(separator);
|
||||
}
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static int numNonnull(Object[] data) {
|
||||
int n = 0;
|
||||
if ( data == null ) return n;
|
||||
|
|
|
@ -817,10 +817,23 @@ public class <lexer.name> extends <superClass> {
|
|||
}
|
||||
>>
|
||||
|
||||
|
||||
SerializedATN(model) ::= <<
|
||||
<if(rest(model.segments))>
|
||||
<! requires segmented representation !>
|
||||
private static final int _serializedATNSegments = <length(model.segments)>;
|
||||
<model.segments:{segment|private static final String _serializedATNSegment<i0> =
|
||||
"<segment; wrap={"+<\n><\t>"}>";}; separator="\n">
|
||||
public static final String _serializedATN = Utils.join(
|
||||
new String[] {
|
||||
<model.segments:{segment | _serializedATNSegment<i0>}; separator=",\n">
|
||||
},
|
||||
""
|
||||
);
|
||||
<else>
|
||||
<! only one segment, can be inlined !>
|
||||
public static final String _serializedATN =
|
||||
"<model.serialized; wrap={"+<\n><\t>"}>";
|
||||
<endif>
|
||||
public static final ATN _ATN =
|
||||
ATNSimulator.deserialize(_serializedATN.toCharArray());
|
||||
static {
|
||||
|
|
|
@ -263,7 +263,10 @@ public class ATNSerializer {
|
|||
|
||||
// don't adjust the first value since that's the version number
|
||||
for (int i = 1; i < data.size(); i++) {
|
||||
assert data.get(i) >= -1 && data.get(i) < 0xFFFF;
|
||||
if (data.get(i) < -1 || data.get(i) > 0xFFFE) {
|
||||
throw new UnsupportedOperationException("Serialized ATN data element out of range.");
|
||||
}
|
||||
|
||||
int value = (data.get(i) + 2) & 0xFFFF;
|
||||
if (value == 0xFFFF) {
|
||||
value = -1;
|
||||
|
|
|
@ -163,6 +163,13 @@ public class JavaTarget extends Target {
|
|||
return "\\u"+hex;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getSerializedATNSegmentLimit() {
|
||||
// 65535 is the class file format byte limit for a UTF-8 encoded string literal
|
||||
// 3 is the maximum number of bytes it takes to encode a value in the range 0-0xFFFF
|
||||
return 65535 / 3;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean visibleGrammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
|
||||
return getBadWords().contains(idNode.getText());
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
package org.antlr.v4.codegen;
|
||||
|
||||
import org.antlr.v4.codegen.model.RuleFunction;
|
||||
import org.antlr.v4.codegen.model.SerializedATN;
|
||||
import org.antlr.v4.misc.Utils;
|
||||
import org.antlr.v4.parse.ANTLRParser;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
|
@ -287,6 +288,18 @@ public abstract class Target {
|
|||
return getTokenTypeAsTargetLabel(getCodeGenerator().g, ttype);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the maximum number of 16-bit unsigned integers that can be encoded
|
||||
* in a single segment of the serialized ATN.
|
||||
*
|
||||
* @see SerializedATN#getSegments
|
||||
*
|
||||
* @return the serialized ATN segment limit
|
||||
*/
|
||||
public int getSerializedATNSegmentLimit() {
|
||||
return Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
public boolean grammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) {
|
||||
switch (idNode.getParent().getType()) {
|
||||
case ANTLRParser.ASSIGN:
|
||||
|
|
|
@ -51,4 +51,15 @@ public class SerializedATN extends OutputModelObject {
|
|||
}
|
||||
// System.out.println(ATNSerializer.getDecoded(factory.getGrammar(), atn));
|
||||
}
|
||||
|
||||
public String[][] getSegments() {
|
||||
List<String[]> segments = new ArrayList<String[]>();
|
||||
int segmentLimit = factory.getGenerator().getTarget().getSerializedATNSegmentLimit();
|
||||
for (int i = 0; i < serialized.size(); i += segmentLimit) {
|
||||
List<String> currentSegment = serialized.subList(i, Math.min(i + segmentLimit, serialized.size()));
|
||||
segments.add(currentSegment.toArray(new String[currentSegment.size()]));
|
||||
}
|
||||
|
||||
return segments.toArray(new String[segments.size()][]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -693,6 +693,28 @@ public class TestLexerExec extends BaseTest {
|
|||
assertEquals(expecting, found);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a regression test for antlr/antlr4#76 "Serialized ATN strings
|
||||
* should be split when longer than 2^16 bytes (class file limitation)"
|
||||
* https://github.com/antlr/antlr4/issues/76
|
||||
*/
|
||||
@Test
|
||||
public void testLargeLexer() throws Exception {
|
||||
StringBuilder grammar = new StringBuilder();
|
||||
grammar.append("lexer grammar L;\n");
|
||||
grammar.append("WS : [ \\t\\r\\n]+ -> skip;\n");
|
||||
for (int i = 0; i < 4000; i++) {
|
||||
grammar.append("KW").append(i).append(" : '").append("KW").append(i).append("';\n");
|
||||
}
|
||||
|
||||
String input = "KW400";
|
||||
String found = execLexer("L.g4", grammar.toString(), "L", input);
|
||||
String expecting =
|
||||
"[@0,0:4='KW400',<402>,1:0]\n" +
|
||||
"[@1,5:4='<EOF>',<-1>,1:5]\n";
|
||||
assertEquals(expecting, found);
|
||||
}
|
||||
|
||||
protected String load(String fileName, @Nullable String encoding)
|
||||
throws IOException
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue