Merge pull request #1672 from bhamiltoncx/code-point-transitions

New class CodePointTransitions to create SetTransitions for Unicode code points > U+FFFF
This commit is contained in:
Terence Parr 2017-02-20 14:37:36 -08:00 committed by GitHub
commit 91df265278
3 changed files with 57 additions and 5 deletions

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.IntervalSet;
/**
 * Factory helpers that choose between {@link AtomTransition},
 * {@link RangeTransition}, and {@link SetTransition} depending on whether
 * the code points involved are supplementary (above U+FFFF).
 *
 * <p>Atom and range transitions are only emitted for code points that are
 * not supplementary, which keeps the serialized ATN small. As soon as a
 * supplementary code point is involved, the match is expressed as a set
 * transition instead, even when it is logically a single atom or one
 * contiguous range.
 */
public abstract class CodePointTransitions {
	/**
	 * Builds the transition that matches a single code point.
	 *
	 * @param target the state the new transition leads to
	 * @param codePoint the code point to match
	 * @return a {@link SetTransition} holding just {@code codePoint} when it
	 *         is a supplementary code point; an {@link AtomTransition}
	 *         otherwise
	 */
	public static Transition createWithCodePoint(ATNState target, int codePoint) {
		if (!Character.isSupplementaryCodePoint(codePoint)) {
			return new AtomTransition(target, codePoint);
		}

		return new SetTransition(target, IntervalSet.of(codePoint));
	}

	/**
	 * Builds the transition that matches the range of code points from
	 * {@code codePointFrom} to {@code codePointTo}.
	 *
	 * @param target the state the new transition leads to
	 * @param codePointFrom the first bound of the range
	 * @param codePointTo the second bound of the range
	 * @return a {@link SetTransition} over the range when either bound is a
	 *         supplementary code point; a {@link RangeTransition} otherwise
	 */
	public static Transition createWithCodePointRange(
			ATNState target,
			int codePointFrom,
			int codePointTo) {
		// A single supplementary bound is enough to rule out the inline range form.
		boolean touchesSupplementary =
				Character.isSupplementaryCodePoint(codePointFrom)
				|| Character.isSupplementaryCodePoint(codePointTo);

		if (!touchesSupplementary) {
			return new RangeTransition(target, codePointFrom, codePointTo);
		}

		return new SetTransition(target, IntervalSet.of(codePointFrom, codePointTo));
	}
}

View File

@ -10,6 +10,7 @@ import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.AtomTransition;
import org.antlr.v4.runtime.atn.BlockEndState;
import org.antlr.v4.runtime.atn.CodePointTransitions;
import org.antlr.v4.runtime.atn.DecisionState;
import org.antlr.v4.runtime.atn.EpsilonTransition;
import org.antlr.v4.runtime.atn.NotSetTransition;
@ -116,11 +117,11 @@ public class ATNOptimizer {
Transition newTransition;
if (matchSet.getIntervals().size() == 1) {
if (matchSet.size() == 1) {
newTransition = new AtomTransition(blockEndState, matchSet.getMinElement());
newTransition = CodePointTransitions.createWithCodePoint(blockEndState, matchSet.getMinElement());
}
else {
Interval matchInterval = matchSet.getIntervals().get(0);
newTransition = new RangeTransition(blockEndState, matchInterval.a, matchInterval.b);
newTransition = CodePointTransitions.createWithCodePointRange(blockEndState, matchInterval.a, matchInterval.b);
}
}
else {

View File

@ -17,6 +17,7 @@ import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.ActionTransition;
import org.antlr.v4.runtime.atn.AtomTransition;
import org.antlr.v4.runtime.atn.CodePointTransitions;
import org.antlr.v4.runtime.atn.LexerAction;
import org.antlr.v4.runtime.atn.LexerChannelAction;
import org.antlr.v4.runtime.atn.LexerCustomAction;
@ -255,7 +256,7 @@ public class LexerATNFactory extends ParserATNFactory {
int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
checkRange(a, b, t1, t2);
left.addTransition(new RangeTransition(right, t1, t2));
left.addTransition(CodePointTransitions.createWithCodePointRange(right, t1, t2));
a.atnState = left;
b.atnState = left;
return new Handle(left, right);
@ -301,7 +302,7 @@ public class LexerATNFactory extends ParserATNFactory {
Transition transition;
if (set.getIntervals().size() == 1) {
Interval interval = set.getIntervals().get(0);
transition = new RangeTransition(right, interval.a, interval.b);
transition = CodePointTransitions.createWithCodePointRange(right, interval.a, interval.b);
} else {
transition = new SetTransition(right, set);
}
@ -356,7 +357,7 @@ public class LexerATNFactory extends ParserATNFactory {
for (int i = 0; i < n; ) {
right = newState(stringLiteralAST);
int codePoint = chars.codePointAt(i);
prev.addTransition(new AtomTransition(right, codePoint));
prev.addTransition(CodePointTransitions.createWithCodePoint(right, codePoint));
prev = right;
i += Character.charCount(codePoint);
}