forked from jasder/antlr
Merge pull request #1672 from bhamiltoncx/code-point-transitions
New class CodePointTransitions to create SetTransitions for Unicode code points > U+FFFF
This commit is contained in:
commit
91df265278
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
||||
* Use of this file is governed by the BSD 3-clause license that
|
||||
* can be found in the LICENSE.txt file in the project root.
|
||||
*/
|
||||
|
||||
package org.antlr.v4.runtime.atn;
|
||||
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
|
||||
/**
|
||||
* Utility class to create {@link AtomTransition}, {@link RangeTransition},
|
||||
* and {@link SetTransition} appropriately based on the range of the input.
|
||||
*
|
||||
* To keep the serialized ATN size small, we only inline atom and
|
||||
* range transitions for Unicode code points <= U+FFFF.
|
||||
*
|
||||
* Whenever we encounter a Unicode code point > U+FFFF, we represent that
|
||||
* as a set transition (even if it is logically an atom or a range).
|
||||
*/
|
||||
public abstract class CodePointTransitions {
|
||||
/**
|
||||
* If {@code codePoint} is <= U+FFFF, returns a new {@link AtomTransition}.
|
||||
* Otherwise, returns a new {@link SetTransition}.
|
||||
*/
|
||||
public static Transition createWithCodePoint(ATNState target, int codePoint) {
|
||||
if (Character.isSupplementaryCodePoint(codePoint)) {
|
||||
return new SetTransition(target, IntervalSet.of(codePoint));
|
||||
} else {
|
||||
return new AtomTransition(target, codePoint);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If {@code codePointFrom} and {@code codePointTo} are both
|
||||
* <= U+FFFF, returns a new {@link RangeTransition}.
|
||||
* Otherwise, returns a new {@link SetTransition}.
|
||||
*/
|
||||
public static Transition createWithCodePointRange(
|
||||
ATNState target,
|
||||
int codePointFrom,
|
||||
int codePointTo) {
|
||||
if (Character.isSupplementaryCodePoint(codePointFrom) ||
|
||||
Character.isSupplementaryCodePoint(codePointTo)) {
|
||||
return new SetTransition(target, IntervalSet.of(codePointFrom, codePointTo));
|
||||
} else {
|
||||
return new RangeTransition(target, codePointFrom, codePointTo);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -10,6 +10,7 @@ import org.antlr.v4.runtime.atn.ATN;
|
|||
import org.antlr.v4.runtime.atn.ATNState;
|
||||
import org.antlr.v4.runtime.atn.AtomTransition;
|
||||
import org.antlr.v4.runtime.atn.BlockEndState;
|
||||
import org.antlr.v4.runtime.atn.CodePointTransitions;
|
||||
import org.antlr.v4.runtime.atn.DecisionState;
|
||||
import org.antlr.v4.runtime.atn.EpsilonTransition;
|
||||
import org.antlr.v4.runtime.atn.NotSetTransition;
|
||||
|
@ -116,11 +117,11 @@ public class ATNOptimizer {
|
|||
Transition newTransition;
|
||||
if (matchSet.getIntervals().size() == 1) {
|
||||
if (matchSet.size() == 1) {
|
||||
newTransition = new AtomTransition(blockEndState, matchSet.getMinElement());
|
||||
newTransition = CodePointTransitions.createWithCodePoint(blockEndState, matchSet.getMinElement());
|
||||
}
|
||||
else {
|
||||
Interval matchInterval = matchSet.getIntervals().get(0);
|
||||
newTransition = new RangeTransition(blockEndState, matchInterval.a, matchInterval.b);
|
||||
newTransition = CodePointTransitions.createWithCodePointRange(blockEndState, matchInterval.a, matchInterval.b);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
|
|
@ -17,6 +17,7 @@ import org.antlr.v4.runtime.atn.ATN;
|
|||
import org.antlr.v4.runtime.atn.ATNState;
|
||||
import org.antlr.v4.runtime.atn.ActionTransition;
|
||||
import org.antlr.v4.runtime.atn.AtomTransition;
|
||||
import org.antlr.v4.runtime.atn.CodePointTransitions;
|
||||
import org.antlr.v4.runtime.atn.LexerAction;
|
||||
import org.antlr.v4.runtime.atn.LexerChannelAction;
|
||||
import org.antlr.v4.runtime.atn.LexerCustomAction;
|
||||
|
@ -255,7 +256,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
|||
int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
|
||||
int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
|
||||
checkRange(a, b, t1, t2);
|
||||
left.addTransition(new RangeTransition(right, t1, t2));
|
||||
left.addTransition(CodePointTransitions.createWithCodePointRange(right, t1, t2));
|
||||
a.atnState = left;
|
||||
b.atnState = left;
|
||||
return new Handle(left, right);
|
||||
|
@ -301,7 +302,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
|||
Transition transition;
|
||||
if (set.getIntervals().size() == 1) {
|
||||
Interval interval = set.getIntervals().get(0);
|
||||
transition = new RangeTransition(right, interval.a, interval.b);
|
||||
transition = CodePointTransitions.createWithCodePointRange(right, interval.a, interval.b);
|
||||
} else {
|
||||
transition = new SetTransition(right, set);
|
||||
}
|
||||
|
@ -356,7 +357,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
|||
for (int i = 0; i < n; ) {
|
||||
right = newState(stringLiteralAST);
|
||||
int codePoint = chars.codePointAt(i);
|
||||
prev.addTransition(new AtomTransition(right, codePoint));
|
||||
prev.addTransition(CodePointTransitions.createWithCodePoint(right, codePoint));
|
||||
prev = right;
|
||||
i += Character.charCount(codePoint);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue