From 291638ca2dcff8de38619cd20f3f1f31468d76bd Mon Sep 17 00:00:00 2001 From: Ben Hamilton Date: Fri, 17 Feb 2017 09:13:58 -0800 Subject: [PATCH] CodePointTransitions --- .../v4/runtime/atn/CodePointTransitions.java | 50 +++++++++++++++++++ .../org/antlr/v4/automata/ATNOptimizer.java | 5 +- .../antlr/v4/automata/LexerATNFactory.java | 7 +-- 3 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java b/runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java new file mode 100644 index 000000000..ba996fe93 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.runtime.misc.IntervalSet; + +/** + * Utility class to create {@link AtomTransition}, {@link RangeTransition}, + * and {@link SetTransition} appropriately based on the range of the input. + * + * <p>To keep the serialized ATN size small, we only inline atom and + * range transitions for Unicode code points &lt;= U+FFFF. + * + * <p>Whenever we encounter a Unicode code point &gt; U+FFFF, we represent that + * as a set transition (even if it is logically an atom or a range). + */ +public abstract class CodePointTransitions { + /** + * If {@code codePoint} is &lt;= U+FFFF, returns a new {@link AtomTransition}. + * Otherwise, returns a new {@link SetTransition}. 
+ */ + public static Transition createWithCodePoint(ATNState target, int codePoint) { + if (Character.isSupplementaryCodePoint(codePoint)) { + return new SetTransition(target, IntervalSet.of(codePoint)); + } else { + return new AtomTransition(target, codePoint); + } + } + + /** + * If {@code codePointFrom} and {@code codePointTo} are both + * &lt;= U+FFFF, returns a new {@link RangeTransition}. + * Otherwise, returns a new {@link SetTransition}. + */ + public static Transition createWithCodePointRange( + ATNState target, + int codePointFrom, + int codePointTo) { + if (Character.isSupplementaryCodePoint(codePointFrom) || + Character.isSupplementaryCodePoint(codePointTo)) { + return new SetTransition(target, IntervalSet.of(codePointFrom, codePointTo)); + } else { + return new RangeTransition(target, codePointFrom, codePointTo); + } + } +} diff --git a/tool/src/org/antlr/v4/automata/ATNOptimizer.java b/tool/src/org/antlr/v4/automata/ATNOptimizer.java index f49d5ce09..6972feccf 100644 --- a/tool/src/org/antlr/v4/automata/ATNOptimizer.java +++ b/tool/src/org/antlr/v4/automata/ATNOptimizer.java @@ -10,6 +10,7 @@ import org.antlr.v4.runtime.atn.ATN; import org.antlr.v4.runtime.atn.ATNState; import org.antlr.v4.runtime.atn.AtomTransition; import org.antlr.v4.runtime.atn.BlockEndState; +import org.antlr.v4.runtime.atn.CodePointTransitions; import org.antlr.v4.runtime.atn.DecisionState; import org.antlr.v4.runtime.atn.EpsilonTransition; import org.antlr.v4.runtime.atn.NotSetTransition; @@ -116,11 +117,11 @@ public class ATNOptimizer { Transition newTransition; if (matchSet.getIntervals().size() == 1) { if (matchSet.size() == 1) { - newTransition = new AtomTransition(blockEndState, matchSet.getMinElement()); + newTransition = CodePointTransitions.createWithCodePoint(blockEndState, matchSet.getMinElement()); } else { Interval matchInterval = matchSet.getIntervals().get(0); - newTransition = new RangeTransition(blockEndState, matchInterval.a, matchInterval.b); + newTransition = 
CodePointTransitions.createWithCodePointRange(blockEndState, matchInterval.a, matchInterval.b); } } else { diff --git a/tool/src/org/antlr/v4/automata/LexerATNFactory.java b/tool/src/org/antlr/v4/automata/LexerATNFactory.java index dd83299cf..5c1f9d78e 100644 --- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java +++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java @@ -17,6 +17,7 @@ import org.antlr.v4.runtime.atn.ATN; import org.antlr.v4.runtime.atn.ATNState; import org.antlr.v4.runtime.atn.ActionTransition; import org.antlr.v4.runtime.atn.AtomTransition; +import org.antlr.v4.runtime.atn.CodePointTransitions; import org.antlr.v4.runtime.atn.LexerAction; import org.antlr.v4.runtime.atn.LexerChannelAction; import org.antlr.v4.runtime.atn.LexerCustomAction; @@ -255,7 +256,7 @@ public class LexerATNFactory extends ParserATNFactory { int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText()); int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText()); checkRange(a, b, t1, t2); - left.addTransition(new RangeTransition(right, t1, t2)); + left.addTransition(CodePointTransitions.createWithCodePointRange(right, t1, t2)); a.atnState = left; b.atnState = left; return new Handle(left, right); @@ -301,7 +302,7 @@ public class LexerATNFactory extends ParserATNFactory { Transition transition; if (set.getIntervals().size() == 1) { Interval interval = set.getIntervals().get(0); - transition = new RangeTransition(right, interval.a, interval.b); + transition = CodePointTransitions.createWithCodePointRange(right, interval.a, interval.b); } else { transition = new SetTransition(right, set); } @@ -356,7 +357,7 @@ public class LexerATNFactory extends ParserATNFactory { for (int i = 0; i < n; ) { right = newState(stringLiteralAST); int codePoint = chars.codePointAt(i); - prev.addTransition(new AtomTransition(right, codePoint)); + prev.addTransition(CodePointTransitions.createWithCodePoint(right, codePoint)); prev = right; i += Character.charCount(codePoint); }