From 291638ca2dcff8de38619cd20f3f1f31468d76bd Mon Sep 17 00:00:00 2001 From: Ben Hamilton Date: Fri, 17 Feb 2017 09:13:58 -0800 Subject: [PATCH 1/7] CodePointTransitions --- .../v4/runtime/atn/CodePointTransitions.java | 50 +++++++++++++++++++ .../org/antlr/v4/automata/ATNOptimizer.java | 5 +- .../antlr/v4/automata/LexerATNFactory.java | 7 +-- 3 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java b/runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java new file mode 100644 index 000000000..ba996fe93 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.runtime.atn; + +import org.antlr.v4.runtime.misc.IntervalSet; + +/** + * Utility class to create {@link AtomTransition}, {@link RangeTransition}, + * and {@link SetTransition} appropriately based on the range of the input. + * + * To keep the serialized ATN size small, we only inline atom and + * range transitions for Unicode code points <= U+FFFF. + * + * Whenever we encounter a Unicode code point > U+FFFF, we represent that + * as a set transition (even if it is logically an atom or a range). + */ +public abstract class CodePointTransitions { + /** + * If {@code codePoint} is <= U+FFFF, returns a new {@link AtomTransition}. + * Otherwise, returns a new {@link SetTransition}. + */ + public static Transition createWithCodePoint(ATNState target, int codePoint) { + if (Character.isSupplementaryCodePoint(codePoint)) { + return new SetTransition(target, IntervalSet.of(codePoint)); + } else { + return new AtomTransition(target, codePoint); + } + } + + /** + * If {@code codePointFrom} and {@code codePointTo} are both + * <= U+FFFF, returns a new {@link RangeTransition}. + * Otherwise, returns a new {@link SetTransition}. + */ + public static Transition createWithCodePointRange( + ATNState target, + int codePointFrom, + int codePointTo) { + if (Character.isSupplementaryCodePoint(codePointFrom) || + Character.isSupplementaryCodePoint(codePointTo)) { + return new SetTransition(target, IntervalSet.of(codePointFrom, codePointTo)); + } else { + return new RangeTransition(target, codePointFrom, codePointTo); + } + } +} diff --git a/tool/src/org/antlr/v4/automata/ATNOptimizer.java b/tool/src/org/antlr/v4/automata/ATNOptimizer.java index f49d5ce09..6972feccf 100644 --- a/tool/src/org/antlr/v4/automata/ATNOptimizer.java +++ b/tool/src/org/antlr/v4/automata/ATNOptimizer.java @@ -10,6 +10,7 @@ import org.antlr.v4.runtime.atn.ATN; import org.antlr.v4.runtime.atn.ATNState; import org.antlr.v4.runtime.atn.AtomTransition; import org.antlr.v4.runtime.atn.BlockEndState; +import org.antlr.v4.runtime.atn.CodePointTransitions; import org.antlr.v4.runtime.atn.DecisionState; import org.antlr.v4.runtime.atn.EpsilonTransition; import org.antlr.v4.runtime.atn.NotSetTransition; @@ -116,11 +117,11 @@ public class ATNOptimizer { Transition newTransition; if (matchSet.getIntervals().size() == 1) { if (matchSet.size() == 1) { - newTransition = new AtomTransition(blockEndState, matchSet.getMinElement()); + newTransition = CodePointTransitions.createWithCodePoint(blockEndState, matchSet.getMinElement()); } else { Interval matchInterval = matchSet.getIntervals().get(0); - newTransition = new RangeTransition(blockEndState, matchInterval.a, matchInterval.b); + newTransition = CodePointTransitions.createWithCodePointRange(blockEndState, matchInterval.a, matchInterval.b); } } else { diff --git a/tool/src/org/antlr/v4/automata/LexerATNFactory.java b/tool/src/org/antlr/v4/automata/LexerATNFactory.java index dd83299cf..5c1f9d78e 100644 --- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java +++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java @@ -17,6 +17,7 @@ import org.antlr.v4.runtime.atn.ATN; import org.antlr.v4.runtime.atn.ATNState; import org.antlr.v4.runtime.atn.ActionTransition; import org.antlr.v4.runtime.atn.AtomTransition; +import org.antlr.v4.runtime.atn.CodePointTransitions; import org.antlr.v4.runtime.atn.LexerAction; import org.antlr.v4.runtime.atn.LexerChannelAction; import org.antlr.v4.runtime.atn.LexerCustomAction; @@ -255,7 +256,7 @@ public class LexerATNFactory extends ParserATNFactory { int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText()); int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText()); checkRange(a, b, t1, t2); - left.addTransition(new RangeTransition(right, t1, t2)); + left.addTransition(CodePointTransitions.createWithCodePointRange(right, t1, t2)); a.atnState = left; b.atnState = left; return new Handle(left, right); @@ -301,7 +302,7 @@ public class LexerATNFactory extends ParserATNFactory { Transition transition; if (set.getIntervals().size() == 1) { Interval interval = set.getIntervals().get(0); - transition = new RangeTransition(right, interval.a, interval.b); + transition = CodePointTransitions.createWithCodePointRange(right, interval.a, interval.b); } else { transition = new SetTransition(right, set); } @@ -356,7 +357,7 @@ public class LexerATNFactory extends ParserATNFactory { for (int i = 0; i < n; ) { right = newState(stringLiteralAST); int codePoint = chars.codePointAt(i); - prev.addTransition(new AtomTransition(right, codePoint)); + prev.addTransition(CodePointTransitions.createWithCodePoint(right, codePoint)); prev = right; i += Character.charCount(codePoint); } From 7a0f4e716a93b11318e841c82caa136a90030592 Mon Sep 17 00:00:00 2001 From: parrt Date: Mon, 20 Feb 2017 10:36:36 -0800 Subject: [PATCH 2/7] try generic python 3.5 --- .travis.yml | 6 ++++++ .travis/before-install-linux-python3.sh | 4 ---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 562f89c1a..58143f420 100644 --- a/.travis.yml +++ b/.travis.yml @@ -55,6 +55,12 @@ matrix: - os: linux jdk: oraclejdk7 env: TARGET=python3 + addons: + apt: + sources: + - deadsnakes # source required so it finds the package definition below + packages: + - python3.5 - os: linux jdk: oraclejdk7 env: TARGET=javascript diff --git a/.travis/before-install-linux-python3.sh b/.travis/before-install-linux-python3.sh index 23685d75a..a3b091260 100755 --- a/.travis/before-install-linux-python3.sh +++ b/.travis/before-install-linux-python3.sh @@ -2,8 +2,4 @@ set -euo pipefail -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF -sudo add-apt-repository ppa:fkrull/deadsnakes -y -sudo apt-get update -qq -sudo apt-get install -qq python3.5 python3 --version From 0c45ada6555890ac510560dd0ae92a7a2206356b Mon Sep 17 00:00:00 2001 From: parrt Date: Mon, 20 Feb 2017 10:42:02 -0800 Subject: [PATCH 3/7] fix formatting --- .travis.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 58143f420..e6faa14bb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -56,11 +56,11 @@ matrix: jdk: oraclejdk7 env: TARGET=python3 addons: - apt: - sources: - - deadsnakes # source required so it finds the package definition below - packages: - - python3.5 + apt: + sources: + - deadsnakes # source required so it finds the package definition below + packages: + - python3.5 - os: linux jdk: oraclejdk7 env: TARGET=javascript From 49de9e8f9509c8a804610876c2522d757b821798 Mon Sep 17 00:00:00 2001 From: parrt Date: Mon, 20 Feb 2017 11:27:37 -0800 Subject: [PATCH 4/7] clean up per travis weblint --- .travis.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index e6faa14bb..5184b26bb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,9 +7,7 @@ matrix: - os: linux compiler: clang jdk: oraclejdk7 - env: - - TARGET=cpp - - CXX=g++-5 + env: TARGET=cpp addons: apt: sources: @@ -21,7 +19,6 @@ matrix: - clang-3.7 - os: osx compiler: clang - osx_image: xcode8.1 env: TARGET=cpp addons: apt: @@ -47,7 +44,6 @@ matrix: env: TARGET=csharp - os: linux jdk: oraclejdk7 - dist: trusty env: TARGET=dotnet - os: linux jdk: oraclejdk7 From e968fe271a638f84e2ab49f0ff4fc3238329f2a7 Mon Sep 17 00:00:00 2001 From: parrt Date: Mon, 20 Feb 2017 11:59:22 -0800 Subject: [PATCH 5/7] Revert "clean up per travis weblint" This reverts commit 49de9e8f9509c8a804610876c2522d757b821798. --- .travis.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5184b26bb..e6faa14bb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,9 @@ matrix: - os: linux compiler: clang jdk: oraclejdk7 - env: TARGET=cpp + env: + - TARGET=cpp + - CXX=g++-5 addons: apt: sources: @@ -19,6 +21,7 @@ matrix: - clang-3.7 - os: osx compiler: clang + osx_image: xcode8.1 env: TARGET=cpp addons: apt: @@ -44,6 +47,7 @@ matrix: env: TARGET=csharp - os: linux jdk: oraclejdk7 + dist: trusty env: TARGET=dotnet - os: linux jdk: oraclejdk7 From 912342b01fd45bfd1ec7e103121d13d999d6ae30 Mon Sep 17 00:00:00 2001 From: parrt Date: Tue, 21 Feb 2017 09:39:02 -0800 Subject: [PATCH 6/7] tweak to C++ runtime; nullptr not 0 --- runtime/Cpp/runtime/src/tree/AbstractParseTreeVisitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/Cpp/runtime/src/tree/AbstractParseTreeVisitor.h b/runtime/Cpp/runtime/src/tree/AbstractParseTreeVisitor.h index ec1b2551a..d546f78bf 100755 --- a/runtime/Cpp/runtime/src/tree/AbstractParseTreeVisitor.h +++ b/runtime/Cpp/runtime/src/tree/AbstractParseTreeVisitor.h @@ -70,7 +70,7 @@ namespace tree { /// /// The default value returned by visitor methods. virtual antlrcpp::Any defaultResult() { - return 0; + return nullptr; // support isNotNull } /// From f384ef5fb49ad5e7d6b9b5f4d44e70021fd2c3d4 Mon Sep 17 00:00:00 2001 From: Ben Hamilton Date: Tue, 21 Feb 2017 12:26:08 -0800 Subject: [PATCH 7/7] New utility class Unicode --- .../org/antlr/v4/test/tool/TestUnicode.java | 80 ++++++++ tool/pom.xml | 5 + tool/src/org/antlr/v4/codegen/Unicode.java | 194 ++++++++++++++++++ 3 files changed, 279 insertions(+) create mode 100644 tool-testsuite/test/org/antlr/v4/test/tool/TestUnicode.java create mode 100644 tool/src/org/antlr/v4/codegen/Unicode.java diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicode.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicode.java new file mode 100644 index 000000000..195bb9cde --- /dev/null +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicode.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.tool; + +import java.util.Map; + +import org.antlr.v4.codegen.Unicode; +import org.antlr.v4.runtime.misc.IntervalSet; + +import org.junit.Test; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestUnicode { + @Test + public void testUnicodeCategoryCodes() { + Map unicodeCategoryCodesToCodePointRanges = Unicode.getUnicodeCategoryCodesToCodePointRanges(); + assertTrue(unicodeCategoryCodesToCodePointRanges.get("Lu").contains('X')); + assertFalse(unicodeCategoryCodesToCodePointRanges.get("Lu").contains('x')); + assertTrue(unicodeCategoryCodesToCodePointRanges.get("Ll").contains('x')); + assertFalse(unicodeCategoryCodesToCodePointRanges.get("Ll").contains('X')); + assertTrue(unicodeCategoryCodesToCodePointRanges.get("L").contains('X')); + assertTrue(unicodeCategoryCodesToCodePointRanges.get("L").contains('x')); + assertTrue(unicodeCategoryCodesToCodePointRanges.get("N").contains('0')); + assertTrue(unicodeCategoryCodesToCodePointRanges.get("Z").contains(' ')); + } + + @Test + public void testUnicodeCategoryCodesToNames() { + Map unicodeCategoryCodesToNames = Unicode.getUnicodeCategoryCodesToNames(); + assertEquals("Lowercase_Letter", unicodeCategoryCodesToNames.get("Ll")); + assertEquals("Letter", unicodeCategoryCodesToNames.get("L")); + assertEquals("Enclosing_Mark", unicodeCategoryCodesToNames.get("Me")); + assertEquals("Mark", unicodeCategoryCodesToNames.get("M")); + } + + @Test + public void testUnicodeBinaryPropertyCodesToCodePointRanges() { + Map unicodeBinaryPropertyCodesToCodePointRanges = Unicode.getUnicodeBinaryPropertyCodesToCodePointRanges(); + assertTrue(unicodeBinaryPropertyCodesToCodePointRanges.get("Emoji").contains(0x1F4A9)); + assertFalse(unicodeBinaryPropertyCodesToCodePointRanges.get("Emoji").contains('X')); + assertTrue(unicodeBinaryPropertyCodesToCodePointRanges.get("alnum").contains('9')); + assertFalse(unicodeBinaryPropertyCodesToCodePointRanges.get("alnum").contains(0x1F4A9)); + assertTrue(unicodeBinaryPropertyCodesToCodePointRanges.get("Dash").contains('-')); + assertTrue(unicodeBinaryPropertyCodesToCodePointRanges.get("Hex").contains('D')); + assertFalse(unicodeBinaryPropertyCodesToCodePointRanges.get("Hex").contains('Q')); + } + + @Test + public void testUnicodeBinaryPropertyCodesToNames() { + Map unicodeBinaryPropertyCodesToNames = Unicode.getUnicodeBinaryPropertyCodesToNames(); + assertEquals("Ideographic", unicodeBinaryPropertyCodesToNames.get("Ideo")); + assertEquals("Soft_Dotted", unicodeBinaryPropertyCodesToNames.get("SD")); + assertEquals("Noncharacter_Code_Point", unicodeBinaryPropertyCodesToNames.get("NChar")); + } + + @Test + public void testUnicodeScriptCodesToCodePointRanges() { + Map unicodeScriptCodesToCodePointRanges = Unicode.getUnicodeScriptCodesToCodePointRanges(); + assertTrue(unicodeScriptCodesToCodePointRanges.get("Zyyy").contains('0')); + assertTrue(unicodeScriptCodesToCodePointRanges.get("Latn").contains('X')); + assertTrue(unicodeScriptCodesToCodePointRanges.get("Hani").contains(0x4E04)); + assertTrue(unicodeScriptCodesToCodePointRanges.get("Cyrl").contains(0x0404)); + } + + @Test + public void testUnicodeScriptCodesToNames() { + Map unicodeScriptCodesToNames = Unicode.getUnicodeScriptCodesToNames(); + assertEquals("Common", unicodeScriptCodesToNames.get("Zyyy")); + assertEquals("Latin", unicodeScriptCodesToNames.get("Latn")); + assertEquals("Han", unicodeScriptCodesToNames.get("Hani")); + assertEquals("Cyrillic", unicodeScriptCodesToNames.get("Cyrl")); + } +} diff --git a/tool/pom.xml b/tool/pom.xml index 0f1e5a0c5..4b04ee177 100644 --- a/tool/pom.xml +++ b/tool/pom.xml @@ -42,6 +42,11 @@ javax.json 1.0.4 + + com.ibm.icu + icu4j + 58.2 + diff --git a/tool/src/org/antlr/v4/codegen/Unicode.java b/tool/src/org/antlr/v4/codegen/Unicode.java new file mode 100644 index 000000000..a1360e293 --- /dev/null +++ b/tool/src/org/antlr/v4/codegen/Unicode.java @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.codegen; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.lang.UCharacterCategory; +import com.ibm.icu.lang.UProperty; +import com.ibm.icu.lang.UScript; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.RangeValueIterator; + +import org.antlr.v4.runtime.misc.IntervalSet; + +import java.util.Map; +import java.util.LinkedHashMap; + +/** + * Utility class for calculating {@link IntervalSet}s for various + * Unicode categories and properties. + */ +public abstract class Unicode { + private static void addIntervalForCategory( + Map categoryMap, + String categoryName, + int start, + int finish) { + IntervalSet intervalSet = categoryMap.get(categoryName); + if (intervalSet == null) { + intervalSet = new IntervalSet(); + categoryMap.put(categoryName, intervalSet); + } + intervalSet.add(start, finish); + } + + private static String getShortPropertyName(int property) { + String propertyName = UCharacter.getPropertyName(property, UProperty.NameChoice.SHORT); + // For some reason, a few properties only have long names. + if (propertyName == null) { + propertyName = UCharacter.getPropertyName(property, UProperty.NameChoice.LONG); + } + return propertyName; + } + + /** + * Returns a map of (Unicode general category code: [0-4, 10-20, 5000-6000], ...) + * pairs mapping Unicode general category codes to the {@link IntervalSet} containing + * the Unicode code points in that general category. + * + * Note that a code point belongs to exactly one general category. + * + * {@see http://unicode.org/reports/tr44/#General_Category_Values} + */ + public static Map getUnicodeCategoryCodesToCodePointRanges() { + Map result = new LinkedHashMap<>(); + RangeValueIterator iter = UCharacter.getTypeIterator(); + RangeValueIterator.Element element = new RangeValueIterator.Element(); + while (iter.next(element)) { + String categoryName = UCharacter.getPropertyValueName( + UProperty.GENERAL_CATEGORY_MASK, + 1 << element.value, + UProperty.NameChoice.SHORT); + addIntervalForCategory(result, categoryName, element.start, element.limit - 1); + // Add short category so Ll, Lu, Lo, etc. all show up under L + String shortCategoryName = categoryName.substring(0, 1); + addIntervalForCategory(result, shortCategoryName, element.start, element.limit - 1); + } + return result; + } + + /** + * Returns a map of (Unicode general category code: name, ...) pairs + * mapping Unicode general category codes to their human-readable names. + * + * {@see http://unicode.org/reports/tr44/#General_Category_Values} + */ + public static Map getUnicodeCategoryCodesToNames() { + Map result = new LinkedHashMap<>(); + RangeValueIterator iter = UCharacter.getTypeIterator(); + RangeValueIterator.Element element = new RangeValueIterator.Element(); + while (iter.next(element)) { + String categoryName = UCharacter.getPropertyValueName( + UProperty.GENERAL_CATEGORY_MASK, + 1 << element.value, + UProperty.NameChoice.SHORT); + String longCategoryName = UCharacter.getPropertyValueName( + UProperty.GENERAL_CATEGORY_MASK, + 1 << element.value, + UProperty.NameChoice.LONG); + result.put(categoryName, longCategoryName); + } + // Add short categories + result.put("C", "Control"); + result.put("L", "Letter"); + result.put("N", "Number"); + result.put("M", "Mark"); + result.put("P", "Punctuation"); + result.put("S", "Symbol"); + result.put("Z", "Space"); + return result; + } + + /** + * Returns a map of (Unicode binary property code: [0-4, 10-20, 5000-6000], ...) + * pairs mapping Unicode binary property codes to the {@link IntervalSet} containing + * the Unicode code points which have that binary property set to a true value. + * + * {@see http://unicode.org/reports/tr44/#Property_List_Table} + */ + public static Map getUnicodeBinaryPropertyCodesToCodePointRanges() { + Map result = new LinkedHashMap<>(); + for (int property = UProperty.BINARY_START; + property < UProperty.BINARY_LIMIT; + property++) { + String propertyName = getShortPropertyName(property); + IntervalSet intervalSet = new IntervalSet(); + result.put(propertyName, intervalSet); + UnicodeSet set = new UnicodeSet(); + set.applyIntPropertyValue(property, 1); + for (UnicodeSet.EntryRange range : set.ranges()) { + intervalSet.add(range.codepoint, range.codepointEnd); + } + } + return result; + } + + /** + * Returns a map of (Unicode general category code: name, ...) pairs + * mapping Unicode binary property codes to their human-readable names. + * + * {@see http://unicode.org/reports/tr44/#Property_List_Table} + */ + public static Map getUnicodeBinaryPropertyCodesToNames() { + Map result = new LinkedHashMap<>(); + for (int property = UProperty.BINARY_START; + property < UProperty.BINARY_LIMIT; + property++) { + String propertyName = getShortPropertyName(property); + String longPropertyName = UCharacter.getPropertyName(property, UProperty.NameChoice.LONG); + result.put(propertyName, longPropertyName); + } + return result; + } + + /** + * Returns a map of (Unicode script code: [0-4, 10-20, 5000-6000], ...) + * pairs mapping Unicode script codes to the {@link IntervalSet} containing + * the Unicode code points which use that script. + * + * Note that some code points belong to multiple scripts. + * + * {@see https://en.wikipedia.org/wiki/Script_(Unicode)#Table_of_scripts_in_Unicode} + */ + public static Map getUnicodeScriptCodesToCodePointRanges() { + Map result = new LinkedHashMap<>(); + for (int script = UCharacter.getIntPropertyMinValue(UProperty.SCRIPT); + script <= UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT); + script++) { + UnicodeSet set = new UnicodeSet(); + set.applyIntPropertyValue(UProperty.SCRIPT, script); + String scriptName = UCharacter.getPropertyValueName(UProperty.SCRIPT, script, UProperty.NameChoice.SHORT); + IntervalSet intervalSet = result.get(scriptName); + if (intervalSet == null) { + intervalSet = new IntervalSet(); + result.put(scriptName, intervalSet); + } + for (UnicodeSet.EntryRange range : set.ranges()) { + intervalSet.add(range.codepoint, range.codepointEnd); + } + } + return result; + } + + /** + * Returns a map of (Unicode script code: name, ...) pairs + * mapping Unicode script codes to their human-readable names. + * + * {@see https://en.wikipedia.org/wiki/Script_(Unicode)#Table_of_scripts_in_Unicode} + */ + public static Map getUnicodeScriptCodesToNames() { + Map result = new LinkedHashMap<>(); + for (int script = UCharacter.getIntPropertyMinValue(UProperty.SCRIPT); + script <= UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT); + script++) { + String propertyName = UCharacter.getPropertyValueName(UProperty.SCRIPT, script, UProperty.NameChoice.SHORT); + String longPropertyName = UCharacter.getPropertyValueName(UProperty.SCRIPT, script, UProperty.NameChoice.LONG); + result.put(propertyName, longPropertyName); + } + return result; + } +}