From 291638ca2dcff8de38619cd20f3f1f31468d76bd Mon Sep 17 00:00:00 2001
From: Ben Hamilton <beng@fb.com>
Date: Fri, 17 Feb 2017 09:13:58 -0800
Subject: [PATCH 1/7] CodePointTransitions

---
 .../v4/runtime/atn/CodePointTransitions.java  | 50 +++++++++++++++++++
 .../org/antlr/v4/automata/ATNOptimizer.java   |  5 +-
 .../antlr/v4/automata/LexerATNFactory.java    |  7 +--
 3 files changed, 57 insertions(+), 5 deletions(-)
 create mode 100644 runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java

diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java b/runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java
new file mode 100644
index 000000000..ba996fe93
--- /dev/null
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/CodePointTransitions.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+package org.antlr.v4.runtime.atn;
+
+import org.antlr.v4.runtime.misc.IntervalSet;
+
+/**
+ * Utility class to create {@link AtomTransition}, {@link RangeTransition},
+ * and {@link SetTransition} appropriately based on the range of the input.
+ *
+ * To keep the serialized ATN size small, we only inline atom and
+ * range transitions for Unicode code points {@code <= U+FFFF}.
+ *
+ * Whenever we encounter a Unicode code point {@code > U+FFFF}, we represent that
+ * as a set transition (even if it is logically an atom or a range).
+ */
+public abstract class CodePointTransitions {
+	/**
+	 * Returns a {@link SetTransition} when {@code codePoint} is a supplementary
+	 * code point (above U+FFFF); otherwise returns an {@link AtomTransition}.
+	 */
+	public static Transition createWithCodePoint(ATNState target, int codePoint) {
+		if (!Character.isSupplementaryCodePoint(codePoint)) {
+			// Not above U+FFFF: keep it as an inline atom.
+			return new AtomTransition(target, codePoint);
+		}
+		return new SetTransition(target, IntervalSet.of(codePoint));
+	}
+
+	/**
+	 * Returns a {@link SetTransition} when either {@code codePointFrom} or
+	 * {@code codePointTo} is a supplementary code point (above U+FFFF);
+	 * otherwise returns a {@link RangeTransition} for the same two bounds.
+	 */
+	public static Transition createWithCodePointRange(
+			ATNState target,
+			int codePointFrom,
+			int codePointTo) {
+		boolean fromIsSupplementary = Character.isSupplementaryCodePoint(codePointFrom);
+		boolean toIsSupplementary = Character.isSupplementaryCodePoint(codePointTo);
+		if (!fromIsSupplementary && !toIsSupplementary) {
+			return new RangeTransition(target, codePointFrom, codePointTo);
+		}
+		return new SetTransition(target, IntervalSet.of(codePointFrom, codePointTo));
+	}
+}
diff --git a/tool/src/org/antlr/v4/automata/ATNOptimizer.java b/tool/src/org/antlr/v4/automata/ATNOptimizer.java
index f49d5ce09..6972feccf 100644
--- a/tool/src/org/antlr/v4/automata/ATNOptimizer.java
+++ b/tool/src/org/antlr/v4/automata/ATNOptimizer.java
@@ -10,6 +10,7 @@ import org.antlr.v4.runtime.atn.ATN;
 import org.antlr.v4.runtime.atn.ATNState;
 import org.antlr.v4.runtime.atn.AtomTransition;
 import org.antlr.v4.runtime.atn.BlockEndState;
+import org.antlr.v4.runtime.atn.CodePointTransitions;
 import org.antlr.v4.runtime.atn.DecisionState;
 import org.antlr.v4.runtime.atn.EpsilonTransition;
 import org.antlr.v4.runtime.atn.NotSetTransition;
@@ -116,11 +117,11 @@ public class ATNOptimizer {
 				Transition newTransition;
 				if (matchSet.getIntervals().size() == 1) {
 					if (matchSet.size() == 1) {
-						newTransition = new AtomTransition(blockEndState, matchSet.getMinElement());
+						newTransition = CodePointTransitions.createWithCodePoint(blockEndState, matchSet.getMinElement());
 					}
 					else {
 						Interval matchInterval = matchSet.getIntervals().get(0);
-						newTransition = new RangeTransition(blockEndState, matchInterval.a, matchInterval.b);
+						newTransition = CodePointTransitions.createWithCodePointRange(blockEndState, matchInterval.a, matchInterval.b);
 					}
 				}
 				else {
diff --git a/tool/src/org/antlr/v4/automata/LexerATNFactory.java b/tool/src/org/antlr/v4/automata/LexerATNFactory.java
index dd83299cf..5c1f9d78e 100644
--- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java
+++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java
@@ -17,6 +17,7 @@ import org.antlr.v4.runtime.atn.ATN;
 import org.antlr.v4.runtime.atn.ATNState;
 import org.antlr.v4.runtime.atn.ActionTransition;
 import org.antlr.v4.runtime.atn.AtomTransition;
+import org.antlr.v4.runtime.atn.CodePointTransitions;
 import org.antlr.v4.runtime.atn.LexerAction;
 import org.antlr.v4.runtime.atn.LexerChannelAction;
 import org.antlr.v4.runtime.atn.LexerCustomAction;
@@ -255,7 +256,7 @@ public class LexerATNFactory extends ParserATNFactory {
 		int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
 		int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
 		checkRange(a, b, t1, t2);
-		left.addTransition(new  RangeTransition(right, t1, t2));
+		left.addTransition(CodePointTransitions.createWithCodePointRange(right, t1, t2));
 		a.atnState = left;
 		b.atnState = left;
 		return new Handle(left, right);
@@ -301,7 +302,7 @@ public class LexerATNFactory extends ParserATNFactory {
 			Transition transition;
 			if (set.getIntervals().size() == 1) {
 				Interval interval = set.getIntervals().get(0);
-				transition = new RangeTransition(right, interval.a, interval.b);
+				transition = CodePointTransitions.createWithCodePointRange(right, interval.a, interval.b);
 			} else {
 				transition = new SetTransition(right, set);
 			}
@@ -356,7 +357,7 @@ public class LexerATNFactory extends ParserATNFactory {
 		for (int i = 0; i < n; ) {
 			right = newState(stringLiteralAST);
 			int codePoint = chars.codePointAt(i);
-			prev.addTransition(new AtomTransition(right, codePoint));
+			prev.addTransition(CodePointTransitions.createWithCodePoint(right, codePoint));
 			prev = right;
 			i += Character.charCount(codePoint);
 		}

From 7a0f4e716a93b11318e841c82caa136a90030592 Mon Sep 17 00:00:00 2001
From: parrt <parrt@cs.usfca.edu>
Date: Mon, 20 Feb 2017 10:36:36 -0800
Subject: [PATCH 2/7] try generic python 3.5

---
 .travis.yml                             | 6 ++++++
 .travis/before-install-linux-python3.sh | 4 ----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 562f89c1a..58143f420 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -55,6 +55,12 @@ matrix:
     - os: linux
       jdk: oraclejdk7
       env: TARGET=python3
+      addons:
+	apt:
+	  sources:
+	    - deadsnakes # source required so it finds the package definition below
+	  packages:
+	    - python3.5
     - os: linux
       jdk: oraclejdk7
       env: TARGET=javascript
diff --git a/.travis/before-install-linux-python3.sh b/.travis/before-install-linux-python3.sh
index 23685d75a..a3b091260 100755
--- a/.travis/before-install-linux-python3.sh
+++ b/.travis/before-install-linux-python3.sh
@@ -2,8 +2,4 @@
 
 set -euo pipefail
 
-sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 3FA7E0328081BFF6A14DA29AA6A19B38D3D831EF
-sudo add-apt-repository ppa:fkrull/deadsnakes -y
-sudo apt-get update -qq
-sudo apt-get install -qq python3.5
 python3 --version

From 0c45ada6555890ac510560dd0ae92a7a2206356b Mon Sep 17 00:00:00 2001
From: parrt <parrt@cs.usfca.edu>
Date: Mon, 20 Feb 2017 10:42:02 -0800
Subject: [PATCH 3/7] fix formatting

---
 .travis.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 58143f420..e6faa14bb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -56,11 +56,11 @@ matrix:
       jdk: oraclejdk7
       env: TARGET=python3
       addons:
-	apt:
-	  sources:
-	    - deadsnakes # source required so it finds the package definition below
-	  packages:
-	    - python3.5
+        apt:
+          sources:
+            - deadsnakes # source required so it finds the package definition below
+          packages:
+            - python3.5
     - os: linux
       jdk: oraclejdk7
       env: TARGET=javascript

From 49de9e8f9509c8a804610876c2522d757b821798 Mon Sep 17 00:00:00 2001
From: parrt <parrt@cs.usfca.edu>
Date: Mon, 20 Feb 2017 11:27:37 -0800
Subject: [PATCH 4/7] clean up per travis weblint

---
 .travis.yml | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index e6faa14bb..5184b26bb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,9 +7,7 @@ matrix:
     - os: linux
       compiler: clang
       jdk: oraclejdk7
-      env:
-        - TARGET=cpp
-        - CXX=g++-5
+      env: TARGET=cpp
       addons:
         apt:
           sources:
@@ -21,7 +19,6 @@ matrix:
             - clang-3.7
     - os: osx
       compiler: clang
-      osx_image: xcode8.1
       env: TARGET=cpp
       addons:
         apt:
@@ -47,7 +44,6 @@ matrix:
       env: TARGET=csharp
     - os: linux
       jdk: oraclejdk7
-      dist: trusty
       env: TARGET=dotnet
     - os: linux
       jdk: oraclejdk7

From e968fe271a638f84e2ab49f0ff4fc3238329f2a7 Mon Sep 17 00:00:00 2001
From: parrt <parrt@cs.usfca.edu>
Date: Mon, 20 Feb 2017 11:59:22 -0800
Subject: [PATCH 5/7] Revert "clean up per travis weblint"

This reverts commit 49de9e8f9509c8a804610876c2522d757b821798.
---
 .travis.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 5184b26bb..e6faa14bb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,7 +7,9 @@ matrix:
     - os: linux
       compiler: clang
       jdk: oraclejdk7
-      env: TARGET=cpp
+      env:
+        - TARGET=cpp
+        - CXX=g++-5
       addons:
         apt:
           sources:
@@ -19,6 +21,7 @@ matrix:
             - clang-3.7
     - os: osx
       compiler: clang
+      osx_image: xcode8.1
       env: TARGET=cpp
       addons:
         apt:
@@ -44,6 +47,7 @@ matrix:
       env: TARGET=csharp
     - os: linux
       jdk: oraclejdk7
+      dist: trusty
       env: TARGET=dotnet
     - os: linux
       jdk: oraclejdk7

From 912342b01fd45bfd1ec7e103121d13d999d6ae30 Mon Sep 17 00:00:00 2001
From: parrt <parrt@cs.usfca.edu>
Date: Tue, 21 Feb 2017 09:39:02 -0800
Subject: [PATCH 6/7] tweak to C++ runtime; nullptr not 0

---
 runtime/Cpp/runtime/src/tree/AbstractParseTreeVisitor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runtime/Cpp/runtime/src/tree/AbstractParseTreeVisitor.h b/runtime/Cpp/runtime/src/tree/AbstractParseTreeVisitor.h
index ec1b2551a..d546f78bf 100755
--- a/runtime/Cpp/runtime/src/tree/AbstractParseTreeVisitor.h
+++ b/runtime/Cpp/runtime/src/tree/AbstractParseTreeVisitor.h
@@ -70,7 +70,7 @@ namespace tree {
     /// </summary>
     /// <returns> The default value returned by visitor methods. </returns>
     virtual antlrcpp::Any defaultResult() {
-      return 0;
+      return nullptr; // support isNotNull
     }
 
     /// <summary>

From f384ef5fb49ad5e7d6b9b5f4d44e70021fd2c3d4 Mon Sep 17 00:00:00 2001
From: Ben Hamilton <beng@fb.com>
Date: Tue, 21 Feb 2017 12:26:08 -0800
Subject: [PATCH 7/7] New utility class Unicode

---
 .../org/antlr/v4/test/tool/TestUnicode.java   |  80 ++++++++
 tool/pom.xml                                  |   5 +
 tool/src/org/antlr/v4/codegen/Unicode.java    | 194 ++++++++++++++++++
 3 files changed, 279 insertions(+)
 create mode 100644 tool-testsuite/test/org/antlr/v4/test/tool/TestUnicode.java
 create mode 100644 tool/src/org/antlr/v4/codegen/Unicode.java

diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicode.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicode.java
new file mode 100644
index 000000000..195bb9cde
--- /dev/null
+++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicode.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+package org.antlr.v4.test.tool;
+
+import java.util.Map;
+
+import org.antlr.v4.codegen.Unicode;
+import org.antlr.v4.runtime.misc.IntervalSet;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TestUnicode {
+	@Test
+	public void testUnicodeCategoryCodes() {
+		Map<String, IntervalSet> rangesByCategory = Unicode.getUnicodeCategoryCodesToCodePointRanges();
+		assertTrue(rangesByCategory.get("Lu").contains('X'));
+		assertFalse(rangesByCategory.get("Lu").contains('x'));
+		assertTrue(rangesByCategory.get("Ll").contains('x'));
+		assertFalse(rangesByCategory.get("Ll").contains('X'));
+		assertTrue(rangesByCategory.get("L").contains('X'));
+		assertTrue(rangesByCategory.get("L").contains('x'));
+		assertTrue(rangesByCategory.get("N").contains('0'));
+		assertTrue(rangesByCategory.get("Z").contains(' '));
+	}
+
+	@Test
+	public void testUnicodeCategoryCodesToNames() {
+		Map<String, String> categoryNames = Unicode.getUnicodeCategoryCodesToNames();
+		assertEquals("Lowercase_Letter", categoryNames.get("Ll"));
+		assertEquals("Letter", categoryNames.get("L"));
+		assertEquals("Enclosing_Mark", categoryNames.get("Me"));
+		assertEquals("Mark", categoryNames.get("M"));
+	}
+
+	@Test
+	public void testUnicodeBinaryPropertyCodesToCodePointRanges() {
+		Map<String, IntervalSet> rangesByProperty = Unicode.getUnicodeBinaryPropertyCodesToCodePointRanges();
+		assertTrue(rangesByProperty.get("Emoji").contains(0x1F4A9));
+		assertFalse(rangesByProperty.get("Emoji").contains('X'));
+		assertTrue(rangesByProperty.get("alnum").contains('9'));
+		assertFalse(rangesByProperty.get("alnum").contains(0x1F4A9));
+		assertTrue(rangesByProperty.get("Dash").contains('-'));
+		assertTrue(rangesByProperty.get("Hex").contains('D'));
+		assertFalse(rangesByProperty.get("Hex").contains('Q'));
+	}
+
+	@Test
+	public void testUnicodeBinaryPropertyCodesToNames() {
+		Map<String, String> propertyNames = Unicode.getUnicodeBinaryPropertyCodesToNames();
+		assertEquals("Ideographic", propertyNames.get("Ideo"));
+		assertEquals("Soft_Dotted", propertyNames.get("SD"));
+		assertEquals("Noncharacter_Code_Point", propertyNames.get("NChar"));
+	}
+
+	@Test
+	public void testUnicodeScriptCodesToCodePointRanges() {
+		Map<String, IntervalSet> rangesByScript = Unicode.getUnicodeScriptCodesToCodePointRanges();
+		assertTrue(rangesByScript.get("Zyyy").contains('0'));
+		assertTrue(rangesByScript.get("Latn").contains('X'));
+		assertTrue(rangesByScript.get("Hani").contains(0x4E04));
+		assertTrue(rangesByScript.get("Cyrl").contains(0x0404));
+	}
+
+	@Test
+	public void testUnicodeScriptCodesToNames() {
+		Map<String, String> scriptNames = Unicode.getUnicodeScriptCodesToNames();
+		assertEquals("Common", scriptNames.get("Zyyy"));
+		assertEquals("Latin", scriptNames.get("Latn"));
+		assertEquals("Han", scriptNames.get("Hani"));
+		assertEquals("Cyrillic", scriptNames.get("Cyrl"));
+	}
+}
diff --git a/tool/pom.xml b/tool/pom.xml
index 0f1e5a0c5..4b04ee177 100644
--- a/tool/pom.xml
+++ b/tool/pom.xml
@@ -42,6 +42,11 @@
 			<artifactId>javax.json</artifactId>
 			<version>1.0.4</version>
 		</dependency>
+		<dependency>
+			<groupId>com.ibm.icu</groupId>
+			<artifactId>icu4j</artifactId>
+			<version>58.2</version>
+		</dependency>
 	</dependencies>
 
   <build>
diff --git a/tool/src/org/antlr/v4/codegen/Unicode.java b/tool/src/org/antlr/v4/codegen/Unicode.java
new file mode 100644
index 000000000..a1360e293
--- /dev/null
+++ b/tool/src/org/antlr/v4/codegen/Unicode.java
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+package org.antlr.v4.codegen;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UCharacterCategory;
+import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.RangeValueIterator;
+
+import org.antlr.v4.runtime.misc.IntervalSet;
+
+import java.util.Map;
+import java.util.LinkedHashMap;
+
+/**
+ * Utility class for calculating {@link IntervalSet}s for various
+ * Unicode categories and properties.
+ */
+public abstract class Unicode {
+	/** Adds [start, finish] to the set stored under categoryName, creating that set on first use. */
+	private static void addIntervalForCategory(
+			Map<String, IntervalSet> categoryMap, String categoryName, int start, int finish) {
+		IntervalSet codePoints = categoryMap.get(categoryName);
+		boolean firstSeen = codePoints == null;
+		if (firstSeen) {
+			codePoints = new IntervalSet();
+			categoryMap.put(categoryName, codePoints);
+		}
+		// start and finish are handed to IntervalSet.add unchanged.
+		codePoints.add(start, finish);
+	}
+
+	private static String getShortPropertyName(int property) {
+		String shortName = UCharacter.getPropertyName(property, UProperty.NameChoice.SHORT);
+		if (shortName != null) {
+			return shortName;
+		}
+		// A few properties have no short name; fall back to the long one.
+		return UCharacter.getPropertyName(property, UProperty.NameChoice.LONG);
+	}
+
+	/**
+	 * Maps each Unicode general category code to the {@link IntervalSet} of code
+	 * points in that category, e.g. (code: [0-4, 10-20, 5000-6000], ...). Each range
+	 * is also stored under the code's first letter, so Ll, Lu, Lo all appear under L.
+	 *
+	 * Note that a code point belongs to exactly one general category.
+	 *
+	 * @see <a href="http://unicode.org/reports/tr44/#General_Category_Values">UAX #44</a>
+	 */
+	public static Map<String, IntervalSet> getUnicodeCategoryCodesToCodePointRanges() {
+		Map<String, IntervalSet> rangesByCode = new LinkedHashMap<>();
+		RangeValueIterator.Element run = new RangeValueIterator.Element();
+		for (RangeValueIterator types = UCharacter.getTypeIterator(); types.next(run); ) {
+			// The interval added below ends at limit - 1, the last code point of the run.
+			int first = run.start;
+			int last = run.limit - 1;
+			int categoryMask = 1 << run.value;
+			String code = UCharacter.getPropertyValueName(
+					UProperty.GENERAL_CATEGORY_MASK, categoryMask, UProperty.NameChoice.SHORT);
+			addIntervalForCategory(rangesByCode, code, first, last);
+			// File the same run under the one-letter group (Ll, Lu, Lo -> L).
+			addIntervalForCategory(rangesByCode, code.substring(0, 1), first, last);
+		}
+		return rangesByCode;
+	}
+
+	/**
+	 * Returns a map of (Unicode general category code: name, ...) pairs
+	 * mapping Unicode general category codes to their human-readable names.
+	 *
+	 * @see <a href="http://unicode.org/reports/tr44/#General_Category_Values">UAX #44</a>
+	 */
+	public static Map<String, String> getUnicodeCategoryCodesToNames() {
+		Map<String, String> result = new LinkedHashMap<>();
+		RangeValueIterator iter = UCharacter.getTypeIterator();
+		RangeValueIterator.Element element = new RangeValueIterator.Element();
+		while (iter.next(element)) {
+			String categoryName = UCharacter.getPropertyValueName(
+					UProperty.GENERAL_CATEGORY_MASK,
+					1 << element.value,
+					UProperty.NameChoice.SHORT);
+			String longCategoryName = UCharacter.getPropertyValueName(
+					UProperty.GENERAL_CATEGORY_MASK,
+					1 << element.value,
+					UProperty.NameChoice.LONG);
+			result.put(categoryName, longCategoryName);
+		}
+		// One-letter groups, named as in UAX #44 (C is Other, Control is Cc only; Z is Separator).
+		result.put("C", "Other");
+		result.put("L", "Letter");
+		result.put("N", "Number");
+		result.put("M", "Mark");
+		result.put("P", "Punctuation");
+		result.put("S", "Symbol");
+		result.put("Z", "Separator");
+		return result;
+	}
+
+	/**
+	 * Maps each Unicode binary property code to the {@link IntervalSet} of code
+	 * points for which that binary property is set to a true value, e.g.
+	 * (code: [0-4, 10-20, 5000-6000], ...).
+	 *
+	 * @see <a href="http://unicode.org/reports/tr44/#Property_List_Table">UAX #44</a>
+	 */
+	public static Map<String, IntervalSet> getUnicodeBinaryPropertyCodesToCodePointRanges() {
+		Map<String, IntervalSet> rangesByProperty = new LinkedHashMap<>();
+		int property = UProperty.BINARY_START;
+		while (property < UProperty.BINARY_LIMIT) {
+			// Value 1 selects the code points for which the property is true.
+			UnicodeSet members = new UnicodeSet();
+			members.applyIntPropertyValue(property, 1);
+			IntervalSet codePoints = new IntervalSet();
+			for (UnicodeSet.EntryRange span : members.ranges()) {
+				codePoints.add(span.codepoint, span.codepointEnd);
+			}
+			rangesByProperty.put(getShortPropertyName(property), codePoints);
+			property++;
+		}
+		return rangesByProperty;
+	}
+
+	/**
+	 * Returns a map of (Unicode binary property code: name, ...) pairs
+	 * mapping Unicode binary property codes to their human-readable names.
+	 *
+	 * @see <a href="http://unicode.org/reports/tr44/#Property_List_Table">UAX #44</a>
+	 */
+	public static Map<String, String> getUnicodeBinaryPropertyCodesToNames() {
+		Map<String, String> namesByCode = new LinkedHashMap<>();
+		int property = UProperty.BINARY_START;
+		while (property < UProperty.BINARY_LIMIT) {
+			String code = getShortPropertyName(property);
+			String name = UCharacter.getPropertyName(property, UProperty.NameChoice.LONG);
+			namesByCode.put(code, name);
+			property++;
+		}
+		return namesByCode;
+	}
+
+	/**
+	 * Maps each Unicode script code to the {@link IntervalSet} of code points
+	 * which use that script, e.g.
+	 * (code: [0-4, 10-20, 5000-6000], ...).
+	 *
+	 * Note that some code points belong to multiple scripts; only {@code UProperty.SCRIPT} is queried here.
+	 *
+	 * @see <a href="https://en.wikipedia.org/wiki/Script_(Unicode)#Table_of_scripts_in_Unicode">Table of scripts in Unicode</a>
+	 */
+	public static Map<String, IntervalSet> getUnicodeScriptCodesToCodePointRanges() {
+		Map<String, IntervalSet> rangesByScript = new LinkedHashMap<>();
+		int script = UCharacter.getIntPropertyMinValue(UProperty.SCRIPT);
+		while (script <= UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)) {
+			String code = UCharacter.getPropertyValueName(UProperty.SCRIPT, script, UProperty.NameChoice.SHORT);
+			IntervalSet codePoints = rangesByScript.get(code);
+			if (codePoints == null) {
+				codePoints = new IntervalSet();
+				rangesByScript.put(code, codePoints);
+			}
+			UnicodeSet members = new UnicodeSet();
+			members.applyIntPropertyValue(UProperty.SCRIPT, script);
+			for (UnicodeSet.EntryRange span : members.ranges()) {
+				codePoints.add(span.codepoint, span.codepointEnd);
+			}
+			script++;
+		}
+		return rangesByScript;
+	}
+
+	/**
+	 * Returns a map of (Unicode script code: name, ...) pairs
+	 * mapping Unicode script codes to their human-readable names.
+	 *
+	 * @see <a href="https://en.wikipedia.org/wiki/Script_(Unicode)#Table_of_scripts_in_Unicode">Table of scripts in Unicode</a>
+	 */
+	public static Map<String, String> getUnicodeScriptCodesToNames() {
+		Map<String, String> namesByCode = new LinkedHashMap<>();
+		int script = UCharacter.getIntPropertyMinValue(UProperty.SCRIPT);
+		while (script <= UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)) {
+			String code = UCharacter.getPropertyValueName(UProperty.SCRIPT, script, UProperty.NameChoice.SHORT);
+			String name = UCharacter.getPropertyValueName(UProperty.SCRIPT, script, UProperty.NameChoice.LONG);
+			namesByCode.put(code, name);
+			script++;
+		}
+		return namesByCode;
+	}
+}