Merge branch 'master' into master

This commit is contained in:
Terence Parr 2017-03-10 13:53:14 -08:00 committed by GitHub
commit 197ea6e4e4
18 changed files with 173 additions and 319 deletions

View File

@ -139,3 +139,4 @@ YYYY/MM/DD, github id, Full name, email
2017/02/20, Thomasb81, Thomas Burg, thomasb81@gmail.com
2017/02/26, jvasileff, John Vasileff, john@vasileff.com
2017/03/03, chund, Christian Hund, christian.hund@gmail.com
2017/03/08, harry-tallbelt, Igor Vysokopoyasny, harry.tallbelt@gmail.com

View File

@ -39,6 +39,7 @@ Resolving deltas: 100% (31898/31898), done.
Checking connectivity... done.
$ cd antlr4
$ export MAVEN_OPTS="-Xmx1G" # don't forget this on linux
$ mvn clean # must be separate, not part of install/compile
$ mvn -DskipTests install
...
[INFO] ------------------------------------------------------------------------
@ -61,7 +62,7 @@ $ mvn -DskipTests install
[INFO] ------------------------------------------------------------------------
```
We do `install` not `compile` as tool tests and such refer to modules that must be pulled from the maven install local cache.
**NOTE:** We do `install` not `compile` as tool tests and such refer to modules that must be pulled from the maven install local cache.
# Installing libs to mvn cache locally

View File

@ -60,11 +60,11 @@ Match that character or sequence of characters. E.g., while or =.</t
<td>[char set]</td><td>
<p>Match one of the characters specified in the character set. Interpret <tt>x-y</tt> as the set of characters in the range from <tt>x</tt> to <tt>y</tt>, inclusive. The following escaped characters are interpreted as single special characters: <tt>\n</tt>, <tt>\r</tt>, <tt>\b</tt>, <tt>\t</tt>, <tt>\f</tt>, <tt>\uXXXX</tt>, and <tt>\u{XXXXXX}</tt>. To get <tt>]</tt>, <tt>\</tt>, or <tt>-</tt> you must escape them with <tt>\</tt>.</p>
<p>You can also include all characters matching Unicode properties (general category, boolean, script, or block) with <tt>\p{PropertyName}</tt>. (You can invert the test with <tt>\P{PropertyName}</tt>).</p>
<p>You can also include all characters matching Unicode properties (general category, boolean, or enumerated including scripts and blocks) with <tt>\p{PropertyName}</tt> or <tt>\p{EnumProperty=Value}</tt>. (You can invert the test with <tt>\P{PropertyName}</tt> or <tt>\P{EnumProperty=Value}</tt>).</p>
<p>For a list of valid Unicode property names, see <a href="http://unicode.org/reports/tr44/#Properties">Unicode Standard Annex #44</a>. (ANTLR also supports <a href="http://unicode.org/reports/tr44/#General_Category_Values">short and long Unicode general category names</a> like <tt>\p{Lu}</tt>, <tt>\p{Z}</tt>, and <tt>\p{Symbol}</tt>.)</p>
<p>For a list of valid Unicode property names, see <a href="http://unicode.org/reports/tr44/#Properties">Unicode Standard Annex #44</a>. (ANTLR also supports <a href="http://unicode.org/reports/tr44/#General_Category_Values">short and long Unicode general category names and values</a> like <tt>\p{Lu}</tt>, <tt>\p{Z}</tt>, <tt>\p{Symbol}</tt>, <tt>\p{Blk=Latin_1_Sup}</tt>, and <tt>\p{Block=Latin_1_Supplement}</tt>.)</p>
<p>Property names include <a href="http://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt">Unicode block names</a> prefixed with <tt>In</tt> (they overlap with script names) and with spaces changed to <tt>_</tt>. For example: <tt>\p{InLatin_1_Supplement}</tt>, <tt>\p{InYijing_Hexagram_Symbols}</tt>, and <tt>\p{InAncient_Greek_Numbers}</tt>.</p>
<p>As a shortcut for <tt>\p{Block=Latin_1_Supplement}</tt>, you can refer to blocks using <a href="http://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt">Unicode block names</a> prefixed with <tt>In</tt> and with spaces changed to <tt>_</tt>. For example: <tt>\p{InLatin_1_Supplement}</tt>, <tt>\p{InYijing_Hexagram_Symbols}</tt>, and <tt>\p{InAncient_Greek_Numbers}</tt>.</p>
<p>Property names are <b>case-insensitive</b>, and <tt>_</tt> and <tt>-</tt> are treated identically.</p>
@ -77,7 +77,7 @@ UNICODE_WS : [\p{White_Space}] -> skip; // match all Unicode whitespace
ID : [a-zA-Z] [a-zA-Z0-9]* ; // match usual identifier spec
UNICODE_ID : [\p{Alpha}] [\p{Alnum}]* ; // match full Unicode alphabetic ids
UNICODE_ID : [\p{Alpha}\p{General_Category=Other_Letter}] [\p{Alnum}\p{General_Category=Other_Letter}]* ; // match full Unicode alphabetic ids
EMOJI : [\u{1F4A9}\u{1F926}] ; // note Unicode code points > U+FFFF

View File

@ -362,27 +362,19 @@ Ref<PredictionContext> PredictionContext::mergeArrays(const Ref<ArrayPredictionC
}
bool PredictionContext::combineCommonParents(std::vector<Ref<PredictionContext>> &parents) {
std::unordered_set<Ref<PredictionContext>, PredictionContextHasher, PredictionContextComparer> uniqueParents;
std::set<Ref<PredictionContext>> uniqueParents;
for (size_t p = 0; p < parents.size(); ++p) {
if (!parents[p])
continue;
Ref<PredictionContext> parent = parents[p];
if (uniqueParents.find(parent) == uniqueParents.end()) { // don't replace
uniqueParents.insert(parent);
}
}
if (uniqueParents.size() == parents.size())
return false;
for (size_t p = 0; p < uniqueParents.size(); ++p) {
if (!parents[p])
continue;
for (size_t p = 0; p < parents.size(); ++p) {
parents[p] = *uniqueParents.find(parents[p]);
}
parents.resize(uniqueParents.size());
return true;
}

View File

@ -119,8 +119,11 @@ ParserRuleContext.prototype.addErrorNode = function(badToken) {
ParserRuleContext.prototype.getChild = function(i, type) {
type = type || null;
if (this.children === null || i < 0 || i >= this.children.length) {
return null;
}
if (type === null) {
return this.children.length>=i ? this.children[i] : null;
return this.children[i];
} else {
for(var j=0; j<this.children.length; j++) {
var child = this.children[j];
@ -138,6 +141,9 @@ ParserRuleContext.prototype.getChild = function(i, type) {
ParserRuleContext.prototype.getToken = function(ttype, i) {
if (this.children === null || i < 0 || i >= this.children.length) {
return null;
}
for(var j=0; j<this.children.length; j++) {
var child = this.children[j];
if (child instanceof TerminalNode) {

View File

@ -1,2 +0,0 @@
T__0=1
'A'=1

View File

@ -1,50 +0,0 @@
// Generated from SwiftTest.g4 by ANTLR 4.6
import Antlr4
/// An empty implementation of {@link SwiftTestListener}.
///
/// Subclass it to create a listener that only needs to handle a subset of
/// the available callbacks; every method here has an empty body.
open class SwiftTestBaseListener: SwiftTestListener {
    public init() {
    }

    /// {@inheritDoc}
    ///
    /// The default implementation does nothing.
    open func enterS(_ ctx: SwiftTestParser.SContext) {
    }

    /// {@inheritDoc}
    ///
    /// The default implementation does nothing.
    open func exitS(_ ctx: SwiftTestParser.SContext) {
    }

    /// {@inheritDoc}
    ///
    /// The default implementation does nothing.
    open func enterEveryRule(_ ctx: ParserRuleContext) {
    }

    /// {@inheritDoc}
    ///
    /// The default implementation does nothing.
    open func exitEveryRule(_ ctx: ParserRuleContext) {
    }

    /// {@inheritDoc}
    ///
    /// The default implementation does nothing.
    open func visitTerminal(_ node: TerminalNode) {
    }

    /// {@inheritDoc}
    ///
    /// The default implementation does nothing.
    open func visitErrorNode(_ node: ErrorNode) {
    }
}

View File

@ -1,90 +0,0 @@
// Generated from SwiftTest.g4 by ANTLR 4.6
import Antlr4
/// Lexer for the SwiftTest grammar (SwiftTest.g4).
///
/// It declares a single token type, `T__0`, whose literal name is `'A'`.
open class SwiftTestLexer: Lexer {
    /// One DFA for every decision reported by `_ATN`, indexed by decision number.
    internal static var _decisionToDFA: [DFA] = {
        var decisionToDFA = [DFA]()
        let length = SwiftTestLexer._ATN.getNumberOfDecisions()
        for i in 0..<length {
            decisionToDFA.append(DFA(SwiftTestLexer._ATN.getDecisionState(i)!, i))
        }
        return decisionToDFA
    }()

    /// Prediction context cache handed to the ATN simulator created in `init`.
    internal static let _sharedContextCache:PredictionContextCache = PredictionContextCache()

    public static let T__0=1

    public static let channelNames: [String] = [
        "DEFAULT_TOKEN_CHANNEL", "HIDDEN"
    ]

    public static let modeNames: [String] = [
        "DEFAULT_MODE"
    ]

    public static let ruleNames: [String] = [
        "T__0"
    ]

    private static let _LITERAL_NAMES: [String?] = [
        nil, "'A'"
    ]

    private static let _SYMBOLIC_NAMES: [String?] = [
    ]

    public static let VOCABULARY: Vocabulary = Vocabulary(_LITERAL_NAMES, _SYMBOLIC_NAMES)

    /**
     * @deprecated Use {@link #VOCABULARY} instead.
     */
    //@Deprecated
    public let tokenNames: [String?]? = {
        // The array is sized by the symbolic-name table; each slot prefers the
        // literal name, then the symbolic name, then "<INVALID>".
        let length = _SYMBOLIC_NAMES.count
        var tokenNames = [String?](repeating: nil, count: length)
        for i in 0..<length {
            var name = VOCABULARY.getLiteralName(i)
            if name == nil {
                name = VOCABULARY.getSymbolicName(i)
            }
            if name == nil {
                name = "<INVALID>"
            }
            tokenNames[i] = name
        }
        return tokenNames
    }()

    override
    open func getTokenNames() -> [String?]? {
        return tokenNames
    }

    open override func getVocabulary() -> Vocabulary {
        return SwiftTestLexer.VOCABULARY
    }

    /// Creates a lexer reading from `input` and installs a `LexerATNSimulator`
    /// built from this class's ATN, DFA table and shared context cache.
    public override init(_ input: CharStream) {
        RuntimeMetaData.checkVersion("4.7", RuntimeMetaData.VERSION)
        super.init(input)
        _interp = LexerATNSimulator(self, SwiftTestLexer._ATN, SwiftTestLexer._decisionToDFA, SwiftTestLexer._sharedContextCache)
    }

    override
    open func getGrammarFileName() -> String { return "SwiftTest.g4" }

    override
    open func getRuleNames() -> [String] { return SwiftTestLexer.ruleNames }

    override
    open func getSerializedATN() -> String { return SwiftTestLexer._serializedATN }

    // Fixed: this previously returned HelloLexer.channelNames, a type that
    // belongs to a different grammar; every other accessor uses SwiftTestLexer.
    override
    open func getChannelNames() -> [String] { return SwiftTestLexer.channelNames }

    override
    open func getModeNames() -> [String] { return SwiftTestLexer.modeNames }

    override
    open func getATN() -> ATN { return SwiftTestLexer._ATN }

    /// Serialized ATN in JSON form, supplied by `SwiftTestLexerATN`.
    public static let _serializedATN: String = SwiftTestLexerATN().jsonString
    /// ATN deserialized from `_serializedATN`.
    public static let _ATN: ATN = ATNDeserializer().deserializeFromJson(_serializedATN)
}

View File

@ -1,2 +0,0 @@
T__0=1
'A'=1

View File

@ -1,3 +0,0 @@
class SwiftTestLexerATN {
let jsonString: String = "{\"version\":3,\"uuid\":\"aadb8d7e-aeef-4415-ad2b-8204d6cf042e\",\"grammarType\":0,\"maxTokenType\":1,\"states\":[{\"stateType\":6,\"ruleIndex\":-1},{\"stateType\":2,\"ruleIndex\":0},{\"stateType\":7,\"ruleIndex\":0},{\"stateType\":1,\"ruleIndex\":0},{\"stateType\":1,\"ruleIndex\":0}],\"nonGreedyStates\":[],\"precedenceStates\":[],\"ruleToStartState\":[{\"stateNumber\":1,\"ruleToTokenType\":1}],\"modeToStartState\":[0],\"nsets\":0,\"IntervalSet\":[],\"allTransitionsBuilder\":[[{\"src\":0,\"trg\":1,\"edgeType\":1,\"arg1\":0,\"arg2\":0,\"arg3\":0}],[{\"src\":1,\"trg\":3,\"edgeType\":1,\"arg1\":0,\"arg2\":0,\"arg3\":0}],[{\"src\":3,\"trg\":4,\"edgeType\":5,\"arg1\":65,\"arg2\":0,\"arg3\":0}],[{\"src\":4,\"trg\":2,\"edgeType\":1,\"arg1\":0,\"arg2\":0,\"arg3\":0}]],\"decisionToState\":[0],\"lexerActions\":[]}"
}

View File

@ -1,21 +0,0 @@
// Generated from SwiftTest.g4 by ANTLR 4.6
import Antlr4
/**
 * A complete listener for a parse tree produced by {@link SwiftTestParser}.
 *
 * It declares one enter/exit callback pair for the parser's `s` rule.
 */
public protocol SwiftTestListener: ParseTreeListener {
    /**
     * Enter a parse tree produced by {@link SwiftTestParser#s}.
     *
     * - Parameters:
     *   - ctx: the parse tree
     */
    func enterS(_ ctx: SwiftTestParser.SContext)
    /**
     * Exit a parse tree produced by {@link SwiftTestParser#s}.
     *
     * - Parameters:
     *   - ctx: the parse tree
     */
    func exitS(_ ctx: SwiftTestParser.SContext)
}

View File

@ -1,118 +0,0 @@
// Generated from SwiftTest.g4 by ANTLR 4.6
import Antlr4
/// Parser for the SwiftTest grammar (SwiftTest.g4).
///
/// The grammar has one rule, `s`, which matches token `T__0` followed by EOF.
open class SwiftTestParser: Parser {
    /// One DFA for every decision reported by `_ATN`, indexed by decision number.
    internal static var _decisionToDFA: [DFA] = {
        let decisionCount = SwiftTestParser._ATN.getNumberOfDecisions()
        var dfas = [DFA]()
        for decision in 0..<decisionCount {
            dfas.append(DFA(SwiftTestParser._ATN.getDecisionState(decision)!, decision))
        }
        return dfas
    }()

    /// Prediction context cache handed to the ATN simulator created in `init`.
    internal static let _sharedContextCache: PredictionContextCache = PredictionContextCache()

    public enum Tokens: Int {
        case EOF = -1, T__0 = 1
    }

    public static let RULE_s = 0

    public static let ruleNames: [String] = [
        "s"
    ]

    private static let _LITERAL_NAMES: [String?] = [
        nil, "'A'"
    ]

    private static let _SYMBOLIC_NAMES: [String?] = [
    ]

    public static let VOCABULARY: Vocabulary = Vocabulary(_LITERAL_NAMES, _SYMBOLIC_NAMES)

    /**
     * @deprecated Use {@link #VOCABULARY} instead.
     */
    //@Deprecated
    public let tokenNames: [String?]? = {
        // One entry per symbolic-name slot: literal name first, then the
        // symbolic name, then the "<INVALID>" placeholder.
        var names = [String?]()
        for index in 0..<_SYMBOLIC_NAMES.count {
            var resolved = VOCABULARY.getLiteralName(index)
            if resolved == nil {
                resolved = VOCABULARY.getSymbolicName(index)
            }
            names.append(resolved == nil ? "<INVALID>" : resolved)
        }
        return names
    }()

    override
    open func getTokenNames() -> [String?]? {
        return tokenNames
    }

    override
    open func getGrammarFileName() -> String {
        return "SwiftTest.g4"
    }

    override
    open func getRuleNames() -> [String] {
        return SwiftTestParser.ruleNames
    }

    override
    open func getSerializedATN() -> String {
        return SwiftTestParser._serializedATN
    }

    override
    open func getATN() -> ATN {
        return SwiftTestParser._ATN
    }

    open override func getVocabulary() -> Vocabulary {
        return SwiftTestParser.VOCABULARY
    }

    /// Creates a parser over `input` and installs a `ParserATNSimulator`
    /// built from this class's ATN, DFA table and shared context cache.
    public override init(_ input:TokenStream)throws {
        RuntimeMetaData.checkVersion("4.6", RuntimeMetaData.VERSION)
        try super.init(input)
        _interp = ParserATNSimulator(self,SwiftTestParser._ATN,SwiftTestParser._decisionToDFA, SwiftTestParser._sharedContextCache)
    }

    /// Rule context for rule `s`.
    open class SContext:ParserRuleContext {
        open func EOF() -> TerminalNode? {
            return getToken(SwiftTestParser.Tokens.EOF.rawValue, 0)
        }

        open override func getRuleIndex() -> Int {
            return SwiftTestParser.RULE_s
        }

        /// Forwards to `enterS` when the listener is a `SwiftTestListener`;
        /// other listeners are ignored.
        override
        open func enterRule(_ listener: ParseTreeListener) {
            guard let typedListener = listener as? SwiftTestListener else {
                return
            }
            typedListener.enterS(self)
        }

        /// Forwards to `exitS` when the listener is a `SwiftTestListener`;
        /// other listeners are ignored.
        override
        open func exitRule(_ listener: ParseTreeListener) {
            guard let typedListener = listener as? SwiftTestListener else {
                return
            }
            typedListener.exitS(self)
        }
    }

    /// Parses rule `s`: token `T__0` followed by EOF.
    ///
    /// A recognition error is stored on the returned context, reported to the
    /// error handler, and then passed to the handler's `recover`.
    @discardableResult
    open func s() throws -> SContext {
        let ruleContext: SContext = SContext(_ctx, getState())
        try enterRule(ruleContext, 0, SwiftTestParser.RULE_s)
        defer {
            try! exitRule()
        }
        do {
            try enterOuterAlt(ruleContext, 1)
            setState(2)
            try match(SwiftTestParser.Tokens.T__0.rawValue)
            setState(3)
            try match(SwiftTestParser.Tokens.EOF.rawValue)
        }
        catch ANTLRException.recognition(let re) {
            ruleContext.exception = re
            _errHandler.reportError(self, re)
            try _errHandler.recover(self, re)
        }
        return ruleContext
    }

    /// Serialized ATN in JSON form, supplied by `SwiftTestParserATN`.
    public static let _serializedATN : String = SwiftTestParserATN().jsonString
    /// ATN deserialized from `_serializedATN`.
    public static let _ATN: ATN = ATNDeserializer().deserializeFromJson(_serializedATN)
}

View File

@ -1,3 +0,0 @@
class SwiftTestParserATN {
let jsonString: String = "{\"version\":3,\"uuid\":\"aadb8d7e-aeef-4415-ad2b-8204d6cf042e\",\"grammarType\":1,\"maxTokenType\":1,\"states\":[{\"stateType\":2,\"ruleIndex\":0},{\"stateType\":7,\"ruleIndex\":0},{\"stateType\":1,\"ruleIndex\":0},{\"stateType\":1,\"ruleIndex\":0},{\"stateType\":1,\"ruleIndex\":0},{\"stateType\":1,\"ruleIndex\":0}],\"nonGreedyStates\":[],\"precedenceStates\":[],\"ruleToStartState\":[{\"stateNumber\":0}],\"modeToStartState\":[],\"nsets\":0,\"IntervalSet\":[],\"allTransitionsBuilder\":[[{\"src\":0,\"trg\":2,\"edgeType\":1,\"arg1\":0,\"arg2\":0,\"arg3\":0}],[{\"src\":2,\"trg\":3,\"edgeType\":5,\"arg1\":1,\"arg2\":0,\"arg3\":0}],[{\"src\":3,\"trg\":4,\"edgeType\":5,\"arg1\":0,\"arg2\":0,\"arg3\":1}],[{\"src\":4,\"trg\":1,\"edgeType\":1,\"arg1\":0,\"arg2\":0,\"arg3\":0}],[]],\"decisionToState\":[],\"lexerActions\":[]}"
}

76
runtime/Swift/test.py Executable file
View File

@ -0,0 +1,76 @@
#! /usr/bin/python
"""
Find all .g4 files and generate a parser for each one into a "gen" folder
next to the grammar.
The ANTLR jar used is the one located in the user's local Maven repository;
its filename is antlr4-ANTLR_VERSION-SNAPSHOT-complete.jar. You can get it
by running: "mvn install"
NOTE: If your .m2 folder is in a custom location, change the
USER_M2 constant below.
The Java executable is taken from the $JAVA_HOME environment variable.
"""
import fnmatch
import os.path
from subprocess import call
# Version of the ANTLR snapshot used to generate the parsers.
ANTLR_VERSION = '4.7'
# Local Maven folder under the user's home directory (keeps its trailing slash).
USER_M2 = '{0}/.m2/'.format(os.path.expanduser('~'))
# Folder inside the Maven folder that holds the ANTLR snapshot artifacts.
ANTLR4_FOLDER = '{0}repository/org/antlr/antlr4/{1}-SNAPSHOT/'.format(USER_M2, ANTLR_VERSION)
# Full path of the "complete" ANTLR snapshot jar.
ANTLR4_JAR = '{0}antlr4-{1}-SNAPSHOT-complete.jar'.format(ANTLR4_FOLDER, ANTLR_VERSION)
def jar_exists():
    """
    Report whether something exists at the ANTLR4_JAR path.
    """
    jar_present = os.path.exists(ANTLR4_JAR)
    return jar_present
def find_g4():
    """
    Find all g4 files and return a list of their paths.

    The recursive search starts from the directory containing
    this python file (after resolving symlinks).
    """
    script_folder = os.path.dirname(os.path.realpath(__file__))
    grammars = []
    for folder, _, filenames in os.walk(script_folder):
        # os.path.join avoids doubled separators and hard-coded '/'.
        for grammar_name in fnmatch.filter(filenames, "*.g4"):
            grammars.append(os.path.join(folder, grammar_name))
    return grammars
def gen_parser(grammar):
    """
    Generate a Swift parser for the input g4 file.

    The output goes into a "gen" folder next to the grammar. Java is taken
    from $JAVA_HOME/bin/java; if JAVA_HOME is unset or that executable does
    not exist, a message is printed and nothing is generated.
    """
    # .get() so that an unset JAVA_HOME reaches the message below instead of
    # raising KeyError.
    java_home = os.environ.get('JAVA_HOME')
    java = os.path.join(java_home, 'bin', 'java') if java_home else None
    if java is None or not os.path.exists(java):
        # Parenthesized single-argument print works on Python 2 and 3.
        print('Cannot find java. Check your JAVA_HOME setting.')
        return
    output_folder = os.path.join(os.path.dirname(grammar), 'gen')
    call([java, '-jar', ANTLR4_JAR,
          '-Dlanguage=Swift', grammar, '-o', output_folder])
def swift_test():
    """
    Run unit tests with "swift test".

    Returns the exit status of the command so callers can detect a failed
    run; callers that ignore the return value are unaffected.
    """
    return call(['swift', 'test'])
if __name__ == '__main__':
    if not jar_exists():
        # Trailing space added: the two string pieces used to run together
        # as "rootfirst".
        print('Run "mvn install" in antlr4 project root '
              'first or check mvn settings')
        # A missing jar is a failure, so exit with a non-zero status.
        raise SystemExit(1)
    for grammar_file in find_g4():
        gen_parser(grammar_file)
    # Exit with whatever swift_test() reports; a None result exits with 0.
    raise SystemExit(swift_test())

View File

@ -21,7 +21,7 @@ public abstract class UnicodeData {
// initialization into one method per Unicode property
<propertyCodePointRanges.keys:{ k | // Unicode code points with property "<k>"
static private void addProperty<k>() {
static private void addProperty<i>() {
List\<Interval\> intervals = Arrays.asList(
<propertyCodePointRanges.(k).intervals:{ interval | Interval.of(<interval.a>, <interval.b>)}; separator=",\n">
);
@ -37,7 +37,7 @@ static private void addProperty<k>() {
// Put it all together
static {
<propertyCodePointRanges.keys:{ k | addProperty<k>(); }; separator="\n">
<propertyCodePointRanges.keys:{ k | addProperty<i>(); }; separator="\n">
addPropertyAliases();
}

View File

@ -74,14 +74,14 @@ public abstract class UnicodeDataTemplateController {
Map<String, IntervalSet> propertyCodePointRanges = new LinkedHashMap<>();
addUnicodeCategoryCodesToCodePointRanges(propertyCodePointRanges);
addUnicodeBinaryPropertyCodesToCodePointRanges(propertyCodePointRanges);
addUnicodeScriptCodesToCodePointRanges(propertyCodePointRanges);
addUnicodeBlocksToCodePointRanges(propertyCodePointRanges);
addUnicodeIntPropertyCodesToCodePointRanges(propertyCodePointRanges);
Map<String, String> propertyAliases = new LinkedHashMap<>();
addUnicodeCategoryCodesToNames(propertyAliases);
addUnicodeBinaryPropertyCodesToNames(propertyAliases);
addUnicodeScriptCodesToNames(propertyAliases);
addUnicodeBlocksToNames(propertyAliases);
addUnicodeIntPropertyCodesToNames(propertyAliases);
Map<String, Object> properties = new LinkedHashMap<>();
properties.put("propertyCodePointRanges", propertyCodePointRanges);
@ -191,20 +191,22 @@ public abstract class UnicodeDataTemplateController {
}
}
private static void addUnicodeScriptCodesToCodePointRanges(Map<String, IntervalSet> propertyCodePointRanges) {
addIntPropertyRanges(UProperty.SCRIPT, "", propertyCodePointRanges);
}
private static void addUnicodeBlocksToCodePointRanges(Map<String, IntervalSet> propertyCodePointRanges) {
addIntPropertyRanges(UProperty.BLOCK, "In", propertyCodePointRanges);
private static void addUnicodeIntPropertyCodesToCodePointRanges(Map<String, IntervalSet> propertyCodePointRanges) {
for (int property = UProperty.INT_START;
property < UProperty.INT_LIMIT;
property++) {
String propertyName = getShortPropertyName(property);
addIntPropertyRanges(property, propertyName + "=", propertyCodePointRanges);
}
}
private static void addIntPropertyAliases(int property, String namePrefix, Map<String, String> propertyAliases) {
String propertyName = getShortPropertyName(property);
for (int propertyValue = UCharacter.getIntPropertyMinValue(property);
propertyValue <= UCharacter.getIntPropertyMaxValue(property);
propertyValue++) {
String propertyName = namePrefix + UCharacter.getPropertyValueName(property, propertyValue, UProperty.NameChoice.SHORT);
int nameChoice = UProperty.NameChoice.LONG;
String aliasTarget = propertyName + "=" + UCharacter.getPropertyValueName(property, propertyValue, UProperty.NameChoice.SHORT);
int nameChoice = UProperty.NameChoice.SHORT;
String alias;
while (true) {
try {
@ -214,7 +216,7 @@ public abstract class UnicodeDataTemplateController {
break;
}
assert alias != null;
addPropertyAlias(propertyAliases, alias, propertyName);
addPropertyAlias(propertyAliases, alias, aliasTarget);
nameChoice++;
}
}
@ -227,4 +229,23 @@ public abstract class UnicodeDataTemplateController {
private static void addUnicodeBlocksToNames(Map<String, String> propertyAliases) {
addIntPropertyAliases(UProperty.BLOCK, "In", propertyAliases);
}
/**
 * Registers aliases for every integer-valued Unicode property whose id lies
 * in {@code [UProperty.INT_START, UProperty.INT_LIMIT)}.
 *
 * For each property, every property-name alias after the short name is
 * passed, suffixed with {@code "="}, as the name prefix to
 * {@code addIntPropertyAliases}.
 *
 * @param propertyAliases map handed on to {@code addIntPropertyAliases}
 */
private static void addUnicodeIntPropertyCodesToNames(Map<String, String> propertyAliases) {
	for (int property = UProperty.INT_START;
	     property < UProperty.INT_LIMIT;
	     property++) {
		// Start one past SHORT: the short property name itself is skipped here.
		int nameChoice = UProperty.NameChoice.SHORT + 1;
		while (true) {
			String propertyNameAlias;
			try {
				propertyNameAlias = UCharacter.getPropertyName(property, nameChoice);
			} catch (IllegalArgumentException e) {
				// No more aliases.
				break;
			}
			addIntPropertyAliases(property, propertyNameAlias + "=", propertyAliases);
			nameChoice++;
		}
	}
}
}

View File

@ -100,6 +100,14 @@ public class TestUnicodeData {
assertTrue(UnicodeData.getPropertyCodePoints("Cyrl").contains(0x0404));
}
/**
 * Checks that the {@code Script=<value>} form resolves to a code point set
 * containing a sample character for each of four script values.
 */
@Test
public void testUnicodeScriptEquals() {
	assertTrue(UnicodeData.getPropertyCodePoints("Script=Zyyy").contains('0'));
	assertTrue(UnicodeData.getPropertyCodePoints("Script=Latn").contains('X'));
	assertTrue(UnicodeData.getPropertyCodePoints("Script=Hani").contains(0x4E04));
	assertTrue(UnicodeData.getPropertyCodePoints("Script=Cyrl").contains(0x0404));
}
@Test
public void testUnicodeScriptAliases() {
assertTrue(UnicodeData.getPropertyCodePoints("Common").contains('0'));
@ -116,12 +124,48 @@ public class TestUnicodeData {
assertTrue(UnicodeData.getPropertyCodePoints("InMisc_Pictographs").contains(0x1F4A9));
}
/**
 * Checks that the {@code Block=<value>} form resolves to a code point set
 * containing a sample character for each of four block values.
 */
@Test
public void testUnicodeBlockEquals() {
	assertTrue(UnicodeData.getPropertyCodePoints("Block=ASCII").contains('0'));
	assertTrue(UnicodeData.getPropertyCodePoints("Block=CJK").contains(0x4E04));
	assertTrue(UnicodeData.getPropertyCodePoints("Block=Cyrillic").contains(0x0404));
	assertTrue(UnicodeData.getPropertyCodePoints("Block=Misc_Pictographs").contains(0x1F4A9));
}
/**
 * Checks that block names prefixed with {@code In} resolve to code point
 * sets containing a sample character from the named block.
 */
@Test
public void testUnicodeBlockAliases() {
	assertTrue(UnicodeData.getPropertyCodePoints("InBasic_Latin").contains('0'));
	assertTrue(UnicodeData.getPropertyCodePoints("InMiscellaneous_Mathematical_Symbols_B").contains(0x29BE));
}
/**
 * Checks {@code EnumProperty=Value} lookups for enumerated properties other
 * than script and block, with one positive and one negative sample each:
 * Grapheme_Cluster_Break=E_Base, Grapheme_Cluster_Break=SpacingMark and
 * East_Asian_Width=Ambiguous.
 */
@Test
public void testEnumeratedPropertyEquals() {
	assertTrue(
			"U+1F481 INFORMATION DESK PERSON is an emoji modifier base",
			UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1F481));

	assertFalse(
			"U+1F47E ALIEN MONSTER is not an emoji modifier base",
			UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1F47E));

	// Fixed: these two assertions used to query E_Base (and re-check U+1F481),
	// so the spacing-mark claims in their messages were never tested.
	assertTrue(
			"U+0E33 THAI CHARACTER SARA AM is a spacing mark",
			UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=SpacingMark").contains(0x0E33));

	assertFalse(
			"U+1038 MYANMAR SIGN VISARGA is not a spacing mark",
			UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=SpacingMark").contains(0x1038));

	assertTrue(
			"U+00A1 INVERTED EXCLAMATION MARK has ambiguous East Asian Width",
			UnicodeData.getPropertyCodePoints("East_Asian_Width=Ambiguous").contains(0x00A1));

	assertFalse(
			"U+00A2 CENT SIGN does not have ambiguous East Asian Width",
			UnicodeData.getPropertyCodePoints("East_Asian_Width=Ambiguous").contains(0x00A2));
}
@Test
public void testPropertyCaseInsensitivity() {
assertTrue(UnicodeData.getPropertyCodePoints("l").contains('x'));

View File

@ -18,6 +18,8 @@ import java.util.Objects;
* \\u{10ABCD}
* \\p{Foo}
* \\P{Bar}
* \\p{Baz=Blech}
* \\P{Baz=Blech}
*/
public abstract class EscapeSequenceParsing {
public static class Result {