Updated TestPerformance and associated code for the latest release

This commit is contained in:
Sam Harwell 2014-07-01 17:28:30 -05:00
parent 2cdc0537fa
commit 63053efd2a
11 changed files with 1458 additions and 373 deletions

View File

@ -26,7 +26,7 @@
<DebugType>full</DebugType> <DebugType>full</DebugType>
<Optimize>false</Optimize> <Optimize>false</Optimize>
<OutputPath>bin\Portable\Debug\</OutputPath> <OutputPath>bin\Portable\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE;NET_3_5;NET_4_0</DefineConstants> <DefineConstants>DEBUG;TRACE;PORTABLE;NET40;NET40PLUS;NET35PLUS;NET30PLUS;NET20PLUS</DefineConstants>
<ErrorReport>prompt</ErrorReport> <ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel> <WarningLevel>4</WarningLevel>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks> <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
@ -35,7 +35,7 @@
<DebugType>pdbonly</DebugType> <DebugType>pdbonly</DebugType>
<Optimize>true</Optimize> <Optimize>true</Optimize>
<OutputPath>bin\Portable\Release\</OutputPath> <OutputPath>bin\Portable\Release\</OutputPath>
<DefineConstants>TRACE;NET_3_5;NET_4_0</DefineConstants> <DefineConstants>TRACE;PORTABLE;NET40;NET40PLUS;NET35PLUS;NET30PLUS;NET20PLUS</DefineConstants>
<ErrorReport>prompt</ErrorReport> <ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel> <WarningLevel>4</WarningLevel>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks> <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
@ -57,6 +57,7 @@
</Choose> </Choose>
<ItemGroup> <ItemGroup>
<Compile Include="BaseTest.cs" /> <Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" /> <Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" /> <Compile Include="Sharpen\CRC32.cs" />

View File

@ -57,6 +57,7 @@
</Choose> </Choose>
<ItemGroup> <ItemGroup>
<Compile Include="BaseTest.cs" /> <Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" /> <Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" /> <Compile Include="Sharpen\CRC32.cs" />

View File

@ -57,6 +57,7 @@
</Choose> </Choose>
<ItemGroup> <ItemGroup>
<Compile Include="BaseTest.cs" /> <Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" /> <Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" /> <Compile Include="Sharpen\CRC32.cs" />

View File

@ -57,6 +57,7 @@
</Choose> </Choose>
<ItemGroup> <ItemGroup>
<Compile Include="BaseTest.cs" /> <Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" /> <Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" /> <Compile Include="Sharpen\CRC32.cs" />

View File

@ -57,6 +57,7 @@
</Choose> </Choose>
<ItemGroup> <ItemGroup>
<Compile Include="BaseTest.cs" /> <Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" /> <Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" /> <Compile Include="Sharpen\CRC32.cs" />

View File

@ -57,6 +57,7 @@
</Choose> </Choose>
<ItemGroup> <ItemGroup>
<Compile Include="BaseTest.cs" /> <Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" /> <Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" /> <Compile Include="Sharpen\CRC32.cs" />

View File

@ -268,11 +268,11 @@
string compiler = PathCombine(JavaHome, "bin", "java.exe"); string compiler = PathCombine(JavaHome, "bin", "java.exe");
List<string> classpath = new List<string>(); List<string> classpath = new List<string>();
classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4-csharp", "4.0.1-SNAPSHOT")); classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4-csharp", "4.3-SNAPSHOT"));
classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4-runtime", "4.0.1-SNAPSHOT")); classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4-runtime", "4.3"));
classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4", "4.0.1-SNAPSHOT")); classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4", "4.3"));
classpath.Add(GetMavenArtifact("org.antlr", "antlr-runtime", "3.5")); classpath.Add(GetMavenArtifact("org.antlr", "antlr-runtime", "3.5.2"));
classpath.Add(GetMavenArtifact("org.antlr", "ST4", "4.0.7")); classpath.Add(GetMavenArtifact("org.antlr", "ST4", "4.0.8"));
List<string> options = new List<string>(); List<string> options = new List<string>();
options.Add("-cp"); options.Add("-cp");
@ -284,7 +284,23 @@
options.Add(tmpdir); options.Add(tmpdir);
options.Add("-lib"); options.Add("-lib");
options.Add(tmpdir); options.Add(tmpdir);
options.Add("-Dlanguage=CSharp");
#if PORTABLE
options.Add("-Dlanguage=CSharp_v4_5");
#elif NET45
options.Add("-Dlanguage=CSharp_v4_5");
#elif NET40
options.Add("-Dlanguage=CSharp_v4_0");
#elif NET35
options.Add("-Dlanguage=CSharp_v3_5");
#elif NET30
options.Add("-Dlanguage=CSharp_v3_0");
#elif NET20
options.Add("-Dlanguage=CSharp_v2_0");
#else
#error Unknown assembly.
#endif
options.Add(grammarFileName); options.Add(grammarFileName);
System.Diagnostics.Process process = System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(compiler, '"' + Utils.Join("\" \"", options) + '"') System.Diagnostics.Process process = System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(compiler, '"' + Utils.Join("\" \"", options) + '"')

View File

@ -171,14 +171,48 @@
grammar Java; grammar Java;
@lexer::members { @lexer::members {
protected const int EOF = Eof;
protected const int HIDDEN = Hidden; private static bool IsJavaIdentifierCharacter(char c, bool start)
protected bool enumIsKeyword = true; {
protected bool assertIsKeyword = true; switch (char.GetUnicodeCategory(c))
{
case System.Globalization.UnicodeCategory.UppercaseLetter:
case System.Globalization.UnicodeCategory.LowercaseLetter:
case System.Globalization.UnicodeCategory.TitlecaseLetter:
case System.Globalization.UnicodeCategory.ModifierLetter:
case System.Globalization.UnicodeCategory.OtherLetter:
// isLetter returns true
return true;
case System.Globalization.UnicodeCategory.LetterNumber:
// getType returns LETTER_NUMBER
return true;
case System.Globalization.UnicodeCategory.CurrencySymbol:
// a currency symbol (such as "$")
return true;
case System.Globalization.UnicodeCategory.ConnectorPunctuation:
// a connecting punctuation character (such as "_")
return true;
case System.Globalization.UnicodeCategory.DecimalDigitNumber:
// it is a digit
return !start;
case System.Globalization.UnicodeCategory.SpacingCombiningMark:
// it is a combining mark
return !start;
case System.Globalization.UnicodeCategory.NonSpacingMark:
// it is a non-spacing mark
return !start;
default:
return false;
}
} }
@parser::members {
protected const int EOF = Eof;
} }
// starting point for parsing a java file // starting point for parsing a java file
@ -216,14 +250,15 @@ classOrInterfaceModifiers
; ;
classOrInterfaceModifier classOrInterfaceModifier
: annotation // class or interface : annotation // class or interface
| 'public' // class or interface | ( 'public' // class or interface
| 'protected' // class or interface | 'protected' // class or interface
| 'private' // class or interface | 'private' // class or interface
| 'abstract' // class or interface | 'abstract' // class or interface
| 'static' // class or interface | 'static' // class or interface
| 'final' // class only -- does not apply to interfaces | 'final' // class only -- does not apply to interfaces
| 'strictfp' // class or interface | 'strictfp' // class or interface
)
; ;
modifiers modifiers
@ -420,17 +455,18 @@ arrayInitializer
modifier modifier
: annotation : annotation
| 'public' | ( 'public'
| 'protected' | 'protected'
| 'private' | 'private'
| 'static' | 'static'
| 'abstract' | 'abstract'
| 'final' | 'final'
| 'native' | 'native'
| 'synchronized' | 'synchronized'
| 'transient' | 'transient'
| 'volatile' | 'volatile'
| 'strictfp' | 'strictfp'
)
; ;
packageOrTypeName packageOrTypeName
@ -501,39 +537,22 @@ methodBody
; ;
constructorBody constructorBody
: '{' explicitConstructorInvocation? blockStatement* '}' : block
; ;
explicitConstructorInvocation
: nonWildcardTypeArguments? ('this' | 'super') arguments ';'
| primary '.' nonWildcardTypeArguments? 'super' arguments ';'
;
qualifiedName qualifiedName
: Identifier ('.' Identifier)* : Identifier ('.' Identifier)*
; ;
literal literal
: integerLiteral : IntegerLiteral
| FloatingPointLiteral | FloatingPointLiteral
| CharacterLiteral | CharacterLiteral
| StringLiteral | StringLiteral
| booleanLiteral | BooleanLiteral
| 'null' | 'null'
; ;
integerLiteral
: HexLiteral
| OctalLiteral
| DecimalLiteral
;
booleanLiteral
: 'true'
| 'false'
;
// ANNOTATIONS // ANNOTATIONS
annotations annotations
@ -576,6 +595,7 @@ annotationTypeBody
annotationTypeElementDeclaration annotationTypeElementDeclaration
: modifiers annotationTypeElementRest : modifiers annotationTypeElementRest
| ';' // this is not allowed by the grammar, but apparently allowed by the actual compiler
; ;
annotationTypeElementRest annotationTypeElementRest
@ -628,18 +648,14 @@ variableModifiers
; ;
statement statement
@leftfactor{catches}
: block : block
| ASSERT expression (':' expression)? ';' | ASSERT expression (':' expression)? ';'
| 'if' parExpression statement ('else' statement)? | 'if' parExpression statement ('else' statement)?
| 'for' '(' forControl ')' statement | 'for' '(' forControl ')' statement
| 'while' parExpression statement | 'while' parExpression statement
| 'do' statement 'while' parExpression ';' | 'do' statement 'while' parExpression ';'
| 'try' block | 'try' block (catches finallyBlock? | finallyBlock)
( catches 'finally' block | 'try' resourceSpecification block catches? finallyBlock?
| catches
| 'finally' block
)
| 'switch' parExpression '{' switchBlockStatementGroups '}' | 'switch' parExpression '{' switchBlockStatementGroups '}'
| 'synchronized' parExpression block | 'synchronized' parExpression block
| 'return' expression? ';' | 'return' expression? ';'
@ -650,15 +666,35 @@ statement
| statementExpression ';' | statementExpression ';'
| Identifier ':' statement | Identifier ':' statement
; ;
catches catches
: catchClause (catchClause)* : catchClause+
; ;
catchClause catchClause
: 'catch' '(' formalParameter ')' block : 'catch' '(' variableModifiers catchType Identifier ')' block
; ;
catchType
: qualifiedName ('|' qualifiedName)*
;
finallyBlock
: 'finally' block
;
resourceSpecification
: '(' resources ';'? ')'
;
resources
: resource (';' resource)*
;
resource
: variableModifiers classOrInterfaceType variableDeclaratorId '=' expression
;
formalParameter formalParameter
: variableModifiers type variableDeclaratorId : variableModifiers type variableDeclaratorId
; ;
@ -721,9 +757,8 @@ expression
: primary : primary
| expression '.' Identifier | expression '.' Identifier
| expression '.' 'this' | expression '.' 'this'
| expression '.' 'super' '(' expressionList? ')' | expression '.' 'new' nonWildcardTypeArguments? innerCreator
| expression '.' 'new' Identifier '(' expressionList? ')' | expression '.' 'super' superSuffix
| expression '.' 'super' '.' Identifier arguments?
| expression '.' explicitGenericInvocation | expression '.' explicitGenericInvocation
| 'new' creator | 'new' creator
| expression '[' expression ']' | expression '[' expression ']'
@ -735,28 +770,28 @@ expression
| expression ('*'|'/'|'%') expression | expression ('*'|'/'|'%') expression
| expression ('+'|'-') expression | expression ('+'|'-') expression
| expression ('<' '<' | '>' '>' '>' | '>' '>') expression | expression ('<' '<' | '>' '>' '>' | '>' '>') expression
| expression ('<' '=' | '>' '=' | '>' | '<') expression | expression ('<=' | '>=' | '>' | '<') expression
| expression 'instanceof' type | expression 'instanceof' type
| expression ('==' | '!=') expression | expression ('==' | '!=') expression
| expression '&' expression | expression '&' expression
| expression '^'<assoc=right> expression | expression '^' expression
| expression '|' expression | expression '|' expression
| expression '&&' expression | expression '&&' expression
| expression '||' expression | expression '||' expression
| expression '?' expression ':' expression | expression '?' expression ':' expression
| expression |<assoc=right> expression
('='<assoc=right> ( '='
| '+='<assoc=right> | '+='
| '-='<assoc=right> | '-='
| '*='<assoc=right> | '*='
| '/='<assoc=right> | '/='
| '&='<assoc=right> | '&='
| '|='<assoc=right> | '|='
| '^='<assoc=right> | '^='
| '>' '>' '='<assoc=right> | '>>='
| '>' '>' '>' '='<assoc=right> | '>>>='
| '<' '<' '='<assoc=right> | '<<='
| '%='<assoc=right> | '%='
) )
expression expression
; ;
@ -769,6 +804,7 @@ primary
| Identifier | Identifier
| type '.' 'class' | type '.' 'class'
| 'void' '.' 'class' | 'void' '.' 'class'
| nonWildcardTypeArguments (explicitGenericInvocationSuffix | 'this' arguments)
; ;
creator creator
@ -777,12 +813,12 @@ creator
; ;
createdName createdName
: classOrInterfaceType : Identifier typeArgumentsOrDiamond? ('.' Identifier typeArgumentsOrDiamond?)*
| primitiveType | primitiveType
; ;
innerCreator innerCreator
: nonWildcardTypeArguments? Identifier classCreatorRest : Identifier nonWildcardTypeArgumentsOrDiamond? classCreatorRest
; ;
arrayCreatorRest arrayCreatorRest
@ -797,148 +833,461 @@ classCreatorRest
; ;
explicitGenericInvocation explicitGenericInvocation
: nonWildcardTypeArguments Identifier arguments : nonWildcardTypeArguments explicitGenericInvocationSuffix
; ;
nonWildcardTypeArguments nonWildcardTypeArguments
: '<' typeList '>' : '<' typeList '>'
; ;
selector typeArgumentsOrDiamond
: '.' Identifier arguments? : '<' '>'
| '.' 'this' | typeArguments
| '.' 'super' superSuffix ;
| '.' 'new' innerCreator
| '[' expression ']' nonWildcardTypeArgumentsOrDiamond
; : '<' '>'
| nonWildcardTypeArguments
;
superSuffix superSuffix
: arguments : arguments
| '.' Identifier arguments? | '.' Identifier arguments?
; ;
explicitGenericInvocationSuffix
: 'super' superSuffix
| Identifier arguments
;
arguments arguments
: '(' expressionList? ')' : '(' expressionList? ')'
; ;
// LEXER // LEXER
HexLiteral : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ; // §3.9 Keywords
DecimalLiteral : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ; ABSTRACT : 'abstract';
ASSERT : 'assert';
BOOLEAN : 'boolean';
BREAK : 'break';
BYTE : 'byte';
CASE : 'case';
CATCH : 'catch';
CHAR : 'char';
CLASS : 'class';
CONST : 'const';
CONTINUE : 'continue';
DEFAULT : 'default';
DO : 'do';
DOUBLE : 'double';
ELSE : 'else';
ENUM : 'enum';
EXTENDS : 'extends';
FINAL : 'final';
FINALLY : 'finally';
FLOAT : 'float';
FOR : 'for';
IF : 'if';
GOTO : 'goto';
IMPLEMENTS : 'implements';
IMPORT : 'import';
INSTANCEOF : 'instanceof';
INT : 'int';
INTERFACE : 'interface';
LONG : 'long';
NATIVE : 'native';
NEW : 'new';
PACKAGE : 'package';
PRIVATE : 'private';
PROTECTED : 'protected';
PUBLIC : 'public';
RETURN : 'return';
SHORT : 'short';
STATIC : 'static';
STRICTFP : 'strictfp';
SUPER : 'super';
SWITCH : 'switch';
SYNCHRONIZED : 'synchronized';
THIS : 'this';
THROW : 'throw';
THROWS : 'throws';
TRANSIENT : 'transient';
TRY : 'try';
VOID : 'void';
VOLATILE : 'volatile';
WHILE : 'while';
OctalLiteral : '0' ('0'..'7')+ IntegerTypeSuffix? ; // §3.10.1 Integer Literals
IntegerLiteral
: DecimalIntegerLiteral
| HexIntegerLiteral
| OctalIntegerLiteral
| BinaryIntegerLiteral
;
fragment fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ; DecimalIntegerLiteral
: DecimalNumeral IntegerTypeSuffix?
;
fragment fragment
IntegerTypeSuffix : ('l'|'L') ; HexIntegerLiteral
: HexNumeral IntegerTypeSuffix?
;
fragment
OctalIntegerLiteral
: OctalNumeral IntegerTypeSuffix?
;
fragment
BinaryIntegerLiteral
: BinaryNumeral IntegerTypeSuffix?
;
fragment
IntegerTypeSuffix
: [lL]
;
fragment
DecimalNumeral
: '0'
| NonZeroDigit (Digits? | Underscores Digits)
;
fragment
Digits
: Digit (DigitsAndUnderscores? Digit)?
;
fragment
Digit
: '0'
| NonZeroDigit
;
fragment
NonZeroDigit
: [1-9]
;
fragment
DigitsAndUnderscores
: DigitOrUnderscore+
;
fragment
DigitOrUnderscore
: Digit
| '_'
;
fragment
Underscores
: '_'+
;
fragment
HexNumeral
: '0' [xX] HexDigits
;
fragment
HexDigits
: HexDigit (HexDigitsAndUnderscores? HexDigit)?
;
fragment
HexDigit
: [0-9a-fA-F]
;
fragment
HexDigitsAndUnderscores
: HexDigitOrUnderscore+
;
fragment
HexDigitOrUnderscore
: HexDigit
| '_'
;
fragment
OctalNumeral
: '0' Underscores? OctalDigits
;
fragment
OctalDigits
: OctalDigit (OctalDigitsAndUnderscores? OctalDigit)?
;
fragment
OctalDigit
: [0-7]
;
fragment
OctalDigitsAndUnderscores
: OctalDigitOrUnderscore+
;
fragment
OctalDigitOrUnderscore
: OctalDigit
| '_'
;
fragment
BinaryNumeral
: '0' [bB] BinaryDigits
;
fragment
BinaryDigits
: BinaryDigit (BinaryDigitsAndUnderscores? BinaryDigit)?
;
fragment
BinaryDigit
: [01]
;
fragment
BinaryDigitsAndUnderscores
: BinaryDigitOrUnderscore+
;
fragment
BinaryDigitOrUnderscore
: BinaryDigit
| '_'
;
// §3.10.2 Floating-Point Literals
FloatingPointLiteral FloatingPointLiteral
: ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix? : DecimalFloatingPointLiteral
| '.' ('0'..'9')+ Exponent? FloatTypeSuffix? | HexadecimalFloatingPointLiteral
| ('0'..'9')+ Exponent FloatTypeSuffix? ;
| ('0'..'9')+ FloatTypeSuffix
| '0' ('x'|'X')
( HexDigit+ ('.' HexDigit*)? HexExponent FloatTypeSuffix?
| '.' HexDigit+ HexExponent FloatTypeSuffix?
)
;
fragment fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ; DecimalFloatingPointLiteral
: Digits '.' Digits? ExponentPart? FloatTypeSuffix?
| '.' Digits ExponentPart? FloatTypeSuffix?
| Digits ExponentPart FloatTypeSuffix?
| Digits FloatTypeSuffix
;
fragment fragment
HexExponent : ('p'|'P') ('+'|'-')? ('0'..'9')+ ; ExponentPart
: ExponentIndicator SignedInteger
;
fragment fragment
FloatTypeSuffix : ('f'|'F'|'d'|'D') ; ExponentIndicator
: [eE]
;
fragment
SignedInteger
: Sign? Digits
;
fragment
Sign
: [+-]
;
fragment
FloatTypeSuffix
: [fFdD]
;
fragment
HexadecimalFloatingPointLiteral
: HexSignificand BinaryExponent FloatTypeSuffix?
;
fragment
HexSignificand
: HexNumeral '.'?
| '0' [xX] HexDigits? '.' HexDigits
;
fragment
BinaryExponent
: BinaryExponentIndicator SignedInteger
;
fragment
BinaryExponentIndicator
: [pP]
;
// §3.10.3 Boolean Literals
BooleanLiteral
: 'true'
| 'false'
;
// §3.10.4 Character Literals
CharacterLiteral CharacterLiteral
: '\'' ( EscapeSequence | ~('\''|'\\') ) '\'' : '\'' SingleCharacter '\''
; | '\'' EscapeSequence '\''
;
fragment
SingleCharacter
: ~['\\]
;
// §3.10.5 String Literals
StringLiteral StringLiteral
: '"' ( EscapeSequence | ~('\\'|'"') )* '"' : '"' StringCharacters? '"'
; ;
fragment
StringCharacters
: StringCharacter+
;
fragment
StringCharacter
: ~["\\]
| EscapeSequence
;
// §3.10.6 Escape Sequences for Character and String Literals
fragment fragment
EscapeSequence EscapeSequence
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') : '\\' [btnfr"'\\]
| UnicodeEscape | OctalEscape
| OctalEscape ;
;
fragment fragment
OctalEscape OctalEscape
: '\\' ('0'..'3') ('0'..'7') ('0'..'7') : '\\' OctalDigit
| '\\' ('0'..'7') ('0'..'7') | '\\' OctalDigit OctalDigit
| '\\' ('0'..'7') | '\\' ZeroToThree OctalDigit OctalDigit
; ;
fragment fragment
UnicodeEscape ZeroToThree
: '\\' 'u' HexDigit HexDigit HexDigit HexDigit : [0-3]
; ;
ENUM: 'enum' {enumIsKeyword}? // §3.10.7 The Null Literal
;
ASSERT
: 'assert' {assertIsKeyword}?
;
Identifier
: Letter (Letter|JavaIDDigit)*
;
/**I found this char range in JavaCC's grammar, but Letter and Digit overlap. NullLiteral
Still works, but... : 'null'
*/ ;
fragment
Letter // §3.11 Separators
: '\u0024' |
'\u0041'..'\u005a' | LPAREN : '(';
'\u005f' | RPAREN : ')';
'\u0061'..'\u007a' | LBRACE : '{';
'\u00c0'..'\u00d6' | RBRACE : '}';
'\u00d8'..'\u00f6' | LBRACK : '[';
'\u00f8'..'\u00ff' | RBRACK : ']';
'\u0100'..'\u1fff' | SEMI : ';';
'\u3040'..'\u318f' | COMMA : ',';
'\u3300'..'\u337f' | DOT : '.';
'\u3400'..'\u3d2d' |
'\u4e00'..'\u9fff' | // §3.12 Operators
'\uf900'..'\ufaff'
; ASSIGN : '=';
GT : '>';
LT : '<';
BANG : '!';
TILDE : '~';
QUESTION : '?';
COLON : ':';
EQUAL : '==';
LE : '<=';
GE : '>=';
NOTEQUAL : '!=';
AND : '&&';
OR : '||';
INC : '++';
DEC : '--';
ADD : '+';
SUB : '-';
MUL : '*';
DIV : '/';
BITAND : '&';
BITOR : '|';
CARET : '^';
MOD : '%';
ADD_ASSIGN : '+=';
SUB_ASSIGN : '-=';
MUL_ASSIGN : '*=';
DIV_ASSIGN : '/=';
AND_ASSIGN : '&=';
OR_ASSIGN : '|=';
XOR_ASSIGN : '^=';
MOD_ASSIGN : '%=';
LSHIFT_ASSIGN : '<<=';
RSHIFT_ASSIGN : '>>=';
URSHIFT_ASSIGN : '>>>=';
// §3.8 Identifiers (must appear after all keywords in the grammar)
Identifier
: JavaLetter JavaLetterOrDigit*
;
fragment fragment
JavaIDDigit JavaLetter
: '\u0030'..'\u0039' | : [a-zA-Z$_] // these are the "java letters" below 0xFF
'\u0660'..'\u0669' | | // covers all characters above 0xFF which are not a surrogate
'\u06f0'..'\u06f9' | ~[\u0000-\u00FF\uD800-\uDBFF]
'\u0966'..'\u096f' | {IsJavaIdentifierCharacter((char)_input.La(-1), true)}?
'\u09e6'..'\u09ef' | //| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
'\u0a66'..'\u0a6f' | // [\uD800-\uDBFF] [\uDC00-\uDFFF]
'\u0ae6'..'\u0aef' | // {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.La(-2), (char)_input.La(-1)))}?
'\u0b66'..'\u0b6f' | ;
'\u0be7'..'\u0bef' |
'\u0c66'..'\u0c6f' |
'\u0ce6'..'\u0cef' |
'\u0d66'..'\u0d6f' |
'\u0e50'..'\u0e59' |
'\u0ed0'..'\u0ed9' |
'\u1040'..'\u1049'
;
WS : (' '|'\r'|'\t'|'\u000C'|'\n')+ -> channel(HIDDEN) fragment
JavaLetterOrDigit
: [a-zA-Z0-9$_] // these are the "java letters or digits" below 0xFF
| // covers all characters above 0xFF which are not a surrogate
~[\u0000-\u00FF\uD800-\uDBFF]
{IsJavaIdentifierCharacter((char)_input.La(-1), false)}?
//| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
// [\uD800-\uDBFF] [\uDC00-\uDFFF]
// {Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.La(-2), (char)_input.La(-1)))}?
;
//
// Additional symbols not defined in the lexical specification
//
AT : '@';
ELLIPSIS : '...';
//
// Whitespace and comments
//
WS : [ \t\r\n\u000C]+ -> skip
; ;
COMMENT COMMENT
: '/*' .*? '*/' -> channel(HIDDEN) : '/*' .*? '*/' -> skip
; ;
LINE_COMMENT LINE_COMMENT
: '//' ~('\n'|'\r')* '\r'? '\n' -> channel(HIDDEN) : '//' ~[\r\n]* -> skip
; ;

View File

@ -168,14 +168,48 @@
grammar Java; grammar Java;
@lexer::members { @lexer::members {
protected const int EOF = Eof;
protected const int HIDDEN = Hidden; private static bool IsJavaIdentifierCharacter(char c, bool start)
protected bool enumIsKeyword = true; {
protected bool assertIsKeyword = true; switch (char.GetUnicodeCategory(c))
{
case System.Globalization.UnicodeCategory.UppercaseLetter:
case System.Globalization.UnicodeCategory.LowercaseLetter:
case System.Globalization.UnicodeCategory.TitlecaseLetter:
case System.Globalization.UnicodeCategory.ModifierLetter:
case System.Globalization.UnicodeCategory.OtherLetter:
// isLetter returns true
return true;
case System.Globalization.UnicodeCategory.LetterNumber:
// getType returns LETTER_NUMBER
return true;
case System.Globalization.UnicodeCategory.CurrencySymbol:
// a currency symbol (such as "$")
return true;
case System.Globalization.UnicodeCategory.ConnectorPunctuation:
// a connecting punctuation character (such as "_")
return true;
case System.Globalization.UnicodeCategory.DecimalDigitNumber:
// it is a digit
return !start;
case System.Globalization.UnicodeCategory.SpacingCombiningMark:
// it is a combining mark
return !start;
case System.Globalization.UnicodeCategory.NonSpacingMark:
// it is a non-spacing mark
return !start;
default:
return false;
}
} }
@parser::members {
protected const int EOF = Eof;
} }
// starting point for parsing a java file // starting point for parsing a java file
@ -213,14 +247,15 @@ classOrInterfaceModifiers
; ;
classOrInterfaceModifier classOrInterfaceModifier
: annotation // class or interface : annotation // class or interface
| 'public' // class or interface | ( 'public' // class or interface
| 'protected' // class or interface | 'protected' // class or interface
| 'private' // class or interface | 'private' // class or interface
| 'abstract' // class or interface | 'abstract' // class or interface
| 'static' // class or interface | 'static' // class or interface
| 'final' // class only -- does not apply to interfaces | 'final' // class only -- does not apply to interfaces
| 'strictfp' // class or interface | 'strictfp' // class or interface
)
; ;
modifiers modifiers
@ -417,17 +452,18 @@ arrayInitializer
modifier modifier
: annotation : annotation
| 'public' | ( 'public'
| 'protected' | 'protected'
| 'private' | 'private'
| 'static' | 'static'
| 'abstract' | 'abstract'
| 'final' | 'final'
| 'native' | 'native'
| 'synchronized' | 'synchronized'
| 'transient' | 'transient'
| 'volatile' | 'volatile'
| 'strictfp' | 'strictfp'
)
; ;
packageOrTypeName packageOrTypeName
@ -498,39 +534,22 @@ methodBody
; ;
constructorBody constructorBody
: '{' explicitConstructorInvocation? blockStatement* '}' : block
; ;
explicitConstructorInvocation
: nonWildcardTypeArguments? ('this' | 'super') arguments ';'
| primary '.' nonWildcardTypeArguments? 'super' arguments ';'
;
qualifiedName qualifiedName
: Identifier ('.' Identifier)* : Identifier ('.' Identifier)*
; ;
literal literal
: integerLiteral : IntegerLiteral
| FloatingPointLiteral | FloatingPointLiteral
| CharacterLiteral | CharacterLiteral
| StringLiteral | StringLiteral
| booleanLiteral | BooleanLiteral
| 'null' | 'null'
; ;
integerLiteral
: HexLiteral
| OctalLiteral
| DecimalLiteral
;
booleanLiteral
: 'true'
| 'false'
;
// ANNOTATIONS // ANNOTATIONS
annotations annotations
@ -573,6 +592,7 @@ annotationTypeBody
annotationTypeElementDeclaration annotationTypeElementDeclaration
: modifiers annotationTypeElementRest : modifiers annotationTypeElementRest
| ';' // this is not allowed by the grammar, but apparently allowed by the actual compiler
; ;
annotationTypeElementRest annotationTypeElementRest
@ -625,18 +645,14 @@ variableModifiers
; ;
statement statement
//@leftfactor{catches} : block
: block
| ASSERT expression (':' expression)? ';' | ASSERT expression (':' expression)? ';'
| 'if' parExpression statement ('else' statement)? | 'if' parExpression statement ('else' statement)?
| 'for' '(' forControl ')' statement | 'for' '(' forControl ')' statement
| 'while' parExpression statement | 'while' parExpression statement
| 'do' statement 'while' parExpression ';' | 'do' statement 'while' parExpression ';'
| 'try' block | 'try' block (catches finallyBlock? | finallyBlock)
( catches 'finally' block | 'try' resourceSpecification block catches? finallyBlock?
| catches
| 'finally' block
)
| 'switch' parExpression '{' switchBlockStatementGroups '}' | 'switch' parExpression '{' switchBlockStatementGroups '}'
| 'synchronized' parExpression block | 'synchronized' parExpression block
| 'return' expression? ';' | 'return' expression? ';'
@ -647,15 +663,35 @@ statement
| statementExpression ';' | statementExpression ';'
| Identifier ':' statement | Identifier ':' statement
; ;
catches catches
: catchClause (catchClause)* : catchClause+
; ;
catchClause catchClause
: 'catch' '(' formalParameter ')' block : 'catch' '(' variableModifiers catchType Identifier ')' block
; ;
catchType
: qualifiedName ('|' qualifiedName)*
;
finallyBlock
: 'finally' block
;
resourceSpecification
: '(' resources ';'? ')'
;
resources
: resource (';' resource)*
;
resource
: variableModifiers classOrInterfaceType variableDeclaratorId '=' expression
;
formalParameter formalParameter
: variableModifiers type variableDeclaratorId : variableModifiers type variableDeclaratorId
; ;
@ -728,27 +764,13 @@ assignmentOperator
| '|=' | '|='
| '^=' | '^='
| '%=' | '%='
| t1='<' t2='<' t3='=' | '<<='
// { $t1.getLine() == $t2.getLine() && | '>>='
// $t1.getCharPositionInLine() + 1 == $t2.getCharPositionInLine() && | '>>>='
// $t2.getLine() == $t3.getLine() &&
// $t2.getCharPositionInLine() + 1 == $t3.getCharPositionInLine() }?
| t1='>' t2='>' t3='>' t4='='
// { $t1.getLine() == $t2.getLine() &&
// $t1.getCharPositionInLine() + 1 == $t2.getCharPositionInLine() &&
// $t2.getLine() == $t3.getLine() &&
// $t2.getCharPositionInLine() + 1 == $t3.getCharPositionInLine() &&
// $t3.getLine() == $t4.getLine() &&
// $t3.getCharPositionInLine() + 1 == $t4.getCharPositionInLine() }?
| t1='>' t2='>' t3='='
// { $t1.getLine() == $t2.getLine() &&
// $t1.getCharPositionInLine() + 1 == $t2.getCharPositionInLine() &&
// $t2.getLine() == $t3.getLine() &&
// $t2.getCharPositionInLine() + 1 == $t3.getCharPositionInLine() }?
; ;
conditionalExpression conditionalExpression
: conditionalOrExpression ( '?' conditionalExpression ':' conditionalExpression )? : conditionalOrExpression ( '?' expression ':' conditionalExpression )?
; ;
conditionalOrExpression conditionalOrExpression
@ -784,14 +806,10 @@ relationalExpression
; ;
relationalOp relationalOp
: t1='<' t2='=' : '<='
// { $t1.getLine() == $t2.getLine() && | '>='
// $t1.getCharPositionInLine() + 1 == $t2.getCharPositionInLine() }? | '<'
| t1='>' t2='=' | '>'
// { $t1.getLine() == $t2.getLine() &&
// $t1.getCharPositionInLine() + 1 == $t2.getCharPositionInLine() }?
| '<'
| '>'
; ;
shiftExpression shiftExpression
@ -843,10 +861,11 @@ castExpression
primary primary
: parExpression : parExpression
| 'this' ('.' Identifier)* identifierSuffix? | 'this' arguments?
| 'super' superSuffix | 'super' superSuffix
| literal | literal
| 'new' creator | 'new' creator
| nonWildcardTypeArguments (explicitGenericInvocationSuffix | 'this' arguments)
| Identifier ('.' Identifier)* identifierSuffix? | Identifier ('.' Identifier)* identifierSuffix?
| primitiveType ('[' ']')* '.' 'class' | primitiveType ('[' ']')* '.' 'class'
| 'void' '.' 'class' | 'void' '.' 'class'
@ -854,13 +873,13 @@ primary
identifierSuffix identifierSuffix
: ('[' ']')+ '.' 'class' : ('[' ']')+ '.' 'class'
| ('[' expression ']')+ // can also be matched by selector, but do here | '[' expression ']'
| arguments | arguments
| '.' 'class' | '.' 'class'
| '.' explicitGenericInvocation | '.' explicitGenericInvocation
| '.' 'this' | '.' 'this'
| '.' 'super' arguments | '.' 'super' arguments
| '.' 'new' innerCreator | '.' 'new' nonWildcardTypeArguments? innerCreator
; ;
creator creator
@ -869,12 +888,12 @@ creator
; ;
createdName createdName
: classOrInterfaceType : Identifier typeArgumentsOrDiamond? ('.' Identifier typeArgumentsOrDiamond?)*
| primitiveType | primitiveType
; ;
innerCreator innerCreator
: nonWildcardTypeArguments? Identifier classCreatorRest : Identifier nonWildcardTypeArgumentsOrDiamond? classCreatorRest
; ;
arrayCreatorRest arrayCreatorRest
@ -889,18 +908,29 @@ classCreatorRest
; ;
explicitGenericInvocation explicitGenericInvocation
: nonWildcardTypeArguments Identifier arguments : nonWildcardTypeArguments explicitGenericInvocationSuffix
; ;
nonWildcardTypeArguments nonWildcardTypeArguments
: '<' typeList '>' : '<' typeList '>'
; ;
typeArgumentsOrDiamond
: '<' '>'
| typeArguments
;
nonWildcardTypeArgumentsOrDiamond
: '<' '>'
| nonWildcardTypeArguments
;
selector selector
: '.' Identifier arguments? : '.' Identifier arguments?
| '.' explicitGenericInvocation
| '.' 'this' | '.' 'this'
| '.' 'super' superSuffix | '.' 'super' superSuffix
| '.' 'new' innerCreator | '.' 'new' nonWildcardTypeArguments? innerCreator
| '[' expression ']' | '[' expression ']'
; ;
@ -909,128 +939,439 @@ superSuffix
| '.' Identifier arguments? | '.' Identifier arguments?
; ;
// What may follow nonWildcardTypeArguments in an explicit generic invocation
// (see explicitGenericInvocation and primary): either a 'super' call/member
// access, or a method name with its argument list.
explicitGenericInvocationSuffix
: 'super' superSuffix
| Identifier arguments
;
arguments arguments
: '(' expressionList? ')' : '(' expressionList? ')'
; ;
// LEXER // LEXER
HexLiteral : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ; // §3.9 Keywords
DecimalLiteral : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ; ABSTRACT : 'abstract';
ASSERT : 'assert';
BOOLEAN : 'boolean';
BREAK : 'break';
BYTE : 'byte';
CASE : 'case';
CATCH : 'catch';
CHAR : 'char';
CLASS : 'class';
CONST : 'const';
CONTINUE : 'continue';
DEFAULT : 'default';
DO : 'do';
DOUBLE : 'double';
ELSE : 'else';
ENUM : 'enum';
EXTENDS : 'extends';
FINAL : 'final';
FINALLY : 'finally';
FLOAT : 'float';
FOR : 'for';
IF : 'if';
GOTO : 'goto';
IMPLEMENTS : 'implements';
IMPORT : 'import';
INSTANCEOF : 'instanceof';
INT : 'int';
INTERFACE : 'interface';
LONG : 'long';
NATIVE : 'native';
NEW : 'new';
PACKAGE : 'package';
PRIVATE : 'private';
PROTECTED : 'protected';
PUBLIC : 'public';
RETURN : 'return';
SHORT : 'short';
STATIC : 'static';
STRICTFP : 'strictfp';
SUPER : 'super';
SWITCH : 'switch';
SYNCHRONIZED : 'synchronized';
THIS : 'this';
THROW : 'throw';
THROWS : 'throws';
TRANSIENT : 'transient';
TRY : 'try';
VOID : 'void';
VOLATILE : 'volatile';
WHILE : 'while';
OctalLiteral : '0' ('0'..'7')+ IntegerTypeSuffix? ; // §3.10.1 Integer Literals
IntegerLiteral
: DecimalIntegerLiteral
| HexIntegerLiteral
| OctalIntegerLiteral
| BinaryIntegerLiteral
;
fragment fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ; DecimalIntegerLiteral
: DecimalNumeral IntegerTypeSuffix?
;
fragment fragment
IntegerTypeSuffix : ('l'|'L') ; HexIntegerLiteral
: HexNumeral IntegerTypeSuffix?
;
// Octal alternative of IntegerLiteral: an octal numeral with an optional
// integer type suffix.
fragment
OctalIntegerLiteral
: OctalNumeral IntegerTypeSuffix?
;
// Binary alternative of IntegerLiteral: a binary numeral with an optional
// integer type suffix.
fragment
BinaryIntegerLiteral
: BinaryNumeral IntegerTypeSuffix?
;
// Optional suffix shared by all four integer literal forms: 'l' or 'L'.
fragment
IntegerTypeSuffix
: [lL]
;
// A decimal numeral is either a lone '0', or a non-zero digit optionally
// followed by more digits; underscores may separate the leading digit from
// the remaining digits.
fragment
DecimalNumeral
: '0'
| NonZeroDigit (Digits? | Underscores Digits)
;
// One or more decimal digits. Underscores may appear between digits, but the
// sequence always begins and ends with a digit.
fragment
Digits
: Digit (DigitsAndUnderscores? Digit)?
;
// A single decimal digit, 0-9.
fragment
Digit
: '0'
| NonZeroDigit
;
// A single decimal digit other than zero.
fragment
NonZeroDigit
: [1-9]
;
// Interior of a Digits sequence: any non-empty mix of digits and underscores.
fragment
DigitsAndUnderscores
: DigitOrUnderscore+
;
// One element of DigitsAndUnderscores.
fragment
DigitOrUnderscore
: Digit
| '_'
;
// One or more consecutive underscores.
fragment
Underscores
: '_'+
;
// A hexadecimal numeral: the prefix "0x" or "0X" followed by hex digits.
fragment
HexNumeral
: '0' [xX] HexDigits
;
// One or more hex digits. Underscores may appear between digits, but the
// sequence always begins and ends with a digit.
fragment
HexDigits
: HexDigit (HexDigitsAndUnderscores? HexDigit)?
;
// A single hexadecimal digit, in either letter case.
fragment
HexDigit
: [0-9a-fA-F]
;
// Interior of a HexDigits sequence: any non-empty mix of hex digits and
// underscores.
fragment
HexDigitsAndUnderscores
: HexDigitOrUnderscore+
;
// One element of HexDigitsAndUnderscores.
fragment
HexDigitOrUnderscore
: HexDigit
| '_'
;
// An octal numeral: a leading '0', optional underscores, then octal digits.
fragment
OctalNumeral
: '0' Underscores? OctalDigits
;
// One or more octal digits. Underscores may appear between digits, but the
// sequence always begins and ends with a digit.
fragment
OctalDigits
: OctalDigit (OctalDigitsAndUnderscores? OctalDigit)?
;
// A single octal digit, 0-7. Also used by OctalEscape.
fragment
OctalDigit
: [0-7]
;
// Interior of an OctalDigits sequence: any non-empty mix of octal digits and
// underscores.
fragment
OctalDigitsAndUnderscores
: OctalDigitOrUnderscore+
;
// One element of OctalDigitsAndUnderscores.
fragment
OctalDigitOrUnderscore
: OctalDigit
| '_'
;
// A binary numeral: the prefix "0b" or "0B" followed by binary digits.
fragment
BinaryNumeral
: '0' [bB] BinaryDigits
;
// One or more binary digits. Underscores may appear between digits, but the
// sequence always begins and ends with a digit.
fragment
BinaryDigits
: BinaryDigit (BinaryDigitsAndUnderscores? BinaryDigit)?
;
// A single binary digit, 0 or 1.
fragment
BinaryDigit
: [01]
;
// Interior of a BinaryDigits sequence: any non-empty mix of binary digits and
// underscores.
fragment
BinaryDigitsAndUnderscores
: BinaryDigitOrUnderscore+
;
// One element of BinaryDigitsAndUnderscores.
fragment
BinaryDigitOrUnderscore
: BinaryDigit
| '_'
;
// §3.10.2 Floating-Point Literals
FloatingPointLiteral FloatingPointLiteral
: ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix? : DecimalFloatingPointLiteral
| '.' ('0'..'9')+ Exponent? FloatTypeSuffix? | HexadecimalFloatingPointLiteral
| ('0'..'9')+ Exponent FloatTypeSuffix? ;
| ('0'..'9')+ FloatTypeSuffix
| '0' ('x'|'X')
( HexDigit+ ('.' HexDigit*)? HexExponent FloatTypeSuffix?
| '.' HexDigit+ HexExponent FloatTypeSuffix?
)
;
fragment fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ; DecimalFloatingPointLiteral
: Digits '.' Digits? ExponentPart? FloatTypeSuffix?
| '.' Digits ExponentPart? FloatTypeSuffix?
| Digits ExponentPart FloatTypeSuffix?
| Digits FloatTypeSuffix
;
fragment fragment
HexExponent : ('p'|'P') ('+'|'-')? ('0'..'9')+ ; ExponentPart
: ExponentIndicator SignedInteger
;
fragment fragment
FloatTypeSuffix : ('f'|'F'|'d'|'D') ; ExponentIndicator
: [eE]
;
// Exponent value: decimal digits with an optional leading sign. Shared by the
// decimal ExponentPart and the hexadecimal BinaryExponent.
fragment
SignedInteger
: Sign? Digits
;
// A '+' or '-' sign.
fragment
Sign
: [+-]
;
// Suffix selecting the floating-point type: 'f'/'F' or 'd'/'D'.
fragment
FloatTypeSuffix
: [fFdD]
;
// Hexadecimal floating-point literal. Unlike the decimal forms, the binary
// exponent is mandatory here; only the type suffix is optional.
fragment
HexadecimalFloatingPointLiteral
: HexSignificand BinaryExponent FloatTypeSuffix?
;
// Significand of a hexadecimal floating-point literal: either a complete hex
// numeral with an optional trailing '.', or "0x"/"0X" with optional integer
// digits, a '.', and required fraction digits.
fragment
HexSignificand
: HexNumeral '.'?
| '0' [xX] HexDigits? '.' HexDigits
;
// Exponent of a hexadecimal floating-point literal: 'p'/'P' followed by a
// signed decimal integer.
fragment
BinaryExponent
: BinaryExponentIndicator SignedInteger
;
// Marker introducing a BinaryExponent.
fragment
BinaryExponentIndicator
: [pP]
;
// §3.10.3 Boolean Literals
// The two boolean literals. This rule is declared before Identifier, which
// would otherwise also match the same text; ANTLR resolves equal-length
// matches in favor of the rule listed first.
BooleanLiteral
: 'true'
| 'false'
;
// §3.10.4 Character Literals
CharacterLiteral CharacterLiteral
: '\'' ( EscapeSequence | ~('\''|'\\') ) '\'' : '\'' SingleCharacter '\''
; | '\'' EscapeSequence '\''
;
fragment
SingleCharacter
: ~['\\]
;
// §3.10.5 String Literals
StringLiteral StringLiteral
: '"' ( EscapeSequence | ~('\\'|'"') )* '"' : '"' StringCharacters? '"'
; ;
// Non-empty body of a string literal (the text between the double quotes).
fragment
StringCharacters
: StringCharacter+
;
// One element of a string literal body: any character other than '"' or '\',
// or a complete escape sequence.
fragment
StringCharacter
: ~["\\]
| EscapeSequence
;
// §3.10.6 Escape Sequences for Character and String Literals
fragment fragment
EscapeSequence EscapeSequence
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') : '\\' [btnfr"'\\]
| UnicodeEscape | OctalEscape
| OctalEscape ;
;
fragment fragment
OctalEscape OctalEscape
: '\\' ('0'..'3') ('0'..'7') ('0'..'7') : '\\' OctalDigit
| '\\' ('0'..'7') ('0'..'7') | '\\' OctalDigit OctalDigit
| '\\' ('0'..'7') | '\\' ZeroToThree OctalDigit OctalDigit
; ;
fragment fragment
UnicodeEscape ZeroToThree
: '\\' 'u' HexDigit HexDigit HexDigit HexDigit : [0-3]
; ;
ENUM: 'enum' {enumIsKeyword}? // §3.10.7 The Null Literal
;
ASSERT
: 'assert' {assertIsKeyword}?
;
Identifier
: Letter (Letter|JavaIDDigit)*
;
/**I found this char range in JavaCC's grammar, but Letter and Digit overlap. NullLiteral
Still works, but... : 'null'
*/ ;
fragment
Letter // §3.11 Separators
: '\u0024' |
'\u0041'..'\u005a' | LPAREN : '(';
'\u005f' | RPAREN : ')';
'\u0061'..'\u007a' | LBRACE : '{';
'\u00c0'..'\u00d6' | RBRACE : '}';
'\u00d8'..'\u00f6' | LBRACK : '[';
'\u00f8'..'\u00ff' | RBRACK : ']';
'\u0100'..'\u1fff' | SEMI : ';';
'\u3040'..'\u318f' | COMMA : ',';
'\u3300'..'\u337f' | DOT : '.';
'\u3400'..'\u3d2d' |
'\u4e00'..'\u9fff' | // §3.12 Operators
'\uf900'..'\ufaff'
; ASSIGN : '=';
GT : '>';
LT : '<';
BANG : '!';
TILDE : '~';
QUESTION : '?';
COLON : ':';
EQUAL : '==';
LE : '<=';
GE : '>=';
NOTEQUAL : '!=';
AND : '&&';
OR : '||';
INC : '++';
DEC : '--';
ADD : '+';
SUB : '-';
MUL : '*';
DIV : '/';
BITAND : '&';
BITOR : '|';
CARET : '^';
MOD : '%';
ADD_ASSIGN : '+=';
SUB_ASSIGN : '-=';
MUL_ASSIGN : '*=';
DIV_ASSIGN : '/=';
AND_ASSIGN : '&=';
OR_ASSIGN : '|=';
XOR_ASSIGN : '^=';
MOD_ASSIGN : '%=';
LSHIFT_ASSIGN : '<<=';
RSHIFT_ASSIGN : '>>=';
URSHIFT_ASSIGN : '>>>=';
// §3.8 Identifiers (must appear after all keywords in the grammar)
Identifier
: JavaLetter JavaLetterOrDigit*
;
fragment fragment
JavaIDDigit JavaLetter
: '\u0030'..'\u0039' | : [a-zA-Z$_] // these are the "java letters" below 0xFF
'\u0660'..'\u0669' | | // covers all characters above 0xFF which are not a surrogate
'\u06f0'..'\u06f9' | ~[\u0000-\u00FF\uD800-\uDBFF]
'\u0966'..'\u096f' | {IsJavaIdentifierCharacter((char)_input.La(-1), true)}?
'\u09e6'..'\u09ef' | //| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
'\u0a66'..'\u0a6f' | // [\uD800-\uDBFF] [\uDC00-\uDFFF]
'\u0ae6'..'\u0aef' | // {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.La(-2), (char)_input.La(-1)))}?
'\u0b66'..'\u0b6f' | ;
'\u0be7'..'\u0bef' |
'\u0c66'..'\u0c6f' |
'\u0ce6'..'\u0cef' |
'\u0d66'..'\u0d6f' |
'\u0e50'..'\u0e59' |
'\u0ed0'..'\u0ed9' |
'\u1040'..'\u1049'
;
WS : (' '|'\r'|'\t'|'\u000C'|'\n')+ -> channel(HIDDEN) fragment
JavaLetterOrDigit
: [a-zA-Z0-9$_] // these are the "java letters or digits" below 0xFF
| // covers all characters above 0xFF which are not a surrogate
~[\u0000-\u00FF\uD800-\uDBFF]
{IsJavaIdentifierCharacter((char)_input.La(-1), false)}?
//| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
// [\uD800-\uDBFF] [\uDC00-\uDFFF]
// {Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.La(-2), (char)_input.La(-1)))}?
;
//
// Additional symbols not defined in the lexical specification
//
AT : '@';
ELLIPSIS : '...';
//
// Whitespace and comments
//
WS : [ \t\r\n\u000C]+ -> skip
; ;
COMMENT COMMENT
: '/*' .*? '*/' -> channel(HIDDEN) : '/*' .*? '*/' -> skip
; ;
LINE_COMMENT LINE_COMMENT
: '//' ~('\n'|'\r')* '\r'? '\n' -> channel(HIDDEN) : '//' ~[\r\n]* -> skip
; ;

View File

@ -0,0 +1,272 @@
namespace Antlr4.Runtime.Test
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using Antlr4.Runtime.Misc;
/// <summary>
/// An <see cref="ICharStream"/> decorator which translates Java-style Unicode
/// escapes (a backslash, one or more <c>u</c> characters and four hexadecimal
/// digits) found in the wrapped stream into the character they denote.
/// </summary>
/// <remarks>
/// <para>Only <see cref="La"/> and <see cref="Consume"/> are escape-aware.
/// <see cref="Index"/>, <see cref="Size"/>, <see cref="Seek"/> and
/// <see cref="GetText"/> are forwarded to the wrapped stream and therefore use
/// the coordinates (and raw text) of the underlying input.</para>
/// <para>Escapes are discovered lazily as lookahead advances and are remembered
/// in three parallel lists so that they only have to be decoded once.</para>
/// </remarks>
public class JavaUnicodeInputStream : ICharStream
{
    /// <summary>The wrapped stream containing the raw (unescaped) input.</summary>
    [NotNull]
    private readonly ICharStream _source;

    // Parallel lists describing every escape decoded so far. Entry k describes
    // one escape: its starting index in _source, the decoded character, and
    // the number of *extra* 'u' characters it contained. Entries are appended
    // as lookahead moves forward, so _escapeIndexes stays sorted (Seek relies
    // on that for its binary search).
    private readonly List<int> _escapeIndexes = new List<int>();
    private readonly List<int> _escapeCharacters = new List<int>();
    private readonly List<int> _escapeIndirectionLevels = new List<int>();

    /// <summary>Position in the escape lists of the first escape at or after the current index.</summary>
    private int _escapeListIndex;

    /// <summary>Largest source index up to which the input has already been examined for escapes.</summary>
    private int _range;

    /// <summary>
    /// Number of contiguous backslashes consumed immediately before the current
    /// position. A backslash preceded by an odd number of backslashes cannot
    /// start a Unicode escape.
    /// </summary>
    private int _slashCount;

    /// <summary>Cached value of <c>_source.La(1)</c> (the raw, untranslated character).</summary>
    private int _la1;

    /// <summary>
    /// Creates a stream which decodes Unicode escapes read from <paramref name="source"/>.
    /// </summary>
    /// <param name="source">The stream providing the raw input.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
    public JavaUnicodeInputStream([NotNull] ICharStream source)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        this._source = source;
        this._la1 = source.La(1);
    }

    /// <summary>Gets the size of the wrapped stream (raw characters, escapes not collapsed).</summary>
    public int Size
    {
        get
        {
            return _source.Size;
        }
    }

    /// <summary>Gets the current index within the wrapped stream.</summary>
    public int Index
    {
        get
        {
            return _source.Index;
        }
    }

    /// <summary>Gets the source name reported by the wrapped stream.</summary>
    public string SourceName
    {
        get
        {
            return _source.SourceName;
        }
    }

    /// <summary>
    /// Returns the text of the wrapped stream for <paramref name="interval"/>.
    /// The call is forwarded unchanged, so escapes are not translated here.
    /// </summary>
    public String GetText(Interval interval)
    {
        return _source.GetText(interval);
    }

    /// <summary>
    /// Consumes one logical character: a single raw character, or a complete
    /// Unicode escape when one starts at the current position.
    /// </summary>
    public void Consume()
    {
        // Fast path: anything other than a backslash is a single raw character.
        if (_la1 != '\\')
        {
            _source.Consume();
            _la1 = _source.La(1);
            _range = Math.Max(_range, _source.Index);
            _slashCount = 0;
            return;
        }

        // make sure the next character has been processed
        this.La(1);

        if (_escapeListIndex >= _escapeIndexes.Count || _escapeIndexes[_escapeListIndex] != Index)
        {
            // A plain backslash which does not start an escape.
            _source.Consume();
            _slashCount++;
        }
        else
        {
            // A recorded escape: skip the backslash, every 'u' and the four
            // hex digits (6 raw characters plus the extra 'u' characters).
            int indirectionLevel = _escapeIndirectionLevels[_escapeListIndex];
            for (int i = 0; i < 6 + indirectionLevel; i++)
            {
                _source.Consume();
            }

            _escapeListIndex++;
            _slashCount = 0;
        }

        _la1 = _source.La(1);
        Debug.Assert(_range >= Index);
    }

    /// <summary>
    /// Returns the logical character at lookahead offset <paramref name="i"/>,
    /// with Unicode escapes translated.
    /// </summary>
    /// <param name="i">
    /// Offset relative to the current position. Positive values look ahead
    /// (1 is the next character); zero and negative values look behind.
    /// </param>
    /// <returns>
    /// The decoded character, or whatever the wrapped stream returns for the
    /// corresponding raw position when no escape is involved.
    /// </returns>
    public int La(int i)
    {
        if (i == 1 && _la1 != '\\')
        {
            return _la1;
        }

        if (i <= 0)
        {
            // Look-behind: walk the already-consumed escapes from newest to
            // oldest, converting the logical offset into a raw source index.
            int desiredIndex = Index + i;
            for (int j = _escapeListIndex - 1; j >= 0; j--)
            {
                if (_escapeIndexes[j] + 6 + _escapeIndirectionLevels[j] > desiredIndex)
                {
                    // This escape lies at or after the target, so its extra
                    // raw characters must be skipped over.
                    desiredIndex -= 5 + _escapeIndirectionLevels[j];
                }

                if (_escapeIndexes[j] == desiredIndex)
                {
                    return _escapeCharacters[j];
                }
            }

            return _source.La(desiredIndex - Index);
        }
        else
        {
            // Look-ahead, step 1: try to answer from escapes already recorded.
            int desiredIndex = Index + i - 1;
            for (int j = _escapeListIndex; j < _escapeIndexes.Count; j++)
            {
                if (_escapeIndexes[j] == desiredIndex)
                {
                    return _escapeCharacters[j];
                }
                else if (_escapeIndexes[j] < desiredIndex)
                {
                    // An escape sits before the target; account for the raw
                    // characters it occupies beyond the one logical character.
                    desiredIndex += 5 + _escapeIndirectionLevels[j];
                }
                else
                {
                    return _source.La(desiredIndex - Index + 1);
                }
            }

            // Step 2: scan forward from the current position, decoding (and
            // recording) escapes which have not been seen before.
            int currentIndex = Index;
            int slashCount = _slashCount;
            int indirectionLevel = 0;
            for (int j = 0; j < i; j++)
            {
                int previousIndex = currentIndex;
                int c = ReadCharAt(ref currentIndex, ref slashCount, ref indirectionLevel);
                if (currentIndex > _range)
                {
                    // More than one raw character was read, so this was an escape.
                    if (currentIndex - previousIndex > 1)
                    {
                        _escapeIndexes.Add(previousIndex);
                        _escapeCharacters.Add(c);
                        _escapeIndirectionLevels.Add(indirectionLevel);
                    }

                    _range = currentIndex;
                }

                if (j == i - 1)
                {
                    return c;
                }
            }

            throw new InvalidOperationException("shouldn't be reachable");
        }
    }

    /// <summary>Forwards to the wrapped stream.</summary>
    public int Mark()
    {
        return _source.Mark();
    }

    /// <summary>Forwards to the wrapped stream.</summary>
    public void Release(int marker)
    {
        _source.Release(marker);
    }

    /// <summary>
    /// Moves to raw source index <paramref name="index"/> and rebuilds the
    /// bookkeeping (cached lookahead, backslash count, escape list position).
    /// </summary>
    /// <param name="index">An index within the wrapped stream.</param>
    /// <exception cref="NotSupportedException">
    /// <paramref name="index"/> lies beyond the part of the input which has
    /// already been examined for escapes.
    /// </exception>
    public void Seek(int index)
    {
        if (index > _range)
        {
            throw new NotSupportedException();
        }

        _source.Seek(index);
        _la1 = _source.La(1);

        // Recount the backslashes immediately preceding the new position.
        _slashCount = 0;
        while (_source.La(-_slashCount - 1) == '\\')
        {
            _slashCount++;
        }

        // Locate the first recorded escape at or after the new position. A
        // negative result is the bitwise complement of the insertion point.
        _escapeListIndex = _escapeIndexes.BinarySearch(_source.Index);
        if (_escapeListIndex < 0)
        {
            _escapeListIndex = -_escapeListIndex - 1;
        }
    }

    /// <summary>Returns <c>true</c> when <paramref name="c"/> is an ASCII hexadecimal digit.</summary>
    private static bool IsHexDigit(int c)
    {
        return c >= '0' && c <= '9'
            || c >= 'a' && c <= 'f'
            || c >= 'A' && c <= 'F';
    }

    /// <summary>Returns the numeric value (0-15) of the hexadecimal digit <paramref name="c"/>.</summary>
    /// <exception cref="ArgumentException"><paramref name="c"/> is not a hexadecimal digit.</exception>
    private static int HexValue(int c)
    {
        if (c >= '0' && c <= '9')
        {
            return c - '0';
        }

        if (c >= 'a' && c <= 'f')
        {
            return c - 'a' + 10;
        }

        if (c >= 'A' && c <= 'F')
        {
            return c - 'A' + 10;
        }

        // The single-argument constructor takes a *message*; pass the
        // parameter name separately so ParamName is populated.
        throw new ArgumentException("The character is not a hexadecimal digit.", "c");
    }

    /// <summary>
    /// Reads the logical character which starts at raw index
    /// <paramref name="nextIndex"/> and advances <paramref name="nextIndex"/>
    /// past it.
    /// </summary>
    /// <param name="nextIndex">
    /// Raw index to read from; advanced by 1 for an ordinary character or by
    /// 6 plus the number of extra <c>u</c> characters for an escape.
    /// </param>
    /// <param name="slashCount">
    /// Number of contiguous backslashes immediately preceding
    /// <paramref name="nextIndex"/>; updated for the character just read.
    /// </param>
    /// <param name="indirectionLevel">
    /// Set to the number of extra <c>u</c> characters when a backslash followed
    /// by <c>u</c> is examined; otherwise left unchanged.
    /// </param>
    /// <returns>The decoded escape value, or the raw character.</returns>
    private int ReadCharAt(ref int nextIndex, ref int slashCount, ref int indirectionLevel)
    {
        // A backslash preceded by an odd number of backslashes is itself
        // escaped and cannot begin a Unicode escape.
        bool blockUnicodeEscape = (slashCount % 2) != 0;

        int c0 = _source.La(nextIndex - Index + 1);
        if (c0 == '\\')
        {
            slashCount++;

            if (!blockUnicodeEscape)
            {
                int c1 = _source.La(nextIndex - Index + 2);
                if (c1 == 'u')
                {
                    // Skip any additional 'u' characters.
                    int c2 = _source.La(nextIndex - Index + 3);
                    indirectionLevel = 0;
                    while (c2 == 'u')
                    {
                        indirectionLevel++;
                        c2 = _source.La(nextIndex - Index + 3 + indirectionLevel);
                    }

                    int c3 = _source.La(nextIndex - Index + 4 + indirectionLevel);
                    int c4 = _source.La(nextIndex - Index + 5 + indirectionLevel);
                    int c5 = _source.La(nextIndex - Index + 6 + indirectionLevel);
                    if (IsHexDigit(c2) && IsHexDigit(c3) && IsHexDigit(c4) && IsHexDigit(c5))
                    {
                        int value = HexValue(c2);
                        value = (value << 4) + HexValue(c3);
                        value = (value << 4) + HexValue(c4);
                        value = (value << 4) + HexValue(c5);

                        nextIndex += 6 + indirectionLevel;
                        slashCount = 0;
                        return value;
                    }
                }
            }
        }
        else
        {
            // Only *contiguous* backslashes count. Without this reset, a
            // multi-character lookahead over input such as \a\u0041 would
            // carry the first backslash across the 'a' and wrongly block the
            // escape that follows.
            slashCount = 0;
        }

        nextIndex++;
        return c0;
    }
}
}

View File

@ -18,7 +18,9 @@
using File = System.IO.File; using File = System.IO.File;
using FileInfo = System.IO.FileInfo; using FileInfo = System.IO.FileInfo;
using Interlocked = System.Threading.Interlocked; using Interlocked = System.Threading.Interlocked;
using IOException = System.IO.IOException;
using Path = System.IO.Path; using Path = System.IO.Path;
using SearchOption = System.IO.SearchOption;
using Stopwatch = System.Diagnostics.Stopwatch; using Stopwatch = System.Diagnostics.Stopwatch;
using Stream = System.IO.Stream; using Stream = System.IO.Stream;
using StreamReader = System.IO.StreamReader; using StreamReader = System.IO.StreamReader;
@ -44,6 +46,18 @@
* {@link #TOP_PACKAGE}. * {@link #TOP_PACKAGE}.
*/ */
private static readonly bool RECURSIVE = true; private static readonly bool RECURSIVE = true;
/**
* {@code true} to read all source files from disk into memory before
* starting the parse. The default value is {@code true} to help prevent
* drive speed from affecting the performance results. This value may be set
* to {@code false} to support parsing large input sets which would not
* otherwise fit into memory.
*/
private static readonly bool PRELOAD_SOURCES = true;
/**
* The encoding to use when reading source files.
*/
private static readonly Encoding ENCODING = Encoding.UTF8;
/** /**
* {@code true} to use the Java grammar with expressions in the v4 * {@code true} to use the Java grammar with expressions in the v4
@ -95,7 +109,7 @@
* {@code true} to use {@link BailErrorStrategy}, {@code false} to use * {@code true} to use {@link BailErrorStrategy}, {@code false} to use
* {@link DefaultErrorStrategy}. * {@link DefaultErrorStrategy}.
*/ */
private static readonly bool BAIL_ON_ERROR = true; private static readonly bool BAIL_ON_ERROR = false;
/** /**
* {@code true} to compute a checksum for verifying consistency across * {@code true} to compute a checksum for verifying consistency across
* optimizations and multiple passes. * optimizations and multiple passes.
@ -230,7 +244,7 @@
DirectoryInfo directory = new DirectoryInfo(jdkSourceRoot); DirectoryInfo directory = new DirectoryInfo(jdkSourceRoot);
Assert.IsTrue(directory.Exists); Assert.IsTrue(directory.Exists);
IEnumerable<ICharStream> sources = loadSources(directory, "*.java", RECURSIVE); IEnumerable<InputDescriptor> sources = LoadSources(directory, "*.java", RECURSIVE);
Console.Out.Write(getOptionsDescription(TOP_PACKAGE)); Console.Out.Write(getOptionsDescription(TOP_PACKAGE));
@ -327,7 +341,7 @@
* This method is separate from {@link #parse2} so the first pass can be distinguished when analyzing * This method is separate from {@link #parse2} so the first pass can be distinguished when analyzing
* profiler results. * profiler results.
*/ */
protected void parse1(ParserFactory factory, IEnumerable<ICharStream> sources) protected void parse1(ParserFactory factory, IEnumerable<InputDescriptor> sources)
{ {
GC.Collect(); GC.Collect();
parseSources(factory, sources); parseSources(factory, sources);
@ -337,49 +351,33 @@
* This method is separate from {@link #parse1} so the first pass can be distinguished when analyzing * This method is separate from {@link #parse1} so the first pass can be distinguished when analyzing
* profiler results. * profiler results.
*/ */
protected void parse2(ParserFactory factory, IEnumerable<ICharStream> sources) protected void parse2(ParserFactory factory, IEnumerable<InputDescriptor> sources)
{ {
GC.Collect(); GC.Collect();
parseSources(factory, sources); parseSources(factory, sources);
} }
protected IEnumerable<ICharStream> loadSources(DirectoryInfo directory, string filter, bool recursive) protected IList<InputDescriptor> LoadSources(DirectoryInfo directory, string filter, bool recursive)
{ {
return loadSources(directory, filter, null, recursive); IList<InputDescriptor> result = new List<InputDescriptor>();
} LoadSources(directory, filter, recursive, result);
protected IEnumerable<ICharStream> loadSources(DirectoryInfo directory, string filter, Encoding encoding, bool recursive)
{
ICollection<ICharStream> result = new List<ICharStream>();
loadSources(directory, filter, encoding, recursive, result);
return result; return result;
} }
protected void loadSources(DirectoryInfo directory, string filter, Encoding encoding, bool recursive, ICollection<ICharStream> result) protected void LoadSources(DirectoryInfo directory, string filter, bool recursive, ICollection<InputDescriptor> result)
{ {
Debug.Assert(directory.Exists); Debug.Assert(directory.Exists);
FileInfo[] sources = directory.GetFiles(filter); FileInfo[] sources = directory.GetFiles(filter, recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly);
foreach (FileInfo file in sources) foreach (FileInfo file in sources)
{ {
var stream = new StreamReader(File.OpenRead(file.FullName), encoding); result.Add(new InputDescriptor(file.FullName));
ICharStream input = new AntlrInputStream(stream);
result.Add(input);
}
if (recursive)
{
DirectoryInfo[] children = directory.GetDirectories();
foreach (DirectoryInfo child in children)
{
loadSources(child, filter, encoding, true, result);
}
} }
} }
int configOutputSize = 0; int configOutputSize = 0;
protected void parseSources(ParserFactory factory, IEnumerable<ICharStream> sources) protected void parseSources(ParserFactory factory, IEnumerable<InputDescriptor> sources)
{ {
Stopwatch startTime = Stopwatch.StartNew(); Stopwatch startTime = Stopwatch.StartNew();
Thread.VolatileWrite(ref tokenCount, 0); Thread.VolatileWrite(ref tokenCount, 0);
@ -397,8 +395,9 @@
#else #else
ICollection<Func<int>> results = new List<Func<int>>(); ICollection<Func<int>> results = new List<Func<int>>();
#endif #endif
foreach (ICharStream input in sources) foreach (InputDescriptor inputDescriptor in sources)
{ {
ICharStream input = inputDescriptor.GetInputStream();
sourceCount++; sourceCount++;
input.Seek(0); input.Seek(0);
inputSize += input.Size; inputSize += input.Size;
@ -1094,5 +1093,107 @@
updateChecksum(checksum, ctx.Stop); updateChecksum(checksum, ctx.Stop);
} }
} }
/// <summary>
/// Describes one source file to parse and hands out a fresh, escape-decoding
/// character stream for it on demand.
/// </summary>
/// <remarks>
/// The file contents are cached after the first load. When
/// <c>PRELOAD_SOURCES</c> is set the cache is a strong reference (and is
/// filled by the constructor); otherwise it is a weak reference, so the
/// contents may be collected and re-read later.
/// </remarks>
protected sealed class InputDescriptor
{
    private readonly string source;
    private WeakReference<CloneableAntlrFileStream> inputStream;
    private CloneableAntlrFileStream strongInputStream;

    /// <summary>Creates a descriptor for the file at <paramref name="source"/>.</summary>
    public InputDescriptor([NotNull] String source)
    {
        this.source = source;

        // Loading eagerly keeps disk access out of the timed parse.
        if (PRELOAD_SOURCES)
        {
            GetInputStream();
        }
    }

    /// <summary>
    /// Returns a new stream over a private copy of the (cached) file
    /// contents, wrapped so that Unicode escapes are decoded.
    /// </summary>
    [return: NotNull]
    public ICharStream GetInputStream()
    {
        CloneableAntlrFileStream cached;
        bool available = TryGetTarget(out cached);
        if (!available)
        {
            cached = new CloneableAntlrFileStream(source, ENCODING);
            SetTarget(cached);
        }

        AntlrInputStream copy = cached.CreateCopy();
        return new JavaUnicodeInputStream(copy);
    }

    /// <summary>Stores <paramref name="stream"/> in whichever cache slot is in use.</summary>
    private void SetTarget(CloneableAntlrFileStream stream)
    {
        if (!PRELOAD_SOURCES)
        {
            inputStream = new WeakReference<CloneableAntlrFileStream>(stream);
            return;
        }

        strongInputStream = stream;
    }

    /// <summary>
    /// Fetches the cached stream, if any.
    /// </summary>
    /// <returns><c>true</c> when <paramref name="stream"/> holds a usable cached stream.</returns>
    private bool TryGetTarget(out CloneableAntlrFileStream stream)
    {
        if (PRELOAD_SOURCES)
        {
            stream = strongInputStream;
            return stream != null;
        }

        if (inputStream != null)
        {
            return inputStream.TryGetTarget(out stream);
        }

        // Nothing has been cached yet.
        stream = null;
        return false;
    }
}
/// <summary>
/// A file-backed input stream whose loaded contents can be copied into
/// independent <see cref="AntlrInputStream"/> instances.
/// </summary>
#if PORTABLE
protected class CloneableAntlrFileStream : AntlrInputStream
#else
protected class CloneableAntlrFileStream : AntlrFileStream
#endif
{
    /// <summary>
    /// Loads <paramref name="fileName"/> using <paramref name="encoding"/>.
    /// In PORTABLE builds the text is read with <c>File.ReadAllText</c> and
    /// passed to the base constructor; otherwise the base class is given the
    /// file name and encoding directly.
    /// </summary>
    public CloneableAntlrFileStream(String fileName, Encoding encoding)
#if PORTABLE
        : base(File.ReadAllText(fileName, encoding))
#else
        : base(fileName, encoding)
#endif
    {
    }

    /// <summary>
    /// Creates a new stream from this stream's <c>data</c> and <c>n</c>
    /// members, carrying over the source name.
    /// </summary>
    public AntlrInputStream CreateCopy()
    {
        AntlrInputStream copy = new AntlrInputStream(data, n);
        copy.name = SourceName;
        return copy;
    }
}
#if !NET45
/// <summary>
/// Minimal strongly-typed wrapper over the non-generic
/// <see cref="WeakReference"/>, compiled only when <c>NET45</c> is not defined.
/// </summary>
/// <typeparam name="T">The reference type being tracked.</typeparam>
private sealed class WeakReference<T>
    where T : class
{
    private readonly WeakReference _reference;

    /// <summary>Starts tracking <paramref name="reference"/> without keeping it alive.</summary>
    public WeakReference(T reference)
    {
        _reference = new WeakReference(reference);
    }

    /// <summary>
    /// Retrieves the tracked object if it is still available.
    /// </summary>
    /// <param name="reference">Receives the tracked object, or <c>null</c>.</param>
    /// <returns><c>true</c> when <paramref name="reference"/> is not <c>null</c>.</returns>
    public bool TryGetTarget(out T reference)
    {
        // Read Target exactly once so the result and the return value agree.
        object target = _reference.Target;
        reference = (T)target;
        return target != null;
    }
}
#endif
} }
} }