Updated TestPerformance and associated code for the latest release

This commit is contained in:
Sam Harwell 2014-07-01 17:28:30 -05:00
parent 2cdc0537fa
commit 63053efd2a
11 changed files with 1458 additions and 373 deletions

View File

@ -26,7 +26,7 @@
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Portable\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE;NET_3_5;NET_4_0</DefineConstants>
<DefineConstants>DEBUG;TRACE;PORTABLE;NET40;NET40PLUS;NET35PLUS;NET30PLUS;NET20PLUS</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
@ -35,7 +35,7 @@
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Portable\Release\</OutputPath>
<DefineConstants>TRACE;NET_3_5;NET_4_0</DefineConstants>
<DefineConstants>TRACE;PORTABLE;NET40;NET40PLUS;NET35PLUS;NET30PLUS;NET20PLUS</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
@ -57,6 +57,7 @@
</Choose>
<ItemGroup>
<Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" />

View File

@ -57,6 +57,7 @@
</Choose>
<ItemGroup>
<Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" />

View File

@ -57,6 +57,7 @@
</Choose>
<ItemGroup>
<Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" />

View File

@ -57,6 +57,7 @@
</Choose>
<ItemGroup>
<Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" />

View File

@ -57,6 +57,7 @@
</Choose>
<ItemGroup>
<Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" />

View File

@ -57,6 +57,7 @@
</Choose>
<ItemGroup>
<Compile Include="BaseTest.cs" />
<Compile Include="JavaUnicodeInputStream.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Sharpen\Checksum.cs" />
<Compile Include="Sharpen\CRC32.cs" />

View File

@ -268,11 +268,11 @@
string compiler = PathCombine(JavaHome, "bin", "java.exe");
List<string> classpath = new List<string>();
classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4-csharp", "4.0.1-SNAPSHOT"));
classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4-runtime", "4.0.1-SNAPSHOT"));
classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4", "4.0.1-SNAPSHOT"));
classpath.Add(GetMavenArtifact("org.antlr", "antlr-runtime", "3.5"));
classpath.Add(GetMavenArtifact("org.antlr", "ST4", "4.0.7"));
classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4-csharp", "4.3-SNAPSHOT"));
classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4-runtime", "4.3"));
classpath.Add(GetMavenArtifact("com.tunnelvisionlabs", "antlr4", "4.3"));
classpath.Add(GetMavenArtifact("org.antlr", "antlr-runtime", "3.5.2"));
classpath.Add(GetMavenArtifact("org.antlr", "ST4", "4.0.8"));
List<string> options = new List<string>();
options.Add("-cp");
@ -284,7 +284,23 @@
options.Add(tmpdir);
options.Add("-lib");
options.Add(tmpdir);
options.Add("-Dlanguage=CSharp");
#if PORTABLE
options.Add("-Dlanguage=CSharp_v4_5");
#elif NET45
options.Add("-Dlanguage=CSharp_v4_5");
#elif NET40
options.Add("-Dlanguage=CSharp_v4_0");
#elif NET35
options.Add("-Dlanguage=CSharp_v3_5");
#elif NET30
options.Add("-Dlanguage=CSharp_v3_0");
#elif NET20
options.Add("-Dlanguage=CSharp_v2_0");
#else
#error Unknown assembly.
#endif
options.Add(grammarFileName);
System.Diagnostics.Process process = System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo(compiler, '"' + Utils.Join("\" \"", options) + '"')

View File

@ -171,14 +171,48 @@
grammar Java;
@lexer::members {
protected const int EOF = Eof;
protected const int HIDDEN = Hidden;
protected bool enumIsKeyword = true;
protected bool assertIsKeyword = true;
// Classifies a single UTF-16 code unit as a legal Java identifier character,
// using .NET Unicode categories as the stand-in for Java's
// Character.isJavaIdentifierStart (start == true) and
// Character.isJavaIdentifierPart (start == false).
//
//   c     - the code unit to classify.
//   start - true to test for the first character of an identifier,
//           false to test for any subsequent character.
//
// Returns true when the character is allowed in the requested position.
// Surrogate code units fall through to the default case and are rejected;
// supplementary characters are therefore not supported by this helper.
private static bool IsJavaIdentifierCharacter(char c, bool start)
{
    switch (char.GetUnicodeCategory(c))
    {
    case System.Globalization.UnicodeCategory.UppercaseLetter:
    case System.Globalization.UnicodeCategory.LowercaseLetter:
    case System.Globalization.UnicodeCategory.TitlecaseLetter:
    case System.Globalization.UnicodeCategory.ModifierLetter:
    case System.Globalization.UnicodeCategory.OtherLetter:
        // isLetter returns true
        return true;

    case System.Globalization.UnicodeCategory.LetterNumber:
        // getType returns LETTER_NUMBER
        return true;

    case System.Globalization.UnicodeCategory.CurrencySymbol:
        // a currency symbol (such as "$")
        return true;

    case System.Globalization.UnicodeCategory.ConnectorPunctuation:
        // a connecting punctuation character (such as "_")
        return true;

    case System.Globalization.UnicodeCategory.DecimalDigitNumber:
        // it is a digit
        return !start;

    case System.Globalization.UnicodeCategory.SpacingCombiningMark:
        // it is a combining mark
        return !start;

    case System.Globalization.UnicodeCategory.NonSpacingMark:
        // it is a non-spacing mark
        return !start;

    case System.Globalization.UnicodeCategory.Format:
        // isIdentifierIgnorable returns true for every FORMAT character,
        // which makes it a valid identifier part (but never a start)
        return !start;

    case System.Globalization.UnicodeCategory.Control:
        // isIdentifierIgnorable also accepts the ISO control characters that
        // are not whitespace: U+0000-U+0008, U+000E-U+001B and U+007F-U+009F.
        // The remaining controls (U+0009-U+000D, U+001C-U+001F) are whitespace.
        if (start)
        {
            return false;
        }

        bool whitespaceControl = (c >= '\u0009' && c <= '\u000D') || (c >= '\u001C' && c <= '\u001F');
        return !whitespaceControl;

    default:
        return false;
    }
}
@parser::members {
protected const int EOF = Eof;
}
// starting point for parsing a java file
@ -216,14 +250,15 @@ classOrInterfaceModifiers
;
classOrInterfaceModifier
: annotation // class or interface
| 'public' // class or interface
| 'protected' // class or interface
| 'private' // class or interface
| 'abstract' // class or interface
| 'static' // class or interface
| 'final' // class only -- does not apply to interfaces
| 'strictfp' // class or interface
: annotation // class or interface
| ( 'public' // class or interface
| 'protected' // class or interface
| 'private' // class or interface
| 'abstract' // class or interface
| 'static' // class or interface
| 'final' // class only -- does not apply to interfaces
| 'strictfp' // class or interface
)
;
modifiers
@ -420,17 +455,18 @@ arrayInitializer
modifier
: annotation
| 'public'
| 'protected'
| 'private'
| 'static'
| 'abstract'
| 'final'
| 'native'
| 'synchronized'
| 'transient'
| 'volatile'
| 'strictfp'
| ( 'public'
| 'protected'
| 'private'
| 'static'
| 'abstract'
| 'final'
| 'native'
| 'synchronized'
| 'transient'
| 'volatile'
| 'strictfp'
)
;
packageOrTypeName
@ -501,39 +537,22 @@ methodBody
;
constructorBody
: '{' explicitConstructorInvocation? blockStatement* '}'
: block
;
explicitConstructorInvocation
: nonWildcardTypeArguments? ('this' | 'super') arguments ';'
| primary '.' nonWildcardTypeArguments? 'super' arguments ';'
;
qualifiedName
: Identifier ('.' Identifier)*
;
literal
: integerLiteral
: IntegerLiteral
| FloatingPointLiteral
| CharacterLiteral
| StringLiteral
| booleanLiteral
| BooleanLiteral
| 'null'
;
integerLiteral
: HexLiteral
| OctalLiteral
| DecimalLiteral
;
booleanLiteral
: 'true'
| 'false'
;
// ANNOTATIONS
annotations
@ -576,6 +595,7 @@ annotationTypeBody
annotationTypeElementDeclaration
: modifiers annotationTypeElementRest
| ';' // this is not allowed by the grammar, but apparently allowed by the actual compiler
;
annotationTypeElementRest
@ -628,18 +648,14 @@ variableModifiers
;
statement
@leftfactor{catches}
: block
| ASSERT expression (':' expression)? ';'
| 'if' parExpression statement ('else' statement)?
| 'for' '(' forControl ')' statement
| 'while' parExpression statement
| 'do' statement 'while' parExpression ';'
| 'try' block
( catches 'finally' block
| catches
| 'finally' block
)
| 'try' block (catches finallyBlock? | finallyBlock)
| 'try' resourceSpecification block catches? finallyBlock?
| 'switch' parExpression '{' switchBlockStatementGroups '}'
| 'synchronized' parExpression block
| 'return' expression? ';'
@ -650,15 +666,35 @@ statement
| statementExpression ';'
| Identifier ':' statement
;
catches
: catchClause (catchClause)*
: catchClause+
;
catchClause
: 'catch' '(' formalParameter ')' block
: 'catch' '(' variableModifiers catchType Identifier ')' block
;
// Exception type list of a catch clause: one or more qualified names
// separated by '|'.
catchType
: qualifiedName ('|' qualifiedName)*
;
// The 'finally' keyword followed by a block.
finallyBlock
: 'finally' block
;
// Parenthesized resource list; a trailing ';' after the last resource is optional.
resourceSpecification
: '(' resources ';'? ')'
;
// One or more resources separated by ';'.
resources
: resource (';' resource)*
;
// A single resource: modifiers, a class or interface type, the declarator id,
// and a mandatory '=' initializer expression.
resource
: variableModifiers classOrInterfaceType variableDeclaratorId '=' expression
;
formalParameter
: variableModifiers type variableDeclaratorId
;
@ -721,9 +757,8 @@ expression
: primary
| expression '.' Identifier
| expression '.' 'this'
| expression '.' 'super' '(' expressionList? ')'
| expression '.' 'new' Identifier '(' expressionList? ')'
| expression '.' 'super' '.' Identifier arguments?
| expression '.' 'new' nonWildcardTypeArguments? innerCreator
| expression '.' 'super' superSuffix
| expression '.' explicitGenericInvocation
| 'new' creator
| expression '[' expression ']'
@ -735,28 +770,28 @@ expression
| expression ('*'|'/'|'%') expression
| expression ('+'|'-') expression
| expression ('<' '<' | '>' '>' '>' | '>' '>') expression
| expression ('<' '=' | '>' '=' | '>' | '<') expression
| expression ('<=' | '>=' | '>' | '<') expression
| expression 'instanceof' type
| expression ('==' | '!=') expression
| expression '&' expression
| expression '^'<assoc=right> expression
| expression '^' expression
| expression '|' expression
| expression '&&' expression
| expression '||' expression
| expression '?' expression ':' expression
| expression
('='<assoc=right>
| '+='<assoc=right>
| '-='<assoc=right>
| '*='<assoc=right>
| '/='<assoc=right>
| '&='<assoc=right>
| '|='<assoc=right>
| '^='<assoc=right>
| '>' '>' '='<assoc=right>
| '>' '>' '>' '='<assoc=right>
| '<' '<' '='<assoc=right>
| '%='<assoc=right>
|<assoc=right> expression
( '='
| '+='
| '-='
| '*='
| '/='
| '&='
| '|='
| '^='
| '>>='
| '>>>='
| '<<='
| '%='
)
expression
;
@ -769,6 +804,7 @@ primary
| Identifier
| type '.' 'class'
| 'void' '.' 'class'
| nonWildcardTypeArguments (explicitGenericInvocationSuffix | 'this' arguments)
;
creator
@ -777,12 +813,12 @@ creator
;
createdName
: classOrInterfaceType
: Identifier typeArgumentsOrDiamond? ('.' Identifier typeArgumentsOrDiamond?)*
| primitiveType
;
innerCreator
: nonWildcardTypeArguments? Identifier classCreatorRest
: Identifier nonWildcardTypeArgumentsOrDiamond? classCreatorRest
;
arrayCreatorRest
@ -797,148 +833,461 @@ classCreatorRest
;
explicitGenericInvocation
: nonWildcardTypeArguments Identifier arguments
: nonWildcardTypeArguments explicitGenericInvocationSuffix
;
nonWildcardTypeArguments
: '<' typeList '>'
;
selector
: '.' Identifier arguments?
| '.' 'this'
| '.' 'super' superSuffix
| '.' 'new' innerCreator
| '[' expression ']'
;
// Either the empty diamond '<' '>' or a full typeArguments list.
typeArgumentsOrDiamond
: '<' '>'
| typeArguments
;
// Either the empty diamond '<' '>' or a nonWildcardTypeArguments list.
nonWildcardTypeArgumentsOrDiamond
: '<' '>'
| nonWildcardTypeArguments
;
// What may follow 'super': an argument list, or '.' Identifier with an
// optional argument list.
superSuffix
: arguments
| '.' Identifier arguments?
;
// What may follow explicit type arguments: 'super' plus its suffix, or an
// Identifier followed by a mandatory argument list.
explicitGenericInvocationSuffix
: 'super' superSuffix
| Identifier arguments
;
arguments
: '(' expressionList? ')'
;
// LEXER
HexLiteral : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;
// §3.9 Keywords
DecimalLiteral : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;
ABSTRACT : 'abstract';
ASSERT : 'assert';
BOOLEAN : 'boolean';
BREAK : 'break';
BYTE : 'byte';
CASE : 'case';
CATCH : 'catch';
CHAR : 'char';
CLASS : 'class';
CONST : 'const';
CONTINUE : 'continue';
DEFAULT : 'default';
DO : 'do';
DOUBLE : 'double';
ELSE : 'else';
ENUM : 'enum';
EXTENDS : 'extends';
FINAL : 'final';
FINALLY : 'finally';
FLOAT : 'float';
FOR : 'for';
IF : 'if';
GOTO : 'goto';
IMPLEMENTS : 'implements';
IMPORT : 'import';
INSTANCEOF : 'instanceof';
INT : 'int';
INTERFACE : 'interface';
LONG : 'long';
NATIVE : 'native';
NEW : 'new';
PACKAGE : 'package';
PRIVATE : 'private';
PROTECTED : 'protected';
PUBLIC : 'public';
RETURN : 'return';
SHORT : 'short';
STATIC : 'static';
STRICTFP : 'strictfp';
SUPER : 'super';
SWITCH : 'switch';
SYNCHRONIZED : 'synchronized';
THIS : 'this';
THROW : 'throw';
THROWS : 'throws';
TRANSIENT : 'transient';
TRY : 'try';
VOID : 'void';
VOLATILE : 'volatile';
WHILE : 'while';
OctalLiteral : '0' ('0'..'7')+ IntegerTypeSuffix? ;
// §3.10.1 Integer Literals
IntegerLiteral
: DecimalIntegerLiteral
| HexIntegerLiteral
| OctalIntegerLiteral
| BinaryIntegerLiteral
;
fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
DecimalIntegerLiteral
: DecimalNumeral IntegerTypeSuffix?
;
fragment
IntegerTypeSuffix : ('l'|'L') ;
HexIntegerLiteral
: HexNumeral IntegerTypeSuffix?
;
fragment
OctalIntegerLiteral
: OctalNumeral IntegerTypeSuffix?
;
fragment
BinaryIntegerLiteral
: BinaryNumeral IntegerTypeSuffix?
;
fragment
IntegerTypeSuffix
: [lL]
;
fragment
DecimalNumeral
: '0'
| NonZeroDigit (Digits? | Underscores Digits)
;
fragment
Digits
: Digit (DigitsAndUnderscores? Digit)?
;
fragment
Digit
: '0'
| NonZeroDigit
;
fragment
NonZeroDigit
: [1-9]
;
fragment
DigitsAndUnderscores
: DigitOrUnderscore+
;
fragment
DigitOrUnderscore
: Digit
| '_'
;
fragment
Underscores
: '_'+
;
fragment
HexNumeral
: '0' [xX] HexDigits
;
fragment
HexDigits
: HexDigit (HexDigitsAndUnderscores? HexDigit)?
;
fragment
HexDigit
: [0-9a-fA-F]
;
fragment
HexDigitsAndUnderscores
: HexDigitOrUnderscore+
;
fragment
HexDigitOrUnderscore
: HexDigit
| '_'
;
fragment
OctalNumeral
: '0' Underscores? OctalDigits
;
fragment
OctalDigits
: OctalDigit (OctalDigitsAndUnderscores? OctalDigit)?
;
fragment
OctalDigit
: [0-7]
;
fragment
OctalDigitsAndUnderscores
: OctalDigitOrUnderscore+
;
fragment
OctalDigitOrUnderscore
: OctalDigit
| '_'
;
fragment
BinaryNumeral
: '0' [bB] BinaryDigits
;
fragment
BinaryDigits
: BinaryDigit (BinaryDigitsAndUnderscores? BinaryDigit)?
;
fragment
BinaryDigit
: [01]
;
fragment
BinaryDigitsAndUnderscores
: BinaryDigitOrUnderscore+
;
fragment
BinaryDigitOrUnderscore
: BinaryDigit
| '_'
;
// §3.10.2 Floating-Point Literals
FloatingPointLiteral
: ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
| '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
| ('0'..'9')+ Exponent FloatTypeSuffix?
| ('0'..'9')+ FloatTypeSuffix
| '0' ('x'|'X')
( HexDigit+ ('.' HexDigit*)? HexExponent FloatTypeSuffix?
| '.' HexDigit+ HexExponent FloatTypeSuffix?
)
;
: DecimalFloatingPointLiteral
| HexadecimalFloatingPointLiteral
;
fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
DecimalFloatingPointLiteral
: Digits '.' Digits? ExponentPart? FloatTypeSuffix?
| '.' Digits ExponentPart? FloatTypeSuffix?
| Digits ExponentPart FloatTypeSuffix?
| Digits FloatTypeSuffix
;
fragment
HexExponent : ('p'|'P') ('+'|'-')? ('0'..'9')+ ;
ExponentPart
: ExponentIndicator SignedInteger
;
fragment
FloatTypeSuffix : ('f'|'F'|'d'|'D') ;
ExponentIndicator
: [eE]
;
fragment
SignedInteger
: Sign? Digits
;
fragment
Sign
: [+-]
;
fragment
FloatTypeSuffix
: [fFdD]
;
fragment
HexadecimalFloatingPointLiteral
: HexSignificand BinaryExponent FloatTypeSuffix?
;
fragment
HexSignificand
: HexNumeral '.'?
| '0' [xX] HexDigits? '.' HexDigits
;
fragment
BinaryExponent
: BinaryExponentIndicator SignedInteger
;
fragment
BinaryExponentIndicator
: [pP]
;
// §3.10.3 Boolean Literals
BooleanLiteral
: 'true'
| 'false'
;
// §3.10.4 Character Literals
CharacterLiteral
: '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
;
: '\'' SingleCharacter '\''
| '\'' EscapeSequence '\''
;
fragment
SingleCharacter
: ~['\\]
;
// §3.10.5 String Literals
StringLiteral
: '"' ( EscapeSequence | ~('\\'|'"') )* '"'
;
: '"' StringCharacters? '"'
;
fragment
StringCharacters
: StringCharacter+
;
fragment
StringCharacter
: ~["\\]
| EscapeSequence
;
// §3.10.6 Escape Sequences for Character and String Literals
fragment
EscapeSequence
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
| UnicodeEscape
| OctalEscape
;
: '\\' [btnfr"'\\]
| OctalEscape
;
fragment
OctalEscape
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
: '\\' OctalDigit
| '\\' OctalDigit OctalDigit
| '\\' ZeroToThree OctalDigit OctalDigit
;
fragment
UnicodeEscape
: '\\' 'u' HexDigit HexDigit HexDigit HexDigit
;
ZeroToThree
: [0-3]
;
ENUM: 'enum' {enumIsKeyword}?
;
ASSERT
: 'assert' {assertIsKeyword}?
;
Identifier
: Letter (Letter|JavaIDDigit)*
;
// §3.10.7 The Null Literal
/**I found this char range in JavaCC's grammar, but Letter and Digit overlap.
Still works, but...
*/
fragment
Letter
: '\u0024' |
'\u0041'..'\u005a' |
'\u005f' |
'\u0061'..'\u007a' |
'\u00c0'..'\u00d6' |
'\u00d8'..'\u00f6' |
'\u00f8'..'\u00ff' |
'\u0100'..'\u1fff' |
'\u3040'..'\u318f' |
'\u3300'..'\u337f' |
'\u3400'..'\u3d2d' |
'\u4e00'..'\u9fff' |
'\uf900'..'\ufaff'
;
NullLiteral
: 'null'
;
// §3.11 Separators
LPAREN : '(';
RPAREN : ')';
LBRACE : '{';
RBRACE : '}';
LBRACK : '[';
RBRACK : ']';
SEMI : ';';
COMMA : ',';
DOT : '.';
// §3.12 Operators
ASSIGN : '=';
GT : '>';
LT : '<';
BANG : '!';
TILDE : '~';
QUESTION : '?';
COLON : ':';
EQUAL : '==';
LE : '<=';
GE : '>=';
NOTEQUAL : '!=';
AND : '&&';
OR : '||';
INC : '++';
DEC : '--';
ADD : '+';
SUB : '-';
MUL : '*';
DIV : '/';
BITAND : '&';
BITOR : '|';
CARET : '^';
MOD : '%';
ADD_ASSIGN : '+=';
SUB_ASSIGN : '-=';
MUL_ASSIGN : '*=';
DIV_ASSIGN : '/=';
AND_ASSIGN : '&=';
OR_ASSIGN : '|=';
XOR_ASSIGN : '^=';
MOD_ASSIGN : '%=';
LSHIFT_ASSIGN : '<<=';
RSHIFT_ASSIGN : '>>=';
URSHIFT_ASSIGN : '>>>=';
// §3.8 Identifiers (must appear after all keywords in the grammar)
Identifier
: JavaLetter JavaLetterOrDigit*
;
fragment
JavaIDDigit
: '\u0030'..'\u0039' |
'\u0660'..'\u0669' |
'\u06f0'..'\u06f9' |
'\u0966'..'\u096f' |
'\u09e6'..'\u09ef' |
'\u0a66'..'\u0a6f' |
'\u0ae6'..'\u0aef' |
'\u0b66'..'\u0b6f' |
'\u0be7'..'\u0bef' |
'\u0c66'..'\u0c6f' |
'\u0ce6'..'\u0cef' |
'\u0d66'..'\u0d6f' |
'\u0e50'..'\u0e59' |
'\u0ed0'..'\u0ed9' |
'\u1040'..'\u1049'
;
// First character of an Identifier.
// ASCII letters, '$' and '_' are matched directly; every other candidate is
// matched by the negated set and then filtered through
// IsJavaIdentifierCharacter(..., true). The negated set starts at U+0080 so
// that Latin-1 letters and currency signs (U+0080-U+00FF) reach the predicate
// instead of being rejected outright. Low surrogates pass the set but are
// rejected by the predicate.
JavaLetter
: [a-zA-Z$_] // these are the "java letters" below 0x7F
| // covers all characters above 0x7F which are not a surrogate
~[\u0000-\u007F\uD800-\uDBFF]
{IsJavaIdentifierCharacter((char)_input.La(-1), true)}?
// Supplementary characters are not ported yet; the Java original of this alternative was:
//| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
// [\uD800-\uDBFF] [\uDC00-\uDFFF]
// {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.La(-2), (char)_input.La(-1)))}?
;
WS : (' '|'\r'|'\t'|'\u000C'|'\n')+ -> channel(HIDDEN)
fragment
// Any character of an Identifier after the first.
// ASCII letters, digits, '$' and '_' are matched directly; every other
// candidate is matched by the negated set and then filtered through
// IsJavaIdentifierCharacter(..., false). The negated set starts at U+0080 so
// that Latin-1 characters (U+0080-U+00FF) reach the predicate instead of
// being rejected outright. Low surrogates pass the set but are rejected by
// the predicate.
JavaLetterOrDigit
: [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F
| // covers all characters above 0x7F which are not a surrogate
~[\u0000-\u007F\uD800-\uDBFF]
{IsJavaIdentifierCharacter((char)_input.La(-1), false)}?
// Supplementary characters are not ported yet; the Java original of this alternative was:
//| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
// [\uD800-\uDBFF] [\uDC00-\uDFFF]
// {Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.La(-2), (char)_input.La(-1)))}?
;
//
// Additional symbols not defined in the lexical specification
//
AT : '@';
ELLIPSIS : '...';
//
// Whitespace and comments
//
WS : [ \t\r\n\u000C]+ -> skip
;
COMMENT
: '/*' .*? '*/' -> channel(HIDDEN)
: '/*' .*? '*/' -> skip
;
LINE_COMMENT
: '//' ~('\n'|'\r')* '\r'? '\n' -> channel(HIDDEN)
: '//' ~[\r\n]* -> skip
;

View File

@ -168,14 +168,48 @@
grammar Java;
@lexer::members {
protected const int EOF = Eof;
protected const int HIDDEN = Hidden;
protected bool enumIsKeyword = true;
protected bool assertIsKeyword = true;
// Classifies a single UTF-16 code unit as a legal Java identifier character,
// using .NET Unicode categories as the stand-in for Java's
// Character.isJavaIdentifierStart (start == true) and
// Character.isJavaIdentifierPart (start == false).
//
//   c     - the code unit to classify.
//   start - true to test for the first character of an identifier,
//           false to test for any subsequent character.
//
// Returns true when the character is allowed in the requested position.
// Surrogate code units fall through to the default case and are rejected;
// supplementary characters are therefore not supported by this helper.
private static bool IsJavaIdentifierCharacter(char c, bool start)
{
    switch (char.GetUnicodeCategory(c))
    {
    case System.Globalization.UnicodeCategory.UppercaseLetter:
    case System.Globalization.UnicodeCategory.LowercaseLetter:
    case System.Globalization.UnicodeCategory.TitlecaseLetter:
    case System.Globalization.UnicodeCategory.ModifierLetter:
    case System.Globalization.UnicodeCategory.OtherLetter:
        // isLetter returns true
        return true;

    case System.Globalization.UnicodeCategory.LetterNumber:
        // getType returns LETTER_NUMBER
        return true;

    case System.Globalization.UnicodeCategory.CurrencySymbol:
        // a currency symbol (such as "$")
        return true;

    case System.Globalization.UnicodeCategory.ConnectorPunctuation:
        // a connecting punctuation character (such as "_")
        return true;

    case System.Globalization.UnicodeCategory.DecimalDigitNumber:
        // it is a digit
        return !start;

    case System.Globalization.UnicodeCategory.SpacingCombiningMark:
        // it is a combining mark
        return !start;

    case System.Globalization.UnicodeCategory.NonSpacingMark:
        // it is a non-spacing mark
        return !start;

    case System.Globalization.UnicodeCategory.Format:
        // isIdentifierIgnorable returns true for every FORMAT character,
        // which makes it a valid identifier part (but never a start)
        return !start;

    case System.Globalization.UnicodeCategory.Control:
        // isIdentifierIgnorable also accepts the ISO control characters that
        // are not whitespace: U+0000-U+0008, U+000E-U+001B and U+007F-U+009F.
        // The remaining controls (U+0009-U+000D, U+001C-U+001F) are whitespace.
        if (start)
        {
            return false;
        }

        bool whitespaceControl = (c >= '\u0009' && c <= '\u000D') || (c >= '\u001C' && c <= '\u001F');
        return !whitespaceControl;

    default:
        return false;
    }
}
@parser::members {
protected const int EOF = Eof;
}
// starting point for parsing a java file
@ -213,14 +247,15 @@ classOrInterfaceModifiers
;
classOrInterfaceModifier
: annotation // class or interface
| 'public' // class or interface
| 'protected' // class or interface
| 'private' // class or interface
| 'abstract' // class or interface
| 'static' // class or interface
| 'final' // class only -- does not apply to interfaces
| 'strictfp' // class or interface
: annotation // class or interface
| ( 'public' // class or interface
| 'protected' // class or interface
| 'private' // class or interface
| 'abstract' // class or interface
| 'static' // class or interface
| 'final' // class only -- does not apply to interfaces
| 'strictfp' // class or interface
)
;
modifiers
@ -417,17 +452,18 @@ arrayInitializer
modifier
: annotation
| 'public'
| 'protected'
| 'private'
| 'static'
| 'abstract'
| 'final'
| 'native'
| 'synchronized'
| 'transient'
| 'volatile'
| 'strictfp'
| ( 'public'
| 'protected'
| 'private'
| 'static'
| 'abstract'
| 'final'
| 'native'
| 'synchronized'
| 'transient'
| 'volatile'
| 'strictfp'
)
;
packageOrTypeName
@ -498,39 +534,22 @@ methodBody
;
constructorBody
: '{' explicitConstructorInvocation? blockStatement* '}'
: block
;
explicitConstructorInvocation
: nonWildcardTypeArguments? ('this' | 'super') arguments ';'
| primary '.' nonWildcardTypeArguments? 'super' arguments ';'
;
qualifiedName
: Identifier ('.' Identifier)*
;
literal
: integerLiteral
: IntegerLiteral
| FloatingPointLiteral
| CharacterLiteral
| StringLiteral
| booleanLiteral
| BooleanLiteral
| 'null'
;
integerLiteral
: HexLiteral
| OctalLiteral
| DecimalLiteral
;
booleanLiteral
: 'true'
| 'false'
;
// ANNOTATIONS
annotations
@ -573,6 +592,7 @@ annotationTypeBody
annotationTypeElementDeclaration
: modifiers annotationTypeElementRest
| ';' // this is not allowed by the grammar, but apparently allowed by the actual compiler
;
annotationTypeElementRest
@ -625,18 +645,14 @@ variableModifiers
;
statement
//@leftfactor{catches}
: block
: block
| ASSERT expression (':' expression)? ';'
| 'if' parExpression statement ('else' statement)?
| 'for' '(' forControl ')' statement
| 'while' parExpression statement
| 'do' statement 'while' parExpression ';'
| 'try' block
( catches 'finally' block
| catches
| 'finally' block
)
| 'try' block (catches finallyBlock? | finallyBlock)
| 'try' resourceSpecification block catches? finallyBlock?
| 'switch' parExpression '{' switchBlockStatementGroups '}'
| 'synchronized' parExpression block
| 'return' expression? ';'
@ -647,15 +663,35 @@ statement
| statementExpression ';'
| Identifier ':' statement
;
catches
: catchClause (catchClause)*
: catchClause+
;
catchClause
: 'catch' '(' formalParameter ')' block
: 'catch' '(' variableModifiers catchType Identifier ')' block
;
// Exception type list of a catch clause: one or more qualified names
// separated by '|'.
catchType
: qualifiedName ('|' qualifiedName)*
;
// The 'finally' keyword followed by a block.
finallyBlock
: 'finally' block
;
// Parenthesized resource list; a trailing ';' after the last resource is optional.
resourceSpecification
: '(' resources ';'? ')'
;
// One or more resources separated by ';'.
resources
: resource (';' resource)*
;
// A single resource: modifiers, a class or interface type, the declarator id,
// and a mandatory '=' initializer expression.
resource
: variableModifiers classOrInterfaceType variableDeclaratorId '=' expression
;
formalParameter
: variableModifiers type variableDeclaratorId
;
@ -728,27 +764,13 @@ assignmentOperator
| '|='
| '^='
| '%='
| t1='<' t2='<' t3='='
// { $t1.getLine() == $t2.getLine() &&
// $t1.getCharPositionInLine() + 1 == $t2.getCharPositionInLine() &&
// $t2.getLine() == $t3.getLine() &&
// $t2.getCharPositionInLine() + 1 == $t3.getCharPositionInLine() }?
| t1='>' t2='>' t3='>' t4='='
// { $t1.getLine() == $t2.getLine() &&
// $t1.getCharPositionInLine() + 1 == $t2.getCharPositionInLine() &&
// $t2.getLine() == $t3.getLine() &&
// $t2.getCharPositionInLine() + 1 == $t3.getCharPositionInLine() &&
// $t3.getLine() == $t4.getLine() &&
// $t3.getCharPositionInLine() + 1 == $t4.getCharPositionInLine() }?
| t1='>' t2='>' t3='='
// { $t1.getLine() == $t2.getLine() &&
// $t1.getCharPositionInLine() + 1 == $t2.getCharPositionInLine() &&
// $t2.getLine() == $t3.getLine() &&
// $t2.getCharPositionInLine() + 1 == $t3.getCharPositionInLine() }?
| '<<='
| '>>='
| '>>>='
;
conditionalExpression
: conditionalOrExpression ( '?' conditionalExpression ':' conditionalExpression )?
: conditionalOrExpression ( '?' expression ':' conditionalExpression )?
;
conditionalOrExpression
@ -784,14 +806,10 @@ relationalExpression
;
relationalOp
: t1='<' t2='='
// { $t1.getLine() == $t2.getLine() &&
// $t1.getCharPositionInLine() + 1 == $t2.getCharPositionInLine() }?
| t1='>' t2='='
// { $t1.getLine() == $t2.getLine() &&
// $t1.getCharPositionInLine() + 1 == $t2.getCharPositionInLine() }?
| '<'
| '>'
: '<='
| '>='
| '<'
| '>'
;
shiftExpression
@ -843,10 +861,11 @@ castExpression
primary
: parExpression
| 'this' ('.' Identifier)* identifierSuffix?
| 'this' arguments?
| 'super' superSuffix
| literal
| 'new' creator
| nonWildcardTypeArguments (explicitGenericInvocationSuffix | 'this' arguments)
| Identifier ('.' Identifier)* identifierSuffix?
| primitiveType ('[' ']')* '.' 'class'
| 'void' '.' 'class'
@ -854,13 +873,13 @@ primary
identifierSuffix
: ('[' ']')+ '.' 'class'
| ('[' expression ']')+ // can also be matched by selector, but do here
| '[' expression ']'
| arguments
| '.' 'class'
| '.' explicitGenericInvocation
| '.' 'this'
| '.' 'super' arguments
| '.' 'new' innerCreator
| '.' 'new' nonWildcardTypeArguments? innerCreator
;
creator
@ -869,12 +888,12 @@ creator
;
createdName
: classOrInterfaceType
| primitiveType
: Identifier typeArgumentsOrDiamond? ('.' Identifier typeArgumentsOrDiamond?)*
| primitiveType
;
innerCreator
: nonWildcardTypeArguments? Identifier classCreatorRest
: Identifier nonWildcardTypeArgumentsOrDiamond? classCreatorRest
;
arrayCreatorRest
@ -889,18 +908,29 @@ classCreatorRest
;
explicitGenericInvocation
: nonWildcardTypeArguments Identifier arguments
: nonWildcardTypeArguments explicitGenericInvocationSuffix
;
nonWildcardTypeArguments
: '<' typeList '>'
;
// Either the empty diamond '<' '>' or a full typeArguments list.
typeArgumentsOrDiamond
: '<' '>'
| typeArguments
;
// Either the empty diamond '<' '>' or a nonWildcardTypeArguments list.
nonWildcardTypeArgumentsOrDiamond
: '<' '>'
| nonWildcardTypeArguments
;
selector
: '.' Identifier arguments?
| '.' explicitGenericInvocation
| '.' 'this'
| '.' 'super' superSuffix
| '.' 'new' innerCreator
| '.' 'new' nonWildcardTypeArguments? innerCreator
| '[' expression ']'
;
@ -909,128 +939,439 @@ superSuffix
| '.' Identifier arguments?
;
// What may follow explicit type arguments: 'super' plus its suffix, or an
// Identifier followed by a mandatory argument list.
explicitGenericInvocationSuffix
: 'super' superSuffix
| Identifier arguments
;
arguments
: '(' expressionList? ')'
;
// LEXER
HexLiteral : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;
// §3.9 Keywords
DecimalLiteral : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;
ABSTRACT : 'abstract';
ASSERT : 'assert';
BOOLEAN : 'boolean';
BREAK : 'break';
BYTE : 'byte';
CASE : 'case';
CATCH : 'catch';
CHAR : 'char';
CLASS : 'class';
CONST : 'const';
CONTINUE : 'continue';
DEFAULT : 'default';
DO : 'do';
DOUBLE : 'double';
ELSE : 'else';
ENUM : 'enum';
EXTENDS : 'extends';
FINAL : 'final';
FINALLY : 'finally';
FLOAT : 'float';
FOR : 'for';
IF : 'if';
GOTO : 'goto';
IMPLEMENTS : 'implements';
IMPORT : 'import';
INSTANCEOF : 'instanceof';
INT : 'int';
INTERFACE : 'interface';
LONG : 'long';
NATIVE : 'native';
NEW : 'new';
PACKAGE : 'package';
PRIVATE : 'private';
PROTECTED : 'protected';
PUBLIC : 'public';
RETURN : 'return';
SHORT : 'short';
STATIC : 'static';
STRICTFP : 'strictfp';
SUPER : 'super';
SWITCH : 'switch';
SYNCHRONIZED : 'synchronized';
THIS : 'this';
THROW : 'throw';
THROWS : 'throws';
TRANSIENT : 'transient';
TRY : 'try';
VOID : 'void';
VOLATILE : 'volatile';
WHILE : 'while';
OctalLiteral : '0' ('0'..'7')+ IntegerTypeSuffix? ;
// §3.10.1 Integer Literals
IntegerLiteral
: DecimalIntegerLiteral
| HexIntegerLiteral
| OctalIntegerLiteral
| BinaryIntegerLiteral
;
fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
DecimalIntegerLiteral
: DecimalNumeral IntegerTypeSuffix?
;
fragment
IntegerTypeSuffix : ('l'|'L') ;
HexIntegerLiteral
: HexNumeral IntegerTypeSuffix?
;
fragment
OctalIntegerLiteral
: OctalNumeral IntegerTypeSuffix?
;
fragment
BinaryIntegerLiteral
: BinaryNumeral IntegerTypeSuffix?
;
fragment
IntegerTypeSuffix
: [lL]
;
fragment
DecimalNumeral
: '0'
| NonZeroDigit (Digits? | Underscores Digits)
;
fragment
Digits
: Digit (DigitsAndUnderscores? Digit)?
;
fragment
Digit
: '0'
| NonZeroDigit
;
fragment
NonZeroDigit
: [1-9]
;
fragment
DigitsAndUnderscores
: DigitOrUnderscore+
;
fragment
DigitOrUnderscore
: Digit
| '_'
;
fragment
Underscores
: '_'+
;
fragment
HexNumeral
: '0' [xX] HexDigits
;
fragment
HexDigits
: HexDigit (HexDigitsAndUnderscores? HexDigit)?
;
fragment
HexDigit
: [0-9a-fA-F]
;
fragment
HexDigitsAndUnderscores
: HexDigitOrUnderscore+
;
fragment
HexDigitOrUnderscore
: HexDigit
| '_'
;
fragment
OctalNumeral
: '0' Underscores? OctalDigits
;
fragment
OctalDigits
: OctalDigit (OctalDigitsAndUnderscores? OctalDigit)?
;
fragment
OctalDigit
: [0-7]
;
fragment
OctalDigitsAndUnderscores
: OctalDigitOrUnderscore+
;
fragment
OctalDigitOrUnderscore
: OctalDigit
| '_'
;
fragment
BinaryNumeral
: '0' [bB] BinaryDigits
;
fragment
BinaryDigits
: BinaryDigit (BinaryDigitsAndUnderscores? BinaryDigit)?
;
fragment
BinaryDigit
: [01]
;
fragment
BinaryDigitsAndUnderscores
: BinaryDigitOrUnderscore+
;
fragment
BinaryDigitOrUnderscore
: BinaryDigit
| '_'
;
// §3.10.2 Floating-Point Literals
FloatingPointLiteral
: ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
| '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
| ('0'..'9')+ Exponent FloatTypeSuffix?
| ('0'..'9')+ FloatTypeSuffix
| '0' ('x'|'X')
( HexDigit+ ('.' HexDigit*)? HexExponent FloatTypeSuffix?
| '.' HexDigit+ HexExponent FloatTypeSuffix?
)
;
: DecimalFloatingPointLiteral
| HexadecimalFloatingPointLiteral
;
fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
DecimalFloatingPointLiteral
: Digits '.' Digits? ExponentPart? FloatTypeSuffix?
| '.' Digits ExponentPart? FloatTypeSuffix?
| Digits ExponentPart FloatTypeSuffix?
| Digits FloatTypeSuffix
;
fragment
HexExponent : ('p'|'P') ('+'|'-')? ('0'..'9')+ ;
ExponentPart
: ExponentIndicator SignedInteger
;
fragment
FloatTypeSuffix : ('f'|'F'|'d'|'D') ;
ExponentIndicator
: [eE]
;
fragment
SignedInteger
: Sign? Digits
;
fragment
Sign
: [+-]
;
fragment
FloatTypeSuffix
: [fFdD]
;
fragment
HexadecimalFloatingPointLiteral
: HexSignificand BinaryExponent FloatTypeSuffix?
;
fragment
HexSignificand
: HexNumeral '.'?
| '0' [xX] HexDigits? '.' HexDigits
;
fragment
BinaryExponent
: BinaryExponentIndicator SignedInteger
;
fragment
BinaryExponentIndicator
: [pP]
;
// §3.10.3 Boolean Literals
BooleanLiteral
: 'true'
| 'false'
;
// §3.10.4 Character Literals
CharacterLiteral
: '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
;
: '\'' SingleCharacter '\''
| '\'' EscapeSequence '\''
;
fragment
SingleCharacter
: ~['\\]
;
// §3.10.5 String Literals
StringLiteral
: '"' ( EscapeSequence | ~('\\'|'"') )* '"'
;
: '"' StringCharacters? '"'
;
fragment
StringCharacters
: StringCharacter+
;
fragment
StringCharacter
: ~["\\]
| EscapeSequence
;
// §3.10.6 Escape Sequences for Character and String Literals
fragment
EscapeSequence
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
| UnicodeEscape
| OctalEscape
;
: '\\' [btnfr"'\\]
| OctalEscape
;
fragment
OctalEscape
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
: '\\' OctalDigit
| '\\' OctalDigit OctalDigit
| '\\' ZeroToThree OctalDigit OctalDigit
;
fragment
UnicodeEscape
: '\\' 'u' HexDigit HexDigit HexDigit HexDigit
;
ZeroToThree
: [0-3]
;
ENUM: 'enum' {enumIsKeyword}?
;
ASSERT
: 'assert' {assertIsKeyword}?
;
Identifier
: Letter (Letter|JavaIDDigit)*
;
// §3.10.7 The Null Literal
/**I found this char range in JavaCC's grammar, but Letter and Digit overlap.
Still works, but...
*/
fragment
Letter
: '\u0024' |
'\u0041'..'\u005a' |
'\u005f' |
'\u0061'..'\u007a' |
'\u00c0'..'\u00d6' |
'\u00d8'..'\u00f6' |
'\u00f8'..'\u00ff' |
'\u0100'..'\u1fff' |
'\u3040'..'\u318f' |
'\u3300'..'\u337f' |
'\u3400'..'\u3d2d' |
'\u4e00'..'\u9fff' |
'\uf900'..'\ufaff'
;
NullLiteral
: 'null'
;
// §3.11 Separators
LPAREN : '(';
RPAREN : ')';
LBRACE : '{';
RBRACE : '}';
LBRACK : '[';
RBRACK : ']';
SEMI : ';';
COMMA : ',';
DOT : '.';
// §3.12 Operators
ASSIGN : '=';
GT : '>';
LT : '<';
BANG : '!';
TILDE : '~';
QUESTION : '?';
COLON : ':';
EQUAL : '==';
LE : '<=';
GE : '>=';
NOTEQUAL : '!=';
AND : '&&';
OR : '||';
INC : '++';
DEC : '--';
ADD : '+';
SUB : '-';
MUL : '*';
DIV : '/';
BITAND : '&';
BITOR : '|';
CARET : '^';
MOD : '%';
ADD_ASSIGN : '+=';
SUB_ASSIGN : '-=';
MUL_ASSIGN : '*=';
DIV_ASSIGN : '/=';
AND_ASSIGN : '&=';
OR_ASSIGN : '|=';
XOR_ASSIGN : '^=';
MOD_ASSIGN : '%=';
LSHIFT_ASSIGN : '<<=';
RSHIFT_ASSIGN : '>>=';
URSHIFT_ASSIGN : '>>>=';
// §3.8 Identifiers (must appear after all keywords in the grammar)
Identifier
: JavaLetter JavaLetterOrDigit*
;
fragment
JavaIDDigit
: '\u0030'..'\u0039' |
'\u0660'..'\u0669' |
'\u06f0'..'\u06f9' |
'\u0966'..'\u096f' |
'\u09e6'..'\u09ef' |
'\u0a66'..'\u0a6f' |
'\u0ae6'..'\u0aef' |
'\u0b66'..'\u0b6f' |
'\u0be7'..'\u0bef' |
'\u0c66'..'\u0c6f' |
'\u0ce6'..'\u0cef' |
'\u0d66'..'\u0d6f' |
'\u0e50'..'\u0e59' |
'\u0ed0'..'\u0ed9' |
'\u1040'..'\u1049'
;
JavaLetter
: [a-zA-Z$_] // these are the "java letters" below 0xFF
| // covers all characters above 0xFF which are not a surrogate
~[\u0000-\u00FF\uD800-\uDBFF]
{IsJavaIdentifierCharacter((char)_input.La(-1), true)}?
//| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
// [\uD800-\uDBFF] [\uDC00-\uDFFF]
// {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.La(-2), (char)_input.La(-1)))}?
;
WS : (' '|'\r'|'\t'|'\u000C'|'\n')+ -> channel(HIDDEN)
fragment
JavaLetterOrDigit
: [a-zA-Z0-9$_] // these are the "java letters or digits" below 0xFF
| // covers all characters above 0xFF which are not a surrogate
~[\u0000-\u00FF\uD800-\uDBFF]
{IsJavaIdentifierCharacter((char)_input.La(-1), false)}?
//| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
// [\uD800-\uDBFF] [\uDC00-\uDFFF]
// {Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.La(-2), (char)_input.La(-1)))}?
;
//
// Additional symbols not defined in the lexical specification
//
AT : '@';
ELLIPSIS : '...';
//
// Whitespace and comments
//
WS : [ \t\r\n\u000C]+ -> skip
;
COMMENT
: '/*' .*? '*/' -> channel(HIDDEN)
: '/*' .*? '*/' -> skip
;
LINE_COMMENT
: '//' ~('\n'|'\r')* '\r'? '\n' -> channel(HIDDEN)
: '//' ~[\r\n]* -> skip
;

View File

@ -0,0 +1,272 @@
namespace Antlr4.Runtime.Test
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using Antlr4.Runtime.Misc;
public class JavaUnicodeInputStream : ICharStream
{
// Underlying character stream; all reads and position queries are delegated to it.
[NotNull]
private readonly ICharStream _source;
// Source indexes at which a decoded unicode escape begins (appended by La's forward scan).
private readonly List<int> _escapeIndexes = new List<int>();
// Decoded character value of the escape stored at the same list position.
private readonly List<int> _escapeCharacters = new List<int>();
// Number of extra 'u' characters (beyond the first) in the escape stored at the same list position.
private readonly List<int> _escapeIndirectionLevels = new List<int>();
// Position in the escape lists separating the entries La walks backward over (look-behind)
// from those it walks forward over (look-ahead); advanced by Consume, recomputed by Seek.
private int _escapeListIndex;
// Highest source index reached so far by Consume or by La's forward scan; Seek rejects larger indexes.
private int _range;
// Backslash counter used to decide whether a following backslash may start a unicode escape
// (an odd count blocks it); Seek recomputes it from the backslashes directly before the position.
private int _slashCount;
// Cached value of _source.La(1) for the current position.
private int _la1;
/// <summary>
/// Wraps <paramref name="source"/> and caches its first lookahead character.
/// </summary>
/// <param name="source">The character stream to read from.</param>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
public JavaUnicodeInputStream([NotNull] ICharStream source)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    _source = source;
    _la1 = _source.La(1);
}
/// <summary>
/// Gets the size reported by the wrapped stream.
/// </summary>
public int Size
{
    get
    {
        int sourceSize = this._source.Size;
        return sourceSize;
    }
}
/// <summary>
/// Gets the current index of the wrapped stream.
/// </summary>
public int Index
{
    get
    {
        int sourceIndex = this._source.Index;
        return sourceIndex;
    }
}
/// <summary>
/// Gets the source name reported by the wrapped stream.
/// </summary>
public string SourceName
{
    get
    {
        string wrappedName = this._source.SourceName;
        return wrappedName;
    }
}
/// <summary>
/// Returns the text of <paramref name="interval"/> exactly as the wrapped stream
/// provides it; the call is forwarded unchanged.
/// </summary>
/// <param name="interval">The interval to pass to the wrapped stream.</param>
/// <returns>The wrapped stream's text for the interval.</returns>
public String GetText(Interval interval)
{
    String text = this._source.GetText(interval);
    return text;
}
/// <summary>
/// Advances the wrapped stream past the current character. When the current
/// position is a recorded unicode escape, every source character of the escape
/// (6 plus its indirection level) is consumed; otherwise exactly one source
/// character is consumed.
/// </summary>
public void Consume()
{
    if (_la1 == '\\')
    {
        // Run the lookahead so that an escape starting here gets recorded.
        La(1);

        bool atRecordedEscape = _escapeListIndex < _escapeIndexes.Count
            && _escapeIndexes[_escapeListIndex] == Index;
        if (atRecordedEscape)
        {
            // Skip the complete escape: backslash, 'u', four hex digits and any extra 'u's.
            int sourceChars = 6 + _escapeIndirectionLevels[_escapeListIndex];
            for (int consumed = 0; consumed < sourceChars; consumed++)
            {
                _source.Consume();
            }

            _escapeListIndex++;
            _slashCount = 0;
        }
        else
        {
            // A plain backslash: consume it and remember it for escape eligibility.
            _source.Consume();
            _slashCount++;
        }

        _la1 = _source.La(1);
        Debug.Assert(_range >= Index);
        return;
    }

    // Ordinary character: single step, and the backslash counter starts over.
    _source.Consume();
    _la1 = _source.La(1);
    _range = Math.Max(_range, _source.Index);
    _slashCount = 0;
}
/// <summary>
/// Returns the character at lookahead offset <paramref name="i"/>, where a
/// unicode escape in the wrapped stream is reported as its single decoded value.
/// </summary>
/// <param name="i">
/// Lookahead offset. Positive values look ahead from the current position;
/// values less than or equal to zero take the look-behind branch.
/// </param>
/// <returns>The decoded escape value, or the character read from the wrapped stream.</returns>
public int La(int i)
{
// Fast path: the cached next character cannot start an escape unless it is a backslash.
if (i == 1 && _la1 != '\\')
{
return _la1;
}
if (i <= 0)
{
// Look-behind: translate the requested position into a source index by walking
// backward over the recorded escapes that precede _escapeListIndex.
int desiredIndex = Index + i;
for (int j = _escapeListIndex - 1; j >= 0; j--)
{
// The escape's source span (6 + indirection level characters) extends past the
// desired index, so shift the index back by the span's extra 5 + level characters.
if (_escapeIndexes[j] + 6 + _escapeIndirectionLevels[j] > desiredIndex)
{
desiredIndex -= 5 + _escapeIndirectionLevels[j];
}
// The desired index is the start of this escape: report its decoded value.
if (_escapeIndexes[j] == desiredIndex)
{
return _escapeCharacters[j];
}
}
return _source.La(desiredIndex - Index);
}
else
{
// Look-ahead, step 1: try to answer from escapes already recorded at or after
// the current escape-list position.
int desiredIndex = Index + i - 1;
for (int j = _escapeListIndex; j < _escapeIndexes.Count; j++)
{
if (_escapeIndexes[j] == desiredIndex)
{
return _escapeCharacters[j];
}
else if (_escapeIndexes[j] < desiredIndex)
{
// This escape lies before the target; account for its extra source characters.
desiredIndex += 5 + _escapeIndirectionLevels[j];
}
else
{
// The next recorded escape starts after the target: read the source directly.
return _source.La(desiredIndex - Index + 1);
}
}
// Look-ahead, step 2: scan forward one character at a time from the current
// position, starting from the current backslash counter.
int currentIndex = Index;
int slashCount = _slashCount;
int indirectionLevel = 0;
for (int j = 0; j < i; j++)
{
int previousIndex = currentIndex;
int c = ReadCharAt(ref currentIndex, ref slashCount, ref indirectionLevel);
if (currentIndex > _range)
{
// The scan moved beyond the previously examined range. A read that advanced
// more than one source character was an escape, so record it.
if (currentIndex - previousIndex > 1)
{
_escapeIndexes.Add(previousIndex);
_escapeCharacters.Add(c);
_escapeIndirectionLevels.Add(indirectionLevel);
}
_range = currentIndex;
}
if (j == i - 1)
{
return c;
}
}
// With i > 0 the loop above always returns on its last iteration.
throw new InvalidOperationException("shouldn't be reachable");
}
}
/// <summary>
/// Forwards the mark request to the wrapped stream.
/// </summary>
/// <returns>The marker returned by the wrapped stream.</returns>
public int Mark()
{
    int marker = this._source.Mark();
    return marker;
}
/// <summary>
/// Forwards the release of <paramref name="marker"/> to the wrapped stream.
/// </summary>
/// <param name="marker">The marker to pass to the wrapped stream.</param>
public void Release(int marker)
{
    this._source.Release(marker);
}
/// <summary>
/// Repositions the wrapped stream at <paramref name="index"/> and rebuilds the
/// cached lookahead character, the preceding-backslash count and the escape-list
/// position for the new location.
/// </summary>
/// <param name="index">Target source index; must not exceed the range already examined.</param>
/// <exception cref="NotSupportedException">
/// <paramref name="index"/> is greater than the range already examined.
/// </exception>
public void Seek(int index)
{
    if (index > _range)
    {
        throw new NotSupportedException();
    }

    _source.Seek(index);
    _la1 = _source.La(1);

    // Count the backslashes that sit directly before the new position.
    int precedingSlashes = 0;
    while (_source.La(-(precedingSlashes + 1)) == '\\')
    {
        precedingSlashes++;
    }

    _slashCount = precedingSlashes;

    // A negative BinarySearch result is the bitwise complement of the insertion point.
    int position = _escapeIndexes.BinarySearch(_source.Index);
    _escapeListIndex = position >= 0 ? position : ~position;
}
/// <summary>
/// Tests whether <paramref name="c"/> is one of 0-9, a-f or A-F.
/// </summary>
/// <param name="c">The character value to test.</param>
/// <returns><c>true</c> for a hexadecimal digit; otherwise <c>false</c>.</returns>
private static bool IsHexDigit(int c)
{
    if (c >= '0' && c <= '9')
    {
        return true;
    }

    if (c >= 'a' && c <= 'f')
    {
        return true;
    }

    return c >= 'A' && c <= 'F';
}
/// <summary>
/// Converts a hexadecimal digit character to its numeric value.
/// </summary>
/// <param name="c">A character in 0-9, a-f or A-F.</param>
/// <returns>The value of the digit, from 0 to 15.</returns>
/// <exception cref="ArgumentException"><paramref name="c"/> is not a hexadecimal digit.</exception>
private static int HexValue(int c)
{
    if (c >= '0' && c <= '9')
    {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f')
    {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F')
    {
        return c - 'A' + 10;
    }
    // Use the (message, paramName) overload: the single-string overload would
    // treat "c" as the message and leave ParamName unset.
    throw new ArgumentException("The character is not a hexadecimal digit.", "c");
}
/// <summary>
/// Reads one character starting at source index <paramref name="nextIndex"/>,
/// decoding a unicode escape (backslash, one or more 'u', four hex digits) into
/// a single value when the backslash is eligible to start one.
/// </summary>
/// <param name="nextIndex">
/// Source index to read at. On return it is advanced by one for an ordinary
/// character, or by 6 plus the indirection level for a decoded escape.
/// </param>
/// <param name="slashCount">
/// Number of contiguous backslashes directly before <paramref name="nextIndex"/>.
/// A backslash after an odd count cannot start an escape. On return the count is
/// updated for the character that was read.
/// </param>
/// <param name="indirectionLevel">
/// Set to the number of extra 'u' characters whenever a backslash-u sequence is
/// examined; left untouched otherwise.
/// </param>
/// <returns>The decoded escape value, or the source character itself.</returns>
private int ReadCharAt(ref int nextIndex, ref int slashCount, ref int indirectionLevel)
{
    bool blockUnicodeEscape = (slashCount % 2) != 0;

    int c0 = _source.La(nextIndex - Index + 1);
    if (c0 != '\\')
    {
        // Any other character ends the run of backslashes. Without this reset a later,
        // non-adjacent backslash would be counted as contiguous and its escape wrongly
        // blocked during multi-character lookahead.
        slashCount = 0;
        nextIndex++;
        return c0;
    }

    slashCount++;
    if (!blockUnicodeEscape && _source.La(nextIndex - Index + 2) == 'u')
    {
        // Skip over any additional 'u' characters, counting them as the indirection level.
        int c2 = _source.La(nextIndex - Index + 3);
        indirectionLevel = 0;
        while (c2 == 'u')
        {
            indirectionLevel++;
            c2 = _source.La(nextIndex - Index + 3 + indirectionLevel);
        }

        int c3 = _source.La(nextIndex - Index + 4 + indirectionLevel);
        int c4 = _source.La(nextIndex - Index + 5 + indirectionLevel);
        int c5 = _source.La(nextIndex - Index + 6 + indirectionLevel);
        if (IsHexDigit(c2) && IsHexDigit(c3) && IsHexDigit(c4) && IsHexDigit(c5))
        {
            int value = HexValue(c2);
            value = (value << 4) + HexValue(c3);
            value = (value << 4) + HexValue(c4);
            value = (value << 4) + HexValue(c5);

            nextIndex += 6 + indirectionLevel;
            slashCount = 0;
            return value;
        }
    }

    // A backslash that does not start a valid escape is returned as-is.
    nextIndex++;
    return c0;
}
}
}

View File

@ -18,7 +18,9 @@
using File = System.IO.File;
using FileInfo = System.IO.FileInfo;
using Interlocked = System.Threading.Interlocked;
using IOException = System.IO.IOException;
using Path = System.IO.Path;
using SearchOption = System.IO.SearchOption;
using Stopwatch = System.Diagnostics.Stopwatch;
using Stream = System.IO.Stream;
using StreamReader = System.IO.StreamReader;
@ -44,6 +46,18 @@
* {@link #TOP_PACKAGE}.
*/
private static readonly bool RECURSIVE = true;
/**
* {@code true} to read all source files from disk into memory before
* starting the parse. The default value is {@code true} to help prevent
* drive speed from affecting the performance results. This value may be set
* to {@code false} to support parsing large input sets which would not
* otherwise fit into memory.
*/
private static readonly bool PRELOAD_SOURCES = true;
/**
* The encoding to use when reading source files.
*/
private static readonly Encoding ENCODING = Encoding.UTF8;
/**
* {@code true} to use the Java grammar with expressions in the v4
@ -95,7 +109,7 @@
* {@code true} to use {@link BailErrorStrategy}, {@code false} to use
* {@link DefaultErrorStrategy}.
*/
private static readonly bool BAIL_ON_ERROR = true;
private static readonly bool BAIL_ON_ERROR = false;
/**
* {@code true} to compute a checksum for verifying consistency across
* optimizations and multiple passes.
@ -230,7 +244,7 @@
DirectoryInfo directory = new DirectoryInfo(jdkSourceRoot);
Assert.IsTrue(directory.Exists);
IEnumerable<ICharStream> sources = loadSources(directory, "*.java", RECURSIVE);
IEnumerable<InputDescriptor> sources = LoadSources(directory, "*.java", RECURSIVE);
Console.Out.Write(getOptionsDescription(TOP_PACKAGE));
@ -327,7 +341,7 @@
* This method is separate from {@link #parse2} so the first pass can be distinguished when analyzing
* profiler results.
*/
protected void parse1(ParserFactory factory, IEnumerable<ICharStream> sources)
protected void parse1(ParserFactory factory, IEnumerable<InputDescriptor> sources)
{
GC.Collect();
parseSources(factory, sources);
@ -337,49 +351,33 @@
* This method is separate from {@link #parse1} so the first pass can be distinguished when analyzing
* profiler results.
*/
protected void parse2(ParserFactory factory, IEnumerable<ICharStream> sources)
protected void parse2(ParserFactory factory, IEnumerable<InputDescriptor> sources)
{
GC.Collect();
parseSources(factory, sources);
}
protected IEnumerable<ICharStream> loadSources(DirectoryInfo directory, string filter, bool recursive)
protected IList<InputDescriptor> LoadSources(DirectoryInfo directory, string filter, bool recursive)
{
return loadSources(directory, filter, null, recursive);
}
protected IEnumerable<ICharStream> loadSources(DirectoryInfo directory, string filter, Encoding encoding, bool recursive)
{
ICollection<ICharStream> result = new List<ICharStream>();
loadSources(directory, filter, encoding, recursive, result);
IList<InputDescriptor> result = new List<InputDescriptor>();
LoadSources(directory, filter, recursive, result);
return result;
}
protected void loadSources(DirectoryInfo directory, string filter, Encoding encoding, bool recursive, ICollection<ICharStream> result)
protected void LoadSources(DirectoryInfo directory, string filter, bool recursive, ICollection<InputDescriptor> result)
{
Debug.Assert(directory.Exists);
FileInfo[] sources = directory.GetFiles(filter);
FileInfo[] sources = directory.GetFiles(filter, recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly);
foreach (FileInfo file in sources)
{
var stream = new StreamReader(File.OpenRead(file.FullName), encoding);
ICharStream input = new AntlrInputStream(stream);
result.Add(input);
}
if (recursive)
{
DirectoryInfo[] children = directory.GetDirectories();
foreach (DirectoryInfo child in children)
{
loadSources(child, filter, encoding, true, result);
}
result.Add(new InputDescriptor(file.FullName));
}
}
int configOutputSize = 0;
protected void parseSources(ParserFactory factory, IEnumerable<ICharStream> sources)
protected void parseSources(ParserFactory factory, IEnumerable<InputDescriptor> sources)
{
Stopwatch startTime = Stopwatch.StartNew();
Thread.VolatileWrite(ref tokenCount, 0);
@ -397,8 +395,9 @@
#else
ICollection<Func<int>> results = new List<Func<int>>();
#endif
foreach (ICharStream input in sources)
foreach (InputDescriptor inputDescriptor in sources)
{
ICharStream input = inputDescriptor.GetInputStream();
sourceCount++;
input.Seek(0);
inputSize += input.Size;
@ -1094,5 +1093,107 @@
updateChecksum(checksum, ctx.Stop);
}
}
/**
 * Describes one source file and hands out fresh input streams for it. The
 * loaded file stream is held strongly when PRELOAD_SOURCES is set and through
 * a weak reference otherwise.
 */
protected sealed class InputDescriptor
{
    private readonly string source;
    private WeakReference<CloneableAntlrFileStream> inputStream;
    private CloneableAntlrFileStream strongInputStream;

    /**
     * Stores the file name and, when PRELOAD_SOURCES is set, loads the file
     * right away.
     */
    public InputDescriptor([NotNull] String source)
    {
        this.source = source;
        if (!PRELOAD_SOURCES)
        {
            return;
        }

        GetInputStream();
    }

    /**
     * Returns a new JavaUnicodeInputStream over a copy of the file stream,
     * loading the file first when no stream is currently held.
     */
    [return: NotNull]
    public ICharStream GetInputStream()
    {
        CloneableAntlrFileStream fileStream;
        bool held = TryGetTarget(out fileStream);
        if (!held)
        {
            fileStream = new CloneableAntlrFileStream(source, ENCODING);
            SetTarget(fileStream);
        }

        AntlrInputStream copy = fileStream.CreateCopy();
        return new JavaUnicodeInputStream(copy);
    }

    /**
     * Remembers the loaded stream, strongly or weakly depending on
     * PRELOAD_SOURCES.
     */
    private void SetTarget(CloneableAntlrFileStream stream)
    {
        if (!PRELOAD_SOURCES)
        {
            inputStream = new WeakReference<CloneableAntlrFileStream>(stream);
            return;
        }

        strongInputStream = stream;
    }

    /**
     * Retrieves the remembered stream, reporting whether one is available.
     */
    private bool TryGetTarget(out CloneableAntlrFileStream stream)
    {
        if (!PRELOAD_SOURCES)
        {
            if (inputStream != null)
            {
                return inputStream.TryGetTarget(out stream);
            }

            stream = null;
            return false;
        }

        stream = strongInputStream;
        return stream != null;
    }
}
/**
 * File-backed input stream that can produce copies of itself. In PORTABLE
 * builds the file text is read up front and passed to AntlrInputStream;
 * otherwise the AntlrFileStream base class is given the file name and encoding.
 */
#if PORTABLE
protected class CloneableAntlrFileStream : AntlrInputStream
#else
protected class CloneableAntlrFileStream : AntlrFileStream
#endif
{
    public CloneableAntlrFileStream(String fileName, Encoding encoding)
#if PORTABLE
        : base(File.ReadAllText(fileName, encoding))
#else
        : base(fileName, encoding)
#endif
    {
    }

    /**
     * Creates a new AntlrInputStream from this stream's data and length,
     * carrying over the source name.
     */
    public AntlrInputStream CreateCopy()
    {
        AntlrInputStream copy = new AntlrInputStream(this.data, this.n);
        copy.name = this.SourceName;
        return copy;
    }
}
#if !NET45
/**
 * Typed wrapper around the non-generic WeakReference, compiled only when
 * NET45 is not defined.
 */
private sealed class WeakReference<T>
    where T : class
{
    private readonly WeakReference _reference;

    /**
     * Wraps the given object in a weak reference.
     */
    public WeakReference(T reference)
    {
        this._reference = new WeakReference(reference);
    }

    /**
     * Reads the current target into the out parameter; returns true when the
     * target is non-null.
     */
    public bool TryGetTarget(out T reference)
    {
        object current = this._reference.Target;
        reference = (T)current;
        return current != null;
    }
}
#endif
}
}