Merge pull request #617 from sharwell/fix-588

Fix 588
This commit is contained in:
Terence Parr 2014-06-10 15:09:34 -07:00
commit fdc46c53e6
6 changed files with 389 additions and 56 deletions

View File

@ -32,8 +32,8 @@ package org.antlr.v4.automata;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.ActionTransition;
import org.antlr.v4.runtime.atn.BlockEndState;
import org.antlr.v4.runtime.atn.EpsilonTransition;
import org.antlr.v4.runtime.atn.PlusLoopbackState;
import org.antlr.v4.runtime.atn.RuleTransition;
import org.antlr.v4.runtime.atn.StarLoopbackState;
@ -64,7 +64,7 @@ public class TailEpsilonRemover extends ATNVisitor {
// if edge out of q is single epsilon to block end
// we can strip epsilon p-x->q-eps->r
Transition trans = q.transition(0);
if (q.getNumberOfTransitions() == 1 && trans.isEpsilon() && !(trans instanceof ActionTransition)) {
if (q.getNumberOfTransitions() == 1 && trans instanceof EpsilonTransition) {
ATNState r = trans.target;
if (r instanceof BlockEndState || r instanceof PlusLoopbackState || r instanceof StarLoopbackState) {
// skip over q

View File

@ -335,6 +335,33 @@ public abstract class BaseTest {
return null;
}
/**
 * Loads a classpath resource located in the same package as this class
 * and returns its entire contents as a string.
 *
 * @param fileName the resource name, relative to the package of this class;
 *  may be {@code null}
 * @param encoding the charset name used to decode the resource, or
 *  {@code null} to decode with the platform default charset
 * @return the complete resource text, or {@code null} if {@code fileName}
 *  is {@code null}
 * @throws IOException if the resource cannot be found, the encoding is not
 *  supported, or reading fails
 */
protected String load(String fileName, @Nullable String encoding)
	throws IOException
{
	if ( fileName==null ) {
		return null;
	}

	String fullFileName = getClass().getPackage().getName().replace('.', '/') + '/' + fileName;
	InputStream fis = getClass().getClassLoader().getResourceAsStream(fullFileName);
	if ( fis==null ) {
		// getResourceAsStream signals a missing resource with null; report it
		// explicitly instead of failing later with a NullPointerException.
		throw new IOException("Resource not found on classpath: " + fullFileName);
	}

	InputStreamReader isr = null;
	try {
		if ( encoding!=null ) {
			isr = new InputStreamReader(fis, encoding);
		}
		else {
			isr = new InputStreamReader(fis);
		}

		// A single read() may return fewer characters than are available and
		// returns -1 at end of stream, so keep reading until the end.
		StringBuilder text = new StringBuilder();
		char[] data = new char[65000];
		int n;
		while ( (n = isr.read(data))!=-1 ) {
			text.append(data, 0, n);
		}
		return text.toString();
	}
	finally {
		// Closing the reader also closes the stream; if the reader was never
		// created (e.g. unsupported encoding) close the stream directly.
		if ( isr!=null ) {
			isr.close();
		}
		else {
			fis.close();
		}
	}
}
/** Wow! much faster than compiling outside of VM. Finicky though.
* Had rules called r and modulo. Wouldn't compile til I changed to 'a'.
*/

View File

@ -0,0 +1,348 @@
grammar Psl;
@parser::members
{
/**
 * Prints the position in line and the text length of a token to
 * standard output.
 *
 * @param name
 *     Label printed in front of the position data.
 * @param tok
 *     The token whose position and text length are reported.
 */
public void printPosition(String name, Token tok)
{
	int column = tok.getCharPositionInLine();
	int length = tok.getText().length();
	System.out.printf("%s: pos %d, len %d%n", name, column, length);
}
/**
 * Checks whether a set of digit groups and commas forms a valid
 * comma-number.
 *
 * There must be exactly one more digit group than commas. Every comma
 * must start at the column where the preceding digit group ends, and
 * the following digit group must start one column after the comma, so
 * no internal white space is allowed.
 *
 * @param digits
 *     The groups of digits, each group in a separate item.
 * @param commas
 *     The commas found separating the digit groups.
 *
 * @return true (valid), false (invalid)
 */
public boolean isValidCommaNumber(List<Token> digits, List<Token> commas)
{
	if (digits.size() != commas.size() + 1)
	{
		return false;
	}

	for (int i = 0; i < commas.size(); i++)
	{
		Token left = digits.get(i);
		Token comma = commas.get(i);
		Token right = digits.get(i + 1);

		int leftEnd = left.getCharPositionInLine() + left.getText().length();
		int commaColumn = comma.getCharPositionInLine();
		int rightStart = right.getCharPositionInLine();

		boolean adjacent = leftEnd == commaColumn && commaColumn + 1 == rightStart;
		if (!adjacent)
		{
			return false;
		}
	}
	return true;
}
/**
 * Checks whether the pieces of a floating-point number form a valid
 * number with no gaps between them.
 *
 * At least one of the whole or fraction parts must be present.
 * The decimal point is required. When present, the whole part must end
 * at the column of the decimal point, and the fraction part must start
 * one column after it.
 *
 * @param whole
 *     The whole part of the number. Can be null.
 * @param period
 *     The decimal point.
 * @param fraction
 *     The fraction part of the number. Can be null.
 *
 * @return true (valid), false (invalid)
 */
public boolean isValidFloatingConstant(
	Token whole,
	Token period,
	Token fraction
)
{
	if (whole != null)
	{
		int endOfWhole = whole.getCharPositionInLine() + whole.getText().length();
		if (endOfWhole != period.getCharPositionInLine())
		{
			return false;
		}
	}

	if (fraction != null)
	{
		int afterPeriod = period.getCharPositionInLine() + 1;
		if (afterPeriod != fraction.getCharPositionInLine())
		{
			return false;
		}
	}

	// Valid only if at least one side of the decimal point has digits.
	return whole != null || fraction != null;
}
}
// Top-level rule: exactly one numeric_range followed by end of input.
translation_unit
: numeric_range
EOF
;
// A pattern is a single numeric_range; it is referenced from the term rule.
pattern
: numeric_range
;
// A range introduced by the EURO_NUMBER or NUMBER keyword: two
// numeric_endpoint values separated by TILDE and enclosed in parentheses.
// Both alternatives have the same shape and differ only in the keyword.
numeric_range
: EURO_NUMBER
PAREN_LEFT
numeric_endpoint
TILDE
numeric_endpoint
PAREN_RIGHT
| NUMBER
PAREN_LEFT
numeric_endpoint
TILDE
numeric_endpoint
PAREN_RIGHT
;
// One end of a numeric_range: an integer constant, a floating constant,
// or a comma number, each with an optional leading PLUS or MINUS sign.
numeric_endpoint
: ( PLUS | MINUS )? integer_constant
| ( PLUS | MINUS )? floating_constant
| ( PLUS | MINUS )? comma_number
;
/* Floating-point numbers and comma numbers are valid only
 * as numeric endpoints in number() or euro_number(). Otherwise,
 * the pieces should be parsed as separate lexical tokens, such as
 *
 * integer_constant '.' integer_constant
 *
 * Because of parser lookahead and the subtle interactions between
 * the parser and the lexer, changing lexical modes from the parser
 * is not safe. The code below checks the constraints for floating
 * numbers, forbidding internal white space.
 */
// Active alternatives:
//   1. comma_number PERIOD with an optional fraction; the last token of
//      the comma_number is passed as the whole part.
//   2. PERIOD followed by a required fraction; no whole part is passed.
// The alternative for a plain DIGIT_SEQUENCE whole part is commented out.
// Each active alternative ends in a semantic predicate that calls
// isValidFloatingConstant; the fail option supplies the error message.
floating_constant
: comma_number PERIOD fraction=DIGIT_SEQUENCE?
{
isValidFloatingConstant($comma_number.stop, $PERIOD, $fraction)
}?<fail = {
"COMMA:A floating-point constant cannot have internal white space"
}>
/*| whole=DIGIT_SEQUENCE PERIOD fraction=DIGIT_SEQUENCE?
{
isValidFloatingConstant($whole, $PERIOD, $fraction)
}?/* <fail = {
"DIG:A floating-point constant cannot have internal white space"
}>*/
| PERIOD fraction=DIGIT_SEQUENCE
{
isValidFloatingConstant(null, $PERIOD, $fraction)
}?<fail = {
"DEC:A floating-point constant cannot have internal white space"
}>
;
// Two or more DIGIT_SEQUENCE groups separated by COMMA tokens. The groups
// and the commas are collected into the digits and commas list labels,
// which the trailing semantic predicate passes to isValidCommaNumber.
// The fail option supplies the error message.
comma_number
: digits+=DIGIT_SEQUENCE ( commas+=COMMA digits+=DIGIT_SEQUENCE )+
{
isValidCommaNumber($digits, $commas)
}?<fail = {
"A comma-number cannot have internal white space"
}>
;
// Either a bare term, or the RETURN keyword with an optional parenthesized
// argument (an integer_constant or ALL) followed by a term.
term_expression
: term
| RETURN
(
PAREN_LEFT
( integer_constant | ALL )
PAREN_RIGHT
)?
term
;
// A pattern, or a term_expression enclosed in parentheses.
term
: pattern
| PAREN_LEFT term_expression PAREN_RIGHT
;
// Accepts any one of the integer-like lexer tokens.
integer_constant
: DIGIT_SEQUENCE
| INTEGER_CONSTANT
| BINARY_CONSTANT
| DECIMAL_CONSTANT
| HEXADECIMAL_CONSTANT
| OCTAL_CONSTANT
;
// LEXER
/* Letter fragments: each fragment matches one letter in either case.
 * The keyword rules are spelled with these fragments, which makes the
 * keywords case-insensitive.
 */
fragment A: [Aa] ;
fragment B: [Bb] ;
fragment C: [Cc] ;
fragment D: [Dd] ;
fragment E: [Ee] ;
fragment F: [Ff] ;
fragment G: [Gg] ;
fragment H: [Hh] ;
fragment I: [Ii] ;
fragment J: [Jj] ;
fragment K: [Kk] ;
fragment L: [Ll] ;
fragment M: [Mm] ;
fragment N: [Nn] ;
fragment O: [Oo] ;
fragment P: [Pp] ;
fragment Q: [Qq] ;
fragment R: [Rr] ;
fragment S: [Ss] ;
fragment T: [Tt] ;
fragment U: [Uu] ;
fragment V: [Vv] ;
fragment W: [Ww] ;
fragment X: [Xx] ;
fragment Y: [Yy] ;
fragment Z: [Zz] ;
// White space handling: all three rules below use the skip command, so
// matched white space is discarded instead of being passed to the parser.

// One or more spaces or tabs.
WHITESPACE_IN_LINE
: [ \t]+
-> skip
;
// A single line break, with an optional carriage return before the newline.
NEWLINE
: '\r'? '\n'
-> skip
;
// Any run of spaces, tabs, carriage returns, and newlines.
WHITESPACE_ALL
: [ \n\r\t]+
-> skip
;
/* A sequence of decimal digits is useful on its own,
 * to avoid the base-prefixes (0b, 0x, ...) that an
 * INTEGER_CONSTANT would allow.
 * Need to define before INTEGER_CONSTANT to make sure
 * DIGIT_SEQUENCE is recognized before INTEGER_CONSTANT.
 */
DIGIT_SEQUENCE
: [0-9]+
;
// Union of the four base-specific integer constant forms defined below.
INTEGER_CONSTANT
: BINARY_CONSTANT
| DECIMAL_CONSTANT
| HEXADECIMAL_CONSTANT
| OCTAL_CONSTANT
;
// Binary: 0b or 0B prefix followed by one or more digits 0-1.
BINARY_CONSTANT
: '0' [Bb] [0-1]+
;
// Decimal: optional 0d or 0D prefix followed by one or more digits 0-9.
// NOTE(review): without the prefix this matches the same text as
// DIGIT_SEQUENCE, which is declared earlier - confirm which token is intended.
DECIMAL_CONSTANT
: ( '0' [Dd] )? [0-9]+
;
// Hexadecimal: 0h, 0H, 0x, or 0X prefix followed by one or more hex digits.
HEXADECIMAL_CONSTANT
: '0' [HhXx] [0-9a-fA-F]+
;
// Octal: 0o or 0O prefix followed by one or more digits 0-7.
OCTAL_CONSTANT
: '0' [Oo] [0-7]+
;
/* keywords
 *
 * Each keyword is spelled with the single-letter fragments, so it is
 * matched through whatever character set each fragment defines.
 * NOTE(review): IDENTIFIER is declared after the keywords and can match
 * the same text; presumably the declaration order is what makes the
 * keyword token win - confirm against the ANTLR lexer rules.
 */
ALL
: A L L
;
EURO_NUMBER
: E U R O '_' N U M B E R
;
NUMBER
: N U M B E R
;
RETURN
: R E T U R N
;
// A letter followed by any number of letters, digits, or underscores.
IDENTIFIER
: [A-Za-z][A-Za-z0-9_]*
;
/* The single-character tokens.
 * Each rule matches exactly one literal punctuation character.
 */
COMMA
: ','
;
MINUS
: '-'
;
PAREN_LEFT
: '('
;
PAREN_RIGHT
: ')'
;
PERIOD
: '.'
;
PLUS
: '+'
;
TILDE
: '~'
;
/* This rule must be last (or nearly last) to avoid
 * matching individual characters for other rules.
 * It matches any single character other than a newline or
 * a carriage return.
 */
ANY_CHAR_BUT_NEWLINE
: ~[\n\r]
;

View File

@ -657,31 +657,4 @@ public class TestLexerExec extends BaseTest {
"[@1,5:4='<EOF>',<-1>,1:5]\n";
assertEquals(expecting, found);
}
/**
 * Loads a classpath resource located in the same package as this class
 * and returns its entire contents as a string.
 *
 * @param fileName the resource name, relative to the package of this class;
 *  may be {@code null}
 * @param encoding the charset name used to decode the resource, or
 *  {@code null} to decode with the platform default charset
 * @return the complete resource text, or {@code null} if {@code fileName}
 *  is {@code null}
 * @throws IOException if the resource cannot be found, the encoding is not
 *  supported, or reading fails
 */
protected String load(String fileName, @Nullable String encoding)
	throws IOException
{
	if ( fileName==null ) {
		return null;
	}

	String fullFileName = getClass().getPackage().getName().replace('.', '/') + '/' + fileName;
	InputStream fis = getClass().getClassLoader().getResourceAsStream(fullFileName);
	if ( fis==null ) {
		// getResourceAsStream signals a missing resource with null; report it
		// explicitly instead of failing later with a NullPointerException.
		throw new IOException("Resource not found on classpath: " + fullFileName);
	}

	InputStreamReader isr = null;
	try {
		if ( encoding!=null ) {
			isr = new InputStreamReader(fis, encoding);
		}
		else {
			isr = new InputStreamReader(fis);
		}

		// A single read() may return fewer characters than are available and
		// returns -1 at end of stream, so keep reading until the end.
		StringBuilder text = new StringBuilder();
		char[] data = new char[65000];
		int n;
		while ( (n = isr.read(data))!=-1 ) {
			text.append(data, 0, n);
		}
		return text.toString();
	}
	finally {
		// Closing the reader also closes the stream; if the reader was never
		// created (e.g. unsupported encoding) close the stream directly.
		if ( isr!=null ) {
			isr.close();
		}
		else {
			fis.close();
		}
	}
}
}

View File

@ -515,4 +515,16 @@ public class TestParserExec extends BaseTest {
"a 34 c", false);
assertEquals("a34c\n", found);
}
/**
 * Regression test for antlr/antlr4#588 "ClassCastException during
 * semantic predicate handling".
 * https://github.com/antlr/antlr4/issues/588
 *
 * Loads the Psl.g4 grammar, parses an input whose period and digits are
 * separated by a space starting from the floating_constant rule, and
 * expects an empty result from execParser together with the fail message
 * of the failed predicate in the captured error output.
 */
@Test public void testFailedPredicateExceptionState() throws Exception {
	String grammarFileName = "Psl.g4";
	String startRule = "floating_constant";
	String input = " . 234";
	String expectedErrors =
		"line 1:6 rule floating_constant DEC:A floating-point constant cannot have internal white space\n";

	String grammar = load(grammarFileName, "UTF-8");
	String found = execParser(grammarFileName, grammar, "PslParser", "PslLexer", startRule, input, false);

	assertEquals("", found);
	assertEquals(expectedErrors, stderrDuringParse);
}
}

View File

@ -1134,33 +1134,6 @@ public class TestPerformance extends BaseTest {
assertTrue(success);
}
/**
 * Loads a classpath resource located in the same package as this class
 * and returns its entire contents as a string.
 *
 * @param fileName the resource name, relative to the package of this class;
 *  may be {@code null}
 * @param encoding the charset name used to decode the resource, or
 *  {@code null} to decode with the platform default charset
 * @return the complete resource text, or {@code null} if {@code fileName}
 *  is {@code null}
 * @throws IOException if the resource cannot be found, the encoding is not
 *  supported, or reading fails
 */
protected String load(String fileName, @Nullable String encoding)
	throws IOException
{
	if ( fileName==null ) {
		return null;
	}

	String fullFileName = getClass().getPackage().getName().replace('.', '/') + '/' + fileName;
	InputStream fis = getClass().getClassLoader().getResourceAsStream(fullFileName);
	if ( fis==null ) {
		// getResourceAsStream signals a missing resource with null; report it
		// explicitly instead of failing later with a NullPointerException.
		throw new IOException("Resource not found on classpath: " + fullFileName);
	}

	InputStreamReader isr = null;
	try {
		if ( encoding!=null ) {
			isr = new InputStreamReader(fis, encoding);
		}
		else {
			isr = new InputStreamReader(fis);
		}

		// A single read() may return fewer characters than are available and
		// returns -1 at end of stream, so keep reading until the end.
		StringBuilder text = new StringBuilder();
		char[] data = new char[65000];
		int n;
		while ( (n = isr.read(data))!=-1 ) {
			text.append(data, 0, n);
		}
		return text.toString();
	}
	finally {
		// Closing the reader also closes the stream; if the reader was never
		// created (e.g. unsupported encoding) close the stream directly.
		if ( isr!=null ) {
			isr.close();
		}
		else {
			fis.close();
		}
	}
}
private static void updateChecksum(Checksum checksum, int value) {
checksum.update((value) & 0xFF);
checksum.update((value >>> 8) & 0xFF);