got lexer nongreedy loops going
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9018]
This commit is contained in:
parent
a9782118ac
commit
f44c49a8b8
|
@ -50,6 +50,6 @@ public class LexerNoViableAltException extends LexerRecognitionExeption {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
return "NoViableAltException('"+(char)c+"'";
|
||||
return "NoViableAltException('"+(char)c+"')";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -184,6 +184,21 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
prevAccept = c;
|
||||
prevAcceptIndex = input.index();
|
||||
}
|
||||
|
||||
// if we reach lexer accept state, toss out any configs in rest
|
||||
// of configs work list associated with this rule (config.alt);
|
||||
// that rule is done. this is how we cut off nongreedy .+ loops.
|
||||
deleteConfigsForAlt(closure, ci, c.alt);
|
||||
// int j=ci+1;
|
||||
// while ( j<closure.size() ) {
|
||||
// ATNConfig c2 = closure.get(j);
|
||||
// if ( c2.alt == c.alt ) {
|
||||
// System.out.println("kill "+c2);
|
||||
// closure.remove(j);
|
||||
// }
|
||||
// else j++;
|
||||
// }
|
||||
|
||||
// move to next char, looking for longer match
|
||||
// (we continue processing if there are states in reach)
|
||||
}
|
||||
|
@ -275,19 +290,19 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
return null;
|
||||
}
|
||||
|
||||
/* TODO: use if we need nongreedy
|
||||
public void deleteConfigsForAlt(OrderedHashSet<ATNConfig> closure, int ci, int alt) {
|
||||
int j=ci+1;
|
||||
while ( j<closure.size() ) {
|
||||
ATNConfig c = closure.get(j);
|
||||
if ( c.alt == alt ) {
|
||||
boolean isWildcard = c.state.getClass() == ATNState.class &&
|
||||
c.state.transition(0).getClass() == WildcardTransition.class;
|
||||
if ( c.alt == alt && isWildcard ) {
|
||||
System.out.println("kill "+c);
|
||||
closure.remove(j);
|
||||
}
|
||||
else j++;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
protected OrderedHashSet<ATNConfig> computeStartState(IntStream input,
|
||||
ATNState p)
|
||||
|
@ -337,31 +352,36 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
ATNState p = config.state;
|
||||
for (int i=0; i<p.getNumberOfTransitions(); i++) {
|
||||
Transition t = p.transition(i);
|
||||
ATNConfig c = null;
|
||||
if ( t.getClass() == RuleTransition.class ) {
|
||||
RuleContext newContext =
|
||||
new RuleContext(config.context, p.stateNumber, t.target.stateNumber);
|
||||
c = new ATNConfig(config, t.target, newContext);
|
||||
}
|
||||
else if ( t.getClass() == PredicateTransition.class ) {
|
||||
PredicateTransition pt = (PredicateTransition)t;
|
||||
if ( recog.sempred(null, pt.ruleIndex, pt.predIndex) ) {
|
||||
c = new ATNConfig(config, t.target);
|
||||
c.traversedPredicate = true;
|
||||
}
|
||||
}
|
||||
// ignore actions; just exec one per rule upon accept
|
||||
else if ( t.getClass() == ActionTransition.class ) {
|
||||
c = new ATNConfig(config, t.target);
|
||||
}
|
||||
// TODO: forced actions?
|
||||
else if ( t.isEpsilon() ) {
|
||||
c = new ATNConfig(config, t.target);
|
||||
}
|
||||
ATNConfig c = getEpsilonTarget(config, t);
|
||||
if ( c!=null ) closure(c, configs);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the configuration reached from {@code config} by following the
 * single transition {@code t}, for the transition kinds handled below.
 * The method does not touch the input stream.
 *
 * <ul>
 *   <li>Rule transition: the new config targets {@code t.target} and carries a
 *       fresh {@code RuleContext} built from the current context, the current
 *       state number and the target state number.</li>
 *   <li>Predicate transition: the predicate is evaluated through
 *       {@code recog.sempred}; only when it returns true is a config created,
 *       and that config is flagged with {@code traversedPredicate}.</li>
 *   <li>Action transition or any other transition reporting
 *       {@code isEpsilon()}: a config at {@code t.target} with no other change.</li>
 * </ul>
 *
 * @param config the configuration whose state owns {@code t}
 * @param t      the transition to follow
 * @return the resulting configuration, or {@code null} when {@code t} is none
 *         of the kinds above or is a predicate that evaluated to false
 */
public ATNConfig getEpsilonTarget(ATNConfig config, Transition t) {
|
||||
ATNState p = config.state;
|
||||
ATNConfig c = null;
|
||||
// Exact class comparisons (not instanceof): subclasses of these transition
// types would fall through to the later branches.
if ( t.getClass() == RuleTransition.class ) {
|
||||
RuleContext newContext =
|
||||
new RuleContext(config.context, p.stateNumber, t.target.stateNumber);
|
||||
c = new ATNConfig(config, t.target, newContext);
|
||||
}
|
||||
else if ( t.getClass() == PredicateTransition.class ) {
|
||||
PredicateTransition pt = (PredicateTransition)t;
|
||||
// A false predicate leaves c == null, so the caller sees no target.
if ( recog.sempred(null, pt.ruleIndex, pt.predIndex) ) {
|
||||
c = new ATNConfig(config, t.target);
|
||||
c.traversedPredicate = true;
|
||||
}
|
||||
}
|
||||
// ignore actions; just exec one per rule upon accept
|
||||
else if ( t.getClass() == ActionTransition.class ) {
|
||||
c = new ATNConfig(config, t.target);
|
||||
}
|
||||
else if ( t.isEpsilon() ) {
|
||||
c = new ATNConfig(config, t.target);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
protected void addDFAEdge(OrderedHashSet<ATNConfig> p,
|
||||
int t,
|
||||
OrderedHashSet<ATNConfig> q)
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
grammar Errors;
|
||||
|
||||
stat: 'return' INT
|
||||
| ID '=' expr ';'
|
||||
| ID '(' expr (',' expr)* ')' ';'
|
||||
/ ID .* '(' expr (',' expr)* ')' ';'
|
||||
/ ID '=' .* ';' // bad assignment
|
||||
/ .* ';' // bad stat
|
||||
/ .* // match anything else? when to stop?
|
||||
/ // match anything else?
|
||||
;
|
||||
catch[Exception e] { }
|
||||
finally { }
|
||||
|
||||
// error to match might be diff than how to resynch? maybe just
|
||||
// include resynch pattern on end of error alt.
|
||||
|
||||
/*
|
||||
Traps any recog exception in anything called from rule or matched in that rule.
|
||||
a : expr ';'
|
||||
/ '--' ID ';' // catches any problem in expr or matching ';'
|
||||
;
|
||||
|
||||
If no err alt matches, defaults to normal error mechanism at rule level.
|
||||
report. resync.
|
||||
*/
|
||||
|
||||
atom: '(' expr ')'
|
||||
| INT
|
||||
/ '(' expr // missing RP; how to resync?
|
||||
/ '(' ')'
|
||||
;
|
||||
|
||||
// do error alts affect FOLLOW sync sets? nope.
|
||||
|
||||
// foo -> bar says how to make resulting tree for bad alts
|
||||
|
||||
expr: atom ('*' atom)* ;
|
||||
|
||||
atom: INT ;
|
||||
|
||||
ID : 'a'..'z'+ ;
|
||||
|
||||
WS : (' '|'\n')* ;
|
||||
|
||||
/*
|
||||
Stop .* when it sees any viable following token, even if it uses FOLLOW. So,
|
||||
err alt
|
||||
|
||||
/ .*
|
||||
|
||||
would match until it sees something in FOLLOW (but not context-sensitive follow).
|
||||
actually maybe it would be sensitive; just use real outer context when matching
|
||||
error alts. who cares about speed.
|
||||
|
||||
*/
|
|
@ -368,8 +368,17 @@ public class ParserATNFactory implements ATNFactory {
|
|||
plusAST.atnState = blkStart;
|
||||
blkStart.loopBackState = loop;
|
||||
epsilon(blkEnd, loop); // blk can see loop back
|
||||
epsilon(loop, blkStart); // loop back to start
|
||||
epsilon(loop, end); // or exit
|
||||
|
||||
BlockAST blkAST = (BlockAST)plusAST.getChild(0);
|
||||
if ( !g.isLexer() || isGreedy(blkAST) ) {
|
||||
epsilon(loop, blkStart); // loop back to start
|
||||
epsilon(loop, end); // or exit
|
||||
}
|
||||
else { // only lexers flip entry/exit branches for nongreedy
|
||||
// if not greedy, priority to exit branch; make it first
|
||||
epsilon(loop, end); // exit
|
||||
epsilon(loop, blkStart); // loop back to start
|
||||
}
|
||||
|
||||
return new Handle(blkStart, end);
|
||||
}
|
||||
|
@ -395,8 +404,16 @@ public class ParserATNFactory implements ATNFactory {
|
|||
ATNState end = newState(ATNState.class, starAST);
|
||||
StarLoopbackState loop = (StarLoopbackState)newState(StarLoopbackState.class, starAST);
|
||||
|
||||
epsilon(entry, blkStart); // loop enter edge (alt 1)
|
||||
epsilon(entry, end); // bypass loop edge (alt 2)
|
||||
BlockAST blkAST = (BlockAST)starAST.getChild(0);
|
||||
if ( !g.isLexer() || isGreedy(blkAST) ) {
|
||||
epsilon(entry, blkStart); // loop enter edge (alt 1)
|
||||
epsilon(entry, end); // bypass loop edge (alt 2)
|
||||
}
|
||||
else { // only lexers flip entry/exit branches for nongreedy
|
||||
// if not greedy, priority to exit branch; make it first
|
||||
epsilon(entry, end); // bypass loop edge (alt 1)
|
||||
epsilon(entry, blkStart); // loop enter edge (alt 2)
|
||||
}
|
||||
epsilon(blkEnd, loop); // block end hits loop back
|
||||
epsilon(loop, entry); // loop back to entry/exit decision
|
||||
|
||||
|
@ -494,13 +511,12 @@ public class ParserATNFactory implements ATNFactory {
|
|||
public ATNState newState() { return newState(null); }
|
||||
|
||||
public boolean isGreedy(BlockAST blkAST) {
|
||||
return true;
|
||||
// boolean greedy = true;
|
||||
// String greedyOption = blkAST.getOption("greedy");
|
||||
// if ( blockHasWildcardAlt(blkAST) || greedyOption!=null&&greedyOption.equals("false") ) {
|
||||
// greedy = false;
|
||||
// }
|
||||
// return greedy;
|
||||
boolean greedy = true;
|
||||
String greedyOption = blkAST.getOption("greedy");
|
||||
if ( blockHasWildcardAlt(blkAST) || greedyOption!=null&&greedyOption.equals("false") ) {
|
||||
greedy = false;
|
||||
}
|
||||
return greedy;
|
||||
}
|
||||
|
||||
// (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .))
|
||||
|
|
|
@ -199,10 +199,14 @@ optionsSpec
|
|||
;
|
||||
|
||||
option
|
||||
: ^(ASSIGN ID optionValue)
|
||||
@init {
|
||||
boolean rule = inContext("RULE ...");
|
||||
boolean block = inContext("BLOCK ...");
|
||||
}
|
||||
: ^(a=ASSIGN ID optionValue)
|
||||
{
|
||||
if ( inContext("RULE") ) ruleOption($ID, $optionValue.v);
|
||||
else if ( inContext("BLOCK") ) blockOption($ID, $optionValue.v);
|
||||
if ( rule ) ruleOption($ID, $optionValue.v);
|
||||
else if ( block ) blockOption($ID, $optionValue.v);
|
||||
else grammarOption($ID, $optionValue.v);
|
||||
}
|
||||
;
|
||||
|
|
|
@ -8,6 +8,19 @@ import org.junit.*;
|
|||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Lexer rules are a little quirky when it comes to wildcards. The problem
|
||||
* stems from the fact that we want the longest match to win among
|
||||
* several rules and even within a rule. However, that conflicts
|
||||
* with the notion of non-greedy, which by definition tries to match
|
||||
* the fewest possible. During ATN construction, non-greedy loops
|
||||
* have their entry and exit branches reversed so that the ATN
|
||||
* simulator will see the exit branch 1st, giving it a priority. The
|
||||
* 1st path to the stop state kills any other paths for that rule
|
||||
* that begin with the wildcard. In general, this does everything we
|
||||
* want, but occasionally there are some quirks as you'll see from
|
||||
* the tests below.
|
||||
*/
|
||||
public class TestATNLexerInterpreter extends BaseTest {
|
||||
@Test public void testLexerTwoRules() throws Exception {
|
||||
LexerGrammar lg = new LexerGrammar(
|
||||
|
@ -22,12 +35,54 @@ public class TestATNLexerInterpreter extends BaseTest {
|
|||
LexerGrammar lg = new LexerGrammar(
|
||||
"lexer grammar L;\n"+
|
||||
"A : 'xy'\n" +
|
||||
" | 'xyz'\n" +
|
||||
" | 'xyz'\n" + // make sure nongreedy mech cut off doesn't kill this alt
|
||||
" ;\n");
|
||||
checkLexerMatches(lg, "xy", "A, EOF");
|
||||
checkLexerMatches(lg, "xyz", "A, EOF");
|
||||
}
|
||||
|
||||
/**
 * One rule whose longer alternative ('xyz') is listed before its shorter
 * prefix alternative ('xy'). Both inputs "xy" and "xyz" are expected to
 * produce a single A token followed by EOF.
 *
 * NOTE(review): the return value of checkLexerMatches is ignored here; in the
 * version of that helper visible in this change, a LexerRecognitionExeption is
 * returned rather than thrown, so a lexer error would presumably not fail
 * this test. Confirm.
 */
@Test public void testShortLongRule2() throws Exception {
|
||||
LexerGrammar lg = new LexerGrammar(
|
||||
"lexer grammar L;\n"+
|
||||
"A : 'xyz'\n" + // make sure nongreedy mech cut off doesn't kill this alt
|
||||
" | 'xy'\n" +
|
||||
" ;\n");
|
||||
checkLexerMatches(lg, "xy", "A, EOF");
|
||||
checkLexerMatches(lg, "xyz", "A, EOF");
|
||||
}
|
||||
|
||||
/**
 * One rule whose first alternative is 'xy' followed by a wildcard and whose
 * second alternative is plain 'xy'. Both inputs "xy" and "xyz" are expected
 * to produce a single A token followed by EOF.
 *
 * NOTE(review): the return value of checkLexerMatches is ignored here; in the
 * version of that helper visible in this change, a LexerRecognitionExeption is
 * returned rather than thrown, so a lexer error would presumably not fail
 * this test. Confirm.
 */
@Test public void testWildOnEnd() throws Exception {
|
||||
LexerGrammar lg = new LexerGrammar(
|
||||
"lexer grammar L;\n"+
|
||||
"A : 'xy' .\n" + // should not pursue '.' since xy already hit stop
|
||||
" | 'xy'\n" +
|
||||
" ;\n");
|
||||
checkLexerMatches(lg, "xy", "A, EOF");
|
||||
checkLexerMatches(lg, "xyz", "A, EOF");
|
||||
}
|
||||
|
||||
/**
 * Same two alternatives as the wildcard-first case, but with plain 'xy'
 * listed first and 'xy' followed by a wildcard listed second. Input "xy" is
 * expected to yield A, EOF. Input "xyz" is expected to make the lexer raise
 * an exception whose toString() is "NoViableAltException('z')".
 */
@Test public void testWildOnEndLast() throws Exception {
|
||||
LexerGrammar lg = new LexerGrammar(
|
||||
"lexer grammar L;\n"+
|
||||
"A : 'xy'\n" +
|
||||
" | 'xy' .\n" + // should not pursue '.' since xy already hit stop
|
||||
" ;\n");
|
||||
checkLexerMatches(lg, "xy", "A, EOF");
|
||||
// The expected-token string is only compared when no exception occurs;
// here the returned exception is what gets checked.
LexerRecognitionExeption e = checkLexerMatches(lg, "xyz", "A, EOF");
|
||||
// NOTE(review): if no exception is raised, e is presumably null and this
// line fails with a NullPointerException rather than an assertion failure.
assertEquals("NoViableAltException('z')", e.toString());
|
||||
}
|
||||
|
||||
/**
 * Documents a quirk of the wildcard cut-off: with plain 'xy' as the first
 * alternative and 'xy' . 'z' as the second, input "xyqz" is expected to make
 * the lexer raise an exception whose toString() is
 * "NoViableAltException('q')" instead of matching the second alternative.
 */
@Test public void testWildcardQuirk() throws Exception {
|
||||
LexerGrammar lg = new LexerGrammar(
|
||||
"lexer grammar L;\n"+
|
||||
"A : 'xy'\n" +
|
||||
" | 'xy' . 'z'\n" + // will not pursue '.' since xy already hit stop (prior alt)
|
||||
" ;\n");
|
||||
// The plain "xy" check below is disabled in the original.
// checkLexerMatches(lg, "xy", "A, EOF");
|
||||
LexerRecognitionExeption e = checkLexerMatches(lg, "xyqz", "A, EOF");
|
||||
// NOTE(review): if no exception is raised, e is presumably null and this
// line fails with a NullPointerException rather than an assertion failure.
assertEquals("NoViableAltException('q')", e.toString());
|
||||
}
|
||||
|
||||
@Test public void testLexerLoops() throws Exception {
|
||||
LexerGrammar lg = new LexerGrammar(
|
||||
"lexer grammar L;\n"+
|
||||
|
@ -98,15 +153,16 @@ public class TestATNLexerInterpreter extends BaseTest {
|
|||
checkLexerMatches(lg, "\"a\"", "STR, EOF");
|
||||
}
|
||||
|
||||
@Ignore public void testLexerWildcardNonGreedyPlusLoopByDefault() throws Exception {
|
||||
@Test public void testLexerWildcardNonGreedyPlusLoopByDefault() throws Exception {
|
||||
LexerGrammar lg = new LexerGrammar(
|
||||
"lexer grammar L;\n"+
|
||||
"CMT : '//' .+ '\\n' ;\n");
|
||||
"CMT : '//' (options {greedy=false;}:.)+ '\\n' ;\n");
|
||||
String expecting = "CMT, CMT, EOF";
|
||||
checkLexerMatches(lg, "//x\n//y\n", expecting);
|
||||
}
|
||||
|
||||
@Ignore public void testLexerGreedyOptionalShouldWorkAsWeExpect() throws Exception {
|
||||
// does not fail since ('*/')? can't match and still have the rule succeed
|
||||
@Test public void testLexerGreedyOptionalShouldWorkAsWeExpect() throws Exception {
|
||||
LexerGrammar lg = new LexerGrammar(
|
||||
"lexer grammar L;\n"+
|
||||
"CMT : '/*' ('*/')? '*/' ;\n");
|
||||
|
@ -114,7 +170,7 @@ public class TestATNLexerInterpreter extends BaseTest {
|
|||
checkLexerMatches(lg, "/**/", expecting);
|
||||
}
|
||||
|
||||
@Ignore public void testNonGreedyBetweenRules() throws Exception {
|
||||
@Test public void testNonGreedyBetweenRules() throws Exception {
|
||||
LexerGrammar lg = new LexerGrammar(
|
||||
"lexer grammar L;\n"+
|
||||
"A : '<a>' ;\n" +
|
||||
|
@ -123,14 +179,21 @@ public class TestATNLexerInterpreter extends BaseTest {
|
|||
checkLexerMatches(lg, "<a><x>", expecting);
|
||||
}
|
||||
|
||||
protected void checkLexerMatches(LexerGrammar lg, String inputString, String expecting) {
|
||||
protected LexerRecognitionExeption checkLexerMatches(LexerGrammar lg, String inputString, String expecting) {
|
||||
ATN atn = createATN(lg);
|
||||
CharStream input = new ANTLRStringStream(inputString);
|
||||
ATNState startState = atn.modeNameToStartState.get("DEFAULT_MODE");
|
||||
DOTGenerator dot = new DOTGenerator(lg);
|
||||
System.out.println(dot.getDOT(startState, true));
|
||||
|
||||
List<String> tokenTypes = getTokenTypes(lg, atn, input, false);
|
||||
List<String> tokenTypes = null;
|
||||
LexerRecognitionExeption retException = null;
|
||||
try {
|
||||
tokenTypes = getTokenTypes(lg, atn, input, false);
|
||||
}
|
||||
catch (LexerRecognitionExeption lre) { retException = lre; }
|
||||
if ( retException!=null ) return retException;
|
||||
|
||||
String result = Utils.join(tokenTypes.iterator(), ", ");
|
||||
System.out.println(tokenTypes);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -139,6 +202,7 @@ public class TestATNLexerInterpreter extends BaseTest {
|
|||
input.seek(0);
|
||||
List<String> tokenTypes2 = getTokenTypes(lg, atn, input, true);
|
||||
assertEquals("interp vs adaptive types differ", tokenTypes, tokenTypes2);
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -218,7 +218,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"1->8 ATOM -1,0,0\n" +
|
||||
"2->6 EPSILON 0,0,0\n" +
|
||||
"3->5 EPSILON 0,0,0\n" +
|
||||
"4->5 RULE 2,1,-1\n" +
|
||||
"4->5 RULE 2,1,0\n" +
|
||||
"5->1 EPSILON 0,0,0\n" +
|
||||
"6->7 ATOM 3,0,0\n" +
|
||||
"7->3 EPSILON 0,0,0\n";
|
||||
|
|
Loading…
Reference in New Issue