diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java index 58ac829d4..76cd262d7 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java @@ -156,8 +156,8 @@ public class LexerATNSimulator extends ATNSimulator { s.edges[t] == null ) { try { - int ttype = failOverToATN(input, s); - return ttype; + ATN_failover++; + return failOverToATN(input, s); } catch (LexerNoViableAltException nvae) { atnException = nvae; @@ -196,42 +196,6 @@ public class LexerATNSimulator extends ATNSimulator { return dfaPrevAccept.state.prediction; } - int failOverToATN(CharStream input, DFAState s) { - LexerNoViableAltException atnException = null; - if ( dfa_debug ) System.out.println("no edge for "+(char)input.LA(1)); - if ( dfa_debug ) { - System.out.println("ATN exec upon "+ - input.substring(startIndex,input.index())+ - " at DFA state "+s.stateNumber+" = "+s.configs); - } -// try { - ATN_failover++; - int ttype = exec(input, s.configs); - if ( dfa_debug ) { - System.out.println("back from DFA update, ttype="+ttype+ - ", dfa[mode "+mode+"]=\n"+ - dfa[mode].toLexerString()); - } - // action already executed by ATN - // we've updated DFA, exec'd action, and have our deepest answer - return ttype; -// } -// catch (LexerNoViableAltException nvae) { -// // The ATN could not match anything starting from s.configs -// // so we had an error edge. Re-throw the exception -// // if there was no previous accept state here in DFA. 
-// throw nvae; -// // dead end; no where to go, fall back on prev -// } - } - - protected void markAcceptState(ExecState state, CharStream input) { - state.marker = input.mark(); - state.index = input.index(); - state.line = line; - state.charPos = charPositionInLine; - } - protected int exec(CharStream input, OrderedHashSet s0) { //System.out.println("enter exec index "+input.index()+" from "+s0); OrderedHashSet closure = new OrderedHashSet(); @@ -275,7 +239,7 @@ public class LexerATNSimulator extends ATNSimulator { processAcceptStates(input, reach); consume(input); - if ( t!=CharStream.EOF ) addDFAEdge(closure, t, reach); + addDFAEdge(closure, t, reach); t = input.LA(1); // swap to avoid reallocating space @@ -295,11 +259,6 @@ public class LexerATNSimulator extends ATNSimulator { throw new LexerNoViableAltException(recog, input, startIndex, reach); } - if ( debug ) { - System.out.println("ACCEPT " +atnPrevAccept.config.toString(recog, true) + - " index " +atnPrevAccept.index); - } - int ruleIndex = atnPrevAccept.config.state.ruleIndex; accept(input, ruleIndex, atnPrevAccept); return atn.ruleToTokenType[ruleIndex]; @@ -480,6 +439,31 @@ public class LexerATNSimulator extends ATNSimulator { return c; } + int failOverToATN(CharStream input, DFAState s) { + if ( dfa_debug ) System.out.println("no edge for "+(char)input.LA(1)); + if ( dfa_debug ) { + System.out.println("ATN exec upon "+ + input.substring(startIndex,input.index())+ + " at DFA state "+s.stateNumber+" = "+s.configs); + } + int ttype = exec(input, s.configs); + if ( dfa_debug ) { + System.out.println("back from DFA update, ttype="+ttype+ + ", dfa[mode "+mode+"]=\n"+ + dfa[mode].toLexerString()); + } + // action already executed by ATN + // we've updated DFA, exec'd action, and have our deepest answer + return ttype; + } + + protected void markAcceptState(ExecState state, CharStream input) { + state.marker = input.mark(); + state.index = input.index(); + state.line = line; + state.charPos = 
charPositionInLine; + } + protected void resetPrevAccept(ExecState prevAccept) { prevAccept.marker = -1; prevAccept.index = -1; diff --git a/tool/test/org/antlr/v4/test/TestLexerErrors.java b/tool/test/org/antlr/v4/test/TestLexerErrors.java new file mode 100644 index 000000000..35feed0fb --- /dev/null +++ b/tool/test/org/antlr/v4/test/TestLexerErrors.java @@ -0,0 +1,132 @@ +/* + [The "BSD license"] + Copyright (c) 2011 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.antlr.v4.test; + +import org.junit.Test; + +public class TestLexerErrors extends BaseTest { + // TEST DETECTION + @Test public void testInvalidCharAtStart() throws Exception { + String grammar = + "lexer grammar L;\n" + + "A : 'a' 'b' ;\n"; + String tokens = execLexer("L.g", grammar, "L", "x"); + String expectingTokens = + "[@0,1:0='',<-1>,1:1]\n"; + assertEquals(expectingTokens, tokens); + String expectingError = "line 1:0 token recognition error at: 'x'\n"; + String error = stderrDuringParse; + assertEquals(expectingError, error); + } + + @Test public void testInvalidCharAtStartAfterDFACache() throws Exception { + String grammar = + "lexer grammar L;\n" + + "A : 'a' 'b' ;\n"; + String tokens = execLexer("L.g", grammar, "L", "abx"); + String expectingTokens = + "[@0,0:1='ab',<3>,1:0]\n" + + "[@1,3:2='',<-1>,1:3]\n"; + assertEquals(expectingTokens, tokens); + String expectingError = "line 1:2 token recognition error at: 'x'\n"; + String error = stderrDuringParse; + assertEquals(expectingError, error); + } + + @Test public void testInvalidCharInToken() throws Exception { + String grammar = + "lexer grammar L;\n" + + "A : 'a' 'b' ;\n"; + String tokens = execLexer("L.g", grammar, "L", "ax"); + String expectingTokens = + "[@0,2:1='',<-1>,1:1]\n"; + assertEquals(expectingTokens, tokens); + String expectingError = "line 1:0 token recognition error at: 'ax'\n"; + String error = stderrDuringParse; + assertEquals(expectingError, error); + } + + @Test public void testInvalidCharInTokenAfterDFACache() throws Exception { + String grammar = + "lexer grammar L;\n" + + "A : 'a' 'b' ;\n"; + String tokens = execLexer("L.g", grammar, "L", "abax"); + String expectingTokens = + "[@0,0:1='ab',<3>,1:0]\n" + + "[@1,4:3='',<-1>,1:4]\n"; + assertEquals(expectingTokens, tokens); + String expectingError = "line 1:2 token recognition error at: 'ax'\n"; + String error = stderrDuringParse; + assertEquals(expectingError, error); + } + + @Test public void 
testDFAToATNThatFailsBackToDFA() throws Exception { + String grammar = + "lexer grammar L;\n" + + "A : 'ab' ;\n"+ + "B : 'abc' ;\n"; + // The first ab caches the DFA then abx goes through the DFA but + // into the ATN for the x, which fails. Must go back into DFA + // and return to previous dfa accept state + String tokens = execLexer("L.g", grammar, "L", "ababx"); + String expectingTokens = + "[@0,0:1='ab',<3>,1:0]\n" + + "[@1,2:3='ab',<3>,1:2]\n" + + "[@2,5:4='',<-1>,1:5]\n"; + assertEquals(expectingTokens, tokens); + String expectingError = "line 1:4 token recognition error at: 'x'\n"; + String error = stderrDuringParse; + assertEquals(expectingError, error); + } + + @Test public void testDFAToATNThatMatchesThenFailsInATN() throws Exception { + String grammar = + "lexer grammar L;\n" + + "A : 'ab' ;\n"+ + "B : 'abc' ;\n"+ + "C : 'abcd' ;\n"; + // The first ab caches the DFA then abcx goes through the DFA but + // into the ATN for the c. It marks that as an accept state + // and then keeps going in the ATN. It fails on the x, but + // uses the previous accept state in the ATN, not the DFA + String tokens = execLexer("L.g", grammar, "L", "ababcx"); + String expectingTokens = + "[@0,0:1='ab',<3>,1:0]\n" + + "[@1,2:4='abc',<4>,1:2]\n" + + "[@2,6:5='',<-1>,1:6]\n"; + assertEquals(expectingTokens, tokens); + String expectingError = "line 1:5 token recognition error at: 'x'\n"; + String error = stderrDuringParse; + assertEquals(expectingError, error); + } + + // TEST RECOVERY + +}