diff --git a/tool/src/org/antlr/v4/automata/ParserATNFactory.java b/tool/src/org/antlr/v4/automata/ParserATNFactory.java index 590038256..1b42ee375 100644 --- a/tool/src/org/antlr/v4/automata/ParserATNFactory.java +++ b/tool/src/org/antlr/v4/automata/ParserATNFactory.java @@ -73,6 +73,7 @@ import org.antlr.v4.tool.ast.AltAST; import org.antlr.v4.tool.ast.BlockAST; import org.antlr.v4.tool.ast.GrammarAST; import org.antlr.v4.tool.ast.PredAST; +import org.antlr.v4.tool.ast.QuantifierAST; import org.antlr.v4.tool.ast.TerminalAST; import java.lang.reflect.Constructor; @@ -425,8 +426,7 @@ public class ParserATNFactory implements ATNFactory { public Handle optional(@NotNull GrammarAST optAST, @NotNull Handle blk) { BlockStartState blkStart = (BlockStartState)blk.left; - BlockAST blkAST = (BlockAST)optAST.getChild(0); - if (isGreedy(blkAST)) { + if (((QuantifierAST)optAST).isGreedy()) { epsilon(blkStart, blk.right); } else { Transition existing = blkStart.removeTransition(0); @@ -463,7 +463,11 @@ public class ParserATNFactory implements ATNFactory { epsilon(blkEnd, loop); // blk can see loop back BlockAST blkAST = (BlockAST)plusAST.getChild(0); - if ( isGreedy(blkAST) ) { + if ( ((QuantifierAST)plusAST).isGreedy() ) { + if (expectNonGreedy(blkAST)) { + g.tool.errMgr.grammarError(ErrorType.EXPECTED_NON_GREEDY_WILDCARD_BLOCK, g.fileName, plusAST.getToken(), plusAST.getToken().getText()); + } + epsilon(loop, blkStart); // loop back to start epsilon(loop, end); // or exit } @@ -502,7 +506,11 @@ public class ParserATNFactory implements ATNFactory { end.loopBackState = loop; BlockAST blkAST = (BlockAST)starAST.getChild(0); - if ( isGreedy(blkAST) ) { + if ( ((QuantifierAST)starAST).isGreedy() ) { + if (expectNonGreedy(blkAST)) { + g.tool.errMgr.grammarError(ErrorType.EXPECTED_NON_GREEDY_WILDCARD_BLOCK, g.fileName, starAST.getToken(), starAST.getToken().getText()); + } + epsilon(entry, blkStart); // loop enter edge (alt 1) epsilon(entry, end); // bypass loop edge (alt 2) } @@ 
-628,17 +636,12 @@ public class ParserATNFactory implements ATNFactory { @Override public ATNState newState() { return newState(null); } - public boolean isGreedy(@NotNull BlockAST blkAST) { - boolean greedy = true; - String greedyOption = blkAST.getOptionString("greedy"); - if (greedyOption != null) { - return Boolean.parseBoolean(greedyOption); + public boolean expectNonGreedy(@NotNull BlockAST blkAST) { + if ( blockHasWildcardAlt(blkAST) ) { + return true; } - if ( blockHasWildcardAlt(blkAST) ) { - greedy = false; - } - return greedy; + return false; } // (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .)) diff --git a/tool/src/org/antlr/v4/parse/ANTLRParser.g b/tool/src/org/antlr/v4/parse/ANTLRParser.g index 23fa59af5..1b250e970 100644 --- a/tool/src/org/antlr/v4/parse/ANTLRParser.g +++ b/tool/src/org/antlr/v4/parse/ANTLRParser.g @@ -727,9 +727,9 @@ blockSuffix ; ebnfSuffix - : QUESTION -> OPTIONAL<OptionalBlockAST>[$start] - | STAR -> CLOSURE<StarBlockAST>[$start] - | PLUS -> POSITIVE_CLOSURE<PlusBlockAST>[$start] + : QUESTION nongreedy=QUESTION? -> OPTIONAL<OptionalBlockAST>[$start, $nongreedy] + | STAR nongreedy=QUESTION? -> CLOSURE<StarBlockAST>[$start, $nongreedy] + | PLUS nongreedy=QUESTION? -> POSITIVE_CLOSURE<PlusBlockAST>[$start, $nongreedy] ; lexerAtom diff --git a/tool/src/org/antlr/v4/tool/ErrorType.java b/tool/src/org/antlr/v4/tool/ErrorType.java index ab23599ae..51a969544 100644 --- a/tool/src/org/antlr/v4/tool/ErrorType.java +++ b/tool/src/org/antlr/v4/tool/ErrorType.java @@ -151,6 +151,7 @@ public enum ErrorType { ATTRIBUTE_IN_LEXER_ACTION(128, "attribute references not allowed in lexer actions: $<arg>", ErrorSeverity.ERROR), WILDCARD_IN_PARSER(129, "wildcard '.' not allowed in parsers", ErrorSeverity.ERROR), LABEL_BLOCK_NOT_A_SET(130, "label assigned to a block which is not a set", ErrorSeverity.ERROR), + EXPECTED_NON_GREEDY_WILDCARD_BLOCK(131, "greedy block ()<arg> contains wildcard; the non-greedy syntax ()<arg>? 
may be preferred", ErrorSeverity.WARNING), /** Documentation comment is unterminated */ //UNTERMINATED_DOC_COMMENT(, "", ErrorSeverity.ERROR), diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java index ebd4964c4..6129c6a72 100644 --- a/tool/src/org/antlr/v4/tool/Grammar.java +++ b/tool/src/org/antlr/v4/tool/Grammar.java @@ -83,11 +83,9 @@ public class Grammar implements AttributeResolver { }}; public static final Set ParserBlockOptions = new HashSet() {{ - add("greedy"); }}; public static final Set LexerBlockOptions = new HashSet() {{ - add("greedy"); }}; /** Legal options for terminal refs like ID */ diff --git a/tool/src/org/antlr/v4/tool/ast/BlockAST.java b/tool/src/org/antlr/v4/tool/ast/BlockAST.java index 3aaf313eb..2be205dfc 100644 --- a/tool/src/org/antlr/v4/tool/ast/BlockAST.java +++ b/tool/src/org/antlr/v4/tool/ast/BlockAST.java @@ -39,10 +39,10 @@ public class BlockAST extends GrammarASTWithOptions implements RuleElementAST { // TODO: maybe I need a Subrule object like Rule so these options mov to that? /** What are the default options for a subrule? 
*/ public static final Map defaultBlockOptions = - new HashMap() {{put("greedy","true");}}; + new HashMap(); public static final Map defaultLexerBlockOptions = - new HashMap() {{put("greedy","true");}}; + new HashMap(); public BlockAST(GrammarAST node) { super(node); diff --git a/tool/src/org/antlr/v4/tool/ast/OptionalBlockAST.java b/tool/src/org/antlr/v4/tool/ast/OptionalBlockAST.java index b93b7e851..2e30455dd 100644 --- a/tool/src/org/antlr/v4/tool/ast/OptionalBlockAST.java +++ b/tool/src/org/antlr/v4/tool/ast/OptionalBlockAST.java @@ -32,9 +32,23 @@ package org.antlr.v4.tool.ast; import org.antlr.runtime.Token; import org.antlr.runtime.tree.Tree; -public class OptionalBlockAST extends GrammarAST implements RuleElementAST { - public OptionalBlockAST(GrammarAST node) { super(node); } - public OptionalBlockAST(int type, Token t) { super(type, t); } +public class OptionalBlockAST extends GrammarAST implements RuleElementAST, QuantifierAST { + private final boolean _greedy; + + public OptionalBlockAST(OptionalBlockAST node) { + super(node); + _greedy = node._greedy; + } + + public OptionalBlockAST(int type, Token t, Token nongreedy) { + super(type, t); + _greedy = nongreedy == null; + } + + @Override + public boolean isGreedy() { + return _greedy; + } @Override public Tree dupNode() { return new OptionalBlockAST(this); } diff --git a/tool/src/org/antlr/v4/tool/ast/PlusBlockAST.java b/tool/src/org/antlr/v4/tool/ast/PlusBlockAST.java index cb67c488f..61ee3f9e8 100644 --- a/tool/src/org/antlr/v4/tool/ast/PlusBlockAST.java +++ b/tool/src/org/antlr/v4/tool/ast/PlusBlockAST.java @@ -32,9 +32,23 @@ package org.antlr.v4.tool.ast; import org.antlr.runtime.Token; import org.antlr.runtime.tree.Tree; -public class PlusBlockAST extends GrammarAST implements RuleElementAST { - public PlusBlockAST(GrammarAST node) { super(node); } - public PlusBlockAST(int type, Token t) { super(type, t); } +public class PlusBlockAST extends GrammarAST implements RuleElementAST, QuantifierAST { + 
private final boolean _greedy; + + public PlusBlockAST(PlusBlockAST node) { + super(node); + _greedy = node._greedy; + } + + public PlusBlockAST(int type, Token t, Token nongreedy) { + super(type, t); + _greedy = nongreedy == null; + } + + @Override + public boolean isGreedy() { + return _greedy; + } @Override public Tree dupNode() { return new PlusBlockAST(this); } diff --git a/tool/src/org/antlr/v4/tool/ast/QuantifierAST.java b/tool/src/org/antlr/v4/tool/ast/QuantifierAST.java new file mode 100644 index 000000000..c75b1e813 --- /dev/null +++ b/tool/src/org/antlr/v4/tool/ast/QuantifierAST.java @@ -0,0 +1,41 @@ +/* + [The "BSD license"] + Copyright (c) 2012 Terence Parr + Copyright (c) 2012 Sam Harwell + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.antlr.v4.tool.ast; + +/** + * + * @author Sam Harwell + */ +public interface QuantifierAST { + + boolean isGreedy(); + +} diff --git a/tool/src/org/antlr/v4/tool/ast/StarBlockAST.java b/tool/src/org/antlr/v4/tool/ast/StarBlockAST.java index d0177991d..5868972c9 100644 --- a/tool/src/org/antlr/v4/tool/ast/StarBlockAST.java +++ b/tool/src/org/antlr/v4/tool/ast/StarBlockAST.java @@ -32,9 +32,23 @@ package org.antlr.v4.tool.ast; import org.antlr.runtime.Token; import org.antlr.runtime.tree.Tree; -public class StarBlockAST extends GrammarAST implements RuleElementAST { - public StarBlockAST(GrammarAST node) { super(node); } - public StarBlockAST(int type, Token t) { super(type, t); } +public class StarBlockAST extends GrammarAST implements RuleElementAST, QuantifierAST { + private final boolean _greedy; + + public StarBlockAST(StarBlockAST node) { + super(node); + _greedy = node._greedy; + } + + public StarBlockAST(int type, Token t, Token nongreedy) { + super(type, t); + _greedy = nongreedy == null; + } + + @Override + public boolean isGreedy() { + return _greedy; + } @Override public Tree dupNode() { return new StarBlockAST(this); } diff --git a/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java b/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java index f58c00176..5384cc36e 100644 --- a/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java +++ b/tool/test/org/antlr/v4/test/TestATNLexerInterpreter.java @@ -174,7 +174,7 @@ public class 
TestATNLexerInterpreter extends BaseTest { @Test public void testRecursiveLexerRuleRefWithWildcard() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ - "CMT : '/*' (CMT | .)* '*/' ;\n" + + "CMT : '/*' (CMT | .)*? '*/' ;\n" + "WS : (' '|'\n')+ ;"); String expecting = "CMT, WS, CMT, WS, EOF"; @@ -185,10 +185,18 @@ public class TestATNLexerInterpreter extends BaseTest { expecting); } - @Test public void testLexerWildcardNonGreedyLoopByDefault() throws Exception { + @Test public void testLexerWildcardGreedyLoopByDefault() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ "CMT : '//' .* '\\n' ;\n"); + String expecting = "CMT, EOF"; + checkLexerMatches(lg, "//x\n//y\n", expecting); + } + + @Test public void testLexerWildcardLoopExplicitNonGreedy() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "CMT : '//' .*? '\\n' ;\n"); String expecting = "CMT, CMT, EOF"; checkLexerMatches(lg, "//x\n//y\n", expecting); } @@ -201,10 +209,18 @@ public class TestATNLexerInterpreter extends BaseTest { checkLexerMatches(lg, "[a]", "STR, EOF"); } - @Test public void testLexerWildcardNonGreedyPlusLoopByDefault() throws Exception { + @Test public void testLexerWildcardGreedyPlusLoopByDefault() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ "CMT : '//' .+ '\\n' ;\n"); + String expecting = "CMT, EOF"; + checkLexerMatches(lg, "//x\n//y\n", expecting); + } + + @Test public void testLexerWildcardExplicitNonGreedyPlusLoop() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "CMT : '//' .+? 
'\\n' ;\n"); String expecting = "CMT, CMT, EOF"; checkLexerMatches(lg, "//x\n//y\n", expecting); } @@ -218,11 +234,20 @@ public class TestATNLexerInterpreter extends BaseTest { checkLexerMatches(lg, "/**/", expecting); } - @Test public void testNonGreedyBetweenRules() throws Exception { + @Test public void testGreedyBetweenRules() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ "A : '<a>' ;\n" + "B : '<' .+ '>' ;\n"); + String expecting = "B, EOF"; + checkLexerMatches(lg, "<a><x>", expecting); + } + + @Test public void testNonGreedyBetweenRules() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "A : '<a>' ;\n" + + "B : '<' .+? '>' ;\n"); String expecting = "A, B, EOF"; checkLexerMatches(lg, "<a><x>", expecting); } diff --git a/tool/test/org/antlr/v4/test/TestLexerExec.java b/tool/test/org/antlr/v4/test/TestLexerExec.java index a66b45354..98ce588f7 100644 --- a/tool/test/org/antlr/v4/test/TestLexerExec.java +++ b/tool/test/org/antlr/v4/test/TestLexerExec.java @@ -60,10 +60,10 @@ public class TestLexerExec extends BaseTest { } @Test - public void testImplicitNonGreedyTermination() throws Exception { + public void testNonGreedyTermination() throws Exception { String grammar = "lexer grammar L;\n" - + "STRING : '\"' ('\"\"' | .)* '\"';"; + + "STRING : '\"' ('\"\"' | .)*? '\"';"; String found = execLexer("L.g4", grammar, "L", "\"hi\"\"mom\""); assertEquals( @@ -74,10 +74,10 @@ public class TestLexerExec extends BaseTest { } @Test - public void testImplicitGreedyOptional() throws Exception { + public void testGreedyOptional() throws Exception { String grammar = "lexer grammar L;\n" - + "CMT : '//' .* '\\n' CMT?;\n" + + "CMT : '//' .*? 
'\\n' CMT?;\n" + "WS : (' '|'\\t')+;"; String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); @@ -88,24 +88,10 @@ public class TestLexerExec extends BaseTest { } @Test - public void testExplicitGreedyOptional() throws Exception { + public void testNonGreedyOptional() throws Exception { String grammar = "lexer grammar L;\n" - + "CMT : '//' .* '\\n' (options{greedy=true;} : CMT)?;\n" - + "WS : (' '|'\\t')+;"; - - String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); - assertEquals( - "[@0,0:13='//blah\\n//blah\\n',<1>,1:0]\n" + - "[@1,14:13='',<-1>,3:14]\n", found); - assertNull(stderrDuringParse); - } - - @Test - public void testExplicitNonGreedyOptional() throws Exception { - String grammar = - "lexer grammar L;\n" - + "CMT : '//' .* '\\n' (options{greedy=false;} : CMT)?;\n" + + "CMT : '//' .*? '\\n' CMT??;\n" + "WS : (' '|'\\t')+;"; String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); @@ -117,10 +103,10 @@ public class TestLexerExec extends BaseTest { } @Test - public void testImplicitGreedyClosure() throws Exception { + public void testGreedyClosure() throws Exception { String grammar = "lexer grammar L;\n" - + "CMT : '//' .* '\\n' CMT*;\n" + + "CMT : '//' .*? 
'\\n' CMT*;\n" + "WS : (' '|'\\t')+;"; String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); @@ -131,24 +117,10 @@ public class TestLexerExec extends BaseTest { } @Test - public void testExplicitGreedyClosure() throws Exception { + public void testNonGreedyClosure() throws Exception { String grammar = "lexer grammar L;\n" - + "CMT : '//' .* '\\n' (options{greedy=true;} : CMT)*;\n" - + "WS : (' '|'\\t')+;"; - - String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); - assertEquals( - "[@0,0:13='//blah\\n//blah\\n',<1>,1:0]\n" + - "[@1,14:13='',<-1>,3:14]\n", found); - assertNull(stderrDuringParse); - } - - @Test - public void testExplicitNonGreedyClosure() throws Exception { - String grammar = - "lexer grammar L;\n" - + "CMT : '//' .* '\\n' (options{greedy=false;} : CMT)*;\n" + + "CMT : '//' .*? '\\n' CMT*?;\n" + "WS : (' '|'\\t')+;"; String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); @@ -160,10 +132,10 @@ public class TestLexerExec extends BaseTest { } @Test - public void testImplicitGreedyPositiveClosure() throws Exception { + public void testGreedyPositiveClosure() throws Exception { String grammar = "lexer grammar L;\n" - + "CMT : ('//' .* '\\n')+;\n" + + "CMT : ('//' .*? 
'\\n')+;\n" + "WS : (' '|'\\t')+;"; String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); @@ -174,24 +146,10 @@ public class TestLexerExec extends BaseTest { } @Test - public void testExplicitGreedyPositiveClosure() throws Exception { + public void testNonGreedyPositiveClosure() throws Exception { String grammar = "lexer grammar L;\n" - + "CMT : (options{greedy=true;} : '//' .* '\\n')+;\n" - + "WS : (' '|'\\t')+;"; - - String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); - assertEquals( - "[@0,0:13='//blah\\n//blah\\n',<1>,1:0]\n" + - "[@1,14:13='',<-1>,3:14]\n", found); - assertNull(stderrDuringParse); - } - - @Test - public void testExplicitNonGreedyPositiveClosure() throws Exception { - String grammar = - "lexer grammar L;\n" - + "CMT : (options{greedy=false;} : '//' .* '\\n')+;\n" + + "CMT : ('//' .*? '\\n')+?;\n" + "WS : (' '|'\\t')+;"; String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); @@ -205,7 +163,7 @@ public class TestLexerExec extends BaseTest { @Test public void testRecursiveLexerRuleRefWithWildcardStar() throws Exception { String grammar = "lexer grammar L;\n"+ - "CMT : '/*' (CMT | .)* '*/' ;\n" + + "CMT : '/*' (CMT | .)*? '*/' ;\n" + "WS : (' '|'\n')+ ;\n" /*+ "ANY : .;"*/; @@ -243,7 +201,7 @@ public class TestLexerExec extends BaseTest { @Test public void testRecursiveLexerRuleRefWithWildcardPlus() throws Exception { String grammar = "lexer grammar L;\n"+ - "CMT : '/*' (CMT | .)+ '*/' ;\n" + + "CMT : '/*' (CMT | .)+? '*/' ;\n" + "WS : (' '|'\n')+ ;\n" /*+ "ANY : .;"*/;