Add non-greedy quantifier syntax: greedy forms are *, +, ?, and non-greedy forms are *?, +?, ??

"greedy" is no longer a block option, and all loops are greedy unless the non-greedy quantifier is explicitly used
Add warning 131 when a greedy loop (* or +) is applied to a block containing a wildcard, e.g. .* or .+, because the grammar author probably intended the non-greedy form, e.g. .*? or .+?
This commit is contained in:
Sam Harwell 2012-10-26 15:40:05 -05:00
parent 8494007025
commit e89807e350
11 changed files with 159 additions and 91 deletions

View File

@ -73,6 +73,7 @@ import org.antlr.v4.tool.ast.AltAST;
import org.antlr.v4.tool.ast.BlockAST; import org.antlr.v4.tool.ast.BlockAST;
import org.antlr.v4.tool.ast.GrammarAST; import org.antlr.v4.tool.ast.GrammarAST;
import org.antlr.v4.tool.ast.PredAST; import org.antlr.v4.tool.ast.PredAST;
import org.antlr.v4.tool.ast.QuantifierAST;
import org.antlr.v4.tool.ast.TerminalAST; import org.antlr.v4.tool.ast.TerminalAST;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
@ -425,8 +426,7 @@ public class ParserATNFactory implements ATNFactory {
public Handle optional(@NotNull GrammarAST optAST, @NotNull Handle blk) { public Handle optional(@NotNull GrammarAST optAST, @NotNull Handle blk) {
BlockStartState blkStart = (BlockStartState)blk.left; BlockStartState blkStart = (BlockStartState)blk.left;
BlockAST blkAST = (BlockAST)optAST.getChild(0); if (((QuantifierAST)optAST).isGreedy()) {
if (isGreedy(blkAST)) {
epsilon(blkStart, blk.right); epsilon(blkStart, blk.right);
} else { } else {
Transition existing = blkStart.removeTransition(0); Transition existing = blkStart.removeTransition(0);
@ -463,7 +463,11 @@ public class ParserATNFactory implements ATNFactory {
epsilon(blkEnd, loop); // blk can see loop back epsilon(blkEnd, loop); // blk can see loop back
BlockAST blkAST = (BlockAST)plusAST.getChild(0); BlockAST blkAST = (BlockAST)plusAST.getChild(0);
if ( isGreedy(blkAST) ) { if ( ((QuantifierAST)plusAST).isGreedy() ) {
if (expectNonGreedy(blkAST)) {
g.tool.errMgr.grammarError(ErrorType.EXPECTED_NON_GREEDY_WILDCARD_BLOCK, g.fileName, plusAST.getToken(), plusAST.getToken().getText());
}
epsilon(loop, blkStart); // loop back to start epsilon(loop, blkStart); // loop back to start
epsilon(loop, end); // or exit epsilon(loop, end); // or exit
} }
@ -502,7 +506,11 @@ public class ParserATNFactory implements ATNFactory {
end.loopBackState = loop; end.loopBackState = loop;
BlockAST blkAST = (BlockAST)starAST.getChild(0); BlockAST blkAST = (BlockAST)starAST.getChild(0);
if ( isGreedy(blkAST) ) { if ( ((QuantifierAST)starAST).isGreedy() ) {
if (expectNonGreedy(blkAST)) {
g.tool.errMgr.grammarError(ErrorType.EXPECTED_NON_GREEDY_WILDCARD_BLOCK, g.fileName, starAST.getToken(), starAST.getToken().getText());
}
epsilon(entry, blkStart); // loop enter edge (alt 1) epsilon(entry, blkStart); // loop enter edge (alt 1)
epsilon(entry, end); // bypass loop edge (alt 2) epsilon(entry, end); // bypass loop edge (alt 2)
} }
@ -628,17 +636,12 @@ public class ParserATNFactory implements ATNFactory {
@Override @Override
public ATNState newState() { return newState(null); } public ATNState newState() { return newState(null); }
public boolean isGreedy(@NotNull BlockAST blkAST) { public boolean expectNonGreedy(@NotNull BlockAST blkAST) {
boolean greedy = true; if ( blockHasWildcardAlt(blkAST) ) {
String greedyOption = blkAST.getOptionString("greedy"); return true;
if (greedyOption != null) {
return Boolean.parseBoolean(greedyOption);
} }
if ( blockHasWildcardAlt(blkAST) ) { return false;
greedy = false;
}
return greedy;
} }
// (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .)) // (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .))

View File

@ -727,9 +727,9 @@ blockSuffix
; ;
ebnfSuffix ebnfSuffix
: QUESTION -> OPTIONAL<OptionalBlockAST>[$start] : QUESTION nongreedy=QUESTION? -> OPTIONAL<OptionalBlockAST>[$start, $nongreedy]
| STAR -> CLOSURE<StarBlockAST>[$start] | STAR nongreedy=QUESTION? -> CLOSURE<StarBlockAST>[$start, $nongreedy]
| PLUS -> POSITIVE_CLOSURE<PlusBlockAST>[$start] | PLUS nongreedy=QUESTION? -> POSITIVE_CLOSURE<PlusBlockAST>[$start, $nongreedy]
; ;
lexerAtom lexerAtom

View File

@ -151,6 +151,7 @@ public enum ErrorType {
ATTRIBUTE_IN_LEXER_ACTION(128, "attribute references not allowed in lexer actions: $<arg>", ErrorSeverity.ERROR), ATTRIBUTE_IN_LEXER_ACTION(128, "attribute references not allowed in lexer actions: $<arg>", ErrorSeverity.ERROR),
WILDCARD_IN_PARSER(129, "wildcard '.' not allowed in parsers", ErrorSeverity.ERROR), WILDCARD_IN_PARSER(129, "wildcard '.' not allowed in parsers", ErrorSeverity.ERROR),
LABEL_BLOCK_NOT_A_SET(130, "label <arg> assigned to a block which is not a set", ErrorSeverity.ERROR), LABEL_BLOCK_NOT_A_SET(130, "label <arg> assigned to a block which is not a set", ErrorSeverity.ERROR),
EXPECTED_NON_GREEDY_WILDCARD_BLOCK(131, "greedy block ()<arg> contains wildcard; the non-greedy syntax ()<arg>? may be preferred", ErrorSeverity.WARNING),
/** Documentation comment is unterminated */ /** Documentation comment is unterminated */
//UNTERMINATED_DOC_COMMENT(, "", ErrorSeverity.ERROR), //UNTERMINATED_DOC_COMMENT(, "", ErrorSeverity.ERROR),

View File

@ -83,11 +83,9 @@ public class Grammar implements AttributeResolver {
}}; }};
public static final Set<String> ParserBlockOptions = new HashSet<String>() {{ public static final Set<String> ParserBlockOptions = new HashSet<String>() {{
add("greedy");
}}; }};
public static final Set<String> LexerBlockOptions = new HashSet<String>() {{ public static final Set<String> LexerBlockOptions = new HashSet<String>() {{
add("greedy");
}}; }};
/** Legal options for terminal refs like ID<assoc=right> */ /** Legal options for terminal refs like ID<assoc=right> */

View File

@ -39,10 +39,10 @@ public class BlockAST extends GrammarASTWithOptions implements RuleElementAST {
// TODO: maybe I need a Subrule object like Rule so these options mov to that? // TODO: maybe I need a Subrule object like Rule so these options mov to that?
/** What are the default options for a subrule? */ /** What are the default options for a subrule? */
public static final Map<String, String> defaultBlockOptions = public static final Map<String, String> defaultBlockOptions =
new HashMap<String, String>() {{put("greedy","true");}}; new HashMap<String, String>();
public static final Map<String, String> defaultLexerBlockOptions = public static final Map<String, String> defaultLexerBlockOptions =
new HashMap<String, String>() {{put("greedy","true");}}; new HashMap<String, String>();
public BlockAST(GrammarAST node) { public BlockAST(GrammarAST node) {
super(node); super(node);

View File

@ -32,9 +32,23 @@ package org.antlr.v4.tool.ast;
import org.antlr.runtime.Token; import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree; import org.antlr.runtime.tree.Tree;
public class OptionalBlockAST extends GrammarAST implements RuleElementAST { public class OptionalBlockAST extends GrammarAST implements RuleElementAST, QuantifierAST {
public OptionalBlockAST(GrammarAST node) { super(node); } private final boolean _greedy;
public OptionalBlockAST(int type, Token t) { super(type, t); }
public OptionalBlockAST(OptionalBlockAST node) {
super(node);
_greedy = node._greedy;
}
public OptionalBlockAST(int type, Token t, Token nongreedy) {
super(type, t);
_greedy = nongreedy == null;
}
@Override
public boolean isGreedy() {
return _greedy;
}
@Override @Override
public Tree dupNode() { return new OptionalBlockAST(this); } public Tree dupNode() { return new OptionalBlockAST(this); }

View File

@ -32,9 +32,23 @@ package org.antlr.v4.tool.ast;
import org.antlr.runtime.Token; import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree; import org.antlr.runtime.tree.Tree;
public class PlusBlockAST extends GrammarAST implements RuleElementAST { public class PlusBlockAST extends GrammarAST implements RuleElementAST, QuantifierAST {
public PlusBlockAST(GrammarAST node) { super(node); } private final boolean _greedy;
public PlusBlockAST(int type, Token t) { super(type, t); }
public PlusBlockAST(PlusBlockAST node) {
super(node);
_greedy = node._greedy;
}
public PlusBlockAST(int type, Token t, Token nongreedy) {
super(type, t);
_greedy = nongreedy == null;
}
@Override
public boolean isGreedy() {
return _greedy;
}
@Override @Override
public Tree dupNode() { return new PlusBlockAST(this); } public Tree dupNode() { return new PlusBlockAST(this); }

View File

@ -0,0 +1,41 @@
/*
[The "BSD license"]
Copyright (c) 2012 Terence Parr
Copyright (c) 2012 Sam Harwell
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.tool.ast;
/**
 * Implemented by grammar AST nodes that represent a quantified block, so
 * that callers can ask whether the quantifier is greedy.
 *
 * <p>The greedy quantifiers are {@code ?}, {@code *} and {@code +}; each has
 * a non-greedy form written with an extra trailing {@code ?}
 * ({@code ??}, {@code *?} and {@code +?}).
 *
 * @author Sam Harwell
 */
public interface QuantifierAST {

    /**
     * Reports whether this quantifier is greedy.
     *
     * @return {@code true} for a greedy quantifier, {@code false} when the
     *         non-greedy form was used
     */
    boolean isGreedy();
}

View File

@ -32,9 +32,23 @@ package org.antlr.v4.tool.ast;
import org.antlr.runtime.Token; import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree; import org.antlr.runtime.tree.Tree;
public class StarBlockAST extends GrammarAST implements RuleElementAST { public class StarBlockAST extends GrammarAST implements RuleElementAST, QuantifierAST {
public StarBlockAST(GrammarAST node) { super(node); } private final boolean _greedy;
public StarBlockAST(int type, Token t) { super(type, t); }
public StarBlockAST(StarBlockAST node) {
super(node);
_greedy = node._greedy;
}
public StarBlockAST(int type, Token t, Token nongreedy) {
super(type, t);
_greedy = nongreedy == null;
}
@Override
public boolean isGreedy() {
return _greedy;
}
@Override @Override
public Tree dupNode() { return new StarBlockAST(this); } public Tree dupNode() { return new StarBlockAST(this); }

View File

@ -174,7 +174,7 @@ public class TestATNLexerInterpreter extends BaseTest {
@Test public void testRecursiveLexerRuleRefWithWildcard() throws Exception { @Test public void testRecursiveLexerRuleRefWithWildcard() throws Exception {
LexerGrammar lg = new LexerGrammar( LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+ "lexer grammar L;\n"+
"CMT : '/*' (CMT | .)* '*/' ;\n" + "CMT : '/*' (CMT | .)*? '*/' ;\n" +
"WS : (' '|'\n')+ ;"); "WS : (' '|'\n')+ ;");
String expecting = "CMT, WS, CMT, WS, EOF"; String expecting = "CMT, WS, CMT, WS, EOF";
@ -185,10 +185,18 @@ public class TestATNLexerInterpreter extends BaseTest {
expecting); expecting);
} }
@Test public void testLexerWildcardNonGreedyLoopByDefault() throws Exception { @Test public void testLexerWildcardGreedyLoopByDefault() throws Exception {
LexerGrammar lg = new LexerGrammar( LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+ "lexer grammar L;\n"+
"CMT : '//' .* '\\n' ;\n"); "CMT : '//' .* '\\n' ;\n");
String expecting = "CMT, EOF";
checkLexerMatches(lg, "//x\n//y\n", expecting);
}
@Test public void testLexerWildcardLoopExplicitNonGreedy() throws Exception {
LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+
"CMT : '//' .*? '\\n' ;\n");
String expecting = "CMT, CMT, EOF"; String expecting = "CMT, CMT, EOF";
checkLexerMatches(lg, "//x\n//y\n", expecting); checkLexerMatches(lg, "//x\n//y\n", expecting);
} }
@ -201,10 +209,18 @@ public class TestATNLexerInterpreter extends BaseTest {
checkLexerMatches(lg, "[a]", "STR, EOF"); checkLexerMatches(lg, "[a]", "STR, EOF");
} }
@Test public void testLexerWildcardNonGreedyPlusLoopByDefault() throws Exception { @Test public void testLexerWildcardGreedyPlusLoopByDefault() throws Exception {
LexerGrammar lg = new LexerGrammar( LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+ "lexer grammar L;\n"+
"CMT : '//' .+ '\\n' ;\n"); "CMT : '//' .+ '\\n' ;\n");
String expecting = "CMT, EOF";
checkLexerMatches(lg, "//x\n//y\n", expecting);
}
@Test public void testLexerWildcardExplicitNonGreedyPlusLoop() throws Exception {
LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+
"CMT : '//' .+? '\\n' ;\n");
String expecting = "CMT, CMT, EOF"; String expecting = "CMT, CMT, EOF";
checkLexerMatches(lg, "//x\n//y\n", expecting); checkLexerMatches(lg, "//x\n//y\n", expecting);
} }
@ -218,11 +234,20 @@ public class TestATNLexerInterpreter extends BaseTest {
checkLexerMatches(lg, "/**/", expecting); checkLexerMatches(lg, "/**/", expecting);
} }
@Test public void testNonGreedyBetweenRules() throws Exception { @Test public void testGreedyBetweenRules() throws Exception {
LexerGrammar lg = new LexerGrammar( LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+ "lexer grammar L;\n"+
"A : '<a>' ;\n" + "A : '<a>' ;\n" +
"B : '<' .+ '>' ;\n"); "B : '<' .+ '>' ;\n");
String expecting = "B, EOF";
checkLexerMatches(lg, "<a><x>", expecting);
}
@Test public void testNonGreedyBetweenRules() throws Exception {
LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+
"A : '<a>' ;\n" +
"B : '<' .+? '>' ;\n");
String expecting = "A, B, EOF"; String expecting = "A, B, EOF";
checkLexerMatches(lg, "<a><x>", expecting); checkLexerMatches(lg, "<a><x>", expecting);
} }

View File

@ -60,10 +60,10 @@ public class TestLexerExec extends BaseTest {
} }
@Test @Test
public void testImplicitNonGreedyTermination() throws Exception { public void testNonGreedyTermination() throws Exception {
String grammar = String grammar =
"lexer grammar L;\n" "lexer grammar L;\n"
+ "STRING : '\"' ('\"\"' | .)* '\"';"; + "STRING : '\"' ('\"\"' | .)*? '\"';";
String found = execLexer("L.g4", grammar, "L", "\"hi\"\"mom\""); String found = execLexer("L.g4", grammar, "L", "\"hi\"\"mom\"");
assertEquals( assertEquals(
@ -74,10 +74,10 @@ public class TestLexerExec extends BaseTest {
} }
@Test @Test
public void testImplicitGreedyOptional() throws Exception { public void testGreedyOptional() throws Exception {
String grammar = String grammar =
"lexer grammar L;\n" "lexer grammar L;\n"
+ "CMT : '//' .* '\\n' CMT?;\n" + "CMT : '//' .*? '\\n' CMT?;\n"
+ "WS : (' '|'\\t')+;"; + "WS : (' '|'\\t')+;";
String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n");
@ -88,24 +88,10 @@ public class TestLexerExec extends BaseTest {
} }
@Test @Test
public void testExplicitGreedyOptional() throws Exception { public void testNonGreedyOptional() throws Exception {
String grammar = String grammar =
"lexer grammar L;\n" "lexer grammar L;\n"
+ "CMT : '//' .* '\\n' (options{greedy=true;} : CMT)?;\n" + "CMT : '//' .*? '\\n' CMT??;\n"
+ "WS : (' '|'\\t')+;";
String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n");
assertEquals(
"[@0,0:13='//blah\\n//blah\\n',<1>,1:0]\n" +
"[@1,14:13='<EOF>',<-1>,3:14]\n", found);
assertNull(stderrDuringParse);
}
@Test
public void testExplicitNonGreedyOptional() throws Exception {
String grammar =
"lexer grammar L;\n"
+ "CMT : '//' .* '\\n' (options{greedy=false;} : CMT)?;\n"
+ "WS : (' '|'\\t')+;"; + "WS : (' '|'\\t')+;";
String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n");
@ -117,10 +103,10 @@ public class TestLexerExec extends BaseTest {
} }
@Test @Test
public void testImplicitGreedyClosure() throws Exception { public void testGreedyClosure() throws Exception {
String grammar = String grammar =
"lexer grammar L;\n" "lexer grammar L;\n"
+ "CMT : '//' .* '\\n' CMT*;\n" + "CMT : '//' .*? '\\n' CMT*;\n"
+ "WS : (' '|'\\t')+;"; + "WS : (' '|'\\t')+;";
String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n");
@ -131,24 +117,10 @@ public class TestLexerExec extends BaseTest {
} }
@Test @Test
public void testExplicitGreedyClosure() throws Exception { public void testNonGreedyClosure() throws Exception {
String grammar = String grammar =
"lexer grammar L;\n" "lexer grammar L;\n"
+ "CMT : '//' .* '\\n' (options{greedy=true;} : CMT)*;\n" + "CMT : '//' .*? '\\n' CMT*?;\n"
+ "WS : (' '|'\\t')+;";
String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n");
assertEquals(
"[@0,0:13='//blah\\n//blah\\n',<1>,1:0]\n" +
"[@1,14:13='<EOF>',<-1>,3:14]\n", found);
assertNull(stderrDuringParse);
}
@Test
public void testExplicitNonGreedyClosure() throws Exception {
String grammar =
"lexer grammar L;\n"
+ "CMT : '//' .* '\\n' (options{greedy=false;} : CMT)*;\n"
+ "WS : (' '|'\\t')+;"; + "WS : (' '|'\\t')+;";
String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n");
@ -160,10 +132,10 @@ public class TestLexerExec extends BaseTest {
} }
@Test @Test
public void testImplicitGreedyPositiveClosure() throws Exception { public void testGreedyPositiveClosure() throws Exception {
String grammar = String grammar =
"lexer grammar L;\n" "lexer grammar L;\n"
+ "CMT : ('//' .* '\\n')+;\n" + "CMT : ('//' .*? '\\n')+;\n"
+ "WS : (' '|'\\t')+;"; + "WS : (' '|'\\t')+;";
String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n");
@ -174,24 +146,10 @@ public class TestLexerExec extends BaseTest {
} }
@Test @Test
public void testExplicitGreedyPositiveClosure() throws Exception { public void testNonGreedyPositiveClosure() throws Exception {
String grammar = String grammar =
"lexer grammar L;\n" "lexer grammar L;\n"
+ "CMT : (options{greedy=true;} : '//' .* '\\n')+;\n" + "CMT : ('//' .*? '\\n')+?;\n"
+ "WS : (' '|'\\t')+;";
String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n");
assertEquals(
"[@0,0:13='//blah\\n//blah\\n',<1>,1:0]\n" +
"[@1,14:13='<EOF>',<-1>,3:14]\n", found);
assertNull(stderrDuringParse);
}
@Test
public void testExplicitNonGreedyPositiveClosure() throws Exception {
String grammar =
"lexer grammar L;\n"
+ "CMT : (options{greedy=false;} : '//' .* '\\n')+;\n"
+ "WS : (' '|'\\t')+;"; + "WS : (' '|'\\t')+;";
String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n"); String found = execLexer("L.g4", grammar, "L", "//blah\n//blah\n");
@ -205,7 +163,7 @@ public class TestLexerExec extends BaseTest {
@Test public void testRecursiveLexerRuleRefWithWildcardStar() throws Exception { @Test public void testRecursiveLexerRuleRefWithWildcardStar() throws Exception {
String grammar = String grammar =
"lexer grammar L;\n"+ "lexer grammar L;\n"+
"CMT : '/*' (CMT | .)* '*/' ;\n" + "CMT : '/*' (CMT | .)*? '*/' ;\n" +
"WS : (' '|'\n')+ ;\n" "WS : (' '|'\n')+ ;\n"
/*+ "ANY : .;"*/; /*+ "ANY : .;"*/;
@ -243,7 +201,7 @@ public class TestLexerExec extends BaseTest {
@Test public void testRecursiveLexerRuleRefWithWildcardPlus() throws Exception { @Test public void testRecursiveLexerRuleRefWithWildcardPlus() throws Exception {
String grammar = String grammar =
"lexer grammar L;\n"+ "lexer grammar L;\n"+
"CMT : '/*' (CMT | .)+ '*/' ;\n" + "CMT : '/*' (CMT | .)+? '*/' ;\n" +
"WS : (' '|'\n')+ ;\n" "WS : (' '|'\n')+ ;\n"
/*+ "ANY : .;"*/; /*+ "ANY : .;"*/;