~[] stuff is allowed and works inside sets etc...

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9926]
This commit is contained in:
parrt 2012-01-28 14:36:46 -08:00
parent 368b70d53c
commit e90b322dd4
10 changed files with 58 additions and 47 deletions

View File

@ -1,5 +1,9 @@
ANTLR v4 Honey Badger early access
Jan 28, 2012
* ~[...] (negated char set) notation is now allowed in lexer rules and also works inside sets, e.g. (~[ab]|'a'), etc.
Jan 22, 2012
* Added ranges, escapes to [a-z] notation in lexer:

View File

@ -31,6 +31,8 @@ package org.antlr.v4.runtime;
/** A source of characters for an ANTLR lexer */
public interface CharStream extends IntStream {
public static final int EOF = -1;
public static final int MIN_CHAR = Character.MIN_VALUE;
public static final int MAX_CHAR = Character.MAX_VALUE-1; // FFFE is max
/** For unbuffered streams, you can't use this; primarily I'm providing
* a useful interface for action code. Just make sure actions don't

View File

@ -161,7 +161,7 @@ public abstract class ATNSimulator {
ActionTransition a = new ActionTransition(target, arg1, arg2, arg3 != 0);
return a;
case Transition.SET : return new SetTransition(target, sets.get(arg1));
case Transition.NOT_SET : return new NotSetTransition(target, sets.get(arg1), null);
case Transition.NOT_SET : return new NotSetTransition(target, sets.get(arg1));
case Transition.WILDCARD : return new WildcardTransition(target);
}
return null;

View File

@ -29,19 +29,13 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.runtime.misc.IntervalSet;
public class NotSetTransition extends SetTransition {
// keep both set, notSet; we can only compute at construction time
// since only then do we have grammar, which knows token set for complement.
@Nullable
public final IntervalSet notSet;
public NotSetTransition(@NotNull ATNState target, @Nullable IntervalSet set, @Nullable IntervalSet notSet) {
public NotSetTransition(@NotNull ATNState target, @Nullable IntervalSet set) {
super(target, set);
this.notSet = notSet;
}
@Override

View File

@ -1,4 +1,3 @@
lexer grammar E;
I : '0'..'9'+ {System.out.println("I");} ;
ID : [a-zA-Z] [a-zA-Z0-9]* ;
WS : [ \n\u000D] -> skip ;
I : ~[ab] ~[cd]* {System.out.println("I");} ;
WS : [ \n\u000D]+ -> skip ;

View File

@ -30,7 +30,6 @@
package org.antlr.v4.automata;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.Token;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.parse.ANTLRParser;
@ -161,20 +160,21 @@ public class LexerATNFactory extends ParserATNFactory {
ATNState right = newState(associatedAST);
IntervalSet set = new IntervalSet();
for (GrammarAST t : alts) {
if ( t.getType()== ANTLRParser.RANGE ) {
if ( t.getType()==ANTLRParser.RANGE ) {
int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText());
int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText());
set.add(a, b);
}
else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) {
set.addAll(getSetFromCharSetLiteral(t));
}
else {
int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
set.add(c);
}
}
if ( invert ) {
// TODO: what? should be chars not token types
IntervalSet notSet = set.complement(Token.MIN_TOKEN_TYPE, g.getMaxTokenType());
left.addTransition(new NotSetTransition(right, set, notSet));
left.addTransition(new NotSetTransition(right, set));
}
else {
left.addTransition(new SetTransition(right, set));
@ -210,36 +210,21 @@ public class LexerATNFactory extends ParserATNFactory {
/**
 * Build the ATN fragment for a lexer char set literal.
 *
 * Two new states are created for {@code charSetAST} and joined by a single
 * {@link SetTransition} whose label is the set returned by
 * {@link #getSetFromCharSetLiteral}. The left state is also recorded on the
 * AST node via its {@code atnState} field.
 *
 * @param charSetAST the AST node holding the char set literal
 * @return a handle spanning the new left and right states
 */
public Handle charSetLiteral(GrammarAST charSetAST) {
    ATNState left = newState(charSetAST);
    ATNState right = newState(charSetAST);
    // The literal's text is turned into a set by the helper, so no quoted
    // copy of the text needs to be built here.
    IntervalSet set = getSetFromCharSetLiteral(charSetAST);
    left.addTransition(new SetTransition(right, set));
    charSetAST.atnState = left;
    return new Handle(left, right);
}
public IntervalSet getSetFromCharSetLiteral(GrammarAST charSetAST) {
String chars = charSetAST.getText();
chars = chars.substring(1, chars.length()-1);
String cset = '"'+ chars +'"';
IntervalSet set = new IntervalSet();
// int n = cset.length();
// int i = 0;
// while ( i < n ) {
// if ( (i+2)<n && cset.charAt(i+1)=='-' ) { // range x-y
// int x = cset.charAt(i);
// int y = cset.charAt(i+2);
// if ( y=='\\' ) { i = i+3; continue; } // x-\
// if ( x<=y ) set.add(x,y);
// }
// else if ( cset.charAt(i)=='\\' ) {
// int end = i+2;
// if ( (i+1)>=n ) break; // ignore spurious \ on end
// if ( cset.charAt(i+1) == 'u' ) end = i+6;
// if ( end>n ) break;
// int c = CharSupport.getCharValueFromCharInGrammarLiteral(cset.substring(i,end));
// set.add(c);
// i = end;
// }
// else {
// set.add(cset.charAt(i));
// i++;
// }
// }
// unescape all valid escape char like \n, leaving escaped dashes as '\-'
// so we can avoid seeing them as '-' range ops.
String chars = CharSupport.getStringFromGrammarStringLiteral(cset);
chars = CharSupport.getStringFromGrammarStringLiteral(cset);
// now make x-y become set of char
int n = chars.length();
for (int i=0; i< n; i++) {
@ -257,9 +242,7 @@ public class LexerATNFactory extends ParserATNFactory {
set.add(c);
}
}
left.addTransition(new SetTransition(right, set));
charSetAST.atnState = left;
return new Handle(left, right);
return set;
}
@Override

View File

@ -179,8 +179,7 @@ public class ParserATNFactory implements ATNFactory {
set.add(ttype);
}
if ( invert ) {
IntervalSet notSet = set.complement(Token.MIN_TOKEN_TYPE, g.getMaxTokenType());
left.addTransition(new NotSetTransition(right, set, notSet));
left.addTransition(new NotSetTransition(right, set));
}
else {
left.addTransition(new SetTransition(right, set));

View File

@ -827,6 +827,7 @@ setElement
: TOKEN_REF<TerminalAST>
| STRING_LITERAL<TerminalAST>
| range
| LEXER_CHAR_SET
;
// -------------

View File

@ -163,6 +163,7 @@ setElement
: STRING_LITERAL
| TOKEN_REF
| ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
| LEXER_CHAR_SET
;
atom returns [ATNFactory.Handle p]

View File

@ -228,6 +228,34 @@ public class TestLexerExec extends BaseTest {
assertEquals(expecting, found);
}
/**
 * Runs a lexer whose rule I is built from two negated char sets, the second
 * one under a star loop, over the input "xaf".
 *
 * The expected output is the action's "I" line once, then one line for a
 * token with text 'xaf' and one line for EOF.
 *
 * NOTE(review): inside rule I the backslash-n is a Java escape, so the grammar
 * text carries a raw newline within the brackets, whereas the WS rule uses a
 * doubled backslash and passes the escape through to the grammar. Confirm the
 * difference is intended.
 */
@Test
public void testCharSetNot() throws Exception {
    final String expectedOutput =
        "I\n" +
        "[@0,0:2='xaf',<3>,1:0]\n" +
        "[@1,3:2='<EOF>',<-1>,1:3]\n";
    final String lexerGrammar =
        "lexer grammar L;\n" +
        "I : ~[ab \n] ~[ \ncd]* {System.out.println(\"I\");} ;\n" +
        "WS : [ \\n\\u000D]+ -> skip ;";
    final String actualOutput = execLexer("L.g", lexerGrammar, "L", "xaf");
    assertEquals(expectedOutput, actualOutput);
}
/**
 * Runs a lexer whose rule I is an alternation of a negated char set and the
 * literal 'a', over the input "a x".
 *
 * The expected output is the action's "I" line twice, then one line each for
 * tokens with text 'a' and 'x', and a final line for EOF. No line is expected
 * for the space between them; the WS rule is marked skip.
 *
 * NOTE(review): inside rule I the backslash-n is a Java escape, so the grammar
 * text carries a raw newline within the brackets, whereas the WS rule uses a
 * doubled backslash. Confirm the difference is intended.
 */
@Test
public void testCharSetInSet() throws Exception {
    final String expectedOutput =
        "I\n" +
        "I\n" +
        "[@0,0:0='a',<3>,1:0]\n" +
        "[@1,2:2='x',<3>,1:2]\n" +
        "[@2,3:2='<EOF>',<-1>,1:3]\n";
    final String lexerGrammar =
        "lexer grammar L;\n" +
        "I : (~[ab \n]|'a') {System.out.println(\"I\");} ;\n" +
        "WS : [ \\n\\u000D]+ -> skip ;";
    final String actualOutput = execLexer("L.g", lexerGrammar, "L", "a x");
    assertEquals(expectedOutput, actualOutput);
}
@Test public void testCharSetRange() throws Exception {
String grammar =
"lexer grammar L;\n"+