~[] stuff is allowed and works inside sets etc...
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9926]
This commit is contained in:
parent
368b70d53c
commit
e90b322dd4
|
@ -1,5 +1,9 @@
|
|||
ANTLR v4 Honey Badger early access
|
||||
|
||||
Jan 28, 2012
|
||||
|
||||
* Inverted character sets such as ~[ab] are now allowed in lexer rules and also work inside sets, e.g. (~[ab]|'c').
|
||||
|
||||
Jan 22, 2012
|
||||
|
||||
* Added ranges, escapes to [a-z] notation in lexer:
|
||||
|
|
|
@ -31,6 +31,8 @@ package org.antlr.v4.runtime;
|
|||
/** A source of characters for an ANTLR lexer */
|
||||
public interface CharStream extends IntStream {
|
||||
public static final int EOF = -1;
|
||||
public static final int MIN_CHAR = Character.MIN_VALUE;
|
||||
public static final int MAX_CHAR = Character.MAX_VALUE-1; // FFFE is max
|
||||
|
||||
/** For unbuffered streams, you can't use this; primarily I'm providing
|
||||
* a useful interface for action code. Just make sure actions don't
|
||||
|
|
|
@ -161,7 +161,7 @@ public abstract class ATNSimulator {
|
|||
ActionTransition a = new ActionTransition(target, arg1, arg2, arg3 != 0);
|
||||
return a;
|
||||
case Transition.SET : return new SetTransition(target, sets.get(arg1));
|
||||
case Transition.NOT_SET : return new NotSetTransition(target, sets.get(arg1), null);
|
||||
case Transition.NOT_SET : return new NotSetTransition(target, sets.get(arg1));
|
||||
case Transition.WILDCARD : return new WildcardTransition(target);
|
||||
}
|
||||
return null;
|
||||
|
|
|
@ -29,19 +29,13 @@
|
|||
|
||||
package org.antlr.v4.runtime.atn;
|
||||
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
import org.antlr.v4.runtime.misc.Nullable;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
|
||||
public class NotSetTransition extends SetTransition {
|
||||
// keep both set, notSet; we can only compute at construction time
|
||||
// since only then do we have grammar, which knows token set for complement.
|
||||
@Nullable
|
||||
public final IntervalSet notSet;
|
||||
|
||||
public NotSetTransition(@NotNull ATNState target, @Nullable IntervalSet set, @Nullable IntervalSet notSet) {
|
||||
public NotSetTransition(@NotNull ATNState target, @Nullable IntervalSet set) {
|
||||
super(target, set);
|
||||
this.notSet = notSet;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
lexer grammar E;
|
||||
I : '0'..'9'+ {System.out.println("I");} ;
|
||||
ID : [a-zA-Z] [a-zA-Z0-9]* ;
|
||||
WS : [ \n\u000D] -> skip ;
|
||||
I : ~[ab] ~[cd]* {System.out.println("I");} ;
|
||||
WS : [ \n\u000D]+ -> skip ;
|
||||
|
|
|
@ -30,7 +30,6 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.runtime.CommonToken;
|
||||
import org.antlr.runtime.Token;
|
||||
import org.antlr.v4.codegen.CodeGenerator;
|
||||
import org.antlr.v4.misc.CharSupport;
|
||||
import org.antlr.v4.parse.ANTLRParser;
|
||||
|
@ -161,20 +160,21 @@ public class LexerATNFactory extends ParserATNFactory {
|
|||
ATNState right = newState(associatedAST);
|
||||
IntervalSet set = new IntervalSet();
|
||||
for (GrammarAST t : alts) {
|
||||
if ( t.getType()== ANTLRParser.RANGE ) {
|
||||
if ( t.getType()==ANTLRParser.RANGE ) {
|
||||
int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText());
|
||||
int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText());
|
||||
set.add(a, b);
|
||||
}
|
||||
else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) {
|
||||
set.addAll(getSetFromCharSetLiteral(t));
|
||||
}
|
||||
else {
|
||||
int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
|
||||
set.add(c);
|
||||
}
|
||||
}
|
||||
if ( invert ) {
|
||||
// TODO: what? should be chars not token types
|
||||
IntervalSet notSet = set.complement(Token.MIN_TOKEN_TYPE, g.getMaxTokenType());
|
||||
left.addTransition(new NotSetTransition(right, set, notSet));
|
||||
left.addTransition(new NotSetTransition(right, set));
|
||||
}
|
||||
else {
|
||||
left.addTransition(new SetTransition(right, set));
|
||||
|
@ -210,36 +210,21 @@ public class LexerATNFactory extends ParserATNFactory {
|
|||
/** Build the ATN fragment for a lexer character-set literal.
 *  Two states are created for {@code charSetAST} and joined by a single
 *  {@code SetTransition} whose set is obtained from
 *  {@code getSetFromCharSetLiteral(charSetAST)}.
 *
 *  @param charSetAST the character-set AST node; its {@code atnState}
 *         field is assigned the left state
 *  @return a {@code Handle} built from the left and right states
 */
public Handle charSetLiteral(GrammarAST charSetAST) {
|
||||
ATNState left = newState(charSetAST);
|
||||
ATNState right = newState(charSetAST);
|
||||
// NOTE(review): cset is never read again in the lines shown for this method;
// presumably this is the pre-change side of the diff hunk -- confirm.
String cset = '"'+charSetAST.getText()+'"';
|
||||
// The literal-to-set conversion is delegated to the helper.
IntervalSet set = getSetFromCharSetLiteral(charSetAST);
|
||||
left.addTransition(new SetTransition(right, set));
|
||||
// Record the entry state on the AST node.
charSetAST.atnState = left;
|
||||
return new Handle(left, right);
|
||||
}
|
||||
|
||||
public IntervalSet getSetFromCharSetLiteral(GrammarAST charSetAST) {
|
||||
String chars = charSetAST.getText();
|
||||
chars = chars.substring(1, chars.length()-1);
|
||||
String cset = '"'+ chars +'"';
|
||||
IntervalSet set = new IntervalSet();
|
||||
// int n = cset.length();
|
||||
// int i = 0;
|
||||
// while ( i < n ) {
|
||||
// if ( (i+2)<n && cset.charAt(i+1)=='-' ) { // range x-y
|
||||
// int x = cset.charAt(i);
|
||||
// int y = cset.charAt(i+2);
|
||||
// if ( y=='\\' ) { i = i+3; continue; } // x-\
|
||||
// if ( x<=y ) set.add(x,y);
|
||||
// }
|
||||
// else if ( cset.charAt(i)=='\\' ) {
|
||||
// int end = i+2;
|
||||
// if ( (i+1)>=n ) break; // ignore spurious \ on end
|
||||
// if ( cset.charAt(i+1) == 'u' ) end = i+6;
|
||||
// if ( end>n ) break;
|
||||
// int c = CharSupport.getCharValueFromCharInGrammarLiteral(cset.substring(i,end));
|
||||
// set.add(c);
|
||||
// i = end;
|
||||
// }
|
||||
// else {
|
||||
// set.add(cset.charAt(i));
|
||||
// i++;
|
||||
// }
|
||||
// }
|
||||
|
||||
// unescape all valid escape char like \n, leaving escaped dashes as '\-'
|
||||
// so we can avoid seeing them as '-' range ops.
|
||||
String chars = CharSupport.getStringFromGrammarStringLiteral(cset);
|
||||
chars = CharSupport.getStringFromGrammarStringLiteral(cset);
|
||||
// now make x-y become set of char
|
||||
int n = chars.length();
|
||||
for (int i=0; i< n; i++) {
|
||||
|
@ -257,9 +242,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
|||
set.add(c);
|
||||
}
|
||||
}
|
||||
left.addTransition(new SetTransition(right, set));
|
||||
charSetAST.atnState = left;
|
||||
return new Handle(left, right);
|
||||
return set;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -179,8 +179,7 @@ public class ParserATNFactory implements ATNFactory {
|
|||
set.add(ttype);
|
||||
}
|
||||
if ( invert ) {
|
||||
IntervalSet notSet = set.complement(Token.MIN_TOKEN_TYPE, g.getMaxTokenType());
|
||||
left.addTransition(new NotSetTransition(right, set, notSet));
|
||||
left.addTransition(new NotSetTransition(right, set));
|
||||
}
|
||||
else {
|
||||
left.addTransition(new SetTransition(right, set));
|
||||
|
|
|
@ -827,6 +827,7 @@ setElement
|
|||
: TOKEN_REF<TerminalAST>
|
||||
| STRING_LITERAL<TerminalAST>
|
||||
| range
|
||||
| LEXER_CHAR_SET
|
||||
;
|
||||
|
||||
// -------------
|
||||
|
|
|
@ -163,6 +163,7 @@ setElement
|
|||
: STRING_LITERAL
|
||||
| TOKEN_REF
|
||||
| ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
|
||||
| LEXER_CHAR_SET
|
||||
;
|
||||
|
||||
atom returns [ATNFactory.Handle p]
|
||||
|
|
|
@ -228,6 +228,34 @@ public class TestLexerExec extends BaseTest {
|
|||
assertEquals(expecting, found);
|
||||
}
|
||||
|
||||
/** Checks inverted character sets in a lexer rule.
 *  Rule I is one character outside {a, b, space, newline} followed by any
 *  number of characters outside {space, newline, c, d}. For input "xaf" the
 *  expected output is the action text "I" once, a single token 'xaf'
 *  printed with type 3, and then EOF. The 'a' is accepted because only the
 *  first set excludes it.
 */
@Test public void testCharSetNot() throws Exception {
|
||||
String grammar =
|
||||
"lexer grammar L;\n"+
|
||||
// The single-backslash n below is a Java escape, so the grammar text
// receives a real newline character inside the brackets of rule I.
"I : ~[ab \n] ~[ \ncd]* {System.out.println(\"I\");} ;\n"+
|
||||
// Here the backslashes are doubled, so the grammar text receives the
// escape sequences themselves rather than the characters they denote.
"WS : [ \\n\\u000D]+ -> skip ;";
|
||||
String found = execLexer("L.g", grammar, "L", "xaf");
|
||||
String expecting =
|
||||
"I\n" +
|
||||
"[@0,0:2='xaf',<3>,1:0]\n" +
|
||||
"[@1,3:2='<EOF>',<-1>,1:3]\n";
|
||||
assertEquals(expecting, found);
|
||||
}
|
||||
|
||||
/** Checks an inverted character set used as one alternative of a set.
 *  Rule I matches either one character outside {a, b, space, newline} or
 *  the literal 'a'. For input "a x" the expected output is the action text
 *  "I" twice, tokens 'a' (index 0) and 'x' (index 2) both printed with
 *  type 3, and then EOF; no token is reported for the space at index 1.
 */
@Test public void testCharSetInSet() throws Exception {
|
||||
String grammar =
|
||||
"lexer grammar L;\n"+
|
||||
// The single-backslash n below is a Java escape, so the grammar text
// receives a real newline character inside the brackets of rule I.
"I : (~[ab \n]|'a') {System.out.println(\"I\");} ;\n"+
|
||||
// WS carries "-> skip"; the expected output below lists no token for it.
"WS : [ \\n\\u000D]+ -> skip ;";
|
||||
String found = execLexer("L.g", grammar, "L", "a x");
|
||||
String expecting =
|
||||
"I\n" +
|
||||
"I\n" +
|
||||
"[@0,0:0='a',<3>,1:0]\n" +
|
||||
"[@1,2:2='x',<3>,1:2]\n" +
|
||||
"[@2,3:2='<EOF>',<-1>,1:3]\n";
|
||||
assertEquals(expecting, found);
|
||||
}
|
||||
|
||||
@Test public void testCharSetRange() throws Exception {
|
||||
String grammar =
|
||||
"lexer grammar L;\n"+
|
||||
|
|
Loading…
Reference in New Issue