~[] stuff is allowed and works inside sets etc...
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9926]
This commit is contained in:
parent
368b70d53c
commit
e90b322dd4
|
@ -1,5 +1,9 @@
|
||||||
ANTLR v4 Honey Badger early access
|
ANTLR v4 Honey Badger early access
|
||||||
|
|
||||||
|
Jan 28, 2012
|
||||||
|
|
||||||
|
* ~[...] negated char sets are allowed in lexer rules and also work inside sets, e.g., (~[ab]|'a'), etc...
|
||||||
|
|
||||||
Jan 22, 2012
|
Jan 22, 2012
|
||||||
|
|
||||||
* Added ranges, escapes to [a-z] notation in lexer:
|
* Added ranges, escapes to [a-z] notation in lexer:
|
||||||
|
|
|
@ -31,6 +31,8 @@ package org.antlr.v4.runtime;
|
||||||
/** A source of characters for an ANTLR lexer */
|
/** A source of characters for an ANTLR lexer */
|
||||||
public interface CharStream extends IntStream {
|
public interface CharStream extends IntStream {
|
||||||
public static final int EOF = -1;
|
public static final int EOF = -1;
|
||||||
|
public static final int MIN_CHAR = Character.MIN_VALUE;
|
||||||
|
public static final int MAX_CHAR = Character.MAX_VALUE-1; // FFFE is max
|
||||||
|
|
||||||
/** For unbuffered streams, you can't use this; primarily I'm providing
|
/** For unbuffered streams, you can't use this; primarily I'm providing
|
||||||
* a useful interface for action code. Just make sure actions don't
|
* a useful interface for action code. Just make sure actions don't
|
||||||
|
|
|
@ -161,7 +161,7 @@ public abstract class ATNSimulator {
|
||||||
ActionTransition a = new ActionTransition(target, arg1, arg2, arg3 != 0);
|
ActionTransition a = new ActionTransition(target, arg1, arg2, arg3 != 0);
|
||||||
return a;
|
return a;
|
||||||
case Transition.SET : return new SetTransition(target, sets.get(arg1));
|
case Transition.SET : return new SetTransition(target, sets.get(arg1));
|
||||||
case Transition.NOT_SET : return new NotSetTransition(target, sets.get(arg1), null);
|
case Transition.NOT_SET : return new NotSetTransition(target, sets.get(arg1));
|
||||||
case Transition.WILDCARD : return new WildcardTransition(target);
|
case Transition.WILDCARD : return new WildcardTransition(target);
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -29,19 +29,13 @@
|
||||||
|
|
||||||
package org.antlr.v4.runtime.atn;
|
package org.antlr.v4.runtime.atn;
|
||||||
|
|
||||||
|
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||||
import org.antlr.v4.runtime.misc.NotNull;
|
import org.antlr.v4.runtime.misc.NotNull;
|
||||||
import org.antlr.v4.runtime.misc.Nullable;
|
import org.antlr.v4.runtime.misc.Nullable;
|
||||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
|
||||||
|
|
||||||
public class NotSetTransition extends SetTransition {
|
public class NotSetTransition extends SetTransition {
|
||||||
// keep both set, notSet; we can only compute at construction time
|
public NotSetTransition(@NotNull ATNState target, @Nullable IntervalSet set) {
|
||||||
// since only then do we have grammar, which knows token set for complement.
|
|
||||||
@Nullable
|
|
||||||
public final IntervalSet notSet;
|
|
||||||
|
|
||||||
public NotSetTransition(@NotNull ATNState target, @Nullable IntervalSet set, @Nullable IntervalSet notSet) {
|
|
||||||
super(target, set);
|
super(target, set);
|
||||||
this.notSet = notSet;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
lexer grammar E;
|
lexer grammar E;
|
||||||
I : '0'..'9'+ {System.out.println("I");} ;
|
I : ~[ab] ~[cd]* {System.out.println("I");} ;
|
||||||
ID : [a-zA-Z] [a-zA-Z0-9]* ;
|
WS : [ \n\u000D]+ -> skip ;
|
||||||
WS : [ \n\u000D] -> skip ;
|
|
||||||
|
|
|
@ -30,7 +30,6 @@
|
||||||
package org.antlr.v4.automata;
|
package org.antlr.v4.automata;
|
||||||
|
|
||||||
import org.antlr.runtime.CommonToken;
|
import org.antlr.runtime.CommonToken;
|
||||||
import org.antlr.runtime.Token;
|
|
||||||
import org.antlr.v4.codegen.CodeGenerator;
|
import org.antlr.v4.codegen.CodeGenerator;
|
||||||
import org.antlr.v4.misc.CharSupport;
|
import org.antlr.v4.misc.CharSupport;
|
||||||
import org.antlr.v4.parse.ANTLRParser;
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
|
@ -161,20 +160,21 @@ public class LexerATNFactory extends ParserATNFactory {
|
||||||
ATNState right = newState(associatedAST);
|
ATNState right = newState(associatedAST);
|
||||||
IntervalSet set = new IntervalSet();
|
IntervalSet set = new IntervalSet();
|
||||||
for (GrammarAST t : alts) {
|
for (GrammarAST t : alts) {
|
||||||
if ( t.getType()== ANTLRParser.RANGE ) {
|
if ( t.getType()==ANTLRParser.RANGE ) {
|
||||||
int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText());
|
int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText());
|
||||||
int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText());
|
int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText());
|
||||||
set.add(a, b);
|
set.add(a, b);
|
||||||
}
|
}
|
||||||
|
else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) {
|
||||||
|
set.addAll(getSetFromCharSetLiteral(t));
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
|
int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
|
||||||
set.add(c);
|
set.add(c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( invert ) {
|
if ( invert ) {
|
||||||
// TODO: what? should be chars not token types
|
left.addTransition(new NotSetTransition(right, set));
|
||||||
IntervalSet notSet = set.complement(Token.MIN_TOKEN_TYPE, g.getMaxTokenType());
|
|
||||||
left.addTransition(new NotSetTransition(right, set, notSet));
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
left.addTransition(new SetTransition(right, set));
|
left.addTransition(new SetTransition(right, set));
|
||||||
|
@ -210,36 +210,21 @@ public class LexerATNFactory extends ParserATNFactory {
|
||||||
public Handle charSetLiteral(GrammarAST charSetAST) {
|
public Handle charSetLiteral(GrammarAST charSetAST) {
|
||||||
ATNState left = newState(charSetAST);
|
ATNState left = newState(charSetAST);
|
||||||
ATNState right = newState(charSetAST);
|
ATNState right = newState(charSetAST);
|
||||||
String cset = '"'+charSetAST.getText()+'"';
|
IntervalSet set = getSetFromCharSetLiteral(charSetAST);
|
||||||
|
left.addTransition(new SetTransition(right, set));
|
||||||
|
charSetAST.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
public IntervalSet getSetFromCharSetLiteral(GrammarAST charSetAST) {
|
||||||
|
String chars = charSetAST.getText();
|
||||||
|
chars = chars.substring(1, chars.length()-1);
|
||||||
|
String cset = '"'+ chars +'"';
|
||||||
IntervalSet set = new IntervalSet();
|
IntervalSet set = new IntervalSet();
|
||||||
// int n = cset.length();
|
|
||||||
// int i = 0;
|
|
||||||
// while ( i < n ) {
|
|
||||||
// if ( (i+2)<n && cset.charAt(i+1)=='-' ) { // range x-y
|
|
||||||
// int x = cset.charAt(i);
|
|
||||||
// int y = cset.charAt(i+2);
|
|
||||||
// if ( y=='\\' ) { i = i+3; continue; } // x-\
|
|
||||||
// if ( x<=y ) set.add(x,y);
|
|
||||||
// }
|
|
||||||
// else if ( cset.charAt(i)=='\\' ) {
|
|
||||||
// int end = i+2;
|
|
||||||
// if ( (i+1)>=n ) break; // ignore spurious \ on end
|
|
||||||
// if ( cset.charAt(i+1) == 'u' ) end = i+6;
|
|
||||||
// if ( end>n ) break;
|
|
||||||
// int c = CharSupport.getCharValueFromCharInGrammarLiteral(cset.substring(i,end));
|
|
||||||
// set.add(c);
|
|
||||||
// i = end;
|
|
||||||
// }
|
|
||||||
// else {
|
|
||||||
// set.add(cset.charAt(i));
|
|
||||||
// i++;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// unescape all valid escape chars like \n, leaving escaped dashes as '\-'
|
// unescape all valid escape chars like \n, leaving escaped dashes as '\-'
|
||||||
// so we can avoid seeing them as '-' range ops.
|
// so we can avoid seeing them as '-' range ops.
|
||||||
String chars = CharSupport.getStringFromGrammarStringLiteral(cset);
|
chars = CharSupport.getStringFromGrammarStringLiteral(cset);
|
||||||
// now make x-y become set of char
|
// now make x-y become set of char
|
||||||
int n = chars.length();
|
int n = chars.length();
|
||||||
for (int i=0; i< n; i++) {
|
for (int i=0; i< n; i++) {
|
||||||
|
@ -257,9 +242,7 @@ public class LexerATNFactory extends ParserATNFactory {
|
||||||
set.add(c);
|
set.add(c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
left.addTransition(new SetTransition(right, set));
|
return set;
|
||||||
charSetAST.atnState = left;
|
|
||||||
return new Handle(left, right);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -179,8 +179,7 @@ public class ParserATNFactory implements ATNFactory {
|
||||||
set.add(ttype);
|
set.add(ttype);
|
||||||
}
|
}
|
||||||
if ( invert ) {
|
if ( invert ) {
|
||||||
IntervalSet notSet = set.complement(Token.MIN_TOKEN_TYPE, g.getMaxTokenType());
|
left.addTransition(new NotSetTransition(right, set));
|
||||||
left.addTransition(new NotSetTransition(right, set, notSet));
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
left.addTransition(new SetTransition(right, set));
|
left.addTransition(new SetTransition(right, set));
|
||||||
|
|
|
@ -827,6 +827,7 @@ setElement
|
||||||
: TOKEN_REF<TerminalAST>
|
: TOKEN_REF<TerminalAST>
|
||||||
| STRING_LITERAL<TerminalAST>
|
| STRING_LITERAL<TerminalAST>
|
||||||
| range
|
| range
|
||||||
|
| LEXER_CHAR_SET
|
||||||
;
|
;
|
||||||
|
|
||||||
// -------------
|
// -------------
|
||||||
|
|
|
@ -163,6 +163,7 @@ setElement
|
||||||
: STRING_LITERAL
|
: STRING_LITERAL
|
||||||
| TOKEN_REF
|
| TOKEN_REF
|
||||||
| ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
|
| ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
|
||||||
|
| LEXER_CHAR_SET
|
||||||
;
|
;
|
||||||
|
|
||||||
atom returns [ATNFactory.Handle p]
|
atom returns [ATNFactory.Handle p]
|
||||||
|
|
|
@ -228,6 +228,34 @@ public class TestLexerExec extends BaseTest {
|
||||||
assertEquals(expecting, found);
|
assertEquals(expecting, found);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test public void testCharSetNot() throws Exception {
|
||||||
|
String grammar =
|
||||||
|
"lexer grammar L;\n"+
|
||||||
|
"I : ~[ab \n] ~[ \ncd]* {System.out.println(\"I\");} ;\n"+
|
||||||
|
"WS : [ \\n\\u000D]+ -> skip ;";
|
||||||
|
String found = execLexer("L.g", grammar, "L", "xaf");
|
||||||
|
String expecting =
|
||||||
|
"I\n" +
|
||||||
|
"[@0,0:2='xaf',<3>,1:0]\n" +
|
||||||
|
"[@1,3:2='<EOF>',<-1>,1:3]\n";
|
||||||
|
assertEquals(expecting, found);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void testCharSetInSet() throws Exception {
|
||||||
|
String grammar =
|
||||||
|
"lexer grammar L;\n"+
|
||||||
|
"I : (~[ab \n]|'a') {System.out.println(\"I\");} ;\n"+
|
||||||
|
"WS : [ \\n\\u000D]+ -> skip ;";
|
||||||
|
String found = execLexer("L.g", grammar, "L", "a x");
|
||||||
|
String expecting =
|
||||||
|
"I\n" +
|
||||||
|
"I\n" +
|
||||||
|
"[@0,0:0='a',<3>,1:0]\n" +
|
||||||
|
"[@1,2:2='x',<3>,1:2]\n" +
|
||||||
|
"[@2,3:2='<EOF>',<-1>,1:3]\n";
|
||||||
|
assertEquals(expecting, found);
|
||||||
|
}
|
||||||
|
|
||||||
@Test public void testCharSetRange() throws Exception {
|
@Test public void testCharSetRange() throws Exception {
|
||||||
String grammar =
|
String grammar =
|
||||||
"lexer grammar L;\n"+
|
"lexer grammar L;\n"+
|
||||||
|
|
Loading…
Reference in New Issue