~sets weren't working; got closer to java.g working again.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 8909]
This commit is contained in:
parrt 2011-07-25 18:09:00 -08:00
parent c6fa2fce6e
commit fd3f8b17c9
13 changed files with 99 additions and 47 deletions

View File

@ -143,8 +143,8 @@ public abstract class ATNSimulator {
case Transition.ACTION : return new ActionTransition(target, arg1, arg2);
case Transition.FORCED_ACTION : return new ActionTransition(target, arg1, arg2);
case Transition.SET : return new SetTransition(sets.get(arg1), target);
case Transition.NOT_ATOM : return new NotAtomTransition(arg1, target);
case Transition.NOT_SET : return new NotSetTransition(null, sets.get(arg1), target);
// case Transition.NOT_ATOM : return new NotAtomTransition(arg1, target);
case Transition.NOT_SET : return new NotSetTransition(sets.get(arg1), null, target);
case Transition.WILDCARD : return new WildcardTransition(target);
}
return null;

View File

@ -35,8 +35,8 @@ import org.antlr.v4.runtime.misc.OrderedHashSet;
/** "dup" of ParserInterpreter */
public class LexerATNSimulator extends ATNSimulator {
public static boolean debug = false;
public static boolean dfa_debug = false;
public static boolean debug = true;
public static boolean dfa_debug = true;
public static final int NUM_EDGES = 255;
protected Lexer recog;
@ -263,6 +263,9 @@ public class LexerATNSimulator extends ATNSimulator {
SetTransition st = (SetTransition)trans;
boolean not = trans instanceof NotSetTransition;
if ( !not && st.set.member(t) || not && !st.set.member(t) ) {
// if ( st.set.toString().equals("0") ) {
// System.out.println("eh?");
// }
if ( debug ) System.out.println("match set "+st.set.toString());
return st.target;
}
@ -368,8 +371,13 @@ public class LexerATNSimulator extends ATNSimulator {
/**
 * Records the DFA edge {@code p --t--> q}.
 *
 * Does nothing when {@code p} is null. The edge table of {@code p} is
 * allocated on first use with {@code NUM_EDGES+1} slots.
 *
 * @param p source DFA state; may be null, in which case the call is a no-op
 * @param t symbol labelling the edge; used directly as the array index
 * @param q target DFA state stored in the edge table
 */
protected void addDFAEdge(DFAState p, int t, DFAState q) {
    if ( p!=null ) {
        DFAState[] edgeTable = p.edges;
        if ( edgeTable==null ) {
            // make room for tokens 1..n and -1 masquerading as index 0
            edgeTable = new DFAState[NUM_EDGES+1]; // TODO: make adaptive
            p.edges = edgeTable;
        }
        // NOTE(review): no EOF-to-index-0 remapping and no range check happens
        // here, so t is used as-is; presumably callers only pass values that
        // fit the table -- confirm against call sites.
        edgeTable[t] = q; // connect
    }
}

View File

@ -45,9 +45,6 @@ public class NotSetTransition extends SetTransition {
super(target);
}
@Override
public IntervalSet label() { return notSet; }
@Override
public String toString() {
return '~'+super.toString();

View File

@ -55,9 +55,9 @@ public abstract class Transition {
public static final int ACTION = 6;
public static final int FORCED_ACTION = 7;
public static final int SET = 8; // ~(A|B) or ~atom, wildcard, which convert to next 2
public static final int NOT_ATOM = 9;
public static final int NOT_SET = 10;
public static final int WILDCARD = 11;
// public static final int NOT_ATOM = 9;
public static final int NOT_SET = 9;
public static final int WILDCARD = 10;
public static String[] serializationNames = {
@ -70,7 +70,7 @@ public abstract class Transition {
"ACTION",
"FORCED_ACTION",
"SET",
"NOT_ATOM",
// "NOT_ATOM",
"NOT_SET",
"WILDCARD",
};
@ -84,7 +84,7 @@ public abstract class Transition {
put(AtomTransition.class, ATOM);
put(ActionTransition.class, ACTION); // TODO: FORCED?
put(SetTransition.class, SET);
put(NotAtomTransition.class, NOT_ATOM);
// put(NotAtomTransition.class, NOT_ATOM);
put(NotSetTransition.class, NOT_SET);
put(WildcardTransition.class, WILDCARD);
}};

View File

@ -67,7 +67,7 @@ public interface ATNFactory {
Handle tokenRef(TerminalAST node);
Handle set(GrammarAST associatedAST, List<GrammarAST> terminals, boolean invert);
Handle set(GrammarAST associatedAST, List<GrammarAST> alts, boolean invert);
Handle tree(List<Handle> els);

View File

@ -159,9 +159,9 @@ public class ATNSerializer {
case Transition.SET :
arg1 = setIndex++;
break;
case Transition.NOT_ATOM :
arg1 = ((NotAtomTransition)t).label;
break;
// case Transition.NOT_ATOM :
// arg1 = ((NotAtomTransition)t).label;
// break;
case Transition.NOT_SET :
arg1 = setIndex++;
break;
@ -199,7 +199,7 @@ public class ATNSerializer {
ruleIndex + "\n");
}
int nrules = ATNSimulator.toInt(data[p++]);
for (int i=1; i<=nrules; i++) {
for (int i=0; i<nrules; i++) {
int s = ATNSimulator.toInt(data[p++]);
int arg1 = ATNSimulator.toInt(data[p++]);
int arg2 = ATNSimulator.toInt(data[p++]);

View File

@ -31,6 +31,7 @@ package org.antlr.v4.automata;
import org.antlr.runtime.Token;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.tool.*;
@ -101,13 +102,20 @@ public class LexerATNFactory extends ParserATNFactory {
}
@Override
public Handle set(GrammarAST associatedAST, List<GrammarAST> terminals, boolean invert) {
public Handle set(GrammarAST associatedAST, List<GrammarAST> alts, boolean invert) {
ATNState left = newState(associatedAST);
ATNState right = newState(associatedAST);
IntervalSet set = new IntervalSet();
for (GrammarAST t : terminals) {
int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
set.add(c);
for (GrammarAST t : alts) {
if ( t.getType()== ANTLRParser.RANGE ) {
int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText());
int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText());
set.add(a, b);
}
else {
int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
set.add(c);
}
}
if ( invert ) {
IntervalSet notSet = (IntervalSet)set.complement(Token.MIN_TOKEN_TYPE, g.getMaxTokenType());

View File

@ -767,6 +767,7 @@ blockSet
// One member of a set: a token reference, a string literal, or a range.
// The two terminal alternatives carry the <TerminalAST> node-type option.
setElement
: TOKEN_REF<TerminalAST>
| STRING_LITERAL<TerminalAST>
| range
;
// -------------

View File

@ -128,9 +128,16 @@ subrule returns [ATNFactory.Handle p]
blockSet[boolean invert] returns [ATNFactory.Handle p]
@init {List<GrammarAST> alts = new ArrayList<GrammarAST>();}
: ^(SET (atom {alts.add($atom.start);})+) {$p = factory.set($start, alts, $invert);}
: ^(SET (setElement {alts.add($setElement.start);})+) {$p = factory.set($start, alts, $invert);}
;
/** Matches one member of a set: a string literal, a token reference, or a
 *  RANGE subtree whose two children are string literals (labelled a and b).
 *  Don't combine with atom otherwise it will build spurious ATN nodes.
 */
setElement
: STRING_LITERAL
| TOKEN_REF
| ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
;
astBlockSuffix
: ROOT
| IMPLIES

View File

@ -70,9 +70,9 @@ boolean inLexer = Character.isUpperCase(currentRuleName.charAt(0));
;
setElement[boolean inLexer]
@after {$tree = new TerminalAST($start);} // elem can't be to right of ->
: {!rewriteElems.contains($start.getText())}?
( STRING_LITERAL
| {!inLexer}? TOKEN_REF
| {!inLexer}?=> TOKEN_REF
| {inLexer}?=> ^(RANGE STRING_LITERAL STRING_LITERAL)
)
;

View File

@ -406,6 +406,11 @@ blockSet
// Visits one member of a set and reports it through the stringRef/tokenRef
// callbacks (second argument is always null here). For a RANGE subtree both
// end points are reported individually via stringRef.
setElement
: STRING_LITERAL {stringRef((TerminalAST)$STRING_LITERAL, null);}
| TOKEN_REF {tokenRef((TerminalAST)$TOKEN_REF, null);}
| ^(RANGE a=STRING_LITERAL b=STRING_LITERAL)
{
stringRef((TerminalAST)$a, null);
stringRef((TerminalAST)$b, null);
}
;
block

View File

@ -214,7 +214,6 @@ public class DOTGenerator {
ST edgeST = null;
for (int i = 0; i < s.getNumberOfTransitions(); i++) {
Transition edge = s.transition(i);
System.out.println("dump s"+s.stateNumber+"->"+edge);
if ( edge instanceof RuleTransition ) {
RuleTransition rr = ((RuleTransition)edge);
// don't jump to other rules, but display edge to follow node

View File

@ -19,7 +19,7 @@ public class TestATNSerialization extends BaseTest {
"4:BASIC 0\n" +
"5:BASIC 0\n" +
"6:BASIC 0\n" +
"rule 1:0 0,0\n" +
"rule 0:0 0,0\n" +
"0->2 EPSILON 0,0\n" +
"1->6 ATOM -1,0\n" +
"2->3 ATOM 3,0\n" +
@ -43,12 +43,15 @@ public class TestATNSerialization extends BaseTest {
"2:BASIC 0\n" +
"3:BASIC 0\n" +
"4:BASIC 0\n" +
"rule 1:0 0,0\n" +
"rule 0:0 0,0\n" +
"0:A..A\n" +
"0->2 EPSILON 0,0\n" +
"1->4 ATOM -1,0\n" +
"2->3 NOT_ATOM 3,0\n" +
"2->3 NOT_SET 0,0\n" +
"3->1 EPSILON 0,0\n";
ATN atn = createATN(g);
DOTGenerator gen = new DOTGenerator(g);
System.out.println(gen.getDOT(atn.ruleToStartState[0]));
String result = ATNSerializer.getDecoded(g, atn);
assertEquals(expecting, result);
}
@ -65,7 +68,7 @@ public class TestATNSerialization extends BaseTest {
"2:BASIC 0\n" +
"3:BASIC 0\n" +
"4:BASIC 0\n" +
"rule 1:0 0,0\n" +
"rule 0:0 0,0\n" +
"0->2 EPSILON 0,0\n" +
"1->4 ATOM -1,0\n" +
"2->3 WILDCARD 0,0\n" +
@ -92,7 +95,7 @@ public class TestATNSerialization extends BaseTest {
"8:BLOCK_START 0\n" +
"9:BLOCK_END 0\n" +
"10:BASIC 0\n" +
"rule 1:0 0,0\n" +
"rule 0:0 0,0\n" +
"0->8 EPSILON 0,0\n" +
"1->10 ATOM -1,0\n" +
"2->3 ATOM 3,0\n" +
@ -133,7 +136,7 @@ public class TestATNSerialization extends BaseTest {
"14:BLOCK_START 0\n" +
"15:BLOCK_END 0\n" +
"16:BASIC 0\n" +
"rule 1:0 0,0\n" +
"rule 0:0 0,0\n" +
"0->14 EPSILON 0,0\n" +
"1->16 ATOM -1,0\n" +
"2->3 ATOM 3,0\n" +
@ -175,7 +178,7 @@ public class TestATNSerialization extends BaseTest {
"8:BASIC 0\n" +
"9:BASIC 0\n" +
"10:BASIC 0\n" +
"rule 1:0 0,0\n" +
"rule 0:0 0,0\n" +
"0->4 EPSILON 0,0\n" +
"1->10 ATOM -1,0\n" +
"2->3 ATOM 3,0\n" +
@ -211,8 +214,8 @@ public class TestATNSerialization extends BaseTest {
"6:BASIC 1\n" +
"7:BASIC 1\n" +
"8:BASIC 1\n" +
"rule 1:0 0,0\n" +
"rule 2:2 0,0\n" +
"rule 0:0 0,0\n" +
"rule 1:2 0,0\n" +
"0->4 EPSILON 0,0\n" +
"1->8 ATOM -1,0\n" +
"2->6 EPSILON 0,0\n" +
@ -242,8 +245,8 @@ public class TestATNSerialization extends BaseTest {
"6:BASIC 0\n" +
"7:BASIC 1\n" +
"8:BASIC 1\n" +
"rule 1:1 3,-1\n" +
"rule 2:3 4,-1\n" +
"rule 0:1 3,-1\n" +
"rule 1:3 4,-1\n" +
"mode 0:0\n" +
"0->1 EPSILON 0,0\n" +
"0->3 EPSILON 0,0\n" +
@ -270,7 +273,7 @@ public class TestATNSerialization extends BaseTest {
"2:RULE_STOP 0\n" +
"3:BASIC 0\n" +
"4:BASIC 0\n" +
"rule 1:1 3,-1\n" +
"rule 0:1 3,-1\n" +
"mode 0:0\n" +
"0->1 EPSILON 0,0\n" +
"1->3 EPSILON 0,0\n" +
@ -297,7 +300,7 @@ public class TestATNSerialization extends BaseTest {
"6:BLOCK_END 0\n" +
"7:PLUS_LOOP_BACK 0\n" +
"8:BASIC 0\n" +
"rule 1:1 3,-1\n" +
"rule 0:1 3,-1\n" +
"mode 0:0\n" +
"0->1 EPSILON 0,0\n" +
"1->5 EPSILON 0,0\n" +
@ -340,9 +343,9 @@ public class TestATNSerialization extends BaseTest {
"12:BASIC 2\n" +
"13:BASIC 2\n" +
"14:BASIC 2\n" +
"rule 1:1 3,0\n" +
"rule 2:3 4,-1\n" +
"rule 3:5 5,1\n" +
"rule 0:1 3,0\n" +
"rule 1:3 4,-1\n" +
"rule 2:5 5,1\n" +
"mode 0:0\n" +
"0->1 EPSILON 0,0\n" +
"0->3 EPSILON 0,0\n" +
@ -375,7 +378,7 @@ public class TestATNSerialization extends BaseTest {
"2:RULE_STOP 0\n" +
"3:BASIC 0\n" +
"4:BASIC 0\n" +
"rule 1:1 3,-1\n" +
"rule 0:1 3,-1\n" +
"mode 0:0\n" +
"0:'a'..'b'\n" +
"0->1 EPSILON 0,0\n" +
@ -388,6 +391,30 @@ public class TestATNSerialization extends BaseTest {
assertEquals(expecting, result);
}
/**
 * A lexer set mixing single characters with a range ('p'..'t') must decode
 * to a single SET transition over one combined interval set.
 */
@Test
public void testLexerSetWithRange() throws Exception {
    String grammarText =
        "lexer grammar L;\n"+
        "ID : ('a'|'b'|'e'|'p'..'t')\n ;";
    String expected =
        "max type 3\n" +
        "0:TOKEN_START -1\n" +
        "1:RULE_START 0\n" +
        "2:RULE_STOP 0\n" +
        "3:BASIC 0\n" +
        "4:BASIC 0\n" +
        "rule 0:1 3,-1\n" +
        "mode 0:0\n" +
        "0:'a'..'b', 'e'..'e', 'p'..'t'\n" +
        "0->1 EPSILON 0,0\n" +
        "1->3 EPSILON 0,0\n" +
        "3->4 SET 0,0\n" +
        "4->2 EPSILON 0,0\n" +
        "0:0\n";
    LexerGrammar lexerGrammar = new LexerGrammar(grammarText);
    ATN atn = createATN(lexerGrammar);
    // Decode the serialized ATN back to text and compare with the listing above.
    String actual = ATNSerializer.getDecoded(lexerGrammar, atn);
    assertEquals(expected, actual);
}
@Test public void testLexerNotSetWithRange() throws Exception {
LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n"+
@ -399,7 +426,7 @@ public class TestATNSerialization extends BaseTest {
"2:RULE_STOP 0\n" +
"3:BASIC 0\n" +
"4:BASIC 0\n" +
"rule 1:1 3,-1\n" +
"rule 0:1 3,-1\n" +
"mode 0:0\n" +
"0:'a'..'b', 'e'..'e', 'p'..'t'\n" +
"0->1 EPSILON 0,0\n" +
@ -425,7 +452,7 @@ public class TestATNSerialization extends BaseTest {
"4:BASIC 0\n" +
"5:BASIC 0\n" +
"6:BASIC 0\n" +
"rule 1:1 3,-1\n" +
"rule 0:1 3,-1\n" +
"mode 0:0\n" +
"0:'a'..'b'\n" +
"1:'e'..'e', 'p'..'t'\n" +
@ -466,9 +493,9 @@ public class TestATNSerialization extends BaseTest {
"12:BASIC 1\n" +
"13:BASIC 2\n" +
"14:BASIC 2\n" +
"rule 1:3 3,-1\n" +
"rule 2:5 4,-1\n" +
"rule 3:7 5,-1\n" +
"rule 0:3 3,-1\n" +
"rule 1:5 4,-1\n" +
"rule 2:7 5,-1\n" +
"mode 0:0\n" +
"mode 1:1\n" +
"mode 2:2\n" +