Cleaned up ATN construction, set detection, and unit tests. Full context in parser simulation is still not done.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 8904]
This commit is contained in:
parrt 2011-07-24 15:25:17 -08:00
parent 1fcc12a7ad
commit b4b02bb813
8 changed files with 117 additions and 37 deletions

View File

@ -414,7 +414,9 @@ public class IntervalSet implements IntSet {
return this.intervals.equals(other.intervals);
}
public String toString() {
public String toString() { return toString(false); }
public String toString(boolean elemAreChar) {
StringBuffer buf = new StringBuffer();
if ( this.intervals==null || this.intervals.size()==0 ) {
return "{}";
@ -428,10 +430,43 @@ public class IntervalSet implements IntSet {
int a = I.a;
int b = I.b;
if ( a==b ) {
buf.append(a);
if ( a==-1 ) buf.append("<EOF>");
else if ( elemAreChar ) buf.append("'"+(char)a+"'");
else buf.append(a);
}
else {
buf.append(a+".."+b);
if ( elemAreChar ) buf.append("'"+(char)a+"'..'"+(char)b+"'");
else buf.append(a+".."+b);
}
if ( iter.hasNext() ) {
buf.append(", ");
}
}
if ( this.size()>1 ) {
buf.append("}");
}
return buf.toString();
}
public String toString(String[] tokenNames) {
StringBuffer buf = new StringBuffer();
if ( this.intervals==null || this.intervals.size()==0 ) {
return "{}";
}
if ( this.size()>1 ) {
buf.append("{");
}
Iterator iter = this.intervals.iterator();
while (iter.hasNext()) {
Interval I = (Interval) iter.next();
int a = I.a;
int b = I.b;
if ( a==b ) {
if ( a==-1 ) buf.append("<EOF>");
else buf.append(tokenNames[a]);
}
else {
buf.append(tokenNames[a]+".."+tokenNames[b]);
}
if ( iter.hasNext() ) {
buf.append(", ");

View File

@ -611,8 +611,8 @@ public static final ATN _ATN =
ATNSimulator.deserialize(_serializedATN.toCharArray());
static {
org.antlr.v4.tool.DOTGenerator dot = new org.antlr.v4.tool.DOTGenerator(null);
//System.out.println(dot.getDOT(_ATN.decisionToState.get(0), ruleNames));
//System.out.println(dot.getDOT(_ATN.ruleToStartState[2], ruleNames));
//System.out.println(dot.getDOT(_ATN.decisionToState.get(0), ruleNames, false));
//System.out.println(dot.getDOT(_ATN.ruleToStartState[2], ruleNames, false));
}
>>

View File

@ -46,7 +46,7 @@ public class ATNPrinter {
this.start = start;
}
public String toString() {
public String asString() {
if ( start==null ) return null;
marked = new HashSet<ATNState>();
@ -60,7 +60,7 @@ public class ATNPrinter {
s = work.remove(0);
if ( marked.contains(s) ) continue;
int n = s.getNumberOfTransitions();
//System.out.println("visit "+getATNStateString(s)+"; edges="+n);
// System.out.println("visit "+s+"; edges="+n);
marked.add(s);
for (int i=0; i<n; i++) {
Transition t = s.transition(i);
@ -73,7 +73,7 @@ public class ATNPrinter {
buf.append("->"+ getStateString(t.target)+'\n');
}
else if ( t instanceof RuleTransition ) {
buf.append("->"+ getStateString(t.target)+'\n');
buf.append("-"+g.getRule(((RuleTransition)t).ruleIndex).name+"->"+ getStateString(t.target)+'\n');
}
else if ( t instanceof ActionTransition ) {
ActionTransition a = (ActionTransition)t;
@ -82,7 +82,12 @@ public class ATNPrinter {
else if ( t instanceof SetTransition ) {
SetTransition st = (SetTransition)t;
boolean not = st instanceof NotSetTransition;
buf.append("-"+(not?"~":"")+st.toString()+"->"+ getStateString(t.target)+'\n');
if ( g.isLexer() ) {
buf.append("-"+(not?"~":"")+st.toString()+"->"+ getStateString(t.target)+'\n');
}
else {
buf.append("-"+(not?"~":"")+st.label().toString(g.getTokenNames())+"->"+ getStateString(t.target)+'\n');
}
}
else if ( t instanceof AtomTransition ) {
AtomTransition a = (AtomTransition)t;
@ -106,7 +111,6 @@ public class ATNPrinter {
String stateStr = "s"+n;
if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
else if ( s instanceof PlusBlockStartState ) stateStr = "PlusBlockStart_"+n;
else if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
else if ( s instanceof BlockStartState) stateStr = "BlockStart_"+n;
else if ( s instanceof BlockEndState ) stateStr = "BlockEnd_"+n;
else if ( s instanceof RuleStartState) stateStr = "RuleStart_"+g.getRule(s.ruleIndex).name+"_"+n;

View File

@ -91,8 +91,8 @@ public class ParserATNFactory implements ATNFactory {
RuleStopState stop = atn.ruleToStopState[r.index];
epsilon(blk.right, stop);
Handle h = new Handle(start, stop);
// FASerializer ser = new FASerializer(g, h.left);
// System.out.println(ruleAST.toStringTree()+":\n"+ser);
ATNPrinter ser = new ATNPrinter(g, h.left);
System.out.println(ruleAST.toStringTree()+":\n"+ser.asString());
ruleAST.atnState = start;
return h;
}

View File

@ -62,13 +62,17 @@ ebnfSuffix
blockSet
@init {
boolean inLexer = Character.isUpperCase(currentRuleName.charAt(0));
}
: {Character.isLowerCase(currentRuleName.charAt(0)) &&
!inContext("RULE")}? // if non-lexer rule and not rule block
^(BLOCK ( ^(ALT setElement) )+) -> ^(SET[$BLOCK.token, "SET"] setElement+)
: {!inContext("RULE")}? // if not rule block and > 1 alt
^(BLOCK ^(ALT setElement[inLexer]) ( ^(ALT setElement[inLexer]) )+)
-> ^(SET[$BLOCK.token, "SET"] setElement+)
;
setElement
setElement[boolean inLexer]
@after {$tree = new TerminalAST($start);} // elem can't be to right of ->
: {!rewriteElems.contains($start.getText())}? (STRING_LITERAL|TOKEN_REF)
: {!rewriteElems.contains($start.getText())}?
( STRING_LITERAL
| {!inLexer}? TOKEN_REF
)
;

View File

@ -53,9 +53,7 @@ public class DOTGenerator {
this.grammar = grammar;
}
public String getDOT(DFA dfa,
boolean isLexer)
{
public String getDOT(DFA dfa, boolean isLexer) {
if ( dfa.s0==null ) return null;
ST dot = stlib.getInstanceOf("dfa");
@ -162,18 +160,22 @@ public class DOTGenerator {
}
/** Convenience overload: return the DOT description for the state machine
 *  starting at startState by delegating to
 *  {@link #getDOT(ATNState, boolean)} with isLexer set to false.
 */
public String getDOT(ATNState startState) {
return getDOT(startState, false);
}
public String getDOT(ATNState startState, boolean isLexer) {
Set<String> ruleNames = grammar.rules.keySet();
String[] names = new String[ruleNames.size()+1];
int i = 0;
for (String s : ruleNames) names[i++] = s;
return getDOT(startState, names);
return getDOT(startState, names, isLexer);
}
/** Return a String containing a DOT description that, when displayed,
* will show the incoming state machine visually. All nodes reachable
* from startState will be included.
*/
public String getDOT(ATNState startState, String[] ruleNames) {
public String getDOT(ATNState startState, String[] ruleNames, boolean isLexer) {
if ( startState==null ) return null;
// The output DOT graph for visualization
@ -212,6 +214,7 @@ public class DOTGenerator {
ST edgeST = null;
for (int i = 0; i < s.getNumberOfTransitions(); i++) {
Transition edge = s.transition(i);
System.out.println("dump s"+s.stateNumber+"->"+edge);
if ( edge instanceof RuleTransition ) {
RuleTransition rr = ((RuleTransition)edge);
// don't jump to other rules, but display edge to follow node
@ -226,17 +229,46 @@ public class DOTGenerator {
}
if ( edge instanceof ActionTransition) {
edgeST = stlib.getInstanceOf("action-edge");
edgeST.add("label", getEdgeLabel(edge.toString()));
}
else if ( edge instanceof PredicateTransition ) {
edgeST = stlib.getInstanceOf("edge");
edgeST.add("label", getEdgeLabel(edge.toString()));
}
else if ( edge.isEpsilon() ) {
edgeST = stlib.getInstanceOf("epsilon-edge");
edgeST.add("label", getEdgeLabel(edge.toString()));
}
else if ( edge instanceof AtomTransition ) {
edgeST = stlib.getInstanceOf("edge");
AtomTransition atom = (AtomTransition)edge;
String label = String.valueOf(atom.label);
if ( isLexer ) label = "'"+getEdgeLabel(String.valueOf((char)atom.label))+"'";
else if ( grammar!=null ) label = grammar.getTokenDisplayName(atom.label);
if ( edge instanceof NotAtomTransition ) label = "~"+label;
edgeST.add("label", getEdgeLabel(label));
}
else if ( edge instanceof SetTransition ) {
edgeST = stlib.getInstanceOf("edge");
SetTransition set = (SetTransition)edge;
String label = set.label().toString();
if ( isLexer ) label = set.label().toString(true);
else if ( grammar!=null ) label = set.label().toString(grammar.getTokenNames());
if ( edge instanceof NotSetTransition ) label = "~"+label;
edgeST.add("label", getEdgeLabel(label));
}
else if ( edge instanceof RangeTransition ) {
edgeST = stlib.getInstanceOf("edge");
RangeTransition range = (RangeTransition)edge;
String label = range.label().toString();
if ( isLexer ) label = range.toString();
else if ( grammar!=null ) label = range.label().toString(grammar.getTokenNames());
edgeST.add("label", getEdgeLabel(label));
}
else {
edgeST = stlib.getInstanceOf("edge");
edgeST.add("label", getEdgeLabel(edge.toString()));
}
edgeST.add("label", getEdgeLabel(edge.toString()));
edgeST.add("src", "s"+s.stateNumber);
edgeST.add("target", "s"+edge.target.stateNumber);
edgeST.add("arrowhead", arrowhead);

View File

@ -150,7 +150,7 @@ public class TestATNConstruction extends BaseTest {
"b : B ;");
String expecting =
"RuleStart_a_0->s4\n" +
"s4->RuleStart_b_2\n" +
"s4-b->RuleStart_b_2\n" +
"s5->s6\n" +
"s6-A->s7\n" +
"s7->RuleStop_a_1\n" +
@ -239,7 +239,7 @@ public class TestATNConstruction extends BaseTest {
"RuleStart_a_0->BlockStart_8\n" +
"BlockStart_8->s6\n" +
"BlockStart_8->BlockEnd_9\n" +
"s6-{3..4}->s7\n" +
"s6-{A..B}->s7\n" +
"BlockEnd_9->RuleStop_a_1\n" +
"s7->BlockEnd_9\n" +
"RuleStop_a_1-EOF->s10\n";
@ -252,7 +252,7 @@ public class TestATNConstruction extends BaseTest {
"a : (A | B) C;");
String expecting =
"RuleStart_a_0->s6\n" +
"s6-{3..4}->s7\n" +
"s6-{A..B}->s7\n" +
"s7->s8\n" +
"s8-C->s9\n" +
"s9->RuleStop_a_1\n" +
@ -284,7 +284,7 @@ public class TestATNConstruction extends BaseTest {
String expecting =
"RuleStart_a_0->PlusBlockStart_8\n" +
"PlusBlockStart_8->s6\n" +
"s6-{3..4}->s7\n" +
"s6-{A..B}->s7\n" +
"s7->BlockEnd_9\n" +
"BlockEnd_9->PlusLoopBack_10\n" +
"PlusLoopBack_10->s6\n" +
@ -363,17 +363,22 @@ public class TestATNConstruction extends BaseTest {
@Test public void testAorBstar() throws Exception {
Grammar g = new Grammar(
"parser grammar P;\n"+
"a : (A | B)* ;");
"a : (A | B{;})* ;");
String expecting =
"RuleStart_a_0->StarBlockStart_8\n" +
"StarBlockStart_8->s6\n" +
"StarBlockStart_8->s2\n" +
"StarBlockStart_8->s4\n" +
"StarBlockStart_8->s11\n" +
"s6-{3..4}->s7\n" +
"s2-A->s3\n" +
"s4-B->s5\n" +
"s11->RuleStop_a_1\n" +
"s7->BlockEnd_9\n" +
"s3->BlockEnd_9\n" +
"s5->s6\n" +
"RuleStop_a_1-EOF->s12\n" +
"BlockEnd_9->StarLoopBack_10\n" +
"StarLoopBack_10->StarBlockStart_8\n";
"s6-action_0:-1->s7\n" +
"StarLoopBack_10->StarBlockStart_8\n" +
"s7->BlockEnd_9\n";
checkRule(g, "a", expecting);
}
@ -385,8 +390,8 @@ public class TestATNConstruction extends BaseTest {
"RuleStart_a_0->BlockStart_10\n" +
"BlockStart_10->s2\n" +
"BlockStart_10->s6\n" +
"s2-pred-0:0->s3\n" +
"s6-pred-0:1->s7\n" +
"s2-pred_0:0->s3\n" +
"s6-pred_0:1->s7\n" +
"s3->s4\n" +
"s7->s8\n" +
"s4-A->s5\n" +
@ -940,7 +945,7 @@ public class TestATNConstruction extends BaseTest {
ATN nfa = f.createATN();
ATNState startState = nfa.modeNameToStartState.get(modeName);
ATNPrinter serializer = new ATNPrinter(g, startState);
String result = serializer.toString();
String result = serializer.asString();
//System.out.print(result);
assertEquals(expecting, result);
@ -968,7 +973,7 @@ public class TestATNConstruction extends BaseTest {
Rule r = g.getRule(ruleName);
ATNState startState = atn.ruleToStartState[r.index];
ATNPrinter serializer = new ATNPrinter(g, startState);
String result = serializer.toString();
String result = serializer.asString();
//System.out.print(result);
assertEquals(expecting, result);

View File

@ -128,7 +128,7 @@ public class TestATNLexerInterpreter extends BaseTest {
CharStream input = new ANTLRStringStream(inputString);
ATNState startState = atn.modeNameToStartState.get("DEFAULT_MODE");
DOTGenerator dot = new DOTGenerator(lg);
System.out.println(dot.getDOT(startState));
System.out.println(dot.getDOT(startState, true));
List<String> tokenTypes = getTokenTypes(lg, atn, input, false);
String result = Utils.join(tokenTypes.iterator(), ", ");