forked from jasder/antlr
Cleaned up ATN construction, set detection, and unit tests. Full-context handling in the parser simulation is still not done.
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 8904]
This commit is contained in:
parent
1fcc12a7ad
commit
b4b02bb813
|
@ -414,7 +414,9 @@ public class IntervalSet implements IntSet {
|
|||
return this.intervals.equals(other.intervals);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
public String toString() { return toString(false); }
|
||||
|
||||
public String toString(boolean elemAreChar) {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
if ( this.intervals==null || this.intervals.size()==0 ) {
|
||||
return "{}";
|
||||
|
@ -428,10 +430,43 @@ public class IntervalSet implements IntSet {
|
|||
int a = I.a;
|
||||
int b = I.b;
|
||||
if ( a==b ) {
|
||||
buf.append(a);
|
||||
if ( a==-1 ) buf.append("<EOF>");
|
||||
else if ( elemAreChar ) buf.append("'"+(char)a+"'");
|
||||
else buf.append(a);
|
||||
}
|
||||
else {
|
||||
buf.append(a+".."+b);
|
||||
if ( elemAreChar ) buf.append("'"+(char)a+"'..'"+(char)b+"'");
|
||||
else buf.append(a+".."+b);
|
||||
}
|
||||
if ( iter.hasNext() ) {
|
||||
buf.append(", ");
|
||||
}
|
||||
}
|
||||
if ( this.size()>1 ) {
|
||||
buf.append("}");
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
public String toString(String[] tokenNames) {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
if ( this.intervals==null || this.intervals.size()==0 ) {
|
||||
return "{}";
|
||||
}
|
||||
if ( this.size()>1 ) {
|
||||
buf.append("{");
|
||||
}
|
||||
Iterator iter = this.intervals.iterator();
|
||||
while (iter.hasNext()) {
|
||||
Interval I = (Interval) iter.next();
|
||||
int a = I.a;
|
||||
int b = I.b;
|
||||
if ( a==b ) {
|
||||
if ( a==-1 ) buf.append("<EOF>");
|
||||
else buf.append(tokenNames[a]);
|
||||
}
|
||||
else {
|
||||
buf.append(tokenNames[a]+".."+tokenNames[b]);
|
||||
}
|
||||
if ( iter.hasNext() ) {
|
||||
buf.append(", ");
|
||||
|
|
|
@ -611,8 +611,8 @@ public static final ATN _ATN =
|
|||
ATNSimulator.deserialize(_serializedATN.toCharArray());
|
||||
static {
|
||||
org.antlr.v4.tool.DOTGenerator dot = new org.antlr.v4.tool.DOTGenerator(null);
|
||||
//System.out.println(dot.getDOT(_ATN.decisionToState.get(0), ruleNames));
|
||||
//System.out.println(dot.getDOT(_ATN.ruleToStartState[2], ruleNames));
|
||||
//System.out.println(dot.getDOT(_ATN.decisionToState.get(0), ruleNames, false));
|
||||
//System.out.println(dot.getDOT(_ATN.ruleToStartState[2], ruleNames, false));
|
||||
}
|
||||
>>
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ public class ATNPrinter {
|
|||
this.start = start;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
public String asString() {
|
||||
if ( start==null ) return null;
|
||||
marked = new HashSet<ATNState>();
|
||||
|
||||
|
@ -60,7 +60,7 @@ public class ATNPrinter {
|
|||
s = work.remove(0);
|
||||
if ( marked.contains(s) ) continue;
|
||||
int n = s.getNumberOfTransitions();
|
||||
//System.out.println("visit "+getATNStateString(s)+"; edges="+n);
|
||||
// System.out.println("visit "+s+"; edges="+n);
|
||||
marked.add(s);
|
||||
for (int i=0; i<n; i++) {
|
||||
Transition t = s.transition(i);
|
||||
|
@ -73,7 +73,7 @@ public class ATNPrinter {
|
|||
buf.append("->"+ getStateString(t.target)+'\n');
|
||||
}
|
||||
else if ( t instanceof RuleTransition ) {
|
||||
buf.append("->"+ getStateString(t.target)+'\n');
|
||||
buf.append("-"+g.getRule(((RuleTransition)t).ruleIndex).name+"->"+ getStateString(t.target)+'\n');
|
||||
}
|
||||
else if ( t instanceof ActionTransition ) {
|
||||
ActionTransition a = (ActionTransition)t;
|
||||
|
@ -82,7 +82,12 @@ public class ATNPrinter {
|
|||
else if ( t instanceof SetTransition ) {
|
||||
SetTransition st = (SetTransition)t;
|
||||
boolean not = st instanceof NotSetTransition;
|
||||
buf.append("-"+(not?"~":"")+st.toString()+"->"+ getStateString(t.target)+'\n');
|
||||
if ( g.isLexer() ) {
|
||||
buf.append("-"+(not?"~":"")+st.toString()+"->"+ getStateString(t.target)+'\n');
|
||||
}
|
||||
else {
|
||||
buf.append("-"+(not?"~":"")+st.label().toString(g.getTokenNames())+"->"+ getStateString(t.target)+'\n');
|
||||
}
|
||||
}
|
||||
else if ( t instanceof AtomTransition ) {
|
||||
AtomTransition a = (AtomTransition)t;
|
||||
|
@ -106,7 +111,6 @@ public class ATNPrinter {
|
|||
String stateStr = "s"+n;
|
||||
if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
|
||||
else if ( s instanceof PlusBlockStartState ) stateStr = "PlusBlockStart_"+n;
|
||||
else if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
|
||||
else if ( s instanceof BlockStartState) stateStr = "BlockStart_"+n;
|
||||
else if ( s instanceof BlockEndState ) stateStr = "BlockEnd_"+n;
|
||||
else if ( s instanceof RuleStartState) stateStr = "RuleStart_"+g.getRule(s.ruleIndex).name+"_"+n;
|
||||
|
|
|
@ -91,8 +91,8 @@ public class ParserATNFactory implements ATNFactory {
|
|||
RuleStopState stop = atn.ruleToStopState[r.index];
|
||||
epsilon(blk.right, stop);
|
||||
Handle h = new Handle(start, stop);
|
||||
// FASerializer ser = new FASerializer(g, h.left);
|
||||
// System.out.println(ruleAST.toStringTree()+":\n"+ser);
|
||||
ATNPrinter ser = new ATNPrinter(g, h.left);
|
||||
System.out.println(ruleAST.toStringTree()+":\n"+ser.asString());
|
||||
ruleAST.atnState = start;
|
||||
return h;
|
||||
}
|
||||
|
|
|
@ -62,13 +62,17 @@ ebnfSuffix
|
|||
|
||||
blockSet
|
||||
@init {
|
||||
boolean inLexer = Character.isUpperCase(currentRuleName.charAt(0));
|
||||
}
|
||||
: {Character.isLowerCase(currentRuleName.charAt(0)) &&
|
||||
!inContext("RULE")}? // if non-lexer rule and not rule block
|
||||
^(BLOCK ( ^(ALT setElement) )+) -> ^(SET[$BLOCK.token, "SET"] setElement+)
|
||||
: {!inContext("RULE")}? // if not rule block and > 1 alt
|
||||
^(BLOCK ^(ALT setElement[inLexer]) ( ^(ALT setElement[inLexer]) )+)
|
||||
-> ^(SET[$BLOCK.token, "SET"] setElement+)
|
||||
;
|
||||
|
||||
setElement
|
||||
setElement[boolean inLexer]
|
||||
@after {$tree = new TerminalAST($start);} // elem can't be to right of ->
|
||||
: {!rewriteElems.contains($start.getText())}? (STRING_LITERAL|TOKEN_REF)
|
||||
: {!rewriteElems.contains($start.getText())}?
|
||||
( STRING_LITERAL
|
||||
| {!inLexer}? TOKEN_REF
|
||||
)
|
||||
;
|
|
@ -53,9 +53,7 @@ public class DOTGenerator {
|
|||
this.grammar = grammar;
|
||||
}
|
||||
|
||||
public String getDOT(DFA dfa,
|
||||
boolean isLexer)
|
||||
{
|
||||
public String getDOT(DFA dfa, boolean isLexer) {
|
||||
if ( dfa.s0==null ) return null;
|
||||
|
||||
ST dot = stlib.getInstanceOf("dfa");
|
||||
|
@ -162,18 +160,22 @@ public class DOTGenerator {
|
|||
}
|
||||
|
||||
public String getDOT(ATNState startState) {
|
||||
return getDOT(startState, false);
|
||||
}
|
||||
|
||||
public String getDOT(ATNState startState, boolean isLexer) {
|
||||
Set<String> ruleNames = grammar.rules.keySet();
|
||||
String[] names = new String[ruleNames.size()+1];
|
||||
int i = 0;
|
||||
for (String s : ruleNames) names[i++] = s;
|
||||
return getDOT(startState, names);
|
||||
return getDOT(startState, names, isLexer);
|
||||
}
|
||||
|
||||
/** Return a String containing a DOT description that, when displayed,
|
||||
* will show the incoming state machine visually. All nodes reachable
|
||||
* from startState will be included.
|
||||
*/
|
||||
public String getDOT(ATNState startState, String[] ruleNames) {
|
||||
public String getDOT(ATNState startState, String[] ruleNames, boolean isLexer) {
|
||||
if ( startState==null ) return null;
|
||||
|
||||
// The output DOT graph for visualization
|
||||
|
@ -212,6 +214,7 @@ public class DOTGenerator {
|
|||
ST edgeST = null;
|
||||
for (int i = 0; i < s.getNumberOfTransitions(); i++) {
|
||||
Transition edge = s.transition(i);
|
||||
System.out.println("dump s"+s.stateNumber+"->"+edge);
|
||||
if ( edge instanceof RuleTransition ) {
|
||||
RuleTransition rr = ((RuleTransition)edge);
|
||||
// don't jump to other rules, but display edge to follow node
|
||||
|
@ -226,17 +229,46 @@ public class DOTGenerator {
|
|||
}
|
||||
if ( edge instanceof ActionTransition) {
|
||||
edgeST = stlib.getInstanceOf("action-edge");
|
||||
edgeST.add("label", getEdgeLabel(edge.toString()));
|
||||
}
|
||||
else if ( edge instanceof PredicateTransition ) {
|
||||
edgeST = stlib.getInstanceOf("edge");
|
||||
edgeST.add("label", getEdgeLabel(edge.toString()));
|
||||
}
|
||||
else if ( edge.isEpsilon() ) {
|
||||
edgeST = stlib.getInstanceOf("epsilon-edge");
|
||||
edgeST.add("label", getEdgeLabel(edge.toString()));
|
||||
}
|
||||
else if ( edge instanceof AtomTransition ) {
|
||||
edgeST = stlib.getInstanceOf("edge");
|
||||
AtomTransition atom = (AtomTransition)edge;
|
||||
String label = String.valueOf(atom.label);
|
||||
if ( isLexer ) label = "'"+getEdgeLabel(String.valueOf((char)atom.label))+"'";
|
||||
else if ( grammar!=null ) label = grammar.getTokenDisplayName(atom.label);
|
||||
if ( edge instanceof NotAtomTransition ) label = "~"+label;
|
||||
edgeST.add("label", getEdgeLabel(label));
|
||||
}
|
||||
else if ( edge instanceof SetTransition ) {
|
||||
edgeST = stlib.getInstanceOf("edge");
|
||||
SetTransition set = (SetTransition)edge;
|
||||
String label = set.label().toString();
|
||||
if ( isLexer ) label = set.label().toString(true);
|
||||
else if ( grammar!=null ) label = set.label().toString(grammar.getTokenNames());
|
||||
if ( edge instanceof NotSetTransition ) label = "~"+label;
|
||||
edgeST.add("label", getEdgeLabel(label));
|
||||
}
|
||||
else if ( edge instanceof RangeTransition ) {
|
||||
edgeST = stlib.getInstanceOf("edge");
|
||||
RangeTransition range = (RangeTransition)edge;
|
||||
String label = range.label().toString();
|
||||
if ( isLexer ) label = range.toString();
|
||||
else if ( grammar!=null ) label = range.label().toString(grammar.getTokenNames());
|
||||
edgeST.add("label", getEdgeLabel(label));
|
||||
}
|
||||
else {
|
||||
edgeST = stlib.getInstanceOf("edge");
|
||||
edgeST.add("label", getEdgeLabel(edge.toString()));
|
||||
}
|
||||
edgeST.add("label", getEdgeLabel(edge.toString()));
|
||||
edgeST.add("src", "s"+s.stateNumber);
|
||||
edgeST.add("target", "s"+edge.target.stateNumber);
|
||||
edgeST.add("arrowhead", arrowhead);
|
||||
|
|
|
@ -150,7 +150,7 @@ public class TestATNConstruction extends BaseTest {
|
|||
"b : B ;");
|
||||
String expecting =
|
||||
"RuleStart_a_0->s4\n" +
|
||||
"s4->RuleStart_b_2\n" +
|
||||
"s4-b->RuleStart_b_2\n" +
|
||||
"s5->s6\n" +
|
||||
"s6-A->s7\n" +
|
||||
"s7->RuleStop_a_1\n" +
|
||||
|
@ -239,7 +239,7 @@ public class TestATNConstruction extends BaseTest {
|
|||
"RuleStart_a_0->BlockStart_8\n" +
|
||||
"BlockStart_8->s6\n" +
|
||||
"BlockStart_8->BlockEnd_9\n" +
|
||||
"s6-{3..4}->s7\n" +
|
||||
"s6-{A..B}->s7\n" +
|
||||
"BlockEnd_9->RuleStop_a_1\n" +
|
||||
"s7->BlockEnd_9\n" +
|
||||
"RuleStop_a_1-EOF->s10\n";
|
||||
|
@ -252,7 +252,7 @@ public class TestATNConstruction extends BaseTest {
|
|||
"a : (A | B) C;");
|
||||
String expecting =
|
||||
"RuleStart_a_0->s6\n" +
|
||||
"s6-{3..4}->s7\n" +
|
||||
"s6-{A..B}->s7\n" +
|
||||
"s7->s8\n" +
|
||||
"s8-C->s9\n" +
|
||||
"s9->RuleStop_a_1\n" +
|
||||
|
@ -284,7 +284,7 @@ public class TestATNConstruction extends BaseTest {
|
|||
String expecting =
|
||||
"RuleStart_a_0->PlusBlockStart_8\n" +
|
||||
"PlusBlockStart_8->s6\n" +
|
||||
"s6-{3..4}->s7\n" +
|
||||
"s6-{A..B}->s7\n" +
|
||||
"s7->BlockEnd_9\n" +
|
||||
"BlockEnd_9->PlusLoopBack_10\n" +
|
||||
"PlusLoopBack_10->s6\n" +
|
||||
|
@ -363,17 +363,22 @@ public class TestATNConstruction extends BaseTest {
|
|||
@Test public void testAorBstar() throws Exception {
|
||||
Grammar g = new Grammar(
|
||||
"parser grammar P;\n"+
|
||||
"a : (A | B)* ;");
|
||||
"a : (A | B{;})* ;");
|
||||
String expecting =
|
||||
"RuleStart_a_0->StarBlockStart_8\n" +
|
||||
"StarBlockStart_8->s6\n" +
|
||||
"StarBlockStart_8->s2\n" +
|
||||
"StarBlockStart_8->s4\n" +
|
||||
"StarBlockStart_8->s11\n" +
|
||||
"s6-{3..4}->s7\n" +
|
||||
"s2-A->s3\n" +
|
||||
"s4-B->s5\n" +
|
||||
"s11->RuleStop_a_1\n" +
|
||||
"s7->BlockEnd_9\n" +
|
||||
"s3->BlockEnd_9\n" +
|
||||
"s5->s6\n" +
|
||||
"RuleStop_a_1-EOF->s12\n" +
|
||||
"BlockEnd_9->StarLoopBack_10\n" +
|
||||
"StarLoopBack_10->StarBlockStart_8\n";
|
||||
"s6-action_0:-1->s7\n" +
|
||||
"StarLoopBack_10->StarBlockStart_8\n" +
|
||||
"s7->BlockEnd_9\n";
|
||||
checkRule(g, "a", expecting);
|
||||
}
|
||||
|
||||
|
@ -385,8 +390,8 @@ public class TestATNConstruction extends BaseTest {
|
|||
"RuleStart_a_0->BlockStart_10\n" +
|
||||
"BlockStart_10->s2\n" +
|
||||
"BlockStart_10->s6\n" +
|
||||
"s2-pred-0:0->s3\n" +
|
||||
"s6-pred-0:1->s7\n" +
|
||||
"s2-pred_0:0->s3\n" +
|
||||
"s6-pred_0:1->s7\n" +
|
||||
"s3->s4\n" +
|
||||
"s7->s8\n" +
|
||||
"s4-A->s5\n" +
|
||||
|
@ -940,7 +945,7 @@ public class TestATNConstruction extends BaseTest {
|
|||
ATN nfa = f.createATN();
|
||||
ATNState startState = nfa.modeNameToStartState.get(modeName);
|
||||
ATNPrinter serializer = new ATNPrinter(g, startState);
|
||||
String result = serializer.toString();
|
||||
String result = serializer.asString();
|
||||
|
||||
//System.out.print(result);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -968,7 +973,7 @@ public class TestATNConstruction extends BaseTest {
|
|||
Rule r = g.getRule(ruleName);
|
||||
ATNState startState = atn.ruleToStartState[r.index];
|
||||
ATNPrinter serializer = new ATNPrinter(g, startState);
|
||||
String result = serializer.toString();
|
||||
String result = serializer.asString();
|
||||
|
||||
//System.out.print(result);
|
||||
assertEquals(expecting, result);
|
||||
|
|
|
@ -128,7 +128,7 @@ public class TestATNLexerInterpreter extends BaseTest {
|
|||
CharStream input = new ANTLRStringStream(inputString);
|
||||
ATNState startState = atn.modeNameToStartState.get("DEFAULT_MODE");
|
||||
DOTGenerator dot = new DOTGenerator(lg);
|
||||
System.out.println(dot.getDOT(startState));
|
||||
System.out.println(dot.getDOT(startState, true));
|
||||
|
||||
List<String> tokenTypes = getTokenTypes(lg, atn, input, false);
|
||||
String result = Utils.join(tokenTypes.iterator(), ", ");
|
||||
|
|
Loading…
Reference in New Issue