From fd3f8b17c9039fc9147adb8e0ce6b692eac786ae Mon Sep 17 00:00:00 2001 From: parrt Date: Mon, 25 Jul 2011 18:09:00 -0800 Subject: [PATCH] ~sets weren't working; got closer to java.g working again. [git-p4: depot-paths = "//depot/code/antlr4/main/": change = 8909] --- .../antlr/v4/runtime/atn/ATNSimulator.java | 4 +- .../v4/runtime/atn/LexerATNSimulator.java | 12 +++- .../v4/runtime/atn/NotSetTransition.java | 3 - .../org/antlr/v4/runtime/atn/Transition.java | 10 +-- .../src/org/antlr/v4/automata/ATNFactory.java | 2 +- .../org/antlr/v4/automata/ATNSerializer.java | 8 +-- .../antlr/v4/automata/LexerATNFactory.java | 16 +++-- tool/src/org/antlr/v4/parse/ANTLRParser.g | 1 + tool/src/org/antlr/v4/parse/ATNBuilder.g | 9 ++- .../org/antlr/v4/parse/BlockSetTransformer.g | 4 +- .../org/antlr/v4/parse/GrammarTreeVisitor.g | 5 ++ tool/src/org/antlr/v4/tool/DOTGenerator.java | 1 - .../antlr/v4/test/TestATNSerialization.java | 71 +++++++++++++------ 13 files changed, 99 insertions(+), 47 deletions(-) diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java index 3914f67e5..059c0b80c 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNSimulator.java @@ -143,8 +143,8 @@ public abstract class ATNSimulator { case Transition.ACTION : return new ActionTransition(target, arg1, arg2); case Transition.FORCED_ACTION : return new ActionTransition(target, arg1, arg2); case Transition.SET : return new SetTransition(sets.get(arg1), target); - case Transition.NOT_ATOM : return new NotAtomTransition(arg1, target); - case Transition.NOT_SET : return new NotSetTransition(null, sets.get(arg1), target); +// case Transition.NOT_ATOM : return new NotAtomTransition(arg1, target); + case Transition.NOT_SET : return new NotSetTransition(sets.get(arg1), null, target); case Transition.WILDCARD : return new WildcardTransition(target); } return null; diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java index 95c6f2ad8..9791e62c5 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java @@ -35,8 +35,8 @@ import org.antlr.v4.runtime.misc.OrderedHashSet; /** "dup" of ParserInterpreter */ public class LexerATNSimulator extends ATNSimulator { - public static boolean debug = false; - public static boolean dfa_debug = false; + public static boolean debug = true; + public static boolean dfa_debug = true; public static final int NUM_EDGES = 255; protected Lexer recog; @@ -263,6 +263,9 @@ public class LexerATNSimulator extends ATNSimulator { SetTransition st = (SetTransition)trans; boolean not = trans instanceof NotSetTransition; if ( !not && st.set.member(t) || not && !st.set.member(t) ) { +// if ( st.set.toString().equals("0") ) { +// System.out.println("eh?"); +// } if ( debug ) System.out.println("match set "+st.set.toString()); return st.target; } @@ -368,8 +371,13 @@ public class LexerATNSimulator extends ATNSimulator { protected void addDFAEdge(DFAState p, int t, DFAState q) { if ( p==null ) return; if ( p.edges==null ) { + // make room for tokens 1..n and -1 masquerading as index 0 p.edges = new DFAState[NUM_EDGES+1]; // TODO: make adaptive } +// if ( t==Token.EOF ) { +// System.out.println("state "+p+" has EOF edge"); +// t = 0; +// } p.edges[t] = q; // connect } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/NotSetTransition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/NotSetTransition.java index 2538b59e3..987f16b54 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/NotSetTransition.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/NotSetTransition.java @@ -45,9 +45,6 @@ public class NotSetTransition extends SetTransition { super(target); } - @Override - public IntervalSet label() { return notSet; } - @Override public String toString() { return '~'+super.toString(); diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/Transition.java b/runtime/Java/src/org/antlr/v4/runtime/atn/Transition.java index 5c4f27120..46f2c022a 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/Transition.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/Transition.java @@ -55,9 +55,9 @@ public abstract class Transition { public static final int ACTION = 6; public static final int FORCED_ACTION = 7; public static final int SET = 8; // ~(A|B) or ~atom, wildcard, which convert to next 2 - public static final int NOT_ATOM = 9; - public static final int NOT_SET = 10; - public static final int WILDCARD = 11; +// public static final int NOT_ATOM = 9; + public static final int NOT_SET = 9; + public static final int WILDCARD = 10; public static String[] serializationNames = { @@ -70,7 +70,7 @@ public abstract class Transition { "ACTION", "FORCED_ACTION", "SET", - "NOT_ATOM", +// "NOT_ATOM", "NOT_SET", "WILDCARD", }; @@ -84,7 +84,7 @@ public abstract class Transition { put(AtomTransition.class, ATOM); put(ActionTransition.class, ACTION); // TODO: FORCED? put(SetTransition.class, SET); - put(NotAtomTransition.class, NOT_ATOM); +// put(NotAtomTransition.class, NOT_ATOM); put(NotSetTransition.class, NOT_SET); put(WildcardTransition.class, WILDCARD); }}; diff --git a/tool/src/org/antlr/v4/automata/ATNFactory.java b/tool/src/org/antlr/v4/automata/ATNFactory.java index ffe3bc0a0..3d1ad1440 100644 --- a/tool/src/org/antlr/v4/automata/ATNFactory.java +++ b/tool/src/org/antlr/v4/automata/ATNFactory.java @@ -67,7 +67,7 @@ public interface ATNFactory { Handle tokenRef(TerminalAST node); - Handle set(GrammarAST associatedAST, List terminals, boolean invert); + Handle set(GrammarAST associatedAST, List alts, boolean invert); Handle tree(List els); diff --git a/tool/src/org/antlr/v4/automata/ATNSerializer.java b/tool/src/org/antlr/v4/automata/ATNSerializer.java index 6329c5a40..cd3137e73 100644 --- a/tool/src/org/antlr/v4/automata/ATNSerializer.java +++ b/tool/src/org/antlr/v4/automata/ATNSerializer.java @@ -159,9 +159,9 @@ public class ATNSerializer { case Transition.SET : arg1 = setIndex++; break; - case Transition.NOT_ATOM : - arg1 = ((NotAtomTransition)t).label; - break; +// case Transition.NOT_ATOM : +// arg1 = ((NotAtomTransition)t).label; +// break; case Transition.NOT_SET : arg1 = setIndex++; break; @@ -199,7 +199,7 @@ public class ATNSerializer { ruleIndex + "\n"); } int nrules = ATNSimulator.toInt(data[p++]); - for (int i=1; i<=nrules; i++) { + for (int i=0; i terminals, boolean invert) { + public Handle set(GrammarAST associatedAST, List alts, boolean invert) { ATNState left = newState(associatedAST); ATNState right = newState(associatedAST); IntervalSet set = new IntervalSet(); - for (GrammarAST t : terminals) { - int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText()); - set.add(c); + for (GrammarAST t : alts) { + if ( t.getType()== ANTLRParser.RANGE ) { + int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText()); + int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText()); + set.add(a, b); + } + else { + int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText()); + set.add(c); + } } if ( invert ) { IntervalSet notSet = (IntervalSet)set.complement(Token.MIN_TOKEN_TYPE, g.getMaxTokenType()); diff --git a/tool/src/org/antlr/v4/parse/ANTLRParser.g b/tool/src/org/antlr/v4/parse/ANTLRParser.g index bdb6cdf79..2b3145eee 100644 --- a/tool/src/org/antlr/v4/parse/ANTLRParser.g +++ b/tool/src/org/antlr/v4/parse/ANTLRParser.g @@ -767,6 +767,7 @@ blockSet setElement : TOKEN_REF | STRING_LITERAL + | range ; // ------------- diff --git a/tool/src/org/antlr/v4/parse/ATNBuilder.g b/tool/src/org/antlr/v4/parse/ATNBuilder.g index a473a1ca8..b272d3ccb 100644 --- a/tool/src/org/antlr/v4/parse/ATNBuilder.g +++ b/tool/src/org/antlr/v4/parse/ATNBuilder.g @@ -128,9 +128,16 @@ subrule returns [ATNFactory.Handle p] blockSet[boolean invert] returns [ATNFactory.Handle p] @init {List alts = new ArrayList();} - : ^(SET (atom {alts.add($atom.start);})+) {$p = factory.set($start, alts, $invert);} + : ^(SET (setElement {alts.add($setElement.start);})+) {$p = factory.set($start, alts, $invert);} ; +/** Don't combine with atom otherwise it will build spurious ATN nodes */ +setElement + : STRING_LITERAL + | TOKEN_REF + | ^(RANGE a=STRING_LITERAL b=STRING_LITERAL) + ; + astBlockSuffix : ROOT | IMPLIES diff --git a/tool/src/org/antlr/v4/parse/BlockSetTransformer.g b/tool/src/org/antlr/v4/parse/BlockSetTransformer.g index 24cefb8ab..f62d555c9 100644 --- a/tool/src/org/antlr/v4/parse/BlockSetTransformer.g +++ b/tool/src/org/antlr/v4/parse/BlockSetTransformer.g @@ -70,9 +70,9 @@ boolean inLexer = Character.isUpperCase(currentRuleName.charAt(0)); ; setElement[boolean inLexer] -@after {$tree = new TerminalAST($start);} // elem can't be to right of -> : {!rewriteElems.contains($start.getText())}? ( STRING_LITERAL - | {!inLexer}? TOKEN_REF + | {!inLexer}?=> TOKEN_REF + | {inLexer}?=> ^(RANGE STRING_LITERAL STRING_LITERAL) ) ; \ No newline at end of file diff --git a/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g b/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g index 08fdfd9bc..c634984dc 100644 --- a/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g +++ b/tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g @@ -406,6 +406,11 @@ blockSet setElement : STRING_LITERAL {stringRef((TerminalAST)$STRING_LITERAL, null);} | TOKEN_REF {tokenRef((TerminalAST)$TOKEN_REF, null);} + | ^(RANGE a=STRING_LITERAL b=STRING_LITERAL) + { + stringRef((TerminalAST)$a, null); + stringRef((TerminalAST)$b, null); + } ; block diff --git a/tool/src/org/antlr/v4/tool/DOTGenerator.java b/tool/src/org/antlr/v4/tool/DOTGenerator.java index b8f990c7d..40c9a8b73 100644 --- a/tool/src/org/antlr/v4/tool/DOTGenerator.java +++ b/tool/src/org/antlr/v4/tool/DOTGenerator.java @@ -214,7 +214,6 @@ public class DOTGenerator { ST edgeST = null; for (int i = 0; i < s.getNumberOfTransitions(); i++) { Transition edge = s.transition(i); - System.out.println("dump s"+s.stateNumber+"->"+edge); if ( edge instanceof RuleTransition ) { RuleTransition rr = ((RuleTransition)edge); // don't jump to other rules, but display edge to follow node diff --git a/tool/test/org/antlr/v4/test/TestATNSerialization.java b/tool/test/org/antlr/v4/test/TestATNSerialization.java index 71a140172..0fbed66c6 100644 --- a/tool/test/org/antlr/v4/test/TestATNSerialization.java +++ b/tool/test/org/antlr/v4/test/TestATNSerialization.java @@ -19,7 +19,7 @@ public class TestATNSerialization extends BaseTest { "4:BASIC 0\n" + "5:BASIC 0\n" + "6:BASIC 0\n" + - "rule 1:0 0,0\n" + + "rule 0:0 0,0\n" + "0->2 EPSILON 0,0\n" + "1->6 ATOM -1,0\n" + "2->3 ATOM 3,0\n" + @@ -43,12 +43,15 @@ public class TestATNSerialization extends BaseTest { "2:BASIC 0\n" + "3:BASIC 0\n" + "4:BASIC 0\n" + - "rule 1:0 0,0\n" + + "rule 0:0 0,0\n" + + "0:A..A\n" + "0->2 EPSILON 0,0\n" + "1->4 ATOM -1,0\n" + - "2->3 NOT_ATOM 3,0\n" + + "2->3 NOT_SET 0,0\n" + "3->1 EPSILON 0,0\n"; ATN atn = createATN(g); + DOTGenerator gen = new DOTGenerator(g); + System.out.println(gen.getDOT(atn.ruleToStartState[0])); String result = ATNSerializer.getDecoded(g, atn); assertEquals(expecting, result); } @@ -65,7 +68,7 @@ public class TestATNSerialization extends BaseTest { "2:BASIC 0\n" + "3:BASIC 0\n" + "4:BASIC 0\n" + - "rule 1:0 0,0\n" + + "rule 0:0 0,0\n" + "0->2 EPSILON 0,0\n" + "1->4 ATOM -1,0\n" + "2->3 WILDCARD 0,0\n" + @@ -92,7 +95,7 @@ public class TestATNSerialization extends BaseTest { "8:BLOCK_START 0\n" + "9:BLOCK_END 0\n" + "10:BASIC 0\n" + - "rule 1:0 0,0\n" + + "rule 0:0 0,0\n" + "0->8 EPSILON 0,0\n" + "1->10 ATOM -1,0\n" + "2->3 ATOM 3,0\n" + @@ -133,7 +136,7 @@ public class TestATNSerialization extends BaseTest { "14:BLOCK_START 0\n" + "15:BLOCK_END 0\n" + "16:BASIC 0\n" + - "rule 1:0 0,0\n" + + "rule 0:0 0,0\n" + "0->14 EPSILON 0,0\n" + "1->16 ATOM -1,0\n" + "2->3 ATOM 3,0\n" + @@ -175,7 +178,7 @@ public class TestATNSerialization extends BaseTest { "8:BASIC 0\n" + "9:BASIC 0\n" + "10:BASIC 0\n" + - "rule 1:0 0,0\n" + + "rule 0:0 0,0\n" + "0->4 EPSILON 0,0\n" + "1->10 ATOM -1,0\n" + "2->3 ATOM 3,0\n" + @@ -211,8 +214,8 @@ public class TestATNSerialization extends BaseTest { "6:BASIC 1\n" + "7:BASIC 1\n" + "8:BASIC 1\n" + - "rule 1:0 0,0\n" + - "rule 2:2 0,0\n" + + "rule 0:0 0,0\n" + + "rule 1:2 0,0\n" + "0->4 EPSILON 0,0\n" + "1->8 ATOM -1,0\n" + "2->6 EPSILON 0,0\n" + @@ -242,8 +245,8 @@ public class TestATNSerialization extends BaseTest { "6:BASIC 0\n" + "7:BASIC 1\n" + "8:BASIC 1\n" + - "rule 1:1 3,-1\n" + - "rule 2:3 4,-1\n" + + "rule 0:1 3,-1\n" + + "rule 1:3 4,-1\n" + "mode 0:0\n" + "0->1 EPSILON 0,0\n" + "0->3 EPSILON 0,0\n" + @@ -270,7 +273,7 @@ public class TestATNSerialization extends BaseTest { "2:RULE_STOP 0\n" + "3:BASIC 0\n" + "4:BASIC 0\n" + - "rule 1:1 3,-1\n" + + "rule 0:1 3,-1\n" + "mode 0:0\n" + "0->1 EPSILON 0,0\n" + "1->3 EPSILON 0,0\n" + @@ -297,7 +300,7 @@ public class TestATNSerialization extends BaseTest { "6:BLOCK_END 0\n" + "7:PLUS_LOOP_BACK 0\n" + "8:BASIC 0\n" + - "rule 1:1 3,-1\n" + + "rule 0:1 3,-1\n" + "mode 0:0\n" + "0->1 EPSILON 0,0\n" + "1->5 EPSILON 0,0\n" + @@ -340,9 +343,9 @@ public class TestATNSerialization extends BaseTest { "12:BASIC 2\n" + "13:BASIC 2\n" + "14:BASIC 2\n" + - "rule 1:1 3,0\n" + - "rule 2:3 4,-1\n" + - "rule 3:5 5,1\n" + + "rule 0:1 3,0\n" + + "rule 1:3 4,-1\n" + + "rule 2:5 5,1\n" + "mode 0:0\n" + "0->1 EPSILON 0,0\n" + "0->3 EPSILON 0,0\n" + @@ -375,7 +378,7 @@ public class TestATNSerialization extends BaseTest { "2:RULE_STOP 0\n" + "3:BASIC 0\n" + "4:BASIC 0\n" + - "rule 1:1 3,-1\n" + + "rule 0:1 3,-1\n" + "mode 0:0\n" + "0:'a'..'b'\n" + "0->1 EPSILON 0,0\n" + @@ -388,6 +391,30 @@ public class TestATNSerialization extends BaseTest { assertEquals(expecting, result); } + @Test public void testLexerSetWithRange() throws Exception { + LexerGrammar lg = new LexerGrammar( + "lexer grammar L;\n"+ + "ID : ('a'|'b'|'e'|'p'..'t')\n ;"); + String expecting = + "max type 3\n" + + "0:TOKEN_START -1\n" + + "1:RULE_START 0\n" + + "2:RULE_STOP 0\n" + + "3:BASIC 0\n" + + "4:BASIC 0\n" + + "rule 0:1 3,-1\n" + + "mode 0:0\n" + + "0:'a'..'b', 'e'..'e', 'p'..'t'\n" + + "0->1 EPSILON 0,0\n" + + "1->3 EPSILON 0,0\n" + + "3->4 SET 0,0\n" + + "4->2 EPSILON 0,0\n" + + "0:0\n"; + ATN atn = createATN(lg); + String result = ATNSerializer.getDecoded(lg, atn); + assertEquals(expecting, result); + } + @Test public void testLexerNotSetWithRange() throws Exception { LexerGrammar lg = new LexerGrammar( "lexer grammar L;\n"+ @@ -399,7 +426,7 @@ public class TestATNSerialization extends BaseTest { "2:RULE_STOP 0\n" + "3:BASIC 0\n" + "4:BASIC 0\n" + - "rule 1:1 3,-1\n" + + "rule 0:1 3,-1\n" + "mode 0:0\n" + "0:'a'..'b', 'e'..'e', 'p'..'t'\n" + "0->1 EPSILON 0,0\n" + @@ -425,7 +452,7 @@ public class TestATNSerialization extends BaseTest { "4:BASIC 0\n" + "5:BASIC 0\n" + "6:BASIC 0\n" + - "rule 1:1 3,-1\n" + + "rule 0:1 3,-1\n" + "mode 0:0\n" + "0:'a'..'b'\n" + "1:'e'..'e', 'p'..'t'\n" + @@ -466,9 +493,9 @@ public class TestATNSerialization extends BaseTest { "12:BASIC 1\n" + "13:BASIC 2\n" + "14:BASIC 2\n" + - "rule 1:3 3,-1\n" + - "rule 2:5 4,-1\n" + - "rule 3:7 5,-1\n" + + "rule 0:3 3,-1\n" + + "rule 1:5 4,-1\n" + + "rule 2:7 5,-1\n" + "mode 0:0\n" + "mode 1:1\n" + "mode 2:2\n" +