got DFA interp working from parser grammar + unit tests

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6902]
2010-05-28 12:17:17 -08:00 · 2010-05-28 12:17:17 -08:00 · de380d2fd1
parent 231758b0de
commit de380d2fd1
6 changed files with 279 additions and 154 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/pda/PDA.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/pda/PDA.java
@ -3,6 +3,7 @@ package org.antlr.v4.runtime.pda;
 import org.antlr.runtime.CharStream;
 import org.antlr.runtime.IntStream;
 import org.antlr.runtime.Token;
 import org.antlr.runtime.TokenStream;
 import org.antlr.v4.runtime.CommonToken;
 import java.util.ArrayList;
@ -121,10 +122,6 @@ processOneChar:
 							addToClosure(reach, ip, alt, context);
 						}
 						break;
 					case Bytecode.SET :
 						System.err.println("not impl");
 						notNextMatch = false;
 						break;
 					case Bytecode.LABEL : // lexers only
 						int labelIndex = getShort(code, ip);
 						labelValues[labelIndex] =
@ -225,6 +222,8 @@ processOneChar:
 		switch (opcode) {
 			case Bytecode.NOT : // see thru NOT but include in closure so we exec during reach
 				closure.add(t);	// add to closure; need to execute during reach
 				// add NOT and next instruction since reach only looks at
 				// what's in closure (it doesn't jump to ip after NOT)
 				addToClosure(closure, ip, alt, context);
 				break;
 			case Bytecode.JMP :
@ -302,6 +301,7 @@ processOneChar:
 	// The code below is faster, but it cannot handle SAVE, CALL/RET, or predicates.
 /*
 	public int execThompson_no_stack(CharStream input, int ip) {
 		int c = input.LA(1);
 		if ( c==Token.EOF ) return Token.EOF;
@ -441,7 +441,7 @@ processOneChar:
 				break;
 		}
 	}
-
+*/
 	// subclass needs to override these if there are sempreds or actions in lexer rules
 	public boolean sempred(int ruleIndex, int actionIndex) {
@ -456,10 +456,91 @@ processOneChar:
 		System.out.println(instr);
 	}
 	/** Debugging aid: prints the disassembly of the instruction at {@code ip} to stdout. */
 	void traceDFA(int ip) {
 		System.out.println(Bytecode.disassembleInstruction(code, ip, false));
 	}
 	/**
 	 * Reads the two bytes at {@code index} and {@code index+1} of {@code memory}
 	 * as a single unsigned 16-bit value, high-order byte first.
 	 *
 	 * @param memory byte array to read from
 	 * @param index  position of the high-order byte
 	 * @return value in the range 0..65535
 	 */
 	public static int getShort(byte[] memory, int index) {
 		int hi = memory[index] & 0xFF;   // mask so a negative byte is not sign-extended
 		int lo = memory[index+1] & 0xFF;
 		return (hi << 8) | lo;
 	}
 	/**
 	 * A pending unit of work for the non-recursive interpreter: an instruction
 	 * address paired with an input marker.
 	 */
 	public static class Context {
 		/** Instruction address; input marker. */
 		public int ip, inputMarker;

 		public Context(int ip, int inputMarker) {
 			this.inputMarker = inputMarker;
 			this.ip = ip;
 		}
 	}
 	/**
 	 * Interprets the bytecode starting at {@code ip} against a token stream,
 	 * backtracking over SPLIT alternatives with an explicit work stack instead
 	 * of recursion.
 	 *
 	 * Alternatives are tried in order; the first one that reaches ACCEPT wins
 	 * (first match, not longest match). When a match instruction fails, the
 	 * most recently pushed alternative is resumed after rewinding the input
 	 * to the marker saved with it.
 	 *
 	 * @param input token stream to read; it is marked, rewound and consumed
 	 * @param ip    address of the first instruction to execute
 	 * @return the operand of the first ACCEPT reached, or 0 if every
 	 *         alternative fails or execution runs past the end of the code
 	 * @throws RuntimeException if an unknown opcode is encountered
 	 */
 	public int execNoRecursion(TokenStream input, int ip) {
 		System.out.println("execNoRecursion @"+ip);
 		List<Context> work = new ArrayList<Context>();
 		work.add(new Context(ip, input.mark()));
workLoop:
 		while ( work.size()>0 ) {
 			Context ctx = work.remove(work.size()-1); // treat like stack
 			ip = ctx.ip;
 			input.rewind(ctx.inputMarker);
 			while ( ip < code.length ) {
 				int c = input.LA(1);
 				traceDFA(ip);
 				short opcode = code[ip];
 				ip++; // move to next instruction or first byte of operand
 				switch (opcode) {
 					case Bytecode.MATCH8 :
 						// mask operand: a raw byte would sign-extend, so values
 						// 128..255 could never match and 0xFF would match EOF (-1)
 						if ( c != (code[ip]&0xFF) ) continue workLoop;
 						ip++;
 						input.consume();
 						break;
 					case Bytecode.MATCH16 :
 						if ( c != getShort(code, ip) ) continue workLoop;
 						ip += 2;
 						input.consume();
 						break;
 					case Bytecode.RANGE8 :
 						// both bounds are unsigned 8-bit operands; mask as for MATCH8
 						if ( c<(code[ip]&0xFF) || c>(code[ip+1]&0xFF) ) continue workLoop;
 						ip += 2;
 						input.consume();
 						break;
 					case Bytecode.RANGE16 :
 						if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) continue workLoop;
 						ip += 4;
 						input.consume();
 						break;
 					case Bytecode.ACCEPT :
 						int ruleIndex = getShort(code, ip);
 						System.out.println("accept "+ruleIndex);
 						// returning gives first match not longest; i.e., like PEG
 						return ruleIndex;
 					case Bytecode.JMP :
 						int target = getShort(code, ip);
 						ip = target;
 						continue;
 					case Bytecode.SPLIT :
 						int nopnds = getShort(code, ip);
 						ip += 2;
 						// add split addresses to work queue in reverse order ('cept first one)
 						// so that popping the stack retries them in their original order
 						for (int i=nopnds-1; i>=1; i--) {
 							int addr = getShort(code, ip+i*2);
 							//System.out.println("try alt "+i+" at "+addr);
 							work.add(new Context(addr, input.mark()));
 						}
 						// try first alternative (w/o adding to work list)
 						int addr = getShort(code, ip);
 						ip = addr;
 						//System.out.println("try alt "+nopnds+" at "+addr);
 						continue;
 					default :
 						// ip was already advanced past the opcode; report the opcode's own address
 						throw new RuntimeException("invalid instruction @ "+(ip-1)+": "+opcode);
 				}
 			}
 		}
 		return 0;
 	}
 /*
 	public int exec(CharStream input, String ruleName) {
 		return exec(input, ruleToAddr.get(ruleName));
@ -527,80 +608,6 @@ processOneChar:
 		return 0;
 	}
 	public static class Context {
 		public int ip;
 		public int inputMarker;
 		public Context(int ip, int inputMarker) {
 			this.ip = ip;
 			this.inputMarker = inputMarker;
 		}
 	}
 	public int execNoRecursion(CharStream input, int ip) {
 		List<Context> work = new ArrayList<Context>();
 		work.add(new Context(ip, input.mark()));
 workLoop:
 		while ( work.size()>0 ) {
 			Context ctx = work.remove(work.size()-1); // treat like stack
 			ip = ctx.ip;
 			input.rewind(ctx.inputMarker);
 			while ( ip < code.length ) {
 				int c = input.LA(1);
 				trace(ip);
 				short opcode = code[ip];
 				ip++; // move to next instruction or first byte of operand
 				switch (opcode) {
 					case Bytecode.MATCH8 :
 						if ( c != code[ip] ) continue workLoop;
 						ip++;
 						input.consume();
 						break;
 					case Bytecode.MATCH16 :
 						if ( c != getShort(code, ip) ) continue workLoop;
 						ip += 2;
 						input.consume();
 						break;
 					case Bytecode.RANGE8 :
 						if ( c<code[ip] || c>code[ip+1] ) continue workLoop;
 						ip += 2;
 						input.consume();
 						break;
 					case Bytecode.RANGE16 :
 						if ( c<getShort(code, ip) || c>getShort(code, ip+2) ) continue workLoop;
 						ip += 4;
 						input.consume();
 						break;
 					case Bytecode.ACCEPT :
 						int ruleIndex = getShort(code, ip);
 						ip += 2;
 						System.out.println("accept "+ruleIndex);
 						// returning gives first match not longest; i.e., like PEG
 						return ruleIndex;
 					case Bytecode.JMP :
 						int target = getShort(code, ip);
 						ip = target;
 						continue;
 					case Bytecode.SPLIT :
 						int nopnds = getShort(code, ip);
 						ip += 2;
 						// add split addresses to work queue in reverse order ('cept first one)
 						for (int i=nopnds-1; i>=1; i--) {
 							int addr = getShort(code, ip+i*2);
 							//System.out.println("try alt "+i+" at "+addr);
 							work.add(new Context(addr, input.mark()));
 						}
 						// try first alternative (w/o adding to work list)
 						int addr = getShort(code, ip);
 						ip = addr;
 						//System.out.println("try alt "+nopnds+" at "+addr);
 						continue;
 					default :
 						throw new RuntimeException("invalid instruction @ "+ip+": "+opcode);
 				}
 			}
 		}
 		return 0;
 	}
 */
 }
--- a/tool/src/org/antlr/v4/codegen/DFACompiler.java
+++ b/tool/src/org/antlr/v4/codegen/DFACompiler.java
@ -4,6 +4,7 @@ import org.antlr.v4.automata.DFA;
 import org.antlr.v4.automata.DFAState;
 import org.antlr.v4.automata.Edge;
 import org.antlr.v4.codegen.pda.*;
 import org.antlr.v4.runtime.pda.Bytecode;
 /** */
 public class DFACompiler {
@ -20,6 +21,8 @@ public class DFACompiler {
 	/**
 	 * Runs {@code walk()} and then {@code gen.compile()}, prints a disassembly
 	 * of the generated code to stdout, and returns the compiled object.
 	 */
 	public CompiledPDA compile() {
 		walk();
 		gen.compile();
 		System.out.println("DFA: ");
 		CompiledPDA compiled = gen.obj;
 		System.out.println(Bytecode.disassemble(compiled.code,false));
 		return compiled;
 	}
@ -30,7 +33,6 @@ public class DFACompiler {
 		// walk code, update jump targets.
 		for (Instr I : gen.obj.instrs) {
 			System.out.println("instr "+I);
 			if ( I instanceof JumpInstr) {
 				JumpInstr J = (JumpInstr)I;
 				J.target = stateToAddr[J.target];
--- a/tool/test/org/antlr/v4/test/BaseTest.java
+++ b/tool/test/org/antlr/v4/test/BaseTest.java
@ -28,16 +28,17 @@
 package org.antlr.v4.test;
-import org.antlr.runtime.CommonTokenStream;
+import org.antlr.runtime.*;
 import org.antlr.runtime.RecognitionException;
 import org.antlr.runtime.Token;
 import org.antlr.runtime.TokenSource;
 import org.antlr.v4.Tool;
 import org.antlr.v4.analysis.DFAMinimizer;
 import org.antlr.v4.analysis.LexerNFAToDFAConverter;
 import org.antlr.v4.analysis.PredictionDFAFactory;
 import org.antlr.v4.automata.DFA;
 import org.antlr.v4.automata.*;
 import org.antlr.v4.codegen.CompiledPDA;
 import org.antlr.v4.codegen.LexerCompiler;
 import org.antlr.v4.misc.Utils;
 import org.antlr.v4.runtime.pda.PDA;
 import org.antlr.v4.semantics.SemanticPipeline;
 import org.antlr.v4.tool.*;
 import org.junit.After;
@ -133,6 +134,37 @@ public abstract class BaseTest {
 		dfa.minimized = dmin.minimize();
 	}
 	/**
 	 * Compiles the default mode of lexer grammar {@code g} and wraps the
 	 * resulting code, alt-to-address table and label count in a {@link PDA}.
 	 */
 	PDA getLexerPDA(LexerGrammar g) {
 		// result is unused; NOTE(review): presumably called for its side effects on g -- confirm
 		createNFA(g);
 		CompiledPDA compiled =
 			new LexerCompiler(g).compileMode(LexerGrammar.DEFAULT_MODE_NAME);
 		return new PDA(compiled.code, compiled.altToAddr, compiled.nLabels);
 	}
 	/**
 	 * Converts a comma-separated list of token names into their token types
 	 * as reported by {@code g.getTokenType}. Spaces in {@code expecting} are
 	 * removed before splitting.
 	 *
 	 * @return the token types in the order listed; an empty list when
 	 *         {@code expecting} is null or blank
 	 */
 	List<Integer> getTypesFromString(Grammar g, String expecting) {
 		List<Integer> types = new ArrayList<Integer>();
 		if ( expecting==null || expecting.trim().equals("") ) {
 			return types;
 		}
 		String[] names = expecting.replace(" ", "").split(",");
 		for (int i = 0; i < names.length; i++) {
 			int type = g.getTokenType(names[i]);
 			types.add(type);
 		}
 		return types;
 	}
 	/**
 	 * Repeatedly runs {@code lexerPDA.execThompson} over {@code input} and
 	 * collects every returned token type, up to and including the first
 	 * {@code Token.EOF}.
 	 */
 	List<Integer> getTokenTypes(String input, PDA lexerPDA) {
 		ANTLRStringStream chars = new ANTLRStringStream(input);
 		List<Integer> types = new ArrayList<Integer>();
 		while ( true ) {
 			int type = lexerPDA.execThompson(chars);
 			types.add(type);
 			if ( type==Token.EOF ) break; // EOF itself is recorded before stopping
 		}
 		return types;
 	}
 	List<Message> checkRuleDFA(String gtext, String ruleName, String expecting)
 		throws Exception
 	{
--- a/tool/test/org/antlr/v4/test/TestDFAInterp.java
+++ b/tool/test/org/antlr/v4/test/TestDFAInterp.java
@ -0,0 +1,98 @@
 package org.antlr.v4.test;
 import org.antlr.runtime.ANTLRStringStream;
 import org.antlr.runtime.CharStream;
 import org.antlr.runtime.CommonTokenStream;
 import org.antlr.runtime.Token;
 import org.antlr.v4.automata.DFA;
 import org.antlr.v4.automata.DecisionState;
 import org.antlr.v4.automata.NFA;
 import org.antlr.v4.codegen.CompiledPDA;
 import org.antlr.v4.codegen.DFACompiler;
 import org.antlr.v4.runtime.Lexer;
 import org.antlr.v4.runtime.pda.PDA;
 import org.antlr.v4.tool.Grammar;
 import org.antlr.v4.tool.LexerGrammar;
 import org.junit.Test;
 import java.util.List;
 /** */
 public class TestDFAInterp extends BaseTest {
 	/** Lexer whose mode table holds exactly one PDA, the one passed in. */
 	public static class InterpLexer extends Lexer {
 		public InterpLexer(CharStream input, PDA pda) {
 			super(input);
 			modeToPDA = new PDA[] { pda };
 		}
 	}

 	/** Decision 0 of {@code a : ID | INT}: expects 1 for letters and 2 for digits. */
 	@Test public void testSimpleLL1Decision() throws Exception {
 		LexerGrammar lg = new LexerGrammar(
 			"lexer grammar L;\n" +
 			"ID  : 'a'..'z'+ ;\n" +
 			"INT : '0'..'9'+ ;\n");
 		Grammar g = new Grammar(
 			"parser grammar P;\n" +
 			"a : ID | INT ;\n"
 		);
 		checkDFAMatches(g, lg, 0, "ab", 1);
 		checkDFAMatches(g, lg, 0, "32", 2);
 	}

 	/** Decision 2: both alternatives start with ID+ and differ only in the final token. */
 	@Test public void testArbCommonPrefix() throws Exception {
 		LexerGrammar lg = new LexerGrammar(
 			"lexer grammar L;\n" +
 			"SEMI  : ';' ;\n" +
 			"DOT   : '.' ;\n" +
 			"WS    : ' ' ;\n" +
 			"ID    : 'a'..'z'+ ;\n" +
 			"INT  : '0'..'9'+ ;\n");
 		Grammar g = new Grammar(
 			"parser grammar P;\n" +
 			"tokens { WS; }\n" +
 			"a : ID+ SEMI\n" +
 			"  | ID+ DOT\n" +
 			"  ;\n"
 		);
 		checkDFAMatches(g, lg, 2, "a b c ;", 1);
 		checkDFAMatches(g, lg, 2, "a b c .", 2);
 	}

 	/**
 	 * Builds the DFA for {@code decision} of {@code g}, compiles it to a PDA,
 	 * tokenizes {@code input} with a lexer PDA built from {@code lg}, moves WS
 	 * tokens to the hidden channel, and runs the decision PDA over the tokens
 	 * from address 0.
 	 *
 	 * @return the value returned by {@code execNoRecursion}
 	 */
 	int interp(Grammar g, LexerGrammar lg, int decision, String input) {
 		// compile the decision's DFA into bytecode
 		NFA nfa = createNFA(g);
 		DecisionState decisionState = nfa.decisionToNFAState.get(decision);
 		DFA dfa = createDFA(g, decisionState);
 		CompiledPDA compiled = new DFACompiler(dfa).compile();
 		PDA pda = new PDA(compiled.code, compiled.altToAddr, compiled.nLabels);

 		// tokenize the input with the lexer grammar
 		lg.importVocab(g);
 		PDA lexerPDA = getLexerPDA(lg);
 		Lexer lexer = new InterpLexer(new ANTLRStringStream(input), lexerPDA);
 		CommonTokenStream tokens = new CommonTokenStream(lexer);
 		tokens.fill();
 		List<Token> list = tokens.getTokens();
 		for (Token tok : list) { // hide WS
 			if ( tok.getType()==g.getTokenType("WS") ) {
 				tok.setChannel(Token.HIDDEN_CHANNEL);
 			}
 		}
 		System.out.println("tokens="+ list);
 		return pda.execNoRecursion(tokens, 0);
 	}

 	/** Asserts that interpreting {@code decision} over {@code input} yields {@code expecting}. */
 	void checkDFAMatches(Grammar g, LexerGrammar lg, int decision,
 						 String input, int expecting) {
 		assertEquals(expecting, interp(g, lg, decision, input));
 	}
 }
--- a/tool/test/org/antlr/v4/test/TestDFAtoPDABytecodeGeneration.java
+++ b/tool/test/org/antlr/v4/test/TestDFAtoPDABytecodeGeneration.java
@ -2,6 +2,7 @@ package org.antlr.v4.test;
 import org.antlr.v4.automata.DFA;
 import org.antlr.v4.automata.DecisionState;
 import org.antlr.v4.automata.Edge;
 import org.antlr.v4.automata.NFA;
 import org.antlr.v4.codegen.CompiledPDA;
 import org.antlr.v4.codegen.DFACompiler;
@ -21,24 +22,52 @@ public class TestDFAtoPDABytecodeGeneration extends BaseTest {
 			"0007:\tset           0\n" +
 			"0010:\tjmp           13\n" +
 			"0013:\taccept        1\n" +
-			"0016:\tmatch8        5\n" +
+			"0016:\tmatch8        4\n" +
 			"0018:\tjmp           21\n" +
 			"0021:\taccept        2\n";
 		checkBytecode(g, 0, expecting);
 	}
 	/**
 	 * Redirects the first edge of DFA state 0 to the target of its second edge,
 	 * then checks the bytecode generated for the altered DFA.
 	 */
 	@Test public void testAorBToSameState() throws Exception {
 		Grammar g = new Grammar(
 			"parser grammar T;\n"+
 			"a : A | B ;");
 		NFA nfa = createNFA(g);
 		DecisionState decisionState = nfa.decisionToNFAState.get(0);
 		DFA dfa = createDFA(g, decisionState);
 		// make S0 go to S1 on both A and B (pinch alts back to single state)
 		Edge first = dfa.states.get(0).edge(0);
 		Edge second = dfa.states.get(0).edge(1);
 		first.target = second.target;
 		System.out.print("altered DFA="+dfa);

 		CompiledPDA compiled = new DFACompiler(dfa).compile();
 		PDA pda = new PDA(compiled.code, compiled.altToAddr, compiled.nLabels);
 		String expecting =
 			"0000:\tsplit         7, 15\n" +
 			"0007:\tmatch8        4\n" +
 			"0009:\tjmp           12\n" +
 			"0012:\taccept        2\n" +
 			"0015:\tmatch8        5\n" +
 			"0017:\tjmp           12\n";
 		assertEquals(expecting, Bytecode.disassemble(pda.code, false));
 	}
 	@Test public void testAorB() throws Exception {
 		Grammar g = new Grammar(
 			"parser grammar T;\n"+
 			"a : A | B ;");
 		String expecting =
 			"0000:\tsplit         7, 15\n" +
-			"0007:\tmatch8        5\n" +
+			"0007:\tmatch8        4\n" +
 			"0009:\tjmp           12\n" +
-			"0012:\taccept        2\n" +
+			"0012:\taccept        1\n" +
-			"0015:\tmatch8        4\n" +
+			"0015:\tmatch8        5\n" +
 			"0017:\tjmp           20\n" +
-			"0020:\taccept        1\n";
+			"0020:\taccept        2\n";
 		checkBytecode(g, 0, expecting);
 	}
@ -82,10 +111,6 @@ public class TestDFAtoPDABytecodeGeneration extends BaseTest {
 		NFA nfa = createNFA(g);
 		DecisionState blk = nfa.decisionToNFAState.get(decision);
 		DFA dfa = createDFA(g, blk);
 //		Edge e0 = dfa.states.get(1).edge(0);
 //		Edge e1 = dfa.states.get(1).edge(1);
 //		e0.target = e1.target;
 //		System.out.print("altered DFA="+dfa);
 		DFACompiler comp = new DFACompiler(dfa);
 		CompiledPDA obj = comp.compile();
 		PDA pda = new PDA(obj.code, obj.altToAddr, obj.nLabels);
--- a/tool/test/org/antlr/v4/test/TestPDABytecodeInterp.java
+++ b/tool/test/org/antlr/v4/test/TestPDABytecodeInterp.java
@ -1,13 +1,7 @@
 package org.antlr.v4.test;
 import org.antlr.runtime.ANTLRStringStream;
 import org.antlr.runtime.Token;
 import org.antlr.v4.Tool;
 import org.antlr.v4.codegen.CompiledPDA;
 import org.antlr.v4.codegen.LexerCompiler;
 import org.antlr.v4.runtime.pda.PDA;
 import org.antlr.v4.semantics.SemanticPipeline;
 import org.antlr.v4.tool.Grammar;
 import org.antlr.v4.tool.LexerGrammar;
 import org.junit.Test;
@ -191,74 +185,41 @@ public class TestPDABytecodeInterp extends BaseTest {
 	}
 	void checkMatches(LexerGrammar g, String input, String expecting) {
-		if ( g.ast!=null && !g.ast.hasErrors ) {
+		PDA pda = getLexerPDA(g);
 			System.out.println(g.ast.toStringTree());
 			Tool antlr = new Tool();
 			SemanticPipeline sem = new SemanticPipeline(g);
 			sem.process();
 			if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
 				for (Grammar imp : g.getImportedGrammars()) {
 					antlr.process(imp);
 				}
 			}
 		}
-		List<Integer> expectingTokenTypes = new ArrayList<Integer>();
+		List<Integer> expectingTokenTypes = getTypesFromString(g, expecting);
 		if ( expecting!=null && !expecting.trim().equals("") ) {
 			for (String tname : expecting.replace(" ", "").split(",")) {
 				int ttype = g.getTokenType(tname);
 				expectingTokenTypes.add(ttype);
 			}
 		}
-		LexerCompiler comp = new LexerCompiler(g);
+		List<Integer> tokenTypes = getTokenTypes(input, pda);
 		CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
 		PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
 		ANTLRStringStream in = new ANTLRStringStream(input);
 		List<Integer> tokenTypes = new ArrayList<Integer>();
 		int ttype = 0;
 		do {
 			ttype = PDA.execThompson(in);
 			tokenTypes.add(ttype);
 		} while ( ttype!= Token.EOF );
 		assertEquals(expectingTokenTypes, tokenTypes);
 	}
 	void checkLabels(LexerGrammar g, String input, String expecting,
 					  String expectingTokens)
 	{
-		if ( g.ast!=null && !g.ast.hasErrors ) {
+		PDA pda = getLexerPDA(g);
-			System.out.println(g.ast.toStringTree());
+		List<Integer> expectingTokenTypes = getTypesFromString(g, expecting);
 			Tool antlr = new Tool();
 			SemanticPipeline sem = new SemanticPipeline(g);
 			sem.process();
 			if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
 				for (Grammar imp : g.getImportedGrammars()) {
 					antlr.process(imp);
 				}
 			}
 		}
 		List<Integer> expectingTokenTypes = new ArrayList<Integer>();
 		if ( expecting!=null && !expecting.trim().equals("") ) {
 			for (String tname : expecting.replace(" ", "").split(",")) {
 				int ttype = g.getTokenType(tname);
 				expectingTokenTypes.add(ttype);
 			}
 		}
 		LexerCompiler comp = new LexerCompiler(g);
 		CompiledPDA obj = comp.compileMode(LexerGrammar.DEFAULT_MODE_NAME);
 		PDA PDA = new PDA(obj.code, obj.altToAddr, obj.nLabels);
 		ANTLRStringStream in = new ANTLRStringStream(input);
 		List<Integer> tokenTypes = new ArrayList<Integer>();
-		int ttype = PDA.execThompson(in);
+		int ttype = pda.execThompson(in);
 		tokenTypes.add(ttype);
 		assertEquals(expectingTokenTypes, tokenTypes);
 		if ( expectingTokens!=null ) {
-			assertEquals(expectingTokens, Arrays.toString(PDA.labelValues));
+			assertEquals(expectingTokens, Arrays.toString(pda.labelValues));
 		}
 	}
 //	List<Token> getTokens(String input, PDA lexerPDA) {
 //		ANTLRStringStream in = new ANTLRStringStream(input);
 //		List<Token> tokens = new ArrayList<Token>();
 //		int ttype = 0;
 //		do {
 //			ttype = lexerPDA.execThompson(in);
 //			tokens.add(new CommonToken(ttype,""));
 //		} while ( ttype!= Token.EOF );
 //		return tokens;
 //	}
 }