Got started on DFA-to-transition-table generation.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6910]
This commit is contained in:
parrt 2010-06-10 17:33:01 -08:00
parent c18898a917
commit 024f03b8d7
5 changed files with 168 additions and 33 deletions

View File

@ -301,7 +301,7 @@ import org.antlr.runtime.*;
<lexer>
>>
Lexer(lexerName, modes, pdas, tokens, actions, sempreds, namedActions) ::= <<
Lexer(lexerName, modes, dfas, pdas, tokens, actions, sempreds, namedActions) ::= <<
public class <lexerName> extends Lexer {
<tokens.keys:{k | public static final int <k>=<tokens.(k)>;}; separator="\n">
<modes:{m| public static final int <m> = <i0>;}; separator="\n">
@ -319,10 +319,23 @@ public class <lexerName> extends Lexer {
<actions>
<sempreds>
<dfas>
<pdas>
}
>>
DFA(name, model) ::= <<
public static final char[] <name>_min = {
<model.min; separator=", ">
};
public static final char[] <name>_max = {
<model.max; separator=", ">
};
public static final short[][] <name>_transition = {
<model.transition:{t | {<t; separator=", ">\}}; separator=",\n", null="null">
};
>>
PDA(name, model, actions, sempreds) ::= <<
public static final byte[] <name>_code = {
<model.code; separator=", ">

View File

@ -2,7 +2,6 @@ package org.antlr.v4.analysis;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.automata.DecisionState;
import org.antlr.v4.automata.TokensStartState;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
@ -22,7 +21,6 @@ public class AnalysisPipeline {
// BUILD DFA FOR EACH DECISION
if ( g.isLexer() ) processLexer();
else processParserOrTreeParser();
if ( !g.isLexer() ) processParserOrTreeParser();
}
void processLexer() {
@ -30,8 +28,9 @@ public class AnalysisPipeline {
for (String modeName : lg.modes.keySet()) {
LexerNFAToDFAConverter conv = new LexerNFAToDFAConverter(lg);
DFA dfa = conv.createDFA(modeName);
TokensStartState startState = g.nfa.modeToStartState.get(modeName);
g.setLookaheadDFA(startState.decision, dfa);
lg.modeToDFA.put(modeName, dfa);
//TokensStartState startState = g.nfa.modeToStartState.get(modeName);
//g.setLookaheadDFA(startState.decision, dfa);
if ( g.tool.minimizeDFA ) {
int before = dfa.stateSet.size();

View File

@ -0,0 +1,102 @@
package org.antlr.v4.codegen;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.automata.DFAState;
import org.antlr.v4.automata.Edge;
import org.antlr.v4.automata.Label;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.tool.ErrorManager;
import java.util.Vector;
/** From a DFA, create transition table etc... */
public class CompiledDFA {
// Source DFA this compiled form was derived from.
public DFA dfa;
// transition.get(s)[c - min.get(s)] = target state number for state s on char c;
// an entry (or whole row) left null means "no transition".
public Vector<Vector<Integer>> transition;
// min.get(s) / max.get(s): smallest / largest character with an outgoing
// edge from state s (stored as Integer holding a char value).
public Vector<Integer> min;
public Vector<Integer> max;
/** Build min/max and transition tables for every (non-null) state in dfa.
 *  Tables are indexed by DFAState.stateNumber, so each Vector is presized
 *  to dfa.states.size() with setSize() and filled positionally via set().
 */
public CompiledDFA(DFA dfa) {
this.dfa = dfa;
int n = dfa.states.size();
min = new Vector<Integer>(n); min.setSize(n);
max = new Vector<Integer>(n); max.setSize(n);
transition = new Vector<Vector<Integer>>(n); transition.setSize(n);
for (DFAState d : dfa.states) {
// state list can contain holes; skip them
if ( d == null ) continue;
createMinMaxTables(d);
createTransitionTableEntryForState(d);
}
}
/** Record, for state d, the smallest and largest character labeling any
 *  outgoing edge.  EOF (below MIN_CHAR_VALUE) is excluded from the min.
 */
protected void createMinMaxTables(DFAState d) {
// start min above every valid char and max below every valid atom so
// any real edge label replaces them
int smin = Label.MAX_CHAR_VALUE + 1;
int smax = Label.MIN_ATOM_VALUE - 1;
int n = d.edges.size();
for (int j = 0; j < n; j++) {
Edge edge = d.edge(j);
IntervalSet label = edge.label;
int lmin = label.getMinElement();
// if valid char (don't do EOF) and less than current min
if ( lmin<smin && lmin>=Label.MIN_CHAR_VALUE ) {
// NOTE(review): recomputes getMinElement() — same value as lmin above
smin = label.getMinElement();
}
if ( label.getMaxElement()>smax ) {
smax = label.getMaxElement();
}
}
if ( smax<0 ) {
// must be predicates or pure EOT transition; just zero out min, max
smin = Label.MIN_CHAR_VALUE;
smax = Label.MIN_CHAR_VALUE;
}
// store as char-valued Integers, positionally by state number
min.set(d.stateNumber, Utils.integer((char)smin));
max.set(d.stateNumber, Utils.integer((char)smax));
// NOTE(review): this sanity check runs AFTER the values were stored, and
// the smax<0 case was already normalized above — confirm intent
if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) {
ErrorManager.internalError("messed up: min="+min+", max="+max);
}
}
/** Fill the transition row for state s: for each edge atom c in
 *  [min..max], row[c-min] = target state number.  Requires
 *  createMinMaxTables(s) to have run first.  States with no edges get no
 *  row (left null).
 */
void createTransitionTableEntryForState(DFAState s) {
/*
System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
" dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
*/
if ( s.edges.size() == 0 ) return;
int smax = ((Integer)max.get(s.stateNumber)).intValue();
int smin = ((Integer)min.get(s.stateNumber)).intValue();
// row spans [smin..smax]; unset slots stay null (no transition)
Vector<Integer> stateTransitions = new Vector<Integer>(smax-smin+1);
stateTransitions.setSize(smax-smin+1);
transition.set(s.stateNumber, stateTransitions);
for (Edge e : s.edges) {
int[] atoms = e.label.toArray();
for (int a = 0; a < atoms.length; a++) {
// set the transition if the label is valid (don't do EOF)
if ( atoms[a] >= Label.MIN_CHAR_VALUE ) {
int labelIndex = atoms[a]-smin; // offset from 0
stateTransitions.set(labelIndex,
Utils.integer(e.target.stateNumber));
}
}
}
// track unique state transition tables so we can reuse
// Integer edgeClass = (Integer)edgeTransitionClassMap.get(stateTransitions);
// if ( edgeClass!=null ) {
// //System.out.println("we've seen this array before; size="+stateTransitions.size());
// transitionEdgeTables.set(s.stateNumber, edgeClass);
// }
// else {
// edgeClass = Utils.integer(edgeTransitionClass);
// transitionEdgeTables.set(s.stateNumber, edgeClass);
// edgeTransitionClassMap.put(stateTransitions, edgeClass);
// edgeTransitionClass++;
// }
}
}

View File

@ -1,6 +1,7 @@
package org.antlr.v4.codegen;
import org.antlr.runtime.Token;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.Rule;
import org.stringtemplate.v4.ST;
@ -25,34 +26,8 @@ public class LexerFactory {
fileST.add("fileName", gen.getRecognizerFileName());
fileST.add("lexer", lexerST);
for (String modeName : lg.modes.keySet()) { // for each mode
LexerCompiler comp = new LexerCompiler(lg);
CompiledPDA pda = comp.compileMode(modeName);
ST pdaST = gen.templates.getInstanceOf("PDA");
for (Rule r : pda.ruleActions.keySet()) {
Set<Token> actionTokens = pda.ruleActions.keySet(r);
ST actionST = gen.templates.getInstanceOf("actionMethod");
actionST.add("name", r.name);
for (Token t : actionTokens) {
actionST.add("actions", Misc.strip(t.getText(),1));
actionST.add("ruleIndex", r.index);
}
pdaST.add("actions", actionST);
lexerST.add("actions", actionST);
}
for (Rule r : pda.ruleSempreds.keySet()) {
Set<Token> sempredTokens = pda.ruleSempreds.keySet(r);
ST sempredST = gen.templates.getInstanceOf("sempredMethod");
sempredST.add("name", r.name);
sempredST.add("ruleIndex", r.index);
for (Token t : sempredTokens) {
sempredST.add("preds", t.getText());
}
pdaST.add("sempreds", sempredST);
lexerST.add("sempreds", sempredST);
}
pdaST.add("name", modeName);
pdaST.add("model", pda);
lexerST.add("pdas", pdaST);
injectDFAs(lg, lexerST, modeName);
injectPDAs(lg, lexerST, modeName);
}
LinkedHashMap<String,Integer> tokens = new LinkedHashMap<String,Integer>();
@ -65,4 +40,44 @@ public class LexerFactory {
return fileST;
}
/** Look up the DFA computed for modeName, wrap it in a CompiledDFA model,
 *  and add it to lexerST's "dfas" attribute via the "DFA" template.
 */
void injectDFAs(LexerGrammar lg, ST lexerST, String modeName) {
    // removed leftover debug System.out.println; use a logger if tracing is needed
    DFA dfa = lg.modeToDFA.get(modeName);
    ST dfaST = gen.templates.getInstanceOf("DFA");
    dfaST.add("name", modeName);
    dfaST.add("model", new CompiledDFA(dfa));
    lexerST.add("dfas", dfaST);
}
/** Compile modeName's lexer rules into a PDA, emit actionMethod/sempredMethod
 *  templates for each rule's actions and sempreds, then add the filled "PDA"
 *  template to lexerST's "pdas" attribute.
 */
void injectPDAs(LexerGrammar lg, ST lexerST, String modeName) {
    LexerCompiler compiler = new LexerCompiler(lg);
    CompiledPDA compiled = compiler.compileMode(modeName);
    ST pdaTemplate = gen.templates.getInstanceOf("PDA");
    // one actionMethod template per rule that has embedded actions
    for (Rule rule : compiled.ruleActions.keySet()) {
        Set<Token> actionTokens = compiled.ruleActions.keySet(rule);
        ST actionMethod = gen.templates.getInstanceOf("actionMethod");
        actionMethod.add("name", rule.name);
        for (Token tok : actionTokens) {
            actionMethod.add("actions", Misc.strip(tok.getText(),1));
            actionMethod.add("ruleIndex", rule.index);
        }
        pdaTemplate.add("actions", actionMethod);
        lexerST.add("actions", actionMethod);
    }
    // one sempredMethod template per rule that has semantic predicates
    for (Rule rule : compiled.ruleSempreds.keySet()) {
        Set<Token> sempredTokens = compiled.ruleSempreds.keySet(rule);
        ST sempredMethod = gen.templates.getInstanceOf("sempredMethod");
        sempredMethod.add("name", rule.name);
        sempredMethod.add("ruleIndex", rule.index);
        for (Token tok : sempredTokens) {
            sempredMethod.add("preds", tok.getText());
        }
        pdaTemplate.add("sempreds", sempredMethod);
        lexerST.add("sempreds", sempredMethod);
    }
    pdaTemplate.add("name", modeName);
    pdaTemplate.add("model", compiled);
    lexerST.add("pdas", pdaTemplate);
}
}

View File

@ -3,6 +3,10 @@ package org.antlr.v4.tool;
import org.antlr.misc.MultiMap;
import org.antlr.runtime.RecognitionException;
import org.antlr.v4.Tool;
import org.antlr.v4.automata.DFA;
import java.util.HashMap;
import java.util.Map;
/** */
public class LexerGrammar extends Grammar {
@ -10,6 +14,8 @@ public class LexerGrammar extends Grammar {
public MultiMap<String, Rule> modes = new MultiMap<String, Rule>();
public Map<String, DFA> modeToDFA = new HashMap<String, DFA>();
public LexerGrammar(Tool tool, GrammarRootAST ast) {
super(tool, ast);
}