forked from jasder/antlr
got started on DFA to transition table generation
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6910]
This commit is contained in:
parent
c18898a917
commit
024f03b8d7
|
@ -301,7 +301,7 @@ import org.antlr.runtime.*;
|
|||
<lexer>
|
||||
>>
|
||||
|
||||
Lexer(lexerName, modes, pdas, tokens, actions, sempreds, namedActions) ::= <<
|
||||
Lexer(lexerName, modes, dfas, pdas, tokens, actions, sempreds, namedActions) ::= <<
|
||||
public class <lexerName> extends Lexer {
|
||||
<tokens.keys:{k | public static final int <k>=<tokens.(k)>;}; separator="\n">
|
||||
<modes:{m| public static final int <m> = <i0>;}; separator="\n">
|
||||
|
@ -319,10 +319,23 @@ public class <lexerName> extends Lexer {
|
|||
|
||||
<actions>
|
||||
<sempreds>
|
||||
<dfas>
|
||||
<pdas>
|
||||
}
|
||||
>>
|
||||
|
||||
/** Emit the static lookup tables for one DFA.
 *
 *  name  - prefix for the generated field names (one DFA per lexer mode).
 *  model - object exposing list-valued properties min, max and transition.
 *
 *  Every list expansion carries an explicit null option: StringTemplate
 *  skips null list elements by default, which would silently shorten the
 *  arrays and shift every later index.  Unset min/max slots render as 0 and
 *  a missing row renders as null.
 *  NOTE(review): -1 is used as the "no transition" filler inside a row;
 *  confirm that this is the sentinel the runtime expects.
 */
DFA(name, model) ::= <<
public static final char[] <name>_min = {
	<model.min; separator=", ", null="0">
};
public static final char[] <name>_max = {
	<model.max; separator=", ", null="0">
};
public static final short[][] <name>_transition = {
	<model.transition:{t | {<t; separator=", ", null="-1">\}}; separator=",\n", null="null">
};
>>
|
||||
|
||||
PDA(name, model, actions, sempreds) ::= <<
|
||||
public static final byte[] <name>_code = {
|
||||
<model.code; separator=", ">
|
||||
|
|
|
@ -2,7 +2,6 @@ package org.antlr.v4.analysis;
|
|||
|
||||
import org.antlr.v4.automata.DFA;
|
||||
import org.antlr.v4.automata.DecisionState;
|
||||
import org.antlr.v4.automata.TokensStartState;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
import org.antlr.v4.tool.LexerGrammar;
|
||||
|
||||
|
@ -22,7 +21,6 @@ public class AnalysisPipeline {
|
|||
// BUILD DFA FOR EACH DECISION
|
||||
if ( g.isLexer() ) processLexer();
|
||||
else processParserOrTreeParser();
|
||||
if ( !g.isLexer() ) processParserOrTreeParser();
|
||||
}
|
||||
|
||||
void processLexer() {
|
||||
|
@ -30,8 +28,9 @@ public class AnalysisPipeline {
|
|||
for (String modeName : lg.modes.keySet()) {
|
||||
LexerNFAToDFAConverter conv = new LexerNFAToDFAConverter(lg);
|
||||
DFA dfa = conv.createDFA(modeName);
|
||||
TokensStartState startState = g.nfa.modeToStartState.get(modeName);
|
||||
g.setLookaheadDFA(startState.decision, dfa);
|
||||
lg.modeToDFA.put(modeName, dfa);
|
||||
//TokensStartState startState = g.nfa.modeToStartState.get(modeName);
|
||||
//g.setLookaheadDFA(startState.decision, dfa);
|
||||
|
||||
if ( g.tool.minimizeDFA ) {
|
||||
int before = dfa.stateSet.size();
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
package org.antlr.v4.codegen;
|
||||
|
||||
import org.antlr.v4.automata.DFA;
|
||||
import org.antlr.v4.automata.DFAState;
|
||||
import org.antlr.v4.automata.Edge;
|
||||
import org.antlr.v4.automata.Label;
|
||||
import org.antlr.v4.misc.IntervalSet;
|
||||
import org.antlr.v4.misc.Utils;
|
||||
import org.antlr.v4.tool.ErrorManager;
|
||||
|
||||
import java.util.Vector;
|
||||
|
||||
/** From a DFA, create transition table etc... */
|
||||
public class CompiledDFA {
|
||||
public DFA dfa;
|
||||
public Vector<Vector<Integer>> transition;
|
||||
public Vector<Integer> min;
|
||||
public Vector<Integer> max;
|
||||
|
||||
public CompiledDFA(DFA dfa) {
|
||||
this.dfa = dfa;
|
||||
|
||||
int n = dfa.states.size();
|
||||
min = new Vector<Integer>(n); min.setSize(n);
|
||||
max = new Vector<Integer>(n); max.setSize(n);
|
||||
transition = new Vector<Vector<Integer>>(n); transition.setSize(n);
|
||||
|
||||
for (DFAState d : dfa.states) {
|
||||
if ( d == null ) continue;
|
||||
createMinMaxTables(d);
|
||||
createTransitionTableEntryForState(d);
|
||||
}
|
||||
}
|
||||
|
||||
protected void createMinMaxTables(DFAState d) {
|
||||
int smin = Label.MAX_CHAR_VALUE + 1;
|
||||
int smax = Label.MIN_ATOM_VALUE - 1;
|
||||
int n = d.edges.size();
|
||||
for (int j = 0; j < n; j++) {
|
||||
Edge edge = d.edge(j);
|
||||
IntervalSet label = edge.label;
|
||||
int lmin = label.getMinElement();
|
||||
// if valid char (don't do EOF) and less than current min
|
||||
if ( lmin<smin && lmin>=Label.MIN_CHAR_VALUE ) {
|
||||
smin = label.getMinElement();
|
||||
}
|
||||
if ( label.getMaxElement()>smax ) {
|
||||
smax = label.getMaxElement();
|
||||
}
|
||||
}
|
||||
|
||||
if ( smax<0 ) {
|
||||
// must be predicates or pure EOT transition; just zero out min, max
|
||||
smin = Label.MIN_CHAR_VALUE;
|
||||
smax = Label.MIN_CHAR_VALUE;
|
||||
}
|
||||
|
||||
min.set(d.stateNumber, Utils.integer((char)smin));
|
||||
max.set(d.stateNumber, Utils.integer((char)smax));
|
||||
|
||||
if ( smax<0 || smin>Label.MAX_CHAR_VALUE || smin<0 ) {
|
||||
ErrorManager.internalError("messed up: min="+min+", max="+max);
|
||||
}
|
||||
}
|
||||
|
||||
void createTransitionTableEntryForState(DFAState s) {
|
||||
/*
|
||||
System.out.println("createTransitionTableEntryForState s"+s.stateNumber+
|
||||
" dec "+s.dfa.decisionNumber+" cyclic="+s.dfa.isCyclic());
|
||||
*/
|
||||
if ( s.edges.size() == 0 ) return;
|
||||
int smax = ((Integer)max.get(s.stateNumber)).intValue();
|
||||
int smin = ((Integer)min.get(s.stateNumber)).intValue();
|
||||
|
||||
Vector<Integer> stateTransitions = new Vector<Integer>(smax-smin+1);
|
||||
stateTransitions.setSize(smax-smin+1);
|
||||
transition.set(s.stateNumber, stateTransitions);
|
||||
for (Edge e : s.edges) {
|
||||
int[] atoms = e.label.toArray();
|
||||
for (int a = 0; a < atoms.length; a++) {
|
||||
// set the transition if the label is valid (don't do EOF)
|
||||
if ( atoms[a] >= Label.MIN_CHAR_VALUE ) {
|
||||
int labelIndex = atoms[a]-smin; // offset from 0
|
||||
stateTransitions.set(labelIndex,
|
||||
Utils.integer(e.target.stateNumber));
|
||||
}
|
||||
}
|
||||
}
|
||||
// track unique state transition tables so we can reuse
|
||||
// Integer edgeClass = (Integer)edgeTransitionClassMap.get(stateTransitions);
|
||||
// if ( edgeClass!=null ) {
|
||||
// //System.out.println("we've seen this array before; size="+stateTransitions.size());
|
||||
// transitionEdgeTables.set(s.stateNumber, edgeClass);
|
||||
// }
|
||||
// else {
|
||||
// edgeClass = Utils.integer(edgeTransitionClass);
|
||||
// transitionEdgeTables.set(s.stateNumber, edgeClass);
|
||||
// edgeTransitionClassMap.put(stateTransitions, edgeClass);
|
||||
// edgeTransitionClass++;
|
||||
// }
|
||||
}
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
package org.antlr.v4.codegen;
|
||||
|
||||
import org.antlr.runtime.Token;
|
||||
import org.antlr.v4.automata.DFA;
|
||||
import org.antlr.v4.tool.LexerGrammar;
|
||||
import org.antlr.v4.tool.Rule;
|
||||
import org.stringtemplate.v4.ST;
|
||||
|
@ -25,34 +26,8 @@ public class LexerFactory {
|
|||
fileST.add("fileName", gen.getRecognizerFileName());
|
||||
fileST.add("lexer", lexerST);
|
||||
for (String modeName : lg.modes.keySet()) { // for each mode
|
||||
LexerCompiler comp = new LexerCompiler(lg);
|
||||
CompiledPDA pda = comp.compileMode(modeName);
|
||||
ST pdaST = gen.templates.getInstanceOf("PDA");
|
||||
for (Rule r : pda.ruleActions.keySet()) {
|
||||
Set<Token> actionTokens = pda.ruleActions.keySet(r);
|
||||
ST actionST = gen.templates.getInstanceOf("actionMethod");
|
||||
actionST.add("name", r.name);
|
||||
for (Token t : actionTokens) {
|
||||
actionST.add("actions", Misc.strip(t.getText(),1));
|
||||
actionST.add("ruleIndex", r.index);
|
||||
}
|
||||
pdaST.add("actions", actionST);
|
||||
lexerST.add("actions", actionST);
|
||||
}
|
||||
for (Rule r : pda.ruleSempreds.keySet()) {
|
||||
Set<Token> sempredTokens = pda.ruleSempreds.keySet(r);
|
||||
ST sempredST = gen.templates.getInstanceOf("sempredMethod");
|
||||
sempredST.add("name", r.name);
|
||||
sempredST.add("ruleIndex", r.index);
|
||||
for (Token t : sempredTokens) {
|
||||
sempredST.add("preds", t.getText());
|
||||
}
|
||||
pdaST.add("sempreds", sempredST);
|
||||
lexerST.add("sempreds", sempredST);
|
||||
}
|
||||
pdaST.add("name", modeName);
|
||||
pdaST.add("model", pda);
|
||||
lexerST.add("pdas", pdaST);
|
||||
injectDFAs(lg, lexerST, modeName);
|
||||
injectPDAs(lg, lexerST, modeName);
|
||||
}
|
||||
|
||||
LinkedHashMap<String,Integer> tokens = new LinkedHashMap<String,Integer>();
|
||||
|
@ -65,4 +40,44 @@ public class LexerFactory {
|
|||
|
||||
return fileST;
|
||||
}
|
||||
|
||||
void injectDFAs(LexerGrammar lg, ST lexerST, String modeName) {
|
||||
System.out.println("inject dfa for "+modeName);
|
||||
DFA dfa = lg.modeToDFA.get(modeName);
|
||||
ST dfaST = gen.templates.getInstanceOf("DFA");
|
||||
dfaST.add("name", modeName);
|
||||
dfaST.add("model", new CompiledDFA(dfa));
|
||||
lexerST.add("dfas", dfaST);
|
||||
}
|
||||
|
||||
void injectPDAs(LexerGrammar lg, ST lexerST, String modeName) {
|
||||
LexerCompiler comp = new LexerCompiler(lg);
|
||||
CompiledPDA pda = comp.compileMode(modeName);
|
||||
ST pdaST = gen.templates.getInstanceOf("PDA");
|
||||
for (Rule r : pda.ruleActions.keySet()) {
|
||||
Set<Token> actionTokens = pda.ruleActions.keySet(r);
|
||||
ST actionST = gen.templates.getInstanceOf("actionMethod");
|
||||
actionST.add("name", r.name);
|
||||
for (Token t : actionTokens) {
|
||||
actionST.add("actions", Misc.strip(t.getText(),1));
|
||||
actionST.add("ruleIndex", r.index);
|
||||
}
|
||||
pdaST.add("actions", actionST);
|
||||
lexerST.add("actions", actionST);
|
||||
}
|
||||
for (Rule r : pda.ruleSempreds.keySet()) {
|
||||
Set<Token> sempredTokens = pda.ruleSempreds.keySet(r);
|
||||
ST sempredST = gen.templates.getInstanceOf("sempredMethod");
|
||||
sempredST.add("name", r.name);
|
||||
sempredST.add("ruleIndex", r.index);
|
||||
for (Token t : sempredTokens) {
|
||||
sempredST.add("preds", t.getText());
|
||||
}
|
||||
pdaST.add("sempreds", sempredST);
|
||||
lexerST.add("sempreds", sempredST);
|
||||
}
|
||||
pdaST.add("name", modeName);
|
||||
pdaST.add("model", pda);
|
||||
lexerST.add("pdas", pdaST);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,10 @@ package org.antlr.v4.tool;
|
|||
import org.antlr.misc.MultiMap;
|
||||
import org.antlr.runtime.RecognitionException;
|
||||
import org.antlr.v4.Tool;
|
||||
import org.antlr.v4.automata.DFA;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/** */
|
||||
public class LexerGrammar extends Grammar {
|
||||
|
@ -10,6 +14,8 @@ public class LexerGrammar extends Grammar {
|
|||
|
||||
public MultiMap<String, Rule> modes = new MultiMap<String, Rule>();
|
||||
|
||||
public Map<String, DFA> modeToDFA = new HashMap<String, DFA>();
|
||||
|
||||
public LexerGrammar(Tool tool, GrammarRootAST ast) {
|
||||
super(tool, ast);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue