wow. added dfa minimization

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6755]
This commit is contained in:
parrt 2010-03-16 17:16:07 -08:00
parent c967b39f44
commit d72dbb7af4
9 changed files with 265 additions and 31 deletions

View File

@ -38,6 +38,11 @@ public class AnalysisPipeline {
if ( dfa.valid() ) {
// ambig / unreachable errors
conv.issueAmbiguityWarnings();
System.out.println("MINIMIZE");
DFAMinimizer dmin = new DFAMinimizer(dfa);
dmin.minimize();
return dfa;
}
@ -58,6 +63,10 @@ public class AnalysisPipeline {
}
else System.out.println("recursion limited valid");
System.out.println("MINIMIZE");
DFAMinimizer dmin = new DFAMinimizer(dfa);
dmin.minimize();
return dfa;
}

View File

@ -0,0 +1,208 @@
package org.antlr.v4.analysis;
import org.antlr.v4.automata.DFA;
import org.antlr.v4.automata.DFAState;
import org.antlr.v4.automata.Edge;
import org.antlr.v4.misc.IntSet;
import org.antlr.v4.misc.Interval;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.misc.OrderedHashSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/** Minimizes a {@link DFA} in place by merging equivalent states.
 *
 *  Two states are treated as equivalent unless they can be told apart:
 *  accept states predicting different alts are distinct, an accept state is
 *  distinct from a non-accept state, and two states are distinct if, for
 *  some edge label, one has a transition and the other does not, or their
 *  targets are already known to be distinct.  Each class of equivalent
 *  states is collapsed onto its lowest-numbered member; every edge is
 *  redirected to that member and the other members are removed from the DFA.
 *
 *  The code indexes its tables by both list position and
 *  {@code DFAState.stateNumber}, so it relies on {@code dfa.states.get(i)}
 *  having state number {@code i}.
 *
 *  NOTE(review): removed states are not taken out of
 *  {@code dfa.altToAcceptStates}, and parallel edges between the same pair
 *  of states are not yet consolidated into one edge -- confirm whether
 *  callers need either.
 */
public class DFAMinimizer {
    DFA dfa;

    /** @param dfa the DFA that {@link #minimize()} will rewrite in place */
    public DFAMinimizer(DFA dfa) {
        this.dfa = dfa;
    }

    /** Compute the equivalence classes of {@code dfa}'s states and merge
     *  each class into its lowest-numbered state.  Prints a trace of every
     *  step to {@code System.out}.
     */
    public void minimize() {
        int n = dfa.states.size();

        Set<IntSet> labels = collectLabels();
        System.out.println("labels="+labels);

        boolean[][] distinct = new boolean[n][n];
        markInitialPartition(distinct, n);
        print(distinct);

        refine(distinct, labels, n);
        print(distinct);

        OrderedHashSet<IntervalSet> uniq = buildEquivalenceSets(distinct, n);
        System.out.println("uniq sets = "+uniq);
        // Every state lands in exactly one set, so n sets means n singleton
        // classes: there is nothing to merge.
        if ( uniq.size()==n ) {
            System.out.println("was already minimal");
            return;
        }

        // minimize the DFA (combine equiv sets)
        DFAState[] representative = mapToRepresentatives(uniq, n);
        for (DFAState s : representative) System.out.println(s);
        redirectEdges(uniq, representative);
        killMergedStates(uniq);
    }

    /** Gather the label of every edge in the DFA. */
    private Set<IntSet> collectLabels() {
        Set<IntSet> labels = new HashSet<IntSet>();
        for (DFAState d : dfa.states) {
            for (Edge e : d.edges) {
                // todo: slow? might want to flatten to list of int token types
                labels.add(e.label);
            }
        }
        return labels;
    }

    /** Seed the table: accept states for an alt are distinct from every
     *  state that is not an accept state for that same alt, and accept
     *  states are distinct from non-accept states.  Multiple accept states
     *  for one alt start out in the same partition.  The table is kept
     *  symmetric so it can be indexed in either order.
     */
    private void markInitialPartition(boolean[][] distinct, int n) {
        for (int alt=1; alt<=dfa.nAlts; alt++) {
            List<DFAState> acceptsForAlt = dfa.altToAcceptStates[alt];
            // an alt with no accept state (unreachable) has a null entry
            if ( acceptsForAlt==null ) continue;
            for (DFAState p : acceptsForAlt) {
                for (int i=0; i<n; i++) {
                    DFAState q = dfa.states.get(i);
                    // if q not accept state or p and q predict diff alts, distinguish them
                    if ( !q.isAcceptState || q.predictsAlt!=alt ) {
                        distinct[p.stateNumber][i] = true;
                        distinct[i][p.stateNumber] = true;
                    }
                }
            }
        }

        for (int i=1; i<n; i++) {
            DFAState p = dfa.states.get(i);
            for (int j=0; j<i; j++) {
                DFAState q = dfa.states.get(j);
                if ( p.isAcceptState != q.isAcceptState ) {
                    // Symmetric on purpose: the refinement step looks up
                    // distinct[delta(p,a)][delta(q,a)] and the target
                    // numbers can come in either order.
                    distinct[i][j] = true;
                    distinct[j][i] = true;
                }
            }
        }
    }

    /** Repeat until nothing changes: a still-equivalent pair (p,q) becomes
     *  distinct if, for some label a, exactly one of delta(p,a), delta(q,a)
     *  is missing, or both exist and are already distinct.
     *
     *  The algorithm assumes a "total" DFA with an implicit error state.
     *  A missing transition (null target) stands for that error state, so
     *  two missing transitions agree and tell us nothing.
     */
    private void refine(boolean[][] distinct, Set<IntSet> labels, int n) {
        boolean changed = true;
        while ( changed ) {
            changed = false;
            for (int i=1; i<n; i++) {
                for (int j=0; j<i; j++) {
                    if ( distinct[i][j] ) continue;
                    DFAState p = dfa.states.get(i);
                    DFAState q = dfa.states.get(j);
                    for (IntSet label : labels) {
                        DFAState pt = p.target(label);
                        DFAState qt = q.target(label);
                        if ( pt==null && qt==null ) continue; // both go to error state
                        if ( pt==null || qt==null ||
                             distinct[pt.stateNumber][qt.stateNumber] )
                        {
                            distinct[i][j] = true;
                            distinct[j][i] = true;
                            changed = true;
                            break; // we've marked; move to next pair
                        }
                    }
                }
            }
        }
    }

    /** Turn the table into sets of state numbers.  Each state's set holds
     *  itself and every state it is not distinct from; a state with no
     *  equivalent partner becomes a singleton.  Duplicate sets collapse in
     *  the returned ordered set.
     */
    private OrderedHashSet<IntervalSet> buildEquivalenceSets(boolean[][] distinct, int n) {
        IntervalSet[] stateToSet = new IntervalSet[n];
        for (int i=0; i<n; i++) stateToSet[i] = new IntervalSet();

        System.out.println("equiv pairs:");
        for (int i=1; i<n; i++) {
            for (int j=0; j<i; j++) {
                if ( !distinct[i][j] ) {
                    System.out.println(i+","+j);
                    stateToSet[i].add(i);
                    stateToSet[i].add(j);
                    stateToSet[j].add(i);
                    stateToSet[j].add(j);
                }
            }
        }

        System.out.println("equiv sets:");
        OrderedHashSet<IntervalSet> uniq = new OrderedHashSet<IntervalSet>();
        for (int i=0; i<stateToSet.length; i++) {
            IntervalSet s = stateToSet[i];
            if ( s.isNil() ) s.add(i); // i must be its own set if not equiv
            System.out.println(s);
            uniq.add(s);
        }
        return uniq;
    }

    /** Map every state number to the state that survives for its set,
     *  namely the member with the smallest state number.
     */
    private DFAState[] mapToRepresentatives(OrderedHashSet<IntervalSet> uniq, int n) {
        DFAState[] representative = new DFAState[n];
        for (IntervalSet s : uniq) {
            DFAState survivor = dfa.states.get(s.getMinElement());
            for (Interval I : s.getIntervals()) {
                for (int i=I.a; i<=I.b; i++) {
                    representative[i] = survivor;
                }
            }
        }
        return representative;
    }

    /** Point every edge at the representative of its current target. */
    private void redirectEdges(OrderedHashSet<IntervalSet> uniq, DFAState[] representative) {
        for (IntervalSet s : uniq) {
            System.out.println("do set "+s);
            for (Interval I : s.getIntervals()) {
                for (int i=I.a; i<=I.b; i++) {
                    DFAState p = dfa.states.get(i);
                    for (Edge e : p.edges) {
                        System.out.println(p.stateNumber+" upon "+e.toString(dfa.g)+
                                           " used to point at "+e.target.stateNumber+
                                           " now points at "+representative[e.target.stateNumber].stateNumber);
                        e.target = representative[e.target.stateNumber];
                    }
                }
            }
        }
    }

    /** Remove every member of each set except its minimum element.  A set
     *  may span several intervals (e.g. {1, 3..4}); only the overall
     *  minimum survives, not the first element of each interval.
     */
    private void killMergedStates(OrderedHashSet<IntervalSet> uniq) {
        for (IntervalSet s : uniq) {
            int survivor = s.getMinElement();
            for (Interval I : s.getIntervals()) {
                for (int i=I.a; i<=I.b; i++) {
                    if ( i==survivor ) continue;
                    System.out.println("kill "+i);
                    dfa.stateSet.remove(dfa.states.get(i));
                    dfa.states.set(i, null);
                }
            }
        }
    }

    /** Dump the table: one row per state (T = distinct, F = not known to be
     *  distinct), followed by a row of column indexes.
     */
    private void print(boolean[][] distinct) {
        int n = distinct.length;
        for (int i=0; i<n; i++) {
            System.out.print(dfa.states.get(i).stateNumber+":");
            for (int j=0; j<n; j++) {
                System.out.print(" "+(distinct[i][j]?"T":"F"));
            }
            System.out.println();
        }
        System.out.print(" ");
        for (int j=0; j<n; j++) System.out.print(" "+j);
        System.out.println();
        System.out.println();
    }
}

View File

@ -37,15 +37,15 @@ public class DFAVerifier {
public DFAVerifier(DFA dfa, StackLimitedNFAToDFAConverter converter) {
this.dfa = dfa;
this.converter = converter;
for (DFAState d : dfa.states.values()) {
for (DFAState d : dfa.stateSet.values()) {
for (Edge e : d.edges) incidentStates.map(e.target, d);
}
}
public Set<Integer> getUnreachableAlts() {
Set<Integer> unreachable = new HashSet<Integer>();
for (int alt=0; alt<dfa.nAlts; alt++) {
if ( dfa.altToAcceptState[alt]==null ) unreachable.add(alt);
for (int alt=0; alt<=dfa.nAlts; alt++) {
if ( dfa.altToAcceptStates[alt]==null ) unreachable.add(alt);
}
return unreachable;
}
@ -55,8 +55,8 @@ public class DFAVerifier {
}
public Set<DFAState> getDeadStates() {
Set<DFAState> dead = new HashSet<DFAState>(dfa.states.size());
dead.addAll(dfa.states.values());
Set<DFAState> dead = new HashSet<DFAState>(dfa.stateSet.size());
dead.addAll(dfa.stateSet.values());
// for (DFAState a : dfa.altToAcceptState) {
// if ( a!=null ) dead.remove(a);
// }
@ -67,7 +67,7 @@ public class DFAVerifier {
boolean changed = true;
while ( changed ) {
changed = false;
for (DFAState d : dfa.states.values()) {
for (DFAState d : dfa.stateSet.values()) {
if ( !dead.contains(d) ) {
// if d isn't dead, it reaches accept state.
dead.remove(d);

View File

@ -169,6 +169,7 @@ public class Resolver {
int minAlt = resolveByPickingMinAlt(d, null);
// force it to be an accept state
d.isAcceptState = true;
d.predictsAlt = minAlt;
// might be adding new accept state for alt, but that's ok
converter.dfa.defineAcceptState(minAlt, d);
}

View File

@ -148,7 +148,7 @@ public class StackLimitedNFAToDFAConverter {
/** Add t if not in DFA yet, resolving nondet's and then make d-label->t */
void addTransition(DFAState d, IntervalSet label, DFAState t) {
DFAState existing = dfa.states.get(t);
DFAState existing = dfa.stateSet.get(t);
if ( existing != null ) { // seen before; point at old one
d.addEdge(new Edge(existing, label));
return;
@ -562,8 +562,8 @@ public class StackLimitedNFAToDFAConverter {
public Set<Integer> getUnreachableAlts() {
Set<Integer> unreachable = new HashSet<Integer>();
for (int alt=0; alt<dfa.nAlts; alt++) {
if ( alt>0 && dfa.altToAcceptState[alt]==null ) unreachable.add(alt);
for (int alt=1; alt<=dfa.nAlts; alt++) {
if ( dfa.altToAcceptStates[alt]==null ) unreachable.add(alt);
}
return unreachable;
}
@ -576,7 +576,7 @@ public class StackLimitedNFAToDFAConverter {
Set<Integer> alts = resolver.getAmbiguousAlts(d);
List<Integer> sorted = new ArrayList<Integer>(alts);
Collections.sort(sorted);
System.out.println("ambig alts="+sorted);
System.err.println("ambig alts="+sorted);
List<DFAState> dfaStates = probe.getAnyDFAPathToTarget(d);
System.out.print("path =");
for (DFAState d2 : dfaStates) {
@ -599,6 +599,6 @@ public class StackLimitedNFAToDFAConverter {
System.out.println("path = "+path);
}
}
if ( unreachableAlts.size()>0 ) System.out.println("unreachable="+unreachableAlts);
if ( unreachableAlts.size()>0 ) System.err.println("unreachable="+unreachableAlts);
}
}

View File

@ -24,31 +24,25 @@ public class DFA {
/** From what NFAState did we create the DFA? */
public DecisionState decisionNFAStartState;
/** A set of all uniquely-numbered DFA states. Maps hash of DFAState
* to the actual DFAState object. We use this to detect
* existing DFA states. Map<DFAState,DFAState>. Use Map so
/** A set of all DFA states. Use Map so
* we can get old state back (Set only allows you to see if it's there).
* Not used during fixed k lookahead as it's a waste to fill it with
* a dup of states array.
*/
public Map<DFAState, DFAState> states = new HashMap<DFAState, DFAState>();
public Map<DFAState, DFAState> stateSet = new HashMap<DFAState, DFAState>();
/** Maps the state number to the actual DFAState. This contains all
* states, but the states are not unique. s3 might be same as s1 so
* s3 -> s1 in this table. This is how cycles occur. If fixed k,
* then these states will all be unique as states[i] always points
* at state i when no cycles exist.
/** Maps the state number to the actual DFAState.
*
* This is managed in parallel with stateSet and simply provides
* a way to go from state number to DFAState rather than via a
* hash lookup.
*/
//protected List<DFAState> states = new ArrayList<DFAState>();
public List<DFAState> states = new ArrayList<DFAState>();
public int nAlts = 0;
/** Accept states for each predicted alt; an alt may have more than one. Track here */
public List<DFAState>[] altToAcceptState;
public List<DFAState>[] altToAcceptStates;
/** Unique state numbers per DFA */
int stateCounter = 0;
@ -60,22 +54,24 @@ public class DFA {
this.decisionNFAStartState = startState;
nAlts = startState.getNumberOfTransitions();
decision = startState.decision;
altToAcceptState = (ArrayList<DFAState>[])Array.newInstance(ArrayList.class,nAlts+1);
altToAcceptStates = (ArrayList<DFAState>[])Array.newInstance(ArrayList.class,nAlts+1);
}
/** Add a new DFA state to this DFA (doesn't check if already present). */
public void addState(DFAState d) {
states.put(d,d);
stateSet.put(d,d);
d.stateNumber = stateCounter++;
states.add( d ); // index in states should be d.stateCounter
}
public void defineAcceptState(int alt, DFAState acceptState) {
acceptState.isAcceptState = true;
if ( states.get(acceptState)==null ) addState(acceptState);
if ( altToAcceptState[alt]==null ) {
altToAcceptState[alt] = new ArrayList<DFAState>();
acceptState.predictsAlt = alt;
if ( stateSet.get(acceptState)==null ) addState(acceptState);
if ( altToAcceptStates[alt]==null ) {
altToAcceptStates[alt] = new ArrayList<DFAState>();
}
altToAcceptState[alt].add(acceptState);
altToAcceptStates[alt].add(acceptState);
}
public DFAState newState() {

View File

@ -4,6 +4,7 @@ import org.antlr.v4.analysis.NFAConfig;
import org.antlr.v4.analysis.NFAContext;
import org.antlr.v4.analysis.Resolver;
import org.antlr.v4.analysis.SemanticContext;
import org.antlr.v4.misc.IntSet;
import org.antlr.v4.misc.OrderedHashSet;
import java.util.ArrayList;
@ -46,6 +47,9 @@ public class DFAState {
public boolean isAcceptState = false;
/** If accept, which alt does it predict? */
public int predictsAlt;
/** State in which DFA? */
public DFA dfa;
@ -136,6 +140,13 @@ public class DFAState {
public Edge edge(int i) { return edges.get(i); }
/** Return the target of the first edge whose label has at least one
 *  element in common with {@code label}, or null if no edge overlaps it.
 */
public DFAState target(IntSet label) {
    DFAState hit = null;
    for (Edge candidate : edges) {
        IntSet overlap = candidate.label.and(label);
        if ( !overlap.isNil() ) {
            hit = candidate.target;
            break;
        }
    }
    return hit;
}
/** A decent hash for a DFA state is the sum of the NFA state/alt pairs. */
public int hashCode() {
int h = 0;

View File

@ -543,7 +543,16 @@ public class IntervalSet implements IntSet {
return intervals;
}
/** Are two IntervalSets equal? Because all intervals are sorted
/** Hash is the sum of the left edges of all intervals; an empty (nil)
 *  set hashes to 0.
 */
@Override
public int hashCode() {
    int sum = 0;
    if ( !isNil() ) {
        for (Interval range : intervals) {
            sum += range.a;
        }
    }
    return sum;
}
/** Are two IntervalSets equal? Because all intervals are sorted
* and disjoint, equals is a simple linear walk over both lists
* to make sure they are the same. Interval.equals() is used
* by the List.equals() method to check the ranges.

View File

@ -146,7 +146,7 @@ public class DOTGenerator {
List<DFAState> work = new LinkedList<DFAState>();
// define stop states first; seems to be a bug in DOT where doublecircle
for (List<DFAState> stops : startState.dfa.altToAcceptState) {
for (List<DFAState> stops : startState.dfa.altToAcceptStates) {
if ( stops==null ) continue;
for (DFAState d : stops) {
if ( d==null ) continue;
@ -157,7 +157,7 @@ public class DOTGenerator {
}
}
for (DFAState d : startState.dfa.states.values()) {
for (DFAState d : startState.dfa.stateSet.values()) {
if ( d.isAcceptState ) continue;
ST st = stlib.getInstanceOf("state");
st.add("name", "s"+d.stateNumber);