forked from jasder/antlr
wow. added dfa minimization
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6755]
This commit is contained in:
parent
c967b39f44
commit
d72dbb7af4
|
@ -38,6 +38,11 @@ public class AnalysisPipeline {
|
|||
if ( dfa.valid() ) {
|
||||
// ambig / unreachable errors
|
||||
conv.issueAmbiguityWarnings();
|
||||
|
||||
System.out.println("MINIMIZE");
|
||||
DFAMinimizer dmin = new DFAMinimizer(dfa);
|
||||
dmin.minimize();
|
||||
|
||||
return dfa;
|
||||
}
|
||||
|
||||
|
@ -58,6 +63,10 @@ public class AnalysisPipeline {
|
|||
}
|
||||
else System.out.println("recursion limited valid");
|
||||
|
||||
System.out.println("MINIMIZE");
|
||||
DFAMinimizer dmin = new DFAMinimizer(dfa);
|
||||
dmin.minimize();
|
||||
|
||||
return dfa;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,208 @@
|
|||
package org.antlr.v4.analysis;
|
||||
|
||||
import org.antlr.v4.automata.DFA;
|
||||
import org.antlr.v4.automata.DFAState;
|
||||
import org.antlr.v4.automata.Edge;
|
||||
import org.antlr.v4.misc.IntSet;
|
||||
import org.antlr.v4.misc.Interval;
|
||||
import org.antlr.v4.misc.IntervalSet;
|
||||
import org.antlr.v4.misc.OrderedHashSet;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/** First consolidate accept states, which leads to smaller DFA. Also,
|
||||
* consolidate all edges from p to q into a single edge with set.
|
||||
*/
|
||||
public class DFAMinimizer {
|
||||
DFA dfa;
|
||||
|
||||
public DFAMinimizer(DFA dfa) {
|
||||
this.dfa = dfa;
|
||||
}
|
||||
|
||||
public void minimize() {
|
||||
int n = dfa.states.size();
|
||||
boolean[][] distinct = new boolean[n][n];
|
||||
|
||||
Set<IntSet> labels = new HashSet<IntSet>();
|
||||
for (DFAState d : dfa.states) {
|
||||
for (Edge e : d.edges) {
|
||||
// todo: slow? might want to flatten to list of int token types
|
||||
labels.add(e.label);
|
||||
}
|
||||
}
|
||||
System.out.println("labels="+labels);
|
||||
|
||||
// create initial partition distinguishing between states and accept states
|
||||
// we need to distinguish between accepts for different alts.
|
||||
// we may have also have multiple accepts per alt--put all of them in same partition
|
||||
for (int alt=1; alt<=dfa.nAlts; alt++) {
|
||||
List<DFAState> acceptsForAlt = dfa.altToAcceptStates[alt];
|
||||
// distinguish all these accepts from every other state
|
||||
for (DFAState p : acceptsForAlt) {
|
||||
for (int i=0; i<n; i++) {
|
||||
DFAState q = dfa.states.get(i);
|
||||
// if q not accept state or p and q predict diff alts, distinguish them
|
||||
if ( !q.isAcceptState || q.predictsAlt!=alt ) {
|
||||
distinct[p.stateNumber][i] = true;
|
||||
distinct[i][p.stateNumber] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i=1; i<n; i++) {
|
||||
for (int j=0; j<i; j++) {
|
||||
DFAState p = dfa.states.get(i);
|
||||
DFAState q = dfa.states.get(j);
|
||||
if ( (p.isAcceptState && !q.isAcceptState) ||
|
||||
(!p.isAcceptState && q.isAcceptState) )
|
||||
{
|
||||
// make symmetric even though algorithsm on web don't
|
||||
// seems that DISTINCT(?(p, a),?(q, a)) might go out of
|
||||
// range in my examples. Maybe they assume symmetric
|
||||
// w/o saying it. Didn't see any code.
|
||||
distinct[i][j] = true;
|
||||
distinct[j][i] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
print(distinct);
|
||||
|
||||
boolean changed = true;
|
||||
while ( changed ) {
|
||||
changed = false;
|
||||
|
||||
for (int i=1; i<n; i++) {
|
||||
for (int j=0; j<i; j++) {
|
||||
if ( distinct[i][j] ) continue;
|
||||
DFAState p = dfa.states.get(i);
|
||||
DFAState q = dfa.states.get(j);
|
||||
for (IntSet label : labels) {
|
||||
DFAState pt = p.target(label);
|
||||
DFAState qt = q.target(label);
|
||||
// System.out.println(p.stateNumber+"-"+label.toString(dfa.g)+"->"+pt);
|
||||
// System.out.println(q.stateNumber+"-"+label.toString(dfa.g)+"->"+qt);
|
||||
// if DISTINCT(p,q) is empty and
|
||||
// DISTINCT(?(p, a),?(q, a)) is not empty
|
||||
// then DISTINCT(p,q) = a.
|
||||
// No one seems to show example of case where
|
||||
// ?(p,a)==nil. I assume that if one of states
|
||||
// can't transition on label, assume p,q are distinct.
|
||||
// If both p,q can't transition on label, we don't
|
||||
// know anything about their distinctness.
|
||||
// AH! jflex code says alg assumes DFA is "total" so
|
||||
// it adds error state. If both are errors, same state
|
||||
// so leave as equiv (nondistinct). If one goes to
|
||||
// error (pt or qt is null) and other doesn't, must
|
||||
// be in distinct sets so p,q are distinct.
|
||||
if ( pt==null && qt==null ) continue;
|
||||
if ( pt==null || qt==null ||
|
||||
distinct[pt.stateNumber][qt.stateNumber] )
|
||||
{
|
||||
distinct[i][j] = true;
|
||||
distinct[j][i] = true;
|
||||
changed = true;
|
||||
break; // we've marked; move to next state
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
print(distinct);
|
||||
|
||||
// Make equiv sets using transitive property
|
||||
IntervalSet[] stateToSet = new IntervalSet[n];
|
||||
for (int i=0; i<n; i++) stateToSet[i] = new IntervalSet();
|
||||
|
||||
System.out.println("equiv pairs:");
|
||||
for (int i=1; i<n; i++) {
|
||||
for (int j=0; j<i; j++) {
|
||||
if ( !distinct[i][j] ) {
|
||||
System.out.println(i+","+j);
|
||||
stateToSet[i].add(i);
|
||||
stateToSet[i].add(j);
|
||||
stateToSet[j].add(i);
|
||||
stateToSet[j].add(j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println("equiv sets:");
|
||||
OrderedHashSet<IntervalSet> uniq = new OrderedHashSet<IntervalSet>();
|
||||
for (int i=0; i<stateToSet.length; i++) {
|
||||
IntervalSet s = stateToSet[i];
|
||||
if ( s.isNil() ) s.add(i); // i must be it's own set if not equiv
|
||||
//if ( s.isNil() ) continue;
|
||||
System.out.println(s);
|
||||
uniq.add(s);
|
||||
}
|
||||
System.out.println("uniq sets = "+uniq);
|
||||
if ( uniq.size()==0 ) {
|
||||
System.out.println("was already minimal");
|
||||
return;
|
||||
}
|
||||
|
||||
// minimize the DFA (combine equiv sets)
|
||||
// merge all edges from a set to first state in set
|
||||
DFAState[] states = new DFAState[n];
|
||||
// first map all states in set to same DFA state (old min)
|
||||
for (IntervalSet s : uniq) {
|
||||
int min = s.getMinElement();
|
||||
states[min] = dfa.states.get(min);
|
||||
List<Interval> intervals = s.getIntervals();
|
||||
for (Interval I : intervals) {
|
||||
for (int i=I.a; i<=I.b; i++) {
|
||||
states[i] = states[min];
|
||||
}
|
||||
}
|
||||
}
|
||||
for (DFAState s : states) System.out.println(s);
|
||||
// now do edges
|
||||
for (IntervalSet s : uniq) {
|
||||
List<Interval> intervals = s.getIntervals();
|
||||
System.out.println("do set "+s);
|
||||
for (Interval I : intervals) {
|
||||
for (int i=I.a; i<=I.b; i++) {
|
||||
DFAState p = dfa.states.get(i);
|
||||
for (Edge e : p.edges) {
|
||||
System.out.println(p.stateNumber+" upon "+e.toString(dfa.g)+
|
||||
" used to point at "+e.target.stateNumber+
|
||||
" now points at "+states[e.target.stateNumber].stateNumber);
|
||||
e.target = states[e.target.stateNumber];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// now kill unused states
|
||||
for (IntervalSet s : uniq) {
|
||||
List<Interval> intervals = s.getIntervals();
|
||||
for (Interval I : intervals) {
|
||||
for (int i=I.a+1; i<=I.b; i++) {
|
||||
System.out.println("kill "+i);
|
||||
dfa.stateSet.remove(dfa.states.get(i));
|
||||
dfa.states.set(i, null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void print(boolean[][] distinct) {
|
||||
int n = distinct.length;
|
||||
for (int i=0; i<n; i++) {
|
||||
System.out.print(dfa.states.get(i).stateNumber+":");
|
||||
for (int j=0; j<n; j++) {
|
||||
System.out.print(" "+(distinct[i][j]?"T":"F"));
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
System.out.print(" ");
|
||||
for (int j=0; j<n; j++) System.out.print(" "+j);
|
||||
System.out.println();
|
||||
System.out.println();
|
||||
}
|
||||
}
|
|
@ -37,15 +37,15 @@ public class DFAVerifier {
|
|||
public DFAVerifier(DFA dfa, StackLimitedNFAToDFAConverter converter) {
|
||||
this.dfa = dfa;
|
||||
this.converter = converter;
|
||||
for (DFAState d : dfa.states.values()) {
|
||||
for (DFAState d : dfa.stateSet.values()) {
|
||||
for (Edge e : d.edges) incidentStates.map(e.target, d);
|
||||
}
|
||||
}
|
||||
|
||||
public Set<Integer> getUnreachableAlts() {
|
||||
Set<Integer> unreachable = new HashSet<Integer>();
|
||||
for (int alt=0; alt<dfa.nAlts; alt++) {
|
||||
if ( dfa.altToAcceptState[alt]==null ) unreachable.add(alt);
|
||||
for (int alt=0; alt<=dfa.nAlts; alt++) {
|
||||
if ( dfa.altToAcceptStates[alt]==null ) unreachable.add(alt);
|
||||
}
|
||||
return unreachable;
|
||||
}
|
||||
|
@ -55,8 +55,8 @@ public class DFAVerifier {
|
|||
}
|
||||
|
||||
public Set<DFAState> getDeadStates() {
|
||||
Set<DFAState> dead = new HashSet<DFAState>(dfa.states.size());
|
||||
dead.addAll(dfa.states.values());
|
||||
Set<DFAState> dead = new HashSet<DFAState>(dfa.stateSet.size());
|
||||
dead.addAll(dfa.stateSet.values());
|
||||
// for (DFAState a : dfa.altToAcceptState) {
|
||||
// if ( a!=null ) dead.remove(a);
|
||||
// }
|
||||
|
@ -67,7 +67,7 @@ public class DFAVerifier {
|
|||
boolean changed = true;
|
||||
while ( changed ) {
|
||||
changed = false;
|
||||
for (DFAState d : dfa.states.values()) {
|
||||
for (DFAState d : dfa.stateSet.values()) {
|
||||
if ( !dead.contains(d) ) {
|
||||
// if d isn't dead, it reaches accept state.
|
||||
dead.remove(d);
|
||||
|
|
|
@ -169,6 +169,7 @@ public class Resolver {
|
|||
int minAlt = resolveByPickingMinAlt(d, null);
|
||||
// force it to be an accept state
|
||||
d.isAcceptState = true;
|
||||
d.predictsAlt = minAlt;
|
||||
// might be adding new accept state for alt, but that's ok
|
||||
converter.dfa.defineAcceptState(minAlt, d);
|
||||
}
|
||||
|
|
|
@ -148,7 +148,7 @@ public class StackLimitedNFAToDFAConverter {
|
|||
|
||||
/** Add t if not in DFA yet, resolving nondet's and then make d-label->t */
|
||||
void addTransition(DFAState d, IntervalSet label, DFAState t) {
|
||||
DFAState existing = dfa.states.get(t);
|
||||
DFAState existing = dfa.stateSet.get(t);
|
||||
if ( existing != null ) { // seen before; point at old one
|
||||
d.addEdge(new Edge(existing, label));
|
||||
return;
|
||||
|
@ -562,8 +562,8 @@ public class StackLimitedNFAToDFAConverter {
|
|||
|
||||
public Set<Integer> getUnreachableAlts() {
|
||||
Set<Integer> unreachable = new HashSet<Integer>();
|
||||
for (int alt=0; alt<dfa.nAlts; alt++) {
|
||||
if ( alt>0 && dfa.altToAcceptState[alt]==null ) unreachable.add(alt);
|
||||
for (int alt=1; alt<=dfa.nAlts; alt++) {
|
||||
if ( dfa.altToAcceptStates[alt]==null ) unreachable.add(alt);
|
||||
}
|
||||
return unreachable;
|
||||
}
|
||||
|
@ -576,7 +576,7 @@ public class StackLimitedNFAToDFAConverter {
|
|||
Set<Integer> alts = resolver.getAmbiguousAlts(d);
|
||||
List<Integer> sorted = new ArrayList<Integer>(alts);
|
||||
Collections.sort(sorted);
|
||||
System.out.println("ambig alts="+sorted);
|
||||
System.err.println("ambig alts="+sorted);
|
||||
List<DFAState> dfaStates = probe.getAnyDFAPathToTarget(d);
|
||||
System.out.print("path =");
|
||||
for (DFAState d2 : dfaStates) {
|
||||
|
@ -599,6 +599,6 @@ public class StackLimitedNFAToDFAConverter {
|
|||
System.out.println("path = "+path);
|
||||
}
|
||||
}
|
||||
if ( unreachableAlts.size()>0 ) System.out.println("unreachable="+unreachableAlts);
|
||||
if ( unreachableAlts.size()>0 ) System.err.println("unreachable="+unreachableAlts);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,31 +24,25 @@ public class DFA {
|
|||
/** From what NFAState did we create the DFA? */
|
||||
public DecisionState decisionNFAStartState;
|
||||
|
||||
/** A set of all uniquely-numbered DFA states. Maps hash of DFAState
|
||||
* to the actual DFAState object. We use this to detect
|
||||
* existing DFA states. Map<DFAState,DFAState>. Use Map so
|
||||
/** A set of all DFA states. Use Map so
|
||||
* we can get old state back (Set only allows you to see if it's there).
|
||||
* Not used during fixed k lookahead as it's a waste to fill it with
|
||||
* a dup of states array.
|
||||
*/
|
||||
public Map<DFAState, DFAState> states = new HashMap<DFAState, DFAState>();
|
||||
public Map<DFAState, DFAState> stateSet = new HashMap<DFAState, DFAState>();
|
||||
|
||||
/** Maps the state number to the actual DFAState. This contains all
|
||||
* states, but the states are not unique. s3 might be same as s1 so
|
||||
* s3 -> s1 in this table. This is how cycles occur. If fixed k,
|
||||
* then these states will all be unique as states[i] always points
|
||||
* at state i when no cycles exist.
|
||||
/** Maps the state number to the actual DFAState.
|
||||
*
|
||||
* This is managed in parallel with stateSet and simply provides
|
||||
* a way to go from state number to DFAState rather than via a
|
||||
* hash lookup.
|
||||
*/
|
||||
//protected List<DFAState> states = new ArrayList<DFAState>();
|
||||
public List<DFAState> states = new ArrayList<DFAState>();
|
||||
|
||||
public int nAlts = 0;
|
||||
|
||||
/** Track every accept state for each predicted alt (an alt may have several); indexed by alt number */
|
||||
public List<DFAState>[] altToAcceptState;
|
||||
public List<DFAState>[] altToAcceptStates;
|
||||
|
||||
/** Unique state numbers per DFA */
|
||||
int stateCounter = 0;
|
||||
|
@ -60,22 +54,24 @@ public class DFA {
|
|||
this.decisionNFAStartState = startState;
|
||||
nAlts = startState.getNumberOfTransitions();
|
||||
decision = startState.decision;
|
||||
altToAcceptState = (ArrayList<DFAState>[])Array.newInstance(ArrayList.class,nAlts+1);
|
||||
altToAcceptStates = (ArrayList<DFAState>[])Array.newInstance(ArrayList.class,nAlts+1);
|
||||
}
|
||||
|
||||
/** Add a new DFA state to this DFA (doesn't check if already present). */
|
||||
public void addState(DFAState d) {
|
||||
states.put(d,d);
|
||||
stateSet.put(d,d);
|
||||
d.stateNumber = stateCounter++;
|
||||
states.add( d ); // index in states should be d.stateCounter
|
||||
}
|
||||
|
||||
public void defineAcceptState(int alt, DFAState acceptState) {
|
||||
acceptState.isAcceptState = true;
|
||||
if ( states.get(acceptState)==null ) addState(acceptState);
|
||||
if ( altToAcceptState[alt]==null ) {
|
||||
altToAcceptState[alt] = new ArrayList<DFAState>();
|
||||
acceptState.predictsAlt = alt;
|
||||
if ( stateSet.get(acceptState)==null ) addState(acceptState);
|
||||
if ( altToAcceptStates[alt]==null ) {
|
||||
altToAcceptStates[alt] = new ArrayList<DFAState>();
|
||||
}
|
||||
altToAcceptState[alt].add(acceptState);
|
||||
altToAcceptStates[alt].add(acceptState);
|
||||
}
|
||||
|
||||
public DFAState newState() {
|
||||
|
|
|
@ -4,6 +4,7 @@ import org.antlr.v4.analysis.NFAConfig;
|
|||
import org.antlr.v4.analysis.NFAContext;
|
||||
import org.antlr.v4.analysis.Resolver;
|
||||
import org.antlr.v4.analysis.SemanticContext;
|
||||
import org.antlr.v4.misc.IntSet;
|
||||
import org.antlr.v4.misc.OrderedHashSet;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
@ -46,6 +47,9 @@ public class DFAState {
|
|||
|
||||
public boolean isAcceptState = false;
|
||||
|
||||
/** If accept, which alt does it predict? */
|
||||
public int predictsAlt;
|
||||
|
||||
/** State in which DFA? */
|
||||
public DFA dfa;
|
||||
|
||||
|
@ -136,6 +140,13 @@ public class DFAState {
|
|||
|
||||
public Edge edge(int i) { return edges.get(i); }
|
||||
|
||||
public DFAState target(IntSet label) {
|
||||
for (Edge e : edges) {
|
||||
if ( !e.label.and(label).isNil() ) return e.target;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/** A decent hash for a DFA state is the sum of the NFA state/alt pairs. */
|
||||
public int hashCode() {
|
||||
int h = 0;
|
||||
|
|
|
@ -543,7 +543,16 @@ public class IntervalSet implements IntSet {
|
|||
return intervals;
|
||||
}
|
||||
|
||||
/** Are two IntervalSets equal? Because all intervals are sorted
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if ( isNil() ) return 0;
|
||||
int n = 0;
|
||||
// just add left edge of intervals
|
||||
for (Interval I : intervals) n += I.a;
|
||||
return n;
|
||||
}
|
||||
|
||||
/** Are two IntervalSets equal? Because all intervals are sorted
|
||||
* and disjoint, equals is a simple linear walk over both lists
|
||||
* to make sure they are the same. Interval.equals() is used
|
||||
* by the List.equals() method to check the ranges.
|
||||
|
|
|
@ -146,7 +146,7 @@ public class DOTGenerator {
|
|||
List<DFAState> work = new LinkedList<DFAState>();
|
||||
|
||||
// define stop states first; seems to be a bug in DOT where doublecircle
|
||||
for (List<DFAState> stops : startState.dfa.altToAcceptState) {
|
||||
for (List<DFAState> stops : startState.dfa.altToAcceptStates) {
|
||||
if ( stops==null ) continue;
|
||||
for (DFAState d : stops) {
|
||||
if ( d==null ) continue;
|
||||
|
@ -157,7 +157,7 @@ public class DOTGenerator {
|
|||
}
|
||||
}
|
||||
|
||||
for (DFAState d : startState.dfa.states.values()) {
|
||||
for (DFAState d : startState.dfa.stateSet.values()) {
|
||||
if ( d.isAcceptState ) continue;
|
||||
ST st = stlib.getInstanceOf("state");
|
||||
st.add("name", "s"+d.stateNumber);
|
||||
|
|
Loading…
Reference in New Issue