forked from jasder/antlr
have pieces of DFA conversion working
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6742]
This commit is contained in:
parent
6ad8ff2a46
commit
665b64fee0
|
@ -1 +1,3 @@
|
|||
action-edge() ::= <<
|
||||
<src> -> <target> [fontsize=11, fontname="Courier", arrowsize=.7, label = "<label>"<if(arrowhead)>, arrowhead = <arrowhead><endif>];
|
||||
>>
|
||||
|
|
|
@ -1 +1,4 @@
|
|||
decision-rank() ::= <<
|
||||
{rank=same; rankdir=TB; <states; separator="; ">}
|
||||
>>
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
dfa() ::= <<
|
||||
digraph NFA {
|
||||
<if(rankdir)>rankdir=<rankdir>;<endif>
|
||||
<decisionRanks; separator="\n">
|
||||
<states; separator="\n">
|
||||
<edges; separator="\n">
|
||||
}
|
||||
|
||||
>>
|
||||
|
|
|
@ -1 +1,4 @@
|
|||
edge(src,target,label,arrowhead) ::= <<
|
||||
<src> -> <target> [fontsize=11, fontname="Courier", arrowsize=.7, label = "<label>"<if(arrowhead)>, arrowhead = <arrowhead><endif>];
|
||||
>>
|
||||
|
||||
|
|
|
@ -1 +1,3 @@
|
|||
epsilon-edge(src,label,target,arrowhead) ::= <<
|
||||
<src> -> <target> [fontname="Times-Italic", label = "e"];
|
||||
>>
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
nfa(startState, states, edges, rankdir, decisionRanks) ::= <<
|
||||
digraph NFA {
|
||||
rankdir=LR;
|
||||
<decisionRanks; separator="\n">
|
||||
<states; separator="\n">
|
||||
<edges; separator="\n">
|
||||
}
|
||||
>>
|
||||
|
|
|
@ -1 +1,3 @@
|
|||
state(state, useBox, name) ::= <<
|
||||
node [fontsize=11, shape = <if(useBox)>box<else>circle, fixedsize=true, width=.4<endif>]; <name>
|
||||
>>
|
|
@ -1 +1,3 @@
|
|||
stopstate(name,useBox) ::= <<
|
||||
node [fontsize=11, shape = <if(useBox)>polygon,sides=4,peripheries=2<else>doublecircle, fixedsize=true, width=.6<endif>]; <name>
|
||||
>>
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package org.antlr.v4;
|
||||
|
||||
import org.antlr.codegen.CodeGenerator;
|
||||
import org.antlr.runtime.*;
|
||||
import org.antlr.runtime.tree.TreeWizard;
|
||||
import org.antlr.v4.analysis.AnalysisPipeline;
|
||||
|
@ -12,8 +13,7 @@ import org.antlr.v4.parse.GrammarASTAdaptor;
|
|||
import org.antlr.v4.semantics.SemanticPipeline;
|
||||
import org.antlr.v4.tool.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
public class Tool {
|
||||
|
@ -371,6 +371,9 @@ public class Tool {
|
|||
NFAFactory factory = new ParserNFAFactory(g);
|
||||
if ( g.getType()==ANTLRParser.LEXER ) factory = new LexerNFAFactory(g);
|
||||
g.nfa = factory.createNFA();
|
||||
|
||||
if ( generate_NFA_dot ) generateNFAs(g);
|
||||
|
||||
|
||||
// PERFORM GRAMMAR ANALYSIS ON NFA: BUILD DECISION DFAs
|
||||
AnalysisPipeline anal = new AnalysisPipeline(g);
|
||||
|
@ -480,6 +483,151 @@ public class Tool {
|
|||
return lexerAST;
|
||||
}
|
||||
|
||||
protected void generateNFAs(Grammar g) {
|
||||
DOTGenerator dotGenerator = new DOTGenerator(g);
|
||||
List<Grammar> grammars = new ArrayList<Grammar>();
|
||||
grammars.add(g);
|
||||
List<Grammar> imported = g.getAllImportedGrammars();
|
||||
if ( imported!=null ) grammars.addAll(imported);
|
||||
for (Grammar ig : grammars) {
|
||||
for (Rule r : ig.rules.values()) {
|
||||
try {
|
||||
String dot = dotGenerator.getDOT(g.nfa.ruleToStartState.get(r));
|
||||
if (dot != null) {
|
||||
writeDOTFile(g, r, dot);
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
ErrorManager.toolError(ErrorType.CANNOT_WRITE_FILE, ioe);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void writeDOTFile(Grammar g, Rule r, String dot) throws IOException {
|
||||
writeDOTFile(g, r.g.name + "." + r.name, dot);
|
||||
}
|
||||
|
||||
protected void writeDOTFile(Grammar g, String name, String dot) throws IOException {
|
||||
Writer fw = getOutputFile(g, name + ".dot");
|
||||
fw.write(dot);
|
||||
fw.close();
|
||||
}
|
||||
|
||||
/** This method is used by all code generators to create new output
|
||||
* files. If the outputDir set by -o is not present it will be created.
|
||||
* The final filename is sensitive to the output directory and
|
||||
* the directory where the grammar file was found. If -o is /tmp
|
||||
* and the original grammar file was foo/t.g then output files
|
||||
* go in /tmp/foo.
|
||||
*
|
||||
* The output dir -o spec takes precedence if it's absolute.
|
||||
* E.g., if the grammar file dir is absolute the output dir is given
|
||||
* precendence. "-o /tmp /usr/lib/t.g" results in "/tmp/T.java" as
|
||||
* output (assuming t.g holds T.java).
|
||||
*
|
||||
* If no -o is specified, then just write to the directory where the
|
||||
* grammar file was found.
|
||||
*
|
||||
* If outputDirectory==null then write a String.
|
||||
*/
|
||||
public Writer getOutputFile(Grammar g, String fileName) throws IOException {
|
||||
if (outputDirectory == null) {
|
||||
return new StringWriter();
|
||||
}
|
||||
// output directory is a function of where the grammar file lives
|
||||
// for subdir/T.g, you get subdir here. Well, depends on -o etc...
|
||||
// But, if this is a .tokens file, then we force the output to
|
||||
// be the base output directory (or current directory if there is not a -o)
|
||||
//
|
||||
File outputDir;
|
||||
if (fileName.endsWith(CodeGenerator.VOCAB_FILE_EXTENSION)) {
|
||||
if (haveOutputDir) {
|
||||
outputDir = new File(outputDirectory);
|
||||
}
|
||||
else {
|
||||
outputDir = new File(".");
|
||||
}
|
||||
}
|
||||
else {
|
||||
outputDir = getOutputDirectory(g.fileName);
|
||||
}
|
||||
File outputFile = new File(outputDir, fileName);
|
||||
|
||||
if (!outputDir.exists()) {
|
||||
outputDir.mkdirs();
|
||||
}
|
||||
FileWriter fw = new FileWriter(outputFile);
|
||||
return new BufferedWriter(fw);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the location where ANTLR will generate output files for a given file. This is a
|
||||
* base directory and output files will be relative to here in some cases
|
||||
* such as when -o option is used and input files are given relative
|
||||
* to the input directory.
|
||||
*
|
||||
* @param fileNameWithPath path to input source
|
||||
* @return
|
||||
*/
|
||||
public File getOutputDirectory(String fileNameWithPath) {
|
||||
|
||||
File outputDir = new File(outputDirectory);
|
||||
String fileDirectory;
|
||||
|
||||
// Some files are given to us without a PATH but should should
|
||||
// still be written to the output directory in the relative path of
|
||||
// the output directory. The file directory is either the set of sub directories
|
||||
// or just or the relative path recorded for the parent grammar. This means
|
||||
// that when we write the tokens files, or the .java files for imported grammars
|
||||
// taht we will write them in the correct place.
|
||||
//
|
||||
if (fileNameWithPath.lastIndexOf(File.separatorChar) == -1) {
|
||||
|
||||
// No path is included in the file name, so make the file
|
||||
// directory the same as the parent grammar (which might sitll be just ""
|
||||
// but when it is not, we will write the file in the correct place.
|
||||
//
|
||||
fileDirectory = grammarOutputDirectory;
|
||||
|
||||
}
|
||||
else {
|
||||
fileDirectory = fileNameWithPath.substring(0, fileNameWithPath.lastIndexOf(File.separatorChar));
|
||||
}
|
||||
if ( fileDirectory == null ) {
|
||||
fileDirectory = ".";
|
||||
}
|
||||
if (haveOutputDir) {
|
||||
// -o /tmp /var/lib/t.g => /tmp/T.java
|
||||
// -o subdir/output /usr/lib/t.g => subdir/output/T.java
|
||||
// -o . /usr/lib/t.g => ./T.java
|
||||
if ((fileDirectory != null && !forceRelativeOutput) &&
|
||||
(new File(fileDirectory).isAbsolute() ||
|
||||
fileDirectory.startsWith("~")) || // isAbsolute doesn't count this :(
|
||||
forceAllFilesToOutputDir) {
|
||||
// somebody set the dir, it takes precendence; write new file there
|
||||
outputDir = new File(outputDirectory);
|
||||
}
|
||||
else {
|
||||
// -o /tmp subdir/t.g => /tmp/subdir/t.g
|
||||
if (fileDirectory != null) {
|
||||
outputDir = new File(outputDirectory, fileDirectory);
|
||||
}
|
||||
else {
|
||||
outputDir = new File(outputDirectory);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// they didn't specify a -o dir so just write to location
|
||||
// where grammar is, absolute or relative, this will only happen
|
||||
// with command line invocation as build tools will always
|
||||
// supply an output directory.
|
||||
//
|
||||
outputDir = new File(fileDirectory);
|
||||
}
|
||||
return outputDir;
|
||||
}
|
||||
|
||||
private static void version() {
|
||||
ErrorManager.info("ANTLR Parser Generator Version " + new Tool().VERSION);
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@ package org.antlr.v4.automata;
|
|||
import org.antlr.v4.misc.IntervalSet;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
|
||||
/** */
|
||||
/** TODO: make all transitions sets? */
|
||||
public class AtomTransition extends Transition {
|
||||
/** The token type or character value; or, signifies special label. */
|
||||
protected int label;
|
||||
|
@ -13,6 +13,8 @@ public class AtomTransition extends Transition {
|
|||
this.target = target;
|
||||
}
|
||||
|
||||
/** Return this transition's single {@code label} value wrapped in an
 *  {@link IntervalSet}.
 */
@Override
public IntervalSet label() { return IntervalSet.of(label); }
|
||||
|
||||
/** The hash code is the {@code label} value itself. */
@Override
public int hashCode() { return label; }
|
||||
|
||||
public boolean equals(Object o) {
|
||||
|
|
|
@ -37,7 +37,7 @@ public class DFASerializer {
|
|||
marked.add(s);
|
||||
for (int i=0; i<n; i++) {
|
||||
Edge t = s.transition(i);
|
||||
buf.append("-"+t.toString()+"->"+ getStateString(t.target)+'\n');
|
||||
buf.append("-"+t.toString(g)+"->"+ getStateString(t.target)+'\n');
|
||||
}
|
||||
}
|
||||
return buf.toString();
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.analysis.NFA;
|
||||
import org.antlr.v4.misc.Utils;
|
||||
import org.stringtemplate.v4.misc.MultiMap;
|
||||
|
||||
|
@ -195,7 +196,7 @@ public class DFAState {
|
|||
if ( cachedUniquelyPredicatedAlt!=NFA.INVALID_ALT_NUMBER ) {
|
||||
return cachedUniquelyPredicatedAlt;
|
||||
}
|
||||
int alt = org.antlr.analysis.NFA.INVALID_ALT_NUMBER;
|
||||
int alt = NFA.INVALID_ALT_NUMBER;
|
||||
for (NFAConfig c : nfaConfigs) {
|
||||
if ( alt== NFA.INVALID_ALT_NUMBER ) {
|
||||
alt = c.alt; // found first nonresolved alt
|
||||
|
@ -208,6 +209,23 @@ public class DFAState {
|
|||
return alt;
|
||||
}
|
||||
|
||||
/** Return the uniquely mentioned alt from the NFA configurations;
|
||||
* Ignore the resolved bit etc... Return INVALID_ALT_NUMBER
|
||||
* if there is more than one alt mentioned.
|
||||
*/
|
||||
public int getUniqueAlt() {
|
||||
int alt = NFA.INVALID_ALT_NUMBER;
|
||||
for (NFAConfig c : nfaConfigs) {
|
||||
if ( alt== NFA.INVALID_ALT_NUMBER ) {
|
||||
alt = c.alt; // found first alt
|
||||
}
|
||||
else if ( c.alt!=alt ) {
|
||||
return NFA.INVALID_ALT_NUMBER;
|
||||
}
|
||||
}
|
||||
return alt;
|
||||
}
|
||||
|
||||
/** Get the set of all alts mentioned by all NFA configurations in this
|
||||
* DFA state.
|
||||
*/
|
||||
|
@ -220,7 +238,13 @@ public class DFAState {
|
|||
return alts;
|
||||
}
|
||||
|
||||
|
||||
public Set<NFAState> getUniqueNFAStates() {
|
||||
Set<NFAState> alts = new HashSet<NFAState>();
|
||||
for (NFAConfig c : nfaConfigs) alts.add(c.state);
|
||||
if ( alts.size()==0 ) return null;
|
||||
return alts;
|
||||
}
|
||||
|
||||
/** Return how many edges are currently stored in {@code edges}. */
public int getNumberOfTransitions() { return edges.size(); }
|
||||
|
||||
/** Append edge {@code e} to this state's {@code edges}. */
public void addTransition(Edge e) { edges.add(e); }
|
||||
|
|
|
@ -5,15 +5,17 @@ import org.antlr.v4.tool.Grammar;
|
|||
|
||||
/** A DFA edge (NFA edges are called transitions) */
|
||||
public class Edge {
|
||||
public int atom = Label.INVALID;
|
||||
public IntervalSet set;
|
||||
|
||||
public IntervalSet label;
|
||||
public DFAState target;
|
||||
|
||||
public Edge(DFAState target) { this.target = target; }
|
||||
public Edge(DFAState target, IntervalSet label) {
|
||||
this.target = target;
|
||||
this.label = label;
|
||||
}
|
||||
|
||||
public String toString() { return label.toString(); }
|
||||
|
||||
public String toString(Grammar g) {
|
||||
if ( set==null ) return g.getTokenDisplayName(atom);
|
||||
else return set.toString(g);
|
||||
return label.toString(g);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.runtime.Token;
|
||||
import org.antlr.v4.misc.IntervalSet;
|
||||
|
||||
/** A state machine transition label. A label can be either a simple
|
||||
* label such as a token or character. A label can be a set of char or
|
||||
|
@ -73,6 +74,10 @@ public abstract class Label implements /*Comparable, */ Cloneable {
|
|||
*/
|
||||
public static final int EOR_TOKEN_TYPE = Token.EOR_TOKEN_TYPE;
|
||||
|
||||
public int atom = Label.INVALID;
|
||||
public IntervalSet set;
|
||||
|
||||
|
||||
public int compareTo(Object o) {
|
||||
return 0; // TODO: impl
|
||||
}
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.v4.misc.IntervalSet;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/** Code that embodies the NFA conversion to DFA. A new object is needed
|
||||
* per DFA (also required for thread safety if multiple conversions
|
||||
|
@ -21,6 +20,12 @@ public class NFAToDFAConverter {
|
|||
/** A list of DFA states we still need to process during NFA conversion */
|
||||
List<DFAState> work = new LinkedList<DFAState>();
|
||||
|
||||
/** Used to prevent the closure operation from looping to itself and
|
||||
* hence looping forever. Sensitive to the NFA state, the alt, and
|
||||
* the stack context.
|
||||
*/
|
||||
Set<NFAConfig> closureBusy;
|
||||
|
||||
public static boolean debug = false;
|
||||
|
||||
public NFAToDFAConverter(Grammar g, DecisionState nfaStartState) {
|
||||
|
@ -36,12 +41,94 @@ public class NFAToDFAConverter {
|
|||
|
||||
// while more DFA states to check, process them
|
||||
while ( work.size()>0 ) {
|
||||
|
||||
DFAState d = work.get(0);
|
||||
reach(d);
|
||||
work.remove(0); // we're done with this DFA state
|
||||
}
|
||||
|
||||
return dfa;
|
||||
}
|
||||
|
||||
/** From this node, add a d--a-->t transition for all
|
||||
* labels 'a' where t is a DFA node created
|
||||
* from the set of NFA states reachable from any NFA
|
||||
* state in DFA state d.
|
||||
*/
|
||||
void reach(DFAState d) {
|
||||
OrderedHashSet<IntervalSet> labels = getReachableLabels(d);
|
||||
|
||||
for (IntervalSet label : labels) {
|
||||
DFAState newd = reach(d, label);
|
||||
if ( debug ) {
|
||||
System.out.println("DFA state after reach "+label+" "+d+"-" +
|
||||
label.toString(g)+"->"+newd);
|
||||
}
|
||||
if ( newd==null ) {
|
||||
// nothing was reached by label due to conflict resolution
|
||||
// EOT also seems to be in here occasionally probably due
|
||||
// to an end-of-rule state seeing it even though we'll pop
|
||||
// an invoking state off the state; don't bother to conflict
|
||||
// as this labels set is a covering approximation only.
|
||||
continue;
|
||||
}
|
||||
if ( newd.getUniqueAlt()== NFA.INVALID_ALT_NUMBER ) {
|
||||
// Only compute closure if a unique alt number is not known.
|
||||
// If a unique alternative is mentioned among all NFA
|
||||
// configurations then there is no possibility of needing to look
|
||||
// beyond this state; also no possibility of a nondeterminism.
|
||||
// This optimization May 22, 2006 just dropped -Xint time
|
||||
// for analysis of Java grammar from 11.5s to 2s! Wow.
|
||||
closure(newd); // add any NFA states reachable via epsilon
|
||||
}
|
||||
|
||||
// add if not in DFA yet and then make d-label->t
|
||||
DFAState targetState = newd;
|
||||
DFAState existingState = dfa.addState(newd);
|
||||
if ( newd != existingState ) {
|
||||
// already there...use/return the existing DFA state.
|
||||
targetState = existingState;
|
||||
}
|
||||
|
||||
d.addTransition(new Edge(targetState, label));
|
||||
|
||||
//
|
||||
// numberOfEdgesEmanating +=
|
||||
// addTransition(d, label, targetState, targetToLabelMap);
|
||||
}
|
||||
}
|
||||
|
||||
/** Given the set of NFA states in DFA state d, find all NFA states
|
||||
* reachable traversing label arcs. By definition, there can be
|
||||
* only one DFA state reachable by a single label from DFA state d so we must
|
||||
* find and merge all NFA states reachable via label. Return a new
|
||||
* DFAState that has all of those NFA states with their context.
|
||||
*
|
||||
* Because we cannot jump to another rule nor fall off the end of a rule
|
||||
* via a non-epsilon transition, NFA states reachable from d have the
|
||||
* same configuration as the NFA state in d. So if NFA state 7 in d's
|
||||
* configurations can reach NFA state 13 then 13 will be added to the
|
||||
* new DFAState (labelDFATarget) with the same configuration as state
|
||||
* 7 had.
|
||||
*/
|
||||
public DFAState reach(DFAState d, IntervalSet label) {
|
||||
//System.out.println("reach "+label.toString(dfa.nfa.grammar)+" from "+d.stateNumber);
|
||||
DFAState labelTarget = dfa.newState();
|
||||
|
||||
for (NFAConfig c : d.nfaConfigs) {
|
||||
int n = c.state.getNumberOfTransitions();
|
||||
for (int i=0; i<n; i++) { // for each transition
|
||||
Transition t = c.state.transition(i);
|
||||
// found a transition with label; does it collide with label?
|
||||
if ( !t.isEpsilon() && !t.label().and(label).isNil() ) {
|
||||
// add NFA target to (potentially) new DFA state
|
||||
labelTarget.addNFAConfig(t.target, c.alt, c.context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return labelTarget;
|
||||
}
|
||||
|
||||
/** From this first NFA state of a decision, create a DFA.
|
||||
* Walk each alt in decision and compute closure from the start of that
|
||||
* rule, making sure that the closure does not include other alts within
|
||||
|
@ -57,10 +144,12 @@ public class NFAToDFAConverter {
|
|||
for (int altNum=1; altNum<=dfa.nAlts; altNum++) {
|
||||
Transition t = nfaStartState.transition(altNum-1);
|
||||
NFAState altStart = t.target;
|
||||
d.addNFAConfig(altStart, altNum+1, null);
|
||||
d.addNFAConfig(altStart, altNum, null);
|
||||
|
||||
}
|
||||
|
||||
System.out.println("before closure start d="+d);
|
||||
|
||||
closure(d);
|
||||
|
||||
return d;
|
||||
|
@ -75,12 +164,16 @@ public class NFAToDFAConverter {
|
|||
System.out.println("closure("+d+")");
|
||||
}
|
||||
|
||||
closureBusy = new HashSet<NFAConfig>();
|
||||
|
||||
List<NFAConfig> configs = new ArrayList<NFAConfig>();
|
||||
for (NFAConfig c : d.nfaConfigs) {
|
||||
closure(c.state, c.alt, c.context, configs);
|
||||
}
|
||||
d.nfaConfigs.addAll(configs); // Add new NFA configs to DFA state d
|
||||
|
||||
closureBusy = null; // wack all that memory used during closure
|
||||
|
||||
System.out.println("after closure d="+d);
|
||||
}
|
||||
|
||||
|
@ -92,15 +185,159 @@ public class NFAToDFAConverter {
|
|||
NFAConfig proposedNFAConfig =
|
||||
new NFAConfig(s, altNum, context);
|
||||
|
||||
if ( closureBusy.contains(proposedNFAConfig) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
closureBusy.add(proposedNFAConfig);
|
||||
|
||||
// p itself is always in closure
|
||||
configs.add(proposedNFAConfig);
|
||||
|
||||
if ( s instanceof RuleStopState ) {
|
||||
// do follow
|
||||
int n = s.getNumberOfTransitions();
|
||||
for (int i=0; i<n; i++) {
|
||||
Transition t = s.transition(i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int n = s.getNumberOfTransitions();
|
||||
for (int i=0; i<n; i++) {
|
||||
Transition t = s.transition(i);
|
||||
if ( t.isEpsilon() ) {
|
||||
if ( t instanceof RuleTransition ) {
|
||||
NFAState newContext = context;
|
||||
if ( context==null ) newContext = s; // push new context
|
||||
closure(t.target, altNum, newContext, configs);
|
||||
}
|
||||
else if ( t.isEpsilon() ) {
|
||||
closure(t.target, altNum, context, configs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public OrderedHashSet<IntervalSet> getReachableLabels(DFAState d) {
|
||||
OrderedHashSet<IntervalSet> reachableLabels = new OrderedHashSet<IntervalSet>();
|
||||
for (NFAState s : d.getUniqueNFAStates()) { // for each state
|
||||
int n = s.getNumberOfTransitions();
|
||||
for (int i=0; i<n; i++) { // for each transition
|
||||
Transition t = s.transition(i);
|
||||
IntervalSet label = null;
|
||||
if ( t instanceof AtomTransition ) {
|
||||
label = IntervalSet.of(((AtomTransition)t).label);
|
||||
}
|
||||
else if ( t instanceof SetTransition ) {
|
||||
label = ((SetTransition)t).label;
|
||||
}
|
||||
if ( label!=null ) {
|
||||
addReachableLabel(reachableLabels, label);
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.println("reachable labels for "+d+"="+reachableLabels);
|
||||
return reachableLabels;
|
||||
}
|
||||
|
||||
/** Add label uniquely and disjointly; intersection with
|
||||
* another set or int/char forces breaking up the set(s).
|
||||
*
|
||||
* Example, if reachable list of labels is [a..z, {k,9}, 0..9],
|
||||
* the disjoint list will be [{a..j,l..z}, k, 9, 0..8].
|
||||
*
|
||||
* As we add NFA configurations to a DFA state, we might as well track
|
||||
* the set of all possible transition labels to make the DFA conversion
|
||||
* more efficient. W/o the reachable labels, we'd need to check the
|
||||
* whole vocabulary space (could be 0..\uFFFE)! The problem is that
|
||||
* labels can be sets, which may overlap with int labels or other sets.
|
||||
* As we need a deterministic set of transitions from any
|
||||
* state in the DFA, we must make the reachable labels set disjoint.
|
||||
* This operation amounts to finding the character classes for this
|
||||
* DFA state whereas with tools like flex, that need to generate a
|
||||
* homogeneous DFA, must compute char classes across all states.
|
||||
* We are going to generate DFAs with heterogeneous states so we
|
||||
* only care that the set of transitions out of a single state is
|
||||
* unique. :)
|
||||
*
|
||||
* The idea for adding a new set, t, is to look for overlap with the
|
||||
* elements of existing list s. Upon overlap, replace
|
||||
* existing set s[i] with two new disjoint sets, s[i]-t and s[i]&t.
|
||||
* (if s[i]-t is nil, don't add). The remainder is t-s[i], which is
|
||||
* what you want to add to the set minus what was already there. The
|
||||
* remainder must then be compared against the i+1..n elements in s
|
||||
* looking for another collision. Each collision results in a smaller
|
||||
* and smaller remainder. Stop when you run out of s elements or
|
||||
* remainder goes to nil. If remainder is non nil when you run out of
|
||||
* s elements, then add remainder to the end.
|
||||
*/
|
||||
protected void addReachableLabel(OrderedHashSet<IntervalSet> reachableLabels,
|
||||
IntervalSet label)
|
||||
{
|
||||
/*
|
||||
System.out.println("addReachableLabel to state "+dfa.decisionNumber+"."+stateNumber+": "+label.getSet().toString(dfa.nfa.grammar));
|
||||
System.out.println("start of add to state "+dfa.decisionNumber+"."+stateNumber+": " +
|
||||
"reachableLabels="+reachableLabels.toString());
|
||||
*/
|
||||
if ( reachableLabels.contains(label) ) { // exact label present
|
||||
return;
|
||||
}
|
||||
IntervalSet remainder = label; // remainder starts out as whole set to add
|
||||
int n = reachableLabels.size(); // only look at initial elements
|
||||
// walk the existing list looking for the collision
|
||||
for (int i=0; i<n; i++) {
|
||||
IntervalSet rl = reachableLabels.get(i);
|
||||
/*
|
||||
System.out.println("comparing ["+i+"]: "+label.toString(dfa.nfa.grammar)+" & "+
|
||||
rl.toString(dfa.nfa.grammar)+"="+
|
||||
intersection.toString(dfa.nfa.grammar));
|
||||
*/
|
||||
IntervalSet intersection = (IntervalSet)label.and(rl);
|
||||
if ( intersection.isNil() ) {
|
||||
continue;
|
||||
}
|
||||
//System.out.println(label+" collides with "+rl);
|
||||
|
||||
// For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t)
|
||||
// (ignoring s_i-t if nil; don't put in list)
|
||||
|
||||
// Replace existing s_i with intersection since we
|
||||
// know that will always be a non nil character class
|
||||
IntervalSet s_i = rl;
|
||||
reachableLabels.set(i, intersection);
|
||||
|
||||
// Compute s_i-t to see what is in current set and not in incoming
|
||||
IntervalSet existingMinusNewElements = (IntervalSet)s_i.subtract(label);
|
||||
//System.out.println(s_i+"-"+t+"="+existingMinusNewElements);
|
||||
if ( !existingMinusNewElements.isNil() ) {
|
||||
// found a new character class, add to the end (doesn't affect
|
||||
// outer loop duration due to n computation a priori.
|
||||
reachableLabels.add(existingMinusNewElements);
|
||||
}
|
||||
|
||||
/*
|
||||
System.out.println("after collision, " +
|
||||
"reachableLabels="+reachableLabels.toString());
|
||||
*/
|
||||
|
||||
// anything left to add to the reachableLabels?
|
||||
remainder = (IntervalSet)label.subtract(s_i);
|
||||
if ( remainder.isNil() ) {
|
||||
break; // nothing left to add to set. done!
|
||||
}
|
||||
|
||||
label = remainder;
|
||||
}
|
||||
if ( !remainder.isNil() ) {
|
||||
/*
|
||||
System.out.println("before add remainder to state "+dfa.decisionNumber+"."+stateNumber+": " +
|
||||
"reachableLabels="+reachableLabels.toString());
|
||||
System.out.println("remainder state "+dfa.decisionNumber+"."+stateNumber+": "+remainder.toString(dfa.nfa.grammar));
|
||||
*/
|
||||
reachableLabels.add(remainder);
|
||||
}
|
||||
/*
|
||||
System.out.println("#END of add to state "+dfa.decisionNumber+"."+stateNumber+": " +
|
||||
"reachableLabels="+reachableLabels.toString());
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.v4.codegen.Target;
|
||||
import org.antlr.v4.misc.IntervalSet;
|
||||
|
||||
public class RangeTransition extends Transition {
|
||||
public int from;
|
||||
|
@ -10,10 +11,14 @@ public class RangeTransition extends Transition {
|
|||
this.from = from;
|
||||
this.to = to;
|
||||
}
|
||||
|
||||
public int compareTo(Object o) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalSet label() { return IntervalSet.of(from,to); }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Target.getANTLRCharLiteralForChar(from)+".."+
|
||||
|
|
|
@ -1,14 +1,13 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.v4.misc.IntSet;
|
||||
import org.antlr.v4.misc.IntervalSet;
|
||||
|
||||
/** A label containing a set of values */
|
||||
public class SetTransition extends Transition {
|
||||
/** A set of token types or character codes if label==SET */
|
||||
protected IntSet label;
|
||||
protected IntervalSet label;
|
||||
|
||||
public SetTransition(IntSet label) {
|
||||
public SetTransition(IntervalSet label) {
|
||||
if ( label==null ) {
|
||||
this.label = IntervalSet.of(Label.INVALID);
|
||||
return;
|
||||
|
@ -16,6 +15,8 @@ public class SetTransition extends Transition {
|
|||
this.label = label;
|
||||
}
|
||||
|
||||
/** Return the set of values stored in {@code label}. */
@Override
public IntervalSet label() { return label; }
|
||||
|
||||
public int compareTo(Object o) {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.v4.misc.IntervalSet;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
|
||||
/** An NFA transition between any two NFA states. Subclasses define
|
||||
|
@ -25,5 +26,7 @@ public abstract class Transition implements Comparable {
|
|||
/** Are we epsilon, action, sempred? */
|
||||
public boolean isEpsilon() { return false; }
|
||||
|
||||
/** The set of values this transition matches; this base implementation returns null. */
public IntervalSet label() { return null; }
|
||||
|
||||
/** String form of this transition; this base implementation ignores {@code g} and returns {@code toString()}. */
public String toString(Grammar g) { return toString(); }
|
||||
}
|
||||
|
|
|
@ -48,7 +48,7 @@ public class DOTGenerator {
|
|||
Utils.integer(startState.stateNumber));
|
||||
walkRuleNFACreatingDOT(dot, startState);
|
||||
dot.add("rankdir", rankdir);
|
||||
return dot.toString();
|
||||
return dot.render();
|
||||
}
|
||||
|
||||
public String getDOT(DFAState startState) {
|
||||
|
@ -65,24 +65,9 @@ public class DOTGenerator {
|
|||
Boolean.valueOf(Tool.internalOption_ShowNFAConfigsInDFA));
|
||||
walkCreatingDFADOT(dot, (DFAState)startState);
|
||||
dot.add("rankdir", rankdir);
|
||||
return dot.toString();
|
||||
return dot.render();
|
||||
}
|
||||
|
||||
/** Return a String containing a DOT description that, when displayed,
|
||||
* will show the incoming state machine visually. All nodes reachable
|
||||
* from startState will be included.
|
||||
public String getRuleNFADOT(State startState) {
|
||||
// The output DOT graph for visualization
|
||||
ST dot = stlib.getInstanceOf("org/antlr/tool/templates/dot/nfa");
|
||||
|
||||
markedStates = new HashSet();
|
||||
dot.add("startState",
|
||||
Utils.integer(startState.stateNumber));
|
||||
walkRuleNFACreatingDOT(dot, startState);
|
||||
return dot.toString();
|
||||
}
|
||||
*/
|
||||
|
||||
/** Do a depth-first walk of the state machine graph and
|
||||
* fill a DOT description template. Keep filling the
|
||||
* states and edges attributes.
|
||||
|
|
Loading…
Reference in New Issue