forked from jasder/antlr
more work on NFA
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6735]
This commit is contained in:
parent
bf08801022
commit
d7bda19fd1
|
@ -27,13 +27,12 @@
|
||||||
*/
|
*/
|
||||||
package org.antlr.v4.runtime.tree;
|
package org.antlr.v4.runtime.tree;
|
||||||
|
|
||||||
|
import org.antlr.runtime.BitSet;
|
||||||
import org.antlr.runtime.Token;
|
import org.antlr.runtime.Token;
|
||||||
import org.antlr.runtime.tree.BaseTree;
|
import org.antlr.runtime.tree.BaseTree;
|
||||||
import org.antlr.runtime.tree.Tree;
|
import org.antlr.runtime.tree.Tree;
|
||||||
import org.antlr.v4.runtime.tree.gui.ASTViewer;
|
import org.antlr.v4.runtime.tree.gui.ASTViewer;
|
||||||
|
|
||||||
import java.util.BitSet;
|
|
||||||
|
|
||||||
/** A tree node that is wrapper for a Token object. After 3.0 release
|
/** A tree node that is wrapper for a Token object. After 3.0 release
|
||||||
* while building tree rewrite stuff, it became clear that computing
|
* while building tree rewrite stuff, it became clear that computing
|
||||||
* parent and child index is very difficult and cumbersome. Better to
|
* parent and child index is very difficult and cumbersome. Better to
|
||||||
|
@ -183,7 +182,7 @@ public class CommonTree extends BaseTree {
|
||||||
|
|
||||||
// TODO: move to basetree when i settle on how runtime works
|
// TODO: move to basetree when i settle on how runtime works
|
||||||
// TODO: don't include this node!!
|
// TODO: don't include this node!!
|
||||||
/** include this node */
|
// TODO: reuse other method
|
||||||
public CommonTree getFirstDescendantWithType(int type) {
|
public CommonTree getFirstDescendantWithType(int type) {
|
||||||
if ( getType()==type ) return this;
|
if ( getType()==type ) return this;
|
||||||
if ( children==null ) return null;
|
if ( children==null ) return null;
|
||||||
|
@ -196,12 +195,13 @@ public class CommonTree extends BaseTree {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: don't include this node!!
|
||||||
public CommonTree getFirstDescendantWithType(BitSet types) {
|
public CommonTree getFirstDescendantWithType(BitSet types) {
|
||||||
if ( types.get(getType()) ) return this;
|
if ( types.member(getType()) ) return this;
|
||||||
if ( children==null ) return null;
|
if ( children==null ) return null;
|
||||||
for (Object c : children) {
|
for (Object c : children) {
|
||||||
CommonTree t = (CommonTree)c;
|
CommonTree t = (CommonTree)c;
|
||||||
if ( types.get(t.getType()) ) return t;
|
if ( types.member(t.getType()) ) return t;
|
||||||
CommonTree d = t.getFirstDescendantWithType(types);
|
CommonTree d = t.getFirstDescendantWithType(types);
|
||||||
if ( d!=null ) return d;
|
if ( d!=null ) return d;
|
||||||
}
|
}
|
||||||
|
|
|
@ -369,13 +369,23 @@ public class Tool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// BUILD NFA FROM AST
|
// BUILD NFA FROM AST
|
||||||
|
NFAFactory factory = new ParserNFAFactory(g);
|
||||||
|
if ( g.getType()==ANTLRParser.LEXER ) factory = new LexerNFAFactory(g);
|
||||||
|
GrammarAST rules = (GrammarAST)g.ast.getFirstChildWithType(ANTLRParser.RULES);
|
||||||
|
List<GrammarAST> kids = rules.getChildren();
|
||||||
|
for (GrammarAST n : kids) {
|
||||||
|
if ( n.getType()!=ANTLRParser.RULE ) continue;
|
||||||
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
||||||
BufferedTreeNodeStream nodes =
|
BufferedTreeNodeStream nodes =
|
||||||
new BufferedTreeNodeStream(adaptor,g.ast);
|
new BufferedTreeNodeStream(adaptor,n);
|
||||||
NFAFactory fac = new ParserNFAFactory(g);
|
NFABuilder b = new NFABuilder(nodes,factory);
|
||||||
if ( g.getType()==ANTLRParser.LEXER ) fac = new LexerNFAFactory(g);
|
try {
|
||||||
NFABuilder nfaBuilder = new NFABuilder(nodes,fac);
|
b.rule();
|
||||||
nfaBuilder.downup(g.ast);
|
}
|
||||||
|
catch (RecognitionException re) {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// PERFORM GRAMMAR ANALYSIS ON NFA: BUILD DECISION DFAs
|
// PERFORM GRAMMAR ANALYSIS ON NFA: BUILD DECISION DFAs
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package org.antlr.v4.automata;
|
package org.antlr.v4.automata;
|
||||||
|
|
||||||
import org.antlr.v4.misc.IntervalSet;
|
import org.antlr.v4.misc.IntervalSet;
|
||||||
|
import org.antlr.v4.tool.Grammar;
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
public class AtomTransition extends Transition {
|
public class AtomTransition extends Transition {
|
||||||
|
@ -33,4 +34,8 @@ public class AtomTransition extends Transition {
|
||||||
public int compareTo(Object o) {
|
public int compareTo(Object o) {
|
||||||
return this.label-((AtomTransition)o).label;
|
return this.label-((AtomTransition)o).label;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String toString(Grammar g) {
|
||||||
|
return g.getTokenDisplayName(label);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,4 +11,22 @@ public class BasicState extends NFAState {
|
||||||
public Transition incidentTransition;
|
public Transition incidentTransition;
|
||||||
|
|
||||||
public BasicState(NFA nfa) { super(nfa); }
|
public BasicState(NFA nfa) { super(nfa); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getNumberOfTransitions() {
|
||||||
|
if ( transition!=null ) return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void addTransition(Transition e) {
|
||||||
|
if ( transition!=null ) throw new IllegalArgumentException("only one transition");
|
||||||
|
transition = e;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Transition transition(int i) {
|
||||||
|
if ( i>0 ) throw new IllegalArgumentException("only one transition");
|
||||||
|
return transition;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
package org.antlr.v4.automata;
|
package org.antlr.v4.automata;
|
||||||
|
|
||||||
|
import org.antlr.v4.tool.Grammar;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -11,21 +13,61 @@ import java.util.Set;
|
||||||
public class FASerializer {
|
public class FASerializer {
|
||||||
List<State> work;
|
List<State> work;
|
||||||
Set<State> marked;
|
Set<State> marked;
|
||||||
|
Grammar g;
|
||||||
|
State start;
|
||||||
|
|
||||||
public String serialize(State s) {
|
public FASerializer(Grammar g, State start) {
|
||||||
if ( s==null ) return null;
|
this.g = g;
|
||||||
|
this.start = start;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
if ( start==null ) return null;
|
||||||
work = new ArrayList<State>();
|
work = new ArrayList<State>();
|
||||||
marked = new HashSet<State>();
|
marked = new HashSet<State>();
|
||||||
work.add(s);
|
work.add(start);
|
||||||
|
|
||||||
|
StringBuilder buf = new StringBuilder();
|
||||||
|
State s = null;
|
||||||
while ( work.size()>0 ) {
|
while ( work.size()>0 ) {
|
||||||
s = work.remove(work.size()-1); // pop
|
s = work.remove(0);
|
||||||
System.out.println(s);
|
|
||||||
marked.add(s);
|
|
||||||
// add targets
|
|
||||||
int n = s.getNumberOfTransitions();
|
int n = s.getNumberOfTransitions();
|
||||||
for (int i=0; i<n; i++) work.add( s.transition(i).target );
|
//System.out.println("visit "+getStateString(s)+"; edges="+n);
|
||||||
|
marked.add(s);
|
||||||
|
for (int i=0; i<n; i++) {
|
||||||
|
Transition t = s.transition(i);
|
||||||
|
work.add( t.target );
|
||||||
|
buf.append(getStateString(s));
|
||||||
|
if ( t instanceof EpsilonTransition ) {
|
||||||
|
buf.append("->"+getStateString(t.target)+'\n');
|
||||||
}
|
}
|
||||||
return "";
|
else if ( t instanceof RuleTransition ) {
|
||||||
|
buf.append("->"+getStateString(t.target)+'\n');
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
AtomTransition a = (AtomTransition)t;
|
||||||
|
buf.append("-"+a.toString(g)+"->"+getStateString(t.target)+'\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
String getStateString(State s) {
|
||||||
|
int n = s.stateNumber;
|
||||||
|
String stateStr = ".s"+n;
|
||||||
|
// if ( s instanceof DFAState ) {
|
||||||
|
// stateStr = ":s"+n+"=>"+((DFAState)s).getUniquelyPredictedAlt();
|
||||||
|
// }
|
||||||
|
// else
|
||||||
|
if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
|
||||||
|
if ( s instanceof PlusBlockStartState ) stateStr = "PlusBlockStart_"+n;
|
||||||
|
if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
|
||||||
|
if ( s instanceof BlockStartState ) stateStr = "BlockStart_"+n;
|
||||||
|
if ( s instanceof BlockEndState ) stateStr = "BlockEnd_"+n;
|
||||||
|
if ( s instanceof RuleStartState ) stateStr = "RuleStart_"+n;
|
||||||
|
if ( s instanceof RuleStopState ) stateStr = "RuleStop"+n;
|
||||||
|
if ( s instanceof LoopbackState ) stateStr = "LoopBack_"+n;
|
||||||
|
return stateStr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,8 +57,8 @@ public abstract class Label implements /*Comparable, */ Cloneable {
|
||||||
*/
|
*/
|
||||||
public static final int MIN_ATOM_VALUE = EOT;
|
public static final int MIN_ATOM_VALUE = EOT;
|
||||||
|
|
||||||
// public static final int MIN_CHAR_VALUE = '\u0000';
|
public static final int MIN_CHAR_VALUE = '\u0000';
|
||||||
// public static final int MAX_CHAR_VALUE = '\uFFFE';
|
public static final int MAX_CHAR_VALUE = '\uFFFE';
|
||||||
|
|
||||||
/** End of rule token type; imaginary token type used only for
|
/** End of rule token type; imaginary token type used only for
|
||||||
* local, partial FOLLOW sets to indicate that the local FOLLOW
|
* local, partial FOLLOW sets to indicate that the local FOLLOW
|
||||||
|
|
|
@ -9,6 +9,13 @@ import java.util.List;
|
||||||
public class NFA {
|
public class NFA {
|
||||||
public Grammar g;
|
public Grammar g;
|
||||||
public List<NFAState> states = new ArrayList<NFAState>();
|
public List<NFAState> states = new ArrayList<NFAState>();
|
||||||
|
|
||||||
|
/** Each subrule/rule is a decision point and we must track them so we
|
||||||
|
* can go back later and build DFA predictors for them. This includes
|
||||||
|
* all the rules, subrules, optional blocks, ()+, ()* etc...
|
||||||
|
*/
|
||||||
|
protected List<NFAState> decisionToNFAState = new ArrayList<NFAState>();
|
||||||
|
|
||||||
int stateNumber = 0;
|
int stateNumber = 0;
|
||||||
|
|
||||||
public NFA(Grammar g) { this.g = g; }
|
public NFA(Grammar g) { this.g = g; }
|
||||||
|
@ -17,4 +24,9 @@ public class NFA {
|
||||||
states.add(state);
|
states.add(state);
|
||||||
state.stateNumber = stateNumber++;
|
state.stateNumber = stateNumber++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int defineDecisionState(NFAState s) {
|
||||||
|
decisionToNFAState.add(s);
|
||||||
|
return decisionToNFAState.size()-1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
package org.antlr.v4.automata;
|
|
||||||
|
|
||||||
/** */
|
|
||||||
public class OptionalBlockStartState extends BlockStartState {
|
|
||||||
public OptionalBlockStartState(NFA nfa) { super(nfa); }
|
|
||||||
}
|
|
|
@ -2,11 +2,9 @@ package org.antlr.v4.automata;
|
||||||
|
|
||||||
|
|
||||||
import org.antlr.v4.misc.IntSet;
|
import org.antlr.v4.misc.IntSet;
|
||||||
import org.antlr.v4.tool.Grammar;
|
import org.antlr.v4.tool.*;
|
||||||
import org.antlr.v4.tool.GrammarAST;
|
|
||||||
import org.antlr.v4.tool.Rule;
|
|
||||||
import org.antlr.v4.tool.TerminalAST;
|
|
||||||
|
|
||||||
|
import java.lang.reflect.Constructor;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -43,6 +41,20 @@ public class ParserNFAFactory implements NFAFactory {
|
||||||
this.currentRule = g.getRule(name);
|
this.currentRule = g.getRule(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public NFAState newState(Class nodeType, GrammarAST node) {
|
||||||
|
try {
|
||||||
|
Constructor ctor = nodeType.getConstructor(NFA.class);
|
||||||
|
NFAState s = (NFAState)ctor.newInstance(nfa);
|
||||||
|
s.ast = node;
|
||||||
|
nfa.addState(s);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
ErrorManager.internalError("can't create NFA node: "+nodeType.getName(), e);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
public BasicState newState(GrammarAST node) {
|
public BasicState newState(GrammarAST node) {
|
||||||
BasicState n = new BasicState(nfa);
|
BasicState n = new BasicState(nfa);
|
||||||
n.ast = node;
|
n.ast = node;
|
||||||
|
@ -157,18 +169,16 @@ public class ParserNFAFactory implements NFAFactory {
|
||||||
|
|
||||||
/** From A|B|..|Z alternative block build
|
/** From A|B|..|Z alternative block build
|
||||||
*
|
*
|
||||||
* o->o-A->o->o (last NFAState is blockEndNFAState pointed to by all alts)
|
* o->o-A->o->o (last NFAState is BlockEndState pointed to by all alts)
|
||||||
* | ^
|
* | ^
|
||||||
* o->o-B->o--|
|
* |->o-B->o--|
|
||||||
* | |
|
* | |
|
||||||
* ... |
|
* ... |
|
||||||
* | |
|
* | |
|
||||||
* o->o-Z->o--|
|
* |->o-Z->o--|
|
||||||
*
|
*
|
||||||
* So every alternative gets begin NFAState connected by epsilon
|
* So start node points at every alternative with epsilon transition
|
||||||
* and every alt right side points at a block end NFAState. There is a
|
* and every alt right side points at a block end NFAState.
|
||||||
* new NFAState in the NFAState in the handle for each alt plus one for the
|
|
||||||
* end NFAState.
|
|
||||||
*
|
*
|
||||||
* Special case: only one alternative: don't make a block with alt
|
* Special case: only one alternative: don't make a block with alt
|
||||||
* begin/end.
|
* begin/end.
|
||||||
|
@ -176,11 +186,23 @@ public class ParserNFAFactory implements NFAFactory {
|
||||||
* Special case: if just a list of tokens/chars/sets, then collapse
|
* Special case: if just a list of tokens/chars/sets, then collapse
|
||||||
* to a single edge'd o-set->o graph.
|
* to a single edge'd o-set->o graph.
|
||||||
*
|
*
|
||||||
* Set alt number (1..n) in the left-Transition NFAState.
|
* TODO: Set alt number (1..n) in the states?
|
||||||
*/
|
*/
|
||||||
public Handle block(List<Handle> alts) {
|
public Handle block(GrammarAST blkAST, List<Handle> alts) {
|
||||||
System.out.println("block: "+alts);
|
System.out.println("block: "+alts);
|
||||||
return null;
|
if ( alts.size()==1 ) return alts.get(0);
|
||||||
|
|
||||||
|
BlockStartState start = (BlockStartState)newState(BlockStartState.class, blkAST);
|
||||||
|
BlockEndState end = (BlockEndState)newState(BlockEndState.class, blkAST);
|
||||||
|
for (Handle alt : alts) {
|
||||||
|
epsilon(start, alt.left);
|
||||||
|
epsilon(alt.right, end);
|
||||||
|
}
|
||||||
|
Handle h = new Handle(start, end);
|
||||||
|
FASerializer ser = new FASerializer(g, h.left);
|
||||||
|
nfa.defineDecisionState(start);
|
||||||
|
System.out.println(blkAST.toStringTree()+":\n"+ser);
|
||||||
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Handle alt(List<Handle> els) {
|
public Handle alt(List<Handle> els) {
|
||||||
|
@ -197,13 +219,27 @@ public class ParserNFAFactory implements NFAFactory {
|
||||||
*
|
*
|
||||||
* or, if A is a block, just add an empty alt to the end of the block
|
* or, if A is a block, just add an empty alt to the end of the block
|
||||||
*/
|
*/
|
||||||
public Handle optional(Handle A) {
|
public Handle optional(GrammarAST optAST, Handle blk) {
|
||||||
OptionalBlockStartState left = new OptionalBlockStartState(nfa);
|
if ( blk.left instanceof BlockStartState ) {
|
||||||
BlockEndState right = new BlockEndState(nfa);
|
epsilon(blk.left, blk.right);
|
||||||
epsilon(left, A.left);
|
FASerializer ser = new FASerializer(g, blk.left);
|
||||||
epsilon(A.right, right);
|
System.out.println(optAST.toStringTree()+":\n"+ser);
|
||||||
epsilon(left, right);
|
return blk;
|
||||||
return new Handle(left, right);
|
}
|
||||||
|
|
||||||
|
// construct block
|
||||||
|
BlockStartState start = (BlockStartState)newState(BlockStartState.class, optAST);
|
||||||
|
BlockEndState end = (BlockEndState)newState(BlockEndState.class, optAST);
|
||||||
|
epsilon(start, blk.left);
|
||||||
|
epsilon(blk.right, end);
|
||||||
|
epsilon(start, end);
|
||||||
|
|
||||||
|
nfa.defineDecisionState(start);
|
||||||
|
|
||||||
|
Handle h = new Handle(start, end);
|
||||||
|
FASerializer ser = new FASerializer(g, h.left);
|
||||||
|
System.out.println(optAST.toStringTree()+":\n"+ser);
|
||||||
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** From (A)+ build
|
/** From (A)+ build
|
||||||
|
@ -219,7 +255,7 @@ public class ParserNFAFactory implements NFAFactory {
|
||||||
* During analysis we'll call the follow link (transition 1) alt n+1 for
|
* During analysis we'll call the follow link (transition 1) alt n+1 for
|
||||||
* an n-alt A block.
|
* an n-alt A block.
|
||||||
*/
|
*/
|
||||||
public Handle plus(Handle A) { return null; }
|
public Handle plus(GrammarAST plusAST, Handle blk) { return null; }
|
||||||
|
|
||||||
/** From (A)* build
|
/** From (A)* build
|
||||||
*
|
*
|
||||||
|
@ -251,7 +287,7 @@ public class ParserNFAFactory implements NFAFactory {
|
||||||
* is sufficient to let me make an appropriate enter, exit, loop
|
* is sufficient to let me make an appropriate enter, exit, loop
|
||||||
* determination. See codegen.g
|
* determination. See codegen.g
|
||||||
*/
|
*/
|
||||||
public Handle star(Handle A) { return null; }
|
public Handle star(GrammarAST starAST, Handle blk) { return null; }
|
||||||
|
|
||||||
/** Build an atom with all possible values in its label */
|
/** Build an atom with all possible values in its label */
|
||||||
public Handle wildcard(GrammarAST associatedAST) { return null; }
|
public Handle wildcard(GrammarAST associatedAST) { return null; }
|
||||||
|
|
|
@ -28,4 +28,9 @@ public abstract class State {
|
||||||
if ( o instanceof State ) return this == (State)o;
|
if ( o instanceof State ) return this == (State)o;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.valueOf(stateNumber);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
package org.antlr.v4.codegen;
|
||||||
|
|
||||||
|
import org.antlr.analysis.Label;
|
||||||
|
import org.antlr.tool.ErrorManager;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class Target {
|
||||||
|
/** When converting ANTLR char and string literals, here is the
|
||||||
|
* value set of escape chars.
|
||||||
|
*/
|
||||||
|
public static int ANTLRLiteralEscapedCharValue[] = new int[255];
|
||||||
|
|
||||||
|
/** Given a char, we need to be able to show as an ANTLR literal.
|
||||||
|
*/
|
||||||
|
public static String ANTLRLiteralCharValueEscape[] = new String[255];
|
||||||
|
|
||||||
|
static {
|
||||||
|
ANTLRLiteralEscapedCharValue['n'] = '\n';
|
||||||
|
ANTLRLiteralEscapedCharValue['r'] = '\r';
|
||||||
|
ANTLRLiteralEscapedCharValue['t'] = '\t';
|
||||||
|
ANTLRLiteralEscapedCharValue['b'] = '\b';
|
||||||
|
ANTLRLiteralEscapedCharValue['f'] = '\f';
|
||||||
|
ANTLRLiteralEscapedCharValue['\\'] = '\\';
|
||||||
|
ANTLRLiteralEscapedCharValue['\''] = '\'';
|
||||||
|
ANTLRLiteralEscapedCharValue['"'] = '"';
|
||||||
|
ANTLRLiteralCharValueEscape['\n'] = "\\n";
|
||||||
|
ANTLRLiteralCharValueEscape['\r'] = "\\r";
|
||||||
|
ANTLRLiteralCharValueEscape['\t'] = "\\t";
|
||||||
|
ANTLRLiteralCharValueEscape['\b'] = "\\b";
|
||||||
|
ANTLRLiteralCharValueEscape['\f'] = "\\f";
|
||||||
|
ANTLRLiteralCharValueEscape['\\'] = "\\\\";
|
||||||
|
ANTLRLiteralCharValueEscape['\''] = "\\'";
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return a string representing the escaped char for code c. E.g., If c
|
||||||
|
* has value 0x100, you will get "\u0100". ASCII gets the usual
|
||||||
|
* char (non-hex) representation. Control characters are spit out
|
||||||
|
* as unicode. While this is specially set up for returning Java strings,
|
||||||
|
* it can be used by any language target that has the same syntax. :)
|
||||||
|
*/
|
||||||
|
public static String getANTLRCharLiteralForChar(int c) {
|
||||||
|
if ( c< Label.MIN_CHAR_VALUE ) {
|
||||||
|
ErrorManager.internalError("invalid char value "+c);
|
||||||
|
return "'<INVALID>'";
|
||||||
|
}
|
||||||
|
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
|
||||||
|
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
|
||||||
|
}
|
||||||
|
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
|
||||||
|
!Character.isISOControl((char)c) ) {
|
||||||
|
if ( c=='\\' ) {
|
||||||
|
return "'\\\\'";
|
||||||
|
}
|
||||||
|
if ( c=='\'') {
|
||||||
|
return "'\\''";
|
||||||
|
}
|
||||||
|
return '\''+Character.toString((char)c)+'\'';
|
||||||
|
}
|
||||||
|
// turn on the bit above max "\uFFFF" value so that we pad with zeros
|
||||||
|
// then only take last 4 digits
|
||||||
|
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
|
||||||
|
String unicodeStr = "'\\u"+hex+"'";
|
||||||
|
return unicodeStr;
|
||||||
|
}
|
||||||
|
}
|
|
@ -29,7 +29,7 @@ options {
|
||||||
language = Java;
|
language = Java;
|
||||||
tokenVocab = ANTLRParser;
|
tokenVocab = ANTLRParser;
|
||||||
ASTLabelType = GrammarAST;
|
ASTLabelType = GrammarAST;
|
||||||
filter = true;
|
// filter = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Include the copyright in this source and also the generated source
|
// Include the copyright in this source and also the generated source
|
||||||
|
@ -84,13 +84,13 @@ bottomup
|
||||||
;
|
;
|
||||||
|
|
||||||
rule returns [NFAFactory.Handle p]
|
rule returns [NFAFactory.Handle p]
|
||||||
: ^(RULE name=ID .+) {factory.setCurrentRuleName($name.text);}
|
: ^(RULE name=ID ~BLOCK* block) {factory.setCurrentRuleName($name.text);}
|
||||||
;
|
;
|
||||||
|
|
||||||
block returns [NFAFactory.Handle p]
|
block returns [NFAFactory.Handle p]
|
||||||
@init {List<NFAFactory.Handle> alts = new ArrayList<NFAFactory.Handle>();}
|
@init {List<NFAFactory.Handle> alts = new ArrayList<NFAFactory.Handle>();}
|
||||||
: ^(BLOCK ~ALT* (a=alternative {alts.add($a.p);})+)
|
: ^(BLOCK ~ALT* (a=alternative {alts.add($a.p);})+)
|
||||||
{factory.block(alts);}
|
{$p = factory.block($BLOCK, alts);}
|
||||||
;
|
;
|
||||||
|
|
||||||
alternative returns [NFAFactory.Handle p]
|
alternative returns [NFAFactory.Handle p]
|
||||||
|
@ -125,9 +125,9 @@ treeSpec returns [NFAFactory.Handle p]
|
||||||
|
|
||||||
ebnf returns [NFAFactory.Handle p]
|
ebnf returns [NFAFactory.Handle p]
|
||||||
: ^(astBlockSuffix block) {$p = $block.p;}
|
: ^(astBlockSuffix block) {$p = $block.p;}
|
||||||
| ^(OPTIONAL block) {$p = factory.optional($block.p);}
|
| ^(OPTIONAL block) {$p = factory.optional($start, $block.p);}
|
||||||
| ^(CLOSURE block) {$p = factory.star($block.p);}
|
| ^(CLOSURE block) {$p = factory.star($start, $block.p);}
|
||||||
| ^(POSITIVE_CLOSURE block) {$p = factory.plus($block.p);}
|
| ^(POSITIVE_CLOSURE block) {$p = factory.plus($start, $block.p);}
|
||||||
| block {$p = $block.p;}
|
| block {$p = $block.p;}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -4,6 +4,7 @@ import org.antlr.runtime.*;
|
||||||
import org.antlr.runtime.tree.TreeWizard;
|
import org.antlr.runtime.tree.TreeWizard;
|
||||||
import org.antlr.v4.Tool;
|
import org.antlr.v4.Tool;
|
||||||
import org.antlr.v4.automata.Label;
|
import org.antlr.v4.automata.Label;
|
||||||
|
import org.antlr.v4.codegen.Target;
|
||||||
import org.antlr.v4.parse.ANTLRLexer;
|
import org.antlr.v4.parse.ANTLRLexer;
|
||||||
import org.antlr.v4.parse.ANTLRParser;
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
import org.antlr.v4.parse.GrammarASTAdaptor;
|
import org.antlr.v4.parse.GrammarASTAdaptor;
|
||||||
|
@ -65,6 +66,8 @@ public class Grammar implements AttributeResolver {
|
||||||
* field will have entries both mapped to 35.
|
* field will have entries both mapped to 35.
|
||||||
*/
|
*/
|
||||||
public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
|
public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
|
||||||
|
/** Reverse index for stringLiteralToTypeMap */
|
||||||
|
public Vector<String> typeToStringLiteralList = new Vector<String>();
|
||||||
|
|
||||||
/** Map a token type to its token name.
|
/** Map a token type to its token name.
|
||||||
* Must subtract MIN_TOKEN_TYPE from index.
|
* Must subtract MIN_TOKEN_TYPE from index.
|
||||||
|
@ -82,6 +85,8 @@ public class Grammar implements AttributeResolver {
|
||||||
public Map<String, String> options;
|
public Map<String, String> options;
|
||||||
|
|
||||||
public Map<String, AttributeDict> scopes = new LinkedHashMap<String, AttributeDict>();
|
public Map<String, AttributeDict> scopes = new LinkedHashMap<String, AttributeDict>();
|
||||||
|
public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";
|
||||||
|
|
||||||
|
|
||||||
public Grammar(Tool tool, GrammarRootAST ast) {
|
public Grammar(Tool tool, GrammarRootAST ast) {
|
||||||
if ( ast==null ) throw new IllegalArgumentException("can't pass null tree");
|
if ( ast==null ) throw new IllegalArgumentException("can't pass null tree");
|
||||||
|
@ -284,7 +289,7 @@ public class Grammar implements AttributeResolver {
|
||||||
|
|
||||||
public String getStringLiteralLexerRuleName(String lit) {
|
public String getStringLiteralLexerRuleName(String lit) {
|
||||||
int ttype = getTokenType(lit);
|
int ttype = getTokenType(lit);
|
||||||
return "T__"+ttype;
|
return AUTO_GENERATED_TOKEN_NAME_PREFIX +ttype;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return grammar directly imported by this grammar */
|
/** Return grammar directly imported by this grammar */
|
||||||
|
@ -308,6 +313,44 @@ public class Grammar implements AttributeResolver {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Given a token type, get a meaningful name for it such as the ID
|
||||||
|
* or string literal. If this is a lexer and the ttype is in the
|
||||||
|
* char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
|
||||||
|
*/
|
||||||
|
public String getTokenDisplayName(int ttype) {
|
||||||
|
String tokenName = null;
|
||||||
|
int index=0;
|
||||||
|
// inside any target's char range and is lexer grammar?
|
||||||
|
if ( getType()==ANTLRParser.LEXER &&
|
||||||
|
ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE )
|
||||||
|
{
|
||||||
|
return Target.getANTLRCharLiteralForChar(ttype);
|
||||||
|
}
|
||||||
|
// faux label?
|
||||||
|
else if ( ttype<0 ) {
|
||||||
|
tokenName = typeToTokenList.get(Label.NUM_FAUX_LABELS+ttype);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// compute index in typeToTokenList for ttype
|
||||||
|
index = ttype-1; // normalize to 0..n-1
|
||||||
|
index += Label.NUM_FAUX_LABELS; // jump over faux tokens
|
||||||
|
|
||||||
|
if ( index<typeToTokenList.size() ) {
|
||||||
|
tokenName = typeToTokenList.get(index);
|
||||||
|
if ( tokenName!=null &&
|
||||||
|
tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) )
|
||||||
|
{
|
||||||
|
tokenName = typeToStringLiteralList.get(ttype);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
tokenName = String.valueOf(ttype);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//System.out.println("getTokenDisplayName ttype="+ttype+", index="+index+", name="+tokenName);
|
||||||
|
return tokenName;
|
||||||
|
}
|
||||||
|
|
||||||
/** Return a new unique integer in the token type space */
|
/** Return a new unique integer in the token type space */
|
||||||
public int getNewTokenType() {
|
public int getNewTokenType() {
|
||||||
maxTokenType++;
|
maxTokenType++;
|
||||||
|
@ -333,6 +376,12 @@ public class Grammar implements AttributeResolver {
|
||||||
if ( !stringLiteralToTypeMap.containsKey(lit) ) {
|
if ( !stringLiteralToTypeMap.containsKey(lit) ) {
|
||||||
int ttype = getNewTokenType();
|
int ttype = getNewTokenType();
|
||||||
stringLiteralToTypeMap.put(lit, ttype);
|
stringLiteralToTypeMap.put(lit, ttype);
|
||||||
|
// track in reverse index too
|
||||||
|
if ( ttype>=typeToStringLiteralList.size() ) {
|
||||||
|
typeToStringLiteralList.setSize(ttype+1);
|
||||||
|
}
|
||||||
|
typeToStringLiteralList.set(ttype, text);
|
||||||
|
|
||||||
setTokenForType(ttype, lit);
|
setTokenForType(ttype, lit);
|
||||||
return ttype;
|
return ttype;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,16 @@
|
||||||
package org.antlr.v4.tool;
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.BitSet;
|
||||||
import org.antlr.runtime.CommonToken;
|
import org.antlr.runtime.CommonToken;
|
||||||
import org.antlr.runtime.Token;
|
import org.antlr.runtime.Token;
|
||||||
import org.antlr.runtime.tree.Tree;
|
import org.antlr.runtime.tree.Tree;
|
||||||
import org.antlr.v4.parse.ANTLRParser;
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
import org.antlr.v4.runtime.tree.CommonTree;
|
import org.antlr.v4.runtime.tree.CommonTree;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class GrammarAST extends CommonTree {
|
public class GrammarAST extends CommonTree {
|
||||||
public GrammarAST() {;}
|
public GrammarAST() {;}
|
||||||
public GrammarAST(Token t) { super(t); }
|
public GrammarAST(Token t) { super(t); }
|
||||||
|
@ -18,6 +23,23 @@ public class GrammarAST extends CommonTree {
|
||||||
t.setText(text);
|
t.setText(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<GrammarAST> getNodesWithType(int ttype) {
|
||||||
|
return getNodesWithType(BitSet.of(ttype));
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<GrammarAST> getNodesWithType(BitSet types) {
|
||||||
|
List<GrammarAST> nodes = new ArrayList<GrammarAST>();
|
||||||
|
List<GrammarAST> work = new LinkedList<GrammarAST>();
|
||||||
|
work.add(this);
|
||||||
|
GrammarAST t = null;
|
||||||
|
while ( work.size()>0 ) {
|
||||||
|
t = work.remove(0);
|
||||||
|
if ( types.member(t.getType()) ) nodes.add(this);
|
||||||
|
work.addAll(children);
|
||||||
|
}
|
||||||
|
return nodes;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Tree dupNode() {
|
public Tree dupNode() {
|
||||||
return new GrammarAST(this);
|
return new GrammarAST(this);
|
||||||
|
|
|
@ -1,15 +1,14 @@
|
||||||
package org.antlr.v4.tool;
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.BitSet;
|
||||||
import org.antlr.v4.parse.ANTLRParser;
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
|
|
||||||
import java.util.BitSet;
|
|
||||||
|
|
||||||
public class LabelElementPair {
|
public class LabelElementPair {
|
||||||
public static final BitSet tokenTypeForTokens = new BitSet();
|
public static final BitSet tokenTypeForTokens = new BitSet();
|
||||||
static {
|
static {
|
||||||
tokenTypeForTokens.set(ANTLRParser.TOKEN_REF);
|
tokenTypeForTokens.add(ANTLRParser.TOKEN_REF);
|
||||||
tokenTypeForTokens.set(ANTLRParser.STRING_LITERAL);
|
tokenTypeForTokens.add(ANTLRParser.STRING_LITERAL);
|
||||||
tokenTypeForTokens.set(ANTLRParser.WILDCARD);
|
tokenTypeForTokens.add(ANTLRParser.WILDCARD);
|
||||||
}
|
}
|
||||||
|
|
||||||
public GrammarAST label;
|
public GrammarAST label;
|
||||||
|
|
Loading…
Reference in New Issue