adding new files
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 8658]
This commit is contained in:
parent
72ee89294f
commit
2ddeb7c769
|
@ -27,9 +27,8 @@
|
||||||
*/
|
*/
|
||||||
package org.antlr.v4.runtime;
|
package org.antlr.v4.runtime;
|
||||||
|
|
||||||
import org.antlr.v4.analysis.ATNConfig;
|
|
||||||
import org.antlr.v4.misc.*;
|
import org.antlr.v4.misc.*;
|
||||||
import org.antlr.v4.runtime.atn.ParserInterpreter;
|
import org.antlr.v4.runtime.atn.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
package org.antlr.v4.runtime;
|
package org.antlr.v4.runtime;
|
||||||
|
|
||||||
import org.antlr.v4.analysis.ATNConfig;
|
|
||||||
import org.antlr.v4.misc.OrderedHashSet;
|
import org.antlr.v4.misc.OrderedHashSet;
|
||||||
|
import org.antlr.v4.runtime.atn.ATNConfig;
|
||||||
|
|
||||||
public class LexerNoViableAltException extends LexerRecognitionExeption {
|
public class LexerNoViableAltException extends LexerRecognitionExeption {
|
||||||
/** Prediction began at what input index? */
|
/** Prediction began at what input index? */
|
||||||
|
|
|
@ -27,8 +27,8 @@
|
||||||
*/
|
*/
|
||||||
package org.antlr.v4.runtime;
|
package org.antlr.v4.runtime;
|
||||||
|
|
||||||
import org.antlr.v4.analysis.ATNConfig;
|
|
||||||
import org.antlr.v4.misc.OrderedHashSet;
|
import org.antlr.v4.misc.OrderedHashSet;
|
||||||
|
import org.antlr.v4.runtime.atn.ATNConfig;
|
||||||
|
|
||||||
public class NoViableAltException extends RecognitionException {
|
public class NoViableAltException extends RecognitionException {
|
||||||
/** Prediction began at what input index? */
|
/** Prediction began at what input index? */
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
package org.antlr.v4.runtime.atn;
|
package org.antlr.v4.runtime.atn;
|
||||||
|
|
||||||
import org.antlr.v4.analysis.LL1Analyzer;
|
|
||||||
import org.antlr.v4.automata.ATNSerializer;
|
import org.antlr.v4.automata.ATNSerializer;
|
||||||
import org.antlr.v4.misc.*;
|
import org.antlr.v4.misc.*;
|
||||||
import org.antlr.v4.runtime.RuleContext;
|
import org.antlr.v4.runtime.RuleContext;
|
||||||
|
|
|
@ -0,0 +1,124 @@
|
||||||
|
package org.antlr.v4.runtime.atn;
|
||||||
|
|
||||||
|
import org.antlr.v4.runtime.*;
|
||||||
|
|
||||||
|
/** An ATN state, predicted alt, and syntactic/semantic context.
|
||||||
|
* The syntactic context is a pointer into the rule invocation
|
||||||
|
* chain used to arrive at the state. The semantic context is
|
||||||
|
* the unordered set semantic predicates encountered before reaching
|
||||||
|
* an ATN state.
|
||||||
|
*/
|
||||||
|
public class ATNConfig {
|
||||||
|
/** The ATN state associated with this configuration */
|
||||||
|
public ATNState state;
|
||||||
|
|
||||||
|
/** What alt (or lexer rule) is predicted by this configuration */
|
||||||
|
public int alt;
|
||||||
|
|
||||||
|
/** The stack of invoking states leading to the rule/states associated
|
||||||
|
* wit this config.
|
||||||
|
*/
|
||||||
|
public RuleContext context;
|
||||||
|
|
||||||
|
/**
|
||||||
|
Indicates that we have reached this ATN configuration after
|
||||||
|
traversing a predicate transition. This is important because we
|
||||||
|
cannot cache DFA states derived from such configurations
|
||||||
|
otherwise predicates would not get executed again (DFAs don't
|
||||||
|
have predicated edges in v4).
|
||||||
|
*/
|
||||||
|
public boolean traversedPredicate;
|
||||||
|
|
||||||
|
/**
|
||||||
|
Indicates that we have reached this ATN configuration after
|
||||||
|
traversing a non-force action transition. We do not execute
|
||||||
|
predicates after such actions because the predicates could be
|
||||||
|
functions of the side effects. Force actions must be either side
|
||||||
|
effect free or automatically undone as the parse continues.
|
||||||
|
*/
|
||||||
|
public boolean traversedAction;
|
||||||
|
|
||||||
|
public ATNConfig(ATNState state,
|
||||||
|
int alt,
|
||||||
|
RuleContext context)
|
||||||
|
{
|
||||||
|
this.state = state;
|
||||||
|
this.alt = alt;
|
||||||
|
this.context = context;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ATNConfig(ATNConfig c) {
|
||||||
|
this.state = c.state;
|
||||||
|
this.alt = c.alt;
|
||||||
|
this.context = c.context;
|
||||||
|
this.traversedPredicate = c.traversedPredicate;
|
||||||
|
this.traversedAction = c.traversedAction;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ATNConfig(ATNConfig c, ATNState state) {
|
||||||
|
this(c);
|
||||||
|
this.state = state;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ATNConfig(ATNConfig c, ATNState state, RuleContext context) {
|
||||||
|
this(c);
|
||||||
|
this.state = state;
|
||||||
|
this.context = context;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ATNConfig(ATNConfig c, RuleContext context) {
|
||||||
|
this(c);
|
||||||
|
this.context = context;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** An ATN configuration is equal to another if both have
|
||||||
|
* the same state, they predict the same alternative, and
|
||||||
|
* syntactic/semantic contexts are the same.
|
||||||
|
*/
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if ( o==null ) return false;
|
||||||
|
if ( this==o ) return true;
|
||||||
|
ATNConfig other = (ATNConfig)o;
|
||||||
|
return this.state.stateNumber==other.state.stateNumber &&
|
||||||
|
this.alt==other.alt &&
|
||||||
|
(this.context==other.context ||
|
||||||
|
this.context.equals(other.context));
|
||||||
|
}
|
||||||
|
|
||||||
|
public int hashCode() {
|
||||||
|
if ( state==null ) {
|
||||||
|
System.out.println("eh?");
|
||||||
|
}
|
||||||
|
int h = state.stateNumber + alt;
|
||||||
|
if ( context!=null ) h += context.hashCode();
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return toString(null, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString(Recognizer<?,?> recog, boolean showAlt) {
|
||||||
|
StringBuffer buf = new StringBuffer();
|
||||||
|
if ( state.ruleIndex>0 ) {
|
||||||
|
if ( recog!=null ) buf.append(recog.getRuleNames()[state.ruleIndex]+":");
|
||||||
|
else buf.append(state.ruleIndex+":");
|
||||||
|
}
|
||||||
|
buf.append(state);
|
||||||
|
if ( showAlt ) {
|
||||||
|
buf.append("|");
|
||||||
|
buf.append(alt);
|
||||||
|
}
|
||||||
|
if ( context!=null ) {
|
||||||
|
buf.append("|");
|
||||||
|
buf.append(context);
|
||||||
|
}
|
||||||
|
// if (isAccept) {
|
||||||
|
// buf.append("|=>"+alt);
|
||||||
|
// }
|
||||||
|
// if ( context.approximated ) {
|
||||||
|
// buf.append("|approx");
|
||||||
|
// }
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,10 +1,9 @@
|
||||||
package org.antlr.v4.runtime.atn;
|
package org.antlr.v4.runtime.atn;
|
||||||
|
|
||||||
import org.antlr.v4.analysis.ATNConfig;
|
|
||||||
import org.antlr.v4.misc.*;
|
import org.antlr.v4.misc.*;
|
||||||
import org.antlr.v4.parse.ANTLRParser;
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
import org.antlr.v4.runtime.dfa.*;
|
import org.antlr.v4.runtime.dfa.*;
|
||||||
import org.antlr.v4.tool.*;
|
import org.antlr.v4.tool.Grammar;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
|
@ -1,75 +0,0 @@
|
||||||
package org.antlr.v4.runtime.atn;
|
|
||||||
|
|
||||||
/** Identical to ANTLR's static grammar analysis ATNContext object */
|
|
||||||
public class ATNStack {
|
|
||||||
public static final ATNStack EMPTY = new ATNStack(null, -1);
|
|
||||||
|
|
||||||
public ATNStack parent;
|
|
||||||
|
|
||||||
/** The ATN state following state that invoked another rule's start state
|
|
||||||
* is recorded on the rule invocation context stack.
|
|
||||||
*/
|
|
||||||
public int returnAddr;
|
|
||||||
|
|
||||||
/** Computing the hashCode is very expensive and ATN.addToClosure()
|
|
||||||
* uses it to track when it's seen a state|ctx before to avoid
|
|
||||||
* infinite loops. As we add new contexts, record the hash code
|
|
||||||
* as this + parent.cachedHashCode. Avoids walking
|
|
||||||
* up the tree for every hashCode(). Note that this caching works
|
|
||||||
* because a context is a monotonically growing tree of context nodes
|
|
||||||
* and nothing on the stack is ever modified...ctx just grows
|
|
||||||
* or shrinks.
|
|
||||||
*/
|
|
||||||
protected int cachedHashCode;
|
|
||||||
|
|
||||||
public ATNStack(ATNStack parent, int returnAddr) {
|
|
||||||
this.parent = parent;
|
|
||||||
this.returnAddr = returnAddr;
|
|
||||||
if ( returnAddr >= 0 ) {
|
|
||||||
this.cachedHashCode = returnAddr;
|
|
||||||
}
|
|
||||||
if ( parent!=null ) {
|
|
||||||
this.cachedHashCode += parent.cachedHashCode;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public int hashCode() { return cachedHashCode; }
|
|
||||||
|
|
||||||
/** Two contexts are equals() if both have
|
|
||||||
* same call stack; walk upwards to the root.
|
|
||||||
* Recall that the root sentinel node has no parent.
|
|
||||||
* Note that you may be comparing contextsv in different alt trees.
|
|
||||||
*/
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
ATNStack other = ((ATNStack)o);
|
|
||||||
if ( this.cachedHashCode != other.cachedHashCode ) {
|
|
||||||
return false; // can't be same if hash is different
|
|
||||||
}
|
|
||||||
if ( this==other ) return true;
|
|
||||||
|
|
||||||
// System.out.println("comparing "+this+" with "+other);
|
|
||||||
ATNStack sp = this;
|
|
||||||
while ( sp.parent!=null && other.parent!=null ) {
|
|
||||||
if ( sp.returnAddr != other.returnAddr) return false;
|
|
||||||
sp = sp.parent;
|
|
||||||
other = other.parent;
|
|
||||||
}
|
|
||||||
if ( !(sp.parent==null && other.parent==null) ) {
|
|
||||||
return false; // both pointers must be at their roots after walk
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
StringBuffer buf = new StringBuffer();
|
|
||||||
ATNStack sp = this;
|
|
||||||
buf.append("[");
|
|
||||||
while ( sp.parent!=null ) {
|
|
||||||
buf.append(sp.returnAddr);
|
|
||||||
buf.append(" ");
|
|
||||||
sp = sp.parent;
|
|
||||||
}
|
|
||||||
buf.append("$]");
|
|
||||||
return buf.toString();
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,6 +1,5 @@
|
||||||
package org.antlr.v4.runtime.atn;
|
package org.antlr.v4.runtime.atn;
|
||||||
|
|
||||||
import org.antlr.v4.analysis.ATNConfig;
|
|
||||||
import org.antlr.v4.misc.OrderedHashSet;
|
import org.antlr.v4.misc.OrderedHashSet;
|
||||||
import org.antlr.v4.runtime.*;
|
import org.antlr.v4.runtime.*;
|
||||||
import org.antlr.v4.runtime.dfa.*;
|
import org.antlr.v4.runtime.dfa.*;
|
||||||
|
|
|
@ -1,11 +1,8 @@
|
||||||
package org.antlr.v4.runtime.atn;
|
package org.antlr.v4.runtime.atn;
|
||||||
|
|
||||||
import org.antlr.runtime.CharStream;
|
|
||||||
import org.antlr.v4.analysis.ATNConfig;
|
|
||||||
import org.antlr.v4.misc.*;
|
import org.antlr.v4.misc.*;
|
||||||
import org.antlr.v4.runtime.*;
|
import org.antlr.v4.runtime.*;
|
||||||
import org.antlr.v4.runtime.dfa.*;
|
import org.antlr.v4.runtime.dfa.*;
|
||||||
import org.antlr.v4.tool.DOTGenerator;
|
|
||||||
import org.stringtemplate.v4.misc.MultiMap;
|
import org.stringtemplate.v4.misc.MultiMap;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
@ -37,7 +34,7 @@ public class ParserInterpreter extends ATNInterpreter {
|
||||||
this.parser = parser;
|
this.parser = parser;
|
||||||
ctxToDFAs = new HashMap<RuleContext, DFA[]>();
|
ctxToDFAs = new HashMap<RuleContext, DFA[]>();
|
||||||
decisionToDFA = new DFA[atn.getNumberOfDecisions()+1];
|
decisionToDFA = new DFA[atn.getNumberOfDecisions()+1];
|
||||||
DOTGenerator dot = new DOTGenerator(null);
|
// DOTGenerator dot = new DOTGenerator(null);
|
||||||
// System.out.println(dot.getDOT(atn.rules.get(0), parser.getRuleNames()));
|
// System.out.println(dot.getDOT(atn.rules.get(0), parser.getRuleNames()));
|
||||||
// System.out.println(dot.getDOT(atn.rules.get(1), parser.getRuleNames()));
|
// System.out.println(dot.getDOT(atn.rules.get(1), parser.getRuleNames()));
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,6 @@ public class DFA {
|
||||||
public Map<DFAState, DFAState> states = new LinkedHashMap<DFAState, DFAState>();
|
public Map<DFAState, DFAState> states = new LinkedHashMap<DFAState, DFAState>();
|
||||||
public DFAState s0;
|
public DFAState s0;
|
||||||
public int decision;
|
public int decision;
|
||||||
// public int maxTokenType;
|
|
||||||
|
|
||||||
/** From which ATN state did we create this DFA? */
|
/** From which ATN state did we create this DFA? */
|
||||||
public ATNState atnStartState;
|
public ATNState atnStartState;
|
||||||
|
@ -49,47 +48,6 @@ public class DFA {
|
||||||
public boolean conflict;
|
public boolean conflict;
|
||||||
|
|
||||||
public DFA(ATNState atnStartState) { this.atnStartState = atnStartState; }
|
public DFA(ATNState atnStartState) { this.atnStartState = atnStartState; }
|
||||||
// public DFA(int maxTokenType) { this.maxTokenType = maxTokenType; }
|
|
||||||
|
|
||||||
/*
|
|
||||||
public void addAll(Collection<DFAState> states) {
|
|
||||||
for (DFAState p : states) {
|
|
||||||
//addDFAEdge(p, t, q);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addDFAEdge(OrderedHashSet<ATNConfig> p,
|
|
||||||
int t,
|
|
||||||
OrderedHashSet<ATNConfig> q)
|
|
||||||
{
|
|
||||||
// System.out.println("MOVE "+p+" -> "+q+" upon "+getTokenName(t));
|
|
||||||
DFAState from = addDFAState(p);
|
|
||||||
DFAState to = addDFAState(q);
|
|
||||||
addDFAEdge(from, t, to);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addDFAEdge(DFAState p, int t, DFAState q) {
|
|
||||||
if ( p.edges==null ) {
|
|
||||||
p.edges = new DFAState[maxTokenType+1]; // TODO: make adaptive
|
|
||||||
}
|
|
||||||
p.edges[t] = q; // connect
|
|
||||||
}
|
|
||||||
|
|
||||||
protected DFAState addDFAState(OrderedHashSet<ATNConfig> configs) {
|
|
||||||
DFAState proposed = new DFAState(configs);
|
|
||||||
DFAState existing = states.get(proposed);
|
|
||||||
DFAState p;
|
|
||||||
if ( existing!=null ) p = existing;
|
|
||||||
else {
|
|
||||||
proposed.stateNumber = states.size();
|
|
||||||
proposed.configs = new OrderedHashSet<ATNConfig>();
|
|
||||||
proposed.configs.addAll(configs);
|
|
||||||
states.put(proposed, proposed);
|
|
||||||
p = proposed;
|
|
||||||
}
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
public String toString() { return toString(null); }
|
public String toString() { return toString(null); }
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package org.antlr.v4.runtime.dfa;
|
package org.antlr.v4.runtime.dfa;
|
||||||
|
|
||||||
import org.antlr.v4.analysis.ATNConfig;
|
|
||||||
import org.antlr.v4.misc.OrderedHashSet;
|
import org.antlr.v4.misc.OrderedHashSet;
|
||||||
import org.antlr.v4.runtime.RuleContext;
|
import org.antlr.v4.runtime.RuleContext;
|
||||||
|
import org.antlr.v4.runtime.atn.ATNConfig;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
|
@ -1,219 +0,0 @@
|
||||||
package org.antlr.v4.runtime.misc;
|
|
||||||
|
|
||||||
import org.antlr.v4.runtime.Token;
|
|
||||||
|
|
||||||
/** */
|
|
||||||
public class LABitSet implements Cloneable {
|
|
||||||
public final static int BITS = 64; // number of bits / long
|
|
||||||
public final static int LOG_BITS = 6; // 2^6 == 64
|
|
||||||
|
|
||||||
/* We will often need to do a mod operator (i mod nbits). Its
|
|
||||||
* turns out that, for powers of two, this mod operation is
|
|
||||||
* same as (i & (nbits-1)). Since mod is slow, we use a
|
|
||||||
* precomputed mod mask to do the mod instead.
|
|
||||||
*/
|
|
||||||
public final static int MOD_MASK = BITS - 1;
|
|
||||||
|
|
||||||
public static final LABitSet EOF_SET = LABitSet.of(Token.EOF);
|
|
||||||
|
|
||||||
/** The actual data bits */
|
|
||||||
public long bits[];
|
|
||||||
|
|
||||||
public boolean EOF; // is EOF in set (-1)?
|
|
||||||
|
|
||||||
/** Construct a bitset of size one word (64 bits) */
|
|
||||||
public LABitSet() {
|
|
||||||
this(BITS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Construct a bitset given the size
|
|
||||||
* @param nbits The size of the bitset in bits
|
|
||||||
*/
|
|
||||||
public LABitSet(int nbits) {
|
|
||||||
bits = new long[((nbits - 1) >> LOG_BITS) + 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Construction from a static array of longs */
|
|
||||||
public LABitSet(long[] bits_) {
|
|
||||||
if ( bits_==null || bits_.length==0 ) bits = new long[1];
|
|
||||||
else bits = bits_;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Construction from a static array of longs */
|
|
||||||
public LABitSet(long[] bits_, boolean EOF) {
|
|
||||||
this(bits_);
|
|
||||||
this.EOF = EOF;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static LABitSet of(int el) {
|
|
||||||
LABitSet s = new LABitSet(el + 1);
|
|
||||||
s.add(el);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** or this element into this set (grow as necessary to accommodate) */
|
|
||||||
public void add(int el) {
|
|
||||||
//System.out.println("add("+el+")");
|
|
||||||
if ( el==Token.EOF ) { EOF = true; return; }
|
|
||||||
int n = wordNumber(el);
|
|
||||||
//System.out.println("word number is "+n);
|
|
||||||
//System.out.println("bits.length "+bits.length);
|
|
||||||
if (n >= bits.length) {
|
|
||||||
growToInclude(el);
|
|
||||||
}
|
|
||||||
bits[n] |= bitMask(el);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean member(int el) {
|
|
||||||
if ( el == Token.EOF ) return EOF;
|
|
||||||
int n = wordNumber(el);
|
|
||||||
if (n >= bits.length) return false;
|
|
||||||
return (bits[n] & bitMask(el)) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** return this | a in a new set */
|
|
||||||
public LABitSet or(LABitSet a) {
|
|
||||||
if ( a==null ) {
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
LABitSet s = (LABitSet)this.clone();
|
|
||||||
s.orInPlace((LABitSet)a);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void orInPlace(LABitSet a) {
|
|
||||||
if ( a==null ) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// If this is smaller than a, grow this first
|
|
||||||
if (a.bits.length > bits.length) {
|
|
||||||
setSize(a.bits.length);
|
|
||||||
}
|
|
||||||
int min = Math.min(bits.length, a.bits.length);
|
|
||||||
for (int i = min - 1; i >= 0; i--) {
|
|
||||||
bits[i] |= a.bits[i];
|
|
||||||
}
|
|
||||||
EOF = EOF | a.EOF;
|
|
||||||
}
|
|
||||||
|
|
||||||
// remove this element from this set
|
|
||||||
public void remove(int el) {
|
|
||||||
if ( el==Token.EOF ) { EOF = false; return; }
|
|
||||||
int n = wordNumber(el);
|
|
||||||
if (n >= bits.length) {
|
|
||||||
throw new IllegalArgumentException(el+" is outside set range of "+bits.length+" words");
|
|
||||||
}
|
|
||||||
bits[n] &= ~bitMask(el);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Object clone() {
|
|
||||||
LABitSet s;
|
|
||||||
try {
|
|
||||||
s = (LABitSet)super.clone();
|
|
||||||
s.bits = new long[bits.length];
|
|
||||||
System.arraycopy(bits, 0, s.bits, 0, bits.length);
|
|
||||||
s.EOF = EOF;
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
catch (CloneNotSupportedException e) {
|
|
||||||
e.printStackTrace(System.err);
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets the size of a set.
|
|
||||||
* @param nwords how many words the new set should be
|
|
||||||
*/
|
|
||||||
void setSize(int nwords) {
|
|
||||||
long newbits[] = new long[nwords];
|
|
||||||
int n = Math.min(nwords, bits.length);
|
|
||||||
System.arraycopy(bits, 0, newbits, 0, n);
|
|
||||||
bits = newbits;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get the first element you find and return it. */
|
|
||||||
public int getSingleElement() {
|
|
||||||
for (int i = 0; i < (bits.length << LOG_BITS); i++) {
|
|
||||||
if (member(i)) {
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Token.INVALID_TYPE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Transform a bit set into a string by formatting each element as an integer
|
|
||||||
* separator The string to put in between elements
|
|
||||||
* @return A commma-separated list of values
|
|
||||||
*/
|
|
||||||
public String toString() {
|
|
||||||
StringBuffer buf = new StringBuffer();
|
|
||||||
String separator = ",";
|
|
||||||
boolean havePrintedAnElement = false;
|
|
||||||
buf.append('{');
|
|
||||||
if ( EOF ) { buf.append("EOF"); havePrintedAnElement=true; }
|
|
||||||
|
|
||||||
for (int i = 0; i < (bits.length << LOG_BITS); i++) {
|
|
||||||
if (member(i)) {
|
|
||||||
if ( havePrintedAnElement ) {
|
|
||||||
buf.append(separator);
|
|
||||||
}
|
|
||||||
buf.append(i);
|
|
||||||
havePrintedAnElement = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
buf.append('}');
|
|
||||||
return buf.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
// /**Create a string representation where instead of integer elements, the
|
|
||||||
// * ith element of vocabulary is displayed instead. Vocabulary is a Vector
|
|
||||||
// * of Strings.
|
|
||||||
// * separator The string to put in between elements
|
|
||||||
// * @return A commma-separated list of character constants.
|
|
||||||
// */
|
|
||||||
// public String toString(String separator, List vocabulary) {
|
|
||||||
// String str = "";
|
|
||||||
// for (int i = 0; i < (bits.length << LOG_BITS); i++) {
|
|
||||||
// if (member(i)) {
|
|
||||||
// if (str.length() > 0) {
|
|
||||||
// str += separator;
|
|
||||||
// }
|
|
||||||
// if (i >= vocabulary.size()) {
|
|
||||||
// str += "'" + (char)i + "'";
|
|
||||||
// }
|
|
||||||
// else if (vocabulary.get(i) == null) {
|
|
||||||
// str += "'" + (char)i + "'";
|
|
||||||
// }
|
|
||||||
// else {
|
|
||||||
// str += (String)vocabulary.get(i);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// return str;
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Grows the set to a larger number of bits.
|
|
||||||
* @param bit element that must fit in set
|
|
||||||
*/
|
|
||||||
public void growToInclude(int bit) {
|
|
||||||
int newSize = Math.max(bits.length << 1, numWordsToHold(bit));
|
|
||||||
long newbits[] = new long[newSize];
|
|
||||||
System.arraycopy(bits, 0, newbits, 0, bits.length);
|
|
||||||
bits = newbits;
|
|
||||||
}
|
|
||||||
|
|
||||||
static long bitMask(int bitNumber) {
|
|
||||||
int bitPosition = bitNumber & MOD_MASK; // bitNumber mod BITS
|
|
||||||
return 1L << bitPosition;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int numWordsToHold(int el) {
|
|
||||||
return (el >> LOG_BITS) + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int wordNumber(int bit) {
|
|
||||||
return bit >> LOG_BITS; // bit / BITS
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -27,8 +27,6 @@
|
||||||
*/
|
*/
|
||||||
package org.antlr.v4.runtime.misc;
|
package org.antlr.v4.runtime.misc;
|
||||||
|
|
||||||
import org.antlr.runtime.misc.FastQueue;
|
|
||||||
|
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
|
||||||
/** A lookahead queue that knows how to mark/release locations
|
/** A lookahead queue that knows how to mark/release locations
|
||||||
|
|
|
@ -27,10 +27,11 @@
|
||||||
*/
|
*/
|
||||||
package org.antlr.v4.runtime.tree;
|
package org.antlr.v4.runtime.tree;
|
||||||
|
|
||||||
import org.antlr.runtime.BitSet;
|
|
||||||
import org.antlr.v4.runtime.Token;
|
import org.antlr.v4.runtime.Token;
|
||||||
import org.antlr.v4.runtime.tree.gui.ASTViewer;
|
import org.antlr.v4.runtime.tree.gui.ASTViewer;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/** A tree node that is wrapper for a Token object. After 3.0 release
|
/** A tree node that is wrapper for a Token object. After 3.0 release
|
||||||
* while building tree rewrite stuff, it became clear that computing
|
* while building tree rewrite stuff, it became clear that computing
|
||||||
* parent and child index is very difficult and cumbersome. Better to
|
* parent and child index is very difficult and cumbersome. Better to
|
||||||
|
@ -194,12 +195,12 @@ public class CommonTree extends BaseTree {
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: don't include this node!!
|
// TODO: don't include this node!!
|
||||||
public CommonTree getFirstDescendantWithType(BitSet types) {
|
public CommonTree getFirstDescendantWithType(Set<Integer> types) {
|
||||||
if ( types.member(getType()) ) return this;
|
if ( types.contains(getType()) ) return this;
|
||||||
if ( children==null ) return null;
|
if ( children==null ) return null;
|
||||||
for (Object c : children) {
|
for (Object c : children) {
|
||||||
CommonTree t = (CommonTree)c;
|
CommonTree t = (CommonTree)c;
|
||||||
if ( types.member(t.getType()) ) return t;
|
if ( types.contains(t.getType()) ) return t;
|
||||||
CommonTree d = t.getFirstDescendantWithType(types);
|
CommonTree d = t.getFirstDescendantWithType(types);
|
||||||
if ( d!=null ) return d;
|
if ( d!=null ) return d;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,28 +1,35 @@
|
||||||
package org.antlr.v4;
|
package org.antlr.v4;
|
||||||
|
|
||||||
|
import org.antlr.runtime.*;
|
||||||
|
import org.antlr.tool.DOTGenerator;
|
||||||
|
import org.antlr.v4.parse.*;
|
||||||
import org.antlr.v4.tool.*;
|
import org.antlr.v4.tool.*;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class Tool {
|
public class Tool {
|
||||||
public String VERSION = "4.0-"+new Date();
|
public String VERSION = "4.0-"+new Date();
|
||||||
|
|
||||||
public static enum OptionArgType { NONE, STRING, INT }
|
public static enum OptionArgType { NONE, STRING }
|
||||||
public static class Option {
|
public static class Option {
|
||||||
|
String fieldName;
|
||||||
String name;
|
String name;
|
||||||
OptionArgType argType;
|
OptionArgType argType;
|
||||||
Object defaultArgValue;
|
Object defaultArgValue;
|
||||||
String description;
|
String description;
|
||||||
|
|
||||||
public Option(String name, String description) {
|
public Option(String fieldName, String name, String description) {
|
||||||
this(name, OptionArgType.NONE, null, description);
|
this(fieldName, name, OptionArgType.NONE, null, description);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Option(String name, OptionArgType argType, String description) {
|
public Option(String fieldName, String name, OptionArgType argType, String description) {
|
||||||
this(name, argType, null, description);
|
this(fieldName, name, argType, null, description);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Option(String name, OptionArgType argType, Object defaultArgValue, String description) {
|
public Option(String fieldName, String name, OptionArgType argType, Object defaultArgValue, String description) {
|
||||||
|
this.fieldName = fieldName;
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.argType = argType;
|
this.argType = argType;
|
||||||
this.defaultArgValue = defaultArgValue;
|
this.defaultArgValue = defaultArgValue;
|
||||||
|
@ -30,29 +37,42 @@ public class Tool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// fields set by option manager
|
||||||
|
|
||||||
|
public String outputDirectory = ".";
|
||||||
|
public String libDirectory = ".";
|
||||||
|
public boolean report = false;
|
||||||
|
public boolean printGrammar = false;
|
||||||
|
public boolean debug = false;
|
||||||
|
public boolean profile = false;
|
||||||
|
public boolean trace = false;
|
||||||
|
public boolean generate_ATN_dot = false;
|
||||||
|
public String msgFormat = "antlr";
|
||||||
|
public boolean saveLexer = false;
|
||||||
|
public boolean launch_ST_inspector = false;
|
||||||
|
|
||||||
public static Option[] optionDefs = {
|
public static Option[] optionDefs = {
|
||||||
new Option("o", OptionArgType.STRING, ".", "specify output directory where all output is generated"),
|
new Option("outputDirectory", "-o", OptionArgType.STRING, ".", "specify output directory where all output is generated"),
|
||||||
new Option("fo", OptionArgType.STRING, "same as -o but force even files with relative paths to dir"),
|
new Option("libDirectory", "-lib", OptionArgType.STRING, ".", "specify location of .token files"),
|
||||||
new Option("lib", "specify location of .token files"),
|
new Option("report", "-report", "print out a report about the grammar(s) processed"),
|
||||||
new Option("report", "print out a report about the grammar(s) processed"),
|
new Option("printGrammar", "-print", "print out the grammar without actions"),
|
||||||
new Option("print", "print out the grammar without actions"),
|
new Option("debug", "-debug", "generate a parser that emits debugging events"),
|
||||||
new Option("debug", "generate a parser that emits debugging events"),
|
new Option("profile", "-profile", "generate a parser that computes profiling information"),
|
||||||
new Option("profile", "generate a parser that computes profiling information"),
|
new Option("trace", "-trace", "generate a recognizer that traces rule entry/exit"),
|
||||||
new Option("atn", "generate rule augmented transition networks"),
|
new Option("generate_ATN_dot", "-atn", "generate rule augmented transition networks"),
|
||||||
new Option("message-format", OptionArgType.STRING, "specify output style for messages"),
|
new Option("msgFormat", "-message-format", OptionArgType.STRING, "antlr", "specify output style for messages"),
|
||||||
new Option("version", "print the version of ANTLR and exit"),
|
new Option("saveLexer", "-savelexer", "save temp lexer file created for combined grammars"),
|
||||||
new Option("savelexer", "save temp lexer file created for combined grammars"),
|
new Option("launch_ST_inspector", "-dbgST", "launch StringTemplate visualizer on generated code"),
|
||||||
new Option("dbgST", "launch StringTemplate visualizer on generated code"),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
protected Map<String, Object> options = new HashMap<String, Object>();
|
public final String[] args;
|
||||||
|
|
||||||
protected String[] args;
|
protected List<String> grammarFiles = new ArrayList<String>();
|
||||||
|
|
||||||
public ErrorManager errMgr = new ErrorManager(this);
|
public ErrorManager errMgr = new ErrorManager(this);
|
||||||
|
|
||||||
List<ANTLRToolListener> listeners =
|
List<ANTLRToolListener> listeners =
|
||||||
Collections.synchronizedList(new ArrayList<ANTLRToolListener>());
|
Collections.synchronizedList(new ArrayList<ANTLRToolListener>());
|
||||||
|
|
||||||
/** Track separately so if someone adds a listener, it's the only one
|
/** Track separately so if someone adds a listener, it's the only one
|
||||||
* instead of it and the default stderr listener.
|
* instead of it and the default stderr listener.
|
||||||
|
@ -61,7 +81,8 @@ public class Tool {
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
Tool antlr = new Tool(args);
|
Tool antlr = new Tool(args);
|
||||||
antlr.help();
|
if ( args.length == 0 ) { antlr.help(); antlr.exit(0); }
|
||||||
|
|
||||||
antlr.processGrammarsOnCommandLine();
|
antlr.processGrammarsOnCommandLine();
|
||||||
|
|
||||||
if (antlr.errMgr.getNumErrors() > 0) {
|
if (antlr.errMgr.getNumErrors() > 0) {
|
||||||
|
@ -79,18 +100,294 @@ public class Tool {
|
||||||
|
|
||||||
public Tool(String[] args) {
|
public Tool(String[] args) {
|
||||||
this.args = args;
|
this.args = args;
|
||||||
|
parseArgs();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void parseArgs() {
|
||||||
|
int i=0;
|
||||||
|
while ( args!=null && i<args.length ) {
|
||||||
|
String arg = args[i];
|
||||||
|
i++;
|
||||||
|
if ( arg.charAt(0)!='-' ) { // file name
|
||||||
|
grammarFiles.add(arg);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (Option o : optionDefs) {
|
||||||
|
if ( arg.equals(o.name) ) {
|
||||||
|
String value = null;
|
||||||
|
if ( o.argType==OptionArgType.STRING ) {
|
||||||
|
value = args[i];
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
// use reflection to set field
|
||||||
|
Class c = this.getClass();
|
||||||
|
try {
|
||||||
|
Field f = c.getField(o.fieldName);
|
||||||
|
if ( value==null ) f.setBoolean(this, true);
|
||||||
|
else f.set(this, value);
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
errMgr.toolError(ErrorType.INTERNAL_ERROR, "can't access field "+o.fieldName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void processGrammarsOnCommandLine() {
|
public void processGrammarsOnCommandLine() {
|
||||||
|
for (String fileName : grammarFiles) {
|
||||||
|
GrammarAST t = load(fileName);
|
||||||
|
if ( t instanceof GrammarASTErrorNode ) return; // came back as error node
|
||||||
|
if ( ((GrammarRootAST)t).hasErrors ) return;
|
||||||
|
|
||||||
|
GrammarRootAST ast = (GrammarRootAST)t;
|
||||||
|
Grammar g = createGrammar(ast);
|
||||||
|
g.fileName = grammarFileNames.get(0);
|
||||||
|
process(g);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void process(Grammar g) {
|
||||||
|
GrammarRootAST lexerAST = null;
|
||||||
|
if ( g.ast!=null && g.ast.grammarType== ANTLRParser.COMBINED &&
|
||||||
|
!g.ast.hasErrors )
|
||||||
|
{
|
||||||
|
lexerAST = extractImplicitLexer(g); // alters ast
|
||||||
|
}
|
||||||
|
processNonCombinedGrammar(g);
|
||||||
|
if ( g.ast!=null && g.ast.grammarType== ANTLRParser.COMBINED &&
|
||||||
|
!g.ast.hasErrors )
|
||||||
|
{
|
||||||
|
if ( lexerAST!=null ) {
|
||||||
|
LexerGrammar lexerg = new LexerGrammar(this, lexerAST);
|
||||||
|
lexerg.fileName = g.fileName;
|
||||||
|
g.implicitLexer = lexerg;
|
||||||
|
lexerg.implicitLexerOwner = g;
|
||||||
|
lexerg.importVocab(g);
|
||||||
|
processNonCombinedGrammar(lexerg);
|
||||||
|
g.importVocab(lexerg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void processNonCombinedGrammar(Grammar g) {
|
||||||
|
g.loadImportedGrammars();
|
||||||
|
if ( g.ast!=null && internalOption_PrintGrammarTree ) System.out.println(g.ast.toStringTree());
|
||||||
|
//g.ast.inspect();
|
||||||
|
|
||||||
|
// MAKE SURE GRAMMAR IS SEMANTICALLY CORRECT (FILL IN GRAMMAR OBJECT)
|
||||||
|
SemanticPipeline sem = new SemanticPipeline(g);
|
||||||
|
sem.process();
|
||||||
|
|
||||||
|
if ( errMgr.getNumErrors()>0 ) return;
|
||||||
|
|
||||||
|
if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
|
||||||
|
for (Grammar imp : g.getImportedGrammars()) {
|
||||||
|
processNonCombinedGrammar(imp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// BUILD ATN FROM AST
|
||||||
|
ATNFactory factory = new ParserATNFactory(g);
|
||||||
|
if ( g.isLexer() ) factory = new LexerATNFactory((LexerGrammar)g);
|
||||||
|
g.atn = factory.createATN();
|
||||||
|
|
||||||
|
if ( generate_ATN_dot ) generateATNs(g);
|
||||||
|
|
||||||
|
// PERFORM GRAMMAR ANALYSIS ON ATN: BUILD DECISION DFAs
|
||||||
|
AnalysisPipeline anal = new AnalysisPipeline(g);
|
||||||
|
anal.process();
|
||||||
|
|
||||||
|
//if ( generate_DFA_dot ) generateDFAs(g);
|
||||||
|
|
||||||
|
if ( g.tool.getNumErrors()>0 ) return;
|
||||||
|
|
||||||
|
// GENERATE CODE
|
||||||
|
CodeGenPipeline gen = new CodeGenPipeline(g);
|
||||||
|
gen.process();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Grammar createGrammar(GrammarRootAST ast) {
|
||||||
|
if ( ast.grammarType==ANTLRParser.LEXER ) return new LexerGrammar(this, ast);
|
||||||
|
else return new Grammar(this, ast);
|
||||||
|
}
|
||||||
|
|
||||||
|
public GrammarAST load(String fileName) {
|
||||||
|
ANTLRFileStream in = null;
|
||||||
|
try {
|
||||||
|
in = new ANTLRFileStream(fileName);
|
||||||
|
}
|
||||||
|
catch (IOException ioe) {
|
||||||
|
errMgr.toolError(ErrorType.CANNOT_OPEN_FILE, fileName, ioe);
|
||||||
|
}
|
||||||
|
return load(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
public GrammarAST loadFromString(String grammar) {
|
||||||
|
return load(new ANTLRStringStream(grammar));
|
||||||
|
}
|
||||||
|
|
||||||
|
public GrammarAST load(CharStream in) {
|
||||||
|
try {
|
||||||
|
ANTLRLexer lexer = new ANTLRLexer(in);
|
||||||
|
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||||
|
ToolANTLRParser p = new ToolANTLRParser(tokens, this);
|
||||||
|
p.setTreeAdaptor(new GrammarASTAdaptor(in));
|
||||||
|
ParserRuleReturnScope r = p.grammarSpec();
|
||||||
|
GrammarAST root = (GrammarAST) r.getTree();
|
||||||
|
if ( root instanceof GrammarRootAST ) {
|
||||||
|
((GrammarRootAST)root).hasErrors = p.getNumberOfSyntaxErrors()>0;
|
||||||
|
}
|
||||||
|
return root;
|
||||||
|
}
|
||||||
|
catch (RecognitionException re) {
|
||||||
|
// TODO: do we gen errors now?
|
||||||
|
errMgr.internalError("can't generate this message at moment; antlr recovers");
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build lexer grammar from combined grammar that looks like:
|
||||||
|
*
|
||||||
|
* (COMBINED_GRAMMAR A
|
||||||
|
* (tokens { X (= Y 'y'))
|
||||||
|
* (OPTIONS (= x 'y'))
|
||||||
|
* (scope Blort { int x; })
|
||||||
|
* (@ members {foo})
|
||||||
|
* (@ lexer header {package jj;})
|
||||||
|
* (RULES (RULE .+)))
|
||||||
|
*
|
||||||
|
* Move rules and actions to new tree, don't dup. Split AST apart.
|
||||||
|
* We'll have this Grammar share token symbols later; don't generate
|
||||||
|
* tokenVocab or tokens{} section.
|
||||||
|
*
|
||||||
|
* Side-effects: it removes children from GRAMMAR & RULES nodes
|
||||||
|
* in combined AST. Careful: nodes are shared between
|
||||||
|
* trees after this call.
|
||||||
|
*/
|
||||||
|
public GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
|
||||||
|
GrammarRootAST combinedAST = combinedGrammar.ast;
|
||||||
|
//System.out.println("before="+combinedAST.toStringTree());
|
||||||
|
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
|
||||||
|
List<org.antlr.v4.tool.GrammarAST> elements = combinedAST.getChildren();
|
||||||
|
|
||||||
|
// MAKE A GRAMMAR ROOT and ID
|
||||||
|
String lexerName = combinedAST.getChild(0).getText()+"Lexer";
|
||||||
|
GrammarRootAST lexerAST =
|
||||||
|
new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR,"LEXER_GRAMMAR"));
|
||||||
|
lexerAST.grammarType = ANTLRParser.LEXER;
|
||||||
|
lexerAST.token.setInputStream(combinedAST.token.getInputStream());
|
||||||
|
lexerAST.addChild((org.antlr.v4.tool.GrammarAST)adaptor.create(ANTLRParser.ID, lexerName));
|
||||||
|
|
||||||
|
// MOVE OPTIONS
|
||||||
|
org.antlr.v4.tool.GrammarAST optionsRoot =
|
||||||
|
(org.antlr.v4.tool.GrammarAST)combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
|
||||||
|
if ( optionsRoot!=null ) {
|
||||||
|
org.antlr.v4.tool.GrammarAST lexerOptionsRoot = (org.antlr.v4.tool.GrammarAST)adaptor.dupNode(optionsRoot);
|
||||||
|
lexerAST.addChild(lexerOptionsRoot);
|
||||||
|
List<org.antlr.v4.tool.GrammarAST> options = optionsRoot.getChildren();
|
||||||
|
for (org.antlr.v4.tool.GrammarAST o : options) {
|
||||||
|
String optionName = o.getChild(0).getText();
|
||||||
|
if ( !Grammar.doNotCopyOptionsToLexer.contains(optionName) ) {
|
||||||
|
lexerOptionsRoot.addChild(o);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MOVE lexer:: actions
|
||||||
|
List<org.antlr.v4.tool.GrammarAST> actionsWeMoved = new ArrayList<org.antlr.v4.tool.GrammarAST>();
|
||||||
|
for (org.antlr.v4.tool.GrammarAST e : elements) {
|
||||||
|
if ( e.getType()==ANTLRParser.AT ) {
|
||||||
|
if ( e.getChild(0).getText().equals("lexer") ) {
|
||||||
|
lexerAST.addChild(e);
|
||||||
|
actionsWeMoved.add(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
elements.removeAll(actionsWeMoved);
|
||||||
|
org.antlr.v4.tool.GrammarAST combinedRulesRoot =
|
||||||
|
(org.antlr.v4.tool.GrammarAST)combinedAST.getFirstChildWithType(ANTLRParser.RULES);
|
||||||
|
if ( combinedRulesRoot==null ) return lexerAST;
|
||||||
|
|
||||||
|
// MOVE lexer rules
|
||||||
|
|
||||||
|
org.antlr.v4.tool.GrammarAST lexerRulesRoot =
|
||||||
|
(org.antlr.v4.tool.GrammarAST)adaptor.create(ANTLRParser.RULES, "RULES");
|
||||||
|
lexerAST.addChild(lexerRulesRoot);
|
||||||
|
List<org.antlr.v4.tool.GrammarAST> rulesWeMoved = new ArrayList<org.antlr.v4.tool.GrammarAST>();
|
||||||
|
List<GrammarASTWithOptions> rules = combinedRulesRoot.getChildren();
|
||||||
|
for (GrammarASTWithOptions r : rules) {
|
||||||
|
String ruleName = r.getChild(0).getText();
|
||||||
|
if ( Character.isUpperCase(ruleName.charAt(0)) ) {
|
||||||
|
lexerRulesRoot.addChild(r);
|
||||||
|
rulesWeMoved.add(r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int nLexicalRules = rulesWeMoved.size();
|
||||||
|
rules.removeAll(rulesWeMoved);
|
||||||
|
|
||||||
|
// Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if'
|
||||||
|
Map<String,String> litAliases =
|
||||||
|
Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
|
||||||
|
|
||||||
|
if ( nLexicalRules==0 && (litAliases==null||litAliases.size()==0) &&
|
||||||
|
combinedGrammar.stringLiteralToTypeMap.size()==0 )
|
||||||
|
{
|
||||||
|
// no rules, tokens{}, or 'literals' in grammar
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// add strings from combined grammar (and imported grammars) into to lexer
|
||||||
|
for (String lit : combinedGrammar.stringLiteralToTypeMap.keySet()) {
|
||||||
|
if ( litAliases!=null && litAliases.containsKey(lit) ) continue; // already has rule
|
||||||
|
// create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>))
|
||||||
|
String rname = combinedGrammar.getStringLiteralLexerRuleName(lit);
|
||||||
|
// can't use wizard; need special node types
|
||||||
|
org.antlr.v4.tool.GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
|
||||||
|
BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
|
||||||
|
AltAST alt = new AltAST(ANTLRParser.ALT);
|
||||||
|
TerminalAST slit = new TerminalAST(new org.antlr.runtime.CommonToken(ANTLRParser.STRING_LITERAL, lit));
|
||||||
|
alt.addChild(slit);
|
||||||
|
blk.addChild(alt);
|
||||||
|
CommonToken idToken = new CommonToken(ANTLRParser.ID, rname);
|
||||||
|
litRule.addChild(new TerminalAST(idToken));
|
||||||
|
litRule.addChild(blk);
|
||||||
|
lexerRulesRoot.addChild(litRule);
|
||||||
|
|
||||||
|
// (GrammarAST)
|
||||||
|
// wiz.create("(RULE ID["+rname+"] (BLOCK (ALT STRING_LITERAL["+lit+"])))");
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println("after ="+combinedAST.toStringTree());
|
||||||
|
System.out.println("lexer ="+lexerAST.toStringTree());
|
||||||
|
return lexerAST;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void generateATNs(Grammar g) {
|
||||||
|
DOTGenerator dotGenerator = new DOTGenerator(g);
|
||||||
|
List<Grammar> grammars = new ArrayList<Grammar>();
|
||||||
|
grammars.add(g);
|
||||||
|
List<Grammar> imported = g.getAllImportedGrammars();
|
||||||
|
if ( imported!=null ) grammars.addAll(imported);
|
||||||
|
for (Grammar ig : grammars) {
|
||||||
|
for (Rule r : ig.rules.values()) {
|
||||||
|
try {
|
||||||
|
String dot = dotGenerator.getDOT(g.atn.ruleToStartState.get(r));
|
||||||
|
if (dot != null) {
|
||||||
|
writeDOTFile(g, r, dot);
|
||||||
|
}
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
errMgr.toolError(ErrorType.CANNOT_WRITE_FILE, ioe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void help() {
|
public void help() {
|
||||||
info("ANTLR Parser Generator Version " + new Tool().VERSION);
|
info("ANTLR Parser Generator Version " + new Tool().VERSION);
|
||||||
for (Option o : optionDefs) {
|
for (Option o : optionDefs) {
|
||||||
String name = o.name + (o.argType!=OptionArgType.NONE? " ___" : "");
|
String name = o.name + (o.argType!=OptionArgType.NONE? " ___" : "");
|
||||||
String s = String.format(" -%-19s %s", name, o.description);
|
String s = String.format(" %-19s %s", name, o.description);
|
||||||
info(s);
|
info(s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -124,10 +421,9 @@ public class Tool {
|
||||||
for (ANTLRToolListener l : listeners) l.warning(msg);
|
for (ANTLRToolListener l : listeners) l.warning(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void version() {
|
||||||
public void version() {
|
info("ANTLR Parser Generator Version " + new Tool().VERSION);
|
||||||
info("ANTLR Parser Generator Version " + new Tool().VERSION);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
public void exit(int e) { System.exit(e); }
|
public void exit(int e) { System.exit(e); }
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,189 @@
|
||||||
|
package org.antlr.v4.automata;
|
||||||
|
|
||||||
|
import org.antlr.v4.misc.IntervalSet;
|
||||||
|
import org.antlr.v4.runtime.atn.*;
|
||||||
|
import org.antlr.v4.tool.*;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public interface ATNFactory {
|
||||||
|
/** A pair of states pointing to the left/right (start and end) states of a
|
||||||
|
* state submachine. Used to build ATNs.
|
||||||
|
*/
|
||||||
|
public static class Handle {
|
||||||
|
public ATNState left;
|
||||||
|
public ATNState right;
|
||||||
|
|
||||||
|
public Handle(ATNState left, ATNState right) {
|
||||||
|
this.left = left;
|
||||||
|
this.right = right;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "("+left+","+right+")";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ATN createATN();
|
||||||
|
|
||||||
|
void setCurrentRuleName(String name);
|
||||||
|
|
||||||
|
Handle rule(GrammarAST ruleAST, String name, Handle blk);
|
||||||
|
|
||||||
|
ATNState newState();
|
||||||
|
|
||||||
|
Handle label(Handle t);
|
||||||
|
|
||||||
|
Handle listLabel(Handle t);
|
||||||
|
|
||||||
|
Handle tokenRef(TerminalAST node);
|
||||||
|
|
||||||
|
/** From set build single edge graph o->o-set->o. To conform to
|
||||||
|
* what an alt block looks like, must have extra state on left.
|
||||||
|
*/
|
||||||
|
Handle set(IntervalSet set, GrammarAST associatedAST);
|
||||||
|
|
||||||
|
Handle tree(List<Handle> els);
|
||||||
|
|
||||||
|
Handle range(GrammarAST a, GrammarAST b);
|
||||||
|
|
||||||
|
Handle not(GrammarAST a);
|
||||||
|
|
||||||
|
/** For a non-lexer, just build a simple token reference atom.
|
||||||
|
* For a lexer, a string is a sequence of char to match. That is,
|
||||||
|
* "fog" is treated as 'f' 'o' 'g' not as a single transition in
|
||||||
|
* the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
|
||||||
|
* for n characters.
|
||||||
|
*/
|
||||||
|
Handle stringLiteral(TerminalAST stringLiteralAST);
|
||||||
|
|
||||||
|
/** For reference to rule r, build
|
||||||
|
*
|
||||||
|
* o-e->(r) o
|
||||||
|
*
|
||||||
|
* where (r) is the start of rule r and the trailing o is not linked
|
||||||
|
* to from rule ref state directly (it's done thru the transition(0)
|
||||||
|
* RuleClosureTransition.
|
||||||
|
*
|
||||||
|
* If the rule r is just a list of tokens, it's block will be just
|
||||||
|
* a set on an edge o->o->o-set->o->o->o, could inline it rather than doing
|
||||||
|
* the rule reference, but i'm not doing this yet as I'm not sure
|
||||||
|
* it would help much in the ATN->DFA construction.
|
||||||
|
*
|
||||||
|
* TODO add to codegen: collapse alt blks that are sets into single matchSet
|
||||||
|
* @param node
|
||||||
|
*/
|
||||||
|
Handle ruleRef(GrammarAST node);
|
||||||
|
|
||||||
|
/** From an empty alternative build Grip o-e->o */
|
||||||
|
Handle epsilon(GrammarAST node);
|
||||||
|
|
||||||
|
/** Build what amounts to an epsilon transition with a semantic
|
||||||
|
* predicate action. The pred is a pointer into the AST of
|
||||||
|
* the SEMPRED token.
|
||||||
|
*/
|
||||||
|
Handle sempred(PredAST pred);
|
||||||
|
Handle gated_sempred(GrammarAST pred);
|
||||||
|
|
||||||
|
/** Build what amounts to an epsilon transition with an action.
|
||||||
|
* The action goes into ATN though it is ignored during analysis.
|
||||||
|
* It slows things down a bit, but I must ignore predicates after
|
||||||
|
* having seen an action (5-5-2008).
|
||||||
|
*/
|
||||||
|
Handle action(ActionAST action);
|
||||||
|
|
||||||
|
Handle alt(List<Handle> els);
|
||||||
|
|
||||||
|
/** From A|B|..|Z alternative block build
|
||||||
|
*
|
||||||
|
* o->o-A->o->o (last ATNState is blockEndATNState pointed to by all alts)
|
||||||
|
* | ^
|
||||||
|
* o->o-B->o--|
|
||||||
|
* | |
|
||||||
|
* ... |
|
||||||
|
* | |
|
||||||
|
* o->o-Z->o--|
|
||||||
|
*
|
||||||
|
* So every alternative gets begin ATNState connected by epsilon
|
||||||
|
* and every alt right side points at a block end ATNState. There is a
|
||||||
|
* new ATNState in the ATNState in the Grip for each alt plus one for the
|
||||||
|
* end ATNState.
|
||||||
|
*
|
||||||
|
* Special case: only one alternative: don't make a block with alt
|
||||||
|
* begin/end.
|
||||||
|
*
|
||||||
|
* Special case: if just a list of tokens/chars/sets, then collapse
|
||||||
|
* to a single edge'd o-set->o graph.
|
||||||
|
*
|
||||||
|
* Set alt number (1..n) in the left-Transition ATNState.
|
||||||
|
*/
|
||||||
|
Handle block(BlockAST blockAST, GrammarAST ebnfRoot, List<Handle> alternativeGrips);
|
||||||
|
|
||||||
|
Handle notBlock(GrammarAST blockAST, List<GrammarAST> terminals);
|
||||||
|
|
||||||
|
/** From (A)? build either:
|
||||||
|
*
|
||||||
|
* o--A->o
|
||||||
|
* | ^
|
||||||
|
* o---->|
|
||||||
|
*
|
||||||
|
* or, if A is a block, just add an empty alt to the end of the block
|
||||||
|
*/
|
||||||
|
Handle optional(GrammarAST optAST, Handle blk);
|
||||||
|
|
||||||
|
/** From (A)+ build
|
||||||
|
*
|
||||||
|
* |---| (Transition 2 from A.right points at alt 1)
|
||||||
|
* v | (follow of loop is Transition 1)
|
||||||
|
* o->o-A-o->o
|
||||||
|
*
|
||||||
|
* Meaning that the last ATNState in A points back to A's left Transition ATNState
|
||||||
|
* and we add a new begin/end ATNState. A can be single alternative or
|
||||||
|
* multiple.
|
||||||
|
*
|
||||||
|
* During analysis we'll call the follow link (transition 1) alt n+1 for
|
||||||
|
* an n-alt A block.
|
||||||
|
*/
|
||||||
|
Handle plus(GrammarAST plusAST, Handle blk);
|
||||||
|
|
||||||
|
/** From (A)* build
|
||||||
|
*
|
||||||
|
* |---|
|
||||||
|
* v |
|
||||||
|
* o->o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1)
|
||||||
|
* | ^
|
||||||
|
* o---------| (optional branch is 2nd alt of optional block containing A+)
|
||||||
|
*
|
||||||
|
* Meaning that the last (end) ATNState in A points back to A's
|
||||||
|
* left side ATNState and we add 3 new ATNStates (the
|
||||||
|
* optional branch is built just like an optional subrule).
|
||||||
|
* See the Aplus() method for more on the loop back Transition.
|
||||||
|
* The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we
|
||||||
|
* can detect nested (A*)* loops and insert an extra node. Previously,
|
||||||
|
* two blocks shared same EOB node.
|
||||||
|
*
|
||||||
|
* There are 2 or 3 decision points in a A*. If A is not a block (i.e.,
|
||||||
|
* it only has one alt), then there are two decisions: the optional bypass
|
||||||
|
* and then loopback. If A is a block of alts, then there are three
|
||||||
|
* decisions: bypass, loopback, and A's decision point.
|
||||||
|
*
|
||||||
|
* Note that the optional bypass must be outside the loop as (A|B)* is
|
||||||
|
* not the same thing as (A|B|)+.
|
||||||
|
*
|
||||||
|
* This is an accurate ATN representation of the meaning of (A)*, but
|
||||||
|
* for generating code, I don't need a DFA for the optional branch by
|
||||||
|
* virtue of how I generate code. The exit-loopback-branch decision
|
||||||
|
* is sufficient to let me make an appropriate enter, exit, loop
|
||||||
|
* determination. See codegen.g
|
||||||
|
*/
|
||||||
|
Handle star(GrammarAST starAST, Handle blk);
|
||||||
|
|
||||||
|
/** Build an atom with all possible values in its label */
|
||||||
|
Handle wildcard(GrammarAST associatedAST);
|
||||||
|
|
||||||
|
/** Build a subrule matching ^(. .*) (any tree or node). Let's use
|
||||||
|
* (^(. .+) | .) to be safe.
|
||||||
|
*/
|
||||||
|
Handle wildcardTree(GrammarAST associatedAST);
|
||||||
|
}
|
|
@ -0,0 +1,85 @@
|
||||||
|
package org.antlr.v4.automata;
|
||||||
|
|
||||||
|
import org.antlr.v4.runtime.atn.*;
|
||||||
|
import org.antlr.v4.tool.Grammar;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/** An ATN walker that knows how to dump them to serialized strings. */
|
||||||
|
public class ATNPrinter {
|
||||||
|
List<ATNState> work;
|
||||||
|
Set<ATNState> marked;
|
||||||
|
Grammar g;
|
||||||
|
ATNState start;
|
||||||
|
|
||||||
|
public ATNPrinter(Grammar g, ATNState start) {
|
||||||
|
this.g = g;
|
||||||
|
this.start = start;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
if ( start==null ) return null;
|
||||||
|
marked = new HashSet<ATNState>();
|
||||||
|
|
||||||
|
work = new ArrayList<ATNState>();
|
||||||
|
work.add(start);
|
||||||
|
|
||||||
|
StringBuilder buf = new StringBuilder();
|
||||||
|
ATNState s = null;
|
||||||
|
|
||||||
|
while ( work.size()>0 ) {
|
||||||
|
s = work.remove(0);
|
||||||
|
if ( marked.contains(s) ) continue;
|
||||||
|
int n = s.getNumberOfTransitions();
|
||||||
|
//System.out.println("visit "+getATNStateString(s)+"; edges="+n);
|
||||||
|
marked.add(s);
|
||||||
|
for (int i=0; i<n; i++) {
|
||||||
|
Transition t = s.transition(i);
|
||||||
|
if ( !(s instanceof RuleStopState) ) { // don't add follow states to work
|
||||||
|
if ( t instanceof RuleTransition ) work.add(((RuleTransition)t).followState);
|
||||||
|
else work.add( t.target );
|
||||||
|
}
|
||||||
|
buf.append(getStateString(s));
|
||||||
|
if ( t instanceof EpsilonTransition ) {
|
||||||
|
buf.append("->"+ getStateString(t.target)+'\n');
|
||||||
|
}
|
||||||
|
else if ( t instanceof RuleTransition ) {
|
||||||
|
buf.append("->"+ getStateString(t.target)+'\n');
|
||||||
|
}
|
||||||
|
else if ( t instanceof ActionTransition ) {
|
||||||
|
ActionTransition a = (ActionTransition)t;
|
||||||
|
buf.append("-"+a.actionAST.getText()+"->"+ getStateString(t.target)+'\n');
|
||||||
|
}
|
||||||
|
else if ( t instanceof AtomTransition ) {
|
||||||
|
AtomTransition a = (AtomTransition)t;
|
||||||
|
buf.append("-"+a.toString(g)+"->"+ getStateString(t.target)+'\n');
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
buf.append("-"+t.toString(g)+"->"+ getStateString(t.target)+'\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
String getStateString(ATNState s) {
|
||||||
|
if ( s==null ) {
|
||||||
|
System.out.println("s==null");
|
||||||
|
}
|
||||||
|
int n = s.stateNumber;
|
||||||
|
String stateStr = "s"+n;
|
||||||
|
if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
|
||||||
|
else if ( s instanceof PlusBlockStartState ) stateStr = "PlusBlockStart_"+n;
|
||||||
|
else if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
|
||||||
|
else if ( s instanceof BlockStartState) stateStr = "BlockStart_"+n;
|
||||||
|
else if ( s instanceof BlockEndState ) stateStr = "BlockEnd_"+n;
|
||||||
|
else if ( s instanceof RuleStartState) stateStr = "RuleStart_"+s.rule.name+"_"+n;
|
||||||
|
else if ( s instanceof RuleStopState ) stateStr = "RuleStop_"+s.rule.name+"_"+n;
|
||||||
|
else if ( s instanceof PlusLoopbackState) stateStr = "PlusLoopBack_"+n;
|
||||||
|
else if ( s instanceof StarLoopbackState) stateStr = "StarLoopBack_"+n;
|
||||||
|
return stateStr;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,218 @@
|
||||||
|
package org.antlr.v4.automata;
|
||||||
|
|
||||||
|
import org.antlr.v4.misc.*;
|
||||||
|
import org.antlr.v4.runtime.atn.*;
|
||||||
|
import org.antlr.v4.tool.Rule;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class ATNSerializer {
|
||||||
|
public ATN atn;
|
||||||
|
public List<IntervalSet> sets = new ArrayList<IntervalSet>();
|
||||||
|
|
||||||
|
public ATNSerializer(ATN atn) { this.atn = atn; }
|
||||||
|
|
||||||
|
/** Serialize state descriptors, edge descriptors, and decision->state map
|
||||||
|
* into list of ints:
|
||||||
|
*
|
||||||
|
* grammar-type, (ANTLRParser.LEXER, ...)
|
||||||
|
* max token type,
|
||||||
|
* num states,
|
||||||
|
* state-0-type ruleIndex, state-1-type ruleIndex, ...
|
||||||
|
* num rules,
|
||||||
|
* rule-1-start-state rule-1-args, rule-2-start-state rule-2-args, ...
|
||||||
|
* (args are token type,actionIndex in lexer else 0,0)
|
||||||
|
* num modes,
|
||||||
|
* mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
|
||||||
|
* num sets
|
||||||
|
* set-0-interval-count intervals, set-1-interval-count intervals, ...
|
||||||
|
* num total edges,
|
||||||
|
* src, trg, edge-type, edge arg1, optional edge arg2 (present always), ...
|
||||||
|
* num decisions,
|
||||||
|
* decision-0-start-state, decision-1-start-state, ...
|
||||||
|
*
|
||||||
|
* Convenient to pack into unsigned shorts to make as Java string.
|
||||||
|
*/
|
||||||
|
public List<Integer> serialize() {
|
||||||
|
List<Integer> data = new ArrayList<Integer>();
|
||||||
|
data.add(atn.g.getType());
|
||||||
|
data.add(atn.g.getMaxTokenType());
|
||||||
|
data.add(atn.states.size());
|
||||||
|
int nedges = 0;
|
||||||
|
// dump states, count edges and collect sets while doing so
|
||||||
|
for (ATNState s : atn.states) {
|
||||||
|
data.add(ATNState.serializationTypes.get(s.getClass()));
|
||||||
|
if ( s.rule!=null ) data.add(s.rule.index);
|
||||||
|
else data.add(s.ruleIndex);
|
||||||
|
nedges += s.getNumberOfTransitions();
|
||||||
|
for (int i=0; i<s.getNumberOfTransitions(); i++) {
|
||||||
|
Transition t = s.transition(i);
|
||||||
|
int edgeType = Transition.serializationTypes.get(t.getClass());
|
||||||
|
if ( edgeType == Transition.SET || edgeType == Transition.NOT_SET ) {
|
||||||
|
SetTransition st = (SetTransition)t;
|
||||||
|
sets.add(st.label);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int nrules = atn.rules.size();
|
||||||
|
data.add(nrules);
|
||||||
|
for (int r=1; r<=nrules; r++) {
|
||||||
|
ATNState ruleStartState = atn.rules.get(r-1);
|
||||||
|
data.add(ruleStartState.stateNumber);
|
||||||
|
if ( atn.g.isLexer() ) {
|
||||||
|
data.add(atn.ruleToTokenType.get(r));
|
||||||
|
String ruleName = atn.g.rules.getKey(r-1);
|
||||||
|
Rule rule = atn.g.getRule(ruleName);
|
||||||
|
data.add(rule.actionIndex);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
data.add(0);
|
||||||
|
data.add(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int nmodes = atn.modeToStartState.size();
|
||||||
|
data.add(nmodes);
|
||||||
|
if ( nmodes>0 ) {
|
||||||
|
for (ATNState modeStartState : atn.modeToStartState) {
|
||||||
|
data.add(modeStartState.stateNumber);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int nsets = sets.size();
|
||||||
|
data.add(nsets);
|
||||||
|
for (IntervalSet set : sets) {
|
||||||
|
data.add(set.getIntervals().size());
|
||||||
|
for (Interval I : set.getIntervals()) {
|
||||||
|
data.add(I.a);
|
||||||
|
data.add(I.b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
data.add(nedges);
|
||||||
|
int setIndex = 0;
|
||||||
|
for (ATNState s : atn.states) {
|
||||||
|
for (int i=0; i<s.getNumberOfTransitions(); i++) {
|
||||||
|
Transition t = s.transition(i);
|
||||||
|
int src = s.stateNumber;
|
||||||
|
int trg = t.target.stateNumber;
|
||||||
|
int edgeType = Transition.serializationTypes.get(t.getClass());
|
||||||
|
int arg1 = 0;
|
||||||
|
int arg2 = 0;
|
||||||
|
switch ( edgeType ) {
|
||||||
|
case Transition.RULE :
|
||||||
|
trg = ((RuleTransition)t).followState.stateNumber;
|
||||||
|
arg1 = ((RuleTransition)t).target.stateNumber;
|
||||||
|
if ( ((RuleTransition)t).rule!=null ) {
|
||||||
|
arg2 = ((RuleTransition)t).rule.index;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
arg2 = ((RuleTransition)t).ruleIndex;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case Transition.PREDICATE :
|
||||||
|
PredicateTransition pt = (PredicateTransition)t;
|
||||||
|
arg1 = pt.ruleIndex;
|
||||||
|
arg2 = pt.predIndex;
|
||||||
|
break;
|
||||||
|
case Transition.RANGE :
|
||||||
|
arg1 = ((RangeTransition)t).from;
|
||||||
|
arg2 = ((RangeTransition)t).to;
|
||||||
|
break;
|
||||||
|
case Transition.ATOM :
|
||||||
|
arg1 = ((AtomTransition)t).label;
|
||||||
|
break;
|
||||||
|
case Transition.ACTION :
|
||||||
|
ActionTransition at = (ActionTransition)t;
|
||||||
|
arg1 = at.ruleIndex;
|
||||||
|
arg2 = at.actionIndex;
|
||||||
|
break;
|
||||||
|
case Transition.SET :
|
||||||
|
arg1 = setIndex++;
|
||||||
|
break;
|
||||||
|
case Transition.NOT_ATOM :
|
||||||
|
arg1 = ((NotAtomTransition)t).label;
|
||||||
|
break;
|
||||||
|
case Transition.NOT_SET :
|
||||||
|
arg1 = setIndex++;
|
||||||
|
break;
|
||||||
|
case Transition.WILDCARD :
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
data.add(src);
|
||||||
|
data.add(trg);
|
||||||
|
data.add(edgeType);
|
||||||
|
data.add(arg1);
|
||||||
|
data.add(arg2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int ndecisions = atn.decisionToATNState.size();
|
||||||
|
data.add(ndecisions);
|
||||||
|
for (ATNState decStartState : atn.decisionToATNState) {
|
||||||
|
data.add(decStartState.stateNumber);
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String decode(char[] data) {
|
||||||
|
StringBuilder buf = new StringBuilder();
|
||||||
|
int p = 0;
|
||||||
|
int grammarType = ATNInterpreter.toInt(data[p++]);
|
||||||
|
int maxType = ATNInterpreter.toInt(data[p++]);
|
||||||
|
buf.append("max type "+maxType+"\n");
|
||||||
|
int nstates = ATNInterpreter.toInt(data[p++]);
|
||||||
|
for (int i=1; i<=nstates; i++) {
|
||||||
|
int stype = ATNInterpreter.toInt(data[p++]);
|
||||||
|
int ruleIndex = ATNInterpreter.toInt(data[p++]);
|
||||||
|
if ( stype==0 ) continue; // ignore bad type of states
|
||||||
|
buf.append((i - 1) + ":" +
|
||||||
|
ATNState.serializationNames[stype] + " "+
|
||||||
|
ruleIndex + "\n");
|
||||||
|
}
|
||||||
|
int nrules = ATNInterpreter.toInt(data[p++]);
|
||||||
|
for (int i=1; i<=nrules; i++) {
|
||||||
|
int s = ATNInterpreter.toInt(data[p++]);
|
||||||
|
int arg1 = ATNInterpreter.toInt(data[p++]);
|
||||||
|
int arg2 = ATNInterpreter.toInt(data[p++]);
|
||||||
|
buf.append("rule "+i+":"+s+" "+arg1+","+arg2+'\n');
|
||||||
|
}
|
||||||
|
int nmodes = ATNInterpreter.toInt(data[p++]);
|
||||||
|
for (int i=0; i<nmodes; i++) {
|
||||||
|
int s = ATNInterpreter.toInt(data[p++]);
|
||||||
|
buf.append("mode "+i+":"+s+'\n');
|
||||||
|
}
|
||||||
|
int nsets = ATNInterpreter.toInt(data[p++]);
|
||||||
|
for (int i=1; i<=nsets; i++) {
|
||||||
|
int nintervals = ATNInterpreter.toInt(data[p++]);
|
||||||
|
buf.append((i-1)+":");
|
||||||
|
for (int j=1; j<=nintervals; j++) {
|
||||||
|
if ( j>1 ) buf.append(", ");
|
||||||
|
buf.append(getTokenName(ATNInterpreter.toInt(data[p]))+".."+getTokenName(ATNInterpreter.toInt(data[p+1])));
|
||||||
|
p += 2;
|
||||||
|
}
|
||||||
|
buf.append("\n");
|
||||||
|
}
|
||||||
|
int nedges = ATNInterpreter.toInt(data[p++]);
|
||||||
|
for (int i=1; i<=nedges; i++) {
|
||||||
|
int src = ATNInterpreter.toInt(data[p]);
|
||||||
|
int trg = ATNInterpreter.toInt(data[p+1]);
|
||||||
|
int ttype = ATNInterpreter.toInt(data[p+2]);
|
||||||
|
int arg1 = ATNInterpreter.toInt(data[p+3]);
|
||||||
|
int arg2 = ATNInterpreter.toInt(data[p+4]);
|
||||||
|
buf.append(src+"->"+trg+
|
||||||
|
" "+Transition.serializationNames[ttype]+
|
||||||
|
" "+arg1+","+arg2+
|
||||||
|
"\n");
|
||||||
|
p += 5;
|
||||||
|
}
|
||||||
|
int ndecisions = ATNInterpreter.toInt(data[p++]);
|
||||||
|
for (int i=1; i<=ndecisions; i++) {
|
||||||
|
int s = ATNInterpreter.toInt(data[p++]);
|
||||||
|
buf.append((i-1)+":"+s+"\n");
|
||||||
|
}
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTokenName(int t) {
|
||||||
|
if ( t==-1 ) return "EOF";
|
||||||
|
if ( atn.g!=null ) return atn.g.getTokenDisplayName(t);
|
||||||
|
return String.valueOf(t);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,90 @@
|
||||||
|
package org.antlr.v4.automata;
|
||||||
|
|
||||||
|
import org.antlr.v4.misc.CharSupport;
|
||||||
|
import org.antlr.v4.runtime.atn.*;
|
||||||
|
import org.antlr.v4.tool.*;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class LexerATNFactory extends ParserATNFactory {
|
||||||
|
public LexerATNFactory(LexerGrammar g) { super(g); }
|
||||||
|
|
||||||
|
public ATN createATN() {
|
||||||
|
// BUILD ALL START STATES (ONE PER MODE)
|
||||||
|
for (String modeName : ((LexerGrammar)g).modes.keySet()) {
|
||||||
|
// create s0, start state; implied Tokens rule node
|
||||||
|
TokensStartState startState =
|
||||||
|
(TokensStartState)newState(TokensStartState.class, null);
|
||||||
|
atn.modeNameToStartState.put(modeName, startState);
|
||||||
|
atn.modeToStartState.add(startState);
|
||||||
|
atn.defineDecisionState(startState);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CREATE ATN FOR EACH RULE
|
||||||
|
_createATN(g.rules.values());
|
||||||
|
|
||||||
|
// LINK MODE START STATE TO EACH TOKEN RULE
|
||||||
|
for (String modeName : ((LexerGrammar)g).modes.keySet()) {
|
||||||
|
List<Rule> rules = ((LexerGrammar)g).modes.get(modeName);
|
||||||
|
TokensStartState startState = atn.modeNameToStartState.get(modeName);
|
||||||
|
for (Rule r : rules) {
|
||||||
|
if ( !r.isFragment() ) {
|
||||||
|
RuleStartState s = atn.ruleToStartState.get(r);
|
||||||
|
epsilon(startState, s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return atn;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Handle action(ActionAST action) {
|
||||||
|
// Handle h = super.action(action);
|
||||||
|
// ActionTransition a = (ActionTransition)h.left.transition(0);
|
||||||
|
// a.actionIndex = g.actions.get(action);
|
||||||
|
// return h;
|
||||||
|
// no actions in lexer ATN; just one on end and we exec via action number
|
||||||
|
ATNState x = newState(action);
|
||||||
|
return new Handle(x, x); // return just one blank state
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Handle range(GrammarAST a, GrammarAST b) {
|
||||||
|
ATNState left = newState(a);
|
||||||
|
ATNState right = newState(b);
|
||||||
|
int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
|
||||||
|
int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
|
||||||
|
left.transition = new RangeTransition(t1, t2, right);
|
||||||
|
a.atnState = left;
|
||||||
|
b.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** For a lexer, a string is a sequence of char to match. That is,
|
||||||
|
* "fog" is treated as 'f' 'o' 'g' not as a single transition in
|
||||||
|
* the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
|
||||||
|
* for n characters.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Handle stringLiteral(TerminalAST stringLiteralAST) {
|
||||||
|
String chars = stringLiteralAST.getText();
|
||||||
|
chars = CharSupport.getStringFromGrammarStringLiteral(chars);
|
||||||
|
int n = chars.length();
|
||||||
|
ATNState left = newState(stringLiteralAST);
|
||||||
|
ATNState prev = left;
|
||||||
|
ATNState right = null;
|
||||||
|
for (int i=0; i<n; i++) {
|
||||||
|
right = newState(stringLiteralAST);
|
||||||
|
prev.transition = new AtomTransition(chars.charAt(i), right);
|
||||||
|
prev = right;
|
||||||
|
}
|
||||||
|
stringLiteralAST.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Handle tokenRef(TerminalAST node) {
|
||||||
|
return _ruleRef(node);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,506 @@
|
||||||
|
package org.antlr.v4.automata;


import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.*;
import org.antlr.v4.misc.*;
import org.antlr.v4.parse.*;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.tool.*;

// Constructor is used by newState(Class, GrammarAST) for reflective state creation.
import java.lang.reflect.Constructor;
import java.util.*;
|
||||||
|
/** ATN construction routines triggered by ATNBuilder.g.
|
||||||
|
*
|
||||||
|
* No side-effects. It builds an ATN object and returns it.
|
||||||
|
*/
|
||||||
|
public class ParserATNFactory implements ATNFactory {
|
||||||
|
public Grammar g;
|
||||||
|
public Rule currentRule;
|
||||||
|
ATN atn;
|
||||||
|
|
||||||
|
public ParserATNFactory(Grammar g) { this.g = g; atn = new ATN(g); }
|
||||||
|
|
||||||
|
public ATN createATN() {
|
||||||
|
_createATN(g.rules.values());
|
||||||
|
atn.maxTokenType = g.getMaxTokenType();
|
||||||
|
addEOFTransitionToStartRules();
|
||||||
|
return atn;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void _createATN(Collection<Rule> rules) {
|
||||||
|
createRuleStartAndStopATNStates();
|
||||||
|
|
||||||
|
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
|
||||||
|
for (Rule r : rules) {
|
||||||
|
// find rule's block
|
||||||
|
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
|
||||||
|
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
|
||||||
|
ATNBuilder b = new ATNBuilder(nodes,this);
|
||||||
|
try {
|
||||||
|
setCurrentRuleName(r.name);
|
||||||
|
Handle h = b.block(null);
|
||||||
|
rule(r.ast, r.name, h);
|
||||||
|
}
|
||||||
|
catch (RecognitionException re) {
|
||||||
|
ErrorManager.fatalInternalError("bad grammar AST structure", re);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCurrentRuleName(String name) {
|
||||||
|
this.currentRule = g.getRule(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* start->ruleblock->end */
|
||||||
|
public Handle rule(GrammarAST ruleAST, String name, Handle blk) {
|
||||||
|
Rule r = g.getRule(name);
|
||||||
|
RuleStartState start = atn.ruleToStartState.get(r);
|
||||||
|
epsilon(start, blk.left);
|
||||||
|
RuleStopState stop = atn.ruleToStopState.get(r);
|
||||||
|
epsilon(blk.right, stop);
|
||||||
|
Handle h = new Handle(start, stop);
|
||||||
|
// FASerializer ser = new FASerializer(g, h.left);
|
||||||
|
// System.out.println(ruleAST.toStringTree()+":\n"+ser);
|
||||||
|
ruleAST.atnState = start;
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** From label A build Graph o-A->o */
|
||||||
|
public Handle tokenRef(TerminalAST node) {
|
||||||
|
ATNState left = newState(node);
|
||||||
|
ATNState right = newState(node);
|
||||||
|
int ttype = g.getTokenType(node.getText());
|
||||||
|
left.transition = new AtomTransition(ttype, right);
|
||||||
|
right.incidentTransition = left.transition;
|
||||||
|
node.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** From set build single edge graph o->o-set->o. To conform to
|
||||||
|
* what an alt block looks like, must have extra state on left.
|
||||||
|
*/
|
||||||
|
public Handle set(IntervalSet set, GrammarAST associatedAST) {
|
||||||
|
ATNState left = newState(associatedAST);
|
||||||
|
ATNState right = newState(associatedAST);
|
||||||
|
left.transition = new SetTransition(associatedAST, set, right);
|
||||||
|
right.incidentTransition = left.transition;
|
||||||
|
associatedAST.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Handle tree(List<Handle> els) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Not valid for non-lexers */
|
||||||
|
public Handle range(GrammarAST a, GrammarAST b) { throw new UnsupportedOperationException(); }
|
||||||
|
|
||||||
|
/** ~atom only */
|
||||||
|
public Handle not(GrammarAST node) {
|
||||||
|
ATNState left = newState(node);
|
||||||
|
ATNState right = newState(node);
|
||||||
|
int ttype = getTokenType((GrammarAST) node.getChild(0));
|
||||||
|
left.transition = new NotAtomTransition(ttype, right);
|
||||||
|
right.incidentTransition = left.transition;
|
||||||
|
node.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected int getTokenType(GrammarAST atom) {
|
||||||
|
int ttype;
|
||||||
|
if ( g.isLexer() ) {
|
||||||
|
ttype = CharSupport.getCharValueFromGrammarCharLiteral(atom.getText());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ttype = g.getTokenType(atom.getText());
|
||||||
|
}
|
||||||
|
return ttype;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** For a non-lexer, just build a simple token reference atom. */
|
||||||
|
public Handle stringLiteral(TerminalAST stringLiteralAST) {
|
||||||
|
return tokenRef(stringLiteralAST);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** For reference to rule r, build
|
||||||
|
*
|
||||||
|
* o->(r) o
|
||||||
|
*
|
||||||
|
* where (r) is the start of rule r and the trailing o is not linked
|
||||||
|
* to from rule ref state directly (uses followState).
|
||||||
|
*/
|
||||||
|
public Handle ruleRef(GrammarAST node) {
|
||||||
|
Handle h = _ruleRef(node);
|
||||||
|
Rule r = g.getRule(node.getText());
|
||||||
|
addFollowLink(r, h.right);
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Handle _ruleRef(GrammarAST node) {
|
||||||
|
Rule r = g.getRule(node.getText());
|
||||||
|
RuleStartState start = atn.ruleToStartState.get(r);
|
||||||
|
ATNState left = newState(node);
|
||||||
|
ATNState right = newState(node);
|
||||||
|
RuleTransition call = new RuleTransition(r, start, right);
|
||||||
|
left.addTransition(call);
|
||||||
|
|
||||||
|
node.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addFollowLink(Rule r, ATNState right) {
|
||||||
|
// add follow edge from end of invoked rule
|
||||||
|
RuleStopState stop = atn.ruleToStopState.get(r);
|
||||||
|
epsilon(stop, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** From an empty alternative build o-e->o */
|
||||||
|
public Handle epsilon(GrammarAST node) {
|
||||||
|
ATNState left = newState(node);
|
||||||
|
ATNState right = newState(node);
|
||||||
|
epsilon(left, right);
|
||||||
|
node.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build what amounts to an epsilon transition with a semantic
|
||||||
|
* predicate action. The pred is a pointer into the AST of
|
||||||
|
* the SEMPRED token.
|
||||||
|
*/
|
||||||
|
public Handle sempred(PredAST pred) {
|
||||||
|
//System.out.println("sempred: "+ pred);
|
||||||
|
ATNState left = newState(pred);
|
||||||
|
ATNState right = newState(pred);
|
||||||
|
PredicateTransition p = new PredicateTransition(pred, right);
|
||||||
|
p.ruleIndex = currentRule.index;
|
||||||
|
p.predIndex = g.sempreds.get(pred);
|
||||||
|
left.transition = p;
|
||||||
|
pred.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Handle gated_sempred(GrammarAST pred) {
|
||||||
|
ATNState left = newState(pred);
|
||||||
|
ATNState right = newState(pred);
|
||||||
|
left.transition = new PredicateTransition(pred, right);
|
||||||
|
pred.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build what amounts to an epsilon transition with an action.
|
||||||
|
* The action goes into ATN though it is ignored during analysis.
|
||||||
|
* It slows things down a bit, but I must ignore predicates after
|
||||||
|
* having seen an action (5-5-2008).
|
||||||
|
*/
|
||||||
|
public Handle action(ActionAST action) {
|
||||||
|
//System.out.println("action: "+action);
|
||||||
|
ATNState left = newState(action);
|
||||||
|
ATNState right = newState(action);
|
||||||
|
ActionTransition a = new ActionTransition(action, right);
|
||||||
|
a.ruleIndex = currentRule.index;
|
||||||
|
if ( action.getType()==ANTLRParser.FORCED_ACTION ) {
|
||||||
|
a.actionIndex = g.actions.get(action);
|
||||||
|
}
|
||||||
|
left.transition = a;
|
||||||
|
action.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** From A|B|..|Z alternative block build
|
||||||
|
*
|
||||||
|
* o->o-A->o->o (last ATNState is BlockEndState pointed to by all alts)
|
||||||
|
* | ^
|
||||||
|
* |->o-B->o--|
|
||||||
|
* | |
|
||||||
|
* ... |
|
||||||
|
* | |
|
||||||
|
* |->o-Z->o--|
|
||||||
|
*
|
||||||
|
* So start node points at every alternative with epsilon transition
|
||||||
|
* and every alt right side points at a block end ATNState.
|
||||||
|
*
|
||||||
|
* Special case: only one alternative: don't make a block with alt
|
||||||
|
* begin/end.
|
||||||
|
*
|
||||||
|
* Special case: if just a list of tokens/chars/sets, then collapse
|
||||||
|
* to a single edge'd o-set->o graph.
|
||||||
|
*
|
||||||
|
* TODO: Set alt number (1..n) in the states?
|
||||||
|
*/
|
||||||
|
public Handle block(BlockAST blkAST, GrammarAST ebnfRoot, List<Handle> alts) {
|
||||||
|
if ( ebnfRoot==null ) {
|
||||||
|
if ( alts.size()==1 ) {
|
||||||
|
Handle h = alts.get(0);
|
||||||
|
blkAST.atnState = h.left;
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
BlockStartState start = (BlockStartState)newState(BlockStartState.class, blkAST);
|
||||||
|
return makeBlock(start, blkAST, alts);
|
||||||
|
}
|
||||||
|
switch ( ebnfRoot.getType() ) {
|
||||||
|
case ANTLRParser.OPTIONAL :
|
||||||
|
BlockStartState start = (BlockStartState)newState(BlockStartState.class, blkAST);
|
||||||
|
Handle h = makeBlock(start, blkAST, alts);
|
||||||
|
return optional(ebnfRoot, h);
|
||||||
|
case ANTLRParser.CLOSURE :
|
||||||
|
BlockStartState star = (StarBlockStartState)newState(StarBlockStartState.class, ebnfRoot);
|
||||||
|
h = makeBlock(star, blkAST, alts);
|
||||||
|
return star(ebnfRoot, h);
|
||||||
|
case ANTLRParser.POSITIVE_CLOSURE :
|
||||||
|
PlusBlockStartState plus = (PlusBlockStartState)newState(PlusBlockStartState.class, ebnfRoot);
|
||||||
|
h = makeBlock(plus, blkAST, alts);
|
||||||
|
return plus(ebnfRoot, h);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Handle makeBlock(BlockStartState start, GrammarAST blkAST, List<Handle> alts) {
|
||||||
|
BlockEndState end = (BlockEndState)newState(BlockEndState.class, blkAST);
|
||||||
|
start.endState = end;
|
||||||
|
for (Handle alt : alts) {
|
||||||
|
epsilon(start, alt.left);
|
||||||
|
epsilon(alt.right, end);
|
||||||
|
}
|
||||||
|
atn.defineDecisionState(start);
|
||||||
|
Handle h = new Handle(start, end);
|
||||||
|
// FASerializer ser = new FASerializer(g, h.left);
|
||||||
|
// System.out.println(blkAST.toStringTree()+":\n"+ser);
|
||||||
|
blkAST.atnState = start;
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Handle notBlock(GrammarAST notAST, List<GrammarAST> terminals) {
|
||||||
|
// assume list of atoms
|
||||||
|
IntervalSet notSet = new IntervalSet();
|
||||||
|
for (GrammarAST elemAST : terminals) {
|
||||||
|
if ( elemAST.getType()==ANTLRParser.RANGE ) {
|
||||||
|
GrammarAST from = (GrammarAST)elemAST.getChild(0);
|
||||||
|
GrammarAST to = (GrammarAST)elemAST.getChild(1);
|
||||||
|
notSet.add(getTokenType(from), getTokenType(to));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
notSet.add(getTokenType(elemAST));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ATNState left = newState(notAST);
|
||||||
|
ATNState right = newState(notAST);
|
||||||
|
left.transition = new NotSetTransition(notAST, notSet, right);
|
||||||
|
right.incidentTransition = left.transition;
|
||||||
|
notAST.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Handle alt(List<Handle> els) {
|
||||||
|
Handle prev = null;
|
||||||
|
for (Handle el : els) { // hook up elements
|
||||||
|
if ( prev!=null ) epsilon(prev.right, el.left);
|
||||||
|
prev = el;
|
||||||
|
}
|
||||||
|
Handle first = els.get(0);
|
||||||
|
Handle last = els.get(els.size()-1);
|
||||||
|
if ( first==null || last==null ) {
|
||||||
|
g.tool.errMgr.toolError(ErrorType.INTERNAL_ERROR, "alt Handle has first|last == null");
|
||||||
|
}
|
||||||
|
return new Handle(first.left, last.right);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** From (A)? build either:
|
||||||
|
*
|
||||||
|
* o--A->o
|
||||||
|
* | ^
|
||||||
|
* o---->|
|
||||||
|
*
|
||||||
|
* or, if A is a block, just add an empty alt to the end of the block
|
||||||
|
*/
|
||||||
|
public Handle optional(GrammarAST optAST, Handle blk) {
|
||||||
|
// TODO: no such thing as nongreedy ()? so give error
|
||||||
|
BlockStartState blkStart = (BlockStartState)blk.left;
|
||||||
|
epsilon(blkStart, blk.right);
|
||||||
|
optAST.atnState = blk.left;
|
||||||
|
return blk;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** From (blk)+ build
|
||||||
|
*
|
||||||
|
* |---------|
|
||||||
|
* v |
|
||||||
|
* o->o-A-o->o->o->o loop back points at start of all alts
|
||||||
|
* | ^
|
||||||
|
* |->o-B-o--|
|
||||||
|
*
|
||||||
|
* Meaning that the last ATNState in A blk points to loop back node,
|
||||||
|
* which points back to block start. We add start/end nodes to
|
||||||
|
* outside.
|
||||||
|
*/
|
||||||
|
public Handle plus(GrammarAST plusAST, Handle blk) {
|
||||||
|
PlusBlockStartState start = (PlusBlockStartState)blk.left;
|
||||||
|
atn.defineDecisionState(start); // we don't use in code gen though
|
||||||
|
plusAST.atnState = start;
|
||||||
|
PlusLoopbackState loop = (PlusLoopbackState)newState(PlusLoopbackState.class, plusAST);
|
||||||
|
ATNState end = (ATNState)newState(ATNState.class, plusAST);
|
||||||
|
start.loopBackState = loop;
|
||||||
|
epsilon(blk.right, loop);
|
||||||
|
BlockAST blkAST = (BlockAST)plusAST.getChild(0);
|
||||||
|
// if not greedy, priority to exit branch; make it first
|
||||||
|
if ( !isGreedy(blkAST) ) epsilon(loop, end);
|
||||||
|
// connect loop back to all alt left edges
|
||||||
|
for (Transition trans : start.transitions) {
|
||||||
|
epsilon(loop, trans.target);
|
||||||
|
}
|
||||||
|
// if greedy, last alt of decisions is exit branch
|
||||||
|
if ( isGreedy(blkAST) ) epsilon(loop, end);
|
||||||
|
atn.defineDecisionState(loop);
|
||||||
|
return new Handle(start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** From (blk)* build
|
||||||
|
*
|
||||||
|
* |----------|
|
||||||
|
* v |
|
||||||
|
* o-[blk]-o->o o
|
||||||
|
* | ^
|
||||||
|
* o-------------| (optional branch is nth alt of StarBlockStartState)
|
||||||
|
*
|
||||||
|
* There 1 decision point in a A*.
|
||||||
|
*
|
||||||
|
* Note that the optional bypass must jump outside the loop as (A|B)* is
|
||||||
|
* not the same thing as (A|B|)+.
|
||||||
|
*/
|
||||||
|
public Handle star(GrammarAST starAST, Handle elem) {
|
||||||
|
BlockAST blkAST = (BlockAST)starAST.getChild(0);
|
||||||
|
|
||||||
|
StarBlockStartState blkStart = (StarBlockStartState)elem.left;
|
||||||
|
BlockEndState blkEnd = (BlockEndState)elem.right;
|
||||||
|
|
||||||
|
StarLoopbackState loop = (StarLoopbackState)newState(StarLoopbackState.class, starAST);
|
||||||
|
ATNState end = (ATNState)newState(ATNState.class, starAST);
|
||||||
|
// If greedy, exit alt is last, else exit is first
|
||||||
|
if ( isGreedy(blkAST) ) {
|
||||||
|
epsilon(blkStart, end); // bypass edge
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
blkStart.addTransitionFirst(new EpsilonTransition(end));
|
||||||
|
}
|
||||||
|
epsilon(loop, blkStart);
|
||||||
|
epsilon(blkEnd, loop);
|
||||||
|
starAST.atnState = blkStart;
|
||||||
|
return new Handle(blkStart, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build an atom with all possible values in its label */
|
||||||
|
public Handle wildcard(GrammarAST node) {
|
||||||
|
ATNState left = newState(node);
|
||||||
|
ATNState right = newState(node);
|
||||||
|
int ttype = g.getTokenType(node.getText());
|
||||||
|
left.transition = new WildcardTransition(right);
|
||||||
|
right.incidentTransition = left.transition;
|
||||||
|
node.atnState = left;
|
||||||
|
return new Handle(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Build a subrule matching ^(. .*) (any tree or node). Let's use
|
||||||
|
* (^(. .+) | .) to be safe.
|
||||||
|
*/
|
||||||
|
public Handle wildcardTree(GrammarAST associatedAST) { throw new UnsupportedOperationException(); }
|
||||||
|
|
||||||
|
void epsilon(ATNState a, ATNState b) {
|
||||||
|
if ( a!=null ) a.addTransition(new EpsilonTransition(b));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Define all the rule begin/end ATNStates to solve forward reference
|
||||||
|
* issues.
|
||||||
|
*/
|
||||||
|
void createRuleStartAndStopATNStates() {
|
||||||
|
for (Rule r : g.rules.values()) {
|
||||||
|
RuleStartState start = (RuleStartState)newState(RuleStartState.class, r.ast);
|
||||||
|
RuleStopState stop = (RuleStopState)newState(RuleStopState.class, r.ast);
|
||||||
|
start.stopState = stop;
|
||||||
|
start.setRule(r);
|
||||||
|
stop.setRule(r);
|
||||||
|
atn.ruleToStartState.put(r, start);
|
||||||
|
atn.rules.add(start);
|
||||||
|
atn.ruleToStopState.put(r, stop);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** add an EOF transition to any rule end ATNState that points to nothing
|
||||||
|
* (i.e., for all those rules not invoked by another rule). These
|
||||||
|
* are start symbols then.
|
||||||
|
*
|
||||||
|
* Return the number of grammar entry points; i.e., how many rules are
|
||||||
|
* not invoked by another rule (they can only be invoked from outside).
|
||||||
|
* These are the start rules.
|
||||||
|
*/
|
||||||
|
public int addEOFTransitionToStartRules() {
|
||||||
|
int n = 0;
|
||||||
|
ATNState eofTarget = newState(null); // one unique EOF target for all rules
|
||||||
|
for (Rule r : g.rules.values()) {
|
||||||
|
ATNState stop = atn.ruleToStopState.get(r);
|
||||||
|
if ( stop.getNumberOfTransitions()>0 ) continue;
|
||||||
|
n++;
|
||||||
|
Transition t = new AtomTransition(Token.EOF, eofTarget);
|
||||||
|
stop.addTransition(t);
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Handle label(Handle t) {
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Handle listLabel(Handle t) {
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ATNState newState(Class nodeType, GrammarAST node) {
|
||||||
|
try {
|
||||||
|
Constructor ctor = nodeType.getConstructor();
|
||||||
|
ATNState s = (ATNState)ctor.newInstance();
|
||||||
|
s.ast = node;
|
||||||
|
s.setRule(currentRule);
|
||||||
|
atn.addState(s);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
ErrorManager.internalError("can't create ATN node: "+nodeType.getName(), e);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ATNState newState(GrammarAST node) {
|
||||||
|
ATNState n = new ATNState();
|
||||||
|
n.setRule(currentRule);
|
||||||
|
n.ast = node;
|
||||||
|
atn.addState(n);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ATNState newState() { return newState(null); }
|
||||||
|
|
||||||
|
public boolean isGreedy(BlockAST blkAST) {
|
||||||
|
boolean greedy = true;
|
||||||
|
String greedyOption = blkAST.getOption("greedy");
|
||||||
|
if ( blockHasWildcardAlt(blkAST) || greedyOption!=null&&greedyOption.equals("false") ) {
|
||||||
|
greedy = false;
|
||||||
|
}
|
||||||
|
return greedy;
|
||||||
|
}
|
||||||
|
|
||||||
|
// (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .))
|
||||||
|
public static boolean blockHasWildcardAlt(GrammarAST block) {
|
||||||
|
for (Object alt : block.getChildren()) {
|
||||||
|
if ( !(alt instanceof AltAST) ) continue;
|
||||||
|
AltAST altAST = (AltAST)alt;
|
||||||
|
if ( altAST.getChildCount()==1 ) {
|
||||||
|
Tree e = altAST.getChild(0);
|
||||||
|
if ( e.getType()==ANTLRParser.WILDCARD ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,129 @@
|
||||||
|
package org.antlr.v4.misc;
|
||||||
|
|
||||||
|
import org.antlr.v4.tool.Grammar;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class CharSupport {
|
||||||
|
/** When converting ANTLR char and string literals, here is the
|
||||||
|
* value set of escape chars.
|
||||||
|
*/
|
||||||
|
public static int ANTLRLiteralEscapedCharValue[] = new int[255];
|
||||||
|
|
||||||
|
/** Given a char, we need to be able to show as an ANTLR literal.
|
||||||
|
*/
|
||||||
|
public static String ANTLRLiteralCharValueEscape[] = new String[255];
|
||||||
|
|
||||||
|
static {
|
||||||
|
ANTLRLiteralEscapedCharValue['n'] = '\n';
|
||||||
|
ANTLRLiteralEscapedCharValue['r'] = '\r';
|
||||||
|
ANTLRLiteralEscapedCharValue['t'] = '\t';
|
||||||
|
ANTLRLiteralEscapedCharValue['b'] = '\b';
|
||||||
|
ANTLRLiteralEscapedCharValue['f'] = '\f';
|
||||||
|
ANTLRLiteralEscapedCharValue['\\'] = '\\';
|
||||||
|
ANTLRLiteralEscapedCharValue['\''] = '\'';
|
||||||
|
ANTLRLiteralEscapedCharValue['"'] = '"';
|
||||||
|
ANTLRLiteralCharValueEscape['\n'] = "\\n";
|
||||||
|
ANTLRLiteralCharValueEscape['\r'] = "\\r";
|
||||||
|
ANTLRLiteralCharValueEscape['\t'] = "\\t";
|
||||||
|
ANTLRLiteralCharValueEscape['\b'] = "\\b";
|
||||||
|
ANTLRLiteralCharValueEscape['\f'] = "\\f";
|
||||||
|
ANTLRLiteralCharValueEscape['\\'] = "\\\\";
|
||||||
|
ANTLRLiteralCharValueEscape['\''] = "\\'";
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return a string representing the escaped char for code c. E.g., If c
|
||||||
|
* has value 0x100, you will get "\u0100". ASCII gets the usual
|
||||||
|
* char (non-hex) representation. Control characters are spit out
|
||||||
|
* as unicode. While this is specially set up for returning Java strings,
|
||||||
|
* it can be used by any language target that has the same syntax. :)
|
||||||
|
*/
|
||||||
|
public static String getANTLRCharLiteralForChar(int c) {
|
||||||
|
if ( c< Grammar.MIN_CHAR_VALUE ) {
|
||||||
|
return "'<INVALID>'";
|
||||||
|
}
|
||||||
|
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
|
||||||
|
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
|
||||||
|
}
|
||||||
|
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
|
||||||
|
!Character.isISOControl((char)c) ) {
|
||||||
|
if ( c=='\\' ) {
|
||||||
|
return "'\\\\'";
|
||||||
|
}
|
||||||
|
if ( c=='\'') {
|
||||||
|
return "'\\''";
|
||||||
|
}
|
||||||
|
return '\''+Character.toString((char)c)+'\'';
|
||||||
|
}
|
||||||
|
// turn on the bit above max "\uFFFF" value so that we pad with zeros
|
||||||
|
// then only take last 4 digits
|
||||||
|
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
|
||||||
|
String unicodeStr = "'\\u"+hex+"'";
|
||||||
|
return unicodeStr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Given a literal like (the 3 char sequence with single quotes) 'a',
|
||||||
|
* return the int value of 'a'. Convert escape sequences here also.
|
||||||
|
*/
|
||||||
|
public static int getCharValueFromGrammarCharLiteral(String literal) {
|
||||||
|
switch ( literal.length() ) {
|
||||||
|
case 3 :
|
||||||
|
// 'x'
|
||||||
|
return literal.charAt(1); // no escape char
|
||||||
|
case 4 :
|
||||||
|
// '\x' (antlr lexer will catch invalid char)
|
||||||
|
if ( Character.isDigit(literal.charAt(2)) ) {
|
||||||
|
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
|
||||||
|
// "invalid char literal: "+literal);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
int escChar = literal.charAt(2);
|
||||||
|
int charVal = ANTLRLiteralEscapedCharValue[escChar];
|
||||||
|
if ( charVal==0 ) {
|
||||||
|
// Unnecessary escapes like '\{' should just yield {
|
||||||
|
return escChar;
|
||||||
|
}
|
||||||
|
return charVal;
|
||||||
|
case 8 :
|
||||||
|
// '\u1234'
|
||||||
|
String unicodeChars = literal.substring(3,literal.length()-1);
|
||||||
|
return Integer.parseInt(unicodeChars, 16);
|
||||||
|
default :
|
||||||
|
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
|
||||||
|
// "invalid char literal: "+literal);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getStringFromGrammarStringLiteral(String literal) {
|
||||||
|
StringBuilder buf = new StringBuilder();
|
||||||
|
int n = literal.length();
|
||||||
|
int i = 1; // skip first quote
|
||||||
|
while ( i < (n-1) ) { // scan all but last quote
|
||||||
|
switch ( literal.charAt(i) ) {
|
||||||
|
case '\\' :
|
||||||
|
i++;
|
||||||
|
if ( literal.charAt(i)=='u' ) { // '\u1234'
|
||||||
|
i++;
|
||||||
|
String unicodeChars = literal.substring(i,i+4);
|
||||||
|
int h = Integer.parseInt(unicodeChars, 16);
|
||||||
|
buf.append((char)h);
|
||||||
|
i += 4;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
char escChar = literal.charAt(i);
|
||||||
|
int charVal = ANTLRLiteralEscapedCharValue[escChar];
|
||||||
|
if ( charVal==0 ) buf.append(escChar); // Unnecessary escapes like '\{' should just yield {
|
||||||
|
else buf.append((char)charVal);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default :
|
||||||
|
buf.append(literal.charAt(i));
|
||||||
|
i++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,55 @@
|
||||||
|
package org.antlr.v4.misc;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/** Sometimes we need to map a key to a value but key is two pieces of data.
|
||||||
|
* This nested hash table saves creating a single key each time we access
|
||||||
|
* map; avoids mem creation.
|
||||||
|
*/
|
||||||
|
public class DoubleKeyMap<Key1, Key2, Value> {
|
||||||
|
Map<Key1, Map<Key2, Value>> data = new LinkedHashMap<Key1, Map<Key2, Value>>();
|
||||||
|
|
||||||
|
public Value put(Key1 k1, Key2 k2, Value v) {
|
||||||
|
Map<Key2, Value> data2 = data.get(k1);
|
||||||
|
Value prev = null;
|
||||||
|
if ( data2==null ) {
|
||||||
|
data2 = new LinkedHashMap<Key2, Value>();
|
||||||
|
data.put(k1, data2);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
prev = data2.get(k2);
|
||||||
|
}
|
||||||
|
data2.put(k2, v);
|
||||||
|
return prev;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Value get(Key1 k1, Key2 k2) {
|
||||||
|
Map<Key2, Value> data2 = data.get(k1);
|
||||||
|
if ( data2==null ) return null;
|
||||||
|
return data2.get(k2);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Key2, Value> get(Key1 k1) { return data.get(k1); }
|
||||||
|
|
||||||
|
/** Get all values associated with primary key */
|
||||||
|
public Collection<Value> values(Key1 k1) {
|
||||||
|
Map<Key2, Value> data2 = data.get(k1);
|
||||||
|
if ( data2==null ) return null;
|
||||||
|
return data2.values();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** get all primary keys */
|
||||||
|
public Set<Key1> keySet() {
|
||||||
|
return data.keySet();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** get all secondary keys associated with a primary key */
|
||||||
|
public Set<Key2> keySet(Key1 k1) {
|
||||||
|
Map<Key2, Value> data2 = data.get(k1);
|
||||||
|
if ( data2==null ) return null;
|
||||||
|
return data2.keySet();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,142 @@
|
||||||
|
/*
|
||||||
|
[The "BSD license"]
|
||||||
|
Copyright (c) 2005-2009 Terence Parr
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.antlr.v4.misc;
|
||||||
|
|
||||||
|
/** An immutable inclusive interval a..b */
|
||||||
|
public class Interval {
|
||||||
|
public static final int INTERVAL_POOL_MAX_VALUE = 1000;
|
||||||
|
|
||||||
|
static Interval[] cache = new Interval[INTERVAL_POOL_MAX_VALUE+1];
|
||||||
|
|
||||||
|
public int a;
|
||||||
|
public int b;
|
||||||
|
|
||||||
|
public static int creates = 0;
|
||||||
|
public static int misses = 0;
|
||||||
|
public static int hits = 0;
|
||||||
|
public static int outOfRange = 0;
|
||||||
|
|
||||||
|
public Interval(int a, int b) { this.a=a; this.b=b; }
|
||||||
|
|
||||||
|
/** Interval objects are used readonly so share all with the
|
||||||
|
* same single value a==b up to some max size. Use an array as a perfect hash.
|
||||||
|
* Return shared object for 0..INTERVAL_POOL_MAX_VALUE or a new
|
||||||
|
* Interval object with a..a in it. On Java.g, 218623 IntervalSets
|
||||||
|
* have a..a (set with 1 element).
|
||||||
|
*/
|
||||||
|
public static Interval create(int a, int b) {
|
||||||
|
//return new Interval(a,b);
|
||||||
|
// cache just a..a
|
||||||
|
if ( a!=b || a<0 || a>INTERVAL_POOL_MAX_VALUE ) {
|
||||||
|
return new Interval(a,b);
|
||||||
|
}
|
||||||
|
if ( cache[a]==null ) {
|
||||||
|
cache[a] = new Interval(a,a);
|
||||||
|
}
|
||||||
|
return cache[a];
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if ( o==null ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
Interval other = (Interval)o;
|
||||||
|
return this.a==other.a && this.b==other.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Does this start completely before other? Disjoint */
|
||||||
|
public boolean startsBeforeDisjoint(Interval other) {
|
||||||
|
return this.a<other.a && this.b<other.a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Does this start at or before other? Nondisjoint */
|
||||||
|
public boolean startsBeforeNonDisjoint(Interval other) {
|
||||||
|
return this.a<=other.a && this.b>=other.a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Does this.a start after other.b? May or may not be disjoint */
|
||||||
|
public boolean startsAfter(Interval other) { return this.a>other.a; }
|
||||||
|
|
||||||
|
/** Does this start completely after other? Disjoint */
|
||||||
|
public boolean startsAfterDisjoint(Interval other) {
|
||||||
|
return this.a>other.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Does this start after other? NonDisjoint */
|
||||||
|
public boolean startsAfterNonDisjoint(Interval other) {
|
||||||
|
return this.a>other.a && this.a<=other.b; // this.b>=other.b implied
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Are both ranges disjoint? I.e., no overlap? */
|
||||||
|
public boolean disjoint(Interval other) {
|
||||||
|
return startsBeforeDisjoint(other) || startsAfterDisjoint(other);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Are two intervals adjacent such as 0..41 and 42..42? */
|
||||||
|
public boolean adjacent(Interval other) {
|
||||||
|
return this.a == other.b+1 || this.b == other.a-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean properlyContains(Interval other) {
|
||||||
|
return other.a >= this.a && other.b <= this.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the interval computed from combining this and other */
|
||||||
|
public Interval union(Interval other) {
|
||||||
|
return Interval.create(Math.min(a,other.a), Math.max(b,other.b));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the interval in common between this and o */
|
||||||
|
public Interval intersection(Interval other) {
|
||||||
|
return Interval.create(Math.max(a,other.a), Math.min(b,other.b));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the interval with elements from this not in other;
|
||||||
|
* other must not be totally enclosed (properly contained)
|
||||||
|
* within this, which would result in two disjoint intervals
|
||||||
|
* instead of the single one returned by this method.
|
||||||
|
*/
|
||||||
|
public Interval differenceNotProperlyContained(Interval other) {
|
||||||
|
Interval diff = null;
|
||||||
|
// other.a to left of this.a (or same)
|
||||||
|
if ( other.startsBeforeNonDisjoint(this) ) {
|
||||||
|
diff = Interval.create(Math.max(this.a,other.b+1),
|
||||||
|
this.b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// other.a to right of this.a
|
||||||
|
else if ( other.startsAfterNonDisjoint(this) ) {
|
||||||
|
diff = Interval.create(this.a, other.a-1);
|
||||||
|
}
|
||||||
|
return diff;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return a+".."+b;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,536 @@
|
||||||
|
/*
|
||||||
|
[The "BSD license"]
|
||||||
|
Copyright (c) 2005-2009 Terence Parr
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.antlr.v4.misc;
|
||||||
|
|
||||||
|
import org.antlr.v4.runtime.Token;
|
||||||
|
import org.antlr.v4.tool.Grammar;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/** A set of integers that relies on ranges being common to do
|
||||||
|
* "run-length-encoded" like compression (if you view an IntSet like
|
||||||
|
* a BitSet with runs of 0s and 1s). Only ranges are recorded so that
|
||||||
|
* a few ints up near value 1000 don't cause massive bitsets, just two
|
||||||
|
* integer intervals.
|
||||||
|
*
|
||||||
|
* element values may be negative. Useful for sets of EPSILON and EOF.
|
||||||
|
*
|
||||||
|
* 0..9 char range is index pair ['\u0030','\u0039'].
|
||||||
|
* Multiple ranges are encoded with multiple index pairs. Isolated
|
||||||
|
* elements are encoded with an index pair where both intervals are the same.
|
||||||
|
*
|
||||||
|
* The ranges are ordered and disjoint so that 2..6 appears before 101..103.
|
||||||
|
*/
|
||||||
|
public class IntervalSet implements IntSet {
|
||||||
|
public static final IntervalSet COMPLETE_SET = IntervalSet.of(0, Grammar.MAX_CHAR_VALUE);
|
||||||
|
public static final IntervalSet EMPTY_SET = new IntervalSet();
|
||||||
|
|
||||||
|
/** The list of sorted, disjoint intervals. */
|
||||||
|
protected List<Interval> intervals;
|
||||||
|
|
||||||
|
/** Create a set with no elements */
|
||||||
|
public IntervalSet() {
|
||||||
|
intervals = new ArrayList<Interval>(2); // most sets are 1 or 2 elements
|
||||||
|
}
|
||||||
|
|
||||||
|
public IntervalSet(List<Interval> intervals) {
|
||||||
|
this.intervals = intervals;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IntervalSet(IntervalSet set) {
|
||||||
|
this();
|
||||||
|
addAll(set);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Create a set with a single element, el. */
|
||||||
|
public static IntervalSet of(int a) {
|
||||||
|
IntervalSet s = new IntervalSet();
|
||||||
|
s.add(a);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Create a set with all ints within range [a..b] (inclusive) */
|
||||||
|
public static IntervalSet of(int a, int b) {
|
||||||
|
IntervalSet s = new IntervalSet();
|
||||||
|
s.add(a,b);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void clear() {
|
||||||
|
intervals.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Add a single element to the set. An isolated element is stored
|
||||||
|
* as a range el..el.
|
||||||
|
*/
|
||||||
|
public void add(int el) {
|
||||||
|
add(el,el);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Add interval; i.e., add all integers from a to b to set.
|
||||||
|
* If b<a, do nothing.
|
||||||
|
* Keep list in sorted order (by left range value).
|
||||||
|
* If overlap, combine ranges. For example,
|
||||||
|
* If this is {1..5, 10..20}, adding 6..7 yields
|
||||||
|
* {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}.
|
||||||
|
*/
|
||||||
|
public void add(int a, int b) {
|
||||||
|
add(Interval.create(a,b));
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy on write so we can cache a..a intervals and sets of that
|
||||||
|
protected void add(Interval addition) {
|
||||||
|
//System.out.println("add "+addition+" to "+intervals.toString());
|
||||||
|
if ( addition.b<addition.a ) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// find position in list
|
||||||
|
// Use iterators as we modify list in place
|
||||||
|
for (ListIterator iter = intervals.listIterator(); iter.hasNext();) {
|
||||||
|
Interval r = (Interval) iter.next();
|
||||||
|
if ( addition.equals(r) ) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if ( addition.adjacent(r) || !addition.disjoint(r) ) {
|
||||||
|
// next to each other, make a single larger interval
|
||||||
|
Interval bigger = addition.union(r);
|
||||||
|
iter.set(bigger);
|
||||||
|
// make sure we didn't just create an interval that
|
||||||
|
// should be merged with next interval in list
|
||||||
|
if ( iter.hasNext() ) {
|
||||||
|
Interval next = (Interval) iter.next();
|
||||||
|
if ( bigger.adjacent(next)||!bigger.disjoint(next) ) {
|
||||||
|
// if we bump up against or overlap next, merge
|
||||||
|
iter.remove(); // remove this one
|
||||||
|
iter.previous(); // move backwards to what we just set
|
||||||
|
iter.set(bigger.union(next)); // set to 3 merged ones
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if ( addition.startsBeforeDisjoint(r) ) {
|
||||||
|
// insert before r
|
||||||
|
iter.previous();
|
||||||
|
iter.add(addition);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// if disjoint and after r, a future iteration will handle it
|
||||||
|
}
|
||||||
|
// ok, must be after last interval (and disjoint from last interval)
|
||||||
|
// just add it
|
||||||
|
intervals.add(addition);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** combine all sets in the array returned the or'd value */
|
||||||
|
public static IntervalSet or(IntervalSet[] sets) {
|
||||||
|
IntervalSet r = new IntervalSet();
|
||||||
|
for (IntervalSet s : sets) r.addAll(s);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IntSet addAll(IntSet set) {
|
||||||
|
if ( set==null ) {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
if ( !(set instanceof IntervalSet) ) {
|
||||||
|
throw new IllegalArgumentException("can't add non IntSet ("+
|
||||||
|
set.getClass().getName()+
|
||||||
|
") to IntervalSet");
|
||||||
|
}
|
||||||
|
IntervalSet other = (IntervalSet)set;
|
||||||
|
// walk set and add each interval
|
||||||
|
int n = other.intervals.size();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
Interval I = (Interval) other.intervals.get(i);
|
||||||
|
this.add(I.a,I.b);
|
||||||
|
}
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IntSet complement(int minElement, int maxElement) {
|
||||||
|
return this.complement(IntervalSet.of(minElement,maxElement));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Given the set of possible values (rather than, say UNICODE or MAXINT),
|
||||||
|
* return a new set containing all elements in vocabulary, but not in
|
||||||
|
* this. The computation is (vocabulary - this).
|
||||||
|
*
|
||||||
|
* 'this' is assumed to be either a subset or equal to vocabulary.
|
||||||
|
*/
|
||||||
|
public IntSet complement(IntSet vocabulary) {
|
||||||
|
if ( vocabulary==null ) {
|
||||||
|
return null; // nothing in common with null set
|
||||||
|
}
|
||||||
|
if ( !(vocabulary instanceof IntervalSet ) ) {
|
||||||
|
throw new IllegalArgumentException("can't complement with non IntervalSet ("+
|
||||||
|
vocabulary.getClass().getName()+")");
|
||||||
|
}
|
||||||
|
IntervalSet vocabularyIS = ((IntervalSet)vocabulary);
|
||||||
|
int maxElement = vocabularyIS.getMaxElement();
|
||||||
|
|
||||||
|
IntervalSet compl = new IntervalSet();
|
||||||
|
int n = intervals.size();
|
||||||
|
if ( n ==0 ) {
|
||||||
|
return compl;
|
||||||
|
}
|
||||||
|
Interval first = (Interval)intervals.get(0);
|
||||||
|
// add a range from 0 to first.a constrained to vocab
|
||||||
|
if ( first.a > 0 ) {
|
||||||
|
IntervalSet s = IntervalSet.of(0, first.a-1);
|
||||||
|
IntervalSet a = (IntervalSet)s.and(vocabularyIS);
|
||||||
|
compl.addAll(a);
|
||||||
|
}
|
||||||
|
for (int i=1; i<n; i++) { // from 2nd interval .. nth
|
||||||
|
Interval previous = (Interval)intervals.get(i-1);
|
||||||
|
Interval current = (Interval)intervals.get(i);
|
||||||
|
IntervalSet s = IntervalSet.of(previous.b+1, current.a-1);
|
||||||
|
IntervalSet a = (IntervalSet)s.and(vocabularyIS);
|
||||||
|
compl.addAll(a);
|
||||||
|
}
|
||||||
|
Interval last = (Interval)intervals.get(n -1);
|
||||||
|
// add a range from last.b to maxElement constrained to vocab
|
||||||
|
if ( last.b < maxElement ) {
|
||||||
|
IntervalSet s = IntervalSet.of(last.b+1, maxElement);
|
||||||
|
IntervalSet a = (IntervalSet)s.and(vocabularyIS);
|
||||||
|
compl.addAll(a);
|
||||||
|
}
|
||||||
|
return compl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Compute this-other via this&~other.
|
||||||
|
* Return a new set containing all elements in this but not in other.
|
||||||
|
* other is assumed to be a subset of this;
|
||||||
|
* anything that is in other but not in this will be ignored.
|
||||||
|
*/
|
||||||
|
public IntSet subtract(IntSet other) {
|
||||||
|
// assume the whole unicode range here for the complement
|
||||||
|
// because it doesn't matter. Anything beyond the max of this' set
|
||||||
|
// will be ignored since we are doing this & ~other. The intersection
|
||||||
|
// will be empty. The only problem would be when this' set max value
|
||||||
|
// goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE
|
||||||
|
// will prevent this.
|
||||||
|
return this.and(((IntervalSet)other).complement(COMPLETE_SET));
|
||||||
|
}
|
||||||
|
|
||||||
|
public IntSet or(IntSet a) {
|
||||||
|
IntervalSet o = new IntervalSet();
|
||||||
|
o.addAll(this);
|
||||||
|
o.addAll(a);
|
||||||
|
return o;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return a new set with the intersection of this set with other. Because
|
||||||
|
* the intervals are sorted, we can use an iterator for each list and
|
||||||
|
* just walk them together. This is roughly O(min(n,m)) for interval
|
||||||
|
* list lengths n and m.
|
||||||
|
*/
|
||||||
|
public IntSet and(IntSet other) {
|
||||||
|
if ( other==null ) { //|| !(other instanceof IntervalSet) ) {
|
||||||
|
return null; // nothing in common with null set
|
||||||
|
}
|
||||||
|
|
||||||
|
ArrayList myIntervals = (ArrayList)this.intervals;
|
||||||
|
ArrayList theirIntervals = (ArrayList)((IntervalSet)other).intervals;
|
||||||
|
IntervalSet intersection = null;
|
||||||
|
int mySize = myIntervals.size();
|
||||||
|
int theirSize = theirIntervals.size();
|
||||||
|
int i = 0;
|
||||||
|
int j = 0;
|
||||||
|
// iterate down both interval lists looking for nondisjoint intervals
|
||||||
|
while ( i<mySize && j<theirSize ) {
|
||||||
|
Interval mine = (Interval)myIntervals.get(i);
|
||||||
|
Interval theirs = (Interval)theirIntervals.get(j);
|
||||||
|
//System.out.println("mine="+mine+" and theirs="+theirs);
|
||||||
|
if ( mine.startsBeforeDisjoint(theirs) ) {
|
||||||
|
// move this iterator looking for interval that might overlap
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
else if ( theirs.startsBeforeDisjoint(mine) ) {
|
||||||
|
// move other iterator looking for interval that might overlap
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
else if ( mine.properlyContains(theirs) ) {
|
||||||
|
// overlap, add intersection, get next theirs
|
||||||
|
if ( intersection==null ) {
|
||||||
|
intersection = new IntervalSet();
|
||||||
|
}
|
||||||
|
intersection.add(mine.intersection(theirs));
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
else if ( theirs.properlyContains(mine) ) {
|
||||||
|
// overlap, add intersection, get next mine
|
||||||
|
if ( intersection==null ) {
|
||||||
|
intersection = new IntervalSet();
|
||||||
|
}
|
||||||
|
intersection.add(mine.intersection(theirs));
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
else if ( !mine.disjoint(theirs) ) {
|
||||||
|
// overlap, add intersection
|
||||||
|
if ( intersection==null ) {
|
||||||
|
intersection = new IntervalSet();
|
||||||
|
}
|
||||||
|
intersection.add(mine.intersection(theirs));
|
||||||
|
// Move the iterator of lower range [a..b], but not
|
||||||
|
// the upper range as it may contain elements that will collide
|
||||||
|
// with the next iterator. So, if mine=[0..115] and
|
||||||
|
// theirs=[115..200], then intersection is 115 and move mine
|
||||||
|
// but not theirs as theirs may collide with the next range
|
||||||
|
// in thisIter.
|
||||||
|
// move both iterators to next ranges
|
||||||
|
if ( mine.startsAfterNonDisjoint(theirs) ) {
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
else if ( theirs.startsAfterNonDisjoint(mine) ) {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( intersection==null ) {
|
||||||
|
return new IntervalSet();
|
||||||
|
}
|
||||||
|
return intersection;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Is el in any range of this set? */
|
||||||
|
public boolean member(int el) {
|
||||||
|
int n = intervals.size();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
Interval I = (Interval) intervals.get(i);
|
||||||
|
int a = I.a;
|
||||||
|
int b = I.b;
|
||||||
|
if ( el<a ) {
|
||||||
|
break; // list is sorted and el is before this interval; not here
|
||||||
|
}
|
||||||
|
if ( el>=a && el<=b ) {
|
||||||
|
return true; // found in this interval
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
/*
|
||||||
|
for (ListIterator iter = intervals.listIterator(); iter.hasNext();) {
|
||||||
|
Interval I = (Interval) iter.next();
|
||||||
|
if ( el<I.a ) {
|
||||||
|
break; // list is sorted and el is before this interval; not here
|
||||||
|
}
|
||||||
|
if ( el>=I.a && el<=I.b ) {
|
||||||
|
return true; // found in this interval
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
/** return true if this set has no members */
|
||||||
|
public boolean isNil() {
|
||||||
|
return intervals==null || intervals.size()==0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** If this set is a single integer, return it otherwise Token.INVALID_TYPE */
|
||||||
|
public int getSingleElement() {
|
||||||
|
if ( intervals!=null && intervals.size()==1 ) {
|
||||||
|
Interval I = (Interval)intervals.get(0);
|
||||||
|
if ( I.a == I.b ) {
|
||||||
|
return I.a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Token.INVALID_TYPE;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMaxElement() {
|
||||||
|
if ( isNil() ) {
|
||||||
|
return Token.INVALID_TYPE;
|
||||||
|
}
|
||||||
|
Interval last = (Interval)intervals.get(intervals.size()-1);
|
||||||
|
return last.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return minimum element >= 0 */
|
||||||
|
public int getMinElement() {
|
||||||
|
if ( isNil() ) {
|
||||||
|
return Token.INVALID_TYPE;
|
||||||
|
}
|
||||||
|
int n = intervals.size();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
Interval I = (Interval) intervals.get(i);
|
||||||
|
int a = I.a;
|
||||||
|
int b = I.b;
|
||||||
|
for (int v=a; v<=b; v++) {
|
||||||
|
if ( v>=0 ) return v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Token.INVALID_TYPE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return a list of Interval objects. */
|
||||||
|
public List<Interval> getIntervals() {
|
||||||
|
return intervals;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
if ( isNil() ) return 0;
|
||||||
|
int n = 0;
|
||||||
|
// just add left edge of intervals
|
||||||
|
for (Interval I : intervals) n += I.a;
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Are two IntervalSets equal? Because all intervals are sorted
|
||||||
|
* and disjoint, equals is a simple linear walk over both lists
|
||||||
|
* to make sure they are the same. Interval.equals() is used
|
||||||
|
* by the List.equals() method to check the ranges.
|
||||||
|
*/
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if ( obj==null || !(obj instanceof IntervalSet) ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IntervalSet other = (IntervalSet)obj;
|
||||||
|
return this.intervals.equals(other.intervals);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return toString((Grammar)null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString(Grammar g) {
|
||||||
|
StringBuffer buf = new StringBuffer();
|
||||||
|
if ( this.intervals==null || this.intervals.size()==0 ) {
|
||||||
|
return "{}";
|
||||||
|
}
|
||||||
|
if ( this.size()>1 ) {
|
||||||
|
buf.append("{");
|
||||||
|
}
|
||||||
|
Iterator iter = this.intervals.iterator();
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
Interval I = (Interval) iter.next();
|
||||||
|
int a = I.a;
|
||||||
|
int b = I.b;
|
||||||
|
if ( a==b ) {
|
||||||
|
if ( g!=null ) {
|
||||||
|
buf.append(g.getTokenDisplayName(a));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
buf.append(a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if ( g!=null ) {
|
||||||
|
if ( !g.isLexer() ) {
|
||||||
|
for (int i=a; i<=b; i++) {
|
||||||
|
if ( i>a ) buf.append(", ");
|
||||||
|
buf.append(g.getTokenDisplayName(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
buf.append(g.getTokenDisplayName(a)+".."+g.getTokenDisplayName(b));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
buf.append(a+".."+b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( iter.hasNext() ) {
|
||||||
|
buf.append(", ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( this.size()>1 ) {
|
||||||
|
buf.append("}");
|
||||||
|
}
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
int n = 0;
|
||||||
|
int numIntervals = intervals.size();
|
||||||
|
if ( numIntervals==1 ) {
|
||||||
|
Interval firstInterval = this.intervals.get(0);
|
||||||
|
return firstInterval.b-firstInterval.a+1;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < numIntervals; i++) {
|
||||||
|
Interval I = (Interval) intervals.get(i);
|
||||||
|
n += (I.b-I.a+1);
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Integer> toList() {
|
||||||
|
List<Integer> values = new ArrayList<Integer>();
|
||||||
|
int n = intervals.size();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
Interval I = (Interval) intervals.get(i);
|
||||||
|
int a = I.a;
|
||||||
|
int b = I.b;
|
||||||
|
for (int v=a; v<=b; v++) {
|
||||||
|
values.add(Utils.integer(v));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return values;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Get the ith element of ordered set. Used only by RandomPhrase so
|
||||||
|
* don't bother to implement if you're not doing that for a new
|
||||||
|
* ANTLR code gen target.
|
||||||
|
*/
|
||||||
|
public int get(int i) {
|
||||||
|
int n = intervals.size();
|
||||||
|
int index = 0;
|
||||||
|
for (int j = 0; j < n; j++) {
|
||||||
|
Interval I = (Interval) intervals.get(j);
|
||||||
|
int a = I.a;
|
||||||
|
int b = I.b;
|
||||||
|
for (int v=a; v<=b; v++) {
|
||||||
|
if ( index==i ) {
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int[] toArray() {
|
||||||
|
int[] values = new int[size()];
|
||||||
|
int n = intervals.size();
|
||||||
|
int j = 0;
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
Interval I = (Interval) intervals.get(i);
|
||||||
|
int a = I.a;
|
||||||
|
int b = I.b;
|
||||||
|
for (int v=a; v<=b; v++) {
|
||||||
|
values[j] = v;
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return values;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove(int el) {
|
||||||
|
throw new NoSuchMethodError("IntervalSet.remove() unimplemented");
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,30 @@
|
||||||
|
package org.antlr.v4.misc;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/** I need the get-element-i functionality so I'm subclassing
|
||||||
|
* LinkedHashMap.
|
||||||
|
*/
|
||||||
|
public class OrderedHashMap<K,V> extends LinkedHashMap<K,V> {
|
||||||
|
/** Track the elements as they are added to the set */
|
||||||
|
protected List<K> elements = new ArrayList<K>();
|
||||||
|
|
||||||
|
public K getKey(int i) { return elements.get(i); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public V put(K key, V value) {
|
||||||
|
elements.add(key);
|
||||||
|
return super.put(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public V remove(Object key) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void clear() {
|
||||||
|
elements.clear();
|
||||||
|
super.clear();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,88 @@
|
||||||
|
package org.antlr.v4.misc;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/** A HashMap that remembers the order that the elements were added.
|
||||||
|
* You can alter the ith element with set(i,value) too :) Unique list.
|
||||||
|
* I need the replace/set-element-i functionality so I'm subclassing
|
||||||
|
* OrderedHashSet.
|
||||||
|
*/
|
||||||
|
public class OrderedHashSet<T> extends LinkedHashSet<T> {
|
||||||
|
/** Track the elements as they are added to the set */
|
||||||
|
protected List<T> elements = new ArrayList<T>();
|
||||||
|
|
||||||
|
public T get(int i) {
|
||||||
|
return elements.get(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Replace an existing value with a new value; updates the element
|
||||||
|
* list and the hash table, but not the key as that has not changed.
|
||||||
|
*/
|
||||||
|
public T set(int i, T value) {
|
||||||
|
T oldElement = elements.get(i);
|
||||||
|
elements.set(i,value); // update list
|
||||||
|
super.remove(oldElement); // now update the set: remove/add
|
||||||
|
super.add(value);
|
||||||
|
return oldElement;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean remove(int i) {
|
||||||
|
T o = elements.remove(i);
|
||||||
|
return super.remove(o);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Add a value to list; keep in hashtable for consistency also;
|
||||||
|
* Key is object itself. Good for say asking if a certain string is in
|
||||||
|
* a list of strings.
|
||||||
|
*/
|
||||||
|
public boolean add(T value) {
|
||||||
|
boolean result = super.add(value);
|
||||||
|
if ( result ) { // only track if new element not in set
|
||||||
|
elements.add(value);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean remove(Object o) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void clear() {
|
||||||
|
elements.clear();
|
||||||
|
super.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return elements.hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
// System.out.print("equals " + this + ", " + o+" = ");
|
||||||
|
boolean same = elements!=null && elements.equals(((OrderedHashSet)o).elements);
|
||||||
|
// System.out.println(same);
|
||||||
|
return same;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<T> iterator() {
|
||||||
|
return elements.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the List holding list of table elements. Note that you are
|
||||||
|
* NOT getting a copy so don't write to the list.
|
||||||
|
*/
|
||||||
|
public List<T> elements() {
|
||||||
|
return elements;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object[] toArray() {
|
||||||
|
return elements.toArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return elements.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,714 @@
|
||||||
|
// File : A3Lexer.g
|
||||||
|
// Author : Jim Idle (jimi@temporal-wave.com)
|
||||||
|
// Copyright : Free BSD - See @header clause below
|
||||||
|
// Version : First implemented as part of ANTLR 3.2 this is the self
|
||||||
|
// hosting ANTLR 3 Lexer.
|
||||||
|
//
|
||||||
|
// Description
|
||||||
|
// -----------
|
||||||
|
// This is the definitive lexer grammar for parsing ANTLR V3.x.x grammars. All other
|
||||||
|
// gramnmars are derived from this grammar via source code control integration (perforce)
|
||||||
|
// or by the gdiff tool.
|
||||||
|
//
|
||||||
|
// This grammar and its associated grmmmars A3Parser.g and A3Walker.g exhibit the following
|
||||||
|
// traits, which are recommended for all production quality grammars:
|
||||||
|
//
|
||||||
|
// 1) They are separate grammars, not composite grammars;
|
||||||
|
// 2) They implement all supporting methods in a superclass (at least this is recommended
|
||||||
|
// for language targets that support inheritence;
|
||||||
|
// 3) All errors are pushed as far down the parsing chain as possible, which means
|
||||||
|
// that the lexer tries to defer error reporting to the parser, and the parser
|
||||||
|
// tries to defer error reporting to a semantic phase consisting of a single
|
||||||
|
// walk of the AST. The reason for this is that the error messages produced
|
||||||
|
// from later phases of the parse will generally have better context and so
|
||||||
|
// be more useful to the end user. Consider the message: "Syntax error at 'options'"
|
||||||
|
// vs: "You cannot specify two options{} sections in a single grammar file".
|
||||||
|
// 4) The lexer is 'programmed' to catch common mistakes such as unterminated literals
|
||||||
|
// and report them specifically and not just issue confusing lexer mismatch errors.
|
||||||
|
//
|
||||||
|
|
||||||
|
/** Read in an ANTLR grammar and build an AST. Try not to do
|
||||||
|
* any actions, just build the tree.
|
||||||
|
*
|
||||||
|
* The phases are:
|
||||||
|
*
|
||||||
|
* A3Lexer.g (this file)
|
||||||
|
* A3Parser.g
|
||||||
|
* A3Verify.g (derived from A3Walker.g)
|
||||||
|
* assign.types.g
|
||||||
|
* define.g
|
||||||
|
* buildnfa.g
|
||||||
|
* antlr.print.g (optional)
|
||||||
|
* codegen.g
|
||||||
|
*
|
||||||
|
* Terence Parr
|
||||||
|
* University of San Francisco
|
||||||
|
* 2005
|
||||||
|
* Jim Idle (this v3 grammar)
|
||||||
|
* Temporal Wave LLC
|
||||||
|
* 2009
|
||||||
|
*/
|
||||||
|
lexer grammar ANTLRLexer;
|
||||||
|
|
||||||
|
// ==============================================================================
|
||||||
|
// Note that while this grammar does not care about order of constructs
|
||||||
|
// that don't really matter, such as options before @header etc, it must first
|
||||||
|
// be parsed by the original v2 parser, before it replaces it. That parser does
|
||||||
|
// care about order of structures. Hence we are constrained by the v2 parser
|
||||||
|
// for at least the first bootstrap release that causes this parser to replace
|
||||||
|
// the v2 version.
|
||||||
|
// ==============================================================================
|
||||||
|
|
||||||
|
// -------
|
||||||
|
// Options
|
||||||
|
//
|
||||||
|
// V3 option directives to tell the tool what we are asking of it for this
|
||||||
|
// grammar.
|
||||||
|
//
|
||||||
|
options {
|
||||||
|
|
||||||
|
// Target language is Java, which is the default but being specific
|
||||||
|
// here as this grammar is also meant as a good example grammar for
|
||||||
|
// for users.
|
||||||
|
//
|
||||||
|
language = Java;
|
||||||
|
|
||||||
|
// The super class that this lexer should expect to inherit from, and
|
||||||
|
// which contains any and all support routines for the lexer. This is
|
||||||
|
// commented out in this baseline (definitive or normative grammar)
|
||||||
|
// - see the ANTLR tool implementation for hints on how to use the super
|
||||||
|
// class
|
||||||
|
//
|
||||||
|
//superclass = AbstractA3Lexer;
|
||||||
|
}
|
||||||
|
|
||||||
|
tokens { SEMPRED; FORCED_ACTION; }
|
||||||
|
|
||||||
|
// Include the copyright in this source and also the generated source
|
||||||
|
//
|
||||||
|
@lexer::header {
|
||||||
|
/*
|
||||||
|
[The "BSD licence"]
|
||||||
|
Copyright (c) 2005-2009 Terence Parr
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
}
|
||||||
|
|
||||||
|
// +=====================+
|
||||||
|
// | Lexer specification |
|
||||||
|
// +=====================+
|
||||||
|
|
||||||
|
// --------
|
||||||
|
// Comments
|
||||||
|
//
|
||||||
|
// ANTLR comments can be multi or single line and we don't care
|
||||||
|
// which particularly. However we also accept Javadoc style comments
|
||||||
|
// of the form: /** ... */ and we do take care to distinguish those
|
||||||
|
// from ordinary multi-line comments
|
||||||
|
// Note how we guide the lexical PATH because we want to issue a decriptive
|
||||||
|
// error message in case of a standalone '/' character, which makes no
|
||||||
|
// sense in ANTLR source code. We alo trap unterminated multi-line comments
|
||||||
|
//
|
||||||
|
fragment DOC_COMMENT : ;
|
||||||
|
COMMENT
|
||||||
|
@init {
|
||||||
|
|
||||||
|
// Record the start line and offsets as if we need to report an
|
||||||
|
// unterminated comment, then we want to show the start of the comment
|
||||||
|
// we think is broken, not the end, where people will have to try and work
|
||||||
|
// it out themselves.
|
||||||
|
//
|
||||||
|
int startLine = $line;
|
||||||
|
int offset = getCharPositionInLine();
|
||||||
|
}
|
||||||
|
: // Eat the first character only, then see if we have a comment
|
||||||
|
// or something silly.
|
||||||
|
//
|
||||||
|
'/' // Comment introducer
|
||||||
|
|
||||||
|
(
|
||||||
|
// Single line comment, possibly with embedded src/line directives
|
||||||
|
// in a similar style to the C pre-processor, allowing generated
|
||||||
|
// code to refer the programmer back to the original source code
|
||||||
|
// in case of error.
|
||||||
|
//
|
||||||
|
'/'
|
||||||
|
(
|
||||||
|
(' $ANTLR')=> ' $ANTLR' SRC
|
||||||
|
| ~(NLCHARS)*
|
||||||
|
)
|
||||||
|
|
||||||
|
| // Multi-line comment, which may be a documentation comment
|
||||||
|
// if it starts /** (note that we protect against accidentaly
|
||||||
|
// recognizing a comment /**/ as a documentation comment
|
||||||
|
//
|
||||||
|
'*' (
|
||||||
|
{ input.LA(2) != '/'}?=> '*' { $type = DOC_COMMENT; }
|
||||||
|
| { true }?=> // Required to cover all alts with predicates
|
||||||
|
)
|
||||||
|
|
||||||
|
// Should we support embedded multiline comments here?
|
||||||
|
//
|
||||||
|
(
|
||||||
|
// Pick out end of multiline comment and exit the loop
|
||||||
|
// if we find it.
|
||||||
|
//
|
||||||
|
{ !(input.LA(1) == '*' && input.LA(2) == '/') }?
|
||||||
|
|
||||||
|
// Anything else other than the non-greedy match of
|
||||||
|
// the comment close sequence
|
||||||
|
//
|
||||||
|
.
|
||||||
|
)*
|
||||||
|
(
|
||||||
|
// Look for the comment terminator, but if it is accidentally
|
||||||
|
// unterminated, then we will hit EOF, which will trigger the
|
||||||
|
// epsilon alt and hence we can issue an error message relative
|
||||||
|
// to the start of the unterminated multi-line comment
|
||||||
|
//
|
||||||
|
'*/'
|
||||||
|
|
||||||
|
| // Unterminated comment!
|
||||||
|
//
|
||||||
|
{
|
||||||
|
// ErrorManager.msg(Msg.UNTERMINATED_DOC_COMMENT, startLine, offset, $pos, startLine, offset, $pos, (Object)null);
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
| // There was nothing that made sense following the opening '/' and so
|
||||||
|
// we issue an error regarding the malformed comment
|
||||||
|
//
|
||||||
|
{
|
||||||
|
// TODO: Insert error message relative to comment start
|
||||||
|
//
|
||||||
|
}
|
||||||
|
)
|
||||||
|
{
|
||||||
|
// Unless we had a documentation comment, then we do not wish to
|
||||||
|
// pass the comments in to the parser. If you are writing a formatter
|
||||||
|
// then you will want to preserve the comments off channel, but could
|
||||||
|
// just skip and save token space if not.
|
||||||
|
//
|
||||||
|
if ($type != DOC_COMMENT) {
|
||||||
|
|
||||||
|
$channel=2; // Comments are on channel 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
DOUBLE_QUOTE_STRING_LITERAL
|
||||||
|
: '"' (('\\')=>'\\' . | ~'"' )* '"'
|
||||||
|
;
|
||||||
|
|
||||||
|
DOUBLE_ANGLE_STRING_LITERAL
|
||||||
|
: '<<' (options {greedy=false;} : . )* '>>'
|
||||||
|
;
|
||||||
|
|
||||||
|
// --------------
|
||||||
|
// Argument specs
|
||||||
|
//
|
||||||
|
// Certain argument lists, such as those specifying call parameters
|
||||||
|
// to a rule invocation, or input parameters to a rule specification
|
||||||
|
// are contained within square brackets. In the lexer we consume them
|
||||||
|
// all at once and sort them out later in the grammar analysis.
|
||||||
|
//
|
||||||
|
ARG_ACTION
|
||||||
|
@init
|
||||||
|
{
|
||||||
|
StringBuffer theText = new StringBuffer();
|
||||||
|
}
|
||||||
|
: '['
|
||||||
|
(
|
||||||
|
('\\')=>'\\'
|
||||||
|
(
|
||||||
|
(']')=>']'
|
||||||
|
{
|
||||||
|
// We do not include the \ character itself when picking up an escaped ]
|
||||||
|
//
|
||||||
|
theText.append(']');
|
||||||
|
}
|
||||||
|
| c=.
|
||||||
|
{
|
||||||
|
// We DO include the \ character when finding any other escape
|
||||||
|
//
|
||||||
|
theText.append('\\');
|
||||||
|
theText.append((char)$c);
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
| ('"')=>as=ACTION_STRING_LITERAL
|
||||||
|
{
|
||||||
|
// Append the embedded string literal test
|
||||||
|
//
|
||||||
|
theText.append($as.text);
|
||||||
|
}
|
||||||
|
|
||||||
|
| ('\'')=>ac=ACTION_CHAR_LITERAL
|
||||||
|
{
|
||||||
|
// Append the embedded chracter literal text
|
||||||
|
//
|
||||||
|
theText.append($ac.text);
|
||||||
|
}
|
||||||
|
|
||||||
|
| c=~']'
|
||||||
|
{
|
||||||
|
// Whatever else we found in the scan
|
||||||
|
//
|
||||||
|
theText.append((char)$c);
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
|
||||||
|
']'
|
||||||
|
{
|
||||||
|
// Set the token text to our gathered string
|
||||||
|
//
|
||||||
|
setText(theText.toString());
|
||||||
|
}
|
||||||
|
;
|
||||||
|
|
||||||
|
// -------
|
||||||
|
// Actions
|
||||||
|
//
|
||||||
|
// Other than making sure to distinguish between { and } embedded
|
||||||
|
// within what we have assumed to be literals in the action code, the
|
||||||
|
// job of the lexer is merely to gather the code within the action
|
||||||
|
// (delimited by {}) and pass it to the parser as a single token.
|
||||||
|
// Note the special case of the {{ }} action, which is a forced
|
||||||
|
// action, that the generated code will execute regardless of
|
||||||
|
// backtracking (predicate) level.
|
||||||
|
// We know that this token will be asked for its text somewhere
|
||||||
|
// in the upcoming parse, so setting the text here to exclude
|
||||||
|
// the delimiting {} is no additional overhead.
|
||||||
|
//
|
||||||
|
ACTION
|
||||||
|
: NESTED_ACTION ('?' {$type = SEMPRED;} )?
|
||||||
|
{
|
||||||
|
// Note that because of the sempred detection above, we
|
||||||
|
// will not see {{ action }}? as a forced action, but as a semantic
|
||||||
|
// predicate.
|
||||||
|
if ( $text.startsWith("{{") && $text.endsWith("}}") ) {
|
||||||
|
// Switch types to a forced action
|
||||||
|
$type = FORCED_ACTION;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
;
|
||||||
|
|
||||||
|
// ----------------
|
||||||
|
// Action structure
|
||||||
|
//
|
||||||
|
// Many language targets use {} as block delimiters and so we
|
||||||
|
// must recursively match {} delimited blocks to balance the
|
||||||
|
// braces. Additionally, we must make some assumptions about
|
||||||
|
// literal string representation in the target language. We assume
|
||||||
|
// that they are delimited by ' or " and so consume these
|
||||||
|
// in their own alts so as not to inadvertantly match {}.
|
||||||
|
// This rule calls itself on matching a {
|
||||||
|
//
|
||||||
|
fragment
|
||||||
|
NESTED_ACTION
|
||||||
|
@init {
|
||||||
|
|
||||||
|
// Record the start line and offsets as if we need to report an
|
||||||
|
// unterminated block, then we want to show the start of the comment
|
||||||
|
// we think is broken, not the end, where people will have to try and work
|
||||||
|
// it out themselves.
|
||||||
|
//
|
||||||
|
int startLine = getLine();
|
||||||
|
int offset = getCharPositionInLine();
|
||||||
|
}
|
||||||
|
|
||||||
|
: // Action and other blocks start with opening {
|
||||||
|
//
|
||||||
|
'{'
|
||||||
|
(
|
||||||
|
// And now we can match one of a number of embedded
|
||||||
|
// elements within the action until we find a
|
||||||
|
// } that balances the opening {. If we do not find
|
||||||
|
// the balanced } then we will hit EOF and can issue
|
||||||
|
// an error message about the brace that we belive to
|
||||||
|
// be mismatched. This won't be foolproof but we will
|
||||||
|
// be able to at least report an error against the
|
||||||
|
// opening brace that we feel is in error and this will
|
||||||
|
// guide the user to the correction as best we can.
|
||||||
|
//
|
||||||
|
|
||||||
|
|
||||||
|
// An embedded {} block
|
||||||
|
//
|
||||||
|
NESTED_ACTION
|
||||||
|
|
||||||
|
| // What appears to be a literal
|
||||||
|
//
|
||||||
|
ACTION_CHAR_LITERAL
|
||||||
|
|
||||||
|
| // We have assumed that the target language has C/Java
|
||||||
|
// type comments.
|
||||||
|
//
|
||||||
|
COMMENT
|
||||||
|
|
||||||
|
| // What appears to be a literal
|
||||||
|
//
|
||||||
|
ACTION_STRING_LITERAL
|
||||||
|
|
||||||
|
| // What appears to be an escape sequence
|
||||||
|
//
|
||||||
|
ACTION_ESC
|
||||||
|
|
||||||
|
| // Some other single character that is not
|
||||||
|
// handled above
|
||||||
|
//
|
||||||
|
~('\\'|'"'|'\''|'/'|'{'|'}')
|
||||||
|
|
||||||
|
)*
|
||||||
|
|
||||||
|
(
|
||||||
|
// Correctly balanced closing brace
|
||||||
|
//
|
||||||
|
'}'
|
||||||
|
|
||||||
|
| // Looks like have an imblanced {} block, report
|
||||||
|
// with respect to the opening brace.
|
||||||
|
//
|
||||||
|
{
|
||||||
|
// TODO: Report imbalanced {}
|
||||||
|
System.out.println("Block starting at line " + startLine + " offset " + (offset+1) + " contains imbalanced {} or is missing a }");
|
||||||
|
}
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
// Keywords
|
||||||
|
// --------
|
||||||
|
// keywords used to specify ANTLR v3 grammars. Keywords may not be used as
|
||||||
|
// labels for rules or in any other context where they woudl be ambiguous
|
||||||
|
// with the keyword vs some other identifier
|
||||||
|
// OPTIONS and TOKENS must also consume the opening brace that captures
|
||||||
|
// their option block, as this is teh easiest way to parse it separate
|
||||||
|
// to an ACTION block, despite it usingthe same {} delimiters.
|
||||||
|
//
|
||||||
|
OPTIONS : 'options' WSNLCHARS* '{' ;
|
||||||
|
TOKENS : 'tokens' WSNLCHARS* '{' ;
|
||||||
|
|
||||||
|
SCOPE : 'scope' ;
|
||||||
|
IMPORT : 'import' ;
|
||||||
|
FRAGMENT : 'fragment' ;
|
||||||
|
LEXER : 'lexer' ;
|
||||||
|
PARSER : 'parser' ;
|
||||||
|
TREE : 'tree' ;
|
||||||
|
GRAMMAR : 'grammar' ;
|
||||||
|
PROTECTED : 'protected' ;
|
||||||
|
PUBLIC : 'public' ;
|
||||||
|
PRIVATE : 'private' ;
|
||||||
|
RETURNS : 'returns' ;
|
||||||
|
THROWS : 'throws' ;
|
||||||
|
CATCH : 'catch' ;
|
||||||
|
FINALLY : 'finally' ;
|
||||||
|
TEMPLATE : 'template' ;
|
||||||
|
MODE : 'mode' ;
|
||||||
|
|
||||||
|
// -----------
|
||||||
|
// Punctuation
|
||||||
|
//
|
||||||
|
// Character sequences used as separators, delimters, operators, etc
|
||||||
|
//
|
||||||
|
COLON : ':' ;
|
||||||
|
COLONCOLON : '::' ;
|
||||||
|
COMMA : ',' ;
|
||||||
|
SEMI : ';' ;
|
||||||
|
LPAREN : '(' ;
|
||||||
|
RPAREN : ')' ;
|
||||||
|
IMPLIES : '=>' ;
|
||||||
|
LT : '<' ;
|
||||||
|
GT : '>' ;
|
||||||
|
ASSIGN : '=' ;
|
||||||
|
QUESTION : '?' ;
|
||||||
|
BANG : '!' ;
|
||||||
|
STAR : '*' ;
|
||||||
|
PLUS : '+' ;
|
||||||
|
PLUS_ASSIGN : '+=' ;
|
||||||
|
OR : '|' ;
|
||||||
|
ROOT : '^' ;
|
||||||
|
DOLLAR : '$' ;
|
||||||
|
DOT : '.' ; // can be WILDCARD or DOT in qid or imported rule ref
|
||||||
|
RANGE : '..' ;
|
||||||
|
ETC : '...' ;
|
||||||
|
RARROW : '->' ;
|
||||||
|
TREE_BEGIN : '^(' ;
|
||||||
|
AT : '@' ;
|
||||||
|
NOT : '~' ;
|
||||||
|
RBRACE : '}' ;
|
||||||
|
|
||||||
|
// ---------------
|
||||||
|
// Token reference
|
||||||
|
//
|
||||||
|
// The names of all tokens must start with an upper case letter and so
|
||||||
|
// the lexer can distinguish them directly.
|
||||||
|
//
|
||||||
|
TOKEN_REF
|
||||||
|
: ('A'..'Z') ('A'..'Z' | 'a'..'z' | '0'..'9' | '_')*
|
||||||
|
;
|
||||||
|
|
||||||
|
// --------------
|
||||||
|
// Rule reference
|
||||||
|
//
|
||||||
|
// The names of all rules must start with a lower case letter
|
||||||
|
// so the lexer can distibguish them directly. The parser takes
|
||||||
|
// care of the case such as id=rulename
|
||||||
|
//
|
||||||
|
RULE_REF
|
||||||
|
: ('a'..'z') ('A'..'Z' | 'a'..'z' | '0'..'9' | '_')*
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
// ----------------------------
|
||||||
|
// Literals embedded in actions
|
||||||
|
//
|
||||||
|
// Note that we have made the assumption that the language used within
|
||||||
|
// actions uses the fairly standard " and ' delimiters for literals and
|
||||||
|
// that within these literals, characters are escaped using the \ character.
|
||||||
|
// There are some languages which do not conform to this in all cases, such
|
||||||
|
// as by using /string/ and so on. We will have to deal with such cases if
|
||||||
|
// if they come up in targets.
|
||||||
|
//
|
||||||
|
|
||||||
|
// Within actions, or other structures that are not part of the ANTLR
|
||||||
|
// syntax, we may encounter literal characters. Within these, we do
|
||||||
|
// not want to inadvertantly match things like '}' and so we eat them
|
||||||
|
// specifically. While this rule is called CHAR it allows for the fact that
|
||||||
|
// some languages may use/allow ' as the string delimiter.
|
||||||
|
//
|
||||||
|
fragment
|
||||||
|
ACTION_CHAR_LITERAL
|
||||||
|
: '\'' (('\\')=>ACTION_ESC | ~'\'' )* '\''
|
||||||
|
;
|
||||||
|
|
||||||
|
// Within actions, or other structures that are not part of the ANTLR
|
||||||
|
// syntax, we may encounter literal strings. Within these, we do
|
||||||
|
// not want to inadvertantly match things like '}' and so we eat them
|
||||||
|
// specifically.
|
||||||
|
//
|
||||||
|
fragment
|
||||||
|
ACTION_STRING_LITERAL
|
||||||
|
: '"' (('\\')=>ACTION_ESC | ~'"')* '"'
|
||||||
|
;
|
||||||
|
|
||||||
|
// Within literal strings and characters that are not part of the ANTLR
|
||||||
|
// syntax, we must allow for escaped character sequences so that we do not
|
||||||
|
// inadvertantly recognize the end of a string or character when the terminating
|
||||||
|
// delimiter has been esacped.
|
||||||
|
//
|
||||||
|
fragment
|
||||||
|
ACTION_ESC
|
||||||
|
: '\\' .
|
||||||
|
;
|
||||||
|
|
||||||
|
// -------
|
||||||
|
// Integer
|
||||||
|
//
|
||||||
|
// Obviously (I hope) match an aribtrary long sequence of digits.
|
||||||
|
//
|
||||||
|
INT : ('0'..'9')+
|
||||||
|
;
|
||||||
|
|
||||||
|
// -----------
|
||||||
|
// Source spec
|
||||||
|
//
|
||||||
|
// A fragment rule for picking up information about an origrinating
|
||||||
|
// file from which the grammar we are parsing has been generated. This allows
|
||||||
|
// ANTLR to report errors against the originating file and not the generated
|
||||||
|
// file.
|
||||||
|
//
|
||||||
|
fragment
|
||||||
|
SRC : 'src' WSCHARS+ file=ACTION_STRING_LITERAL WSCHARS+ line=INT
|
||||||
|
{
|
||||||
|
// TODO: Add target specific code to change the source file name and current line number
|
||||||
|
//
|
||||||
|
}
|
||||||
|
;
|
||||||
|
|
||||||
|
// --------------
|
||||||
|
// Literal string
|
||||||
|
//
|
||||||
|
// ANTLR makes no disticintion between a single character literal and a
|
||||||
|
// multi-character string. All literals are single quote delimited and
|
||||||
|
// may contain unicode escape sequences of the form \uxxxx, where x
|
||||||
|
// is a valid hexadecimal number (as per Java basically).
|
||||||
|
STRING_LITERAL
|
||||||
|
@init {
|
||||||
|
int len = 0;
|
||||||
|
}
|
||||||
|
: '\'' ( ( ESC_SEQ | ~('\\'|'\'') ) {len++;} )* '\''
|
||||||
|
;
|
||||||
|
|
||||||
|
// A valid hex digit specification
|
||||||
|
//
|
||||||
|
fragment
|
||||||
|
HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
|
||||||
|
|
||||||
|
// Any kind of escaped character that we can embed within ANTLR
|
||||||
|
// literal strings.
|
||||||
|
//
|
||||||
|
fragment
|
||||||
|
ESC_SEQ
|
||||||
|
: '\\'
|
||||||
|
(
|
||||||
|
// The standard escaped character set such as tab, newline,
|
||||||
|
// etc.
|
||||||
|
//
|
||||||
|
'b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\'
|
||||||
|
|
||||||
|
| // A Java style Unicode escape sequence
|
||||||
|
//
|
||||||
|
UNICODE_ESC
|
||||||
|
|
||||||
|
| // An illegal escape seqeunce
|
||||||
|
//
|
||||||
|
{
|
||||||
|
// TODO: Issue error message
|
||||||
|
//
|
||||||
|
}
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
fragment
|
||||||
|
UNICODE_ESC
|
||||||
|
|
||||||
|
@init {
|
||||||
|
|
||||||
|
// Flag to tell us whether we have a valid number of
|
||||||
|
// hex digits in the escape sequence
|
||||||
|
//
|
||||||
|
int hCount = 0;
|
||||||
|
}
|
||||||
|
: 'u' // Leadin for unicode escape sequence
|
||||||
|
|
||||||
|
// We now require 4 hex digits. Note though
|
||||||
|
// that we accept any number of characters
|
||||||
|
// and issue an error if we do not get 4. We cannot
|
||||||
|
// use an inifinite count such as + because this
|
||||||
|
// might consume too many, so we lay out the lexical
|
||||||
|
// options and issue an error at the invalid paths.
|
||||||
|
//
|
||||||
|
(
|
||||||
|
(
|
||||||
|
HEX_DIGIT { hCount++; }
|
||||||
|
(
|
||||||
|
HEX_DIGIT { hCount++; }
|
||||||
|
(
|
||||||
|
HEX_DIGIT { hCount++; }
|
||||||
|
(
|
||||||
|
// Four valid hex digits, we are good
|
||||||
|
//
|
||||||
|
HEX_DIGIT { hCount++; }
|
||||||
|
|
||||||
|
| // Three valid digits
|
||||||
|
)
|
||||||
|
|
||||||
|
| // Two valid digits
|
||||||
|
)
|
||||||
|
|
||||||
|
| // One valid digit
|
||||||
|
)
|
||||||
|
)
|
||||||
|
| // No valid hex digits at all
|
||||||
|
)
|
||||||
|
|
||||||
|
// Now check the digit count and issue an error if we need to
|
||||||
|
//
|
||||||
|
{
|
||||||
|
if (hCount != 4) {
|
||||||
|
|
||||||
|
// TODO: Issue error message
|
||||||
|
}
|
||||||
|
}
|
||||||
|
;
|
||||||
|
|
||||||
|
// ----------
|
||||||
|
// Whitespace
|
||||||
|
//
|
||||||
|
// Characters and character constructs that are of no import
|
||||||
|
// to the parser and are used to make the grammar easier to read
|
||||||
|
// for humans.
|
||||||
|
//
|
||||||
|
WS
|
||||||
|
: (
|
||||||
|
' '
|
||||||
|
| '\t'
|
||||||
|
| '\r'
|
||||||
|
| '\n'
|
||||||
|
| '\f'
|
||||||
|
)+
|
||||||
|
{
|
||||||
|
|
||||||
|
$channel=2;
|
||||||
|
}
|
||||||
|
;
|
||||||
|
|
||||||
|
// A fragment rule for use in recognizing end of line in
|
||||||
|
// rules like COMMENT.
|
||||||
|
//
|
||||||
|
fragment
|
||||||
|
NLCHARS
|
||||||
|
: '\n' | '\r'
|
||||||
|
;
|
||||||
|
|
||||||
|
// A fragment rule for recognizing traditional whitespace
|
||||||
|
// characters within lexer rules.
|
||||||
|
//
|
||||||
|
fragment
|
||||||
|
WSCHARS
|
||||||
|
: ' ' | '\t' | '\f'
|
||||||
|
;
|
||||||
|
|
||||||
|
// A fragment rule for recognizing both traditional whitespace and
|
||||||
|
// end of line markers, when we don't care to distinguish but don't
|
||||||
|
// want any action code going on.
|
||||||
|
//
|
||||||
|
fragment
|
||||||
|
WSNLCHARS
|
||||||
|
: ' ' | '\t' | '\f' | '\n' | '\r'
|
||||||
|
;
|
||||||
|
|
||||||
|
// -----------------
|
||||||
|
// Illegal Character
|
||||||
|
//
|
||||||
|
// This is an illegal character trap which is always the last rule in the
|
||||||
|
// lexer specification. It matches a single character of any value and being
|
||||||
|
// the last rule in the file will match when no other rule knows what to do
|
||||||
|
// about the character. It is reported as an error but is not passed on to the
|
||||||
|
// parser. This means that the parser to deal with the gramamr file anyway
|
||||||
|
// but we will not try to analyse or code generate from a file with lexical
|
||||||
|
// errors.
|
||||||
|
//
|
||||||
|
ERRCHAR
|
||||||
|
: .
|
||||||
|
{
|
||||||
|
// TODO: Issue error message
|
||||||
|
//
|
||||||
|
skip();
|
||||||
|
}
|
||||||
|
;
|
|
@ -0,0 +1,964 @@
|
||||||
|
/*
|
||||||
|
[The "BSD license"]
|
||||||
|
Copyright (c) 2010 Jim Idle, Terence Parr
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** The definitive ANTLR v3 grammar to parse ANTLR v4 grammars.
|
||||||
|
* The grammar builds ASTs that are sniffed by subsequent stages.
|
||||||
|
*/
|
||||||
|
parser grammar ANTLRParser;
|
||||||
|
|
||||||
|
options {
|
||||||
|
// Target language is Java, which is the default but being specific
|
||||||
|
// here as this grammar is also meant as a good example grammar for
|
||||||
|
// for users.
|
||||||
|
language = Java;
|
||||||
|
|
||||||
|
// The output of this grammar is going to be an AST upon which
|
||||||
|
// we run a semantic checking phase, then the rest of the analysis
|
||||||
|
// including final code generation.
|
||||||
|
output = AST;
|
||||||
|
|
||||||
|
// The vocabulary (tokens and their int token types) we are using
|
||||||
|
// for the parser. This is generated by the lexer. The vocab will be extended
|
||||||
|
// to include the imaginary tokens below.
|
||||||
|
tokenVocab = ANTLRLexer;
|
||||||
|
|
||||||
|
ASTLabelType = GrammarAST;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Imaginary Tokens
|
||||||
|
//
|
||||||
|
// Imaginary tokens do not exist as far as the lexer is concerned, and it cannot
|
||||||
|
// generate them. However we sometimes need additional 'tokens' to use as root
|
||||||
|
// nodes for the AST we are generating. The tokens section is where we
|
||||||
|
// specify any such tokens
|
||||||
|
tokens {
|
||||||
|
LEXER;
|
||||||
|
RULE;
|
||||||
|
RULES;
|
||||||
|
RULEMODIFIERS;
|
||||||
|
RULEACTIONS;
|
||||||
|
BLOCK;
|
||||||
|
REWRITE_BLOCK;
|
||||||
|
OPTIONAL;
|
||||||
|
CLOSURE;
|
||||||
|
POSITIVE_CLOSURE;
|
||||||
|
SYNPRED;
|
||||||
|
RANGE;
|
||||||
|
CHAR_RANGE;
|
||||||
|
EPSILON;
|
||||||
|
ALT;
|
||||||
|
ALTLIST;
|
||||||
|
ID;
|
||||||
|
ARG;
|
||||||
|
ARGLIST;
|
||||||
|
RET;
|
||||||
|
COMBINED;
|
||||||
|
INITACTION;
|
||||||
|
LABEL; // $x used in rewrite rules
|
||||||
|
TEMPLATE;
|
||||||
|
GATED_SEMPRED; // {p}? =>
|
||||||
|
SYN_SEMPRED; // (...) => it's a manually-specified synpred converted to sempred
|
||||||
|
BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
|
||||||
|
WILDCARD;
|
||||||
|
// A generic node indicating a list of something when we don't
|
||||||
|
// really need to distinguish what we have a list of as the AST
|
||||||
|
// will 'kinow' by context.
|
||||||
|
//
|
||||||
|
LIST;
|
||||||
|
ELEMENT_OPTIONS; // TOKEN<options>
|
||||||
|
ST_RESULT; // distinguish between ST and tree rewrites
|
||||||
|
RESULT;
|
||||||
|
ALT_REWRITE; // indicate ALT is rewritten
|
||||||
|
}
|
||||||
|
|
||||||
|
// Include the copyright in this source and also the generated source
|
||||||
|
//
|
||||||
|
@header {
|
||||||
|
/*
|
||||||
|
[The "BSD licence"]
|
||||||
|
Copyright (c) 2005-2009 Terence Parr
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
|
||||||
|
import org.antlr.v4.tool.*;
|
||||||
|
}
|
||||||
|
|
||||||
|
@members {
|
||||||
|
Stack paraphrases = new Stack();
|
||||||
|
}
|
||||||
|
|
||||||
|
// The main entry point for parsing a V3 grammar from top to toe. This is
|
||||||
|
// the method call from whence to obtain the AST for the parse.
|
||||||
|
//
|
||||||
|
grammarSpec
|
||||||
|
:
|
||||||
|
// The grammar itself can have a documenation comment, which is the
|
||||||
|
// first terminal in the file.
|
||||||
|
//
|
||||||
|
DOC_COMMENT?
|
||||||
|
|
||||||
|
// Next we should see the type and name of the grammar file that
|
||||||
|
// we are about to parse.
|
||||||
|
//
|
||||||
|
grammarType id SEMI
|
||||||
|
|
||||||
|
// There now follows zero or more declaration sections that should
|
||||||
|
// be given to us before the rules are declared
|
||||||
|
//
|
||||||
|
// A number of things can be declared/stated before the grammar rules
|
||||||
|
// 'proper' are parsed. These include grammar imports (delegate), grammar
|
||||||
|
// options, imaginary token declarations, global scope declarations,
|
||||||
|
// and actions such as @header. In this rule we allow any number of
|
||||||
|
// these constructs in any order so that the grammar author is not
|
||||||
|
// constrained by some arbitrary order of declarations that nobody
|
||||||
|
// can remember. In the next phase of the parse, we verify that these
|
||||||
|
// constructs are valid, not repeated and so on.
|
||||||
|
sync ( prequelConstruct sync )*
|
||||||
|
|
||||||
|
// We should now see at least one ANTLR EBNF style rule
|
||||||
|
// declaration. If the rules are missing we will let the
|
||||||
|
// semantic verification phase tell the user about it.
|
||||||
|
//
|
||||||
|
rules
|
||||||
|
|
||||||
|
mode*
|
||||||
|
|
||||||
|
// And we force ANTLR to process everything it finds in the input
|
||||||
|
// stream by specifying hte need to match End Of File before the
|
||||||
|
// parse is complete.
|
||||||
|
//
|
||||||
|
EOF
|
||||||
|
|
||||||
|
// Having parsed everything in the file and accumulated the relevant
|
||||||
|
// subtrees, we can now rewrite everything into the main AST form
|
||||||
|
// that our tree walkers are expecting.
|
||||||
|
//
|
||||||
|
|
||||||
|
-> ^(grammarType // The grammar type is our root AST node
|
||||||
|
id // We need to identify the grammar of course
|
||||||
|
DOC_COMMENT? // We may or may not have a global documentation comment for the file
|
||||||
|
prequelConstruct* // The set of declarations we accumulated
|
||||||
|
rules // And of course, we need the set of rules we discovered
|
||||||
|
mode*
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
grammarType
|
||||||
|
@after {
|
||||||
|
if ( $t!=null ) ((GrammarRootAST)$tree).grammarType = $t.type;
|
||||||
|
else ((GrammarRootAST)$tree).grammarType=COMBINED;
|
||||||
|
}
|
||||||
|
: ( t=LEXER g=GRAMMAR -> GRAMMAR<GrammarRootAST>[$g, "LEXER_GRAMMAR"]
|
||||||
|
| // A standalone parser specification
|
||||||
|
t=PARSER g=GRAMMAR -> GRAMMAR<GrammarRootAST>[$g, "PARSER_GRAMMAR"]
|
||||||
|
|
||||||
|
| // A standalone tree parser specification
|
||||||
|
t=TREE g=GRAMMAR -> GRAMMAR<GrammarRootAST>[$g, "TREE_GRAMMAR"]
|
||||||
|
|
||||||
|
// A combined lexer and parser specification
|
||||||
|
| g=GRAMMAR -> GRAMMAR<GrammarRootAST>[$g, "COMBINED_GRAMMAR"]
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
// This is the list of all constructs that can be declared before
|
||||||
|
// the set of rules that compose the grammar, and is invoked 0..n
|
||||||
|
// times by the grammarPrequel rule.
|
||||||
|
prequelConstruct
|
||||||
|
: // A list of options that affect analysis and/or code generation
|
||||||
|
optionsSpec
|
||||||
|
|
||||||
|
| // A list of grammars to which this grammar will delegate certain
|
||||||
|
// parts of the parsing sequence - a set of imported grammars
|
||||||
|
delegateGrammars
|
||||||
|
|
||||||
|
| // The declaration of any token types we need that are not already
|
||||||
|
// specified by a preceeding grammar, such as when a parser declares
|
||||||
|
// imaginary tokens with which to construct the AST, or a rewriting
|
||||||
|
// tree parser adds further imaginary tokens to ones defined in a prior
|
||||||
|
// {tree} parser.
|
||||||
|
tokensSpec
|
||||||
|
|
||||||
|
| // A declaration of a scope that may be used in multiple rules within
|
||||||
|
// the grammar spec, rather than being delcared and therefore associated
|
||||||
|
// with, a specific rule.
|
||||||
|
attrScope
|
||||||
|
|
||||||
|
| // A declaration of language target implemented constructs. All such
|
||||||
|
// action sections start with '@' and are given to the language target's
|
||||||
|
// StringTemplate group. For instance @parser::header and @lexer::header
|
||||||
|
// are gathered here.
|
||||||
|
action
|
||||||
|
;
|
||||||
|
|
||||||
|
// A list of options that affect analysis and/or code generation
|
||||||
|
optionsSpec
|
||||||
|
: OPTIONS (option SEMI)* RBRACE -> ^(OPTIONS[$OPTIONS, "OPTIONS"] option+)
|
||||||
|
;
|
||||||
|
|
||||||
|
option
|
||||||
|
: id ASSIGN^ optionValue
|
||||||
|
;
|
||||||
|
|
||||||
|
// ------------
|
||||||
|
// Option Value
|
||||||
|
//
|
||||||
|
// The actual value of an option - Doh!
|
||||||
|
//
|
||||||
|
optionValue
|
||||||
|
: // If the option value is a single word that conforms to the
|
||||||
|
// lexical rules of token or rule names, then the user may skip quotes
|
||||||
|
// and so on. Many option values meet this description
|
||||||
|
//
|
||||||
|
qid
|
||||||
|
|
||||||
|
| // The value is a long string
|
||||||
|
//
|
||||||
|
STRING_LITERAL<TerminalAST>
|
||||||
|
|
||||||
|
| // The value was an integer number
|
||||||
|
//
|
||||||
|
INT
|
||||||
|
|
||||||
|
| // Asterisk, used for things like k=*
|
||||||
|
//
|
||||||
|
STAR
|
||||||
|
;
|
||||||
|
|
||||||
|
// A list of grammars to which this grammar will delegate certain
|
||||||
|
// parts of the parsing sequence - a set of imported grammars
|
||||||
|
delegateGrammars
|
||||||
|
: IMPORT delegateGrammar (COMMA delegateGrammar)* SEMI -> ^(IMPORT delegateGrammar+)
|
||||||
|
;
|
||||||
|
|
||||||
|
// A possibly named grammar file that should be imported to this gramamr
|
||||||
|
// and delgated to for the rules it specifies
|
||||||
|
delegateGrammar
|
||||||
|
: id ASSIGN^ id
|
||||||
|
| id
|
||||||
|
;
|
||||||
|
|
||||||
|
/** The declaration of any token types we need that are not already
|
||||||
|
* specified by a preceeding grammar, such as when a parser declares
|
||||||
|
* imaginary tokens with which to construct the AST, or a rewriting
|
||||||
|
* tree parser adds further imaginary tokens to ones defined in a prior
|
||||||
|
* {tree} parser.
|
||||||
|
*/
|
||||||
|
tokensSpec
|
||||||
|
: TOKENS tokenSpec+ RBRACE -> ^(TOKENS tokenSpec+)
|
||||||
|
;
|
||||||
|
|
||||||
|
tokenSpec
|
||||||
|
: id
|
||||||
|
( ASSIGN STRING_LITERAL -> ^(ASSIGN id STRING_LITERAL<TerminalAST>)
|
||||||
|
| -> id
|
||||||
|
)
|
||||||
|
SEMI
|
||||||
|
| RULE_REF // INVALID! (an error alt)
|
||||||
|
;
|
||||||
|
|
||||||
|
// A declaration of a scope that may be used in multiple rules within
|
||||||
|
// the grammar spec, rather than being declared within and therefore associated
|
||||||
|
// with, a specific rule.
|
||||||
|
attrScope
|
||||||
|
: SCOPE id ACTION -> ^(SCOPE id ACTION<ActionAST>)
|
||||||
|
;
|
||||||
|
|
||||||
|
// A declaration of a language target specifc section,
|
||||||
|
// such as @header, @includes and so on. We do not verify these
|
||||||
|
// sections, they are just passed on to the language target.
|
||||||
|
/** Match stuff like @parser::members {int i;} */
|
||||||
|
action
|
||||||
|
: AT (actionScopeName COLONCOLON)? id ACTION -> ^(AT actionScopeName? id ACTION<ActionAST>)
|
||||||
|
;
|
||||||
|
|
||||||
|
/** Sometimes the scope names will collide with keywords; allow them as
|
||||||
|
* ids for action scopes.
|
||||||
|
*/
|
||||||
|
actionScopeName
|
||||||
|
: id
|
||||||
|
| LEXER -> ID[$LEXER]
|
||||||
|
| PARSER -> ID[$PARSER]
|
||||||
|
;
|
||||||
|
|
||||||
|
mode: MODE id SEMI sync (rule sync)+ -> ^(MODE id rule+) ;
|
||||||
|
|
||||||
|
rules
|
||||||
|
: sync (rule sync)*
|
||||||
|
// Rewrite with an enclosing node as this is good for counting
|
||||||
|
// the number of rules and an easy marker for the walker to detect
|
||||||
|
// that there are no rules.
|
||||||
|
->^(RULES rule*)
|
||||||
|
;
|
||||||
|
|
||||||
|
sync
|
||||||
|
@init {
|
||||||
|
BitSet followSet = computeErrorRecoverySet();
|
||||||
|
if ( input.LA(1)!=Token.EOF && !followSet.member(input.LA(1)) ) {
|
||||||
|
reportError(new NoViableAltException("",0,0,input));
|
||||||
|
beginResync();
|
||||||
|
consumeUntil(input, followSet);
|
||||||
|
endResync();
|
||||||
|
}
|
||||||
|
} :
|
||||||
|
;
|
||||||
|
|
||||||
|
// The specification of an EBNF rule in ANTLR style, with all the
|
||||||
|
// rule level parameters, declarations, actions, rewrite specs and so
|
||||||
|
// on.
|
||||||
|
//
|
||||||
|
// Note that here we allow any number of rule declaration sections (such
|
||||||
|
// as scope, returns, etc) in any order and we let the upcoming semantic
|
||||||
|
// verification of the AST determine if things are repeated or if a
|
||||||
|
// particular functional element is not valid in the context of the
|
||||||
|
// grammar type, such as using returns in lexer rules and so on.
|
||||||
|
rule
|
||||||
|
@init { paraphrases.push("matching a rule"); }
|
||||||
|
@after { paraphrases.pop(); }
|
||||||
|
: // A rule may start with an optional documentation comment
|
||||||
|
DOC_COMMENT?
|
||||||
|
|
||||||
|
// Following the documentation, we can declare a rule to be
|
||||||
|
// public, private and so on. This is only valid for some
|
||||||
|
// language targets of course but the target will ignore these
|
||||||
|
// modifiers if they make no sense in that language.
|
||||||
|
ruleModifiers?
|
||||||
|
|
||||||
|
// Next comes the rule name. Here we do not distinguish between
|
||||||
|
// parser or lexer rules, the semantic verification phase will
|
||||||
|
// reject any rules that make no sense, such as lexer rules in
|
||||||
|
// a pure parser or tree parser.
|
||||||
|
id
|
||||||
|
|
||||||
|
// Immediately following the rulename, there may be a specification
|
||||||
|
// of input parameters for the rule. We do not do anything with the
|
||||||
|
// parameters here except gather them for future phases such as
|
||||||
|
// semantic verifcation, type assignment etc. We require that
|
||||||
|
// the input parameters are the next syntactically significant element
|
||||||
|
// following the rule id.
|
||||||
|
ARG_ACTION?
|
||||||
|
|
||||||
|
ruleReturns?
|
||||||
|
|
||||||
|
// Now, before the rule specification itself, which is introduced
|
||||||
|
// with a COLON, we may have zero or more configuration sections.
|
||||||
|
// As usual we just accept anything that is syntactically valid for
|
||||||
|
// one form of the rule or another and let the semantic verification
|
||||||
|
// phase throw out anything that is invalid.
|
||||||
|
// At the rule level, a programmer may specify a number of sections, such
|
||||||
|
// as scope declarations, rule return elements, @ sections (which may be
|
||||||
|
// language target specific) and so on. We allow any number of these in any
|
||||||
|
// order here and as usual rely onthe semantic verification phase to reject
|
||||||
|
// anything invalid using its addinotal context information. Here we are
|
||||||
|
// context free and just accept anything that is a syntactically correct
|
||||||
|
// construct.
|
||||||
|
//
|
||||||
|
rulePrequels
|
||||||
|
|
||||||
|
COLON
|
||||||
|
|
||||||
|
// The rule is, at the top level, just a list of alts, with
|
||||||
|
// finer grained structure defined within the alts.
|
||||||
|
ruleBlock
|
||||||
|
|
||||||
|
SEMI
|
||||||
|
|
||||||
|
exceptionGroup
|
||||||
|
|
||||||
|
-> ^( RULE<RuleAST> id DOC_COMMENT? ruleModifiers? ARG_ACTION?
|
||||||
|
ruleReturns? rulePrequels? ruleBlock exceptionGroup*
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
// Many language targets support exceptions and the rule will
|
||||||
|
// generally be able to throw the language target equivalent
|
||||||
|
// of a recognition exception. The grammar programmar can
|
||||||
|
// specify a list of exceptions to catch or a generic catch all
|
||||||
|
// and the target language code generation template is
|
||||||
|
// responsible for generating code that makes sense.
|
||||||
|
exceptionGroup
|
||||||
|
: exceptionHandler* finallyClause?
|
||||||
|
;
|
||||||
|
|
||||||
|
// Specifies a handler for a particular type of exception
|
||||||
|
// thrown by a rule
|
||||||
|
exceptionHandler
|
||||||
|
: CATCH ARG_ACTION ACTION -> ^(CATCH ARG_ACTION ACTION<ActionAST>)
|
||||||
|
;
|
||||||
|
|
||||||
|
// Specifies a block of code to run after the rule and any
|
||||||
|
// expcetion blocks have exceuted.
|
||||||
|
finallyClause
|
||||||
|
: FINALLY ACTION -> ^(FINALLY ACTION<ActionAST>)
|
||||||
|
;
|
||||||
|
|
||||||
|
rulePrequels
|
||||||
|
@init { paraphrases.push("matching rule preamble"); }
|
||||||
|
@after { paraphrases.pop(); }
|
||||||
|
: sync (rulePrequel sync)* -> rulePrequel*
|
||||||
|
;
|
||||||
|
|
||||||
|
// An individual rule level configuration as referenced by the ruleActions
|
||||||
|
// rule above.
|
||||||
|
//
|
||||||
|
rulePrequel
|
||||||
|
: throwsSpec
|
||||||
|
| ruleScopeSpec
|
||||||
|
| optionsSpec
|
||||||
|
| ruleAction
|
||||||
|
;
|
||||||
|
|
||||||
|
// A rule can return elements that it constructs as it executes.
|
||||||
|
// The return values are specified in a 'returns' prequel element,
|
||||||
|
// which contains COMMA separated declarations, where the declaration
|
||||||
|
// is target language specific. Here we see the returns declaration
|
||||||
|
// as a single lexical action element, to be processed later.
|
||||||
|
//
|
||||||
|
ruleReturns
|
||||||
|
: RETURNS^ ARG_ACTION
|
||||||
|
;
|
||||||
|
|
||||||
|
// --------------
|
||||||
|
// Exception spec
|
||||||
|
//
|
||||||
|
// Some target languages, such as Java and C# support exceptions
|
||||||
|
// and they are specified as a prequel element for each rule that
|
||||||
|
// wishes to throw its own exception type. Note that the name of the
|
||||||
|
// exception is just a single word, so the header section of the grammar
|
||||||
|
// must specify the correct import statements (or language equivalent).
|
||||||
|
// Target languages that do not support exceptions just safely ignore
|
||||||
|
// them.
|
||||||
|
//
|
||||||
|
throwsSpec
|
||||||
|
: THROWS qid (COMMA qid)* -> ^(THROWS qid+)
|
||||||
|
;
|
||||||
|
|
||||||
|
// As well as supporting globally specifed scopes, ANTLR supports rule
|
||||||
|
// level scopes, which are tracked in a rule specific stack. Rule specific
|
||||||
|
// scopes are specified at this level, and globally specified scopes
|
||||||
|
// are merely referenced here.
|
||||||
|
ruleScopeSpec
|
||||||
|
: SCOPE ACTION -> ^(SCOPE ACTION)
|
||||||
|
| SCOPE id (COMMA id)* SEMI -> ^(SCOPE id+)
|
||||||
|
;
|
||||||
|
|
||||||
|
// @ Sections are generally target language specific things
|
||||||
|
// such as local variable declarations, code to run before the
|
||||||
|
// rule starts and so on. Fir instance most targets support the
|
||||||
|
// @init {} section where declarations and code can be placed
|
||||||
|
// to run before the rule is entered. The C target also has
|
||||||
|
// an @declarations {} section, where local variables are declared
|
||||||
|
// in order that the generated code is C89 copmliant.
|
||||||
|
//
|
||||||
|
/** Match stuff like @init {int i;} */
|
||||||
|
ruleAction
|
||||||
|
: AT id ACTION -> ^(AT id ACTION<ActionAST>)
|
||||||
|
;
|
||||||
|
|
||||||
|
// A set of access modifiers that may be applied to rule declarations
|
||||||
|
// and which may or may not mean something to the target language.
|
||||||
|
// Note that the parser allows any number of these in any order and the
|
||||||
|
// semantic pass will throw out invalid combinations.
|
||||||
|
//
|
||||||
|
ruleModifiers
|
||||||
|
: ruleModifier+ -> ^(RULEMODIFIERS ruleModifier+)
|
||||||
|
;
|
||||||
|
|
||||||
|
// An individual access modifier for a rule. The 'fragment' modifier
|
||||||
|
// is an internal indication for lexer rules that they do not match
|
||||||
|
// from the input but are like subroutines for other lexer rules to
|
||||||
|
// reuse for certain lexical patterns. The other modifiers are passed
|
||||||
|
// to the code generation templates and may be ignored by the template
|
||||||
|
// if they are of no use in that language.
|
||||||
|
ruleModifier
|
||||||
|
: PUBLIC
|
||||||
|
| PRIVATE
|
||||||
|
| PROTECTED
|
||||||
|
| FRAGMENT
|
||||||
|
;
|
||||||
|
|
||||||
|
altList
|
||||||
|
: alternative (OR alternative)* -> alternative+
|
||||||
|
;
|
||||||
|
|
||||||
|
// A set of alts, rewritten as a BLOCK for generic processing
|
||||||
|
// in tree walkers. Used by the rule 'rule' so that the list of
|
||||||
|
// alts for a rule appears as a BLOCK containing the alts and
|
||||||
|
// can be processed by the generic BLOCK rule. Note that we
|
||||||
|
// use a separate rule so that the BLOCK node has start and stop
|
||||||
|
// boundaries set correctly by rule post processing of rewrites.
|
||||||
|
ruleBlock
|
||||||
|
@init {Token colon = input.LT(-1);}
|
||||||
|
: altList -> ^(BLOCK<BlockAST>[colon,"BLOCK"] altList)
|
||||||
|
;
|
||||||
|
catch [ResyncToEndOfRuleBlock e] {
|
||||||
|
// just resyncing; ignore error
|
||||||
|
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null);
|
||||||
|
}
|
||||||
|
|
||||||
|
// An individual alt with an optional rewrite clause for the
|
||||||
|
// elements of the alt.
|
||||||
|
alternative
|
||||||
|
@init { paraphrases.push("matching alternative"); }
|
||||||
|
@after { paraphrases.pop(); }
|
||||||
|
: elements
|
||||||
|
( rewrite -> ^(ALT_REWRITE<AltAST> elements rewrite)
|
||||||
|
| -> elements
|
||||||
|
)
|
||||||
|
| rewrite -> ^(ALT_REWRITE<AltAST> ^(ALT<AltAST> EPSILON) rewrite) // empty alt with rewrite
|
||||||
|
| -> ^(ALT<AltAST> EPSILON) // empty alt
|
||||||
|
;
|
||||||
|
|
||||||
|
elements
|
||||||
|
: e+=element+ -> ^(ALT<AltAST> $e+)
|
||||||
|
;
|
||||||
|
|
||||||
|
element
|
||||||
|
@init {
|
||||||
|
paraphrases.push("looking for rule element");
|
||||||
|
int m = input.mark();
|
||||||
|
}
|
||||||
|
@after { paraphrases.pop(); }
|
||||||
|
: labeledElement
|
||||||
|
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$labeledElement.start,"BLOCK"] ^(ALT<AltAST> labeledElement ) ))
|
||||||
|
| -> labeledElement
|
||||||
|
)
|
||||||
|
| atom
|
||||||
|
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$atom.start,"BLOCK"] ^(ALT<AltAST> atom) ) )
|
||||||
|
| -> atom
|
||||||
|
)
|
||||||
|
| ebnf
|
||||||
|
| ACTION<ActionAST>
|
||||||
|
| FORCED_ACTION<ActionAST>
|
||||||
|
| SEMPRED
|
||||||
|
( IMPLIES -> GATED_SEMPRED[$SEMPRED]
|
||||||
|
| -> SEMPRED<PredAST>
|
||||||
|
)
|
||||||
|
| treeSpec
|
||||||
|
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$treeSpec.start,"BLOCK"] ^(ALT<AltAST> treeSpec ) ) )
|
||||||
|
| -> treeSpec
|
||||||
|
)
|
||||||
|
;
|
||||||
|
catch [RecognitionException re] {
|
||||||
|
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re);
|
||||||
|
int ttype = input.get(input.range()).getType();
|
||||||
|
// look for anything that really belongs at the start of the rule minus the initial ID
|
||||||
|
if ( ttype==COLON || ttype==RETURNS || ttype==CATCH || ttype==FINALLY || ttype==AT ) {
|
||||||
|
RecognitionException missingSemi =
|
||||||
|
new v4ParserException("unterminated rule (missing ';') detected at '"+
|
||||||
|
input.LT(1).getText()+" "+input.LT(2).getText()+"'", input);
|
||||||
|
reportError(missingSemi);
|
||||||
|
if ( ttype==CATCH || ttype==FINALLY ) {
|
||||||
|
input.seek(input.range()); // ignore what's before rule trailer stuff
|
||||||
|
}
|
||||||
|
if ( ttype==RETURNS || ttype==AT ) { // scan back looking for ID of rule header
|
||||||
|
int p = input.index();
|
||||||
|
Token t = input.get(p);
|
||||||
|
while ( t.getType()!=RULE_REF && t.getType()!=TOKEN_REF ) {
|
||||||
|
p--;
|
||||||
|
t = input.get(p);
|
||||||
|
}
|
||||||
|
input.seek(p);
|
||||||
|
}
|
||||||
|
throw new ResyncToEndOfRuleBlock(); // make sure it goes back to rule block level to recover
|
||||||
|
}
|
||||||
|
reportError(re);
|
||||||
|
recover(input,re);
|
||||||
|
/*
|
||||||
|
input.rewind(m);
|
||||||
|
final List subset = input.get(input.index(), input.range());
|
||||||
|
System.out.println("failed to match as element: '"+subset);
|
||||||
|
CommonTokenStream ns = new CommonTokenStream(
|
||||||
|
new TokenSource() {
|
||||||
|
int i = 0;
|
||||||
|
public Token nextToken() {
|
||||||
|
if ( i>=subset.size() ) return Token.EOF_TOKEN;
|
||||||
|
return (Token)subset.get(i++);
|
||||||
|
}
|
||||||
|
public String getSourceName() { return null; }
|
||||||
|
});
|
||||||
|
ANTLRParser errorParser = new ANTLRParser(ns);
|
||||||
|
errorParser.setTreeAdaptor(this.adaptor);
|
||||||
|
errorParser.element_errors(re);
|
||||||
|
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re);
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
element_errors[RecognitionException origError]
|
||||||
|
options {backtrack=true;}
|
||||||
|
@init {
|
||||||
|
int m = input.mark();
|
||||||
|
//state.backtracking++;
|
||||||
|
}
|
||||||
|
@after {
|
||||||
|
//state.backtracking--;
|
||||||
|
}
|
||||||
|
: ( DOC_COMMENT? ruleModifiers? id ARG_ACTION? ruleReturns? rulePrequel* COLON
|
||||||
|
| exceptionGroup
|
||||||
|
)
|
||||||
|
{reportError(missingSemi); recover(input,null);}
|
||||||
|
;
|
||||||
|
catch [RecognitionException ignore] {
|
||||||
|
input.rewind(m);
|
||||||
|
input.consume(); // kill at least one token
|
||||||
|
reportError(origError);
|
||||||
|
BitSet followSet = computeErrorRecoverySet();
|
||||||
|
beginResync();
|
||||||
|
consumeUntil(input, followSet);
|
||||||
|
endResync();
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
labeledElement : id (ASSIGN^|PLUS_ASSIGN^) (atom|block) ;
|
||||||
|
|
||||||
|
// Tree specifying alt
|
||||||
|
// Tree grammars need to have alts that describe a tree structure they
|
||||||
|
// will walk of course. Alts for trees therefore start with ^( XXX, which
|
||||||
|
// says we will see a root node of XXX then DOWN etc
|
||||||
|
treeSpec
|
||||||
|
: TREE_BEGIN
|
||||||
|
// Only a subset of elements are allowed to be a root node. However
|
||||||
|
// we allow any element to appear here and reject silly ones later
|
||||||
|
// when we walk the AST.
|
||||||
|
element
|
||||||
|
// After the tree root we get the usual suspects,
|
||||||
|
// all members of the element set
|
||||||
|
element+
|
||||||
|
RPAREN
|
||||||
|
-> ^(TREE_BEGIN element+)
|
||||||
|
;
|
||||||
|
|
||||||
|
// A block of gramamr structure optionally followed by standard EBNF
|
||||||
|
// notation, or ANTLR specific notation. I.E. ? + ^ and so on
|
||||||
|
ebnf
|
||||||
|
: block
|
||||||
|
// And now we see if we have any of the optional suffixs and rewrite
|
||||||
|
// the AST for this rule accordingly
|
||||||
|
//
|
||||||
|
( blockSuffixe -> ^(blockSuffixe block)
|
||||||
|
| -> block
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
// The standard EBNF suffixes with additional components that make
|
||||||
|
// sense only to ANTLR, in the context of a grammar block.
|
||||||
|
blockSuffixe
|
||||||
|
: ebnfSuffix // Standard EBNF
|
||||||
|
|
||||||
|
// ANTLR Specific Suffixes
|
||||||
|
| ROOT
|
||||||
|
| IMPLIES // We will change this to syn/sem pred in the next phase
|
||||||
|
| BANG
|
||||||
|
;
|
||||||
|
|
||||||
|
ebnfSuffix
|
||||||
|
@init {
|
||||||
|
Token op = input.LT(1);
|
||||||
|
}
|
||||||
|
: QUESTION -> OPTIONAL[op]
|
||||||
|
| STAR -> CLOSURE[op]
|
||||||
|
| PLUS -> POSITIVE_CLOSURE[op]
|
||||||
|
;
|
||||||
|
|
||||||
|
atom: // Qualified reference delegate.rule. This must be
|
||||||
|
// lexically contiguous (no spaces either side of the DOT)
|
||||||
|
// otherwise it is two references with a wildcard in between
|
||||||
|
// and not a qualified reference.
|
||||||
|
{
|
||||||
|
input.LT(1).getCharPositionInLine()+input.LT(1).getText().length()==
|
||||||
|
input.LT(2).getCharPositionInLine() &&
|
||||||
|
input.LT(2).getCharPositionInLine()+1==input.LT(3).getCharPositionInLine()
|
||||||
|
}?
|
||||||
|
id DOT ruleref -> ^(DOT id ruleref)
|
||||||
|
| range (ROOT^ | BANG^)? // Range x..y - only valid in lexers
|
||||||
|
| terminal (ROOT^ | BANG^)?
|
||||||
|
| ruleref
|
||||||
|
| notSet (ROOT^|BANG^)?
|
||||||
|
| // Wildcard '.' means any character in a lexer, any
|
||||||
|
// token in parser and any token or node in a tree parser
|
||||||
|
// Because the terminal rule is allowed to be the node
|
||||||
|
// specification for the start of a tree rule, we must
|
||||||
|
// later check that wildcard was not used for that.
|
||||||
|
DOT elementOptions? -> ^(WILDCARD<TerminalAST>[$DOT] elementOptions?)
|
||||||
|
;
|
||||||
|
catch [RecognitionException re] { throw re; } // pass upwards to element
|
||||||
|
|
||||||
|
// --------------------
|
||||||
|
// Inverted element set
|
||||||
|
//
|
||||||
|
// A set of characters (in a lexer) or terminal tokens, if a parser
|
||||||
|
// that are then used to create the inverse set of them.
|
||||||
|
//
|
||||||
|
notSet
|
||||||
|
: NOT terminal -> ^(NOT terminal)
|
||||||
|
| NOT blockSet -> ^(NOT blockSet)
|
||||||
|
;
|
||||||
|
|
||||||
|
blockSet
|
||||||
|
: LPAREN
|
||||||
|
setElement (OR setElement)*
|
||||||
|
RPAREN
|
||||||
|
-> ^(BLOCK<BlockAST>[$LPAREN,"BLOCK"] setElement+ )
|
||||||
|
;
|
||||||
|
|
||||||
|
setElement
|
||||||
|
: range
|
||||||
|
| terminal
|
||||||
|
;
|
||||||
|
|
||||||
|
// -------------
|
||||||
|
// Grammar Block
|
||||||
|
//
|
||||||
|
// Anywhere where an element is valid, the grammar may start a new block
|
||||||
|
// of alts by surrounding that block with ( ). A new block may also have a set
|
||||||
|
// of options, which apply only to that block.
|
||||||
|
//
|
||||||
|
block
|
||||||
|
: LPAREN
|
||||||
|
// A new blocked altlist may have a set of options set sepcifically
|
||||||
|
// for it.
|
||||||
|
( optionsSpec? ra+=ruleAction* COLON )?
|
||||||
|
altList
|
||||||
|
RPAREN
|
||||||
|
-> ^(BLOCK<BlockAST>[$LPAREN,"BLOCK"] optionsSpec? $ra* altList )
|
||||||
|
;
|
||||||
|
|
||||||
|
// ----------------
|
||||||
|
// Parser rule ref
|
||||||
|
//
|
||||||
|
// Reference to a parser rule with optional arguments and optional
|
||||||
|
// directive to become the root node or ignore the tree produced
|
||||||
|
//
|
||||||
|
ruleref
|
||||||
|
: RULE_REF ARG_ACTION?
|
||||||
|
( (op=ROOT|op=BANG) -> ^($op ^(RULE_REF ARG_ACTION?))
|
||||||
|
| -> ^(RULE_REF ARG_ACTION?)
|
||||||
|
)
|
||||||
|
;
|
||||||
|
catch [RecognitionException re] { throw re; } // pass upwards to element
|
||||||
|
|
||||||
|
// ---------------
|
||||||
|
// Character Range
|
||||||
|
//
|
||||||
|
// Specifies a range of characters. Valid for lexer rules only, but
|
||||||
|
// we do not check that here, the tree walkers shoudl do that.
|
||||||
|
// Note also that the parser also allows through more than just
|
||||||
|
// character literals so that we can produce a much nicer semantic
|
||||||
|
// error about any abuse of the .. operator.
|
||||||
|
//
|
||||||
|
range
|
||||||
|
: STRING_LITERAL<TerminalAST> RANGE^ STRING_LITERAL<TerminalAST>
|
||||||
|
;
|
||||||
|
|
||||||
|
terminal
|
||||||
|
: // Args are only valid for lexer rules
|
||||||
|
TOKEN_REF ARG_ACTION? elementOptions? -> ^(TOKEN_REF<TerminalAST> ARG_ACTION? elementOptions?)
|
||||||
|
| STRING_LITERAL elementOptions? -> ^(STRING_LITERAL<TerminalAST> elementOptions?)
|
||||||
|
;
|
||||||
|
|
||||||
|
// Terminals may be adorned with certain options when
|
||||||
|
// reference in the grammar: TOK<,,,>
|
||||||
|
elementOptions
|
||||||
|
: LT elementOption (COMMA elementOption)* GT -> ^(ELEMENT_OPTIONS elementOption+)
|
||||||
|
;
|
||||||
|
|
||||||
|
// WHen used with elements we can specify what the tree node type can
|
||||||
|
// be and also assign settings of various options (which we do not check here)
|
||||||
|
elementOption
|
||||||
|
: // This format indicates the default node option
|
||||||
|
qid
|
||||||
|
|
||||||
|
| // This format indicates option assignment
|
||||||
|
id ASSIGN^ (qid | STRING_LITERAL<TerminalAST>)
|
||||||
|
;
|
||||||
|
|
||||||
|
rewrite
|
||||||
|
: predicatedRewrite* nakedRewrite -> predicatedRewrite* nakedRewrite
|
||||||
|
;
|
||||||
|
|
||||||
|
predicatedRewrite
|
||||||
|
: RARROW SEMPRED rewriteAlt
|
||||||
|
-> {$rewriteAlt.isTemplate}? ^(ST_RESULT[$RARROW] SEMPRED<PredAST> rewriteAlt)
|
||||||
|
-> ^(RESULT[$RARROW] SEMPRED<PredAST> rewriteAlt)
|
||||||
|
;
|
||||||
|
|
||||||
|
nakedRewrite
|
||||||
|
: RARROW rewriteAlt -> {$rewriteAlt.isTemplate}? ^(ST_RESULT[$RARROW] rewriteAlt)
|
||||||
|
-> ^(RESULT[$RARROW] rewriteAlt)
|
||||||
|
;
|
||||||
|
|
||||||
|
// distinguish between ST and tree rewrites; for ETC/EPSILON and trees,
|
||||||
|
// rule altAndRewrite makes REWRITE root. for ST, we use ST_REWRITE
|
||||||
|
rewriteAlt returns [boolean isTemplate]
|
||||||
|
options {backtrack=true;}
|
||||||
|
: // try to parse a template rewrite
|
||||||
|
rewriteTemplate {$isTemplate=true;}
|
||||||
|
|
||||||
|
| // If we are not building templates, then we must be
|
||||||
|
// building ASTs or have rewrites in a grammar that does not
|
||||||
|
// have output=AST; options. If that is the case, we will issue
|
||||||
|
// errors/warnings in the next phase, so we just eat them here
|
||||||
|
rewriteTreeAlt
|
||||||
|
|
||||||
|
| ETC
|
||||||
|
|
||||||
|
| /* empty rewrite */ -> EPSILON
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTreeAlt
|
||||||
|
: rewriteTreeElement+ -> ^(ALT rewriteTreeElement+)
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTreeElement
|
||||||
|
: rewriteTreeAtom
|
||||||
|
| rewriteTreeAtom ebnfSuffix -> ^( ebnfSuffix ^(REWRITE_BLOCK ^(ALT rewriteTreeAtom)) )
|
||||||
|
| rewriteTree
|
||||||
|
( ebnfSuffix
|
||||||
|
-> ^(ebnfSuffix ^(REWRITE_BLOCK ^(ALT rewriteTree)) )
|
||||||
|
| -> rewriteTree
|
||||||
|
)
|
||||||
|
| rewriteTreeEbnf
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTreeAtom
|
||||||
|
: TOKEN_REF elementOptions? ARG_ACTION? -> ^(TOKEN_REF<TerminalAST> elementOptions? ARG_ACTION?) // for imaginary nodes
|
||||||
|
| RULE_REF
|
||||||
|
| STRING_LITERAL elementOptions? -> ^(STRING_LITERAL<TerminalAST> elementOptions?)
|
||||||
|
| DOLLAR id -> LABEL[$DOLLAR,$id.text] // reference to a label in a rewrite rule
|
||||||
|
| ACTION<ActionAST>
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTreeEbnf
|
||||||
|
@init {
|
||||||
|
Token firstToken = input.LT(1);
|
||||||
|
}
|
||||||
|
@after {
|
||||||
|
$rewriteTreeEbnf.tree.getToken().setLine(firstToken.getLine());
|
||||||
|
$rewriteTreeEbnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
|
||||||
|
}
|
||||||
|
: lp=LPAREN rewriteTreeAlt RPAREN ebnfSuffix -> ^(ebnfSuffix ^(REWRITE_BLOCK[$lp] rewriteTreeAlt))
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTree
|
||||||
|
: TREE_BEGIN rewriteTreeAtom rewriteTreeElement* RPAREN
|
||||||
|
-> ^(TREE_BEGIN rewriteTreeAtom rewriteTreeElement* )
|
||||||
|
;
|
||||||
|
|
||||||
|
/** Build a tree for a template rewrite:
|
||||||
|
^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
|
||||||
|
ID can be "template" keyword. If first child is ACTION then it's
|
||||||
|
an indirect template ref
|
||||||
|
|
||||||
|
-> foo(a={...}, b={...})
|
||||||
|
-> ({string-e})(a={...}, b={...}) // e evaluates to template name
|
||||||
|
-> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
|
||||||
|
-> {st-expr} // st-expr evaluates to ST
|
||||||
|
*/
|
||||||
|
rewriteTemplate
|
||||||
|
: // -> template(a={...},...) "..." inline template
|
||||||
|
TEMPLATE LPAREN rewriteTemplateArgs RPAREN
|
||||||
|
( str=DOUBLE_QUOTE_STRING_LITERAL | str=DOUBLE_ANGLE_STRING_LITERAL )
|
||||||
|
-> ^(TEMPLATE[$TEMPLATE,"TEMPLATE"] rewriteTemplateArgs? $str)
|
||||||
|
|
||||||
|
| // -> foo(a={...}, ...)
|
||||||
|
rewriteTemplateRef
|
||||||
|
|
||||||
|
| // -> ({expr})(a={...}, ...)
|
||||||
|
rewriteIndirectTemplateHead
|
||||||
|
|
||||||
|
| // -> {...}
|
||||||
|
ACTION<ActionAST>
|
||||||
|
;
|
||||||
|
|
||||||
|
/** -> foo(a={...}, ...) */
|
||||||
|
rewriteTemplateRef
|
||||||
|
: id LPAREN rewriteTemplateArgs RPAREN
|
||||||
|
-> ^(TEMPLATE[$LPAREN,"TEMPLATE"] id rewriteTemplateArgs?)
|
||||||
|
;
|
||||||
|
|
||||||
|
/** -> ({expr})(a={...}, ...) */
|
||||||
|
rewriteIndirectTemplateHead
|
||||||
|
: lp=LPAREN ACTION RPAREN LPAREN rewriteTemplateArgs RPAREN
|
||||||
|
-> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION<ActionAST> rewriteTemplateArgs?)
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTemplateArgs
|
||||||
|
: rewriteTemplateArg (COMMA rewriteTemplateArg)*
|
||||||
|
-> ^(ARGLIST rewriteTemplateArg+)
|
||||||
|
|
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTemplateArg
|
||||||
|
: id ASSIGN ACTION -> ^(ARG[$ASSIGN] id ACTION<ActionAST>)
|
||||||
|
;
|
||||||
|
|
||||||
|
// The name of the grammar, and indeed some other grammar elements may
|
||||||
|
// come through to the parser looking like a rule reference or a token
|
||||||
|
// reference, hence this rule is used to pick up whichever it is and rewrite
|
||||||
|
// it as a generic ID token.
|
||||||
|
id
|
||||||
|
@init { paraphrases.push("looking for an identifier"); }
|
||||||
|
@after { paraphrases.pop(); }
|
||||||
|
: RULE_REF ->ID[$RULE_REF]
|
||||||
|
| TOKEN_REF ->ID[$TOKEN_REF]
|
||||||
|
| TEMPLATE ->ID[$TEMPLATE] // keyword
|
||||||
|
;
|
||||||
|
|
||||||
|
qid
|
||||||
|
@init { paraphrases.push("looking for a qualified identifier"); }
|
||||||
|
@after { paraphrases.pop(); }
|
||||||
|
: id (DOT id)* -> ID[$qid.start, $text]
|
||||||
|
;
|
||||||
|
|
||||||
|
alternativeEntry : alternative EOF ; // allow gunit to call alternative and see EOF afterwards
|
||||||
|
elementEntry : element EOF ;
|
||||||
|
ruleEntry : rule EOF ;
|
||||||
|
blockEntry : block EOF ;
|
|
@ -0,0 +1,431 @@
|
||||||
|
/*
|
||||||
|
[The "BSD license"]
|
||||||
|
Copyright (c) 2010 Terence Parr
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** The definitive ANTLR v3 tree grammar to parse ANTLR v4 grammars.
|
||||||
|
* Parses trees created in ANTLRParser.g.
|
||||||
|
*/
|
||||||
|
tree grammar ASTVerifier;
|
||||||
|
options {
|
||||||
|
language = Java;
|
||||||
|
tokenVocab = ANTLRParser;
|
||||||
|
ASTLabelType = GrammarAST;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Include the copyright in this source and also the generated source
|
||||||
|
@header {
|
||||||
|
/*
|
||||||
|
[The "BSD license"]
|
||||||
|
Copyright (c) 2005-2009 Terence Parr
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
import org.antlr.v4.tool.*;
|
||||||
|
import org.antlr.v4.runtime.tree.CommonTree; // use updated v4 one not v3
|
||||||
|
}
|
||||||
|
|
||||||
|
@members {
|
||||||
|
public String getErrorMessage(RecognitionException e,
|
||||||
|
String[] tokenNames)
|
||||||
|
{
|
||||||
|
List stack = getRuleInvocationStack(e, this.getClass().getName());
|
||||||
|
String msg = null;
|
||||||
|
String inputContext =
|
||||||
|
input.LT(-3) == null ? "" : ((Tree)input.LT(-3)).getText()+" "+
|
||||||
|
input.LT(-2) == null ? "" : ((Tree)input.LT(-2)).getText()+" "+
|
||||||
|
input.LT(-1) == null ? "" : ((Tree)input.LT(-1)).getText()+" >>>"+
|
||||||
|
input.LT(1) == null ? "" : ((Tree)input.LT(1)).getText()+"<<< "+
|
||||||
|
input.LT(2) == null ? "" : ((Tree)input.LT(2)).getText()+" "+
|
||||||
|
input.LT(3) == null ? "" : ((Tree)input.LT(3)).getText();
|
||||||
|
if ( e instanceof NoViableAltException ) {
|
||||||
|
NoViableAltException nvae = (NoViableAltException)e;
|
||||||
|
msg = " no viable alt; token="+e.token+
|
||||||
|
" (decision="+nvae.decisionNumber+
|
||||||
|
" state "+nvae.stateNumber+")"+
|
||||||
|
" decision=<<"+nvae.grammarDecisionDescription+">>";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
msg = super.getErrorMessage(e, tokenNames);
|
||||||
|
}
|
||||||
|
return stack+" "+msg+"\ncontext=..."+inputContext+"...";
|
||||||
|
}
|
||||||
|
public String getTokenErrorDisplay(Token t) {
|
||||||
|
return t.toString();
|
||||||
|
}
|
||||||
|
public void traceIn(String ruleName, int ruleIndex) {
|
||||||
|
System.out.print("enter "+ruleName+" "+
|
||||||
|
((GrammarAST)input.LT(1)).token+" "+
|
||||||
|
((GrammarAST)input.LT(2)).token+" "+
|
||||||
|
((GrammarAST)input.LT(3)).token+" "+
|
||||||
|
((GrammarAST)input.LT(4)).token);
|
||||||
|
if ( state.backtracking>0 ) {
|
||||||
|
System.out.print(" backtracking="+state.backtracking);
|
||||||
|
}
|
||||||
|
System.out.println();
|
||||||
|
}
|
||||||
|
protected void mismatch(IntStream input, int ttype, BitSet follow)
|
||||||
|
throws RecognitionException {
|
||||||
|
throw new MismatchedTokenException(ttype, input);
|
||||||
|
}
|
||||||
|
public void recoverFromMismatchedToken(IntStream input,
|
||||||
|
RecognitionException e, BitSet follow)
|
||||||
|
throws RecognitionException
|
||||||
|
|
||||||
|
{
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Alter code generation so catch-clauses get replace with // this action.
|
||||||
|
@rulecatch { catch (RecognitionException e) {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
grammarSpec
|
||||||
|
: ^(GRAMMAR ID DOC_COMMENT? prequelConstruct* rules mode*)
|
||||||
|
;
|
||||||
|
|
||||||
|
prequelConstruct
|
||||||
|
: optionsSpec
|
||||||
|
| delegateGrammars
|
||||||
|
| tokensSpec
|
||||||
|
| attrScope
|
||||||
|
| action
|
||||||
|
;
|
||||||
|
|
||||||
|
optionsSpec
|
||||||
|
: ^(OPTIONS option*)
|
||||||
|
;
|
||||||
|
|
||||||
|
option
|
||||||
|
: ^(ASSIGN ID optionValue)
|
||||||
|
;
|
||||||
|
|
||||||
|
optionValue returns [String v]
|
||||||
|
@init {$v = $start.token.getText();}
|
||||||
|
: ID
|
||||||
|
| STRING_LITERAL
|
||||||
|
| INT
|
||||||
|
| STAR
|
||||||
|
;
|
||||||
|
|
||||||
|
delegateGrammars
|
||||||
|
: ^(IMPORT delegateGrammar+)
|
||||||
|
;
|
||||||
|
|
||||||
|
delegateGrammar
|
||||||
|
: ^(ASSIGN ID ID)
|
||||||
|
| ID
|
||||||
|
;
|
||||||
|
|
||||||
|
tokensSpec
|
||||||
|
: ^(TOKENS tokenSpec+)
|
||||||
|
;
|
||||||
|
|
||||||
|
tokenSpec
|
||||||
|
: ^(ASSIGN ID STRING_LITERAL)
|
||||||
|
| ID
|
||||||
|
;
|
||||||
|
|
||||||
|
attrScope
|
||||||
|
: ^(SCOPE ID ACTION)
|
||||||
|
;
|
||||||
|
|
||||||
|
action
|
||||||
|
: ^(AT ID? ID ACTION)
|
||||||
|
;
|
||||||
|
|
||||||
|
rules
|
||||||
|
: ^(RULES rule*)
|
||||||
|
;
|
||||||
|
|
||||||
|
mode: ^( MODE ID rule+ ) ;
|
||||||
|
|
||||||
|
rule: ^( RULE ID DOC_COMMENT? ruleModifiers? ARG_ACTION?
|
||||||
|
ruleReturns? rulePrequel* altListAsBlock exceptionGroup
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
exceptionGroup
|
||||||
|
: exceptionHandler* finallyClause?
|
||||||
|
;
|
||||||
|
|
||||||
|
exceptionHandler
|
||||||
|
: ^(CATCH ARG_ACTION ACTION)
|
||||||
|
;
|
||||||
|
|
||||||
|
finallyClause
|
||||||
|
: ^(FINALLY ACTION)
|
||||||
|
;
|
||||||
|
|
||||||
|
rulePrequel
|
||||||
|
: throwsSpec
|
||||||
|
| ruleScopeSpec
|
||||||
|
| optionsSpec
|
||||||
|
| ruleAction
|
||||||
|
;
|
||||||
|
|
||||||
|
ruleReturns
|
||||||
|
: ^(RETURNS ARG_ACTION)
|
||||||
|
;
|
||||||
|
throwsSpec
|
||||||
|
: ^(THROWS ID+)
|
||||||
|
;
|
||||||
|
|
||||||
|
ruleScopeSpec
|
||||||
|
: ^(SCOPE ACTION)
|
||||||
|
| ^(SCOPE ID+)
|
||||||
|
;
|
||||||
|
|
||||||
|
ruleAction
|
||||||
|
: ^(AT ID ACTION)
|
||||||
|
;
|
||||||
|
|
||||||
|
ruleModifiers
|
||||||
|
: ^(RULEMODIFIERS ruleModifier+)
|
||||||
|
;
|
||||||
|
|
||||||
|
ruleModifier
|
||||||
|
: PUBLIC
|
||||||
|
| PRIVATE
|
||||||
|
| PROTECTED
|
||||||
|
| FRAGMENT
|
||||||
|
;
|
||||||
|
|
||||||
|
altList
|
||||||
|
: alternative+
|
||||||
|
;
|
||||||
|
|
||||||
|
altListAsBlock
|
||||||
|
: ^(BLOCK altList)
|
||||||
|
;
|
||||||
|
|
||||||
|
alternative
|
||||||
|
: ^(ALT_REWRITE alternative rewrite)
|
||||||
|
| ^(ALT EPSILON)
|
||||||
|
| elements
|
||||||
|
;
|
||||||
|
|
||||||
|
elements
|
||||||
|
: ^(ALT element+)
|
||||||
|
;
|
||||||
|
|
||||||
|
element
|
||||||
|
: labeledElement
|
||||||
|
| atom
|
||||||
|
| ebnf
|
||||||
|
| ACTION
|
||||||
|
| FORCED_ACTION
|
||||||
|
| SEMPRED
|
||||||
|
| GATED_SEMPRED
|
||||||
|
| treeSpec
|
||||||
|
;
|
||||||
|
|
||||||
|
labeledElement
|
||||||
|
: ^(ASSIGN ID atom)
|
||||||
|
| ^(ASSIGN ID block)
|
||||||
|
| ^(PLUS_ASSIGN ID atom)
|
||||||
|
| ^(PLUS_ASSIGN ID block)
|
||||||
|
;
|
||||||
|
|
||||||
|
treeSpec
|
||||||
|
: ^(TREE_BEGIN element+)
|
||||||
|
;
|
||||||
|
|
||||||
|
ebnf: ^(blockSuffix block)
|
||||||
|
| block
|
||||||
|
;
|
||||||
|
|
||||||
|
blockSuffix
|
||||||
|
: ebnfSuffix
|
||||||
|
| ROOT
|
||||||
|
| IMPLIES
|
||||||
|
| BANG
|
||||||
|
;
|
||||||
|
|
||||||
|
ebnfSuffix
|
||||||
|
: OPTIONAL
|
||||||
|
| CLOSURE
|
||||||
|
| POSITIVE_CLOSURE
|
||||||
|
;
|
||||||
|
|
||||||
|
atom: ^(ROOT range)
|
||||||
|
| ^(BANG range)
|
||||||
|
| ^(ROOT notSet)
|
||||||
|
| ^(BANG notSet)
|
||||||
|
| notSet
|
||||||
|
| ^(ROOT terminal)
|
||||||
|
| ^(BANG terminal)
|
||||||
|
| range
|
||||||
|
| ^(DOT ID terminal)
|
||||||
|
| ^(DOT ID ruleref)
|
||||||
|
| ^(WILDCARD elementOptions)
|
||||||
|
| WILDCARD
|
||||||
|
| terminal
|
||||||
|
| ruleref
|
||||||
|
;
|
||||||
|
|
||||||
|
notSet
|
||||||
|
: ^(NOT setElement)
|
||||||
|
| ^(NOT blockSet)
|
||||||
|
;
|
||||||
|
|
||||||
|
blockSet
|
||||||
|
: ^(BLOCK setElement+)
|
||||||
|
;
|
||||||
|
|
||||||
|
setElement
|
||||||
|
: STRING_LITERAL
|
||||||
|
| TOKEN_REF
|
||||||
|
| ^(RANGE STRING_LITERAL STRING_LITERAL)
|
||||||
|
;
|
||||||
|
|
||||||
|
block
|
||||||
|
: ^(BLOCK optionsSpec? ruleAction* ACTION? altList)
|
||||||
|
;
|
||||||
|
|
||||||
|
ruleref
|
||||||
|
: ^(ROOT ^(RULE_REF ARG_ACTION?))
|
||||||
|
| ^(BANG ^(RULE_REF ARG_ACTION?))
|
||||||
|
| ^(RULE_REF ARG_ACTION?)
|
||||||
|
;
|
||||||
|
|
||||||
|
range
|
||||||
|
: ^(RANGE STRING_LITERAL STRING_LITERAL)
|
||||||
|
;
|
||||||
|
|
||||||
|
terminal
|
||||||
|
: ^(STRING_LITERAL elementOptions)
|
||||||
|
| STRING_LITERAL
|
||||||
|
| ^(TOKEN_REF ARG_ACTION elementOptions)
|
||||||
|
| ^(TOKEN_REF ARG_ACTION)
|
||||||
|
| ^(TOKEN_REF elementOptions)
|
||||||
|
| TOKEN_REF
|
||||||
|
;
|
||||||
|
|
||||||
|
elementOptions
|
||||||
|
: ^(ELEMENT_OPTIONS elementOption+)
|
||||||
|
;
|
||||||
|
|
||||||
|
elementOption
|
||||||
|
: ID
|
||||||
|
| ^(ASSIGN ID ID)
|
||||||
|
| ^(ASSIGN ID STRING_LITERAL)
|
||||||
|
;
|
||||||
|
|
||||||
|
rewrite
|
||||||
|
: predicatedRewrite* nakedRewrite
|
||||||
|
;
|
||||||
|
|
||||||
|
predicatedRewrite
|
||||||
|
: ^(ST_RESULT SEMPRED rewriteAlt)
|
||||||
|
| ^(RESULT SEMPRED rewriteAlt)
|
||||||
|
;
|
||||||
|
|
||||||
|
nakedRewrite
|
||||||
|
: ^(ST_RESULT rewriteAlt)
|
||||||
|
| ^(RESULT rewriteAlt)
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteAlt
|
||||||
|
: rewriteTemplate
|
||||||
|
| rewriteTreeAlt
|
||||||
|
| ETC
|
||||||
|
| EPSILON
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTreeAlt
|
||||||
|
: ^(ALT rewriteTreeElement+)
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTreeElement
|
||||||
|
: rewriteTreeAtom
|
||||||
|
| rewriteTree
|
||||||
|
| rewriteTreeEbnf
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTreeAtom
|
||||||
|
: ^(TOKEN_REF elementOptions ARG_ACTION)
|
||||||
|
| ^(TOKEN_REF elementOptions)
|
||||||
|
| ^(TOKEN_REF ARG_ACTION)
|
||||||
|
| TOKEN_REF
|
||||||
|
| RULE_REF
|
||||||
|
| ^(STRING_LITERAL elementOptions)
|
||||||
|
| STRING_LITERAL
|
||||||
|
| LABEL
|
||||||
|
| ACTION
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTreeEbnf
|
||||||
|
: ^(ebnfSuffix ^(REWRITE_BLOCK rewriteTreeAlt))
|
||||||
|
;
|
||||||
|
rewriteTree
|
||||||
|
: ^(TREE_BEGIN rewriteTreeAtom rewriteTreeElement* )
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTemplate
|
||||||
|
: ^(TEMPLATE rewriteTemplateArgs? DOUBLE_QUOTE_STRING_LITERAL)
|
||||||
|
| ^(TEMPLATE rewriteTemplateArgs? DOUBLE_ANGLE_STRING_LITERAL)
|
||||||
|
| rewriteTemplateRef
|
||||||
|
| rewriteIndirectTemplateHead
|
||||||
|
| ACTION
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTemplateRef
|
||||||
|
: ^(TEMPLATE ID rewriteTemplateArgs?)
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteIndirectTemplateHead
|
||||||
|
: ^(TEMPLATE ACTION rewriteTemplateArgs?)
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTemplateArgs
|
||||||
|
: ^(ARGLIST rewriteTemplateArg+)
|
||||||
|
;
|
||||||
|
|
||||||
|
rewriteTemplateArg
|
||||||
|
: ^(ARG ID ACTION)
|
||||||
|
;
|
|
@ -0,0 +1,176 @@
|
||||||
|
/*
|
||||||
|
[The "BSD license"]
|
||||||
|
Copyright (c) 2010 Terence Parr
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
tree grammar ATNBuilder;
|
||||||
|
options {
|
||||||
|
language = Java;
|
||||||
|
tokenVocab = ANTLRParser;
|
||||||
|
ASTLabelType = GrammarAST;
|
||||||
|
// filter = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Include the copyright in this source and also the generated source
|
||||||
|
@header {
|
||||||
|
/*
|
||||||
|
[The "BSD license"]
|
||||||
|
Copyright (c) 2010 Terence Parr
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
3. The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
import org.antlr.v4.tool.*;
|
||||||
|
import org.antlr.v4.automata.ATNFactory;
|
||||||
|
import org.antlr.v4.runtime.tree.CommonTree; // use updated v4 one not v3
|
||||||
|
}
|
||||||
|
|
||||||
|
@members {
|
||||||
|
ATNFactory factory;
|
||||||
|
public ATNBuilder(TreeNodeStream input, ATNFactory factory) {
|
||||||
|
this(input);
|
||||||
|
this.factory = factory;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
block[GrammarAST ebnfRoot] returns [ATNFactory.Handle p]
|
||||||
|
@init {List<ATNFactory.Handle> alts = new ArrayList<ATNFactory.Handle>();}
|
||||||
|
: ^(BLOCK (^(OPTIONS .+))? (a=alternative {alts.add($a.p);})+)
|
||||||
|
{$p = factory.block((BlockAST)$BLOCK, ebnfRoot, alts);}
|
||||||
|
;
|
||||||
|
|
||||||
|
alternative returns [ATNFactory.Handle p]
|
||||||
|
@init {List<ATNFactory.Handle> els = new ArrayList<ATNFactory.Handle>();}
|
||||||
|
: ^(ALT_REWRITE a=alternative .) {$p = $a.p;}
|
||||||
|
| ^(ALT EPSILON) {$p = factory.epsilon($EPSILON);}
|
||||||
|
| ^(ALT (e=element {els.add($e.p);})+)
|
||||||
|
{$p = factory.alt(els);}
|
||||||
|
;
|
||||||
|
|
||||||
|
element returns [ATNFactory.Handle p]
|
||||||
|
: labeledElement {$p = $labeledElement.p;}
|
||||||
|
| atom {$p = $atom.p;}
|
||||||
|
| ebnf {$p = $ebnf.p;}
|
||||||
|
| ACTION {$p = factory.action((ActionAST)$ACTION);}
|
||||||
|
| FORCED_ACTION {$p = factory.action((ActionAST)$FORCED_ACTION);}
|
||||||
|
| SEMPRED {$p = factory.sempred((PredAST)$SEMPRED);}
|
||||||
|
| GATED_SEMPRED {$p = factory.gated_sempred($GATED_SEMPRED);}
|
||||||
|
| treeSpec {$p = $treeSpec.p;}
|
||||||
|
;
|
||||||
|
|
||||||
|
labeledElement returns [ATNFactory.Handle p]
|
||||||
|
: ^(ASSIGN ID atom) {$p = factory.label($atom.p);}
|
||||||
|
| ^(ASSIGN ID block[null]) {$p = factory.label($block.p);}
|
||||||
|
| ^(PLUS_ASSIGN ID atom) {$p = factory.listLabel($atom.p);}
|
||||||
|
| ^(PLUS_ASSIGN ID block[null]) {$p = factory.listLabel($block.p);}
|
||||||
|
;
|
||||||
|
|
||||||
|
treeSpec returns [ATNFactory.Handle p]
|
||||||
|
@init {List<ATNFactory.Handle> els = new ArrayList<ATNFactory.Handle>();}
|
||||||
|
: ^(TREE_BEGIN (e=element {els.add($e.p);})+) {$p = factory.tree(els);}
|
||||||
|
;
|
||||||
|
|
||||||
|
ebnf returns [ATNFactory.Handle p]
|
||||||
|
: ^(astBlockSuffix block[null]) {$p = $block.p;}
|
||||||
|
| ^(OPTIONAL block[$start]) {$p = $block.p;}
|
||||||
|
| ^(CLOSURE block[$start]) {$p = $block.p;}
|
||||||
|
| ^(POSITIVE_CLOSURE block[$start]) {$p = $block.p;}
|
||||||
|
| block[null] {$p = $block.p;}
|
||||||
|
;
|
||||||
|
|
||||||
|
astBlockSuffix
|
||||||
|
: ROOT
|
||||||
|
| IMPLIES
|
||||||
|
| BANG
|
||||||
|
;
|
||||||
|
|
||||||
|
atom returns [ATNFactory.Handle p]
|
||||||
|
: ^(ROOT range) {$p = $range.p;}
|
||||||
|
| ^(BANG range) {$p = $range.p;}
|
||||||
|
| ^(ROOT notSet) {$p = $notSet.p;}
|
||||||
|
| ^(BANG notSet) {$p = $notSet.p;}
|
||||||
|
| notSet {$p = $notSet.p;}
|
||||||
|
| range {$p = $range.p;}
|
||||||
|
| ^(DOT ID terminal) {$p = $terminal.p;}
|
||||||
|
| ^(DOT ID ruleref) {$p = $ruleref.p;}
|
||||||
|
| ^(WILDCARD .) {$p = factory.wildcard($start);}
|
||||||
|
| WILDCARD {$p = factory.wildcard($start);}
|
||||||
|
| terminal {$p = $terminal.p;}
|
||||||
|
| ruleref {$p = $ruleref.p;}
|
||||||
|
;
|
||||||
|
|
||||||
|
notSet returns [ATNFactory.Handle p]
|
||||||
|
: ^(NOT setElement) {$p = factory.not($NOT);}
|
||||||
|
| ^(NOT blockSet) {$p = factory.notBlock($NOT, $blockSet.alts);}
|
||||||
|
;
|
||||||
|
|
||||||
|
blockSet returns [List<GrammarAST> alts]
|
||||||
|
@init {$alts = new ArrayList<GrammarAST>();}
|
||||||
|
: ^(BLOCK (t=setElement {$alts.add($t.start);})+)
|
||||||
|
;
|
||||||
|
|
||||||
|
setElement
|
||||||
|
: STRING_LITERAL
|
||||||
|
| TOKEN_REF
|
||||||
|
| ^(RANGE STRING_LITERAL STRING_LITERAL)
|
||||||
|
;
|
||||||
|
|
||||||
|
ruleref returns [ATNFactory.Handle p]
|
||||||
|
: ^(ROOT ^(RULE_REF ARG_ACTION?)) {$p = factory.ruleRef($RULE_REF);}
|
||||||
|
| ^(BANG ^(RULE_REF ARG_ACTION?)) {$p = factory.ruleRef($RULE_REF);}
|
||||||
|
| ^(RULE_REF ARG_ACTION?) {$p = factory.ruleRef($RULE_REF);}
|
||||||
|
;
|
||||||
|
|
||||||
|
range returns [ATNFactory.Handle p]
|
||||||
|
: ^(RANGE a=STRING_LITERAL b=STRING_LITERAL) {$p = factory.range($a,$b);}
|
||||||
|
;
|
||||||
|
|
||||||
|
terminal returns [ATNFactory.Handle p]
|
||||||
|
: ^(STRING_LITERAL .) {$p = factory.stringLiteral((TerminalAST)$start);}
|
||||||
|
| STRING_LITERAL {$p = factory.stringLiteral((TerminalAST)$start);}
|
||||||
|
| ^(TOKEN_REF ARG_ACTION .) {$p = factory.tokenRef((TerminalAST)$start);}
|
||||||
|
| ^(TOKEN_REF .) {$p = factory.tokenRef((TerminalAST)$start);}
|
||||||
|
| TOKEN_REF {$p = factory.tokenRef((TerminalAST)$start);}
|
||||||
|
| ^(ROOT t=terminal) {$p = $t.p;}
|
||||||
|
| ^(BANG t=terminal) {$p = $t.p;}
|
||||||
|
;
|
|
@ -0,0 +1,172 @@
|
||||||
|
lexer grammar ActionSplitter;
|
||||||
|
|
||||||
|
options { filter=true; }
|
||||||
|
|
||||||
|
@header {
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
import org.antlr.v4.tool.*;
|
||||||
|
}
|
||||||
|
|
||||||
|
@members {
|
||||||
|
ActionSplitterListener delegate;
|
||||||
|
|
||||||
|
public ActionSplitter(CharStream input, ActionSplitterListener delegate) {
|
||||||
|
this(input, new RecognizerSharedState());
|
||||||
|
this.delegate = delegate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void emit(Token token) {
|
||||||
|
super.emit(token);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/** force filtering (and return tokens). triggers all above actions. */
|
||||||
|
public List<Token> getActionTokens() {
|
||||||
|
List<Token> chunks = new ArrayList<Token>();
|
||||||
|
Token t = nextToken();
|
||||||
|
while ( t.getType()!=Token.EOF ) {
|
||||||
|
chunks.add(t);
|
||||||
|
t = nextToken();
|
||||||
|
}
|
||||||
|
return chunks;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ignore comments right away
|
||||||
|
|
||||||
|
COMMENT
|
||||||
|
: '/*' ( options {greedy=false;} : . )* '*/' {delegate.text($text);}
|
||||||
|
;
|
||||||
|
|
||||||
|
LINE_COMMENT
|
||||||
|
: '//' ~('\n'|'\r')* '\r'? '\n' {delegate.text($text);}
|
||||||
|
;
|
||||||
|
|
||||||
|
ESC
|
||||||
|
: '\\$' {delegate.text("$");}
|
||||||
|
| '\\%' {delegate.text("\%");}
|
||||||
|
;
|
||||||
|
|
||||||
|
SET_QUALIFIED_ATTR
|
||||||
|
: '$' x=ID '.' y=ID WS? '=' expr=ATTR_VALUE_EXPR ';'
|
||||||
|
{delegate.setQualifiedAttr($text, $x, $y, $expr);}
|
||||||
|
;
|
||||||
|
|
||||||
|
QUALIFIED_ATTR
|
||||||
|
: '$' x=ID '.' y=ID {input.LA(1)!='('}? {delegate.qualifiedAttr($text, $x, $y);}
|
||||||
|
;
|
||||||
|
|
||||||
|
SET_DYNAMIC_SCOPE_ATTR
|
||||||
|
: '$' x=ID '::' y=ID WS? '=' expr=ATTR_VALUE_EXPR ';'
|
||||||
|
{delegate.setDynamicScopeAttr($text, $x, $y, $expr);}
|
||||||
|
;
|
||||||
|
|
||||||
|
DYNAMIC_SCOPE_ATTR
|
||||||
|
: '$' x=ID '::' y=ID {delegate.dynamicScopeAttr($text, $x, $y);}
|
||||||
|
;
|
||||||
|
|
||||||
|
/** To access deeper (than top of stack) scopes, use the notation:
|
||||||
|
*
|
||||||
|
* $x[-1]::y previous (just under top of stack)
|
||||||
|
* $x[-i]::y top of stack - i where the '-' MUST BE PRESENT;
|
||||||
|
* i.e., i cannot simply be negative without the '-' sign!
|
||||||
|
* $x[i]::y absolute index i (0..size-1)
|
||||||
|
* $x[0]::y is the absolute 0 indexed element (bottom of the stack)
|
||||||
|
*/
|
||||||
|
SET_DYNAMIC_NEGATIVE_INDEXED_SCOPE_ATTR
|
||||||
|
: '$' x=ID '[' '-' index=SCOPE_INDEX_EXPR ']' '::' y=ID
|
||||||
|
WS? '=' expr=ATTR_VALUE_EXPR ';'
|
||||||
|
{delegate.setDynamicNegativeIndexedScopeAttr($text, $x, $y, $index, $expr);}
|
||||||
|
;
|
||||||
|
|
||||||
|
DYNAMIC_NEGATIVE_INDEXED_SCOPE_ATTR
|
||||||
|
: '$' x=ID '[' '-' index=SCOPE_INDEX_EXPR ']' '::' y=ID
|
||||||
|
{delegate.dynamicNegativeIndexedScopeAttr($text, $x, $y, $index);}
|
||||||
|
;
|
||||||
|
|
||||||
|
SET_DYNAMIC_ABSOLUTE_INDEXED_SCOPE_ATTR
|
||||||
|
: '$' x=ID '[' index=SCOPE_INDEX_EXPR ']' '::' y=ID
|
||||||
|
WS? '=' expr=ATTR_VALUE_EXPR ';'
|
||||||
|
{delegate.setDynamicAbsoluteIndexedScopeAttr($text, $x, $y, $index, $expr);}
|
||||||
|
;
|
||||||
|
|
||||||
|
DYNAMIC_ABSOLUTE_INDEXED_SCOPE_ATTR
|
||||||
|
: '$' x=ID '[' index=SCOPE_INDEX_EXPR ']' '::' y=ID
|
||||||
|
{delegate.dynamicAbsoluteIndexedScopeAttr($text, $x, $y, $index);}
|
||||||
|
;
|
||||||
|
|
||||||
|
SET_ATTR
|
||||||
|
: '$' x=ID WS? '=' expr=ATTR_VALUE_EXPR ';' {delegate.setAttr($text, $x, $expr);}
|
||||||
|
;
|
||||||
|
|
||||||
|
ATTR
|
||||||
|
: '$' x=ID {delegate.attr($text, $x);}
|
||||||
|
;
|
||||||
|
|
||||||
|
/** %foo(a={},b={},...) ctor */
|
||||||
|
TEMPLATE_INSTANCE
|
||||||
|
: '%' ID '(' ( WS? ARG (',' WS? ARG)* WS? )? ')'
|
||||||
|
;
|
||||||
|
|
||||||
|
/** %({name-expr})(a={},...) indirect template ctor reference */
|
||||||
|
INDIRECT_TEMPLATE_INSTANCE
|
||||||
|
: '%' '(' ACTION ')' '(' ( WS? ARG (',' WS? ARG)* WS? )? ')'
|
||||||
|
;
|
||||||
|
|
||||||
|
/** %{expr}.y = z; template attribute y of StringTemplate-typed expr to z */
|
||||||
|
SET_EXPR_ATTRIBUTE
|
||||||
|
: '%' a=ACTION '.' ID WS? '=' expr=ATTR_VALUE_EXPR ';'
|
||||||
|
;
|
||||||
|
|
||||||
|
/* %x.y = z; set template attribute y of x (always set never get attr)
|
||||||
|
* to z [languages like python without ';' must still use the
|
||||||
|
* ';' which the code generator is free to remove during code gen]
|
||||||
|
*/
|
||||||
|
SET_ATTRIBUTE
|
||||||
|
: '%' x=ID '.' y=ID WS? '=' expr=ATTR_VALUE_EXPR ';'
|
||||||
|
;
|
||||||
|
|
||||||
|
/** %{string-expr} anonymous template from string expr */
|
||||||
|
TEMPLATE_EXPR
|
||||||
|
: '%' a=ACTION
|
||||||
|
;
|
||||||
|
|
||||||
|
UNKNOWN_SYNTAX
|
||||||
|
@after {delegate.unknownSyntax(emit());}
|
||||||
|
: '%' (ID|'.'|'('|')'|','|'{'|'}'|'"')*
|
||||||
|
;
|
||||||
|
|
||||||
|
// Anything else is just random text
|
||||||
|
TEXT
|
||||||
|
@after {delegate.text($text);}
|
||||||
|
: ~('$'|'%') // can't do (...)+ here since it gobbles \$, \%
|
||||||
|
;
|
||||||
|
|
||||||
|
fragment
|
||||||
|
ACTION
|
||||||
|
: '{' ('\\}'|~'}')* '}'
|
||||||
|
;
|
||||||
|
|
||||||
|
fragment
|
||||||
|
ARG : ID '=' ACTION
|
||||||
|
;
|
||||||
|
|
||||||
|
fragment
|
||||||
|
ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
|
||||||
|
;
|
||||||
|
|
||||||
|
/** Don't allow an = as first char to prevent $x == 3; kind of stuff. */
|
||||||
|
fragment
|
||||||
|
ATTR_VALUE_EXPR
|
||||||
|
: ~'=' (~';')*
|
||||||
|
;
|
||||||
|
|
||||||
|
fragment
|
||||||
|
SCOPE_INDEX_EXPR
|
||||||
|
: ('\\]'|~']')+
|
||||||
|
;
|
||||||
|
|
||||||
|
fragment
|
||||||
|
WS : (' '|'\t'|'\n'|'\r')+
|
||||||
|
;
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public interface ActionSplitterListener {
|
||||||
|
void setQualifiedAttr(String expr, Token x, Token y, Token rhs);
|
||||||
|
void qualifiedAttr(String expr, Token x, Token y);
|
||||||
|
void setAttr(String expr, Token x, Token rhs);
|
||||||
|
void attr(String expr, Token x);
|
||||||
|
|
||||||
|
void setDynamicScopeAttr(String expr, Token x, Token y, Token rhs);
|
||||||
|
void dynamicScopeAttr(String expr, Token x, Token y);
|
||||||
|
void setDynamicNegativeIndexedScopeAttr(String expr, Token x, Token y, Token index, Token rhs);
|
||||||
|
void dynamicNegativeIndexedScopeAttr(String expr, Token x, Token y, Token index);
|
||||||
|
void setDynamicAbsoluteIndexedScopeAttr(String expr, Token x, Token y, Token index, Token rhs);
|
||||||
|
void dynamicAbsoluteIndexedScopeAttr(String expr, Token x, Token y, Token index);
|
||||||
|
|
||||||
|
void templateInstance(String expr);
|
||||||
|
void indirectTemplateInstance(String expr);
|
||||||
|
void setExprAttribute(String expr); // TODO: rename
|
||||||
|
void setSTAttribute(String expr);
|
||||||
|
void templateExpr(String expr);
|
||||||
|
|
||||||
|
void unknownSyntax(Token t);
|
||||||
|
void text(String text);
|
||||||
|
}
|
|
@ -0,0 +1,46 @@
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
|
||||||
|
import org.antlr.runtime.*;
|
||||||
|
import org.antlr.runtime.tree.CommonTreeAdaptor;
|
||||||
|
import org.antlr.v4.tool.*;
|
||||||
|
|
||||||
|
public class GrammarASTAdaptor extends CommonTreeAdaptor {
|
||||||
|
org.antlr.runtime.CharStream input; // where we can find chars ref'd by tokens in tree
|
||||||
|
public GrammarASTAdaptor() { ; }
|
||||||
|
public GrammarASTAdaptor(org.antlr.runtime.CharStream input) { this.input = input; }
|
||||||
|
|
||||||
|
public Object create(Token token) {
|
||||||
|
return new GrammarAST(token);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
/** Make sure even imaginary nodes know the input stream */
|
||||||
|
public Object create(int tokenType, String text) {
|
||||||
|
GrammarAST t = null;
|
||||||
|
if ( tokenType==ANTLRParser.RULE ) {
|
||||||
|
// needed by TreeWizard to make RULE tree
|
||||||
|
t = new GrammarASTWithOptions(new CommonToken(tokenType, text));
|
||||||
|
}
|
||||||
|
else if ( tokenType==ANTLRParser.STRING_LITERAL ) {
|
||||||
|
// implicit lexer construction done with wizard; needs this node type
|
||||||
|
// whereas grammar ANTLRParser.g can use token option to spec node type
|
||||||
|
t = new TerminalAST(new CommonToken(tokenType, text));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
t = (GrammarAST)super.create(tokenType, text);
|
||||||
|
}
|
||||||
|
((CommonToken)t.token).setInputStream(input);
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object dupNode(Object t) {
|
||||||
|
if ( t==null ) return null;
|
||||||
|
return ((GrammarAST)t).dupNode(); //create(((GrammarAST)t).token);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object errorNode(org.antlr.runtime.TokenStream input, org.antlr.runtime.Token start, org.antlr.runtime.Token stop,
|
||||||
|
org.antlr.runtime.RecognitionException e)
|
||||||
|
{
|
||||||
|
return new GrammarASTErrorNode(input, start, stop, e);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,7 @@
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
|
||||||
|
/** Used to throw us out of deeply nested element back to end of a rule's
|
||||||
|
* alt list. Note it's not under RecognitionException.
|
||||||
|
*/
|
||||||
|
public class ResyncToEndOfRuleBlock extends RuntimeException {
|
||||||
|
}
|
|
@ -0,0 +1,237 @@
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
|
||||||
|
import org.antlr.tool.ErrorManager;
|
||||||
|
import org.antlr.v4.tool.*;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/** Parse args, return values, and dynamic scopes.
|
||||||
|
*
|
||||||
|
* rule[arg1, arg2, ..., argN] returns [ret1, ..., retN]
|
||||||
|
* scope { decl1; decl2; ... declN; }
|
||||||
|
*
|
||||||
|
* The ',' and ';' are significant. Use \, and \; to use within
|
||||||
|
* types if necessary like [Map<String\,String> foo, int y].
|
||||||
|
*
|
||||||
|
* arg, ret, and decl are target language dependent. Java/C#/C/C++ would
|
||||||
|
* use "int i" but ruby/python would use "i".
|
||||||
|
*/
|
||||||
|
public class ScopeParser {
|
||||||
|
/** Given an arg or retval scope definition list like
|
||||||
|
*
|
||||||
|
* Map<String, String>, int[] j3, char *foo32[3]
|
||||||
|
*
|
||||||
|
* or
|
||||||
|
*
|
||||||
|
* int i=3, j=a[34]+20
|
||||||
|
*
|
||||||
|
* convert to an attribute scope.
|
||||||
|
*/
|
||||||
|
public static AttributeDict parseTypeList(String s) { return parse(s, ','); }
|
||||||
|
|
||||||
|
public static AttributeDict parseDynamicScope(String s) {
|
||||||
|
// ignore outer {..} if present
|
||||||
|
s = s.trim();
|
||||||
|
if ( s.startsWith("{") ) {
|
||||||
|
int lastCurly = s.lastIndexOf('}');
|
||||||
|
s = s.substring(1, lastCurly);
|
||||||
|
}
|
||||||
|
return parse(s, ';');
|
||||||
|
}
|
||||||
|
|
||||||
|
public static AttributeDict parse(String s, char separator) {
|
||||||
|
int i = 0;
|
||||||
|
int n = s.length();
|
||||||
|
AttributeDict dict = new AttributeDict();
|
||||||
|
while ( i<n ) {
|
||||||
|
StringBuilder buf = new StringBuilder();
|
||||||
|
while ( i<n && s.charAt(i)!=separator ) {
|
||||||
|
if ( s.charAt(i)=='\\' ) {
|
||||||
|
i++;
|
||||||
|
if ( i<n && s.charAt(i)==separator ) {
|
||||||
|
buf.append(s.charAt(i));
|
||||||
|
i++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
buf.append('\\');
|
||||||
|
}
|
||||||
|
buf.append(s.charAt(i));
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
i++; // skip separator
|
||||||
|
String def = buf.toString();
|
||||||
|
//System.out.println("def="+ def);
|
||||||
|
if ( def.trim().length()>0 ) {
|
||||||
|
Attribute a = parseAttributeDef(def);
|
||||||
|
dict.add(a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** For decls like "String foo" or "char *foo32[]" compute the ID
|
||||||
|
* and type declarations. Also handle "int x=3" and 'T t = new T("foo")'
|
||||||
|
* but if the separator is ',' you cannot use ',' in the initvalue
|
||||||
|
* unless you escape use "\," escape.
|
||||||
|
*/
|
||||||
|
public static Attribute parseAttributeDef(String decl) {
|
||||||
|
if ( decl==null ) return null;
|
||||||
|
Attribute attr = new Attribute();
|
||||||
|
boolean inID = false;
|
||||||
|
int start = -1;
|
||||||
|
int rightEdgeOfDeclarator = decl.length()-1;
|
||||||
|
int equalsIndex = decl.indexOf('=');
|
||||||
|
if ( equalsIndex>0 ) {
|
||||||
|
// everything after the '=' is the init value
|
||||||
|
attr.initValue = decl.substring(equalsIndex+1,decl.length());
|
||||||
|
rightEdgeOfDeclarator = equalsIndex-1;
|
||||||
|
}
|
||||||
|
// walk backwards looking for start of an ID
|
||||||
|
for (int i=rightEdgeOfDeclarator; i>=0; i--) {
|
||||||
|
// if we haven't found the end yet, keep going
|
||||||
|
if ( !inID && Character.isLetterOrDigit(decl.charAt(i)) ) {
|
||||||
|
inID = true;
|
||||||
|
}
|
||||||
|
else if ( inID &&
|
||||||
|
!(Character.isLetterOrDigit(decl.charAt(i))||
|
||||||
|
decl.charAt(i)=='_') ) {
|
||||||
|
start = i+1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( start<0 && inID ) {
|
||||||
|
start = 0;
|
||||||
|
}
|
||||||
|
if ( start<0 ) {
|
||||||
|
ErrorManager.error(ErrorManager.MSG_CANNOT_FIND_ATTRIBUTE_NAME_IN_DECL,decl);
|
||||||
|
}
|
||||||
|
// walk forwards looking for end of an ID
|
||||||
|
int stop=-1;
|
||||||
|
for (int i=start; i<=rightEdgeOfDeclarator; i++) {
|
||||||
|
// if we haven't found the end yet, keep going
|
||||||
|
if ( !(Character.isLetterOrDigit(decl.charAt(i))||
|
||||||
|
decl.charAt(i)=='_') )
|
||||||
|
{
|
||||||
|
stop = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if ( i==rightEdgeOfDeclarator ) {
|
||||||
|
stop = i+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// the name is the last ID
|
||||||
|
attr.name = decl.substring(start,stop);
|
||||||
|
|
||||||
|
// the type is the decl minus the ID (could be empty)
|
||||||
|
attr.type = decl.substring(0,start);
|
||||||
|
if ( stop<=rightEdgeOfDeclarator ) {
|
||||||
|
attr.type += decl.substring(stop,rightEdgeOfDeclarator+1);
|
||||||
|
}
|
||||||
|
attr.type = attr.type.trim();
|
||||||
|
if ( attr.type.length()==0 ) {
|
||||||
|
attr.type = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
attr.decl = decl;
|
||||||
|
return attr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Given an argument list like
|
||||||
|
*
|
||||||
|
* x, (*a).foo(21,33), 3.2+1, '\n',
|
||||||
|
* "a,oo\nick", {bl, "fdkj"eck}, ["cat\n,", x, 43]
|
||||||
|
*
|
||||||
|
* convert to a list of attributes. Allow nested square brackets etc...
|
||||||
|
* Set separatorChar to ';' or ',' or whatever you want.
|
||||||
|
*/
|
||||||
|
public static List<String> splitArgumentList(String s, int separatorChar) {
|
||||||
|
List<String> args = new ArrayList<String>();
|
||||||
|
_splitArgumentList(s, 0, -1, separatorChar, args);
|
||||||
|
return args;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static int _splitArgumentList(String actionText,
|
||||||
|
int start,
|
||||||
|
int targetChar,
|
||||||
|
int separatorChar,
|
||||||
|
List<String> args)
|
||||||
|
{
|
||||||
|
if ( actionText==null ) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
actionText = actionText.replaceAll("//.*\n", "");
|
||||||
|
int n = actionText.length();
|
||||||
|
//System.out.println("actionText@"+start+"->"+(char)targetChar+"="+actionText.substring(start,n));
|
||||||
|
int p = start;
|
||||||
|
int last = p;
|
||||||
|
while ( p<n && actionText.charAt(p)!=targetChar ) {
|
||||||
|
int c = actionText.charAt(p);
|
||||||
|
switch ( c ) {
|
||||||
|
case '\'' :
|
||||||
|
p++;
|
||||||
|
while ( p<n && actionText.charAt(p)!='\'' ) {
|
||||||
|
if ( actionText.charAt(p)=='\\' && (p+1)<n &&
|
||||||
|
actionText.charAt(p+1)=='\'' )
|
||||||
|
{
|
||||||
|
p++; // skip escaped quote
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
break;
|
||||||
|
case '"' :
|
||||||
|
p++;
|
||||||
|
while ( p<n && actionText.charAt(p)!='\"' ) {
|
||||||
|
if ( actionText.charAt(p)=='\\' && (p+1)<n &&
|
||||||
|
actionText.charAt(p+1)=='\"' )
|
||||||
|
{
|
||||||
|
p++; // skip escaped quote
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
break;
|
||||||
|
case '(' :
|
||||||
|
p = _splitArgumentList(actionText,p+1,')',separatorChar,args);
|
||||||
|
break;
|
||||||
|
case '{' :
|
||||||
|
p = _splitArgumentList(actionText,p+1,'}',separatorChar,args);
|
||||||
|
break;
|
||||||
|
case '<' :
|
||||||
|
if ( actionText.indexOf('>',p+1)>=p ) {
|
||||||
|
// do we see a matching '>' ahead? if so, hope it's a generic
|
||||||
|
// and not less followed by expr with greater than
|
||||||
|
p = _splitArgumentList(actionText,p+1,'>',separatorChar,args);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
p++; // treat as normal char
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case '[' :
|
||||||
|
p = _splitArgumentList(actionText,p+1,']',separatorChar,args);
|
||||||
|
break;
|
||||||
|
default :
|
||||||
|
if ( c==separatorChar && targetChar==-1 ) {
|
||||||
|
String arg = actionText.substring(last, p);
|
||||||
|
//System.out.println("arg="+arg);
|
||||||
|
args.add(arg.trim());
|
||||||
|
last = p+1;
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( targetChar==-1 && p<=n ) {
|
||||||
|
String arg = actionText.substring(last, p).trim();
|
||||||
|
//System.out.println("arg="+arg);
|
||||||
|
if ( arg.length()>0 ) {
|
||||||
|
args.add(arg.trim());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,142 @@
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
|
||||||
|
import org.antlr.codegen.CodeGenerator;
|
||||||
|
import org.antlr.misc.Utils;
|
||||||
|
import org.antlr.tool.ErrorManager;
|
||||||
|
import org.antlr.v4.Tool;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class TokenVocabParser {
|
||||||
|
Tool tool;
|
||||||
|
String vocabName;
|
||||||
|
|
||||||
|
public TokenVocabParser(Tool tool, String vocabName) {
|
||||||
|
this.tool = tool;
|
||||||
|
this.vocabName = vocabName;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Load a vocab file <vocabName>.tokens and return mapping. */
|
||||||
|
public Map<String,Integer> load() {
|
||||||
|
Map<String,Integer> tokens = new LinkedHashMap<String,Integer>();
|
||||||
|
int maxTokenType = -1;
|
||||||
|
File fullFile = getImportedVocabFile();
|
||||||
|
try {
|
||||||
|
FileReader fr = new FileReader(fullFile);
|
||||||
|
BufferedReader br = new BufferedReader(fr);
|
||||||
|
StreamTokenizer tokenizer = new StreamTokenizer(br);
|
||||||
|
tokenizer.parseNumbers();
|
||||||
|
tokenizer.wordChars('_', '_');
|
||||||
|
tokenizer.eolIsSignificant(true);
|
||||||
|
tokenizer.slashSlashComments(true);
|
||||||
|
tokenizer.slashStarComments(true);
|
||||||
|
tokenizer.ordinaryChar('=');
|
||||||
|
tokenizer.quoteChar('\'');
|
||||||
|
tokenizer.whitespaceChars(' ',' ');
|
||||||
|
tokenizer.whitespaceChars('\t','\t');
|
||||||
|
int lineNum = 1;
|
||||||
|
int token = tokenizer.nextToken();
|
||||||
|
while (token != StreamTokenizer.TT_EOF) {
|
||||||
|
String tokenID;
|
||||||
|
if ( token == StreamTokenizer.TT_WORD ) {
|
||||||
|
tokenID = tokenizer.sval;
|
||||||
|
}
|
||||||
|
else if ( token == '\'' ) {
|
||||||
|
tokenID = "'"+tokenizer.sval+"'";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
|
||||||
|
vocabName+ CodeGenerator.VOCAB_FILE_EXTENSION,
|
||||||
|
Utils.integer(lineNum));
|
||||||
|
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
|
||||||
|
token = tokenizer.nextToken();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
token = tokenizer.nextToken();
|
||||||
|
if ( token != '=' ) {
|
||||||
|
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
|
||||||
|
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
|
||||||
|
Utils.integer(lineNum));
|
||||||
|
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
|
||||||
|
token = tokenizer.nextToken();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
token = tokenizer.nextToken(); // skip '='
|
||||||
|
if ( token != StreamTokenizer.TT_NUMBER ) {
|
||||||
|
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
|
||||||
|
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
|
||||||
|
Utils.integer(lineNum));
|
||||||
|
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
|
||||||
|
token = tokenizer.nextToken();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int tokenType = (int)tokenizer.nval;
|
||||||
|
token = tokenizer.nextToken();
|
||||||
|
System.out.println("import "+tokenID+"="+tokenType);
|
||||||
|
tokens.put(tokenID, tokenType);
|
||||||
|
maxTokenType = Math.max(maxTokenType,tokenType);
|
||||||
|
lineNum++;
|
||||||
|
if ( token != StreamTokenizer.TT_EOL ) {
|
||||||
|
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
|
||||||
|
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
|
||||||
|
Utils.integer(lineNum));
|
||||||
|
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
|
||||||
|
token = tokenizer.nextToken();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
token = tokenizer.nextToken(); // skip newline
|
||||||
|
}
|
||||||
|
br.close();
|
||||||
|
}
|
||||||
|
catch (FileNotFoundException fnfe) {
|
||||||
|
ErrorManager.error(ErrorManager.MSG_CANNOT_FIND_TOKENS_FILE,
|
||||||
|
fullFile);
|
||||||
|
}
|
||||||
|
catch (IOException ioe) {
|
||||||
|
ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
|
||||||
|
fullFile,
|
||||||
|
ioe);
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
|
||||||
|
fullFile,
|
||||||
|
e);
|
||||||
|
}
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return a File descriptor for vocab file. Look in library or
|
||||||
|
* in -o output path. antlr -o foo T.g U.g where U needs T.tokens
|
||||||
|
* won't work unless we look in foo too. If we do not find the
|
||||||
|
* file in the lib directory then must assume that the .tokens file
|
||||||
|
* is going to be generated as part of this build and we have defined
|
||||||
|
* .tokens files so that they ALWAYS are generated in the base output
|
||||||
|
* directory, which means the current directory for the command line tool if there
|
||||||
|
* was no output directory specified.
|
||||||
|
*/
|
||||||
|
public File getImportedVocabFile() {
|
||||||
|
|
||||||
|
File f = new File(tool.getLibraryDirectory(),
|
||||||
|
File.separator +
|
||||||
|
vocabName +
|
||||||
|
CodeGenerator.VOCAB_FILE_EXTENSION);
|
||||||
|
if (f.exists()) {
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We did not find the vocab file in the lib directory, so we need
|
||||||
|
// to look for it in the output directory which is where .tokens
|
||||||
|
// files are generated (in the base, not relative to the input
|
||||||
|
// location.)
|
||||||
|
//
|
||||||
|
if (tool.haveOutputDir) {
|
||||||
|
f = new File(tool.getOutputDirectory(), vocabName + CodeGenerator.VOCAB_FILE_EXTENSION);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
f = new File(vocabName + CodeGenerator.VOCAB_FILE_EXTENSION);
|
||||||
|
}
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,47 @@
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
|
||||||
|
import org.antlr.runtime.*;
|
||||||
|
import org.antlr.v4.Tool;
|
||||||
|
import org.antlr.v4.tool.ErrorType;
|
||||||
|
|
||||||
|
/** Override error handling for use with ANTLR tool itself; leaves
|
||||||
|
* nothing in grammar associated with Tool so others can use in IDEs, ...
|
||||||
|
*/
|
||||||
|
public class ToolANTLRParser extends ANTLRParser {
|
||||||
|
public Tool tool;
|
||||||
|
|
||||||
|
public ToolANTLRParser(TokenStream input, Tool tool) {
|
||||||
|
super(input);
|
||||||
|
this.tool = tool;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void displayRecognitionError(String[] tokenNames,
|
||||||
|
RecognitionException e)
|
||||||
|
{
|
||||||
|
String msg = getParserErrorMessage(this, e);
|
||||||
|
if ( paraphrases.size()>0 ) {
|
||||||
|
String paraphrase = (String)paraphrases.peek();
|
||||||
|
msg = msg+" while "+paraphrase;
|
||||||
|
}
|
||||||
|
// List stack = getRuleInvocationStack(e, this.getClass().getName());
|
||||||
|
// msg += ", rule stack = "+stack;
|
||||||
|
tool.errMgr.syntaxError(ErrorType.SYNTAX_ERROR, getSourceName(), e.token, e, msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getParserErrorMessage(Parser parser, RecognitionException e) {
|
||||||
|
String msg = null;
|
||||||
|
if ( e instanceof NoViableAltException) {
|
||||||
|
String name = parser.getTokenErrorDisplay(e.token);
|
||||||
|
msg = name+" came as a complete surprise to me";
|
||||||
|
}
|
||||||
|
else if ( e instanceof v4ParserException) {
|
||||||
|
msg = ((v4ParserException)e).msg;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
msg = parser.getErrorMessage(e, parser.getTokenNames());
|
||||||
|
}
|
||||||
|
return msg;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,16 @@
|
||||||
|
package org.antlr.v4.parse;
|
||||||
|
|
||||||
|
import org.antlr.runtime.*;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class v4ParserException extends RecognitionException {
|
||||||
|
public String msg;
|
||||||
|
/** Used for remote debugger deserialization */
|
||||||
|
public v4ParserException() {;}
|
||||||
|
|
||||||
|
public v4ParserException(String msg, IntStream input) {
|
||||||
|
super(input);
|
||||||
|
this.msg = msg;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.runtime.tree.Tree;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class ActionAST extends GrammarAST {
|
||||||
|
// Alt, rule, grammar space
|
||||||
|
public AttributeResolver resolver;
|
||||||
|
public List<Token> chunks; // useful for ANTLR IDE developers
|
||||||
|
/** In which alt does this node live? */
|
||||||
|
// public Alternative alt;
|
||||||
|
|
||||||
|
|
||||||
|
public ActionAST(GrammarAST node) {
|
||||||
|
super(node);
|
||||||
|
this.resolver = ((ActionAST)node).resolver;
|
||||||
|
this.chunks = ((ActionAST)node).chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ActionAST(Token t) { super(t); }
|
||||||
|
public ActionAST(int type) { super(type); }
|
||||||
|
public ActionAST(int type, Token t) { super(type, t); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Tree dupNode() { return new ActionAST(this); }
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.runtime.tree.Tree;
|
||||||
|
|
||||||
|
/** An ALT or ALT_REWRITE node (left of ->) */
|
||||||
|
public class AltAST extends GrammarAST {
|
||||||
|
public Alternative alt;
|
||||||
|
|
||||||
|
public AltAST(GrammarAST node) {
|
||||||
|
super(node);
|
||||||
|
this.alt = ((AltAST)node).alt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AltAST(Token t) { super(t); }
|
||||||
|
public AltAST(int type) { super(type); }
|
||||||
|
public AltAST(int type, Token t) { super(type, t); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Tree dupNode() { return new AltAST(this); }
|
||||||
|
}
|
|
@ -0,0 +1,136 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
|
||||||
|
import org.stringtemplate.v4.misc.MultiMap;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/** Record use/def information about an outermost alternative in a subrule
|
||||||
|
* or rule of a grammar.
|
||||||
|
*/
|
||||||
|
public class Alternative implements AttributeResolver {
|
||||||
|
Rule rule;
|
||||||
|
|
||||||
|
public AltAST ast;
|
||||||
|
|
||||||
|
// token IDs, string literals in this alt
|
||||||
|
public MultiMap<String, TerminalAST> tokenRefs = new MultiMap<String, TerminalAST>();
|
||||||
|
|
||||||
|
// does not include labels
|
||||||
|
public MultiMap<String, GrammarAST> tokenRefsInActions = new MultiMap<String, GrammarAST>();
|
||||||
|
|
||||||
|
// all rule refs in this alt
|
||||||
|
public MultiMap<String, GrammarAST> ruleRefs = new MultiMap<String, GrammarAST>();
|
||||||
|
|
||||||
|
// does not include labels
|
||||||
|
public MultiMap<String, GrammarAST> ruleRefsInActions = new MultiMap<String, GrammarAST>();
|
||||||
|
|
||||||
|
/** A list of all LabelElementPair attached to tokens like id=ID, ids+=ID */
|
||||||
|
public MultiMap<String, LabelElementPair> labelDefs = new MultiMap<String, LabelElementPair>();
|
||||||
|
|
||||||
|
// track all token, rule, label refs in rewrite (right of ->)
|
||||||
|
public List<GrammarAST> rewriteElements = new ArrayList<GrammarAST>();
|
||||||
|
|
||||||
|
/** Track all executable actions other than named actions like @init
|
||||||
|
* and catch/finally (not in an alt). Also tracks predicates, rewrite actions.
|
||||||
|
* We need to examine these actions before code generation so
|
||||||
|
* that we can detect refs to $rule.attr etc...
|
||||||
|
*/
|
||||||
|
public List<ActionAST> actions = new ArrayList<ActionAST>();
|
||||||
|
|
||||||
|
public Alternative(Rule r) { this.rule = r; }
|
||||||
|
|
||||||
|
public boolean resolvesToToken(String x, ActionAST node) {
|
||||||
|
if ( tokenRefs.get(x)!=null ) return true;
|
||||||
|
LabelElementPair anyLabelDef = getAnyLabelDef(x);
|
||||||
|
if ( anyLabelDef!=null && anyLabelDef.type==LabelType.TOKEN_LABEL ) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// public String getTokenLabel(String x, ActionAST node) {
|
||||||
|
// LabelElementPair anyLabelDef = getAnyLabelDef(x);
|
||||||
|
// if ( anyLabelDef!=null ) return anyLabelDef.label.getText();
|
||||||
|
// if ( tokenRefs.get(x)!=null ) {
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
// LabelElementPair anyLabelDef = getAnyLabelDef(x);
|
||||||
|
// if ( anyLabelDef!=null && anyLabelDef.type==LabelType.TOKEN_LABEL ) return true;
|
||||||
|
// return false;
|
||||||
|
// }
|
||||||
|
|
||||||
|
public boolean resolvesToAttributeDict(String x, ActionAST node) {
|
||||||
|
if ( resolvesToToken(x, node) ) return true;
|
||||||
|
if ( x.equals(rule.name) ) return true; // $r for action in rule r, $r is a dict
|
||||||
|
if ( rule!=null && rule.scope!=null ) return true;
|
||||||
|
if ( rule.g.scopes.get(x)!=null ) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** $x Attribute: rule arguments, return values, predefined rule prop.
|
||||||
|
*/
|
||||||
|
public Attribute resolveToAttribute(String x, ActionAST node) {
|
||||||
|
return rule.resolveToAttribute(x, node); // reuse that code
|
||||||
|
}
|
||||||
|
|
||||||
|
/** $x.y, x can be surrounding rule, token/rule/label ref. y is visible
|
||||||
|
* attr in that dictionary. Can't see args on rule refs.
|
||||||
|
*/
|
||||||
|
public Attribute resolveToAttribute(String x, String y, ActionAST node) {
|
||||||
|
if ( rule.name.equals(x) ) { // x is this rule?
|
||||||
|
return rule.resolveToAttribute(x, y, node);
|
||||||
|
}
|
||||||
|
if ( tokenRefs.get(x)!=null ) { // token ref in this alt?
|
||||||
|
return rule.getPredefinedScope(LabelType.TOKEN_LABEL).get(y);
|
||||||
|
}
|
||||||
|
if ( ruleRefs.get(x)!=null ) { // rule ref in this alt?
|
||||||
|
// look up rule, ask it to resolve y (must be retval or predefined)
|
||||||
|
return rule.g.getRule(x).resolveRetvalOrProperty(y);
|
||||||
|
}
|
||||||
|
LabelElementPair anyLabelDef = getAnyLabelDef(x);
|
||||||
|
if ( anyLabelDef!=null && anyLabelDef.type==LabelType.RULE_LABEL ) {
|
||||||
|
return rule.g.getRule(anyLabelDef.element.getText()).resolveRetvalOrProperty(y);
|
||||||
|
}
|
||||||
|
else if ( anyLabelDef!=null ) {
|
||||||
|
return rule.getPredefinedScope(anyLabelDef.type).get(y);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AttributeDict resolveToDynamicScope(String x, ActionAST node) {
|
||||||
|
Rule r = resolveToRule(x);
|
||||||
|
if ( r!=null && r.scope !=null ) return r.scope;
|
||||||
|
return rule.resolveToDynamicScope(x, node);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean resolvesToLabel(String x, ActionAST node) {
|
||||||
|
LabelElementPair anyLabelDef = getAnyLabelDef(x);
|
||||||
|
return anyLabelDef!=null &&
|
||||||
|
(anyLabelDef.type==LabelType.TOKEN_LABEL ||
|
||||||
|
anyLabelDef.type==LabelType.RULE_LABEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean resolvesToListLabel(String x, ActionAST node) {
|
||||||
|
LabelElementPair anyLabelDef = getAnyLabelDef(x);
|
||||||
|
return anyLabelDef!=null &&
|
||||||
|
(anyLabelDef.type==LabelType.RULE_LIST_LABEL ||
|
||||||
|
anyLabelDef.type==LabelType.TOKEN_LIST_LABEL);
|
||||||
|
}
|
||||||
|
|
||||||
|
public LabelElementPair getAnyLabelDef(String x) {
|
||||||
|
List<LabelElementPair> labels = labelDefs.get(x);
|
||||||
|
if ( labels!=null ) return labels.get(0);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** x can be ruleref or rule label. */
|
||||||
|
public Rule resolveToRule(String x) {
|
||||||
|
if ( ruleRefs.get(x)!=null ) return rule.g.getRule(x);
|
||||||
|
LabelElementPair anyLabelDef = getAnyLabelDef(x);
|
||||||
|
if ( anyLabelDef!=null && anyLabelDef.type==LabelType.RULE_LABEL ) {
|
||||||
|
return rule.g.getRule(anyLabelDef.element.getText());
|
||||||
|
}
|
||||||
|
if ( x.equals(rule.name) ) return rule;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
/** Track the names of attributes define in arg lists, return values,
|
||||||
|
* scope blocks etc...
|
||||||
|
*/
|
||||||
|
public class Attribute {
|
||||||
|
/** The entire declaration such as "String foo;" */
|
||||||
|
public String decl;
|
||||||
|
|
||||||
|
/** The type; might be empty such as for Python which has no static typing */
|
||||||
|
public String type;
|
||||||
|
|
||||||
|
/** The name of the attribute "foo" */
|
||||||
|
public String name;
|
||||||
|
|
||||||
|
/** The optional attribute intialization expression */
|
||||||
|
public String initValue;
|
||||||
|
|
||||||
|
/** Who contains us? */
|
||||||
|
public AttributeDict dict;
|
||||||
|
|
||||||
|
public Attribute() {;}
|
||||||
|
|
||||||
|
public Attribute(String name) { this(name,null); }
|
||||||
|
|
||||||
|
public Attribute(String name, String decl) {
|
||||||
|
this.name = name;
|
||||||
|
this.decl = decl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
if ( initValue!=null ) {
|
||||||
|
return type+" "+name+"="+initValue;
|
||||||
|
}
|
||||||
|
return type+" "+name;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,86 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/** Track the attributes within retval, arg lists etc...
|
||||||
|
*
|
||||||
|
* Each rule has potentially 3 scopes: return values,
|
||||||
|
* parameters, and an implicitly-named scope (i.e., a scope defined in a rule).
|
||||||
|
* Implicitly-defined scopes are named after the rule; rules and scopes then
|
||||||
|
* must live in the same name space--no collisions allowed.
|
||||||
|
*/
|
||||||
|
public class AttributeDict {
|
||||||
|
public String name;
|
||||||
|
public GrammarAST ast;
|
||||||
|
public DictType type;
|
||||||
|
|
||||||
|
/** All token scopes (token labels) share the same fixed scope of
|
||||||
|
* of predefined attributes. I keep this out of the runtime.Token
|
||||||
|
* object to avoid a runtime type leakage.
|
||||||
|
*/
|
||||||
|
public static AttributeDict predefinedTokenDict = new AttributeDict(DictType.TOKEN) {{
|
||||||
|
add(new Attribute("text"));
|
||||||
|
add(new Attribute("type"));
|
||||||
|
add(new Attribute("line"));
|
||||||
|
add(new Attribute("index"));
|
||||||
|
add(new Attribute("pos"));
|
||||||
|
add(new Attribute("channel"));
|
||||||
|
add(new Attribute("tree"));
|
||||||
|
add(new Attribute("int"));
|
||||||
|
}};
|
||||||
|
|
||||||
|
public static enum DictType {
|
||||||
|
ARG, RET, TOKEN,
|
||||||
|
PREDEFINED_RULE, PREDEFINED_TREE_RULE, PREDEFINED_LEXER_RULE,
|
||||||
|
GLOBAL_SCOPE, // scope symbols { ...}
|
||||||
|
RULE_SCOPE; // scope { int i; int j; }
|
||||||
|
}
|
||||||
|
|
||||||
|
/** The list of Attribute objects */
|
||||||
|
|
||||||
|
public LinkedHashMap<String, Attribute> attributes =
|
||||||
|
new LinkedHashMap<String, Attribute>();
|
||||||
|
|
||||||
|
public AttributeDict() {;}
|
||||||
|
public AttributeDict(DictType type) { this.type = type; }
|
||||||
|
|
||||||
|
public Attribute add(Attribute a) { a.dict = this; return attributes.put(a.name, a); }
|
||||||
|
public Attribute get(String name) { return attributes.get(name); }
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
// if ( isParameterScope ) {
|
||||||
|
// return name+"_parameter";
|
||||||
|
// }
|
||||||
|
// else if ( isReturnScope ) {
|
||||||
|
// return name+"_return";
|
||||||
|
// }
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() { return attributes==null?0:attributes.size(); }
|
||||||
|
|
||||||
|
/** Return the set of keys that collide from
|
||||||
|
* this and other.
|
||||||
|
*/
|
||||||
|
public Set intersection(AttributeDict other) {
|
||||||
|
if ( other==null || other.size()==0 || size()==0 ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
Set<String> inter = new HashSet<String>();
|
||||||
|
Set thisKeys = attributes.keySet();
|
||||||
|
for (Iterator it = thisKeys.iterator(); it.hasNext();) {
|
||||||
|
String key = (String) it.next();
|
||||||
|
if ( other.attributes.get(key)!=null ) {
|
||||||
|
inter.add(key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( inter.size()==0 ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return inter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return getName()+":"+attributes;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,40 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
/** Grammars, rules, and alternatives all have symbols visible to
 *  actions.  To evaluate attr exprs, ask action for its resolver
 *  then ask resolver to look up various symbols.  Depending on the context,
 *  some symbols are available and some aren't.
 *
 *  Alternative level:
 *
 *  $x         Attribute: rule arguments, return values, predefined rule prop.
 *             AttributeDict: references to tokens and token labels in the
 *             current alt (including any elements within subrules contained
 *             in that outermost alt). x can be rule with scope or a global scope.
 *             List label: x is a token/rule list label.
 *  $x.y       Attribute: x is surrounding rule, rule/token/label ref
 *  $s::y      Attribute: s is any rule with scope or global scope; y is prop within
 *
 *  Rule level:
 *
 *  $x         Attribute: rule arguments, return values, predefined rule prop.
 *             AttributeDict: references to token labels in *any* alt. x can
 *             be any rule with scope or global scope.
 *             List label: x is a token/rule list label.
 *  $x.y       Attribute: x is surrounding rule, label ref (in any alts)
 *  $s::y      Attribute: s is any rule with scope or global scope; y is prop within
 *
 *  Grammar level:
 *
 *  $s         AttributeDict: s is a global scope
 *  $s::y      Attribute: s is a global scope; y is prop within
 */
public interface AttributeResolver {
    /** Is x a += list label (token or rule) visible from node? */
    public boolean resolvesToListLabel(String x, ActionAST node);
    /** Is x a plain token or rule label visible from node? */
    public boolean resolvesToLabel(String x, ActionAST node);
    /** Can $x be treated as an attribute dictionary (token, rule, scope)? */
    public boolean resolvesToAttributeDict(String x, ActionAST node);
    /** Is x a token ref or token label visible from node? */
    public boolean resolvesToToken(String x, ActionAST node);
    /** Resolve $x to an Attribute (arg, retval, predefined prop) or null. */
    public Attribute resolveToAttribute(String x, ActionAST node);
    /** Resolve $x.y to an Attribute in x's dictionary, or null. */
    public Attribute resolveToAttribute(String x, String y, ActionAST node);
    /** Resolve $x to a dynamic scope's AttributeDict, or null. */
    public AttributeDict resolveToDynamicScope(String x, ActionAST node);
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.runtime.tree.Tree;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class BlockAST extends GrammarASTWithOptions {
|
||||||
|
// TODO: maybe I need a Subrule object like Rule so these options mov to that?
|
||||||
|
/** What are the default options for a subrule? */
|
||||||
|
public static final Map defaultBlockOptions =
|
||||||
|
new HashMap() {{put("greedy","true");}};
|
||||||
|
|
||||||
|
public static final Map defaultLexerBlockOptions =
|
||||||
|
new HashMap() {{put("greedy","true");}};
|
||||||
|
|
||||||
|
public BlockAST(GrammarAST node) {
|
||||||
|
super(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BlockAST(Token t) { super(t); }
|
||||||
|
public BlockAST(int type) { super(type); }
|
||||||
|
public BlockAST(int type, Token t) { super(type, t); }
|
||||||
|
public BlockAST(int type, Token t, String text) { super(type,t,text); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Tree dupNode() { return new BlockAST(this); }
|
||||||
|
}
|
|
@ -1,4 +1,632 @@
|
||||||
package org.antlr.v4.tool;
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
public class Grammar {
|
import org.antlr.runtime.*;
|
||||||
|
import org.antlr.runtime.tree.TreeWizard;
|
||||||
|
import org.antlr.v4.Tool;
|
||||||
|
import org.antlr.v4.misc.*;
|
||||||
|
import org.antlr.v4.parse.*;
|
||||||
|
import org.antlr.v4.runtime.Token;
|
||||||
|
import org.antlr.v4.runtime.atn.ATN;
|
||||||
|
import org.antlr.v4.runtime.dfa.DFA;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class Grammar implements AttributeResolver {
|
||||||
|
public static final Set doNotCopyOptionsToLexer =
|
||||||
|
new HashSet() {
|
||||||
|
{
|
||||||
|
add("output"); add("ASTLabelType"); add("superClass");
|
||||||
|
add("k"); add("backtrack"); add("memoize"); add("rewrite");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
public static Map<String, AttributeDict> grammarAndLabelRefTypeToScope =
|
||||||
|
new HashMap<String, AttributeDict>() {{
|
||||||
|
put("lexer:RULE_LABEL", Rule.predefinedLexerRulePropertiesDict);
|
||||||
|
put("lexer:LEXER_STRING_LABEL", Rule.predefinedLexerRulePropertiesDict);
|
||||||
|
put("lexer:TOKEN_LABEL", AttributeDict.predefinedTokenDict);
|
||||||
|
put("parser:RULE_LABEL", Rule.predefinedRulePropertiesDict);
|
||||||
|
put("parser:TOKEN_LABEL", AttributeDict.predefinedTokenDict);
|
||||||
|
put("tree:RULE_LABEL", Rule.predefinedTreeRulePropertiesDict);
|
||||||
|
put("tree:TOKEN_LABEL", AttributeDict.predefinedTokenDict);
|
||||||
|
put("tree:WILDCARD_TREE_LABEL", AttributeDict.predefinedTokenDict);
|
||||||
|
put("combined:RULE_LABEL", Rule.predefinedRulePropertiesDict);
|
||||||
|
put("combined:TOKEN_LABEL", AttributeDict.predefinedTokenDict);
|
||||||
|
}};
|
||||||
|
public static final int MIN_CHAR_VALUE = '\u0000';
|
||||||
|
public static final int MAX_CHAR_VALUE = '\uFFFE';
|
||||||
|
|
||||||
|
public String name;
|
||||||
|
public GrammarRootAST ast;
|
||||||
|
public String text; // testing only
|
||||||
|
public String fileName;
|
||||||
|
|
||||||
|
/** Was this created from a COMBINED grammar? */
|
||||||
|
public Grammar implicitLexer;
|
||||||
|
public Grammar implicitLexerOwner;
|
||||||
|
|
||||||
|
/** If we're imported, who imported us? If null, implies grammar is root */
|
||||||
|
public Grammar parent;
|
||||||
|
public List<Grammar> importedGrammars;
|
||||||
|
|
||||||
|
/** All rules defined in this specific grammar, not imported. Also does
|
||||||
|
* not include lexical rules if combined.
|
||||||
|
*/
|
||||||
|
public OrderedHashMap<String, Rule> rules = new OrderedHashMap<String, Rule>();
|
||||||
|
int ruleNumber = 1;
|
||||||
|
|
||||||
|
/** The ATN that represents the grammar with edges labelled with tokens
|
||||||
|
* or epsilon. It is more suitable to analysis than an AST representation.
|
||||||
|
*/
|
||||||
|
public ATN atn;
|
||||||
|
|
||||||
|
public Map<Integer, DFA> decisionDFAs = new HashMap<Integer, DFA>();
|
||||||
|
|
||||||
|
public Vector<IntervalSet[]> decisionLOOK;
|
||||||
|
|
||||||
|
public Tool tool;
|
||||||
|
|
||||||
|
/** Token names and literal tokens like "void" are uniquely indexed.
|
||||||
|
* with -1 implying EOF. Characters are different; they go from
|
||||||
|
* -1 (EOF) to \uFFFE. For example, 0 could be a binary byte you
|
||||||
|
* want to lexer. Labels of DFA/ATN transitions can be both tokens
|
||||||
|
* and characters. I use negative numbers for bookkeeping labels
|
||||||
|
* like EPSILON. Char/String literals and token types overlap in the same
|
||||||
|
* space, however.
|
||||||
|
*/
|
||||||
|
int maxTokenType = Token.MIN_TOKEN_TYPE-1;
|
||||||
|
|
||||||
|
/** Map token like ID (but not literals like "while") to its token type */
|
||||||
|
public Map<String, Integer> tokenNameToTypeMap = new LinkedHashMap<String, Integer>();
|
||||||
|
|
||||||
|
/** Map token literals like "while" to its token type. It may be that
|
||||||
|
* WHILE="while"=35, in which case both tokenIDToTypeMap and this
|
||||||
|
* field will have entries both mapped to 35.
|
||||||
|
*/
|
||||||
|
public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
|
||||||
|
/** Reverse index for stringLiteralToTypeMap. Indexed with raw token type.
|
||||||
|
* 0 is invalid. */
|
||||||
|
public Vector<String> typeToStringLiteralList = new Vector<String>();
|
||||||
|
|
||||||
|
/** Map a token type to its token name. Indexed with raw token type.
|
||||||
|
* 0 is invalid.
|
||||||
|
*/
|
||||||
|
public Vector<String> typeToTokenList = new Vector<String>();
|
||||||
|
|
||||||
|
/** Map a name to an action.
|
||||||
|
* The code generator will use this to fill holes in the output files.
|
||||||
|
* I track the AST node for the action in case I need the line number
|
||||||
|
* for errors.
|
||||||
|
*/
|
||||||
|
public Map<String,ActionAST> namedActions = new HashMap<String,ActionAST>();
|
||||||
|
|
||||||
|
|
||||||
|
/** Tracks all forced actions in all alternatives of all rules.
|
||||||
|
* Or if lexer all rules period. Doesn't track sempreds.
|
||||||
|
* maps tree node to action index.
|
||||||
|
*/
|
||||||
|
public LinkedHashMap<ActionAST, Integer> actions = new LinkedHashMap<ActionAST, Integer>();
|
||||||
|
|
||||||
|
/** All sempreds found in grammar; maps tree node to sempred index */
|
||||||
|
public LinkedHashMap<PredAST, Integer> sempreds = new LinkedHashMap<PredAST, Integer>();
|
||||||
|
|
||||||
|
public Map<String, AttributeDict> scopes = new LinkedHashMap<String, AttributeDict>();
|
||||||
|
public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";
|
||||||
|
|
||||||
|
public Grammar(Tool tool, GrammarRootAST ast) {
|
||||||
|
if ( ast==null ) throw new IllegalArgumentException("can't pass null tree");
|
||||||
|
this.tool = tool;
|
||||||
|
this.ast = ast;
|
||||||
|
this.name = ((GrammarAST)ast.getChild(0)).getText();
|
||||||
|
initTokenSymbolTables();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** For testing */
|
||||||
|
public Grammar(String grammarText) throws org.antlr.runtime.RecognitionException {
|
||||||
|
this("<string>", grammarText, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** For testing */
|
||||||
|
public Grammar(String grammarText, ANTLRToolListener listener)
|
||||||
|
throws org.antlr.runtime.RecognitionException
|
||||||
|
{
|
||||||
|
this("<string>", grammarText, listener);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** For testing; only builds trees; no sem anal */
|
||||||
|
public Grammar(String fileName, String grammarText, ANTLRToolListener listener)
|
||||||
|
throws org.antlr.runtime.RecognitionException
|
||||||
|
{
|
||||||
|
this.text = grammarText;
|
||||||
|
this.fileName = fileName;
|
||||||
|
this.tool = new Tool();
|
||||||
|
this.tool.addListener(listener);
|
||||||
|
org.antlr.runtime.ANTLRStringStream in = new org.antlr.runtime.ANTLRStringStream(grammarText);
|
||||||
|
in.name = fileName;
|
||||||
|
ANTLRLexer lexer = new ANTLRLexer(in);
|
||||||
|
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
||||||
|
ToolANTLRParser p = new ToolANTLRParser(tokens,tool);
|
||||||
|
p.setTreeAdaptor(new GrammarASTAdaptor(in));
|
||||||
|
ParserRuleReturnScope r = p.grammarSpec();
|
||||||
|
if ( r.getTree() instanceof GrammarRootAST ) {
|
||||||
|
this.ast = (GrammarRootAST)r.getTree();
|
||||||
|
this.ast.hasErrors = p.getNumberOfSyntaxErrors()>0;
|
||||||
|
this.name = ((GrammarAST)ast.getChild(0)).getText();
|
||||||
|
}
|
||||||
|
initTokenSymbolTables();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void initTokenSymbolTables() {
|
||||||
|
if ( isTreeGrammar() ) {
|
||||||
|
typeToTokenList.setSize(Token.UP + 1);
|
||||||
|
typeToTokenList.set(Token.DOWN, "DOWN");
|
||||||
|
typeToTokenList.set(Token.UP, "UP");
|
||||||
|
tokenNameToTypeMap.put("DOWN", Token.DOWN);
|
||||||
|
tokenNameToTypeMap.put("UP", Token.UP);
|
||||||
|
}
|
||||||
|
tokenNameToTypeMap.put("EOF", Token.EOF);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void loadImportedGrammars() {
|
||||||
|
if ( ast==null ) return;
|
||||||
|
GrammarAST i = (GrammarAST)ast.getFirstChildWithType(ANTLRParser.IMPORT);
|
||||||
|
if ( i==null ) return;
|
||||||
|
importedGrammars = new ArrayList<Grammar>();
|
||||||
|
for (Object c : i.getChildren()) {
|
||||||
|
GrammarAST t = (GrammarAST)c;
|
||||||
|
String importedGrammarName = null;
|
||||||
|
if ( t.getType()==ANTLRParser.ASSIGN ) {
|
||||||
|
importedGrammarName = t.getChild(1).getText();
|
||||||
|
System.out.println("import "+ importedGrammarName);
|
||||||
|
}
|
||||||
|
else if ( t.getType()==ANTLRParser.ID ) {
|
||||||
|
importedGrammarName = t.getText();
|
||||||
|
System.out.println("import "+t.getText());
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
GrammarAST root = tool.load(importedGrammarName+".g");
|
||||||
|
if ( root instanceof GrammarASTErrorNode ) return; // came back as error node
|
||||||
|
GrammarRootAST ast = (GrammarRootAST)root;
|
||||||
|
Grammar g = tool.createGrammar(ast);
|
||||||
|
g.fileName = importedGrammarName+".g";
|
||||||
|
g.parent = this;
|
||||||
|
importedGrammars.add(g);
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
System.err.println("can't load grammar "+importedGrammarName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void defineAction(GrammarAST atAST) {
|
||||||
|
if ( atAST.getChildCount()==2 ) {
|
||||||
|
String name = atAST.getChild(0).getText();
|
||||||
|
namedActions.put(name, (ActionAST)atAST.getChild(1));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
String scope = atAST.getChild(0).getText();
|
||||||
|
if ( scope.equals(getTypeString()) ) {
|
||||||
|
String name = atAST.getChild(1).getText();
|
||||||
|
namedActions.put(name, (ActionAST)atAST.getChild(2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void defineRule(Rule r) {
|
||||||
|
if ( rules.get(r.name)!=null ) return;
|
||||||
|
rules.put(r.name, r);
|
||||||
|
r.index = ruleNumber++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// public int getNumRules() {
|
||||||
|
// int n = rules.size();
|
||||||
|
// List<Grammar> imports = getAllImportedGrammars();
|
||||||
|
// if ( imports!=null ) {
|
||||||
|
// for (Grammar g : imports) n += g.getNumRules();
|
||||||
|
// }
|
||||||
|
// return n;
|
||||||
|
// }
|
||||||
|
|
||||||
|
public Rule getRule(String name) {
|
||||||
|
Rule r = rules.get(name);
|
||||||
|
if ( r!=null ) return r;
|
||||||
|
List<Grammar> imports = getAllImportedGrammars();
|
||||||
|
if ( imports==null ) return null;
|
||||||
|
for (Grammar g : imports) {
|
||||||
|
r = g.rules.get(name);
|
||||||
|
if ( r!=null ) return r;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Rule getRule(String grammarName, String ruleName) {
|
||||||
|
if ( grammarName!=null ) { // scope override
|
||||||
|
Grammar g = getImportedGrammar(grammarName);
|
||||||
|
if ( g ==null ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return g.rules.get(ruleName);
|
||||||
|
}
|
||||||
|
return getRule(ruleName);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void defineScope(AttributeDict s) { scopes.put(s.getName(), s); }
|
||||||
|
|
||||||
|
/** Get list of all imports from all grammars in the delegate subtree of g.
|
||||||
|
* The grammars are in import tree preorder. Don't include ourselves
|
||||||
|
* in list as we're not a delegate of ourselves.
|
||||||
|
*/
|
||||||
|
public List<Grammar> getAllImportedGrammars() {
|
||||||
|
if ( importedGrammars==null ) return null;
|
||||||
|
List<Grammar> delegates = new ArrayList<Grammar>();
|
||||||
|
for (int i = 0; i < importedGrammars.size(); i++) {
|
||||||
|
Grammar d = importedGrammars.get(i);
|
||||||
|
delegates.add(d);
|
||||||
|
List<Grammar> ds = d.getAllImportedGrammars();
|
||||||
|
if ( ds!=null ) delegates.addAll( ds );
|
||||||
|
}
|
||||||
|
return delegates;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Grammar> getImportedGrammars() { return importedGrammars; }
|
||||||
|
|
||||||
|
/** Get delegates below direct delegates of g
|
||||||
|
public List<Grammar> getIndirectDelegates(Grammar g) {
|
||||||
|
List<Grammar> direct = getDirectDelegates(g);
|
||||||
|
List<Grammar> delegates = getDelegates(g);
|
||||||
|
delegates.removeAll(direct);
|
||||||
|
return delegates;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Return list of imported grammars from root down to our parent.
|
||||||
|
* Order is [root, ..., this.parent]. (us not included).
|
||||||
|
*/
|
||||||
|
public List<Grammar> getGrammarAncestors() {
|
||||||
|
Grammar root = getOutermostGrammar();
|
||||||
|
if ( this==root ) return null;
|
||||||
|
List<Grammar> grammars = new ArrayList<Grammar>();
|
||||||
|
// walk backwards to root, collecting grammars
|
||||||
|
Grammar p = this.parent;
|
||||||
|
while ( p!=null ) {
|
||||||
|
grammars.add(0, p); // add to head so in order later
|
||||||
|
p = p.parent;
|
||||||
|
}
|
||||||
|
return grammars;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the grammar that imported us and our parents. Return this
|
||||||
|
* if we're root.
|
||||||
|
*/
|
||||||
|
public Grammar getOutermostGrammar() {
|
||||||
|
if ( parent==null ) return this;
|
||||||
|
return parent.getOutermostGrammar();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Get the name of the generated recognizer; may or may not be same
|
||||||
|
* as grammar name.
|
||||||
|
* Recognizer is TParser and TLexer from T if combined, else
|
||||||
|
* just use T regardless of grammar type.
|
||||||
|
*/
|
||||||
|
public String getRecognizerName() {
|
||||||
|
String suffix = "";
|
||||||
|
List<Grammar> grammarsFromRootToMe = getOutermostGrammar().getGrammarAncestors();
|
||||||
|
String qualifiedName = name;
|
||||||
|
if ( grammarsFromRootToMe!=null ) {
|
||||||
|
StringBuffer buf = new StringBuffer();
|
||||||
|
for (Grammar g : grammarsFromRootToMe) {
|
||||||
|
buf.append(g.name);
|
||||||
|
buf.append('_');
|
||||||
|
}
|
||||||
|
buf.append(name);
|
||||||
|
qualifiedName = buf.toString();
|
||||||
|
}
|
||||||
|
if ( isCombined() || (isLexer() && implicitLexer!=null) )
|
||||||
|
{
|
||||||
|
suffix = Grammar.getGrammarTypeToFileNameSuffix(getType());
|
||||||
|
}
|
||||||
|
return qualifiedName+suffix;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStringLiteralLexerRuleName(String lit) {
|
||||||
|
int ttype = getTokenType(lit);
|
||||||
|
return AUTO_GENERATED_TOKEN_NAME_PREFIX +ttype;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return grammar directly imported by this grammar */
|
||||||
|
public Grammar getImportedGrammar(String name) {
|
||||||
|
for (Grammar g : importedGrammars) {
|
||||||
|
if ( g.name.equals(name) ) return g;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getTokenType(String token) {
|
||||||
|
Integer I = null;
|
||||||
|
if ( token.charAt(0)=='\'') {
|
||||||
|
I = stringLiteralToTypeMap.get(token);
|
||||||
|
}
|
||||||
|
else { // must be a label like ID
|
||||||
|
I = tokenNameToTypeMap.get(token);
|
||||||
|
}
|
||||||
|
int i = (I!=null)?I.intValue(): Token.INVALID_TYPE;
|
||||||
|
//System.out.println("grammar type "+type+" "+tokenName+"->"+i);
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Given a token type, get a meaningful name for it such as the ID
|
||||||
|
* or string literal. If this is a lexer and the ttype is in the
|
||||||
|
* char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
|
||||||
|
*/
|
||||||
|
public String getTokenDisplayName(int ttype) {
|
||||||
|
String tokenName = null;
|
||||||
|
int index=0;
|
||||||
|
// inside any target's char range and is lexer grammar?
|
||||||
|
if ( isLexer() &&
|
||||||
|
ttype >= MIN_CHAR_VALUE && ttype <= MAX_CHAR_VALUE )
|
||||||
|
{
|
||||||
|
return CharSupport.getANTLRCharLiteralForChar(ttype);
|
||||||
|
}
|
||||||
|
else if ( ttype==Token.EOF ) {
|
||||||
|
tokenName = "EOF";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if ( ttype<typeToTokenList.size() ) {
|
||||||
|
tokenName = typeToTokenList.get(ttype);
|
||||||
|
if ( tokenName!=null &&
|
||||||
|
tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) &&
|
||||||
|
typeToStringLiteralList.get(ttype)!=null)
|
||||||
|
{
|
||||||
|
tokenName = typeToStringLiteralList.get(ttype);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
tokenName = String.valueOf(ttype);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//System.out.println("getTokenDisplayName ttype="+ttype+", index="+index+", name="+tokenName);
|
||||||
|
return tokenName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getTokenDisplayNames(Collection<Integer> types) {
|
||||||
|
List<String> names = new ArrayList<String>();
|
||||||
|
for (int t : types) names.add(getTokenDisplayName(t));
|
||||||
|
return names;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String[] getTokenNames() {
|
||||||
|
int numTokens = getMaxTokenType();
|
||||||
|
String[] tokenNames = new String[numTokens+1];
|
||||||
|
for (String t : tokenNameToTypeMap.keySet()) {
|
||||||
|
Integer ttype = tokenNameToTypeMap.get(t);
|
||||||
|
if ( ttype>0 ) tokenNames[ttype] = t;
|
||||||
|
}
|
||||||
|
return tokenNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String[] getTokenDisplayNames() {
|
||||||
|
int numTokens = getMaxTokenType();
|
||||||
|
String[] tokenNames = new String[numTokens+1];
|
||||||
|
for (String t : tokenNameToTypeMap.keySet()) {
|
||||||
|
Integer ttype = tokenNameToTypeMap.get(t);
|
||||||
|
if ( ttype>0 ) tokenNames[ttype] = t;
|
||||||
|
}
|
||||||
|
for (String t : stringLiteralToTypeMap.keySet()) {
|
||||||
|
Integer ttype = stringLiteralToTypeMap.get(t);
|
||||||
|
if ( ttype>0 ) tokenNames[ttype] = t;
|
||||||
|
}
|
||||||
|
return tokenNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** What is the max char value possible for this grammar's target? Use
|
||||||
|
* unicode max if no target defined.
|
||||||
|
*/
|
||||||
|
public int getMaxCharValue() {
|
||||||
|
return MAX_CHAR_VALUE;
|
||||||
|
// if ( generator!=null ) {
|
||||||
|
// return generator.target.getMaxCharValue(generator);
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// return Label.MAX_CHAR_VALUE;
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return a set of all possible token or char types for this grammar */
|
||||||
|
public IntSet getTokenTypes() {
|
||||||
|
if ( isLexer() ) {
|
||||||
|
return getAllCharValues();
|
||||||
|
}
|
||||||
|
return IntervalSet.of(Token.MIN_TOKEN_TYPE, getMaxTokenType());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return min to max char as defined by the target.
|
||||||
|
* If no target, use max unicode char value.
|
||||||
|
*/
|
||||||
|
public IntSet getAllCharValues() {
|
||||||
|
return IntervalSet.of(MIN_CHAR_VALUE, getMaxCharValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** How many token types have been allocated so far? */
|
||||||
|
public int getMaxTokenType() {
|
||||||
|
return typeToTokenList.size() - 1; // don't count 0 (invalid)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return a new unique integer in the token type space */
|
||||||
|
public int getNewTokenType() {
|
||||||
|
maxTokenType++;
|
||||||
|
return maxTokenType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void importVocab(Grammar g) {
|
||||||
|
this.tokenNameToTypeMap.putAll( g.tokenNameToTypeMap );
|
||||||
|
this.stringLiteralToTypeMap.putAll( g.stringLiteralToTypeMap );
|
||||||
|
int max = Math.max(this.typeToTokenList.size(), g.typeToTokenList.size());
|
||||||
|
this.typeToTokenList.setSize(max);
|
||||||
|
for (int ttype=0; ttype<g.typeToTokenList.size(); ttype++) {
|
||||||
|
maxTokenType = Math.max(maxTokenType, ttype);
|
||||||
|
this.typeToTokenList.set(ttype, g.typeToTokenList.get(ttype));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int defineTokenName(String name) {
|
||||||
|
return defineTokenName(name, getNewTokenType());
|
||||||
|
}
|
||||||
|
|
||||||
|
public int defineTokenName(String name, int ttype) {
|
||||||
|
Integer prev = tokenNameToTypeMap.get(name);
|
||||||
|
if ( prev!=null ) return prev;
|
||||||
|
tokenNameToTypeMap.put(name, ttype);
|
||||||
|
setTokenForType(ttype, name);
|
||||||
|
maxTokenType = Math.max(maxTokenType, ttype);
|
||||||
|
return ttype;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int defineStringLiteral(String lit) {
|
||||||
|
return defineStringLiteral(lit, getNewTokenType());
|
||||||
|
}
|
||||||
|
|
||||||
|
public int defineStringLiteral(String lit, int ttype) {
|
||||||
|
if ( !stringLiteralToTypeMap.containsKey(lit) ) {
|
||||||
|
stringLiteralToTypeMap.put(lit, ttype);
|
||||||
|
// track in reverse index too
|
||||||
|
if ( ttype>=typeToStringLiteralList.size() ) {
|
||||||
|
typeToStringLiteralList.setSize(ttype+1);
|
||||||
|
}
|
||||||
|
typeToStringLiteralList.set(ttype, text);
|
||||||
|
|
||||||
|
setTokenForType(ttype, lit);
|
||||||
|
return ttype;
|
||||||
|
}
|
||||||
|
return Token.INVALID_TYPE;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int defineTokenAlias(String name, String lit) {
|
||||||
|
int ttype = defineTokenName(name);
|
||||||
|
stringLiteralToTypeMap.put(lit, ttype);
|
||||||
|
setTokenForType(ttype, name);
|
||||||
|
return ttype;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTokenForType(int ttype, String text) {
|
||||||
|
if ( ttype>=typeToTokenList.size() ) {
|
||||||
|
typeToTokenList.setSize(ttype+1);
|
||||||
|
}
|
||||||
|
String prevToken = typeToTokenList.get(ttype);
|
||||||
|
if ( prevToken==null || prevToken.charAt(0)=='\'' ) {
|
||||||
|
// only record if nothing there before or if thing before was a literal
|
||||||
|
typeToTokenList.set(ttype, text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// no isolated attr at grammar action level
|
||||||
|
public Attribute resolveToAttribute(String x, ActionAST node) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// no $x.y makes sense here
|
||||||
|
public Attribute resolveToAttribute(String x, String y, ActionAST node) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AttributeDict resolveToDynamicScope(String x, ActionAST node) {
|
||||||
|
return scopes.get(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean resolvesToLabel(String x, ActionAST node) { return false; }
|
||||||
|
|
||||||
|
public boolean resolvesToListLabel(String x, ActionAST node) { return false; }
|
||||||
|
|
||||||
|
public boolean resolvesToToken(String x, ActionAST node) { return false; }
|
||||||
|
|
||||||
|
public boolean resolvesToAttributeDict(String x, ActionAST node) {
|
||||||
|
return scopes.get(x)!=null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Given a grammar type, what should be the default action scope?
|
||||||
|
* If I say @members in a COMBINED grammar, for example, the
|
||||||
|
* default scope should be "parser".
|
||||||
|
*/
|
||||||
|
public String getDefaultActionScope() {
|
||||||
|
switch ( getType() ) {
|
||||||
|
case ANTLRParser.LEXER :
|
||||||
|
return "lexer";
|
||||||
|
case ANTLRParser.PARSER :
|
||||||
|
case ANTLRParser.COMBINED :
|
||||||
|
return "parser";
|
||||||
|
case ANTLRParser.TREE :
|
||||||
|
return "treeparser";
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getType() {
|
||||||
|
if ( ast!=null ) return ast.grammarType;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isLexer() { return getType()==ANTLRParser.LEXER; }
|
||||||
|
public boolean isParser() { return getType()==ANTLRParser.PARSER; }
|
||||||
|
public boolean isTreeGrammar() { return getType()==ANTLRParser.TREE; }
|
||||||
|
public boolean isCombined() { return getType()==ANTLRParser.COMBINED; }
|
||||||
|
|
||||||
|
public String getTypeString() {
|
||||||
|
if ( ast==null ) return null;
|
||||||
|
return ANTLRParser.tokenNames[getType()].toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getGrammarTypeToFileNameSuffix(int type) {
|
||||||
|
switch ( type ) {
|
||||||
|
case ANTLRParser.LEXER : return "Lexer";
|
||||||
|
case ANTLRParser.PARSER : return "Parser";
|
||||||
|
case ANTLRParser.TREE : return "";
|
||||||
|
// if combined grammar, gen Parser and Lexer will be done later
|
||||||
|
// TODO: we are separate now right?
|
||||||
|
case ANTLRParser.COMBINED : return "Parser";
|
||||||
|
default :
|
||||||
|
return "<invalid>";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOption(String key) {
|
||||||
|
if ( ast.options==null ) return null;
|
||||||
|
return ast.options.get(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOption(String key, String defaultValue) {
|
||||||
|
if ( ast.options==null ) return defaultValue;
|
||||||
|
String v = ast.options.get(key);
|
||||||
|
if ( v!=null ) return v;
|
||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Map<String,String> getStringLiteralAliasesFromLexerRules(GrammarRootAST ast) {
|
||||||
|
GrammarAST combinedRulesRoot =
|
||||||
|
(GrammarAST)ast.getFirstChildWithType(ANTLRParser.RULES);
|
||||||
|
if ( combinedRulesRoot==null ) return null;
|
||||||
|
|
||||||
|
List<GrammarASTWithOptions> ruleNodes = combinedRulesRoot.getChildren();
|
||||||
|
if ( ruleNodes==null || ruleNodes.size()==0 ) return null;
|
||||||
|
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(ruleNodes.get(0).token.getInputStream());
|
||||||
|
TreeWizard wiz = new TreeWizard(adaptor,ANTLRParser.tokenNames);
|
||||||
|
Map<String,String> lexerRuleToStringLiteral = new HashMap<String,String>();
|
||||||
|
|
||||||
|
for (GrammarASTWithOptions r : ruleNodes) {
|
||||||
|
String ruleName = r.getChild(0).getText();
|
||||||
|
if ( Character.isUpperCase(ruleName.charAt(0)) ) {
|
||||||
|
Map nodes = new HashMap();
|
||||||
|
boolean isLitRule =
|
||||||
|
wiz.parse(r, "(RULE %name:ID (BLOCK (ALT %lit:STRING_LITERAL)))", nodes);
|
||||||
|
if ( isLitRule ) {
|
||||||
|
GrammarAST litNode = (GrammarAST)nodes.get("lit");
|
||||||
|
GrammarAST nameNode = (GrammarAST)nodes.get("name");
|
||||||
|
lexerRuleToStringLiteral.put(litNode.getText(), nameNode.getText());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return lexerRuleToStringLiteral;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLookaheadDFA(int decision, DFA lookaheadDFA) {
|
||||||
|
decisionDFAs.put(Utils.integer(decision), lookaheadDFA);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,99 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.*;
|
||||||
|
import org.antlr.runtime.tree.*;
|
||||||
|
import org.antlr.v4.misc.IntervalSet;
|
||||||
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
|
import org.antlr.v4.runtime.atn.ATNState;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class GrammarAST extends CommonTree {
|
||||||
|
/** If we build an ATN, we make AST node point at left edge of ATN construct */
|
||||||
|
public ATNState atnState;
|
||||||
|
|
||||||
|
public GrammarAST() {;}
|
||||||
|
public GrammarAST(Token t) { super(t); }
|
||||||
|
public GrammarAST(GrammarAST node) { super(node); }
|
||||||
|
public GrammarAST(int type) { super(new CommonToken(type, ANTLRParser.tokenNames[type])); }
|
||||||
|
public GrammarAST(int type, Token t) {
|
||||||
|
this(new CommonToken(type, t.getText()));
|
||||||
|
token.setInputStream(t.getInputStream());
|
||||||
|
token.setLine(t.getLine());
|
||||||
|
token.setCharPositionInLine(t.getCharPositionInLine());
|
||||||
|
}
|
||||||
|
public GrammarAST(int type, Token t, String text) {
|
||||||
|
this(new CommonToken(type, text));
|
||||||
|
token.setInputStream(t.getInputStream());
|
||||||
|
token.setLine(t.getLine());
|
||||||
|
token.setCharPositionInLine(t.getCharPositionInLine());
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<GrammarAST> getNodesWithType(int ttype) {
|
||||||
|
return getNodesWithType(IntervalSet.of(ttype));
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<GrammarAST> getNodesWithType(IntervalSet types) {
|
||||||
|
List<GrammarAST> nodes = new ArrayList<GrammarAST>();
|
||||||
|
List<GrammarAST> work = new LinkedList<GrammarAST>();
|
||||||
|
work.add(this);
|
||||||
|
GrammarAST t = null;
|
||||||
|
while ( work.size()>0 ) {
|
||||||
|
t = work.remove(0);
|
||||||
|
if ( types.member(t.getType()) ) nodes.add(t);
|
||||||
|
if ( t.children!=null ) work.addAll(t.children);
|
||||||
|
}
|
||||||
|
return nodes;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AltAST getOutermostAltNode() {
|
||||||
|
if ( this instanceof AltAST && parent.parent instanceof RuleAST ) {
|
||||||
|
return (AltAST)this;
|
||||||
|
}
|
||||||
|
if ( parent!=null ) return ((GrammarAST)parent).getOutermostAltNode();
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: move to basetree when i settle on how runtime works
|
||||||
|
// TODO: don't include this node!!
|
||||||
|
// TODO: reuse other method
|
||||||
|
public CommonTree getFirstDescendantWithType(int type) {
|
||||||
|
if ( getType()==type ) return this;
|
||||||
|
if ( children==null ) return null;
|
||||||
|
for (Object c : children) {
|
||||||
|
GrammarAST t = (GrammarAST)c;
|
||||||
|
if ( t.getType()==type ) return t;
|
||||||
|
CommonTree d = t.getFirstDescendantWithType(type);
|
||||||
|
if ( d!=null ) return d;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: don't include this node!!
|
||||||
|
public CommonTree getFirstDescendantWithType(org.antlr.runtime.BitSet types) {
|
||||||
|
if ( types.member(getType()) ) return this;
|
||||||
|
if ( children==null ) return null;
|
||||||
|
for (Object c : children) {
|
||||||
|
GrammarAST t = (GrammarAST)c;
|
||||||
|
if ( types.member(t.getType()) ) return t;
|
||||||
|
CommonTree d = t.getFirstDescendantWithType(types);
|
||||||
|
if ( d!=null ) return d;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// @Override
|
||||||
|
// public boolean equals(Object obj) {
|
||||||
|
// return super.equals(obj);
|
||||||
|
// }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Tree dupNode() {
|
||||||
|
return new GrammarAST(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return super.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.runtime.tree.Tree;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class GrammarRootAST extends GrammarASTWithOptions {
|
||||||
|
public static final Map defaultOptions =
|
||||||
|
new HashMap() {
|
||||||
|
{
|
||||||
|
put("language","Java");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
public int grammarType; // LEXER, PARSER, TREE, GRAMMAR (combined)
|
||||||
|
public boolean hasErrors;
|
||||||
|
|
||||||
|
public GrammarRootAST(GrammarAST node) {
|
||||||
|
super(node);
|
||||||
|
this.grammarType = ((GrammarRootAST)node).grammarType;
|
||||||
|
this.hasErrors = ((GrammarRootAST)node).hasErrors;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Tree dupNode() { return new GrammarRootAST(this); }
|
||||||
|
|
||||||
|
public GrammarRootAST(int type) { super(type); }
|
||||||
|
public GrammarRootAST(Token t) { super(t); }
|
||||||
|
public GrammarRootAST(int type, Token t) { super(type, t); }
|
||||||
|
public GrammarRootAST(int type, Token t, String text) {
|
||||||
|
super(type,t,text);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.BitSet;
|
||||||
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
|
|
||||||
|
public class LabelElementPair {
|
||||||
|
public static final BitSet tokenTypeForTokens = new BitSet();
|
||||||
|
static {
|
||||||
|
tokenTypeForTokens.add(ANTLRParser.TOKEN_REF);
|
||||||
|
tokenTypeForTokens.add(ANTLRParser.STRING_LITERAL);
|
||||||
|
tokenTypeForTokens.add(ANTLRParser.WILDCARD);
|
||||||
|
}
|
||||||
|
|
||||||
|
public GrammarAST label;
|
||||||
|
public GrammarAST element;
|
||||||
|
public LabelType type;
|
||||||
|
|
||||||
|
public LabelElementPair(Grammar g, GrammarAST label, GrammarAST element, int labelOp) {
|
||||||
|
this.label = label;
|
||||||
|
this.element = element;
|
||||||
|
// compute general case for label type
|
||||||
|
if ( element.getFirstDescendantWithType(tokenTypeForTokens)!=null ) {
|
||||||
|
if ( labelOp==ANTLRParser.ASSIGN ) type = LabelType.TOKEN_LABEL;
|
||||||
|
else type = LabelType.TOKEN_LIST_LABEL;
|
||||||
|
}
|
||||||
|
else if ( element.getFirstDescendantWithType(ANTLRParser.RULE_REF)!=null ) {
|
||||||
|
if ( labelOp==ANTLRParser.ASSIGN ) type = LabelType.RULE_LABEL;
|
||||||
|
else type = LabelType.RULE_LIST_LABEL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// now reset if lexer and string
|
||||||
|
if ( g.isLexer() ) {
|
||||||
|
if ( element.getFirstDescendantWithType(ANTLRParser.STRING_LITERAL)!=null ) {
|
||||||
|
if ( labelOp==ANTLRParser.ASSIGN ) type = LabelType.LEXER_STRING_LABEL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ( g.isTreeGrammar() ) {
|
||||||
|
if ( element.getFirstDescendantWithType(ANTLRParser.WILDCARD)!=null ) {
|
||||||
|
if ( labelOp==ANTLRParser.ASSIGN ) type = LabelType.WILDCARD_TREE_LABEL;
|
||||||
|
else type = LabelType.WILDCARD_TREE_LIST_LABEL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return label.getText()+" "+type+" "+element.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,15 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
/** the various kinds of labels. t=type, id=ID, types+=type ids+=ID */
|
||||||
|
public enum LabelType {
    RULE_LABEL,              // x=rule
    TOKEN_LABEL,             // x=ID
    RULE_LIST_LABEL,         // x+=rule
    TOKEN_LIST_LABEL,        // x+=ID
    LEXER_STRING_LABEL,      // used in lexer for x='a'
    SUBRULE_LABEL,           // x=(...)
    SUBRULE_LIST_LABEL,      // x+=(...)
    WILDCARD_TREE_LABEL,     // Used in tree grammar x=.
    WILDCARD_TREE_LIST_LABEL // Used in tree grammar x+=.
    ;
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.misc.MultiMap;
|
||||||
|
import org.antlr.runtime.RecognitionException;
|
||||||
|
import org.antlr.tool.Rule;
|
||||||
|
import org.antlr.v4.Tool;
|
||||||
|
|
||||||
|
/** */
|
||||||
|
public class LexerGrammar extends Grammar {
|
||||||
|
public static final String DEFAULT_MODE_NAME = "DEFAULT_MODE";
|
||||||
|
|
||||||
|
public MultiMap<String, Rule> modes = new MultiMap<String, Rule>();
|
||||||
|
|
||||||
|
public LexerGrammar(Tool tool, GrammarRootAST ast) {
|
||||||
|
super(tool, ast);
|
||||||
|
}
|
||||||
|
|
||||||
|
public LexerGrammar(String grammarText) throws RecognitionException {
|
||||||
|
super(grammarText);
|
||||||
|
}
|
||||||
|
|
||||||
|
public LexerGrammar(String grammarText, ANTLRToolListener listener) throws RecognitionException {
|
||||||
|
super(grammarText, listener);
|
||||||
|
}
|
||||||
|
|
||||||
|
public LexerGrammar(String fileName, String grammarText, ANTLRToolListener listener) throws RecognitionException {
|
||||||
|
super(fileName, grammarText, listener);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void defineRule(Rule r) {
|
||||||
|
super.defineRule(r);
|
||||||
|
modes.map(r.mode, r);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,15 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
|
||||||
|
public class PredAST extends ActionAST {
|
||||||
|
public PredAST(GrammarAST node) {
|
||||||
|
super(node);
|
||||||
|
this.resolver = ((ActionAST)node).resolver;
|
||||||
|
this.chunks = ((ActionAST)node).chunks;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PredAST(Token t) { super(t); }
|
||||||
|
public PredAST(int type) { super(type); }
|
||||||
|
public PredAST(int type, Token t) { super(type, t); }
|
||||||
|
}
|
|
@ -0,0 +1,246 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
|
import org.stringtemplate.v4.misc.MultiMap;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/** Everything ANTLR tracks about one grammar rule: name, args, return values,
 *  scopes, named actions, labels, and per-alternative info.  Also implements
 *  AttributeResolver so $x / $x.y references inside this rule's actions can be
 *  resolved to attributes, rules, or dynamic scopes.
 */
public class Rule implements AttributeResolver {
    /** Rule refs have a predefined set of attributes as well as
     *  the return values and args.
     */
    public static AttributeDict predefinedRulePropertiesDict =
        new AttributeDict(AttributeDict.DictType.PREDEFINED_RULE) {{
            add(new Attribute("text"));
            add(new Attribute("start"));
            add(new Attribute("stop"));
            add(new Attribute("tree"));
            add(new Attribute("st"));
        }};

    // Predefined properties available on tree-grammar rule refs.
    public static AttributeDict predefinedTreeRulePropertiesDict =
        new AttributeDict(AttributeDict.DictType.PREDEFINED_TREE_RULE) {{
            add(new Attribute("text"));
            add(new Attribute("start")); // note: no stop; not meaningful
            add(new Attribute("tree"));
            add(new Attribute("st"));
        }};

    // Predefined properties available on lexer rule refs.
    public static AttributeDict predefinedLexerRulePropertiesDict =
        new AttributeDict(AttributeDict.DictType.PREDEFINED_LEXER_RULE) {{
            add(new Attribute("text"));
            add(new Attribute("type"));
            add(new Attribute("line"));
            add(new Attribute("index"));
            add(new Attribute("pos"));
            add(new Attribute("channel"));
            add(new Attribute("start"));
            add(new Attribute("stop"));
            add(new Attribute("int"));
        }};

    public String name;
    public List<GrammarAST> modifiers; // e.g. "fragment"; see isFragment()

    public RuleAST ast;
    public AttributeDict args;
    public AttributeDict retvals;
    public AttributeDict scope; // scope { int i; }

    /** A list of scope names used by this rule */
    public List<Token> useScopes;

    // Owning grammar.
    public Grammar g;

    /** If we're in a lexer grammar, we might be in a mode */
    public String mode;

    /** Map a name to an action for this rule like @init {...}.
     *  The code generator will use this to fill holes in the rule template.
     *  I track the AST node for the action in case I need the line number
     *  for errors.
     */
    public Map<String, ActionAST> namedActions =
        new HashMap<String, ActionAST>();

    /** Track exception handler actions (exception type is prev child);
     *  don't track finally action
     */
    public List<ActionAST> exceptionActions = new ArrayList<ActionAST>();

    public ActionAST finallyAction;

    public int numberOfAlts;

    public boolean isStartRule = true; // nobody calls us

    // Per-alternative info; slot 0 is unused so alts are 1..numberOfAlts.
    public Alternative[] alt;

    /** All rules have unique index 1..n */
    public int index;

    public int actionIndex; // if lexer

    public Rule(Grammar g, String name, RuleAST ast, int numberOfAlts) {
        this.g = g;
        this.name = name;
        this.ast = ast;
        this.numberOfAlts = numberOfAlts;
        alt = new Alternative[numberOfAlts+1]; // 1..n
        for (int i=1; i<=numberOfAlts; i++) alt[i] = new Alternative(this);
    }

    // Record an action in the given alt; lexer and FORCED actions also get a
    // grammar-wide index (presumably for codegen dispatch — TODO confirm).
    public void defineActionInAlt(int currentAlt, ActionAST actionAST) {
        alt[currentAlt].actions.add(actionAST);
        if ( g.isLexer() || actionAST.getType()== ANTLRParser.FORCED_ACTION ) {
            actionIndex = g.actions.size() + 1;
            g.actions.put(actionAST, actionIndex);
        }
    }

    // Record a semantic predicate in the given alt and assign it a
    // grammar-wide 1-based index.
    public void definePredicateInAlt(int currentAlt, PredAST predAST) {
        alt[currentAlt].actions.add(predAST);
        g.sempreds.put(predAST, g.sempreds.size() + 1);
    }

    // Resolve y first against this rule's return values, then against the
    // predefined rule properties for the grammar type.
    public Attribute resolveRetvalOrProperty(String y) {
        if ( retvals!=null ) {
            Attribute a = retvals.get(y);
            if ( a!=null ) return a;
        }
        AttributeDict d = getPredefinedScope(LabelType.RULE_LABEL);
        return d.get(y);
    }

    // Union of token names referenced across all alternatives.
    public Set<String> getTokenRefs() {
        Set<String> refs = new HashSet<String>();
        for (int i=1; i<=numberOfAlts; i++) {
            refs.addAll(alt[i].tokenRefs.keySet());
        }
        return refs;
    }

    // Union of label names defined across all alternatives.
    public Set<String> getLabelNames() {
        Set<String> refs = new HashSet<String>();
        for (int i=1; i<=numberOfAlts; i++) {
            refs.addAll(alt[i].labelDefs.keySet());
        }
        return refs;
    }

    // TODO: called frequently; make it more efficient
    // Flattens every alt's label definitions into one name -> pairs multimap.
    public MultiMap<String, LabelElementPair> getLabelDefs() {
        MultiMap<String, LabelElementPair> defs =
            new MultiMap<String, LabelElementPair>();
        for (int i=1; i<=numberOfAlts; i++) {
            for (List<LabelElementPair> pairs : alt[i].labelDefs.values()) {
                for (LabelElementPair p : pairs) {
                    defs.map(p.label.getText(), p);
                }
            }
        }
        return defs;
    }

    /** $x Attribute: rule arguments, return values, predefined rule prop.
     */
    public Attribute resolveToAttribute(String x, ActionAST node) {
        if ( args!=null ) {
            Attribute a = args.get(x); if ( a!=null ) return a;
        }
        if ( retvals!=null ) {
            Attribute a = retvals.get(x); if ( a!=null ) return a;
        }
        AttributeDict properties = getPredefinedScope(LabelType.RULE_LABEL);
        return properties.get(x);
    }

    /** $x.y Attribute: x is surrounding rule, label ref (in any alts) */
    public Attribute resolveToAttribute(String x, String y, ActionAST node) {
        if ( this.name.equals(x) ) { // x is this rule?
            return resolveToAttribute(y, node);
        }
        LabelElementPair anyLabelDef = getAnyLabelDef(x);
        if ( anyLabelDef!=null ) {
            if ( anyLabelDef.type==LabelType.RULE_LABEL ) {
                return g.getRule(anyLabelDef.element.getText()).resolveRetvalOrProperty(y);
            }
            else {
                return getPredefinedScope(anyLabelDef.type).get(y);
            }
        }
        return null;

    }

    // $x where x is a dynamic scope: prefer a rule named x with a scope,
    // else fall back to a grammar-level scope named x.
    public AttributeDict resolveToDynamicScope(String x, ActionAST node) {
        Rule r = resolveToRule(x);
        if ( r!=null && r.scope!=null ) return r.scope;
        return g.scopes.get(x);
    }

    // Rules never resolve a bare $x to a plain label at this level.
    public boolean resolvesToLabel(String x, ActionAST node) {
        return false;
    }

    public boolean resolvesToListLabel(String x, ActionAST node) {
        LabelElementPair anyLabelDef = getAnyLabelDef(x);
        return anyLabelDef!=null &&
               (anyLabelDef.type==LabelType.RULE_LIST_LABEL ||
                anyLabelDef.type==LabelType.TOKEN_LIST_LABEL);
    }

    public boolean resolvesToToken(String x, ActionAST node) {
        LabelElementPair anyLabelDef = getAnyLabelDef(x);
        if ( anyLabelDef!=null && anyLabelDef.type==LabelType.TOKEN_LABEL ) return true;
        return false;
    }

    public boolean resolvesToAttributeDict(String x, ActionAST node) {
        if ( resolvesToToken(x, node) ) return true;
        if ( x.equals(name) ) return true; // $r for action in rule r, $r is a dict
        if ( scope!=null ) return true;
        if ( g.scopes.get(x)!=null ) return true;
        return false;
    }

    // Resolve x to a rule: this rule, the rule behind a rule label, or a
    // grammar rule named x.
    public Rule resolveToRule(String x) {
        if ( x.equals(this.name) ) return this;
        LabelElementPair anyLabelDef = getAnyLabelDef(x);
        if ( anyLabelDef!=null && anyLabelDef.type==LabelType.RULE_LABEL ) {
            return g.getRule(anyLabelDef.element.getText());
        }
        return g.getRule(x);
    }

    // First label definition named x in any alt, or null.
    public LabelElementPair getAnyLabelDef(String x) {
        List<LabelElementPair> labels = getLabelDefs().get(x);
        if ( labels!=null ) return labels.get(0);
        return null;
    }

    // Look up the predefined attribute dict for this grammar type + label type.
    // NOTE(review): may return null for unknown combinations; callers such as
    // resolveRetvalOrProperty dereference the result — verify the key space.
    public AttributeDict getPredefinedScope(LabelType ltype) {
        String grammarLabelKey = g.getTypeString() + ":" + ltype;
        return Grammar.grammarAndLabelRefTypeToScope.get(grammarLabelKey);
    }

    // True if this rule carries the "fragment" modifier.
    public boolean isFragment() {
        if ( modifiers==null ) return false;
        for (GrammarAST a : modifiers) {
            if ( a.getText().equals("fragment") ) return true;
        }
        return false;
    }

    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder();
        buf.append("Rule{name="+name);
        if ( args!=null ) buf.append(", args=" + args);
        if ( retvals!=null ) buf.append(", retvals=" + retvals);
        if ( scope!=null ) buf.append(", scope=" + scope);
        buf.append("}");
        return buf.toString();
    }
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.runtime.tree.Tree;
|
||||||
|
import org.antlr.v4.parse.ANTLRParser;
|
||||||
|
|
||||||
|
public class RuleAST extends GrammarASTWithOptions {
|
||||||
|
public RuleAST(GrammarAST node) {
|
||||||
|
super(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
public RuleAST(Token t) { super(t); }
|
||||||
|
public RuleAST(int type) { super(type); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Tree dupNode() { return new RuleAST(this); }
|
||||||
|
|
||||||
|
public ActionAST getLexerAction() {
|
||||||
|
Tree blk = getFirstChildWithType(ANTLRParser.BLOCK);
|
||||||
|
if ( blk.getChildCount()==1 ) {
|
||||||
|
Tree onlyAlt = blk.getChild(0);
|
||||||
|
Tree lastChild = onlyAlt.getChild(onlyAlt.getChildCount()-1);
|
||||||
|
if ( lastChild.getType()==ANTLRParser.ACTION ) {
|
||||||
|
return (ActionAST)lastChild;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
package org.antlr.v4.tool;
|
||||||
|
|
||||||
|
import org.antlr.runtime.Token;
|
||||||
|
import org.antlr.runtime.tree.Tree;
|
||||||
|
|
||||||
|
public class TerminalAST extends GrammarASTWithOptions {
|
||||||
|
public static final String defaultTokenOption = "node";
|
||||||
|
|
||||||
|
public TerminalAST(GrammarAST node) {
|
||||||
|
super(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
public TerminalAST(Token t) { super(t); }
|
||||||
|
public TerminalAST(int type) { super(type); }
|
||||||
|
public TerminalAST(int type, Token t) { super(type, t); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Tree dupNode() { return new TerminalAST(this); }
|
||||||
|
}
|
Loading…
Reference in New Issue