adding new files

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 8658]
parrt 2011-06-17 15:42:21 -08:00
parent 72ee89294f
commit 2ddeb7c769
55 changed files with 7051 additions and 383 deletions

View File

@@ -27,9 +27,8 @@
*/
package org.antlr.v4.runtime;
import org.antlr.v4.analysis.ATNConfig;
import org.antlr.v4.misc.*;
import org.antlr.v4.runtime.atn.ParserInterpreter;
import org.antlr.v4.runtime.atn.*;
import java.util.*;

View File

@@ -1,7 +1,7 @@
package org.antlr.v4.runtime;
import org.antlr.v4.analysis.ATNConfig;
import org.antlr.v4.misc.OrderedHashSet;
import org.antlr.v4.runtime.atn.ATNConfig;
public class LexerNoViableAltException extends LexerRecognitionException {
/** Prediction began at what input index? */

View File

@@ -27,8 +27,8 @@
*/
package org.antlr.v4.runtime;
import org.antlr.v4.analysis.ATNConfig;
import org.antlr.v4.misc.OrderedHashSet;
import org.antlr.v4.runtime.atn.ATNConfig;
public class NoViableAltException extends RecognitionException {
/** Prediction began at what input index? */

View File

@@ -1,6 +1,5 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.analysis.LL1Analyzer;
import org.antlr.v4.automata.ATNSerializer;
import org.antlr.v4.misc.*;
import org.antlr.v4.runtime.RuleContext;

View File

@@ -0,0 +1,124 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.*;
/** An ATN state, predicted alt, and syntactic/semantic context.
* The syntactic context is a pointer into the rule invocation
* chain used to arrive at the state. The semantic context is
* the unordered set of semantic predicates encountered before reaching
* an ATN state.
*/
public class ATNConfig {
/** The ATN state associated with this configuration */
public ATNState state;
/** What alt (or lexer rule) is predicted by this configuration */
public int alt;
/** The stack of invoking states leading to the rule/states associated
* with this config.
*/
public RuleContext context;
/**
Indicates that we have reached this ATN configuration after
traversing a predicate transition. This is important because we
cannot cache DFA states derived from such configurations
otherwise predicates would not get executed again (DFAs don't
have predicated edges in v4).
*/
public boolean traversedPredicate;
/**
Indicates that we have reached this ATN configuration after
traversing a non-force action transition. We do not execute
predicates after such actions because the predicates could be
functions of the side effects. Force actions must be either side
effect free or automatically undone as the parse continues.
*/
public boolean traversedAction;
public ATNConfig(ATNState state,
int alt,
RuleContext context)
{
this.state = state;
this.alt = alt;
this.context = context;
}
public ATNConfig(ATNConfig c) {
this.state = c.state;
this.alt = c.alt;
this.context = c.context;
this.traversedPredicate = c.traversedPredicate;
this.traversedAction = c.traversedAction;
}
public ATNConfig(ATNConfig c, ATNState state) {
this(c);
this.state = state;
}
public ATNConfig(ATNConfig c, ATNState state, RuleContext context) {
this(c);
this.state = state;
this.context = context;
}
public ATNConfig(ATNConfig c, RuleContext context) {
this(c);
this.context = context;
}
/** An ATN configuration is equal to another if both have
* the same state, they predict the same alternative, and
* syntactic/semantic contexts are the same.
*/
public boolean equals(Object o) {
if ( o==null ) return false;
if ( this==o ) return true;
ATNConfig other = (ATNConfig)o;
return this.state.stateNumber==other.state.stateNumber &&
this.alt==other.alt &&
(this.context==other.context ||
this.context.equals(other.context));
}
public int hashCode() {
if ( state==null ) {
System.out.println("eh?");
}
int h = state.stateNumber + alt;
if ( context!=null ) h += context.hashCode();
return h;
}
public String toString() {
return toString(null, true);
}
public String toString(Recognizer<?,?> recog, boolean showAlt) {
StringBuffer buf = new StringBuffer();
if ( state.ruleIndex>0 ) {
if ( recog!=null ) buf.append(recog.getRuleNames()[state.ruleIndex]+":");
else buf.append(state.ruleIndex+":");
}
buf.append(state);
if ( showAlt ) {
buf.append("|");
buf.append(alt);
}
if ( context!=null ) {
buf.append("|");
buf.append(context);
}
// if (isAccept) {
// buf.append("|=>"+alt);
// }
// if ( context.approximated ) {
// buf.append("|approx");
// }
return buf.toString();
}
}
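Since equals() and hashCode() above key only on state number, predicted alt, and context, hash-based sets collapse duplicate configurations automatically. A minimal hedged sketch of that behavior (s and ctx stand for pre-built ATNState and RuleContext values; illustration only, not code from this commit):

Set<ATNConfig> closure = new HashSet<ATNConfig>();
closure.add(new ATNConfig(s, 1, ctx));
closure.add(new ATNConfig(s, 1, ctx)); // same state|alt|ctx: collapses to one entry
closure.add(new ATNConfig(s, 2, ctx)); // different predicted alt: kept
// closure.size() == 2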

View File

@@ -1,10 +1,9 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.analysis.ATNConfig;
import org.antlr.v4.misc.*;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.dfa.*;
import org.antlr.v4.tool.*;
import org.antlr.v4.tool.Grammar;
import java.util.*;

View File

@@ -1,75 +0,0 @@
package org.antlr.v4.runtime.atn;
/** Identical to ANTLR's static grammar analysis ATNContext object */
public class ATNStack {
public static final ATNStack EMPTY = new ATNStack(null, -1);
public ATNStack parent;
/** The ATN state following state that invoked another rule's start state
* is recorded on the rule invocation context stack.
*/
public int returnAddr;
/** Computing the hashCode is very expensive and ATN.addToClosure()
* uses it to track when it's seen a state|ctx before to avoid
* infinite loops. As we add new contexts, record the hash code
* as this + parent.cachedHashCode. Avoids walking
* up the tree for every hashCode(). Note that this caching works
* because a context is a monotonically growing tree of context nodes
* and nothing on the stack is ever modified...ctx just grows
* or shrinks.
*/
protected int cachedHashCode;
public ATNStack(ATNStack parent, int returnAddr) {
this.parent = parent;
this.returnAddr = returnAddr;
if ( returnAddr >= 0 ) {
this.cachedHashCode = returnAddr;
}
if ( parent!=null ) {
this.cachedHashCode += parent.cachedHashCode;
}
}
public int hashCode() { return cachedHashCode; }
/** Two contexts are equals() if both have the
* same call stack; walk upwards to the root.
* Recall that the root sentinel node has no parent.
* Note that you may be comparing contexts in different alt trees.
*/
public boolean equals(Object o) {
ATNStack other = ((ATNStack)o);
if ( this.cachedHashCode != other.cachedHashCode ) {
return false; // can't be same if hash is different
}
if ( this==other ) return true;
// System.out.println("comparing "+this+" with "+other);
ATNStack sp = this;
while ( sp.parent!=null && other.parent!=null ) {
if ( sp.returnAddr != other.returnAddr) return false;
sp = sp.parent;
other = other.parent;
}
if ( !(sp.parent==null && other.parent==null) ) {
return false; // both pointers must be at their roots after walk
}
return true;
}
public String toString() {
StringBuffer buf = new StringBuffer();
ATNStack sp = this;
buf.append("[");
while ( sp.parent!=null ) {
buf.append(sp.returnAddr);
buf.append(" ");
sp = sp.parent;
}
buf.append("$]");
return buf.toString();
}
}
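A small hedged sketch of the hash caching described above: each constructor call folds the new returnAddr into the parent's cached value, so hashCode() never walks the chain (values follow directly from the constructor; illustration only):

ATNStack a = new ATNStack(ATNStack.EMPTY, 5); // cachedHashCode = 5 + 0 (EMPTY caches 0)
ATNStack b = new ATNStack(a, 7);              // cachedHashCode = 7 + 5 = 12
// b.hashCode() returns 12 with no walk up the parent chain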

View File

@@ -1,6 +1,5 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.analysis.ATNConfig;
import org.antlr.v4.misc.OrderedHashSet;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.dfa.*;

View File

@@ -1,11 +1,8 @@
package org.antlr.v4.runtime.atn;
import org.antlr.runtime.CharStream;
import org.antlr.v4.analysis.ATNConfig;
import org.antlr.v4.misc.*;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.dfa.*;
import org.antlr.v4.tool.DOTGenerator;
import org.stringtemplate.v4.misc.MultiMap;
import java.util.*;
@@ -37,7 +34,7 @@ public class ParserInterpreter extends ATNInterpreter {
this.parser = parser;
ctxToDFAs = new HashMap<RuleContext, DFA[]>();
decisionToDFA = new DFA[atn.getNumberOfDecisions()+1];
DOTGenerator dot = new DOTGenerator(null);
// DOTGenerator dot = new DOTGenerator(null);
// System.out.println(dot.getDOT(atn.rules.get(0), parser.getRuleNames()));
// System.out.println(dot.getDOT(atn.rules.get(1), parser.getRuleNames()));
}

View File

@@ -38,7 +38,6 @@ public class DFA {
public Map<DFAState, DFAState> states = new LinkedHashMap<DFAState, DFAState>();
public DFAState s0;
public int decision;
// public int maxTokenType;
/** From which ATN state did we create this DFA? */
public ATNState atnStartState;
@@ -49,47 +48,6 @@
public boolean conflict;
public DFA(ATNState atnStartState) { this.atnStartState = atnStartState; }
// public DFA(int maxTokenType) { this.maxTokenType = maxTokenType; }
/*
public void addAll(Collection<DFAState> states) {
for (DFAState p : states) {
//addDFAEdge(p, t, q);
}
}
public void addDFAEdge(OrderedHashSet<ATNConfig> p,
int t,
OrderedHashSet<ATNConfig> q)
{
// System.out.println("MOVE "+p+" -> "+q+" upon "+getTokenName(t));
DFAState from = addDFAState(p);
DFAState to = addDFAState(q);
addDFAEdge(from, t, to);
}
public void addDFAEdge(DFAState p, int t, DFAState q) {
if ( p.edges==null ) {
p.edges = new DFAState[maxTokenType+1]; // TODO: make adaptive
}
p.edges[t] = q; // connect
}
protected DFAState addDFAState(OrderedHashSet<ATNConfig> configs) {
DFAState proposed = new DFAState(configs);
DFAState existing = states.get(proposed);
DFAState p;
if ( existing!=null ) p = existing;
else {
proposed.stateNumber = states.size();
proposed.configs = new OrderedHashSet<ATNConfig>();
proposed.configs.addAll(configs);
states.put(proposed, proposed);
p = proposed;
}
return p;
}
*/
public String toString() { return toString(null); }

View File

@@ -1,8 +1,8 @@
package org.antlr.v4.runtime.dfa;
import org.antlr.v4.analysis.ATNConfig;
import org.antlr.v4.misc.OrderedHashSet;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.atn.ATNConfig;
import java.util.*;

View File

@@ -1,219 +0,0 @@
package org.antlr.v4.runtime.misc;
import org.antlr.v4.runtime.Token;
/** */
public class LABitSet implements Cloneable {
public final static int BITS = 64; // number of bits / long
public final static int LOG_BITS = 6; // 2^6 == 64
/* We will often need to do a mod operator (i mod nbits). It
* turns out that, for powers of two, this mod operation is the
* same as (i & (nbits-1)). Since mod is slow, we use a
* precomputed mod mask to do the mod instead.
*/
public final static int MOD_MASK = BITS - 1;
public static final LABitSet EOF_SET = LABitSet.of(Token.EOF);
/** The actual data bits */
public long bits[];
public boolean EOF; // is EOF in set (-1)?
/** Construct a bitset of size one word (64 bits) */
public LABitSet() {
this(BITS);
}
/** Construct a bitset given the size
* @param nbits The size of the bitset in bits
*/
public LABitSet(int nbits) {
bits = new long[((nbits - 1) >> LOG_BITS) + 1];
}
/** Construction from a static array of longs */
public LABitSet(long[] bits_) {
if ( bits_==null || bits_.length==0 ) bits = new long[1];
else bits = bits_;
}
/** Construction from a static array of longs */
public LABitSet(long[] bits_, boolean EOF) {
this(bits_);
this.EOF = EOF;
}
public static LABitSet of(int el) {
LABitSet s = new LABitSet(el + 1);
s.add(el);
return s;
}
/** or this element into this set (grow as necessary to accommodate) */
public void add(int el) {
//System.out.println("add("+el+")");
if ( el==Token.EOF ) { EOF = true; return; }
int n = wordNumber(el);
//System.out.println("word number is "+n);
//System.out.println("bits.length "+bits.length);
if (n >= bits.length) {
growToInclude(el);
}
bits[n] |= bitMask(el);
}
public boolean member(int el) {
if ( el == Token.EOF ) return EOF;
int n = wordNumber(el);
if (n >= bits.length) return false;
return (bits[n] & bitMask(el)) != 0;
}
/** return this | a in a new set */
public LABitSet or(LABitSet a) {
if ( a==null ) {
return this;
}
LABitSet s = (LABitSet)this.clone();
s.orInPlace((LABitSet)a);
return s;
}
public void orInPlace(LABitSet a) {
if ( a==null ) {
return;
}
// If this is smaller than a, grow this first
if (a.bits.length > bits.length) {
setSize(a.bits.length);
}
int min = Math.min(bits.length, a.bits.length);
for (int i = min - 1; i >= 0; i--) {
bits[i] |= a.bits[i];
}
EOF = EOF | a.EOF;
}
// remove this element from this set
public void remove(int el) {
if ( el==Token.EOF ) { EOF = false; return; }
int n = wordNumber(el);
if (n >= bits.length) {
throw new IllegalArgumentException(el+" is outside set range of "+bits.length+" words");
}
bits[n] &= ~bitMask(el);
}
public Object clone() {
LABitSet s;
try {
s = (LABitSet)super.clone();
s.bits = new long[bits.length];
System.arraycopy(bits, 0, s.bits, 0, bits.length);
s.EOF = EOF;
return s;
}
catch (CloneNotSupportedException e) {
e.printStackTrace(System.err);
}
return null;
}
/**
* Sets the size of a set.
* @param nwords how many words the new set should be
*/
void setSize(int nwords) {
long newbits[] = new long[nwords];
int n = Math.min(nwords, bits.length);
System.arraycopy(bits, 0, newbits, 0, n);
bits = newbits;
}
/** Get the first element you find and return it. */
public int getSingleElement() {
for (int i = 0; i < (bits.length << LOG_BITS); i++) {
if (member(i)) {
return i;
}
}
return Token.INVALID_TYPE;
}
/** Transform a bit set into a string by formatting each element as an integer.
* @return A comma-separated list of values
*/
public String toString() {
StringBuffer buf = new StringBuffer();
String separator = ",";
boolean havePrintedAnElement = false;
buf.append('{');
if ( EOF ) { buf.append("EOF"); havePrintedAnElement=true; }
for (int i = 0; i < (bits.length << LOG_BITS); i++) {
if (member(i)) {
if ( havePrintedAnElement ) {
buf.append(separator);
}
buf.append(i);
havePrintedAnElement = true;
}
}
buf.append('}');
return buf.toString();
}
// /**Create a string representation where instead of integer elements, the
// * ith element of vocabulary is displayed instead. Vocabulary is a Vector
// * of Strings.
// * separator The string to put in between elements
// * @return A commma-separated list of character constants.
// */
// public String toString(String separator, List vocabulary) {
// String str = "";
// for (int i = 0; i < (bits.length << LOG_BITS); i++) {
// if (member(i)) {
// if (str.length() > 0) {
// str += separator;
// }
// if (i >= vocabulary.size()) {
// str += "'" + (char)i + "'";
// }
// else if (vocabulary.get(i) == null) {
// str += "'" + (char)i + "'";
// }
// else {
// str += (String)vocabulary.get(i);
// }
// }
// }
// return str;
// }
/**
* Grows the set to a larger number of bits.
* @param bit element that must fit in set
*/
public void growToInclude(int bit) {
int newSize = Math.max(bits.length << 1, numWordsToHold(bit));
long newbits[] = new long[newSize];
System.arraycopy(bits, 0, newbits, 0, bits.length);
bits = newbits;
}
static long bitMask(int bitNumber) {
int bitPosition = bitNumber & MOD_MASK; // bitNumber mod BITS
return 1L << bitPosition;
}
static int numWordsToHold(int el) {
return (el >> LOG_BITS) + 1;
}
static int wordNumber(int bit) {
return bit >> LOG_BITS; // bit / BITS
}
}
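A minimal standalone sketch of the power-of-two arithmetic the comments above rely on (i >> LOG_BITS is i/64 and i & MOD_MASK is i%64; demo values are hypothetical):

int el = 130;
int word = el >> 6;     // 130/64 == 2, what wordNumber(el) computes
int bit  = el & 63;     // 130%64 == 2, the precomputed mod-mask trick
long mask = 1L << bit;  // what bitMask(el) computes
// membership test: (bits[word] & mask) != 0, as in member()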

View File

@@ -27,8 +27,6 @@
*/
package org.antlr.v4.runtime.misc;
import org.antlr.runtime.misc.FastQueue;
import java.util.NoSuchElementException;
/** A lookahead queue that knows how to mark/release locations

View File

@@ -27,10 +27,11 @@
*/
package org.antlr.v4.runtime.tree;
import org.antlr.runtime.BitSet;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.gui.ASTViewer;
import java.util.Set;
/** A tree node that is a wrapper for a Token object. After 3.0 release
* while building tree rewrite stuff, it became clear that computing
* parent and child index is very difficult and cumbersome. Better to
@@ -194,12 +195,12 @@ public class CommonTree extends BaseTree {
}
// TODO: don't include this node!!
public CommonTree getFirstDescendantWithType(BitSet types) {
if ( types.member(getType()) ) return this;
public CommonTree getFirstDescendantWithType(Set<Integer> types) {
if ( types.contains(getType()) ) return this;
if ( children==null ) return null;
for (Object c : children) {
CommonTree t = (CommonTree)c;
if ( types.member(t.getType()) ) return t;
if ( types.contains(t.getType()) ) return t;
CommonTree d = t.getFirstDescendantWithType(types);
if ( d!=null ) return d;
}

View File

@@ -1,28 +1,35 @@
package org.antlr.v4;
import org.antlr.runtime.*;
import org.antlr.tool.DOTGenerator;
import org.antlr.v4.parse.*;
import org.antlr.v4.tool.*;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.*;
public class Tool {
public String VERSION = "4.0-"+new Date();
public static enum OptionArgType { NONE, STRING, INT }
public static enum OptionArgType { NONE, STRING }
public static class Option {
String fieldName;
String name;
OptionArgType argType;
Object defaultArgValue;
String description;
public Option(String name, String description) {
this(name, OptionArgType.NONE, null, description);
public Option(String fieldName, String name, String description) {
this(fieldName, name, OptionArgType.NONE, null, description);
}
public Option(String name, OptionArgType argType, String description) {
this(name, argType, null, description);
public Option(String fieldName, String name, OptionArgType argType, String description) {
this(fieldName, name, argType, null, description);
}
public Option(String name, OptionArgType argType, Object defaultArgValue, String description) {
public Option(String fieldName, String name, OptionArgType argType, Object defaultArgValue, String description) {
this.fieldName = fieldName;
this.name = name;
this.argType = argType;
this.defaultArgValue = defaultArgValue;
@@ -30,29 +37,42 @@ public class Tool {
}
}
// fields set by option manager
public String outputDirectory = ".";
public String libDirectory = ".";
public boolean report = false;
public boolean printGrammar = false;
public boolean debug = false;
public boolean profile = false;
public boolean trace = false;
public boolean generate_ATN_dot = false;
public String msgFormat = "antlr";
public boolean saveLexer = false;
public boolean launch_ST_inspector = false;
public static Option[] optionDefs = {
new Option("o", OptionArgType.STRING, ".", "specify output directory where all output is generated"),
new Option("fo", OptionArgType.STRING, "same as -o but force even files with relative paths to dir"),
new Option("lib", "specify location of .token files"),
new Option("report", "print out a report about the grammar(s) processed"),
new Option("print", "print out the grammar without actions"),
new Option("debug", "generate a parser that emits debugging events"),
new Option("profile", "generate a parser that computes profiling information"),
new Option("atn", "generate rule augmented transition networks"),
new Option("message-format", OptionArgType.STRING, "specify output style for messages"),
new Option("version", "print the version of ANTLR and exit"),
new Option("savelexer", "save temp lexer file created for combined grammars"),
new Option("dbgST", "launch StringTemplate visualizer on generated code"),
new Option("outputDirectory", "-o", OptionArgType.STRING, ".", "specify output directory where all output is generated"),
new Option("libDirectory", "-lib", OptionArgType.STRING, ".", "specify location of .token files"),
new Option("report", "-report", "print out a report about the grammar(s) processed"),
new Option("printGrammar", "-print", "print out the grammar without actions"),
new Option("debug", "-debug", "generate a parser that emits debugging events"),
new Option("profile", "-profile", "generate a parser that computes profiling information"),
new Option("trace", "-trace", "generate a recognizer that traces rule entry/exit"),
new Option("generate_ATN_dot", "-atn", "generate rule augmented transition networks"),
new Option("msgFormat", "-message-format", OptionArgType.STRING, "antlr", "specify output style for messages"),
new Option("saveLexer", "-savelexer", "save temp lexer file created for combined grammars"),
new Option("launch_ST_inspector", "-dbgST", "launch StringTemplate visualizer on generated code"),
};
protected Map<String, Object> options = new HashMap<String, Object>();
public final String[] args;
protected String[] args;
protected List<String> grammarFiles = new ArrayList<String>();
public ErrorManager errMgr = new ErrorManager(this);
List<ANTLRToolListener> listeners =
Collections.synchronizedList(new ArrayList<ANTLRToolListener>());
/** Track separately so if someone adds a listener, it's the only one
* instead of it and the default stderr listener.
@@ -61,7 +81,8 @@ public class Tool {
public static void main(String[] args) {
Tool antlr = new Tool(args);
antlr.help();
if ( args.length == 0 ) { antlr.help(); antlr.exit(0); }
antlr.processGrammarsOnCommandLine();
if (antlr.errMgr.getNumErrors() > 0) {
@@ -79,18 +100,294 @@
public Tool(String[] args) {
this.args = args;
parseArgs();
}
protected void parseArgs() {
int i=0;
while ( args!=null && i<args.length ) {
String arg = args[i];
i++;
if ( arg.charAt(0)!='-' ) { // file name
grammarFiles.add(arg);
continue;
}
for (Option o : optionDefs) {
if ( arg.equals(o.name) ) {
String value = null;
if ( o.argType==OptionArgType.STRING ) {
value = args[i];
i++;
}
// use reflection to set field
Class c = this.getClass();
try {
Field f = c.getField(o.fieldName);
if ( value==null ) f.setBoolean(this, true);
else f.set(this, value);
}
catch (Exception e) {
errMgr.toolError(ErrorType.INTERNAL_ERROR, "can't access field "+o.fieldName);
}
}
}
}
}
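The new fieldName column in optionDefs is what lets parseArgs() above replace a hand-written switch with reflection: each matched flag sets the public Tool field of that name. A hypothetical invocation and its effect:

Tool antlr = new Tool(new String[] {"-o", "build/gen", "-report", "T.g"});
// reflection sets antlr.outputDirectory = "build/gen" (STRING option takes an argument)
// and antlr.report = true (boolean flag, no argument);
// the non-flag "T.g" is collected into grammarFiles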
public void processGrammarsOnCommandLine() {
for (String fileName : grammarFiles) {
GrammarAST t = load(fileName);
if ( t instanceof GrammarASTErrorNode ) return; // came back as error node
if ( ((GrammarRootAST)t).hasErrors ) return;
GrammarRootAST ast = (GrammarRootAST)t;
Grammar g = createGrammar(ast);
g.fileName = fileName;
process(g);
}
}
public void process(Grammar g) {
GrammarRootAST lexerAST = null;
if ( g.ast!=null && g.ast.grammarType== ANTLRParser.COMBINED &&
!g.ast.hasErrors )
{
lexerAST = extractImplicitLexer(g); // alters ast
}
processNonCombinedGrammar(g);
if ( g.ast!=null && g.ast.grammarType== ANTLRParser.COMBINED &&
!g.ast.hasErrors )
{
if ( lexerAST!=null ) {
LexerGrammar lexerg = new LexerGrammar(this, lexerAST);
lexerg.fileName = g.fileName;
g.implicitLexer = lexerg;
lexerg.implicitLexerOwner = g;
lexerg.importVocab(g);
processNonCombinedGrammar(lexerg);
g.importVocab(lexerg);
}
}
}
public void processNonCombinedGrammar(Grammar g) {
g.loadImportedGrammars();
if ( g.ast!=null && internalOption_PrintGrammarTree ) System.out.println(g.ast.toStringTree());
//g.ast.inspect();
// MAKE SURE GRAMMAR IS SEMANTICALLY CORRECT (FILL IN GRAMMAR OBJECT)
SemanticPipeline sem = new SemanticPipeline(g);
sem.process();
if ( errMgr.getNumErrors()>0 ) return;
if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
for (Grammar imp : g.getImportedGrammars()) {
processNonCombinedGrammar(imp);
}
}
// BUILD ATN FROM AST
ATNFactory factory = new ParserATNFactory(g);
if ( g.isLexer() ) factory = new LexerATNFactory((LexerGrammar)g);
g.atn = factory.createATN();
if ( generate_ATN_dot ) generateATNs(g);
// PERFORM GRAMMAR ANALYSIS ON ATN: BUILD DECISION DFAs
AnalysisPipeline anal = new AnalysisPipeline(g);
anal.process();
//if ( generate_DFA_dot ) generateDFAs(g);
if ( g.tool.getNumErrors()>0 ) return;
// GENERATE CODE
CodeGenPipeline gen = new CodeGenPipeline(g);
gen.process();
}
public Grammar createGrammar(GrammarRootAST ast) {
if ( ast.grammarType==ANTLRParser.LEXER ) return new LexerGrammar(this, ast);
else return new Grammar(this, ast);
}
public GrammarAST load(String fileName) {
ANTLRFileStream in = null;
try {
in = new ANTLRFileStream(fileName);
}
catch (IOException ioe) {
errMgr.toolError(ErrorType.CANNOT_OPEN_FILE, fileName, ioe);
}
return load(in);
}
public GrammarAST loadFromString(String grammar) {
return load(new ANTLRStringStream(grammar));
}
public GrammarAST load(CharStream in) {
try {
ANTLRLexer lexer = new ANTLRLexer(in);
CommonTokenStream tokens = new CommonTokenStream(lexer);
ToolANTLRParser p = new ToolANTLRParser(tokens, this);
p.setTreeAdaptor(new GrammarASTAdaptor(in));
ParserRuleReturnScope r = p.grammarSpec();
GrammarAST root = (GrammarAST) r.getTree();
if ( root instanceof GrammarRootAST ) {
((GrammarRootAST)root).hasErrors = p.getNumberOfSyntaxErrors()>0;
}
return root;
}
catch (RecognitionException re) {
// TODO: do we gen errors now?
errMgr.internalError("can't generate this message at the moment; antlr recovers");
}
return null;
}
/** Build lexer grammar from combined grammar that looks like:
*
* (COMBINED_GRAMMAR A
* (tokens { X (= Y 'y'))
* (OPTIONS (= x 'y'))
* (scope Blort { int x; })
* (@ members {foo})
* (@ lexer header {package jj;})
* (RULES (RULE .+)))
*
* Move rules and actions to new tree, don't dup. Split AST apart.
* We'll have this Grammar share token symbols later; don't generate
* tokenVocab or tokens{} section.
*
* Side-effects: it removes children from GRAMMAR & RULES nodes
* in combined AST. Careful: nodes are shared between
* trees after this call.
*/
public GrammarRootAST extractImplicitLexer(Grammar combinedGrammar) {
GrammarRootAST combinedAST = combinedGrammar.ast;
//System.out.println("before="+combinedAST.toStringTree());
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(combinedAST.token.getInputStream());
List<org.antlr.v4.tool.GrammarAST> elements = combinedAST.getChildren();
// MAKE A GRAMMAR ROOT and ID
String lexerName = combinedAST.getChild(0).getText()+"Lexer";
GrammarRootAST lexerAST =
new GrammarRootAST(new CommonToken(ANTLRParser.GRAMMAR,"LEXER_GRAMMAR"));
lexerAST.grammarType = ANTLRParser.LEXER;
lexerAST.token.setInputStream(combinedAST.token.getInputStream());
lexerAST.addChild((org.antlr.v4.tool.GrammarAST)adaptor.create(ANTLRParser.ID, lexerName));
// MOVE OPTIONS
org.antlr.v4.tool.GrammarAST optionsRoot =
(org.antlr.v4.tool.GrammarAST)combinedAST.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( optionsRoot!=null ) {
org.antlr.v4.tool.GrammarAST lexerOptionsRoot = (org.antlr.v4.tool.GrammarAST)adaptor.dupNode(optionsRoot);
lexerAST.addChild(lexerOptionsRoot);
List<org.antlr.v4.tool.GrammarAST> options = optionsRoot.getChildren();
for (org.antlr.v4.tool.GrammarAST o : options) {
String optionName = o.getChild(0).getText();
if ( !Grammar.doNotCopyOptionsToLexer.contains(optionName) ) {
lexerOptionsRoot.addChild(o);
}
}
}
// MOVE lexer:: actions
List<org.antlr.v4.tool.GrammarAST> actionsWeMoved = new ArrayList<org.antlr.v4.tool.GrammarAST>();
for (org.antlr.v4.tool.GrammarAST e : elements) {
if ( e.getType()==ANTLRParser.AT ) {
if ( e.getChild(0).getText().equals("lexer") ) {
lexerAST.addChild(e);
actionsWeMoved.add(e);
}
}
}
elements.removeAll(actionsWeMoved);
org.antlr.v4.tool.GrammarAST combinedRulesRoot =
(org.antlr.v4.tool.GrammarAST)combinedAST.getFirstChildWithType(ANTLRParser.RULES);
if ( combinedRulesRoot==null ) return lexerAST;
// MOVE lexer rules
org.antlr.v4.tool.GrammarAST lexerRulesRoot =
(org.antlr.v4.tool.GrammarAST)adaptor.create(ANTLRParser.RULES, "RULES");
lexerAST.addChild(lexerRulesRoot);
List<org.antlr.v4.tool.GrammarAST> rulesWeMoved = new ArrayList<org.antlr.v4.tool.GrammarAST>();
List<GrammarASTWithOptions> rules = combinedRulesRoot.getChildren();
for (GrammarASTWithOptions r : rules) {
String ruleName = r.getChild(0).getText();
if ( Character.isUpperCase(ruleName.charAt(0)) ) {
lexerRulesRoot.addChild(r);
rulesWeMoved.add(r);
}
}
int nLexicalRules = rulesWeMoved.size();
rules.removeAll(rulesWeMoved);
// Will track 'if' from IF : 'if' ; rules to avoid defining new token for 'if'
Map<String,String> litAliases =
Grammar.getStringLiteralAliasesFromLexerRules(lexerAST);
if ( nLexicalRules==0 && (litAliases==null||litAliases.size()==0) &&
combinedGrammar.stringLiteralToTypeMap.size()==0 )
{
// no rules, tokens{}, or 'literals' in grammar
return null;
}
// add strings from combined grammar (and imported grammars) into the lexer
for (String lit : combinedGrammar.stringLiteralToTypeMap.keySet()) {
if ( litAliases!=null && litAliases.containsKey(lit) ) continue; // already has rule
// create for each literal: (RULE <uniquename> (BLOCK (ALT <lit>))
String rname = combinedGrammar.getStringLiteralLexerRuleName(lit);
// can't use wizard; need special node types
org.antlr.v4.tool.GrammarAST litRule = new RuleAST(ANTLRParser.RULE);
BlockAST blk = new BlockAST(ANTLRParser.BLOCK);
AltAST alt = new AltAST(ANTLRParser.ALT);
TerminalAST slit = new TerminalAST(new org.antlr.runtime.CommonToken(ANTLRParser.STRING_LITERAL, lit));
alt.addChild(slit);
blk.addChild(alt);
CommonToken idToken = new CommonToken(ANTLRParser.ID, rname);
litRule.addChild(new TerminalAST(idToken));
litRule.addChild(blk);
lexerRulesRoot.addChild(litRule);
// (GrammarAST)
// wiz.create("(RULE ID["+rname+"] (BLOCK (ALT STRING_LITERAL["+lit+"])))");
}
System.out.println("after ="+combinedAST.toStringTree());
System.out.println("lexer ="+lexerAST.toStringTree());
return lexerAST;
}
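To make the split concrete, a hypothetical combined grammar and what the method above moves (grammar and rule names invented for illustration):

// grammar T;                     // combined grammar
// r  : 'if' ID ;                 // parser rule: stays in the combined AST
// ID : ('a'..'z')+ ;             // upper-case rule: moved to the lexer AST
//
// resulting implicit lexer AST, roughly:
// lexer grammar TLexer;          // name = combined grammar name + "Lexer"
// ID : ('a'..'z')+ ;
// <rname> : 'if' ;               // synthesized rule for the literal 'if';
//                                // rname comes from getStringLiteralLexerRuleName(lit)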
public void generateATNs(Grammar g) {
DOTGenerator dotGenerator = new DOTGenerator(g);
List<Grammar> grammars = new ArrayList<Grammar>();
grammars.add(g);
List<Grammar> imported = g.getAllImportedGrammars();
if ( imported!=null ) grammars.addAll(imported);
for (Grammar ig : grammars) {
for (Rule r : ig.rules.values()) {
try {
String dot = dotGenerator.getDOT(g.atn.ruleToStartState.get(r));
if (dot != null) {
writeDOTFile(g, r, dot);
}
} catch (IOException ioe) {
errMgr.toolError(ErrorType.CANNOT_WRITE_FILE, ioe);
}
}
}
}
public void help() {
info("ANTLR Parser Generator Version " + new Tool().VERSION);
for (Option o : optionDefs) {
String name = o.name + (o.argType!=OptionArgType.NONE? " ___" : "");
String s = String.format(" -%-19s %s", name, o.description);
String s = String.format(" %-19s %s", name, o.description);
info(s);
}
}
@@ -124,10 +421,9 @@ public class Tool {
for (ANTLRToolListener l : listeners) l.warning(msg);
}
public void version() {
info("ANTLR Parser Generator Version " + new Tool().VERSION);
}
public void exit(int e) { System.exit(e); }

View File

@@ -0,0 +1,189 @@
package org.antlr.v4.automata;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.tool.*;
import java.util.List;
public interface ATNFactory {
/** A pair of states pointing to the left/right (start and end) states of a
* state submachine. Used to build ATNs.
*/
public static class Handle {
public ATNState left;
public ATNState right;
public Handle(ATNState left, ATNState right) {
this.left = left;
this.right = right;
}
@Override
public String toString() {
return "("+left+","+right+")";
}
}
ATN createATN();
void setCurrentRuleName(String name);
Handle rule(GrammarAST ruleAST, String name, Handle blk);
ATNState newState();
Handle label(Handle t);
Handle listLabel(Handle t);
Handle tokenRef(TerminalAST node);
/** From set build single edge graph o->o-set->o. To conform to
* what an alt block looks like, must have extra state on left.
*/
Handle set(IntervalSet set, GrammarAST associatedAST);
Handle tree(List<Handle> els);
Handle range(GrammarAST a, GrammarAST b);
Handle not(GrammarAST a);
/** For a non-lexer, just build a simple token reference atom.
* For a lexer, a string is a sequence of char to match. That is,
* "fog" is treated as 'f' 'o' 'g' not as a single transition in
* the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
* for n characters.
*/
Handle stringLiteral(TerminalAST stringLiteralAST);
/** For reference to rule r, build
*
* o-e->(r) o
*
* where (r) is the start of rule r and the trailing o is not linked
* to from the rule ref state directly (it's done through the transition(0)
* RuleClosureTransition).
*
* If the rule r is just a list of tokens, its block will be just
* a set on an edge o->o->o-set->o->o->o; we could inline it rather than doing
* the rule reference, but I'm not doing this yet as I'm not sure
* it would help much in the ATN->DFA construction.
*
* TODO add to codegen: collapse alt blks that are sets into single matchSet
* @param node
*/
Handle ruleRef(GrammarAST node);
/** From an empty alternative build Handle o-e->o */
Handle epsilon(GrammarAST node);
/** Build what amounts to an epsilon transition with a semantic
* predicate action. The pred is a pointer into the AST of
* the SEMPRED token.
*/
Handle sempred(PredAST pred);
Handle gated_sempred(GrammarAST pred);
/** Build what amounts to an epsilon transition with an action.
* The action goes into ATN though it is ignored during analysis.
* It slows things down a bit, but I must ignore predicates after
* having seen an action (5-5-2008).
*/
Handle action(ActionAST action);
Handle alt(List<Handle> els);
/** From A|B|..|Z alternative block build
*
* o->o-A->o->o (last ATNState is blockEndATNState pointed to by all alts)
* | ^
* o->o-B->o--|
* | |
* ... |
* | |
* o->o-Z->o--|
*
* So every alternative gets a begin ATNState connected by epsilon,
* and every alt right side points at a block end ATNState. There is a
* new ATNState in the Handle for each alt plus one for the
* end ATNState.
*
* Special case: only one alternative: don't make a block with alt
* begin/end.
*
* Special case: if just a list of tokens/chars/sets, then collapse
* to a single edge'd o-set->o graph.
*
* Set alt number (1..n) in the left-Transition ATNState.
*/
Handle block(BlockAST blockAST, GrammarAST ebnfRoot, List<Handle> alternativeGrips);
Handle notBlock(GrammarAST blockAST, List<GrammarAST> terminals);
/** From (A)? build either:
*
* o--A->o
* | ^
* o---->|
*
* or, if A is a block, just add an empty alt to the end of the block
*/
Handle optional(GrammarAST optAST, Handle blk);
/** From (A)+ build
*
* |---| (Transition 2 from A.right points at alt 1)
* v | (follow of loop is Transition 1)
* o->o-A-o->o
*
* Meaning that the last ATNState in A points back to A's left Transition ATNState
* and we add a new begin/end ATNState. A can be single alternative or
* multiple.
*
* During analysis we'll call the follow link (transition 1) alt n+1 for
* an n-alt A block.
*/
Handle plus(GrammarAST plusAST, Handle blk);
/** From (A)* build
*
* |---|
* v |
* o->o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1)
* | ^
* o---------| (optional branch is 2nd alt of optional block containing A+)
*
* Meaning that the last (end) ATNState in A points back to A's
* left side ATNState and we add 3 new ATNStates (the
* optional branch is built just like an optional subrule).
* See the Aplus() method for more on the loop back Transition.
* The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we
* can detect nested (A*)* loops and insert an extra node. Previously,
* two blocks shared same EOB node.
*
* There are 2 or 3 decision points in an A*. If A is not a block (i.e.,
* it only has one alt), then there are two decisions: the optional bypass
* and then loopback. If A is a block of alts, then there are three
* decisions: bypass, loopback, and A's decision point.
*
* Note that the optional bypass must be outside the loop as (A|B)* is
* not the same thing as (A|B|)+.
*
* This is an accurate ATN representation of the meaning of (A)*, but
* for generating code, I don't need a DFA for the optional branch by
* virtue of how I generate code. The exit-loopback-branch decision
* is sufficient to let me make an appropriate enter, exit, loop
* determination. See codegen.g
*/
Handle star(GrammarAST starAST, Handle blk);
/** Build an atom with all possible values in its label */
Handle wildcard(GrammarAST associatedAST);
/** Build a subrule matching ^(. .*) (any tree or node). Let's use
* (^(. .+) | .) to be safe.
*/
Handle wildcardTree(GrammarAST associatedAST);
}
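A hedged sketch of how a client might drive this interface for a two-alternative rule r : A | B ; (aNode, bNode, blockAST, and ruleAST are assumed pre-parsed AST nodes; in the tool these calls are generated from ATNBuilder.g, per ParserATNFactory below):

ATNFactory f = new ParserATNFactory(g);
f.setCurrentRuleName("r");
ATNFactory.Handle altA = f.alt(Arrays.asList(f.tokenRef(aNode)));
ATNFactory.Handle altB = f.alt(Arrays.asList(f.tokenRef(bNode)));
ATNFactory.Handle blk  = f.block(blockAST, null, Arrays.asList(altA, altB)); // ebnfRoot==null: plain block
f.rule(ruleAST, "r", blk); // wires RuleStart -> block -> RuleStop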

View File

@@ -0,0 +1,85 @@
package org.antlr.v4.automata;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.tool.Grammar;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/** An ATN walker that knows how to dump ATNs to serialized strings. */
public class ATNPrinter {
List<ATNState> work;
Set<ATNState> marked;
Grammar g;
ATNState start;
public ATNPrinter(Grammar g, ATNState start) {
this.g = g;
this.start = start;
}
public String toString() {
if ( start==null ) return null;
marked = new HashSet<ATNState>();
work = new ArrayList<ATNState>();
work.add(start);
StringBuilder buf = new StringBuilder();
ATNState s = null;
while ( work.size()>0 ) {
s = work.remove(0);
if ( marked.contains(s) ) continue;
int n = s.getNumberOfTransitions();
//System.out.println("visit "+getATNStateString(s)+"; edges="+n);
marked.add(s);
for (int i=0; i<n; i++) {
Transition t = s.transition(i);
if ( !(s instanceof RuleStopState) ) { // don't add follow states to work
if ( t instanceof RuleTransition ) work.add(((RuleTransition)t).followState);
else work.add( t.target );
}
buf.append(getStateString(s));
if ( t instanceof EpsilonTransition ) {
buf.append("->"+ getStateString(t.target)+'\n');
}
else if ( t instanceof RuleTransition ) {
buf.append("->"+ getStateString(t.target)+'\n');
}
else if ( t instanceof ActionTransition ) {
ActionTransition a = (ActionTransition)t;
buf.append("-"+a.actionAST.getText()+"->"+ getStateString(t.target)+'\n');
}
else if ( t instanceof AtomTransition ) {
AtomTransition a = (AtomTransition)t;
buf.append("-"+a.toString(g)+"->"+ getStateString(t.target)+'\n');
}
else {
buf.append("-"+t.toString(g)+"->"+ getStateString(t.target)+'\n');
}
}
}
return buf.toString();
}
String getStateString(ATNState s) {
if ( s==null ) {
System.out.println("s==null");
}
int n = s.stateNumber;
String stateStr = "s"+n;
if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
else if ( s instanceof PlusBlockStartState ) stateStr = "PlusBlockStart_"+n;
else if ( s instanceof BlockStartState) stateStr = "BlockStart_"+n;
else if ( s instanceof BlockEndState ) stateStr = "BlockEnd_"+n;
else if ( s instanceof RuleStartState) stateStr = "RuleStart_"+s.rule.name+"_"+n;
else if ( s instanceof RuleStopState ) stateStr = "RuleStop_"+s.rule.name+"_"+n;
else if ( s instanceof PlusLoopbackState) stateStr = "PlusLoopBack_"+n;
else if ( s instanceof StarLoopbackState) stateStr = "StarLoopBack_"+n;
return stateStr;
}
}
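Given getStateString() and the edge rendering above, the dump is one edge per line; a hypothetical excerpt for a rule matching a single token (state numbers invented):

// new ATNPrinter(g, ruleStart).toString() might yield:
// RuleStart_r_0->s2
// s2-'a'->s3
// s3->RuleStop_r_1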

View File

@@ -0,0 +1,218 @@
package org.antlr.v4.automata;
import org.antlr.v4.misc.*;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.tool.Rule;
import java.util.*;
public class ATNSerializer {
public ATN atn;
public List<IntervalSet> sets = new ArrayList<IntervalSet>();
public ATNSerializer(ATN atn) { this.atn = atn; }
/** Serialize state descriptors, edge descriptors, and decision->state map
* into list of ints:
*
* grammar-type, (ANTLRParser.LEXER, ...)
* max token type,
* num states,
* state-0-type ruleIndex, state-1-type ruleIndex, ...
* num rules,
* rule-1-start-state rule-1-args, rule-2-start-state rule-2-args, ...
* (args are token type,actionIndex in lexer else 0,0)
* num modes,
* mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
* num sets
* set-0-interval-count intervals, set-1-interval-count intervals, ...
* num total edges,
* src, trg, edge-type, edge arg1, edge arg2 (always present; 0 if unused), ...
* num decisions,
* decision-0-start-state, decision-1-start-state, ...
*
* Convenient to pack into unsigned shorts so the data can be encoded as a Java string.
*/
public List<Integer> serialize() {
List<Integer> data = new ArrayList<Integer>();
data.add(atn.g.getType());
data.add(atn.g.getMaxTokenType());
data.add(atn.states.size());
int nedges = 0;
// dump states, count edges and collect sets while doing so
for (ATNState s : atn.states) {
data.add(ATNState.serializationTypes.get(s.getClass()));
if ( s.rule!=null ) data.add(s.rule.index);
else data.add(s.ruleIndex);
nedges += s.getNumberOfTransitions();
for (int i=0; i<s.getNumberOfTransitions(); i++) {
Transition t = s.transition(i);
int edgeType = Transition.serializationTypes.get(t.getClass());
if ( edgeType == Transition.SET || edgeType == Transition.NOT_SET ) {
SetTransition st = (SetTransition)t;
sets.add(st.label);
}
}
}
int nrules = atn.rules.size();
data.add(nrules);
for (int r=1; r<=nrules; r++) {
ATNState ruleStartState = atn.rules.get(r-1);
data.add(ruleStartState.stateNumber);
if ( atn.g.isLexer() ) {
data.add(atn.ruleToTokenType.get(r));
String ruleName = atn.g.rules.getKey(r-1);
Rule rule = atn.g.getRule(ruleName);
data.add(rule.actionIndex);
}
else {
data.add(0);
data.add(0);
}
}
int nmodes = atn.modeToStartState.size();
data.add(nmodes);
if ( nmodes>0 ) {
for (ATNState modeStartState : atn.modeToStartState) {
data.add(modeStartState.stateNumber);
}
}
int nsets = sets.size();
data.add(nsets);
for (IntervalSet set : sets) {
data.add(set.getIntervals().size());
for (Interval I : set.getIntervals()) {
data.add(I.a);
data.add(I.b);
}
}
data.add(nedges);
int setIndex = 0;
for (ATNState s : atn.states) {
for (int i=0; i<s.getNumberOfTransitions(); i++) {
Transition t = s.transition(i);
int src = s.stateNumber;
int trg = t.target.stateNumber;
int edgeType = Transition.serializationTypes.get(t.getClass());
int arg1 = 0;
int arg2 = 0;
switch ( edgeType ) {
case Transition.RULE :
trg = ((RuleTransition)t).followState.stateNumber;
arg1 = ((RuleTransition)t).target.stateNumber;
if ( ((RuleTransition)t).rule!=null ) {
arg2 = ((RuleTransition)t).rule.index;
}
else {
arg2 = ((RuleTransition)t).ruleIndex;
}
break;
case Transition.PREDICATE :
PredicateTransition pt = (PredicateTransition)t;
arg1 = pt.ruleIndex;
arg2 = pt.predIndex;
break;
case Transition.RANGE :
arg1 = ((RangeTransition)t).from;
arg2 = ((RangeTransition)t).to;
break;
case Transition.ATOM :
arg1 = ((AtomTransition)t).label;
break;
case Transition.ACTION :
ActionTransition at = (ActionTransition)t;
arg1 = at.ruleIndex;
arg2 = at.actionIndex;
break;
case Transition.SET :
arg1 = setIndex++;
break;
case Transition.NOT_ATOM :
arg1 = ((NotAtomTransition)t).label;
break;
case Transition.NOT_SET :
arg1 = setIndex++;
break;
case Transition.WILDCARD :
break;
}
data.add(src);
data.add(trg);
data.add(edgeType);
data.add(arg1);
data.add(arg2);
}
}
int ndecisions = atn.decisionToATNState.size();
data.add(ndecisions);
for (ATNState decStartState : atn.decisionToATNState) {
data.add(decStartState.stateNumber);
}
return data;
}
public String decode(char[] data) {
StringBuilder buf = new StringBuilder();
int p = 0;
int grammarType = ATNInterpreter.toInt(data[p++]);
int maxType = ATNInterpreter.toInt(data[p++]);
buf.append("max type "+maxType+"\n");
int nstates = ATNInterpreter.toInt(data[p++]);
for (int i=1; i<=nstates; i++) {
int stype = ATNInterpreter.toInt(data[p++]);
int ruleIndex = ATNInterpreter.toInt(data[p++]);
if ( stype==0 ) continue; // ignore bad type of states
buf.append((i - 1) + ":" +
ATNState.serializationNames[stype] + " "+
ruleIndex + "\n");
}
int nrules = ATNInterpreter.toInt(data[p++]);
for (int i=1; i<=nrules; i++) {
int s = ATNInterpreter.toInt(data[p++]);
int arg1 = ATNInterpreter.toInt(data[p++]);
int arg2 = ATNInterpreter.toInt(data[p++]);
buf.append("rule "+i+":"+s+" "+arg1+","+arg2+'\n');
}
int nmodes = ATNInterpreter.toInt(data[p++]);
for (int i=0; i<nmodes; i++) {
int s = ATNInterpreter.toInt(data[p++]);
buf.append("mode "+i+":"+s+'\n');
}
int nsets = ATNInterpreter.toInt(data[p++]);
for (int i=1; i<=nsets; i++) {
int nintervals = ATNInterpreter.toInt(data[p++]);
buf.append((i-1)+":");
for (int j=1; j<=nintervals; j++) {
if ( j>1 ) buf.append(", ");
buf.append(getTokenName(ATNInterpreter.toInt(data[p]))+".."+getTokenName(ATNInterpreter.toInt(data[p+1])));
p += 2;
}
buf.append("\n");
}
int nedges = ATNInterpreter.toInt(data[p++]);
for (int i=1; i<=nedges; i++) {
int src = ATNInterpreter.toInt(data[p]);
int trg = ATNInterpreter.toInt(data[p+1]);
int ttype = ATNInterpreter.toInt(data[p+2]);
int arg1 = ATNInterpreter.toInt(data[p+3]);
int arg2 = ATNInterpreter.toInt(data[p+4]);
buf.append(src+"->"+trg+
" "+Transition.serializationNames[ttype]+
" "+arg1+","+arg2+
"\n");
p += 5;
}
int ndecisions = ATNInterpreter.toInt(data[p++]);
for (int i=1; i<=ndecisions; i++) {
int s = ATNInterpreter.toInt(data[p++]);
buf.append((i-1)+":"+s+"\n");
}
return buf.toString();
}
public String getTokenName(int t) {
if ( t==-1 ) return "EOF";
if ( atn.g!=null ) return atn.g.getTokenDisplayName(t);
return String.valueOf(t);
}
}
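Per the note above about packing into unsigned shorts, the serialized int list can be rendered as a char[] and round-tripped through decode(); a minimal hedged sketch (assumes every serialized value fits in 16 bits):

List<Integer> data = new ATNSerializer(atn).serialize();
char[] packed = new char[data.size()];
for (int i = 0; i < data.size(); i++) packed[i] = (char) data.get(i).intValue();
System.out.println(new ATNSerializer(atn).decode(packed)); // human-readable dump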

View File

@@ -0,0 +1,90 @@
package org.antlr.v4.automata;
import org.antlr.v4.misc.CharSupport;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.tool.*;
import java.util.List;
public class LexerATNFactory extends ParserATNFactory {
public LexerATNFactory(LexerGrammar g) { super(g); }
public ATN createATN() {
// BUILD ALL START STATES (ONE PER MODE)
for (String modeName : ((LexerGrammar)g).modes.keySet()) {
// create s0, start state; implied Tokens rule node
TokensStartState startState =
(TokensStartState)newState(TokensStartState.class, null);
atn.modeNameToStartState.put(modeName, startState);
atn.modeToStartState.add(startState);
atn.defineDecisionState(startState);
}
// CREATE ATN FOR EACH RULE
_createATN(g.rules.values());
// LINK MODE START STATE TO EACH TOKEN RULE
for (String modeName : ((LexerGrammar)g).modes.keySet()) {
List<Rule> rules = ((LexerGrammar)g).modes.get(modeName);
TokensStartState startState = atn.modeNameToStartState.get(modeName);
for (Rule r : rules) {
if ( !r.isFragment() ) {
RuleStartState s = atn.ruleToStartState.get(r);
epsilon(startState, s);
}
}
}
return atn;
}
@Override
public Handle action(ActionAST action) {
// Handle h = super.action(action);
// ActionTransition a = (ActionTransition)h.left.transition(0);
// a.actionIndex = g.actions.get(action);
// return h;
// no actions in lexer ATN; just one on end and we exec via action number
ATNState x = newState(action);
return new Handle(x, x); // return just one blank state
}
@Override
public Handle range(GrammarAST a, GrammarAST b) {
ATNState left = newState(a);
ATNState right = newState(b);
int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
left.transition = new RangeTransition(t1, t2, right);
a.atnState = left;
b.atnState = left;
return new Handle(left, right);
}
/** For a lexer, a string is a sequence of char to match. That is,
* "fog" is treated as 'f' 'o' 'g' not as a single transition in
* the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
* for n characters.
*/
@Override
public Handle stringLiteral(TerminalAST stringLiteralAST) {
String chars = stringLiteralAST.getText();
chars = CharSupport.getStringFromGrammarStringLiteral(chars);
int n = chars.length();
ATNState left = newState(stringLiteralAST);
ATNState prev = left;
ATNState right = null;
for (int i=0; i<n; i++) {
right = newState(stringLiteralAST);
prev.transition = new AtomTransition(chars.charAt(i), right);
prev = right;
}
stringLiteralAST.atnState = left;
return new Handle(left, right);
}
@Override
public Handle tokenRef(TerminalAST node) {
return _ruleRef(node);
}
}

View File

@@ -0,0 +1,506 @@
package org.antlr.v4.automata;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.*;
import org.antlr.v4.misc.*;
import org.antlr.v4.parse.*;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.tool.*;
import java.lang.reflect.Constructor;
import java.util.*;
/** ATN construction routines triggered by ATNBuilder.g.
*
* No side-effects. It builds an ATN object and returns it.
*/
public class ParserATNFactory implements ATNFactory {
public Grammar g;
public Rule currentRule;
ATN atn;
public ParserATNFactory(Grammar g) { this.g = g; atn = new ATN(g); }
public ATN createATN() {
_createATN(g.rules.values());
atn.maxTokenType = g.getMaxTokenType();
addEOFTransitionToStartRules();
return atn;
}
public void _createATN(Collection<Rule> rules) {
createRuleStartAndStopATNStates();
GrammarASTAdaptor adaptor = new GrammarASTAdaptor();
for (Rule r : rules) {
// find rule's block
GrammarAST blk = (GrammarAST)r.ast.getFirstChildWithType(ANTLRParser.BLOCK);
CommonTreeNodeStream nodes = new CommonTreeNodeStream(adaptor,blk);
ATNBuilder b = new ATNBuilder(nodes,this);
try {
setCurrentRuleName(r.name);
Handle h = b.block(null);
rule(r.ast, r.name, h);
}
catch (RecognitionException re) {
ErrorManager.fatalInternalError("bad grammar AST structure", re);
}
}
}
public void setCurrentRuleName(String name) {
this.currentRule = g.getRule(name);
}
/* start->ruleblock->end */
public Handle rule(GrammarAST ruleAST, String name, Handle blk) {
Rule r = g.getRule(name);
RuleStartState start = atn.ruleToStartState.get(r);
epsilon(start, blk.left);
RuleStopState stop = atn.ruleToStopState.get(r);
epsilon(blk.right, stop);
Handle h = new Handle(start, stop);
// FASerializer ser = new FASerializer(g, h.left);
// System.out.println(ruleAST.toStringTree()+":\n"+ser);
ruleAST.atnState = start;
return h;
}
/** From label A build Graph o-A->o */
public Handle tokenRef(TerminalAST node) {
ATNState left = newState(node);
ATNState right = newState(node);
int ttype = g.getTokenType(node.getText());
left.transition = new AtomTransition(ttype, right);
right.incidentTransition = left.transition;
node.atnState = left;
return new Handle(left, right);
}
/** From set build single edge graph o->o-set->o. To conform to
* what an alt block looks like, must have extra state on left.
*/
public Handle set(IntervalSet set, GrammarAST associatedAST) {
ATNState left = newState(associatedAST);
ATNState right = newState(associatedAST);
left.transition = new SetTransition(associatedAST, set, right);
right.incidentTransition = left.transition;
associatedAST.atnState = left;
return new Handle(left, right);
}
public Handle tree(List<Handle> els) {
return null;
}
/** Not valid for non-lexers */
public Handle range(GrammarAST a, GrammarAST b) { throw new UnsupportedOperationException(); }
/** ~atom only */
public Handle not(GrammarAST node) {
ATNState left = newState(node);
ATNState right = newState(node);
int ttype = getTokenType((GrammarAST) node.getChild(0));
left.transition = new NotAtomTransition(ttype, right);
right.incidentTransition = left.transition;
node.atnState = left;
return new Handle(left, right);
}
protected int getTokenType(GrammarAST atom) {
int ttype;
if ( g.isLexer() ) {
ttype = CharSupport.getCharValueFromGrammarCharLiteral(atom.getText());
}
else {
ttype = g.getTokenType(atom.getText());
}
return ttype;
}
/** For a non-lexer, just build a simple token reference atom. */
public Handle stringLiteral(TerminalAST stringLiteralAST) {
return tokenRef(stringLiteralAST);
}
/** For reference to rule r, build
*
* o->(r) o
*
* where (r) is the start of rule r and the trailing o is not linked
* to from rule ref state directly (uses followState).
*/
public Handle ruleRef(GrammarAST node) {
Handle h = _ruleRef(node);
Rule r = g.getRule(node.getText());
addFollowLink(r, h.right);
return h;
}
public Handle _ruleRef(GrammarAST node) {
Rule r = g.getRule(node.getText());
RuleStartState start = atn.ruleToStartState.get(r);
ATNState left = newState(node);
ATNState right = newState(node);
RuleTransition call = new RuleTransition(r, start, right);
left.addTransition(call);
node.atnState = left;
return new Handle(left, right);
}
public void addFollowLink(Rule r, ATNState right) {
// add follow edge from end of invoked rule
RuleStopState stop = atn.ruleToStopState.get(r);
epsilon(stop, right);
}
/** From an empty alternative build o-e->o */
public Handle epsilon(GrammarAST node) {
ATNState left = newState(node);
ATNState right = newState(node);
epsilon(left, right);
node.atnState = left;
return new Handle(left, right);
}
/** Build what amounts to an epsilon transition with a semantic
* predicate action. The pred is a pointer into the AST of
* the SEMPRED token.
*/
public Handle sempred(PredAST pred) {
//System.out.println("sempred: "+ pred);
ATNState left = newState(pred);
ATNState right = newState(pred);
PredicateTransition p = new PredicateTransition(pred, right);
p.ruleIndex = currentRule.index;
p.predIndex = g.sempreds.get(pred);
left.transition = p;
pred.atnState = left;
return new Handle(left, right);
}
public Handle gated_sempred(GrammarAST pred) {
ATNState left = newState(pred);
ATNState right = newState(pred);
left.transition = new PredicateTransition(pred, right);
pred.atnState = left;
return new Handle(left, right);
}
/** Build what amounts to an epsilon transition with an action.
* The action goes into ATN though it is ignored during analysis.
* It slows things down a bit, but I must ignore predicates after
* having seen an action (5-5-2008).
*/
public Handle action(ActionAST action) {
//System.out.println("action: "+action);
ATNState left = newState(action);
ATNState right = newState(action);
ActionTransition a = new ActionTransition(action, right);
a.ruleIndex = currentRule.index;
if ( action.getType()==ANTLRParser.FORCED_ACTION ) {
a.actionIndex = g.actions.get(action);
}
left.transition = a;
action.atnState = left;
return new Handle(left, right);
}
/** From A|B|..|Z alternative block build
*
* o->o-A->o->o (last ATNState is BlockEndState pointed to by all alts)
* | ^
* |->o-B->o--|
* | |
* ... |
* | |
* |->o-Z->o--|
*
* So start node points at every alternative with epsilon transition
* and every alt right side points at a block end ATNState.
*
* Special case: only one alternative: don't make a block with alt
* begin/end.
*
* Special case: if just a list of tokens/chars/sets, then collapse
* to a single edge'd o-set->o graph.
*
* TODO: Set alt number (1..n) in the states?
*/
public Handle block(BlockAST blkAST, GrammarAST ebnfRoot, List<Handle> alts) {
if ( ebnfRoot==null ) {
if ( alts.size()==1 ) {
Handle h = alts.get(0);
blkAST.atnState = h.left;
return h;
}
BlockStartState start = (BlockStartState)newState(BlockStartState.class, blkAST);
return makeBlock(start, blkAST, alts);
}
switch ( ebnfRoot.getType() ) {
case ANTLRParser.OPTIONAL :
BlockStartState start = (BlockStartState)newState(BlockStartState.class, blkAST);
Handle h = makeBlock(start, blkAST, alts);
return optional(ebnfRoot, h);
case ANTLRParser.CLOSURE :
BlockStartState star = (StarBlockStartState)newState(StarBlockStartState.class, ebnfRoot);
h = makeBlock(star, blkAST, alts);
return star(ebnfRoot, h);
case ANTLRParser.POSITIVE_CLOSURE :
PlusBlockStartState plus = (PlusBlockStartState)newState(PlusBlockStartState.class, ebnfRoot);
h = makeBlock(plus, blkAST, alts);
return plus(ebnfRoot, h);
}
return null;
}
protected Handle makeBlock(BlockStartState start, GrammarAST blkAST, List<Handle> alts) {
BlockEndState end = (BlockEndState)newState(BlockEndState.class, blkAST);
start.endState = end;
for (Handle alt : alts) {
epsilon(start, alt.left);
epsilon(alt.right, end);
}
atn.defineDecisionState(start);
Handle h = new Handle(start, end);
// FASerializer ser = new FASerializer(g, h.left);
// System.out.println(blkAST.toStringTree()+":\n"+ser);
blkAST.atnState = start;
return h;
}
public Handle notBlock(GrammarAST notAST, List<GrammarAST> terminals) {
// assume list of atoms
IntervalSet notSet = new IntervalSet();
for (GrammarAST elemAST : terminals) {
if ( elemAST.getType()==ANTLRParser.RANGE ) {
GrammarAST from = (GrammarAST)elemAST.getChild(0);
GrammarAST to = (GrammarAST)elemAST.getChild(1);
notSet.add(getTokenType(from), getTokenType(to));
}
else {
notSet.add(getTokenType(elemAST));
}
}
ATNState left = newState(notAST);
ATNState right = newState(notAST);
left.transition = new NotSetTransition(notAST, notSet, right);
right.incidentTransition = left.transition;
notAST.atnState = left;
return new Handle(left, right);
}
public Handle alt(List<Handle> els) {
Handle prev = null;
for (Handle el : els) { // hook up elements
if ( prev!=null ) epsilon(prev.right, el.left);
prev = el;
}
Handle first = els.get(0);
Handle last = els.get(els.size()-1);
if ( first==null || last==null ) {
g.tool.errMgr.toolError(ErrorType.INTERNAL_ERROR, "alt Handle has first|last == null");
}
return new Handle(first.left, last.right);
}
/** From (A)? build either:
*
* o--A->o
* | ^
* o---->|
*
* or, if A is a block, just add an empty alt to the end of the block
*/
public Handle optional(GrammarAST optAST, Handle blk) {
// TODO: no such thing as nongreedy ()? so give error
BlockStartState blkStart = (BlockStartState)blk.left;
epsilon(blkStart, blk.right);
optAST.atnState = blk.left;
return blk;
}
/** From (blk)+ build
*
* |---------|
* v |
* o->o-A-o->o->o->o loop back points at start of all alts
* | ^
* |->o-B-o--|
*
* Meaning that the last ATNState in A blk points to loop back node,
* which points back to block start. We add start/end nodes to
* outside.
*/
public Handle plus(GrammarAST plusAST, Handle blk) {
PlusBlockStartState start = (PlusBlockStartState)blk.left;
atn.defineDecisionState(start); // we don't use in code gen though
plusAST.atnState = start;
PlusLoopbackState loop = (PlusLoopbackState)newState(PlusLoopbackState.class, plusAST);
ATNState end = (ATNState)newState(ATNState.class, plusAST);
start.loopBackState = loop;
epsilon(blk.right, loop);
BlockAST blkAST = (BlockAST)plusAST.getChild(0);
// if not greedy, priority to exit branch; make it first
if ( !isGreedy(blkAST) ) epsilon(loop, end);
// connect loop back to all alt left edges
for (Transition trans : start.transitions) {
epsilon(loop, trans.target);
}
// if greedy, last alt of decisions is exit branch
if ( isGreedy(blkAST) ) epsilon(loop, end);
atn.defineDecisionState(loop);
return new Handle(start, end);
}
/** From (blk)* build
*
* |----------|
* v |
* o-[blk]-o->o o
* | ^
* o-------------| (optional branch is nth alt of StarBlockStartState)
*
* There is 1 decision point in A*.
*
* Note that the optional bypass must jump outside the loop as (A|B)* is
* not the same thing as (A|B|)+.
*/
public Handle star(GrammarAST starAST, Handle elem) {
BlockAST blkAST = (BlockAST)starAST.getChild(0);
StarBlockStartState blkStart = (StarBlockStartState)elem.left;
BlockEndState blkEnd = (BlockEndState)elem.right;
StarLoopbackState loop = (StarLoopbackState)newState(StarLoopbackState.class, starAST);
ATNState end = (ATNState)newState(ATNState.class, starAST);
// If greedy, exit alt is last, else exit is first
if ( isGreedy(blkAST) ) {
epsilon(blkStart, end); // bypass edge
}
else {
blkStart.addTransitionFirst(new EpsilonTransition(end));
}
epsilon(loop, blkStart);
epsilon(blkEnd, loop);
starAST.atnState = blkStart;
return new Handle(blkStart, end);
}
/** Build an atom with all possible values in its label */
public Handle wildcard(GrammarAST node) {
ATNState left = newState(node);
ATNState right = newState(node);
left.transition = new WildcardTransition(right);
right.incidentTransition = left.transition;
node.atnState = left;
return new Handle(left, right);
}
/** Build a subrule matching ^(. .*) (any tree or node). Let's use
* (^(. .+) | .) to be safe.
*/
public Handle wildcardTree(GrammarAST associatedAST) { throw new UnsupportedOperationException(); }
void epsilon(ATNState a, ATNState b) {
if ( a!=null ) a.addTransition(new EpsilonTransition(b));
}
/** Define all the rule begin/end ATNStates to solve forward reference
* issues.
*/
void createRuleStartAndStopATNStates() {
for (Rule r : g.rules.values()) {
RuleStartState start = (RuleStartState)newState(RuleStartState.class, r.ast);
RuleStopState stop = (RuleStopState)newState(RuleStopState.class, r.ast);
start.stopState = stop;
start.setRule(r);
stop.setRule(r);
atn.ruleToStartState.put(r, start);
atn.rules.add(start);
atn.ruleToStopState.put(r, stop);
}
}
/** Add an EOF transition to any rule stop ATNState that points to nothing
* (i.e., to every rule not invoked by another rule). Such rules are the
* grammar's start symbols.
*
* Return the number of grammar entry points; i.e., how many rules are
* not invoked by another rule (they can only be invoked from outside).
*/
public int addEOFTransitionToStartRules() {
int n = 0;
ATNState eofTarget = newState(null); // one unique EOF target for all rules
for (Rule r : g.rules.values()) {
ATNState stop = atn.ruleToStopState.get(r);
if ( stop.getNumberOfTransitions()>0 ) continue;
n++;
Transition t = new AtomTransition(Token.EOF, eofTarget);
stop.addTransition(t);
}
return n;
}
public Handle label(Handle t) {
return t;
}
public Handle listLabel(Handle t) {
return t;
}
public ATNState newState(Class<? extends ATNState> nodeType, GrammarAST node) {
try {
Constructor<? extends ATNState> ctor = nodeType.getConstructor();
ATNState s = (ATNState)ctor.newInstance();
s.ast = node;
s.setRule(currentRule);
atn.addState(s);
return s;
}
catch (Exception e) {
ErrorManager.internalError("can't create ATN node: "+nodeType.getName(), e);
}
return null;
}
public ATNState newState(GrammarAST node) {
ATNState n = new ATNState();
n.setRule(currentRule);
n.ast = node;
atn.addState(n);
return n;
}
public ATNState newState() { return newState(null); }
public boolean isGreedy(BlockAST blkAST) {
boolean greedy = true;
String greedyOption = blkAST.getOption("greedy");
if ( blockHasWildcardAlt(blkAST) || (greedyOption!=null && greedyOption.equals("false")) ) {
greedy = false;
}
return greedy;
}
// (BLOCK (ALT .)) or (BLOCK (ALT 'a') (ALT .))
public static boolean blockHasWildcardAlt(GrammarAST block) {
for (Object alt : block.getChildren()) {
if ( !(alt instanceof AltAST) ) continue;
AltAST altAST = (AltAST)alt;
if ( altAST.getChildCount()==1 ) {
Tree e = altAST.getChild(0);
if ( e.getType()==ANTLRParser.WILDCARD ) {
return true;
}
}
}
return false;
}
}
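As a brief aside, here is a minimal, self-contained sketch of the epsilon-wiring idea behind alt() above. The AltWiringSketch/State/Handle names are our own stand-ins for the real ATNState and Handle types, not code from this commit:

import java.util.*;

public class AltWiringSketch {
    static class State {
        final List<State> epsilonTargets = new ArrayList<State>();
    }
    static class Handle {
        final State left, right;
        Handle(State left, State right) { this.left = left; this.right = right; }
    }
    static void epsilon(State a, State b) { a.epsilonTargets.add(b); }

    /** Chain element handles left-to-right, as alt() does. */
    static Handle alt(List<Handle> els) {
        Handle prev = null;
        for (Handle el : els) {
            if (prev != null) epsilon(prev.right, el.left); // hook up elements
            prev = el;
        }
        return new Handle(els.get(0).left, els.get(els.size()-1).right);
    }

    public static void main(String[] args) {
        Handle a = new Handle(new State(), new State());
        Handle b = new Handle(new State(), new State());
        Handle seq = alt(Arrays.asList(a, b));
        // seq spans a.left..b.right with an epsilon edge a.right -> b.left
        System.out.println(seq.left == a.left && seq.right == b.right
                           && a.right.epsilonTargets.get(0) == b.left); // true
    }
}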

View File

@@ -0,0 +1,129 @@
package org.antlr.v4.misc;
import org.antlr.v4.tool.Grammar;
/** Utility functions for converting ANTLR char and string literals. */
public class CharSupport {
/** When converting ANTLR char and string literals, here is the
* value set of escape chars.
*/
public static int ANTLRLiteralEscapedCharValue[] = new int[255];
/** Given a char value, the escape string to show it as an ANTLR literal.
*/
public static String ANTLRLiteralCharValueEscape[] = new String[255];
static {
ANTLRLiteralEscapedCharValue['n'] = '\n';
ANTLRLiteralEscapedCharValue['r'] = '\r';
ANTLRLiteralEscapedCharValue['t'] = '\t';
ANTLRLiteralEscapedCharValue['b'] = '\b';
ANTLRLiteralEscapedCharValue['f'] = '\f';
ANTLRLiteralEscapedCharValue['\\'] = '\\';
ANTLRLiteralEscapedCharValue['\''] = '\'';
ANTLRLiteralEscapedCharValue['"'] = '"';
ANTLRLiteralCharValueEscape['\n'] = "\\n";
ANTLRLiteralCharValueEscape['\r'] = "\\r";
ANTLRLiteralCharValueEscape['\t'] = "\\t";
ANTLRLiteralCharValueEscape['\b'] = "\\b";
ANTLRLiteralCharValueEscape['\f'] = "\\f";
ANTLRLiteralCharValueEscape['\\'] = "\\\\";
ANTLRLiteralCharValueEscape['\''] = "\\'";
}
/** Return a string representing the escaped char for code c. E.g., if c
* has value 0x100, you will get "\u0100". ASCII gets the usual
* char (non-hex) representation. Control characters are spit out
* as unicode. While this is specially set up for returning Java strings,
* it can be used by any language target that has the same syntax. :)
*/
public static String getANTLRCharLiteralForChar(int c) {
if ( c< Grammar.MIN_CHAR_VALUE ) {
return "'<INVALID>'";
}
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
}
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
!Character.isISOControl((char)c) ) {
if ( c=='\\' ) {
return "'\\\\'";
}
if ( c=='\'') {
return "'\\''";
}
return '\''+Character.toString((char)c)+'\'';
}
// turn on the bit above max "\uFFFF" value so that we pad with zeros
// then only take last 4 digits
String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
String unicodeStr = "'\\u"+hex+"'";
return unicodeStr;
}
/** Given a literal such as 'a' (the 3-char sequence including the single
* quotes), return the int value of the character. Escape sequences are
* converted here as well.
*/
public static int getCharValueFromGrammarCharLiteral(String literal) {
switch ( literal.length() ) {
case 3 :
// 'x'
return literal.charAt(1); // no escape char
case 4 :
// '\x' (antlr lexer will catch invalid char)
if ( Character.isDigit(literal.charAt(2)) ) {
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
// "invalid char literal: "+literal);
return -1;
}
int escChar = literal.charAt(2);
int charVal = ANTLRLiteralEscapedCharValue[escChar];
if ( charVal==0 ) {
// Unnecessary escapes like '\{' should just yield {
return escChar;
}
return charVal;
case 8 :
// '\u1234'
String unicodeChars = literal.substring(3,literal.length()-1);
return Integer.parseInt(unicodeChars, 16);
default :
// ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
// "invalid char literal: "+literal);
return -1;
}
}
public static String getStringFromGrammarStringLiteral(String literal) {
StringBuilder buf = new StringBuilder();
int n = literal.length();
int i = 1; // skip first quote
while ( i < (n-1) ) { // scan all but last quote
switch ( literal.charAt(i) ) {
case '\\' :
i++;
if ( literal.charAt(i)=='u' ) { // '\u1234'
i++;
String unicodeChars = literal.substring(i,i+4);
int h = Integer.parseInt(unicodeChars, 16);
buf.append((char)h);
i += 4;
}
else {
char escChar = literal.charAt(i);
int charVal = ANTLRLiteralEscapedCharValue[escChar];
if ( charVal==0 ) buf.append(escChar); // Unnecessary escapes like '\{' should just yield {
else buf.append((char)charVal);
i++;
}
break;
default :
buf.append(literal.charAt(i));
i++;
break;
}
}
return buf.toString();
}
}
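A hedged usage sketch of the two conversions above; the CharSupportDemo wrapper is ours, and the first call assumes Grammar.MIN_CHAR_VALUE is at or below the newline code point:

public class CharSupportDemo {
    public static void main(String[] args) {
        // a control char comes back escaped: '\n' prints as '\n' with quotes
        System.out.println(org.antlr.v4.misc.CharSupport.getANTLRCharLiteralForChar('\n'));
        // outside BASIC_LATIN we pad to 4 hex digits: 0x100 prints as a
        // quoted unicode escape with hex value 0100
        System.out.println(org.antlr.v4.misc.CharSupport.getANTLRCharLiteralForChar(0x100));
        // a 4-char grammar literal with an escape: '\t' yields 9
        System.out.println(org.antlr.v4.misc.CharSupport.getCharValueFromGrammarCharLiteral("'\\t'"));
    }
}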

View File

@@ -0,0 +1,55 @@
package org.antlr.v4.misc;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
/** Sometimes we need to map a key to a value, but the key is two pieces of
* data. This nested hash table saves creating a composite key object on
* every map access; it avoids that memory allocation.
*/
public class DoubleKeyMap<Key1, Key2, Value> {
Map<Key1, Map<Key2, Value>> data = new LinkedHashMap<Key1, Map<Key2, Value>>();
public Value put(Key1 k1, Key2 k2, Value v) {
Map<Key2, Value> data2 = data.get(k1);
Value prev = null;
if ( data2==null ) {
data2 = new LinkedHashMap<Key2, Value>();
data.put(k1, data2);
}
else {
prev = data2.get(k2);
}
data2.put(k2, v);
return prev;
}
public Value get(Key1 k1, Key2 k2) {
Map<Key2, Value> data2 = data.get(k1);
if ( data2==null ) return null;
return data2.get(k2);
}
public Map<Key2, Value> get(Key1 k1) { return data.get(k1); }
/** Get all values associated with primary key */
public Collection<Value> values(Key1 k1) {
Map<Key2, Value> data2 = data.get(k1);
if ( data2==null ) return null;
return data2.values();
}
/** get all primary keys */
public Set<Key1> keySet() {
return data.keySet();
}
/** get all secondary keys associated with a primary key */
public Set<Key2> keySet(Key1 k1) {
Map<Key2, Value> data2 = data.get(k1);
if ( data2==null ) return null;
return data2.keySet();
}
}
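A brief usage sketch for DoubleKeyMap; the (rule name, token type) pairing below is a hypothetical example of ours, not taken from the tool:

import org.antlr.v4.misc.DoubleKeyMap;
import java.util.Set;

public class DoubleKeyMapDemo {
    public static void main(String[] args) {
        DoubleKeyMap<String, Integer, String> m =
            new DoubleKeyMap<String, Integer, String>();
        m.put("expr", 42, "first");
        String prev = m.put("expr", 42, "second"); // returns "first"
        System.out.println(prev + " -> " + m.get("expr", 42)); // first -> second
        Set<Integer> seconds = m.keySet("expr"); // secondary keys for "expr"
        System.out.println(seconds);             // [42]
    }
}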

View File

@@ -0,0 +1,142 @@
/*
[The "BSD license"]
Copyright (c) 2005-2009 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.misc;
/** An immutable inclusive interval a..b */
public class Interval {
public static final int INTERVAL_POOL_MAX_VALUE = 1000;
static Interval[] cache = new Interval[INTERVAL_POOL_MAX_VALUE+1];
public int a;
public int b;
public static int creates = 0;
public static int misses = 0;
public static int hits = 0;
public static int outOfRange = 0;
public Interval(int a, int b) { this.a=a; this.b=b; }
/** Interval objects are used read-only, so we can share all intervals
* with the same single value a==b, up to some max size; an array serves
* as a perfect hash. Return the shared object for a in
* 0..INTERVAL_POOL_MAX_VALUE, or a new Interval object with a..b in it
* otherwise. On Java.g, 218623 IntervalSets have a..a (a set with 1 element).
*/
public static Interval create(int a, int b) {
//return new Interval(a,b);
// cache just a..a
if ( a!=b || a<0 || a>INTERVAL_POOL_MAX_VALUE ) {
return new Interval(a,b);
}
if ( cache[a]==null ) {
cache[a] = new Interval(a,a);
}
return cache[a];
}
public boolean equals(Object o) {
if ( !(o instanceof Interval) ) {
return false;
}
Interval other = (Interval)o;
return this.a==other.a && this.b==other.b;
}
/** Does this start completely before other? Disjoint */
public boolean startsBeforeDisjoint(Interval other) {
return this.a<other.a && this.b<other.a;
}
/** Does this start at or before other? Nondisjoint */
public boolean startsBeforeNonDisjoint(Interval other) {
return this.a<=other.a && this.b>=other.a;
}
/** Does this start after other's start? May or may not be disjoint */
public boolean startsAfter(Interval other) { return this.a>other.a; }
/** Does this start completely after other? Disjoint */
public boolean startsAfterDisjoint(Interval other) {
return this.a>other.b;
}
/** Does this start after other? NonDisjoint */
public boolean startsAfterNonDisjoint(Interval other) {
return this.a>other.a && this.a<=other.b; // this.b>=other.b implied
}
/** Are both ranges disjoint? I.e., no overlap? */
public boolean disjoint(Interval other) {
return startsBeforeDisjoint(other) || startsAfterDisjoint(other);
}
/** Are two intervals adjacent such as 0..41 and 42..42? */
public boolean adjacent(Interval other) {
return this.a == other.b+1 || this.b == other.a-1;
}
public boolean properlyContains(Interval other) {
return other.a >= this.a && other.b <= this.b;
}
/** Return the interval computed from combining this and other */
public Interval union(Interval other) {
return Interval.create(Math.min(a,other.a), Math.max(b,other.b));
}
/** Return the interval in common between this and o */
public Interval intersection(Interval other) {
return Interval.create(Math.max(a,other.a), Math.min(b,other.b));
}
/** Return the interval with elements from this not in other;
* other must not be totally enclosed (properly contained)
* within this, which would result in two disjoint intervals
* instead of the single one returned by this method.
*/
public Interval differenceNotProperlyContained(Interval other) {
Interval diff = null;
// other.a to left of this.a (or same)
if ( other.startsBeforeNonDisjoint(this) ) {
diff = Interval.create(Math.max(this.a,other.b+1),
this.b);
}
// other.a to right of this.a
else if ( other.startsAfterNonDisjoint(this) ) {
diff = Interval.create(this.a, other.a-1);
}
return diff;
}
public String toString() {
return a+".."+b;
}
}
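A short sketch of the pooling and set-algebra behavior above; the IntervalDemo wrapper is ours, and the identity check relies on the documented cache range 0..INTERVAL_POOL_MAX_VALUE:

import org.antlr.v4.misc.Interval;

public class IntervalDemo {
    public static void main(String[] args) {
        // a..a intervals in the pool range come back as shared objects
        Interval x = Interval.create(5, 5);
        Interval y = Interval.create(5, 5);
        System.out.println(x == y); // true: same cached object
        System.out.println(Interval.create(3, 9)
                               .union(Interval.create(7, 12)));        // 3..12
        System.out.println(Interval.create(3, 9)
                               .intersection(Interval.create(7, 12))); // 7..9
    }
}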

View File

@@ -0,0 +1,536 @@
/*
[The "BSD license"]
Copyright (c) 2005-2009 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.misc;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.tool.Grammar;
import java.util.*;
/** A set of integers that relies on ranges being common to do
* "run-length-encoded" like compression (if you view an IntSet like
* a BitSet with runs of 0s and 1s). Only ranges are recorded so that
* a few ints up near value 1000 don't cause massive bitsets, just two
* integer intervals.
*
* Element values may be negative; useful for sets of EPSILON and EOF.
*
* 0..9 char range is index pair ['\u0030','\u0039'].
* Multiple ranges are encoded with multiple index pairs. Isolated
* elements are encoded with an index pair where both intervals are the same.
*
* The ranges are ordered and disjoint so that 2..6 appears before 101..103.
*/
public class IntervalSet implements IntSet {
public static final IntervalSet COMPLETE_SET = IntervalSet.of(0, Grammar.MAX_CHAR_VALUE);
public static final IntervalSet EMPTY_SET = new IntervalSet();
/** The list of sorted, disjoint intervals. */
protected List<Interval> intervals;
/** Create a set with no elements */
public IntervalSet() {
intervals = new ArrayList<Interval>(2); // most sets are 1 or 2 elements
}
public IntervalSet(List<Interval> intervals) {
this.intervals = intervals;
}
public IntervalSet(IntervalSet set) {
this();
addAll(set);
}
/** Create a set with a single element, el. */
public static IntervalSet of(int a) {
IntervalSet s = new IntervalSet();
s.add(a);
return s;
}
/** Create a set with all ints within range [a..b] (inclusive) */
public static IntervalSet of(int a, int b) {
IntervalSet s = new IntervalSet();
s.add(a,b);
return s;
}
public void clear() {
intervals.clear();
}
/** Add a single element to the set. An isolated element is stored
* as a range el..el.
*/
public void add(int el) {
add(el,el);
}
/** Add interval; i.e., add all integers from a to b to set.
* If b<a, do nothing.
* Keep list in sorted order (by left range value).
* If overlap, combine ranges. For example,
* If this is {1..5, 10..20}, adding 6..7 yields
* {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}.
*/
public void add(int a, int b) {
add(Interval.create(a,b));
}
// copy on write so we can cache a..a intervals and sets of that
protected void add(Interval addition) {
//System.out.println("add "+addition+" to "+intervals.toString());
if ( addition.b<addition.a ) {
return;
}
// find position in list
// Use iterators as we modify list in place
for (ListIterator<Interval> iter = intervals.listIterator(); iter.hasNext();) {
Interval r = iter.next();
if ( addition.equals(r) ) {
return;
}
if ( addition.adjacent(r) || !addition.disjoint(r) ) {
// next to each other, make a single larger interval
Interval bigger = addition.union(r);
iter.set(bigger);
// make sure we didn't just create an interval that
// should be merged with next interval in list
if ( iter.hasNext() ) {
Interval next = iter.next();
if ( bigger.adjacent(next)||!bigger.disjoint(next) ) {
// if we bump up against or overlap next, merge
iter.remove(); // remove this one
iter.previous(); // move backwards to what we just set
iter.set(bigger.union(next)); // set to 3 merged ones
}
}
return;
}
if ( addition.startsBeforeDisjoint(r) ) {
// insert before r
iter.previous();
iter.add(addition);
return;
}
// if disjoint and after r, a future iteration will handle it
}
// ok, must be after last interval (and disjoint from last interval)
// just add it
intervals.add(addition);
}
/** Combine all sets in the array, returning the or'd value */
public static IntervalSet or(IntervalSet[] sets) {
IntervalSet r = new IntervalSet();
for (IntervalSet s : sets) r.addAll(s);
return r;
}
public IntSet addAll(IntSet set) {
if ( set==null ) {
return this;
}
if ( !(set instanceof IntervalSet) ) {
throw new IllegalArgumentException("can't add non IntSet ("+
set.getClass().getName()+
") to IntervalSet");
}
IntervalSet other = (IntervalSet)set;
// walk set and add each interval
int n = other.intervals.size();
for (int i = 0; i < n; i++) {
Interval I = (Interval) other.intervals.get(i);
this.add(I.a,I.b);
}
return this;
}
public IntSet complement(int minElement, int maxElement) {
return this.complement(IntervalSet.of(minElement,maxElement));
}
/** Given the set of possible values (rather than, say UNICODE or MAXINT),
* return a new set containing all elements in vocabulary, but not in
* this. The computation is (vocabulary - this).
*
* 'this' is assumed to be either a subset or equal to vocabulary.
*/
public IntSet complement(IntSet vocabulary) {
if ( vocabulary==null ) {
return null; // nothing in common with null set
}
if ( !(vocabulary instanceof IntervalSet ) ) {
throw new IllegalArgumentException("can't complement with non IntervalSet ("+
vocabulary.getClass().getName()+")");
}
IntervalSet vocabularyIS = ((IntervalSet)vocabulary);
int maxElement = vocabularyIS.getMaxElement();
IntervalSet compl = new IntervalSet();
int n = intervals.size();
if ( n==0 ) {
return compl;
}
Interval first = (Interval)intervals.get(0);
// add a range from 0 to first.a constrained to vocab
if ( first.a > 0 ) {
IntervalSet s = IntervalSet.of(0, first.a-1);
IntervalSet a = (IntervalSet)s.and(vocabularyIS);
compl.addAll(a);
}
for (int i=1; i<n; i++) { // from 2nd interval .. nth
Interval previous = (Interval)intervals.get(i-1);
Interval current = (Interval)intervals.get(i);
IntervalSet s = IntervalSet.of(previous.b+1, current.a-1);
IntervalSet a = (IntervalSet)s.and(vocabularyIS);
compl.addAll(a);
}
Interval last = (Interval)intervals.get(n -1);
// add a range from last.b to maxElement constrained to vocab
if ( last.b < maxElement ) {
IntervalSet s = IntervalSet.of(last.b+1, maxElement);
IntervalSet a = (IntervalSet)s.and(vocabularyIS);
compl.addAll(a);
}
return compl;
}
/** Compute this-other via this&~other.
* Return a new set containing all elements in this but not in other.
* other is assumed to be a subset of this;
* anything that is in other but not in this will be ignored.
*/
public IntSet subtract(IntSet other) {
// assume the whole unicode range here for the complement
// because it doesn't matter. Anything beyond the max of this' set
// will be ignored since we are doing this & ~other. The intersection
// will be empty. The only problem would be when this' set max value
// goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE
// will prevent this.
return this.and(((IntervalSet)other).complement(COMPLETE_SET));
}
public IntSet or(IntSet a) {
IntervalSet o = new IntervalSet();
o.addAll(this);
o.addAll(a);
return o;
}
/** Return a new set with the intersection of this set with other. Because
* the intervals are sorted, we can use an iterator for each list and
* just walk them together. This is roughly O(min(n,m)) for interval
* list lengths n and m.
*/
public IntSet and(IntSet other) {
if ( other==null ) { //|| !(other instanceof IntervalSet) ) {
return null; // nothing in common with null set
}
List<Interval> myIntervals = this.intervals;
List<Interval> theirIntervals = ((IntervalSet)other).intervals;
IntervalSet intersection = null;
int mySize = myIntervals.size();
int theirSize = theirIntervals.size();
int i = 0;
int j = 0;
// iterate down both interval lists looking for nondisjoint intervals
while ( i<mySize && j<theirSize ) {
Interval mine = (Interval)myIntervals.get(i);
Interval theirs = (Interval)theirIntervals.get(j);
//System.out.println("mine="+mine+" and theirs="+theirs);
if ( mine.startsBeforeDisjoint(theirs) ) {
// move this iterator looking for interval that might overlap
i++;
}
else if ( theirs.startsBeforeDisjoint(mine) ) {
// move other iterator looking for interval that might overlap
j++;
}
else if ( mine.properlyContains(theirs) ) {
// overlap, add intersection, get next theirs
if ( intersection==null ) {
intersection = new IntervalSet();
}
intersection.add(mine.intersection(theirs));
j++;
}
else if ( theirs.properlyContains(mine) ) {
// overlap, add intersection, get next mine
if ( intersection==null ) {
intersection = new IntervalSet();
}
intersection.add(mine.intersection(theirs));
i++;
}
else if ( !mine.disjoint(theirs) ) {
// overlap, add intersection
if ( intersection==null ) {
intersection = new IntervalSet();
}
intersection.add(mine.intersection(theirs));
// Move the iterator of lower range [a..b], but not
// the upper range as it may contain elements that will collide
// with the next iterator. So, if mine=[0..115] and
// theirs=[115..200], then intersection is 115 and move mine
// but not theirs as theirs may collide with the next range
// in thisIter.
// move both iterators to next ranges
if ( mine.startsAfterNonDisjoint(theirs) ) {
j++;
}
else if ( theirs.startsAfterNonDisjoint(mine) ) {
i++;
}
}
}
if ( intersection==null ) {
return new IntervalSet();
}
return intersection;
}
/** Is el in any range of this set? */
public boolean member(int el) {
int n = intervals.size();
for (int i = 0; i < n; i++) {
Interval I = (Interval) intervals.get(i);
int a = I.a;
int b = I.b;
if ( el<a ) {
break; // list is sorted and el is before this interval; not here
}
if ( el>=a && el<=b ) {
return true; // found in this interval
}
}
return false;
}
/** return true if this set has no members */
public boolean isNil() {
return intervals==null || intervals.size()==0;
}
/** If this set is a single integer, return it; otherwise return Token.INVALID_TYPE */
public int getSingleElement() {
if ( intervals!=null && intervals.size()==1 ) {
Interval I = (Interval)intervals.get(0);
if ( I.a == I.b ) {
return I.a;
}
}
return Token.INVALID_TYPE;
}
public int getMaxElement() {
if ( isNil() ) {
return Token.INVALID_TYPE;
}
Interval last = (Interval)intervals.get(intervals.size()-1);
return last.b;
}
/** Return minimum element >= 0 */
public int getMinElement() {
if ( isNil() ) {
return Token.INVALID_TYPE;
}
int n = intervals.size();
for (int i = 0; i < n; i++) {
Interval I = (Interval) intervals.get(i);
int a = I.a;
int b = I.b;
for (int v=a; v<=b; v++) {
if ( v>=0 ) return v;
}
}
return Token.INVALID_TYPE;
}
/** Return a list of Interval objects. */
public List<Interval> getIntervals() {
return intervals;
}
@Override
public int hashCode() {
if ( isNil() ) return 0;
int n = 0;
// just add left edge of intervals
for (Interval I : intervals) n += I.a;
return n;
}
/** Are two IntervalSets equal? Because all intervals are sorted
* and disjoint, equals is a simple linear walk over both lists
* to make sure they are the same. Interval.equals() is used
* by the List.equals() method to check the ranges.
*/
public boolean equals(Object obj) {
if ( obj==null || !(obj instanceof IntervalSet) ) {
return false;
}
IntervalSet other = (IntervalSet)obj;
return this.intervals.equals(other.intervals);
}
public String toString() {
return toString((Grammar)null);
}
public String toString(Grammar g) {
StringBuffer buf = new StringBuffer();
if ( this.intervals==null || this.intervals.size()==0 ) {
return "{}";
}
if ( this.size()>1 ) {
buf.append("{");
}
Iterator<Interval> iter = this.intervals.iterator();
while (iter.hasNext()) {
Interval I = iter.next();
int a = I.a;
int b = I.b;
if ( a==b ) {
if ( g!=null ) {
buf.append(g.getTokenDisplayName(a));
}
else {
buf.append(a);
}
}
else {
if ( g!=null ) {
if ( !g.isLexer() ) {
for (int i=a; i<=b; i++) {
if ( i>a ) buf.append(", ");
buf.append(g.getTokenDisplayName(i));
}
}
else {
buf.append(g.getTokenDisplayName(a)+".."+g.getTokenDisplayName(b));
}
}
else {
buf.append(a+".."+b);
}
}
if ( iter.hasNext() ) {
buf.append(", ");
}
}
if ( this.size()>1 ) {
buf.append("}");
}
return buf.toString();
}
public int size() {
int n = 0;
int numIntervals = intervals.size();
if ( numIntervals==1 ) {
Interval firstInterval = this.intervals.get(0);
return firstInterval.b-firstInterval.a+1;
}
for (int i = 0; i < numIntervals; i++) {
Interval I = (Interval) intervals.get(i);
n += (I.b-I.a+1);
}
return n;
}
public List<Integer> toList() {
List<Integer> values = new ArrayList<Integer>();
int n = intervals.size();
for (int i = 0; i < n; i++) {
Interval I = (Interval) intervals.get(i);
int a = I.a;
int b = I.b;
for (int v=a; v<=b; v++) {
values.add(Utils.integer(v));
}
}
return values;
}
/** Get the ith element of ordered set. Used only by RandomPhrase so
* don't bother to implement if you're not doing that for a new
* ANTLR code gen target.
*/
public int get(int i) {
int n = intervals.size();
int index = 0;
for (int j = 0; j < n; j++) {
Interval I = (Interval) intervals.get(j);
int a = I.a;
int b = I.b;
for (int v=a; v<=b; v++) {
if ( index==i ) {
return v;
}
index++;
}
}
return -1;
}
public int[] toArray() {
int[] values = new int[size()];
int n = intervals.size();
int j = 0;
for (int i = 0; i < n; i++) {
Interval I = (Interval) intervals.get(i);
int a = I.a;
int b = I.b;
for (int v=a; v<=b; v++) {
values[j] = v;
j++;
}
}
return values;
}
public void remove(int el) {
throw new UnsupportedOperationException("IntervalSet.remove() unimplemented");
}
}
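A usage sketch for IntervalSet showing the run-length style merging described in the class comment; the IntervalSetDemo wrapper is ours, and the printed character codes follow the toString() above:

import org.antlr.v4.misc.IntervalSet;

public class IntervalSetDemo {
    public static void main(String[] args) {
        IntervalSet s = new IntervalSet();
        s.add('a', 'z');
        s.add('0', '9');
        s.add('{');                        // adjacent to 'z' (122): merges to 97..123
        System.out.println(s);             // {48..57, 97..123}
        System.out.println(s.member('q')); // true
        System.out.println(s.and(IntervalSet.of('x', '~'))); // {120..123}
    }
}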

View File

@@ -0,0 +1,30 @@
package org.antlr.v4.misc;
import java.util.*;
/** I need the get-element-i functionality so I'm subclassing
* LinkedHashMap.
*/
public class OrderedHashMap<K,V> extends LinkedHashMap<K,V> {
/** Track the keys in the order they are added to the map */
protected List<K> elements = new ArrayList<K>();
public K getKey(int i) { return elements.get(i); }
@Override
public V put(K key, V value) {
if ( !containsKey(key) ) elements.add(key); // track each key only once
return super.put(key, value);
}
@Override
public V remove(Object key) {
throw new UnsupportedOperationException();
}
@Override
public void clear() {
elements.clear();
super.clear();
}
}
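A tiny sketch of the get-element-i behavior OrderedHashMap adds over LinkedHashMap; the demo class is ours:

import org.antlr.v4.misc.OrderedHashMap;

public class OrderedHashMapDemo {
    public static void main(String[] args) {
        OrderedHashMap<String, Integer> m = new OrderedHashMap<String, Integer>();
        m.put("rule", 1);
        m.put("token", 2);
        System.out.println(m.getKey(0)); // "rule": keys by insertion index
        System.out.println(m.getKey(1)); // "token"
    }
}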

View File

@@ -0,0 +1,88 @@
package org.antlr.v4.misc;
import java.util.*;
/** A LinkedHashSet that remembers the order that the elements were added.
* You can alter the ith element with set(i,value) too :)  Unique list.
* I need the replace/set-element-i functionality so I'm subclassing
* LinkedHashSet.
*/
public class OrderedHashSet<T> extends LinkedHashSet<T> {
/** Track the elements as they are added to the set */
protected List<T> elements = new ArrayList<T>();
public T get(int i) {
return elements.get(i);
}
/** Replace an existing value with a new value; updates the element
* list and the hash table, but not the key as that has not changed.
*/
public T set(int i, T value) {
T oldElement = elements.get(i);
elements.set(i,value); // update list
super.remove(oldElement); // now update the set: remove/add
super.add(value);
return oldElement;
}
public boolean remove(int i) {
T o = elements.remove(i);
return super.remove(o);
}
/** Add a value to list; keep in hashtable for consistency also;
* Key is object itself. Good for say asking if a certain string is in
* a list of strings.
*/
public boolean add(T value) {
boolean result = super.add(value);
if ( result ) { // only track if new element not in set
elements.add(value);
}
return result;
}
public boolean remove(Object o) {
throw new UnsupportedOperationException();
}
public void clear() {
elements.clear();
super.clear();
}
@Override
public int hashCode() {
return elements.hashCode();
}
@Override
public boolean equals(Object o) {
if ( !(o instanceof OrderedHashSet) ) return false;
return elements!=null && elements.equals(((OrderedHashSet)o).elements);
}
@Override
public Iterator<T> iterator() {
return elements.iterator();
}
/** Return the List holding list of table elements. Note that you are
* NOT getting a copy so don't write to the list.
*/
public List<T> elements() {
return elements;
}
@Override
public Object[] toArray() {
return elements.toArray();
}
public String toString() {
return elements.toString();
}
}
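A short sketch of the index-based access and replacement that OrderedHashSet layers on LinkedHashSet; the demo class is ours:

import org.antlr.v4.misc.OrderedHashSet;

public class OrderedHashSetDemo {
    public static void main(String[] args) {
        OrderedHashSet<String> s = new OrderedHashSet<String>();
        s.add("a");
        s.add("b");
        s.add("a");                   // duplicate: not tracked twice
        System.out.println(s.get(1)); // "b"
        s.set(1, "c");                // replace ith element in list and set
        System.out.println(s);        // [a, c]
    }
}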

View File

@@ -0,0 +1,714 @@
// File : A3Lexer.g
// Author : Jim Idle (jimi@temporal-wave.com)
// Copyright : Free BSD - See @header clause below
// Version : First implemented as part of ANTLR 3.2 this is the self
// hosting ANTLR 3 Lexer.
//
// Description
// -----------
// This is the definitive lexer grammar for parsing ANTLR V3.x.x grammars. All other
// grammars are derived from this grammar via source code control integration (perforce)
// or by the gdiff tool.
//
// This grammar and its associated grammars A3Parser.g and A3Walker.g exhibit the following
// traits, which are recommended for all production quality grammars:
//
// 1) They are separate grammars, not composite grammars;
// 2) They implement all supporting methods in a superclass (at least this is recommended
// for language targets that support inheritance);
// 3) All errors are pushed as far down the parsing chain as possible, which means
// that the lexer tries to defer error reporting to the parser, and the parser
// tries to defer error reporting to a semantic phase consisting of a single
// walk of the AST. The reason for this is that the error messages produced
// from later phases of the parse will generally have better context and so
// be more useful to the end user. Consider the message: "Syntax error at 'options'"
// vs: "You cannot specify two options{} sections in a single grammar file".
// 4) The lexer is 'programmed' to catch common mistakes such as unterminated literals
// and report them specifically and not just issue confusing lexer mismatch errors.
//
/** Read in an ANTLR grammar and build an AST. Try not to do
* any actions, just build the tree.
*
* The phases are:
*
* A3Lexer.g (this file)
* A3Parser.g
* A3Verify.g (derived from A3Walker.g)
* assign.types.g
* define.g
* buildnfa.g
* antlr.print.g (optional)
* codegen.g
*
* Terence Parr
* University of San Francisco
* 2005
* Jim Idle (this v3 grammar)
* Temporal Wave LLC
* 2009
*/
lexer grammar ANTLRLexer;
// ==============================================================================
// Note that while this grammar does not care about order of constructs
// that don't really matter, such as options before @header etc, it must first
// be parsed by the original v2 parser, before it replaces it. That parser does
// care about order of structures. Hence we are constrained by the v2 parser
// for at least the first bootstrap release that causes this parser to replace
// the v2 version.
// ==============================================================================
// -------
// Options
//
// V3 option directives to tell the tool what we are asking of it for this
// grammar.
//
options {
// Target language is Java, which is the default but being specific
// here as this grammar is also meant as a good example grammar
// for users.
//
language = Java;
// The super class that this lexer should expect to inherit from, and
// which contains any and all support routines for the lexer. This is
// commented out in this baseline (definitive or normative grammar)
// - see the ANTLR tool implementation for hints on how to use the super
// class
//
//superclass = AbstractA3Lexer;
}
tokens { SEMPRED; FORCED_ACTION; }
// Include the copyright in this source and also the generated source
//
@lexer::header {
/*
[The "BSD licence"]
Copyright (c) 2005-2009 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.parse;
}
// +=====================+
// | Lexer specification |
// +=====================+
// --------
// Comments
//
// ANTLR comments can be multi or single line and we don't care
// which particularly. However we also accept Javadoc style comments
// of the form: /** ... */ and we do take care to distinguish those
// from ordinary multi-line comments
// Note how we guide the lexical PATH because we want to issue a descriptive
// error message in case of a standalone '/' character, which makes no
// sense in ANTLR source code. We also trap unterminated multi-line comments
//
fragment DOC_COMMENT : ;
COMMENT
@init {
// Record the start line and offsets as if we need to report an
// unterminated comment, then we want to show the start of the comment
// we think is broken, not the end, where people will have to try and work
// it out themselves.
//
int startLine = $line;
int offset = getCharPositionInLine();
}
: // Eat the first character only, then see if we have a comment
// or something silly.
//
'/' // Comment introducer
(
// Single line comment, possibly with embedded src/line directives
// in a similar style to the C pre-processor, allowing generated
// code to refer the programmer back to the original source code
// in case of error.
//
'/'
(
(' $ANTLR')=> ' $ANTLR' SRC
| ~(NLCHARS)*
)
| // Multi-line comment, which may be a documentation comment
// if it starts /** (note that we protect against accidentally
// recognizing a comment /**/ as a documentation comment
//
'*' (
{ input.LA(2) != '/'}?=> '*' { $type = DOC_COMMENT; }
| { true }?=> // Required to cover all alts with predicates
)
// Should we support embedded multiline comments here?
//
(
// Pick out end of multiline comment and exit the loop
// if we find it.
//
{ !(input.LA(1) == '*' && input.LA(2) == '/') }?
// Anything else other than the non-greedy match of
// the comment close sequence
//
.
)*
(
// Look for the comment terminator, but if it is accidentally
// unterminated, then we will hit EOF, which will trigger the
// epsilon alt and hence we can issue an error message relative
// to the start of the unterminated multi-line comment
//
'*/'
| // Unterminated comment!
//
{
// ErrorManager.msg(Msg.UNTERMINATED_DOC_COMMENT, startLine, offset, $pos, startLine, offset, $pos, (Object)null);
}
)
| // There was nothing that made sense following the opening '/' and so
// we issue an error regarding the malformed comment
//
{
// TODO: Insert error message relative to comment start
//
}
)
{
// Unless we had a documentation comment, then we do not wish to
// pass the comments in to the parser. If you are writing a formatter
// then you will want to preserve the comments off channel, but could
// just skip and save token space if not.
//
if ($type != DOC_COMMENT) {
$channel=2; // Comments are on channel 2
}
}
;
DOUBLE_QUOTE_STRING_LITERAL
: '"' (('\\')=>'\\' . | ~'"' )* '"'
;
DOUBLE_ANGLE_STRING_LITERAL
: '<<' (options {greedy=false;} : . )* '>>'
;
// --------------
// Argument specs
//
// Certain argument lists, such as those specifying call parameters
// to a rule invocation, or input parameters to a rule specification
// are contained within square brackets. In the lexer we consume them
// all at once and sort them out later in the grammar analysis.
//
ARG_ACTION
@init
{
StringBuffer theText = new StringBuffer();
}
: '['
(
('\\')=>'\\'
(
(']')=>']'
{
// We do not include the \ character itself when picking up an escaped ]
//
theText.append(']');
}
| c=.
{
// We DO include the \ character when finding any other escape
//
theText.append('\\');
theText.append((char)$c);
}
)
| ('"')=>as=ACTION_STRING_LITERAL
{
// Append the embedded string literal text
//
theText.append($as.text);
}
| ('\'')=>ac=ACTION_CHAR_LITERAL
{
// Append the embedded character literal text
//
theText.append($ac.text);
}
| c=~']'
{
// Whatever else we found in the scan
//
theText.append((char)$c);
}
)*
']'
{
// Set the token text to our gathered string
//
setText(theText.toString());
}
;
// -------
// Actions
//
// Other than making sure to distinguish between { and } embedded
// within what we have assumed to be literals in the action code, the
// job of the lexer is merely to gather the code within the action
// (delimited by {}) and pass it to the parser as a single token.
// Note the special case of the {{ }} action, which is a forced
// action, that the generated code will execute regardless of
// backtracking (predicate) level.
// We know that this token will be asked for its text somewhere
// in the upcoming parse, so setting the text here to exclude
// the delimiting {} is no additional overhead.
//
ACTION
: NESTED_ACTION ('?' {$type = SEMPRED;} )?
{
// Note that because of the sempred detection above, we
// will not see {{ action }}? as a forced action, but as a semantic
// predicate.
if ( $text.startsWith("{{") && $text.endsWith("}}") ) {
// Switch types to a forced action
$type = FORCED_ACTION;
}
}
;
// ----------------
// Action structure
//
// Many language targets use {} as block delimiters and so we
// must recursively match {} delimited blocks to balance the
// braces. Additionally, we must make some assumptions about
// literal string representation in the target language. We assume
// that they are delimited by ' or " and so consume these
// in their own alts so as not to inadvertently match {}.
// This rule calls itself on matching a {
//
fragment
NESTED_ACTION
@init {
// Record the start line and offsets as if we need to report an
// unterminated block, then we want to show the start of the comment
// we think is broken, not the end, where people will have to try and work
// it out themselves.
//
int startLine = getLine();
int offset = getCharPositionInLine();
}
: // Action and other blocks start with opening {
//
'{'
(
// And now we can match one of a number of embedded
// elements within the action until we find a
// } that balances the opening {. If we do not find
// the balanced } then we will hit EOF and can issue
// an error message about the brace that we believe to
// be mismatched. This won't be foolproof but we will
// be able to at least report an error against the
// opening brace that we feel is in error and this will
// guide the user to the correction as best we can.
//
// An embedded {} block
//
NESTED_ACTION
| // What appears to be a literal
//
ACTION_CHAR_LITERAL
| // We have assumed that the target language has C/Java
// type comments.
//
COMMENT
| // What appears to be a literal
//
ACTION_STRING_LITERAL
| // What appears to be an escape sequence
//
ACTION_ESC
| // Some other single character that is not
// handled above
//
~('\\'|'"'|'\''|'/'|'{'|'}')
)*
(
// Correctly balanced closing brace
//
'}'
| // Looks like we have an imbalanced {} block, report
// with respect to the opening brace.
//
{
// TODO: Report imbalanced {}
System.out.println("Block starting at line " + startLine + " offset " + (offset+1) + " contains imbalanced {} or is missing a }");
}
)
;
// Keywords
// --------
// Keywords used to specify ANTLR v3 grammars. Keywords may not be used as
// labels for rules or in any other context where they would be ambiguous
// with the keyword vs some other identifier.
// OPTIONS and TOKENS must also consume the opening brace that captures
// their option block, as this is the easiest way to parse it separately
// from an ACTION block, despite it using the same {} delimiters.
//
OPTIONS : 'options' WSNLCHARS* '{' ;
TOKENS : 'tokens' WSNLCHARS* '{' ;
SCOPE : 'scope' ;
IMPORT : 'import' ;
FRAGMENT : 'fragment' ;
LEXER : 'lexer' ;
PARSER : 'parser' ;
TREE : 'tree' ;
GRAMMAR : 'grammar' ;
PROTECTED : 'protected' ;
PUBLIC : 'public' ;
PRIVATE : 'private' ;
RETURNS : 'returns' ;
THROWS : 'throws' ;
CATCH : 'catch' ;
FINALLY : 'finally' ;
TEMPLATE : 'template' ;
MODE : 'mode' ;
// -----------
// Punctuation
//
// Character sequences used as separators, delimiters, operators, etc
//
COLON : ':' ;
COLONCOLON : '::' ;
COMMA : ',' ;
SEMI : ';' ;
LPAREN : '(' ;
RPAREN : ')' ;
IMPLIES : '=>' ;
LT : '<' ;
GT : '>' ;
ASSIGN : '=' ;
QUESTION : '?' ;
BANG : '!' ;
STAR : '*' ;
PLUS : '+' ;
PLUS_ASSIGN : '+=' ;
OR : '|' ;
ROOT : '^' ;
DOLLAR : '$' ;
DOT : '.' ; // can be WILDCARD or DOT in qid or imported rule ref
RANGE : '..' ;
ETC : '...' ;
RARROW : '->' ;
TREE_BEGIN : '^(' ;
AT : '@' ;
NOT : '~' ;
RBRACE : '}' ;
// ---------------
// Token reference
//
// The names of all tokens must start with an upper case letter and so
// the lexer can distinguish them directly.
//
TOKEN_REF
: ('A'..'Z') ('A'..'Z' | 'a'..'z' | '0'..'9' | '_')*
;
// --------------
// Rule reference
//
// The names of all rules must start with a lower case letter
// so the lexer can distinguish them directly. The parser takes
// care of the case such as id=rulename
//
RULE_REF
: ('a'..'z') ('A'..'Z' | 'a'..'z' | '0'..'9' | '_')*
;
// ----------------------------
// Literals embedded in actions
//
// Note that we have made the assumption that the language used within
// actions uses the fairly standard " and ' delimiters for literals and
// that within these literals, characters are escaped using the \ character.
// There are some languages which do not conform to this in all cases, such
// as by using /string/ and so on. We will have to deal with such cases
// if they come up in targets.
//
// Within actions, or other structures that are not part of the ANTLR
// syntax, we may encounter literal characters. Within these, we do
// not want to inadvertently match things like '}' and so we eat them
// specifically. While this rule is called CHAR it allows for the fact that
// some languages may use/allow ' as the string delimiter.
//
fragment
ACTION_CHAR_LITERAL
: '\'' (('\\')=>ACTION_ESC | ~'\'' )* '\''
;
// Within actions, or other structures that are not part of the ANTLR
// syntax, we may encounter literal strings. Within these, we do
// not want to inadvertently match things like '}' and so we eat them
// specifically.
//
fragment
ACTION_STRING_LITERAL
: '"' (('\\')=>ACTION_ESC | ~'"')* '"'
;
// Within literal strings and characters that are not part of the ANTLR
// syntax, we must allow for escaped character sequences so that we do not
// inadvertently recognize the end of a string or character when the terminating
// delimiter has been escaped.
//
fragment
ACTION_ESC
: '\\' .
;
// -------
// Integer
//
// Obviously (I hope) match an arbitrarily long sequence of digits.
//
INT : ('0'..'9')+
;
// -----------
// Source spec
//
// A fragment rule for picking up information about an originating
// file from which the grammar we are parsing has been generated. This allows
// ANTLR to report errors against the originating file and not the generated
// file.
//
fragment
SRC : 'src' WSCHARS+ file=ACTION_STRING_LITERAL WSCHARS+ line=INT
{
// TODO: Add target specific code to change the source file name and current line number
//
}
;
// --------------
// Literal string
//
// ANTLR makes no distinction between a single character literal and a
// multi-character string. All literals are single quote delimited and
// may contain unicode escape sequences of the form \uxxxx, where x
// is a valid hexadecimal number (as per Java basically).
STRING_LITERAL
@init {
int len = 0;
}
: '\'' ( ( ESC_SEQ | ~('\\'|'\'') ) {len++;} )* '\''
;
// A valid hex digit specification
//
fragment
HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
// Any kind of escaped character that we can embed within ANTLR
// literal strings.
//
fragment
ESC_SEQ
: '\\'
(
// The standard escaped character set such as tab, newline,
// etc.
//
'b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\'
| // A Java style Unicode escape sequence
//
UNICODE_ESC
| // An illegal escape sequence
//
{
// TODO: Issue error message
//
}
)
;
fragment
UNICODE_ESC
@init {
// Flag to tell us whether we have a valid number of
// hex digits in the escape sequence
//
int hCount = 0;
}
'u' // Lead-in for unicode escape sequence
// We now require 4 hex digits. Note though
// that we accept any number of characters
// and issue an error if we do not get 4. We cannot
// use an infinite count such as + because this
// might consume too many, so we lay out the lexical
// options and issue an error at the invalid paths.
//
(
(
HEX_DIGIT { hCount++; }
(
HEX_DIGIT { hCount++; }
(
HEX_DIGIT { hCount++; }
(
// Four valid hex digits, we are good
//
HEX_DIGIT { hCount++; }
| // Three valid digits
)
| // Two valid digits
)
| // One valid digit
)
)
| // No valid hex digits at all
)
// Now check the digit count and issue an error if we need to
//
{
if (hCount != 4) {
// TODO: Issue error message
}
}
;
// ----------
// Whitespace
//
// Characters and character constructs that are of no import
// to the parser and are used to make the grammar easier to read
// for humans.
//
WS
: (
' '
| '\t'
| '\r'
| '\n'
| '\f'
)+
{
$channel=2;
}
;
// A fragment rule for use in recognizing end of line in
// rules like COMMENT.
//
fragment
NLCHARS
: '\n' | '\r'
;
// A fragment rule for recognizing traditional whitespace
// characters within lexer rules.
//
fragment
WSCHARS
: ' ' | '\t' | '\f'
;
// A fragment rule for recognizing both traditional whitespace and
// end of line markers, when we don't care to distinguish but don't
// want any action code going on.
//
fragment
WSNLCHARS
: ' ' | '\t' | '\f' | '\n' | '\r'
;
// -----------------
// Illegal Character
//
// This is an illegal character trap which is always the last rule in the
// lexer specification. It matches a single character of any value and being
// the last rule in the file will match when no other rule knows what to do
// about the character. It is reported as an error but is not passed on to the
// parser. This means that the parser can deal with the grammar file anyway,
// but we will not try to analyse or generate code from a file with lexical
// errors.
//
ERRCHAR
: .
{
// TODO: Issue error message
//
skip();
}
;
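The UNICODE_ESC rule above spells out the nested optional hex digits rather than using HEX_DIGIT+ so it can count digits without over-consuming. A stand-alone Java sketch of the same counting check; the helper name and wrapper class are ours, not part of the lexer:

public class UnicodeEscCheck {
    /** Return true if exactly 4 hex digits follow the 'u' at position i. */
    static boolean validUnicodeEscape(String s, int i) {
        int hCount = 0;
        int j = i + 1; // first char after 'u'
        while (hCount < 4 && j < s.length()
               && Character.digit(s.charAt(j), 16) >= 0) {
            hCount++;
            j++;
        }
        return hCount == 4; // mirrors the hCount != 4 error test above
    }

    public static void main(String[] args) {
        System.out.println(validUnicodeEscape("\\u0041", 1)); // true
        System.out.println(validUnicodeEscape("\\u00", 1));   // false: too short
    }
}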

View File

@@ -0,0 +1,964 @@
/*
[The "BSD license"]
Copyright (c) 2010 Jim Idle, Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** The definitive ANTLR v3 grammar to parse ANTLR v4 grammars.
* The grammar builds ASTs that are sniffed by subsequent stages.
*/
parser grammar ANTLRParser;
options {
// Target language is Java, which is the default but being specific
// here as this grammar is also meant as a good example grammar
// for users.
language = Java;
// The output of this grammar is going to be an AST upon which
// we run a semantic checking phase, then the rest of the analysis
// including final code generation.
output = AST;
// The vocabulary (tokens and their int token types) we are using
// for the parser. This is generated by the lexer. The vocab will be extended
// to include the imaginary tokens below.
tokenVocab = ANTLRLexer;
ASTLabelType = GrammarAST;
}
// Imaginary Tokens
//
// Imaginary tokens do not exist as far as the lexer is concerned, and it cannot
// generate them. However we sometimes need additional 'tokens' to use as root
// nodes for the AST we are generating. The tokens section is where we
// specify any such tokens
tokens {
LEXER;
RULE;
RULES;
RULEMODIFIERS;
RULEACTIONS;
BLOCK;
REWRITE_BLOCK;
OPTIONAL;
CLOSURE;
POSITIVE_CLOSURE;
SYNPRED;
RANGE;
CHAR_RANGE;
EPSILON;
ALT;
ALTLIST;
ID;
ARG;
ARGLIST;
RET;
COMBINED;
INITACTION;
LABEL; // $x used in rewrite rules
TEMPLATE;
GATED_SEMPRED; // {p}? =>
SYN_SEMPRED; // (...) => it's a manually-specified synpred converted to sempred
BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
WILDCARD;
// A generic node indicating a list of something when we don't
// really need to distinguish what we have a list of as the AST
// will 'know' by context.
//
LIST;
ELEMENT_OPTIONS; // TOKEN<options>
ST_RESULT; // distinguish between ST and tree rewrites
RESULT;
ALT_REWRITE; // indicate ALT is rewritten
}
// Include the copyright in this source and also the generated source
//
@header {
/*
[The "BSD licence"]
Copyright (c) 2005-2009 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.parse;
import org.antlr.v4.tool.*;
}
@members {
Stack paraphrases = new Stack();
}
// The main entry point for parsing a V3 grammar from top to toe. This is
// the method call from whence to obtain the AST for the parse.
//
grammarSpec
:
// The grammar itself can have a documentation comment, which is the
// first terminal in the file.
//
DOC_COMMENT?
// Next we should see the type and name of the grammar file that
// we are about to parse.
//
grammarType id SEMI
// There now follows zero or more declaration sections that should
// be given to us before the rules are declared
//
// A number of things can be declared/stated before the grammar rules
// 'proper' are parsed. These include grammar imports (delegate), grammar
// options, imaginary token declarations, global scope declarations,
// and actions such as @header. In this rule we allow any number of
// these constructs in any order so that the grammar author is not
// constrained by some arbitrary order of declarations that nobody
// can remember. In the next phase of the parse, we verify that these
// constructs are valid, not repeated and so on.
sync ( prequelConstruct sync )*
// We should now see at least one ANTLR EBNF style rule
// declaration. If the rules are missing we will let the
// semantic verification phase tell the user about it.
//
rules
mode*
// And we force ANTLR to process everything it finds in the input
// stream by specifying the need to match End Of File before the
// parse is complete.
//
EOF
// Having parsed everything in the file and accumulated the relevant
// subtrees, we can now rewrite everything into the main AST form
// that our tree walkers are expecting.
//
-> ^(grammarType // The grammar type is our root AST node
id // We need to identify the grammar of course
DOC_COMMENT? // We may or may not have a global documentation comment for the file
prequelConstruct* // The set of declarations we accumulated
rules // And of course, we need the set of rules we discovered
mode*
)
;
grammarType
@after {
if ( $t!=null ) ((GrammarRootAST)$tree).grammarType = $t.type;
else ((GrammarRootAST)$tree).grammarType=COMBINED;
}
: ( t=LEXER g=GRAMMAR -> GRAMMAR<GrammarRootAST>[$g, "LEXER_GRAMMAR"]
| // A standalone parser specification
t=PARSER g=GRAMMAR -> GRAMMAR<GrammarRootAST>[$g, "PARSER_GRAMMAR"]
| // A standalone tree parser specification
t=TREE g=GRAMMAR -> GRAMMAR<GrammarRootAST>[$g, "TREE_GRAMMAR"]
// A combined lexer and parser specification
| g=GRAMMAR -> GRAMMAR<GrammarRootAST>[$g, "COMBINED_GRAMMAR"]
)
;
// This is the list of all constructs that can be declared before
// the set of rules that compose the grammar, and is invoked 0..n
// times by the grammarSpec rule.
prequelConstruct
: // A list of options that affect analysis and/or code generation
optionsSpec
| // A list of grammars to which this grammar will delegate certain
// parts of the parsing sequence - a set of imported grammars
delegateGrammars
| // The declaration of any token types we need that are not already
// specified by a preceding grammar, such as when a parser declares
// imaginary tokens with which to construct the AST, or a rewriting
// tree parser adds further imaginary tokens to ones defined in a prior
// {tree} parser.
tokensSpec
| // A declaration of a scope that may be used in multiple rules within
// the grammar spec, rather than being declared within, and therefore
// associated with, a specific rule.
attrScope
| // A declaration of language target implemented constructs. All such
// action sections start with '@' and are given to the language target's
// StringTemplate group. For instance @parser::header and @lexer::header
// are gathered here.
action
;
// A list of options that affect analysis and/or code generation
optionsSpec
: OPTIONS (option SEMI)* RBRACE -> ^(OPTIONS[$OPTIONS, "OPTIONS"] option+)
;
option
: id ASSIGN^ optionValue
;
// ------------
// Option Value
//
// The actual value of an option - Doh!
//
optionValue
: // If the option value is a single word that conforms to the
// lexical rules of token or rule names, then the user may skip quotes
// and so on. Many option values meet this description
//
qid
| // The value is a long string
//
STRING_LITERAL<TerminalAST>
| // The value was an integer number
//
INT
| // Asterisk, used for things like k=*
//
STAR
;
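// For example (illustrative settings only):
//
//   options { language = Java; output = 'AST'; k = 2; }  // qid, string, int
//   options { k = *; }                                   // STAR, as in k=*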
// A list of grammars to which this grammar will delegate certain
// parts of the parsing sequence - a set of imported grammars
delegateGrammars
: IMPORT delegateGrammar (COMMA delegateGrammar)* SEMI -> ^(IMPORT delegateGrammar+)
;
// A possibly named grammar file that should be imported to this grammar
// and delegated to for the rules it specifies
delegateGrammar
: id ASSIGN^ id
| id
;
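// For example (hypothetical grammar names):
//
//   import CommonLexer, java = JavaDecls;
//
// where the second delegate is imported under the alias 'java'.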
/** The declaration of any token types we need that are not already
 * specified by a preceding grammar, such as when a parser declares
* imaginary tokens with which to construct the AST, or a rewriting
* tree parser adds further imaginary tokens to ones defined in a prior
* {tree} parser.
*/
tokensSpec
: TOKENS tokenSpec+ RBRACE -> ^(TOKENS tokenSpec+)
;
tokenSpec
: id
( ASSIGN STRING_LITERAL -> ^(ASSIGN id STRING_LITERAL<TerminalAST>)
| -> id
)
SEMI
| RULE_REF // INVALID! (an error alt)
;
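// For example (illustrative only):
//
//   tokens { INDENT; VDECL = 'var'; }
//
// declaring an imaginary INDENT token and aliasing VDECL to a literal.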
// A declaration of a scope that may be used in multiple rules within
// the grammar spec, rather than being declared within and therefore associated
// with, a specific rule.
attrScope
: SCOPE id ACTION -> ^(SCOPE id ACTION<ActionAST>)
;
// A declaration of a language target specific section,
// such as @header, @includes and so on. We do not verify these
// sections, they are just passed on to the language target.
/** Match stuff like @parser::members {int i;} */
action
: AT (actionScopeName COLONCOLON)? id ACTION -> ^(AT actionScopeName? id ACTION<ActionAST>)
;
/** Sometimes the scope names will collide with keywords; allow them as
* ids for action scopes.
*/
actionScopeName
: id
| LEXER -> ID[$LEXER]
| PARSER -> ID[$PARSER]
;
mode: MODE id SEMI sync (rule sync)+ -> ^(MODE id rule+) ;
rules
: sync (rule sync)*
// Rewrite with an enclosing node as this is good for counting
// the number of rules and an easy marker for the walker to detect
// that there are no rules.
->^(RULES rule*)
;
sync
@init {
BitSet followSet = computeErrorRecoverySet();
if ( input.LA(1)!=Token.EOF && !followSet.member(input.LA(1)) ) {
reportError(new NoViableAltException("",0,0,input));
beginResync();
consumeUntil(input, followSet);
endResync();
}
} :
;
// The specification of an EBNF rule in ANTLR style, with all the
// rule level parameters, declarations, actions, rewrite specs and so
// on.
//
// Note that here we allow any number of rule declaration sections (such
// as scope, returns, etc) in any order and we let the upcoming semantic
// verification of the AST determine if things are repeated or if a
// particular functional element is not valid in the context of the
// grammar type, such as using returns in lexer rules and so on.
rule
@init { paraphrases.push("matching a rule"); }
@after { paraphrases.pop(); }
: // A rule may start with an optional documentation comment
DOC_COMMENT?
// Following the documentation, we can declare a rule to be
// public, private and so on. This is only valid for some
// language targets of course but the target will ignore these
// modifiers if they make no sense in that language.
ruleModifiers?
// Next comes the rule name. Here we do not distinguish between
// parser or lexer rules, the semantic verification phase will
// reject any rules that make no sense, such as lexer rules in
// a pure parser or tree parser.
id
// Immediately following the rule name, there may be a specification
// of input parameters for the rule. We do not do anything with the
// parameters here except gather them for future phases such as
// semantic verification, type assignment etc. We require that
// the input parameters are the next syntactically significant element
// following the rule id.
ARG_ACTION?
ruleReturns?
// Now, before the rule specification itself, which is introduced
// with a COLON, we may have zero or more configuration sections.
// As usual we just accept anything that is syntactically valid for
// one form of the rule or another and let the semantic verification
// phase throw out anything that is invalid.
// At the rule level, a programmer may specify a number of sections, such
// as scope declarations, rule return elements, @ sections (which may be
// language target specific) and so on. We allow any number of these in any
// order here and as usual rely on the semantic verification phase to reject
// anything invalid using its additional context information. Here we are
// context free and just accept anything that is a syntactically correct
// construct.
//
rulePrequels
COLON
// The rule is, at the top level, just a list of alts, with
// finer grained structure defined within the alts.
ruleBlock
SEMI
exceptionGroup
-> ^( RULE<RuleAST> id DOC_COMMENT? ruleModifiers? ARG_ACTION?
ruleReturns? rulePrequels? ruleBlock exceptionGroup*
)
;
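// For illustration, a hypothetical rule exercising most of the optional
// sections accepted above (names and actions are invented):
//
//   /** doc */
//   public r[int x] returns [int y]
//   options { backtrack = true; }
//   scope { int depth; }
//   @init { $y = 0; }
//     : ID
//     ;
//     catch [RecognitionException e] { recover(input, e); }
//     finally { }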
// Many language targets support exceptions and the rule will
// generally be able to throw the language target equivalent
// of a recognition exception. The grammar programmer can
// specify a list of exceptions to catch or a generic catch all
// and the target language code generation template is
// responsible for generating code that makes sense.
exceptionGroup
: exceptionHandler* finallyClause?
;
// Specifies a handler for a particular type of exception
// thrown by a rule
exceptionHandler
: CATCH ARG_ACTION ACTION -> ^(CATCH ARG_ACTION ACTION<ActionAST>)
;
// Specifies a block of code to run after the rule and any
// exception blocks have executed.
finallyClause
: FINALLY ACTION -> ^(FINALLY ACTION<ActionAST>)
;
rulePrequels
@init { paraphrases.push("matching rule preamble"); }
@after { paraphrases.pop(); }
: sync (rulePrequel sync)* -> rulePrequel*
;
// An individual rule-level configuration as referenced by the rulePrequels
// rule above.
//
rulePrequel
: throwsSpec
| ruleScopeSpec
| optionsSpec
| ruleAction
;
// A rule can return elements that it constructs as it executes.
// The return values are specified in a 'returns' prequel element,
// which contains COMMA separated declarations, where the declaration
// is target language specific. Here we see the returns declaration
// as a single lexical action element, to be processed later.
//
ruleReturns
: RETURNS^ ARG_ACTION
;
// --------------
// Exception spec
//
// Some target languages, such as Java and C# support exceptions
// and they are specified as a prequel element for each rule that
// wishes to throw its own exception type. Note that the name of the
// exception is just a single word, so the header section of the grammar
// must specify the correct import statements (or language equivalent).
// Target languages that do not support exceptions just safely ignore
// them.
//
throwsSpec
: THROWS qid (COMMA qid)* -> ^(THROWS qid+)
;
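// For example (hypothetical exception type):
//
//   r throws BadSymbolException : ID ;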
// As well as supporting globally specified scopes, ANTLR supports rule
// level scopes, which are tracked in a rule specific stack. Rule specific
// scopes are specified at this level, and globally specified scopes
// are merely referenced here.
ruleScopeSpec
: SCOPE ACTION -> ^(SCOPE ACTION)
| SCOPE id (COMMA id)* SEMI -> ^(SCOPE id+)
;
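// For example, within a rule (illustrative only):
//
//   scope { int depth; }        // rule-local dynamic scope declaration
//   scope Symbols, GlobalVars;  // references to globally declared scopes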
// @ Sections are generally target language specific things
// such as local variable declarations, code to run before the
// rule starts and so on. For instance most targets support the
// @init {} section where declarations and code can be placed
// to run before the rule is entered. The C target also has
// an @declarations {} section, where local variables are declared
// in order that the generated code is C89 compliant.
//
/** Match stuff like @init {int i;} */
ruleAction
: AT id ACTION -> ^(AT id ACTION<ActionAST>)
;
// A set of access modifiers that may be applied to rule declarations
// and which may or may not mean something to the target language.
// Note that the parser allows any number of these in any order and the
// semantic pass will throw out invalid combinations.
//
ruleModifiers
: ruleModifier+ -> ^(RULEMODIFIERS ruleModifier+)
;
// An individual access modifier for a rule. The 'fragment' modifier
// is an internal indication for lexer rules that they do not match
// from the input but are like subroutines for other lexer rules to
// reuse for certain lexical patterns. The other modifiers are passed
// to the code generation templates and may be ignored by the template
// if they are of no use in that language.
ruleModifier
: PUBLIC
| PRIVATE
| PROTECTED
| FRAGMENT
;
altList
: alternative (OR alternative)* -> alternative+
;
// A set of alts, rewritten as a BLOCK for generic processing
// in tree walkers. Used by the rule 'rule' so that the list of
// alts for a rule appears as a BLOCK containing the alts and
// can be processed by the generic BLOCK rule. Note that we
// use a separate rule so that the BLOCK node has start and stop
// boundaries set correctly by rule post processing of rewrites.
ruleBlock
@init {Token colon = input.LT(-1);}
: altList -> ^(BLOCK<BlockAST>[colon,"BLOCK"] altList)
;
catch [ResyncToEndOfRuleBlock e] {
// just resyncing; ignore error
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null);
}
// An individual alt with an optional rewrite clause for the
// elements of the alt.
alternative
@init { paraphrases.push("matching alternative"); }
@after { paraphrases.pop(); }
: elements
( rewrite -> ^(ALT_REWRITE<AltAST> elements rewrite)
| -> elements
)
| rewrite -> ^(ALT_REWRITE<AltAST> ^(ALT<AltAST> EPSILON) rewrite) // empty alt with rewrite
| -> ^(ALT<AltAST> EPSILON) // empty alt
;
elements
: e+=element+ -> ^(ALT<AltAST> $e+)
;
element
@init {
paraphrases.push("looking for rule element");
int m = input.mark();
}
@after { paraphrases.pop(); }
: labeledElement
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$labeledElement.start,"BLOCK"] ^(ALT<AltAST> labeledElement ) ))
| -> labeledElement
)
| atom
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$atom.start,"BLOCK"] ^(ALT<AltAST> atom) ) )
| -> atom
)
| ebnf
| ACTION<ActionAST>
| FORCED_ACTION<ActionAST>
| SEMPRED
( IMPLIES -> GATED_SEMPRED[$SEMPRED]
| -> SEMPRED<PredAST>
)
| treeSpec
( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK<BlockAST>[$treeSpec.start,"BLOCK"] ^(ALT<AltAST> treeSpec ) ) )
| -> treeSpec
)
;
catch [RecognitionException re] {
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re);
int ttype = input.get(input.range()).getType();
// look for anything that really belongs at the start of the rule minus the initial ID
if ( ttype==COLON || ttype==RETURNS || ttype==CATCH || ttype==FINALLY || ttype==AT ) {
RecognitionException missingSemi =
new v4ParserException("unterminated rule (missing ';') detected at '"+
input.LT(1).getText()+" "+input.LT(2).getText()+"'", input);
reportError(missingSemi);
if ( ttype==CATCH || ttype==FINALLY ) {
input.seek(input.range()); // ignore what's before rule trailer stuff
}
if ( ttype==RETURNS || ttype==AT ) { // scan back looking for ID of rule header
int p = input.index();
Token t = input.get(p);
while ( t.getType()!=RULE_REF && t.getType()!=TOKEN_REF ) {
p--;
t = input.get(p);
}
input.seek(p);
}
throw new ResyncToEndOfRuleBlock(); // make sure it goes back to rule block level to recover
}
reportError(re);
recover(input,re);
/*
input.rewind(m);
final List subset = input.get(input.index(), input.range());
System.out.println("failed to match as element: '"+subset);
CommonTokenStream ns = new CommonTokenStream(
new TokenSource() {
int i = 0;
public Token nextToken() {
if ( i>=subset.size() ) return Token.EOF_TOKEN;
return (Token)subset.get(i++);
}
public String getSourceName() { return null; }
});
ANTLRParser errorParser = new ANTLRParser(ns);
errorParser.setTreeAdaptor(this.adaptor);
errorParser.element_errors(re);
retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re);
*/
}
/*
element_errors[RecognitionException origError]
options {backtrack=true;}
@init {
int m = input.mark();
//state.backtracking++;
}
@after {
//state.backtracking--;
}
: ( DOC_COMMENT? ruleModifiers? id ARG_ACTION? ruleReturns? rulePrequel* COLON
| exceptionGroup
)
{reportError(missingSemi); recover(input,null);}
;
catch [RecognitionException ignore] {
input.rewind(m);
input.consume(); // kill at least one token
reportError(origError);
BitSet followSet = computeErrorRecoverySet();
beginResync();
consumeUntil(input, followSet);
endResync();
}
*/
labeledElement : id (ASSIGN^|PLUS_ASSIGN^) (atom|block) ;
// Tree specifying alt
// Tree grammars need alts that describe the tree structure they
// will walk. Alts for trees therefore start with ^(XXX, which
// says we will see a root node of XXX, then DOWN, and so on.
treeSpec
: TREE_BEGIN
// Only a subset of elements are allowed to be a root node. However
// we allow any element to appear here and reject silly ones later
// when we walk the AST.
element
// After the tree root we get the usual suspects,
// all members of the element set
element+
RPAREN
-> ^(TREE_BEGIN element+)
;
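// For example, a hypothetical tree alt matching an assignment node:
//
//   ^(ASSIGN ID expr)
//
// where ASSIGN is the root and ID and expr are its children.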
// A block of grammar structure optionally followed by standard EBNF
// notation, or ANTLR-specific notation, i.e. ?, +, ^ and so on
ebnf
: block
// And now we see if we have any of the optional suffixes and rewrite
// the AST for this rule accordingly
//
( blockSuffixe -> ^(blockSuffixe block)
| -> block
)
;
// The standard EBNF suffixes with additional components that make
// sense only to ANTLR, in the context of a grammar block.
blockSuffixe
: ebnfSuffix // Standard EBNF
// ANTLR Specific Suffixes
| ROOT
| IMPLIES // We will change this to syn/sem pred in the next phase
| BANG
;
ebnfSuffix
@init {
Token op = input.LT(1);
}
: QUESTION -> OPTIONAL[op]
| STAR -> CLOSURE[op]
| PLUS -> POSITIVE_CLOSURE[op]
;
atom: // Qualified reference delegate.rule. This must be
// lexically contiguous (no spaces on either side of the DOT)
// otherwise it is two references with a wildcard in between
// and not a qualified reference.
{
input.LT(1).getCharPositionInLine()+input.LT(1).getText().length()==
input.LT(2).getCharPositionInLine() &&
input.LT(2).getCharPositionInLine()+1==input.LT(3).getCharPositionInLine()
}?
id DOT ruleref -> ^(DOT id ruleref)
| range (ROOT^ | BANG^)? // Range x..y - only valid in lexers
| terminal (ROOT^ | BANG^)?
| ruleref
| notSet (ROOT^|BANG^)?
| // Wildcard '.' means any character in a lexer, any
// token in parser and any token or node in a tree parser
// Because the terminal rule is allowed to be the node
// specification for the start of a tree rule, we must
// later check that wildcard was not used for that.
DOT elementOptions? -> ^(WILDCARD<TerminalAST>[$DOT] elementOptions?)
;
catch [RecognitionException re] { throw re; } // pass upwards to element
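// For example, with no intervening spaces 'delegate.r' is a single
// qualified reference to rule r in the imported grammar 'delegate'
// (hypothetical names), whereas 'delegate . r' is a reference, a
// wildcard, and another reference.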
// --------------------
// Inverted element set
//
// A set of characters (in a lexer) or terminal tokens (in a parser)
// that is then used to create the inverse set.
//
notSet
: NOT terminal -> ^(NOT terminal)
| NOT blockSet -> ^(NOT blockSet)
;
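// For example (lexer and parser forms respectively):
//
//   ~'a'          // any character except 'a'
//   ~(ID | INT)   // any token except ID or INT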
blockSet
: LPAREN
setElement (OR setElement)*
RPAREN
-> ^(BLOCK<BlockAST>[$LPAREN,"BLOCK"] setElement+ )
;
setElement
: range
| terminal
;
// -------------
// Grammar Block
//
// Anywhere where an element is valid, the grammar may start a new block
// of alts by surrounding that block with ( ). A new block may also have a set
// of options, which apply only to that block.
//
block
: LPAREN
// A new blocked altList may have a set of options set specifically
// for it.
( optionsSpec? ra+=ruleAction* COLON )?
altList
RPAREN
-> ^(BLOCK<BlockAST>[$LPAREN,"BLOCK"] optionsSpec? $ra* altList )
;
// ----------------
// Parser rule ref
//
// Reference to a parser rule with optional arguments and optional
// directive to become the root node or ignore the tree produced
//
ruleref
: RULE_REF ARG_ACTION?
( (op=ROOT|op=BANG) -> ^($op ^(RULE_REF ARG_ACTION?))
| -> ^(RULE_REF ARG_ACTION?)
)
;
catch [RecognitionException re] { throw re; } // pass upwards to element
// ---------------
// Character Range
//
// Specifies a range of characters. Valid for lexer rules only, but
// we do not check that here; the tree walkers should do that.
// Note also that the parser allows through more than just
// character literals so that we can produce a much nicer semantic
// error about any abuse of the .. operator.
//
range
: STRING_LITERAL<TerminalAST> RANGE^ STRING_LITERAL<TerminalAST>
;
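// For example, in a hypothetical lexer rule:
//
//   LETTER : 'a'..'z' ;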
terminal
: // Args are only valid for lexer rules
TOKEN_REF ARG_ACTION? elementOptions? -> ^(TOKEN_REF<TerminalAST> ARG_ACTION? elementOptions?)
| STRING_LITERAL elementOptions? -> ^(STRING_LITERAL<TerminalAST> elementOptions?)
;
// Terminals may be adorned with certain options when
// referenced in the grammar: TOK<,,,>
elementOptions
: LT elementOption (COMMA elementOption)* GT -> ^(ELEMENT_OPTIONS elementOption+)
;
// When used with elements we can specify what the tree node type should
// be and also assign settings of various options (which we do not check here)
elementOption
: // This format indicates the default node option
qid
| // This format indicates option assignment
id ASSIGN^ (qid | STRING_LITERAL<TerminalAST>)
;
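// For example (hypothetical node class and option names):
//
//   ID<VarNode>                  // default option: the AST node type
//   ID<node=VarNode, text='x'>   // explicit option assignments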
rewrite
: predicatedRewrite* nakedRewrite -> predicatedRewrite* nakedRewrite
;
predicatedRewrite
: RARROW SEMPRED rewriteAlt
-> {$rewriteAlt.isTemplate}? ^(ST_RESULT[$RARROW] SEMPRED<PredAST> rewriteAlt)
-> ^(RESULT[$RARROW] SEMPRED<PredAST> rewriteAlt)
;
nakedRewrite
: RARROW rewriteAlt -> {$rewriteAlt.isTemplate}? ^(ST_RESULT[$RARROW] rewriteAlt)
-> ^(RESULT[$RARROW] rewriteAlt)
;
// Distinguish between ST and tree rewrites; for ETC/EPSILON and trees,
// rule altAndRewrite makes REWRITE the root; for ST, we use ST_REWRITE
rewriteAlt returns [boolean isTemplate]
options {backtrack=true;}
: // try to parse a template rewrite
rewriteTemplate {$isTemplate=true;}
| // If we are not building templates, then we must be
// building ASTs or have rewrites in a grammar that does not
// have output=AST; options. If that is the case, we will issue
// errors/warnings in the next phase, so we just eat them here
rewriteTreeAlt
| ETC
| /* empty rewrite */ -> EPSILON
;
rewriteTreeAlt
: rewriteTreeElement+ -> ^(ALT rewriteTreeElement+)
;
rewriteTreeElement
: rewriteTreeAtom
| rewriteTreeAtom ebnfSuffix -> ^( ebnfSuffix ^(REWRITE_BLOCK ^(ALT rewriteTreeAtom)) )
| rewriteTree
( ebnfSuffix
-> ^(ebnfSuffix ^(REWRITE_BLOCK ^(ALT rewriteTree)) )
| -> rewriteTree
)
| rewriteTreeEbnf
;
rewriteTreeAtom
: TOKEN_REF elementOptions? ARG_ACTION? -> ^(TOKEN_REF<TerminalAST> elementOptions? ARG_ACTION?) // for imaginary nodes
| RULE_REF
| STRING_LITERAL elementOptions? -> ^(STRING_LITERAL<TerminalAST> elementOptions?)
| DOLLAR id -> LABEL[$DOLLAR,$id.text] // reference to a label in a rewrite rule
| ACTION<ActionAST>
;
rewriteTreeEbnf
@init {
Token firstToken = input.LT(1);
}
@after {
$rewriteTreeEbnf.tree.getToken().setLine(firstToken.getLine());
$rewriteTreeEbnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
}
: lp=LPAREN rewriteTreeAlt RPAREN ebnfSuffix -> ^(ebnfSuffix ^(REWRITE_BLOCK[$lp] rewriteTreeAlt))
;
rewriteTree
: TREE_BEGIN rewriteTreeAtom rewriteTreeElement* RPAREN
-> ^(TREE_BEGIN rewriteTreeAtom rewriteTreeElement* )
;
/** Build a tree for a template rewrite:
^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
ID can be "template" keyword. If first child is ACTION then it's
an indirect template ref
-> foo(a={...}, b={...})
-> ({string-e})(a={...}, b={...}) // e evaluates to template name
-> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
-> {st-expr} // st-expr evaluates to ST
*/
rewriteTemplate
: // -> template(a={...},...) "..." inline template
TEMPLATE LPAREN rewriteTemplateArgs RPAREN
( str=DOUBLE_QUOTE_STRING_LITERAL | str=DOUBLE_ANGLE_STRING_LITERAL )
-> ^(TEMPLATE[$TEMPLATE,"TEMPLATE"] rewriteTemplateArgs? $str)
| // -> foo(a={...}, ...)
rewriteTemplateRef
| // -> ({expr})(a={...}, ...)
rewriteIndirectTemplateHead
| // -> {...}
ACTION<ActionAST>
;
/** -> foo(a={...}, ...) */
rewriteTemplateRef
: id LPAREN rewriteTemplateArgs RPAREN
-> ^(TEMPLATE[$LPAREN,"TEMPLATE"] id rewriteTemplateArgs?)
;
/** -> ({expr})(a={...}, ...) */
rewriteIndirectTemplateHead
: lp=LPAREN ACTION RPAREN LPAREN rewriteTemplateArgs RPAREN
-> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION<ActionAST> rewriteTemplateArgs?)
;
rewriteTemplateArgs
: rewriteTemplateArg (COMMA rewriteTemplateArg)*
-> ^(ARGLIST rewriteTemplateArg+)
|
;
rewriteTemplateArg
: id ASSIGN ACTION -> ^(ARG[$ASSIGN] id ACTION<ActionAST>)
;
// The name of the grammar, and indeed some other grammar elements may
// come through to the parser looking like a rule reference or a token
// reference, hence this rule is used to pick up whichever it is and rewrite
// it as a generic ID token.
id
@init { paraphrases.push("looking for an identifier"); }
@after { paraphrases.pop(); }
: RULE_REF ->ID[$RULE_REF]
| TOKEN_REF ->ID[$TOKEN_REF]
| TEMPLATE ->ID[$TEMPLATE] // keyword
;
qid
@init { paraphrases.push("looking for a qualified identifier"); }
@after { paraphrases.pop(); }
: id (DOT id)* -> ID[$qid.start, $text]
;
alternativeEntry : alternative EOF ; // allow gunit to call alternative and see EOF afterwards
elementEntry : element EOF ;
ruleEntry : rule EOF ;
blockEntry : block EOF ;

View File

@@ -0,0 +1,431 @@
/*
[The "BSD license"]
Copyright (c) 2010 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** The definitive ANTLR v3 tree grammar to parse ANTLR v4 grammars.
* Parses trees created in ANTLRParser.g.
*/
tree grammar ASTVerifier;
options {
language = Java;
tokenVocab = ANTLRParser;
ASTLabelType = GrammarAST;
}
// Include the copyright in this source and also the generated source
@header {
/*
[The "BSD license"]
Copyright (c) 2005-2009 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.parse;
import org.antlr.v4.tool.*;
import org.antlr.v4.runtime.tree.CommonTree; // use updated v4 one not v3
}
@members {
public String getErrorMessage(RecognitionException e,
String[] tokenNames)
{
List stack = getRuleInvocationStack(e, this.getClass().getName());
String msg = null;
String inputContext =
// parenthesize each term: ?: binds more loosely than +
(input.LT(-3) == null ? "" : ((Tree)input.LT(-3)).getText()+" ")+
(input.LT(-2) == null ? "" : ((Tree)input.LT(-2)).getText()+" ")+
(input.LT(-1) == null ? "" : ((Tree)input.LT(-1)).getText()+" >>>")+
(input.LT(1) == null ? "" : ((Tree)input.LT(1)).getText()+"<<< ")+
(input.LT(2) == null ? "" : ((Tree)input.LT(2)).getText()+" ")+
(input.LT(3) == null ? "" : ((Tree)input.LT(3)).getText());
if ( e instanceof NoViableAltException ) {
NoViableAltException nvae = (NoViableAltException)e;
msg = " no viable alt; token="+e.token+
" (decision="+nvae.decisionNumber+
" state "+nvae.stateNumber+")"+
" decision=<<"+nvae.grammarDecisionDescription+">>";
}
else {
msg = super.getErrorMessage(e, tokenNames);
}
return stack+" "+msg+"\ncontext=..."+inputContext+"...";
}
public String getTokenErrorDisplay(Token t) {
return t.toString();
}
public void traceIn(String ruleName, int ruleIndex) {
System.out.print("enter "+ruleName+" "+
((GrammarAST)input.LT(1)).token+" "+
((GrammarAST)input.LT(2)).token+" "+
((GrammarAST)input.LT(3)).token+" "+
((GrammarAST)input.LT(4)).token);
if ( state.backtracking>0 ) {
System.out.print(" backtracking="+state.backtracking);
}
System.out.println();
}
protected void mismatch(IntStream input, int ttype, BitSet follow)
throws RecognitionException {
throw new MismatchedTokenException(ttype, input);
}
public void recoverFromMismatchedToken(IntStream input,
RecognitionException e, BitSet follow)
throws RecognitionException
{
throw e;
}
}
// Alter code generation so catch-clauses get replaced with this action.
@rulecatch { catch (RecognitionException e) {
throw e;
}
}
grammarSpec
: ^(GRAMMAR ID DOC_COMMENT? prequelConstruct* rules mode*)
;
prequelConstruct
: optionsSpec
| delegateGrammars
| tokensSpec
| attrScope
| action
;
optionsSpec
: ^(OPTIONS option*)
;
option
: ^(ASSIGN ID optionValue)
;
optionValue returns [String v]
@init {$v = $start.token.getText();}
: ID
| STRING_LITERAL
| INT
| STAR
;
delegateGrammars
: ^(IMPORT delegateGrammar+)
;
delegateGrammar
: ^(ASSIGN ID ID)
| ID
;
tokensSpec
: ^(TOKENS tokenSpec+)
;
tokenSpec
: ^(ASSIGN ID STRING_LITERAL)
| ID
;
attrScope
: ^(SCOPE ID ACTION)
;
action
: ^(AT ID? ID ACTION)
;
rules
: ^(RULES rule*)
;
mode: ^( MODE ID rule+ ) ;
rule: ^( RULE ID DOC_COMMENT? ruleModifiers? ARG_ACTION?
ruleReturns? rulePrequel* altListAsBlock exceptionGroup
)
;
exceptionGroup
: exceptionHandler* finallyClause?
;
exceptionHandler
: ^(CATCH ARG_ACTION ACTION)
;
finallyClause
: ^(FINALLY ACTION)
;
rulePrequel
: throwsSpec
| ruleScopeSpec
| optionsSpec
| ruleAction
;
ruleReturns
: ^(RETURNS ARG_ACTION)
;
throwsSpec
: ^(THROWS ID+)
;
ruleScopeSpec
: ^(SCOPE ACTION)
| ^(SCOPE ID+)
;
ruleAction
: ^(AT ID ACTION)
;
ruleModifiers
: ^(RULEMODIFIERS ruleModifier+)
;
ruleModifier
: PUBLIC
| PRIVATE
| PROTECTED
| FRAGMENT
;
altList
: alternative+
;
altListAsBlock
: ^(BLOCK altList)
;
alternative
: ^(ALT_REWRITE alternative rewrite)
| ^(ALT EPSILON)
| elements
;
elements
: ^(ALT element+)
;
element
: labeledElement
| atom
| ebnf
| ACTION
| FORCED_ACTION
| SEMPRED
| GATED_SEMPRED
| treeSpec
;
labeledElement
: ^(ASSIGN ID atom)
| ^(ASSIGN ID block)
| ^(PLUS_ASSIGN ID atom)
| ^(PLUS_ASSIGN ID block)
;
treeSpec
: ^(TREE_BEGIN element+)
;
ebnf: ^(blockSuffix block)
| block
;
blockSuffix
: ebnfSuffix
| ROOT
| IMPLIES
| BANG
;
ebnfSuffix
: OPTIONAL
| CLOSURE
| POSITIVE_CLOSURE
;
atom: ^(ROOT range)
| ^(BANG range)
| ^(ROOT notSet)
| ^(BANG notSet)
| notSet
| ^(ROOT terminal)
| ^(BANG terminal)
| range
| ^(DOT ID terminal)
| ^(DOT ID ruleref)
| ^(WILDCARD elementOptions)
| WILDCARD
| terminal
| ruleref
;
notSet
: ^(NOT setElement)
| ^(NOT blockSet)
;
blockSet
: ^(BLOCK setElement+)
;
setElement
: STRING_LITERAL
| TOKEN_REF
| ^(RANGE STRING_LITERAL STRING_LITERAL)
;
block
: ^(BLOCK optionsSpec? ruleAction* ACTION? altList)
;
ruleref
: ^(ROOT ^(RULE_REF ARG_ACTION?))
| ^(BANG ^(RULE_REF ARG_ACTION?))
| ^(RULE_REF ARG_ACTION?)
;
range
: ^(RANGE STRING_LITERAL STRING_LITERAL)
;
terminal
: ^(STRING_LITERAL elementOptions)
| STRING_LITERAL
| ^(TOKEN_REF ARG_ACTION elementOptions)
| ^(TOKEN_REF ARG_ACTION)
| ^(TOKEN_REF elementOptions)
| TOKEN_REF
;
elementOptions
: ^(ELEMENT_OPTIONS elementOption+)
;
elementOption
: ID
| ^(ASSIGN ID ID)
| ^(ASSIGN ID STRING_LITERAL)
;
rewrite
: predicatedRewrite* nakedRewrite
;
predicatedRewrite
: ^(ST_RESULT SEMPRED rewriteAlt)
| ^(RESULT SEMPRED rewriteAlt)
;
nakedRewrite
: ^(ST_RESULT rewriteAlt)
| ^(RESULT rewriteAlt)
;
rewriteAlt
: rewriteTemplate
| rewriteTreeAlt
| ETC
| EPSILON
;
rewriteTreeAlt
: ^(ALT rewriteTreeElement+)
;
rewriteTreeElement
: rewriteTreeAtom
| rewriteTree
| rewriteTreeEbnf
;
rewriteTreeAtom
: ^(TOKEN_REF elementOptions ARG_ACTION)
| ^(TOKEN_REF elementOptions)
| ^(TOKEN_REF ARG_ACTION)
| TOKEN_REF
| RULE_REF
| ^(STRING_LITERAL elementOptions)
| STRING_LITERAL
| LABEL
| ACTION
;
rewriteTreeEbnf
: ^(ebnfSuffix ^(REWRITE_BLOCK rewriteTreeAlt))
;
rewriteTree
: ^(TREE_BEGIN rewriteTreeAtom rewriteTreeElement* )
;
rewriteTemplate
: ^(TEMPLATE rewriteTemplateArgs? DOUBLE_QUOTE_STRING_LITERAL)
| ^(TEMPLATE rewriteTemplateArgs? DOUBLE_ANGLE_STRING_LITERAL)
| rewriteTemplateRef
| rewriteIndirectTemplateHead
| ACTION
;
rewriteTemplateRef
: ^(TEMPLATE ID rewriteTemplateArgs?)
;
rewriteIndirectTemplateHead
: ^(TEMPLATE ACTION rewriteTemplateArgs?)
;
rewriteTemplateArgs
: ^(ARGLIST rewriteTemplateArg+)
;
rewriteTemplateArg
: ^(ARG ID ACTION)
;

View File

@@ -0,0 +1,176 @@
/*
[The "BSD license"]
Copyright (c) 2010 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
tree grammar ATNBuilder;
options {
language = Java;
tokenVocab = ANTLRParser;
ASTLabelType = GrammarAST;
// filter = true;
}
// Include the copyright in this source and also the generated source
@header {
/*
[The "BSD license"]
Copyright (c) 2010 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.parse;
import org.antlr.v4.tool.*;
import org.antlr.v4.automata.ATNFactory;
import org.antlr.v4.runtime.tree.CommonTree; // use updated v4 one not v3
}
@members {
ATNFactory factory;
public ATNBuilder(TreeNodeStream input, ATNFactory factory) {
this(input);
this.factory = factory;
}
}
block[GrammarAST ebnfRoot] returns [ATNFactory.Handle p]
@init {List<ATNFactory.Handle> alts = new ArrayList<ATNFactory.Handle>();}
: ^(BLOCK (^(OPTIONS .+))? (a=alternative {alts.add($a.p);})+)
{$p = factory.block((BlockAST)$BLOCK, ebnfRoot, alts);}
;
alternative returns [ATNFactory.Handle p]
@init {List<ATNFactory.Handle> els = new ArrayList<ATNFactory.Handle>();}
: ^(ALT_REWRITE a=alternative .) {$p = $a.p;}
| ^(ALT EPSILON) {$p = factory.epsilon($EPSILON);}
| ^(ALT (e=element {els.add($e.p);})+)
{$p = factory.alt(els);}
;
element returns [ATNFactory.Handle p]
: labeledElement {$p = $labeledElement.p;}
| atom {$p = $atom.p;}
| ebnf {$p = $ebnf.p;}
| ACTION {$p = factory.action((ActionAST)$ACTION);}
| FORCED_ACTION {$p = factory.action((ActionAST)$FORCED_ACTION);}
| SEMPRED {$p = factory.sempred((PredAST)$SEMPRED);}
| GATED_SEMPRED {$p = factory.gated_sempred($GATED_SEMPRED);}
| treeSpec {$p = $treeSpec.p;}
;
labeledElement returns [ATNFactory.Handle p]
: ^(ASSIGN ID atom) {$p = factory.label($atom.p);}
| ^(ASSIGN ID block[null]) {$p = factory.label($block.p);}
| ^(PLUS_ASSIGN ID atom) {$p = factory.listLabel($atom.p);}
| ^(PLUS_ASSIGN ID block[null]) {$p = factory.listLabel($block.p);}
;
treeSpec returns [ATNFactory.Handle p]
@init {List<ATNFactory.Handle> els = new ArrayList<ATNFactory.Handle>();}
: ^(TREE_BEGIN (e=element {els.add($e.p);})+) {$p = factory.tree(els);}
;
ebnf returns [ATNFactory.Handle p]
: ^(astBlockSuffix block[null]) {$p = $block.p;}
| ^(OPTIONAL block[$start]) {$p = $block.p;}
| ^(CLOSURE block[$start]) {$p = $block.p;}
| ^(POSITIVE_CLOSURE block[$start]) {$p = $block.p;}
| block[null] {$p = $block.p;}
;
astBlockSuffix
: ROOT
| IMPLIES
| BANG
;
atom returns [ATNFactory.Handle p]
: ^(ROOT range) {$p = $range.p;}
| ^(BANG range) {$p = $range.p;}
| ^(ROOT notSet) {$p = $notSet.p;}
| ^(BANG notSet) {$p = $notSet.p;}
| notSet {$p = $notSet.p;}
| range {$p = $range.p;}
| ^(DOT ID terminal) {$p = $terminal.p;}
| ^(DOT ID ruleref) {$p = $ruleref.p;}
| ^(WILDCARD .) {$p = factory.wildcard($start);}
| WILDCARD {$p = factory.wildcard($start);}
| terminal {$p = $terminal.p;}
| ruleref {$p = $ruleref.p;}
;
notSet returns [ATNFactory.Handle p]
: ^(NOT setElement) {$p = factory.not($NOT);}
| ^(NOT blockSet) {$p = factory.notBlock($NOT, $blockSet.alts);}
;
blockSet returns [List<GrammarAST> alts]
@init {$alts = new ArrayList<GrammarAST>();}
: ^(BLOCK (t=setElement {$alts.add($t.start);})+)
;
setElement
: STRING_LITERAL
| TOKEN_REF
| ^(RANGE STRING_LITERAL STRING_LITERAL)
;
ruleref returns [ATNFactory.Handle p]
: ^(ROOT ^(RULE_REF ARG_ACTION?)) {$p = factory.ruleRef($RULE_REF);}
| ^(BANG ^(RULE_REF ARG_ACTION?)) {$p = factory.ruleRef($RULE_REF);}
| ^(RULE_REF ARG_ACTION?) {$p = factory.ruleRef($RULE_REF);}
;
range returns [ATNFactory.Handle p]
: ^(RANGE a=STRING_LITERAL b=STRING_LITERAL) {$p = factory.range($a,$b);}
;
terminal returns [ATNFactory.Handle p]
: ^(STRING_LITERAL .) {$p = factory.stringLiteral((TerminalAST)$start);}
| STRING_LITERAL {$p = factory.stringLiteral((TerminalAST)$start);}
| ^(TOKEN_REF ARG_ACTION .) {$p = factory.tokenRef((TerminalAST)$start);}
| ^(TOKEN_REF .) {$p = factory.tokenRef((TerminalAST)$start);}
| TOKEN_REF {$p = factory.tokenRef((TerminalAST)$start);}
| ^(ROOT t=terminal) {$p = $t.p;}
| ^(BANG t=terminal) {$p = $t.p;}
;

View File

@@ -0,0 +1,172 @@
lexer grammar ActionSplitter;
options { filter=true; }
@header {
package org.antlr.v4.parse;
import org.antlr.v4.tool.*;
}
@members {
ActionSplitterListener delegate;
public ActionSplitter(CharStream input, ActionSplitterListener delegate) {
this(input, new RecognizerSharedState());
this.delegate = delegate;
}
public void emit(Token token) {
super.emit(token);
}
/** Force filtering (and return the tokens); triggers all of the actions above. */
public List<Token> getActionTokens() {
List<Token> chunks = new ArrayList<Token>();
Token t = nextToken();
while ( t.getType()!=Token.EOF ) {
chunks.add(t);
t = nextToken();
}
return chunks;
}
}
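// Illustrative usage only (the listener instance is hypothetical):
//
//   ActionSplitter splitter =
//       new ActionSplitter(new ANTLRStringStream("$x.y = $z;"), listener);
//   List<Token> chunks = splitter.getActionTokens(); // fires listener callbacks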
// ignore comments right away
COMMENT
: '/*' ( options {greedy=false;} : . )* '*/' {delegate.text($text);}
;
LINE_COMMENT
: '//' ~('\n'|'\r')* '\r'? '\n' {delegate.text($text);}
;
ESC
: '\\$' {delegate.text("$");}
| '\\%' {delegate.text("\%");}
;
SET_QUALIFIED_ATTR
: '$' x=ID '.' y=ID WS? '=' expr=ATTR_VALUE_EXPR ';'
{delegate.setQualifiedAttr($text, $x, $y, $expr);}
;
QUALIFIED_ATTR
: '$' x=ID '.' y=ID {input.LA(1)!='('}? {delegate.qualifiedAttr($text, $x, $y);}
;
SET_DYNAMIC_SCOPE_ATTR
: '$' x=ID '::' y=ID WS? '=' expr=ATTR_VALUE_EXPR ';'
{delegate.setDynamicScopeAttr($text, $x, $y, $expr);}
;
DYNAMIC_SCOPE_ATTR
: '$' x=ID '::' y=ID {delegate.dynamicScopeAttr($text, $x, $y);}
;
/** To access deeper (than top of stack) scopes, use the notation:
*
* $x[-1]::y previous (just under top of stack)
* $x[-i]::y top of stack - i where the '-' MUST BE PRESENT;
* i.e., i cannot simply be negative without the '-' sign!
* $x[i]::y absolute index i (0..size-1)
* $x[0]::y is the absolute 0 indexed element (bottom of the stack)
*/
SET_DYNAMIC_NEGATIVE_INDEXED_SCOPE_ATTR
: '$' x=ID '[' '-' index=SCOPE_INDEX_EXPR ']' '::' y=ID
WS? '=' expr=ATTR_VALUE_EXPR ';'
{delegate.setDynamicNegativeIndexedScopeAttr($text, $x, $y, $index, $expr);}
;
DYNAMIC_NEGATIVE_INDEXED_SCOPE_ATTR
: '$' x=ID '[' '-' index=SCOPE_INDEX_EXPR ']' '::' y=ID
{delegate.dynamicNegativeIndexedScopeAttr($text, $x, $y, $index);}
;
SET_DYNAMIC_ABSOLUTE_INDEXED_SCOPE_ATTR
: '$' x=ID '[' index=SCOPE_INDEX_EXPR ']' '::' y=ID
WS? '=' expr=ATTR_VALUE_EXPR ';'
{delegate.setDynamicAbsoluteIndexedScopeAttr($text, $x, $y, $index, $expr);}
;
DYNAMIC_ABSOLUTE_INDEXED_SCOPE_ATTR
: '$' x=ID '[' index=SCOPE_INDEX_EXPR ']' '::' y=ID
{delegate.dynamicAbsoluteIndexedScopeAttr($text, $x, $y, $index);}
;
SET_ATTR
: '$' x=ID WS? '=' expr=ATTR_VALUE_EXPR ';' {delegate.setAttr($text, $x, $expr);}
;
ATTR
: '$' x=ID {delegate.attr($text, $x);}
;
/** %foo(a={},b={},...) ctor */
TEMPLATE_INSTANCE
: '%' ID '(' ( WS? ARG (',' WS? ARG)* WS? )? ')'
;
/** %({name-expr})(a={},...) indirect template ctor reference */
INDIRECT_TEMPLATE_INSTANCE
: '%' '(' ACTION ')' '(' ( WS? ARG (',' WS? ARG)* WS? )? ')'
;
/** %{expr}.y = z; template attribute y of StringTemplate-typed expr to z */
SET_EXPR_ATTRIBUTE
: '%' a=ACTION '.' ID WS? '=' expr=ATTR_VALUE_EXPR ';'
;
/* %x.y = z; set template attribute y of x (always set never get attr)
* to z [languages like python without ';' must still use the
* ';' which the code generator is free to remove during code gen]
*/
SET_ATTRIBUTE
: '%' x=ID '.' y=ID WS? '=' expr=ATTR_VALUE_EXPR ';'
;
/** %{string-expr} anonymous template from string expr */
TEMPLATE_EXPR
: '%' a=ACTION
;
UNKNOWN_SYNTAX
@after {delegate.unknownSyntax(emit());}
: '%' (ID|'.'|'('|')'|','|'{'|'}'|'"')*
;
// Anything else is just random text
TEXT
@after {delegate.text($text);}
: ~('$'|'%') // can't do (...)+ here since it gobbles \$, \%
;
fragment
ACTION
: '{' ('\\}'|~'}')* '}'
;
fragment
ARG : ID '=' ACTION
;
fragment
ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
;
/** Don't allow an = as first char to prevent $x == 3; kind of stuff. */
fragment
ATTR_VALUE_EXPR
: ~'=' (~';')*
;
fragment
SCOPE_INDEX_EXPR
: ('\\]'|~']')+
;
fragment
WS : (' '|'\t'|'\n'|'\r')+
;

View File

@@ -0,0 +1,27 @@
package org.antlr.v4.parse;
import org.antlr.runtime.Token;
/** Listener invoked by ActionSplitter as it recognizes the pieces of an action. */
public interface ActionSplitterListener {
void setQualifiedAttr(String expr, Token x, Token y, Token rhs);
void qualifiedAttr(String expr, Token x, Token y);
void setAttr(String expr, Token x, Token rhs);
void attr(String expr, Token x);
void setDynamicScopeAttr(String expr, Token x, Token y, Token rhs);
void dynamicScopeAttr(String expr, Token x, Token y);
void setDynamicNegativeIndexedScopeAttr(String expr, Token x, Token y, Token index, Token rhs);
void dynamicNegativeIndexedScopeAttr(String expr, Token x, Token y, Token index);
void setDynamicAbsoluteIndexedScopeAttr(String expr, Token x, Token y, Token index, Token rhs);
void dynamicAbsoluteIndexedScopeAttr(String expr, Token x, Token y, Token index);
void templateInstance(String expr);
void indirectTemplateInstance(String expr);
void setExprAttribute(String expr); // TODO: rename
void setSTAttribute(String expr);
void templateExpr(String expr);
void unknownSyntax(Token t);
void text(String text);
}

View File

@@ -0,0 +1,46 @@
package org.antlr.v4.parse;
import org.antlr.runtime.*;
import org.antlr.runtime.tree.CommonTreeAdaptor;
import org.antlr.v4.tool.*;
public class GrammarASTAdaptor extends CommonTreeAdaptor {
org.antlr.runtime.CharStream input; // where we can find chars ref'd by tokens in tree
public GrammarASTAdaptor() { ; }
public GrammarASTAdaptor(org.antlr.runtime.CharStream input) { this.input = input; }
public Object create(Token token) {
return new GrammarAST(token);
}
/** Make sure even imaginary nodes know the input stream */
@Override
public Object create(int tokenType, String text) {
GrammarAST t = null;
if ( tokenType==ANTLRParser.RULE ) {
// needed by TreeWizard to make RULE tree
t = new GrammarASTWithOptions(new CommonToken(tokenType, text));
}
else if ( tokenType==ANTLRParser.STRING_LITERAL ) {
// implicit lexer construction done with wizard; needs this node type
// whereas grammar ANTLRParser.g can use a token option to specify the node type
t = new TerminalAST(new CommonToken(tokenType, text));
}
else {
t = (GrammarAST)super.create(tokenType, text);
}
((CommonToken)t.token).setInputStream(input);
return t;
}
public Object dupNode(Object t) {
if ( t==null ) return null;
return ((GrammarAST)t).dupNode(); //create(((GrammarAST)t).token);
}
public Object errorNode(org.antlr.runtime.TokenStream input, org.antlr.runtime.Token start, org.antlr.runtime.Token stop,
org.antlr.runtime.RecognitionException e)
{
return new GrammarASTErrorNode(input, start, stop, e);
}
}

View File

@@ -0,0 +1,7 @@
package org.antlr.v4.parse;
/** Used to throw us out of a deeply nested element back to the end of a
 * rule's alt list. Note it is not a subclass of RecognitionException.
*/
public class ResyncToEndOfRuleBlock extends RuntimeException {
}

View File

@@ -0,0 +1,237 @@
package org.antlr.v4.parse;
import org.antlr.tool.ErrorManager;
import org.antlr.v4.tool.*;
import java.util.*;
/** Parse args, return values, and dynamic scopes.
*
* rule[arg1, arg2, ..., argN] returns [ret1, ..., retN]
* scope { decl1; decl2; ... declN; }
*
 * The ',' and ';' are significant. Use \, and \; within types if
 * necessary, like [Map<String\,String> foo, int y].
*
* arg, ret, and decl are target language dependent. Java/C#/C/C++ would
* use "int i" but ruby/python would use "i".
*/
public class ScopeParser {
/** Given an arg or retval scope definition list like
*
* Map<String, String>, int[] j3, char *foo32[3]
*
* or
*
* int i=3, j=a[34]+20
*
* convert to an attribute scope.
*/
public static AttributeDict parseTypeList(String s) { return parse(s, ','); }
public static AttributeDict parseDynamicScope(String s) {
// ignore outer {..} if present
s = s.trim();
if ( s.startsWith("{") ) {
int lastCurly = s.lastIndexOf('}');
s = s.substring(1, lastCurly);
}
return parse(s, ';');
}
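/* Illustrative usage:
 *   AttributeDict args = ScopeParser.parseTypeList("int i, String s");
 *     // yields attributes i (type int) and s (type String)
 *   AttributeDict d = ScopeParser.parseDynamicScope("{ int depth; }");
 *     // strips the outer braces, then splits the decls on ';'
 */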
public static AttributeDict parse(String s, char separator) {
int i = 0;
int n = s.length();
AttributeDict dict = new AttributeDict();
while ( i<n ) {
StringBuilder buf = new StringBuilder();
while ( i<n && s.charAt(i)!=separator ) {
if ( s.charAt(i)=='\\' ) {
i++;
if ( i<n && s.charAt(i)==separator ) {
buf.append(s.charAt(i));
i++;
continue;
}
buf.append('\\');
}
buf.append(s.charAt(i));
i++;
}
i++; // skip separator
String def = buf.toString();
//System.out.println("def="+ def);
if ( def.trim().length()>0 ) {
Attribute a = parseAttributeDef(def);
dict.add(a);
}
}
return dict;
}
/** For decls like "String foo" or "char *foo32[]" compute the ID
* and type declarations. Also handle "int x=3" and 'T t = new T("foo")'
 * but if the separator is ',' you cannot use ',' in the init value
 * unless you escape it with \,.
*/
public static Attribute parseAttributeDef(String decl) {
if ( decl==null ) return null;
Attribute attr = new Attribute();
boolean inID = false;
int start = -1;
int rightEdgeOfDeclarator = decl.length()-1;
int equalsIndex = decl.indexOf('=');
if ( equalsIndex>0 ) {
// everything after the '=' is the init value
attr.initValue = decl.substring(equalsIndex+1,decl.length());
rightEdgeOfDeclarator = equalsIndex-1;
}
// walk backwards looking for start of an ID
for (int i=rightEdgeOfDeclarator; i>=0; i--) {
// if we haven't found the end yet, keep going
if ( !inID && Character.isLetterOrDigit(decl.charAt(i)) ) {
inID = true;
}
else if ( inID &&
!(Character.isLetterOrDigit(decl.charAt(i))||
decl.charAt(i)=='_') ) {
start = i+1;
break;
}
}
if ( start<0 && inID ) {
start = 0;
}
if ( start<0 ) {
ErrorManager.error(ErrorManager.MSG_CANNOT_FIND_ATTRIBUTE_NAME_IN_DECL,decl);
}
// walk forwards looking for end of an ID
int stop=-1;
for (int i=start; i<=rightEdgeOfDeclarator; i++) {
// if we haven't found the end yet, keep going
if ( !(Character.isLetterOrDigit(decl.charAt(i))||
decl.charAt(i)=='_') )
{
stop = i;
break;
}
if ( i==rightEdgeOfDeclarator ) {
stop = i+1;
}
}
// the name is the last ID
attr.name = decl.substring(start,stop);
// the type is the decl minus the ID (could be empty)
attr.type = decl.substring(0,start);
if ( stop<=rightEdgeOfDeclarator ) {
attr.type += decl.substring(stop,rightEdgeOfDeclarator+1);
}
attr.type = attr.type.trim();
if ( attr.type.length()==0 ) {
attr.type = null;
}
attr.decl = decl;
return attr;
}
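/* For example, parseAttributeDef("int x=3") yields an Attribute with
 * name "x", type "int", and initValue "3"; parseAttributeDef("String foo")
 * yields name "foo" and type "String".
 */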
/** Given an argument list like
*
* x, (*a).foo(21,33), 3.2+1, '\n',
* "a,oo\nick", {bl, "fdkj"eck}, ["cat\n,", x, 43]
*
* convert to a list of attributes. Allow nested square brackets etc...
* Set separatorChar to ';' or ',' or whatever you want.
*/
public static List<String> splitArgumentList(String s, int separatorChar) {
List<String> args = new ArrayList<String>();
_splitArgumentList(s, 0, -1, separatorChar, args);
return args;
}
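/* For example, splitArgumentList("x, f(a,b), \"s,t\"", ',') returns
 * ["x", "f(a,b)", "\"s,t\""]; commas nested in parentheses or string
 * literals do not split the list.
 */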
public static int _splitArgumentList(String actionText,
int start,
int targetChar,
int separatorChar,
List<String> args)
{
if ( actionText==null ) {
return -1;
}
actionText = actionText.replaceAll("//.*\n", "");
int n = actionText.length();
//System.out.println("actionText@"+start+"->"+(char)targetChar+"="+actionText.substring(start,n));
int p = start;
int last = p;
while ( p<n && actionText.charAt(p)!=targetChar ) {
int c = actionText.charAt(p);
switch ( c ) {
case '\'' :
p++;
while ( p<n && actionText.charAt(p)!='\'' ) {
if ( actionText.charAt(p)=='\\' && (p+1)<n &&
actionText.charAt(p+1)=='\'' )
{
p++; // skip escaped quote
}
p++;
}
p++;
break;
case '"' :
p++;
while ( p<n && actionText.charAt(p)!='\"' ) {
if ( actionText.charAt(p)=='\\' && (p+1)<n &&
actionText.charAt(p+1)=='\"' )
{
p++; // skip escaped quote
}
p++;
}
p++;
break;
case '(' :
p = _splitArgumentList(actionText,p+1,')',separatorChar,args);
break;
case '{' :
p = _splitArgumentList(actionText,p+1,'}',separatorChar,args);
break;
case '<' :
if ( actionText.indexOf('>',p+1)>=p ) {
// do we see a matching '>' ahead? if so, hope it's a generic
// and not less followed by expr with greater than
p = _splitArgumentList(actionText,p+1,'>',separatorChar,args);
}
else {
p++; // treat as normal char
}
break;
case '[' :
p = _splitArgumentList(actionText,p+1,']',separatorChar,args);
break;
default :
if ( c==separatorChar && targetChar==-1 ) {
String arg = actionText.substring(last, p);
//System.out.println("arg="+arg);
args.add(arg.trim());
last = p+1;
}
p++;
break;
}
}
if ( targetChar==-1 && p<=n ) {
String arg = actionText.substring(last, p).trim();
//System.out.println("arg="+arg);
if ( arg.length()>0 ) {
args.add(arg.trim());
}
}
p++;
return p;
}
}

View File

@@ -0,0 +1,142 @@
package org.antlr.v4.parse;
import org.antlr.codegen.CodeGenerator;
import org.antlr.misc.Utils;
import org.antlr.tool.ErrorManager;
import org.antlr.v4.Tool;
import java.io.*;
import java.util.*;
/** Parses a <vocabName>.tokens vocab file into a token name-to-type map. */
public class TokenVocabParser {
Tool tool;
String vocabName;
public TokenVocabParser(Tool tool, String vocabName) {
this.tool = tool;
this.vocabName = vocabName;
}
/** Load a vocab file <vocabName>.tokens and return mapping. */
public Map<String,Integer> load() {
Map<String,Integer> tokens = new LinkedHashMap<String,Integer>();
int maxTokenType = -1;
File fullFile = getImportedVocabFile();
try {
FileReader fr = new FileReader(fullFile);
BufferedReader br = new BufferedReader(fr);
StreamTokenizer tokenizer = new StreamTokenizer(br);
tokenizer.parseNumbers();
tokenizer.wordChars('_', '_');
tokenizer.eolIsSignificant(true);
tokenizer.slashSlashComments(true);
tokenizer.slashStarComments(true);
tokenizer.ordinaryChar('=');
tokenizer.quoteChar('\'');
tokenizer.whitespaceChars(' ',' ');
tokenizer.whitespaceChars('\t','\t');
int lineNum = 1;
int token = tokenizer.nextToken();
while (token != StreamTokenizer.TT_EOF) {
String tokenID;
if ( token == StreamTokenizer.TT_WORD ) {
tokenID = tokenizer.sval;
}
else if ( token == '\'' ) {
tokenID = "'"+tokenizer.sval+"'";
}
else {
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
vocabName+ CodeGenerator.VOCAB_FILE_EXTENSION,
Utils.integer(lineNum));
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
token = tokenizer.nextToken();
continue;
}
token = tokenizer.nextToken();
if ( token != '=' ) {
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
Utils.integer(lineNum));
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
token = tokenizer.nextToken();
continue;
}
token = tokenizer.nextToken(); // skip '='
if ( token != StreamTokenizer.TT_NUMBER ) {
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
Utils.integer(lineNum));
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
token = tokenizer.nextToken();
continue;
}
int tokenType = (int)tokenizer.nval;
token = tokenizer.nextToken();
System.out.println("import "+tokenID+"="+tokenType);
tokens.put(tokenID, tokenType);
maxTokenType = Math.max(maxTokenType,tokenType);
lineNum++;
if ( token != StreamTokenizer.TT_EOL ) {
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
Utils.integer(lineNum));
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
token = tokenizer.nextToken();
continue;
}
token = tokenizer.nextToken(); // skip newline
}
br.close();
}
catch (FileNotFoundException fnfe) {
ErrorManager.error(ErrorManager.MSG_CANNOT_FIND_TOKENS_FILE,
fullFile);
}
catch (IOException ioe) {
ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
fullFile,
ioe);
}
catch (Exception e) {
ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
fullFile,
e);
}
return tokens;
}
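/* The vocab file parsed above is a sequence of lines of the form
 * (illustrative names):
 *
 *   ID=4
 *   INT=5
 *   'if'=6
 */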
/** Return a File descriptor for the vocab file. Look in the library directory
 * or in the -o output path. "antlr -o foo T.g U.g" where U needs T.tokens
 * won't work unless we look in foo too. If we do not find the
 * file in the lib directory then we must assume that the .tokens file
 * is going to be generated as part of this build; we have defined
 * .tokens files so that they are ALWAYS generated in the base output
 * directory, which means the current directory for the command line tool
 * if no output directory was specified.
 */
public File getImportedVocabFile() {
File f = new File(tool.getLibraryDirectory(),
File.separator +
vocabName +
CodeGenerator.VOCAB_FILE_EXTENSION);
if (f.exists()) {
return f;
}
// We did not find the vocab file in the lib directory, so we need
// to look for it in the output directory which is where .tokens
// files are generated (in the base, not relative to the input
// location.)
//
if (tool.haveOutputDir) {
f = new File(tool.getOutputDirectory(), vocabName + CodeGenerator.VOCAB_FILE_EXTENSION);
}
else {
f = new File(vocabName + CodeGenerator.VOCAB_FILE_EXTENSION);
}
return f;
}
}

View File

@@ -0,0 +1,47 @@
package org.antlr.v4.parse;
import org.antlr.runtime.*;
import org.antlr.v4.Tool;
import org.antlr.v4.tool.ErrorType;
/** Override error handling for use with the ANTLR tool itself; leaves
 * nothing in the grammar associated with the Tool so others can use it in IDEs, ...
*/
public class ToolANTLRParser extends ANTLRParser {
public Tool tool;
public ToolANTLRParser(TokenStream input, Tool tool) {
super(input);
this.tool = tool;
}
public void displayRecognitionError(String[] tokenNames,
RecognitionException e)
{
String msg = getParserErrorMessage(this, e);
if ( paraphrases.size()>0 ) {
String paraphrase = (String)paraphrases.peek();
msg = msg+" while "+paraphrase;
}
// List stack = getRuleInvocationStack(e, this.getClass().getName());
// msg += ", rule stack = "+stack;
tool.errMgr.syntaxError(ErrorType.SYNTAX_ERROR, getSourceName(), e.token, e, msg);
}
public String getParserErrorMessage(Parser parser, RecognitionException e) {
String msg = null;
if ( e instanceof NoViableAltException) {
String name = parser.getTokenErrorDisplay(e.token);
msg = name+" came as a complete surprise to me";
}
else if ( e instanceof v4ParserException) {
msg = ((v4ParserException)e).msg;
}
else {
msg = parser.getErrorMessage(e, parser.getTokenNames());
}
return msg;
}
}

View File

@@ -0,0 +1,16 @@
package org.antlr.v4.parse;
import org.antlr.runtime.*;
/** A recognition exception carrying a preformatted message for the v4 tool parser. */
public class v4ParserException extends RecognitionException {
public String msg;
/** Used for remote debugger deserialization */
public v4ParserException() {;}
public v4ParserException(String msg, IntStream input) {
super(input);
this.msg = msg;
}
}

View File

@@ -0,0 +1,29 @@
package org.antlr.v4.tool;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree;
import java.util.List;
public class ActionAST extends GrammarAST {
// Alt, rule, grammar space
public AttributeResolver resolver;
public List<Token> chunks; // useful for ANTLR IDE developers
/** In which alt does this node live? */
// public Alternative alt;
public ActionAST(GrammarAST node) {
super(node);
this.resolver = ((ActionAST)node).resolver;
this.chunks = ((ActionAST)node).chunks;
}
public ActionAST(Token t) { super(t); }
public ActionAST(int type) { super(type); }
public ActionAST(int type, Token t) { super(type, t); }
@Override
public Tree dupNode() { return new ActionAST(this); }
}

View File

@ -0,0 +1,21 @@
package org.antlr.v4.tool;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree;
/** An ALT or ALT_REWRITE node (left of ->) */
public class AltAST extends GrammarAST {
public Alternative alt;
public AltAST(GrammarAST node) {
super(node);
this.alt = ((AltAST)node).alt;
}
public AltAST(Token t) { super(t); }
public AltAST(int type) { super(type); }
public AltAST(int type, Token t) { super(type, t); }
@Override
public Tree dupNode() { return new AltAST(this); }
}

View File

@ -0,0 +1,136 @@
package org.antlr.v4.tool;
import org.stringtemplate.v4.misc.MultiMap;
import java.util.ArrayList;
import java.util.List;
/** Record use/def information about an outermost alternative in a subrule
* or rule of a grammar.
*/
public class Alternative implements AttributeResolver {
Rule rule;
public AltAST ast;
// token IDs, string literals in this alt
public MultiMap<String, TerminalAST> tokenRefs = new MultiMap<String, TerminalAST>();
// does not include labels
public MultiMap<String, GrammarAST> tokenRefsInActions = new MultiMap<String, GrammarAST>();
// all rule refs in this alt
public MultiMap<String, GrammarAST> ruleRefs = new MultiMap<String, GrammarAST>();
// does not include labels
public MultiMap<String, GrammarAST> ruleRefsInActions = new MultiMap<String, GrammarAST>();
/** A list of all LabelElementPair attached to tokens like id=ID, ids+=ID */
public MultiMap<String, LabelElementPair> labelDefs = new MultiMap<String, LabelElementPair>();
// track all token, rule, label refs in rewrite (right of ->)
public List<GrammarAST> rewriteElements = new ArrayList<GrammarAST>();
/** Track all executable actions other than named actions like @init
* and catch/finally (not in an alt). Also tracks predicates, rewrite actions.
* We need to examine these actions before code generation so
* that we can detect refs to $rule.attr etc...
*/
public List<ActionAST> actions = new ArrayList<ActionAST>();
public Alternative(Rule r) { this.rule = r; }
public boolean resolvesToToken(String x, ActionAST node) {
if ( tokenRefs.get(x)!=null ) return true;
LabelElementPair anyLabelDef = getAnyLabelDef(x);
if ( anyLabelDef!=null && anyLabelDef.type==LabelType.TOKEN_LABEL ) return true;
return false;
}
// public String getTokenLabel(String x, ActionAST node) {
// LabelElementPair anyLabelDef = getAnyLabelDef(x);
// if ( anyLabelDef!=null ) return anyLabelDef.label.getText();
// if ( tokenRefs.get(x)!=null ) {
//
// }
// LabelElementPair anyLabelDef = getAnyLabelDef(x);
// if ( anyLabelDef!=null && anyLabelDef.type==LabelType.TOKEN_LABEL ) return true;
// return false;
// }
public boolean resolvesToAttributeDict(String x, ActionAST node) {
if ( resolvesToToken(x, node) ) return true;
if ( x.equals(rule.name) ) return true; // $r for action in rule r, $r is a dict
if ( rule.scope!=null ) return true; // rule is never null; see constructor
if ( rule.g.scopes.get(x)!=null ) return true;
return false;
}
/** $x Attribute: rule arguments, return values, predefined rule prop.
*/
public Attribute resolveToAttribute(String x, ActionAST node) {
return rule.resolveToAttribute(x, node); // reuse that code
}
/** $x.y, x can be surrounding rule, token/rule/label ref. y is visible
* attr in that dictionary. Can't see args on rule refs.
*/
public Attribute resolveToAttribute(String x, String y, ActionAST node) {
if ( rule.name.equals(x) ) { // x is this rule?
return rule.resolveToAttribute(x, y, node);
}
if ( tokenRefs.get(x)!=null ) { // token ref in this alt?
return rule.getPredefinedScope(LabelType.TOKEN_LABEL).get(y);
}
if ( ruleRefs.get(x)!=null ) { // rule ref in this alt?
// look up rule, ask it to resolve y (must be retval or predefined)
return rule.g.getRule(x).resolveRetvalOrProperty(y);
}
LabelElementPair anyLabelDef = getAnyLabelDef(x);
if ( anyLabelDef!=null && anyLabelDef.type==LabelType.RULE_LABEL ) {
return rule.g.getRule(anyLabelDef.element.getText()).resolveRetvalOrProperty(y);
}
else if ( anyLabelDef!=null ) {
return rule.getPredefinedScope(anyLabelDef.type).get(y);
}
return null;
}
public AttributeDict resolveToDynamicScope(String x, ActionAST node) {
Rule r = resolveToRule(x);
if ( r!=null && r.scope !=null ) return r.scope;
return rule.resolveToDynamicScope(x, node);
}
public boolean resolvesToLabel(String x, ActionAST node) {
LabelElementPair anyLabelDef = getAnyLabelDef(x);
return anyLabelDef!=null &&
(anyLabelDef.type==LabelType.TOKEN_LABEL ||
anyLabelDef.type==LabelType.RULE_LABEL);
}
public boolean resolvesToListLabel(String x, ActionAST node) {
LabelElementPair anyLabelDef = getAnyLabelDef(x);
return anyLabelDef!=null &&
(anyLabelDef.type==LabelType.RULE_LIST_LABEL ||
anyLabelDef.type==LabelType.TOKEN_LIST_LABEL);
}
public LabelElementPair getAnyLabelDef(String x) {
List<LabelElementPair> labels = labelDefs.get(x);
if ( labels!=null ) return labels.get(0);
return null;
}
/** x can be ruleref or rule label. */
public Rule resolveToRule(String x) {
if ( ruleRefs.get(x)!=null ) return rule.g.getRule(x);
LabelElementPair anyLabelDef = getAnyLabelDef(x);
if ( anyLabelDef!=null && anyLabelDef.type==LabelType.RULE_LABEL ) {
return rule.g.getRule(anyLabelDef.element.getText());
}
if ( x.equals(rule.name) ) return rule;
return null;
}
}
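
A hedged sketch of alt-level resolution. The grammar string is hypothetical, and the null token node stands in for what a symbol-collection pass would normally record; this relies on the for-testing Grammar constructor defined later in this commit.

    import org.antlr.v4.tool.*;

    public class AltResolutionSketch {
        public static void main(String[] args) throws org.antlr.runtime.RecognitionException {
            Grammar g = new Grammar("parser grammar P; a : ID ;"); // for-testing ctor
            Rule r = new Rule(g, "a", null, 1);
            Alternative alt = r.alt[1];
            alt.tokenRefs.map("ID", null); // pretend the collector saw an ID ref
            System.out.println(alt.resolvesToToken("ID", null));                 // true
            System.out.println(alt.resolveToAttribute("ID", "text", null).name); // text
        }
    }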

View File

@ -0,0 +1,37 @@
package org.antlr.v4.tool;
/** Track the names of attributes defined in arg lists, return values,
* scope blocks, etc.
*/
public class Attribute {
/** The entire declaration such as "String foo;" */
public String decl;
/** The type; might be empty such as for Python which has no static typing */
public String type;
/** The name of the attribute "foo" */
public String name;
/** The optional attribute initialization expression */
public String initValue;
/** Who contains us? */
public AttributeDict dict;
public Attribute() {;}
public Attribute(String name) { this(name,null); }
public Attribute(String name, String decl) {
this.name = name;
this.decl = decl;
}
public String toString() {
if ( initValue!=null ) {
return type+" "+name+"="+initValue;
}
return type+" "+name;
}
}
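
For instance, a fully initialized Attribute prints its type, name, and initializer; a tiny self-contained illustration (the values are made up):

    import org.antlr.v4.tool.Attribute;

    public class AttributeToStringSketch {
        public static void main(String[] args) {
            Attribute a = new Attribute("i", "int i = 3");
            a.type = "int";
            a.initValue = "3";
            System.out.println(a); // prints: int i=3
        }
    }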

View File

@ -0,0 +1,86 @@
package org.antlr.v4.tool;
import java.util.*;
/** Track the attributes within retval, arg lists etc...
*
* Each rule has potentially 3 scopes: return values,
* parameters, and an implicitly-named scope (i.e., a scope defined in a rule).
* Implicitly-defined scopes are named after the rule; rules and scopes then
* must live in the same name space--no collisions allowed.
*/
public class AttributeDict {
public String name;
public GrammarAST ast;
public DictType type;
/** All token scopes (token labels) share the same fixed scope of
* predefined attributes. I keep this out of the runtime.Token
* object to avoid a runtime type leakage.
*/
public static AttributeDict predefinedTokenDict = new AttributeDict(DictType.TOKEN) {{
add(new Attribute("text"));
add(new Attribute("type"));
add(new Attribute("line"));
add(new Attribute("index"));
add(new Attribute("pos"));
add(new Attribute("channel"));
add(new Attribute("tree"));
add(new Attribute("int"));
}};
public static enum DictType {
ARG, RET, TOKEN,
PREDEFINED_RULE, PREDEFINED_TREE_RULE, PREDEFINED_LEXER_RULE,
GLOBAL_SCOPE, // scope symbols { ...}
RULE_SCOPE; // scope { int i; int j; }
}
/** The list of Attribute objects */
public LinkedHashMap<String, Attribute> attributes =
new LinkedHashMap<String, Attribute>();
public AttributeDict() {;}
public AttributeDict(DictType type) { this.type = type; }
public Attribute add(Attribute a) { a.dict = this; return attributes.put(a.name, a); }
public Attribute get(String name) { return attributes.get(name); }
public String getName() {
// if ( isParameterScope ) {
// return name+"_parameter";
// }
// else if ( isReturnScope ) {
// return name+"_return";
// }
return name;
}
public int size() { return attributes==null?0:attributes.size(); }
/** Return the set of keys that collide from
* this and other.
*/
public Set<String> intersection(AttributeDict other) {
if ( other==null || other.size()==0 || size()==0 ) {
return null;
}
Set<String> inter = new HashSet<String>();
for (String key : attributes.keySet()) {
if ( other.attributes.get(key)!=null ) {
inter.add(key);
}
}
if ( inter.size()==0 ) {
return null;
}
return inter;
}
public String toString() {
return getName()+":"+attributes;
}
}
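
A small sketch of intersection() catching a name defined both as a parameter and as a return value; the dict contents are made up.

    import java.util.Set;
    import org.antlr.v4.tool.*;

    public class DictCollisionSketch {
        public static void main(String[] args) {
            AttributeDict params = new AttributeDict(AttributeDict.DictType.ARG);
            params.add(new Attribute("i"));
            params.add(new Attribute("j"));
            AttributeDict retvals = new AttributeDict(AttributeDict.DictType.RET);
            retvals.add(new Attribute("j"));
            Set<String> inter = params.intersection(retvals); // colliding names
            System.out.println(inter); // [j]
        }
    }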

View File

@ -0,0 +1,40 @@
package org.antlr.v4.tool;
/** Grammars, rules, and alternatives all have symbols visible to
* actions. To evaluate attr exprs, ask the action for its resolver
* and then ask the resolver to look up various symbols. Depending on the
* context, some symbols are available and some aren't.
*
* Alternative level:
*
* $x Attribute: rule arguments, return values, predefined rule prop.
* AttributeDict: references to tokens and token labels in the
* current alt (including any elements within subrules contained
* in that outermost alt). x can be rule with scope or a global scope.
* List label: x is a token/rule list label.
* $x.y Attribute: x is surrounding rule, rule/token/label ref
* $s::y Attribute: s is any rule with scope or global scope; y is prop within
*
* Rule level:
*
* $x Attribute: rule arguments, return values, predefined rule prop.
* AttributeDict: references to token labels in *any* alt. x can
* be any rule with scope or global scope.
* List label: x is a token/rule list label.
* $x.y Attribute: x is surrounding rule, label ref (in any alts)
* $s::y Attribute: s is any rule with scope or global scope; y is prop within
*
* Grammar level:
*
* $s AttributeDict: s is a global scope
* $s::y Attribute: s is a global scope; y is prop within
*/
public interface AttributeResolver {
public boolean resolvesToListLabel(String x, ActionAST node);
public boolean resolvesToLabel(String x, ActionAST node);
public boolean resolvesToAttributeDict(String x, ActionAST node);
public boolean resolvesToToken(String x, ActionAST node);
public Attribute resolveToAttribute(String x, ActionAST node);
public Attribute resolveToAttribute(String x, String y, ActionAST node);
public AttributeDict resolveToDynamicScope(String x, ActionAST node);
}
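
At the grammar level only global scopes resolve, per the contract above. A sketch using Grammar (an implementer of this interface, defined later in this commit) with a hypothetical Symbols scope:

    import org.antlr.v4.tool.*;

    public class GrammarScopeSketch {
        public static void main(String[] args) throws org.antlr.runtime.RecognitionException {
            Grammar g = new Grammar("grammar T; a : 'x' ;"); // for-testing ctor
            AttributeDict symbols = new AttributeDict(AttributeDict.DictType.GLOBAL_SCOPE);
            symbols.name = "Symbols";
            symbols.add(new Attribute("names"));
            g.defineScope(symbols);
            System.out.println(g.resolvesToAttributeDict("Symbols", null));        // true: $Symbols
            System.out.println(g.resolveToDynamicScope("Symbols", null).getName()); // Symbols
        }
    }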

View File

@ -0,0 +1,28 @@
package org.antlr.v4.tool;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree;
import java.util.*;
public class BlockAST extends GrammarASTWithOptions {
// TODO: maybe I need a Subrule object like Rule so these options move to that?
/** What are the default options for a subrule? */
public static final Map defaultBlockOptions =
new HashMap() {{put("greedy","true");}};
public static final Map defaultLexerBlockOptions =
new HashMap() {{put("greedy","true");}};
public BlockAST(GrammarAST node) {
super(node);
}
public BlockAST(Token t) { super(t); }
public BlockAST(int type) { super(type); }
public BlockAST(int type, Token t) { super(type, t); }
public BlockAST(int type, Token t, String text) { super(type,t,text); }
@Override
public Tree dupNode() { return new BlockAST(this); }
}

View File

@ -1,4 +1,632 @@
package org.antlr.v4.tool;
public class Grammar {
import org.antlr.runtime.*;
import org.antlr.runtime.tree.TreeWizard;
import org.antlr.v4.Tool;
import org.antlr.v4.misc.*;
import org.antlr.v4.parse.*;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.dfa.DFA;
import java.util.*;
public class Grammar implements AttributeResolver {
public static final Set doNotCopyOptionsToLexer =
new HashSet() {
{
add("output"); add("ASTLabelType"); add("superClass");
add("k"); add("backtrack"); add("memoize"); add("rewrite");
}
};
public static Map<String, AttributeDict> grammarAndLabelRefTypeToScope =
new HashMap<String, AttributeDict>() {{
put("lexer:RULE_LABEL", Rule.predefinedLexerRulePropertiesDict);
put("lexer:LEXER_STRING_LABEL", Rule.predefinedLexerRulePropertiesDict);
put("lexer:TOKEN_LABEL", AttributeDict.predefinedTokenDict);
put("parser:RULE_LABEL", Rule.predefinedRulePropertiesDict);
put("parser:TOKEN_LABEL", AttributeDict.predefinedTokenDict);
put("tree:RULE_LABEL", Rule.predefinedTreeRulePropertiesDict);
put("tree:TOKEN_LABEL", AttributeDict.predefinedTokenDict);
put("tree:WILDCARD_TREE_LABEL", AttributeDict.predefinedTokenDict);
put("combined:RULE_LABEL", Rule.predefinedRulePropertiesDict);
put("combined:TOKEN_LABEL", AttributeDict.predefinedTokenDict);
}};
public static final int MIN_CHAR_VALUE = '\u0000';
public static final int MAX_CHAR_VALUE = '\uFFFE';
public String name;
public GrammarRootAST ast;
public String text; // testing only
public String fileName;
/** Was this created from a COMBINED grammar? */
public Grammar implicitLexer;
public Grammar implicitLexerOwner;
/** If we're imported, who imported us? If null, implies grammar is root */
public Grammar parent;
public List<Grammar> importedGrammars;
/** All rules defined in this specific grammar, not imported. Also does
* not include lexical rules if combined.
*/
public OrderedHashMap<String, Rule> rules = new OrderedHashMap<String, Rule>();
int ruleNumber = 1;
/** The ATN that represents the grammar with edges labelled with tokens
* or epsilon. It is more suitable to analysis than an AST representation.
*/
public ATN atn;
public Map<Integer, DFA> decisionDFAs = new HashMap<Integer, DFA>();
public Vector<IntervalSet[]> decisionLOOK;
public Tool tool;
/** Token names and literal tokens like "void" are uniquely indexed,
* with -1 implying EOF. Characters are different; they go from
* -1 (EOF) to \uFFFE. For example, 0 could be a binary byte you
* want to lex. Labels of DFA/ATN transitions can be both tokens
* and characters. I use negative numbers for bookkeeping labels
* like EPSILON. Char/String literals and token types overlap in the same
* space, however.
*/
int maxTokenType = Token.MIN_TOKEN_TYPE-1;
/** Map token like ID (but not literals like "while") to its token type */
public Map<String, Integer> tokenNameToTypeMap = new LinkedHashMap<String, Integer>();
/** Map token literals like "while" to their token type. It may be that
* WHILE="while"=35, in which case both tokenNameToTypeMap and this
* field will have entries mapped to 35.
*/
public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>();
/** Reverse index for stringLiteralToTypeMap. Indexed with raw token type.
* 0 is invalid. */
public Vector<String> typeToStringLiteralList = new Vector<String>();
/** Map a token type to its token name. Indexed with raw token type.
* 0 is invalid.
*/
public Vector<String> typeToTokenList = new Vector<String>();
/** Map a name to an action.
* The code generator will use this to fill holes in the output files.
* I track the AST node for the action in case I need the line number
* for errors.
*/
public Map<String,ActionAST> namedActions = new HashMap<String,ActionAST>();
/** Tracks all forced actions in all alternatives of all rules;
* or, if this is a lexer, the actions of all rules, period. Doesn't
* track sempreds. Maps tree node to action index.
*/
public LinkedHashMap<ActionAST, Integer> actions = new LinkedHashMap<ActionAST, Integer>();
/** All sempreds found in grammar; maps tree node to sempred index */
public LinkedHashMap<PredAST, Integer> sempreds = new LinkedHashMap<PredAST, Integer>();
public Map<String, AttributeDict> scopes = new LinkedHashMap<String, AttributeDict>();
public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";
public Grammar(Tool tool, GrammarRootAST ast) {
if ( ast==null ) throw new IllegalArgumentException("can't pass null tree");
this.tool = tool;
this.ast = ast;
this.name = ((GrammarAST)ast.getChild(0)).getText();
initTokenSymbolTables();
}
/** For testing */
public Grammar(String grammarText) throws org.antlr.runtime.RecognitionException {
this("<string>", grammarText, null);
}
/** For testing */
public Grammar(String grammarText, ANTLRToolListener listener)
throws org.antlr.runtime.RecognitionException
{
this("<string>", grammarText, listener);
}
/** For testing; only builds trees; no sem anal */
public Grammar(String fileName, String grammarText, ANTLRToolListener listener)
throws org.antlr.runtime.RecognitionException
{
this.text = grammarText;
this.fileName = fileName;
this.tool = new Tool();
this.tool.addListener(listener);
org.antlr.runtime.ANTLRStringStream in = new org.antlr.runtime.ANTLRStringStream(grammarText);
in.name = fileName;
ANTLRLexer lexer = new ANTLRLexer(in);
CommonTokenStream tokens = new CommonTokenStream(lexer);
ToolANTLRParser p = new ToolANTLRParser(tokens,tool);
p.setTreeAdaptor(new GrammarASTAdaptor(in));
ParserRuleReturnScope r = p.grammarSpec();
if ( r.getTree() instanceof GrammarRootAST ) {
this.ast = (GrammarRootAST)r.getTree();
this.ast.hasErrors = p.getNumberOfSyntaxErrors()>0;
this.name = ((GrammarAST)ast.getChild(0)).getText();
}
initTokenSymbolTables();
}
protected void initTokenSymbolTables() {
if ( isTreeGrammar() ) {
typeToTokenList.setSize(Token.UP + 1);
typeToTokenList.set(Token.DOWN, "DOWN");
typeToTokenList.set(Token.UP, "UP");
tokenNameToTypeMap.put("DOWN", Token.DOWN);
tokenNameToTypeMap.put("UP", Token.UP);
}
tokenNameToTypeMap.put("EOF", Token.EOF);
}
public void loadImportedGrammars() {
if ( ast==null ) return;
GrammarAST i = (GrammarAST)ast.getFirstChildWithType(ANTLRParser.IMPORT);
if ( i==null ) return;
importedGrammars = new ArrayList<Grammar>();
for (Object c : i.getChildren()) {
GrammarAST t = (GrammarAST)c;
String importedGrammarName = null;
if ( t.getType()==ANTLRParser.ASSIGN ) {
importedGrammarName = t.getChild(1).getText();
System.out.println("import "+ importedGrammarName);
}
else if ( t.getType()==ANTLRParser.ID ) {
importedGrammarName = t.getText();
System.out.println("import "+t.getText());
}
try {
GrammarAST root = tool.load(importedGrammarName+".g");
if ( root instanceof GrammarASTErrorNode ) return; // came back as error node
GrammarRootAST ast = (GrammarRootAST)root;
Grammar g = tool.createGrammar(ast);
g.fileName = importedGrammarName+".g";
g.parent = this;
importedGrammars.add(g);
}
catch (Exception e) {
System.err.println("can't load grammar "+importedGrammarName);
}
}
}
public void defineAction(GrammarAST atAST) {
if ( atAST.getChildCount()==2 ) {
String name = atAST.getChild(0).getText();
namedActions.put(name, (ActionAST)atAST.getChild(1));
}
else {
String scope = atAST.getChild(0).getText();
if ( scope.equals(getTypeString()) ) {
String name = atAST.getChild(1).getText();
namedActions.put(name, (ActionAST)atAST.getChild(2));
}
}
}
public void defineRule(Rule r) {
if ( rules.get(r.name)!=null ) return;
rules.put(r.name, r);
r.index = ruleNumber++;
}
// public int getNumRules() {
// int n = rules.size();
// List<Grammar> imports = getAllImportedGrammars();
// if ( imports!=null ) {
// for (Grammar g : imports) n += g.getNumRules();
// }
// return n;
// }
public Rule getRule(String name) {
Rule r = rules.get(name);
if ( r!=null ) return r;
List<Grammar> imports = getAllImportedGrammars();
if ( imports==null ) return null;
for (Grammar g : imports) {
r = g.rules.get(name);
if ( r!=null ) return r;
}
return null;
}
public Rule getRule(String grammarName, String ruleName) {
if ( grammarName!=null ) { // scope override
Grammar g = getImportedGrammar(grammarName);
if ( g ==null ) {
return null;
}
return g.rules.get(ruleName);
}
return getRule(ruleName);
}
public void defineScope(AttributeDict s) { scopes.put(s.getName(), s); }
/** Get list of all imports from all grammars in the delegate subtree of
* this grammar. The grammars are in import tree preorder. Don't include
* ourselves in the list as we're not a delegate of ourselves.
*/
public List<Grammar> getAllImportedGrammars() {
if ( importedGrammars==null ) return null;
List<Grammar> delegates = new ArrayList<Grammar>();
for (int i = 0; i < importedGrammars.size(); i++) {
Grammar d = importedGrammars.get(i);
delegates.add(d);
List<Grammar> ds = d.getAllImportedGrammars();
if ( ds!=null ) delegates.addAll( ds );
}
return delegates;
}
public List<Grammar> getImportedGrammars() { return importedGrammars; }
/** Get delegates below direct delegates of g
public List<Grammar> getIndirectDelegates(Grammar g) {
List<Grammar> direct = getDirectDelegates(g);
List<Grammar> delegates = getDelegates(g);
delegates.removeAll(direct);
return delegates;
}
*/
/** Return list of imported grammars from root down to our parent.
* Order is [root, ..., this.parent]. (us not included).
*/
public List<Grammar> getGrammarAncestors() {
Grammar root = getOutermostGrammar();
if ( this==root ) return null;
List<Grammar> grammars = new ArrayList<Grammar>();
// walk backwards to root, collecting grammars
Grammar p = this.parent;
while ( p!=null ) {
grammars.add(0, p); // add to head so in order later
p = p.parent;
}
return grammars;
}
/** Return the grammar that imported us and our parents. Return this
* if we're root.
*/
public Grammar getOutermostGrammar() {
if ( parent==null ) return this;
return parent.getOutermostGrammar();
}
/** Get the name of the generated recognizer; may or may not be same
* as grammar name.
* Recognizer is TParser and TLexer from T if combined, else
* just use T regardless of grammar type.
*/
public String getRecognizerName() {
String suffix = "";
List<Grammar> grammarsFromRootToMe = getOutermostGrammar().getGrammarAncestors();
String qualifiedName = name;
if ( grammarsFromRootToMe!=null ) {
StringBuffer buf = new StringBuffer();
for (Grammar g : grammarsFromRootToMe) {
buf.append(g.name);
buf.append('_');
}
buf.append(name);
qualifiedName = buf.toString();
}
if ( isCombined() || (isLexer() && implicitLexer!=null) )
{
suffix = Grammar.getGrammarTypeToFileNameSuffix(getType());
}
return qualifiedName+suffix;
}
public String getStringLiteralLexerRuleName(String lit) {
int ttype = getTokenType(lit);
return AUTO_GENERATED_TOKEN_NAME_PREFIX +ttype;
}
/** Return grammar directly imported by this grammar */
public Grammar getImportedGrammar(String name) {
if ( importedGrammars==null ) return null;
for (Grammar g : importedGrammars) {
if ( g.name.equals(name) ) return g;
}
return null;
}
public int getTokenType(String token) {
Integer I = null;
if ( token.charAt(0)=='\'') {
I = stringLiteralToTypeMap.get(token);
}
else { // must be a label like ID
I = tokenNameToTypeMap.get(token);
}
int i = (I!=null)?I.intValue(): Token.INVALID_TYPE;
//System.out.println("grammar type "+type+" "+tokenName+"->"+i);
return i;
}
/** Given a token type, get a meaningful name for it such as the ID
* or string literal. If this is a lexer and the ttype is in the
* char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
*/
public String getTokenDisplayName(int ttype) {
String tokenName = null;
// inside any target's char range and is lexer grammar?
if ( isLexer() &&
ttype >= MIN_CHAR_VALUE && ttype <= MAX_CHAR_VALUE )
{
return CharSupport.getANTLRCharLiteralForChar(ttype);
}
else if ( ttype==Token.EOF ) {
tokenName = "EOF";
}
else {
if ( ttype<typeToTokenList.size() ) {
tokenName = typeToTokenList.get(ttype);
if ( tokenName!=null &&
tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) &&
typeToStringLiteralList.get(ttype)!=null)
{
tokenName = typeToStringLiteralList.get(ttype);
}
}
else {
tokenName = String.valueOf(ttype);
}
}
//System.out.println("getTokenDisplayName ttype="+ttype+", index="+index+", name="+tokenName);
return tokenName;
}
public List<String> getTokenDisplayNames(Collection<Integer> types) {
List<String> names = new ArrayList<String>();
for (int t : types) names.add(getTokenDisplayName(t));
return names;
}
public String[] getTokenNames() {
int numTokens = getMaxTokenType();
String[] tokenNames = new String[numTokens+1];
for (String t : tokenNameToTypeMap.keySet()) {
Integer ttype = tokenNameToTypeMap.get(t);
if ( ttype>0 ) tokenNames[ttype] = t;
}
return tokenNames;
}
public String[] getTokenDisplayNames() {
int numTokens = getMaxTokenType();
String[] tokenNames = new String[numTokens+1];
for (String t : tokenNameToTypeMap.keySet()) {
Integer ttype = tokenNameToTypeMap.get(t);
if ( ttype>0 ) tokenNames[ttype] = t;
}
for (String t : stringLiteralToTypeMap.keySet()) {
Integer ttype = stringLiteralToTypeMap.get(t);
if ( ttype>0 ) tokenNames[ttype] = t;
}
return tokenNames;
}
/** What is the max char value possible for this grammar's target? Use
* unicode max if no target defined.
*/
public int getMaxCharValue() {
return MAX_CHAR_VALUE;
// if ( generator!=null ) {
// return generator.target.getMaxCharValue(generator);
// }
// else {
// return Label.MAX_CHAR_VALUE;
// }
}
/** Return a set of all possible token or char types for this grammar */
public IntSet getTokenTypes() {
if ( isLexer() ) {
return getAllCharValues();
}
return IntervalSet.of(Token.MIN_TOKEN_TYPE, getMaxTokenType());
}
/** Return min to max char as defined by the target.
* If no target, use max unicode char value.
*/
public IntSet getAllCharValues() {
return IntervalSet.of(MIN_CHAR_VALUE, getMaxCharValue());
}
/** How many token types have been allocated so far? */
public int getMaxTokenType() {
return typeToTokenList.size() - 1; // don't count 0 (invalid)
}
/** Return a new unique integer in the token type space */
public int getNewTokenType() {
maxTokenType++;
return maxTokenType;
}
public void importVocab(Grammar g) {
this.tokenNameToTypeMap.putAll( g.tokenNameToTypeMap );
this.stringLiteralToTypeMap.putAll( g.stringLiteralToTypeMap );
int max = Math.max(this.typeToTokenList.size(), g.typeToTokenList.size());
this.typeToTokenList.setSize(max);
for (int ttype=0; ttype<g.typeToTokenList.size(); ttype++) {
maxTokenType = Math.max(maxTokenType, ttype);
this.typeToTokenList.set(ttype, g.typeToTokenList.get(ttype));
}
}
public int defineTokenName(String name) {
return defineTokenName(name, getNewTokenType());
}
public int defineTokenName(String name, int ttype) {
Integer prev = tokenNameToTypeMap.get(name);
if ( prev!=null ) return prev;
tokenNameToTypeMap.put(name, ttype);
setTokenForType(ttype, name);
maxTokenType = Math.max(maxTokenType, ttype);
return ttype;
}
public int defineStringLiteral(String lit) {
return defineStringLiteral(lit, getNewTokenType());
}
public int defineStringLiteral(String lit, int ttype) {
if ( !stringLiteralToTypeMap.containsKey(lit) ) {
stringLiteralToTypeMap.put(lit, ttype);
// track in reverse index too
if ( ttype>=typeToStringLiteralList.size() ) {
typeToStringLiteralList.setSize(ttype+1);
}
typeToStringLiteralList.set(ttype, lit);
setTokenForType(ttype, lit);
return ttype;
}
return Token.INVALID_TYPE;
}
public int defineTokenAlias(String name, String lit) {
int ttype = defineTokenName(name);
stringLiteralToTypeMap.put(lit, ttype);
setTokenForType(ttype, name);
return ttype;
}
public void setTokenForType(int ttype, String text) {
if ( ttype>=typeToTokenList.size() ) {
typeToTokenList.setSize(ttype+1);
}
String prevToken = typeToTokenList.get(ttype);
if ( prevToken==null || prevToken.charAt(0)=='\'' ) {
// only record if nothing there before or if thing before was a literal
typeToTokenList.set(ttype, text);
}
}
// no isolated attr at grammar action level
public Attribute resolveToAttribute(String x, ActionAST node) {
return null;
}
// no $x.y makes sense here
public Attribute resolveToAttribute(String x, String y, ActionAST node) {
return null;
}
public AttributeDict resolveToDynamicScope(String x, ActionAST node) {
return scopes.get(x);
}
public boolean resolvesToLabel(String x, ActionAST node) { return false; }
public boolean resolvesToListLabel(String x, ActionAST node) { return false; }
public boolean resolvesToToken(String x, ActionAST node) { return false; }
public boolean resolvesToAttributeDict(String x, ActionAST node) {
return scopes.get(x)!=null;
}
/** Given a grammar type, what should be the default action scope?
* If I say @members in a COMBINED grammar, for example, the
* default scope should be "parser".
*/
public String getDefaultActionScope() {
switch ( getType() ) {
case ANTLRParser.LEXER :
return "lexer";
case ANTLRParser.PARSER :
case ANTLRParser.COMBINED :
return "parser";
case ANTLRParser.TREE :
return "treeparser";
}
return null;
}
public int getType() {
if ( ast!=null ) return ast.grammarType;
return 0;
}
public boolean isLexer() { return getType()==ANTLRParser.LEXER; }
public boolean isParser() { return getType()==ANTLRParser.PARSER; }
public boolean isTreeGrammar() { return getType()==ANTLRParser.TREE; }
public boolean isCombined() { return getType()==ANTLRParser.COMBINED; }
public String getTypeString() {
if ( ast==null ) return null;
return ANTLRParser.tokenNames[getType()].toLowerCase();
}
public static String getGrammarTypeToFileNameSuffix(int type) {
switch ( type ) {
case ANTLRParser.LEXER : return "Lexer";
case ANTLRParser.PARSER : return "Parser";
case ANTLRParser.TREE : return "";
// if combined grammar, gen Parser and Lexer will be done later
// TODO: we are separate now right?
case ANTLRParser.COMBINED : return "Parser";
default :
return "<invalid>";
}
}
public String getOption(String key) {
if ( ast.options==null ) return null;
return ast.options.get(key);
}
public String getOption(String key, String defaultValue) {
if ( ast.options==null ) return defaultValue;
String v = ast.options.get(key);
if ( v!=null ) return v;
return defaultValue;
}
public static Map<String,String> getStringLiteralAliasesFromLexerRules(GrammarRootAST ast) {
GrammarAST combinedRulesRoot =
(GrammarAST)ast.getFirstChildWithType(ANTLRParser.RULES);
if ( combinedRulesRoot==null ) return null;
List<GrammarASTWithOptions> ruleNodes = combinedRulesRoot.getChildren();
if ( ruleNodes==null || ruleNodes.size()==0 ) return null;
GrammarASTAdaptor adaptor = new GrammarASTAdaptor(ruleNodes.get(0).token.getInputStream());
TreeWizard wiz = new TreeWizard(adaptor,ANTLRParser.tokenNames);
Map<String,String> lexerRuleToStringLiteral = new HashMap<String,String>();
for (GrammarASTWithOptions r : ruleNodes) {
String ruleName = r.getChild(0).getText();
if ( Character.isUpperCase(ruleName.charAt(0)) ) {
Map nodes = new HashMap();
boolean isLitRule =
wiz.parse(r, "(RULE %name:ID (BLOCK (ALT %lit:STRING_LITERAL)))", nodes);
if ( isLitRule ) {
GrammarAST litNode = (GrammarAST)nodes.get("lit");
GrammarAST nameNode = (GrammarAST)nodes.get("name");
lexerRuleToStringLiteral.put(litNode.getText(), nameNode.getText());
}
}
}
return lexerRuleToStringLiteral;
}
public void setLookaheadDFA(int decision, DFA lookaheadDFA) {
decisionDFAs.put(Utils.integer(decision), lookaheadDFA);
}
}
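
A sketch of the token-type bookkeeping above, relying on the for-testing constructor; the grammar string and token names are illustrative.

    import org.antlr.v4.tool.Grammar;

    public class TokenTypeSketch {
        public static void main(String[] args) throws org.antlr.runtime.RecognitionException {
            Grammar g = new Grammar("grammar T; a : 'x' ;");
            int id = g.defineTokenName("ID");                // allocates next free type
            int wh = g.defineTokenAlias("WHILE", "'while'"); // name and literal share one type
            System.out.println(g.getTokenDisplayName(id));       // ID
            System.out.println(g.getTokenType("'while'") == wh); // true: literal lookup
        }
    }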

View File

@ -0,0 +1,99 @@
package org.antlr.v4.tool;
import org.antlr.runtime.*;
import org.antlr.runtime.tree.*;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.parse.ANTLRParser;
import org.antlr.v4.runtime.atn.ATNState;
import java.util.*;
public class GrammarAST extends CommonTree {
/** If we build an ATN, we make the AST node point at the left edge of the ATN construct */
public ATNState atnState;
public GrammarAST() {;}
public GrammarAST(Token t) { super(t); }
public GrammarAST(GrammarAST node) { super(node); }
public GrammarAST(int type) { super(new CommonToken(type, ANTLRParser.tokenNames[type])); }
public GrammarAST(int type, Token t) {
this(new CommonToken(type, t.getText()));
token.setInputStream(t.getInputStream());
token.setLine(t.getLine());
token.setCharPositionInLine(t.getCharPositionInLine());
}
public GrammarAST(int type, Token t, String text) {
this(new CommonToken(type, text));
token.setInputStream(t.getInputStream());
token.setLine(t.getLine());
token.setCharPositionInLine(t.getCharPositionInLine());
}
public List<GrammarAST> getNodesWithType(int ttype) {
return getNodesWithType(IntervalSet.of(ttype));
}
public List<GrammarAST> getNodesWithType(IntervalSet types) {
List<GrammarAST> nodes = new ArrayList<GrammarAST>();
List<GrammarAST> work = new LinkedList<GrammarAST>();
work.add(this);
GrammarAST t = null;
while ( work.size()>0 ) {
t = work.remove(0);
if ( types.member(t.getType()) ) nodes.add(t);
if ( t.children!=null ) work.addAll(t.children);
}
return nodes;
}
public AltAST getOutermostAltNode() {
if ( this instanceof AltAST && parent!=null && parent.parent instanceof RuleAST ) {
return (AltAST)this;
}
if ( parent!=null ) return ((GrammarAST)parent).getOutermostAltNode();
return null;
}
// TODO: move to basetree when i settle on how runtime works
// TODO: don't include this node!!
// TODO: reuse other method
public CommonTree getFirstDescendantWithType(int type) {
if ( getType()==type ) return this;
if ( children==null ) return null;
for (Object c : children) {
GrammarAST t = (GrammarAST)c;
if ( t.getType()==type ) return t;
CommonTree d = t.getFirstDescendantWithType(type);
if ( d!=null ) return d;
}
return null;
}
// TODO: don't include this node!!
public CommonTree getFirstDescendantWithType(org.antlr.runtime.BitSet types) {
if ( types.member(getType()) ) return this;
if ( children==null ) return null;
for (Object c : children) {
GrammarAST t = (GrammarAST)c;
if ( types.member(t.getType()) ) return t;
CommonTree d = t.getFirstDescendantWithType(types);
if ( d!=null ) return d;
}
return null;
}
// @Override
// public boolean equals(Object obj) {
// return super.equals(obj);
// }
@Override
public Tree dupNode() {
return new GrammarAST(this);
}
@Override
public String toString() {
return super.toString();
}
}
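
A sketch of getNodesWithType()'s breadth-first walk over a hand-built tree; a real tree would come from the parser, and the node types chosen here are just for illustration.

    import org.antlr.runtime.CommonToken;
    import org.antlr.v4.parse.ANTLRParser;
    import org.antlr.v4.tool.GrammarAST;

    public class AstSearchSketch {
        public static void main(String[] args) {
            GrammarAST root = new GrammarAST(new CommonToken(ANTLRParser.BLOCK, "BLOCK"));
            GrammarAST alt  = new GrammarAST(new CommonToken(ANTLRParser.ALT, "ALT"));
            GrammarAST id   = new GrammarAST(new CommonToken(ANTLRParser.ID, "r"));
            root.addChild(alt);
            alt.addChild(id);
            // collects every ID node in the subtree, breadth first
            System.out.println(root.getNodesWithType(ANTLRParser.ID)); // [r]
        }
    }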

View File

@ -0,0 +1,33 @@
package org.antlr.v4.tool;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree;
import java.util.*;
public class GrammarRootAST extends GrammarASTWithOptions {
public static final Map defaultOptions =
new HashMap() {
{
put("language","Java");
}
};
public int grammarType; // LEXER, PARSER, TREE, GRAMMAR (combined)
public boolean hasErrors;
public GrammarRootAST(GrammarAST node) {
super(node);
this.grammarType = ((GrammarRootAST)node).grammarType;
this.hasErrors = ((GrammarRootAST)node).hasErrors;
}
@Override
public Tree dupNode() { return new GrammarRootAST(this); }
public GrammarRootAST(int type) { super(type); }
public GrammarRootAST(Token t) { super(t); }
public GrammarRootAST(int type, Token t) { super(type, t); }
public GrammarRootAST(int type, Token t, String text) {
super(type,t,text);
}
}

View File

@ -0,0 +1,48 @@
package org.antlr.v4.tool;
import org.antlr.runtime.BitSet;
import org.antlr.v4.parse.ANTLRParser;
public class LabelElementPair {
public static final BitSet tokenTypeForTokens = new BitSet();
static {
tokenTypeForTokens.add(ANTLRParser.TOKEN_REF);
tokenTypeForTokens.add(ANTLRParser.STRING_LITERAL);
tokenTypeForTokens.add(ANTLRParser.WILDCARD);
}
public GrammarAST label;
public GrammarAST element;
public LabelType type;
public LabelElementPair(Grammar g, GrammarAST label, GrammarAST element, int labelOp) {
this.label = label;
this.element = element;
// compute general case for label type
if ( element.getFirstDescendantWithType(tokenTypeForTokens)!=null ) {
if ( labelOp==ANTLRParser.ASSIGN ) type = LabelType.TOKEN_LABEL;
else type = LabelType.TOKEN_LIST_LABEL;
}
else if ( element.getFirstDescendantWithType(ANTLRParser.RULE_REF)!=null ) {
if ( labelOp==ANTLRParser.ASSIGN ) type = LabelType.RULE_LABEL;
else type = LabelType.RULE_LIST_LABEL;
}
// now reset if lexer and string
if ( g.isLexer() ) {
if ( element.getFirstDescendantWithType(ANTLRParser.STRING_LITERAL)!=null ) {
if ( labelOp==ANTLRParser.ASSIGN ) type = LabelType.LEXER_STRING_LABEL;
}
}
else if ( g.isTreeGrammar() ) {
if ( element.getFirstDescendantWithType(ANTLRParser.WILDCARD)!=null ) {
if ( labelOp==ANTLRParser.ASSIGN ) type = LabelType.WILDCARD_TREE_LABEL;
else type = LabelType.WILDCARD_TREE_LIST_LABEL;
}
}
}
public String toString() {
return label.getText()+" "+type+" "+element.toString();
}
}
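
A sketch of the classification logic for "x=ID" in a parser grammar; the AST nodes are built by hand rather than by the parser, and the grammar string is hypothetical.

    import org.antlr.runtime.CommonToken;
    import org.antlr.v4.parse.ANTLRParser;
    import org.antlr.v4.tool.*;

    public class LabelClassificationSketch {
        public static void main(String[] args) throws org.antlr.runtime.RecognitionException {
            Grammar g = new Grammar("parser grammar P; a : x=ID ;");
            GrammarAST label   = new GrammarAST(new CommonToken(ANTLRParser.ID, "x"));
            GrammarAST element = new GrammarAST(new CommonToken(ANTLRParser.TOKEN_REF, "ID"));
            LabelElementPair p = new LabelElementPair(g, label, element, ANTLRParser.ASSIGN);
            System.out.println(p.type); // TOKEN_LABEL; with += it would be TOKEN_LIST_LABEL
        }
    }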

View File

@ -0,0 +1,15 @@
package org.antlr.v4.tool;
/** The various kinds of labels: t=type, id=ID, types+=type, ids+=ID */
public enum LabelType {
RULE_LABEL,
TOKEN_LABEL,
RULE_LIST_LABEL,
TOKEN_LIST_LABEL,
LEXER_STRING_LABEL, // used in lexer for x='a'
SUBRULE_LABEL, // x=(...)
SUBRULE_LIST_LABEL, // x+=(...)
WILDCARD_TREE_LABEL, // Used in tree grammar x=.
WILDCARD_TREE_LIST_LABEL // Used in tree grammar x+=.
;
}

View File

@ -0,0 +1,36 @@
package org.antlr.v4.tool;
import org.antlr.runtime.RecognitionException;
import org.antlr.v4.Tool;
import org.stringtemplate.v4.misc.MultiMap;
/** A lexer grammar; tracks which rules live in which modes. */
public class LexerGrammar extends Grammar {
public static final String DEFAULT_MODE_NAME = "DEFAULT_MODE";
public MultiMap<String, Rule> modes = new MultiMap<String, Rule>();
public LexerGrammar(Tool tool, GrammarRootAST ast) {
super(tool, ast);
}
public LexerGrammar(String grammarText) throws RecognitionException {
super(grammarText);
}
public LexerGrammar(String grammarText, ANTLRToolListener listener) throws RecognitionException {
super(grammarText, listener);
}
public LexerGrammar(String fileName, String grammarText, ANTLRToolListener listener) throws RecognitionException {
super(fileName, grammarText, listener);
}
@Override
public void defineRule(Rule r) {
super.defineRule(r);
modes.map(r.mode, r);
}
}
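
A sketch of the mode bookkeeping; the rule's mode is assigned by hand here, where the real pipeline would set it while collecting symbols.

    import org.antlr.v4.tool.*;

    public class LexerModeSketch {
        public static void main(String[] args) throws org.antlr.runtime.RecognitionException {
            LexerGrammar lg = new LexerGrammar("lexer grammar L; A : 'a' ;");
            Rule a = new Rule(lg, "A", null, 1);
            a.mode = LexerGrammar.DEFAULT_MODE_NAME;
            lg.defineRule(a); // also buckets the rule into its mode
            System.out.println(lg.modes.get(LexerGrammar.DEFAULT_MODE_NAME)); // [Rule{name=A}]
        }
    }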

View File

@ -0,0 +1,15 @@
package org.antlr.v4.tool;
import org.antlr.runtime.Token;
public class PredAST extends ActionAST {
public PredAST(GrammarAST node) {
super(node);
this.resolver = ((ActionAST)node).resolver;
this.chunks = ((ActionAST)node).chunks;
}
public PredAST(Token t) { super(t); }
public PredAST(int type) { super(type); }
public PredAST(int type, Token t) { super(type, t); }
}

View File

@ -0,0 +1,246 @@
package org.antlr.v4.tool;
import org.antlr.runtime.Token;
import org.antlr.v4.parse.ANTLRParser;
import org.stringtemplate.v4.misc.MultiMap;
import java.util.*;
public class Rule implements AttributeResolver {
/** Rule refs have a predefined set of attributes as well as
* the return values and args.
*/
public static AttributeDict predefinedRulePropertiesDict =
new AttributeDict(AttributeDict.DictType.PREDEFINED_RULE) {{
add(new Attribute("text"));
add(new Attribute("start"));
add(new Attribute("stop"));
add(new Attribute("tree"));
add(new Attribute("st"));
}};
public static AttributeDict predefinedTreeRulePropertiesDict =
new AttributeDict(AttributeDict.DictType.PREDEFINED_TREE_RULE) {{
add(new Attribute("text"));
add(new Attribute("start")); // note: no stop; not meaningful
add(new Attribute("tree"));
add(new Attribute("st"));
}};
public static AttributeDict predefinedLexerRulePropertiesDict =
new AttributeDict(AttributeDict.DictType.PREDEFINED_LEXER_RULE) {{
add(new Attribute("text"));
add(new Attribute("type"));
add(new Attribute("line"));
add(new Attribute("index"));
add(new Attribute("pos"));
add(new Attribute("channel"));
add(new Attribute("start"));
add(new Attribute("stop"));
add(new Attribute("int"));
}};
public String name;
public List<GrammarAST> modifiers;
public RuleAST ast;
public AttributeDict args;
public AttributeDict retvals;
public AttributeDict scope; // scope { int i; }
/** A list of scope names used by this rule */
public List<Token> useScopes;
public Grammar g;
/** If we're in a lexer grammar, we might be in a mode */
public String mode;
/** Map a name to an action for this rule like @init {...}.
* The code generator will use this to fill holes in the rule template.
* I track the AST node for the action in case I need the line number
* for errors.
*/
public Map<String, ActionAST> namedActions =
new HashMap<String, ActionAST>();
/** Track exception handler actions (exception type is prev child);
* don't track finally action
*/
public List<ActionAST> exceptionActions = new ArrayList<ActionAST>();
public ActionAST finallyAction;
public int numberOfAlts;
public boolean isStartRule = true; // nobody calls us
public Alternative[] alt;
/** All rules have unique index 1..n */
public int index;
public int actionIndex; // if lexer
public Rule(Grammar g, String name, RuleAST ast, int numberOfAlts) {
this.g = g;
this.name = name;
this.ast = ast;
this.numberOfAlts = numberOfAlts;
alt = new Alternative[numberOfAlts+1]; // 1..n
for (int i=1; i<=numberOfAlts; i++) alt[i] = new Alternative(this);
}
public void defineActionInAlt(int currentAlt, ActionAST actionAST) {
alt[currentAlt].actions.add(actionAST);
if ( g.isLexer() || actionAST.getType()== ANTLRParser.FORCED_ACTION ) {
actionIndex = g.actions.size() + 1;
g.actions.put(actionAST, actionIndex);
}
}
public void definePredicateInAlt(int currentAlt, PredAST predAST) {
alt[currentAlt].actions.add(predAST);
g.sempreds.put(predAST, g.sempreds.size() + 1);
}
public Attribute resolveRetvalOrProperty(String y) {
if ( retvals!=null ) {
Attribute a = retvals.get(y);
if ( a!=null ) return a;
}
AttributeDict d = getPredefinedScope(LabelType.RULE_LABEL);
return d.get(y);
}
public Set<String> getTokenRefs() {
Set<String> refs = new HashSet<String>();
for (int i=1; i<=numberOfAlts; i++) {
refs.addAll(alt[i].tokenRefs.keySet());
}
return refs;
}
public Set<String> getLabelNames() {
Set<String> refs = new HashSet<String>();
for (int i=1; i<=numberOfAlts; i++) {
refs.addAll(alt[i].labelDefs.keySet());
}
return refs;
}
// TODO: called frequently; make it more efficient
public MultiMap<String, LabelElementPair> getLabelDefs() {
MultiMap<String, LabelElementPair> defs =
new MultiMap<String, LabelElementPair>();
for (int i=1; i<=numberOfAlts; i++) {
for (List<LabelElementPair> pairs : alt[i].labelDefs.values()) {
for (LabelElementPair p : pairs) {
defs.map(p.label.getText(), p);
}
}
}
return defs;
}
/** $x Attribute: rule arguments, return values, predefined rule prop.
*/
public Attribute resolveToAttribute(String x, ActionAST node) {
if ( args!=null ) {
Attribute a = args.get(x); if ( a!=null ) return a;
}
if ( retvals!=null ) {
Attribute a = retvals.get(x); if ( a!=null ) return a;
}
AttributeDict properties = getPredefinedScope(LabelType.RULE_LABEL);
return properties.get(x);
}
/** $x.y Attribute: x is surrounding rule, label ref (in any alts) */
public Attribute resolveToAttribute(String x, String y, ActionAST node) {
if ( this.name.equals(x) ) { // x is this rule?
return resolveToAttribute(y, node);
}
LabelElementPair anyLabelDef = getAnyLabelDef(x);
if ( anyLabelDef!=null ) {
if ( anyLabelDef.type==LabelType.RULE_LABEL ) {
return g.getRule(anyLabelDef.element.getText()).resolveRetvalOrProperty(y);
}
else {
return getPredefinedScope(anyLabelDef.type).get(y);
}
}
return null;
}
public AttributeDict resolveToDynamicScope(String x, ActionAST node) {
Rule r = resolveToRule(x);
if ( r!=null && r.scope!=null ) return r.scope;
return g.scopes.get(x);
}
public boolean resolvesToLabel(String x, ActionAST node) {
return false;
}
public boolean resolvesToListLabel(String x, ActionAST node) {
LabelElementPair anyLabelDef = getAnyLabelDef(x);
return anyLabelDef!=null &&
(anyLabelDef.type==LabelType.RULE_LIST_LABEL ||
anyLabelDef.type==LabelType.TOKEN_LIST_LABEL);
}
public boolean resolvesToToken(String x, ActionAST node) {
LabelElementPair anyLabelDef = getAnyLabelDef(x);
if ( anyLabelDef!=null && anyLabelDef.type==LabelType.TOKEN_LABEL ) return true;
return false;
}
public boolean resolvesToAttributeDict(String x, ActionAST node) {
if ( resolvesToToken(x, node) ) return true;
if ( x.equals(name) ) return true; // $r for action in rule r, $r is a dict
if ( scope!=null ) return true;
if ( g.scopes.get(x)!=null ) return true;
return false;
}
public Rule resolveToRule(String x) {
if ( x.equals(this.name) ) return this;
LabelElementPair anyLabelDef = getAnyLabelDef(x);
if ( anyLabelDef!=null && anyLabelDef.type==LabelType.RULE_LABEL ) {
return g.getRule(anyLabelDef.element.getText());
}
return g.getRule(x);
}
public LabelElementPair getAnyLabelDef(String x) {
List<LabelElementPair> labels = getLabelDefs().get(x);
if ( labels!=null ) return labels.get(0);
return null;
}
public AttributeDict getPredefinedScope(LabelType ltype) {
String grammarLabelKey = g.getTypeString() + ":" + ltype;
return Grammar.grammarAndLabelRefTypeToScope.get(grammarLabelKey);
}
public boolean isFragment() {
if ( modifiers==null ) return false;
for (GrammarAST a : modifiers) {
if ( a.getText().equals("fragment") ) return true;
}
return false;
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append("Rule{name="+name);
if ( args!=null ) buf.append(", args=" + args);
if ( retvals!=null ) buf.append(", retvals=" + retvals);
if ( scope!=null ) buf.append(", scope=" + scope);
buf.append("}");
return buf.toString();
}
}
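
A sketch of rule-level $x resolution: a declared return value wins, otherwise the predefined rule properties apply. The grammar string and the retval are hypothetical stand-ins for what the symbol-definition pass would build.

    import org.antlr.v4.tool.*;

    public class RuleResolutionSketch {
        public static void main(String[] args) throws org.antlr.runtime.RecognitionException {
            Grammar g = new Grammar("parser grammar P; e : ID ;");
            Rule r = new Rule(g, "e", null, 1);
            r.retvals = new AttributeDict(AttributeDict.DictType.RET);
            r.retvals.add(new Attribute("v", "int v"));
            System.out.println(r.resolveToAttribute("v", null).name);    // v: declared retval
            System.out.println(r.resolveToAttribute("text", null).name); // text: predefined prop
        }
    }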

View File

@ -0,0 +1,29 @@
package org.antlr.v4.tool;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree;
import org.antlr.v4.parse.ANTLRParser;
public class RuleAST extends GrammarASTWithOptions {
public RuleAST(GrammarAST node) {
super(node);
}
public RuleAST(Token t) { super(t); }
public RuleAST(int type) { super(type); }
@Override
public Tree dupNode() { return new RuleAST(this); }
public ActionAST getLexerAction() {
Tree blk = getFirstChildWithType(ANTLRParser.BLOCK);
if ( blk!=null && blk.getChildCount()==1 ) {
Tree onlyAlt = blk.getChild(0);
Tree lastChild = onlyAlt.getChild(onlyAlt.getChildCount()-1);
if ( lastChild.getType()==ANTLRParser.ACTION ) {
return (ActionAST)lastChild;
}
}
return null;
}
}

View File

@ -0,0 +1,19 @@
package org.antlr.v4.tool;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree;
public class TerminalAST extends GrammarASTWithOptions {
public static final String defaultTokenOption = "node";
public TerminalAST(GrammarAST node) {
super(node);
}
public TerminalAST(Token t) { super(t); }
public TerminalAST(int type) { super(type); }
public TerminalAST(int type, Token t) { super(type, t); }
@Override
public Tree dupNode() { return new TerminalAST(this); }
}