Merge pull request #352 from parrt/xpath

Add XPath to collect parse tree nodes/subtrees.
2013-11-13 16:22:37 -08:00 · 2013-11-13 16:22:37 -08:00 · a3d71db1bc
parent b80ad5052d e58045474a
commit a3d71db1bc
21 changed files with 973 additions and 14 deletions
--- a/.gitignore
+++ b/.gitignore
@ -27,4 +27,4 @@ nbactions*.xml
 *.hprof

 # Playground
-/tool/playground/
+#/tool/playground/
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -1,5 +1,45 @@
 ANTLR v4 Honey Badger

+November 13, 2013
+
+* move getChildren() from Tree into Trees (to avoid breaking change)
+* Notation:
+	/prog/func,         -> all funcs under prog at root
+	/prog/*,            -> all children of prog at root
+	/*/func,            -> all func kids of any root node
+	prog,               -> prog must be root node
+	/prog,              -> prog must be root node
+	/*,                 -> any root
+	*,                  -> any root
+	//ID,               -> any ID in tree
+	//expr/primary/ID,  -> any ID child of a primary under any expr
+	//body//ID,         -> any ID under a body
+	//'return',         -> any 'return' literal in tree
+	//primary/*,        -> all kids of any primary
+	//func/*/stat,      -> all stat nodes grandkids of any func node
+	/prog/func/'def',   -> all def literal kids of func kid of prog
+	//stat/';',         -> all ';' under any stat node
+	//expr/primary/!ID, -> anything but ID under primary under any expr node
+	//expr/!primary,    -> anything but primary under any expr node
+	//!*,               -> nothing anywhere
+	/!*,                -> nothing at root
+
+September 16, 2013
+
+* Updated build.xml to support v4 grammars in v4 itself; compiles XPathLexer.g4
+* Add to XPath:
+	Collection<ParseTree> findAll(String xpath);
+
+September 11, 2013
+
+* Add ! operator to XPath
+* Use ANTLR v4 XPathLexer.g4 not regex
+* Copy lots of find node stuff from v3 GrammarAST to Trees class in runtime.
+
+September 10, 2013
+
+* Adding in XPath stuff.
+
 August 31, 2013

 * Lots of little fixes thanks to Coverity Scan
--- a/build.xml
+++ b/build.xml
@ -19,6 +19,18 @@
        <property name="antlr3.touch" value="${build.dir}/antlr3-${antlr3.version}.touch"/>
    </target>

+    <!-- Declares the ANTLR 4 bootstrap settings: the tool version and jar name,
+         the cp.antlr4 classpath reference, the directory for generated sources
+         and the touch file consulted by the antlr4 up-to-date check. The jar is
+         fetched into ${lib.dir} unless a copy is already there (skipexisting). -->
+    <target name="antlr4-init" depends="basic-init">
+        <property name="antlr4.version" value="4.1"/>
+        <property name="antlr4.jar.name" value="antlr-${antlr4.version}-complete.jar"/>
+        <property name="antlr4.jar" value="${lib.dir}/${antlr4.jar.name}"/>
+        <mkdir dir="${lib.dir}"/>
+        <get src="http://antlr.org/download/${antlr4.jar.name}" dest="${antlr4.jar}" skipexisting="true"/>
+        <path id="cp.antlr4" path="${antlr4.jar}"/>
+
+        <property name="build.antlr4.dir" value="${build.dir}/generated-sources/antlr4" />
+        <property name="antlr4.touch" value="${build.dir}/antlr4-${antlr4.version}.touch"/>
+    </target>
+
    <target name="build-init" depends="basic-init">
        <property name="version" value="4.1.1-dev"/>
        <property name="build.sysclasspath" value="ignore"/>
@ -45,7 +57,17 @@
        </uptodate>
    </target>

-    <target name="up-to-date" depends="antlr3-up-to-date,build-init">
+    <!-- Sets is.antlr4.uptodate when the touch file written by the antlr4
+         target is newer than every ANTLR 4 grammar/tokens file and the ANTLR 4
+         jar. The grammars processed by the antlr4 target live under
+         runtime/Java/src, so that is the tree that must be scanned; scanning
+         tool/src would never notice an edited .g4 file. -->
+    <target name="antlr4-up-to-date" depends="basic-init,antlr4-init">
+        <uptodate targetfile="${antlr4.touch}" property="is.antlr4.uptodate">
+            <srcfiles dir="${basedir}/runtime/Java/src">
+                <include name="**/*.g4"/>
+                <include name="**/*.tokens"/>
+            </srcfiles>
+            <srcfiles file="${antlr4.jar}"/>
+        </uptodate>
+    </target>
+
+    <target name="up-to-date" depends="antlr3-up-to-date,antlr4-up-to-date,build-init">
        <uptodate targetfile="${jar.file}" property="is.source.uptodate">
            <srcfiles dir="${basedir}/tool/src">
                <include name="**/*.java"/>
@ -102,6 +124,33 @@
        </sequential>
    </macrodef>

+    <!-- Runs the ANTLR 4 tool (org.antlr.v4.Tool) in a forked JVM on the *.g4
+         files located directly in runtime/Java/src/@{srcpath}; output goes to
+         ${build.antlr4.dir}/@{srcpath}. Additional tool arguments can be passed
+         through the optional nested <args> element. -->
+    <macrodef name="antlr4">
+        <attribute name="srcpath"/>
+        <element name="args" optional="true"/>
+        <sequential>
+            <local name="path.antlr4.local"/>
+            <local name="sources.antlr4.local"/>
+            <path id="path.antlr4.local">
+                <fileset dir="${basedir}/runtime/Java/src/@{srcpath}" includes="*.g4"/>
+            </path>
+            <!-- strip the directory prefix so the tool receives bare file names
+                 (it is started with that directory as its working dir) -->
+            <pathconvert pathsep=" " property="sources.antlr4.local" refid="path.antlr4.local">
+                <map from="${basedir}/runtime/Java/src/@{srcpath}/" to=""/>
+            </pathconvert>
+            <mkdir dir="${build.antlr4.dir}/@{srcpath}"/>
+            <java classname="org.antlr.v4.Tool" fork="true" failonerror="true" maxmemory="300m"
+                  dir="${basedir}/runtime/Java/src/@{srcpath}">
+                <arg value="-o"/>
+                <arg value="${build.antlr4.dir}/@{srcpath}"/>
+                <args/>
+                <arg line="${sources.antlr4.local}"/>
+                <classpath>
+                    <path refid="cp.antlr4"/>
+                    <pathelement location="${java.class.path}"/>
+                </classpath>
+            </java>
+        </sequential>
+    </macrodef>
+
 	<target name="antlr3" depends="build-init,antlr3-init,antlr3-up-to-date" unless="is.antlr3.uptodate">
        <mkdir dir="${build.antlr3.dir}" />

@ -125,7 +174,28 @@
        <touch file="${antlr3.touch}" mkdirs="true"/>
 	</target>

-    <target name="compile" depends="build-init,antlr3,up-to-date" description="Compile for generic OS" unless="is.jar.uptodate">
+    <!-- Generates Java sources from the ANTLR 4 grammars of the runtime (the
+         xpath package), then refreshes the touch file used by the up-to-date
+         check. Skipped entirely when is.antlr4.uptodate is set. -->
+    <target name="antlr4" depends="build-init,antlr4-init,antlr4-up-to-date" unless="is.antlr4.uptodate">
+           <mkdir dir="${build.antlr4.dir}" />
+
+           <!-- the path/pathconvert pair below only feeds the echo message -->
+           <path id="sources.antlr4">
+               <fileset dir="${basedir}/runtime/Java/src" includes="**/*.g4"/>
+           </path>
+           <pathconvert pathsep="${line.separator}  " property="echo.sources.antlr4" refid="sources.antlr4">
+               <map from="${basedir}/runtime/Java/src/" to=""/>
+           </pathconvert>
+           <echo message="Generating ANTLR 4 grammars:${line.separator}  ${echo.sources.antlr4}"/>
+
+           <antlr4 srcpath="org/antlr/v4/runtime/tree/xpath">
+                <args>
+                    <arg value="-package"/>
+                    <arg value="org.antlr.v4.runtime.tree.xpath"/>
+                </args>
+           </antlr4>
+
+           <touch file="${antlr4.touch}" mkdirs="true"/>
+   	</target>
+
+    <target name="compile" depends="build-init,antlr3,antlr4,up-to-date" description="Compile for generic OS" unless="is.jar.uptodate">
        <mkdir dir="${build.dir}/classes"/>
        <javac
                destdir="${build.dir}/classes"
@ -139,7 +209,7 @@
                <path refid="cp.antlr3"/>
                <pathelement location="${basedir}/runtime/Java/lib/org.abego.treelayout.core.jar"/>
            </classpath>
-            <src path="${basedir}/tool/src:${basedir}/runtime/Java/src:${build.antlr3.dir}"/>
+            <src path="${basedir}/tool/src:${basedir}/runtime/Java/src:${build.antlr3.dir}:${build.antlr4.dir}"/>
        </javac>
    </target>

--- a/runtime/Java/src/org/antlr/v4/runtime/RuleContext.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/RuleContext.java
@ -38,7 +38,7 @@ import org.antlr.v4.runtime.tree.Trees;
 import org.antlr.v4.runtime.tree.gui.TreeViewer;

 import javax.print.PrintException;
-import javax.swing.JDialog;
+import javax.swing.*;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
@ -59,7 +59,7 @@ import java.util.concurrent.Future;
 *  getting error information.
 *
 *  These objects are used during parsing and prediction.
- *  For the special case of parsers and tree parsers, we use the subclass
+ *  For the special case of parsers, we use the subclass
 *  ParserRuleContext.
 *
 *  @see ParserRuleContext
--- a/runtime/Java/src/org/antlr/v4/runtime/misc/Utils.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/misc/Utils.java
@ -38,7 +38,9 @@ import java.io.FileWriter;
 import java.io.IOException;
 import java.io.Writer;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.Iterator;
+import java.util.Map;

 public class Utils {
    // Seriously: why isn't this built in to java? ugh!
@ -133,4 +135,15 @@ public class Utils {

 		t.join();
 	}
+
+	/** Convert array of strings to string->index map. Useful for
+	 *  converting rulenames to name->ruleindex map.
+	 */
+	public static Map<String, Integer> toMap(String[] keys) {
+		Map<String, Integer> m = new HashMap<String, Integer>();
+		for (int i=0; i<keys.length; i++) {
+			m.put(keys[i], i);
+		}
+		return m;
+	}
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/ParseTree.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/ParseTree.java
@ -49,16 +49,16 @@ public interface ParseTree extends SyntaxTree {
 	ParseTree getChild(int i);

 	/** The {@link ParseTreeVisitor} needs a double dispatch method. */
-	public <T> T accept(ParseTreeVisitor<? extends T> visitor);
+	<T> T accept(ParseTreeVisitor<? extends T> visitor);

 	/** Return the combined text of all leaf nodes. Does not get any
 	 *  off-channel tokens (if any) so won't return whitespace and
 	 *  comments if they are sent to parser on hidden channel.
 	 */
-	public String getText();
+	String getText();

 	/** Specialize toStringTree so that it can print out more information
 	 * 	based upon the parser.
 	 */
-	public String toStringTree(Parser parser);
+	String toStringTree(Parser parser);
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/TerminalNodeImpl.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/TerminalNodeImpl.java
@ -34,6 +34,8 @@ import org.antlr.v4.runtime.Parser;
 import org.antlr.v4.runtime.Token;
 import org.antlr.v4.runtime.misc.Interval;

+import java.util.List;
+
 public class TerminalNodeImpl implements TerminalNode {
 	public Token symbol;
 	public ParseTree parent;
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/Tree.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/Tree.java
@ -33,6 +33,9 @@ package org.antlr.v4.runtime.tree;
 import org.antlr.v4.runtime.RuleContext;
 import org.antlr.v4.runtime.Token;

+import java.util.Collection;
+import java.util.List;
+
 /** The basic notion of a tree has a parent, a payload, and a list of children.
 *  It is the most abstract interface for all the trees used by ANTLR.
 */
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/Trees.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/Trees.java
@ -31,6 +31,7 @@
 package org.antlr.v4.runtime.tree;

 import org.antlr.v4.runtime.Parser;
+import org.antlr.v4.runtime.ParserRuleContext;
 import org.antlr.v4.runtime.Token;
 import org.antlr.v4.runtime.misc.NotNull;
 import org.antlr.v4.runtime.misc.Nullable;
@ -42,6 +43,7 @@ import java.io.FileWriter;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;

@ -152,6 +154,16 @@ public class Trees {
 		return t.getPayload().toString();
 	}

+
+	/** Return ordered list of all children of this node */
+	public static List<Tree> getChildren(Tree t) {
+		List<Tree> kids = new ArrayList<Tree>();
+		for (int i=0; i<t.getChildCount(); i++) {
+			kids.add(t.getChild(i));
+		}
+		return kids;
+	}
+
 	/** Return a list of all ancestors of this node.  The first node of
 	 *  list is the root and the last is the parent of this node.
 	 */
@ -167,6 +179,49 @@ public class Trees {
 		return ancestors;
 	}

+	/** Return the terminal nodes in the subtree rooted at {@code t} ({@code t}
+	 *  included) whose token type equals {@code ttype}.
+	 */
+	public static Collection<ParseTree> findAllTokenNodes(ParseTree t, int ttype) {
+		return findAllNodes(t, ttype, true);
+	}
+
+	/** Return the rule context nodes in the subtree rooted at {@code t}
+	 *  ({@code t} included) whose rule index equals {@code ruleIndex}.
+	 */
+	public static Collection<ParseTree> findAllRuleNodes(ParseTree t, int ruleIndex) {
+		return findAllNodes(t, ruleIndex, false);
+	}
+
+	/** Find all nodes of one kind in the subtree rooted at {@code t},
+	 *  {@code t} itself included.
+	 *
+	 *  @param t          root of the subtree to search
+	 *  @param index      token type to match when {@code findTokens} is true,
+	 *                    otherwise the rule index to match
+	 *  @param findTokens true to look for terminal nodes, false for rule nodes
+	 *  @return the matching nodes; a node is listed before any match below it
+	 */
+	public static List<ParseTree> findAllNodes(ParseTree t, int index, boolean findTokens) {
+		// A List<ParseTree> is accepted wherever List<? super ParseTree> is
+		// expected, so no wildcard-typed local (and no unchecked cast back to
+		// List<ParseTree>) is needed.
+		List<ParseTree> nodes = new ArrayList<ParseTree>();
+		_findAllNodes(t, index, findTokens, nodes);
+		return nodes;
+	}
+
+	/** Recursive worker for {@link #findAllNodes}: appends to {@code nodes}
+	 *  every matching node of the subtree rooted at {@code t}, testing a node
+	 *  before descending into its children.
+	 */
+	public static void _findAllNodes(ParseTree t, int index, boolean findTokens,
+									 List<? super ParseTree> nodes)
+	{
+		// test the subtree root itself first
+		boolean matches;
+		if ( findTokens ) {
+			matches = t instanceof TerminalNode &&
+					  ((TerminalNode)t).getSymbol().getType()==index;
+		}
+		else {
+			matches = t instanceof ParserRuleContext &&
+					  ((ParserRuleContext)t).getRuleIndex()==index;
+		}
+		if ( matches ) nodes.add(t);
+
+		// then walk every child subtree, left to right
+		int child = 0;
+		while ( child < t.getChildCount() ) {
+			_findAllNodes(t.getChild(child), index, findTokens, nodes);
+			child++;
+		}
+	}
+
+	/** Return {@code t} followed by every node below it, in depth-first
+	 *  pre-order. Note that, despite the name, {@code t} itself is the first
+	 *  element of the result.
+	 */
+	public static List<ParseTree> descendants(ParseTree t){
+		List<ParseTree> nodes = new ArrayList<ParseTree>();
+		addSelfAndDescendants(t, nodes);
+		return nodes;
+	}
+
+	/** Append {@code t} and then, recursively, each of its child subtrees to
+	 *  {@code nodes}. Sharing one accumulator avoids building and re-copying
+	 *  a separate list at every level of the tree.
+	 */
+	private static void addSelfAndDescendants(ParseTree t, List<ParseTree> nodes) {
+		nodes.add(t);
+		int n = t.getChildCount();
+		for (int i = 0 ; i < n ; i++){
+			addSelfAndDescendants(t.getChild(i), nodes);
+		}
+	}
+
 	private Trees() {
 	}
 }
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPath.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPath.java
@ -0,0 +1,206 @@
+package org.antlr.v4.runtime.tree.xpath;
+
+import org.antlr.v4.runtime.ANTLRInputStream;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.LexerNoViableAltException;
+import org.antlr.v4.runtime.Parser;
+import org.antlr.v4.runtime.ParserRuleContext;
+import org.antlr.v4.runtime.Token;
+import org.antlr.v4.runtime.misc.Utils;
+import org.antlr.v4.runtime.tree.ParseTree;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+/** Represent a subset of XPath XML path syntax for use in identifying nodes in
+ *  parse trees.
+ *
+ *  Split path into words and separators / and // via ANTLR itself then walk
+ *  path elements from left to right.  At each separator-word pair, find set
+ *  of nodes. Next stage uses those as work list.
+ *
+ *  The basic interface is ParseTree.findAll(parser, pathString). But that is
+ *  just shorthand for:
+ *
+ *  XPath p = new XPath(parser, xpath);
+ *  return p.evaluate(this);
+ *
+ *  See {@link org.antlr.v4.test.TestXPath} for descriptions. In short, this allows
+ *  operators:
+ *
+ *      /         root
+ *      //        anywhere
+ *      !         invert; this must appear directly after root or anywhere operator
+ *
+ *  and path elements:
+ *
+ *      ID        token name
+ *      'string'  any string literal token from the grammar
+ *      expr      rule name
+ *      *         wildcard matching any node
+ *
+ *  Whitespace is not allowed.
+ */
+public class XPath {
+	public static final String WILDCARD = "*"; // word not operator/separator
+	public static final String NOT = "!"; 	   // word for invert operator
+
+	protected String path;
+	protected XPathElement[] elements;
+	protected Parser parser;
+
+	/** Create an XPath for {@code path}. {@code parser} supplies the rule and
+	 *  token names that the words of the path are looked up in. The path is
+	 *  split into elements right away, so an invalid path makes construction
+	 *  fail with the IllegalArgumentException thrown by {@link #split}.
+	 */
+	public XPath(Parser parser, String path) {
+		this.parser = parser;
+		this.path = path;
+		elements = split(path);
+//		System.out.println(Arrays.toString(elements));
+	}
+
+	// TODO: check for invalid token/rule names, bad syntax
+
+	/** Tokenize {@code path} with {@link XPathLexer} and turn the tokens into
+	 *  path elements, left to right. A separator (/ or //), an optional !
+	 *  and the following word become one element; a word with no separator
+	 *  in front of it is treated like a word after /.
+	 *
+	 *  @param path the path expression, e.g. {@code //expr/primary/ID}
+	 *  @return the elements in path order (empty array for an empty path)
+	 *  @throws IllegalArgumentException if the path contains characters the
+	 *          lexer cannot match, ends right after a separator or !, names
+	 *          an unknown rule/token, or has a token in an unexpected place
+	 */
+	public XPathElement[] split(String path) {
+		ANTLRInputStream in;
+		try {
+			in = new ANTLRInputStream(new StringReader(path));
+		}
+		catch (IOException ioe) {
+			throw new IllegalArgumentException("Could not read path: "+path, ioe);
+		}
+		XPathLexer lexer = new XPathLexer(in) {
+			// fail on the first unmatched character instead of recovering
+			@Override
+			public void recover(LexerNoViableAltException e) { throw e;	}
+		};
+		lexer.removeErrorListeners();
+		lexer.addErrorListener(new XPathLexerErrorListener());
+		CommonTokenStream tokenStream = new CommonTokenStream(lexer);
+		try {
+			tokenStream.fill();
+		}
+		catch (LexerNoViableAltException e) {
+			int pos = lexer.getCharPositionInLine();
+			String msg = "Invalid tokens or characters at index "+pos+" in path '"+path+"'";
+			throw new IllegalArgumentException(msg, e);
+		}
+
+		List<Token> tokens = tokenStream.getTokens();
+//		System.out.println("path="+path+"=>"+tokens);
+		List<XPathElement> elements = new ArrayList<XPathElement>();
+		int n = tokens.size();
+		int i=0;
+loop:
+		while ( i<n ) {
+			Token el = tokens.get(i);
+			switch ( el.getType() ) {
+				case XPathLexer.ROOT :
+				case XPathLexer.ANYWHERE :
+					boolean anywhere = el.getType() == XPathLexer.ANYWHERE;
+					// NOTE(review): the look-ahead below relies on the filled
+					// token list ending with an EOF token, so a separator or !
+					// always has a successor; getXPathElement rejects EOF.
+					i++;
+					Token next = tokens.get(i);
+					boolean invert = next.getType()==XPathLexer.BANG;
+					if ( invert ) {
+						i++;
+						next = tokens.get(i);
+					}
+					XPathElement pathElement = getXPathElement(next, anywhere);
+					pathElement.invert = invert;
+					elements.add(pathElement);
+					i++;
+					break;
+
+				case XPathLexer.TOKEN_REF :
+				case XPathLexer.RULE_REF :
+				case XPathLexer.WILDCARD :
+					elements.add( getXPathElement(el, false) );
+					i++;
+					break;
+
+				case Token.EOF :
+					break loop;
+
+				default :
+					throw new IllegalArgumentException("Unknown path element "+el);
+			}
+		}
+		return elements.toArray(new XPathElement[0]);
+	}
+
+	/** Build the path element for one word token: a wildcard, a token name,
+	 *  a string literal or a rule name. {@code anywhere} tells whether the
+	 *  word was preceded by // rather than /.
+	 *
+	 *  @throws IllegalArgumentException if the token is EOF or the word is
+	 *          not a token/rule name known to the parser
+	 */
+	protected XPathElement getXPathElement(Token wordToken, boolean anywhere) {
+		int wordType = wordToken.getType();
+		if ( wordType==Token.EOF ) {
+			throw new IllegalArgumentException("Missing path element at end of path");
+		}
+		String word = wordToken.getText();
+		Map<String, Integer> ruleIndexes = Utils.toMap(parser.getRuleNames());
+		Map<String, Integer> tokenTypes = Utils.toMap(parser.getTokenNames());
+		Integer ttype = tokenTypes.get(word);
+		Integer ruleIndex = ruleIndexes.get(word);
+
+		if ( wordType==XPathLexer.WILDCARD ) {
+			if ( anywhere ) return new XPathWildcardAnywhereElement();
+			return new XPathWildcardElement();
+		}
+
+		if ( wordType==XPathLexer.TOKEN_REF || wordType==XPathLexer.STRING ) {
+			if ( ttype==null ) {
+				String msg = word+" at index "+wordToken.getStartIndex()+
+							 " isn't a valid token name";
+				throw new IllegalArgumentException(msg);
+			}
+			if ( anywhere ) return new XPathTokenAnywhereElement(word, ttype);
+			return new XPathTokenElement(word, ttype);
+		}
+
+		// every other token type is looked up as a rule name
+		if ( ruleIndex==null ) {
+			String msg = word+" at index "+wordToken.getStartIndex()+
+						 " isn't a valid rule name";
+			throw new IllegalArgumentException(msg);
+		}
+		if ( anywhere ) return new XPathRuleAnywhereElement(word, ruleIndex);
+		return new XPathRuleElement(word, ruleIndex);
+	}
+
+
+	/** Convenience entry point: build an XPath from {@code xpath} using the
+	 *  names known to {@code parser} and evaluate it against {@code tree}.
+	 */
+	public static Collection<ParseTree> findAll(ParseTree tree, String xpath, Parser parser) {
+		XPath p = new XPath(parser, xpath);
+		return p.evaluate(tree);
+	}
+
+	/** Return a list of all nodes starting at t as root that satisfy the path.
+	 *  The root / is relative to the node passed to evaluate().
+	 *
+	 *  The path elements are applied one after another: the nodes matched by
+	 *  one element form the work list for the next. Matches are accumulated
+	 *  in a list without de-duplication.
+	 */
+	public Collection<ParseTree> evaluate(final ParseTree t) {
+		// Wrap t in a parentless dummy node so the first element can match t
+		// itself as a "child". A plain list is used rather than double-brace
+		// initialization, which would create an anonymous ArrayList subclass
+		// holding a reference to this XPath.
+		ParserRuleContext dummyRoot = new ParserRuleContext();
+		List<ParseTree> rootKids = new ArrayList<ParseTree>(1);
+		rootKids.add(t); // don't set t's parent.
+		dummyRoot.children = rootKids;
+
+		Collection<ParseTree> work = new ArrayList<ParseTree>();
+		work.add(dummyRoot);
+
+		for (XPathElement element : elements) {
+			Collection<ParseTree> next = new ArrayList<ParseTree>();
+			for (ParseTree node : work) {
+				if ( node.getChildCount()>0 ) {
+					// only try to match next element if it has children
+					// e.g., //func/*/stat might have a token node for which
+					// we can't go looking for stat nodes.
+					Collection<? extends ParseTree> matching = element.evaluate(node);
+					next.addAll(matching);
+				}
+			}
+			work = next;
+		}
+
+		return work;
+	}
+}
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathElement.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathElement.java
@ -0,0 +1,26 @@
+package org.antlr.v4.runtime.tree.xpath;
+
+import org.antlr.v4.runtime.tree.ParseTree;
+
+import java.util.Collection;
+
+/** One step of an XPath: a word (rule name, token name, string literal or
+ *  wildcard), optionally inverted with !.
+ */
+public abstract class XPathElement {
+	// the word this element was built from; used by toString()
+	protected String nodeName;
+	// true when the word was preceded by ! in the path (set by XPath.split)
+	protected boolean invert;
+
+	/** Construct an element for the word of a step like /ID, ID or "/*";
+	 *  {@code nodeName} is that word.
+	 */
+	public XPathElement(String nodeName) {
+		this.nodeName = nodeName;
+	}
+
+	/** Given tree rooted at t return all nodes matched by this path element */
+	public abstract Collection<ParseTree> evaluate(ParseTree t);
+
+	/** Render as SimpleClassName[word], with ! before the word if inverted. */
+	@Override
+	public String toString() {
+		String inv = invert ? "!" : "";
+		return getClass().getSimpleName()+"["+inv+nodeName+"]";
+	}
+}
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathLexer.g4
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathLexer.g4
@ -0,0 +1,63 @@
+lexer grammar XPathLexer;
+
+tokens { TOKEN_REF, RULE_REF }
+
+/*
+path : separator? word (separator word)* EOF ;
+
+separator
+	:	'/'  '!'
+	|	'//' '!'
+	|	'/'
+	|	'//'
+	;
+
+word:	TOKEN_REF
+	|	RULE_REF
+	|	STRING
+	|	'*'
+	;
+*/
+
+ANYWHERE : '//' ;
+ROOT	 : '/' ;
+WILDCARD : '*' ;
+BANG	 : '!' ;
+
+// A name is re-typed by the case of its first character: an upper-case first
+// letter yields TOKEN_REF, anything else RULE_REF (both declared in tokens{}).
+ID			:	NameStartChar NameChar*
+				{
+				String text = getText();
+				if ( Character.isUpperCase(text.charAt(0)) ) setType(TOKEN_REF);
+				else setType(RULE_REF);
+				}
+			;
+
+fragment
+NameChar    :   NameStartChar
+            |   '0'..'9'
+            |   '_'
+            |   '\u00B7'
+            |   '\u0300'..'\u036F'
+            |   '\u203F'..'\u2040'
+            ;
+
+fragment
+NameStartChar
+            :   'A'..'Z' | 'a'..'z'
+            |   '\u00C0'..'\u00D6'
+            |   '\u00D8'..'\u00F6'
+            |   '\u00F8'..'\u02FF'
+            |   '\u0370'..'\u037D'
+            |   '\u037F'..'\u1FFF'
+            |   '\u200C'..'\u200D'
+            |   '\u2070'..'\u218F'
+            |   '\u2C00'..'\u2FEF'
+            |   '\u3001'..'\uD7FF'
+            |   '\uF900'..'\uFDCF'
+            |   '\uFDF0'..'\uFFFD'
+            ; // ignores | ['\u10000-'\uEFFFF] ;
+
+// Single-quoted literal; the non-greedy .*? stops at the next quote, so there
+// is no way to escape a quote inside the literal.
+STRING : '\'' .*? '\'' ;
+
+//WS : [ \t\r\n]+ -> skip ;
+
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathLexerErrorListener.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathLexerErrorListener.java
@ -0,0 +1,14 @@
+package org.antlr.v4.runtime.tree.xpath;
+
+import org.antlr.v4.runtime.BaseErrorListener;
+import org.antlr.v4.runtime.RecognitionException;
+import org.antlr.v4.runtime.Recognizer;
+
+/** Error listener that discards every syntax error reported to it. */
+public class XPathLexerErrorListener extends BaseErrorListener {
+	@Override
+	public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol,
+							int line, int charPositionInLine, String msg,
+							RecognitionException e)
+	{
+		// Intentionally empty. NOTE(review): presumably errors are meant to
+		// surface only through the exception rethrown by the recover()
+		// override in XPath.split -- confirm.
+	}
+}
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathRuleAnywhereElement.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathRuleAnywhereElement.java
@ -0,0 +1,20 @@
+package org.antlr.v4.runtime.tree.xpath;
+
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.Trees;
+
+import java.util.Collection;
+
+/** Path element for a rule name preceded by //, e.g. //expr: matches rule
+ *  nodes with the given rule index anywhere in the subtree.
+ */
+public class XPathRuleAnywhereElement extends XPathElement {
+	protected int ruleIndex;
+	public XPathRuleAnywhereElement(String ruleName, int ruleIndex) {
+		super(ruleName);
+		this.ruleIndex = ruleIndex;
+	}
+
+	/** Return all rule nodes with this rule index in the subtree rooted at t.
+	 *  NOTE(review): the invert flag is not consulted here, so //!rule
+	 *  evaluates like //rule -- confirm whether that is intended.
+	 */
+	@Override
+	public Collection<ParseTree> evaluate(ParseTree t) {
+		return Trees.findAllRuleNodes(t, ruleIndex);
+	}
+}
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathRuleElement.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathRuleElement.java
@ -0,0 +1,35 @@
+package org.antlr.v4.runtime.tree.xpath;
+
+import org.antlr.v4.runtime.ParserRuleContext;
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.Tree;
+import org.antlr.v4.runtime.tree.Trees;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+public class XPathRuleElement extends XPathElement {
+	protected int ruleIndex;
+	public XPathRuleElement(String ruleName, int ruleIndex) {
+		super(ruleName);
+		this.ruleIndex = ruleIndex;
+	}
+
+	@Override
+	public Collection<ParseTree> evaluate(ParseTree t) {
+				// return all children of t that match nodeName
+		List<ParseTree> nodes = new ArrayList<ParseTree>();
+		for (Tree c : Trees.getChildren(t)) {
+			if ( c instanceof ParserRuleContext ) {
+				ParserRuleContext ctx = (ParserRuleContext)c;
+				if ( (ctx.getRuleIndex() == ruleIndex && !invert) ||
+					 (ctx.getRuleIndex() != ruleIndex && invert) )
+				{
+					nodes.add(ctx);
+				}
+			}
+		}
+		return nodes;
+	}
+}
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathTokenAnywhereElement.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathTokenAnywhereElement.java
@ -0,0 +1,19 @@
+package org.antlr.v4.runtime.tree.xpath;
+
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.Trees;
+
+import java.util.Collection;
+
+/** Path element for a token name or literal preceded by //, e.g. //ID:
+ *  matches terminal nodes of the given token type anywhere in the subtree.
+ */
+public class XPathTokenAnywhereElement extends XPathElement {
+	protected int tokenType;
+	public XPathTokenAnywhereElement(String tokenName, int tokenType) {
+		super(tokenName);
+		this.tokenType = tokenType;
+	}
+
+	/** Return all terminal nodes of this token type in the subtree rooted at t.
+	 *  NOTE(review): the invert flag is not consulted here, so //!ID
+	 *  evaluates like //ID -- confirm whether that is intended.
+	 */
+	@Override
+	public Collection<ParseTree> evaluate(ParseTree t) {
+		return Trees.findAllTokenNodes(t, tokenType);
+	}
+}
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathTokenElement.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathTokenElement.java
@ -0,0 +1,35 @@
+package org.antlr.v4.runtime.tree.xpath;
+
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.TerminalNode;
+import org.antlr.v4.runtime.tree.Tree;
+import org.antlr.v4.runtime.tree.Trees;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+public class XPathTokenElement extends XPathElement {
+	protected int tokenType;
+	public XPathTokenElement(String tokenName, int tokenType) {
+		super(tokenName);
+		this.tokenType = tokenType;
+	}
+
+	@Override
+	public Collection<ParseTree> evaluate(ParseTree t) {
+		// return all children of t that match nodeName
+		List<ParseTree> nodes = new ArrayList<ParseTree>();
+		for (Tree c : Trees.getChildren(t)) {
+			if ( c instanceof TerminalNode ) {
+				TerminalNode tnode = (TerminalNode)c;
+				if ( (tnode.getSymbol().getType() == tokenType && !invert) ||
+					 (tnode.getSymbol().getType() != tokenType && invert) )
+				{
+					nodes.add(tnode);
+				}
+			}
+		}
+		return nodes;
+	}
+}
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathWildcardAnywhereElement.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathWildcardAnywhereElement.java
@ -0,0 +1,19 @@
+package org.antlr.v4.runtime.tree.xpath;
+
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.Trees;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+/** Path element for //*: matches every node of a subtree. */
+public class XPathWildcardAnywhereElement extends XPathElement {
+	public XPathWildcardAnywhereElement() {
+		super(XPath.WILDCARD);
+	}
+
+	/** Return everything Trees.descendants(t) yields, or an empty list when
+	 *  the element is inverted.
+	 */
+	@Override
+	public Collection<ParseTree> evaluate(ParseTree t) {
+		if ( invert ) return new ArrayList<ParseTree>(); // !* is weird but valid (empty)
+		return Trees.descendants(t);
+	}
+}
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathWildcardElement.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/xpath/XPathWildcardElement.java
@ -0,0 +1,26 @@
+package org.antlr.v4.runtime.tree.xpath;
+
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.Tree;
+import org.antlr.v4.runtime.tree.Trees;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+/** Path element for /* (or a bare *): matches every direct child of a node. */
+public class XPathWildcardElement extends XPathElement {
+	public XPathWildcardElement() {
+		super(XPath.WILDCARD);
+	}
+
+	/** Return the children of t in order, or an empty list when inverted. */
+	@Override
+	public Collection<ParseTree> evaluate(final ParseTree t) {
+		List<ParseTree> children = new ArrayList<ParseTree>();
+		if ( invert ) {
+			// !* is weird but valid (empty)
+			return children;
+		}
+		for (int i = 0; i < t.getChildCount(); i++) {
+			children.add(t.getChild(i));
+		}
+		return children;
+	}
+}
--- a/tool/test/org/antlr/v4/test/BaseTest.java
+++ b/tool/test/org/antlr/v4/test/BaseTest.java
@ -42,6 +42,7 @@ import org.antlr.v4.runtime.CommonToken;
 import org.antlr.v4.runtime.CommonTokenStream;
 import org.antlr.v4.runtime.IntStream;
 import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.Parser;
 import org.antlr.v4.runtime.RuleContext;
 import org.antlr.v4.runtime.Token;
 import org.antlr.v4.runtime.TokenSource;
@ -57,6 +58,8 @@ import org.antlr.v4.runtime.misc.IntegerList;
 import org.antlr.v4.runtime.misc.Interval;
 import org.antlr.v4.runtime.misc.NotNull;
 import org.antlr.v4.runtime.misc.Nullable;
+import org.antlr.v4.runtime.misc.Pair;
+import org.antlr.v4.runtime.tree.ParseTree;
 import org.antlr.v4.semantics.SemanticPipeline;
 import org.antlr.v4.tool.ANTLRMessage;
 import org.antlr.v4.tool.DOTGenerator;
@ -64,7 +67,11 @@ import org.antlr.v4.tool.DefaultToolListener;
 import org.antlr.v4.tool.Grammar;
 import org.antlr.v4.tool.GrammarSemanticsMessage;
 import org.antlr.v4.tool.LexerGrammar;
+import org.antlr.v4.tool.Rule;
 import org.junit.Before;
+import org.junit.rules.TestRule;
+import org.junit.rules.TestWatcher;
+import org.junit.runner.Description;
 import org.stringtemplate.v4.ST;
 import org.stringtemplate.v4.STGroup;
 import org.stringtemplate.v4.STGroupString;
@ -73,10 +80,6 @@ import javax.tools.JavaCompiler;
 import javax.tools.JavaFileObject;
 import javax.tools.StandardJavaFileManager;
 import javax.tools.ToolProvider;
-import org.antlr.v4.tool.Rule;
-import org.junit.rules.TestRule;
-import org.junit.rules.TestWatcher;
-import org.junit.runner.Description;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
@ -87,6 +90,8 @@ import java.io.InputStreamReader;
 import java.io.PipedInputStream;
 import java.io.PipedOutputStream;
 import java.io.PrintStream;
+import java.io.StringReader;
+import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 import java.net.MalformedURLException;
@ -104,7 +109,11 @@ import java.util.TreeMap;
 import java.util.logging.Level;
 import java.util.logging.Logger;

-import static org.junit.Assert.*;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;

 public abstract class BaseTest {
 	// -J-Dorg.antlr.v4.test.BaseTest.level=FINE
@ -478,6 +487,69 @@ public abstract class BaseTest {
 		return output;
 	}

+	/** Instantiate the named lexer and parser on {@code input} (see
+	 *  getParserAndLexer), invoke the parser's {@code startRuleName} method
+	 *  and return the parse tree it produces.
+	 */
+	public ParseTree execParser(String startRuleName, String input,
+								String parserName, String lexerName)
+		throws Exception
+	{
+		Pair<Parser, Lexer> pl = getParserAndLexer(input, parserName, lexerName);
+		Parser parser = pl.a;
+		return execStartRule(startRuleName, parser);
+	}
+
+	/** Reflectively invoke the rule method {@code startRuleName} on
+	 *  {@code parser} and return its result as a ParseTree. The no-argument
+	 *  method is tried first; if there is none, the variant taking a single
+	 *  int is looked up and called with 0.
+	 *
+	 *  @throws NoSuchMethodException if neither variant exists
+	 */
+	public ParseTree execStartRule(String startRuleName, Parser parser)
+		throws IllegalAccessException, InvocationTargetException,
+			   NoSuchMethodException
+	{
+		Method startRule = null;
+		Object[] args = null;
+		try {
+			startRule = parser.getClass().getMethod(startRuleName);
+		}
+		catch (NoSuchMethodException nsme) {
+			// try with int _p arg for recursive func
+			startRule = parser.getClass().getMethod(startRuleName, int.class);
+			args = new Integer[] {0};
+		}
+		ParseTree result = (ParseTree)startRule.invoke(parser, args);
+//		System.out.println("parse tree = "+result.toStringTree(parser));
+		return result;
+	}
+
+	/** Load the named lexer and parser classes from the temp dir and
+	 *  instantiate them, wiring input -> lexer -> CommonTokenStream -> parser.
+	 *  Returns the pair (parser, lexer).
+	 */
+	public Pair<Parser, Lexer> getParserAndLexer(String input,
+												 String parserName, String lexerName)
+		throws Exception
+	{
+		// load both classes first, lexer before parser
+		final Class<? extends Lexer> loadedLexer = loadLexerClassFromTempDir(lexerName);
+		final Class<? extends Parser> loadedParser = loadParserClassFromTempDir(parserName);
+
+		ANTLRInputStream chars = new ANTLRInputStream(new StringReader(input));
+
+		// lexer is built through its (CharStream) constructor
+		Lexer lexer = loadedLexer.asSubclass(Lexer.class)
+								 .getConstructor(CharStream.class)
+								 .newInstance(chars);
+
+		// parser is built through its (TokenStream) constructor
+		Constructor<? extends Parser> parserCtor =
+			loadedParser.asSubclass(Parser.class).getConstructor(TokenStream.class);
+		Parser parser = parserCtor.newInstance(new CommonTokenStream(lexer));
+		return new Pair<Parser, Lexer>(parser, lexer);
+	}
+
+	/** Load class {@code name} through a fresh URLClassLoader whose only URL
+	 *  is the test's tmpdir and whose parent is the system class loader.
+	 */
+	public Class<?> loadClassFromTempDir(String name) throws Exception {
+		ClassLoader loader =
+			new URLClassLoader(new URL[] { new File(tmpdir).toURI().toURL() },
+							   ClassLoader.getSystemClassLoader());
+		return loader.loadClass(name);
+	}
+
+	/** Load class {@code name} from the temp dir as a Lexer subclass.
+	 *
+	 *  @throws ClassCastException if the loaded class is not a Lexer; the
+	 *          check is done here by asSubclass rather than hidden behind an
+	 *          unchecked cast
+	 */
+	public Class<? extends Lexer> loadLexerClassFromTempDir(String name) throws Exception {
+		return loadClassFromTempDir(name).asSubclass(Lexer.class);
+	}
+
+	/** Load class {@code name} from the temp dir as a Parser subclass.
+	 *
+	 *  @throws ClassCastException if the loaded class is not a Parser; the
+	 *          check is done here by asSubclass rather than hidden behind an
+	 *          unchecked cast
+	 */
+	public Class<? extends Parser> loadParserClassFromTempDir(String name) throws Exception {
+		return loadClassFromTempDir(name).asSubclass(Parser.class);
+	}
+
 	protected String execParser(String grammarFileName,
 								String grammarStr,
 								String parserName,
--- a/tool/test/org/antlr/v4/test/TestXPath.java
+++ b/tool/test/org/antlr/v4/test/TestXPath.java
@ -0,0 +1,241 @@
+package org.antlr.v4.test;
+
+import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.Parser;
+import org.antlr.v4.runtime.RuleContext;
+import org.antlr.v4.runtime.misc.Pair;
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.TerminalNode;
+import org.antlr.v4.runtime.tree.xpath.XPath;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for {@link XPath#findAll}: valid paths are checked against the node
+ * names/texts they select from a parse tree of {@link #grammar}, and malformed
+ * paths are checked for the exact {@link IllegalArgumentException} message.
+ */
+public class TestXPath extends BaseTest {
+	/** Grammar for a tiny language of function definitions over arithmetic expressions. */
+	public static final String grammar =
+		"grammar Expr;\n" +
+		"prog:   func+ ;\n" +
+		"func:  'def' ID '(' arg (',' arg)* ')' body ;\n" +
+		"body:  '{' stat+ '}' ;\n" +
+		"arg :  ID ;\n" +
+		"stat:   expr ';'                 # printExpr\n" +
+		"    |   ID '=' expr ';'          # assign\n" +
+		"    |   'return' expr ';'        # ret\n" +
+		"    |   ';'                      # blank\n" +
+		"    ;\n" +
+		"expr:   expr ('*'|'/') expr      # MulDiv\n" +
+		"    |   expr ('+'|'-') expr      # AddSub\n" +
+		"    |   primary                  # prim\n" +
+		"    ;\n" +
+		"primary" +
+		"    :   INT                      # int\n" +
+		"    |   ID                       # id\n" +
+		"    |   '(' expr ')'             # parens\n" +
+		"	 ;" +
+		"\n" +
+		"MUL :   '*' ; // assigns token name to '*' used above in grammar\n" +
+		"DIV :   '/' ;\n" +
+		"ADD :   '+' ;\n" +
+		"SUB :   '-' ;\n" +
+		"ID  :   [a-zA-Z]+ ;      // match identifiers\n" +
+		"INT :   [0-9]+ ;         // match integers\n" +
+		"NEWLINE:'\\r'? '\\n' -> skip;     // return newlines to parser (is end-statement signal)\n" +
+		"WS  :   [ \\t]+ -> skip ; // toss out whitespace\n";
+
+	/** Program text parsed (from rule {@code prog}) by every test in this class. */
+	private static final String SAMPLE_INPUT =
+		"def f(x,y) { x = 3+4; y; ; }\n" +
+		"def g(x) { return 1+2*x; }\n";
+
+	@Test public void testValidPaths() throws Exception {
+		buildExprRecognizer();
+
+		String[] xpath = {
+			"/prog/func",		// all funcs under prog at root
+			"/prog/*",			// all children of prog at root
+			"/*/func",			// all func kids of any root node
+			"prog",				// prog must be root node
+			"/prog",			// prog must be root node
+			"/*",				// any root
+			"*",				// any root
+			"//ID",				// any ID in tree
+			"//expr/primary/ID",// any ID child of a primary under any expr
+			"//body//ID",		// any ID under a body
+			"//'return'",		// any 'return' literal in tree
+			"//primary/*",		// all kids of any primary
+			"//func/*/stat",	// all stat nodes grandkids of any func node
+			"/prog/func/'def'",	// all def literal kids of func kid of prog
+			"//stat/';'",		// all ';' under any stat node
+			"//expr/primary/!ID",	// anything but ID under primary under any expr node
+			"//expr/!primary",	// anything but primary under any expr node
+			"//!*",				// nothing anywhere
+			"/!*",				// nothing at root
+		};
+		String[] expected = {
+			"[func, func]",
+			"[func, func]",
+			"[func, func]",
+			"[prog]",
+			"[prog]",
+			"[prog]",
+			"[prog]",
+			"[f, x, y, x, y, g, x, x]",
+			"[y, x]",
+			"[x, y, x]",
+			"[return]",
+			"[3, 4, y, 1, 2, x]",
+			"[stat, stat, stat, stat]",
+			"[def, def]",
+			"[;, ;, ;, ;]",
+			"[3, 4, 1, 2]",
+			"[expr, expr, expr, expr, expr, expr]",
+			"[]",
+			"[]",
+		};
+		// The two arrays are matched by index; catch an added/removed entry
+		// up front instead of via ArrayIndexOutOfBoundsException or a silent skip.
+		assertEquals("xpath and expected arrays must have the same length",
+					 xpath.length, expected.length);
+
+		for (int i=0; i<xpath.length; i++) {
+			List<String> nodes = getNodeStrings(SAMPLE_INPUT, xpath[i], "prog", "ExprParser", "ExprLexer");
+			String result = nodes.toString();
+			assertEquals("path "+xpath[i]+" failed", expected[i], result);
+		}
+	}
+
+	@Test public void testWeirdChar() throws Exception {
+		assertPathError("&", "Invalid tokens or characters at index 0 in path '&'");
+	}
+
+	@Test public void testWeirdChar2() throws Exception {
+		assertPathError("//w&e/", "Invalid tokens or characters at index 3 in path '//w&e/'");
+	}
+
+	@Test public void testBadSyntax() throws Exception {
+		assertPathError("///", "/ at index 2 isn't a valid rule name");
+	}
+
+	@Test public void testMissingWordAtEnd() throws Exception {
+		assertPathError("//", "Missing path element at end of path");
+	}
+
+	@Test public void testBadTokenName() throws Exception {
+		assertPathError("//Ick", "Ick at index 2 isn't a valid token name");
+	}
+
+	@Test public void testBadRuleName() throws Exception {
+		assertPathError("/prog/ick", "ick at index 6 isn't a valid rule name");
+	}
+
+	/** Generate and compile ExprParser/ExprLexer from {@link #grammar}, asserting success. */
+	private void buildExprRecognizer() throws Exception {
+		boolean ok =
+			rawGenerateAndBuildRecognizer("Expr.g4", grammar, "ExprParser",
+										  "ExprLexer", false);
+		assertTrue(ok);
+	}
+
+	/**
+	 * Build the Expr recognizer, parse {@link #SAMPLE_INPUT} from rule
+	 * {@code prog}, and check that {@code path} is rejected with {@code expected}.
+	 */
+	private void assertPathError(String path, String expected) throws Exception {
+		buildExprRecognizer();
+		testError(SAMPLE_INPUT, path, expected, "prog", "ExprParser", "ExprLexer");
+	}
+
+	/**
+	 * Parse {@code input} from {@code startRuleName} and assert that evaluating
+	 * {@code path} against the resulting tree throws an
+	 * {@link IllegalArgumentException} whose message equals {@code expected}.
+	 */
+	protected void testError(String input, String path, String expected,
+							 String startRuleName,
+							 String parserName, String lexerName)
+		throws Exception
+	{
+		Pair<Parser, Lexer> pl = getParserAndLexer(input, parserName, lexerName);
+		Parser parser = pl.a;
+		ParseTree tree = execStartRule(startRuleName, parser);
+
+		IllegalArgumentException e = null;
+		try {
+			XPath.findAll(tree, path, parser);
+		}
+		catch (IllegalArgumentException iae) {
+			e = iae;
+		}
+		assertNotNull("expected IllegalArgumentException for path '"+path+"'", e);
+		assertEquals(expected, e.getMessage());
+	}
+
+	/**
+	 * Parse {@code input} from {@code startRuleName} and return one string per
+	 * node matched by {@code xpath}: the rule name for rule nodes, the token
+	 * text for terminal nodes.
+	 */
+	public List<String> getNodeStrings(String input, String xpath,
+									   String startRuleName,
+									   String parserName, String lexerName)
+		throws Exception
+	{
+		Pair<Parser, Lexer> pl = getParserAndLexer(input, parserName, lexerName);
+		Parser parser = pl.a;
+		ParseTree tree = execStartRule(startRuleName, parser);
+
+		List<String> nodes = new ArrayList<String>();
+		for (ParseTree t : XPath.findAll(tree, xpath, parser) ) {
+			if ( t instanceof RuleContext) {
+				RuleContext r = (RuleContext)t;
+				nodes.add(parser.getRuleNames()[r.getRuleIndex()]);
+			}
+			else {
+				TerminalNode token = (TerminalNode)t;
+				nodes.add(token.getText());
+			}
+		}
+		return nodes;
+	}
+}