Merge pull request #352 from parrt/xpath

Add XPath to collect parse tree nodes/subtrees.
This commit is contained in:
Terence Parr 2013-11-13 16:22:37 -08:00
commit a3d71db1bc
21 changed files with 973 additions and 14 deletions

2
.gitignore vendored
View File

@ -27,4 +27,4 @@ nbactions*.xml
*.hprof
# Playground
/tool/playground/
#/tool/playground/

View File

@ -1,5 +1,45 @@
ANTLR v4 Honey Badger
November 13, 2013
* move getChildren() from Tree into Trees (to avoid breaking change)
* Notation:
/prog/func, -> all funcs under prog at root
/prog/*, -> all children of prog at root
/*/func, -> all func kids of any root node
prog, -> prog must be root node
/prog, -> prog must be root node
/*, -> any root
*, -> any root
//ID, -> any ID in tree
//expr/primary/ID, -> any ID child of a primary under any expr
//body//ID, -> any ID under a body
//'return', -> any 'return' literal in tree
//primary/*, -> all kids of any primary
//func/*/stat, -> all stat nodes grandkids of any func node
/prog/func/'def', -> all def literal kids of func kid of prog
//stat/';', -> all ';' under any stat node
//expr/primary/!ID, -> anything but ID under primary under any expr node
//expr/!primary, -> anything but primary under any expr node
//!*, -> nothing anywhere
/!*, -> nothing at root
September 16, 2013
* Updated build.xml to support v4 grammars in v4 itself; compiles XPathLexer.g4
* Add to XPath:
Collection<ParseTree> findAll(String xpath);
September 11, 2013
* Add ! operator to XPath
* Use ANTLR v4 XPathLexer.g4 not regex
* Copy lots of find node stuff from v3 GrammarAST to Trees class in runtime.
September 10, 2013
* Adding in XPath stuff.
August 31, 2013
* Lots of little fixes thanks to Coverity Scan

View File

@ -19,6 +19,18 @@
<property name="antlr3.touch" value="${build.dir}/antlr3-${antlr3.version}.touch"/>
</target>
<!-- Bootstrap setup for building the v4 grammars that ship inside the project:
     defines the ANTLR 4 version/jar properties, makes sure ${lib.dir} exists and
     fetches the complete jar unless it is already present (skipexisting).
     NOTE(review): the jar is fetched over plain http and no checksum is verified here. -->
<target name="antlr4-init" depends="basic-init">
<property name="antlr4.version" value="4.1"/>
<property name="antlr4.jar.name" value="antlr-${antlr4.version}-complete.jar"/>
<property name="antlr4.jar" value="${lib.dir}/${antlr4.jar.name}"/>
<mkdir dir="${lib.dir}"/>
<get src="http://antlr.org/download/${antlr4.jar.name}" dest="${antlr4.jar}" skipexisting="true"/>
<!-- classpath reference used by the antlr4 macro to run the bootstrap tool -->
<path id="cp.antlr4" path="${antlr4.jar}"/>
<!-- generated sources land here; the touch file records the last successful generation -->
<property name="build.antlr4.dir" value="${build.dir}/generated-sources/antlr4" />
<property name="antlr4.touch" value="${build.dir}/antlr4-${antlr4.version}.touch"/>
</target>
<target name="build-init" depends="basic-init">
<property name="version" value="4.1.1-dev"/>
<property name="build.sysclasspath" value="ignore"/>
@ -45,7 +57,17 @@
</uptodate>
</target>
<target name="up-to-date" depends="antlr3-up-to-date,build-init">
<!-- Sets is.antlr4.uptodate when the touch file is newer than every ANTLR 4
     grammar input and the bootstrap jar. The grammars handed to the v4 tool are
     read from runtime/Java/src, so that is the tree scanned here; scanning
     tool/src would never notice an edited .g4 file. -->
<target name="antlr4-up-to-date" depends="basic-init,antlr4-init">
    <uptodate targetfile="${antlr4.touch}" property="is.antlr4.uptodate">
        <srcfiles dir="${basedir}/runtime/Java/src">
            <include name="**/*.g4"/>
            <include name="**/*.tokens"/>
        </srcfiles>
        <srcfiles file="${antlr4.jar}"/>
    </uptodate>
</target>
<target name="up-to-date" depends="antlr3-up-to-date,antlr4-up-to-date,build-init">
<uptodate targetfile="${jar.file}" property="is.source.uptodate">
<srcfiles dir="${basedir}/tool/src">
<include name="**/*.java"/>
@ -102,6 +124,33 @@
</sequential>
</macrodef>
<!-- Runs the ANTLR 4 tool on every *.g4 file directly inside
     runtime/Java/src/@{srcpath}, writing output to ${build.antlr4.dir}/@{srcpath}.
     srcpath: directory relative to runtime/Java/src that holds the grammars.
     args:    optional extra <arg> elements passed to the tool before the grammar names. -->
<macrodef name="antlr4">
<attribute name="srcpath"/>
<element name="args" optional="true"/>
<sequential>
<!-- macro-local names so repeated invocations do not collide on immutable properties -->
<local name="path.antlr4.local"/>
<local name="sources.antlr4.local"/>
<path id="path.antlr4.local">
<fileset dir="${basedir}/runtime/Java/src/@{srcpath}" includes="*.g4"/>
</path>
<!-- turn the absolute grammar paths into a space separated list of bare file names -->
<pathconvert pathsep=" " property="sources.antlr4.local" refid="path.antlr4.local">
<map from="${basedir}/runtime/Java/src/@{srcpath}/" to=""/>
</pathconvert>
<mkdir dir="${build.antlr4.dir}/@{srcpath}"/>
<!-- forked JVM whose working directory is the grammar directory, so the bare names resolve -->
<java classname="org.antlr.v4.Tool" fork="true" failonerror="true" maxmemory="300m"
dir="${basedir}/runtime/Java/src/@{srcpath}">
<arg value="-o"/>
<arg value="${build.antlr4.dir}/@{srcpath}"/>
<args/>
<arg line="${sources.antlr4.local}"/>
<classpath>
<path refid="cp.antlr4"/>
<!-- NOTE(review): java.class.path may hold several entries while "location" names a
     single file or directory; confirm whether "path" was intended here -->
<pathelement location="${java.class.path}"/>
</classpath>
</java>
</sequential>
</macrodef>
<target name="antlr3" depends="build-init,antlr3-init,antlr3-up-to-date" unless="is.antlr3.uptodate">
<mkdir dir="${build.antlr3.dir}" />
@ -125,7 +174,28 @@
<touch file="${antlr3.touch}" mkdirs="true"/>
</target>
<target name="compile" depends="build-init,antlr3,up-to-date" description="Compile for generic OS" unless="is.jar.uptodate">
<!-- Generates Java sources from the v4 grammars of the runtime; skipped entirely
     when is.antlr4.uptodate is set. -->
<target name="antlr4" depends="build-init,antlr4-init,antlr4-up-to-date" unless="is.antlr4.uptodate">
<mkdir dir="${build.antlr4.dir}" />
<!-- this path is only used to print the list of grammars found under runtime/Java/src -->
<path id="sources.antlr4">
<fileset dir="${basedir}/runtime/Java/src" includes="**/*.g4"/>
</path>
<pathconvert pathsep="${line.separator} " property="echo.sources.antlr4" refid="sources.antlr4">
<map from="${basedir}/runtime/Java/src/" to=""/>
</pathconvert>
<echo message="Generating ANTLR 4 grammars:${line.separator} ${echo.sources.antlr4}"/>
<!-- only the xpath package is actually compiled; its output is placed in the matching Java package -->
<antlr4 srcpath="org/antlr/v4/runtime/tree/xpath">
<args>
<arg value="-package"/>
<arg value="org.antlr.v4.runtime.tree.xpath"/>
</args>
</antlr4>
<!-- record a successful run for the up-to-date check -->
<touch file="${antlr4.touch}" mkdirs="true"/>
</target>
<target name="compile" depends="build-init,antlr3,antlr4,up-to-date" description="Compile for generic OS" unless="is.jar.uptodate">
<mkdir dir="${build.dir}/classes"/>
<javac
destdir="${build.dir}/classes"
@ -139,7 +209,7 @@
<path refid="cp.antlr3"/>
<pathelement location="${basedir}/runtime/Java/lib/org.abego.treelayout.core.jar"/>
</classpath>
<src path="${basedir}/tool/src:${basedir}/runtime/Java/src:${build.antlr3.dir}"/>
<src path="${basedir}/tool/src:${basedir}/runtime/Java/src:${build.antlr3.dir}:${build.antlr4.dir}"/>
</javac>
</target>

View File

@ -38,7 +38,7 @@ import org.antlr.v4.runtime.tree.Trees;
import org.antlr.v4.runtime.tree.gui.TreeViewer;
import javax.print.PrintException;
import javax.swing.JDialog;
import javax.swing.*;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
@ -59,7 +59,7 @@ import java.util.concurrent.Future;
* getting error information.
*
* These objects are used during parsing and prediction.
* For the special case of parsers and tree parsers, we use the subclass
* For the special case of parsers, we use the subclass
* ParserRuleContext.
*
* @see ParserRuleContext

View File

@ -38,7 +38,9 @@ import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
public class Utils {
// Seriously: why isn't this built in to java? ugh!
@ -133,4 +135,15 @@ public class Utils {
t.join();
}
/** Build a lookup table from each string in {@code keys} to its position in
 *  the array, e.g. to turn an array of rule names into a name->ruleindex
 *  map. When a string occurs more than once, the highest index is kept.
 *
 *  @param keys the strings to index
 *  @return a new mutable map from string to array index
 */
public static Map<String, Integer> toMap(String[] keys) {
    final Map<String, Integer> indexByKey = new HashMap<String, Integer>();
    int index = 0;
    for (String key : keys) {
        indexByKey.put(key, index);
        index++;
    }
    return indexByKey;
}
}

View File

@ -49,16 +49,16 @@ public interface ParseTree extends SyntaxTree {
ParseTree getChild(int i);
/** The {@link ParseTreeVisitor} needs a double dispatch method. */
public <T> T accept(ParseTreeVisitor<? extends T> visitor);
<T> T accept(ParseTreeVisitor<? extends T> visitor);
/** Return the combined text of all leaf nodes. Does not get any
* off-channel tokens (if any) so won't return whitespace and
* comments if they are sent to parser on hidden channel.
*/
public String getText();
String getText();
/** Specialize toStringTree so that it can print out more information
* based upon the parser.
*/
public String toStringTree(Parser parser);
String toStringTree(Parser parser);
}

View File

@ -34,6 +34,8 @@ import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.Interval;
import java.util.List;
public class TerminalNodeImpl implements TerminalNode {
public Token symbol;
public ParseTree parent;

View File

@ -33,6 +33,9 @@ package org.antlr.v4.runtime.tree;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.Token;
import java.util.Collection;
import java.util.List;
/** The basic notion of a tree has a parent, a payload, and a list of children.
* It is the most abstract interface for all the trees used by ANTLR.
*/

View File

@ -31,6 +31,7 @@
package org.antlr.v4.runtime.tree;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
@ -42,6 +43,7 @@ import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
@ -152,6 +154,16 @@ public class Trees {
return t.getPayload().toString();
}
/** Collect the direct children of {@code t}, in child-index order, into a
 *  newly allocated list.
 */
public static List<Tree> getChildren(Tree t) {
    final List<Tree> children = new ArrayList<Tree>();
    int i = 0;
    while ( i < t.getChildCount() ) {
        children.add(t.getChild(i));
        i++;
    }
    return children;
}
/** Return a list of all ancestors of this node. The first node of
* list is the root and the last is the parent of this node.
*/
@ -167,6 +179,49 @@ public class Trees {
return ancestors;
}
/** Find every terminal node in the tree rooted at {@code t} (root included)
 *  whose token type equals {@code ttype}.
 */
public static Collection<ParseTree> findAllTokenNodes(ParseTree t, int ttype) {
    final boolean findTokens = true;
    return findAllNodes(t, ttype, findTokens);
}
/** Find every rule node in the tree rooted at {@code t} (root included)
 *  whose rule index equals {@code ruleIndex}.
 */
public static Collection<ParseTree> findAllRuleNodes(ParseTree t, int ruleIndex) {
    final boolean findTokens = false;
    return findAllNodes(t, ruleIndex, findTokens);
}
/** Search the tree rooted at {@code t} (root included) and return the
 *  matching nodes in the order the recursive walk encounters them.
 *
 *  @param t          root of the subtree to search
 *  @param index      token type when {@code findTokens} is true, otherwise a rule index
 *  @param findTokens true to match terminal nodes, false to match rule nodes
 *  @return a new list holding the matches; empty when nothing matches
 */
public static List<ParseTree> findAllNodes(ParseTree t, int index, boolean findTokens) {
    // A List<ParseTree> can be handed to the List<? super ParseTree> sink as is,
    // so no wildcard-typed local and no unchecked downcast are needed.
    List<ParseTree> nodes = new ArrayList<ParseTree>();
    _findAllNodes(t, index, findTokens, nodes);
    return nodes;
}
/** Recursive worker for {@code findAllNodes}: appends {@code t} to
 *  {@code nodes} when it matches, then descends into each child in order.
 *
 *  @param t          node currently being examined
 *  @param index      token type or rule index to look for
 *  @param findTokens true to match terminal nodes, false to match rule nodes
 *  @param nodes      sink receiving the matches
 */
public static void _findAllNodes(ParseTree t, int index, boolean findTokens,
                                 List<? super ParseTree> nodes)
{
    // test the current node before looking at its subtrees
    if ( findTokens ) {
        if ( t instanceof TerminalNode &&
             ((TerminalNode)t).getSymbol().getType() == index )
        {
            nodes.add(t);
        }
    }
    else if ( t instanceof ParserRuleContext &&
              ((ParserRuleContext)t).getRuleIndex() == index )
    {
        nodes.add(t);
    }

    // then recurse into the children, left to right
    int child = 0;
    while ( child < t.getChildCount() ) {
        _findAllNodes(t.getChild(child), index, findTokens, nodes);
        child++;
    }
}
/** Return {@code t} followed by every node below it, in pre-order (a node
 *  appears before its children, children left to right). Note that the
 *  list starts with {@code t} itself.
 *
 *  @param t root of the subtree to flatten
 *  @return a new list containing {@code t} and all nodes beneath it
 */
public static List<ParseTree> descendants(ParseTree t) {
    List<ParseTree> nodes = new ArrayList<ParseTree>();
    collectDescendants(t, nodes);
    return nodes;
}

/** Append {@code t} and then, recursively, each of its subtrees to {@code nodes}.
 *  Using one shared accumulator avoids building and copying a list per level.
 */
private static void collectDescendants(ParseTree t, List<ParseTree> nodes) {
    nodes.add(t);
    int n = t.getChildCount();
    for (int i = 0; i < n; i++) {
        collectDescendants(t.getChild(i), nodes);
    }
}
/** Private constructor: prevents instantiation from outside this class. */
private Trees() {
}
}

View File

@ -0,0 +1,206 @@
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.LexerNoViableAltException;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.Utils;
import org.antlr.v4.runtime.tree.ParseTree;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
/** Represent a subset of XPath XML path syntax for use in identifying nodes in
* parse trees.
*
* Split path into words and separators / and // via ANTLR itself then walk
* path elements from left to right. At each separator-word pair, find set
* of nodes. Next stage uses those as work list.
*
* The basic interface is ParseTree.findAll(parser, pathString). But that is
* just shorthand for:
*
* XPath p = new XPath(parser, xpath);
* return p.evaluate(this);
*
* See {@link org.antlr.v4.test.TestXPath} for descriptions. In short, this allows
* operators:
*
* / root
* // anywhere
* ! invert; this must appear directly after root or anywhere operator
*
* and path elements:
*
* ID token name
* 'string' any string literal token from the grammar
* expr rule name
* * wildcard matching any node
*
* Whitespace is not allowed.
*/
public class XPath {
public static final String WILDCARD = "*"; // word not operator/separator
public static final String NOT = "!"; // word for invert operator
protected String path;
protected XPathElement[] elements;
protected Parser parser;
public XPath(Parser parser, String path) {
this.parser = parser;
this.path = path;
elements = split(path);
// System.out.println(Arrays.toString(elements));
}
// TODO: check for invalid token/rule names, bad syntax
public XPathElement[] split(String path) {
ANTLRInputStream in;
try {
in = new ANTLRInputStream(new StringReader(path));
}
catch (IOException ioe) {
throw new IllegalArgumentException("Could not read path: "+path, ioe);
}
XPathLexer lexer = new XPathLexer(in) {
public void recover(LexerNoViableAltException e) { throw e; }
};
lexer.removeErrorListeners();
lexer.addErrorListener(new XPathLexerErrorListener());
CommonTokenStream tokenStream = new CommonTokenStream(lexer);
try {
tokenStream.fill();
}
catch (LexerNoViableAltException e) {
int pos = lexer.getCharPositionInLine();
String msg = "Invalid tokens or characters at index "+pos+" in path '"+path+"'";
throw new IllegalArgumentException(msg, e);
}
List<Token> tokens = tokenStream.getTokens();
// System.out.println("path="+path+"=>"+tokens);
List<XPathElement> elements = new ArrayList<XPathElement>();
int n = tokens.size();
int i=0;
loop:
while ( i<n ) {
Token el = tokens.get(i);
Token next = null;
switch ( el.getType() ) {
case XPathLexer.ROOT :
case XPathLexer.ANYWHERE :
boolean anywhere = el.getType() == XPathLexer.ANYWHERE;
i++;
next = tokens.get(i);
boolean invert = next.getType()==XPathLexer.BANG;
if ( invert ) {
i++;
next = tokens.get(i);
}
XPathElement pathElement = getXPathElement(next, anywhere);
pathElement.invert = invert;
elements.add(pathElement);
i++;
break;
case XPathLexer.TOKEN_REF :
case XPathLexer.RULE_REF :
case XPathLexer.WILDCARD :
elements.add( getXPathElement(el, false) );
i++;
break;
case Token.EOF :
break loop;
default :
throw new IllegalArgumentException("Unknowth path element "+el);
}
}
return elements.toArray(new XPathElement[0]);
}
/** Convert word like * or ID or expr to a path element. anywhere is true
* if // precedes the word.
*/
protected XPathElement getXPathElement(Token wordToken, boolean anywhere) {
if ( wordToken.getType()==Token.EOF ) {
throw new IllegalArgumentException("Missing path element at end of path");
}
String word = wordToken.getText();
Map<String, Integer> ruleIndexes = Utils.toMap(parser.getRuleNames());
Map<String, Integer> tokenTypes = Utils.toMap(parser.getTokenNames());
Integer ttype = tokenTypes.get(word);
Integer ruleIndex = ruleIndexes.get(word);
switch ( wordToken.getType() ) {
case XPathLexer.WILDCARD :
return anywhere ?
new XPathWildcardAnywhereElement() :
new XPathWildcardElement();
case XPathLexer.TOKEN_REF :
case XPathLexer.STRING :
if ( ttype==null ) {
throw new IllegalArgumentException(word+
" at index "+
wordToken.getStartIndex()+
" isn't a valid token name");
}
return anywhere ?
new XPathTokenAnywhereElement(word, ttype) :
new XPathTokenElement(word, ttype);
default :
if ( ruleIndex==null ) {
throw new IllegalArgumentException(word+
" at index "+
wordToken.getStartIndex()+
" isn't a valid rule name");
}
return anywhere ?
new XPathRuleAnywhereElement(word, ruleIndex) :
new XPathRuleElement(word, ruleIndex);
}
}
public static Collection<ParseTree> findAll(ParseTree tree, String xpath, Parser parser) {
XPath p = new XPath(parser, xpath);
return p.evaluate(tree);
}
/** Return a list of all nodes starting at t as root that satisfy the path.
* The root / is relative to the node passed to evaluate().
*/
public Collection<ParseTree> evaluate(final ParseTree t) {
ParserRuleContext dummyRoot = new ParserRuleContext();
dummyRoot.children = new ArrayList<ParseTree>() {{add(t);}}; // don't set t's parent.
Collection<ParseTree> work = new ArrayList<ParseTree>();
work.add(dummyRoot);
int i = 0;
while ( i < elements.length ) {
Collection<ParseTree> next = new ArrayList<ParseTree>();
for (ParseTree node : work) {
if ( node.getChildCount()>0 ) {
// only try to match next element if it has children
// e.g., //func/*/stat might have a token node for which
// we can't go looking for stat nodes.
Collection<? extends ParseTree> matching = elements[i].evaluate(node);
next.addAll(matching);
}
}
i++;
work = next;
}
return work;
}
}

View File

@ -0,0 +1,26 @@
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.tree.ParseTree;
import java.util.Collection;
public abstract class XPathElement {
    /** The word this element was created from. */
    protected String nodeName;
    /** True when the word was preceded by the ! operator. */
    protected boolean invert;

    /** Construct a path element for the word {@code nodeName}, such as
     *  {@code ID}, {@code expr} or {@code *}.
     */
    public XPathElement(String nodeName) {
        this.nodeName = nodeName;
    }

    /** Given tree rooted at t return all nodes matched by this path element */
    public abstract Collection<ParseTree> evaluate(ParseTree t);

    /** Render as {@code SimpleClassName[word]}, with a leading ! before the
     *  word when the element is inverted.
     */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder(getClass().getSimpleName());
        buf.append("[");
        if ( invert ) {
            buf.append("!");
        }
        buf.append(nodeName);
        buf.append("]");
        return buf.toString();
    }
}

View File

@ -0,0 +1,63 @@
// Lexer for the path expressions understood by the XPath class in this package.
lexer grammar XPathLexer;
// Token types with no rule of their own; the ID rule's action assigns them.
tokens { TOKEN_REF, RULE_REF }
/*
Informal sketch of the path syntax (for reference only, not a lexer rule):

path : separator? word (separator word)* EOF ;
separator
: '/' '!'
| '//' '!'
| '/'
| '//'
;
word: TOKEN_REF
| RULE_REF
| STRING
| '*'
;
*/
// Operators.
ANYWHERE : '//' ;
ROOT : '/' ;
WILDCARD : '*' ;
BANG : '!' ;
// A name; retyped as TOKEN_REF when its first character is upper case,
// otherwise as RULE_REF.
ID : NameStartChar NameChar*
{
String text = getText();
if ( Character.isUpperCase(text.charAt(0)) ) setType(TOKEN_REF);
else setType(RULE_REF);
}
;
// Characters allowed after the first character of a name.
fragment
NameChar : NameStartChar
| '0'..'9'
| '_'
| '\u00B7'
| '\u0300'..'\u036F'
| '\u203F'..'\u2040'
;
// Characters allowed as the first character of a name.
fragment
NameStartChar
: 'A'..'Z' | 'a'..'z'
| '\u00C0'..'\u00D6'
| '\u00D8'..'\u00F6'
| '\u00F8'..'\u02FF'
| '\u0370'..'\u037D'
| '\u037F'..'\u1FFF'
| '\u200C'..'\u200D'
| '\u2070'..'\u218F'
| '\u2C00'..'\u2FEF'
| '\u3001'..'\uD7FF'
| '\uF900'..'\uFDCF'
| '\uFDF0'..'\uFFFD'
; // ignores | ['\u10000-'\uEFFFF] ;
// Single-quoted literal; non-greedy, so it ends at the next quote character.
STRING : '\'' .*? '\'' ;
// Whitespace rule is disabled, so whitespace in a path is not skipped.
//WS : [ \t\r\n]+ -> skip ;

View File

@ -0,0 +1,14 @@
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
/** Error listener that deliberately reports nothing. {@code XPath.split()}
 *  installs it after removing the lexer's other listeners and turns lexer
 *  failures into {@code IllegalArgumentException}s itself.
 */
public class XPathLexerErrorListener extends BaseErrorListener {
@Override
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol,
int line, int charPositionInLine, String msg,
RecognitionException e)
{
// intentionally empty: the error is not reported through this listener
}
}

View File

@ -0,0 +1,20 @@
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.Trees;
import java.util.Collection;
/** Path element for a rule name reached through the anywhere operator:
 *  either ID at start of path or ...//ID in middle of path.
 */
public class XPathRuleAnywhereElement extends XPathElement {
    protected int ruleIndex;

    public XPathRuleAnywhereElement(String ruleName, int ruleIndex) {
        super(ruleName);
        this.ruleIndex = ruleIndex;
    }

    /** Return every rule node with this element's rule index found in the
     *  tree rooted at {@code t}. The {@code invert} flag is not consulted here.
     */
    @Override
    public Collection<ParseTree> evaluate(ParseTree t) {
        final Collection<ParseTree> matches = Trees.findAllRuleNodes(t, ruleIndex);
        return matches;
    }
}

View File

@ -0,0 +1,35 @@
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.Tree;
import org.antlr.v4.runtime.tree.Trees;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/** Path element for a rule name that must match a direct child. */
public class XPathRuleElement extends XPathElement {
    protected int ruleIndex;

    public XPathRuleElement(String ruleName, int ruleIndex) {
        super(ruleName);
        this.ruleIndex = ruleIndex;
    }

    /** Return the rule-node children of {@code t} whose rule index equals
     *  this element's index or, when inverted, differs from it. Children
     *  that are not rule nodes are never returned.
     */
    @Override
    public Collection<ParseTree> evaluate(ParseTree t) {
        List<ParseTree> matches = new ArrayList<ParseTree>();
        for (int i = 0; i < t.getChildCount(); i++) {
            ParseTree child = t.getChild(i);
            if ( !(child instanceof ParserRuleContext) ) {
                continue;
            }
            ParserRuleContext ctx = (ParserRuleContext)child;
            boolean sameRule = ctx.getRuleIndex() == ruleIndex;
            // keep the node when "matches" and "invert" disagree
            if ( sameRule != invert ) {
                matches.add(ctx);
            }
        }
        return matches;
    }
}

View File

@ -0,0 +1,19 @@
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.Trees;
import java.util.Collection;
/** Path element for a token name or literal reached through the anywhere operator. */
public class XPathTokenAnywhereElement extends XPathElement {
    protected int tokenType;

    public XPathTokenAnywhereElement(String tokenName, int tokenType) {
        super(tokenName);
        this.tokenType = tokenType;
    }

    /** Return every terminal node with this element's token type found in
     *  the tree rooted at {@code t}. The {@code invert} flag is not consulted here.
     */
    @Override
    public Collection<ParseTree> evaluate(ParseTree t) {
        final Collection<ParseTree> matches = Trees.findAllTokenNodes(t, tokenType);
        return matches;
    }
}

View File

@ -0,0 +1,35 @@
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.antlr.v4.runtime.tree.Tree;
import org.antlr.v4.runtime.tree.Trees;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/** Path element for a token name or literal that must match a direct child. */
public class XPathTokenElement extends XPathElement {
    protected int tokenType;

    public XPathTokenElement(String tokenName, int tokenType) {
        super(tokenName);
        this.tokenType = tokenType;
    }

    /** Return the terminal-node children of {@code t} whose token type equals
     *  this element's type or, when inverted, differs from it. Children that
     *  are not terminal nodes are never returned.
     */
    @Override
    public Collection<ParseTree> evaluate(ParseTree t) {
        List<ParseTree> matches = new ArrayList<ParseTree>();
        for (int i = 0; i < t.getChildCount(); i++) {
            ParseTree child = t.getChild(i);
            if ( !(child instanceof TerminalNode) ) {
                continue;
            }
            TerminalNode leaf = (TerminalNode)child;
            boolean sameType = leaf.getSymbol().getType() == tokenType;
            // keep the node when "matches" and "invert" disagree
            if ( sameType != invert ) {
                matches.add(leaf);
            }
        }
        return matches;
    }
}

View File

@ -0,0 +1,19 @@
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.Trees;
import java.util.ArrayList;
import java.util.Collection;
/** Path element for the wildcard reached through the anywhere operator. */
public class XPathWildcardAnywhereElement extends XPathElement {
    public XPathWildcardAnywhereElement() {
        super(XPath.WILDCARD);
    }

    /** Return whatever {@code Trees.descendants(t)} yields, or an empty list
     *  when the element is inverted.
     */
    @Override
    public Collection<ParseTree> evaluate(ParseTree t) {
        if ( !invert ) {
            return Trees.descendants(t);
        }
        // !* is weird but valid (empty)
        return new ArrayList<ParseTree>();
    }
}

View File

@ -0,0 +1,26 @@
package org.antlr.v4.runtime.tree.xpath;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.Tree;
import org.antlr.v4.runtime.tree.Trees;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
/** Path element for the wildcard that matches any direct child. */
public class XPathWildcardElement extends XPathElement {
    public XPathWildcardElement() {
        super(XPath.WILDCARD);
    }

    /** Return all direct children of {@code t} in order, or an empty list
     *  when the element is inverted.
     */
    @Override
    public Collection<ParseTree> evaluate(final ParseTree t) {
        List<ParseTree> kids = new ArrayList<ParseTree>();
        if ( invert ) {
            return kids; // !* is weird but valid (empty)
        }
        for (int i = 0; i < t.getChildCount(); i++) {
            kids.add(t.getChild(i));
        }
        return kids;
    }
}

View File

@ -42,6 +42,7 @@ import org.antlr.v4.runtime.CommonToken;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.IntStream;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenSource;
@ -57,6 +58,8 @@ import org.antlr.v4.runtime.misc.IntegerList;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.runtime.misc.Pair;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.semantics.SemanticPipeline;
import org.antlr.v4.tool.ANTLRMessage;
import org.antlr.v4.tool.DOTGenerator;
@ -64,7 +67,11 @@ import org.antlr.v4.tool.DefaultToolListener;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.GrammarSemanticsMessage;
import org.antlr.v4.tool.LexerGrammar;
import org.antlr.v4.tool.Rule;
import org.junit.Before;
import org.junit.rules.TestRule;
import org.junit.rules.TestWatcher;
import org.junit.runner.Description;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.STGroupString;
@ -73,10 +80,6 @@ import javax.tools.JavaCompiler;
import javax.tools.JavaFileObject;
import javax.tools.StandardJavaFileManager;
import javax.tools.ToolProvider;
import org.antlr.v4.tool.Rule;
import org.junit.rules.TestRule;
import org.junit.rules.TestWatcher;
import org.junit.runner.Description;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
@ -87,6 +90,8 @@ import java.io.InputStreamReader;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.io.PrintStream;
import java.io.StringReader;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
@ -104,7 +109,11 @@ import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import static org.junit.Assert.*;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
public abstract class BaseTest {
// -J-Dorg.antlr.v4.test.BaseTest.level=FINE
@ -478,6 +487,69 @@ public abstract class BaseTest {
return output;
}
/** Instantiate the named lexer and parser on {@code input}, invoke the
 *  start rule and return the parse tree it produces.
 */
public ParseTree execParser(String startRuleName, String input,
                            String parserName, String lexerName)
    throws Exception
{
    final Parser parser = getParserAndLexer(input, parserName, lexerName).a;
    return execStartRule(startRuleName, parser);
}
/** Reflectively invoke the rule method {@code startRuleName} on
 *  {@code parser} and return its result as a parse tree. The no-argument
 *  form is tried first; if it does not exist, the form taking a single
 *  {@code int} is called with 0.
 */
public ParseTree execStartRule(String startRuleName, Parser parser)
    throws IllegalAccessException, InvocationTargetException,
           NoSuchMethodException
{
    final Class<?> parserClass = parser.getClass();
    Method ruleMethod;
    Object[] ruleArgs;
    try {
        ruleMethod = parserClass.getMethod(startRuleName);
        ruleArgs = null;
    }
    catch (NoSuchMethodException nsme) {
        // try with int _p arg for recursive func
        ruleMethod = parserClass.getMethod(startRuleName, int.class);
        ruleArgs = new Integer[] {0};
    }
    return (ParseTree)ruleMethod.invoke(parser, ruleArgs);
}
/** Load the named lexer and parser classes from the temp dir and wire
 *  them up: input -> lexer -> token stream -> parser.
 *
 *  @return the parser (first) and lexer (second) instances
 */
public Pair<Parser, Lexer> getParserAndLexer(String input,
                                             String parserName, String lexerName)
    throws Exception
{
    final Class<? extends Lexer> lexerClass = loadLexerClassFromTempDir(lexerName);
    final Class<? extends Parser> parserClass = loadParserClassFromTempDir(parserName);

    // lexer over the input characters
    ANTLRInputStream chars = new ANTLRInputStream(new StringReader(input));
    Constructor<? extends Lexer> lexerCtor =
        lexerClass.asSubclass(Lexer.class).getConstructor(CharStream.class);
    Lexer lexer = lexerCtor.newInstance(chars);

    // parser over the lexer's tokens
    Constructor<? extends Parser> parserCtor =
        parserClass.asSubclass(Parser.class).getConstructor(TokenStream.class);
    CommonTokenStream tokens = new CommonTokenStream(lexer);
    Parser parser = parserCtor.newInstance(tokens);

    return new Pair<Parser, Lexer>(parser, lexer);
}
/** Load class {@code name} through a fresh class loader that looks in
 *  {@code tmpdir} and delegates to the system class loader.
 */
public Class<?> loadClassFromTempDir(String name) throws Exception {
    final URL tmpDirUrl = new File(tmpdir).toURI().toURL();
    final ClassLoader parent = ClassLoader.getSystemClassLoader();
    ClassLoader loader = new URLClassLoader(new URL[] { tmpDirUrl }, parent);
    return loader.loadClass(name);
}
/** Load class {@code name} from the temp dir as a {@code Lexer} subclass.
 *
 *  @throws ClassCastException if the loaded class is not a {@code Lexer}
 */
public Class<? extends Lexer> loadLexerClassFromTempDir(String name) throws Exception {
    // asSubclass() performs a checked conversion, unlike a generic cast
    return loadClassFromTempDir(name).asSubclass(Lexer.class);
}
/** Load class {@code name} from the temp dir as a {@code Parser} subclass.
 *
 *  @throws ClassCastException if the loaded class is not a {@code Parser}
 */
public Class<? extends Parser> loadParserClassFromTempDir(String name) throws Exception {
    // asSubclass() performs a checked conversion, unlike a generic cast
    return loadClassFromTempDir(name).asSubclass(Parser.class);
}
protected String execParser(String grammarFileName,
String grammarStr,
String parserName,

View File

@ -0,0 +1,241 @@
package org.antlr.v4.test;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.misc.Pair;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.antlr.v4.runtime.tree.xpath.XPath;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
/** Tests for {@code XPath}: valid paths against a small function/statement
 *  grammar, plus the error messages produced for malformed paths.
 */
public class TestXPath extends BaseTest {
    public static final String grammar =
        "grammar Expr;\n" +
        "prog: func+ ;\n" +
        "func: 'def' ID '(' arg (',' arg)* ')' body ;\n" +
        "body: '{' stat+ '}' ;\n" +
        "arg : ID ;\n" +
        "stat: expr ';' # printExpr\n" +
        " | ID '=' expr ';' # assign\n" +
        " | 'return' expr ';' # ret\n" +
        " | ';' # blank\n" +
        " ;\n" +
        "expr: expr ('*'|'/') expr # MulDiv\n" +
        " | expr ('+'|'-') expr # AddSub\n" +
        " | primary # prim\n" +
        " ;\n" +
        "primary" +
        " : INT # int\n" +
        " | ID # id\n" +
        " | '(' expr ')' # parens\n" +
        " ;" +
        "\n" +
        "MUL : '*' ; // assigns token name to '*' used above in grammar\n" +
        "DIV : '/' ;\n" +
        "ADD : '+' ;\n" +
        "SUB : '-' ;\n" +
        "ID : [a-zA-Z]+ ; // match identifiers\n" +
        "INT : [0-9]+ ; // match integers\n" +
        "NEWLINE:'\\r'? '\\n' -> skip; // return newlines to parser (is end-statement signal)\n" +
        "WS : [ \\t]+ -> skip ; // toss out whitespace\n";

    /** Program text parsed by every test in this class. */
    private static final String SAMPLE_INPUT =
        "def f(x,y) { x = 3+4; y; ; }\n" +
        "def g(x) { return 1+2*x; }\n";

    @Test public void testValidPaths() throws Exception {
        buildExprRecognizer();

        // each row: { path, expected node strings }
        String[][] cases = {
            { "/prog/func",         "[func, func]" },               // all funcs under prog at root
            { "/prog/*",            "[func, func]" },               // all children of prog at root
            { "/*/func",            "[func, func]" },               // all func kids of any root node
            { "prog",               "[prog]" },                     // prog must be root node
            { "/prog",              "[prog]" },                     // prog must be root node
            { "/*",                 "[prog]" },                     // any root
            { "*",                  "[prog]" },                     // any root
            { "//ID",               "[f, x, y, x, y, g, x, x]" },   // any ID in tree
            { "//expr/primary/ID",  "[y, x]" },                     // any ID child of a primary under any expr
            { "//body//ID",         "[x, y, x]" },                  // any ID under a body
            { "//'return'",         "[return]" },                   // any 'return' literal in tree
            { "//primary/*",        "[3, 4, y, 1, 2, x]" },         // all kids of any primary
            { "//func/*/stat",      "[stat, stat, stat, stat]" },   // all stat nodes grandkids of any func node
            { "/prog/func/'def'",   "[def, def]" },                 // all def literal kids of func kid of prog
            { "//stat/';'",         "[;, ;, ;, ;]" },               // all ';' under any stat node
            { "//expr/primary/!ID", "[3, 4, 1, 2]" },               // anything but ID under primary under any expr node
            { "//expr/!primary",    "[expr, expr, expr, expr, expr, expr]" }, // anything but primary under any expr node
            { "//!*",               "[]" },                         // nothing anywhere
            { "/!*",                "[]" },                         // nothing at root
        };

        for (String[] testCase : cases) {
            String path = testCase[0];
            String expected = testCase[1];
            List<String> nodes = getNodeStrings(SAMPLE_INPUT, path, "prog", "ExprParser", "ExprLexer");
            assertEquals("path "+path+" failed", expected, nodes.toString());
        }
    }

    @Test public void testWeirdChar() throws Exception {
        assertPathError("&", "Invalid tokens or characters at index 0 in path '&'");
    }

    @Test public void testWeirdChar2() throws Exception {
        assertPathError("//w&e/", "Invalid tokens or characters at index 3 in path '//w&e/'");
    }

    @Test public void testBadSyntax() throws Exception {
        assertPathError("///", "/ at index 2 isn't a valid rule name");
    }

    @Test public void testMissingWordAtEnd() throws Exception {
        assertPathError("//", "Missing path element at end of path");
    }

    @Test public void testBadTokenName() throws Exception {
        assertPathError("//Ick", "Ick at index 2 isn't a valid token name");
    }

    @Test public void testBadRuleName() throws Exception {
        assertPathError("/prog/ick", "ick at index 6 isn't a valid rule name");
    }

    /** Generate and compile the Expr recognizer, failing the test if that does not succeed. */
    private void buildExprRecognizer() throws Exception {
        boolean ok =
            rawGenerateAndBuildRecognizer("Expr.g4", grammar, "ExprParser",
                                          "ExprLexer", false);
        assertTrue(ok);
    }

    /** Build the recognizer, then check that {@code path} is rejected with {@code expected}. */
    private void assertPathError(String path, String expected) throws Exception {
        buildExprRecognizer();
        testError(SAMPLE_INPUT, path, expected, "prog", "ExprParser", "ExprLexer");
    }

    /** Parse {@code input} and assert that evaluating {@code path} throws an
     *  {@code IllegalArgumentException} whose message equals {@code expected}.
     */
    protected void testError(String input, String path, String expected,
                             String startRuleName,
                             String parserName, String lexerName)
        throws Exception
    {
        Parser parser = getParserAndLexer(input, parserName, lexerName).a;
        ParseTree tree = execStartRule(startRuleName, parser);

        IllegalArgumentException caught = null;
        try {
            XPath.findAll(tree, path, parser);
        }
        catch (IllegalArgumentException iae) {
            caught = iae;
        }
        assertNotNull(caught);
        assertEquals(expected, caught.getMessage());
    }

    /** Parse {@code input}, evaluate {@code xpath} on the resulting tree and
     *  describe each matched node: the rule name for rule nodes, the token
     *  text for terminal nodes.
     */
    public List<String> getNodeStrings(String input, String xpath,
                                       String startRuleName,
                                       String parserName, String lexerName)
        throws Exception
    {
        Parser parser = getParserAndLexer(input, parserName, lexerName).a;
        ParseTree tree = execStartRule(startRuleName, parser);

        List<String> descriptions = new ArrayList<String>();
        for (ParseTree match : XPath.findAll(tree, xpath, parser)) {
            String description;
            if ( match instanceof RuleContext ) {
                int ruleIndex = ((RuleContext)match).getRuleIndex();
                description = parser.getRuleNames()[ruleIndex];
            }
            else {
                description = ((TerminalNode)match).getText();
            }
            descriptions.add(description);
        }
        return descriptions;
    }
}