Merge pull request #405 from parrt/lexer-interp

Lexer interp
2014-01-14 18:05:22 -08:00 · 2014-01-14 18:05:22 -08:00 · f4e5b23baf
parent 28589361db 8f3c01fb3b
commit f4e5b23baf
6 changed files with 66 additions and 27 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -1,5 +1,19 @@
 ANTLR v4 Honey Badger

+December 29, 2013
+
+* Internal change: Tool.loadGrammar() -> parseGrammar(). Tool.load()->parse()
+
+* Added Tool.loadGrammar(fileName) that completely parses, extracts implicit lexer,
+  and processes into Grammar object. Does not generate code.  Use
+  Grammar.getImplicitLexer() to get the lexer created during processing of
+  combined grammar.
+
+* Added Grammar.load(fileName) that creates Tool object for you. loadGrammar()
+  lets you create your own Tool for setting error handlers etc...
+
+	final Grammar g = Grammar.load("/tmp/MyGrammar.g4");
+
 December 19, 2013

 * Sam:
@ -14,19 +28,19 @@ November 24, 2013

 * Ter adds tree pattern matching.  Preferred interface:

-	   ParseTree t = parser.expr();
-	   ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr);
-	   ParseTreeMatch m = p.match(t);
-	   String id = m.get("ID");
+	ParseTree t = parser.expr();
+	ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr);
+	ParseTreeMatch m = p.match(t);
+	String id = m.get("ID");

  or

-		String xpath = "//blockStatement/*";
-		String treePattern = "int <Identifier> = <expression>;";
-		ParseTreePattern p =
-			parser.compileParseTreePattern(treePattern,
-										   JavaParser.RULE_localVariableDeclarationStatement);
-		List<ParseTreeMatch> matches = p.findAll(tree, xpath);
+	String xpath = "//blockStatement/*";
+	String treePattern = "int <Identifier> = <expression>;";
+	ParseTreePattern p =
+		parser.compileParseTreePattern(treePattern,
+									   JavaParser.RULE_localVariableDeclarationStatement);
+	List<ParseTreeMatch> matches = p.findAll(tree, xpath);

 November 20, 2013

--- a/README.md
+++ b/README.md
@ -3,12 +3,12 @@
 **ANTLR** (ANother Tool for Language Recognition) is a powerful parser generator for reading, processing, executing, or translating structured text or binary files. It's widely used to build languages, tools, and frameworks. From a grammar, ANTLR generates a parser that can build and walk parse trees.

 * [Wikipedia](https://en.wikipedia.org/wiki/ANTLR)
-* [Official site](http://www.antlr3.org/)
+* [Official site](http://www.antlr.org/)
 * [ANTLR v3](http://www.antlr3.org/)

 ## INTRODUCTION

-Hi and welcome to the Honey Badger 4.1 release of ANTLR!
+Hi and welcome to the Honey Badger 4.1 release (June 30, 2013) of ANTLR!

 ## INSTALLATION

@ -124,8 +124,10 @@ Programmers run into parsing problems all the time. Whether it’s a data format
 root directory name is the all-lowercase name of the language parsed
 by the grammar. For example, java, cpp, csharp, c, etc...

-## Author
+## Authors

 [Terence Parr](http://www.cs.usfca.edu/~parrt/), parrt@cs.usfca.edu
 ANTLR project lead and supreme dictator for life<br/>
 [University of San Francisco](http://www.usfca.edu/)
+
+[Sam Harwell](http://tunnelvisionlabs.com/)
--- a/runtime/Java/src/org/antlr/v4/runtime/LexerInterpreter.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/LexerInterpreter.java
@ -30,8 +30,6 @@

 package org.antlr.v4.runtime;

-import org.antlr.v4.runtime.CharStream;
-import org.antlr.v4.runtime.Lexer;
 import org.antlr.v4.runtime.atn.ATN;
 import org.antlr.v4.runtime.atn.ATNType;
 import org.antlr.v4.runtime.atn.LexerATNSimulator;
@ -69,7 +67,7 @@ public class LexerInterpreter extends Lexer {
 		for (int i = 0; i < _decisionToDFA.length; i++) {
 			_decisionToDFA[i] = new DFA(atn.getDecisionState(i), i);
 		}
-		this._interp = new LexerATNSimulator(atn,_decisionToDFA,_sharedContextCache);
+		this._interp = new LexerATNSimulator(this,atn,_decisionToDFA,_sharedContextCache);
 	}

 	@Override
--- a/runtime/Java/src/org/antlr/v4/runtime/tree/pattern/ParseTreeMatch.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/tree/pattern/ParseTreeMatch.java
@ -115,7 +115,7 @@ public class ParseTreeMatch {
 	@Nullable
 	public ParseTree get(String label) {
 		List<ParseTree> parseTrees = labels.get(label);
-		if ( parseTrees==null ) {
+		if ( parseTrees==null || parseTrees.size()==0 ) {
 			return null;
 		}

--- a/tool/src/org/antlr/v4/Tool.java
+++ b/tool/src/org/antlr/v4/Tool.java
@ -490,7 +490,7 @@ public class Tool {
 		Graph<String> g = new Graph<String>();
 		List<GrammarRootAST> roots = new ArrayList<GrammarRootAST>();
 		for (String fileName : fileNames) {
-			GrammarAST t = loadGrammar(fileName);
+			GrammarAST t = parseGrammar(fileName);
 			if ( t==null || t instanceof GrammarASTErrorNode) continue; // came back as error node
 			if ( ((GrammarRootAST)t).hasErrors ) continue;
 			GrammarRootAST root = (GrammarRootAST)t;
@ -558,7 +558,7 @@ public class Tool {
 		return g;
 	}

-	public GrammarRootAST loadGrammar(String fileName) {
+	public GrammarRootAST parseGrammar(String fileName) {
 		try {
 			File file = new File(fileName);
 			if (!file.isAbsolute()) {
@ -566,7 +566,7 @@ public class Tool {
 			}

 			ANTLRFileStream in = new ANTLRFileStream(file.getAbsolutePath(), grammarEncoding);
-			GrammarRootAST t = load(fileName, in);
+			GrammarRootAST t = parse(fileName, in);
 			return t;
 		}
 		catch (IOException ioe) {
@ -575,6 +575,18 @@ public class Tool {
 		return null;
 	}

+	/** Convenience method to load and process an ANTLR grammar. Useful
+	 *  when creating interpreters.  If you need access to the lexer
+	 *  grammar created while processing a combined grammar, use
+	 *  getImplicitLexer() on returned grammar.
+	 */
+	public Grammar loadGrammar(String fileName) {
+		GrammarRootAST grammarRootAST = parseGrammar(fileName);
+		final Grammar g = createGrammar(grammarRootAST);
+		process(g, false);
+		return g;
+	}
+
 	/**
 	 * Try current dir then dir of g then lib dir
 	 * @param g
@ -596,17 +608,17 @@ public class Tool {
 		}

 		ANTLRFileStream in = new ANTLRFileStream(importedFile.getAbsolutePath());
-		GrammarRootAST root = load(g.fileName, in);
+		GrammarRootAST root = parse(g.fileName, in);
 		Grammar imported = createGrammar(root);
 		imported.fileName = importedFile.getAbsolutePath();
 		return imported;
 	}

-	public GrammarRootAST loadFromString(String grammar) {
-		return load("<string>", new ANTLRStringStream(grammar));
+	public GrammarRootAST parseGrammarFromString(String grammar) {
+		return parse("<string>", new ANTLRStringStream(grammar));
 	}

-	public GrammarRootAST load(String fileName, CharStream in) {
+	public GrammarRootAST parse(String fileName, CharStream in) {
 		try {
 			GrammarASTAdaptor adaptor = new GrammarASTAdaptor(in);
 			ToolANTLRLexer lexer = new ToolANTLRLexer(in, this);
--- a/tool/src/org/antlr/v4/tool/Grammar.java
+++ b/tool/src/org/antlr/v4/tool/Grammar.java
@ -273,7 +273,7 @@ public class Grammar implements AttributeResolver {
 		org.antlr.runtime.ANTLRStringStream in = new org.antlr.runtime.ANTLRStringStream(grammarText);
 		in.name = fileName;

-		this.ast = tool.load(fileName, in);
+		this.ast = tool.parse(fileName, in);
 		if ( ast==null ) {
 			throw new UnsupportedOperationException();
 		}
@ -427,7 +427,17 @@ public class Grammar implements AttributeResolver {
    }
 */

-    /** Return list of imported grammars from root down to our parent.
+	public LexerGrammar getImplicitLexer() {
+		return implicitLexer;
+	}
+
+	/** convenience method for Tool.loadGrammar() */
+	public static Grammar load(String fileName) {
+		Tool antlr = new Tool();
+		return antlr.loadGrammar(fileName);
+	}
+
+	/** Return list of imported grammars from root down to our parent.
     *  Order is [root, ..., this.parent].  (us not included).
     */
    public List<Grammar> getGrammarAncestors() {
@ -556,7 +566,10 @@ public class Grammar implements AttributeResolver {
 		String[] tokenNames = new String[numTokens+1];
 		for (String tokenName : tokenNameToTypeMap.keySet()) {
 			Integer ttype = tokenNameToTypeMap.get(tokenName);
-			if ( tokenName!=null && tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) ) {
+			if ( tokenName!=null &&
+                 tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) &&
+                 ttype < typeToStringLiteralList.size() )
+            {
 				tokenName = typeToStringLiteralList.get(ttype);
 			}
 			if ( ttype>0 ) tokenNames[ttype] = tokenName;