diff --git a/doc/interpreters.md b/doc/interpreters.md index c387ae6b2..3be57f12b 100644 --- a/doc/interpreters.md +++ b/doc/interpreters.md @@ -1,8 +1,14 @@ -# Parser and lexer interpreters +# Parser and Lexer Interpreters *Since ANTLR 4.2* -For small parsing tasks it is sometimes convenient to use ANTLR in interpreted mode, rather than generating a parser in a particular target, compiling it and running it as part of your application. Here's some sample code that creates lexer and parser Grammar objects and then creates interpreters. Once we have a ParserInterpreter, we can use it to parse starting in any rule we like, given a rule index (which the Grammar can provide). +For small parsing tasks it is sometimes convenient to use ANTLR in interpreted mode, rather than generating a parser in a particular target, compiling it and running it as part of your application. Here's some sample code that creates lexer and parser Grammar objects and then creates interpreters. Once we have a ParserInterpreter, we can use it to parse starting in any rule we like, given a rule index (which the grammar + the parser can provide). + +## Action Code + +Since interpreters don't use generated parsers + lexers they cannot execute any action code (including predicates). That means the interpreter runs as if there were no predicates at all. If your grammar requires action code in order to parse correctly you will not be able to test it using this approach. + +## Java Target Interpreter Setup ```java LexerGrammar lg = new LexerGrammar( @@ -76,4 +82,49 @@ ParseTree t = parse(fileName, XMLLexerGrammar, XMLParserGrammar, "document"); This is also how we will integrate instantaneous parsing into ANTLRWorks2 and development environment plug-ins. -See [TestParserInterpreter.java](https://github.com/antlr/antlr4/blob/master/tool-testsuite/test/org/antlr/v4/test/tool/TestParserInterpreter.java). 
+See [TestParserInterpreter.java](../tool-testsuite/test/org/antlr/v4/test/tool/TestParserInterpreter.java). + +## Non-Java Target Interpreter Setup +The ANTLR4 runtimes do not contain any grammar parsing classes (they are in the ANTLR4 tool jar). Hence we cannot use `LexerGrammar` and `Grammar` to parse grammars for the interpreter. Instead we directly instantiate `LexerInterpreter` and `ParserInterpreter` objects. They require some data (namely symbol information and the ATNs) which only the ANTLR4 tool can give us. However, on each generation run ANTLR not only produces your parser + lexer files but also interpreter data files (`*.interp`) which contain all you need to feed the interpreters. + +A support class (`InterpreterDataReader`) is used to load the data for your convenience, which makes this very easy to use. By the way, even the Java target can go this route instead of using the non-runtime classes `Grammar` and `LexerGrammar`. Sometimes it might not be feasible to use the tool jar for whatever reason. + +Here's what the setup looks like (C++ example): + +```cpp +/** + * sourceFileName - name of the file with content to parse + * lexerName - the name of your lexer (arbitrary, that's what is used in error messages) + * parserName - ditto for the parser + * lexerDataFileName - the lexer interpreter data file name (e.g. 
`/Expr.interp`) + * startRule - the name of the rule to start parsing at + */ +void parse(std::string const& sourceFileName, + std::string const& lexerName, std::string const& parserName, + std::string const& lexerDataFileName, std::string const& parserDataFileName, + std::string const& startRule) { + + InterpreterData lexerData = InterpreterDataReader::parseFile(lexerDataFileName); + InterpreterData parserData = InterpreterDataReader::parseFile(parserDataFileName); + + ANTLRFileStream input(sourceFileName); + LexerInterpreter lexEngine(lexerName, lexerData.vocabulary, lexerData.ruleNames, + lexerData.channels, lexerData.modes, lexerData.atn, &input); + CommonTokenStream tokens(&lexEngine); + + /* Remove comment to print the tokens. + tokens.fill(); + std::cout << "INPUT:" << std::endl; + for (auto token : tokens.getTokens()) { + std::cout << token->toString() << std::endl; + } + */ + + ParserInterpreter parser(parserName, parserData.vocabulary, parserData.ruleNames, + parserData.atn, &tokens); + tree::ParseTree *tree = parser.parse(parser.getRuleIndex(startRule)); + + std::cout << "parse tree: " << tree->toStringTree(&parser) << std::endl; +} +``` diff --git a/tool/src/org/antlr/v4/Tool.java b/tool/src/org/antlr/v4/Tool.java index a15b99911..9a0091977 100644 --- a/tool/src/org/antlr/v4/Tool.java +++ b/tool/src/org/antlr/v4/Tool.java @@ -104,7 +104,6 @@ public class Tool { public String outputDirectory; public String libDirectory; public boolean generate_ATN_dot = false; - public boolean genInterpreterData = false; public String grammarEncoding = null; // use default locale's encoding public String msgFormat = "antlr"; public boolean launch_ST_inspector = false; @@ -123,7 +122,6 @@ public class Tool { new Option("outputDirectory", "-o", OptionArgType.STRING, "specify output directory where all output is generated"), new Option("libDirectory", "-lib", OptionArgType.STRING, "specify location of grammars, tokens files"), new Option("generate_ATN_dot", "-atn", 
"generate rule augmented transition network diagrams"), - new Option("genInterpreterData", "-interpreter", "generate only data required for interpreters"), new Option("grammarEncoding", "-encoding", OptionArgType.STRING, "specify grammar file encoding; e.g., euc-jp"), new Option("msgFormat", "-message-format", OptionArgType.STRING, "specify output style for messages in antlr, gnu, vs2005"), new Option("longMessages", "-long-messages", "show exception details when available for errors and warnings"), @@ -394,10 +392,7 @@ public class Tool { if ( generate_ATN_dot ) generateATNs(g); - if ( genInterpreterData ) { - generateInterpreterData(g); - return; - } + generateInterpreterData(g); // PERFORM GRAMMAR ANALYSIS ON ATN: BUILD DECISION DFAs AnalysisPipeline anal = new AnalysisPipeline(g); @@ -752,7 +747,7 @@ public class Tool { content.append(serializedATN.toString()); try { - Writer fw = getOutputFileWriter(ig, ig.name + ".interpreter.txt"); + Writer fw = getOutputFileWriter(ig, ig.name + ".interp"); try { fw.write(content.toString()); }