Merge branch 'master' of https://github.com/antlr/antlr4 into fix_1233

commit a077d3d072
@@ -1,5 +1,7 @@
 # ANTLR v4
 
+[![Build Status](https://travis-ci.org/antlr/antlr4.png?branch=master)](https://travis-ci.org/antlr/antlr4) [![Java 7+](https://img.shields.io/badge/java-7+-4c7e9f.svg)](http://java.oracle.com) [![License](https://img.shields.io/badge/license-BSD-blue.svg)](https://raw.githubusercontent.com/antlr/antlr4/master/LICENSE.txt)
+
 **ANTLR** (ANother Tool for Language Recognition) is a powerful parser generator for reading, processing, executing, or translating structured text or binary files. It's widely used to build languages, tools, and frameworks. From a grammar, ANTLR generates a parser that can build parse trees and also generates a listener interface (or visitor) that makes it easy to respond to the recognition of phrases of interest.
 
 *Given day-job constraints, my time working on this project is limited so I'll have to focus first on fixing bugs rather than changing/improving the feature set. Likely I'll do it in bursts every few months. Please do not be offended if your bug or pull request does not yield a response! --parrt*
@@ -45,8 +47,3 @@ You will find the [Book source code](http://pragprog.com/titles/tpantlr2/source_
 [This repository](https://github.com/antlr/grammars-v4) is a collection of grammars without actions where the
 root directory name is the all-lowercase name of the language parsed
 by the grammar. For example, java, cpp, csharp, c, etc...
-
-Travis Status
----------
-
-<a href="https://travis-ci.org/antlr/antlr4"><img src="https://api.travis-ci.org/antlr/antlr4.png"></a>
@@ -95,7 +95,13 @@ public class Antlr4Mojo extends AbstractMojo {
     * specify grammar file encoding; e.g., euc-jp
     */
    @Parameter(property = "project.build.sourceEncoding")
-   protected String encoding;
+   protected String inputEncoding;
+
+   /**
+    * specify output file encoding; defaults to source encoding
+    */
+   @Parameter(property = "project.build.sourceEncoding")
+   protected String outputEncoding;
 
    /**
     * Generate parse tree listener interface and base class.
@@ -336,9 +342,9 @@ public class Antlr4Mojo extends AbstractMojo {
        args.add("-atn");
    }
 
-   if (encoding != null && !encoding.isEmpty()) {
+   if ( inputEncoding!= null && !inputEncoding.isEmpty()) {
        args.add("-encoding");
-       args.add(encoding);
+       args.add(inputEncoding);
    }
 
    if (listener) {
@@ -546,7 +552,7 @@ public class Antlr4Mojo extends AbstractMojo {
    URI relativePath = project.getBasedir().toURI().relativize(outputFile.toURI());
    getLog().debug("  Writing file: " + relativePath);
    OutputStream outputStream = buildContext.newFileOutputStream(outputFile);
-   return new BufferedWriter(new OutputStreamWriter(outputStream));
+   return new BufferedWriter(new OutputStreamWriter(outputStream, outputEncoding));
 }
 }
 }
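Taken together, the three hunks above split the single `encoding` parameter into `inputEncoding`/`outputEncoding` and thread the output encoding into the generated-file writer. A minimal standalone sketch of why the explicit charset on `OutputStreamWriter` matters (illustrative code, not the plugin's; the sample text is made up):

```java
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;

public class EncodingDemo {
    public static void main(String[] args) throws IOException {
        // Without an explicit charset, OutputStreamWriter falls back to this,
        // so generated files could differ from machine to machine.
        System.out.println("platform default: " + Charset.defaultCharset());

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try (Writer w = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"))) {
            w.write("// grammar für tests"); // non-ASCII round-trips predictably
        }
        System.out.println(out.toString("UTF-8"));
    }
}
```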
@@ -1,4 +1,4 @@
-ANTLR Project Contributors Certification of Origin and Rights
+ANTLR Project Contributors Certification of Origin and Rights
 
 All contributors to ANTLR v4 must formally agree to abide by this
 certificate of origin by signing on the bottom with their github
@@ -103,6 +103,7 @@ YYYY/MM/DD, github id, Full name, email
 2016/08/08, wjkohnen, Wolfgang Johannes Kohnen, wjkohnen-go-antlr@ko-sys.com
 2016/08/11, BurtHarris, Ralph "Burt" Harris, Burt_Harris_antlr4@azxs.33mail.com
+2016/08/19, andjo403, Andreas Jonson, andjo403@hotmail.com
 2016/09/27, harriman, Kurt Harriman, harriman@acm.org
 2016/10/13, cgudrian, Christian Gudrian, christian.gudrian@gmx.de
 2016/10/13, nielsbasjes, Niels Basjes, niels@basjes.nl
 2016/10/21, FloorGoddijn, Floor Goddijn, floor.goddijn[at]aimms.com
@@ -110,4 +111,6 @@ YYYY/MM/DD, github id, Full name, email
 2016/11/05, runner-mei, meifakun, runner.mei@gmail.com
 2016/11/15, hanjoes, Hanzhou Shi, hanzhou87@gmail.com
 2016/11/16, sridharxp, Sridharan S, aurosridhar@gmail.com
 2016/11/06, NoodleOfDeath, Thom Morgan, github@bytemeapp.com
+2016/11/01, sebkur, Sebastian Kürten, sebastian@topobyte.de
+2016/04/13, renatahodovan, Renata Hodovan, reni@inf.u-szeged.hu
@@ -70,7 +70,8 @@ $ brew install node
 To run the tests and **install into local repository** `~/.m2/repository/org/antlr`, do this:
 
 ```bash
-$ mvn install
+$ mvn install -DskipTests=true  # make sure all artifacts are visible on this machine
+$ mvn install                   # now "do it with feeling"
 ...
 -------------------------------------------------------
  T E S T S
@@ -129,9 +130,12 @@ Note that ANTLR is written in itself, which is why maven downloads antlr4-4.5.ja
 
 ## Running test subsets
 
+*From the `runtime-testsuite` dir*
+
 ### Run one test group across targets
 
 ```bash
+$ cd runtime-testsuite
 $ mvn -Dtest=TestParserExec test
 -------------------------------------------------------
  T E S T S
@@ -29,7 +29,7 @@ It's also a good idea to put this in your `.bash_profile` or whatever your start
 3. Create aliases for the ANTLR Tool, and `TestRig`.
 ```
 $ alias antlr4='java -Xmx500M -cp "/usr/local/lib/antlr-4.5-complete.jar:$CLASSPATH" org.antlr.v4.Tool'
-$ alias grun='java org.antlr.v4.runtime.misc.TestRig'
+$ alias grun='java org.antlr.v4.gui.TestRig'
 ```
 
 ### WINDOWS
@@ -51,12 +51,12 @@ SET CLASSPATH=.;C:\Javalib\antlr-4.5-complete.jar;%CLASSPATH%
 java org.antlr.v4.Tool %*
 ```
 ```
-java org.antlr.v4.runtime.misc.TestRig %*
+java org.antlr.v4.gui.TestRig %*
 ```
 * Or, use doskey commands:
 ```
 doskey antlr4=java org.antlr.v4.Tool $*
-doskey grun =java org.antlr.v4.runtime.misc.TestRig $*
+doskey grun =java org.antlr.v4.gui.TestRig $*
 ```
 
 ### Testing the installation
@@ -13,7 +13,7 @@ Each target language for ANTLR has a runtime package for running parser generate
 Get the runtime and install it on your GOPATH:
 
 ```bash
-go get github.com/antlr/antlr4
+go get github.com/antlr/antlr4/runtime/Go/antlr
 ```
 
 #### 3. Set the release tag (optional)
@@ -51,6 +51,8 @@ This documentation is a reference and summarizes grammar syntax and the key sema
 
 * [Runtime Libraries and Code Generation Targets](targets.md)
 
+* [Parsing binary streams](parsing-binary-files.md)
+
 * [Parser and lexer interpreters](interpreters.md)
 
 * [Resources](resources.md)
@@ -70,7 +70,7 @@ Right now, there is no npm package available, so you need to register a link ins
 $ npm link antlr4
 ```
 
-This will install antlr4 using the package.son descriptor that comes with the script.
+This will install antlr4 using the package.json descriptor that comes with the script.
 
 ## How do I run the generated lexer and/or parser?
 
@@ -0,0 +1,182 @@
# Parsing Binary Files

Parsing binary files is no different from parsing character-based files except that the "characters" are actually bytes, not 16-bit unsigned short Unicode characters. From a lexer/parser point of view, there is no difference except that the characters are likely not printable. If you want to match a special 2-byte marker, 0xCA then 0xFE, the following rule is sufficient:

```
MARKER : '\u00CA' '\u00FE' ;
```

The parser, of course, would refer to that token like any other token.

Here is a sample grammar for use with the code snippets below.

```
grammar IP;

file : ip+ (MARKER ip)* ;

ip : BYTE '.' BYTE '.' BYTE '.' BYTE ;

MARKER : '\u00CA' '\u00FE' ;
BYTE : '\u0000'..'\u00FF' ;
```

Notice that `BYTE` is using a range operator to match anything between 0 and 255. We can't use character classes like `[a-z]` naturally because we are not parsing character codes. All character specifiers must have `00` as their upper byte. E.g., `\uCAFE` is not a valid character because that 16-bit value will never be created from the input stream (bytes only, remember).

If there are actual characters like `$` or `!` encoded as bytes in the binary file, you can refer to them via literals like `'$'` as you normally would. See `'.'` in the grammar.

## Binary streams

There are many targets now, so I'm not sure exactly how they process text files, but most targets will pull in text per the machine's locale. Much of the time this will mean UTF-8 encoding of text converted to 16-bit Unicode. ANTLR's lexers operate on `int`, so we can handle any kind of character you want to send in that fits in `int`.

Once the lexer gets an input stream, it doesn't care whether the characters come from / represent bytes or actual Unicode characters.

Let's get a binary file called `ips` and put it in our resources directory (the `'.'` separator bytes are needed so the input matches the `ip` rule above):

```java
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

public class WriteBinaryFile {
	public static final byte[] bytes = {
		(byte)172, '.', 0, '.', 0, '.', 1, (byte)0xCA, (byte)0xFE,
		(byte)10, '.', 10, '.', 10, '.', 1, (byte)0xCA, (byte)0xFE,
		(byte)10, '.', 10, '.', 10, '.', 99
	};

	public static void main(String[] args) throws IOException {
		Files.write(new File("resources/ips").toPath(), bytes);
	}
}
```

Now we need to create a stream of bytes satisfactory to ANTLR, which is as simple as:

```java
ANTLRFileStream bytesAsChar = new ANTLRFileStream("resources/ips", "ISO-8859-1");
```

The `ISO-8859-1` encoding is just the 8-bit char encoding for LATIN-1, which effectively tells the stream to treat each byte as a character. That's what we want. Then we have the usual test rig:

```java
ANTLRFileStream bytesAsChar = new ANTLRFileStream("resources/ips", "ISO-8859-1");
IPLexer lexer = new IPLexer(bytesAsChar);
CommonTokenStream tokens = new CommonTokenStream(lexer);
IPParser parser = new IPParser(tokens);
ParseTree tree = parser.file();
IPBaseListener listener = new MyIPListener();
ParseTreeWalker.DEFAULT.walk(listener, tree);
```

Here is the listener:

```java
class MyIPListener extends IPBaseListener {
	@Override
	public void exitIp(IPParser.IpContext ctx) {
		List<TerminalNode> octets = ctx.BYTE();
		short[] ip = new short[4];
		for (int i = 0; i<octets.size(); i++) {
			String oneCharStringHoldingOctet = octets.get(i).getText();
			ip[i] = (short)oneCharStringHoldingOctet.charAt(0);
		}
		System.out.println(Arrays.toString(ip));
	}
}
```

We can't just print out the text because we are not reading in text. We need to emit each byte as a decimal value. The output should be the following when you run the test code:

```
[172, 0, 0, 1]
[10, 10, 10, 1]
[10, 10, 10, 99]
```

## Custom stream

If you want to play around with the stream, you can. Here's an example that alters how "text" is computed from the byte stream (which changes how tokens print out their text as well):

```java
/** make a stream treating file as full of single unsigned byte characters */
class BinaryANTLRFileStream extends ANTLRFileStream {
	public BinaryANTLRFileStream(String fileName) throws IOException {
		super(fileName, "ISO-8859-1");
	}

	/** Print the decimal value rather than treat as char */
	@Override
	public String getText(Interval interval) {
		StringBuilder buf = new StringBuilder();
		int start = interval.a;
		int stop = interval.b;
		if(stop >= this.n) {
			stop = this.n - 1;
		}

		for (int i = start; i<=stop; i++) {
			int v = data[i];
			buf.append(String.format("%d(0x%X)", v, v)); // e.g., 172(0xAC), matching the output below
		}
		return buf.toString();
	}
}
```

The new test code starts out like this:

```java
ANTLRFileStream bytesAsChar = new BinaryANTLRFileStream("resources/ips");
IPLexer lexer = new IPLexer(bytesAsChar);
...
```

This simplifies our listener then:

```java
class MyIPListenerCustomStream extends IPBaseListener {
	@Override
	public void exitIp(IPParser.IpContext ctx) {
		List<TerminalNode> octets = ctx.BYTE();
		System.out.println(octets);
	}
}
```

You should get this enhanced output:

```
[172(0xAC), 0(0x0), 0(0x0), 1(0x1)]
[10(0xA), 10(0xA), 10(0xA), 1(0x1)]
[10(0xA), 10(0xA), 10(0xA), 99(0x63)]
```

## Error handling in binary files

Error handling proceeds exactly like any other parser. For example, let's alter the binary file so that it is missing one of the 0's in the first IP address:

```java
public static final byte[] bytes = {
	(byte)172, '.', 0, '.', '.', 1, (byte)0xCA, (byte)0xFE, // OOOPS
	(byte)10, '.', 10, '.', 10, '.', 1, (byte)0xCA, (byte)0xFE,
	(byte)10, '.', 10, '.', 10, '.', 99
};
```

Running the original test case gives us:

```
line 1:4 extraneous input '.' expecting BYTE
line 1:6 mismatched input 'Êþ' expecting '.'
[172, 0, 1, 0]
[10, 10, 10, 1]
[10, 10, 10, 99]
```

That `'Êþ'` is just the character representation of the two bytes 0xCA and 0xFE. Using the enhanced binary stream, we see:

```
line 1:4 extraneous input '46(0x2E)' expecting BYTE
line 1:6 mismatched input '202(0xCA)254(0xFE)' expecting '.'
[172(0xAC), 0(0x0), 1(0x1)]
[10(0xA), 10(0xA), 10(0xA), 1(0x1)]
[10(0xA), 10(0xA), 10(0xA), 99(0x63)]
```
@@ -32,7 +32,7 @@ We will not document here how to refer to the runtime from your Python project, 
 
 ## How do I run the generated lexer and/or parser?
 
-Let's suppose that your grammar is named, as above, "MyGrammar". Let's suppose this parser comprises a rule named "StartRule". The tool will have generated for you the following files:
+Let's suppose that your grammar is named, as above, "MyGrammar". Let's suppose this parser comprises a rule named "startRule". The tool will have generated for you the following files:
 
 * MyGrammarLexer.py
 * MyGrammarParser.py
@@ -44,6 +44,7 @@ Let's suppose that your grammar is named, as above, "MyGrammar". Let's suppose t
 Now a fully functioning script might look like the following:
 
+```python
 import sys
 from antlr4 import *
 from MyGrammarLexer import MyGrammarLexer
 from MyGrammarParser import MyGrammarParser
@@ -53,7 +54,7 @@ def main(argv):
     lexer = MyGrammarLexer(input)
     stream = CommonTokenStream(lexer)
     parser = MyGrammarParser(stream)
-    tree = parser.StartRule()
+    tree = parser.startRule()
 
 if __name__ == '__main__':
     main(sys.argv)
@@ -95,7 +96,7 @@ In order to execute this listener, you would simply add the following lines to t
 
 ```
 ...
-tree = parser.StartRule() - only repeated here for reference
+tree = parser.startRule() - only repeated here for reference
 printer = KeyPrinter()
 walker = ParseTreeWalker()
 walker.walk(printer, tree)
@@ -494,7 +494,7 @@ public class BaseCSharpTest implements RuntimeTestSupport, SpecialRuntimeTestAss
    }
 
    private String locateTool(String tool) {
-       String[] roots = { "/usr/bin/", "/usr/local/bin/" };
+       String[] roots = { "/opt/local/bin/", "/usr/bin/", "/usr/local/bin/" };
        for(String root : roots) {
            if(new File(root + tool).exists())
                return root + tool;
@@ -558,6 +558,12 @@ public class LeftRecursionDescriptors {
        public String output = "12\n";
    }
 
+   /** Test for https://github.com/antlr/antlr4/issues/1295 in addition to #433. */
+   public static class MultipleAlternativesWithCommonLabel_5 extends MultipleAlternativesWithCommonLabel {
+       public String input = "(99)+3";
+       public String output = "102\n";
+   }
+
    /**
     * This is a regression test for antlr/antlr4#509 "Incorrect rule chosen in
    * unambiguous grammar".
@@ -30,6 +30,7 @@
 package org.antlr.v4.test.runtime.java;
 
 import org.antlr.v4.Tool;
+import org.antlr.v4.analysis.AnalysisPipeline;
 import org.antlr.v4.automata.ATNFactory;
 import org.antlr.v4.automata.ATNPrinter;
 import org.antlr.v4.automata.LexerATNFactory;
@@ -913,6 +914,9 @@ public class BaseJavaTest implements RuntimeTestSupport {
        if ( g.isLexer() ) factory = new LexerATNFactory((LexerGrammar)g);
        g.atn = factory.createATN();
 
+       AnalysisPipeline anal = new AnalysisPipeline(g);
+       anal.process();
+
        CodeGenerator gen = new CodeGenerator(g);
        ST outputFileST = gen.generateParser(false);
        String output = outputFileST.render();
@@ -450,21 +450,35 @@ public class BaseNodeTest implements RuntimeTestSupport {
        return null;
    }
 
+   private boolean canExecute(String tool) {
+       try {
+           ProcessBuilder builder = new ProcessBuilder(tool, "--version");
+           builder.redirectErrorStream(true);
+           Process process = builder.start();
+           StreamVacuum vacuum = new StreamVacuum(process.getInputStream());
+           vacuum.start();
+           process.waitFor();
+           vacuum.join();
+           return process.exitValue() == 0;
+       }
+       catch (Exception e) {
+           ;
+       }
+       return false;
+   }
+
    private String locateNodeJS() {
        // typically /usr/local/bin/node
        String propName = "antlr-javascript-nodejs";
        String prop = System.getProperty(propName);
-       if (prop == null || prop.length() == 0) {
-           prop = locateTool("nodejs"); // seems to be nodejs on ubuntu
+       if ( prop!=null && prop.length()!=0 ) {
+           return prop;
+       }
-       if ( prop==null ) {
-           prop = locateTool("node"); // seems to be node on mac
+       if (canExecute("nodejs")) {
+           return "nodejs"; // nodejs on Debian without node-legacy package
        }
-       File file = new File(prop);
-       if (!file.exists()) {
-           throw new RuntimeException("Missing system property:" + propName);
-       }
-       return prop;
+       return "node"; // everywhere else
    }
 
    private String locateRuntime() {
@@ -592,10 +592,11 @@ public abstract class BasePythonTest implements RuntimeTestSupport {
    }
 
    private String locateTool(String tool) {
-       String[] roots = { "/usr/bin/", "/usr/local/bin/" };
+       String[] roots = { "/opt/local/bin", "/usr/bin/", "/usr/local/bin/" };
        for(String root : roots) {
-           if(new File(root + tool).exists())
-               return root + tool;
+           if(new File(root + tool).exists()) {
+               return root+tool;
+           }
        }
        throw new RuntimeException("Could not locate " + tool);
    }
@@ -74,7 +74,7 @@ struct Any
 
    auto derived = dynamic_cast<Derived<T> *>(_ptr);
 
-   return derived;
+   return derived != nullptr;
  }
 
  template<class U>
@@ -156,6 +156,11 @@ private:
 
 };
 
+template<> inline
+Any::Any(std::nullptr_t&& ) : _ptr(nullptr) {
+}
+
+
 } // namespace antlrcpp
 
 #ifdef _WIN32
@@ -50,7 +50,7 @@ import java.util.List;
 *  The root node always has a null pointer and invokingState of -1.
 *
 *  Upon entry to parsing, the first invoked rule function creates a
- *  context object (asubclass specialized for that rule such as
+ *  context object (a subclass specialized for that rule such as
 *  SContext) and makes it the root of a parse tree, recorded by field
 *  Parser._ctx.
 *
@@ -545,7 +545,11 @@ public enum PredictionMode {
        return all;
    }
 
-   /** Get union of all alts from configs. @since 4.5.1 */
+   /**
+    * Get union of all alts from configs.
+    *
+    * @since 4.5.1
+    */
    public static BitSet getAlts(ATNConfigSet configs) {
        BitSet alts = new BitSet();
        for (ATNConfig config : configs) {
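A hypothetical use of the method whose javadoc is expanded above, e.g. from a `reportAmbiguity` callback where the runtime hands you the `ATNConfigSet` (the listener subclass and variable names here are invented for illustration):

```java
import java.util.BitSet;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.atn.ATNConfigSet;
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.dfa.DFA;

// Sketch: report which alternatives an ambiguity was ambiguous among.
class AmbiguityLogger extends BaseErrorListener {
    @Override
    public void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex,
                                int stopIndex, boolean exact, BitSet ambigAlts,
                                ATNConfigSet configs) {
        // Fall back to the union of alts in the config set when the
        // runtime does not supply ambigAlts directly.
        BitSet alts = ambigAlts != null ? ambigAlts : PredictionMode.getAlts(configs);
        System.err.println("ambiguous among alts " + alts +
                           " in tokens [" + startIndex + "," + stopIndex + "]");
    }
}
```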
@@ -50,7 +50,7 @@ public class Array2DHashSet<T> implements Set<T> {
    /** How many elements in set */
    protected int n = 0;
 
-   protected int threshold = (int)(INITAL_CAPACITY * LOAD_FACTOR); // when to expand
+   protected int threshold = (int)Math.floor(INITAL_CAPACITY * LOAD_FACTOR); // when to expand
 
    protected int currentPrime = 1; // jump by 4 primes each expand or whatever
    protected int initialBucketCapacity = INITAL_BUCKET_CAPACITY;
@@ -407,6 +407,7 @@ public class Array2DHashSet<T> implements Set<T> {
    public void clear() {
        buckets = createBuckets(INITAL_CAPACITY);
        n = 0;
+       threshold = (int)Math.floor(INITAL_CAPACITY * LOAD_FACTOR);
    }
 
    @Override
@@ -81,7 +81,7 @@ class ListTokenSource(TokenSource):
             return self.eofToken
         t = self.tokens[self.pos]
         if self.pos == len(self.tokens) - 1 and t.type == Token.EOF:
-            eofToken = t
+            self.eofToken = t
         self.pos += 1
         return t
 
@@ -70,7 +70,7 @@ class PredictionContext(object):
 #    }
 # </pre>
 #/
 
 
     def __init__(self, cachedHashCode):
         self.cachedHashCode = cachedHashCode
@@ -85,7 +85,7 @@ class PredictionContext(object):
         return self.getReturnState(len(self) - 1) == self.EMPTY_RETURN_STATE
 
     def getReturnState(self, index):
-        raise "illegal!"
+        raise IllegalStateException("illegal!")
 
     def __hash__(self):
         return self.cachedHashCode
@@ -443,7 +443,7 @@ class ParserATNSimulator(ATNSimulator):
 
         if D.requiresFullContext and self.predictionMode != PredictionMode.SLL:
             # IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error)
-            conflictingAlts = None
+            conflictingAlts = D.configs.conflictingAlts
             if D.predicates is not None:
                 if ParserATNSimulator.debug:
                     print("DFA state has preds in DFA sim LL failover")
@@ -213,7 +213,7 @@ class PredictionMode(object):
         # dup configs, tossing out semantic predicates
         dup = ATNConfigSet()
         for c in configs:
-            c = ATNConfig(c,SemanticContext.NONE)
+            c = ATNConfig(config=c, semantic=SemanticContext.NONE)
             dup.add(c)
         configs = dup
         # now we have combined contexts for configs with dissimilar preds
@@ -81,7 +81,7 @@ class ListTokenSource(TokenSource):
             return self.eofToken
         t = self.tokens[self.pos]
         if self.pos == len(self.tokens) - 1 and t.type == Token.EOF:
-            eofToken = t
+            self.eofToken = t
         self.pos += 1
         return t
 
@@ -371,7 +371,7 @@ class Parser (Recognizer):
     # Always called by generated parsers upon entry to a rule. Access field
     # {@link #_ctx} get the current context.
     #
-    def enterRule(self, localctx:ParserRuleContext , state:int , ruleIndexint ):
+    def enterRule(self, localctx:ParserRuleContext , state:int , ruleIndex:int):
         self.state = state
         self._ctx = localctx
         self._ctx.start = self._input.LT(1)
@@ -70,7 +70,7 @@ class PredictionContext(object):
 #    }
 # </pre>
 #/
 
 
     def __init__(self, cachedHashCode:int):
         self.cachedHashCode = cachedHashCode
@@ -85,7 +85,7 @@ class PredictionContext(object):
         return self.getReturnState(len(self) - 1) == self.EMPTY_RETURN_STATE
 
     def getReturnState(self, index:int):
-        raise "illegal!"
+        raise IllegalStateException("illegal!")
 
     def __hash__(self):
         return self.cachedHashCode
@@ -448,7 +448,7 @@ class ParserATNSimulator(ATNSimulator):
 
         if D.requiresFullContext and self.predictionMode != PredictionMode.SLL:
             # IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error)
-            conflictingAlts = None
+            conflictingAlts = D.configs.conflictingAlts
             if D.predicates is not None:
                 if ParserATNSimulator.debug:
                     print("DFA state has preds in DFA sim LL failover")
@@ -216,7 +216,7 @@ class PredictionMode(Enum):
         # dup configs, tossing out semantic predicates
         dup = ATNConfigSet()
         for c in configs:
-            c = ATNConfig(c,SemanticContext.NONE)
+            c = ATNConfig(config=c, semantic=SemanticContext.NONE)
             dup.add(c)
         configs = dup
         # now we have combined contexts for configs with dissimilar preds
@@ -36,9 +36,9 @@
 # <p>I have scoped the {@link AND}, {@link OR}, and {@link Predicate} subclasses of
 # {@link SemanticContext} within the scope of this outer class.</p>
 #
-from io import StringIO
 from antlr4.Recognizer import Recognizer
 from antlr4.RuleContext import RuleContext
+from io import StringIO
 
 
 class SemanticContext(object):
@@ -115,7 +115,7 @@ def orContext(a:SemanticContext, b:SemanticContext):
     else:
         return result
 
-def filterPrecedencePredicates(collection:list):
+def filterPrecedencePredicates(collection:set):
     return [context for context in collection if isinstance(context, PrecedencePredicate)]
 
 
@@ -176,6 +176,45 @@ public class TestActionTranslation extends BaseJavaTest {
        testActions(attributeTemplate, "inline", action, expected);
    }
 
+   /**
+    * Regression test for issue #1295
+    * $e.v yields incorrect value 0 in "e returns [int v] : '1' {$v = 1;} | '(' e ')' {$v = $e.v;} ;"
+    * https://github.com/antlr/antlr4/issues/1295
+    */
+   @Test public void testRuleRefsRecursive() throws Exception {
+       String recursiveTemplate =
+           "recursiveTemplate(inline) ::= <<\n" +
+           "parser grammar A;\n"+
+           "e returns [int v]\n" +
+           "    :   INT {$v = $INT.int;}\n" +
+           "    |   '(' e ')' {\n" +
+           "        #inline#<inline>#end-inline#\n" +
+           "        }\n" +
+           "    ;\n" +
+           ">>";
+       String leftRecursiveTemplate =
+           "recursiveTemplate(inline) ::= <<\n" +
+           "parser grammar A;\n"+
+           "e returns [int v]\n" +
+           "    :   a=e op=('*'|'/') b=e  {$v = eval($a.v, $op.type, $b.v);}\n" +
+           "    |   INT {$v = $INT.int;}\n" +
+           "    |   '(' e ')' {\n" +
+           "        #inline#<inline>#end-inline#\n" +
+           "        }\n" +
+           "    ;\n" +
+           ">>";
+       // ref to value returned from recursive call to rule
+       String action = "$v = $e.v;";
+       String expected = "((EContext)_localctx).v = ((EContext)_localctx).e.v;";
+       testActions(recursiveTemplate, "inline", action, expected);
+       testActions(leftRecursiveTemplate, "inline", action, expected);
+       // ref to predefined attribute obtained from recursive call to rule
+       action = "$v = $e.text.length();";
+       expected = "((EContext)_localctx).v = (((EContext)_localctx).e!=null?_input.getText(((EContext)_localctx).e.start,((EContext)_localctx).e.stop):null).length();";
+       testActions(recursiveTemplate, "inline", action, expected);
+       testActions(leftRecursiveTemplate, "inline", action, expected);
+   }
+
    @Test public void testRefToTextAttributeForCurrentRule() throws Exception {
        String action = "$ctx.text; $text";
 
@@ -65,7 +65,7 @@ public class TestSymbolIssues extends BaseJavaTest {
        "warning(" + ErrorType.IMPLICIT_TOKEN_DEFINITION.code + "): A.g4:10:20: implicit definition of token Y in parser\n" +
        "warning(" + ErrorType.IMPLICIT_TOKEN_DEFINITION.code + "): A.g4:11:4: implicit definition of token FJKD in parser\n" +
        "error(" + ErrorType.RULE_HAS_NO_ARGS.code + "): A.g4:9:37: rule b has no defined parameters\n" +
-       "error(" + ErrorType.MISSING_RULE_ARGS.code + "): A.g4:10:31: missing arguments(s) on rule reference: a\n"
+       "error(" + ErrorType.MISSING_RULE_ARGS.code + "): A.g4:10:31: missing argument(s) on rule reference: a\n"
    };
 
    static String[] B = {
@@ -521,6 +521,19 @@ public class Tool {
        // Make grammars depend on any tokenVocab options
        if ( tokenVocabNode!=null ) {
            String vocabName = tokenVocabNode.getText();
+           // Strip quote characters if any
+           int len = vocabName.length();
+           int firstChar = vocabName.charAt(0);
+           int lastChar = vocabName.charAt(len - 1);
+           if (len >= 2 && firstChar == '\'' && lastChar == '\'') {
+               vocabName = vocabName.substring(1, len-1);
+           }
+           // If the name contains a path delimited by forward slashes,
+           // use only the part after the last slash as the name
+           int lastSlash = vocabName.lastIndexOf('/');
+           if (lastSlash >= 0) {
+               vocabName = vocabName.substring(lastSlash + 1);
+           }
            g.addEdge(grammarName, vocabName);
        }
        // add cycle to graph so we always process a grammar if no error
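The added block normalizes a `tokenVocab` option value before it becomes a dependency-graph node: quotes are stripped, then anything up to the last forward slash is dropped. An illustrative restatement as a standalone method (class and method names invented):

```java
public class VocabNameDemo {
    /** Mirror of the normalization above: 'lib/MyLexer' -> MyLexer */
    static String normalize(String vocabName) {
        // Strip surrounding single quotes if present
        int len = vocabName.length();
        if (len >= 2 && vocabName.charAt(0) == '\'' && vocabName.charAt(len - 1) == '\'') {
            vocabName = vocabName.substring(1, len - 1);
        }
        // Keep only the part after the last forward slash
        int lastSlash = vocabName.lastIndexOf('/');
        if (lastSlash >= 0) {
            vocabName = vocabName.substring(lastSlash + 1);
        }
        return vocabName;
    }

    public static void main(String[] args) {
        System.out.println(normalize("'lib/MyLexer'")); // MyLexer
        System.out.println(normalize("MyLexer"));       // MyLexer
    }
}
```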
@@ -713,7 +726,7 @@ public class Tool {
    *
    * The output dir -o spec takes precedence if it's absolute.
    * E.g., if the grammar file dir is absolute the output dir is given
-   * precendence. "-o /tmp /usr/lib/t.g4" results in "/tmp/T.java" as
+   * precedence. "-o /tmp /usr/lib/t.g4" results in "/tmp/T.java" as
    * output (assuming t.g4 holds T.java).
    *
    * If no -o is specified, then just write to the directory where the
@@ -214,23 +214,10 @@ public class ActionTranslator implements ActionSplitterListener {
        switch ( a.dict.type ) {
            case ARG: chunks.add(new ArgRef(nodeContext,y.getText())); break; // has to be current rule
            case RET:
-               if ( factory.getCurrentRuleFunction()!=null &&
-                    factory.getCurrentRuleFunction().name.equals(x.getText()) )
-               {
-                   chunks.add(new RetValueRef(rf.ruleCtx, y.getText())); break;
-               }
-               else {
-                   chunks.add(new QRetValueRef(nodeContext, getRuleLabel(x.getText()), y.getText())); break;
-               }
+               chunks.add(new QRetValueRef(nodeContext, getRuleLabel(x.getText()), y.getText()));
+               break;
            case PREDEFINED_RULE:
-               if ( factory.getCurrentRuleFunction()!=null &&
-                    factory.getCurrentRuleFunction().name.equals(x.getText()) )
-               {
-                   chunks.add(getRulePropertyRef(y));
-               }
-               else {
-                   chunks.add(getRulePropertyRef(x, y));
-               }
+               chunks.add(getRulePropertyRef(x, y));
+               break;
            case TOKEN:
                chunks.add(getTokenPropertyRef(x, y));
@@ -65,7 +65,7 @@ public class ElementFrequenciesVisitor extends GrammarTreeVisitor {
    * @param b The second set.
    * @param clip The maximum value to allow for any output.
    * @return The sum of the two sets, with the individual elements clipped
-   * to the maximum value gived by {@code clip}.
+   * to the maximum value given by {@code clip}.
    */
    protected static FrequencySet<String> combineAndClip(FrequencySet<String> a, FrequencySet<String> b, int clip) {
        FrequencySet<String> result = new FrequencySet<String>();
@@ -120,7 +120,7 @@ public class Utils {
        return b;
    }
 
-   /** Find exact object type or sublass of cl in list */
+   /** Find exact object type or subclass of cl in list */
    public static <T> T find(List<?> ops, Class<T> cl) {
        for (Object o : ops) {
            if ( cl.isInstance(o) ) return cl.cast(o);
@@ -157,7 +157,7 @@ public class ErrorManager {
    }
 
    /**
-    * Raise a predefined message with some number of paramters for the StringTemplate but for which there
+    * Raise a predefined message with some number of parameters for the StringTemplate but for which there
     * is no location information possible.
     * @param errorType The Message Descriptor
     * @param args The arguments to pass to the StringTemplate
@@ -61,7 +61,7 @@ public enum ErrorSeverity {
    /**
     * Standard constructor to build an instance of the Enum entries
     *
-    * @param text The human readable string representing the serverity level
+    * @param text The human readable string representing the severity level
     */
    private ErrorSeverity(String text) { this.text = text; }
 }
@@ -328,9 +328,9 @@ public enum ErrorType {
    /**
     * Compiler Error 79.
    *
-   * <p>missing arguments(s) on rule reference: <em>rule</em></p>
+   * <p>missing argument(s) on rule reference: <em>rule</em></p>
    */
-   MISSING_RULE_ARGS(79, "missing arguments(s) on rule reference: <arg>", ErrorSeverity.ERROR),
+   MISSING_RULE_ARGS(79, "missing argument(s) on rule reference: <arg>", ErrorSeverity.ERROR),
    /**
     * Compiler Error 80.
    *
@@ -48,6 +48,7 @@ import org.antlr.v4.runtime.atn.DecisionState;
 import org.antlr.v4.runtime.atn.PredictionMode;
 import org.antlr.v4.runtime.atn.RuleStartState;
 import org.antlr.v4.runtime.atn.StarLoopEntryState;
+import org.antlr.v4.runtime.misc.Interval;
 import org.antlr.v4.runtime.tree.Trees;
 
 import java.lang.reflect.Constructor;
@@ -292,22 +293,26 @@ public class GrammarParserInterpreter extends ParserInterpreter {
    *  ambig input.
    */
    public static List<ParserRuleContext> getAllPossibleParseTrees(Grammar g,
                                                                   Parser originalParser,
                                                                   TokenStream tokens,
                                                                   int decision,
                                                                   BitSet alts,
                                                                   int startIndex,
                                                                   int stopIndex,
                                                                   int startRuleIndex)
-       throws RecognitionException
-   {
+       throws RecognitionException {
        List<ParserRuleContext> trees = new ArrayList<ParserRuleContext>();
        // Create a new parser interpreter to parse the ambiguous subphrase
        ParserInterpreter parser = deriveTempParserInterpreter(g, originalParser, tokens);
 
+       if ( stopIndex>=(tokens.size()-1) ) { // if we are pointing at EOF token
+           // EOF is not in tree, so must be 1 less than last non-EOF token
+           stopIndex = tokens.size()-2;
+       }
+
        // get ambig trees
        int alt = alts.nextSetBit(0);
-       while (alt >= 0) {
+       while ( alt>=0 ) {
            // re-parse entire input for all ambiguous alternatives
            // (don't have to do first as it's been parsed, but do again for simplicity
            // using this temp parser.)
@@ -318,16 +323,15 @@ public class GrammarParserInterpreter extends ParserInterpreter {
                (GrammarInterpreterRuleContext) Trees.getRootOfSubtreeEnclosingRegion(t, startIndex, stopIndex);
            // Use higher of overridden decision tree or tree enclosing all tokens
            if ( Trees.isAncestorOf(parser.getOverrideDecisionRoot(), ambigSubTree) ) {
-               ambigSubTree = (GrammarInterpreterRuleContext)parser.getOverrideDecisionRoot();
+               ambigSubTree = (GrammarInterpreterRuleContext) parser.getOverrideDecisionRoot();
            }
            trees.add(ambigSubTree);
-           alt = alts.nextSetBit(alt + 1);
+           alt = alts.nextSetBit(alt+1);
        }
 
        return trees;
    }
 
-
    /** Return a list of parse trees, one for each alternative in a decision
    *  given the same input.
    *
@@ -355,25 +359,25 @@ public class GrammarParserInterpreter extends ParserInterpreter {
    * @since 4.5.1
    */
    public static List<ParserRuleContext> getLookaheadParseTrees(Grammar g,
                                                                 ParserInterpreter originalParser,
                                                                 TokenStream tokens,
                                                                 int startRuleIndex,
                                                                 int decision,
                                                                 int startIndex,
-                                                                int stopIndex)
-   {
+                                                                int stopIndex) {
        List<ParserRuleContext> trees = new ArrayList<ParserRuleContext>();
        // Create a new parser interpreter to parse the ambiguous subphrase
        ParserInterpreter parser = deriveTempParserInterpreter(g, originalParser, tokens);
-       BailButConsumeErrorStrategy errorHandler = new BailButConsumeErrorStrategy();
-       parser.setErrorHandler(errorHandler);
 
        DecisionState decisionState = originalParser.getATN().decisionToState.get(decision);
 
-       for (int alt=1; alt<=decisionState.getTransitions().length; alt++) {
+       for (int alt = 1; alt<=decisionState.getTransitions().length; alt++) {
            // re-parse entire input for all ambiguous alternatives
            // (don't have to do first as it's been parsed, but do again for simplicity
            // using this temp parser.)
+           GrammarParserInterpreter.BailButConsumeErrorStrategy errorHandler =
+               new GrammarParserInterpreter.BailButConsumeErrorStrategy();
+           parser.setErrorHandler(errorHandler);
            parser.reset();
            parser.addDecisionOverride(decision, startIndex, alt);
            ParserRuleContext tt = parser.parse(startRuleIndex);
@@ -381,10 +385,17 @@ public class GrammarParserInterpreter extends ParserInterpreter {
            if ( errorHandler.firstErrorTokenIndex>=0 ) {
                stopTreeAt = errorHandler.firstErrorTokenIndex; // cut off rest at first error
            }
+           Interval overallRange = tt.getSourceInterval();
+           if ( stopTreeAt>overallRange.b ) {
+               // If we try to look beyond range of tree, stopTreeAt must be EOF
+               // for which there is no EOF ref in grammar. That means tree
+               // will not have node for stopTreeAt; limit to overallRange.b
+               stopTreeAt = overallRange.b;
+           }
            ParserRuleContext subtree =
                Trees.getRootOfSubtreeEnclosingRegion(tt,
                                                      startIndex,
                                                      stopTreeAt);
            // Use higher of overridden decision tree or tree enclosing all tokens
            if ( Trees.isAncestorOf(parser.getOverrideDecisionRoot(), subtree) ) {
                subtree = parser.getOverrideDecisionRoot();
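For context, a hypothetical caller of the static helper reshaped above; the wrapper class and method are invented, and the parameters simply mirror the signature in the diff:

```java
import java.util.BitSet;
import java.util.List;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.tree.Trees;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.GrammarParserInterpreter;

// Sketch: dump one parse tree per ambiguous alternative, in LISP form.
class AmbiguityDump {
    static void dump(Grammar g, Parser parser, TokenStream tokens, int decision,
                     BitSet alts, int startIndex, int stopIndex, int startRuleIndex)
            throws RecognitionException {
        List<ParserRuleContext> trees = GrammarParserInterpreter.getAllPossibleParseTrees(
            g, parser, tokens, decision, alts, startIndex, stopIndex, startRuleIndex);
        for (ParserRuleContext t : trees) {
            System.out.println(Trees.toStringTree(t, parser));
        }
    }
}
```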