diff --git a/.travis.yml b/.travis.yml index cae5dae3d..85147d97b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -152,6 +152,10 @@ matrix: jdk: openjdk8 env: TARGET=csharp stage: main-test + - os: linux + jdk: openjdk8 + env: TARGET=dart + stage: main-test - os: linux language: php php: diff --git a/.travis/before-install-linux-dart.sh b/.travis/before-install-linux-dart.sh new file mode 100755 index 000000000..d87086ed4 --- /dev/null +++ b/.travis/before-install-linux-dart.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -euo pipefail +wget https://storage.googleapis.com/dart-archive/channels/stable/release/2.8.4/linux_packages/dart_2.8.4-1_amd64.deb +sudo dpkg -i ./dart_2.8.4-1_amd64.deb +sudo rm ./dart_2.8.4-1_amd64.deb +sudo apt-get install -f diff --git a/.travis/run-tests-dart.sh b/.travis/run-tests-dart.sh new file mode 100755 index 000000000..8053a90d7 --- /dev/null +++ b/.travis/run-tests-dart.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +set -euo pipefail +mvn -q -Dparallel=classes -DthreadCount=4 -Dtest=dart.* test diff --git a/README.md b/README.md index ec5c21c62..44cbf62a8 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ ANTLR project lead and supreme dictator for life * [Ewan Mellor](https://github.com/ewanmellor), [Hanzhou Shi](https://github.com/hanjoes) (Swift target merging) * [Ben Hamilton](https://github.com/bhamiltoncx) (Full Unicode support in serialized ATN and all languages' runtimes for code points > U+FFFF) * [Marcos Passos](https://github.com/marcospassos) (PHP target) +* [Lingyu Li](https://github.com/lingyv-li) (Dart target) ## Useful information diff --git a/appveyor.yml b/appveyor.yml index 5788acadf..275a26d03 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -8,6 +8,7 @@ install: - git submodule update --init --recursive - cinst -y php --params "/InstallDir:C:\tools\php" - cinst -y composer + - cinst -y dart-sdk --version=2.8.4 build_script: - mvn -DskipTests install --batch-mode - msbuild /target:restore /target:rebuild /property:Configuration=Release /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" /verbosity:detailed runtime/CSharp/runtime/CSharp/Antlr4.dotnet.sln @@ -15,7 +16,7 @@ build_script: after_build: - msbuild /target:pack /property:Configuration=Release /verbosity:detailed runtime/CSharp/runtime/CSharp/Antlr4.dotnet.sln test_script: - - mvn install -Dantlr-php-php="C:\tools\php\php.exe" -Dantlr-python2-python="C:\Python27\python.exe" -Dantlr-python3-python="C:\Python35\python.exe" -Dantlr-javascript-nodejs="C:\Program Files (x86)\nodejs\node.exe" --batch-mode + - mvn install -Dantlr-php-php="C:\tools\php\php.exe" -Dantlr-dart-dart="C:\tools\dart-sdk\bin\dart.exe" -Dantlr-dart-pub="C:\tools\dart-sdk\bin\pub.bat" -Dantlr-dart-dart2native="C:\tools\dart-sdk\bin\dart2native.bat" -Dantlr-python2-python="C:\Python27\python.exe" -Dantlr-python3-python="C:\Python35\python.exe" -Dantlr-javascript-nodejs="C:\Program Files (x86)\nodejs\node.exe" --batch-mode artifacts: - path: 'runtime\**\*.nupkg' name: NuGet \ No newline at end of file diff --git a/contributors.txt b/contributors.txt index e1dc8347b..5033f1033 100644 --- a/contributors.txt +++ b/contributors.txt @@ -1,5 +1,9 @@ ANTLR Project Contributors Certification of Origin and Rights +NOTE: This tool is mature and Terence is mostly occupied elsewhere. We +can't accept any changes that could have widespread effects on thousands +of existing projects. Sorry! 
+ All contributors to ANTLR v4 must formally agree to abide by this certificate of origin by signing on the bottom with their github userid, full name, email address (you can obscure your e-mail, but it @@ -237,10 +241,14 @@ YYYY/MM/DD, github id, Full name, email 2019/11/11, foxeverl, Liu Xinfeng, liuxf1986[at]gmail[dot]com 2019/11/17, felixn, Felix Nieuwenhuizhen, felix@tdlrali.com 2019/11/18, mlilback, Mark Lilback, mark@lilback.com +2020/01/19, lingyv-li, Lingyu Li, lingyv.li@gmail.com 2020/02/02, carocad, Camilo Roca, carocad@unal.edu.co 2020/02/10, julibert, Julián Bermúdez Ortega, julibert.dev@gmail.com 2020/02/21, StochasticTinkr, Daniel Pitts, github@coloraura.com 2020/03/17, XsongyangX, Song Yang, songyang1218@gmail.com 2020/04/07, deniskyashif, Denis Kyashif, denis.kyashif@gmail.com 2020/04/30, TristonianJones, Tristan Swadell, tswadell@google.com +2020/05/25, graknol, Sindre van der Linden, graknol@gmail.com +2020/05/31, d-markey, David Markey, dmarkey@free.fr 2020/06/04, IohannRabeson, Iohann Rabeson, iotaka6@gmail.com +2020/07/01, sha-N, Shan M Mathews, admin@bluestarqatar.com \ No newline at end of file diff --git a/doc/creating-a-language-target.md b/doc/creating-a-language-target.md index ff7db290e..dd06208ea 100644 --- a/doc/creating-a-language-target.md +++ b/doc/creating-a-language-target.md @@ -10,6 +10,8 @@ Creating a new target involves the following key elements: 1. Create *X*.stg in directory tool/resources/org/antlr/v4/tool/templates/codegen/*X*/*X*.stg. This is a [StringTemplate](http://www.stringtemplate.org/) group file (`.stg`) that tells ANTLR how to express all of the parsing elements needed to generate code. You will see templates called `ParserFile`, `Parser`, `Lexer`, `CodeBlockForAlt`, `AltBlock`, etc... Each of these must be described how to build the indicated chunk of code. Your best bet is to find the closest existing target, copy that template file, and tweak to suit. 1. Create a runtime library to support the parsers generated by ANTLR. Under directory runtime/*X*, you are in complete control of the directory structure as dictated by common usage of that target language. For example, Java has: `runtime/Java/lib` and `runtime/Java/src` directories. Under `src`, you will find a directory structure for package `org.antlr.v4.runtime` and below. 1. Create a template file for runtime tests. All you have to do is provide a few templates that indicate how to print values and declare variables. Our runtime test mechanism in dir `runtime-testsuite` will automatically generate code using these templates for each target and check the test results. It needs to know how to define various class fields, compare members and so on. You must create a *X*.test.stg file underneath [runtime-testsuite/resources/org/antlr/v4/test/runtime](https://github.com/antlr/antlr4/tree/master/runtime-testsuite/resources/org/antlr/v4/test/runtime). Again, your best bet is to copy the templates from the closest language to your target and tweak it to suit. +1. Create test files under [/runtime-testsuite/test/org/antlr/v4/test/runtime](https://github.com/antlr/antlr4/tree/master/runtime-testsuite/test/org/antlr/v4/test/runtime). They will load defined test cases in each test descriptor. Also add the `/runtime-testsuite/test/org/antlr/v4/test/runtime/X/BaseXTest.java` which defines how test cases will execute and output. +1. 
Create/edit shell scripts in [/.travis](https://github.com/antlr/antlr4/blob/master/.travis) and [/appveyor.yml](https://github.com/antlr/antlr4/blob/master/appveyor.yml) to run tests in CI pipelines. ## Getting started diff --git a/doc/dart-target.md b/doc/dart-target.md new file mode 100644 index 000000000..eb4da0f72 --- /dev/null +++ b/doc/dart-target.md @@ -0,0 +1,117 @@ +# ANTLR4 Runtime for Dart + +Notice: Dart target may generate code incompatible with Dart 2.9 sound null safety. Please set the minimum SDK constraint to 2.8.4 or lower if such violation is found. Contributions are welcomed. + +### First steps + +#### 1. Install ANTLR4 + +[The getting started guide](https://github.com/antlr/antlr4/blob/master/doc/getting-started.md) +should get you started. + +#### 2. Install the Dart ANTLR runtime + +Each target language for ANTLR has a runtime package for running parser +generated by ANTLR4. The runtime provides a common set of tools for using your parser. + +Install the runtime with the same version as the main ANTLR tool: + +Add this to your package's pubspec.yaml file: +```yaml +... +dependencies: + antlr4: +... +``` + +#### 3. Generate your parser + +You use the ANTLR4 "tool" to generate a parser. These will reference the ANTLR +runtime, installed above. + +Suppose you're using a UNIX system and have set up an alias for the ANTLR4 tool +as described in [the getting started guide](https://github.com/antlr/antlr4/blob/master/doc/getting-started.md). +To generate your Dart parser, run the following command: + +```shell script +antlr4 -Dlanguage=Dart MyGrammar.g4 +``` + +For a full list of antlr4 tool options, please visit the +[tool documentation page](https://github.com/antlr/antlr4/blob/master/doc/tool-options.md). + +### Complete example + +Suppose you're using the JSON grammar from https://github.com/antlr/grammars-v4/tree/master/json. + +Then, invoke `antlr4 -Dlanguage=Dart JSON.g4`. The result of this is a +collection of `.dart` including: + +* JsonLexer.dart +* JsonParser.dart +* JsonBaseListener.dart +* JsonListener.dart (if you have not activated the -no-listener option) +* JsonVisitor.dart (if you have activated the -visitor option) + +We'll write a small main func to call the generated parser/lexer +(assuming they are separate). 
This one writes out the encountered +`ParseTreeContext`'s: + +```dart +import 'package:antlr4/antlr4.dart'; +import 'package:my_project/JSONParser.dart'; +import 'package:my_project/JSONLexer.dart'; + +class TreeShapeListener implements ParseTreeListener { + @override + void enterEveryRule(ParserRuleContext ctx) { + print(ctx.text); + } + + @override + void exitEveryRule(ParserRuleContext node) { + } + + @override + void visitErrorNode(ErrorNode node) { + } + + @override + void visitTerminal(TerminalNode node) { + } +} + +void main(List args) async { + JSONLexer.checkVersion(); + JSONParser.checkVersion(); + final input = await InputStream.fromPath(args[0]); + final lexer = JSONLexer(input); + final tokens = CommonTokenStream(lexer); + final parser = JSONParser(tokens); + parser.addErrorListener(DiagnosticErrorListener()); + parser.buildParseTree = true; + final tree = parser.json(); + ParseTreeWalker.DEFAULT.walk(TreeShapeListener(), tree); +} +``` + +Create a `example.json` file: +```json +{"a":1} +``` + +Parse the input file: + +```shell script +dart bin/main.dart example.json +``` + +The expected output is: + +``` +{"a":1} +{"a":1} +{"a":1} +"a":1 +1 +``` \ No newline at end of file diff --git a/doc/releasing-antlr.md b/doc/releasing-antlr.md index cd1d0b9b4..a09fd33eb 100644 --- a/doc/releasing-antlr.md +++ b/doc/releasing-antlr.md @@ -65,6 +65,8 @@ Edit the repository looking for 4.5 or whatever and update it. Bump version in t * runtime/Cpp/demo/generate.cmd * runtime/Go/antlr/recognizer.go * runtime/Swift/Antlr4/org/antlr/v4/runtime/RuntimeMetaData.swift + * runtime/Dart/lib/src/runtime_meta_data.dart + * runtime/Dart/pubspec.yaml * tool/src/org/antlr/v4/codegen/target/GoTarget.java * tool/src/org/antlr/v4/codegen/target/CppTarget.java * tool/src/org/antlr/v4/codegen/target/CSharpTarget.java @@ -442,6 +444,19 @@ git push origin gh-pages popd ``` +### Dart + +Push to pub.dev + +```bash +cd runtime/Dart +pub publish +``` + +It will warn that no change log found for the new version. +If there are changes relevant to dart in this release, edit [CHANGELOG.md](https://github.com/antlr/antlr4/blob/master/runtime/Dart/CHANGELOG.md) to describe the changes. +Otherwise enter `N` to ignore the warning. + ## Update javadoc for runtime and tool First, gen javadoc: diff --git a/doc/targets.md b/doc/targets.md index c2341ec48..ad6e7dba9 100644 --- a/doc/targets.md +++ b/doc/targets.md @@ -10,12 +10,13 @@ This page lists the available and upcoming ANTLR runtimes. Please note that you * [C++](cpp-target.md) * [Swift](swift-target.md) * [PHP](php-target.md) +* [Dart](dart-target.md) ## Target feature parity New features generally appear in the Java target and then migrate to the other targets, but these other targets don't always get updated in the same overall tool release. This section tries to identify features added to Java that have not been added to the other targets. 
-|Feature|Java|C♯|Python2|Python3|JavaScript|Go|C++|Swift|PHP -|---|---|---|---|---|---|---|---|---|---| -|Ambiguous tree construction|4.5.1|-|-|-|-|-|-|-|-| +|Feature|Java|C♯|Python2|Python3|JavaScript|Go|C++|Swift|PHP|Dart +|---|---|---|---|---|---|---|---|---|---|---| +|Ambiguous tree construction|4.5.1|-|-|-|-|-|-|-|-|-| diff --git a/runtime-testsuite/pom.xml b/runtime-testsuite/pom.xml index 8363414bd..b6368917d 100644 --- a/runtime-testsuite/pom.xml +++ b/runtime-testsuite/pom.xml @@ -103,6 +103,7 @@ **/python2/Test*.java **/python3/Test*.java **/php/Test*.java + **/dart/Test*.java ${antlr.tests.swift} diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Dart.test.stg b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Dart.test.stg new file mode 100644 index 000000000..9f7d65cb0 --- /dev/null +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Dart.test.stg @@ -0,0 +1,318 @@ +writeln(s) ::= <);>> +write(s) ::= <);>> +writeList(s) ::= <);>> + +False() ::= "false" + +True() ::= "true" + +Not(v) ::= "!" + +Assert(s) ::= <);>> + +Cast(t,v) ::= "( as )" + +Append(a,b) ::= ".toString() + .toString()" + +AppendStr(a,b) ::= <%%> + +Concat(a,b) ::= "" + +AssertIsList(v) ::= "assert ( is List);" // just use static type system + +AssignLocal(s,v) ::= " = ;" + +InitIntMember(n,v) ::= <%int = ;%> + +InitBooleanMember(n,v) ::= <%bool = ;%> + +InitIntVar(n,v) ::= <%%> + +IntArg(n) ::= "int " + +VarRef(n) ::= "" + +GetMember(n) ::= <%this.%> + +SetMember(n,v) ::= <%this. = ;%> + +AddMember(n,v) ::= <%this. += ;%> + +MemberEquals(n,v) ::= <%this. == %> + +ModMemberEquals(n,m,v) ::= <%this. % == %> + +ModMemberNotEquals(n,m,v) ::= <%this. % != %> + +DumpDFA() ::= "this.dumpDFA();" + +Pass() ::= "" + +StringList() ::= "List\" + +BuildParseTrees() ::= "buildParseTree = true;" + +BailErrorStrategy() ::= <%errorHandler = new BailErrorStrategy();%> + +ToStringTree(s) ::= <%.toStringTree(parser: this)%> + +Column() ::= "this.charPositionInLine" + +Text() ::= "this.text" + +ValEquals(a,b) ::= <%==%> + +TextEquals(a) ::= <%this.text == ""%> + +PlusText(a) ::= <%"" + this.text%> + +InputText() ::= "tokenStream.text" + +LTEquals(i, v) ::= <%tokenStream.LT().text == %> + +LANotEquals(i, v) ::= <%tokenStream.LA()!=%> + +TokenStartColumnEquals(i) ::= <%this.tokenStartCharPositionInLine==%> + +ImportListener(X) ::= "" + +GetExpectedTokenNames() ::= "this.expectedTokens.toString(vocabulary: this.vocabulary)" + +RuleInvocationStack() ::= "ruleInvocationStack" + +LL_EXACT_AMBIG_DETECTION() ::= <> + +ParserToken(parser, token) ::= <%.TOKEN_%> + +Production(p) ::= <%
<p>
%>
+
+Result(r) ::= <%<r>%>
+
+ParserPropertyMember() ::= <<
+@members {
+bool Property() {
+  return true;
+}
+}
+>>
+
+ParserPropertyCall(p, call) ::= "
<p>
." + +PositionAdjustingLexerDef() ::= << +class PositionAdjustingLexerATNSimulator extends LexerATNSimulator { + PositionAdjustingLexerATNSimulator(Lexer recog, ATN atn, + List\ decisionToDFA, PredictionContextCache sharedContextCache) + : super(atn, decisionToDFA, sharedContextCache, recog: recog); + + void resetAcceptPosition(CharStream input, int index, int line, + int charPositionInLine) { + input.seek(index); + this.line = line; + this.charPositionInLine = charPositionInLine; + consume(input); + } +} +>> + +PositionAdjustingLexer() ::= << +@override +Token nextToken() { + if (!(super.interpreter is PositionAdjustingLexerATNSimulator)) { + interpreter = new PositionAdjustingLexerATNSimulator( + this, _ATN, _decisionToDFA, _sharedContextCache); + } + + return super.nextToken(); +} + +@override +Token emit() { + switch (type) { + case TOKEN_TOKENS: + handleAcceptPositionForKeyword("tokens"); + break; + + case TOKEN_LABEL: + handleAcceptPositionForIdentifier(); + break; + + default: + break; + } + + return super.emit(); +} + +bool handleAcceptPositionForIdentifier() { + String tokenText = text; + int identifierLength = 0; + while (identifierLength \< tokenText.length && + isIdentifierChar(tokenText[identifierLength])) { + identifierLength++; + } + + if (inputStream.index > tokenStartCharIndex + identifierLength) { + int offset = identifierLength - 1; + interpreter.resetAcceptPosition(inputStream, tokenStartCharIndex + offset, + tokenStartLine, tokenStartCharPositionInLine + offset); + return true; + } + + return false; +} + +bool handleAcceptPositionForKeyword(String keyword) { + if (inputStream.index > tokenStartCharIndex + keyword.length) { + int offset = keyword.length - 1; + interpreter.resetAcceptPosition(inputStream, tokenStartCharIndex + offset, + tokenStartLine, tokenStartCharPositionInLine + offset); + return true; + } + + return false; +} + +@override +PositionAdjustingLexerATNSimulator get interpreter { + return super.interpreter as PositionAdjustingLexerATNSimulator; +} + +static bool isIdentifierChar(String c) { + return isLetterOrDigit(c) || c == '_'; +} + +static const ZERO = 48; +static const LOWER_A = 97; +static const LOWER_Z = 122; +static const UPPER_A = 65; +static const UPPER_Z = 90; + +static bool isLetterOrDigit(String char) => isLetter(char) || isDigit(char); + +// Note: this is intentially ASCII only +static bool isLetter(String char) { + if (char == null) return false; + var cc = char.codeUnitAt(0); + return cc >= LOWER_A && cc \<= LOWER_Z || cc >= UPPER_A && cc \<= UPPER_Z; +} + +static bool isDigit(String char) { + if (char == null) return false; + var cc = char.codeUnitAt(0); + return cc >= ZERO && cc \< ZERO + 10; +} +>> + +BasicListener(X) ::= << +@parser::definitions { +class LeafListener extends TBaseListener { + void visitTerminal(TerminalNode node) { + print(node.symbol.text); + } +} +} +>> + +WalkListener(s) ::= << +ParseTreeWalker walker = new ParseTreeWalker(); +walker.walk(new LeafListener(), ); +>> + +TreeNodeWithAltNumField(X) ::= << +@parser::definitions { +class MyRuleNode extends ParserRuleContext { + int altNum; + + MyRuleNode(ParserRuleContext parent, int invokingStateNumber) + : super(parent, invokingStateNumber); + + @override int get altNumber { + return altNum; + } + + @override void set altNumber(int altNum) { + this.altNum = altNum; + } +} +} +>> + +TokenGetterListener(X) ::= << +@parser::definitions { +class LeafListener extends TBaseListener { + void exitA(AContext ctx) { + if (ctx.childCount==2) + 
stdout.write("${ctx.INT(0).symbol.text} ${ctx.INT(1).symbol.text} ${ctx.INTs()}"); + else + print(ctx.ID().symbol); + } +} +} +>> + +RuleGetterListener(X) ::= << +@parser::definitions { +class LeafListener extends TBaseListener { + void exitA(AContext ctx) { + if (ctx.childCount==2) { + stdout.write("${ctx.b(0).start.text} ${ctx.b(1).start.text} ${ctx.bs()[0].start.text}"); + } else + print(ctx.b(0).start.text); + } +} +} +>> + + +LRListener(X) ::= << +@parser::definitions { +class LeafListener extends TBaseListener { + void exitE(EContext ctx) { + if (ctx.childCount==3) { + stdout.write("${ctx.e(0).start.text} ${ctx.e(1).start.text} ${ctx.es()[0].start.text}\n"); + } else + print(ctx.INT().symbol.text); + } +} +} +>> + +LRWithLabelsListener(X) ::= << +@parser::definitions { +class LeafListener extends TBaseListener { + void exitCall(CallContext ctx) { + stdout.write("${ctx.e().start.text} ${ctx.eList()}"); + } + void exitInt(IntContext ctx) { + print(ctx.INT().symbol.text); + } +} +} +>> + +DeclareContextListGettersFunction() ::= << +void foo() { + SContext s = null; + List\ a = s.as(); + List\ b = s.bs(); +} +>> + +Declare_foo() ::= << + void foo() {print("foo");} +>> + +Invoke_foo() ::= "foo();" + +Declare_pred() ::= <> + +Invoke_pred(v) ::= <)>> + +ParserTokenType(t) ::= "Parser." +ContextRuleFunction(ctx, rule) ::= "." +StringType() ::= "String" +ContextMember(ctx, subctx, member) ::= ".." diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java index 5f8a98f07..6f28cf94a 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/BaseRuntimeTest.java @@ -51,7 +51,8 @@ public abstract class BaseRuntimeTest { "CSharp", "Python2", "Python3", "PHP", - "Node" + "Node", + "Dart" }; static { @@ -299,6 +300,16 @@ public abstract class BaseRuntimeTest { } } + public static String readFile(String dir, String fileName) { + try { + return String.copyValueOf(Utils.readFile(dir+"/"+fileName, "UTF-8")); + } + catch (IOException ioe) { + System.err.println("can't read file"); + ioe.printStackTrace(System.err); + } + return null; + } protected static void assertCorrectOutput(RuntimeTestDescriptor descriptor, RuntimeTestSupport delegate, String actualOutput) { String actualParseErrors = delegate.getParseErrors(); diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/BaseDartTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/BaseDartTest.java new file mode 100644 index 000000000..88c69d755 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/BaseDartTest.java @@ -0,0 +1,1185 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.Tool; +import org.antlr.v4.analysis.AnalysisPipeline; +import org.antlr.v4.automata.ATNFactory; +import org.antlr.v4.automata.ATNPrinter; +import org.antlr.v4.automata.LexerATNFactory; +import org.antlr.v4.automata.ParserATNFactory; +import org.antlr.v4.codegen.CodeGenerator; +import org.antlr.v4.misc.Utils; +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.atn.*; +import org.antlr.v4.runtime.dfa.DFA; +import org.antlr.v4.runtime.misc.IntegerList; +import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.misc.Pair; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.semantics.SemanticPipeline; +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.ErrorQueue; +import org.antlr.v4.test.runtime.RuntimeTestSupport; +import org.antlr.v4.test.runtime.StreamVacuum; +import org.antlr.v4.test.runtime.descriptors.LexerExecDescriptors; +import org.antlr.v4.test.runtime.descriptors.PerformanceDescriptors; +import org.antlr.v4.tool.*; +import org.stringtemplate.v4.ST; +import org.stringtemplate.v4.STGroup; +import org.stringtemplate.v4.STGroupString; + +import java.io.*; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.*; + +import static junit.framework.TestCase.*; +import static org.antlr.v4.test.runtime.BaseRuntimeTest.readFile; +import static org.antlr.v4.test.runtime.BaseRuntimeTest.writeFile; +import static org.junit.Assert.assertArrayEquals; + + +public class BaseDartTest implements RuntimeTestSupport { + private static final List AOT_COMPILE_TESTS = Arrays.asList( + new PerformanceDescriptors.DropLoopEntryBranchInLRRule_4().input, + new LexerExecDescriptors.LargeLexer().input + ); + + public static final String newline = System.getProperty("line.separator"); + public static final String pathSep = System.getProperty("path.separator"); + + + /** + * When the {@code antlr.preserve-test-dir} runtime property is set to + * {@code true}, the temporary directories created by the test run will not + * be removed at the end of the test run, even for tests that completed + * successfully. + *
+ * <p>
+ * The default behavior (used in all other cases) is removing the temporary
+ * directories for all tests which completed successfully, and preserving
+ * the directories for tests which failed.</p>
+ */ + public static final boolean PRESERVE_TEST_DIR = Boolean.parseBoolean(System.getProperty("antlr.preserve-test-dir", "false")); + + /** + * The base test directory is the directory where generated files get placed + * during unit test execution. + *
+ * <p>
+ * The default value for this property is the {@code java.io.tmpdir} system
+ * property, and can be overridden by setting the
+ * {@code antlr.java-test-dir} property to a custom location. Note that the
+ * {@code antlr.java-test-dir} property directly affects the
+ * {@link #CREATE_PER_TEST_DIRECTORIES} value as well.</p>
+ */ + public static final String BASE_TEST_DIR; + + /** + * When {@code true}, a temporary directory will be created for each test + * executed during the test run. + *
+ * <p>
+ * This value is {@code true} when the {@code antlr.java-test-dir} system
+ * property is set, and otherwise {@code false}.</p>
+ */ + public static final boolean CREATE_PER_TEST_DIRECTORIES; + + static { + String baseTestDir = System.getProperty("antlr.dart-test-dir"); + boolean perTestDirectories = false; + if (baseTestDir == null || baseTestDir.isEmpty()) { + baseTestDir = System.getProperty("java.io.tmpdir"); + perTestDirectories = true; + } + + if (!new File(baseTestDir).isDirectory()) { + throw new UnsupportedOperationException("The specified base test directory does not exist: " + baseTestDir); + } + + BASE_TEST_DIR = baseTestDir; + CREATE_PER_TEST_DIRECTORIES = perTestDirectories; + } + + /** + * Build up the full classpath we need, including the surefire path (if present) + */ + public static final String CLASSPATH = System.getProperty("java.class.path"); + + public String tmpdir = null; + + /** + * If error during parser execution, store stderr here; can't return + * stdout and stderr. This doesn't trap errors from running antlr. + */ + protected String stderrDuringParse; + + /** + * Errors found while running antlr + */ + protected StringBuilder antlrToolErrors; + + private static String cacheDartPackages; + + private String getPropertyPrefix() { + return "antlr-dart"; + } + + @Override + public void testSetUp() throws Exception { + if (CREATE_PER_TEST_DIRECTORIES) { + // new output dir for each test + String threadName = Thread.currentThread().getName(); + String testDirectory = getClass().getSimpleName() + "-" + threadName + "-" + System.nanoTime(); + tmpdir = new File(BASE_TEST_DIR, testDirectory).getAbsolutePath(); + } else { + tmpdir = new File(BASE_TEST_DIR).getAbsolutePath(); + if (!PRESERVE_TEST_DIR && new File(tmpdir).exists()) { + eraseFiles(); + } + } + antlrToolErrors = new StringBuilder(); + } + + @Override + public void testTearDown() throws Exception { + } + + @Override + public String getTmpDir() { + return tmpdir; + } + + @Override + public String getStdout() { + return null; + } + + @Override + public String getParseErrors() { + return stderrDuringParse; + } + + @Override + public String getANTLRToolErrors() { + if (antlrToolErrors.length() == 0) { + return null; + } + return antlrToolErrors.toString(); + } + + protected Tool newTool(String[] args) { + Tool tool = new Tool(args); + return tool; + } + + protected ATN createATN(Grammar g, boolean useSerializer) { + if (g.atn == null) { + semanticProcess(g); + assertEquals(0, g.tool.getNumErrors()); + + ParserATNFactory f; + if (g.isLexer()) { + f = new LexerATNFactory((LexerGrammar) g); + } else { + f = new ParserATNFactory(g); + } + + g.atn = f.createATN(); + assertEquals(0, g.tool.getNumErrors()); + } + + ATN atn = g.atn; + if (useSerializer) { + char[] serialized = ATNSerializer.getSerializedAsChars(atn); + return new ATNDeserializer().deserialize(serialized); + } + + return atn; + } + + protected void semanticProcess(Grammar g) { + if (g.ast != null && !g.ast.hasErrors) { +// System.out.println(g.ast.toStringTree()); + Tool antlr = new Tool(); + SemanticPipeline sem = new SemanticPipeline(g); + sem.process(); + if (g.getImportedGrammars() != null) { // process imported grammars (if any) + for (Grammar imp : g.getImportedGrammars()) { + antlr.processNonCombinedGrammar(imp, false); + } + } + } + } + + public DFA createDFA(Grammar g, DecisionState s) { +// PredictionDFAFactory conv = new PredictionDFAFactory(g, s); +// DFA dfa = conv.createDFA(); +// conv.issueAmbiguityWarnings(); +// System.out.print("DFA="+dfa); +// return dfa; + return null; + } + +// public void minimizeDFA(DFA dfa) { +// DFAMinimizer dmin = new DFAMinimizer(dfa); 
+// dfa.minimized = dmin.minimize(); +// } + + IntegerList getTypesFromString(Grammar g, String expecting) { + IntegerList expectingTokenTypes = new IntegerList(); + if (expecting != null && !expecting.trim().isEmpty()) { + for (String tname : expecting.replace(" ", "").split(",")) { + int ttype = g.getTokenType(tname); + expectingTokenTypes.add(ttype); + } + } + return expectingTokenTypes; + } + + public IntegerList getTokenTypesViaATN(String input, LexerATNSimulator lexerATN) { + ANTLRInputStream in = new ANTLRInputStream(input); + IntegerList tokenTypes = new IntegerList(); + int ttype; + do { + ttype = lexerATN.match(in, Lexer.DEFAULT_MODE); + tokenTypes.add(ttype); + } while (ttype != Token.EOF); + return tokenTypes; + } + + public List getTokenTypes(LexerGrammar lg, + ATN atn, + CharStream input) { + LexerATNSimulator interp = new LexerATNSimulator(atn, new DFA[]{new DFA(atn.modeToStartState.get(Lexer.DEFAULT_MODE))}, null); + List tokenTypes = new ArrayList(); + int ttype; + boolean hitEOF = false; + do { + if (hitEOF) { + tokenTypes.add("EOF"); + break; + } + int t = input.LA(1); + ttype = interp.match(input, Lexer.DEFAULT_MODE); + if (ttype == Token.EOF) { + tokenTypes.add("EOF"); + } else { + tokenTypes.add(lg.typeToTokenList.get(ttype)); + } + + if (t == IntStream.EOF) { + hitEOF = true; + } + } while (ttype != Token.EOF); + return tokenTypes; + } + + List checkRuleDFA(String gtext, String ruleName, String expecting) + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + Grammar g = new Grammar(gtext, equeue); + ATN atn = createATN(g, false); + ATNState s = atn.ruleToStartState[g.getRule(ruleName).index]; + if (s == null) { + System.err.println("no such rule: " + ruleName); + return null; + } + ATNState t = s.transition(0).target; + if (!(t instanceof DecisionState)) { + System.out.println(ruleName + " has no decision"); + return null; + } + DecisionState blk = (DecisionState) t; + checkRuleDFA(g, blk, expecting); + return equeue.all; + } + + List checkRuleDFA(String gtext, int decision, String expecting) + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + Grammar g = new Grammar(gtext, equeue); + ATN atn = createATN(g, false); + DecisionState blk = atn.decisionToState.get(decision); + checkRuleDFA(g, blk, expecting); + return equeue.all; + } + + void checkRuleDFA(Grammar g, DecisionState blk, String expecting) + throws Exception { + DFA dfa = createDFA(g, blk); + String result = null; + if (dfa != null) result = dfa.toString(); + assertEquals(expecting, result); + } + + List checkLexerDFA(String gtext, String expecting) + throws Exception { + return checkLexerDFA(gtext, LexerGrammar.DEFAULT_MODE_NAME, expecting); + } + + List checkLexerDFA(String gtext, String modeName, String expecting) + throws Exception { + ErrorQueue equeue = new ErrorQueue(); + LexerGrammar g = new LexerGrammar(gtext, equeue); + g.atn = createATN(g, false); +// LexerATNToDFAConverter conv = new LexerATNToDFAConverter(g); +// DFA dfa = conv.createDFA(modeName); +// g.setLookaheadDFA(0, dfa); // only one decision to worry about +// +// String result = null; +// if ( dfa!=null ) result = dfa.toString(); +// assertEquals(expecting, result); +// +// return equeue.all; + return null; + } + + protected String load(String fileName, String encoding) + throws IOException { + if (fileName == null) { + return null; + } + + String fullFileName = getClass().getPackage().getName().replace('.', '/') + '/' + fileName; + int size = 65000; + InputStreamReader isr; + InputStream fis = 
getClass().getClassLoader().getResourceAsStream(fullFileName); + if (encoding != null) { + isr = new InputStreamReader(fis, encoding); + } else { + isr = new InputStreamReader(fis); + } + try { + char[] data = new char[size]; + int n = isr.read(data); + return new String(data, 0, n); + } finally { + isr.close(); + } + } + + protected String execLexer(String grammarFileName, + String grammarStr, + String lexerName, + String input) { + return execLexer(grammarFileName, grammarStr, lexerName, input, false); + } + + @Override + public String execLexer(String grammarFileName, + String grammarStr, + String lexerName, + String input, + boolean showDFA) { + boolean success = rawGenerateAndBuildRecognizer(grammarFileName, + grammarStr, + null, + lexerName); + assertTrue(success); + writeFile(tmpdir, "input", input); + writeLexerTestFile(lexerName, showDFA); + String output = execClass("Test", AOT_COMPILE_TESTS.contains(input)); + return output; + } + + public ParseTree execParser(String startRuleName, String input, + String parserName, String lexerName) + throws Exception { + Pair pl = getParserAndLexer(input, parserName, lexerName); + Parser parser = pl.a; + return execStartRule(startRuleName, parser); + } + + public ParseTree execStartRule(String startRuleName, Parser parser) + throws IllegalAccessException, InvocationTargetException, + NoSuchMethodException { + Method startRule = null; + Object[] args = null; + try { + startRule = parser.getClass().getMethod(startRuleName); + } catch (NoSuchMethodException nsme) { + // try with int _p arg for recursive func + startRule = parser.getClass().getMethod(startRuleName, int.class); + args = new Integer[]{0}; + } + ParseTree result = (ParseTree) startRule.invoke(parser, args); +// System.out.println("parse tree = "+result.toStringTree(parser)); + return result; + } + + public Pair getParserAndLexer(String input, + String parserName, String lexerName) + throws Exception { + final Class lexerClass = loadLexerClassFromTempDir(lexerName); + final Class parserClass = loadParserClassFromTempDir(parserName); + + ANTLRInputStream in = new ANTLRInputStream(new StringReader(input)); + + Class c = lexerClass.asSubclass(Lexer.class); + Constructor ctor = c.getConstructor(CharStream.class); + Lexer lexer = ctor.newInstance(in); + + Class pc = parserClass.asSubclass(Parser.class); + Constructor pctor = pc.getConstructor(TokenStream.class); + CommonTokenStream tokens = new CommonTokenStream(lexer); + Parser parser = pctor.newInstance(tokens); + return new Pair(parser, lexer); + } + + public Class loadClassFromTempDir(String name) throws Exception { + ClassLoader loader = + new URLClassLoader(new URL[]{new File(tmpdir).toURI().toURL()}, + ClassLoader.getSystemClassLoader()); + return loader.loadClass(name); + } + + public Class loadLexerClassFromTempDir(String name) throws Exception { + return loadClassFromTempDir(name).asSubclass(Lexer.class); + } + + public Class loadParserClassFromTempDir(String name) throws Exception { + return loadClassFromTempDir(name).asSubclass(Parser.class); + } + + @Override + public String execParser(String grammarFileName, + String grammarStr, + String parserName, + String lexerName, + String listenerName, + String visitorName, + String startRuleName, + String input, + boolean showDiagnosticErrors) { + return execParser(grammarFileName, grammarStr, parserName, lexerName, + listenerName, visitorName, startRuleName, input, showDiagnosticErrors, false); + } + + public String execParser(String grammarFileName, + String grammarStr, + String 
parserName, + String lexerName, + String listenerName, + String visitorName, + String startRuleName, + String input, + boolean showDiagnosticErrors, + boolean profile) { + boolean success = rawGenerateAndBuildRecognizer(grammarFileName, + grammarStr, + parserName, + lexerName, + "-visitor"); + assertTrue(success); + writeFile(tmpdir, "input", input); + return rawExecRecognizer(parserName, + lexerName, + startRuleName, + showDiagnosticErrors, + profile, + AOT_COMPILE_TESTS.contains(input)); + } + + /** + * Return true if all is well + */ + protected boolean rawGenerateAndBuildRecognizer(String grammarFileName, + String grammarStr, + String parserName, + String lexerName, + String... extraOptions) { + return rawGenerateAndBuildRecognizer(grammarFileName, grammarStr, parserName, lexerName, false, extraOptions); + } + + /** + * Return true if all is well + */ + protected boolean rawGenerateAndBuildRecognizer(String grammarFileName, + String grammarStr, + String parserName, + String lexerName, + boolean defaultListener, + String... extraOptions) { + ErrorQueue equeue = + BaseRuntimeTest.antlrOnString(getTmpDir(), "Dart", grammarFileName, grammarStr, defaultListener, extraOptions); + if (!equeue.errors.isEmpty()) { + return false; + } + + List files = new ArrayList(); + if (lexerName != null) { + files.add(lexerName + ".dart"); + } + if (parserName != null) { + files.add(parserName + ".dart"); + Set optionsSet = new HashSet(Arrays.asList(extraOptions)); + String grammarName = grammarFileName.substring(0, grammarFileName.lastIndexOf('.')); + if (!optionsSet.contains("-no-listener")) { + files.add(grammarName + "Listener.dart"); + files.add(grammarName + "BaseListener.dart"); + } + if (optionsSet.contains("-visitor")) { + files.add(grammarName + "Visitor.dart"); + files.add(grammarName + "BaseVisitor.dart"); + } + } + + String runtime = locateRuntime(); + writeFile(tmpdir, "pubspec.yaml", + "name: \"test\"\n" + + "dependencies:\n" + + " antlr4:\n" + + " path: " + runtime + "\n"); + if (cacheDartPackages == null) { + try { + Process process = Runtime.getRuntime().exec(new String[]{locatePub(), "get"}, null, new File(tmpdir)); + StreamVacuum stderrVacuum = new StreamVacuum(process.getErrorStream()); + stderrVacuum.start(); + process.waitFor(); + stderrVacuum.join(); + String stderrDuringPubGet = stderrVacuum.toString(); + if (!stderrDuringPubGet.isEmpty()) { + System.out.println("Pub Get error: " + stderrVacuum.toString()); + } + } catch (IOException | InterruptedException e) { + e.printStackTrace(); + return false; + } + cacheDartPackages = readFile(tmpdir, ".packages"); + } else { + writeFile(tmpdir, ".packages", cacheDartPackages); + } + return true; // allIsWell: no compile + } + + protected String rawExecRecognizer(String parserName, + String lexerName, + String parserStartRuleName, + boolean debug, + boolean profile, + boolean aotCompile) { + this.stderrDuringParse = null; + if (parserName == null) { + writeLexerTestFile(lexerName, false); + } else { + writeTestFile(parserName, + lexerName, + parserStartRuleName, + debug, + profile); + } + + return execClass("Test", aotCompile); + } + + public String execClass(String className, boolean compile) { + try { + if (compile) { + String[] args = new String[]{ + locateDart2Native(), + className + ".dart", "-o", className + }; + String cmdLine = Utils.join(args, " "); + System.err.println("Compile: " + cmdLine); + Process process = + Runtime.getRuntime().exec(args, null, new File(tmpdir)); + StreamVacuum stderrVacuum = new 
StreamVacuum(process.getErrorStream()); + stderrVacuum.start(); + int result = process.waitFor(); + if (result != 0) { + stderrVacuum.join(); + System.err.print("Error compiling dart file: " + stderrVacuum.toString()); + } + } + + String[] args; + if (compile) { + args = new String[]{ + new File(tmpdir, className).getAbsolutePath(), new File(tmpdir, "input").getAbsolutePath() + }; + } else { + args = new String[]{ + locateDart(), + className + ".dart", new File(tmpdir, "input").getAbsolutePath() + }; + } + //String cmdLine = Utils.join(args, " "); + //System.err.println("execParser: " + cmdLine); + Process process = + Runtime.getRuntime().exec(args, null, new File(tmpdir)); + StreamVacuum stdoutVacuum = new StreamVacuum(process.getInputStream()); + StreamVacuum stderrVacuum = new StreamVacuum(process.getErrorStream()); + stdoutVacuum.start(); + stderrVacuum.start(); + process.waitFor(); + stdoutVacuum.join(); + stderrVacuum.join(); + String output = stdoutVacuum.toString(); + if (output.length() == 0) { + output = null; + } + if (stderrVacuum.toString().length() > 0) { + this.stderrDuringParse = stderrVacuum.toString(); + } + return output; + } catch (Exception e) { + System.err.println("can't exec recognizer"); + e.printStackTrace(System.err); + } + return null; + } + + private String locateTool(String tool) { + final String dartPath = System.getProperty("DART_PATH"); + + final String[] tools = isWindows() + ? new String[]{tool + ".exe", tool + ".bat", tool} + : new String[]{tool}; + + if (dartPath != null) { + for (String t : tools) { + if (new File(dartPath + t).exists()) { + return dartPath + t; + } + } + } + + final String[] roots = isWindows() + ? new String[]{"C:\\tools\\dart-sdk\\bin\\"} + : new String[]{"/usr/local/bin/", "/opt/local/bin/", "/usr/bin/", "/usr/lib/dart/bin/"}; + + for (String root : roots) { + for (String t : tools) { + if (new File(root + t).exists()) { + return root + t; + } + } + } + + throw new RuntimeException("Could not locate " + tool); + } + + protected String locatePub() { + String propName = getPropertyPrefix() + "-pub"; + String prop = System.getProperty(propName); + + if (prop == null || prop.length() == 0) { + prop = locateTool("pub"); + } + + File file = new File(prop); + + if (!file.exists()) { + throw new RuntimeException("Missing system property:" + propName); + } + + return file.getAbsolutePath(); + } + + protected String locateDart() { + String propName = getPropertyPrefix() + "-dart"; + String prop = System.getProperty(propName); + + if (prop == null || prop.length() == 0) { + prop = locateTool("dart"); + } + + File file = new File(prop); + + if (!file.exists()) { + throw new RuntimeException("Missing system property:" + propName); + } + + return file.getAbsolutePath(); + } + + protected String locateDart2Native() { + String propName = getPropertyPrefix() + "-dart2native"; + String prop = System.getProperty(propName); + + if (prop == null || prop.length() == 0) { + prop = locateTool("dart2native"); + } + + File file = new File(prop); + + if (!file.exists()) { + throw new RuntimeException("Missing system property:" + propName); + } + + return file.getAbsolutePath(); + } + + private String locateRuntime() { + final ClassLoader loader = Thread.currentThread().getContextClassLoader(); + final URL runtimeSrc = loader.getResource("Dart"); + if (runtimeSrc == null) { + throw new RuntimeException("Cannot find Dart runtime"); + } + if (isWindows()) { + return runtimeSrc.getPath().replaceFirst("/", ""); + } + return runtimeSrc.getPath(); + } + + private 
boolean isWindows() { + return System.getProperty("os.name").toLowerCase().contains("windows"); + } + +// void ambig(List msgs, int[] expectedAmbigAlts, String expectedAmbigInput) +// throws Exception +// { +// ambig(msgs, 0, expectedAmbigAlts, expectedAmbigInput); +// } + +// void ambig(List msgs, int i, int[] expectedAmbigAlts, String expectedAmbigInput) +// throws Exception +// { +// List amsgs = getMessagesOfType(msgs, AmbiguityMessage.class); +// AmbiguityMessage a = (AmbiguityMessage)amsgs.get(i); +// if ( a==null ) assertNull(expectedAmbigAlts); +// else { +// assertEquals(a.conflictingAlts.toString(), Arrays.toString(expectedAmbigAlts)); +// } +// assertEquals(expectedAmbigInput, a.input); +// } + +// void unreachable(List msgs, int[] expectedUnreachableAlts) +// throws Exception +// { +// unreachable(msgs, 0, expectedUnreachableAlts); +// } + +// void unreachable(List msgs, int i, int[] expectedUnreachableAlts) +// throws Exception +// { +// List amsgs = getMessagesOfType(msgs, UnreachableAltsMessage.class); +// UnreachableAltsMessage u = (UnreachableAltsMessage)amsgs.get(i); +// if ( u==null ) assertNull(expectedUnreachableAlts); +// else { +// assertEquals(u.conflictingAlts.toString(), Arrays.toString(expectedUnreachableAlts)); +// } +// } + + List getMessagesOfType(List msgs, Class c) { + List filtered = new ArrayList(); + for (ANTLRMessage m : msgs) { + if (m.getClass() == c) filtered.add(m); + } + return filtered; + } + + public void checkRuleATN(Grammar g, String ruleName, String expecting) { +// DOTGenerator dot = new DOTGenerator(g); +// System.out.println(dot.getDOT(g.atn.ruleToStartState[g.getRule(ruleName).index])); + + Rule r = g.getRule(ruleName); + ATNState startState = g.getATN().ruleToStartState[r.index]; + ATNPrinter serializer = new ATNPrinter(g, startState); + String result = serializer.asString(); + + //System.out.print(result); + assertEquals(expecting, result); + } + + public void testActions(String templates, String actionName, String action, String expected) throws org.antlr.runtime.RecognitionException { + int lp = templates.indexOf('('); + String name = templates.substring(0, lp); + STGroup group = new STGroupString(templates); + ST st = group.getInstanceOf(name); + st.add(actionName, action); + String grammar = st.render(); + ErrorQueue equeue = new ErrorQueue(); + Grammar g = new Grammar(grammar, equeue); + if (g.ast != null && !g.ast.hasErrors) { + SemanticPipeline sem = new SemanticPipeline(g); + sem.process(); + + ATNFactory factory = new ParserATNFactory(g); + if (g.isLexer()) factory = new LexerATNFactory((LexerGrammar) g); + g.atn = factory.createATN(); + + AnalysisPipeline anal = new AnalysisPipeline(g); + anal.process(); + + CodeGenerator gen = new CodeGenerator(g); + ST outputFileST = gen.generateParser(false); + String output = outputFileST.render(); + //System.out.println(output); + String b = "#" + actionName + "#"; + int start = output.indexOf(b); + String e = "#end-" + actionName + "#"; + int end = output.indexOf(e); + String snippet = output.substring(start + b.length(), end); + assertEquals(expected, snippet); + } + if (equeue.size() > 0) { +// System.err.println(equeue.toString()); + } + } + + protected void checkGrammarSemanticsError(ErrorQueue equeue, + GrammarSemanticsMessage expectedMessage) + throws Exception { + ANTLRMessage foundMsg = null; + for (int i = 0; i < equeue.errors.size(); i++) { + ANTLRMessage m = equeue.errors.get(i); + if (m.getErrorType() == expectedMessage.getErrorType()) { + foundMsg = m; + } + } + 
assertNotNull("no error; " + expectedMessage.getErrorType() + " expected", foundMsg); + assertTrue("error is not a GrammarSemanticsMessage", + foundMsg instanceof GrammarSemanticsMessage); + assertEquals(Arrays.toString(expectedMessage.getArgs()), Arrays.toString(foundMsg.getArgs())); + if (equeue.size() != 1) { + System.err.println(equeue); + } + } + + protected void checkGrammarSemanticsWarning(ErrorQueue equeue, + GrammarSemanticsMessage expectedMessage) + throws Exception { + ANTLRMessage foundMsg = null; + for (int i = 0; i < equeue.warnings.size(); i++) { + ANTLRMessage m = equeue.warnings.get(i); + if (m.getErrorType() == expectedMessage.getErrorType()) { + foundMsg = m; + } + } + assertNotNull("no error; " + expectedMessage.getErrorType() + " expected", foundMsg); + assertTrue("error is not a GrammarSemanticsMessage", + foundMsg instanceof GrammarSemanticsMessage); + assertEquals(Arrays.toString(expectedMessage.getArgs()), Arrays.toString(foundMsg.getArgs())); + if (equeue.size() != 1) { + System.err.println(equeue); + } + } + + protected void checkError(ErrorQueue equeue, + ANTLRMessage expectedMessage) + throws Exception { + //System.out.println("errors="+equeue); + ANTLRMessage foundMsg = null; + for (int i = 0; i < equeue.errors.size(); i++) { + ANTLRMessage m = equeue.errors.get(i); + if (m.getErrorType() == expectedMessage.getErrorType()) { + foundMsg = m; + } + } + assertTrue("no error; " + expectedMessage.getErrorType() + " expected", !equeue.errors.isEmpty()); + assertTrue("too many errors; " + equeue.errors, equeue.errors.size() <= 1); + assertNotNull("couldn't find expected error: " + expectedMessage.getErrorType(), foundMsg); + /* + * assertTrue("error is not a GrammarSemanticsMessage", foundMsg + * instanceof GrammarSemanticsMessage); + */ + assertArrayEquals(expectedMessage.getArgs(), foundMsg.getArgs()); + } + + public static class FilteringTokenStream extends CommonTokenStream { + public FilteringTokenStream(TokenSource src) { + super(src); + } + + Set hide = new HashSet(); + + @Override + protected boolean sync(int i) { + if (!super.sync(i)) { + return false; + } + + Token t = get(i); + if (hide.contains(t.getType())) { + ((WritableToken) t).setChannel(Token.HIDDEN_CHANNEL); + } + + return true; + } + + public void setTokenTypeChannel(int ttype, int channel) { + hide.add(ttype); + } + } + + protected void writeTestFile(String parserName, + String lexerName, + String parserStartRuleName, + boolean debug, + boolean profile) { + ST outputFileST = new ST( + "import 'package:antlr4/antlr4.dart';\n" + + "\n" + + "import '.dart';\n" + + "import '.dart';\n" + + "\n" + + "void main(List\\ args) async {\n" + + " CharStream input = await InputStream.fromPath(args[0]);\n" + + " final lex = (input);\n" + + " final tokens = CommonTokenStream(lex);\n" + + " \n" + + " parser.buildParseTree = true;\n" + + " \n" + + " ParserRuleContext tree = parser.();\n" + + " print('[${profiler.getDecisionInfo().join(', ')}]');\n" + + " ParseTreeWalker.DEFAULT.walk(TreeShapeListener(), tree);\n" + + "}\n" + + "\n" + + "class TreeShapeListener implements ParseTreeListener {\n" + + " @override void visitTerminal(TerminalNode node) {}\n" + + "\n" + + " @override void visitErrorNode(ErrorNode node) {}\n" + + "\n" + + " @override void exitEveryRule(ParserRuleContext ctx) {}\n" + + "\n" + + " @override\n" + + " void enterEveryRule(ParserRuleContext ctx) {\n" + + " for (var i = 0; i \\< ctx.childCount; i++) {\n" + + " final parent = ctx.getChild(i).parent;\n" + + " if (!(parent is RuleNode) || (parent 
as RuleNode).ruleContext != ctx) {\n" + + " throw StateError('Invalid parse tree shape detected.');\n" + + " }\n" + + " }\n" + + " }\n" + + "}\n" + ); + ST createParserST = new ST("final parser = (tokens);\n"); + if (debug) { + createParserST = + new ST( + "final parser = (tokens);\n" + + " parser.addErrorListener(new DiagnosticErrorListener());\n"); + } + if (profile) { + outputFileST.add("profile", + "ProfilingATNSimulator profiler = ProfilingATNSimulator(parser);\n" + + "parser.setInterpreter(profiler);"); + } else { + outputFileST.add("profile", new ArrayList()); + } + outputFileST.add("createParser", createParserST); + outputFileST.add("parserName", parserName); + outputFileST.add("lexerName", lexerName); + outputFileST.add("parserStartRuleName", parserStartRuleName); + writeFile(tmpdir, "Test.dart", outputFileST.render()); + } + + protected void writeLexerTestFile(String lexerName, boolean showDFA) { + ST outputFileST = new ST( + "import 'dart:io';\n" + + "\n" + + "import 'package:antlr4/antlr4.dart';\n" + + "\n" + + "import '.dart';\n" + + "\n" + + "void main(List\\ args) async {\n" + + " CharStream input = await InputStream.fromPath(args[0]);\n" + + " lex = (input);\n" + + " CommonTokenStream tokens = CommonTokenStream(lex);\n" + + " tokens.fill();\n" + + " for (Object t in tokens.getTokens())\n" + + " print(t);\n" + + "\n" + + (showDFA ? "stdout.write(lex.interpreter.getDFA(Lexer.DEFAULT_MODE).toLexerString());\n" : "") + + "}\n" + ); + + outputFileST.add("lexerName", lexerName); + writeFile(tmpdir, "Test.dart", outputFileST.render()); + } + + protected void eraseFiles() { + if (tmpdir == null) { + return; + } + + File tmpdirF = new File(tmpdir); + String[] files = tmpdirF.list(); + for (int i = 0; files != null && i < files.length; i++) { + new File(tmpdir + "/" + files[i]).delete(); + } + } + + @Override + public void eraseTempDir() { + File tmpdirF = new File(tmpdir); + if (tmpdirF.exists()) { + eraseFiles(); + tmpdirF.delete(); + } + } + + public String getFirstLineOfException() { + if (this.stderrDuringParse == null) { + return null; + } + String[] lines = this.stderrDuringParse.split("\n"); + String prefix = "Exception in thread \"main\" "; + return lines[0].substring(prefix.length(), lines[0].length()); + } + + /** + * When looking at a result set that consists of a Map/HashTable + * we cannot rely on the output order, as the hashing algorithm or other aspects + * of the implementation may be different on differnt JDKs or platforms. Hence + * we take the Map, convert the keys to a List, sort them and Stringify the Map, which is a + * bit of a hack, but guarantees that we get the same order on all systems. We assume that + * the keys are strings. + * + * @param m The Map that contains keys we wish to return in sorted order + * @return A string that represents all the keys in sorted order. 
+ */ + public String sortMapToString(Map m) { + // Pass in crap, and get nothing back + // + if (m == null) { + return null; + } + + System.out.println("Map toString looks like: " + m.toString()); + + // Sort the keys in the Map + // + TreeMap nset = new TreeMap(m); + + System.out.println("Tree map looks like: " + nset.toString()); + return nset.toString(); + } + + public List realElements(List elements) { + return elements.subList(Token.MIN_USER_TOKEN_TYPE, elements.size()); + } + + public void assertNotNullOrEmpty(String message, String text) { + assertNotNull(message, text); + assertFalse(message, text.isEmpty()); + } + + public void assertNotNullOrEmpty(String text) { + assertNotNull(text); + assertFalse(text.isEmpty()); + } + + public static class IntTokenStream implements TokenStream { + public IntegerList types; + int p = 0; + + public IntTokenStream(IntegerList types) { + this.types = types; + } + + @Override + public void consume() { + p++; + } + + @Override + public int LA(int i) { + return LT(i).getType(); + } + + @Override + public int mark() { + return index(); + } + + @Override + public int index() { + return p; + } + + @Override + public void release(int marker) { + seek(marker); + } + + @Override + public void seek(int index) { + p = index; + } + + @Override + public int size() { + return types.size(); + } + + @Override + public String getSourceName() { + return UNKNOWN_SOURCE_NAME; + } + + @Override + public Token LT(int i) { + CommonToken t; + int rawIndex = p + i - 1; + if (rawIndex >= types.size()) t = new CommonToken(Token.EOF); + else t = new CommonToken(types.get(rawIndex)); + t.setTokenIndex(rawIndex); + return t; + } + + @Override + public Token get(int i) { + return new CommonToken(types.get(i)); + } + + @Override + public TokenSource getTokenSource() { + return null; + } + + @Override + public String getText() { + throw new UnsupportedOperationException("can't give strings"); + } + + @Override + public String getText(Interval interval) { + throw new UnsupportedOperationException("can't give strings"); + } + + @Override + public String getText(RuleContext ctx) { + throw new UnsupportedOperationException("can't give strings"); + } + + @Override + public String getText(Token start, Token stop) { + throw new UnsupportedOperationException("can't give strings"); + } + } + + /** + * Sort a list + */ + public > List sort(List data) { + List dup = new ArrayList(); + dup.addAll(data); + Collections.sort(dup); + return dup; + } + + /** + * Return map sorted by key + */ + public , V> LinkedHashMap sort(Map data) { + LinkedHashMap dup = new LinkedHashMap(); + List keys = new ArrayList(); + keys.addAll(data.keySet()); + Collections.sort(keys); + for (K k : keys) { + dup.put(k, data.get(k)); + } + return dup; + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeLexers.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeLexers.java new file mode 100644 index 000000000..60aa4a35a --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeLexers.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.CompositeLexersDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompositeLexers extends BaseRuntimeTest { + public TestCompositeLexers(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(CompositeLexersDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeParsers.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeParsers.java new file mode 100644 index 000000000..638413f9a --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestCompositeParsers.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.CompositeParsersDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompositeParsers extends BaseRuntimeTest { + public TestCompositeParsers(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(CompositeParsersDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestFullContextParsing.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestFullContextParsing.java new file mode 100644 index 000000000..a0d7f9c1a --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestFullContextParsing.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.FullContextParsingDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestFullContextParsing extends BaseRuntimeTest { + public TestFullContextParsing(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(FullContextParsingDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLeftRecursion.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLeftRecursion.java new file mode 100644 index 000000000..e92f1b306 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLeftRecursion.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LeftRecursionDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLeftRecursion extends BaseRuntimeTest { + public TestLeftRecursion(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LeftRecursionDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerErrors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerErrors.java new file mode 100644 index 000000000..b95cd59bf --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerErrors.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LexerErrorsDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLexerErrors extends BaseRuntimeTest { + public TestLexerErrors(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LexerErrorsDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerExec.java new file mode 100644 index 000000000..1ed1d84ca --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestLexerExec.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LexerExecDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLexerExec extends BaseRuntimeTest { + public TestLexerExec(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LexerExecDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestListeners.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestListeners.java new file mode 100644 index 000000000..e15dee9ae --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestListeners.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ListenersDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestListeners extends BaseRuntimeTest { + public TestListeners(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ListenersDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParseTrees.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParseTrees.java new file mode 100644 index 000000000..0115e384d --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParseTrees.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParseTreesDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParseTrees extends BaseRuntimeTest { + public TestParseTrees(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParseTreesDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserErrors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserErrors.java new file mode 100644 index 000000000..87b850fb9 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserErrors.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParserErrorsDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParserErrors extends BaseRuntimeTest { + public TestParserErrors(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParserErrorsDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserExec.java new file mode 100644 index 000000000..c22aa8ceb --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestParserExec.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParserExecDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParserExec extends BaseRuntimeTest { + public TestParserExec(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParserExecDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestPerformance.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestPerformance.java new file mode 100644 index 000000000..78e6942ac --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestPerformance.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.PerformanceDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestPerformance extends BaseRuntimeTest { + public TestPerformance(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(PerformanceDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalLexer.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalLexer.java new file mode 100644 index 000000000..8825042cf --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalLexer.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SemPredEvalLexerDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSemPredEvalLexer extends BaseRuntimeTest { + public TestSemPredEvalLexer(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SemPredEvalLexerDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalParser.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalParser.java new file mode 100644 index 000000000..87d6a9dea --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSemPredEvalParser.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SemPredEvalParserDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSemPredEvalParser extends BaseRuntimeTest { + public TestSemPredEvalParser(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SemPredEvalParserDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSets.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSets.java new file mode 100644 index 000000000..4fe603fd4 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/dart/TestSets.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.dart; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SetsDescriptors; +import org.antlr.v4.test.runtime.dart.BaseDartTest; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSets extends BaseRuntimeTest { + public TestSets(RuntimeTestDescriptor descriptor) { + super(descriptor,new BaseDartTest()); + } + + @Parameterized.Parameters(name="{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SetsDescriptors.class, "Dart"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerExecDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerExecDescriptors.java index 7c74e74cb..ab7825be8 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerExecDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LexerExecDescriptors.java @@ -1051,7 +1051,7 @@ public class LexerExecDescriptors { grammar = new String(Files.readAllBytes(Paths.get(stuff.toURI()))); } catch (Exception e) { - System.err.println("Cannot find grammar org/antlr/v4/test/runtime/LarseLexer.g4"); + System.err.println("Cannot find grammar org/antlr/v4/test/runtime/LargeLexer.g4"); } return new Pair<>(grammarName, grammar); diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java index bda6fecf3..5bd26693c 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java @@ -112,7 +112,7 @@ public class ParseTreesDescriptors { @Override public boolean ignore(String targetName) { - return !targetName.matches("Java|Python2|Python3|Node|Swift|CSharp"); + return !targetName.matches("Java|Python2|Python3|Node|Swift|CSharp|Dart"); } } diff --git 
a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java
index 0a3e40b19..b6f1c4686 100644
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java
@@ -639,7 +639,7 @@ public class ParserErrorsDescriptors {
 		@Override
 		public boolean ignore(String targetName) {
-			return !"Java".equals(targetName) && !"Swift".equals(targetName);
+			return !"Java".equals(targetName) && !"Swift".equals(targetName) && !"Dart".equals(targetName);
 		}
 	}
 }
diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java
index ca6e393dd..d2b0931f0 100644
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java
@@ -889,4 +889,32 @@ public class ParserExecDescriptors {
 		@CommentHasStringValue
 		public String grammar;
 	}
+
+	/**
+	 * This is a regression test for antlr/antlr4#2728
+	 * It should generate correct code for grammars with more than 65 tokens.
+	 * https://github.com/antlr/antlr4/pull/2728#issuecomment-622940562
+	 */
+	public static class TokenOffset extends BaseParserTestDescriptor {
+		public String input = "12 34 56 66";
+		public String output = "12345666\n";
+
+		public String errors = null;
+		public String startRule = "a";
+		public String grammarName = "L";
+
+		/**
+		 grammar L;
+		 a : ('1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'|'10'|'11'|'12'|'13'|'14'|'15'|'16'
+		 |'17'|'18'|'19'|'20'|'21'|'22'|'23'|'24'|'25'|'26'|'27'|'28'|'29'|'30'|'31'|'32'
+		 |'33'|'34'|'35'|'36'|'37'|'38'|'39'|'40'|'41'|'42'|'43'|'44'|'45'|'46'|'47'|'48'
+		 |'49'|'50'|'51'|'52'|'53'|'54'|'55'|'56'|'57'|'58'|'59'|'60'|'61'|'62'|'63'|'64'
+		 |'65'|'66')+ {
+		 <writeln("$text")>
+		 };
+		 WS : (' '|'\n') -> skip;
+		 */
+		@CommentHasStringValue
+		public String grammar;
+	}
 }
diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java
index 2c2702fe5..69dbd9a07 100644
--- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java
+++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java
@@ -113,7 +113,7 @@ public class PerformanceDescriptors {
 		@Override
 		public boolean ignore(String targetName) {
-			return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node", "Cpp", "Swift").contains(targetName);
+			return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node", "Cpp", "Swift", "Dart").contains(targetName);
 		}
 	}
@@ -199,7 +199,7 @@ public class PerformanceDescriptors {
 		@Override
 		public boolean ignore(String targetName) {
 			// passes, but still too slow in Python and JavaScript
-			return !Arrays.asList("Java", "CSharp", "Cpp", "Swift").contains(targetName);
+			return !Arrays.asList("Java", "CSharp", "Cpp", "Swift", "Dart").contains(targetName);
 		}
 	}
diff --git a/runtime/Dart/.gitignore b/runtime/Dart/.gitignore
new file mode 100644
index 000000000..a45b12ec7
--- /dev/null
+++ b/runtime/Dart/.gitignore
@@ -0,0 +1,23 @@
+!lib
+
+# See https://www.dartlang.org/guides/libraries/private-files
+
+# Files and directories created by pub
+.dart_tool/
+.packages
+build/
+# If
you're building an application, you may want to check-in your pubspec.lock +pubspec.lock + +# Directory created by dartdoc +# If you don't generate documentation locally you can remove this line. +doc/api/ + +# Avoid committing generated Javascript files: +*.dart.js +*.info.json # Produced by the --dump-info flag. +*.js # When generated by dart2js. Don't specify *.js if your + # project includes source files written in JavaScript. +*.js_ +*.js.deps +*.js.map \ No newline at end of file diff --git a/runtime/Dart/CHANGELOG.md b/runtime/Dart/CHANGELOG.md new file mode 100644 index 000000000..84522ce54 --- /dev/null +++ b/runtime/Dart/CHANGELOG.md @@ -0,0 +1,4 @@ + +## 4.8.0-dev.2 + +* Initial release \ No newline at end of file diff --git a/runtime/Dart/LICENSE b/runtime/Dart/LICENSE new file mode 100644 index 000000000..2042d1bda --- /dev/null +++ b/runtime/Dart/LICENSE @@ -0,0 +1,52 @@ +[The "BSD 3-clause license"] +Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +===== + +MIT License for codepointat.js from https://git.io/codepointat +MIT License for fromcodepoint.js from https://git.io/vDW1m + +Copyright Mathias Bynens + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/runtime/Dart/README.md b/runtime/Dart/README.md new file mode 100644 index 000000000..3b2b4a78b --- /dev/null +++ b/runtime/Dart/README.md @@ -0,0 +1,11 @@ +# Dart target for ANTLR 4 + +Dart runtime libraries for ANTLR 4 + +This runtime is available through [pub](https://pub.dev). The package name is 'antlr4'. + +See www.antlr.org for more information on ANTLR. + +See https://github.com/antlr/antlr4/blob/master/doc/dart-target.md for more information on using ANTLR in Dart. + + diff --git a/runtime/Dart/analysis_options.yaml b/runtime/Dart/analysis_options.yaml new file mode 100644 index 000000000..108d1058a --- /dev/null +++ b/runtime/Dart/analysis_options.yaml @@ -0,0 +1 @@ +include: package:pedantic/analysis_options.yaml diff --git a/runtime/Dart/lib/antlr4.dart b/runtime/Dart/lib/antlr4.dart new file mode 100644 index 000000000..0a4b4f60e --- /dev/null +++ b/runtime/Dart/lib/antlr4.dart @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +library antlr4; + +export 'src/atn/atn.dart'; +export 'src/dfa/dfa.dart'; +export 'src/tree/tree.dart'; +export 'src/error/error.dart'; +export 'src/rule_context.dart'; +export 'src/input_stream.dart'; +export 'src/token_stream.dart'; +export 'src/lexer.dart'; +export 'src/parser.dart'; +export 'src/parser_rule_context.dart'; +export 'src/vocabulary.dart'; +export 'src/runtime_meta_data.dart'; +export 'src/token.dart'; diff --git a/runtime/Dart/lib/src/atn/atn.dart b/runtime/Dart/lib/src/atn/atn.dart new file mode 100644 index 000000000..a0400f41d --- /dev/null +++ b/runtime/Dart/lib/src/atn/atn.dart @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +export 'src/atn.dart'; +export 'src/atn_config.dart'; +export 'src/atn_config_set.dart'; +export 'src/atn_deserializer.dart'; +export 'src/atn_simulator.dart'; +export 'src/atn_state.dart'; +export 'src/info.dart'; +export 'src/lexer_action_executor.dart'; +export 'src/lexer_atn_simulator.dart'; +export 'src/parser_atn_simulator.dart'; +export 'src/profiling_atn_simulator.dart'; +export 'src/transition.dart'; diff --git a/runtime/Dart/lib/src/atn/src/atn.dart b/runtime/Dart/lib/src/atn/src/atn.dart new file mode 100644 index 000000000..5c8c4a455 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn.dart @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../interval_set.dart'; +import '../../ll1_analyzer.dart'; +import '../../rule_context.dart'; +import '../../token.dart'; +import 'atn_state.dart'; +import 'atn_type.dart'; +import 'lexer_action.dart'; +import 'transition.dart'; + +class ATN { + static final INVALID_ALT_NUMBER = 0; + + List states = []; + + /// Each subrule/rule is a decision point and we must track them so we + /// can go back later and build DFA predictors for them. 
This includes
+  /// all the rules, subrules, optional blocks, ()+, ()* etc...
+  List<DecisionState> decisionToState = [];
+
+  /// Maps from rule index to starting state number.
+  List<RuleStartState> ruleToStartState;
+
+  /// Maps from rule index to stop state number.
+  List<RuleStopState> ruleToStopState;
+
+  Map<String, TokensStartState> modeNameToStartState = {};
+
+  /// The type of the ATN.
+  final ATNType grammarType;
+
+  /// The maximum value for any symbol recognized by a transition in the ATN.
+  final int maxTokenType;
+
+  /// For lexer ATNs, this maps the rule index to the resulting token type.
+  /// For parser ATNs, this maps the rule index to the generated bypass token
+  /// type if the
+  /// {@link ATNDeserializationOptions#isGenerateRuleBypassTransitions}
+  /// deserialization option was specified; otherwise, this is null.
+  List<int> ruleToTokenType;
+
+  /// For lexer ATNs, this is an array of [LexerAction] objects which may
+  /// be referenced by action transitions in the ATN.
+  List<LexerAction> lexerActions;
+
+  List<TokensStartState> modeToStartState = [];
+
+  /// Used for runtime deserialization of ATNs from strings
+  ATN(this.grammarType, this.maxTokenType);
+
+  /// Compute the set of valid tokens that can occur starting in state [s].
+  /// If [ctx] is null, the set of tokens will not include what can follow
+  /// the rule surrounding [s]; in other words, the set will be restricted
+  /// to tokens reachable staying within [s]'s rule, and {@link Token#EPSILON}
+  /// is in the set if we reach the end of the rule.
+  IntervalSet nextTokens(ATNState s, [RuleContext ctx]) {
+    if (ctx != null) {
+      return LL1Analyzer(this).LOOK(s, ctx);
+    }
+    if (s.nextTokenWithinRule != null) return s.nextTokenWithinRule;
+    s.nextTokenWithinRule = LL1Analyzer(this).LOOK(s, null);
+    s.nextTokenWithinRule.setReadonly(true);
+    return s.nextTokenWithinRule;
+  }
+
+  void addState(ATNState state) {
+    if (state != null) {
+      state.atn = this;
+      state.stateNumber = states.length;
+    }
+
+    states.add(state);
+  }
+
+  void removeState(ATNState state) {
+    states[state.stateNumber] =
+        null; // just free mem, don't shift states in list
+  }
+
+  int defineDecisionState(DecisionState s) {
+    decisionToState.add(s);
+    s.decision = decisionToState.length - 1;
+    return s.decision;
+  }
+
+  DecisionState getDecisionState(int decision) {
+    if (decisionToState.isNotEmpty) {
+      return decisionToState[decision];
+    }
+    return null;
+  }
+
+  int get numberOfDecisions {
+    return decisionToState.length;
+  }
+
+  /// Computes the set of input symbols which could follow ATN state number
+  /// [stateNumber] in the specified full [context]. This method
+  /// considers the complete parser context, but does not evaluate semantic
+  /// predicates (i.e. all predicates encountered during the calculation are
+  /// assumed true). If a path in the ATN exists from the starting state to the
+  /// [RuleStopState] of the outermost context without matching any
+  /// symbols, {@link Token#EOF} is added to the returned set.
+  ///
+  ///
<p>If [context] is null, it is treated as {@link ParserRuleContext#EMPTY}.</p>
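+  ///
+  /// A minimal usage sketch (the `atn`, `stateNumber` and `ctx` values here
+  /// are assumed, not part of this API):
+  ///
+  /// ```dart
+  /// // Tokens that may follow ATN state `stateNumber` in full context `ctx`:
+  /// final expected = atn.getExpectedTokens(stateNumber, ctx);
+  /// ```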
+ /// + /// Note that this does NOT give you the set of all tokens that could + /// appear at a given token position in the input phrase. In other words, + /// it does not answer: + /// + /// "Given a specific partial input phrase, return the set of all tokens + /// that can follow the last token in the input phrase." + /// + /// The big difference is that with just the input, the parser could + /// land right in the middle of a lookahead decision. Getting + /// all *possible* tokens given a partial input stream is a separate + /// computation. See https://github.com/antlr/antlr4/issues/1428 + /// + /// For this function, we are specifying an ATN state and call stack to compute + /// what token(s) can come next and specifically: outside of a lookahead decision. + /// That is what you want for error reporting and recovery upon parse error. + /// + /// @param stateNumber the ATN state number + /// @param context the full parse context + /// @return The set of potentially valid input symbols which could follow the + /// specified state in the specified context. + /// @throws IllegalArgumentException if the ATN does not contain a state with + /// number [stateNumber] + IntervalSet getExpectedTokens(int stateNumber, RuleContext context) { + if (stateNumber < 0 || stateNumber >= states.length) { + throw RangeError.index(stateNumber, states, 'stateNumber'); + } + + var ctx = context; + final s = states[stateNumber]; + var following = nextTokens(s); + if (!following.contains(Token.EPSILON)) { + return following; + } + + final expected = IntervalSet(); + expected.addAll(following); + expected.remove(Token.EPSILON); + while (ctx != null && + ctx.invokingState >= 0 && + following.contains(Token.EPSILON)) { + final invokingState = states[ctx.invokingState]; + RuleTransition rt = invokingState.transition(0); + following = nextTokens(rt.followState); + expected.addAll(following); + expected.remove(Token.EPSILON); + ctx = ctx.parent; + } + + if (following.contains(Token.EPSILON)) { + expected.addOne(Token.EOF); + } + + return expected; + } +} diff --git a/runtime/Dart/lib/src/atn/src/atn_config.dart b/runtime/Dart/lib/src/atn/src/atn_config.dart new file mode 100644 index 000000000..5b7e9ba84 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn_config.dart @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../prediction_context.dart'; +import '../../recognizer.dart'; +import '../../util/murmur_hash.dart'; +import 'atn_state.dart'; +import 'lexer_action_executor.dart'; +import 'semantic_context.dart'; + +Map checkParams(params, isCfg) { + if (params == null) { + final result = { + 'state': null, + 'alt': null, + 'context': null, + 'semanticContext': null + }; + if (isCfg) { + result['reachesIntoOuterContext'] = 0; + } + return result; + } else { + final props = {}; + props['state'] = params.state; + props['alt'] = (params.alt == null) ? null : params.alt; + props['context'] = params.context; + props['semanticContext'] = params.semanticContext; + if (isCfg) { + props['reachesIntoOuterContext'] = params.reachesIntoOuterContext ?? 0; + props['precedenceFilterSuppressed'] = + params.precedenceFilterSuppressed ?? false; + } + return props; + } +} + +/// A tuple: (ATN state, predicted alt, syntactic, semantic context). 
+/// The syntactic context is a graph-structured stack node whose +/// path(s) to the root is the rule invocation(s) +/// chain used to arrive at the state. The semantic context is +/// the tree of semantic predicates encountered before reaching +/// an ATN state. +class ATNConfig { + /// This field stores the bit mask for implementing the + /// {@link #isPrecedenceFilterSuppressed} property as a bit within the + /// existing {@link #reachesIntoOuterContext} field. + static final int SUPPRESS_PRECEDENCE_FILTER = 0x40000000; + + /// The ATN state associated with this configuration */ + ATNState state; + + /// What alt (or lexer rule) is predicted by this configuration */ + int alt; + + /// The stack of invoking states leading to the rule/states associated + /// with this config. We track only those contexts pushed during + /// execution of the ATN simulator. + PredictionContext context; + + /// We cannot execute predicates dependent upon local context unless + /// we know for sure we are in the correct context. Because there is + /// no way to do this efficiently, we simply cannot evaluate + /// dependent predicates unless we are in the rule that initially + /// invokes the ATN simulator. + /// + ///
<p>
+  /// closure() tracks the depth of how far we dip into the outer context:
+  /// depth > 0. Note that it may not be totally accurate depth since I
+  /// don't ever decrement. TODO: make it a bool then</p>
+  ///
+  /// <p>
+  /// For memory efficiency, the {@link #isPrecedenceFilterSuppressed} method
+  /// is also backed by this field. Since the field is publicly accessible, the
+  /// highest bit which would not cause the value to become negative is used to
+  /// store this field. This choice minimizes the risk that code which only
+  /// compares this value to 0 would be affected by the new purpose of the
+  /// flag. It also ensures that the performance of the existing [ATNConfig]
+  /// constructors, as well as operations like
+  /// {@link ATNConfigSet#add(ATNConfig, DoubleKeyMap)}, is completely
+  /// unaffected by the change.</p>
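+  ///
+  /// A rough numeric illustration (values hypothetical): a config three
+  /// levels deep with the precedence filter suppressed stores both facts
+  /// in this single int:
+  ///
+  /// ```dart
+  /// final raw = 0x40000000 | 3; // SUPPRESS_PRECEDENCE_FILTER bit + depth 3
+  /// assert((raw & ~0x40000000) == 3); // outerContextDepth
+  /// assert((raw & 0x40000000) != 0); // isPrecedenceFilterSuppressed()
+  /// ```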
+ int reachesIntoOuterContext = 0; + + SemanticContext semanticContext; + + ATNConfig(this.state, this.alt, this.context, + [this.semanticContext = SemanticContext.NONE]); + + ATNConfig.dup(ATNConfig c, + {this.state, this.alt, this.context, this.semanticContext}) { + state = state ?? c.state; + alt = alt ?? c.alt; + context = context ?? c.context; + semanticContext = semanticContext ?? c.semanticContext; + reachesIntoOuterContext = + c.reachesIntoOuterContext ?? reachesIntoOuterContext; + } + + /// This method gets the value of the {@link #reachesIntoOuterContext} field + /// as it existed prior to the introduction of the + /// {@link #isPrecedenceFilterSuppressed} method. + int get outerContextDepth { + return reachesIntoOuterContext & ~SUPPRESS_PRECEDENCE_FILTER; + } + + bool isPrecedenceFilterSuppressed() { + return (reachesIntoOuterContext & SUPPRESS_PRECEDENCE_FILTER) != 0; + } + + void setPrecedenceFilterSuppressed(bool value) { + if (value) { + reachesIntoOuterContext |= 0x40000000; + } else { + reachesIntoOuterContext &= ~SUPPRESS_PRECEDENCE_FILTER; + } + } + + /// An ATN configuration is equal to another if both have + /// the same state, they predict the same alternative, and + /// syntactic/semantic contexts are the same. + @override + bool operator ==(Object other) { + if (other is ATNConfig && other != null) { + return state.stateNumber == other.state.stateNumber && + alt == other.alt && + (context == other.context || + (context != null && context == other.context)) && + semanticContext == other.semanticContext && + isPrecedenceFilterSuppressed() == + other.isPrecedenceFilterSuppressed(); + } + return false; + } + + @override + int get hashCode { + var hashCode = MurmurHash.initialize(7); + hashCode = MurmurHash.update(hashCode, state.stateNumber); + hashCode = MurmurHash.update(hashCode, alt); + hashCode = MurmurHash.update(hashCode, context); + hashCode = MurmurHash.update(hashCode, semanticContext); + hashCode = MurmurHash.finish(hashCode, 4); + return hashCode; + } + + @override + String toString([Recognizer recog, bool showAlt = true]) { + final buf = StringBuffer(); + // if ( state.ruleIndex>=0 ) { + // if ( recog!=null ) buf.write(recog.ruleNames[state.ruleIndex]+":"); + // else buf.write(state.ruleIndex+":"); + // } + buf.write('('); + buf.write(state); + if (showAlt) { + buf.write(','); + buf.write(alt); + } + if (context != null) { + buf.write(',['); + buf.write(context.toString()); + buf.write(']'); + } + if (semanticContext != null && semanticContext != SemanticContext.NONE) { + buf.write(','); + buf.write(semanticContext); + } + if (outerContextDepth > 0) { + buf.write(',up='); + buf.write(outerContextDepth); + } + buf.write(')'); + return buf.toString(); + } +} + +class LexerATNConfig extends ATNConfig { + /// Gets the [LexerActionExecutor] capable of executing the embedded + /// action(s) for the current configuration. + LexerActionExecutor lexerActionExecutor; + + bool passedThroughNonGreedyDecision = false; + + LexerATNConfig(ATNState state, int alt, PredictionContext context, + [this.lexerActionExecutor]) + : super(state, alt, context, SemanticContext.NONE) { + passedThroughNonGreedyDecision = false; + } + + LexerATNConfig.dup(LexerATNConfig c, ATNState state, + {this.lexerActionExecutor, PredictionContext context}) + : super.dup(c, state: state, context: context) { + lexerActionExecutor = lexerActionExecutor ?? 
c.lexerActionExecutor; + passedThroughNonGreedyDecision = checkNonGreedyDecision(c, state); + } + + bool hasPassedThroughNonGreedyDecision() { + return passedThroughNonGreedyDecision; + } + + @override + int get hashCode { + var hashCode = MurmurHash.initialize(7); + hashCode = MurmurHash.update(hashCode, state.stateNumber); + hashCode = MurmurHash.update(hashCode, alt); + hashCode = MurmurHash.update(hashCode, context); + hashCode = MurmurHash.update(hashCode, semanticContext); + hashCode = + MurmurHash.update(hashCode, passedThroughNonGreedyDecision ? 1 : 0); + hashCode = MurmurHash.update(hashCode, lexerActionExecutor); + hashCode = MurmurHash.finish(hashCode, 6); + return hashCode; + } + + @override + bool operator ==(Object other) { + if (identical(this, other)) { + return true; + } else if (other is LexerATNConfig) { + final lexerOther = other; + if (passedThroughNonGreedyDecision != + lexerOther.passedThroughNonGreedyDecision) { + return false; + } + + if (lexerActionExecutor != lexerOther.lexerActionExecutor) { + return false; + } + + return super == other; + } + return false; + } + + static bool checkNonGreedyDecision(LexerATNConfig source, ATNState target) { + return source.passedThroughNonGreedyDecision || + target is DecisionState && target.nonGreedy; + } +} diff --git a/runtime/Dart/lib/src/atn/src/atn_config_set.dart b/runtime/Dart/lib/src/atn/src/atn_config_set.dart new file mode 100644 index 000000000..7a70dd925 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn_config_set.dart @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:collection'; +import 'dart:math'; + +import 'package:collection/collection.dart'; + +import '../../misc/pair.dart'; +import '../../prediction_context.dart'; +import '../../util/bit_set.dart'; +import '../../util/utils.dart'; +import 'atn.dart'; +import 'atn_config.dart'; +import 'atn_state.dart'; +import 'semantic_context.dart'; + +class ATNConfigSet extends Iterable { + /// Indicates that the set of configurations is read-only. Do not + /// allow any code to manipulate the set; DFA states will point at + /// the sets and they must not change. This does not protect the other + /// fields; in particular, conflictingAlts is set after + /// we've made this readonly. + bool _readOnly = false; + + bool get readOnly => _readOnly; + + set readOnly(bool readOnly) { + _readOnly = readOnly; + if (readOnly) { + configLookup = null; // can't mod, no need for lookup cache + } + } + + /// The reason that we need this is because we don't want the hash map to use + /// the standard hash code and equals. We need all configurations with the same + /// {@code (s,i,_,semctx)} to be equal. Unfortunately, this key effectively doubles + /// the number of objects associated with ATNConfigs. The other solution is to + /// use a hash table that lets us specify the equals/hashcode operation. + /// + /// All configs but hashed by (s, i, _, pi) not including context. Wiped out + /// when we go readonly as this set becomes a DFA state. 
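+  ///
+  /// Sketch of the keying (the `s`, `ctx1` and `ctx2` values are
+  /// hypothetical): two configs that differ only in context count as one
+  /// entry here, which is what allows add() to merge their contexts.
+  ///
+  /// ```dart
+  /// final c1 = ATNConfig(s, 1, ctx1);
+  /// final c2 = ATNConfig(s, 1, ctx2);
+  /// // equals(c1, c2) is true: state, alt and semanticContext all match,
+  /// // even though c1.context and c2.context differ.
+  /// ```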
+  Set<ATNConfig> configLookup = HashSet<ATNConfig>(equals: (a, b) {
+    if (a == null || b == null) return false;
+    return a.state.stateNumber == b.state.stateNumber &&
+        a.alt == b.alt &&
+        a.semanticContext == b.semanticContext;
+  }, hashCode: (ATNConfig o) {
+    var hashCode = 7;
+    hashCode = 31 * hashCode + o.state.stateNumber;
+    hashCode = 31 * hashCode + o.alt;
+    hashCode = 31 * hashCode + o.semanticContext.hashCode;
+    return hashCode;
+  });
+
+  /// Track the elements as they are added to the set; supports get(i)
+  final List<ATNConfig> configs = [];
+
+  // TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation
+  // TODO: can we track conflicts as they are added to save scanning configs later?
+  int uniqueAlt = 0;
+
+  /// Currently this is only used when we detect SLL conflict; this does
+  /// not necessarily represent the ambiguous alternatives. In fact,
+  /// I should also point out that this seems to include predicated alternatives
+  /// that have predicates that evaluate to false. Computed in computeTargetState().
+  BitSet conflictingAlts;
+
+  // Used in parser and lexer. In lexer, it indicates we hit a pred
+  // while computing a closure operation. Don't make a DFA state from this.
+  bool hasSemanticContext = false;
+  bool dipsIntoOuterContext = false;
+
+  /// Indicates that this configuration set is part of a full context
+  /// LL prediction. It will be used to determine how to merge $. With SLL
+  /// it's a wildcard whereas it is not for LL context merge.
+  bool fullCtx;
+
+  int cachedHashCode = -1;
+
+  ATNConfigSet([this.fullCtx = true]);
+
+  ATNConfigSet.dup(ATNConfigSet old) {
+    fullCtx = old.fullCtx;
+    addAll(old);
+    uniqueAlt = old.uniqueAlt;
+    conflictingAlts = old.conflictingAlts;
+    hasSemanticContext = old.hasSemanticContext;
+    dipsIntoOuterContext = old.dipsIntoOuterContext;
+  }
+
+  /// Adding a new config means merging contexts with existing configs for
+  /// {@code (s, i, pi, _)}, where [s] is the
+  /// {@link ATNConfig#state}, [i] is the {@link ATNConfig#alt}, and
+  /// [pi] is the {@link ATNConfig#semanticContext}. We use
+  /// {@code (s,i,pi)} as key.
+  ///
+  ///
<p>This method updates {@link #dipsIntoOuterContext} and
+  /// {@link #hasSemanticContext} when necessary.</p>
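+  ///
+  /// A minimal sketch of the two outcomes (names hypothetical):
+  ///
+  /// ```dart
+  /// final set = ATNConfigSet();
+  /// set.add(ATNConfig(s, 1, ctx1)); // new (s, i, pi): appended to configs
+  /// set.add(ATNConfig(s, 1, ctx2)); // same key: ctx1 and ctx2 are merged,
+  ///                                 // so set.length stays 1
+  /// ```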
+  bool add(ATNConfig config,
+      [Map<Pair<PredictionContext, PredictionContext>, PredictionContext>
+          mergeCache]) {
+    if (readOnly) throw StateError('This set is readonly');
+    if (config.semanticContext != SemanticContext.NONE) {
+      hasSemanticContext = true;
+    }
+    if (config.outerContextDepth > 0) {
+      dipsIntoOuterContext = true;
+    }
+    final existing = configLookup.lookup(config) ?? config;
+    if (identical(existing, config)) {
+      // we added this new one
+      cachedHashCode = -1;
+      configLookup.add(config);
+      configs.add(config); // track order here
+      return true;
+    }
+    // a previous (s,i,pi,_), merge with it and save result
+    final rootIsWildcard = !fullCtx;
+    final merged = PredictionContext.merge(
+        existing.context, config.context, rootIsWildcard, mergeCache);
+    // no need to check for existing.context, config.context in cache
+    // since only way to create new graphs is "call rule" and here. We
+    // cache at both places.
+    existing.reachesIntoOuterContext =
+        max(existing.reachesIntoOuterContext, config.reachesIntoOuterContext);
+
+    // make sure to preserve the precedence filter suppression during the merge
+    if (config.isPrecedenceFilterSuppressed()) {
+      existing.setPrecedenceFilterSuppressed(true);
+    }
+
+    existing.context = merged; // replace context; no need to alt mapping
+    return true;
+  }
+
+  /// Return a List holding the list of configs
+  List<ATNConfig> get elements {
+    return configs;
+  }
+
+  Set<ATNState> get states {
+    final states = <ATNState>{};
+    for (var i = 0; i < configs.length; i++) {
+      states.add(configs[i].state);
+    }
+    return states;
+  }
+
+  /// Gets the complete set of represented alternatives for the configuration
+  /// set.
+  ///
+  /// @return the set of represented alternatives in this configuration set
+  ///
+  /// @since 4.3
+  BitSet get alts {
+    final alts = BitSet();
+    for (var config in configs) {
+      alts.set(config.alt);
+    }
+    return alts;
+  }
+
+  List<SemanticContext> get predicates {
+    final preds = <SemanticContext>[];
+    for (var c in configs) {
+      if (c.semanticContext != SemanticContext.NONE) {
+        preds.add(c.semanticContext);
+      }
+    }
+    return preds;
+  }
+
+  ATNConfig get(int i) {
+    return configs[i];
+  }
+
+  void optimizeConfigs(interpreter) {
+    if (readOnly) throw StateError('This set is readonly');
+
+    if (configLookup.isEmpty) return;
+
+    for (var config in configs) {
+//      int before = PredictionContext.getAllContextNodes(config.context).length;
+      config.context = interpreter.getCachedContext(config.context);
+//      int after = PredictionContext.getAllContextNodes(config.context).length;
+//      System.out.println("configs "+before+"->"+after);
+    }
+  }
+
+  bool addAll(coll) {
+    for (ATNConfig c in coll) {
+      add(c);
+    }
+    return false;
+  }
+
+  @override
+  bool operator ==(other) {
+    return identical(this, other) ||
+        (other is ATNConfigSet &&
+            other != null &&
+            ListEquality().equals(configs, other.configs) &&
+            fullCtx == other.fullCtx &&
+            uniqueAlt == other.uniqueAlt &&
+            conflictingAlts == other.conflictingAlts &&
+            hasSemanticContext == other.hasSemanticContext &&
+            dipsIntoOuterContext == other.dipsIntoOuterContext);
+  }
+
+  @override
+  int get hashCode {
+    if (readOnly) {
+      if (cachedHashCode == -1) {
+        cachedHashCode = ListEquality().hash(configs);
+      }
+
+      return cachedHashCode;
+    }
+
+    return ListEquality().hash(configs);
+  }
+
+  @override
+  int get length {
+    return configs.length;
+  }
+
+  @override
+  bool get isEmpty => configs.isEmpty;
+
+  void updateHashCode(hash) {
+    if (readOnly) {
+      if (cachedHashCode == -1) {
+        cachedHashCode = hashCode;
+      }
+      hash.update(cachedHashCode);
+    } else {
+      hash.update(hashCode);
+    }
+  }
+
+  @override
+  bool contains(Object
o) { + if (configLookup == null) { + throw UnsupportedError( + 'This method is not implemented for readonly sets.'); + } + + return configLookup.contains(o); + } + + @override + Iterator get iterator => configs.iterator; + + void clear() { + if (readOnly) throw StateError('This set is readonly'); + configs.clear(); + cachedHashCode = -1; + configLookup.clear(); + } + + @override + String toString() { + final buf = StringBuffer(); + buf.write(arrayToString(elements)); + if (hasSemanticContext) { + buf.write(',hasSemanticContext=$hasSemanticContext'); + } + if (uniqueAlt != ATN.INVALID_ALT_NUMBER) buf.write(',uniqueAlt=$uniqueAlt'); + if (conflictingAlts != null) buf.write(',conflictingAlts=$conflictingAlts'); + if (dipsIntoOuterContext) buf.write(',dipsIntoOuterContext'); + return buf.toString(); + } +} + +class OrderedATNConfigSet extends ATNConfigSet { + @override + final configLookup = {}; +} diff --git a/runtime/Dart/lib/src/atn/src/atn_deserializer.dart b/runtime/Dart/lib/src/atn/src/atn_deserializer.dart new file mode 100644 index 000000000..fbbec6b0c --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn_deserializer.dart @@ -0,0 +1,809 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../interval_set.dart'; +import '../../misc/pair.dart'; +import '../../token.dart'; +import 'atn.dart'; +import 'atn_state.dart'; +import 'atn_type.dart'; +import 'lexer_action.dart'; +import 'transition.dart'; + +class ATNDeserializationOptions { + static final ATNDeserializationOptions defaultOptions = + ATNDeserializationOptions()..makeReadOnly(); + + bool readOnly; + bool verifyATN; + bool generateRuleBypassTransitions; + + ATNDeserializationOptions([ATNDeserializationOptions options]) { + if (options == null) { + verifyATN = true; + generateRuleBypassTransitions = false; + } else { + verifyATN = options.verifyATN; + generateRuleBypassTransitions = + options.generateRuleBypassTransitions; + } + } + + bool isReadOnly() { + return readOnly; + } + + void makeReadOnly() { + readOnly = true; + } + + bool isVerifyATN() { + return verifyATN; + } + + void setVerifyATN(bool verifyATN) { + throwIfReadOnly(); + this.verifyATN = verifyATN; + } + + bool isGenerateRuleBypassTransitions() { + return generateRuleBypassTransitions; + } + + void setGenerateRuleBypassTransitions(bool generateRuleBypassTransitions) { + throwIfReadOnly(); + this.generateRuleBypassTransitions = generateRuleBypassTransitions; + } + + void throwIfReadOnly() { + if (isReadOnly()) { + throw StateError('The object is read only.'); + } + } +} + +class ATNDeserializer { + /// This value should never change. Updates following this version are + /// reflected as change in the unique ID SERIALIZED_UUID. + static final SERIALIZED_VERSION = 3; + + /** WARNING: DO NOT MERGE THESE LINES. If UUIDs differ during a merge, + * resolve the conflict by generating a new ID! + */ + /// This is the earliest supported serialized UUID. + static final BASE_SERIALIZED_UUID = '33761B2D-78BB-4A43-8B0B-4F5BEE8AACF3'; + + /// This UUID indicates an extension of {@link BASE_SERIALIZED_UUID} for the + /// addition of precedence predicates. + static final ADDED_PRECEDENCE_TRANSITIONS = + '1DA0C57D-6C06-438A-9B27-10BCB3CE0F61'; + + /// This UUID indicates an extension of {@link #ADDED_PRECEDENCE_TRANSITIONS} + /// for the addition of lexer actions encoded as a sequence of + /// [LexerAction] instances. 
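+  ///
+  /// Because SUPPORTED_UUIDS below is ordered by introduction, a feature is
+  /// considered supported whenever the actual serialized UUID appears at or
+  /// after the feature's UUID in that list; e.g. (sketch):
+  ///
+  /// ```dart
+  /// // An ATN serialized with ADDED_UNICODE_SMP also carries lexer actions:
+  /// isFeatureSupported(ADDED_LEXER_ACTIONS, ADDED_UNICODE_SMP); // true
+  /// ```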
+ static final ADDED_LEXER_ACTIONS = 'AADB8D7E-AEEF-4415-AD2B-8204D6CF042E'; + + /// This UUID indicates the serialized ATN contains two sets of + /// IntervalSets, where the second set's values are encoded as + /// 32-bit integers to support the full Unicode SMP range up to U+10FFFF. + static final ADDED_UNICODE_SMP = '59627784-3BE5-417A-B9EB-8131A7286089'; + + /// This list contains all of the currently supported UUIDs, ordered by when + /// the feature first appeared in this branch. + static final SUPPORTED_UUIDS = [ + BASE_SERIALIZED_UUID, + ADDED_PRECEDENCE_TRANSITIONS, + ADDED_LEXER_ACTIONS, + ADDED_UNICODE_SMP + ]; + + /// This is the current serialized UUID. + static final SERIALIZED_UUID = ADDED_UNICODE_SMP; + + ATNDeserializationOptions deserializationOptions; + List data; + var pos; + String uuid; + + ATNDeserializer([options]) { + deserializationOptions = + options ?? ATNDeserializationOptions.defaultOptions; + } + + /// Determines if a particular serialized representation of an ATN supports + /// a particular feature, identified by the [UUID] used for serializing + /// the ATN at the time the feature was first introduced. + /// + /// @param feature The [UUID] marking the first time the feature was + /// supported in the serialized ATN. + /// @param actualUuid The [UUID] of the actual serialized ATN which is + /// currently being deserialized. + /// @return [true] if the [actualUuid] value represents a + /// serialized ATN at or after the feature identified by [feature] was + /// introduced; otherwise, [false]. + bool isFeatureSupported(feature, actualUuid) { + final idx1 = SUPPORTED_UUIDS.indexOf(feature); + if (idx1 < 0) { + return false; + } + final idx2 = SUPPORTED_UUIDS.indexOf(actualUuid); + return idx2 >= idx1; + } + + ATN deserialize(List data) { + reset(data); + checkVersion(); + checkUUID(); + final atn = readATN(); + readStates(atn); + readRules(atn); + readModes(atn); + final sets = []; + // First, deserialize sets with 16-bit arguments <= U+FFFF. + readSets(atn, sets, () => readInt()); + // Next, if the ATN was serialized with the Unicode SMP feature, + // deserialize sets with 32-bit arguments <= U+10FFFF. + if (isFeatureSupported(ADDED_UNICODE_SMP, uuid)) { + readSets(atn, sets, () => readInt32()); + } + readEdges(atn, sets); + readDecisions(atn); + readLexerActions(atn); + markPrecedenceDecisions(atn); + verifyATN(atn); + if (deserializationOptions.generateRuleBypassTransitions && + atn.grammarType == ATNType.PARSER) { + generateRuleBypassTransitions(atn); + // re-verify after modification + verifyATN(atn); + } + return atn; + } + + /// Each char value in data is shifted by +2 at the entry to this method. + /// This is an encoding optimization targeting the serialized values 0 + /// and -1 (serialized to 0xFFFF), each of which are very common in the + /// serialized form of the ATN. In the modified UTF-8 that Java uses for + /// compiled string literals, these two character values have multi-byte + /// forms. By shifting each value by +2, they become characters 2 and 1 + /// prior to writing the string, each of which have single-byte + /// representations. Since the shift occurs in the tool during ATN + /// serialization, each target is responsible for adjusting the values + /// during deserialization. + /// + /// As a special case, note that the first element of data is not + /// adjusted because it contains the major version number of the + /// serialized ATN, which was fixed at 3 at the time the value shifting + /// was implemented. 
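+  ///
+  /// Concretely, as a worked example of the shift performed below:
+  /// serialized char 2 maps back to 0, and char 1 maps back to 65535
+  /// (0xFFFF, i.e. -1):
+  ///
+  /// ```dart
+  /// int adjust(int c) => c > 1 ? c - 2 : c + 65534;
+  /// assert(adjust(2) == 0);
+  /// assert(adjust(1) == 65535);
+  /// ```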
+  void reset(List<int> data) {
+    final adjust = (int c) {
+      final v = c;
+      return v > 1 ? v - 2 : v + 65534;
+    };
+    final temp = data.map(adjust).toList();
+    // don't adjust the first value since that's the version number
+    temp[0] = data[0];
+    this.data = temp;
+    pos = 0;
+  }
+
+  void checkVersion() {
+    final version = readInt();
+    if (version != SERIALIZED_VERSION) {
+      throw ('Could not deserialize ATN with version $version (expected $SERIALIZED_VERSION).');
+    }
+  }
+
+  void checkUUID() {
+    final uuid = readUUID();
+    if (!SUPPORTED_UUIDS.contains(uuid)) {
+      throw ('Could not deserialize ATN with UUID: $uuid (expected $SERIALIZED_UUID or a legacy UUID).');
+    }
+    this.uuid = uuid;
+  }
+
+  ATN readATN() {
+    final grammarType = readInt();
+    final maxTokenType = readInt();
+    return ATN(ATNType.values[grammarType], maxTokenType);
+  }
+
+  void readStates(ATN atn) {
+    final loopBackStateNumbers = <Pair<LoopEndState, int>>[];
+    final endStateNumbers = <Pair<BlockStartState, int>>[];
+    final nstates = readInt();
+    for (var i = 0; i < nstates; i++) {
+      final stype = StateType.values[readInt()];
+      // ignore bad type of states
+      if (stype == StateType.INVALID_TYPE) {
+        atn.addState(null);
+        continue;
+      }
+
+      var ruleIndex = readInt();
+      if (ruleIndex == 0xFFFF) {
+        ruleIndex = -1;
+      }
+
+      final s = stateFactory(stype, ruleIndex);
+      if (s is LoopEndState) {
+        // special case
+        final loopBackStateNumber = readInt();
+        loopBackStateNumbers.add(Pair(s, loopBackStateNumber));
+      } else if (s is BlockStartState) {
+        final endStateNumber = readInt();
+        endStateNumbers.add(Pair(s, endStateNumber));
+      }
+      atn.addState(s);
+    }
+
+    // delay the assignment of loop back and end states until we know all the
+    // state instances have been initialized
+    for (final pair in loopBackStateNumbers) {
+      pair.a.loopBackState = atn.states[pair.b];
+    }
+
+    for (final pair in endStateNumbers) {
+      pair.a.endState = atn.states[pair.b] as BlockEndState;
+    }
+
+    final numNonGreedyStates = readInt();
+    for (var i = 0; i < numNonGreedyStates; i++) {
+      final stateNumber = readInt();
+      (atn.states[stateNumber] as DecisionState).nonGreedy = true;
+    }
+    if (isFeatureSupported(ADDED_PRECEDENCE_TRANSITIONS, uuid)) {
+      final numPrecedenceStates = readInt();
+      for (var i = 0; i < numPrecedenceStates; i++) {
+        final stateNumber = readInt();
+        (atn.states[stateNumber] as RuleStartState).isLeftRecursiveRule = true;
+      }
+    }
+  }
+
+  void readRules(ATN atn) {
+    final nrules = readInt();
+    if (atn.grammarType == ATNType.LEXER) {
+      atn.ruleToTokenType = List<int>(nrules);
+    }
+
+    atn.ruleToStartState = List<RuleStartState>(nrules);
+    for (var i = 0; i < nrules; i++) {
+      final s = readInt();
+      RuleStartState startState = atn.states[s];
+      atn.ruleToStartState[i] = startState;
+      if (atn.grammarType == ATNType.LEXER) {
+        var tokenType = readInt();
+        if (tokenType == 0xFFFF) {
+          tokenType = Token.EOF;
+        }
+
+        atn.ruleToTokenType[i] = tokenType;
+
+        if (!isFeatureSupported(ADDED_LEXER_ACTIONS, uuid)) {
+          // this piece of unused metadata was serialized prior to the
+          // addition of LexerAction
+          final actionIndexIgnored = readInt();
+        }
+      }
+    }
+
+    atn.ruleToStopState = List<RuleStopState>(nrules);
+    for (var state in atn.states) {
+      if (!(state is RuleStopState)) {
+        continue;
+      }
+
+      RuleStopState stopState = state;
+      atn.ruleToStopState[state.ruleIndex] = stopState;
+      atn.ruleToStartState[state.ruleIndex].stopState = stopState;
+    }
+  }
+
+  void readModes(ATN atn) {
+    final nmodes = readInt();
+    for (var i = 0; i < nmodes; i++) {
+      final s = readInt();
+      atn.modeToStartState.add(atn.states[s] as TokensStartState);
+    }
+  }
+
+  void readSets(ATN atn,
List sets, readUnicode) { + final nsets = readInt(); + for (var i = 0; i < nsets; i++) { + final nintervals = readInt(); + final set = IntervalSet(); + sets.add(set); + + final containsEof = readInt() != 0; + if (containsEof) { + set.addOne(-1); + } + + for (var j = 0; j < nintervals; j++) { + int a = readUnicode(); + int b = readUnicode(); + set.addRange(a, b); + } + } + } + + void readEdges(ATN atn, sets) { + final nedges = readInt(); + for (var i = 0; i < nedges; i++) { + final src = readInt(); + final trg = readInt(); + final ttype = TransitionType.values[readInt()]; + final arg1 = readInt(); + final arg2 = readInt(); + final arg3 = readInt(); + final trans = + edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets); +// System.out.println("EDGE "+trans.getClass().getSimpleName()+" "+ +// src+"->"+trg+ +// " "+Transition.serializationNames[ttype]+ +// " "+arg1+","+arg2+","+arg3); + final srcState = atn.states[src]; + srcState.addTransition(trans); + } + + // edges for rule stop states can be derived, so they aren't serialized + for (var state in atn.states) { + for (var i = 0; i < state.numberOfTransitions; i++) { + final t = state.transition(i); + if (t is RuleTransition) { + final ruleTransition = t; + var outermostPrecedenceReturn = -1; + if (atn.ruleToStartState[ruleTransition.target.ruleIndex] + .isLeftRecursiveRule) { + if (ruleTransition.precedence == 0) { + outermostPrecedenceReturn = ruleTransition.target.ruleIndex; + } + } + + final returnTransition = EpsilonTransition( + ruleTransition.followState, outermostPrecedenceReturn); + atn.ruleToStopState[ruleTransition.target.ruleIndex] + .addTransition(returnTransition); + } + } + } + + for (var state in atn.states) { + if (state is BlockStartState) { + // we need to know the end state to set its start state + if (state.endState == null) { + throw StateError(''); + } + + // block end states can only be associated to a single block start state + if (state.endState.startState != null) { + throw StateError(''); + } + + state.endState.startState = state; + } + + if (state is PlusLoopbackState) { + final loopbackState = state; + for (var i = 0; i < loopbackState.numberOfTransitions; i++) { + final target = loopbackState.transition(i).target; + if (target is PlusBlockStartState) { + target.loopBackState = loopbackState; + } + } + } else if (state is StarLoopbackState) { + final loopbackState = state; + for (var i = 0; i < loopbackState.numberOfTransitions; i++) { + final target = loopbackState.transition(i).target; + if (target is StarLoopEntryState) { + target.loopBackState = loopbackState; + } + } + } + } + } + + void readDecisions(ATN atn) { + final ndecisions = readInt(); + for (var i = 1; i <= ndecisions; i++) { + final s = readInt(); + DecisionState decState = atn.states[s]; + atn.decisionToState.add(decState); + decState.decision = i - 1; + } + } + + void readLexerActions(ATN atn) { + if (atn.grammarType == ATNType.LEXER) { + if (isFeatureSupported(ADDED_LEXER_ACTIONS, uuid)) { + atn.lexerActions = List(readInt()); + for (var i = 0; i < atn.lexerActions.length; i++) { + final actionType = LexerActionType.values[readInt()]; + var data1 = readInt(); + if (data1 == 0xFFFF) { + data1 = -1; + } + + var data2 = readInt(); + if (data2 == 0xFFFF) { + data2 = -1; + } + final lexerAction = + lexerActionFactory(actionType, data1, data2); + + atn.lexerActions[i] = lexerAction; + } + } else { + // for compatibility with older serialized ATNs, convert the old + // serialized action index for action transitions to the new + // form, which is 
the index of a LexerCustomAction + final legacyLexerActions = []; + for (var state in atn.states) { + for (var i = 0; i < state.numberOfTransitions; i++) { + final transition = state.transition(i); + if (transition is ActionTransition) { + final ruleIndex = transition.ruleIndex; + final actionIndex = transition.actionIndex; + final lexerAction = + LexerCustomAction(ruleIndex, actionIndex); + state.setTransition( + i, + ActionTransition(transition.target, ruleIndex, + legacyLexerActions.length, false)); + legacyLexerActions.add(lexerAction); + } + } + } + + atn.lexerActions = legacyLexerActions; + } + } + } + + void generateRuleBypassTransitions(ATN atn) { + for (var i = 0; i < atn.ruleToStartState.length; i++) { + atn.ruleToTokenType[i] = atn.maxTokenType + i + 1; + } + for (var i = 0; i < atn.ruleToStartState.length; i++) { + generateRuleBypassTransition(atn, i); + } + } + + void generateRuleBypassTransition(ATN atn, int idx) { + final bypassStart = BasicBlockStartState(); + bypassStart.ruleIndex = idx; + atn.addState(bypassStart); + + final bypassStop = BlockEndState(); + bypassStop.ruleIndex = idx; + atn.addState(bypassStop); + + bypassStart.endState = bypassStop; + atn.defineDecisionState(bypassStart); + + bypassStop.startState = bypassStart; + + ATNState endState; + Transition excludeTransition; + if (atn.ruleToStartState[idx].isLeftRecursiveRule) { + // wrap from the beginning of the rule to the StarLoopEntryState + endState = null; + for (var state in atn.states) { + if (state.ruleIndex != idx) { + continue; + } + + if (!(state is StarLoopEntryState)) { + continue; + } + + final maybeLoopEndState = + state.transition(state.numberOfTransitions - 1).target; + if (!(maybeLoopEndState is LoopEndState)) { + continue; + } + + if (maybeLoopEndState.epsilonOnlyTransitions && + maybeLoopEndState.transition(0).target is RuleStopState) { + endState = state; + break; + } + } + + if (endState == null) { + throw UnsupportedError( + "Couldn't identify final state of the precedence rule prefix section."); + } + + excludeTransition = + (endState as StarLoopEntryState).loopBackState.transition(0); + } else { + endState = atn.ruleToStopState[idx]; + } + + // all non-excluded transitions that currently target end state need to target blockEnd instead + for (var state in atn.states) { + for (var transition in state.transitions) { + if (transition == excludeTransition) { + continue; + } + + if (transition.target == endState) { + transition.target = bypassStop; + } + } + } + + // all transitions leaving the rule start state need to leave blockStart instead + while (atn.ruleToStartState[idx].numberOfTransitions > 0) { + final transition = atn.ruleToStartState[idx].removeTransition( + atn.ruleToStartState[idx].numberOfTransitions - 1); + bypassStart.addTransition(transition); + } + + // link the new states + atn.ruleToStartState[idx].addTransition(EpsilonTransition(bypassStart)); + bypassStop.addTransition(EpsilonTransition(endState)); + + ATNState matchState = BasicState(); + atn.addState(matchState); + matchState.addTransition( + AtomTransition(bypassStop, atn.ruleToTokenType[idx])); + bypassStart.addTransition(EpsilonTransition(matchState)); + } + + /// Analyze the [StarLoopEntryState] states in the specified ATN to set + /// the {@link StarLoopEntryState#isPrecedenceDecision} field to the + /// correct value. + /// + /// @param atn The ATN. 
+ void markPrecedenceDecisions(ATN atn) { + for (var state in atn.states) { + if (state is StarLoopEntryState) { + /* We analyze the ATN to determine if this ATN decision state is the + * decision for the closure block that determines whether a + * precedence rule should continue or complete. + */ + if (atn.ruleToStartState[state.ruleIndex].isLeftRecursiveRule) { + final maybeLoopEndState = + state.transition(state.numberOfTransitions - 1).target; + if (maybeLoopEndState is LoopEndState) { + if (maybeLoopEndState.epsilonOnlyTransitions && + maybeLoopEndState.transition(0).target is RuleStopState) { + state.isPrecedenceDecision = true; + } + } + } + } + } + } + + void verifyATN(ATN atn) { + // verify assumptions + for (var state in atn.states) { + if (state == null) { + continue; + } + + checkCondition(state.onlyHasEpsilonTransitions() || + state.numberOfTransitions <= 1); + + if (state is PlusBlockStartState) { + checkCondition(state.loopBackState != null); + } + + if (state is StarLoopEntryState) { + final starLoopEntryState = state; + checkCondition(starLoopEntryState.loopBackState != null); + checkCondition(starLoopEntryState.numberOfTransitions == 2); + + if (starLoopEntryState.transition(0).target is StarBlockStartState) { + checkCondition( + starLoopEntryState.transition(1).target is LoopEndState); + checkCondition(!starLoopEntryState.nonGreedy); + } else if (starLoopEntryState.transition(0).target is LoopEndState) { + checkCondition( + starLoopEntryState.transition(1).target is StarBlockStartState); + checkCondition(starLoopEntryState.nonGreedy); + } else { + throw StateError(''); + } + } + + if (state is StarLoopbackState) { + checkCondition(state.numberOfTransitions == 1); + checkCondition(state.transition(0).target is StarLoopEntryState); + } + + if (state is LoopEndState) { + checkCondition(state.loopBackState != null); + } + + if (state is RuleStartState) { + checkCondition(state.stopState != null); + } + + if (state is BlockStartState) { + checkCondition(state.endState != null); + } + + if (state is BlockEndState) { + checkCondition(state.startState != null); + } + + if (state is DecisionState) { + final decisionState = state; + checkCondition(decisionState.numberOfTransitions <= 1 || + decisionState.decision >= 0); + } else { + checkCondition( + state.numberOfTransitions <= 1 || state is RuleStopState); + } + } + } + + void checkCondition(bool condition, [String message = '']) { + if (!condition) { + throw StateError(message); + } + } + + int readInt() { + return data[pos++]; + } + + int readInt32() { + final low = readInt(); + final high = readInt(); + return low | (high << 16); + } + + int readLong() { + final low = readInt32(); + final high = readInt32(); + return (low & 0x00000000FFFFFFFF) | (high << 32); + } + + static final byteToHex = List.generate(256, (i) => i.toRadixString(16).padLeft(2, '0').toUpperCase()); + + String readUUID() { + final bb = List(16); + for (var i = 7; i >= 0; i--) { + final int = readInt(); + /* jshint bitwise: false */ + bb[(2 * i) + 1] = int & 0xFF; + bb[2 * i] = (int >> 8) & 0xFF; + } + return byteToHex[bb[0]] + byteToHex[bb[1]] + + byteToHex[bb[2]] + byteToHex[bb[3]] + '-' + + byteToHex[bb[4]] + byteToHex[bb[5]] + '-' + + byteToHex[bb[6]] + byteToHex[bb[7]] + '-' + + byteToHex[bb[8]] + byteToHex[bb[9]] + '-' + + byteToHex[bb[10]] + byteToHex[bb[11]] + + byteToHex[bb[12]] + byteToHex[bb[13]] + + byteToHex[bb[14]] + byteToHex[bb[15]]; + } + + Transition edgeFactory(ATN atn, TransitionType type, int src, int trg, + int arg1, int arg2, int arg3, 
List sets) { + final target = atn.states[trg]; + switch (type) { + case TransitionType.EPSILON: + return EpsilonTransition(target); + case TransitionType.RANGE: + return arg3 != 0 + ? RangeTransition(target, Token.EOF, arg2) + : RangeTransition(target, arg1, arg2); + case TransitionType.RULE: + final rt = + RuleTransition(atn.states[arg1], arg2, arg3, target); + return rt; + case TransitionType.PREDICATE: + final pt = + PredicateTransition(target, arg1, arg2, arg3 != 0); + return pt; + case TransitionType.PRECEDENCE: + return PrecedencePredicateTransition(target, arg1); + case TransitionType.ATOM: + return arg3 != 0 + ? AtomTransition(target, Token.EOF) + : AtomTransition(target, arg1); + case TransitionType.ACTION: + final a = + ActionTransition(target, arg1, arg2, arg3 != 0); + return a; + case TransitionType.SET: + return SetTransition(target, sets[arg1]); + case TransitionType.NOT_SET: + return NotSetTransition(target, sets[arg1]); + case TransitionType.WILDCARD: + return WildcardTransition(target); + case TransitionType.INVALID: + throw ArgumentError.value(type, 'transition type', 'not valid.'); + default: + throw ArgumentError.value(type, 'transition type', 'not valid.'); + } + } + + ATNState stateFactory(StateType type, int ruleIndex) { + ATNState s; + switch (type) { + case StateType.INVALID_TYPE: + return null; + case StateType.BASIC: + s = BasicState(); + break; + case StateType.RULE_START: + s = RuleStartState(); + break; + case StateType.BLOCK_START: + s = BasicBlockStartState(); + break; + case StateType.PLUS_BLOCK_START: + s = PlusBlockStartState(); + break; + case StateType.STAR_BLOCK_START: + s = StarBlockStartState(); + break; + case StateType.TOKEN_START: + s = TokensStartState(); + break; + case StateType.RULE_STOP: + s = RuleStopState(); + break; + case StateType.BLOCK_END: + s = BlockEndState(); + break; + case StateType.STAR_LOOP_BACK: + s = StarLoopbackState(); + break; + case StateType.STAR_LOOP_ENTRY: + s = StarLoopEntryState(); + break; + case StateType.PLUS_LOOP_BACK: + s = PlusLoopbackState(); + break; + case StateType.LOOP_END: + s = LoopEndState(); + break; + default: + throw ArgumentError.value(type, 'state type', 'not valid.'); + } + + s.ruleIndex = ruleIndex; + return s; + } + + LexerAction lexerActionFactory(LexerActionType type, int data1, int data2) { + switch (type) { + case LexerActionType.CHANNEL: + return LexerChannelAction(data1); + + case LexerActionType.CUSTOM: + return LexerCustomAction(data1, data2); + + case LexerActionType.MODE: + return LexerModeAction(data1); + + case LexerActionType.MORE: + return LexerMoreAction.INSTANCE; + + case LexerActionType.POP_MODE: + return LexerPopModeAction.INSTANCE; + + case LexerActionType.PUSH_MODE: + return LexerPushModeAction(data1); + + case LexerActionType.SKIP: + return LexerSkipAction.INSTANCE; + + case LexerActionType.TYPE: + return LexerTypeAction(data1); + default: + throw ArgumentError.value(type, 'lexer action type', 'not valid.'); + } + } +} diff --git a/runtime/Dart/lib/src/atn/src/atn_simulator.dart b/runtime/Dart/lib/src/atn/src/atn_simulator.dart new file mode 100644 index 000000000..0af0d4b16 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn_simulator.dart @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */
+
+import '../../dfa/dfa.dart';
+import '../../prediction_context.dart';
+import 'atn.dart';
+import 'atn_config_set.dart';
+
+abstract class ATNSimulator {
+  /// Must distinguish between missing edge and edge we know leads nowhere.
+  static final DFAState ERROR =
+      DFAState(stateNumber: 0x7FFFFFFF, configs: ATNConfigSet());
+
+  final ATN atn;
+
+  /// The context cache maps all PredictionContext objects that are equals()
+  /// to a single cached copy. This cache is shared across all contexts
+  /// in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet
+  /// to use only cached nodes/graphs in addDFAState(). We don't want to
+  /// fill this during closure() since there are lots of contexts that
+  /// pop up but are not used ever again. It also greatly slows down closure().
+  ///
+  /// This cache makes a huge difference in memory and a little bit in speed.
+  /// For the Java grammar on java.*, it dropped the memory requirements
+  /// at the end from 25M to 16M. We don't store any of the full context
+  /// graphs in the DFA because they are limited to local context only,
+  /// but apparently there's a lot of repetition there as well. We optimize
+  /// the config contexts before storing the config set in the DFA states
+  /// by literally rebuilding them with cached subgraphs only.
+  ///
+  /// I tried a cache for use during closure operations, that was
+  /// whacked after each adaptivePredict(). It cost a little bit
+  /// more time I think and doesn't save on the overall footprint
+  /// so it's not worth the complexity.
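+  ///
+  /// A minimal sketch of the deduplication this enables (the two contexts
+  /// here, `someContext` and `equalButDistinctContext`, are hypothetical
+  /// stand-ins for whatever the simulator builds during prediction):
+  ///
+  /// ```dart
+  /// final cache = PredictionContextCache();
+  /// final a = cache.add(someContext);
+  /// final b = cache.add(equalButDistinctContext);
+  /// assert(identical(a, b)); // equal contexts collapse to one cached copy
+  /// ```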
+  final PredictionContextCache sharedContextCache;
+
+  ATNSimulator(this.atn, this.sharedContextCache);
+
+  void reset();
+
+  /// Clear the DFA cache used by the current instance. Since the DFA cache may
+  /// be shared by multiple ATN simulators, this method may affect the
+  /// performance (but not accuracy) of other parsers which are being used
+  /// concurrently.
+  ///
+  /// @throws UnsupportedOperationException if the current instance does not
+  /// support clearing the DFA.
+  ///
+  /// @since 4.3
+  void clearDFA() {
+    throw UnsupportedError(
+        'This ATN simulator does not support clearing the DFA.');
+  }
+
+  PredictionContext getCachedContext(PredictionContext context) {
+    if (sharedContextCache == null) return context;
+
+    final visited = <PredictionContext, PredictionContext>{};
+    return PredictionContext.getCachedContext(
+        context, sharedContextCache, visited);
+  }
+}
+
+/// Used to cache [PredictionContext] objects. It's used for the shared
+/// context cache associated with contexts in DFA states. This cache
+/// can be used for both lexers and parsers.
+class PredictionContextCache {
+  final cache = <PredictionContext, PredictionContext>{};
+
+  /// Add a context to the cache and return it. If the context already exists,
+  /// return that one instead and do not add a new context to the cache.
+  /// Protect shared cache from unsafe thread access.
+  PredictionContext add(PredictionContext ctx) {
+    if (ctx == PredictionContext.EMPTY) return PredictionContext.EMPTY;
+    final existing = cache[ctx];
+    if (existing != null) {
+      return existing;
+    }
+    cache[ctx] = ctx;
+    return ctx;
+  }
+
+  PredictionContext operator [](PredictionContext ctx) {
+    return cache[ctx];
+  }
+
+  int get length {
+    return cache.length;
+  }
+}
diff --git a/runtime/Dart/lib/src/atn/src/atn_state.dart b/runtime/Dart/lib/src/atn/src/atn_state.dart
new file mode 100644
index 000000000..340e97208
--- /dev/null
+++ b/runtime/Dart/lib/src/atn/src/atn_state.dart
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+import 'dart:developer';
+
+import 'package:logging/logging.dart';
+
+import '../../interval_set.dart';
+import 'atn.dart';
+import 'transition.dart';
+
+var INITIAL_NUM_TRANSITIONS = 4;
+
+enum StateType {
+  INVALID_TYPE,
+  BASIC,
+  RULE_START,
+  BLOCK_START,
+  PLUS_BLOCK_START,
+  STAR_BLOCK_START,
+  TOKEN_START,
+  RULE_STOP,
+  BLOCK_END,
+  STAR_LOOP_BACK,
+  STAR_LOOP_ENTRY,
+  PLUS_LOOP_BACK,
+  LOOP_END,
+}
+
+/// The following images show the relation of states and
+/// {@link ATNState#transitions} for various grammar constructs.
+///
+/// * Solid edges marked with an ε indicate a required
+///   [EpsilonTransition].
+///
+/// * Dashed edges indicate locations where any transition derived from
+///   [Transition] might appear.
+///
+/// * Dashed nodes are place holders for either a sequence of linked
+///   [BasicState] states or the inclusion of a block representing a nested
+///   construct in one of the forms below.
+///
+/// * Nodes showing multiple outgoing alternatives with a {@code ...} support
+///   any number of alternatives (one or more). Nodes without the {@code ...}
+///   only support the exact number of alternatives shown in the diagram.
+///
+/// The figures referenced by the original documentation (SVG images) cover:
+///
+/// * Basic Blocks: Rule; Block of 1 or more alternatives
+/// * Greedy Loops: Greedy Closure {@code (...)*}; Greedy Positive Closure
+///   {@code (...)+}; Greedy Optional {@code (...)?}
+/// * Non-Greedy Loops: Non-Greedy Closure {@code (...)*?}; Non-Greedy
+///   Positive Closure {@code (...)+?}; Non-Greedy Optional {@code (...)??}
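+///
+/// As a rough illustration of how these pieces connect, the states and edges
+/// of a deserialized [ATN] can be walked directly (`atn` is assumed to be an
+/// already-constructed ATN; null entries correspond to invalid states):
+///
+/// ```dart
+/// for (final state in atn.states) {
+///   if (state == null) continue;
+///   for (var i = 0; i < state.numberOfTransitions; i++) {
+///     final t = state.transition(i);
+///     print('${state.stateNumber} -> ${t.target.stateNumber}');
+///   }
+/// }
+/// ```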
+/// +/// +abstract class ATNState { + static final int INITIAL_NUM_TRANSITIONS = 4; + + static final int INVALID_STATE_NUMBER = -1; + + /// Which ATN are we in? */ + ATN atn; + + int stateNumber = INVALID_STATE_NUMBER; + + int ruleIndex; // at runtime, we don't have Rule objects + + bool epsilonOnlyTransitions = false; + + /// Track the transitions emanating from this ATN state. */ + List transitions = []; + + /// Used to cache lookahead during parsing, not used during construction */ + IntervalSet nextTokenWithinRule; + + @override + int get hashCode { + return stateNumber; + } + + @override + bool operator ==(Object o) { + // are these states same object? + if (o is ATNState) return stateNumber == o.stateNumber; + return false; + } + + bool isNonGreedyExitState() { + return false; + } + + @override + String toString() { + return stateNumber.toString(); + } + + int get numberOfTransitions { + return transitions.length; + } + + void addTransition(Transition e) { + addTransitionAt(transitions.length, e); + } + + void addTransitionAt(int index, Transition e) { + if (transitions.isEmpty) { + epsilonOnlyTransitions = e.isEpsilon; + } else if (epsilonOnlyTransitions != e.isEpsilon) { + log('ATN state $stateNumber has both epsilon and non-epsilon transitions.\n', + level: Level.SEVERE.value); + epsilonOnlyTransitions = false; + } + + var alreadyPresent = false; + for (var t in transitions) { + if (t.target.stateNumber == e.target.stateNumber) { + if (t.label != null && e.label != null && t.label == e.label) { +// System.err.println("Repeated transition upon "+e.label()+" from "+stateNumber+"->"+t.target.stateNumber); + alreadyPresent = true; + break; + } else if (t.isEpsilon && e.isEpsilon) { +// System.err.println("Repeated epsilon transition from "+stateNumber+"->"+t.target.stateNumber); + alreadyPresent = true; + break; + } + } + } + if (!alreadyPresent) { + transitions.insert(index, e); + } + } + + Transition transition(int i) { + return transitions[i]; + } + + void setTransition(int i, Transition e) { + transitions[i] = e; + } + + Transition removeTransition(int index) { + return transitions.removeAt(index); + } + + StateType get stateType; + + bool onlyHasEpsilonTransitions() => epsilonOnlyTransitions; + + void setRuleIndex(int ruleIndex) { + this.ruleIndex = ruleIndex; + } +} + +class BasicState extends ATNState { + @override + StateType get stateType => StateType.BASIC; +} + +class RuleStartState extends ATNState { + var stopState; + var isLeftRecursiveRule = false; + + @override + StateType get stateType => StateType.RULE_START; +} + +abstract class DecisionState extends ATNState { + int decision = 0; + bool nonGreedy = false; +} + +// The start of a regular {@code (...)} block. +abstract class BlockStartState extends DecisionState { + BlockEndState endState; +} + +class BasicBlockStartState extends BlockStartState { + @override + StateType get stateType => StateType.BLOCK_START; +} + +/// Start of {@code (A|B|...)+} loop. Technically a decision state, but +/// we don't use for code generation; somebody might need it, so I'm defining +/// it for completeness. In reality, the [PlusLoopbackState] node is the +/// real decision-making note for {@code A+}. +class PlusBlockStartState extends BlockStartState { + PlusLoopbackState loopBackState; + + @override + StateType get stateType => StateType.PLUS_BLOCK_START; +} + +/// The block that begins a closure loop. 
+class StarBlockStartState extends BlockStartState { + @override + StateType get stateType => StateType.STAR_BLOCK_START; +} + +/// The Tokens rule start state linking to each lexer rule start state */ +class TokensStartState extends DecisionState { + @override + StateType get stateType => StateType.TOKEN_START; +} + +/// The last node in the ATN for a rule, unless that rule is the start symbol. +/// In that case, there is one transition to EOF. Later, we might encode +/// references to all calls to this rule to compute FOLLOW sets for +/// error handling. +class RuleStopState extends ATNState { + @override + StateType get stateType => StateType.RULE_STOP; +} + +/// Terminal node of a simple {@code (a|b|c)} block. +class BlockEndState extends ATNState { + BlockStartState startState; + + @override + StateType get stateType => StateType.BLOCK_END; +} + +class StarLoopbackState extends ATNState { + StarLoopEntryState get loopEntryState { + return transition(0).target; + } + + @override + StateType get stateType => StateType.STAR_LOOP_BACK; +} + +class StarLoopEntryState extends DecisionState { + StarLoopbackState loopBackState; + + /// Indicates whether this state can benefit from a precedence DFA during SLL + /// decision making. + /// + ///
+  /// This is a computed property that is calculated during ATN deserialization
+  /// and stored for use in [ParserATNSimulator] and
+  /// [ParserInterpreter].
+ /// + /// @see DFA#isPrecedenceDfa() + bool isPrecedenceDecision = false; + + @override + StateType get stateType => StateType.STAR_LOOP_ENTRY; +} + +/// Decision state for {@code A+} and {@code (A|B)+}. It has two transitions: +/// one to the loop back to start of the block and one to exit. +class PlusLoopbackState extends DecisionState { + @override + StateType get stateType => StateType.PLUS_LOOP_BACK; +} + +/// Mark the end of a * or + loop. +class LoopEndState extends ATNState { + ATNState loopBackState; + + @override + StateType get stateType => StateType.LOOP_END; +} diff --git a/runtime/Dart/lib/src/atn/src/atn_type.dart b/runtime/Dart/lib/src/atn/src/atn_type.dart new file mode 100644 index 000000000..fa1f56844 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/atn_type.dart @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +/// Represents the type of recognizer an ATN applies to. +enum ATNType { + /// A lexer grammar. + LEXER, + + /// A parser grammar. + PARSER +} diff --git a/runtime/Dart/lib/src/atn/src/info.dart b/runtime/Dart/lib/src/atn/src/info.dart new file mode 100644 index 000000000..368c372a3 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/info.dart @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../token_stream.dart'; +import '../../util/bit_set.dart'; +import 'atn_config_set.dart'; +import 'profiling_atn_simulator.dart'; +import 'semantic_context.dart'; + +/// This class represents profiling event information for a context sensitivity. +/// Context sensitivities are decisions where a particular input resulted in an +/// SLL conflict, but LL prediction produced a single unique alternative. +/// +///
+/// In some cases, the unique alternative identified by LL prediction is not
+/// equal to the minimum represented alternative in the conflicting SLL
+/// configuration set. Grammars and inputs which result in this scenario are
+/// unable to use {@link PredictionMode#SLL}, which in turn means they cannot
+/// use the two-stage parsing strategy to improve parsing performance for that
+/// input.
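+///
+/// For example, once profiling data has been collected, the context
+/// sensitivities recorded for each decision can be inspected through
+/// [DecisionInfo] (the `decisions` list below is assumed to come from a
+/// profiling run):
+///
+/// ```dart
+/// for (final decision in decisions) {
+///   for (final info in decision.contextSensitivities) {
+///     print('decision ${info.decision}: '
+///         'context sensitivity at ${info.startIndex}..${info.stopIndex}');
+///   }
+/// }
+/// ```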
+/// +/// @see ParserATNSimulator#reportContextSensitivity +/// @see ANTLRErrorListener#reportContextSensitivity +/// +/// @since 4.3 +class ContextSensitivityInfo extends DecisionEventInfo { + /// Constructs a new instance of the [ContextSensitivityInfo] class + /// with the specified detailed context sensitivity information. + /// + /// @param decision The decision number + /// @param configs The final configuration set containing the unique + /// alternative identified by full-context prediction + /// @param input The input token stream + /// @param startIndex The start index for the current prediction + /// @param stopIndex The index at which the context sensitivity was + /// identified during full-context prediction + ContextSensitivityInfo(int decision, ATNConfigSet configs, TokenStream input, + int startIndex, int stopIndex) + : super(decision, configs, input, startIndex, stopIndex, true); +} + +/// This is the base class for gathering detailed information about prediction +/// events which occur during parsing. +/// +/// Note that we could record the parser call stack at the time this event +/// occurred but in the presence of left recursive rules, the stack is kind of +/// meaningless. It's better to look at the individual configurations for their +/// individual stacks. Of course that is a [PredictionContext] object +/// not a parse tree node and so it does not have information about the extent +/// (start...stop) of the various subtrees. Examining the stack tops of all +/// configurations provide the return states for the rule invocations. +/// From there you can get the enclosing rule. +/// +/// @since 4.3 +class DecisionEventInfo { + /// The invoked decision number which this event is related to. + /// + /// @see ATN#decisionToState + final int decision; + + /// The configuration set containing additional information relevant to the + /// prediction state when the current event occurred, or null if no + /// additional information is relevant or available. + final ATNConfigSet configs; + + /// The input token stream which is being parsed. + final TokenStream input; + + /// The token index in the input stream at which the current prediction was + /// originally invoked. + final int startIndex; + + /// The token index in the input stream at which the current event occurred. + final int stopIndex; + + /// [true] if the current event occurred during LL prediction; + /// otherwise, [false] if the input occurred during SLL prediction. + final bool fullCtx; + + DecisionEventInfo(this.decision, this.configs, this.input, this.startIndex, + this.stopIndex, this.fullCtx); +} + +/// This class contains profiling gathered for a particular decision. +/// +///
+/// Parsing performance in ANTLR 4 is heavily influenced by both static factors
+/// (e.g. the form of the rules in the grammar) and dynamic factors (e.g. the
+/// choice of input and the state of the DFA cache at the time profiling
+/// operations are started). For best results, gather and use aggregate
+/// statistics from a large sample of inputs representing the inputs expected in
+/// production before using the results to make changes in the grammar.
+/// +/// @since 4.3 +class DecisionInfo { + /// The decision number, which is an index into {@link ATN#decisionToState}. + final int decision; + + /// The total number of times {@link ParserATNSimulator#adaptivePredict} was + /// invoked for this decision. + int invocations; + + /// The total time spent in {@link ParserATNSimulator#adaptivePredict} for + /// this decision, in nanoseconds. + /// + ///
+  /// The value of this field contains the sum of differential results obtained
+  /// by {@link System#nanoTime()}, and is not adjusted to compensate for JIT
+  /// and/or garbage collection overhead. For best accuracy, use a modern JVM
+  /// implementation that provides precise results from
+  /// {@link System#nanoTime()}, and perform profiling in a separate process
+  /// which is warmed up by parsing the input prior to profiling. If desired,
+  /// call {@link ATNSimulator#clearDFA} to reset the DFA cache to its initial
+  /// state before starting the profiling measurement pass.
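+  ///
+  /// A small sketch of aggregating this field across decisions (mirroring
+  /// what [ParseInfo.totalTimeInPrediction] computes; `decisions` is assumed
+  /// to be a `List<DecisionInfo>` from a profiling run):
+  ///
+  /// ```dart
+  /// var totalNanos = 0;
+  /// for (final d in decisions) {
+  ///   totalNanos += d.timeInPrediction;
+  /// }
+  /// print('prediction took ${totalNanos / 1e6} ms');
+  /// ```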
+ int timeInPrediction; + + /// The sum of the lookahead required for SLL prediction for this decision. + /// Note that SLL prediction is used before LL prediction for performance + /// reasons even when {@link PredictionMode#LL} or + /// {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used. + int SLL_TotalLook; + + /// Gets the minimum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. + int SLL_MinLook; + + /// Gets the maximum lookahead required for any single SLL prediction to + /// complete for this decision, by reaching a unique prediction, reaching an + /// SLL conflict state, or encountering a syntax error. + int SLL_MaxLook; + + /// Gets the [LookaheadEventInfo] associated with the event where the + /// {@link #SLL_MaxLook} value was set. + LookaheadEventInfo SLL_MaxLookEvent; + + /// The sum of the lookahead required for LL prediction for this decision. + /// Note that LL prediction is only used when SLL prediction reaches a + /// conflict state. + int LL_TotalLook; + + /// Gets the minimum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// {@link PredictionMode#LL}, an ambiguity state (for + /// {@link PredictionMode#LL_EXACT_AMBIG_DETECTION}, or a syntax error. + int LL_MinLook; + + /// Gets the maximum lookahead required for any single LL prediction to + /// complete for this decision. An LL prediction completes when the algorithm + /// reaches a unique prediction, a conflict state (for + /// {@link PredictionMode#LL}, an ambiguity state (for + /// {@link PredictionMode#LL_EXACT_AMBIG_DETECTION}, or a syntax error. + int LL_MaxLook; + + /// Gets the [LookaheadEventInfo] associated with the event where the + /// {@link #LL_MaxLook} value was set. + LookaheadEventInfo LL_MaxLookEvent; + + /// A collection of [ContextSensitivityInfo] instances describing the + /// context sensitivities encountered during LL prediction for this decision. + /// + /// @see ContextSensitivityInfo + final List contextSensitivities = []; + + /// A collection of [ErrorInfo] instances describing the parse errors + /// identified during calls to {@link ParserATNSimulator#adaptivePredict} for + /// this decision. + /// + /// @see ErrorInfo + final List errors = []; + + /// A collection of [AmbiguityInfo] instances describing the + /// ambiguities encountered during LL prediction for this decision. + /// + /// @see AmbiguityInfo + final List ambiguities = []; + + /// A collection of [PredicateEvalInfo] instances describing the + /// results of evaluating individual predicates during prediction for this + /// decision. + /// + /// @see PredicateEvalInfo + final List predicateEvals = []; + + /// The total number of ATN transitions required during SLL prediction for + /// this decision. An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + ///
+  /// If DFA caching of SLL transitions is employed by the implementation, ATN
+  /// computation may cache the computed edge for efficient lookup during
+  /// future parsing of this decision. Otherwise, the SLL parsing algorithm
+  /// will use ATN transitions exclusively.
+ /// + /// @see #SLL_ATNTransitions + /// @see ParserATNSimulator#computeTargetState + /// @see LexerATNSimulator#computeTargetState + int SLL_ATNTransitions; + + /// The total number of DFA transitions required during SLL prediction for + /// this decision. + /// + ///
+  /// If the ATN simulator implementation does not use DFA caching for SLL
+  /// transitions, this value will be 0.
+ /// + /// @see ParserATNSimulator#getExistingTargetState + /// @see LexerATNSimulator#getExistingTargetState + int SLL_DFATransitions; + + /// Gets the total number of times SLL prediction completed in a conflict + /// state, resulting in fallback to LL prediction. + /// + ///
+  /// Note that this value is not related to whether or not
+  /// {@link PredictionMode#SLL} may be used successfully with a particular
+  /// grammar. If the ambiguity resolution algorithm applied to the SLL
+  /// conflicts for this decision produces the same result as LL prediction for
+  /// this decision, {@link PredictionMode#SLL} would produce the same overall
+  /// parsing result as {@link PredictionMode#LL}.
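+  ///
+  /// A sketch of using this counter to spot decisions that fall back to
+  /// full-context prediction (essentially what [ParseInfo.llDecisions]
+  /// computes; `decisions` is assumed to be a `List<DecisionInfo>`):
+  ///
+  /// ```dart
+  /// for (var i = 0; i < decisions.length; i++) {
+  ///   if (decisions[i].LL_Fallback > 0) {
+  ///     print('decision $i fell back to LL ${decisions[i].LL_Fallback} times');
+  ///   }
+  /// }
+  /// ```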
+ int LL_Fallback; + + /// The total number of ATN transitions required during LL prediction for + /// this decision. An ATN transition is determined by the number of times the + /// DFA does not contain an edge that is required for prediction, resulting + /// in on-the-fly computation of that edge. + /// + ///
+  /// If DFA caching of LL transitions is employed by the implementation, ATN
+  /// computation may cache the computed edge for efficient lookup during
+  /// future parsing of this decision. Otherwise, the LL parsing algorithm will
+  /// use ATN transitions exclusively.
+ /// + /// @see #LL_DFATransitions + /// @see ParserATNSimulator#computeTargetState + /// @see LexerATNSimulator#computeTargetState + int LL_ATNTransitions; + + /// The total number of DFA transitions required during LL prediction for + /// this decision. + /// + ///
+  /// If the ATN simulator implementation does not use DFA caching for LL
+  /// transitions, this value will be 0.
+ /// + /// @see ParserATNSimulator#getExistingTargetState + /// @see LexerATNSimulator#getExistingTargetState + int LL_DFATransitions; + + /// Constructs a new instance of the [DecisionInfo] class to contain + /// statistics for a particular decision. + /// + /// @param decision The decision number + DecisionInfo(this.decision); + + @override + String toString() { + return '{' + 'decision=$decision' + ', contextSensitivities=${contextSensitivities.length}' + ', errors=${errors.length}' + ', ambiguities=${ambiguities.length}' + ', SLL_lookahead=$SLL_TotalLook' + ', SLL_ATNTransitions=$SLL_ATNTransitions, SLL_DFATransitions=$SLL_DFATransitions, LL_Fallback=$LL_Fallback, LL_lookahead=$LL_TotalLook, LL_ATNTransitions=$LL_ATNTransitions}'; + } +} + +/// This class represents profiling event information for an ambiguity. +/// Ambiguities are decisions where a particular input resulted in an SLL +/// conflict, followed by LL prediction also reaching a conflict state +/// (indicating a true ambiguity in the grammar). +/// +///
+/// This event may be reported during SLL prediction in cases where the
+/// conflicting SLL configuration set provides sufficient information to
+/// determine that the SLL conflict is truly an ambiguity. For example, if none
+/// of the ATN configurations in the conflicting SLL configuration set have
+/// traversed a global follow transition (i.e.
+/// {@link ATNConfig#reachesIntoOuterContext} is 0 for all configurations), then
+/// the result of SLL prediction for that input is known to be equivalent to the
+/// result of LL prediction for that input.
+/// +///
+/// In some cases, the minimum represented alternative in the conflicting LL
+/// configuration set is not equal to the minimum represented alternative in the
+/// conflicting SLL configuration set. Grammars and inputs which result in this
+/// scenario are unable to use {@link PredictionMode#SLL}, which in turn means
+/// they cannot use the two-stage parsing strategy to improve parsing
+/// performance for that input.
+/// +/// @see ParserATNSimulator#reportAmbiguity +/// @see ANTLRErrorListener#reportAmbiguity +/// +/// @since 4.3 +class AmbiguityInfo extends DecisionEventInfo { + /// The set of alternative numbers for this decision event that lead to a valid parse. */ + BitSet ambigAlts; + + /// Constructs a new instance of the [AmbiguityInfo] class with the + /// specified detailed ambiguity information. + /// + /// @param decision The decision number + /// @param configs The final configuration set identifying the ambiguous + /// alternatives for the current input + /// @param ambigAlts The set of alternatives in the decision that lead to a valid parse. + /// The predicted alt is the min(ambigAlts) + /// @param input The input token stream + /// @param startIndex The start index for the current prediction + /// @param stopIndex The index at which the ambiguity was identified during + /// prediction + /// @param fullCtx [true] if the ambiguity was identified during LL + /// prediction; otherwise, [false] if the ambiguity was identified + /// during SLL prediction + AmbiguityInfo(int decision, ATNConfigSet configs, this.ambigAlts, + TokenStream input, int startIndex, int stopIndex, bool fullCtx) + : super(decision, configs, input, startIndex, stopIndex, fullCtx); +} + +/// This class represents profiling event information for a syntax error +/// identified during prediction. Syntax errors occur when the prediction +/// algorithm is unable to identify an alternative which would lead to a +/// successful parse. +/// +/// @see Parser#notifyErrorListeners(Token, String, RecognitionException) +/// @see ANTLRErrorListener#syntaxError +/// +/// @since 4.3 +class ErrorInfo extends DecisionEventInfo { + /// Constructs a new instance of the [ErrorInfo] class with the + /// specified detailed syntax error information. + /// + /// @param decision The decision number + /// @param configs The final configuration set reached during prediction + /// prior to reaching the {@link ATNSimulator#ERROR} state + /// @param input The input token stream + /// @param startIndex The start index for the current prediction + /// @param stopIndex The index at which the syntax error was identified + /// @param fullCtx [true] if the syntax error was identified during LL + /// prediction; otherwise, [false] if the syntax error was identified + /// during SLL prediction + ErrorInfo(int decision, ATNConfigSet configs, TokenStream input, + int startIndex, int stopIndex, bool fullCtx) + : super(decision, configs, input, startIndex, stopIndex, fullCtx); +} + +/// This class represents profiling event information for tracking the lookahead +/// depth required in order to make a prediction. +/// +/// @since 4.3 +class LookaheadEventInfo extends DecisionEventInfo { + /// The alternative chosen by adaptivePredict(), not necessarily + /// the outermost alt shown for a rule; left-recursive rules have + /// user-level alts that differ from the rewritten rule with a (...) block + /// and a (..)* loop. + int predictedAlt; + + /// Constructs a new instance of the [LookaheadEventInfo] class with + /// the specified detailed lookahead information. 
+ /// + /// @param decision The decision number + /// @param configs The final configuration set containing the necessary + /// information to determine the result of a prediction, or null if + /// the final configuration set is not available + /// @param input The input token stream + /// @param startIndex The start index for the current prediction + /// @param stopIndex The index at which the prediction was finally made + /// @param fullCtx [true] if the current lookahead is part of an LL + /// prediction; otherwise, [false] if the current lookahead is part of + /// an SLL prediction + LookaheadEventInfo(int decision, ATNConfigSet configs, this.predictedAlt, + TokenStream input, int startIndex, int stopIndex, bool fullCtx) + : super(decision, configs, input, startIndex, stopIndex, fullCtx); +} + +/// This class represents profiling event information for semantic predicate +/// evaluations which occur during prediction. +/// +/// @see ParserATNSimulator#evalSemanticContext +/// +/// @since 4.3 +class PredicateEvalInfo extends DecisionEventInfo { + /// The semantic context which was evaluated. + final SemanticContext semctx; + + /// The alternative number for the decision which is guarded by the semantic + /// context {@link #semctx}. Note that other ATN + /// configurations may predict the same alternative which are guarded by + /// other semantic contexts and/or {@link SemanticContext#NONE}. + final int predictedAlt; + + /// The result of evaluating the semantic context {@link #semctx}. + final bool evalResult; + + /// Constructs a new instance of the [PredicateEvalInfo] class with the + /// specified detailed predicate evaluation information. + /// + /// @param decision The decision number + /// @param input The input token stream + /// @param startIndex The start index for the current prediction + /// @param stopIndex The index at which the predicate evaluation was + /// triggered. Note that the input stream may be reset to other positions for + /// the actual evaluation of individual predicates. + /// @param semctx The semantic context which was evaluated + /// @param evalResult The results of evaluating the semantic context + /// @param predictedAlt The alternative number for the decision which is + /// guarded by the semantic context [semctx]. See {@link #predictedAlt} + /// for more information. + /// @param fullCtx [true] if the semantic context was + /// evaluated during LL prediction; otherwise, [false] if the semantic + /// context was evaluated during SLL prediction + /// + /// @see ParserATNSimulator#evalSemanticContext(SemanticContext, ParserRuleContext, int, boolean) + /// @see SemanticContext#eval(Recognizer, RuleContext) + PredicateEvalInfo( + int decision, + TokenStream input, + int startIndex, + int stopIndex, + this.semctx, + this.evalResult, + this.predictedAlt, + bool fullCtx) + : super(decision, ATNConfigSet(), input, startIndex, stopIndex, fullCtx); +} + +/// This class provides access to specific and aggregate statistics gathered +/// during profiling of a parser. +/// +/// @since 4.3 +class ParseInfo { + final ProfilingATNSimulator atnSimulator; + + ParseInfo(this.atnSimulator); + + /// Gets an array of [DecisionInfo] instances containing the profiling + /// information gathered for each decision in the ATN. + /// + /// @return An array of [DecisionInfo] instances, indexed by decision + /// number. 
+ List get decisionInfo { + return atnSimulator.decisionInfo; + } + + /// Gets the decision numbers for decisions that required one or more + /// full-context predictions during parsing. These are decisions for which + /// {@link DecisionInfo#LL_Fallback} is non-zero. + /// + /// @return A list of decision numbers which required one or more + /// full-context predictions during parsing. + List get llDecisions { + final decisions = atnSimulator.decisionInfo; + final LL = []; + for (var i = 0; i < decisions.length; i++) { + final fallBack = decisions[i].LL_Fallback; + if (fallBack > 0) LL.add(i); + } + return LL; + } + + /// Gets the total time spent during prediction across all decisions made + /// during parsing. This value is the sum of + /// {@link DecisionInfo#timeInPrediction} for all decisions. + int get totalTimeInPrediction { + final decisions = atnSimulator.decisionInfo; + var t = 0; + for (var i = 0; i < decisions.length; i++) { + t += decisions[i].timeInPrediction; + } + return t; + } + + /// Gets the total number of SLL lookahead operations across all decisions + /// made during parsing. This value is the sum of + /// {@link DecisionInfo#SLL_TotalLook} for all decisions. + int get totalSLLLookaheadOps { + final decisions = atnSimulator.decisionInfo; + var k = 0; + for (var i = 0; i < decisions.length; i++) { + k += decisions[i].SLL_TotalLook; + } + return k; + } + + /// Gets the total number of LL lookahead operations across all decisions + /// made during parsing. This value is the sum of + /// {@link DecisionInfo#LL_TotalLook} for all decisions. + int get totalLLLookaheadOps { + final decisions = atnSimulator.decisionInfo; + var k = 0; + for (var i = 0; i < decisions.length; i++) { + k += decisions[i].LL_TotalLook; + } + return k; + } + + /// Gets the total number of ATN lookahead operations for SLL prediction + /// across all decisions made during parsing. + int get totalSLLATNLookaheadOps { + final decisions = atnSimulator.decisionInfo; + var k = 0; + for (var i = 0; i < decisions.length; i++) { + k += decisions[i].SLL_ATNTransitions; + } + return k; + } + + /// Gets the total number of ATN lookahead operations for LL prediction + /// across all decisions made during parsing. + int get totalLLATNLookaheadOps { + final decisions = atnSimulator.decisionInfo; + var k = 0; + for (var i = 0; i < decisions.length; i++) { + k += decisions[i].LL_ATNTransitions; + } + return k; + } + + /// Gets the total number of ATN lookahead operations for SLL and LL + /// prediction across all decisions made during parsing. + /// + ///
+  /// This value is the sum of {@link #getTotalSLLATNLookaheadOps} and
+  /// {@link #getTotalLLATNLookaheadOps}.
+ int get totalATNLookaheadOps { + final decisions = atnSimulator.decisionInfo; + var k = 0; + for (var i = 0; i < decisions.length; i++) { + k += decisions[i].SLL_ATNTransitions; + k += decisions[i].LL_ATNTransitions; + } + return k; + } + + /// Gets the total number of DFA states stored in the DFA cache for all + /// decisions in the ATN. + int get dfaSize { + var n = 0; + final decisionToDFA = atnSimulator.decisionToDFA; + for (var i = 0; i < decisionToDFA.length; i++) { + n += getDFASizeAt(i); + } + return n; + } + + /// Gets the total number of DFA states stored in the DFA cache for a + /// particular decision. + int getDFASizeAt(int decision) { + final decisionToDFA = atnSimulator.decisionToDFA[decision]; + return decisionToDFA.states.length; + } +} diff --git a/runtime/Dart/lib/src/atn/src/lexer_action.dart b/runtime/Dart/lib/src/atn/src/lexer_action.dart new file mode 100644 index 000000000..a8630671e --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/lexer_action.dart @@ -0,0 +1,601 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../lexer.dart'; +import '../../util/murmur_hash.dart'; + +/// Represents the serialization type of a [LexerAction]. +/// +/// @since 4.2 +enum LexerActionType { + /// The type of a [LexerChannelAction] action. + CHANNEL, + /// The type of a [LexerCustomAction] action. + CUSTOM, + /// The type of a [LexerModeAction] action. + MODE, + /// The type of a [LexerMoreAction] action. + MORE, + /// The type of a [LexerPopModeAction] action. + POP_MODE, + /// The type of a [LexerPushModeAction] action. + PUSH_MODE, + /// The type of a [LexerSkipAction] action. + SKIP, + /// The type of a [LexerTypeAction] action. + TYPE, +} + +/// Represents a single action which can be executed following the successful +/// match of a lexer rule. Lexer actions are used for both embedded action syntax +/// and ANTLR 4's new lexer command syntax. +/// +/// @since 4.2 +abstract class LexerAction { + /// Gets the serialization type of the lexer action. + /// + /// @return The serialization type of the lexer action. + LexerActionType get actionType; + + /// Gets whether the lexer action is position-dependent. Position-dependent + /// actions may have different semantics depending on the [CharStream] + /// index at the time the action is executed. + /// + ///
+  /// Many lexer commands, including [type], [skip], and
+  /// [more], do not check the input index during their execution.
+  /// Actions like this are position-independent, and may be stored more
+  /// efficiently as part of the {@link LexerATNConfig#lexerActionExecutor}.
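+  ///
+  /// For instance, of the concrete actions defined in this file, the skip
+  /// command is position-independent while a custom embedded action is not:
+  ///
+  /// ```dart
+  /// assert(!LexerSkipAction.INSTANCE.isPositionDependent);
+  /// assert(LexerCustomAction(0, 0).isPositionDependent);
+  /// ```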
+ /// + /// @return [true] if the lexer action semantics can be affected by the + /// position of the input [CharStream] at the time it is executed; + /// otherwise, [false]. + bool get isPositionDependent; + + /// Execute the lexer action in the context of the specified [Lexer]. + /// + ///
+  /// For position-dependent actions, the input stream must already be
+  /// positioned correctly prior to calling this method.
+ /// + /// @param lexer The lexer instance. + void execute(Lexer lexer); +} + +/// Implements the [channel] lexer action by calling +/// {@link Lexer#setChannel} with the assigned channel. +/// +/// @since 4.2 +class LexerChannelAction implements LexerAction { + /// Gets the channel to use for the [Token] created by the lexer. + /// + /// @return The channel to use for the [Token] created by the lexer. + final int channel; + + /// Constructs a new [channel] action with the specified channel value. + /// @param channel The channel value to pass to {@link Lexer#setChannel}. + LexerChannelAction(this.channel); + + @override + LexerActionType get actionType => LexerActionType.CHANNEL; + + @override + bool get isPositionDependent => false; + + /// {@inheritDoc} + /// + ///
+  /// This action is implemented by calling {@link Lexer#setChannel} with the
+  /// value provided by {@link #getChannel}.
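+  ///
+  /// In a grammar, this action typically comes from a lexer command such as
+  /// `WS : [ \t\r\n]+ -> channel(HIDDEN);`. Executing the deserialized action
+  /// by hand looks like this (a sketch; `lexer` is assumed to be a live
+  /// [Lexer], and `Token.HIDDEN_CHANNEL` its standard hidden-channel
+  /// constant):
+  ///
+  /// ```dart
+  /// final action = LexerChannelAction(Token.HIDDEN_CHANNEL);
+  /// action.execute(lexer); // the next emitted token uses the hidden channel
+  /// ```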
+ @override + void execute(Lexer lexer) { + lexer.channel = channel; + } + + @override + int get hashCode { + var hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + hash = MurmurHash.update(hash, channel); + return MurmurHash.finish(hash, 2); + } + + @override + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (obj is LexerChannelAction) { + return channel == obj.channel; + } + + return false; + } + + @override + String toString() { + return 'channel($channel)'; + } +} + +/// Executes a custom lexer action by calling {@link Recognizer#action} with the +/// rule and action indexes assigned to the custom action. The implementation of +/// a custom action is added to the generated code for the lexer in an override +/// of {@link Recognizer#action} when the grammar is compiled. +/// +///
+/// This class may represent embedded actions created with the {...}
+/// syntax in ANTLR 4, as well as actions created for lexer commands where the
+/// command argument could not be evaluated when the grammar was compiled.
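+///
+/// For example, a lexer rule with an embedded action such as
+/// `INT : [0-9]+ { doSomething(); };` is compiled into a [LexerCustomAction]
+/// whose rule and action indexes route back to the generated lexer's
+/// `action` override (a sketch; the indexes below are illustrative only,
+/// and `lexer` is assumed to be a live [Lexer]):
+///
+/// ```dart
+/// final custom = LexerCustomAction(0, 0); // ruleIndex 0, actionIndex 0
+/// custom.execute(lexer); // dispatches to lexer.action(null, 0, 0)
+/// ```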
+/// +/// @since 4.2 +class LexerCustomAction implements LexerAction { + /// Gets the rule index to use for calls to {@link Recognizer#action}. + /// + /// @return The rule index for the custom action. + final int ruleIndex; + + /// Gets the action index to use for calls to {@link Recognizer#action}. + /// + /// @return The action index for the custom action. + final int actionIndex; + + /// Constructs a custom lexer action with the specified rule and action + /// indexes. + /// + /// @param ruleIndex The rule index to use for calls to + /// {@link Recognizer#action}. + /// @param actionIndex The action index to use for calls to + /// {@link Recognizer#action}. + LexerCustomAction(this.ruleIndex, this.actionIndex); + + /// {@inheritDoc} + /// + /// @return This method returns {@link LexerActionType#CUSTOM}. + + @override + LexerActionType get actionType => LexerActionType.CUSTOM; + + /// Gets whether the lexer action is position-dependent. Position-dependent + /// actions may have different semantics depending on the [CharStream] + /// index at the time the action is executed. + /// + ///
+  /// Custom actions are position-dependent since they may represent a
+  /// user-defined embedded action which makes calls to methods like
+  /// {@link Lexer#getText}.
+ /// + /// @return This method returns [true]. + + @override + bool get isPositionDependent => true; + + /// {@inheritDoc} + /// + ///
+  /// Custom actions are implemented by calling {@link Lexer#action} with the
+  /// appropriate rule and action indexes.
+ + @override + void execute(Lexer lexer) { + lexer.action(null, ruleIndex, actionIndex); + } + + @override + int get hashCode { + var hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + hash = MurmurHash.update(hash, ruleIndex); + hash = MurmurHash.update(hash, actionIndex); + return MurmurHash.finish(hash, 3); + } + + @override + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (obj is LexerCustomAction) { + return ruleIndex == obj.ruleIndex && actionIndex == obj.actionIndex; + } + return false; + } +} + +/// Implements the [mode] lexer action by calling {@link Lexer#mode} with +/// the assigned mode. +/// +/// @since 4.2 +class LexerModeAction implements LexerAction { + /// Get the lexer mode this action should transition the lexer to. + /// + /// @return The lexer mode for this [mode] command. + final int mode; + + /// Constructs a new [mode] action with the specified mode value. + /// @param mode The mode value to pass to {@link Lexer#mode}. + LexerModeAction(this.mode); + + /// {@inheritDoc} + /// @return This method returns {@link LexerActionType#MODE}. + + @override + LexerActionType get actionType => LexerActionType.MODE; + + /// {@inheritDoc} + /// @return This method returns [false]. + + @override + bool get isPositionDependent => false; + + /// {@inheritDoc} + /// + ///
+  /// This action is implemented by calling {@link Lexer#mode} with the
+  /// value provided by {@link #getMode}.
+ + @override + void execute(Lexer lexer) { + lexer.mode(mode); + } + + @override + int get hashCode { + var hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + hash = MurmurHash.update(hash, mode); + return MurmurHash.finish(hash, 2); + } + + @override + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (obj is LexerModeAction) { + return mode == obj.mode; + } + return false; + } + + @override + String toString() { + return 'mode($mode)'; + } +} + +/// Implements the [more] lexer action by calling {@link Lexer#more}. +/// +///
+/// The [more] command does not have any parameters, so this action is
+/// implemented as a singleton instance exposed by {@link #INSTANCE}.
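+///
+/// A lexer rule ending in `-> more` is serialized as this singleton;
+/// executing it simply delegates to the lexer (a sketch, with `lexer`
+/// assumed to be a live [Lexer]):
+///
+/// ```dart
+/// LexerMoreAction.INSTANCE.execute(lexer); // equivalent to lexer.more()
+/// ```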
+/// +/// @since 4.2 +class LexerMoreAction implements LexerAction { + /// Provides a singleton instance of this parameterless lexer action. + static final LexerMoreAction INSTANCE = LexerMoreAction(); + + /// {@inheritDoc} + /// @return This method returns {@link LexerActionType#MORE}. + @override + LexerActionType get actionType => LexerActionType.MORE; + + /// {@inheritDoc} + /// @return This method returns [false]. + + @override + bool get isPositionDependent => false; + + /// {@inheritDoc} + /// + ///

This action is implemented by calling {@link Lexer#more}.

+ + @override + void execute(Lexer lexer) { + lexer.more(); + } + + @override + int get hashCode { + var hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + return MurmurHash.finish(hash, 1); + } + + @override + bool operator ==(Object obj) { + return identical(obj, this); + } + + @override + String toString() { + return 'more'; + } +} + +/// Implements the [popMode] lexer action by calling {@link Lexer#popMode}. +/// +///

The [popMode] command does not have any parameters, so this action is +/// implemented as a singleton instance exposed by {@link #INSTANCE}.

+/// +/// @since 4.2 +class LexerPopModeAction implements LexerAction { + /// Provides a singleton instance of this parameterless lexer action. + static final LexerPopModeAction INSTANCE = LexerPopModeAction(); + + /// {@inheritDoc} + /// @return This method returns {@link LexerActionType#POP_MODE}. + + @override + LexerActionType get actionType => LexerActionType.POP_MODE; + + /// {@inheritDoc} + /// @return This method returns [false]. + + @override + bool get isPositionDependent => false; + + /// {@inheritDoc} + /// + ///

This action is implemented by calling {@link Lexer#popMode}.

+ + @override + void execute(Lexer lexer) { + lexer.popMode(); + } + + @override + int get hashCode { + var hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + return MurmurHash.finish(hash, 1); + } + + @override + bool operator ==(Object obj) { + return identical(obj, this); + } + + @override + String toString() { + return 'popMode'; + } +} + +/// Implements the [pushMode] lexer action by calling +/// {@link Lexer#pushMode} with the assigned mode. +/// +/// @since 4.2 +class LexerPushModeAction implements LexerAction { + /// Get the lexer mode this action should transition the lexer to. + /// + /// @return The lexer mode for this [pushMode] command. + final int mode; + + /// Constructs a new [pushMode] action with the specified mode value. + /// @param mode The mode value to pass to {@link Lexer#pushMode}. + LexerPushModeAction(this.mode); + + /// {@inheritDoc} + /// @return This method returns {@link LexerActionType#PUSH_MODE}. + + @override + LexerActionType get actionType => LexerActionType.PUSH_MODE; + + /// {@inheritDoc} + /// @return This method returns [false]. + + @override + bool get isPositionDependent => false; + + /// {@inheritDoc} + /// + ///

This action is implemented by calling {@link Lexer#pushMode} with the + /// value provided by [mode].
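For example, a grammar command such as `-> pushMode(STRING_MODE)` compiles to one of these actions; a sketch with an assumed mode constant:

    import 'package:antlr4/antlr4.dart';

    void main() {
      const stringMode = 1; // hypothetical index of a STRING_MODE lexer mode
      final push = LexerPushModeAction(stringMode);
      print(push); // pushMode(1)
      // push.execute(lexer) calls lexer.pushMode(1); a later
      // LexerPopModeAction.INSTANCE.execute(lexer) calls lexer.popMode().
    }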

+ + @override + void execute(Lexer lexer) { + lexer.pushMode(mode); + } + + @override + int get hashCode { + var hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + hash = MurmurHash.update(hash, mode); + return MurmurHash.finish(hash, 2); + } + + @override + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (obj is LexerPushModeAction) { + return mode == obj.mode; + } + return false; + } + + @override + String toString() { + return 'pushMode($mode)'; + } +} + +/// Implements the [skip] lexer action by calling {@link Lexer#skip}. +/// +///

The [skip] command does not have any parameters, so this action is +/// implemented as a singleton instance exposed by {@link #INSTANCE}.

+/// +/// @since 4.2 +class LexerSkipAction implements LexerAction { + /// Provides a singleton instance of this parameterless lexer action. + static final LexerSkipAction INSTANCE = LexerSkipAction(); + + /// {@inheritDoc} + /// @return This method returns {@link LexerActionType#SKIP}. + + @override + LexerActionType get actionType => LexerActionType.SKIP; + + /// {@inheritDoc} + /// @return This method returns [false]. + + @override + bool get isPositionDependent => false; + + /// {@inheritDoc} + /// + ///

This action is implemented by calling {@link Lexer#skip}.

+ @override + void execute(Lexer lexer) { + lexer.skip(); + } + + @override + int get hashCode { + var hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + return MurmurHash.finish(hash, 1); + } + + @override + bool operator ==(Object obj) { + return identical(obj, this); + } + + @override + String toString() { + return 'skip'; + } +} + +/// Implements the [type] lexer action by calling {@link Lexer#setType} +/// with the assigned type. +/// +/// @since 4.2 +class LexerTypeAction implements LexerAction { + /// Gets the type to assign to a token created by the lexer. + /// @return The type to assign to a token created by the lexer. + final int type; + + /// Constructs a new [type] action with the specified token type value. + /// @param type The type to assign to the token using {@link Lexer#setType}. + LexerTypeAction(this.type); + + /// {@inheritDoc} + /// @return This method returns {@link LexerActionType#TYPE}. + @override + LexerActionType get actionType => LexerActionType.TYPE; + + /// {@inheritDoc} + /// @return This method returns [false]. + + @override + bool get isPositionDependent => false; + + /// {@inheritDoc} + /// + ///

This action is implemented by calling {@link Lexer#setType} with the + /// value provided by [type].
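A sketch of what a `-> type(HIDDEN_ID)` command becomes; the token-type constant is an assumption for illustration:

    import 'package:antlr4/antlr4.dart';

    void main() {
      const hiddenId = 5; // hypothetical token type constant
      final setType = LexerTypeAction(hiddenId);
      print(setType); // type(5)
      // setType.execute(lexer) assigns lexer.type = 5 to the current token.
    }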

+ + @override + void execute(Lexer lexer) { + lexer.type = type; + } + + @override + int get hashCode { + var hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, actionType.index); + hash = MurmurHash.update(hash, type); + return MurmurHash.finish(hash, 2); + } + + @override + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (obj is LexerTypeAction) { + return type == obj.type; + } + return false; + } + + @override + String toString() { + return 'type($type)'; + } +} + +/// This implementation of [LexerAction] is used for tracking input offsets +/// for position-dependent actions within a [LexerActionExecutor]. +/// +///

This action is not serialized as part of the ATN, and is only required for +/// position-dependent lexer actions which appear at a location other than the +/// end of a rule. For more information about DFA optimizations employed for +/// lexer actions, see {@link LexerActionExecutor#append} and +/// {@link LexerActionExecutor#fixOffsetBeforeMatch}.
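A sketch of the wrapping this class provides; the offset and inner action are illustrative:

    import 'package:antlr4/antlr4.dart';

    void main() {
      // A custom action 3 characters into its token, rather than at the
      // end, gets wrapped so the offset travels with it through the DFA:
      final inner = LexerCustomAction(0, 0);
      final indexed = LexerIndexedCustomAction(3, inner);
      assert(indexed.isPositionDependent);
      // The executor seeks to tokenStart + 3 before calling
      // indexed.execute(lexer), which delegates to inner.execute(lexer).
    }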

+/// +/// @since 4.2 +class LexerIndexedCustomAction implements LexerAction { + /// Gets the location in the input [CharStream] at which the lexer + /// action should be executed. The value is interpreted as an offset relative + /// to the token start index. + /// + /// @return The location in the input [CharStream] at which the lexer + /// action should be executed. + final int offset; + + /// Gets the lexer action to execute. + /// + /// @return A [LexerAction] object which executes the lexer action. + final LexerAction action; + + /// Constructs a new indexed custom action by associating a character offset + /// with a [LexerAction]. + /// + ///

Note: This class is only required for lexer actions for which + /// {@link LexerAction#isPositionDependent} returns [true].

+ /// + /// @param offset The offset into the input [CharStream], relative to + /// the token start index, at which the specified lexer action should be + /// executed. + /// @param action The lexer action to execute at a particular offset in the + /// input [CharStream]. + LexerIndexedCustomAction(this.offset, this.action); + + /// {@inheritDoc} + /// + /// @return This method returns the result of calling {@link #getActionType} + /// on the [LexerAction] returned by {@link #getAction}. + @override + LexerActionType get actionType => action.actionType; + + /// {@inheritDoc} + /// @return This method returns [true]. + + @override + bool get isPositionDependent => true; + + /// {@inheritDoc} + /// + ///

This method calls {@link #execute} on the wrapped [action] + /// using the provided [lexer].

+ + @override + void execute(Lexer lexer) { +// assume the input stream position was properly set by the calling code + action.execute(lexer); + } + + @override + int get hashCode { + var hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, offset); + hash = MurmurHash.update(hash, action); + return MurmurHash.finish(hash, 2); + } + + @override + bool operator ==(Object obj) { + if (obj == this) { + return true; + } else if (obj is LexerIndexedCustomAction) { + return offset == obj.offset && action == obj.action; + } + return false; + } +} diff --git a/runtime/Dart/lib/src/atn/src/lexer_action_executor.dart b/runtime/Dart/lib/src/atn/src/lexer_action_executor.dart new file mode 100644 index 000000000..44957122f --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/lexer_action_executor.dart @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'package:collection/collection.dart'; + +import '../../input_stream.dart'; +import '../../lexer.dart'; +import '../../util/murmur_hash.dart'; +import 'lexer_action.dart'; + +/// Represents an executor for a sequence of lexer actions which traversed during +/// the matching operation of a lexer rule (token). +/// +///

The executor tracks position information for position-dependent lexer actions +/// efficiently, ensuring that actions appearing only at the end of the rule do +/// not cause bloating of the [DFA] created for the lexer.
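For instance, the executor for a rule ending in `-> skip, mode(DEFAULT_MODE)` might be assembled like this (a sketch; `append` accepts a null executor):

    import 'package:antlr4/antlr4.dart';

    void main() {
      var exec = LexerActionExecutor.append(null, LexerSkipAction.INSTANCE);
      exec = LexerActionExecutor.append(exec, LexerModeAction(Lexer.DEFAULT_MODE));
      // exec.lexerActions is now [skip, mode(0)], run in traversal order.
    }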

+/// +/// @since 4.2 +class LexerActionExecutor { + /// Gets the lexer actions to be executed by this executor. + /// @return The lexer actions to be executed by this executor. + final List lexerActions; + + /// Caches the result of {@link #hashCode} since the hash code is an element + /// of the performance-critical {@link LexerATNConfig#hashCode} operation. + @override + int get hashCode { + var hash = MurmurHash.initialize(); + for (var lexerAction in lexerActions) { + hash = MurmurHash.update(hash, lexerAction); + } + + return MurmurHash.finish(hash, lexerActions.length); + } + + /// Constructs an executor for a sequence of [LexerAction] actions. + /// @param lexerActions The lexer actions to execute. + LexerActionExecutor(this.lexerActions); + + /// Creates a [LexerActionExecutor] which executes the actions for + /// the input [lexerActionExecutor] followed by a specified + /// [lexerAction]. + /// + /// @param lexerActionExecutor The executor for actions already traversed by + /// the lexer while matching a token within a particular + /// [LexerATNConfig]. If this is null, the method behaves as + /// though it were an empty executor. + /// @param lexerAction The lexer action to execute after the actions + /// specified in [lexerActionExecutor]. + /// + /// @return A [LexerActionExecutor] for executing the combine actions + /// of [lexerActionExecutor] and [lexerAction]. + static LexerActionExecutor append( + LexerActionExecutor lexerActionExecutor, LexerAction lexerAction) { + if (lexerActionExecutor == null) { + return LexerActionExecutor([lexerAction]); + } + + final lexerActions = + List.from(lexerActionExecutor.lexerActions); + lexerActions.add(lexerAction); + return LexerActionExecutor(lexerActions); + } + + /// Creates a [LexerActionExecutor] which encodes the current offset + /// for position-dependent lexer actions. + /// + ///

Normally, when the executor encounters lexer actions where + /// {@link LexerAction#isPositionDependent} returns [true], it calls + /// {@link IntStream#seek} on the input [CharStream] to set the input + /// position to the end of the current token. This behavior provides + /// for efficient DFA representation of lexer actions which appear at the end + /// of a lexer rule, even when the lexer rule matches a variable number of + /// characters.

+ /// + ///

Prior to traversing a match transition in the ATN, the current offset + /// from the token start index is assigned to all position-dependent lexer + /// actions which have not already been assigned a fixed offset. By storing + /// the offsets relative to the token start index, the DFA representation of + /// lexer actions which appear in the middle of tokens remains efficient due + /// to sharing among tokens of the same length, regardless of their absolute + /// position in the input stream.
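A sketch of the resulting rewrite; the offset is illustrative:

    import 'package:antlr4/antlr4.dart';

    void main() {
      // A position-dependent action observed 3 characters into the token:
      final exec = LexerActionExecutor([LexerCustomAction(0, 0)]);
      final fixed = exec.fixOffsetBeforeMatch(3);
      // fixed.lexerActions[0] is now a LexerIndexedCustomAction(3, ...),
      // so every occurrence at offset 3 shares one DFA path regardless of
      // where the token starts in the input.
    }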

+ /// + ///

If the current executor already has offsets assigned to all + /// position-dependent lexer actions, the method returns [this].

+ /// + /// @param offset The current offset to assign to all position-dependent + /// lexer actions which do not already have offsets assigned. + /// + /// @return A [LexerActionExecutor] which stores input stream offsets + /// for all position-dependent lexer actions. + LexerActionExecutor fixOffsetBeforeMatch(int offset) { + List updatedLexerActions; + for (var i = 0; i < lexerActions.length; i++) { + if (lexerActions[i].isPositionDependent && + !(lexerActions[i] is LexerIndexedCustomAction)) { + updatedLexerActions ??= List.from(lexerActions); + + updatedLexerActions[i] = + LexerIndexedCustomAction(offset, lexerActions[i]); + } + } + + if (updatedLexerActions == null) { + return this; + } + + return LexerActionExecutor(updatedLexerActions); + } + + /// Execute the actions encapsulated by this executor within the context of a + /// particular [Lexer]. + /// + ///

This method calls {@link IntStream#seek} to set the position of the + /// [input] [CharStream] prior to calling + /// {@link LexerAction#execute} on a position-dependent action. Before the + /// method returns, the input position will be restored to the same position + /// it was in when the method was invoked.

+ /// + /// @param lexer The lexer instance. + /// @param input The input stream which is the source for the current token. + /// When this method is called, the current {@link IntStream#index} for + /// [input] should be the start of the following token, i.e. 1 + /// character past the end of the current token. + /// @param startIndex The token start index. This value may be passed to + /// {@link IntStream#seek} to set the [input] position to the beginning + /// of the token. + void execute(Lexer lexer, CharStream input, int startIndex) { + var requiresSeek = false; + final stopIndex = input.index; + try { + for (var lexerAction in lexerActions) { + if (lexerAction is LexerIndexedCustomAction) { + final offset = (lexerAction as LexerIndexedCustomAction).offset; + input.seek(startIndex + offset); + lexerAction = (lexerAction as LexerIndexedCustomAction).action; + requiresSeek = (startIndex + offset) != stopIndex; + } else if (lexerAction.isPositionDependent) { + input.seek(stopIndex); + requiresSeek = false; + } + + lexerAction.execute(lexer); + } + } finally { + if (requiresSeek) { + input.seek(stopIndex); + } + } + } + + @override + bool operator ==(Object obj) { + if (identical(obj, this)) { + return true; + } else if (!(obj is LexerActionExecutor)) { + return false; + } + + LexerActionExecutor other = obj; + return hashCode == other.hashCode && + ListEquality().equals(lexerActions, other.lexerActions); + } +} diff --git a/runtime/Dart/lib/src/atn/src/lexer_atn_simulator.dart b/runtime/Dart/lib/src/atn/src/lexer_atn_simulator.dart new file mode 100644 index 000000000..5762d84c8 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/lexer_atn_simulator.dart @@ -0,0 +1,731 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:developer'; + +import 'package:logging/logging.dart'; + +import '../../dfa/dfa.dart'; +import '../../error/error.dart'; +import '../../input_stream.dart'; +import '../../interval_set.dart'; +import '../../lexer.dart'; +import '../../prediction_context.dart'; +import '../../token.dart'; +import 'atn.dart'; +import 'atn_config.dart'; +import 'atn_config_set.dart'; +import 'atn_simulator.dart'; +import 'atn_state.dart'; +import 'lexer_action_executor.dart'; +import 'transition.dart'; + +/// When we hit an accept state in either the DFA or the ATN, we +/// have to notify the character stream to start buffering characters +/// via {@link IntStream#mark} and record the current state. The current sim state +/// includes the current index into the input, the current line, +/// and current character position in that line. Note that the Lexer is +/// tracking the starting line and characterization of the token. These +/// variables track the "state" of the simulator when it hits an accept state. +/// +///

We track these variables separately for the DFA and ATN simulation +/// because the DFA simulation often has to fail over to the ATN +/// simulation. If the ATN simulation fails, we need the DFA to fall +/// back to its previously accepted state, if any. If the ATN succeeds, +/// then the ATN does the accept and the DFA simulator that invoked it +/// can simply return the predicted token type.

+class SimState { + int index = -1; + int line = 0; + int charPos = -1; + + DFAState dfaState; + + void reset() { + index = -1; + line = 0; + charPos = -1; + dfaState = null; + } +} + +/// "dup" of ParserInterpreter */ +class LexerATNSimulator extends ATNSimulator { + static final bool debug = true; + static final bool dfa_debug = true; + + static final int MIN_DFA_EDGE = 0; + static final int MAX_DFA_EDGE = 127; // forces unicode to stay in ATN + + final Lexer recog; + + /// The current token's starting index into the character stream. + /// Shared across DFA to ATN simulation in case the ATN fails and the + /// DFA did not have a previous accept state. In this case, we use the + /// ATN-generated exception object. + int startIndex = -1; + + /// line number 1..n within the input */ + int line = 1; + + /// The index of the character relative to the beginning of the line 0..n-1 */ + int charPositionInLine = 0; + + List decisionToDFA; + int mode = Lexer.DEFAULT_MODE; + + /// Used during DFA/ATN exec to record the most recent accept configuration info */ + + final SimState prevAccept = SimState(); + + LexerATNSimulator(ATN atn, this.decisionToDFA, + PredictionContextCache sharedContextCache, + {this.recog}) + : super(atn, sharedContextCache); + + void copyState(LexerATNSimulator simulator) { + charPositionInLine = simulator.charPositionInLine; + line = simulator.line; + mode = simulator.mode; + startIndex = simulator.startIndex; + } + + int match(CharStream input, int mode) { + this.mode = mode; + final mark = input.mark(); + try { + startIndex = input.index; + prevAccept.reset(); + final dfa = decisionToDFA[mode]; + if (dfa.s0 == null) { + return matchATN(input); + } else { + return execATN(input, dfa.s0); + } + } finally { + input.release(mark); + } + } + + @override + void reset() { + prevAccept.reset(); + startIndex = -1; + line = 1; + charPositionInLine = 0; + mode = Lexer.DEFAULT_MODE; + } + + @override + void clearDFA() { + for (var d = 0; d < decisionToDFA.length; d++) { + decisionToDFA[d] = DFA(atn.getDecisionState(d), d); + } + } + + int matchATN(CharStream input) { + ATNState startState = atn.modeToStartState[mode]; + + if (debug) { + log('matchATN mode $mode start: $startState\n', level: Level.FINE.value); + } + + final old_mode = mode; + + final s0_closure = computeStartState(input, startState); + final suppressEdge = s0_closure.hasSemanticContext; + s0_closure.hasSemanticContext = false; + + final next = addDFAState(s0_closure); + if (!suppressEdge) { + decisionToDFA[mode].s0 = next; + } + + final predict = execATN(input, next); + + if (debug) { + log('DFA after matchATN: ${decisionToDFA[old_mode].toLexerString()}\n', + level: Level.FINE.value); + } + + return predict; + } + + int execATN(CharStream input, DFAState ds0) { + //log("enter exec index "+input.index()+" from "+ds0.configs, level: Level.FINE.value); + if (debug) { + log('start state closure=${ds0.configs}\n', level: Level.FINE.value); + } + + if (ds0.isAcceptState) { + // allow zero-length tokens + captureSimState(prevAccept, input, ds0); + } + + var t = input.LA(1); + + var s = ds0; // s is current/from DFA state + + while (true) { + // while more work + if (debug) { + log('execATN loop starting closure: ${s.configs}\n', + level: Level.FINE.value); + } + + // As we move src->trg, src->trg, we keep track of the previous trg to + // avoid looking up the DFA state again, which is expensive. + // If the previous target was already part of the DFA, we might + // be able to avoid doing a reach operation upon t. 
If s!=null, + // it means that semantic predicates didn't prevent us from + // creating a DFA state. Once we know s!=null, we check to see if + // the DFA state has an edge already for t. If so, we can just reuse + // it's configuration set; there's no point in re-computing it. + // This is kind of like doing DFA simulation within the ATN + // simulation because DFA simulation is really just a way to avoid + // computing reach/closure sets. Technically, once we know that + // we have a previously added DFA state, we could jump over to + // the DFA simulator. But, that would mean popping back and forth + // a lot and making things more complicated algorithmically. + // This optimization makes a lot of sense for loops within DFA. + // A character will take us back to an existing DFA state + // that already has lots of edges out of it. e.g., .* in comments. + var target = getExistingTargetState(s, t); + target ??= computeTargetState(input, s, t); + + if (target == ATNSimulator.ERROR) { + break; + } + + // If this is a consumable input element, make sure to consume before + // capturing the accept state so the input index, line, and char + // position accurately reflect the state of the interpreter at the + // end of the token. + if (t != IntStream.EOF) { + consume(input); + } + + if (target.isAcceptState) { + captureSimState(prevAccept, input, target); + if (t == IntStream.EOF) { + break; + } + } + + t = input.LA(1); + s = target; // flip; current DFA target becomes new src/from state + } + + return failOrAccept(prevAccept, input, s.configs, t); + } + + /// Get an existing target state for an edge in the DFA. If the target state + /// for the edge has not yet been computed or is otherwise not available, + /// this method returns null. + /// + /// @param s The current DFA state + /// @param t The next input symbol + /// @return The existing target DFA state for the given input symbol + /// [t], or null if the target state for this edge is not + /// already cached + + DFAState getExistingTargetState(DFAState s, int t) { + if (s.edges == null || t < MIN_DFA_EDGE || t > MAX_DFA_EDGE) { + return null; + } + + final target = s.edges[t - MIN_DFA_EDGE]; + if (debug && target != null) { + log('reuse state ${s.stateNumber} edge to ${target.stateNumber}', + level: Level.FINE.value); + } + + return target; + } + + /// Compute a target state for an edge in the DFA, and attempt to add the + /// computed state and corresponding edge to the DFA. + /// + /// @param input The input stream + /// @param s The current DFA state + /// @param t The next input symbol + /// + /// @return The computed target DFA state for the given input symbol + /// [t]. If [t] does not lead to a valid DFA state, this method + /// returns {@link #ERROR}. + + DFAState computeTargetState(CharStream input, DFAState s, int t) { + ATNConfigSet reach = OrderedATNConfigSet(); + + // if we don't find an existing DFA state + // Fill reach starting from closure, following t transitions + getReachableConfigSet(input, s.configs, reach, t); + + if (reach.isEmpty) { + // we got nowhere on t from s + if (!reach.hasSemanticContext) { + // we got nowhere on t, don't throw out this knowledge; it'd + // cause a failover from DFA later. 
+ addDFAEdge(s, t, ATNSimulator.ERROR); + } + + // stop when we can't match any more char + return ATNSimulator.ERROR; + } + + // Add an edge from s to target DFA found/created for reach + return addDFAEdgeByConfig(s, t, reach); + } + + int failOrAccept( + SimState prevAccept, CharStream input, ATNConfigSet reach, int t) { + if (prevAccept.dfaState != null) { + final lexerActionExecutor = + prevAccept.dfaState.lexerActionExecutor; + accept(input, lexerActionExecutor, startIndex, prevAccept.index, + prevAccept.line, prevAccept.charPos); + return prevAccept.dfaState.prediction; + } else { + // if no accept and EOF is first char, return EOF + if (t == IntStream.EOF && input.index == startIndex) { + return Token.EOF; + } + + throw LexerNoViableAltException(recog, input, startIndex, reach); + } + } + + /// Given a starting configuration set, figure out all ATN configurations + /// we can reach upon input [t]. Parameter [reach] is a return + /// parameter. + void getReachableConfigSet( + CharStream input, ATNConfigSet configs, ATNConfigSet reach, int t) { + // this is used to skip processing for configs which have a lower priority + // than a config that already reached an accept state for the same rule + var skipAlt = ATN.INVALID_ALT_NUMBER; + for (var c in configs) { + final currentAltReachedAcceptState = c.alt == skipAlt; + if (currentAltReachedAcceptState && + (c as LexerATNConfig).hasPassedThroughNonGreedyDecision()) { + continue; + } + + if (debug) { + log('testing ${getTokenName(t)} at ${c.toString(recog, true)}\n', + level: Level.FINE.value); + } + + final n = c.state.numberOfTransitions; + for (var ti = 0; ti < n; ti++) { + // for each transition + final trans = c.state.transition(ti); + final target = getReachableTarget(trans, t); + if (target != null) { + var lexerActionExecutor = + (c as LexerATNConfig).lexerActionExecutor; + if (lexerActionExecutor != null) { + lexerActionExecutor = lexerActionExecutor + .fixOffsetBeforeMatch(input.index - startIndex); + } + + final treatEofAsEpsilon = t == IntStream.EOF; + if (closure( + input, + LexerATNConfig.dup(c, target, + lexerActionExecutor: lexerActionExecutor), + reach, + currentAltReachedAcceptState, + true, + treatEofAsEpsilon)) { + // any remaining configs for this alt have a lower priority than + // the one that just reached an accept state. 
+ skipAlt = c.alt; + break; + } + } + } + } + } + + void accept(CharStream input, LexerActionExecutor lexerActionExecutor, + int startIndex, int index, int line, int charPos) { + if (debug) { + log('ACTION $lexerActionExecutor\n', level: Level.FINE.value); + } + + // seek to after last char in token + input.seek(index); + this.line = line; + charPositionInLine = charPos; + + if (lexerActionExecutor != null && recog != null) { + lexerActionExecutor.execute(recog, input, startIndex); + } + } + + ATNState getReachableTarget(Transition trans, int t) { + if (trans.matches(t, Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE)) { + return trans.target; + } + + return null; + } + + ATNConfigSet computeStartState(CharStream input, ATNState p) { + PredictionContext initialContext = PredictionContext.EMPTY; + ATNConfigSet configs = OrderedATNConfigSet(); + for (var i = 0; i < p.numberOfTransitions; i++) { + final target = p.transition(i).target; + final c = LexerATNConfig(target, i + 1, initialContext); + closure(input, c, configs, false, false, false); + } + return configs; + } + + /// Since the alternatives within any lexer decision are ordered by + /// preference, this method stops pursuing the closure as soon as an accept + /// state is reached. After the first accept state is reached by depth-first + /// search from [config], all other (potentially reachable) states for + /// this rule would have a lower priority. + /// + /// @return [true] if an accept state is reached, otherwise + /// [false]. + bool closure( + CharStream input, + LexerATNConfig config, + ATNConfigSet configs, + bool currentAltReachedAcceptState, + bool speculative, + bool treatEofAsEpsilon) { + if (debug) { + log('closure(' + config.toString(recog, true) + ')', + level: Level.FINE.value); + } + + if (config.state is RuleStopState) { + if (debug) { + if (recog != null) { + log('closure at ${recog.ruleNames[config.state.ruleIndex]} rule stop $config\n', + level: Level.FINE.value); + } else { + log('closure at rule stop $config\n', level: Level.FINE.value); + } + } + + if (config.context == null || config.context.hasEmptyPath()) { + if (config.context == null || config.context.isEmpty) { + configs.add(config); + return true; + } else { + configs.add(LexerATNConfig.dup(config, config.state, + context: PredictionContext.EMPTY)); + currentAltReachedAcceptState = true; + } + } + + if (config.context != null && !config.context.isEmpty) { + for (var i = 0; i < config.context.length; i++) { + if (config.context.getReturnState(i) != + PredictionContext.EMPTY_RETURN_STATE) { + final newContext = + config.context.getParent(i); // "pop" return state + final returnState = atn.states[config.context.getReturnState(i)]; + final c = LexerATNConfig.dup(config, returnState, + context: newContext); + currentAltReachedAcceptState = closure(input, c, configs, + currentAltReachedAcceptState, speculative, treatEofAsEpsilon); + } + } + } + + return currentAltReachedAcceptState; + } + + // optimization + if (!config.state.onlyHasEpsilonTransitions()) { + if (!currentAltReachedAcceptState || + !config.hasPassedThroughNonGreedyDecision()) { + configs.add(config); + } + } + + final p = config.state; + for (var i = 0; i < p.numberOfTransitions; i++) { + final t = p.transition(i); + final c = getEpsilonTarget( + input, config, t, configs, speculative, treatEofAsEpsilon); + if (c != null) { + currentAltReachedAcceptState = closure(input, c, configs, + currentAltReachedAcceptState, speculative, treatEofAsEpsilon); + } + } + + return currentAltReachedAcceptState; + } 
+ + // side-effect: can alter configs.hasSemanticContext + + LexerATNConfig getEpsilonTarget( + CharStream input, + LexerATNConfig config, + Transition t, + ATNConfigSet configs, + bool speculative, + bool treatEofAsEpsilon) { + LexerATNConfig c; + switch (t.type) { + case TransitionType.RULE: + RuleTransition ruleTransition = t; + PredictionContext newContext = SingletonPredictionContext.create( + config.context, ruleTransition.followState.stateNumber); + c = LexerATNConfig.dup(config, t.target, context: newContext); + break; + + case TransitionType.PRECEDENCE: + throw UnsupportedError( + 'Precedence predicates are not supported in lexers.'); + case TransitionType.PREDICATE: + /* Track traversing semantic predicates. If we traverse, + we cannot add a DFA state for this "reach" computation + because the DFA would not test the predicate again in the + future. Rather than creating collections of semantic predicates + like v3 and testing them on prediction, v4 will test them on the + fly all the time using the ATN not the DFA. This is slower but + semantically it's not used that often. One of the key elements to + this predicate mechanism is not adding DFA states that see + predicates immediately afterwards in the ATN. For example, + + a : ID {p1}? | ID {p2}? ; + + should create the start state for rule 'a' (to save start state + competition), but should not create target of ID state. The + collection of ATN states the following ID references includes + states reached by traversing predicates. Since this is when we + test them, we cannot cash the DFA state target of ID. + */ + PredicateTransition pt = t; + if (debug) { + log('EVAL rule ${pt.ruleIndex}:${pt.predIndex}', + level: Level.FINE.value); + } + configs.hasSemanticContext = true; + if (evaluatePredicate(input, pt.ruleIndex, pt.predIndex, speculative)) { + c = LexerATNConfig.dup(config, t.target); + } + break; + case TransitionType.ACTION: + if (config.context == null || config.context.hasEmptyPath()) { + // execute actions anywhere in the start rule for a token. + // + // TODO: if the entry rule is invoked recursively, some + // actions may be executed during the recursive call. The + // problem can appear when hasEmptyPath() is true but + // isEmpty is false. In this case, the config needs to be + // split into two contexts - one with just the empty path + // and another with everything but the empty path. + // Unfortunately, the current algorithm does not allow + // getEpsilonTarget to return two configurations, so + // additional modifications are needed before we can support + // the split operation. + final lexerActionExecutor = LexerActionExecutor.append( + config.lexerActionExecutor, + atn.lexerActions[(t as ActionTransition).actionIndex]); + c = LexerATNConfig.dup(config, t.target, + lexerActionExecutor: lexerActionExecutor); + } else { + // ignore actions in referenced rules + c = LexerATNConfig.dup(config, t.target); + } + break; + + case TransitionType.EPSILON: + c = LexerATNConfig.dup(config, t.target); + break; + + case TransitionType.ATOM: + case TransitionType.RANGE: + case TransitionType.SET: + if (treatEofAsEpsilon) { + if (t.matches( + IntStream.EOF, Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE)) { + c = LexerATNConfig.dup(config, t.target); + break; + } + } + break; + case TransitionType.NOT_SET: + break; + case TransitionType.WILDCARD: + break; + case TransitionType.INVALID: + throw ArgumentError.value(t.type, 'TransitionType'); + break; + } + + return c; + } + + /// Evaluate a predicate specified in the lexer. 
+ /// + ///

If [speculative] is [true], this method was called before + /// {@link #consume} for the matched character. This method should call + /// {@link #consume} before evaluating the predicate to ensure position + /// sensitive values, including {@link Lexer#getText}, {@link Lexer#getLine}, + /// and {@link Lexer#getCharPositionInLine}, properly reflect the current + /// lexer state. This method should restore [input] and the simulator + /// to the original state before returning (i.e. undo the actions made by the + /// call to {@link #consume}).

+ /// + /// @param input The input stream. + /// @param ruleIndex The rule containing the predicate. + /// @param predIndex The index of the predicate within the rule. + /// @param speculative [true] if the current index in [input] is + /// one character before the predicate's location. + /// + /// @return [true] if the specified predicate evaluates to + /// [true]. + bool evaluatePredicate( + CharStream input, int ruleIndex, int predIndex, bool speculative) { + // assume true if no recognizer was provided + if (recog == null) { + return true; + } + + if (!speculative) { + return recog.sempred(null, ruleIndex, predIndex); + } + + final savedCharPositionInLine = charPositionInLine; + final savedLine = line; + final index = input.index; + final marker = input.mark(); + try { + consume(input); + return recog.sempred(null, ruleIndex, predIndex); + } finally { + charPositionInLine = savedCharPositionInLine; + line = savedLine; + input.seek(index); + input.release(marker); + } + } + + void captureSimState(SimState settings, CharStream input, DFAState dfaState) { + settings.index = input.index; + settings.line = line; + settings.charPos = charPositionInLine; + settings.dfaState = dfaState; + } + + DFAState addDFAEdgeByConfig(DFAState from, int t, ATNConfigSet q) { + /* leading to this call, ATNConfigSet.hasSemanticContext is used as a + * marker indicating dynamic predicate evaluation makes this edge + * dependent on the specific input sequence, so the static edge in the + * DFA should be omitted. The target DFAState is still created since + * execATN has the ability to resynchronize with the DFA state cache + * following the predicate evaluation step. + * + * TJP notes: next time through the DFA, we see a pred again and eval. + * If that gets us to a previously created (but dangling) DFA + * state, we can continue in pure DFA mode from there. + */ + final suppressEdge = q.hasSemanticContext; + q.hasSemanticContext = false; + + final to = addDFAState(q); + + if (suppressEdge) { + return to; + } + + addDFAEdge(from, t, to); + return to; + } + + void addDFAEdge(DFAState p, int t, DFAState q) { + if (t < MIN_DFA_EDGE || t > MAX_DFA_EDGE) { + // Only track edges within the DFA bounds + return; + } + + if (debug) { + log('EDGE $p -> $q upon ${String.fromCharCode(t)}', + level: Level.FINE.value); + } + + p.edges ??= List(MAX_DFA_EDGE - MIN_DFA_EDGE + 1); + p.edges[t - MIN_DFA_EDGE] = q; // connect + } + + /// Add a new DFA state if there isn't one with this set of + /// configurations already. This method also detects the first + /// configuration containing an ATN rule stop state. Later, when + /// traversing the DFA, we will know which rule to accept. + DFAState addDFAState(ATNConfigSet configs) { + /* the lexer evaluates predicates on-the-fly; by this point configs + * should not contain any configurations with unevaluated predicates. 
+ */ + assert(!configs.hasSemanticContext); + + final proposed = DFAState(configs: configs); + ATNConfig firstConfigWithRuleStopState; + for (var c in configs) { + if (c.state is RuleStopState) { + firstConfigWithRuleStopState = c; + break; + } + } + + if (firstConfigWithRuleStopState != null) { + proposed.isAcceptState = true; + proposed.lexerActionExecutor = + (firstConfigWithRuleStopState as LexerATNConfig) + .lexerActionExecutor; + proposed.prediction = + atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex]; + } + + final dfa = decisionToDFA[mode]; + final existing = dfa.states[proposed]; + if (existing != null) return existing; + + final newState = proposed; + + newState.stateNumber = dfa.states.length; + configs.readOnly = true; + newState.configs = configs; + dfa.states[newState] = newState; + return newState; + } + + DFA getDFA(int mode) { + return decisionToDFA[mode]; + } + + /// Get the text matched so far for the current token. + + String getText(CharStream input) { + // index is first lookahead char, don't include. + return input.getText(Interval.of(startIndex, input.index - 1)); + } + + void consume(CharStream input) { + final curChar = input.LA(1); + if (curChar == 10) { // Is new line + line++; + charPositionInLine = 0; + } else { + charPositionInLine++; + } + input.consume(); + } + + String getTokenName(int t) { + if (t == -1) return 'EOF'; + //if ( atn.g!=null ) return atn.g.getTokenDisplayName(t); + return "'${String.fromCharCode(t)}'"; + } +} diff --git a/runtime/Dart/lib/src/atn/src/parser_atn_simulator.dart b/runtime/Dart/lib/src/atn/src/parser_atn_simulator.dart new file mode 100644 index 000000000..8ed2a5122 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/parser_atn_simulator.dart @@ -0,0 +1,2630 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:collection'; +import 'dart:developer'; + +import 'package:logging/logging.dart'; + +import '../../dfa/dfa.dart'; +import '../../error/error.dart'; +import '../../input_stream.dart'; +import '../../interval_set.dart'; +import '../../misc/pair.dart'; +import '../../parser.dart'; +import '../../parser_rule_context.dart'; +import '../../prediction_context.dart'; +import '../../rule_context.dart'; +import '../../token.dart'; +import '../../token_stream.dart'; +import '../../util/bit_set.dart'; +import '../../util/murmur_hash.dart'; +import '../../vocabulary.dart'; +import 'atn.dart'; +import 'atn_config.dart'; +import 'atn_config_set.dart'; +import 'atn_simulator.dart'; +import 'atn_state.dart'; +import 'semantic_context.dart'; +import 'transition.dart'; + +/// The embodiment of the adaptive LL(*), ALL(*), parsing strategy. +/// +///

+/// The basic complexity of the adaptive strategy makes it harder to understand. +/// We begin with ATN simulation to build paths in a DFA. Subsequent prediction +/// requests go through the DFA first. If they reach a state without an edge for +/// the current symbol, the algorithm fails over to the ATN simulation to +/// complete the DFA path for the current input (until it finds a conflict state +/// or uniquely predicting state).

+/// +///

+/// All of that is done without using the outer context because we want to create +/// a DFA that is not dependent upon the rule invocation stack when we do a +/// prediction. One DFA works in all contexts. We avoid using context not +/// necessarily because it's slower, although it can be, but because of the DFA +/// caching problem. The closure routine only considers the rule invocation stack +/// created during prediction beginning in the decision rule. For example, if +/// prediction occurs without invoking another rule's ATN, there are no context +/// stacks in the configurations. When lack of context leads to a conflict, we +/// don't know if it's an ambiguity or a weakness in the strong LL(*) parsing +/// strategy (versus full LL(*)).

+/// +///

+/// When SLL yields a configuration set with conflict, we rewind the input and +/// retry the ATN simulation, this time using full outer context without adding +/// to the DFA. Configuration context stacks will be the full invocation stacks +/// from the start rule. If we get a conflict using full context, then we can +/// definitively say we have a true ambiguity for that input sequence. If we +/// don't get a conflict, it implies that the decision is sensitive to the outer +/// context. (It is not context-sensitive in the sense of context-sensitive +/// grammars.)

+/// +///

+/// The next time we reach this DFA state with an SLL conflict, through DFA +/// simulation, we will again retry the ATN simulation using full context mode. +/// This is slow because we can't save the results and have to "interpret" the +/// ATN each time we get that input.

+/// +///

+/// CACHING FULL CONTEXT PREDICTIONS

+/// +///

+/// We could cache results from full context to predicted alternative easily and +/// that saves a lot of time but doesn't work in the presence of predicates. The set +/// of visible predicates from the ATN start state changes depending on the +/// context, because closure can fall off the end of a rule. I tried to cache +/// tuples (stack context, semantic context, predicted alt) but it was slower +/// than interpreting and much more complicated. It also required a huge amount of +/// memory. The goal is not to create the world's fastest parser anyway. I'd like +/// to keep this algorithm simple. By launching multiple threads, we can improve +/// the speed of parsing across a large number of files.

+/// +///

+/// There is no strict ordering between the amount of input used by SLL vs LL, +/// which makes it really hard to build a cache for full context. Let's say that +/// we have input A B C that leads to an SLL conflict with full context X. That +/// implies that using X we might only use A B but we could also use A B C D to +/// resolve the conflict. Input A B C D could predict alternative 1 in one position +/// in the input and A B C E could predict alternative 2 in another position in +/// the input. The conflicting SLL configurations could still be non-unique in the +/// full context prediction, which would lead us to requiring more input than the +/// original A B C. To make a prediction cache work, we have to track the exact +/// input used during the previous prediction. That amounts to a cache that maps +/// X to a specific DFA for that context.

+/// +///

+/// Something should be done for left-recursive expression predictions. They are +/// likely LL(1) + pred eval. It is easier to do the whole SLL-unless-error, then +/// retry-with-full-LL scheme that Sam does.

+/// +///

+/// AVOIDING FULL CONTEXT PREDICTION

+/// +///

+/// We avoid doing full context retry when the outer context is empty, when we did not +/// dip into the outer context by falling off the end of the decision state rule, +/// or when we force SLL mode.

+/// +///

+/// As an example of the 'not dip into outer context' case, consider super +/// constructor calls versus function calls. One grammar might look like +/// this:

+/// +///
+/// ctorBody
+///   : '{' superCall? stat* '}'
+///   ;
+/// 
+/// +///

+/// Or, you might see something like

+/// +///
+/// stat
+///   : superCall ';'
+///   | expression ';'
+///   | ...
+///   ;
+/// 
+/// +///

+/// In both cases I believe that no closure operations will dip into the outer +/// context. In the first case ctorBody in the worst case will stop at the '}'. +/// In the 2nd case it should stop at the ';'. Both cases should stay within the +/// entry rule and not dip into the outer context.

+/// +///

+/// PREDICATES

+/// +///

+/// Predicates are always evaluated if present, in both SLL and LL. SLL and +/// LL simulation deal with predicates differently. SLL collects predicates as +/// it performs closure operations like ANTLR v3 did. It delays predicate +/// evaluation until it reaches an accept state. This allows us to cache the SLL +/// ATN simulation whereas, if we had evaluated predicates on-the-fly during +/// closure, the DFA state configuration sets would be different and we couldn't +/// build up a suitable DFA.

+/// +///

+/// When building a DFA accept state during ATN simulation, we evaluate any +/// predicates and return the sole semantically valid alternative. If there is +/// more than 1 alternative, we report an ambiguity. If there are 0 alternatives, +/// we throw an exception. Alternatives without predicates act like they have +/// true predicates. The simple way to think about it is to strip away all +/// alternatives with false predicates and choose the minimum alternative that +/// remains.

+/// +///

+/// When we start in the DFA and reach an accept state that's predicated, we test +/// those and return the minimum semantically viable alternative. If no +/// alternatives are viable, we throw an exception.

+/// +///

+/// During full LL ATN simulation, closure always evaluates predicates +/// on-the-fly. This is crucial to reducing the configuration set size during +/// closure. It hits a landmine when parsing with the Java grammar, for example, +/// without this on-the-fly evaluation.

+/// +///

+/// SHARING DFA

+/// +///

+/// All instances of the same parser share the same decision DFAs through a +/// static field. Each instance gets its own ATN simulator but they share the +/// same {@link #decisionToDFA} field. They also share a +/// [PredictionContextCache] object that makes sure that all +/// [PredictionContext] objects are shared among the DFA states. This makes +/// a big size difference.

+/// +///

+/// THREAD SAFETY

+/// +///

+/// The [ParserATNSimulator] locks on the {@link #decisionToDFA} field when +/// it adds a new DFA object to that array. {@link #addDFAEdge} +/// locks on the DFA for the current decision when setting the +/// {@link DFAState#edges} field. {@link #addDFAState} locks on +/// the DFA for the current decision when looking up a DFA state to see if it +/// already exists. We must make sure that all requests to add DFA states that +/// are equivalent result in the same shared DFA object. This is because lots of +/// threads will be trying to update the DFA at once. The +/// {@link #addDFAState} method also locks inside the DFA lock +/// but this time on the shared context cache when it rebuilds the +/// configurations' [PredictionContext] objects using cached +/// subgraphs/nodes. No other locking occurs, even during DFA simulation. This is +/// safe as long as we can guarantee that all threads referencing +/// {@code s.edge[t]} get the same physical target [DFAState], or +/// null. Once into the DFA, the DFA simulation does not reference the +/// {@link DFA#states} map. It follows the {@link DFAState#edges} field to new +/// targets. The DFA simulator will either find {@link DFAState#edges} to be +/// null, to be non-null and {@code dfa.edges[t]} null, or +/// {@code dfa.edges[t]} to be non-null. The +/// {@link #addDFAEdge} method could be racing to set the field +/// but in either case the DFA simulator works; if the edge is null, it requests ATN +/// simulation. It could also race trying to get {@code dfa.edges[t]}, but either +/// way it will work because it's not doing a test-and-set operation.

+/// +///

+/// Starting with SLL then failing over to combined SLL/LL (Two-Stage +/// Parsing)

+/// +///

+/// Sam pointed out that if SLL does not give a syntax error, then there is no +/// point in doing full LL, which is slower. We only have to try LL if we get a +/// syntax error. For maximum speed, Sam starts the parser set to pure SLL +/// mode with the [BailErrorStrategy]:

+/// +///
+/// parser.{@link Parser#interpreter interpreter}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
+/// parser.{@link Parser#setErrorHandler setErrorHandler}(new [BailErrorStrategy]());
+/// 
+/// +///

+/// If it does not get a syntax error, then we're done. If it does get a syntax +/// error, we need to retry with the combined SLL/LL strategy.
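In the Dart runtime, the two-stage scheme might look like the following sketch; the entry-rule callback is an assumption for illustration, and the error-strategy and exception names mirror the Java runtime:

    import 'package:antlr4/antlr4.dart';

    ParserRuleContext twoStageParse(
        Parser parser, TokenStream tokens, ParserRuleContext Function() entry) {
      parser.interpreter.predictionMode = PredictionMode.SLL;
      parser.errorHandler = BailErrorStrategy();
      try {
        return entry(); // stage 1: pure SLL, bail out on the first error
      } on ParseCancellationException {
        tokens.seek(0); // rewind the token stream
        parser.reset();
        parser.errorHandler = DefaultErrorStrategy();
        parser.interpreter.predictionMode = PredictionMode.LL;
        return entry(); // stage 2: combined SLL/LL, full error handling
      }
    }

Erroneous input pays for both passes, but as noted below, correct input never leaves the fast SLL stage.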

+/// +///

+/// The reason this works is as follows. If there are no SLL conflicts, then the +/// grammar is SLL (at least for that input set). If there is an SLL conflict, +/// the full LL analysis must yield a set of viable alternatives which is a +/// subset of the alternatives reported by SLL. If the LL set is a singleton, +/// then the grammar is LL but not SLL. If the LL set is the same size as the SLL +/// set, the decision is SLL. If the LL set has size > 1, then that decision +/// is truly ambiguous on the current input. If the LL set is smaller, then the +/// SLL conflict resolution might choose an alternative that the full LL would +/// rule out as a possibility based upon better context information. If that's +/// the case, then the SLL parse will definitely get an error because the full LL +/// analysis says it's not viable. If SLL conflict resolution chooses an +/// alternative within the LL set, then both SLL and LL would choose the same +/// alternative because they both choose the minimum of multiple conflicting +/// alternatives.

+/// +///

+/// Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and +/// a smaller LL set called s. If s is {@code {2, 3}}, then SLL +/// parsing will get an error because SLL will pursue alternative 1. If +/// s is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will +/// choose the same alternative because alternative one is the minimum of either +/// set. If s is {@code {2}} or {@code {3}} then SLL will get a syntax +/// error. If s is {@code {1}} then SLL will succeed.

+/// +///

+/// Of course, if the input is invalid, then we will get an error for sure in +/// both SLL and LL parsing. Erroneous input will therefore require 2 passes over +/// the input.

+class ParserATNSimulator extends ATNSimulator { + static final bool debug = false; + static final bool debug_list_atn_decisions = false; + static final bool dfa_debug = false; + static final bool retry_debug = false; + + /// Just in case this optimization is bad, add an ENV variable to turn it off */ + static final bool TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT = + bool.fromEnvironment('TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT'); + + final Parser parser; + + final List decisionToDFA; + + /// SLL, LL, or LL + exact ambig detection? */ + + PredictionMode predictionMode = PredictionMode.LL; + + /// Each prediction operation uses a cache for merge of prediction contexts. + /// Don't keep around as it wastes huge amounts of memory. DoubleKeyMap + /// isn't synchronized but we're ok since two threads shouldn't reuse same + /// parser/atnsim object because it can only handle one input at a time. + /// This maps graphs a and b to merged result c. (a,b)→c. We can avoid + /// the merge if we ever see a and b again. Note that (b,a)→c should + /// also be examined during cache lookup. + Map, PredictionContext> mergeCache; + + // LAME globals to avoid parameters!!!!! I need these down deep in predTransition + TokenStream input; + int startIndex = 0; + ParserRuleContext _outerContext; + DFA _dfa; + + ParserATNSimulator(this.parser, ATN atn, this.decisionToDFA, + PredictionContextCache sharedContextCache) + : super(atn, sharedContextCache) { + // DOTGenerator dot = new DOTGenerator(null); + // log(dot.getDOT(atn.rules.get(0), parser.getRuleNames())); + // log(dot.getDOT(atn.rules.get(1), parser.getRuleNames())); + } + + @override + void reset() {} + + @override + void clearDFA() { + for (var d = 0; d < decisionToDFA.length; d++) { + decisionToDFA[d] = DFA(atn.getDecisionState(d), d); + } + } + + int adaptivePredict( + TokenStream input_, int decision, ParserRuleContext outerContext) { + if (debug || debug_list_atn_decisions) { + log('adaptivePredict decision $decision' ' exec LA(1)==' + + getLookaheadName(input_) + + ' line ${input_.LT(1).line}:${input_.LT(1).charPositionInLine}'); + } + + input = input_; + startIndex = input_.index; + _outerContext = outerContext; + final dfa = decisionToDFA[decision]; + _dfa = dfa; + + final m = input_.mark(); + final index = startIndex; + + // Now we are certain to have a specific decision's DFA + // But, do we still need an initial state? + try { + DFAState s0; + if (dfa.isPrecedenceDfa()) { + // the start state for a precedence DFA depends on the current + // parser precedence, and is provided by a DFA method. + s0 = dfa.getPrecedenceStartState(parser.precedence); + } else { + // the start state for a "regular" DFA is just s0 + s0 = dfa.s0; + } + + if (s0 == null) { + outerContext ??= ParserRuleContext.EMPTY; + if (debug || debug_list_atn_decisions) { + log('predictATN decision ${dfa.decision}' ' exec LA(1)==' + + getLookaheadName(input_) + + ', outerContext=' + + outerContext.toString(recog: parser)); + } + + final fullCtx = false; + var s0_closure = computeStartState( + dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx); + + if (dfa.isPrecedenceDfa()) { + /* If this is a precedence DFA, we use applyPrecedenceFilter + * to convert the computed start state to a precedence start + * state. We then use DFA.setPrecedenceStartState to set the + * appropriate start state for the precedence level rather + * than simply setting DFA.s0. 
+ */ + dfa.s0.configs = + s0_closure; // not used for prediction but useful to know start configs anyway + s0_closure = applyPrecedenceFilter(s0_closure); + s0 = addDFAState(dfa, DFAState(configs: s0_closure)); + dfa.setPrecedenceStartState(parser.precedence, s0); + } else { + s0 = addDFAState(dfa, DFAState(configs: s0_closure)); + dfa.s0 = s0; + } + } + + final alt = execATN(dfa, s0, input_, index, outerContext); + if (debug) { + log('DFA after predictATN: ' + dfa.toString(parser.vocabulary)); + } + return alt; + } finally { + mergeCache = null; // wack cache after each prediction + _dfa = null; + input_.seek(index); + input_.release(m); + } + } + + /// Performs ATN simulation to compute a predicted alternative based + /// upon the remaining input, but also updates the DFA cache to avoid + /// having to traverse the ATN again for the same input sequence. + /// + /// There are some key conditions we're looking for after computing a new + /// set of ATN configs (proposed DFA state): + /// if the set is empty, there is no viable alternative for current symbol + /// does the state uniquely predict an alternative? + /// does the state have a conflict that would prevent us from + /// putting it on the work list? + /// + /// We also have some key operations to do: + /// add an edge from previous DFA state to potentially new DFA state, D, + /// upon current symbol but only if adding to work list, which means in all + /// cases except no viable alternative (and possibly non-greedy decisions?) + /// collecting predicates and adding semantic context to DFA accept states + /// adding rule context to context-sensitive DFA accept states + /// consuming an input symbol + /// reporting a conflict + /// reporting an ambiguity + /// reporting a context sensitivity + /// reporting insufficient predicates + /// + /// cover these cases: + /// dead end + /// single alt + /// single alt + preds + /// conflict + /// conflict + preds + /// + int execATN(DFA dfa, DFAState s0, TokenStream input, int startIndex, + ParserRuleContext outerContext) { + if (debug || debug_list_atn_decisions) { + log('execATN decision ${dfa.decision}' ' exec LA(1)==' + + getLookaheadName(input) + + ' line ${input.LT(1).line}' + + ':${input.LT(1).charPositionInLine}'); + } + + var previousD = s0; + + if (debug) log('s0 = $s0'); + + var t = input.LA(1); + + while (true) { + // while more work + var D = getExistingTargetState(previousD, t); + D ??= computeTargetState(dfa, previousD, t); + + if (D == ATNSimulator.ERROR) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for SLL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. 
+ final e = + noViableAlt(input, outerContext, previousD.configs, startIndex); + input.seek(startIndex); + final alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule( + previousD.configs, outerContext); + if (alt != ATN.INVALID_ALT_NUMBER) { + return alt; + } + throw e; + } + + if (D.requiresFullContext && predictionMode != PredictionMode.SLL) { + // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) + var conflictingAlts = D.configs.conflictingAlts; + if (D.predicates != null) { + if (debug) log('DFA state has preds in DFA sim LL failover'); + final conflictIndex = input.index; + if (conflictIndex != startIndex) { + input.seek(startIndex); + } + + conflictingAlts = + evalSemanticContext(D.predicates, outerContext, true); + if (conflictingAlts.cardinality == 1) { + if (debug) log('Full LL avoided'); + return conflictingAlts.nextset(0); + } + + if (conflictIndex != startIndex) { + // restore the index so reporting the fallback to full + // context occurs with the index at the correct spot + input.seek(conflictIndex); + } + } + + if (dfa_debug) log('ctx sensitive state ${outerContext} in $D'); + final fullCtx = true; + final s0_closure = + computeStartState(dfa.atnStartState, outerContext, fullCtx); + reportAttemptingFullContext( + dfa, conflictingAlts, D.configs, startIndex, input.index); + final alt = execATNWithFullContext( + dfa, D, s0_closure, input, startIndex, outerContext); + return alt; + } + + if (D.isAcceptState) { + if (D.predicates == null) { + return D.prediction; + } + + final stopIndex = input.index; + input.seek(startIndex); + final alts = evalSemanticContext(D.predicates, outerContext, true); + switch (alts.cardinality) { + case 0: + throw noViableAlt(input, outerContext, D.configs, startIndex); + + case 1: + return alts.nextset(0); + + default: + // report ambiguity after predicate evaluation to make sure the correct + // set of ambig alts is reported. + reportAmbiguity( + dfa, D, startIndex, stopIndex, false, alts, D.configs); + return alts.nextset(0); + } + } + + previousD = D; + + if (t != IntStream.EOF) { + input.consume(); + t = input.LA(1); + } + } + } + + /// Get an existing target state for an edge in the DFA. If the target state + /// for the edge has not yet been computed or is otherwise not available, + /// this method returns null. + /// + /// @param previousD The current DFA state + /// @param t The next input symbol + /// @return The existing target DFA state for the given input symbol + /// [t], or null if the target state for this edge is not + /// already cached + DFAState getExistingTargetState(DFAState previousD, int t) { + final edges = previousD.edges; + if (edges == null || t + 1 < 0 || t + 1 >= edges.length) { + return null; + } + + return edges[t + 1]; + } + + /// Compute a target state for an edge in the DFA, and attempt to add the + /// computed state and corresponding edge to the DFA. + /// + /// @param dfa The DFA + /// @param previousD The current DFA state + /// @param t The next input symbol + /// + /// @return The computed target DFA state for the given input symbol + /// [t]. If [t] does not lead to a valid DFA state, this method + /// returns {@link #ERROR}. 
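+ ///
+ /// As a rough sketch, the caller ([execATN] above) consumes the result as
+ /// follows, treating {@link #ERROR} as "no viable alternative":
+ ///
+ /// ```dart
+ /// var D = getExistingTargetState(previousD, t);
+ /// D ??= computeTargetState(dfa, previousD, t);
+ /// if (D == ATNSimulator.ERROR) {
+ ///   // no reachable target: report the error with the best available alt
+ /// }
+ /// ```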
+ DFAState computeTargetState(DFA dfa, DFAState previousD, int t) { + final reach = computeReachSet(previousD.configs, t, false); + if (reach == null) { + addDFAEdge(dfa, previousD, t, ATNSimulator.ERROR); + return ATNSimulator.ERROR; + } + + // create new target state; we'll add to DFA after it's complete + var D = DFAState(configs: reach); + + final predictedAlt = getUniqueAlt(reach); + + if (debug) { + final altSubSets = + PredictionModeExtension.getConflictingAltSubsets(reach); + log( + 'SLL altSubSets=$altSubSets' ', configs=$reach' ', predict=$predictedAlt, allSubsetsConflict=${PredictionModeExtension + .allSubsetsConflict( + altSubSets)}, conflictingAlts=${getConflictingAlts(reach)}'); + } + + if (predictedAlt != ATN.INVALID_ALT_NUMBER) { + // NO CONFLICT, UNIQUELY PREDICTED ALT + D.isAcceptState = true; + D.configs.uniqueAlt = predictedAlt; + D.prediction = predictedAlt; + } else if (PredictionModeExtension.hasSLLConflictTerminatingPrediction( + predictionMode, reach)) { + // MORE THAN ONE VIABLE ALTERNATIVE + D.configs.conflictingAlts = getConflictingAlts(reach); + D.requiresFullContext = true; + // in SLL-only mode, we will stop at this state and return the minimum alt + D.isAcceptState = true; + D.prediction = D.configs.conflictingAlts.nextset(0); + } + + if (D.isAcceptState && D.configs.hasSemanticContext) { + predicateDFAState(D, atn.getDecisionState(dfa.decision)); + if (D.predicates != null) { + D.prediction = ATN.INVALID_ALT_NUMBER; + } + } + + // all adds to dfa are done after we've created full D state + D = addDFAEdge(dfa, previousD, t, D); + return D; + } + + void predicateDFAState(DFAState dfaState, DecisionState decisionState) { + // We need to test all predicates, even in DFA states that + // uniquely predict alternative. + final nalts = decisionState.numberOfTransitions; + // Update DFA so reach becomes accept state with (predicate,alt) + // pairs if preds found for conflicting alts + final altsToCollectPredsFrom = + getConflictingAltsOrUniqueAlt(dfaState.configs); + final altToPred = + getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configs, nalts); + if (altToPred != null) { + dfaState.predicates = + getPredicatePredictions(altsToCollectPredsFrom, altToPred); + dfaState.prediction = ATN.INVALID_ALT_NUMBER; // make sure we use preds + } else { + // There are preds in configs but they might go away + // when OR'd together like {p}? || NONE == NONE. If neither + // alt has preds, resolve to min alt + dfaState.prediction = altsToCollectPredsFrom.nextset(0); + } + } + + // comes back with reach.uniqueAlt set to a valid alt + int execATNWithFullContext( + DFA dfa, + DFAState D, // how far we got in SLL DFA before failing over + ATNConfigSet s0, + TokenStream input, + int startIndex, + ParserRuleContext outerContext) { + if (debug || debug_list_atn_decisions) { + log('execATNWithFullContext $s0'); + } + final fullCtx = true; + var foundExactAmbig = false; + ATNConfigSet reach; + var previous = s0; + input.seek(startIndex); + var t = input.LA(1); + int predictedAlt; + while (true) { + // while more work +// log("LL REACH "+getLookaheadName(input)+ +// " from configs.size="+previous.length+ +// " line "+input.LT(1).getLine()+":"+input.LT(1).getCharPositionInLine()); + reach = computeReachSet(previous, t, fullCtx); + if (reach == null) { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for LL decision. Full LL doesn't dip into outer + // so don't need special case. 
+ // We will get an error no matter what so delay until after + // decision; better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. + final e = + noViableAlt(input, outerContext, previous, startIndex); + input.seek(startIndex); + final alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule( + previous, outerContext); + if (alt != ATN.INVALID_ALT_NUMBER) { + return alt; + } + throw e; + } + + final altSubSets = + PredictionModeExtension.getConflictingAltSubsets(reach); + if (debug) { + log('LL altSubSets=$altSubSets' ', predict=${PredictionModeExtension.getUniqueAlt(altSubSets)}' ', resolvesToJustOneViableAlt=${PredictionModeExtension.resolvesToJustOneViableAlt(altSubSets)}'); + } + +// log("altSubSets: "+altSubSets); +// log("reach="+reach+", "+reach.conflictingAlts, level: Level.SEVERE.value); + reach.uniqueAlt = getUniqueAlt(reach); + // unique prediction? + if (reach.uniqueAlt != ATN.INVALID_ALT_NUMBER) { + predictedAlt = reach.uniqueAlt; + break; + } + if (predictionMode != PredictionMode.LL_EXACT_AMBIG_DETECTION) { + predictedAlt = + PredictionModeExtension.resolvesToJustOneViableAlt(altSubSets); + if (predictedAlt != ATN.INVALID_ALT_NUMBER) { + break; + } + } else { + // In exact ambiguity mode, we never try to terminate early. + // Just keeps scarfing until we know what the conflict is + if (PredictionModeExtension.allSubsetsConflict(altSubSets) && + PredictionModeExtension.allSubsetsEqual(altSubSets)) { + foundExactAmbig = true; + predictedAlt = PredictionModeExtension.getSingleViableAlt(altSubSets); + break; + } + // else there are multiple non-conflicting subsets or + // we're not sure what the ambiguity is yet. + // So, keep going. + } + + previous = reach; + if (t != IntStream.EOF) { + input.consume(); + t = input.LA(1); + } + } + + // If the configuration set uniquely predicts an alternative, + // without conflict, then we know that it's a full LL decision + // not SLL. + if (reach.uniqueAlt != ATN.INVALID_ALT_NUMBER) { + reportContextSensitivity( + dfa, predictedAlt, reach, startIndex, input.index); + return predictedAlt; + } + + // We do not check predicates here because we have checked them + // on-the-fly when doing full context prediction. + + /* + In non-exact ambiguity detection mode, we might actually be able to + detect an exact ambiguity, but I'm not going to spend the cycles + needed to check. We only emit ambiguity warnings in exact ambiguity + mode. + + For example, we might know that we have conflicting configurations. + But, that does not mean that there is no way forward without a + conflict. It's possible to have nonconflicting alt subsets as in: + + LL altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] + + from + + [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), + (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] + + In this case, (17,1,[5 $]) indicates there is some next sequence that + would resolve this without conflict to alternative 1. Any other viable + next sequence, however, is associated with a conflict. We stop + looking for input because no amount of further lookahead will alter + the fact that we should predict alternative 1. We just can't say for + sure that there is an ambiguity without looking further. 
+ */
+ reportAmbiguity(dfa, D, startIndex, input.index, foundExactAmbig,
+ reach.alts, reach);
+
+ return predictedAlt;
+ }
+
+ ATNConfigSet computeReachSet(ATNConfigSet config, int t, bool fullCtx) {
+ if (debug) log('in computeReachSet, starting closure: $config');
+
+ mergeCache ??= {};
+
+ final intermediate = ATNConfigSet(fullCtx);
+
+ /* Configurations already in a rule stop state indicate reaching the end
+ * of the decision rule (local context) or end of the start rule (full
+ * context). Once reached, these configurations are never updated by a
+ * closure operation, so they are handled separately for the performance
+ * advantage of having a smaller intermediate set when calling closure.
+ *
+ * For full-context reach operations, separate handling is required to
+ * ensure that the alternative matching the longest overall sequence is
+ * chosen when multiple such configurations can match the input.
+ */
+ List<ATNConfig> skippedStopStates;
+
+ // First figure out where we can reach on input t
+ for (var c in config) {
+ if (debug) log('testing ' + getTokenName(t) + ' at ' + c.toString());
+
+ if (c.state is RuleStopState) {
+ assert(c.context.isEmpty);
+ if (fullCtx || t == IntStream.EOF) {
+ skippedStopStates ??= [];
+
+ skippedStopStates.add(c);
+ }
+
+ continue;
+ }
+
+ final n = c.state.numberOfTransitions;
+ for (var ti = 0; ti < n; ti++) {
+ // for each transition
+ final trans = c.state.transition(ti);
+ final target = getReachableTarget(trans, t);
+ if (target != null) {
+ intermediate.add(ATNConfig.dup(c, state: target), mergeCache);
+ }
+ }
+ }
+
+ // Now figure out where the reach operation can take us...
+
+ ATNConfigSet reach;
+
+ /* This block optimizes the reach operation for intermediate sets which
+ * trivially indicate a termination state for the overall
+ * adaptivePredict operation.
+ *
+ * The conditions assume that intermediate
+ * contains all configurations relevant to the reach set, but this
+ * condition is not true when one or more configurations have been
+ * withheld in skippedStopStates, or when the current symbol is EOF.
+ */
+ if (skippedStopStates == null && t != Token.EOF) {
+ if (intermediate.length == 1) {
+ // Don't pursue the closure if there is just one state.
+ // It can only have one alternative; just add to result
+ // Also don't pursue the closure if there is unique alternative
+ // among the configurations.
+ reach = intermediate;
+ } else if (getUniqueAlt(intermediate) != ATN.INVALID_ALT_NUMBER) {
+ // Also don't pursue the closure if there is unique alternative
+ // among the configurations.
+ reach = intermediate;
+ }
+ }
+
+ /* If the reach set could not be trivially determined, perform a closure
+ * operation on the intermediate set to compute its initial value.
+ */
+ if (reach == null) {
+ reach = ATNConfigSet(fullCtx);
+ final closureBusy = <ATNConfig>{};
+ final treatEofAsEpsilon = t == Token.EOF;
+ for (var c in intermediate) {
+ closure(c, reach, closureBusy, false, fullCtx, treatEofAsEpsilon);
+ }
+ }
+
+ if (t == IntStream.EOF) {
+ /* After consuming EOF no additional input is possible, so we are
+ * only interested in configurations which reached the end of the
+ * decision rule (local context) or end of the start rule (full
+ * context). Update reach to contain only these configurations. This
+ * handles both explicit EOF transitions in the grammar and implicit
+ * EOF transitions following the end of the decision or start rule.
+ *
+ * When reach==intermediate, no closure operation was performed. In
+ * this case, removeAllConfigsNotInRuleStopState needs to check for
+ * reachable rule stop states as well as configurations already in
+ * a rule stop state.
+ *
+ * This is handled before the configurations in skippedStopStates,
+ * because any configurations potentially added from that list are
+ * already guaranteed to meet this condition whether or not it's
+ * required.
+ */
+ reach = removeAllConfigsNotInRuleStopState(reach, reach == intermediate);
+ }
+
+ /* If skippedStopStates is not null, then it contains at least one
+ * configuration. For full-context reach operations, these
+ * configurations reached the end of the start rule, in which case we
+ * only add them back to reach if no configuration during the current
+ * closure operation reached such a state. This ensures adaptivePredict
+ * chooses an alternative matching the longest overall sequence when
+ * multiple alternatives are viable.
+ */
+ if (skippedStopStates != null &&
+ (!fullCtx ||
+ !PredictionModeExtension.hasConfigInRuleStopState(reach))) {
+ assert(skippedStopStates.isNotEmpty);
+ for (var c in skippedStopStates) {
+ reach.add(c, mergeCache);
+ }
+ }
+
+ if (reach.isEmpty) return null;
+ return reach;
+ }
+
+ /// Return a configuration set containing only the configurations from
+ /// [configs] which are in a [RuleStopState]. If all
+ /// configurations in [configs] are already in a rule stop state, this
+ /// method simply returns [configs].
+ ///
+ /// When [lookToEndOfRule] is true, this method uses
+ /// {@link ATN#nextTokens} for each configuration in [configs] which is
+ /// not already in a rule stop state to see if a rule stop state is reachable
+ /// from the configuration via epsilon-only transitions.
+ ///
+ /// @param configs the configuration set to update
+ /// @param lookToEndOfRule when true, this method checks for rule stop states
+ /// reachable by epsilon-only transitions from each configuration in
+ /// [configs].
+ ///
+ /// @return [configs] if all configurations in [configs] are in a
+ /// rule stop state, otherwise return a new configuration set containing only
+ /// the configurations from [configs] which are in a rule stop state
+ ATNConfigSet removeAllConfigsNotInRuleStopState(
+ ATNConfigSet configs, bool lookToEndOfRule) {
+ if (PredictionModeExtension.allConfigsInRuleStopStates(configs)) {
+ return configs;
+ }
+
+ final result = ATNConfigSet(configs.fullCtx);
+ for (var config in configs) {
+ if (config.state is RuleStopState) {
+ result.add(config, mergeCache);
+ continue;
+ }
+
+ if (lookToEndOfRule && config.state.onlyHasEpsilonTransitions()) {
+ final nextTokens = atn.nextTokens(config.state);
+ if (nextTokens.contains(Token.EPSILON)) {
+ ATNState endOfRuleState = atn.ruleToStopState[config.state.ruleIndex];
+ result.add(
+ ATNConfig.dup(config, state: endOfRuleState), mergeCache);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ ATNConfigSet computeStartState(ATNState p, RuleContext ctx, bool fullCtx) {
+ // always at least the implicit call to start rule
+ final initialContext =
+ PredictionContext.fromRuleContext(atn, ctx);
+ final configs = ATNConfigSet(fullCtx);
+
+ for (var i = 0; i < p.numberOfTransitions; i++) {
+ final target = p.transition(i).target;
+ final c = ATNConfig(target, i + 1, initialContext);
+ final closureBusy = <ATNConfig>{};
+ closure(c, configs, closureBusy, true, fullCtx, false);
+ }
+
+ return configs;
+ }
+
+ /* parrt internal source braindump that doesn't mess up
+ * external API spec.
+ applyPrecedenceFilter is an optimization to avoid highly
+ nonlinear prediction of expressions and other left recursive
+ rules. The precedence predicates such as {3>=prec}? are highly
+ context-sensitive in that they can only be properly evaluated
+ in the context of the proper prec argument. Without pruning,
+ these predicates are normal predicates evaluated when we reach
+ conflict state (or unique prediction). As we cannot evaluate
+ these predicates out of context, the resulting conflict leads
+ to full LL evaluation and nonlinear prediction which shows up
+ very clearly with fairly large expressions.
+
+ Example grammar:
+
+ e : e '*' e
+ | e '+' e
+ | INT
+ ;
+
+ We convert that to the following:
+
+ e[int prec]
+ : INT
+ ( {3>=prec}? '*' e[4]
+ | {2>=prec}? '+' e[3]
+ )*
+ ;
+
+ The (..)* loop has a decision for the inner block as well as
+ an enter or exit decision, which is what concerns us here. At
+ the 1st + of input 1+2+3, the loop entry sees both predicates
+ and the loop exit also sees both predicates by falling off the
+ edge of e. This is because we have no stack information with
+ SLL and find the follow of e, which will hit the return states
+ inside the loop after e[4] and e[3], which brings it back to
+ the enter or exit decision. In this case, we know that we
+ cannot evaluate those predicates because we have fallen off
+ the edge of the stack and will in general not know which prec
+ parameter is the right one to use in the predicate.
+
+ Because we have special information, that these are precedence
+ predicates, we can resolve them without failing over to full
+ LL despite their context sensitive nature. We make an
+ assumption that prec[-1] <= prec[0], meaning that the current
+ precedence level is greater than or equal to the precedence
+ level of recursive invocations above us in the stack. For
+ example, if predicate {3>=prec}? is true of the current prec,
+ then one option is to enter the loop to match it now. The
+ other option is to exit the loop and the left recursive rule
+ to match the current operator in rule invocation further up
+ the stack. But, we know that all of those prec are lower or
+ the same value and so we can decide to enter the loop instead
+ of matching it later. That means we can strip out the other
+ configuration for the exit branch.
+
+ So imagine we have (14,1,$,{2>=prec}?) and then
+ (14,2,$-dipsIntoOuterContext,{2>=prec}?). The optimization
+ allows us to collapse these two configurations. We know that
+ if {2>=prec}? is true for the current prec parameter, it will
+ also be true for any prec from an invoking e call, indicated
+ by dipsIntoOuterContext. As the predicates are both true, we
+ have the option to evaluate them early in the decision start
+ state. We do this by stripping both predicates and choosing to
+ enter the loop as it is consistent with the notion of operator
+ precedence. It's also how the full LL conflict resolution
+ would work.
+
+ The solution requires a different DFA start state for each
+ precedence level.
+
+ The basic filter mechanism is to remove configurations of the
+ form (p, 2, pi) if (p, 1, pi) exists for the same p and pi. In
+ other words, for the same ATN state and predicate context,
+ remove any configuration associated with an exit branch if
+ there is a configuration associated with the enter branch.
+
+ It's also the case that the filter evaluates precedence
+ predicates and resolves conflicts according to precedence
+ levels. For example, for input 1+2+3 at the first +, we see
+ prediction filtering
+
+ [(11,1,[$],{3>=prec}?), (14,1,[$],{2>=prec}?), (5,2,[$],up=1),
+ (11,2,[$],up=1), (14,2,[$],up=1)],hasSemanticContext=true,dipsIntoOuterContext
+
+ to
+
+ [(11,1,[$]), (14,1,[$]), (5,2,[$],up=1)],dipsIntoOuterContext
+
+ This filters because {3>=prec}? evals to true and collapses
+ (11,1,[$],{3>=prec}?) and (11,2,[$],up=1) since early conflict
+ resolution based upon rules of operator precedence fits with
+ our usual match first alt upon conflict.
+
+ We noticed a problem where a recursive call resets precedence
+ to 0. Sam's fix: each config has flag indicating if it has
+ returned from an expr[0] call. then just don't filter any
+ config with that flag set. flag is carried along in
+ closure(). so to avoid adding field, set bit just under sign
+ bit of dipsIntoOuterContext (SUPPRESS_PRECEDENCE_FILTER).
+ With the change you filter "unless (p, 2, pi) was reached
+ after leaving the rule stop state of the LR rule containing
+ state p, corresponding to a rule invocation with precedence
+ level 0"
+ */
+
+ /// This method transforms the start state computed by
+ /// {@link #computeStartState} to the special start state used by a
+ /// precedence DFA for a particular precedence value. The transformation
+ /// process applies the following changes to the start state's configuration
+ /// set.
    + ///
+ /// 1. Evaluate the precedence predicates for each configuration using
+ /// {@link SemanticContext#evalPrecedence}.
+ /// 2. When {@link ATNConfig#isPrecedenceFilterSuppressed} is [false],
+ /// remove all configurations which predict an alternative greater than 1,
+ /// for which another configuration that predicts alternative 1 is in the
+ /// same ATN state with the same prediction context. This transformation is
+ /// valid for the following reasons:
+ ///
+ /// * The closure block cannot contain any epsilon transitions which bypass
+ /// the body of the closure, so all states reachable via alternative 1 are
+ /// part of the precedence alternatives of the transformed left-recursive
+ /// rule.
+ /// * The "primary" portion of a left recursive rule cannot contain an
+ /// epsilon transition, so the only way an alternative other than 1 can exist
+ /// in a state that is also reachable via alternative 1 is by nesting calls
+ /// to the left-recursive rule, with the outer calls not being at the
+ /// preferred precedence level. The
+ /// {@link ATNConfig#isPrecedenceFilterSuppressed} property marks ATN
+ /// configurations which do not meet this condition, and therefore are not
+ /// eligible for elimination during the filtering process.
+ ///
+ /// The prediction context must be considered by this filter to address
+ /// situations like the following.
+ ///
+  /// grammar TA;
+  /// prog: statement* EOF;
+  /// statement: letterA | statement letterA 'b' ;
+  /// letterA: 'a';
+  /// 
+ ///
+ /// In the above grammar, the ATN state immediately before the token
+ /// reference {@code 'a'} in [letterA] is reachable from the left edge
+ /// of both the primary and closure blocks of the left-recursive rule
+ /// [statement]. The prediction context associated with each of these
+ /// configurations distinguishes between them, and prevents the alternative
+ /// which stepped out to [prog] (and then back in to [statement])
+ /// from being eliminated by the filter.
+ /// + /// @param configs The configuration set computed by + /// {@link #computeStartState} as the start state for the DFA. + /// @return The transformed configuration set representing the start state + /// for a precedence DFA at a particular precedence level (determined by + /// calling {@link Parser#getPrecedence}). + ATNConfigSet applyPrecedenceFilter(ATNConfigSet configs) { + final statesFromAlt1 = {}; + final configSet = ATNConfigSet(configs.fullCtx); + for (var config in configs) { + // handle alt 1 first + if (config.alt != 1) { + continue; + } + + final updatedContext = + config.semanticContext.evalPrecedence(parser, _outerContext); + if (updatedContext == null) { + // the configuration was eliminated + continue; + } + + statesFromAlt1[config.state.stateNumber] = config.context; + if (updatedContext != config.semanticContext) { + configSet.add( + ATNConfig.dup(config, semanticContext: updatedContext), + mergeCache); + } else { + configSet.add(config, mergeCache); + } + } + + for (var config in configs) { + if (config.alt == 1) { + // already handled + continue; + } + + if (!config.isPrecedenceFilterSuppressed()) { + /* In the future, this elimination step could be updated to also + * filter the prediction context for alternatives predicting alt>1 + * (basically a graph subtraction algorithm). + */ + final context = statesFromAlt1[config.state.stateNumber]; + if (context != null && context == config.context) { + // eliminated + continue; + } + } + + configSet.add(config, mergeCache); + } + + return configSet; + } + + ATNState getReachableTarget(Transition trans, int ttype) { + if (trans.matches(ttype, 0, atn.maxTokenType)) { + return trans.target; + } + + return null; + } + + List getPredsForAmbigAlts( + BitSet ambigAlts, ATNConfigSet configs, int nalts) { + // REACH=[1|1|[]|0:0, 1|2|[]|0:1] + /* altToPred starts as an array of all null contexts. The entry at index i + * corresponds to alternative i. altToPred[i] may have one of three values: + * 1. null: no ATNConfig c is found such that c.alt==i + * 2. SemanticContext.NONE: At least one ATNConfig c exists such that + * c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, + * alt i has at least one unpredicated config. + * 3. Non-NONE Semantic Context: There exists at least one, and for all + * ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. + * + * From this, it is clear that NONE||anything==NONE. 
+ */ + var altToPred = List(nalts + 1); + for (var c in configs) { + if (ambigAlts[c.alt]) { + altToPred[c.alt] = + SemanticContext.or(altToPred[c.alt], c.semanticContext); + } + } + + var nPredAlts = 0; + for (var i = 1; i <= nalts; i++) { + if (altToPred[i] == null) { + altToPred[i] = SemanticContext.NONE; + } else if (altToPred[i] != SemanticContext.NONE) { + nPredAlts++; + } + } + +// // Optimize away p||p and p&&p TODO: optimize() was a no-op +// for (int i = 0; i < altToPred.length; i++) { +// altToPred[i] = altToPred[i].optimize(); +// } + + // nonambig alts are null in altToPred + if (nPredAlts == 0) altToPred = null; + if (debug) log('getPredsForAmbigAlts result $altToPred'); + return altToPred; + } + + List getPredicatePredictions( + BitSet ambigAlts, List altToPred) { + final pairs = []; + var containsPredicate = false; + for (var i = 1; i < altToPred.length; i++) { + final pred = altToPred[i]; + + // unpredicated is indicated by SemanticContext.NONE + assert(pred != null); + + if (ambigAlts != null && ambigAlts[i]) { + pairs.add(PredPrediction(pred, i)); + } + if (pred != SemanticContext.NONE) containsPredicate = true; + } + + if (!containsPredicate) { + return null; + } + +// log(Arrays.toString(altToPred)+"->"+pairs); + return pairs; + } + + /// This method is used to improve the localization of error messages by + /// choosing an alternative rather than throwing a + /// [NoViableAltException] in particular prediction scenarios where the + /// {@link #ERROR} state was reached during ATN simulation. + /// + ///
+ /// The default implementation of this method uses the following + /// algorithm to identify an ATN configuration which successfully parsed the + /// decision entry rule. Choosing such an alternative ensures that the + /// [ParserRuleContext] returned by the calling rule will be complete + /// and valid, and the syntax error will be reported later at a more + /// localized location.
+ ///
+ /// * If a syntactically valid path or paths reach the end of the decision
+ /// rule and they are semantically valid if predicated, return the min
+ /// associated alt.
+ /// * Else, if a semantically invalid but syntactically valid path exists
+ /// or paths exist, return the minimum associated alt.
+ /// * Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.
+ ///
+ /// In some scenarios, the algorithm described above could predict an + /// alternative which will result in a [FailedPredicateException] in + /// the parser. Specifically, this could occur if the only configuration + /// capable of successfully parsing to the end of the decision rule is + /// blocked by a semantic predicate. By choosing this alternative within + /// {@link #adaptivePredict} instead of throwing a + /// [NoViableAltException], the resulting + /// [FailedPredicateException] in the parser will identify the specific + /// predicate which is preventing the parser from successfully parsing the + /// decision rule, which helps developers identify and correct logic errors + /// in semantic predicates. + ///
+ /// + /// @param configs The ATN configurations which were valid immediately before + /// the {@link #ERROR} state was reached + /// @param outerContext The is the \gamma_0 initial parser context from the paper + /// or the parser stack at the instant before prediction commences. + /// + /// @return The value to return from {@link #adaptivePredict}, or + /// {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not + /// identified and {@link #adaptivePredict} should report an error instead. + int getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule( + ATNConfigSet configs, ParserRuleContext outerContext) { + final sets = + splitAccordingToSemanticValidity(configs, outerContext); + final semValidConfigs = sets.a; + final semInvalidConfigs = sets.b; + var alt = getAltThatFinishedDecisionEntryRule(semValidConfigs); + if (alt != ATN.INVALID_ALT_NUMBER) { + // semantically/syntactically viable path exists + return alt; + } + // Is there a syntactically valid path with a failed pred? + if (semInvalidConfigs.isNotEmpty) { + alt = getAltThatFinishedDecisionEntryRule(semInvalidConfigs); + if (alt != ATN.INVALID_ALT_NUMBER) { + // syntactically viable path exists + return alt; + } + } + return ATN.INVALID_ALT_NUMBER; + } + + int getAltThatFinishedDecisionEntryRule(ATNConfigSet configs) { + final alts = IntervalSet(); + for (var c in configs) { + if (c.outerContextDepth > 0 || + (c.state is RuleStopState && c.context.hasEmptyPath())) { + alts.addOne(c.alt); + } + } + if (alts.length == 0) return ATN.INVALID_ALT_NUMBER; + return alts.minElement; + } + + /// Walk the list of configurations and split them according to + /// those that have preds evaluating to true/false. If no pred, assume + /// true pred and include in succeeded set. Returns Pair of sets. + /// + /// Create a new set so as not to alter the incoming parameter. + /// + /// Assumption: the input stream has been restored to the starting point + /// prediction, which is where predicates need to evaluate. + Pair splitAccordingToSemanticValidity( + ATNConfigSet configs, ParserRuleContext outerContext) { + final succeeded = ATNConfigSet(configs.fullCtx); + final failed = ATNConfigSet(configs.fullCtx); + for (var c in configs) { + if (c.semanticContext != SemanticContext.NONE) { + final predicateEvaluationResult = evalSemanticContextOne( + c.semanticContext, outerContext, c.alt, configs.fullCtx); + if (predicateEvaluationResult) { + succeeded.add(c); + } else { + failed.add(c); + } + } else { + succeeded.add(c); + } + } + return Pair(succeeded, failed); + } + + /// Look through a list of predicate/alt pairs, returning alts for the + /// pairs that win. A [NONE] predicate indicates an alt containing an + /// unpredicated config which behaves as "always true." If !complete + /// then we stop at the first predicate that evaluates to true. This + /// includes pairs with null predicates. 
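+ ///
+ /// As a sketch, the accept-state handler in [execATN] above consumes the
+ /// result by cardinality (zero alts: no viable alternative; one alt: the
+ /// prediction; several alts: an ambiguity that is reported first):
+ ///
+ /// ```dart
+ /// final alts = evalSemanticContext(D.predicates, outerContext, true);
+ /// switch (alts.cardinality) {
+ ///   case 0:
+ ///     throw noViableAlt(input, outerContext, D.configs, startIndex);
+ ///   case 1:
+ ///     return alts.nextset(0);
+ ///   default:
+ ///     // report ambiguity, then take the minimum alternative
+ ///     return alts.nextset(0);
+ /// }
+ /// ```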
+ BitSet evalSemanticContext(List<PredPrediction> predPredictions,
+ ParserRuleContext outerContext, bool complete) {
+ final predictions = BitSet();
+ for (var pair in predPredictions) {
+ if (pair.pred == SemanticContext.NONE) {
+ predictions.set(pair.alt);
+ if (!complete) {
+ break;
+ }
+ continue;
+ }
+
+ final fullCtx = false; // in dfa
+ final predicateEvaluationResult =
+ evalSemanticContextOne(pair.pred, outerContext, pair.alt, fullCtx);
+ if (debug || dfa_debug) {
+ log('eval pred $pair=$predicateEvaluationResult');
+ }
+
+ if (predicateEvaluationResult) {
+ if (debug || dfa_debug) log('PREDICT ${pair.alt}');
+ predictions.set(pair.alt);
+ if (!complete) {
+ break;
+ }
+ }
+ }
+
+ return predictions;
+ }
+
+ /// Evaluate a semantic context within a specific parser context.
+ ///
+ /// This method might not be called for every semantic context evaluated
+ /// during the prediction process. In particular, we currently do not
+ /// evaluate the following but it may change in the future:
+ ///
+ /// * Precedence predicates (represented by
+ /// {@link SemanticContext.PrecedencePredicate}) are not currently evaluated
+ /// through this method.
+ /// * Operator predicates (represented by {@link SemanticContext.AND} and
+ /// {@link SemanticContext.OR}) are evaluated as a single semantic
+ /// context, rather than evaluating the operands individually.
+ /// Implementations which require evaluation results from individual
+ /// predicates should override this method to explicitly handle evaluation of
+ /// the operands within operator predicates.
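+ ///
+ /// For instance, a hypothetical subclass that traces every predicate
+ /// evaluation before delegating to the default behavior might look like
+ /// this (sketch only; `TracingParserATNSimulator` is not part of the
+ /// runtime):
+ ///
+ /// ```dart
+ /// class TracingParserATNSimulator extends ParserATNSimulator {
+ ///   TracingParserATNSimulator(Parser parser, ATN atn,
+ ///       List<DFA> decisionToDFA, PredictionContextCache sharedContextCache)
+ ///       : super(parser, atn, decisionToDFA, sharedContextCache);
+ ///
+ ///   @override
+ ///   bool evalSemanticContextOne(SemanticContext pred,
+ ///       ParserRuleContext parserCallStack, int alt, bool fullCtx) {
+ ///     log('eval $pred for alt $alt (fullCtx=$fullCtx)');
+ ///     return super.evalSemanticContextOne(pred, parserCallStack, alt, fullCtx);
+ ///   }
+ /// }
+ /// ```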
+ /// + /// @param pred The semantic context to evaluate + /// @param parserCallStack The parser context in which to evaluate the + /// semantic context + /// @param alt The alternative which is guarded by [pred] + /// @param fullCtx [true] if the evaluation is occurring during LL + /// prediction; otherwise, [false] if the evaluation is occurring + /// during SLL prediction + /// + /// @since 4.3 + bool evalSemanticContextOne(SemanticContext pred, + ParserRuleContext parserCallStack, int alt, bool fullCtx) { + return pred.eval(parser, parserCallStack); + } + + /* TODO: If we are doing predicates, there is no point in pursuing + closure operations if we reach a DFA state that uniquely predicts + alternative. We will not be caching that DFA state and it is a + waste to pursue the closure. Might have to advance when we do + ambig detection thought :( + */ + + void closure( + ATNConfig config, + ATNConfigSet configs, + Set closureBusy, + bool collectPredicates, + bool fullCtx, + bool treatEofAsEpsilon) { + final initialDepth = 0; + closureCheckingStopState(config, configs, closureBusy, collectPredicates, + fullCtx, initialDepth, treatEofAsEpsilon); + assert(!fullCtx || !configs.dipsIntoOuterContext); + } + + void closureCheckingStopState( + ATNConfig config, + ATNConfigSet configs, + Set closureBusy, + bool collectPredicates, + bool fullCtx, + int depth, + bool treatEofAsEpsilon) { + if (debug) log('closure(' + config.toString(parser, true) + ')'); + + if (config.state is RuleStopState) { + // We hit rule end. If we have context info, use it + // run thru all possible stack tops in ctx + if (!config.context.isEmpty) { + for (var i = 0; i < config.context.length; i++) { + if (config.context.getReturnState(i) == + PredictionContext.EMPTY_RETURN_STATE) { + if (fullCtx) { + configs.add( + ATNConfig.dup(config, + state: config.state, context: PredictionContext.EMPTY), + mergeCache); + continue; + } else { + // we have no context info, just chase follow links (if greedy) + if (debug) { + log('FALLING off rule ' + getRuleName(config.state.ruleIndex)); + } + closure_(config, configs, closureBusy, collectPredicates, fullCtx, + depth, treatEofAsEpsilon); + } + continue; + } + final returnState = atn.states[config.context.getReturnState(i)]; + final newContext = + config.context.getParent(i); // "pop" return state + final c = ATNConfig( + returnState, config.alt, newContext, config.semanticContext); + // While we have context to pop back from, we may have + // gotten that context AFTER having falling off a rule. + // Make sure we track that we are now out of context. + // + // This assignment also propagates the + // isPrecedenceFilterSuppressed() value to the new + // configuration. 
+ c.reachesIntoOuterContext = config.reachesIntoOuterContext; +// assert(depth > int.MIN_VALUE); + closureCheckingStopState(c, configs, closureBusy, collectPredicates, + fullCtx, depth - 1, treatEofAsEpsilon); + } + return; + } else if (fullCtx) { + // reached end of start rule + configs.add(config, mergeCache); + return; + } else { + // else if we have no context info, just chase follow links (if greedy) + if (debug) { + log('FALLING off rule ' + getRuleName(config.state.ruleIndex)); + } + } + } + + closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, + treatEofAsEpsilon); + } + + /// Do the actual work of walking epsilon edges */ + void closure_( + ATNConfig config, + ATNConfigSet configs, + Set closureBusy, + bool collectPredicates, + bool fullCtx, + int depth, + bool treatEofAsEpsilon) { + final p = config.state; + // optimization + if (!p.onlyHasEpsilonTransitions()) { + configs.add(config, mergeCache); + // make sure to not return here, because EOF transitions can act as + // both epsilon transitions and non-epsilon transitions. +// if ( debug ) log("added config "+configs); + } + + for (var i = 0; i < p.numberOfTransitions; i++) { + if (i == 0 && canDropLoopEntryEdgeInLeftRecursiveRule(config)) continue; + + final t = p.transition(i); + final continueCollecting = !(t is ActionTransition) && collectPredicates; + final c = getEpsilonTarget(config, t, continueCollecting, depth == 0, + fullCtx, treatEofAsEpsilon); + if (c != null) { + var newDepth = depth; + if (config.state is RuleStopState) { + assert(!fullCtx); + // target fell off end of rule; mark resulting c as having dipped into outer context + // We can't get here if incoming config was rule stop and we had context + // track how far we dip into outer context. Might + // come in handy and we avoid evaluating context dependent + // preds if this is > 0. + + if (_dfa != null && _dfa.isPrecedenceDfa()) { + final outermostPrecedenceReturn = + (t as EpsilonTransition).outermostPrecedenceReturn; + if (outermostPrecedenceReturn == _dfa.atnStartState.ruleIndex) { + c.setPrecedenceFilterSuppressed(true); + } + } + + c.reachesIntoOuterContext++; + + if (!closureBusy.add(c)) { + // avoid infinite recursion for right-recursive rules + continue; + } + + // TODO: can remove? only care when we add to set per middle of this method + configs.dipsIntoOuterContext = true; +// assert(newDepth > int.MIN_VALUE); + newDepth--; + if (debug) log('dips into outer ctx: $c'); + } else { + if (!t.isEpsilon && !closureBusy.add(c)) { + // avoid infinite recursion for EOF* and EOF+ + continue; + } + + if (t is RuleTransition) { + // latch when newDepth goes negative - once we step out of the entry context we can't return + if (newDepth >= 0) { + newDepth++; + } + } + } + + closureCheckingStopState(c, configs, closureBusy, continueCollecting, + fullCtx, newDepth, treatEofAsEpsilon); + } + } + } + + /// Implements first-edge (loop entry) elimination as an optimization + /// during closure operations. See antlr/antlr4#1398. + /// + /// The optimization is to avoid adding the loop entry config when + /// the exit path can only lead back to the same + /// StarLoopEntryState after popping context at the rule end state + /// (traversing only epsilon edges, so we're still in closure, in + /// this same rule). + /// + /// We need to detect any state that can reach loop entry on + /// epsilon w/o exiting rule. We don't have to look at FOLLOW + /// links, just ensure that all stack tops for config refer to key + /// states in LR rule. 
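+ ///
+ /// Because this is purely an optimization, it can be disabled through the
+ /// compile-time environment declaration read by
+ /// [TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT] above, e.g. (assuming the Dart VM's
+ /// standard `-D` define syntax; the entry-point path is hypothetical):
+ ///
+ /// ```
+ /// dart -DTURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT=true bin/parse.dart
+ /// ```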
+ /// + /// To verify we are in the right situation we must first check + /// closure is at a StarLoopEntryState generated during LR removal. + /// Then we check that each stack top of context is a return state + /// from one of these cases: + /// + /// 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state + /// 2. expr op expr. The return state is the block end of internal block of (...)* + /// 3. 'between' expr 'and' expr. The return state of 2nd expr reference. + /// That state points at block end of internal block of (...)*. + /// 4. expr '?' expr ':' expr. The return state points at block end, + /// which points at loop entry state. + /// + /// If any is true for each stack top, then closure does not add a + /// config to the current config set for edge[0], the loop entry branch. + /// + /// Conditions fail if any context for the current config is: + /// + /// a. empty (we'd fall out of expr to do a global FOLLOW which could + /// even be to some weird spot in expr) or, + /// b. lies outside of expr or, + /// c. lies within expr but at a state not the BlockEndState + /// generated during LR removal + /// + /// Do we need to evaluate predicates ever in closure for this case? + /// + /// No. Predicates, including precedence predicates, are only + /// evaluated when computing a DFA start state. I.e., only before + /// the lookahead (but not parser) consumes a token. + /// + /// There are no epsilon edges allowed in LR rule alt blocks or in + /// the "primary" part (ID here). If closure is in + /// StarLoopEntryState any lookahead operation will have consumed a + /// token as there are no epsilon-paths that lead to + /// StarLoopEntryState. We do not have to evaluate predicates + /// therefore if we are in the generated StarLoopEntryState of a LR + /// rule. Note that when making a prediction starting at that + /// decision point, decision d=2, compute-start-state performs + /// closure starting at edges[0], edges[1] emanating from + /// StarLoopEntryState. That means it is not performing closure on + /// StarLoopEntryState during compute-start-state. + /// + /// How do we know this always gives same prediction answer? + /// + /// Without predicates, loop entry and exit paths are ambiguous + /// upon remaining input +b (in, say, a+b). Either paths lead to + /// valid parses. Closure can lead to consuming + immediately or by + /// falling out of this call to expr back into expr and loop back + /// again to StarLoopEntryState to match +b. In this special case, + /// we choose the more efficient path, which is to take the bypass + /// path. + /// + /// The lookahead language has not changed because closure chooses + /// one path over the other. Both paths lead to consuming the same + /// remaining input during a lookahead operation. If the next token + /// is an operator, lookahead will enter the choice block with + /// operators. If it is not, lookahead will exit expr. Same as if + /// closure had chosen to enter the choice block immediately. + /// + /// Closure is examining one config (some loopentrystate, some alt, + /// context) which means it is considering exactly one alt. Closure + /// always copies the same alt to any derived configs. + /// + /// How do we know this optimization doesn't mess up precedence in + /// our parse trees? + /// + /// Looking through expr from left edge of stat only has to confirm + /// that an input, say, a+b+c; begins with any valid interpretation + /// of an expression. 
The precedence actually doesn't matter when + /// making a decision in stat seeing through expr. It is only when + /// parsing rule expr that we must use the precedence to get the + /// right interpretation and, hence, parse tree. + /// + /// @since 4.6 + bool canDropLoopEntryEdgeInLeftRecursiveRule(ATNConfig config) { + if (TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT) return false; + final p = config.state; + // First check to see if we are in StarLoopEntryState generated during + // left-recursion elimination. For efficiency, also check if + // the context has an empty stack case. If so, it would mean + // global FOLLOW so we can't perform optimization + if (p.stateType != StateType.STAR_LOOP_ENTRY || + !(p as StarLoopEntryState) + .isPrecedenceDecision || // Are we the special loop entry/exit state? + config.context.isEmpty || // If SLL wildcard + config.context.hasEmptyPath()) { + return false; + } + + // Require all return states to return back to the same rule + // that p is in. + final numCtxs = config.context.length; + for (var i = 0; i < numCtxs; i++) { + // for each stack context + final returnState = atn.states[config.context.getReturnState(i)]; + if (returnState.ruleIndex != p.ruleIndex) return false; + } + + BlockStartState decisionStartState = p.transition(0).target; + final blockEndStateNum = decisionStartState.endState.stateNumber; + BlockEndState blockEndState = atn.states[blockEndStateNum]; + + // Verify that the top of each stack context leads to loop entry/exit + // state through epsilon edges and w/o leaving rule. + for (var i = 0; i < numCtxs; i++) { + // for each stack context + final returnStateNumber = config.context.getReturnState(i); + final returnState = atn.states[returnStateNumber]; + // all states must have single outgoing epsilon edge + if (returnState.numberOfTransitions != 1 || + !returnState.transition(0).isEpsilon) { + return false; + } + // Look for prefix op case like 'not expr', (' type ')' expr + final returnStateTarget = returnState.transition(0).target; + if (returnState.stateType == StateType.BLOCK_END && + returnStateTarget == p) { + continue; + } + // Look for 'expr op expr' or case where expr's return state is block end + // of (...)* internal block; the block end points to loop back + // which points to p but we don't need to check that + if (returnState == blockEndState) { + continue; + } + // Look for ternary expr ? expr : expr. 
The return state points at block end, + // which points at loop entry state + if (returnStateTarget == blockEndState) { + continue; + } + // Look for complex prefix 'between expr and expr' case where 2nd expr's + // return state points at block end state of (...)* internal block + if (returnStateTarget.stateType == StateType.BLOCK_END && + returnStateTarget.numberOfTransitions == 1 && + returnStateTarget.transition(0).isEpsilon && + returnStateTarget.transition(0).target == p) { + continue; + } + + // anything else ain't conforming + return false; + } + + return true; + } + + String getRuleName(int index) { + if (parser != null && index >= 0) return parser.ruleNames[index]; + return ''; + } + + ATNConfig getEpsilonTarget( + ATNConfig config, + Transition t, + bool collectPredicates, + bool inContext, + bool fullCtx, + bool treatEofAsEpsilon) { + switch (t.type) { + case TransitionType.RULE: + return ruleTransition(config, t); + + case TransitionType.PRECEDENCE: + return precedenceTransition( + config, t, collectPredicates, inContext, fullCtx); + + case TransitionType.PREDICATE: + return predTransition(config, t, collectPredicates, inContext, fullCtx); + case TransitionType.ACTION: + return actionTransition(config, t); + + case TransitionType.EPSILON: + return ATNConfig.dup(config, state: t.target); + + case TransitionType.ATOM: + case TransitionType.RANGE: + case TransitionType.SET: + // EOF transitions act like epsilon transitions after the first EOF + // transition is traversed + if (treatEofAsEpsilon) { + if (t.matches(Token.EOF, 0, 1)) { + return ATNConfig.dup(config, state: t.target); + } + } + + return null; + + default: + return null; + } + } + + ATNConfig actionTransition(ATNConfig config, ActionTransition t) { + if (debug) log('ACTION edge ${t.ruleIndex}:${t.actionIndex}'); + return ATNConfig.dup(config, state: t.target); + } + + ATNConfig precedenceTransition( + ATNConfig config, + PrecedencePredicateTransition pt, + bool collectPredicates, + bool inContext, + bool fullCtx) { + if (debug) { + log('PRED (collectPredicates=$collectPredicates) ${pt.precedence}>=_p, ctx dependent=true'); + if (parser != null) { + log('context surrounding pred is ${parser.getRuleInvocationStack()}'); + } + } + + ATNConfig c; + if (collectPredicates && inContext) { + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. 
+ final currentPosition = input.index; + input.seek(startIndex); + final predSucceeds = evalSemanticContextOne( + pt.predicate, _outerContext, config.alt, fullCtx); + input.seek(currentPosition); + if (predSucceeds) { + c = ATNConfig.dup(config, state: pt.target); // no pred context + } + } else { + final newSemCtx = + SemanticContext.and(config.semanticContext, pt.predicate); + c = ATNConfig.dup(config, + state: pt.target, semanticContext: newSemCtx); + } + } else { + c = ATNConfig.dup(config, state: pt.target); + } + + if (debug) log('config from pred transition=$c'); + return c; + } + + ATNConfig predTransition(ATNConfig config, PredicateTransition pt, + bool collectPredicates, bool inContext, bool fullCtx) { + if (debug) { + log('PRED (collectPredicates=$collectPredicates) ' '${pt.ruleIndex}:${pt.predIndex}' ', ctx dependent=${pt.isCtxDependent}'); + if (parser != null) { + log('context surrounding pred is ${parser.getRuleInvocationStack()}'); + } + } + + ATNConfig c; + if (collectPredicates && + (!pt.isCtxDependent || (pt.isCtxDependent && inContext))) { + if (fullCtx) { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. + final currentPosition = input.index; + input.seek(startIndex); + final predSucceeds = evalSemanticContextOne( + pt.predicate, _outerContext, config.alt, fullCtx); + input.seek(currentPosition); + if (predSucceeds) { + c = ATNConfig.dup(config, state: pt.target); // no pred context + } + } else { + final newSemCtx = + SemanticContext.and(config.semanticContext, pt.predicate); + c = ATNConfig.dup(config, + state: pt.target, semanticContext: newSemCtx); + } + } else { + c = ATNConfig.dup(config, state: pt.target); + } + + if (debug) log('config from pred transition=$c'); + return c; + } + + ATNConfig ruleTransition(ATNConfig config, RuleTransition t) { + if (debug) { + log('CALL rule ' + + getRuleName(t.target.ruleIndex) + + ', ctx=${config.context}'); + } + + final returnState = t.followState; + PredictionContext newContext = SingletonPredictionContext.create( + config.context, returnState.stateNumber); + return ATNConfig.dup(config, state: t.target, context: newContext); + } + + /// Gets a [BitSet] containing the alternatives in [configs] + /// which are part of one or more conflicting alternative subsets. + /// + /// @param configs The [ATNConfigSet] to analyze. + /// @return The alternatives in [configs] which are part of one or more + /// conflicting alternative subsets. If [configs] does not contain any + /// conflicting subsets, this method returns an empty [BitSet]. + BitSet getConflictingAlts(ATNConfigSet configs) { + final altsets = + PredictionModeExtension.getConflictingAltSubsets(configs); + return PredictionModeExtension.getAlts(altsets); + } + + /// Sam pointed out a problem with the previous definition, v3, of + /// ambiguous states. If we have another state associated with conflicting + /// alternatives, we should keep going. For example, the following grammar + /// + /// s : (ID | ID ID?) ';' ; + /// + /// When the ATN simulation reaches the state before ';', it has a DFA + /// state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally + /// 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node + /// because alternative to has another way to continue, via [6|2|[]]. 
+ /// The key is that we have a single state that has config's only associated + /// with a single alternative, 2, and crucially the state transitions + /// among the configurations are all non-epsilon transitions. That means + /// we don't consider any conflicts that include alternative 2. So, we + /// ignore the conflict between alts 1 and 2. We ignore a set of + /// conflicting alts when there is an intersection with an alternative + /// associated with a single alt state in the state→config-list map. + /// + /// It's also the case that we might have two conflicting configurations but + /// also a 3rd nonconflicting configuration for a different alternative: + /// [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: + /// + /// a : A | A | A B ; + /// + /// After matching input A, we reach the stop state for rule A, state 1. + /// State 8 is the state right before B. Clearly alternatives 1 and 2 + /// conflict and no amount of further lookahead will separate the two. + /// However, alternative 3 will be able to continue and so we do not + /// stop working on this state. In the previous example, we're concerned + /// with states associated with the conflicting alternatives. Here alt + /// 3 is not associated with the conflicting configs, but since we can continue + /// looking for input reasonably, I don't declare the state done. We + /// ignore a set of conflicting alts when we have an alternative + /// that we still need to pursue. + BitSet getConflictingAltsOrUniqueAlt(ATNConfigSet configs) { + BitSet conflictingAlts; + if (configs.uniqueAlt != ATN.INVALID_ALT_NUMBER) { + conflictingAlts = BitSet(); + conflictingAlts.set(configs.uniqueAlt); + } else { + conflictingAlts = configs.conflictingAlts; + } + return conflictingAlts; + } + + String getTokenName(int t) { + if (t == Token.EOF) { + return 'EOF'; + } + + final vocabulary = parser != null + ? parser.vocabulary + : VocabularyImpl.EMPTY_VOCABULARY; + final displayName = vocabulary.getDisplayName(t); + if (displayName == t.toString()) { + return displayName; + } + + return displayName + '<$t>'; + } + + String getLookaheadName(TokenStream input) { + return getTokenName(input.LA(1)); + } + + /// Used for debugging in adaptivePredict around execATN but I cut + /// it out for clarity now that alg. works well. We can leave this + /// "dead" code for a bit. + void dumpDeadEndConfigs(NoViableAltException nvae) { + log('dead end configs: ', level: Level.SEVERE.value); + for (var c in nvae.deadEndConfigs) { + var trans = 'no edges'; + if (c.state.numberOfTransitions > 0) { + final t = c.state.transition(0); + if (t is AtomTransition) { + final at = t; + trans = 'Atom ' + getTokenName(at.atomLabel); + } else if (t is SetTransition) { + final st = t; + final not = st is NotSetTransition; + trans = (not ? '~' : '') + 'Set ' + st.label.toString(); + } + } + log(c.toString(parser, true) + ':' + trans, level: Level.SEVERE.value); + } + } + + NoViableAltException noViableAlt(TokenStream input, + ParserRuleContext outerContext, ATNConfigSet configs, int startIndex) { + return NoViableAltException(parser, input, input.get(startIndex), + input.LT(1), configs, outerContext); + } + + static int getUniqueAlt(ATNConfigSet configs) { + var alt = ATN.INVALID_ALT_NUMBER; + for (var c in configs) { + if (alt == ATN.INVALID_ALT_NUMBER) { + alt = c.alt; // found first alt + } else if (c.alt != alt) { + return ATN.INVALID_ALT_NUMBER; + } + } + return alt; + } + + /// Add an edge to the DFA, if possible. 
+ /// This method calls
+ /// {@link #addDFAState} to ensure the [to] state is present in the
+ /// DFA. If [from] is null, or if [t] is outside the
+ /// range of edges that can be represented in the DFA tables, this method
+ /// returns without adding the edge to the DFA.
+ ///
+ /// If [to] is null, this method returns null.
+ /// Otherwise, this method returns the [DFAState] returned by calling
+ /// {@link #addDFAState} for the [to] state.
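+ ///
+ /// Note that the edge table is indexed by `t + 1` rather than `t`, so that
+ /// `Token.EOF` (which is -1) can be cached at index 0; this matches the
+ /// lookup in [getExistingTargetState] and the `t < -1` guard below.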
+ /// + /// @param dfa The DFA + /// @param from The source state for the edge + /// @param t The input symbol + /// @param to The target state for the edge + /// + /// @return If [to] is null, this method returns null; + /// otherwise this method returns the result of calling {@link #addDFAState} + /// on [to] + DFAState addDFAEdge(DFA dfa, DFAState from, int t, DFAState to) { + if (debug) { + log('EDGE $from -> $to upon ' + getTokenName(t)); + } + + if (to == null) { + return null; + } + + to = addDFAState(dfa, to); // used existing if possible not incoming + if (from == null || t < -1 || t > atn.maxTokenType) { + return to; + } + + from.edges ??= List(atn.maxTokenType + 1 + 1); + + from.edges[t + 1] = to; // connect + + if (debug) { + log('DFA=\n' + + dfa.toString(parser != null + ? parser.vocabulary + : VocabularyImpl.EMPTY_VOCABULARY)); + } + + return to; + } + + /// Add state [D] to the DFA if it is not already present, and return + /// the actual instance stored in the DFA. If a state equivalent to [D] + /// is already in the DFA, the existing state is returned. Otherwise this + /// method returns [D] after adding it to the DFA. + /// + ///
+  /// If [D] is {@link #ERROR}, this method returns {@link #ERROR} and
+  /// does not change the DFA.
+  ///
+  /// @param dfa The dfa
+  /// @param D The DFA state to add
+  /// @return The state stored in the DFA. This will be either the existing
+  /// state if [D] is already in the DFA, or [D] itself if the
+  /// state was not already present.
+  DFAState addDFAState(DFA dfa, DFAState D) {
+    if (D == ATNSimulator.ERROR) {
+      return D;
+    }
+
+    final existing = dfa.states[D];
+    if (existing != null) return existing;
+
+    D.stateNumber = dfa.states.length;
+    if (!D.configs.readOnly) {
+      D.configs.optimizeConfigs(this);
+      D.configs.readOnly = true;
+    }
+    dfa.states[D] = D;
+    if (debug) log('adding new DFA state: $D');
+    return D;
+  }
+
+  void reportAttemptingFullContext(DFA dfa, BitSet conflictingAlts,
+      ATNConfigSet configs, int startIndex, int stopIndex) {
+    if (debug || retry_debug) {
+      final interval = Interval.of(startIndex, stopIndex);
+      log('reportAttemptingFullContext decision=${dfa.decision}:$configs'
+          ', input=' + parser.tokenStream.getText(interval));
+    }
+    if (parser != null) {
+      parser.errorListenerDispatch.reportAttemptingFullContext(
+          parser, dfa, startIndex, stopIndex, conflictingAlts, configs);
+    }
+  }
+
+  void reportContextSensitivity(DFA dfa, int prediction, ATNConfigSet configs,
+      int startIndex, int stopIndex) {
+    if (debug || retry_debug) {
+      final interval = Interval.of(startIndex, stopIndex);
+      log('reportContextSensitivity decision=${dfa.decision}:$configs'
+          ', input=' + parser.tokenStream.getText(interval));
+    }
+    if (parser != null) {
+      parser.errorListenerDispatch.reportContextSensitivity(
+          parser, dfa, startIndex, stopIndex, prediction, configs);
+    }
+  }
+
+  /// If we are doing context-sensitive (full LL) parsing, we know it's an
+  /// ambiguity, not a conflict.
+  void reportAmbiguity(
+      DFA dfa,
+      DFAState D, // the DFA state from execATN() that had SLL conflicts
+      int startIndex,
+      int stopIndex,
+      bool exact,
+      BitSet ambigAlts,
+      ATNConfigSet configs) // configs that LL not SLL considered conflicting
+  {
+    if (debug || retry_debug) {
+      final interval = Interval.of(startIndex, stopIndex);
+      log('reportAmbiguity $ambigAlts:$configs'
+          ', input=' + parser.tokenStream.getText(interval));
+    }
+    if (parser != null) {
+      parser.errorListenerDispatch.reportAmbiguity(
+          parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs);
+    }
+  }
+}
+
+/// This enumeration defines the prediction modes available in ANTLR 4 along
+/// with utility methods for analyzing configuration sets for conflicts
+/// and/or ambiguities.
+enum PredictionMode {
+  /// The SLL(*) prediction mode. This prediction mode ignores the current
+  /// parser context when making predictions. This is the fastest prediction
+  /// mode, and provides correct results for many grammars. This prediction
+  /// mode is more powerful than the prediction mode provided by ANTLR 3, but
+  /// may result in syntax errors for grammar and input combinations which are
+  /// not SLL.
+  ///
+  /// When using this prediction mode, the parser will either return a correct
+  /// parse tree (i.e. the same parse tree that would be returned with the
+  /// {@link #LL} prediction mode), or it will report a syntax error. If a
+  /// syntax error is encountered when using the {@link #SLL} prediction mode,
+  /// it may be due to either an actual syntax error in the input or indicate
+  /// that the particular combination of grammar and input requires the more
+  /// powerful {@link #LL} prediction abilities to complete successfully.
+  ///
+  /// This prediction mode does not provide any guarantees for prediction
+  /// behavior for syntactically-incorrect inputs.
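+  ///
+  /// A minimal usage sketch (illustrative only, not part of this change;
+  /// `MyParser` is a hypothetical generated parser, and the simulator is
+  /// assumed to expose a writable `predictionMode`, mirroring the Java
+  /// runtime's `setPredictionMode`):
+  ///
+  /// ```dart
+  /// final parser = MyParser(tokens); // tokens: a CommonTokenStream
+  /// parser.interpreter.predictionMode = PredictionMode.SLL;
+  /// final tree = parser.startRule();
+  /// ```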
+ SLL, + /// The LL(*) prediction mode. This prediction mode allows the current parser + /// context to be used for resolving SLL conflicts that occur during + /// prediction. This is the fastest prediction mode that guarantees correct + /// parse results for all combinations of grammars with syntactically correct + /// inputs. + /// + ///
+  /// When using this prediction mode, the parser will make correct decisions
+  /// for all syntactically-correct grammar and input combinations. However,
+  /// in cases where the grammar is truly ambiguous this prediction mode might
+  /// not report a precise answer for exactly which alternatives are
+  /// ambiguous.
+  ///
+  /// This prediction mode does not provide any guarantees for prediction
+  /// behavior for syntactically-incorrect inputs.
+ LL, + /// The LL(*) prediction mode with exact ambiguity detection. In addition to + /// the correctness guarantees provided by the {@link #LL} prediction mode, + /// this prediction mode instructs the prediction algorithm to determine the + /// complete and exact set of ambiguous alternatives for every ambiguous + /// decision encountered while parsing. + /// + ///
+  /// This prediction mode may be used for diagnosing ambiguities during
+  /// grammar development. Due to the performance overhead of calculating sets
+  /// of ambiguous alternatives, this prediction mode should be avoided when
+  /// the exact results are not necessary.
+  ///
+  /// This prediction mode does not provide any guarantees for prediction
+  /// behavior for syntactically-incorrect inputs.
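+  ///
+  /// A hedged sketch of using this mode while debugging a grammar
+  /// (illustrative only; assumes the runtime ships a
+  /// `DiagnosticErrorListener` as the Java runtime does, and `parser` is a
+  /// generated parser instance):
+  ///
+  /// ```dart
+  /// parser.interpreter.predictionMode =
+  ///     PredictionMode.LL_EXACT_AMBIG_DETECTION;
+  /// parser.addErrorListener(DiagnosticErrorListener());
+  /// parser.startRule(); // exact ambiguity sets are reported as found
+  /// ```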
+ LL_EXACT_AMBIG_DETECTION, +} + +extension PredictionModeExtension on PredictionMode { + /// Computes the SLL prediction termination condition. + /// + ///
+  /// This method computes the SLL prediction termination condition for both
+  /// of the following cases:
+  ///
+  /// - The usual SLL+LL fallback upon SLL conflict
+  /// - Pure SLL without LL fallback
+  ///
+  /// COMBINED SLL+LL PARSING
+  ///
+  /// When LL-fallback is enabled upon SLL conflict, correct predictions are
+  /// ensured regardless of how the termination condition is computed by this
+  /// method. Due to the substantially higher cost of LL prediction, the
+  /// prediction should only fall back to LL when the additional lookahead
+  /// cannot lead to a unique SLL prediction.
+  ///
+  /// Assuming combined SLL+LL parsing, an SLL configuration set with only
+  /// conflicting subsets should fall back to full LL, even if the
+  /// configuration sets don't resolve to the same alternative (e.g.
+  /// {@code {1,2}} and {@code {3,4}}). If there is at least one
+  /// non-conflicting configuration, SLL could continue with the hopes that
+  /// more lookahead will resolve via one of those non-conflicting
+  /// configurations.
+  ///
+  /// Here's the prediction termination rule, then: SLL (for SLL+LL parsing)
+  /// stops when it sees only conflicting configuration subsets. In contrast,
+  /// full LL keeps going when there is uncertainty.
+  ///
+  /// HEURISTIC
+  ///
+  /// As a heuristic, we stop prediction when we see any conflicting subset
+  /// unless we see a state that only has one alternative associated with it.
+  /// The single-alt-state thing lets prediction continue upon rules like
+  /// (otherwise, it would admit defeat too soon):
+  ///
+  /// {@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}
+  ///
+  /// When the ATN simulation reaches the state before {@code ';'}, it has a
+  /// DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
+  /// {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
+  /// processing this node because alternative 2 has another way to continue,
+  /// via {@code [6|2|[]]}.
+  ///
+  /// It also lets us continue for this rule:
+  ///
+  /// {@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}
+  ///
+  /// After matching input A, we reach the stop state for rule A, state 1.
+  /// State 8 is the state right before B. Clearly alternatives 1 and 2
+  /// conflict and no amount of further lookahead will separate the two.
+  /// However, alternative 3 will be able to continue and so we do not stop
+  /// working on this state. In the previous example, we're concerned with
+  /// states associated with the conflicting alternatives. Here alt 3 is not
+  /// associated with the conflicting configs, but since we can reasonably
+  /// continue looking for input, we don't declare the state done.
+  ///
+  /// PURE SLL PARSING
+  ///
+  /// To handle pure SLL parsing, all we have to do is make sure that we
+  /// combine stack contexts for configurations that differ only by semantic
+  /// predicate. From there, we can do the usual SLL termination heuristic.
+  ///
+  /// PREDICATES IN SLL+LL PARSING
+  ///
+  /// SLL decisions don't evaluate predicates until after they reach DFA stop
+  /// states because they need to create the DFA cache that works in all
+  /// semantic situations. In contrast, full LL evaluates predicates collected
+  /// during start state computation so it can ignore predicates thereafter.
+  /// This means that SLL termination detection can totally ignore semantic
+  /// predicates.
+  ///
+  /// Implementation-wise, [ATNConfigSet] combines stack contexts but not
+  /// semantic predicate contexts, so we might see two configurations like the
+  /// following:
+  ///
+  /// {@code (s, 1, x, {}), (s, 1, x', {p})}
+  ///
+  /// Before testing these configurations against others, we have to merge
+  /// [x] and {@code x'} (without modifying the existing configurations).
+  /// For example, we test {@code (x+x')==x''} when looking for conflicts in
+  /// the following configurations:
+  ///
+  /// {@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}
+  ///
+  /// If the configuration set has predicates (as indicated by
+  /// {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
+  /// the configurations to strip out all of the predicates so that a standard
+  /// [ATNConfigSet] will merge everything ignoring predicates.
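+  ///
+  /// A hedged usage sketch (illustrative only): inside the simulator this
+  /// check gates the SLL-to-LL fallback, roughly as follows, where `mode` is
+  /// the active [PredictionMode] and `configs` is the current configuration
+  /// set:
+  ///
+  /// ```dart
+  /// if (PredictionModeExtension.hasSLLConflictTerminatingPrediction(
+  ///     mode, configs)) {
+  ///   // Stop SLL prediction here: fail over to full LL, or resolve the
+  ///   // conflict to the minimum alternative in pure SLL mode.
+  /// }
+  /// ```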
+ static bool hasSLLConflictTerminatingPrediction( + PredictionMode mode, ATNConfigSet configs) { +/* Configs in rule stop states indicate reaching the end of the decision + * rule (local context) or end of start rule (full context). If all + * configs meet this condition, then none of the configurations is able + * to match additional input so we terminate prediction. + */ + if (allConfigsInRuleStopStates(configs)) { + return true; + } + +// pure SLL mode parsing + if (mode == PredictionMode.SLL) { +// Don't bother with combining configs from different semantic +// contexts if we can fail over to full LL; costs more time +// since we'll often fail over anyway. + if (configs.hasSemanticContext) { +// dup configs, tossing out semantic predicates + final dup = ATNConfigSet(); + for (var c in configs) { + c = ATNConfig.dup(c, semanticContext: SemanticContext.NONE); + dup.add(c); + } + configs = dup; + } +// now we have combined contexts for configs with dissimilar preds + } + +// pure SLL or combined SLL+LL mode parsing + + final altsets = getConflictingAltSubsets(configs); + final heuristic = + hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs); + return heuristic; + } + + /// Checks if any configuration in [configs] is in a + /// [RuleStopState]. Configurations meeting this condition have reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). + /// + /// @param configs the configuration set to test + /// @return [true] if any configuration in [configs] is in a + /// [RuleStopState], otherwise [false] + static bool hasConfigInRuleStopState(ATNConfigSet configs) { + for (var c in configs) { + if (c.state is RuleStopState) { + return true; + } + } + + return false; + } + + /// Checks if all configurations in [configs] are in a + /// [RuleStopState]. Configurations meeting this condition have reached + /// the end of the decision rule (local context) or end of start rule (full + /// context). + /// + /// @param configs the configuration set to test + /// @return [true] if all configurations in [configs] are in a + /// [RuleStopState], otherwise [false] + static bool allConfigsInRuleStopStates(ATNConfigSet configs) { + for (var config in configs) { + if (!(config.state is RuleStopState)) { + return false; + } + } + + return true; + } + + /// Full LL prediction termination. + /// + ///
+  /// Can we stop looking ahead during ATN simulation or is there some
+  /// uncertainty as to which alternative we will ultimately pick, after
+  /// consuming more input? Even if there are partial conflicts, we might know
+  /// that everything is going to resolve to the same minimum alternative.
+  /// That means we can stop since no more lookahead will change that fact. On
+  /// the other hand, there might be multiple conflicts that resolve to
+  /// different minimums. That means we need more lookahead to decide which of
+  /// those alternatives we should predict.
+  ///
+  /// The basic idea is to split the set of configurations [C] into
+  /// conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
+  /// non-conflicting configurations. Two configurations conflict if they have
+  /// identical {@link ATNConfig#state} and {@link ATNConfig#context} values
+  /// but a different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
+  /// and {@code (s, j, ctx, _)} for {@code i!=j}.
+  ///
+  /// Reduce these configuration subsets to the set of possible alternatives.
+  /// You can compute the alternative subsets in one pass as follows:
+  ///
+  /// {@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
+  /// [C] holding [s] and [ctx] fixed.
+  ///
+  /// Or in pseudo-code, for each configuration [c] in [C]:
+  ///
+  ///   map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x,
+  ///   not alt and not pred
+  ///
+  /// The values in [map] are the set of {@code A_s,ctx} sets.
+  ///
+  /// If {@code |A_s,ctx|=1} then there is no conflict associated with
+  /// [s] and [ctx].
+  ///
+  /// Reduce the subsets to singletons by choosing a minimum of each subset.
+  /// If the union of these alternative subsets is a singleton, then no amount
+  /// of further lookahead will help us. We will always pick that alternative.
+  /// If, however, there is more than one alternative, then we are uncertain
+  /// which alternative to predict and must continue looking for resolution.
+  /// We may or may not discover an ambiguity in the future, even if there are
+  /// no conflicting subsets this round.
+  ///
+  /// The biggest sin is to terminate early because it means we've made a
+  /// decision but were uncertain as to the eventual outcome. We haven't used
+  /// enough lookahead. On the other hand, announcing a conflict too late is
+  /// no big deal; you will still have the conflict. It's just inefficient. It
+  /// might even look until the end of file.
+  ///
+  /// No special consideration for semantic predicates is required because
+  /// predicates are evaluated on-the-fly for full LL prediction, ensuring
+  /// that no configuration contains a semantic context during the termination
+  /// check.
+  ///
+  /// CONFLICTING CONFIGS
+  ///
+  /// Two configurations {@code (s, i, x)} and {@code (s, j, x')} conflict
+  /// when {@code i!=j} but {@code x=x'}. Because we merge all
+  /// {@code (s, i, _)} configurations together, that means that there are at
+  /// most [n] configurations associated with state [s] for
+  /// [n] possible alternatives in the decision. The merged stacks
+  /// complicate the comparison of configuration contexts [x] and
+  /// {@code x'}. Sam checks to see if one is a subset of the other by calling
+  /// merge and checking to see if the merged result is either [x] or
+  /// {@code x'}. If the [x] associated with the lowest alternative [i]
+  /// is the superset, then [i] is the only possible prediction since the
+  /// others resolve to {@code min(i)} as well. However, if [x] is
+  /// associated with {@code j>i} then at least one stack configuration for
+  /// [j] is not in conflict with alternative [i]. The algorithm
+  /// should keep going, looking for more lookahead due to the uncertainty.
+  ///
+  /// For simplicity, I'm doing an equality check between [x] and
+  /// {@code x'} that lets the algorithm continue to consume lookahead longer
+  /// than necessary. The reason I like the equality is of course the
+  /// simplicity, but also because that is the test you need to detect the
+  /// alternatives that are actually in conflict.
+  ///
+  /// CONTINUE/STOP RULE
+  ///
+  /// Continue if the union of resolved alternative sets from non-conflicting
+  /// and conflicting alternative subsets has more than one alternative. We
+  /// are uncertain about which alternative to predict.
+  ///
+  /// The complete set of alternatives, {@code [i for (_,i,_)]}, tells us
+  /// which alternatives are still in the running for the amount of input
+  /// we've consumed at this point. The conflicting sets let us strip away
+  /// configurations that won't lead to more states because we resolve
+  /// conflicts to the configuration with a minimum alternate for the
+  /// conflicting set.
+  ///
+  /// CASES
+  ///
+  /// - no conflicts and more than 1 alternative in set => continue
+  /// - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)},
+  ///   {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set
+  ///   {@code {3}} U conflicting sets {@code min({1,2})} U
+  ///   {@code min({1,2})} = {@code {1,3}} => continue
+  /// - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
+  ///   {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set
+  ///   {@code {1}} U conflicting sets {@code min({1,2})} U
+  ///   {@code min({1,2})} = {@code {1}} => stop and predict 1
+  /// - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)},
+  ///   {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U
+  ///   {@code {1}} = {@code {1}} => stop and predict 1, can announce
+  ///   ambiguity {@code {1,2}}
+  /// - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)},
+  ///   {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U
+  ///   {@code {2}} = {@code {1,2}} => continue
+  /// - {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)},
+  ///   {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U
+  ///   {@code {3}} = {@code {1,3}} => continue
+  ///
+  /// EXACT AMBIGUITY DETECTION
+  ///
+  /// If all states report the same conflicting set of alternatives, then we
+  /// know we have the exact ambiguity set:
+  ///
+  /// {@code |A_i| > 1} and {@code A_i = A_j} for all [i], [j].
+  ///
+  /// In other words, we continue examining lookahead until all {@code A_i}
+  /// have more than one alternative and all {@code A_i} are the same. If
+  /// {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
+  /// because the resolved set is {@code {1}}. To determine what the real
+  /// ambiguity is, we have to know whether the ambiguity is between one and
+  /// two or one and three, so we keep going. We can only stop prediction when
+  /// we need exact ambiguity detection when the sets look like
+  /// {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...
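+  ///
+  /// For instance (illustrative only, using the runtime [BitSet] API used
+  /// elsewhere in this file): the conflicting subsets {1,2} and {1,3} both
+  /// reduce to minimum alternative 1, so no further lookahead can change the
+  /// prediction:
+  ///
+  /// ```dart
+  /// final a = BitSet()..set(1)..set(2);
+  /// final b = BitSet()..set(1)..set(3);
+  /// assert(PredictionModeExtension.resolvesToJustOneViableAlt([a, b]) == 1);
+  /// ```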
+ static int resolvesToJustOneViableAlt(List altsets) { + return getSingleViableAlt(altsets); + } + + /// Determines if every alternative subset in [altsets] contains more + /// than one alternative. + /// + /// @param altsets a collection of alternative subsets + /// @return [true] if every [BitSet] in [altsets] has + /// {@link BitSet#cardinality cardinality} > 1, otherwise [false] + static bool allSubsetsConflict(List altsets) { + return !hasNonConflictingAltSet(altsets); + } + + /// Determines if any single alternative subset in [altsets] contains + /// exactly one alternative. + /// + /// @param altsets a collection of alternative subsets + /// @return [true] if [altsets] contains a [BitSet] with + /// {@link BitSet#cardinality cardinality} 1, otherwise [false] + static bool hasNonConflictingAltSet(List altsets) { + for (var alts in altsets) { + if (alts.cardinality == 1) { + return true; + } + } + return false; + } + + /// Determines if any single alternative subset in [altsets] contains + /// more than one alternative. + /// + /// @param altsets a collection of alternative subsets + /// @return [true] if [altsets] contains a [BitSet] with + /// {@link BitSet#cardinality cardinality} > 1, otherwise [false] + static bool hasConflictingAltSet(List altsets) { + for (var alts in altsets) { + if (alts.cardinality > 1) { + return true; + } + } + return false; + } + + /// Determines if every alternative subset in [altsets] is equivalent. + /// + /// @param altsets a collection of alternative subsets + /// @return [true] if every member of [altsets] is equal to the + /// others, otherwise [false] + static bool allSubsetsEqual(List altsets) { + final first = altsets.first; + return altsets.every((e) => e == first); + } + + /// Returns the unique alternative predicted by all alternative subsets in + /// [altsets]. If no such alternative exists, this method returns + /// {@link ATN#INVALID_ALT_NUMBER}. + /// + /// @param altsets a collection of alternative subsets + static int getUniqueAlt(List altsets) { + final all = getAlts(altsets); + if (all.cardinality == 1) return all.nextset(0); + return ATN.INVALID_ALT_NUMBER; + } + + /// Gets the complete set of represented alternatives for a collection of + /// alternative subsets. This method returns the union of each [BitSet] + /// in [altsets]. + /// + /// @param altsets a collection of alternative subsets + /// @return the set of represented alternatives in [altsets] + static BitSet getAlts(List altsets) { + final all = BitSet(); + for (var alts in altsets) { + all.or(alts); + } + return all; + } + + /// Get union of all alts from configs. + /// + /// @since 4.5.1 + static BitSet getAltsFromConfigs(ATNConfigSet configs) { + final alts = BitSet(); + for (var config in configs) { + alts.set(config.alt); + } + return alts; + } + + /// This function gets the conflicting alt subsets from a configuration set. + /// For each configuration [c] in [configs]: + /// + ///
+  ///   map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x,
+  ///   not alt and not pred
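+  ///
+  /// For example, the configurations {@code (s, 1, x)}, {@code (s, 2, x)}
+  /// and {@code (s', 3, y)} produce two subsets: {@code {1, 2}} (the first
+  /// two configurations share state [s] and context [x], so their alts
+  /// collect under one key) and {@code {3}}.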
+ static List getConflictingAltSubsets(ATNConfigSet configs) { + final configToAlts = + HashMap(equals: (ATNConfig a, ATNConfig b) { + if (identical(a, b)) return true; + if (a == null || b == null) return false; + return a.state.stateNumber == b.state.stateNumber && + a.context == b.context; + }, hashCode: (ATNConfig o) { + /** + * The hash code is only a function of the {@link ATNState#stateNumber} + * and {@link ATNConfig#context}. + */ + var hashCode = MurmurHash.initialize(7); + hashCode = MurmurHash.update(hashCode, o.state.stateNumber); + hashCode = MurmurHash.update(hashCode, o.context); + hashCode = MurmurHash.finish(hashCode, 2); + return hashCode; + }); + for (var c in configs) { + var alts = configToAlts[c]; + if (alts == null) { + alts = BitSet(); + configToAlts[c] = alts; + } + alts.set(c.alt); + } + return configToAlts.values.toList(); + } + + /// Get a map from state to alt subset from a configuration set. For each + /// configuration [c] in [configs]: + /// + ///
+  ///   map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
+ static Map getStateToAltMap(ATNConfigSet configs) { + final m = {}; + for (var c in configs) { + var alts = m[c.state]; + if (alts == null) { + alts = BitSet(); + m[c.state] = alts; + } + alts.set(c.alt); + } + return m; + } + + static bool hasStateAssociatedWithOneAlt(ATNConfigSet configs) { + final x = getStateToAltMap(configs); + for (var alts in x.values) { + if (alts.cardinality == 1) return true; + } + return false; + } + + static int getSingleViableAlt(List altsets) { + final viableAlts = BitSet(); + for (var alts in altsets) { + final minAlt = alts.nextset(0); + viableAlts.set(minAlt); + if (viableAlts.cardinality > 1) { + // more than 1 viable alt + return ATN.INVALID_ALT_NUMBER; + } + } + return viableAlts.nextset(0); + } +} diff --git a/runtime/Dart/lib/src/atn/src/profiling_atn_simulator.dart b/runtime/Dart/lib/src/atn/src/profiling_atn_simulator.dart new file mode 100644 index 000000000..a796acb61 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/profiling_atn_simulator.dart @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:math'; + +import '../../dfa/dfa.dart'; +import '../../parser.dart'; +import '../../parser_rule_context.dart'; +import '../../token_stream.dart'; +import '../../util/bit_set.dart'; +import 'atn_config_set.dart'; +import 'atn_simulator.dart'; +import 'info.dart'; +import 'parser_atn_simulator.dart'; +import 'semantic_context.dart'; + +class ProfilingATNSimulator extends ParserATNSimulator { + List decisions; + int numDecisions; + + int _sllStopIndex; + int _llStopIndex; + + int currentDecision; + DFAState currentState; + + /// At the point of LL failover, we record how SLL would resolve the conflict so that + /// we can determine whether or not a decision / input pair is context-sensitive. + /// If LL gives a different result than SLL's predicted alternative, we have a + /// context sensitivity for sure. The converse is not necessarily true, however. + /// It's possible that after conflict resolution chooses minimum alternatives, + /// SLL could get the same answer as LL. Regardless of whether or not the result indicates + /// an ambiguity, it is not treated as a context sensitivity because LL prediction + /// was not required in order to produce a correct prediction for this decision and input sequence. + /// It may in fact still be a context sensitivity but we don't know by looking at the + /// minimum alternatives for the current input. 
+ int conflictingAltResolvedBySLL; + + ProfilingATNSimulator(Parser parser) + : super(parser, parser.interpreter.atn, parser.interpreter.decisionToDFA, + parser.interpreter.sharedContextCache) { + numDecisions = atn.decisionToState.length; + decisions = List(numDecisions); + for (var i = 0; i < numDecisions; i++) { + decisions[i] = DecisionInfo(i); + } + } + + @override + int adaptivePredict( + TokenStream input, int decision, ParserRuleContext outerContext) { + try { + _sllStopIndex = -1; + _llStopIndex = -1; + currentDecision = decision; + + final start = + DateTime.now(); // TODO get nano seconds expensive but useful info + final alt = super.adaptivePredict(input, decision, outerContext); + final stop = DateTime.now(); + decisions[decision].timeInPrediction += + (stop.difference(start)).inMicroseconds; + decisions[decision].invocations++; + + final SLL_k = _sllStopIndex - startIndex + 1; + decisions[decision].SLL_TotalLook += SLL_k; + decisions[decision].SLL_MinLook = decisions[decision].SLL_MinLook == 0 + ? SLL_k + : min(decisions[decision].SLL_MinLook, SLL_k); + if (SLL_k > decisions[decision].SLL_MaxLook) { + decisions[decision].SLL_MaxLook = SLL_k; + decisions[decision].SLL_MaxLookEvent = LookaheadEventInfo( + decision, null, alt, input, startIndex, _sllStopIndex, false); + } + + if (_llStopIndex >= 0) { + final LL_k = _llStopIndex - startIndex + 1; + decisions[decision].LL_TotalLook += LL_k; + decisions[decision].LL_MinLook = decisions[decision].LL_MinLook == 0 + ? LL_k + : min(decisions[decision].LL_MinLook, LL_k); + if (LL_k > decisions[decision].LL_MaxLook) { + decisions[decision].LL_MaxLook = LL_k; + decisions[decision].LL_MaxLookEvent = LookaheadEventInfo( + decision, null, alt, input, startIndex, _llStopIndex, true); + } + } + + return alt; + } finally { + currentDecision = -1; + } + } + + @override + DFAState getExistingTargetState(DFAState previousD, int t) { + // this method is called after each time the input position advances + // during SLL prediction + _sllStopIndex = input.index; + + final existingTargetState = super.getExistingTargetState(previousD, t); + if (existingTargetState != null) { + decisions[currentDecision] + .SLL_DFATransitions++; // count only if we transition over a DFA state + if (existingTargetState == ATNSimulator.ERROR) { + decisions[currentDecision].errors.add(ErrorInfo(currentDecision, + previousD.configs, input, startIndex, _sllStopIndex, false)); + } + } + + currentState = existingTargetState; + return existingTargetState; + } + + @override + DFAState computeTargetState(DFA dfa, DFAState previousD, int t) { + final state = super.computeTargetState(dfa, previousD, t); + currentState = state; + return state; + } + + @override + ATNConfigSet computeReachSet(ATNConfigSet closure, int t, bool fullCtx) { + if (fullCtx) { + // this method is called after each time the input position advances + // during full context prediction + _llStopIndex = input.index; + } + + final reachConfigs = super.computeReachSet(closure, t, fullCtx); + if (fullCtx) { + decisions[currentDecision] + .LL_ATNTransitions++; // count computation even if error + if (reachConfigs != null) { + } else { + // no reach on current lookahead symbol. ERROR. 
+ // TODO: does not handle delayed errors per getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule() + decisions[currentDecision].errors.add(ErrorInfo( + currentDecision, closure, input, startIndex, _llStopIndex, true)); + } + } else { + decisions[currentDecision].SLL_ATNTransitions++; + if (reachConfigs != null) { + } else { + // no reach on current lookahead symbol. ERROR. + decisions[currentDecision].errors.add(ErrorInfo( + currentDecision, closure, input, startIndex, _sllStopIndex, false)); + } + } + return reachConfigs; + } + + @override + bool evalSemanticContextOne(SemanticContext pred, + ParserRuleContext parserCallStack, int alt, bool fullCtx) { + final result = + super.evalSemanticContextOne(pred, parserCallStack, alt, fullCtx); + if (!(pred is PrecedencePredicate)) { + final fullContext = _llStopIndex >= 0; + final stopIndex = fullContext ? _llStopIndex : _sllStopIndex; + decisions[currentDecision].predicateEvals.add(PredicateEvalInfo( + currentDecision, + input, + startIndex, + stopIndex, + pred, + result, + alt, + fullCtx)); + } + + return result; + } + + @override + void reportAttemptingFullContext(DFA dfa, BitSet conflictingAlts, + ATNConfigSet configs, int startIndex, int stopIndex) { + if (conflictingAlts != null) { + conflictingAltResolvedBySLL = conflictingAlts.nextset(0); + } else { + conflictingAltResolvedBySLL = configs.alts.nextset(0); + } + decisions[currentDecision].LL_Fallback++; + super.reportAttemptingFullContext( + dfa, conflictingAlts, configs, startIndex, stopIndex); + } + + @override + void reportContextSensitivity(DFA dfa, int prediction, ATNConfigSet configs, + int startIndex, int stopIndex) { + if (prediction != conflictingAltResolvedBySLL) { + decisions[currentDecision].contextSensitivities.add( + ContextSensitivityInfo( + currentDecision, configs, input, startIndex, stopIndex)); + } + super.reportContextSensitivity( + dfa, prediction, configs, startIndex, stopIndex); + } + + @override + void reportAmbiguity(DFA dfa, DFAState D, int startIndex, int stopIndex, + bool exact, BitSet ambigAlts, ATNConfigSet configs) { + final prediction = + ambigAlts != null ? ambigAlts.nextset(0) : configs.alts.nextset(0); + if (configs.fullCtx && prediction != conflictingAltResolvedBySLL) { + // Even though this is an ambiguity we are reporting, we can + // still detect some context sensitivities. Both SLL and LL + // are showing a conflict, hence an ambiguity, but if they resolve + // to different minimum alternatives we have also identified a + // context sensitivity. + decisions[currentDecision].contextSensitivities.add( + ContextSensitivityInfo( + currentDecision, configs, input, startIndex, stopIndex)); + } + decisions[currentDecision].ambiguities.add(AmbiguityInfo(currentDecision, + configs, ambigAlts, input, startIndex, stopIndex, configs.fullCtx)); + super.reportAmbiguity( + dfa, D, startIndex, stopIndex, exact, ambigAlts, configs); + } + + // --------------------------------------------------------------------- + + List get decisionInfo { + return decisions; + } +} diff --git a/runtime/Dart/lib/src/atn/src/semantic_context.dart b/runtime/Dart/lib/src/atn/src/semantic_context.dart new file mode 100644 index 000000000..8ea747964 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/semantic_context.dart @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +import 'package:collection/collection.dart'; + +import '../../recognizer.dart'; +import '../../rule_context.dart'; +import '../../util/murmur_hash.dart'; + +/// A tree structure used to record the semantic context in which +/// an ATN configuration is valid. It's either a single predicate, +/// a conjunction {@code p1&&p2}, or a sum of products {@code p1||p2}. +/// +///
+/// I have scoped the [AND], [OR], and [Predicate] subclasses of
+/// [SemanticContext] within the scope of this outer class.
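+///
+/// For example (a sketch, not part of this change; `p1` and `p2` stand for
+/// predicate contexts produced during ATN construction), the combinators
+/// below short-circuit around the always-true [NONE] context:
+///
+/// ```dart
+/// final both = SemanticContext.and(p1, p2);  // conjunction, p1 && p2
+/// final either = SemanticContext.or(p1, p2); // disjunction, p1 || p2
+/// // and() simply returns the other operand when one side is NONE:
+/// assert(identical(SemanticContext.and(SemanticContext.NONE, p1), p1));
+/// ```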
+abstract class SemanticContext { + /// The default [SemanticContext], which is semantically equivalent to + /// a predicate of the form {@code {true}?}. + static const SemanticContext NONE = Predicate(); + + const SemanticContext(); + + /// For context independent predicates, we evaluate them without a local + /// context (i.e., null context). That way, we can evaluate them without + /// having to create proper rule-specific context during prediction (as + /// opposed to the parser, which creates them naturally). In a practical + /// sense, this avoids a cast exception from RuleContext to myruleContext. + /// + ///
+  /// For context dependent predicates, we must pass in a local context so
+  /// that references such as $arg evaluate properly as _localctx.arg. We only
+  /// capture context dependent predicates in the context in which we begin
+  /// prediction, so we pass in the outer context here in case of context
+  /// dependent predicate evaluation.
+ bool eval(Recognizer parser, RuleContext parserCallStack); + + /// Evaluate the precedence predicates for the context and reduce the result. + /// + /// @param parser The parser instance. + /// @param parserCallStack + /// @return The simplified semantic context after precedence predicates are + /// evaluated, which will be one of the following values. + ///
+  /// - {@link #NONE}: if the predicate simplifies to [true] after
+  ///   precedence predicates are evaluated.
+  /// - null: if the predicate simplifies to [false] after
+  ///   precedence predicates are evaluated.
+  /// - [this]: if the semantic context is not changed as a result of
+  ///   precedence predicate evaluation.
+  /// - A non-null [SemanticContext]: the new simplified
+  ///   semantic context after precedence predicates are evaluated.
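+  ///
+  /// For example, in a left-recursive expression grammar each recursive
+  /// alternative is guarded by a [PrecedencePredicate]: evaluating it
+  /// reduces to {@link #NONE} when {@code parser.precpred(ctx, prec)} holds,
+  /// and to null otherwise, pruning that alternative (see
+  /// [PrecedencePredicate.evalPrecedence] below).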
+ SemanticContext evalPrecedence(Recognizer parser, + RuleContext parserCallStack) { + return this; + } + + static SemanticContext and(SemanticContext a, SemanticContext b) { + if (a == null || a == NONE) return b; + if (b == null || b == NONE) return a; + final result = AND(a, b); + if (result.opnds.length == 1) { + return result.opnds[0]; + } + + return result; + } + + /// + /// @see ParserATNSimulator#getPredsForAmbigAlts + static SemanticContext or(SemanticContext a, SemanticContext b) { + if (a == null) return b; + if (b == null) return a; + if (a == NONE || b == NONE) return NONE; + final result = OR(a, b); + if (result.opnds.length == 1) { + return result.opnds[0]; + } + + return result; + } + + static Iterable filterPrecedencePredicates( + Iterable collection) { + return collection.whereType(); + } + + static Iterable filterNonPrecedencePredicates( + Iterable collection) { + return collection.where((e) => !(e is PrecedencePredicate)); + } +} + +class Predicate extends SemanticContext { + final int ruleIndex; + final int predIndex; + final bool isCtxDependent; // e.g., $i ref in pred + + const Predicate( + [this.ruleIndex = -1, this.predIndex = -1, this.isCtxDependent = false]); + + @override + bool eval(Recognizer parser, RuleContext parserCallStack) { + final localctx = isCtxDependent ? parserCallStack : null; + return parser.sempred(localctx, ruleIndex, predIndex); + } + + @override + int get hashCode { + var hashCode = MurmurHash.initialize(); + hashCode = MurmurHash.update(hashCode, ruleIndex); + hashCode = MurmurHash.update(hashCode, predIndex); + hashCode = MurmurHash.update(hashCode, isCtxDependent ? 1 : 0); + hashCode = MurmurHash.finish(hashCode, 3); + return hashCode; + } + + @override + bool operator ==(Object obj) { + return obj is Predicate && + ruleIndex == obj.ruleIndex && + predIndex == obj.predIndex && + isCtxDependent == obj.isCtxDependent; + } + + @override + String toString() { + return '{$ruleIndex:$predIndex}?'; + } +} + +class PrecedencePredicate extends SemanticContext + implements Comparable { + final int precedence; + + PrecedencePredicate([this.precedence = 0]); + + @override + bool eval(Recognizer parser, RuleContext parserCallStack) { + return parser.precpred(parserCallStack, precedence); + } + + @override + SemanticContext evalPrecedence(Recognizer parser, + RuleContext parserCallStack) { + if (parser.precpred(parserCallStack, precedence)) { + return SemanticContext.NONE; + } else { + return null; + } + } + + @override + int compareTo(PrecedencePredicate o) { + return precedence - o.precedence; + } + + @override + int get hashCode { + var hashCode = 1; + hashCode = 31 * hashCode + precedence; + return hashCode; + } + + @override + bool operator ==(Object obj) { + if (!(obj is PrecedencePredicate)) { + return false; + } + PrecedencePredicate other = obj; + return precedence == other.precedence; + } + +// precedence >= _precedenceStack.peek() + @override + String toString() { + return '{$precedence>=prec}?'; + } +} + +/// This is the base class for semantic context "operators", which operate on +/// a collection of semantic context "operands". +/// +/// @since 4.3 +abstract class Operator extends SemanticContext { + /// Gets the operands for the semantic context operator. + /// + /// @return a collection of [SemanticContext] operands for the + /// operator. + /// + /// @since 4.3 + List get operands; +} + +/// A semantic context which is true whenever none of the contained contexts +/// is false. 
+ +class AND extends Operator { + List opnds; + + AND(SemanticContext a, SemanticContext b) { + var operands = {}; + if (a is AND) { + operands.addAll(a.opnds); + } else { + operands.add(a); + } + if (b is AND) { + operands.addAll(b.opnds); + } else { + operands.add(b); + } + + final precedencePredicates = + SemanticContext.filterPrecedencePredicates(operands); + + operands = SemanticContext.filterNonPrecedencePredicates(operands).toSet(); + if (precedencePredicates.isNotEmpty) { + // interested in the transition with the lowest precedence + final reduced = + precedencePredicates.reduce((a, b) => a.compareTo(b) <= 0 ? a : b); + operands.add(reduced); + } + + opnds = operands.toList(); + } + + @override + List get operands { + return opnds; + } + + @override + bool operator ==(Object obj) { + if (!(obj is AND)) return false; + AND other = obj; + return ListEquality().equals(opnds, other.opnds); + } + + @override + int get hashCode { + return MurmurHash.getHashCode(opnds, runtimeType.hashCode); + } + + /// {@inheritDoc} + /// + ///
+  /// The evaluation of predicates by this context is short-circuiting, but
+  /// unordered.
+ + @override + bool eval(Recognizer parser, RuleContext parserCallStack) { + for (var opnd in opnds) { + if (!opnd.eval(parser, parserCallStack)) return false; + } + return true; + } + + @override + SemanticContext evalPrecedence(Recognizer parser, + RuleContext parserCallStack) { + var differs = false; + final operands = []; + for (var context in opnds) { + final evaluated = + context.evalPrecedence(parser, parserCallStack); + differs |= (evaluated != context); + if (evaluated == null) { + // The AND context is false if any element is false + return null; + } else if (evaluated != SemanticContext.NONE) { + // Reduce the result by skipping true elements + operands.add(evaluated); + } + } + + if (!differs) { + return this; + } + + if (operands.isEmpty) { + // all elements were true, so the AND context is true + return SemanticContext.NONE; + } + + var result = operands[0]; + for (var i = 1; i < operands.length; i++) { + result = SemanticContext.and(result, operands[i]); + } + + return result; + } + + @override + String toString() { + return opnds.join('&&'); + } +} + +/// A semantic context which is true whenever at least one of the contained +/// contexts is true. +class OR extends Operator { + List opnds; + + OR(SemanticContext a, SemanticContext b) { + var operands = {}; + if (a is OR) { + operands.addAll(a.opnds); + } else { + operands.add(a); + } + if (b is OR) { + operands.addAll(b.opnds); + } else { + operands.add(b); + } + + final precedencePredicates = + SemanticContext.filterPrecedencePredicates(operands); + + operands = SemanticContext.filterNonPrecedencePredicates(operands).toSet(); + if (precedencePredicates.isNotEmpty) { + // interested in the transition with the highest precedence + final reduced = + precedencePredicates.reduce((a, b) => a.compareTo(b) >= 0 ? a : b); + operands.add(reduced); + } + + opnds = operands.toList(); + } + + @override + List get operands { + return opnds; + } + + @override + bool operator ==(Object obj) { + if (!(obj is OR)) return false; + OR other = obj; + return ListEquality().equals(opnds, other.opnds); + } + + @override + int get hashCode { + return MurmurHash.getHashCode(opnds, runtimeType.hashCode); + } + + /// {@inheritDoc} + /// + ///
+  /// The evaluation of predicates by this context is short-circuiting, but
+  /// unordered.
+ + @override + bool eval(Recognizer parser, RuleContext parserCallStack) { + for (var opnd in opnds) { + if (opnd.eval(parser, parserCallStack)) return true; + } + return false; + } + + @override + SemanticContext evalPrecedence(Recognizer parser, + RuleContext parserCallStack) { + var differs = false; + final operands = []; + for (var context in opnds) { + final evaluated = + context.evalPrecedence(parser, parserCallStack); + differs |= (evaluated != context); + if (evaluated == SemanticContext.NONE) { + // The OR context is true if any element is true + return SemanticContext.NONE; + } else if (evaluated != null) { + // Reduce the result by skipping false elements + operands.add(evaluated); + } + } + + if (!differs) { + return this; + } + + if (operands.isEmpty) { + // all elements were false, so the OR context is false + return null; + } + + var result = operands[0]; + for (var i = 1; i < operands.length; i++) { + result = SemanticContext.or(result, operands[i]); + } + + return result; + } + + @override + String toString() { + return opnds.join('||'); + } +} diff --git a/runtime/Dart/lib/src/atn/src/transition.dart b/runtime/Dart/lib/src/atn/src/transition.dart new file mode 100644 index 000000000..06d873844 --- /dev/null +++ b/runtime/Dart/lib/src/atn/src/transition.dart @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../interval_set.dart'; +import '../../token.dart'; +import 'atn_state.dart'; +import 'semantic_context.dart'; + +enum TransitionType { + INVALID, // 0 is not used + EPSILON, + RANGE, + RULE, + PREDICATE, // e.g., {isType(input.LT(1))}? + ATOM, + ACTION, + SET, // ~(A|B) or ~atom, wildcard, which convert to next 2 + NOT_SET, + WILDCARD, + PRECEDENCE, +} + +/// An ATN transition between any two ATN states. Subclasses define +/// atom, set, epsilon, action, predicate, rule transitions. +/// +///
+/// This is a one way link. It emanates from a state (usually via a list of
+/// transitions) and has a target state.
+///
+/// Since we never have to change the ATN transitions once we construct it,
+/// we can fix these transitions as specific classes. The DFA transitions,
+/// on the other hand, need to update the labels as they add transitions to
+/// the states. We'll use the term Edge for the DFA to distinguish them from
+/// ATN transitions.
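+///
+/// A short sketch of walking a state's outgoing transitions (illustrative
+/// only; `state` is an [ATNState], and the accessors are the same ones
+/// [ParserATNSimulator] uses elsewhere in this change):
+///
+/// ```dart
+/// for (var i = 0; i < state.numberOfTransitions; i++) {
+///   final t = state.transition(i);
+///   if (!t.isEpsilon) {
+///     print('${t.type} -> state ${t.target.stateNumber}');
+///   }
+/// }
+/// ```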
+abstract class Transition {
+  /// The target of this transition.
+  ATNState target;
+
+  Transition(this.target) {
+    if (target == null) {
+      throw ArgumentError.notNull('target');
+    }
+  }
+
+  TransitionType get type;
+
+  /// Determines if the transition is an "epsilon" transition.
+  ///
+  /// The default implementation returns [false].
+ /// + /// @return [true] if traversing this transition in the ATN does not + /// consume an input symbol; otherwise, [false] if traversing this + /// transition consumes (matches) an input symbol. + bool get isEpsilon => false; + + IntervalSet get label => null; + + bool matches(int symbol, int minVocabSymbol, int maxVocabSymbol); +} + +class EpsilonTransition extends Transition { + /// @return the rule index of a precedence rule for which this transition is + /// returning from, where the precedence value is 0; otherwise, -1. + /// + /// @see ATNConfig#isPrecedenceFilterSuppressed() + /// @see ParserATNSimulator#applyPrecedenceFilter(ATNConfigSet) + /// @since 4.4.1 + final int outermostPrecedenceReturn; + + EpsilonTransition(ATNState target, [this.outermostPrecedenceReturn = -1]) + : super(target); + + @override + bool get isEpsilon => true; + + @override + bool matches(int symbol, int minVocabSymbol, int maxVocabSymbol) { + return false; + } + + @override + String toString() { + return 'epsilon'; + } + + @override + TransitionType get type => TransitionType.EPSILON; +} + +class RangeTransition extends Transition { + final int from; + final int to; + + RangeTransition(ATNState target, this.from, this.to) : super(target); + + @override + IntervalSet get label { + return IntervalSet.ofRange(from, to); + } + + @override + bool matches(int symbol, int minVocabSymbol, int maxVocabSymbol) { + return symbol >= from && symbol <= to; + } + + @override + String toString() { + return "'$from..$to'"; + } + + @override + TransitionType get type => TransitionType.RANGE; +} + +class RuleTransition extends Transition { + /// Ptr to the rule definition object for this rule ref */ + final int ruleIndex; // no Rule object at runtime + + final int precedence; + + /// What node to begin computations following ref to rule */ + ATNState followState; + + RuleTransition(RuleStartState ruleStart, this.ruleIndex, this.precedence, + this.followState) + : super(ruleStart); + + @override + bool get isEpsilon => true; + + @override + bool matches(int symbol, int minVocabSymbol, int maxVocabSymbol) { + return false; + } + + @override + TransitionType get type => TransitionType.RULE; +} + +abstract class AbstractPredicateTransition extends Transition { + AbstractPredicateTransition(ATNState target) : super(target); +} + +class PredicateTransition extends AbstractPredicateTransition { + final int ruleIndex; + final int predIndex; + final bool isCtxDependent; // e.g., $i ref in pred + + PredicateTransition( + target, this.ruleIndex, this.predIndex, this.isCtxDependent) + : super(target); + + @override + bool get isEpsilon => true; + + @override + bool matches(symbol, minVocabSymbol, maxVocabSymbol) { + return false; + } + + Predicate get predicate => Predicate(ruleIndex, predIndex, isCtxDependent); + + + @override + String toString() { + return 'pred_$ruleIndex:$predIndex'; + } + + @override + TransitionType get type => TransitionType.PREDICATE; +} + +/// TODO: make all transitions sets? no, should remove set edges */ +class AtomTransition extends Transition { + /// The token type or character value; or, signifies special label. 
*/ + final int atomLabel; + + AtomTransition(ATNState target, this.atomLabel) : super(target); + + @override + IntervalSet get label { + return IntervalSet.ofOne(atomLabel); + } + + @override + bool matches(int symbol, int minVocabSymbol, int maxVocabSymbol) { + return atomLabel == symbol; + } + + @override + String toString() { + return label.toString(); + } + + @override + TransitionType get type => TransitionType.ATOM; +} + +class ActionTransition extends Transition { + final int ruleIndex; + final int actionIndex; + final bool isCtxDependent; // e.g., $i ref in pred + + ActionTransition(target, this.ruleIndex, + [this.actionIndex = -1, this.isCtxDependent = false]) + : super(target); + + @override + bool get isEpsilon => + true; // we are to be ignored by analysis 'cept for predicates + + @override + bool matches(symbol, minVocabSymbol, maxVocabSymbol) => false; + + @override + String toString() { + return 'action_$ruleIndex:$actionIndex'; + } + + @override + TransitionType get type => TransitionType.ACTION; +} + +// A transition containing a set of values. +class SetTransition extends Transition { + @override + IntervalSet label; + + SetTransition(ATNState target, [IntervalSet st]) : super(target) { + label = st ?? IntervalSet.ofOne(Token.INVALID_TYPE); + } + + @override + bool matches(symbol, minVocabSymbol, maxVocabSymbol) { + return label.contains(symbol); + } + + @override + String toString() { + return label.toString(); + } + + @override + TransitionType get type => TransitionType.SET; +} + +class NotSetTransition extends SetTransition { + NotSetTransition(target, st) : super(target, st); + + @override + bool matches(symbol, minVocabSymbol, maxVocabSymbol) { + return symbol >= minVocabSymbol && + symbol <= maxVocabSymbol && + !super.matches(symbol, minVocabSymbol, maxVocabSymbol); + } + + @override + String toString() { + return '~' + super.toString(); + } + + @override + TransitionType get type => TransitionType.NOT_SET; +} + +class WildcardTransition extends Transition { + WildcardTransition(target) : super(target); + + @override + bool matches(symbol, minVocabSymbol, maxVocabSymbol) { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol; + } + + @override + String toString() { + return '.'; + } + + @override + TransitionType get type => TransitionType.WILDCARD; +} + +class PrecedencePredicateTransition extends AbstractPredicateTransition { + final int precedence; + + PrecedencePredicateTransition(target, this.precedence) : super(target); + + @override + bool get isEpsilon => true; + + @override + bool matches(symbol, minVocabSymbol, maxVocabSymbol) => false; + + PrecedencePredicate get predicate { + return PrecedencePredicate(precedence); + } + + @override + String toString() => '$precedence >= _p'; + + @override + TransitionType get type => TransitionType.PRECEDENCE; +} diff --git a/runtime/Dart/lib/src/dfa/dfa.dart b/runtime/Dart/lib/src/dfa/dfa.dart new file mode 100644 index 000000000..552ddef7d --- /dev/null +++ b/runtime/Dart/lib/src/dfa/dfa.dart @@ -0,0 +1,8 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +export 'src/dfa.dart'; +export 'src/dfa_state.dart'; diff --git a/runtime/Dart/lib/src/dfa/src/dfa.dart b/runtime/Dart/lib/src/dfa/src/dfa.dart new file mode 100644 index 000000000..30aa2af36 --- /dev/null +++ b/runtime/Dart/lib/src/dfa/src/dfa.dart @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../vocabulary.dart'; +import '../../atn/atn.dart'; +import 'dfa_serializer.dart'; +import 'dfa_state.dart'; + +class DFA { + /// A set of all DFA states. Use [Map] so we can get old state back + /// ([Set] only allows you to see if it's there). + + Map states = {}; + + DFAState s0; + + final int decision; + + /// From which ATN state did we create this DFA? */ + + DecisionState atnStartState; + + /// [true] if this DFA is for a precedence decision; otherwise, + /// [false]. This is the backing field for {@link #isPrecedenceDfa}. + bool precedenceDfa; + + DFA(this.atnStartState, [this.decision]) { + var precedenceDfa = false; + if (atnStartState is StarLoopEntryState) { + if ((atnStartState as StarLoopEntryState).isPrecedenceDecision) { + precedenceDfa = true; + final precedenceState = DFAState(configs: ATNConfigSet()); + precedenceState.edges = []; + precedenceState.isAcceptState = false; + precedenceState.requiresFullContext = false; + s0 = precedenceState; + } + } + + this.precedenceDfa = precedenceDfa; + } + + /// Gets whether this DFA is a precedence DFA. Precedence DFAs use a special + /// start state {@link #s0} which is not stored in {@link #states}. The + /// {@link DFAState#edges} array for this start state contains outgoing edges + /// supplying individual start states corresponding to specific precedence + /// values. + /// + /// @return [true] if this is a precedence DFA; otherwise, + /// [false]. + /// @see Parser#getPrecedence() + bool isPrecedenceDfa() { + return precedenceDfa; + } + + /// Get the start state for a specific precedence value. + /// + /// @param precedence The current precedence. + /// @return The start state corresponding to the specified precedence, or + /// null if no start state exists for the specified precedence. + /// + /// @throws IllegalStateException if this is not a precedence DFA. + /// @see #isPrecedenceDfa() + DFAState getPrecedenceStartState(int precedence) { + if (!isPrecedenceDfa()) { + throw StateError( + 'Only precedence DFAs may contain a precedence start state.'); + } + + // s0.edges is never null for a precedence DFA + if (precedence < 0 || precedence >= s0.edges.length) { + return null; + } + + return s0.edges[precedence]; + } + + /// Set the start state for a specific precedence value. + /// + /// @param precedence The current precedence. + /// @param startState The start state corresponding to the specified + /// precedence. + /// + /// @throws IllegalStateException if this is not a precedence DFA. + /// @see #isPrecedenceDfa() + void setPrecedenceStartState(int precedence, DFAState startState) { + if (!isPrecedenceDfa()) { + throw StateError( + 'Only precedence DFAs may contain a precedence start state.'); + } + + if (precedence < 0) { + return; + } + + // synchronization on s0 here is ok. 
when the DFA is turned into a + // precedence DFA, s0 will be initialized once and not updated again + // s0.edges is never null for a precedence DFA + if (precedence >= s0.edges.length) { + final original = s0.edges; + s0.edges = List(precedence + 1); + List.copyRange(s0.edges, 0, original); + } + + s0.edges[precedence] = startState; + } + + /// Return a list of all states in this DFA, ordered by state number. + + List getStates() { + final result = states.keys.toList(); + result.sort((DFAState o1, DFAState o2) { + return o1.stateNumber - o2.stateNumber; + }); + + return result; + } + + @override + String toString([Vocabulary vocabulary]) { + vocabulary = vocabulary ?? VocabularyImpl.EMPTY_VOCABULARY; + if (s0 == null) { + return ''; + } + + final serializer = DFASerializer(this, vocabulary); + return serializer.toString(); + } + + String toLexerString() { + if (s0 == null) return ''; + DFASerializer serializer = LexerDFASerializer(this); + return serializer.toString(); + } +} diff --git a/runtime/Dart/lib/src/dfa/src/dfa_serializer.dart b/runtime/Dart/lib/src/dfa/src/dfa_serializer.dart new file mode 100644 index 000000000..7ffb42bbb --- /dev/null +++ b/runtime/Dart/lib/src/dfa/src/dfa_serializer.dart @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../util/utils.dart'; +import '../../vocabulary.dart'; +import 'dfa.dart'; +import 'dfa_state.dart'; + +/// A DFA walker that knows how to dump them to serialized strings. */ +class DFASerializer { + final DFA dfa; + + final Vocabulary vocabulary; + + DFASerializer(this.dfa, this.vocabulary); + + @override + String toString() { + if (dfa.s0 == null) return null; + final buf = StringBuffer(); + final states = dfa.getStates(); + for (var s in states) { + var n = 0; + if (s.edges != null) n = s.edges.length; + for (var i = 0; i < n; i++) { + final t = s.edges[i]; + if (t != null && t.stateNumber != 0x7FFFFFFF) { + buf.write(getStateString(s)); + final label = getEdgeLabel(i); + buf.write('-'); + buf.write(label); + buf.write('->'); + buf.write(getStateString(t)); + buf.write('\n'); + } + } + } + + final output = buf.toString(); + if (output.isEmpty) return null; + //return Utils.sortLinesInString(output); + return output; + } + + String getEdgeLabel(int i) { + return vocabulary.getDisplayName(i - 1); + } + + String getStateString(DFAState s) { + final n = s.stateNumber; + final baseStateStr = (s.isAcceptState ? ':' : '') + + 's$n' + + (s.requiresFullContext ? '^' : ''); + if (s.isAcceptState) { + if (s.predicates != null) { + return baseStateStr + '=>${arrayToString(s.predicates)}'; + } else { + return baseStateStr + '=>${s.prediction}'; + } + } else { + return baseStateStr; + } + } +} + +class LexerDFASerializer extends DFASerializer { + LexerDFASerializer(dfa) : super(dfa, VocabularyImpl.EMPTY_VOCABULARY); + + @override + String getEdgeLabel(i) { + return "'" + String.fromCharCode(i) + "'"; + } +} diff --git a/runtime/Dart/lib/src/dfa/src/dfa_state.dart b/runtime/Dart/lib/src/dfa/src/dfa_state.dart new file mode 100644 index 000000000..17a2f4f7d --- /dev/null +++ b/runtime/Dart/lib/src/dfa/src/dfa_state.dart @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +import '../../atn/atn.dart'; +import '../../util/murmur_hash.dart'; +import '../../util/utils.dart'; + +/// Map a predicate to a predicted alternative. */ +class PredPrediction { + final alt; + final pred; + + PredPrediction(this.pred, this.alt); + + @override + String toString() { + return '($pred, $alt)'; + } +} + +/// A DFA state represents a set of possible ATN configurations. +/// As Aho, Sethi, Ullman p. 117 says "The DFA uses its state +/// to keep track of all possible states the ATN can be in after +/// reading each input symbol. That is to say, after reading +/// input a1a2..an, the DFA is in a state that represents the +/// subset T of the states of the ATN that are reachable from the +/// ATN's start state along some path labeled a1a2..an." +/// In conventional NFA→DFA conversion, therefore, the subset T +/// would be a bitset representing the set of states the +/// ATN could be in. We need to track the alt predicted by each +/// state as well, however. More importantly, we need to maintain +/// a stack of states, tracking the closure operations as they +/// jump from rule to rule, emulating rule invocations (method calls). +/// I have to add a stack to simulate the proper lookahead sequences for +/// the underlying LL grammar from which the ATN was derived. +/// +///

+/// I use a set of ATNConfig objects not simple states. An ATNConfig
+/// is both a state (ala normal conversion) and a RuleContext describing
+/// the chain of rules (if any) followed to arrive at that state.
+///
+/// A DFA state may have multiple references to a particular state,
+/// but with different ATN contexts (with same or different alts)
+/// meaning that state was reached via a different set of rule invocations.
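+///
+/// For example (an illustrative sketch, not part of the original patch;
+/// `someConfigs` stands for any [ATNConfigSet]), the alternatives represented
+/// by a state's configurations can be read back through [altSet]:
+///
+/// ```dart
+/// final state = DFAState(configs: someConfigs);
+/// print(state.altSet); // alts mentioned by the configurations, or null
+/// ```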

+class DFAState { + int stateNumber = -1; + + ATNConfigSet configs = ATNConfigSet(); + + /// {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1) + /// {@link Token#EOF} maps to {@code edges[0]}. + + List edges; + + bool isAcceptState = false; + + /// if accept state, what ttype do we match or alt do we predict? + /// This is set to {@link ATN#INVALID_ALT_NUMBER} when {@link #predicates}{@code !=null} or + /// {@link #requiresFullContext}. + int prediction = 0; + + LexerActionExecutor lexerActionExecutor; + + /// Indicates that this state was created during SLL prediction that + /// discovered a conflict between the configurations in the state. Future + /// {@link ParserATNSimulator#execATN} invocations immediately jumped doing + /// full context prediction if this field is true. + bool requiresFullContext = false; + + /// During SLL parsing, this is a list of predicates associated with the + /// ATN configurations of the DFA state. When we have predicates, + /// {@link #requiresFullContext} is [false] since full context prediction evaluates predicates + /// on-the-fly. If this is not null, then {@link #prediction} is + /// {@link ATN#INVALID_ALT_NUMBER}. + /// + ///

+  /// We only use these for non-{@link #requiresFullContext} but conflicting states. That
+  /// means we know from the context (it's $ or we don't dip into outer
+  /// context) that it's an ambiguity not a conflict.
+  ///
+  /// This list is computed by {@link ParserATNSimulator#predicateDFAState}.
+ + List predicates; + + DFAState({this.stateNumber, this.configs}); + + /// Get the set of all alts mentioned by all ATN configurations in this + /// DFA state. + Set get altSet { + final alts = {}; + if (configs != null) { + for (var c in configs) { + alts.add(c.alt); + } + } + if (alts.isEmpty) return null; + return alts; + } + + @override + int get hashCode { + var hash = MurmurHash.initialize(7); + hash = MurmurHash.update(hash, configs.hashCode); + hash = MurmurHash.finish(hash, 1); + return hash; + } + + /// Two [DFAState] instances are equal if their ATN configuration sets + /// are the same. This method is used to see if a state already exists. + /// + ///

+  /// Because the number of alternatives and number of ATN configurations are
+  /// finite, there is a finite number of DFA states that can be processed.
+  /// This is necessary to show that the algorithm terminates.
+  ///
+  /// Cannot test the DFA state numbers here because in
+  /// {@link ParserATNSimulator#addDFAState} we need to know if any other state
+  /// exists that has this exact set of ATN configurations. The
+  /// {@link #stateNumber} is irrelevant.
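+  ///
+  /// A minimal sketch of these semantics (illustrative only, not part of the
+  /// original patch; `someConfigs` is any shared [ATNConfigSet]):
+  ///
+  /// ```dart
+  /// final a = DFAState(stateNumber: 1, configs: someConfigs);
+  /// final b = DFAState(stateNumber: 2, configs: someConfigs);
+  /// assert(a == b); // equal configs mean equal states; numbers are ignored
+  /// ```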

+ + @override + bool operator ==(Object o) { + // compare set of ATN configurations in this set with other + if (identical(this, o)) return true; + + if (!(o is DFAState)) { + return false; + } + + DFAState other = o; + // TODO (sam): what to do when configs==null? + final sameSet = configs == other.configs; +// System.out.println("DFAState.equals: "+configs+(sameSet?"==":"!=")+other.configs); + return sameSet; + } + + @override + String toString() { + final buf = StringBuffer(); + buf.write('$stateNumber:$configs'); + if (isAcceptState) { + buf.write('=>'); + if (predicates != null) { + buf.write(arrayToString(predicates)); + } else { + buf.write(prediction); + } + } + return buf.toString(); + } +} diff --git a/runtime/Dart/lib/src/error/error.dart b/runtime/Dart/lib/src/error/error.dart new file mode 100644 index 000000000..8b96c26b9 --- /dev/null +++ b/runtime/Dart/lib/src/error/error.dart @@ -0,0 +1,10 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +export 'src/diagnostic_error_listener.dart'; +export 'src/error_listener.dart'; +export 'src/error_strategy.dart'; +export 'src/errors.dart'; diff --git a/runtime/Dart/lib/src/error/src/diagnostic_error_listener.dart b/runtime/Dart/lib/src/error/src/diagnostic_error_listener.dart new file mode 100644 index 000000000..7ea39ab9f --- /dev/null +++ b/runtime/Dart/lib/src/error/src/diagnostic_error_listener.dart @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../atn/atn.dart'; +import '../../dfa/dfa.dart'; +import '../../interval_set.dart'; +import '../../parser.dart'; +import '../../util/bit_set.dart'; +import 'error_listener.dart'; + +/// This implementation of [ANTLRErrorListener] can be used to identify +/// certain potential correctness and performance problems in grammars. "Reports" +/// are made by calling {@link Parser#notifyErrorListeners} with the appropriate +/// message. +/// +///
    +///
+/// * Ambiguities: These are cases where more than one path through the
+///   grammar can match the input.
+/// * Weak context sensitivity: These are cases where full-context
+///   prediction resolved an SLL conflict to a unique alternative which equaled the
+///   minimum alternative of the SLL conflict.
+/// * Strong (forced) context sensitivity: These are cases where the
+///   full-context prediction resolved an SLL conflict to a unique alternative,
+///   and the minimum alternative of the SLL conflict was found to not be
+///   a truly viable alternative. Two-stage parsing cannot be used for inputs where
+///   this situation occurs.
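+///
+/// Typical wiring (an illustrative sketch, not part of the original patch;
+/// `MyParser` is a hypothetical generated parser):
+///
+/// ```dart
+/// final parser = MyParser(tokens);
+/// parser.addErrorListener(DiagnosticErrorListener());
+/// // reports arrive as ordinary error messages, e.g.
+/// // reportAmbiguity d=0 (expr): ambigAlts={1, 2}, input='1+2*3'
+/// ```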
+class DiagnosticErrorListener extends BaseErrorListener { + /// When [true], only exactly known ambiguities are reported. + final bool exactOnly; + + /// Initializes a new instance of [DiagnosticErrorListener], specifying + /// whether all ambiguities or only exact ambiguities are reported. + /// + /// @param exactOnly [true] to report only exact ambiguities, otherwise + /// [false] to report all ambiguities. + DiagnosticErrorListener([this.exactOnly = true]); + + @override + void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, bool exact, BitSet ambigAlts, ATNConfigSet configs) { + if (exactOnly && !exact) { + return; + } + + final decision = getDecisionDescription(recognizer, dfa); + final conflictingAlts = getConflictingAlts(ambigAlts, configs); + final text = + recognizer.tokenStream.getText(Interval.of(startIndex, stopIndex)); + final message = + "reportAmbiguity d=$decision: ambigAlts=$conflictingAlts, input='$text'"; + recognizer.notifyErrorListeners(message); + } + + @override + void reportAttemptingFullContext(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, BitSet conflictingAlts, ATNConfigSet configs) { + final decision = getDecisionDescription(recognizer, dfa); + final text = + recognizer.tokenStream.getText(Interval.of(startIndex, stopIndex)); + final message = "reportAttemptingFullContext d=$decision, input='$text'"; + recognizer.notifyErrorListeners(message); + } + + @override + void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, int prediction, ATNConfigSet configs) { + final decision = getDecisionDescription(recognizer, dfa); + final text = + recognizer.tokenStream.getText(Interval.of(startIndex, stopIndex)); + final message = "reportContextSensitivity d=$decision, input='$text'"; + recognizer.notifyErrorListeners(message); + } + + String getDecisionDescription(Parser recognizer, DFA dfa) { + final decision = dfa.decision; + final ruleIndex = dfa.atnStartState.ruleIndex; + + final ruleNames = recognizer.ruleNames; + if (ruleIndex < 0 || ruleIndex >= ruleNames.length) { + return decision.toString(); + } + + final ruleName = ruleNames[ruleIndex]; + if (ruleName == null || ruleName.isEmpty) { + return decision.toString(); + } + + return '$decision ($ruleName)'; + } + + /// Computes the set of conflicting or ambiguous alternatives from a + /// configuration set, if that information was not already provided by the + /// parser. + /// + /// @param reportedAlts The set of conflicting or ambiguous alternatives, as + /// reported by the parser. + /// @param configs The conflicting or ambiguous configuration set. + /// @return Returns [reportedAlts] if it is not null, otherwise + /// returns the set of alternatives represented in [configs]. + BitSet getConflictingAlts(BitSet reportedAlts, ATNConfigSet configs) { + if (reportedAlts != null) { + return reportedAlts; + } + + final result = BitSet(); + for (var config in configs) { + result.set(config.alt); + } + + return result; + } +} diff --git a/runtime/Dart/lib/src/error/src/error_listener.dart b/runtime/Dart/lib/src/error/src/error_listener.dart new file mode 100644 index 000000000..e2458434a --- /dev/null +++ b/runtime/Dart/lib/src/error/src/error_listener.dart @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +import 'dart:io'; + +import '../../atn/atn.dart'; +import '../../dfa/dfa.dart'; +import '../../parser.dart'; +import '../../recognizer.dart'; +import '../../util/bit_set.dart'; +import 'errors.dart'; + +abstract class ErrorListener { + /// Upon syntax error, notify any interested parties. This is not how to + /// recover from errors or compute error messages. [ANTLRErrorStrategy] + /// specifies how to recover from syntax errors and how to compute error + /// messages. This listener's job is simply to emit a computed message, + /// though it has enough information to create its own message in many cases. + /// + ///

+  /// The [RecognitionException] is non-null for all syntax errors except
+  /// when we discover mismatched token errors that we can recover from
+  /// in-line, without returning from the surrounding rule (via the single
+  /// token insertion and deletion mechanism).
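+  ///
+  /// For instance, a listener that collects messages could look like this
+  /// (an illustrative sketch, not part of the original patch):
+  ///
+  /// ```dart
+  /// class CollectingErrorListener extends BaseErrorListener {
+  ///   final errors = <String>[];
+  ///
+  ///   @override
+  ///   void syntaxError(recognizer, offendingSymbol, line, charPositionInLine,
+  ///       msg, e) {
+  ///     errors.add('line $line:$charPositionInLine $msg');
+  ///   }
+  /// }
+  /// ```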

+ /// + /// @param recognizer + /// What parser got the error. From this + /// object, you can access the context as well + /// as the input stream. + /// @param offendingSymbol + /// The offending token in the input token + /// stream, unless recognizer is a lexer (then it's null). If + /// no viable alternative error, [e] has token at which we + /// started production for the decision. + /// @param line + /// The line number in the input where the error occurred. + /// @param charPositionInLine + /// The character position within that line where the error occurred. + /// @param msg + /// The message to emit. + /// @param e + /// The exception generated by the parser that led to + /// the reporting of an error. It is null in the case where + /// the parser was able to recover in line without exiting the + /// surrounding rule. + void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, + int charPositionInLine, String msg, RecognitionException e); + + /// This method is called by the parser when a full-context prediction + /// results in an ambiguity. + /// + ///

+  /// Each full-context prediction which does not result in a syntax error
+  /// will call either {@link #reportContextSensitivity} or
+  /// {@link #reportAmbiguity}.
+  ///
+  /// When [ambigAlts] is not null, it contains the set of potentially
+  /// viable alternatives identified by the prediction algorithm. When
+  /// [ambigAlts] is null, use {@link ATNConfigSet#getAlts} to obtain the
+  /// represented alternatives from the [configs] argument.
+  ///
+  /// When [exact] is [true], all of the potentially viable alternatives are
+  /// truly viable, i.e. this is reporting an exact ambiguity. When [exact] is
+  /// [false], at least two of the potentially viable alternatives are viable
+  /// for the current input, but the prediction algorithm terminated as soon as
+  /// it determined that at least the minimum potentially viable alternative is
+  /// truly viable.
+  ///
+  /// When the {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} prediction
+  /// mode is used, the parser is required to identify exact ambiguities so
+  /// [exact] will always be [true].
+  ///
+  /// This method is not used by lexers.
+ /// + /// @param recognizer the parser instance + /// @param dfa the DFA for the current decision + /// @param startIndex the input index where the decision started + /// @param stopIndex the input input where the ambiguity was identified + /// @param exact [true] if the ambiguity is exactly known, otherwise + /// [false]. This is always [true] when + /// {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used. + /// @param ambigAlts the potentially ambiguous alternatives, or null + /// to indicate that the potentially ambiguous alternatives are the complete + /// set of represented alternatives in [configs] + /// @param configs the ATN configuration set where the ambiguity was + /// identified + void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, bool exact, BitSet ambigAlts, ATNConfigSet configs); + + /// This method is called when an SLL conflict occurs and the parser is about + /// to use the full context information to make an LL decision. + /// + ///

+  /// If one or more configurations in [configs] contains a semantic
+  /// predicate, the predicates are evaluated before this method is called. The
+  /// subset of alternatives which are still viable after predicates are
+  /// evaluated is reported in [conflictingAlts].
+  ///
+  /// This method is not used by lexers.
+ /// + /// @param recognizer the parser instance + /// @param dfa the DFA for the current decision + /// @param startIndex the input index where the decision started + /// @param stopIndex the input index where the SLL conflict occurred + /// @param conflictingAlts The specific conflicting alternatives. If this is + /// null, the conflicting alternatives are all alternatives + /// represented in [configs]. At the moment, conflictingAlts is non-null + /// (for the reference implementation, but Sam's optimized version can see this + /// as null). + /// @param configs the ATN configuration set where the SLL conflict was + /// detected + void reportAttemptingFullContext(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, BitSet conflictingAlts, ATNConfigSet configs); + + /// This method is called by the parser when a full-context prediction has a + /// unique result. + /// + ///

+  /// Each full-context prediction which does not result in a syntax error
+  /// will call either {@link #reportContextSensitivity} or
+  /// {@link #reportAmbiguity}.
+  ///
+  /// For prediction implementations that only evaluate full-context
+  /// predictions when an SLL conflict is found (including the default
+  /// [ParserATNSimulator] implementation), this method reports cases
+  /// where SLL conflicts were resolved to unique full-context predictions,
+  /// i.e. the decision was context-sensitive. This report does not necessarily
+  /// indicate a problem, and it may appear even in completely unambiguous
+  /// grammars.
+  ///
+  /// [configs] may have more than one represented alternative if the
+  /// full-context prediction algorithm does not evaluate predicates before
+  /// beginning the full-context prediction. In all cases, the final prediction
+  /// is passed as the [prediction] argument.
+  ///
+  /// Note that the definition of "context sensitivity" in this method
+  /// differs from the concept in {@link DecisionInfo#contextSensitivities}.
+  /// This method reports all instances where an SLL conflict occurred but LL
+  /// parsing produced a unique result, whether or not that unique result
+  /// matches the minimum alternative in the SLL conflicting set.
+  ///
+  /// This method is not used by lexers.
+ /// + /// @param recognizer the parser instance + /// @param dfa the DFA for the current decision + /// @param startIndex the input index where the decision started + /// @param stopIndex the input index where the context sensitivity was + /// finally determined + /// @param prediction the unambiguous result of the full-context prediction + /// @param configs the ATN configuration set where the unambiguous prediction + /// was determined + void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, int prediction, ATNConfigSet configs); +} + +class BaseErrorListener extends ErrorListener { + @override + void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, bool exact, BitSet ambigAlts, ATNConfigSet configs) {} + + @override + void reportAttemptingFullContext(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, BitSet conflictingAlts, ATNConfigSet configs) {} + + @override + void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, int prediction, ATNConfigSet configs) {} + + @override + void syntaxError(Recognizer recognizer, Object offendingSymbol, + int line, int charPositionInLine, String msg, RecognitionException e) {} +} + +class ConsoleErrorListener extends BaseErrorListener { + /// Provides a default instance of [ConsoleErrorListener]. + static final INSTANCE = ConsoleErrorListener(); + + /// {@inheritDoc} + /// + ///

+  /// This implementation prints messages to [stderr] containing the
+  /// values of [line], [charPositionInLine], and [msg] using
+  /// the following format:
+  ///
+  ///     line <line>:<charPositionInLine> <msg>
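+  ///
+  /// Typical usage (an illustrative sketch, not part of the original patch;
+  /// assumes the runtime's removeErrorListeners/addErrorListener API):
+  ///
+  /// ```dart
+  /// parser.removeErrorListeners(); // drop default listeners
+  /// parser.addErrorListener(ConsoleErrorListener.INSTANCE);
+  /// ```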
+ @override + void syntaxError(recognizer, offendingSymbol, line, column, msg, e) { + stderr.writeln('line $line:$column $msg'); + } +} + +/// This implementation of [ErrorListener] dispatches all calls to a +/// collection of delegate listeners. This reduces the effort required to support multiple +/// listeners. +class ProxyErrorListener implements ErrorListener { + final List delegates; + + ProxyErrorListener(this.delegates) { + if (delegates == null) { + throw ArgumentError.notNull('delegates'); + } + } + + @override + void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, + int charPositionInLine, String msg, RecognitionException e) { + for (final listener in delegates) { + listener.syntaxError( + recognizer, offendingSymbol, line, charPositionInLine, msg, e); + } + } + + @override + void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, bool exact, BitSet ambigAlts, ATNConfigSet configs) { + for (final listener in delegates) { + listener.reportAmbiguity( + recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs); + } + } + + @override + void reportAttemptingFullContext(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, BitSet conflictingAlts, ATNConfigSet configs) { + for (final listener in delegates) { + listener.reportAttemptingFullContext( + recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs); + } + } + + @override + void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, + int stopIndex, int prediction, ATNConfigSet configs) { + for (final listener in delegates) { + listener.reportContextSensitivity( + recognizer, dfa, startIndex, stopIndex, prediction, configs); + } + } +} diff --git a/runtime/Dart/lib/src/error/src/error_strategy.dart b/runtime/Dart/lib/src/error/src/error_strategy.dart new file mode 100644 index 000000000..c5d02ef20 --- /dev/null +++ b/runtime/Dart/lib/src/error/src/error_strategy.dart @@ -0,0 +1,902 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:developer'; + +import 'package:logging/logging.dart'; + +import '../../atn/atn.dart'; +import '../../interval_set.dart'; +import '../../misc/pair.dart'; +import '../../parser.dart'; +import '../../parser_rule_context.dart'; +import '../../rule_context.dart'; +import '../../token.dart'; +import '../../tree/tree.dart'; +import 'errors.dart'; + +/// The interface for defining strategies to deal with syntax errors encountered +/// during a parse by ANTLR-generated parsers. We distinguish between three +/// different kinds of errors: +/// +///
    +///
+/// * The parser could not figure out which path to take in the ATN (none of
+///   the available alternatives could possibly match)
+/// * The current input does not match what we were looking for
+/// * A predicate evaluated to false
+///
+/// Implementations of this interface report syntax errors by calling
+/// {@link Parser#notifyErrorListeners}.
+///
+/// TODO: what to do about lexers
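+///
+/// Installing a strategy on a parser (an illustrative sketch, not part of the
+/// original patch; assumes the parser exposes the runtime's `errorHandler`
+/// field and `MyParser` is a hypothetical generated parser):
+///
+/// ```dart
+/// final parser = MyParser(tokens);
+/// parser.errorHandler = BailErrorStrategy();
+/// ```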

+abstract class ErrorStrategy { + /// Reset the error handler state for the specified [recognizer]. + /// @param recognizer the parser instance + void reset(Parser recognizer); + + /// This method is called when an unexpected symbol is encountered during an + /// inline match operation, such as {@link Parser#match}. If the error + /// strategy successfully recovers from the match failure, this method + /// returns the [Token] instance which should be treated as the + /// successful result of the match. + /// + ///

+  /// This method handles the consumption of any tokens - the caller should
+  /// not call {@link Parser#consume} after a successful recovery.
+  ///
+  /// Note that the calling code will not report an error if this method
+  /// returns successfully. The error strategy implementation is responsible
+  /// for calling {@link Parser#notifyErrorListeners} as appropriate.
+ /// + /// @param recognizer the parser instance + /// @ if the error strategy was not able to + /// recover from the unexpected input symbol + Token recoverInline(Parser recognizer); + + /// This method is called to recover from exception [e]. This method is + /// called after {@link #reportError} by the default exception handler + /// generated for a rule method. + /// + /// @see #reportError + /// + /// @param recognizer the parser instance + /// @param e the recognition exception to recover from + /// @ if the error strategy could not recover from + /// the recognition exception + void recover(Parser recognizer, RecognitionException e); + + /// This method provides the error handler with an opportunity to handle + /// syntactic or semantic errors in the input stream before they result in a + /// [RecognitionException]. + /// + ///

+  /// The generated code currently contains calls to {@link #sync} after
+  /// entering the decision state of a closure block ({@code (...)*} or
+  /// {@code (...)+}).
+  ///
+  /// For an implementation based on Jim Idle's "magic sync" mechanism, see
+  /// {@link DefaultErrorStrategy#sync}.
+ /// + /// @see DefaultErrorStrategy#sync + /// + /// @param recognizer the parser instance + /// @ if an error is detected by the error + /// strategy but cannot be automatically recovered at the current state in + /// the parsing process + void sync(Parser recognizer); + + /// Tests whether or not [recognizer] is in the process of recovering + /// from an error. In error recovery mode, {@link Parser#consume} adds + /// symbols to the parse tree by calling + /// {@link Parser#createErrorNode(ParserRuleContext, Token)} then + /// {@link ParserRuleContext#addErrorNode(ErrorNode)} instead of + /// {@link Parser#createTerminalNode(ParserRuleContext, Token)}. + /// + /// @param recognizer the parser instance + /// @return [true] if the parser is currently recovering from a parse + /// error, otherwise [false] + bool inErrorRecoveryMode(Parser recognizer); + + /// This method is called by when the parser successfully matches an input + /// symbol. + /// + /// @param recognizer the parser instance + void reportMatch(Parser recognizer); + + /// Report any kind of [RecognitionException]. This method is called by + /// the default exception handler generated for a rule method. + /// + /// @param recognizer the parser instance + /// @param e the recognition exception to report + void reportError(Parser recognizer, RecognitionException e); +} + +/// This is the default implementation of [ANTLRErrorStrategy] used for +/// error reporting and recovery in ANTLR parsers. +class DefaultErrorStrategy implements ErrorStrategy { + /// Indicates whether the error strategy is currently "recovering from an + /// error". This is used to suppress reporting multiple error messages while + /// attempting to recover from a detected syntax error. + /// + /// @see #inErrorRecoveryMode + bool errorRecoveryMode = false; + + /// The index into the input stream where the last error occurred. + /// This is used to prevent infinite loops where an error is found + /// but no token is consumed during recovery...another error is found, + /// ad nauseum. This is a failsafe mechanism to guarantee that at least + /// one token/tree node is consumed for two errors. + int lastErrorIndex = -1; + + IntervalSet lastErrorStates; + + /// This field is used to propagate information about the lookahead following + /// the previous match. Since prediction prefers completing the current rule + /// to error recovery efforts, error reporting may occur later than the + /// original point where it was discoverable. The original context is used to + /// compute the true expected sets as though the reporting occurred as early + /// as possible. + ParserRuleContext nextTokensContext; + + /// @see #nextTokensContext + int nextTokensState; + + /// {@inheritDoc} + /// + ///

+  /// The default implementation simply calls {@link #endErrorCondition} to
+  /// ensure that the handler is not in error recovery mode.
+ + @override + void reset(Parser recognizer) { + endErrorCondition(recognizer); + } + + /// This method is called to enter error recovery mode when a recognition + /// exception is reported. + /// + /// @param recognizer the parser instance + void beginErrorCondition(Parser recognizer) { + errorRecoveryMode = true; + } + + /// {@inheritDoc} + + @override + bool inErrorRecoveryMode(Parser recognizer) { + return errorRecoveryMode; + } + + /// This method is called to leave error recovery mode after recovering from + /// a recognition exception. + /// + /// @param recognizer + void endErrorCondition(Parser recognizer) { + errorRecoveryMode = false; + lastErrorStates = null; + lastErrorIndex = -1; + } + + /// {@inheritDoc} + /// + ///

+  /// The default implementation simply calls {@link #endErrorCondition}.
+ + @override + void reportMatch(Parser recognizer) { + endErrorCondition(recognizer); + } + + /// {@inheritDoc} + /// + ///

+  /// The default implementation returns immediately if the handler is already
+  /// in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
+  /// and dispatches the reporting task based on the runtime type of [e]
+  /// according to the following table:
+  ///
+  /// * [NoViableAltException]: Dispatches the call to
+  ///   {@link #reportNoViableAlternative}
+  /// * [InputMismatchException]: Dispatches the call to
+  ///   {@link #reportInputMismatch}
+  /// * [FailedPredicateException]: Dispatches the call to
+  ///   {@link #reportFailedPredicate}
+  /// * All other types: calls {@link Parser#notifyErrorListeners} to report
+  ///   the exception
+ + @override + void reportError(Parser recognizer, RecognitionException e) { + // if we've already reported an error and have not matched a token + // yet successfully, don't report any errors. + if (inErrorRecoveryMode(recognizer)) { +// System.err.print("[SPURIOUS] "); + return; // don't report spurious errors + } + beginErrorCondition(recognizer); + if (e is NoViableAltException) { + reportNoViableAlternative(recognizer, e); + } else if (e is InputMismatchException) { + reportInputMismatch(recognizer, e); + } else if (e is FailedPredicateException) { + reportFailedPredicate(recognizer, e); + } else { + log('unknown recognition error type: ${e.runtimeType}', + level: Level.SEVERE.value); + recognizer.notifyErrorListeners(e.message, e.offendingToken, e); + } + } + + /// {@inheritDoc} + /// + ///

+  /// The default implementation resynchronizes the parser by consuming tokens
+  /// until we find one in the resynchronization set--loosely the set of tokens
+  /// that can follow the current rule.
+
+  @override
+  void recover(Parser recognizer, RecognitionException e) {
+//    System.out.println("recover in "+recognizer.getRuleInvocationStack()+
+//                       " index="+recognizer.inputStream.index()+
+//                       ", lastErrorIndex="+lastErrorIndex+
+//                       ", states="+lastErrorStates);
+    if (lastErrorIndex == recognizer.inputStream.index &&
+        lastErrorStates != null &&
+        lastErrorStates.contains(recognizer.state)) {
+      // uh oh, another error at same token index and previously-visited
+      // state in ATN; must be a case where LT(1) is in the recovery
+      // token set so nothing got consumed. Consume a single token
+      // at least to prevent an infinite loop; this is a failsafe.
+//      log("seen error condition before index="+
+//          lastErrorIndex+", states="+lastErrorStates, level: Level.SEVERE.value);
+//      log("FAILSAFE consumes "+recognizer.getTokenNames()[recognizer.inputStream.LA(1)], level: Level.SEVERE.value);
+      recognizer.consume();
+    }
+    lastErrorIndex = recognizer.inputStream.index;
+    lastErrorStates ??= IntervalSet();
+    lastErrorStates.addOne(recognizer.state);
+    final followSet = getErrorRecoverySet(recognizer);
+    consumeUntil(recognizer, followSet);
+  }
+
+  /// The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
+  /// that the current lookahead symbol is consistent with what we were expecting
+  /// at this point in the ATN. You can call this anytime but ANTLR only
+  /// generates code to check before subrules/loops and each iteration.
+  ///

+  /// Implements Jim Idle's magic sync mechanism in closures and optional
+  /// subrules. E.g.,
+  ///
+  ///     a : sync ( stuff sync )* ;
+  ///     sync : {consume to what can follow sync} ;
+  ///
+  /// At the start of a sub rule upon error, {@link #sync} performs single
+  /// token deletion, if possible. If it can't do that, it bails on the current
+  /// rule and uses the default error recovery, which consumes until the
+  /// resynchronization set of the current rule.
+  ///
+  /// If the sub rule is optional ({@code (...)?}, {@code (...)*}, or a block
+  /// with an empty alternative), then the expected set includes what follows
+  /// the subrule.
+  ///
+  /// During loop iteration, it consumes until it sees a token that can start a
+  /// sub rule or what follows the loop. Yes, that is pretty aggressive. We opt
+  /// to stay in the loop as long as possible.
+  ///
+  /// ORIGINS
+  ///
+  /// Previous versions of ANTLR did a poor job of their recovery within loops.
+  /// A single mismatched token or missing token would force the parser to bail
+  /// out of the entire rules surrounding the loop. So, for rule
+  ///
+  ///     classDef : 'class' ID '{' member* '}'
+  ///
+  /// input with an extra token between members would force the parser to
+  /// consume until it found the next class definition rather than the next
+  /// member definition of the current class.
+  ///
+  /// This functionality cost a little bit of effort because the parser has to
+  /// compare the token set at the start of the loop and at each iteration. If
+  /// for some reason speed is suffering for you, you can turn off this
+  /// functionality by simply overriding this method as a blank { }.
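+  ///
+  /// A blank override looks like this (an illustrative sketch, not part of
+  /// the original patch); [BailErrorStrategy] below does exactly this:
+  ///
+  /// ```dart
+  /// class FastErrorStrategy extends DefaultErrorStrategy {
+  ///   @override
+  ///   void sync(Parser recognizer) {} // opt out of adaptive re-sync
+  /// }
+  /// ```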

+
+  @override
+  void sync(Parser recognizer) {
+    final s = recognizer.interpreter.atn.states[recognizer.state];
+//    log("sync @ "+s.stateNumber+"="+s.getClass().getSimpleName(), level: Level.SEVERE.value);
+    // If already recovering, don't try to sync
+    if (inErrorRecoveryMode(recognizer)) {
+      return;
+    }
+
+    final tokens = recognizer.inputStream;
+    final la = tokens.LA(1);
+
+    // try cheaper subset first; might get lucky. seems to shave a wee bit off
+    final nextTokens = recognizer.getATN().nextTokens(s);
+    if (nextTokens.contains(la)) {
+      // We are sure the token matches
+      nextTokensContext = null;
+      nextTokensState = ATNState.INVALID_STATE_NUMBER;
+      return;
+    }
+
+    if (nextTokens.contains(Token.EPSILON)) {
+      if (nextTokensContext == null) {
+        // It's possible the next token won't match; information tracked
+        // by sync is restricted for performance.
+        nextTokensContext = recognizer.context;
+        nextTokensState = recognizer.state;
+      }
+      return;
+    }
+
+    switch (s.stateType) {
+      case StateType.BLOCK_START:
+      case StateType.STAR_BLOCK_START:
+      case StateType.PLUS_BLOCK_START:
+      case StateType.STAR_LOOP_ENTRY:
+        // report error and recover if possible
+        if (singleTokenDeletion(recognizer) != null) {
+          return;
+        }
+
+        throw InputMismatchException(recognizer);
+
+      case StateType.PLUS_LOOP_BACK:
+      case StateType.STAR_LOOP_BACK:
+//        log("at loop back: "+s.getClass().getSimpleName(), level: Level.SEVERE.value);
+        reportUnwantedToken(recognizer);
+        final expecting = recognizer.expectedTokens;
+        final whatFollowsLoopIterationOrRule =
+            expecting | getErrorRecoverySet(recognizer);
+        consumeUntil(recognizer, whatFollowsLoopIterationOrRule);
+        break;
+
+      default:
+        // do nothing if we can't identify the exact kind of ATN state
+        break;
+    }
+  }
+
+  /// This is called by {@link #reportError} when the exception is a
+  /// [NoViableAltException].
+  ///
+  /// @see #reportError
+  ///
+  /// @param recognizer the parser instance
+  /// @param e the recognition exception
+  void reportNoViableAlternative(Parser recognizer, NoViableAltException e) {
+    final tokens = recognizer.inputStream;
+    String input;
+    if (tokens != null) {
+      if (e.startToken.type == Token.EOF) {
+        input = '<EOF>';
+      } else {
+        input = tokens.getTextRange(e.startToken, e.offendingToken);
+      }
+    } else {
+      input = '<unknown input>';
+    }
+    final msg = 'no viable alternative at input ' + escapeWSAndQuote(input);
+    recognizer.notifyErrorListeners(msg, e.offendingToken, e);
+  }
+
+  /// This is called by {@link #reportError} when the exception is an
+  /// [InputMismatchException].
+  ///
+  /// @see #reportError
+  ///
+  /// @param recognizer the parser instance
+  /// @param e the recognition exception
+  void reportInputMismatch(Parser recognizer, InputMismatchException e) {
+    final msg = 'mismatched input ' +
+        getTokenErrorDisplay(e.offendingToken) +
+        ' expecting ' +
+        e.expectedTokens.toString(vocabulary: recognizer.vocabulary);
+    recognizer.notifyErrorListeners(msg, e.offendingToken, e);
+  }
+
+  /// This is called by {@link #reportError} when the exception is a
+  /// [FailedPredicateException].
+ /// + /// @see #reportError + /// + /// @param recognizer the parser instance + /// @param e the recognition exception + void reportFailedPredicate(Parser recognizer, FailedPredicateException e) { + final ruleName = + recognizer.ruleNames[recognizer.context.ruleIndex]; + final msg = 'rule ' + ruleName + ' ' + e.message; + recognizer.notifyErrorListeners(msg, e.offendingToken, e); + } + + /// This method is called to report a syntax error which requires the removal + /// of a token from the input stream. At the time this method is called, the + /// erroneous symbol is current {@code LT(1)} symbol and has not yet been + /// removed from the input stream. When this method returns, + /// [recognizer] is in error recovery mode. + /// + ///

+  /// This method is called when {@link #singleTokenDeletion} identifies
+  /// single-token deletion as a viable recovery strategy for a mismatched
+  /// input error.
+  ///
+  /// The default implementation simply returns if the handler is already in
+  /// error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
+  /// enter error recovery mode, followed by calling
+  /// {@link Parser#notifyErrorListeners}.
+ /// + /// @param recognizer the parser instance + void reportUnwantedToken(Parser recognizer) { + if (inErrorRecoveryMode(recognizer)) { + return; + } + + beginErrorCondition(recognizer); + + final t = recognizer.currentToken; + final tokenName = getTokenErrorDisplay(t); + final expecting = getExpectedTokens(recognizer); + final msg = 'extraneous input ' + + tokenName + + ' expecting ' + + expecting.toString(vocabulary: recognizer.vocabulary); + recognizer.notifyErrorListeners(msg, t, null); + } + + /// This method is called to report a syntax error which requires the + /// insertion of a missing token into the input stream. At the time this + /// method is called, the missing token has not yet been inserted. When this + /// method returns, [recognizer] is in error recovery mode. + /// + ///

+  /// This method is called when {@link #singleTokenInsertion} identifies
+  /// single-token insertion as a viable recovery strategy for a mismatched
+  /// input error.
+  ///
+  /// The default implementation simply returns if the handler is already in
+  /// error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
+  /// enter error recovery mode, followed by calling
+  /// {@link Parser#notifyErrorListeners}.
+ /// + /// @param recognizer the parser instance + void reportMissingToken(Parser recognizer) { + if (inErrorRecoveryMode(recognizer)) { + return; + } + + beginErrorCondition(recognizer); + + final t = recognizer.currentToken; + final expecting = getExpectedTokens(recognizer); + final msg = 'missing ' + + expecting.toString(vocabulary: recognizer.vocabulary) + + ' at ' + + getTokenErrorDisplay(t); + + recognizer.notifyErrorListeners(msg, t, null); + } + + /// {@inheritDoc} + /// + ///

+  /// The default implementation attempts to recover from the mismatched input
+  /// by using single token insertion and deletion as described below. If the
+  /// recovery attempt fails, this method throws an
+  /// [InputMismatchException].
+  ///
+  /// EXTRA TOKEN (single token deletion)
+  ///
+  /// {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
+  /// right token, however, then assume {@code LA(1)} is some extra spurious
+  /// token and delete it. Then consume and return the next token (which was
+  /// the {@code LA(2)} token) as the successful result of the match operation.
+  ///
+  /// This recovery strategy is implemented by {@link #singleTokenDeletion}.
+  ///
+  /// MISSING TOKEN (single token insertion)
+  ///
+  /// If the current token (at {@code LA(1)}) is consistent with what could
+  /// come after the expected {@code LA(1)} token, then assume the token is
+  /// missing and use the parser's [TokenFactory] to create it on the fly. The
+  /// "insertion" is performed by returning the created token as the successful
+  /// result of the match operation.
+  ///
+  /// This recovery strategy is implemented by {@link #singleTokenInsertion}.
+  ///
+  /// EXAMPLE
+  ///
+  /// For example, input {@code i=(3;} is clearly missing the {@code ')'}. When
+  /// the parser returns from the nested call to [expr], it will have the
+  /// call chain:
+  ///
+  ///     stat → expr → atom
+  ///
+  /// and it will be trying to match the {@code ')'} at this point in the
+  /// derivation:
+  ///
+  ///     => ID '=' '(' INT ')' ('+' atom)* ';'
+  ///                        ^
+ /// + /// The attempt to match {@code ')'} will fail when it sees {@code ';'} and + /// call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} + /// is in the set of tokens that can follow the {@code ')'} token reference + /// in rule [atom]. It can assume that you forgot the {@code ')'}. + + @override + Token recoverInline(Parser recognizer) { +// SINGLE TOKEN DELETION + final matchedSymbol = singleTokenDeletion(recognizer); + if (matchedSymbol != null) { +// we have deleted the extra token. +// now, move past ttype token as if all were ok + recognizer.consume(); + return matchedSymbol; + } + +// SINGLE TOKEN INSERTION + if (singleTokenInsertion(recognizer)) { + return getMissingSymbol(recognizer); + } + +// even that didn't work; must throw the exception + InputMismatchException e; + if (nextTokensContext == null) { + e = InputMismatchException(recognizer); + } else { + e = InputMismatchException( + recognizer, nextTokensState, nextTokensContext); + } + + throw e; + } + + /// This method implements the single-token insertion inline error recovery + /// strategy. It is called by {@link #recoverInline} if the single-token + /// deletion strategy fails to recover from the mismatched input. If this + /// method returns [true], [recognizer] will be in error recovery + /// mode. + /// + ///

+  /// This method determines whether or not single-token insertion is viable by
+  /// checking if the {@code LA(1)} input symbol could be successfully matched
+  /// if it were instead the {@code LA(2)} symbol. If this method returns
+  /// [true], the caller is responsible for creating and inserting a
+  /// token with the correct type to produce this behavior.
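+  ///
+  /// For example (illustrative only, not part of the original patch), given
+  /// `pair : ID '=' INT ;` and the erroneous input `x 3`, the missing `'='`
+  /// is conjured up because `3` (the current {@code LA(1)}) is exactly what
+  /// could follow the expected `'='`:
+  ///
+  /// ```
+  /// pair : ID '=' INT ;   // input: x 3
+  ///           ^ reported as "missing '=' at '3'"; parsing continues
+  /// ```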

+ /// + /// @param recognizer the parser instance + /// @return [true] if single-token insertion is a viable recovery + /// strategy for the current mismatched input, otherwise [false] + bool singleTokenInsertion(Parser recognizer) { + final currentSymbolType = recognizer.inputStream.LA(1); + // if current token is consistent with what could come after current + // ATN state, then we know we're missing a token; error recovery + // is free to conjure up and insert the missing token + final currentState = + recognizer.interpreter.atn.states[recognizer.state]; + final next = currentState.transition(0).target; + final atn = recognizer.interpreter.atn; + final expectingAtLL2 = atn.nextTokens(next, recognizer.context); +// System.out.println("LT(2) set="+expectingAtLL2.toString(recognizer.getTokenNames())); + if (expectingAtLL2.contains(currentSymbolType)) { + reportMissingToken(recognizer); + return true; + } + return false; + } + + /// This method implements the single-token deletion inline error recovery + /// strategy. It is called by {@link #recoverInline} to attempt to recover + /// from mismatched input. If this method returns null, the parser and error + /// handler state will not have changed. If this method returns non-null, + /// [recognizer] will not be in error recovery mode since the + /// returned token was a successful match. + /// + ///

+  /// If the single-token deletion is successful, this method calls
+  /// {@link #reportUnwantedToken} to report the error, followed by
+  /// {@link Parser#consume} to actually "delete" the extraneous token. Then,
+  /// before returning, {@link #reportMatch} is called to signal a successful
+  /// match.
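+  ///
+  /// For example (illustrative only, not part of the original patch), given
+  /// `pair : ID '=' INT ;` and the erroneous input `x = = 3`, the second `'='`
+  /// is dropped because `3` (the {@code LA(2)} token) is what the rule expects
+  /// next:
+  ///
+  /// ```
+  /// pair : ID '=' INT ;   // input: x = = 3
+  ///                       // "extraneous input '=' expecting INT"
+  /// ```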

+
+  /// @param recognizer the parser instance
+  /// @return the successfully matched [Token] instance if single-token
+  /// deletion successfully recovers from the mismatched input, otherwise
+  /// null
+  Token singleTokenDeletion(Parser recognizer) {
+    final nextTokenType = recognizer.inputStream.LA(2);
+    final expecting = getExpectedTokens(recognizer);
+    if (expecting.contains(nextTokenType)) {
+      reportUnwantedToken(recognizer);
+      /*
+        log("recoverFromMismatchedToken deleting "+
+           ((TokenStream)recognizer.inputStream).LT(1)+
+           " since "+((TokenStream)recognizer.inputStream).LT(2)+
+           " is what we want", level: Level.SEVERE.value);
+      */
+      recognizer.consume(); // simply delete extra token
+      // we want to return the token we're actually matching
+      final matchedSymbol = recognizer.currentToken;
+      reportMatch(recognizer); // we know current token is correct
+      return matchedSymbol;
+    }
+    return null;
+  }
+
+  /// Conjure up a missing token during error recovery.
+  ///
+  /// The recognizer attempts to recover from single missing
+  /// symbols. But, actions might refer to that missing symbol.
+  /// For example, x=ID {f($x);}. The action clearly assumes
+  /// that there has been an identifier matched previously and that
+  /// $x points at that token. If that token is missing, but
+  /// the next token in the stream is what we want we assume that
+  /// this token is missing and we keep going. Because we
+  /// have to return some token to replace the missing token,
+  /// we have to conjure one up. This method gives the user control
+  /// over the tokens returned for missing tokens. Mostly,
+  /// you will want to create something special for identifier
+  /// tokens. For literals such as '{' and ',', the default
+  /// action in the parser or tree parser works. It simply creates
+  /// a CommonToken of the appropriate type. The text will be the token.
+  /// If you change what tokens must be created by the lexer,
+  /// override this method to create the appropriate tokens.
+  Token getMissingSymbol(Parser recognizer) {
+    final currentSymbol = recognizer.currentToken;
+    final expecting = getExpectedTokens(recognizer);
+    var expectedTokenType = Token.INVALID_TYPE;
+    if (!expecting.isNil) {
+      expectedTokenType = expecting.minElement; // get any element
+    }
+    String tokenText;
+    if (expectedTokenType == Token.EOF) {
+      tokenText = '<missing EOF>';
+    } else {
+      tokenText =
+          '<missing ${recognizer.vocabulary.getDisplayName(expectedTokenType)}>';
+    }
+    var current = currentSymbol;
+    final lookback = recognizer.inputStream.LT(-1);
+    if (current.type == Token.EOF && lookback != null) {
+      current = lookback;
+    }
+    return recognizer.tokenFactory.create(
+        expectedTokenType,
+        tokenText,
+        Pair(current.tokenSource, current.tokenSource.inputStream),
+        Token.DEFAULT_CHANNEL,
+        -1,
+        -1,
+        current.line,
+        current.charPositionInLine);
+  }
+
+  IntervalSet getExpectedTokens(Parser recognizer) {
+    return recognizer.expectedTokens;
+  }
+
+  /// How should a token be displayed in an error message? The default
+  /// is to display just the text, but during development you might
+  /// want to have a lot of information spit out. Override in that case
+  /// to use t.toString() (which, for CommonToken, dumps everything about
+  /// the token). This is better than forcing you to override a method in
+  /// your token objects because you don't have to go modify your lexer
+  /// so that it creates a new token type.
+  String getTokenErrorDisplay(Token t) {
+    if (t == null) return '<no token>';
+    var s = getSymbolText(t);
+    if (s == null) {
+      if (getSymbolType(t) == Token.EOF) {
+        s = '<EOF>';
+      } else {
+        s = '<${getSymbolType(t)}>';
+      }
+    }
+    return escapeWSAndQuote(s);
+  }
+
+  String getSymbolText(Token symbol) {
+    return symbol.text;
+  }
+
+  int getSymbolType(Token symbol) {
+    return symbol.type;
+  }
+
+  String escapeWSAndQuote(String s) {
+//    if ( s==null ) return s;
+    s = s.replaceAll('\n', r'\n');
+    s = s.replaceAll('\r', r'\r');
+    s = s.replaceAll('\t', r'\t');
+    return "'" + s + "'";
+  }
+
+/* Compute the error recovery set for the current rule. During
+ * rule invocation, the parser pushes the set of tokens that can
+ * follow that rule reference on the stack; this amounts to
+ * computing FIRST of what follows the rule reference in the
+ * enclosing rule. See LinearApproximator.FIRST().
+ * This local follow set only includes tokens
+ * from within the rule; i.e., the FIRST computation done by
+ * ANTLR stops at the end of a rule.
+ *
+ * EXAMPLE
+ *
+ * When you find a "no viable alt exception", the input is not
+ * consistent with any of the alternatives for rule r. The best
+ * thing to do is to consume tokens until you see something that
+ * can legally follow a call to r *or* any rule that called r.
+ * You don't want the exact set of viable next tokens because the
+ * input might just be missing a token--you might consume the
+ * rest of the input looking for one of the missing tokens.
+ *
+ * Consider grammar:
+ *
+ *   a : '[' b ']'
+ *     | '(' b ')'
+ *     ;
+ *   b : c '^' INT ;
+ *   c : ID
+ *     | INT
+ *     ;
+ *
+ * At each rule invocation, the set of tokens that could follow
+ * that rule is pushed on a stack. Here are the various
+ * context-sensitive follow sets:
+ *
+ *   FOLLOW(b1_in_a) = FIRST(']') = ']'
+ *   FOLLOW(b2_in_a) = FIRST(')') = ')'
+ *   FOLLOW(c_in_b)  = FIRST('^') = '^'
+ *
+ * Upon erroneous input "[]", the call chain is
+ *
+ *   a -> b -> c
+ *
+ * and, hence, the follow context stack is:
+ *
+ *   depth  follow set  start of rule execution
+ *   0      <EOF>       a (from main())
+ *   1      ']'         b
+ *   2      '^'         c
+ *
+ * Notice that ')' is not included, because b would have to have
+ * been called from a different context in rule a for ')' to be
+ * included.
+ *
+ * For error recovery, we cannot consider FOLLOW(c)
+ * (context-sensitive or otherwise). We need the combined set of
+ * all context-sensitive FOLLOW sets--the set of all tokens that
+ * could follow any reference in the call chain. We need to
+ * resync to one of those tokens. Note that FOLLOW(c)='^' and if
+ * we resync'd to that token, we'd consume until EOF. We need to
+ * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
+ * In this case, for input "[]", LA(1) is ']' and in the set, so we would
+ * not consume anything. After printing an error, rule c would
+ * return normally. Rule b would not find the required '^' though.
+ * At this point, it gets a mismatched token error and throws an
+ * exception (since LA(1) is not in the viable following token
+ * set). The rule exception handler tries to recover, but finds
+ * the same recovery set and doesn't consume anything. Rule b
+ * exits normally returning to rule a. Now it finds the ']' (and
+ * with the successful match exits errorRecovery mode).
+ *
+ * So, you can see that the parser walks up the call chain looking
+ * for the token that was a member of the recovery set.
+ *
+ * Errors are not generated in errorRecovery mode.
+ * + * ANTLR's error recovery mechanism is based upon original ideas: + * + * "Algorithms + Data Structures = Programs" by Niklaus Wirth + * + * and + * + * "A note on error recovery in recursive descent parsers": + * http://portal.acm.org/citation.cfm?id=947902.947905 + * + * Later, Josef Grosch had some good ideas: + * + * "Efficient and Comfortable Error Recovery in Recursive Descent + * Parsers": + * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip + * + * Like Grosch I implement context-sensitive FOLLOW sets that are combined + * at run-time upon error to avoid overhead during parsing. + */ + IntervalSet getErrorRecoverySet(Parser recognizer) { + final atn = recognizer.interpreter.atn; + RuleContext ctx = recognizer.context; + final recoverSet = IntervalSet(); + while (ctx != null && ctx.invokingState >= 0) { + // compute what follows who invoked us + final invokingState = atn.states[ctx.invokingState]; + RuleTransition rt = invokingState.transition(0); + final follow = atn.nextTokens(rt.followState); + recoverSet.addAll(follow); + ctx = ctx.parent; + } + recoverSet.remove(Token.EPSILON); +// System.out.println("recover set "+recoverSet.toString(recognizer.getTokenNames())); + return recoverSet; + } + + /// Consume tokens until one matches the given token set. */ + void consumeUntil(Parser recognizer, IntervalSet set) { +// log("consumeUntil("+set.toString(recognizer.getTokenNames())+")", level: Level.SEVERE.value); + var ttype = recognizer.inputStream.LA(1); + while (ttype != Token.EOF && !set.contains(ttype)) { + //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]); +// recognizer.inputStream.consume(); + recognizer.consume(); + ttype = recognizer.inputStream.LA(1); + } + } +} + +/// This implementation of [ANTLRErrorStrategy] responds to syntax errors +/// by immediately canceling the parse operation with a +/// [ParseCancellationException]. The implementation ensures that the +/// {@link ParserRuleContext#exception} field is set for all parse tree nodes +/// that were not completed prior to encountering the error. +/// +///

+/// This error strategy is useful in the following scenarios.
+///
+/// * Two-stage parsing: This error strategy allows the first
+///   stage of two-stage parsing to immediately terminate if an error is
+///   encountered, and immediately fall back to the second stage. In addition to
+///   avoiding wasted work by attempting to recover from errors here, the empty
+///   implementation of {@link BailErrorStrategy#sync} improves the performance of
+///   the first stage.
+/// * Silent validation: When syntax errors are not being
+///   reported or logged, and the parse result is simply ignored if errors occur,
+///   the [BailErrorStrategy] avoids wasting work on recovering from errors
+///   when the result will be ignored either way.
+///
+/// {@code myparser.setErrorHandler(new BailErrorStrategy());}
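+///
+/// A rough two-stage sketch (illustrative only, not part of the original
+/// patch; `MyParser` and `startRule` are hypothetical, and the parser is
+/// assumed to expose the runtime's `errorHandler` field):
+///
+/// ```dart
+/// parser.errorHandler = BailErrorStrategy();
+/// try {
+///   parser.startRule(); // fast SLL attempt
+/// } on ParseCancellationException {
+///   // fall back: rewind the token stream and re-parse with
+///   // DefaultErrorStrategy and full-context prediction
+/// }
+/// ```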

+/// +/// @see Parser#setErrorHandler(ANTLRErrorStrategy) +class BailErrorStrategy extends DefaultErrorStrategy { + /// Instead of recovering from exception [e], re-throw it wrapped + /// in a [ParseCancellationException] so it is not caught by the + /// rule function catches. Use {@link Exception#getCause()} to get the + /// original [RecognitionException]. + + @override + void recover(Parser recognizer, RecognitionException e) { + for (var context = recognizer.context; + context != null; + context = context.parent) { + context.exception = e; + } + + throw ParseCancellationException(e.message); + } + + /// Make sure we don't attempt to recover inline; if the parser + /// successfully recovers, it won't throw an exception. + + @override + Token recoverInline(Parser recognizer) { + final e = InputMismatchException(recognizer); + for (var context = recognizer.context; + context != null; + context = context.parent) { + context.exception = e; + } + + throw ParseCancellationException(e.message); + } + + /// Make sure we don't attempt to recover from problems in subrules. */ + + @override + void sync(Parser recognizer) {} +} diff --git a/runtime/Dart/lib/src/error/src/errors.dart b/runtime/Dart/lib/src/error/src/errors.dart new file mode 100644 index 000000000..653905ec6 --- /dev/null +++ b/runtime/Dart/lib/src/error/src/errors.dart @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../atn/atn.dart'; +import '../../input_stream.dart'; +import '../../interval_set.dart'; +import '../../lexer.dart'; +import '../../parser.dart'; +import '../../parser_rule_context.dart'; +import '../../recognizer.dart'; +import '../../rule_context.dart'; +import '../../token.dart'; +import '../../token_stream.dart'; +import '../../util/utils.dart'; + +/// The root of the ANTLR exception hierarchy. In general, ANTLR tracks just +/// 3 kinds of errors: prediction errors, failed predicate errors, and +/// mismatched input errors. In each case, the parser knows where it is +/// in the input, where it is in the ATN, the rule invocation stack, +/// and what kind of problem occurred. +class RecognitionException extends StateError { + /// Gets the [Recognizer] where this exception occurred. + /// + ///

+  /// If the recognizer is not available, this method returns null.
+ /// + /// @return The recognizer where this exception occurred, or null if + /// the recognizer is not available. + final Recognizer recognizer; + + /// Gets the [RuleContext] at the time this exception was thrown. + /// + ///

+  /// If the context is not available, this method returns null.
+ /// + /// @return The [RuleContext] at the time this exception was thrown. + /// If the context is not available, this method returns null. + final RuleContext ctx; + + /// Gets the input stream which is the symbol source for the recognizer where + /// this exception was thrown. + /// + ///

+  /// If the input stream is not available, this method returns null.
+ /// + /// @return The input stream which is the symbol source for the recognizer + /// where this exception was thrown, or null if the stream is not + /// available. + final IntStream inputStream; + + /// The current [Token] when an error occurred. Since not all streams + /// support accessing symbols by index, we have to track the [Token] + /// instance itself. + Token offendingToken; + + /// Get the ATN state number the parser was in at the time the error + /// occurred. For [NoViableAltException] and + /// [LexerNoViableAltException] exceptions, this is the + /// [DecisionState] number. For others, it is the state whose outgoing + /// edge we couldn't match. + /// + ///
+ /// If the state number is not known, this method returns -1.
+ int offendingState = -1; + + RecognitionException(this.recognizer, this.inputStream, this.ctx, + [String message = '']) + : super(message) { + if (recognizer != null) offendingState = recognizer.state; + } + + /// Gets the set of input symbols which could potentially follow the + /// previously matched symbol at the time this exception was thrown. + /// + ///
+ /// If the set of expected tokens is not known and could not be computed,
+ /// this method returns null.
+ /// + /// @return The set of token types that could potentially follow the current + /// state in the ATN, or null if the information is not available. + IntervalSet get expectedTokens { + if (recognizer != null) { + return recognizer.getATN().getExpectedTokens(offendingState, ctx); + } + return null; + } +} + +class LexerNoViableAltException extends RecognitionException { + /// Matching attempted at what input index? */ + final int startIndex; + + /// Which configurations did we try at input.index() that couldn't match input.LA(1)? */ + final ATNConfigSet deadEndConfigs; + + LexerNoViableAltException( + Lexer lexer, CharStream input, this.startIndex, this.deadEndConfigs) + : super(lexer, input, null); + + @override + CharStream get inputStream { + return super.inputStream; + } + + @override + String toString() { + var symbol = ''; + if (startIndex >= 0 && startIndex < inputStream.size) { + symbol = inputStream.getText(Interval.of(startIndex, startIndex)); + symbol = escapeWhitespace(symbol); + } + + return "${LexerNoViableAltException}('${symbol}')"; + } +} + +/// Indicates that the parser could not decide which of two or more paths +/// to take based upon the remaining input. It tracks the starting token +/// of the offending input and also knows where the parser was +/// in the various paths when the error. Reported by reportNoViableAlternative() +class NoViableAltException extends RecognitionException { + /// Which configurations did we try at input.index() that couldn't match input.LT(1)? */ + + final ATNConfigSet deadEndConfigs; + + /// The token object at the start index; the input stream might + /// not be buffering tokens so get a reference to it. (At the + /// time the error occurred, of course the stream needs to keep a + /// buffer all of the tokens but later we might not have access to those.) + + final Token startToken; + +// NoViableAltException(Parser recognizer) { // LL(1) error +// this(recognizer, +// recognizer.inputStream, +// recognizer.getCurrentToken(), +// recognizer.getCurrentToken(), +// null, +// recognizer._ctx); +// } + + NoViableAltException._(Parser recognizer, TokenStream input, this.startToken, + Token offendingToken, this.deadEndConfigs, ParserRuleContext ctx) + : super(recognizer, input, ctx) { + this.offendingToken = offendingToken; + } + + NoViableAltException(Parser recognizer, + [TokenStream input, + Token startToken, + Token offendingToken, + ATNConfigSet deadEndConfigs, + ParserRuleContext ctx]) + : this._( + recognizer, + input ?? recognizer.inputStream, + startToken ?? recognizer.currentToken, + offendingToken ?? recognizer.currentToken, + deadEndConfigs, + ctx ?? recognizer.context); +} + +/// This signifies any kind of mismatched input exceptions such as +/// when the current input does not match the expected token. +class InputMismatchException extends RecognitionException { + InputMismatchException(Parser recognizer, + [int state = -1, ParserRuleContext ctx]) + : super(recognizer, recognizer.inputStream, ctx ?? recognizer.context) { + if (state != -1 && ctx != null) { + offendingState = state; + } + offendingToken = recognizer.currentToken; + } +} + +/// A semantic predicate failed during validation. Validation of predicates +/// occurs when normally parsing the alternative just like matching a token. +/// Disambiguating predicate evaluation occurs when we test a predicate during +/// prediction. 
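+///
+/// For example, [formatMessage] below renders the failing predicate into the
+/// exception message (the predicate text 'amount > 0' is only an illustration):
+///
+/// ```
+/// FailedPredicateException.formatMessage('amount > 0', null);
+/// // == 'failed predicate: {amount > 0}?'
+/// ```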
+class FailedPredicateException extends RecognitionException { + int ruleIndex; + int predIndex; + final String predicate; + + FailedPredicateException(Parser recognizer, + [this.predicate, String message]) + : super(recognizer, recognizer.inputStream, recognizer.context, + formatMessage(predicate, message)) { + final s = recognizer.interpreter.atn.states[recognizer.state]; + + AbstractPredicateTransition trans = s.transition(0); + if (trans is PredicateTransition) { + ruleIndex = trans.ruleIndex; + predIndex = trans.predIndex; + } + offendingToken = recognizer.currentToken; + } + + static String formatMessage(String predicate, String message) { + if (message != null) { + return message; + } + + return 'failed predicate: {$predicate}?'; + } +} diff --git a/runtime/Dart/lib/src/input_stream.dart b/runtime/Dart/lib/src/input_stream.dart new file mode 100644 index 000000000..63dbd6089 --- /dev/null +++ b/runtime/Dart/lib/src/input_stream.dart @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:async'; +import 'dart:convert'; +import 'dart:io'; +import 'dart:math'; + +import 'interval_set.dart'; +import 'token.dart'; + +/// A simple stream of symbols whose values are represented as integers. This +/// interface provides marked ranges with support for a minimum level +/// of buffering necessary to implement arbitrary lookahead during prediction. +/// For more information on marked ranges, see {@link #mark}. +/// +///
+/// Initializing Methods: Some methods in this interface have
+/// unspecified behavior if no call to an initializing method has occurred after
+/// the stream was constructed. The following is a list of initializing methods:
+///
+/// - {@link #LA}
+/// - {@link #consume}
+/// - {@link #size}
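+///
+/// For example, a minimal scan over an initialized stream might look like this
+/// (a sketch; `stream` stands for any concrete [IntStream], such as the
+/// [InputStream] defined below):
+///
+/// ```
+/// while (stream.LA(1) != IntStream.EOF) {
+///   stream.consume();
+/// }
+/// ```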
+abstract class IntStream { + /// The value returned by {@link #LA LA()} when the end of the stream is + /// reached. + static const int EOF = -1; + + /// The value returned by {@link #getSourceName} when the actual name of the + /// underlying source is not known. + static const UNKNOWN_SOURCE_NAME = ''; + + /// Consumes the current symbol in the stream. This method has the following + /// effects: + /// + ///
+ /// - Forward movement: The value of {@link #index index()}
+ ///   before calling this method is less than the value of {@code index()}
+ ///   after calling this method.
+ /// - Ordered lookahead: The value of {@code LA(1)} before
+ ///   calling this method becomes the value of {@code LA(-1)} after calling
+ ///   this method.
+ /// + /// Note that calling this method does not guarantee that {@code index()} is + /// incremented by exactly 1, as that would preclude the ability to implement + /// filtering streams (e.g. [CommonTokenStream] which distinguishes + /// between "on-channel" and "off-channel" tokens). + /// + /// @throws IllegalStateException if an attempt is made to consume the + /// end of the stream (i.e. if {@code LA(1)==}{@link #EOF EOF} before calling + /// [consume]). + void consume(); + + /// Gets the value of the symbol at offset [i] from the current + /// position. When {@code i==1}, this method returns the value of the current + /// symbol in the stream (which is the next symbol to be consumed). When + /// {@code i==-1}, this method returns the value of the previously read + /// symbol in the stream. It is not valid to call this method with + /// {@code i==0}, but the specific behavior is unspecified because this + /// method is frequently called from performance-critical code. + /// + ///
+ /// This method is guaranteed to succeed if any of the following are true:
+ ///
+ /// - {@code i>0}
+ /// - {@code i==-1} and {@link #index index()} returns a value greater
+ ///   than the value of {@code index()} after the stream was constructed
+ ///   and {@code LA(1)} was called in that order. Specifying the current
+ ///   {@code index()} relative to the index after the stream was created
+ ///   allows for filtering implementations that do not return every symbol
+ ///   from the underlying source. Specifying the call to {@code LA(1)}
+ ///   allows for lazily initialized streams.
+ /// - {@code LA(i)} refers to a symbol consumed within a marked region
+ ///   that has not yet been released.
+ /// + ///
+ /// If [i] represents a position at or beyond the end of the stream,
+ /// this method returns {@link #EOF}.
+ /// + ///
+ /// The return value is unspecified if {@code i<0} and fewer than {@code -i}
+ /// calls to {@link #consume consume()} have occurred from the beginning of
+ /// the stream before calling this method.
+ /// + /// @throws UnsupportedOperationException if the stream does not support + /// retrieving the value of the specified symbol + int LA(int i); + + /// A mark provides a guarantee that {@link #seek seek()} operations will be + /// valid over a "marked range" extending from the index where {@code mark()} + /// was called to the current {@link #index index()}. This allows the use of + /// streaming input sources by specifying the minimum buffering requirements + /// to support arbitrary lookahead during prediction. + /// + ///
+ /// The returned mark is an opaque handle (type [int]) which is passed
+ /// to {@link #release release()} when the guarantees provided by the marked
+ /// range are no longer necessary. When calls to
+ /// {@code mark()}/{@code release()} are nested, the marks must be released
+ /// in the reverse order in which they were obtained. Since marked regions
+ /// are used during performance-critical sections of prediction, the specific
+ /// behavior of invalid usage is unspecified (e.g. a mark is not released, or
+ /// a mark is released twice, or marks are not released in the reverse order
+ /// in which they were created).
+ /// + ///
+ /// The behavior of this method is unspecified if no call to an
+ /// {@link IntStream initializing method} has occurred after this stream was
+ /// constructed.
+ /// + ///
+ /// This method does not change the current position in the input stream.
+ /// + ///
+ /// The following example shows the use of {@link #mark mark()},
+ /// {@link #release release(mark)}, {@link #index index()}, and
+ /// {@link #seek seek(index)} as part of an operation to safely work within a
+ /// marked region, then restore the stream position to its original value and
+ /// release the mark.
+ ///
+ /// ```
+ /// IntStream stream = ...;
+ /// var index = -1;
+ /// final mark = stream.mark();
+ /// try {
+ ///   index = stream.index;
+ ///   // perform work here...
+ /// } finally {
+ ///   if (index != -1) {
+ ///     stream.seek(index);
+ ///   }
+ ///   stream.release(mark);
+ /// }
+ /// ```
+ /// + /// @return An opaque marker which should be passed to + /// {@link #release release()} when the marked range is no longer required. + int mark(); + + /// This method releases a marked range created by a call to + /// {@link #mark mark()}. Calls to {@code release()} must appear in the + /// reverse order of the corresponding calls to {@code mark()}. If a mark is + /// released twice, or if marks are not released in reverse order of the + /// corresponding calls to {@code mark()}, the behavior is unspecified. + /// + ///
+ /// For more information and an example, see {@link #mark}.
+ /// + /// @param marker A marker returned by a call to {@code mark()}. + /// @see #mark + void release(int marker); + + /// Return the index into the stream of the input symbol referred to by + /// {@code LA(1)}. + /// + ///
+ /// The behavior of this method is unspecified if no call to an
+ /// {@link IntStream initializing method} has occurred after this stream was
+ /// constructed.
+ int get index; + + /// Set the input cursor to the position indicated by [index]. If the + /// specified index lies past the end of the stream, the operation behaves as + /// though [index] was the index of the EOF symbol. After this method + /// returns without throwing an exception, then at least one of the following + /// will be true. + /// + ///
+ /// - {@link #index index()} will return the index of the first symbol
+ ///   appearing at or after the specified [index]. Specifically,
+ ///   implementations which filter their sources should automatically
+ ///   adjust [index] forward the minimum amount required for the
+ ///   operation to target a non-ignored symbol.
+ /// - {@code LA(1)} returns {@link #EOF}
+ /// + /// This operation is guaranteed to not throw an exception if [index] + /// lies within a marked region. For more information on marked regions, see + /// {@link #mark}. The behavior of this method is unspecified if no call to + /// an {@link IntStream initializing method} has occurred after this stream + /// was constructed. + /// + /// @param index The absolute index to seek to. + /// + /// @throws IllegalArgumentException if [index] is less than 0 + /// @throws UnsupportedOperationException if the stream does not support + /// seeking to the specified index + void seek(int index); + + /// Returns the total number of symbols in the stream, including a single EOF + /// symbol. + /// + /// @throws UnsupportedOperationException if the size of the stream is + /// unknown. + int get size; + + /// Gets the name of the underlying symbol source. This method returns a + /// non-null, non-empty string. If such a name is not known, this method + /// returns {@link #UNKNOWN_SOURCE_NAME}. + + String get sourceName; +} + +/// A source of characters for an ANTLR lexer. */ +abstract class CharStream extends IntStream { + /// This method returns the text for a range of characters within this input + /// stream. This method is guaranteed to not throw an exception if the + /// specified [interval] lies entirely within a marked range. For more + /// information about marked ranges, see {@link IntStream#mark}. + /// + /// @param interval an interval within the stream + /// @return the text of the specified interval + /// + /// @throws NullPointerException if [interval] is null + /// @throws IllegalArgumentException if {@code interval.a < 0}, or if + /// {@code interval.b < interval.a - 1}, or if {@code interval.b} lies at or + /// past the end of the stream + /// @throws UnsupportedOperationException if the stream does not support + /// getting the text of the specified interval + String getText(Interval interval); +} + +// Vacuum all input from a string and then treat it like a buffer. +class InputStream extends CharStream { + final name = ''; + List data; + int _index = 0; + bool decodeToUnicodeCodePoints = false; + + InputStream(List data) { + this.data = data; + } + + InputStream.fromString(String data) { + this.data = data.runes.toList(growable: false); + } + + static Future fromStringStream(Stream stream) async { + final data = StringBuffer(); + await stream.listen((buf) { + data.write(buf); + }).asFuture(); + return InputStream.fromString(data.toString()); + } + + static Future fromStream(Stream> stream, + {Encoding encoding = utf8}) { + final data = stream.transform(encoding.decoder); + return fromStringStream(data); + } + + static Future fromPath(String path, {Encoding encoding = utf8}) { + return fromStream(File(path).openRead()); + } + + @override + int get index { + return _index; + } + + @override + int get size { + return data.length; + } + + /// Reset the stream so that it's in the same state it was + /// when the object was created *except* the data array is not + /// touched. 
+ void reset() { + _index = 0; + } + + @override + void consume() { + if (_index >= size) { + // assert this.LA(1) == Token.EOF + throw ('cannot consume EOF'); + } + _index += 1; + } + + @override + int LA(int offset) { + if (offset == 0) { + return 0; // undefined + } + if (offset < 0) { + offset += 1; // e.g., translate LA(-1) to use offset=0 + } + final pos = _index + offset - 1; + if (pos < 0 || pos >= size) { + // invalid + return Token.EOF; + } + return data[pos]; + } + + /// mark/release do nothing; we have entire buffer + @override + int mark() { + return -1; + } + + @override + void release(int marker) {} + + /// consume() ahead until p==_index; can't just set p=_index as we must + /// update line and column. If we seek backwards, just set p + @override + void seek(int _index) { + if (_index <= this._index) { + this._index = _index; // just jump; don't update stream state (line, + // ...) + return; + } + // seek forward + this._index = min(_index, size); + } + + @override + String getText(Interval interval) { + final startIdx = min(interval.a, size); + final len = min(interval.b - interval.a + 1, size - startIdx); + return String.fromCharCodes(data, startIdx, startIdx + len); + } + + @override + String toString() { + return String.fromCharCodes(data); + } + + @override + String get sourceName { + // TODO: implement getSourceName + return IntStream.UNKNOWN_SOURCE_NAME; + } +} diff --git a/runtime/Dart/lib/src/interval_set.dart b/runtime/Dart/lib/src/interval_set.dart new file mode 100644 index 000000000..09cdba533 --- /dev/null +++ b/runtime/Dart/lib/src/interval_set.dart @@ -0,0 +1,735 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:math'; + +import 'package:collection/collection.dart'; + +import 'lexer.dart'; +import 'token.dart'; +import 'util/murmur_hash.dart'; +import 'vocabulary.dart'; + +/// An immutable inclusive interval a..b */ +class Interval { + static final int INTERVAL_POOL_MAX_VALUE = 1000; + + static final Interval INVALID = Interval(-1, -2); + + static List cache = List(INTERVAL_POOL_MAX_VALUE + 1); + + int a; + int b; + + static int creates = 0; + static int misses = 0; + static int hits = 0; + static int outOfRange = 0; + + Interval(this.a, this.b); + + /// Interval objects are used readonly so share all with the + /// same single value a==b up to some max size. Use an array as a perfect hash. + /// Return shared object for 0..INTERVAL_POOL_MAX_VALUE or a new + /// Interval object with a..a in it. On Java.g4, 218623 IntervalSets + /// have a..a (set with 1 element). + static Interval of(int a, int b) { + // cache just a..a + if (a != b || a < 0 || a > INTERVAL_POOL_MAX_VALUE) { + return Interval(a, b); + } + if (cache[a] == null) { + cache[a] = Interval(a, a); + } + return cache[a]; + } + + /// return number of elements between a and b inclusively. x..x is length 1. + /// if b < a, then length is 0. 9..10 has length 2. + int get length { + if (b < a) return 0; + return b - a + 1; + } + + @override + bool operator ==(Object o) { + if (o == null || !(o is Interval)) { + return false; + } + Interval other = o; + return a == other.a && b == other.b; + } + + @override + int get hashCode { + var hash = 23; + hash = hash * 31 + a; + hash = hash * 31 + b; + return hash; + } + + /// Does this start completely before other? 
Disjoint */ + bool startsBeforeDisjoint(Interval other) { + return a < other.a && b < other.a; + } + + /// Does this start at or before other? Nondisjoint */ + bool startsBeforeNonDisjoint(Interval other) { + return a <= other.a && b >= other.a; + } + + /// Does this.a start after other.b? May or may not be disjoint */ + bool startsAfter(Interval other) { + return a > other.a; + } + + /// Does this start completely after other? Disjoint */ + bool startsAfterDisjoint(Interval other) { + return a > other.b; + } + + /// Does this start after other? NonDisjoint */ + bool startsAfterNonDisjoint(Interval other) { + return a > other.a && a <= other.b; // this.b>=other.b implied + } + + /// Are both ranges disjoint? I.e., no overlap? */ + bool disjoint(Interval other) { + return startsBeforeDisjoint(other) || startsAfterDisjoint(other); + } + + /// Are two intervals adjacent such as 0..41 and 42..42? */ + bool adjacent(Interval other) { + return a == other.b + 1 || b == other.a - 1; + } + + bool properlyContains(Interval other) { + return other.a >= a && other.b <= b; + } + + /// Return the interval computed from combining this and other */ + Interval union(Interval other) { + return Interval.of(min(a, other.a), max(b, other.b)); + } + + /// Return the interval in common between this and o */ + Interval intersection(Interval other) { + return Interval.of(max(a, other.a), min(b, other.b)); + } + + /// Return the interval with elements from this not in other; + /// other must not be totally enclosed (properly contained) + /// within this, which would result in two disjoint intervals + /// instead of the single one returned by this method. + Interval differenceNotProperlyContained(Interval other) { + Interval diff; + // other.a to left of this.a (or same) + if (other.startsBeforeNonDisjoint(this)) { + diff = Interval.of(max(a, other.b + 1), b); + } + + // other.a to right of this.a + else if (other.startsAfterNonDisjoint(this)) { + diff = Interval.of(a, other.a - 1); + } + return diff; + } + + @override + String toString() { + return '$a..$b'; + } +} + +/// This class implements the [IntervalSet] backed by a sorted array of +/// non-overlapping intervals. It is particularly efficient for representing +/// large collections of numbers, where the majority of elements appear as part +/// of a sequential range of numbers that are all part of the set. For example, +/// the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }. +/// +///
+/// This class is able to represent sets containing any combination of values in
+/// the range {@link int#MIN_VALUE} to {@link int#MAX_VALUE} (inclusive).
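+///
+/// For example, a sketch using the factory constructors defined below:
+///
+/// ```
+/// final set = IntervalSet.ofRange(1, 4)..addRange(7, 8);
+/// assert(set.contains(3));
+/// assert(!set.contains(5));
+/// assert(set.length == 6);
+/// ```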
+class IntervalSet { + static final IntervalSet COMPLETE_CHAR_SET = + IntervalSet.ofRange(Lexer.MIN_CHAR_VALUE, Lexer.MAX_CHAR_VALUE) + ..setReadonly(true); + + static final IntervalSet EMPTY_SET = IntervalSet([])..setReadonly(true); + + /// The list of sorted, disjoint intervals. */ + List intervals = []; + + bool readonly = false; + + IntervalSet([List intervals]) { + this.intervals = intervals ?? []; + } + + IntervalSet.ofSet(IntervalSet set) { + addAll(set); + } + +// TODO +// IntervalSet(int... els) { +//if ( els==null ) { +//intervals = new ArrayList(2); // most sets are 1 or 2 elements +//} +//else { +//intervals = new ArrayList(els.length); +//for (int e : els) add(e); +//} +//} + + /// Create a set with a single element, el. */ + + IntervalSet.ofOne(int a) { + addOne(a); + } + + /// Create a set with all ints within range [a..b] (inclusive) */ + static IntervalSet ofRange(int a, int b) { + final s = IntervalSet(); + s.addRange(a, b); + return s; + } + + void clear() { + if (readonly) throw StateError("can't alter readonly IntervalSet"); + intervals.clear(); + } + + /// Add a single element to the set. An isolated element is stored + /// as a range el..el. + + void addOne(int el) { + if (readonly) throw StateError("can't alter readonly IntervalSet"); + addRange(el, el); + } + + /// Add interval; i.e., add all integers from a to b to set. + /// If b<a, do nothing. + /// Keep list in sorted order (by left range value). + /// If overlap, combine ranges. For example, + /// If this is {1..5, 10..20}, adding 6..7 yields + /// {1..5, 6..7, 10..20}. Adding 4..8 yields {1..8, 10..20}. + void addRange(int a, int b) { + add(Interval.of(a, b)); + } + + // copy on write so we can cache a..a intervals and sets of that + void add(Interval addition) { + if (readonly) throw StateError("can't alter readonly IntervalSet"); + //System.out.println("add "+addition+" to "+intervals.toString()); + if (addition.b < addition.a) { + return; + } + for (var i = 0; i < intervals.length; i++) { + final r = intervals[i]; + if (addition == r) { + return; + } + if (addition.adjacent(r) || !addition.disjoint(r)) { + // next to each other, make a single larger interval + final bigger = addition.union(r); + intervals[i] = bigger; + + // make sure we didn't just create an interval that + // should be merged with next interval in list + for (i++; i < intervals.length; i++) { + final next = intervals[i]; + if (!bigger.adjacent(next) && bigger.disjoint(next)) { + break; + } + + // if we bump up against or overlap next, merge + intervals.removeAt(i); // remove this one + intervals[i - 1] = + bigger.union(next); // set previous to 3 merged ones + } + return; + } + if (addition.startsBeforeDisjoint(r)) { + // insert before r + intervals.insert(i, addition); + return; + } + // if disjoint and after r, a future iteration will handle it + + } + // ok, must be after last interval (and disjoint from last interval) + // just add it + intervals.add(addition); + } + + /// combine all sets in the array returned the or'd value */ + static IntervalSet or(List sets) { + final r = IntervalSet(); + for (final s in sets) { + r.addAll(s); + } + return r; + } + + IntervalSet operator |(IntervalSet a) { + final o = IntervalSet(); + o.addAll(this); + o.addAll(a); + return o; + } + + IntervalSet addAll(IntervalSet set) { + if (set == null) { + return this; + } + + if (set is IntervalSet) { + final other = set; + // walk set and add each interval + final n = other.intervals.length; + for (var i = 0; i < n; i++) { + final I = other.intervals[i]; 
+ addRange(I.a, I.b); + } + } else { + for (final value in set.toList()) { + addOne(value); + } + } + + return this; + } + + IntervalSet complementRange(int minElement, int maxElement) { + return complement(IntervalSet.ofRange(minElement, maxElement)); + } + + /// {@inheritDoc} */ + IntervalSet complement(IntervalSet vocabulary) { + if (vocabulary == null || vocabulary.isNil) { + return null; // nothing in common with null set + } + IntervalSet vocabularyIS; + if (vocabulary is IntervalSet) { + vocabularyIS = vocabulary; + } else { + vocabularyIS = IntervalSet(); + vocabularyIS.addAll(vocabulary); + } + + return vocabularyIS - this; + } + + IntervalSet operator -(IntervalSet a) { + if (a == null || a.isNil) { + return IntervalSet.ofSet(this); + } + + if (a is IntervalSet) { + return subtract(this, a); + } + + final other = IntervalSet(); + other.addAll(a); + return subtract(this, other); + } + + /// Compute the set difference between two interval sets. The specific + /// operation is {@code left - right}. If either of the input sets is + /// null, it is treated as though it was an empty set. + static IntervalSet subtract(IntervalSet left, IntervalSet right) { + if (left == null || left.isNil) { + return IntervalSet(); + } + + final result = IntervalSet.ofSet(left); + if (right == null || right.isNil) { + // right set has no elements; just return the copy of the current set + return result; + } + + var resultI = 0; + var rightI = 0; + while ( + resultI < result.intervals.length && rightI < right.intervals.length) { + final resultInterval = result.intervals[resultI]; + final rightInterval = right.intervals[rightI]; + +// operation: (resultInterval - rightInterval) and update indexes + + if (rightInterval.b < resultInterval.a) { + rightI++; + continue; + } + + if (rightInterval.a > resultInterval.b) { + resultI++; + continue; + } + + Interval beforeCurrent; + Interval afterCurrent; + if (rightInterval.a > resultInterval.a) { + beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1); + } + + if (rightInterval.b < resultInterval.b) { + afterCurrent = Interval(rightInterval.b + 1, resultInterval.b); + } + + if (beforeCurrent != null) { + if (afterCurrent != null) { +// split the current interval into two + result.intervals[resultI] = beforeCurrent; + result.intervals.insert(resultI + 1, afterCurrent); + resultI++; + rightI++; + continue; + } else { +// replace the current interval + result.intervals[resultI] = beforeCurrent; + resultI++; + continue; + } + } else { + if (afterCurrent != null) { +// replace the current interval + result.intervals[resultI] = afterCurrent; + rightI++; + continue; + } else { +// remove the current interval (thus no need to increment resultI) + result.intervals.removeAt(resultI); + continue; + } + } + } + +// If rightI reached right.intervals.length, no more intervals to subtract from result. +// If resultI reached result.intervals.length, we would be subtracting from an empty set. +// Either way, we are done. 
+ return result; + } + + /// {@inheritDoc} */ + IntervalSet operator +(IntervalSet other) { + if (other == null) { + //|| !(other is IntervalSet) ) { + return null; // nothing in common with null set + } + + final myIntervals = intervals; + final theirIntervals = (other).intervals; + IntervalSet intersection; + final mySize = myIntervals.length; + final theirSize = theirIntervals.length; + var i = 0; + var j = 0; +// iterate down both interval lists looking for nondisjoint intervals + while (i < mySize && j < theirSize) { + final mine = myIntervals[i]; + final theirs = theirIntervals[j]; +//System.out.println("mine="+mine+" and theirs="+theirs); + if (mine.startsBeforeDisjoint(theirs)) { +// move this iterator looking for interval that might overlap + i++; + } else if (theirs.startsBeforeDisjoint(mine)) { +// move other iterator looking for interval that might overlap + j++; + } else if (mine.properlyContains(theirs)) { +// overlap, add intersection, get next theirs + intersection ??= IntervalSet(); intersection.add(mine.intersection(theirs)); + j++; + } else if (theirs.properlyContains(mine)) { +// overlap, add intersection, get next mine + intersection ??= IntervalSet(); intersection.add(mine.intersection(theirs)); + i++; + } else if (!mine.disjoint(theirs)) { +// overlap, add intersection + intersection ??= IntervalSet(); intersection.add(mine.intersection(theirs)); +// Move the iterator of lower range [a..b], but not +// the upper range as it may contain elements that will collide +// with the next iterator. So, if mine=[0..115] and +// theirs=[115..200], then intersection is 115 and move mine +// but not theirs as theirs may collide with the next range +// in thisIter. +// move both iterators to next ranges + if (mine.startsAfterNonDisjoint(theirs)) { + j++; + } else if (theirs.startsAfterNonDisjoint(mine)) { + i++; + } + } + } + if (intersection == null) { + return IntervalSet(); + } + return intersection; + } + + /// {@inheritDoc} */ + + bool contains(int el) { + final n = intervals.length; + var l = 0; + var r = n - 1; +// Binary search for the element in the (sorted, +// disjoint) array of intervals. + while (l <= r) { + final m = ((l + r) / 2).floor(); + final I = intervals[m]; + final a = I.a; + final b = I.b; + if (b < el) { + l = m + 1; + } else if (a > el) { + r = m - 1; + } else { + // el >= a && el <= b + return true; + } + } + return false; + } + + /// {@inheritDoc} */ + + bool get isNil { + return intervals == null || intervals.isEmpty; + } + + /// Returns the maximum value contained in the set if not isNil(). + /// + /// @return the maximum value contained in the set. + /// @throws RuntimeException if set is empty + int get maxElement { + if (isNil) { + throw StateError('set is empty'); + } + return intervals.last.b; + } + + /// Returns the minimum value contained in the set if not isNil(). + /// + /// @return the minimum value contained in the set. + /// @throws RuntimeException if set is empty + int get minElement { + if (isNil) { + throw StateError('set is empty'); + } + + return intervals.first.a; + } + + @override + int get hashCode { + var hash = MurmurHash.initialize(); + for (final I in intervals) { + hash = MurmurHash.update(hash, I.a); + hash = MurmurHash.update(hash, I.b); + } + + hash = MurmurHash.finish(hash, intervals.length * 2); + return hash; + } + + /// Are two IntervalSets equal? Because all intervals are sorted + /// and disjoint, equals is a simple linear walk over both lists + /// to make sure they are the same. 
Interval.equals() is used + /// by the List.equals() method to check the ranges. + + @override + bool operator ==(Object obj) { + if (obj == null || !(obj is IntervalSet)) { + return false; + } + IntervalSet other = obj; + return ListEquality().equals(intervals, other?.intervals); + } + + @override + String toString({bool elemAreChar = false, Vocabulary vocabulary}) { + if (intervals == null || intervals.isEmpty) { + return '{}'; + } + + final elemStr = intervals.map((Interval I) { + final buf = StringBuffer(); + final a = I.a; + final b = I.b; + if (a == b) { + if (vocabulary != null) { + buf.write(elementName(vocabulary, a)); + } else { + if (a == Token.EOF) { + buf.write(''); + } else if (elemAreChar) { + buf.write("'"); + buf.writeCharCode(a); + buf.write("'"); + } else { + buf.write(a); + } + } + } else { + if (vocabulary != null) { + for (var i = a; i <= b; i++) { + if (i > a) buf.write(', '); + buf.write(elementName(vocabulary, i)); + } + } else { + if (elemAreChar) { + buf.write("'"); + buf.writeCharCode(a); + buf.write("'..'"); + buf.writeCharCode(b); + buf.write("'"); + } else { + buf.write(a); + buf.write('..'); + buf.write(b); + } + } + } + return buf; + }).join(', '); + if (length > 1) { + return '{$elemStr}'; + } + return elemStr; + } + + String elementName(Vocabulary vocabulary, int a) { + if (a == Token.EOF) { + return ''; + } else if (a == Token.EPSILON) { + return ''; + } else { + return vocabulary.getDisplayName(a); + } + } + + int get length { + var n = 0; + final numIntervals = intervals.length; + if (numIntervals == 1) { + final firstInterval = intervals[0]; + return firstInterval.b - firstInterval.a + 1; + } + for (var i = 0; i < numIntervals; i++) { + final I = intervals[i]; + n += (I.b - I.a + 1); + } + return n; + } + + List toIntegerList() { + final values = List(length); + final n = intervals.length; + for (var i = 0; i < n; i++) { + final I = intervals[i]; + final a = I.a; + final b = I.b; + for (var v = a; v <= b; v++) { + values.add(v); + } + } + return values; + } + + List toList() { + final values = []; + final n = intervals.length; + for (var i = 0; i < n; i++) { + final I = intervals[i]; + final a = I.a; + final b = I.b; + for (var v = a; v <= b; v++) { + values.add(v); + } + } + return values; + } + + Set toSet() { + final s = {}; + for (final I in intervals) { + final a = I.a; + final b = I.b; + for (var v = a; v <= b; v++) { + s.add(v); + } + } + return s; + } + + /// Get the ith element of ordered set. Used only by RandomPhrase so + /// don't bother to implement if you're not doing that for a new + /// ANTLR code gen target. 
+ int get(int i) { + final n = intervals.length; + var index = 0; + for (var j = 0; j < n; j++) { + final I = intervals[j]; + final a = I.a; + final b = I.b; + for (var v = a; v <= b; v++) { + if (index == i) { + return v; + } + index++; + } + } + return -1; + } + + void remove(int el) { + if (readonly) throw StateError("can't alter readonly IntervalSet"); + final n = intervals.length; + for (var i = 0; i < n; i++) { + final I = intervals[i]; + final a = I.a; + final b = I.b; + if (el < a) { + break; // list is sorted and el is before this interval; not here + } +// if whole interval x..x, rm + if (el == a && el == b) { + intervals.removeAt(i); + break; + } +// if on left edge x..b, adjust left + if (el == a) { + I.a++; + break; + } +// if on right edge a..x, adjust right + if (el == b) { + I.b--; + break; + } +// if in middle a..x..b, split interval + if (el > a && el < b) { + // found in this interval + final oldb = I.b; + I.b = el - 1; // [a..x-1] + addRange(el + 1, oldb); // add [x+1..b] + } + } + } + + bool isReadonly() { + return readonly; + } + + void setReadonly(bool readonly) { + if (this.readonly && !readonly) { + throw StateError("can't alter readonly IntervalSet"); + } + this.readonly = readonly; + } +} diff --git a/runtime/Dart/lib/src/lexer.dart b/runtime/Dart/lib/src/lexer.dart new file mode 100644 index 000000000..90dcc3660 --- /dev/null +++ b/runtime/Dart/lib/src/lexer.dart @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:developer'; + +import 'atn/atn.dart'; +import 'error/error.dart'; +import 'input_stream.dart'; +import 'interval_set.dart'; +import 'misc/pair.dart'; +import 'recognizer.dart'; +import 'token.dart'; +import 'token_factory.dart'; +import 'token_source.dart'; +import 'util/utils.dart'; + +abstract class Lexer extends Recognizer + implements TokenSource { + static final DEFAULT_MODE = 0; + static final MORE = -2; + static final SKIP = -3; + + static final DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL; + static final HIDDEN = Token.HIDDEN_CHANNEL; + static final MIN_CHAR_VALUE = 0x0000; + static final MAX_CHAR_VALUE = 0x10FFFF; + + CharStream _input; + + Pair _tokenFactorySourcePair; + @override + TokenFactory tokenFactory = CommonTokenFactory.DEFAULT; + + /// The goal of all lexer rules/methods is to create a token object. + /// this is an instance variable as multiple rules may collaborate to + /// create a single token. nextToken will return this object after + /// matching lexer rule(s). If you subclass to allow multiple token + /// emissions, then set this to the last token to be matched or + /// something nonnull so that the auto token emit mechanism will not + /// emit another token. + Token _token; + + /// What character index in the stream did the current token start at? + /// Needed, for example, to get the text for current token. Set at + /// the start of nextToken. + int tokenStartCharIndex = -1; + + /// The line on which the first character of the token resides + int tokenStartLine = -1; + + /// The character position of first character within the line + int tokenStartCharPositionInLine = -1; + + /// Once we see EOF on char stream, next token will be EOF. + /// If you have DONE : EOF ; then you see DONE EOF. 
+ bool _hitEOF = false; + + /// The channel number for the current token + int channel = Token.DEFAULT_CHANNEL; + + /// The token type for the current token + int type = Token.INVALID_TYPE; + + final List _modeStack = []; + int mode_ = Lexer.DEFAULT_MODE; + + /// You can set the text for the current token to override what is in + /// the input char buffer. Use setText() or can set this instance var. + String _text; + + Lexer(CharStream input) { + _input = input; + _tokenFactorySourcePair = Pair(this, input); + } + + void reset() { + // wack Lexer state variables + if (_input != null) { + _input.seek(0); // rewind the input + } + _token = null; + type = Token.INVALID_TYPE; + channel = Token.DEFAULT_CHANNEL; + tokenStartCharIndex = -1; + tokenStartCharPositionInLine = -1; + tokenStartLine = -1; + _text = null; + + _hitEOF = false; + mode_ = Lexer.DEFAULT_MODE; + _modeStack.clear(); + + interpreter.reset(); + } + + /// Return a token from this source; i.e., match a token on the char stream. + @override + Token nextToken() { + if (_input == null) { + throw StateError('nextToken requires a non-null input stream.'); + } + + // Mark start location in char stream so unbuffered streams are + // guaranteed at least have text of current token + final tokenStartMarker = _input.mark(); + try { + outer: + while (true) { + if (_hitEOF) { + emitEOF(); + return _token; + } + + _token = null; + channel = Token.DEFAULT_CHANNEL; + tokenStartCharIndex = _input.index; + tokenStartCharPositionInLine = interpreter.charPositionInLine; + tokenStartLine = interpreter.line; + _text = null; + do { + type = Token.INVALID_TYPE; +// System.out.println("nextToken line "+tokenStartLine+" at "+((char)input.LA(1))+ +// " in mode "+mode+ +// " at index "+input.index()); + int ttype; + try { + ttype = interpreter.match(_input, mode_); + } on LexerNoViableAltException catch (e) { + notifyListeners(e); // report error + recover(e); + ttype = SKIP; + } + if (_input.LA(1) == IntStream.EOF) { + _hitEOF = true; + } + if (type == Token.INVALID_TYPE) type = ttype; + if (type == SKIP) { + continue outer; + } + } while (type == MORE); + if (_token == null) emit(); + return _token; + } + } finally { + // make sure we release marker after match or + // unbuffered char stream will keep buffering + _input.release(tokenStartMarker); + } + } + + /// Instruct the lexer to skip creating a token for current lexer rule + /// and look for another token. nextToken() knows to keep looking when + /// a lexer rule finishes with token set to SKIP_TOKEN. Recall that + /// if token==null at end of any token rule, it creates one for you + /// and emits it. 
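+ ///
+ /// For example, whitespace is typically routed here by a lexer command in the
+ /// grammar (a sketch of the usual pattern):
+ ///
+ /// ```
+ /// // grammar rule: WS : [ \t\r\n]+ -> skip ;
+ /// // the generated action simply calls:
+ /// skip();
+ /// ```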
+ void skip() { + type = Lexer.SKIP; + } + + void more() { + type = Lexer.MORE; + } + + void mode(int m) { + mode_ = m; + } + + void pushMode(int m) { + if (LexerATNSimulator.debug) { + log('pushMode $m'); + } + _modeStack.add(mode_); + mode(m); + } + + int popMode() { + if (_modeStack.isEmpty) throw StateError(''); + if (LexerATNSimulator.debug) log('popMode back to ${_modeStack.last}'); + mode(_modeStack.removeLast()); + return mode_; + } + + /// Set the char stream and reset the lexer + @override + set inputStream(IntStream input) { + _input = null; + _tokenFactorySourcePair = + Pair(this, _input); + reset(); + _input = input; + _tokenFactorySourcePair = + Pair(this, _input); + } + + @override + String get sourceName { + return _input.sourceName; + } + + @override + CharStream get inputStream { + return _input; + } + + /// By default does not support multiple emits per nextToken invocation + /// for efficiency reasons. Subclass and override this method, nextToken, + /// and getToken (to push tokens into a list and pull from that list + /// rather than a single variable as this implementation does). + void emitToken(Token token) { + //System.err.println("emit "+token); + _token = token; + } + + /// The standard method called to automatically emit a token at the + /// outermost lexical rule. The token object should point into the + /// char buffer start..stop. If there is a text override in 'text', + /// use that to set the token's text. Override this method to emit + /// custom Token objects or provide a new factory. + Token emit() { + final t = tokenFactory.create( + type, + _text, + _tokenFactorySourcePair, + channel, + tokenStartCharIndex, + charIndex - 1, + tokenStartLine, + tokenStartCharPositionInLine); + emitToken(t); + return t; + } + + Token emitEOF() { + final cpos = charPositionInLine; + final eof = tokenFactory.create(Token.EOF, null, _tokenFactorySourcePair, + Token.DEFAULT_CHANNEL, _input.index, _input.index - 1, line, cpos); + emitToken(eof); + return eof; + } + + @override + int get charPositionInLine { + return interpreter.charPositionInLine; + } + + @override + int get line { + return interpreter.line; + } + + set line(int line) { + interpreter.line = line; + } + + set charPositionInLine(int charPositionInLine) { + interpreter.charPositionInLine = charPositionInLine; + } + + /// What is the index of the current character of lookahead? + int get charIndex { + return _input.index; + } + + /// Return the text matched so far for the current token or any + /// text override. + String get text { + if (_text != null) { + return _text; + } + return interpreter.getText(_input); + } + + /// Set the complete text of this token; it wipes any previous + /// changes to the text. + set text(String text) { + _text = text; + } + + /// Override if emitting multiple tokens. + Token get token { + return _token; + } + + void setToken(Token _token) { + this._token = _token; + } + + List get channelNames => null; + + List get modeNames => null; + + /// Return a list of all Token objects in input char stream. + /// Forces load of all tokens. Does not include EOF token. 
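+ ///
+ /// For example (a sketch; `MyLexer` stands for a generated lexer class, and
+ /// the usual [Token] `type`/`text` accessors are assumed):
+ ///
+ /// ```
+ /// final lexer = MyLexer(InputStream.fromString('1 + 2'));
+ /// for (final token in lexer.allTokens) {
+ ///   print('${token.type}: ${token.text}');
+ /// }
+ /// ```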
+ List get allTokens { + final tokens = []; + var t = nextToken(); + while (t.type != Token.EOF) { + tokens.add(t); + t = nextToken(); + } + return tokens; + } + + void notifyListeners(LexerNoViableAltException e) { + final text = + _input.getText(Interval.of(tokenStartCharIndex, _input.index)); + final msg = "token recognition error at: '" + getErrorDisplay(text) + "'"; + + final listener = errorListenerDispatch; + listener.syntaxError( + this, null, tokenStartLine, tokenStartCharPositionInLine, msg, e); + } + + String getErrorDisplay(String s) { + return escapeWhitespace(s); + } + + String getCharErrorDisplay(int c) { + final s = getErrorDisplay(String.fromCharCode(c)); + return "'$s'"; + } + + /// Lexers can normally match any char in it's vocabulary after matching + /// a token, so do the easy thing and just kill a character and hope + /// it all works out. You can instead use the rule invocation stack + /// to do sophisticated error recovery if you are in a fragment rule. + void recover(RecognitionException re) { + if (re is LexerNoViableAltException) { + if (_input.LA(1) != IntStream.EOF) { + // skip a char and try again + interpreter.consume(_input); + } + } else { + //System.out.println("consuming char "+(char)input.LA(1)+" during recovery"); + //re.printStackTrace(); + // TODO: Do we lose character or line position information? + _input.consume(); + } + } +} diff --git a/runtime/Dart/lib/src/ll1_analyzer.dart b/runtime/Dart/lib/src/ll1_analyzer.dart new file mode 100644 index 000000000..773fa8213 --- /dev/null +++ b/runtime/Dart/lib/src/ll1_analyzer.dart @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import './util/bit_set.dart'; +import 'atn/atn.dart'; +import 'interval_set.dart'; +import 'prediction_context.dart'; +import 'rule_context.dart'; +import 'token.dart'; +import 'util/bit_set.dart'; + +class LL1Analyzer { + /// Special value added to the lookahead sets to indicate that we hit + /// a predicate during analysis if {@code seeThruPreds==false}. + static final int HIT_PRED = Token.INVALID_TYPE; + + final ATN atn; + + LL1Analyzer(this.atn); + + /// Calculates the SLL(1) expected lookahead set for each outgoing transition + /// of an [ATNState]. The returned array has one element for each + /// outgoing transition in [s]. If the closure from transition + /// i leads to a semantic predicate before matching a symbol, the + /// element at index i of the result will be null. + /// + /// @param s the ATN state + /// @return the expected symbols for each outgoing transition of [s]. + List getDecisionLookahead(ATNState s) { +// System.out.println("LOOK("+s.stateNumber+")"); + if (s == null) { + return null; + } + + final look = List(s.numberOfTransitions); + for (var alt = 0; alt < s.numberOfTransitions; alt++) { + look[alt] = IntervalSet(); + final lookBusy = {}; + final seeThruPreds = false; // fail to get lookahead upon pred + _LOOK(s.transition(alt).target, null, PredictionContext.EMPTY, look[alt], + lookBusy, BitSet(), seeThruPreds, false); + // Wipe out lookahead for this alternative if we found nothing + // or we had a predicate when we !seeThruPreds + if (look[alt].length == 0 || look[alt].contains(HIT_PRED)) { + look[alt] = null; + } + } + return look; + } + + /// Compute set of tokens that can follow [s] in the ATN in the + /// specified [ctx]. + /// + ///
+ /// If [ctx] is null and the end of the rule containing
+ /// [s] is reached, {@link Token#EPSILON} is added to the result set.
+ /// If [ctx] is not null and the end of the outermost rule is
+ /// reached, {@link Token#EOF} is added to the result set.
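+ ///
+ /// For example (a sketch; `atn`, `state`, and `ctx` would come from a
+ /// generated recognizer):
+ ///
+ /// ```
+ /// final follow = LL1Analyzer(atn).LOOK(state, ctx);
+ /// ```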
+ /// + /// @param s the ATN state + /// @param stopState the ATN state to stop at. This can be a + /// [BlockEndState] to detect epsilon paths through a closure. + /// @param ctx the complete parser context, or null if the context + /// should be ignored + /// + /// @return The set of tokens that can follow [s] in the ATN in the + /// specified [ctx]. + + IntervalSet LOOK(ATNState s, RuleContext ctx, [ATNState stopState]) { + final r = IntervalSet(); + final seeThruPreds = true; // ignore preds; get all lookahead + final lookContext = + ctx != null ? PredictionContext.fromRuleContext(s.atn, ctx) : null; + _LOOK( + s, stopState, lookContext, r, {}, BitSet(), seeThruPreds, true); + return r; + } + + /// Compute set of tokens that can follow [s] in the ATN in the + /// specified [ctx]. + /// + ///
+ /// If [ctx] is null and [stopState] or the end of the
+ /// rule containing [s] is reached, {@link Token#EPSILON} is added to
+ /// the result set. If [ctx] is not null and [addEOF] is
+ /// [true] and [stopState] or the end of the outermost rule is
+ /// reached, {@link Token#EOF} is added to the result set.
+ /// + /// @param s the ATN state. + /// @param stopState the ATN state to stop at. This can be a + /// [BlockEndState] to detect epsilon paths through a closure. + /// @param ctx The outer context, or null if the outer context should + /// not be used. + /// @param look The result lookahead set. + /// @param lookBusy A set used for preventing epsilon closures in the ATN + /// from causing a stack overflow. Outside code should pass + /// {@code new HashSet} for this argument. + /// @param calledRuleStack A set used for preventing left recursion in the + /// ATN from causing a stack overflow. Outside code should pass + /// {@code new BitSet()} for this argument. + /// @param seeThruPreds [true] to true semantic predicates as + /// implicitly [true] and "see through them", otherwise [false] + /// to treat semantic predicates as opaque and add {@link #HIT_PRED} to the + /// result if one is encountered. + /// @param addEOF Add {@link Token#EOF} to the result if the end of the + /// outermost context is reached. This parameter has no effect if [ctx] + /// is null. + void _LOOK( + ATNState s, + ATNState stopState, + PredictionContext ctx, + IntervalSet look, + Set lookBusy, + BitSet calledRuleStack, + bool seeThruPreds, + bool addEOF) { +// System.out.println("_LOOK("+s.stateNumber+", ctx="+ctx); + final c = ATNConfig(s, 0, ctx); + if (!lookBusy.add(c)) return; + + if (s == stopState) { + if (ctx == null) { + look.addOne(Token.EPSILON); + return; + } else if (ctx.isEmpty && addEOF) { + look.addOne(Token.EOF); + return; + } + } + + if (s is RuleStopState) { + if (ctx == null) { + look.addOne(Token.EPSILON); + return; + } else if (ctx.isEmpty && addEOF) { + look.addOne(Token.EOF); + return; + } + + if (ctx != PredictionContext.EMPTY) { + // run thru all possible stack tops in ctx + final removed = calledRuleStack[s.ruleIndex]; + try { + calledRuleStack.clear(s.ruleIndex); + for (var i = 0; i < ctx.length; i++) { + final returnState = atn.states[ctx.getReturnState(i)]; +// System.out.println("popping back to "+retState); + _LOOK(returnState, stopState, ctx.getParent(i), look, lookBusy, + calledRuleStack, seeThruPreds, addEOF); + } + } finally { + if (removed) { + calledRuleStack.set(s.ruleIndex); + } + } + return; + } + } + + for (var i = 0; i < s.numberOfTransitions; i++) { + final t = s.transition(i); + if (t is RuleTransition) { + if (calledRuleStack[t.target.ruleIndex]) { + continue; + } + + PredictionContext newContext = + SingletonPredictionContext.create(ctx, t.followState.stateNumber); + + try { + calledRuleStack.set(t.target.ruleIndex); + _LOOK(t.target, stopState, newContext, look, lookBusy, + calledRuleStack, seeThruPreds, addEOF); + } finally { + calledRuleStack.clear(t.target.ruleIndex); + } + } else if (t is AbstractPredicateTransition) { + if (seeThruPreds) { + _LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, + seeThruPreds, addEOF); + } else { + look.addOne(HIT_PRED); + } + } else if (t.isEpsilon) { + _LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, + seeThruPreds, addEOF); + } else if (t is WildcardTransition) { + look.addAll( + IntervalSet.ofRange(Token.MIN_USER_TOKEN_TYPE, atn.maxTokenType)); + } else { +// System.out.println("adding "+ t); + var set = t.label; + if (set != null) { + if (t is NotSetTransition) { + set = set.complement(IntervalSet.ofRange( + Token.MIN_USER_TOKEN_TYPE, atn.maxTokenType)); + } + look.addAll(set); + } + } + } + } +} diff --git a/runtime/Dart/lib/src/misc/multi_map.dart b/runtime/Dart/lib/src/misc/multi_map.dart new 
file mode 100644 index 000000000..274eda939 --- /dev/null +++ b/runtime/Dart/lib/src/misc/multi_map.dart @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'package:collection/collection.dart'; + +import 'pair.dart'; + +class MultiMap extends DelegatingMap> { + MultiMap() : super({}); + + void put(K key, V value) { + var elementsForKey = this[key]; + if (elementsForKey == null) { + elementsForKey = []; + this[key] = elementsForKey; + } + elementsForKey.add(value); + } + + List> get pairs { + final pairs = >[]; + for (var key in keys) { + for (var value in this[key]) { + pairs.add(Pair(key, value)); + } + } + return pairs; + } +} diff --git a/runtime/Dart/lib/src/misc/pair.dart b/runtime/Dart/lib/src/misc/pair.dart new file mode 100644 index 000000000..6d3e0d551 --- /dev/null +++ b/runtime/Dart/lib/src/misc/pair.dart @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../util/murmur_hash.dart'; + +class Pair { + final A a; + final B b; + + const Pair(this.a, this.b); + + @override + bool operator ==(other) { + return identical(this, other) || other is Pair && a == other.a && b == other.b; + } + + @override + String toString() { + return '($a, $b)'; + } + + @override + int get hashCode { + MurmurHash.initialize(); + + var hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, a); + hash = MurmurHash.update(hash, b); + return MurmurHash.finish(hash, 2); + } +} diff --git a/runtime/Dart/lib/src/parser.dart b/runtime/Dart/lib/src/parser.dart new file mode 100644 index 000000000..bc9c7001d --- /dev/null +++ b/runtime/Dart/lib/src/parser.dart @@ -0,0 +1,777 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:io'; + +import 'atn/atn.dart'; +import 'error/error.dart'; +import 'input_stream.dart'; +import 'interval_set.dart'; +import 'lexer.dart'; +import 'parser_rule_context.dart'; +import 'recognizer.dart'; +import 'rule_context.dart'; +import 'token.dart'; +import 'token_factory.dart'; +import 'token_stream.dart'; +import 'tree/tree.dart'; + +/// This is all the parsing support code essentially; most of it is error recovery stuff. */ +abstract class Parser extends Recognizer { + /// This field maps from the serialized ATN string to the deserialized [ATN] with + /// bypass alternatives. + /// + /// @see ATNDeserializationOptions#isGenerateRuleBypassTransitions() + static final Map bypassAltsAtnCache = {}; + + /// The error handling strategy for the parser. The default value is a new + /// instance of [DefaultErrorStrategy]. + /// + /// @see #getErrorHandler + /// @see #setErrorHandler + + ErrorStrategy errorHandler = DefaultErrorStrategy(); + + /// The input stream. + /// + /// @see #getInputStream + /// @see #setInputStream + TokenStream _input; + + final List _precedenceStack = [0]; + + /// The [ParserRuleContext] object for the currently executing rule. + /// This is always non-null during the parsing process. + ParserRuleContext _ctx; + + /// Specifies whether or not the parser should construct a parse tree during + /// the parsing process. 
The default value is [true]. + /// + /// @see #getBuildParseTree + /// @see #setBuildParseTree + bool _buildParseTrees = true; + + /// When {@link #setTrace}{@code (true)} is called, a reference to the + /// [TraceListener] is stored here so it can be easily removed in a + /// later call to {@link #setTrace}{@code (false)}. The listener itself is + /// implemented as a parser listener so this field is not directly used by + /// other parser methods. + TraceListener _tracer; + + /// The list of [ParseTreeListener] listeners registered to receive + /// events during the parse. + /// + /// @see #addParseListener + List _parseListeners; + + /// The number of syntax errors reported during parsing. This value is + /// incremented each time {@link #notifyErrorListeners} is called. + int _syntaxErrors = 0; + + /// Indicates parser has match()ed EOF token. See {@link #exitRule()}. */ + bool matchedEOF = false; + + Parser(TokenStream input) { + inputStream = input; + } + + /// reset the parser's state */ + void reset() { + if (inputStream != null) inputStream.seek(0); + errorHandler.reset(this); + _ctx = null; + _syntaxErrors = 0; + matchedEOF = false; + setTrace(false); + _precedenceStack.clear(); + _precedenceStack.add(0); + if (interpreter != null) { + interpreter.reset(); + } + } + + /// Match current input symbol against [ttype]. If the symbol type + /// matches, {@link ANTLRErrorStrategy#reportMatch} and {@link #consume} are + /// called to complete the match process. + /// + ///

+  /// If the symbol type does not match,
+  /// {@link ANTLRErrorStrategy#recoverInline} is called on the current error
+  /// strategy to attempt recovery. If {@link #getBuildParseTree} is
+  /// [true] and the token index of the symbol returned by
+  /// {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
+  /// the parse tree by calling {@link #createErrorNode(ParserRuleContext, Token)}
+  /// then {@link ParserRuleContext#addErrorNode(ErrorNode)}.
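+  ///
+  /// A minimal sketch of how a generated rule body drives this method (the
+  /// token type constant is illustrative, not part of this API):
+  ///
+  ///     match(MyParser.TOKEN_ID); // consume the ID or recover inline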
+ /// + /// @param ttype the token type to match + /// @return the matched symbol + /// @throws RecognitionException if the current input symbol did not match + /// [ttype] and the error strategy could not recover from the + /// mismatched symbol + Token match(int ttype) { + var t = currentToken; + if (t.type == ttype) { + if (ttype == Token.EOF) { + matchedEOF = true; + } + errorHandler.reportMatch(this); + consume(); + } else { + t = errorHandler.recoverInline(this); + if (_buildParseTrees && t.tokenIndex == -1) { + // we must have conjured up a new token during single token insertion + // if it's not the current symbol + _ctx.addErrorNode(createErrorNode(_ctx, t)); + } + } + return t; + } + + /// Match current input symbol as a wildcard. If the symbol type matches + /// (i.e. has a value greater than 0), {@link ANTLRErrorStrategy#reportMatch} + /// and {@link #consume} are called to complete the match process. + /// + ///

+  /// If the symbol type does not match,
+  /// {@link ANTLRErrorStrategy#recoverInline} is called on the current error
+  /// strategy to attempt recovery. If {@link #getBuildParseTree} is
+  /// [true] and the token index of the symbol returned by
+  /// {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to
+  /// the parse tree by calling {@link Parser#createErrorNode(ParserRuleContext, Token)}
+  /// then {@link ParserRuleContext#addErrorNode(ErrorNode)}.
+ /// + /// @return the matched symbol + /// @throws RecognitionException if the current input symbol did not match + /// a wildcard and the error strategy could not recover from the mismatched + /// symbol + Token matchWildcard() { + var t = currentToken; + if (t.type > 0) { + errorHandler.reportMatch(this); + consume(); + } else { + t = errorHandler.recoverInline(this); + if (_buildParseTrees && t.tokenIndex == -1) { + // we must have conjured up a new token during single token insertion + // if it's not the current symbol + _ctx.addErrorNode(createErrorNode(_ctx, t)); + } + } + + return t; + } + + /// Track the [ParserRuleContext] objects during the parse and hook + /// them up using the {@link ParserRuleContext#children} list so that it + /// forms a parse tree. The [ParserRuleContext] returned from the start + /// rule represents the root of the parse tree. + /// + ///

+  /// Note that if we are not building parse trees, rule contexts only point
+  /// upwards. When a rule exits, it returns the context but that gets garbage
+  /// collected if nobody holds a reference. It points upwards but nobody
+  /// points at it.
+  ///
+  /// When we build parse trees, we are adding all of these contexts to the
+  /// {@link ParserRuleContext#children} list. Contexts are then not candidates
+  /// for garbage collection.
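+  ///
+  /// For example, to parse without retaining contexts as children (a sketch;
+  /// `prog` is an assumed start rule):
+  ///
+  ///     parser.buildParseTree = false;
+  ///     parser.prog(); // contexts only point upward, no tree is kept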
+ set buildParseTree(bool buildParseTrees) { + _buildParseTrees = buildParseTrees; + } + + /// Gets whether or not a complete parse tree will be constructed while + /// parsing. This property is [true] for a newly constructed parser. + /// + /// @return [true] if a complete parse tree will be constructed while + /// parsing, otherwise [false] + bool get buildParseTree { + return _buildParseTrees; + } + + /// Trim the internal lists of the parse tree during parsing to conserve memory. + /// This property is set to [false] by default for a newly constructed parser. + /// + /// @param trimParseTrees [true] to trim the capacity of the {@link ParserRuleContext#children} + /// list to its size after a rule is parsed. + set trimParseTree(bool trimParseTrees) { + if (trimParseTrees) { + if (trimParseTree) return; + addParseListener(TrimToSizeListener.INSTANCE); + } else { + removeParseListener(TrimToSizeListener.INSTANCE); + } + } + + /// @return [true] if the {@link ParserRuleContext#children} list is trimmed + /// using the default {@link Parser.TrimToSizeListener} during the parse process. + bool get trimParseTree { + return parseListeners.contains(TrimToSizeListener.INSTANCE); + } + + List get parseListeners => _parseListeners; + + /// Registers [listener] to receive events during the parsing process. + /// + ///

+  /// To support output-preserving grammar transformations (including but not
+  /// limited to left-recursion removal, automated left-factoring, and
+  /// optimized code generation), calls to listener methods during the parse
+  /// may differ substantially from calls made by
+  /// {@link ParseTreeWalker#DEFAULT} used after the parse is complete. In
+  /// particular, rule entry and exit events may occur in a different order
+  /// during the parse than after the parser. In addition, calls to certain
+  /// rule entry methods may be omitted.
+  ///
+  /// With the following specific exceptions, calls to listener events are
+  /// deterministic, i.e. for identical input the calls to listener
+  /// methods will be the same:
+  ///
+  /// * Alterations to the grammar used to generate code may change the
+  ///   behavior of the listener calls.
+  /// * Alterations to the command line options passed to ANTLR 4 when
+  ///   generating the parser may change the behavior of the listener calls.
+  /// * Changing the version of the ANTLR Tool used to generate the parser
+  ///   may change the behavior of the listener calls.
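+  ///
+  /// A minimal sketch of listening during the parse (`MyListener` is an
+  /// assumed [ParseTreeListener] implementation, `prog` an assumed start
+  /// rule):
+  ///
+  ///     parser.addParseListener(MyListener());
+  ///     parser.prog(); // enter/exit events fire while the parse runs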
+ /// + /// @param listener the listener to add + /// + /// @throws NullPointerException if {@code} listener is null + void addParseListener(ParseTreeListener listener) { + if (listener == null) { + throw ArgumentError.notNull('listener'); + } + + _parseListeners ??= []; + + _parseListeners.add(listener); + } + + /// Remove [listener] from the list of parse listeners. + /// + ///

+  /// If [listener] is null or has not been added as a parse
+  /// listener, this method does nothing.
+ /// + /// @see #addParseListener + /// + /// @param listener the listener to remove + void removeParseListener(ParseTreeListener listener) { + if (_parseListeners != null) { + if (_parseListeners.remove(listener)) { + if (_parseListeners.isEmpty) { + _parseListeners = null; + } + } + } + } + + /// Remove all parse listeners. + /// + /// @see #addParseListener + void removeParseListeners() { + _parseListeners = null; + } + + /// Notify any parse listeners of an enter rule event. + /// + /// @see #addParseListener + void triggerEnterRuleEvent() { + for (var listener in _parseListeners) { + listener.enterEveryRule(_ctx); + _ctx.enterRule(listener); + } + } + + /// Notify any parse listeners of an exit rule event. + /// + /// @see #addParseListener + void triggerExitRuleEvent() { + // reverse order walk of listeners + for (var i = _parseListeners.length - 1; i >= 0; i--) { + final listener = _parseListeners[i]; + _ctx.exitRule(listener); + listener.exitEveryRule(_ctx); + } + } + + /// Gets the number of syntax errors reported during parsing. This value is + /// incremented each time {@link #notifyErrorListeners} is called. + /// + /// @see #notifyErrorListeners + int get numberOfSyntaxErrors { + return _syntaxErrors; + } + + @override + TokenFactory get tokenFactory { + return _input.tokenSource.tokenFactory; + } + + /// Tell our token source and error strategy about a new way to create tokens. */ + @override + set tokenFactory(TokenFactory factory) { + _input.tokenSource.tokenFactory = factory; + } + + /// The ATN with bypass alternatives is expensive to create so we create it + /// lazily. + /// + /// @throws UnsupportedOperationException if the current parser does not + /// implement the {@link #getSerializedATN()} method. + ATN get ATNWithBypassAlts { + final serializedAtn = serializedATN; + if (serializedAtn == null) { + throw UnsupportedError( + 'The current parser does not support an ATN with bypass alternatives.'); + } + + var result = bypassAltsAtnCache[serializedAtn]; + if (result == null) { + final deserializationOptions = + ATNDeserializationOptions(); + deserializationOptions.setGenerateRuleBypassTransitions(true); + result = ATNDeserializer(deserializationOptions) + .deserialize(serializedAtn.codeUnits); + bypassAltsAtnCache[serializedAtn] = result; + } + + return result; + } + + /// The preferred method of getting a tree pattern. For example, here's a + /// sample use: + /// + ///
+  /// ParseTree t = parser.expr();
+  /// ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr);
+  /// ParseTreeMatch m = p.match(t);
+  /// String id = m.get("ID");
+  /// 
+ ParseTreePattern compileParseTreePattern(String pattern, int patternRuleIndex, + [Lexer lexer]) { + if (lexer == null) { + final tokenSource = tokenStream?.tokenSource; + if (tokenSource == null || !(tokenSource is Lexer)) { + throw UnsupportedError("Parser can't discover a lexer to use"); + } + lexer = tokenSource; + } + + final m = ParseTreePatternMatcher(lexer, this); + return m.compile(pattern, patternRuleIndex); + } + + @override + TokenStream get inputStream => tokenStream; + + @override + set inputStream(IntStream input) { + setTokenStream(input); + } + + TokenStream get tokenStream => _input; + + /// Set the token stream and reset the parser. */ + void setTokenStream(TokenStream input) { + _input = null; + reset(); + _input = input; + } + + /// Match needs to return the current input symbol, which gets put + /// into the label for the associated token ref; e.g., x=ID. + + Token get currentToken { + return _input.LT(1); + } + + void notifyErrorListeners(String msg, + [Token offendingToken, RecognitionException e]) { + offendingToken = offendingToken ?? currentToken; + _syntaxErrors++; + var line = -1; + var charPositionInLine = -1; + line = offendingToken.line; + charPositionInLine = offendingToken.charPositionInLine; + + final listener = errorListenerDispatch; + listener.syntaxError( + this, offendingToken, line, charPositionInLine, msg, e); + } + + /// Consume and return the {@linkplain #getCurrentToken current symbol}. + /// + ///

+  /// E.g., given the following input with [A] being the current
+  /// lookahead symbol, this function moves the cursor to [B] and returns
+  /// [A]:
+ /// + ///
+  ///  A B
+  ///  ^
+  /// 
+ /// + /// If the parser is not in error recovery mode, the consumed symbol is added + /// to the parse tree using {@link ParserRuleContext#addChild}, and + /// {@link ParseTreeListener#visitTerminal} is called on any parse listeners. + /// If the parser is in error recovery mode, the consumed symbol is + /// added to the parse tree using {@link #createErrorNode(ParserRuleContext, Token)} then + /// {@link ParserRuleContext#addErrorNode(ErrorNode)} and + /// {@link ParseTreeListener#visitErrorNode} is called on any parse + /// listeners. + Token consume() { + final o = currentToken; + if (o.type != IntStream.EOF) { + inputStream.consume(); + } + final hasListener = _parseListeners != null && _parseListeners.isNotEmpty; + if (_buildParseTrees || hasListener) { + if (errorHandler.inErrorRecoveryMode(this)) { + final node = _ctx.addErrorNode(createErrorNode(_ctx, o)); + if (_parseListeners != null) { + for (var listener in _parseListeners) { + listener.visitErrorNode(node); + } + } + } else { + final node = _ctx.addChild(createTerminalNode(_ctx, o)); + if (_parseListeners != null) { + for (var listener in _parseListeners) { + listener.visitTerminal(node); + } + } + } + } + return o; + } + + /// How to create a token leaf node associated with a parent. + /// Typically, the terminal node to create is not a function of the parent. + /// + /// @since 4.7 + TerminalNode createTerminalNode(ParserRuleContext parent, Token t) { + return TerminalNodeImpl(t); + } + + /// How to create an error node, given a token, associated with a parent. + /// Typically, the error node to create is not a function of the parent. + /// + /// @since 4.7 + ErrorNode createErrorNode(ParserRuleContext parent, Token t) { + return ErrorNodeImpl(t); + } + + void addContextToParseTree() { + final parent = _ctx.parent; + // add current context to parent if we have a parent + if (parent != null) { + parent.addAnyChild(_ctx); + } + } + + /// Always called by generated parsers upon entry to a rule. Access field + /// {@link #_ctx} get the current context. + void enterRule(ParserRuleContext localctx, int state, int ruleIndex) { + this.state = state; + _ctx = localctx; + _ctx.start = _input.LT(1); + if (_buildParseTrees) addContextToParseTree(); + if (_parseListeners != null) triggerEnterRuleEvent(); + } + + void exitRule() { + if (matchedEOF) { + // if we have matched EOF, it cannot consume past EOF so we use LT(1) here + _ctx.stop = _input.LT(1); // LT(1) will be end of file + } else { + _ctx.stop = _input.LT(-1); // stop node is what we just matched + } + // trigger event on _ctx, before it reverts to parent + if (_parseListeners != null) triggerExitRuleEvent(); + state = _ctx.invokingState; + _ctx = _ctx.parent; + } + + void enterOuterAlt(ParserRuleContext localctx, int altNum) { + localctx.altNumber = altNum; + // if we have new localctx, make sure we replace existing ctx + // that is previous child of parse tree + if (_buildParseTrees && _ctx != localctx) { + final parent = _ctx.parent; + if (parent != null) { + parent.removeLastChild(); + parent.addAnyChild(localctx); + } + } + _ctx = localctx; + } + + /// Get the precedence level for the top-most precedence rule. + /// + /// @return The precedence level for the top-most precedence rule, or -1 if + /// the parser context is not nested within a precedence rule. 
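+  /// A sketch of the invariant (the stack starts with a single 0 entry, so
+  /// outside any precedence rule this yields 0, and -1 only for an empty
+  /// stack):
+  ///
+  ///     parser.reset();
+  ///     assert(parser.precedence == 0);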
+ int get precedence { + if (_precedenceStack.isEmpty) { + return -1; + } + + return _precedenceStack.last; + } + + void enterRecursionRule( + ParserRuleContext localctx, int state, int ruleIndex, int precedence) { + this.state = state; + _precedenceStack.add(precedence); + _ctx = localctx; + _ctx.start = _input.LT(1); + if (_parseListeners != null) { + triggerEnterRuleEvent(); // simulates rule entry for left-recursive rules + } + } + + /// Like {@link #enterRule} but for recursive rules. + /// Make the current context the child of the incoming localctx. + void pushNewRecursionContext( + ParserRuleContext localctx, int state, int ruleIndex) { + final previous = _ctx; + previous.parent = localctx; + previous.invokingState = state; + previous.stop = _input.LT(-1); + + _ctx = localctx; + _ctx.start = previous.start; + if (_buildParseTrees) { + _ctx.addAnyChild(previous); + } + + if (_parseListeners != null) { + triggerEnterRuleEvent(); // simulates rule entry for left-recursive rules + } + } + + void unrollRecursionContexts(ParserRuleContext _parentctx) { + _precedenceStack.removeLast(); + _ctx.stop = _input.LT(-1); + final retctx = _ctx; // save current ctx (return value) + + // unroll so _ctx is as it was before call to recursive method + if (_parseListeners != null) { + while (_ctx != _parentctx) { + triggerExitRuleEvent(); + _ctx = _ctx.parent; + } + } else { + _ctx = _parentctx; + } + + // hook into tree + retctx.parent = _parentctx; + + if (_buildParseTrees && _parentctx != null) { + // add return ctx into invoking rule's tree + _parentctx.addAnyChild(retctx); + } + } + + ParserRuleContext getInvokingContext(int ruleIndex) { + var p = _ctx; + while (p != null) { + if (p.ruleIndex == ruleIndex) return p; + p = p.parent; + } + return null; + } + + ParserRuleContext get context { + return _ctx; + } + + set context(ParserRuleContext ctx) { + _ctx = ctx; + } + + @override + bool precpred(RuleContext localctx, int precedence) { + return precedence >= _precedenceStack.last; + } + + bool inContext(String context) { + // TODO: useful in parser? + return false; + } + + /// Checks whether or not [symbol] can follow the current state in the + /// ATN. The behavior of this method is equivalent to the following, but is + /// implemented such that the complete context-sensitive follow set does not + /// need to be explicitly constructed. + /// + ///
+  /// return expectedTokens.contains(symbol);
+  /// 
+ /// + /// @param symbol the symbol type to check + /// @return [true] if [symbol] can follow the current state in + /// the ATN, otherwise [false]. + bool isExpectedToken(int symbol) { +// return interpreter.atn.nextTokens(_ctx); + final atn = interpreter.atn; + var ctx = _ctx; + final s = atn.states[state]; + var following = atn.nextTokens(s); + if (following.contains(symbol)) { + return true; + } +// log("following "+s+"="+following); + if (!following.contains(Token.EPSILON)) return false; + + while (ctx != null && + ctx.invokingState >= 0 && + following.contains(Token.EPSILON)) { + final invokingState = atn.states[ctx.invokingState]; + RuleTransition rt = invokingState.transition(0); + following = atn.nextTokens(rt.followState); + if (following.contains(symbol)) { + return true; + } + + ctx = ctx.parent; + } + + if (following.contains(Token.EPSILON) && symbol == Token.EOF) { + return true; + } + + return false; + } + + bool isMatchedEOF() { + return matchedEOF; + } + + /// Computes the set of input symbols which could follow the current parser + /// state and context, as given by {@link #getState} and {@link #getContext}, + /// respectively. + /// + /// @see ATN#getExpectedTokens(int, RuleContext) + IntervalSet get expectedTokens { + return getATN().getExpectedTokens(state, context); + } + + IntervalSet get expectedTokensWithinCurrentRule { + final atn = interpreter.atn; + final s = atn.states[state]; + return atn.nextTokens(s); + } + + /// Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found. */ + int getRuleIndex(String ruleName) { + final ruleIndex = ruleIndexMap[ruleName]; + if (ruleIndex != null) return ruleIndex; + return -1; + } + + ParserRuleContext get ruleContext { + return _ctx; + } + + List get ruleInvocationStack => getRuleInvocationStack(); + + /// Return List<String> of the rule names in your parser instance + /// leading up to a call to the current rule. You could override if + /// you want more details such as the file/line info of where + /// in the ATN a rule is invoked. + /// + /// This is very useful for error messages. + List getRuleInvocationStack([RuleContext p]) { + p = p ?? _ctx; + final _ruleNames = ruleNames; + final stack = []; + while (p != null) { + // compute what follows who invoked us + final ruleIndex = p.ruleIndex; + if (ruleIndex < 0) { + stack.add('n/a'); + } else { + stack.add(_ruleNames[ruleIndex]); + } + p = p.parent; + } + return stack; + } + + /// For debugging and other purposes. */ + List get dfaStrings { + final s = []; + for (var d = 0; d < interpreter.decisionToDFA.length; d++) { + final dfa = interpreter.decisionToDFA[d]; + s.add(dfa.toString(vocabulary)); + } + return s; + } + + /// For debugging and other purposes. 
*/ + void dumpDFA() { + var seenOne = false; + for (var d = 0; d < interpreter.decisionToDFA.length; d++) { + final dfa = interpreter.decisionToDFA[d]; + if (dfa.states.isNotEmpty) { + if (seenOne) print(''); + print('Decision ${dfa.decision}:'); + stdout.write(dfa.toString(vocabulary)); + seenOne = true; + } + } + } + + String get sourceName { + return _input.sourceName; + } + + @override + ParseInfo get parseInfo { + final interp = interpreter; + if (interp is ProfilingATNSimulator) { + return ParseInfo(interp); + } + return null; + } + + /// @since 4.3 + void setProfile(bool profile) { + final interp = interpreter; + final saveMode = interp.predictionMode; + if (profile) { + if (!(interp is ProfilingATNSimulator)) { + interpreter = ProfilingATNSimulator(this); + } + } else if (interp is ProfilingATNSimulator) { + final sim = ParserATNSimulator( + this, getATN(), interp.decisionToDFA, interp.sharedContextCache); + interpreter = sim; + } + interpreter.predictionMode = saveMode; + } + + /// During a parse is sometimes useful to listen in on the rule entry and exit + /// events as well as token matches. This is for quick and dirty debugging. + void setTrace(bool trace) { + if (!trace) { + removeParseListener(_tracer); + _tracer = null; + } else { + if (_tracer != null) { + removeParseListener(_tracer); + } else { + _tracer = TraceListener(this); + } + addParseListener(_tracer); + } + } + + /// Gets whether a [TraceListener] is registered as a parse listener + /// for the parser. + /// + /// @see #setTrace(bool) + bool isTrace() { + return _tracer != null; + } +} diff --git a/runtime/Dart/lib/src/parser_interpreter.dart b/runtime/Dart/lib/src/parser_interpreter.dart new file mode 100644 index 000000000..e14c547d5 --- /dev/null +++ b/runtime/Dart/lib/src/parser_interpreter.dart @@ -0,0 +1,393 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:collection'; + +import 'atn/atn.dart'; +import 'dfa/dfa.dart'; +import 'error/error.dart'; +import 'misc/pair.dart'; +import 'parser.dart'; +import 'parser_rule_context.dart'; +import 'token.dart'; +import 'token_stream.dart'; +import 'vocabulary.dart'; + +/// A parser simulator that mimics what ANTLR's generated +/// parser code does. A ParserATNSimulator is used to make +/// predictions via adaptivePredict but this class moves a pointer through the +/// ATN to simulate parsing. ParserATNSimulator just +/// makes us efficient rather than having to backtrack, for example. +/// +/// This properly creates parse trees even for left recursive rules. +/// +/// We rely on the left recursive rule invocation and special predicate +/// transitions to make left recursive rules work. +/// +/// See TestParserInterpreter for examples. 
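+/// A minimal usage sketch (the vocabulary, rule names and ATN are assumed to
+/// come from a deserialized grammar; `expr` is an assumed rule name):
+///
+///     final parser = ParserInterpreter(
+///         'Expr.g4', vocabulary, ruleNames, atn, tokens);
+///     final tree = parser.parse(ruleNames.indexOf('expr'));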
+class ParserInterpreter extends Parser { + @override + final String grammarFileName; + final ATN atn; + + List decisionToDFA; // not shared like it is for generated parsers + final PredictionContextCache sharedContextCache = + PredictionContextCache(); + + @override + final List ruleNames; + + @override + final Vocabulary vocabulary; + + /// This stack corresponds to the _parentctx, _parentState pair of locals + /// that would exist on call stack frames with a recursive descent parser; + /// in the generated function for a left-recursive rule you'd see: + /// + /// EContext e(int _p) throws RecognitionException { + /// ParserRuleContext _parentctx = context; // Pair.a + /// int _parentState = state; // Pair.b + /// ... + /// } + /// + /// Those values are used to create new recursive rule invocation contexts + /// associated with left operand of an alt like "expr '*' expr". + final DoubleLinkedQueue> _parentContextStack = + DoubleLinkedQueue(); + + /// We need a map from (decision,inputIndex)->forced alt for computing ambiguous + /// parse trees. For now, we allow exactly one override. + int overrideDecision = -1; + int overrideDecisionInputIndex = -1; + int overrideDecisionAlt = -1; + bool overrideDecisionReached = + false; // latch and only override once; error might trigger infinite loop + + /// What is the current context when we override a decisions? This tells + /// us what the root of the parse tree is when using override + /// for an ambiguity/lookahead check. + InterpreterRuleContext overrideDecisionRoot; + + /// Return the root of the parse, which can be useful if the parser + /// bails out. You still can access the top node. Note that, + /// because of the way left recursive rules add children, it's possible + /// that the root will not have any children if the start rule immediately + /// called and left recursive rule that fails. 
+ /// + /// @since 4.5.1 + InterpreterRuleContext rootContext; + + ParserInterpreter(this.grammarFileName, this.vocabulary, this.ruleNames, + this.atn, TokenStream input) + : super(input) { + // init decision DFA + final numberOfDecisions = atn.numberOfDecisions; + decisionToDFA = List(numberOfDecisions); + for (var i = 0; i < numberOfDecisions; i++) { + final decisionState = atn.getDecisionState(i); + decisionToDFA[i] = DFA(decisionState, i); + } + + // get atn simulator that knows how to do predictions + interpreter = + ParserATNSimulator(this, atn, decisionToDFA, sharedContextCache); + } + + @override + void reset() { + super.reset(); + overrideDecisionReached = false; + overrideDecisionRoot = null; + } + + @override + ATN getATN() { + return atn; + } + + /// Begin parsing at startRuleIndex */ + ParserRuleContext parse(int startRuleIndex) { + final startRuleStartState = atn.ruleToStartState[startRuleIndex]; + + rootContext = createInterpreterRuleContext( + null, ATNState.INVALID_STATE_NUMBER, startRuleIndex); + if (startRuleStartState.isLeftRecursiveRule) { + enterRecursionRule( + rootContext, startRuleStartState.stateNumber, startRuleIndex, 0); + } else { + enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex); + } + + while (true) { + final p = atnState; + switch (p.stateType) { + case StateType.RULE_STOP: + // pop; return from rule + if (context.isEmpty) { + if (startRuleStartState.isLeftRecursiveRule) { + final result = context; + final parentContext = + _parentContextStack.removeLast(); + unrollRecursionContexts(parentContext.a); + return result; + } else { + exitRule(); + return rootContext; + } + } + + visitRuleStopState(p); + break; + + default: + try { + visitState(p); + } on RecognitionException catch (e) { + state = atn.ruleToStopState[p.ruleIndex].stateNumber; + context.exception = e; + errorHandler.reportError(this, e); + recover(e); + } + + break; + } + } + } + + @override + void enterRecursionRule( + ParserRuleContext localctx, int state, int ruleIndex, int precedence) { + final pair = + Pair(context, localctx.invokingState); + _parentContextStack.add(pair); + super.enterRecursionRule(localctx, state, ruleIndex, precedence); + } + + ATNState get atnState { + return atn.states[state]; + } + + void visitState(ATNState p) { +// System.out.println("visitState "+p.stateNumber); + var predictedAlt = 1; + if (p is DecisionState) { + predictedAlt = visitDecisionState(p); + } + + final transition = p.transition(predictedAlt - 1); + switch (transition.type) { + case TransitionType.EPSILON: + if (p.stateType == StateType.STAR_LOOP_ENTRY && + (p as StarLoopEntryState).isPrecedenceDecision && + !(transition.target is LoopEndState)) { + // We are at the start of a left recursive rule's (...)* loop + // and we're not taking the exit branch of loop. 
+ final localctx = createInterpreterRuleContext( + _parentContextStack.last.a, + _parentContextStack.last.b, + context.ruleIndex); + pushNewRecursionContext(localctx, + atn.ruleToStartState[p.ruleIndex].stateNumber, context.ruleIndex); + } + break; + + case TransitionType.ATOM: + match((transition as AtomTransition).atomLabel); + break; + + case TransitionType.RANGE: + case TransitionType.SET: + case TransitionType.NOT_SET: + if (!transition.matches( + inputStream.LA(1), Token.MIN_USER_TOKEN_TYPE, 65535)) { + recoverInline(); + } + matchWildcard(); + break; + + case TransitionType.WILDCARD: + matchWildcard(); + break; + + case TransitionType.RULE: + RuleStartState ruleStartState = transition.target; + final ruleIndex = ruleStartState.ruleIndex; + final newctx = + createInterpreterRuleContext(context, p.stateNumber, ruleIndex); + if (ruleStartState.isLeftRecursiveRule) { + enterRecursionRule(newctx, ruleStartState.stateNumber, ruleIndex, + (transition as RuleTransition).precedence); + } else { + enterRule(newctx, transition.target.stateNumber, ruleIndex); + } + break; + + case TransitionType.PREDICATE: + PredicateTransition predicateTransition = transition; + if (!sempred(context, predicateTransition.ruleIndex, + predicateTransition.predIndex)) { + throw FailedPredicateException(this); + } + + break; + + case TransitionType.ACTION: + ActionTransition actionTransition = transition; + action( + context, actionTransition.ruleIndex, actionTransition.actionIndex); + break; + + case TransitionType.PRECEDENCE: + if (!precpred(context, + (transition as PrecedencePredicateTransition).precedence)) { + throw FailedPredicateException(this, + 'precpred(context, ${(transition as PrecedencePredicateTransition).precedence})'); + } + break; + + default: + throw UnsupportedError('Unrecognized ATN transition type.'); + } + + state = transition.target.stateNumber; + } + + /// Method visitDecisionState() is called when the interpreter reaches + /// a decision state (instance of DecisionState). It gives an opportunity + /// for subclasses to track interesting things. + int visitDecisionState(DecisionState p) { + var predictedAlt = 1; + if (p.numberOfTransitions > 1) { + errorHandler.sync(this); + final decision = p.decision; + if (decision == overrideDecision && + inputStream.index == overrideDecisionInputIndex && + !overrideDecisionReached) { + predictedAlt = overrideDecisionAlt; + overrideDecisionReached = true; + } else { + predictedAlt = + interpreter.adaptivePredict(inputStream, decision, context); + } + } + return predictedAlt; + } + + /// Provide simple "factory" for InterpreterRuleContext's. + /// @since 4.5.1 + InterpreterRuleContext createInterpreterRuleContext( + ParserRuleContext parent, int invokingStateNumber, int ruleIndex) { + return InterpreterRuleContext(parent, invokingStateNumber, ruleIndex); + } + + void visitRuleStopState(ATNState p) { + final ruleStartState = atn.ruleToStartState[p.ruleIndex]; + if (ruleStartState.isLeftRecursiveRule) { + final parentContext = + _parentContextStack.removeLast(); + unrollRecursionContexts(parentContext.a); + state = parentContext.b; + } else { + exitRule(); + } + + RuleTransition ruleTransition = atn.states[state].transition(0); + state = ruleTransition.followState.stateNumber; + } + + /// Override this parser interpreters normal decision-making process + /// at a particular decision and input token index. 
Instead of + /// allowing the adaptive prediction mechanism to choose the + /// first alternative within a block that leads to a successful parse, + /// force it to take the alternative, 1..n for n alternatives. + /// + /// As an implementation limitation right now, you can only specify one + /// override. This is sufficient to allow construction of different + /// parse trees for ambiguous input. It means re-parsing the entire input + /// in general because you're never sure where an ambiguous sequence would + /// live in the various parse trees. For example, in one interpretation, + /// an ambiguous input sequence would be matched completely in expression + /// but in another it could match all the way back to the root. + /// + /// s : e '!'? ; + /// e : ID + /// | ID '!' + /// ; + /// + /// Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first + /// case, the ambiguous sequence is fully contained only by the root. + /// In the second case, the ambiguous sequences fully contained within just + /// e, as in: (e ID !). + /// + /// Rather than trying to optimize this and make + /// some intelligent decisions for optimization purposes, I settled on + /// just re-parsing the whole input and then using + /// {link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal + /// subtree that contains the ambiguous sequence. I originally tried to + /// record the call stack at the point the parser detected and ambiguity but + /// left recursive rules create a parse tree stack that does not reflect + /// the actual call stack. That impedance mismatch was enough to make + /// it it challenging to restart the parser at a deeply nested rule + /// invocation. + /// + /// Only parser interpreters can override decisions so as to avoid inserting + /// override checking code in the critical ALL(*) prediction execution path. + /// + /// @since 4.5.1 + void addDecisionOverride(int decision, int tokenIndex, int forcedAlt) { + overrideDecision = decision; + overrideDecisionInputIndex = tokenIndex; + overrideDecisionAlt = forcedAlt; + } + + /// Rely on the error handler for this parser but, if no tokens are consumed + /// to recover, add an error node. Otherwise, nothing is seen in the parse + /// tree. 
+ void recover(RecognitionException e) { + final i = inputStream.index; + errorHandler.recover(this, e); + if (inputStream.index == i) { + // no input consumed, better add an error node + if (e is InputMismatchException) { + final ime = e; + final tok = e.offendingToken; + var expectedTokenType = Token.INVALID_TYPE; + if (!ime.expectedTokens.isNil) { + expectedTokenType = ime.expectedTokens.minElement; // get any element + } + final errToken = tokenFactory.create( + expectedTokenType, + tok.text, + Pair(tok.tokenSource, tok.tokenSource.inputStream), + Token.DEFAULT_CHANNEL, + -1, + -1, + // invalid start/stop + tok.line, + tok.charPositionInLine); + context.addErrorNode(createErrorNode(context, errToken)); + } else { + // NoViableAlt + final tok = e.offendingToken; + final errToken = tokenFactory.create( + Token.INVALID_TYPE, + tok.text, + Pair(tok.tokenSource, tok.tokenSource.inputStream), + Token.DEFAULT_CHANNEL, + -1, + -1, + // invalid start/stop + tok.line, + tok.charPositionInLine); + context.addErrorNode(createErrorNode(context, errToken)); + } + } + } + + Token recoverInline() { + return errorHandler.recoverInline(this); + } +} diff --git a/runtime/Dart/lib/src/parser_rule_context.dart b/runtime/Dart/lib/src/parser_rule_context.dart new file mode 100644 index 000000000..c6c594704 --- /dev/null +++ b/runtime/Dart/lib/src/parser_rule_context.dart @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'error/error.dart'; +import 'interval_set.dart'; +import 'parser.dart'; +import 'rule_context.dart'; +import 'token.dart'; +import 'tree/tree.dart'; + +/// A rule invocation record for parsing. +/// +/// Contains all of the information about the current rule not stored in the +/// RuleContext. It handles parse tree children list, Any ATN state +/// tracing, and the default values available for rule invocations: +/// start, stop, rule index, current alt number. +/// +/// Subclasses made for each rule and grammar track the parameters, +/// return values, locals, and labels specific to that rule. These +/// are the objects that are returned from rules. +/// +/// Note text is not an actual field of a rule return value; it is computed +/// from start and stop using the input stream's toString() method. I +/// could add a ctor to this so that we can pass in and store the input +/// stream, but I'm not sure we want to do that. It would seem to be undefined +/// to get the .text property anyway if the rule matches tokens from multiple +/// input streams. +/// +/// I do not use getters for fields of objects that are used simply to +/// group values such as this aggregate. The getters/setters are there to +/// satisfy the superclass interface. +class ParserRuleContext extends RuleContext { + /// If we are debugging or building a parse tree for a visitor, + /// we need to track all of the tokens and rule invocations associated + /// with this rule's context. This is empty for parsing w/o tree constr. + /// operation because we don't the need to track the details about + /// how we parse this rule. + List children; + + /// Get the initial/final token in this context. + /// Note that the range from start to stop is inclusive, so for rules that do not consume anything + /// (for example, zero length or error productions) this token may exceed stop. + Token start, stop; + + /// The exception that forced this rule to return. 
If the rule successfully + /// completed, this is null. + RecognitionException exception; + + ParserRuleContext([RuleContext parent, int invokingStateNumber]) + : super(parent: parent, invokingState: invokingStateNumber); + + /// COPY a ctx (I'm deliberately not using copy constructor) to avoid + /// confusion with creating node with parent. Does not copy children + /// (except error leaves). + /// + /// This is used in the generated parser code to flip a generic XContext + /// node for rule X to a YContext for alt label Y. In that sense, it is + /// not really a generic copy function. + /// + /// If we do an error sync() at start of a rule, we might add error nodes + /// to the generic XContext so this function must copy those nodes to + /// the YContext as well else they are lost! + void copyFrom(ParserRuleContext ctx) { + parent = ctx.parent; + invokingState = ctx.invokingState; + + start = ctx.start; + stop = ctx.stop; + + // copy any error nodes to alt label node + if (ctx.children != null) { + children = []; + // reset parent pointer for any error nodes + for (var child in ctx.children) { + if (child is ErrorNode) { + addChild(child); + } + } + } + } + + // Double dispatch methods for listeners + + void enterRule(ParseTreeListener listener) {} + + void exitRule(ParseTreeListener listener) {} + + /// Add a parse tree node to this as a child. Works for + /// internal and leaf nodes. Does not set parent link; + /// other add methods must do that. Other addChild methods + /// call this. + /// + /// We cannot set the parent pointer of the incoming node + /// because the existing interfaces do not have a setParent() + /// method and I don't want to break backward compatibility for this. + /// + /// @since 4.7 + T addAnyChild(T t) { + children ??= []; + children.add(t); + return t; + } + + /// Add a token leaf node child and force its parent to be this node. */ + TerminalNode addChild(TerminalNode t) { + t.parent = this; + return addAnyChild(t); + } + + /// Add an error node child and force its parent to be this node. + /// + /// @since 4.7 + ErrorNode addErrorNode(ErrorNode errorNode) { + errorNode.parent=this; + return addAnyChild(errorNode); + } + + /// Used by enterOuterAlt to toss out a RuleContext previously added as + /// we entered a rule. If we have # label, we will need to remove + /// generic ruleContext object. + void removeLastChild() { + if (children != null) { + children.removeLast(); + } + } + + // Override to make type more specific + @override + ParserRuleContext get parent { + return super.parent; + } + + @override + ParseTree getChild(int i) { + if (children == null || i < 0 || i >= children.length) { + return null; + } + + if (T == null) { + return children[i]; + } + var j = -1; // what element have we found with ctxType? + for (var o in children) { + if (o is T) { + j++; + if (j == i) { + return o; + } + } + } + return null; + } + + TerminalNode getToken(int ttype, int i) { + if (children == null || i < 0 || i >= children.length) { + return null; + } + + var j = -1; // what token with ttype have we found? 
+ for (var o in children) { + if (o is TerminalNode) { + final tnode = o; + final symbol = tnode.symbol; + if (symbol.type == ttype) { + j++; + if (j == i) { + return tnode; + } + } + } + } + + return null; + } + + List getTokens(int ttype) { + if (children == null) { + return []; + } + + List tokens; + for (var o in children) { + if (o is TerminalNode) { + final tnode = o; + final symbol = tnode.symbol; + if (symbol.type == ttype) { + tokens ??= []; + tokens.add(tnode); + } + } + } + + if (tokens == null) { + return []; + } + + return tokens; + } + + T getRuleContext(int i) { + return getChild(i); + } + + List getRuleContexts() { + if (children == null) { + return []; + } + + List contexts; + for (var o in children) { + if (o is T) { + contexts ??= []; + + contexts.add(o); + } + } + + if (contexts == null) { + return []; + } + + return contexts; + } + + @override + int get childCount => children?.length ?? 0; + + @override + Interval get sourceInterval { + if (start == null) { + return Interval.INVALID; + } + if (stop == null || stop.tokenIndex < start.tokenIndex) { + return Interval(start.tokenIndex, start.tokenIndex - 1); // empty + } + return Interval(start.tokenIndex, stop.tokenIndex); + } + + /// Used for rule context info debugging during parse-time, not so much for ATN debugging */ + String toInfoString(Parser recognizer) { + final rules = recognizer.getRuleInvocationStack(this); + + return "ParserRuleContext${rules.reversed}{start=$start, stop=$stop}'"; + } + + static final EMPTY = ParserRuleContext(); +} + +/// This class extends [ParserRuleContext] by allowing the value of +/// {@link #getRuleIndex} to be explicitly set for the context. +/// +///

+/// [ParserRuleContext] does not include field storage for the rule index
+/// since the context classes created by the code generator override the
+/// {@link #getRuleIndex} method to return the correct value for that context.
+/// Since the parser interpreter does not use the context classes generated for
+/// a parser, this class (with slightly more memory overhead per node) is used
+/// to provide equivalent functionality.

+class InterpreterRuleContext extends ParserRuleContext { + @override + int ruleIndex = -1; + + /// Constructs a new [InterpreterRuleContext] with the specified + /// parent, invoking state, and rule index. + /// + /// @param parent The parent context. + /// @param invokingStateNumber The invoking state number. + /// @param ruleIndex The rule index for the current context. + InterpreterRuleContext( + ParserRuleContext parent, int invokingStateNumber, this.ruleIndex) + : super(parent, invokingStateNumber); +} diff --git a/runtime/Dart/lib/src/prediction_context.dart b/runtime/Dart/lib/src/prediction_context.dart new file mode 100644 index 000000000..dcee2e385 --- /dev/null +++ b/runtime/Dart/lib/src/prediction_context.dart @@ -0,0 +1,877 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'package:collection/collection.dart'; + +import 'atn/atn.dart'; +import 'misc/pair.dart'; +import 'recognizer.dart'; +import 'rule_context.dart'; +import 'util/murmur_hash.dart'; + +abstract class PredictionContext { + /// Represents {@code $} in local context prediction, which means wildcard. + /// {@code *+x = *}. + static final EmptyPredictionContext EMPTY = EmptyPredictionContext(); + + /// Represents {@code $} in an array in full context mode, when {@code $} + /// doesn't mean wildcard: {@code $ + x = [$,x]}. Here, + /// {@code $} = {@link #EMPTY_RETURN_STATE}. + static final int EMPTY_RETURN_STATE = 0x7FFFFFFF; + + static final int INITIAL_HASH = 1; + + static int globalNodeCount = 0; + int id = globalNodeCount++; + + /// Stores the computed hash code of this [PredictionContext]. The hash + /// code is computed in parts to match the following reference algorithm. + /// + ///
+  ///   int referenceHashCode() {
+  ///      int hash = {@link MurmurHash#initialize MurmurHash.initialize}({@link #INITIAL_HASH});
+  ///
+  ///      for (int i = 0; i < {@link #size()}; i++) {
+  ///          hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getParent getParent}(i));
+  ///      }
+  ///
+  ///      for (int i = 0; i < {@link #size()}; i++) {
+  ///          hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getReturnState getReturnState}(i));
+  ///      }
+  ///
+  ///      hash = {@link MurmurHash#finish MurmurHash.finish}(hash, 2 * {@link #size()});
+  ///      return hash;
+  ///  }
+  /// 
+ final int cachedHashCode; + + PredictionContext(this.cachedHashCode); + + /// Convert a [RuleContext] tree to a [PredictionContext] graph. + /// Return {@link #EMPTY} if [outerContext] is empty or null. + static PredictionContext fromRuleContext(ATN atn, RuleContext outerContext) { + outerContext ??= RuleContext.EMPTY; + + // if we are in RuleContext of start rule, s, then PredictionContext + // is EMPTY. Nobody called us. (if we are empty, return empty) + if (outerContext.parent == null || outerContext == RuleContext.EMPTY) { + return PredictionContext.EMPTY; + } + + // If we have a parent, convert it to a PredictionContext graph + PredictionContext parent = EMPTY; + parent = PredictionContext.fromRuleContext(atn, outerContext.parent); + + final state = atn.states[outerContext.invokingState]; + RuleTransition transition = state.transition(0); + return SingletonPredictionContext.create( + parent, transition.followState.stateNumber); + } + + int get length; + + PredictionContext getParent(int index); + + int getReturnState(int index); + + /// This means only the {@link #EMPTY} (wildcard? not sure) context is in set. */ + bool get isEmpty { + return this == EMPTY; + } + + bool hasEmptyPath() { + // since EMPTY_RETURN_STATE can only appear in the last position, we check last one + return getReturnState(length - 1) == EMPTY_RETURN_STATE; + } + + @override + int get hashCode { + return cachedHashCode; + } + + @override + bool operator ==(Object obj); + + static int calculateEmptyHashCode() { + var hash = MurmurHash.initialize(INITIAL_HASH); + hash = MurmurHash.finish(hash, 0); + return hash; + } + + static int calculateHashCode( + List parents, List returnStates) { + var hash = MurmurHash.initialize(INITIAL_HASH); + + for (var parent in parents) { + hash = MurmurHash.update(hash, parent); + } + + for (var returnState in returnStates) { + hash = MurmurHash.update(hash, returnState); + } + + hash = MurmurHash.finish(hash, 2 * parents.length); + return hash; + } + + // dispatch + static PredictionContext merge( + PredictionContext a, + PredictionContext b, + bool rootIsWildcard, + Map, PredictionContext> + mergeCache) { + assert(a != null && b != null); // must be empty context, never null + + // share same graph if both same + if (a == b || a == b) return a; + + if (a is SingletonPredictionContext && b is SingletonPredictionContext) { + return mergeSingletons(a, b, rootIsWildcard, mergeCache); + } + + // At least one of a or b is array + // If one is $ and rootIsWildcard, return $ as * wildcard + if (rootIsWildcard) { + if (a is EmptyPredictionContext) return a; + if (b is EmptyPredictionContext) return b; + } + + // convert singleton so both are arrays to normalize + if (a is SingletonPredictionContext) { + a = ArrayPredictionContext.of(a); + } + if (b is SingletonPredictionContext) { + b = ArrayPredictionContext.of(b); + } + return mergeArrays(a, b, rootIsWildcard, mergeCache); + } + + /// Merge two [SingletonPredictionContext] instances. + /// + ///

+  /// Stack tops equal, parents merge is same; return left graph.
+  ///
+  /// Same stack top, parents differ; merge parents giving array node, then
+  /// remainders of those graphs. A new root node is created to point to the
+  /// merged parents.
+  ///
+  /// Different stack tops pointing to same parent. Make array node for the
+  /// root where both elements in the root point to the same (original)
+  /// parent.
+  ///
+  /// Different stack tops pointing to different parents. Make array node for
+  /// the root where each element points to the corresponding original
+  /// parent.
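+  ///
+  /// A sketch of the first case, with an assumed return state 5:
+  ///
+  ///     final x = SingletonPredictionContext.create(PredictionContext.EMPTY, 5);
+  ///     final y = SingletonPredictionContext.create(PredictionContext.EMPTY, 5);
+  ///     // equal stack tops and equal parents: the left graph comes back
+  ///     assert(identical(PredictionContext.mergeSingletons(x, y, true, null), x));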
+ /// + /// @param a the first [SingletonPredictionContext] + /// @param b the second [SingletonPredictionContext] + /// @param rootIsWildcard [true] if this is a local-context merge, + /// otherwise false to indicate a full-context merge + /// @param mergeCache + static PredictionContext mergeSingletons( + SingletonPredictionContext a, + SingletonPredictionContext b, + bool rootIsWildcard, + Map, PredictionContext> + mergeCache) { + if (mergeCache != null) { + var previous = mergeCache[Pair(a, b)]; + if (previous != null) return previous; + previous = mergeCache[Pair(b, a)]; + if (previous != null) return previous; + } + + final rootMerge = mergeRoot(a, b, rootIsWildcard); + if (rootMerge != null) { + if (mergeCache != null) mergeCache[Pair(a, b)] = rootMerge; + return rootMerge; + } + + if (a.returnState == b.returnState) { + // a == b + final parent = + merge(a.parent, b.parent, rootIsWildcard, mergeCache); + // if parent is same as existing a or b parent or reduced to a parent, return it + if (parent == a.parent) return a; // ax + bx = ax, if a=b + if (parent == b.parent) return b; // ax + bx = bx, if a=b + // else: ax + ay = a'[x,y] + // merge parents x and y, giving array node with x,y then remainders + // of those graphs. dup a, a' points at merged array + // new joined parent so create new singleton pointing to it, a' + PredictionContext a_ = + SingletonPredictionContext.create(parent, a.returnState); + if (mergeCache != null) mergeCache[Pair(a, b)] = a_; + return a_; + } else { + // a != b payloads differ + // see if we can collapse parents due to $+x parents if local ctx + PredictionContext singleParent; + if (a == b || (a.parent != null && a.parent == b.parent)) { + // ax + bx = [a,b]x + singleParent = a.parent; + } + if (singleParent != null) { + // parents are same + // sort payloads and use same parent + final payloads = [a.returnState, b.returnState]; + if (a.returnState > b.returnState) { + payloads[0] = b.returnState; + payloads[1] = a.returnState; + } + final parents = [singleParent, singleParent]; + PredictionContext a_ = ArrayPredictionContext(parents, payloads); + if (mergeCache != null) mergeCache[Pair(a, b)] = a_; + return a_; + } + // parents differ and can't merge them. Just pack together + // into array; can't merge. + // ax + by = [ax,by] + final payloads = [a.returnState, b.returnState]; + var parents = [a.parent, b.parent]; + if (a.returnState > b.returnState) { + // sort by payload + payloads[0] = b.returnState; + payloads[1] = a.returnState; + parents = [b.parent, a.parent]; + } + PredictionContext a_ = ArrayPredictionContext(parents, payloads); + if (mergeCache != null) mergeCache[Pair(a, b)] = a_; + return a_; + } + } + + /// Handle case where at least one of [a] or [b] is + /// {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used + /// to represent {@link #EMPTY}. + /// + ///

+  /// **Local-Context Merges**
+  ///
+  /// These local-context merge operations are used when [rootIsWildcard]
+  /// is true.
+  ///
+  /// {@link #EMPTY} is superset of any graph; return {@link #EMPTY}.
+  ///
+  /// {@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is
+  /// {@code #EMPTY}; return left graph.
+  ///
+  /// Special case of last merge if local context.
+  ///
+  /// **Full-Context Merges**
+  ///
+  /// These full-context merge operations are used when [rootIsWildcard]
+  /// is false.
+  ///
+  /// Must keep all contexts; {@link #EMPTY} in array is a special value (and
+  /// null parent).
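+  ///
+  /// A sketch of the local-context wildcard rule, with an assumed return
+  /// state 7:
+  ///
+  ///     final b = SingletonPredictionContext.create(PredictionContext.EMPTY, 7);
+  ///     final m = PredictionContext.mergeRoot(PredictionContext.EMPTY, b, true);
+  ///     assert(identical(m, PredictionContext.EMPTY)); // * + b = *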
+ /// + /// @param a the first [SingletonPredictionContext] + /// @param b the second [SingletonPredictionContext] + /// @param rootIsWildcard [true] if this is a local-context merge, + /// otherwise false to indicate a full-context merge + static PredictionContext mergeRoot(SingletonPredictionContext a, + SingletonPredictionContext b, bool rootIsWildcard) { + if (rootIsWildcard) { + if (a == EMPTY) return EMPTY; // * + b = * + if (b == EMPTY) return EMPTY; // a + * = * + } else { + if (a == EMPTY && b == EMPTY) return EMPTY; // $ + $ = $ + if (a == EMPTY) { + // $ + x = [x,$] + final payloads = [b.returnState, EMPTY_RETURN_STATE]; + final parents = [b.parent, null]; + PredictionContext joined = + ArrayPredictionContext(parents, payloads); + return joined; + } + if (b == EMPTY) { + // x + $ = [x,$] ($ is always last if present) + final payloads = [a.returnState, EMPTY_RETURN_STATE]; + final parents = [a.parent, null]; + PredictionContext joined = + ArrayPredictionContext(parents, payloads); + return joined; + } + } + return null; + } + + /// Merge two [ArrayPredictionContext] instances. + /// + ///

+  /// Different tops, different parents.
+  ///
+  /// Shared top, same parents.
+  ///
+  /// Shared top, different parents.
+  ///
+  /// Shared top, all shared parents.
+  ///
+  /// Equal tops, merge parents and reduce top to
+  /// [SingletonPredictionContext].
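+  ///
+  /// A sketch of the merge-sort behavior on sorted return states (the
+  /// parents and state numbers here are arbitrary assumptions):
+  ///
+  ///     final a1 = ArrayPredictionContext(
+  ///         [PredictionContext.EMPTY, PredictionContext.EMPTY], [1, 2]);
+  ///     final b1 = ArrayPredictionContext(
+  ///         [PredictionContext.EMPTY, PredictionContext.EMPTY], [1, 3]);
+  ///     // mergeArrays(a1, b1, true, null) yields return states [1, 2, 3]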
+ static PredictionContext mergeArrays( + ArrayPredictionContext a, + ArrayPredictionContext b, + bool rootIsWildcard, + Map, PredictionContext> + mergeCache) { + if (mergeCache != null) { + var previous = mergeCache[Pair(a, b)]; + if (previous != null) return previous; + previous = mergeCache[Pair(b, a)]; + if (previous != null) return previous; + } + + // merge sorted payloads a + b => M + var i = 0; // walks a + var j = 0; // walks b + var k = 0; // walks target M array + + var mergedReturnStates = List( + a.returnStates.length + b.returnStates.length); // TODO Will it grow? + var mergedParents = List( + a.returnStates.length + b.returnStates.length); // TODO Will it grow? + // walk and merge to yield mergedParents, mergedReturnStates + while (i < a.returnStates.length && j < b.returnStates.length) { + final a_parent = a.parents[i]; + final b_parent = b.parents[j]; + if (a.returnStates[i] == b.returnStates[j]) { + // same payload (stack tops are equal), must yield merged singleton + final payload = a.returnStates[i]; + // $+$ = $ + final both$ = payload == EMPTY_RETURN_STATE && + a_parent == null && + b_parent == null; + final ax_ax = (a_parent != null && b_parent != null) && + a_parent == b_parent; // ax+ax -> ax + if (both$ || ax_ax) { + mergedParents[k] = a_parent; // choose left + mergedReturnStates[k] = payload; + } else { + // ax+ay -> a'[x,y] + final mergedParent = + merge(a_parent, b_parent, rootIsWildcard, mergeCache); + mergedParents[k] = mergedParent; + mergedReturnStates[k] = payload; + } + i++; // hop over left one as usual + j++; // but also skip one in right side since we merge + } else if (a.returnStates[i] < b.returnStates[j]) { + // copy a[i] to M + mergedParents[k] = a_parent; + mergedReturnStates[k] = a.returnStates[i]; + i++; + } else { + // b > a, copy b[j] to M + mergedParents[k] = b_parent; + mergedReturnStates[k] = b.returnStates[j]; + j++; + } + k++; + } + + // copy over any payloads remaining in either array + if (i < a.returnStates.length) { + for (var p = i; p < a.returnStates.length; p++) { + mergedParents[k] = a.parents[p]; + mergedReturnStates[k] = a.returnStates[p]; + k++; + } + } else { + for (var p = j; p < b.returnStates.length; p++) { + mergedParents[k] = b.parents[p]; + mergedReturnStates[k] = b.returnStates[p]; + k++; + } + } + + // trim merged if we combined a few that had same stack tops + if (k < mergedParents.length) { + // write index < last position; trim + if (k == 1) { + // for just one merged element, return singleton top + PredictionContext a_ = SingletonPredictionContext.create( + mergedParents[0], mergedReturnStates[0]); + if (mergeCache != null) mergeCache[Pair(a, b)] = a_; + return a_; + } + mergedParents = List(k)..setRange(0, k, mergedParents); + mergedReturnStates = List(k)..setRange(0, k, mergedReturnStates); + } + + PredictionContext M = + ArrayPredictionContext(mergedParents, mergedReturnStates); + + // if we created same array as a or b, return that instead + // TODO: track whether this is possible above during merge sort for speed + if (M == a) { + if (mergeCache != null) mergeCache[Pair(a, b)] = a; + return a; + } + if (M == b) { + if (mergeCache != null) mergeCache[Pair(a, b)] = b; + return b; + } + + combineCommonParents(mergedParents); + + if (mergeCache != null) mergeCache[Pair(a, b)] = M; + return M; + } + + /// Make pass over all M [parents]; merge any {@code equals()} + /// ones. 
+ static void combineCommonParents(List parents) { + final uniqueParents = + {}; + + for (var p = 0; p < parents.length; p++) { + final parent = parents[p]; + if (!uniqueParents.containsKey(parent)) { + // don't replace + uniqueParents[parent] = parent; + } + } + + for (var p = 0; p < parents.length; p++) { + parents[p] = uniqueParents[parents[p]]; + } + } + + static String toDOTString(PredictionContext context) { + if (context == null) return ''; + final buf = StringBuffer(); + buf.write('digraph G {\n'); + buf.write('rankdir=LR;\n'); + + final nodes = getAllContextNodes(context); + nodes.sort((PredictionContext o1, PredictionContext o2) { + return o1.id - o2.id; + }); + + for (var current in nodes) { + if (current is SingletonPredictionContext) { + final s = current.id.toString(); + buf.write(' s'); + buf.write(s); + var returnState = current.getReturnState(0).toString(); + if (current is EmptyPredictionContext) returnState = r'$'; + buf.write(' [label=\"'); + buf.write(returnState); + buf.write('\"];\n'); + continue; + } + ArrayPredictionContext arr = current; + buf.write(' s'); + buf.write(arr.id); + buf.write(' [shape=box, label=\"'); + buf.write('['); + var first = true; + for (var inv in arr.returnStates) { + if (!first) buf.write(', '); + if (inv == EMPTY_RETURN_STATE) { + buf.write(r'$'); + } else { + buf.write(inv); + } + first = false; + } + buf.write(']'); + buf.write('\"];\n'); + } + + for (var current in nodes) { + if (current == EMPTY) continue; + for (var i = 0; i < current.length; i++) { + if (current.getParent(i) == null) continue; + final s = current.id.toString(); + buf.write(' s'); + buf.write(s); + buf.write('->'); + buf.write('s'); + buf.write(current.getParent(i).id); + if (current.length > 1) { + buf.write(' [label=\"parent[$i]\"];\n'); + } else { + buf.write(';\n'); + } + } + } + + buf.write('}\n'); + return buf.toString(); + } + + // From Sam + static PredictionContext getCachedContext( + PredictionContext context, + PredictionContextCache contextCache, + Map visited) { + if (context.isEmpty) { + return context; + } + + var existing = visited[context]; + if (existing != null) { + return existing; + } + + existing = contextCache[context]; + if (existing != null) { + visited[context] = existing; + return existing; + } + + var changed = false; + var parents = List(context.length); + for (var i = 0; i < parents.length; i++) { + final parent = + getCachedContext(context.getParent(i), contextCache, visited); + if (changed || parent != context.getParent(i)) { + if (!changed) { + parents = List(context.length); + for (var j = 0; j < context.length; j++) { + parents[j] = context.getParent(j); + } + + changed = true; + } + + parents[i] = parent; + } + } + + if (!changed) { + contextCache.add(context); + visited[context] = context; + return context; + } + + PredictionContext updated; + if (parents.isEmpty) { + updated = EMPTY; + } else if (parents.length == 1) { + updated = SingletonPredictionContext.create( + parents[0], context.getReturnState(0)); + } else { + ArrayPredictionContext arrayPredictionContext = context; + updated = ArrayPredictionContext( + parents, arrayPredictionContext.returnStates); + } + + contextCache.add(updated); + visited[updated] = updated; + visited[context] = updated; + + return updated; + } + +// // extra structures, but cut/paste/morphed works, so leave it. 
+// // seems to do a breadth-first walk +// static List getAllNodes(PredictionContext context) { +// Map visited = +// new IdentityHashMap(); +// Deque workList = new ArrayDeque(); +// workList.add(context); +// visited.put(context, context); +// List nodes = new ArrayList(); +// while (!workList.isEmpty) { +// PredictionContext current = workList.pop(); +// nodes.add(current); +// for (int i = 0; i < current.length; i++) { +// PredictionContext parent = current.getParent(i); +// if ( parent!=null && visited.put(parent, parent) == null) { +// workList.push(parent); +// } +// } +// } +// return nodes; +// } + + // ter's recursive version of Sam's getAllNodes() + static List getAllContextNodes(PredictionContext context) { + final nodes = []; + final visited = + {}; + getAllContextNodes_(context, nodes, visited); + return nodes; + } + + static void getAllContextNodes_( + PredictionContext context, + List nodes, + Map visited) { + if (context == null || visited.containsKey(context)) return; + visited[context] = context; + nodes.add(context); + for (var i = 0; i < context.length; i++) { + getAllContextNodes_(context.getParent(i), nodes, visited); + } + } + + // FROM SAM + List toStrings( + Recognizer recognizer, PredictionContext stop, int currentState) { + final result = []; + + outer: + for (var perm = 0;; perm++) { + var offset = 0; + var last = true; + var p = this; + var stateNumber = currentState; + final localBuffer = StringBuffer(); + localBuffer.write('['); + while (!p.isEmpty && p != stop) { + var index = 0; + if (p.length > 0) { + var bits = 1; + while ((1 << bits) < p.length) { + bits++; + } + + final mask = (1 << bits) - 1; + index = (perm >> offset) & mask; + last &= index >= p.length - 1; + if (index >= p.length) { + continue outer; + } + offset += bits; + } + + if (recognizer != null) { + if (localBuffer.length > 1) { + // first char is '[', if more than that this isn't the first rule + localBuffer.write(' '); + } + + final atn = recognizer.getATN(); + final s = atn.states[stateNumber]; + final ruleName = recognizer.ruleNames[s.ruleIndex]; + localBuffer.write(ruleName); + } else if (p.getReturnState(index) != EMPTY_RETURN_STATE) { + if (!p.isEmpty) { + if (localBuffer.length > 1) { + // first char is '[', if more than that this isn't the first rule + localBuffer.write(' '); + } + + localBuffer.write(p.getReturnState(index)); + } + } + stateNumber = p.getReturnState(index); + p = p.getParent(index); + } + localBuffer.write(']'); + result.add(localBuffer.toString()); + + if (last) { + break; + } + } + + return result; + } +} + +class SingletonPredictionContext extends PredictionContext { + final PredictionContext parent; + final int returnState; + + SingletonPredictionContext(this.parent, this.returnState) + : super(parent != null + ? 
PredictionContext.calculateHashCode([parent], [returnState]) + : PredictionContext.calculateEmptyHashCode()) { + assert(returnState != ATNState.INVALID_STATE_NUMBER); + } + + static SingletonPredictionContext create( + PredictionContext parent, int returnState) { + if (returnState == PredictionContext.EMPTY_RETURN_STATE && parent == null) { + // someone can pass in the bits of an array ctx that mean $ + return PredictionContext.EMPTY; + } + return SingletonPredictionContext(parent, returnState); + } + + @override + int get length { + return 1; + } + + @override + PredictionContext getParent(int index) { + assert(index == 0); + return parent; + } + + @override + int getReturnState(int index) { + assert(index == 0); + return returnState; + } + + @override + bool operator ==(Object o) { + if (identical(this, o)) { + return true; + } else if (o is SingletonPredictionContext) { + if (hashCode != o.hashCode) { + return false; // can't be same if hash is different + } + + final s = o; + return returnState == s.returnState && + (parent != null && parent == s.parent); + } + return false; + } + + @override + String toString() { + final up = parent != null ? parent.toString() : ''; + if (up.isEmpty) { + if (returnState == PredictionContext.EMPTY_RETURN_STATE) { + return r'$'; + } + return returnState.toString(); + } + return '$returnState $up'; + } +} + +class EmptyPredictionContext extends SingletonPredictionContext { + EmptyPredictionContext() : super(null, PredictionContext.EMPTY_RETURN_STATE); + + @override + bool get isEmpty { + return true; + } + + @override + int get length { + return 1; + } + + @override + PredictionContext getParent(int index) { + return null; + } + + @override + int getReturnState(int index) { + return returnState; + } + + @override + String toString() { + return r'$'; + } +} + +class ArrayPredictionContext extends PredictionContext { + /// Parent can be null only if full ctx mode and we make an array + /// from {@link #EMPTY} and non-empty. We merge {@link #EMPTY} by using null parent and + /// returnState == {@link #EMPTY_RETURN_STATE}. + List parents; + + /// Sorted for merge, no duplicates; if present, + /// {@link #EMPTY_RETURN_STATE} is always last. 
+  List<int> returnStates;
+
+  ArrayPredictionContext.of(SingletonPredictionContext a)
+      : this([a.parent], [a.returnState]);
+
+  ArrayPredictionContext(
+      List<PredictionContext> parents, List<int> returnStates)
+      : super(PredictionContext.calculateHashCode(parents, returnStates)) {
+    assert(parents != null && parents.isNotEmpty);
+    assert(returnStates != null && returnStates.isNotEmpty);
+//    System.err.println("CREATE ARRAY: "+Arrays.toString(parents)+", "+Arrays.toString(returnStates));
+    this.parents = parents;
+    this.returnStates = returnStates;
+  }
+
+  @override
+  bool get isEmpty {
+    // since EMPTY_RETURN_STATE can only appear in the last position, we
+    // don't need to verify that size==1
+    return returnStates[0] == PredictionContext.EMPTY_RETURN_STATE;
+  }
+
+  @override
+  int get length {
+    return returnStates.length;
+  }
+
+  @override
+  PredictionContext getParent(int index) {
+    return parents[index];
+  }
+
+  @override
+  int getReturnState(int index) {
+    return returnStates[index];
+  }
+
+//  int findReturnState(int returnState) {
+//    return Arrays.binarySearch(returnStates, returnState);
+//  }
+
+  @override
+  bool operator ==(Object o) {
+    if (identical(this, o)) {
+      return true;
+    } else if (o is ArrayPredictionContext) {
+      if (hashCode != o.hashCode) {
+        return false; // can't be same if hash is different
+      }
+
+      final a = o;
+      return ListEquality().equals(returnStates, a.returnStates) &&
+          ListEquality().equals(parents, a.parents);
+    }
+    return false;
+  }
+
+  @override
+  String toString() {
+    if (isEmpty) return '[]';
+    final buf = StringBuffer();
+    buf.write('[');
+    for (var i = 0; i < returnStates.length; i++) {
+      if (i > 0) buf.write(', ');
+      if (returnStates[i] == PredictionContext.EMPTY_RETURN_STATE) {
+        buf.write(r'$');
+        continue;
+      }
+      buf.write(returnStates[i]);
+      if (parents[i] != null) {
+        buf.write(' ');
+        buf.write(parents[i].toString());
+      } else {
+        buf.write('null');
+      }
+    }
+    buf.write(']');
+    return buf.toString();
+  }
+}
diff --git a/runtime/Dart/lib/src/recognizer.dart b/runtime/Dart/lib/src/recognizer.dart
new file mode 100644
index 000000000..78180b53c
--- /dev/null
+++ b/runtime/Dart/lib/src/recognizer.dart
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+import 'vocabulary.dart';
+import 'atn/atn.dart';
+import 'error/error.dart';
+import 'input_stream.dart';
+import 'rule_context.dart';
+import 'token.dart';
+import 'token_factory.dart';
+import 'util/utils.dart';
+
+abstract class Recognizer<ATNInterpreter extends ATNSimulator> {
+  static const EOF = -1;
+
+  static final Map<Vocabulary, Map<String, int>> tokenTypeMapCache = {};
+  static final Map<List<String>, Map<String, int>> ruleIndexMapCache = {};
+  final List<ErrorListener> _listeners = [ConsoleErrorListener.INSTANCE];
+
+  /// The ATN interpreter used by the recognizer for prediction.
+  ATNInterpreter interpreter;
+  int _stateNumber = -1;
+
+  List<String> get ruleNames;
+
+  /// Get the vocabulary used by the recognizer.
+  ///
+  /// @return A [Vocabulary] instance providing information about the
+  /// vocabulary used by the grammar.
+  Vocabulary get vocabulary;
+
+  /// Get a map from token names to token types.
+  ///
+  /// <p>Used for XPath and tree pattern compilation.</p>
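+  ///
+  /// <p>A sketch of typical usage; the token name {@code 'ID'} is
+  /// illustrative and not defined by this API:</p>
+  ///
+  /// <pre>
+  /// final idType = recognizer.tokenTypeMap['ID'] ?? Token.INVALID_TYPE;
+  /// </pre>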
+  Map<String, int> get tokenTypeMap {
+    final _vocabulary = vocabulary;
+
+    var result = tokenTypeMapCache[_vocabulary];
+    if (result == null) {
+      result = {};
+      for (var i = 0; i <= getATN().maxTokenType; i++) {
+        final literalName = _vocabulary.getLiteralName(i);
+        if (literalName != null) {
+          result[literalName] = i;
+        }
+
+        final symbolicName = _vocabulary.getSymbolicName(i);
+        if (symbolicName != null) {
+          result[symbolicName] = i;
+        }
+      }
+
+      result['EOF'] = Token.EOF;
+      result = Map.unmodifiable(result);
+      tokenTypeMapCache[_vocabulary] = result;
+    }
+
+    return result;
+  }
+
+  /// Get a map from rule names to rule indexes.
+  ///
+  /// <p>Used for XPath and tree pattern compilation.</p>
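+  ///
+  /// <p>A sketch of typical usage; the rule name {@code 'expr'} is
+  /// illustrative and not defined by this API:</p>
+  ///
+  /// <pre>
+  /// final exprIndex = parser.ruleIndexMap['expr'];
+  /// </pre>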
+  Map<String, int> get ruleIndexMap {
+    final _ruleNames = ruleNames;
+    if (_ruleNames == null) {
+      throw UnsupportedError(
+          'The current recognizer does not provide a list of rule names.');
+    }
+
+    var result = ruleIndexMapCache[_ruleNames];
+    if (result == null) {
+      result = Map.unmodifiable(toMap(_ruleNames));
+      ruleIndexMapCache[_ruleNames] = result;
+    }
+
+    return result;
+  }
+
+  int getTokenType(String tokenName) {
+    final ttype = tokenTypeMap[tokenName];
+    if (ttype != null) return ttype;
+    return Token.INVALID_TYPE;
+  }
+
+  /// If this recognizer was generated, it will have a serialized ATN
+  /// representation of the grammar.
+  ///
+  /// <p>For interpreters, we don't know their serialized ATN despite having
+  /// created the interpreter from it.</p>
+ String get serializedATN { + throw UnsupportedError('there is no serialized ATN'); + } + + /// For debugging and other purposes, might want the grammar name. + /// Have ANTLR generate an implementation for this method. + String get grammarFileName; + + /// Get the [ATN] used by the recognizer for prediction. + /// + /// @return The [ATN] used by the recognizer for prediction. + ATN getATN(); + + /// If profiling during the parse/lex, this will return DecisionInfo records + /// for each decision in recognizer in a ParseInfo object. + /// + /// @since 4.3 + ParseInfo get parseInfo { + return null; + } + + /// What is the error header, normally line/character position information? */ + String getErrorHeader(RecognitionException e) { + final line = e.offendingToken.line; + final charPositionInLine = e.offendingToken.charPositionInLine; + return 'line $line:$charPositionInLine'; + } + + /// @exception NullPointerException if [listener] is null. + void addErrorListener(ErrorListener listener) { + if (listener == null) { + throw ArgumentError.notNull('listener'); + } + + _listeners.add(listener); + } + + void removeErrorListener(ErrorListener listener) { + _listeners.remove(listener); + } + + void removeErrorListeners() { + _listeners.clear(); + } + + List get errorListeners { + return _listeners; + } + + ErrorListener get errorListenerDispatch { + return ProxyErrorListener(errorListeners); + } + + // subclass needs to override these if there are sempreds or actions + // that the ATN interp needs to execute + bool sempred(RuleContext _localctx, int ruleIndex, int actionIndex) { + return true; + } + + bool precpred(RuleContext localctx, int precedence) { + return true; + } + + void action(RuleContext _localctx, int ruleIndex, int actionIndex) {} + + int get state { + return _stateNumber; + } + + /// Indicate that the recognizer has changed internal state that is + /// consistent with the ATN state passed in. This way we always know + /// where we are in the ATN as the parser goes along. The rule + /// context objects form a stack that lets us see the stack of + /// invoking rules. Combine this and we have complete ATN + /// configuration information. + set state(int atnState) { +// System.err.println("setState "+atnState); + _stateNumber = atnState; +// if ( traceATNStates ) _ctx.trace(atnState); + } + + IntStream get inputStream; + + set inputStream(IntStream input); + + TokenFactory get tokenFactory; + + set tokenFactory(TokenFactory input); +} diff --git a/runtime/Dart/lib/src/rule_context.dart b/runtime/Dart/lib/src/rule_context.dart new file mode 100644 index 000000000..1932aa059 --- /dev/null +++ b/runtime/Dart/lib/src/rule_context.dart @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'atn/atn.dart'; +import 'interval_set.dart'; +import 'parser.dart'; +import 'parser_rule_context.dart'; +import 'recognizer.dart'; +import 'tree/tree.dart'; + +/// A rule context is a record of a single rule invocation. +/// +/// We form a stack of these context objects using the parent +/// pointer. A parent pointer of null indicates that the current +/// context is the bottom of the stack. The ParserRuleContext subclass +/// as a children list so that we can turn this data structure into a +/// tree. +/// +/// The root node always has a null pointer and invokingState of -1. 
+/// +/// Upon entry to parsing, the first invoked rule function creates a +/// context object (a subclass specialized for that rule such as +/// SContext) and makes it the root of a parse tree, recorded by field +/// Parser._ctx. +/// +/// public final SContext s() throws RecognitionException { +/// SContext _localctx = new SContext(_ctx, getState()); <-- create new node +/// enterRule(_localctx, 0, RULE_s); <-- push it +/// ... +/// exitRule(); <-- pop back to _localctx +/// return _localctx; +/// } +/// +/// A subsequent rule invocation of r from the start rule s pushes a +/// new context object for r whose parent points at s and use invoking +/// state is the state with r emanating as edge label. +/// +/// The invokingState fields from a context object to the root +/// together form a stack of rule indication states where the root +/// (bottom of the stack) has a -1 sentinel value. If we invoke start +/// symbol s then call r1, which calls r2, the would look like +/// this: +/// +/// SContext[-1] <- root node (bottom of the stack) +/// R1Context[p] <- p in rule s called r1 +/// R2Context[q] <- q in rule r1 called r2 +/// +/// So the top of the stack, _ctx, represents a call to the current +/// rule and it holds the return address from another rule that invoke +/// to this rule. To invoke a rule, we must always have a current context. +/// +/// The parent contexts are useful for computing lookahead sets and +/// getting error information. +/// +/// These objects are used during parsing and prediction. +/// For the special case of parsers, we use the subclass +/// ParserRuleContext. +/// +/// @see ParserRuleContext +abstract class RuleContext extends RuleNode { + /// What context invoked this rule? + @override + RuleContext parent; + + /// What state invoked the rule associated with this context? + /// The "return address" is the followState of invokingState + /// If parent is null, this should be -1. + int invokingState; + + RuleContext({this.parent, this.invokingState}) { + invokingState = invokingState ?? -1; + } + + int depth() { + var n = 0; + var p = this; + while (p != null) { + p = p.parent; + n++; + } + return n; + } + + /// A context is empty if there is no invoking state; meaning nobody call + /// current context. + bool get isEmpty => invokingState == -1; + + /// satisfy the ParseTree / SyntaxTree interface + @override + Interval get sourceInterval => Interval.INVALID; + + @override + RuleContext get ruleContext => this; + + @override + RuleContext get payload => this; + + /// Return the combined text of all child nodes. This method only considers + /// tokens which have been added to the parse tree. + ///
+ /// Since tokens on hidden channels (e.g. whitespace or comments) are not + /// added to the parse trees, they will not appear in the output of this + /// method. + @override + String get text { + if (childCount == 0) { + return ''; + } + + final builder = StringBuffer(); + for (var i = 0; i < childCount; i++) { + builder.write(getChild(i).text); + } + + return builder.toString(); + } + + int get ruleIndex => -1; + + /// For rule associated with this parse tree internal node, return + /// the outer alternative number used to match the input. Default + /// implementation does not compute nor store this alt num. Create + /// a subclass of ParserRuleContext with backing field and set + /// option contextSuperClass. + /// to set it. + int get altNumber => ATN.INVALID_ALT_NUMBER; + + /// Set the outer alternative number for this context node. Default + /// implementation does nothing to avoid backing field overhead for + /// trees that don't need it. Create + /// a subclass of ParserRuleContext with backing field and set + /// option contextSuperClass. + set altNumber(int altNumber) {} + + @override + ParseTree getChild(int i) { + return null; + } + + @override + int get childCount => 0; + + @override + T accept(ParseTreeVisitor visitor) { + return visitor.visitChildren(this); + } + + /// Print out a whole tree, not just a node, in LISP format + /// (root child1 .. childN). Print just a node if this is a leaf. + /// + @override + String toStringTree({List ruleNames, Parser parser}) { + return Trees.toStringTree(this, ruleNames: ruleNames, recog: parser); + } + + @override + String toString( + {List ruleNames, Recognizer recog, RuleContext stop}) { + ruleNames = ruleNames ?? recog?.ruleNames; + final buf = StringBuffer(); + var p = this; + buf.write('['); + while (p != null && p != stop) { + if (ruleNames == null) { + if (!p.isEmpty) { + buf.write(p.invokingState); + } + } else { + final ruleIndex = p.ruleIndex; + final ruleName = ruleIndex >= 0 && ruleIndex < ruleNames.length + ? ruleNames[ruleIndex] + : ruleIndex.toString(); + buf.write(ruleName); + } + + if (p.parent != null && + (ruleNames != null || !p.parent.isEmpty)) { + buf.write(' '); + } + + p = p.parent; + } + + buf.write(']'); + return buf.toString(); + } + + static final EMPTY = ParserRuleContext(); +} diff --git a/runtime/Dart/lib/src/runtime_meta_data.dart b/runtime/Dart/lib/src/runtime_meta_data.dart new file mode 100644 index 000000000..37232e4de --- /dev/null +++ b/runtime/Dart/lib/src/runtime_meta_data.dart @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:developer'; +import 'dart:math' as math; + +import 'package:logging/logging.dart'; + +/// This class provides access to the current version of the ANTLR 4 runtime +/// library as compile-time and runtime constants, along with methods for +/// checking for matching version numbers and notifying listeners in the case +/// where a version mismatch is detected. +/// +///
+/// <p>
+/// The runtime version information is provided by {@link #VERSION} and
+/// {@link #getRuntimeVersion()}. Detailed information about these values is
+/// provided in the documentation for each member.</p>
+///
+/// <p>
+/// The runtime version check is implemented by {@link #checkVersion}. Detailed
+/// information about incorporating this call into user code, as well as its use
+/// in generated code, is provided in the documentation for the method.</p>
+///
+/// <p>
+/// Version strings x.y and x.y.z are considered "compatible" and no error
+/// would be generated. Likewise, version strings x.y-SNAPSHOT and x.y.z are
+/// considered "compatible" because the major and minor components x.y
+/// are the same in each.</p>
+///
+/// <p>
+/// To trap any error messages issued by this code, use System.setErr()
+/// in your main() startup code.</p>
+///
+/// @since 4.3
+class RuntimeMetaData {
+  /// A compile-time constant containing the current version of the ANTLR 4
+  /// runtime library.
+  ///
+  /// <p>
+  /// This compile-time constant value allows generated parsers and other
+  /// libraries to include a literal reference to the version of the ANTLR 4
+  /// runtime library the code was compiled against. At each release, we
+  /// change this value.</p>
+  ///
+  /// <p>Version numbers are assumed to have the form
+  ///
+  /// major.minor.patch.revision-suffix,
+  ///
+  /// with the individual components defined as follows.</p>
+  ///
+  /// <ul>
+  /// <li>major is a required non-negative integer, and is equal to
+  /// {@code 4} for ANTLR 4.</li>
+  /// <li>minor is a required non-negative integer.</li>
+  /// <li>patch is an optional non-negative integer. When
+  /// patch is omitted, the {@code .} (dot) appearing before it is
+  /// also omitted.</li>
+  /// <li>revision is an optional non-negative integer, and may only
+  /// be included when patch is also included. When revision
+  /// is omitted, the {@code .} (dot) appearing before it is also omitted.</li>
+  /// <li>suffix is an optional string. When suffix is
+  /// omitted, the {@code -} (hyphen-minus) appearing before it is also
+  /// omitted.</li>
+  /// </ul>
+ static final String VERSION = '4.8'; + + /// Gets the currently executing version of the ANTLR 4 runtime library. + /// + ///
+  /// <p>
+  /// This method provides runtime access to the [VERSION] field, as
+  /// opposed to directly referencing the field as a compile-time constant.</p>
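+  ///
+  /// <pre>
+  /// // A sketch: report the executing runtime version at startup.
+  /// print('ANTLR runtime ${RuntimeMetaData.runtimeVersion}');
+  /// </pre>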
+ /// + /// @return The currently executing version of the ANTLR 4 library + static String get runtimeVersion { + return VERSION; + } + + /// This method provides the ability to detect mismatches between the version + /// of ANTLR 4 used to generate a parser, the version of the ANTLR runtime a + /// parser was compiled against, and the version of the ANTLR runtime which + /// is currently executing. + /// + ///
+  /// <p>The version check is designed to detect the following two specific
+  /// scenarios.</p>
+  ///
+  /// <ul>
+  /// <li>The ANTLR Tool version used for code generation does not match the
+  /// currently executing runtime version.</li>
+  /// <li>The ANTLR Runtime version referenced at the time a parser was
+  /// compiled does not match the currently executing runtime version.</li>
+  /// </ul>
+  ///
+  /// <p>
+  /// Starting with ANTLR 4.3, the code generator emits a call to this method
+  /// using two constants in each generated lexer and parser: a hard-coded
+  /// constant indicating the version of the tool used to generate the parser
+  /// and a reference to the compile-time constant {@link #VERSION}. At
+  /// runtime, this method is called during the initialization of the generated
+  /// parser to detect mismatched versions, and notify the registered listeners
+  /// prior to creating instances of the parser.</p>
+  ///
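+  /// <p>A sketch of the call as it appears at the top of a generated
+  /// recognizer (the tool version string shown here is illustrative):</p>
+  ///
+  /// <pre>
+  /// RuntimeMetaData.checkVersion('4.8', RuntimeMetaData.VERSION);
+  /// </pre>
+  ///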
+  /// <p>
+  /// This method does not perform any detection or filtering of semantic
+  /// changes between tool and runtime versions. It simply checks for a
+  /// version match and emits an error to stderr if a difference
+  /// is detected.</p>
+  ///
+  /// <p>
+  /// Note that some breaking changes between releases could result in other
+  /// types of runtime exceptions, such as a [LinkageError], prior to
+  /// calling this method. In these cases, the underlying version mismatch will
+  /// not be reported here. This method is primarily intended to
+  /// notify users of potential semantic changes between releases that do not
+  /// result in binary compatibility problems which would be detected by the
+  /// class loader. As with semantic changes, changes that break binary
+  /// compatibility between releases are mentioned in the release notes
+  /// accompanying the affected release.</p>
+  ///
+  /// <p>
+  /// Additional note for target developers: The version check
+  /// implemented by this class is designed to address specific compatibility
+  /// concerns that may arise during the execution of Java applications. Other
+  /// targets should consider the implementation of this method in the context
+  /// of that target's known execution environment, which may or may not
+  /// resemble the design provided for the Java target.</p>
+ /// + /// @param generatingToolVersion The version of the tool used to generate a parser. + /// This value may be null when called from user code that was not generated + /// by, and does not reference, the ANTLR 4 Tool itself. + /// @param compileTimeVersion The version of the runtime the parser was + /// compiled against. This should always be passed using a direct reference + /// to [VERSION]. + static void checkVersion( + String generatingToolVersion, String compileTimeVersion) { + final runtimeVersion = VERSION; + var runtimeConflictsWithGeneratingTool = false; + var runtimeConflictsWithCompileTimeTool = false; + + if (generatingToolVersion != null) { + runtimeConflictsWithGeneratingTool = + !(runtimeVersion == generatingToolVersion) && + !(getMajorMinorVersion(runtimeVersion) == + getMajorMinorVersion(generatingToolVersion)); + } + + runtimeConflictsWithCompileTimeTool = + !(runtimeVersion == compileTimeVersion) && + !(getMajorMinorVersion(runtimeVersion) == + getMajorMinorVersion(compileTimeVersion)); + + if (runtimeConflictsWithGeneratingTool) { + log('ANTLR Tool version $generatingToolVersion used for code generation does not match the current runtime version $runtimeVersion', + level: Level.SEVERE.value); + } + if (runtimeConflictsWithCompileTimeTool) { + log('ANTLR Runtime version $compileTimeVersion used for parser compilation does not match the current runtime version $runtimeVersion', + level: Level.SEVERE.value); + } + } + + /// Gets the major and minor version numbers from a version string. For + /// details about the syntax of the input [version]. + /// E.g., from x.y.z return x.y. + /// + /// @param version The complete version string. + /// @return A string of the form major.minor containing + /// only the major and minor components of the version string. + static String getMajorMinorVersion(String version) { + final firstDot = version.indexOf('.'); + final secondDot = firstDot >= 0 ? version.indexOf('.', firstDot + 1) : -1; + final firstDash = version.indexOf('-'); + var referenceLength = version.length; + if (secondDot >= 0) { + referenceLength = math.min(referenceLength, secondDot); + } + + if (firstDash >= 0) { + referenceLength = math.min(referenceLength, firstDash); + } + + return version.substring(0, referenceLength); + } +} diff --git a/runtime/Dart/lib/src/token.dart b/runtime/Dart/lib/src/token.dart new file mode 100644 index 000000000..49c1b43cb --- /dev/null +++ b/runtime/Dart/lib/src/token.dart @@ -0,0 +1,431 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'input_stream.dart'; +import 'interval_set.dart'; +import 'misc/pair.dart'; +import 'recognizer.dart'; +import 'token_source.dart'; + +/// A token has properties: text, type, line, character position in the line +/// (so we can ignore tabs), token channel, index, and source from which +/// we obtained this token. +abstract class Token { + static const int INVALID_TYPE = 0; + + /// During lookahead operations, this "token" signifies we hit rule end ATN state + /// and did not follow it despite needing to. + static const int EPSILON = -2; + + static const int MIN_USER_TOKEN_TYPE = 1; + + static const int EOF = IntStream.EOF; + + /// All tokens go to the parser (unless skip() is called in that rule) + /// on a particular "channel". The parser tunes to a particular channel + /// so that whitespace etc... 
can go to the parser on a "hidden" channel. + static const int DEFAULT_CHANNEL = 0; + + /// Anything on different channel than DEFAULT_CHANNEL is not parsed + /// by parser. + static const int HIDDEN_CHANNEL = 1; + + /// This is the minimum constant value which can be assigned to a + /// user-defined token channel. + /// + ///
+  /// <p>
+  /// The non-negative numbers less than {@link #MIN_USER_CHANNEL_VALUE} are
+  /// assigned to the predefined channels {@link #DEFAULT_CHANNEL} and
+  /// {@link #HIDDEN_CHANNEL}.</p>
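+  ///
+  /// <pre>
+  /// // A sketch: user-defined channel numbers must be >= MIN_USER_CHANNEL_VALUE.
+  /// const int COMMENTS_CHANNEL = Token.MIN_USER_CHANNEL_VALUE;
+  /// </pre>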
+ /// + /// @see Token#getChannel() + static const int MIN_USER_CHANNEL_VALUE = 2; + + /// Get the text of the token. + String get text; + + /// Get the token type of the token */ + int get type; + + /// The line number on which the 1st character of this token was matched, + /// line=1..n + int get line; + + /// The index of the first character of this token relative to the + /// beginning of the line at which it occurs, 0..n-1 + int get charPositionInLine; + + /// Return the channel this token. Each token can arrive at the parser + /// on a different channel, but the parser only "tunes" to a single channel. + /// The parser ignores everything not on DEFAULT_CHANNEL. + int get channel; + + /// An index from 0..n-1 of the token object in the input stream. + /// This must be valid in order to print token streams and + /// use TokenRewriteStream. + /// + /// Return -1 to indicate that this token was conjured up since + /// it doesn't have a valid index. + int get tokenIndex; + + /// The starting character index of the token + /// This method is optional; return -1 if not implemented. + int get startIndex; + + /// The last character index of the token. + /// This method is optional; return -1 if not implemented. + int get stopIndex; + + /// Gets the [TokenSource] which created this token. + TokenSource get tokenSource; + + /// Gets the [CharStream] from which this token was derived. + CharStream get inputStream; +} + +abstract class WritableToken extends Token { + set text(String text); + + set type(int ttype); + + set line(int line); + + set charPositionInLine(int pos); + + set channel(int channel); + + set tokenIndex(int index); +} + +class CommonToken extends WritableToken { + /// An empty [Pair] which is used as the default value of + /// {@link #source} for tokens that do not have a source. + static const Pair EMPTY_SOURCE = + Pair(null, null); + + @override + int type; + + @override + int line; + + @override + int charPositionInLine = -1; // set to invalid position + + @override + int channel = Token.DEFAULT_CHANNEL; + + /// These properties share a field to reduce the memory footprint of + /// [CommonToken]. Tokens created by a [CommonTokenFactory] from + /// the same source and input stream share a reference to the same + /// [Pair] containing these values.
+ Pair source; + + /// This is the backing field for {@link #getText} when the token text is + /// explicitly set in the constructor or via {@link #setText}. + /// + /// @see #getText() + String _text; + + @override + int tokenIndex = -1; + + @override + int startIndex; + + @override + int stopIndex; + + /// Constructs a new [CommonToken] with the specified token type and + /// text. + /// + /// @param type The token type. + /// @param text The text of the token. + CommonToken(this.type, + {this.source = EMPTY_SOURCE, + this.channel = Token.DEFAULT_CHANNEL, + this.startIndex, + this.stopIndex, + text}) { + _text = text; + if (source.a != null) { + line = source.a.line; + charPositionInLine = source.a.charPositionInLine; + } + } + + /// Constructs a new [CommonToken] as a copy of another [Token]. + /// + ///
+  /// <p>
+  /// If [oldToken] is also a [CommonToken] instance, the newly
+  /// constructed token will share a reference to the {@link #text} field and
+  /// the [Pair] stored in {@link #source}. Otherwise, {@link #text} will
+  /// be assigned the result of calling {@link #getText}, and {@link #source}
+  /// will be constructed from the result of {@link Token#getTokenSource} and
+  /// {@link Token#getInputStream}.</p>
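+  ///
+  /// <pre>
+  /// // A sketch: copy a token ('original' is any existing Token), then
+  /// // adjust the copy without touching the original.
+  /// final copy = CommonToken.copy(original)..channel = Token.HIDDEN_CHANNEL;
+  /// </pre>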
+ /// + /// @param oldToken The token to copy. + CommonToken.copy(Token oldToken) { + type = oldToken.type; + line = oldToken.line; + tokenIndex = oldToken.tokenIndex; + charPositionInLine = oldToken.charPositionInLine; + channel = oldToken.channel; + startIndex = oldToken.startIndex; + stopIndex = oldToken.stopIndex; + + if (oldToken is CommonToken) { + _text = oldToken.text; + source = oldToken.source; + } else { + _text = oldToken.text; + source = Pair( + oldToken.tokenSource, oldToken.inputStream); + } + } + + @override + String get text { + if (_text != null) { + return _text; + } + + final input = inputStream; + if (input == null) return null; + final n = input.size; + if (startIndex < n && stopIndex < n) { + return input.getText(Interval.of(startIndex, stopIndex)); + } else { + return ''; + } + } + + /// Explicitly set the text for this token. If {code text} is not + /// null, then {@link #getText} will return this value rather than + /// extracting the text from the input. + /// + /// @param text The explicit text of the token, or null if the text + /// should be obtained from the input along with the start and stop indexes + /// of the token. + @override + set text(String text) { + _text = text; + } + + @override + TokenSource get tokenSource { + return source.a; + } + + @override + CharStream get inputStream { + return source.b; + } + + @override + String toString([Recognizer r]) { + var txt = text; + if (txt != null) { + txt = txt + .replaceAll('\n', r'\n') + .replaceAll('\r', r'\r') + .replaceAll('\t', r'\t'); + } else { + txt = ''; + } + return "[@$tokenIndex,$startIndex:$stopIndex='$txt',<$type>" + + (channel > 0 ? ',channel=$channel' : '') + + ',$line:$charPositionInLine]'; + } +} + +/// A [Token] object representing an entire subtree matched by a parser +/// rule; e.g., {@code }. These tokens are created for [TagChunk] +/// chunks where the tag corresponds to a parser rule. +class RuleTagToken implements Token { + /// Gets the name of the rule associated with this rule tag. + /// + /// @return The name of the parser rule associated with this rule tag. + final String ruleName; + + /// The token type for the current token. This is the token type assigned to + /// the bypass alternative for the rule during ATN deserialization. + final int bypassTokenType; + + /// Gets the label associated with the rule tag. + /// + /// @return The name of the label associated with the rule tag, or + /// null if this is an unlabeled rule tag. + final String label; + + /// Constructs a new instance of [RuleTagToken] with the specified rule + /// name, bypass token type, and label. + /// + /// @param ruleName The name of the parser rule this rule tag matches. + /// @param bypassTokenType The bypass token type assigned to the parser rule. + /// @param label The label associated with the rule tag, or null if + /// the rule tag is unlabeled. + /// + /// @exception ArgumentError.value(value) if [ruleName] is null + /// or empty. + RuleTagToken(this.ruleName, this.bypassTokenType, [this.label]) { + if (ruleName == null || ruleName.isEmpty) { + throw ArgumentError.value( + ruleName, 'ruleName', 'cannot be null or empty.'); + } + } + + /// {@inheritDoc} + /// + ///
+  /// <p>Rule tag tokens are always placed on the {@link #DEFAULT_CHANNEL}.</p>
+ + @override + int get channel { + return Token.DEFAULT_CHANNEL; + } + + /// {@inheritDoc} + /// + ///
+  /// <p>This method returns the rule tag formatted with {@code <} and {@code >}
+  /// delimiters.</p>
+ + @override + String get text { + if (label != null) { + return '<' + label + ':' + ruleName + '>'; + } + + return '<' + ruleName + '>'; + } + + /// {@inheritDoc} + /// + ///
+  /// <p>Rule tag tokens have types assigned according to the rule bypass
+  /// transitions created during ATN deserialization.</p>
+ + @override + int get type { + return bypassTokenType; + } + + /// {@inheritDoc} + /// + ///
+  /// <p>The implementation for [RuleTagToken] always returns 0.</p>
+ + @override + int get line { + return 0; + } + + /// {@inheritDoc} + /// + ///
+  /// <p>The implementation for [RuleTagToken] always returns -1.</p>
+ @override + int get charPositionInLine { + return -1; + } + + /// {@inheritDoc} + /// + ///
+  /// <p>The implementation for [RuleTagToken] always returns -1.</p>
+ @override + int get tokenIndex { + return -1; + } + + /// {@inheritDoc} + /// + ///
+  /// <p>The implementation for [RuleTagToken] always returns -1.</p>
+ @override + int get startIndex { + return -1; + } + + /// {@inheritDoc} + /// + ///
+  /// <p>The implementation for [RuleTagToken] always returns -1.</p>
+ + @override + int get stopIndex { + return -1; + } + + /// {@inheritDoc} + /// + ///
+  /// <p>The implementation for [RuleTagToken] always returns null.</p>
+ + @override + TokenSource get tokenSource { + return null; + } + + /// {@inheritDoc} + /// + ///
+  /// <p>The implementation for [RuleTagToken] always returns null.</p>
+ + @override + CharStream get inputStream { + return null; + } + + /// {@inheritDoc} + /// + ///
+  /// <p>The implementation for [RuleTagToken] returns a string of the form
+  /// {@code ruleName:bypassTokenType}.</p>
+ + @override + String toString() { + return ruleName + ':$bypassTokenType'; + } +} + +/// A [Token] object representing a token of a particular type; e.g., +/// {@code }. These tokens are created for [TagChunk] chunks where the +/// tag corresponds to a lexer rule or token type. +class TokenTagToken extends CommonToken { + /// Gets the token name. + /// @return The token name. + final String tokenName; + + /// Gets the label associated with the rule tag. + /// + /// @return The name of the label associated with the rule tag, or + /// null if this is an unlabeled rule tag. + final String label; + + /// Constructs a new instance of [TokenTagToken] with the specified + /// token name, type, and label. + /// + /// @param tokenName The token name. + /// @param type The token type. + /// @param label The label associated with the token tag, or null if + /// the token tag is unlabeled. + TokenTagToken(this.tokenName, type, [this.label]) : super(type); + + /// {@inheritDoc} + /// + ///
+  /// <p>The implementation for [TokenTagToken] returns the token tag
+  /// formatted with {@code <} and {@code >} delimiters.</p>
+ + @override + String get text { + if (label != null) { + return '<' + label + ':' + tokenName + '>'; + } + + return '<' + tokenName + '>'; + } + + /// {@inheritDoc} + /// + ///
+  /// <p>The implementation for [TokenTagToken] returns a string of the form
+  /// {@code tokenName:type}.</p>
+ + @override + String toString([recognizer]) { + return tokenName + ':$type'; + } +} diff --git a/runtime/Dart/lib/src/token_factory.dart b/runtime/Dart/lib/src/token_factory.dart new file mode 100644 index 000000000..cbfe820f2 --- /dev/null +++ b/runtime/Dart/lib/src/token_factory.dart @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'input_stream.dart'; +import 'interval_set.dart'; +import 'misc/pair.dart'; +import 'token.dart'; +import 'token_source.dart'; + +/// The default mechanism for creating tokens. It's used by default in Lexer and +/// the error handling strategy (to create missing tokens). Notifying the parser +/// of a new factory means that it notifies its token source and error strategy. +abstract class TokenFactory { + /// This is the method used to create tokens in the lexer and in the + /// error handling strategy. If text!=null, than the start and stop positions + /// are wiped to -1 in the text override is set in the CommonToken. + Symbol create(int type, String text, + [Pair source, + int channel, + int start, + int stop, + int line, + int charPositionInLine]); +} + +/// This default implementation of [TokenFactory] creates +/// [CommonToken] objects. +class CommonTokenFactory implements TokenFactory { + /// The default [CommonTokenFactory] instance. + /// + ///
+  /// <p>
+  /// This token factory does not explicitly copy token text when constructing
+  /// tokens.</p>
+ static final TokenFactory DEFAULT = CommonTokenFactory(); + + /// Indicates whether {@link CommonToken#setText} should be called after + /// constructing tokens to explicitly set the text. This is useful for cases + /// where the input stream might not be able to provide arbitrary substrings + /// of text from the input after the lexer creates a token (e.g. the + /// implementation of {@link CharStream#getText} in + /// [UnbufferedCharStream] throws an + /// [UnsupportedOperationException]). Explicitly setting the token text + /// allows {@link Token#getText} to be called at any time regardless of the + /// input stream implementation. + /// + ///
+  /// <p>
+  /// The default value is [false] to avoid the performance and memory
+  /// overhead of copying text for every token unless explicitly requested.</p>
+ final bool copyText; + + /// Constructs a [CommonTokenFactory] with the specified value for + /// {@link #copyText}. + /// + ///
+  /// <p>
+  /// When [copyText] is [false], the {@link #DEFAULT} instance
+  /// should be used instead of constructing a new instance.</p>
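+  ///
+  /// <pre>
+  /// // A sketch: opt in to eager copying of token text.
+  /// final factory = CommonTokenFactory(true);
+  /// </pre>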
+ /// + /// @param copyText The value for {@link #copyText}. + CommonTokenFactory([this.copyText = false]); + + @override + CommonToken create(int type, String text, + [Pair source, + int channel, + int start, + int stop, + int line, + int charPositionInLine]) { + if (source == null) { + return CommonToken(type, text: text); + } + + final t = CommonToken(type, + source: source, channel: channel, startIndex: start, stopIndex: stop); + t.line = line; + t.charPositionInLine = charPositionInLine; + if (text != null) { + t.text = text; + } else if (copyText && source.b != null) { + t.text = source.b.getText(Interval.of(start, stop)); + } + + return t; + } +} diff --git a/runtime/Dart/lib/src/token_source.dart b/runtime/Dart/lib/src/token_source.dart new file mode 100644 index 000000000..05cfabe03 --- /dev/null +++ b/runtime/Dart/lib/src/token_source.dart @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:math'; + +import 'input_stream.dart'; +import 'misc/pair.dart'; +import 'token.dart'; +import 'token_factory.dart'; + +/// A source of tokens must provide a sequence of tokens via {@link #nextToken()} +/// and also must reveal it's source of characters; [CommonToken]'s text is +/// computed from a [CharStream]; it only store indices into the char +/// stream. +/// +///
+/// <p>Errors from the lexer are never passed to the parser. Either you want to keep
+/// going or you do not upon token recognition error. If you do not want to
+/// continue lexing then you do not want to continue parsing. Just throw an
+/// exception not under [RecognitionException] and Java will naturally toss
+/// you all the way out of the recognizers. If you want to continue lexing then
+/// you should not throw an exception to the parser--it has already requested a
+/// token. Keep lexing until you get a valid one. Just report errors and keep
+/// going, looking for a valid token.</p>
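+///
+/// <pre>
+/// // A sketch: drain any TokenSource ('source' here) until EOF.
+/// Token t;
+/// do {
+///   t = source.nextToken();
+/// } while (t.type != Token.EOF);
+/// </pre>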
+abstract class TokenSource { + /// Return a [Token] object from your input stream (usually a + /// [CharStream]). Do not fail/return upon lexing error; keep chewing + /// on the characters until you get a good one; errors are not passed through + /// to the parser. + Token nextToken(); + + /// Get the line number for the current position in the input stream. The + /// first line in the input is line 1. + /// + /// @return The line number for the current position in the input stream, or + /// 0 if the current token source does not track line numbers. + int get line; + + /// Get the index into the current line for the current position in the input + /// stream. The first character on a line has position 0. + /// + /// @return The line number for the current position in the input stream, or + /// -1 if the current token source does not track character positions. + int get charPositionInLine; + + /// Get the [CharStream] from which this token source is currently + /// providing tokens. + /// + /// @return The [CharStream] associated with the current position in + /// the input, or null if no input stream is available for the token + /// source. + CharStream get inputStream; + + /// Gets the name of the underlying input source. This method returns a + /// non-null, non-empty string. If such a name is not known, this method + /// returns {@link IntStream#UNKNOWN_SOURCE_NAME}. + String get sourceName; + + /// Set the [TokenFactory] this token source should use for creating + /// [Token] objects from the input. + /// + /// @param factory The [TokenFactory] to use for creating tokens. + set tokenFactory(TokenFactory factory); + + /// Gets the [TokenFactory] this token source is currently using for + /// creating [Token] objects from the input. + /// + /// @return The [TokenFactory] currently used by this token source. + TokenFactory get tokenFactory; +} + +/// Provides an implementation of [TokenSource] as a wrapper around a list +/// of [Token] objects. +/// +///
+/// <p>If the final token in the list is an {@link Token#EOF} token, it will be used
+/// as the EOF token for every call to {@link #nextToken} after the end of the
+/// list is reached. Otherwise, an EOF token will be created.</p>
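+///
+/// <pre>
+/// // A sketch: replay a previously collected token list through a
+/// // buffered stream ('myTokens' stands for that list).
+/// final source = ListTokenSource(myTokens);
+/// final stream = BufferedTokenStream(source);
+/// stream.fill();
+/// </pre>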
+class ListTokenSource implements TokenSource { + /// The wrapped collection of [Token] objects to return. + final List tokens; + + final String _sourceName; + + /// The index into {@link #tokens} of token to return by the next call to + /// {@link #nextToken}. The end of the input is indicated by this value + /// being greater than or equal to the number of items in {@link #tokens}. + int i; + + /// This field caches the EOF token for the token source. + Token eofToken; + + /// This is the backing field for {@link #getTokenFactory} and + /// [setTokenFactory]. + @override + TokenFactory tokenFactory = CommonTokenFactory.DEFAULT; + + /** + * Constructs a new [ListTokenSource] instance from the specified + * collection of [Token] objects. + * + * @param tokens The collection of [Token] objects to provide as a + * [TokenSource]. + * @exception NullPointerException if [tokens] is null + */ + + /// Constructs a new [ListTokenSource] instance from the specified + /// collection of [Token] objects and source name. + /// + /// @param tokens The collection of [Token] objects to provide as a + /// [TokenSource]. + /// @param sourceName The name of the [TokenSource]. If this value is + /// null, {@link #getSourceName} will attempt to infer the name from + /// the next [Token] (or the previous token if the end of the input has + /// been reached). + /// + /// @exception NullPointerException if [tokens] is null + ListTokenSource(this.tokens, [this._sourceName]) { + if (tokens == null) { + throw ArgumentError.notNull('tokens'); + } + } + + /// {@inheritDoc} + + @override + int get charPositionInLine { + if (i < tokens.length) { + return tokens[i].charPositionInLine; + } else if (eofToken != null) { + return eofToken.charPositionInLine; + } else if (tokens.isNotEmpty) { + // have to calculate the result from the line/column of the previous + // token, along with the text of the token. + final lastToken = tokens[tokens.length - 1]; + final tokenText = lastToken.text; + if (tokenText != null) { + final lastNewLine = tokenText.lastIndexOf('\n'); + if (lastNewLine >= 0) { + return tokenText.length - lastNewLine - 1; + } + } + + return lastToken.charPositionInLine + + lastToken.stopIndex - + lastToken.startIndex + + 1; + } + + // only reach this if tokens is empty, meaning EOF occurs at the first + // position in the input + return 0; + } + + /// {@inheritDoc} + + @override + Token nextToken() { + if (i >= tokens.length) { + if (eofToken == null) { + var start = -1; + if (tokens.isNotEmpty) { + final previousStop = tokens[tokens.length - 1].stopIndex; + if (previousStop != -1) { + start = previousStop + 1; + } + } + + final stop = max(-1, start - 1); + eofToken = tokenFactory.create(Token.EOF, 'EOF', Pair(this, inputStream), + Token.DEFAULT_CHANNEL, start, stop, line, charPositionInLine); + } + + return eofToken; + } + + final t = tokens[i]; + if (i == tokens.length - 1 && t.type == Token.EOF) { + eofToken = t; + } + + i++; + return t; + } + + /// {@inheritDoc} + + @override + int get line { + if (i < tokens.length) { + return tokens[i].line; + } else if (eofToken != null) { + return eofToken.line; + } else if (tokens.isNotEmpty) { + // have to calculate the result from the line/column of the previous + // token, along with the text of the token. 
+ final lastToken = tokens[tokens.length - 1]; + var line = lastToken.line; + + final tokenText = lastToken.text; + if (tokenText != null) { + for (var i = 0; i < tokenText.length; i++) { + if (tokenText[i] == '\n') { + line++; + } + } + } + + // if no text is available, assume the token did not contain any newline characters. + return line; + } + + // only reach this if tokens is empty, meaning EOF occurs at the first + // position in the input + return 1; + } + + /// {@inheritDoc} + + @override + CharStream get inputStream { + if (i < tokens.length) { + return tokens[i].inputStream; + } else if (eofToken != null) { + return eofToken.inputStream; + } else if (tokens.isNotEmpty) { + return tokens[tokens.length - 1].inputStream; + } + + // no input stream information is available + return null; + } + + /// The name of the input source. If this value is null, a call to + /// {@link #getSourceName} should return the source name used to create the + /// the next token in {@link #tokens} (or the previous token if the end of + /// the input has been reached). + @override + String get sourceName =>_sourceName ?? inputStream?.sourceName ?? 'List'; +} diff --git a/runtime/Dart/lib/src/token_stream.dart b/runtime/Dart/lib/src/token_stream.dart new file mode 100644 index 000000000..e434b54a7 --- /dev/null +++ b/runtime/Dart/lib/src/token_stream.dart @@ -0,0 +1,627 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ +import 'input_stream.dart'; +import 'interval_set.dart'; +import 'lexer.dart'; +import 'rule_context.dart'; +import 'token.dart'; +import 'token_source.dart'; + +/// An [IntStream] whose symbols are [Token] instances. +abstract class TokenStream extends IntStream { + /// Get the [Token] instance associated with the value returned by + /// {@link #LA LA(k)}. This method has the same pre- and post-conditions as + /// {@link IntStream#LA}. In addition, when the preconditions of this method + /// are met, the return value is non-null and the value of + /// {@code LT(k).getType()==LA(k)}. + /// + /// @see IntStream#LA + Token LT(int k); + + /// Gets the [Token] at the specified [index] in the stream. When + /// the preconditions of this method are met, the return value is non-null. + /// + ///
+  /// <p>The preconditions for this method are the same as the preconditions of
+  /// {@link IntStream#seek}. If the behavior of {@code seek(index)} is
+  /// unspecified for the current state and given [index], then the
+  /// behavior of this method is also unspecified.</p>
+  ///
+  /// <p>The symbol referred to by [index] differs from {@code seek()} only
+  /// in the case of filtering streams where [index] lies before the end
+  /// of the stream. Unlike {@code seek()}, this method does not adjust
+  /// [index] to point to a non-ignored symbol.</p>
+  ///
+  /// @throws IllegalArgumentException if {@code index} is less than 0
+  /// @throws UnsupportedOperationException if the stream does not support
+  /// retrieving the token at the specified index
+  Token get(int index);
+
+  /// Gets the underlying [TokenSource] which provides tokens for this
+  /// stream.
+  TokenSource get tokenSource;
+
+  /// Return the text of all tokens within the specified [interval]. This
+  /// method behaves like the following code (including potential exceptions
+  /// for violating preconditions of {@link #get}), but may be optimized by the
+  /// specific implementation.
+  ///
+  /// <pre>
+  /// TokenStream stream = ...;
+  /// String text = "";
+  /// for (int i = interval.a; i <= interval.b; i++) {
+  ///   text += stream.get(i).getText();
+  /// }
+  /// </pre>
+  ///
+  /// <pre>
+  /// TokenStream stream = ...;
+  /// String text = stream.getText(new Interval(0, stream.length));
+  /// </pre>
+  ///
+  /// <pre>
+  /// TokenStream stream = ...;
+  /// String text = stream.getText(ctx.getSourceInterval());
+  /// </pre>
+ /// + /// @param interval The interval of tokens within this stream to get text + /// for. + /// @return The text of all tokens / within the specified interval in this + /// stream. + String getText([Interval interval]); + + String get text; + + /// Return the text of all tokens in the source interval of the specified + /// context. This method behaves like the following code, including potential + /// exceptions from the call to {@link #getText(Interval)}, but may be + /// optimized by the specific implementation. + /// + ///
+  /// <p>If {@code ctx.getSourceInterval()} does not return a valid interval of
+  /// tokens provided by this stream, the behavior is unspecified.</p>
+ /// + /// @param ctx The context providing the source interval of tokens to get + /// text for. + /// @return The text of all tokens within the source interval of [ctx]. + String getTextFromCtx(RuleContext ctx); + + /// Return the text of all tokens in this stream between [start] and + /// [stop] (inclusive). + /// + ///
+  /// <p>If the specified [start] or [stop] token was not provided by
+  /// this stream, or if the [stop] occurred before the [start]
+  /// token, the behavior is unspecified.</p>
+  ///
+  /// <p>For streams which ensure that the {@link Token#getTokenIndex} method is
+  /// accurate for all of its provided tokens, this method behaves like the
+  /// following code. Other streams may implement this method in other ways
+  /// provided the behavior is consistent with this at a high level.</p>
+ /// + ///
+  /// <pre>
+  /// TokenStream stream = ...;
+  /// String text = "";
+  /// for (int i = start.getTokenIndex(); i <= stop.getTokenIndex(); i++) {
+  ///   text += stream.get(i).getText();
+  /// }
+  /// </pre>
+ /// + /// @param start The first token in the interval to get text for. + /// @param stop The last token in the interval to get text for (inclusive). + /// @return The text of all tokens lying between the specified [start] + /// and [stop] tokens. + /// + /// @throws UnsupportedOperationException if this stream does not support + /// this method for the specified tokens + String getTextRange(Token start, Token stop); +} + +/// This implementation of [TokenStream] loads tokens from a +/// [TokenSource] on-demand, and places the tokens in a buffer to provide +/// access to any previous token by index. +/// +///
+/// <p>
+/// This token stream ignores the value of {@link Token#getChannel}. If your
+/// parser requires the token stream to filter tokens to only those on a particular
+/// channel, such as {@link Token#DEFAULT_CHANNEL} or
+/// {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
+/// [CommonTokenStream].</p>
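+///
+/// <pre>
+/// // A sketch, where 'lexer' stands for an instance of a generated lexer:
+/// final tokens = BufferedTokenStream(lexer);
+/// tokens.fill();
+/// print(tokens.getText());
+/// </pre>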
+class BufferedTokenStream implements TokenStream { + /// The [TokenSource] from which tokens for this stream are fetched. + TokenSource _tokenSource; + + /// A collection of all tokens fetched from the token source. The list is + /// considered a complete view of the input once {@link #fetchedEOF} is set + /// to [true]. + List tokens = []; + + /// The index into [tokens] of the current token (next token to [consume]). + /// [tokens][p] should be [LT(1)]. + /// + ///
+  /// <p>This field is set to -1 when the stream is first constructed or when
+  /// [tokenSource] is set, indicating that the first token has
+  /// not yet been fetched from the token source. For additional information,
+  /// see the documentation of [IntStream] for a description of
+  /// Initializing Methods.</p>
+ int p = -1; + + /// Indicates whether the [Token.EOF] token has been fetched from + /// [tokenSource] and added to [tokens]. This field improves + /// performance for the following cases: + /// + ///
+  /// <ul>
+  /// <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
+  /// consuming the EOF symbol is optimized by checking the values of
+  /// {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
+  /// <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
+  /// {@link #tokens} is trivial with this field.</li>
+  /// </ul>
      + bool fetchedEOF = false; + + BufferedTokenStream(this._tokenSource) { + if (_tokenSource == null) { + throw ArgumentError.notNull('tokenSource'); + } + } + + @override + int get index => p; + + @override + int mark() { + return 0; + } + + @override + void release(int marker) { + // no resources to release + } + + @override + void seek(int index) { + lazyInit(); + p = adjustSeekIndex(index); + } + + @override + int get size { + return tokens.length; + } + + @override + void consume() { + bool skipEofCheck; + if (p >= 0) { + if (fetchedEOF) { + // the last token in tokens is EOF. skip check if p indexes any + // fetched token except the last. + skipEofCheck = p < tokens.length - 1; + } else { + // no EOF token in tokens. skip check if p indexes a fetched token. + skipEofCheck = p < tokens.length; + } + } else { + // not yet initialized + skipEofCheck = false; + } + + if (!skipEofCheck && LA(1) == IntStream.EOF) { + throw StateError('cannot consume EOF'); + } + + if (sync(p + 1)) { + p = adjustSeekIndex(p + 1); + } + } + + /// Make sure index [i] in tokens has a token. + /// + /// @return [true] if a token is located at index [i], otherwise + /// [false]. + /// @see #get(int i) + bool sync(int i) { + assert(i >= 0); + final n = i - tokens.length + 1; // how many more elements we need? + //System.out.println("sync("+i+") needs "+n); + if (n > 0) { + final fetched = fetch(n); + return fetched >= n; + } + + return true; + } + + /// Add [n] elements to buffer. + /// + /// @return The actual number of elements added to the buffer. + int fetch(int n) { + if (fetchedEOF) { + return 0; + } + + for (var i = 0; i < n; i++) { + final t = tokenSource.nextToken(); + if (t is WritableToken) { + t.tokenIndex = tokens.length; + } + tokens.add(t); + if (t.type == Token.EOF) { + fetchedEOF = true; + return i + 1; + } + } + + return n; + } + + @override + Token get(int i) { + if (i < 0 || i >= tokens.length) { + throw RangeError.index(i, tokens); + } + return tokens[i]; + } + + /// Get all tokens from start..stop inclusively */ + List getRange(int start, [int stop]) { + if (start < 0 || stop < 0) return null; + lazyInit(); + final subset = []; + if (stop >= tokens.length) stop = tokens.length - 1; + for (var i = start; i <= stop; i++) { + final t = tokens[i]; + if (t.type == Token.EOF) break; + subset.add(t); + } + return subset; + } + + @override + int LA(int i) { + return LT(i).type; + } + + Token LB(int k) { + if ((p - k) < 0) return null; + return tokens[p - k]; + } + + @override + Token LT(int k) { + lazyInit(); + if (k == 0) return null; + if (k < 0) return LB(-k); + + final i = p + k - 1; + sync(i); + if (i >= tokens.length) { + // return EOF token + // EOF must be last token + return tokens.last; + } +// if ( i>range ) range = i; + return tokens[i]; + } + + /// Allowed derived classes to modify the behavior of operations which change + /// the current stream position by adjusting the target token index of a seek + /// operation. The default implementation simply returns [i]. If an + /// exception is thrown in this method, the current stream index should not be + /// changed. + /// + ///
+ /// For example, [CommonTokenStream] overrides this method to ensure that
+ /// the seek target is always an on-channel token.
      + /// + /// @param i The target token index. + /// @return The adjusted target token index. + int adjustSeekIndex(int i) { + return i; + } + + void lazyInit() { + if (p == -1) { + setup(); + } + } + + void setup() { + sync(0); + p = adjustSeekIndex(0); + } + + @override + TokenSource get tokenSource => _tokenSource; + + /// Reset this token stream by setting its token source. */ + set tokenSource(TokenSource tokenSource) { + _tokenSource = tokenSource; + tokens.clear(); + p = -1; + fetchedEOF = false; + } + + /// Given a start and stop index, return a List of all tokens in + /// the token type BitSet. Return null if no tokens were found. This + /// method looks at both on and off channel tokens. + List getTokens( + [int start, int stop, Set types]) { + if (start == null && stop == null) { + return tokens; + } + lazyInit(); + if (start < 0 || start >= tokens.length) { + throw RangeError.index(start, tokens); + } else if (stop < 0 || stop >= tokens.length) { + throw RangeError.index(stop, tokens); + } + if (start > stop) return null; + + // list = tokens[start:stop]:{T t, t.getType() in types} + var filteredTokens = []; + for (var i = start; i <= stop; i++) { + final t = tokens[i]; + if (types == null || types.contains(t.type)) { + filteredTokens.add(t); + } + } + if (filteredTokens.isEmpty) { + filteredTokens = null; + } + return filteredTokens; + } + + /// Given a starting index, return the index of the next token on channel. + /// Return [i] if {@code tokens[i]} is on channel. Return the index of + /// the EOF token if there are no tokens on channel between [i] and + /// EOF. + int nextTokenOnChannel(int i, int channel) { + sync(i); + if (i >= size) { + return size - 1; + } + + var token = tokens[i]; + while (token.channel != channel) { + if (token.type == Token.EOF) { + return i; + } + + i++; + sync(i); + token = tokens[i]; + } + + return i; + } + + /// Given a starting index, return the index of the previous token on + /// channel. Return [i] if {@code tokens[i]} is on channel. Return -1 + /// if there are no tokens on channel between [i] and 0. + /// + ///
+ /// If [i] specifies an index at or after the EOF token, the EOF token
+ /// index is returned. This is due to the fact that the EOF token is treated
+ /// as though it were on every channel.
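// Illustrative sketch of the channel-navigation helpers defined here: with
// tokens [ID, WS(hidden), ID, EOF] and ch the default channel,
// previousTokenOnChannel(1, ch) == 0 and nextTokenOnChannel(1, ch) == 2,
// which is how the getHiddenTokensToLeft/Right methods below bracket WS.
List hiddenAround(BufferedTokenStream s, int i) =>
    [...?s.getHiddenTokensToLeft(i), ...?s.getHiddenTokensToRight(i)];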
      + int previousTokenOnChannel(int i, int channel) { + sync(i); + if (i >= size) { + // the EOF token is on every channel + return size - 1; + } + + while (i >= 0) { + final token = tokens[i]; + if (token.type == Token.EOF || token.channel == channel) { + return i; + } + + i--; + } + + return i; + } + + /// Collect all tokens on specified channel to the right of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or + /// EOF. If channel is -1, find any non default channel token. + List getHiddenTokensToRight(int tokenIndex, [int channel = -1]) { + lazyInit(); + if (tokenIndex < 0 || tokenIndex >= tokens.length) { + throw RangeError.index(tokenIndex, tokens); + } + + final nextOnChannel = + nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL); + // if none onchannel to right, nextOnChannel=-1 so set to = last token + final to = nextOnChannel == -1 ? size - 1 : nextOnChannel; + final from = tokenIndex + 1; + + return filterForChannel(from, to, channel); + } + + /// Collect all tokens on specified channel to the left of + /// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. + /// If channel is -1, find any non default channel token. + List getHiddenTokensToLeft(int tokenIndex, [int channel = -1]) { + lazyInit(); + if (tokenIndex < 0 || tokenIndex >= tokens.length) { + throw RangeError.index(tokenIndex, tokens); + } + + if (tokenIndex == 0) { + // obviously no tokens can appear before the first token + return null; + } + + final prevOnChannel = + previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL); + if (prevOnChannel == tokenIndex - 1) return null; + // if none onchannel to left, prevOnChannel=-1 then from=0 + final from = prevOnChannel + 1; + final to = tokenIndex - 1; + + return filterForChannel(from, to, channel); + } + + List filterForChannel(int from, int to, int channel) { + final hidden = []; + for (var i = from; i <= to; i++) { + final t = tokens[i]; + if (channel == -1) { + if (t.channel != Lexer.DEFAULT_TOKEN_CHANNEL) hidden.add(t); + } else { + if (t.channel == channel) hidden.add(t); + } + } + if (hidden.isEmpty) return null; + return hidden; + } + + @override + String get sourceName => tokenSource.sourceName; + + @override + String get text => getText(); + + @override + String getText([Interval interval]) { + interval = interval ?? + Interval.of(0, size - 1); // Get the text of all tokens in this buffer. + final start = interval.a; + var stop = interval.b; + if (start < 0 || stop < 0) return ''; + fill(); + if (stop >= tokens.length) stop = tokens.length - 1; + + final buf = StringBuffer(); + for (var i = start; i <= stop; i++) { + final t = tokens[i]; + if (t.type == Token.EOF) break; + buf.write(t.text); + } + return buf.toString(); + } + + @override + String getTextFromCtx(RuleContext ctx) { + return getText(ctx.sourceInterval); + } + + @override + String getTextRange(Token start, Token stop) { + if (start != null && stop != null) { + return getText(Interval.of(start.tokenIndex, stop.tokenIndex)); + } + + return ''; + } + + /// Get all tokens from lexer until EOF */ + void fill() { + lazyInit(); + final blockSize = 1000; + while (true) { + final fetched = fetch(blockSize); + if (fetched < blockSize) { + return; + } + } + } +} + +/// This class extends [BufferedTokenStream] with functionality to filter +/// token streams to tokens on a particular channel (tokens where +/// {@link Token#getChannel} returns a particular value). +/// +///
+/// This token stream provides access to all tokens by index or when calling
+/// methods like {@link #getText}. The channel filtering is only used for code
+/// accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and
+/// {@link #LB}.
+///
+/// By default, tokens are placed on the default channel
+/// ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the
+/// {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to
+/// call {@link Lexer#setChannel}.
+///
+/// Note: lexer rules which use the {@code ->skip} lexer command or call
+/// {@link Lexer#skip} do not produce tokens at all, so input text matched by
+/// such a rule will not be available as part of the token stream, regardless of
+/// channel.
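// Hypothetical usage sketch for the class declared below (MyLexer is an
// assumption): hidden-channel tokens are skipped by LT/LA but remain
// reachable by index.
final tokens = CommonTokenStream(MyLexer(InputStream.fromString('x = 0;')));
tokens.fill();
print(tokens.LT(1).text); // first on-channel token, e.g. 'x'
print(tokens.get(1).text); // by index; may be hidden whitespace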
+class CommonTokenStream extends BufferedTokenStream {
+ /// Specifies the channel to use for filtering tokens.
+ ///
+ /// The default value is {@link Token#DEFAULT_CHANNEL}, which matches the
+ /// default channel assigned to tokens created by the lexer.
      + int channel; + + /// Constructs a new [CommonTokenStream] using the specified token + /// source and filtering tokens to the specified channel. Only tokens whose + /// {@link Token#getChannel} matches [channel] or have the + /// {@link Token#getType} equal to {@link Token#EOF} will be returned by the + /// token stream lookahead methods. + /// + /// @param tokenSource The token source. + /// @param channel The channel to use for filtering tokens. + CommonTokenStream(TokenSource tokenSource, + [this.channel = Token.DEFAULT_CHANNEL]) + : super(tokenSource); + + @override + int adjustSeekIndex(int i) { + return nextTokenOnChannel(i, channel); + } + + @override + Token LB(int k) { + if (k == 0 || (p - k) < 0) return null; + + var i = p; + var n = 1; + // find k good tokens looking backwards + while (n <= k && i > 0) { + // skip off-channel tokens + i = previousTokenOnChannel(i - 1, channel); + n++; + } + if (i < 0) return null; + return tokens[i]; + } + + @override + Token LT(int k) { + //System.out.println("enter LT("+k+")"); + lazyInit(); + if (k == 0) return null; + if (k < 0) return LB(-k); + var i = p; + var n = 1; // we know tokens[p] is a good one + // find k good tokens + while (n < k) { + // skip off-channel tokens, but make sure to not look past EOF + if (sync(i + 1)) { + i = nextTokenOnChannel(i + 1, channel); + } + n++; + } +// if ( i>range ) range = i; + return tokens[i]; + } + + /// Count EOF just once. */ + int get numberOfOnChannelTokens { + var n = 0; + fill(); + for (var i = 0; i < tokens.length; i++) { + final t = tokens[i]; + if (t.channel == channel) n++; + if (t.type == Token.EOF) break; + } + return n; + } +} diff --git a/runtime/Dart/lib/src/tree/src/pattern/chunk.dart b/runtime/Dart/lib/src/tree/src/pattern/chunk.dart new file mode 100644 index 000000000..bf2d3f474 --- /dev/null +++ b/runtime/Dart/lib/src/tree/src/pattern/chunk.dart @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +/// A chunk is either a token tag, a rule tag, or a span of literal text within a +/// tree pattern. +/// +///
+/// The method {@link ParseTreePatternMatcher#split(String)} returns a list of
+/// chunks in preparation for creating a token stream by
+/// {@link ParseTreePatternMatcher#tokenize(String)}. From there, we get a parse
+/// tree with {@link ParseTreePatternMatcher#compile(String, int)}. These
+/// chunks are converted to [RuleTagToken], [TokenTagToken], or the
+/// regular tokens of the text surrounding the tags.
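// A sketch of the chunking described above, assuming an existing
// ParseTreePatternMatcher instance named matcher:
final chunks = matcher.split('<ID> = <expr>;');
// yields roughly [TagChunk(ID), TextChunk(' = '), TagChunk(expr), TextChunk(';')]
final patternTokens = matcher.tokenize('<ID> = <expr>;');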
      +abstract class Chunk {} + +/// Represents a placeholder tag in a tree pattern. A tag can have any of the +/// following forms. +/// +///
+/// * {@code <expr>}: An unlabeled placeholder for a parser rule [expr].
+/// * {@code <ID>}: An unlabeled placeholder for a token of type [ID].
+/// * {@code <e:expr>}: A labeled placeholder for a parser rule [expr].
+/// * {@code <id:ID>}: A labeled placeholder for a token of type [ID].
      +/// +/// This class does not perform any validation on the tag or label names aside +/// from ensuring that the tag is a non-null, non-empty string. +class TagChunk extends Chunk { + /// The tag for the chunk. + final String tag; + + /// The label assigned to this chunk, or null if no label is + /// assigned to the chunk. + final String label; + + /// Construct a new instance of [TagChunk] using the specified label + /// and tag. + /// + /// @param label The label for the tag. If this is null, the + /// [TagChunk] represents an unlabeled tag. + /// @param tag The tag, which should be the name of a parser rule or token + /// type. + /// + /// @exception ArgumentError if [tag] is null or empty. + TagChunk(this.tag, {this.label}) { + if (tag == null || tag.isEmpty) { + throw ArgumentError.value(tag, 'tag', 'cannot be null or empty'); + } + } + + /// This method returns a text representation of the tag chunk. Labeled tags + /// are returned in the form {@code label:tag}, and unlabeled tags are + /// returned as just the tag name. + @override + String toString() { + if (label != null) { + return label + ':' + tag; + } + + return tag; + } +} + +/// Represents a span of raw text (concrete syntax) between tags in a tree +/// pattern string. +class TextChunk extends Chunk { + /// The text of the chunk. + final String text; + + /// Constructs a new instance of [TextChunk] with the specified text. + /// + /// @param text The text of this chunk. + /// @exception IllegalArgumentException if [text] is null. + TextChunk(this.text) { + if (text == null) { + throw ArgumentError.notNull('text'); + } + } + + /// {@inheritDoc} + /// + ///
+ /// The implementation for [TextChunk] returns the result of
+ /// {@link #getText()} in single quotes.
      + @override + String toString() { + return "'" + text + "'"; + } +} diff --git a/runtime/Dart/lib/src/tree/src/pattern/parse_tree_match.dart b/runtime/Dart/lib/src/tree/src/pattern/parse_tree_match.dart new file mode 100644 index 000000000..175517c97 --- /dev/null +++ b/runtime/Dart/lib/src/tree/src/pattern/parse_tree_match.dart @@ -0,0 +1,635 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import '../../../error/error.dart'; +import '../../../input_stream.dart'; +import '../../../lexer.dart'; +import '../../../misc/multi_map.dart'; +import '../../../parser.dart'; +import '../../../parser_interpreter.dart'; +import '../../../parser_rule_context.dart'; +import '../../../token.dart'; +import '../../../token_source.dart'; +import '../../../token_stream.dart'; +import '../../../util/utils.dart'; +import '../tree.dart'; +import 'chunk.dart'; + +/// Represents the result of matching a [ParseTree] against a tree pattern. +class ParseTreeMatch { + /// Get the parse tree we are trying to match to a pattern. + /// + /// @return The [ParseTree] we are trying to match to a pattern. + final ParseTree tree; + + /// Get the tree pattern we are matching against. + /// + /// @return The tree pattern we are matching against. + final ParseTreePattern pattern; + + + /// Return a mapping from label → [list of nodes]. + /// + ///
+ /// The map includes special entries corresponding to the names of rules and
+ /// tokens referenced in tags in the original pattern. For additional
+ /// information, see the description of {@link #getAll(String)}.
      + /// + /// @return A mapping from labels to parse tree nodes. If the parse tree + /// pattern did not contain any rule or token tags, this map will be empty. + final MultiMap labels; + + /// Get the node at which we first detected a mismatch. + /// + /// @return the node at which we first detected a mismatch, or null + /// if the match was successful. + final ParseTree mismatchedNode; + + /// Constructs a new instance of [ParseTreeMatch] from the specified + /// parse tree and pattern. + /// + /// @param tree The parse tree to match against the pattern. + /// @param pattern The parse tree pattern. + /// @param labels A mapping from label names to collections of + /// [ParseTree] objects located by the tree pattern matching process. + /// @param mismatchedNode The first node which failed to match the tree + /// pattern during the matching process. + /// + /// @exception ArgumentError.notNull) if [tree] is null + /// @exception ArgumentError.notNull) if [pattern] is null + /// @exception ArgumentError.notNull) if [labels] is null + ParseTreeMatch(this.tree, this.pattern, this.labels, this.mismatchedNode) { + if (tree == null) { + throw ArgumentError.notNull('tree'); + } + + if (pattern == null) { + throw ArgumentError.notNull('pattern'); + } + + if (labels == null) { + throw ArgumentError.notNull('labels'); + } + } + + /// Get the last node associated with a specific [label]. + /// + ///
+ /// For example, for pattern {@code <id:ID>}, {@code get("id")} returns the
+ /// node matched for that [ID]. If more than one node
+ /// matched the specified label, only the last is returned. If there is
+ /// no node associated with the label, this returns null.
+ ///
+ /// Pattern tags like {@code <ID>} and {@code <expr>} without labels are
+ /// considered to be labeled with [ID] and [expr], respectively.
      + /// + /// @param label The label to check. + /// + /// @return The last [ParseTree] to match a tag with the specified + /// label, or null if no parse tree matched a tag with the label. + + ParseTree get(String label) { + final parseTrees = labels[label]; + if (parseTrees == null || parseTrees.isEmpty) { + return null; + } + + return parseTrees[parseTrees.length - 1]; // return last if multiple + } + + /// Return all nodes matching a rule or token tag with the specified label. + /// + ///
+ /// If the [label] is the name of a parser rule or token in the
+ /// grammar, the resulting list will contain both the parse trees matching
+ /// rule or tags explicitly labeled with the label and the complete set of
+ /// parse trees matching the labeled and unlabeled tags in the pattern for
+ /// the parser rule or token. For example, if [label] is {@code "foo"},
+ /// the result will contain all of the following.
+ ///
+ /// * Parse tree nodes matching tags of the form {@code <foo:anyRuleName>} and
+ ///   {@code <foo:AnyTokenName>}.
+ /// * Parse tree nodes matching tags of the form {@code <anyLabel:foo>}.
+ /// * Parse tree nodes matching tags of the form {@code <foo>}.
      + /// + /// @param label The label. + /// + /// @return A collection of all [ParseTree] nodes matching tags with + /// the specified [label]. If no nodes matched the label, an empty list + /// is returned. + + List getAll(String label) { + final nodes = labels[label]; + if (nodes == null) { + return []; + } + + return nodes; + } + + /// Gets a value indicating whether the match operation succeeded. + /// + /// @return [true] if the match operation succeeded; otherwise, + /// [false]. + bool get succeeded => mismatchedNode == null; + + /// {@inheritDoc} + @override + String toString() { + return "Match ${succeeded ? "succeeded" : "failed"}; found ${labels.length} labels"; + } +} + +/// A pattern like {@code = ;} converted to a [ParseTree] by +/// {@link ParseTreePatternMatcher#compile(String, int)}. +class ParseTreePattern { + /// Get the parser rule which serves as the outermost rule for the tree + /// pattern. + /// + /// @return The parser rule which serves as the outermost rule for the tree + /// pattern. + final int patternRuleIndex; + + /// Get the tree pattern in concrete syntax form. + /// + /// @return The tree pattern in concrete syntax form. + final String pattern; + + + /// Get the tree pattern as a [ParseTree]. The rule and token tags from + /// the pattern are present in the parse tree as terminal nodes with a symbol + /// of type [RuleTagToken] or [TokenTagToken]. + /// + /// @return The tree pattern as a [ParseTree]. + final ParseTree patternTree; + + /// Get the [ParseTreePatternMatcher] which created this tree pattern. + /// + /// @return The [ParseTreePatternMatcher] which created this tree + /// pattern. + final ParseTreePatternMatcher matcher; + + /// Construct a new instance of the [ParseTreePattern] class. + /// + /// @param matcher The [ParseTreePatternMatcher] which created this + /// tree pattern. + /// @param pattern The tree pattern in concrete syntax form. + /// @param patternRuleIndex The parser rule which serves as the root of the + /// tree pattern. + /// @param patternTree The tree pattern in [ParseTree] form. + ParseTreePattern( + this.matcher, this.pattern, this.patternRuleIndex, this.patternTree); + + /// Match a specific parse tree against this tree pattern. + /// + /// @param tree The parse tree to match against this tree pattern. + /// @return A [ParseTreeMatch] object describing the result of the + /// match operation. The {@link ParseTreeMatch#succeeded()} method can be + /// used to determine whether or not the match was successful. + + ParseTreeMatch match(ParseTree tree) { + return matcher.match(tree, pattern: this); + } + + /// Determine whether or not a parse tree matches this tree pattern. + /// + /// @param tree The parse tree to match against this tree pattern. + /// @return [true] if [tree] is a match for the current tree + /// pattern; otherwise, [false]. + bool matches(ParseTree tree) { + return matcher.match(tree, pattern: this).succeeded; + } +} + +/// A tree pattern matching mechanism for ANTLR [ParseTree]s. +/// +///
+/// Patterns are strings of source input text with special tags representing
+/// token or rule references such as:
+///
+/// {@code <ID> = <expr>;}
+///
+/// Given a pattern start rule such as [statement], this object constructs
+/// a [ParseTree] with placeholders for the [ID] and [expr]
+/// subtree. Then the {@link #match} routines can compare an actual
+/// [ParseTree] from a parse with this pattern. Tag {@code <ID>} matches
+/// any [ID] token and tag {@code <expr>} references the result of the
+/// [expr] rule (generally an instance of [ExprContext]).
+///
+/// Pattern {@code x = 0;} is a similar pattern that matches the same pattern
+/// except that it requires the identifier to be [x] and the expression to
+/// be {@code 0}.
+///
+/// The {@link #matches} routines return [true] or [false] based
+/// upon a match for the tree rooted at the parameter sent in. The
+/// {@link #match} routines return a [ParseTreeMatch] object that
+/// contains the parse tree, the parse tree pattern, and a map from tag name to
+/// matched nodes (more below). A subtree that fails to match returns with
+/// {@link ParseTreeMatch#mismatchedNode} set to the first tree node that did not
+/// match.
+///
+/// For efficiency, you can compile a tree pattern in string form to a
+/// [ParseTreePattern] object.
+///
+/// See [TestParseTreeMatcher] for lots of examples.
+/// [ParseTreePattern] has two static helper methods:
+/// {@link ParseTreePattern#findAll} and {@link ParseTreePattern#match} that
+/// are easy to use but not super efficient because they create new
+/// [ParseTreePatternMatcher] objects each time and have to compile the
+/// pattern in string form before using it.
+///
+/// The lexer and parser that you pass into the [ParseTreePatternMatcher]
+/// constructor are used to parse the pattern in string form. The lexer converts
+/// the {@code <ID> = <expr>;} into a sequence of four tokens (assuming lexer
+/// throws out whitespace or puts it on a hidden channel). Be aware that the
+/// input stream is reset for the lexer (but not the parser; a
+/// [ParserInterpreter] is created to parse the input). Any user-defined
+/// fields you have put into the lexer might get changed when this mechanism asks
+/// it to scan the pattern string.
+///
+/// Normally a parser does not accept token {@code <expr>} as a valid
+/// [expr] but, from the parser passed in, we create a special version of
+/// the underlying grammar representation (an [ATN]) that allows imaginary
+/// tokens representing rules ({@code <expr>}) to match entire rules. We call
+/// these bypass alternatives.
+///
+/// Delimiters are {@code <} and {@code >}, with {@code \} as the escape string
+/// by default, but you can set them to whatever you want using
+/// {@link #setDelimiters}. You must escape both start and stop strings
+/// {@code \<} and {@code \>}.
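// Hypothetical end-to-end sketch for the matcher declared below; lexer,
// parser, tree, and the rule name 'statement' are assumptions:
final matcher = ParseTreePatternMatcher(lexer, parser);
final pattern =
    matcher.compile('<ID> = <expr>;', parser.getRuleIndex('statement'));
final m = pattern.match(tree);
if (m.succeeded) {
  print(m.get('ID')?.text); // the node bound to the <ID> tag
}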
      +class ParseTreePatternMatcher { + /// Used to convert the tree pattern string into a series of tokens. The + /// input stream is reset. + final Lexer lexer; + + /// Used to collect to the grammar file name, token names, rule names for + /// used to parse the pattern into a parse tree. + final Parser parser; + + String start = '<'; + String stop = '>'; + String escape = '\\'; // e.g., \< and \> must escape BOTH! + + /// Constructs a [ParseTreePatternMatcher] or from a [Lexer] and + /// [Parser] object. The lexer input stream is altered for tokenizing + /// the tree patterns. The parser is used as a convenient mechanism to get + /// the grammar name, plus token, rule names. + ParseTreePatternMatcher(this.lexer, this.parser); + + /// Set the delimiters used for marking rule and token tags within concrete + /// syntax used by the tree pattern parser. + /// + /// @param start The start delimiter. + /// @param stop The stop delimiter. + /// @param escapeLeft The escape sequence to use for escaping a start or stop delimiter. + /// + /// @exception ArgumentError if [start] is null or empty. + /// @exception ArgumentError if [stop] is null or empty. + void setDelimiters(String start, String stop, String escapeLeft) { + if (start == null || start.isEmpty) { + throw ArgumentError.value(start, 'start', 'cannot be null or empty'); + } + + if (stop == null || stop.isEmpty) { + throw ArgumentError.value(stop, 'stop', 'cannot be null or empty'); + } + + this.start = start; + this.stop = stop; + escape = escapeLeft; + } + + /// Does [pattern] matched as rule patternRuleIndex match tree? Pass in a + /// compiled pattern instead of a string representation of a tree pattern. + bool matches(ParseTree tree, + {ParseTreePattern pattern, String patternStr, int patternRuleIndex}) { + pattern ??= compile(patternStr, patternRuleIndex); + + final labels = MultiMap(); + final mismatchedNode = + matchImpl(tree, pattern.patternTree, labels); + return mismatchedNode == null; + } + + /// Compare [pattern] matched against [tree] and return a + /// [ParseTreeMatch] object that contains the matched elements, or the + /// node at which the match failed. Pass in a compiled pattern instead of a + /// string representation of a tree pattern. + + ParseTreeMatch match(ParseTree tree, + {ParseTreePattern pattern, String patternStr, int patternRuleIndex}) { + pattern ??= compile(patternStr, patternRuleIndex); + + final labels = MultiMap(); + final mismatchedNode = + matchImpl(tree, pattern.patternTree, labels); + return ParseTreeMatch(tree, pattern, labels, mismatchedNode); + } + + /// For repeated use of a tree pattern, compile it to a + /// [ParseTreePattern] using this method. 
+ ParseTreePattern compile(String pattern, int patternRuleIndex) { + final tokenList = tokenize(pattern); + final tokenSrc = ListTokenSource(tokenList); + final tokens = CommonTokenStream(tokenSrc); + + final parserInterp = ParserInterpreter( + parser.grammarFileName, + parser.vocabulary, + parser.ruleNames, + parser.ATNWithBypassAlts, + tokens); + + ParseTree tree; + try { + parserInterp.errorHandler = BailErrorStrategy(); + tree = parserInterp.parse(patternRuleIndex); +// System.out.println("pattern tree = "+tree.toStringTree(parserInterp)); + } on ParseCancellationException { + rethrow; + } on RecognitionException { + rethrow; + } catch (e) { + throw CannotInvokeStartRule(e); + } + + // Make sure tree pattern compilation checks for a complete parse + if (tokens.LA(1) != Token.EOF) { + throw StartRuleDoesNotConsumeFullPattern(); + } + + return ParseTreePattern(this, pattern, patternRuleIndex, tree); + } + + // ---- SUPPORT CODE ---- + + /// Recursively walk [tree] against [patternTree], filling + /// {@code match.}{@link ParseTreeMatch#labels labels}. + /// + /// @return the first node encountered in [tree] which does not match + /// a corresponding node in [patternTree], or null if the match + /// was successful. The specific node returned depends on the matching + /// algorithm used by the implementation, and may be overridden. + + ParseTree matchImpl(ParseTree tree, ParseTree patternTree, + MultiMap labels) { + if (tree == null) { + throw ArgumentError('tree cannot be null'); + } + + if (patternTree == null) { + throw ArgumentError('patternTree cannot be null'); + } + + // x and , x and y, or x and x; or could be mismatched types + if (tree is TerminalNode && patternTree is TerminalNode) { + final t1 = tree; + final t2 = patternTree; + ParseTree mismatchedNode; + // both are tokens and they have same type + if (t1.symbol.type == t2.symbol.type) { + if (t2.symbol is TokenTagToken) { + // x and + TokenTagToken tokenTagToken = t2.symbol; + // track label->list-of-nodes for both token name and label (if any) + labels.put(tokenTagToken.tokenName, tree); + if (tokenTagToken.label != null) { + labels.put(tokenTagToken.label, tree); + } + } else if (t1.text == t2.text) { + // x and x + } else { + // x and y + mismatchedNode ??= t1; + } + } else { + mismatchedNode ??= t1; + } + + return mismatchedNode; + } + + if (tree is ParserRuleContext && patternTree is ParserRuleContext) { + final r1 = tree; + final r2 = patternTree; + ParseTree mismatchedNode; + // (expr ...) and + final ruleTagToken = getRuleTagToken(r2); + if (ruleTagToken != null) { + if (r1.ruleContext.ruleIndex == r2.ruleContext.ruleIndex) { + // track label->list-of-nodes for both rule name and label (if any) + labels.put(ruleTagToken.ruleName, tree); + if (ruleTagToken.label != null) { + labels.put(ruleTagToken.label, tree); + } + } else { + mismatchedNode ??= r1; + } + + return mismatchedNode; + } + + // (expr ...) and (expr ...) + if (r1.childCount != r2.childCount) { + mismatchedNode ??= r1; + + return mismatchedNode; + } + + final n = r1.childCount; + for (var i = 0; i < n; i++) { + final childMatch = + matchImpl(r1.getChild(i), patternTree.getChild(i), labels); + if (childMatch != null) { + return childMatch; + } + } + + return mismatchedNode; + } + + // if nodes aren't both tokens or both rule nodes, can't match + return tree; + } + + /// Is [t] {@code (expr )} subtree? 
*/ + RuleTagToken getRuleTagToken(ParseTree t) { + if (t is RuleNode) { + final r = t; + if (r.childCount == 1 && r.getChild(0) is TerminalNode) { + TerminalNode c = r.getChild(0); + if (c.symbol is RuleTagToken) { +// System.out.println("rule tag subtree "+t.toStringTree(parser)); + return c.symbol; + } + } + } + return null; + } + + List tokenize(String pattern) { + // split pattern into chunks: sea (raw input) and islands (, ) + final chunks = split(pattern); + + // create token stream from text and tags + final tokens = []; + for (var chunk in chunks) { + if (chunk is TagChunk) { + final tagChunk = chunk; + // add special rule token or conjure up new token from name + if (isUpperCase(tagChunk.tag[0])) { + final ttype = parser.getTokenType(tagChunk.tag); + if (ttype == Token.INVALID_TYPE) { + throw ArgumentError('Unknown token ' + + tagChunk.tag + + ' in pattern: ' + + pattern); + } + final t = + TokenTagToken(tagChunk.tag, ttype, tagChunk.label); + tokens.add(t); + } else if (isLowerCase(tagChunk.tag[0])) { + final ruleIndex = parser.getRuleIndex(tagChunk.tag); + if (ruleIndex == -1) { + throw ArgumentError('Unknown rule ' + + tagChunk.tag + + ' in pattern: ' + + pattern); + } + final ruleImaginaryTokenType = + parser.ATNWithBypassAlts.ruleToTokenType[ruleIndex]; + tokens.add(RuleTagToken( + tagChunk.tag, ruleImaginaryTokenType, tagChunk.label)); + } else { + throw ArgumentError( + 'invalid tag: ' + tagChunk.tag + ' in pattern: ' + pattern); + } + } else { + TextChunk textChunk = chunk; + final inputStream = + InputStream.fromString(textChunk.text); + lexer.inputStream = inputStream; + var t = lexer.nextToken(); + while (t.type != Token.EOF) { + tokens.add(t); + t = lexer.nextToken(); + } + } + } + +// System.out.println("tokens="+tokens); + return tokens; + } + + /// Split {@code = ;} into 4 chunks for tokenizing by {@link #tokenize}. 
*/ + List split(String pattern) { + var p = 0; + final n = pattern.length; + final chunks = []; + // find all start and stop indexes first, then collect + final starts = []; + final stops = []; + while (p < n) { + if (p == pattern.indexOf(escape + start, p)) { + p += escape.length + start.length; + } else if (p == pattern.indexOf(escape + stop, p)) { + p += escape.length + stop.length; + } else if (p == pattern.indexOf(start, p)) { + starts.add(p); + p += start.length; + } else if (p == pattern.indexOf(stop, p)) { + stops.add(p); + p += stop.length; + } else { + p++; + } + } + +// System.out.println(""); +// System.out.println(starts); +// System.out.println(stops); + if (starts.length > stops.length) { + throw ArgumentError('unterminated tag in pattern: ' + pattern); + } + + if (starts.length < stops.length) { + throw ArgumentError('missing start tag in pattern: ' + pattern); + } + + final ntags = starts.length; + for (var i = 0; i < ntags; i++) { + if (starts[i] >= stops[i]) { + throw ArgumentError( + 'tag delimiters out of order in pattern: ' + pattern); + } + } + + // collect into chunks now + if (ntags == 0) { + final text = pattern.substring(0, n); + chunks.add(TextChunk(text)); + } + + if (ntags > 0 && starts[0] > 0) { + // copy text up to first tag into chunks + final text = pattern.substring(0, starts[0]); + chunks.add(TextChunk(text)); + } + for (var i = 0; i < ntags; i++) { + // copy inside of + final tag = pattern.substring(starts[i] + start.length, stops[i]); + var ruleOrToken = tag; + String label; + final colon = tag.indexOf(':'); + if (colon >= 0) { + label = tag.substring(0, colon); + ruleOrToken = tag.substring(colon + 1, tag.length); + } + chunks.add(TagChunk(ruleOrToken, label: label)); + if (i + 1 < ntags) { + // copy from end of to start of next + final text = pattern.substring(stops[i] + stop.length, starts[i + 1]); + chunks.add(TextChunk(text)); + } + } + if (ntags > 0) { + final afterLastTag = stops[ntags - 1] + stop.length; + if (afterLastTag < n) { + // copy text from end of last tag to end + final text = pattern.substring(afterLastTag, n); + chunks.add(TextChunk(text)); + } + } + + // strip out the escape sequences from text chunks but not tags + for (var i = 0; i < chunks.length; i++) { + final c = chunks[i]; + if (c is TextChunk) { + final tc = c; + final unescaped = tc.text.replaceAll(escape, ''); + if (unescaped.length < tc.text.length) { + chunks[i] = TextChunk(unescaped); + } + } + } + + return chunks; + } +} + +class CannotInvokeStartRule extends StateError { + CannotInvokeStartRule(String message) : super(message); +} + +// Fixes https://github.com/antlr/antlr4/issues/413 +// "Tree pattern compilation doesn't check for a complete parse" +class StartRuleDoesNotConsumeFullPattern extends Error {} + +/// This exception is thrown to cancel a parsing operation. This exception does +/// not extend [RecognitionException], allowing it to bypass the standard +/// error recovery mechanisms. [BailErrorStrategy] throws this exception in +/// response to a parse error. +class ParseCancellationException extends StateError { + ParseCancellationException(String message) : super(message); +} diff --git a/runtime/Dart/lib/src/tree/src/tree.dart b/runtime/Dart/lib/src/tree/src/tree.dart new file mode 100644 index 000000000..f2eb32313 --- /dev/null +++ b/runtime/Dart/lib/src/tree/src/tree.dart @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 
+ * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:developer'; + +import '../../interval_set.dart'; +import '../../parser.dart'; +import '../../parser_rule_context.dart'; +import '../../rule_context.dart'; +import '../../token.dart'; + +/// The basic notion of a tree has a parent, a payload, and a list of children. +/// It is the most abstract interface for all the trees used by ANTLR. +abstract class Tree { + Tree get parent; + + dynamic get payload; + + Tree getChild(int i); + +// Tree getChild(int i); + + int get childCount; + + String toStringTree(); +} + +abstract class SyntaxTree extends Tree { + /// Return an [Interval] indicating the index in the + /// [TokenStream] of the first and last token associated with this + /// subtree. If this node is a leaf, then the interval represents a single + /// token and has interval i..i for token index i. + /// + ///
+ /// An interval of i..i-1 indicates an empty interval at position
+ /// i in the input stream, where 0 <= i <= the size of the input
+ /// token stream. Currently, the code base can only have i=0..n-1 but
+ /// in concept one could have an empty interval after EOF.
+ ///
+ /// If source interval is unknown, this returns {@link Interval#INVALID}.
+ ///
+ /// As a weird special case, the source interval for rules matched after
+ /// EOF is unspecified.
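// Illustrative sketch of the interval conventions above (Interval's a and b
// fields are used elsewhere in this runtime):
final leaf = Interval(3, 3); // a single token at index 3 spans 3..3
final empty = Interval(3, 2); // i..i-1: an empty interval at position 3
assert(empty.b - empty.a + 1 == 0); // zero length by convention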
      + Interval get sourceInterval; +} + +abstract class ParseTree extends SyntaxTree { + // the following methods narrow the return type; they are not additional methods + @override + ParseTree get parent; + + @override + ParseTree getChild(int i); + + /// Set the parent for this node. + /// + /// This is not backward compatible as it changes + /// the interface but no one was able to create custom + /// nodes anyway so I'm adding as it improves internal + /// code quality. + /// + /// One could argue for a restructuring of + /// the class/interface hierarchy so that + /// setParent, addChild are moved up to Tree + /// but that's a major change. So I'll do the + /// minimal change, which is to add this method. + /// + /// @since 4.7 + set parent(RuleContext parent); + + /// The [ParseTreeVisitor] needs a double dispatch method. */ + T accept(ParseTreeVisitor visitor); + + /// Return the combined text of all leaf nodes. Does not get any + /// off-channel tokens (if any) so won't return whitespace and + /// comments if they are sent to parser on hidden channel. + String get text; + + /// Specialize toStringTree so that it can print out more information + /// based upon the parser. + @override + String toStringTree({Parser parser}); +} + +abstract class RuleNode extends ParseTree { + RuleContext get ruleContext; +} + +abstract class TerminalNode extends ParseTree { + Token get symbol; +} + +abstract class ErrorNode extends TerminalNode {} + +abstract class ParseTreeVisitor { + /// {@inheritDoc} + /// + ///
+ /// The default implementation calls {@link ParseTree#accept} on the
+ /// specified tree.
      + T visit(ParseTree tree) { + return tree.accept(this); + } + + /// {@inheritDoc} + /// + ///
+ /// The default implementation initializes the aggregate result to
+ /// {@link #defaultResult defaultResult()}. Before visiting each child, it
+ /// calls {@link #shouldVisitNextChild shouldVisitNextChild}; if the result
+ /// is [false] no more children are visited and the current aggregate
+ /// result is returned. After visiting a child, the aggregate result is
+ /// updated by calling {@link #aggregateResult aggregateResult} with the
+ /// previous aggregate result and the result of visiting the child.
+ ///
+ /// The default implementation is not safe for use in visitors that modify
+ /// the tree structure. Visitors that modify the tree should override this
+ /// method to behave properly in respect to the specific algorithm in use.
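// A minimal visitor sketch built on the contract described above (assumes
// the visitor class is generic over its result type, as in the Java runtime):
class TerminalCounter extends ParseTreeVisitor<int> {
  @override
  int defaultResult() => 0; // initial aggregate

  @override
  int visitTerminal(TerminalNode node) => 1;

  @override
  int aggregateResult(int aggregate, int nextResult) => aggregate + nextResult;
}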
      + T visitChildren(RuleNode node) { + var result = defaultResult(); + final n = node.childCount; + for (var i = 0; i < n; i++) { + if (!shouldVisitNextChild(node, result)) { + break; + } + + final c = node.getChild(i); + final childResult = c.accept(this); + result = aggregateResult(result, childResult); + } + + return result; + } + + /// {@inheritDoc} + /// + ///
+ /// The default implementation returns the result of
+ /// {@link #defaultResult defaultResult}.
      + + T visitTerminal(TerminalNode node) { + return defaultResult(); + } + + /// {@inheritDoc} + /// + ///
+ /// The default implementation returns the result of
+ /// {@link #defaultResult defaultResult}.
      + + T visitErrorNode(ErrorNode node) { + return defaultResult(); + } + + /// Gets the default value returned by visitor methods. This value is + /// returned by the default implementations of + /// {@link #visitTerminal visitTerminal}, {@link #visitErrorNode visitErrorNode}. + /// The default implementation of {@link #visitChildren visitChildren} + /// initializes its aggregate result to this value. + /// + ///
+ /// The base implementation returns null.
      + /// + /// @return The default value returned by visitor methods. + T defaultResult() { + return null; + } + + /// Aggregates the results of visiting multiple children of a node. After + /// either all children are visited or {@link #shouldVisitNextChild} returns + /// [false], the aggregate value is returned as the result of + /// {@link #visitChildren}. + /// + ///
+ /// The default implementation returns [nextResult], meaning
+ /// {@link #visitChildren} will return the result of the last child visited
+ /// (or return the initial value if the node has no children).
      + /// + /// @param aggregate The previous aggregate value. In the default + /// implementation, the aggregate value is initialized to + /// {@link #defaultResult}, which is passed as the [aggregate] argument + /// to this method after the first child node is visited. + /// @param nextResult The result of the immediately preceeding call to visit + /// a child node. + /// + /// @return The updated aggregate result. + T aggregateResult(T aggregate, T nextResult) => nextResult; + + /// This method is called after visiting each child in + /// {@link #visitChildren}. This method is first called before the first + /// child is visited; at that point [currentResult] will be the initial + /// value (in the default implementation, the initial value is returned by a + /// call to {@link #defaultResult}. This method is not called after the last + /// child is visited. + /// + ///
+ /// The default implementation always returns [true], indicating that
+ /// [visitChildren] should only return after all children are visited.
+ /// One reason to override this method is to provide a "short circuit"
+ /// evaluation option for situations where the result of visiting a single
+ /// child has the potential to determine the result of the visit operation as
+ /// a whole.
      + /// + /// @param node The [RuleNode] whose children are currently being + /// visited. + /// @param currentResult The current aggregate result of the children visited + /// to the current point. + /// + /// @return [true] to continue visiting children. Otherwise return + /// [false] to stop visiting children and immediately return the + /// current aggregate result from {@link #visitChildren}. + bool shouldVisitNextChild(RuleNode node, T currentResult) => true; +} + +abstract class ParseTreeListener { + void visitTerminal(TerminalNode node); + + void visitErrorNode(ErrorNode node); + + void enterEveryRule(ParserRuleContext node); + + void exitEveryRule(ParserRuleContext node); +} + +class TraceListener implements ParseTreeListener { + final Parser parser; + + TraceListener(this.parser); + + @override + void enterEveryRule(ParserRuleContext ctx) { + log('enter ' + + parser.ruleNames[ctx.ruleIndex] + + ', LT(1)=${parser.inputStream.LT(1).text}'); + } + + @override + void visitTerminal(TerminalNode node) { + log('consume ${node.symbol} rule ' + + parser.ruleNames[parser.context.ruleIndex]); + } + + @override + void visitErrorNode(ErrorNode node) {} + + @override + void exitEveryRule(ParserRuleContext ctx) { + log('exit ${parser.ruleNames[ctx.ruleIndex]}' ', LT(1)=' + + parser.inputStream.LT(1).text); + } +} + +class TrimToSizeListener implements ParseTreeListener { + static final TrimToSizeListener INSTANCE = TrimToSizeListener(); + + @override + void enterEveryRule(ParserRuleContext ctx) {} + + @override + void visitTerminal(TerminalNode node) {} + + @override + void visitErrorNode(ErrorNode node) {} + + @override + void exitEveryRule(ParserRuleContext ctx) { + // TODO trim dart List's size +// if (ctx.children is List) { +// (ctx.children).trimToSize(); +// } + } +} + +class TerminalNodeImpl extends TerminalNode { + @override + Token symbol; + @override + ParseTree parent; + + TerminalNodeImpl(this.symbol); + + @override + ParseTree getChild(i) { + return null; + } + + @override + Token get payload => symbol; + + @override + Interval get sourceInterval { + if (symbol == null) return Interval.INVALID; + + final tokenIndex = symbol.tokenIndex; + return Interval(tokenIndex, tokenIndex); + } + + @override + int get childCount { + return 0; + } + + @override + T accept(ParseTreeVisitor visitor) { + return visitor.visitTerminal(this); + } + + @override + String get text { + return symbol.text; + } + + @override + String toStringTree({Parser parser}) { + return toString(); + } + + @override + String toString() { + if (symbol.type == Token.EOF) return ''; + return symbol.text; + } +} + +/// Represents a token that was consumed during resynchronization +/// rather than during a valid match operation. For example, +/// we will create this kind of a node during single token insertion +/// and deletion as well as during "consume until error recovery set" +/// upon no viable alternative exceptions. 
+class ErrorNodeImpl extends TerminalNodeImpl implements ErrorNode { + ErrorNodeImpl(token) : super(token); + + bool isErrorNode() => true; + + @override + T accept(ParseTreeVisitor visitor) { + return visitor.visitErrorNode(this); + } +} + +class ParseTreeWalker { + void walk(ParseTreeListener listener, ParseTree t) { + if (t is ErrorNode) { + listener.visitErrorNode(t); + return; + } else if (t is TerminalNode) { + listener.visitTerminal(t); + return; + } + RuleNode r = t; + enterRule(listener, r); + for (var i = 0; i < r.childCount; i++) { + walk(listener, r.getChild(i)); + } + exitRule(listener, r); + } + + /// The discovery of a rule node, involves sending two events: the generic + /// {@link ParseTreeListener#enterEveryRule} and a + /// [RuleContext]-specific event. First we trigger the generic and then + /// the rule specific. We to them in reverse order upon finishing the node. + void enterRule(ParseTreeListener listener, RuleNode r) { + ParserRuleContext ctx = r.ruleContext; + listener.enterEveryRule(ctx); + ctx.enterRule(listener); + } + + void exitRule(ParseTreeListener listener, RuleNode r) { + ParserRuleContext ctx = r.ruleContext; + ctx.exitRule(listener); + listener.exitEveryRule(ctx); + } + + static final DEFAULT = ParseTreeWalker(); +} diff --git a/runtime/Dart/lib/src/tree/src/trees.dart b/runtime/Dart/lib/src/tree/src/trees.dart new file mode 100644 index 000000000..80a447238 --- /dev/null +++ b/runtime/Dart/lib/src/tree/src/trees.dart @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:core'; + +import '../../atn/atn.dart'; +import '../../parser.dart'; +import '../../parser_rule_context.dart'; +import '../../rule_context.dart'; +import '../../token.dart'; +import '../../util/utils.dart'; +import 'tree.dart'; + +/// A set of utility routines useful for all kinds of ANTLR trees. */ +class Trees { + /// Print out a whole tree in LISP form. {@link #getNodeText} is used on the + /// node payloads to get the text for the nodes. Detect + /// parse trees and extract data appropriately. 
+ static String toStringTree(Tree t, {Parser recog, List ruleNames}) { + ruleNames ??= recog?.ruleNames; + var s = escapeWhitespace(getNodeText(t, ruleNames: ruleNames), false); + if (t.childCount == 0) return s; + final buf = StringBuffer(); + buf.write('('); + s = escapeWhitespace(getNodeText(t, ruleNames: ruleNames), false); + buf.write(s); + buf.write(' '); + for (var i = 0; i < t.childCount; i++) { + if (i > 0) buf.write(' '); + buf.write(toStringTree(t.getChild(i), ruleNames: ruleNames)); + } + buf.write(')'); + return buf.toString(); + } + + static String getNodeText(Tree t, {Parser recog, List ruleNames}) { + ruleNames ??= recog?.ruleNames; + if (ruleNames != null) { + if (t is RuleContext) { + final ruleIndex = t.ruleContext.ruleIndex; + final ruleName = ruleNames[ruleIndex]; + final altNumber = t.altNumber; + if (altNumber != ATN.INVALID_ALT_NUMBER) { + return ruleName + ':$altNumber'; + } + return ruleName; + } else if (t is ErrorNode) { + return t.toString(); + } else if (t is TerminalNode) { + final symbol = (t).symbol; + if (symbol != null) { + final s = symbol.text; + return s; + } + } + } + // no recog for rule names + Object payload = t.payload; + if (payload is Token) { + return payload.text; + } + return t.payload.toString(); + } + + /// Return ordered list of all children of this node */ + static List getChildren(Tree t) { + final kids = []; + for (var i = 0; i < t.childCount; i++) { + kids.add(t.getChild(i)); + } + return kids; + } + + /// Return a list of all ancestors of this node. The first node of + /// list is the root and the last is the parent of this node. + /// + /// @since 4.5.1 + static List getAncestors(Tree t) { + if (t.parent == null) return []; + final ancestors = []; + t = t.parent; + while (t != null) { + ancestors.insert(0, t); // insert at start + t = t.parent; + } + return ancestors; + } + + /// Return true if t is u's parent or a node on path to root from u. + /// Use == not equals(). + /// + /// @since 4.5.1 + static bool isAncestorOf(Tree t, Tree u) { + if (t == null || u == null || t.parent == null) return false; + var p = u.parent; + while (p != null) { + if (t == p) return true; + p = p.parent; + } + return false; + } + + static List findAllTokenNodes(ParseTree t, int ttype) { + return findAllNodes(t, ttype, true); + } + + static List findAllRuleNodes(ParseTree t, int ruleIndex) { + return findAllNodes(t, ruleIndex, false); + } + + static List findAllNodes(ParseTree t, int index, bool findTokens) { + final nodes = []; + _findAllNodes(t, index, findTokens, nodes); + return nodes; + } + + static void _findAllNodes( + ParseTree t, int index, bool findTokens, List nodes) { + // check this node (the root) first + if (findTokens && t is TerminalNode) { + final tnode = t; + if (tnode.symbol.type == index) nodes.add(t); + } else if (!findTokens && t is ParserRuleContext) { + final ctx = t; + if (ctx.ruleIndex == index) nodes.add(t); + } + // check children + for (var i = 0; i < t.childCount; i++) { + _findAllNodes(t.getChild(i), index, findTokens, nodes); + } + } + + /// Get all descendents; includes t itself. 
+ /// + /// @since 4.5.1 + static List getDescendants(ParseTree t) { + final nodes = []; + nodes.add(t); + + final n = t.childCount; + for (var i = 0; i < n; i++) { + nodes.addAll(getDescendants(t.getChild(i))); + } + return nodes; + } + + /// @deprecated */ + static List descendants(ParseTree t) { + return getDescendants(t); + } + + /// Find smallest subtree of t enclosing range startTokenIndex..stopTokenIndex + /// inclusively using postorder traversal. Recursive depth-first-search. + /// + /// @since 4.5.1 + static ParserRuleContext getRootOfSubtreeEnclosingRegion( + ParseTree t, + int startTokenIndex, // inclusive + int stopTokenIndex) // inclusive + { + final n = t.childCount; + for (var i = 0; i < n; i++) { + final child = t.getChild(i); + final r = getRootOfSubtreeEnclosingRegion( + child, startTokenIndex, stopTokenIndex); + if (r != null) return r; + } + if (t is ParserRuleContext) { + final r = t; + if (startTokenIndex >= + r.start.tokenIndex && // is range fully contained in t? + (r.stop == null || stopTokenIndex <= r.stop.tokenIndex)) { + // note: r.getStop()==null likely implies that we bailed out of parser and there's nothing to the right + return r; + } + } + return null; + } + + /// Replace any subtree siblings of root that are completely to left + /// or right of lookahead range with a CommonToken(Token.INVALID_TYPE,"...") + /// node. The source interval for t is not altered to suit smaller range! + /// + /// WARNING: destructive to t. + /// + /// @since 4.5.1 + static void stripChildrenOutOfRange(ParserRuleContext t, + ParserRuleContext root, int startIndex, int stopIndex) { + if (t == null) return; + for (var i = 0; i < t.childCount; i++) { + final child = t.getChild(i); + final range = child.sourceInterval; + if (child is ParserRuleContext && + (range.b < startIndex || range.a > stopIndex)) { + if (isAncestorOf(child, root)) { + // replace only if subtree doesn't have displayed root + final abbrev = CommonToken(Token.INVALID_TYPE, text: '...'); + t.children[i] = TerminalNodeImpl(abbrev); + } + } + } + } + + /// Return first node satisfying the pred + /// + /// @since 4.5.1 + static Tree findNodeSuchThat(Tree t, Predicate pred) { + if (pred.test(t)) return t; + + if (t == null) return null; + + final n = t.childCount; + for (var i = 0; i < n; i++) { + final u = findNodeSuchThat(t.getChild(i), pred); + if (u != null) return u; + } + return null; + } +} + +abstract class Predicate { + bool test(T t); +} diff --git a/runtime/Dart/lib/src/tree/tree.dart b/runtime/Dart/lib/src/tree/tree.dart new file mode 100644 index 000000000..9010020ea --- /dev/null +++ b/runtime/Dart/lib/src/tree/tree.dart @@ -0,0 +1,10 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +export 'src/pattern/chunk.dart'; +export 'src/pattern/parse_tree_match.dart'; +export 'src/tree.dart'; +export 'src/trees.dart'; diff --git a/runtime/Dart/lib/src/util/bit_set.dart b/runtime/Dart/lib/src/util/bit_set.dart new file mode 100644 index 000000000..087584945 --- /dev/null +++ b/runtime/Dart/lib/src/util/bit_set.dart @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +import 'dart:math'; +import 'dart:typed_data'; + +class BitSet { + static final Uint32List EmptyBits = Uint32List(0); + static const BitsPerElement = 8 * 32; + + Uint32List _data = EmptyBits; + + BitSet([int nbits = 0]) { + if (nbits == 0) { + return; + } + if (nbits < 0) throw RangeError('nbits'); + + if (nbits > 0) { + final length = ((nbits + BitsPerElement - 1) / BitsPerElement).floor(); + _data = Uint32List(length); + } + } + + static int getBitCount(Uint32List value) { + var data = 0; + final size = value.length; + const m1 = 0x5555555555555555; + const m2 = 0x3333333333333333; + const m4 = 0x0F0F0F0F0F0F0F0F; + const m8 = 0x00FF00FF00FF00FF; + const m16 = 0x0000FFFF0000FFFF; + const h01 = 0x0101010101010101; + + var bitCount = 0; + final limit30 = size - size % 30; + + // 64-bit tree merging (merging3) + for (var i = 0; i < limit30; i += 30, data += 30) { + var acc = 0; + for (var j = 0; j < 30; j += 3) { + var count1 = value[data + j]; + var count2 = value[data + j + 1]; + var half1 = value[data + j + 2]; + var half2 = half1; + half1 &= m1; + half2 = (half2 >> 1) & m1; + count1 -= (count1 >> 1) & m1; + count2 -= (count2 >> 1) & m1; + count1 += half1; + count2 += half2; + count1 = (count1 & m2) + ((count1 >> 2) & m2); + count1 += (count2 & m2) + ((count2 >> 2) & m2); + acc += (count1 & m4) + ((count1 >> 4) & m4); + } + + acc = (acc & m8) + ((acc >> 8) & m8); + acc = (acc + (acc >> 16)) & m16; + acc = acc + (acc >> 32); + bitCount += acc; + } + + // count the bits of the remaining bytes (MAX 29*8) using + // "Counting bits set, in parallel" from the "Bit Twiddling Hacks", + // the code uses wikipedia's 64-bit popcount_3() implementation: + // http://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation + for (var i = 0; i < size - limit30; i++) { + var x = value[data + i]; + x = x - ((x >> 1) & m1); + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + bitCount += ((x * h01) >> 56); + } + + return bitCount; + } + + static final List index64 = [ + 0, + 47, + 1, + 56, + 48, + 27, + 2, + 60, + 57, + 49, + 41, + 37, + 28, + 16, + 3, + 61, + 54, + 58, + 35, + 52, + 50, + 42, + 21, + 44, + 38, + 32, + 29, + 23, + 17, + 11, + 4, + 62, + 46, + 55, + 26, + 59, + 40, + 36, + 15, + 53, + 34, + 51, + 20, + 43, + 31, + 22, + 10, + 45, + 25, + 39, + 14, + 33, + 19, + 30, + 9, + 24, + 13, + 18, + 8, + 12, + 7, + 6, + 5, + 63 + ]; + + static int BitScanForward(int value) { + if (value == 0) return -1; + + const debruijn64 = 0x03f79d71b4cb0a89; + return index64[(((value ^ (value - 1)) * debruijn64) >> 58) % 64]; + } + + BitSet clone() { + final result = BitSet(); + result._data = List.from(_data); + return result; + } + + void clear(int index) { + if (index < 0) throw RangeError('index'); + + final element = (index / BitsPerElement).floor(); + if (element >= _data.length) return; + + _data[element] &= ~(1 << (index % BitsPerElement)); + } + + bool operator [](int index) { + return get(index); + } + + bool get(int index) { + if (index < 0) throw RangeError('index'); + + final element = (index / BitsPerElement).floor(); + if (element >= _data.length) return false; + + return (_data[element] & (1 << (index % BitsPerElement))) != 0; + } + + void set(int index) { + if (index < 0) throw RangeError('index'); + + final element = (index / BitsPerElement).floor(); + if (element >= _data.length) { + final newList = Uint32List(max(_data.length * 2, element + 1)) + ..setRange(0, _data.length, _data); + _data = newList; + } + _data[element] |= 1 << (index % BitsPerElement); + } + + bool 
get isEmpty { + for (var i = 0; i < _data.length; i++) { + if (_data[i] != 0) return false; + } + + return true; + } + + int get cardinality { + return getBitCount(_data); + } + + int nextset(int fromIndex) { + if (fromIndex < 0) throw RangeError('fromIndex'); + + if (isEmpty) return -1; + + var i = (fromIndex / BitsPerElement).floor(); + if (i >= _data.length) return -1; + + var current = _data[i] & ~((1 << (fromIndex % BitsPerElement)) - 1); + + while (true) { + final bit = BitScanForward(current); + if (bit >= 0) return bit + i * BitsPerElement; + + i++; + if (i >= _data.length) break; + + current = _data[i]; + } + + return -1; + } + + void and(BitSet set) { + if (set == null) throw ArgumentError.notNull('set'); + + final length = min(_data.length, set._data.length); + for (var i = 0; i < length; i++) { + _data[i] &= set._data[i]; + } + + for (var i = length; i < _data.length; i++) { + _data[i] = 0; + } + } + + void or(BitSet set) { + if (set == null) throw ArgumentError.notNull('set'); + + if (set._data.length > _data.length) { + final newList = Uint32List(set._data.length) + ..setRange(0, _data.length, _data); + _data = newList; + } + + for (var i = 0; i < set._data.length; i++) { + _data[i] |= set._data[i]; + } + } + + @override + bool operator ==(obj) { + final other = obj as BitSet; + if (other == null) return false; + + if (isEmpty) return other.isEmpty; + + final minlength = min(_data.length, other._data.length); + for (var i = 0; i < minlength; i++) { + if (_data[i] != other._data[i]) return false; + } + + for (var i = minlength; i < _data.length; i++) { + if (_data[i] != 0) return false; + } + + for (var i = minlength; i < other._data.length; i++) { + if (other._data[i] != 0) return false; + } + + return true; + } + + @override + int get hashCode { + var result = 1; + for (var i = 0; i < _data.length; i++) { + if (_data[i] != 0) { + result = result * 31 ^ i; + result = result * 31 ^ _data[i]; + } + } + + return result.hashCode; + } + + @override + String toString() { + final builder = StringBuffer(); + builder.write('{'); + + for (var i = nextset(0); i >= 0; i = nextset(i + 1)) { + if (builder.length > 1) builder.write(', '); + + builder.write(i); + } + + builder.write('}'); + return builder.toString(); + } +} diff --git a/runtime/Dart/lib/src/util/murmur_hash.dart b/runtime/Dart/lib/src/util/murmur_hash.dart new file mode 100644 index 000000000..daffd0cd2 --- /dev/null +++ b/runtime/Dart/lib/src/util/murmur_hash.dart @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +class MurmurHash { + static final int DEFAULT_SEED = 0; + + /// Initialize the hash using the specified [seed]. + /// + /// @param seed the seed + /// @return the intermediate hash value + static int initialize([int seed]) { + return seed ?? DEFAULT_SEED; + } + + /// Update the intermediate hash value for the next input [value]. + /// + /// @param hash the intermediate hash value + /// @param value the value to add to the current hash + /// @return the updated intermediate hash value + static int update(int hash, [value]) { + final c1 = 0xCC9E2D51; + final c2 = 0x1B873593; + final r1 = 15; + final r2 = 13; + final m = 5; + final n = 0xE6546B64; + + var k = value is int ? value : value?.hashCode ?? 
0; + + k = k * c1; + k = (k << r1) | (k >> (32 - r1)); + k = k * c2; + + hash = hash ^ k; + hash = (hash << r2) | (hash >> (32 - r2)); + hash = hash * m + n; + + return hash; + } + + /// Apply the final computation steps to the intermediate value [hash] + /// to form the final result of the MurmurHash 3 hash function. + /// + /// @param hash the intermediate hash value + /// @param numberOfWords the number of integer values added to the hash + /// @return the final hash result + static int finish(int hash, int numberOfWords) { + hash = hash ^ (numberOfWords * 4); + hash = hash ^ (hash >> 16); + hash = hash * 0x85EBCA6B; + hash = hash ^ (hash >> 13); + hash = hash * 0xC2B2AE35; + hash = hash ^ (hash >> 16); + return hash; + } + + /// Utility function to compute the hash code of an array using the + /// MurmurHash algorithm. + /// + /// @param the array element type + /// @param data the array data + /// @param seed the seed for the MurmurHash algorithm + /// @return the hash code of the data + static int getHashCode(List data, int seed) { + var hash = initialize(seed); + + for (var value in data) { + hash = update(hash, value); + } + + hash = finish(hash, data.length); + return hash; + } +} diff --git a/runtime/Dart/lib/src/util/utils.dart b/runtime/Dart/lib/src/util/utils.dart new file mode 100644 index 000000000..1b3e7f7e0 --- /dev/null +++ b/runtime/Dart/lib/src/util/utils.dart @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +/// Convert array of strings to string→index map. Useful for +/// converting rulenames to name→ruleindex map. +Map toMap(List keys) { + final m = {}; + for (var i = 0; i < keys.length; i++) { + m[keys[i]] = i; + } + return m; +} + +String arrayToString(a) { + return '[' + a.join(', ') + ']'; +} + +String escapeWhitespace(String s, [bool escapeSpaces = false]) { + if (escapeSpaces) s = s.replaceAll(' ', '\u00B7'); + s = s.replaceAll('\n', r'\n'); + s = s.replaceAll('\r', r'\r'); + s = s.replaceAll('\t', r'\t'); + return s; +} + +bool isLowerCase(String s) => s.toLowerCase() == s; + +bool isUpperCase(String s) => s.toUpperCase() == s; diff --git a/runtime/Dart/lib/src/vocabulary.dart b/runtime/Dart/lib/src/vocabulary.dart new file mode 100644 index 000000000..a0f170045 --- /dev/null +++ b/runtime/Dart/lib/src/vocabulary.dart @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import 'dart:math'; + +import 'token.dart'; + +/// This interface provides information about the vocabulary used by a +/// recognizer. +/// +/// @see Recognizer#getVocabulary() +abstract class Vocabulary { + /// Returns the highest token type value. It can be used to iterate from + /// zero to that number, inclusively, thus querying all stored entries. + /// @return the highest token type value + int get maxTokenType; + + /// Gets the string literal associated with a token type. The string returned + /// by this method, when not null, can be used unaltered in a parser + /// grammar to represent this token type. + /// + ///
+  /// The following table shows examples of lexer rules and the literal
+  /// names assigned to the corresponding token types.
+  ///
+  /// | Rule                    | Literal Name   | Java String Literal |
+  /// |-------------------------|----------------|---------------------|
+  /// | {@code THIS : 'this';}  | {@code 'this'} | {@code "'this'"}    |
+  /// | {@code SQUOTE : '\'';}  | {@code '\''}   | {@code "'\\''"}     |
+  /// | {@code ID : [A-Z]+;}    | n/a            | null                |
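+  ///
+  /// As a purely illustrative sketch (not part of the original docs;
+  /// `MyParser` and its token constants are hypothetical):
+  ///
+  /// ```
+  /// final vocab = MyParser.VOCABULARY;
+  /// print(vocab.getLiteralName(MyParser.TOKEN_THIS)); // prints: 'this'
+  /// print(vocab.getLiteralName(MyParser.TOKEN_ID));   // prints: null
+  /// ```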
+  ///
+  /// @param tokenType The token type.
+  ///
+  /// @return The string literal associated with the specified token type, or
+  /// null if no string literal is associated with the type.
+  String getLiteralName(int tokenType);
+
+  /// Gets the symbolic name associated with a token type. The string returned
+  /// by this method, when not null, can be used unaltered in a parser
+  /// grammar to represent this token type.
+  ///
+  /// This method supports token types defined by any of the following
+  /// methods:
+  ///
+  /// * Tokens created by lexer rules.
+  /// * Tokens defined in a {@code tokens{}} block in a lexer or parser
+  ///   grammar.
+  /// * The implicitly defined [EOF] token, which has the token type
+  ///   {@link Token#EOF}.
+  ///
+  /// The following table shows examples of lexer rules and the symbolic
+  /// names assigned to the corresponding token types.
+  ///
+  /// | Rule                    | Symbolic Name |
+  /// |-------------------------|---------------|
+  /// | {@code THIS : 'this';}  | [THIS]        |
+  /// | {@code SQUOTE : '\'';}  | [SQUOTE]      |
+  /// | {@code ID : [A-Z]+;}    | [ID]          |
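+  ///
+  /// Illustrative sketch only (`MyParser` is hypothetical); note that
+  /// [Token.EOF] resolves even though no lexer rule defines it:
+  ///
+  /// ```
+  /// final vocab = MyParser.VOCABULARY;
+  /// print(vocab.getSymbolicName(MyParser.TOKEN_ID)); // prints: ID
+  /// print(vocab.getSymbolicName(Token.EOF));         // prints: EOF
+  /// ```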
+  ///
+  /// @param tokenType The token type.
+  ///
+  /// @return The symbolic name associated with the specified token type, or
+  /// null if no symbolic name is associated with the type.
+  String getSymbolicName(int tokenType);
+
+  /// Gets the display name of a token type.
+  ///
+  /// ANTLR provides a default implementation of this method, but
+  /// applications are free to override the behavior in any manner which makes
+  /// sense for the application. The default implementation returns the first
+  /// result from the following list which produces a non-null result.
+  ///
+  /// 1. The result of {@link #getLiteralName}
+  /// 2. The result of {@link #getSymbolicName}
+  /// 3. The result of {@link Integer#toString}
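+  ///
+  /// A minimal sketch of that fallback order (illustrative, not from the
+  /// original docs; `MyParser` is a hypothetical generated parser):
+  ///
+  /// ```
+  /// // Given MyParser with rules THIS : 'this'; and ID : [A-Z]+; :
+  /// final vocab = MyParser.VOCABULARY;
+  /// print(vocab.getDisplayName(MyParser.TOKEN_THIS)); // 'this' (literal wins)
+  /// print(vocab.getDisplayName(MyParser.TOKEN_ID));   // ID (symbolic next)
+  /// print(vocab.getDisplayName(9999));                // 9999 (numeric fallback)
+  /// ```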
+  ///
+  /// @param tokenType The token type.
+  ///
+  /// @return The display name of the token type, for use in error reporting or
+  /// other user-visible messages which reference specific token types.
+  String getDisplayName(int tokenType);
+}
+
+/// This class provides a default implementation of the [Vocabulary]
+/// interface.
+class VocabularyImpl implements Vocabulary {
+  static const List<String> EMPTY_NAMES = [];
+
+  /// Gets an empty [Vocabulary] instance.
+  ///
+  /// No literal or symbol names are assigned to token types, so
+  /// {@link #getDisplayName(int)} returns the numeric value for all tokens
+  /// except {@link Token#EOF}.
+  static final VocabularyImpl EMPTY_VOCABULARY =
+      VocabularyImpl(EMPTY_NAMES, EMPTY_NAMES, EMPTY_NAMES);
+
+  final List<String> literalNames;
+
+  final List<String> symbolicNames;
+
+  final List<String> displayNames;
+
+  @override
+  int maxTokenType;
+
+  /// Constructs a new instance of [VocabularyImpl] from the specified
+  /// literal, symbolic, and display token names.
+  ///
+  /// @param literalNames The literal names assigned to tokens, or null
+  /// if no literal names are assigned.
+  /// @param symbolicNames The symbolic names assigned to tokens, or
+  /// null if no symbolic names are assigned.
+  /// @param displayNames The display names assigned to tokens, or null
+  /// to use the values in [literalNames] and [symbolicNames] as
+  /// the source of display names, as described in
+  /// {@link #getDisplayName(int)}.
+  ///
+  /// @see #getLiteralName(int)
+  /// @see #getSymbolicName(int)
+  /// @see #getDisplayName(int)
+  VocabularyImpl(this.literalNames, this.symbolicNames,
+      [this.displayNames = EMPTY_NAMES]) {
+    // See note here on -1 part: https://github.com/antlr/antlr4/pull/1146
+    maxTokenType = max(displayNames.length,
+            max(literalNames.length, symbolicNames.length)) -
+        1;
+  }
+
+  /// Returns a [VocabularyImpl] instance from the specified set of token
+  /// names. This method acts as a compatibility layer for the single
+  /// [tokenNames] array generated by previous releases of ANTLR.
+  ///
+  /// The resulting vocabulary instance returns null for
+  /// {@link #getLiteralName(int)} and {@link #getSymbolicName(int)}, and the
+  /// value from [tokenNames] for the display names.
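+  ///
+  /// Illustrative sketch of the compatibility path (the array contents here
+  /// are hypothetical):
+  ///
+  /// ```
+  /// // Legacy single-array form, as generated by older ANTLR releases:
+  /// const tokenNames = [null, "'this'", 'ID'];
+  /// final vocab = VocabularyImpl.fromTokenNames(tokenNames);
+  /// print(vocab.getDisplayName(2)); // prints: ID
+  /// ```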
      + /// + /// @param tokenNames The token names, or null if no token names are + /// available. + /// @return A [Vocabulary] instance which uses [tokenNames] for + /// the display names of tokens. + static Vocabulary fromTokenNames(List tokenNames) { + if (tokenNames == null || tokenNames.isEmpty) { + return EMPTY_VOCABULARY; + } + + final literalNames = List.from(tokenNames); + final symbolicNames = List.from(tokenNames); + for (var i = 0; i < tokenNames.length; i++) { + final tokenName = tokenNames[i]; + if (tokenName == null) { + continue; + } + + if (tokenName.isNotEmpty) { + final firstChar = tokenName[0]; + if (firstChar == '\'') { + symbolicNames[i] = null; + continue; + } else if (firstChar.toUpperCase() == firstChar) { + literalNames[i] = null; + continue; + } + } + + // wasn't a literal or symbolic name + literalNames[i] = null; + symbolicNames[i] = null; + } + + return VocabularyImpl(literalNames, symbolicNames, tokenNames); + } + + @override + String getLiteralName(int tokenType) { + if (tokenType >= 0 && tokenType < literalNames.length) { + return literalNames[tokenType]; + } + + return null; + } + + @override + String getSymbolicName(int tokenType) { + if (tokenType >= 0 && tokenType < symbolicNames.length) { + return symbolicNames[tokenType]; + } + + if (tokenType == Token.EOF) { + return 'EOF'; + } + + return null; + } + + @override + String getDisplayName(int tokenType) { + if (tokenType >= 0 && tokenType < displayNames.length) { + final displayName = displayNames[tokenType]; + if (displayName != null) { + return displayName; + } + } + + final literalName = getLiteralName(tokenType); + if (literalName != null) { + return literalName; + } + + final symbolicName = getSymbolicName(tokenType); + if (symbolicName != null) { + return symbolicName; + } + + return tokenType.toString(); + } +} diff --git a/runtime/Dart/pubspec.yaml b/runtime/Dart/pubspec.yaml new file mode 100644 index 000000000..3a5c79645 --- /dev/null +++ b/runtime/Dart/pubspec.yaml @@ -0,0 +1,13 @@ +name: "antlr4" +version: "4.8.0-dev.2" +description: "New Dart runtime for ANTLR4." +homepage: "https://github.com/antlr/antlr4" +license: "BSD-3-Clause" +dependencies: + logging: ^0.11.4 + collection: ^1.14.12 +dev_dependencies: + pedantic: ^1.0.0 + +environment: + sdk: ">=2.7.0 <3.0.0" diff --git a/runtime/JavaScript/src/antlr4/Utils.js b/runtime/JavaScript/src/antlr4/Utils.js index eb2d05f84..4b52424dd 100644 --- a/runtime/JavaScript/src/antlr4/Utils.js +++ b/runtime/JavaScript/src/antlr4/Utils.js @@ -66,11 +66,11 @@ String.prototype.hashCode = function () { }; function standardEqualsFunction(a, b) { - return a.equals(b); + return a ? a.equals(b) : a==b; } function standardHashCodeFunction(a) { - return a.hashCode(); + return a ? 
a.hashCode() : -1; } class Set { diff --git a/runtime/JavaScript/src/antlr4/error/index.js b/runtime/JavaScript/src/antlr4/error/index.js index 73d7740c3..482b47edb 100644 --- a/runtime/JavaScript/src/antlr4/error/index.js +++ b/runtime/JavaScript/src/antlr4/error/index.js @@ -10,4 +10,5 @@ module.exports.InputMismatchException = require('./Errors').InputMismatchExcepti module.exports.FailedPredicateException = require('./Errors').FailedPredicateException; module.exports.DiagnosticErrorListener = require('./DiagnosticErrorListener'); module.exports.BailErrorStrategy = require('./ErrorStrategy').BailErrorStrategy; +module.exports.DefaultErrorStrategy = require('./ErrorStrategy').DefaultErrorStrategy; module.exports.ErrorListener = require('./ErrorListener').ErrorListener; diff --git a/runtime/JavaScript/src/antlr4/index.js b/runtime/JavaScript/src/antlr4/index.js index 7bad24203..a8392d6d7 100644 --- a/runtime/JavaScript/src/antlr4/index.js +++ b/runtime/JavaScript/src/antlr4/index.js @@ -20,4 +20,6 @@ var pc = require('./PredictionContext'); exports.PredictionContextCache = pc.PredictionContextCache; exports.ParserRuleContext = require('./ParserRuleContext'); exports.Interval = require('./IntervalSet').Interval; +exports.IntervalSet = require('./IntervalSet').IntervalSet; exports.Utils = require('./Utils'); +exports.LL1Analyzer = require('./LL1Analyzer').LL1Analyzer; diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/CSharp/CSharp.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/CSharp/CSharp.stg index a665ba67b..13d497dcb 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/CSharp/CSharp.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/CSharp/CSharp.stg @@ -116,6 +116,7 @@ using ParserRuleContext = Antlr4.Runtime.ParserRuleContext; /// of the available methods. 
/// \ [System.CodeDom.Compiler.GeneratedCode("ANTLR", "")] +[System.Diagnostics.DebuggerNonUserCode] [System.CLSCompliant(false)] public partial class BaseListener : IListener { /// \The return type of the visit operation.\ [System.CodeDom.Compiler.GeneratedCode("ANTLR", "")] +[System.Diagnostics.DebuggerNonUserCode] [System.CLSCompliant(false)] public partial class BaseVisitor\ : AbstractParseTreeVisitor\, IVisitor\ { ContextTokenGetterDecl(t) ::= - "public ITerminalNode () { return GetToken(., 0); }" + "[System.Diagnostics.DebuggerNonUserCode] public ITerminalNode () { return GetToken(., 0); }" ContextTokenListGetterDecl(t) ::= << -public () { return GetTokens(.); } +[System.Diagnostics.DebuggerNonUserCode] public () { return GetTokens(.); } >> ContextTokenListIndexedGetterDecl(t) ::= << -public ITerminalNode (int i) { +[System.Diagnostics.DebuggerNonUserCode] public ITerminalNode (int i) { return GetToken(., i); } >> ContextRuleGetterDecl(r) ::= << -public () { +[System.Diagnostics.DebuggerNonUserCode] public () { return GetRuleContext\<\>(0); } >> ContextRuleListGetterDecl(r) ::= << -public })> () { +[System.Diagnostics.DebuggerNonUserCode] public })> () { return GetRuleContexts\<\>(); } >> ContextRuleListIndexedGetterDecl(r) ::= << -public (int i) { +[System.Diagnostics.DebuggerNonUserCode] public (int i) { return GetRuleContext\<\>(i); } >> @@ -887,6 +889,7 @@ public partial class : Context { >> ListenerDispatchMethod(method) ::= << +[System.Diagnostics.DebuggerNonUserCode] public override void EnterExitRule(IParseTreeListener listener) { IListener typedListener = listener as IListener; if (typedListener != null) typedListener.EnterExit(this); @@ -894,6 +897,7 @@ public override void EnterExitRule(IParseTreeLi >> VisitorDispatchMethod(method) ::= << +[System.Diagnostics.DebuggerNonUserCode] public override TResult Accept\(IParseTreeVisitor\ visitor) { IVisitor\ typedVisitor = visitor as IVisitor\; if (typedVisitor != null) return typedVisitor.Visit(this); diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg index 4c367dda5..6930e3e47 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg @@ -961,19 +961,19 @@ AddToLabelList(a) ::= << TokenLabelType() ::= " *" -TokenDeclHeader(t) ::= "antlr4:: = nullptr;" +TokenDeclHeader(t) ::= "antlr4:: = nullptr" TokenDecl(t) ::= "" TokenTypeDeclHeader(t) ::= "" TokenTypeDecl(t) ::= "size_t = 0;" -TokenListDeclHeader(t) ::= "std::vector\ ;" +TokenListDeclHeader(t) ::= "std::vector\ " TokenListDecl(t) ::= "" -RuleContextDeclHeader(r) ::= ":: * = nullptr;" +RuleContextDeclHeader(r) ::= ":: * = nullptr" RuleContextDecl(r) ::= "" -RuleContextListDeclHeader(rdecl) ::= "std::vector\< *> ;" +RuleContextListDeclHeader(rdecl) ::= "std::vector\< *> " RuleContextListDecl(rdecl) ::= "" ContextTokenGetterDeclHeader(t) ::= "antlr4::tree::TerminalNode *();" diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Dart/Dart.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Dart/Dart.stg new file mode 100644 index 000000000..39f453bec --- /dev/null +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Dart/Dart.stg @@ -0,0 +1,908 @@ +/* + * [The "BSD license"] + * Copyright (c) 2012 Terence Parr + * Copyright (c) 2012 Sam Harwell + * Copyright (c) 2014 Tiago Mazzutti + * Copyright (c) 2017 Tobe Osakwe + * Copyright (c) 2020 Larry Li + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +dartTypeInitMap ::= [ + "int":"0", + "double":"0.0", + "bool":"false", + default:"null" // anything other than a primitive type is an object +] + +// args must be , + +ParserFile(file, parser, namedActions, contextSuperClass) ::= << + + +library ; + +import 'package:antlr4/antlr4.dart'; +import 'dart:io'; + + +part 'Listener.dart'; +part 'BaseListener.dart'; + + +part 'Visitor.dart'; +part 'BaseVisitor.dart'; + +part 'Lexer.dart'; + +import 'package:antlr4/antlr4.dart'; +import 'dart:io'; + + +import 'Listener.dart'; +import 'BaseListener.dart'; + + +import 'Visitor.dart'; +import 'BaseVisitor.dart'; + + + + + +>> + +ListenerFile(file, header, namedActions) ::= << + + +part of ; + +import 'package:antlr4/antlr4.dart'; + +import '.dart'; + +
      + +/// This abstract class defines a complete listener for a parse tree produced by +/// []. +abstract class Listener extends ParseTreeListener { + + /// Enter a parse tree produced by the [] + /// labeled alternative in [file.parserName>.]. + + /// Enter a parse tree produced by [.]. + + /// [ctx] the parse tree + void enter(Context ctx); + + /// Exit a parse tree produced by the [] + /// labeled alternative in [.]. + + /// Exit a parse tree produced by [.]. + + /// [ctx] the parse tree + void exit(Context ctx);}; separator="\n"> +} +>> + +BaseListenerFile(file, header, namedActions) ::= << + + +part of ; + +import 'package:antlr4/antlr4.dart'; + +import '.dart'; +import 'Listener.dart'; + + +
      + +/// This class provides an empty implementation of [Listener], +/// which can be extended to create a listener which only needs to handle +/// a subset of the available methods. +class BaseListener implements Listener { +(Context ctx) {\} + + /// The default implementation does nothing. + @override + void exit(Context ctx) {\}}; separator="\n"> + + /// The default implementation does nothing. + @override + void enterEveryRule(ParserRuleContext ctx) {} + + /// The default implementation does nothing. + @override + void exitEveryRule(ParserRuleContext ctx) {} + + /// The default implementation does nothing. + @override + void visitTerminal(TerminalNode node) {} + + /// The default implementation does nothing. + @override + void visitErrorNode(ErrorNode node) {} +} + +>> + +VisitorFile(file, header, namedActions) ::= << + + +part of ; + +import 'package:antlr4/antlr4.dart'; + +import '.dart'; + +
      + +/// This abstract class defines a complete generic visitor for a parse tree +/// produced by []. +/// +/// [T] is the eturn type of the visit operation. Use `void` for +/// operations with no return type. +abstract class Visitor\ extends ParseTreeVisitor\ { + +/// Visit a parse tree produced by the {@code \} +/// labeled alternative in {@link #\}. + +/// Visit a parse tree produced by [.]. + +/// [ctx] the parse tree. +/// Return the visitor result. +T visit(Context ctx);}; separator="\n"> +} +>> + +BaseVisitorFile(file, header, namedActions) ::= << + + +part of ; + +import 'package:antlr4/antlr4.dart'; + +import '.dart'; +import 'Visitor.dart'; + +
      + +/// This class provides an empty implementation of [Visitor], +/// which can be extended to create a visitor which only needs to handle +/// a subset of the available methods. +/// +/// [T] is the return type of the visit operation. Use `void` for +/// operations with no return type. +class BaseVisitor\ extends ParseTreeVisitor\ implements Visitor\ { + (Context ctx) => visitChildren(ctx);}; separator="\n"> +} +>> + +fileHeader(grammarFileName, ANTLRVersion) ::= << +// Generated from by ANTLR +// ignore_for_file: unused_import, unused_local_variable, prefer_single_quotes +>> + +Parser(parser, funcs, atn, sempredFuncs, superClass) ::= << + +>> + +Parser_(parser, funcs, atn, sempredFuncs, ctor, superClass) ::= << + +const int = }; separator=", ", wrap, anchor>; +class extends { + static final checkVersion = () => RuntimeMetaData.checkVersion('', RuntimeMetaData.VERSION); + static const int TOKEN_EOF = IntStream.EOF; + + static final List\ _decisionToDFA = List.generate( + _ATN.numberOfDecisions, (i) => DFA(_ATN.getDecisionState(i), i)); + static final PredictionContextCache _sharedContextCache = PredictionContextCache(); + + static const int = }; separator=", ", wrap, anchor>; + + + @override + final List\ ruleNames = [ + '}; separator=", ", wrap, anchor> + ]; + + + + @override + String get grammarFileName => ''; + + @override + String get serializedATN => _serializedATN; + + @override + ATN getATN() { + return _ATN; + } + + + + + + + @override + bool sempred(RuleContext _localctx, int ruleIndex, int predIndex) { + switch (ruleIndex) { + : + return __sempred(_localctx, predIndex);}; separator="\n"> + } + return true; + } + + + + +} +}; separator="\n\n"> + +}; separator="\n\n">}> +>> + +vocabulary(literalNames, symbolicNames) ::= << +static final List\ _LITERAL_NAMES = [ + }; null="null", separator=", ", wrap, anchor> +]; +static final List\ _SYMBOLIC_NAMES = [ + }; null="null", separator=", ", wrap, anchor> +]; +static final Vocabulary VOCABULARY = VocabularyImpl(_LITERAL_NAMES, _SYMBOLIC_NAMES); + +@override +Vocabulary get vocabulary { + return VOCABULARY; +} +>> + +dumpActions(recog, argFuncs, actionFuncs, sempredFuncs) ::= << + +void action(RuleContext _localctx, int ruleIndex, int actionIndex) { + switch (ruleIndex) { + : + __action(_localctx, actionIndex); + break;}; separator="\n"> + } +} + + + +bool sempred(RuleContext _localctx, int ruleIndex, int predIndex) { + switch (ruleIndex) { + : + return __sempred(_localctx, predIndex);}; separator="\n"> + } + return true; +} + + +>> + +parser_ctor(p) ::= << +(TokenStream input) : super(input) { + interpreter = ParserATNSimulator(this, _ATN, _decisionToDFA, _sharedContextCache); +} +>> + +/// This generates a private method since the actionIndex is generated, making an +/// overriding implementation impossible to maintain. +RuleActionFunction(r, actions) ::= << +void __action( _localctx, int actionIndex) { + switch (actionIndex) { + : break;}; separator="\n"> + } +} +>> + +/// This generates a private method since the predIndex is generated, making an +/// overriding implementation impossible to maintain. 
+RuleSempredFunction(r, actions) ::= << +bool __sempred( _localctx, int predIndex) { + switch (predIndex) { + : return ;}; separator="\n"> + } + return true; +} +>> + +RuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs,namedActions,finallyAction,postamble,exceptions) ::= << + + }> () { + dynamic _localctx = (context, state}>); + enterRule(_localctx, , RULE_); + + + try { + + int _alt; + + + + + } on RecognitionException catch (re) { + _localctx.exception = re; + errorHandler.reportError(this, re); + errorHandler.recover(this, re); + } finally { + + exitRule(); + } + return _localctx; +} +>> + +LeftRecursiveRuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs, + namedActions,finallyAction,postamble) ::= +<< + + ([int _p = 0]}>) { + final _parentctx = context; + final _parentState = state; + dynamic _localctx = (context, _parentState}>); + var _prevctx = _localctx; + var _startState = ; + enterRecursionRule(_localctx, , RULE_, _p); + + + try { + + int _alt; + + + + + } on RecognitionException catch (re) { + _localctx.exception = re; + errorHandler.reportError(this, re); + errorHandler.recover(this, re); + } finally { + + unrollRecursionContexts(_parentctx); + } + return _localctx; +} +>> + +CodeBlockForOuterMostAlt(currentOuterMostAltCodeBlock, locals, preamble, ops) ::= << +_localctx = Context(_localctx); +enterOuterAlt(_localctx, ); + +>> + +CodeBlockForAlt(currentAltCodeBlock, locals, preamble, ops) ::= << + + + +>> + +LL1AltBlock(choice, preamble, alts, error) ::= << +state = ; +errorHandler.sync(this); + = tokenStream.LT(1); + +switch (tokenStream.LA(1)) { + + + break;}; separator="\n"> +default: + +} +>> + +LL1OptionalBlock(choice, alts, error) ::= << +state = ; +errorHandler.sync(this); +switch (tokenStream.LA(1)) { + + + break;}; separator="\n"> +default: + break; +} +>> + +LL1OptionalBlockSingleAlt(choice, expr, alts, preamble, error, followExpr) ::= << +state = ; +errorHandler.sync(this); + +if () { + +} +) ) !> +>> + +LL1StarBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= << +state = ; +errorHandler.sync(this); + +while () { + + state = ; + errorHandler.sync(this); + +} +>> + +LL1PlusBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= << +state = ; +errorHandler.sync(this); + +do { + + state = ; + errorHandler.sync(this); + +} while (); +>> + +// LL(*) stuff + +AltBlock(choice, preamble, alts, error) ::= << +state = ; +errorHandler.sync(this); + = tokenStream.LT(1); + +switch (interpreter.adaptivePredict(tokenStream, , context)) { +: + + break;}; separator="\n"> +} +>> + +OptionalBlock(choice, alts, error) ::= << +state = ; +errorHandler.sync(this); +switch (interpreter.adaptivePredict(tokenStream, , context)) { ++1: + + break;}; separator="\n"> +} +>> + +StarBlock(choice, alts, sync, iteration) ::= << +state = ; +errorHandler.sync(this); +_alt = interpreter.adaptivePredict(tokenStream, , context); +while (_alt != && _alt != ATN.INVALID_ALT_NUMBER) { + if (_alt == 1 + 1) { + + + } + state = ; + errorHandler.sync(this); + _alt = interpreter.adaptivePredict(tokenStream, , context); +} +>> + +PlusBlock(choice, alts, error) ::= << +state = ; +errorHandler.sync(this); +_alt = 1+1; +do { + switch (_alt) { + + 1: + + break;}; separator="\n"> + default: + + } + state = ; + errorHandler.sync(this); + _alt = interpreter.adaptivePredict(tokenStream, , context); +} while (_alt != && _alt != ATN.INVALID_ALT_NUMBER); +>> + +Sync(s) ::= "sync();" + +ThrowNoViableAlt(t) ::= "throw NoViableAltException(this);" + +TestSetInline(s) ::= << +}; 
separator=" || "> +>> + +// Java language spec 15.19 - shift operators mask operands rather than overflow to 0... need range test +testShiftInRange(shiftAmount) ::= << +(() & ~0x3f) == 0 +>> + +// produces smaller bytecode only when bits.ttypes contains more than two items +bitsetBitfieldComparison(s, bits) ::= <% +(})> && ((BigInt.one \<\< ) & (}, bits.shift)>)}; separator=" | ">)) != BigInt.zero) +%> + +isZero ::= [ +"0":true, +default:false +] + +offsetShift(shiftAmount, offset) ::= <% +( - ) +%> + +// produces more efficient bytecode when bits.ttypes contains at most two items +bitsetInlineComparison(s, bits) ::= <% + == TOKEN_}; separator=" || "> +%> + +cases(ttypes) ::= << +:}; separator="\n"> +>> + +InvokeRule(r, argExprsChunks) ::=<< +state = ; + = }>(,); +>> + +MatchToken(m) ::= << +state = ; + = }>match(TOKEN_); +>> + +MatchSet(m, expr, capture) ::= "" + +MatchNotSet(m, expr, capture) ::= "" + +CommonSetStuff(m, expr, capture, invert) ::= << +state = ; + = }>tokenStream.LT(1); + +if ( \<= 0 || !()) { + = }>errorHandler.recoverInline(this); +} else { + if ( tokenStream.LA(1)==IntStream.EOF ) matchedEOF = true; + errorHandler.reportMatch(this); + consume(); +} +>> + +Wildcard(w) ::= << +state = ; + = }>matchWildcard(); +>> + +// ACTION STUFF + +Action(a, foo, chunks) ::= "" + +ArgAction(a, chunks) ::= "" + +SemPred(p, chunks, failChunks) ::= << +state = ; +if (!()) { + throw FailedPredicateException(this, , , ); +} +>> + +ExceptionClause(e, catchArg, catchAction) ::= << +catch () { + +} +>> + +// lexer actions are not associated with model objects + +LexerSkipCommand() ::= "skip();" +LexerMoreCommand() ::= "more();" +LexerPopModeCommand() ::= "popMode();" + +LexerTypeCommand(arg, grammar) ::= "type = ;" +LexerChannelCommand(arg, grammar) ::= "channel = ;" +LexerModeCommand(arg, grammar) ::= "mode_ = ;" +LexerPushModeCommand(arg, grammar) ::= "pushMode();" + +ActionText(t) ::= "" +ActionTemplate(t) ::= "" +ArgRef(a) ::= "_localctx." +LocalRef(a) ::= "_localctx." +RetValueRef(a) ::= "_localctx." +QRetValueRef(a) ::= ".." +/** How to translate $tokenLabel */ +TokenRef(t) ::= "." +LabelRef(t) ::= "." +ListLabelRef(t) ::= "." +SetAttr(s,rhsChunks) ::= ". = ;" + +TokenLabelType() ::= "" +InputSymbolType() ::= "" + +TokenPropertyRef_text(t) ::= ".?.text" +TokenPropertyRef_type(t) ::= ". != null ? ..type : 0" +TokenPropertyRef_line(t) ::= ". != null ? ..line : 0" +TokenPropertyRef_pos(t) ::= ". != null ? ..charPositionInLine : 0" +TokenPropertyRef_channel(t) ::= ". != null ? ..channel : 0" +TokenPropertyRef_index(t) ::= ". != null ? ..tokenIndex : 0" +TokenPropertyRef_int(t) ::= ". != null ? int.parse(..text) : 0" + +RulePropertyRef_start(r) ::= ".?.start" +RulePropertyRef_stop(r) ::= ".?.stop" +RulePropertyRef_text(r) ::= "(. != null ? tokenStream.getTextRange(..start, ..stop) : null)" +RulePropertyRef_ctx(r) ::= "." +RulePropertyRef_parser(r) ::= "this" + +ThisRulePropertyRef_start(r) ::= "_localctx.start" +ThisRulePropertyRef_stop(r) ::= "_localctx.stop" +ThisRulePropertyRef_text(r) ::= "tokenStream.getTextRange(_localctx.start, tokenStream.LT(-1))" +ThisRulePropertyRef_ctx(r) ::= "_localctx" +ThisRulePropertyRef_parser(r) ::= "this" + +NonLocalAttrRef(s) ::= "(getInvokingContext() as Context)." +SetNonLocalAttr(s, rhsChunks) ::= + "(getInvokingContext() as Context). 
= ;" + +AddToLabelList(a) ::= "..add();" + +TokenDecl(t) ::= " " +TokenTypeDecl(t) ::= "int ;" +TokenListDecl(t) ::= "List\ = List\()" +RuleContextDecl(r) ::= " " +RuleContextListDecl(rdecl) ::= "List\<> = List\<>()" + +ContextTokenGetterDecl(t) ::= << +TerminalNode () => getToken(.TOKEN_, 0); +>> +ContextTokenListGetterDecl(t) ::= << +List\ s() => getTokens(.TOKEN_); +>> +ContextTokenListIndexedGetterDecl(t) ::= << +TerminalNode (int i) => getToken(.TOKEN_, i); +>> +ContextRuleGetterDecl(r) ::= << + () => getRuleContext\<\>(0); +>> +ContextRuleListGetterDecl(r) ::= << +List\<\> s() => getRuleContexts\<\>(); +>> +ContextRuleListIndexedGetterDecl(r) ::= << + (int i) => getRuleContext\<\>(i); +>> + +LexerRuleContext() ::= "RuleContext" + +/// The rule context name is the rule followed by a suffix; e.g., +/// r becomes rContext. +RuleContextNameSuffix() ::= "Context" + +ImplicitTokenLabel(tokenName) ::= "_" +ImplicitRuleLabel(ruleName) ::= "_" +ImplicitSetLabel(id) ::= "_tset" +ListLabelName(label) ::= "